From 5a422aba704c375a307a902bafe658342e209906 Mon Sep 17 00:00:00 2001
From: scuri <scuri>
Date: Fri, 17 Oct 2008 06:10:15 +0000
Subject: First commit - moving from LuaForge to SourceForge

---
 src/.cvsignore                                |   10 +
 src/COPYRIGHT                                 |   32 +
 src/Makefile                                  |   22 +
 src/README                                    |   11 +
 src/config.mak                                |  141 +
 src/fftw/config.c                             |  163 +
 src/fftw/config.h                             |  171 +
 src/fftw/executor.c                           |  465 +++
 src/fftw/fftw-int.h                           |  500 +++
 src/fftw/fftw.h                               |  421 +++
 src/fftw/fftwnd.c                             |  806 +++++
 src/fftw/fn_1.c                               |   62 +
 src/fftw/fn_10.c                              |  285 ++
 src/fftw/fn_11.c                              |  312 ++
 src/fftw/fn_12.c                              |  324 ++
 src/fftw/fn_13.c                              |  546 ++++
 src/fftw/fn_14.c                              |  409 +++
 src/fftw/fn_15.c                              |  452 +++
 src/fftw/fn_16.c                              |  442 +++
 src/fftw/fn_2.c                               |   68 +
 src/fftw/fn_3.c                               |   93 +
 src/fftw/fn_32.c                              | 1049 ++++++
 src/fftw/fn_4.c                               |  102 +
 src/fftw/fn_5.c                               |  150 +
 src/fftw/fn_6.c                               |  159 +
 src/fftw/fn_64.c                              | 2464 ++++++++++++++
 src/fftw/fn_7.c                               |  200 ++
 src/fftw/fn_8.c                               |  202 ++
 src/fftw/fn_9.c                               |  283 ++
 src/fftw/fni_1.c                              |   62 +
 src/fftw/fni_10.c                             |  285 ++
 src/fftw/fni_11.c                             |  312 ++
 src/fftw/fni_12.c                             |  324 ++
 src/fftw/fni_13.c                             |  546 ++++
 src/fftw/fni_14.c                             |  409 +++
 src/fftw/fni_15.c                             |  452 +++
 src/fftw/fni_16.c                             |  442 +++
 src/fftw/fni_2.c                              |   68 +
 src/fftw/fni_3.c                              |   93 +
 src/fftw/fni_32.c                             | 1049 ++++++
 src/fftw/fni_4.c                              |  102 +
 src/fftw/fni_5.c                              |  150 +
 src/fftw/fni_6.c                              |  159 +
 src/fftw/fni_64.c                             | 2464 ++++++++++++++
 src/fftw/fni_7.c                              |  200 ++
 src/fftw/fni_8.c                              |  202 ++
 src/fftw/fni_9.c                              |  283 ++
 src/fftw/ftw_10.c                             |  378 +++
 src/fftw/ftw_16.c                             |  614 ++++
 src/fftw/ftw_2.c                              |   85 +
 src/fftw/ftw_3.c                              |  121 +
 src/fftw/ftw_32.c                             | 1398 ++++++++
 src/fftw/ftw_4.c                              |  141 +
 src/fftw/ftw_5.c                              |  197 ++
 src/fftw/ftw_6.c                              |  220 ++
 src/fftw/ftw_64.c                             | 3203 ++++++++++++++++++
 src/fftw/ftw_7.c                              |  272 ++
 src/fftw/ftw_8.c                              |  285 ++
 src/fftw/ftw_9.c                              |  377 +++
 src/fftw/ftwi_10.c                            |  378 +++
 src/fftw/ftwi_16.c                            |  614 ++++
 src/fftw/ftwi_2.c                             |   85 +
 src/fftw/ftwi_3.c                             |  121 +
 src/fftw/ftwi_32.c                            | 1398 ++++++++
 src/fftw/ftwi_4.c                             |  141 +
 src/fftw/ftwi_5.c                             |  197 ++
 src/fftw/ftwi_6.c                             |  220 ++
 src/fftw/ftwi_64.c                            | 3207 ++++++++++++++++++
 src/fftw/ftwi_7.c                             |  272 ++
 src/fftw/ftwi_8.c                             |  285 ++
 src/fftw/ftwi_9.c                             |  377 +++
 src/fftw/generic.c                            |  102 +
 src/fftw/malloc.c                             |  240 ++
 src/fftw/planner.c                            |  475 +++
 src/fftw/putils.c                             |  555 ++++
 src/fftw/rader.c                              |  365 +++
 src/fftw/timer.c                              |  164 +
 src/fftw/twiddle.c                            |  218 ++
 src/fftw/wisdom.c                             |  317 ++
 src/fftw/wisdomio.c                           |  104 +
 src/fftw3/api/api.h                           |   70 +
 src/fftw3/api/apiplan.c                       |   70 +
 src/fftw3/api/config.h                        |  301 ++
 src/fftw3/api/configure.c                     |   31 +
 src/fftw3/api/execute-dft-c2r.c               |   29 +
 src/fftw3/api/execute-dft-r2c.c               |   29 +
 src/fftw3/api/execute-dft.c                   |   32 +
 src/fftw3/api/execute-r2r.c                   |   29 +
 src/fftw3/api/execute-split-dft-c2r.c         |   29 +
 src/fftw3/api/execute-split-dft-r2c.c         |   29 +
 src/fftw3/api/execute-split-dft.c             |   29 +
 src/fftw3/api/execute.c                       |   27 +
 src/fftw3/api/export-wisdom-to-file.c         |   29 +
 src/fftw3/api/export-wisdom-to-string.c       |   82 +
 src/fftw3/api/export-wisdom.c                 |   44 +
 src/fftw3/api/extract-reim.c                  |   36 +
 src/fftw3/api/f77api.c                        |  156 +
 src/fftw3/api/f77funcs.h                      |  438 +++
 src/fftw3/api/fftw3.h                         |  272 ++
 src/fftw3/api/flops.c                         |   27 +
 src/fftw3/api/forget-wisdom.c                 |   27 +
 src/fftw3/api/import-system-wisdom.c          |   46 +
 src/fftw3/api/import-wisdom-from-file.c       |   71 +
 src/fftw3/api/import-wisdom-from-string.c     |   50 +
 src/fftw3/api/import-wisdom.c                 |   46 +
 src/fftw3/api/map-r2r-kind.c                  |   50 +
 src/fftw3/api/mapflags.c                      |  115 +
 src/fftw3/api/mkprinter-file.c                |   59 +
 src/fftw3/api/mktensor-iodims.c               |   62 +
 src/fftw3/api/mktensor-rowmajor.c             |   61 +
 src/fftw3/api/plan-dft-1d.c                   |   27 +
 src/fftw3/api/plan-dft-2d.c                   |   30 +
 src/fftw3/api/plan-dft-3d.c                   |   32 +
 src/fftw3/api/plan-dft-c2r-1d.c               |   26 +
 src/fftw3/api/plan-dft-c2r-2d.c               |   29 +
 src/fftw3/api/plan-dft-c2r-3d.c               |   31 +
 src/fftw3/api/plan-dft-c2r.c                  |   27 +
 src/fftw3/api/plan-dft-r2c-1d.c               |   26 +
 src/fftw3/api/plan-dft-r2c-2d.c               |   29 +
 src/fftw3/api/plan-dft-r2c-3d.c               |   31 +
 src/fftw3/api/plan-dft-r2c.c                  |   29 +
 src/fftw3/api/plan-dft.c                      |   30 +
 src/fftw3/api/plan-guru-dft-c2r.c             |   44 +
 src/fftw3/api/plan-guru-dft-r2c.c             |   43 +
 src/fftw3/api/plan-guru-dft.c                 |   44 +
 src/fftw3/api/plan-guru-r2r.c                 |   47 +
 src/fftw3/api/plan-guru-split-dft-c2r.c       |   40 +
 src/fftw3/api/plan-guru-split-dft-r2c.c       |   39 +
 src/fftw3/api/plan-guru-split-dft.c           |   39 +
 src/fftw3/api/plan-many-dft-c2r.c             |   59 +
 src/fftw3/api/plan-many-dft-r2c.c             |   57 +
 src/fftw3/api/plan-many-dft.c                 |   51 +
 src/fftw3/api/plan-many-r2r.c                 |   52 +
 src/fftw3/api/plan-r2r-1d.c                   |   26 +
 src/fftw3/api/plan-r2r-2d.c                   |   33 +
 src/fftw3/api/plan-r2r-3d.c                   |   36 +
 src/fftw3/api/plan-r2r.c                      |   28 +
 src/fftw3/api/print-plan.c                    |   34 +
 src/fftw3/api/rdft2-pad.c                     |   39 +
 src/fftw3/api/the-planner.c                   |   42 +
 src/fftw3/api/version.c                       |   49 +
 src/fftw3/api/x77.h                           |   32 +
 src/fftw3/dft/buffered.c                      |  323 ++
 src/fftw3/dft/codelet-dft.h                   |  113 +
 src/fftw3/dft/codelets/f.h                    |    1 +
 src/fftw3/dft/codelets/inplace/icodlist.c     |   20 +
 src/fftw3/dft/codelets/inplace/q1_2.c         |   87 +
 src/fftw3/dft/codelets/inplace/q1_3.c         |  168 +
 src/fftw3/dft/codelets/inplace/q1_4.c         |  266 ++
 src/fftw3/dft/codelets/inplace/q1_5.c         |  477 +++
 src/fftw3/dft/codelets/inplace/q1_6.c         |  654 ++++
 src/fftw3/dft/codelets/inplace/q1_8.c         | 1149 +++++++
 src/fftw3/dft/codelets/n.c                    |   39 +
 src/fftw3/dft/codelets/n.h                    |   23 +
 src/fftw3/dft/codelets/q.h                    |    1 +
 src/fftw3/dft/codelets/standard/m1_16.c       |  305 ++
 src/fftw3/dft/codelets/standard/m1_32.c       |  684 ++++
 src/fftw3/dft/codelets/standard/m1_64.c       | 1568 +++++++++
 src/fftw3/dft/codelets/standard/n1_10.c       |  195 ++
 src/fftw3/dft/codelets/standard/n1_11.c       |  178 +
 src/fftw3/dft/codelets/standard/n1_12.c       |  224 ++
 src/fftw3/dft/codelets/standard/n1_13.c       |  352 ++
 src/fftw3/dft/codelets/standard/n1_14.c       |  249 ++
 src/fftw3/dft/codelets/standard/n1_15.c       |  291 ++
 src/fftw3/dft/codelets/standard/n1_16.c       |  294 ++
 src/fftw3/dft/codelets/standard/n1_2.c        |   61 +
 src/fftw3/dft/codelets/standard/n1_3.c        |   76 +
 src/fftw3/dft/codelets/standard/n1_4.c        |   83 +
 src/fftw3/dft/codelets/standard/n1_5.c        |  111 +
 src/fftw3/dft/codelets/standard/n1_6.c        |  119 +
 src/fftw3/dft/codelets/standard/n1_7.c        |  129 +
 src/fftw3/dft/codelets/standard/n1_8.c        |  147 +
 src/fftw3/dft/codelets/standard/n1_9.c        |  193 ++
 src/fftw3/dft/codelets/standard/scodlist.c    |   82 +
 src/fftw3/dft/codelets/standard/t1_10.c       |  253 ++
 src/fftw3/dft/codelets/standard/t1_12.c       |  300 ++
 src/fftw3/dft/codelets/standard/t1_15.c       |  405 +++
 src/fftw3/dft/codelets/standard/t1_16.c       |  406 +++
 src/fftw3/dft/codelets/standard/t1_2.c        |   75 +
 src/fftw3/dft/codelets/standard/t1_3.c        |   97 +
 src/fftw3/dft/codelets/standard/t1_32.c       |  892 +++++
 src/fftw3/dft/codelets/standard/t1_4.c        |  111 +
 src/fftw3/dft/codelets/standard/t1_5.c        |  143 +
 src/fftw3/dft/codelets/standard/t1_6.c        |  160 +
 src/fftw3/dft/codelets/standard/t1_64.c       | 2001 ++++++++++++
 src/fftw3/dft/codelets/standard/t1_7.c        |  178 +
 src/fftw3/dft/codelets/standard/t1_8.c        |  203 ++
 src/fftw3/dft/codelets/standard/t1_9.c        |  256 ++
 src/fftw3/dft/codelets/standard/t2_16.c       |  411 +++
 src/fftw3/dft/codelets/standard/t2_32.c       |  853 +++++
 src/fftw3/dft/codelets/standard/t2_4.c        |  108 +
 src/fftw3/dft/codelets/standard/t2_64.c       | 1906 +++++++++++
 src/fftw3/dft/codelets/standard/t2_8.c        |  192 ++
 src/fftw3/dft/codelets/t.c                    |   36 +
 src/fftw3/dft/codelets/t.h                    |   23 +
 src/fftw3/dft/conf.c                          |   51 +
 src/fftw3/dft/ct-dif.c                        |  115 +
 src/fftw3/dft/ct-dit.c                        |  116 +
 src/fftw3/dft/ct-ditbuf.c                     |  175 +
 src/fftw3/dft/ct-ditf.c                       |  105 +
 src/fftw3/dft/ct.c                            |  170 +
 src/fftw3/dft/ct.h                            |   70 +
 src/fftw3/dft/dft.h                           |   89 +
 src/fftw3/dft/direct.c                        |  152 +
 src/fftw3/dft/generic.c                       |  204 ++
 src/fftw3/dft/indirect.c                      |  238 ++
 src/fftw3/dft/kdft-dif.c                      |   32 +
 src/fftw3/dft/kdft-difsq.c                    |   28 +
 src/fftw3/dft/kdft-dit.c                      |   33 +
 src/fftw3/dft/kdft.c                          |   29 +
 src/fftw3/dft/nop.c                           |   88 +
 src/fftw3/dft/plan.c                          |   33 +
 src/fftw3/dft/problem.c                       |  121 +
 src/fftw3/dft/rader-omega.c                   |   57 +
 src/fftw3/dft/rader.c                         |  491 +++
 src/fftw3/dft/rank-geq2.c                     |  211 ++
 src/fftw3/dft/rank0.c                         |  235 ++
 src/fftw3/dft/solve.c                         |   33 +
 src/fftw3/dft/vrank-geq1.c                    |  215 ++
 src/fftw3/dft/vrank2-transpose.c              |  155 +
 src/fftw3/dft/vrank3-transpose.c              |  207 ++
 src/fftw3/dft/zero.c                          |   51 +
 src/fftw3/kernel/align.c                      |   39 +
 src/fftw3/kernel/alloc.c                      |  404 +++
 src/fftw3/kernel/assert.c                     |   31 +
 src/fftw3/kernel/awake.c                      |   30 +
 src/fftw3/kernel/cycle.h                      |  420 +++
 src/fftw3/kernel/debug.c                      |   54 +
 src/fftw3/kernel/hash.c                       |   31 +
 src/fftw3/kernel/iabs.c                       |   28 +
 src/fftw3/kernel/ifftw.h                      |  848 +++++
 src/fftw3/kernel/kbuffered.c                  |   44 +
 src/fftw3/kernel/kct.c                        |   31 +
 src/fftw3/kernel/kplan.c                      |   74 +
 src/fftw3/kernel/kproblem.c                   |   40 +
 src/fftw3/kernel/krader.c                     |   68 +
 src/fftw3/kernel/md5-1.c                      |   54 +
 src/fftw3/kernel/md5.c                        |  143 +
 src/fftw3/kernel/minmax.c                     |   33 +
 src/fftw3/kernel/ops.c                        |   63 +
 src/fftw3/kernel/pickdim.c                    |   82 +
 src/fftw3/kernel/planner.c                    |  695 ++++
 src/fftw3/kernel/primes.c                     |  135 +
 src/fftw3/kernel/print.c                      |  210 ++
 src/fftw3/kernel/scan.c                       |  204 ++
 src/fftw3/kernel/solver.c                     |   48 +
 src/fftw3/kernel/solvtab.c                    |   33 +
 src/fftw3/kernel/square.c                     |   28 +
 src/fftw3/kernel/stride.c                     |   41 +
 src/fftw3/kernel/tensor.c                     |  123 +
 src/fftw3/kernel/tensor1.c                    |   37 +
 src/fftw3/kernel/tensor2.c                    |   37 +
 src/fftw3/kernel/tensor4.c                    |   73 +
 src/fftw3/kernel/tensor5.c                    |   93 +
 src/fftw3/kernel/tensor7.c                    |  130 +
 src/fftw3/kernel/tensor8.c                    |   35 +
 src/fftw3/kernel/tensor9.c                    |   37 +
 src/fftw3/kernel/timer.c                      |  179 ++
 src/fftw3/kernel/transpose.c                  |  430 +++
 src/fftw3/kernel/trig.c                       |   45 +
 src/fftw3/kernel/trig1.c                      |   70 +
 src/fftw3/kernel/twiddle.c                    |  200 ++
 src/fftw3/rdft/buffered2.c                    |  418 +++
 src/fftw3/rdft/codelet-rdft.h                 |  192 ++
 src/fftw3/rdft/codelets/hb.h                  |   23 +
 src/fftw3/rdft/codelets/hc2r.c                |   44 +
 src/fftw3/rdft/codelets/hc2r.h                |   23 +
 src/fftw3/rdft/codelets/hc2r/hb_10.c          |  265 ++
 src/fftw3/rdft/codelets/hc2r/hb_12.c          |  299 ++
 src/fftw3/rdft/codelets/hc2r/hb_15.c          |  404 +++
 src/fftw3/rdft/codelets/hc2r/hb_16.c          |  412 +++
 src/fftw3/rdft/codelets/hc2r/hb_2.c           |   72 +
 src/fftw3/rdft/codelets/hc2r/hb_3.c           |   97 +
 src/fftw3/rdft/codelets/hc2r/hb_32.c          |  890 +++++
 src/fftw3/rdft/codelets/hc2r/hb_4.c           |  111 +
 src/fftw3/rdft/codelets/hc2r/hb_5.c           |  146 +
 src/fftw3/rdft/codelets/hc2r/hb_6.c           |  160 +
 src/fftw3/rdft/codelets/hc2r/hb_64.c          | 1972 ++++++++++++
 src/fftw3/rdft/codelets/hc2r/hb_7.c           |  173 +
 src/fftw3/rdft/codelets/hc2r/hb_8.c           |  197 ++
 src/fftw3/rdft/codelets/hc2r/hb_9.c           |  248 ++
 src/fftw3/rdft/codelets/hc2r/hc2rIII_10.c     |  113 +
 src/fftw3/rdft/codelets/hc2r/hc2rIII_12.c     |  130 +
 src/fftw3/rdft/codelets/hc2r/hc2rIII_15.c     |  169 +
 src/fftw3/rdft/codelets/hc2r/hc2rIII_16.c     |  173 +
 src/fftw3/rdft/codelets/hc2r/hc2rIII_2.c      |   59 +
 src/fftw3/rdft/codelets/hc2r/hc2rIII_3.c      |   64 +
 src/fftw3/rdft/codelets/hc2r/hc2rIII_32.c     |  371 +++
 src/fftw3/rdft/codelets/hc2r/hc2rIII_4.c      |   66 +
 src/fftw3/rdft/codelets/hc2r/hc2rIII_5.c      |   79 +
 src/fftw3/rdft/codelets/hc2r/hc2rIII_6.c      |   77 +
 src/fftw3/rdft/codelets/hc2r/hc2rIII_7.c      |   81 +
 src/fftw3/rdft/codelets/hc2r/hc2rIII_8.c      |   97 +
 src/fftw3/rdft/codelets/hc2r/hc2rIII_9.c      |  120 +
 src/fftw3/rdft/codelets/hc2r/hc2r_10.c        |  118 +
 src/fftw3/rdft/codelets/hc2r/hc2r_11.c        |  101 +
 src/fftw3/rdft/codelets/hc2r/hc2r_12.c        |  126 +
 src/fftw3/rdft/codelets/hc2r/hc2r_13.c        |  196 ++
 src/fftw3/rdft/codelets/hc2r/hc2r_14.c        |  134 +
 src/fftw3/rdft/codelets/hc2r/hc2r_15.c        |  161 +
 src/fftw3/rdft/codelets/hc2r/hc2r_16.c        |  165 +
 src/fftw3/rdft/codelets/hc2r/hc2r_3.c         |   64 +
 src/fftw3/rdft/codelets/hc2r/hc2r_32.c        |  336 ++
 src/fftw3/rdft/codelets/hc2r/hc2r_4.c         |   70 +
 src/fftw3/rdft/codelets/hc2r/hc2r_5.c         |   79 +
 src/fftw3/rdft/codelets/hc2r/hc2r_6.c         |   79 +
 src/fftw3/rdft/codelets/hc2r/hc2r_7.c         |   81 +
 src/fftw3/rdft/codelets/hc2r/hc2r_8.c         |   95 +
 src/fftw3/rdft/codelets/hc2r/hc2r_9.c         |  120 +
 src/fftw3/rdft/codelets/hc2r/hcodlist.c       |  102 +
 src/fftw3/rdft/codelets/hc2r/mhc2rIII_32.c    |  381 +++
 src/fftw3/rdft/codelets/hc2r/mhc2rIII_64.c    |  819 +++++
 src/fftw3/rdft/codelets/hc2r/mhc2r_128.c      | 1652 ++++++++++
 src/fftw3/rdft/codelets/hc2r/mhc2r_32.c       |  346 ++
 src/fftw3/rdft/codelets/hc2r/mhc2r_64.c       |  733 +++++
 src/fftw3/rdft/codelets/hc2rIII.h             |   23 +
 src/fftw3/rdft/codelets/hf.h                  |   23 +
 src/fftw3/rdft/codelets/hfb.c                 |   41 +
 src/fftw3/rdft/codelets/r2hc.c                |   44 +
 src/fftw3/rdft/codelets/r2hc.h                |   23 +
 src/fftw3/rdft/codelets/r2hc/hf2_16.c         |  411 +++
 src/fftw3/rdft/codelets/r2hc/hf2_32.c         |  853 +++++
 src/fftw3/rdft/codelets/r2hc/hf2_4.c          |  108 +
 src/fftw3/rdft/codelets/r2hc/hf2_64.c         | 1906 +++++++++++
 src/fftw3/rdft/codelets/r2hc/hf2_8.c          |  192 ++
 src/fftw3/rdft/codelets/r2hc/hf_10.c          |  253 ++
 src/fftw3/rdft/codelets/r2hc/hf_12.c          |  300 ++
 src/fftw3/rdft/codelets/r2hc/hf_15.c          |  405 +++
 src/fftw3/rdft/codelets/r2hc/hf_16.c          |  406 +++
 src/fftw3/rdft/codelets/r2hc/hf_2.c           |   75 +
 src/fftw3/rdft/codelets/r2hc/hf_3.c           |   97 +
 src/fftw3/rdft/codelets/r2hc/hf_32.c          |  892 +++++
 src/fftw3/rdft/codelets/r2hc/hf_4.c           |  111 +
 src/fftw3/rdft/codelets/r2hc/hf_5.c           |  143 +
 src/fftw3/rdft/codelets/r2hc/hf_6.c           |  160 +
 src/fftw3/rdft/codelets/r2hc/hf_64.c          | 2001 ++++++++++++
 src/fftw3/rdft/codelets/r2hc/hf_7.c           |  178 +
 src/fftw3/rdft/codelets/r2hc/hf_8.c           |  203 ++
 src/fftw3/rdft/codelets/r2hc/hf_9.c           |  256 ++
 src/fftw3/rdft/codelets/r2hc/mr2hcII_32.c     |  368 +++
 src/fftw3/rdft/codelets/r2hc/mr2hcII_64.c     |  799 +++++
 src/fftw3/rdft/codelets/r2hc/mr2hc_128.c      | 1647 ++++++++++
 src/fftw3/rdft/codelets/r2hc/mr2hc_32.c       |  330 ++
 src/fftw3/rdft/codelets/r2hc/mr2hc_64.c       |  729 +++++
 src/fftw3/rdft/codelets/r2hc/r2hcII_10.c      |  112 +
 src/fftw3/rdft/codelets/r2hc/r2hcII_12.c      |  128 +
 src/fftw3/rdft/codelets/r2hc/r2hcII_15.c      |  155 +
 src/fftw3/rdft/codelets/r2hc/r2hcII_16.c      |  171 +
 src/fftw3/rdft/codelets/r2hc/r2hcII_2.c       |   58 +
 src/fftw3/rdft/codelets/r2hc/r2hcII_3.c       |   63 +
 src/fftw3/rdft/codelets/r2hc/r2hcII_32.c      |  358 +++
 src/fftw3/rdft/codelets/r2hc/r2hcII_4.c       |   65 +
 src/fftw3/rdft/codelets/r2hc/r2hcII_5.c       |   78 +
 src/fftw3/rdft/codelets/r2hc/r2hcII_6.c       |   73 +
 src/fftw3/rdft/codelets/r2hc/r2hcII_7.c       |   83 +
 src/fftw3/rdft/codelets/r2hc/r2hcII_8.c       |   96 +
 src/fftw3/rdft/codelets/r2hc/r2hcII_9.c       |  118 +
 src/fftw3/rdft/codelets/r2hc/r2hc_10.c        |  115 +
 src/fftw3/rdft/codelets/r2hc/r2hc_11.c        |  102 +
 src/fftw3/rdft/codelets/r2hc/r2hc_12.c        |  124 +
 src/fftw3/rdft/codelets/r2hc/r2hc_13.c        |  194 ++
 src/fftw3/rdft/codelets/r2hc/r2hc_14.c        |  130 +
 src/fftw3/rdft/codelets/r2hc/r2hc_15.c        |  168 +
 src/fftw3/rdft/codelets/r2hc/r2hc_16.c        |  159 +
 src/fftw3/rdft/codelets/r2hc/r2hc_2.c         |   58 +
 src/fftw3/rdft/codelets/r2hc/r2hc_3.c         |   63 +
 src/fftw3/rdft/codelets/r2hc/r2hc_32.c        |  320 ++
 src/fftw3/rdft/codelets/r2hc/r2hc_4.c         |   64 +
 src/fftw3/rdft/codelets/r2hc/r2hc_5.c         |   78 +
 src/fftw3/rdft/codelets/r2hc/r2hc_6.c         |   79 +
 src/fftw3/rdft/codelets/r2hc/r2hc_7.c         |   83 +
 src/fftw3/rdft/codelets/r2hc/r2hc_8.c         |   92 +
 src/fftw3/rdft/codelets/r2hc/r2hc_9.c         |  115 +
 src/fftw3/rdft/codelets/r2hc/rhcodlist.c      |  114 +
 src/fftw3/rdft/codelets/r2hcII.h              |   23 +
 src/fftw3/rdft/codelets/r2r.c                 |   38 +
 src/fftw3/rdft/codelets/r2r.h                 |   23 +
 src/fftw3/rdft/codelets/r2r/e01_8.c           |  118 +
 src/fftw3/rdft/codelets/r2r/e10_8.c           |  118 +
 src/fftw3/rdft/codelets/r2r/rrcodlist.c       |   12 +
 src/fftw3/rdft/dft-r2hc.c                     |  187 ++
 src/fftw3/rdft/dht-r2hc.c                     |  148 +
 src/fftw3/rdft/dht-rader.c                    |  344 ++
 src/fftw3/rdft/direct2.c                      |  216 ++
 src/fftw3/rdft/hc2hc-buf.c                    |  245 ++
 src/fftw3/rdft/hc2hc-dif.c                    |  124 +
 src/fftw3/rdft/hc2hc-dit.c                    |  126 +
 src/fftw3/rdft/hc2hc.c                        |  222 ++
 src/fftw3/rdft/hc2hc.h                        |   66 +
 src/fftw3/rdft/khc2hc-dif.c                   |   33 +
 src/fftw3/rdft/khc2hc-dit.c                   |   33 +
 src/fftw3/rdft/khc2r.c                        |   29 +
 src/fftw3/rdft/kr2hc.c                        |   29 +
 src/fftw3/rdft/kr2r.c                         |   28 +
 src/fftw3/rdft/nop2.c                         |   90 +
 src/fftw3/rdft/plan2.c                        |   33 +
 src/fftw3/rdft/problem2.c                     |  142 +
 src/fftw3/rdft/rader-hc2hc.c                  |  513 +++
 src/fftw3/rdft/rank-geq2-rdft2.c              |  243 ++
 src/fftw3/rdft/rank0-rdft2.c                  |  195 ++
 src/fftw3/rdft/rbuffered.c                    |  299 ++
 src/fftw3/rdft/rconf.c                        |   62 +
 src/fftw3/rdft/rdft-dht.c                     |  223 ++
 src/fftw3/rdft/rdft.h                         |  161 +
 src/fftw3/rdft/rdft2-inplace-strides.c        |   61 +
 src/fftw3/rdft/rdft2-radix2.c                 |  479 +++
 src/fftw3/rdft/rdft2-strides.c                |   38 +
 src/fftw3/rdft/rdft2-tensor-max-index.c       |   44 +
 src/fftw3/rdft/rdirect.c                      |  259 ++
 src/fftw3/rdft/rgeneric.c                     |  371 +++
 src/fftw3/rdft/rindirect.c                    |  242 ++
 src/fftw3/rdft/rnop.c                         |   86 +
 src/fftw3/rdft/rplan.c                        |   33 +
 src/fftw3/rdft/rproblem.c                     |  235 ++
 src/fftw3/rdft/rrank-geq2.c                   |  216 ++
 src/fftw3/rdft/rrank0.c                       |  194 ++
 src/fftw3/rdft/rsolve.c                       |   31 +
 src/fftw3/rdft/rvrank-geq1.c                  |  221 ++
 src/fftw3/rdft/solve2.c                       |   31 +
 src/fftw3/rdft/vrank-geq1-rdft2.c             |  220 ++
 src/fftw3/reodft/redft00e-r2hc-pad.c          |  201 ++
 src/fftw3/reodft/redft00e-r2hc.c              |  216 ++
 src/fftw3/reodft/reoconf.c                    |   42 +
 src/fftw3/reodft/reodft.h                     |   41 +
 src/fftw3/reodft/reodft010e-r2hc.c            |  409 +++
 src/fftw3/reodft/reodft11e-r2hc-odd.c         |  304 ++
 src/fftw3/reodft/reodft11e-r2hc.c             |  295 ++
 src/fftw3/reodft/reodft11e-radix2.c           |  515 +++
 src/fftw3/reodft/rodft00e-r2hc-pad.c          |  200 ++
 src/fftw3/reodft/rodft00e-r2hc.c              |  212 ++
 src/im.def                                    |  196 ++
 src/im.rc                                     |   19 +
 src/im_attrib.cpp                             |  316 ++
 src/im_avi.def                                |    2 +
 src/im_avi.mak                                |   10 +
 src/im_bin.cpp                                |  111 +
 src/im_binfile.cpp                            |  644 ++++
 src/im_capture.def                            |   27 +
 src/im_capture.mak                            |   67 +
 src/im_capture_dx.cpp                         | 2255 +++++++++++++
 src/im_colorhsi.cpp                           |  243 ++
 src/im_colormode.cpp                          |   87 +
 src/im_colorutil.cpp                          |   27 +
 src/im_compress.cpp                           |   44 +
 src/im_convertbitmap.cpp                      |  121 +
 src/im_convertcolor.cpp                       |  883 +++++
 src/im_converttype.cpp                        |  551 ++++
 src/im_counter.cpp                            |  151 +
 src/im_datatype.cpp                           |   54 +
 src/im_dib.cpp                                | 1136 +++++++
 src/im_dibxbitmap.cpp                         |  181 ++
 src/im_ecw.def                                |    2 +
 src/im_ecw.mak                                |   16 +
 src/im_fftw.def                               |    7 +
 src/im_fftw.mak                               |   45 +
 src/im_fftw3.mak                              |  151 +
 src/im_file.cpp                               |  428 +++
 src/im_filebuffer.cpp                         |  695 ++++
 src/im_fileraw.cpp                            |   64 +
 src/im_format.cpp                             |  289 ++
 src/im_format_all.cpp                         |   37 +
 src/im_format_avi.cpp                         |  668 ++++
 src/im_format_bmp.cpp                         |  939 ++++++
 src/im_format_ecw.cpp                         |  375 +++
 src/im_format_gif.cpp                         | 1497 +++++++++
 src/im_format_ico.cpp                         |  650 ++++
 src/im_format_jp2.cpp                         |  493 +++
 src/im_format_jpeg.cpp                        |  820 +++++
 src/im_format_krn.cpp                         |  377 +++
 src/im_format_led.cpp                         |  360 +++
 src/im_format_pcx.cpp                         |  701 ++++
 src/im_format_png.cpp                         |  910 ++++++
 src/im_format_pnm.cpp                         |  502 +++
 src/im_format_ras.cpp                         |  598 ++++
 src/im_format_raw.cpp                         |  270 ++
 src/im_format_sgi.cpp                         |  607 ++++
 src/im_format_tga.cpp                         | 1104 +++++++
 src/im_format_tiff.cpp                        | 1421 ++++++++
 src/im_format_wmv.cpp                         | 1619 ++++++++++
 src/im_image.cpp                              |  746 +++++
 src/im_jp2.def                                |    2 +
 src/im_jp2.mak                                |   48 +
 src/im_lib.cpp                                |   34 +
 src/im_lua3.c                                 | 1297 ++++++++
 src/im_palette.cpp                            |  551 ++++
 src/im_process.def                            |  162 +
 src/im_process.mak                            |   36 +
 src/im_rgb2map.cpp                            |  964 ++++++
 src/im_str.cpp                                |   67 +
 src/im_sysfile_unix.cpp                       |  211 ++
 src/im_sysfile_win32.cpp                      |  202 ++
 src/im_wmv.def                                |    2 +
 src/im_wmv.mak                                |   23 +
 src/imlua3.def                                |    2 +
 src/imlua3.mak                                |   12 +
 src/imlua5.mak                                |   15 +
 src/imlua_avi.mak                             |   17 +
 src/imlua_capture5.mak                        |   17 +
 src/imlua_fftw5.mak                           |   18 +
 src/imlua_jp2.mak                             |   17 +
 src/imlua_process5.mak                        |   18 +
 src/imlua_wmv.mak                             |   17 +
 src/jas_binfile.c                             |   97 +
 src/libexif/_stdint.h                         |   19 +
 src/libexif/canon/exif-mnote-data-canon.c     |  281 ++
 src/libexif/canon/exif-mnote-data-canon.h     |   45 +
 src/libexif/canon/mnote-canon-entry.c         |  590 ++++
 src/libexif/canon/mnote-canon-entry.h         |   43 +
 src/libexif/canon/mnote-canon-tag.c           |   75 +
 src/libexif/canon/mnote-canon-tag.h           |   52 +
 src/libexif/config.h                          |   12 +
 src/libexif/exif-byte-order.c                 |   39 +
 src/libexif/exif-byte-order.h                 |   39 +
 src/libexif/exif-content.c                    |  209 ++
 src/libexif/exif-content.h                    |   76 +
 src/libexif/exif-data.c                       | 1039 ++++++
 src/libexif/exif-data.h                       |  104 +
 src/libexif/exif-entry.c                      | 1383 ++++++++
 src/libexif/exif-entry.h                      |   69 +
 src/libexif/exif-format.c                     |   72 +
 src/libexif/exif-format.h                     |   50 +
 src/libexif/exif-ifd.c                        |   49 +
 src/libexif/exif-ifd.h                        |   35 +
 src/libexif/exif-loader.c                     |  349 ++
 src/libexif/exif-loader.h                     |   57 +
 src/libexif/exif-log.c                        |  148 +
 src/libexif/exif-log.h                        |   75 +
 src/libexif/exif-mem.c                        |   95 +
 src/libexif/exif-mem.h                        |   54 +
 src/libexif/exif-mnote-data-priv.h            |   76 +
 src/libexif/exif-mnote-data.c                 |  158 +
 src/libexif/exif-mnote-data.h                 |   54 +
 src/libexif/exif-tag.c                        |  686 ++++
 src/libexif/exif-tag.h                        |  148 +
 src/libexif/exif-utils.c                      |  214 ++
 src/libexif/exif-utils.h                      |   81 +
 src/libexif/exif.c                            | 1274 ++++++++
 src/libexif/exif.h                            |   86 +
 src/libexif/i18n.h                            |   51 +
 src/libexif/olympus/exif-mnote-data-olympus.c |  403 +++
 src/libexif/olympus/exif-mnote-data-olympus.h |   45 +
 src/libexif/olympus/mnote-olympus-entry.c     |  540 ++++
 src/libexif/olympus/mnote-olympus-entry.h     |   43 +
 src/libexif/olympus/mnote-olympus-tag.c       |  155 +
 src/libexif/olympus/mnote-olympus-tag.h       |  130 +
 src/libexif/pentax/exif-mnote-data-pentax.c   |  209 ++
 src/libexif/pentax/exif-mnote-data-pentax.h   |   44 +
 src/libexif/pentax/mnote-pentax-entry.c       |  210 ++
 src/libexif/pentax/mnote-pentax-entry.h       |   43 +
 src/libexif/pentax/mnote-pentax-tag.c         |   94 +
 src/libexif/pentax/mnote-pentax-tag.h         |   74 +
 src/libjasper/base/jas_cm.c                   | 1282 ++++++++
 src/libjasper/base/jas_debug.c                |  137 +
 src/libjasper/base/jas_getopt.c               |  168 +
 src/libjasper/base/jas_icc.c                  | 1722 ++++++++++
 src/libjasper/base/jas_iccdata.c              |  517 +++
 src/libjasper/base/jas_image.c                | 1516 +++++++++
 src/libjasper/base/jas_init.c                 |  170 +
 src/libjasper/base/jas_malloc.c               |  131 +
 src/libjasper/base/jas_seq.c                  |  454 +++
 src/libjasper/base/jas_stream.c               | 1151 +++++++
 src/libjasper/base/jas_string.c               |   96 +
 src/libjasper/base/jas_tmr.c                  |  149 +
 src/libjasper/base/jas_tvp.c                  |  237 ++
 src/libjasper/base/jas_version.c              |   67 +
 src/libjasper/jasper/jas_cm.h                 |  266 ++
 src/libjasper/jasper/jas_config.h             |   32 +
 src/libjasper/jasper/jas_debug.h              |  114 +
 src/libjasper/jasper/jas_fix.h                |  358 +++
 src/libjasper/jasper/jas_getopt.h             |  131 +
 src/libjasper/jasper/jas_icc.h                |  407 +++
 src/libjasper/jasper/jas_image.h              |  617 ++++
 src/libjasper/jasper/jas_init.h               |   83 +
 src/libjasper/jasper/jas_malloc.h             |  124 +
 src/libjasper/jasper/jas_math.h               |  117 +
 src/libjasper/jasper/jas_seq.h                |  301 ++
 src/libjasper/jasper/jas_stream.h             |  466 +++
 src/libjasper/jasper/jas_string.h             |   95 +
 src/libjasper/jasper/jas_tmr.h                |  103 +
 src/libjasper/jasper/jas_tvp.h                |  151 +
 src/libjasper/jasper/jas_types.h              |  228 ++
 src/libjasper/jasper/jas_version.h            |  118 +
 src/libjasper/jasper/jasper.h                 |   93 +
 src/libjasper/jp2/jp2_cod.c                   |  980 ++++++
 src/libjasper/jp2/jp2_cod.h                   |  328 ++
 src/libjasper/jp2/jp2_dec.c                   |  702 ++++
 src/libjasper/jp2/jp2_dec.h                   |   85 +
 src/libjasper/jp2/jp2_enc.c                   |  469 +++
 src/libjasper/jpc/jpc_bs.c                    |  440 +++
 src/libjasper/jpc/jpc_bs.h                    |  231 ++
 src/libjasper/jpc/jpc_cod.h                   |   78 +
 src/libjasper/jpc/jpc_cs.c                    | 1644 ++++++++++
 src/libjasper/jpc/jpc_cs.h                    |  763 +++++
 src/libjasper/jpc/jpc_dec.c                   | 2348 ++++++++++++++
 src/libjasper/jpc/jpc_dec.h                   |  696 ++++
 src/libjasper/jpc/jpc_enc.c                   | 2626 +++++++++++++++
 src/libjasper/jpc/jpc_enc.h                   |  646 ++++
 src/libjasper/jpc/jpc_fix.h                   |  144 +
 src/libjasper/jpc/jpc_flt.h                   |   80 +
 src/libjasper/jpc/jpc_math.c                  |  121 +
 src/libjasper/jpc/jpc_math.h                  |   99 +
 src/libjasper/jpc/jpc_mct.c                   |  291 ++
 src/libjasper/jpc/jpc_mct.h                   |  111 +
 src/libjasper/jpc/jpc_mqcod.c                 |  179 ++
 src/libjasper/jpc/jpc_mqcod.h                 |  124 +
 src/libjasper/jpc/jpc_mqdec.c                 |  306 ++
 src/libjasper/jpc/jpc_mqdec.h                 |  271 ++
 src/libjasper/jpc/jpc_mqenc.c                 |  392 +++
 src/libjasper/jpc/jpc_mqenc.h                 |  236 ++
 src/libjasper/jpc/jpc_qmfb.c                  | 3152 ++++++++++++++++++
 src/libjasper/jpc/jpc_qmfb.h                  |  113 +
 src/libjasper/jpc/jpc_t1cod.c                 |  497 +++
 src/libjasper/jpc/jpc_t1cod.h                 |  295 ++
 src/libjasper/jpc/jpc_t1dec.c                 |  927 ++++++
 src/libjasper/jpc/jpc_t1dec.h                 |   88 +
 src/libjasper/jpc/jpc_t1enc.c                 |  964 ++++++
 src/libjasper/jpc/jpc_t1enc.h                 |   93 +
 src/libjasper/jpc/jpc_t2cod.c                 |  684 ++++
 src/libjasper/jpc/jpc_t2cod.h                 |  299 ++
 src/libjasper/jpc/jpc_t2dec.c                 |  581 ++++
 src/libjasper/jpc/jpc_t2dec.h                 |   95 +
 src/libjasper/jpc/jpc_t2enc.c                 |  655 ++++
 src/libjasper/jpc/jpc_t2enc.h                 |  106 +
 src/libjasper/jpc/jpc_tagtree.c               |  393 +++
 src/libjasper/jpc/jpc_tagtree.h               |  167 +
 src/libjasper/jpc/jpc_tsfb.c                  |  288 ++
 src/libjasper/jpc/jpc_tsfb.h                  |  138 +
 src/libjasper/jpc/jpc_util.c                  |  194 ++
 src/libjasper/jpc/jpc_util.h                  |   77 +
 src/libjpeg/jcapimin.c                        |  280 ++
 src/libjpeg/jcapistd.c                        |  161 +
 src/libjpeg/jccoefct.c                        |  449 +++
 src/libjpeg/jccolor.c                         |  459 +++
 src/libjpeg/jcdctmgr.c                        |  387 +++
 src/libjpeg/jchuff.c                          |  909 ++++++
 src/libjpeg/jchuff.h                          |   47 +
 src/libjpeg/jcinit.c                          |   72 +
 src/libjpeg/jcmainct.c                        |  293 ++
 src/libjpeg/jcmarker.c                        |  664 ++++
 src/libjpeg/jcmaster.c                        |  590 ++++
 src/libjpeg/jcomapi.c                         |  106 +
 src/libjpeg/jconfig.h                         |   26 +
 src/libjpeg/jcparam.c                         |  610 ++++
 src/libjpeg/jcphuff.c                         |  833 +++++
 src/libjpeg/jcprepct.c                        |  354 ++
 src/libjpeg/jcsample.c                        |  519 +++
 src/libjpeg/jctrans.c                         |  388 +++
 src/libjpeg/jdapimin.c                        |  395 +++
 src/libjpeg/jdapistd.c                        |  275 ++
 src/libjpeg/jdatadst.c                        |  151 +
 src/libjpeg/jdatasrc.c                        |  212 ++
 src/libjpeg/jdcoefct.c                        |  736 +++++
 src/libjpeg/jdcolor.c                         |  396 +++
 src/libjpeg/jdct.h                            |  176 +
 src/libjpeg/jddctmgr.c                        |  269 ++
 src/libjpeg/jdhuff.c                          |  684 ++++
 src/libjpeg/jdhuff.h                          |  201 ++
 src/libjpeg/jdinput.c                         |  381 +++
 src/libjpeg/jdmainct.c                        |  512 +++
 src/libjpeg/jdmarker.c                        | 1360 ++++++++
 src/libjpeg/jdmaster.c                        |  557 ++++
 src/libjpeg/jdmerge.c                         |  400 +++
 src/libjpeg/jdphuff.c                         |  668 ++++
 src/libjpeg/jdpostct.c                        |  290 ++
 src/libjpeg/jdsample.c                        |  478 +++
 src/libjpeg/jdtrans.c                         |  143 +
 src/libjpeg/jerror.c                          |  252 ++
 src/libjpeg/jerror.h                          |  291 ++
 src/libjpeg/jfdctflt.c                        |  168 +
 src/libjpeg/jfdctfst.c                        |  224 ++
 src/libjpeg/jfdctint.c                        |  283 ++
 src/libjpeg/jidctflt.c                        |  242 ++
 src/libjpeg/jidctfst.c                        |  368 +++
 src/libjpeg/jidctint.c                        |  389 +++
 src/libjpeg/jidctred.c                        |  398 +++
 src/libjpeg/jinclude.h                        |   97 +
 src/libjpeg/jmemmgr.c                         | 1118 +++++++
 src/libjpeg/jmemnobs.c                        |  109 +
 src/libjpeg/jmemsys.h                         |  198 ++
 src/libjpeg/jmorecfg.h                        |  364 +++
 src/libjpeg/jpegint.h                         |  392 +++
 src/libjpeg/jpeglib.h                         | 1096 +++++++
 src/libjpeg/jquant1.c                         |  856 +++++
 src/libjpeg/jquant2.c                         | 1310 ++++++++
 src/libjpeg/jutils.c                          |  179 ++
 src/libjpeg/jversion.h                        |   14 +
 src/liblzf/lzf.h                              |  101 +
 src/liblzf/lzfP.h                             |  157 +
 src/liblzf/lzf_c.c                            |  242 ++
 src/liblzf/lzf_d.c                            |  116 +
 src/libpng/png.c                              |  798 +++++
 src/libpng/png.h                              | 3540 ++++++++++++++++++++
 src/libpng/pngconf.h                          | 1483 +++++++++
 src/libpng/pngerror.c                         |  341 ++
 src/libpng/pnggccrd.c                         |  101 +
 src/libpng/pngget.c                           |  901 ++++++
 src/libpng/pngmem.c                           |  608 ++++
 src/libpng/pngpread.c                         | 1584 +++++++++
 src/libpng/pngread.c                          | 1472 +++++++++
 src/libpng/pngrio.c                           |  167 +
 src/libpng/pngrtran.c                         | 4284 +++++++++++++++++++++++++
 src/libpng/pngrutil.c                         | 3163 ++++++++++++++++++
 src/libpng/pngset.c                           | 1246 +++++++
 src/libpng/pngtrans.c                         |  662 ++++
 src/libpng/pngvcrd.c                          |    1 +
 src/libpng/pngwio.c                           |  234 ++
 src/libpng/pngwrite.c                         | 1514 +++++++++
 src/libpng/pngwtran.c                         |  572 ++++
 src/libpng/pngwutil.c                         | 2792 ++++++++++++++++
 src/libtiff/t4.h                              |  285 ++
 src/libtiff/tif_aux.c                         |  267 ++
 src/libtiff/tif_close.c                       |  119 +
 src/libtiff/tif_codec.c                       |  150 +
 src/libtiff/tif_color.c                       |  275 ++
 src/libtiff/tif_compress.c                    |  286 ++
 src/libtiff/tif_config.h                      |  263 ++
 src/libtiff/tif_dir.c                         | 1350 ++++++++
 src/libtiff/tif_dir.h                         |  199 ++
 src/libtiff/tif_dirinfo.c                     |  846 +++++
 src/libtiff/tif_dirread.c                     | 1789 +++++++++++
 src/libtiff/tif_dirwrite.c                    | 1243 +++++++
 src/libtiff/tif_dumpmode.c                    |  117 +
 src/libtiff/tif_error.c                       |   73 +
 src/libtiff/tif_extension.c                   |  111 +
 src/libtiff/tif_fax3.c                        | 1566 +++++++++
 src/libtiff/tif_fax3.h                        |  525 +++
 src/libtiff/tif_fax3sm.c                      | 1253 ++++++++
 src/libtiff/tif_flush.c                       |   67 +
 src/libtiff/tif_getimage.c                    | 2598 +++++++++++++++
 src/libtiff/tif_jpeg.c                        | 1946 +++++++++++
 src/libtiff/tif_luv.c                         | 1606 +++++++++
 src/libtiff/tif_lzw.c                         | 1084 +++++++
 src/libtiff/tif_next.c                        |  144 +
 src/libtiff/tif_ojpeg.c                       | 2629 +++++++++++++++
 src/libtiff/tif_open.c                        |  683 ++++
 src/libtiff/tif_packbits.c                    |  293 ++
 src/libtiff/tif_pixarlog.c                    | 1342 ++++++++
 src/libtiff/tif_predict.c                     |  626 ++++
 src/libtiff/tif_predict.h                     |   64 +
 src/libtiff/tif_print.c                       |  639 ++++
 src/libtiff/tif_read.c                        |  650 ++++
 src/libtiff/tif_strip.c                       |  294 ++
 src/libtiff/tif_swab.c                        |  235 ++
 src/libtiff/tif_thunder.c                     |  158 +
 src/libtiff/tif_tile.c                        |  273 ++
 src/libtiff/tif_version.c                     |   33 +
 src/libtiff/tif_warning.c                     |   74 +
 src/libtiff/tif_write.c                       |  725 +++++
 src/libtiff/tif_zip.c                         |  378 +++
 src/libtiff/tiff.h                            |  647 ++++
 src/libtiff/tiffio.h                          |  515 +++
 src/libtiff/tiffiop.h                         |  323 ++
 src/libtiff/tiffvers.h                        |    9 +
 src/libtiff/uvcode.h                          |  173 +
 src/lua5/.cvsignore                           |    9 +
 src/lua5/im_fftw.lua                          |   48 +
 src/lua5/im_process.lua                       |  326 ++
 src/lua5/imlua.c                              |  246 ++
 src/lua5/imlua.def                            |   24 +
 src/lua5/imlua_aux.c                          |  255 ++
 src/lua5/imlua_aux.h                          |   82 +
 src/lua5/imlua_avi.c                          |   44 +
 src/lua5/imlua_avi.def                        |    4 +
 src/lua5/imlua_capture.c                      |  421 +++
 src/lua5/imlua_capture.def                    |    5 +
 src/lua5/imlua_convert.c                      |   79 +
 src/lua5/imlua_fftw.c                         |  162 +
 src/lua5/imlua_fftw.def                       |    4 +
 src/lua5/imlua_file.c                         |  661 ++++
 src/lua5/imlua_image.c                        | 1061 ++++++
 src/lua5/imlua_image.h                        |   38 +
 src/lua5/imlua_jp2.c                          |   44 +
 src/lua5/imlua_jp2.def                        |    4 +
 src/lua5/imlua_kernel.c                       |  182 ++
 src/lua5/imlua_palette.c                      |  399 +++
 src/lua5/imlua_palette.h                      |   32 +
 src/lua5/imlua_process.c                      | 3091 ++++++++++++++++++
 src/lua5/imlua_process.def                    |    4 +
 src/lua5/imlua_util.c                         |  279 ++
 src/lua5/imlua_wmv.c                          |   44 +
 src/lua5/imlua_wmv.def                        |    4 +
 src/make_uname                                |   13 +
 src/make_uname.bat                            |   73 +
 src/old_im.cpp                                |  440 +++
 src/old_imcolor.c                             |   75 +
 src/old_imresize.c                            |  117 +
 src/process/im_analyze.cpp                    | 1262 ++++++++
 src/process/im_arithmetic_bin.cpp             |  503 +++
 src/process/im_arithmetic_un.cpp              |  210 ++
 src/process/im_canny.cpp                      |  254 ++
 src/process/im_color.cpp                      |  255 ++
 src/process/im_convolve.cpp                   | 1512 +++++++++
 src/process/im_convolve_rank.cpp              |  701 ++++
 src/process/im_distance.cpp                   |  512 +++
 src/process/im_effects.cpp                    |   86 +
 src/process/im_fft.cpp                        |  218 ++
 src/process/im_geometric.cpp                  |  724 +++++
 src/process/im_histogram.cpp                  |  105 +
 src/process/im_houghline.cpp                  |  435 +++
 src/process/im_kernel.cpp                     |  293 ++
 src/process/im_logic.cpp                      |  136 +
 src/process/im_morphology_bin.cpp             |  317 ++
 src/process/im_morphology_gray.cpp            |  231 ++
 src/process/im_quantize.cpp                   |   65 +
 src/process/im_render.cpp                     |  532 +++
 src/process/im_resize.cpp                     |  332 ++
 src/process/im_statistics.cpp                 |  341 ++
 src/process/im_threshold.cpp                  |  391 +++
 src/process/im_tonegamut.cpp                  |  322 ++
 src/tecmake_compact.mak                       | 1080 +++++++
 src/tiff_binfile.c                            |  139 +
 src/zlib/adler32.c                            |  149 +
 src/zlib/compress.c                           |   79 +
 src/zlib/crc32.c                              |  423 +++
 src/zlib/crc32.h                              |  441 +++
 src/zlib/deflate.c                            | 1736 ++++++++++
 src/zlib/deflate.h                            |  331 ++
 src/zlib/gzio.c                               | 1026 ++++++
 src/zlib/infback.c                            |  623 ++++
 src/zlib/inffast.c                            |  318 ++
 src/zlib/inffast.h                            |   11 +
 src/zlib/inffixed.h                           |   94 +
 src/zlib/inflate.c                            | 1368 ++++++++
 src/zlib/inflate.h                            |  115 +
 src/zlib/inftrees.c                           |  329 ++
 src/zlib/inftrees.h                           |   55 +
 src/zlib/trees.c                              | 1219 +++++++
 src/zlib/trees.h                              |  128 +
 src/zlib/uncompr.c                            |   61 +
 src/zlib/zconf.h                              |  332 ++
 src/zlib/zlib.h                               | 1357 ++++++++
 src/zlib/zutil.c                              |  318 ++
 src/zlib/zutil.h                              |  269 ++
 833 files changed, 286629 insertions(+)
 create mode 100755 src/.cvsignore
 create mode 100644 src/COPYRIGHT
 create mode 100644 src/Makefile
 create mode 100644 src/README
 create mode 100644 src/config.mak
 create mode 100644 src/fftw/config.c
 create mode 100644 src/fftw/config.h
 create mode 100644 src/fftw/executor.c
 create mode 100644 src/fftw/fftw-int.h
 create mode 100644 src/fftw/fftw.h
 create mode 100644 src/fftw/fftwnd.c
 create mode 100644 src/fftw/fn_1.c
 create mode 100644 src/fftw/fn_10.c
 create mode 100644 src/fftw/fn_11.c
 create mode 100644 src/fftw/fn_12.c
 create mode 100644 src/fftw/fn_13.c
 create mode 100644 src/fftw/fn_14.c
 create mode 100644 src/fftw/fn_15.c
 create mode 100644 src/fftw/fn_16.c
 create mode 100644 src/fftw/fn_2.c
 create mode 100644 src/fftw/fn_3.c
 create mode 100644 src/fftw/fn_32.c
 create mode 100644 src/fftw/fn_4.c
 create mode 100644 src/fftw/fn_5.c
 create mode 100644 src/fftw/fn_6.c
 create mode 100644 src/fftw/fn_64.c
 create mode 100644 src/fftw/fn_7.c
 create mode 100644 src/fftw/fn_8.c
 create mode 100644 src/fftw/fn_9.c
 create mode 100644 src/fftw/fni_1.c
 create mode 100644 src/fftw/fni_10.c
 create mode 100644 src/fftw/fni_11.c
 create mode 100644 src/fftw/fni_12.c
 create mode 100644 src/fftw/fni_13.c
 create mode 100644 src/fftw/fni_14.c
 create mode 100644 src/fftw/fni_15.c
 create mode 100644 src/fftw/fni_16.c
 create mode 100644 src/fftw/fni_2.c
 create mode 100644 src/fftw/fni_3.c
 create mode 100644 src/fftw/fni_32.c
 create mode 100644 src/fftw/fni_4.c
 create mode 100644 src/fftw/fni_5.c
 create mode 100644 src/fftw/fni_6.c
 create mode 100644 src/fftw/fni_64.c
 create mode 100644 src/fftw/fni_7.c
 create mode 100644 src/fftw/fni_8.c
 create mode 100644 src/fftw/fni_9.c
 create mode 100644 src/fftw/ftw_10.c
 create mode 100644 src/fftw/ftw_16.c
 create mode 100644 src/fftw/ftw_2.c
 create mode 100644 src/fftw/ftw_3.c
 create mode 100644 src/fftw/ftw_32.c
 create mode 100644 src/fftw/ftw_4.c
 create mode 100644 src/fftw/ftw_5.c
 create mode 100644 src/fftw/ftw_6.c
 create mode 100644 src/fftw/ftw_64.c
 create mode 100644 src/fftw/ftw_7.c
 create mode 100644 src/fftw/ftw_8.c
 create mode 100644 src/fftw/ftw_9.c
 create mode 100644 src/fftw/ftwi_10.c
 create mode 100644 src/fftw/ftwi_16.c
 create mode 100644 src/fftw/ftwi_2.c
 create mode 100644 src/fftw/ftwi_3.c
 create mode 100644 src/fftw/ftwi_32.c
 create mode 100644 src/fftw/ftwi_4.c
 create mode 100644 src/fftw/ftwi_5.c
 create mode 100644 src/fftw/ftwi_6.c
 create mode 100644 src/fftw/ftwi_64.c
 create mode 100644 src/fftw/ftwi_7.c
 create mode 100644 src/fftw/ftwi_8.c
 create mode 100644 src/fftw/ftwi_9.c
 create mode 100644 src/fftw/generic.c
 create mode 100644 src/fftw/malloc.c
 create mode 100644 src/fftw/planner.c
 create mode 100644 src/fftw/putils.c
 create mode 100644 src/fftw/rader.c
 create mode 100644 src/fftw/timer.c
 create mode 100644 src/fftw/twiddle.c
 create mode 100644 src/fftw/wisdom.c
 create mode 100644 src/fftw/wisdomio.c
 create mode 100644 src/fftw3/api/api.h
 create mode 100644 src/fftw3/api/apiplan.c
 create mode 100644 src/fftw3/api/config.h
 create mode 100644 src/fftw3/api/configure.c
 create mode 100644 src/fftw3/api/execute-dft-c2r.c
 create mode 100644 src/fftw3/api/execute-dft-r2c.c
 create mode 100644 src/fftw3/api/execute-dft.c
 create mode 100644 src/fftw3/api/execute-r2r.c
 create mode 100644 src/fftw3/api/execute-split-dft-c2r.c
 create mode 100644 src/fftw3/api/execute-split-dft-r2c.c
 create mode 100644 src/fftw3/api/execute-split-dft.c
 create mode 100644 src/fftw3/api/execute.c
 create mode 100644 src/fftw3/api/export-wisdom-to-file.c
 create mode 100644 src/fftw3/api/export-wisdom-to-string.c
 create mode 100644 src/fftw3/api/export-wisdom.c
 create mode 100644 src/fftw3/api/extract-reim.c
 create mode 100644 src/fftw3/api/f77api.c
 create mode 100644 src/fftw3/api/f77funcs.h
 create mode 100644 src/fftw3/api/fftw3.h
 create mode 100644 src/fftw3/api/flops.c
 create mode 100644 src/fftw3/api/forget-wisdom.c
 create mode 100644 src/fftw3/api/import-system-wisdom.c
 create mode 100644 src/fftw3/api/import-wisdom-from-file.c
 create mode 100644 src/fftw3/api/import-wisdom-from-string.c
 create mode 100644 src/fftw3/api/import-wisdom.c
 create mode 100644 src/fftw3/api/map-r2r-kind.c
 create mode 100644 src/fftw3/api/mapflags.c
 create mode 100644 src/fftw3/api/mkprinter-file.c
 create mode 100644 src/fftw3/api/mktensor-iodims.c
 create mode 100644 src/fftw3/api/mktensor-rowmajor.c
 create mode 100644 src/fftw3/api/plan-dft-1d.c
 create mode 100644 src/fftw3/api/plan-dft-2d.c
 create mode 100644 src/fftw3/api/plan-dft-3d.c
 create mode 100644 src/fftw3/api/plan-dft-c2r-1d.c
 create mode 100644 src/fftw3/api/plan-dft-c2r-2d.c
 create mode 100644 src/fftw3/api/plan-dft-c2r-3d.c
 create mode 100644 src/fftw3/api/plan-dft-c2r.c
 create mode 100644 src/fftw3/api/plan-dft-r2c-1d.c
 create mode 100644 src/fftw3/api/plan-dft-r2c-2d.c
 create mode 100644 src/fftw3/api/plan-dft-r2c-3d.c
 create mode 100644 src/fftw3/api/plan-dft-r2c.c
 create mode 100644 src/fftw3/api/plan-dft.c
 create mode 100644 src/fftw3/api/plan-guru-dft-c2r.c
 create mode 100644 src/fftw3/api/plan-guru-dft-r2c.c
 create mode 100644 src/fftw3/api/plan-guru-dft.c
 create mode 100644 src/fftw3/api/plan-guru-r2r.c
 create mode 100644 src/fftw3/api/plan-guru-split-dft-c2r.c
 create mode 100644 src/fftw3/api/plan-guru-split-dft-r2c.c
 create mode 100644 src/fftw3/api/plan-guru-split-dft.c
 create mode 100644 src/fftw3/api/plan-many-dft-c2r.c
 create mode 100644 src/fftw3/api/plan-many-dft-r2c.c
 create mode 100644 src/fftw3/api/plan-many-dft.c
 create mode 100644 src/fftw3/api/plan-many-r2r.c
 create mode 100644 src/fftw3/api/plan-r2r-1d.c
 create mode 100644 src/fftw3/api/plan-r2r-2d.c
 create mode 100644 src/fftw3/api/plan-r2r-3d.c
 create mode 100644 src/fftw3/api/plan-r2r.c
 create mode 100644 src/fftw3/api/print-plan.c
 create mode 100644 src/fftw3/api/rdft2-pad.c
 create mode 100644 src/fftw3/api/the-planner.c
 create mode 100644 src/fftw3/api/version.c
 create mode 100644 src/fftw3/api/x77.h
 create mode 100644 src/fftw3/dft/buffered.c
 create mode 100644 src/fftw3/dft/codelet-dft.h
 create mode 100644 src/fftw3/dft/codelets/f.h
 create mode 100644 src/fftw3/dft/codelets/inplace/icodlist.c
 create mode 100644 src/fftw3/dft/codelets/inplace/q1_2.c
 create mode 100644 src/fftw3/dft/codelets/inplace/q1_3.c
 create mode 100644 src/fftw3/dft/codelets/inplace/q1_4.c
 create mode 100644 src/fftw3/dft/codelets/inplace/q1_5.c
 create mode 100644 src/fftw3/dft/codelets/inplace/q1_6.c
 create mode 100644 src/fftw3/dft/codelets/inplace/q1_8.c
 create mode 100644 src/fftw3/dft/codelets/n.c
 create mode 100644 src/fftw3/dft/codelets/n.h
 create mode 100644 src/fftw3/dft/codelets/q.h
 create mode 100644 src/fftw3/dft/codelets/standard/m1_16.c
 create mode 100644 src/fftw3/dft/codelets/standard/m1_32.c
 create mode 100644 src/fftw3/dft/codelets/standard/m1_64.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_10.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_11.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_12.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_13.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_14.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_15.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_16.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_2.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_3.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_4.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_5.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_6.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_7.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_8.c
 create mode 100644 src/fftw3/dft/codelets/standard/n1_9.c
 create mode 100644 src/fftw3/dft/codelets/standard/scodlist.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_10.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_12.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_15.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_16.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_2.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_3.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_32.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_4.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_5.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_6.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_64.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_7.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_8.c
 create mode 100644 src/fftw3/dft/codelets/standard/t1_9.c
 create mode 100644 src/fftw3/dft/codelets/standard/t2_16.c
 create mode 100644 src/fftw3/dft/codelets/standard/t2_32.c
 create mode 100644 src/fftw3/dft/codelets/standard/t2_4.c
 create mode 100644 src/fftw3/dft/codelets/standard/t2_64.c
 create mode 100644 src/fftw3/dft/codelets/standard/t2_8.c
 create mode 100644 src/fftw3/dft/codelets/t.c
 create mode 100644 src/fftw3/dft/codelets/t.h
 create mode 100644 src/fftw3/dft/conf.c
 create mode 100644 src/fftw3/dft/ct-dif.c
 create mode 100644 src/fftw3/dft/ct-dit.c
 create mode 100644 src/fftw3/dft/ct-ditbuf.c
 create mode 100644 src/fftw3/dft/ct-ditf.c
 create mode 100644 src/fftw3/dft/ct.c
 create mode 100644 src/fftw3/dft/ct.h
 create mode 100644 src/fftw3/dft/dft.h
 create mode 100644 src/fftw3/dft/direct.c
 create mode 100644 src/fftw3/dft/generic.c
 create mode 100644 src/fftw3/dft/indirect.c
 create mode 100644 src/fftw3/dft/kdft-dif.c
 create mode 100644 src/fftw3/dft/kdft-difsq.c
 create mode 100644 src/fftw3/dft/kdft-dit.c
 create mode 100644 src/fftw3/dft/kdft.c
 create mode 100644 src/fftw3/dft/nop.c
 create mode 100644 src/fftw3/dft/plan.c
 create mode 100644 src/fftw3/dft/problem.c
 create mode 100644 src/fftw3/dft/rader-omega.c
 create mode 100644 src/fftw3/dft/rader.c
 create mode 100644 src/fftw3/dft/rank-geq2.c
 create mode 100644 src/fftw3/dft/rank0.c
 create mode 100644 src/fftw3/dft/solve.c
 create mode 100644 src/fftw3/dft/vrank-geq1.c
 create mode 100644 src/fftw3/dft/vrank2-transpose.c
 create mode 100644 src/fftw3/dft/vrank3-transpose.c
 create mode 100644 src/fftw3/dft/zero.c
 create mode 100644 src/fftw3/kernel/align.c
 create mode 100644 src/fftw3/kernel/alloc.c
 create mode 100644 src/fftw3/kernel/assert.c
 create mode 100644 src/fftw3/kernel/awake.c
 create mode 100644 src/fftw3/kernel/cycle.h
 create mode 100644 src/fftw3/kernel/debug.c
 create mode 100644 src/fftw3/kernel/hash.c
 create mode 100644 src/fftw3/kernel/iabs.c
 create mode 100644 src/fftw3/kernel/ifftw.h
 create mode 100644 src/fftw3/kernel/kbuffered.c
 create mode 100644 src/fftw3/kernel/kct.c
 create mode 100644 src/fftw3/kernel/kplan.c
 create mode 100644 src/fftw3/kernel/kproblem.c
 create mode 100644 src/fftw3/kernel/krader.c
 create mode 100644 src/fftw3/kernel/md5-1.c
 create mode 100644 src/fftw3/kernel/md5.c
 create mode 100644 src/fftw3/kernel/minmax.c
 create mode 100644 src/fftw3/kernel/ops.c
 create mode 100644 src/fftw3/kernel/pickdim.c
 create mode 100644 src/fftw3/kernel/planner.c
 create mode 100644 src/fftw3/kernel/primes.c
 create mode 100644 src/fftw3/kernel/print.c
 create mode 100644 src/fftw3/kernel/scan.c
 create mode 100644 src/fftw3/kernel/solver.c
 create mode 100644 src/fftw3/kernel/solvtab.c
 create mode 100644 src/fftw3/kernel/square.c
 create mode 100644 src/fftw3/kernel/stride.c
 create mode 100644 src/fftw3/kernel/tensor.c
 create mode 100644 src/fftw3/kernel/tensor1.c
 create mode 100644 src/fftw3/kernel/tensor2.c
 create mode 100644 src/fftw3/kernel/tensor4.c
 create mode 100644 src/fftw3/kernel/tensor5.c
 create mode 100644 src/fftw3/kernel/tensor7.c
 create mode 100644 src/fftw3/kernel/tensor8.c
 create mode 100644 src/fftw3/kernel/tensor9.c
 create mode 100644 src/fftw3/kernel/timer.c
 create mode 100644 src/fftw3/kernel/transpose.c
 create mode 100644 src/fftw3/kernel/trig.c
 create mode 100644 src/fftw3/kernel/trig1.c
 create mode 100644 src/fftw3/kernel/twiddle.c
 create mode 100644 src/fftw3/rdft/buffered2.c
 create mode 100644 src/fftw3/rdft/codelet-rdft.h
 create mode 100644 src/fftw3/rdft/codelets/hb.h
 create mode 100644 src/fftw3/rdft/codelets/hc2r.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r.h
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_10.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_12.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_15.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_16.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_2.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_3.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_32.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_4.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_5.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_6.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_64.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_7.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_8.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hb_9.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2rIII_10.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2rIII_12.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2rIII_15.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2rIII_16.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2rIII_2.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2rIII_3.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2rIII_32.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2rIII_4.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2rIII_5.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2rIII_6.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2rIII_7.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2rIII_8.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2rIII_9.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_10.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_11.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_12.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_13.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_14.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_15.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_16.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_3.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_32.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_4.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_5.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_6.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_7.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_8.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hc2r_9.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/hcodlist.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/mhc2rIII_32.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/mhc2rIII_64.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/mhc2r_128.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/mhc2r_32.c
 create mode 100644 src/fftw3/rdft/codelets/hc2r/mhc2r_64.c
 create mode 100644 src/fftw3/rdft/codelets/hc2rIII.h
 create mode 100644 src/fftw3/rdft/codelets/hf.h
 create mode 100644 src/fftw3/rdft/codelets/hfb.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc.h
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf2_16.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf2_32.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf2_4.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf2_64.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf2_8.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_10.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_12.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_15.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_16.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_2.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_3.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_32.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_4.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_5.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_6.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_64.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_7.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_8.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/hf_9.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/mr2hcII_32.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/mr2hcII_64.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/mr2hc_128.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/mr2hc_32.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/mr2hc_64.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hcII_10.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hcII_12.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hcII_15.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hcII_16.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hcII_2.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hcII_3.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hcII_32.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hcII_4.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hcII_5.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hcII_6.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hcII_7.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hcII_8.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hcII_9.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_10.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_11.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_12.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_13.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_14.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_15.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_16.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_2.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_3.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_32.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_4.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_5.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_6.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_7.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_8.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/r2hc_9.c
 create mode 100644 src/fftw3/rdft/codelets/r2hc/rhcodlist.c
 create mode 100644 src/fftw3/rdft/codelets/r2hcII.h
 create mode 100644 src/fftw3/rdft/codelets/r2r.c
 create mode 100644 src/fftw3/rdft/codelets/r2r.h
 create mode 100644 src/fftw3/rdft/codelets/r2r/e01_8.c
 create mode 100644 src/fftw3/rdft/codelets/r2r/e10_8.c
 create mode 100644 src/fftw3/rdft/codelets/r2r/rrcodlist.c
 create mode 100644 src/fftw3/rdft/dft-r2hc.c
 create mode 100644 src/fftw3/rdft/dht-r2hc.c
 create mode 100644 src/fftw3/rdft/dht-rader.c
 create mode 100644 src/fftw3/rdft/direct2.c
 create mode 100644 src/fftw3/rdft/hc2hc-buf.c
 create mode 100644 src/fftw3/rdft/hc2hc-dif.c
 create mode 100644 src/fftw3/rdft/hc2hc-dit.c
 create mode 100644 src/fftw3/rdft/hc2hc.c
 create mode 100644 src/fftw3/rdft/hc2hc.h
 create mode 100644 src/fftw3/rdft/khc2hc-dif.c
 create mode 100644 src/fftw3/rdft/khc2hc-dit.c
 create mode 100644 src/fftw3/rdft/khc2r.c
 create mode 100644 src/fftw3/rdft/kr2hc.c
 create mode 100644 src/fftw3/rdft/kr2r.c
 create mode 100644 src/fftw3/rdft/nop2.c
 create mode 100644 src/fftw3/rdft/plan2.c
 create mode 100644 src/fftw3/rdft/problem2.c
 create mode 100644 src/fftw3/rdft/rader-hc2hc.c
 create mode 100644 src/fftw3/rdft/rank-geq2-rdft2.c
 create mode 100644 src/fftw3/rdft/rank0-rdft2.c
 create mode 100644 src/fftw3/rdft/rbuffered.c
 create mode 100644 src/fftw3/rdft/rconf.c
 create mode 100644 src/fftw3/rdft/rdft-dht.c
 create mode 100644 src/fftw3/rdft/rdft.h
 create mode 100644 src/fftw3/rdft/rdft2-inplace-strides.c
 create mode 100644 src/fftw3/rdft/rdft2-radix2.c
 create mode 100644 src/fftw3/rdft/rdft2-strides.c
 create mode 100644 src/fftw3/rdft/rdft2-tensor-max-index.c
 create mode 100644 src/fftw3/rdft/rdirect.c
 create mode 100644 src/fftw3/rdft/rgeneric.c
 create mode 100644 src/fftw3/rdft/rindirect.c
 create mode 100644 src/fftw3/rdft/rnop.c
 create mode 100644 src/fftw3/rdft/rplan.c
 create mode 100644 src/fftw3/rdft/rproblem.c
 create mode 100644 src/fftw3/rdft/rrank-geq2.c
 create mode 100644 src/fftw3/rdft/rrank0.c
 create mode 100644 src/fftw3/rdft/rsolve.c
 create mode 100644 src/fftw3/rdft/rvrank-geq1.c
 create mode 100644 src/fftw3/rdft/solve2.c
 create mode 100644 src/fftw3/rdft/vrank-geq1-rdft2.c
 create mode 100644 src/fftw3/reodft/redft00e-r2hc-pad.c
 create mode 100644 src/fftw3/reodft/redft00e-r2hc.c
 create mode 100644 src/fftw3/reodft/reoconf.c
 create mode 100644 src/fftw3/reodft/reodft.h
 create mode 100644 src/fftw3/reodft/reodft010e-r2hc.c
 create mode 100644 src/fftw3/reodft/reodft11e-r2hc-odd.c
 create mode 100644 src/fftw3/reodft/reodft11e-r2hc.c
 create mode 100644 src/fftw3/reodft/reodft11e-radix2.c
 create mode 100644 src/fftw3/reodft/rodft00e-r2hc-pad.c
 create mode 100644 src/fftw3/reodft/rodft00e-r2hc.c
 create mode 100644 src/im.def
 create mode 100644 src/im.rc
 create mode 100644 src/im_attrib.cpp
 create mode 100644 src/im_avi.def
 create mode 100644 src/im_avi.mak
 create mode 100644 src/im_bin.cpp
 create mode 100644 src/im_binfile.cpp
 create mode 100644 src/im_capture.def
 create mode 100644 src/im_capture.mak
 create mode 100644 src/im_capture_dx.cpp
 create mode 100644 src/im_colorhsi.cpp
 create mode 100644 src/im_colormode.cpp
 create mode 100644 src/im_colorutil.cpp
 create mode 100644 src/im_compress.cpp
 create mode 100644 src/im_convertbitmap.cpp
 create mode 100644 src/im_convertcolor.cpp
 create mode 100644 src/im_converttype.cpp
 create mode 100644 src/im_counter.cpp
 create mode 100644 src/im_datatype.cpp
 create mode 100644 src/im_dib.cpp
 create mode 100644 src/im_dibxbitmap.cpp
 create mode 100644 src/im_ecw.def
 create mode 100644 src/im_ecw.mak
 create mode 100644 src/im_fftw.def
 create mode 100644 src/im_fftw.mak
 create mode 100644 src/im_fftw3.mak
 create mode 100644 src/im_file.cpp
 create mode 100644 src/im_filebuffer.cpp
 create mode 100644 src/im_fileraw.cpp
 create mode 100644 src/im_format.cpp
 create mode 100644 src/im_format_all.cpp
 create mode 100644 src/im_format_avi.cpp
 create mode 100644 src/im_format_bmp.cpp
 create mode 100644 src/im_format_ecw.cpp
 create mode 100644 src/im_format_gif.cpp
 create mode 100644 src/im_format_ico.cpp
 create mode 100644 src/im_format_jp2.cpp
 create mode 100644 src/im_format_jpeg.cpp
 create mode 100644 src/im_format_krn.cpp
 create mode 100644 src/im_format_led.cpp
 create mode 100644 src/im_format_pcx.cpp
 create mode 100644 src/im_format_png.cpp
 create mode 100644 src/im_format_pnm.cpp
 create mode 100644 src/im_format_ras.cpp
 create mode 100644 src/im_format_raw.cpp
 create mode 100644 src/im_format_sgi.cpp
 create mode 100644 src/im_format_tga.cpp
 create mode 100644 src/im_format_tiff.cpp
 create mode 100644 src/im_format_wmv.cpp
 create mode 100644 src/im_image.cpp
 create mode 100644 src/im_jp2.def
 create mode 100644 src/im_jp2.mak
 create mode 100644 src/im_lib.cpp
 create mode 100644 src/im_lua3.c
 create mode 100644 src/im_palette.cpp
 create mode 100644 src/im_process.def
 create mode 100644 src/im_process.mak
 create mode 100644 src/im_rgb2map.cpp
 create mode 100644 src/im_str.cpp
 create mode 100644 src/im_sysfile_unix.cpp
 create mode 100644 src/im_sysfile_win32.cpp
 create mode 100644 src/im_wmv.def
 create mode 100644 src/im_wmv.mak
 create mode 100644 src/imlua3.def
 create mode 100644 src/imlua3.mak
 create mode 100644 src/imlua5.mak
 create mode 100644 src/imlua_avi.mak
 create mode 100644 src/imlua_capture5.mak
 create mode 100644 src/imlua_fftw5.mak
 create mode 100644 src/imlua_jp2.mak
 create mode 100644 src/imlua_process5.mak
 create mode 100644 src/imlua_wmv.mak
 create mode 100644 src/jas_binfile.c
 create mode 100644 src/libexif/_stdint.h
 create mode 100644 src/libexif/canon/exif-mnote-data-canon.c
 create mode 100644 src/libexif/canon/exif-mnote-data-canon.h
 create mode 100644 src/libexif/canon/mnote-canon-entry.c
 create mode 100644 src/libexif/canon/mnote-canon-entry.h
 create mode 100644 src/libexif/canon/mnote-canon-tag.c
 create mode 100644 src/libexif/canon/mnote-canon-tag.h
 create mode 100644 src/libexif/config.h
 create mode 100644 src/libexif/exif-byte-order.c
 create mode 100644 src/libexif/exif-byte-order.h
 create mode 100644 src/libexif/exif-content.c
 create mode 100644 src/libexif/exif-content.h
 create mode 100644 src/libexif/exif-data.c
 create mode 100644 src/libexif/exif-data.h
 create mode 100644 src/libexif/exif-entry.c
 create mode 100644 src/libexif/exif-entry.h
 create mode 100644 src/libexif/exif-format.c
 create mode 100644 src/libexif/exif-format.h
 create mode 100644 src/libexif/exif-ifd.c
 create mode 100644 src/libexif/exif-ifd.h
 create mode 100644 src/libexif/exif-loader.c
 create mode 100644 src/libexif/exif-loader.h
 create mode 100644 src/libexif/exif-log.c
 create mode 100644 src/libexif/exif-log.h
 create mode 100644 src/libexif/exif-mem.c
 create mode 100644 src/libexif/exif-mem.h
 create mode 100644 src/libexif/exif-mnote-data-priv.h
 create mode 100644 src/libexif/exif-mnote-data.c
 create mode 100644 src/libexif/exif-mnote-data.h
 create mode 100644 src/libexif/exif-tag.c
 create mode 100644 src/libexif/exif-tag.h
 create mode 100644 src/libexif/exif-utils.c
 create mode 100644 src/libexif/exif-utils.h
 create mode 100644 src/libexif/exif.c
 create mode 100644 src/libexif/exif.h
 create mode 100644 src/libexif/i18n.h
 create mode 100644 src/libexif/olympus/exif-mnote-data-olympus.c
 create mode 100644 src/libexif/olympus/exif-mnote-data-olympus.h
 create mode 100644 src/libexif/olympus/mnote-olympus-entry.c
 create mode 100644 src/libexif/olympus/mnote-olympus-entry.h
 create mode 100644 src/libexif/olympus/mnote-olympus-tag.c
 create mode 100644 src/libexif/olympus/mnote-olympus-tag.h
 create mode 100644 src/libexif/pentax/exif-mnote-data-pentax.c
 create mode 100644 src/libexif/pentax/exif-mnote-data-pentax.h
 create mode 100644 src/libexif/pentax/mnote-pentax-entry.c
 create mode 100644 src/libexif/pentax/mnote-pentax-entry.h
 create mode 100644 src/libexif/pentax/mnote-pentax-tag.c
 create mode 100644 src/libexif/pentax/mnote-pentax-tag.h
 create mode 100644 src/libjasper/base/jas_cm.c
 create mode 100644 src/libjasper/base/jas_debug.c
 create mode 100644 src/libjasper/base/jas_getopt.c
 create mode 100644 src/libjasper/base/jas_icc.c
 create mode 100644 src/libjasper/base/jas_iccdata.c
 create mode 100644 src/libjasper/base/jas_image.c
 create mode 100644 src/libjasper/base/jas_init.c
 create mode 100644 src/libjasper/base/jas_malloc.c
 create mode 100644 src/libjasper/base/jas_seq.c
 create mode 100644 src/libjasper/base/jas_stream.c
 create mode 100644 src/libjasper/base/jas_string.c
 create mode 100644 src/libjasper/base/jas_tmr.c
 create mode 100644 src/libjasper/base/jas_tvp.c
 create mode 100644 src/libjasper/base/jas_version.c
 create mode 100644 src/libjasper/jasper/jas_cm.h
 create mode 100644 src/libjasper/jasper/jas_config.h
 create mode 100644 src/libjasper/jasper/jas_debug.h
 create mode 100644 src/libjasper/jasper/jas_fix.h
 create mode 100644 src/libjasper/jasper/jas_getopt.h
 create mode 100644 src/libjasper/jasper/jas_icc.h
 create mode 100644 src/libjasper/jasper/jas_image.h
 create mode 100644 src/libjasper/jasper/jas_init.h
 create mode 100644 src/libjasper/jasper/jas_malloc.h
 create mode 100644 src/libjasper/jasper/jas_math.h
 create mode 100644 src/libjasper/jasper/jas_seq.h
 create mode 100644 src/libjasper/jasper/jas_stream.h
 create mode 100644 src/libjasper/jasper/jas_string.h
 create mode 100644 src/libjasper/jasper/jas_tmr.h
 create mode 100644 src/libjasper/jasper/jas_tvp.h
 create mode 100644 src/libjasper/jasper/jas_types.h
 create mode 100644 src/libjasper/jasper/jas_version.h
 create mode 100644 src/libjasper/jasper/jasper.h
 create mode 100644 src/libjasper/jp2/jp2_cod.c
 create mode 100644 src/libjasper/jp2/jp2_cod.h
 create mode 100644 src/libjasper/jp2/jp2_dec.c
 create mode 100644 src/libjasper/jp2/jp2_dec.h
 create mode 100644 src/libjasper/jp2/jp2_enc.c
 create mode 100644 src/libjasper/jpc/jpc_bs.c
 create mode 100644 src/libjasper/jpc/jpc_bs.h
 create mode 100644 src/libjasper/jpc/jpc_cod.h
 create mode 100644 src/libjasper/jpc/jpc_cs.c
 create mode 100644 src/libjasper/jpc/jpc_cs.h
 create mode 100644 src/libjasper/jpc/jpc_dec.c
 create mode 100644 src/libjasper/jpc/jpc_dec.h
 create mode 100644 src/libjasper/jpc/jpc_enc.c
 create mode 100644 src/libjasper/jpc/jpc_enc.h
 create mode 100644 src/libjasper/jpc/jpc_fix.h
 create mode 100644 src/libjasper/jpc/jpc_flt.h
 create mode 100644 src/libjasper/jpc/jpc_math.c
 create mode 100644 src/libjasper/jpc/jpc_math.h
 create mode 100644 src/libjasper/jpc/jpc_mct.c
 create mode 100644 src/libjasper/jpc/jpc_mct.h
 create mode 100644 src/libjasper/jpc/jpc_mqcod.c
 create mode 100644 src/libjasper/jpc/jpc_mqcod.h
 create mode 100644 src/libjasper/jpc/jpc_mqdec.c
 create mode 100644 src/libjasper/jpc/jpc_mqdec.h
 create mode 100644 src/libjasper/jpc/jpc_mqenc.c
 create mode 100644 src/libjasper/jpc/jpc_mqenc.h
 create mode 100644 src/libjasper/jpc/jpc_qmfb.c
 create mode 100644 src/libjasper/jpc/jpc_qmfb.h
 create mode 100644 src/libjasper/jpc/jpc_t1cod.c
 create mode 100644 src/libjasper/jpc/jpc_t1cod.h
 create mode 100644 src/libjasper/jpc/jpc_t1dec.c
 create mode 100644 src/libjasper/jpc/jpc_t1dec.h
 create mode 100644 src/libjasper/jpc/jpc_t1enc.c
 create mode 100644 src/libjasper/jpc/jpc_t1enc.h
 create mode 100644 src/libjasper/jpc/jpc_t2cod.c
 create mode 100644 src/libjasper/jpc/jpc_t2cod.h
 create mode 100644 src/libjasper/jpc/jpc_t2dec.c
 create mode 100644 src/libjasper/jpc/jpc_t2dec.h
 create mode 100644 src/libjasper/jpc/jpc_t2enc.c
 create mode 100644 src/libjasper/jpc/jpc_t2enc.h
 create mode 100644 src/libjasper/jpc/jpc_tagtree.c
 create mode 100644 src/libjasper/jpc/jpc_tagtree.h
 create mode 100644 src/libjasper/jpc/jpc_tsfb.c
 create mode 100644 src/libjasper/jpc/jpc_tsfb.h
 create mode 100644 src/libjasper/jpc/jpc_util.c
 create mode 100644 src/libjasper/jpc/jpc_util.h
 create mode 100644 src/libjpeg/jcapimin.c
 create mode 100644 src/libjpeg/jcapistd.c
 create mode 100644 src/libjpeg/jccoefct.c
 create mode 100644 src/libjpeg/jccolor.c
 create mode 100644 src/libjpeg/jcdctmgr.c
 create mode 100644 src/libjpeg/jchuff.c
 create mode 100644 src/libjpeg/jchuff.h
 create mode 100644 src/libjpeg/jcinit.c
 create mode 100644 src/libjpeg/jcmainct.c
 create mode 100644 src/libjpeg/jcmarker.c
 create mode 100644 src/libjpeg/jcmaster.c
 create mode 100644 src/libjpeg/jcomapi.c
 create mode 100644 src/libjpeg/jconfig.h
 create mode 100644 src/libjpeg/jcparam.c
 create mode 100644 src/libjpeg/jcphuff.c
 create mode 100644 src/libjpeg/jcprepct.c
 create mode 100644 src/libjpeg/jcsample.c
 create mode 100644 src/libjpeg/jctrans.c
 create mode 100644 src/libjpeg/jdapimin.c
 create mode 100644 src/libjpeg/jdapistd.c
 create mode 100644 src/libjpeg/jdatadst.c
 create mode 100644 src/libjpeg/jdatasrc.c
 create mode 100644 src/libjpeg/jdcoefct.c
 create mode 100644 src/libjpeg/jdcolor.c
 create mode 100644 src/libjpeg/jdct.h
 create mode 100644 src/libjpeg/jddctmgr.c
 create mode 100644 src/libjpeg/jdhuff.c
 create mode 100644 src/libjpeg/jdhuff.h
 create mode 100644 src/libjpeg/jdinput.c
 create mode 100644 src/libjpeg/jdmainct.c
 create mode 100644 src/libjpeg/jdmarker.c
 create mode 100644 src/libjpeg/jdmaster.c
 create mode 100644 src/libjpeg/jdmerge.c
 create mode 100644 src/libjpeg/jdphuff.c
 create mode 100644 src/libjpeg/jdpostct.c
 create mode 100644 src/libjpeg/jdsample.c
 create mode 100644 src/libjpeg/jdtrans.c
 create mode 100644 src/libjpeg/jerror.c
 create mode 100644 src/libjpeg/jerror.h
 create mode 100644 src/libjpeg/jfdctflt.c
 create mode 100644 src/libjpeg/jfdctfst.c
 create mode 100644 src/libjpeg/jfdctint.c
 create mode 100644 src/libjpeg/jidctflt.c
 create mode 100644 src/libjpeg/jidctfst.c
 create mode 100644 src/libjpeg/jidctint.c
 create mode 100644 src/libjpeg/jidctred.c
 create mode 100644 src/libjpeg/jinclude.h
 create mode 100644 src/libjpeg/jmemmgr.c
 create mode 100644 src/libjpeg/jmemnobs.c
 create mode 100644 src/libjpeg/jmemsys.h
 create mode 100644 src/libjpeg/jmorecfg.h
 create mode 100644 src/libjpeg/jpegint.h
 create mode 100644 src/libjpeg/jpeglib.h
 create mode 100644 src/libjpeg/jquant1.c
 create mode 100644 src/libjpeg/jquant2.c
 create mode 100644 src/libjpeg/jutils.c
 create mode 100644 src/libjpeg/jversion.h
 create mode 100644 src/liblzf/lzf.h
 create mode 100644 src/liblzf/lzfP.h
 create mode 100644 src/liblzf/lzf_c.c
 create mode 100644 src/liblzf/lzf_d.c
 create mode 100644 src/libpng/png.c
 create mode 100644 src/libpng/png.h
 create mode 100644 src/libpng/pngconf.h
 create mode 100644 src/libpng/pngerror.c
 create mode 100644 src/libpng/pnggccrd.c
 create mode 100644 src/libpng/pngget.c
 create mode 100644 src/libpng/pngmem.c
 create mode 100644 src/libpng/pngpread.c
 create mode 100644 src/libpng/pngread.c
 create mode 100644 src/libpng/pngrio.c
 create mode 100644 src/libpng/pngrtran.c
 create mode 100644 src/libpng/pngrutil.c
 create mode 100644 src/libpng/pngset.c
 create mode 100644 src/libpng/pngtrans.c
 create mode 100644 src/libpng/pngvcrd.c
 create mode 100644 src/libpng/pngwio.c
 create mode 100644 src/libpng/pngwrite.c
 create mode 100644 src/libpng/pngwtran.c
 create mode 100644 src/libpng/pngwutil.c
 create mode 100644 src/libtiff/t4.h
 create mode 100644 src/libtiff/tif_aux.c
 create mode 100644 src/libtiff/tif_close.c
 create mode 100644 src/libtiff/tif_codec.c
 create mode 100644 src/libtiff/tif_color.c
 create mode 100644 src/libtiff/tif_compress.c
 create mode 100644 src/libtiff/tif_config.h
 create mode 100644 src/libtiff/tif_dir.c
 create mode 100644 src/libtiff/tif_dir.h
 create mode 100644 src/libtiff/tif_dirinfo.c
 create mode 100644 src/libtiff/tif_dirread.c
 create mode 100644 src/libtiff/tif_dirwrite.c
 create mode 100644 src/libtiff/tif_dumpmode.c
 create mode 100644 src/libtiff/tif_error.c
 create mode 100644 src/libtiff/tif_extension.c
 create mode 100644 src/libtiff/tif_fax3.c
 create mode 100644 src/libtiff/tif_fax3.h
 create mode 100644 src/libtiff/tif_fax3sm.c
 create mode 100644 src/libtiff/tif_flush.c
 create mode 100644 src/libtiff/tif_getimage.c
 create mode 100644 src/libtiff/tif_jpeg.c
 create mode 100644 src/libtiff/tif_luv.c
 create mode 100644 src/libtiff/tif_lzw.c
 create mode 100644 src/libtiff/tif_next.c
 create mode 100644 src/libtiff/tif_ojpeg.c
 create mode 100644 src/libtiff/tif_open.c
 create mode 100644 src/libtiff/tif_packbits.c
 create mode 100644 src/libtiff/tif_pixarlog.c
 create mode 100644 src/libtiff/tif_predict.c
 create mode 100644 src/libtiff/tif_predict.h
 create mode 100644 src/libtiff/tif_print.c
 create mode 100644 src/libtiff/tif_read.c
 create mode 100644 src/libtiff/tif_strip.c
 create mode 100644 src/libtiff/tif_swab.c
 create mode 100644 src/libtiff/tif_thunder.c
 create mode 100644 src/libtiff/tif_tile.c
 create mode 100644 src/libtiff/tif_version.c
 create mode 100644 src/libtiff/tif_warning.c
 create mode 100644 src/libtiff/tif_write.c
 create mode 100644 src/libtiff/tif_zip.c
 create mode 100644 src/libtiff/tiff.h
 create mode 100644 src/libtiff/tiffio.h
 create mode 100644 src/libtiff/tiffiop.h
 create mode 100644 src/libtiff/tiffvers.h
 create mode 100644 src/libtiff/uvcode.h
 create mode 100644 src/lua5/.cvsignore
 create mode 100644 src/lua5/im_fftw.lua
 create mode 100644 src/lua5/im_process.lua
 create mode 100644 src/lua5/imlua.c
 create mode 100644 src/lua5/imlua.def
 create mode 100644 src/lua5/imlua_aux.c
 create mode 100644 src/lua5/imlua_aux.h
 create mode 100644 src/lua5/imlua_avi.c
 create mode 100644 src/lua5/imlua_avi.def
 create mode 100644 src/lua5/imlua_capture.c
 create mode 100644 src/lua5/imlua_capture.def
 create mode 100644 src/lua5/imlua_convert.c
 create mode 100644 src/lua5/imlua_fftw.c
 create mode 100644 src/lua5/imlua_fftw.def
 create mode 100644 src/lua5/imlua_file.c
 create mode 100644 src/lua5/imlua_image.c
 create mode 100644 src/lua5/imlua_image.h
 create mode 100644 src/lua5/imlua_jp2.c
 create mode 100644 src/lua5/imlua_jp2.def
 create mode 100644 src/lua5/imlua_kernel.c
 create mode 100644 src/lua5/imlua_palette.c
 create mode 100644 src/lua5/imlua_palette.h
 create mode 100644 src/lua5/imlua_process.c
 create mode 100644 src/lua5/imlua_process.def
 create mode 100644 src/lua5/imlua_util.c
 create mode 100644 src/lua5/imlua_wmv.c
 create mode 100644 src/lua5/imlua_wmv.def
 create mode 100644 src/make_uname
 create mode 100644 src/make_uname.bat
 create mode 100644 src/old_im.cpp
 create mode 100644 src/old_imcolor.c
 create mode 100644 src/old_imresize.c
 create mode 100644 src/process/im_analyze.cpp
 create mode 100644 src/process/im_arithmetic_bin.cpp
 create mode 100644 src/process/im_arithmetic_un.cpp
 create mode 100644 src/process/im_canny.cpp
 create mode 100644 src/process/im_color.cpp
 create mode 100644 src/process/im_convolve.cpp
 create mode 100644 src/process/im_convolve_rank.cpp
 create mode 100644 src/process/im_distance.cpp
 create mode 100644 src/process/im_effects.cpp
 create mode 100644 src/process/im_fft.cpp
 create mode 100644 src/process/im_geometric.cpp
 create mode 100644 src/process/im_histogram.cpp
 create mode 100644 src/process/im_houghline.cpp
 create mode 100644 src/process/im_kernel.cpp
 create mode 100644 src/process/im_logic.cpp
 create mode 100644 src/process/im_morphology_bin.cpp
 create mode 100644 src/process/im_morphology_gray.cpp
 create mode 100644 src/process/im_quantize.cpp
 create mode 100644 src/process/im_render.cpp
 create mode 100644 src/process/im_resize.cpp
 create mode 100644 src/process/im_statistics.cpp
 create mode 100644 src/process/im_threshold.cpp
 create mode 100644 src/process/im_tonegamut.cpp
 create mode 100644 src/tecmake_compact.mak
 create mode 100644 src/tiff_binfile.c
 create mode 100644 src/zlib/adler32.c
 create mode 100644 src/zlib/compress.c
 create mode 100644 src/zlib/crc32.c
 create mode 100644 src/zlib/crc32.h
 create mode 100644 src/zlib/deflate.c
 create mode 100644 src/zlib/deflate.h
 create mode 100644 src/zlib/gzio.c
 create mode 100644 src/zlib/infback.c
 create mode 100644 src/zlib/inffast.c
 create mode 100644 src/zlib/inffast.h
 create mode 100644 src/zlib/inffixed.h
 create mode 100644 src/zlib/inflate.c
 create mode 100644 src/zlib/inflate.h
 create mode 100644 src/zlib/inftrees.c
 create mode 100644 src/zlib/inftrees.h
 create mode 100644 src/zlib/trees.c
 create mode 100644 src/zlib/trees.h
 create mode 100644 src/zlib/uncompr.c
 create mode 100644 src/zlib/zconf.h
 create mode 100644 src/zlib/zlib.h
 create mode 100644 src/zlib/zutil.c
 create mode 100644 src/zlib/zutil.h

(limited to 'src')
diff --git a/src/.cvsignore b/src/.cvsignore
new file mode 100755
index 0000000..09da2e3
--- /dev/null
+++ b/src/.cvsignore
@@ -0,0 +1,10 @@
+im_capture.wlib
+so_locations
+*.dep
+*.wdep
+*.loh
+.plan
+.project
+*.err
+*.make
+umake_uname.bat
\ No newline at end of file
diff --git a/src/COPYRIGHT b/src/COPYRIGHT
new file mode 100644
index 0000000..19df0a0
--- /dev/null
+++ b/src/COPYRIGHT
@@ -0,0 +1,32 @@
+IM License
+-----------
+
+IM is licensed under the terms of the MIT license reproduced below.
+This means that IM is free software and can be used for both academic
+and commercial purposes at absolutely no cost.
+
+===============================================================================
+
+Copyright (C) 1994-2008 Tecgraf, PUC-Rio.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+===============================================================================
+
+(end of COPYRIGHT)
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..b51a310
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,22 @@
+# Top-level driver: every target re-runs make on tecmake_compact.mak, passing MF=<target name>; only "im" passes no MF.
+.PHONY: do_all im im_jp2 im_process im_fftw imlua3 imlua5 imlua_jp2 imlua_process5 imlua_fftw5
+do_all: im im_jp2 im_process im_fftw imlua3 imlua5 imlua_jp2 imlua_process5 imlua_fftw5
+# NOTE(review): "im" is the only recipe not prefixed with @, so only its command line is echoed -- confirm this is intended.
+im:
+	$(MAKE) --no-print-directory -f tecmake_compact.mak
+im_jp2:
+	@$(MAKE) --no-print-directory -f tecmake_compact.mak MF=im_jp2
+im_process:
+	@$(MAKE) --no-print-directory -f tecmake_compact.mak MF=im_process
+im_fftw:
+	@$(MAKE) --no-print-directory -f tecmake_compact.mak MF=im_fftw
+imlua3:
+	@$(MAKE) --no-print-directory -f tecmake_compact.mak MF=imlua3
+imlua5:
+	@$(MAKE) --no-print-directory -f tecmake_compact.mak MF=imlua5
+imlua_jp2:
+	@$(MAKE) --no-print-directory -f tecmake_compact.mak MF=imlua_jp2
+imlua_process5:
+	@$(MAKE) --no-print-directory -f tecmake_compact.mak MF=imlua_process5
+imlua_fftw5:
+	@$(MAKE) --no-print-directory -f tecmake_compact.mak MF=imlua_fftw5
diff --git a/src/README b/src/README
new file mode 100644
index 0000000..3dbcbd5
--- /dev/null
+++ b/src/README
@@ -0,0 +1,11 @@
+README for IM
+
+  IM is a toolkit for Digital Imaging. IM is based on 4 concepts: Image Representation, Storage, Processing and Capture. The main goal of the library is to provide a simple API and abstraction of images for scientific applications.
+  The most popular file formats are supported: TIFF, BMP, PNG, JPEG, GIF and AVI. Image representation includes scientific data types. About a hundred Image Processing operations are available.
+
+  Build instructions and usage are available in the IM documentation.
+
+  For complete information, visit IM's web site at http://www.tecgraf.puc-rio.br/im
+  or access its documentation in the HTML folder.
+
+(end of README)
diff --git a/src/config.mak b/src/config.mak
new file mode 100644
index 0000000..f78b59b
--- /dev/null
+++ b/src/config.mak
@@ -0,0 +1,141 @@
+PROJNAME = im
+LIBNAME = im
+OPT = YES
+# Base header search path; INCLUDES is extended below as each bundled library directory is added.
+INCLUDES = . ../include
+                     
+# WORDS_BIGENDIAN is used by libTIFF; it is defined only for the TEC_SYSARCH values tested here (ppc, mips, sparc)
+ifeq ($(TEC_SYSARCH), ppc)
+  DEFINES = WORDS_BIGENDIAN
+endif
+ifeq ($(TEC_SYSARCH), mips)
+  DEFINES = WORDS_BIGENDIAN
+endif
+ifeq ($(TEC_SYSARCH), sparc)
+  DEFINES = WORDS_BIGENDIAN
+endif
+# Bundled libraries: each SRCxxx list is prefixed with its own directory, and that directory is appended to INCLUDES.
+SRCTIFF = \
+    tif_aux.c       tif_dirwrite.c   tif_jpeg.c      tif_print.c    \
+    tif_close.c     tif_dumpmode.c   tif_luv.c       tif_read.c     \
+    tif_codec.c     tif_error.c      tif_lzw.c       tif_strip.c    \
+    tif_color.c     tif_extension.c  tif_next.c      tif_swab.c     \
+    tif_compress.c  tif_fax3.c       tif_open.c      tif_thunder.c  \
+    tif_dir.c       tif_fax3sm.c     tif_packbits.c  tif_tile.c     \
+    tif_dirinfo.c   tif_flush.c      tif_pixarlog.c  tif_zip.c      \
+    tif_dirread.c   tif_getimage.c   tif_predict.c   tif_version.c  \
+    tif_write.c     tif_warning.c    tif_ojpeg.c
+SRCTIFF  := $(addprefix libtiff/, $(SRCTIFF))
+INCLUDES += libtiff 
+
+SRCJPEG = \
+    jcapimin.c  jcmarker.c  jdapimin.c  jdinput.c   jdtrans.c   jidctred.c \
+    jcapistd.c  jcmaster.c  jdapistd.c  jdmainct.c  jerror.c    jmemmgr.c  \
+    jccoefct.c  jcomapi.c   jdatadst.c  jdmarker.c  jfdctflt.c  jmemnobs.c \
+    jccolor.c   jcparam.c   jdatasrc.c  jdmaster.c  jfdctfst.c  jquant1.c  \
+    jcdctmgr.c  jcphuff.c   jdcoefct.c  jdmerge.c   jfdctint.c  jquant2.c  \
+    jchuff.c    jcprepct.c  jdcolor.c   jdphuff.c   jidctflt.c  jutils.c   \
+    jcinit.c    jcsample.c  jddctmgr.c  jdpostct.c  jidctfst.c             \
+    jcmainct.c  jctrans.c   jdhuff.c    jdsample.c  jidctint.c
+SRCJPEG  := $(addprefix libjpeg/, $(SRCJPEG))
+INCLUDES += libjpeg 
+
+SRCPNG = \
+    png.c       pngget.c    pngread.c   pngrutil.c     pngwtran.c  \
+    pngerror.c  pngmem.c    pngrio.c    pngset.c    pngwio.c    pngwutil.c  \
+    pngpread.c  pngrtran.c  pngtrans.c  pngwrite.c
+SRCPNG  := $(addprefix libpng/, $(SRCPNG))
+INCLUDES += libpng 
+DEFINES += PNG_NO_STDIO PNG_TIME_RFC1123_SUPPORTED
+
+SRCZLIB = \
+    adler32.c   crc32.c    gzio.c     inffast.c  inftrees.c  uncompr.c \
+    compress.c  deflate.c  infback.c  inflate.c  trees.c     zutil.c
+SRCZLIB  := $(addprefix zlib/, $(SRCZLIB))
+INCLUDES += zlib
+
+SRCEXIF = \
+    canon/exif-mnote-data-canon.c  canon/mnote-canon-entry.c  canon/mnote-canon-tag.c              \
+    olympus/exif-mnote-data-olympus.c  olympus/mnote-olympus-entry.c  olympus/mnote-olympus-tag.c  \
+    pentax/exif-mnote-data-pentax.c  pentax/mnote-pentax-entry.c  pentax/mnote-pentax-tag.c        \
+    exif-byte-order.c  exif-entry.c  exif-utils.c    exif-format.c  exif-mnote-data.c              \
+    exif-content.c  exif-ifd.c  exif-tag.c exif-data.c  exif-loader.c exif-log.c exif-mem.c
+SRCEXIF  := $(addprefix libexif/, $(SRCEXIF))
+INCLUDES += libexif
+
+SRCLZF = \
+    lzf_c.c lzf_d.c
+SRCLZF  := $(addprefix liblzf/, $(SRCLZF))
+INCLUDES += liblzf
+# Core IM sources plus the JPEG, TIFF, PNG, zlib and LZF lists; SRCEXIF is appended later, only when USE_EXIF is defined.
+SRC = \
+    old_imcolor.c         old_imresize.c      tiff_binfile.c       im_converttype.cpp \
+    im_attrib.cpp         im_format.cpp       im_format_tga.cpp    im_filebuffer.cpp \
+    im_bin.cpp            im_format_all.cpp   im_format_tiff.cpp   im_format_raw.cpp \
+    im_binfile.cpp        im_format_sgi.cpp   im_datatype.cpp      im_format_pcx.cpp \
+    im_colorhsi.cpp       im_format_bmp.cpp   im_image.cpp         im_rgb2map.cpp    \
+    im_colormode.cpp      im_format_gif.cpp   im_lib.cpp           im_format_pnm.cpp \
+    im_colorutil.cpp      im_format_ico.cpp   im_palette.cpp       im_format_png.cpp \
+    im_convertbitmap.cpp  im_format_led.cpp   im_counter.cpp       im_str.cpp        \
+    im_convertcolor.cpp   im_format_jpeg.cpp  im_fileraw.cpp       im_format_krn.cpp \
+    im_file.cpp           im_format_ras.cpp   old_im.cpp           im_compress.cpp   \
+    $(SRCJPEG) $(SRCTIFF) $(SRCPNG) $(SRCZLIB) $(SRCLZF)
+# Platform selection: Windows builds add the win32 sysfile and DIB sources; every other system gets im_sysfile_unix.cpp.
+ifneq ($(findstring Win, $(TEC_SYSNAME)), )
+    SRC += im_sysfile_win32.cpp im_dib.cpp im_dibxbitmap.cpp
+    
+    ifneq ($(findstring dll, $(TEC_UNAME)), )
+      SRC += im.rc
+    endif
+    
+    ifeq ($(findstring _64, $(TEC_UNAME)), )
+      # optimize PNG lib for VC: vc* and dll* unames add pngvcrd.c and PNG_USE_PNGVCRD (skipped when TEC_UNAME contains _64)
+      ifneq ($(findstring vc, $(TEC_UNAME)), )
+        SRC += libpng/pngvcrd.c
+        DEFINES += PNG_USE_PNGVCRD
+      endif
+      ifneq ($(findstring dll, $(TEC_UNAME)), )
+        SRC += libpng/pngvcrd.c
+        DEFINES += PNG_USE_PNGVCRD
+      endif         
+    endif         
+    
+    # force the definition of math functions using float
+    # Watcom (ow unames) does not define them; bc unames get the same define below and are the only Windows case that leaves USE_EXIF unset
+    ifneq ($(findstring ow, $(TEC_UNAME)), )
+      DEFINES += IM_DEFMATHFLOAT
+    endif         
+    
+    ifneq ($(findstring bc, $(TEC_UNAME)), )
+      DEFINES += IM_DEFMATHFLOAT
+    else
+      USE_EXIF = Yes
+    endif
+else
+    SRC += im_sysfile_unix.cpp
+endif
+# EXIF support: this file sets USE_EXIF only for Windows non-bc builds; when it is defined, the libexif sources and the USE_EXIF define are added.
+ifdef USE_EXIF
+  SRC += $(SRCEXIF)    
+  DEFINES += USE_EXIF
+endif  
+
+ifneq ($(findstring Linux, $(TEC_UNAME)), )
+    # optimize PNG lib for Linux in x86: add pnggccrd.c and PNG_USE_PNGGCCRD
+    ifeq "$(TEC_SYSARCH)" "x86"
+      SRC += libpng/pnggccrd.c
+      DEFINES += PNG_USE_PNGGCCRD
+    endif
+endif
+# AIX, SunOS and HP-UX unames also get IM_DEFMATHFLOAT (the same define used above for the ow and bc Windows builds).
+ifneq ($(findstring AIX, $(TEC_UNAME)), )
+  DEFINES += IM_DEFMATHFLOAT
+endif
+
+ifneq ($(findstring SunOS, $(TEC_UNAME)), )
+  DEFINES += IM_DEFMATHFLOAT
+endif
+      
+ifneq ($(findstring HP-UX, $(TEC_UNAME)), )
+  DEFINES += IM_DEFMATHFLOAT
+endif
diff --git a/src/fftw/config.c b/src/fftw/config.c
new file mode 100644
index 0000000..84fb0ac
--- /dev/null
+++ b/src/fftw/config.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* config.c -- this file contains all the codelets the system knows about */
+
+/* $Id: config.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+#include "fftw-int.h"
+
+/* each macro expands to the address of the descriptor named after x; the signature is the same as the size, for now */
+#define NOTW_CODELET(x) \
+	 &fftw_no_twiddle_##x##_desc
+#define NOTWI_CODELET(x) \
+	 &fftwi_no_twiddle_##x##_desc
+
+#define TWIDDLE_CODELET(x) \
+	 &fftw_twiddle_##x##_desc
+
+#define TWIDDLEI_CODELET(x) \
+	 &fftwi_twiddle_##x##_desc
+
+/* automatically-generated list of codelets */
+
+extern fftw_codelet_desc fftw_no_twiddle_1_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_1_desc;
+extern fftw_codelet_desc fftw_no_twiddle_2_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_2_desc;
+extern fftw_codelet_desc fftw_no_twiddle_3_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_3_desc;
+extern fftw_codelet_desc fftw_no_twiddle_4_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_4_desc;
+extern fftw_codelet_desc fftw_no_twiddle_5_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_5_desc;
+extern fftw_codelet_desc fftw_no_twiddle_6_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_6_desc;
+extern fftw_codelet_desc fftw_no_twiddle_7_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_7_desc;
+extern fftw_codelet_desc fftw_no_twiddle_8_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_8_desc;
+extern fftw_codelet_desc fftw_no_twiddle_9_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_9_desc;
+extern fftw_codelet_desc fftw_no_twiddle_10_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_10_desc;
+extern fftw_codelet_desc fftw_no_twiddle_11_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_11_desc;
+extern fftw_codelet_desc fftw_no_twiddle_12_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_12_desc;
+extern fftw_codelet_desc fftw_no_twiddle_13_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_13_desc;
+extern fftw_codelet_desc fftw_no_twiddle_14_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_14_desc;
+extern fftw_codelet_desc fftw_no_twiddle_15_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_15_desc;
+extern fftw_codelet_desc fftw_no_twiddle_16_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_16_desc;
+extern fftw_codelet_desc fftw_no_twiddle_32_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_32_desc;
+extern fftw_codelet_desc fftw_no_twiddle_64_desc;
+extern fftw_codelet_desc fftwi_no_twiddle_64_desc;
+extern fftw_codelet_desc fftw_twiddle_2_desc;
+extern fftw_codelet_desc fftwi_twiddle_2_desc;
+extern fftw_codelet_desc fftw_twiddle_3_desc;
+extern fftw_codelet_desc fftwi_twiddle_3_desc;
+extern fftw_codelet_desc fftw_twiddle_4_desc;
+extern fftw_codelet_desc fftwi_twiddle_4_desc;
+extern fftw_codelet_desc fftw_twiddle_5_desc;
+extern fftw_codelet_desc fftwi_twiddle_5_desc;
+extern fftw_codelet_desc fftw_twiddle_6_desc;
+extern fftw_codelet_desc fftwi_twiddle_6_desc;
+extern fftw_codelet_desc fftw_twiddle_7_desc;
+extern fftw_codelet_desc fftwi_twiddle_7_desc;
+extern fftw_codelet_desc fftw_twiddle_8_desc;
+extern fftw_codelet_desc fftwi_twiddle_8_desc;
+extern fftw_codelet_desc fftw_twiddle_9_desc;
+extern fftw_codelet_desc fftwi_twiddle_9_desc;
+extern fftw_codelet_desc fftw_twiddle_10_desc;
+extern fftw_codelet_desc fftwi_twiddle_10_desc;
+extern fftw_codelet_desc fftw_twiddle_16_desc;
+extern fftw_codelet_desc fftwi_twiddle_16_desc;
+extern fftw_codelet_desc fftw_twiddle_32_desc;
+extern fftw_codelet_desc fftwi_twiddle_32_desc;
+extern fftw_codelet_desc fftw_twiddle_64_desc;
+extern fftw_codelet_desc fftwi_twiddle_64_desc;
+
+fftw_codelet_desc *fftw_config[] = {	/* addresses of all the codelet descriptors declared above */
+     NOTW_CODELET(1),
+     NOTWI_CODELET(1),
+     NOTW_CODELET(2),
+     NOTWI_CODELET(2),
+     NOTW_CODELET(3),
+     NOTWI_CODELET(3),
+     NOTW_CODELET(4),
+     NOTWI_CODELET(4),
+     NOTW_CODELET(5),
+     NOTWI_CODELET(5),
+     NOTW_CODELET(6),
+     NOTWI_CODELET(6),
+     NOTW_CODELET(7),
+     NOTWI_CODELET(7),
+     NOTW_CODELET(8),
+     NOTWI_CODELET(8),
+     NOTW_CODELET(9),
+     NOTWI_CODELET(9),
+     NOTW_CODELET(10),
+     NOTWI_CODELET(10),
+     NOTW_CODELET(11),
+     NOTWI_CODELET(11),
+     NOTW_CODELET(12),
+     NOTWI_CODELET(12),
+     NOTW_CODELET(13),
+     NOTWI_CODELET(13),
+     NOTW_CODELET(14),
+     NOTWI_CODELET(14),
+     NOTW_CODELET(15),
+     NOTWI_CODELET(15),
+     NOTW_CODELET(16),
+     NOTWI_CODELET(16),
+     NOTW_CODELET(32),
+     NOTWI_CODELET(32),
+     NOTW_CODELET(64),
+     NOTWI_CODELET(64),
+     TWIDDLE_CODELET(2),	/* twiddle codelets start here; there is none for sizes 1 and 11-15 */
+     TWIDDLEI_CODELET(2),
+     TWIDDLE_CODELET(3),
+     TWIDDLEI_CODELET(3),
+     TWIDDLE_CODELET(4),
+     TWIDDLEI_CODELET(4),
+     TWIDDLE_CODELET(5),
+     TWIDDLEI_CODELET(5),
+     TWIDDLE_CODELET(6),
+     TWIDDLEI_CODELET(6),
+     TWIDDLE_CODELET(7),
+     TWIDDLEI_CODELET(7),
+     TWIDDLE_CODELET(8),
+     TWIDDLEI_CODELET(8),
+     TWIDDLE_CODELET(9),
+     TWIDDLEI_CODELET(9),
+     TWIDDLE_CODELET(10),
+     TWIDDLEI_CODELET(10),
+     TWIDDLE_CODELET(16),
+     TWIDDLEI_CODELET(16),
+     TWIDDLE_CODELET(32),
+     TWIDDLEI_CODELET(32),
+     TWIDDLE_CODELET(64),
+     TWIDDLEI_CODELET(64),
+     (fftw_codelet_desc *) 0	/* null pointer as the last entry; presumably the end-of-list marker -- confirm against the readers of fftw_config */
+};
diff --git a/src/fftw/config.h b/src/fftw/config.h
new file mode 100644
index 0000000..ef5cd64
--- /dev/null
+++ b/src/fftw/config.h
@@ -0,0 +1,171 @@
+/* -*- C -*- */
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* config.h -- configuration options for building FFTW */
+/* $Id: config.h,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+/* configuration options (guessed by configure) */
+
+/* Define to empty if the keyword does not work.  */
+/* #undef const */
+
+/* Define if you have the gettimeofday function.  */
+/* #undef HAVE_GETTIMEOFDAY */
+
+/* Define if you have the BSDgettimeofday function.  */
+/* #undef HAVE_BSDGETTIMEOFDAY */
+
+/* Define if you have the <sys/time.h> header file.  */
+/* #undef HAVE_SYS_TIME_H */
+
+/* Define if you have the <unistd.h> header file.  */
+/* #undef HAVE_UNISTD_H */
+
+/* Define if you have the <getopt.h> header file.  */
+/* #undef HAVE_GETOPT_H */
+
+/* Define if you have the <malloc.h> header file */
+/* #undef HAVE_MALLOC_H */
+
+/* Define if you have gethrtime() a la Solaris 2 */
+/* #undef HAVE_GETHRTIME */
+/* #undef HAVE_HRTIME_T */
+
+/* Define to sizeof int and long long, if available (both are 0 here, so LONGLONG_IS_TWOINTS stays undefined): */
+#define SIZEOF_INT 0
+#define SIZEOF_LONG_LONG 0
+
+#if (SIZEOF_INT != 0) && (SIZEOF_LONG_LONG >= 2 * SIZEOF_INT)	/* false while SIZEOF_INT is 0 */
+#  define LONGLONG_IS_TWOINTS
+#endif
+
+/* Define to use "unsafe" modular multiply (can cause integer overflow
+   and errors for transforms of large prime sizes using Rader). */
+/* #undef FFTW_ENABLE_UNSAFE_MULMOD */
+
+/* Define if you have getopt() */
+/* #undef HAVE_GETOPT */
+
+/* Define if you have getopt_long() */
+/* #undef HAVE_GETOPT_LONG */
+
+/* Define if you have isnan() */
+/* #undef HAVE_ISNAN */
+
+/* Define for enabling the high resolution Pentium timer */
+/* #undef FFTW_ENABLE_PENTIUM_TIMER */
+
+/*
+ * When using FFTW_ENABLE_PENTIUM_TIMER, set FFTW_CYCLES_PER_SEC 
+ * to your real CPU clock speed! 
+ */
+/* This is for 200 MHz */
+/* #define FFTW_CYCLES_PER_SEC 200000000L */
+
+/*
+ * Define to enable a gcc/x86 specific hack that aligns
+ * the stack to an 8-byte boundary 
+ */
+/* #undef FFTW_ENABLE_I386_HACKS */
+
+/* Define when using a version of gcc that aligns the stack properly */
+/* #undef FFTW_GCC_ALIGNS_STACK */
+
+/* Define to enable extra runtime checks for debugging. */
+/* #undef FFTW_DEBUG */
+
+/* Define to enable vector-recurse feature. */
+/* #undef FFTW_ENABLE_VECTOR_RECURSE */
+
+/*
+ * Define to enable extra runtime checks for the alignment of variables
+ * in the codelets (causes coredump for misaligned double on x86). 
+ */
+/* #undef FFTW_DEBUG_ALIGNMENT */
+
+#define FFTW_VERSION "2.1.5" /* pasted into the fftw_version string in executor.c */
+
+/* Use Win32 high-resolution timer; HAVE_WIN32 also selects FFTW_USING_WIN32_THREADS further down */
+#if defined(__WIN32__) || defined(WIN32) || defined(_WINDOWS)
+#  define HAVE_WIN32_TIMER
+#  define HAVE_WIN32
+#endif
+
+/* Use MacOS Time Manager timer; HAVE_MACOS also selects FFTW_USING_MACOS_THREADS further down */
+#if defined(MAC) || defined(macintosh)
+#  define HAVE_MAC_TIMER
+#  define HAVE_MACOS
+
+/* Define to use nanosecond timer on PCI PowerMacs: */
+/* (WARNING: experimental, use at your own risk.) */
+/* #undef HAVE_MAC_PCI_TIMER */
+#endif
+
+/* define if you have alloca.h: */
+/* #undef HAVE_ALLOCA_H */
+
+/* define if you have the alloca function: */
+/* #undef HAVE_ALLOCA */
+
+/************************** threads configuration ************************/
+
+/* The following preprocessor symbols select which threads library
+   to use when compiling the FFTW threads parallel libraries: */
+
+/* #undef FFTW_USING_SOLARIS_THREADS */
+/* #undef FFTW_USING_POSIX_THREADS */
+/* #undef FFTW_USING_BEOS_THREADS */
+/* #undef FFTW_USING_MACH_THREADS */
+/* #undef FFTW_USING_OPENMP_THREADS */
+/* #undef FFTW_USING_SGIMP_THREADS */
+
+/* on AIX, this gets defined to PTHREAD_CREATE_UNDETACHED, as that
+   system uses a non-standard name for this attribute (sigh). */
+/* #undef PTHREAD_CREATE_JOINABLE */
+
+/* #undef HAVE_MACH_CTHREADS_H */
+/* #undef HAVE_CTHREADS_H */
+/* #undef HAVE_CTHREAD_H */
+
+#ifdef HAVE_WIN32	/* set above when __WIN32__, WIN32 or _WINDOWS is defined */
+#define FFTW_USING_WIN32_THREADS
+#endif
+
+#ifdef HAVE_MACOS	/* set above when MAC or macintosh is defined */
+#define FFTW_USING_MACOS_THREADS
+#endif
+
+/*********************** fortran wrapper configuration *********************/
+
+/* F77_FUNC_ is defined to a macro F77_FUNC_(name,NAME) by autoconf, that
+   takes an identifier name (lower case) and NAME (upper case) and returns
+   the appropriately mangled identifier for the Fortran linker.  On
+   non-Unix systems you will have to define this manually.  For example,
+   if your linker converts identifiers to lower-case followed by an
+   underscore, you would do: #define F77_FUNC_(name,NAME) name ## _ 
+*/
+/* #undef F77_FUNC_ */
+
+/* The following symbols control how MPI_Comm data structures are
+   translated between Fortran and C for the fftw_mpi wrappers.  See
+   the file mpi/fftw_f77_mpi.h for more information. */
+/* #undef HAVE_MPI_COMM_F2C */
+/* #undef FFTW_USE_F77_MPI_COMM */
+/* #undef FFTW_USE_F77_MPI_COMM_P */
diff --git a/src/fftw/executor.c b/src/fftw/executor.c
new file mode 100644
index 0000000..fb200ab
--- /dev/null
+++ b/src/fftw/executor.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ * executor.c -- execute the fft
+ */
+
+/* $Id: executor.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+#include "fftw-int.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+const char *fftw_version = "FFTW V" FFTW_VERSION " ($Id: executor.c,v 1.1 2008/10/17 06:13:18 scuri Exp $)";	/* FFTW_VERSION (config.h) followed by this file's revision tag */
+
+/*
+ * Copy n consecutive elements of `in' to `out', placing element k at
+ * out[k * ostride].  Not static because other files call it.
+ */
+void fftw_strided_copy(int n, fftw_complex *in, int ostride,
+		       fftw_complex *out)
+{
+     int k = 0;
+     int head = n & 3;	/* elements left over by the groups of four */
+     fftw_real re0, re1, re2, re3, im0, im1, im2, im3;
+
+     while (k < head) {	/* leftovers first, one element at a time */
+	  out[k * ostride] = in[k];
+	  ++k;
+     }
+
+     while (k < n) {	/* the rest, four elements per pass */
+	  re0 = c_re(in[k]);
+	  im0 = c_im(in[k]);
+	  re1 = c_re(in[k + 1]);
+	  im1 = c_im(in[k + 1]);
+	  re2 = c_re(in[k + 2]);
+	  im2 = c_im(in[k + 2]);
+	  re3 = c_re(in[k + 3]);
+	  im3 = c_im(in[k + 3]);
+	  c_re(out[k * ostride]) = re0;
+	  c_im(out[k * ostride]) = im0;
+	  c_re(out[(k + 1) * ostride]) = re1;
+	  c_im(out[(k + 1) * ostride]) = im1;
+	  c_re(out[(k + 2) * ostride]) = re2;
+	  c_im(out[(k + 2) * ostride]) = im2;
+	  c_re(out[(k + 3) * ostride]) = re3;
+	  c_im(out[(k + 3) * ostride]) = im3;
+	  k += 4;
+     }
+}
+
+static void executor_many(int n, const fftw_complex *in,	/* run plan node p `howmany' times */
+			  fftw_complex *out,
+			  fftw_plan_node *p,
+			  int istride,
+			  int ostride,
+			  int howmany, int idist, int odist,	/* run s uses in + s * idist and out + s * odist */
+			  fftw_recurse_kind recurse_kind)
+{
+     int s;
+
+     switch (p->type) {
+	 case FFTW_NOTW:	/* call the node's codelet directly, once per run */
+	      {
+		   fftw_notw_codelet *codelet = p->nodeu.notw.codelet;
+
+		   HACK_ALIGN_STACK_ODD;	/* macro from outside this file; presumably a stack-alignment hack -- keep it right before the calls */
+		   for (s = 0; s < howmany; ++s)
+			codelet(in + s * idist,
+				out + s * odist,
+				istride, ostride);
+		   break;
+	      }
+
+	 default:	/* any other node type: one fftw_executor_simple call per run */
+	      for (s = 0; s < howmany; ++s)
+		   fftw_executor_simple(n, in + s * idist,
+					out + s * odist,
+					p, istride, ostride,
+					recurse_kind);
+     }
+}
+
+#ifdef FFTW_ENABLE_VECTOR_RECURSE
+
+/* executor_many_vector is like executor_many, but it pushes the howmany loop down to
+   the leaves: each non-leaf case recurses first, then loops its own codelet howmany times */
+static void executor_many_vector(int n, const fftw_complex *in,
+				 fftw_complex *out,
+				 fftw_plan_node *p,
+				 int istride,
+				 int ostride,
+				 int howmany, int idist, int odist)
+{
+     int s;
+
+     switch (p->type) {
+	 case FFTW_NOTW:	/* leaf: no recursion, just the codelet for each of the howmany inputs */
+	      {
+		   fftw_notw_codelet *codelet = p->nodeu.notw.codelet;
+
+		   HACK_ALIGN_STACK_ODD;
+		   for (s = 0; s < howmany; ++s)
+			codelet(in + s * idist,
+				out + s * odist,
+				istride, ostride);
+		   break;
+	      }
+
+	 case FFTW_TWIDDLE:
+	      {
+		   int r = p->nodeu.twiddle.size;
+		   int m = n / r;	/* size handed to each of the r recursive calls */
+		   fftw_twiddle_codelet *codelet;
+		   fftw_complex *W;
+
+		   for (s = 0; s < r; ++s)	/* here s counts the r sub-problems, not the howmany inputs */
+			executor_many_vector(m, in + s * istride, 
+					     out + s * (m * ostride),
+					     p->nodeu.twiddle.recurse,
+					     istride * r, ostride,
+					     howmany, idist, odist);
+
+		   codelet = p->nodeu.twiddle.codelet;
+		   W = p->nodeu.twiddle.tw->twarray;
+
+		   /* This may not be the right thing.  We maybe should have
+		      the howmany loop for the twiddle codelets at the
+		      topmost level of the recursion, since odist is big;
+		      i.e. separate recursions for twiddle and notwiddle. */
+		   HACK_ALIGN_STACK_EVEN;
+		   for (s = 0; s < howmany; ++s)	/* the codelet is given only the output array */
+			codelet(out + s * odist, W, m * ostride, m, ostride);
+
+		   break;
+	      }
+
+	 case FFTW_GENERIC:	/* same shape as FFTW_TWIDDLE, with the generic codelet's argument list */
+	      {
+		   int r = p->nodeu.generic.size;
+		   int m = n / r;
+		   fftw_generic_codelet *codelet;
+		   fftw_complex *W;
+
+		   for (s = 0; s < r; ++s)
+			executor_many_vector(m, in + s * istride, 
+					     out + s * (m * ostride),
+					     p->nodeu.generic.recurse,
+					     istride * r, ostride,
+					     howmany, idist, odist);
+
+		   codelet = p->nodeu.generic.codelet;
+		   W = p->nodeu.generic.tw->twarray;
+		   for (s = 0; s < howmany; ++s)
+			codelet(out + s * odist, W, m, r, n, ostride);
+
+		   break;
+	      }
+
+	 case FFTW_RADER:	/* same shape again; the codelet also receives the node's rader_data */
+	      {
+		   int r = p->nodeu.rader.size;
+		   int m = n / r;
+		   fftw_rader_codelet *codelet;
+		   fftw_complex *W;
+
+		   for (s = 0; s < r; ++s)
+			executor_many_vector(m, in + s * istride, 
+					     out + s * (m * ostride),
+					     p->nodeu.rader.recurse,
+					     istride * r, ostride,
+					     howmany, idist, odist);
+
+		   codelet = p->nodeu.rader.codelet;
+		   W = p->nodeu.rader.tw->twarray;
+		   for (s = 0; s < howmany; ++s)
+			codelet(out + s * odist, W, m, r, ostride,
+				p->nodeu.rader.rader_data);
+
+		   break;
+	      }
+
+	 default:	/* unknown node type is reported through fftw_die */
+	      fftw_die("BUG in executor: invalid plan\n");
+	      break;
+     }     
+}
+
+#endif /* FFTW_ENABLE_VECTOR_RECURSE */
+
+/*
+ * Execute plan node p once for size n: `in' is read with stride istride
+ * and `out' is written with stride ostride.  Do *not* declare it static--we
+ * need to call it from other files; the "fftw_" prefix avoids name collisions.
+ */
+void fftw_executor_simple(int n, const fftw_complex *in,
+			  fftw_complex *out,
+			  fftw_plan_node *p,
+			  int istride,
+			  int ostride,
+			  fftw_recurse_kind recurse_kind)
+{
+     switch (p->type) {
+	 case FFTW_NOTW:	/* a single codelet call, no recursion */
+	      HACK_ALIGN_STACK_ODD;
+	      (p->nodeu.notw.codelet)(in, out, istride, ostride);
+	      break;
+
+	 case FFTW_TWIDDLE:
+	      {
+		   int r = p->nodeu.twiddle.size;
+		   int m = n / r;	/* size of each of the r sub-transforms */
+		   fftw_twiddle_codelet *codelet;
+		   fftw_complex *W;
+
+#ifdef FFTW_ENABLE_VECTOR_RECURSE	/* when the option is off, the executor_many call below is unconditional */
+		   if (recurse_kind == FFTW_NORMAL_RECURSE)
+#endif
+			executor_many(m, in, out,	/* r sub-transforms: inputs istride apart, outputs m * ostride apart */
+				      p->nodeu.twiddle.recurse,
+				      istride * r, ostride,
+				      r, istride, m * ostride,
+				      FFTW_NORMAL_RECURSE);
+#ifdef FFTW_ENABLE_VECTOR_RECURSE
+		   else
+			executor_many_vector(m, in, out,
+					     p->nodeu.twiddle.recurse,
+					     istride * r, ostride,
+					     r, istride, m * ostride);
+#endif
+
+		   codelet = p->nodeu.twiddle.codelet;
+		   W = p->nodeu.twiddle.tw->twarray;
+
+		   HACK_ALIGN_STACK_EVEN;
+		   codelet(out, W, m * ostride, m, ostride);	/* the codelet is given only the output array */
+
+		   break;
+	      }
+
+	 case FFTW_GENERIC:	/* same shape as FFTW_TWIDDLE, with the generic codelet's argument list */
+	      {
+		   int r = p->nodeu.generic.size;
+		   int m = n / r;
+		   fftw_generic_codelet *codelet;
+		   fftw_complex *W;
+
+#ifdef FFTW_ENABLE_VECTOR_RECURSE
+		   if (recurse_kind == FFTW_NORMAL_RECURSE)
+#endif
+			executor_many(m, in, out,
+				      p->nodeu.generic.recurse,
+				      istride * r, ostride,
+				      r, istride, m * ostride,
+                                      FFTW_NORMAL_RECURSE);
+#ifdef FFTW_ENABLE_VECTOR_RECURSE
+		   else
+			executor_many_vector(m, in, out,
+					     p->nodeu.generic.recurse,
+					     istride * r, ostride,
+					     r, istride, m * ostride);
+#endif
+
+		   codelet = p->nodeu.generic.codelet;
+		   W = p->nodeu.generic.tw->twarray;
+		   codelet(out, W, m, r, n, ostride);
+
+		   break;
+	      }
+
+	 case FFTW_RADER:	/* same shape again; the codelet also receives the node's rader_data */
+	      {
+		   int r = p->nodeu.rader.size;
+		   int m = n / r;
+		   fftw_rader_codelet *codelet;
+		   fftw_complex *W;
+
+#ifdef FFTW_ENABLE_VECTOR_RECURSE
+		   if (recurse_kind == FFTW_NORMAL_RECURSE)
+#endif
+			executor_many(m, in, out,
+				      p->nodeu.rader.recurse,
+				      istride * r, ostride,
+				      r, istride, m * ostride,
+                                      FFTW_NORMAL_RECURSE);
+#ifdef FFTW_ENABLE_VECTOR_RECURSE
+		   else
+			executor_many_vector(m, in, out,
+					     p->nodeu.rader.recurse,
+					     istride * r, ostride,
+					     r, istride, m * ostride);
+#endif
+
+		   codelet = p->nodeu.rader.codelet;
+		   W = p->nodeu.rader.tw->twarray;
+		   codelet(out, W, m, r, ostride,
+			   p->nodeu.rader.rader_data);
+
+		   break;
+	      }
+
+	 default:	/* unknown node type is reported through fftw_die */
+	      fftw_die("BUG in executor: invalid plan\n");
+	      break;
+     }
+}
+
+static void executor_simple_inplace(int n, fftw_complex *in,	/* one transform whose result ends up in `in' */
+				    fftw_complex *out,	/* optional work buffer; may be null */
+				    fftw_plan_node *p,
+				    int istride,
+				    fftw_recurse_kind recurse_kind)
+{
+     switch (p->type) {
+	 case FFTW_NOTW:	/* the codelet gets `in' as both input and output */
+	      HACK_ALIGN_STACK_ODD;
+	      (p->nodeu.notw.codelet)(in, in, istride, istride);
+	      break;
+
+	 default:
+	      {
+		   fftw_complex *tmp;
+
+		   if (out)
+			tmp = out;
+		   else	/* no buffer supplied: allocate n elements, released below */
+			tmp = (fftw_complex *)
+			    fftw_malloc(n * sizeof(fftw_complex));
+
+		   fftw_executor_simple(n, in, tmp, p, istride, 1,	/* transform into tmp with output stride 1 */
+					recurse_kind);
+		   fftw_strided_copy(n, tmp, istride, in);	/* copy tmp back into `in' at stride istride */
+
+		   if (!out)
+			fftw_free(tmp);
+	      }
+     }
+}
+
+static void executor_many_inplace(int n, fftw_complex *in,	/* `howmany' transforms, each written back over its own input */
+				  fftw_complex *out,	/* optional work buffer; may be null */
+				  fftw_plan_node *p,
+				  int istride,
+				  int howmany, int idist,	/* transform s starts at in + s * idist */
+				  fftw_recurse_kind recurse_kind)
+{
+     switch (p->type) {
+	 case FFTW_NOTW:	/* the codelet gets the same pointer as input and output */
+	      {
+		   fftw_notw_codelet *codelet = p->nodeu.notw.codelet;
+		   int s;
+
+		   HACK_ALIGN_STACK_ODD;
+		   for (s = 0; s < howmany; ++s)
+			codelet(in + s * idist,
+				in + s * idist,
+				istride, istride);
+		   break;
+	      }
+
+	 default:
+	      {
+		   int s;
+		   fftw_complex *tmp;
+		   if (out)
+			tmp = out;
+		   else	/* no buffer supplied: allocate n elements once, released below */
+			tmp = (fftw_complex *)
+			    fftw_malloc(n * sizeof(fftw_complex));
+
+		   for (s = 0; s < howmany; ++s) {	/* the same tmp buffer is reused for every transform */
+			fftw_executor_simple(n,
+					     in + s * idist,
+					     tmp,
+					     p, istride, 1, recurse_kind);
+			fftw_strided_copy(n, tmp, istride, in + s * idist);
+		   }
+
+		   if (!out)
+			fftw_free(tmp);
+	      }
+     }
+}
+
+/* user interface: run `howmany' transforms of `plan', choosing the executor from FFTW_IN_PLACE and howmany */
+void fftw(fftw_plan plan, int howmany, fftw_complex *in, int istride,
+	  int idist, fftw_complex *out, int ostride, int odist)
+{
+     int n = plan->n;
+
+     if (plan->flags & FFTW_IN_PLACE) {	/* in-place: ostride and odist are not used in this branch */
+	  if (howmany == 1) {
+	       executor_simple_inplace(n, in, out, plan->root, istride,
+				       plan->recurse_kind);
+	  } else {
+	       executor_many_inplace(n, in, out, plan->root, istride, howmany,
+				     idist, plan->recurse_kind);
+	  }
+     } else {
+	  if (howmany == 1) {
+	       fftw_executor_simple(n, in, out, plan->root, istride, ostride,
+				    plan->recurse_kind);
+	  } else {
+#ifdef FFTW_ENABLE_VECTOR_RECURSE	/* when the option is off, the executor_many call below is unconditional */
+	       int vector_size = plan->vector_size;
+	       if (vector_size <= 1)
+#endif
+		    executor_many(n, in, out, plan->root, istride, ostride,
+				  howmany, idist, odist, plan->recurse_kind);
+#ifdef FFTW_ENABLE_VECTOR_RECURSE
+	       else {
+		    int s;
+		    int num_vects = howmany / vector_size;	/* number of full groups of vector_size transforms */
+		    fftw_plan_node *root = plan->root;
+
+		    for (s = 0; s < num_vects; ++s)	/* one executor_many_vector call per full group */
+			 executor_many_vector(n, 
+					     in + s * (vector_size * idist), 
+					     out + s * (vector_size * odist),
+					     root,
+					     istride, ostride,
+					     vector_size, idist, odist);
+
+		    s = howmany % vector_size;	/* transforms left after the full groups */
+		    if (s > 0)
+			 executor_many(n,
+				       in + num_vects * (vector_size * idist), 
+				       out + num_vects * (vector_size * odist),
+				       root,
+				       istride, ostride,
+				       s, idist, odist, 
+				       FFTW_NORMAL_RECURSE);
+	       }
+#endif
+	  }
+     }
+}
+
+void fftw_one(fftw_plan plan, fftw_complex *in, fftw_complex *out)
+{
+     /* one transform with unit strides; in-place plans use the in-place executor */
+     if (!(plan->flags & FFTW_IN_PLACE)) {
+	  fftw_executor_simple(plan->n, in, out, plan->root, 1, 1,
+			       plan->recurse_kind);
+	  return;
+     }
+     executor_simple_inplace(plan->n, in, out, plan->root, 1,
+			     plan->recurse_kind);
+}
diff --git a/src/fftw/fftw-int.h b/src/fftw/fftw-int.h
new file mode 100644
index 0000000..2c363fc
--- /dev/null
+++ b/src/fftw/fftw-int.h
@@ -0,0 +1,500 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* fftw-int.h -- internal (library-private) definitions */
+/* $Id: fftw-int.h,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+#ifndef FFTW_INT_H
+#define FFTW_INT_H
+#include "config.h"
+#include "fftw.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif				/* __cplusplus */
+
+/****************************************************************************/
+/*                            Private Functions                             */
+/****************************************************************************/
+
+extern fftw_twiddle *fftw_create_twiddle(int n, const fftw_codelet_desc *d);
+extern void fftw_destroy_twiddle(fftw_twiddle *tw);
+
+extern void fftw_strided_copy(int, fftw_complex *, int, fftw_complex *);
+extern void fftw_executor_simple(int, const fftw_complex *, fftw_complex *,
+				 fftw_plan_node *, int, int,
+				 fftw_recurse_kind recurse_kind);
+
+extern fftwnd_plan fftwnd_create_plan_aux(int rank, const int *n,
+					  fftw_direction dir, int flags);
+extern fftw_plan *fftwnd_new_plan_array(int rank);
+extern fftw_plan *fftwnd_create_plans_generic(fftw_plan *plans,
+					      int rank, const int *n,
+					      fftw_direction dir, int flags);
+extern fftw_plan *fftwnd_create_plans_specific(fftw_plan *plans,
+					       int rank, const int *n,
+					       const int *n_after,
+					       fftw_direction dir, int flags,
+					       fftw_complex *in, int istride,
+					       fftw_complex *out, int ostride);
+extern int fftwnd_work_size(int rank, const int *n, int flags, int ncopies);
+
+extern void fftwnd_aux(fftwnd_plan p, int cur_dim,
+		       fftw_complex *in, int istride,
+		       fftw_complex *out, int ostride,
+		       fftw_complex *work);
+extern void fftwnd_aux_howmany(fftwnd_plan p, int cur_dim,
+			       int howmany,
+			       fftw_complex *in, int istride, int idist,
+			       fftw_complex *out, int ostride, int odist,
+			       fftw_complex *work);
+
+/* wisdom prototypes */
+enum fftw_wisdom_category {
+     FFTW_WISDOM, RFFTW_WISDOM
+};
+
+extern int fftw_wisdom_lookup(int n, int flags, fftw_direction dir,
+			      enum fftw_wisdom_category category,
+			      int istride, int ostride,
+			      enum fftw_node_type *type,
+			      int *signature,
+			      fftw_recurse_kind *recurse_kind, int replace_p);
+extern void fftw_wisdom_add(int n, int flags, fftw_direction dir,
+			    enum fftw_wisdom_category cat,
+			    int istride, int ostride,
+			    enum fftw_node_type type,
+			    int signature,
+			    fftw_recurse_kind recurse_kind);
+
+/* Private planner functions: */
+extern double fftw_estimate_node(fftw_plan_node *p);
+extern fftw_plan_node *fftw_make_node_notw(int size,
+					const fftw_codelet_desc *config);
+extern fftw_plan_node *fftw_make_node_real2hc(int size,
+					const fftw_codelet_desc *config);
+extern fftw_plan_node *fftw_make_node_hc2real(int size,
+					const fftw_codelet_desc *config);
+extern fftw_plan_node *fftw_make_node_twiddle(int n,
+					 const fftw_codelet_desc *config,
+					      fftw_plan_node *recurse,
+					      int flags);
+extern fftw_plan_node *fftw_make_node_hc2hc(int n,
+					    fftw_direction dir,
+					 const fftw_codelet_desc *config,
+					    fftw_plan_node *recurse,
+					    int flags);
+extern fftw_plan_node *fftw_make_node_generic(int n, int size,
+					      fftw_generic_codelet *codelet,
+					      fftw_plan_node *recurse,
+					      int flags);
+extern fftw_plan_node *fftw_make_node_rgeneric(int n, int size,
+					       fftw_direction dir,
+					       fftw_rgeneric_codelet * codelet,
+					       fftw_plan_node *recurse,
+					       int flags);
+extern int fftw_factor(int n);
+extern fftw_plan_node *fftw_make_node(void);
+extern fftw_plan fftw_make_plan(int n, fftw_direction dir,
+				fftw_plan_node *root, int flags,
+				enum fftw_node_type wisdom_type,
+				int wisdom_signature,
+				fftw_recurse_kind recurse_kind,
+				int vector_size);
+extern void fftw_use_plan(fftw_plan p);
+extern void fftw_use_node(fftw_plan_node *p);
+extern void fftw_destroy_plan_internal(fftw_plan p);
+extern fftw_plan fftw_pick_better(fftw_plan p1, fftw_plan p2);
+extern fftw_plan fftw_lookup(fftw_plan *table, int n, int flags,
+			     int vector_size);
+extern void fftw_insert(fftw_plan *table, fftw_plan this_plan);
+extern void fftw_make_empty_table(fftw_plan *table);
+extern void fftw_destroy_table(fftw_plan *table);
+extern void fftw_complete_twiddle(fftw_plan_node *p, int n);
+
+extern fftw_plan_node *fftw_make_node_rader(int n, int size,
+					    fftw_direction dir,
+					    fftw_plan_node *recurse,
+					    int flags);
+extern fftw_rader_data *fftw_rader_top;
+
+/* undocumented debugging hook */
+typedef void (*fftw_plan_hook_ptr) (fftw_plan plan);
+extern DL_IMPORT(fftw_plan_hook_ptr) fftw_plan_hook;
+extern DL_IMPORT(fftw_plan_hook_ptr) rfftw_plan_hook;
+
+/****************************************************************************/
+/*                          Overflow-safe multiply                          */
+/****************************************************************************/
+
+/* The Rader routines do a lot of operations of the form (x * y) % p, which
+   are vulnerable to overflow problems for large p.  To get around this,
+   we either use "long long" arithmetic (if it is available and double
+   the size of int), or default to a subroutine defined in twiddle.c. */
+
+#if defined(FFTW_ENABLE_UNSAFE_MULMOD)
+#  define MULMOD(x,y,p) (((x) * (y)) % (p))
+#elif defined(LONGLONG_IS_TWOINTS)
+#  define MULMOD(x,y,p) ((int) ((((long long) (x)) * ((long long) (y))) \
+				% ((long long) (p))))
+#else
+#  define USE_FFTW_SAFE_MULMOD
+#  define MULMOD(x,y,p) fftw_safe_mulmod(x,y,p)
+extern int fftw_safe_mulmod(int x, int y, int p);
+#endif
+
+/****************************************************************************/
+/*                           Floating Point Types                           */
+/****************************************************************************/
+
+/*
+ * We use these definitions to make it easier for people to change
+ * FFTW to use long double and similar types. You shouldn't have to
+ * change this just to use float or double. 
+ */
+
+/*
+ * Change this if your floating-point constants need to be expressed
+ * in a special way.  For example, if fftw_real is long double, you
+ * will need to append L to your fp constants to make them of the
+ * same precision.  Do this by changing "x" below to "x##L". 
+ */
+#define FFTW_KONST(x) ((fftw_real) x)
+
+/*
+ * Ordinarily, we use the standard sin/cos functions to compute trig.
+ * constants.  You'll need to change these if fftw_real has more
+ * than double precision.
+ */
+#define FFTW_TRIG_SIN sin
+#define FFTW_TRIG_COS cos
+typedef double FFTW_TRIG_REAL;	/* the argument type for sin and cos */
+
+#define FFTW_K2PI FFTW_KONST(6.2831853071795864769252867665590057683943388)
+
+/****************************************************************************/
+/*                               gcc/x86 hacks                              */
+/****************************************************************************/
+
+/*
+ * gcc 2.[78].x and x86 specific hacks.  These macros align the stack
+ * pointer so that the double precision temporary variables in the
+ * codelets will be aligned to a multiple of 8 bytes (*way* faster on
+ * pentium and pentiumpro)
+ */
+#ifdef __GNUC__
+#  ifdef __i386__
+#    ifdef FFTW_ENABLE_I386_HACKS
+#      ifndef FFTW_GCC_ALIGNS_STACK
+#      ifndef FFTW_ENABLE_FLOAT
+#        define FFTW_USING_I386_HACKS
+#        define HACK_ALIGN_STACK_EVEN {                                    \
+           if ((((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4);  \
+         }
+
+#        define HACK_ALIGN_STACK_ODD {                                     \
+           if (!(((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4); \
+         }
+
+#      endif /* ! FFTW_ENABLE_FLOAT */
+#      endif /* ! FFTW_GCC_ALIGNS_STACK */
+#    endif /* FFTW_ENABLE_I386_HACKS */
+
+#    ifdef FFTW_DEBUG_ALIGNMENT
+#      define ASSERT_ALIGNED_DOUBLE {                                      \
+         double __foo;                                                       \
+         if ((((long) &__foo) & 0x7)) abort();                               \
+       }
+#    endif /* FFTW_DEBUG_ALIGNMENT */
+
+#  endif /* __i386__ */
+#endif /* __GNUC__ */
+
+#ifndef HACK_ALIGN_STACK_EVEN
+#  define HACK_ALIGN_STACK_EVEN {}
+#endif
+#ifndef HACK_ALIGN_STACK_ODD
+#  define HACK_ALIGN_STACK_ODD {}
+#endif
+#ifndef ASSERT_ALIGNED_DOUBLE
+#  define ASSERT_ALIGNED_DOUBLE {}
+#endif
+
+/****************************************************************************/
+/*                                  Timers                                  */
+/****************************************************************************/
+
+/*
+ * Here, you can use all the nice timers available in your machine.
+ */
+
+/*
+ *
+ Things you should define to include your own clock:
+ 
+ fftw_time -- the data type used to store a time
+ 
+ extern fftw_time fftw_get_time(void); 
+ -- a function returning the current time.  (We have
+ implemented this as a macro in most cases.)
+ 
+ extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2);
+ -- returns the time difference (t1 - t2).
+ If t1 < t2, it may simply return zero (although this
+ is not required).  (We have implemented this as a macro
+ in most cases.)
+ 
+ extern double fftw_time_to_sec(fftw_time t);
+ -- returns the time t expressed in seconds, as a double.
+ (Implemented as a macro in most cases.)
+ 
+ FFTW_TIME_MIN -- a double-precision macro holding the minimum
+ time interval (in seconds) for accurate time measurements.
+ This should probably be at least 100 times the precision of
+ your clock (we use even longer intervals, to be conservative).
+ This will determine how long the planner takes to measure
+ the speeds of different possible plans.
+ 
+ Bracket all of your definitions with an appropriate #ifdef so that
+ they will be enabled on your machine.  If you do add your own
+ high-precision timer code, let us know (at fftw@fftw.org).
+ 
+ Only declarations should go in this file.  Any function definitions
+ that you need should go into timer.c.
+ */
+
+/*
+ * define a symbol so that we know that we have the fftw_time_diff
+ * function/macro (it did not exist prior to FFTW 1.2) 
+ */
+#define FFTW_HAS_TIME_DIFF
+
+/**********************************************
+ *              SOLARIS
+ **********************************************/
+#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T)
+
+/* we use the nanosecond virtual timer */
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+typedef hrtime_t fftw_time;
+
+#define fftw_get_time() gethrtime()
+#define fftw_time_diff(t1,t2) ((t1) - (t2))
+#define fftw_time_to_sec(t) ((double) (t) / 1.0e9)	/* hrtime_t nanoseconds -> seconds */
+
+/*
+ * a measurement is valid if it runs for at least
+ * FFTW_TIME_MIN seconds.
+ */
+#define FFTW_TIME_MIN (1.0e-4)	/* for Solaris nanosecond timer */
+#define FFTW_TIME_REPEAT 8
+
+/**********************************************
+ *        Pentium time stamp counter
+ **********************************************/
+#elif defined(__GNUC__) && defined(__i386__) && defined(FFTW_ENABLE_PENTIUM_TIMER)
+
+/*
+ * Use internal Pentium register (time stamp counter). Resolution
+ * is 1/FFTW_CYCLES_PER_SEC seconds (e.g. 5 ns for Pentium 200 MHz).
+ * (This code was contributed by Wolfgang Reimer)
+ */
+
+#ifndef FFTW_CYCLES_PER_SEC
+#error "Must define FFTW_CYCLES_PER_SEC in fftw/config.h to use the Pentium cycle counter"
+#endif
+
+typedef unsigned long long fftw_time;
+
+static __inline__ fftw_time read_tsc(void)
+{
+     fftw_time ret;
+     /* the "=A" output constraint binds ret to the EDX:EAX pair rdtsc writes */
+     __asm__ __volatile__("rdtsc": "=A" (ret)); 
+     /* no input, nothing else clobbered */
+     return ret;
+}
+
+#define fftw_get_time()  read_tsc()
+#define fftw_time_diff(t1,t2) ((t1) - (t2))
+#define fftw_time_to_sec(t) (((double) (t)) / FFTW_CYCLES_PER_SEC)
+#define FFTW_TIME_MIN (1.0e-4)	/* for Pentium TSC register */
+
+/************* generic systems having gettimeofday ************/
+#elif defined(HAVE_GETTIMEOFDAY) || defined(HAVE_BSDGETTIMEOFDAY)
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#define FFTW_USE_GETTIMEOFDAY
+
+typedef struct timeval fftw_time;
+
+extern fftw_time fftw_gettimeofday_get_time(void);
+extern fftw_time fftw_gettimeofday_time_diff(fftw_time t1, fftw_time t2);
+#define fftw_get_time() fftw_gettimeofday_get_time()
+#define fftw_time_diff(t1, t2) fftw_gettimeofday_time_diff(t1, t2)
+#define fftw_time_to_sec(t) ((double)(t).tv_sec + (double)(t).tv_usec * 1.0E-6)
+
+#ifndef FFTW_TIME_MIN
+/* this should be fine on any system claiming a microsecond timer */
+#define FFTW_TIME_MIN (1.0e-2)
+#endif
+
+/**********************************************
+ *              MACINTOSH
+ **********************************************/
+#elif defined(HAVE_MAC_TIMER)
+
+/*
+ * By default, use the microsecond timer in the Mac Time Manager.
+ * Alternatively, by defining HAVE_MAC_PCI_TIMER, you can use the
+ * nanosecond timer available *only* on PCI PowerMacs.
+ * WARNING: the nanosecond timer was just a little experiment;
+ * I haven't gotten it to work reliably.  Tips/patches are welcome.
+ */
+#ifndef HAVE_MAC_PCI_TIMER	/* use time manager */
+
+/*
+ * Use Macintosh Time Manager routines (maximum resolution is about 20
+ * microseconds). 
+ */
+typedef struct fftw_time_struct {
+     unsigned long hi, lo;
+} fftw_time;
+
+extern fftw_time get_Mac_microseconds(void);
+
+#define fftw_get_time() get_Mac_microseconds()
+
+/* define as a function instead of a macro: */
+extern fftw_time fftw_time_diff(fftw_time t1, fftw_time t2);
+
+#define fftw_time_to_sec(t) ((t).lo * 1.0e-6 + 4294967296.0e-6 * (t).hi)	/* assumes hi is the upper 32-bit word (2^32 us per unit) -- confirm against get_Mac_microseconds */
+
+/* very conservative, since timer should be accurate to 20e-6: */
+/* (although this seems not to be the case in practice) */
+#define FFTW_TIME_MIN (5.0e-2)	/* for MacOS Time Manager timer */
+
+#else				/* use nanosecond timer */
+
+/* Use the nanosecond timer available on PCI PowerMacs. */
+
+#include <DriverServices.h>
+
+typedef AbsoluteTime fftw_time;
+#define fftw_get_time() UpTime()
+#define fftw_time_diff(t1,t2) SubAbsoluteFromAbsolute(t1,t2)
+#define fftw_time_to_sec(t) (AbsoluteToNanoseconds(t).lo * 1.0e-9)
+
+/* Extremely conservative minimum time: */
+/* for MacOS PCI PowerMac nanosecond timer */
+#define FFTW_TIME_MIN (5.0e-3)	
+
+#endif				/* use nanosecond timer */
+
+/**********************************************
+ *              WINDOWS
+ **********************************************/
+#elif defined(HAVE_WIN32_TIMER)
+
+#include <time.h>
+
+typedef unsigned long fftw_time;
+extern unsigned long GetPerfTime(void);
+extern double GetPerfSec(double ticks);
+
+#define fftw_get_time() GetPerfTime()
+#define fftw_time_diff(t1,t2) ((t1) - (t2))
+#define fftw_time_to_sec(t) GetPerfSec(t)
+
+#define FFTW_TIME_MIN (5.0e-2)	/* for Win32 timer */
+
+/**********************************************
+ *              CRAY
+ **********************************************/
+#elif defined(_CRAYMPP)		/* Cray MPP system */
+
+double SECONDR(void);		/* 
+				 * I think you have to link with -lsci to
+				 * get this 
+				 */
+
+typedef double fftw_time;
+#define fftw_get_time() SECONDR()
+#define fftw_time_diff(t1,t2) ((t1) - (t2))
+#define fftw_time_to_sec(t) (t)
+
+#define FFTW_TIME_MIN (1.0e-1)	/* for Cray MPP SECONDR timer */
+
+/**********************************************
+ *          VANILLA UNIX/ISO C SYSTEMS
+ **********************************************/
+/* last resort: use good old Unix clock() */
+#else
+
+#include <time.h>
+
+typedef clock_t fftw_time;
+
+#ifndef CLOCKS_PER_SEC
+#ifdef sun
+/* stupid sunos4 prototypes */
+#define CLOCKS_PER_SEC 1000000
+extern long clock(void);
+#else				/* not sun, we don't know CLOCKS_PER_SEC */
+#error Please define CLOCKS_PER_SEC
+#endif
+#endif
+
+#define fftw_get_time() clock()
+#define fftw_time_diff(t1,t2) ((t1) - (t2))
+#define fftw_time_to_sec(t) (((double) (t)) / CLOCKS_PER_SEC)
+
+/*
+ * ***VERY*** conservative constant: this says that a
+ * measurement must run for 200ms in order to be valid.
+ * You had better check the manual of your machine
+ * to discover if it can do better than this
+ */
+#define FFTW_TIME_MIN (2.0e-1)	/* for default clock() timer */
+
+#endif				/* UNIX clock() */
+
+/* take FFTW_TIME_REPEAT measurements... */
+#ifndef FFTW_TIME_REPEAT
+#define FFTW_TIME_REPEAT 4
+#endif
+
+/* but do not run for more than FFTW_TIME_LIMIT seconds while measuring one FFT */
+#ifndef FFTW_TIME_LIMIT
+#define FFTW_TIME_LIMIT 2.0
+#endif
+
+#ifdef __cplusplus
+}				/* extern "C" */
+
+#endif				/* __cplusplus */
+
+#endif				/* FFTW_INT_H */
diff --git a/src/fftw/fftw.h b/src/fftw/fftw.h
new file mode 100644
index 0000000..3ec3c49
--- /dev/null
+++ b/src/fftw/fftw.h
@@ -0,0 +1,421 @@
+/* -*- C -*- */
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* fftw.h -- system-wide definitions */
+/* $Id: fftw.h,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+#ifndef FFTW_H
+#define FFTW_H
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif				/* __cplusplus */
+
+/* Define for using single precision */
+/*
+ * If you can, use configure --enable-float instead of changing this
+ * flag directly 
+ */
+/* #undef FFTW_ENABLE_FLOAT */
+
+/* our real numbers */
+#ifdef FFTW_ENABLE_FLOAT
+typedef float fftw_real;
+#else
+typedef double fftw_real;
+#endif
+
+/*********************************************
+ * Complex numbers and operations 
+ *********************************************/
+typedef struct {
+     fftw_real re, im;
+} fftw_complex;
+#define c_re(c)  ((c).re)
+#define c_im(c)  ((c).im)
+
+typedef enum {
+     FFTW_FORWARD = -1, FFTW_BACKWARD = 1
+} fftw_direction;
+
+/* backward compatibility with FFTW-1.3 */
+typedef fftw_complex FFTW_COMPLEX;
+typedef fftw_real FFTW_REAL;
+
+#ifndef FFTW_1_0_COMPATIBILITY
+#define FFTW_1_0_COMPATIBILITY 0
+#endif
+
+#if FFTW_1_0_COMPATIBILITY
+/* backward compatibility with FFTW-1.0 */
+#define REAL fftw_real
+#define COMPLEX fftw_complex
+#endif
+
+/*********************************************
+ * Success or failure status
+ *********************************************/
+
+typedef enum {
+     FFTW_SUCCESS = 0, FFTW_FAILURE = -1
+} fftw_status;
+
+/*********************************************
+ *              Codelets
+ *********************************************/
+typedef void (fftw_notw_codelet) 
+     (const fftw_complex *, fftw_complex *, int, int);
+typedef void (fftw_twiddle_codelet)
+     (fftw_complex *, const fftw_complex *, int,
+      int, int);
+typedef void (fftw_generic_codelet) 
+     (fftw_complex *, const fftw_complex *, int,
+      int, int, int);
+typedef void (fftw_real2hc_codelet)
+     (const fftw_real *, fftw_real *, fftw_real *,
+      int, int, int);
+typedef void (fftw_hc2real_codelet)
+     (const fftw_real *, const fftw_real *,
+      fftw_real *, int, int, int);
+typedef void (fftw_hc2hc_codelet)
+     (fftw_real *, const fftw_complex *,
+      int, int, int);
+typedef void (fftw_rgeneric_codelet)
+     (fftw_real *, const fftw_complex *, int,
+      int, int, int);
+
+/*********************************************
+ *     Configurations
+ *********************************************/
+/*
+ * A configuration is a database of all known codelets
+ */
+
+enum fftw_node_type {
+     FFTW_NOTW, FFTW_TWIDDLE, FFTW_GENERIC, FFTW_RADER,
+     FFTW_REAL2HC, FFTW_HC2REAL, FFTW_HC2HC, FFTW_RGENERIC
+};
+
+/* description of a codelet */
+typedef struct {
+     const char *name;		/* name of the codelet */
+     void (*codelet) ();	/* pointer to the codelet itself */
+     int size;			/* size of the codelet */
+     fftw_direction dir;	/* FFTW_FORWARD or FFTW_BACKWARD */
+     enum fftw_node_type type;	/* node kind, one of enum fftw_node_type */
+     int signature;		/* unique id */
+     int ntwiddle;		/* number of twiddle factors */
+     const int *twiddle_order;	/* 
+				 * array that determines the order 
+				 * in which the codelet expects
+				 * the twiddle factors
+				 */
+} fftw_codelet_desc;
+
+/* On Win32, you need to do funny things to access global variables
+   in shared libraries.  Thanks to Andrew Sterian for this hack. */
+#ifdef HAVE_WIN32
+#  if defined(BUILD_FFTW_DLL)
+#    define DL_IMPORT(type) __declspec(dllexport) type
+#  elif defined(USE_FFTW_DLL)
+#    define DL_IMPORT(type) __declspec(dllimport) type
+#  else
+#    define DL_IMPORT(type) type
+#  endif
+#else
+#  define DL_IMPORT(type) type
+#endif
+
+extern DL_IMPORT(const char *) fftw_version;
+
+/*****************************
+ *        Plans
+ *****************************/
+/*
+ * A plan is a sequence of reductions to compute a FFT of
+ * a given size.  At each step, the FFT algorithm can:
+ *
+ * 1) apply a notw codelet, or
+ * 2) recurse and apply a twiddle codelet, or
+ * 3) apply the generic codelet.
+ */
+
+/* structure that contains twiddle factors */
+typedef struct fftw_twiddle_struct {
+     int n;
+     const fftw_codelet_desc *cdesc;
+     fftw_complex *twarray;
+     struct fftw_twiddle_struct *next;
+     int refcnt;
+} fftw_twiddle;
+
+typedef struct fftw_rader_data_struct {
+     struct fftw_plan_struct *plan;
+     fftw_complex *omega;
+     int g, ginv;
+     int p, flags, refcount;
+     struct fftw_rader_data_struct *next;
+     fftw_codelet_desc *cdesc;
+} fftw_rader_data;
+
+typedef void (fftw_rader_codelet) 
+     (fftw_complex *, const fftw_complex *, int,
+      int, int, fftw_rader_data *);
+
+/* structure that holds all the data needed for a given step */
+typedef struct fftw_plan_node_struct {
+     enum fftw_node_type type;
+
+     union {
+	  /* nodes of type FFTW_NOTW */
+	  struct {
+	       int size;
+	       fftw_notw_codelet *codelet;
+	       const fftw_codelet_desc *codelet_desc;
+	  } notw;
+
+	  /* nodes of type FFTW_TWIDDLE */
+	  struct {
+	       int size;
+	       fftw_twiddle_codelet *codelet;
+	       fftw_twiddle *tw;
+	       struct fftw_plan_node_struct *recurse;
+	       const fftw_codelet_desc *codelet_desc;
+	  } twiddle;
+
+	  /* nodes of type FFTW_GENERIC */
+	  struct {
+	       int size;
+	       fftw_generic_codelet *codelet;
+	       fftw_twiddle *tw;
+	       struct fftw_plan_node_struct *recurse;
+	  } generic;
+
+	  /* nodes of type FFTW_RADER */
+	  struct {
+	       int size;
+	       fftw_rader_codelet *codelet;
+	       fftw_rader_data *rader_data;
+	       fftw_twiddle *tw;
+	       struct fftw_plan_node_struct *recurse;
+	  } rader;
+
+	  /* nodes of type FFTW_REAL2HC */
+	  struct {
+	       int size;
+	       fftw_real2hc_codelet *codelet;
+	       const fftw_codelet_desc *codelet_desc;
+	  } real2hc;
+
+	  /* nodes of type FFTW_HC2REAL */
+	  struct {
+	       int size;
+	       fftw_hc2real_codelet *codelet;
+	       const fftw_codelet_desc *codelet_desc;
+	  } hc2real;
+
+	  /* nodes of type FFTW_HC2HC */
+	  struct {
+	       int size;
+	       fftw_direction dir;
+	       fftw_hc2hc_codelet *codelet;
+	       fftw_twiddle *tw;
+	       struct fftw_plan_node_struct *recurse;
+	       const fftw_codelet_desc *codelet_desc;
+	  } hc2hc;
+
+	  /* nodes of type FFTW_RGENERIC */
+	  struct {
+	       int size;
+	       fftw_direction dir;
+	       fftw_rgeneric_codelet *codelet;
+	       fftw_twiddle *tw;
+	       struct fftw_plan_node_struct *recurse;
+	  } rgeneric;
+     } nodeu;
+
+     int refcnt;
+} fftw_plan_node;
+
+typedef enum {
+     FFTW_NORMAL_RECURSE = 0,
+     FFTW_VECTOR_RECURSE = 1
+} fftw_recurse_kind;
+
+struct fftw_plan_struct {
+     int n;			/* size handed to the executors by fftw()/fftw_one() */
+     int refcnt;
+     fftw_direction dir;	/* FFTW_FORWARD or FFTW_BACKWARD */
+     int flags;		/* planner flags; FFTW_IN_PLACE selects the in-place executors */
+     int wisdom_signature;
+     enum fftw_node_type wisdom_type;
+     struct fftw_plan_struct *next;
+     fftw_plan_node *root;	/* root node passed to the executors */
+     double cost;
+     fftw_recurse_kind recurse_kind;	/* passed through to the executors */
+     int vector_size;		/* batch size fftw() uses under FFTW_ENABLE_VECTOR_RECURSE */
+};
+
+typedef struct fftw_plan_struct *fftw_plan;
+
+/* flags for the planner */
+#define  FFTW_ESTIMATE (0)
+#define  FFTW_MEASURE  (1)
+
+#define FFTW_OUT_OF_PLACE (0)
+#define FFTW_IN_PLACE (8)
+#define FFTW_USE_WISDOM (16)
+
+#define FFTW_THREADSAFE (128)  /* guarantee plan is read-only so that the
+				  same plan can be used in parallel by
+				  multiple threads */
+
+#define FFTWND_FORCE_BUFFERED (256)     /* internal flag, forces buffering
+                                           in fftwnd transforms */
+
+#define FFTW_NO_VECTOR_RECURSE (512)    /* internal flag, prevents use
+                                           of vector recursion */
+
+extern fftw_plan fftw_create_plan_specific(int n, fftw_direction dir,
+					   int flags,
+					   fftw_complex *in, int istride,
+					 fftw_complex *out, int ostride);
+#define FFTW_HAS_PLAN_SPECIFIC
+extern fftw_plan fftw_create_plan(int n, fftw_direction dir, int flags);
+extern void fftw_print_plan(fftw_plan plan);
+extern void fftw_destroy_plan(fftw_plan plan);
+extern void fftw(fftw_plan plan, int howmany, fftw_complex *in, int istride,
+		 int idist, fftw_complex *out, int ostride, int odist);
+extern void fftw_one(fftw_plan plan, fftw_complex *in, fftw_complex *out);
+extern void fftw_die(const char *s);
+extern void *fftw_malloc(size_t n);
+extern void fftw_free(void *p);
+extern void fftw_check_memory_leaks(void);
+extern void fftw_print_max_memory_usage(void);
+
+typedef void *(*fftw_malloc_type_function) (size_t n);
+typedef void  (*fftw_free_type_function) (void *p);
+typedef void  (*fftw_die_type_function) (const char *errString);
+extern DL_IMPORT(fftw_malloc_type_function) fftw_malloc_hook;
+extern DL_IMPORT(fftw_free_type_function) fftw_free_hook;
+extern DL_IMPORT(fftw_die_type_function) fftw_die_hook;
+
+extern size_t fftw_sizeof_fftw_real(void);
+
+/* Wisdom: */
+/*
+ * define this symbol so that users know we are using a version of FFTW
+ * with wisdom
+ */
+#define FFTW_HAS_WISDOM
+extern void fftw_forget_wisdom(void);
+extern void fftw_export_wisdom(void (*emitter) (char c, void *), void *data);
+extern fftw_status fftw_import_wisdom(int (*g) (void *), void *data);
+extern void fftw_export_wisdom_to_file(FILE *output_file);
+extern fftw_status fftw_import_wisdom_from_file(FILE *input_file);
+extern char *fftw_export_wisdom_to_string(void);
+extern fftw_status fftw_import_wisdom_from_string(const char *input_string);
+
+/*
+ * define symbol so we know this function is available (it is not in
+ * older FFTWs)
+ */
+#define FFTW_HAS_FPRINT_PLAN
+extern void fftw_fprint_plan(FILE *f, fftw_plan plan);
+
+/*****************************
+ *    N-dimensional code
+ *****************************/
+typedef struct {
+     int is_in_place;		/* 1 if for in-place FFTs, 0 otherwise */
+
+     int rank;			/* 
+				 * the rank (number of dimensions) of the
+				 * array to be FFTed 
+				 */
+     int *n;			/*
+				 * the dimensions of the array to be
+				 * FFTed 
+				 */
+     fftw_direction dir;	/* FFTW_FORWARD or FFTW_BACKWARD */
+
+     int *n_before;		/*
+				 * n_before[i] = product of n[j] for j < i 
+				 */
+     int *n_after;		/* n_after[i] = product of n[j] for j > i */
+
+     fftw_plan *plans;		/* 1d fftw plans for each dimension */
+
+     int nbuffers, nwork;
+     fftw_complex *work;	/* 
+				 * work array big enough to hold
+				 * nbuffers+1 of the largest dimension 
+				 * (has nwork elements)
+				 */
+} fftwnd_data;
+
+typedef fftwnd_data *fftwnd_plan;
+
+/* Initializing the FFTWND plan: */
+extern fftwnd_plan fftw2d_create_plan(int nx, int ny, fftw_direction dir,
+				      int flags);
+extern fftwnd_plan fftw3d_create_plan(int nx, int ny, int nz,
+				      fftw_direction dir, int flags);
+extern fftwnd_plan fftwnd_create_plan(int rank, const int *n,
+				      fftw_direction dir,
+				      int flags);
+
+extern fftwnd_plan fftw2d_create_plan_specific(int nx, int ny,
+					       fftw_direction dir,
+					       int flags,
+					   fftw_complex *in, int istride,
+					 fftw_complex *out, int ostride);
+extern fftwnd_plan fftw3d_create_plan_specific(int nx, int ny, int nz,
+					   fftw_direction dir, int flags,
+					   fftw_complex *in, int istride,
+					 fftw_complex *out, int ostride);
+extern fftwnd_plan fftwnd_create_plan_specific(int rank, const int *n,
+					       fftw_direction dir,
+					       int flags,
+					   fftw_complex *in, int istride,
+					 fftw_complex *out, int ostride);
+
+/* Freeing the FFTWND plan: */
+extern void fftwnd_destroy_plan(fftwnd_plan plan);
+
+/* Printing the plan: */
+extern void fftwnd_fprint_plan(FILE *f, fftwnd_plan p);
+extern void fftwnd_print_plan(fftwnd_plan p);
+#define FFTWND_HAS_PRINT_PLAN
+
+/* Computing the N-Dimensional FFT */
+extern void fftwnd(fftwnd_plan plan, int howmany,
+		   fftw_complex *in, int istride, int idist,
+		   fftw_complex *out, int ostride, int odist);
+extern void fftwnd_one(fftwnd_plan p, fftw_complex *in, fftw_complex *out);
+
+#ifdef __cplusplus
+}                               /* extern "C" */
+
+#endif				/* __cplusplus */
+#endif				/* FFTW_H */
diff --git a/src/fftw/fftwnd.c b/src/fftw/fftwnd.c
new file mode 100644
index 0000000..57354b0
--- /dev/null
+++ b/src/fftw/fftwnd.c
@@ -0,0 +1,806 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: fftwnd.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+#include "fftw-int.h"
+
+/* the number of buffers used when buffered transforms are selected: */
+#define FFTWND_NBUFFERS 8
+
+/* the default number of buffers (0 selects the unbuffered code path): */
+#define FFTWND_DEFAULT_NBUFFERS 0
+
+/* the number of "padding" elements between consecutive buffer lines */
+#define FFTWND_BUFFER_PADDING 8
+
+static void destroy_plan_array(int rank, fftw_plan *plans);
+
+static void init_test_array(fftw_complex *arr, int stride, int n)
+{
+     int k;			/* counts down; the order of the stores is irrelevant */
+
+     for (k = n; k > 0; --k) {
+	  c_re(arr[stride * (k - 1)]) = 0.0;
+	  c_im(arr[stride * (k - 1)]) = 0.0;
+     }
+}
+
+/*
+ * Same as fftw_measure_runtime, except for fftwnd plan: zero the input,
+ * time batches of transforms on the given arrays (doubling the batch
+ * until the fastest one lasts at least FFTW_TIME_MIN) and return the
+ * fastest time measured for a single transform.  Rank 0 yields 0.0.
+ */
+double fftwnd_measure_runtime(fftwnd_plan plan,
+			      fftw_complex *in, int istride,
+			      fftw_complex *out, int ostride)
+{
+     fftw_time begin, end, start;
+     double t, tmin;
+     int i, iter;
+     int n;
+     int repeat;
+
+     if (plan->rank == 0)
+	  return 0.0;
+
+     n = 1;
+     for (i = 0; i < plan->rank; ++i)
+	  n *= plan->n[i];
+
+     iter = 1;
+
+     for (;;) {
+	  tmin = 1.0E10;
+	  init_test_array(in, istride, n);
+
+	  start = fftw_get_time();
+	  /* repeat the measurement FFTW_TIME_REPEAT times */
+	  for (repeat = 0; repeat < FFTW_TIME_REPEAT; ++repeat) {
+	       begin = fftw_get_time();
+	       for (i = 0; i < iter; ++i) {
+		    fftwnd(plan, 1, in, istride, 0, out, ostride, 0);
+	       }
+	       end = fftw_get_time();
+
+	       t = fftw_time_to_sec(fftw_time_diff(end, begin));
+	       if (t < tmin)
+		    tmin = t;
+
+	       /* do not run for too long */
+	       t = fftw_time_to_sec(fftw_time_diff(end, start));
+	       if (t > FFTW_TIME_LIMIT)
+		    break;
+	  }
+
+	  if (tmin >= FFTW_TIME_MIN)
+	       break;
+
+	  iter *= 2;
+     }
+
+     /* tmin is the time of iter transforms; scale it down to one */
+     tmin /= (double) iter;
+
+     return tmin;
+}
+
+/********************** Initializing the FFTWND Plan ***********************/
+
+/*
+ * Initialize everything except for the 1D plans and the work array.
+ * Yields 0, before anything is allocated, unless rank and all n[i] > 0.
+ */
+fftwnd_plan fftwnd_create_plan_aux(int rank, const int *n,
+				   fftw_direction dir, int flags)
+{
+     int i;
+     fftwnd_plan p;
+
+     if (rank <= 0)
+	  return 0;
+
+     for (i = 0; i < rank; ++i)
+	  if (n[i] <= 0)
+	       return 0;
+
+     p = (fftwnd_plan) fftw_malloc(sizeof(fftwnd_data));
+     p->n = 0;
+     p->n_before = 0;
+     p->n_after = 0;
+     p->plans = 0;
+     p->work = 0;
+     p->dir = dir;
+
+     p->rank = rank;
+     p->is_in_place = flags & FFTW_IN_PLACE;
+
+     p->nwork = 0;
+     p->nbuffers = 0;
+
+     p->n = (int *) fftw_malloc(sizeof(int) * rank);
+     p->n_before = (int *) fftw_malloc(sizeof(int) * rank);
+     p->n_after = (int *) fftw_malloc(sizeof(int) * rank);
+     p->n_before[0] = 1;
+     p->n_after[rank - 1] = 1;
+
+     for (i = 0; i < rank; ++i) {
+	  p->n[i] = n[i];
+
+	  if (i) {
+	       p->n_before[i] = p->n_before[i - 1] * n[i - 1];
+	       p->n_after[rank - 1 - i] = p->n_after[rank - i] * n[rank - i];
+	  }
+     }
+
+     return p;
+}
+
+/* allocate an array of rank 1d plan slots, every slot set to the null plan */
+fftw_plan *fftwnd_new_plan_array(int rank)
+{
+     fftw_plan *slots = (fftw_plan *) fftw_malloc(rank * sizeof(fftw_plan));
+
+     if (slots) {
+	  int k;
+
+	  for (k = rank; k > 0; --k)
+	       slots[k - 1] = 0;
+     }
+     return slots;
+}
+
+/*
+ * create an array of plans using the ordinary 1d fftw_create_plan,
+ * which allocates its own array and creates plans optimized for
+ * contiguous data.  Dimensions of equal size share one plan when the
+ * later one is transformed in place.
+ *
+ * Returns plans on success (a null plans is passed through).  For
+ * rank <= 0 the result is 0; when a 1d plan cannot be created, plans
+ * is destroyed and the result is 0 as well.
+ */
+fftw_plan *fftwnd_create_plans_generic(fftw_plan *plans,
+				       int rank, const int *n,
+				       fftw_direction dir, int flags)
+{
+     int dim;
+
+     if (rank <= 0)
+	  return 0;
+     if (!plans)
+	  return plans;
+
+     for (dim = 0; dim < rank; ++dim) {
+	  /* fft's except the last dimension are always in-place */
+	  int in_place = dim < rank - 1 || (flags & FFTW_IN_PLACE);
+	  int same = -1;	/* earlier dimension whose plan fits, if any */
+
+	  if (in_place) {
+	       same = dim - 1;
+	       while (same >= 0 && n[dim] != n[same])
+		    --same;
+	  }
+
+	  if (same >= 0) {
+	       /* a plan already exists for this size: reuse it */
+	       plans[dim] = plans[same];
+	       continue;
+	  }
+
+	  /* otherwise generate a new plan */
+	  plans[dim] = fftw_create_plan(n[dim], dir,
+					in_place ? flags | FFTW_IN_PLACE
+					: flags);
+	  if (!plans[dim]) {
+	       destroy_plan_array(rank, plans);
+	       return 0;
+	  }
+     }
+
+     return plans;
+}
+
+static int get_maxdim(int rank, const int *n, int flags)
+{
+     /* the last dimension only counts for in-place transforms */
+     int ndims = (flags & FFTW_IN_PLACE) ? rank : rank - 1;
+     int largest = 0;
+     int d;
+
+     for (d = 0; d < ndims; ++d)
+	  if (largest < n[d])
+	       largest = n[d];
+
+     return largest;
+}
+
+/* number of elements needed by the work array: ncopies lines of the
+   largest dimension in n that is transformed in place, with
+   FFTWND_BUFFER_PADDING elements between consecutive lines. */
+int fftwnd_work_size(int rank, const int *n, int flags, int ncopies)
+{
+     int line = get_maxdim(rank, n, flags);
+     return ncopies * line + (ncopies - 1) * FFTWND_BUFFER_PADDING;
+}
+
+/*
+ * create plans using the fftw_create_plan_specific planner, which
+ * specializes the plan of each dimension for the strides we will use.
+ * If a 1d plan cannot be created, plans is destroyed and 0 is returned.
+ */
+fftw_plan *fftwnd_create_plans_specific(fftw_plan *plans,
+					int rank, const int *n,
+					const int *n_after,
+					fftw_direction dir, int flags,
+					fftw_complex *in, int istride,
+					fftw_complex *out, int ostride)
+{
+     if (rank <= 0)
+	  return 0;
+
+     if (plans) {
+	  int i, stride, cur_flags;
+	  fftw_complex *work = 0;
+	  int nwork;
+
+	  nwork = fftwnd_work_size(rank, n, flags, 1);
+	  if (nwork)
+	       work = (fftw_complex*)fftw_malloc(nwork * sizeof(fftw_complex));
+
+	  for (i = 0; i < rank; ++i) {
+	       /* fft's except the last dimension are always in-place */
+	       if (i < rank - 1)
+		    cur_flags = flags | FFTW_IN_PLACE;
+	       else
+		    cur_flags = flags;
+
+	       /* stride for transforming ith dimension */
+	       stride = n_after[i];
+
+	       if (cur_flags & FFTW_IN_PLACE)
+		    plans[i] = fftw_create_plan_specific(n[i], dir, cur_flags,
+						    in, istride * stride,
+							 work, 1);
+	       else
+		    plans[i] = fftw_create_plan_specific(n[i], dir, cur_flags,
+						    in, istride * stride,
+						  out, ostride * stride);
+	       if (!plans[i]) {
+		    destroy_plan_array(rank, plans);
+		    plans = 0;	/* report the failure to the caller */
+		    break;	/* and share the guarded release of work */
+	       }
+	  }
+
+	  if (work)
+	       fftw_free(work);
+     }
+     return plans;
+}
+
+/*
+ * Create an fftwnd_plan specialized for specific arrays.  (These
+ * arrays are ignored, however, if they are NULL or if the flags do
+ * not include FFTW_MEASURE.)  When they are used, an unbuffered set of
+ * 1D plans (made for the given strides) and a buffered set (generic
+ * plans, FFTWND_NBUFFERS buffers) are both timed on the arrays and the
+ * faster one is kept.  Returns 0 if the plan cannot be created.
+ */
+fftwnd_plan fftwnd_create_plan_specific(int rank, const int *n,
+					fftw_direction dir, int flags,
+					fftw_complex *in, int istride,
+					fftw_complex *out, int ostride)
+{
+     fftwnd_plan p;
+
+     if (!(p = fftwnd_create_plan_aux(rank, n, dir, flags)))
+	  return 0;
+
+     if (!(flags & FFTW_MEASURE) || in == 0
+	 || (!p->is_in_place && out == 0)) {
+
+/**** use default plan ****/
+
+	  p->plans = fftwnd_create_plans_generic(fftwnd_new_plan_array(rank),
+						 rank, n, dir, flags);
+	  if (!p->plans) {
+	       fftwnd_destroy_plan(p);
+	       return 0;
+	  }
+	  if (flags & FFTWND_FORCE_BUFFERED)
+	       p->nbuffers = FFTWND_NBUFFERS;
+	  else
+	       p->nbuffers = FFTWND_DEFAULT_NBUFFERS;
+	  /* FFTW_THREADSAFE plans store no work array (fftwnd makes its own) */
+	  p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1);
+	  if (p->nwork && !(flags & FFTW_THREADSAFE)) {
+	       p->work = (fftw_complex*) fftw_malloc(p->nwork 
+						     * sizeof(fftw_complex));
+	       if (!p->work) {
+		    fftwnd_destroy_plan(p);
+		    return 0;
+	       }
+	  }
+     } else {
+/**** use runtime measurements to pick plan ****/
+
+	  fftw_plan *plans_buf, *plans_nobuf;
+	  double t_buf, t_nobuf;
+	  /* size the work array for the buffered candidate, the larger one */
+	  p->nwork = fftwnd_work_size(rank, n, flags, FFTWND_NBUFFERS + 1);
+	  if (p->nwork && !(flags & FFTW_THREADSAFE)) {
+	       p->work = (fftw_complex*) fftw_malloc(p->nwork 
+						     * sizeof(fftw_complex));
+	       if (!p->work) {
+		    fftwnd_destroy_plan(p);
+		    return 0;
+	       }
+	  }
+	  else
+	       p->work = (fftw_complex*) NULL;
+
+	  /* two possible sets of 1D plans: */
+	  plans_buf = fftwnd_create_plans_generic(fftwnd_new_plan_array(rank),
+						  rank, n, dir, flags);
+	  plans_nobuf = 
+	       fftwnd_create_plans_specific(fftwnd_new_plan_array(rank),
+					    rank, n, p->n_after, dir,
+					    flags, in, istride,
+					    out, ostride);
+	  if (!plans_buf || !plans_nobuf) {
+	       destroy_plan_array(rank, plans_nobuf);
+	       destroy_plan_array(rank, plans_buf);
+	       fftwnd_destroy_plan(p);
+	       return 0;
+	  }
+	  /* time the two possible plans, unbuffered first */
+	  p->plans = plans_nobuf;
+	  p->nbuffers = 0;
+	  p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1);
+	  t_nobuf = fftwnd_measure_runtime(p, in, istride, out, ostride);
+	  p->plans = plans_buf;
+	  p->nbuffers = FFTWND_NBUFFERS;
+	  p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1);
+	  t_buf = fftwnd_measure_runtime(p, in, istride, out, ostride);
+
+	  /* pick the better one (the buffered set is kept on a tie): */
+	  if (t_nobuf < t_buf) {	/* use unbuffered transform */
+	       p->plans = plans_nobuf;
+	       p->nbuffers = 0;
+
+	       /* work array is unnecessarily large */
+	       if (p->work)
+		    fftw_free(p->work);
+	       p->work = 0;
+
+	       destroy_plan_array(rank, plans_buf);
+
+	       /* allocate a work array of the correct size: */
+	       p->nwork = fftwnd_work_size(rank, n, flags, p->nbuffers + 1);
+	       if (p->nwork && !(flags & FFTW_THREADSAFE)) {
+		    p->work = (fftw_complex*) fftw_malloc(p->nwork 
+						       * sizeof(fftw_complex));
+		    if (!p->work) {
+			 fftwnd_destroy_plan(p);
+			 return 0;
+		    }
+	       }
+	  } else {		/* use buffered transform */
+	       destroy_plan_array(rank, plans_nobuf);
+	  }
+     }
+
+     return p;
+}
+
+fftwnd_plan fftw2d_create_plan_specific(int nx, int ny,
+					fftw_direction dir, int flags,
+					fftw_complex *in, int istride,
+					fftw_complex *out, int ostride)
+{
+     int dims[2];
+
+     /* a 2D plan is simply the rank 2 case of the general planner */
+     dims[0] = nx;
+     dims[1] = ny;
+     return fftwnd_create_plan_specific(2, dims, dir, flags,
+					in, istride, out, ostride);
+}
+
+fftwnd_plan fftw3d_create_plan_specific(int nx, int ny, int nz,
+					fftw_direction dir, int flags,
+					fftw_complex *in, int istride,
+					fftw_complex *out, int ostride)
+{
+     int dims[3];
+
+     /* a 3D plan is simply the rank 3 case of the general planner */
+     dims[0] = nx;
+     dims[1] = ny;
+     dims[2] = nz;
+     return fftwnd_create_plan_specific(3, dims, dir, flags,
+					in, istride, out, ostride);
+}
+
+/* Create a generic fftwnd plan: these wrappers pass no sample arrays */
+/* (in and out are 0, both strides are 1) to the _specific planners. */
+fftwnd_plan fftwnd_create_plan(int rank, const int *n,
+			       fftw_direction dir, int flags)
+{
+     return fftwnd_create_plan_specific(rank, n, dir, flags, 0, 1, 0, 1);
+}
+
+fftwnd_plan fftw2d_create_plan(int nx, int ny,
+			       fftw_direction dir, int flags)
+{
+     return fftw2d_create_plan_specific(nx, ny, dir, flags, 0, 1, 0, 1);
+}
+
+fftwnd_plan fftw3d_create_plan(int nx, int ny, int nz,
+			       fftw_direction dir, int flags)
+{
+     return fftw3d_create_plan_specific(nx, ny, nz, dir, flags, 0, 1, 0, 1);
+}
+
+/************************ Freeing the FFTWND Plan ************************/
+
+static void destroy_plan_array(int rank, fftw_plan *plans)
+{
+     int cur, prev;
+     if (!plans)
+	  return;
+     for (cur = 0; cur < rank; ++cur) {
+	  /* shared plans are destroyed once, at their first occurrence */
+	  for (prev = cur - 1; prev >= 0; --prev)
+	       if (plans[prev] == plans[cur])
+		    break;
+	  if (prev < 0 && plans[cur])
+	       fftw_destroy_plan(plans[cur]);
+     }
+     fftw_free(plans);
+}
+
+void fftwnd_destroy_plan(fftwnd_plan plan)
+{
+     if (!plan)
+	  return;
+
+     /* the 1d plans first, then every array owned by the plan */
+     destroy_plan_array(plan->rank, plan->plans);
+
+     if (plan->n)
+	  fftw_free(plan->n);
+     if (plan->n_before)
+	  fftw_free(plan->n_before);
+     if (plan->n_after)
+	  fftw_free(plan->n_after);
+     if (plan->work)
+	  fftw_free(plan->work);
+
+     /* finally the plan structure itself */
+     fftw_free(plan);
+}
+
+/************************ Printing the FFTWND Plan ************************/
+
+void fftwnd_fprint_plan(FILE *f, fftwnd_plan plan)
+{
+     int dim, twin;
+
+     if (!plan)
+	  return;
+     if (plan->rank == 0) {
+	  fprintf(f, "plan for rank 0 (null) transform.\n");
+	  return;
+     }
+     fprintf(f, "plan for ");
+     for (dim = 0; dim < plan->rank; ++dim)
+	  fprintf(f, "%s%d", dim ? "x" : "", plan->n[dim]);
+     fprintf(f, " transform:\n");
+
+     if (plan->nbuffers <= 0)
+	  fprintf(f, "  -- using unbuffered transform\n");
+     else
+	  fprintf(f, "  -- using buffered transforms (%d buffers)\n",
+		  plan->nbuffers);
+
+     /* one entry per dimension; a shared 1d plan is printed only once */
+     for (dim = 0; dim < plan->rank; ++dim) {
+	  fprintf(f, "* dimension %d (size %d) ", dim, plan->n[dim]);
+	  twin = dim - 1;
+	  while (twin >= 0 && plan->plans[twin] != plan->plans[dim])
+	       --twin;
+
+	  if (twin >= 0)
+	       fprintf(f, "plan is same as dimension %d plan.\n", twin);
+	  else
+	       fftw_fprint_plan(f, plan->plans[dim]);
+     }
+}
+
+void fftwnd_print_plan(fftwnd_plan plan)
+{				/* same as fftwnd_fprint_plan, on stdout */
+     fftwnd_fprint_plan(stdout, plan);
+}
+
+/********************* Buffered FFTW (in-place) *********************/
+/* howmany in-place 1d ffts of in, taken nbuffers at a time through buffers */
+void fftw_buffered(fftw_plan p, int howmany,
+		   fftw_complex *in, int istride, int idist,
+		   fftw_complex *work,
+		   int nbuffers, fftw_complex *buffers)
+{
+     int i = 0, n, nb;
+
+     n = p->n;
+     nb = n + FFTWND_BUFFER_PADDING;	/* spacing of the buffer lines */
+
+     do {
+	  for (; i <= howmany - nbuffers; i += nbuffers) {
+	       fftw_complex *cur_in = in + i * idist;
+	       int j, buf;
+
+	       /* 
+	        * First, copy nbuffers strided arrays to the
+	        * contiguous buffer arrays (reading consecutive
+	        * locations, assuming that idist is 1):
+	        */
+	       for (j = 0; j < n; ++j) {
+		    fftw_complex *cur_in2 = cur_in + j * istride;
+		    fftw_complex *cur_buffers = buffers + j;
+
+		    for (buf = 0; buf <= nbuffers - 4; buf += 4) {
+			 fftw_real r0, i0, r1, i1, r2, i2, r3, i3;
+			 r0 = c_re(cur_in2[0]);
+			 i0 = c_im(cur_in2[0]);
+			 r1 = c_re(cur_in2[idist]);
+			 i1 = c_im(cur_in2[idist]);
+			 r2 = c_re(cur_in2[2 * idist]);
+			 i2 = c_im(cur_in2[2 * idist]);
+			 r3 = c_re(cur_in2[3 * idist]);
+			 i3 = c_im(cur_in2[3 * idist]);
+			 c_re(cur_buffers[0]) = r0;
+			 c_im(cur_buffers[0]) = i0;
+			 c_re(cur_buffers[nb]) = r1;
+			 c_im(cur_buffers[nb]) = i1;
+			 c_re(cur_buffers[2 * nb]) = r2;
+			 c_im(cur_buffers[2 * nb]) = i2;
+			 c_re(cur_buffers[3 * nb]) = r3;
+			 c_im(cur_buffers[3 * nb]) = i3;
+			 cur_buffers += 4 * nb;
+			 cur_in2 += 4 * idist;
+		    }
+		    for (; buf < nbuffers; ++buf) {	/* leftover buffers */
+			 *cur_buffers = *cur_in2;
+			 cur_buffers += nb;
+			 cur_in2 += idist;
+		    }
+	       }
+
+	       /* 
+	        * Now, compute the FFTs in the buffers (in-place
+	        * using work): 
+	        */
+	       fftw(p, nbuffers, buffers, 1, nb, work, 1, 0);
+
+	       /* 
+	        * Finally, copy the results back from the contiguous
+	        * buffers to the strided arrays (writing consecutive
+	        * locations):
+	        */
+	       for (j = 0; j < n; ++j) {
+		    fftw_complex *cur_in2 = cur_in + j * istride;
+		    fftw_complex *cur_buffers = buffers + j;
+
+		    for (buf = 0; buf <= nbuffers - 4; buf += 4) {
+			 fftw_real r0, i0, r1, i1, r2, i2, r3, i3;
+			 r0 = c_re(cur_buffers[0]);
+			 i0 = c_im(cur_buffers[0]);
+			 r1 = c_re(cur_buffers[nb]);
+			 i1 = c_im(cur_buffers[nb]);
+			 r2 = c_re(cur_buffers[2 * nb]);
+			 i2 = c_im(cur_buffers[2 * nb]);
+			 r3 = c_re(cur_buffers[3 * nb]);
+			 i3 = c_im(cur_buffers[3 * nb]);
+			 c_re(cur_in2[0]) = r0;
+			 c_im(cur_in2[0]) = i0;
+			 c_re(cur_in2[idist]) = r1;
+			 c_im(cur_in2[idist]) = i1;
+			 c_re(cur_in2[2 * idist]) = r2;
+			 c_im(cur_in2[2 * idist]) = i2;
+			 c_re(cur_in2[3 * idist]) = r3;
+			 c_im(cur_in2[3 * idist]) = i3;
+			 cur_buffers += 4 * nb;
+			 cur_in2 += 4 * idist;
+		    }
+		    for (; buf < nbuffers; ++buf) {	/* leftover buffers */
+			 *cur_in2 = *cur_buffers;
+			 cur_buffers += nb;
+			 cur_in2 += idist;
+		    }
+	       }
+	  }
+
+	  /* 
+	   * we skip howmany % nbuffers ffts at the end of the loop,
+	   * so we have to go back and do them: 
+	   */
+	  nbuffers = howmany - i;
+     } while (i < howmany);
+}
+
+/********************* Computing the N-Dimensional FFT *********************/
+/* transform dimensions cur_dim..rank-1 of one array (fftwnd: rank >= 2) */
+void fftwnd_aux(fftwnd_plan p, int cur_dim,
+		fftw_complex *in, int istride,
+		fftw_complex *out, int ostride,
+		fftw_complex *work)
+{
+     int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
+
+     if (cur_dim == p->rank - 2) {
+	  /* just do the last dimension directly: */
+	  if (p->is_in_place)
+	       fftw(p->plans[p->rank - 1], n,
+		    in, istride, n_after * istride,
+		    work, 1, 0);
+	  else
+	       fftw(p->plans[p->rank - 1], n,
+		    in, istride, n_after * istride,
+		    out, ostride, n_after * ostride);
+     } else {			/* we have at least two dimensions to go */
+	  int i;
+
+	  /* 
+	   * process the subsequent dimensions recursively, in hyperslabs,
+	   * to get maximum locality: 
+	   */
+	  for (i = 0; i < n; ++i)
+	       fftwnd_aux(p, cur_dim + 1,
+			  in + i * n_after * istride, istride,
+			  out + i * n_after * ostride, ostride, work);
+     }
+
+     /* do the current dimension (in-place, on the data now held in out): */
+     if (p->nbuffers == 0) {
+	  fftw(p->plans[cur_dim], n_after,
+	       out, n_after * ostride, ostride,
+	       work, 1, 0);
+     } else			/* using contiguous copy buffers: */
+	  fftw_buffered(p->plans[cur_dim], n_after,
+			out, n_after * ostride, ostride,
+			work, p->nbuffers, work + n);
+}
+
+/*
+ * alternate version of fftwnd_aux -- this version pushes the howmany
+ * loop down to the leaves of the computation, for greater locality in
+ * cases where dist < stride (each 1d call handles all howmany arrays)
+ */
+void fftwnd_aux_howmany(fftwnd_plan p, int cur_dim,
+			int howmany,
+			fftw_complex *in, int istride, int idist,
+			fftw_complex *out, int ostride, int odist,
+			fftw_complex *work)
+{
+     int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
+     int k;
+
+     if (cur_dim == p->rank - 2) {
+	  /* just do the last dimension directly: */
+	  if (p->is_in_place)
+	       for (k = 0; k < n; ++k)
+		    fftw(p->plans[p->rank - 1], howmany,
+			 in + k * n_after * istride, istride, idist,
+			 work, 1, 0);
+	  else
+	       for (k = 0; k < n; ++k)
+		    fftw(p->plans[p->rank - 1], howmany,
+			 in + k * n_after * istride, istride, idist,
+			 out + k * n_after * ostride, ostride, odist);
+     } else {			/* we have at least two dimensions to go */
+	  int i;
+
+	  /* 
+	   * process the subsequent dimensions recursively, in
+	   * hyperslabs, to get maximum locality:
+	   */
+	  for (i = 0; i < n; ++i)
+	       fftwnd_aux_howmany(p, cur_dim + 1, howmany,
+			      in + i * n_after * istride, istride, idist,
+				  out + i * n_after * ostride, ostride, odist,
+				  work);
+     }
+
+     /* do the current dimension (in-place, on the data now held in out): */
+     if (p->nbuffers == 0)
+	  for (k = 0; k < n_after; ++k)
+	       fftw(p->plans[cur_dim], howmany,
+		    out + k * ostride, n_after * ostride, odist,
+		    work, 1, 0);
+     else			/* using contiguous copy buffers: */
+	  for (k = 0; k < n_after; ++k)
+	       fftw_buffered(p->plans[cur_dim], howmany,
+			     out + k * ostride, n_after * ostride, odist,
+			     work, p->nbuffers, work + n);
+}
+
+/*
+ * Compute howmany transforms with plan p.  Input k starts at
+ * in + k * idist with element stride istride; out, ostride and odist
+ * play the same role for the output and are unused for in-place plans.
+ */
+void fftwnd(fftwnd_plan p, int howmany,
+	    fftw_complex *in, int istride, int idist,
+	    fftw_complex *out, int ostride, int odist)
+{
+     /* plans without a work array of their own get a temporary one */
+     int temp_work = p->nwork && !p->work;
+     fftw_complex *work = p->work;
+
+#ifdef FFTW_DEBUG
+     if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
+	 && p->nwork && p->work)
+	  fftw_die("bug with FFTW_THREADSAFE flag\n");
+#endif
+
+     if (temp_work)
+	  work = (fftw_complex *) fftw_malloc(p->nwork * sizeof(fftw_complex));
+
+     if (p->rank == 1) {
+	  /* a single dimension is an ordinary 1d transform */
+	  if (p->is_in_place)
+	       fftw(p->plans[0], howmany, in, istride, idist, work, 1, 0);
+	  else
+	       fftw(p->plans[0], howmany, in, istride, idist,
+		    out, ostride, odist);
+     } else if (p->rank != 0) {
+	  /* in-place transforms read and write the same array */
+	  if (p->is_in_place) {
+	       out = in;
+	       ostride = istride;
+	       odist = idist;
+	  }
+
+	  if (howmany > 1 && odist < ostride) {
+	       fftwnd_aux_howmany(p, 0, howmany,
+				  in, istride, idist,
+				  out, ostride, odist,
+				  work);
+	  } else {
+	       int t;
+
+	       for (t = 0; t < howmany; ++t)
+		    fftwnd_aux(p, 0,
+			       in + t * idist, istride,
+			       out + t * odist, ostride,
+			       work);
+	  }
+     }
+
+     if (temp_work)
+	  fftw_free(work);
+}
+
+void fftwnd_one(fftwnd_plan p, fftw_complex *in, fftw_complex *out)
+{				/* one transform, all strides and dists 1 */
+     fftwnd(p, 1, in, 1, 1, out, 1, 1);
+}
diff --git a/src/fftw/fn_1.c b/src/fftw/fn_1.c
new file mode 100644
index 0000000..e0b6d03
--- /dev/null
+++ b/src/fftw/fn_1.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:37 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 1 */
+
+/*
+ * This function contains 0 FP additions, 0 FP multiplications,
+ * (or, 0 additions, 0 multiplications, 0 fused multiply/add),
+ * 2 stack variables, and 4 memory accesses
+ */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_1.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_1.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_1.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_1(const fftw_complex *input, fftw_complex *output,
+		       int istride, int ostride)
+{				/* copies input[0] to output[0]; strides unused */
+     fftw_real tmp1;
+     fftw_real tmp2;
+     ASSERT_ALIGNED_DOUBLE;
+     tmp1 = c_re(input[0]);
+     c_re(output[0]) = tmp1;
+     tmp2 = c_im(input[0]);
+     c_im(output[0]) = tmp2;
+}
+
+fftw_codelet_desc fftw_no_twiddle_1_desc = {
+     "fftw_no_twiddle_1",	/* the codelet's name as a string */
+     (void (*)()) fftw_no_twiddle_1,	/* the codelet function */
+     1,			/* presumably the transform size -- TODO confirm */
+     FFTW_FORWARD,
+     FFTW_NOTW,
+     23,
+     0,
+     (const int *) 0,
+};
diff --git a/src/fftw/fn_10.c b/src/fftw/fn_10.c
new file mode 100644
index 0000000..8010fb6
--- /dev/null
+++ b/src/fftw/fn_10.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:38 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 10 */
+
+/*
+ * This function contains 84 FP additions, 24 FP multiplications,
+ * (or, 72 additions, 12 multiplications, 12 fused multiply/add),
+ * 36 stack variables, and 40 memory accesses
+ */
+static const fftw_real K250000000 =	/* 1/4 */
+FFTW_KONST(+0.250000000000000000000000000000000000000000000);
+static const fftw_real K559016994 =	/* sqrt(5)/4 */
+FFTW_KONST(+0.559016994374947424102293417182819058860154590);
+static const fftw_real K587785252 =	/* sin(pi/5) */
+FFTW_KONST(+0.587785252292473129168705954639072768597652438);
+static const fftw_real K951056516 =	/* sin(2*pi/5) */
+FFTW_KONST(+0.951056516295153572116439333379382143405698634);
+
+/*
+ * Generator Id's : 
+ * $Id: fn_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_10(const fftw_complex *input, fftw_complex *output,
+			int istride, int ostride)
+{	/* reads input[k * istride], writes output[k * ostride], k = 0..9 */
+     fftw_real tmp3;
+     fftw_real tmp19;
+     fftw_real tmp64;
+     fftw_real tmp76;
+     fftw_real tmp68;
+     fftw_real tmp69;
+     fftw_real tmp10;
+     fftw_real tmp17;
+     fftw_real tmp18;
+     fftw_real tmp74;
+     fftw_real tmp73;
+     fftw_real tmp22;
+     fftw_real tmp25;
+     fftw_real tmp26;
+     fftw_real tmp36;
+     fftw_real tmp43;
+     fftw_real tmp59;
+     fftw_real tmp60;
+     fftw_real tmp65;
+     fftw_real tmp52;
+     fftw_real tmp55;
+     fftw_real tmp77;
+     fftw_real tmp78;
+     fftw_real tmp79;
+     ASSERT_ALIGNED_DOUBLE;
+     {				/* inputs 0 and 5: differences and sums */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp62;
+	  fftw_real tmp63;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[5 * istride]);
+	  tmp3 = tmp1 - tmp2;
+	  tmp19 = tmp1 + tmp2;
+	  tmp62 = c_im(input[0]);
+	  tmp63 = c_im(input[5 * istride]);
+	  tmp64 = tmp62 - tmp63;
+	  tmp76 = tmp62 + tmp63;
+     }
+     {				/* real parts of inputs 1-4 and 6-9 */
+	  fftw_real tmp6;
+	  fftw_real tmp20;
+	  fftw_real tmp16;
+	  fftw_real tmp24;
+	  fftw_real tmp9;
+	  fftw_real tmp21;
+	  fftw_real tmp13;
+	  fftw_real tmp23;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       fftw_real tmp14;
+	       fftw_real tmp15;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp4 = c_re(input[2 * istride]);
+	       tmp5 = c_re(input[7 * istride]);
+	       tmp6 = tmp4 - tmp5;
+	       tmp20 = tmp4 + tmp5;
+	       tmp14 = c_re(input[6 * istride]);
+	       tmp15 = c_re(input[istride]);
+	       tmp16 = tmp14 - tmp15;
+	       tmp24 = tmp14 + tmp15;
+	  }
+	  {
+	       fftw_real tmp7;
+	       fftw_real tmp8;
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp7 = c_re(input[8 * istride]);
+	       tmp8 = c_re(input[3 * istride]);
+	       tmp9 = tmp7 - tmp8;
+	       tmp21 = tmp7 + tmp8;
+	       tmp11 = c_re(input[4 * istride]);
+	       tmp12 = c_re(input[9 * istride]);
+	       tmp13 = tmp11 - tmp12;
+	       tmp23 = tmp11 + tmp12;
+	  }
+	  tmp68 = tmp6 - tmp9;
+	  tmp69 = tmp13 - tmp16;
+	  tmp10 = tmp6 + tmp9;
+	  tmp17 = tmp13 + tmp16;
+	  tmp18 = tmp10 + tmp17;
+	  tmp74 = tmp20 - tmp21;
+	  tmp73 = tmp23 - tmp24;
+	  tmp22 = tmp20 + tmp21;
+	  tmp25 = tmp23 + tmp24;
+	  tmp26 = tmp22 + tmp25;
+     }
+     {				/* imaginary parts of inputs 1-4 and 6-9 */
+	  fftw_real tmp32;
+	  fftw_real tmp53;
+	  fftw_real tmp42;
+	  fftw_real tmp51;
+	  fftw_real tmp35;
+	  fftw_real tmp54;
+	  fftw_real tmp39;
+	  fftw_real tmp50;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp30;
+	       fftw_real tmp31;
+	       fftw_real tmp40;
+	       fftw_real tmp41;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp30 = c_im(input[2 * istride]);
+	       tmp31 = c_im(input[7 * istride]);
+	       tmp32 = tmp30 - tmp31;
+	       tmp53 = tmp30 + tmp31;
+	       tmp40 = c_im(input[6 * istride]);
+	       tmp41 = c_im(input[istride]);
+	       tmp42 = tmp40 - tmp41;
+	       tmp51 = tmp40 + tmp41;
+	  }
+	  {
+	       fftw_real tmp33;
+	       fftw_real tmp34;
+	       fftw_real tmp37;
+	       fftw_real tmp38;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp33 = c_im(input[8 * istride]);
+	       tmp34 = c_im(input[3 * istride]);
+	       tmp35 = tmp33 - tmp34;
+	       tmp54 = tmp33 + tmp34;
+	       tmp37 = c_im(input[4 * istride]);
+	       tmp38 = c_im(input[9 * istride]);
+	       tmp39 = tmp37 - tmp38;
+	       tmp50 = tmp37 + tmp38;
+	  }
+	  tmp36 = tmp32 - tmp35;
+	  tmp43 = tmp39 - tmp42;
+	  tmp59 = tmp32 + tmp35;
+	  tmp60 = tmp39 + tmp42;
+	  tmp65 = tmp59 + tmp60;
+	  tmp52 = tmp50 - tmp51;
+	  tmp55 = tmp53 - tmp54;
+	  tmp77 = tmp53 + tmp54;
+	  tmp78 = tmp50 + tmp51;
+	  tmp79 = tmp77 + tmp78;
+     }
+     c_re(output[5 * ostride]) = tmp3 + tmp18;
+     {				/* real parts of outputs 1, 3, 7 and 9 */
+	  fftw_real tmp44;
+	  fftw_real tmp46;
+	  fftw_real tmp29;
+	  fftw_real tmp45;
+	  fftw_real tmp27;
+	  fftw_real tmp28;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp44 = (K951056516 * tmp36) + (K587785252 * tmp43);
+	  tmp46 = (K951056516 * tmp43) - (K587785252 * tmp36);
+	  tmp27 = K559016994 * (tmp10 - tmp17);
+	  tmp28 = tmp3 - (K250000000 * tmp18);
+	  tmp29 = tmp27 + tmp28;
+	  tmp45 = tmp28 - tmp27;
+	  c_re(output[9 * ostride]) = tmp29 - tmp44;
+	  c_re(output[ostride]) = tmp29 + tmp44;
+	  c_re(output[7 * ostride]) = tmp45 - tmp46;
+	  c_re(output[3 * ostride]) = tmp45 + tmp46;
+     }
+     c_re(output[0]) = tmp19 + tmp26;
+     {				/* real parts of outputs 2, 4, 6 and 8 */
+	  fftw_real tmp56;
+	  fftw_real tmp58;
+	  fftw_real tmp49;
+	  fftw_real tmp57;
+	  fftw_real tmp47;
+	  fftw_real tmp48;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp56 = (K951056516 * tmp52) - (K587785252 * tmp55);
+	  tmp58 = (K951056516 * tmp55) + (K587785252 * tmp52);
+	  tmp47 = tmp19 - (K250000000 * tmp26);
+	  tmp48 = K559016994 * (tmp22 - tmp25);
+	  tmp49 = tmp47 - tmp48;
+	  tmp57 = tmp48 + tmp47;
+	  c_re(output[2 * ostride]) = tmp49 - tmp56;
+	  c_re(output[8 * ostride]) = tmp49 + tmp56;
+	  c_re(output[4 * ostride]) = tmp57 - tmp58;
+	  c_re(output[6 * ostride]) = tmp57 + tmp58;
+     }
+     c_im(output[5 * ostride]) = tmp65 + tmp64;
+     {				/* imaginary parts of outputs 1, 3, 7 and 9 */
+	  fftw_real tmp70;
+	  fftw_real tmp72;
+	  fftw_real tmp67;
+	  fftw_real tmp71;
+	  fftw_real tmp61;
+	  fftw_real tmp66;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp70 = (K951056516 * tmp68) + (K587785252 * tmp69);
+	  tmp72 = (K951056516 * tmp69) - (K587785252 * tmp68);
+	  tmp61 = K559016994 * (tmp59 - tmp60);
+	  tmp66 = tmp64 - (K250000000 * tmp65);
+	  tmp67 = tmp61 + tmp66;
+	  tmp71 = tmp66 - tmp61;
+	  c_im(output[ostride]) = tmp67 - tmp70;
+	  c_im(output[9 * ostride]) = tmp70 + tmp67;
+	  c_im(output[3 * ostride]) = tmp71 - tmp72;
+	  c_im(output[7 * ostride]) = tmp72 + tmp71;
+     }
+     c_im(output[0]) = tmp79 + tmp76;
+     {				/* imaginary parts of outputs 2, 4, 6 and 8 */
+	  fftw_real tmp75;
+	  fftw_real tmp83;
+	  fftw_real tmp82;
+	  fftw_real tmp84;
+	  fftw_real tmp80;
+	  fftw_real tmp81;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp75 = (K951056516 * tmp73) - (K587785252 * tmp74);
+	  tmp83 = (K951056516 * tmp74) + (K587785252 * tmp73);
+	  tmp80 = tmp76 - (K250000000 * tmp79);
+	  tmp81 = K559016994 * (tmp77 - tmp78);
+	  tmp82 = tmp80 - tmp81;
+	  tmp84 = tmp81 + tmp80;
+	  c_im(output[2 * ostride]) = tmp75 + tmp82;
+	  c_im(output[8 * ostride]) = tmp82 - tmp75;
+	  c_im(output[4 * ostride]) = tmp83 + tmp84;
+	  c_im(output[6 * ostride]) = tmp84 - tmp83;
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_10_desc = {
+     "fftw_no_twiddle_10",	/* the codelet's name as a string */
+     (void (*)()) fftw_no_twiddle_10,	/* the codelet function */
+     10,			/* presumably the transform size -- TODO confirm */
+     FFTW_FORWARD,
+     FFTW_NOTW,
+     221,
+     0,
+     (const int *) 0,
+};
diff --git a/src/fftw/fn_11.c b/src/fftw/fn_11.c
new file mode 100644
index 0000000..3342319
--- /dev/null
+++ b/src/fftw/fn_11.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:38 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 11 */
+
+/*
+ * This function contains 140 FP additions, 100 FP multiplications,
+ * (or, 140 additions, 100 multiplications, 0 fused multiply/add),
+ * 30 stack variables, and 44 memory accesses
+ */
+static const fftw_real K142314838 =	/* names are K + the leading decimal digits of the value */
+FFTW_KONST(+0.142314838273285140443792668616369668791051361);	/* -cos(6*pi/11) */
+static const fftw_real K654860733 =
+FFTW_KONST(+0.654860733945285064056925072466293553183791199);	/* -cos(8*pi/11) */
+static const fftw_real K959492973 =
+FFTW_KONST(+0.959492973614497389890368057066327699062454848);	/* -cos(10*pi/11) */
+static const fftw_real K415415013 =
+FFTW_KONST(+0.415415013001886425529274149229623203524004910);	/* cos(4*pi/11) */
+static const fftw_real K841253532 =
+FFTW_KONST(+0.841253532831181168861811648919367717513292498);	/* cos(2*pi/11) */
+static const fftw_real K909631995 =
+FFTW_KONST(+0.909631995354518371411715383079028460060241051);	/* sin(4*pi/11) */
+static const fftw_real K281732556 =
+FFTW_KONST(+0.281732556841429697711417915346616899035777899);	/* sin(10*pi/11) */
+static const fftw_real K755749574 =
+FFTW_KONST(+0.755749574354258283774035843972344420179717445);	/* sin(8*pi/11) */
+static const fftw_real K989821441 =
+FFTW_KONST(+0.989821441880932732376092037776718787376519372);	/* sin(6*pi/11) */
+static const fftw_real K540640817 =
+FFTW_KONST(+0.540640817455597582107635954318691695431770608);	/* sin(2*pi/11) */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_11.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_11.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_11.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_11(const fftw_complex *input, fftw_complex *output,	/* input is only read, output is only written */
+			int istride, int ostride)	/* index strides, in fftw_complex elements, for input and output */
+{	/* Straight-line forward DFT of length 11: reads input[k * istride], writes output[k * ostride], k = 0..10 (x_k below means input[k * istride]). */
+     fftw_real tmp1;	/* Re(x0) */
+     fftw_real tmp50;	/* Im(x0) */
+     fftw_real tmp4;
+     fftw_real tmp42;
+     fftw_real tmp20;
+     fftw_real tmp53;
+     fftw_real tmp29;
+     fftw_real tmp49;
+     fftw_real tmp7;
+     fftw_real tmp46;
+     fftw_real tmp10;
+     fftw_real tmp43;
+     fftw_real tmp23;
+     fftw_real tmp52;
+     fftw_real tmp13;
+     fftw_real tmp45;
+     fftw_real tmp32;
+     fftw_real tmp48;
+     fftw_real tmp26;
+     fftw_real tmp51;
+     fftw_real tmp16;
+     fftw_real tmp44;
+     ASSERT_ALIGNED_DOUBLE;	/* macro from the project headers; emitted because genfft ran with -magic-alignment-check */
+     {	/* load phase: pair x_j with x_(11-j) and keep the sum and difference of each pair */
+	  fftw_real tmp2;
+	  fftw_real tmp3;
+	  fftw_real tmp18;
+	  fftw_real tmp19;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp50 = c_im(input[0]);
+	  tmp2 = c_re(input[istride]);
+	  tmp3 = c_re(input[10 * istride]);
+	  tmp4 = tmp2 + tmp3;	/* Re(x1) + Re(x10) */
+	  tmp42 = tmp3 - tmp2;	/* Re(x10) - Re(x1) */
+	  tmp18 = c_im(input[istride]);
+	  tmp19 = c_im(input[10 * istride]);
+	  tmp20 = tmp18 - tmp19;	/* Im(x1) - Im(x10) */
+	  tmp53 = tmp18 + tmp19;	/* Im(x1) + Im(x10) */
+	  {
+	       fftw_real tmp27;
+	       fftw_real tmp28;
+	       fftw_real tmp5;
+	       fftw_real tmp6;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp27 = c_im(input[2 * istride]);
+	       tmp28 = c_im(input[9 * istride]);
+	       tmp29 = tmp27 - tmp28;	/* Im(x2) - Im(x9) */
+	       tmp49 = tmp27 + tmp28;	/* Im(x2) + Im(x9) */
+	       tmp5 = c_re(input[2 * istride]);
+	       tmp6 = c_re(input[9 * istride]);
+	       tmp7 = tmp5 + tmp6;	/* Re(x2) + Re(x9) */
+	       tmp46 = tmp6 - tmp5;	/* Re(x9) - Re(x2) */
+	  }
+     }
+     {
+	  fftw_real tmp8;
+	  fftw_real tmp9;
+	  fftw_real tmp30;
+	  fftw_real tmp31;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp8 = c_re(input[3 * istride]);
+	  tmp9 = c_re(input[8 * istride]);
+	  tmp10 = tmp8 + tmp9;	/* Re(x3) + Re(x8) */
+	  tmp43 = tmp9 - tmp8;	/* Re(x8) - Re(x3) */
+	  {
+	       fftw_real tmp21;
+	       fftw_real tmp22;
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp21 = c_im(input[3 * istride]);
+	       tmp22 = c_im(input[8 * istride]);
+	       tmp23 = tmp21 - tmp22;	/* Im(x3) - Im(x8) */
+	       tmp52 = tmp21 + tmp22;	/* Im(x3) + Im(x8) */
+	       tmp11 = c_re(input[4 * istride]);
+	       tmp12 = c_re(input[7 * istride]);
+	       tmp13 = tmp11 + tmp12;	/* Re(x4) + Re(x7) */
+	       tmp45 = tmp12 - tmp11;	/* Re(x7) - Re(x4) */
+	  }
+	  tmp30 = c_im(input[4 * istride]);
+	  tmp31 = c_im(input[7 * istride]);
+	  tmp32 = tmp30 - tmp31;	/* Im(x4) - Im(x7) */
+	  tmp48 = tmp30 + tmp31;	/* Im(x4) + Im(x7) */
+	  {
+	       fftw_real tmp24;
+	       fftw_real tmp25;
+	       fftw_real tmp14;
+	       fftw_real tmp15;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp24 = c_im(input[5 * istride]);
+	       tmp25 = c_im(input[6 * istride]);
+	       tmp26 = tmp24 - tmp25;	/* Im(x5) - Im(x6) */
+	       tmp51 = tmp24 + tmp25;	/* Im(x5) + Im(x6) */
+	       tmp14 = c_re(input[5 * istride]);
+	       tmp15 = c_re(input[6 * istride]);
+	       tmp16 = tmp14 + tmp15;	/* Re(x5) + Re(x6) */
+	       tmp44 = tmp15 - tmp14;	/* Re(x6) - Re(x5) */
+	  }
+     }
+     {	/* output phase: real parts first, then imaginary parts */
+	  fftw_real tmp35;
+	  fftw_real tmp34;
+	  fftw_real tmp59;
+	  fftw_real tmp60;
+	  ASSERT_ALIGNED_DOUBLE;
+	  c_re(output[0]) = tmp1 + tmp4 + tmp7 + tmp10 + tmp13 + tmp16;	/* bin 0: sum of all eleven real parts */
+	  {
+	       fftw_real tmp41;
+	       fftw_real tmp40;
+	       fftw_real tmp37;
+	       fftw_real tmp36;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp41 =	/* sine-weighted Im differences for bin 1 */
+		   (K540640817 * tmp20) + (K989821441 * tmp23) +
+		   (K755749574 * tmp32) + (K281732556 * tmp26) +
+		   (K909631995 * tmp29);
+	       tmp40 =	/* Re(x0) plus cosine-weighted Re sums for bin 1 */
+		   tmp1 + (K841253532 * tmp4) + (K415415013 * tmp7) -
+		   (K959492973 * tmp16) - (K654860733 * tmp13) -
+		   (K142314838 * tmp10);
+	       c_re(output[10 * ostride]) = tmp40 - tmp41;	/* bins k and 11-k reuse the same two partial sums with opposite sign */
+	       c_re(output[ostride]) = tmp40 + tmp41;
+	       tmp37 =	/* sine-weighted Im differences for bin 4 */
+		   (K755749574 * tmp20) + (K540640817 * tmp23) +
+		   (K281732556 * tmp32) - (K989821441 * tmp29) -
+		   (K909631995 * tmp26);
+	       tmp36 =	/* Re(x0) plus cosine-weighted Re sums for bin 4 */
+		   tmp1 + (K841253532 * tmp10) + (K415415013 * tmp16) -
+		   (K959492973 * tmp13) - (K142314838 * tmp7) -
+		   (K654860733 * tmp4);
+	       c_re(output[7 * ostride]) = tmp36 - tmp37;
+	       c_re(output[4 * ostride]) = tmp36 + tmp37;
+	  }
+	  tmp35 =	/* sine-weighted Im differences for bin 3 */
+	      (K989821441 * tmp20) + (K540640817 * tmp32) +
+	      (K755749574 * tmp26) - (K281732556 * tmp29) -
+	      (K909631995 * tmp23);
+	  tmp34 =	/* Re(x0) plus cosine-weighted Re sums for bin 3 */
+	      tmp1 + (K415415013 * tmp10) + (K841253532 * tmp13) -
+	      (K654860733 * tmp16) - (K959492973 * tmp7) -
+	      (K142314838 * tmp4);
+	  c_re(output[8 * ostride]) = tmp34 - tmp35;
+	  c_re(output[3 * ostride]) = tmp34 + tmp35;
+	  {
+	       fftw_real tmp39;
+	       fftw_real tmp38;
+	       fftw_real tmp33;
+	       fftw_real tmp17;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp39 =	/* sine-weighted Im differences for bin 2 */
+		   (K909631995 * tmp20) + (K755749574 * tmp29) -
+		   (K540640817 * tmp26) - (K989821441 * tmp32) -
+		   (K281732556 * tmp23);
+	       tmp38 =	/* Re(x0) plus cosine-weighted Re sums for bin 2 */
+		   tmp1 + (K415415013 * tmp4) + (K841253532 * tmp16) -
+		   (K142314838 * tmp13) - (K959492973 * tmp10) -
+		   (K654860733 * tmp7);
+	       c_re(output[9 * ostride]) = tmp38 - tmp39;
+	       c_re(output[2 * ostride]) = tmp38 + tmp39;
+	       tmp33 =	/* sine-weighted Im differences for bin 5 */
+		   (K281732556 * tmp20) + (K755749574 * tmp23) +
+		   (K989821441 * tmp26) - (K540640817 * tmp29) -
+		   (K909631995 * tmp32);
+	       tmp17 =	/* Re(x0) plus cosine-weighted Re sums for bin 5 */
+		   tmp1 + (K841253532 * tmp7) + (K415415013 * tmp13) -
+		   (K142314838 * tmp16) - (K654860733 * tmp10) -
+		   (K959492973 * tmp4);
+	       c_re(output[6 * ostride]) = tmp17 - tmp33;
+	       c_re(output[5 * ostride]) = tmp17 + tmp33;
+	  }
+	  c_im(output[0]) = tmp53 + tmp52 + tmp48 + tmp51 + tmp49 + tmp50;	/* bin 0: sum of all eleven imaginary parts */
+	  {
+	       fftw_real tmp47;
+	       fftw_real tmp54;
+	       fftw_real tmp57;
+	       fftw_real tmp58;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp47 =	/* sine-weighted Re differences for bin 5 */
+		   (K281732556 * tmp42) + (K755749574 * tmp43) +
+		   (K989821441 * tmp44) - (K909631995 * tmp45) -
+		   (K540640817 * tmp46);
+	       tmp54 =	/* Im(x0) plus cosine-weighted Im sums for bin 5 */
+		   (K415415013 * tmp48) + (K841253532 * tmp49) + tmp50 -
+		   (K142314838 * tmp51) - (K654860733 * tmp52) -
+		   (K959492973 * tmp53);
+	       c_im(output[5 * ostride]) = tmp47 + tmp54;
+	       c_im(output[6 * ostride]) = tmp54 - tmp47;
+	       tmp57 =	/* sine-weighted Re differences for bin 1 */
+		   (K540640817 * tmp42) + (K909631995 * tmp46) +
+		   (K989821441 * tmp43) + (K755749574 * tmp45) +
+		   (K281732556 * tmp44);
+	       tmp58 =	/* Im(x0) plus cosine-weighted Im sums for bin 1 */
+		   (K841253532 * tmp53) + (K415415013 * tmp49) + tmp50 -
+		   (K959492973 * tmp51) - (K654860733 * tmp48) -
+		   (K142314838 * tmp52);
+	       c_im(output[ostride]) = tmp57 + tmp58;
+	       c_im(output[10 * ostride]) = tmp58 - tmp57;
+	  }
+	  tmp59 =	/* sine-weighted Re differences for bin 2 */
+	      (K909631995 * tmp42) + (K755749574 * tmp46) -
+	      (K540640817 * tmp44) - (K989821441 * tmp45) -
+	      (K281732556 * tmp43);
+	  tmp60 =	/* Im(x0) plus cosine-weighted Im sums for bin 2 */
+	      (K415415013 * tmp53) + (K841253532 * tmp51) + tmp50 -
+	      (K654860733 * tmp49) - (K142314838 * tmp48) -
+	      (K959492973 * tmp52);
+	  c_im(output[2 * ostride]) = tmp59 + tmp60;
+	  c_im(output[9 * ostride]) = tmp60 - tmp59;
+	  {
+	       fftw_real tmp55;
+	       fftw_real tmp56;
+	       fftw_real tmp61;
+	       fftw_real tmp62;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp55 =	/* sine-weighted Re differences for bin 3 */
+		   (K989821441 * tmp42) + (K540640817 * tmp45) +
+		   (K755749574 * tmp44) - (K909631995 * tmp43) -
+		   (K281732556 * tmp46);
+	       tmp56 =	/* Im(x0) plus cosine-weighted Im sums for bin 3 */
+		   (K415415013 * tmp52) + (K841253532 * tmp48) + tmp50 -
+		   (K959492973 * tmp49) - (K654860733 * tmp51) -
+		   (K142314838 * tmp53);
+	       c_im(output[3 * ostride]) = tmp55 + tmp56;
+	       c_im(output[8 * ostride]) = tmp56 - tmp55;
+	       tmp61 =	/* sine-weighted Re differences for bin 4 */
+		   (K755749574 * tmp42) + (K540640817 * tmp43) +
+		   (K281732556 * tmp45) - (K909631995 * tmp44) -
+		   (K989821441 * tmp46);
+	       tmp62 =	/* Im(x0) plus cosine-weighted Im sums for bin 4 */
+		   (K841253532 * tmp52) + (K415415013 * tmp51) + tmp50 -
+		   (K142314838 * tmp49) - (K959492973 * tmp48) -
+		   (K654860733 * tmp53);
+	       c_im(output[4 * ostride]) = tmp61 + tmp62;
+	       c_im(output[7 * ostride]) = tmp62 - tmp61;
+	  }
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_11_desc = {	/* descriptor record for the fftw_no_twiddle_11 codelet defined above */
+     "fftw_no_twiddle_11",	/* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_11,	/* entry point, cast to an unprototyped function pointer */
+     11,	/* transform length handled by the codelet */
+     FFTW_FORWARD,	/* transform direction */
+     FFTW_NOTW,	/* codelet kind: no-twiddle */
+     243,	/* NOTE(review): presumably the codelet signature/id; confirm against fftw_codelet_desc in fftw-int.h */
+     0,	/* NOTE(review): presumably the twiddle-factor count (none here); confirm in fftw-int.h */
+     (const int *) 0,	/* NOTE(review): presumably the twiddle-order table, null here; confirm in fftw-int.h */
+};
diff --git a/src/fftw/fn_12.c b/src/fftw/fn_12.c
new file mode 100644
index 0000000..1a149ba
--- /dev/null
+++ b/src/fftw/fn_12.c
@@ -0,0 +1,324 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:40 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 12 */
+
+/*
+ * This function contains 96 FP additions, 16 FP multiplications,
+ * (or, 88 additions, 8 multiplications, 8 fused multiply/add),
+ * 40 stack variables, and 48 memory accesses
+ */
+static const fftw_real K866025403 =	/* names are K + the leading decimal digits of the value */
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);	/* sqrt(3)/2 = sin(2*pi/3) */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);	/* 1/2 = -cos(2*pi/3) */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_12.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_12.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_12.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_12(const fftw_complex *input, fftw_complex *output,	/* input is only read, output is only written */
+			int istride, int ostride)	/* index strides, in fftw_complex elements, for input and output */
+{	/* Straight-line forward DFT of length 12: reads input[k * istride], writes output[k * ostride], k = 0..11 (x_k below means input[k * istride]). */
+     fftw_real tmp5;
+     fftw_real tmp53;
+     fftw_real tmp36;
+     fftw_real tmp28;
+     fftw_real tmp35;
+     fftw_real tmp54;
+     fftw_real tmp10;
+     fftw_real tmp56;
+     fftw_real tmp39;
+     fftw_real tmp33;
+     fftw_real tmp38;
+     fftw_real tmp57;
+     fftw_real tmp16;
+     fftw_real tmp42;
+     fftw_real tmp72;
+     fftw_real tmp45;
+     fftw_real tmp92;
+     fftw_real tmp75;
+     fftw_real tmp21;
+     fftw_real tmp47;
+     fftw_real tmp77;
+     fftw_real tmp50;
+     fftw_real tmp93;
+     fftw_real tmp80;
+     ASSERT_ALIGNED_DOUBLE;	/* macro from the project headers; emitted because genfft ran with -magic-alignment-check */
+     {	/* real parts of x0, x4, x8: sum, first minus half the other two, and sqrt(3)/2 times their difference */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp3;
+	  fftw_real tmp4;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[4 * istride]);
+	  tmp3 = c_re(input[8 * istride]);
+	  tmp4 = tmp2 + tmp3;
+	  tmp5 = tmp1 + tmp4;	/* Re(x0) + Re(x4) + Re(x8) */
+	  tmp53 = tmp1 - (K500000000 * tmp4);
+	  tmp36 = K866025403 * (tmp3 - tmp2);
+     }
+     {	/* same three combinations for the imaginary parts of x0, x4, x8 */
+	  fftw_real tmp24;
+	  fftw_real tmp25;
+	  fftw_real tmp26;
+	  fftw_real tmp27;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp24 = c_im(input[0]);
+	  tmp25 = c_im(input[4 * istride]);
+	  tmp26 = c_im(input[8 * istride]);
+	  tmp27 = tmp25 + tmp26;
+	  tmp28 = tmp24 + tmp27;	/* Im(x0) + Im(x4) + Im(x8) */
+	  tmp35 = tmp24 - (K500000000 * tmp27);
+	  tmp54 = K866025403 * (tmp25 - tmp26);
+     }
+     {	/* real parts of x6, x10, x2 */
+	  fftw_real tmp6;
+	  fftw_real tmp7;
+	  fftw_real tmp8;
+	  fftw_real tmp9;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp6 = c_re(input[6 * istride]);
+	  tmp7 = c_re(input[10 * istride]);
+	  tmp8 = c_re(input[2 * istride]);
+	  tmp9 = tmp7 + tmp8;
+	  tmp10 = tmp6 + tmp9;	/* Re(x6) + Re(x10) + Re(x2) */
+	  tmp56 = tmp6 - (K500000000 * tmp9);
+	  tmp39 = K866025403 * (tmp8 - tmp7);
+     }
+     {	/* imaginary parts of x6, x10, x2 */
+	  fftw_real tmp29;
+	  fftw_real tmp30;
+	  fftw_real tmp31;
+	  fftw_real tmp32;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp29 = c_im(input[6 * istride]);
+	  tmp30 = c_im(input[10 * istride]);
+	  tmp31 = c_im(input[2 * istride]);
+	  tmp32 = tmp30 + tmp31;
+	  tmp33 = tmp29 + tmp32;	/* Im(x6) + Im(x10) + Im(x2) */
+	  tmp38 = tmp29 - (K500000000 * tmp32);
+	  tmp57 = K866025403 * (tmp30 - tmp31);
+     }
+     {	/* real parts of x3, x7, x11 */
+	  fftw_real tmp12;
+	  fftw_real tmp13;
+	  fftw_real tmp14;
+	  fftw_real tmp15;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp12 = c_re(input[3 * istride]);
+	  tmp13 = c_re(input[7 * istride]);
+	  tmp14 = c_re(input[11 * istride]);
+	  tmp15 = tmp13 + tmp14;
+	  tmp16 = tmp12 + tmp15;	/* Re(x3) + Re(x7) + Re(x11) */
+	  tmp42 = tmp12 - (K500000000 * tmp15);
+	  tmp72 = K866025403 * (tmp14 - tmp13);
+     }
+     {	/* imaginary parts of x3, x7, x11 */
+	  fftw_real tmp73;
+	  fftw_real tmp43;
+	  fftw_real tmp44;
+	  fftw_real tmp74;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp73 = c_im(input[3 * istride]);
+	  tmp43 = c_im(input[7 * istride]);
+	  tmp44 = c_im(input[11 * istride]);
+	  tmp74 = tmp43 + tmp44;
+	  tmp45 = K866025403 * (tmp43 - tmp44);
+	  tmp92 = tmp73 + tmp74;	/* Im(x3) + Im(x7) + Im(x11) */
+	  tmp75 = tmp73 - (K500000000 * tmp74);
+     }
+     {	/* real parts of x9, x1, x5 */
+	  fftw_real tmp17;
+	  fftw_real tmp18;
+	  fftw_real tmp19;
+	  fftw_real tmp20;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp17 = c_re(input[9 * istride]);
+	  tmp18 = c_re(input[istride]);
+	  tmp19 = c_re(input[5 * istride]);
+	  tmp20 = tmp18 + tmp19;
+	  tmp21 = tmp17 + tmp20;	/* Re(x9) + Re(x1) + Re(x5) */
+	  tmp47 = tmp17 - (K500000000 * tmp20);
+	  tmp77 = K866025403 * (tmp19 - tmp18);
+     }
+     {	/* imaginary parts of x9, x1, x5 */
+	  fftw_real tmp78;
+	  fftw_real tmp48;
+	  fftw_real tmp49;
+	  fftw_real tmp79;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp78 = c_im(input[9 * istride]);
+	  tmp48 = c_im(input[istride]);
+	  tmp49 = c_im(input[5 * istride]);
+	  tmp79 = tmp48 + tmp49;
+	  tmp50 = K866025403 * (tmp48 - tmp49);
+	  tmp93 = tmp78 + tmp79;	/* Im(x9) + Im(x1) + Im(x5) */
+	  tmp80 = tmp78 - (K500000000 * tmp79);
+     }
+     {	/* output phase: combine the eight triples above with sums and differences only */
+	  fftw_real tmp11;
+	  fftw_real tmp22;
+	  fftw_real tmp23;
+	  fftw_real tmp34;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp11 = tmp5 + tmp10;	/* sum of the real parts at even input indices */
+	  tmp22 = tmp16 + tmp21;	/* sum of the real parts at odd input indices */
+	  c_re(output[6 * ostride]) = tmp11 - tmp22;	/* bin 6: even-index sum minus odd-index sum */
+	  c_re(output[0]) = tmp11 + tmp22;	/* bin 0: sum of all twelve real parts */
+	  {
+	       fftw_real tmp91;
+	       fftw_real tmp94;
+	       fftw_real tmp95;
+	       fftw_real tmp96;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp91 = tmp5 - tmp10;
+	       tmp94 = tmp92 - tmp93;
+	       c_re(output[3 * ostride]) = tmp91 - tmp94;
+	       c_re(output[9 * ostride]) = tmp91 + tmp94;
+	       tmp95 = tmp28 + tmp33;	/* sum of the imaginary parts at even input indices */
+	       tmp96 = tmp92 + tmp93;	/* sum of the imaginary parts at odd input indices */
+	       c_im(output[6 * ostride]) = tmp95 - tmp96;
+	       c_im(output[0]) = tmp95 + tmp96;	/* bin 0: sum of all twelve imaginary parts */
+	  }
+	  tmp23 = tmp16 - tmp21;
+	  tmp34 = tmp28 - tmp33;
+	  c_im(output[3 * ostride]) = tmp23 + tmp34;
+	  c_im(output[9 * ostride]) = tmp34 - tmp23;
+	  {	/* bins 1, 4, 7 and 10 */
+	       fftw_real tmp63;
+	       fftw_real tmp83;
+	       fftw_real tmp82;
+	       fftw_real tmp84;
+	       fftw_real tmp66;
+	       fftw_real tmp70;
+	       fftw_real tmp69;
+	       fftw_real tmp71;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp61;
+		    fftw_real tmp62;
+		    fftw_real tmp76;
+		    fftw_real tmp81;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp61 = tmp36 + tmp35;
+		    tmp62 = tmp39 + tmp38;
+		    tmp63 = tmp61 - tmp62;
+		    tmp83 = tmp61 + tmp62;
+		    tmp76 = tmp72 + tmp75;
+		    tmp81 = tmp77 + tmp80;
+		    tmp82 = tmp76 - tmp81;
+		    tmp84 = tmp76 + tmp81;
+	       }
+	       {
+		    fftw_real tmp64;
+		    fftw_real tmp65;
+		    fftw_real tmp67;
+		    fftw_real tmp68;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp64 = tmp42 + tmp45;
+		    tmp65 = tmp47 + tmp50;
+		    tmp66 = tmp64 - tmp65;
+		    tmp70 = tmp64 + tmp65;
+		    tmp67 = tmp53 + tmp54;
+		    tmp68 = tmp56 + tmp57;
+		    tmp69 = tmp67 + tmp68;
+		    tmp71 = tmp67 - tmp68;
+	       }
+	       c_im(output[ostride]) = tmp63 - tmp66;
+	       c_im(output[7 * ostride]) = tmp63 + tmp66;
+	       c_re(output[10 * ostride]) = tmp69 - tmp70;
+	       c_re(output[4 * ostride]) = tmp69 + tmp70;
+	       c_re(output[7 * ostride]) = tmp71 - tmp82;
+	       c_re(output[ostride]) = tmp71 + tmp82;
+	       c_im(output[10 * ostride]) = tmp83 - tmp84;
+	       c_im(output[4 * ostride]) = tmp83 + tmp84;
+	  }
+	  {	/* bins 2, 5, 8 and 11: same structure as the previous group with the scaled difference terms subtracted instead of added */
+	       fftw_real tmp41;
+	       fftw_real tmp89;
+	       fftw_real tmp88;
+	       fftw_real tmp90;
+	       fftw_real tmp52;
+	       fftw_real tmp60;
+	       fftw_real tmp59;
+	       fftw_real tmp85;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp40;
+		    fftw_real tmp86;
+		    fftw_real tmp87;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = tmp35 - tmp36;
+		    tmp40 = tmp38 - tmp39;
+		    tmp41 = tmp37 - tmp40;
+		    tmp89 = tmp37 + tmp40;
+		    tmp86 = tmp75 - tmp72;
+		    tmp87 = tmp80 - tmp77;
+		    tmp88 = tmp86 - tmp87;
+		    tmp90 = tmp86 + tmp87;
+	       }
+	       {
+		    fftw_real tmp46;
+		    fftw_real tmp51;
+		    fftw_real tmp55;
+		    fftw_real tmp58;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp46 = tmp42 - tmp45;
+		    tmp51 = tmp47 - tmp50;
+		    tmp52 = tmp46 - tmp51;
+		    tmp60 = tmp46 + tmp51;
+		    tmp55 = tmp53 - tmp54;
+		    tmp58 = tmp56 - tmp57;
+		    tmp59 = tmp55 + tmp58;
+		    tmp85 = tmp55 - tmp58;
+	       }
+	       c_im(output[5 * ostride]) = tmp41 - tmp52;
+	       c_im(output[11 * ostride]) = tmp41 + tmp52;
+	       c_re(output[2 * ostride]) = tmp59 - tmp60;
+	       c_re(output[8 * ostride]) = tmp59 + tmp60;
+	       c_re(output[11 * ostride]) = tmp85 - tmp88;
+	       c_re(output[5 * ostride]) = tmp85 + tmp88;
+	       c_im(output[2 * ostride]) = tmp89 - tmp90;
+	       c_im(output[8 * ostride]) = tmp89 + tmp90;
+	  }
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_12_desc = {	/* descriptor record for the fftw_no_twiddle_12 codelet defined above */
+     "fftw_no_twiddle_12",	/* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_12,	/* entry point, cast to an unprototyped function pointer */
+     12,	/* transform length handled by the codelet */
+     FFTW_FORWARD,	/* transform direction */
+     FFTW_NOTW,	/* codelet kind: no-twiddle */
+     265,	/* NOTE(review): presumably the codelet signature/id; confirm against fftw_codelet_desc in fftw-int.h */
+     0,	/* NOTE(review): presumably the twiddle-factor count (none here); confirm in fftw-int.h */
+     (const int *) 0,	/* NOTE(review): presumably the twiddle-order table, null here; confirm in fftw-int.h */
+};
diff --git a/src/fftw/fn_13.c b/src/fftw/fn_13.c
new file mode 100644
index 0000000..7b41c37
--- /dev/null
+++ b/src/fftw/fn_13.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:40 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 13 */
+
+/*
+ * This function contains 176 FP additions, 68 FP multiplications,
+ * (or, 138 additions, 30 multiplications, 38 fused multiply/add),
+ * 50 stack variables, and 52 memory accesses
+ */
+static const fftw_real K1_732050807 =	/* names are K + the leading decimal digits of the value (with _ after an integer part) */
+FFTW_KONST(+1.732050807568877293527446341505872366942805254);	/* sqrt(3) */
+static const fftw_real K256247671 =
+FFTW_KONST(+0.256247671582936600958684654061725059144125175);
+static const fftw_real K156891391 =
+FFTW_KONST(+0.156891391051584611046832726756003269660212636);
+static const fftw_real K300238635 =
+FFTW_KONST(+0.300238635966332641462884626667381504676006424);
+static const fftw_real K011599105 =
+FFTW_KONST(+0.011599105605768290721655456654083252189827041);
+static const fftw_real K174138601 =
+FFTW_KONST(+0.174138601152135905005660794929264742616964676);
+static const fftw_real K575140729 =
+FFTW_KONST(+0.575140729474003121368385547455453388461001608);
+static const fftw_real K2_000000000 =
+FFTW_KONST(+2.000000000000000000000000000000000000000000000);	/* 2 */
+static const fftw_real K083333333 =
+FFTW_KONST(+0.083333333333333333333333333333333333333333333);	/* 1/12 */
+static const fftw_real K075902986 =
+FFTW_KONST(+0.075902986037193865983102897245103540356428373);
+static const fftw_real K251768516 =
+FFTW_KONST(+0.251768516431883313623436926934233488546674281);
+static const fftw_real K258260390 =
+FFTW_KONST(+0.258260390311744861420450644284508567852516811);
+static const fftw_real K132983124 =
+FFTW_KONST(+0.132983124607418643793760531921092974399165133);
+static const fftw_real K265966249 =
+FFTW_KONST(+0.265966249214837287587521063842185948798330267);	/* twice K132983124 */
+static const fftw_real K387390585 =
+FFTW_KONST(+0.387390585467617292130675966426762851778775217);	/* 1.5 times K258260390 */
+static const fftw_real K503537032 =
+FFTW_KONST(+0.503537032863766627246873853868466977093348562);	/* twice K251768516 */
+static const fftw_real K113854479 =
+FFTW_KONST(+0.113854479055790798974654345867655310534642560);	/* 1.5 times K075902986 */
+static const fftw_real K300462606 =
+FFTW_KONST(+0.300462606288665774426601772289207995520941381);
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);	/* sqrt(3)/2 */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);	/* 1/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_13.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_13.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_13.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_13(const fftw_complex *input, fftw_complex *output,
+			int istride, int ostride)
+{
+     fftw_real tmp1;
+     fftw_real tmp146;
+     fftw_real tmp115;
+     fftw_real tmp120;
+     fftw_real tmp125;
+     fftw_real tmp31;
+     fftw_real tmp40;
+     fftw_real tmp116;
+     fftw_real tmp24;
+     fftw_real tmp43;
+     fftw_real tmp36;
+     fftw_real tmp41;
+     fftw_real tmp123;
+     fftw_real tmp126;
+     fftw_real tmp56;
+     fftw_real tmp141;
+     fftw_real tmp147;
+     fftw_real tmp134;
+     fftw_real tmp143;
+     fftw_real tmp67;
+     fftw_real tmp73;
+     fftw_real tmp82;
+     fftw_real tmp137;
+     fftw_real tmp144;
+     fftw_real tmp80;
+     fftw_real tmp83;
+     ASSERT_ALIGNED_DOUBLE;
+     tmp1 = c_re(input[0]);
+     tmp146 = c_im(input[0]);
+     {
+	  fftw_real tmp15;
+	  fftw_real tmp113;
+	  fftw_real tmp18;
+	  fftw_real tmp29;
+	  fftw_real tmp21;
+	  fftw_real tmp28;
+	  fftw_real tmp22;
+	  fftw_real tmp114;
+	  fftw_real tmp6;
+	  fftw_real tmp32;
+	  fftw_real tmp25;
+	  fftw_real tmp11;
+	  fftw_real tmp33;
+	  fftw_real tmp26;
+	  fftw_real tmp13;
+	  fftw_real tmp14;
+	  fftw_real tmp12;
+	  fftw_real tmp23;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp13 = c_re(input[8 * istride]);
+	  tmp14 = c_re(input[5 * istride]);
+	  tmp15 = tmp13 + tmp14;
+	  tmp113 = tmp13 - tmp14;
+	  {
+	       fftw_real tmp16;
+	       fftw_real tmp17;
+	       fftw_real tmp19;
+	       fftw_real tmp20;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp16 = c_re(input[6 * istride]);
+	       tmp17 = c_re(input[11 * istride]);
+	       tmp18 = tmp16 + tmp17;
+	       tmp29 = tmp16 - tmp17;
+	       tmp19 = c_re(input[2 * istride]);
+	       tmp20 = c_re(input[7 * istride]);
+	       tmp21 = tmp19 + tmp20;
+	       tmp28 = tmp19 - tmp20;
+	  }
+	  tmp22 = tmp18 + tmp21;
+	  tmp114 = tmp29 + tmp28;
+	  {
+	       fftw_real tmp2;
+	       fftw_real tmp3;
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp2 = c_re(input[istride]);
+	       tmp3 = c_re(input[3 * istride]);
+	       tmp4 = c_re(input[9 * istride]);
+	       tmp5 = tmp3 + tmp4;
+	       tmp6 = tmp2 + tmp5;
+	       tmp32 = tmp2 - (K500000000 * tmp5);
+	       tmp25 = tmp3 - tmp4;
+	  }
+	  {
+	       fftw_real tmp7;
+	       fftw_real tmp8;
+	       fftw_real tmp9;
+	       fftw_real tmp10;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp7 = c_re(input[12 * istride]);
+	       tmp8 = c_re(input[4 * istride]);
+	       tmp9 = c_re(input[10 * istride]);
+	       tmp10 = tmp8 + tmp9;
+	       tmp11 = tmp7 + tmp10;
+	       tmp33 = tmp7 - (K500000000 * tmp10);
+	       tmp26 = tmp8 - tmp9;
+	  }
+	  tmp115 = tmp113 - tmp114;
+	  {
+	       fftw_real tmp118;
+	       fftw_real tmp119;
+	       fftw_real tmp27;
+	       fftw_real tmp30;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp118 = tmp113 + (K500000000 * tmp114);
+	       tmp119 = K866025403 * (tmp25 + tmp26);
+	       tmp120 = tmp118 - tmp119;
+	       tmp125 = tmp119 + tmp118;
+	       tmp27 = tmp25 - tmp26;
+	       tmp30 = tmp28 - tmp29;
+	       tmp31 = tmp27 + tmp30;
+	       tmp40 = tmp30 - tmp27;
+	  }
+	  tmp116 = tmp6 - tmp11;
+	  tmp12 = tmp6 + tmp11;
+	  tmp23 = tmp15 + tmp22;
+	  tmp24 = tmp12 + tmp23;
+	  tmp43 = K300462606 * (tmp12 - tmp23);
+	  {
+	       fftw_real tmp34;
+	       fftw_real tmp35;
+	       fftw_real tmp121;
+	       fftw_real tmp122;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp34 = tmp32 + tmp33;
+	       tmp35 = tmp15 - (K500000000 * tmp22);
+	       tmp36 = tmp34 + tmp35;
+	       tmp41 = tmp34 - tmp35;
+	       tmp121 = tmp32 - tmp33;
+	       tmp122 = K866025403 * (tmp18 - tmp21);
+	       tmp123 = tmp121 + tmp122;
+	       tmp126 = tmp121 - tmp122;
+	  }
+     }
+     {
+	  fftw_real tmp59;
+	  fftw_real tmp131;
+	  fftw_real tmp62;
+	  fftw_real tmp78;
+	  fftw_real tmp65;
+	  fftw_real tmp77;
+	  fftw_real tmp66;
+	  fftw_real tmp132;
+	  fftw_real tmp50;
+	  fftw_real tmp74;
+	  fftw_real tmp69;
+	  fftw_real tmp55;
+	  fftw_real tmp75;
+	  fftw_real tmp70;
+	  fftw_real tmp57;
+	  fftw_real tmp58;
+	  fftw_real tmp71;
+	  fftw_real tmp72;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp57 = c_im(input[8 * istride]);
+	  tmp58 = c_im(input[5 * istride]);
+	  tmp59 = tmp57 - tmp58;
+	  tmp131 = tmp57 + tmp58;
+	  {
+	       fftw_real tmp60;
+	       fftw_real tmp61;
+	       fftw_real tmp63;
+	       fftw_real tmp64;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp60 = c_im(input[6 * istride]);
+	       tmp61 = c_im(input[11 * istride]);
+	       tmp62 = tmp60 - tmp61;
+	       tmp78 = tmp60 + tmp61;
+	       tmp63 = c_im(input[2 * istride]);
+	       tmp64 = c_im(input[7 * istride]);
+	       tmp65 = tmp63 - tmp64;
+	       tmp77 = tmp63 + tmp64;
+	  }
+	  tmp66 = tmp62 + tmp65;
+	  tmp132 = tmp78 + tmp77;
+	  {
+	       fftw_real tmp46;
+	       fftw_real tmp47;
+	       fftw_real tmp48;
+	       fftw_real tmp49;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp46 = c_im(input[istride]);
+	       tmp47 = c_im(input[3 * istride]);
+	       tmp48 = c_im(input[9 * istride]);
+	       tmp49 = tmp47 + tmp48;
+	       tmp50 = tmp46 + tmp49;
+	       tmp74 = tmp46 - (K500000000 * tmp49);
+	       tmp69 = tmp47 - tmp48;
+	  }
+	  {
+	       fftw_real tmp51;
+	       fftw_real tmp52;
+	       fftw_real tmp53;
+	       fftw_real tmp54;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp51 = c_im(input[12 * istride]);
+	       tmp52 = c_im(input[4 * istride]);
+	       tmp53 = c_im(input[10 * istride]);
+	       tmp54 = tmp52 + tmp53;
+	       tmp55 = tmp51 + tmp54;
+	       tmp75 = tmp51 - (K500000000 * tmp54);
+	       tmp70 = tmp52 - tmp53;
+	  }
+	  tmp56 = tmp50 - tmp55;
+	  {
+	       fftw_real tmp139;
+	       fftw_real tmp140;
+	       fftw_real tmp130;
+	       fftw_real tmp133;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp139 = tmp50 + tmp55;
+	       tmp140 = tmp131 + tmp132;
+	       tmp141 = K300462606 * (tmp139 - tmp140);
+	       tmp147 = tmp139 + tmp140;
+	       tmp130 = tmp74 + tmp75;
+	       tmp133 = tmp131 - (K500000000 * tmp132);
+	       tmp134 = tmp130 - tmp133;
+	       tmp143 = tmp130 + tmp133;
+	  }
+	  tmp67 = tmp59 - tmp66;
+	  tmp71 = K866025403 * (tmp69 + tmp70);
+	  tmp72 = tmp59 + (K500000000 * tmp66);
+	  tmp73 = tmp71 - tmp72;
+	  tmp82 = tmp71 + tmp72;
+	  {
+	       fftw_real tmp135;
+	       fftw_real tmp136;
+	       fftw_real tmp76;
+	       fftw_real tmp79;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp135 = tmp62 - tmp65;
+	       tmp136 = tmp70 - tmp69;
+	       tmp137 = tmp135 - tmp136;
+	       tmp144 = tmp136 + tmp135;
+	       tmp76 = tmp74 - tmp75;
+	       tmp79 = K866025403 * (tmp77 - tmp78);
+	       tmp80 = tmp76 - tmp79;
+	       tmp83 = tmp76 + tmp79;
+	  }
+     }
+     c_re(output[0]) = tmp1 + tmp24;
+     {
+	  fftw_real tmp99;
+	  fftw_real tmp109;
+	  fftw_real tmp39;
+	  fftw_real tmp105;
+	  fftw_real tmp89;
+	  fftw_real tmp44;
+	  fftw_real tmp68;
+	  fftw_real tmp85;
+	  fftw_real tmp96;
+	  fftw_real tmp108;
+	  fftw_real tmp90;
+	  fftw_real tmp93;
+	  fftw_real tmp94;
+	  fftw_real tmp106;
+	  fftw_real tmp97;
+	  fftw_real tmp98;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp97 = (K113854479 * tmp31) - (K503537032 * tmp36);
+	  tmp98 = (K387390585 * tmp40) - (K265966249 * tmp41);
+	  tmp99 = tmp97 - tmp98;
+	  tmp109 = tmp98 + tmp97;
+	  {
+	       fftw_real tmp42;
+	       fftw_real tmp87;
+	       fftw_real tmp37;
+	       fftw_real tmp38;
+	       fftw_real tmp88;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp42 = (K132983124 * tmp40) + (K258260390 * tmp41);
+	       tmp87 = tmp43 - tmp42;
+	       tmp37 = (K251768516 * tmp31) + (K075902986 * tmp36);
+	       tmp38 = tmp1 - (K083333333 * tmp24);
+	       tmp88 = tmp38 - tmp37;
+	       tmp39 = (K2_000000000 * tmp37) + tmp38;
+	       tmp105 = tmp88 - tmp87;
+	       tmp89 = tmp87 + tmp88;
+	       tmp44 = (K2_000000000 * tmp42) + tmp43;
+	  }
+	  {
+	       fftw_real tmp81;
+	       fftw_real tmp84;
+	       fftw_real tmp91;
+	       fftw_real tmp92;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp68 = (K575140729 * tmp56) + (K174138601 * tmp67);
+	       tmp81 = (K011599105 * tmp73) + (K300238635 * tmp80);
+	       tmp84 = (K156891391 * tmp82) - (K256247671 * tmp83);
+	       tmp85 = tmp81 + tmp84;
+	       tmp96 = K1_732050807 * (tmp84 - tmp81);
+	       tmp108 = tmp85 - tmp68;
+	       tmp90 = (K174138601 * tmp56) - (K575140729 * tmp67);
+	       tmp91 = (K300238635 * tmp73) - (K011599105 * tmp80);
+	       tmp92 = (K256247671 * tmp82) + (K156891391 * tmp83);
+	       tmp93 = tmp91 + tmp92;
+	       tmp94 = tmp90 - tmp93;
+	       tmp106 = K1_732050807 * (tmp92 - tmp91);
+	  }
+	  {
+	       fftw_real tmp45;
+	       fftw_real tmp86;
+	       fftw_real tmp101;
+	       fftw_real tmp102;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp45 = tmp39 - tmp44;
+	       tmp86 = tmp68 + (K2_000000000 * tmp85);
+	       c_re(output[8 * ostride]) = tmp45 - tmp86;
+	       c_re(output[5 * ostride]) = tmp45 + tmp86;
+	       {
+		    fftw_real tmp103;
+		    fftw_real tmp104;
+		    fftw_real tmp95;
+		    fftw_real tmp100;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp103 = tmp44 + tmp39;
+		    tmp104 = tmp90 + (K2_000000000 * tmp93);
+		    c_re(output[12 * ostride]) = tmp103 - tmp104;
+		    c_re(output[ostride]) = tmp103 + tmp104;
+		    tmp95 = tmp89 - tmp94;
+		    tmp100 = tmp96 - tmp99;
+		    c_re(output[4 * ostride]) = tmp95 - tmp100;
+		    c_re(output[10 * ostride]) = tmp100 + tmp95;
+	       }
+	       tmp101 = tmp89 + tmp94;
+	       tmp102 = tmp99 + tmp96;
+	       c_re(output[3 * ostride]) = tmp101 - tmp102;
+	       c_re(output[9 * ostride]) = tmp102 + tmp101;
+	       {
+		    fftw_real tmp111;
+		    fftw_real tmp112;
+		    fftw_real tmp107;
+		    fftw_real tmp110;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp111 = tmp105 + tmp106;
+		    tmp112 = tmp109 + tmp108;
+		    c_re(output[2 * ostride]) = tmp111 - tmp112;
+		    c_re(output[7 * ostride]) = tmp112 + tmp111;
+		    tmp107 = tmp105 - tmp106;
+		    tmp110 = tmp108 - tmp109;
+		    c_re(output[6 * ostride]) = tmp107 - tmp110;
+		    c_re(output[11 * ostride]) = tmp110 + tmp107;
+	       }
+	  }
+     }
+     c_im(output[0]) = tmp147 + tmp146;
+     {
+	  fftw_real tmp160;
+	  fftw_real tmp173;
+	  fftw_real tmp142;
+	  fftw_real tmp170;
+	  fftw_real tmp164;
+	  fftw_real tmp149;
+	  fftw_real tmp117;
+	  fftw_real tmp128;
+	  fftw_real tmp165;
+	  fftw_real tmp169;
+	  fftw_real tmp152;
+	  fftw_real tmp155;
+	  fftw_real tmp157;
+	  fftw_real tmp172;
+	  fftw_real tmp158;
+	  fftw_real tmp159;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp158 = (K387390585 * tmp137) + (K265966249 * tmp134);
+	  tmp159 = (K113854479 * tmp144) + (K503537032 * tmp143);
+	  tmp160 = tmp158 + tmp159;
+	  tmp173 = tmp158 - tmp159;
+	  {
+	       fftw_real tmp138;
+	       fftw_real tmp163;
+	       fftw_real tmp145;
+	       fftw_real tmp148;
+	       fftw_real tmp162;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp138 = (K258260390 * tmp134) - (K132983124 * tmp137);
+	       tmp163 = tmp141 - tmp138;
+	       tmp145 = (K075902986 * tmp143) - (K251768516 * tmp144);
+	       tmp148 = tmp146 - (K083333333 * tmp147);
+	       tmp162 = tmp148 - tmp145;
+	       tmp142 = (K2_000000000 * tmp138) + tmp141;
+	       tmp170 = tmp163 + tmp162;
+	       tmp164 = tmp162 - tmp163;
+	       tmp149 = (K2_000000000 * tmp145) + tmp148;
+	  }
+	  {
+	       fftw_real tmp124;
+	       fftw_real tmp127;
+	       fftw_real tmp153;
+	       fftw_real tmp154;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp117 = (K575140729 * tmp115) - (K174138601 * tmp116);
+	       tmp124 = (K300238635 * tmp120) + (K011599105 * tmp123);
+	       tmp127 = (K256247671 * tmp125) + (K156891391 * tmp126);
+	       tmp128 = tmp124 - tmp127;
+	       tmp165 = K1_732050807 * (tmp127 + tmp124);
+	       tmp169 = tmp117 - tmp128;
+	       tmp152 = (K575140729 * tmp116) + (K174138601 * tmp115);
+	       tmp153 = (K156891391 * tmp125) - (K256247671 * tmp126);
+	       tmp154 = (K300238635 * tmp123) - (K011599105 * tmp120);
+	       tmp155 = tmp153 + tmp154;
+	       tmp157 = tmp155 - tmp152;
+	       tmp172 = K1_732050807 * (tmp154 - tmp153);
+	  }
+	  {
+	       fftw_real tmp129;
+	       fftw_real tmp150;
+	       fftw_real tmp167;
+	       fftw_real tmp168;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp129 = tmp117 + (K2_000000000 * tmp128);
+	       tmp150 = tmp142 + tmp149;
+	       c_im(output[ostride]) = tmp129 + tmp150;
+	       c_im(output[12 * ostride]) = tmp150 - tmp129;
+	       {
+		    fftw_real tmp151;
+		    fftw_real tmp156;
+		    fftw_real tmp161;
+		    fftw_real tmp166;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp151 = tmp149 - tmp142;
+		    tmp156 = tmp152 + (K2_000000000 * tmp155);
+		    c_im(output[5 * ostride]) = tmp151 - tmp156;
+		    c_im(output[8 * ostride]) = tmp156 + tmp151;
+		    tmp161 = tmp157 + tmp160;
+		    tmp166 = tmp164 - tmp165;
+		    c_im(output[2 * ostride]) = tmp161 + tmp166;
+		    c_im(output[7 * ostride]) = tmp166 - tmp161;
+	       }
+	       tmp167 = tmp165 + tmp164;
+	       tmp168 = tmp160 - tmp157;
+	       c_im(output[6 * ostride]) = tmp167 - tmp168;
+	       c_im(output[11 * ostride]) = tmp168 + tmp167;
+	       {
+		    fftw_real tmp175;
+		    fftw_real tmp176;
+		    fftw_real tmp171;
+		    fftw_real tmp174;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp175 = tmp170 - tmp169;
+		    tmp176 = tmp172 - tmp173;
+		    c_im(output[4 * ostride]) = tmp175 - tmp176;
+		    c_im(output[10 * ostride]) = tmp176 + tmp175;
+		    tmp171 = tmp169 + tmp170;
+		    tmp174 = tmp172 + tmp173;
+		    c_im(output[3 * ostride]) = tmp171 - tmp174;
+		    c_im(output[9 * ostride]) = tmp174 + tmp171;
+	       }
+	  }
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_13_desc = { /* descriptor record for fftw_no_twiddle_13; positional initializer, field layout is declared outside this file */
+     "fftw_no_twiddle_13", /* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_13, /* codelet entry point, cast to an unprototyped function pointer */
+     13, /* presumably the transform size -- confirm against the fftw_codelet_desc declaration */
+     FFTW_FORWARD, /* presumably the transform direction -- confirm */
+     FFTW_NOTW, /* presumably the codelet kind (no twiddle factors) -- confirm */
+     287, /* NOTE(review): meaning of this integer is not visible here; check the struct declaration */
+     0, /* NOTE(review): presumably a twiddle count, zero for a no-twiddle codelet -- confirm */
+     (const int *) 0, /* null pointer; presumably an unused twiddle-order table -- confirm */
+};
diff --git a/src/fftw/fn_14.c b/src/fftw/fn_14.c
new file mode 100644
index 0000000..faf1bd8
--- /dev/null
+++ b/src/fftw/fn_14.c
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:41 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 14 */
+
+/*
+ * This function contains 148 FP additions, 72 FP multiplications,
+ * (or, 148 additions, 72 multiplications, 0 fused multiply/add),
+ * 36 stack variables, and 56 memory accesses
+ */
+static const fftw_real K900968867 =
+FFTW_KONST(+0.900968867902419126236102319507445051165919162); /* cos(pi/7) */
+static const fftw_real K222520933 =
+FFTW_KONST(+0.222520933956314404288902564496794759466355569); /* cos(3*pi/7) */
+static const fftw_real K623489801 =
+FFTW_KONST(+0.623489801858733530525004884004239810632274731); /* cos(2*pi/7) */
+static const fftw_real K781831482 =
+FFTW_KONST(+0.781831482468029808708444526674057750232334519); /* sin(2*pi/7) */
+static const fftw_real K974927912 =
+FFTW_KONST(+0.974927912181823607018131682993931217232785801); /* sin(3*pi/7) */
+static const fftw_real K433883739 =
+FFTW_KONST(+0.433883739117558120475768332848358754609990728); /* sin(pi/7) */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_14.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_14.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_14.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_14(const fftw_complex *input, fftw_complex *output, /* genfft-generated 14-point codelet (see the "Generated by" line above) */
+			int istride, int ostride) /* element k is read at input[k * istride] and written at output[k * ostride], k = 0..13 */
+{
+     fftw_real tmp3;
+     fftw_real tmp25;
+     fftw_real tmp84;
+     fftw_real tmp93;
+     fftw_real tmp10;
+     fftw_real tmp77;
+     fftw_real tmp28;
+     fftw_real tmp97;
+     fftw_real tmp42;
+     fftw_real tmp86;
+     fftw_real tmp65;
+     fftw_real tmp92;
+     fftw_real tmp17;
+     fftw_real tmp79;
+     fftw_real tmp31;
+     fftw_real tmp99;
+     fftw_real tmp56;
+     fftw_real tmp81;
+     fftw_real tmp68;
+     fftw_real tmp94;
+     fftw_real tmp24;
+     fftw_real tmp78;
+     fftw_real tmp34;
+     fftw_real tmp98;
+     fftw_real tmp49;
+     fftw_real tmp85;
+     fftw_real tmp71;
+     fftw_real tmp95;
+     ASSERT_ALIGNED_DOUBLE; /* macro defined outside this file; presumably the check requested by -magic-alignment-check -- confirm */
+     { /* inputs 0 and 7: tmp3/tmp84 = re/im difference, tmp25/tmp93 = re/im sum */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp82;
+	  fftw_real tmp83;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[7 * istride]);
+	  tmp3 = tmp1 - tmp2;
+	  tmp25 = tmp1 + tmp2;
+	  tmp82 = c_im(input[0]);
+	  tmp83 = c_im(input[7 * istride]);
+	  tmp84 = tmp82 - tmp83;
+	  tmp93 = tmp82 + tmp83;
+     }
+     { /* real parts of input pairs (2,9) and (12,5): per-pair difference and sum, then combined */
+	  fftw_real tmp6;
+	  fftw_real tmp26;
+	  fftw_real tmp9;
+	  fftw_real tmp27;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       fftw_real tmp7;
+	       fftw_real tmp8;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp4 = c_re(input[2 * istride]);
+	       tmp5 = c_re(input[9 * istride]);
+	       tmp6 = tmp4 - tmp5;
+	       tmp26 = tmp4 + tmp5;
+	       tmp7 = c_re(input[12 * istride]);
+	       tmp8 = c_re(input[5 * istride]);
+	       tmp9 = tmp7 - tmp8;
+	       tmp27 = tmp7 + tmp8;
+	  }
+	  tmp10 = tmp6 + tmp9;
+	  tmp77 = tmp9 - tmp6;
+	  tmp28 = tmp26 + tmp27;
+	  tmp97 = tmp27 - tmp26;
+     }
+     { /* imaginary parts of input pairs (2,9) and (12,5) */
+	  fftw_real tmp38;
+	  fftw_real tmp63;
+	  fftw_real tmp41;
+	  fftw_real tmp64;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp36;
+	       fftw_real tmp37;
+	       fftw_real tmp39;
+	       fftw_real tmp40;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp36 = c_im(input[2 * istride]);
+	       tmp37 = c_im(input[9 * istride]);
+	       tmp38 = tmp36 - tmp37;
+	       tmp63 = tmp36 + tmp37;
+	       tmp39 = c_im(input[12 * istride]);
+	       tmp40 = c_im(input[5 * istride]);
+	       tmp41 = tmp39 - tmp40;
+	       tmp64 = tmp39 + tmp40;
+	  }
+	  tmp42 = tmp38 - tmp41;
+	  tmp86 = tmp38 + tmp41;
+	  tmp65 = tmp63 - tmp64;
+	  tmp92 = tmp63 + tmp64;
+     }
+     { /* real parts of input pairs (4,11) and (10,3) */
+	  fftw_real tmp13;
+	  fftw_real tmp29;
+	  fftw_real tmp16;
+	  fftw_real tmp30;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       fftw_real tmp14;
+	       fftw_real tmp15;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp11 = c_re(input[4 * istride]);
+	       tmp12 = c_re(input[11 * istride]);
+	       tmp13 = tmp11 - tmp12;
+	       tmp29 = tmp11 + tmp12;
+	       tmp14 = c_re(input[10 * istride]);
+	       tmp15 = c_re(input[3 * istride]);
+	       tmp16 = tmp14 - tmp15;
+	       tmp30 = tmp14 + tmp15;
+	  }
+	  tmp17 = tmp13 + tmp16;
+	  tmp79 = tmp16 - tmp13;
+	  tmp31 = tmp29 + tmp30;
+	  tmp99 = tmp29 - tmp30;
+     }
+     { /* imaginary parts of input pairs (4,11) and (10,3) */
+	  fftw_real tmp52;
+	  fftw_real tmp67;
+	  fftw_real tmp55;
+	  fftw_real tmp66;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp50;
+	       fftw_real tmp51;
+	       fftw_real tmp53;
+	       fftw_real tmp54;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp50 = c_im(input[4 * istride]);
+	       tmp51 = c_im(input[11 * istride]);
+	       tmp52 = tmp50 - tmp51;
+	       tmp67 = tmp50 + tmp51;
+	       tmp53 = c_im(input[10 * istride]);
+	       tmp54 = c_im(input[3 * istride]);
+	       tmp55 = tmp53 - tmp54;
+	       tmp66 = tmp53 + tmp54;
+	  }
+	  tmp56 = tmp52 - tmp55;
+	  tmp81 = tmp52 + tmp55;
+	  tmp68 = tmp66 - tmp67;
+	  tmp94 = tmp67 + tmp66;
+     }
+     { /* real parts of input pairs (6,13) and (8,1) */
+	  fftw_real tmp20;
+	  fftw_real tmp32;
+	  fftw_real tmp23;
+	  fftw_real tmp33;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp18;
+	       fftw_real tmp19;
+	       fftw_real tmp21;
+	       fftw_real tmp22;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp18 = c_re(input[6 * istride]);
+	       tmp19 = c_re(input[13 * istride]);
+	       tmp20 = tmp18 - tmp19;
+	       tmp32 = tmp18 + tmp19;
+	       tmp21 = c_re(input[8 * istride]);
+	       tmp22 = c_re(input[istride]);
+	       tmp23 = tmp21 - tmp22;
+	       tmp33 = tmp21 + tmp22;
+	  }
+	  tmp24 = tmp20 + tmp23;
+	  tmp78 = tmp23 - tmp20;
+	  tmp34 = tmp32 + tmp33;
+	  tmp98 = tmp32 - tmp33;
+     }
+     { /* imaginary parts of input pairs (6,13) and (8,1) */
+	  fftw_real tmp45;
+	  fftw_real tmp70;
+	  fftw_real tmp48;
+	  fftw_real tmp69;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp43;
+	       fftw_real tmp44;
+	       fftw_real tmp46;
+	       fftw_real tmp47;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp43 = c_im(input[6 * istride]);
+	       tmp44 = c_im(input[13 * istride]);
+	       tmp45 = tmp43 - tmp44;
+	       tmp70 = tmp43 + tmp44;
+	       tmp46 = c_im(input[8 * istride]);
+	       tmp47 = c_im(input[istride]);
+	       tmp48 = tmp46 - tmp47;
+	       tmp69 = tmp46 + tmp47;
+	  }
+	  tmp49 = tmp45 - tmp48;
+	  tmp85 = tmp45 + tmp48;
+	  tmp71 = tmp69 - tmp70;
+	  tmp95 = tmp70 + tmp69;
+     }
+     { /* stores the real part of all 14 outputs */
+	  fftw_real tmp57;
+	  fftw_real tmp35;
+	  fftw_real tmp72;
+	  fftw_real tmp62;
+	  ASSERT_ALIGNED_DOUBLE;
+	  c_re(output[7 * ostride]) = tmp3 + tmp10 + tmp17 + tmp24; /* odd-indexed outputs (7, 11, 3, 5, 9, 13, 1) use only the pair-difference terms */
+	  tmp57 =
+	      (K433883739 * tmp42) + (K974927912 * tmp49) -
+	      (K781831482 * tmp56);
+	  tmp35 =
+	      tmp3 + (K623489801 * tmp17) - (K222520933 * tmp24) -
+	      (K900968867 * tmp10);
+	  c_re(output[11 * ostride]) = tmp35 - tmp57;
+	  c_re(output[3 * ostride]) = tmp35 + tmp57;
+	  {
+	       fftw_real tmp59;
+	       fftw_real tmp58;
+	       fftw_real tmp61;
+	       fftw_real tmp60;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp59 =
+		   (K974927912 * tmp42) - (K433883739 * tmp56) -
+		   (K781831482 * tmp49);
+	       tmp58 =
+		   tmp3 + (K623489801 * tmp24) - (K900968867 * tmp17) -
+		   (K222520933 * tmp10);
+	       c_re(output[5 * ostride]) = tmp58 - tmp59;
+	       c_re(output[9 * ostride]) = tmp58 + tmp59;
+	       tmp61 =
+		   (K781831482 * tmp42) + (K433883739 * tmp49) +
+		   (K974927912 * tmp56);
+	       tmp60 =
+		   tmp3 + (K623489801 * tmp10) - (K900968867 * tmp24) -
+		   (K222520933 * tmp17);
+	       c_re(output[13 * ostride]) = tmp60 - tmp61;
+	       c_re(output[ostride]) = tmp60 + tmp61;
+	  }
+	  c_re(output[0]) = tmp25 + tmp28 + tmp31 + tmp34; /* even-indexed outputs (0, 6, 8, 4, 10, 12, 2) use only the pair-sum terms */
+	  tmp72 =
+	      (K781831482 * tmp65) - (K974927912 * tmp68) -
+	      (K433883739 * tmp71);
+	  tmp62 =
+	      tmp25 + (K623489801 * tmp28) - (K900968867 * tmp34) -
+	      (K222520933 * tmp31);
+	  c_re(output[6 * ostride]) = tmp62 - tmp72;
+	  c_re(output[8 * ostride]) = tmp62 + tmp72;
+	  {
+	       fftw_real tmp74;
+	       fftw_real tmp73;
+	       fftw_real tmp76;
+	       fftw_real tmp75;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp74 =
+		   (K433883739 * tmp65) + (K781831482 * tmp68) -
+		   (K974927912 * tmp71);
+	       tmp73 =
+		   tmp25 + (K623489801 * tmp31) - (K222520933 * tmp34) -
+		   (K900968867 * tmp28);
+	       c_re(output[4 * ostride]) = tmp73 - tmp74;
+	       c_re(output[10 * ostride]) = tmp73 + tmp74;
+	       tmp76 =
+		   (K974927912 * tmp65) + (K781831482 * tmp71) +
+		   (K433883739 * tmp68);
+	       tmp75 =
+		   tmp25 + (K623489801 * tmp34) - (K900968867 * tmp31) -
+		   (K222520933 * tmp28);
+	       c_re(output[12 * ostride]) = tmp75 - tmp76;
+	       c_re(output[2 * ostride]) = tmp75 + tmp76;
+	  }
+     }
+     { /* stores the imaginary part of all 14 outputs */
+	  fftw_real tmp91;
+	  fftw_real tmp90;
+	  fftw_real tmp103;
+	  fftw_real tmp104;
+	  ASSERT_ALIGNED_DOUBLE;
+	  c_im(output[7 * ostride]) = tmp86 + tmp85 + tmp81 + tmp84; /* odd-indexed outputs (7, 5, 9, 1, 13, 3, 11) use only the pair-difference terms */
+	  tmp91 =
+	      (K974927912 * tmp77) - (K781831482 * tmp78) -
+	      (K433883739 * tmp79);
+	  tmp90 =
+	      (K623489801 * tmp85) + tmp84 - (K900968867 * tmp81) -
+	      (K222520933 * tmp86);
+	  c_im(output[5 * ostride]) = tmp90 - tmp91;
+	  c_im(output[9 * ostride]) = tmp91 + tmp90;
+	  {
+	       fftw_real tmp88;
+	       fftw_real tmp89;
+	       fftw_real tmp80;
+	       fftw_real tmp87;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp88 =
+		   (K781831482 * tmp77) + (K974927912 * tmp79) +
+		   (K433883739 * tmp78);
+	       tmp89 =
+		   (K623489801 * tmp86) + tmp84 - (K222520933 * tmp81) -
+		   (K900968867 * tmp85);
+	       c_im(output[ostride]) = tmp88 + tmp89;
+	       c_im(output[13 * ostride]) = tmp89 - tmp88;
+	       tmp80 =
+		   (K433883739 * tmp77) + (K974927912 * tmp78) -
+		   (K781831482 * tmp79);
+	       tmp87 =
+		   (K623489801 * tmp81) + tmp84 - (K222520933 * tmp85) -
+		   (K900968867 * tmp86);
+	       c_im(output[3 * ostride]) = tmp80 + tmp87;
+	       c_im(output[11 * ostride]) = tmp87 - tmp80;
+	  }
+	  c_im(output[0]) = tmp92 + tmp95 + tmp94 + tmp93; /* even-indexed outputs (0, 2, 12, 6, 8, 4, 10) use only the pair-sum terms */
+	  tmp103 =
+	      (K974927912 * tmp97) + (K433883739 * tmp99) +
+	      (K781831482 * tmp98);
+	  tmp104 =
+	      (K623489801 * tmp95) + tmp93 - (K900968867 * tmp94) -
+	      (K222520933 * tmp92);
+	  c_im(output[2 * ostride]) = tmp103 + tmp104;
+	  c_im(output[12 * ostride]) = tmp104 - tmp103;
+	  {
+	       fftw_real tmp100;
+	       fftw_real tmp96;
+	       fftw_real tmp102;
+	       fftw_real tmp101;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp100 =
+		   (K781831482 * tmp97) - (K433883739 * tmp98) -
+		   (K974927912 * tmp99);
+	       tmp96 =
+		   (K623489801 * tmp92) + tmp93 - (K222520933 * tmp94) -
+		   (K900968867 * tmp95);
+	       c_im(output[6 * ostride]) = tmp96 - tmp100;
+	       c_im(output[8 * ostride]) = tmp100 + tmp96;
+	       tmp102 =
+		   (K433883739 * tmp97) + (K781831482 * tmp99) -
+		   (K974927912 * tmp98);
+	       tmp101 =
+		   (K623489801 * tmp94) + tmp93 - (K222520933 * tmp95) -
+		   (K900968867 * tmp92);
+	       c_im(output[4 * ostride]) = tmp101 - tmp102;
+	       c_im(output[10 * ostride]) = tmp102 + tmp101;
+	  }
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_14_desc = { /* descriptor record for fftw_no_twiddle_14; positional initializer, field layout is declared in an included header */
+     "fftw_no_twiddle_14", /* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_14, /* codelet entry point, cast to an unprototyped function pointer */
+     14, /* presumably the transform size -- confirm against the fftw_codelet_desc declaration */
+     FFTW_FORWARD, /* presumably the transform direction -- confirm */
+     FFTW_NOTW, /* presumably the codelet kind (no twiddle factors) -- confirm */
+     309, /* NOTE(review): meaning of this integer is not visible here; check the struct declaration */
+     0, /* NOTE(review): presumably a twiddle count, zero for a no-twiddle codelet -- confirm */
+     (const int *) 0, /* null pointer; presumably an unused twiddle-order table -- confirm */
+};
diff --git a/src/fftw/fn_15.c b/src/fftw/fn_15.c
new file mode 100644
index 0000000..22eaa43
--- /dev/null
+++ b/src/fftw/fn_15.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:42 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 15 */
+
+/*
+ * This function contains 156 FP additions, 56 FP multiplications,
+ * (or, 128 additions, 28 multiplications, 28 fused multiply/add),
+ * 62 stack variables, and 60 memory accesses
+ */
+static const fftw_real K587785252 =
+FFTW_KONST(+0.587785252292473129168705954639072768597652438); /* sin(pi/5) */
+static const fftw_real K951056516 =
+FFTW_KONST(+0.951056516295153572116439333379382143405698634); /* sin(2*pi/5) */
+static const fftw_real K250000000 =
+FFTW_KONST(+0.250000000000000000000000000000000000000000000); /* 1/4 */
+static const fftw_real K559016994 =
+FFTW_KONST(+0.559016994374947424102293417182819058860154590); /* sqrt(5)/4 */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000); /* 1/2 */
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_15.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_15.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_15.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_15(const fftw_complex *input, fftw_complex *output, /* genfft-generated 15-point codelet (see the "Generated by" line above) */
+			int istride, int ostride) /* element k is read at input[k * istride] and written at output[k * ostride], k = 0..14 */
+{
+     fftw_real tmp5;
+     fftw_real tmp33;
+     fftw_real tmp57;
+     fftw_real tmp145;
+     fftw_real tmp124;
+     fftw_real tmp136;
+     fftw_real tmp21;
+     fftw_real tmp26;
+     fftw_real tmp27;
+     fftw_real tmp49;
+     fftw_real tmp54;
+     fftw_real tmp55;
+     fftw_real tmp108;
+     fftw_real tmp109;
+     fftw_real tmp147;
+     fftw_real tmp61;
+     fftw_real tmp62;
+     fftw_real tmp63;
+     fftw_real tmp96;
+     fftw_real tmp97;
+     fftw_real tmp138;
+     fftw_real tmp83;
+     fftw_real tmp88;
+     fftw_real tmp118;
+     fftw_real tmp10;
+     fftw_real tmp15;
+     fftw_real tmp16;
+     fftw_real tmp38;
+     fftw_real tmp43;
+     fftw_real tmp44;
+     fftw_real tmp111;
+     fftw_real tmp112;
+     fftw_real tmp146;
+     fftw_real tmp58;
+     fftw_real tmp59;
+     fftw_real tmp60;
+     fftw_real tmp99;
+     fftw_real tmp100;
+     fftw_real tmp137;
+     fftw_real tmp72;
+     fftw_real tmp77;
+     fftw_real tmp117;
+     ASSERT_ALIGNED_DOUBLE; /* macro defined outside this file; presumably the check requested by -magic-alignment-check -- confirm */
+     { /* combines inputs 0, 5 and 10 (real and imaginary parts) */
+	  fftw_real tmp1;
+	  fftw_real tmp121;
+	  fftw_real tmp4;
+	  fftw_real tmp120;
+	  fftw_real tmp32;
+	  fftw_real tmp122;
+	  fftw_real tmp29;
+	  fftw_real tmp123;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp121 = c_im(input[0]);
+	  {
+	       fftw_real tmp2;
+	       fftw_real tmp3;
+	       fftw_real tmp30;
+	       fftw_real tmp31;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp2 = c_re(input[5 * istride]);
+	       tmp3 = c_re(input[10 * istride]);
+	       tmp4 = tmp2 + tmp3;
+	       tmp120 = K866025403 * (tmp3 - tmp2);
+	       tmp30 = c_im(input[5 * istride]);
+	       tmp31 = c_im(input[10 * istride]);
+	       tmp32 = K866025403 * (tmp30 - tmp31);
+	       tmp122 = tmp30 + tmp31;
+	  }
+	  tmp5 = tmp1 + tmp4; /* sum of the real parts of inputs 0, 5, 10 */
+	  tmp29 = tmp1 - (K500000000 * tmp4);
+	  tmp33 = tmp29 - tmp32;
+	  tmp57 = tmp29 + tmp32;
+	  tmp145 = tmp122 + tmp121; /* sum of the imaginary parts of inputs 0, 5, 10 */
+	  tmp123 = tmp121 - (K500000000 * tmp122);
+	  tmp124 = tmp120 + tmp123;
+	  tmp136 = tmp123 - tmp120;
+     }
+     { /* combines input triples (6, 11, 1) and (9, 14, 4), then pairs the two triples' results */
+	  fftw_real tmp17;
+	  fftw_real tmp20;
+	  fftw_real tmp45;
+	  fftw_real tmp79;
+	  fftw_real tmp80;
+	  fftw_real tmp81;
+	  fftw_real tmp48;
+	  fftw_real tmp82;
+	  fftw_real tmp22;
+	  fftw_real tmp25;
+	  fftw_real tmp50;
+	  fftw_real tmp84;
+	  fftw_real tmp85;
+	  fftw_real tmp86;
+	  fftw_real tmp53;
+	  fftw_real tmp87;
+	  ASSERT_ALIGNED_DOUBLE;
+	  { /* triple (6, 11, 1) */
+	       fftw_real tmp18;
+	       fftw_real tmp19;
+	       fftw_real tmp46;
+	       fftw_real tmp47;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp17 = c_re(input[6 * istride]);
+	       tmp18 = c_re(input[11 * istride]);
+	       tmp19 = c_re(input[istride]);
+	       tmp20 = tmp18 + tmp19;
+	       tmp45 = tmp17 - (K500000000 * tmp20);
+	       tmp79 = K866025403 * (tmp19 - tmp18);
+	       tmp80 = c_im(input[6 * istride]);
+	       tmp46 = c_im(input[11 * istride]);
+	       tmp47 = c_im(input[istride]);
+	       tmp81 = tmp46 + tmp47;
+	       tmp48 = K866025403 * (tmp46 - tmp47);
+	       tmp82 = tmp80 - (K500000000 * tmp81);
+	  }
+	  { /* triple (9, 14, 4) */
+	       fftw_real tmp23;
+	       fftw_real tmp24;
+	       fftw_real tmp51;
+	       fftw_real tmp52;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp22 = c_re(input[9 * istride]);
+	       tmp23 = c_re(input[14 * istride]);
+	       tmp24 = c_re(input[4 * istride]);
+	       tmp25 = tmp23 + tmp24;
+	       tmp50 = tmp22 - (K500000000 * tmp25);
+	       tmp84 = K866025403 * (tmp24 - tmp23);
+	       tmp85 = c_im(input[9 * istride]);
+	       tmp51 = c_im(input[14 * istride]);
+	       tmp52 = c_im(input[4 * istride]);
+	       tmp86 = tmp51 + tmp52;
+	       tmp53 = K866025403 * (tmp51 - tmp52);
+	       tmp87 = tmp85 - (K500000000 * tmp86);
+	  }
+	  tmp21 = tmp17 + tmp20;
+	  tmp26 = tmp22 + tmp25;
+	  tmp27 = tmp21 + tmp26;
+	  tmp49 = tmp45 - tmp48;
+	  tmp54 = tmp50 - tmp53;
+	  tmp55 = tmp49 + tmp54;
+	  tmp108 = tmp81 + tmp80;
+	  tmp109 = tmp86 + tmp85;
+	  tmp147 = tmp108 + tmp109;
+	  tmp61 = tmp45 + tmp48;
+	  tmp62 = tmp50 + tmp53;
+	  tmp63 = tmp61 + tmp62;
+	  tmp96 = tmp82 - tmp79;
+	  tmp97 = tmp87 - tmp84;
+	  tmp138 = tmp96 + tmp97;
+	  tmp83 = tmp79 + tmp82;
+	  tmp88 = tmp84 + tmp87;
+	  tmp118 = tmp83 + tmp88;
+     }
+     { /* combines input triples (3, 8, 13) and (12, 2, 7), then pairs the two triples' results */
+	  fftw_real tmp6;
+	  fftw_real tmp9;
+	  fftw_real tmp34;
+	  fftw_real tmp68;
+	  fftw_real tmp69;
+	  fftw_real tmp70;
+	  fftw_real tmp37;
+	  fftw_real tmp71;
+	  fftw_real tmp11;
+	  fftw_real tmp14;
+	  fftw_real tmp39;
+	  fftw_real tmp73;
+	  fftw_real tmp74;
+	  fftw_real tmp75;
+	  fftw_real tmp42;
+	  fftw_real tmp76;
+	  ASSERT_ALIGNED_DOUBLE;
+	  { /* triple (3, 8, 13) */
+	       fftw_real tmp7;
+	       fftw_real tmp8;
+	       fftw_real tmp35;
+	       fftw_real tmp36;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp6 = c_re(input[3 * istride]);
+	       tmp7 = c_re(input[8 * istride]);
+	       tmp8 = c_re(input[13 * istride]);
+	       tmp9 = tmp7 + tmp8;
+	       tmp34 = tmp6 - (K500000000 * tmp9);
+	       tmp68 = K866025403 * (tmp8 - tmp7);
+	       tmp69 = c_im(input[3 * istride]);
+	       tmp35 = c_im(input[8 * istride]);
+	       tmp36 = c_im(input[13 * istride]);
+	       tmp70 = tmp35 + tmp36;
+	       tmp37 = K866025403 * (tmp35 - tmp36);
+	       tmp71 = tmp69 - (K500000000 * tmp70);
+	  }
+	  { /* triple (12, 2, 7) */
+	       fftw_real tmp12;
+	       fftw_real tmp13;
+	       fftw_real tmp40;
+	       fftw_real tmp41;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp11 = c_re(input[12 * istride]);
+	       tmp12 = c_re(input[2 * istride]);
+	       tmp13 = c_re(input[7 * istride]);
+	       tmp14 = tmp12 + tmp13;
+	       tmp39 = tmp11 - (K500000000 * tmp14);
+	       tmp73 = K866025403 * (tmp13 - tmp12);
+	       tmp74 = c_im(input[12 * istride]);
+	       tmp40 = c_im(input[2 * istride]);
+	       tmp41 = c_im(input[7 * istride]);
+	       tmp75 = tmp40 + tmp41;
+	       tmp42 = K866025403 * (tmp40 - tmp41);
+	       tmp76 = tmp74 - (K500000000 * tmp75);
+	  }
+	  tmp10 = tmp6 + tmp9;
+	  tmp15 = tmp11 + tmp14;
+	  tmp16 = tmp10 + tmp15;
+	  tmp38 = tmp34 - tmp37;
+	  tmp43 = tmp39 - tmp42;
+	  tmp44 = tmp38 + tmp43;
+	  tmp111 = tmp70 + tmp69;
+	  tmp112 = tmp75 + tmp74;
+	  tmp146 = tmp111 + tmp112;
+	  tmp58 = tmp34 + tmp37;
+	  tmp59 = tmp39 + tmp42;
+	  tmp60 = tmp58 + tmp59;
+	  tmp99 = tmp71 - tmp68;
+	  tmp100 = tmp76 - tmp73;
+	  tmp137 = tmp99 + tmp100;
+	  tmp72 = tmp68 + tmp71;
+	  tmp77 = tmp73 + tmp76;
+	  tmp117 = tmp72 + tmp77;
+     }
+     { /* stores the real part of outputs 0, 9, 6, 12, 3 */
+	  fftw_real tmp106;
+	  fftw_real tmp28;
+	  fftw_real tmp105;
+	  fftw_real tmp114;
+	  fftw_real tmp116;
+	  fftw_real tmp110;
+	  fftw_real tmp113;
+	  fftw_real tmp115;
+	  fftw_real tmp107;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp106 = K559016994 * (tmp16 - tmp27);
+	  tmp28 = tmp16 + tmp27;
+	  tmp105 = tmp5 - (K250000000 * tmp28);
+	  tmp110 = tmp108 - tmp109;
+	  tmp113 = tmp111 - tmp112;
+	  tmp114 = (K951056516 * tmp110) - (K587785252 * tmp113);
+	  tmp116 = (K951056516 * tmp113) + (K587785252 * tmp110);
+	  c_re(output[0]) = tmp5 + tmp28;
+	  tmp115 = tmp106 + tmp105;
+	  c_re(output[9 * ostride]) = tmp115 - tmp116;
+	  c_re(output[6 * ostride]) = tmp115 + tmp116;
+	  tmp107 = tmp105 - tmp106;
+	  c_re(output[12 * ostride]) = tmp107 - tmp114;
+	  c_re(output[3 * ostride]) = tmp107 + tmp114;
+     }
+     { /* stores the real part of outputs 5, 14, 11, 2, 8 */
+	  fftw_real tmp94;
+	  fftw_real tmp56;
+	  fftw_real tmp93;
+	  fftw_real tmp102;
+	  fftw_real tmp104;
+	  fftw_real tmp98;
+	  fftw_real tmp101;
+	  fftw_real tmp103;
+	  fftw_real tmp95;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp94 = K559016994 * (tmp44 - tmp55);
+	  tmp56 = tmp44 + tmp55;
+	  tmp93 = tmp33 - (K250000000 * tmp56);
+	  tmp98 = tmp96 - tmp97;
+	  tmp101 = tmp99 - tmp100;
+	  tmp102 = (K951056516 * tmp98) - (K587785252 * tmp101);
+	  tmp104 = (K951056516 * tmp101) + (K587785252 * tmp98);
+	  c_re(output[5 * ostride]) = tmp33 + tmp56;
+	  tmp103 = tmp94 + tmp93;
+	  c_re(output[14 * ostride]) = tmp103 - tmp104;
+	  c_re(output[11 * ostride]) = tmp103 + tmp104;
+	  tmp95 = tmp93 - tmp94;
+	  c_re(output[2 * ostride]) = tmp95 - tmp102;
+	  c_re(output[8 * ostride]) = tmp95 + tmp102;
+     }
+     { /* stores the imaginary part of outputs 0, 6, 9, 3, 12 */
+	  fftw_real tmp150;
+	  fftw_real tmp148;
+	  fftw_real tmp149;
+	  fftw_real tmp154;
+	  fftw_real tmp156;
+	  fftw_real tmp152;
+	  fftw_real tmp153;
+	  fftw_real tmp155;
+	  fftw_real tmp151;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp150 = K559016994 * (tmp146 - tmp147);
+	  tmp148 = tmp146 + tmp147;
+	  tmp149 = tmp145 - (K250000000 * tmp148);
+	  tmp152 = tmp21 - tmp26;
+	  tmp153 = tmp10 - tmp15;
+	  tmp154 = (K951056516 * tmp152) - (K587785252 * tmp153);
+	  tmp156 = (K951056516 * tmp153) + (K587785252 * tmp152);
+	  c_im(output[0]) = tmp148 + tmp145;
+	  tmp155 = tmp150 + tmp149;
+	  c_im(output[6 * ostride]) = tmp155 - tmp156;
+	  c_im(output[9 * ostride]) = tmp156 + tmp155;
+	  tmp151 = tmp149 - tmp150;
+	  c_im(output[3 * ostride]) = tmp151 - tmp154;
+	  c_im(output[12 * ostride]) = tmp154 + tmp151;
+     }
+     { /* stores the imaginary part of outputs 5, 11, 14, 2, 8 */
+	  fftw_real tmp141;
+	  fftw_real tmp139;
+	  fftw_real tmp140;
+	  fftw_real tmp135;
+	  fftw_real tmp144;
+	  fftw_real tmp133;
+	  fftw_real tmp134;
+	  fftw_real tmp143;
+	  fftw_real tmp142;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp141 = K559016994 * (tmp137 - tmp138);
+	  tmp139 = tmp137 + tmp138;
+	  tmp140 = tmp136 - (K250000000 * tmp139);
+	  tmp133 = tmp49 - tmp54;
+	  tmp134 = tmp38 - tmp43;
+	  tmp135 = (K951056516 * tmp133) - (K587785252 * tmp134);
+	  tmp144 = (K951056516 * tmp134) + (K587785252 * tmp133);
+	  c_im(output[5 * ostride]) = tmp139 + tmp136;
+	  tmp143 = tmp141 + tmp140;
+	  c_im(output[11 * ostride]) = tmp143 - tmp144;
+	  c_im(output[14 * ostride]) = tmp144 + tmp143;
+	  tmp142 = tmp140 - tmp141;
+	  c_im(output[2 * ostride]) = tmp135 + tmp142;
+	  c_im(output[8 * ostride]) = tmp142 - tmp135;
+     }
+     { /* stores the imaginary part of outputs 10, 7, 13, 1, 4 */
+	  fftw_real tmp119;
+	  fftw_real tmp125;
+	  fftw_real tmp126;
+	  fftw_real tmp130;
+	  fftw_real tmp131;
+	  fftw_real tmp128;
+	  fftw_real tmp129;
+	  fftw_real tmp132;
+	  fftw_real tmp127;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp119 = K559016994 * (tmp117 - tmp118);
+	  tmp125 = tmp117 + tmp118;
+	  tmp126 = tmp124 - (K250000000 * tmp125);
+	  tmp128 = tmp58 - tmp59;
+	  tmp129 = tmp61 - tmp62;
+	  tmp130 = (K951056516 * tmp128) + (K587785252 * tmp129);
+	  tmp131 = (K951056516 * tmp129) - (K587785252 * tmp128);
+	  c_im(output[10 * ostride]) = tmp125 + tmp124;
+	  tmp132 = tmp126 - tmp119;
+	  c_im(output[7 * ostride]) = tmp131 + tmp132;
+	  c_im(output[13 * ostride]) = tmp132 - tmp131;
+	  tmp127 = tmp119 + tmp126;
+	  c_im(output[ostride]) = tmp127 - tmp130;
+	  c_im(output[4 * ostride]) = tmp130 + tmp127;
+     }
+     { /* stores the real part of outputs 10, 7, 13, 4, 1 */
+	  fftw_real tmp65;
+	  fftw_real tmp64;
+	  fftw_real tmp66;
+	  fftw_real tmp90;
+	  fftw_real tmp92;
+	  fftw_real tmp78;
+	  fftw_real tmp89;
+	  fftw_real tmp91;
+	  fftw_real tmp67;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp65 = K559016994 * (tmp60 - tmp63);
+	  tmp64 = tmp60 + tmp63;
+	  tmp66 = tmp57 - (K250000000 * tmp64);
+	  tmp78 = tmp72 - tmp77;
+	  tmp89 = tmp83 - tmp88;
+	  tmp90 = (K951056516 * tmp78) + (K587785252 * tmp89);
+	  tmp92 = (K951056516 * tmp89) - (K587785252 * tmp78);
+	  c_re(output[10 * ostride]) = tmp57 + tmp64;
+	  tmp91 = tmp66 - tmp65;
+	  c_re(output[7 * ostride]) = tmp91 - tmp92;
+	  c_re(output[13 * ostride]) = tmp91 + tmp92;
+	  tmp67 = tmp65 + tmp66;
+	  c_re(output[4 * ostride]) = tmp67 - tmp90;
+	  c_re(output[ostride]) = tmp67 + tmp90;
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_15_desc = { /* descriptor record for fftw_no_twiddle_15; positional initializer, field layout is declared in an included header */
+     "fftw_no_twiddle_15", /* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_15, /* codelet entry point, cast to an unprototyped function pointer */
+     15, /* presumably the transform size -- confirm against the fftw_codelet_desc declaration */
+     FFTW_FORWARD, /* presumably the transform direction -- confirm */
+     FFTW_NOTW, /* presumably the codelet kind (no twiddle factors) -- confirm */
+     331, /* NOTE(review): meaning of this integer is not visible here; check the struct declaration */
+     0, /* NOTE(review): presumably a twiddle count, zero for a no-twiddle codelet -- confirm */
+     (const int *) 0, /* null pointer; presumably an unused twiddle-order table -- confirm */
+};
diff --git a/src/fftw/fn_16.c b/src/fftw/fn_16.c
new file mode 100644
index 0000000..d75b389
--- /dev/null
+++ b/src/fftw/fn_16.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:46 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 16 */
+
+/*
+ * This function contains 144 FP additions, 24 FP multiplications,
+ * (or, 136 additions, 16 multiplications, 8 fused multiply/add),
+ * 46 stack variables, and 64 memory accesses
+ */
+static const fftw_real K923879532 =
+FFTW_KONST(+0.923879532511286756128183189396788286822416626);  /* numerically cos(pi/8) */
+static const fftw_real K382683432 =
+FFTW_KONST(+0.382683432365089771728459984030398866761344562);  /* numerically sin(pi/8) */
+static const fftw_real K707106781 =
+FFTW_KONST(+0.707106781186547524400844362104849039284835938);  /* numerically sqrt(2)/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_16(const fftw_complex *input, fftw_complex *output,
+			int istride, int ostride)  /* istride/ostride: element strides used to index input[] and output[] */
+{  /* Generated, fully unrolled 16-point codelet (genfft -notwiddle 16): reads input[k * istride] and writes output[k * ostride] for k = 0..15. */
+     fftw_real tmp7;  /* function-scope temporaries carry partial sums from the load stages to the store stages */
+     fftw_real tmp115;
+     fftw_real tmp38;
+     fftw_real tmp129;
+     fftw_real tmp49;
+     fftw_real tmp95;
+     fftw_real tmp83;
+     fftw_real tmp105;
+     fftw_real tmp29;
+     fftw_real tmp123;
+     fftw_real tmp73;
+     fftw_real tmp101;
+     fftw_real tmp78;
+     fftw_real tmp102;
+     fftw_real tmp126;
+     fftw_real tmp141;
+     fftw_real tmp14;
+     fftw_real tmp130;
+     fftw_real tmp45;
+     fftw_real tmp116;
+     fftw_real tmp52;
+     fftw_real tmp85;
+     fftw_real tmp55;
+     fftw_real tmp84;
+     fftw_real tmp22;
+     fftw_real tmp118;
+     fftw_real tmp62;
+     fftw_real tmp98;
+     fftw_real tmp67;
+     fftw_real tmp99;
+     fftw_real tmp121;
+     fftw_real tmp140;
+     ASSERT_ALIGNED_DOUBLE;  /* macro defined outside this file; presumably the check requested by -magic-alignment-check -- confirm */
+     {  /* load stage: inputs 0, 8, 4, 12 */
+	  fftw_real tmp3;
+	  fftw_real tmp47;
+	  fftw_real tmp34;
+	  fftw_real tmp82;
+	  fftw_real tmp6;
+	  fftw_real tmp81;
+	  fftw_real tmp37;
+	  fftw_real tmp48;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp1;
+	       fftw_real tmp2;
+	       fftw_real tmp32;
+	       fftw_real tmp33;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(input[0]);
+	       tmp2 = c_re(input[8 * istride]);
+	       tmp3 = tmp1 + tmp2;
+	       tmp47 = tmp1 - tmp2;
+	       tmp32 = c_im(input[0]);
+	       tmp33 = c_im(input[8 * istride]);
+	       tmp34 = tmp32 + tmp33;
+	       tmp82 = tmp32 - tmp33;
+	  }
+	  {
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       fftw_real tmp35;
+	       fftw_real tmp36;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp4 = c_re(input[4 * istride]);
+	       tmp5 = c_re(input[12 * istride]);
+	       tmp6 = tmp4 + tmp5;
+	       tmp81 = tmp4 - tmp5;
+	       tmp35 = c_im(input[4 * istride]);
+	       tmp36 = c_im(input[12 * istride]);
+	       tmp37 = tmp35 + tmp36;
+	       tmp48 = tmp35 - tmp36;
+	  }
+	  tmp7 = tmp3 + tmp6;  /* sum of the c_re values of inputs 0, 4, 8, 12 */
+	  tmp115 = tmp3 - tmp6;
+	  tmp38 = tmp34 + tmp37;  /* sum of the c_im values of inputs 0, 4, 8, 12 */
+	  tmp129 = tmp34 - tmp37;
+	  tmp49 = tmp47 - tmp48;
+	  tmp95 = tmp47 + tmp48;
+	  tmp83 = tmp81 + tmp82;
+	  tmp105 = tmp82 - tmp81;
+     }
+     {  /* load stage: inputs 15, 7, 3, 11 */
+	  fftw_real tmp25;
+	  fftw_real tmp69;
+	  fftw_real tmp77;
+	  fftw_real tmp124;
+	  fftw_real tmp28;
+	  fftw_real tmp74;
+	  fftw_real tmp72;
+	  fftw_real tmp125;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp23;
+	       fftw_real tmp24;
+	       fftw_real tmp75;
+	       fftw_real tmp76;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp23 = c_re(input[15 * istride]);
+	       tmp24 = c_re(input[7 * istride]);
+	       tmp25 = tmp23 + tmp24;
+	       tmp69 = tmp23 - tmp24;
+	       tmp75 = c_im(input[15 * istride]);
+	       tmp76 = c_im(input[7 * istride]);
+	       tmp77 = tmp75 - tmp76;
+	       tmp124 = tmp75 + tmp76;
+	  }
+	  {
+	       fftw_real tmp26;
+	       fftw_real tmp27;
+	       fftw_real tmp70;
+	       fftw_real tmp71;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp26 = c_re(input[3 * istride]);
+	       tmp27 = c_re(input[11 * istride]);
+	       tmp28 = tmp26 + tmp27;
+	       tmp74 = tmp26 - tmp27;
+	       tmp70 = c_im(input[3 * istride]);
+	       tmp71 = c_im(input[11 * istride]);
+	       tmp72 = tmp70 - tmp71;
+	       tmp125 = tmp70 + tmp71;
+	  }
+	  tmp29 = tmp25 + tmp28;  /* sum of the c_re values of inputs 3, 7, 11, 15 */
+	  tmp123 = tmp25 - tmp28;
+	  tmp73 = tmp69 - tmp72;
+	  tmp101 = tmp69 + tmp72;
+	  tmp78 = tmp74 + tmp77;
+	  tmp102 = tmp77 - tmp74;
+	  tmp126 = tmp124 - tmp125;
+	  tmp141 = tmp124 + tmp125;  /* sum of the c_im values of inputs 3, 7, 11, 15 */
+     }
+     {  /* load stage: inputs 2, 10, 14, 6 */
+	  fftw_real tmp10;
+	  fftw_real tmp51;
+	  fftw_real tmp41;
+	  fftw_real tmp50;
+	  fftw_real tmp13;
+	  fftw_real tmp53;
+	  fftw_real tmp44;
+	  fftw_real tmp54;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp8;
+	       fftw_real tmp9;
+	       fftw_real tmp39;
+	       fftw_real tmp40;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp8 = c_re(input[2 * istride]);
+	       tmp9 = c_re(input[10 * istride]);
+	       tmp10 = tmp8 + tmp9;
+	       tmp51 = tmp8 - tmp9;
+	       tmp39 = c_im(input[2 * istride]);
+	       tmp40 = c_im(input[10 * istride]);
+	       tmp41 = tmp39 + tmp40;
+	       tmp50 = tmp39 - tmp40;
+	  }
+	  {
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       fftw_real tmp42;
+	       fftw_real tmp43;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp11 = c_re(input[14 * istride]);
+	       tmp12 = c_re(input[6 * istride]);
+	       tmp13 = tmp11 + tmp12;
+	       tmp53 = tmp11 - tmp12;
+	       tmp42 = c_im(input[14 * istride]);
+	       tmp43 = c_im(input[6 * istride]);
+	       tmp44 = tmp42 + tmp43;
+	       tmp54 = tmp42 - tmp43;
+	  }
+	  tmp14 = tmp10 + tmp13;  /* sum of the c_re values of inputs 2, 6, 10, 14 */
+	  tmp130 = tmp13 - tmp10;
+	  tmp45 = tmp41 + tmp44;  /* sum of the c_im values of inputs 2, 6, 10, 14 */
+	  tmp116 = tmp41 - tmp44;
+	  tmp52 = tmp50 - tmp51;
+	  tmp85 = tmp51 + tmp50;
+	  tmp55 = tmp53 + tmp54;
+	  tmp84 = tmp53 - tmp54;
+     }
+     {  /* load stage: inputs 1, 9, 5, 13 */
+	  fftw_real tmp18;
+	  fftw_real tmp63;
+	  fftw_real tmp61;
+	  fftw_real tmp119;
+	  fftw_real tmp21;
+	  fftw_real tmp58;
+	  fftw_real tmp66;
+	  fftw_real tmp120;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp16;
+	       fftw_real tmp17;
+	       fftw_real tmp59;
+	       fftw_real tmp60;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp16 = c_re(input[istride]);
+	       tmp17 = c_re(input[9 * istride]);
+	       tmp18 = tmp16 + tmp17;
+	       tmp63 = tmp16 - tmp17;
+	       tmp59 = c_im(input[istride]);
+	       tmp60 = c_im(input[9 * istride]);
+	       tmp61 = tmp59 - tmp60;
+	       tmp119 = tmp59 + tmp60;
+	  }
+	  {
+	       fftw_real tmp19;
+	       fftw_real tmp20;
+	       fftw_real tmp64;
+	       fftw_real tmp65;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp19 = c_re(input[5 * istride]);
+	       tmp20 = c_re(input[13 * istride]);
+	       tmp21 = tmp19 + tmp20;
+	       tmp58 = tmp19 - tmp20;
+	       tmp64 = c_im(input[5 * istride]);
+	       tmp65 = c_im(input[13 * istride]);
+	       tmp66 = tmp64 - tmp65;
+	       tmp120 = tmp64 + tmp65;
+	  }
+	  tmp22 = tmp18 + tmp21;  /* sum of the c_re values of inputs 1, 5, 9, 13 */
+	  tmp118 = tmp18 - tmp21;
+	  tmp62 = tmp58 + tmp61;
+	  tmp98 = tmp61 - tmp58;
+	  tmp67 = tmp63 - tmp66;
+	  tmp99 = tmp63 + tmp66;
+	  tmp121 = tmp119 - tmp120;
+	  tmp140 = tmp119 + tmp120;  /* sum of the c_im values of inputs 1, 5, 9, 13 */
+     }
+     {  /* store stage: c_re of outputs 0 and 8, c_im of outputs 4 and 12 */
+	  fftw_real tmp15;
+	  fftw_real tmp30;
+	  fftw_real tmp31;
+	  fftw_real tmp46;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp15 = tmp7 + tmp14;  /* c_re sum over the even-indexed inputs */
+	  tmp30 = tmp22 + tmp29;  /* c_re sum over the odd-indexed inputs */
+	  c_re(output[8 * ostride]) = tmp15 - tmp30;
+	  c_re(output[0]) = tmp15 + tmp30;
+	  tmp31 = tmp29 - tmp22;
+	  tmp46 = tmp38 - tmp45;
+	  c_im(output[4 * ostride]) = tmp31 + tmp46;
+	  c_im(output[12 * ostride]) = tmp46 - tmp31;
+     }
+     {  /* store stage: c_im of outputs 0 and 8, c_re of outputs 4 and 12 */
+	  fftw_real tmp143;
+	  fftw_real tmp144;
+	  fftw_real tmp139;
+	  fftw_real tmp142;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp143 = tmp38 + tmp45;  /* c_im sum over the even-indexed inputs */
+	  tmp144 = tmp140 + tmp141;  /* c_im sum over the odd-indexed inputs */
+	  c_im(output[8 * ostride]) = tmp143 - tmp144;
+	  c_im(output[0]) = tmp143 + tmp144;
+	  tmp139 = tmp7 - tmp14;
+	  tmp142 = tmp140 - tmp141;
+	  c_re(output[12 * ostride]) = tmp139 - tmp142;
+	  c_re(output[4 * ostride]) = tmp139 + tmp142;
+     }
+     {  /* store stage: c_re of outputs 2 and 10, c_im of outputs 6 and 14 (uses K707106781) */
+	  fftw_real tmp117;
+	  fftw_real tmp131;
+	  fftw_real tmp128;
+	  fftw_real tmp132;
+	  fftw_real tmp122;
+	  fftw_real tmp127;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp117 = tmp115 + tmp116;
+	  tmp131 = tmp129 - tmp130;
+	  tmp122 = tmp118 + tmp121;
+	  tmp127 = tmp123 - tmp126;
+	  tmp128 = K707106781 * (tmp122 + tmp127);
+	  tmp132 = K707106781 * (tmp127 - tmp122);
+	  c_re(output[10 * ostride]) = tmp117 - tmp128;
+	  c_re(output[2 * ostride]) = tmp117 + tmp128;
+	  c_im(output[14 * ostride]) = tmp131 - tmp132;
+	  c_im(output[6 * ostride]) = tmp131 + tmp132;
+     }
+     {  /* store stage: c_re of outputs 6 and 14, c_im of outputs 2 and 10 (uses K707106781) */
+	  fftw_real tmp133;
+	  fftw_real tmp137;
+	  fftw_real tmp136;
+	  fftw_real tmp138;
+	  fftw_real tmp134;
+	  fftw_real tmp135;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp133 = tmp115 - tmp116;
+	  tmp137 = tmp130 + tmp129;
+	  tmp134 = tmp121 - tmp118;
+	  tmp135 = tmp123 + tmp126;
+	  tmp136 = K707106781 * (tmp134 - tmp135);
+	  tmp138 = K707106781 * (tmp134 + tmp135);
+	  c_re(output[14 * ostride]) = tmp133 - tmp136;
+	  c_re(output[6 * ostride]) = tmp133 + tmp136;
+	  c_im(output[10 * ostride]) = tmp137 - tmp138;
+	  c_im(output[2 * ostride]) = tmp137 + tmp138;
+     }
+     {  /* store stage: both components of outputs 3, 7, 11, 15 (uses all three K constants) */
+	  fftw_real tmp57;
+	  fftw_real tmp89;
+	  fftw_real tmp92;
+	  fftw_real tmp94;
+	  fftw_real tmp87;
+	  fftw_real tmp93;
+	  fftw_real tmp80;
+	  fftw_real tmp88;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp56;
+	       fftw_real tmp90;
+	       fftw_real tmp91;
+	       fftw_real tmp86;
+	       fftw_real tmp68;
+	       fftw_real tmp79;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp56 = K707106781 * (tmp52 - tmp55);
+	       tmp57 = tmp49 + tmp56;
+	       tmp89 = tmp49 - tmp56;
+	       tmp90 = (K382683432 * tmp62) - (K923879532 * tmp67);
+	       tmp91 = (K382683432 * tmp78) + (K923879532 * tmp73);
+	       tmp92 = tmp90 - tmp91;
+	       tmp94 = tmp90 + tmp91;
+	       tmp86 = K707106781 * (tmp84 - tmp85);
+	       tmp87 = tmp83 - tmp86;
+	       tmp93 = tmp83 + tmp86;
+	       tmp68 = (K923879532 * tmp62) + (K382683432 * tmp67);
+	       tmp79 = (K382683432 * tmp73) - (K923879532 * tmp78);
+	       tmp80 = tmp68 + tmp79;
+	       tmp88 = tmp79 - tmp68;
+	  }
+	  c_re(output[11 * ostride]) = tmp57 - tmp80;
+	  c_re(output[3 * ostride]) = tmp57 + tmp80;
+	  c_im(output[15 * ostride]) = tmp87 - tmp88;
+	  c_im(output[7 * ostride]) = tmp87 + tmp88;
+	  c_re(output[15 * ostride]) = tmp89 - tmp92;
+	  c_re(output[7 * ostride]) = tmp89 + tmp92;
+	  c_im(output[11 * ostride]) = tmp93 - tmp94;
+	  c_im(output[3 * ostride]) = tmp93 + tmp94;
+     }
+     {  /* store stage: both components of outputs 1, 5, 9, 13 (uses all three K constants) */
+	  fftw_real tmp97;
+	  fftw_real tmp109;
+	  fftw_real tmp112;
+	  fftw_real tmp114;
+	  fftw_real tmp107;
+	  fftw_real tmp113;
+	  fftw_real tmp104;
+	  fftw_real tmp108;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp96;
+	       fftw_real tmp110;
+	       fftw_real tmp111;
+	       fftw_real tmp106;
+	       fftw_real tmp100;
+	       fftw_real tmp103;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp96 = K707106781 * (tmp85 + tmp84);
+	       tmp97 = tmp95 + tmp96;
+	       tmp109 = tmp95 - tmp96;
+	       tmp110 = (K923879532 * tmp98) - (K382683432 * tmp99);
+	       tmp111 = (K923879532 * tmp102) + (K382683432 * tmp101);
+	       tmp112 = tmp110 - tmp111;
+	       tmp114 = tmp110 + tmp111;
+	       tmp106 = K707106781 * (tmp52 + tmp55);
+	       tmp107 = tmp105 - tmp106;
+	       tmp113 = tmp105 + tmp106;
+	       tmp100 = (K382683432 * tmp98) + (K923879532 * tmp99);
+	       tmp103 = (K923879532 * tmp101) - (K382683432 * tmp102);
+	       tmp104 = tmp100 + tmp103;
+	       tmp108 = tmp103 - tmp100;
+	  }
+	  c_re(output[9 * ostride]) = tmp97 - tmp104;
+	  c_re(output[ostride]) = tmp97 + tmp104;
+	  c_im(output[13 * ostride]) = tmp107 - tmp108;
+	  c_im(output[5 * ostride]) = tmp107 + tmp108;
+	  c_re(output[13 * ostride]) = tmp109 - tmp112;
+	  c_re(output[5 * ostride]) = tmp109 + tmp112;
+	  c_im(output[9 * ostride]) = tmp113 - tmp114;
+	  c_im(output[ostride]) = tmp113 + tmp114;
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_16_desc = {  /* descriptor record for fftw_no_twiddle_16; field meanings below are presumed from the values -- confirm against the fftw_codelet_desc declaration in the FFTW headers */
+     "fftw_no_twiddle_16",  /* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_16,  /* pointer to the codelet, cast to a generic function-pointer type */
+     16,  /* presumably the transform size */
+     FFTW_FORWARD,  /* presumably the transform direction */
+     FFTW_NOTW,  /* presumably the codelet kind (no-twiddle) */
+     353,  /* NOTE(review): meaning not visible here; looks like a generator-assigned id -- confirm */
+     0,  /* presumably the twiddle-factor count -- confirm */
+     (const int *) 0,  /* null pointer; presumably an optional twiddle-order table -- confirm */
+};
diff --git a/src/fftw/fn_2.c b/src/fftw/fn_2.c
new file mode 100644
index 0000000..ca45ae3
--- /dev/null
+++ b/src/fftw/fn_2.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:37 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 2 */
+
+/*
+ * This function contains 4 FP additions, 0 FP multiplications,
+ * (or, 4 additions, 0 multiplications, 0 fused multiply/add),
+ * 4 stack variables, and 8 memory accesses
+ */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_2(const fftw_complex *input, fftw_complex *output,
+		       int istride, int ostride)  /* istride/ostride: element strides used to index input[] and output[] */
+{  /* Generated 2-point codelet (genfft -notwiddle 2): output[0] = sum and output[ostride] = difference of the two inputs, component by component. */
+     fftw_real tmp1;
+     fftw_real tmp2;
+     fftw_real tmp3;
+     fftw_real tmp4;
+     ASSERT_ALIGNED_DOUBLE;  /* macro defined outside this file; presumably the check requested by -magic-alignment-check -- confirm */
+     tmp1 = c_re(input[0]);
+     tmp2 = c_re(input[istride]);
+     c_re(output[ostride]) = tmp1 - tmp2;
+     c_re(output[0]) = tmp1 + tmp2;
+     tmp3 = c_im(input[0]);  /* the c_im loads happen only after the c_re stores above */
+     tmp4 = c_im(input[istride]);
+     c_im(output[ostride]) = tmp3 - tmp4;
+     c_im(output[0]) = tmp3 + tmp4;
+}
+
+fftw_codelet_desc fftw_no_twiddle_2_desc = {  /* descriptor record for fftw_no_twiddle_2; field meanings below are presumed from the values -- confirm against the fftw_codelet_desc declaration in the FFTW headers */
+     "fftw_no_twiddle_2",  /* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_2,  /* pointer to the codelet, cast to a generic function-pointer type */
+     2,  /* presumably the transform size */
+     FFTW_FORWARD,  /* presumably the transform direction */
+     FFTW_NOTW,  /* presumably the codelet kind (no-twiddle) */
+     45,  /* NOTE(review): meaning not visible here; looks like a generator-assigned id -- confirm */
+     0,  /* presumably the twiddle-factor count -- confirm */
+     (const int *) 0,  /* null pointer; presumably an optional twiddle-order table -- confirm */
+};
diff --git a/src/fftw/fn_3.c b/src/fftw/fn_3.c
new file mode 100644
index 0000000..4f143d7
--- /dev/null
+++ b/src/fftw/fn_3.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:37 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 3 */
+
+/*
+ * This function contains 12 FP additions, 4 FP multiplications,
+ * (or, 10 additions, 2 multiplications, 2 fused multiply/add),
+ * 12 stack variables, and 12 memory accesses
+ */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);  /* exactly 1/2 */
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);  /* numerically sqrt(3)/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_3(const fftw_complex *input, fftw_complex *output,
+		       int istride, int ostride)  /* istride/ostride: element strides used to index input[] and output[] */
+{  /* Generated 3-point codelet (genfft -notwiddle 3): reads input[0], input[istride], input[2 * istride] and writes output[0], output[ostride], output[2 * ostride]. */
+     fftw_real tmp1;  /* c_re of input 0 */
+     fftw_real tmp10;  /* c_im of input 0 */
+     fftw_real tmp4;  /* c_re of input 1 plus c_re of input 2 */
+     fftw_real tmp9;  /* K866025403 * (c_re of input 2 minus c_re of input 1) */
+     fftw_real tmp8;  /* K866025403 * (c_im of input 1 minus c_im of input 2) */
+     fftw_real tmp11;  /* c_im of input 1 plus c_im of input 2 */
+     fftw_real tmp5;  /* tmp1 minus half of tmp4 */
+     fftw_real tmp12;  /* tmp10 minus half of tmp11 */
+     ASSERT_ALIGNED_DOUBLE;  /* macro defined outside this file; presumably the check requested by -magic-alignment-check -- confirm */
+     tmp1 = c_re(input[0]);
+     tmp10 = c_im(input[0]);
+     {  /* load inputs 1 and 2 and form their sums and scaled differences */
+	  fftw_real tmp2;
+	  fftw_real tmp3;
+	  fftw_real tmp6;
+	  fftw_real tmp7;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp2 = c_re(input[istride]);
+	  tmp3 = c_re(input[2 * istride]);
+	  tmp4 = tmp2 + tmp3;
+	  tmp9 = K866025403 * (tmp3 - tmp2);
+	  tmp6 = c_im(input[istride]);
+	  tmp7 = c_im(input[2 * istride]);
+	  tmp8 = K866025403 * (tmp6 - tmp7);
+	  tmp11 = tmp6 + tmp7;
+     }
+     c_re(output[0]) = tmp1 + tmp4;  /* output 0 is the plain sum of the three inputs */
+     tmp5 = tmp1 - (K500000000 * tmp4);
+     c_re(output[2 * ostride]) = tmp5 - tmp8;
+     c_re(output[ostride]) = tmp5 + tmp8;
+     c_im(output[0]) = tmp10 + tmp11;
+     tmp12 = tmp10 - (K500000000 * tmp11);
+     c_im(output[ostride]) = tmp9 + tmp12;
+     c_im(output[2 * ostride]) = tmp12 - tmp9;
+}
+
+fftw_codelet_desc fftw_no_twiddle_3_desc = {  /* descriptor record for fftw_no_twiddle_3; field meanings below are presumed from the values -- confirm against the fftw_codelet_desc declaration in the FFTW headers */
+     "fftw_no_twiddle_3",  /* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_3,  /* pointer to the codelet, cast to a generic function-pointer type */
+     3,  /* presumably the transform size */
+     FFTW_FORWARD,  /* presumably the transform direction */
+     FFTW_NOTW,  /* presumably the codelet kind (no-twiddle) */
+     67,  /* NOTE(review): meaning not visible here; looks like a generator-assigned id -- confirm */
+     0,  /* presumably the twiddle-factor count -- confirm */
+     (const int *) 0,  /* null pointer; presumably an optional twiddle-order table -- confirm */
+};
diff --git a/src/fftw/fn_32.c b/src/fftw/fn_32.c
new file mode 100644
index 0000000..632b77d
--- /dev/null
+++ b/src/fftw/fn_32.c
@@ -0,0 +1,1049 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:49 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 32 */
+
+/*
+ * This function contains 372 FP additions, 84 FP multiplications,
+ * (or, 340 additions, 52 multiplications, 32 fused multiply/add),
+ * 92 stack variables, and 128 memory accesses
+ */
+static const fftw_real K831469612 =
+FFTW_KONST(+0.831469612302545237078788377617905756738560812);  /* numerically cos(3*pi/16) */
+static const fftw_real K555570233 =
+FFTW_KONST(+0.555570233019602224742830813948532874374937191);  /* numerically sin(3*pi/16) */
+static const fftw_real K195090322 =
+FFTW_KONST(+0.195090322016128267848284868477022240927691618);  /* numerically sin(pi/16) */
+static const fftw_real K980785280 =
+FFTW_KONST(+0.980785280403230449126182236134239036973933731);  /* numerically cos(pi/16) */
+static const fftw_real K923879532 =
+FFTW_KONST(+0.923879532511286756128183189396788286822416626);  /* numerically cos(pi/8) */
+static const fftw_real K382683432 =
+FFTW_KONST(+0.382683432365089771728459984030398866761344562);  /* numerically sin(pi/8) */
+static const fftw_real K707106781 =
+FFTW_KONST(+0.707106781186547524400844362104849039284835938);  /* numerically sqrt(2)/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_32(const fftw_complex *input, fftw_complex *output,
+			int istride, int ostride)
+{
+     fftw_real tmp7;
+     fftw_real tmp275;
+     fftw_real tmp70;
+     fftw_real tmp309;
+     fftw_real tmp97;
+     fftw_real tmp215;
+     fftw_real tmp179;
+     fftw_real tmp241;
+     fftw_real tmp14;
+     fftw_real tmp310;
+     fftw_real tmp77;
+     fftw_real tmp276;
+     fftw_real tmp182;
+     fftw_real tmp216;
+     fftw_real tmp104;
+     fftw_real tmp242;
+     fftw_real tmp153;
+     fftw_real tmp233;
+     fftw_real tmp53;
+     fftw_real tmp60;
+     fftw_real tmp351;
+     fftw_real tmp306;
+     fftw_real tmp330;
+     fftw_real tmp352;
+     fftw_real tmp353;
+     fftw_real tmp354;
+     fftw_real tmp170;
+     fftw_real tmp236;
+     fftw_real tmp301;
+     fftw_real tmp329;
+     fftw_real tmp164;
+     fftw_real tmp237;
+     fftw_real tmp173;
+     fftw_real tmp234;
+     fftw_real tmp22;
+     fftw_real tmp280;
+     fftw_real tmp313;
+     fftw_real tmp85;
+     fftw_real tmp112;
+     fftw_real tmp185;
+     fftw_real tmp220;
+     fftw_real tmp245;
+     fftw_real tmp29;
+     fftw_real tmp283;
+     fftw_real tmp312;
+     fftw_real tmp92;
+     fftw_real tmp119;
+     fftw_real tmp184;
+     fftw_real tmp223;
+     fftw_real tmp244;
+     fftw_real tmp126;
+     fftw_real tmp229;
+     fftw_real tmp38;
+     fftw_real tmp45;
+     fftw_real tmp346;
+     fftw_real tmp295;
+     fftw_real tmp327;
+     fftw_real tmp347;
+     fftw_real tmp348;
+     fftw_real tmp349;
+     fftw_real tmp143;
+     fftw_real tmp226;
+     fftw_real tmp290;
+     fftw_real tmp326;
+     fftw_real tmp137;
+     fftw_real tmp227;
+     fftw_real tmp146;
+     fftw_real tmp230;
+     ASSERT_ALIGNED_DOUBLE;
+     {
+	  fftw_real tmp3;
+	  fftw_real tmp95;
+	  fftw_real tmp66;
+	  fftw_real tmp178;
+	  fftw_real tmp6;
+	  fftw_real tmp177;
+	  fftw_real tmp69;
+	  fftw_real tmp96;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp1;
+	       fftw_real tmp2;
+	       fftw_real tmp64;
+	       fftw_real tmp65;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(input[0]);
+	       tmp2 = c_re(input[16 * istride]);
+	       tmp3 = tmp1 + tmp2;
+	       tmp95 = tmp1 - tmp2;
+	       tmp64 = c_im(input[0]);
+	       tmp65 = c_im(input[16 * istride]);
+	       tmp66 = tmp64 + tmp65;
+	       tmp178 = tmp64 - tmp65;
+	  }
+	  {
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       fftw_real tmp67;
+	       fftw_real tmp68;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp4 = c_re(input[8 * istride]);
+	       tmp5 = c_re(input[24 * istride]);
+	       tmp6 = tmp4 + tmp5;
+	       tmp177 = tmp4 - tmp5;
+	       tmp67 = c_im(input[8 * istride]);
+	       tmp68 = c_im(input[24 * istride]);
+	       tmp69 = tmp67 + tmp68;
+	       tmp96 = tmp67 - tmp68;
+	  }
+	  tmp7 = tmp3 + tmp6;
+	  tmp275 = tmp3 - tmp6;
+	  tmp70 = tmp66 + tmp69;
+	  tmp309 = tmp66 - tmp69;
+	  tmp97 = tmp95 - tmp96;
+	  tmp215 = tmp95 + tmp96;
+	  tmp179 = tmp177 + tmp178;
+	  tmp241 = tmp178 - tmp177;
+     }
+     {
+	  fftw_real tmp10;
+	  fftw_real tmp99;
+	  fftw_real tmp73;
+	  fftw_real tmp98;
+	  fftw_real tmp13;
+	  fftw_real tmp101;
+	  fftw_real tmp76;
+	  fftw_real tmp102;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp8;
+	       fftw_real tmp9;
+	       fftw_real tmp71;
+	       fftw_real tmp72;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp8 = c_re(input[4 * istride]);
+	       tmp9 = c_re(input[20 * istride]);
+	       tmp10 = tmp8 + tmp9;
+	       tmp99 = tmp8 - tmp9;
+	       tmp71 = c_im(input[4 * istride]);
+	       tmp72 = c_im(input[20 * istride]);
+	       tmp73 = tmp71 + tmp72;
+	       tmp98 = tmp71 - tmp72;
+	  }
+	  {
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       fftw_real tmp74;
+	       fftw_real tmp75;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp11 = c_re(input[28 * istride]);
+	       tmp12 = c_re(input[12 * istride]);
+	       tmp13 = tmp11 + tmp12;
+	       tmp101 = tmp11 - tmp12;
+	       tmp74 = c_im(input[28 * istride]);
+	       tmp75 = c_im(input[12 * istride]);
+	       tmp76 = tmp74 + tmp75;
+	       tmp102 = tmp74 - tmp75;
+	  }
+	  tmp14 = tmp10 + tmp13;
+	  tmp310 = tmp13 - tmp10;
+	  tmp77 = tmp73 + tmp76;
+	  tmp276 = tmp73 - tmp76;
+	  {
+	       fftw_real tmp180;
+	       fftw_real tmp181;
+	       fftw_real tmp100;
+	       fftw_real tmp103;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp180 = tmp101 - tmp102;
+	       tmp181 = tmp99 + tmp98;
+	       tmp182 = K707106781 * (tmp180 - tmp181);
+	       tmp216 = K707106781 * (tmp181 + tmp180);
+	       tmp100 = tmp98 - tmp99;
+	       tmp103 = tmp101 + tmp102;
+	       tmp104 = K707106781 * (tmp100 - tmp103);
+	       tmp242 = K707106781 * (tmp100 + tmp103);
+	  }
+     }
+     {
+	  fftw_real tmp49;
+	  fftw_real tmp149;
+	  fftw_real tmp169;
+	  fftw_real tmp302;
+	  fftw_real tmp52;
+	  fftw_real tmp166;
+	  fftw_real tmp152;
+	  fftw_real tmp303;
+	  fftw_real tmp56;
+	  fftw_real tmp157;
+	  fftw_real tmp156;
+	  fftw_real tmp298;
+	  fftw_real tmp59;
+	  fftw_real tmp159;
+	  fftw_real tmp162;
+	  fftw_real tmp299;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp47;
+	       fftw_real tmp48;
+	       fftw_real tmp167;
+	       fftw_real tmp168;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp47 = c_re(input[31 * istride]);
+	       tmp48 = c_re(input[15 * istride]);
+	       tmp49 = tmp47 + tmp48;
+	       tmp149 = tmp47 - tmp48;
+	       tmp167 = c_im(input[31 * istride]);
+	       tmp168 = c_im(input[15 * istride]);
+	       tmp169 = tmp167 - tmp168;
+	       tmp302 = tmp167 + tmp168;
+	  }
+	  {
+	       fftw_real tmp50;
+	       fftw_real tmp51;
+	       fftw_real tmp150;
+	       fftw_real tmp151;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp50 = c_re(input[7 * istride]);
+	       tmp51 = c_re(input[23 * istride]);
+	       tmp52 = tmp50 + tmp51;
+	       tmp166 = tmp50 - tmp51;
+	       tmp150 = c_im(input[7 * istride]);
+	       tmp151 = c_im(input[23 * istride]);
+	       tmp152 = tmp150 - tmp151;
+	       tmp303 = tmp150 + tmp151;
+	  }
+	  {
+	       fftw_real tmp54;
+	       fftw_real tmp55;
+	       fftw_real tmp154;
+	       fftw_real tmp155;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp54 = c_re(input[3 * istride]);
+	       tmp55 = c_re(input[19 * istride]);
+	       tmp56 = tmp54 + tmp55;
+	       tmp157 = tmp54 - tmp55;
+	       tmp154 = c_im(input[3 * istride]);
+	       tmp155 = c_im(input[19 * istride]);
+	       tmp156 = tmp154 - tmp155;
+	       tmp298 = tmp154 + tmp155;
+	  }
+	  {
+	       fftw_real tmp57;
+	       fftw_real tmp58;
+	       fftw_real tmp160;
+	       fftw_real tmp161;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp57 = c_re(input[27 * istride]);
+	       tmp58 = c_re(input[11 * istride]);
+	       tmp59 = tmp57 + tmp58;
+	       tmp159 = tmp57 - tmp58;
+	       tmp160 = c_im(input[27 * istride]);
+	       tmp161 = c_im(input[11 * istride]);
+	       tmp162 = tmp160 - tmp161;
+	       tmp299 = tmp160 + tmp161;
+	  }
+	  {
+	       fftw_real tmp304;
+	       fftw_real tmp305;
+	       fftw_real tmp297;
+	       fftw_real tmp300;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp153 = tmp149 - tmp152;
+	       tmp233 = tmp149 + tmp152;
+	       tmp53 = tmp49 + tmp52;
+	       tmp60 = tmp56 + tmp59;
+	       tmp351 = tmp53 - tmp60;
+	       tmp304 = tmp302 - tmp303;
+	       tmp305 = tmp59 - tmp56;
+	       tmp306 = tmp304 - tmp305;
+	       tmp330 = tmp305 + tmp304;
+	       tmp352 = tmp302 + tmp303;
+	       tmp353 = tmp298 + tmp299;
+	       tmp354 = tmp352 - tmp353;
+	       tmp170 = tmp166 + tmp169;
+	       tmp236 = tmp169 - tmp166;
+	       tmp297 = tmp49 - tmp52;
+	       tmp300 = tmp298 - tmp299;
+	       tmp301 = tmp297 - tmp300;
+	       tmp329 = tmp297 + tmp300;
+	       {
+		    fftw_real tmp158;
+		    fftw_real tmp163;
+		    fftw_real tmp171;
+		    fftw_real tmp172;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp158 = tmp156 - tmp157;
+		    tmp163 = tmp159 + tmp162;
+		    tmp164 = K707106781 * (tmp158 - tmp163);
+		    tmp237 = K707106781 * (tmp158 + tmp163);
+		    tmp171 = tmp159 - tmp162;
+		    tmp172 = tmp157 + tmp156;
+		    tmp173 = K707106781 * (tmp171 - tmp172);
+		    tmp234 = K707106781 * (tmp172 + tmp171);
+	       }
+	  }
+     }
+     {
+	  fftw_real tmp18;
+	  fftw_real tmp109;
+	  fftw_real tmp81;
+	  fftw_real tmp107;
+	  fftw_real tmp21;
+	  fftw_real tmp106;
+	  fftw_real tmp84;
+	  fftw_real tmp110;
+	  fftw_real tmp278;
+	  fftw_real tmp279;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp16;
+	       fftw_real tmp17;
+	       fftw_real tmp79;
+	       fftw_real tmp80;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp16 = c_re(input[2 * istride]);
+	       tmp17 = c_re(input[18 * istride]);
+	       tmp18 = tmp16 + tmp17;
+	       tmp109 = tmp16 - tmp17;
+	       tmp79 = c_im(input[2 * istride]);
+	       tmp80 = c_im(input[18 * istride]);
+	       tmp81 = tmp79 + tmp80;
+	       tmp107 = tmp79 - tmp80;
+	  }
+	  {
+	       fftw_real tmp19;
+	       fftw_real tmp20;
+	       fftw_real tmp82;
+	       fftw_real tmp83;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp19 = c_re(input[10 * istride]);
+	       tmp20 = c_re(input[26 * istride]);
+	       tmp21 = tmp19 + tmp20;
+	       tmp106 = tmp19 - tmp20;
+	       tmp82 = c_im(input[10 * istride]);
+	       tmp83 = c_im(input[26 * istride]);
+	       tmp84 = tmp82 + tmp83;
+	       tmp110 = tmp82 - tmp83;
+	  }
+	  tmp22 = tmp18 + tmp21;
+	  tmp278 = tmp81 - tmp84;
+	  tmp279 = tmp18 - tmp21;
+	  tmp280 = tmp278 - tmp279;
+	  tmp313 = tmp279 + tmp278;
+	  tmp85 = tmp81 + tmp84;
+	  {
+	       fftw_real tmp108;
+	       fftw_real tmp111;
+	       fftw_real tmp218;
+	       fftw_real tmp219;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp108 = tmp106 + tmp107;
+	       tmp111 = tmp109 - tmp110;
+	       tmp112 = (K382683432 * tmp108) - (K923879532 * tmp111);
+	       tmp185 = (K923879532 * tmp108) + (K382683432 * tmp111);
+	       tmp218 = tmp107 - tmp106;
+	       tmp219 = tmp109 + tmp110;
+	       tmp220 = (K923879532 * tmp218) - (K382683432 * tmp219);
+	       tmp245 = (K382683432 * tmp218) + (K923879532 * tmp219);
+	  }
+     }
+     {
+	  fftw_real tmp25;
+	  fftw_real tmp116;
+	  fftw_real tmp88;
+	  fftw_real tmp114;
+	  fftw_real tmp28;
+	  fftw_real tmp113;
+	  fftw_real tmp91;
+	  fftw_real tmp117;
+	  fftw_real tmp281;
+	  fftw_real tmp282;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp23;
+	       fftw_real tmp24;
+	       fftw_real tmp86;
+	       fftw_real tmp87;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp23 = c_re(input[30 * istride]);
+	       tmp24 = c_re(input[14 * istride]);
+	       tmp25 = tmp23 + tmp24;
+	       tmp116 = tmp23 - tmp24;
+	       tmp86 = c_im(input[30 * istride]);
+	       tmp87 = c_im(input[14 * istride]);
+	       tmp88 = tmp86 + tmp87;
+	       tmp114 = tmp86 - tmp87;
+	  }
+	  {
+	       fftw_real tmp26;
+	       fftw_real tmp27;
+	       fftw_real tmp89;
+	       fftw_real tmp90;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp26 = c_re(input[6 * istride]);
+	       tmp27 = c_re(input[22 * istride]);
+	       tmp28 = tmp26 + tmp27;
+	       tmp113 = tmp26 - tmp27;
+	       tmp89 = c_im(input[6 * istride]);
+	       tmp90 = c_im(input[22 * istride]);
+	       tmp91 = tmp89 + tmp90;
+	       tmp117 = tmp89 - tmp90;
+	  }
+	  tmp29 = tmp25 + tmp28;
+	  tmp281 = tmp25 - tmp28;
+	  tmp282 = tmp88 - tmp91;
+	  tmp283 = tmp281 + tmp282;
+	  tmp312 = tmp281 - tmp282;
+	  tmp92 = tmp88 + tmp91;
+	  {
+	       fftw_real tmp115;
+	       fftw_real tmp118;
+	       fftw_real tmp221;
+	       fftw_real tmp222;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp115 = tmp113 + tmp114;
+	       tmp118 = tmp116 - tmp117;
+	       tmp119 = (K382683432 * tmp115) + (K923879532 * tmp118);
+	       tmp184 = (K382683432 * tmp118) - (K923879532 * tmp115);
+	       tmp221 = tmp114 - tmp113;
+	       tmp222 = tmp116 + tmp117;
+	       tmp223 = (K923879532 * tmp221) + (K382683432 * tmp222);
+	       tmp244 = (K923879532 * tmp222) - (K382683432 * tmp221);
+	  }
+     }
+     {
+	  fftw_real tmp34;
+	  fftw_real tmp139;
+	  fftw_real tmp125;
+	  fftw_real tmp286;
+	  fftw_real tmp37;
+	  fftw_real tmp122;
+	  fftw_real tmp142;
+	  fftw_real tmp287;
+	  fftw_real tmp41;
+	  fftw_real tmp132;
+	  fftw_real tmp135;
+	  fftw_real tmp292;
+	  fftw_real tmp44;
+	  fftw_real tmp127;
+	  fftw_real tmp130;
+	  fftw_real tmp293;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp32;
+	       fftw_real tmp33;
+	       fftw_real tmp123;
+	       fftw_real tmp124;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp32 = c_re(input[istride]);
+	       tmp33 = c_re(input[17 * istride]);
+	       tmp34 = tmp32 + tmp33;
+	       tmp139 = tmp32 - tmp33;
+	       tmp123 = c_im(input[istride]);
+	       tmp124 = c_im(input[17 * istride]);
+	       tmp125 = tmp123 - tmp124;
+	       tmp286 = tmp123 + tmp124;
+	  }
+	  {
+	       fftw_real tmp35;
+	       fftw_real tmp36;
+	       fftw_real tmp140;
+	       fftw_real tmp141;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp35 = c_re(input[9 * istride]);
+	       tmp36 = c_re(input[25 * istride]);
+	       tmp37 = tmp35 + tmp36;
+	       tmp122 = tmp35 - tmp36;
+	       tmp140 = c_im(input[9 * istride]);
+	       tmp141 = c_im(input[25 * istride]);
+	       tmp142 = tmp140 - tmp141;
+	       tmp287 = tmp140 + tmp141;
+	  }
+	  {
+	       fftw_real tmp39;
+	       fftw_real tmp40;
+	       fftw_real tmp133;
+	       fftw_real tmp134;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp39 = c_re(input[5 * istride]);
+	       tmp40 = c_re(input[21 * istride]);
+	       tmp41 = tmp39 + tmp40;
+	       tmp132 = tmp39 - tmp40;
+	       tmp133 = c_im(input[5 * istride]);
+	       tmp134 = c_im(input[21 * istride]);
+	       tmp135 = tmp133 - tmp134;
+	       tmp292 = tmp133 + tmp134;
+	  }
+	  {
+	       fftw_real tmp42;
+	       fftw_real tmp43;
+	       fftw_real tmp128;
+	       fftw_real tmp129;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp42 = c_re(input[29 * istride]);
+	       tmp43 = c_re(input[13 * istride]);
+	       tmp44 = tmp42 + tmp43;
+	       tmp127 = tmp42 - tmp43;
+	       tmp128 = c_im(input[29 * istride]);
+	       tmp129 = c_im(input[13 * istride]);
+	       tmp130 = tmp128 - tmp129;
+	       tmp293 = tmp128 + tmp129;
+	  }
+	  {
+	       fftw_real tmp291;
+	       fftw_real tmp294;
+	       fftw_real tmp288;
+	       fftw_real tmp289;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp126 = tmp122 + tmp125;
+	       tmp229 = tmp125 - tmp122;
+	       tmp38 = tmp34 + tmp37;
+	       tmp45 = tmp41 + tmp44;
+	       tmp346 = tmp38 - tmp45;
+	       tmp291 = tmp34 - tmp37;
+	       tmp294 = tmp292 - tmp293;
+	       tmp295 = tmp291 - tmp294;
+	       tmp327 = tmp291 + tmp294;
+	       tmp347 = tmp286 + tmp287;
+	       tmp348 = tmp292 + tmp293;
+	       tmp349 = tmp347 - tmp348;
+	       tmp143 = tmp139 - tmp142;
+	       tmp226 = tmp139 + tmp142;
+	       tmp288 = tmp286 - tmp287;
+	       tmp289 = tmp44 - tmp41;
+	       tmp290 = tmp288 - tmp289;
+	       tmp326 = tmp289 + tmp288;
+	       {
+		    fftw_real tmp131;
+		    fftw_real tmp136;
+		    fftw_real tmp144;
+		    fftw_real tmp145;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp131 = tmp127 - tmp130;
+		    tmp136 = tmp132 + tmp135;
+		    tmp137 = K707106781 * (tmp131 - tmp136);
+		    tmp227 = K707106781 * (tmp136 + tmp131);
+		    tmp144 = tmp135 - tmp132;
+		    tmp145 = tmp127 + tmp130;
+		    tmp146 = K707106781 * (tmp144 - tmp145);
+		    tmp230 = K707106781 * (tmp144 + tmp145);
+	       }
+	  }
+     }
+     {
+	  fftw_real tmp285;
+	  fftw_real tmp317;
+	  fftw_real tmp320;
+	  fftw_real tmp322;
+	  fftw_real tmp308;
+	  fftw_real tmp316;
+	  fftw_real tmp315;
+	  fftw_real tmp321;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp277;
+	       fftw_real tmp284;
+	       fftw_real tmp318;
+	       fftw_real tmp319;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp277 = tmp275 - tmp276;
+	       tmp284 = K707106781 * (tmp280 - tmp283);
+	       tmp285 = tmp277 + tmp284;
+	       tmp317 = tmp277 - tmp284;
+	       tmp318 = (K382683432 * tmp290) - (K923879532 * tmp295);
+	       tmp319 = (K382683432 * tmp306) + (K923879532 * tmp301);
+	       tmp320 = tmp318 - tmp319;
+	       tmp322 = tmp318 + tmp319;
+	  }
+	  {
+	       fftw_real tmp296;
+	       fftw_real tmp307;
+	       fftw_real tmp311;
+	       fftw_real tmp314;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp296 = (K923879532 * tmp290) + (K382683432 * tmp295);
+	       tmp307 = (K382683432 * tmp301) - (K923879532 * tmp306);
+	       tmp308 = tmp296 + tmp307;
+	       tmp316 = tmp307 - tmp296;
+	       tmp311 = tmp309 - tmp310;
+	       tmp314 = K707106781 * (tmp312 - tmp313);
+	       tmp315 = tmp311 - tmp314;
+	       tmp321 = tmp311 + tmp314;
+	  }
+	  c_re(output[22 * ostride]) = tmp285 - tmp308;
+	  c_re(output[6 * ostride]) = tmp285 + tmp308;
+	  c_im(output[30 * ostride]) = tmp315 - tmp316;
+	  c_im(output[14 * ostride]) = tmp315 + tmp316;
+	  c_re(output[30 * ostride]) = tmp317 - tmp320;
+	  c_re(output[14 * ostride]) = tmp317 + tmp320;
+	  c_im(output[22 * ostride]) = tmp321 - tmp322;
+	  c_im(output[6 * ostride]) = tmp321 + tmp322;
+     }
+     {
+	  fftw_real tmp325;
+	  fftw_real tmp337;
+	  fftw_real tmp340;
+	  fftw_real tmp342;
+	  fftw_real tmp332;
+	  fftw_real tmp336;
+	  fftw_real tmp335;
+	  fftw_real tmp341;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp323;
+	       fftw_real tmp324;
+	       fftw_real tmp338;
+	       fftw_real tmp339;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp323 = tmp275 + tmp276;
+	       tmp324 = K707106781 * (tmp313 + tmp312);
+	       tmp325 = tmp323 + tmp324;
+	       tmp337 = tmp323 - tmp324;
+	       tmp338 = (K923879532 * tmp326) - (K382683432 * tmp327);
+	       tmp339 = (K923879532 * tmp330) + (K382683432 * tmp329);
+	       tmp340 = tmp338 - tmp339;
+	       tmp342 = tmp338 + tmp339;
+	  }
+	  {
+	       fftw_real tmp328;
+	       fftw_real tmp331;
+	       fftw_real tmp333;
+	       fftw_real tmp334;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp328 = (K382683432 * tmp326) + (K923879532 * tmp327);
+	       tmp331 = (K923879532 * tmp329) - (K382683432 * tmp330);
+	       tmp332 = tmp328 + tmp331;
+	       tmp336 = tmp331 - tmp328;
+	       tmp333 = tmp310 + tmp309;
+	       tmp334 = K707106781 * (tmp280 + tmp283);
+	       tmp335 = tmp333 - tmp334;
+	       tmp341 = tmp333 + tmp334;
+	  }
+	  c_re(output[18 * ostride]) = tmp325 - tmp332;
+	  c_re(output[2 * ostride]) = tmp325 + tmp332;
+	  c_im(output[26 * ostride]) = tmp335 - tmp336;
+	  c_im(output[10 * ostride]) = tmp335 + tmp336;
+	  c_re(output[26 * ostride]) = tmp337 - tmp340;
+	  c_re(output[10 * ostride]) = tmp337 + tmp340;
+	  c_im(output[18 * ostride]) = tmp341 - tmp342;
+	  c_im(output[2 * ostride]) = tmp341 + tmp342;
+     }
+     {
+	  fftw_real tmp345;
+	  fftw_real tmp361;
+	  fftw_real tmp364;
+	  fftw_real tmp366;
+	  fftw_real tmp356;
+	  fftw_real tmp360;
+	  fftw_real tmp359;
+	  fftw_real tmp365;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp343;
+	       fftw_real tmp344;
+	       fftw_real tmp362;
+	       fftw_real tmp363;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp343 = tmp7 - tmp14;
+	       tmp344 = tmp85 - tmp92;
+	       tmp345 = tmp343 + tmp344;
+	       tmp361 = tmp343 - tmp344;
+	       tmp362 = tmp349 - tmp346;
+	       tmp363 = tmp351 + tmp354;
+	       tmp364 = K707106781 * (tmp362 - tmp363);
+	       tmp366 = K707106781 * (tmp362 + tmp363);
+	  }
+	  {
+	       fftw_real tmp350;
+	       fftw_real tmp355;
+	       fftw_real tmp357;
+	       fftw_real tmp358;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp350 = tmp346 + tmp349;
+	       tmp355 = tmp351 - tmp354;
+	       tmp356 = K707106781 * (tmp350 + tmp355);
+	       tmp360 = K707106781 * (tmp355 - tmp350);
+	       tmp357 = tmp70 - tmp77;
+	       tmp358 = tmp29 - tmp22;
+	       tmp359 = tmp357 - tmp358;
+	       tmp365 = tmp358 + tmp357;
+	  }
+	  c_re(output[20 * ostride]) = tmp345 - tmp356;
+	  c_re(output[4 * ostride]) = tmp345 + tmp356;
+	  c_im(output[28 * ostride]) = tmp359 - tmp360;
+	  c_im(output[12 * ostride]) = tmp359 + tmp360;
+	  c_re(output[28 * ostride]) = tmp361 - tmp364;
+	  c_re(output[12 * ostride]) = tmp361 + tmp364;
+	  c_im(output[20 * ostride]) = tmp365 - tmp366;
+	  c_im(output[4 * ostride]) = tmp365 + tmp366;
+     }
+     {
+	  fftw_real tmp31;
+	  fftw_real tmp367;
+	  fftw_real tmp370;
+	  fftw_real tmp372;
+	  fftw_real tmp62;
+	  fftw_real tmp63;
+	  fftw_real tmp94;
+	  fftw_real tmp371;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp15;
+	       fftw_real tmp30;
+	       fftw_real tmp368;
+	       fftw_real tmp369;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp15 = tmp7 + tmp14;
+	       tmp30 = tmp22 + tmp29;
+	       tmp31 = tmp15 + tmp30;
+	       tmp367 = tmp15 - tmp30;
+	       tmp368 = tmp347 + tmp348;
+	       tmp369 = tmp352 + tmp353;
+	       tmp370 = tmp368 - tmp369;
+	       tmp372 = tmp368 + tmp369;
+	  }
+	  {
+	       fftw_real tmp46;
+	       fftw_real tmp61;
+	       fftw_real tmp78;
+	       fftw_real tmp93;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp46 = tmp38 + tmp45;
+	       tmp61 = tmp53 + tmp60;
+	       tmp62 = tmp46 + tmp61;
+	       tmp63 = tmp61 - tmp46;
+	       tmp78 = tmp70 + tmp77;
+	       tmp93 = tmp85 + tmp92;
+	       tmp94 = tmp78 - tmp93;
+	       tmp371 = tmp78 + tmp93;
+	  }
+	  c_re(output[16 * ostride]) = tmp31 - tmp62;
+	  c_re(output[0]) = tmp31 + tmp62;
+	  c_im(output[8 * ostride]) = tmp63 + tmp94;
+	  c_im(output[24 * ostride]) = tmp94 - tmp63;
+	  c_re(output[24 * ostride]) = tmp367 - tmp370;
+	  c_re(output[8 * ostride]) = tmp367 + tmp370;
+	  c_im(output[16 * ostride]) = tmp371 - tmp372;
+	  c_im(output[0]) = tmp371 + tmp372;
+     }
+     {
+	  fftw_real tmp121;
+	  fftw_real tmp189;
+	  fftw_real tmp187;
+	  fftw_real tmp193;
+	  fftw_real tmp148;
+	  fftw_real tmp190;
+	  fftw_real tmp175;
+	  fftw_real tmp191;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp105;
+	       fftw_real tmp120;
+	       fftw_real tmp183;
+	       fftw_real tmp186;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp105 = tmp97 - tmp104;
+	       tmp120 = tmp112 - tmp119;
+	       tmp121 = tmp105 + tmp120;
+	       tmp189 = tmp105 - tmp120;
+	       tmp183 = tmp179 - tmp182;
+	       tmp186 = tmp184 - tmp185;
+	       tmp187 = tmp183 - tmp186;
+	       tmp193 = tmp183 + tmp186;
+	  }
+	  {
+	       fftw_real tmp138;
+	       fftw_real tmp147;
+	       fftw_real tmp165;
+	       fftw_real tmp174;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp138 = tmp126 - tmp137;
+	       tmp147 = tmp143 - tmp146;
+	       tmp148 = (K980785280 * tmp138) + (K195090322 * tmp147);
+	       tmp190 = (K195090322 * tmp138) - (K980785280 * tmp147);
+	       tmp165 = tmp153 - tmp164;
+	       tmp174 = tmp170 - tmp173;
+	       tmp175 = (K195090322 * tmp165) - (K980785280 * tmp174);
+	       tmp191 = (K195090322 * tmp174) + (K980785280 * tmp165);
+	  }
+	  {
+	       fftw_real tmp176;
+	       fftw_real tmp188;
+	       fftw_real tmp192;
+	       fftw_real tmp194;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp176 = tmp148 + tmp175;
+	       c_re(output[23 * ostride]) = tmp121 - tmp176;
+	       c_re(output[7 * ostride]) = tmp121 + tmp176;
+	       tmp188 = tmp175 - tmp148;
+	       c_im(output[31 * ostride]) = tmp187 - tmp188;
+	       c_im(output[15 * ostride]) = tmp187 + tmp188;
+	       tmp192 = tmp190 - tmp191;
+	       c_re(output[31 * ostride]) = tmp189 - tmp192;
+	       c_re(output[15 * ostride]) = tmp189 + tmp192;
+	       tmp194 = tmp190 + tmp191;
+	       c_im(output[23 * ostride]) = tmp193 - tmp194;
+	       c_im(output[7 * ostride]) = tmp193 + tmp194;
+	  }
+     }
+     {
+	  fftw_real tmp197;
+	  fftw_real tmp209;
+	  fftw_real tmp207;
+	  fftw_real tmp213;
+	  fftw_real tmp200;
+	  fftw_real tmp210;
+	  fftw_real tmp203;
+	  fftw_real tmp211;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp195;
+	       fftw_real tmp196;
+	       fftw_real tmp205;
+	       fftw_real tmp206;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp195 = tmp97 + tmp104;
+	       tmp196 = tmp185 + tmp184;
+	       tmp197 = tmp195 + tmp196;
+	       tmp209 = tmp195 - tmp196;
+	       tmp205 = tmp179 + tmp182;
+	       tmp206 = tmp112 + tmp119;
+	       tmp207 = tmp205 - tmp206;
+	       tmp213 = tmp205 + tmp206;
+	  }
+	  {
+	       fftw_real tmp198;
+	       fftw_real tmp199;
+	       fftw_real tmp201;
+	       fftw_real tmp202;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp198 = tmp126 + tmp137;
+	       tmp199 = tmp143 + tmp146;
+	       tmp200 = (K555570233 * tmp198) + (K831469612 * tmp199);
+	       tmp210 = (K831469612 * tmp198) - (K555570233 * tmp199);
+	       tmp201 = tmp153 + tmp164;
+	       tmp202 = tmp170 + tmp173;
+	       tmp203 = (K831469612 * tmp201) - (K555570233 * tmp202);
+	       tmp211 = (K831469612 * tmp202) + (K555570233 * tmp201);
+	  }
+	  {
+	       fftw_real tmp204;
+	       fftw_real tmp208;
+	       fftw_real tmp212;
+	       fftw_real tmp214;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp204 = tmp200 + tmp203;
+	       c_re(output[19 * ostride]) = tmp197 - tmp204;
+	       c_re(output[3 * ostride]) = tmp197 + tmp204;
+	       tmp208 = tmp203 - tmp200;
+	       c_im(output[27 * ostride]) = tmp207 - tmp208;
+	       c_im(output[11 * ostride]) = tmp207 + tmp208;
+	       tmp212 = tmp210 - tmp211;
+	       c_re(output[27 * ostride]) = tmp209 - tmp212;
+	       c_re(output[11 * ostride]) = tmp209 + tmp212;
+	       tmp214 = tmp210 + tmp211;
+	       c_im(output[19 * ostride]) = tmp213 - tmp214;
+	       c_im(output[3 * ostride]) = tmp213 + tmp214;
+	  }
+     }
+     {
+	  fftw_real tmp225;
+	  fftw_real tmp249;
+	  fftw_real tmp247;
+	  fftw_real tmp253;
+	  fftw_real tmp232;
+	  fftw_real tmp250;
+	  fftw_real tmp239;
+	  fftw_real tmp251;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp217;
+	       fftw_real tmp224;
+	       fftw_real tmp243;
+	       fftw_real tmp246;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp217 = tmp215 - tmp216;
+	       tmp224 = tmp220 - tmp223;
+	       tmp225 = tmp217 + tmp224;
+	       tmp249 = tmp217 - tmp224;
+	       tmp243 = tmp241 - tmp242;
+	       tmp246 = tmp244 - tmp245;
+	       tmp247 = tmp243 - tmp246;
+	       tmp253 = tmp243 + tmp246;
+	  }
+	  {
+	       fftw_real tmp228;
+	       fftw_real tmp231;
+	       fftw_real tmp235;
+	       fftw_real tmp238;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp228 = tmp226 - tmp227;
+	       tmp231 = tmp229 - tmp230;
+	       tmp232 = (K555570233 * tmp228) + (K831469612 * tmp231);
+	       tmp250 = (K555570233 * tmp231) - (K831469612 * tmp228);
+	       tmp235 = tmp233 - tmp234;
+	       tmp238 = tmp236 - tmp237;
+	       tmp239 = (K555570233 * tmp235) - (K831469612 * tmp238);
+	       tmp251 = (K831469612 * tmp235) + (K555570233 * tmp238);
+	  }
+	  {
+	       fftw_real tmp240;
+	       fftw_real tmp248;
+	       fftw_real tmp252;
+	       fftw_real tmp254;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp240 = tmp232 + tmp239;
+	       c_re(output[21 * ostride]) = tmp225 - tmp240;
+	       c_re(output[5 * ostride]) = tmp225 + tmp240;
+	       tmp248 = tmp239 - tmp232;
+	       c_im(output[29 * ostride]) = tmp247 - tmp248;
+	       c_im(output[13 * ostride]) = tmp247 + tmp248;
+	       tmp252 = tmp250 - tmp251;
+	       c_re(output[29 * ostride]) = tmp249 - tmp252;
+	       c_re(output[13 * ostride]) = tmp249 + tmp252;
+	       tmp254 = tmp250 + tmp251;
+	       c_im(output[21 * ostride]) = tmp253 - tmp254;
+	       c_im(output[5 * ostride]) = tmp253 + tmp254;
+	  }
+     }
+     {
+	  fftw_real tmp257;
+	  fftw_real tmp269;
+	  fftw_real tmp267;
+	  fftw_real tmp273;
+	  fftw_real tmp260;
+	  fftw_real tmp270;
+	  fftw_real tmp263;
+	  fftw_real tmp271;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp255;
+	       fftw_real tmp256;
+	       fftw_real tmp265;
+	       fftw_real tmp266;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp255 = tmp215 + tmp216;
+	       tmp256 = tmp245 + tmp244;
+	       tmp257 = tmp255 + tmp256;
+	       tmp269 = tmp255 - tmp256;
+	       tmp265 = tmp241 + tmp242;
+	       tmp266 = tmp220 + tmp223;
+	       tmp267 = tmp265 - tmp266;
+	       tmp273 = tmp265 + tmp266;
+	  }
+	  {
+	       fftw_real tmp258;
+	       fftw_real tmp259;
+	       fftw_real tmp261;
+	       fftw_real tmp262;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp258 = tmp226 + tmp227;
+	       tmp259 = tmp229 + tmp230;
+	       tmp260 = (K980785280 * tmp258) + (K195090322 * tmp259);
+	       tmp270 = (K980785280 * tmp259) - (K195090322 * tmp258);
+	       tmp261 = tmp233 + tmp234;
+	       tmp262 = tmp236 + tmp237;
+	       tmp263 = (K980785280 * tmp261) - (K195090322 * tmp262);
+	       tmp271 = (K195090322 * tmp261) + (K980785280 * tmp262);
+	  }
+	  {
+	       fftw_real tmp264;
+	       fftw_real tmp268;
+	       fftw_real tmp272;
+	       fftw_real tmp274;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp264 = tmp260 + tmp263;
+	       c_re(output[17 * ostride]) = tmp257 - tmp264;
+	       c_re(output[ostride]) = tmp257 + tmp264;
+	       tmp268 = tmp263 - tmp260;
+	       c_im(output[25 * ostride]) = tmp267 - tmp268;
+	       c_im(output[9 * ostride]) = tmp267 + tmp268;
+	       tmp272 = tmp270 - tmp271;
+	       c_re(output[25 * ostride]) = tmp269 - tmp272;
+	       c_re(output[9 * ostride]) = tmp269 + tmp272;
+	       tmp274 = tmp270 + tmp271;
+	       c_im(output[17 * ostride]) = tmp273 - tmp274;
+	       c_im(output[ostride]) = tmp273 + tmp274;
+	  }
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_32_desc = {	/* descriptor record for the fftw_no_twiddle_32 codelet */
+     "fftw_no_twiddle_32",	/* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_32,	/* the codelet itself, cast to an unprototyped function pointer */
+     32,	/* matches the 32 in the codelet name; presumably the transform size */
+     FFTW_FORWARD,	/* direction tag */
+     FFTW_NOTW,	/* codelet-kind tag (no-twiddle, going by the name) */
+     705,	/* numeric id; equals 22 * 32 + 1 -- meaning not visible here, TODO confirm against the fftw_codelet_desc declaration */
+     0,	/* presumably the twiddle-factor count -- TODO confirm */
+     (const int *) 0,	/* null pointer; presumably an unused twiddle-order table -- TODO confirm */
+};
diff --git a/src/fftw/fn_4.c b/src/fftw/fn_4.c
new file mode 100644
index 0000000..23bd538
--- /dev/null
+++ b/src/fftw/fn_4.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:37 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 4 */
+
+/*
+ * This function contains 16 FP additions, 0 FP multiplications,
+ * (or, 16 additions, 0 multiplications, 0 fused multiply/add),
+ * 12 stack variables, and 16 memory accesses
+ */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_4(const fftw_complex *input, fftw_complex *output,	/* 4-point forward DFT: X[k] = sum over j of x[j] * (-i)^(j*k), with x[j] = input[j * istride] and X[k] = output[k * ostride] */
+		       int istride, int ostride)	/* both strides are counted in fftw_complex elements (they index the pointers directly) */
+{
+     fftw_real tmp3;	/* re(x0) + re(x2) */
+     fftw_real tmp11;	/* re(x0) - re(x2) */
+     fftw_real tmp9;	/* im(x0) - im(x2) */
+     fftw_real tmp15;	/* im(x0) + im(x2) */
+     fftw_real tmp6;	/* re(x1) + re(x3) */
+     fftw_real tmp10;	/* re(x1) - re(x3) */
+     fftw_real tmp14;	/* im(x1) - im(x3) */
+     fftw_real tmp16;	/* im(x1) + im(x3) */
+     ASSERT_ALIGNED_DOUBLE;	/* macro defined outside this file; presumably the check requested by genfft's -magic-alignment-check -- TODO confirm */
+     {	/* butterfly on the even-indexed inputs x0 and x2 */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp7;
+	  fftw_real tmp8;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[2 * istride]);
+	  tmp3 = tmp1 + tmp2;
+	  tmp11 = tmp1 - tmp2;
+	  tmp7 = c_im(input[0]);
+	  tmp8 = c_im(input[2 * istride]);
+	  tmp9 = tmp7 - tmp8;
+	  tmp15 = tmp7 + tmp8;
+     }
+     {	/* butterfly on the odd-indexed inputs x1 and x3 */
+	  fftw_real tmp4;
+	  fftw_real tmp5;
+	  fftw_real tmp12;
+	  fftw_real tmp13;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp4 = c_re(input[istride]);
+	  tmp5 = c_re(input[3 * istride]);
+	  tmp6 = tmp4 + tmp5;
+	  tmp10 = tmp4 - tmp5;
+	  tmp12 = c_im(input[istride]);
+	  tmp13 = c_im(input[3 * istride]);
+	  tmp14 = tmp12 - tmp13;
+	  tmp16 = tmp12 + tmp13;
+     }
+     c_re(output[2 * ostride]) = tmp3 - tmp6;	/* re X2: even sum minus odd sum */
+     c_re(output[0]) = tmp3 + tmp6;	/* re X0: sum of all four real parts */
+     c_im(output[ostride]) = tmp9 - tmp10;	/* im X1 = (im x0 - im x2) - (re x1 - re x3) */
+     c_im(output[3 * ostride]) = tmp10 + tmp9;	/* im X3 = (im x0 - im x2) + (re x1 - re x3) */
+     c_re(output[3 * ostride]) = tmp11 - tmp14;	/* re X3 = (re x0 - re x2) - (im x1 - im x3) */
+     c_re(output[ostride]) = tmp11 + tmp14;	/* re X1 = (re x0 - re x2) + (im x1 - im x3) */
+     c_im(output[2 * ostride]) = tmp15 - tmp16;	/* im X2: even sum minus odd sum */
+     c_im(output[0]) = tmp15 + tmp16;	/* im X0: sum of all four imaginary parts */
+}
+
+fftw_codelet_desc fftw_no_twiddle_4_desc = {	/* descriptor record for the fftw_no_twiddle_4 codelet */
+     "fftw_no_twiddle_4",	/* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_4,	/* the codelet itself, cast to an unprototyped function pointer */
+     4,	/* number of complex points the codelet transforms */
+     FFTW_FORWARD,	/* direction tag; the codelet applies the (-i)^(j*k) sign convention */
+     FFTW_NOTW,	/* codelet-kind tag (no-twiddle, going by the name) */
+     89,	/* numeric id; equals 22 * 4 + 1 -- meaning not visible here, TODO confirm against the fftw_codelet_desc declaration */
+     0,	/* presumably the twiddle-factor count -- TODO confirm */
+     (const int *) 0,	/* null pointer; presumably an unused twiddle-order table -- TODO confirm */
+};
diff --git a/src/fftw/fn_5.c b/src/fftw/fn_5.c
new file mode 100644
index 0000000..03e5460
--- /dev/null
+++ b/src/fftw/fn_5.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:37 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 5 */
+
+/*
+ * This function contains 32 FP additions, 12 FP multiplications,
+ * (or, 26 additions, 6 multiplications, 6 fused multiply/add),
+ * 16 stack variables, and 20 memory accesses
+ */
+static const fftw_real K250000000 =
+FFTW_KONST(+0.250000000000000000000000000000000000000000000);	/* 1/4 */
+static const fftw_real K587785252 =
+FFTW_KONST(+0.587785252292473129168705954639072768597652438);	/* sin(pi/5), which also equals sin(4*pi/5) */
+static const fftw_real K951056516 =
+FFTW_KONST(+0.951056516295153572116439333379382143405698634);	/* sin(2*pi/5) */
+static const fftw_real K559016994 =
+FFTW_KONST(+0.559016994374947424102293417182819058860154590);	/* sqrt(5)/4 */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_5(const fftw_complex *input, fftw_complex *output,	/* 5-point forward DFT: X[k] = sum over j of x[j] * exp(-2*pi*i*j*k/5), with x[j] = input[j * istride] and X[k] = output[k * ostride] */
+		       int istride, int ostride)	/* both strides are counted in fftw_complex elements (they index the pointers directly) */
+{
+     fftw_real tmp1;	/* re(x0) */
+     fftw_real tmp24;	/* im(x0) */
+     fftw_real tmp8;	/* re(x1) + re(x2) + re(x3) + re(x4) */
+     fftw_real tmp9;	/* (sqrt(5)/4) * ((re x1 + re x4) - (re x2 + re x3)) */
+     fftw_real tmp28;	/* re(x1) - re(x4) */
+     fftw_real tmp29;	/* re(x2) - re(x3) */
+     fftw_real tmp14;	/* im(x1) - im(x4) */
+     fftw_real tmp25;	/* im(x1) + im(x2) + im(x3) + im(x4) */
+     fftw_real tmp23;	/* (sqrt(5)/4) * ((im x1 + im x4) - (im x2 + im x3)) */
+     fftw_real tmp17;	/* im(x2) - im(x3) */
+     ASSERT_ALIGNED_DOUBLE;	/* macro defined outside this file; presumably the check requested by genfft's -magic-alignment-check -- TODO confirm */
+     tmp1 = c_re(input[0]);
+     tmp24 = c_im(input[0]);
+     {	/* real parts of x1..x4, paired as (x1, x4) and (x2, x3) */
+	  fftw_real tmp2;
+	  fftw_real tmp3;
+	  fftw_real tmp4;
+	  fftw_real tmp5;
+	  fftw_real tmp6;
+	  fftw_real tmp7;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp2 = c_re(input[istride]);
+	  tmp3 = c_re(input[4 * istride]);
+	  tmp4 = tmp2 + tmp3;
+	  tmp5 = c_re(input[2 * istride]);
+	  tmp6 = c_re(input[3 * istride]);
+	  tmp7 = tmp5 + tmp6;
+	  tmp8 = tmp4 + tmp7;
+	  tmp9 = K559016994 * (tmp4 - tmp7);
+	  tmp28 = tmp2 - tmp3;
+	  tmp29 = tmp5 - tmp6;
+     }
+     {	/* imaginary parts of x1..x4, same pairing */
+	  fftw_real tmp12;
+	  fftw_real tmp13;
+	  fftw_real tmp21;
+	  fftw_real tmp15;
+	  fftw_real tmp16;
+	  fftw_real tmp22;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp12 = c_im(input[istride]);
+	  tmp13 = c_im(input[4 * istride]);
+	  tmp21 = tmp12 + tmp13;
+	  tmp15 = c_im(input[2 * istride]);
+	  tmp16 = c_im(input[3 * istride]);
+	  tmp22 = tmp15 + tmp16;
+	  tmp14 = tmp12 - tmp13;
+	  tmp25 = tmp21 + tmp22;
+	  tmp23 = K559016994 * (tmp21 - tmp22);
+	  tmp17 = tmp15 - tmp16;
+     }
+     c_re(output[0]) = tmp1 + tmp8;	/* re X0: sum of all five real parts */
+     {	/* real parts of X1..X4 */
+	  fftw_real tmp18;
+	  fftw_real tmp20;
+	  fftw_real tmp11;
+	  fftw_real tmp19;
+	  fftw_real tmp10;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp18 = (K951056516 * tmp14) + (K587785252 * tmp17);	/* sine-weighted term separating X1 from X4 */
+	  tmp20 = (K951056516 * tmp17) - (K587785252 * tmp14);	/* sine-weighted term separating X2 from X3 */
+	  tmp10 = tmp1 - (K250000000 * tmp8);	/* part shared by all of re X1..X4 */
+	  tmp11 = tmp9 + tmp10;
+	  tmp19 = tmp10 - tmp9;
+	  c_re(output[4 * ostride]) = tmp11 - tmp18;
+	  c_re(output[ostride]) = tmp11 + tmp18;
+	  c_re(output[2 * ostride]) = tmp19 - tmp20;
+	  c_re(output[3 * ostride]) = tmp19 + tmp20;
+     }
+     c_im(output[0]) = tmp25 + tmp24;	/* im X0: sum of all five imaginary parts */
+     {	/* imaginary parts of X1..X4; same shape as the real block, with the sine terms built from real-part differences */
+	  fftw_real tmp30;
+	  fftw_real tmp31;
+	  fftw_real tmp27;
+	  fftw_real tmp32;
+	  fftw_real tmp26;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp30 = (K951056516 * tmp28) + (K587785252 * tmp29);
+	  tmp31 = (K951056516 * tmp29) - (K587785252 * tmp28);
+	  tmp26 = tmp24 - (K250000000 * tmp25);	/* part shared by all of im X1..X4 */
+	  tmp27 = tmp23 + tmp26;
+	  tmp32 = tmp26 - tmp23;
+	  c_im(output[ostride]) = tmp27 - tmp30;
+	  c_im(output[4 * ostride]) = tmp30 + tmp27;
+	  c_im(output[2 * ostride]) = tmp31 + tmp32;
+	  c_im(output[3 * ostride]) = tmp32 - tmp31;
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_5_desc = {	/* descriptor record for the fftw_no_twiddle_5 codelet */
+     "fftw_no_twiddle_5",	/* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_5,	/* the codelet itself, cast to an unprototyped function pointer */
+     5,	/* number of complex points the codelet transforms */
+     FFTW_FORWARD,	/* direction tag; the codelet applies the exp(-2*pi*i*j*k/5) sign convention */
+     FFTW_NOTW,	/* codelet-kind tag (no-twiddle, going by the name) */
+     111,	/* numeric id; equals 22 * 5 + 1 -- meaning not visible here, TODO confirm against the fftw_codelet_desc declaration */
+     0,	/* presumably the twiddle-factor count -- TODO confirm */
+     (const int *) 0,	/* null pointer; presumably an unused twiddle-order table -- TODO confirm */
+};
diff --git a/src/fftw/fn_6.c b/src/fftw/fn_6.c
new file mode 100644
index 0000000..c034d04
--- /dev/null
+++ b/src/fftw/fn_6.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:37 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 6 */
+
+/*
+ * This function contains 36 FP additions, 8 FP multiplications,
+ * (or, 32 additions, 4 multiplications, 4 fused multiply/add),
+ * 20 stack variables, and 24 memory accesses
+ */
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);	/* sqrt(3)/2 = sin(pi/3) */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);	/* 1/2 = cos(pi/3) */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_6(const fftw_complex *input, fftw_complex *output,	/* 6-point forward DFT: X[k] = sum over j of x[j] * exp(-2*pi*i*j*k/6), with x[j] = input[j * istride] and X[k] = output[k * ostride] */
+		       int istride, int ostride)	/* both strides are counted in fftw_complex elements (they index the pointers directly) */
+{
+     fftw_real tmp3;	/* re(x0) - re(x3) */
+     fftw_real tmp11;	/* re(x0) + re(x3) */
+     fftw_real tmp26;	/* im(x0) - im(x3) */
+     fftw_real tmp33;	/* im(x0) + im(x3) */
+     fftw_real tmp6;	/* re(x2) - re(x5) */
+     fftw_real tmp12;	/* re(x2) + re(x5) */
+     fftw_real tmp9;	/* re(x4) - re(x1) */
+     fftw_real tmp13;	/* re(x4) + re(x1) */
+     fftw_real tmp10;	/* tmp6 + tmp9 */
+     fftw_real tmp14;	/* tmp12 + tmp13 */
+     fftw_real tmp18;	/* im(x2) - im(x5) */
+     fftw_real tmp30;	/* im(x2) + im(x5) */
+     fftw_real tmp21;	/* im(x4) - im(x1) */
+     fftw_real tmp31;	/* im(x4) + im(x1) */
+     fftw_real tmp27;	/* tmp18 + tmp21 */
+     fftw_real tmp34;	/* tmp30 + tmp31 */
+     ASSERT_ALIGNED_DOUBLE;	/* macro defined outside this file; presumably the check requested by genfft's -magic-alignment-check -- TODO confirm */
+     {	/* pair (x0, x3): sum and difference of real and imaginary parts */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp24;
+	  fftw_real tmp25;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[3 * istride]);
+	  tmp3 = tmp1 - tmp2;
+	  tmp11 = tmp1 + tmp2;
+	  tmp24 = c_im(input[0]);
+	  tmp25 = c_im(input[3 * istride]);
+	  tmp26 = tmp24 - tmp25;
+	  tmp33 = tmp24 + tmp25;
+     }
+     {	/* real parts of the pairs (x2, x5) and (x4, x1) */
+	  fftw_real tmp4;
+	  fftw_real tmp5;
+	  fftw_real tmp7;
+	  fftw_real tmp8;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp4 = c_re(input[2 * istride]);
+	  tmp5 = c_re(input[5 * istride]);
+	  tmp6 = tmp4 - tmp5;
+	  tmp12 = tmp4 + tmp5;
+	  tmp7 = c_re(input[4 * istride]);
+	  tmp8 = c_re(input[istride]);
+	  tmp9 = tmp7 - tmp8;
+	  tmp13 = tmp7 + tmp8;
+     }
+     tmp10 = tmp6 + tmp9;
+     tmp14 = tmp12 + tmp13;
+     {	/* imaginary parts of the pairs (x2, x5) and (x4, x1) */
+	  fftw_real tmp16;
+	  fftw_real tmp17;
+	  fftw_real tmp19;
+	  fftw_real tmp20;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp16 = c_im(input[2 * istride]);
+	  tmp17 = c_im(input[5 * istride]);
+	  tmp18 = tmp16 - tmp17;
+	  tmp30 = tmp16 + tmp17;
+	  tmp19 = c_im(input[4 * istride]);
+	  tmp20 = c_im(input[istride]);
+	  tmp21 = tmp19 - tmp20;
+	  tmp31 = tmp19 + tmp20;
+     }
+     tmp27 = tmp18 + tmp21;
+     tmp34 = tmp30 + tmp31;
+     {	/* real parts: X1, X3, X5 come from the pair differences, X0, X2, X4 from the pair sums */
+	  fftw_real tmp15;
+	  fftw_real tmp22;
+	  fftw_real tmp29;
+	  fftw_real tmp32;
+	  ASSERT_ALIGNED_DOUBLE;
+	  c_re(output[3 * ostride]) = tmp3 + tmp10;	/* re X3: alternating-sign sum of the real parts */
+	  tmp15 = tmp3 - (K500000000 * tmp10);
+	  tmp22 = K866025403 * (tmp18 - tmp21);
+	  c_re(output[5 * ostride]) = tmp15 - tmp22;
+	  c_re(output[ostride]) = tmp15 + tmp22;
+	  c_re(output[0]) = tmp11 + tmp14;	/* re X0: sum of all six real parts */
+	  tmp29 = tmp11 - (K500000000 * tmp14);
+	  tmp32 = K866025403 * (tmp30 - tmp31);
+	  c_re(output[2 * ostride]) = tmp29 - tmp32;
+	  c_re(output[4 * ostride]) = tmp29 + tmp32;
+     }
+     {	/* imaginary parts, same layout; the sqrt(3)/2 terms are built from real-part differences */
+	  fftw_real tmp23;
+	  fftw_real tmp28;
+	  fftw_real tmp35;
+	  fftw_real tmp36;
+	  ASSERT_ALIGNED_DOUBLE;
+	  c_im(output[3 * ostride]) = tmp26 + tmp27;	/* im X3: alternating-sign sum of the imaginary parts */
+	  tmp23 = K866025403 * (tmp9 - tmp6);
+	  tmp28 = tmp26 - (K500000000 * tmp27);
+	  c_im(output[ostride]) = tmp23 + tmp28;
+	  c_im(output[5 * ostride]) = tmp28 - tmp23;
+	  c_im(output[0]) = tmp33 + tmp34;	/* im X0: sum of all six imaginary parts */
+	  tmp35 = tmp33 - (K500000000 * tmp34);
+	  tmp36 = K866025403 * (tmp13 - tmp12);
+	  c_im(output[2 * ostride]) = tmp35 - tmp36;
+	  c_im(output[4 * ostride]) = tmp36 + tmp35;
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_6_desc = {	/* descriptor record for the fftw_no_twiddle_6 codelet */
+     "fftw_no_twiddle_6",	/* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_6,	/* the codelet itself, cast to an unprototyped function pointer */
+     6,	/* number of complex points the codelet transforms */
+     FFTW_FORWARD,	/* direction tag; the codelet applies the exp(-2*pi*i*j*k/6) sign convention */
+     FFTW_NOTW,	/* codelet-kind tag (no-twiddle, going by the name) */
+     133,	/* numeric id; equals 22 * 6 + 1 -- meaning not visible here, TODO confirm against the fftw_codelet_desc declaration */
+     0,	/* presumably the twiddle-factor count -- TODO confirm */
+     (const int *) 0,	/* null pointer; presumably an unused twiddle-order table -- TODO confirm */
+};
diff --git a/src/fftw/fn_64.c b/src/fftw/fn_64.c
new file mode 100644
index 0000000..08ab4b6
--- /dev/null
+++ b/src/fftw/fn_64.c
@@ -0,0 +1,2464 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:51 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 64 */
+
+/*
+ * This function contains 912 FP additions, 248 FP multiplications,
+ * (or, 808 additions, 144 multiplications, 104 fused multiply/add),
+ * 156 stack variables, and 256 memory accesses
+ */
+static const fftw_real K555570233 = /* sin(3*pi/16); each K<digits> name below spells the leading digits of its value */
+FFTW_KONST(+0.555570233019602224742830813948532874374937191);
+static const fftw_real K831469612 = /* cos(3*pi/16) */
+FFTW_KONST(+0.831469612302545237078788377617905756738560812);
+static const fftw_real K881921264 = /* cos(5*pi/32) */
+FFTW_KONST(+0.881921264348355029712756863660388349508442621);
+static const fftw_real K471396736 = /* sin(5*pi/32) */
+FFTW_KONST(+0.471396736825997648556387625905254377657460319);
+static const fftw_real K290284677 = /* sin(3*pi/32) */
+FFTW_KONST(+0.290284677254462367636192375817395274691476278);
+static const fftw_real K956940335 = /* cos(3*pi/32) */
+FFTW_KONST(+0.956940335732208864935797886980269969482849206);
+static const fftw_real K980785280 = /* cos(pi/16) */
+FFTW_KONST(+0.980785280403230449126182236134239036973933731);
+static const fftw_real K195090322 = /* sin(pi/16) */
+FFTW_KONST(+0.195090322016128267848284868477022240927691618);
+static const fftw_real K995184726 = /* cos(pi/32) */
+FFTW_KONST(+0.995184726672196886244836953109479921575474869);
+static const fftw_real K098017140 = /* sin(pi/32) */
+FFTW_KONST(+0.098017140329560601994195563888641845861136673);
+static const fftw_real K773010453 = /* cos(7*pi/32) */
+FFTW_KONST(+0.773010453362736960810906609758469800971041293);
+static const fftw_real K634393284 = /* sin(7*pi/32) */
+FFTW_KONST(+0.634393284163645498215171613225493370675687095);
+static const fftw_real K923879532 = /* cos(pi/8) */
+FFTW_KONST(+0.923879532511286756128183189396788286822416626);
+static const fftw_real K382683432 = /* sin(pi/8) */
+FFTW_KONST(+0.382683432365089771728459984030398866761344562);
+static const fftw_real K707106781 = /* cos(pi/4) = sqrt(2)/2 */
+FFTW_KONST(+0.707106781186547524400844362104849039284835938);
+
+/*
+ * Generator Id's : 
+ * $Id: fn_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_64(const fftw_complex *input, fftw_complex *output,
+			int istride, int ostride)
+{
+     fftw_real tmp193;
+     fftw_real tmp471;
+     fftw_real tmp15;
+     fftw_real tmp815;
+     fftw_real tmp719;
+     fftw_real tmp781;
+     fftw_real tmp142;
+     fftw_real tmp849;
+     fftw_real tmp371;
+     fftw_real tmp537;
+     fftw_real tmp637;
+     fftw_real tmp755;
+     fftw_real tmp200;
+     fftw_real tmp538;
+     fftw_real tmp374;
+     fftw_real tmp472;
+     fftw_real tmp109;
+     fftw_real tmp837;
+     fftw_real tmp693;
+     fftw_real tmp773;
+     fftw_real tmp844;
+     fftw_real tmp892;
+     fftw_real tmp710;
+     fftw_real tmp776;
+     fftw_real tmp329;
+     fftw_real tmp429;
+     fftw_real tmp519;
+     fftw_real tmp593;
+     fftw_real tmp362;
+     fftw_real tmp432;
+     fftw_real tmp530;
+     fftw_real tmp596;
+     fftw_real tmp30;
+     fftw_real tmp850;
+     fftw_real tmp640;
+     fftw_real tmp721;
+     fftw_real tmp157;
+     fftw_real tmp816;
+     fftw_real tmp643;
+     fftw_real tmp720;
+     fftw_real tmp208;
+     fftw_real tmp377;
+     fftw_real tmp476;
+     fftw_real tmp541;
+     fftw_real tmp215;
+     fftw_real tmp376;
+     fftw_real tmp479;
+     fftw_real tmp540;
+     fftw_real tmp124;
+     fftw_real tmp845;
+     fftw_real tmp365;
+     fftw_real tmp430;
+     fftw_real tmp352;
+     fftw_real tmp433;
+     fftw_real tmp840;
+     fftw_real tmp893;
+     fftw_real tmp526;
+     fftw_real tmp597;
+     fftw_real tmp533;
+     fftw_real tmp594;
+     fftw_real tmp704;
+     fftw_real tmp777;
+     fftw_real tmp713;
+     fftw_real tmp774;
+     fftw_real tmp46;
+     fftw_real tmp819;
+     fftw_real tmp648;
+     fftw_real tmp758;
+     fftw_real tmp173;
+     fftw_real tmp818;
+     fftw_real tmp651;
+     fftw_real tmp759;
+     fftw_real tmp228;
+     fftw_real tmp414;
+     fftw_real tmp484;
+     fftw_real tmp578;
+     fftw_real tmp235;
+     fftw_real tmp415;
+     fftw_real tmp487;
+     fftw_real tmp579;
+     fftw_real tmp78;
+     fftw_real tmp831;
+     fftw_real tmp666;
+     fftw_real tmp769;
+     fftw_real tmp828;
+     fftw_real tmp887;
+     fftw_real tmp683;
+     fftw_real tmp766;
+     fftw_real tmp274;
+     fftw_real tmp425;
+     fftw_real tmp500;
+     fftw_real tmp589;
+     fftw_real tmp307;
+     fftw_real tmp422;
+     fftw_real tmp511;
+     fftw_real tmp586;
+     fftw_real tmp61;
+     fftw_real tmp821;
+     fftw_real tmp655;
+     fftw_real tmp761;
+     fftw_real tmp188;
+     fftw_real tmp822;
+     fftw_real tmp658;
+     fftw_real tmp762;
+     fftw_real tmp247;
+     fftw_real tmp417;
+     fftw_real tmp491;
+     fftw_real tmp581;
+     fftw_real tmp254;
+     fftw_real tmp418;
+     fftw_real tmp494;
+     fftw_real tmp582;
+     fftw_real tmp93;
+     fftw_real tmp829;
+     fftw_real tmp310;
+     fftw_real tmp426;
+     fftw_real tmp297;
+     fftw_real tmp423;
+     fftw_real tmp834;
+     fftw_real tmp888;
+     fftw_real tmp507;
+     fftw_real tmp587;
+     fftw_real tmp514;
+     fftw_real tmp590;
+     fftw_real tmp677;
+     fftw_real tmp767;
+     fftw_real tmp686;
+     fftw_real tmp770;
+     ASSERT_ALIGNED_DOUBLE;
+     {
+	  fftw_real tmp3;
+	  fftw_real tmp191;
+	  fftw_real tmp130;
+	  fftw_real tmp370;
+	  fftw_real tmp6;
+	  fftw_real tmp369;
+	  fftw_real tmp133;
+	  fftw_real tmp192;
+	  fftw_real tmp10;
+	  fftw_real tmp195;
+	  fftw_real tmp137;
+	  fftw_real tmp194;
+	  fftw_real tmp13;
+	  fftw_real tmp197;
+	  fftw_real tmp140;
+	  fftw_real tmp198;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp1;
+	       fftw_real tmp2;
+	       fftw_real tmp128;
+	       fftw_real tmp129;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(input[0]);
+	       tmp2 = c_re(input[32 * istride]);
+	       tmp3 = tmp1 + tmp2;
+	       tmp191 = tmp1 - tmp2;
+	       tmp128 = c_im(input[0]);
+	       tmp129 = c_im(input[32 * istride]);
+	       tmp130 = tmp128 + tmp129;
+	       tmp370 = tmp128 - tmp129;
+	  }
+	  {
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       fftw_real tmp131;
+	       fftw_real tmp132;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp4 = c_re(input[16 * istride]);
+	       tmp5 = c_re(input[48 * istride]);
+	       tmp6 = tmp4 + tmp5;
+	       tmp369 = tmp4 - tmp5;
+	       tmp131 = c_im(input[16 * istride]);
+	       tmp132 = c_im(input[48 * istride]);
+	       tmp133 = tmp131 + tmp132;
+	       tmp192 = tmp131 - tmp132;
+	  }
+	  {
+	       fftw_real tmp8;
+	       fftw_real tmp9;
+	       fftw_real tmp135;
+	       fftw_real tmp136;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp8 = c_re(input[8 * istride]);
+	       tmp9 = c_re(input[40 * istride]);
+	       tmp10 = tmp8 + tmp9;
+	       tmp195 = tmp8 - tmp9;
+	       tmp135 = c_im(input[8 * istride]);
+	       tmp136 = c_im(input[40 * istride]);
+	       tmp137 = tmp135 + tmp136;
+	       tmp194 = tmp135 - tmp136;
+	  }
+	  {
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       fftw_real tmp138;
+	       fftw_real tmp139;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp11 = c_re(input[56 * istride]);
+	       tmp12 = c_re(input[24 * istride]);
+	       tmp13 = tmp11 + tmp12;
+	       tmp197 = tmp11 - tmp12;
+	       tmp138 = c_im(input[56 * istride]);
+	       tmp139 = c_im(input[24 * istride]);
+	       tmp140 = tmp138 + tmp139;
+	       tmp198 = tmp138 - tmp139;
+	  }
+	  {
+	       fftw_real tmp7;
+	       fftw_real tmp14;
+	       fftw_real tmp635;
+	       fftw_real tmp636;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp193 = tmp191 - tmp192;
+	       tmp471 = tmp191 + tmp192;
+	       tmp7 = tmp3 + tmp6;
+	       tmp14 = tmp10 + tmp13;
+	       tmp15 = tmp7 + tmp14;
+	       tmp815 = tmp7 - tmp14;
+	       {
+		    fftw_real tmp717;
+		    fftw_real tmp718;
+		    fftw_real tmp134;
+		    fftw_real tmp141;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp717 = tmp130 - tmp133;
+		    tmp718 = tmp13 - tmp10;
+		    tmp719 = tmp717 - tmp718;
+		    tmp781 = tmp718 + tmp717;
+		    tmp134 = tmp130 + tmp133;
+		    tmp141 = tmp137 + tmp140;
+		    tmp142 = tmp134 + tmp141;
+		    tmp849 = tmp134 - tmp141;
+	       }
+	       tmp371 = tmp369 + tmp370;
+	       tmp537 = tmp370 - tmp369;
+	       tmp635 = tmp3 - tmp6;
+	       tmp636 = tmp137 - tmp140;
+	       tmp637 = tmp635 - tmp636;
+	       tmp755 = tmp635 + tmp636;
+	       {
+		    fftw_real tmp196;
+		    fftw_real tmp199;
+		    fftw_real tmp372;
+		    fftw_real tmp373;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp196 = tmp194 - tmp195;
+		    tmp199 = tmp197 + tmp198;
+		    tmp200 = K707106781 * (tmp196 - tmp199);
+		    tmp538 = K707106781 * (tmp196 + tmp199);
+		    tmp372 = tmp197 - tmp198;
+		    tmp373 = tmp195 + tmp194;
+		    tmp374 = K707106781 * (tmp372 - tmp373);
+		    tmp472 = K707106781 * (tmp373 + tmp372);
+	       }
+	  }
+     }
+     {
+	  fftw_real tmp97;
+	  fftw_real tmp313;
+	  fftw_real tmp357;
+	  fftw_real tmp706;
+	  fftw_real tmp100;
+	  fftw_real tmp354;
+	  fftw_real tmp316;
+	  fftw_real tmp707;
+	  fftw_real tmp107;
+	  fftw_real tmp691;
+	  fftw_real tmp327;
+	  fftw_real tmp359;
+	  fftw_real tmp104;
+	  fftw_real tmp690;
+	  fftw_real tmp322;
+	  fftw_real tmp360;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp95;
+	       fftw_real tmp96;
+	       fftw_real tmp314;
+	       fftw_real tmp315;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp95 = c_re(input[63 * istride]);
+	       tmp96 = c_re(input[31 * istride]);
+	       tmp97 = tmp95 + tmp96;
+	       tmp313 = tmp95 - tmp96;
+	       {
+		    fftw_real tmp355;
+		    fftw_real tmp356;
+		    fftw_real tmp98;
+		    fftw_real tmp99;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp355 = c_im(input[63 * istride]);
+		    tmp356 = c_im(input[31 * istride]);
+		    tmp357 = tmp355 - tmp356;
+		    tmp706 = tmp355 + tmp356;
+		    tmp98 = c_re(input[15 * istride]);
+		    tmp99 = c_re(input[47 * istride]);
+		    tmp100 = tmp98 + tmp99;
+		    tmp354 = tmp98 - tmp99;
+	       }
+	       tmp314 = c_im(input[15 * istride]);
+	       tmp315 = c_im(input[47 * istride]);
+	       tmp316 = tmp314 - tmp315;
+	       tmp707 = tmp314 + tmp315;
+	       {
+		    fftw_real tmp105;
+		    fftw_real tmp106;
+		    fftw_real tmp323;
+		    fftw_real tmp324;
+		    fftw_real tmp325;
+		    fftw_real tmp326;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp105 = c_re(input[55 * istride]);
+		    tmp106 = c_re(input[23 * istride]);
+		    tmp323 = tmp105 - tmp106;
+		    tmp324 = c_im(input[55 * istride]);
+		    tmp325 = c_im(input[23 * istride]);
+		    tmp326 = tmp324 - tmp325;
+		    tmp107 = tmp105 + tmp106;
+		    tmp691 = tmp324 + tmp325;
+		    tmp327 = tmp323 + tmp326;
+		    tmp359 = tmp323 - tmp326;
+	       }
+	       {
+		    fftw_real tmp102;
+		    fftw_real tmp103;
+		    fftw_real tmp321;
+		    fftw_real tmp318;
+		    fftw_real tmp319;
+		    fftw_real tmp320;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp102 = c_re(input[7 * istride]);
+		    tmp103 = c_re(input[39 * istride]);
+		    tmp321 = tmp102 - tmp103;
+		    tmp318 = c_im(input[7 * istride]);
+		    tmp319 = c_im(input[39 * istride]);
+		    tmp320 = tmp318 - tmp319;
+		    tmp104 = tmp102 + tmp103;
+		    tmp690 = tmp318 + tmp319;
+		    tmp322 = tmp320 - tmp321;
+		    tmp360 = tmp321 + tmp320;
+	       }
+	  }
+	  {
+	       fftw_real tmp101;
+	       fftw_real tmp108;
+	       fftw_real tmp689;
+	       fftw_real tmp692;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp101 = tmp97 + tmp100;
+	       tmp108 = tmp104 + tmp107;
+	       tmp109 = tmp101 + tmp108;
+	       tmp837 = tmp101 - tmp108;
+	       tmp689 = tmp97 - tmp100;
+	       tmp692 = tmp690 - tmp691;
+	       tmp693 = tmp689 - tmp692;
+	       tmp773 = tmp689 + tmp692;
+	  }
+	  {
+	       fftw_real tmp842;
+	       fftw_real tmp843;
+	       fftw_real tmp708;
+	       fftw_real tmp709;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp842 = tmp706 + tmp707;
+	       tmp843 = tmp690 + tmp691;
+	       tmp844 = tmp842 - tmp843;
+	       tmp892 = tmp842 + tmp843;
+	       tmp708 = tmp706 - tmp707;
+	       tmp709 = tmp107 - tmp104;
+	       tmp710 = tmp708 - tmp709;
+	       tmp776 = tmp709 + tmp708;
+	  }
+	  {
+	       fftw_real tmp317;
+	       fftw_real tmp328;
+	       fftw_real tmp517;
+	       fftw_real tmp518;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp317 = tmp313 - tmp316;
+	       tmp328 = K707106781 * (tmp322 - tmp327);
+	       tmp329 = tmp317 - tmp328;
+	       tmp429 = tmp317 + tmp328;
+	       tmp517 = tmp313 + tmp316;
+	       tmp518 = K707106781 * (tmp360 + tmp359);
+	       tmp519 = tmp517 - tmp518;
+	       tmp593 = tmp517 + tmp518;
+	  }
+	  {
+	       fftw_real tmp358;
+	       fftw_real tmp361;
+	       fftw_real tmp528;
+	       fftw_real tmp529;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp358 = tmp354 + tmp357;
+	       tmp361 = K707106781 * (tmp359 - tmp360);
+	       tmp362 = tmp358 - tmp361;
+	       tmp432 = tmp358 + tmp361;
+	       tmp528 = tmp357 - tmp354;
+	       tmp529 = K707106781 * (tmp322 + tmp327);
+	       tmp530 = tmp528 - tmp529;
+	       tmp596 = tmp528 + tmp529;
+	  }
+     }
+     {
+	  fftw_real tmp18;
+	  fftw_real tmp205;
+	  fftw_real tmp145;
+	  fftw_real tmp203;
+	  fftw_real tmp21;
+	  fftw_real tmp202;
+	  fftw_real tmp148;
+	  fftw_real tmp206;
+	  fftw_real tmp25;
+	  fftw_real tmp212;
+	  fftw_real tmp152;
+	  fftw_real tmp210;
+	  fftw_real tmp28;
+	  fftw_real tmp209;
+	  fftw_real tmp155;
+	  fftw_real tmp213;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp16;
+	       fftw_real tmp17;
+	       fftw_real tmp143;
+	       fftw_real tmp144;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp16 = c_re(input[4 * istride]);
+	       tmp17 = c_re(input[36 * istride]);
+	       tmp18 = tmp16 + tmp17;
+	       tmp205 = tmp16 - tmp17;
+	       tmp143 = c_im(input[4 * istride]);
+	       tmp144 = c_im(input[36 * istride]);
+	       tmp145 = tmp143 + tmp144;
+	       tmp203 = tmp143 - tmp144;
+	  }
+	  {
+	       fftw_real tmp19;
+	       fftw_real tmp20;
+	       fftw_real tmp146;
+	       fftw_real tmp147;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp19 = c_re(input[20 * istride]);
+	       tmp20 = c_re(input[52 * istride]);
+	       tmp21 = tmp19 + tmp20;
+	       tmp202 = tmp19 - tmp20;
+	       tmp146 = c_im(input[20 * istride]);
+	       tmp147 = c_im(input[52 * istride]);
+	       tmp148 = tmp146 + tmp147;
+	       tmp206 = tmp146 - tmp147;
+	  }
+	  {
+	       fftw_real tmp23;
+	       fftw_real tmp24;
+	       fftw_real tmp150;
+	       fftw_real tmp151;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp23 = c_re(input[60 * istride]);
+	       tmp24 = c_re(input[28 * istride]);
+	       tmp25 = tmp23 + tmp24;
+	       tmp212 = tmp23 - tmp24;
+	       tmp150 = c_im(input[60 * istride]);
+	       tmp151 = c_im(input[28 * istride]);
+	       tmp152 = tmp150 + tmp151;
+	       tmp210 = tmp150 - tmp151;
+	  }
+	  {
+	       fftw_real tmp26;
+	       fftw_real tmp27;
+	       fftw_real tmp153;
+	       fftw_real tmp154;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp26 = c_re(input[12 * istride]);
+	       tmp27 = c_re(input[44 * istride]);
+	       tmp28 = tmp26 + tmp27;
+	       tmp209 = tmp26 - tmp27;
+	       tmp153 = c_im(input[12 * istride]);
+	       tmp154 = c_im(input[44 * istride]);
+	       tmp155 = tmp153 + tmp154;
+	       tmp213 = tmp153 - tmp154;
+	  }
+	  {
+	       fftw_real tmp22;
+	       fftw_real tmp29;
+	       fftw_real tmp638;
+	       fftw_real tmp639;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp22 = tmp18 + tmp21;
+	       tmp29 = tmp25 + tmp28;
+	       tmp30 = tmp22 + tmp29;
+	       tmp850 = tmp29 - tmp22;
+	       tmp638 = tmp145 - tmp148;
+	       tmp639 = tmp18 - tmp21;
+	       tmp640 = tmp638 - tmp639;
+	       tmp721 = tmp639 + tmp638;
+	  }
+	  {
+	       fftw_real tmp149;
+	       fftw_real tmp156;
+	       fftw_real tmp641;
+	       fftw_real tmp642;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp149 = tmp145 + tmp148;
+	       tmp156 = tmp152 + tmp155;
+	       tmp157 = tmp149 + tmp156;
+	       tmp816 = tmp149 - tmp156;
+	       tmp641 = tmp25 - tmp28;
+	       tmp642 = tmp152 - tmp155;
+	       tmp643 = tmp641 + tmp642;
+	       tmp720 = tmp641 - tmp642;
+	  }
+	  {
+	       fftw_real tmp204;
+	       fftw_real tmp207;
+	       fftw_real tmp474;
+	       fftw_real tmp475;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp204 = tmp202 + tmp203;
+	       tmp207 = tmp205 - tmp206;
+	       tmp208 = (K382683432 * tmp204) - (K923879532 * tmp207);
+	       tmp377 = (K923879532 * tmp204) + (K382683432 * tmp207);
+	       tmp474 = tmp203 - tmp202;
+	       tmp475 = tmp205 + tmp206;
+	       tmp476 = (K923879532 * tmp474) - (K382683432 * tmp475);
+	       tmp541 = (K382683432 * tmp474) + (K923879532 * tmp475);
+	  }
+	  {
+	       fftw_real tmp211;
+	       fftw_real tmp214;
+	       fftw_real tmp477;
+	       fftw_real tmp478;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp211 = tmp209 + tmp210;
+	       tmp214 = tmp212 - tmp213;
+	       tmp215 = (K382683432 * tmp211) + (K923879532 * tmp214);
+	       tmp376 = (K382683432 * tmp214) - (K923879532 * tmp211);
+	       tmp477 = tmp210 - tmp209;
+	       tmp478 = tmp212 + tmp213;
+	       tmp479 = (K923879532 * tmp477) + (K382683432 * tmp478);
+	       tmp540 = (K923879532 * tmp478) - (K382683432 * tmp477);
+	  }
+     }
+     {
+	  fftw_real tmp112;
+	  fftw_real tmp694;
+	  fftw_real tmp115;
+	  fftw_real tmp695;
+	  fftw_real tmp334;
+	  fftw_real tmp520;
+	  fftw_real tmp339;
+	  fftw_real tmp521;
+	  fftw_real tmp697;
+	  fftw_real tmp696;
+	  fftw_real tmp119;
+	  fftw_real tmp700;
+	  fftw_real tmp122;
+	  fftw_real tmp701;
+	  fftw_real tmp345;
+	  fftw_real tmp523;
+	  fftw_real tmp350;
+	  fftw_real tmp524;
+	  fftw_real tmp702;
+	  fftw_real tmp699;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp335;
+	       fftw_real tmp333;
+	       fftw_real tmp330;
+	       fftw_real tmp338;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp110;
+		    fftw_real tmp111;
+		    fftw_real tmp331;
+		    fftw_real tmp332;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp110 = c_re(input[3 * istride]);
+		    tmp111 = c_re(input[35 * istride]);
+		    tmp112 = tmp110 + tmp111;
+		    tmp335 = tmp110 - tmp111;
+		    tmp331 = c_im(input[3 * istride]);
+		    tmp332 = c_im(input[35 * istride]);
+		    tmp333 = tmp331 - tmp332;
+		    tmp694 = tmp331 + tmp332;
+	       }
+	       {
+		    fftw_real tmp113;
+		    fftw_real tmp114;
+		    fftw_real tmp336;
+		    fftw_real tmp337;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp113 = c_re(input[19 * istride]);
+		    tmp114 = c_re(input[51 * istride]);
+		    tmp115 = tmp113 + tmp114;
+		    tmp330 = tmp113 - tmp114;
+		    tmp336 = c_im(input[19 * istride]);
+		    tmp337 = c_im(input[51 * istride]);
+		    tmp338 = tmp336 - tmp337;
+		    tmp695 = tmp336 + tmp337;
+	       }
+	       tmp334 = tmp330 + tmp333;
+	       tmp520 = tmp333 - tmp330;
+	       tmp339 = tmp335 - tmp338;
+	       tmp521 = tmp335 + tmp338;
+	       tmp697 = tmp112 - tmp115;
+	       tmp696 = tmp694 - tmp695;
+	  }
+	  {
+	       fftw_real tmp346;
+	       fftw_real tmp344;
+	       fftw_real tmp341;
+	       fftw_real tmp349;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp117;
+		    fftw_real tmp118;
+		    fftw_real tmp342;
+		    fftw_real tmp343;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp117 = c_re(input[59 * istride]);
+		    tmp118 = c_re(input[27 * istride]);
+		    tmp119 = tmp117 + tmp118;
+		    tmp346 = tmp117 - tmp118;
+		    tmp342 = c_im(input[59 * istride]);
+		    tmp343 = c_im(input[27 * istride]);
+		    tmp344 = tmp342 - tmp343;
+		    tmp700 = tmp342 + tmp343;
+	       }
+	       {
+		    fftw_real tmp120;
+		    fftw_real tmp121;
+		    fftw_real tmp347;
+		    fftw_real tmp348;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp120 = c_re(input[11 * istride]);
+		    tmp121 = c_re(input[43 * istride]);
+		    tmp122 = tmp120 + tmp121;
+		    tmp341 = tmp120 - tmp121;
+		    tmp347 = c_im(input[11 * istride]);
+		    tmp348 = c_im(input[43 * istride]);
+		    tmp349 = tmp347 - tmp348;
+		    tmp701 = tmp347 + tmp348;
+	       }
+	       tmp345 = tmp341 + tmp344;
+	       tmp523 = tmp344 - tmp341;
+	       tmp350 = tmp346 - tmp349;
+	       tmp524 = tmp346 + tmp349;
+	       tmp702 = tmp700 - tmp701;
+	       tmp699 = tmp119 - tmp122;
+	  }
+	  {
+	       fftw_real tmp116;
+	       fftw_real tmp123;
+	       fftw_real tmp363;
+	       fftw_real tmp364;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp116 = tmp112 + tmp115;
+	       tmp123 = tmp119 + tmp122;
+	       tmp124 = tmp116 + tmp123;
+	       tmp845 = tmp123 - tmp116;
+	       tmp363 = (K382683432 * tmp350) - (K923879532 * tmp345);
+	       tmp364 = (K923879532 * tmp334) + (K382683432 * tmp339);
+	       tmp365 = tmp363 - tmp364;
+	       tmp430 = tmp364 + tmp363;
+	  }
+	  {
+	       fftw_real tmp340;
+	       fftw_real tmp351;
+	       fftw_real tmp838;
+	       fftw_real tmp839;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp340 = (K382683432 * tmp334) - (K923879532 * tmp339);
+	       tmp351 = (K382683432 * tmp345) + (K923879532 * tmp350);
+	       tmp352 = tmp340 - tmp351;
+	       tmp433 = tmp340 + tmp351;
+	       tmp838 = tmp694 + tmp695;
+	       tmp839 = tmp700 + tmp701;
+	       tmp840 = tmp838 - tmp839;
+	       tmp893 = tmp838 + tmp839;
+	  }
+	  {
+	       fftw_real tmp522;
+	       fftw_real tmp525;
+	       fftw_real tmp531;
+	       fftw_real tmp532;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp522 = (K923879532 * tmp520) - (K382683432 * tmp521);
+	       tmp525 = (K923879532 * tmp523) + (K382683432 * tmp524);
+	       tmp526 = tmp522 - tmp525;
+	       tmp597 = tmp522 + tmp525;
+	       tmp531 = (K923879532 * tmp524) - (K382683432 * tmp523);
+	       tmp532 = (K382683432 * tmp520) + (K923879532 * tmp521);
+	       tmp533 = tmp531 - tmp532;
+	       tmp594 = tmp532 + tmp531;
+	  }
+	  {
+	       fftw_real tmp698;
+	       fftw_real tmp703;
+	       fftw_real tmp711;
+	       fftw_real tmp712;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp698 = tmp696 - tmp697;
+	       tmp703 = tmp699 + tmp702;
+	       tmp704 = K707106781 * (tmp698 - tmp703);
+	       tmp777 = K707106781 * (tmp698 + tmp703);
+	       tmp711 = tmp699 - tmp702;
+	       tmp712 = tmp697 + tmp696;
+	       tmp713 = K707106781 * (tmp711 - tmp712);
+	       tmp774 = K707106781 * (tmp712 + tmp711);
+	  }
+     }
+     {
+	  fftw_real tmp34;
+	  fftw_real tmp229;
+	  fftw_real tmp161;
+	  fftw_real tmp219;
+	  fftw_real tmp37;
+	  fftw_real tmp218;
+	  fftw_real tmp164;
+	  fftw_real tmp230;
+	  fftw_real tmp44;
+	  fftw_real tmp233;
+	  fftw_real tmp223;
+	  fftw_real tmp171;
+	  fftw_real tmp41;
+	  fftw_real tmp232;
+	  fftw_real tmp226;
+	  fftw_real tmp168;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp32;
+	       fftw_real tmp33;
+	       fftw_real tmp162;
+	       fftw_real tmp163;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp32 = c_re(input[2 * istride]);
+	       tmp33 = c_re(input[34 * istride]);
+	       tmp34 = tmp32 + tmp33;
+	       tmp229 = tmp32 - tmp33;
+	       {
+		    fftw_real tmp159;
+		    fftw_real tmp160;
+		    fftw_real tmp35;
+		    fftw_real tmp36;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp159 = c_im(input[2 * istride]);
+		    tmp160 = c_im(input[34 * istride]);
+		    tmp161 = tmp159 + tmp160;
+		    tmp219 = tmp159 - tmp160;
+		    tmp35 = c_re(input[18 * istride]);
+		    tmp36 = c_re(input[50 * istride]);
+		    tmp37 = tmp35 + tmp36;
+		    tmp218 = tmp35 - tmp36;
+	       }
+	       tmp162 = c_im(input[18 * istride]);
+	       tmp163 = c_im(input[50 * istride]);
+	       tmp164 = tmp162 + tmp163;
+	       tmp230 = tmp162 - tmp163;
+	       {
+		    fftw_real tmp42;
+		    fftw_real tmp43;
+		    fftw_real tmp221;
+		    fftw_real tmp169;
+		    fftw_real tmp170;
+		    fftw_real tmp222;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp42 = c_re(input[58 * istride]);
+		    tmp43 = c_re(input[26 * istride]);
+		    tmp221 = tmp42 - tmp43;
+		    tmp169 = c_im(input[58 * istride]);
+		    tmp170 = c_im(input[26 * istride]);
+		    tmp222 = tmp169 - tmp170;
+		    tmp44 = tmp42 + tmp43;
+		    tmp233 = tmp221 + tmp222;
+		    tmp223 = tmp221 - tmp222;
+		    tmp171 = tmp169 + tmp170;
+	       }
+	       {
+		    fftw_real tmp39;
+		    fftw_real tmp40;
+		    fftw_real tmp224;
+		    fftw_real tmp166;
+		    fftw_real tmp167;
+		    fftw_real tmp225;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp39 = c_re(input[10 * istride]);
+		    tmp40 = c_re(input[42 * istride]);
+		    tmp224 = tmp39 - tmp40;
+		    tmp166 = c_im(input[10 * istride]);
+		    tmp167 = c_im(input[42 * istride]);
+		    tmp225 = tmp166 - tmp167;
+		    tmp41 = tmp39 + tmp40;
+		    tmp232 = tmp225 - tmp224;
+		    tmp226 = tmp224 + tmp225;
+		    tmp168 = tmp166 + tmp167;
+	       }
+	  }
+	  {
+	       fftw_real tmp38;
+	       fftw_real tmp45;
+	       fftw_real tmp646;
+	       fftw_real tmp647;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp38 = tmp34 + tmp37;
+	       tmp45 = tmp41 + tmp44;
+	       tmp46 = tmp38 + tmp45;
+	       tmp819 = tmp38 - tmp45;
+	       tmp646 = tmp161 - tmp164;
+	       tmp647 = tmp44 - tmp41;
+	       tmp648 = tmp646 - tmp647;
+	       tmp758 = tmp647 + tmp646;
+	  }
+	  {
+	       fftw_real tmp165;
+	       fftw_real tmp172;
+	       fftw_real tmp649;
+	       fftw_real tmp650;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp165 = tmp161 + tmp164;
+	       tmp172 = tmp168 + tmp171;
+	       tmp173 = tmp165 + tmp172;
+	       tmp818 = tmp165 - tmp172;
+	       tmp649 = tmp34 - tmp37;
+	       tmp650 = tmp168 - tmp171;
+	       tmp651 = tmp649 - tmp650;
+	       tmp759 = tmp649 + tmp650;
+	  }
+	  {
+	       fftw_real tmp220;
+	       fftw_real tmp227;
+	       fftw_real tmp482;
+	       fftw_real tmp483;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp220 = tmp218 + tmp219;
+	       tmp227 = K707106781 * (tmp223 - tmp226);
+	       tmp228 = tmp220 - tmp227;
+	       tmp414 = tmp220 + tmp227;
+	       tmp482 = tmp219 - tmp218;
+	       tmp483 = K707106781 * (tmp232 + tmp233);
+	       tmp484 = tmp482 - tmp483;
+	       tmp578 = tmp482 + tmp483;
+	  }
+	  {
+	       fftw_real tmp231;
+	       fftw_real tmp234;
+	       fftw_real tmp485;
+	       fftw_real tmp486;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp231 = tmp229 - tmp230;
+	       tmp234 = K707106781 * (tmp232 - tmp233);
+	       tmp235 = tmp231 - tmp234;
+	       tmp415 = tmp231 + tmp234;
+	       tmp485 = tmp229 + tmp230;
+	       tmp486 = K707106781 * (tmp226 + tmp223);
+	       tmp487 = tmp485 - tmp486;
+	       tmp579 = tmp485 + tmp486;
+	  }
+     }
+     {
+	  fftw_real tmp66;
+	  fftw_real tmp299;
+	  fftw_real tmp261;
+	  fftw_real tmp662;
+	  fftw_real tmp69;
+	  fftw_real tmp258;
+	  fftw_real tmp302;
+	  fftw_real tmp663;
+	  fftw_real tmp76;
+	  fftw_real tmp681;
+	  fftw_real tmp267;
+	  fftw_real tmp305;
+	  fftw_real tmp73;
+	  fftw_real tmp680;
+	  fftw_real tmp272;
+	  fftw_real tmp304;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp64;
+	       fftw_real tmp65;
+	       fftw_real tmp300;
+	       fftw_real tmp301;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp64 = c_re(input[istride]);
+	       tmp65 = c_re(input[33 * istride]);
+	       tmp66 = tmp64 + tmp65;
+	       tmp299 = tmp64 - tmp65;
+	       {
+		    fftw_real tmp259;
+		    fftw_real tmp260;
+		    fftw_real tmp67;
+		    fftw_real tmp68;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp259 = c_im(input[istride]);
+		    tmp260 = c_im(input[33 * istride]);
+		    tmp261 = tmp259 - tmp260;
+		    tmp662 = tmp259 + tmp260;
+		    tmp67 = c_re(input[17 * istride]);
+		    tmp68 = c_re(input[49 * istride]);
+		    tmp69 = tmp67 + tmp68;
+		    tmp258 = tmp67 - tmp68;
+	       }
+	       tmp300 = c_im(input[17 * istride]);
+	       tmp301 = c_im(input[49 * istride]);
+	       tmp302 = tmp300 - tmp301;
+	       tmp663 = tmp300 + tmp301;
+	       {
+		    fftw_real tmp74;
+		    fftw_real tmp75;
+		    fftw_real tmp263;
+		    fftw_real tmp264;
+		    fftw_real tmp265;
+		    fftw_real tmp266;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp74 = c_re(input[57 * istride]);
+		    tmp75 = c_re(input[25 * istride]);
+		    tmp263 = tmp74 - tmp75;
+		    tmp264 = c_im(input[57 * istride]);
+		    tmp265 = c_im(input[25 * istride]);
+		    tmp266 = tmp264 - tmp265;
+		    tmp76 = tmp74 + tmp75;
+		    tmp681 = tmp264 + tmp265;
+		    tmp267 = tmp263 - tmp266;
+		    tmp305 = tmp263 + tmp266;
+	       }
+	       {
+		    fftw_real tmp71;
+		    fftw_real tmp72;
+		    fftw_real tmp268;
+		    fftw_real tmp269;
+		    fftw_real tmp270;
+		    fftw_real tmp271;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp71 = c_re(input[9 * istride]);
+		    tmp72 = c_re(input[41 * istride]);
+		    tmp268 = tmp71 - tmp72;
+		    tmp269 = c_im(input[9 * istride]);
+		    tmp270 = c_im(input[41 * istride]);
+		    tmp271 = tmp269 - tmp270;
+		    tmp73 = tmp71 + tmp72;
+		    tmp680 = tmp269 + tmp270;
+		    tmp272 = tmp268 + tmp271;
+		    tmp304 = tmp271 - tmp268;
+	       }
+	  }
+	  {
+	       fftw_real tmp70;
+	       fftw_real tmp77;
+	       fftw_real tmp664;
+	       fftw_real tmp665;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp70 = tmp66 + tmp69;
+	       tmp77 = tmp73 + tmp76;
+	       tmp78 = tmp70 + tmp77;
+	       tmp831 = tmp70 - tmp77;
+	       tmp664 = tmp662 - tmp663;
+	       tmp665 = tmp76 - tmp73;
+	       tmp666 = tmp664 - tmp665;
+	       tmp769 = tmp665 + tmp664;
+	  }
+	  {
+	       fftw_real tmp826;
+	       fftw_real tmp827;
+	       fftw_real tmp679;
+	       fftw_real tmp682;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp826 = tmp662 + tmp663;
+	       tmp827 = tmp680 + tmp681;
+	       tmp828 = tmp826 - tmp827;
+	       tmp887 = tmp826 + tmp827;
+	       tmp679 = tmp66 - tmp69;
+	       tmp682 = tmp680 - tmp681;
+	       tmp683 = tmp679 - tmp682;
+	       tmp766 = tmp679 + tmp682;
+	  }
+	  {
+	       fftw_real tmp262;
+	       fftw_real tmp273;
+	       fftw_real tmp498;
+	       fftw_real tmp499;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp262 = tmp258 + tmp261;
+	       tmp273 = K707106781 * (tmp267 - tmp272);
+	       tmp274 = tmp262 - tmp273;
+	       tmp425 = tmp262 + tmp273;
+	       tmp498 = tmp261 - tmp258;
+	       tmp499 = K707106781 * (tmp304 + tmp305);
+	       tmp500 = tmp498 - tmp499;
+	       tmp589 = tmp498 + tmp499;
+	  }
+	  {
+	       fftw_real tmp303;
+	       fftw_real tmp306;
+	       fftw_real tmp509;
+	       fftw_real tmp510;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp303 = tmp299 - tmp302;
+	       tmp306 = K707106781 * (tmp304 - tmp305);
+	       tmp307 = tmp303 - tmp306;
+	       tmp422 = tmp303 + tmp306;
+	       tmp509 = tmp299 + tmp302;
+	       tmp510 = K707106781 * (tmp272 + tmp267);
+	       tmp511 = tmp509 - tmp510;
+	       tmp586 = tmp509 + tmp510;
+	  }
+     }
+     {
+	  fftw_real tmp49;
+	  fftw_real tmp248;
+	  fftw_real tmp176;
+	  fftw_real tmp238;
+	  fftw_real tmp52;
+	  fftw_real tmp237;
+	  fftw_real tmp179;
+	  fftw_real tmp249;
+	  fftw_real tmp59;
+	  fftw_real tmp252;
+	  fftw_real tmp242;
+	  fftw_real tmp186;
+	  fftw_real tmp56;
+	  fftw_real tmp251;
+	  fftw_real tmp245;
+	  fftw_real tmp183;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp47;
+	       fftw_real tmp48;
+	       fftw_real tmp177;
+	       fftw_real tmp178;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp47 = c_re(input[62 * istride]);
+	       tmp48 = c_re(input[30 * istride]);
+	       tmp49 = tmp47 + tmp48;
+	       tmp248 = tmp47 - tmp48;
+	       {
+		    fftw_real tmp174;
+		    fftw_real tmp175;
+		    fftw_real tmp50;
+		    fftw_real tmp51;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp174 = c_im(input[62 * istride]);
+		    tmp175 = c_im(input[30 * istride]);
+		    tmp176 = tmp174 + tmp175;
+		    tmp238 = tmp174 - tmp175;
+		    tmp50 = c_re(input[14 * istride]);
+		    tmp51 = c_re(input[46 * istride]);
+		    tmp52 = tmp50 + tmp51;
+		    tmp237 = tmp50 - tmp51;
+	       }
+	       tmp177 = c_im(input[14 * istride]);
+	       tmp178 = c_im(input[46 * istride]);
+	       tmp179 = tmp177 + tmp178;
+	       tmp249 = tmp177 - tmp178;
+	       {
+		    fftw_real tmp57;
+		    fftw_real tmp58;
+		    fftw_real tmp240;
+		    fftw_real tmp184;
+		    fftw_real tmp185;
+		    fftw_real tmp241;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp57 = c_re(input[54 * istride]);
+		    tmp58 = c_re(input[22 * istride]);
+		    tmp240 = tmp57 - tmp58;
+		    tmp184 = c_im(input[54 * istride]);
+		    tmp185 = c_im(input[22 * istride]);
+		    tmp241 = tmp184 - tmp185;
+		    tmp59 = tmp57 + tmp58;
+		    tmp252 = tmp240 + tmp241;
+		    tmp242 = tmp240 - tmp241;
+		    tmp186 = tmp184 + tmp185;
+	       }
+	       {
+		    fftw_real tmp54;
+		    fftw_real tmp55;
+		    fftw_real tmp243;
+		    fftw_real tmp181;
+		    fftw_real tmp182;
+		    fftw_real tmp244;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp54 = c_re(input[6 * istride]);
+		    tmp55 = c_re(input[38 * istride]);
+		    tmp243 = tmp54 - tmp55;
+		    tmp181 = c_im(input[6 * istride]);
+		    tmp182 = c_im(input[38 * istride]);
+		    tmp244 = tmp181 - tmp182;
+		    tmp56 = tmp54 + tmp55;
+		    tmp251 = tmp244 - tmp243;
+		    tmp245 = tmp243 + tmp244;
+		    tmp183 = tmp181 + tmp182;
+	       }
+	  }
+	  {
+	       fftw_real tmp53;
+	       fftw_real tmp60;
+	       fftw_real tmp653;
+	       fftw_real tmp654;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp53 = tmp49 + tmp52;
+	       tmp60 = tmp56 + tmp59;
+	       tmp61 = tmp53 + tmp60;
+	       tmp821 = tmp53 - tmp60;
+	       tmp653 = tmp176 - tmp179;
+	       tmp654 = tmp59 - tmp56;
+	       tmp655 = tmp653 - tmp654;
+	       tmp761 = tmp654 + tmp653;
+	  }
+	  {
+	       fftw_real tmp180;
+	       fftw_real tmp187;
+	       fftw_real tmp656;
+	       fftw_real tmp657;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp180 = tmp176 + tmp179;
+	       tmp187 = tmp183 + tmp186;
+	       tmp188 = tmp180 + tmp187;
+	       tmp822 = tmp180 - tmp187;
+	       tmp656 = tmp49 - tmp52;
+	       tmp657 = tmp183 - tmp186;
+	       tmp658 = tmp656 - tmp657;
+	       tmp762 = tmp656 + tmp657;
+	  }
+	  {
+	       fftw_real tmp239;
+	       fftw_real tmp246;
+	       fftw_real tmp489;
+	       fftw_real tmp490;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp239 = tmp237 + tmp238;
+	       tmp246 = K707106781 * (tmp242 - tmp245);
+	       tmp247 = tmp239 - tmp246;
+	       tmp417 = tmp239 + tmp246;
+	       tmp489 = tmp248 + tmp249;
+	       tmp490 = K707106781 * (tmp245 + tmp242);
+	       tmp491 = tmp489 - tmp490;
+	       tmp581 = tmp489 + tmp490;
+	  }
+	  {
+	       fftw_real tmp250;
+	       fftw_real tmp253;
+	       fftw_real tmp492;
+	       fftw_real tmp493;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp250 = tmp248 - tmp249;
+	       tmp253 = K707106781 * (tmp251 - tmp252);
+	       tmp254 = tmp250 - tmp253;
+	       tmp418 = tmp250 + tmp253;
+	       tmp492 = tmp238 - tmp237;
+	       tmp493 = K707106781 * (tmp251 + tmp252);
+	       tmp494 = tmp492 - tmp493;
+	       tmp582 = tmp492 + tmp493;
+	  }
+     }
+     {
+	  fftw_real tmp81;
+	  fftw_real tmp673;
+	  fftw_real tmp84;
+	  fftw_real tmp674;
+	  fftw_real tmp290;
+	  fftw_real tmp504;
+	  fftw_real tmp295;
+	  fftw_real tmp505;
+	  fftw_real tmp675;
+	  fftw_real tmp672;
+	  fftw_real tmp88;
+	  fftw_real tmp668;
+	  fftw_real tmp91;
+	  fftw_real tmp669;
+	  fftw_real tmp279;
+	  fftw_real tmp501;
+	  fftw_real tmp284;
+	  fftw_real tmp502;
+	  fftw_real tmp670;
+	  fftw_real tmp667;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp291;
+	       fftw_real tmp289;
+	       fftw_real tmp286;
+	       fftw_real tmp294;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp79;
+		    fftw_real tmp80;
+		    fftw_real tmp287;
+		    fftw_real tmp288;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp79 = c_re(input[5 * istride]);
+		    tmp80 = c_re(input[37 * istride]);
+		    tmp81 = tmp79 + tmp80;
+		    tmp291 = tmp79 - tmp80;
+		    tmp287 = c_im(input[5 * istride]);
+		    tmp288 = c_im(input[37 * istride]);
+		    tmp289 = tmp287 - tmp288;
+		    tmp673 = tmp287 + tmp288;
+	       }
+	       {
+		    fftw_real tmp82;
+		    fftw_real tmp83;
+		    fftw_real tmp292;
+		    fftw_real tmp293;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp82 = c_re(input[21 * istride]);
+		    tmp83 = c_re(input[53 * istride]);
+		    tmp84 = tmp82 + tmp83;
+		    tmp286 = tmp82 - tmp83;
+		    tmp292 = c_im(input[21 * istride]);
+		    tmp293 = c_im(input[53 * istride]);
+		    tmp294 = tmp292 - tmp293;
+		    tmp674 = tmp292 + tmp293;
+	       }
+	       tmp290 = tmp286 + tmp289;
+	       tmp504 = tmp289 - tmp286;
+	       tmp295 = tmp291 - tmp294;
+	       tmp505 = tmp291 + tmp294;
+	       tmp675 = tmp673 - tmp674;
+	       tmp672 = tmp81 - tmp84;
+	  }
+	  {
+	       fftw_real tmp275;
+	       fftw_real tmp283;
+	       fftw_real tmp280;
+	       fftw_real tmp278;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp86;
+		    fftw_real tmp87;
+		    fftw_real tmp281;
+		    fftw_real tmp282;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp86 = c_re(input[61 * istride]);
+		    tmp87 = c_re(input[29 * istride]);
+		    tmp88 = tmp86 + tmp87;
+		    tmp275 = tmp86 - tmp87;
+		    tmp281 = c_im(input[61 * istride]);
+		    tmp282 = c_im(input[29 * istride]);
+		    tmp283 = tmp281 - tmp282;
+		    tmp668 = tmp281 + tmp282;
+	       }
+	       {
+		    fftw_real tmp89;
+		    fftw_real tmp90;
+		    fftw_real tmp276;
+		    fftw_real tmp277;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp89 = c_re(input[13 * istride]);
+		    tmp90 = c_re(input[45 * istride]);
+		    tmp91 = tmp89 + tmp90;
+		    tmp280 = tmp89 - tmp90;
+		    tmp276 = c_im(input[13 * istride]);
+		    tmp277 = c_im(input[45 * istride]);
+		    tmp278 = tmp276 - tmp277;
+		    tmp669 = tmp276 + tmp277;
+	       }
+	       tmp279 = tmp275 - tmp278;
+	       tmp501 = tmp275 + tmp278;
+	       tmp284 = tmp280 + tmp283;
+	       tmp502 = tmp283 - tmp280;
+	       tmp670 = tmp668 - tmp669;
+	       tmp667 = tmp88 - tmp91;
+	  }
+	  {
+	       fftw_real tmp85;
+	       fftw_real tmp92;
+	       fftw_real tmp308;
+	       fftw_real tmp309;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp85 = tmp81 + tmp84;
+	       tmp92 = tmp88 + tmp91;
+	       tmp93 = tmp85 + tmp92;
+	       tmp829 = tmp92 - tmp85;
+	       tmp308 = (K382683432 * tmp290) - (K923879532 * tmp295);
+	       tmp309 = (K382683432 * tmp284) + (K923879532 * tmp279);
+	       tmp310 = tmp308 - tmp309;
+	       tmp426 = tmp308 + tmp309;
+	  }
+	  {
+	       fftw_real tmp285;
+	       fftw_real tmp296;
+	       fftw_real tmp832;
+	       fftw_real tmp833;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp285 = (K382683432 * tmp279) - (K923879532 * tmp284);
+	       tmp296 = (K923879532 * tmp290) + (K382683432 * tmp295);
+	       tmp297 = tmp285 - tmp296;
+	       tmp423 = tmp296 + tmp285;
+	       tmp832 = tmp673 + tmp674;
+	       tmp833 = tmp668 + tmp669;
+	       tmp834 = tmp832 - tmp833;
+	       tmp888 = tmp832 + tmp833;
+	  }
+	  {
+	       fftw_real tmp503;
+	       fftw_real tmp506;
+	       fftw_real tmp512;
+	       fftw_real tmp513;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp503 = (K923879532 * tmp501) - (K382683432 * tmp502);
+	       tmp506 = (K382683432 * tmp504) + (K923879532 * tmp505);
+	       tmp507 = tmp503 - tmp506;
+	       tmp587 = tmp506 + tmp503;
+	       tmp512 = (K923879532 * tmp504) - (K382683432 * tmp505);
+	       tmp513 = (K923879532 * tmp502) + (K382683432 * tmp501);
+	       tmp514 = tmp512 - tmp513;
+	       tmp590 = tmp512 + tmp513;
+	  }
+	  {
+	       fftw_real tmp671;
+	       fftw_real tmp676;
+	       fftw_real tmp684;
+	       fftw_real tmp685;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp671 = tmp667 - tmp670;
+	       tmp676 = tmp672 + tmp675;
+	       tmp677 = K707106781 * (tmp671 - tmp676);
+	       tmp767 = K707106781 * (tmp676 + tmp671);
+	       tmp684 = tmp675 - tmp672;
+	       tmp685 = tmp667 + tmp670;
+	       tmp686 = K707106781 * (tmp684 - tmp685);
+	       tmp770 = K707106781 * (tmp684 + tmp685);
+	  }
+     }
+     {
+	  fftw_real tmp63;
+	  fftw_real tmp907;
+	  fftw_real tmp910;
+	  fftw_real tmp912;
+	  fftw_real tmp126;
+	  fftw_real tmp127;
+	  fftw_real tmp190;
+	  fftw_real tmp911;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp31;
+	       fftw_real tmp62;
+	       fftw_real tmp908;
+	       fftw_real tmp909;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp31 = tmp15 + tmp30;
+	       tmp62 = tmp46 + tmp61;
+	       tmp63 = tmp31 + tmp62;
+	       tmp907 = tmp31 - tmp62;
+	       tmp908 = tmp887 + tmp888;
+	       tmp909 = tmp892 + tmp893;
+	       tmp910 = tmp908 - tmp909;
+	       tmp912 = tmp908 + tmp909;
+	  }
+	  {
+	       fftw_real tmp94;
+	       fftw_real tmp125;
+	       fftw_real tmp158;
+	       fftw_real tmp189;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp94 = tmp78 + tmp93;
+	       tmp125 = tmp109 + tmp124;
+	       tmp126 = tmp94 + tmp125;
+	       tmp127 = tmp125 - tmp94;
+	       tmp158 = tmp142 + tmp157;
+	       tmp189 = tmp173 + tmp188;
+	       tmp190 = tmp158 - tmp189;
+	       tmp911 = tmp158 + tmp189;
+	  }
+	  c_re(output[32 * ostride]) = tmp63 - tmp126;
+	  c_re(output[0]) = tmp63 + tmp126;
+	  c_im(output[16 * ostride]) = tmp127 + tmp190;
+	  c_im(output[48 * ostride]) = tmp190 - tmp127;
+	  c_re(output[48 * ostride]) = tmp907 - tmp910;
+	  c_re(output[16 * ostride]) = tmp907 + tmp910;
+	  c_im(output[32 * ostride]) = tmp911 - tmp912;
+	  c_im(output[0]) = tmp911 + tmp912;
+     }
+     {
+	  fftw_real tmp885;
+	  fftw_real tmp901;
+	  fftw_real tmp899;
+	  fftw_real tmp905;
+	  fftw_real tmp890;
+	  fftw_real tmp902;
+	  fftw_real tmp895;
+	  fftw_real tmp903;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp883;
+	       fftw_real tmp884;
+	       fftw_real tmp897;
+	       fftw_real tmp898;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp883 = tmp15 - tmp30;
+	       tmp884 = tmp173 - tmp188;
+	       tmp885 = tmp883 + tmp884;
+	       tmp901 = tmp883 - tmp884;
+	       tmp897 = tmp142 - tmp157;
+	       tmp898 = tmp61 - tmp46;
+	       tmp899 = tmp897 - tmp898;
+	       tmp905 = tmp898 + tmp897;
+	  }
+	  {
+	       fftw_real tmp886;
+	       fftw_real tmp889;
+	       fftw_real tmp891;
+	       fftw_real tmp894;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp886 = tmp78 - tmp93;
+	       tmp889 = tmp887 - tmp888;
+	       tmp890 = tmp886 + tmp889;
+	       tmp902 = tmp889 - tmp886;
+	       tmp891 = tmp109 - tmp124;
+	       tmp894 = tmp892 - tmp893;
+	       tmp895 = tmp891 - tmp894;
+	       tmp903 = tmp891 + tmp894;
+	  }
+	  {
+	       fftw_real tmp896;
+	       fftw_real tmp900;
+	       fftw_real tmp904;
+	       fftw_real tmp906;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp896 = K707106781 * (tmp890 + tmp895);
+	       c_re(output[40 * ostride]) = tmp885 - tmp896;
+	       c_re(output[8 * ostride]) = tmp885 + tmp896;
+	       tmp900 = K707106781 * (tmp895 - tmp890);
+	       c_im(output[56 * ostride]) = tmp899 - tmp900;
+	       c_im(output[24 * ostride]) = tmp899 + tmp900;
+	       tmp904 = K707106781 * (tmp902 - tmp903);
+	       c_re(output[56 * ostride]) = tmp901 - tmp904;
+	       c_re(output[24 * ostride]) = tmp901 + tmp904;
+	       tmp906 = K707106781 * (tmp902 + tmp903);
+	       c_im(output[40 * ostride]) = tmp905 - tmp906;
+	       c_im(output[8 * ostride]) = tmp905 + tmp906;
+	  }
+     }
+     {
+	  fftw_real tmp217;
+	  fftw_real tmp391;
+	  fftw_real tmp396;
+	  fftw_real tmp406;
+	  fftw_real tmp399;
+	  fftw_real tmp407;
+	  fftw_real tmp367;
+	  fftw_real tmp387;
+	  fftw_real tmp312;
+	  fftw_real tmp386;
+	  fftw_real tmp379;
+	  fftw_real tmp401;
+	  fftw_real tmp382;
+	  fftw_real tmp392;
+	  fftw_real tmp256;
+	  fftw_real tmp402;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp201;
+	       fftw_real tmp216;
+	       fftw_real tmp394;
+	       fftw_real tmp395;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp201 = tmp193 - tmp200;
+	       tmp216 = tmp208 - tmp215;
+	       tmp217 = tmp201 - tmp216;
+	       tmp391 = tmp201 + tmp216;
+	       tmp394 = tmp274 + tmp297;
+	       tmp395 = tmp307 + tmp310;
+	       tmp396 = (K634393284 * tmp394) + (K773010453 * tmp395);
+	       tmp406 = (K773010453 * tmp394) - (K634393284 * tmp395);
+	  }
+	  {
+	       fftw_real tmp397;
+	       fftw_real tmp398;
+	       fftw_real tmp353;
+	       fftw_real tmp366;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp397 = tmp329 + tmp352;
+	       tmp398 = tmp362 + tmp365;
+	       tmp399 = (K773010453 * tmp397) - (K634393284 * tmp398);
+	       tmp407 = (K773010453 * tmp398) + (K634393284 * tmp397);
+	       tmp353 = tmp329 - tmp352;
+	       tmp366 = tmp362 - tmp365;
+	       tmp367 = (K098017140 * tmp353) - (K995184726 * tmp366);
+	       tmp387 = (K098017140 * tmp366) + (K995184726 * tmp353);
+	  }
+	  {
+	       fftw_real tmp298;
+	       fftw_real tmp311;
+	       fftw_real tmp375;
+	       fftw_real tmp378;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp298 = tmp274 - tmp297;
+	       tmp311 = tmp307 - tmp310;
+	       tmp312 = (K995184726 * tmp298) + (K098017140 * tmp311);
+	       tmp386 = (K098017140 * tmp298) - (K995184726 * tmp311);
+	       tmp375 = tmp371 - tmp374;
+	       tmp378 = tmp376 - tmp377;
+	       tmp379 = tmp375 - tmp378;
+	       tmp401 = tmp375 + tmp378;
+	  }
+	  {
+	       fftw_real tmp380;
+	       fftw_real tmp381;
+	       fftw_real tmp236;
+	       fftw_real tmp255;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp380 = (K195090322 * tmp254) - (K980785280 * tmp247);
+	       tmp381 = (K980785280 * tmp228) + (K195090322 * tmp235);
+	       tmp382 = tmp380 - tmp381;
+	       tmp392 = tmp381 + tmp380;
+	       tmp236 = (K195090322 * tmp228) - (K980785280 * tmp235);
+	       tmp255 = (K195090322 * tmp247) + (K980785280 * tmp254);
+	       tmp256 = tmp236 - tmp255;
+	       tmp402 = tmp236 + tmp255;
+	  }
+	  {
+	       fftw_real tmp257;
+	       fftw_real tmp368;
+	       fftw_real tmp383;
+	       fftw_real tmp384;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp257 = tmp217 + tmp256;
+	       tmp368 = tmp312 + tmp367;
+	       c_re(output[47 * ostride]) = tmp257 - tmp368;
+	       c_re(output[15 * ostride]) = tmp257 + tmp368;
+	       tmp383 = tmp379 - tmp382;
+	       tmp384 = tmp367 - tmp312;
+	       c_im(output[63 * ostride]) = tmp383 - tmp384;
+	       c_im(output[31 * ostride]) = tmp383 + tmp384;
+	  }
+	  {
+	       fftw_real tmp389;
+	       fftw_real tmp390;
+	       fftw_real tmp385;
+	       fftw_real tmp388;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp389 = tmp379 + tmp382;
+	       tmp390 = tmp386 + tmp387;
+	       c_im(output[47 * ostride]) = tmp389 - tmp390;
+	       c_im(output[15 * ostride]) = tmp389 + tmp390;
+	       tmp385 = tmp217 - tmp256;
+	       tmp388 = tmp386 - tmp387;
+	       c_re(output[63 * ostride]) = tmp385 - tmp388;
+	       c_re(output[31 * ostride]) = tmp385 + tmp388;
+	  }
+	  {
+	       fftw_real tmp393;
+	       fftw_real tmp400;
+	       fftw_real tmp403;
+	       fftw_real tmp404;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp393 = tmp391 + tmp392;
+	       tmp400 = tmp396 + tmp399;
+	       c_re(output[39 * ostride]) = tmp393 - tmp400;
+	       c_re(output[7 * ostride]) = tmp393 + tmp400;
+	       tmp403 = tmp401 - tmp402;
+	       tmp404 = tmp399 - tmp396;
+	       c_im(output[55 * ostride]) = tmp403 - tmp404;
+	       c_im(output[23 * ostride]) = tmp403 + tmp404;
+	  }
+	  {
+	       fftw_real tmp409;
+	       fftw_real tmp410;
+	       fftw_real tmp405;
+	       fftw_real tmp408;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp409 = tmp401 + tmp402;
+	       tmp410 = tmp406 + tmp407;
+	       c_im(output[39 * ostride]) = tmp409 - tmp410;
+	       c_im(output[7 * ostride]) = tmp409 + tmp410;
+	       tmp405 = tmp391 - tmp392;
+	       tmp408 = tmp406 - tmp407;
+	       c_re(output[55 * ostride]) = tmp405 - tmp408;
+	       c_re(output[23 * ostride]) = tmp405 + tmp408;
+	  }
+     }
+     {
+	  fftw_real tmp413;
+	  fftw_real tmp451;
+	  fftw_real tmp456;
+	  fftw_real tmp466;
+	  fftw_real tmp459;
+	  fftw_real tmp467;
+	  fftw_real tmp435;
+	  fftw_real tmp447;
+	  fftw_real tmp428;
+	  fftw_real tmp446;
+	  fftw_real tmp439;
+	  fftw_real tmp461;
+	  fftw_real tmp442;
+	  fftw_real tmp452;
+	  fftw_real tmp420;
+	  fftw_real tmp462;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp411;
+	       fftw_real tmp412;
+	       fftw_real tmp454;
+	       fftw_real tmp455;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp411 = tmp193 + tmp200;
+	       tmp412 = tmp377 + tmp376;
+	       tmp413 = tmp411 - tmp412;
+	       tmp451 = tmp411 + tmp412;
+	       tmp454 = tmp422 + tmp423;
+	       tmp455 = tmp425 + tmp426;
+	       tmp456 = (K956940335 * tmp454) + (K290284677 * tmp455);
+	       tmp466 = (K956940335 * tmp455) - (K290284677 * tmp454);
+	  }
+	  {
+	       fftw_real tmp457;
+	       fftw_real tmp458;
+	       fftw_real tmp431;
+	       fftw_real tmp434;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp457 = tmp429 + tmp430;
+	       tmp458 = tmp432 + tmp433;
+	       tmp459 = (K956940335 * tmp457) - (K290284677 * tmp458);
+	       tmp467 = (K290284677 * tmp457) + (K956940335 * tmp458);
+	       tmp431 = tmp429 - tmp430;
+	       tmp434 = tmp432 - tmp433;
+	       tmp435 = (K471396736 * tmp431) - (K881921264 * tmp434);
+	       tmp447 = (K881921264 * tmp431) + (K471396736 * tmp434);
+	  }
+	  {
+	       fftw_real tmp424;
+	       fftw_real tmp427;
+	       fftw_real tmp437;
+	       fftw_real tmp438;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp424 = tmp422 - tmp423;
+	       tmp427 = tmp425 - tmp426;
+	       tmp428 = (K471396736 * tmp424) + (K881921264 * tmp427);
+	       tmp446 = (K471396736 * tmp427) - (K881921264 * tmp424);
+	       tmp437 = tmp371 + tmp374;
+	       tmp438 = tmp208 + tmp215;
+	       tmp439 = tmp437 - tmp438;
+	       tmp461 = tmp437 + tmp438;
+	  }
+	  {
+	       fftw_real tmp440;
+	       fftw_real tmp441;
+	       fftw_real tmp416;
+	       fftw_real tmp419;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp440 = (K831469612 * tmp418) - (K555570233 * tmp417);
+	       tmp441 = (K555570233 * tmp414) + (K831469612 * tmp415);
+	       tmp442 = tmp440 - tmp441;
+	       tmp452 = tmp441 + tmp440;
+	       tmp416 = (K831469612 * tmp414) - (K555570233 * tmp415);
+	       tmp419 = (K831469612 * tmp417) + (K555570233 * tmp418);
+	       tmp420 = tmp416 - tmp419;
+	       tmp462 = tmp416 + tmp419;
+	  }
+	  {
+	       fftw_real tmp421;
+	       fftw_real tmp436;
+	       fftw_real tmp443;
+	       fftw_real tmp444;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp421 = tmp413 + tmp420;
+	       tmp436 = tmp428 + tmp435;
+	       c_re(output[43 * ostride]) = tmp421 - tmp436;
+	       c_re(output[11 * ostride]) = tmp421 + tmp436;
+	       tmp443 = tmp439 - tmp442;
+	       tmp444 = tmp435 - tmp428;
+	       c_im(output[59 * ostride]) = tmp443 - tmp444;
+	       c_im(output[27 * ostride]) = tmp443 + tmp444;
+	  }
+	  {
+	       fftw_real tmp449;
+	       fftw_real tmp450;
+	       fftw_real tmp445;
+	       fftw_real tmp448;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp449 = tmp439 + tmp442;
+	       tmp450 = tmp446 + tmp447;
+	       c_im(output[43 * ostride]) = tmp449 - tmp450;
+	       c_im(output[11 * ostride]) = tmp449 + tmp450;
+	       tmp445 = tmp413 - tmp420;
+	       tmp448 = tmp446 - tmp447;
+	       c_re(output[59 * ostride]) = tmp445 - tmp448;
+	       c_re(output[27 * ostride]) = tmp445 + tmp448;
+	  }
+	  {
+	       fftw_real tmp453;
+	       fftw_real tmp460;
+	       fftw_real tmp463;
+	       fftw_real tmp464;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp453 = tmp451 + tmp452;
+	       tmp460 = tmp456 + tmp459;
+	       c_re(output[35 * ostride]) = tmp453 - tmp460;
+	       c_re(output[3 * ostride]) = tmp453 + tmp460;
+	       tmp463 = tmp461 - tmp462;
+	       tmp464 = tmp459 - tmp456;
+	       c_im(output[51 * ostride]) = tmp463 - tmp464;
+	       c_im(output[19 * ostride]) = tmp463 + tmp464;
+	  }
+	  {
+	       fftw_real tmp469;
+	       fftw_real tmp470;
+	       fftw_real tmp465;
+	       fftw_real tmp468;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp469 = tmp461 + tmp462;
+	       tmp470 = tmp466 + tmp467;
+	       c_im(output[35 * ostride]) = tmp469 - tmp470;
+	       c_im(output[3 * ostride]) = tmp469 + tmp470;
+	       tmp465 = tmp451 - tmp452;
+	       tmp468 = tmp466 - tmp467;
+	       c_re(output[51 * ostride]) = tmp465 - tmp468;
+	       c_re(output[19 * ostride]) = tmp465 + tmp468;
+	  }
+     }
+     {
+	  fftw_real tmp817;
+	  fftw_real tmp863;
+	  fftw_real tmp824;
+	  fftw_real tmp874;
+	  fftw_real tmp854;
+	  fftw_real tmp864;
+	  fftw_real tmp836;
+	  fftw_real tmp858;
+	  fftw_real tmp851;
+	  fftw_real tmp873;
+	  fftw_real tmp868;
+	  fftw_real tmp878;
+	  fftw_real tmp847;
+	  fftw_real tmp859;
+	  fftw_real tmp871;
+	  fftw_real tmp879;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp820;
+	       fftw_real tmp823;
+	       fftw_real tmp866;
+	       fftw_real tmp867;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp817 = tmp815 - tmp816;
+	       tmp863 = tmp815 + tmp816;
+	       tmp820 = tmp818 - tmp819;
+	       tmp823 = tmp821 + tmp822;
+	       tmp824 = K707106781 * (tmp820 - tmp823);
+	       tmp874 = K707106781 * (tmp820 + tmp823);
+	       {
+		    fftw_real tmp852;
+		    fftw_real tmp853;
+		    fftw_real tmp830;
+		    fftw_real tmp835;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp852 = tmp821 - tmp822;
+		    tmp853 = tmp819 + tmp818;
+		    tmp854 = K707106781 * (tmp852 - tmp853);
+		    tmp864 = K707106781 * (tmp853 + tmp852);
+		    tmp830 = tmp828 - tmp829;
+		    tmp835 = tmp831 - tmp834;
+		    tmp836 = (K923879532 * tmp830) + (K382683432 * tmp835);
+		    tmp858 = (K382683432 * tmp830) - (K923879532 * tmp835);
+	       }
+	       tmp851 = tmp849 - tmp850;
+	       tmp873 = tmp850 + tmp849;
+	       tmp866 = tmp829 + tmp828;
+	       tmp867 = tmp831 + tmp834;
+	       tmp868 = (K382683432 * tmp866) + (K923879532 * tmp867);
+	       tmp878 = (K923879532 * tmp866) - (K382683432 * tmp867);
+	       {
+		    fftw_real tmp841;
+		    fftw_real tmp846;
+		    fftw_real tmp869;
+		    fftw_real tmp870;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp841 = tmp837 - tmp840;
+		    tmp846 = tmp844 - tmp845;
+		    tmp847 = (K382683432 * tmp841) - (K923879532 * tmp846);
+		    tmp859 = (K382683432 * tmp846) + (K923879532 * tmp841);
+		    tmp869 = tmp837 + tmp840;
+		    tmp870 = tmp845 + tmp844;
+		    tmp871 = (K923879532 * tmp869) - (K382683432 * tmp870);
+		    tmp879 = (K923879532 * tmp870) + (K382683432 * tmp869);
+	       }
+	  }
+	  {
+	       fftw_real tmp825;
+	       fftw_real tmp848;
+	       fftw_real tmp855;
+	       fftw_real tmp856;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp825 = tmp817 + tmp824;
+	       tmp848 = tmp836 + tmp847;
+	       c_re(output[44 * ostride]) = tmp825 - tmp848;
+	       c_re(output[12 * ostride]) = tmp825 + tmp848;
+	       tmp855 = tmp851 - tmp854;
+	       tmp856 = tmp847 - tmp836;
+	       c_im(output[60 * ostride]) = tmp855 - tmp856;
+	       c_im(output[28 * ostride]) = tmp855 + tmp856;
+	  }
+	  {
+	       fftw_real tmp861;
+	       fftw_real tmp862;
+	       fftw_real tmp857;
+	       fftw_real tmp860;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp861 = tmp851 + tmp854;
+	       tmp862 = tmp858 + tmp859;
+	       c_im(output[44 * ostride]) = tmp861 - tmp862;
+	       c_im(output[12 * ostride]) = tmp861 + tmp862;
+	       tmp857 = tmp817 - tmp824;
+	       tmp860 = tmp858 - tmp859;
+	       c_re(output[60 * ostride]) = tmp857 - tmp860;
+	       c_re(output[28 * ostride]) = tmp857 + tmp860;
+	  }
+	  {
+	       fftw_real tmp865;
+	       fftw_real tmp872;
+	       fftw_real tmp875;
+	       fftw_real tmp876;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp865 = tmp863 + tmp864;
+	       tmp872 = tmp868 + tmp871;
+	       c_re(output[36 * ostride]) = tmp865 - tmp872;
+	       c_re(output[4 * ostride]) = tmp865 + tmp872;
+	       tmp875 = tmp873 - tmp874;
+	       tmp876 = tmp871 - tmp868;
+	       c_im(output[52 * ostride]) = tmp875 - tmp876;
+	       c_im(output[20 * ostride]) = tmp875 + tmp876;
+	  }
+	  {
+	       fftw_real tmp881;
+	       fftw_real tmp882;
+	       fftw_real tmp877;
+	       fftw_real tmp880;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp881 = tmp873 + tmp874;
+	       tmp882 = tmp878 + tmp879;
+	       c_im(output[36 * ostride]) = tmp881 - tmp882;
+	       c_im(output[4 * ostride]) = tmp881 + tmp882;
+	       tmp877 = tmp863 - tmp864;
+	       tmp880 = tmp878 - tmp879;
+	       c_re(output[52 * ostride]) = tmp877 - tmp880;
+	       c_re(output[20 * ostride]) = tmp877 + tmp880;
+	  }
+     }
+     {
+	  fftw_real tmp757;
+	  fftw_real tmp795;
+	  fftw_real tmp800;
+	  fftw_real tmp810;
+	  fftw_real tmp803;
+	  fftw_real tmp811;
+	  fftw_real tmp779;
+	  fftw_real tmp791;
+	  fftw_real tmp783;
+	  fftw_real tmp805;
+	  fftw_real tmp764;
+	  fftw_real tmp806;
+	  fftw_real tmp786;
+	  fftw_real tmp796;
+	  fftw_real tmp772;
+	  fftw_real tmp790;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp756;
+	       fftw_real tmp798;
+	       fftw_real tmp799;
+	       fftw_real tmp782;
+	       fftw_real tmp760;
+	       fftw_real tmp763;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp756 = K707106781 * (tmp721 + tmp720);
+	       tmp757 = tmp755 - tmp756;
+	       tmp795 = tmp755 + tmp756;
+	       tmp798 = tmp766 + tmp767;
+	       tmp799 = tmp769 + tmp770;
+	       tmp800 = (K980785280 * tmp798) + (K195090322 * tmp799);
+	       tmp810 = (K980785280 * tmp799) - (K195090322 * tmp798);
+	       {
+		    fftw_real tmp801;
+		    fftw_real tmp802;
+		    fftw_real tmp775;
+		    fftw_real tmp778;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp801 = tmp773 + tmp774;
+		    tmp802 = tmp776 + tmp777;
+		    tmp803 = (K980785280 * tmp801) - (K195090322 * tmp802);
+		    tmp811 = (K195090322 * tmp801) + (K980785280 * tmp802);
+		    tmp775 = tmp773 - tmp774;
+		    tmp778 = tmp776 - tmp777;
+		    tmp779 = (K555570233 * tmp775) - (K831469612 * tmp778);
+		    tmp791 = (K831469612 * tmp775) + (K555570233 * tmp778);
+	       }
+	       tmp782 = K707106781 * (tmp640 + tmp643);
+	       tmp783 = tmp781 - tmp782;
+	       tmp805 = tmp781 + tmp782;
+	       tmp760 = (K923879532 * tmp758) - (K382683432 * tmp759);
+	       tmp763 = (K923879532 * tmp761) + (K382683432 * tmp762);
+	       tmp764 = tmp760 - tmp763;
+	       tmp806 = tmp760 + tmp763;
+	       {
+		    fftw_real tmp784;
+		    fftw_real tmp785;
+		    fftw_real tmp768;
+		    fftw_real tmp771;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp784 = (K923879532 * tmp762) - (K382683432 * tmp761);
+		    tmp785 = (K382683432 * tmp758) + (K923879532 * tmp759);
+		    tmp786 = tmp784 - tmp785;
+		    tmp796 = tmp785 + tmp784;
+		    tmp768 = tmp766 - tmp767;
+		    tmp771 = tmp769 - tmp770;
+		    tmp772 = (K555570233 * tmp768) + (K831469612 * tmp771);
+		    tmp790 = (K555570233 * tmp771) - (K831469612 * tmp768);
+	       }
+	  }
+	  {
+	       fftw_real tmp765;
+	       fftw_real tmp780;
+	       fftw_real tmp787;
+	       fftw_real tmp788;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp765 = tmp757 + tmp764;
+	       tmp780 = tmp772 + tmp779;
+	       c_re(output[42 * ostride]) = tmp765 - tmp780;
+	       c_re(output[10 * ostride]) = tmp765 + tmp780;
+	       tmp787 = tmp783 - tmp786;
+	       tmp788 = tmp779 - tmp772;
+	       c_im(output[58 * ostride]) = tmp787 - tmp788;
+	       c_im(output[26 * ostride]) = tmp787 + tmp788;
+	  }
+	  {
+	       fftw_real tmp793;
+	       fftw_real tmp794;
+	       fftw_real tmp789;
+	       fftw_real tmp792;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp793 = tmp783 + tmp786;
+	       tmp794 = tmp790 + tmp791;
+	       c_im(output[42 * ostride]) = tmp793 - tmp794;
+	       c_im(output[10 * ostride]) = tmp793 + tmp794;
+	       tmp789 = tmp757 - tmp764;
+	       tmp792 = tmp790 - tmp791;
+	       c_re(output[58 * ostride]) = tmp789 - tmp792;
+	       c_re(output[26 * ostride]) = tmp789 + tmp792;
+	  }
+	  {
+	       fftw_real tmp797;
+	       fftw_real tmp804;
+	       fftw_real tmp807;
+	       fftw_real tmp808;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp797 = tmp795 + tmp796;
+	       tmp804 = tmp800 + tmp803;
+	       c_re(output[34 * ostride]) = tmp797 - tmp804;
+	       c_re(output[2 * ostride]) = tmp797 + tmp804;
+	       tmp807 = tmp805 - tmp806;
+	       tmp808 = tmp803 - tmp800;
+	       c_im(output[50 * ostride]) = tmp807 - tmp808;
+	       c_im(output[18 * ostride]) = tmp807 + tmp808;
+	  }
+	  {
+	       fftw_real tmp813;
+	       fftw_real tmp814;
+	       fftw_real tmp809;
+	       fftw_real tmp812;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp813 = tmp805 + tmp806;
+	       tmp814 = tmp810 + tmp811;
+	       c_im(output[34 * ostride]) = tmp813 - tmp814;
+	       c_im(output[2 * ostride]) = tmp813 + tmp814;
+	       tmp809 = tmp795 - tmp796;
+	       tmp812 = tmp810 - tmp811;
+	       c_re(output[50 * ostride]) = tmp809 - tmp812;
+	       c_re(output[18 * ostride]) = tmp809 + tmp812;
+	  }
+     }
+     {
+	  fftw_real tmp645;
+	  fftw_real tmp735;
+	  fftw_real tmp740;
+	  fftw_real tmp750;
+	  fftw_real tmp743;
+	  fftw_real tmp751;
+	  fftw_real tmp715;
+	  fftw_real tmp731;
+	  fftw_real tmp723;
+	  fftw_real tmp745;
+	  fftw_real tmp660;
+	  fftw_real tmp746;
+	  fftw_real tmp726;
+	  fftw_real tmp736;
+	  fftw_real tmp688;
+	  fftw_real tmp730;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp644;
+	       fftw_real tmp738;
+	       fftw_real tmp739;
+	       fftw_real tmp722;
+	       fftw_real tmp652;
+	       fftw_real tmp659;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp644 = K707106781 * (tmp640 - tmp643);
+	       tmp645 = tmp637 - tmp644;
+	       tmp735 = tmp637 + tmp644;
+	       tmp738 = tmp666 + tmp677;
+	       tmp739 = tmp683 + tmp686;
+	       tmp740 = (K555570233 * tmp738) + (K831469612 * tmp739);
+	       tmp750 = (K831469612 * tmp738) - (K555570233 * tmp739);
+	       {
+		    fftw_real tmp741;
+		    fftw_real tmp742;
+		    fftw_real tmp705;
+		    fftw_real tmp714;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp741 = tmp693 + tmp704;
+		    tmp742 = tmp710 + tmp713;
+		    tmp743 = (K831469612 * tmp741) - (K555570233 * tmp742);
+		    tmp751 = (K831469612 * tmp742) + (K555570233 * tmp741);
+		    tmp705 = tmp693 - tmp704;
+		    tmp714 = tmp710 - tmp713;
+		    tmp715 = (K195090322 * tmp705) - (K980785280 * tmp714);
+		    tmp731 = (K195090322 * tmp714) + (K980785280 * tmp705);
+	       }
+	       tmp722 = K707106781 * (tmp720 - tmp721);
+	       tmp723 = tmp719 - tmp722;
+	       tmp745 = tmp719 + tmp722;
+	       tmp652 = (K382683432 * tmp648) - (K923879532 * tmp651);
+	       tmp659 = (K382683432 * tmp655) + (K923879532 * tmp658);
+	       tmp660 = tmp652 - tmp659;
+	       tmp746 = tmp652 + tmp659;
+	       {
+		    fftw_real tmp724;
+		    fftw_real tmp725;
+		    fftw_real tmp678;
+		    fftw_real tmp687;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp724 = (K382683432 * tmp658) - (K923879532 * tmp655);
+		    tmp725 = (K923879532 * tmp648) + (K382683432 * tmp651);
+		    tmp726 = tmp724 - tmp725;
+		    tmp736 = tmp725 + tmp724;
+		    tmp678 = tmp666 - tmp677;
+		    tmp687 = tmp683 - tmp686;
+		    tmp688 = (K980785280 * tmp678) + (K195090322 * tmp687);
+		    tmp730 = (K195090322 * tmp678) - (K980785280 * tmp687);
+	       }
+	  }
+	  {
+	       fftw_real tmp661;
+	       fftw_real tmp716;
+	       fftw_real tmp727;
+	       fftw_real tmp728;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp661 = tmp645 + tmp660;
+	       tmp716 = tmp688 + tmp715;
+	       c_re(output[46 * ostride]) = tmp661 - tmp716;
+	       c_re(output[14 * ostride]) = tmp661 + tmp716;
+	       tmp727 = tmp723 - tmp726;
+	       tmp728 = tmp715 - tmp688;
+	       c_im(output[62 * ostride]) = tmp727 - tmp728;
+	       c_im(output[30 * ostride]) = tmp727 + tmp728;
+	  }
+	  {
+	       fftw_real tmp733;
+	       fftw_real tmp734;
+	       fftw_real tmp729;
+	       fftw_real tmp732;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp733 = tmp723 + tmp726;
+	       tmp734 = tmp730 + tmp731;
+	       c_im(output[46 * ostride]) = tmp733 - tmp734;
+	       c_im(output[14 * ostride]) = tmp733 + tmp734;
+	       tmp729 = tmp645 - tmp660;
+	       tmp732 = tmp730 - tmp731;
+	       c_re(output[62 * ostride]) = tmp729 - tmp732;
+	       c_re(output[30 * ostride]) = tmp729 + tmp732;
+	  }
+	  {
+	       fftw_real tmp737;
+	       fftw_real tmp744;
+	       fftw_real tmp747;
+	       fftw_real tmp748;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp737 = tmp735 + tmp736;
+	       tmp744 = tmp740 + tmp743;
+	       c_re(output[38 * ostride]) = tmp737 - tmp744;
+	       c_re(output[6 * ostride]) = tmp737 + tmp744;
+	       tmp747 = tmp745 - tmp746;
+	       tmp748 = tmp743 - tmp740;
+	       c_im(output[54 * ostride]) = tmp747 - tmp748;
+	       c_im(output[22 * ostride]) = tmp747 + tmp748;
+	  }
+	  {
+	       fftw_real tmp753;
+	       fftw_real tmp754;
+	       fftw_real tmp749;
+	       fftw_real tmp752;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp753 = tmp745 + tmp746;
+	       tmp754 = tmp750 + tmp751;
+	       c_im(output[38 * ostride]) = tmp753 - tmp754;
+	       c_im(output[6 * ostride]) = tmp753 + tmp754;
+	       tmp749 = tmp735 - tmp736;
+	       tmp752 = tmp750 - tmp751;
+	       c_re(output[54 * ostride]) = tmp749 - tmp752;
+	       c_re(output[22 * ostride]) = tmp749 + tmp752;
+	  }
+     }
+     {
+	  fftw_real tmp481;
+	  fftw_real tmp555;
+	  fftw_real tmp560;
+	  fftw_real tmp570;
+	  fftw_real tmp563;
+	  fftw_real tmp571;
+	  fftw_real tmp535;
+	  fftw_real tmp551;
+	  fftw_real tmp516;
+	  fftw_real tmp550;
+	  fftw_real tmp543;
+	  fftw_real tmp565;
+	  fftw_real tmp546;
+	  fftw_real tmp556;
+	  fftw_real tmp496;
+	  fftw_real tmp566;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp473;
+	       fftw_real tmp480;
+	       fftw_real tmp558;
+	       fftw_real tmp559;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp473 = tmp471 - tmp472;
+	       tmp480 = tmp476 - tmp479;
+	       tmp481 = tmp473 - tmp480;
+	       tmp555 = tmp473 + tmp480;
+	       tmp558 = tmp500 + tmp507;
+	       tmp559 = tmp511 + tmp514;
+	       tmp560 = (K471396736 * tmp558) + (K881921264 * tmp559);
+	       tmp570 = (K881921264 * tmp558) - (K471396736 * tmp559);
+	  }
+	  {
+	       fftw_real tmp561;
+	       fftw_real tmp562;
+	       fftw_real tmp527;
+	       fftw_real tmp534;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp561 = tmp519 + tmp526;
+	       tmp562 = tmp530 + tmp533;
+	       tmp563 = (K881921264 * tmp561) - (K471396736 * tmp562);
+	       tmp571 = (K881921264 * tmp562) + (K471396736 * tmp561);
+	       tmp527 = tmp519 - tmp526;
+	       tmp534 = tmp530 - tmp533;
+	       tmp535 = (K290284677 * tmp527) - (K956940335 * tmp534);
+	       tmp551 = (K290284677 * tmp534) + (K956940335 * tmp527);
+	  }
+	  {
+	       fftw_real tmp508;
+	       fftw_real tmp515;
+	       fftw_real tmp539;
+	       fftw_real tmp542;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp508 = tmp500 - tmp507;
+	       tmp515 = tmp511 - tmp514;
+	       tmp516 = (K956940335 * tmp508) + (K290284677 * tmp515);
+	       tmp550 = (K290284677 * tmp508) - (K956940335 * tmp515);
+	       tmp539 = tmp537 - tmp538;
+	       tmp542 = tmp540 - tmp541;
+	       tmp543 = tmp539 - tmp542;
+	       tmp565 = tmp539 + tmp542;
+	  }
+	  {
+	       fftw_real tmp544;
+	       fftw_real tmp545;
+	       fftw_real tmp488;
+	       fftw_real tmp495;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp544 = (K555570233 * tmp491) - (K831469612 * tmp494);
+	       tmp545 = (K555570233 * tmp487) + (K831469612 * tmp484);
+	       tmp546 = tmp544 - tmp545;
+	       tmp556 = tmp545 + tmp544;
+	       tmp488 = (K555570233 * tmp484) - (K831469612 * tmp487);
+	       tmp495 = (K831469612 * tmp491) + (K555570233 * tmp494);
+	       tmp496 = tmp488 - tmp495;
+	       tmp566 = tmp488 + tmp495;
+	  }
+	  {
+	       fftw_real tmp497;
+	       fftw_real tmp536;
+	       fftw_real tmp547;
+	       fftw_real tmp548;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp497 = tmp481 + tmp496;
+	       tmp536 = tmp516 + tmp535;
+	       c_re(output[45 * ostride]) = tmp497 - tmp536;
+	       c_re(output[13 * ostride]) = tmp497 + tmp536;
+	       tmp547 = tmp543 - tmp546;
+	       tmp548 = tmp535 - tmp516;
+	       c_im(output[61 * ostride]) = tmp547 - tmp548;
+	       c_im(output[29 * ostride]) = tmp547 + tmp548;
+	  }
+	  {
+	       fftw_real tmp553;
+	       fftw_real tmp554;
+	       fftw_real tmp549;
+	       fftw_real tmp552;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp553 = tmp543 + tmp546;
+	       tmp554 = tmp550 + tmp551;
+	       c_im(output[45 * ostride]) = tmp553 - tmp554;
+	       c_im(output[13 * ostride]) = tmp553 + tmp554;
+	       tmp549 = tmp481 - tmp496;
+	       tmp552 = tmp550 - tmp551;
+	       c_re(output[61 * ostride]) = tmp549 - tmp552;
+	       c_re(output[29 * ostride]) = tmp549 + tmp552;
+	  }
+	  {
+	       fftw_real tmp557;
+	       fftw_real tmp564;
+	       fftw_real tmp567;
+	       fftw_real tmp568;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp557 = tmp555 + tmp556;
+	       tmp564 = tmp560 + tmp563;
+	       c_re(output[37 * ostride]) = tmp557 - tmp564;
+	       c_re(output[5 * ostride]) = tmp557 + tmp564;
+	       tmp567 = tmp565 - tmp566;
+	       tmp568 = tmp563 - tmp560;
+	       c_im(output[53 * ostride]) = tmp567 - tmp568;
+	       c_im(output[21 * ostride]) = tmp567 + tmp568;
+	  }
+	  {
+	       fftw_real tmp573;
+	       fftw_real tmp574;
+	       fftw_real tmp569;
+	       fftw_real tmp572;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp573 = tmp565 + tmp566;
+	       tmp574 = tmp570 + tmp571;
+	       c_im(output[37 * ostride]) = tmp573 - tmp574;
+	       c_im(output[5 * ostride]) = tmp573 + tmp574;
+	       tmp569 = tmp555 - tmp556;
+	       tmp572 = tmp570 - tmp571;
+	       c_re(output[53 * ostride]) = tmp569 - tmp572;
+	       c_re(output[21 * ostride]) = tmp569 + tmp572;
+	  }
+     }
+     {
+	  fftw_real tmp577;
+	  fftw_real tmp615;
+	  fftw_real tmp620;
+	  fftw_real tmp630;
+	  fftw_real tmp623;
+	  fftw_real tmp631;
+	  fftw_real tmp599;
+	  fftw_real tmp611;
+	  fftw_real tmp592;
+	  fftw_real tmp610;
+	  fftw_real tmp603;
+	  fftw_real tmp625;
+	  fftw_real tmp606;
+	  fftw_real tmp616;
+	  fftw_real tmp584;
+	  fftw_real tmp626;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp575;
+	       fftw_real tmp576;
+	       fftw_real tmp618;
+	       fftw_real tmp619;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp575 = tmp471 + tmp472;
+	       tmp576 = tmp541 + tmp540;
+	       tmp577 = tmp575 - tmp576;
+	       tmp615 = tmp575 + tmp576;
+	       tmp618 = tmp586 + tmp587;
+	       tmp619 = tmp589 + tmp590;
+	       tmp620 = (K995184726 * tmp618) + (K098017140 * tmp619);
+	       tmp630 = (K995184726 * tmp619) - (K098017140 * tmp618);
+	  }
+	  {
+	       fftw_real tmp621;
+	       fftw_real tmp622;
+	       fftw_real tmp595;
+	       fftw_real tmp598;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp621 = tmp593 + tmp594;
+	       tmp622 = tmp596 + tmp597;
+	       tmp623 = (K995184726 * tmp621) - (K098017140 * tmp622);
+	       tmp631 = (K098017140 * tmp621) + (K995184726 * tmp622);
+	       tmp595 = tmp593 - tmp594;
+	       tmp598 = tmp596 - tmp597;
+	       tmp599 = (K634393284 * tmp595) - (K773010453 * tmp598);
+	       tmp611 = (K773010453 * tmp595) + (K634393284 * tmp598);
+	  }
+	  {
+	       fftw_real tmp588;
+	       fftw_real tmp591;
+	       fftw_real tmp601;
+	       fftw_real tmp602;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp588 = tmp586 - tmp587;
+	       tmp591 = tmp589 - tmp590;
+	       tmp592 = (K634393284 * tmp588) + (K773010453 * tmp591);
+	       tmp610 = (K634393284 * tmp591) - (K773010453 * tmp588);
+	       tmp601 = tmp537 + tmp538;
+	       tmp602 = tmp476 + tmp479;
+	       tmp603 = tmp601 - tmp602;
+	       tmp625 = tmp601 + tmp602;
+	  }
+	  {
+	       fftw_real tmp604;
+	       fftw_real tmp605;
+	       fftw_real tmp580;
+	       fftw_real tmp583;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp604 = (K980785280 * tmp581) - (K195090322 * tmp582);
+	       tmp605 = (K980785280 * tmp579) + (K195090322 * tmp578);
+	       tmp606 = tmp604 - tmp605;
+	       tmp616 = tmp605 + tmp604;
+	       tmp580 = (K980785280 * tmp578) - (K195090322 * tmp579);
+	       tmp583 = (K195090322 * tmp581) + (K980785280 * tmp582);
+	       tmp584 = tmp580 - tmp583;
+	       tmp626 = tmp580 + tmp583;
+	  }
+	  {
+	       fftw_real tmp585;
+	       fftw_real tmp600;
+	       fftw_real tmp607;
+	       fftw_real tmp608;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp585 = tmp577 + tmp584;
+	       tmp600 = tmp592 + tmp599;
+	       c_re(output[41 * ostride]) = tmp585 - tmp600;
+	       c_re(output[9 * ostride]) = tmp585 + tmp600;
+	       tmp607 = tmp603 - tmp606;
+	       tmp608 = tmp599 - tmp592;
+	       c_im(output[57 * ostride]) = tmp607 - tmp608;
+	       c_im(output[25 * ostride]) = tmp607 + tmp608;
+	  }
+	  {
+	       fftw_real tmp613;
+	       fftw_real tmp614;
+	       fftw_real tmp609;
+	       fftw_real tmp612;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp613 = tmp603 + tmp606;
+	       tmp614 = tmp610 + tmp611;
+	       c_im(output[41 * ostride]) = tmp613 - tmp614;
+	       c_im(output[9 * ostride]) = tmp613 + tmp614;
+	       tmp609 = tmp577 - tmp584;
+	       tmp612 = tmp610 - tmp611;
+	       c_re(output[57 * ostride]) = tmp609 - tmp612;
+	       c_re(output[25 * ostride]) = tmp609 + tmp612;
+	  }
+	  {
+	       fftw_real tmp617;
+	       fftw_real tmp624;
+	       fftw_real tmp627;
+	       fftw_real tmp628;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp617 = tmp615 + tmp616;
+	       tmp624 = tmp620 + tmp623;
+	       c_re(output[33 * ostride]) = tmp617 - tmp624;
+	       c_re(output[ostride]) = tmp617 + tmp624;
+	       tmp627 = tmp625 - tmp626;
+	       tmp628 = tmp623 - tmp620;
+	       c_im(output[49 * ostride]) = tmp627 - tmp628;
+	       c_im(output[17 * ostride]) = tmp627 + tmp628;
+	  }
+	  {
+	       fftw_real tmp633;
+	       fftw_real tmp634;
+	       fftw_real tmp629;
+	       fftw_real tmp632;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp633 = tmp625 + tmp626;
+	       tmp634 = tmp630 + tmp631;
+	       c_im(output[33 * ostride]) = tmp633 - tmp634;
+	       c_im(output[ostride]) = tmp633 + tmp634;
+	       tmp629 = tmp615 - tmp616;
+	       tmp632 = tmp630 - tmp631;
+	       c_re(output[49 * ostride]) = tmp629 - tmp632;
+	       c_re(output[17 * ostride]) = tmp629 + tmp632;
+	  }
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_64_desc = {	/* descriptor record for the fftw_no_twiddle_64 codelet */
+     "fftw_no_twiddle_64",	/* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_64,	/* codelet entry point, cast to a generic function pointer */
+     64,	/* presumably the transform size -- confirm against fftw_codelet_desc in fftw-int.h */
+     FFTW_FORWARD,	/* transform direction */
+     FFTW_NOTW,	/* codelet kind: no-twiddle */
+     1409,	/* generator-assigned value; meaning not visible here -- TODO confirm in fftw-int.h */
+     0,	/* NOTE(review): looks like a twiddle-factor count, zero for this codelet -- confirm */
+     (const int *) 0,	/* null pointer: no int table attached */
+};
diff --git a/src/fftw/fn_7.c b/src/fftw/fn_7.c
new file mode 100644
index 0000000..c9acc08
--- /dev/null
+++ b/src/fftw/fn_7.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:37 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 7 */
+
+/*
+ * This function contains 60 FP additions, 36 FP multiplications,
+ * (or, 60 additions, 36 multiplications, 0 fused multiply/add),
+ * 22 stack variables, and 28 memory accesses
+ */
+static const fftw_real K222520933 =	/* constants used by fftw_no_twiddle_7; each name is K plus the leading decimals */
+FFTW_KONST(+0.222520933956314404288902564496794759466355569);	/* -cos(4*pi/7) */
+static const fftw_real K900968867 =
+FFTW_KONST(+0.900968867902419126236102319507445051165919162);	/* -cos(6*pi/7) */
+static const fftw_real K623489801 =
+FFTW_KONST(+0.623489801858733530525004884004239810632274731);	/* cos(2*pi/7) */
+static const fftw_real K781831482 =
+FFTW_KONST(+0.781831482468029808708444526674057750232334519);	/* sin(2*pi/7) */
+static const fftw_real K433883739 =
+FFTW_KONST(+0.433883739117558120475768332848358754609990728);	/* sin(6*pi/7) */
+static const fftw_real K974927912 =
+FFTW_KONST(+0.974927912181823607018131682993931217232785801);	/* sin(4*pi/7) */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_7(const fftw_complex *input, fftw_complex *output,	/* reads input[k * istride], writes output[k * ostride], k = 0..6 */
+		       int istride, int ostride)	/* strides are counted in fftw_complex elements */
+{	/* Fully unrolled 7-point transform. In the comments below xk = input[k * istride], Xk = output[k * ostride]. */
+     fftw_real tmp1;
+     fftw_real tmp31;
+     fftw_real tmp4;
+     fftw_real tmp26;
+     fftw_real tmp14;
+     fftw_real tmp33;
+     fftw_real tmp7;
+     fftw_real tmp28;
+     fftw_real tmp20;
+     fftw_real tmp30;
+     fftw_real tmp10;
+     fftw_real tmp27;
+     fftw_real tmp17;
+     fftw_real tmp32;
+     ASSERT_ALIGNED_DOUBLE;	/* project macro; its definition is not visible in this file */
+     tmp1 = c_re(input[0]);	/* Re(x0) */
+     tmp31 = c_im(input[0]);	/* Im(x0) */
+     {	/* samples 1 and 6 */
+	  fftw_real tmp2;
+	  fftw_real tmp3;
+	  fftw_real tmp12;
+	  fftw_real tmp13;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp2 = c_re(input[istride]);
+	  tmp3 = c_re(input[6 * istride]);
+	  tmp4 = tmp2 + tmp3;	/* Re(x1) + Re(x6) */
+	  tmp26 = tmp3 - tmp2;	/* Re(x6) - Re(x1) */
+	  tmp12 = c_im(input[istride]);
+	  tmp13 = c_im(input[6 * istride]);
+	  tmp14 = tmp12 - tmp13;	/* Im(x1) - Im(x6) */
+	  tmp33 = tmp12 + tmp13;	/* Im(x1) + Im(x6) */
+     }
+     {	/* samples 2 and 5 */
+	  fftw_real tmp5;
+	  fftw_real tmp6;
+	  fftw_real tmp18;
+	  fftw_real tmp19;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp5 = c_re(input[2 * istride]);
+	  tmp6 = c_re(input[5 * istride]);
+	  tmp7 = tmp5 + tmp6;	/* Re(x2) + Re(x5) */
+	  tmp28 = tmp6 - tmp5;	/* Re(x5) - Re(x2) */
+	  tmp18 = c_im(input[2 * istride]);
+	  tmp19 = c_im(input[5 * istride]);
+	  tmp20 = tmp18 - tmp19;	/* Im(x2) - Im(x5) */
+	  tmp30 = tmp18 + tmp19;	/* Im(x2) + Im(x5) */
+     }
+     {	/* samples 3 and 4 */
+	  fftw_real tmp8;
+	  fftw_real tmp9;
+	  fftw_real tmp15;
+	  fftw_real tmp16;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp8 = c_re(input[3 * istride]);
+	  tmp9 = c_re(input[4 * istride]);
+	  tmp10 = tmp8 + tmp9;	/* Re(x3) + Re(x4) */
+	  tmp27 = tmp9 - tmp8;	/* Re(x4) - Re(x3) */
+	  tmp15 = c_im(input[3 * istride]);
+	  tmp16 = c_im(input[4 * istride]);
+	  tmp17 = tmp15 - tmp16;	/* Im(x3) - Im(x4) */
+	  tmp32 = tmp15 + tmp16;	/* Im(x3) + Im(x4) */
+     }
+     {	/* all inputs are loaded; from here on only outputs are written */
+	  fftw_real tmp23;
+	  fftw_real tmp22;
+	  fftw_real tmp35;
+	  fftw_real tmp36;
+	  ASSERT_ALIGNED_DOUBLE;
+	  c_re(output[0]) = tmp1 + tmp4 + tmp7 + tmp10;	/* Re(X0): sum of all seven real parts */
+	  tmp23 =	/* weighted mix of the three imaginary-part differences */
+	      (K974927912 * tmp14) - (K433883739 * tmp20) -
+	      (K781831482 * tmp17);
+	  tmp22 =	/* Re(x0) plus a weighted mix of the three real-part sums */
+	      tmp1 + (K623489801 * tmp10) - (K900968867 * tmp7) -
+	      (K222520933 * tmp4);
+	  c_re(output[5 * ostride]) = tmp22 - tmp23;	/* Re(X5) and Re(X2) share tmp22 and differ in the sign of tmp23 */
+	  c_re(output[2 * ostride]) = tmp22 + tmp23;
+	  {
+	       fftw_real tmp25;
+	       fftw_real tmp24;
+	       fftw_real tmp21;
+	       fftw_real tmp11;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp25 =	/* same three differences as tmp23, different weights */
+		   (K781831482 * tmp14) + (K433883739 * tmp17) +
+		   (K974927912 * tmp20);
+	       tmp24 =	/* same three sums as tmp22, different weights */
+		   tmp1 + (K623489801 * tmp4) - (K900968867 * tmp10) -
+		   (K222520933 * tmp7);
+	       c_re(output[6 * ostride]) = tmp24 - tmp25;	/* Re(X6) */
+	       c_re(output[ostride]) = tmp24 + tmp25;	/* Re(X1) */
+	       tmp21 =
+		   (K433883739 * tmp14) + (K974927912 * tmp17) -
+		   (K781831482 * tmp20);
+	       tmp11 =
+		   tmp1 + (K623489801 * tmp7) - (K222520933 * tmp10) -
+		   (K900968867 * tmp4);
+	       c_re(output[4 * ostride]) = tmp11 - tmp21;	/* Re(X4) */
+	       c_re(output[3 * ostride]) = tmp11 + tmp21;	/* Re(X3) */
+	  }
+	  c_im(output[0]) = tmp33 + tmp32 + tmp30 + tmp31;	/* Im(X0): sum of all seven imaginary parts */
+	  tmp35 =	/* weighted mix of the three real-part differences */
+	      (K781831482 * tmp26) + (K974927912 * tmp28) +
+	      (K433883739 * tmp27);
+	  tmp36 =	/* Im(x0) plus a weighted mix of the three imaginary-part sums */
+	      (K623489801 * tmp33) + tmp31 - (K222520933 * tmp30) -
+	      (K900968867 * tmp32);
+	  c_im(output[ostride]) = tmp35 + tmp36;	/* Im(X1) */
+	  c_im(output[6 * ostride]) = tmp36 - tmp35;	/* Im(X6) */
+	  {
+	       fftw_real tmp29;
+	       fftw_real tmp34;
+	       fftw_real tmp37;
+	       fftw_real tmp38;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp29 =
+		   (K433883739 * tmp26) + (K974927912 * tmp27) -
+		   (K781831482 * tmp28);
+	       tmp34 =
+		   (K623489801 * tmp30) + tmp31 - (K222520933 * tmp32) -
+		   (K900968867 * tmp33);
+	       c_im(output[3 * ostride]) = tmp29 + tmp34;	/* Im(X3) */
+	       c_im(output[4 * ostride]) = tmp34 - tmp29;	/* Im(X4) */
+	       tmp37 =
+		   (K974927912 * tmp26) - (K781831482 * tmp27) -
+		   (K433883739 * tmp28);
+	       tmp38 =
+		   (K623489801 * tmp32) + tmp31 - (K900968867 * tmp30) -
+		   (K222520933 * tmp33);
+	       c_im(output[2 * ostride]) = tmp37 + tmp38;	/* Im(X2) */
+	       c_im(output[5 * ostride]) = tmp38 - tmp37;	/* Im(X5) */
+	  }
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_7_desc = {	/* descriptor record for the fftw_no_twiddle_7 codelet */
+     "fftw_no_twiddle_7",	/* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_7,	/* codelet entry point, cast to a generic function pointer */
+     7,	/* matches the seven inputs/outputs the codelet touches; presumably the transform size -- confirm */
+     FFTW_FORWARD,	/* transform direction */
+     FFTW_NOTW,	/* codelet kind: no-twiddle */
+     155,	/* generator-assigned value; meaning not visible here -- TODO confirm in fftw-int.h */
+     0,	/* NOTE(review): looks like a twiddle-factor count, zero for this codelet -- confirm */
+     (const int *) 0,	/* null pointer: no int table attached */
+};
diff --git a/src/fftw/fn_8.c b/src/fftw/fn_8.c
new file mode 100644
index 0000000..d2075fb
--- /dev/null
+++ b/src/fftw/fn_8.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:37 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 8 */
+
+/*
+ * This function contains 52 FP additions, 4 FP multiplications,
+ * (or, 52 additions, 4 multiplications, 0 fused multiply/add),
+ * 26 stack variables, and 32 memory accesses
+ */
+static const fftw_real K707106781 =	/* the only multiplicative constant used by fftw_no_twiddle_8 */
+FFTW_KONST(+0.707106781186547524400844362104849039284835938);	/* sqrt(2)/2 = cos(pi/4) = sin(pi/4) */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_8(const fftw_complex *input, fftw_complex *output,	/* reads input[k * istride], writes output[k * ostride], k = 0..7 */
+		       int istride, int ostride)	/* strides are counted in fftw_complex elements */
+{	/* Fully unrolled 8-point transform. In the comments below xk = input[k * istride], Xk = output[k * ostride]. */
+     fftw_real tmp3;
+     fftw_real tmp23;
+     fftw_real tmp18;
+     fftw_real tmp38;
+     fftw_real tmp6;
+     fftw_real tmp37;
+     fftw_real tmp21;
+     fftw_real tmp24;
+     fftw_real tmp13;
+     fftw_real tmp49;
+     fftw_real tmp35;
+     fftw_real tmp43;
+     fftw_real tmp10;
+     fftw_real tmp48;
+     fftw_real tmp30;
+     fftw_real tmp42;
+     ASSERT_ALIGNED_DOUBLE;	/* project macro; its definition is not visible in this file */
+     {	/* load phase: inputs are paired as (0,4), (2,6), (7,3) and (1,5) */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp19;
+	  fftw_real tmp20;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[4 * istride]);
+	  tmp3 = tmp1 + tmp2;	/* Re(x0) + Re(x4) */
+	  tmp23 = tmp1 - tmp2;	/* Re(x0) - Re(x4) */
+	  {
+	       fftw_real tmp16;
+	       fftw_real tmp17;
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp16 = c_im(input[0]);
+	       tmp17 = c_im(input[4 * istride]);
+	       tmp18 = tmp16 + tmp17;	/* Im(x0) + Im(x4) */
+	       tmp38 = tmp16 - tmp17;	/* Im(x0) - Im(x4) */
+	       tmp4 = c_re(input[2 * istride]);
+	       tmp5 = c_re(input[6 * istride]);
+	       tmp6 = tmp4 + tmp5;	/* Re(x2) + Re(x6) */
+	       tmp37 = tmp4 - tmp5;	/* Re(x2) - Re(x6) */
+	  }
+	  tmp19 = c_im(input[2 * istride]);
+	  tmp20 = c_im(input[6 * istride]);
+	  tmp21 = tmp19 + tmp20;	/* Im(x2) + Im(x6) */
+	  tmp24 = tmp19 - tmp20;	/* Im(x2) - Im(x6) */
+	  {	/* samples 7 and 3 */
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       fftw_real tmp31;
+	       fftw_real tmp32;
+	       fftw_real tmp33;
+	       fftw_real tmp34;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp11 = c_re(input[7 * istride]);
+	       tmp12 = c_re(input[3 * istride]);
+	       tmp31 = tmp11 - tmp12;	/* Re(x7) - Re(x3) */
+	       tmp32 = c_im(input[7 * istride]);
+	       tmp33 = c_im(input[3 * istride]);
+	       tmp34 = tmp32 - tmp33;	/* Im(x7) - Im(x3) */
+	       tmp13 = tmp11 + tmp12;	/* Re(x7) + Re(x3) */
+	       tmp49 = tmp32 + tmp33;	/* Im(x7) + Im(x3) */
+	       tmp35 = tmp31 - tmp34;	/* real difference minus imaginary difference */
+	       tmp43 = tmp31 + tmp34;	/* real difference plus imaginary difference */
+	  }
+	  {	/* samples 1 and 5 */
+	       fftw_real tmp8;
+	       fftw_real tmp9;
+	       fftw_real tmp26;
+	       fftw_real tmp27;
+	       fftw_real tmp28;
+	       fftw_real tmp29;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp8 = c_re(input[istride]);
+	       tmp9 = c_re(input[5 * istride]);
+	       tmp26 = tmp8 - tmp9;	/* Re(x1) - Re(x5) */
+	       tmp27 = c_im(input[istride]);
+	       tmp28 = c_im(input[5 * istride]);
+	       tmp29 = tmp27 - tmp28;	/* Im(x1) - Im(x5) */
+	       tmp10 = tmp8 + tmp9;	/* Re(x1) + Re(x5) */
+	       tmp48 = tmp27 + tmp28;	/* Im(x1) + Im(x5) */
+	       tmp30 = tmp26 + tmp29;	/* real difference plus imaginary difference */
+	       tmp42 = tmp29 - tmp26;	/* imaginary difference minus real difference */
+	  }
+     }
+     {	/* Re(X0), Re(X4), Im(X2), Im(X6) */
+	  fftw_real tmp7;
+	  fftw_real tmp14;
+	  fftw_real tmp15;
+	  fftw_real tmp22;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp7 = tmp3 + tmp6;	/* sum of the real parts of the even-indexed inputs */
+	  tmp14 = tmp10 + tmp13;	/* sum of the real parts of the odd-indexed inputs */
+	  c_re(output[4 * ostride]) = tmp7 - tmp14;
+	  c_re(output[0]) = tmp7 + tmp14;
+	  tmp15 = tmp13 - tmp10;
+	  tmp22 = tmp18 - tmp21;
+	  c_im(output[2 * ostride]) = tmp15 + tmp22;
+	  c_im(output[6 * ostride]) = tmp22 - tmp15;
+     }
+     {	/* Im(X0), Im(X4), Re(X2), Re(X6) */
+	  fftw_real tmp51;
+	  fftw_real tmp52;
+	  fftw_real tmp47;
+	  fftw_real tmp50;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp51 = tmp18 + tmp21;	/* sum of the imaginary parts of the even-indexed inputs */
+	  tmp52 = tmp48 + tmp49;	/* sum of the imaginary parts of the odd-indexed inputs */
+	  c_im(output[4 * ostride]) = tmp51 - tmp52;
+	  c_im(output[0]) = tmp51 + tmp52;
+	  tmp47 = tmp3 - tmp6;
+	  tmp50 = tmp48 - tmp49;
+	  c_re(output[6 * ostride]) = tmp47 - tmp50;
+	  c_re(output[2 * ostride]) = tmp47 + tmp50;
+     }
+     {	/* Re(X1), Re(X5), Im(X3), Im(X7): the odd-pair terms are scaled by K707106781 */
+	  fftw_real tmp25;
+	  fftw_real tmp36;
+	  fftw_real tmp39;
+	  fftw_real tmp40;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp25 = tmp23 + tmp24;
+	  tmp36 = K707106781 * (tmp30 + tmp35);
+	  c_re(output[5 * ostride]) = tmp25 - tmp36;
+	  c_re(output[ostride]) = tmp25 + tmp36;
+	  tmp39 = tmp37 + tmp38;
+	  tmp40 = K707106781 * (tmp35 - tmp30);
+	  c_im(output[7 * ostride]) = tmp39 - tmp40;
+	  c_im(output[3 * ostride]) = tmp39 + tmp40;
+     }
+     {	/* Im(X1), Im(X5), Re(X3), Re(X7): the odd-pair terms are scaled by K707106781 */
+	  fftw_real tmp45;
+	  fftw_real tmp46;
+	  fftw_real tmp41;
+	  fftw_real tmp44;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp45 = tmp38 - tmp37;
+	  tmp46 = K707106781 * (tmp42 + tmp43);
+	  c_im(output[5 * ostride]) = tmp45 - tmp46;
+	  c_im(output[ostride]) = tmp45 + tmp46;
+	  tmp41 = tmp23 - tmp24;
+	  tmp44 = K707106781 * (tmp42 - tmp43);
+	  c_re(output[7 * ostride]) = tmp41 - tmp44;
+	  c_re(output[3 * ostride]) = tmp41 + tmp44;
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_8_desc = {	/* descriptor record for the fftw_no_twiddle_8 codelet */
+     "fftw_no_twiddle_8",	/* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_8,	/* codelet entry point, cast to a generic function pointer */
+     8,	/* matches the eight inputs/outputs the codelet touches; presumably the transform size -- confirm */
+     FFTW_FORWARD,	/* transform direction */
+     FFTW_NOTW,	/* codelet kind: no-twiddle */
+     177,	/* generator-assigned value; meaning not visible here -- TODO confirm in fftw-int.h */
+     0,	/* NOTE(review): looks like a twiddle-factor count, zero for this codelet -- confirm */
+     (const int *) 0,	/* null pointer: no int table attached */
+};
diff --git a/src/fftw/fn_9.c b/src/fftw/fn_9.c
new file mode 100644
index 0000000..837ece7
--- /dev/null
+++ b/src/fftw/fn_9.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:05:38 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddle 9 */
+
+/*
+ * This function contains 80 FP additions, 40 FP multiplications,
+ * (or, 60 additions, 20 multiplications, 20 fused multiply/add),
+ * 30 stack variables, and 36 memory accesses
+ */
+static const fftw_real K939692620 =	/* constants used by fftw_no_twiddle_9; each name is K plus the leading decimals */
+FFTW_KONST(+0.939692620785908384054109277324731469936208134);	/* cos(pi/9) */
+static const fftw_real K342020143 =
+FFTW_KONST(+0.342020143325668733044099614682259580763083368);	/* sin(pi/9) */
+static const fftw_real K984807753 =
+FFTW_KONST(+0.984807753012208059366743024589523013670643252);	/* sin(4*pi/9) */
+static const fftw_real K173648177 =
+FFTW_KONST(+0.173648177666930348851716626769314796000375677);	/* cos(4*pi/9) */
+static const fftw_real K642787609 =
+FFTW_KONST(+0.642787609686539326322643409907263432907559884);	/* sin(2*pi/9) */
+static const fftw_real K766044443 =
+FFTW_KONST(+0.766044443118978035202392650555416673935832457);	/* cos(2*pi/9) */
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);	/* sqrt(3)/2 = sin(pi/3) */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);	/* 1/2 = cos(pi/3) */
+
+/*
+ * Generator Id's : 
+ * $Id: fn_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fn_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_no_twiddle_9(const fftw_complex *input, fftw_complex *output,	/* reads input[k * istride], writes output[k * ostride], k = 0..8 */
+		       int istride, int ostride)	/* strides are counted in fftw_complex elements */
+{	/* Fully unrolled 9-point transform. In the comments below xk = input[k * istride], Xk = output[k * ostride]. */
+     fftw_real tmp5;
+     fftw_real tmp17;
+     fftw_real tmp50;
+     fftw_real tmp20;
+     fftw_real tmp78;
+     fftw_real tmp53;
+     fftw_real tmp10;
+     fftw_real tmp26;
+     fftw_real tmp58;
+     fftw_real tmp74;
+     fftw_real tmp31;
+     fftw_real tmp59;
+     fftw_real tmp15;
+     fftw_real tmp37;
+     fftw_real tmp62;
+     fftw_real tmp75;
+     fftw_real tmp42;
+     fftw_real tmp61;
+     ASSERT_ALIGNED_DOUBLE;	/* project macro; its definition is not visible in this file */
+     {	/* real parts of samples 0, 3 and 6 */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp3;
+	  fftw_real tmp4;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[3 * istride]);
+	  tmp3 = c_re(input[6 * istride]);
+	  tmp4 = tmp2 + tmp3;
+	  tmp5 = tmp1 + tmp4;	/* Re(x0) + Re(x3) + Re(x6) */
+	  tmp17 = tmp1 - (K500000000 * tmp4);	/* Re(x0) minus half of Re(x3) + Re(x6) */
+	  tmp50 = K866025403 * (tmp3 - tmp2);	/* scaled Re(x6) - Re(x3) */
+     }
+     {	/* imaginary parts of samples 0, 3 and 6 */
+	  fftw_real tmp51;
+	  fftw_real tmp18;
+	  fftw_real tmp19;
+	  fftw_real tmp52;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp51 = c_im(input[0]);
+	  tmp18 = c_im(input[3 * istride]);
+	  tmp19 = c_im(input[6 * istride]);
+	  tmp52 = tmp18 + tmp19;
+	  tmp20 = K866025403 * (tmp18 - tmp19);	/* scaled Im(x3) - Im(x6) */
+	  tmp78 = tmp51 + tmp52;	/* Im(x0) + Im(x3) + Im(x6) */
+	  tmp53 = tmp51 - (K500000000 * tmp52);	/* Im(x0) minus half of Im(x3) + Im(x6) */
+     }
+     {	/* samples 1, 4 and 7: same sum / half-sum / scaled-difference pattern */
+	  fftw_real tmp6;
+	  fftw_real tmp28;
+	  fftw_real tmp9;
+	  fftw_real tmp27;
+	  fftw_real tmp25;
+	  fftw_real tmp29;
+	  fftw_real tmp22;
+	  fftw_real tmp30;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp6 = c_re(input[istride]);
+	  tmp28 = c_im(input[istride]);
+	  {
+	       fftw_real tmp7;
+	       fftw_real tmp8;
+	       fftw_real tmp23;
+	       fftw_real tmp24;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp7 = c_re(input[4 * istride]);
+	       tmp8 = c_re(input[7 * istride]);
+	       tmp9 = tmp7 + tmp8;
+	       tmp27 = K866025403 * (tmp8 - tmp7);	/* scaled Re(x7) - Re(x4) */
+	       tmp23 = c_im(input[4 * istride]);
+	       tmp24 = c_im(input[7 * istride]);
+	       tmp25 = K866025403 * (tmp23 - tmp24);	/* scaled Im(x4) - Im(x7) */
+	       tmp29 = tmp23 + tmp24;
+	  }
+	  tmp10 = tmp6 + tmp9;	/* Re(x1) + Re(x4) + Re(x7) */
+	  tmp22 = tmp6 - (K500000000 * tmp9);
+	  tmp26 = tmp22 + tmp25;
+	  tmp58 = tmp22 - tmp25;
+	  tmp74 = tmp28 + tmp29;	/* Im(x1) + Im(x4) + Im(x7) */
+	  tmp30 = tmp28 - (K500000000 * tmp29);
+	  tmp31 = tmp27 + tmp30;
+	  tmp59 = tmp30 - tmp27;
+     }
+     {	/* samples 2, 5 and 8: same pattern again */
+	  fftw_real tmp11;
+	  fftw_real tmp39;
+	  fftw_real tmp14;
+	  fftw_real tmp38;
+	  fftw_real tmp36;
+	  fftw_real tmp40;
+	  fftw_real tmp33;
+	  fftw_real tmp41;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp11 = c_re(input[2 * istride]);
+	  tmp39 = c_im(input[2 * istride]);
+	  {
+	       fftw_real tmp12;
+	       fftw_real tmp13;
+	       fftw_real tmp34;
+	       fftw_real tmp35;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp12 = c_re(input[5 * istride]);
+	       tmp13 = c_re(input[8 * istride]);
+	       tmp14 = tmp12 + tmp13;
+	       tmp38 = K866025403 * (tmp13 - tmp12);	/* scaled Re(x8) - Re(x5) */
+	       tmp34 = c_im(input[5 * istride]);
+	       tmp35 = c_im(input[8 * istride]);
+	       tmp36 = K866025403 * (tmp34 - tmp35);	/* scaled Im(x5) - Im(x8) */
+	       tmp40 = tmp34 + tmp35;
+	  }
+	  tmp15 = tmp11 + tmp14;	/* Re(x2) + Re(x5) + Re(x8) */
+	  tmp33 = tmp11 - (K500000000 * tmp14);
+	  tmp37 = tmp33 + tmp36;
+	  tmp62 = tmp33 - tmp36;
+	  tmp75 = tmp39 + tmp40;	/* Im(x2) + Im(x5) + Im(x8) */
+	  tmp41 = tmp39 - (K500000000 * tmp40);
+	  tmp42 = tmp38 + tmp41;
+	  tmp61 = tmp41 - tmp38;
+     }
+     {	/* X0, X3 and X6 are built from the three group sums only */
+	  fftw_real tmp76;
+	  fftw_real tmp16;
+	  fftw_real tmp73;
+	  fftw_real tmp77;
+	  fftw_real tmp79;
+	  fftw_real tmp80;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp76 = K866025403 * (tmp74 - tmp75);
+	  tmp16 = tmp10 + tmp15;
+	  tmp73 = tmp5 - (K500000000 * tmp16);
+	  c_re(output[0]) = tmp5 + tmp16;	/* sum of all nine real parts */
+	  c_re(output[3 * ostride]) = tmp73 + tmp76;
+	  c_re(output[6 * ostride]) = tmp73 - tmp76;
+	  tmp77 = K866025403 * (tmp15 - tmp10);
+	  tmp79 = tmp74 + tmp75;
+	  tmp80 = tmp78 - (K500000000 * tmp79);
+	  c_im(output[3 * ostride]) = tmp77 + tmp80;
+	  c_im(output[6 * ostride]) = tmp80 - tmp77;
+	  c_im(output[0]) = tmp78 + tmp79;	/* sum of all nine imaginary parts */
+     }
+     {	/* X1, X4 and X7 */
+	  fftw_real tmp21;
+	  fftw_real tmp54;
+	  fftw_real tmp44;
+	  fftw_real tmp49;
+	  fftw_real tmp48;
+	  fftw_real tmp55;
+	  fftw_real tmp45;
+	  fftw_real tmp56;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp21 = tmp17 + tmp20;
+	  tmp54 = tmp50 + tmp53;
+	  {
+	       fftw_real tmp32;
+	       fftw_real tmp43;
+	       fftw_real tmp46;
+	       fftw_real tmp47;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp32 = (K766044443 * tmp26) + (K642787609 * tmp31);	/* real part of (tmp26 + i tmp31) times (K766044443 - i K642787609) */
+	       tmp43 = (K173648177 * tmp37) + (K984807753 * tmp42);	/* real part of (tmp37 + i tmp42) times (K173648177 - i K984807753) */
+	       tmp44 = tmp32 + tmp43;
+	       tmp49 = K866025403 * (tmp43 - tmp32);
+	       tmp46 = (K766044443 * tmp31) - (K642787609 * tmp26);	/* imaginary part of the tmp32 product */
+	       tmp47 = (K173648177 * tmp42) - (K984807753 * tmp37);	/* imaginary part of the tmp43 product */
+	       tmp48 = K866025403 * (tmp46 - tmp47);
+	       tmp55 = tmp46 + tmp47;
+	  }
+	  c_re(output[ostride]) = tmp21 + tmp44;
+	  tmp45 = tmp21 - (K500000000 * tmp44);
+	  c_re(output[7 * ostride]) = tmp45 - tmp48;
+	  c_re(output[4 * ostride]) = tmp45 + tmp48;
+	  c_im(output[ostride]) = tmp54 + tmp55;
+	  tmp56 = tmp54 - (K500000000 * tmp55);
+	  c_im(output[4 * ostride]) = tmp49 + tmp56;
+	  c_im(output[7 * ostride]) = tmp56 - tmp49;
+     }
+     {	/* X2, X5 and X8 */
+	  fftw_real tmp57;
+	  fftw_real tmp69;
+	  fftw_real tmp72;
+	  fftw_real tmp66;
+	  fftw_real tmp64;
+	  fftw_real tmp65;
+	  fftw_real tmp71;
+	  fftw_real tmp70;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp67;
+	       fftw_real tmp68;
+	       fftw_real tmp60;
+	       fftw_real tmp63;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp57 = tmp17 - tmp20;
+	       tmp67 = (K173648177 * tmp59) - (K984807753 * tmp58);	/* imaginary part of (tmp58 + i tmp59) times (K173648177 - i K984807753) */
+	       tmp68 = (K342020143 * tmp62) + (K939692620 * tmp61);	/* negated imaginary part of (tmp62 + i tmp61) times (-K939692620 - i K342020143) */
+	       tmp69 = tmp67 - tmp68;
+	       tmp72 = K866025403 * (tmp67 + tmp68);
+	       tmp66 = tmp53 - tmp50;
+	       tmp60 = (K173648177 * tmp58) + (K984807753 * tmp59);	/* real part of the tmp67 product */
+	       tmp63 = (K342020143 * tmp61) - (K939692620 * tmp62);	/* real part of the tmp68 product */
+	       tmp64 = tmp60 + tmp63;
+	       tmp65 = K866025403 * (tmp63 - tmp60);
+	  }
+	  c_re(output[2 * ostride]) = tmp57 + tmp64;
+	  tmp71 = tmp57 - (K500000000 * tmp64);
+	  c_re(output[8 * ostride]) = tmp71 - tmp72;
+	  c_re(output[5 * ostride]) = tmp71 + tmp72;
+	  c_im(output[2 * ostride]) = tmp66 + tmp69;
+	  tmp70 = tmp66 - (K500000000 * tmp69);
+	  c_im(output[5 * ostride]) = tmp65 + tmp70;
+	  c_im(output[8 * ostride]) = tmp70 - tmp65;
+     }
+}
+
+fftw_codelet_desc fftw_no_twiddle_9_desc = {	/* descriptor record for the fftw_no_twiddle_9 codelet */
+     "fftw_no_twiddle_9",	/* codelet name as a string */
+     (void (*)()) fftw_no_twiddle_9,	/* codelet entry point, cast to a generic function pointer */
+     9,	/* matches the nine inputs/outputs the codelet touches; presumably the transform size -- confirm */
+     FFTW_FORWARD,	/* transform direction */
+     FFTW_NOTW,	/* codelet kind: no-twiddle */
+     199,	/* generator-assigned value; meaning not visible here -- TODO confirm in fftw-int.h */
+     0,	/* NOTE(review): looks like a twiddle-factor count, zero for this codelet -- confirm */
+     (const int *) 0,	/* null pointer: no int table attached */
+};
diff --git a/src/fftw/fni_1.c b/src/fftw/fni_1.c
new file mode 100644
index 0000000..b06ba0e
--- /dev/null
+++ b/src/fftw/fni_1.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:18 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 1 */
+
+/*
+ * This function contains 0 FP additions, 0 FP multiplications,
+ * (or, 0 additions, 0 multiplications, 0 fused multiply/add),
+ * 2 stack variables, and 4 memory accesses
+ */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_1.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_1.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_1.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_1(const fftw_complex *input, fftw_complex *output,
+			int istride, int ostride)	/* istride and ostride are not used in the body */
+{	/* Size-1 codelet: copies input[0] to output[0] unchanged. */
+     fftw_real tmp1;
+     fftw_real tmp2;
+     ASSERT_ALIGNED_DOUBLE;
+     tmp1 = c_re(input[0]);	/* real part of the single input element */
+     c_re(output[0]) = tmp1;
+     tmp2 = c_im(input[0]);	/* imaginary part of the single input element */
+     c_im(output[0]) = tmp2;
+}
+
+fftw_codelet_desc fftwi_no_twiddle_1_desc = {	/* descriptor record for fftwi_no_twiddle_1 */
+     "fftwi_no_twiddle_1",	/* codelet name as a string */
+     (void (*)()) fftwi_no_twiddle_1,	/* codelet function, cast to an untyped function pointer */
+     1,	/* presumably the transform size -- confirm against fftw_codelet_desc */
+     FFTW_BACKWARD,	/* direction flag */
+     FFTW_NOTW,	/* codelet kind flag */
+     34,	/* NOTE(review): meaning not visible here (looks like a per-codelet id) -- confirm */
+     0,	/* NOTE(review): presumably a twiddle-factor count -- confirm */
+     (const int *) 0,	/* null int-table pointer; NOTE(review): presumably a twiddle order -- confirm */
+};
diff --git a/src/fftw/fni_10.c b/src/fftw/fni_10.c
new file mode 100644
index 0000000..b197726
--- /dev/null
+++ b/src/fftw/fni_10.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:28 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 10 */
+
+/*
+ * This function contains 84 FP additions, 24 FP multiplications,
+ * (or, 72 additions, 12 multiplications, 12 fused multiply/add),
+ * 36 stack variables, and 40 memory accesses
+ */
+static const fftw_real K559016994 =	/* sqrt(5)/4 */
+FFTW_KONST(+0.559016994374947424102293417182819058860154590);
+static const fftw_real K250000000 =	/* 1/4 */
+FFTW_KONST(+0.250000000000000000000000000000000000000000000);
+static const fftw_real K951056516 =	/* sin(2*pi/5) */
+FFTW_KONST(+0.951056516295153572116439333379382143405698634);
+static const fftw_real K587785252 =	/* sin(pi/5) */
+FFTW_KONST(+0.587785252292473129168705954639072768597652438);
+
+/*
+ * Generator Id's : 
+ * $Id: fni_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_10(const fftw_complex *input, fftw_complex *output,
+			 int istride, int ostride)	/* strides count fftw_complex elements */
+{	/* Size-10 no-twiddle codelet (registered below as FFTW_BACKWARD): reads input[k * istride], writes output[k * ostride], k = 0..9. */
+     fftw_real tmp3;
+     fftw_real tmp19;
+     fftw_real tmp67;
+     fftw_real tmp76;
+     fftw_real tmp59;
+     fftw_real tmp60;
+     fftw_real tmp10;
+     fftw_real tmp17;
+     fftw_real tmp18;
+     fftw_real tmp74;
+     fftw_real tmp73;
+     fftw_real tmp22;
+     fftw_real tmp25;
+     fftw_real tmp26;
+     fftw_real tmp36;
+     fftw_real tmp43;
+     fftw_real tmp62;
+     fftw_real tmp63;
+     fftw_real tmp68;
+     fftw_real tmp52;
+     fftw_real tmp55;
+     fftw_real tmp77;
+     fftw_real tmp78;
+     fftw_real tmp79;
+     ASSERT_ALIGNED_DOUBLE;
+     {	/* inputs 0 and 5: difference and sum of the real parts, then of the imaginary parts */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp65;
+	  fftw_real tmp66;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[5 * istride]);
+	  tmp3 = tmp1 - tmp2;
+	  tmp19 = tmp1 + tmp2;
+	  tmp65 = c_im(input[0]);
+	  tmp66 = c_im(input[5 * istride]);
+	  tmp67 = tmp65 - tmp66;
+	  tmp76 = tmp65 + tmp66;
+     }
+     {	/* real parts of the remaining inputs, taken in pairs (2,7), (6,1), (8,3), (4,9) */
+	  fftw_real tmp6;
+	  fftw_real tmp20;
+	  fftw_real tmp16;
+	  fftw_real tmp24;
+	  fftw_real tmp9;
+	  fftw_real tmp21;
+	  fftw_real tmp13;
+	  fftw_real tmp23;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       fftw_real tmp14;
+	       fftw_real tmp15;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp4 = c_re(input[2 * istride]);
+	       tmp5 = c_re(input[7 * istride]);
+	       tmp6 = tmp4 - tmp5;
+	       tmp20 = tmp4 + tmp5;
+	       tmp14 = c_re(input[6 * istride]);
+	       tmp15 = c_re(input[istride]);
+	       tmp16 = tmp14 - tmp15;
+	       tmp24 = tmp14 + tmp15;
+	  }
+	  {
+	       fftw_real tmp7;
+	       fftw_real tmp8;
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp7 = c_re(input[8 * istride]);
+	       tmp8 = c_re(input[3 * istride]);
+	       tmp9 = tmp7 - tmp8;
+	       tmp21 = tmp7 + tmp8;
+	       tmp11 = c_re(input[4 * istride]);
+	       tmp12 = c_re(input[9 * istride]);
+	       tmp13 = tmp11 - tmp12;
+	       tmp23 = tmp11 + tmp12;
+	  }
+	  tmp59 = tmp6 - tmp9;
+	  tmp60 = tmp13 - tmp16;
+	  tmp10 = tmp6 + tmp9;
+	  tmp17 = tmp13 + tmp16;
+	  tmp18 = tmp10 + tmp17;	/* total of the four real pair differences */
+	  tmp74 = tmp23 - tmp24;
+	  tmp73 = tmp20 - tmp21;
+	  tmp22 = tmp20 + tmp21;
+	  tmp25 = tmp23 + tmp24;
+	  tmp26 = tmp22 + tmp25;	/* total of the four real pair sums */
+     }
+     {	/* imaginary parts of the same pairs (2,7), (6,1), (8,3), (4,9) */
+	  fftw_real tmp32;
+	  fftw_real tmp50;
+	  fftw_real tmp42;
+	  fftw_real tmp54;
+	  fftw_real tmp35;
+	  fftw_real tmp51;
+	  fftw_real tmp39;
+	  fftw_real tmp53;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp30;
+	       fftw_real tmp31;
+	       fftw_real tmp40;
+	       fftw_real tmp41;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp30 = c_im(input[2 * istride]);
+	       tmp31 = c_im(input[7 * istride]);
+	       tmp32 = tmp30 - tmp31;
+	       tmp50 = tmp30 + tmp31;
+	       tmp40 = c_im(input[6 * istride]);
+	       tmp41 = c_im(input[istride]);
+	       tmp42 = tmp40 - tmp41;
+	       tmp54 = tmp40 + tmp41;
+	  }
+	  {
+	       fftw_real tmp33;
+	       fftw_real tmp34;
+	       fftw_real tmp37;
+	       fftw_real tmp38;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp33 = c_im(input[8 * istride]);
+	       tmp34 = c_im(input[3 * istride]);
+	       tmp35 = tmp33 - tmp34;
+	       tmp51 = tmp33 + tmp34;
+	       tmp37 = c_im(input[4 * istride]);
+	       tmp38 = c_im(input[9 * istride]);
+	       tmp39 = tmp37 - tmp38;
+	       tmp53 = tmp37 + tmp38;
+	  }
+	  tmp36 = tmp32 - tmp35;
+	  tmp43 = tmp39 - tmp42;
+	  tmp62 = tmp32 + tmp35;
+	  tmp63 = tmp39 + tmp42;
+	  tmp68 = tmp62 + tmp63;	/* total of the four imaginary pair differences */
+	  tmp52 = tmp50 - tmp51;
+	  tmp55 = tmp53 - tmp54;
+	  tmp77 = tmp50 + tmp51;
+	  tmp78 = tmp53 + tmp54;
+	  tmp79 = tmp77 + tmp78;	/* total of the four imaginary pair sums */
+     }
+     c_re(output[5 * ostride]) = tmp3 + tmp18;	/* Re out[5]: sum of all five real pair differences */
+     {	/* real parts of outputs 7, 3, 1, 9 */
+	  fftw_real tmp44;
+	  fftw_real tmp46;
+	  fftw_real tmp29;
+	  fftw_real tmp45;
+	  fftw_real tmp27;
+	  fftw_real tmp28;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp44 = (K587785252 * tmp36) - (K951056516 * tmp43);
+	  tmp46 = (K951056516 * tmp36) + (K587785252 * tmp43);
+	  tmp27 = tmp3 - (K250000000 * tmp18);
+	  tmp28 = K559016994 * (tmp10 - tmp17);
+	  tmp29 = tmp27 - tmp28;
+	  tmp45 = tmp28 + tmp27;
+	  c_re(output[7 * ostride]) = tmp29 - tmp44;
+	  c_re(output[3 * ostride]) = tmp29 + tmp44;
+	  c_re(output[ostride]) = tmp45 - tmp46;
+	  c_re(output[9 * ostride]) = tmp45 + tmp46;
+     }
+     c_re(output[0]) = tmp19 + tmp26;	/* Re out[0]: sum of all ten real inputs */
+     {	/* real parts of outputs 2, 8, 6, 4 */
+	  fftw_real tmp56;
+	  fftw_real tmp58;
+	  fftw_real tmp49;
+	  fftw_real tmp57;
+	  fftw_real tmp47;
+	  fftw_real tmp48;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp56 = (K587785252 * tmp52) - (K951056516 * tmp55);
+	  tmp58 = (K951056516 * tmp52) + (K587785252 * tmp55);
+	  tmp47 = tmp19 - (K250000000 * tmp26);
+	  tmp48 = K559016994 * (tmp22 - tmp25);
+	  tmp49 = tmp47 - tmp48;
+	  tmp57 = tmp48 + tmp47;
+	  c_re(output[2 * ostride]) = tmp49 - tmp56;
+	  c_re(output[8 * ostride]) = tmp49 + tmp56;
+	  c_re(output[6 * ostride]) = tmp57 - tmp58;
+	  c_re(output[4 * ostride]) = tmp57 + tmp58;
+     }
+     c_im(output[5 * ostride]) = tmp68 + tmp67;	/* Im out[5]: sum of all five imaginary pair differences */
+     {	/* imaginary parts of outputs 1, 9, 3, 7 */
+	  fftw_real tmp61;
+	  fftw_real tmp72;
+	  fftw_real tmp70;
+	  fftw_real tmp71;
+	  fftw_real tmp64;
+	  fftw_real tmp69;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp61 = (K951056516 * tmp59) + (K587785252 * tmp60);
+	  tmp72 = (K587785252 * tmp59) - (K951056516 * tmp60);
+	  tmp64 = K559016994 * (tmp62 - tmp63);
+	  tmp69 = tmp67 - (K250000000 * tmp68);
+	  tmp70 = tmp64 + tmp69;
+	  tmp71 = tmp69 - tmp64;
+	  c_im(output[ostride]) = tmp61 + tmp70;
+	  c_im(output[9 * ostride]) = tmp70 - tmp61;
+	  c_im(output[3 * ostride]) = tmp71 - tmp72;
+	  c_im(output[7 * ostride]) = tmp72 + tmp71;
+     }
+     c_im(output[0]) = tmp79 + tmp76;	/* Im out[0]: sum of all ten imaginary inputs */
+     {	/* imaginary parts of outputs 2, 8, 4, 6 */
+	  fftw_real tmp75;
+	  fftw_real tmp84;
+	  fftw_real tmp82;
+	  fftw_real tmp83;
+	  fftw_real tmp80;
+	  fftw_real tmp81;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp75 = (K587785252 * tmp73) - (K951056516 * tmp74);
+	  tmp84 = (K951056516 * tmp73) + (K587785252 * tmp74);
+	  tmp80 = tmp76 - (K250000000 * tmp79);
+	  tmp81 = K559016994 * (tmp77 - tmp78);
+	  tmp82 = tmp80 - tmp81;
+	  tmp83 = tmp81 + tmp80;
+	  c_im(output[2 * ostride]) = tmp75 + tmp82;
+	  c_im(output[8 * ostride]) = tmp82 - tmp75;
+	  c_im(output[4 * ostride]) = tmp83 - tmp84;
+	  c_im(output[6 * ostride]) = tmp84 + tmp83;
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_10_desc = {	/* descriptor record for fftwi_no_twiddle_10 */
+     "fftwi_no_twiddle_10",	/* codelet name as a string */
+     (void (*)()) fftwi_no_twiddle_10,	/* codelet function, cast to an untyped function pointer */
+     10,	/* presumably the transform size -- confirm against fftw_codelet_desc */
+     FFTW_BACKWARD,	/* direction flag */
+     FFTW_NOTW,	/* codelet kind flag */
+     232,	/* NOTE(review): meaning not visible here (looks like a per-codelet id) -- confirm */
+     0,	/* NOTE(review): presumably a twiddle-factor count -- confirm */
+     (const int *) 0,	/* null int-table pointer; NOTE(review): presumably a twiddle order -- confirm */
+};
diff --git a/src/fftw/fni_11.c b/src/fftw/fni_11.c
new file mode 100644
index 0000000..89dcbbc
--- /dev/null
+++ b/src/fftw/fni_11.c
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:29 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 11 */
+
+/*
+ * This function contains 140 FP additions, 100 FP multiplications,
+ * (or, 140 additions, 100 multiplications, 0 fused multiply/add),
+ * 30 stack variables, and 44 memory accesses
+ */
+static const fftw_real K959492973 =	/* cos(pi/11) */
+FFTW_KONST(+0.959492973614497389890368057066327699062454848);
+static const fftw_real K654860733 =	/* cos(3*pi/11) */
+FFTW_KONST(+0.654860733945285064056925072466293553183791199);
+static const fftw_real K142314838 =	/* cos(5*pi/11) */
+FFTW_KONST(+0.142314838273285140443792668616369668791051361);
+static const fftw_real K415415013 =	/* cos(4*pi/11) */
+FFTW_KONST(+0.415415013001886425529274149229623203524004910);
+static const fftw_real K841253532 =	/* cos(2*pi/11) */
+FFTW_KONST(+0.841253532831181168861811648919367717513292498);
+static const fftw_real K540640817 =	/* sin(2*pi/11) */
+FFTW_KONST(+0.540640817455597582107635954318691695431770608);
+static const fftw_real K909631995 =	/* sin(4*pi/11) */
+FFTW_KONST(+0.909631995354518371411715383079028460060241051);
+static const fftw_real K281732556 =	/* sin(pi/11) */
+FFTW_KONST(+0.281732556841429697711417915346616899035777899);
+static const fftw_real K755749574 =	/* sin(3*pi/11) */
+FFTW_KONST(+0.755749574354258283774035843972344420179717445);
+static const fftw_real K989821441 =	/* sin(5*pi/11) */
+FFTW_KONST(+0.989821441880932732376092037776718787376519372);
+
+/*
+ * Generator Id's : 
+ * $Id: fni_11.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_11.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_11.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_11(const fftw_complex *input, fftw_complex *output,
+			 int istride, int ostride)	/* strides count fftw_complex elements */
+{	/* Size-11 no-twiddle codelet (registered below as FFTW_BACKWARD): reads input[k * istride], writes output[k * ostride], k = 0..10. */
+     fftw_real tmp1;
+     fftw_real tmp23;
+     fftw_real tmp4;
+     fftw_real tmp17;
+     fftw_real tmp32;
+     fftw_real tmp51;
+     fftw_real tmp38;
+     fftw_real tmp53;
+     fftw_real tmp7;
+     fftw_real tmp21;
+     fftw_real tmp10;
+     fftw_real tmp18;
+     fftw_real tmp29;
+     fftw_real tmp50;
+     fftw_real tmp13;
+     fftw_real tmp19;
+     fftw_real tmp35;
+     fftw_real tmp49;
+     fftw_real tmp26;
+     fftw_real tmp52;
+     fftw_real tmp16;
+     fftw_real tmp20;
+     ASSERT_ALIGNED_DOUBLE;
+     {	/* load input 0, then form sums and differences for input pairs (1,10) and (2,9) */
+	  fftw_real tmp2;
+	  fftw_real tmp3;
+	  fftw_real tmp30;
+	  fftw_real tmp31;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp23 = c_im(input[0]);
+	  tmp2 = c_re(input[istride]);
+	  tmp3 = c_re(input[10 * istride]);
+	  tmp4 = tmp2 + tmp3;
+	  tmp17 = tmp2 - tmp3;	/* real difference is (low index) - (high index) */
+	  tmp30 = c_im(input[istride]);
+	  tmp31 = c_im(input[10 * istride]);
+	  tmp32 = tmp30 + tmp31;
+	  tmp51 = tmp31 - tmp30;	/* imaginary difference is (high index) - (low index) */
+	  {
+	       fftw_real tmp36;
+	       fftw_real tmp37;
+	       fftw_real tmp5;
+	       fftw_real tmp6;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp36 = c_im(input[2 * istride]);
+	       tmp37 = c_im(input[9 * istride]);
+	       tmp38 = tmp36 + tmp37;
+	       tmp53 = tmp37 - tmp36;
+	       tmp5 = c_re(input[2 * istride]);
+	       tmp6 = c_re(input[9 * istride]);
+	       tmp7 = tmp5 + tmp6;
+	       tmp21 = tmp5 - tmp6;
+	  }
+     }
+     {	/* sums and differences for input pairs (3,8), (4,7) and (5,6) */
+	  fftw_real tmp8;
+	  fftw_real tmp9;
+	  fftw_real tmp33;
+	  fftw_real tmp34;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp8 = c_re(input[3 * istride]);
+	  tmp9 = c_re(input[8 * istride]);
+	  tmp10 = tmp8 + tmp9;
+	  tmp18 = tmp8 - tmp9;
+	  {
+	       fftw_real tmp27;
+	       fftw_real tmp28;
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp27 = c_im(input[3 * istride]);
+	       tmp28 = c_im(input[8 * istride]);
+	       tmp29 = tmp27 + tmp28;
+	       tmp50 = tmp28 - tmp27;
+	       tmp11 = c_re(input[4 * istride]);
+	       tmp12 = c_re(input[7 * istride]);
+	       tmp13 = tmp11 + tmp12;
+	       tmp19 = tmp11 - tmp12;
+	  }
+	  tmp33 = c_im(input[4 * istride]);
+	  tmp34 = c_im(input[7 * istride]);
+	  tmp35 = tmp33 + tmp34;
+	  tmp49 = tmp34 - tmp33;
+	  {
+	       fftw_real tmp24;
+	       fftw_real tmp25;
+	       fftw_real tmp14;
+	       fftw_real tmp15;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp24 = c_im(input[5 * istride]);
+	       tmp25 = c_im(input[6 * istride]);
+	       tmp26 = tmp24 + tmp25;
+	       tmp52 = tmp25 - tmp24;
+	       tmp14 = c_re(input[5 * istride]);
+	       tmp15 = c_re(input[6 * istride]);
+	       tmp16 = tmp14 + tmp15;
+	       tmp20 = tmp14 - tmp15;
+	  }
+     }
+     {	/* outputs: each pair (k, 11-k) is written as (common term) -/+ (odd term) */
+	  fftw_real tmp56;
+	  fftw_real tmp55;
+	  fftw_real tmp44;
+	  fftw_real tmp45;
+	  ASSERT_ALIGNED_DOUBLE;
+	  c_re(output[0]) = tmp1 + tmp4 + tmp7 + tmp10 + tmp13 + tmp16;	/* Re out[0]: sum of all eleven real inputs */
+	  {	/* real parts of outputs 6, 5, 10, 1 */
+	       fftw_real tmp62;
+	       fftw_real tmp61;
+	       fftw_real tmp58;
+	       fftw_real tmp57;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp62 =
+		   (K989821441 * tmp52) + (K755749574 * tmp50) +
+		   (K281732556 * tmp51) - (K909631995 * tmp49) -
+		   (K540640817 * tmp53);
+	       tmp61 =
+		   tmp1 + (K841253532 * tmp7) + (K415415013 * tmp13) -
+		   (K142314838 * tmp16) - (K654860733 * tmp10) -
+		   (K959492973 * tmp4);
+	       c_re(output[6 * ostride]) = tmp61 - tmp62;
+	       c_re(output[5 * ostride]) = tmp61 + tmp62;
+	       tmp58 =
+		   (K909631995 * tmp53) + (K755749574 * tmp49) +
+		   (K281732556 * tmp52) + (K989821441 * tmp50) +
+		   (K540640817 * tmp51);
+	       tmp57 =
+		   tmp1 + (K841253532 * tmp4) + (K415415013 * tmp7) -
+		   (K959492973 * tmp16) - (K654860733 * tmp13) -
+		   (K142314838 * tmp10);
+	       c_re(output[10 * ostride]) = tmp57 - tmp58;
+	       c_re(output[ostride]) = tmp57 + tmp58;
+	  }
+	  tmp56 =
+	      (K755749574 * tmp53) + (K909631995 * tmp51) -
+	      (K281732556 * tmp50) - (K540640817 * tmp52) -
+	      (K989821441 * tmp49);
+	  tmp55 =
+	      tmp1 + (K415415013 * tmp4) + (K841253532 * tmp16) -
+	      (K142314838 * tmp13) - (K959492973 * tmp10) -
+	      (K654860733 * tmp7);
+	  c_re(output[9 * ostride]) = tmp55 - tmp56;	/* real parts of outputs 9 and 2 */
+	  c_re(output[2 * ostride]) = tmp55 + tmp56;
+	  {	/* real parts of outputs 8, 3, 7, 4 */
+	       fftw_real tmp60;
+	       fftw_real tmp59;
+	       fftw_real tmp54;
+	       fftw_real tmp48;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp60 =
+		   (K540640817 * tmp49) + (K755749574 * tmp52) +
+		   (K989821441 * tmp51) - (K909631995 * tmp50) -
+		   (K281732556 * tmp53);
+	       tmp59 =
+		   tmp1 + (K415415013 * tmp10) + (K841253532 * tmp13) -
+		   (K654860733 * tmp16) - (K959492973 * tmp7) -
+		   (K142314838 * tmp4);
+	       c_re(output[8 * ostride]) = tmp59 - tmp60;
+	       c_re(output[3 * ostride]) = tmp59 + tmp60;
+	       tmp54 =
+		   (K281732556 * tmp49) + (K540640817 * tmp50) +
+		   (K755749574 * tmp51) - (K909631995 * tmp52) -
+		   (K989821441 * tmp53);
+	       tmp48 =
+		   tmp1 + (K841253532 * tmp10) + (K415415013 * tmp16) -
+		   (K959492973 * tmp13) - (K142314838 * tmp7) -
+		   (K654860733 * tmp4);
+	       c_re(output[7 * ostride]) = tmp48 - tmp54;
+	       c_re(output[4 * ostride]) = tmp48 + tmp54;
+	  }
+	  c_im(output[0]) = tmp23 + tmp38 + tmp35 + tmp26 + tmp29 + tmp32;	/* Im out[0]: sum of all eleven imaginary inputs */
+	  {	/* imaginary parts of outputs 4, 7, 5, 6 */
+	       fftw_real tmp22;
+	       fftw_real tmp39;
+	       fftw_real tmp42;
+	       fftw_real tmp43;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp22 =
+		   (K755749574 * tmp17) + (K540640817 * tmp18) +
+		   (K281732556 * tmp19) - (K909631995 * tmp20) -
+		   (K989821441 * tmp21);
+	       tmp39 =
+		   tmp23 + (K415415013 * tmp26) + (K841253532 * tmp29) -
+		   (K654860733 * tmp32) - (K959492973 * tmp35) -
+		   (K142314838 * tmp38);
+	       c_im(output[4 * ostride]) = tmp22 + tmp39;
+	       c_im(output[7 * ostride]) = tmp39 - tmp22;
+	       tmp42 =
+		   (K281732556 * tmp17) + (K755749574 * tmp18) +
+		   (K989821441 * tmp20) - (K909631995 * tmp19) -
+		   (K540640817 * tmp21);
+	       tmp43 =
+		   tmp23 + (K841253532 * tmp38) + (K415415013 * tmp35) -
+		   (K959492973 * tmp32) - (K654860733 * tmp29) -
+		   (K142314838 * tmp26);
+	       c_im(output[5 * ostride]) = tmp42 + tmp43;
+	       c_im(output[6 * ostride]) = tmp43 - tmp42;
+	  }
+	  tmp44 =
+	      (K540640817 * tmp17) + (K909631995 * tmp21) +
+	      (K989821441 * tmp18) + (K755749574 * tmp19) +
+	      (K281732556 * tmp20);
+	  tmp45 =
+	      tmp23 + (K415415013 * tmp38) + (K841253532 * tmp32) -
+	      (K142314838 * tmp29) - (K959492973 * tmp26) -
+	      (K654860733 * tmp35);
+	  c_im(output[ostride]) = tmp44 + tmp45;	/* imaginary parts of outputs 1 and 10 */
+	  c_im(output[10 * ostride]) = tmp45 - tmp44;
+	  {	/* imaginary parts of outputs 3, 8, 2, 9 */
+	       fftw_real tmp40;
+	       fftw_real tmp41;
+	       fftw_real tmp46;
+	       fftw_real tmp47;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp40 =
+		   (K989821441 * tmp17) + (K540640817 * tmp19) +
+		   (K755749574 * tmp20) - (K909631995 * tmp18) -
+		   (K281732556 * tmp21);
+	       tmp41 =
+		   tmp23 + (K841253532 * tmp35) + (K415415013 * tmp29) -
+		   (K142314838 * tmp32) - (K654860733 * tmp26) -
+		   (K959492973 * tmp38);
+	       c_im(output[3 * ostride]) = tmp40 + tmp41;
+	       c_im(output[8 * ostride]) = tmp41 - tmp40;
+	       tmp46 =
+		   (K909631995 * tmp17) + (K755749574 * tmp21) -
+		   (K540640817 * tmp20) - (K989821441 * tmp19) -
+		   (K281732556 * tmp18);
+	       tmp47 =
+		   tmp23 + (K841253532 * tmp26) + (K415415013 * tmp32) -
+		   (K959492973 * tmp29) - (K142314838 * tmp35) -
+		   (K654860733 * tmp38);
+	       c_im(output[2 * ostride]) = tmp46 + tmp47;
+	       c_im(output[9 * ostride]) = tmp47 - tmp46;
+	  }
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_11_desc = {	/* descriptor record for fftwi_no_twiddle_11 */
+     "fftwi_no_twiddle_11",	/* codelet name as a string */
+     (void (*)()) fftwi_no_twiddle_11,	/* codelet function, cast to an untyped function pointer */
+     11,	/* presumably the transform size -- confirm against fftw_codelet_desc */
+     FFTW_BACKWARD,	/* direction flag */
+     FFTW_NOTW,	/* codelet kind flag */
+     254,	/* NOTE(review): meaning not visible here (looks like a per-codelet id) -- confirm */
+     0,	/* NOTE(review): presumably a twiddle-factor count -- confirm */
+     (const int *) 0,	/* null int-table pointer; NOTE(review): presumably a twiddle order -- confirm */
+};
diff --git a/src/fftw/fni_12.c b/src/fftw/fni_12.c
new file mode 100644
index 0000000..889391b
--- /dev/null
+++ b/src/fftw/fni_12.c
@@ -0,0 +1,324 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:36 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 12 */
+
+/*
+ * This function contains 96 FP additions, 16 FP multiplications,
+ * (or, 88 additions, 8 multiplications, 8 fused multiply/add),
+ * 40 stack variables, and 48 memory accesses
+ */
+static const fftw_real K866025403 =	/* sqrt(3)/2 */
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);
+static const fftw_real K500000000 =	/* 1/2 */
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);
+
+/*
+ * Generator Id's : 
+ * $Id: fni_12.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_12.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_12.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_12(const fftw_complex *input, fftw_complex *output,
+			 int istride, int ostride)	/* strides count fftw_complex elements */
+{	/* Size-12 no-twiddle codelet (registered below as FFTW_BACKWARD): reads input[k * istride], writes output[k * ostride], k = 0..11. */
+     fftw_real tmp5;
+     fftw_real tmp35;
+     fftw_real tmp57;
+     fftw_real tmp27;
+     fftw_real tmp58;
+     fftw_real tmp36;
+     fftw_real tmp10;
+     fftw_real tmp38;
+     fftw_real tmp60;
+     fftw_real tmp32;
+     fftw_real tmp61;
+     fftw_real tmp39;
+     fftw_real tmp16;
+     fftw_real tmp82;
+     fftw_real tmp42;
+     fftw_real tmp47;
+     fftw_real tmp76;
+     fftw_real tmp83;
+     fftw_real tmp21;
+     fftw_real tmp85;
+     fftw_real tmp49;
+     fftw_real tmp54;
+     fftw_real tmp77;
+     fftw_real tmp86;
+     ASSERT_ALIGNED_DOUBLE;
+     {	/* real parts of inputs 0, 4, 8: total, first minus half the sum of the other two, scaled difference */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp3;
+	  fftw_real tmp4;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[4 * istride]);
+	  tmp3 = c_re(input[8 * istride]);
+	  tmp4 = tmp2 + tmp3;
+	  tmp5 = tmp1 + tmp4;
+	  tmp35 = tmp1 - (K500000000 * tmp4);
+	  tmp57 = K866025403 * (tmp2 - tmp3);
+     }
+     {	/* imaginary parts of inputs 0, 4, 8 (difference taken as im8 - im4) */
+	  fftw_real tmp23;
+	  fftw_real tmp24;
+	  fftw_real tmp25;
+	  fftw_real tmp26;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp23 = c_im(input[0]);
+	  tmp24 = c_im(input[4 * istride]);
+	  tmp25 = c_im(input[8 * istride]);
+	  tmp26 = tmp24 + tmp25;
+	  tmp27 = tmp23 + tmp26;
+	  tmp58 = tmp23 - (K500000000 * tmp26);
+	  tmp36 = K866025403 * (tmp25 - tmp24);
+     }
+     {	/* real parts of inputs 6, 10, 2 */
+	  fftw_real tmp6;
+	  fftw_real tmp7;
+	  fftw_real tmp8;
+	  fftw_real tmp9;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp6 = c_re(input[6 * istride]);
+	  tmp7 = c_re(input[10 * istride]);
+	  tmp8 = c_re(input[2 * istride]);
+	  tmp9 = tmp7 + tmp8;
+	  tmp10 = tmp6 + tmp9;
+	  tmp38 = tmp6 - (K500000000 * tmp9);
+	  tmp60 = K866025403 * (tmp7 - tmp8);
+     }
+     {	/* imaginary parts of inputs 6, 10, 2 (difference taken as im2 - im10) */
+	  fftw_real tmp28;
+	  fftw_real tmp29;
+	  fftw_real tmp30;
+	  fftw_real tmp31;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp28 = c_im(input[6 * istride]);
+	  tmp29 = c_im(input[10 * istride]);
+	  tmp30 = c_im(input[2 * istride]);
+	  tmp31 = tmp29 + tmp30;
+	  tmp32 = tmp28 + tmp31;
+	  tmp61 = tmp28 - (K500000000 * tmp31);
+	  tmp39 = K866025403 * (tmp30 - tmp29);
+     }
+     {	/* real parts of inputs 3, 7, 11 */
+	  fftw_real tmp12;
+	  fftw_real tmp13;
+	  fftw_real tmp14;
+	  fftw_real tmp15;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp12 = c_re(input[3 * istride]);
+	  tmp13 = c_re(input[7 * istride]);
+	  tmp14 = c_re(input[11 * istride]);
+	  tmp15 = tmp13 + tmp14;
+	  tmp16 = tmp12 + tmp15;
+	  tmp82 = tmp12 - (K500000000 * tmp15);
+	  tmp42 = K866025403 * (tmp13 - tmp14);
+     }
+     {	/* imaginary parts of inputs 3, 7, 11 (difference taken as im11 - im7) */
+	  fftw_real tmp43;
+	  fftw_real tmp44;
+	  fftw_real tmp45;
+	  fftw_real tmp46;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp43 = c_im(input[3 * istride]);
+	  tmp44 = c_im(input[7 * istride]);
+	  tmp45 = c_im(input[11 * istride]);
+	  tmp46 = tmp44 + tmp45;
+	  tmp47 = tmp43 - (K500000000 * tmp46);
+	  tmp76 = tmp43 + tmp46;
+	  tmp83 = K866025403 * (tmp45 - tmp44);
+     }
+     {	/* real parts of inputs 9, 1, 5 */
+	  fftw_real tmp17;
+	  fftw_real tmp18;
+	  fftw_real tmp19;
+	  fftw_real tmp20;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp17 = c_re(input[9 * istride]);
+	  tmp18 = c_re(input[istride]);
+	  tmp19 = c_re(input[5 * istride]);
+	  tmp20 = tmp18 + tmp19;
+	  tmp21 = tmp17 + tmp20;
+	  tmp85 = tmp17 - (K500000000 * tmp20);
+	  tmp49 = K866025403 * (tmp18 - tmp19);
+     }
+     {	/* imaginary parts of inputs 9, 1, 5 (difference taken as im5 - im1) */
+	  fftw_real tmp50;
+	  fftw_real tmp51;
+	  fftw_real tmp52;
+	  fftw_real tmp53;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp50 = c_im(input[9 * istride]);
+	  tmp51 = c_im(input[istride]);
+	  tmp52 = c_im(input[5 * istride]);
+	  tmp53 = tmp51 + tmp52;
+	  tmp54 = tmp50 - (K500000000 * tmp53);
+	  tmp77 = tmp50 + tmp53;
+	  tmp86 = K866025403 * (tmp52 - tmp51);
+     }
+     {	/* combine the four triple results into the twelve outputs */
+	  fftw_real tmp11;
+	  fftw_real tmp22;
+	  fftw_real tmp33;
+	  fftw_real tmp34;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp11 = tmp5 + tmp10;	/* sum of real inputs 0, 2, 4, 6, 8, 10 */
+	  tmp22 = tmp16 + tmp21;	/* sum of real inputs 1, 3, 5, 7, 9, 11 */
+	  c_re(output[6 * ostride]) = tmp11 - tmp22;
+	  c_re(output[0]) = tmp11 + tmp22;	/* Re out[0]: sum of all twelve real inputs */
+	  {
+	       fftw_real tmp75;
+	       fftw_real tmp78;
+	       fftw_real tmp79;
+	       fftw_real tmp80;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp75 = tmp5 - tmp10;
+	       tmp78 = tmp76 - tmp77;
+	       c_re(output[9 * ostride]) = tmp75 - tmp78;
+	       c_re(output[3 * ostride]) = tmp75 + tmp78;
+	       tmp79 = tmp27 + tmp32;	/* sum of imaginary inputs 0, 2, 4, 6, 8, 10 */
+	       tmp80 = tmp76 + tmp77;	/* sum of imaginary inputs 1, 3, 5, 7, 9, 11 */
+	       c_im(output[6 * ostride]) = tmp79 - tmp80;
+	       c_im(output[0]) = tmp79 + tmp80;	/* Im out[0]: sum of all twelve imaginary inputs */
+	  }
+	  tmp33 = tmp27 - tmp32;
+	  tmp34 = tmp16 - tmp21;
+	  c_im(output[3 * ostride]) = tmp33 - tmp34;
+	  c_im(output[9 * ostride]) = tmp34 + tmp33;
+	  {	/* outputs 2, 5, 8, 11 */
+	       fftw_real tmp67;
+	       fftw_real tmp89;
+	       fftw_real tmp88;
+	       fftw_real tmp90;
+	       fftw_real tmp70;
+	       fftw_real tmp74;
+	       fftw_real tmp73;
+	       fftw_real tmp81;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp65;
+		    fftw_real tmp66;
+		    fftw_real tmp84;
+		    fftw_real tmp87;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp65 = tmp35 - tmp36;
+		    tmp66 = tmp38 - tmp39;
+		    tmp67 = tmp65 - tmp66;
+		    tmp89 = tmp65 + tmp66;
+		    tmp84 = tmp82 - tmp83;
+		    tmp87 = tmp85 - tmp86;
+		    tmp88 = tmp84 - tmp87;
+		    tmp90 = tmp84 + tmp87;
+	       }
+	       {
+		    fftw_real tmp68;
+		    fftw_real tmp69;
+		    fftw_real tmp71;
+		    fftw_real tmp72;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp68 = tmp47 - tmp42;
+		    tmp69 = tmp54 - tmp49;
+		    tmp70 = tmp68 - tmp69;
+		    tmp74 = tmp68 + tmp69;
+		    tmp71 = tmp58 - tmp57;
+		    tmp72 = tmp61 - tmp60;
+		    tmp73 = tmp71 + tmp72;
+		    tmp81 = tmp71 - tmp72;
+	       }
+	       c_re(output[5 * ostride]) = tmp67 - tmp70;
+	       c_re(output[11 * ostride]) = tmp67 + tmp70;
+	       c_im(output[2 * ostride]) = tmp73 - tmp74;
+	       c_im(output[8 * ostride]) = tmp73 + tmp74;
+	       c_im(output[11 * ostride]) = tmp81 - tmp88;
+	       c_im(output[5 * ostride]) = tmp81 + tmp88;
+	       c_re(output[2 * ostride]) = tmp89 - tmp90;
+	       c_re(output[8 * ostride]) = tmp89 + tmp90;
+	  }
+	  {	/* outputs 1, 4, 7, 10 */
+	       fftw_real tmp41;
+	       fftw_real tmp95;
+	       fftw_real tmp94;
+	       fftw_real tmp96;
+	       fftw_real tmp56;
+	       fftw_real tmp64;
+	       fftw_real tmp63;
+	       fftw_real tmp91;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp40;
+		    fftw_real tmp92;
+		    fftw_real tmp93;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = tmp35 + tmp36;
+		    tmp40 = tmp38 + tmp39;
+		    tmp41 = tmp37 - tmp40;
+		    tmp95 = tmp37 + tmp40;
+		    tmp92 = tmp82 + tmp83;
+		    tmp93 = tmp85 + tmp86;
+		    tmp94 = tmp92 - tmp93;
+		    tmp96 = tmp92 + tmp93;
+	       }
+	       {
+		    fftw_real tmp48;
+		    fftw_real tmp55;
+		    fftw_real tmp59;
+		    fftw_real tmp62;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp48 = tmp42 + tmp47;
+		    tmp55 = tmp49 + tmp54;
+		    tmp56 = tmp48 - tmp55;
+		    tmp64 = tmp48 + tmp55;
+		    tmp59 = tmp57 + tmp58;
+		    tmp62 = tmp60 + tmp61;
+		    tmp63 = tmp59 + tmp62;
+		    tmp91 = tmp59 - tmp62;
+	       }
+	       c_re(output[ostride]) = tmp41 - tmp56;
+	       c_re(output[7 * ostride]) = tmp41 + tmp56;
+	       c_im(output[10 * ostride]) = tmp63 - tmp64;
+	       c_im(output[4 * ostride]) = tmp63 + tmp64;
+	       c_im(output[7 * ostride]) = tmp91 - tmp94;
+	       c_im(output[ostride]) = tmp91 + tmp94;
+	       c_re(output[10 * ostride]) = tmp95 - tmp96;
+	       c_re(output[4 * ostride]) = tmp95 + tmp96;
+	  }
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_12_desc = {	/* descriptor record for fftwi_no_twiddle_12 */
+     "fftwi_no_twiddle_12",	/* codelet name as a string */
+     (void (*)()) fftwi_no_twiddle_12,	/* codelet function, cast to an untyped function pointer */
+     12,	/* presumably the transform size -- confirm against fftw_codelet_desc */
+     FFTW_BACKWARD,	/* direction flag */
+     FFTW_NOTW,	/* codelet kind flag */
+     276,	/* NOTE(review): meaning not visible here (looks like a per-codelet id) -- confirm */
+     0,	/* NOTE(review): presumably a twiddle-factor count -- confirm */
+     (const int *) 0,	/* null int-table pointer; NOTE(review): presumably a twiddle order -- confirm */
+};
diff --git a/src/fftw/fni_13.c b/src/fftw/fni_13.c
new file mode 100644
index 0000000..424a6c3
--- /dev/null
+++ b/src/fftw/fni_13.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:37 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 13 */
+
+/*
+ * This function contains 176 FP additions, 68 FP multiplications,
+ * (or, 138 additions, 30 multiplications, 38 fused multiply/add),
+ * 50 stack variables, and 52 memory accesses
+ */
+static const fftw_real K1_732050807 =
+FFTW_KONST(+1.732050807568877293527446341505872366942805254);	/* numerically sqrt(3) */
+static const fftw_real K156891391 =
+FFTW_KONST(+0.156891391051584611046832726756003269660212636);
+static const fftw_real K256247671 =
+FFTW_KONST(+0.256247671582936600958684654061725059144125175);
+static const fftw_real K300238635 =
+FFTW_KONST(+0.300238635966332641462884626667381504676006424);
+static const fftw_real K011599105 =
+FFTW_KONST(+0.011599105605768290721655456654083252189827041);
+static const fftw_real K174138601 =
+FFTW_KONST(+0.174138601152135905005660794929264742616964676);
+static const fftw_real K575140729 =
+FFTW_KONST(+0.575140729474003121368385547455453388461001608);
+static const fftw_real K2_000000000 =
+FFTW_KONST(+2.000000000000000000000000000000000000000000000);	/* 2 */
+static const fftw_real K083333333 =
+FFTW_KONST(+0.083333333333333333333333333333333333333333333);	/* 1/12 */
+static const fftw_real K075902986 =
+FFTW_KONST(+0.075902986037193865983102897245103540356428373);
+static const fftw_real K251768516 =
+FFTW_KONST(+0.251768516431883313623436926934233488546674281);
+static const fftw_real K258260390 =
+FFTW_KONST(+0.258260390311744861420450644284508567852516811);
+static const fftw_real K132983124 =
+FFTW_KONST(+0.132983124607418643793760531921092974399165133);
+static const fftw_real K503537032 =
+FFTW_KONST(+0.503537032863766627246873853868466977093348562);
+static const fftw_real K113854479 =
+FFTW_KONST(+0.113854479055790798974654345867655310534642560);
+static const fftw_real K265966249 =
+FFTW_KONST(+0.265966249214837287587521063842185948798330267);
+static const fftw_real K387390585 =
+FFTW_KONST(+0.387390585467617292130675966426762851778775217);
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);	/* numerically sqrt(3)/2 */
+static const fftw_real K300462606 =
+FFTW_KONST(+0.300462606288665774426601772289207995520941381);	/* numerically sqrt(13)/12 */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);	/* 1/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_13.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_13.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_13.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_13(const fftw_complex *input, fftw_complex *output,
+			 int istride, int ostride)	/* istride / ostride scale every index into input[] / output[] */
+{	/* generated straight-line codelet: reads input[k * istride] and writes output[k * ostride] for k = 0..12 */
+     fftw_real tmp1;
+     fftw_real tmp146;
+     fftw_real tmp113;
+     fftw_real tmp24;
+     fftw_real tmp38;
+     fftw_real tmp36;
+     fftw_real tmp41;
+     fftw_real tmp116;
+     fftw_real tmp120;
+     fftw_real tmp125;
+     fftw_real tmp31;
+     fftw_real tmp40;
+     fftw_real tmp123;
+     fftw_real tmp126;
+     fftw_real tmp56;
+     fftw_real tmp80;
+     fftw_real tmp82;
+     fftw_real tmp137;
+     fftw_real tmp144;
+     fftw_real tmp67;
+     fftw_real tmp141;
+     fftw_real tmp147;
+     fftw_real tmp134;
+     fftw_real tmp143;
+     fftw_real tmp75;
+     fftw_real tmp83;
+     ASSERT_ALIGNED_DOUBLE;
+     tmp1 = c_re(input[0]);	/* element 0 is loaded on its own; elements 1..12 are combined in the two blocks below */
+     tmp146 = c_im(input[0]);
+     {	/* stage 1: load c_re of inputs 1..12 and form their sums and differences */
+	  fftw_real tmp15;
+	  fftw_real tmp114;
+	  fftw_real tmp18;
+	  fftw_real tmp26;
+	  fftw_real tmp21;
+	  fftw_real tmp25;
+	  fftw_real tmp22;
+	  fftw_real tmp115;
+	  fftw_real tmp6;
+	  fftw_real tmp32;
+	  fftw_real tmp28;
+	  fftw_real tmp11;
+	  fftw_real tmp33;
+	  fftw_real tmp29;
+	  fftw_real tmp13;
+	  fftw_real tmp14;
+	  fftw_real tmp118;
+	  fftw_real tmp119;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp13 = c_re(input[8 * istride]);
+	  tmp14 = c_re(input[5 * istride]);
+	  tmp15 = tmp13 + tmp14;
+	  tmp114 = tmp13 - tmp14;
+	  {
+	       fftw_real tmp16;
+	       fftw_real tmp17;
+	       fftw_real tmp19;
+	       fftw_real tmp20;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp16 = c_re(input[6 * istride]);
+	       tmp17 = c_re(input[11 * istride]);
+	       tmp18 = tmp16 + tmp17;
+	       tmp26 = tmp16 - tmp17;
+	       tmp19 = c_re(input[2 * istride]);
+	       tmp20 = c_re(input[7 * istride]);
+	       tmp21 = tmp19 + tmp20;
+	       tmp25 = tmp19 - tmp20;
+	  }
+	  tmp22 = tmp18 + tmp21;
+	  tmp115 = tmp26 + tmp25;
+	  {
+	       fftw_real tmp2;
+	       fftw_real tmp3;
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp2 = c_re(input[istride]);
+	       tmp3 = c_re(input[3 * istride]);
+	       tmp4 = c_re(input[9 * istride]);
+	       tmp5 = tmp3 + tmp4;
+	       tmp6 = tmp2 + tmp5;
+	       tmp32 = tmp2 - (K500000000 * tmp5);
+	       tmp28 = tmp3 - tmp4;
+	  }
+	  {
+	       fftw_real tmp7;
+	       fftw_real tmp8;
+	       fftw_real tmp9;
+	       fftw_real tmp10;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp7 = c_re(input[12 * istride]);
+	       tmp8 = c_re(input[4 * istride]);
+	       tmp9 = c_re(input[10 * istride]);
+	       tmp10 = tmp8 + tmp9;
+	       tmp11 = tmp7 + tmp10;
+	       tmp33 = tmp7 - (K500000000 * tmp10);
+	       tmp29 = tmp8 - tmp9;
+	  }
+	  tmp113 = tmp6 - tmp11;
+	  {
+	       fftw_real tmp12;
+	       fftw_real tmp23;
+	       fftw_real tmp34;
+	       fftw_real tmp35;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp12 = tmp6 + tmp11;
+	       tmp23 = tmp15 + tmp22;
+	       tmp24 = tmp12 + tmp23;	/* sum of c_re(input[k * istride]) over k = 1..12 */
+	       tmp38 = K300462606 * (tmp12 - tmp23);
+	       tmp34 = tmp32 + tmp33;
+	       tmp35 = tmp15 - (K500000000 * tmp22);
+	       tmp36 = tmp34 - tmp35;
+	       tmp41 = tmp34 + tmp35;
+	  }
+	  tmp116 = tmp114 - tmp115;
+	  tmp118 = K866025403 * (tmp28 + tmp29);
+	  tmp119 = tmp114 + (K500000000 * tmp115);
+	  tmp120 = tmp118 + tmp119;
+	  tmp125 = tmp119 - tmp118;
+	  {
+	       fftw_real tmp27;
+	       fftw_real tmp30;
+	       fftw_real tmp121;
+	       fftw_real tmp122;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp27 = tmp25 - tmp26;
+	       tmp30 = tmp28 - tmp29;
+	       tmp31 = tmp27 - tmp30;
+	       tmp40 = tmp30 + tmp27;
+	       tmp121 = tmp32 - tmp33;
+	       tmp122 = K866025403 * (tmp18 - tmp21);
+	       tmp123 = tmp121 - tmp122;
+	       tmp126 = tmp121 + tmp122;
+	  }
+     }
+     {	/* stage 2: load c_im of inputs 1..12 and form their sums and differences */
+	  fftw_real tmp48;
+	  fftw_real tmp131;
+	  fftw_real tmp66;
+	  fftw_real tmp70;
+	  fftw_real tmp77;
+	  fftw_real tmp61;
+	  fftw_real tmp69;
+	  fftw_real tmp76;
+	  fftw_real tmp51;
+	  fftw_real tmp73;
+	  fftw_real tmp54;
+	  fftw_real tmp72;
+	  fftw_real tmp55;
+	  fftw_real tmp132;
+	  fftw_real tmp46;
+	  fftw_real tmp47;
+	  fftw_real tmp139;
+	  fftw_real tmp140;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp46 = c_im(input[8 * istride]);
+	  tmp47 = c_im(input[5 * istride]);
+	  tmp48 = tmp46 - tmp47;
+	  tmp131 = tmp46 + tmp47;
+	  {
+	       fftw_real tmp62;
+	       fftw_real tmp63;
+	       fftw_real tmp64;
+	       fftw_real tmp65;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp62 = c_im(input[12 * istride]);
+	       tmp63 = c_im(input[4 * istride]);
+	       tmp64 = c_im(input[10 * istride]);
+	       tmp65 = tmp63 + tmp64;
+	       tmp66 = tmp62 + tmp65;
+	       tmp70 = tmp62 - (K500000000 * tmp65);
+	       tmp77 = tmp63 - tmp64;
+	  }
+	  {
+	       fftw_real tmp57;
+	       fftw_real tmp58;
+	       fftw_real tmp59;
+	       fftw_real tmp60;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp57 = c_im(input[istride]);
+	       tmp58 = c_im(input[3 * istride]);
+	       tmp59 = c_im(input[9 * istride]);
+	       tmp60 = tmp58 + tmp59;
+	       tmp61 = tmp57 + tmp60;
+	       tmp69 = tmp57 - (K500000000 * tmp60);
+	       tmp76 = tmp58 - tmp59;
+	  }
+	  {
+	       fftw_real tmp49;
+	       fftw_real tmp50;
+	       fftw_real tmp52;
+	       fftw_real tmp53;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp49 = c_im(input[6 * istride]);
+	       tmp50 = c_im(input[11 * istride]);
+	       tmp51 = tmp49 - tmp50;
+	       tmp73 = tmp49 + tmp50;
+	       tmp52 = c_im(input[2 * istride]);
+	       tmp53 = c_im(input[7 * istride]);
+	       tmp54 = tmp52 - tmp53;
+	       tmp72 = tmp52 + tmp53;
+	  }
+	  tmp55 = tmp51 + tmp54;
+	  tmp132 = tmp73 + tmp72;
+	  tmp56 = tmp48 - tmp55;
+	  {
+	       fftw_real tmp78;
+	       fftw_real tmp79;
+	       fftw_real tmp135;
+	       fftw_real tmp136;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp78 = K866025403 * (tmp76 + tmp77);
+	       tmp79 = tmp48 + (K500000000 * tmp55);
+	       tmp80 = tmp78 - tmp79;
+	       tmp82 = tmp78 + tmp79;
+	       tmp135 = tmp51 - tmp54;
+	       tmp136 = tmp77 - tmp76;
+	       tmp137 = tmp135 - tmp136;
+	       tmp144 = tmp136 + tmp135;
+	  }
+	  tmp67 = tmp61 - tmp66;
+	  tmp139 = tmp61 + tmp66;
+	  tmp140 = tmp131 + tmp132;
+	  tmp141 = K300462606 * (tmp139 - tmp140);
+	  tmp147 = tmp139 + tmp140;	/* sum of c_im(input[k * istride]) over k = 1..12 */
+	  {
+	       fftw_real tmp130;
+	       fftw_real tmp133;
+	       fftw_real tmp71;
+	       fftw_real tmp74;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp130 = tmp69 + tmp70;
+	       tmp133 = tmp131 - (K500000000 * tmp132);
+	       tmp134 = tmp130 - tmp133;
+	       tmp143 = tmp130 + tmp133;
+	       tmp71 = tmp69 - tmp70;
+	       tmp74 = K866025403 * (tmp72 - tmp73);
+	       tmp75 = tmp71 - tmp74;
+	       tmp83 = tmp71 + tmp74;
+	  }
+     }
+     c_re(output[0]) = tmp1 + tmp24;	/* c_re of output 0 = sum of c_re over all 13 inputs */
+     {	/* stage 3: c_re of outputs 1..12, stored in pairs as (a - b, a + b) */
+	  fftw_real tmp100;
+	  fftw_real tmp108;
+	  fftw_real tmp39;
+	  fftw_real tmp105;
+	  fftw_real tmp95;
+	  fftw_real tmp44;
+	  fftw_real tmp68;
+	  fftw_real tmp85;
+	  fftw_real tmp96;
+	  fftw_real tmp106;
+	  fftw_real tmp88;
+	  fftw_real tmp91;
+	  fftw_real tmp101;
+	  fftw_real tmp109;
+	  fftw_real tmp98;
+	  fftw_real tmp99;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp98 = (K387390585 * tmp31) - (K265966249 * tmp36);
+	  tmp99 = (K113854479 * tmp40) - (K503537032 * tmp41);
+	  tmp100 = tmp98 + tmp99;
+	  tmp108 = tmp99 - tmp98;
+	  {
+	       fftw_real tmp37;
+	       fftw_real tmp94;
+	       fftw_real tmp42;
+	       fftw_real tmp43;
+	       fftw_real tmp93;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp37 = (K132983124 * tmp31) + (K258260390 * tmp36);
+	       tmp94 = tmp38 - tmp37;
+	       tmp42 = (K251768516 * tmp40) + (K075902986 * tmp41);
+	       tmp43 = tmp1 - (K083333333 * tmp24);	/* c_re(input[0]) minus 1/12 of the sum over inputs 1..12 */
+	       tmp93 = tmp43 - tmp42;
+	       tmp39 = (K2_000000000 * tmp37) + tmp38;
+	       tmp105 = tmp94 + tmp93;
+	       tmp95 = tmp93 - tmp94;
+	       tmp44 = (K2_000000000 * tmp42) + tmp43;
+	  }
+	  {	/* these terms are built from the c_im-derived temporaries of stage 2 */
+	       fftw_real tmp81;
+	       fftw_real tmp84;
+	       fftw_real tmp89;
+	       fftw_real tmp90;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp68 = (K575140729 * tmp56) - (K174138601 * tmp67);
+	       tmp81 = (K011599105 * tmp75) - (K300238635 * tmp80);
+	       tmp84 = (K256247671 * tmp82) + (K156891391 * tmp83);
+	       tmp85 = tmp81 - tmp84;
+	       tmp96 = K1_732050807 * (tmp81 + tmp84);
+	       tmp106 = tmp68 - tmp85;
+	       tmp88 = (K575140729 * tmp67) + (K174138601 * tmp56);
+	       tmp89 = (K256247671 * tmp83) - (K156891391 * tmp82);
+	       tmp90 = (K011599105 * tmp80) + (K300238635 * tmp75);
+	       tmp91 = tmp89 - tmp90;
+	       tmp101 = tmp88 + tmp91;
+	       tmp109 = K1_732050807 * (tmp90 + tmp89);
+	  }
+	  {
+	       fftw_real tmp45;
+	       fftw_real tmp86;
+	       fftw_real tmp103;
+	       fftw_real tmp104;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp45 = tmp39 + tmp44;
+	       tmp86 = tmp68 + (K2_000000000 * tmp85);
+	       c_re(output[12 * ostride]) = tmp45 - tmp86;
+	       c_re(output[ostride]) = tmp45 + tmp86;
+	       {
+		    fftw_real tmp87;
+		    fftw_real tmp92;
+		    fftw_real tmp97;
+		    fftw_real tmp102;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp87 = tmp44 - tmp39;
+		    tmp92 = tmp88 - (K2_000000000 * tmp91);
+		    c_re(output[5 * ostride]) = tmp87 - tmp92;
+		    c_re(output[8 * ostride]) = tmp87 + tmp92;
+		    tmp97 = tmp95 - tmp96;
+		    tmp102 = tmp100 + tmp101;
+		    c_re(output[2 * ostride]) = tmp97 - tmp102;
+		    c_re(output[7 * ostride]) = tmp97 + tmp102;
+	       }
+	       tmp103 = tmp95 + tmp96;
+	       tmp104 = tmp101 - tmp100;
+	       c_re(output[6 * ostride]) = tmp103 - tmp104;
+	       c_re(output[11 * ostride]) = tmp103 + tmp104;
+	       {
+		    fftw_real tmp111;
+		    fftw_real tmp112;
+		    fftw_real tmp107;
+		    fftw_real tmp110;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp111 = tmp105 - tmp106;
+		    tmp112 = tmp109 - tmp108;
+		    c_re(output[4 * ostride]) = tmp111 - tmp112;
+		    c_re(output[10 * ostride]) = tmp111 + tmp112;
+		    tmp107 = tmp105 + tmp106;
+		    tmp110 = tmp108 + tmp109;
+		    c_re(output[3 * ostride]) = tmp107 - tmp110;
+		    c_re(output[9 * ostride]) = tmp107 + tmp110;
+	       }
+	  }
+     }
+     c_im(output[0]) = tmp147 + tmp146;	/* c_im of output 0 = sum of c_im over all 13 inputs */
+     {	/* stage 4: c_im of outputs 1..12, same pairing of output indices as stage 3 */
+	  fftw_real tmp160;
+	  fftw_real tmp173;
+	  fftw_real tmp142;
+	  fftw_real tmp170;
+	  fftw_real tmp165;
+	  fftw_real tmp149;
+	  fftw_real tmp117;
+	  fftw_real tmp128;
+	  fftw_real tmp162;
+	  fftw_real tmp169;
+	  fftw_real tmp151;
+	  fftw_real tmp154;
+	  fftw_real tmp157;
+	  fftw_real tmp172;
+	  fftw_real tmp158;
+	  fftw_real tmp159;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp158 = (K387390585 * tmp137) + (K265966249 * tmp134);
+	  tmp159 = (K113854479 * tmp144) + (K503537032 * tmp143);
+	  tmp160 = tmp158 + tmp159;
+	  tmp173 = tmp158 - tmp159;
+	  {
+	       fftw_real tmp138;
+	       fftw_real tmp164;
+	       fftw_real tmp145;
+	       fftw_real tmp148;
+	       fftw_real tmp163;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp138 = (K258260390 * tmp134) - (K132983124 * tmp137);
+	       tmp164 = tmp141 - tmp138;
+	       tmp145 = (K075902986 * tmp143) - (K251768516 * tmp144);
+	       tmp148 = tmp146 - (K083333333 * tmp147);	/* c_im(input[0]) minus 1/12 of the sum over inputs 1..12 */
+	       tmp163 = tmp148 - tmp145;
+	       tmp142 = (K2_000000000 * tmp138) + tmp141;
+	       tmp170 = tmp164 + tmp163;
+	       tmp165 = tmp163 - tmp164;
+	       tmp149 = (K2_000000000 * tmp145) + tmp148;
+	  }
+	  {	/* these terms are built from the c_re-derived temporaries of stage 1 */
+	       fftw_real tmp124;
+	       fftw_real tmp127;
+	       fftw_real tmp152;
+	       fftw_real tmp153;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp117 = (K174138601 * tmp113) - (K575140729 * tmp116);
+	       tmp124 = (K256247671 * tmp120) + (K156891391 * tmp123);
+	       tmp127 = (K300238635 * tmp125) + (K011599105 * tmp126);
+	       tmp128 = tmp124 - tmp127;
+	       tmp162 = K1_732050807 * (tmp124 + tmp127);
+	       tmp169 = tmp117 - tmp128;
+	       tmp151 = (K575140729 * tmp113) + (K174138601 * tmp116);
+	       tmp152 = (K256247671 * tmp123) - (K156891391 * tmp120);
+	       tmp153 = (K011599105 * tmp125) - (K300238635 * tmp126);
+	       tmp154 = tmp152 + tmp153;
+	       tmp157 = tmp151 + tmp154;
+	       tmp172 = K1_732050807 * (tmp153 - tmp152);
+	  }
+	  {
+	       fftw_real tmp129;
+	       fftw_real tmp150;
+	       fftw_real tmp167;
+	       fftw_real tmp168;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp129 = tmp117 + (K2_000000000 * tmp128);
+	       tmp150 = tmp142 + tmp149;
+	       c_im(output[ostride]) = tmp129 + tmp150;
+	       c_im(output[12 * ostride]) = tmp150 - tmp129;
+	       {
+		    fftw_real tmp155;
+		    fftw_real tmp156;
+		    fftw_real tmp161;
+		    fftw_real tmp166;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp155 = tmp151 - (K2_000000000 * tmp154);
+		    tmp156 = tmp149 - tmp142;
+		    c_im(output[5 * ostride]) = tmp155 + tmp156;
+		    c_im(output[8 * ostride]) = tmp156 - tmp155;
+		    tmp161 = tmp157 + tmp160;
+		    tmp166 = tmp162 + tmp165;
+		    c_im(output[2 * ostride]) = tmp161 + tmp166;
+		    c_im(output[7 * ostride]) = tmp166 - tmp161;
+	       }
+	       tmp167 = tmp165 - tmp162;
+	       tmp168 = tmp160 - tmp157;
+	       c_im(output[6 * ostride]) = tmp167 - tmp168;
+	       c_im(output[11 * ostride]) = tmp168 + tmp167;
+	       {
+		    fftw_real tmp175;
+		    fftw_real tmp176;
+		    fftw_real tmp171;
+		    fftw_real tmp174;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp175 = tmp170 - tmp169;
+		    tmp176 = tmp172 - tmp173;
+		    c_im(output[4 * ostride]) = tmp175 - tmp176;
+		    c_im(output[10 * ostride]) = tmp176 + tmp175;
+		    tmp171 = tmp169 + tmp170;
+		    tmp174 = tmp172 + tmp173;
+		    c_im(output[3 * ostride]) = tmp171 - tmp174;
+		    c_im(output[9 * ostride]) = tmp174 + tmp171;
+	       }
+	  }
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_13_desc = {	/* descriptor record for fftwi_no_twiddle_13; fftw_codelet_desc itself is not declared in this file */
+     "fftwi_no_twiddle_13",	/* codelet name as a string */
+     (void (*)()) fftwi_no_twiddle_13,	/* the codelet, cast to an unprototyped function pointer */
+     13,	/* matches the 13 elements the codelet reads and writes; presumably the transform size */
+     FFTW_BACKWARD,
+     FFTW_NOTW,
+     298,	/* NOTE(review): meaning of this number is not visible here -- check fftw_codelet_desc */
+     0,
+     (const int *) 0,	/* null pointer */
+};
diff --git a/src/fftw/fni_14.c b/src/fftw/fni_14.c
new file mode 100644
index 0000000..61f5f89
--- /dev/null
+++ b/src/fftw/fni_14.c
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:45 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 14 */
+
+/*
+ * This function contains 148 FP additions, 72 FP multiplications,
+ * (or, 148 additions, 72 multiplications, 0 fused multiply/add),
+ * 36 stack variables, and 56 memory accesses
+ */
+static const fftw_real K900968867 =
+FFTW_KONST(+0.900968867902419126236102319507445051165919162);	/* numerically cos(pi/7) */
+static const fftw_real K222520933 =
+FFTW_KONST(+0.222520933956314404288902564496794759466355569);	/* numerically cos(3*pi/7) */
+static const fftw_real K623489801 =
+FFTW_KONST(+0.623489801858733530525004884004239810632274731);	/* numerically cos(2*pi/7) */
+static const fftw_real K781831482 =
+FFTW_KONST(+0.781831482468029808708444526674057750232334519);	/* numerically sin(2*pi/7) */
+static const fftw_real K974927912 =
+FFTW_KONST(+0.974927912181823607018131682993931217232785801);	/* numerically sin(3*pi/7) */
+static const fftw_real K433883739 =
+FFTW_KONST(+0.433883739117558120475768332848358754609990728);	/* numerically sin(pi/7) */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_14.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_14.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_14.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_14(const fftw_complex *input, fftw_complex *output,
+			 int istride, int ostride)	/* istride / ostride scale every index into input[] / output[] */
+{	/* generated straight-line codelet: reads input[k * istride] and writes output[k * ostride] for k = 0..13 */
+     fftw_real tmp3;
+     fftw_real tmp25;
+     fftw_real tmp84;
+     fftw_real tmp93;
+     fftw_real tmp10;
+     fftw_real tmp77;
+     fftw_real tmp28;
+     fftw_real tmp97;
+     fftw_real tmp42;
+     fftw_real tmp86;
+     fftw_real tmp65;
+     fftw_real tmp92;
+     fftw_real tmp17;
+     fftw_real tmp79;
+     fftw_real tmp31;
+     fftw_real tmp99;
+     fftw_real tmp56;
+     fftw_real tmp81;
+     fftw_real tmp68;
+     fftw_real tmp94;
+     fftw_real tmp24;
+     fftw_real tmp78;
+     fftw_real tmp34;
+     fftw_real tmp98;
+     fftw_real tmp49;
+     fftw_real tmp85;
+     fftw_real tmp71;
+     fftw_real tmp95;
+     ASSERT_ALIGNED_DOUBLE;
+     {	/* inputs 0 and 7: difference (tmp3, tmp84) and sum (tmp25, tmp93) of c_re and c_im */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp82;
+	  fftw_real tmp83;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[7 * istride]);
+	  tmp3 = tmp1 - tmp2;
+	  tmp25 = tmp1 + tmp2;
+	  tmp82 = c_im(input[0]);
+	  tmp83 = c_im(input[7 * istride]);
+	  tmp84 = tmp82 - tmp83;
+	  tmp93 = tmp82 + tmp83;
+     }
+     {	/* c_re of input pairs (2, 9) and (12, 5): per-pair difference and sum, then combined */
+	  fftw_real tmp6;
+	  fftw_real tmp26;
+	  fftw_real tmp9;
+	  fftw_real tmp27;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       fftw_real tmp7;
+	       fftw_real tmp8;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp4 = c_re(input[2 * istride]);
+	       tmp5 = c_re(input[9 * istride]);
+	       tmp6 = tmp4 - tmp5;
+	       tmp26 = tmp4 + tmp5;
+	       tmp7 = c_re(input[12 * istride]);
+	       tmp8 = c_re(input[5 * istride]);
+	       tmp9 = tmp7 - tmp8;
+	       tmp27 = tmp7 + tmp8;
+	  }
+	  tmp10 = tmp6 + tmp9;
+	  tmp77 = tmp6 - tmp9;
+	  tmp28 = tmp26 + tmp27;
+	  tmp97 = tmp26 - tmp27;
+     }
+     {	/* c_im of input pairs (12, 5) and (2, 9) */
+	  fftw_real tmp38;
+	  fftw_real tmp63;
+	  fftw_real tmp41;
+	  fftw_real tmp64;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp36;
+	       fftw_real tmp37;
+	       fftw_real tmp39;
+	       fftw_real tmp40;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp36 = c_im(input[12 * istride]);
+	       tmp37 = c_im(input[5 * istride]);
+	       tmp38 = tmp36 - tmp37;
+	       tmp63 = tmp36 + tmp37;
+	       tmp39 = c_im(input[2 * istride]);
+	       tmp40 = c_im(input[9 * istride]);
+	       tmp41 = tmp39 - tmp40;
+	       tmp64 = tmp39 + tmp40;
+	  }
+	  tmp42 = tmp38 - tmp41;
+	  tmp86 = tmp38 + tmp41;
+	  tmp65 = tmp63 - tmp64;
+	  tmp92 = tmp63 + tmp64;
+     }
+     {	/* c_re of input pairs (4, 11) and (10, 3) */
+	  fftw_real tmp13;
+	  fftw_real tmp29;
+	  fftw_real tmp16;
+	  fftw_real tmp30;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       fftw_real tmp14;
+	       fftw_real tmp15;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp11 = c_re(input[4 * istride]);
+	       tmp12 = c_re(input[11 * istride]);
+	       tmp13 = tmp11 - tmp12;
+	       tmp29 = tmp11 + tmp12;
+	       tmp14 = c_re(input[10 * istride]);
+	       tmp15 = c_re(input[3 * istride]);
+	       tmp16 = tmp14 - tmp15;
+	       tmp30 = tmp14 + tmp15;
+	  }
+	  tmp17 = tmp13 + tmp16;
+	  tmp79 = tmp13 - tmp16;
+	  tmp31 = tmp29 + tmp30;
+	  tmp99 = tmp30 - tmp29;
+     }
+     {	/* c_im of input pairs (10, 3) and (4, 11) */
+	  fftw_real tmp52;
+	  fftw_real tmp67;
+	  fftw_real tmp55;
+	  fftw_real tmp66;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp50;
+	       fftw_real tmp51;
+	       fftw_real tmp53;
+	       fftw_real tmp54;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp50 = c_im(input[10 * istride]);
+	       tmp51 = c_im(input[3 * istride]);
+	       tmp52 = tmp50 - tmp51;
+	       tmp67 = tmp50 + tmp51;
+	       tmp53 = c_im(input[4 * istride]);
+	       tmp54 = c_im(input[11 * istride]);
+	       tmp55 = tmp53 - tmp54;
+	       tmp66 = tmp53 + tmp54;
+	  }
+	  tmp56 = tmp52 - tmp55;
+	  tmp81 = tmp52 + tmp55;
+	  tmp68 = tmp66 - tmp67;
+	  tmp94 = tmp67 + tmp66;
+     }
+     {	/* c_re of input pairs (6, 13) and (8, 1) */
+	  fftw_real tmp20;
+	  fftw_real tmp32;
+	  fftw_real tmp23;
+	  fftw_real tmp33;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp18;
+	       fftw_real tmp19;
+	       fftw_real tmp21;
+	       fftw_real tmp22;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp18 = c_re(input[6 * istride]);
+	       tmp19 = c_re(input[13 * istride]);
+	       tmp20 = tmp18 - tmp19;
+	       tmp32 = tmp18 + tmp19;
+	       tmp21 = c_re(input[8 * istride]);
+	       tmp22 = c_re(input[istride]);
+	       tmp23 = tmp21 - tmp22;
+	       tmp33 = tmp21 + tmp22;
+	  }
+	  tmp24 = tmp20 + tmp23;
+	  tmp78 = tmp20 - tmp23;
+	  tmp34 = tmp32 + tmp33;
+	  tmp98 = tmp33 - tmp32;
+     }
+     {	/* c_im of input pairs (8, 1) and (6, 13) */
+	  fftw_real tmp45;
+	  fftw_real tmp70;
+	  fftw_real tmp48;
+	  fftw_real tmp69;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp43;
+	       fftw_real tmp44;
+	       fftw_real tmp46;
+	       fftw_real tmp47;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp43 = c_im(input[8 * istride]);
+	       tmp44 = c_im(input[istride]);
+	       tmp45 = tmp43 - tmp44;
+	       tmp70 = tmp43 + tmp44;
+	       tmp46 = c_im(input[6 * istride]);
+	       tmp47 = c_im(input[13 * istride]);
+	       tmp48 = tmp46 - tmp47;
+	       tmp69 = tmp46 + tmp47;
+	  }
+	  tmp49 = tmp45 - tmp48;
+	  tmp85 = tmp45 + tmp48;
+	  tmp71 = tmp69 - tmp70;
+	  tmp95 = tmp70 + tmp69;
+     }
+     {	/* c_re of all 14 outputs: odd output indices first, then even ones */
+	  fftw_real tmp57;
+	  fftw_real tmp35;
+	  fftw_real tmp72;
+	  fftw_real tmp62;
+	  ASSERT_ALIGNED_DOUBLE;
+	  c_re(output[7 * ostride]) = tmp3 + tmp10 + tmp17 + tmp24;	/* c_re of even-index inputs minus c_re of odd-index inputs */
+	  tmp57 =
+	      (K433883739 * tmp42) + (K974927912 * tmp49) -
+	      (K781831482 * tmp56);
+	  tmp35 =
+	      tmp3 + (K623489801 * tmp17) - (K222520933 * tmp24) -
+	      (K900968867 * tmp10);
+	  c_re(output[11 * ostride]) = tmp35 - tmp57;
+	  c_re(output[3 * ostride]) = tmp35 + tmp57;
+	  {
+	       fftw_real tmp59;
+	       fftw_real tmp58;
+	       fftw_real tmp61;
+	       fftw_real tmp60;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp59 =
+		   (K974927912 * tmp42) - (K433883739 * tmp56) -
+		   (K781831482 * tmp49);
+	       tmp58 =
+		   tmp3 + (K623489801 * tmp24) - (K900968867 * tmp17) -
+		   (K222520933 * tmp10);
+	       c_re(output[5 * ostride]) = tmp58 - tmp59;
+	       c_re(output[9 * ostride]) = tmp58 + tmp59;
+	       tmp61 =
+		   (K781831482 * tmp42) + (K433883739 * tmp49) +
+		   (K974927912 * tmp56);
+	       tmp60 =
+		   tmp3 + (K623489801 * tmp10) - (K900968867 * tmp24) -
+		   (K222520933 * tmp17);
+	       c_re(output[13 * ostride]) = tmp60 - tmp61;
+	       c_re(output[ostride]) = tmp60 + tmp61;
+	  }
+	  c_re(output[0]) = tmp25 + tmp28 + tmp31 + tmp34;	/* sum of c_re over all 14 inputs */
+	  tmp72 =
+	      (K781831482 * tmp65) - (K974927912 * tmp68) -
+	      (K433883739 * tmp71);
+	  tmp62 =
+	      tmp25 + (K623489801 * tmp28) - (K900968867 * tmp34) -
+	      (K222520933 * tmp31);
+	  c_re(output[6 * ostride]) = tmp62 - tmp72;
+	  c_re(output[8 * ostride]) = tmp62 + tmp72;
+	  {
+	       fftw_real tmp74;
+	       fftw_real tmp73;
+	       fftw_real tmp76;
+	       fftw_real tmp75;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp74 =
+		   (K433883739 * tmp65) + (K781831482 * tmp68) -
+		   (K974927912 * tmp71);
+	       tmp73 =
+		   tmp25 + (K623489801 * tmp31) - (K222520933 * tmp34) -
+		   (K900968867 * tmp28);
+	       c_re(output[4 * ostride]) = tmp73 - tmp74;
+	       c_re(output[10 * ostride]) = tmp73 + tmp74;
+	       tmp76 =
+		   (K974927912 * tmp65) + (K781831482 * tmp71) +
+		   (K433883739 * tmp68);
+	       tmp75 =
+		   tmp25 + (K623489801 * tmp34) - (K900968867 * tmp31) -
+		   (K222520933 * tmp28);
+	       c_re(output[12 * ostride]) = tmp75 - tmp76;
+	       c_re(output[2 * ostride]) = tmp75 + tmp76;
+	  }
+     }
+     {	/* c_im of all 14 outputs: odd output indices first, then even ones */
+	  fftw_real tmp91;
+	  fftw_real tmp90;
+	  fftw_real tmp103;
+	  fftw_real tmp104;
+	  ASSERT_ALIGNED_DOUBLE;
+	  c_im(output[7 * ostride]) = tmp86 + tmp85 + tmp81 + tmp84;	/* c_im of even-index inputs minus c_im of odd-index inputs */
+	  tmp91 =
+	      (K974927912 * tmp77) - (K781831482 * tmp78) -
+	      (K433883739 * tmp79);
+	  tmp90 =
+	      (K623489801 * tmp85) + tmp84 - (K900968867 * tmp81) -
+	      (K222520933 * tmp86);
+	  c_im(output[5 * ostride]) = tmp90 - tmp91;
+	  c_im(output[9 * ostride]) = tmp91 + tmp90;
+	  {
+	       fftw_real tmp88;
+	       fftw_real tmp89;
+	       fftw_real tmp80;
+	       fftw_real tmp87;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp88 =
+		   (K781831482 * tmp77) + (K974927912 * tmp79) +
+		   (K433883739 * tmp78);
+	       tmp89 =
+		   (K623489801 * tmp86) + tmp84 - (K222520933 * tmp81) -
+		   (K900968867 * tmp85);
+	       c_im(output[ostride]) = tmp88 + tmp89;
+	       c_im(output[13 * ostride]) = tmp89 - tmp88;
+	       tmp80 =
+		   (K433883739 * tmp77) + (K974927912 * tmp78) -
+		   (K781831482 * tmp79);
+	       tmp87 =
+		   (K623489801 * tmp81) + tmp84 - (K222520933 * tmp85) -
+		   (K900968867 * tmp86);
+	       c_im(output[3 * ostride]) = tmp80 + tmp87;
+	       c_im(output[11 * ostride]) = tmp87 - tmp80;
+	  }
+	  c_im(output[0]) = tmp92 + tmp95 + tmp94 + tmp93;	/* sum of c_im over all 14 inputs */
+	  tmp103 =
+	      (K974927912 * tmp97) + (K433883739 * tmp99) +
+	      (K781831482 * tmp98);
+	  tmp104 =
+	      (K623489801 * tmp95) + tmp93 - (K900968867 * tmp94) -
+	      (K222520933 * tmp92);
+	  c_im(output[2 * ostride]) = tmp103 + tmp104;
+	  c_im(output[12 * ostride]) = tmp104 - tmp103;
+	  {
+	       fftw_real tmp100;
+	       fftw_real tmp96;
+	       fftw_real tmp102;
+	       fftw_real tmp101;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp100 =
+		   (K781831482 * tmp97) - (K433883739 * tmp98) -
+		   (K974927912 * tmp99);
+	       tmp96 =
+		   (K623489801 * tmp92) + tmp93 - (K222520933 * tmp94) -
+		   (K900968867 * tmp95);
+	       c_im(output[6 * ostride]) = tmp96 - tmp100;
+	       c_im(output[8 * ostride]) = tmp100 + tmp96;
+	       tmp102 =
+		   (K433883739 * tmp97) + (K781831482 * tmp99) -
+		   (K974927912 * tmp98);
+	       tmp101 =
+		   (K623489801 * tmp94) + tmp93 - (K222520933 * tmp95) -
+		   (K900968867 * tmp92);
+	       c_im(output[4 * ostride]) = tmp101 - tmp102;
+	       c_im(output[10 * ostride]) = tmp102 + tmp101;
+	  }
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_14_desc = {	/* descriptor record for fftwi_no_twiddle_14; fftw_codelet_desc itself is not declared in this file */
+     "fftwi_no_twiddle_14",	/* codelet name as a string */
+     (void (*)()) fftwi_no_twiddle_14,	/* the codelet, cast to an unprototyped function pointer */
+     14,	/* matches the 14 elements the codelet reads and writes; presumably the transform size */
+     FFTW_BACKWARD,
+     FFTW_NOTW,
+     320,	/* NOTE(review): meaning of this number is not visible here -- check fftw_codelet_desc */
+     0,
+     (const int *) 0,	/* null pointer */
+};
diff --git a/src/fftw/fni_15.c b/src/fftw/fni_15.c
new file mode 100644
index 0000000..1f2c342
--- /dev/null
+++ b/src/fftw/fni_15.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:46 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 15 */
+
+/*
+ * This function contains 156 FP additions, 56 FP multiplications,
+ * (or, 128 additions, 28 multiplications, 28 fused multiply/add),
+ * 62 stack variables, and 60 memory accesses
+ */
+static const fftw_real K951056516 =
+FFTW_KONST(+0.951056516295153572116439333379382143405698634);	/* numerically sin(2*pi/5) */
+static const fftw_real K587785252 =
+FFTW_KONST(+0.587785252292473129168705954639072768597652438);	/* numerically sin(pi/5) */
+static const fftw_real K250000000 =
+FFTW_KONST(+0.250000000000000000000000000000000000000000000);	/* 1/4 */
+static const fftw_real K559016994 =
+FFTW_KONST(+0.559016994374947424102293417182819058860154590);	/* numerically sqrt(5)/4 */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);	/* 1/2 */
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);	/* numerically sqrt(3)/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_15.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_15.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_15.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_15(const fftw_complex *input, fftw_complex *output,
+			 int istride, int ostride)
+{	/* 15-point backward-direction codelet (genfft -notwiddleinv 15): reads input[k * istride], writes output[k * ostride], k = 0..14; no 1/15 scaling is applied */
+     fftw_real tmp5;	/* the 42 temporaries declared at this level carry values from the three load blocks to the six store blocks below */
+     fftw_real tmp121;
+     fftw_real tmp148;
+     fftw_real tmp87;
+     fftw_real tmp35;
+     fftw_real tmp67;
+     fftw_real tmp21;
+     fftw_real tmp26;
+     fftw_real tmp27;
+     fftw_real tmp111;
+     fftw_real tmp114;
+     fftw_real tmp123;
+     fftw_real tmp139;
+     fftw_real tmp140;
+     fftw_real tmp146;
+     fftw_real tmp81;
+     fftw_real tmp82;
+     fftw_real tmp89;
+     fftw_real tmp71;
+     fftw_real tmp72;
+     fftw_real tmp73;
+     fftw_real tmp57;
+     fftw_real tmp64;
+     fftw_real tmp65;
+     fftw_real tmp10;
+     fftw_real tmp15;
+     fftw_real tmp16;
+     fftw_real tmp104;
+     fftw_real tmp107;
+     fftw_real tmp122;
+     fftw_real tmp136;
+     fftw_real tmp137;
+     fftw_real tmp145;
+     fftw_real tmp78;
+     fftw_real tmp79;
+     fftw_real tmp88;
+     fftw_real tmp68;
+     fftw_real tmp69;
+     fftw_real tmp70;
+     fftw_real tmp42;
+     fftw_real tmp49;
+     fftw_real tmp50;
+     ASSERT_ALIGNED_DOUBLE;	/* macro defined outside this file (presumably in fftw-int.h) -- TODO confirm what it checks */
+     {	/* load block 1: 3-point butterfly on input elements 0, 5 and 10 */
+	  fftw_real tmp1;
+	  fftw_real tmp30;
+	  fftw_real tmp4;
+	  fftw_real tmp29;
+	  fftw_real tmp33;
+	  fftw_real tmp120;
+	  fftw_real tmp119;
+	  fftw_real tmp34;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp30 = c_im(input[0]);
+	  {
+	       fftw_real tmp2;
+	       fftw_real tmp3;
+	       fftw_real tmp31;
+	       fftw_real tmp32;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp2 = c_re(input[5 * istride]);
+	       tmp3 = c_re(input[10 * istride]);
+	       tmp4 = tmp2 + tmp3;
+	       tmp29 = K866025403 * (tmp2 - tmp3);
+	       tmp31 = c_im(input[5 * istride]);
+	       tmp32 = c_im(input[10 * istride]);
+	       tmp33 = tmp31 + tmp32;
+	       tmp120 = K866025403 * (tmp32 - tmp31);
+	  }
+	  tmp5 = tmp1 + tmp4;	/* real parts of elements 0, 5, 10 summed */
+	  tmp119 = tmp1 - (K500000000 * tmp4);
+	  tmp121 = tmp119 - tmp120;
+	  tmp148 = tmp119 + tmp120;
+	  tmp87 = tmp30 + tmp33;	/* imaginary parts of elements 0, 5, 10 summed */
+	  tmp34 = tmp30 - (K500000000 * tmp33);
+	  tmp35 = tmp29 + tmp34;
+	  tmp67 = tmp34 - tmp29;
+     }
+     {	/* load block 2: 3-point butterflies on elements (6, 11, 1) and (9, 14, 4), then pairwise sums of their results */
+	  fftw_real tmp17;
+	  fftw_real tmp20;
+	  fftw_real tmp51;
+	  fftw_real tmp109;
+	  fftw_real tmp52;
+	  fftw_real tmp55;
+	  fftw_real tmp56;
+	  fftw_real tmp110;
+	  fftw_real tmp22;
+	  fftw_real tmp25;
+	  fftw_real tmp58;
+	  fftw_real tmp112;
+	  fftw_real tmp59;
+	  fftw_real tmp62;
+	  fftw_real tmp63;
+	  fftw_real tmp113;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp18;
+	       fftw_real tmp19;
+	       fftw_real tmp53;
+	       fftw_real tmp54;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp17 = c_re(input[6 * istride]);
+	       tmp18 = c_re(input[11 * istride]);
+	       tmp19 = c_re(input[istride]);
+	       tmp20 = tmp18 + tmp19;
+	       tmp51 = K866025403 * (tmp18 - tmp19);
+	       tmp109 = tmp17 - (K500000000 * tmp20);
+	       tmp52 = c_im(input[6 * istride]);
+	       tmp53 = c_im(input[11 * istride]);
+	       tmp54 = c_im(input[istride]);
+	       tmp55 = tmp53 + tmp54;
+	       tmp56 = tmp52 - (K500000000 * tmp55);
+	       tmp110 = K866025403 * (tmp54 - tmp53);
+	  }
+	  {
+	       fftw_real tmp23;
+	       fftw_real tmp24;
+	       fftw_real tmp60;
+	       fftw_real tmp61;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp22 = c_re(input[9 * istride]);
+	       tmp23 = c_re(input[14 * istride]);
+	       tmp24 = c_re(input[4 * istride]);
+	       tmp25 = tmp23 + tmp24;
+	       tmp58 = K866025403 * (tmp23 - tmp24);
+	       tmp112 = tmp22 - (K500000000 * tmp25);
+	       tmp59 = c_im(input[9 * istride]);
+	       tmp60 = c_im(input[14 * istride]);
+	       tmp61 = c_im(input[4 * istride]);
+	       tmp62 = tmp60 + tmp61;
+	       tmp63 = tmp59 - (K500000000 * tmp62);
+	       tmp113 = K866025403 * (tmp61 - tmp60);
+	  }
+	  tmp21 = tmp17 + tmp20;
+	  tmp26 = tmp22 + tmp25;
+	  tmp27 = tmp21 + tmp26;
+	  tmp111 = tmp109 - tmp110;
+	  tmp114 = tmp112 - tmp113;
+	  tmp123 = tmp111 + tmp114;
+	  tmp139 = tmp109 + tmp110;
+	  tmp140 = tmp112 + tmp113;
+	  tmp146 = tmp139 + tmp140;
+	  tmp81 = tmp52 + tmp55;
+	  tmp82 = tmp59 + tmp62;
+	  tmp89 = tmp81 + tmp82;
+	  tmp71 = tmp56 - tmp51;
+	  tmp72 = tmp63 - tmp58;
+	  tmp73 = tmp71 + tmp72;
+	  tmp57 = tmp51 + tmp56;
+	  tmp64 = tmp58 + tmp63;
+	  tmp65 = tmp57 + tmp64;
+     }
+     {	/* load block 3: 3-point butterflies on elements (3, 8, 13) and (12, 2, 7), then pairwise sums of their results */
+	  fftw_real tmp6;
+	  fftw_real tmp9;
+	  fftw_real tmp36;
+	  fftw_real tmp102;
+	  fftw_real tmp37;
+	  fftw_real tmp40;
+	  fftw_real tmp41;
+	  fftw_real tmp103;
+	  fftw_real tmp11;
+	  fftw_real tmp14;
+	  fftw_real tmp43;
+	  fftw_real tmp105;
+	  fftw_real tmp44;
+	  fftw_real tmp47;
+	  fftw_real tmp48;
+	  fftw_real tmp106;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp7;
+	       fftw_real tmp8;
+	       fftw_real tmp38;
+	       fftw_real tmp39;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp6 = c_re(input[3 * istride]);
+	       tmp7 = c_re(input[8 * istride]);
+	       tmp8 = c_re(input[13 * istride]);
+	       tmp9 = tmp7 + tmp8;
+	       tmp36 = K866025403 * (tmp7 - tmp8);
+	       tmp102 = tmp6 - (K500000000 * tmp9);
+	       tmp37 = c_im(input[3 * istride]);
+	       tmp38 = c_im(input[8 * istride]);
+	       tmp39 = c_im(input[13 * istride]);
+	       tmp40 = tmp38 + tmp39;
+	       tmp41 = tmp37 - (K500000000 * tmp40);
+	       tmp103 = K866025403 * (tmp39 - tmp38);
+	  }
+	  {
+	       fftw_real tmp12;
+	       fftw_real tmp13;
+	       fftw_real tmp45;
+	       fftw_real tmp46;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp11 = c_re(input[12 * istride]);
+	       tmp12 = c_re(input[2 * istride]);
+	       tmp13 = c_re(input[7 * istride]);
+	       tmp14 = tmp12 + tmp13;
+	       tmp43 = K866025403 * (tmp12 - tmp13);
+	       tmp105 = tmp11 - (K500000000 * tmp14);
+	       tmp44 = c_im(input[12 * istride]);
+	       tmp45 = c_im(input[2 * istride]);
+	       tmp46 = c_im(input[7 * istride]);
+	       tmp47 = tmp45 + tmp46;
+	       tmp48 = tmp44 - (K500000000 * tmp47);
+	       tmp106 = K866025403 * (tmp46 - tmp45);
+	  }
+	  tmp10 = tmp6 + tmp9;
+	  tmp15 = tmp11 + tmp14;
+	  tmp16 = tmp10 + tmp15;
+	  tmp104 = tmp102 - tmp103;
+	  tmp107 = tmp105 - tmp106;
+	  tmp122 = tmp104 + tmp107;
+	  tmp136 = tmp102 + tmp103;
+	  tmp137 = tmp105 + tmp106;
+	  tmp145 = tmp136 + tmp137;
+	  tmp78 = tmp37 + tmp40;
+	  tmp79 = tmp44 + tmp47;
+	  tmp88 = tmp78 + tmp79;
+	  tmp68 = tmp41 - tmp36;
+	  tmp69 = tmp48 - tmp43;
+	  tmp70 = tmp68 + tmp69;
+	  tmp42 = tmp36 + tmp41;
+	  tmp49 = tmp43 + tmp48;
+	  tmp50 = tmp42 + tmp49;
+     }
+     {	/* store block: real parts of output elements 0, 3, 6, 9, 12; the first store happens here, after all 15 inputs have been loaded */
+	  fftw_real tmp76;
+	  fftw_real tmp28;
+	  fftw_real tmp75;
+	  fftw_real tmp84;
+	  fftw_real tmp86;
+	  fftw_real tmp80;
+	  fftw_real tmp83;
+	  fftw_real tmp85;
+	  fftw_real tmp77;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp76 = K559016994 * (tmp16 - tmp27);
+	  tmp28 = tmp16 + tmp27;
+	  tmp75 = tmp5 - (K250000000 * tmp28);
+	  tmp80 = tmp78 - tmp79;
+	  tmp83 = tmp81 - tmp82;
+	  tmp84 = (K587785252 * tmp80) - (K951056516 * tmp83);
+	  tmp86 = (K951056516 * tmp80) + (K587785252 * tmp83);
+	  c_re(output[0]) = tmp5 + tmp28;	/* sum of the real parts of all 15 inputs */
+	  tmp85 = tmp76 + tmp75;
+	  c_re(output[6 * ostride]) = tmp85 - tmp86;
+	  c_re(output[9 * ostride]) = tmp85 + tmp86;
+	  tmp77 = tmp75 - tmp76;
+	  c_re(output[12 * ostride]) = tmp77 - tmp84;
+	  c_re(output[3 * ostride]) = tmp77 + tmp84;
+     }
+     {	/* store block: imaginary parts of output elements 1, 4, 7, 10, 13 */
+	  fftw_real tmp134;
+	  fftw_real tmp66;
+	  fftw_real tmp133;
+	  fftw_real tmp142;
+	  fftw_real tmp144;
+	  fftw_real tmp138;
+	  fftw_real tmp141;
+	  fftw_real tmp143;
+	  fftw_real tmp135;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp134 = K559016994 * (tmp50 - tmp65);
+	  tmp66 = tmp50 + tmp65;
+	  tmp133 = tmp35 - (K250000000 * tmp66);
+	  tmp138 = tmp136 - tmp137;
+	  tmp141 = tmp139 - tmp140;
+	  tmp142 = (K587785252 * tmp138) - (K951056516 * tmp141);
+	  tmp144 = (K951056516 * tmp138) + (K587785252 * tmp141);
+	  c_im(output[10 * ostride]) = tmp35 + tmp66;
+	  tmp143 = tmp134 + tmp133;
+	  c_im(output[4 * ostride]) = tmp143 - tmp144;
+	  c_im(output[ostride]) = tmp143 + tmp144;
+	  tmp135 = tmp133 - tmp134;
+	  c_im(output[13 * ostride]) = tmp135 - tmp142;
+	  c_im(output[7 * ostride]) = tmp135 + tmp142;
+     }
+     {	/* store block: real parts of output elements 1, 4, 7, 10, 13 */
+	  fftw_real tmp147;
+	  fftw_real tmp149;
+	  fftw_real tmp150;
+	  fftw_real tmp154;
+	  fftw_real tmp156;
+	  fftw_real tmp152;
+	  fftw_real tmp153;
+	  fftw_real tmp155;
+	  fftw_real tmp151;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp147 = K559016994 * (tmp145 - tmp146);
+	  tmp149 = tmp145 + tmp146;
+	  tmp150 = tmp148 - (K250000000 * tmp149);
+	  tmp152 = tmp42 - tmp49;
+	  tmp153 = tmp57 - tmp64;
+	  tmp154 = (K951056516 * tmp152) + (K587785252 * tmp153);
+	  tmp156 = (K587785252 * tmp152) - (K951056516 * tmp153);
+	  c_re(output[10 * ostride]) = tmp148 + tmp149;
+	  tmp155 = tmp150 - tmp147;
+	  c_re(output[7 * ostride]) = tmp155 - tmp156;
+	  c_re(output[13 * ostride]) = tmp156 + tmp155;
+	  tmp151 = tmp147 + tmp150;
+	  c_re(output[ostride]) = tmp151 - tmp154;
+	  c_re(output[4 * ostride]) = tmp154 + tmp151;
+     }
+     {	/* store block: real parts of output elements 2, 5, 8, 11, 14 */
+	  fftw_real tmp126;
+	  fftw_real tmp124;
+	  fftw_real tmp125;
+	  fftw_real tmp130;
+	  fftw_real tmp132;
+	  fftw_real tmp128;
+	  fftw_real tmp129;
+	  fftw_real tmp131;
+	  fftw_real tmp127;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp126 = K559016994 * (tmp122 - tmp123);
+	  tmp124 = tmp122 + tmp123;
+	  tmp125 = tmp121 - (K250000000 * tmp124);
+	  tmp128 = tmp68 - tmp69;
+	  tmp129 = tmp71 - tmp72;
+	  tmp130 = (K587785252 * tmp128) - (K951056516 * tmp129);
+	  tmp132 = (K951056516 * tmp128) + (K587785252 * tmp129);
+	  c_re(output[5 * ostride]) = tmp121 + tmp124;
+	  tmp131 = tmp126 + tmp125;
+	  c_re(output[11 * ostride]) = tmp131 - tmp132;
+	  c_re(output[14 * ostride]) = tmp132 + tmp131;
+	  tmp127 = tmp125 - tmp126;
+	  c_re(output[2 * ostride]) = tmp127 - tmp130;
+	  c_re(output[8 * ostride]) = tmp130 + tmp127;
+     }
+     {	/* store block: imaginary parts of output elements 0, 3, 6, 9, 12 */
+	  fftw_real tmp92;
+	  fftw_real tmp90;
+	  fftw_real tmp91;
+	  fftw_real tmp96;
+	  fftw_real tmp97;
+	  fftw_real tmp94;
+	  fftw_real tmp95;
+	  fftw_real tmp98;
+	  fftw_real tmp93;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp92 = K559016994 * (tmp88 - tmp89);
+	  tmp90 = tmp88 + tmp89;
+	  tmp91 = tmp87 - (K250000000 * tmp90);
+	  tmp94 = tmp10 - tmp15;
+	  tmp95 = tmp21 - tmp26;
+	  tmp96 = (K587785252 * tmp94) - (K951056516 * tmp95);
+	  tmp97 = (K951056516 * tmp94) + (K587785252 * tmp95);
+	  c_im(output[0]) = tmp87 + tmp90;	/* sum of the imaginary parts of all 15 inputs */
+	  tmp98 = tmp92 + tmp91;
+	  c_im(output[6 * ostride]) = tmp97 + tmp98;
+	  c_im(output[9 * ostride]) = tmp98 - tmp97;
+	  tmp93 = tmp91 - tmp92;
+	  c_im(output[3 * ostride]) = tmp93 - tmp96;
+	  c_im(output[12 * ostride]) = tmp96 + tmp93;
+     }
+     {	/* store block: imaginary parts of output elements 2, 5, 8, 11, 14 */
+	  fftw_real tmp100;
+	  fftw_real tmp74;
+	  fftw_real tmp99;
+	  fftw_real tmp116;
+	  fftw_real tmp118;
+	  fftw_real tmp108;
+	  fftw_real tmp115;
+	  fftw_real tmp117;
+	  fftw_real tmp101;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp100 = K559016994 * (tmp70 - tmp73);
+	  tmp74 = tmp70 + tmp73;
+	  tmp99 = tmp67 - (K250000000 * tmp74);
+	  tmp108 = tmp104 - tmp107;
+	  tmp115 = tmp111 - tmp114;
+	  tmp116 = (K587785252 * tmp108) - (K951056516 * tmp115);
+	  tmp118 = (K951056516 * tmp108) + (K587785252 * tmp115);
+	  c_im(output[5 * ostride]) = tmp67 + tmp74;
+	  tmp117 = tmp100 + tmp99;
+	  c_im(output[14 * ostride]) = tmp117 - tmp118;
+	  c_im(output[11 * ostride]) = tmp117 + tmp118;
+	  tmp101 = tmp99 - tmp100;
+	  c_im(output[8 * ostride]) = tmp101 - tmp116;
+	  c_im(output[2 * ostride]) = tmp101 + tmp116;
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_15_desc = {	/* positional initializer; the member names live in the fftw_codelet_desc declaration, which is not in this file */
+     "fftwi_no_twiddle_15",	/* text identical to the function's identifier */
+     (void (*)()) fftwi_no_twiddle_15,	/* address of the codelet defined above, cast to an unprototyped function-pointer type */
+     15,	/* presumably the transform size -- TODO confirm against fftw.h */
+     FFTW_BACKWARD,	/* direction tag; matches the genfft "-notwiddleinv" option recorded in this file */
+     FFTW_NOTW,	/* codelet-kind tag; presumably "no twiddle factors" -- confirm against fftw.h */
+     342,	/* NOTE(review): equals 22 * 15 + 12; looks like a unique codelet id/signature -- confirm */
+     0,	/* presumably the twiddle-factor count -- confirm */
+     (const int *) 0,	/* null pointer; presumably an unused twiddle-order table -- confirm */
+};
diff --git a/src/fftw/fni_16.c b/src/fftw/fni_16.c
new file mode 100644
index 0000000..bb2e49a
--- /dev/null
+++ b/src/fftw/fni_16.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:47 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 16 */
+
+/*
+ * This function contains 144 FP additions, 24 FP multiplications,
+ * (or, 136 additions, 16 multiplications, 8 fused multiply/add),
+ * 46 stack variables, and 64 memory accesses
+ */
+static const fftw_real K382683432 =
+FFTW_KONST(+0.382683432365089771728459984030398866761344562);	/* numerically sin(pi/8) */
+static const fftw_real K923879532 =
+FFTW_KONST(+0.923879532511286756128183189396788286822416626);	/* numerically cos(pi/8) */
+static const fftw_real K707106781 =
+FFTW_KONST(+0.707106781186547524400844362104849039284835938);	/* numerically sqrt(2)/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_16(const fftw_complex *input, fftw_complex *output,
+			 int istride, int ostride)
+{	/* 16-point backward-direction codelet (genfft -notwiddleinv 16): reads input[k * istride], writes output[k * ostride], k = 0..15; no 1/16 scaling is applied */
+     fftw_real tmp7;	/* the 32 temporaries declared at this level carry values from the four load blocks to the six store blocks below */
+     fftw_real tmp129;
+     fftw_real tmp38;
+     fftw_real tmp115;
+     fftw_real tmp49;
+     fftw_real tmp95;
+     fftw_real tmp83;
+     fftw_real tmp105;
+     fftw_real tmp29;
+     fftw_real tmp123;
+     fftw_real tmp73;
+     fftw_real tmp101;
+     fftw_real tmp78;
+     fftw_real tmp102;
+     fftw_real tmp126;
+     fftw_real tmp141;
+     fftw_real tmp14;
+     fftw_real tmp116;
+     fftw_real tmp45;
+     fftw_real tmp130;
+     fftw_real tmp52;
+     fftw_real tmp84;
+     fftw_real tmp55;
+     fftw_real tmp85;
+     fftw_real tmp22;
+     fftw_real tmp118;
+     fftw_real tmp62;
+     fftw_real tmp98;
+     fftw_real tmp67;
+     fftw_real tmp99;
+     fftw_real tmp121;
+     fftw_real tmp140;
+     ASSERT_ALIGNED_DOUBLE;	/* macro defined outside this file (presumably in fftw-int.h) -- TODO confirm what it checks */
+     {	/* load block 1: sums and differences of input elements 0, 8, 4 and 12 */
+	  fftw_real tmp3;
+	  fftw_real tmp81;
+	  fftw_real tmp34;
+	  fftw_real tmp48;
+	  fftw_real tmp6;
+	  fftw_real tmp47;
+	  fftw_real tmp37;
+	  fftw_real tmp82;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp1;
+	       fftw_real tmp2;
+	       fftw_real tmp32;
+	       fftw_real tmp33;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(input[0]);
+	       tmp2 = c_re(input[8 * istride]);
+	       tmp3 = tmp1 + tmp2;
+	       tmp81 = tmp1 - tmp2;
+	       tmp32 = c_im(input[0]);
+	       tmp33 = c_im(input[8 * istride]);
+	       tmp34 = tmp32 + tmp33;
+	       tmp48 = tmp32 - tmp33;
+	  }
+	  {
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       fftw_real tmp35;
+	       fftw_real tmp36;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp4 = c_re(input[4 * istride]);
+	       tmp5 = c_re(input[12 * istride]);
+	       tmp6 = tmp4 + tmp5;
+	       tmp47 = tmp4 - tmp5;
+	       tmp35 = c_im(input[4 * istride]);
+	       tmp36 = c_im(input[12 * istride]);
+	       tmp37 = tmp35 + tmp36;
+	       tmp82 = tmp35 - tmp36;
+	  }
+	  tmp7 = tmp3 + tmp6;
+	  tmp129 = tmp3 - tmp6;
+	  tmp38 = tmp34 + tmp37;
+	  tmp115 = tmp34 - tmp37;
+	  tmp49 = tmp47 + tmp48;
+	  tmp95 = tmp48 - tmp47;
+	  tmp83 = tmp81 - tmp82;
+	  tmp105 = tmp81 + tmp82;
+     }
+     {	/* load block 2: sums and differences of input elements 15, 7, 3 and 11 */
+	  fftw_real tmp25;
+	  fftw_real tmp74;
+	  fftw_real tmp72;
+	  fftw_real tmp124;
+	  fftw_real tmp28;
+	  fftw_real tmp69;
+	  fftw_real tmp77;
+	  fftw_real tmp125;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp23;
+	       fftw_real tmp24;
+	       fftw_real tmp70;
+	       fftw_real tmp71;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp23 = c_re(input[15 * istride]);
+	       tmp24 = c_re(input[7 * istride]);
+	       tmp25 = tmp23 + tmp24;
+	       tmp74 = tmp23 - tmp24;
+	       tmp70 = c_im(input[15 * istride]);
+	       tmp71 = c_im(input[7 * istride]);
+	       tmp72 = tmp70 - tmp71;
+	       tmp124 = tmp70 + tmp71;
+	  }
+	  {
+	       fftw_real tmp26;
+	       fftw_real tmp27;
+	       fftw_real tmp75;
+	       fftw_real tmp76;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp26 = c_re(input[3 * istride]);
+	       tmp27 = c_re(input[11 * istride]);
+	       tmp28 = tmp26 + tmp27;
+	       tmp69 = tmp26 - tmp27;
+	       tmp75 = c_im(input[3 * istride]);
+	       tmp76 = c_im(input[11 * istride]);
+	       tmp77 = tmp75 - tmp76;
+	       tmp125 = tmp75 + tmp76;
+	  }
+	  tmp29 = tmp25 + tmp28;
+	  tmp123 = tmp25 - tmp28;
+	  tmp73 = tmp69 + tmp72;
+	  tmp101 = tmp72 - tmp69;
+	  tmp78 = tmp74 - tmp77;
+	  tmp102 = tmp74 + tmp77;
+	  tmp126 = tmp124 - tmp125;
+	  tmp141 = tmp124 + tmp125;
+     }
+     {	/* load block 3: sums and differences of input elements 2, 10, 14 and 6 */
+	  fftw_real tmp10;
+	  fftw_real tmp50;
+	  fftw_real tmp41;
+	  fftw_real tmp51;
+	  fftw_real tmp13;
+	  fftw_real tmp54;
+	  fftw_real tmp44;
+	  fftw_real tmp53;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp8;
+	       fftw_real tmp9;
+	       fftw_real tmp39;
+	       fftw_real tmp40;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp8 = c_re(input[2 * istride]);
+	       tmp9 = c_re(input[10 * istride]);
+	       tmp10 = tmp8 + tmp9;
+	       tmp50 = tmp8 - tmp9;
+	       tmp39 = c_im(input[2 * istride]);
+	       tmp40 = c_im(input[10 * istride]);
+	       tmp41 = tmp39 + tmp40;
+	       tmp51 = tmp39 - tmp40;
+	  }
+	  {
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       fftw_real tmp42;
+	       fftw_real tmp43;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp11 = c_re(input[14 * istride]);
+	       tmp12 = c_re(input[6 * istride]);
+	       tmp13 = tmp11 + tmp12;
+	       tmp54 = tmp11 - tmp12;
+	       tmp42 = c_im(input[14 * istride]);
+	       tmp43 = c_im(input[6 * istride]);
+	       tmp44 = tmp42 + tmp43;
+	       tmp53 = tmp42 - tmp43;
+	  }
+	  tmp14 = tmp10 + tmp13;
+	  tmp116 = tmp10 - tmp13;
+	  tmp45 = tmp41 + tmp44;
+	  tmp130 = tmp44 - tmp41;
+	  tmp52 = tmp50 + tmp51;
+	  tmp84 = tmp50 - tmp51;
+	  tmp55 = tmp53 - tmp54;
+	  tmp85 = tmp54 + tmp53;
+     }
+     {	/* load block 4: sums and differences of input elements 1, 9, 5 and 13 */
+	  fftw_real tmp18;
+	  fftw_real tmp63;
+	  fftw_real tmp61;
+	  fftw_real tmp119;
+	  fftw_real tmp21;
+	  fftw_real tmp58;
+	  fftw_real tmp66;
+	  fftw_real tmp120;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp16;
+	       fftw_real tmp17;
+	       fftw_real tmp59;
+	       fftw_real tmp60;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp16 = c_re(input[istride]);
+	       tmp17 = c_re(input[9 * istride]);
+	       tmp18 = tmp16 + tmp17;
+	       tmp63 = tmp16 - tmp17;
+	       tmp59 = c_im(input[istride]);
+	       tmp60 = c_im(input[9 * istride]);
+	       tmp61 = tmp59 - tmp60;
+	       tmp119 = tmp59 + tmp60;
+	  }
+	  {
+	       fftw_real tmp19;
+	       fftw_real tmp20;
+	       fftw_real tmp64;
+	       fftw_real tmp65;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp19 = c_re(input[5 * istride]);
+	       tmp20 = c_re(input[13 * istride]);
+	       tmp21 = tmp19 + tmp20;
+	       tmp58 = tmp19 - tmp20;
+	       tmp64 = c_im(input[5 * istride]);
+	       tmp65 = c_im(input[13 * istride]);
+	       tmp66 = tmp64 - tmp65;
+	       tmp120 = tmp64 + tmp65;
+	  }
+	  tmp22 = tmp18 + tmp21;
+	  tmp118 = tmp18 - tmp21;
+	  tmp62 = tmp58 + tmp61;
+	  tmp98 = tmp61 - tmp58;
+	  tmp67 = tmp63 - tmp66;
+	  tmp99 = tmp63 + tmp66;
+	  tmp121 = tmp119 - tmp120;
+	  tmp140 = tmp119 + tmp120;
+     }
+     {	/* store block: real parts of outputs 0 and 8, imaginary parts of outputs 4 and 12; the first store happens here, after all 16 inputs have been loaded */
+	  fftw_real tmp15;
+	  fftw_real tmp30;
+	  fftw_real tmp31;
+	  fftw_real tmp46;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp15 = tmp7 + tmp14;
+	  tmp30 = tmp22 + tmp29;
+	  c_re(output[8 * ostride]) = tmp15 - tmp30;
+	  c_re(output[0]) = tmp15 + tmp30;	/* sum of the real parts of all 16 inputs */
+	  tmp31 = tmp22 - tmp29;
+	  tmp46 = tmp38 - tmp45;
+	  c_im(output[4 * ostride]) = tmp31 + tmp46;
+	  c_im(output[12 * ostride]) = tmp46 - tmp31;
+     }
+     {	/* store block: imaginary parts of outputs 0 and 8, real parts of outputs 4 and 12 */
+	  fftw_real tmp139;
+	  fftw_real tmp142;
+	  fftw_real tmp143;
+	  fftw_real tmp144;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp139 = tmp38 + tmp45;
+	  tmp142 = tmp140 + tmp141;
+	  c_im(output[8 * ostride]) = tmp139 - tmp142;
+	  c_im(output[0]) = tmp139 + tmp142;	/* sum of the imaginary parts of all 16 inputs */
+	  tmp143 = tmp7 - tmp14;
+	  tmp144 = tmp141 - tmp140;
+	  c_re(output[12 * ostride]) = tmp143 - tmp144;
+	  c_re(output[4 * ostride]) = tmp143 + tmp144;
+     }
+     {	/* store block: imaginary parts of outputs 6 and 14, real parts of outputs 2 and 10 */
+	  fftw_real tmp117;
+	  fftw_real tmp131;
+	  fftw_real tmp128;
+	  fftw_real tmp132;
+	  fftw_real tmp122;
+	  fftw_real tmp127;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp117 = tmp115 - tmp116;
+	  tmp131 = tmp129 + tmp130;
+	  tmp122 = tmp118 - tmp121;
+	  tmp127 = tmp123 + tmp126;
+	  tmp128 = K707106781 * (tmp122 - tmp127);
+	  tmp132 = K707106781 * (tmp122 + tmp127);
+	  c_im(output[14 * ostride]) = tmp117 - tmp128;
+	  c_im(output[6 * ostride]) = tmp117 + tmp128;
+	  c_re(output[10 * ostride]) = tmp131 - tmp132;
+	  c_re(output[2 * ostride]) = tmp131 + tmp132;
+     }
+     {	/* store block: imaginary parts of outputs 2 and 10, real parts of outputs 6 and 14 */
+	  fftw_real tmp133;
+	  fftw_real tmp137;
+	  fftw_real tmp136;
+	  fftw_real tmp138;
+	  fftw_real tmp134;
+	  fftw_real tmp135;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp133 = tmp116 + tmp115;
+	  tmp137 = tmp129 - tmp130;
+	  tmp134 = tmp118 + tmp121;
+	  tmp135 = tmp126 - tmp123;
+	  tmp136 = K707106781 * (tmp134 + tmp135);
+	  tmp138 = K707106781 * (tmp135 - tmp134);
+	  c_im(output[10 * ostride]) = tmp133 - tmp136;
+	  c_im(output[2 * ostride]) = tmp133 + tmp136;
+	  c_re(output[14 * ostride]) = tmp137 - tmp138;
+	  c_re(output[6 * ostride]) = tmp137 + tmp138;
+     }
+     {	/* store block: real and imaginary parts of outputs 1, 5, 9 and 13 */
+	  fftw_real tmp57;
+	  fftw_real tmp89;
+	  fftw_real tmp92;
+	  fftw_real tmp94;
+	  fftw_real tmp87;
+	  fftw_real tmp93;
+	  fftw_real tmp80;
+	  fftw_real tmp88;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp56;
+	       fftw_real tmp90;
+	       fftw_real tmp91;
+	       fftw_real tmp86;
+	       fftw_real tmp68;
+	       fftw_real tmp79;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp56 = K707106781 * (tmp52 + tmp55);
+	       tmp57 = tmp49 + tmp56;
+	       tmp89 = tmp49 - tmp56;
+	       tmp90 = (K923879532 * tmp67) - (K382683432 * tmp62);
+	       tmp91 = (K382683432 * tmp73) + (K923879532 * tmp78);
+	       tmp92 = tmp90 - tmp91;
+	       tmp94 = tmp90 + tmp91;
+	       tmp86 = K707106781 * (tmp84 + tmp85);
+	       tmp87 = tmp83 - tmp86;
+	       tmp93 = tmp83 + tmp86;
+	       tmp68 = (K923879532 * tmp62) + (K382683432 * tmp67);
+	       tmp79 = (K923879532 * tmp73) - (K382683432 * tmp78);
+	       tmp80 = tmp68 + tmp79;
+	       tmp88 = tmp79 - tmp68;
+	  }
+	  c_im(output[9 * ostride]) = tmp57 - tmp80;
+	  c_im(output[ostride]) = tmp57 + tmp80;
+	  c_re(output[13 * ostride]) = tmp87 - tmp88;
+	  c_re(output[5 * ostride]) = tmp87 + tmp88;
+	  c_im(output[13 * ostride]) = tmp89 - tmp92;
+	  c_im(output[5 * ostride]) = tmp89 + tmp92;
+	  c_re(output[9 * ostride]) = tmp93 - tmp94;
+	  c_re(output[ostride]) = tmp93 + tmp94;
+     }
+     {	/* store block: real and imaginary parts of outputs 3, 7, 11 and 15 */
+	  fftw_real tmp97;
+	  fftw_real tmp109;
+	  fftw_real tmp112;
+	  fftw_real tmp114;
+	  fftw_real tmp107;
+	  fftw_real tmp113;
+	  fftw_real tmp104;
+	  fftw_real tmp108;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp96;
+	       fftw_real tmp110;
+	       fftw_real tmp111;
+	       fftw_real tmp106;
+	       fftw_real tmp100;
+	       fftw_real tmp103;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp96 = K707106781 * (tmp84 - tmp85);
+	       tmp97 = tmp95 + tmp96;
+	       tmp109 = tmp95 - tmp96;
+	       tmp110 = (K382683432 * tmp99) - (K923879532 * tmp98);
+	       tmp111 = (K923879532 * tmp101) + (K382683432 * tmp102);
+	       tmp112 = tmp110 - tmp111;
+	       tmp114 = tmp110 + tmp111;
+	       tmp106 = K707106781 * (tmp55 - tmp52);
+	       tmp107 = tmp105 - tmp106;
+	       tmp113 = tmp105 + tmp106;
+	       tmp100 = (K382683432 * tmp98) + (K923879532 * tmp99);
+	       tmp103 = (K382683432 * tmp101) - (K923879532 * tmp102);
+	       tmp104 = tmp100 + tmp103;
+	       tmp108 = tmp103 - tmp100;
+	  }
+	  c_im(output[11 * ostride]) = tmp97 - tmp104;
+	  c_im(output[3 * ostride]) = tmp97 + tmp104;
+	  c_re(output[15 * ostride]) = tmp107 - tmp108;
+	  c_re(output[7 * ostride]) = tmp107 + tmp108;
+	  c_im(output[15 * ostride]) = tmp109 - tmp112;
+	  c_im(output[7 * ostride]) = tmp109 + tmp112;
+	  c_re(output[11 * ostride]) = tmp113 - tmp114;
+	  c_re(output[3 * ostride]) = tmp113 + tmp114;
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_16_desc = {	/* positional initializer; the member names live in the fftw_codelet_desc declaration, which is not in this file */
+     "fftwi_no_twiddle_16",	/* text identical to the function's identifier */
+     (void (*)()) fftwi_no_twiddle_16,	/* address of the codelet defined above, cast to an unprototyped function-pointer type */
+     16,	/* presumably the transform size -- TODO confirm against fftw.h */
+     FFTW_BACKWARD,	/* direction tag; matches the genfft "-notwiddleinv" option recorded in this file */
+     FFTW_NOTW,	/* codelet-kind tag; presumably "no twiddle factors" -- confirm against fftw.h */
+     364,	/* NOTE(review): equals 22 * 16 + 12; looks like a unique codelet id/signature -- confirm */
+     0,	/* presumably the twiddle-factor count -- confirm */
+     (const int *) 0,	/* null pointer; presumably an unused twiddle-order table -- confirm */
+};
diff --git a/src/fftw/fni_2.c b/src/fftw/fni_2.c
new file mode 100644
index 0000000..ca159e5
--- /dev/null
+++ b/src/fftw/fni_2.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:18 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 2 */
+
+/*
+ * This function contains 4 FP additions, 0 FP multiplications,
+ * (or, 4 additions, 0 multiplications, 0 fused multiply/add),
+ * 4 stack variables, and 8 memory accesses
+ */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_2(const fftw_complex *input, fftw_complex *output,
+			int istride, int ostride)
+{
+     /* Two-point backward butterfly: output[0] receives the sum and
+      * output[ostride] the difference of input[0] and input[istride]. */
+     fftw_real re_a, re_b;	/* real parts of input[0], input[istride] */
+     fftw_real im_a, im_b;	/* imaginary parts of the same elements */
+     ASSERT_ALIGNED_DOUBLE;
+     re_a = c_re(input[0]);
+     re_b = c_re(input[istride]);
+     c_re(output[ostride]) = re_a - re_b;
+     c_re(output[0]) = re_a + re_b;
+     im_a = c_im(input[0]);
+     im_b = c_im(input[istride]);
+     c_im(output[ostride]) = im_a - im_b;
+     c_im(output[0]) = im_a + im_b;
+}
+
+fftw_codelet_desc fftwi_no_twiddle_2_desc = {	/* positional initializer; the member names live in the fftw_codelet_desc declaration, which is not in this file */
+     "fftwi_no_twiddle_2",	/* text identical to the function's identifier */
+     (void (*)()) fftwi_no_twiddle_2,	/* address of the codelet defined above, cast to an unprototyped function-pointer type */
+     2,	/* presumably the transform size -- TODO confirm against fftw.h */
+     FFTW_BACKWARD,	/* direction tag; matches the genfft "-notwiddleinv" option recorded in this file */
+     FFTW_NOTW,	/* codelet-kind tag; presumably "no twiddle factors" -- confirm against fftw.h */
+     56,	/* NOTE(review): equals 22 * 2 + 12; looks like a unique codelet id/signature -- confirm */
+     0,	/* presumably the twiddle-factor count -- confirm */
+     (const int *) 0,	/* null pointer; presumably an unused twiddle-order table -- confirm */
+};
diff --git a/src/fftw/fni_3.c b/src/fftw/fni_3.c
new file mode 100644
index 0000000..9db0abd
--- /dev/null
+++ b/src/fftw/fni_3.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:18 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 3 */
+
+/*
+ * This function contains 12 FP additions, 4 FP multiplications,
+ * (or, 10 additions, 2 multiplications, 2 fused multiply/add),
+ * 12 stack variables, and 12 memory accesses
+ */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);	/* 1/2 */
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);	/* numerically sqrt(3)/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_3(const fftw_complex *input, fftw_complex *output,
+			int istride, int ostride)
+{
+     /*
+      * Three-point backward butterfly: reads input[0], input[istride] and
+      * input[2 * istride]; writes output[0], output[ostride] and
+      * output[2 * ostride].  No 1/3 scaling is applied. */
+     fftw_real re0, im0;		/* input[0] */
+     fftw_real pair_re, skew_re;	/* from the real parts of inputs 1, 2 */
+     fftw_real pair_im, skew_im;	/* from their imaginary parts */
+     fftw_real base_re, base_im;	/* element 0 minus half the pair sum */
+     ASSERT_ALIGNED_DOUBLE;
+     re0 = c_re(input[0]);
+     im0 = c_im(input[0]);
+     {
+	  fftw_real re1, re2;
+	  fftw_real im1, im2;
+	  ASSERT_ALIGNED_DOUBLE;
+	  /* pair_* hold the sums of elements 1 and 2; skew_* hold their
+	   * differences multiplied by K866025403. */
+	  re1 = c_re(input[istride]);
+	  re2 = c_re(input[2 * istride]);
+	  pair_re = re1 + re2;
+	  skew_re = K866025403 * (re1 - re2);
+	  im1 = c_im(input[istride]);
+	  im2 = c_im(input[2 * istride]);
+	  pair_im = im1 + im2;
+	  skew_im = K866025403 * (im2 - im1);
+     }
+     c_re(output[0]) = re0 + pair_re;
+     base_re = re0 - (K500000000 * pair_re);
+     c_re(output[2 * ostride]) = base_re - skew_im;
+     c_re(output[ostride]) = base_re + skew_im;
+     c_im(output[0]) = im0 + pair_im;
+     base_im = im0 - (K500000000 * pair_im);
+     c_im(output[ostride]) = skew_re + base_im;
+     c_im(output[2 * ostride]) = base_im - skew_re;
+}
+
+fftw_codelet_desc fftwi_no_twiddle_3_desc = {	/* positional initializer; the member names live in the fftw_codelet_desc declaration, which is not in this file */
+     "fftwi_no_twiddle_3",	/* text identical to the function's identifier */
+     (void (*)()) fftwi_no_twiddle_3,	/* address of the codelet defined above, cast to an unprototyped function-pointer type */
+     3,	/* presumably the transform size -- TODO confirm against fftw.h */
+     FFTW_BACKWARD,	/* direction tag; matches the genfft "-notwiddleinv" option recorded in this file */
+     FFTW_NOTW,	/* codelet-kind tag; presumably "no twiddle factors" -- confirm against fftw.h */
+     78,	/* NOTE(review): equals 22 * 3 + 12; looks like a unique codelet id/signature -- confirm */
+     0,	/* presumably the twiddle-factor count -- confirm */
+     (const int *) 0,	/* null pointer; presumably an unused twiddle-order table -- confirm */
+};
diff --git a/src/fftw/fni_32.c b/src/fftw/fni_32.c
new file mode 100644
index 0000000..d748fd0
--- /dev/null
+++ b/src/fftw/fni_32.c
@@ -0,0 +1,1049 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:50 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 32 */
+
+/*
+ * This function contains 372 FP additions, 84 FP multiplications,
+ * (or, 340 additions, 52 multiplications, 32 fused multiply/add),
+ * 92 stack variables, and 128 memory accesses
+ */
+static const fftw_real K195090322 =	/* sin(pi/16) */
+FFTW_KONST(+0.195090322016128267848284868477022240927691618);
+static const fftw_real K980785280 =	/* cos(pi/16) */
+FFTW_KONST(+0.980785280403230449126182236134239036973933731);
+static const fftw_real K831469612 =	/* cos(3*pi/16) */
+FFTW_KONST(+0.831469612302545237078788377617905756738560812);
+static const fftw_real K555570233 =	/* sin(3*pi/16) */
+FFTW_KONST(+0.555570233019602224742830813948532874374937191);
+static const fftw_real K382683432 =	/* sin(pi/8) */
+FFTW_KONST(+0.382683432365089771728459984030398866761344562);
+static const fftw_real K923879532 =	/* cos(pi/8) */
+FFTW_KONST(+0.923879532511286756128183189396788286822416626);
+static const fftw_real K707106781 =	/* sqrt(2)/2 (= cos(pi/4) = sin(pi/4)) */
+FFTW_KONST(+0.707106781186547524400844362104849039284835938);
+
+/*
+ * Generator Id's : 
+ * $Id: fni_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_32(const fftw_complex *input, fftw_complex *output,
+			 int istride, int ostride)
+{ /* Fully unrolled 32-point codelet (presumably the unscaled backward DFT -- confirm): reads input[k * istride] and writes output[k * ostride] for k = 0..31; all loads come before the first store. */
+     fftw_real tmp7; /* function-scope temporaries: assigned in the six load blocks, consumed in the eight store blocks */
+     fftw_real tmp339;
+     fftw_real tmp70;
+     fftw_real tmp313;
+     fftw_real tmp97;
+     fftw_real tmp215;
+     fftw_real tmp179;
+     fftw_real tmp241;
+     fftw_real tmp14;
+     fftw_real tmp314;
+     fftw_real tmp77;
+     fftw_real tmp340;
+     fftw_real tmp182;
+     fftw_real tmp216;
+     fftw_real tmp104;
+     fftw_real tmp242;
+     fftw_real tmp153;
+     fftw_real tmp236;
+     fftw_real tmp53;
+     fftw_real tmp60;
+     fftw_real tmp287;
+     fftw_real tmp336;
+     fftw_real tmp360;
+     fftw_real tmp290;
+     fftw_real tmp293;
+     fftw_real tmp294;
+     fftw_real tmp170;
+     fftw_real tmp233;
+     fftw_real tmp333;
+     fftw_real tmp359;
+     fftw_real tmp164;
+     fftw_real tmp234;
+     fftw_real tmp173;
+     fftw_real tmp237;
+     fftw_real tmp22;
+     fftw_real tmp318;
+     fftw_real tmp343;
+     fftw_real tmp85;
+     fftw_real tmp112;
+     fftw_real tmp185;
+     fftw_real tmp220;
+     fftw_real tmp245;
+     fftw_real tmp29;
+     fftw_real tmp321;
+     fftw_real tmp342;
+     fftw_real tmp92;
+     fftw_real tmp119;
+     fftw_real tmp184;
+     fftw_real tmp223;
+     fftw_real tmp244;
+     fftw_real tmp126;
+     fftw_real tmp229;
+     fftw_real tmp38;
+     fftw_real tmp45;
+     fftw_real tmp278;
+     fftw_real tmp329;
+     fftw_real tmp357;
+     fftw_real tmp281;
+     fftw_real tmp284;
+     fftw_real tmp285;
+     fftw_real tmp143;
+     fftw_real tmp226;
+     fftw_real tmp326;
+     fftw_real tmp356;
+     fftw_real tmp137;
+     fftw_real tmp227;
+     fftw_real tmp146;
+     fftw_real tmp230;
+     ASSERT_ALIGNED_DOUBLE; /* macro defined outside this file; presumably the check requested by genfft's -magic-alignment-check -- confirm */
+     { /* Load stage 1: inputs k = 0, 16, 8, 24; form their pairwise sums and differences. */
+	  fftw_real tmp3;
+	  fftw_real tmp177;
+	  fftw_real tmp66;
+	  fftw_real tmp96;
+	  fftw_real tmp6;
+	  fftw_real tmp95;
+	  fftw_real tmp69;
+	  fftw_real tmp178;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp1;
+	       fftw_real tmp2;
+	       fftw_real tmp64;
+	       fftw_real tmp65;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(input[0]);
+	       tmp2 = c_re(input[16 * istride]);
+	       tmp3 = tmp1 + tmp2;
+	       tmp177 = tmp1 - tmp2;
+	       tmp64 = c_im(input[0]);
+	       tmp65 = c_im(input[16 * istride]);
+	       tmp66 = tmp64 + tmp65;
+	       tmp96 = tmp64 - tmp65;
+	  }
+	  {
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       fftw_real tmp67;
+	       fftw_real tmp68;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp4 = c_re(input[8 * istride]);
+	       tmp5 = c_re(input[24 * istride]);
+	       tmp6 = tmp4 + tmp5;
+	       tmp95 = tmp4 - tmp5;
+	       tmp67 = c_im(input[8 * istride]);
+	       tmp68 = c_im(input[24 * istride]);
+	       tmp69 = tmp67 + tmp68;
+	       tmp178 = tmp67 - tmp68;
+	  }
+	  tmp7 = tmp3 + tmp6;
+	  tmp339 = tmp3 - tmp6;
+	  tmp70 = tmp66 + tmp69;
+	  tmp313 = tmp66 - tmp69;
+	  tmp97 = tmp95 + tmp96;
+	  tmp215 = tmp96 - tmp95;
+	  tmp179 = tmp177 - tmp178;
+	  tmp241 = tmp177 + tmp178;
+     }
+     { /* Load stage 2: inputs k = 4, 20, 28, 12; the cross terms are scaled by K707106781. */
+	  fftw_real tmp10;
+	  fftw_real tmp98;
+	  fftw_real tmp73;
+	  fftw_real tmp99;
+	  fftw_real tmp13;
+	  fftw_real tmp102;
+	  fftw_real tmp76;
+	  fftw_real tmp101;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp8;
+	       fftw_real tmp9;
+	       fftw_real tmp71;
+	       fftw_real tmp72;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp8 = c_re(input[4 * istride]);
+	       tmp9 = c_re(input[20 * istride]);
+	       tmp10 = tmp8 + tmp9;
+	       tmp98 = tmp8 - tmp9;
+	       tmp71 = c_im(input[4 * istride]);
+	       tmp72 = c_im(input[20 * istride]);
+	       tmp73 = tmp71 + tmp72;
+	       tmp99 = tmp71 - tmp72;
+	  }
+	  {
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       fftw_real tmp74;
+	       fftw_real tmp75;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp11 = c_re(input[28 * istride]);
+	       tmp12 = c_re(input[12 * istride]);
+	       tmp13 = tmp11 + tmp12;
+	       tmp102 = tmp11 - tmp12;
+	       tmp74 = c_im(input[28 * istride]);
+	       tmp75 = c_im(input[12 * istride]);
+	       tmp76 = tmp74 + tmp75;
+	       tmp101 = tmp74 - tmp75;
+	  }
+	  tmp14 = tmp10 + tmp13;
+	  tmp314 = tmp10 - tmp13;
+	  tmp77 = tmp73 + tmp76;
+	  tmp340 = tmp76 - tmp73;
+	  {
+	       fftw_real tmp180;
+	       fftw_real tmp181;
+	       fftw_real tmp100;
+	       fftw_real tmp103;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp180 = tmp98 - tmp99;
+	       tmp181 = tmp102 + tmp101;
+	       tmp182 = K707106781 * (tmp180 + tmp181);
+	       tmp216 = K707106781 * (tmp180 - tmp181);
+	       tmp100 = tmp98 + tmp99;
+	       tmp103 = tmp101 - tmp102;
+	       tmp104 = K707106781 * (tmp100 + tmp103);
+	       tmp242 = K707106781 * (tmp103 - tmp100);
+	  }
+     }
+     { /* Load stage 3: inputs k = 31, 15, 7, 23, 3, 19, 27, 11. */
+	  fftw_real tmp49;
+	  fftw_real tmp149;
+	  fftw_real tmp169;
+	  fftw_real tmp288;
+	  fftw_real tmp52;
+	  fftw_real tmp166;
+	  fftw_real tmp152;
+	  fftw_real tmp289;
+	  fftw_real tmp56;
+	  fftw_real tmp154;
+	  fftw_real tmp157;
+	  fftw_real tmp291;
+	  fftw_real tmp59;
+	  fftw_real tmp159;
+	  fftw_real tmp162;
+	  fftw_real tmp292;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp47;
+	       fftw_real tmp48;
+	       fftw_real tmp167;
+	       fftw_real tmp168;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp47 = c_re(input[31 * istride]);
+	       tmp48 = c_re(input[15 * istride]);
+	       tmp49 = tmp47 + tmp48;
+	       tmp149 = tmp47 - tmp48;
+	       tmp167 = c_im(input[31 * istride]);
+	       tmp168 = c_im(input[15 * istride]);
+	       tmp169 = tmp167 - tmp168;
+	       tmp288 = tmp167 + tmp168;
+	  }
+	  {
+	       fftw_real tmp50;
+	       fftw_real tmp51;
+	       fftw_real tmp150;
+	       fftw_real tmp151;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp50 = c_re(input[7 * istride]);
+	       tmp51 = c_re(input[23 * istride]);
+	       tmp52 = tmp50 + tmp51;
+	       tmp166 = tmp50 - tmp51;
+	       tmp150 = c_im(input[7 * istride]);
+	       tmp151 = c_im(input[23 * istride]);
+	       tmp152 = tmp150 - tmp151;
+	       tmp289 = tmp150 + tmp151;
+	  }
+	  {
+	       fftw_real tmp54;
+	       fftw_real tmp55;
+	       fftw_real tmp155;
+	       fftw_real tmp156;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp54 = c_re(input[3 * istride]);
+	       tmp55 = c_re(input[19 * istride]);
+	       tmp56 = tmp54 + tmp55;
+	       tmp154 = tmp54 - tmp55;
+	       tmp155 = c_im(input[3 * istride]);
+	       tmp156 = c_im(input[19 * istride]);
+	       tmp157 = tmp155 - tmp156;
+	       tmp291 = tmp155 + tmp156;
+	  }
+	  {
+	       fftw_real tmp57;
+	       fftw_real tmp58;
+	       fftw_real tmp160;
+	       fftw_real tmp161;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp57 = c_re(input[27 * istride]);
+	       tmp58 = c_re(input[11 * istride]);
+	       tmp59 = tmp57 + tmp58;
+	       tmp159 = tmp57 - tmp58;
+	       tmp160 = c_im(input[27 * istride]);
+	       tmp161 = c_im(input[11 * istride]);
+	       tmp162 = tmp160 - tmp161;
+	       tmp292 = tmp160 + tmp161;
+	  }
+	  { /* Combine the eight loads of this stage into the function-scope temporaries. */
+	       fftw_real tmp334;
+	       fftw_real tmp335;
+	       fftw_real tmp331;
+	       fftw_real tmp332;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp153 = tmp149 - tmp152;
+	       tmp236 = tmp149 + tmp152;
+	       tmp53 = tmp49 + tmp52;
+	       tmp60 = tmp56 + tmp59;
+	       tmp287 = tmp53 - tmp60;
+	       tmp334 = tmp49 - tmp52;
+	       tmp335 = tmp292 - tmp291;
+	       tmp336 = tmp334 - tmp335;
+	       tmp360 = tmp334 + tmp335;
+	       tmp290 = tmp288 + tmp289;
+	       tmp293 = tmp291 + tmp292;
+	       tmp294 = tmp290 - tmp293;
+	       tmp170 = tmp166 + tmp169;
+	       tmp233 = tmp169 - tmp166;
+	       tmp331 = tmp288 - tmp289;
+	       tmp332 = tmp56 - tmp59;
+	       tmp333 = tmp331 - tmp332;
+	       tmp359 = tmp332 + tmp331;
+	       {
+		    fftw_real tmp158;
+		    fftw_real tmp163;
+		    fftw_real tmp171;
+		    fftw_real tmp172;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp158 = tmp154 - tmp157;
+		    tmp163 = tmp159 + tmp162;
+		    tmp164 = K707106781 * (tmp158 + tmp163);
+		    tmp234 = K707106781 * (tmp158 - tmp163);
+		    tmp171 = tmp154 + tmp157;
+		    tmp172 = tmp162 - tmp159;
+		    tmp173 = K707106781 * (tmp171 + tmp172);
+		    tmp237 = K707106781 * (tmp172 - tmp171);
+	       }
+	  }
+     }
+     { /* Load stage 4: inputs k = 2, 18, 10, 26; results are rotated with K923879532 / K382683432. */
+	  fftw_real tmp18;
+	  fftw_real tmp106;
+	  fftw_real tmp81;
+	  fftw_real tmp110;
+	  fftw_real tmp21;
+	  fftw_real tmp109;
+	  fftw_real tmp84;
+	  fftw_real tmp107;
+	  fftw_real tmp316;
+	  fftw_real tmp317;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp16;
+	       fftw_real tmp17;
+	       fftw_real tmp79;
+	       fftw_real tmp80;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp16 = c_re(input[2 * istride]);
+	       tmp17 = c_re(input[18 * istride]);
+	       tmp18 = tmp16 + tmp17;
+	       tmp106 = tmp16 - tmp17;
+	       tmp79 = c_im(input[2 * istride]);
+	       tmp80 = c_im(input[18 * istride]);
+	       tmp81 = tmp79 + tmp80;
+	       tmp110 = tmp79 - tmp80;
+	  }
+	  {
+	       fftw_real tmp19;
+	       fftw_real tmp20;
+	       fftw_real tmp82;
+	       fftw_real tmp83;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp19 = c_re(input[10 * istride]);
+	       tmp20 = c_re(input[26 * istride]);
+	       tmp21 = tmp19 + tmp20;
+	       tmp109 = tmp19 - tmp20;
+	       tmp82 = c_im(input[10 * istride]);
+	       tmp83 = c_im(input[26 * istride]);
+	       tmp84 = tmp82 + tmp83;
+	       tmp107 = tmp82 - tmp83;
+	  }
+	  tmp22 = tmp18 + tmp21;
+	  tmp316 = tmp18 - tmp21;
+	  tmp317 = tmp81 - tmp84;
+	  tmp318 = tmp316 - tmp317;
+	  tmp343 = tmp316 + tmp317;
+	  tmp85 = tmp81 + tmp84;
+	  {
+	       fftw_real tmp108;
+	       fftw_real tmp111;
+	       fftw_real tmp218;
+	       fftw_real tmp219;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp108 = tmp106 - tmp107;
+	       tmp111 = tmp109 + tmp110;
+	       tmp112 = (K923879532 * tmp108) - (K382683432 * tmp111);
+	       tmp185 = (K923879532 * tmp111) + (K382683432 * tmp108);
+	       tmp218 = tmp106 + tmp107;
+	       tmp219 = tmp110 - tmp109;
+	       tmp220 = (K382683432 * tmp218) - (K923879532 * tmp219);
+	       tmp245 = (K382683432 * tmp219) + (K923879532 * tmp218);
+	  }
+     }
+     { /* Load stage 5: inputs k = 30, 14, 6, 22; results are rotated with K923879532 / K382683432. */
+	  fftw_real tmp25;
+	  fftw_real tmp116;
+	  fftw_real tmp88;
+	  fftw_real tmp114;
+	  fftw_real tmp28;
+	  fftw_real tmp113;
+	  fftw_real tmp91;
+	  fftw_real tmp117;
+	  fftw_real tmp319;
+	  fftw_real tmp320;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp23;
+	       fftw_real tmp24;
+	       fftw_real tmp86;
+	       fftw_real tmp87;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp23 = c_re(input[30 * istride]);
+	       tmp24 = c_re(input[14 * istride]);
+	       tmp25 = tmp23 + tmp24;
+	       tmp116 = tmp23 - tmp24;
+	       tmp86 = c_im(input[30 * istride]);
+	       tmp87 = c_im(input[14 * istride]);
+	       tmp88 = tmp86 + tmp87;
+	       tmp114 = tmp86 - tmp87;
+	  }
+	  {
+	       fftw_real tmp26;
+	       fftw_real tmp27;
+	       fftw_real tmp89;
+	       fftw_real tmp90;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp26 = c_re(input[6 * istride]);
+	       tmp27 = c_re(input[22 * istride]);
+	       tmp28 = tmp26 + tmp27;
+	       tmp113 = tmp26 - tmp27;
+	       tmp89 = c_im(input[6 * istride]);
+	       tmp90 = c_im(input[22 * istride]);
+	       tmp91 = tmp89 + tmp90;
+	       tmp117 = tmp89 - tmp90;
+	  }
+	  tmp29 = tmp25 + tmp28;
+	  tmp319 = tmp25 - tmp28;
+	  tmp320 = tmp88 - tmp91;
+	  tmp321 = tmp319 + tmp320;
+	  tmp342 = tmp320 - tmp319;
+	  tmp92 = tmp88 + tmp91;
+	  {
+	       fftw_real tmp115;
+	       fftw_real tmp118;
+	       fftw_real tmp221;
+	       fftw_real tmp222;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp115 = tmp113 + tmp114;
+	       tmp118 = tmp116 - tmp117;
+	       tmp119 = (K382683432 * tmp115) + (K923879532 * tmp118);
+	       tmp184 = (K923879532 * tmp115) - (K382683432 * tmp118);
+	       tmp221 = tmp114 - tmp113;
+	       tmp222 = tmp116 + tmp117;
+	       tmp223 = (K923879532 * tmp221) + (K382683432 * tmp222);
+	       tmp244 = (K382683432 * tmp221) - (K923879532 * tmp222);
+	  }
+     }
+     { /* Load stage 6: inputs k = 1, 17, 9, 25, 5, 21, 29, 13 (the last input reads in the function). */
+	  fftw_real tmp34;
+	  fftw_real tmp122;
+	  fftw_real tmp142;
+	  fftw_real tmp279;
+	  fftw_real tmp37;
+	  fftw_real tmp139;
+	  fftw_real tmp125;
+	  fftw_real tmp280;
+	  fftw_real tmp41;
+	  fftw_real tmp127;
+	  fftw_real tmp130;
+	  fftw_real tmp282;
+	  fftw_real tmp44;
+	  fftw_real tmp132;
+	  fftw_real tmp135;
+	  fftw_real tmp283;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp32;
+	       fftw_real tmp33;
+	       fftw_real tmp140;
+	       fftw_real tmp141;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp32 = c_re(input[istride]);
+	       tmp33 = c_re(input[17 * istride]);
+	       tmp34 = tmp32 + tmp33;
+	       tmp122 = tmp32 - tmp33;
+	       tmp140 = c_im(input[istride]);
+	       tmp141 = c_im(input[17 * istride]);
+	       tmp142 = tmp140 - tmp141;
+	       tmp279 = tmp140 + tmp141;
+	  }
+	  {
+	       fftw_real tmp35;
+	       fftw_real tmp36;
+	       fftw_real tmp123;
+	       fftw_real tmp124;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp35 = c_re(input[9 * istride]);
+	       tmp36 = c_re(input[25 * istride]);
+	       tmp37 = tmp35 + tmp36;
+	       tmp139 = tmp35 - tmp36;
+	       tmp123 = c_im(input[9 * istride]);
+	       tmp124 = c_im(input[25 * istride]);
+	       tmp125 = tmp123 - tmp124;
+	       tmp280 = tmp123 + tmp124;
+	  }
+	  {
+	       fftw_real tmp39;
+	       fftw_real tmp40;
+	       fftw_real tmp128;
+	       fftw_real tmp129;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp39 = c_re(input[5 * istride]);
+	       tmp40 = c_re(input[21 * istride]);
+	       tmp41 = tmp39 + tmp40;
+	       tmp127 = tmp39 - tmp40;
+	       tmp128 = c_im(input[5 * istride]);
+	       tmp129 = c_im(input[21 * istride]);
+	       tmp130 = tmp128 - tmp129;
+	       tmp282 = tmp128 + tmp129;
+	  }
+	  {
+	       fftw_real tmp42;
+	       fftw_real tmp43;
+	       fftw_real tmp133;
+	       fftw_real tmp134;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp42 = c_re(input[29 * istride]);
+	       tmp43 = c_re(input[13 * istride]);
+	       tmp44 = tmp42 + tmp43;
+	       tmp132 = tmp42 - tmp43;
+	       tmp133 = c_im(input[29 * istride]);
+	       tmp134 = c_im(input[13 * istride]);
+	       tmp135 = tmp133 - tmp134;
+	       tmp283 = tmp133 + tmp134;
+	  }
+	  { /* Combine the eight loads of this stage into the function-scope temporaries. */
+	       fftw_real tmp327;
+	       fftw_real tmp328;
+	       fftw_real tmp324;
+	       fftw_real tmp325;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp126 = tmp122 - tmp125;
+	       tmp229 = tmp122 + tmp125;
+	       tmp38 = tmp34 + tmp37;
+	       tmp45 = tmp41 + tmp44;
+	       tmp278 = tmp38 - tmp45;
+	       tmp327 = tmp34 - tmp37;
+	       tmp328 = tmp283 - tmp282;
+	       tmp329 = tmp327 - tmp328;
+	       tmp357 = tmp327 + tmp328;
+	       tmp281 = tmp279 + tmp280;
+	       tmp284 = tmp282 + tmp283;
+	       tmp285 = tmp281 - tmp284;
+	       tmp143 = tmp139 + tmp142;
+	       tmp226 = tmp142 - tmp139;
+	       tmp324 = tmp279 - tmp280;
+	       tmp325 = tmp41 - tmp44;
+	       tmp326 = tmp324 - tmp325;
+	       tmp356 = tmp325 + tmp324;
+	       {
+		    fftw_real tmp131;
+		    fftw_real tmp136;
+		    fftw_real tmp144;
+		    fftw_real tmp145;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp131 = tmp127 - tmp130;
+		    tmp136 = tmp132 + tmp135;
+		    tmp137 = K707106781 * (tmp131 + tmp136);
+		    tmp227 = K707106781 * (tmp131 - tmp136);
+		    tmp144 = tmp127 + tmp130;
+		    tmp145 = tmp135 - tmp132;
+		    tmp146 = K707106781 * (tmp144 + tmp145);
+		    tmp230 = K707106781 * (tmp145 - tmp144);
+	       }
+	  }
+     }
+     { /* Store stage 1: outputs k = 4, 12, 20, 28 (K707106781 is the only multiplier used). */
+	  fftw_real tmp277;
+	  fftw_real tmp301;
+	  fftw_real tmp304;
+	  fftw_real tmp306;
+	  fftw_real tmp296;
+	  fftw_real tmp300;
+	  fftw_real tmp299;
+	  fftw_real tmp305;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp275;
+	       fftw_real tmp276;
+	       fftw_real tmp302;
+	       fftw_real tmp303;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp275 = tmp70 - tmp77;
+	       tmp276 = tmp22 - tmp29;
+	       tmp277 = tmp275 - tmp276;
+	       tmp301 = tmp276 + tmp275;
+	       tmp302 = tmp278 + tmp285;
+	       tmp303 = tmp294 - tmp287;
+	       tmp304 = K707106781 * (tmp302 + tmp303);
+	       tmp306 = K707106781 * (tmp303 - tmp302);
+	  }
+	  {
+	       fftw_real tmp286;
+	       fftw_real tmp295;
+	       fftw_real tmp297;
+	       fftw_real tmp298;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp286 = tmp278 - tmp285;
+	       tmp295 = tmp287 + tmp294;
+	       tmp296 = K707106781 * (tmp286 - tmp295);
+	       tmp300 = K707106781 * (tmp286 + tmp295);
+	       tmp297 = tmp7 - tmp14;
+	       tmp298 = tmp92 - tmp85;
+	       tmp299 = tmp297 + tmp298;
+	       tmp305 = tmp297 - tmp298;
+	  }
+	  c_im(output[28 * ostride]) = tmp277 - tmp296;
+	  c_im(output[12 * ostride]) = tmp277 + tmp296;
+	  c_re(output[20 * ostride]) = tmp299 - tmp300;
+	  c_re(output[4 * ostride]) = tmp299 + tmp300;
+	  c_im(output[20 * ostride]) = tmp301 - tmp304;
+	  c_im(output[4 * ostride]) = tmp301 + tmp304;
+	  c_re(output[28 * ostride]) = tmp305 - tmp306;
+	  c_re(output[12 * ostride]) = tmp305 + tmp306;
+     }
+     { /* Store stage 2: outputs k = 0, 8, 16, 24; built from sums and differences only, no multiplications. */
+	  fftw_real tmp31;
+	  fftw_real tmp311;
+	  fftw_real tmp310;
+	  fftw_real tmp312;
+	  fftw_real tmp62;
+	  fftw_real tmp63;
+	  fftw_real tmp94;
+	  fftw_real tmp307;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp15;
+	       fftw_real tmp30;
+	       fftw_real tmp308;
+	       fftw_real tmp309;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp15 = tmp7 + tmp14;
+	       tmp30 = tmp22 + tmp29;
+	       tmp31 = tmp15 + tmp30;
+	       tmp311 = tmp15 - tmp30;
+	       tmp308 = tmp281 + tmp284;
+	       tmp309 = tmp290 + tmp293;
+	       tmp310 = tmp308 + tmp309;
+	       tmp312 = tmp309 - tmp308;
+	  }
+	  {
+	       fftw_real tmp46;
+	       fftw_real tmp61;
+	       fftw_real tmp78;
+	       fftw_real tmp93;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp46 = tmp38 + tmp45;
+	       tmp61 = tmp53 + tmp60;
+	       tmp62 = tmp46 + tmp61;
+	       tmp63 = tmp46 - tmp61;
+	       tmp78 = tmp70 + tmp77;
+	       tmp93 = tmp85 + tmp92;
+	       tmp94 = tmp78 - tmp93;
+	       tmp307 = tmp78 + tmp93;
+	  }
+	  c_re(output[16 * ostride]) = tmp31 - tmp62;
+	  c_re(output[0]) = tmp31 + tmp62;
+	  c_im(output[8 * ostride]) = tmp63 + tmp94;
+	  c_im(output[24 * ostride]) = tmp94 - tmp63;
+	  c_im(output[16 * ostride]) = tmp307 - tmp310;
+	  c_im(output[0]) = tmp307 + tmp310;
+	  c_re(output[24 * ostride]) = tmp311 - tmp312;
+	  c_re(output[8 * ostride]) = tmp311 + tmp312;
+     }
+     { /* Store stage 3: outputs k = 5, 13, 21, 29 (rotations use K555570233 / K831469612). */
+	  fftw_real tmp121;
+	  fftw_real tmp189;
+	  fftw_real tmp187;
+	  fftw_real tmp193;
+	  fftw_real tmp148;
+	  fftw_real tmp190;
+	  fftw_real tmp175;
+	  fftw_real tmp191;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp105;
+	       fftw_real tmp120;
+	       fftw_real tmp183;
+	       fftw_real tmp186;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp105 = tmp97 - tmp104;
+	       tmp120 = tmp112 - tmp119;
+	       tmp121 = tmp105 - tmp120;
+	       tmp189 = tmp105 + tmp120;
+	       tmp183 = tmp179 - tmp182;
+	       tmp186 = tmp184 - tmp185;
+	       tmp187 = tmp183 + tmp186;
+	       tmp193 = tmp183 - tmp186;
+	  }
+	  {
+	       fftw_real tmp138;
+	       fftw_real tmp147;
+	       fftw_real tmp165;
+	       fftw_real tmp174;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp138 = tmp126 - tmp137;
+	       tmp147 = tmp143 - tmp146;
+	       tmp148 = (K555570233 * tmp138) - (K831469612 * tmp147);
+	       tmp190 = (K831469612 * tmp138) + (K555570233 * tmp147);
+	       tmp165 = tmp153 - tmp164;
+	       tmp174 = tmp170 - tmp173;
+	       tmp175 = (K555570233 * tmp165) + (K831469612 * tmp174);
+	       tmp191 = (K555570233 * tmp174) - (K831469612 * tmp165);
+	  }
+	  { /* Each temporary is computed immediately before the two stores that use it. */
+	       fftw_real tmp176;
+	       fftw_real tmp188;
+	       fftw_real tmp192;
+	       fftw_real tmp194;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp176 = tmp148 - tmp175;
+	       c_im(output[29 * ostride]) = tmp121 - tmp176;
+	       c_im(output[13 * ostride]) = tmp121 + tmp176;
+	       tmp188 = tmp148 + tmp175;
+	       c_re(output[21 * ostride]) = tmp187 - tmp188;
+	       c_re(output[5 * ostride]) = tmp187 + tmp188;
+	       tmp192 = tmp190 + tmp191;
+	       c_im(output[21 * ostride]) = tmp189 - tmp192;
+	       c_im(output[5 * ostride]) = tmp189 + tmp192;
+	       tmp194 = tmp191 - tmp190;
+	       c_re(output[29 * ostride]) = tmp193 - tmp194;
+	       c_re(output[13 * ostride]) = tmp193 + tmp194;
+	  }
+     }
+     { /* Store stage 4: outputs k = 1, 9, 17, 25 (rotations use K980785280 / K195090322). */
+	  fftw_real tmp197;
+	  fftw_real tmp209;
+	  fftw_real tmp207;
+	  fftw_real tmp213;
+	  fftw_real tmp200;
+	  fftw_real tmp210;
+	  fftw_real tmp203;
+	  fftw_real tmp211;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp195;
+	       fftw_real tmp196;
+	       fftw_real tmp205;
+	       fftw_real tmp206;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp195 = tmp97 + tmp104;
+	       tmp196 = tmp185 + tmp184;
+	       tmp197 = tmp195 - tmp196;
+	       tmp209 = tmp195 + tmp196;
+	       tmp205 = tmp179 + tmp182;
+	       tmp206 = tmp112 + tmp119;
+	       tmp207 = tmp205 + tmp206;
+	       tmp213 = tmp205 - tmp206;
+	  }
+	  {
+	       fftw_real tmp198;
+	       fftw_real tmp199;
+	       fftw_real tmp201;
+	       fftw_real tmp202;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp198 = tmp126 + tmp137;
+	       tmp199 = tmp143 + tmp146;
+	       tmp200 = (K980785280 * tmp198) - (K195090322 * tmp199);
+	       tmp210 = (K195090322 * tmp198) + (K980785280 * tmp199);
+	       tmp201 = tmp153 + tmp164;
+	       tmp202 = tmp170 + tmp173;
+	       tmp203 = (K980785280 * tmp201) + (K195090322 * tmp202);
+	       tmp211 = (K980785280 * tmp202) - (K195090322 * tmp201);
+	  }
+	  {
+	       fftw_real tmp204;
+	       fftw_real tmp208;
+	       fftw_real tmp212;
+	       fftw_real tmp214;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp204 = tmp200 - tmp203;
+	       c_im(output[25 * ostride]) = tmp197 - tmp204;
+	       c_im(output[9 * ostride]) = tmp197 + tmp204;
+	       tmp208 = tmp200 + tmp203;
+	       c_re(output[17 * ostride]) = tmp207 - tmp208;
+	       c_re(output[ostride]) = tmp207 + tmp208;
+	       tmp212 = tmp210 + tmp211;
+	       c_im(output[17 * ostride]) = tmp209 - tmp212;
+	       c_im(output[ostride]) = tmp209 + tmp212;
+	       tmp214 = tmp211 - tmp210;
+	       c_re(output[25 * ostride]) = tmp213 - tmp214;
+	       c_re(output[9 * ostride]) = tmp213 + tmp214;
+	  }
+     }
+     { /* Store stage 5: outputs k = 6, 14, 22, 30 (uses K707106781 and K382683432 / K923879532). */
+	  fftw_real tmp323;
+	  fftw_real tmp347;
+	  fftw_real tmp350;
+	  fftw_real tmp352;
+	  fftw_real tmp338;
+	  fftw_real tmp346;
+	  fftw_real tmp345;
+	  fftw_real tmp351;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp315;
+	       fftw_real tmp322;
+	       fftw_real tmp348;
+	       fftw_real tmp349;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp315 = tmp313 - tmp314;
+	       tmp322 = K707106781 * (tmp318 - tmp321);
+	       tmp323 = tmp315 + tmp322;
+	       tmp347 = tmp315 - tmp322;
+	       tmp348 = (K382683432 * tmp329) - (K923879532 * tmp326);
+	       tmp349 = (K923879532 * tmp333) + (K382683432 * tmp336);
+	       tmp350 = tmp348 - tmp349;
+	       tmp352 = tmp348 + tmp349;
+	  }
+	  {
+	       fftw_real tmp330;
+	       fftw_real tmp337;
+	       fftw_real tmp341;
+	       fftw_real tmp344;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp330 = (K382683432 * tmp326) + (K923879532 * tmp329);
+	       tmp337 = (K382683432 * tmp333) - (K923879532 * tmp336);
+	       tmp338 = tmp330 + tmp337;
+	       tmp346 = tmp337 - tmp330;
+	       tmp341 = tmp339 - tmp340;
+	       tmp344 = K707106781 * (tmp342 - tmp343);
+	       tmp345 = tmp341 - tmp344;
+	       tmp351 = tmp341 + tmp344;
+	  }
+	  c_im(output[22 * ostride]) = tmp323 - tmp338;
+	  c_im(output[6 * ostride]) = tmp323 + tmp338;
+	  c_re(output[30 * ostride]) = tmp345 - tmp346;
+	  c_re(output[14 * ostride]) = tmp345 + tmp346;
+	  c_im(output[30 * ostride]) = tmp347 - tmp350;
+	  c_im(output[14 * ostride]) = tmp347 + tmp350;
+	  c_re(output[22 * ostride]) = tmp351 - tmp352;
+	  c_re(output[6 * ostride]) = tmp351 + tmp352;
+     }
+     { /* Store stage 6: outputs k = 2, 10, 18, 26 (uses K707106781 and K923879532 / K382683432). */
+	  fftw_real tmp355;
+	  fftw_real tmp367;
+	  fftw_real tmp370;
+	  fftw_real tmp372;
+	  fftw_real tmp362;
+	  fftw_real tmp366;
+	  fftw_real tmp365;
+	  fftw_real tmp371;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp353;
+	       fftw_real tmp354;
+	       fftw_real tmp368;
+	       fftw_real tmp369;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp353 = tmp314 + tmp313;
+	       tmp354 = K707106781 * (tmp343 + tmp342);
+	       tmp355 = tmp353 + tmp354;
+	       tmp367 = tmp353 - tmp354;
+	       tmp368 = (K923879532 * tmp357) - (K382683432 * tmp356);
+	       tmp369 = (K382683432 * tmp359) + (K923879532 * tmp360);
+	       tmp370 = tmp368 - tmp369;
+	       tmp372 = tmp368 + tmp369;
+	  }
+	  {
+	       fftw_real tmp358;
+	       fftw_real tmp361;
+	       fftw_real tmp363;
+	       fftw_real tmp364;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp358 = (K923879532 * tmp356) + (K382683432 * tmp357);
+	       tmp361 = (K923879532 * tmp359) - (K382683432 * tmp360);
+	       tmp362 = tmp358 + tmp361;
+	       tmp366 = tmp361 - tmp358;
+	       tmp363 = tmp339 + tmp340;
+	       tmp364 = K707106781 * (tmp318 + tmp321);
+	       tmp365 = tmp363 - tmp364;
+	       tmp371 = tmp363 + tmp364;
+	  }
+	  c_im(output[18 * ostride]) = tmp355 - tmp362;
+	  c_im(output[2 * ostride]) = tmp355 + tmp362;
+	  c_re(output[26 * ostride]) = tmp365 - tmp366;
+	  c_re(output[10 * ostride]) = tmp365 + tmp366;
+	  c_im(output[26 * ostride]) = tmp367 - tmp370;
+	  c_im(output[10 * ostride]) = tmp367 + tmp370;
+	  c_re(output[18 * ostride]) = tmp371 - tmp372;
+	  c_re(output[2 * ostride]) = tmp371 + tmp372;
+     }
+     { /* Store stage 7: outputs k = 7, 15, 23, 31 (rotations use K195090322 / K980785280). */
+	  fftw_real tmp225;
+	  fftw_real tmp249;
+	  fftw_real tmp247;
+	  fftw_real tmp253;
+	  fftw_real tmp232;
+	  fftw_real tmp250;
+	  fftw_real tmp239;
+	  fftw_real tmp251;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp217;
+	       fftw_real tmp224;
+	       fftw_real tmp243;
+	       fftw_real tmp246;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp217 = tmp215 - tmp216;
+	       tmp224 = tmp220 - tmp223;
+	       tmp225 = tmp217 + tmp224;
+	       tmp249 = tmp217 - tmp224;
+	       tmp243 = tmp241 - tmp242;
+	       tmp246 = tmp244 - tmp245;
+	       tmp247 = tmp243 - tmp246;
+	       tmp253 = tmp243 + tmp246;
+	  }
+	  {
+	       fftw_real tmp228;
+	       fftw_real tmp231;
+	       fftw_real tmp235;
+	       fftw_real tmp238;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp228 = tmp226 - tmp227;
+	       tmp231 = tmp229 - tmp230;
+	       tmp232 = (K195090322 * tmp228) + (K980785280 * tmp231);
+	       tmp250 = (K195090322 * tmp231) - (K980785280 * tmp228);
+	       tmp235 = tmp233 - tmp234;
+	       tmp238 = tmp236 - tmp237;
+	       tmp239 = (K195090322 * tmp235) - (K980785280 * tmp238);
+	       tmp251 = (K980785280 * tmp235) + (K195090322 * tmp238);
+	  }
+	  {
+	       fftw_real tmp240;
+	       fftw_real tmp248;
+	       fftw_real tmp252;
+	       fftw_real tmp254;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp240 = tmp232 + tmp239;
+	       c_im(output[23 * ostride]) = tmp225 - tmp240;
+	       c_im(output[7 * ostride]) = tmp225 + tmp240;
+	       tmp248 = tmp239 - tmp232;
+	       c_re(output[31 * ostride]) = tmp247 - tmp248;
+	       c_re(output[15 * ostride]) = tmp247 + tmp248;
+	       tmp252 = tmp250 - tmp251;
+	       c_im(output[31 * ostride]) = tmp249 - tmp252;
+	       c_im(output[15 * ostride]) = tmp249 + tmp252;
+	       tmp254 = tmp250 + tmp251;
+	       c_re(output[23 * ostride]) = tmp253 - tmp254;
+	       c_re(output[7 * ostride]) = tmp253 + tmp254;
+	  }
+     }
+     { /* Store stage 8: outputs k = 3, 11, 19, 27 (rotations use K831469612 / K555570233). */
+	  fftw_real tmp257;
+	  fftw_real tmp269;
+	  fftw_real tmp267;
+	  fftw_real tmp273;
+	  fftw_real tmp260;
+	  fftw_real tmp270;
+	  fftw_real tmp263;
+	  fftw_real tmp271;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp255;
+	       fftw_real tmp256;
+	       fftw_real tmp265;
+	       fftw_real tmp266;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp255 = tmp215 + tmp216;
+	       tmp256 = tmp245 + tmp244;
+	       tmp257 = tmp255 + tmp256;
+	       tmp269 = tmp255 - tmp256;
+	       tmp265 = tmp241 + tmp242;
+	       tmp266 = tmp220 + tmp223;
+	       tmp267 = tmp265 - tmp266;
+	       tmp273 = tmp265 + tmp266;
+	  }
+	  {
+	       fftw_real tmp258;
+	       fftw_real tmp259;
+	       fftw_real tmp261;
+	       fftw_real tmp262;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp258 = tmp226 + tmp227;
+	       tmp259 = tmp229 + tmp230;
+	       tmp260 = (K831469612 * tmp258) + (K555570233 * tmp259);
+	       tmp270 = (K831469612 * tmp259) - (K555570233 * tmp258);
+	       tmp261 = tmp233 + tmp234;
+	       tmp262 = tmp236 + tmp237;
+	       tmp263 = (K831469612 * tmp261) - (K555570233 * tmp262);
+	       tmp271 = (K555570233 * tmp261) + (K831469612 * tmp262);
+	  }
+	  {
+	       fftw_real tmp264;
+	       fftw_real tmp268;
+	       fftw_real tmp272;
+	       fftw_real tmp274;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp264 = tmp260 + tmp263;
+	       c_im(output[19 * ostride]) = tmp257 - tmp264;
+	       c_im(output[3 * ostride]) = tmp257 + tmp264;
+	       tmp268 = tmp263 - tmp260;
+	       c_re(output[27 * ostride]) = tmp267 - tmp268;
+	       c_re(output[11 * ostride]) = tmp267 + tmp268;
+	       tmp272 = tmp270 - tmp271;
+	       c_im(output[27 * ostride]) = tmp269 - tmp272;
+	       c_im(output[11 * ostride]) = tmp269 + tmp272;
+	       tmp274 = tmp270 + tmp271;
+	       c_re(output[19 * ostride]) = tmp273 - tmp274;
+	       c_re(output[3 * ostride]) = tmp273 + tmp274;
+	  }
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_32_desc = {	/* descriptor record for fftwi_no_twiddle_32; field layout is declared outside this file */
+     "fftwi_no_twiddle_32",	/* codelet name, spelled like the function identifier */
+     (void (*)()) fftwi_no_twiddle_32,	/* entry point, cast to an unprototyped function pointer */
+     32,	/* presumably the transform size -- confirm against fftw_codelet_desc */
+     FFTW_BACKWARD,	/* direction tag */
+     FFTW_NOTW,	/* codelet-kind tag (presumably "no twiddle factors" -- confirm) */
+     716,	/* NOTE(review): generator-emitted number; its meaning is not visible here */
+     0,	/* NOTE(review): presumably a twiddle count, zero for this codelet -- confirm */
+     (const int *) 0,	/* null pointer: no int table attached */
+};
diff --git a/src/fftw/fni_4.c b/src/fftw/fni_4.c
new file mode 100644
index 0000000..f20547e
--- /dev/null
+++ b/src/fftw/fni_4.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:18 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 4 */
+
+/*
+ * This function contains 16 FP additions, 0 FP multiplications,
+ * (or, 16 additions, 0 multiplications, 0 fused multiply/add),
+ * 12 stack variables, and 16 memory accesses
+ */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_4(const fftw_complex *input, fftw_complex *output, /* unscaled 4-point backward DFT: output[k*ostride] = sum over n of input[n*istride] * (+i)^(n*k) */
+			int istride, int ostride) /* istride/ostride: index step between consecutive points of input/output */
+{
+     fftw_real tmp3; /* re0 + re2 */
+     fftw_real tmp11; /* re0 - re2 */
+     fftw_real tmp10; /* im0 - im2 */
+     fftw_real tmp15; /* im0 + im2 */
+     fftw_real tmp6; /* re1 + re3 */
+     fftw_real tmp7; /* re1 - re3 */
+     fftw_real tmp14; /* im1 - im3 */
+     fftw_real tmp16; /* im1 + im3 */
+     ASSERT_ALIGNED_DOUBLE; /* macro defined outside this file; presumably a stack-alignment check (generator ran with -magic-alignment-check) */
+     { /* butterfly on the even-indexed points 0 and 2 */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp8;
+	  fftw_real tmp9;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[2 * istride]);
+	  tmp3 = tmp1 + tmp2;
+	  tmp11 = tmp1 - tmp2;
+	  tmp8 = c_im(input[0]);
+	  tmp9 = c_im(input[2 * istride]);
+	  tmp10 = tmp8 - tmp9;
+	  tmp15 = tmp8 + tmp9;
+     }
+     { /* butterfly on the odd-indexed points 1 and 3 */
+	  fftw_real tmp4;
+	  fftw_real tmp5;
+	  fftw_real tmp12;
+	  fftw_real tmp13;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp4 = c_re(input[istride]);
+	  tmp5 = c_re(input[3 * istride]);
+	  tmp6 = tmp4 + tmp5;
+	  tmp7 = tmp4 - tmp5;
+	  tmp12 = c_im(input[istride]);
+	  tmp13 = c_im(input[3 * istride]);
+	  tmp14 = tmp12 - tmp13;
+	  tmp16 = tmp12 + tmp13;
+     }
+     c_re(output[2 * ostride]) = tmp3 - tmp6; /* k=2: even sum minus odd sum */
+     c_re(output[0]) = tmp3 + tmp6; /* k=0: plain sum of all four real parts */
+     c_im(output[ostride]) = tmp7 + tmp10; /* k=1: multiplying the odd difference by +i moves its real part into the imaginary output */
+     c_im(output[3 * ostride]) = tmp10 - tmp7; /* k=3: same terms as k=1 with the odd contribution negated */
+     c_re(output[ostride]) = tmp11 - tmp14; /* k=1: +i times the odd difference contributes -(im1 - im3) to the real output */
+     c_re(output[3 * ostride]) = tmp11 + tmp14; /* k=3: sign of the odd contribution flipped relative to k=1 */
+     c_im(output[2 * ostride]) = tmp15 - tmp16; /* k=2: even sum minus odd sum, imaginary parts */
+     c_im(output[0]) = tmp15 + tmp16; /* k=0: plain sum of all four imaginary parts */
+}
+
+fftw_codelet_desc fftwi_no_twiddle_4_desc = { /* descriptor record for the fftwi_no_twiddle_4 codelet; field meanings noted below are presumed -- confirm against fftw_codelet_desc in fftw-int.h */
+     "fftwi_no_twiddle_4", /* name string, spelled like the function symbol */
+     (void (*)()) fftwi_no_twiddle_4, /* codelet entry point, stored through a generic function-pointer type */
+     4, /* presumably the transform length handled by the codelet */
+     FFTW_BACKWARD, /* direction tag */
+     FFTW_NOTW, /* codelet-kind tag (no-twiddle, going by the macro name) */
+     100, /* numeric tag; its meaning is defined by fftw_codelet_desc, not visible here */
+     0, /* presumably a twiddle-factor count; zero for this codelet */
+     (const int *) 0, /* null pointer: no integer table attached */
+};
diff --git a/src/fftw/fni_5.c b/src/fftw/fni_5.c
new file mode 100644
index 0000000..6da791f
--- /dev/null
+++ b/src/fftw/fni_5.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:18 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 5 */
+
+/*
+ * This function contains 32 FP additions, 12 FP multiplications,
+ * (or, 26 additions, 6 multiplications, 6 fused multiply/add),
+ * 16 stack variables, and 20 memory accesses
+ */
+static const fftw_real K250000000 = /* the name encodes the leading decimal digits of the value */
+FFTW_KONST(+0.250000000000000000000000000000000000000000000); /* exactly 1/4 */
+static const fftw_real K951056516 =
+FFTW_KONST(+0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
+static const fftw_real K587785252 =
+FFTW_KONST(+0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5), equal to sin(4*pi/5) */
+static const fftw_real K559016994 =
+FFTW_KONST(+0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_5(const fftw_complex *input, fftw_complex *output, /* unscaled 5-point backward DFT: output[k*ostride] = sum over n of input[n*istride] * exp(+2*pi*i*n*k/5) */
+			int istride, int ostride) /* istride/ostride: index step between consecutive points of input/output */
+{
+     fftw_real tmp1; /* re0 */
+     fftw_real tmp27; /* im0 */
+     fftw_real tmp8; /* re1 + re2 + re3 + re4 */
+     fftw_real tmp10; /* K559016994 * ((re1 + re4) - (re2 + re3)) */
+     fftw_real tmp21; /* re1 - re4 */
+     fftw_real tmp22; /* re2 - re3 */
+     fftw_real tmp14; /* im1 - im4 */
+     fftw_real tmp28; /* im1 + im2 + im3 + im4 */
+     fftw_real tmp26; /* K559016994 * ((im1 + im4) - (im2 + im3)) */
+     fftw_real tmp17; /* im2 - im3 */
+     ASSERT_ALIGNED_DOUBLE; /* macro defined outside this file; presumably a stack-alignment check (generator ran with -magic-alignment-check) */
+     tmp1 = c_re(input[0]);
+     tmp27 = c_im(input[0]);
+     { /* real parts of points 1..4, paired as (1,4) and (2,3) */
+	  fftw_real tmp2;
+	  fftw_real tmp3;
+	  fftw_real tmp4;
+	  fftw_real tmp5;
+	  fftw_real tmp6;
+	  fftw_real tmp7;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp2 = c_re(input[istride]);
+	  tmp3 = c_re(input[4 * istride]);
+	  tmp4 = tmp2 + tmp3;
+	  tmp5 = c_re(input[2 * istride]);
+	  tmp6 = c_re(input[3 * istride]);
+	  tmp7 = tmp5 + tmp6;
+	  tmp8 = tmp4 + tmp7;
+	  tmp10 = K559016994 * (tmp4 - tmp7);
+	  tmp21 = tmp2 - tmp3;
+	  tmp22 = tmp5 - tmp6;
+     }
+     { /* imaginary parts of points 1..4, same pairing */
+	  fftw_real tmp12;
+	  fftw_real tmp13;
+	  fftw_real tmp24;
+	  fftw_real tmp15;
+	  fftw_real tmp16;
+	  fftw_real tmp25;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp12 = c_im(input[istride]);
+	  tmp13 = c_im(input[4 * istride]);
+	  tmp24 = tmp12 + tmp13;
+	  tmp15 = c_im(input[2 * istride]);
+	  tmp16 = c_im(input[3 * istride]);
+	  tmp25 = tmp15 + tmp16;
+	  tmp14 = tmp12 - tmp13;
+	  tmp28 = tmp24 + tmp25;
+	  tmp26 = K559016994 * (tmp24 - tmp25);
+	  tmp17 = tmp15 - tmp16;
+     }
+     c_re(output[0]) = tmp1 + tmp8; /* k=0: sum of all five real parts */
+     { /* real parts of outputs 1..4 */
+	  fftw_real tmp18;
+	  fftw_real tmp20;
+	  fftw_real tmp11;
+	  fftw_real tmp19;
+	  fftw_real tmp9;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp18 = (K587785252 * tmp14) - (K951056516 * tmp17); /* sine-weighted imaginary differences used by k=2 and k=3 */
+	  tmp20 = (K951056516 * tmp14) + (K587785252 * tmp17); /* sine-weighted imaginary differences used by k=1 and k=4 */
+	  tmp9 = tmp1 - (K250000000 * tmp8); /* re0 minus a quarter of the other four: the part of the cosine terms shared by k=1..4 */
+	  tmp11 = tmp9 - tmp10; /* cosine part for k=2 and k=3 */
+	  tmp19 = tmp10 + tmp9; /* cosine part for k=1 and k=4 */
+	  c_re(output[2 * ostride]) = tmp11 - tmp18;
+	  c_re(output[3 * ostride]) = tmp11 + tmp18;
+	  c_re(output[ostride]) = tmp19 - tmp20;
+	  c_re(output[4 * ostride]) = tmp19 + tmp20;
+     }
+     c_im(output[0]) = tmp28 + tmp27; /* k=0: sum of all five imaginary parts */
+     { /* imaginary parts of outputs 1..4 */
+	  fftw_real tmp23;
+	  fftw_real tmp31;
+	  fftw_real tmp30;
+	  fftw_real tmp32;
+	  fftw_real tmp29;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp23 = (K951056516 * tmp21) + (K587785252 * tmp22); /* sine-weighted real differences used by k=1 and k=4 */
+	  tmp31 = (K587785252 * tmp21) - (K951056516 * tmp22); /* sine-weighted real differences used by k=2 and k=3 */
+	  tmp29 = tmp27 - (K250000000 * tmp28); /* im0 minus a quarter of the other four */
+	  tmp30 = tmp26 + tmp29; /* cosine part for k=1 and k=4 */
+	  tmp32 = tmp29 - tmp26; /* cosine part for k=2 and k=3 */
+	  c_im(output[ostride]) = tmp23 + tmp30;
+	  c_im(output[4 * ostride]) = tmp30 - tmp23;
+	  c_im(output[2 * ostride]) = tmp31 + tmp32;
+	  c_im(output[3 * ostride]) = tmp32 - tmp31;
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_5_desc = { /* descriptor record for the fftwi_no_twiddle_5 codelet; field meanings noted below are presumed -- confirm against fftw_codelet_desc in fftw-int.h */
+     "fftwi_no_twiddle_5", /* name string, spelled like the function symbol */
+     (void (*)()) fftwi_no_twiddle_5, /* codelet entry point, stored through a generic function-pointer type */
+     5, /* presumably the transform length handled by the codelet */
+     FFTW_BACKWARD, /* direction tag */
+     FFTW_NOTW, /* codelet-kind tag (no-twiddle, going by the macro name) */
+     122, /* numeric tag; its meaning is defined by fftw_codelet_desc, not visible here */
+     0, /* presumably a twiddle-factor count; zero for this codelet */
+     (const int *) 0, /* null pointer: no integer table attached */
+};
diff --git a/src/fftw/fni_6.c b/src/fftw/fni_6.c
new file mode 100644
index 0000000..3454303
--- /dev/null
+++ b/src/fftw/fni_6.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:19 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 6 */
+
+/*
+ * This function contains 36 FP additions, 8 FP multiplications,
+ * (or, 32 additions, 4 multiplications, 4 fused multiply/add),
+ * 20 stack variables, and 24 memory accesses
+ */
+static const fftw_real K866025403 = /* the name encodes the leading decimal digits of the value */
+FFTW_KONST(+0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2, i.e. sin(pi/3) */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000); /* exactly 1/2, i.e. cos(pi/3) */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_6(const fftw_complex *input, fftw_complex *output, /* unscaled 6-point backward DFT: output[k*ostride] = sum over n of input[n*istride] * exp(+2*pi*i*n*k/6) */
+			int istride, int ostride) /* istride/ostride: index step between consecutive points of input/output */
+{
+     fftw_real tmp3; /* re0 - re3 */
+     fftw_real tmp11; /* re0 + re3 */
+     fftw_real tmp26; /* im0 - im3 */
+     fftw_real tmp29; /* im0 + im3 */
+     fftw_real tmp6; /* re2 - re5 */
+     fftw_real tmp12; /* re2 + re5 */
+     fftw_real tmp9; /* re4 - re1 */
+     fftw_real tmp13; /* re4 + re1 */
+     fftw_real tmp10; /* tmp6 + tmp9 */
+     fftw_real tmp14; /* tmp12 + tmp13 */
+     fftw_real tmp18; /* im4 - im1 */
+     fftw_real tmp31; /* im4 + im1 */
+     fftw_real tmp21; /* im2 - im5 */
+     fftw_real tmp30; /* im2 + im5 */
+     fftw_real tmp27; /* tmp21 + tmp18 */
+     fftw_real tmp32; /* tmp30 + tmp31 */
+     ASSERT_ALIGNED_DOUBLE; /* macro defined outside this file; presumably a stack-alignment check (generator ran with -magic-alignment-check) */
+     { /* points 0 and 3: sum and difference */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp24;
+	  fftw_real tmp25;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[3 * istride]);
+	  tmp3 = tmp1 - tmp2;
+	  tmp11 = tmp1 + tmp2;
+	  tmp24 = c_im(input[0]);
+	  tmp25 = c_im(input[3 * istride]);
+	  tmp26 = tmp24 - tmp25;
+	  tmp29 = tmp24 + tmp25;
+     }
+     { /* real parts of the pairs (2,5) and (4,1) */
+	  fftw_real tmp4;
+	  fftw_real tmp5;
+	  fftw_real tmp7;
+	  fftw_real tmp8;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp4 = c_re(input[2 * istride]);
+	  tmp5 = c_re(input[5 * istride]);
+	  tmp6 = tmp4 - tmp5;
+	  tmp12 = tmp4 + tmp5;
+	  tmp7 = c_re(input[4 * istride]);
+	  tmp8 = c_re(input[istride]);
+	  tmp9 = tmp7 - tmp8;
+	  tmp13 = tmp7 + tmp8;
+     }
+     tmp10 = tmp6 + tmp9;
+     tmp14 = tmp12 + tmp13;
+     { /* imaginary parts of the pairs (4,1) and (2,5) */
+	  fftw_real tmp16;
+	  fftw_real tmp17;
+	  fftw_real tmp19;
+	  fftw_real tmp20;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp16 = c_im(input[4 * istride]);
+	  tmp17 = c_im(input[istride]);
+	  tmp18 = tmp16 - tmp17;
+	  tmp31 = tmp16 + tmp17;
+	  tmp19 = c_im(input[2 * istride]);
+	  tmp20 = c_im(input[5 * istride]);
+	  tmp21 = tmp19 - tmp20;
+	  tmp30 = tmp19 + tmp20;
+     }
+     tmp27 = tmp21 + tmp18;
+     tmp32 = tmp30 + tmp31;
+     { /* real parts of all six outputs: odd k is built from the pair differences, even k from the pair sums */
+	  fftw_real tmp15;
+	  fftw_real tmp22;
+	  fftw_real tmp35;
+	  fftw_real tmp36;
+	  ASSERT_ALIGNED_DOUBLE;
+	  c_re(output[3 * ostride]) = tmp3 + tmp10; /* k=3: alternating-sign sum re0 - re1 + re2 - re3 + re4 - re5 */
+	  tmp15 = tmp3 - (K500000000 * tmp10); /* cosine part shared by k=1 and k=5 */
+	  tmp22 = K866025403 * (tmp18 - tmp21); /* sine part, taken from imaginary differences */
+	  c_re(output[5 * ostride]) = tmp15 - tmp22;
+	  c_re(output[ostride]) = tmp15 + tmp22;
+	  c_re(output[0]) = tmp11 + tmp14; /* k=0: sum of all six real parts */
+	  tmp35 = tmp11 - (K500000000 * tmp14); /* cosine part shared by k=2 and k=4 */
+	  tmp36 = K866025403 * (tmp31 - tmp30); /* sine part, taken from imaginary sums */
+	  c_re(output[2 * ostride]) = tmp35 - tmp36;
+	  c_re(output[4 * ostride]) = tmp35 + tmp36;
+     }
+     { /* imaginary parts of all six outputs, same odd/even split */
+	  fftw_real tmp23;
+	  fftw_real tmp28;
+	  fftw_real tmp33;
+	  fftw_real tmp34;
+	  ASSERT_ALIGNED_DOUBLE;
+	  c_im(output[3 * ostride]) = tmp26 + tmp27; /* k=3: alternating-sign sum of the imaginary parts */
+	  tmp23 = K866025403 * (tmp6 - tmp9); /* sine part, taken from real differences */
+	  tmp28 = tmp26 - (K500000000 * tmp27); /* cosine part shared by k=1 and k=5 */
+	  c_im(output[ostride]) = tmp23 + tmp28;
+	  c_im(output[5 * ostride]) = tmp28 - tmp23;
+	  c_im(output[0]) = tmp29 + tmp32; /* k=0: sum of all six imaginary parts */
+	  tmp33 = tmp29 - (K500000000 * tmp32); /* cosine part shared by k=2 and k=4 */
+	  tmp34 = K866025403 * (tmp12 - tmp13); /* sine part, taken from real sums */
+	  c_im(output[2 * ostride]) = tmp33 - tmp34;
+	  c_im(output[4 * ostride]) = tmp34 + tmp33;
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_6_desc = { /* descriptor record for the fftwi_no_twiddle_6 codelet; field meanings noted below are presumed -- confirm against fftw_codelet_desc in fftw-int.h */
+     "fftwi_no_twiddle_6", /* name string, spelled like the function symbol */
+     (void (*)()) fftwi_no_twiddle_6, /* codelet entry point, stored through a generic function-pointer type */
+     6, /* presumably the transform length handled by the codelet */
+     FFTW_BACKWARD, /* direction tag */
+     FFTW_NOTW, /* codelet-kind tag (no-twiddle, going by the macro name) */
+     144, /* numeric tag; its meaning is defined by fftw_codelet_desc, not visible here */
+     0, /* presumably a twiddle-factor count; zero for this codelet */
+     (const int *) 0, /* null pointer: no integer table attached */
+};
diff --git a/src/fftw/fni_64.c b/src/fftw/fni_64.c
new file mode 100644
index 0000000..10e8cf1
--- /dev/null
+++ b/src/fftw/fni_64.c
@@ -0,0 +1,2464 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:51 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 64 */
+
+/*
+ * This function contains 912 FP additions, 248 FP multiplications,
+ * (or, 808 additions, 144 multiplications, 104 fused multiply/add),
+ * 156 stack variables, and 256 memory accesses
+ */
+static const fftw_real K195090322 = /* each name encodes the leading decimal digits of its value; the values are sines/cosines of multiples of pi/32 */
+FFTW_KONST(+0.195090322016128267848284868477022240927691618); /* numerically sin(pi/16) */
+static const fftw_real K980785280 =
+FFTW_KONST(+0.980785280403230449126182236134239036973933731); /* numerically cos(pi/16) */
+static const fftw_real K773010453 =
+FFTW_KONST(+0.773010453362736960810906609758469800971041293); /* numerically cos(7*pi/32) */
+static const fftw_real K634393284 =
+FFTW_KONST(+0.634393284163645498215171613225493370675687095); /* numerically sin(7*pi/32) */
+static const fftw_real K098017140 =
+FFTW_KONST(+0.098017140329560601994195563888641845861136673); /* numerically sin(pi/32) */
+static const fftw_real K995184726 =
+FFTW_KONST(+0.995184726672196886244836953109479921575474869); /* numerically cos(pi/32) */
+static const fftw_real K831469612 =
+FFTW_KONST(+0.831469612302545237078788377617905756738560812); /* numerically cos(3*pi/16) */
+static const fftw_real K555570233 =
+FFTW_KONST(+0.555570233019602224742830813948532874374937191); /* numerically sin(3*pi/16) */
+static const fftw_real K956940335 =
+FFTW_KONST(+0.956940335732208864935797886980269969482849206); /* numerically cos(3*pi/32) */
+static const fftw_real K290284677 =
+FFTW_KONST(+0.290284677254462367636192375817395274691476278); /* numerically sin(3*pi/32) */
+static const fftw_real K471396736 =
+FFTW_KONST(+0.471396736825997648556387625905254377657460319); /* numerically sin(5*pi/32) */
+static const fftw_real K881921264 =
+FFTW_KONST(+0.881921264348355029712756863660388349508442621); /* numerically cos(5*pi/32) */
+static const fftw_real K382683432 =
+FFTW_KONST(+0.382683432365089771728459984030398866761344562); /* numerically sin(pi/8) */
+static const fftw_real K923879532 =
+FFTW_KONST(+0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */
+static const fftw_real K707106781 =
+FFTW_KONST(+0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2, i.e. cos(pi/4) */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_64(const fftw_complex *input, fftw_complex *output,
+			 int istride, int ostride)
+{
+     fftw_real tmp193;
+     fftw_real tmp471;
+     fftw_real tmp15;
+     fftw_real tmp879;
+     fftw_real tmp719;
+     fftw_real tmp781;
+     fftw_real tmp142;
+     fftw_real tmp853;
+     fftw_real tmp371;
+     fftw_real tmp537;
+     fftw_real tmp637;
+     fftw_real tmp755;
+     fftw_real tmp200;
+     fftw_real tmp538;
+     fftw_real tmp374;
+     fftw_real tmp472;
+     fftw_real tmp109;
+     fftw_real tmp874;
+     fftw_real tmp693;
+     fftw_real tmp776;
+     fftw_real tmp830;
+     fftw_real tmp871;
+     fftw_real tmp710;
+     fftw_real tmp773;
+     fftw_real tmp329;
+     fftw_real tmp432;
+     fftw_real tmp519;
+     fftw_real tmp596;
+     fftw_real tmp362;
+     fftw_real tmp429;
+     fftw_real tmp530;
+     fftw_real tmp593;
+     fftw_real tmp30;
+     fftw_real tmp854;
+     fftw_real tmp640;
+     fftw_real tmp720;
+     fftw_real tmp157;
+     fftw_real tmp880;
+     fftw_real tmp643;
+     fftw_real tmp721;
+     fftw_real tmp208;
+     fftw_real tmp377;
+     fftw_real tmp476;
+     fftw_real tmp541;
+     fftw_real tmp215;
+     fftw_real tmp376;
+     fftw_real tmp479;
+     fftw_real tmp540;
+     fftw_real tmp124;
+     fftw_real tmp872;
+     fftw_real tmp365;
+     fftw_real tmp433;
+     fftw_real tmp352;
+     fftw_real tmp430;
+     fftw_real tmp833;
+     fftw_real tmp875;
+     fftw_real tmp526;
+     fftw_real tmp594;
+     fftw_real tmp533;
+     fftw_real tmp597;
+     fftw_real tmp704;
+     fftw_real tmp774;
+     fftw_real tmp713;
+     fftw_real tmp777;
+     fftw_real tmp46;
+     fftw_real tmp856;
+     fftw_real tmp648;
+     fftw_real tmp758;
+     fftw_real tmp173;
+     fftw_real tmp857;
+     fftw_real tmp651;
+     fftw_real tmp759;
+     fftw_real tmp228;
+     fftw_real tmp414;
+     fftw_real tmp484;
+     fftw_real tmp578;
+     fftw_real tmp235;
+     fftw_real tmp415;
+     fftw_real tmp487;
+     fftw_real tmp579;
+     fftw_real tmp78;
+     fftw_real tmp867;
+     fftw_real tmp666;
+     fftw_real tmp769;
+     fftw_real tmp821;
+     fftw_real tmp864;
+     fftw_real tmp683;
+     fftw_real tmp766;
+     fftw_real tmp274;
+     fftw_real tmp425;
+     fftw_real tmp500;
+     fftw_real tmp589;
+     fftw_real tmp307;
+     fftw_real tmp422;
+     fftw_real tmp511;
+     fftw_real tmp586;
+     fftw_real tmp61;
+     fftw_real tmp859;
+     fftw_real tmp655;
+     fftw_real tmp761;
+     fftw_real tmp188;
+     fftw_real tmp860;
+     fftw_real tmp658;
+     fftw_real tmp762;
+     fftw_real tmp247;
+     fftw_real tmp417;
+     fftw_real tmp491;
+     fftw_real tmp581;
+     fftw_real tmp254;
+     fftw_real tmp418;
+     fftw_real tmp494;
+     fftw_real tmp582;
+     fftw_real tmp93;
+     fftw_real tmp865;
+     fftw_real tmp310;
+     fftw_real tmp426;
+     fftw_real tmp297;
+     fftw_real tmp423;
+     fftw_real tmp824;
+     fftw_real tmp868;
+     fftw_real tmp507;
+     fftw_real tmp587;
+     fftw_real tmp514;
+     fftw_real tmp590;
+     fftw_real tmp677;
+     fftw_real tmp767;
+     fftw_real tmp686;
+     fftw_real tmp770;
+     ASSERT_ALIGNED_DOUBLE;
+     {
+	  fftw_real tmp3;
+	  fftw_real tmp369;
+	  fftw_real tmp130;
+	  fftw_real tmp192;
+	  fftw_real tmp6;
+	  fftw_real tmp191;
+	  fftw_real tmp133;
+	  fftw_real tmp370;
+	  fftw_real tmp10;
+	  fftw_real tmp194;
+	  fftw_real tmp137;
+	  fftw_real tmp195;
+	  fftw_real tmp13;
+	  fftw_real tmp198;
+	  fftw_real tmp140;
+	  fftw_real tmp197;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp1;
+	       fftw_real tmp2;
+	       fftw_real tmp128;
+	       fftw_real tmp129;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(input[0]);
+	       tmp2 = c_re(input[32 * istride]);
+	       tmp3 = tmp1 + tmp2;
+	       tmp369 = tmp1 - tmp2;
+	       tmp128 = c_im(input[0]);
+	       tmp129 = c_im(input[32 * istride]);
+	       tmp130 = tmp128 + tmp129;
+	       tmp192 = tmp128 - tmp129;
+	  }
+	  {
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       fftw_real tmp131;
+	       fftw_real tmp132;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp4 = c_re(input[16 * istride]);
+	       tmp5 = c_re(input[48 * istride]);
+	       tmp6 = tmp4 + tmp5;
+	       tmp191 = tmp4 - tmp5;
+	       tmp131 = c_im(input[16 * istride]);
+	       tmp132 = c_im(input[48 * istride]);
+	       tmp133 = tmp131 + tmp132;
+	       tmp370 = tmp131 - tmp132;
+	  }
+	  {
+	       fftw_real tmp8;
+	       fftw_real tmp9;
+	       fftw_real tmp135;
+	       fftw_real tmp136;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp8 = c_re(input[8 * istride]);
+	       tmp9 = c_re(input[40 * istride]);
+	       tmp10 = tmp8 + tmp9;
+	       tmp194 = tmp8 - tmp9;
+	       tmp135 = c_im(input[8 * istride]);
+	       tmp136 = c_im(input[40 * istride]);
+	       tmp137 = tmp135 + tmp136;
+	       tmp195 = tmp135 - tmp136;
+	  }
+	  {
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       fftw_real tmp138;
+	       fftw_real tmp139;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp11 = c_re(input[56 * istride]);
+	       tmp12 = c_re(input[24 * istride]);
+	       tmp13 = tmp11 + tmp12;
+	       tmp198 = tmp11 - tmp12;
+	       tmp138 = c_im(input[56 * istride]);
+	       tmp139 = c_im(input[24 * istride]);
+	       tmp140 = tmp138 + tmp139;
+	       tmp197 = tmp138 - tmp139;
+	  }
+	  {
+	       fftw_real tmp7;
+	       fftw_real tmp14;
+	       fftw_real tmp635;
+	       fftw_real tmp636;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp193 = tmp191 + tmp192;
+	       tmp471 = tmp192 - tmp191;
+	       tmp7 = tmp3 + tmp6;
+	       tmp14 = tmp10 + tmp13;
+	       tmp15 = tmp7 + tmp14;
+	       tmp879 = tmp7 - tmp14;
+	       {
+		    fftw_real tmp717;
+		    fftw_real tmp718;
+		    fftw_real tmp134;
+		    fftw_real tmp141;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp717 = tmp3 - tmp6;
+		    tmp718 = tmp140 - tmp137;
+		    tmp719 = tmp717 + tmp718;
+		    tmp781 = tmp717 - tmp718;
+		    tmp134 = tmp130 + tmp133;
+		    tmp141 = tmp137 + tmp140;
+		    tmp142 = tmp134 + tmp141;
+		    tmp853 = tmp134 - tmp141;
+	       }
+	       tmp371 = tmp369 - tmp370;
+	       tmp537 = tmp369 + tmp370;
+	       tmp635 = tmp10 - tmp13;
+	       tmp636 = tmp130 - tmp133;
+	       tmp637 = tmp635 + tmp636;
+	       tmp755 = tmp636 - tmp635;
+	       {
+		    fftw_real tmp196;
+		    fftw_real tmp199;
+		    fftw_real tmp372;
+		    fftw_real tmp373;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp196 = tmp194 + tmp195;
+		    tmp199 = tmp197 - tmp198;
+		    tmp200 = K707106781 * (tmp196 + tmp199);
+		    tmp538 = K707106781 * (tmp199 - tmp196);
+		    tmp372 = tmp194 - tmp195;
+		    tmp373 = tmp198 + tmp197;
+		    tmp374 = K707106781 * (tmp372 + tmp373);
+		    tmp472 = K707106781 * (tmp372 - tmp373);
+	       }
+	  }
+     }
+     {
+	  fftw_real tmp97;
+	  fftw_real tmp313;
+	  fftw_real tmp357;
+	  fftw_real tmp707;
+	  fftw_real tmp100;
+	  fftw_real tmp354;
+	  fftw_real tmp316;
+	  fftw_real tmp708;
+	  fftw_real tmp107;
+	  fftw_real tmp690;
+	  fftw_real tmp327;
+	  fftw_real tmp360;
+	  fftw_real tmp104;
+	  fftw_real tmp691;
+	  fftw_real tmp322;
+	  fftw_real tmp359;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp95;
+	       fftw_real tmp96;
+	       fftw_real tmp314;
+	       fftw_real tmp315;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp95 = c_re(input[63 * istride]);
+	       tmp96 = c_re(input[31 * istride]);
+	       tmp97 = tmp95 + tmp96;
+	       tmp313 = tmp95 - tmp96;
+	       {
+		    fftw_real tmp355;
+		    fftw_real tmp356;
+		    fftw_real tmp98;
+		    fftw_real tmp99;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp355 = c_im(input[63 * istride]);
+		    tmp356 = c_im(input[31 * istride]);
+		    tmp357 = tmp355 - tmp356;
+		    tmp707 = tmp355 + tmp356;
+		    tmp98 = c_re(input[15 * istride]);
+		    tmp99 = c_re(input[47 * istride]);
+		    tmp100 = tmp98 + tmp99;
+		    tmp354 = tmp98 - tmp99;
+	       }
+	       tmp314 = c_im(input[15 * istride]);
+	       tmp315 = c_im(input[47 * istride]);
+	       tmp316 = tmp314 - tmp315;
+	       tmp708 = tmp314 + tmp315;
+	       {
+		    fftw_real tmp105;
+		    fftw_real tmp106;
+		    fftw_real tmp323;
+		    fftw_real tmp324;
+		    fftw_real tmp325;
+		    fftw_real tmp326;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp105 = c_re(input[55 * istride]);
+		    tmp106 = c_re(input[23 * istride]);
+		    tmp323 = tmp105 - tmp106;
+		    tmp324 = c_im(input[55 * istride]);
+		    tmp325 = c_im(input[23 * istride]);
+		    tmp326 = tmp324 - tmp325;
+		    tmp107 = tmp105 + tmp106;
+		    tmp690 = tmp324 + tmp325;
+		    tmp327 = tmp323 + tmp326;
+		    tmp360 = tmp326 - tmp323;
+	       }
+	       {
+		    fftw_real tmp102;
+		    fftw_real tmp103;
+		    fftw_real tmp318;
+		    fftw_real tmp319;
+		    fftw_real tmp320;
+		    fftw_real tmp321;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp102 = c_re(input[7 * istride]);
+		    tmp103 = c_re(input[39 * istride]);
+		    tmp318 = tmp102 - tmp103;
+		    tmp319 = c_im(input[7 * istride]);
+		    tmp320 = c_im(input[39 * istride]);
+		    tmp321 = tmp319 - tmp320;
+		    tmp104 = tmp102 + tmp103;
+		    tmp691 = tmp319 + tmp320;
+		    tmp322 = tmp318 - tmp321;
+		    tmp359 = tmp318 + tmp321;
+	       }
+	  }
+	  {
+	       fftw_real tmp101;
+	       fftw_real tmp108;
+	       fftw_real tmp689;
+	       fftw_real tmp692;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp101 = tmp97 + tmp100;
+	       tmp108 = tmp104 + tmp107;
+	       tmp109 = tmp101 + tmp108;
+	       tmp874 = tmp101 - tmp108;
+	       tmp689 = tmp97 - tmp100;
+	       tmp692 = tmp690 - tmp691;
+	       tmp693 = tmp689 + tmp692;
+	       tmp776 = tmp689 - tmp692;
+	  }
+	  {
+	       fftw_real tmp828;
+	       fftw_real tmp829;
+	       fftw_real tmp706;
+	       fftw_real tmp709;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp828 = tmp707 + tmp708;
+	       tmp829 = tmp691 + tmp690;
+	       tmp830 = tmp828 + tmp829;
+	       tmp871 = tmp828 - tmp829;
+	       tmp706 = tmp104 - tmp107;
+	       tmp709 = tmp707 - tmp708;
+	       tmp710 = tmp706 + tmp709;
+	       tmp773 = tmp709 - tmp706;
+	  }
+	  {
+	       fftw_real tmp317;
+	       fftw_real tmp328;
+	       fftw_real tmp517;
+	       fftw_real tmp518;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp317 = tmp313 - tmp316;
+	       tmp328 = K707106781 * (tmp322 + tmp327);
+	       tmp329 = tmp317 - tmp328;
+	       tmp432 = tmp317 + tmp328;
+	       tmp517 = tmp313 + tmp316;
+	       tmp518 = K707106781 * (tmp360 - tmp359);
+	       tmp519 = tmp517 - tmp518;
+	       tmp596 = tmp517 + tmp518;
+	  }
+	  {
+	       fftw_real tmp358;
+	       fftw_real tmp361;
+	       fftw_real tmp528;
+	       fftw_real tmp529;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp358 = tmp354 + tmp357;
+	       tmp361 = K707106781 * (tmp359 + tmp360);
+	       tmp362 = tmp358 - tmp361;
+	       tmp429 = tmp358 + tmp361;
+	       tmp528 = tmp357 - tmp354;
+	       tmp529 = K707106781 * (tmp322 - tmp327);
+	       tmp530 = tmp528 - tmp529;
+	       tmp593 = tmp528 + tmp529;
+	  }
+     }
+     {
+	  fftw_real tmp18;
+	  fftw_real tmp202;
+	  fftw_real tmp145;
+	  fftw_real tmp206;
+	  fftw_real tmp21;
+	  fftw_real tmp205;
+	  fftw_real tmp148;
+	  fftw_real tmp203;
+	  fftw_real tmp25;
+	  fftw_real tmp212;
+	  fftw_real tmp152;
+	  fftw_real tmp210;
+	  fftw_real tmp28;
+	  fftw_real tmp209;
+	  fftw_real tmp155;
+	  fftw_real tmp213;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp16;
+	       fftw_real tmp17;
+	       fftw_real tmp143;
+	       fftw_real tmp144;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp16 = c_re(input[4 * istride]);
+	       tmp17 = c_re(input[36 * istride]);
+	       tmp18 = tmp16 + tmp17;
+	       tmp202 = tmp16 - tmp17;
+	       tmp143 = c_im(input[4 * istride]);
+	       tmp144 = c_im(input[36 * istride]);
+	       tmp145 = tmp143 + tmp144;
+	       tmp206 = tmp143 - tmp144;
+	  }
+	  {
+	       fftw_real tmp19;
+	       fftw_real tmp20;
+	       fftw_real tmp146;
+	       fftw_real tmp147;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp19 = c_re(input[20 * istride]);
+	       tmp20 = c_re(input[52 * istride]);
+	       tmp21 = tmp19 + tmp20;
+	       tmp205 = tmp19 - tmp20;
+	       tmp146 = c_im(input[20 * istride]);
+	       tmp147 = c_im(input[52 * istride]);
+	       tmp148 = tmp146 + tmp147;
+	       tmp203 = tmp146 - tmp147;
+	  }
+	  {
+	       fftw_real tmp23;
+	       fftw_real tmp24;
+	       fftw_real tmp150;
+	       fftw_real tmp151;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp23 = c_re(input[60 * istride]);
+	       tmp24 = c_re(input[28 * istride]);
+	       tmp25 = tmp23 + tmp24;
+	       tmp212 = tmp23 - tmp24;
+	       tmp150 = c_im(input[60 * istride]);
+	       tmp151 = c_im(input[28 * istride]);
+	       tmp152 = tmp150 + tmp151;
+	       tmp210 = tmp150 - tmp151;
+	  }
+	  {
+	       fftw_real tmp26;
+	       fftw_real tmp27;
+	       fftw_real tmp153;
+	       fftw_real tmp154;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp26 = c_re(input[12 * istride]);
+	       tmp27 = c_re(input[44 * istride]);
+	       tmp28 = tmp26 + tmp27;
+	       tmp209 = tmp26 - tmp27;
+	       tmp153 = c_im(input[12 * istride]);
+	       tmp154 = c_im(input[44 * istride]);
+	       tmp155 = tmp153 + tmp154;
+	       tmp213 = tmp153 - tmp154;
+	  }
+	  {
+	       fftw_real tmp22;
+	       fftw_real tmp29;
+	       fftw_real tmp638;
+	       fftw_real tmp639;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp22 = tmp18 + tmp21;
+	       tmp29 = tmp25 + tmp28;
+	       tmp30 = tmp22 + tmp29;
+	       tmp854 = tmp22 - tmp29;
+	       tmp638 = tmp18 - tmp21;
+	       tmp639 = tmp145 - tmp148;
+	       tmp640 = tmp638 + tmp639;
+	       tmp720 = tmp638 - tmp639;
+	  }
+	  {
+	       fftw_real tmp149;
+	       fftw_real tmp156;
+	       fftw_real tmp641;
+	       fftw_real tmp642;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp149 = tmp145 + tmp148;
+	       tmp156 = tmp152 + tmp155;
+	       tmp157 = tmp149 + tmp156;
+	       tmp880 = tmp156 - tmp149;
+	       tmp641 = tmp152 - tmp155;
+	       tmp642 = tmp25 - tmp28;
+	       tmp643 = tmp641 - tmp642;
+	       tmp721 = tmp642 + tmp641;
+	  }
+	  {
+	       fftw_real tmp204;
+	       fftw_real tmp207;
+	       fftw_real tmp474;
+	       fftw_real tmp475;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp204 = tmp202 - tmp203;
+	       tmp207 = tmp205 + tmp206;
+	       tmp208 = (K923879532 * tmp204) - (K382683432 * tmp207);
+	       tmp377 = (K923879532 * tmp207) + (K382683432 * tmp204);
+	       tmp474 = tmp202 + tmp203;
+	       tmp475 = tmp206 - tmp205;
+	       tmp476 = (K382683432 * tmp474) - (K923879532 * tmp475);
+	       tmp541 = (K382683432 * tmp475) + (K923879532 * tmp474);
+	  }
+	  {
+	       fftw_real tmp211;
+	       fftw_real tmp214;
+	       fftw_real tmp477;
+	       fftw_real tmp478;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp211 = tmp209 + tmp210;
+	       tmp214 = tmp212 - tmp213;
+	       tmp215 = (K382683432 * tmp211) + (K923879532 * tmp214);
+	       tmp376 = (K923879532 * tmp211) - (K382683432 * tmp214);
+	       tmp477 = tmp210 - tmp209;
+	       tmp478 = tmp212 + tmp213;
+	       tmp479 = (K923879532 * tmp477) + (K382683432 * tmp478);
+	       tmp540 = (K382683432 * tmp477) - (K923879532 * tmp478);
+	  }
+     }
+     {
+	  fftw_real tmp112;
+	  fftw_real tmp695;
+	  fftw_real tmp115;
+	  fftw_real tmp696;
+	  fftw_real tmp345;
+	  fftw_real tmp523;
+	  fftw_real tmp350;
+	  fftw_real tmp524;
+	  fftw_real tmp697;
+	  fftw_real tmp694;
+	  fftw_real tmp119;
+	  fftw_real tmp700;
+	  fftw_real tmp122;
+	  fftw_real tmp701;
+	  fftw_real tmp334;
+	  fftw_real tmp520;
+	  fftw_real tmp339;
+	  fftw_real tmp521;
+	  fftw_real tmp702;
+	  fftw_real tmp699;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp346;
+	       fftw_real tmp344;
+	       fftw_real tmp341;
+	       fftw_real tmp349;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp110;
+		    fftw_real tmp111;
+		    fftw_real tmp342;
+		    fftw_real tmp343;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp110 = c_re(input[3 * istride]);
+		    tmp111 = c_re(input[35 * istride]);
+		    tmp112 = tmp110 + tmp111;
+		    tmp346 = tmp110 - tmp111;
+		    tmp342 = c_im(input[3 * istride]);
+		    tmp343 = c_im(input[35 * istride]);
+		    tmp344 = tmp342 - tmp343;
+		    tmp695 = tmp342 + tmp343;
+	       }
+	       {
+		    fftw_real tmp113;
+		    fftw_real tmp114;
+		    fftw_real tmp347;
+		    fftw_real tmp348;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp113 = c_re(input[19 * istride]);
+		    tmp114 = c_re(input[51 * istride]);
+		    tmp115 = tmp113 + tmp114;
+		    tmp341 = tmp113 - tmp114;
+		    tmp347 = c_im(input[19 * istride]);
+		    tmp348 = c_im(input[51 * istride]);
+		    tmp349 = tmp347 - tmp348;
+		    tmp696 = tmp347 + tmp348;
+	       }
+	       tmp345 = tmp341 + tmp344;
+	       tmp523 = tmp344 - tmp341;
+	       tmp350 = tmp346 - tmp349;
+	       tmp524 = tmp346 + tmp349;
+	       tmp697 = tmp695 - tmp696;
+	       tmp694 = tmp112 - tmp115;
+	  }
+	  {
+	       fftw_real tmp335;
+	       fftw_real tmp333;
+	       fftw_real tmp330;
+	       fftw_real tmp338;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp117;
+		    fftw_real tmp118;
+		    fftw_real tmp331;
+		    fftw_real tmp332;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp117 = c_re(input[59 * istride]);
+		    tmp118 = c_re(input[27 * istride]);
+		    tmp119 = tmp117 + tmp118;
+		    tmp335 = tmp117 - tmp118;
+		    tmp331 = c_im(input[59 * istride]);
+		    tmp332 = c_im(input[27 * istride]);
+		    tmp333 = tmp331 - tmp332;
+		    tmp700 = tmp331 + tmp332;
+	       }
+	       {
+		    fftw_real tmp120;
+		    fftw_real tmp121;
+		    fftw_real tmp336;
+		    fftw_real tmp337;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp120 = c_re(input[11 * istride]);
+		    tmp121 = c_re(input[43 * istride]);
+		    tmp122 = tmp120 + tmp121;
+		    tmp330 = tmp120 - tmp121;
+		    tmp336 = c_im(input[11 * istride]);
+		    tmp337 = c_im(input[43 * istride]);
+		    tmp338 = tmp336 - tmp337;
+		    tmp701 = tmp336 + tmp337;
+	       }
+	       tmp334 = tmp330 + tmp333;
+	       tmp520 = tmp333 - tmp330;
+	       tmp339 = tmp335 - tmp338;
+	       tmp521 = tmp335 + tmp338;
+	       tmp702 = tmp700 - tmp701;
+	       tmp699 = tmp119 - tmp122;
+	  }
+	  {
+	       fftw_real tmp116;
+	       fftw_real tmp123;
+	       fftw_real tmp363;
+	       fftw_real tmp364;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp116 = tmp112 + tmp115;
+	       tmp123 = tmp119 + tmp122;
+	       tmp124 = tmp116 + tmp123;
+	       tmp872 = tmp116 - tmp123;
+	       tmp363 = (K923879532 * tmp350) - (K382683432 * tmp345);
+	       tmp364 = (K382683432 * tmp334) + (K923879532 * tmp339);
+	       tmp365 = tmp363 - tmp364;
+	       tmp433 = tmp363 + tmp364;
+	  }
+	  {
+	       fftw_real tmp340;
+	       fftw_real tmp351;
+	       fftw_real tmp831;
+	       fftw_real tmp832;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp340 = (K923879532 * tmp334) - (K382683432 * tmp339);
+	       tmp351 = (K923879532 * tmp345) + (K382683432 * tmp350);
+	       tmp352 = tmp340 - tmp351;
+	       tmp430 = tmp351 + tmp340;
+	       tmp831 = tmp695 + tmp696;
+	       tmp832 = tmp700 + tmp701;
+	       tmp833 = tmp831 + tmp832;
+	       tmp875 = tmp832 - tmp831;
+	  }
+	  {
+	       fftw_real tmp522;
+	       fftw_real tmp525;
+	       fftw_real tmp531;
+	       fftw_real tmp532;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp522 = (K382683432 * tmp520) - (K923879532 * tmp521);
+	       tmp525 = (K382683432 * tmp523) + (K923879532 * tmp524);
+	       tmp526 = tmp522 - tmp525;
+	       tmp594 = tmp525 + tmp522;
+	       tmp531 = (K382683432 * tmp524) - (K923879532 * tmp523);
+	       tmp532 = (K923879532 * tmp520) + (K382683432 * tmp521);
+	       tmp533 = tmp531 - tmp532;
+	       tmp597 = tmp531 + tmp532;
+	  }
+	  {
+	       fftw_real tmp698;
+	       fftw_real tmp703;
+	       fftw_real tmp711;
+	       fftw_real tmp712;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp698 = tmp694 - tmp697;
+	       tmp703 = tmp699 + tmp702;
+	       tmp704 = K707106781 * (tmp698 + tmp703);
+	       tmp774 = K707106781 * (tmp698 - tmp703);
+	       tmp711 = tmp694 + tmp697;
+	       tmp712 = tmp702 - tmp699;
+	       tmp713 = K707106781 * (tmp711 + tmp712);
+	       tmp777 = K707106781 * (tmp712 - tmp711);
+	  }
+     }
+     {
+	  fftw_real tmp34;
+	  fftw_real tmp218;
+	  fftw_real tmp161;
+	  fftw_real tmp230;
+	  fftw_real tmp37;
+	  fftw_real tmp229;
+	  fftw_real tmp164;
+	  fftw_real tmp219;
+	  fftw_real tmp44;
+	  fftw_real tmp233;
+	  fftw_real tmp226;
+	  fftw_real tmp171;
+	  fftw_real tmp41;
+	  fftw_real tmp232;
+	  fftw_real tmp223;
+	  fftw_real tmp168;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp32;
+	       fftw_real tmp33;
+	       fftw_real tmp162;
+	       fftw_real tmp163;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp32 = c_re(input[2 * istride]);
+	       tmp33 = c_re(input[34 * istride]);
+	       tmp34 = tmp32 + tmp33;
+	       tmp218 = tmp32 - tmp33;
+	       {
+		    fftw_real tmp159;
+		    fftw_real tmp160;
+		    fftw_real tmp35;
+		    fftw_real tmp36;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp159 = c_im(input[2 * istride]);
+		    tmp160 = c_im(input[34 * istride]);
+		    tmp161 = tmp159 + tmp160;
+		    tmp230 = tmp159 - tmp160;
+		    tmp35 = c_re(input[18 * istride]);
+		    tmp36 = c_re(input[50 * istride]);
+		    tmp37 = tmp35 + tmp36;
+		    tmp229 = tmp35 - tmp36;
+	       }
+	       tmp162 = c_im(input[18 * istride]);
+	       tmp163 = c_im(input[50 * istride]);
+	       tmp164 = tmp162 + tmp163;
+	       tmp219 = tmp162 - tmp163;
+	       {
+		    fftw_real tmp42;
+		    fftw_real tmp43;
+		    fftw_real tmp224;
+		    fftw_real tmp169;
+		    fftw_real tmp170;
+		    fftw_real tmp225;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp42 = c_re(input[58 * istride]);
+		    tmp43 = c_re(input[26 * istride]);
+		    tmp224 = tmp42 - tmp43;
+		    tmp169 = c_im(input[58 * istride]);
+		    tmp170 = c_im(input[26 * istride]);
+		    tmp225 = tmp169 - tmp170;
+		    tmp44 = tmp42 + tmp43;
+		    tmp233 = tmp225 - tmp224;
+		    tmp226 = tmp224 + tmp225;
+		    tmp171 = tmp169 + tmp170;
+	       }
+	       {
+		    fftw_real tmp39;
+		    fftw_real tmp40;
+		    fftw_real tmp221;
+		    fftw_real tmp166;
+		    fftw_real tmp167;
+		    fftw_real tmp222;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp39 = c_re(input[10 * istride]);
+		    tmp40 = c_re(input[42 * istride]);
+		    tmp221 = tmp39 - tmp40;
+		    tmp166 = c_im(input[10 * istride]);
+		    tmp167 = c_im(input[42 * istride]);
+		    tmp222 = tmp166 - tmp167;
+		    tmp41 = tmp39 + tmp40;
+		    tmp232 = tmp221 + tmp222;
+		    tmp223 = tmp221 - tmp222;
+		    tmp168 = tmp166 + tmp167;
+	       }
+	  }
+	  {
+	       fftw_real tmp38;
+	       fftw_real tmp45;
+	       fftw_real tmp646;
+	       fftw_real tmp647;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp38 = tmp34 + tmp37;
+	       tmp45 = tmp41 + tmp44;
+	       tmp46 = tmp38 + tmp45;
+	       tmp856 = tmp38 - tmp45;
+	       tmp646 = tmp34 - tmp37;
+	       tmp647 = tmp171 - tmp168;
+	       tmp648 = tmp646 + tmp647;
+	       tmp758 = tmp646 - tmp647;
+	  }
+	  {
+	       fftw_real tmp165;
+	       fftw_real tmp172;
+	       fftw_real tmp649;
+	       fftw_real tmp650;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp165 = tmp161 + tmp164;
+	       tmp172 = tmp168 + tmp171;
+	       tmp173 = tmp165 + tmp172;
+	       tmp857 = tmp165 - tmp172;
+	       tmp649 = tmp41 - tmp44;
+	       tmp650 = tmp161 - tmp164;
+	       tmp651 = tmp649 + tmp650;
+	       tmp759 = tmp650 - tmp649;
+	  }
+	  {
+	       fftw_real tmp220;
+	       fftw_real tmp227;
+	       fftw_real tmp482;
+	       fftw_real tmp483;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp220 = tmp218 - tmp219;
+	       tmp227 = K707106781 * (tmp223 + tmp226);
+	       tmp228 = tmp220 - tmp227;
+	       tmp414 = tmp220 + tmp227;
+	       tmp482 = tmp218 + tmp219;
+	       tmp483 = K707106781 * (tmp233 - tmp232);
+	       tmp484 = tmp482 - tmp483;
+	       tmp578 = tmp482 + tmp483;
+	  }
+	  {
+	       fftw_real tmp231;
+	       fftw_real tmp234;
+	       fftw_real tmp485;
+	       fftw_real tmp486;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp231 = tmp229 + tmp230;
+	       tmp234 = K707106781 * (tmp232 + tmp233);
+	       tmp235 = tmp231 - tmp234;
+	       tmp415 = tmp231 + tmp234;
+	       tmp485 = tmp230 - tmp229;
+	       tmp486 = K707106781 * (tmp223 - tmp226);
+	       tmp487 = tmp485 - tmp486;
+	       tmp579 = tmp485 + tmp486;
+	  }
+     }
+     {
+	  fftw_real tmp66;
+	  fftw_real tmp258;
+	  fftw_real tmp302;
+	  fftw_real tmp680;
+	  fftw_real tmp69;
+	  fftw_real tmp299;
+	  fftw_real tmp261;
+	  fftw_real tmp681;
+	  fftw_real tmp76;
+	  fftw_real tmp663;
+	  fftw_real tmp272;
+	  fftw_real tmp305;
+	  fftw_real tmp73;
+	  fftw_real tmp664;
+	  fftw_real tmp267;
+	  fftw_real tmp304;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp64;
+	       fftw_real tmp65;
+	       fftw_real tmp259;
+	       fftw_real tmp260;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp64 = c_re(input[istride]);
+	       tmp65 = c_re(input[33 * istride]);
+	       tmp66 = tmp64 + tmp65;
+	       tmp258 = tmp64 - tmp65;
+	       {
+		    fftw_real tmp300;
+		    fftw_real tmp301;
+		    fftw_real tmp67;
+		    fftw_real tmp68;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp300 = c_im(input[istride]);
+		    tmp301 = c_im(input[33 * istride]);
+		    tmp302 = tmp300 - tmp301;
+		    tmp680 = tmp300 + tmp301;
+		    tmp67 = c_re(input[17 * istride]);
+		    tmp68 = c_re(input[49 * istride]);
+		    tmp69 = tmp67 + tmp68;
+		    tmp299 = tmp67 - tmp68;
+	       }
+	       tmp259 = c_im(input[17 * istride]);
+	       tmp260 = c_im(input[49 * istride]);
+	       tmp261 = tmp259 - tmp260;
+	       tmp681 = tmp259 + tmp260;
+	       {
+		    fftw_real tmp74;
+		    fftw_real tmp75;
+		    fftw_real tmp268;
+		    fftw_real tmp269;
+		    fftw_real tmp270;
+		    fftw_real tmp271;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp74 = c_re(input[57 * istride]);
+		    tmp75 = c_re(input[25 * istride]);
+		    tmp268 = tmp74 - tmp75;
+		    tmp269 = c_im(input[57 * istride]);
+		    tmp270 = c_im(input[25 * istride]);
+		    tmp271 = tmp269 - tmp270;
+		    tmp76 = tmp74 + tmp75;
+		    tmp663 = tmp269 + tmp270;
+		    tmp272 = tmp268 + tmp271;
+		    tmp305 = tmp271 - tmp268;
+	       }
+	       {
+		    fftw_real tmp71;
+		    fftw_real tmp72;
+		    fftw_real tmp263;
+		    fftw_real tmp264;
+		    fftw_real tmp265;
+		    fftw_real tmp266;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp71 = c_re(input[9 * istride]);
+		    tmp72 = c_re(input[41 * istride]);
+		    tmp263 = tmp71 - tmp72;
+		    tmp264 = c_im(input[9 * istride]);
+		    tmp265 = c_im(input[41 * istride]);
+		    tmp266 = tmp264 - tmp265;
+		    tmp73 = tmp71 + tmp72;
+		    tmp664 = tmp264 + tmp265;
+		    tmp267 = tmp263 - tmp266;
+		    tmp304 = tmp263 + tmp266;
+	       }
+	  }
+	  {
+	       fftw_real tmp70;
+	       fftw_real tmp77;
+	       fftw_real tmp662;
+	       fftw_real tmp665;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp70 = tmp66 + tmp69;
+	       tmp77 = tmp73 + tmp76;
+	       tmp78 = tmp70 + tmp77;
+	       tmp867 = tmp70 - tmp77;
+	       tmp662 = tmp66 - tmp69;
+	       tmp665 = tmp663 - tmp664;
+	       tmp666 = tmp662 + tmp665;
+	       tmp769 = tmp662 - tmp665;
+	  }
+	  {
+	       fftw_real tmp819;
+	       fftw_real tmp820;
+	       fftw_real tmp679;
+	       fftw_real tmp682;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp819 = tmp680 + tmp681;
+	       tmp820 = tmp664 + tmp663;
+	       tmp821 = tmp819 + tmp820;
+	       tmp864 = tmp819 - tmp820;
+	       tmp679 = tmp73 - tmp76;
+	       tmp682 = tmp680 - tmp681;
+	       tmp683 = tmp679 + tmp682;
+	       tmp766 = tmp682 - tmp679;
+	  }
+	  {
+	       fftw_real tmp262;
+	       fftw_real tmp273;
+	       fftw_real tmp498;
+	       fftw_real tmp499;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp262 = tmp258 - tmp261;
+	       tmp273 = K707106781 * (tmp267 + tmp272);
+	       tmp274 = tmp262 - tmp273;
+	       tmp425 = tmp262 + tmp273;
+	       tmp498 = tmp258 + tmp261;
+	       tmp499 = K707106781 * (tmp305 - tmp304);
+	       tmp500 = tmp498 - tmp499;
+	       tmp589 = tmp498 + tmp499;
+	  }
+	  {
+	       fftw_real tmp303;
+	       fftw_real tmp306;
+	       fftw_real tmp509;
+	       fftw_real tmp510;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp303 = tmp299 + tmp302;
+	       tmp306 = K707106781 * (tmp304 + tmp305);
+	       tmp307 = tmp303 - tmp306;
+	       tmp422 = tmp303 + tmp306;
+	       tmp509 = tmp302 - tmp299;
+	       tmp510 = K707106781 * (tmp267 - tmp272);
+	       tmp511 = tmp509 - tmp510;
+	       tmp586 = tmp509 + tmp510;
+	  }
+     }
+     {
+	  fftw_real tmp49;
+	  fftw_real tmp237;
+	  fftw_real tmp176;
+	  fftw_real tmp249;
+	  fftw_real tmp52;
+	  fftw_real tmp248;
+	  fftw_real tmp179;
+	  fftw_real tmp238;
+	  fftw_real tmp59;
+	  fftw_real tmp252;
+	  fftw_real tmp245;
+	  fftw_real tmp186;
+	  fftw_real tmp56;
+	  fftw_real tmp251;
+	  fftw_real tmp242;
+	  fftw_real tmp183;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp47;
+	       fftw_real tmp48;
+	       fftw_real tmp177;
+	       fftw_real tmp178;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp47 = c_re(input[62 * istride]);
+	       tmp48 = c_re(input[30 * istride]);
+	       tmp49 = tmp47 + tmp48;
+	       tmp237 = tmp47 - tmp48;
+	       {
+		    fftw_real tmp174;
+		    fftw_real tmp175;
+		    fftw_real tmp50;
+		    fftw_real tmp51;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp174 = c_im(input[62 * istride]);
+		    tmp175 = c_im(input[30 * istride]);
+		    tmp176 = tmp174 + tmp175;
+		    tmp249 = tmp174 - tmp175;
+		    tmp50 = c_re(input[14 * istride]);
+		    tmp51 = c_re(input[46 * istride]);
+		    tmp52 = tmp50 + tmp51;
+		    tmp248 = tmp50 - tmp51;
+	       }
+	       tmp177 = c_im(input[14 * istride]);
+	       tmp178 = c_im(input[46 * istride]);
+	       tmp179 = tmp177 + tmp178;
+	       tmp238 = tmp177 - tmp178;
+	       {
+		    fftw_real tmp57;
+		    fftw_real tmp58;
+		    fftw_real tmp243;
+		    fftw_real tmp184;
+		    fftw_real tmp185;
+		    fftw_real tmp244;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp57 = c_re(input[54 * istride]);
+		    tmp58 = c_re(input[22 * istride]);
+		    tmp243 = tmp57 - tmp58;
+		    tmp184 = c_im(input[54 * istride]);
+		    tmp185 = c_im(input[22 * istride]);
+		    tmp244 = tmp184 - tmp185;
+		    tmp59 = tmp57 + tmp58;
+		    tmp252 = tmp244 - tmp243;
+		    tmp245 = tmp243 + tmp244;
+		    tmp186 = tmp184 + tmp185;
+	       }
+	       {
+		    fftw_real tmp54;
+		    fftw_real tmp55;
+		    fftw_real tmp240;
+		    fftw_real tmp181;
+		    fftw_real tmp182;
+		    fftw_real tmp241;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp54 = c_re(input[6 * istride]);
+		    tmp55 = c_re(input[38 * istride]);
+		    tmp240 = tmp54 - tmp55;
+		    tmp181 = c_im(input[6 * istride]);
+		    tmp182 = c_im(input[38 * istride]);
+		    tmp241 = tmp181 - tmp182;
+		    tmp56 = tmp54 + tmp55;
+		    tmp251 = tmp240 + tmp241;
+		    tmp242 = tmp240 - tmp241;
+		    tmp183 = tmp181 + tmp182;
+	       }
+	  }
+	  {
+	       fftw_real tmp53;
+	       fftw_real tmp60;
+	       fftw_real tmp653;
+	       fftw_real tmp654;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp53 = tmp49 + tmp52;
+	       tmp60 = tmp56 + tmp59;
+	       tmp61 = tmp53 + tmp60;
+	       tmp859 = tmp53 - tmp60;
+	       tmp653 = tmp56 - tmp59;
+	       tmp654 = tmp176 - tmp179;
+	       tmp655 = tmp653 + tmp654;
+	       tmp761 = tmp654 - tmp653;
+	  }
+	  {
+	       fftw_real tmp180;
+	       fftw_real tmp187;
+	       fftw_real tmp656;
+	       fftw_real tmp657;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp180 = tmp176 + tmp179;
+	       tmp187 = tmp183 + tmp186;
+	       tmp188 = tmp180 + tmp187;
+	       tmp860 = tmp180 - tmp187;
+	       tmp656 = tmp49 - tmp52;
+	       tmp657 = tmp186 - tmp183;
+	       tmp658 = tmp656 + tmp657;
+	       tmp762 = tmp656 - tmp657;
+	  }
+	  {
+	       fftw_real tmp239;
+	       fftw_real tmp246;
+	       fftw_real tmp489;
+	       fftw_real tmp490;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp239 = tmp237 - tmp238;
+	       tmp246 = K707106781 * (tmp242 + tmp245);
+	       tmp247 = tmp239 - tmp246;
+	       tmp417 = tmp239 + tmp246;
+	       tmp489 = tmp249 - tmp248;
+	       tmp490 = K707106781 * (tmp242 - tmp245);
+	       tmp491 = tmp489 - tmp490;
+	       tmp581 = tmp489 + tmp490;
+	  }
+	  {
+	       fftw_real tmp250;
+	       fftw_real tmp253;
+	       fftw_real tmp492;
+	       fftw_real tmp493;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp250 = tmp248 + tmp249;
+	       tmp253 = K707106781 * (tmp251 + tmp252);
+	       tmp254 = tmp250 - tmp253;
+	       tmp418 = tmp250 + tmp253;
+	       tmp492 = tmp237 + tmp238;
+	       tmp493 = K707106781 * (tmp252 - tmp251);
+	       tmp494 = tmp492 - tmp493;
+	       tmp582 = tmp492 + tmp493;
+	  }
+     }
+     {
+	  fftw_real tmp81;
+	  fftw_real tmp668;
+	  fftw_real tmp84;
+	  fftw_real tmp669;
+	  fftw_real tmp290;
+	  fftw_real tmp504;
+	  fftw_real tmp295;
+	  fftw_real tmp505;
+	  fftw_real tmp670;
+	  fftw_real tmp667;
+	  fftw_real tmp88;
+	  fftw_real tmp673;
+	  fftw_real tmp91;
+	  fftw_real tmp674;
+	  fftw_real tmp279;
+	  fftw_real tmp501;
+	  fftw_real tmp284;
+	  fftw_real tmp502;
+	  fftw_real tmp675;
+	  fftw_real tmp672;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp291;
+	       fftw_real tmp289;
+	       fftw_real tmp286;
+	       fftw_real tmp294;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp79;
+		    fftw_real tmp80;
+		    fftw_real tmp287;
+		    fftw_real tmp288;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp79 = c_re(input[5 * istride]);
+		    tmp80 = c_re(input[37 * istride]);
+		    tmp81 = tmp79 + tmp80;
+		    tmp291 = tmp79 - tmp80;
+		    tmp287 = c_im(input[5 * istride]);
+		    tmp288 = c_im(input[37 * istride]);
+		    tmp289 = tmp287 - tmp288;
+		    tmp668 = tmp287 + tmp288;
+	       }
+	       {
+		    fftw_real tmp82;
+		    fftw_real tmp83;
+		    fftw_real tmp292;
+		    fftw_real tmp293;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp82 = c_re(input[21 * istride]);
+		    tmp83 = c_re(input[53 * istride]);
+		    tmp84 = tmp82 + tmp83;
+		    tmp286 = tmp82 - tmp83;
+		    tmp292 = c_im(input[21 * istride]);
+		    tmp293 = c_im(input[53 * istride]);
+		    tmp294 = tmp292 - tmp293;
+		    tmp669 = tmp292 + tmp293;
+	       }
+	       tmp290 = tmp286 + tmp289;
+	       tmp504 = tmp289 - tmp286;
+	       tmp295 = tmp291 - tmp294;
+	       tmp505 = tmp291 + tmp294;
+	       tmp670 = tmp668 - tmp669;
+	       tmp667 = tmp81 - tmp84;
+	  }
+	  {
+	       fftw_real tmp280;
+	       fftw_real tmp278;
+	       fftw_real tmp275;
+	       fftw_real tmp283;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp86;
+		    fftw_real tmp87;
+		    fftw_real tmp276;
+		    fftw_real tmp277;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp86 = c_re(input[61 * istride]);
+		    tmp87 = c_re(input[29 * istride]);
+		    tmp88 = tmp86 + tmp87;
+		    tmp280 = tmp86 - tmp87;
+		    tmp276 = c_im(input[61 * istride]);
+		    tmp277 = c_im(input[29 * istride]);
+		    tmp278 = tmp276 - tmp277;
+		    tmp673 = tmp276 + tmp277;
+	       }
+	       {
+		    fftw_real tmp89;
+		    fftw_real tmp90;
+		    fftw_real tmp281;
+		    fftw_real tmp282;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp89 = c_re(input[13 * istride]);
+		    tmp90 = c_re(input[45 * istride]);
+		    tmp91 = tmp89 + tmp90;
+		    tmp275 = tmp89 - tmp90;
+		    tmp281 = c_im(input[13 * istride]);
+		    tmp282 = c_im(input[45 * istride]);
+		    tmp283 = tmp281 - tmp282;
+		    tmp674 = tmp281 + tmp282;
+	       }
+	       tmp279 = tmp275 + tmp278;
+	       tmp501 = tmp278 - tmp275;
+	       tmp284 = tmp280 - tmp283;
+	       tmp502 = tmp280 + tmp283;
+	       tmp675 = tmp673 - tmp674;
+	       tmp672 = tmp88 - tmp91;
+	  }
+	  {
+	       fftw_real tmp85;
+	       fftw_real tmp92;
+	       fftw_real tmp308;
+	       fftw_real tmp309;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp85 = tmp81 + tmp84;
+	       tmp92 = tmp88 + tmp91;
+	       tmp93 = tmp85 + tmp92;
+	       tmp865 = tmp85 - tmp92;
+	       tmp308 = (K923879532 * tmp295) - (K382683432 * tmp290);
+	       tmp309 = (K382683432 * tmp279) + (K923879532 * tmp284);
+	       tmp310 = tmp308 - tmp309;
+	       tmp426 = tmp308 + tmp309;
+	  }
+	  {
+	       fftw_real tmp285;
+	       fftw_real tmp296;
+	       fftw_real tmp822;
+	       fftw_real tmp823;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp285 = (K923879532 * tmp279) - (K382683432 * tmp284);
+	       tmp296 = (K923879532 * tmp290) + (K382683432 * tmp295);
+	       tmp297 = tmp285 - tmp296;
+	       tmp423 = tmp296 + tmp285;
+	       tmp822 = tmp668 + tmp669;
+	       tmp823 = tmp673 + tmp674;
+	       tmp824 = tmp822 + tmp823;
+	       tmp868 = tmp823 - tmp822;
+	  }
+	  {
+	       fftw_real tmp503;
+	       fftw_real tmp506;
+	       fftw_real tmp512;
+	       fftw_real tmp513;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp503 = (K382683432 * tmp501) - (K923879532 * tmp502);
+	       tmp506 = (K382683432 * tmp504) + (K923879532 * tmp505);
+	       tmp507 = tmp503 - tmp506;
+	       tmp587 = tmp506 + tmp503;
+	       tmp512 = (K382683432 * tmp505) - (K923879532 * tmp504);
+	       tmp513 = (K923879532 * tmp501) + (K382683432 * tmp502);
+	       tmp514 = tmp512 - tmp513;
+	       tmp590 = tmp512 + tmp513;
+	  }
+	  {
+	       fftw_real tmp671;
+	       fftw_real tmp676;
+	       fftw_real tmp684;
+	       fftw_real tmp685;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp671 = tmp667 - tmp670;
+	       tmp676 = tmp672 + tmp675;
+	       tmp677 = K707106781 * (tmp671 + tmp676);
+	       tmp767 = K707106781 * (tmp671 - tmp676);
+	       tmp684 = tmp667 + tmp670;
+	       tmp685 = tmp675 - tmp672;
+	       tmp686 = K707106781 * (tmp684 + tmp685);
+	       tmp770 = K707106781 * (tmp685 - tmp684);
+	  }
+     }
+     {
+	  fftw_real tmp63;
+	  fftw_real tmp851;
+	  fftw_real tmp850;
+	  fftw_real tmp852;
+	  fftw_real tmp126;
+	  fftw_real tmp127;
+	  fftw_real tmp190;
+	  fftw_real tmp847;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp31;
+	       fftw_real tmp62;
+	       fftw_real tmp848;
+	       fftw_real tmp849;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp31 = tmp15 + tmp30;
+	       tmp62 = tmp46 + tmp61;
+	       tmp63 = tmp31 + tmp62;
+	       tmp851 = tmp31 - tmp62;
+	       tmp848 = tmp821 + tmp824;
+	       tmp849 = tmp830 + tmp833;
+	       tmp850 = tmp848 + tmp849;
+	       tmp852 = tmp849 - tmp848;
+	  }
+	  {
+	       fftw_real tmp94;
+	       fftw_real tmp125;
+	       fftw_real tmp158;
+	       fftw_real tmp189;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp94 = tmp78 + tmp93;
+	       tmp125 = tmp109 + tmp124;
+	       tmp126 = tmp94 + tmp125;
+	       tmp127 = tmp94 - tmp125;
+	       tmp158 = tmp142 + tmp157;
+	       tmp189 = tmp173 + tmp188;
+	       tmp190 = tmp158 - tmp189;
+	       tmp847 = tmp158 + tmp189;
+	  }
+	  c_re(output[32 * ostride]) = tmp63 - tmp126;
+	  c_re(output[0]) = tmp63 + tmp126;
+	  c_im(output[16 * ostride]) = tmp127 + tmp190;
+	  c_im(output[48 * ostride]) = tmp190 - tmp127;
+	  c_im(output[32 * ostride]) = tmp847 - tmp850;
+	  c_im(output[0]) = tmp847 + tmp850;
+	  c_re(output[48 * ostride]) = tmp851 - tmp852;
+	  c_re(output[16 * ostride]) = tmp851 + tmp852;
+     }
+     {
+	  fftw_real tmp817;
+	  fftw_real tmp841;
+	  fftw_real tmp839;
+	  fftw_real tmp845;
+	  fftw_real tmp826;
+	  fftw_real tmp842;
+	  fftw_real tmp835;
+	  fftw_real tmp843;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp815;
+	       fftw_real tmp816;
+	       fftw_real tmp837;
+	       fftw_real tmp838;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp815 = tmp142 - tmp157;
+	       tmp816 = tmp46 - tmp61;
+	       tmp817 = tmp815 - tmp816;
+	       tmp841 = tmp816 + tmp815;
+	       tmp837 = tmp15 - tmp30;
+	       tmp838 = tmp188 - tmp173;
+	       tmp839 = tmp837 + tmp838;
+	       tmp845 = tmp837 - tmp838;
+	  }
+	  {
+	       fftw_real tmp818;
+	       fftw_real tmp825;
+	       fftw_real tmp827;
+	       fftw_real tmp834;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp818 = tmp78 - tmp93;
+	       tmp825 = tmp821 - tmp824;
+	       tmp826 = tmp818 - tmp825;
+	       tmp842 = tmp818 + tmp825;
+	       tmp827 = tmp109 - tmp124;
+	       tmp834 = tmp830 - tmp833;
+	       tmp835 = tmp827 + tmp834;
+	       tmp843 = tmp834 - tmp827;
+	  }
+	  {
+	       fftw_real tmp836;
+	       fftw_real tmp840;
+	       fftw_real tmp844;
+	       fftw_real tmp846;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp836 = K707106781 * (tmp826 - tmp835);
+	       c_im(output[56 * ostride]) = tmp817 - tmp836;
+	       c_im(output[24 * ostride]) = tmp817 + tmp836;
+	       tmp840 = K707106781 * (tmp826 + tmp835);
+	       c_re(output[40 * ostride]) = tmp839 - tmp840;
+	       c_re(output[8 * ostride]) = tmp839 + tmp840;
+	       tmp844 = K707106781 * (tmp842 + tmp843);
+	       c_im(output[40 * ostride]) = tmp841 - tmp844;
+	       c_im(output[8 * ostride]) = tmp841 + tmp844;
+	       tmp846 = K707106781 * (tmp843 - tmp842);
+	       c_re(output[56 * ostride]) = tmp845 - tmp846;
+	       c_re(output[24 * ostride]) = tmp845 + tmp846;
+	  }
+     }
+     {
+	  fftw_real tmp217;
+	  fftw_real tmp391;
+	  fftw_real tmp396;
+	  fftw_real tmp406;
+	  fftw_real tmp399;
+	  fftw_real tmp407;
+	  fftw_real tmp367;
+	  fftw_real tmp387;
+	  fftw_real tmp312;
+	  fftw_real tmp386;
+	  fftw_real tmp379;
+	  fftw_real tmp401;
+	  fftw_real tmp382;
+	  fftw_real tmp392;
+	  fftw_real tmp256;
+	  fftw_real tmp402;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp201;
+	       fftw_real tmp216;
+	       fftw_real tmp394;
+	       fftw_real tmp395;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp201 = tmp193 - tmp200;
+	       tmp216 = tmp208 - tmp215;
+	       tmp217 = tmp201 - tmp216;
+	       tmp391 = tmp201 + tmp216;
+	       tmp394 = tmp274 + tmp297;
+	       tmp395 = tmp307 + tmp310;
+	       tmp396 = (K881921264 * tmp394) - (K471396736 * tmp395);
+	       tmp406 = (K471396736 * tmp394) + (K881921264 * tmp395);
+	  }
+	  {
+	       fftw_real tmp397;
+	       fftw_real tmp398;
+	       fftw_real tmp353;
+	       fftw_real tmp366;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp397 = tmp329 + tmp352;
+	       tmp398 = tmp362 + tmp365;
+	       tmp399 = (K881921264 * tmp397) + (K471396736 * tmp398);
+	       tmp407 = (K881921264 * tmp398) - (K471396736 * tmp397);
+	       tmp353 = tmp329 - tmp352;
+	       tmp366 = tmp362 - tmp365;
+	       tmp367 = (K290284677 * tmp353) + (K956940335 * tmp366);
+	       tmp387 = (K290284677 * tmp366) - (K956940335 * tmp353);
+	  }
+	  {
+	       fftw_real tmp298;
+	       fftw_real tmp311;
+	       fftw_real tmp375;
+	       fftw_real tmp378;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp298 = tmp274 - tmp297;
+	       tmp311 = tmp307 - tmp310;
+	       tmp312 = (K290284677 * tmp298) - (K956940335 * tmp311);
+	       tmp386 = (K956940335 * tmp298) + (K290284677 * tmp311);
+	       tmp375 = tmp371 - tmp374;
+	       tmp378 = tmp376 - tmp377;
+	       tmp379 = tmp375 - tmp378;
+	       tmp401 = tmp375 + tmp378;
+	  }
+	  {
+	       fftw_real tmp380;
+	       fftw_real tmp381;
+	       fftw_real tmp236;
+	       fftw_real tmp255;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp380 = (K555570233 * tmp254) - (K831469612 * tmp247);
+	       tmp381 = (K831469612 * tmp228) + (K555570233 * tmp235);
+	       tmp382 = tmp380 - tmp381;
+	       tmp392 = tmp381 + tmp380;
+	       tmp236 = (K555570233 * tmp228) - (K831469612 * tmp235);
+	       tmp255 = (K555570233 * tmp247) + (K831469612 * tmp254);
+	       tmp256 = tmp236 - tmp255;
+	       tmp402 = tmp236 + tmp255;
+	  }
+	  {
+	       fftw_real tmp257;
+	       fftw_real tmp368;
+	       fftw_real tmp383;
+	       fftw_real tmp384;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp257 = tmp217 - tmp256;
+	       tmp368 = tmp312 - tmp367;
+	       c_im(output[61 * ostride]) = tmp257 - tmp368;
+	       c_im(output[29 * ostride]) = tmp257 + tmp368;
+	       tmp383 = tmp379 + tmp382;
+	       tmp384 = tmp312 + tmp367;
+	       c_re(output[45 * ostride]) = tmp383 - tmp384;
+	       c_re(output[13 * ostride]) = tmp383 + tmp384;
+	  }
+	  {
+	       fftw_real tmp389;
+	       fftw_real tmp390;
+	       fftw_real tmp385;
+	       fftw_real tmp388;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp389 = tmp379 - tmp382;
+	       tmp390 = tmp387 - tmp386;
+	       c_re(output[61 * ostride]) = tmp389 - tmp390;
+	       c_re(output[29 * ostride]) = tmp389 + tmp390;
+	       tmp385 = tmp217 + tmp256;
+	       tmp388 = tmp386 + tmp387;
+	       c_im(output[45 * ostride]) = tmp385 - tmp388;
+	       c_im(output[13 * ostride]) = tmp385 + tmp388;
+	  }
+	  {
+	       fftw_real tmp393;
+	       fftw_real tmp400;
+	       fftw_real tmp403;
+	       fftw_real tmp404;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp393 = tmp391 - tmp392;
+	       tmp400 = tmp396 - tmp399;
+	       c_im(output[53 * ostride]) = tmp393 - tmp400;
+	       c_im(output[21 * ostride]) = tmp393 + tmp400;
+	       tmp403 = tmp401 + tmp402;
+	       tmp404 = tmp396 + tmp399;
+	       c_re(output[37 * ostride]) = tmp403 - tmp404;
+	       c_re(output[5 * ostride]) = tmp403 + tmp404;
+	  }
+	  {
+	       fftw_real tmp409;
+	       fftw_real tmp410;
+	       fftw_real tmp405;
+	       fftw_real tmp408;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp409 = tmp401 - tmp402;
+	       tmp410 = tmp407 - tmp406;
+	       c_re(output[53 * ostride]) = tmp409 - tmp410;
+	       c_re(output[21 * ostride]) = tmp409 + tmp410;
+	       tmp405 = tmp391 + tmp392;
+	       tmp408 = tmp406 + tmp407;
+	       c_im(output[37 * ostride]) = tmp405 - tmp408;
+	       c_im(output[5 * ostride]) = tmp405 + tmp408;
+	  }
+     }
+     {
+	  fftw_real tmp413;
+	  fftw_real tmp451;
+	  fftw_real tmp456;
+	  fftw_real tmp466;
+	  fftw_real tmp459;
+	  fftw_real tmp467;
+	  fftw_real tmp435;
+	  fftw_real tmp447;
+	  fftw_real tmp428;
+	  fftw_real tmp446;
+	  fftw_real tmp439;
+	  fftw_real tmp461;
+	  fftw_real tmp442;
+	  fftw_real tmp452;
+	  fftw_real tmp420;
+	  fftw_real tmp462;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp411;
+	       fftw_real tmp412;
+	       fftw_real tmp454;
+	       fftw_real tmp455;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp411 = tmp193 + tmp200;
+	       tmp412 = tmp377 + tmp376;
+	       tmp413 = tmp411 - tmp412;
+	       tmp451 = tmp411 + tmp412;
+	       tmp454 = tmp422 + tmp423;
+	       tmp455 = tmp425 + tmp426;
+	       tmp456 = (K995184726 * tmp454) + (K098017140 * tmp455);
+	       tmp466 = (K995184726 * tmp455) - (K098017140 * tmp454);
+	  }
+	  {
+	       fftw_real tmp457;
+	       fftw_real tmp458;
+	       fftw_real tmp431;
+	       fftw_real tmp434;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp457 = tmp429 + tmp430;
+	       tmp458 = tmp432 + tmp433;
+	       tmp459 = (K995184726 * tmp457) - (K098017140 * tmp458);
+	       tmp467 = (K098017140 * tmp457) + (K995184726 * tmp458);
+	       tmp431 = tmp429 - tmp430;
+	       tmp434 = tmp432 - tmp433;
+	       tmp435 = (K634393284 * tmp431) - (K773010453 * tmp434);
+	       tmp447 = (K773010453 * tmp431) + (K634393284 * tmp434);
+	  }
+	  {
+	       fftw_real tmp424;
+	       fftw_real tmp427;
+	       fftw_real tmp437;
+	       fftw_real tmp438;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp424 = tmp422 - tmp423;
+	       tmp427 = tmp425 - tmp426;
+	       tmp428 = (K634393284 * tmp424) + (K773010453 * tmp427);
+	       tmp446 = (K634393284 * tmp427) - (K773010453 * tmp424);
+	       tmp437 = tmp371 + tmp374;
+	       tmp438 = tmp208 + tmp215;
+	       tmp439 = tmp437 - tmp438;
+	       tmp461 = tmp437 + tmp438;
+	  }
+	  {
+	       fftw_real tmp440;
+	       fftw_real tmp441;
+	       fftw_real tmp416;
+	       fftw_real tmp419;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp440 = (K980785280 * tmp418) - (K195090322 * tmp417);
+	       tmp441 = (K195090322 * tmp414) + (K980785280 * tmp415);
+	       tmp442 = tmp440 - tmp441;
+	       tmp452 = tmp441 + tmp440;
+	       tmp416 = (K980785280 * tmp414) - (K195090322 * tmp415);
+	       tmp419 = (K980785280 * tmp417) + (K195090322 * tmp418);
+	       tmp420 = tmp416 - tmp419;
+	       tmp462 = tmp416 + tmp419;
+	  }
+	  {
+	       fftw_real tmp421;
+	       fftw_real tmp436;
+	       fftw_real tmp443;
+	       fftw_real tmp444;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp421 = tmp413 + tmp420;
+	       tmp436 = tmp428 + tmp435;
+	       c_im(output[41 * ostride]) = tmp421 - tmp436;
+	       c_im(output[9 * ostride]) = tmp421 + tmp436;
+	       tmp443 = tmp439 - tmp442;
+	       tmp444 = tmp435 - tmp428;
+	       c_re(output[57 * ostride]) = tmp443 - tmp444;
+	       c_re(output[25 * ostride]) = tmp443 + tmp444;
+	  }
+	  {
+	       fftw_real tmp449;
+	       fftw_real tmp450;
+	       fftw_real tmp445;
+	       fftw_real tmp448;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp449 = tmp439 + tmp442;
+	       tmp450 = tmp446 + tmp447;
+	       c_re(output[41 * ostride]) = tmp449 - tmp450;
+	       c_re(output[9 * ostride]) = tmp449 + tmp450;
+	       tmp445 = tmp413 - tmp420;
+	       tmp448 = tmp446 - tmp447;
+	       c_im(output[57 * ostride]) = tmp445 - tmp448;
+	       c_im(output[25 * ostride]) = tmp445 + tmp448;
+	  }
+	  {
+	       fftw_real tmp453;
+	       fftw_real tmp460;
+	       fftw_real tmp463;
+	       fftw_real tmp464;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp453 = tmp451 + tmp452;
+	       tmp460 = tmp456 + tmp459;
+	       c_im(output[33 * ostride]) = tmp453 - tmp460;
+	       c_im(output[ostride]) = tmp453 + tmp460;
+	       tmp463 = tmp461 - tmp462;
+	       tmp464 = tmp459 - tmp456;
+	       c_re(output[49 * ostride]) = tmp463 - tmp464;
+	       c_re(output[17 * ostride]) = tmp463 + tmp464;
+	  }
+	  {
+	       fftw_real tmp469;
+	       fftw_real tmp470;
+	       fftw_real tmp465;
+	       fftw_real tmp468;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp469 = tmp461 + tmp462;
+	       tmp470 = tmp466 + tmp467;
+	       c_re(output[33 * ostride]) = tmp469 - tmp470;
+	       c_re(output[ostride]) = tmp469 + tmp470;
+	       tmp465 = tmp451 - tmp452;
+	       tmp468 = tmp466 - tmp467;
+	       c_im(output[49 * ostride]) = tmp465 - tmp468;
+	       c_im(output[17 * ostride]) = tmp465 + tmp468;
+	  }
+     }
+     {
+	  fftw_real tmp855;
+	  fftw_real tmp893;
+	  fftw_real tmp862;
+	  fftw_real tmp904;
+	  fftw_real tmp884;
+	  fftw_real tmp894;
+	  fftw_real tmp870;
+	  fftw_real tmp888;
+	  fftw_real tmp881;
+	  fftw_real tmp903;
+	  fftw_real tmp898;
+	  fftw_real tmp908;
+	  fftw_real tmp877;
+	  fftw_real tmp889;
+	  fftw_real tmp901;
+	  fftw_real tmp909;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp858;
+	       fftw_real tmp861;
+	       fftw_real tmp896;
+	       fftw_real tmp897;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp855 = tmp853 - tmp854;
+	       tmp893 = tmp854 + tmp853;
+	       tmp858 = tmp856 - tmp857;
+	       tmp861 = tmp859 + tmp860;
+	       tmp862 = K707106781 * (tmp858 - tmp861);
+	       tmp904 = K707106781 * (tmp858 + tmp861);
+	       {
+		    fftw_real tmp882;
+		    fftw_real tmp883;
+		    fftw_real tmp866;
+		    fftw_real tmp869;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp882 = tmp860 - tmp859;
+		    tmp883 = tmp856 + tmp857;
+		    tmp884 = K707106781 * (tmp882 - tmp883);
+		    tmp894 = K707106781 * (tmp883 + tmp882);
+		    tmp866 = tmp864 - tmp865;
+		    tmp869 = tmp867 - tmp868;
+		    tmp870 = (K382683432 * tmp866) + (K923879532 * tmp869);
+		    tmp888 = (K382683432 * tmp869) - (K923879532 * tmp866);
+	       }
+	       tmp881 = tmp879 - tmp880;
+	       tmp903 = tmp879 + tmp880;
+	       tmp896 = tmp865 + tmp864;
+	       tmp897 = tmp867 + tmp868;
+	       tmp898 = (K923879532 * tmp896) + (K382683432 * tmp897);
+	       tmp908 = (K923879532 * tmp897) - (K382683432 * tmp896);
+	       {
+		    fftw_real tmp873;
+		    fftw_real tmp876;
+		    fftw_real tmp899;
+		    fftw_real tmp900;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp873 = tmp871 - tmp872;
+		    tmp876 = tmp874 - tmp875;
+		    tmp877 = (K382683432 * tmp873) - (K923879532 * tmp876);
+		    tmp889 = (K923879532 * tmp873) + (K382683432 * tmp876);
+		    tmp899 = tmp872 + tmp871;
+		    tmp900 = tmp874 + tmp875;
+		    tmp901 = (K923879532 * tmp899) - (K382683432 * tmp900);
+		    tmp909 = (K382683432 * tmp899) + (K923879532 * tmp900);
+	       }
+	  }
+	  {
+	       fftw_real tmp863;
+	       fftw_real tmp878;
+	       fftw_real tmp885;
+	       fftw_real tmp886;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp863 = tmp855 + tmp862;
+	       tmp878 = tmp870 + tmp877;
+	       c_im(output[44 * ostride]) = tmp863 - tmp878;
+	       c_im(output[12 * ostride]) = tmp863 + tmp878;
+	       tmp885 = tmp881 - tmp884;
+	       tmp886 = tmp877 - tmp870;
+	       c_re(output[60 * ostride]) = tmp885 - tmp886;
+	       c_re(output[28 * ostride]) = tmp885 + tmp886;
+	  }
+	  {
+	       fftw_real tmp891;
+	       fftw_real tmp892;
+	       fftw_real tmp887;
+	       fftw_real tmp890;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp891 = tmp881 + tmp884;
+	       tmp892 = tmp888 + tmp889;
+	       c_re(output[44 * ostride]) = tmp891 - tmp892;
+	       c_re(output[12 * ostride]) = tmp891 + tmp892;
+	       tmp887 = tmp855 - tmp862;
+	       tmp890 = tmp888 - tmp889;
+	       c_im(output[60 * ostride]) = tmp887 - tmp890;
+	       c_im(output[28 * ostride]) = tmp887 + tmp890;
+	  }
+	  {
+	       fftw_real tmp895;
+	       fftw_real tmp902;
+	       fftw_real tmp905;
+	       fftw_real tmp906;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp895 = tmp893 + tmp894;
+	       tmp902 = tmp898 + tmp901;
+	       c_im(output[36 * ostride]) = tmp895 - tmp902;
+	       c_im(output[4 * ostride]) = tmp895 + tmp902;
+	       tmp905 = tmp903 - tmp904;
+	       tmp906 = tmp901 - tmp898;
+	       c_re(output[52 * ostride]) = tmp905 - tmp906;
+	       c_re(output[20 * ostride]) = tmp905 + tmp906;
+	  }
+	  {
+	       fftw_real tmp911;
+	       fftw_real tmp912;
+	       fftw_real tmp907;
+	       fftw_real tmp910;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp911 = tmp903 + tmp904;
+	       tmp912 = tmp908 + tmp909;
+	       c_re(output[36 * ostride]) = tmp911 - tmp912;
+	       c_re(output[4 * ostride]) = tmp911 + tmp912;
+	       tmp907 = tmp893 - tmp894;
+	       tmp910 = tmp908 - tmp909;
+	       c_im(output[52 * ostride]) = tmp907 - tmp910;
+	       c_im(output[20 * ostride]) = tmp907 + tmp910;
+	  }
+     }
+     {
+	  fftw_real tmp757;
+	  fftw_real tmp795;
+	  fftw_real tmp800;
+	  fftw_real tmp810;
+	  fftw_real tmp803;
+	  fftw_real tmp811;
+	  fftw_real tmp779;
+	  fftw_real tmp791;
+	  fftw_real tmp783;
+	  fftw_real tmp805;
+	  fftw_real tmp764;
+	  fftw_real tmp806;
+	  fftw_real tmp786;
+	  fftw_real tmp796;
+	  fftw_real tmp772;
+	  fftw_real tmp790;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp756;
+	       fftw_real tmp798;
+	       fftw_real tmp799;
+	       fftw_real tmp782;
+	       fftw_real tmp760;
+	       fftw_real tmp763;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp756 = K707106781 * (tmp720 - tmp721);
+	       tmp757 = tmp755 - tmp756;
+	       tmp795 = tmp755 + tmp756;
+	       tmp798 = tmp766 + tmp767;
+	       tmp799 = tmp769 + tmp770;
+	       tmp800 = (K831469612 * tmp798) + (K555570233 * tmp799);
+	       tmp810 = (K831469612 * tmp799) - (K555570233 * tmp798);
+	       {
+		    fftw_real tmp801;
+		    fftw_real tmp802;
+		    fftw_real tmp775;
+		    fftw_real tmp778;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp801 = tmp773 + tmp774;
+		    tmp802 = tmp776 + tmp777;
+		    tmp803 = (K831469612 * tmp801) - (K555570233 * tmp802);
+		    tmp811 = (K555570233 * tmp801) + (K831469612 * tmp802);
+		    tmp775 = tmp773 - tmp774;
+		    tmp778 = tmp776 - tmp777;
+		    tmp779 = (K195090322 * tmp775) - (K980785280 * tmp778);
+		    tmp791 = (K980785280 * tmp775) + (K195090322 * tmp778);
+	       }
+	       tmp782 = K707106781 * (tmp643 - tmp640);
+	       tmp783 = tmp781 - tmp782;
+	       tmp805 = tmp781 + tmp782;
+	       tmp760 = (K382683432 * tmp758) - (K923879532 * tmp759);
+	       tmp763 = (K923879532 * tmp761) + (K382683432 * tmp762);
+	       tmp764 = tmp760 - tmp763;
+	       tmp806 = tmp760 + tmp763;
+	       {
+		    fftw_real tmp784;
+		    fftw_real tmp785;
+		    fftw_real tmp768;
+		    fftw_real tmp771;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp784 = (K382683432 * tmp761) - (K923879532 * tmp762);
+		    tmp785 = (K382683432 * tmp759) + (K923879532 * tmp758);
+		    tmp786 = tmp784 - tmp785;
+		    tmp796 = tmp785 + tmp784;
+		    tmp768 = tmp766 - tmp767;
+		    tmp771 = tmp769 - tmp770;
+		    tmp772 = (K195090322 * tmp768) + (K980785280 * tmp771);
+		    tmp790 = (K195090322 * tmp771) - (K980785280 * tmp768);
+	       }
+	  }
+	  {
+	       fftw_real tmp765;
+	       fftw_real tmp780;
+	       fftw_real tmp787;
+	       fftw_real tmp788;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp765 = tmp757 + tmp764;
+	       tmp780 = tmp772 + tmp779;
+	       c_im(output[46 * ostride]) = tmp765 - tmp780;
+	       c_im(output[14 * ostride]) = tmp765 + tmp780;
+	       tmp787 = tmp783 - tmp786;
+	       tmp788 = tmp779 - tmp772;
+	       c_re(output[62 * ostride]) = tmp787 - tmp788;
+	       c_re(output[30 * ostride]) = tmp787 + tmp788;
+	  }
+	  {
+	       fftw_real tmp793;
+	       fftw_real tmp794;
+	       fftw_real tmp789;
+	       fftw_real tmp792;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp793 = tmp783 + tmp786;
+	       tmp794 = tmp790 + tmp791;
+	       c_re(output[46 * ostride]) = tmp793 - tmp794;
+	       c_re(output[14 * ostride]) = tmp793 + tmp794;
+	       tmp789 = tmp757 - tmp764;
+	       tmp792 = tmp790 - tmp791;
+	       c_im(output[62 * ostride]) = tmp789 - tmp792;
+	       c_im(output[30 * ostride]) = tmp789 + tmp792;
+	  }
+	  {
+	       fftw_real tmp797;
+	       fftw_real tmp804;
+	       fftw_real tmp807;
+	       fftw_real tmp808;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp797 = tmp795 + tmp796;
+	       tmp804 = tmp800 + tmp803;
+	       c_im(output[38 * ostride]) = tmp797 - tmp804;
+	       c_im(output[6 * ostride]) = tmp797 + tmp804;
+	       tmp807 = tmp805 - tmp806;
+	       tmp808 = tmp803 - tmp800;
+	       c_re(output[54 * ostride]) = tmp807 - tmp808;
+	       c_re(output[22 * ostride]) = tmp807 + tmp808;
+	  }
+	  {
+	       fftw_real tmp813;
+	       fftw_real tmp814;
+	       fftw_real tmp809;
+	       fftw_real tmp812;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp813 = tmp805 + tmp806;
+	       tmp814 = tmp810 + tmp811;
+	       c_re(output[38 * ostride]) = tmp813 - tmp814;
+	       c_re(output[6 * ostride]) = tmp813 + tmp814;
+	       tmp809 = tmp795 - tmp796;
+	       tmp812 = tmp810 - tmp811;
+	       c_im(output[54 * ostride]) = tmp809 - tmp812;
+	       c_im(output[22 * ostride]) = tmp809 + tmp812;
+	  }
+     }
+     {
+	  fftw_real tmp645;
+	  fftw_real tmp735;
+	  fftw_real tmp740;
+	  fftw_real tmp750;
+	  fftw_real tmp743;
+	  fftw_real tmp751;
+	  fftw_real tmp715;
+	  fftw_real tmp731;
+	  fftw_real tmp723;
+	  fftw_real tmp745;
+	  fftw_real tmp660;
+	  fftw_real tmp746;
+	  fftw_real tmp726;
+	  fftw_real tmp736;
+	  fftw_real tmp688;
+	  fftw_real tmp730;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp644;
+	       fftw_real tmp738;
+	       fftw_real tmp739;
+	       fftw_real tmp722;
+	       fftw_real tmp652;
+	       fftw_real tmp659;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp644 = K707106781 * (tmp640 + tmp643);
+	       tmp645 = tmp637 - tmp644;
+	       tmp735 = tmp637 + tmp644;
+	       tmp738 = tmp666 + tmp677;
+	       tmp739 = tmp683 + tmp686;
+	       tmp740 = (K980785280 * tmp738) - (K195090322 * tmp739);
+	       tmp750 = (K195090322 * tmp738) + (K980785280 * tmp739);
+	       {
+		    fftw_real tmp741;
+		    fftw_real tmp742;
+		    fftw_real tmp705;
+		    fftw_real tmp714;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp741 = tmp693 + tmp704;
+		    tmp742 = tmp710 + tmp713;
+		    tmp743 = (K980785280 * tmp741) + (K195090322 * tmp742);
+		    tmp751 = (K980785280 * tmp742) - (K195090322 * tmp741);
+		    tmp705 = tmp693 - tmp704;
+		    tmp714 = tmp710 - tmp713;
+		    tmp715 = (K555570233 * tmp705) + (K831469612 * tmp714);
+		    tmp731 = (K555570233 * tmp714) - (K831469612 * tmp705);
+	       }
+	       tmp722 = K707106781 * (tmp720 + tmp721);
+	       tmp723 = tmp719 - tmp722;
+	       tmp745 = tmp719 + tmp722;
+	       tmp652 = (K923879532 * tmp648) - (K382683432 * tmp651);
+	       tmp659 = (K382683432 * tmp655) + (K923879532 * tmp658);
+	       tmp660 = tmp652 - tmp659;
+	       tmp746 = tmp652 + tmp659;
+	       {
+		    fftw_real tmp724;
+		    fftw_real tmp725;
+		    fftw_real tmp678;
+		    fftw_real tmp687;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp724 = (K923879532 * tmp655) - (K382683432 * tmp658);
+		    tmp725 = (K923879532 * tmp651) + (K382683432 * tmp648);
+		    tmp726 = tmp724 - tmp725;
+		    tmp736 = tmp725 + tmp724;
+		    tmp678 = tmp666 - tmp677;
+		    tmp687 = tmp683 - tmp686;
+		    tmp688 = (K555570233 * tmp678) - (K831469612 * tmp687);
+		    tmp730 = (K831469612 * tmp678) + (K555570233 * tmp687);
+	       }
+	  }
+	  {
+	       fftw_real tmp661;
+	       fftw_real tmp716;
+	       fftw_real tmp727;
+	       fftw_real tmp728;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp661 = tmp645 - tmp660;
+	       tmp716 = tmp688 - tmp715;
+	       c_im(output[58 * ostride]) = tmp661 - tmp716;
+	       c_im(output[26 * ostride]) = tmp661 + tmp716;
+	       tmp727 = tmp723 + tmp726;
+	       tmp728 = tmp688 + tmp715;
+	       c_re(output[42 * ostride]) = tmp727 - tmp728;
+	       c_re(output[10 * ostride]) = tmp727 + tmp728;
+	  }
+	  {
+	       fftw_real tmp733;
+	       fftw_real tmp734;
+	       fftw_real tmp729;
+	       fftw_real tmp732;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp733 = tmp723 - tmp726;
+	       tmp734 = tmp731 - tmp730;
+	       c_re(output[58 * ostride]) = tmp733 - tmp734;
+	       c_re(output[26 * ostride]) = tmp733 + tmp734;
+	       tmp729 = tmp645 + tmp660;
+	       tmp732 = tmp730 + tmp731;
+	       c_im(output[42 * ostride]) = tmp729 - tmp732;
+	       c_im(output[10 * ostride]) = tmp729 + tmp732;
+	  }
+	  {
+	       fftw_real tmp737;
+	       fftw_real tmp744;
+	       fftw_real tmp747;
+	       fftw_real tmp748;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp737 = tmp735 - tmp736;
+	       tmp744 = tmp740 - tmp743;
+	       c_im(output[50 * ostride]) = tmp737 - tmp744;
+	       c_im(output[18 * ostride]) = tmp737 + tmp744;
+	       tmp747 = tmp745 + tmp746;
+	       tmp748 = tmp740 + tmp743;
+	       c_re(output[34 * ostride]) = tmp747 - tmp748;
+	       c_re(output[2 * ostride]) = tmp747 + tmp748;
+	  }
+	  {
+	       fftw_real tmp753;
+	       fftw_real tmp754;
+	       fftw_real tmp749;
+	       fftw_real tmp752;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp753 = tmp745 - tmp746;
+	       tmp754 = tmp751 - tmp750;
+	       c_re(output[50 * ostride]) = tmp753 - tmp754;
+	       c_re(output[18 * ostride]) = tmp753 + tmp754;
+	       tmp749 = tmp735 + tmp736;
+	       tmp752 = tmp750 + tmp751;
+	       c_im(output[34 * ostride]) = tmp749 - tmp752;
+	       c_im(output[2 * ostride]) = tmp749 + tmp752;
+	  }
+     }
+     {
+	  fftw_real tmp481;
+	  fftw_real tmp555;
+	  fftw_real tmp560;
+	  fftw_real tmp570;
+	  fftw_real tmp563;
+	  fftw_real tmp571;
+	  fftw_real tmp535;
+	  fftw_real tmp551;
+	  fftw_real tmp516;
+	  fftw_real tmp550;
+	  fftw_real tmp543;
+	  fftw_real tmp565;
+	  fftw_real tmp546;
+	  fftw_real tmp556;
+	  fftw_real tmp496;
+	  fftw_real tmp566;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp473;
+	       fftw_real tmp480;
+	       fftw_real tmp558;
+	       fftw_real tmp559;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp473 = tmp471 - tmp472;
+	       tmp480 = tmp476 - tmp479;
+	       tmp481 = tmp473 - tmp480;
+	       tmp555 = tmp473 + tmp480;
+	       tmp558 = tmp500 + tmp507;
+	       tmp559 = tmp511 + tmp514;
+	       tmp560 = (K773010453 * tmp558) - (K634393284 * tmp559);
+	       tmp570 = (K634393284 * tmp558) + (K773010453 * tmp559);
+	  }
+	  {
+	       fftw_real tmp561;
+	       fftw_real tmp562;
+	       fftw_real tmp527;
+	       fftw_real tmp534;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp561 = tmp519 + tmp526;
+	       tmp562 = tmp530 + tmp533;
+	       tmp563 = (K773010453 * tmp561) + (K634393284 * tmp562);
+	       tmp571 = (K773010453 * tmp562) - (K634393284 * tmp561);
+	       tmp527 = tmp519 - tmp526;
+	       tmp534 = tmp530 - tmp533;
+	       tmp535 = (K098017140 * tmp527) + (K995184726 * tmp534);
+	       tmp551 = (K098017140 * tmp534) - (K995184726 * tmp527);
+	  }
+	  {
+	       fftw_real tmp508;
+	       fftw_real tmp515;
+	       fftw_real tmp539;
+	       fftw_real tmp542;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp508 = tmp500 - tmp507;
+	       tmp515 = tmp511 - tmp514;
+	       tmp516 = (K098017140 * tmp508) - (K995184726 * tmp515);
+	       tmp550 = (K995184726 * tmp508) + (K098017140 * tmp515);
+	       tmp539 = tmp537 - tmp538;
+	       tmp542 = tmp540 - tmp541;
+	       tmp543 = tmp539 - tmp542;
+	       tmp565 = tmp539 + tmp542;
+	  }
+	  {
+	       fftw_real tmp544;
+	       fftw_real tmp545;
+	       fftw_real tmp488;
+	       fftw_real tmp495;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp544 = (K195090322 * tmp491) - (K980785280 * tmp494);
+	       tmp545 = (K195090322 * tmp487) + (K980785280 * tmp484);
+	       tmp546 = tmp544 - tmp545;
+	       tmp556 = tmp545 + tmp544;
+	       tmp488 = (K195090322 * tmp484) - (K980785280 * tmp487);
+	       tmp495 = (K980785280 * tmp491) + (K195090322 * tmp494);
+	       tmp496 = tmp488 - tmp495;
+	       tmp566 = tmp488 + tmp495;
+	  }
+	  {
+	       fftw_real tmp497;
+	       fftw_real tmp536;
+	       fftw_real tmp547;
+	       fftw_real tmp548;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp497 = tmp481 - tmp496;
+	       tmp536 = tmp516 - tmp535;
+	       c_im(output[63 * ostride]) = tmp497 - tmp536;
+	       c_im(output[31 * ostride]) = tmp497 + tmp536;
+	       tmp547 = tmp543 + tmp546;
+	       tmp548 = tmp516 + tmp535;
+	       c_re(output[47 * ostride]) = tmp547 - tmp548;
+	       c_re(output[15 * ostride]) = tmp547 + tmp548;
+	  }
+	  {
+	       fftw_real tmp553;
+	       fftw_real tmp554;
+	       fftw_real tmp549;
+	       fftw_real tmp552;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp553 = tmp543 - tmp546;
+	       tmp554 = tmp551 - tmp550;
+	       c_re(output[63 * ostride]) = tmp553 - tmp554;
+	       c_re(output[31 * ostride]) = tmp553 + tmp554;
+	       tmp549 = tmp481 + tmp496;
+	       tmp552 = tmp550 + tmp551;
+	       c_im(output[47 * ostride]) = tmp549 - tmp552;
+	       c_im(output[15 * ostride]) = tmp549 + tmp552;
+	  }
+	  {
+	       fftw_real tmp557;
+	       fftw_real tmp564;
+	       fftw_real tmp567;
+	       fftw_real tmp568;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp557 = tmp555 - tmp556;
+	       tmp564 = tmp560 - tmp563;
+	       c_im(output[55 * ostride]) = tmp557 - tmp564;
+	       c_im(output[23 * ostride]) = tmp557 + tmp564;
+	       tmp567 = tmp565 + tmp566;
+	       tmp568 = tmp560 + tmp563;
+	       c_re(output[39 * ostride]) = tmp567 - tmp568;
+	       c_re(output[7 * ostride]) = tmp567 + tmp568;
+	  }
+	  {
+	       fftw_real tmp573;
+	       fftw_real tmp574;
+	       fftw_real tmp569;
+	       fftw_real tmp572;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp573 = tmp565 - tmp566;
+	       tmp574 = tmp571 - tmp570;
+	       c_re(output[55 * ostride]) = tmp573 - tmp574;
+	       c_re(output[23 * ostride]) = tmp573 + tmp574;
+	       tmp569 = tmp555 + tmp556;
+	       tmp572 = tmp570 + tmp571;
+	       c_im(output[39 * ostride]) = tmp569 - tmp572;
+	       c_im(output[7 * ostride]) = tmp569 + tmp572;
+	  }
+     }
+     {
+	  fftw_real tmp577;
+	  fftw_real tmp615;
+	  fftw_real tmp620;
+	  fftw_real tmp630;
+	  fftw_real tmp623;
+	  fftw_real tmp631;
+	  fftw_real tmp599;
+	  fftw_real tmp611;
+	  fftw_real tmp592;
+	  fftw_real tmp610;
+	  fftw_real tmp603;
+	  fftw_real tmp625;
+	  fftw_real tmp606;
+	  fftw_real tmp616;
+	  fftw_real tmp584;
+	  fftw_real tmp626;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp575;
+	       fftw_real tmp576;
+	       fftw_real tmp618;
+	       fftw_real tmp619;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp575 = tmp471 + tmp472;
+	       tmp576 = tmp541 + tmp540;
+	       tmp577 = tmp575 - tmp576;
+	       tmp615 = tmp575 + tmp576;
+	       tmp618 = tmp586 + tmp587;
+	       tmp619 = tmp589 + tmp590;
+	       tmp620 = (K956940335 * tmp618) + (K290284677 * tmp619);
+	       tmp630 = (K956940335 * tmp619) - (K290284677 * tmp618);
+	  }
+	  {
+	       fftw_real tmp621;
+	       fftw_real tmp622;
+	       fftw_real tmp595;
+	       fftw_real tmp598;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp621 = tmp593 + tmp594;
+	       tmp622 = tmp596 + tmp597;
+	       tmp623 = (K956940335 * tmp621) - (K290284677 * tmp622);
+	       tmp631 = (K290284677 * tmp621) + (K956940335 * tmp622);
+	       tmp595 = tmp593 - tmp594;
+	       tmp598 = tmp596 - tmp597;
+	       tmp599 = (K471396736 * tmp595) - (K881921264 * tmp598);
+	       tmp611 = (K881921264 * tmp595) + (K471396736 * tmp598);
+	  }
+	  {
+	       fftw_real tmp588;
+	       fftw_real tmp591;
+	       fftw_real tmp601;
+	       fftw_real tmp602;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp588 = tmp586 - tmp587;
+	       tmp591 = tmp589 - tmp590;
+	       tmp592 = (K471396736 * tmp588) + (K881921264 * tmp591);
+	       tmp610 = (K471396736 * tmp591) - (K881921264 * tmp588);
+	       tmp601 = tmp537 + tmp538;
+	       tmp602 = tmp476 + tmp479;
+	       tmp603 = tmp601 - tmp602;
+	       tmp625 = tmp601 + tmp602;
+	  }
+	  {
+	       fftw_real tmp604;
+	       fftw_real tmp605;
+	       fftw_real tmp580;
+	       fftw_real tmp583;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp604 = (K831469612 * tmp581) - (K555570233 * tmp582);
+	       tmp605 = (K831469612 * tmp579) + (K555570233 * tmp578);
+	       tmp606 = tmp604 - tmp605;
+	       tmp616 = tmp605 + tmp604;
+	       tmp580 = (K831469612 * tmp578) - (K555570233 * tmp579);
+	       tmp583 = (K555570233 * tmp581) + (K831469612 * tmp582);
+	       tmp584 = tmp580 - tmp583;
+	       tmp626 = tmp580 + tmp583;
+	  }
+	  {
+	       fftw_real tmp585;
+	       fftw_real tmp600;
+	       fftw_real tmp607;
+	       fftw_real tmp608;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp585 = tmp577 + tmp584;
+	       tmp600 = tmp592 + tmp599;
+	       c_im(output[43 * ostride]) = tmp585 - tmp600;
+	       c_im(output[11 * ostride]) = tmp585 + tmp600;
+	       tmp607 = tmp603 - tmp606;
+	       tmp608 = tmp599 - tmp592;
+	       c_re(output[59 * ostride]) = tmp607 - tmp608;
+	       c_re(output[27 * ostride]) = tmp607 + tmp608;
+	  }
+	  {
+	       fftw_real tmp613;
+	       fftw_real tmp614;
+	       fftw_real tmp609;
+	       fftw_real tmp612;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp613 = tmp603 + tmp606;
+	       tmp614 = tmp610 + tmp611;
+	       c_re(output[43 * ostride]) = tmp613 - tmp614;
+	       c_re(output[11 * ostride]) = tmp613 + tmp614;
+	       tmp609 = tmp577 - tmp584;
+	       tmp612 = tmp610 - tmp611;
+	       c_im(output[59 * ostride]) = tmp609 - tmp612;
+	       c_im(output[27 * ostride]) = tmp609 + tmp612;
+	  }
+	  {
+	       fftw_real tmp617;
+	       fftw_real tmp624;
+	       fftw_real tmp627;
+	       fftw_real tmp628;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp617 = tmp615 + tmp616;
+	       tmp624 = tmp620 + tmp623;
+	       c_im(output[35 * ostride]) = tmp617 - tmp624;
+	       c_im(output[3 * ostride]) = tmp617 + tmp624;
+	       tmp627 = tmp625 - tmp626;
+	       tmp628 = tmp623 - tmp620;
+	       c_re(output[51 * ostride]) = tmp627 - tmp628;
+	       c_re(output[19 * ostride]) = tmp627 + tmp628;
+	  }
+	  {
+	       fftw_real tmp633;
+	       fftw_real tmp634;
+	       fftw_real tmp629;
+	       fftw_real tmp632;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp633 = tmp625 + tmp626;
+	       tmp634 = tmp630 + tmp631;
+	       c_re(output[35 * ostride]) = tmp633 - tmp634;
+	       c_re(output[3 * ostride]) = tmp633 + tmp634;
+	       tmp629 = tmp615 - tmp616;
+	       tmp632 = tmp630 - tmp631;
+	       c_im(output[51 * ostride]) = tmp629 - tmp632;
+	       c_im(output[19 * ostride]) = tmp629 + tmp632;
+	  }
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_64_desc = {	/* descriptor record for the fftwi_no_twiddle_64 routine defined above */
+     "fftwi_no_twiddle_64",	/* routine name as a string */
+     (void (*)()) fftwi_no_twiddle_64,	/* the routine itself, cast to an untyped function pointer */
+     64,	/* presumably the transform size -- confirm against fftw_codelet_desc in fftw-int.h */
+     FFTW_BACKWARD,	/* presumably the transform direction -- confirm */
+     FFTW_NOTW,	/* presumably the codelet kind (no twiddle factors) -- confirm */
+     1420,	/* NOTE(review): meaning of this number is not visible here; check the struct definition */
+     0,	/* NOTE(review): field meaning not visible here */
+     (const int *) 0,	/* null pointer; NOTE(review): field meaning not visible here */
+};
diff --git a/src/fftw/fni_7.c b/src/fftw/fni_7.c
new file mode 100644
index 0000000..6fe8dc8
--- /dev/null
+++ b/src/fftw/fni_7.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:20 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 7 */
+
+/*
+ * This function contains 60 FP additions, 36 FP multiplications,
+ * (or, 60 additions, 36 multiplications, 0 fused multiply/add),
+ * 22 stack variables, and 28 memory accesses
+ */
+static const fftw_real K222520933 =
+FFTW_KONST(+0.222520933956314404288902564496794759466355569);	/* numerically cos(3*pi/7) = -cos(4*pi/7) */
+static const fftw_real K900968867 =
+FFTW_KONST(+0.900968867902419126236102319507445051165919162);	/* numerically cos(pi/7) = -cos(6*pi/7) */
+static const fftw_real K623489801 =
+FFTW_KONST(+0.623489801858733530525004884004239810632274731);	/* numerically cos(2*pi/7) */
+static const fftw_real K781831482 =
+FFTW_KONST(+0.781831482468029808708444526674057750232334519);	/* numerically sin(2*pi/7) */
+static const fftw_real K433883739 =
+FFTW_KONST(+0.433883739117558120475768332848358754609990728);	/* numerically sin(pi/7) = sin(6*pi/7) */
+static const fftw_real K974927912 =
+FFTW_KONST(+0.974927912181823607018131682993931217232785801);	/* numerically sin(3*pi/7) = sin(4*pi/7) */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_7(const fftw_complex *input, fftw_complex *output,	/* unscaled 7-point complex DFT using the +i exponent sign */
+			int istride, int ostride)	/* element k is read from input[k * istride] and written to output[k * ostride], k = 0..6 */
+{
+     fftw_real tmp1;
+     fftw_real tmp15;
+     fftw_real tmp4;
+     fftw_real tmp11;
+     fftw_real tmp21;
+     fftw_real tmp31;
+     fftw_real tmp7;
+     fftw_real tmp13;
+     fftw_real tmp24;
+     fftw_real tmp33;
+     fftw_real tmp10;
+     fftw_real tmp12;
+     fftw_real tmp18;
+     fftw_real tmp32;
+     ASSERT_ALIGNED_DOUBLE;	/* macro defined outside this file; presumably an optional stack-alignment check -- confirm in fftw-int.h */
+     tmp1 = c_re(input[0]);	/* real part of element 0 */
+     tmp15 = c_im(input[0]);	/* imaginary part of element 0 */
+     {	/* load elements 1 and 6 and form their sums and differences */
+	  fftw_real tmp2;
+	  fftw_real tmp3;
+	  fftw_real tmp19;
+	  fftw_real tmp20;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp2 = c_re(input[istride]);
+	  tmp3 = c_re(input[6 * istride]);
+	  tmp4 = tmp2 + tmp3;	/* re[1] + re[6] */
+	  tmp11 = tmp2 - tmp3;	/* re[1] - re[6] */
+	  tmp19 = c_im(input[istride]);
+	  tmp20 = c_im(input[6 * istride]);
+	  tmp21 = tmp19 + tmp20;	/* im[1] + im[6] */
+	  tmp31 = tmp20 - tmp19;	/* im[6] - im[1]; note the reversed order relative to tmp11 */
+     }
+     {	/* load elements 2 and 5 and form their sums and differences */
+	  fftw_real tmp5;
+	  fftw_real tmp6;
+	  fftw_real tmp22;
+	  fftw_real tmp23;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp5 = c_re(input[2 * istride]);
+	  tmp6 = c_re(input[5 * istride]);
+	  tmp7 = tmp5 + tmp6;	/* re[2] + re[5] */
+	  tmp13 = tmp5 - tmp6;	/* re[2] - re[5] */
+	  tmp22 = c_im(input[2 * istride]);
+	  tmp23 = c_im(input[5 * istride]);
+	  tmp24 = tmp22 + tmp23;	/* im[2] + im[5] */
+	  tmp33 = tmp23 - tmp22;	/* im[5] - im[2] */
+     }
+     {	/* load elements 3 and 4 and form their sums and differences */
+	  fftw_real tmp8;
+	  fftw_real tmp9;
+	  fftw_real tmp16;
+	  fftw_real tmp17;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp8 = c_re(input[3 * istride]);
+	  tmp9 = c_re(input[4 * istride]);
+	  tmp10 = tmp8 + tmp9;	/* re[3] + re[4] */
+	  tmp12 = tmp8 - tmp9;	/* re[3] - re[4] */
+	  tmp16 = c_im(input[3 * istride]);
+	  tmp17 = c_im(input[4 * istride]);
+	  tmp18 = tmp16 + tmp17;	/* im[3] + im[4] */
+	  tmp32 = tmp17 - tmp16;	/* im[4] - im[3] */
+     }
+     {	/* combine the sums and differences into the seven outputs */
+	  fftw_real tmp36;
+	  fftw_real tmp35;
+	  fftw_real tmp26;
+	  fftw_real tmp27;
+	  ASSERT_ALIGNED_DOUBLE;
+	  c_re(output[0]) = tmp1 + tmp4 + tmp7 + tmp10;	/* plain sum of all seven real parts, no scaling */
+	  tmp36 =
+	      (K974927912 * tmp33) + (K433883739 * tmp32) +
+	      (K781831482 * tmp31);	/* sine-weighted imaginary differences for outputs 1 and 6 */
+	  tmp35 =
+	      tmp1 + (K623489801 * tmp4) - (K900968867 * tmp10) -
+	      (K222520933 * tmp7);	/* cosine-weighted real sums shared by outputs 1 and 6 */
+	  c_re(output[6 * ostride]) = tmp35 - tmp36;
+	  c_re(output[ostride]) = tmp35 + tmp36;
+	  {	/* real parts of outputs 3/4 and 2/5, same sum-and-difference pattern */
+	       fftw_real tmp38;
+	       fftw_real tmp37;
+	       fftw_real tmp34;
+	       fftw_real tmp30;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp38 =
+		   (K974927912 * tmp32) + (K433883739 * tmp31) -
+		   (K781831482 * tmp33);	/* sine-weighted term for outputs 3 and 4 */
+	       tmp37 =
+		   tmp1 + (K623489801 * tmp7) - (K222520933 * tmp10) -
+		   (K900968867 * tmp4);	/* cosine-weighted term for outputs 3 and 4 */
+	       c_re(output[4 * ostride]) = tmp37 - tmp38;
+	       c_re(output[3 * ostride]) = tmp37 + tmp38;
+	       tmp34 =
+		   (K974927912 * tmp31) - (K781831482 * tmp32) -
+		   (K433883739 * tmp33);	/* sine-weighted term for outputs 2 and 5 */
+	       tmp30 =
+		   tmp1 + (K623489801 * tmp10) - (K900968867 * tmp7) -
+		   (K222520933 * tmp4);	/* cosine-weighted term for outputs 2 and 5 */
+	       c_re(output[5 * ostride]) = tmp30 - tmp34;
+	       c_re(output[2 * ostride]) = tmp30 + tmp34;
+	  }
+	  c_im(output[0]) = tmp15 + tmp24 + tmp18 + tmp21;	/* plain sum of all seven imaginary parts, no scaling */
+	  tmp26 =
+	      (K433883739 * tmp11) + (K974927912 * tmp12) -
+	      (K781831482 * tmp13);	/* sine-weighted real differences for outputs 3 and 4 */
+	  tmp27 =
+	      tmp15 + (K623489801 * tmp24) - (K900968867 * tmp21) -
+	      (K222520933 * tmp18);	/* cosine-weighted imaginary sums shared by outputs 3 and 4 */
+	  c_im(output[3 * ostride]) = tmp26 + tmp27;
+	  c_im(output[4 * ostride]) = tmp27 - tmp26;
+	  {	/* imaginary parts of outputs 2/5 and 1/6 */
+	       fftw_real tmp14;
+	       fftw_real tmp25;
+	       fftw_real tmp28;
+	       fftw_real tmp29;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp14 =
+		   (K974927912 * tmp11) - (K781831482 * tmp12) -
+		   (K433883739 * tmp13);	/* sine-weighted term for outputs 2 and 5 */
+	       tmp25 =
+		   tmp15 + (K623489801 * tmp18) - (K222520933 * tmp21) -
+		   (K900968867 * tmp24);	/* cosine-weighted term for outputs 2 and 5 */
+	       c_im(output[2 * ostride]) = tmp14 + tmp25;
+	       c_im(output[5 * ostride]) = tmp25 - tmp14;
+	       tmp28 =
+		   (K781831482 * tmp11) + (K974927912 * tmp13) +
+		   (K433883739 * tmp12);	/* sine-weighted term for outputs 1 and 6 */
+	       tmp29 =
+		   tmp15 + (K623489801 * tmp21) - (K900968867 * tmp18) -
+		   (K222520933 * tmp24);	/* cosine-weighted term for outputs 1 and 6 */
+	       c_im(output[ostride]) = tmp28 + tmp29;
+	       c_im(output[6 * ostride]) = tmp29 - tmp28;
+	  }
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_7_desc = {	/* descriptor record for the fftwi_no_twiddle_7 routine defined above */
+     "fftwi_no_twiddle_7",	/* routine name as a string */
+     (void (*)()) fftwi_no_twiddle_7,	/* the routine itself, cast to an untyped function pointer */
+     7,	/* presumably the transform size -- confirm against fftw_codelet_desc in fftw-int.h */
+     FFTW_BACKWARD,	/* presumably the transform direction -- confirm */
+     FFTW_NOTW,	/* presumably the codelet kind (no twiddle factors) -- confirm */
+     166,	/* NOTE(review): meaning of this number is not visible here; check the struct definition */
+     0,	/* NOTE(review): field meaning not visible here */
+     (const int *) 0,	/* null pointer; NOTE(review): field meaning not visible here */
+};
diff --git a/src/fftw/fni_8.c b/src/fftw/fni_8.c
new file mode 100644
index 0000000..9bee5ad
--- /dev/null
+++ b/src/fftw/fni_8.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:25 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 8 */
+
+/*
+ * This function contains 52 FP additions, 4 FP multiplications,
+ * (or, 52 additions, 4 multiplications, 0 fused multiply/add),
+ * 26 stack variables, and 32 memory accesses
+ */
+static const fftw_real K707106781 =
+FFTW_KONST(+0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2; the identifier spells the first nine decimals */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_8(const fftw_complex *input, fftw_complex *output, /* 8-point codelet; its descriptor tags it FFTW_BACKWARD / FFTW_NOTW */
+			int istride, int ostride) /* element k is read at input[k * istride] and stored at output[k * ostride] */
+{ /* straight-line code: all 8 inputs are loaded into temporaries before the first store to output */
+     fftw_real tmp3;
+     fftw_real tmp37;
+     fftw_real tmp18;
+     fftw_real tmp23;
+     fftw_real tmp6;
+     fftw_real tmp24;
+     fftw_real tmp21;
+     fftw_real tmp38;
+     fftw_real tmp13;
+     fftw_real tmp49;
+     fftw_real tmp35;
+     fftw_real tmp43;
+     fftw_real tmp10;
+     fftw_real tmp48;
+     fftw_real tmp30;
+     fftw_real tmp42;
+     ASSERT_ALIGNED_DOUBLE; /* macro defined outside this file; presumably the check requested by -magic-alignment-check -- confirm in fftw-int.h */
+     { /* load stage: sum and difference of each input pair (k, k + 4) */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp19;
+	  fftw_real tmp20;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[4 * istride]);
+	  tmp3 = tmp1 + tmp2;
+	  tmp37 = tmp1 - tmp2;
+	  { /* imaginary parts of inputs 0 and 4, real parts of inputs 2 and 6 */
+	       fftw_real tmp16;
+	       fftw_real tmp17;
+	       fftw_real tmp4;
+	       fftw_real tmp5;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp16 = c_im(input[0]);
+	       tmp17 = c_im(input[4 * istride]);
+	       tmp18 = tmp16 + tmp17;
+	       tmp23 = tmp16 - tmp17;
+	       tmp4 = c_re(input[2 * istride]);
+	       tmp5 = c_re(input[6 * istride]);
+	       tmp6 = tmp4 + tmp5;
+	       tmp24 = tmp4 - tmp5;
+	  }
+	  tmp19 = c_im(input[2 * istride]);
+	  tmp20 = c_im(input[6 * istride]);
+	  tmp21 = tmp19 + tmp20;
+	  tmp38 = tmp19 - tmp20;
+	  { /* inputs 7 and 3: sums, then sum and difference of the real and imaginary differences */
+	       fftw_real tmp11;
+	       fftw_real tmp12;
+	       fftw_real tmp31;
+	       fftw_real tmp32;
+	       fftw_real tmp33;
+	       fftw_real tmp34;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp11 = c_re(input[7 * istride]);
+	       tmp12 = c_re(input[3 * istride]);
+	       tmp31 = tmp11 - tmp12;
+	       tmp32 = c_im(input[7 * istride]);
+	       tmp33 = c_im(input[3 * istride]);
+	       tmp34 = tmp32 - tmp33;
+	       tmp13 = tmp11 + tmp12;
+	       tmp49 = tmp32 + tmp33;
+	       tmp35 = tmp31 + tmp34;
+	       tmp43 = tmp34 - tmp31;
+	  }
+	  { /* inputs 1 and 5, combined analogously */
+	       fftw_real tmp8;
+	       fftw_real tmp9;
+	       fftw_real tmp26;
+	       fftw_real tmp27;
+	       fftw_real tmp28;
+	       fftw_real tmp29;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp8 = c_re(input[istride]);
+	       tmp9 = c_re(input[5 * istride]);
+	       tmp26 = tmp8 - tmp9;
+	       tmp27 = c_im(input[istride]);
+	       tmp28 = c_im(input[5 * istride]);
+	       tmp29 = tmp27 - tmp28;
+	       tmp10 = tmp8 + tmp9;
+	       tmp48 = tmp27 + tmp28;
+	       tmp30 = tmp26 - tmp29;
+	       tmp42 = tmp26 + tmp29;
+	  }
+     }
+     { /* stores Re of outputs 0 and 4, Im of outputs 2 and 6 */
+	  fftw_real tmp7;
+	  fftw_real tmp14;
+	  fftw_real tmp15;
+	  fftw_real tmp22;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp7 = tmp3 + tmp6;
+	  tmp14 = tmp10 + tmp13;
+	  c_re(output[4 * ostride]) = tmp7 - tmp14;
+	  c_re(output[0]) = tmp7 + tmp14;
+	  tmp15 = tmp10 - tmp13;
+	  tmp22 = tmp18 - tmp21;
+	  c_im(output[2 * ostride]) = tmp15 + tmp22;
+	  c_im(output[6 * ostride]) = tmp22 - tmp15;
+     }
+     { /* stores Im of outputs 0 and 4, Re of outputs 2 and 6 */
+	  fftw_real tmp47;
+	  fftw_real tmp50;
+	  fftw_real tmp51;
+	  fftw_real tmp52;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp47 = tmp18 + tmp21;
+	  tmp50 = tmp48 + tmp49;
+	  c_im(output[4 * ostride]) = tmp47 - tmp50;
+	  c_im(output[0]) = tmp47 + tmp50;
+	  tmp51 = tmp3 - tmp6;
+	  tmp52 = tmp49 - tmp48;
+	  c_re(output[6 * ostride]) = tmp51 - tmp52;
+	  c_re(output[2 * ostride]) = tmp51 + tmp52;
+     }
+     { /* stores Im of outputs 3 and 7, Re of outputs 1 and 5; K707106781 scales the terms built from the odd-numbered inputs */
+	  fftw_real tmp25;
+	  fftw_real tmp36;
+	  fftw_real tmp39;
+	  fftw_real tmp40;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp25 = tmp23 - tmp24;
+	  tmp36 = K707106781 * (tmp30 - tmp35);
+	  c_im(output[7 * ostride]) = tmp25 - tmp36;
+	  c_im(output[3 * ostride]) = tmp25 + tmp36;
+	  tmp39 = tmp37 - tmp38;
+	  tmp40 = K707106781 * (tmp30 + tmp35);
+	  c_re(output[5 * ostride]) = tmp39 - tmp40;
+	  c_re(output[ostride]) = tmp39 + tmp40;
+     }
+     { /* stores Re of outputs 3 and 7, Im of outputs 1 and 5 */
+	  fftw_real tmp45;
+	  fftw_real tmp46;
+	  fftw_real tmp41;
+	  fftw_real tmp44;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp45 = tmp37 + tmp38;
+	  tmp46 = K707106781 * (tmp43 - tmp42);
+	  c_re(output[7 * ostride]) = tmp45 - tmp46;
+	  c_re(output[3 * ostride]) = tmp45 + tmp46;
+	  tmp41 = tmp24 + tmp23;
+	  tmp44 = K707106781 * (tmp42 + tmp43);
+	  c_im(output[5 * ostride]) = tmp41 - tmp44;
+	  c_im(output[ostride]) = tmp41 + tmp44;
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_8_desc = { /* positional initializer; field meanings noted below are inferred -- confirm against the fftw_codelet_desc declaration */
+     "fftwi_no_twiddle_8", /* name string, spelled like the function identifier */
+     (void (*)()) fftwi_no_twiddle_8, /* the codelet function, cast to an unprototyped function pointer */
+     8, /* same number as the generator argument (-notwiddleinv 8); presumably the transform size */
+     FFTW_BACKWARD, /* direction tag */
+     FFTW_NOTW, /* codelet kind tag */
+     188, /* generator-emitted number; presumably an id/signature -- TODO confirm */
+     0, /* presumably the twiddle-factor count; this codelet takes no twiddle array */
+     (const int *) 0, /* null pointer; the twiddle codelet descriptor passes its twiddle_order array in this slot */
+};
diff --git a/src/fftw/fni_9.c b/src/fftw/fni_9.c
new file mode 100644
index 0000000..ce9f1f5
--- /dev/null
+++ b/src/fftw/fni_9.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:06:25 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -notwiddleinv 9 */
+
+/*
+ * This function contains 80 FP additions, 40 FP multiplications,
+ * (or, 60 additions, 20 multiplications, 20 fused multiply/add),
+ * 30 stack variables, and 36 memory accesses
+ */
+static const fftw_real K642787609 =
+FFTW_KONST(+0.642787609686539326322643409907263432907559884); /* numerically sin(2*pi/9), i.e. sin(40 deg) */
+static const fftw_real K766044443 =
+FFTW_KONST(+0.766044443118978035202392650555416673935832457); /* numerically cos(2*pi/9), i.e. cos(40 deg) */
+static const fftw_real K939692620 =
+FFTW_KONST(+0.939692620785908384054109277324731469936208134); /* numerically cos(pi/9), i.e. cos(20 deg) */
+static const fftw_real K342020143 =
+FFTW_KONST(+0.342020143325668733044099614682259580763083368); /* numerically sin(pi/9), i.e. sin(20 deg) */
+static const fftw_real K984807753 =
+FFTW_KONST(+0.984807753012208059366743024589523013670643252); /* numerically sin(4*pi/9), i.e. cos(10 deg) */
+static const fftw_real K173648177 =
+FFTW_KONST(+0.173648177666930348851716626769314796000375677); /* numerically cos(4*pi/9), i.e. sin(10 deg) */
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000); /* exactly one half */
+
+/*
+ * Generator Id's : 
+ * $Id: fni_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: fni_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_no_twiddle_9(const fftw_complex *input, fftw_complex *output, /* 9-point codelet; its descriptor tags it FFTW_BACKWARD / FFTW_NOTW */
+			int istride, int ostride) /* element k is read at input[k * istride] and stored at output[k * ostride] */
+{ /* straight-line code: all 9 inputs are loaded into temporaries before the first store to output */
+     fftw_real tmp5;
+     fftw_real tmp37;
+     fftw_real tmp57;
+     fftw_real tmp22;
+     fftw_real tmp56;
+     fftw_real tmp38;
+     fftw_real tmp10;
+     fftw_real tmp42;
+     fftw_real tmp66;
+     fftw_real tmp27;
+     fftw_real tmp45;
+     fftw_real tmp67;
+     fftw_real tmp15;
+     fftw_real tmp52;
+     fftw_real tmp69;
+     fftw_real tmp32;
+     fftw_real tmp49;
+     fftw_real tmp70;
+     ASSERT_ALIGNED_DOUBLE; /* macro defined outside this file; presumably the check requested by -magic-alignment-check -- confirm in fftw-int.h */
+     { /* real parts of inputs 0, 3, 6: total (tmp5), first minus half the pair sum (tmp37), scaled pair difference (tmp57) */
+	  fftw_real tmp1;
+	  fftw_real tmp2;
+	  fftw_real tmp3;
+	  fftw_real tmp4;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(input[0]);
+	  tmp2 = c_re(input[3 * istride]);
+	  tmp3 = c_re(input[6 * istride]);
+	  tmp4 = tmp2 + tmp3;
+	  tmp5 = tmp1 + tmp4;
+	  tmp37 = tmp1 - (K500000000 * tmp4);
+	  tmp57 = K866025403 * (tmp2 - tmp3);
+     }
+     { /* imaginary parts of inputs 0, 3, 6, combined the same way except the pair difference is taken in the opposite order */
+	  fftw_real tmp18;
+	  fftw_real tmp19;
+	  fftw_real tmp20;
+	  fftw_real tmp21;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp18 = c_im(input[0]);
+	  tmp19 = c_im(input[3 * istride]);
+	  tmp20 = c_im(input[6 * istride]);
+	  tmp21 = tmp19 + tmp20;
+	  tmp22 = tmp18 + tmp21;
+	  tmp56 = tmp18 - (K500000000 * tmp21);
+	  tmp38 = K866025403 * (tmp20 - tmp19);
+     }
+     { /* inputs 1, 4, 7: totals tmp10 (Re) and tmp27 (Im), plus four mixed terms kept for the later stages */
+	  fftw_real tmp6;
+	  fftw_real tmp23;
+	  fftw_real tmp9;
+	  fftw_real tmp44;
+	  fftw_real tmp26;
+	  fftw_real tmp41;
+	  fftw_real tmp40;
+	  fftw_real tmp43;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp6 = c_re(input[istride]);
+	  tmp23 = c_im(input[istride]);
+	  {
+	       fftw_real tmp7;
+	       fftw_real tmp8;
+	       fftw_real tmp24;
+	       fftw_real tmp25;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp7 = c_re(input[4 * istride]);
+	       tmp8 = c_re(input[7 * istride]);
+	       tmp9 = tmp7 + tmp8;
+	       tmp44 = K866025403 * (tmp7 - tmp8);
+	       tmp24 = c_im(input[4 * istride]);
+	       tmp25 = c_im(input[7 * istride]);
+	       tmp26 = tmp24 + tmp25;
+	       tmp41 = K866025403 * (tmp25 - tmp24);
+	  }
+	  tmp10 = tmp6 + tmp9;
+	  tmp40 = tmp6 - (K500000000 * tmp9);
+	  tmp42 = tmp40 - tmp41;
+	  tmp66 = tmp40 + tmp41;
+	  tmp27 = tmp23 + tmp26;
+	  tmp43 = tmp23 - (K500000000 * tmp26);
+	  tmp45 = tmp43 - tmp44;
+	  tmp67 = tmp44 + tmp43;
+     }
+     { /* inputs 2, 5, 8: totals tmp15 (Re) and tmp32 (Im), plus four mixed terms kept for the later stages */
+	  fftw_real tmp11;
+	  fftw_real tmp28;
+	  fftw_real tmp14;
+	  fftw_real tmp48;
+	  fftw_real tmp31;
+	  fftw_real tmp51;
+	  fftw_real tmp50;
+	  fftw_real tmp47;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp11 = c_re(input[2 * istride]);
+	  tmp28 = c_im(input[2 * istride]);
+	  {
+	       fftw_real tmp12;
+	       fftw_real tmp13;
+	       fftw_real tmp29;
+	       fftw_real tmp30;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp12 = c_re(input[5 * istride]);
+	       tmp13 = c_re(input[8 * istride]);
+	       tmp14 = tmp12 + tmp13;
+	       tmp48 = K866025403 * (tmp12 - tmp13);
+	       tmp29 = c_im(input[5 * istride]);
+	       tmp30 = c_im(input[8 * istride]);
+	       tmp31 = tmp29 + tmp30;
+	       tmp51 = K866025403 * (tmp30 - tmp29);
+	  }
+	  tmp15 = tmp11 + tmp14;
+	  tmp50 = tmp11 - (K500000000 * tmp14);
+	  tmp52 = tmp50 - tmp51;
+	  tmp69 = tmp50 + tmp51;
+	  tmp32 = tmp28 + tmp31;
+	  tmp47 = tmp28 - (K500000000 * tmp31);
+	  tmp49 = tmp47 - tmp48;
+	  tmp70 = tmp48 + tmp47;
+     }
+     { /* stores outputs 0, 3, 6, built only from the three group totals */
+	  fftw_real tmp36;
+	  fftw_real tmp16;
+	  fftw_real tmp35;
+	  fftw_real tmp17;
+	  fftw_real tmp33;
+	  fftw_real tmp34;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp36 = K866025403 * (tmp32 - tmp27);
+	  tmp16 = tmp10 + tmp15;
+	  tmp35 = tmp5 - (K500000000 * tmp16);
+	  c_re(output[0]) = tmp5 + tmp16;
+	  c_re(output[3 * ostride]) = tmp35 + tmp36;
+	  c_re(output[6 * ostride]) = tmp35 - tmp36;
+	  tmp17 = K866025403 * (tmp10 - tmp15);
+	  tmp33 = tmp27 + tmp32;
+	  tmp34 = tmp22 - (K500000000 * tmp33);
+	  c_im(output[3 * ostride]) = tmp17 + tmp34;
+	  c_im(output[6 * ostride]) = tmp34 - tmp17;
+	  c_im(output[0]) = tmp22 + tmp33;
+     }
+     { /* stores outputs 2, 5, 8 */
+	  fftw_real tmp39;
+	  fftw_real tmp61;
+	  fftw_real tmp64;
+	  fftw_real tmp58;
+	  fftw_real tmp54;
+	  fftw_real tmp55;
+	  fftw_real tmp63;
+	  fftw_real tmp62;
+	  ASSERT_ALIGNED_DOUBLE;
+	  { /* weighted mixes of the "minus" terms (tmp42, tmp45, tmp49, tmp52) from the two input groups */
+	       fftw_real tmp59;
+	       fftw_real tmp60;
+	       fftw_real tmp46;
+	       fftw_real tmp53;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp39 = tmp37 - tmp38;
+	       tmp59 = (K173648177 * tmp45) + (K984807753 * tmp42);
+	       tmp60 = (K342020143 * tmp52) - (K939692620 * tmp49);
+	       tmp61 = tmp59 + tmp60;
+	       tmp64 = K866025403 * (tmp60 - tmp59);
+	       tmp58 = tmp56 - tmp57;
+	       tmp46 = (K173648177 * tmp42) - (K984807753 * tmp45);
+	       tmp53 = (K342020143 * tmp49) + (K939692620 * tmp52);
+	       tmp54 = tmp46 - tmp53;
+	       tmp55 = K866025403 * (tmp46 + tmp53);
+	  }
+	  c_re(output[2 * ostride]) = tmp39 + tmp54;
+	  tmp63 = tmp39 - (K500000000 * tmp54);
+	  c_re(output[8 * ostride]) = tmp63 - tmp64;
+	  c_re(output[5 * ostride]) = tmp63 + tmp64;
+	  c_im(output[2 * ostride]) = tmp58 + tmp61;
+	  tmp62 = tmp58 - (K500000000 * tmp61);
+	  c_im(output[5 * ostride]) = tmp55 + tmp62;
+	  c_im(output[8 * ostride]) = tmp62 - tmp55;
+     }
+     { /* stores outputs 1, 4, 7 */
+	  fftw_real tmp65;
+	  fftw_real tmp77;
+	  fftw_real tmp80;
+	  fftw_real tmp74;
+	  fftw_real tmp72;
+	  fftw_real tmp73;
+	  fftw_real tmp79;
+	  fftw_real tmp78;
+	  ASSERT_ALIGNED_DOUBLE;
+	  { /* weighted mixes of the "plus" terms (tmp66, tmp67, tmp69, tmp70) from the two input groups */
+	       fftw_real tmp75;
+	       fftw_real tmp76;
+	       fftw_real tmp68;
+	       fftw_real tmp71;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp65 = tmp37 + tmp38;
+	       tmp75 = (K766044443 * tmp67) + (K642787609 * tmp66);
+	       tmp76 = (K173648177 * tmp70) + (K984807753 * tmp69);
+	       tmp77 = tmp75 + tmp76;
+	       tmp80 = K866025403 * (tmp76 - tmp75);
+	       tmp74 = tmp57 + tmp56;
+	       tmp68 = (K766044443 * tmp66) - (K642787609 * tmp67);
+	       tmp71 = (K173648177 * tmp69) - (K984807753 * tmp70);
+	       tmp72 = tmp68 + tmp71;
+	       tmp73 = K866025403 * (tmp68 - tmp71);
+	  }
+	  c_re(output[ostride]) = tmp65 + tmp72;
+	  tmp79 = tmp65 - (K500000000 * tmp72);
+	  c_re(output[7 * ostride]) = tmp79 - tmp80;
+	  c_re(output[4 * ostride]) = tmp79 + tmp80;
+	  c_im(output[ostride]) = tmp74 + tmp77;
+	  tmp78 = tmp74 - (K500000000 * tmp77);
+	  c_im(output[4 * ostride]) = tmp73 + tmp78;
+	  c_im(output[7 * ostride]) = tmp78 - tmp73;
+     }
+}
+
+fftw_codelet_desc fftwi_no_twiddle_9_desc = { /* positional initializer; field meanings noted below are inferred -- confirm against the fftw_codelet_desc declaration */
+     "fftwi_no_twiddle_9", /* name string, spelled like the function identifier */
+     (void (*)()) fftwi_no_twiddle_9, /* the codelet function, cast to an unprototyped function pointer */
+     9, /* same number as the generator argument (-notwiddleinv 9); presumably the transform size */
+     FFTW_BACKWARD, /* direction tag */
+     FFTW_NOTW, /* codelet kind tag */
+     210, /* generator-emitted number; presumably an id/signature -- TODO confirm */
+     0, /* presumably the twiddle-factor count; this codelet takes no twiddle array */
+     (const int *) 0, /* null pointer; the twiddle codelet descriptor passes its twiddle_order array in this slot */
+};
diff --git a/src/fftw/ftw_10.c b/src/fftw/ftw_10.c
new file mode 100644
index 0000000..fea0234
--- /dev/null
+++ b/src/fftw/ftw_10.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:07:41 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 10 */
+
+/*
+ * This function contains 102 FP additions, 60 FP multiplications,
+ * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
+ * 42 stack variables, and 40 memory accesses
+ */
+static const fftw_real K587785252 =
+FFTW_KONST(+0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5), i.e. sin(36 deg) */
+static const fftw_real K951056516 =
+FFTW_KONST(+0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5), i.e. sin(72 deg) */
+static const fftw_real K250000000 =
+FFTW_KONST(+0.250000000000000000000000000000000000000000000); /* exactly one quarter */
+static const fftw_real K559016994 =
+FFTW_KONST(+0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
+
+/*
+ * Generator Id's : 
+ * $Id: ftw_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_twiddle_10(fftw_complex *A, const fftw_complex *W, int iostride, /* in-place 10-point codelet that first multiplies elements 1..9 by entries of W; descriptor tags: FFTW_FORWARD / FFTW_TWIDDLE */
+		     int m, int dist) /* m = number of loop iterations; dist = step applied to the data pointer after each one */
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 9) { /* each pass handles elements inout[k * iostride], k = 0..9, and consumes W[0]..W[8] */
+	  fftw_real tmp7;
+	  fftw_real tmp55;
+	  fftw_real tmp100;
+	  fftw_real tmp112;
+	  fftw_real tmp41;
+	  fftw_real tmp52;
+	  fftw_real tmp53;
+	  fftw_real tmp59;
+	  fftw_real tmp60;
+	  fftw_real tmp61;
+	  fftw_real tmp75;
+	  fftw_real tmp78;
+	  fftw_real tmp110;
+	  fftw_real tmp86;
+	  fftw_real tmp87;
+	  fftw_real tmp96;
+	  fftw_real tmp18;
+	  fftw_real tmp29;
+	  fftw_real tmp30;
+	  fftw_real tmp56;
+	  fftw_real tmp57;
+	  fftw_real tmp58;
+	  fftw_real tmp68;
+	  fftw_real tmp71;
+	  fftw_real tmp109;
+	  fftw_real tmp89;
+	  fftw_real tmp90;
+	  fftw_real tmp95;
+	  ASSERT_ALIGNED_DOUBLE; /* macro defined outside this file; presumably the check requested by -magic-alignment-check -- confirm in fftw-int.h */
+	  { /* element 0 (used unmodified) and element 5 times W[4]: sum and difference of real and imaginary parts */
+	       fftw_real tmp1;
+	       fftw_real tmp99;
+	       fftw_real tmp6;
+	       fftw_real tmp98;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);
+	       tmp99 = c_im(inout[0]);
+	       { /* complex product W[4] * inout[5 * iostride]: tmp6 = real part, tmp98 = imaginary part */
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[5 * iostride]);
+		    tmp5 = c_im(inout[5 * iostride]);
+		    tmp2 = c_re(W[4]);
+		    tmp4 = c_im(W[4]);
+		    tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5);
+		    tmp98 = (tmp4 * tmp3) + (tmp2 * tmp5);
+	       }
+	       tmp7 = tmp1 - tmp6;
+	       tmp55 = tmp1 + tmp6;
+	       tmp100 = tmp98 + tmp99;
+	       tmp112 = tmp99 - tmp98;
+	  }
+	  { /* elements 4, 1, 9, 6: each element k is multiplied by W[k - 1], then pairs (4, 9) and (6, 1) are summed and differenced */
+	       fftw_real tmp35;
+	       fftw_real tmp73;
+	       fftw_real tmp51;
+	       fftw_real tmp77;
+	       fftw_real tmp40;
+	       fftw_real tmp74;
+	       fftw_real tmp46;
+	       fftw_real tmp76;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp32;
+		    fftw_real tmp34;
+		    fftw_real tmp31;
+		    fftw_real tmp33;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp32 = c_re(inout[4 * iostride]);
+		    tmp34 = c_im(inout[4 * iostride]);
+		    tmp31 = c_re(W[3]);
+		    tmp33 = c_im(W[3]);
+		    tmp35 = (tmp31 * tmp32) - (tmp33 * tmp34);
+		    tmp73 = (tmp33 * tmp32) + (tmp31 * tmp34);
+	       }
+	       {
+		    fftw_real tmp48;
+		    fftw_real tmp50;
+		    fftw_real tmp47;
+		    fftw_real tmp49;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp48 = c_re(inout[iostride]);
+		    tmp50 = c_im(inout[iostride]);
+		    tmp47 = c_re(W[0]);
+		    tmp49 = c_im(W[0]);
+		    tmp51 = (tmp47 * tmp48) - (tmp49 * tmp50);
+		    tmp77 = (tmp49 * tmp48) + (tmp47 * tmp50);
+	       }
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp39;
+		    fftw_real tmp36;
+		    fftw_real tmp38;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = c_re(inout[9 * iostride]);
+		    tmp39 = c_im(inout[9 * iostride]);
+		    tmp36 = c_re(W[8]);
+		    tmp38 = c_im(W[8]);
+		    tmp40 = (tmp36 * tmp37) - (tmp38 * tmp39);
+		    tmp74 = (tmp38 * tmp37) + (tmp36 * tmp39);
+	       }
+	       {
+		    fftw_real tmp43;
+		    fftw_real tmp45;
+		    fftw_real tmp42;
+		    fftw_real tmp44;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp43 = c_re(inout[6 * iostride]);
+		    tmp45 = c_im(inout[6 * iostride]);
+		    tmp42 = c_re(W[5]);
+		    tmp44 = c_im(W[5]);
+		    tmp46 = (tmp42 * tmp43) - (tmp44 * tmp45);
+		    tmp76 = (tmp44 * tmp43) + (tmp42 * tmp45);
+	       }
+	       tmp41 = tmp35 - tmp40;
+	       tmp52 = tmp46 - tmp51;
+	       tmp53 = tmp41 + tmp52;
+	       tmp59 = tmp35 + tmp40;
+	       tmp60 = tmp46 + tmp51;
+	       tmp61 = tmp59 + tmp60;
+	       tmp75 = tmp73 - tmp74;
+	       tmp78 = tmp76 - tmp77;
+	       tmp110 = tmp75 + tmp78;
+	       tmp86 = tmp73 + tmp74;
+	       tmp87 = tmp76 + tmp77;
+	       tmp96 = tmp86 + tmp87;
+	  }
+	  { /* elements 2, 3, 7, 8: each element k is multiplied by W[k - 1], then pairs (2, 7) and (8, 3) are summed and differenced */
+	       fftw_real tmp12;
+	       fftw_real tmp66;
+	       fftw_real tmp28;
+	       fftw_real tmp70;
+	       fftw_real tmp17;
+	       fftw_real tmp67;
+	       fftw_real tmp23;
+	       fftw_real tmp69;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp9;
+		    fftw_real tmp11;
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp9 = c_re(inout[2 * iostride]);
+		    tmp11 = c_im(inout[2 * iostride]);
+		    tmp8 = c_re(W[1]);
+		    tmp10 = c_im(W[1]);
+		    tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11);
+		    tmp66 = (tmp10 * tmp9) + (tmp8 * tmp11);
+	       }
+	       {
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    fftw_real tmp24;
+		    fftw_real tmp26;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp25 = c_re(inout[3 * iostride]);
+		    tmp27 = c_im(inout[3 * iostride]);
+		    tmp24 = c_re(W[2]);
+		    tmp26 = c_im(W[2]);
+		    tmp28 = (tmp24 * tmp25) - (tmp26 * tmp27);
+		    tmp70 = (tmp26 * tmp25) + (tmp24 * tmp27);
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[7 * iostride]);
+		    tmp16 = c_im(inout[7 * iostride]);
+		    tmp13 = c_re(W[6]);
+		    tmp15 = c_im(W[6]);
+		    tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16);
+		    tmp67 = (tmp15 * tmp14) + (tmp13 * tmp16);
+	       }
+	       {
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    fftw_real tmp19;
+		    fftw_real tmp21;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp20 = c_re(inout[8 * iostride]);
+		    tmp22 = c_im(inout[8 * iostride]);
+		    tmp19 = c_re(W[7]);
+		    tmp21 = c_im(W[7]);
+		    tmp23 = (tmp19 * tmp20) - (tmp21 * tmp22);
+		    tmp69 = (tmp21 * tmp20) + (tmp19 * tmp22);
+	       }
+	       tmp18 = tmp12 - tmp17;
+	       tmp29 = tmp23 - tmp28;
+	       tmp30 = tmp18 + tmp29;
+	       tmp56 = tmp12 + tmp17;
+	       tmp57 = tmp23 + tmp28;
+	       tmp58 = tmp56 + tmp57;
+	       tmp68 = tmp66 - tmp67;
+	       tmp71 = tmp69 - tmp70;
+	       tmp109 = tmp68 + tmp71;
+	       tmp89 = tmp66 + tmp67;
+	       tmp90 = tmp69 + tmp70;
+	       tmp95 = tmp89 + tmp90;
+	  }
+	  { /* stores Re of elements 5, 7, 3, 9, 1; every load of this iteration has already happened above, so inout can be overwritten */
+	       fftw_real tmp63;
+	       fftw_real tmp54;
+	       fftw_real tmp64;
+	       fftw_real tmp80;
+	       fftw_real tmp82;
+	       fftw_real tmp72;
+	       fftw_real tmp79;
+	       fftw_real tmp81;
+	       fftw_real tmp65;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp63 = K559016994 * (tmp30 - tmp53);
+	       tmp54 = tmp30 + tmp53;
+	       tmp64 = tmp7 - (K250000000 * tmp54);
+	       tmp72 = tmp68 - tmp71;
+	       tmp79 = tmp75 - tmp78;
+	       tmp80 = (K951056516 * tmp72) + (K587785252 * tmp79);
+	       tmp82 = (K951056516 * tmp79) - (K587785252 * tmp72);
+	       c_re(inout[5 * iostride]) = tmp7 + tmp54;
+	       tmp81 = tmp64 - tmp63;
+	       c_re(inout[7 * iostride]) = tmp81 - tmp82;
+	       c_re(inout[3 * iostride]) = tmp81 + tmp82;
+	       tmp65 = tmp63 + tmp64;
+	       c_re(inout[9 * iostride]) = tmp65 - tmp80;
+	       c_re(inout[iostride]) = tmp65 + tmp80;
+	  }
+	  { /* stores Im of elements 5, 3, 7, 1, 9 */
+	       fftw_real tmp111;
+	       fftw_real tmp113;
+	       fftw_real tmp114;
+	       fftw_real tmp118;
+	       fftw_real tmp120;
+	       fftw_real tmp116;
+	       fftw_real tmp117;
+	       fftw_real tmp119;
+	       fftw_real tmp115;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp111 = K559016994 * (tmp109 - tmp110);
+	       tmp113 = tmp109 + tmp110;
+	       tmp114 = tmp112 - (K250000000 * tmp113);
+	       tmp116 = tmp18 - tmp29;
+	       tmp117 = tmp41 - tmp52;
+	       tmp118 = (K951056516 * tmp116) + (K587785252 * tmp117);
+	       tmp120 = (K951056516 * tmp117) - (K587785252 * tmp116);
+	       c_im(inout[5 * iostride]) = tmp113 + tmp112;
+	       tmp119 = tmp114 - tmp111;
+	       c_im(inout[3 * iostride]) = tmp119 - tmp120;
+	       c_im(inout[7 * iostride]) = tmp120 + tmp119;
+	       tmp115 = tmp111 + tmp114;
+	       c_im(inout[iostride]) = tmp115 - tmp118;
+	       c_im(inout[9 * iostride]) = tmp118 + tmp115;
+	  }
+	  { /* stores Re of elements 0, 4, 6, 2, 8 */
+	       fftw_real tmp84;
+	       fftw_real tmp62;
+	       fftw_real tmp83;
+	       fftw_real tmp92;
+	       fftw_real tmp94;
+	       fftw_real tmp88;
+	       fftw_real tmp91;
+	       fftw_real tmp93;
+	       fftw_real tmp85;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp84 = K559016994 * (tmp58 - tmp61);
+	       tmp62 = tmp58 + tmp61;
+	       tmp83 = tmp55 - (K250000000 * tmp62);
+	       tmp88 = tmp86 - tmp87;
+	       tmp91 = tmp89 - tmp90;
+	       tmp92 = (K951056516 * tmp88) - (K587785252 * tmp91);
+	       tmp94 = (K951056516 * tmp91) + (K587785252 * tmp88);
+	       c_re(inout[0]) = tmp55 + tmp62;
+	       tmp93 = tmp84 + tmp83;
+	       c_re(inout[4 * iostride]) = tmp93 - tmp94;
+	       c_re(inout[6 * iostride]) = tmp93 + tmp94;
+	       tmp85 = tmp83 - tmp84;
+	       c_re(inout[2 * iostride]) = tmp85 - tmp92;
+	       c_re(inout[8 * iostride]) = tmp85 + tmp92;
+	  }
+	  { /* stores Im of elements 0, 4, 6, 2, 8 */
+	       fftw_real tmp105;
+	       fftw_real tmp97;
+	       fftw_real tmp104;
+	       fftw_real tmp103;
+	       fftw_real tmp107;
+	       fftw_real tmp101;
+	       fftw_real tmp102;
+	       fftw_real tmp108;
+	       fftw_real tmp106;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp105 = K559016994 * (tmp95 - tmp96);
+	       tmp97 = tmp95 + tmp96;
+	       tmp104 = tmp100 - (K250000000 * tmp97);
+	       tmp101 = tmp59 - tmp60;
+	       tmp102 = tmp56 - tmp57;
+	       tmp103 = (K951056516 * tmp101) - (K587785252 * tmp102);
+	       tmp107 = (K951056516 * tmp102) + (K587785252 * tmp101);
+	       c_im(inout[0]) = tmp97 + tmp100;
+	       tmp108 = tmp105 + tmp104;
+	       c_im(inout[4 * iostride]) = tmp107 + tmp108;
+	       c_im(inout[6 * iostride]) = tmp108 - tmp107;
+	       tmp106 = tmp104 - tmp105;
+	       c_im(inout[2 * iostride]) = tmp103 + tmp106;
+	       c_im(inout[8 * iostride]) = tmp106 - tmp103;
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; /* nine entries, the same count as the W[0]..W[8] reads and the W + 9 step in fftw_twiddle_10 */
+fftw_codelet_desc fftw_twiddle_10_desc = { /* positional initializer; field meanings noted below are inferred -- confirm against the fftw_codelet_desc declaration */
+     "fftw_twiddle_10", /* name string, spelled like the function identifier */
+     (void (*)()) fftw_twiddle_10, /* the codelet function, cast to an unprototyped function pointer */
+     10, /* same number as the generator argument (-twiddle 10); presumably the transform size */
+     FFTW_FORWARD, /* direction tag */
+     FFTW_TWIDDLE, /* codelet kind tag */
+     220, /* generator-emitted number; presumably an id/signature -- TODO confirm */
+     9, /* equals the number of entries in twiddle_order; presumably the twiddle-factor count */
+     twiddle_order, /* the file-local array defined just above */
+};
diff --git a/src/fftw/ftw_16.c b/src/fftw/ftw_16.c
new file mode 100644
index 0000000..1df4281
--- /dev/null
+++ b/src/fftw/ftw_16.c
@@ -0,0 +1,614 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:07:43 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 16 */
+
+/*
+ * This function contains 174 FP additions, 84 FP multiplications,
+ * (or, 136 additions, 46 multiplications, 38 fused multiply/add),
+ * 50 stack variables, and 64 memory accesses
+ */
+static const fftw_real K382683432 =
+FFTW_KONST(+0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
+static const fftw_real K923879532 =
+FFTW_KONST(+0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
+static const fftw_real K707106781 =
+FFTW_KONST(+0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: ftw_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+/* In-place radix-16 twiddle pass: m groups of 16 points, points 1..15 pre-multiplied by W[0..14]. */
+void fftw_twiddle_16(fftw_complex *A, const fftw_complex *W, int iostride,
+		     int m, int dist)
+{
+     int i; /* counts down from m: one 16-point group per iteration */
+     fftw_complex *inout; /* current group; point k is inout[k * iostride] */
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 15) { /* each group has its own 15 twiddles */
+	  fftw_real tmp7;
+	  fftw_real tmp91;
+	  fftw_real tmp180;
+	  fftw_real tmp193;
+	  fftw_real tmp18;
+	  fftw_real tmp194;
+	  fftw_real tmp94;
+	  fftw_real tmp177;
+	  fftw_real tmp77;
+	  fftw_real tmp88;
+	  fftw_real tmp161;
+	  fftw_real tmp128;
+	  fftw_real tmp144;
+	  fftw_real tmp162;
+	  fftw_real tmp163;
+	  fftw_real tmp164;
+	  fftw_real tmp123;
+	  fftw_real tmp143;
+	  fftw_real tmp30;
+	  fftw_real tmp152;
+	  fftw_real tmp100;
+	  fftw_real tmp136;
+	  fftw_real tmp41;
+	  fftw_real tmp153;
+	  fftw_real tmp105;
+	  fftw_real tmp137;
+	  fftw_real tmp54;
+	  fftw_real tmp65;
+	  fftw_real tmp156;
+	  fftw_real tmp117;
+	  fftw_real tmp141;
+	  fftw_real tmp157;
+	  fftw_real tmp158;
+	  fftw_real tmp159;
+	  fftw_real tmp112;
+	  fftw_real tmp140;
+	  ASSERT_ALIGNED_DOUBLE;
+	  { /* points 0 and 8: x0 is used as-is, x8 is multiplied by W[7] */
+	       fftw_real tmp1;
+	       fftw_real tmp179;
+	       fftw_real tmp6;
+	       fftw_real tmp178;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);
+	       tmp179 = c_im(inout[0]);
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[8 * iostride]);
+		    tmp5 = c_im(inout[8 * iostride]);
+		    tmp2 = c_re(W[7]);
+		    tmp4 = c_im(W[7]);
+		    tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); /* real part of W[7] * x8 */
+		    tmp178 = (tmp4 * tmp3) + (tmp2 * tmp5); /* imaginary part of W[7] * x8 */
+	       }
+	       tmp7 = tmp1 + tmp6;
+	       tmp91 = tmp1 - tmp6;
+	       tmp180 = tmp178 + tmp179;
+	       tmp193 = tmp179 - tmp178;
+	  }
+	  { /* points 4 and 12, multiplied by W[3] and W[11] */
+	       fftw_real tmp12;
+	       fftw_real tmp92;
+	       fftw_real tmp17;
+	       fftw_real tmp93;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp9;
+		    fftw_real tmp11;
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp9 = c_re(inout[4 * iostride]);
+		    tmp11 = c_im(inout[4 * iostride]);
+		    tmp8 = c_re(W[3]);
+		    tmp10 = c_im(W[3]);
+		    tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11);
+		    tmp92 = (tmp10 * tmp9) + (tmp8 * tmp11);
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[12 * iostride]);
+		    tmp16 = c_im(inout[12 * iostride]);
+		    tmp13 = c_re(W[11]);
+		    tmp15 = c_im(W[11]);
+		    tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16);
+		    tmp93 = (tmp15 * tmp14) + (tmp13 * tmp16);
+	       }
+	       tmp18 = tmp12 + tmp17;
+	       tmp194 = tmp12 - tmp17;
+	       tmp94 = tmp92 - tmp93;
+	       tmp177 = tmp92 + tmp93;
+	  }
+	  { /* points 15, 11, 7 and 3, multiplied by W[14], W[10], W[6] and W[2] */
+	       fftw_real tmp71;
+	       fftw_real tmp124;
+	       fftw_real tmp87;
+	       fftw_real tmp121;
+	       fftw_real tmp76;
+	       fftw_real tmp125;
+	       fftw_real tmp82;
+	       fftw_real tmp120;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp68;
+		    fftw_real tmp70;
+		    fftw_real tmp67;
+		    fftw_real tmp69;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp68 = c_re(inout[15 * iostride]);
+		    tmp70 = c_im(inout[15 * iostride]);
+		    tmp67 = c_re(W[14]);
+		    tmp69 = c_im(W[14]);
+		    tmp71 = (tmp67 * tmp68) - (tmp69 * tmp70);
+		    tmp124 = (tmp69 * tmp68) + (tmp67 * tmp70);
+	       }
+	       {
+		    fftw_real tmp84;
+		    fftw_real tmp86;
+		    fftw_real tmp83;
+		    fftw_real tmp85;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp84 = c_re(inout[11 * iostride]);
+		    tmp86 = c_im(inout[11 * iostride]);
+		    tmp83 = c_re(W[10]);
+		    tmp85 = c_im(W[10]);
+		    tmp87 = (tmp83 * tmp84) - (tmp85 * tmp86);
+		    tmp121 = (tmp85 * tmp84) + (tmp83 * tmp86);
+	       }
+	       {
+		    fftw_real tmp73;
+		    fftw_real tmp75;
+		    fftw_real tmp72;
+		    fftw_real tmp74;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp73 = c_re(inout[7 * iostride]);
+		    tmp75 = c_im(inout[7 * iostride]);
+		    tmp72 = c_re(W[6]);
+		    tmp74 = c_im(W[6]);
+		    tmp76 = (tmp72 * tmp73) - (tmp74 * tmp75);
+		    tmp125 = (tmp74 * tmp73) + (tmp72 * tmp75);
+	       }
+	       {
+		    fftw_real tmp79;
+		    fftw_real tmp81;
+		    fftw_real tmp78;
+		    fftw_real tmp80;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp79 = c_re(inout[3 * iostride]);
+		    tmp81 = c_im(inout[3 * iostride]);
+		    tmp78 = c_re(W[2]);
+		    tmp80 = c_im(W[2]);
+		    tmp82 = (tmp78 * tmp79) - (tmp80 * tmp81);
+		    tmp120 = (tmp80 * tmp79) + (tmp78 * tmp81);
+	       }
+	       {
+		    fftw_real tmp126;
+		    fftw_real tmp127;
+		    fftw_real tmp119;
+		    fftw_real tmp122;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp77 = tmp71 + tmp76;
+		    tmp88 = tmp82 + tmp87;
+		    tmp161 = tmp77 - tmp88;
+		    tmp126 = tmp124 - tmp125;
+		    tmp127 = tmp82 - tmp87;
+		    tmp128 = tmp126 + tmp127;
+		    tmp144 = tmp126 - tmp127;
+		    tmp162 = tmp124 + tmp125;
+		    tmp163 = tmp120 + tmp121;
+		    tmp164 = tmp162 - tmp163;
+		    tmp119 = tmp71 - tmp76;
+		    tmp122 = tmp120 - tmp121;
+		    tmp123 = tmp119 - tmp122;
+		    tmp143 = tmp119 + tmp122;
+	       }
+	  }
+	  { /* points 2 and 10, multiplied by W[1] and W[9] */
+	       fftw_real tmp24;
+	       fftw_real tmp96;
+	       fftw_real tmp29;
+	       fftw_real tmp97;
+	       fftw_real tmp98;
+	       fftw_real tmp99;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp21;
+		    fftw_real tmp23;
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp21 = c_re(inout[2 * iostride]);
+		    tmp23 = c_im(inout[2 * iostride]);
+		    tmp20 = c_re(W[1]);
+		    tmp22 = c_im(W[1]);
+		    tmp24 = (tmp20 * tmp21) - (tmp22 * tmp23);
+		    tmp96 = (tmp22 * tmp21) + (tmp20 * tmp23);
+	       }
+	       {
+		    fftw_real tmp26;
+		    fftw_real tmp28;
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp26 = c_re(inout[10 * iostride]);
+		    tmp28 = c_im(inout[10 * iostride]);
+		    tmp25 = c_re(W[9]);
+		    tmp27 = c_im(W[9]);
+		    tmp29 = (tmp25 * tmp26) - (tmp27 * tmp28);
+		    tmp97 = (tmp27 * tmp26) + (tmp25 * tmp28);
+	       }
+	       tmp30 = tmp24 + tmp29;
+	       tmp152 = tmp96 + tmp97;
+	       tmp98 = tmp96 - tmp97;
+	       tmp99 = tmp24 - tmp29;
+	       tmp100 = tmp98 - tmp99;
+	       tmp136 = tmp99 + tmp98;
+	  }
+	  { /* points 14 and 6, multiplied by W[13] and W[5] */
+	       fftw_real tmp35;
+	       fftw_real tmp102;
+	       fftw_real tmp40;
+	       fftw_real tmp103;
+	       fftw_real tmp101;
+	       fftw_real tmp104;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp32;
+		    fftw_real tmp34;
+		    fftw_real tmp31;
+		    fftw_real tmp33;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp32 = c_re(inout[14 * iostride]);
+		    tmp34 = c_im(inout[14 * iostride]);
+		    tmp31 = c_re(W[13]);
+		    tmp33 = c_im(W[13]);
+		    tmp35 = (tmp31 * tmp32) - (tmp33 * tmp34);
+		    tmp102 = (tmp33 * tmp32) + (tmp31 * tmp34);
+	       }
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp39;
+		    fftw_real tmp36;
+		    fftw_real tmp38;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = c_re(inout[6 * iostride]);
+		    tmp39 = c_im(inout[6 * iostride]);
+		    tmp36 = c_re(W[5]);
+		    tmp38 = c_im(W[5]);
+		    tmp40 = (tmp36 * tmp37) - (tmp38 * tmp39);
+		    tmp103 = (tmp38 * tmp37) + (tmp36 * tmp39);
+	       }
+	       tmp41 = tmp35 + tmp40;
+	       tmp153 = tmp102 + tmp103;
+	       tmp101 = tmp35 - tmp40;
+	       tmp104 = tmp102 - tmp103;
+	       tmp105 = tmp101 + tmp104;
+	       tmp137 = tmp101 - tmp104;
+	  }
+	  { /* points 1, 13, 9 and 5, multiplied by W[0], W[12], W[8] and W[4] */
+	       fftw_real tmp48;
+	       fftw_real tmp108;
+	       fftw_real tmp64;
+	       fftw_real tmp115;
+	       fftw_real tmp53;
+	       fftw_real tmp109;
+	       fftw_real tmp59;
+	       fftw_real tmp114;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp45;
+		    fftw_real tmp47;
+		    fftw_real tmp44;
+		    fftw_real tmp46;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp45 = c_re(inout[iostride]);
+		    tmp47 = c_im(inout[iostride]);
+		    tmp44 = c_re(W[0]);
+		    tmp46 = c_im(W[0]);
+		    tmp48 = (tmp44 * tmp45) - (tmp46 * tmp47);
+		    tmp108 = (tmp46 * tmp45) + (tmp44 * tmp47);
+	       }
+	       {
+		    fftw_real tmp61;
+		    fftw_real tmp63;
+		    fftw_real tmp60;
+		    fftw_real tmp62;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp61 = c_re(inout[13 * iostride]);
+		    tmp63 = c_im(inout[13 * iostride]);
+		    tmp60 = c_re(W[12]);
+		    tmp62 = c_im(W[12]);
+		    tmp64 = (tmp60 * tmp61) - (tmp62 * tmp63);
+		    tmp115 = (tmp62 * tmp61) + (tmp60 * tmp63);
+	       }
+	       {
+		    fftw_real tmp50;
+		    fftw_real tmp52;
+		    fftw_real tmp49;
+		    fftw_real tmp51;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp50 = c_re(inout[9 * iostride]);
+		    tmp52 = c_im(inout[9 * iostride]);
+		    tmp49 = c_re(W[8]);
+		    tmp51 = c_im(W[8]);
+		    tmp53 = (tmp49 * tmp50) - (tmp51 * tmp52);
+		    tmp109 = (tmp51 * tmp50) + (tmp49 * tmp52);
+	       }
+	       {
+		    fftw_real tmp56;
+		    fftw_real tmp58;
+		    fftw_real tmp55;
+		    fftw_real tmp57;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp56 = c_re(inout[5 * iostride]);
+		    tmp58 = c_im(inout[5 * iostride]);
+		    tmp55 = c_re(W[4]);
+		    tmp57 = c_im(W[4]);
+		    tmp59 = (tmp55 * tmp56) - (tmp57 * tmp58);
+		    tmp114 = (tmp57 * tmp56) + (tmp55 * tmp58);
+	       }
+	       {
+		    fftw_real tmp113;
+		    fftw_real tmp116;
+		    fftw_real tmp110;
+		    fftw_real tmp111;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp54 = tmp48 + tmp53;
+		    tmp65 = tmp59 + tmp64;
+		    tmp156 = tmp54 - tmp65;
+		    tmp113 = tmp48 - tmp53;
+		    tmp116 = tmp114 - tmp115;
+		    tmp117 = tmp113 - tmp116;
+		    tmp141 = tmp113 + tmp116;
+		    tmp157 = tmp108 + tmp109;
+		    tmp158 = tmp114 + tmp115;
+		    tmp159 = tmp157 - tmp158;
+		    tmp110 = tmp108 - tmp109;
+		    tmp111 = tmp59 - tmp64;
+		    tmp112 = tmp110 + tmp111;
+		    tmp140 = tmp110 - tmp111;
+	       }
+	  }
+	  { /* all 16 inputs are loaded by now; compute and store outputs 3, 7, 11 and 15 */
+	       fftw_real tmp107;
+	       fftw_real tmp131;
+	       fftw_real tmp202;
+	       fftw_real tmp204;
+	       fftw_real tmp130;
+	       fftw_real tmp203;
+	       fftw_real tmp134;
+	       fftw_real tmp199;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp95;
+		    fftw_real tmp106;
+		    fftw_real tmp200;
+		    fftw_real tmp201;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp95 = tmp91 - tmp94;
+		    tmp106 = K707106781 * (tmp100 - tmp105);
+		    tmp107 = tmp95 + tmp106;
+		    tmp131 = tmp95 - tmp106;
+		    tmp200 = K707106781 * (tmp137 - tmp136);
+		    tmp201 = tmp194 + tmp193;
+		    tmp202 = tmp200 + tmp201;
+		    tmp204 = tmp201 - tmp200;
+	       }
+	       {
+		    fftw_real tmp118;
+		    fftw_real tmp129;
+		    fftw_real tmp132;
+		    fftw_real tmp133;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp118 = (K923879532 * tmp112) + (K382683432 * tmp117);
+		    tmp129 = (K382683432 * tmp123) - (K923879532 * tmp128);
+		    tmp130 = tmp118 + tmp129;
+		    tmp203 = tmp129 - tmp118;
+		    tmp132 = (K382683432 * tmp112) - (K923879532 * tmp117);
+		    tmp133 = (K382683432 * tmp128) + (K923879532 * tmp123);
+		    tmp134 = tmp132 - tmp133;
+		    tmp199 = tmp132 + tmp133;
+	       }
+	       c_re(inout[11 * iostride]) = tmp107 - tmp130;
+	       c_re(inout[3 * iostride]) = tmp107 + tmp130;
+	       c_re(inout[15 * iostride]) = tmp131 - tmp134;
+	       c_re(inout[7 * iostride]) = tmp131 + tmp134;
+	       c_im(inout[3 * iostride]) = tmp199 + tmp202;
+	       c_im(inout[11 * iostride]) = tmp202 - tmp199;
+	       c_im(inout[7 * iostride]) = tmp203 + tmp204;
+	       c_im(inout[15 * iostride]) = tmp204 - tmp203;
+	  }
+	  { /* compute and store outputs 1, 5, 9 and 13 */
+	       fftw_real tmp139;
+	       fftw_real tmp147;
+	       fftw_real tmp196;
+	       fftw_real tmp198;
+	       fftw_real tmp146;
+	       fftw_real tmp197;
+	       fftw_real tmp150;
+	       fftw_real tmp191;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp135;
+		    fftw_real tmp138;
+		    fftw_real tmp192;
+		    fftw_real tmp195;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp135 = tmp91 + tmp94;
+		    tmp138 = K707106781 * (tmp136 + tmp137);
+		    tmp139 = tmp135 + tmp138;
+		    tmp147 = tmp135 - tmp138;
+		    tmp192 = K707106781 * (tmp100 + tmp105);
+		    tmp195 = tmp193 - tmp194;
+		    tmp196 = tmp192 + tmp195;
+		    tmp198 = tmp195 - tmp192;
+	       }
+	       {
+		    fftw_real tmp142;
+		    fftw_real tmp145;
+		    fftw_real tmp148;
+		    fftw_real tmp149;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp142 = (K382683432 * tmp140) + (K923879532 * tmp141);
+		    tmp145 = (K923879532 * tmp143) - (K382683432 * tmp144);
+		    tmp146 = tmp142 + tmp145;
+		    tmp197 = tmp145 - tmp142;
+		    tmp148 = (K923879532 * tmp140) - (K382683432 * tmp141);
+		    tmp149 = (K923879532 * tmp144) + (K382683432 * tmp143);
+		    tmp150 = tmp148 - tmp149;
+		    tmp191 = tmp148 + tmp149;
+	       }
+	       c_re(inout[9 * iostride]) = tmp139 - tmp146;
+	       c_re(inout[iostride]) = tmp139 + tmp146;
+	       c_re(inout[13 * iostride]) = tmp147 - tmp150;
+	       c_re(inout[5 * iostride]) = tmp147 + tmp150;
+	       c_im(inout[iostride]) = tmp191 + tmp196;
+	       c_im(inout[9 * iostride]) = tmp196 - tmp191;
+	       c_im(inout[5 * iostride]) = tmp197 + tmp198;
+	       c_im(inout[13 * iostride]) = tmp198 - tmp197;
+	  }
+	  { /* compute and store outputs 2, 6, 10 and 14 */
+	       fftw_real tmp155;
+	       fftw_real tmp167;
+	       fftw_real tmp188;
+	       fftw_real tmp190;
+	       fftw_real tmp166;
+	       fftw_real tmp189;
+	       fftw_real tmp170;
+	       fftw_real tmp185;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp151;
+		    fftw_real tmp154;
+		    fftw_real tmp186;
+		    fftw_real tmp187;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp151 = tmp7 - tmp18;
+		    tmp154 = tmp152 - tmp153;
+		    tmp155 = tmp151 + tmp154;
+		    tmp167 = tmp151 - tmp154;
+		    tmp186 = tmp41 - tmp30;
+		    tmp187 = tmp180 - tmp177;
+		    tmp188 = tmp186 + tmp187;
+		    tmp190 = tmp187 - tmp186;
+	       }
+	       {
+		    fftw_real tmp160;
+		    fftw_real tmp165;
+		    fftw_real tmp168;
+		    fftw_real tmp169;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp160 = tmp156 + tmp159;
+		    tmp165 = tmp161 - tmp164;
+		    tmp166 = K707106781 * (tmp160 + tmp165);
+		    tmp189 = K707106781 * (tmp165 - tmp160);
+		    tmp168 = tmp159 - tmp156;
+		    tmp169 = tmp161 + tmp164;
+		    tmp170 = K707106781 * (tmp168 - tmp169);
+		    tmp185 = K707106781 * (tmp168 + tmp169);
+	       }
+	       c_re(inout[10 * iostride]) = tmp155 - tmp166;
+	       c_re(inout[2 * iostride]) = tmp155 + tmp166;
+	       c_re(inout[14 * iostride]) = tmp167 - tmp170;
+	       c_re(inout[6 * iostride]) = tmp167 + tmp170;
+	       c_im(inout[2 * iostride]) = tmp185 + tmp188;
+	       c_im(inout[10 * iostride]) = tmp188 - tmp185;
+	       c_im(inout[6 * iostride]) = tmp189 + tmp190;
+	       c_im(inout[14 * iostride]) = tmp190 - tmp189;
+	  }
+	  { /* compute and store outputs 0, 4, 8 and 12 */
+	       fftw_real tmp43;
+	       fftw_real tmp171;
+	       fftw_real tmp182;
+	       fftw_real tmp184;
+	       fftw_real tmp90;
+	       fftw_real tmp183;
+	       fftw_real tmp174;
+	       fftw_real tmp175;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp19;
+		    fftw_real tmp42;
+		    fftw_real tmp176;
+		    fftw_real tmp181;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp19 = tmp7 + tmp18;
+		    tmp42 = tmp30 + tmp41;
+		    tmp43 = tmp19 + tmp42;
+		    tmp171 = tmp19 - tmp42;
+		    tmp176 = tmp152 + tmp153;
+		    tmp181 = tmp177 + tmp180;
+		    tmp182 = tmp176 + tmp181;
+		    tmp184 = tmp181 - tmp176;
+	       }
+	       {
+		    fftw_real tmp66;
+		    fftw_real tmp89;
+		    fftw_real tmp172;
+		    fftw_real tmp173;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp66 = tmp54 + tmp65;
+		    tmp89 = tmp77 + tmp88;
+		    tmp90 = tmp66 + tmp89;
+		    tmp183 = tmp89 - tmp66;
+		    tmp172 = tmp157 + tmp158;
+		    tmp173 = tmp162 + tmp163;
+		    tmp174 = tmp172 - tmp173;
+		    tmp175 = tmp172 + tmp173;
+	       }
+	       c_re(inout[8 * iostride]) = tmp43 - tmp90;
+	       c_re(inout[0]) = tmp43 + tmp90;
+	       c_re(inout[12 * iostride]) = tmp171 - tmp174;
+	       c_re(inout[4 * iostride]) = tmp171 + tmp174;
+	       c_im(inout[0]) = tmp175 + tmp182;
+	       c_im(inout[8 * iostride]) = tmp182 - tmp175;
+	       c_im(inout[4 * iostride]) = tmp183 + tmp184;
+	       c_im(inout[12 * iostride]) = tmp184 - tmp183;
+	  }
+     }
+}
+
+static const int twiddle_order[] = /* the integers 1..15, referenced by the descriptor below */
+    { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+fftw_codelet_desc fftw_twiddle_16_desc = { /* descriptor record for the fftw_twiddle_16 codelet */
+     "fftw_twiddle_16", /* codelet name as a string */
+     (void (*)()) fftw_twiddle_16, /* entry point, cast to a generic function pointer */
+     16, /* matches the 16 points the codelet processes per group */
+     FFTW_FORWARD,
+     FFTW_TWIDDLE,
+     352, /* NOTE(review): meaning not visible here -- see fftw_codelet_desc in fftw-int.h */
+     15, /* equals the number of entries in twiddle_order and the per-group W advance */
+     twiddle_order,
+};
diff --git a/src/fftw/ftw_2.c b/src/fftw/ftw_2.c
new file mode 100644
index 0000000..3c82074
--- /dev/null
+++ b/src/fftw/ftw_2.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:07:31 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 2 */
+
+/*
+ * This function contains 6 FP additions, 4 FP multiplications,
+ * (or, 4 additions, 2 multiplications, 2 fused multiply/add),
+ * 10 stack variables, and 8 memory accesses
+ */
+
+/*
+ * Generator Id's : 
+ * $Id: ftw_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+/* In-place radix-2 twiddle butterfly: (x0, x1) -> (x0 + W[0]*x1, x0 - W[0]*x1), for m groups. */
+void fftw_twiddle_2(fftw_complex *A, const fftw_complex *W, int iostride,
+		    int m, int dist)
+{
+     int i; /* counts down from m: one 2-point group per iteration */
+     fftw_complex *inout; /* current group: x0 = inout[0], x1 = inout[iostride] */
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 1) { /* each group has its own twiddle */
+	  fftw_real tmp1;
+	  fftw_real tmp8;
+	  fftw_real tmp6;
+	  fftw_real tmp7;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(inout[0]);
+	  tmp8 = c_im(inout[0]);
+	  { /* complex product W[0] * x1 */
+	       fftw_real tmp3;
+	       fftw_real tmp5;
+	       fftw_real tmp2;
+	       fftw_real tmp4;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp3 = c_re(inout[iostride]);
+	       tmp5 = c_im(inout[iostride]);
+	       tmp2 = c_re(W[0]);
+	       tmp4 = c_im(W[0]);
+	       tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5); /* real part of the product */
+	       tmp7 = (tmp4 * tmp3) + (tmp2 * tmp5); /* imaginary part of the product */
+	  }
+	  c_re(inout[iostride]) = tmp1 - tmp6; /* both inputs are already in locals, so storing in place is safe */
+	  c_re(inout[0]) = tmp1 + tmp6;
+	  c_im(inout[0]) = tmp7 + tmp8;
+	  c_im(inout[iostride]) = tmp8 - tmp7;
+     }
+}
+
+static const int twiddle_order[] = { 1 }; /* single entry, referenced by the descriptor below */
+fftw_codelet_desc fftw_twiddle_2_desc = { /* descriptor record for the fftw_twiddle_2 codelet */
+     "fftw_twiddle_2", /* codelet name as a string */
+     (void (*)()) fftw_twiddle_2, /* entry point, cast to a generic function pointer */
+     2, /* matches the 2 points the codelet processes per group */
+     FFTW_FORWARD,
+     FFTW_TWIDDLE,
+     44, /* NOTE(review): meaning not visible here -- see fftw_codelet_desc in fftw-int.h */
+     1, /* equals the number of entries in twiddle_order and the per-group W advance */
+     twiddle_order,
+};
diff --git a/src/fftw/ftw_3.c b/src/fftw/ftw_3.c
new file mode 100644
index 0000000..63ea592
--- /dev/null
+++ b/src/fftw/ftw_3.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:07:31 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 3 */
+
+/*
+ * This function contains 16 FP additions, 12 FP multiplications,
+ * (or, 10 additions, 6 multiplications, 6 fused multiply/add),
+ * 14 stack variables, and 12 memory accesses
+ */
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000); /* 1/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: ftw_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+/* In-place radix-3 twiddle pass: 3-point DFT of (x0, W[0]*x1, W[1]*x2), repeated for m groups. */
+void fftw_twiddle_3(fftw_complex *A, const fftw_complex *W, int iostride,
+		    int m, int dist)
+{
+     int i; /* counts down from m: one 3-point group per iteration */
+     fftw_complex *inout; /* current group; point k is inout[k * iostride] */
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 2) { /* each group has its own 2 twiddles */
+	  fftw_real tmp1;
+	  fftw_real tmp18;
+	  fftw_real tmp6;
+	  fftw_real tmp14;
+	  fftw_real tmp11;
+	  fftw_real tmp15;
+	  fftw_real tmp12;
+	  fftw_real tmp17;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(inout[0]);
+	  tmp18 = c_im(inout[0]);
+	  { /* t1 = W[0] * x1 as (tmp6, tmp14) */
+	       fftw_real tmp3;
+	       fftw_real tmp5;
+	       fftw_real tmp2;
+	       fftw_real tmp4;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp3 = c_re(inout[iostride]);
+	       tmp5 = c_im(inout[iostride]);
+	       tmp2 = c_re(W[0]);
+	       tmp4 = c_im(W[0]);
+	       tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5);
+	       tmp14 = (tmp4 * tmp3) + (tmp2 * tmp5);
+	  }
+	  { /* t2 = W[1] * x2 as (tmp11, tmp15) */
+	       fftw_real tmp8;
+	       fftw_real tmp10;
+	       fftw_real tmp7;
+	       fftw_real tmp9;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp8 = c_re(inout[2 * iostride]);
+	       tmp10 = c_im(inout[2 * iostride]);
+	       tmp7 = c_re(W[1]);
+	       tmp9 = c_im(W[1]);
+	       tmp11 = (tmp7 * tmp8) - (tmp9 * tmp10);
+	       tmp15 = (tmp9 * tmp8) + (tmp7 * tmp10);
+	  }
+	  tmp12 = tmp6 + tmp11; /* real part of t1 + t2 */
+	  tmp17 = tmp14 + tmp15; /* imaginary part of t1 + t2 */
+	  { /* outputs: y0 = x0 + t1 + t2; y1 and y2 use the factors -1/2 and +/- sqrt(3)/2 */
+	       fftw_real tmp13;
+	       fftw_real tmp16;
+	       fftw_real tmp19;
+	       fftw_real tmp20;
+	       ASSERT_ALIGNED_DOUBLE;
+	       c_re(inout[0]) = tmp1 + tmp12;
+	       tmp13 = tmp1 - (K500000000 * tmp12);
+	       tmp16 = K866025403 * (tmp14 - tmp15);
+	       c_re(inout[2 * iostride]) = tmp13 - tmp16;
+	       c_re(inout[iostride]) = tmp13 + tmp16;
+	       c_im(inout[0]) = tmp17 + tmp18;
+	       tmp19 = K866025403 * (tmp11 - tmp6);
+	       tmp20 = tmp18 - (K500000000 * tmp17);
+	       c_im(inout[iostride]) = tmp19 + tmp20;
+	       c_im(inout[2 * iostride]) = tmp20 - tmp19;
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2 }; /* the integers 1..2, referenced by the descriptor below */
+fftw_codelet_desc fftw_twiddle_3_desc = { /* descriptor record for the fftw_twiddle_3 codelet */
+     "fftw_twiddle_3", /* codelet name as a string */
+     (void (*)()) fftw_twiddle_3, /* entry point, cast to a generic function pointer */
+     3, /* matches the 3 points the codelet processes per group */
+     FFTW_FORWARD,
+     FFTW_TWIDDLE,
+     66, /* NOTE(review): meaning not visible here -- see fftw_codelet_desc in fftw-int.h */
+     2, /* equals the number of entries in twiddle_order and the per-group W advance */
+     twiddle_order,
+};
diff --git a/src/fftw/ftw_32.c b/src/fftw/ftw_32.c
new file mode 100644
index 0000000..8fa7e39
--- /dev/null
+++ b/src/fftw/ftw_32.c
@@ -0,0 +1,1398 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:07:45 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 32 */
+
+/*
+ * This function contains 434 FP additions, 208 FP multiplications,
+ * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
+ * 90 stack variables, and 128 memory accesses
+ */
+static const fftw_real K195090322 =
+FFTW_KONST(+0.195090322016128267848284868477022240927691618); /* sin(pi/16) */
+static const fftw_real K980785280 =
+FFTW_KONST(+0.980785280403230449126182236134239036973933731); /* cos(pi/16) */
+static const fftw_real K831469612 =
+FFTW_KONST(+0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */
+static const fftw_real K555570233 =
+FFTW_KONST(+0.555570233019602224742830813948532874374937191); /* sin(3*pi/16) */
+static const fftw_real K382683432 =
+FFTW_KONST(+0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
+static const fftw_real K923879532 =
+FFTW_KONST(+0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
+static const fftw_real K707106781 =
+FFTW_KONST(+0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: ftw_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_twiddle_32(fftw_complex *A, const fftw_complex *W, int iostride,
+		     int m, int dist)
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 31) {
+	  fftw_real tmp19;
+	  fftw_real tmp351;
+	  fftw_real tmp472;
+	  fftw_real tmp486;
+	  fftw_real tmp442;
+	  fftw_real tmp456;
+	  fftw_real tmp191;
+	  fftw_real tmp303;
+	  fftw_real tmp161;
+	  fftw_real tmp379;
+	  fftw_real tmp276;
+	  fftw_real tmp326;
+	  fftw_real tmp386;
+	  fftw_real tmp422;
+	  fftw_real tmp259;
+	  fftw_real tmp323;
+	  fftw_real tmp42;
+	  fftw_real tmp455;
+	  fftw_real tmp201;
+	  fftw_real tmp305;
+	  fftw_real tmp354;
+	  fftw_real tmp437;
+	  fftw_real tmp196;
+	  fftw_real tmp304;
+	  fftw_real tmp184;
+	  fftw_real tmp387;
+	  fftw_real tmp382;
+	  fftw_real tmp423;
+	  fftw_real tmp270;
+	  fftw_real tmp327;
+	  fftw_real tmp279;
+	  fftw_real tmp324;
+	  fftw_real tmp66;
+	  fftw_real tmp359;
+	  fftw_real tmp213;
+	  fftw_real tmp309;
+	  fftw_real tmp358;
+	  fftw_real tmp412;
+	  fftw_real tmp208;
+	  fftw_real tmp308;
+	  fftw_real tmp114;
+	  fftw_real tmp373;
+	  fftw_real tmp249;
+	  fftw_real tmp316;
+	  fftw_real tmp370;
+	  fftw_real tmp417;
+	  fftw_real tmp232;
+	  fftw_real tmp319;
+	  fftw_real tmp89;
+	  fftw_real tmp361;
+	  fftw_real tmp224;
+	  fftw_real tmp312;
+	  fftw_real tmp364;
+	  fftw_real tmp413;
+	  fftw_real tmp219;
+	  fftw_real tmp311;
+	  fftw_real tmp137;
+	  fftw_real tmp371;
+	  fftw_real tmp376;
+	  fftw_real tmp418;
+	  fftw_real tmp243;
+	  fftw_real tmp317;
+	  fftw_real tmp252;
+	  fftw_real tmp320;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp1;
+	       fftw_real tmp440;
+	       fftw_real tmp6;
+	       fftw_real tmp439;
+	       fftw_real tmp12;
+	       fftw_real tmp188;
+	       fftw_real tmp17;
+	       fftw_real tmp189;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);
+	       tmp440 = c_im(inout[0]);
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[16 * iostride]);
+		    tmp5 = c_im(inout[16 * iostride]);
+		    tmp2 = c_re(W[15]);
+		    tmp4 = c_im(W[15]);
+		    tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5);
+		    tmp439 = (tmp4 * tmp3) + (tmp2 * tmp5);
+	       }
+	       {
+		    fftw_real tmp9;
+		    fftw_real tmp11;
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp9 = c_re(inout[8 * iostride]);
+		    tmp11 = c_im(inout[8 * iostride]);
+		    tmp8 = c_re(W[7]);
+		    tmp10 = c_im(W[7]);
+		    tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11);
+		    tmp188 = (tmp10 * tmp9) + (tmp8 * tmp11);
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[24 * iostride]);
+		    tmp16 = c_im(inout[24 * iostride]);
+		    tmp13 = c_re(W[23]);
+		    tmp15 = c_im(W[23]);
+		    tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16);
+		    tmp189 = (tmp15 * tmp14) + (tmp13 * tmp16);
+	       }
+	       {
+		    fftw_real tmp7;
+		    fftw_real tmp18;
+		    fftw_real tmp470;
+		    fftw_real tmp471;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp7 = tmp1 + tmp6;
+		    tmp18 = tmp12 + tmp17;
+		    tmp19 = tmp7 + tmp18;
+		    tmp351 = tmp7 - tmp18;
+		    tmp470 = tmp440 - tmp439;
+		    tmp471 = tmp12 - tmp17;
+		    tmp472 = tmp470 - tmp471;
+		    tmp486 = tmp471 + tmp470;
+	       }
+	       {
+		    fftw_real tmp438;
+		    fftw_real tmp441;
+		    fftw_real tmp187;
+		    fftw_real tmp190;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp438 = tmp188 + tmp189;
+		    tmp441 = tmp439 + tmp440;
+		    tmp442 = tmp438 + tmp441;
+		    tmp456 = tmp441 - tmp438;
+		    tmp187 = tmp1 - tmp6;
+		    tmp190 = tmp188 - tmp189;
+		    tmp191 = tmp187 - tmp190;
+		    tmp303 = tmp187 + tmp190;
+	       }
+	  }
+	  {
+	       fftw_real tmp143;
+	       fftw_real tmp272;
+	       fftw_real tmp159;
+	       fftw_real tmp257;
+	       fftw_real tmp148;
+	       fftw_real tmp273;
+	       fftw_real tmp154;
+	       fftw_real tmp256;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp140;
+		    fftw_real tmp142;
+		    fftw_real tmp139;
+		    fftw_real tmp141;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp140 = c_re(inout[31 * iostride]);
+		    tmp142 = c_im(inout[31 * iostride]);
+		    tmp139 = c_re(W[30]);
+		    tmp141 = c_im(W[30]);
+		    tmp143 = (tmp139 * tmp140) - (tmp141 * tmp142);
+		    tmp272 = (tmp141 * tmp140) + (tmp139 * tmp142);
+	       }
+	       {
+		    fftw_real tmp156;
+		    fftw_real tmp158;
+		    fftw_real tmp155;
+		    fftw_real tmp157;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp156 = c_re(inout[23 * iostride]);
+		    tmp158 = c_im(inout[23 * iostride]);
+		    tmp155 = c_re(W[22]);
+		    tmp157 = c_im(W[22]);
+		    tmp159 = (tmp155 * tmp156) - (tmp157 * tmp158);
+		    tmp257 = (tmp157 * tmp156) + (tmp155 * tmp158);
+	       }
+	       {
+		    fftw_real tmp145;
+		    fftw_real tmp147;
+		    fftw_real tmp144;
+		    fftw_real tmp146;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp145 = c_re(inout[15 * iostride]);
+		    tmp147 = c_im(inout[15 * iostride]);
+		    tmp144 = c_re(W[14]);
+		    tmp146 = c_im(W[14]);
+		    tmp148 = (tmp144 * tmp145) - (tmp146 * tmp147);
+		    tmp273 = (tmp146 * tmp145) + (tmp144 * tmp147);
+	       }
+	       {
+		    fftw_real tmp151;
+		    fftw_real tmp153;
+		    fftw_real tmp150;
+		    fftw_real tmp152;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp151 = c_re(inout[7 * iostride]);
+		    tmp153 = c_im(inout[7 * iostride]);
+		    tmp150 = c_re(W[6]);
+		    tmp152 = c_im(W[6]);
+		    tmp154 = (tmp150 * tmp151) - (tmp152 * tmp153);
+		    tmp256 = (tmp152 * tmp151) + (tmp150 * tmp153);
+	       }
+	       {
+		    fftw_real tmp149;
+		    fftw_real tmp160;
+		    fftw_real tmp274;
+		    fftw_real tmp275;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp149 = tmp143 + tmp148;
+		    tmp160 = tmp154 + tmp159;
+		    tmp161 = tmp149 + tmp160;
+		    tmp379 = tmp149 - tmp160;
+		    tmp274 = tmp272 - tmp273;
+		    tmp275 = tmp154 - tmp159;
+		    tmp276 = tmp274 + tmp275;
+		    tmp326 = tmp274 - tmp275;
+	       }
+	       {
+		    fftw_real tmp384;
+		    fftw_real tmp385;
+		    fftw_real tmp255;
+		    fftw_real tmp258;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp384 = tmp272 + tmp273;
+		    tmp385 = tmp256 + tmp257;
+		    tmp386 = tmp384 - tmp385;
+		    tmp422 = tmp384 + tmp385;
+		    tmp255 = tmp143 - tmp148;
+		    tmp258 = tmp256 - tmp257;
+		    tmp259 = tmp255 - tmp258;
+		    tmp323 = tmp255 + tmp258;
+	       }
+	  }
+	  {
+	       fftw_real tmp24;
+	       fftw_real tmp192;
+	       fftw_real tmp40;
+	       fftw_real tmp199;
+	       fftw_real tmp29;
+	       fftw_real tmp193;
+	       fftw_real tmp35;
+	       fftw_real tmp198;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp21;
+		    fftw_real tmp23;
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp21 = c_re(inout[4 * iostride]);
+		    tmp23 = c_im(inout[4 * iostride]);
+		    tmp20 = c_re(W[3]);
+		    tmp22 = c_im(W[3]);
+		    tmp24 = (tmp20 * tmp21) - (tmp22 * tmp23);
+		    tmp192 = (tmp22 * tmp21) + (tmp20 * tmp23);
+	       }
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp39;
+		    fftw_real tmp36;
+		    fftw_real tmp38;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = c_re(inout[12 * iostride]);
+		    tmp39 = c_im(inout[12 * iostride]);
+		    tmp36 = c_re(W[11]);
+		    tmp38 = c_im(W[11]);
+		    tmp40 = (tmp36 * tmp37) - (tmp38 * tmp39);
+		    tmp199 = (tmp38 * tmp37) + (tmp36 * tmp39);
+	       }
+	       {
+		    fftw_real tmp26;
+		    fftw_real tmp28;
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp26 = c_re(inout[20 * iostride]);
+		    tmp28 = c_im(inout[20 * iostride]);
+		    tmp25 = c_re(W[19]);
+		    tmp27 = c_im(W[19]);
+		    tmp29 = (tmp25 * tmp26) - (tmp27 * tmp28);
+		    tmp193 = (tmp27 * tmp26) + (tmp25 * tmp28);
+	       }
+	       {
+		    fftw_real tmp32;
+		    fftw_real tmp34;
+		    fftw_real tmp31;
+		    fftw_real tmp33;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp32 = c_re(inout[28 * iostride]);
+		    tmp34 = c_im(inout[28 * iostride]);
+		    tmp31 = c_re(W[27]);
+		    tmp33 = c_im(W[27]);
+		    tmp35 = (tmp31 * tmp32) - (tmp33 * tmp34);
+		    tmp198 = (tmp33 * tmp32) + (tmp31 * tmp34);
+	       }
+	       {
+		    fftw_real tmp30;
+		    fftw_real tmp41;
+		    fftw_real tmp197;
+		    fftw_real tmp200;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp30 = tmp24 + tmp29;
+		    tmp41 = tmp35 + tmp40;
+		    tmp42 = tmp30 + tmp41;
+		    tmp455 = tmp41 - tmp30;
+		    tmp197 = tmp35 - tmp40;
+		    tmp200 = tmp198 - tmp199;
+		    tmp201 = tmp197 + tmp200;
+		    tmp305 = tmp197 - tmp200;
+	       }
+	       {
+		    fftw_real tmp352;
+		    fftw_real tmp353;
+		    fftw_real tmp194;
+		    fftw_real tmp195;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp352 = tmp192 + tmp193;
+		    tmp353 = tmp198 + tmp199;
+		    tmp354 = tmp352 - tmp353;
+		    tmp437 = tmp352 + tmp353;
+		    tmp194 = tmp192 - tmp193;
+		    tmp195 = tmp24 - tmp29;
+		    tmp196 = tmp194 - tmp195;
+		    tmp304 = tmp195 + tmp194;
+	       }
+	  }
+	  {
+	       fftw_real tmp166;
+	       fftw_real tmp260;
+	       fftw_real tmp171;
+	       fftw_real tmp261;
+	       fftw_real tmp262;
+	       fftw_real tmp263;
+	       fftw_real tmp177;
+	       fftw_real tmp266;
+	       fftw_real tmp182;
+	       fftw_real tmp267;
+	       fftw_real tmp265;
+	       fftw_real tmp268;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp163;
+		    fftw_real tmp165;
+		    fftw_real tmp162;
+		    fftw_real tmp164;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp163 = c_re(inout[3 * iostride]);
+		    tmp165 = c_im(inout[3 * iostride]);
+		    tmp162 = c_re(W[2]);
+		    tmp164 = c_im(W[2]);
+		    tmp166 = (tmp162 * tmp163) - (tmp164 * tmp165);
+		    tmp260 = (tmp164 * tmp163) + (tmp162 * tmp165);
+	       }
+	       {
+		    fftw_real tmp168;
+		    fftw_real tmp170;
+		    fftw_real tmp167;
+		    fftw_real tmp169;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp168 = c_re(inout[19 * iostride]);
+		    tmp170 = c_im(inout[19 * iostride]);
+		    tmp167 = c_re(W[18]);
+		    tmp169 = c_im(W[18]);
+		    tmp171 = (tmp167 * tmp168) - (tmp169 * tmp170);
+		    tmp261 = (tmp169 * tmp168) + (tmp167 * tmp170);
+	       }
+	       tmp262 = tmp260 - tmp261;
+	       tmp263 = tmp166 - tmp171;
+	       {
+		    fftw_real tmp174;
+		    fftw_real tmp176;
+		    fftw_real tmp173;
+		    fftw_real tmp175;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp174 = c_re(inout[27 * iostride]);
+		    tmp176 = c_im(inout[27 * iostride]);
+		    tmp173 = c_re(W[26]);
+		    tmp175 = c_im(W[26]);
+		    tmp177 = (tmp173 * tmp174) - (tmp175 * tmp176);
+		    tmp266 = (tmp175 * tmp174) + (tmp173 * tmp176);
+	       }
+	       {
+		    fftw_real tmp179;
+		    fftw_real tmp181;
+		    fftw_real tmp178;
+		    fftw_real tmp180;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp179 = c_re(inout[11 * iostride]);
+		    tmp181 = c_im(inout[11 * iostride]);
+		    tmp178 = c_re(W[10]);
+		    tmp180 = c_im(W[10]);
+		    tmp182 = (tmp178 * tmp179) - (tmp180 * tmp181);
+		    tmp267 = (tmp180 * tmp179) + (tmp178 * tmp181);
+	       }
+	       tmp265 = tmp177 - tmp182;
+	       tmp268 = tmp266 - tmp267;
+	       {
+		    fftw_real tmp172;
+		    fftw_real tmp183;
+		    fftw_real tmp380;
+		    fftw_real tmp381;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp172 = tmp166 + tmp171;
+		    tmp183 = tmp177 + tmp182;
+		    tmp184 = tmp172 + tmp183;
+		    tmp387 = tmp183 - tmp172;
+		    tmp380 = tmp260 + tmp261;
+		    tmp381 = tmp266 + tmp267;
+		    tmp382 = tmp380 - tmp381;
+		    tmp423 = tmp380 + tmp381;
+	       }
+	       {
+		    fftw_real tmp264;
+		    fftw_real tmp269;
+		    fftw_real tmp277;
+		    fftw_real tmp278;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp264 = tmp262 - tmp263;
+		    tmp269 = tmp265 + tmp268;
+		    tmp270 = K707106781 * (tmp264 - tmp269);
+		    tmp327 = K707106781 * (tmp264 + tmp269);
+		    tmp277 = tmp265 - tmp268;
+		    tmp278 = tmp263 + tmp262;
+		    tmp279 = K707106781 * (tmp277 - tmp278);
+		    tmp324 = K707106781 * (tmp278 + tmp277);
+	       }
+	  }
+	  {
+	       fftw_real tmp48;
+	       fftw_real tmp204;
+	       fftw_real tmp64;
+	       fftw_real tmp211;
+	       fftw_real tmp53;
+	       fftw_real tmp205;
+	       fftw_real tmp59;
+	       fftw_real tmp210;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp45;
+		    fftw_real tmp47;
+		    fftw_real tmp44;
+		    fftw_real tmp46;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp45 = c_re(inout[2 * iostride]);
+		    tmp47 = c_im(inout[2 * iostride]);
+		    tmp44 = c_re(W[1]);
+		    tmp46 = c_im(W[1]);
+		    tmp48 = (tmp44 * tmp45) - (tmp46 * tmp47);
+		    tmp204 = (tmp46 * tmp45) + (tmp44 * tmp47);
+	       }
+	       {
+		    fftw_real tmp61;
+		    fftw_real tmp63;
+		    fftw_real tmp60;
+		    fftw_real tmp62;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp61 = c_re(inout[26 * iostride]);
+		    tmp63 = c_im(inout[26 * iostride]);
+		    tmp60 = c_re(W[25]);
+		    tmp62 = c_im(W[25]);
+		    tmp64 = (tmp60 * tmp61) - (tmp62 * tmp63);
+		    tmp211 = (tmp62 * tmp61) + (tmp60 * tmp63);
+	       }
+	       {
+		    fftw_real tmp50;
+		    fftw_real tmp52;
+		    fftw_real tmp49;
+		    fftw_real tmp51;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp50 = c_re(inout[18 * iostride]);
+		    tmp52 = c_im(inout[18 * iostride]);
+		    tmp49 = c_re(W[17]);
+		    tmp51 = c_im(W[17]);
+		    tmp53 = (tmp49 * tmp50) - (tmp51 * tmp52);
+		    tmp205 = (tmp51 * tmp50) + (tmp49 * tmp52);
+	       }
+	       {
+		    fftw_real tmp56;
+		    fftw_real tmp58;
+		    fftw_real tmp55;
+		    fftw_real tmp57;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp56 = c_re(inout[10 * iostride]);
+		    tmp58 = c_im(inout[10 * iostride]);
+		    tmp55 = c_re(W[9]);
+		    tmp57 = c_im(W[9]);
+		    tmp59 = (tmp55 * tmp56) - (tmp57 * tmp58);
+		    tmp210 = (tmp57 * tmp56) + (tmp55 * tmp58);
+	       }
+	       {
+		    fftw_real tmp54;
+		    fftw_real tmp65;
+		    fftw_real tmp209;
+		    fftw_real tmp212;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp54 = tmp48 + tmp53;
+		    tmp65 = tmp59 + tmp64;
+		    tmp66 = tmp54 + tmp65;
+		    tmp359 = tmp54 - tmp65;
+		    tmp209 = tmp48 - tmp53;
+		    tmp212 = tmp210 - tmp211;
+		    tmp213 = tmp209 - tmp212;
+		    tmp309 = tmp209 + tmp212;
+	       }
+	       {
+		    fftw_real tmp356;
+		    fftw_real tmp357;
+		    fftw_real tmp206;
+		    fftw_real tmp207;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp356 = tmp204 + tmp205;
+		    tmp357 = tmp210 + tmp211;
+		    tmp358 = tmp356 - tmp357;
+		    tmp412 = tmp356 + tmp357;
+		    tmp206 = tmp204 - tmp205;
+		    tmp207 = tmp59 - tmp64;
+		    tmp208 = tmp206 + tmp207;
+		    tmp308 = tmp206 - tmp207;
+	       }
+	  }
+	  {
+	       fftw_real tmp96;
+	       fftw_real tmp228;
+	       fftw_real tmp112;
+	       fftw_real tmp247;
+	       fftw_real tmp101;
+	       fftw_real tmp229;
+	       fftw_real tmp107;
+	       fftw_real tmp246;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp93;
+		    fftw_real tmp95;
+		    fftw_real tmp92;
+		    fftw_real tmp94;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp93 = c_re(inout[iostride]);
+		    tmp95 = c_im(inout[iostride]);
+		    tmp92 = c_re(W[0]);
+		    tmp94 = c_im(W[0]);
+		    tmp96 = (tmp92 * tmp93) - (tmp94 * tmp95);
+		    tmp228 = (tmp94 * tmp93) + (tmp92 * tmp95);
+	       }
+	       {
+		    fftw_real tmp109;
+		    fftw_real tmp111;
+		    fftw_real tmp108;
+		    fftw_real tmp110;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp109 = c_re(inout[25 * iostride]);
+		    tmp111 = c_im(inout[25 * iostride]);
+		    tmp108 = c_re(W[24]);
+		    tmp110 = c_im(W[24]);
+		    tmp112 = (tmp108 * tmp109) - (tmp110 * tmp111);
+		    tmp247 = (tmp110 * tmp109) + (tmp108 * tmp111);
+	       }
+	       {
+		    fftw_real tmp98;
+		    fftw_real tmp100;
+		    fftw_real tmp97;
+		    fftw_real tmp99;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp98 = c_re(inout[17 * iostride]);
+		    tmp100 = c_im(inout[17 * iostride]);
+		    tmp97 = c_re(W[16]);
+		    tmp99 = c_im(W[16]);
+		    tmp101 = (tmp97 * tmp98) - (tmp99 * tmp100);
+		    tmp229 = (tmp99 * tmp98) + (tmp97 * tmp100);
+	       }
+	       {
+		    fftw_real tmp104;
+		    fftw_real tmp106;
+		    fftw_real tmp103;
+		    fftw_real tmp105;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp104 = c_re(inout[9 * iostride]);
+		    tmp106 = c_im(inout[9 * iostride]);
+		    tmp103 = c_re(W[8]);
+		    tmp105 = c_im(W[8]);
+		    tmp107 = (tmp103 * tmp104) - (tmp105 * tmp106);
+		    tmp246 = (tmp105 * tmp104) + (tmp103 * tmp106);
+	       }
+	       {
+		    fftw_real tmp102;
+		    fftw_real tmp113;
+		    fftw_real tmp245;
+		    fftw_real tmp248;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp102 = tmp96 + tmp101;
+		    tmp113 = tmp107 + tmp112;
+		    tmp114 = tmp102 + tmp113;
+		    tmp373 = tmp102 - tmp113;
+		    tmp245 = tmp96 - tmp101;
+		    tmp248 = tmp246 - tmp247;
+		    tmp249 = tmp245 - tmp248;
+		    tmp316 = tmp245 + tmp248;
+	       }
+	       {
+		    fftw_real tmp368;
+		    fftw_real tmp369;
+		    fftw_real tmp230;
+		    fftw_real tmp231;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp368 = tmp228 + tmp229;
+		    tmp369 = tmp246 + tmp247;
+		    tmp370 = tmp368 - tmp369;
+		    tmp417 = tmp368 + tmp369;
+		    tmp230 = tmp228 - tmp229;
+		    tmp231 = tmp107 - tmp112;
+		    tmp232 = tmp230 + tmp231;
+		    tmp319 = tmp230 - tmp231;
+	       }
+	  }
+	  {
+	       fftw_real tmp71;
+	       fftw_real tmp215;
+	       fftw_real tmp87;
+	       fftw_real tmp222;
+	       fftw_real tmp76;
+	       fftw_real tmp216;
+	       fftw_real tmp82;
+	       fftw_real tmp221;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp68;
+		    fftw_real tmp70;
+		    fftw_real tmp67;
+		    fftw_real tmp69;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp68 = c_re(inout[30 * iostride]);
+		    tmp70 = c_im(inout[30 * iostride]);
+		    tmp67 = c_re(W[29]);
+		    tmp69 = c_im(W[29]);
+		    tmp71 = (tmp67 * tmp68) - (tmp69 * tmp70);
+		    tmp215 = (tmp69 * tmp68) + (tmp67 * tmp70);
+	       }
+	       {
+		    fftw_real tmp84;
+		    fftw_real tmp86;
+		    fftw_real tmp83;
+		    fftw_real tmp85;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp84 = c_re(inout[22 * iostride]);
+		    tmp86 = c_im(inout[22 * iostride]);
+		    tmp83 = c_re(W[21]);
+		    tmp85 = c_im(W[21]);
+		    tmp87 = (tmp83 * tmp84) - (tmp85 * tmp86);
+		    tmp222 = (tmp85 * tmp84) + (tmp83 * tmp86);
+	       }
+	       {
+		    fftw_real tmp73;
+		    fftw_real tmp75;
+		    fftw_real tmp72;
+		    fftw_real tmp74;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp73 = c_re(inout[14 * iostride]);
+		    tmp75 = c_im(inout[14 * iostride]);
+		    tmp72 = c_re(W[13]);
+		    tmp74 = c_im(W[13]);
+		    tmp76 = (tmp72 * tmp73) - (tmp74 * tmp75);
+		    tmp216 = (tmp74 * tmp73) + (tmp72 * tmp75);
+	       }
+	       {
+		    fftw_real tmp79;
+		    fftw_real tmp81;
+		    fftw_real tmp78;
+		    fftw_real tmp80;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp79 = c_re(inout[6 * iostride]);
+		    tmp81 = c_im(inout[6 * iostride]);
+		    tmp78 = c_re(W[5]);
+		    tmp80 = c_im(W[5]);
+		    tmp82 = (tmp78 * tmp79) - (tmp80 * tmp81);
+		    tmp221 = (tmp80 * tmp79) + (tmp78 * tmp81);
+	       }
+	       {
+		    fftw_real tmp77;
+		    fftw_real tmp88;
+		    fftw_real tmp220;
+		    fftw_real tmp223;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp77 = tmp71 + tmp76;
+		    tmp88 = tmp82 + tmp87;
+		    tmp89 = tmp77 + tmp88;
+		    tmp361 = tmp77 - tmp88;
+		    tmp220 = tmp71 - tmp76;
+		    tmp223 = tmp221 - tmp222;
+		    tmp224 = tmp220 - tmp223;
+		    tmp312 = tmp220 + tmp223;
+	       }
+	       {
+		    fftw_real tmp362;
+		    fftw_real tmp363;
+		    fftw_real tmp217;
+		    fftw_real tmp218;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp362 = tmp215 + tmp216;
+		    tmp363 = tmp221 + tmp222;
+		    tmp364 = tmp362 - tmp363;
+		    tmp413 = tmp362 + tmp363;
+		    tmp217 = tmp215 - tmp216;
+		    tmp218 = tmp82 - tmp87;
+		    tmp219 = tmp217 + tmp218;
+		    tmp311 = tmp217 - tmp218;
+	       }
+	  }
+	  {
+	       fftw_real tmp119;
+	       fftw_real tmp239;
+	       fftw_real tmp124;
+	       fftw_real tmp240;
+	       fftw_real tmp238;
+	       fftw_real tmp241;
+	       fftw_real tmp130;
+	       fftw_real tmp234;
+	       fftw_real tmp135;
+	       fftw_real tmp235;
+	       fftw_real tmp233;
+	       fftw_real tmp236;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp116;
+		    fftw_real tmp118;
+		    fftw_real tmp115;
+		    fftw_real tmp117;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp116 = c_re(inout[5 * iostride]);
+		    tmp118 = c_im(inout[5 * iostride]);
+		    tmp115 = c_re(W[4]);
+		    tmp117 = c_im(W[4]);
+		    tmp119 = (tmp115 * tmp116) - (tmp117 * tmp118);
+		    tmp239 = (tmp117 * tmp116) + (tmp115 * tmp118);
+	       }
+	       {
+		    fftw_real tmp121;
+		    fftw_real tmp123;
+		    fftw_real tmp120;
+		    fftw_real tmp122;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp121 = c_re(inout[21 * iostride]);
+		    tmp123 = c_im(inout[21 * iostride]);
+		    tmp120 = c_re(W[20]);
+		    tmp122 = c_im(W[20]);
+		    tmp124 = (tmp120 * tmp121) - (tmp122 * tmp123);
+		    tmp240 = (tmp122 * tmp121) + (tmp120 * tmp123);
+	       }
+	       tmp238 = tmp119 - tmp124;
+	       tmp241 = tmp239 - tmp240;
+	       {
+		    fftw_real tmp127;
+		    fftw_real tmp129;
+		    fftw_real tmp126;
+		    fftw_real tmp128;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp127 = c_re(inout[29 * iostride]);
+		    tmp129 = c_im(inout[29 * iostride]);
+		    tmp126 = c_re(W[28]);
+		    tmp128 = c_im(W[28]);
+		    tmp130 = (tmp126 * tmp127) - (tmp128 * tmp129);
+		    tmp234 = (tmp128 * tmp127) + (tmp126 * tmp129);
+	       }
+	       {
+		    fftw_real tmp132;
+		    fftw_real tmp134;
+		    fftw_real tmp131;
+		    fftw_real tmp133;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp132 = c_re(inout[13 * iostride]);
+		    tmp134 = c_im(inout[13 * iostride]);
+		    tmp131 = c_re(W[12]);
+		    tmp133 = c_im(W[12]);
+		    tmp135 = (tmp131 * tmp132) - (tmp133 * tmp134);
+		    tmp235 = (tmp133 * tmp132) + (tmp131 * tmp134);
+	       }
+	       tmp233 = tmp130 - tmp135;
+	       tmp236 = tmp234 - tmp235;
+	       {
+		    fftw_real tmp125;
+		    fftw_real tmp136;
+		    fftw_real tmp374;
+		    fftw_real tmp375;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp125 = tmp119 + tmp124;
+		    tmp136 = tmp130 + tmp135;
+		    tmp137 = tmp125 + tmp136;
+		    tmp371 = tmp136 - tmp125;
+		    tmp374 = tmp239 + tmp240;
+		    tmp375 = tmp234 + tmp235;
+		    tmp376 = tmp374 - tmp375;
+		    tmp418 = tmp374 + tmp375;
+	       }
+	       {
+		    fftw_real tmp237;
+		    fftw_real tmp242;
+		    fftw_real tmp250;
+		    fftw_real tmp251;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp237 = tmp233 - tmp236;
+		    tmp242 = tmp238 + tmp241;
+		    tmp243 = K707106781 * (tmp237 - tmp242);
+		    tmp317 = K707106781 * (tmp242 + tmp237);
+		    tmp250 = tmp241 - tmp238;
+		    tmp251 = tmp233 + tmp236;
+		    tmp252 = K707106781 * (tmp250 - tmp251);
+		    tmp320 = K707106781 * (tmp250 + tmp251);
+	       }
+	  }
+	  {
+	       fftw_real tmp91;
+	       fftw_real tmp431;
+	       fftw_real tmp444;
+	       fftw_real tmp446;
+	       fftw_real tmp186;
+	       fftw_real tmp445;
+	       fftw_real tmp434;
+	       fftw_real tmp435;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp43;
+		    fftw_real tmp90;
+		    fftw_real tmp436;
+		    fftw_real tmp443;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp43 = tmp19 + tmp42;
+		    tmp90 = tmp66 + tmp89;
+		    tmp91 = tmp43 + tmp90;
+		    tmp431 = tmp43 - tmp90;
+		    tmp436 = tmp412 + tmp413;
+		    tmp443 = tmp437 + tmp442;
+		    tmp444 = tmp436 + tmp443;
+		    tmp446 = tmp443 - tmp436;
+	       }
+	       {
+		    fftw_real tmp138;
+		    fftw_real tmp185;
+		    fftw_real tmp432;
+		    fftw_real tmp433;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp138 = tmp114 + tmp137;
+		    tmp185 = tmp161 + tmp184;
+		    tmp186 = tmp138 + tmp185;
+		    tmp445 = tmp185 - tmp138;
+		    tmp432 = tmp417 + tmp418;
+		    tmp433 = tmp422 + tmp423;
+		    tmp434 = tmp432 - tmp433;
+		    tmp435 = tmp432 + tmp433;
+	       }
+	       c_re(inout[16 * iostride]) = tmp91 - tmp186;
+	       c_re(inout[0]) = tmp91 + tmp186;
+	       c_re(inout[24 * iostride]) = tmp431 - tmp434;
+	       c_re(inout[8 * iostride]) = tmp431 + tmp434;
+	       c_im(inout[0]) = tmp435 + tmp444;
+	       c_im(inout[16 * iostride]) = tmp444 - tmp435;
+	       c_im(inout[8 * iostride]) = tmp445 + tmp446;
+	       c_im(inout[24 * iostride]) = tmp446 - tmp445;
+	  }
+	  {
+	       fftw_real tmp415;
+	       fftw_real tmp427;
+	       fftw_real tmp450;
+	       fftw_real tmp452;
+	       fftw_real tmp420;
+	       fftw_real tmp428;
+	       fftw_real tmp425;
+	       fftw_real tmp429;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp411;
+		    fftw_real tmp414;
+		    fftw_real tmp448;
+		    fftw_real tmp449;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp411 = tmp19 - tmp42;
+		    tmp414 = tmp412 - tmp413;
+		    tmp415 = tmp411 + tmp414;
+		    tmp427 = tmp411 - tmp414;
+		    tmp448 = tmp89 - tmp66;
+		    tmp449 = tmp442 - tmp437;
+		    tmp450 = tmp448 + tmp449;
+		    tmp452 = tmp449 - tmp448;
+	       }
+	       {
+		    fftw_real tmp416;
+		    fftw_real tmp419;
+		    fftw_real tmp421;
+		    fftw_real tmp424;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp416 = tmp114 - tmp137;
+		    tmp419 = tmp417 - tmp418;
+		    tmp420 = tmp416 + tmp419;
+		    tmp428 = tmp419 - tmp416;
+		    tmp421 = tmp161 - tmp184;
+		    tmp424 = tmp422 - tmp423;
+		    tmp425 = tmp421 - tmp424;
+		    tmp429 = tmp421 + tmp424;
+	       }
+	       {
+		    fftw_real tmp426;
+		    fftw_real tmp451;
+		    fftw_real tmp430;
+		    fftw_real tmp447;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp426 = K707106781 * (tmp420 + tmp425);
+		    c_re(inout[20 * iostride]) = tmp415 - tmp426;
+		    c_re(inout[4 * iostride]) = tmp415 + tmp426;
+		    tmp451 = K707106781 * (tmp425 - tmp420);
+		    c_im(inout[12 * iostride]) = tmp451 + tmp452;
+		    c_im(inout[28 * iostride]) = tmp452 - tmp451;
+		    tmp430 = K707106781 * (tmp428 - tmp429);
+		    c_re(inout[28 * iostride]) = tmp427 - tmp430;
+		    c_re(inout[12 * iostride]) = tmp427 + tmp430;
+		    tmp447 = K707106781 * (tmp428 + tmp429);
+		    c_im(inout[4 * iostride]) = tmp447 + tmp450;
+		    c_im(inout[20 * iostride]) = tmp450 - tmp447;
+	       }
+	  }
+	  {
+	       fftw_real tmp355;
+	       fftw_real tmp395;
+	       fftw_real tmp366;
+	       fftw_real tmp454;
+	       fftw_real tmp398;
+	       fftw_real tmp462;
+	       fftw_real tmp378;
+	       fftw_real tmp392;
+	       fftw_real tmp457;
+	       fftw_real tmp463;
+	       fftw_real tmp402;
+	       fftw_real tmp408;
+	       fftw_real tmp389;
+	       fftw_real tmp393;
+	       fftw_real tmp405;
+	       fftw_real tmp409;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp360;
+		    fftw_real tmp365;
+		    fftw_real tmp400;
+		    fftw_real tmp401;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp355 = tmp351 - tmp354;
+		    tmp395 = tmp351 + tmp354;
+		    tmp360 = tmp358 - tmp359;
+		    tmp365 = tmp361 + tmp364;
+		    tmp366 = K707106781 * (tmp360 - tmp365);
+		    tmp454 = K707106781 * (tmp360 + tmp365);
+		    {
+			 fftw_real tmp396;
+			 fftw_real tmp397;
+			 fftw_real tmp372;
+			 fftw_real tmp377;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp396 = tmp359 + tmp358;
+			 tmp397 = tmp361 - tmp364;
+			 tmp398 = K707106781 * (tmp396 + tmp397);
+			 tmp462 = K707106781 * (tmp397 - tmp396);
+			 tmp372 = tmp370 - tmp371;
+			 tmp377 = tmp373 - tmp376;
+			 tmp378 =
+			     (K923879532 * tmp372) + (K382683432 * tmp377);
+			 tmp392 =
+			     (K382683432 * tmp372) - (K923879532 * tmp377);
+		    }
+		    tmp457 = tmp455 + tmp456;
+		    tmp463 = tmp456 - tmp455;
+		    tmp400 = tmp370 + tmp371;
+		    tmp401 = tmp373 + tmp376;
+		    tmp402 = (K382683432 * tmp400) + (K923879532 * tmp401);
+		    tmp408 = (K923879532 * tmp400) - (K382683432 * tmp401);
+		    {
+			 fftw_real tmp383;
+			 fftw_real tmp388;
+			 fftw_real tmp403;
+			 fftw_real tmp404;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp383 = tmp379 - tmp382;
+			 tmp388 = tmp386 - tmp387;
+			 tmp389 =
+			     (K382683432 * tmp383) - (K923879532 * tmp388);
+			 tmp393 =
+			     (K382683432 * tmp388) + (K923879532 * tmp383);
+			 tmp403 = tmp379 + tmp382;
+			 tmp404 = tmp386 + tmp387;
+			 tmp405 =
+			     (K923879532 * tmp403) - (K382683432 * tmp404);
+			 tmp409 =
+			     (K923879532 * tmp404) + (K382683432 * tmp403);
+		    }
+	       }
+	       {
+		    fftw_real tmp367;
+		    fftw_real tmp390;
+		    fftw_real tmp391;
+		    fftw_real tmp394;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp367 = tmp355 + tmp366;
+		    tmp390 = tmp378 + tmp389;
+		    c_re(inout[22 * iostride]) = tmp367 - tmp390;
+		    c_re(inout[6 * iostride]) = tmp367 + tmp390;
+		    tmp391 = tmp355 - tmp366;
+		    tmp394 = tmp392 - tmp393;
+		    c_re(inout[30 * iostride]) = tmp391 - tmp394;
+		    c_re(inout[14 * iostride]) = tmp391 + tmp394;
+	       }
+	       {
+		    fftw_real tmp461;
+		    fftw_real tmp464;
+		    fftw_real tmp465;
+		    fftw_real tmp466;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp461 = tmp392 + tmp393;
+		    tmp464 = tmp462 + tmp463;
+		    c_im(inout[6 * iostride]) = tmp461 + tmp464;
+		    c_im(inout[22 * iostride]) = tmp464 - tmp461;
+		    tmp465 = tmp389 - tmp378;
+		    tmp466 = tmp463 - tmp462;
+		    c_im(inout[14 * iostride]) = tmp465 + tmp466;
+		    c_im(inout[30 * iostride]) = tmp466 - tmp465;
+	       }
+	       {
+		    fftw_real tmp399;
+		    fftw_real tmp406;
+		    fftw_real tmp407;
+		    fftw_real tmp410;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp399 = tmp395 + tmp398;
+		    tmp406 = tmp402 + tmp405;
+		    c_re(inout[18 * iostride]) = tmp399 - tmp406;
+		    c_re(inout[2 * iostride]) = tmp399 + tmp406;
+		    tmp407 = tmp395 - tmp398;
+		    tmp410 = tmp408 - tmp409;
+		    c_re(inout[26 * iostride]) = tmp407 - tmp410;
+		    c_re(inout[10 * iostride]) = tmp407 + tmp410;
+	       }
+	       {
+		    fftw_real tmp453;
+		    fftw_real tmp458;
+		    fftw_real tmp459;
+		    fftw_real tmp460;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp453 = tmp408 + tmp409;
+		    tmp458 = tmp454 + tmp457;
+		    c_im(inout[2 * iostride]) = tmp453 + tmp458;
+		    c_im(inout[18 * iostride]) = tmp458 - tmp453;
+		    tmp459 = tmp405 - tmp402;
+		    tmp460 = tmp457 - tmp454;
+		    c_im(inout[10 * iostride]) = tmp459 + tmp460;
+		    c_im(inout[26 * iostride]) = tmp460 - tmp459;
+	       }
+	  }
+	  {
+	       fftw_real tmp307;
+	       fftw_real tmp335;
+	       fftw_real tmp338;
+	       fftw_real tmp478;
+	       fftw_real tmp473;
+	       fftw_real tmp479;
+	       fftw_real tmp314;
+	       fftw_real tmp468;
+	       fftw_real tmp322;
+	       fftw_real tmp332;
+	       fftw_real tmp342;
+	       fftw_real tmp348;
+	       fftw_real tmp329;
+	       fftw_real tmp333;
+	       fftw_real tmp345;
+	       fftw_real tmp349;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp306;
+		    fftw_real tmp336;
+		    fftw_real tmp337;
+		    fftw_real tmp469;
+		    fftw_real tmp310;
+		    fftw_real tmp313;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp306 = K707106781 * (tmp304 + tmp305);
+		    tmp307 = tmp303 - tmp306;
+		    tmp335 = tmp303 + tmp306;
+		    tmp336 = (K382683432 * tmp308) + (K923879532 * tmp309);
+		    tmp337 = (K923879532 * tmp312) - (K382683432 * tmp311);
+		    tmp338 = tmp336 + tmp337;
+		    tmp478 = tmp337 - tmp336;
+		    tmp469 = K707106781 * (tmp196 + tmp201);
+		    tmp473 = tmp469 + tmp472;
+		    tmp479 = tmp472 - tmp469;
+		    tmp310 = (K923879532 * tmp308) - (K382683432 * tmp309);
+		    tmp313 = (K923879532 * tmp311) + (K382683432 * tmp312);
+		    tmp314 = tmp310 - tmp313;
+		    tmp468 = tmp310 + tmp313;
+	       }
+	       {
+		    fftw_real tmp318;
+		    fftw_real tmp321;
+		    fftw_real tmp340;
+		    fftw_real tmp341;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp318 = tmp316 - tmp317;
+		    tmp321 = tmp319 - tmp320;
+		    tmp322 = (K555570233 * tmp318) + (K831469612 * tmp321);
+		    tmp332 = (K555570233 * tmp321) - (K831469612 * tmp318);
+		    tmp340 = tmp316 + tmp317;
+		    tmp341 = tmp319 + tmp320;
+		    tmp342 = (K980785280 * tmp340) + (K195090322 * tmp341);
+		    tmp348 = (K980785280 * tmp341) - (K195090322 * tmp340);
+	       }
+	       {
+		    fftw_real tmp325;
+		    fftw_real tmp328;
+		    fftw_real tmp343;
+		    fftw_real tmp344;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp325 = tmp323 - tmp324;
+		    tmp328 = tmp326 - tmp327;
+		    tmp329 = (K555570233 * tmp325) - (K831469612 * tmp328);
+		    tmp333 = (K831469612 * tmp325) + (K555570233 * tmp328);
+		    tmp343 = tmp323 + tmp324;
+		    tmp344 = tmp326 + tmp327;
+		    tmp345 = (K980785280 * tmp343) - (K195090322 * tmp344);
+		    tmp349 = (K195090322 * tmp343) + (K980785280 * tmp344);
+	       }
+	       {
+		    fftw_real tmp315;
+		    fftw_real tmp330;
+		    fftw_real tmp331;
+		    fftw_real tmp334;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp315 = tmp307 + tmp314;
+		    tmp330 = tmp322 + tmp329;
+		    c_re(inout[21 * iostride]) = tmp315 - tmp330;
+		    c_re(inout[5 * iostride]) = tmp315 + tmp330;
+		    tmp331 = tmp307 - tmp314;
+		    tmp334 = tmp332 - tmp333;
+		    c_re(inout[29 * iostride]) = tmp331 - tmp334;
+		    c_re(inout[13 * iostride]) = tmp331 + tmp334;
+	       }
+	       {
+		    fftw_real tmp477;
+		    fftw_real tmp480;
+		    fftw_real tmp481;
+		    fftw_real tmp482;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp477 = tmp332 + tmp333;
+		    tmp480 = tmp478 + tmp479;
+		    c_im(inout[5 * iostride]) = tmp477 + tmp480;
+		    c_im(inout[21 * iostride]) = tmp480 - tmp477;
+		    tmp481 = tmp329 - tmp322;
+		    tmp482 = tmp479 - tmp478;
+		    c_im(inout[13 * iostride]) = tmp481 + tmp482;
+		    c_im(inout[29 * iostride]) = tmp482 - tmp481;
+	       }
+	       {
+		    fftw_real tmp339;
+		    fftw_real tmp346;
+		    fftw_real tmp347;
+		    fftw_real tmp350;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp339 = tmp335 + tmp338;
+		    tmp346 = tmp342 + tmp345;
+		    c_re(inout[17 * iostride]) = tmp339 - tmp346;
+		    c_re(inout[iostride]) = tmp339 + tmp346;
+		    tmp347 = tmp335 - tmp338;
+		    tmp350 = tmp348 - tmp349;
+		    c_re(inout[25 * iostride]) = tmp347 - tmp350;
+		    c_re(inout[9 * iostride]) = tmp347 + tmp350;
+	       }
+	       {
+		    fftw_real tmp467;
+		    fftw_real tmp474;
+		    fftw_real tmp475;
+		    fftw_real tmp476;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp467 = tmp348 + tmp349;
+		    tmp474 = tmp468 + tmp473;
+		    c_im(inout[iostride]) = tmp467 + tmp474;
+		    c_im(inout[17 * iostride]) = tmp474 - tmp467;
+		    tmp475 = tmp345 - tmp342;
+		    tmp476 = tmp473 - tmp468;
+		    c_im(inout[9 * iostride]) = tmp475 + tmp476;
+		    c_im(inout[25 * iostride]) = tmp476 - tmp475;
+	       }
+	  }
+	  {
+	       fftw_real tmp203;
+	       fftw_real tmp287;
+	       fftw_real tmp290;
+	       fftw_real tmp492;
+	       fftw_real tmp487;
+	       fftw_real tmp493;
+	       fftw_real tmp226;
+	       fftw_real tmp484;
+	       fftw_real tmp254;
+	       fftw_real tmp284;
+	       fftw_real tmp294;
+	       fftw_real tmp300;
+	       fftw_real tmp281;
+	       fftw_real tmp285;
+	       fftw_real tmp297;
+	       fftw_real tmp301;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp202;
+		    fftw_real tmp288;
+		    fftw_real tmp289;
+		    fftw_real tmp485;
+		    fftw_real tmp214;
+		    fftw_real tmp225;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp202 = K707106781 * (tmp196 - tmp201);
+		    tmp203 = tmp191 - tmp202;
+		    tmp287 = tmp191 + tmp202;
+		    tmp288 = (K923879532 * tmp208) + (K382683432 * tmp213);
+		    tmp289 = (K382683432 * tmp224) - (K923879532 * tmp219);
+		    tmp290 = tmp288 + tmp289;
+		    tmp492 = tmp289 - tmp288;
+		    tmp485 = K707106781 * (tmp305 - tmp304);
+		    tmp487 = tmp485 + tmp486;
+		    tmp493 = tmp486 - tmp485;
+		    tmp214 = (K382683432 * tmp208) - (K923879532 * tmp213);
+		    tmp225 = (K382683432 * tmp219) + (K923879532 * tmp224);
+		    tmp226 = tmp214 - tmp225;
+		    tmp484 = tmp214 + tmp225;
+	       }
+	       {
+		    fftw_real tmp244;
+		    fftw_real tmp253;
+		    fftw_real tmp292;
+		    fftw_real tmp293;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp244 = tmp232 - tmp243;
+		    tmp253 = tmp249 - tmp252;
+		    tmp254 = (K980785280 * tmp244) + (K195090322 * tmp253);
+		    tmp284 = (K195090322 * tmp244) - (K980785280 * tmp253);
+		    tmp292 = tmp232 + tmp243;
+		    tmp293 = tmp249 + tmp252;
+		    tmp294 = (K555570233 * tmp292) + (K831469612 * tmp293);
+		    tmp300 = (K831469612 * tmp292) - (K555570233 * tmp293);
+	       }
+	       {
+		    fftw_real tmp271;
+		    fftw_real tmp280;
+		    fftw_real tmp295;
+		    fftw_real tmp296;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp271 = tmp259 - tmp270;
+		    tmp280 = tmp276 - tmp279;
+		    tmp281 = (K195090322 * tmp271) - (K980785280 * tmp280);
+		    tmp285 = (K195090322 * tmp280) + (K980785280 * tmp271);
+		    tmp295 = tmp259 + tmp270;
+		    tmp296 = tmp276 + tmp279;
+		    tmp297 = (K831469612 * tmp295) - (K555570233 * tmp296);
+		    tmp301 = (K831469612 * tmp296) + (K555570233 * tmp295);
+	       }
+	       {
+		    fftw_real tmp227;
+		    fftw_real tmp282;
+		    fftw_real tmp283;
+		    fftw_real tmp286;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp227 = tmp203 + tmp226;
+		    tmp282 = tmp254 + tmp281;
+		    c_re(inout[23 * iostride]) = tmp227 - tmp282;
+		    c_re(inout[7 * iostride]) = tmp227 + tmp282;
+		    tmp283 = tmp203 - tmp226;
+		    tmp286 = tmp284 - tmp285;
+		    c_re(inout[31 * iostride]) = tmp283 - tmp286;
+		    c_re(inout[15 * iostride]) = tmp283 + tmp286;
+	       }
+	       {
+		    fftw_real tmp491;
+		    fftw_real tmp494;
+		    fftw_real tmp495;
+		    fftw_real tmp496;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp491 = tmp284 + tmp285;
+		    tmp494 = tmp492 + tmp493;
+		    c_im(inout[7 * iostride]) = tmp491 + tmp494;
+		    c_im(inout[23 * iostride]) = tmp494 - tmp491;
+		    tmp495 = tmp281 - tmp254;
+		    tmp496 = tmp493 - tmp492;
+		    c_im(inout[15 * iostride]) = tmp495 + tmp496;
+		    c_im(inout[31 * iostride]) = tmp496 - tmp495;
+	       }
+	       {
+		    fftw_real tmp291;
+		    fftw_real tmp298;
+		    fftw_real tmp299;
+		    fftw_real tmp302;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp291 = tmp287 + tmp290;
+		    tmp298 = tmp294 + tmp297;
+		    c_re(inout[19 * iostride]) = tmp291 - tmp298;
+		    c_re(inout[3 * iostride]) = tmp291 + tmp298;
+		    tmp299 = tmp287 - tmp290;
+		    tmp302 = tmp300 - tmp301;
+		    c_re(inout[27 * iostride]) = tmp299 - tmp302;
+		    c_re(inout[11 * iostride]) = tmp299 + tmp302;
+	       }
+	       {
+		    fftw_real tmp483;
+		    fftw_real tmp488;
+		    fftw_real tmp489;
+		    fftw_real tmp490;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp483 = tmp300 + tmp301;
+		    tmp488 = tmp484 + tmp487;
+		    c_im(inout[3 * iostride]) = tmp483 + tmp488;
+		    c_im(inout[19 * iostride]) = tmp488 - tmp483;
+		    tmp489 = tmp297 - tmp294;
+		    tmp490 = tmp487 - tmp484;
+		    c_im(inout[11 * iostride]) = tmp489 + tmp490;
+		    c_im(inout[27 * iostride]) = tmp490 - tmp489;
+	       }
+	  }
+     }
+}
+
+static const int twiddle_order[] = /* 31-entry index table handed to fftw_twiddle_32_desc below */
+    { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, /* ascending 1..31; the codelet body above pairs inout[k * iostride] with W[k - 1] */
+20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }; /* NOTE(review): presumably the twiddle exponents in the order they are stored in W - confirm against fftw-int.h */
+fftw_codelet_desc fftw_twiddle_32_desc = { /* externally visible descriptor record for the fftw_twiddle_32 codelet; fields are positional */
+     "fftw_twiddle_32", /* codelet name as a string */
+     (void (*)()) fftw_twiddle_32, /* codelet entry point, cast to a void function pointer with unspecified parameters */
+     32, /* presumably the transform size: the body above touches inout[0] .. inout[31 * iostride] - confirm field meaning in fftw.h */
+     FFTW_FORWARD, /* presumably the transform direction - confirm against fftw.h */
+     FFTW_TWIDDLE, /* presumably the codelet/node kind - confirm against fftw.h */
+     704, /* NOTE(review): meaning of this generator-emitted constant is not visible here - check the matching fftw_codelet_desc field */
+     31, /* equals the number of entries in twiddle_order; the body above reads W[0] .. W[30] */
+     twiddle_order, /* the static table defined immediately above */
+};
diff --git a/src/fftw/ftw_4.c b/src/fftw/ftw_4.c
new file mode 100644
index 0000000..c04b449
--- /dev/null
+++ b/src/fftw/ftw_4.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:07:31 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 4 */
+
+/*
+ * This function contains 22 FP additions, 12 FP multiplications,
+ * (or, 16 additions, 6 multiplications, 6 fused multiply/add),
+ * 14 stack variables, and 16 memory accesses
+ */
+
+/*
+ * Generator Id's : 
+ * $Id: ftw_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_twiddle_4(fftw_complex *A, const fftw_complex *W, int iostride,	/* in-place 4-point twiddle codelet: A is read and overwritten, W is only read */
+		    int m, int dist)	/* m = iteration count, dist = step applied to the data pointer per iteration */
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 3) {	/* runs m times (not at all if m <= 0); each pass consumes W[0..2] */
+	  fftw_real tmp1;
+	  fftw_real tmp25;
+	  fftw_real tmp6;
+	  fftw_real tmp24;
+	  fftw_real tmp12;
+	  fftw_real tmp20;
+	  fftw_real tmp17;
+	  fftw_real tmp21;
+	  ASSERT_ALIGNED_DOUBLE;	/* notation in comments below: x0 = inout[0], tk = W[k-1] * inout[k*iostride] */
+	  tmp1 = c_re(inout[0]);	/* Re(x0) */
+	  tmp25 = c_im(inout[0]);	/* Im(x0) */
+	  {
+	       fftw_real tmp3;
+	       fftw_real tmp5;
+	       fftw_real tmp2;
+	       fftw_real tmp4;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp3 = c_re(inout[2 * iostride]);
+	       tmp5 = c_im(inout[2 * iostride]);
+	       tmp2 = c_re(W[1]);
+	       tmp4 = c_im(W[1]);
+	       tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5);	/* Re(t2): real part of the complex product */
+	       tmp24 = (tmp4 * tmp3) + (tmp2 * tmp5);	/* Im(t2) */
+	  }
+	  {
+	       fftw_real tmp9;
+	       fftw_real tmp11;
+	       fftw_real tmp8;
+	       fftw_real tmp10;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp9 = c_re(inout[iostride]);
+	       tmp11 = c_im(inout[iostride]);
+	       tmp8 = c_re(W[0]);
+	       tmp10 = c_im(W[0]);
+	       tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11);	/* Re(t1) */
+	       tmp20 = (tmp10 * tmp9) + (tmp8 * tmp11);	/* Im(t1) */
+	  }
+	  {
+	       fftw_real tmp14;
+	       fftw_real tmp16;
+	       fftw_real tmp13;
+	       fftw_real tmp15;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp14 = c_re(inout[3 * iostride]);
+	       tmp16 = c_im(inout[3 * iostride]);
+	       tmp13 = c_re(W[2]);
+	       tmp15 = c_im(W[2]);
+	       tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16);	/* Re(t3) */
+	       tmp21 = (tmp15 * tmp14) + (tmp13 * tmp16);	/* Im(t3) */
+	  }
+	  {
+	       fftw_real tmp7;
+	       fftw_real tmp18;
+	       fftw_real tmp27;
+	       fftw_real tmp28;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp7 = tmp1 + tmp6;	/* Re(x0 + t2) */
+	       tmp18 = tmp12 + tmp17;	/* Re(t1 + t3) */
+	       c_re(inout[2 * iostride]) = tmp7 - tmp18;
+	       c_re(inout[0]) = tmp7 + tmp18;	/* safe to overwrite: every input was copied into temporaries above */
+	       tmp27 = tmp25 - tmp24;	/* Im(x0 - t2) */
+	       tmp28 = tmp12 - tmp17;	/* Re(t1 - t3) */
+	       c_im(inout[iostride]) = tmp27 - tmp28;
+	       c_im(inout[3 * iostride]) = tmp28 + tmp27;
+	  }
+	  {
+	       fftw_real tmp23;
+	       fftw_real tmp26;
+	       fftw_real tmp19;
+	       fftw_real tmp22;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp23 = tmp20 + tmp21;	/* Im(t1 + t3) */
+	       tmp26 = tmp24 + tmp25;	/* Im(x0 + t2) */
+	       c_im(inout[0]) = tmp23 + tmp26;
+	       c_im(inout[2 * iostride]) = tmp26 - tmp23;
+	       tmp19 = tmp1 - tmp6;	/* Re(x0 - t2) */
+	       tmp22 = tmp20 - tmp21;	/* Im(t1 - t3) */
+	       c_re(inout[3 * iostride]) = tmp19 - tmp22;
+	       c_re(inout[iostride]) = tmp19 + tmp22;
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3 };	/* 3 entries; referenced by the descriptor below */
+fftw_codelet_desc fftw_twiddle_4_desc = {	/* fftw_codelet_desc is declared outside this file; field labels below are presumed -- TODO confirm */
+     "fftw_twiddle_4",	/* presumably the codelet name */
+     (void (*)()) fftw_twiddle_4,	/* the codelet function, cast to an unprototyped function type */
+     4,	/* presumably the transform size */
+     FFTW_FORWARD,
+     FFTW_TWIDDLE,
+     88,	/* meaning not visible here -- TODO confirm against the struct declaration */
+     3,	/* equals the twiddle_order length and the per-iteration W advance in fftw_twiddle_4 */
+     twiddle_order,
+};
diff --git a/src/fftw/ftw_5.c b/src/fftw/ftw_5.c
new file mode 100644
index 0000000..157b3f7
--- /dev/null
+++ b/src/fftw/ftw_5.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:07:32 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 5 */
+
+/*
+ * This function contains 40 FP additions, 28 FP multiplications,
+ * (or, 26 additions, 14 multiplications, 14 fused multiply/add),
+ * 26 stack variables, and 20 memory accesses
+ */
+static const fftw_real K250000000 =
+FFTW_KONST(+0.250000000000000000000000000000000000000000000);	/* 1/4 */
+static const fftw_real K559016994 =
+FFTW_KONST(+0.559016994374947424102293417182819058860154590);	/* numerically sqrt(5)/4 */
+static const fftw_real K587785252 =
+FFTW_KONST(+0.587785252292473129168705954639072768597652438);	/* numerically sin(pi/5) */
+static const fftw_real K951056516 =
+FFTW_KONST(+0.951056516295153572116439333379382143405698634);	/* numerically sin(2*pi/5) */
+
+/*
+ * Generator Id's : 
+ * $Id: ftw_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_twiddle_5(fftw_complex *A, const fftw_complex *W, int iostride,	/* in-place 5-point twiddle codelet: A is read and overwritten, W is only read */
+		    int m, int dist)	/* m = iteration count, dist = step applied to the data pointer per iteration */
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 4) {	/* runs m times (not at all if m <= 0); each pass consumes W[0..3] */
+	  fftw_real tmp1;
+	  fftw_real tmp40;
+	  fftw_real tmp30;
+	  fftw_real tmp33;
+	  fftw_real tmp37;
+	  fftw_real tmp38;
+	  fftw_real tmp39;
+	  fftw_real tmp45;
+	  fftw_real tmp44;
+	  fftw_real tmp12;
+	  fftw_real tmp23;
+	  fftw_real tmp24;
+	  ASSERT_ALIGNED_DOUBLE;	/* notation in comments below: x0 = inout[0], tk = W[k-1] * inout[k*iostride] */
+	  tmp1 = c_re(inout[0]);	/* Re(x0) */
+	  tmp40 = c_im(inout[0]);	/* Im(x0) */
+	  {
+	       fftw_real tmp6;
+	       fftw_real tmp28;
+	       fftw_real tmp22;
+	       fftw_real tmp32;
+	       fftw_real tmp11;
+	       fftw_real tmp29;
+	       fftw_real tmp17;
+	       fftw_real tmp31;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[iostride]);
+		    tmp5 = c_im(inout[iostride]);
+		    tmp2 = c_re(W[0]);
+		    tmp4 = c_im(W[0]);
+		    tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5);	/* Re(t1) */
+		    tmp28 = (tmp4 * tmp3) + (tmp2 * tmp5);	/* Im(t1) */
+	       }
+	       {
+		    fftw_real tmp19;
+		    fftw_real tmp21;
+		    fftw_real tmp18;
+		    fftw_real tmp20;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp19 = c_re(inout[3 * iostride]);
+		    tmp21 = c_im(inout[3 * iostride]);
+		    tmp18 = c_re(W[2]);
+		    tmp20 = c_im(W[2]);
+		    tmp22 = (tmp18 * tmp19) - (tmp20 * tmp21);	/* Re(t3) */
+		    tmp32 = (tmp20 * tmp19) + (tmp18 * tmp21);	/* Im(t3) */
+	       }
+	       {
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    fftw_real tmp7;
+		    fftw_real tmp9;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp8 = c_re(inout[4 * iostride]);
+		    tmp10 = c_im(inout[4 * iostride]);
+		    tmp7 = c_re(W[3]);
+		    tmp9 = c_im(W[3]);
+		    tmp11 = (tmp7 * tmp8) - (tmp9 * tmp10);	/* Re(t4) */
+		    tmp29 = (tmp9 * tmp8) + (tmp7 * tmp10);	/* Im(t4) */
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[2 * iostride]);
+		    tmp16 = c_im(inout[2 * iostride]);
+		    tmp13 = c_re(W[1]);
+		    tmp15 = c_im(W[1]);
+		    tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16);	/* Re(t2) */
+		    tmp31 = (tmp15 * tmp14) + (tmp13 * tmp16);	/* Im(t2) */
+	       }
+	       tmp30 = tmp28 - tmp29;	/* Im(t1 - t4) */
+	       tmp33 = tmp31 - tmp32;	/* Im(t2 - t3) */
+	       tmp37 = tmp28 + tmp29;	/* Im(t1 + t4) */
+	       tmp38 = tmp31 + tmp32;	/* Im(t2 + t3) */
+	       tmp39 = tmp37 + tmp38;	/* Im(t1 + t2 + t3 + t4) */
+	       tmp45 = tmp17 - tmp22;	/* Re(t2 - t3) */
+	       tmp44 = tmp6 - tmp11;	/* Re(t1 - t4) */
+	       tmp12 = tmp6 + tmp11;	/* Re(t1 + t4) */
+	       tmp23 = tmp17 + tmp22;	/* Re(t2 + t3) */
+	       tmp24 = tmp12 + tmp23;	/* Re(t1 + t2 + t3 + t4) */
+	  }
+	  c_re(inout[0]) = tmp1 + tmp24;	/* real part of the sum of all five terms */
+	  {
+	       fftw_real tmp34;
+	       fftw_real tmp36;
+	       fftw_real tmp27;
+	       fftw_real tmp35;
+	       fftw_real tmp25;
+	       fftw_real tmp26;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp34 = (K951056516 * tmp30) + (K587785252 * tmp33);	/* built from the imaginary differences; feeds real outputs 1 and 4 */
+	       tmp36 = (K951056516 * tmp33) - (K587785252 * tmp30);	/* feeds real outputs 2 and 3 */
+	       tmp25 = K559016994 * (tmp12 - tmp23);
+	       tmp26 = tmp1 - (K250000000 * tmp24);
+	       tmp27 = tmp25 + tmp26;
+	       tmp35 = tmp26 - tmp25;
+	       c_re(inout[4 * iostride]) = tmp27 - tmp34;
+	       c_re(inout[iostride]) = tmp27 + tmp34;
+	       c_re(inout[2 * iostride]) = tmp35 - tmp36;
+	       c_re(inout[3 * iostride]) = tmp35 + tmp36;
+	  }
+	  c_im(inout[0]) = tmp39 + tmp40;	/* imaginary part of the sum of all five terms */
+	  {
+	       fftw_real tmp46;
+	       fftw_real tmp47;
+	       fftw_real tmp43;
+	       fftw_real tmp48;
+	       fftw_real tmp41;
+	       fftw_real tmp42;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp46 = (K951056516 * tmp44) + (K587785252 * tmp45);	/* built from the real differences; feeds imaginary outputs 1 and 4 */
+	       tmp47 = (K951056516 * tmp45) - (K587785252 * tmp44);	/* feeds imaginary outputs 2 and 3 */
+	       tmp41 = K559016994 * (tmp37 - tmp38);
+	       tmp42 = tmp40 - (K250000000 * tmp39);
+	       tmp43 = tmp41 + tmp42;
+	       tmp48 = tmp42 - tmp41;
+	       c_im(inout[iostride]) = tmp43 - tmp46;
+	       c_im(inout[4 * iostride]) = tmp46 + tmp43;
+	       c_im(inout[2 * iostride]) = tmp47 + tmp48;
+	       c_im(inout[3 * iostride]) = tmp48 - tmp47;
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3, 4 };	/* 4 entries; referenced by the descriptor below */
+fftw_codelet_desc fftw_twiddle_5_desc = {	/* fftw_codelet_desc is declared outside this file; field labels below are presumed -- TODO confirm */
+     "fftw_twiddle_5",	/* presumably the codelet name */
+     (void (*)()) fftw_twiddle_5,	/* the codelet function, cast to an unprototyped function type */
+     5,	/* presumably the transform size */
+     FFTW_FORWARD,
+     FFTW_TWIDDLE,
+     110,	/* meaning not visible here -- TODO confirm against the struct declaration */
+     4,	/* equals the twiddle_order length and the per-iteration W advance in fftw_twiddle_5 */
+     twiddle_order,
+};
diff --git a/src/fftw/ftw_6.c b/src/fftw/ftw_6.c
new file mode 100644
index 0000000..4d86444
--- /dev/null
+++ b/src/fftw/ftw_6.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:07:33 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 6 */
+
+/*
+ * This function contains 46 FP additions, 28 FP multiplications,
+ * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
+ * 22 stack variables, and 24 memory accesses
+ */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);	/* 1/2 */
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);	/* numerically sqrt(3)/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: ftw_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_twiddle_6(fftw_complex *A, const fftw_complex *W, int iostride,	/* in-place 6-point twiddle codelet: A is read and overwritten, W is only read */
+		    int m, int dist)	/* m = iteration count, dist = step applied to the data pointer per iteration */
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 5) {	/* runs m times (not at all if m <= 0); each pass consumes W[0..4] */
+	  fftw_real tmp7;
+	  fftw_real tmp31;
+	  fftw_real tmp50;
+	  fftw_real tmp54;
+	  fftw_real tmp29;
+	  fftw_real tmp33;
+	  fftw_real tmp41;
+	  fftw_real tmp45;
+	  fftw_real tmp18;
+	  fftw_real tmp32;
+	  fftw_real tmp38;
+	  fftw_real tmp44;
+	  ASSERT_ALIGNED_DOUBLE;	/* notation in comments below: x0 = inout[0], tk = W[k-1] * inout[k*iostride] */
+	  {
+	       fftw_real tmp1;
+	       fftw_real tmp49;
+	       fftw_real tmp6;
+	       fftw_real tmp48;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);	/* Re(x0) */
+	       tmp49 = c_im(inout[0]);	/* Im(x0) */
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[3 * iostride]);
+		    tmp5 = c_im(inout[3 * iostride]);
+		    tmp2 = c_re(W[2]);
+		    tmp4 = c_im(W[2]);
+		    tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5);	/* Re(t3) */
+		    tmp48 = (tmp4 * tmp3) + (tmp2 * tmp5);	/* Im(t3) */
+	       }
+	       tmp7 = tmp1 - tmp6;	/* Re(x0 - t3) */
+	       tmp31 = tmp1 + tmp6;	/* Re(x0 + t3) */
+	       tmp50 = tmp48 + tmp49;	/* Im(x0 + t3) */
+	       tmp54 = tmp49 - tmp48;	/* Im(x0 - t3) */
+	  }
+	  {
+	       fftw_real tmp23;
+	       fftw_real tmp39;
+	       fftw_real tmp28;
+	       fftw_real tmp40;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    fftw_real tmp19;
+		    fftw_real tmp21;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp20 = c_re(inout[4 * iostride]);
+		    tmp22 = c_im(inout[4 * iostride]);
+		    tmp19 = c_re(W[3]);
+		    tmp21 = c_im(W[3]);
+		    tmp23 = (tmp19 * tmp20) - (tmp21 * tmp22);	/* Re(t4) */
+		    tmp39 = (tmp21 * tmp20) + (tmp19 * tmp22);	/* Im(t4) */
+	       }
+	       {
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    fftw_real tmp24;
+		    fftw_real tmp26;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp25 = c_re(inout[iostride]);
+		    tmp27 = c_im(inout[iostride]);
+		    tmp24 = c_re(W[0]);
+		    tmp26 = c_im(W[0]);
+		    tmp28 = (tmp24 * tmp25) - (tmp26 * tmp27);	/* Re(t1) */
+		    tmp40 = (tmp26 * tmp25) + (tmp24 * tmp27);	/* Im(t1) */
+	       }
+	       tmp29 = tmp23 - tmp28;	/* Re(t4 - t1) */
+	       tmp33 = tmp23 + tmp28;	/* Re(t4 + t1) */
+	       tmp41 = tmp39 - tmp40;	/* Im(t4 - t1) */
+	       tmp45 = tmp39 + tmp40;	/* Im(t4 + t1) */
+	  }
+	  {
+	       fftw_real tmp12;
+	       fftw_real tmp36;
+	       fftw_real tmp17;
+	       fftw_real tmp37;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp9;
+		    fftw_real tmp11;
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp9 = c_re(inout[2 * iostride]);
+		    tmp11 = c_im(inout[2 * iostride]);
+		    tmp8 = c_re(W[1]);
+		    tmp10 = c_im(W[1]);
+		    tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11);	/* Re(t2) */
+		    tmp36 = (tmp10 * tmp9) + (tmp8 * tmp11);	/* Im(t2) */
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[5 * iostride]);
+		    tmp16 = c_im(inout[5 * iostride]);
+		    tmp13 = c_re(W[4]);
+		    tmp15 = c_im(W[4]);
+		    tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16);	/* Re(t5) */
+		    tmp37 = (tmp15 * tmp14) + (tmp13 * tmp16);	/* Im(t5) */
+	       }
+	       tmp18 = tmp12 - tmp17;	/* Re(t2 - t5) */
+	       tmp32 = tmp12 + tmp17;	/* Re(t2 + t5) */
+	       tmp38 = tmp36 - tmp37;	/* Im(t2 - t5) */
+	       tmp44 = tmp36 + tmp37;	/* Im(t2 + t5) */
+	  }
+	  {	/* this block writes outputs 1, 3 and 5, using only the "difference" terms */
+	       fftw_real tmp42;
+	       fftw_real tmp30;
+	       fftw_real tmp35;
+	       fftw_real tmp53;
+	       fftw_real tmp55;
+	       fftw_real tmp56;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp42 = K866025403 * (tmp38 - tmp41);
+	       tmp30 = tmp18 + tmp29;
+	       tmp35 = tmp7 - (K500000000 * tmp30);
+	       c_re(inout[3 * iostride]) = tmp7 + tmp30;
+	       c_re(inout[iostride]) = tmp35 + tmp42;
+	       c_re(inout[5 * iostride]) = tmp35 - tmp42;
+	       tmp53 = K866025403 * (tmp29 - tmp18);
+	       tmp55 = tmp38 + tmp41;
+	       tmp56 = tmp54 - (K500000000 * tmp55);
+	       c_im(inout[iostride]) = tmp53 + tmp56;
+	       c_im(inout[5 * iostride]) = tmp56 - tmp53;
+	       c_im(inout[3 * iostride]) = tmp55 + tmp54;
+	  }
+	  {	/* this block writes outputs 0, 2 and 4, using only the "sum" terms */
+	       fftw_real tmp46;
+	       fftw_real tmp34;
+	       fftw_real tmp43;
+	       fftw_real tmp52;
+	       fftw_real tmp47;
+	       fftw_real tmp51;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp46 = K866025403 * (tmp44 - tmp45);
+	       tmp34 = tmp32 + tmp33;
+	       tmp43 = tmp31 - (K500000000 * tmp34);
+	       c_re(inout[0]) = tmp31 + tmp34;	/* real part of the sum of all six terms */
+	       c_re(inout[4 * iostride]) = tmp43 + tmp46;
+	       c_re(inout[2 * iostride]) = tmp43 - tmp46;
+	       tmp52 = K866025403 * (tmp33 - tmp32);
+	       tmp47 = tmp44 + tmp45;
+	       tmp51 = tmp50 - (K500000000 * tmp47);
+	       c_im(inout[0]) = tmp47 + tmp50;	/* imaginary part of the sum of all six terms */
+	       c_im(inout[4 * iostride]) = tmp52 + tmp51;
+	       c_im(inout[2 * iostride]) = tmp51 - tmp52;
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3, 4, 5 };	/* 5 entries; referenced by the descriptor below */
+fftw_codelet_desc fftw_twiddle_6_desc = {	/* fftw_codelet_desc is declared outside this file; field labels below are presumed -- TODO confirm */
+     "fftw_twiddle_6",	/* presumably the codelet name */
+     (void (*)()) fftw_twiddle_6,	/* the codelet function, cast to an unprototyped function type */
+     6,	/* presumably the transform size */
+     FFTW_FORWARD,
+     FFTW_TWIDDLE,
+     132,	/* meaning not visible here -- TODO confirm against the struct declaration */
+     5,	/* equals the twiddle_order length and the per-iteration W advance in fftw_twiddle_6 */
+     twiddle_order,
+};
diff --git a/src/fftw/ftw_64.c b/src/fftw/ftw_64.c
new file mode 100644
index 0000000..99d6e26
--- /dev/null
+++ b/src/fftw/ftw_64.c
@@ -0,0 +1,3203 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:07:48 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 64 */
+
+/*
+ * This function contains 1038 FP additions, 500 FP multiplications,
+ * (or, 808 additions, 270 multiplications, 230 fused multiply/add),
+ * 162 stack variables, and 256 memory accesses
+ */
+static const fftw_real K290284677 =
+FFTW_KONST(+0.290284677254462367636192375817395274691476278);	/* numerically sin(3*pi/32) */
+static const fftw_real K956940335 =
+FFTW_KONST(+0.956940335732208864935797886980269969482849206);	/* numerically cos(3*pi/32) */
+static const fftw_real K881921264 =
+FFTW_KONST(+0.881921264348355029712756863660388349508442621);	/* numerically cos(5*pi/32) */
+static const fftw_real K471396736 =
+FFTW_KONST(+0.471396736825997648556387625905254377657460319);	/* numerically sin(5*pi/32) */
+static const fftw_real K555570233 =
+FFTW_KONST(+0.555570233019602224742830813948532874374937191);	/* numerically sin(3*pi/16) */
+static const fftw_real K831469612 =
+FFTW_KONST(+0.831469612302545237078788377617905756738560812);	/* numerically cos(3*pi/16) */
+static const fftw_real K773010453 =
+FFTW_KONST(+0.773010453362736960810906609758469800971041293);	/* numerically cos(7*pi/32) */
+static const fftw_real K634393284 =
+FFTW_KONST(+0.634393284163645498215171613225493370675687095);	/* numerically sin(7*pi/32) */
+static const fftw_real K098017140 =
+FFTW_KONST(+0.098017140329560601994195563888641845861136673);	/* numerically sin(pi/32) */
+static const fftw_real K995184726 =
+FFTW_KONST(+0.995184726672196886244836953109479921575474869);	/* numerically cos(pi/32) */
+static const fftw_real K980785280 =
+FFTW_KONST(+0.980785280403230449126182236134239036973933731);	/* numerically cos(pi/16) */
+static const fftw_real K195090322 =
+FFTW_KONST(+0.195090322016128267848284868477022240927691618);	/* numerically sin(pi/16) */
+static const fftw_real K707106781 =
+FFTW_KONST(+0.707106781186547524400844362104849039284835938);	/* numerically sqrt(2)/2 */
+static const fftw_real K923879532 =
+FFTW_KONST(+0.923879532511286756128183189396788286822416626);	/* numerically cos(pi/8) */
+static const fftw_real K382683432 =
+FFTW_KONST(+0.382683432365089771728459984030398866761344562);	/* numerically sin(pi/8) */
+
+/*
+ * Generator Id's : 
+ * $Id: ftw_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_twiddle_64(fftw_complex *A, const fftw_complex *W, int iostride,
+		     int m, int dist)
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 63) {
+	  fftw_real tmp19;
+	  fftw_real tmp791;
+	  fftw_real tmp1109;
+	  fftw_real tmp1139;
+	  fftw_real tmp1047;
+	  fftw_real tmp1077;
+	  fftw_real tmp383;
+	  fftw_real tmp655;
+	  fftw_real tmp66;
+	  fftw_real tmp800;
+	  fftw_real tmp908;
+	  fftw_real tmp956;
+	  fftw_real tmp406;
+	  fftw_real tmp608;
+	  fftw_real tmp662;
+	  fftw_real tmp744;
+	  fftw_real tmp42;
+	  fftw_real tmp1076;
+	  fftw_real tmp794;
+	  fftw_real tmp1042;
+	  fftw_real tmp394;
+	  fftw_real tmp1106;
+	  fftw_real tmp658;
+	  fftw_real tmp1138;
+	  fftw_real tmp329;
+	  fftw_real tmp983;
+	  fftw_real tmp863;
+	  fftw_real tmp927;
+	  fftw_real tmp990;
+	  fftw_real tmp1026;
+	  fftw_real tmp880;
+	  fftw_real tmp930;
+	  fftw_real tmp535;
+	  fftw_real tmp703;
+	  fftw_real tmp576;
+	  fftw_real tmp714;
+	  fftw_real tmp579;
+	  fftw_real tmp704;
+	  fftw_real tmp546;
+	  fftw_real tmp715;
+	  fftw_real tmp376;
+	  fftw_real tmp991;
+	  fftw_real tmp868;
+	  fftw_real tmp882;
+	  fftw_real tmp986;
+	  fftw_real tmp1027;
+	  fftw_real tmp873;
+	  fftw_real tmp881;
+	  fftw_real tmp558;
+	  fftw_real tmp582;
+	  fftw_real tmp708;
+	  fftw_real tmp718;
+	  fftw_real tmp569;
+	  fftw_real tmp581;
+	  fftw_real tmp711;
+	  fftw_real tmp717;
+	  fftw_real tmp89;
+	  fftw_real tmp805;
+	  fftw_real tmp909;
+	  fftw_real tmp957;
+	  fftw_real tmp417;
+	  fftw_real tmp609;
+	  fftw_real tmp665;
+	  fftw_real tmp745;
+	  fftw_real tmp161;
+	  fftw_real tmp184;
+	  fftw_real tmp965;
+	  fftw_real tmp823;
+	  fftw_real tmp915;
+	  fftw_real tmp966;
+	  fftw_real tmp967;
+	  fftw_real tmp968;
+	  fftw_real tmp828;
+	  fftw_real tmp916;
+	  fftw_real tmp451;
+	  fftw_real tmp678;
+	  fftw_real tmp468;
+	  fftw_real tmp675;
+	  fftw_real tmp471;
+	  fftw_real tmp679;
+	  fftw_real tmp462;
+	  fftw_real tmp676;
+	  fftw_real tmp114;
+	  fftw_real tmp137;
+	  fftw_real tmp963;
+	  fftw_real tmp812;
+	  fftw_real tmp912;
+	  fftw_real tmp960;
+	  fftw_real tmp961;
+	  fftw_real tmp962;
+	  fftw_real tmp817;
+	  fftw_real tmp913;
+	  fftw_real tmp424;
+	  fftw_real tmp668;
+	  fftw_real tmp441;
+	  fftw_real tmp671;
+	  fftw_real tmp444;
+	  fftw_real tmp669;
+	  fftw_real tmp435;
+	  fftw_real tmp672;
+	  fftw_real tmp234;
+	  fftw_real tmp977;
+	  fftw_real tmp836;
+	  fftw_real tmp923;
+	  fftw_real tmp974;
+	  fftw_real tmp1021;
+	  fftw_real tmp853;
+	  fftw_real tmp920;
+	  fftw_real tmp480;
+	  fftw_real tmp684;
+	  fftw_real tmp521;
+	  fftw_real tmp695;
+	  fftw_real tmp524;
+	  fftw_real tmp685;
+	  fftw_real tmp491;
+	  fftw_real tmp696;
+	  fftw_real tmp281;
+	  fftw_real tmp975;
+	  fftw_real tmp841;
+	  fftw_real tmp855;
+	  fftw_real tmp980;
+	  fftw_real tmp1022;
+	  fftw_real tmp846;
+	  fftw_real tmp854;
+	  fftw_real tmp503;
+	  fftw_real tmp527;
+	  fftw_real tmp689;
+	  fftw_real tmp699;
+	  fftw_real tmp514;
+	  fftw_real tmp526;
+	  fftw_real tmp692;
+	  fftw_real tmp698;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp1;
+	       fftw_real tmp1045;
+	       fftw_real tmp6;
+	       fftw_real tmp1044;
+	       fftw_real tmp12;
+	       fftw_real tmp380;
+	       fftw_real tmp17;
+	       fftw_real tmp381;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);
+	       tmp1045 = c_im(inout[0]);
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[32 * iostride]);
+		    tmp5 = c_im(inout[32 * iostride]);
+		    tmp2 = c_re(W[31]);
+		    tmp4 = c_im(W[31]);
+		    tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5);
+		    tmp1044 = (tmp4 * tmp3) + (tmp2 * tmp5);
+	       }
+	       {
+		    fftw_real tmp9;
+		    fftw_real tmp11;
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp9 = c_re(inout[16 * iostride]);
+		    tmp11 = c_im(inout[16 * iostride]);
+		    tmp8 = c_re(W[15]);
+		    tmp10 = c_im(W[15]);
+		    tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11);
+		    tmp380 = (tmp10 * tmp9) + (tmp8 * tmp11);
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[48 * iostride]);
+		    tmp16 = c_im(inout[48 * iostride]);
+		    tmp13 = c_re(W[47]);
+		    tmp15 = c_im(W[47]);
+		    tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16);
+		    tmp381 = (tmp15 * tmp14) + (tmp13 * tmp16);
+	       }
+	       {
+		    fftw_real tmp7;
+		    fftw_real tmp18;
+		    fftw_real tmp1107;
+		    fftw_real tmp1108;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp7 = tmp1 + tmp6;
+		    tmp18 = tmp12 + tmp17;
+		    tmp19 = tmp7 + tmp18;
+		    tmp791 = tmp7 - tmp18;
+		    tmp1107 = tmp1045 - tmp1044;
+		    tmp1108 = tmp12 - tmp17;
+		    tmp1109 = tmp1107 - tmp1108;
+		    tmp1139 = tmp1108 + tmp1107;
+	       }
+	       {
+		    fftw_real tmp1043;
+		    fftw_real tmp1046;
+		    fftw_real tmp379;
+		    fftw_real tmp382;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1043 = tmp380 + tmp381;
+		    tmp1046 = tmp1044 + tmp1045;
+		    tmp1047 = tmp1043 + tmp1046;
+		    tmp1077 = tmp1046 - tmp1043;
+		    tmp379 = tmp1 - tmp6;
+		    tmp382 = tmp380 - tmp381;
+		    tmp383 = tmp379 - tmp382;
+		    tmp655 = tmp379 + tmp382;
+	       }
+	  }
+	  {
+	       fftw_real tmp54;
+	       fftw_real tmp401;
+	       fftw_real tmp398;
+	       fftw_real tmp796;
+	       fftw_real tmp65;
+	       fftw_real tmp399;
+	       fftw_real tmp404;
+	       fftw_real tmp797;
+	       fftw_real tmp798;
+	       fftw_real tmp799;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp48;
+		    fftw_real tmp396;
+		    fftw_real tmp53;
+		    fftw_real tmp397;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp45;
+			 fftw_real tmp47;
+			 fftw_real tmp44;
+			 fftw_real tmp46;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp45 = c_re(inout[4 * iostride]);
+			 tmp47 = c_im(inout[4 * iostride]);
+			 tmp44 = c_re(W[3]);
+			 tmp46 = c_im(W[3]);
+			 tmp48 = (tmp44 * tmp45) - (tmp46 * tmp47);
+			 tmp396 = (tmp46 * tmp45) + (tmp44 * tmp47);
+		    }
+		    {
+			 fftw_real tmp50;
+			 fftw_real tmp52;
+			 fftw_real tmp49;
+			 fftw_real tmp51;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp50 = c_re(inout[36 * iostride]);
+			 tmp52 = c_im(inout[36 * iostride]);
+			 tmp49 = c_re(W[35]);
+			 tmp51 = c_im(W[35]);
+			 tmp53 = (tmp49 * tmp50) - (tmp51 * tmp52);
+			 tmp397 = (tmp51 * tmp50) + (tmp49 * tmp52);
+		    }
+		    tmp54 = tmp48 + tmp53;
+		    tmp401 = tmp48 - tmp53;
+		    tmp398 = tmp396 - tmp397;
+		    tmp796 = tmp396 + tmp397;
+	       }
+	       {
+		    fftw_real tmp59;
+		    fftw_real tmp402;
+		    fftw_real tmp64;
+		    fftw_real tmp403;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp56;
+			 fftw_real tmp58;
+			 fftw_real tmp55;
+			 fftw_real tmp57;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp56 = c_re(inout[20 * iostride]);
+			 tmp58 = c_im(inout[20 * iostride]);
+			 tmp55 = c_re(W[19]);
+			 tmp57 = c_im(W[19]);
+			 tmp59 = (tmp55 * tmp56) - (tmp57 * tmp58);
+			 tmp402 = (tmp57 * tmp56) + (tmp55 * tmp58);
+		    }
+		    {
+			 fftw_real tmp61;
+			 fftw_real tmp63;
+			 fftw_real tmp60;
+			 fftw_real tmp62;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp61 = c_re(inout[52 * iostride]);
+			 tmp63 = c_im(inout[52 * iostride]);
+			 tmp60 = c_re(W[51]);
+			 tmp62 = c_im(W[51]);
+			 tmp64 = (tmp60 * tmp61) - (tmp62 * tmp63);
+			 tmp403 = (tmp62 * tmp61) + (tmp60 * tmp63);
+		    }
+		    tmp65 = tmp59 + tmp64;
+		    tmp399 = tmp59 - tmp64;
+		    tmp404 = tmp402 - tmp403;
+		    tmp797 = tmp402 + tmp403;
+	       }
+	       tmp66 = tmp54 + tmp65;
+	       tmp798 = tmp796 - tmp797;
+	       tmp799 = tmp54 - tmp65;
+	       tmp800 = tmp798 - tmp799;
+	       tmp908 = tmp799 + tmp798;
+	       tmp956 = tmp796 + tmp797;
+	       {
+		    fftw_real tmp400;
+		    fftw_real tmp405;
+		    fftw_real tmp660;
+		    fftw_real tmp661;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp400 = tmp398 + tmp399;
+		    tmp405 = tmp401 - tmp404;
+		    tmp406 = (K382683432 * tmp400) - (K923879532 * tmp405);
+		    tmp608 = (K923879532 * tmp400) + (K382683432 * tmp405);
+		    tmp660 = tmp398 - tmp399;
+		    tmp661 = tmp401 + tmp404;
+		    tmp662 = (K923879532 * tmp660) - (K382683432 * tmp661);
+		    tmp744 = (K382683432 * tmp660) + (K923879532 * tmp661);
+	       }
+	  }
+	  {
+	       fftw_real tmp24;
+	       fftw_real tmp384;
+	       fftw_real tmp29;
+	       fftw_real tmp385;
+	       fftw_real tmp386;
+	       fftw_real tmp387;
+	       fftw_real tmp35;
+	       fftw_real tmp390;
+	       fftw_real tmp40;
+	       fftw_real tmp391;
+	       fftw_real tmp389;
+	       fftw_real tmp392;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp21;
+		    fftw_real tmp23;
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp21 = c_re(inout[8 * iostride]);
+		    tmp23 = c_im(inout[8 * iostride]);
+		    tmp20 = c_re(W[7]);
+		    tmp22 = c_im(W[7]);
+		    tmp24 = (tmp20 * tmp21) - (tmp22 * tmp23);
+		    tmp384 = (tmp22 * tmp21) + (tmp20 * tmp23);
+	       }
+	       {
+		    fftw_real tmp26;
+		    fftw_real tmp28;
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp26 = c_re(inout[40 * iostride]);
+		    tmp28 = c_im(inout[40 * iostride]);
+		    tmp25 = c_re(W[39]);
+		    tmp27 = c_im(W[39]);
+		    tmp29 = (tmp25 * tmp26) - (tmp27 * tmp28);
+		    tmp385 = (tmp27 * tmp26) + (tmp25 * tmp28);
+	       }
+	       tmp386 = tmp384 - tmp385;
+	       tmp387 = tmp24 - tmp29;
+	       {
+		    fftw_real tmp32;
+		    fftw_real tmp34;
+		    fftw_real tmp31;
+		    fftw_real tmp33;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp32 = c_re(inout[56 * iostride]);
+		    tmp34 = c_im(inout[56 * iostride]);
+		    tmp31 = c_re(W[55]);
+		    tmp33 = c_im(W[55]);
+		    tmp35 = (tmp31 * tmp32) - (tmp33 * tmp34);
+		    tmp390 = (tmp33 * tmp32) + (tmp31 * tmp34);
+	       }
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp39;
+		    fftw_real tmp36;
+		    fftw_real tmp38;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = c_re(inout[24 * iostride]);
+		    tmp39 = c_im(inout[24 * iostride]);
+		    tmp36 = c_re(W[23]);
+		    tmp38 = c_im(W[23]);
+		    tmp40 = (tmp36 * tmp37) - (tmp38 * tmp39);
+		    tmp391 = (tmp38 * tmp37) + (tmp36 * tmp39);
+	       }
+	       tmp389 = tmp35 - tmp40;
+	       tmp392 = tmp390 - tmp391;
+	       {
+		    fftw_real tmp30;
+		    fftw_real tmp41;
+		    fftw_real tmp792;
+		    fftw_real tmp793;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp30 = tmp24 + tmp29;
+		    tmp41 = tmp35 + tmp40;
+		    tmp42 = tmp30 + tmp41;
+		    tmp1076 = tmp41 - tmp30;
+		    tmp792 = tmp384 + tmp385;
+		    tmp793 = tmp390 + tmp391;
+		    tmp794 = tmp792 - tmp793;
+		    tmp1042 = tmp792 + tmp793;
+	       }
+	       {
+		    fftw_real tmp388;
+		    fftw_real tmp393;
+		    fftw_real tmp656;
+		    fftw_real tmp657;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp388 = tmp386 - tmp387;
+		    tmp393 = tmp389 + tmp392;
+		    tmp394 = K707106781 * (tmp388 - tmp393);
+		    tmp1106 = K707106781 * (tmp388 + tmp393);
+		    tmp656 = tmp387 + tmp386;
+		    tmp657 = tmp389 - tmp392;
+		    tmp658 = K707106781 * (tmp656 + tmp657);
+		    tmp1138 = K707106781 * (tmp657 - tmp656);
+	       }
+	  }
+	  {
+	       fftw_real tmp287;
+	       fftw_real tmp572;
+	       fftw_real tmp292;
+	       fftw_real tmp573;
+	       fftw_real tmp293;
+	       fftw_real tmp876;
+	       fftw_real tmp327;
+	       fftw_real tmp541;
+	       fftw_real tmp544;
+	       fftw_real tmp861;
+	       fftw_real tmp298;
+	       fftw_real tmp532;
+	       fftw_real tmp303;
+	       fftw_real tmp533;
+	       fftw_real tmp304;
+	       fftw_real tmp877;
+	       fftw_real tmp316;
+	       fftw_real tmp539;
+	       fftw_real tmp538;
+	       fftw_real tmp860;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp284;
+		    fftw_real tmp286;
+		    fftw_real tmp283;
+		    fftw_real tmp285;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp284 = c_re(inout[63 * iostride]);
+		    tmp286 = c_im(inout[63 * iostride]);
+		    tmp283 = c_re(W[62]);
+		    tmp285 = c_im(W[62]);
+		    tmp287 = (tmp283 * tmp284) - (tmp285 * tmp286);
+		    tmp572 = (tmp285 * tmp284) + (tmp283 * tmp286);
+	       }
+	       {
+		    fftw_real tmp289;
+		    fftw_real tmp291;
+		    fftw_real tmp288;
+		    fftw_real tmp290;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp289 = c_re(inout[31 * iostride]);
+		    tmp291 = c_im(inout[31 * iostride]);
+		    tmp288 = c_re(W[30]);
+		    tmp290 = c_im(W[30]);
+		    tmp292 = (tmp288 * tmp289) - (tmp290 * tmp291);
+		    tmp573 = (tmp290 * tmp289) + (tmp288 * tmp291);
+	       }
+	       tmp293 = tmp287 + tmp292;
+	       tmp876 = tmp572 + tmp573;
+	       {
+		    fftw_real tmp321;
+		    fftw_real tmp542;
+		    fftw_real tmp326;
+		    fftw_real tmp543;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp318;
+			 fftw_real tmp320;
+			 fftw_real tmp317;
+			 fftw_real tmp319;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp318 = c_re(inout[55 * iostride]);
+			 tmp320 = c_im(inout[55 * iostride]);
+			 tmp317 = c_re(W[54]);
+			 tmp319 = c_im(W[54]);
+			 tmp321 = (tmp317 * tmp318) - (tmp319 * tmp320);
+			 tmp542 = (tmp319 * tmp318) + (tmp317 * tmp320);
+		    }
+		    {
+			 fftw_real tmp323;
+			 fftw_real tmp325;
+			 fftw_real tmp322;
+			 fftw_real tmp324;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp323 = c_re(inout[23 * iostride]);
+			 tmp325 = c_im(inout[23 * iostride]);
+			 tmp322 = c_re(W[22]);
+			 tmp324 = c_im(W[22]);
+			 tmp326 = (tmp322 * tmp323) - (tmp324 * tmp325);
+			 tmp543 = (tmp324 * tmp323) + (tmp322 * tmp325);
+		    }
+		    tmp327 = tmp321 + tmp326;
+		    tmp541 = tmp321 - tmp326;
+		    tmp544 = tmp542 - tmp543;
+		    tmp861 = tmp542 + tmp543;
+	       }
+	       {
+		    fftw_real tmp295;
+		    fftw_real tmp297;
+		    fftw_real tmp294;
+		    fftw_real tmp296;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp295 = c_re(inout[15 * iostride]);
+		    tmp297 = c_im(inout[15 * iostride]);
+		    tmp294 = c_re(W[14]);
+		    tmp296 = c_im(W[14]);
+		    tmp298 = (tmp294 * tmp295) - (tmp296 * tmp297);
+		    tmp532 = (tmp296 * tmp295) + (tmp294 * tmp297);
+	       }
+	       {
+		    fftw_real tmp300;
+		    fftw_real tmp302;
+		    fftw_real tmp299;
+		    fftw_real tmp301;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp300 = c_re(inout[47 * iostride]);
+		    tmp302 = c_im(inout[47 * iostride]);
+		    tmp299 = c_re(W[46]);
+		    tmp301 = c_im(W[46]);
+		    tmp303 = (tmp299 * tmp300) - (tmp301 * tmp302);
+		    tmp533 = (tmp301 * tmp300) + (tmp299 * tmp302);
+	       }
+	       tmp304 = tmp298 + tmp303;
+	       tmp877 = tmp532 + tmp533;
+	       {
+		    fftw_real tmp310;
+		    fftw_real tmp536;
+		    fftw_real tmp315;
+		    fftw_real tmp537;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp307;
+			 fftw_real tmp309;
+			 fftw_real tmp306;
+			 fftw_real tmp308;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp307 = c_re(inout[7 * iostride]);
+			 tmp309 = c_im(inout[7 * iostride]);
+			 tmp306 = c_re(W[6]);
+			 tmp308 = c_im(W[6]);
+			 tmp310 = (tmp306 * tmp307) - (tmp308 * tmp309);
+			 tmp536 = (tmp308 * tmp307) + (tmp306 * tmp309);
+		    }
+		    {
+			 fftw_real tmp312;
+			 fftw_real tmp314;
+			 fftw_real tmp311;
+			 fftw_real tmp313;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp312 = c_re(inout[39 * iostride]);
+			 tmp314 = c_im(inout[39 * iostride]);
+			 tmp311 = c_re(W[38]);
+			 tmp313 = c_im(W[38]);
+			 tmp315 = (tmp311 * tmp312) - (tmp313 * tmp314);
+			 tmp537 = (tmp313 * tmp312) + (tmp311 * tmp314);
+		    }
+		    tmp316 = tmp310 + tmp315;
+		    tmp539 = tmp310 - tmp315;
+		    tmp538 = tmp536 - tmp537;
+		    tmp860 = tmp536 + tmp537;
+	       }
+	       {
+		    fftw_real tmp305;
+		    fftw_real tmp328;
+		    fftw_real tmp859;
+		    fftw_real tmp862;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp305 = tmp293 + tmp304;
+		    tmp328 = tmp316 + tmp327;
+		    tmp329 = tmp305 + tmp328;
+		    tmp983 = tmp305 - tmp328;
+		    tmp859 = tmp293 - tmp304;
+		    tmp862 = tmp860 - tmp861;
+		    tmp863 = tmp859 - tmp862;
+		    tmp927 = tmp859 + tmp862;
+	       }
+	       {
+		    fftw_real tmp988;
+		    fftw_real tmp989;
+		    fftw_real tmp878;
+		    fftw_real tmp879;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp988 = tmp876 + tmp877;
+		    tmp989 = tmp860 + tmp861;
+		    tmp990 = tmp988 - tmp989;
+		    tmp1026 = tmp988 + tmp989;
+		    tmp878 = tmp876 - tmp877;
+		    tmp879 = tmp327 - tmp316;
+		    tmp880 = tmp878 - tmp879;
+		    tmp930 = tmp878 + tmp879;
+	       }
+	       {
+		    fftw_real tmp531;
+		    fftw_real tmp534;
+		    fftw_real tmp574;
+		    fftw_real tmp575;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp531 = tmp287 - tmp292;
+		    tmp534 = tmp532 - tmp533;
+		    tmp535 = tmp531 - tmp534;
+		    tmp703 = tmp531 + tmp534;
+		    tmp574 = tmp572 - tmp573;
+		    tmp575 = tmp298 - tmp303;
+		    tmp576 = tmp574 + tmp575;
+		    tmp714 = tmp574 - tmp575;
+	       }
+	       {
+		    fftw_real tmp577;
+		    fftw_real tmp578;
+		    fftw_real tmp540;
+		    fftw_real tmp545;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp577 = tmp541 - tmp544;
+		    tmp578 = tmp539 + tmp538;
+		    tmp579 = K707106781 * (tmp577 - tmp578);
+		    tmp704 = K707106781 * (tmp578 + tmp577);
+		    tmp540 = tmp538 - tmp539;
+		    tmp545 = tmp541 + tmp544;
+		    tmp546 = K707106781 * (tmp540 - tmp545);
+		    tmp715 = K707106781 * (tmp540 + tmp545);
+	       }
+	  }
+	  {
+	       fftw_real tmp340;
+	       fftw_real tmp553;
+	       fftw_real tmp550;
+	       fftw_real tmp864;
+	       fftw_real tmp374;
+	       fftw_real tmp562;
+	       fftw_real tmp567;
+	       fftw_real tmp871;
+	       fftw_real tmp351;
+	       fftw_real tmp551;
+	       fftw_real tmp556;
+	       fftw_real tmp865;
+	       fftw_real tmp363;
+	       fftw_real tmp564;
+	       fftw_real tmp561;
+	       fftw_real tmp870;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp334;
+		    fftw_real tmp548;
+		    fftw_real tmp339;
+		    fftw_real tmp549;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp331;
+			 fftw_real tmp333;
+			 fftw_real tmp330;
+			 fftw_real tmp332;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp331 = c_re(inout[3 * iostride]);
+			 tmp333 = c_im(inout[3 * iostride]);
+			 tmp330 = c_re(W[2]);
+			 tmp332 = c_im(W[2]);
+			 tmp334 = (tmp330 * tmp331) - (tmp332 * tmp333);
+			 tmp548 = (tmp332 * tmp331) + (tmp330 * tmp333);
+		    }
+		    {
+			 fftw_real tmp336;
+			 fftw_real tmp338;
+			 fftw_real tmp335;
+			 fftw_real tmp337;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp336 = c_re(inout[35 * iostride]);
+			 tmp338 = c_im(inout[35 * iostride]);
+			 tmp335 = c_re(W[34]);
+			 tmp337 = c_im(W[34]);
+			 tmp339 = (tmp335 * tmp336) - (tmp337 * tmp338);
+			 tmp549 = (tmp337 * tmp336) + (tmp335 * tmp338);
+		    }
+		    tmp340 = tmp334 + tmp339;
+		    tmp553 = tmp334 - tmp339;
+		    tmp550 = tmp548 - tmp549;
+		    tmp864 = tmp548 + tmp549;
+	       }
+	       {
+		    fftw_real tmp368;
+		    fftw_real tmp565;
+		    fftw_real tmp373;
+		    fftw_real tmp566;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp365;
+			 fftw_real tmp367;
+			 fftw_real tmp364;
+			 fftw_real tmp366;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp365 = c_re(inout[11 * iostride]);
+			 tmp367 = c_im(inout[11 * iostride]);
+			 tmp364 = c_re(W[10]);
+			 tmp366 = c_im(W[10]);
+			 tmp368 = (tmp364 * tmp365) - (tmp366 * tmp367);
+			 tmp565 = (tmp366 * tmp365) + (tmp364 * tmp367);
+		    }
+		    {
+			 fftw_real tmp370;
+			 fftw_real tmp372;
+			 fftw_real tmp369;
+			 fftw_real tmp371;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp370 = c_re(inout[43 * iostride]);
+			 tmp372 = c_im(inout[43 * iostride]);
+			 tmp369 = c_re(W[42]);
+			 tmp371 = c_im(W[42]);
+			 tmp373 = (tmp369 * tmp370) - (tmp371 * tmp372);
+			 tmp566 = (tmp371 * tmp370) + (tmp369 * tmp372);
+		    }
+		    tmp374 = tmp368 + tmp373;
+		    tmp562 = tmp368 - tmp373;
+		    tmp567 = tmp565 - tmp566;
+		    tmp871 = tmp565 + tmp566;
+	       }
+	       {
+		    fftw_real tmp345;
+		    fftw_real tmp554;
+		    fftw_real tmp350;
+		    fftw_real tmp555;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp342;
+			 fftw_real tmp344;
+			 fftw_real tmp341;
+			 fftw_real tmp343;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp342 = c_re(inout[19 * iostride]);
+			 tmp344 = c_im(inout[19 * iostride]);
+			 tmp341 = c_re(W[18]);
+			 tmp343 = c_im(W[18]);
+			 tmp345 = (tmp341 * tmp342) - (tmp343 * tmp344);
+			 tmp554 = (tmp343 * tmp342) + (tmp341 * tmp344);
+		    }
+		    {
+			 fftw_real tmp347;
+			 fftw_real tmp349;
+			 fftw_real tmp346;
+			 fftw_real tmp348;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp347 = c_re(inout[51 * iostride]);
+			 tmp349 = c_im(inout[51 * iostride]);
+			 tmp346 = c_re(W[50]);
+			 tmp348 = c_im(W[50]);
+			 tmp350 = (tmp346 * tmp347) - (tmp348 * tmp349);
+			 tmp555 = (tmp348 * tmp347) + (tmp346 * tmp349);
+		    }
+		    tmp351 = tmp345 + tmp350;
+		    tmp551 = tmp345 - tmp350;
+		    tmp556 = tmp554 - tmp555;
+		    tmp865 = tmp554 + tmp555;
+	       }
+	       {
+		    fftw_real tmp357;
+		    fftw_real tmp559;
+		    fftw_real tmp362;
+		    fftw_real tmp560;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp354;
+			 fftw_real tmp356;
+			 fftw_real tmp353;
+			 fftw_real tmp355;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp354 = c_re(inout[59 * iostride]);
+			 tmp356 = c_im(inout[59 * iostride]);
+			 tmp353 = c_re(W[58]);
+			 tmp355 = c_im(W[58]);
+			 tmp357 = (tmp353 * tmp354) - (tmp355 * tmp356);
+			 tmp559 = (tmp355 * tmp354) + (tmp353 * tmp356);
+		    }
+		    {
+			 fftw_real tmp359;
+			 fftw_real tmp361;
+			 fftw_real tmp358;
+			 fftw_real tmp360;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp359 = c_re(inout[27 * iostride]);
+			 tmp361 = c_im(inout[27 * iostride]);
+			 tmp358 = c_re(W[26]);
+			 tmp360 = c_im(W[26]);
+			 tmp362 = (tmp358 * tmp359) - (tmp360 * tmp361);
+			 tmp560 = (tmp360 * tmp359) + (tmp358 * tmp361);
+		    }
+		    tmp363 = tmp357 + tmp362;
+		    tmp564 = tmp357 - tmp362;
+		    tmp561 = tmp559 - tmp560;
+		    tmp870 = tmp559 + tmp560;
+	       }
+	       {
+		    fftw_real tmp352;
+		    fftw_real tmp375;
+		    fftw_real tmp866;
+		    fftw_real tmp867;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp352 = tmp340 + tmp351;
+		    tmp375 = tmp363 + tmp374;
+		    tmp376 = tmp352 + tmp375;
+		    tmp991 = tmp375 - tmp352;
+		    tmp866 = tmp864 - tmp865;
+		    tmp867 = tmp340 - tmp351;
+		    tmp868 = tmp866 - tmp867;
+		    tmp882 = tmp867 + tmp866;
+	       }
+	       {
+		    fftw_real tmp984;
+		    fftw_real tmp985;
+		    fftw_real tmp869;
+		    fftw_real tmp872;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp984 = tmp864 + tmp865;
+		    tmp985 = tmp870 + tmp871;
+		    tmp986 = tmp984 - tmp985;
+		    tmp1027 = tmp984 + tmp985;
+		    tmp869 = tmp363 - tmp374;
+		    tmp872 = tmp870 - tmp871;
+		    tmp873 = tmp869 + tmp872;
+		    tmp881 = tmp869 - tmp872;
+	       }
+	       {
+		    fftw_real tmp552;
+		    fftw_real tmp557;
+		    fftw_real tmp706;
+		    fftw_real tmp707;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp552 = tmp550 + tmp551;
+		    tmp557 = tmp553 - tmp556;
+		    tmp558 = (K382683432 * tmp552) - (K923879532 * tmp557);
+		    tmp582 = (K923879532 * tmp552) + (K382683432 * tmp557);
+		    tmp706 = tmp550 - tmp551;
+		    tmp707 = tmp553 + tmp556;
+		    tmp708 = (K923879532 * tmp706) - (K382683432 * tmp707);
+		    tmp718 = (K382683432 * tmp706) + (K923879532 * tmp707);
+	       }
+	       {
+		    fftw_real tmp563;
+		    fftw_real tmp568;
+		    fftw_real tmp709;
+		    fftw_real tmp710;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp563 = tmp561 + tmp562;
+		    tmp568 = tmp564 - tmp567;
+		    tmp569 = (K382683432 * tmp563) + (K923879532 * tmp568);
+		    tmp581 = (K382683432 * tmp568) - (K923879532 * tmp563);
+		    tmp709 = tmp561 - tmp562;
+		    tmp710 = tmp564 + tmp567;
+		    tmp711 = (K923879532 * tmp709) + (K382683432 * tmp710);
+		    tmp717 = (K923879532 * tmp710) - (K382683432 * tmp709);
+	       }
+	  }
+	  {
+	       fftw_real tmp77;
+	       fftw_real tmp412;
+	       fftw_real tmp409;
+	       fftw_real tmp802;
+	       fftw_real tmp88;
+	       fftw_real tmp410;
+	       fftw_real tmp415;
+	       fftw_real tmp803;
+	       fftw_real tmp801;
+	       fftw_real tmp804;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp71;
+		    fftw_real tmp407;
+		    fftw_real tmp76;
+		    fftw_real tmp408;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp68;
+			 fftw_real tmp70;
+			 fftw_real tmp67;
+			 fftw_real tmp69;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp68 = c_re(inout[60 * iostride]);
+			 tmp70 = c_im(inout[60 * iostride]);
+			 tmp67 = c_re(W[59]);
+			 tmp69 = c_im(W[59]);
+			 tmp71 = (tmp67 * tmp68) - (tmp69 * tmp70);
+			 tmp407 = (tmp69 * tmp68) + (tmp67 * tmp70);
+		    }
+		    {
+			 fftw_real tmp73;
+			 fftw_real tmp75;
+			 fftw_real tmp72;
+			 fftw_real tmp74;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp73 = c_re(inout[28 * iostride]);
+			 tmp75 = c_im(inout[28 * iostride]);
+			 tmp72 = c_re(W[27]);
+			 tmp74 = c_im(W[27]);
+			 tmp76 = (tmp72 * tmp73) - (tmp74 * tmp75);
+			 tmp408 = (tmp74 * tmp73) + (tmp72 * tmp75);
+		    }
+		    tmp77 = tmp71 + tmp76;
+		    tmp412 = tmp71 - tmp76;
+		    tmp409 = tmp407 - tmp408;
+		    tmp802 = tmp407 + tmp408;
+	       }
+	       {
+		    fftw_real tmp82;
+		    fftw_real tmp413;
+		    fftw_real tmp87;
+		    fftw_real tmp414;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp79;
+			 fftw_real tmp81;
+			 fftw_real tmp78;
+			 fftw_real tmp80;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp79 = c_re(inout[12 * iostride]);
+			 tmp81 = c_im(inout[12 * iostride]);
+			 tmp78 = c_re(W[11]);
+			 tmp80 = c_im(W[11]);
+			 tmp82 = (tmp78 * tmp79) - (tmp80 * tmp81);
+			 tmp413 = (tmp80 * tmp79) + (tmp78 * tmp81);
+		    }
+		    {
+			 fftw_real tmp84;
+			 fftw_real tmp86;
+			 fftw_real tmp83;
+			 fftw_real tmp85;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp84 = c_re(inout[44 * iostride]);
+			 tmp86 = c_im(inout[44 * iostride]);
+			 tmp83 = c_re(W[43]);
+			 tmp85 = c_im(W[43]);
+			 tmp87 = (tmp83 * tmp84) - (tmp85 * tmp86);
+			 tmp414 = (tmp85 * tmp84) + (tmp83 * tmp86);
+		    }
+		    tmp88 = tmp82 + tmp87;
+		    tmp410 = tmp82 - tmp87;
+		    tmp415 = tmp413 - tmp414;
+		    tmp803 = tmp413 + tmp414;
+	       }
+	       tmp89 = tmp77 + tmp88;
+	       tmp801 = tmp77 - tmp88;
+	       tmp804 = tmp802 - tmp803;
+	       tmp805 = tmp801 + tmp804;
+	       tmp909 = tmp801 - tmp804;
+	       tmp957 = tmp802 + tmp803;
+	       {
+		    fftw_real tmp411;
+		    fftw_real tmp416;
+		    fftw_real tmp663;
+		    fftw_real tmp664;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp411 = tmp409 + tmp410;
+		    tmp416 = tmp412 - tmp415;
+		    tmp417 = (K382683432 * tmp411) + (K923879532 * tmp416);
+		    tmp609 = (K382683432 * tmp416) - (K923879532 * tmp411);
+		    tmp663 = tmp409 - tmp410;
+		    tmp664 = tmp412 + tmp415;
+		    tmp665 = (K923879532 * tmp663) + (K382683432 * tmp664);
+		    tmp745 = (K923879532 * tmp664) - (K382683432 * tmp663);
+	       }
+	  }
+	  {
+	       fftw_real tmp143;
+	       fftw_real tmp447;
+	       fftw_real tmp148;
+	       fftw_real tmp448;
+	       fftw_real tmp149;
+	       fftw_real tmp819;
+	       fftw_real tmp183;
+	       fftw_real tmp452;
+	       fftw_real tmp455;
+	       fftw_real tmp826;
+	       fftw_real tmp154;
+	       fftw_real tmp465;
+	       fftw_real tmp159;
+	       fftw_real tmp466;
+	       fftw_real tmp160;
+	       fftw_real tmp820;
+	       fftw_real tmp172;
+	       fftw_real tmp457;
+	       fftw_real tmp460;
+	       fftw_real tmp825;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp140;
+		    fftw_real tmp142;
+		    fftw_real tmp139;
+		    fftw_real tmp141;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp140 = c_re(inout[62 * iostride]);
+		    tmp142 = c_im(inout[62 * iostride]);
+		    tmp139 = c_re(W[61]);
+		    tmp141 = c_im(W[61]);
+		    tmp143 = (tmp139 * tmp140) - (tmp141 * tmp142);
+		    tmp447 = (tmp141 * tmp140) + (tmp139 * tmp142);
+	       }
+	       {
+		    fftw_real tmp145;
+		    fftw_real tmp147;
+		    fftw_real tmp144;
+		    fftw_real tmp146;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp145 = c_re(inout[30 * iostride]);
+		    tmp147 = c_im(inout[30 * iostride]);
+		    tmp144 = c_re(W[29]);
+		    tmp146 = c_im(W[29]);
+		    tmp148 = (tmp144 * tmp145) - (tmp146 * tmp147);
+		    tmp448 = (tmp146 * tmp145) + (tmp144 * tmp147);
+	       }
+	       tmp149 = tmp143 + tmp148;
+	       tmp819 = tmp447 + tmp448;
+	       {
+		    fftw_real tmp177;
+		    fftw_real tmp453;
+		    fftw_real tmp182;
+		    fftw_real tmp454;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp174;
+			 fftw_real tmp176;
+			 fftw_real tmp173;
+			 fftw_real tmp175;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp174 = c_re(inout[54 * iostride]);
+			 tmp176 = c_im(inout[54 * iostride]);
+			 tmp173 = c_re(W[53]);
+			 tmp175 = c_im(W[53]);
+			 tmp177 = (tmp173 * tmp174) - (tmp175 * tmp176);
+			 tmp453 = (tmp175 * tmp174) + (tmp173 * tmp176);
+		    }
+		    {
+			 fftw_real tmp179;
+			 fftw_real tmp181;
+			 fftw_real tmp178;
+			 fftw_real tmp180;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp179 = c_re(inout[22 * iostride]);
+			 tmp181 = c_im(inout[22 * iostride]);
+			 tmp178 = c_re(W[21]);
+			 tmp180 = c_im(W[21]);
+			 tmp182 = (tmp178 * tmp179) - (tmp180 * tmp181);
+			 tmp454 = (tmp180 * tmp179) + (tmp178 * tmp181);
+		    }
+		    tmp183 = tmp177 + tmp182;
+		    tmp452 = tmp177 - tmp182;
+		    tmp455 = tmp453 - tmp454;
+		    tmp826 = tmp453 + tmp454;
+	       }
+	       {
+		    fftw_real tmp151;
+		    fftw_real tmp153;
+		    fftw_real tmp150;
+		    fftw_real tmp152;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp151 = c_re(inout[14 * iostride]);
+		    tmp153 = c_im(inout[14 * iostride]);
+		    tmp150 = c_re(W[13]);
+		    tmp152 = c_im(W[13]);
+		    tmp154 = (tmp150 * tmp151) - (tmp152 * tmp153);
+		    tmp465 = (tmp152 * tmp151) + (tmp150 * tmp153);
+	       }
+	       {
+		    fftw_real tmp156;
+		    fftw_real tmp158;
+		    fftw_real tmp155;
+		    fftw_real tmp157;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp156 = c_re(inout[46 * iostride]);
+		    tmp158 = c_im(inout[46 * iostride]);
+		    tmp155 = c_re(W[45]);
+		    tmp157 = c_im(W[45]);
+		    tmp159 = (tmp155 * tmp156) - (tmp157 * tmp158);
+		    tmp466 = (tmp157 * tmp156) + (tmp155 * tmp158);
+	       }
+	       tmp160 = tmp154 + tmp159;
+	       tmp820 = tmp465 + tmp466;
+	       {
+		    fftw_real tmp166;
+		    fftw_real tmp458;
+		    fftw_real tmp171;
+		    fftw_real tmp459;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp163;
+			 fftw_real tmp165;
+			 fftw_real tmp162;
+			 fftw_real tmp164;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp163 = c_re(inout[6 * iostride]);
+			 tmp165 = c_im(inout[6 * iostride]);
+			 tmp162 = c_re(W[5]);
+			 tmp164 = c_im(W[5]);
+			 tmp166 = (tmp162 * tmp163) - (tmp164 * tmp165);
+			 tmp458 = (tmp164 * tmp163) + (tmp162 * tmp165);
+		    }
+		    {
+			 fftw_real tmp168;
+			 fftw_real tmp170;
+			 fftw_real tmp167;
+			 fftw_real tmp169;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp168 = c_re(inout[38 * iostride]);
+			 tmp170 = c_im(inout[38 * iostride]);
+			 tmp167 = c_re(W[37]);
+			 tmp169 = c_im(W[37]);
+			 tmp171 = (tmp167 * tmp168) - (tmp169 * tmp170);
+			 tmp459 = (tmp169 * tmp168) + (tmp167 * tmp170);
+		    }
+		    tmp172 = tmp166 + tmp171;
+		    tmp457 = tmp166 - tmp171;
+		    tmp460 = tmp458 - tmp459;
+		    tmp825 = tmp458 + tmp459;
+	       }
+	       {
+		    fftw_real tmp821;
+		    fftw_real tmp822;
+		    fftw_real tmp824;
+		    fftw_real tmp827;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp161 = tmp149 + tmp160;
+		    tmp184 = tmp172 + tmp183;
+		    tmp965 = tmp161 - tmp184;
+		    tmp821 = tmp819 - tmp820;
+		    tmp822 = tmp183 - tmp172;
+		    tmp823 = tmp821 - tmp822;
+		    tmp915 = tmp821 + tmp822;
+		    tmp966 = tmp819 + tmp820;
+		    tmp967 = tmp825 + tmp826;
+		    tmp968 = tmp966 - tmp967;
+		    tmp824 = tmp149 - tmp160;
+		    tmp827 = tmp825 - tmp826;
+		    tmp828 = tmp824 - tmp827;
+		    tmp916 = tmp824 + tmp827;
+	       }
+	       {
+		    fftw_real tmp449;
+		    fftw_real tmp450;
+		    fftw_real tmp464;
+		    fftw_real tmp467;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp449 = tmp447 - tmp448;
+		    tmp450 = tmp154 - tmp159;
+		    tmp451 = tmp449 + tmp450;
+		    tmp678 = tmp449 - tmp450;
+		    tmp464 = tmp143 - tmp148;
+		    tmp467 = tmp465 - tmp466;
+		    tmp468 = tmp464 - tmp467;
+		    tmp675 = tmp464 + tmp467;
+	       }
+	       {
+		    fftw_real tmp469;
+		    fftw_real tmp470;
+		    fftw_real tmp456;
+		    fftw_real tmp461;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp469 = tmp460 - tmp457;
+		    tmp470 = tmp452 + tmp455;
+		    tmp471 = K707106781 * (tmp469 - tmp470);
+		    tmp679 = K707106781 * (tmp469 + tmp470);
+		    tmp456 = tmp452 - tmp455;
+		    tmp461 = tmp457 + tmp460;
+		    tmp462 = K707106781 * (tmp456 - tmp461);
+		    tmp676 = K707106781 * (tmp461 + tmp456);
+	       }
+	  }
+	  {
+	       fftw_real tmp96;
+	       fftw_real tmp420;
+	       fftw_real tmp101;
+	       fftw_real tmp421;
+	       fftw_real tmp102;
+	       fftw_real tmp808;
+	       fftw_real tmp136;
+	       fftw_real tmp425;
+	       fftw_real tmp428;
+	       fftw_real tmp815;
+	       fftw_real tmp107;
+	       fftw_real tmp438;
+	       fftw_real tmp112;
+	       fftw_real tmp439;
+	       fftw_real tmp113;
+	       fftw_real tmp809;
+	       fftw_real tmp125;
+	       fftw_real tmp430;
+	       fftw_real tmp433;
+	       fftw_real tmp814;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp93;
+		    fftw_real tmp95;
+		    fftw_real tmp92;
+		    fftw_real tmp94;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp93 = c_re(inout[2 * iostride]);
+		    tmp95 = c_im(inout[2 * iostride]);
+		    tmp92 = c_re(W[1]);
+		    tmp94 = c_im(W[1]);
+		    tmp96 = (tmp92 * tmp93) - (tmp94 * tmp95);
+		    tmp420 = (tmp94 * tmp93) + (tmp92 * tmp95);
+	       }
+	       {
+		    fftw_real tmp98;
+		    fftw_real tmp100;
+		    fftw_real tmp97;
+		    fftw_real tmp99;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp98 = c_re(inout[34 * iostride]);
+		    tmp100 = c_im(inout[34 * iostride]);
+		    tmp97 = c_re(W[33]);
+		    tmp99 = c_im(W[33]);
+		    tmp101 = (tmp97 * tmp98) - (tmp99 * tmp100);
+		    tmp421 = (tmp99 * tmp98) + (tmp97 * tmp100);
+	       }
+	       tmp102 = tmp96 + tmp101;
+	       tmp808 = tmp420 + tmp421;
+	       {
+		    fftw_real tmp130;
+		    fftw_real tmp426;
+		    fftw_real tmp135;
+		    fftw_real tmp427;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp127;
+			 fftw_real tmp129;
+			 fftw_real tmp126;
+			 fftw_real tmp128;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp127 = c_re(inout[58 * iostride]);
+			 tmp129 = c_im(inout[58 * iostride]);
+			 tmp126 = c_re(W[57]);
+			 tmp128 = c_im(W[57]);
+			 tmp130 = (tmp126 * tmp127) - (tmp128 * tmp129);
+			 tmp426 = (tmp128 * tmp127) + (tmp126 * tmp129);
+		    }
+		    {
+			 fftw_real tmp132;
+			 fftw_real tmp134;
+			 fftw_real tmp131;
+			 fftw_real tmp133;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp132 = c_re(inout[26 * iostride]);
+			 tmp134 = c_im(inout[26 * iostride]);
+			 tmp131 = c_re(W[25]);
+			 tmp133 = c_im(W[25]);
+			 tmp135 = (tmp131 * tmp132) - (tmp133 * tmp134);
+			 tmp427 = (tmp133 * tmp132) + (tmp131 * tmp134);
+		    }
+		    tmp136 = tmp130 + tmp135;
+		    tmp425 = tmp130 - tmp135;
+		    tmp428 = tmp426 - tmp427;
+		    tmp815 = tmp426 + tmp427;
+	       }
+	       {
+		    fftw_real tmp104;
+		    fftw_real tmp106;
+		    fftw_real tmp103;
+		    fftw_real tmp105;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp104 = c_re(inout[18 * iostride]);
+		    tmp106 = c_im(inout[18 * iostride]);
+		    tmp103 = c_re(W[17]);
+		    tmp105 = c_im(W[17]);
+		    tmp107 = (tmp103 * tmp104) - (tmp105 * tmp106);
+		    tmp438 = (tmp105 * tmp104) + (tmp103 * tmp106);
+	       }
+	       {
+		    fftw_real tmp109;
+		    fftw_real tmp111;
+		    fftw_real tmp108;
+		    fftw_real tmp110;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp109 = c_re(inout[50 * iostride]);
+		    tmp111 = c_im(inout[50 * iostride]);
+		    tmp108 = c_re(W[49]);
+		    tmp110 = c_im(W[49]);
+		    tmp112 = (tmp108 * tmp109) - (tmp110 * tmp111);
+		    tmp439 = (tmp110 * tmp109) + (tmp108 * tmp111);
+	       }
+	       tmp113 = tmp107 + tmp112;
+	       tmp809 = tmp438 + tmp439;
+	       {
+		    fftw_real tmp119;
+		    fftw_real tmp431;
+		    fftw_real tmp124;
+		    fftw_real tmp432;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp116;
+			 fftw_real tmp118;
+			 fftw_real tmp115;
+			 fftw_real tmp117;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp116 = c_re(inout[10 * iostride]);
+			 tmp118 = c_im(inout[10 * iostride]);
+			 tmp115 = c_re(W[9]);
+			 tmp117 = c_im(W[9]);
+			 tmp119 = (tmp115 * tmp116) - (tmp117 * tmp118);
+			 tmp431 = (tmp117 * tmp116) + (tmp115 * tmp118);
+		    }
+		    {
+			 fftw_real tmp121;
+			 fftw_real tmp123;
+			 fftw_real tmp120;
+			 fftw_real tmp122;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp121 = c_re(inout[42 * iostride]);
+			 tmp123 = c_im(inout[42 * iostride]);
+			 tmp120 = c_re(W[41]);
+			 tmp122 = c_im(W[41]);
+			 tmp124 = (tmp120 * tmp121) - (tmp122 * tmp123);
+			 tmp432 = (tmp122 * tmp121) + (tmp120 * tmp123);
+		    }
+		    tmp125 = tmp119 + tmp124;
+		    tmp430 = tmp119 - tmp124;
+		    tmp433 = tmp431 - tmp432;
+		    tmp814 = tmp431 + tmp432;
+	       }
+	       {
+		    fftw_real tmp810;
+		    fftw_real tmp811;
+		    fftw_real tmp813;
+		    fftw_real tmp816;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp114 = tmp102 + tmp113;
+		    tmp137 = tmp125 + tmp136;
+		    tmp963 = tmp114 - tmp137;
+		    tmp810 = tmp808 - tmp809;
+		    tmp811 = tmp136 - tmp125;
+		    tmp812 = tmp810 - tmp811;
+		    tmp912 = tmp810 + tmp811;
+		    tmp960 = tmp808 + tmp809;
+		    tmp961 = tmp814 + tmp815;
+		    tmp962 = tmp960 - tmp961;
+		    tmp813 = tmp102 - tmp113;
+		    tmp816 = tmp814 - tmp815;
+		    tmp817 = tmp813 - tmp816;
+		    tmp913 = tmp813 + tmp816;
+	       }
+	       {
+		    fftw_real tmp422;
+		    fftw_real tmp423;
+		    fftw_real tmp437;
+		    fftw_real tmp440;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp422 = tmp420 - tmp421;
+		    tmp423 = tmp107 - tmp112;
+		    tmp424 = tmp422 + tmp423;
+		    tmp668 = tmp422 - tmp423;
+		    tmp437 = tmp96 - tmp101;
+		    tmp440 = tmp438 - tmp439;
+		    tmp441 = tmp437 - tmp440;
+		    tmp671 = tmp437 + tmp440;
+	       }
+	       {
+		    fftw_real tmp442;
+		    fftw_real tmp443;
+		    fftw_real tmp429;
+		    fftw_real tmp434;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp442 = tmp433 - tmp430;
+		    tmp443 = tmp425 + tmp428;
+		    tmp444 = K707106781 * (tmp442 - tmp443);
+		    tmp669 = K707106781 * (tmp442 + tmp443);
+		    tmp429 = tmp425 - tmp428;
+		    tmp434 = tmp430 + tmp433;
+		    tmp435 = K707106781 * (tmp429 - tmp434);
+		    tmp672 = K707106781 * (tmp434 + tmp429);
+	       }
+	  }
+	  {
+	       fftw_real tmp192;
+	       fftw_real tmp476;
+	       fftw_real tmp197;
+	       fftw_real tmp477;
+	       fftw_real tmp198;
+	       fftw_real tmp832;
+	       fftw_real tmp232;
+	       fftw_real tmp481;
+	       fftw_real tmp484;
+	       fftw_real tmp851;
+	       fftw_real tmp203;
+	       fftw_real tmp518;
+	       fftw_real tmp208;
+	       fftw_real tmp519;
+	       fftw_real tmp209;
+	       fftw_real tmp833;
+	       fftw_real tmp221;
+	       fftw_real tmp486;
+	       fftw_real tmp489;
+	       fftw_real tmp850;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp189;
+		    fftw_real tmp191;
+		    fftw_real tmp188;
+		    fftw_real tmp190;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp189 = c_re(inout[iostride]);
+		    tmp191 = c_im(inout[iostride]);
+		    tmp188 = c_re(W[0]);
+		    tmp190 = c_im(W[0]);
+		    tmp192 = (tmp188 * tmp189) - (tmp190 * tmp191);
+		    tmp476 = (tmp190 * tmp189) + (tmp188 * tmp191);
+	       }
+	       {
+		    fftw_real tmp194;
+		    fftw_real tmp196;
+		    fftw_real tmp193;
+		    fftw_real tmp195;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp194 = c_re(inout[33 * iostride]);
+		    tmp196 = c_im(inout[33 * iostride]);
+		    tmp193 = c_re(W[32]);
+		    tmp195 = c_im(W[32]);
+		    tmp197 = (tmp193 * tmp194) - (tmp195 * tmp196);
+		    tmp477 = (tmp195 * tmp194) + (tmp193 * tmp196);
+	       }
+	       tmp198 = tmp192 + tmp197;
+	       tmp832 = tmp476 + tmp477;
+	       {
+		    fftw_real tmp226;
+		    fftw_real tmp482;
+		    fftw_real tmp231;
+		    fftw_real tmp483;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp223;
+			 fftw_real tmp225;
+			 fftw_real tmp222;
+			 fftw_real tmp224;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp223 = c_re(inout[57 * iostride]);
+			 tmp225 = c_im(inout[57 * iostride]);
+			 tmp222 = c_re(W[56]);
+			 tmp224 = c_im(W[56]);
+			 tmp226 = (tmp222 * tmp223) - (tmp224 * tmp225);
+			 tmp482 = (tmp224 * tmp223) + (tmp222 * tmp225);
+		    }
+		    {
+			 fftw_real tmp228;
+			 fftw_real tmp230;
+			 fftw_real tmp227;
+			 fftw_real tmp229;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp228 = c_re(inout[25 * iostride]);
+			 tmp230 = c_im(inout[25 * iostride]);
+			 tmp227 = c_re(W[24]);
+			 tmp229 = c_im(W[24]);
+			 tmp231 = (tmp227 * tmp228) - (tmp229 * tmp230);
+			 tmp483 = (tmp229 * tmp228) + (tmp227 * tmp230);
+		    }
+		    tmp232 = tmp226 + tmp231;
+		    tmp481 = tmp226 - tmp231;
+		    tmp484 = tmp482 - tmp483;
+		    tmp851 = tmp482 + tmp483;
+	       }
+	       {
+		    fftw_real tmp200;
+		    fftw_real tmp202;
+		    fftw_real tmp199;
+		    fftw_real tmp201;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp200 = c_re(inout[17 * iostride]);
+		    tmp202 = c_im(inout[17 * iostride]);
+		    tmp199 = c_re(W[16]);
+		    tmp201 = c_im(W[16]);
+		    tmp203 = (tmp199 * tmp200) - (tmp201 * tmp202);
+		    tmp518 = (tmp201 * tmp200) + (tmp199 * tmp202);
+	       }
+	       {
+		    fftw_real tmp205;
+		    fftw_real tmp207;
+		    fftw_real tmp204;
+		    fftw_real tmp206;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp205 = c_re(inout[49 * iostride]);
+		    tmp207 = c_im(inout[49 * iostride]);
+		    tmp204 = c_re(W[48]);
+		    tmp206 = c_im(W[48]);
+		    tmp208 = (tmp204 * tmp205) - (tmp206 * tmp207);
+		    tmp519 = (tmp206 * tmp205) + (tmp204 * tmp207);
+	       }
+	       tmp209 = tmp203 + tmp208;
+	       tmp833 = tmp518 + tmp519;
+	       {
+		    fftw_real tmp215;
+		    fftw_real tmp487;
+		    fftw_real tmp220;
+		    fftw_real tmp488;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp212;
+			 fftw_real tmp214;
+			 fftw_real tmp211;
+			 fftw_real tmp213;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp212 = c_re(inout[9 * iostride]);
+			 tmp214 = c_im(inout[9 * iostride]);
+			 tmp211 = c_re(W[8]);
+			 tmp213 = c_im(W[8]);
+			 tmp215 = (tmp211 * tmp212) - (tmp213 * tmp214);
+			 tmp487 = (tmp213 * tmp212) + (tmp211 * tmp214);
+		    }
+		    {
+			 fftw_real tmp217;
+			 fftw_real tmp219;
+			 fftw_real tmp216;
+			 fftw_real tmp218;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp217 = c_re(inout[41 * iostride]);
+			 tmp219 = c_im(inout[41 * iostride]);
+			 tmp216 = c_re(W[40]);
+			 tmp218 = c_im(W[40]);
+			 tmp220 = (tmp216 * tmp217) - (tmp218 * tmp219);
+			 tmp488 = (tmp218 * tmp217) + (tmp216 * tmp219);
+		    }
+		    tmp221 = tmp215 + tmp220;
+		    tmp486 = tmp215 - tmp220;
+		    tmp489 = tmp487 - tmp488;
+		    tmp850 = tmp487 + tmp488;
+	       }
+	       {
+		    fftw_real tmp210;
+		    fftw_real tmp233;
+		    fftw_real tmp834;
+		    fftw_real tmp835;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp210 = tmp198 + tmp209;
+		    tmp233 = tmp221 + tmp232;
+		    tmp234 = tmp210 + tmp233;
+		    tmp977 = tmp210 - tmp233;
+		    tmp834 = tmp832 - tmp833;
+		    tmp835 = tmp232 - tmp221;
+		    tmp836 = tmp834 - tmp835;
+		    tmp923 = tmp834 + tmp835;
+	       }
+	       {
+		    fftw_real tmp972;
+		    fftw_real tmp973;
+		    fftw_real tmp849;
+		    fftw_real tmp852;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp972 = tmp832 + tmp833;
+		    tmp973 = tmp850 + tmp851;
+		    tmp974 = tmp972 - tmp973;
+		    tmp1021 = tmp972 + tmp973;
+		    tmp849 = tmp198 - tmp209;
+		    tmp852 = tmp850 - tmp851;
+		    tmp853 = tmp849 - tmp852;
+		    tmp920 = tmp849 + tmp852;
+	       }
+	       {
+		    fftw_real tmp478;
+		    fftw_real tmp479;
+		    fftw_real tmp517;
+		    fftw_real tmp520;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp478 = tmp476 - tmp477;
+		    tmp479 = tmp203 - tmp208;
+		    tmp480 = tmp478 + tmp479;
+		    tmp684 = tmp478 - tmp479;
+		    tmp517 = tmp192 - tmp197;
+		    tmp520 = tmp518 - tmp519;
+		    tmp521 = tmp517 - tmp520;
+		    tmp695 = tmp517 + tmp520;
+	       }
+	       {
+		    fftw_real tmp522;
+		    fftw_real tmp523;
+		    fftw_real tmp485;
+		    fftw_real tmp490;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp522 = tmp489 - tmp486;
+		    tmp523 = tmp481 + tmp484;
+		    tmp524 = K707106781 * (tmp522 - tmp523);
+		    tmp685 = K707106781 * (tmp522 + tmp523);
+		    tmp485 = tmp481 - tmp484;
+		    tmp490 = tmp486 + tmp489;
+		    tmp491 = K707106781 * (tmp485 - tmp490);
+		    tmp696 = K707106781 * (tmp490 + tmp485);
+	       }
+	  }
+	  {
+	       fftw_real tmp245;
+	       fftw_real tmp509;
+	       fftw_real tmp506;
+	       fftw_real tmp843;
+	       fftw_real tmp279;
+	       fftw_real tmp501;
+	       fftw_real tmp496;
+	       fftw_real tmp839;
+	       fftw_real tmp256;
+	       fftw_real tmp507;
+	       fftw_real tmp512;
+	       fftw_real tmp844;
+	       fftw_real tmp268;
+	       fftw_real tmp493;
+	       fftw_real tmp500;
+	       fftw_real tmp838;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp239;
+		    fftw_real tmp504;
+		    fftw_real tmp244;
+		    fftw_real tmp505;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp236;
+			 fftw_real tmp238;
+			 fftw_real tmp235;
+			 fftw_real tmp237;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp236 = c_re(inout[5 * iostride]);
+			 tmp238 = c_im(inout[5 * iostride]);
+			 tmp235 = c_re(W[4]);
+			 tmp237 = c_im(W[4]);
+			 tmp239 = (tmp235 * tmp236) - (tmp237 * tmp238);
+			 tmp504 = (tmp237 * tmp236) + (tmp235 * tmp238);
+		    }
+		    {
+			 fftw_real tmp241;
+			 fftw_real tmp243;
+			 fftw_real tmp240;
+			 fftw_real tmp242;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp241 = c_re(inout[37 * iostride]);
+			 tmp243 = c_im(inout[37 * iostride]);
+			 tmp240 = c_re(W[36]);
+			 tmp242 = c_im(W[36]);
+			 tmp244 = (tmp240 * tmp241) - (tmp242 * tmp243);
+			 tmp505 = (tmp242 * tmp241) + (tmp240 * tmp243);
+		    }
+		    tmp245 = tmp239 + tmp244;
+		    tmp509 = tmp239 - tmp244;
+		    tmp506 = tmp504 - tmp505;
+		    tmp843 = tmp504 + tmp505;
+	       }
+	       {
+		    fftw_real tmp273;
+		    fftw_real tmp494;
+		    fftw_real tmp278;
+		    fftw_real tmp495;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp270;
+			 fftw_real tmp272;
+			 fftw_real tmp269;
+			 fftw_real tmp271;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp270 = c_re(inout[13 * iostride]);
+			 tmp272 = c_im(inout[13 * iostride]);
+			 tmp269 = c_re(W[12]);
+			 tmp271 = c_im(W[12]);
+			 tmp273 = (tmp269 * tmp270) - (tmp271 * tmp272);
+			 tmp494 = (tmp271 * tmp270) + (tmp269 * tmp272);
+		    }
+		    {
+			 fftw_real tmp275;
+			 fftw_real tmp277;
+			 fftw_real tmp274;
+			 fftw_real tmp276;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp275 = c_re(inout[45 * iostride]);
+			 tmp277 = c_im(inout[45 * iostride]);
+			 tmp274 = c_re(W[44]);
+			 tmp276 = c_im(W[44]);
+			 tmp278 = (tmp274 * tmp275) - (tmp276 * tmp277);
+			 tmp495 = (tmp276 * tmp275) + (tmp274 * tmp277);
+		    }
+		    tmp279 = tmp273 + tmp278;
+		    tmp501 = tmp273 - tmp278;
+		    tmp496 = tmp494 - tmp495;
+		    tmp839 = tmp494 + tmp495;
+	       }
+	       {
+		    fftw_real tmp250;
+		    fftw_real tmp510;
+		    fftw_real tmp255;
+		    fftw_real tmp511;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp247;
+			 fftw_real tmp249;
+			 fftw_real tmp246;
+			 fftw_real tmp248;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp247 = c_re(inout[21 * iostride]);
+			 tmp249 = c_im(inout[21 * iostride]);
+			 tmp246 = c_re(W[20]);
+			 tmp248 = c_im(W[20]);
+			 tmp250 = (tmp246 * tmp247) - (tmp248 * tmp249);
+			 tmp510 = (tmp248 * tmp247) + (tmp246 * tmp249);
+		    }
+		    {
+			 fftw_real tmp252;
+			 fftw_real tmp254;
+			 fftw_real tmp251;
+			 fftw_real tmp253;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp252 = c_re(inout[53 * iostride]);
+			 tmp254 = c_im(inout[53 * iostride]);
+			 tmp251 = c_re(W[52]);
+			 tmp253 = c_im(W[52]);
+			 tmp255 = (tmp251 * tmp252) - (tmp253 * tmp254);
+			 tmp511 = (tmp253 * tmp252) + (tmp251 * tmp254);
+		    }
+		    tmp256 = tmp250 + tmp255;
+		    tmp507 = tmp250 - tmp255;
+		    tmp512 = tmp510 - tmp511;
+		    tmp844 = tmp510 + tmp511;
+	       }
+	       {
+		    fftw_real tmp262;
+		    fftw_real tmp498;
+		    fftw_real tmp267;
+		    fftw_real tmp499;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp259;
+			 fftw_real tmp261;
+			 fftw_real tmp258;
+			 fftw_real tmp260;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp259 = c_re(inout[61 * iostride]);
+			 tmp261 = c_im(inout[61 * iostride]);
+			 tmp258 = c_re(W[60]);
+			 tmp260 = c_im(W[60]);
+			 tmp262 = (tmp258 * tmp259) - (tmp260 * tmp261);
+			 tmp498 = (tmp260 * tmp259) + (tmp258 * tmp261);
+		    }
+		    {
+			 fftw_real tmp264;
+			 fftw_real tmp266;
+			 fftw_real tmp263;
+			 fftw_real tmp265;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp264 = c_re(inout[29 * iostride]);
+			 tmp266 = c_im(inout[29 * iostride]);
+			 tmp263 = c_re(W[28]);
+			 tmp265 = c_im(W[28]);
+			 tmp267 = (tmp263 * tmp264) - (tmp265 * tmp266);
+			 tmp499 = (tmp265 * tmp264) + (tmp263 * tmp266);
+		    }
+		    tmp268 = tmp262 + tmp267;
+		    tmp493 = tmp262 - tmp267;
+		    tmp500 = tmp498 - tmp499;
+		    tmp838 = tmp498 + tmp499;
+	       }
+	       {
+		    fftw_real tmp257;
+		    fftw_real tmp280;
+		    fftw_real tmp837;
+		    fftw_real tmp840;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp257 = tmp245 + tmp256;
+		    tmp280 = tmp268 + tmp279;
+		    tmp281 = tmp257 + tmp280;
+		    tmp975 = tmp280 - tmp257;
+		    tmp837 = tmp268 - tmp279;
+		    tmp840 = tmp838 - tmp839;
+		    tmp841 = tmp837 - tmp840;
+		    tmp855 = tmp837 + tmp840;
+	       }
+	       {
+		    fftw_real tmp978;
+		    fftw_real tmp979;
+		    fftw_real tmp842;
+		    fftw_real tmp845;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp978 = tmp843 + tmp844;
+		    tmp979 = tmp838 + tmp839;
+		    tmp980 = tmp978 - tmp979;
+		    tmp1022 = tmp978 + tmp979;
+		    tmp842 = tmp245 - tmp256;
+		    tmp845 = tmp843 - tmp844;
+		    tmp846 = tmp842 + tmp845;
+		    tmp854 = tmp845 - tmp842;
+	       }
+	       {
+		    fftw_real tmp497;
+		    fftw_real tmp502;
+		    fftw_real tmp687;
+		    fftw_real tmp688;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp497 = tmp493 - tmp496;
+		    tmp502 = tmp500 + tmp501;
+		    tmp503 = (K382683432 * tmp497) - (K923879532 * tmp502);
+		    tmp527 = (K382683432 * tmp502) + (K923879532 * tmp497);
+		    tmp687 = tmp493 + tmp496;
+		    tmp688 = tmp500 - tmp501;
+		    tmp689 = (K923879532 * tmp687) - (K382683432 * tmp688);
+		    tmp699 = (K923879532 * tmp688) + (K382683432 * tmp687);
+	       }
+	       {
+		    fftw_real tmp508;
+		    fftw_real tmp513;
+		    fftw_real tmp690;
+		    fftw_real tmp691;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp508 = tmp506 + tmp507;
+		    tmp513 = tmp509 - tmp512;
+		    tmp514 = (K923879532 * tmp508) + (K382683432 * tmp513);
+		    tmp526 = (K382683432 * tmp508) - (K923879532 * tmp513);
+		    tmp690 = tmp506 - tmp507;
+		    tmp691 = tmp509 + tmp512;
+		    tmp692 = (K382683432 * tmp690) + (K923879532 * tmp691);
+		    tmp698 = (K923879532 * tmp690) - (K382683432 * tmp691);
+	       }
+	  }
+	  {
+	       fftw_real tmp91;
+	       fftw_real tmp1015;
+	       fftw_real tmp1038;
+	       fftw_real tmp1039;
+	       fftw_real tmp1049;
+	       fftw_real tmp1055;
+	       fftw_real tmp186;
+	       fftw_real tmp1054;
+	       fftw_real tmp1024;
+	       fftw_real tmp1032;
+	       fftw_real tmp378;
+	       fftw_real tmp1051;
+	       fftw_real tmp1029;
+	       fftw_real tmp1033;
+	       fftw_real tmp1018;
+	       fftw_real tmp1040;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp43;
+		    fftw_real tmp90;
+		    fftw_real tmp1036;
+		    fftw_real tmp1037;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp43 = tmp19 + tmp42;
+		    tmp90 = tmp66 + tmp89;
+		    tmp91 = tmp43 + tmp90;
+		    tmp1015 = tmp43 - tmp90;
+		    tmp1036 = tmp1021 + tmp1022;
+		    tmp1037 = tmp1026 + tmp1027;
+		    tmp1038 = tmp1036 - tmp1037;
+		    tmp1039 = tmp1036 + tmp1037;
+	       }
+	       {
+		    fftw_real tmp1041;
+		    fftw_real tmp1048;
+		    fftw_real tmp138;
+		    fftw_real tmp185;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1041 = tmp956 + tmp957;
+		    tmp1048 = tmp1042 + tmp1047;
+		    tmp1049 = tmp1041 + tmp1048;
+		    tmp1055 = tmp1048 - tmp1041;
+		    tmp138 = tmp114 + tmp137;
+		    tmp185 = tmp161 + tmp184;
+		    tmp186 = tmp138 + tmp185;
+		    tmp1054 = tmp185 - tmp138;
+	       }
+	       {
+		    fftw_real tmp1020;
+		    fftw_real tmp1023;
+		    fftw_real tmp282;
+		    fftw_real tmp377;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1020 = tmp234 - tmp281;
+		    tmp1023 = tmp1021 - tmp1022;
+		    tmp1024 = tmp1020 + tmp1023;
+		    tmp1032 = tmp1023 - tmp1020;
+		    tmp282 = tmp234 + tmp281;
+		    tmp377 = tmp329 + tmp376;
+		    tmp378 = tmp282 + tmp377;
+		    tmp1051 = tmp377 - tmp282;
+	       }
+	       {
+		    fftw_real tmp1025;
+		    fftw_real tmp1028;
+		    fftw_real tmp1016;
+		    fftw_real tmp1017;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1025 = tmp329 - tmp376;
+		    tmp1028 = tmp1026 - tmp1027;
+		    tmp1029 = tmp1025 - tmp1028;
+		    tmp1033 = tmp1025 + tmp1028;
+		    tmp1016 = tmp960 + tmp961;
+		    tmp1017 = tmp966 + tmp967;
+		    tmp1018 = tmp1016 - tmp1017;
+		    tmp1040 = tmp1016 + tmp1017;
+	       }
+	       {
+		    fftw_real tmp187;
+		    fftw_real tmp1035;
+		    fftw_real tmp1050;
+		    fftw_real tmp1052;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp187 = tmp91 + tmp186;
+		    c_re(inout[32 * iostride]) = tmp187 - tmp378;
+		    c_re(inout[0]) = tmp187 + tmp378;
+		    tmp1035 = tmp91 - tmp186;
+		    c_re(inout[48 * iostride]) = tmp1035 - tmp1038;
+		    c_re(inout[16 * iostride]) = tmp1035 + tmp1038;
+		    {
+			 fftw_real tmp1019;
+			 fftw_real tmp1030;
+			 fftw_real tmp1057;
+			 fftw_real tmp1058;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp1019 = tmp1015 + tmp1018;
+			 tmp1030 = K707106781 * (tmp1024 + tmp1029);
+			 c_re(inout[40 * iostride]) = tmp1019 - tmp1030;
+			 c_re(inout[8 * iostride]) = tmp1019 + tmp1030;
+			 tmp1057 = K707106781 * (tmp1029 - tmp1024);
+			 tmp1058 = tmp1055 - tmp1054;
+			 c_im(inout[24 * iostride]) = tmp1057 + tmp1058;
+			 c_im(inout[56 * iostride]) = tmp1058 - tmp1057;
+		    }
+		    tmp1050 = tmp1040 + tmp1049;
+		    c_im(inout[0]) = tmp1039 + tmp1050;
+		    c_im(inout[32 * iostride]) = tmp1050 - tmp1039;
+		    tmp1052 = tmp1049 - tmp1040;
+		    c_im(inout[16 * iostride]) = tmp1051 + tmp1052;
+		    c_im(inout[48 * iostride]) = tmp1052 - tmp1051;
+		    {
+			 fftw_real tmp1053;
+			 fftw_real tmp1056;
+			 fftw_real tmp1031;
+			 fftw_real tmp1034;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp1053 = K707106781 * (tmp1032 + tmp1033);
+			 tmp1056 = tmp1054 + tmp1055;
+			 c_im(inout[8 * iostride]) = tmp1053 + tmp1056;
+			 c_im(inout[40 * iostride]) = tmp1056 - tmp1053;
+			 tmp1031 = tmp1015 - tmp1018;
+			 tmp1034 = K707106781 * (tmp1032 - tmp1033);
+			 c_re(inout[56 * iostride]) = tmp1031 - tmp1034;
+			 c_re(inout[24 * iostride]) = tmp1031 + tmp1034;
+		    }
+	       }
+	  }
+	  {
+	       fftw_real tmp959;
+	       fftw_real tmp999;
+	       fftw_real tmp1002;
+	       fftw_real tmp1068;
+	       fftw_real tmp970;
+	       fftw_real tmp1060;
+	       fftw_real tmp1063;
+	       fftw_real tmp1069;
+	       fftw_real tmp982;
+	       fftw_real tmp996;
+	       fftw_real tmp1006;
+	       fftw_real tmp1012;
+	       fftw_real tmp993;
+	       fftw_real tmp997;
+	       fftw_real tmp1009;
+	       fftw_real tmp1013;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp955;
+		    fftw_real tmp958;
+		    fftw_real tmp1000;
+		    fftw_real tmp1001;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp955 = tmp19 - tmp42;
+		    tmp958 = tmp956 - tmp957;
+		    tmp959 = tmp955 - tmp958;
+		    tmp999 = tmp955 + tmp958;
+		    tmp1000 = tmp963 + tmp962;
+		    tmp1001 = tmp965 - tmp968;
+		    tmp1002 = K707106781 * (tmp1000 + tmp1001);
+		    tmp1068 = K707106781 * (tmp1001 - tmp1000);
+	       }
+	       {
+		    fftw_real tmp964;
+		    fftw_real tmp969;
+		    fftw_real tmp1061;
+		    fftw_real tmp1062;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp964 = tmp962 - tmp963;
+		    tmp969 = tmp965 + tmp968;
+		    tmp970 = K707106781 * (tmp964 - tmp969);
+		    tmp1060 = K707106781 * (tmp964 + tmp969);
+		    tmp1061 = tmp89 - tmp66;
+		    tmp1062 = tmp1047 - tmp1042;
+		    tmp1063 = tmp1061 + tmp1062;
+		    tmp1069 = tmp1062 - tmp1061;
+	       }
+	       {
+		    fftw_real tmp976;
+		    fftw_real tmp981;
+		    fftw_real tmp1004;
+		    fftw_real tmp1005;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp976 = tmp974 - tmp975;
+		    tmp981 = tmp977 - tmp980;
+		    tmp982 = (K923879532 * tmp976) + (K382683432 * tmp981);
+		    tmp996 = (K382683432 * tmp976) - (K923879532 * tmp981);
+		    tmp1004 = tmp974 + tmp975;
+		    tmp1005 = tmp977 + tmp980;
+		    tmp1006 =
+			(K382683432 * tmp1004) + (K923879532 * tmp1005);
+		    tmp1012 =
+			(K923879532 * tmp1004) - (K382683432 * tmp1005);
+	       }
+	       {
+		    fftw_real tmp987;
+		    fftw_real tmp992;
+		    fftw_real tmp1007;
+		    fftw_real tmp1008;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp987 = tmp983 - tmp986;
+		    tmp992 = tmp990 - tmp991;
+		    tmp993 = (K382683432 * tmp987) - (K923879532 * tmp992);
+		    tmp997 = (K382683432 * tmp992) + (K923879532 * tmp987);
+		    tmp1007 = tmp983 + tmp986;
+		    tmp1008 = tmp990 + tmp991;
+		    tmp1009 =
+			(K923879532 * tmp1007) - (K382683432 * tmp1008);
+		    tmp1013 =
+			(K923879532 * tmp1008) + (K382683432 * tmp1007);
+	       }
+	       {
+		    fftw_real tmp971;
+		    fftw_real tmp994;
+		    fftw_real tmp995;
+		    fftw_real tmp998;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp971 = tmp959 + tmp970;
+		    tmp994 = tmp982 + tmp993;
+		    c_re(inout[44 * iostride]) = tmp971 - tmp994;
+		    c_re(inout[12 * iostride]) = tmp971 + tmp994;
+		    tmp995 = tmp959 - tmp970;
+		    tmp998 = tmp996 - tmp997;
+		    c_re(inout[60 * iostride]) = tmp995 - tmp998;
+		    c_re(inout[28 * iostride]) = tmp995 + tmp998;
+	       }
+	       {
+		    fftw_real tmp1067;
+		    fftw_real tmp1070;
+		    fftw_real tmp1071;
+		    fftw_real tmp1072;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1067 = tmp996 + tmp997;
+		    tmp1070 = tmp1068 + tmp1069;
+		    c_im(inout[12 * iostride]) = tmp1067 + tmp1070;
+		    c_im(inout[44 * iostride]) = tmp1070 - tmp1067;
+		    tmp1071 = tmp993 - tmp982;
+		    tmp1072 = tmp1069 - tmp1068;
+		    c_im(inout[28 * iostride]) = tmp1071 + tmp1072;
+		    c_im(inout[60 * iostride]) = tmp1072 - tmp1071;
+	       }
+	       {
+		    fftw_real tmp1003;
+		    fftw_real tmp1010;
+		    fftw_real tmp1011;
+		    fftw_real tmp1014;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1003 = tmp999 + tmp1002;
+		    tmp1010 = tmp1006 + tmp1009;
+		    c_re(inout[36 * iostride]) = tmp1003 - tmp1010;
+		    c_re(inout[4 * iostride]) = tmp1003 + tmp1010;
+		    tmp1011 = tmp999 - tmp1002;
+		    tmp1014 = tmp1012 - tmp1013;
+		    c_re(inout[52 * iostride]) = tmp1011 - tmp1014;
+		    c_re(inout[20 * iostride]) = tmp1011 + tmp1014;
+	       }
+	       {
+		    fftw_real tmp1059;
+		    fftw_real tmp1064;
+		    fftw_real tmp1065;
+		    fftw_real tmp1066;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1059 = tmp1012 + tmp1013;
+		    tmp1064 = tmp1060 + tmp1063;
+		    c_im(inout[4 * iostride]) = tmp1059 + tmp1064;
+		    c_im(inout[36 * iostride]) = tmp1064 - tmp1059;
+		    tmp1065 = tmp1009 - tmp1006;
+		    tmp1066 = tmp1063 - tmp1060;
+		    c_im(inout[20 * iostride]) = tmp1065 + tmp1066;
+		    c_im(inout[52 * iostride]) = tmp1066 - tmp1065;
+	       }
+	  }
+	  {
+	       fftw_real tmp419;
+	       fftw_real tmp591;
+	       fftw_real tmp1155;
+	       fftw_real tmp1161;
+	       fftw_real tmp474;
+	       fftw_real tmp1152;
+	       fftw_real tmp594;
+	       fftw_real tmp1160;
+	       fftw_real tmp530;
+	       fftw_real tmp588;
+	       fftw_real tmp598;
+	       fftw_real tmp604;
+	       fftw_real tmp585;
+	       fftw_real tmp589;
+	       fftw_real tmp601;
+	       fftw_real tmp605;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp395;
+		    fftw_real tmp418;
+		    fftw_real tmp1153;
+		    fftw_real tmp1154;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp395 = tmp383 - tmp394;
+		    tmp418 = tmp406 - tmp417;
+		    tmp419 = tmp395 - tmp418;
+		    tmp591 = tmp395 + tmp418;
+		    tmp1153 = tmp609 - tmp608;
+		    tmp1154 = tmp1139 - tmp1138;
+		    tmp1155 = tmp1153 + tmp1154;
+		    tmp1161 = tmp1154 - tmp1153;
+	       }
+	       {
+		    fftw_real tmp446;
+		    fftw_real tmp592;
+		    fftw_real tmp473;
+		    fftw_real tmp593;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp436;
+			 fftw_real tmp445;
+			 fftw_real tmp463;
+			 fftw_real tmp472;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp436 = tmp424 - tmp435;
+			 tmp445 = tmp441 - tmp444;
+			 tmp446 =
+			     (K195090322 * tmp436) - (K980785280 * tmp445);
+			 tmp592 =
+			     (K980785280 * tmp436) + (K195090322 * tmp445);
+			 tmp463 = tmp451 - tmp462;
+			 tmp472 = tmp468 - tmp471;
+			 tmp473 =
+			     (K195090322 * tmp463) + (K980785280 * tmp472);
+			 tmp593 =
+			     (K195090322 * tmp472) - (K980785280 * tmp463);
+		    }
+		    tmp474 = tmp446 - tmp473;
+		    tmp1152 = tmp446 + tmp473;
+		    tmp594 = tmp592 + tmp593;
+		    tmp1160 = tmp593 - tmp592;
+	       }
+	       {
+		    fftw_real tmp516;
+		    fftw_real tmp596;
+		    fftw_real tmp529;
+		    fftw_real tmp597;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp492;
+			 fftw_real tmp515;
+			 fftw_real tmp525;
+			 fftw_real tmp528;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp492 = tmp480 - tmp491;
+			 tmp515 = tmp503 - tmp514;
+			 tmp516 = tmp492 - tmp515;
+			 tmp596 = tmp492 + tmp515;
+			 tmp525 = tmp521 - tmp524;
+			 tmp528 = tmp526 - tmp527;
+			 tmp529 = tmp525 - tmp528;
+			 tmp597 = tmp525 + tmp528;
+		    }
+		    tmp530 = (K995184726 * tmp516) + (K098017140 * tmp529);
+		    tmp588 = (K098017140 * tmp516) - (K995184726 * tmp529);
+		    tmp598 = (K634393284 * tmp596) + (K773010453 * tmp597);
+		    tmp604 = (K773010453 * tmp596) - (K634393284 * tmp597);
+	       }
+	       {
+		    fftw_real tmp571;
+		    fftw_real tmp599;
+		    fftw_real tmp584;
+		    fftw_real tmp600;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp547;
+			 fftw_real tmp570;
+			 fftw_real tmp580;
+			 fftw_real tmp583;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp547 = tmp535 - tmp546;
+			 tmp570 = tmp558 - tmp569;
+			 tmp571 = tmp547 - tmp570;
+			 tmp599 = tmp547 + tmp570;
+			 tmp580 = tmp576 - tmp579;
+			 tmp583 = tmp581 - tmp582;
+			 tmp584 = tmp580 - tmp583;
+			 tmp600 = tmp580 + tmp583;
+		    }
+		    tmp585 = (K098017140 * tmp571) - (K995184726 * tmp584);
+		    tmp589 = (K098017140 * tmp584) + (K995184726 * tmp571);
+		    tmp601 = (K773010453 * tmp599) - (K634393284 * tmp600);
+		    tmp605 = (K773010453 * tmp600) + (K634393284 * tmp599);
+	       }
+	       {
+		    fftw_real tmp475;
+		    fftw_real tmp586;
+		    fftw_real tmp587;
+		    fftw_real tmp590;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp475 = tmp419 + tmp474;
+		    tmp586 = tmp530 + tmp585;
+		    c_re(inout[47 * iostride]) = tmp475 - tmp586;
+		    c_re(inout[15 * iostride]) = tmp475 + tmp586;
+		    tmp587 = tmp419 - tmp474;
+		    tmp590 = tmp588 - tmp589;
+		    c_re(inout[63 * iostride]) = tmp587 - tmp590;
+		    c_re(inout[31 * iostride]) = tmp587 + tmp590;
+	       }
+	       {
+		    fftw_real tmp1159;
+		    fftw_real tmp1162;
+		    fftw_real tmp1163;
+		    fftw_real tmp1164;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1159 = tmp588 + tmp589;
+		    tmp1162 = tmp1160 + tmp1161;
+		    c_im(inout[15 * iostride]) = tmp1159 + tmp1162;
+		    c_im(inout[47 * iostride]) = tmp1162 - tmp1159;
+		    tmp1163 = tmp585 - tmp530;
+		    tmp1164 = tmp1161 - tmp1160;
+		    c_im(inout[31 * iostride]) = tmp1163 + tmp1164;
+		    c_im(inout[63 * iostride]) = tmp1164 - tmp1163;
+	       }
+	       {
+		    fftw_real tmp595;
+		    fftw_real tmp602;
+		    fftw_real tmp603;
+		    fftw_real tmp606;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp595 = tmp591 + tmp594;
+		    tmp602 = tmp598 + tmp601;
+		    c_re(inout[39 * iostride]) = tmp595 - tmp602;
+		    c_re(inout[7 * iostride]) = tmp595 + tmp602;
+		    tmp603 = tmp591 - tmp594;
+		    tmp606 = tmp604 - tmp605;
+		    c_re(inout[55 * iostride]) = tmp603 - tmp606;
+		    c_re(inout[23 * iostride]) = tmp603 + tmp606;
+	       }
+	       {
+		    fftw_real tmp1151;
+		    fftw_real tmp1156;
+		    fftw_real tmp1157;
+		    fftw_real tmp1158;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1151 = tmp604 + tmp605;
+		    tmp1156 = tmp1152 + tmp1155;
+		    c_im(inout[7 * iostride]) = tmp1151 + tmp1156;
+		    c_im(inout[39 * iostride]) = tmp1156 - tmp1151;
+		    tmp1157 = tmp601 - tmp598;
+		    tmp1158 = tmp1155 - tmp1152;
+		    c_im(inout[23 * iostride]) = tmp1157 + tmp1158;
+		    c_im(inout[55 * iostride]) = tmp1158 - tmp1157;
+	       }
+	  }
+	  {
+	       fftw_real tmp611;
+	       fftw_real tmp639;
+	       fftw_real tmp1141;
+	       fftw_real tmp1147;
+	       fftw_real tmp618;
+	       fftw_real tmp1136;
+	       fftw_real tmp642;
+	       fftw_real tmp1146;
+	       fftw_real tmp626;
+	       fftw_real tmp636;
+	       fftw_real tmp646;
+	       fftw_real tmp652;
+	       fftw_real tmp633;
+	       fftw_real tmp637;
+	       fftw_real tmp649;
+	       fftw_real tmp653;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp607;
+		    fftw_real tmp610;
+		    fftw_real tmp1137;
+		    fftw_real tmp1140;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp607 = tmp383 + tmp394;
+		    tmp610 = tmp608 + tmp609;
+		    tmp611 = tmp607 - tmp610;
+		    tmp639 = tmp607 + tmp610;
+		    tmp1137 = tmp406 + tmp417;
+		    tmp1140 = tmp1138 + tmp1139;
+		    tmp1141 = tmp1137 + tmp1140;
+		    tmp1147 = tmp1140 - tmp1137;
+	       }
+	       {
+		    fftw_real tmp614;
+		    fftw_real tmp640;
+		    fftw_real tmp617;
+		    fftw_real tmp641;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp612;
+			 fftw_real tmp613;
+			 fftw_real tmp615;
+			 fftw_real tmp616;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp612 = tmp424 + tmp435;
+			 tmp613 = tmp441 + tmp444;
+			 tmp614 =
+			     (K831469612 * tmp612) - (K555570233 * tmp613);
+			 tmp640 =
+			     (K555570233 * tmp612) + (K831469612 * tmp613);
+			 tmp615 = tmp451 + tmp462;
+			 tmp616 = tmp468 + tmp471;
+			 tmp617 =
+			     (K831469612 * tmp615) + (K555570233 * tmp616);
+			 tmp641 =
+			     (K831469612 * tmp616) - (K555570233 * tmp615);
+		    }
+		    tmp618 = tmp614 - tmp617;
+		    tmp1136 = tmp614 + tmp617;
+		    tmp642 = tmp640 + tmp641;
+		    tmp1146 = tmp641 - tmp640;
+	       }
+	       {
+		    fftw_real tmp622;
+		    fftw_real tmp644;
+		    fftw_real tmp625;
+		    fftw_real tmp645;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp620;
+			 fftw_real tmp621;
+			 fftw_real tmp623;
+			 fftw_real tmp624;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp620 = tmp521 + tmp524;
+			 tmp621 = tmp514 + tmp503;
+			 tmp622 = tmp620 - tmp621;
+			 tmp644 = tmp620 + tmp621;
+			 tmp623 = tmp480 + tmp491;
+			 tmp624 = tmp526 + tmp527;
+			 tmp625 = tmp623 - tmp624;
+			 tmp645 = tmp623 + tmp624;
+		    }
+		    tmp626 = (K471396736 * tmp622) + (K881921264 * tmp625);
+		    tmp636 = (K471396736 * tmp625) - (K881921264 * tmp622);
+		    tmp646 = (K956940335 * tmp644) + (K290284677 * tmp645);
+		    tmp652 = (K956940335 * tmp645) - (K290284677 * tmp644);
+	       }
+	       {
+		    fftw_real tmp629;
+		    fftw_real tmp647;
+		    fftw_real tmp632;
+		    fftw_real tmp648;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp627;
+			 fftw_real tmp628;
+			 fftw_real tmp630;
+			 fftw_real tmp631;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp627 = tmp535 + tmp546;
+			 tmp628 = tmp582 + tmp581;
+			 tmp629 = tmp627 - tmp628;
+			 tmp647 = tmp627 + tmp628;
+			 tmp630 = tmp576 + tmp579;
+			 tmp631 = tmp558 + tmp569;
+			 tmp632 = tmp630 - tmp631;
+			 tmp648 = tmp630 + tmp631;
+		    }
+		    tmp633 = (K471396736 * tmp629) - (K881921264 * tmp632);
+		    tmp637 = (K881921264 * tmp629) + (K471396736 * tmp632);
+		    tmp649 = (K956940335 * tmp647) - (K290284677 * tmp648);
+		    tmp653 = (K290284677 * tmp647) + (K956940335 * tmp648);
+	       }
+	       {
+		    fftw_real tmp619;
+		    fftw_real tmp634;
+		    fftw_real tmp635;
+		    fftw_real tmp638;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp619 = tmp611 + tmp618;
+		    tmp634 = tmp626 + tmp633;
+		    c_re(inout[43 * iostride]) = tmp619 - tmp634;
+		    c_re(inout[11 * iostride]) = tmp619 + tmp634;
+		    tmp635 = tmp611 - tmp618;
+		    tmp638 = tmp636 - tmp637;
+		    c_re(inout[59 * iostride]) = tmp635 - tmp638;
+		    c_re(inout[27 * iostride]) = tmp635 + tmp638;
+	       }
+	       {
+		    fftw_real tmp1145;
+		    fftw_real tmp1148;
+		    fftw_real tmp1149;
+		    fftw_real tmp1150;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1145 = tmp636 + tmp637;
+		    tmp1148 = tmp1146 + tmp1147;
+		    c_im(inout[11 * iostride]) = tmp1145 + tmp1148;
+		    c_im(inout[43 * iostride]) = tmp1148 - tmp1145;
+		    tmp1149 = tmp633 - tmp626;
+		    tmp1150 = tmp1147 - tmp1146;
+		    c_im(inout[27 * iostride]) = tmp1149 + tmp1150;
+		    c_im(inout[59 * iostride]) = tmp1150 - tmp1149;
+	       }
+	       {
+		    fftw_real tmp643;
+		    fftw_real tmp650;
+		    fftw_real tmp651;
+		    fftw_real tmp654;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp643 = tmp639 + tmp642;
+		    tmp650 = tmp646 + tmp649;
+		    c_re(inout[35 * iostride]) = tmp643 - tmp650;
+		    c_re(inout[3 * iostride]) = tmp643 + tmp650;
+		    tmp651 = tmp639 - tmp642;
+		    tmp654 = tmp652 - tmp653;
+		    c_re(inout[51 * iostride]) = tmp651 - tmp654;
+		    c_re(inout[19 * iostride]) = tmp651 + tmp654;
+	       }
+	       {
+		    fftw_real tmp1135;
+		    fftw_real tmp1142;
+		    fftw_real tmp1143;
+		    fftw_real tmp1144;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1135 = tmp652 + tmp653;
+		    tmp1142 = tmp1136 + tmp1141;
+		    c_im(inout[3 * iostride]) = tmp1135 + tmp1142;
+		    c_im(inout[35 * iostride]) = tmp1142 - tmp1135;
+		    tmp1143 = tmp649 - tmp646;
+		    tmp1144 = tmp1141 - tmp1136;
+		    c_im(inout[19 * iostride]) = tmp1143 + tmp1144;
+		    c_im(inout[51 * iostride]) = tmp1144 - tmp1143;
+	       }
+	  }
+	  {
+	       fftw_real tmp807;
+	       fftw_real tmp891;
+	       fftw_real tmp830;
+	       fftw_real tmp1090;
+	       fftw_real tmp1093;
+	       fftw_real tmp1099;
+	       fftw_real tmp894;
+	       fftw_real tmp1098;
+	       fftw_real tmp885;
+	       fftw_real tmp889;
+	       fftw_real tmp901;
+	       fftw_real tmp905;
+	       fftw_real tmp858;
+	       fftw_real tmp888;
+	       fftw_real tmp898;
+	       fftw_real tmp904;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp795;
+		    fftw_real tmp806;
+		    fftw_real tmp892;
+		    fftw_real tmp893;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp795 = tmp791 - tmp794;
+		    tmp806 = K707106781 * (tmp800 - tmp805);
+		    tmp807 = tmp795 - tmp806;
+		    tmp891 = tmp795 + tmp806;
+		    {
+			 fftw_real tmp818;
+			 fftw_real tmp829;
+			 fftw_real tmp1091;
+			 fftw_real tmp1092;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp818 =
+			     (K382683432 * tmp812) - (K923879532 * tmp817);
+			 tmp829 =
+			     (K382683432 * tmp823) + (K923879532 * tmp828);
+			 tmp830 = tmp818 - tmp829;
+			 tmp1090 = tmp818 + tmp829;
+			 tmp1091 = K707106781 * (tmp909 - tmp908);
+			 tmp1092 = tmp1077 - tmp1076;
+			 tmp1093 = tmp1091 + tmp1092;
+			 tmp1099 = tmp1092 - tmp1091;
+		    }
+		    tmp892 = (K923879532 * tmp812) + (K382683432 * tmp817);
+		    tmp893 = (K382683432 * tmp828) - (K923879532 * tmp823);
+		    tmp894 = tmp892 + tmp893;
+		    tmp1098 = tmp893 - tmp892;
+		    {
+			 fftw_real tmp875;
+			 fftw_real tmp899;
+			 fftw_real tmp884;
+			 fftw_real tmp900;
+			 fftw_real tmp874;
+			 fftw_real tmp883;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp874 = K707106781 * (tmp868 - tmp873);
+			 tmp875 = tmp863 - tmp874;
+			 tmp899 = tmp863 + tmp874;
+			 tmp883 = K707106781 * (tmp881 - tmp882);
+			 tmp884 = tmp880 - tmp883;
+			 tmp900 = tmp880 + tmp883;
+			 tmp885 =
+			     (K195090322 * tmp875) - (K980785280 * tmp884);
+			 tmp889 =
+			     (K195090322 * tmp884) + (K980785280 * tmp875);
+			 tmp901 =
+			     (K831469612 * tmp899) - (K555570233 * tmp900);
+			 tmp905 =
+			     (K831469612 * tmp900) + (K555570233 * tmp899);
+		    }
+		    {
+			 fftw_real tmp848;
+			 fftw_real tmp896;
+			 fftw_real tmp857;
+			 fftw_real tmp897;
+			 fftw_real tmp847;
+			 fftw_real tmp856;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp847 = K707106781 * (tmp841 - tmp846);
+			 tmp848 = tmp836 - tmp847;
+			 tmp896 = tmp836 + tmp847;
+			 tmp856 = K707106781 * (tmp854 - tmp855);
+			 tmp857 = tmp853 - tmp856;
+			 tmp897 = tmp853 + tmp856;
+			 tmp858 =
+			     (K980785280 * tmp848) + (K195090322 * tmp857);
+			 tmp888 =
+			     (K195090322 * tmp848) - (K980785280 * tmp857);
+			 tmp898 =
+			     (K555570233 * tmp896) + (K831469612 * tmp897);
+			 tmp904 =
+			     (K831469612 * tmp896) - (K555570233 * tmp897);
+		    }
+	       }
+	       {
+		    fftw_real tmp831;
+		    fftw_real tmp886;
+		    fftw_real tmp887;
+		    fftw_real tmp890;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp831 = tmp807 + tmp830;
+		    tmp886 = tmp858 + tmp885;
+		    c_re(inout[46 * iostride]) = tmp831 - tmp886;
+		    c_re(inout[14 * iostride]) = tmp831 + tmp886;
+		    tmp887 = tmp807 - tmp830;
+		    tmp890 = tmp888 - tmp889;
+		    c_re(inout[62 * iostride]) = tmp887 - tmp890;
+		    c_re(inout[30 * iostride]) = tmp887 + tmp890;
+	       }
+	       {
+		    fftw_real tmp1097;
+		    fftw_real tmp1100;
+		    fftw_real tmp1101;
+		    fftw_real tmp1102;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1097 = tmp888 + tmp889;
+		    tmp1100 = tmp1098 + tmp1099;
+		    c_im(inout[14 * iostride]) = tmp1097 + tmp1100;
+		    c_im(inout[46 * iostride]) = tmp1100 - tmp1097;
+		    tmp1101 = tmp885 - tmp858;
+		    tmp1102 = tmp1099 - tmp1098;
+		    c_im(inout[30 * iostride]) = tmp1101 + tmp1102;
+		    c_im(inout[62 * iostride]) = tmp1102 - tmp1101;
+	       }
+	       {
+		    fftw_real tmp895;
+		    fftw_real tmp902;
+		    fftw_real tmp903;
+		    fftw_real tmp906;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp895 = tmp891 + tmp894;
+		    tmp902 = tmp898 + tmp901;
+		    c_re(inout[38 * iostride]) = tmp895 - tmp902;
+		    c_re(inout[6 * iostride]) = tmp895 + tmp902;
+		    tmp903 = tmp891 - tmp894;
+		    tmp906 = tmp904 - tmp905;
+		    c_re(inout[54 * iostride]) = tmp903 - tmp906;
+		    c_re(inout[22 * iostride]) = tmp903 + tmp906;
+	       }
+	       {
+		    fftw_real tmp1089;
+		    fftw_real tmp1094;
+		    fftw_real tmp1095;
+		    fftw_real tmp1096;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1089 = tmp904 + tmp905;
+		    tmp1094 = tmp1090 + tmp1093;
+		    c_im(inout[6 * iostride]) = tmp1089 + tmp1094;
+		    c_im(inout[38 * iostride]) = tmp1094 - tmp1089;
+		    tmp1095 = tmp901 - tmp898;
+		    tmp1096 = tmp1093 - tmp1090;
+		    c_im(inout[22 * iostride]) = tmp1095 + tmp1096;
+		    c_im(inout[54 * iostride]) = tmp1096 - tmp1095;
+	       }
+	  }
+	  {
+	       fftw_real tmp911;
+	       fftw_real tmp939;
+	       fftw_real tmp918;
+	       fftw_real tmp1074;
+	       fftw_real tmp1079;
+	       fftw_real tmp1085;
+	       fftw_real tmp942;
+	       fftw_real tmp1084;
+	       fftw_real tmp933;
+	       fftw_real tmp937;
+	       fftw_real tmp949;
+	       fftw_real tmp953;
+	       fftw_real tmp926;
+	       fftw_real tmp936;
+	       fftw_real tmp946;
+	       fftw_real tmp952;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp907;
+		    fftw_real tmp910;
+		    fftw_real tmp940;
+		    fftw_real tmp941;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp907 = tmp791 + tmp794;
+		    tmp910 = K707106781 * (tmp908 + tmp909);
+		    tmp911 = tmp907 - tmp910;
+		    tmp939 = tmp907 + tmp910;
+		    {
+			 fftw_real tmp914;
+			 fftw_real tmp917;
+			 fftw_real tmp1075;
+			 fftw_real tmp1078;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp914 =
+			     (K923879532 * tmp912) - (K382683432 * tmp913);
+			 tmp917 =
+			     (K923879532 * tmp915) + (K382683432 * tmp916);
+			 tmp918 = tmp914 - tmp917;
+			 tmp1074 = tmp914 + tmp917;
+			 tmp1075 = K707106781 * (tmp800 + tmp805);
+			 tmp1078 = tmp1076 + tmp1077;
+			 tmp1079 = tmp1075 + tmp1078;
+			 tmp1085 = tmp1078 - tmp1075;
+		    }
+		    tmp940 = (K382683432 * tmp912) + (K923879532 * tmp913);
+		    tmp941 = (K923879532 * tmp916) - (K382683432 * tmp915);
+		    tmp942 = tmp940 + tmp941;
+		    tmp1084 = tmp941 - tmp940;
+		    {
+			 fftw_real tmp929;
+			 fftw_real tmp947;
+			 fftw_real tmp932;
+			 fftw_real tmp948;
+			 fftw_real tmp928;
+			 fftw_real tmp931;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp928 = K707106781 * (tmp882 + tmp881);
+			 tmp929 = tmp927 - tmp928;
+			 tmp947 = tmp927 + tmp928;
+			 tmp931 = K707106781 * (tmp868 + tmp873);
+			 tmp932 = tmp930 - tmp931;
+			 tmp948 = tmp930 + tmp931;
+			 tmp933 =
+			     (K555570233 * tmp929) - (K831469612 * tmp932);
+			 tmp937 =
+			     (K831469612 * tmp929) + (K555570233 * tmp932);
+			 tmp949 =
+			     (K980785280 * tmp947) - (K195090322 * tmp948);
+			 tmp953 =
+			     (K195090322 * tmp947) + (K980785280 * tmp948);
+		    }
+		    {
+			 fftw_real tmp922;
+			 fftw_real tmp944;
+			 fftw_real tmp925;
+			 fftw_real tmp945;
+			 fftw_real tmp921;
+			 fftw_real tmp924;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp921 = K707106781 * (tmp846 + tmp841);
+			 tmp922 = tmp920 - tmp921;
+			 tmp944 = tmp920 + tmp921;
+			 tmp924 = K707106781 * (tmp854 + tmp855);
+			 tmp925 = tmp923 - tmp924;
+			 tmp945 = tmp923 + tmp924;
+			 tmp926 =
+			     (K555570233 * tmp922) + (K831469612 * tmp925);
+			 tmp936 =
+			     (K555570233 * tmp925) - (K831469612 * tmp922);
+			 tmp946 =
+			     (K980785280 * tmp944) + (K195090322 * tmp945);
+			 tmp952 =
+			     (K980785280 * tmp945) - (K195090322 * tmp944);
+		    }
+	       }
+	       {
+		    fftw_real tmp919;
+		    fftw_real tmp934;
+		    fftw_real tmp935;
+		    fftw_real tmp938;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp919 = tmp911 + tmp918;
+		    tmp934 = tmp926 + tmp933;
+		    c_re(inout[42 * iostride]) = tmp919 - tmp934;
+		    c_re(inout[10 * iostride]) = tmp919 + tmp934;
+		    tmp935 = tmp911 - tmp918;
+		    tmp938 = tmp936 - tmp937;
+		    c_re(inout[58 * iostride]) = tmp935 - tmp938;
+		    c_re(inout[26 * iostride]) = tmp935 + tmp938;
+	       }
+	       {
+		    fftw_real tmp1083;
+		    fftw_real tmp1086;
+		    fftw_real tmp1087;
+		    fftw_real tmp1088;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1083 = tmp936 + tmp937;
+		    tmp1086 = tmp1084 + tmp1085;
+		    c_im(inout[10 * iostride]) = tmp1083 + tmp1086;
+		    c_im(inout[42 * iostride]) = tmp1086 - tmp1083;
+		    tmp1087 = tmp933 - tmp926;
+		    tmp1088 = tmp1085 - tmp1084;
+		    c_im(inout[26 * iostride]) = tmp1087 + tmp1088;
+		    c_im(inout[58 * iostride]) = tmp1088 - tmp1087;
+	       }
+	       {
+		    fftw_real tmp943;
+		    fftw_real tmp950;
+		    fftw_real tmp951;
+		    fftw_real tmp954;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp943 = tmp939 + tmp942;
+		    tmp950 = tmp946 + tmp949;
+		    c_re(inout[34 * iostride]) = tmp943 - tmp950;
+		    c_re(inout[2 * iostride]) = tmp943 + tmp950;
+		    tmp951 = tmp939 - tmp942;
+		    tmp954 = tmp952 - tmp953;
+		    c_re(inout[50 * iostride]) = tmp951 - tmp954;
+		    c_re(inout[18 * iostride]) = tmp951 + tmp954;
+	       }
+	       {
+		    fftw_real tmp1073;
+		    fftw_real tmp1080;
+		    fftw_real tmp1081;
+		    fftw_real tmp1082;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1073 = tmp952 + tmp953;
+		    tmp1080 = tmp1074 + tmp1079;
+		    c_im(inout[2 * iostride]) = tmp1073 + tmp1080;
+		    c_im(inout[34 * iostride]) = tmp1080 - tmp1073;
+		    tmp1081 = tmp949 - tmp946;
+		    tmp1082 = tmp1079 - tmp1074;
+		    c_im(inout[18 * iostride]) = tmp1081 + tmp1082;
+		    c_im(inout[50 * iostride]) = tmp1082 - tmp1081;
+	       }
+	  }
+	  {
+	       fftw_real tmp667;
+	       fftw_real tmp727;
+	       fftw_real tmp1125;
+	       fftw_real tmp1131;
+	       fftw_real tmp682;
+	       fftw_real tmp1122;
+	       fftw_real tmp730;
+	       fftw_real tmp1130;
+	       fftw_real tmp702;
+	       fftw_real tmp724;
+	       fftw_real tmp734;
+	       fftw_real tmp740;
+	       fftw_real tmp721;
+	       fftw_real tmp725;
+	       fftw_real tmp737;
+	       fftw_real tmp741;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp659;
+		    fftw_real tmp666;
+		    fftw_real tmp1123;
+		    fftw_real tmp1124;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp659 = tmp655 - tmp658;
+		    tmp666 = tmp662 - tmp665;
+		    tmp667 = tmp659 - tmp666;
+		    tmp727 = tmp659 + tmp666;
+		    tmp1123 = tmp745 - tmp744;
+		    tmp1124 = tmp1109 - tmp1106;
+		    tmp1125 = tmp1123 + tmp1124;
+		    tmp1131 = tmp1124 - tmp1123;
+	       }
+	       {
+		    fftw_real tmp674;
+		    fftw_real tmp728;
+		    fftw_real tmp681;
+		    fftw_real tmp729;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp670;
+			 fftw_real tmp673;
+			 fftw_real tmp677;
+			 fftw_real tmp680;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp670 = tmp668 - tmp669;
+			 tmp673 = tmp671 - tmp672;
+			 tmp674 =
+			     (K555570233 * tmp670) - (K831469612 * tmp673);
+			 tmp728 =
+			     (K555570233 * tmp673) + (K831469612 * tmp670);
+			 tmp677 = tmp675 - tmp676;
+			 tmp680 = tmp678 - tmp679;
+			 tmp681 =
+			     (K831469612 * tmp677) + (K555570233 * tmp680);
+			 tmp729 =
+			     (K555570233 * tmp677) - (K831469612 * tmp680);
+		    }
+		    tmp682 = tmp674 - tmp681;
+		    tmp1122 = tmp674 + tmp681;
+		    tmp730 = tmp728 + tmp729;
+		    tmp1130 = tmp729 - tmp728;
+	       }
+	       {
+		    fftw_real tmp694;
+		    fftw_real tmp732;
+		    fftw_real tmp701;
+		    fftw_real tmp733;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp686;
+			 fftw_real tmp693;
+			 fftw_real tmp697;
+			 fftw_real tmp700;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp686 = tmp684 - tmp685;
+			 tmp693 = tmp689 - tmp692;
+			 tmp694 = tmp686 - tmp693;
+			 tmp732 = tmp686 + tmp693;
+			 tmp697 = tmp695 - tmp696;
+			 tmp700 = tmp698 - tmp699;
+			 tmp701 = tmp697 - tmp700;
+			 tmp733 = tmp697 + tmp700;
+		    }
+		    tmp702 = (K956940335 * tmp694) + (K290284677 * tmp701);
+		    tmp724 = (K290284677 * tmp694) - (K956940335 * tmp701);
+		    tmp734 = (K471396736 * tmp732) + (K881921264 * tmp733);
+		    tmp740 = (K881921264 * tmp732) - (K471396736 * tmp733);
+	       }
+	       {
+		    fftw_real tmp713;
+		    fftw_real tmp735;
+		    fftw_real tmp720;
+		    fftw_real tmp736;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp705;
+			 fftw_real tmp712;
+			 fftw_real tmp716;
+			 fftw_real tmp719;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp705 = tmp703 - tmp704;
+			 tmp712 = tmp708 - tmp711;
+			 tmp713 = tmp705 - tmp712;
+			 tmp735 = tmp705 + tmp712;
+			 tmp716 = tmp714 - tmp715;
+			 tmp719 = tmp717 - tmp718;
+			 tmp720 = tmp716 - tmp719;
+			 tmp736 = tmp716 + tmp719;
+		    }
+		    tmp721 = (K290284677 * tmp713) - (K956940335 * tmp720);
+		    tmp725 = (K290284677 * tmp720) + (K956940335 * tmp713);
+		    tmp737 = (K881921264 * tmp735) - (K471396736 * tmp736);
+		    tmp741 = (K881921264 * tmp736) + (K471396736 * tmp735);
+	       }
+	       {
+		    fftw_real tmp683;
+		    fftw_real tmp722;
+		    fftw_real tmp723;
+		    fftw_real tmp726;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp683 = tmp667 + tmp682;
+		    tmp722 = tmp702 + tmp721;
+		    c_re(inout[45 * iostride]) = tmp683 - tmp722;
+		    c_re(inout[13 * iostride]) = tmp683 + tmp722;
+		    tmp723 = tmp667 - tmp682;
+		    tmp726 = tmp724 - tmp725;
+		    c_re(inout[61 * iostride]) = tmp723 - tmp726;
+		    c_re(inout[29 * iostride]) = tmp723 + tmp726;
+	       }
+	       {
+		    fftw_real tmp1129;
+		    fftw_real tmp1132;
+		    fftw_real tmp1133;
+		    fftw_real tmp1134;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1129 = tmp724 + tmp725;
+		    tmp1132 = tmp1130 + tmp1131;
+		    c_im(inout[13 * iostride]) = tmp1129 + tmp1132;
+		    c_im(inout[45 * iostride]) = tmp1132 - tmp1129;
+		    tmp1133 = tmp721 - tmp702;
+		    tmp1134 = tmp1131 - tmp1130;
+		    c_im(inout[29 * iostride]) = tmp1133 + tmp1134;
+		    c_im(inout[61 * iostride]) = tmp1134 - tmp1133;
+	       }
+	       {
+		    fftw_real tmp731;
+		    fftw_real tmp738;
+		    fftw_real tmp739;
+		    fftw_real tmp742;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp731 = tmp727 + tmp730;
+		    tmp738 = tmp734 + tmp737;
+		    c_re(inout[37 * iostride]) = tmp731 - tmp738;
+		    c_re(inout[5 * iostride]) = tmp731 + tmp738;
+		    tmp739 = tmp727 - tmp730;
+		    tmp742 = tmp740 - tmp741;
+		    c_re(inout[53 * iostride]) = tmp739 - tmp742;
+		    c_re(inout[21 * iostride]) = tmp739 + tmp742;
+	       }
+	       {
+		    fftw_real tmp1121;
+		    fftw_real tmp1126;
+		    fftw_real tmp1127;
+		    fftw_real tmp1128;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1121 = tmp740 + tmp741;
+		    tmp1126 = tmp1122 + tmp1125;
+		    c_im(inout[5 * iostride]) = tmp1121 + tmp1126;
+		    c_im(inout[37 * iostride]) = tmp1126 - tmp1121;
+		    tmp1127 = tmp737 - tmp734;
+		    tmp1128 = tmp1125 - tmp1122;
+		    c_im(inout[21 * iostride]) = tmp1127 + tmp1128;
+		    c_im(inout[53 * iostride]) = tmp1128 - tmp1127;
+	       }
+	  }
+	  {
+	       fftw_real tmp747;
+	       fftw_real tmp775;
+	       fftw_real tmp1111;
+	       fftw_real tmp1117;
+	       fftw_real tmp754;
+	       fftw_real tmp1104;
+	       fftw_real tmp778;
+	       fftw_real tmp1116;
+	       fftw_real tmp762;
+	       fftw_real tmp772;
+	       fftw_real tmp782;
+	       fftw_real tmp788;
+	       fftw_real tmp769;
+	       fftw_real tmp773;
+	       fftw_real tmp785;
+	       fftw_real tmp789;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp743;
+		    fftw_real tmp746;
+		    fftw_real tmp1105;
+		    fftw_real tmp1110;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp743 = tmp655 + tmp658;
+		    tmp746 = tmp744 + tmp745;
+		    tmp747 = tmp743 - tmp746;
+		    tmp775 = tmp743 + tmp746;
+		    tmp1105 = tmp662 + tmp665;
+		    tmp1110 = tmp1106 + tmp1109;
+		    tmp1111 = tmp1105 + tmp1110;
+		    tmp1117 = tmp1110 - tmp1105;
+	       }
+	       {
+		    fftw_real tmp750;
+		    fftw_real tmp776;
+		    fftw_real tmp753;
+		    fftw_real tmp777;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp748;
+			 fftw_real tmp749;
+			 fftw_real tmp751;
+			 fftw_real tmp752;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp748 = tmp668 + tmp669;
+			 tmp749 = tmp671 + tmp672;
+			 tmp750 =
+			     (K980785280 * tmp748) - (K195090322 * tmp749);
+			 tmp776 =
+			     (K980785280 * tmp749) + (K195090322 * tmp748);
+			 tmp751 = tmp675 + tmp676;
+			 tmp752 = tmp678 + tmp679;
+			 tmp753 =
+			     (K195090322 * tmp751) + (K980785280 * tmp752);
+			 tmp777 =
+			     (K980785280 * tmp751) - (K195090322 * tmp752);
+		    }
+		    tmp754 = tmp750 - tmp753;
+		    tmp1104 = tmp750 + tmp753;
+		    tmp778 = tmp776 + tmp777;
+		    tmp1116 = tmp777 - tmp776;
+	       }
+	       {
+		    fftw_real tmp758;
+		    fftw_real tmp780;
+		    fftw_real tmp761;
+		    fftw_real tmp781;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp756;
+			 fftw_real tmp757;
+			 fftw_real tmp759;
+			 fftw_real tmp760;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp756 = tmp695 + tmp696;
+			 tmp757 = tmp692 + tmp689;
+			 tmp758 = tmp756 - tmp757;
+			 tmp780 = tmp756 + tmp757;
+			 tmp759 = tmp684 + tmp685;
+			 tmp760 = tmp698 + tmp699;
+			 tmp761 = tmp759 - tmp760;
+			 tmp781 = tmp759 + tmp760;
+		    }
+		    tmp762 = (K634393284 * tmp758) + (K773010453 * tmp761);
+		    tmp772 = (K634393284 * tmp761) - (K773010453 * tmp758);
+		    tmp782 = (K995184726 * tmp780) + (K098017140 * tmp781);
+		    tmp788 = (K995184726 * tmp781) - (K098017140 * tmp780);
+	       }
+	       {
+		    fftw_real tmp765;
+		    fftw_real tmp783;
+		    fftw_real tmp768;
+		    fftw_real tmp784;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp763;
+			 fftw_real tmp764;
+			 fftw_real tmp766;
+			 fftw_real tmp767;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp763 = tmp703 + tmp704;
+			 tmp764 = tmp718 + tmp717;
+			 tmp765 = tmp763 - tmp764;
+			 tmp783 = tmp763 + tmp764;
+			 tmp766 = tmp714 + tmp715;
+			 tmp767 = tmp708 + tmp711;
+			 tmp768 = tmp766 - tmp767;
+			 tmp784 = tmp766 + tmp767;
+		    }
+		    tmp769 = (K634393284 * tmp765) - (K773010453 * tmp768);
+		    tmp773 = (K773010453 * tmp765) + (K634393284 * tmp768);
+		    tmp785 = (K995184726 * tmp783) - (K098017140 * tmp784);
+		    tmp789 = (K098017140 * tmp783) + (K995184726 * tmp784);
+	       }
+	       {
+		    fftw_real tmp755;
+		    fftw_real tmp770;
+		    fftw_real tmp771;
+		    fftw_real tmp774;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp755 = tmp747 + tmp754;
+		    tmp770 = tmp762 + tmp769;
+		    c_re(inout[41 * iostride]) = tmp755 - tmp770;
+		    c_re(inout[9 * iostride]) = tmp755 + tmp770;
+		    tmp771 = tmp747 - tmp754;
+		    tmp774 = tmp772 - tmp773;
+		    c_re(inout[57 * iostride]) = tmp771 - tmp774;
+		    c_re(inout[25 * iostride]) = tmp771 + tmp774;
+	       }
+	       {
+		    fftw_real tmp1115;
+		    fftw_real tmp1118;
+		    fftw_real tmp1119;
+		    fftw_real tmp1120;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1115 = tmp772 + tmp773;
+		    tmp1118 = tmp1116 + tmp1117;
+		    c_im(inout[9 * iostride]) = tmp1115 + tmp1118;
+		    c_im(inout[41 * iostride]) = tmp1118 - tmp1115;
+		    tmp1119 = tmp769 - tmp762;
+		    tmp1120 = tmp1117 - tmp1116;
+		    c_im(inout[25 * iostride]) = tmp1119 + tmp1120;
+		    c_im(inout[57 * iostride]) = tmp1120 - tmp1119;
+	       }
+	       {
+		    fftw_real tmp779;
+		    fftw_real tmp786;
+		    fftw_real tmp787;
+		    fftw_real tmp790;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp779 = tmp775 + tmp778;
+		    tmp786 = tmp782 + tmp785;
+		    c_re(inout[33 * iostride]) = tmp779 - tmp786;
+		    c_re(inout[iostride]) = tmp779 + tmp786;
+		    tmp787 = tmp775 - tmp778;
+		    tmp790 = tmp788 - tmp789;
+		    c_re(inout[49 * iostride]) = tmp787 - tmp790;
+		    c_re(inout[17 * iostride]) = tmp787 + tmp790;
+	       }
+	       {
+		    fftw_real tmp1103;
+		    fftw_real tmp1112;
+		    fftw_real tmp1113;
+		    fftw_real tmp1114;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1103 = tmp788 + tmp789;
+		    tmp1112 = tmp1104 + tmp1111;
+		    c_im(inout[iostride]) = tmp1103 + tmp1112;
+		    c_im(inout[33 * iostride]) = tmp1112 - tmp1103;
+		    tmp1113 = tmp785 - tmp782;
+		    tmp1114 = tmp1111 - tmp1104;
+		    c_im(inout[17 * iostride]) = tmp1113 + tmp1114;
+		    c_im(inout[49 * iostride]) = tmp1114 - tmp1113;
+	       }
+	  }
+     }
+}
+
+static const int twiddle_order[] =	/* 63 entries: the integers 1 through 63 in ascending order */
+    { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 };
+fftw_codelet_desc fftw_twiddle_64_desc = {	/* descriptor record for fftw_twiddle_64; the field layout is declared with fftw_codelet_desc, outside this file */
+     "fftw_twiddle_64",	/* same text as the function name below */
+     (void (*)()) fftw_twiddle_64,	/* the codelet, cast to a generic function pointer */
+     64,	/* matches the 64 in the codelet name -- presumably the transform size; confirm against fftw_codelet_desc */
+     FFTW_FORWARD,
+     FFTW_TWIDDLE,
+     1408,	/* NOTE(review): meaning not visible here (equals 64 * 22); presumably a codelet signature -- confirm in fftw-int.h */
+     63,	/* equals the number of entries in twiddle_order */
+     twiddle_order,
+};
diff --git a/src/fftw/ftw_7.c b/src/fftw/ftw_7.c
new file mode 100644
index 0000000..390987d
--- /dev/null
+++ b/src/fftw/ftw_7.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:07:33 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 7 */
+
+/*
+ * This function contains 72 FP additions, 60 FP multiplications,
+ * (or, 60 additions, 48 multiplications, 12 fused multiply/add),
+ * 24 stack variables, and 28 memory accesses
+ */
+static const fftw_real K222520933 =	/* cos(3*pi/7) = -cos(4*pi/7) */
+FFTW_KONST(+0.222520933956314404288902564496794759466355569);
+static const fftw_real K900968867 =	/* cos(pi/7) = -cos(6*pi/7) */
+FFTW_KONST(+0.900968867902419126236102319507445051165919162);
+static const fftw_real K623489801 =	/* cos(2*pi/7) */
+FFTW_KONST(+0.623489801858733530525004884004239810632274731);
+static const fftw_real K433883739 =	/* sin(pi/7) = sin(6*pi/7) */
+FFTW_KONST(+0.433883739117558120475768332848358754609990728);
+static const fftw_real K974927912 =	/* sin(3*pi/7) = sin(4*pi/7) */
+FFTW_KONST(+0.974927912181823607018131682993931217232785801);
+static const fftw_real K781831482 =	/* sin(2*pi/7) */
+FFTW_KONST(+0.781831482468029808708444526674057750232334519);
+
+/*
+ * Generator Id's : 
+ * $Id: ftw_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_twiddle_7(fftw_complex *A, const fftw_complex *W, int iostride,	/* in-place 7-point twiddle butterfly, repeated m times */
+		    int m, int dist)	/* A: data (read, then overwritten); W: 6 factors per pass (read only); iostride: index spacing of the 7 points; dist: step of A between passes */
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 6) {	/* m passes; each pass advances inout by dist and W by 6 */
+	  fftw_real tmp1;
+	  fftw_real tmp53;
+	  fftw_real tmp12;
+	  fftw_real tmp54;
+	  fftw_real tmp38;
+	  fftw_real tmp50;
+	  fftw_real tmp23;
+	  fftw_real tmp55;
+	  fftw_real tmp44;
+	  fftw_real tmp51;
+	  fftw_real tmp34;
+	  fftw_real tmp56;
+	  fftw_real tmp41;
+	  fftw_real tmp52;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(inout[0]);	/* notation below: t0 = inout[0] (used unscaled), tk = W[k-1] * inout[k * iostride] for k = 1..6 */
+	  tmp53 = c_im(inout[0]);
+	  {	/* points 1 and 6 */
+	       fftw_real tmp6;
+	       fftw_real tmp36;
+	       fftw_real tmp11;
+	       fftw_real tmp37;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[iostride]);
+		    tmp5 = c_im(inout[iostride]);
+		    tmp2 = c_re(W[0]);
+		    tmp4 = c_im(W[0]);
+		    tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5);	/* Re(t1) */
+		    tmp36 = (tmp4 * tmp3) + (tmp2 * tmp5);	/* Im(t1) */
+	       }
+	       {
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    fftw_real tmp7;
+		    fftw_real tmp9;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp8 = c_re(inout[6 * iostride]);
+		    tmp10 = c_im(inout[6 * iostride]);
+		    tmp7 = c_re(W[5]);
+		    tmp9 = c_im(W[5]);
+		    tmp11 = (tmp7 * tmp8) - (tmp9 * tmp10);	/* Re(t6) */
+		    tmp37 = (tmp9 * tmp8) + (tmp7 * tmp10);	/* Im(t6) */
+	       }
+	       tmp12 = tmp6 + tmp11;	/* Re(t1) + Re(t6) */
+	       tmp54 = tmp11 - tmp6;	/* Re(t6) - Re(t1) */
+	       tmp38 = tmp36 - tmp37;	/* Im(t1) - Im(t6) */
+	       tmp50 = tmp36 + tmp37;	/* Im(t1) + Im(t6) */
+	  }
+	  {	/* points 2 and 5 */
+	       fftw_real tmp17;
+	       fftw_real tmp42;
+	       fftw_real tmp22;
+	       fftw_real tmp43;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[2 * iostride]);
+		    tmp16 = c_im(inout[2 * iostride]);
+		    tmp13 = c_re(W[1]);
+		    tmp15 = c_im(W[1]);
+		    tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16);	/* Re(t2) */
+		    tmp42 = (tmp15 * tmp14) + (tmp13 * tmp16);	/* Im(t2) */
+	       }
+	       {
+		    fftw_real tmp19;
+		    fftw_real tmp21;
+		    fftw_real tmp18;
+		    fftw_real tmp20;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp19 = c_re(inout[5 * iostride]);
+		    tmp21 = c_im(inout[5 * iostride]);
+		    tmp18 = c_re(W[4]);
+		    tmp20 = c_im(W[4]);
+		    tmp22 = (tmp18 * tmp19) - (tmp20 * tmp21);	/* Re(t5) */
+		    tmp43 = (tmp20 * tmp19) + (tmp18 * tmp21);	/* Im(t5) */
+	       }
+	       tmp23 = tmp17 + tmp22;	/* Re(t2) + Re(t5) */
+	       tmp55 = tmp22 - tmp17;	/* Re(t5) - Re(t2) */
+	       tmp44 = tmp42 - tmp43;	/* Im(t2) - Im(t5) */
+	       tmp51 = tmp42 + tmp43;	/* Im(t2) + Im(t5) */
+	  }
+	  {	/* points 3 and 4 */
+	       fftw_real tmp28;
+	       fftw_real tmp39;
+	       fftw_real tmp33;
+	       fftw_real tmp40;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    fftw_real tmp24;
+		    fftw_real tmp26;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp25 = c_re(inout[3 * iostride]);
+		    tmp27 = c_im(inout[3 * iostride]);
+		    tmp24 = c_re(W[2]);
+		    tmp26 = c_im(W[2]);
+		    tmp28 = (tmp24 * tmp25) - (tmp26 * tmp27);	/* Re(t3) */
+		    tmp39 = (tmp26 * tmp25) + (tmp24 * tmp27);	/* Im(t3) */
+	       }
+	       {
+		    fftw_real tmp30;
+		    fftw_real tmp32;
+		    fftw_real tmp29;
+		    fftw_real tmp31;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp30 = c_re(inout[4 * iostride]);
+		    tmp32 = c_im(inout[4 * iostride]);
+		    tmp29 = c_re(W[3]);
+		    tmp31 = c_im(W[3]);
+		    tmp33 = (tmp29 * tmp30) - (tmp31 * tmp32);	/* Re(t4) */
+		    tmp40 = (tmp31 * tmp30) + (tmp29 * tmp32);	/* Im(t4) */
+	       }
+	       tmp34 = tmp28 + tmp33;	/* Re(t3) + Re(t4) */
+	       tmp56 = tmp33 - tmp28;	/* Re(t4) - Re(t3) */
+	       tmp41 = tmp39 - tmp40;	/* Im(t3) - Im(t4) */
+	       tmp52 = tmp39 + tmp40;	/* Im(t3) + Im(t4) */
+	  }
+	  {	/* output stage: "out k" below means the value stored to inout[k * iostride] */
+	       fftw_real tmp47;
+	       fftw_real tmp46;
+	       fftw_real tmp59;
+	       fftw_real tmp60;
+	       ASSERT_ALIGNED_DOUBLE;
+	       c_re(inout[0]) = tmp1 + tmp12 + tmp23 + tmp34;	/* Re(out 0): sum of all seven real parts */
+	       tmp47 =	/* sin(2pi/7), sin(4pi/7), sin(6pi/7) applied to the Im differences of pairs (1,6), (2,5), (3,4) */
+		   (K781831482 * tmp38) + (K974927912 * tmp44) +
+		   (K433883739 * tmp41);
+	       tmp46 =	/* Re(t0) plus cos(2pi/7), cos(6pi/7), cos(4pi/7) applied to the Re sums of pairs (1,6), (3,4), (2,5) */
+		   tmp1 + (K623489801 * tmp12) - (K900968867 * tmp34) -
+		   (K222520933 * tmp23);
+	       c_re(inout[6 * iostride]) = tmp46 - tmp47;	/* Re(out 6) */
+	       c_re(inout[iostride]) = tmp46 + tmp47;	/* Re(out 1): equals Re of sum_k tk * exp(-2*pi*i*k/7) */
+	       {
+		    fftw_real tmp49;
+		    fftw_real tmp48;
+		    fftw_real tmp45;
+		    fftw_real tmp35;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp49 =
+			(K433883739 * tmp38) + (K974927912 * tmp41) -
+			(K781831482 * tmp44);
+		    tmp48 =
+			tmp1 + (K623489801 * tmp23) -
+			(K222520933 * tmp34) - (K900968867 * tmp12);
+		    c_re(inout[4 * iostride]) = tmp48 - tmp49;	/* Re(out 4) */
+		    c_re(inout[3 * iostride]) = tmp48 + tmp49;	/* Re(out 3) */
+		    tmp45 =
+			(K974927912 * tmp38) - (K781831482 * tmp41) -
+			(K433883739 * tmp44);
+		    tmp35 =
+			tmp1 + (K623489801 * tmp34) -
+			(K900968867 * tmp23) - (K222520933 * tmp12);
+		    c_re(inout[5 * iostride]) = tmp35 - tmp45;	/* Re(out 5) */
+		    c_re(inout[2 * iostride]) = tmp35 + tmp45;	/* Re(out 2) */
+	       }
+	       c_im(inout[0]) = tmp50 + tmp51 + tmp52 + tmp53;	/* Im(out 0): sum of all seven imaginary parts */
+	       tmp59 =	/* sine-weighted Re differences */
+		   (K974927912 * tmp54) - (K781831482 * tmp56) -
+		   (K433883739 * tmp55);
+	       tmp60 =	/* Im(t0) plus cosine-weighted Im sums */
+		   (K623489801 * tmp52) + tmp53 - (K900968867 * tmp51) -
+		   (K222520933 * tmp50);
+	       c_im(inout[2 * iostride]) = tmp59 + tmp60;	/* Im(out 2) */
+	       c_im(inout[5 * iostride]) = tmp60 - tmp59;	/* Im(out 5) */
+	       {
+		    fftw_real tmp61;
+		    fftw_real tmp62;
+		    fftw_real tmp57;
+		    fftw_real tmp58;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp61 =
+			(K433883739 * tmp54) + (K974927912 * tmp56) -
+			(K781831482 * tmp55);
+		    tmp62 =
+			(K623489801 * tmp51) + tmp53 -
+			(K222520933 * tmp52) - (K900968867 * tmp50);
+		    c_im(inout[3 * iostride]) = tmp61 + tmp62;	/* Im(out 3) */
+		    c_im(inout[4 * iostride]) = tmp62 - tmp61;	/* Im(out 4) */
+		    tmp57 =
+			(K781831482 * tmp54) + (K974927912 * tmp55) +
+			(K433883739 * tmp56);
+		    tmp58 =
+			(K623489801 * tmp50) + tmp53 -
+			(K900968867 * tmp52) - (K222520933 * tmp51);
+		    c_im(inout[iostride]) = tmp57 + tmp58;	/* Im(out 1) */
+		    c_im(inout[6 * iostride]) = tmp58 - tmp57;	/* Im(out 6) */
+	       }
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6 };	/* six entries, matching the six W[] factors fftw_twiddle_7 reads per pass; presumably the factor exponents -- confirm against the planner */
+fftw_codelet_desc fftw_twiddle_7_desc = {	/* descriptor record for fftw_twiddle_7; the field layout is declared with fftw_codelet_desc, outside this file */
+     "fftw_twiddle_7",	/* same text as the function name below */
+     (void (*)()) fftw_twiddle_7,	/* the codelet, cast to a generic function pointer */
+     7,	/* number of points the codelet combines per pass */
+     FFTW_FORWARD,
+     FFTW_TWIDDLE,
+     154,	/* NOTE(review): meaning not visible here (equals 7 * 22); presumably a codelet signature -- confirm in fftw-int.h */
+     6,	/* equals the number of entries in twiddle_order and the per-pass advance of W */
+     twiddle_order,
+};
diff --git a/src/fftw/ftw_8.c b/src/fftw/ftw_8.c
new file mode 100644
index 0000000..a155bdd
--- /dev/null
+++ b/src/fftw/ftw_8.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:07:38 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 8 */
+
+/*
+ * This function contains 66 FP additions, 32 FP multiplications,
+ * (or, 52 additions, 18 multiplications, 14 fused multiply/add),
+ * 28 stack variables, and 32 memory accesses
+ */
+static const fftw_real K707106781 =	/* sqrt(2)/2 = cos(pi/4) = sin(pi/4) */
+FFTW_KONST(+0.707106781186547524400844362104849039284835938);
+
+/*
+ * Generator Id's : 
+ * $Id: ftw_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_twiddle_8(fftw_complex *A, const fftw_complex *W, int iostride,	/* in-place 8-point twiddle butterfly, repeated m times */
+		    int m, int dist)	/* A: data (read, then overwritten); W: 7 factors per pass (read only); iostride: index spacing of the 8 points; dist: step of A between passes */
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 7) {	/* m passes; each pass advances inout by dist and W by 7 */
+	  fftw_real tmp7;
+	  fftw_real tmp43;
+	  fftw_real tmp71;
+	  fftw_real tmp76;
+	  fftw_real tmp41;
+	  fftw_real tmp53;
+	  fftw_real tmp56;
+	  fftw_real tmp65;
+	  fftw_real tmp18;
+	  fftw_real tmp77;
+	  fftw_real tmp46;
+	  fftw_real tmp68;
+	  fftw_real tmp30;
+	  fftw_real tmp48;
+	  fftw_real tmp51;
+	  fftw_real tmp64;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {	/* points 0 and 4; notation below: t0 = inout[0] (used unscaled), tk = W[k-1] * inout[k * iostride] for k = 1..7 */
+	       fftw_real tmp1;
+	       fftw_real tmp70;
+	       fftw_real tmp6;
+	       fftw_real tmp69;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);
+	       tmp70 = c_im(inout[0]);
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[4 * iostride]);
+		    tmp5 = c_im(inout[4 * iostride]);
+		    tmp2 = c_re(W[3]);
+		    tmp4 = c_im(W[3]);
+		    tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5);	/* Re(t4) */
+		    tmp69 = (tmp4 * tmp3) + (tmp2 * tmp5);	/* Im(t4) */
+	       }
+	       tmp7 = tmp1 + tmp6;	/* Re(t0) + Re(t4) */
+	       tmp43 = tmp1 - tmp6;	/* Re(t0) - Re(t4) */
+	       tmp71 = tmp69 + tmp70;	/* Im(t4) + Im(t0) */
+	       tmp76 = tmp70 - tmp69;	/* Im(t0) - Im(t4) */
+	  }
+	  {	/* points 7 and 3 */
+	       fftw_real tmp35;
+	       fftw_real tmp54;
+	       fftw_real tmp40;
+	       fftw_real tmp55;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp32;
+		    fftw_real tmp34;
+		    fftw_real tmp31;
+		    fftw_real tmp33;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp32 = c_re(inout[7 * iostride]);
+		    tmp34 = c_im(inout[7 * iostride]);
+		    tmp31 = c_re(W[6]);
+		    tmp33 = c_im(W[6]);
+		    tmp35 = (tmp31 * tmp32) - (tmp33 * tmp34);	/* Re(t7) */
+		    tmp54 = (tmp33 * tmp32) + (tmp31 * tmp34);	/* Im(t7) */
+	       }
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp39;
+		    fftw_real tmp36;
+		    fftw_real tmp38;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = c_re(inout[3 * iostride]);
+		    tmp39 = c_im(inout[3 * iostride]);
+		    tmp36 = c_re(W[2]);
+		    tmp38 = c_im(W[2]);
+		    tmp40 = (tmp36 * tmp37) - (tmp38 * tmp39);	/* Re(t3) */
+		    tmp55 = (tmp38 * tmp37) + (tmp36 * tmp39);	/* Im(t3) */
+	       }
+	       tmp41 = tmp35 + tmp40;	/* Re(t7) + Re(t3) */
+	       tmp53 = tmp35 - tmp40;	/* Re(t7) - Re(t3) */
+	       tmp56 = tmp54 - tmp55;	/* Im(t7) - Im(t3) */
+	       tmp65 = tmp54 + tmp55;	/* Im(t7) + Im(t3) */
+	  }
+	  {	/* points 2 and 6 */
+	       fftw_real tmp12;
+	       fftw_real tmp44;
+	       fftw_real tmp17;
+	       fftw_real tmp45;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp9;
+		    fftw_real tmp11;
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp9 = c_re(inout[2 * iostride]);
+		    tmp11 = c_im(inout[2 * iostride]);
+		    tmp8 = c_re(W[1]);
+		    tmp10 = c_im(W[1]);
+		    tmp12 = (tmp8 * tmp9) - (tmp10 * tmp11);	/* Re(t2) */
+		    tmp44 = (tmp10 * tmp9) + (tmp8 * tmp11);	/* Im(t2) */
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[6 * iostride]);
+		    tmp16 = c_im(inout[6 * iostride]);
+		    tmp13 = c_re(W[5]);
+		    tmp15 = c_im(W[5]);
+		    tmp17 = (tmp13 * tmp14) - (tmp15 * tmp16);	/* Re(t6) */
+		    tmp45 = (tmp15 * tmp14) + (tmp13 * tmp16);	/* Im(t6) */
+	       }
+	       tmp18 = tmp12 + tmp17;	/* Re(t2) + Re(t6) */
+	       tmp77 = tmp12 - tmp17;	/* Re(t2) - Re(t6) */
+	       tmp46 = tmp44 - tmp45;	/* Im(t2) - Im(t6) */
+	       tmp68 = tmp44 + tmp45;	/* Im(t2) + Im(t6) */
+	  }
+	  {	/* points 1 and 5 */
+	       fftw_real tmp24;
+	       fftw_real tmp49;
+	       fftw_real tmp29;
+	       fftw_real tmp50;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp21;
+		    fftw_real tmp23;
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp21 = c_re(inout[iostride]);
+		    tmp23 = c_im(inout[iostride]);
+		    tmp20 = c_re(W[0]);
+		    tmp22 = c_im(W[0]);
+		    tmp24 = (tmp20 * tmp21) - (tmp22 * tmp23);	/* Re(t1) */
+		    tmp49 = (tmp22 * tmp21) + (tmp20 * tmp23);	/* Im(t1) */
+	       }
+	       {
+		    fftw_real tmp26;
+		    fftw_real tmp28;
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp26 = c_re(inout[5 * iostride]);
+		    tmp28 = c_im(inout[5 * iostride]);
+		    tmp25 = c_re(W[4]);
+		    tmp27 = c_im(W[4]);
+		    tmp29 = (tmp25 * tmp26) - (tmp27 * tmp28);	/* Re(t5) */
+		    tmp50 = (tmp27 * tmp26) + (tmp25 * tmp28);	/* Im(t5) */
+	       }
+	       tmp30 = tmp24 + tmp29;	/* Re(t1) + Re(t5) */
+	       tmp48 = tmp24 - tmp29;	/* Re(t1) - Re(t5) */
+	       tmp51 = tmp49 - tmp50;	/* Im(t1) - Im(t5) */
+	       tmp64 = tmp49 + tmp50;	/* Im(t1) + Im(t5) */
+	  }
+	  {	/* output stage: "out k" below means the value stored to inout[k * iostride] */
+	       fftw_real tmp19;
+	       fftw_real tmp42;
+	       fftw_real tmp63;
+	       fftw_real tmp66;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp19 = tmp7 + tmp18;	/* sum of the real parts of the even points 0, 2, 4, 6 */
+	       tmp42 = tmp30 + tmp41;	/* sum of the real parts of the odd points 1, 3, 5, 7 */
+	       c_re(inout[4 * iostride]) = tmp19 - tmp42;	/* Re(out 4) */
+	       c_re(inout[0]) = tmp19 + tmp42;	/* Re(out 0): sum of all eight real parts */
+	       {
+		    fftw_real tmp73;
+		    fftw_real tmp74;
+		    fftw_real tmp67;
+		    fftw_real tmp72;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp73 = tmp41 - tmp30;
+		    tmp74 = tmp71 - tmp68;
+		    c_im(inout[2 * iostride]) = tmp73 + tmp74;	/* Im(out 2): equals Im of sum_k tk * (-i)^k */
+		    c_im(inout[6 * iostride]) = tmp74 - tmp73;	/* Im(out 6) */
+		    tmp67 = tmp64 + tmp65;
+		    tmp72 = tmp68 + tmp71;
+		    c_im(inout[0]) = tmp67 + tmp72;	/* Im(out 0): sum of all eight imaginary parts */
+		    c_im(inout[4 * iostride]) = tmp72 - tmp67;	/* Im(out 4) */
+	       }
+	       tmp63 = tmp7 - tmp18;
+	       tmp66 = tmp64 - tmp65;
+	       c_re(inout[6 * iostride]) = tmp63 - tmp66;	/* Re(out 6) */
+	       c_re(inout[2 * iostride]) = tmp63 + tmp66;	/* Re(out 2) */
+	       {
+		    fftw_real tmp59;
+		    fftw_real tmp78;
+		    fftw_real tmp62;
+		    fftw_real tmp75;
+		    fftw_real tmp60;
+		    fftw_real tmp61;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp59 = tmp43 - tmp46;
+		    tmp78 = tmp76 - tmp77;
+		    tmp60 = tmp51 - tmp48;
+		    tmp61 = tmp53 + tmp56;
+		    tmp62 = K707106781 * (tmp60 - tmp61);	/* odd-point terms scaled by sqrt(2)/2 */
+		    tmp75 = K707106781 * (tmp60 + tmp61);
+		    c_re(inout[7 * iostride]) = tmp59 - tmp62;	/* Re(out 7) */
+		    c_re(inout[3 * iostride]) = tmp59 + tmp62;	/* Re(out 3) */
+		    c_im(inout[iostride]) = tmp75 + tmp78;	/* Im(out 1) */
+		    c_im(inout[5 * iostride]) = tmp78 - tmp75;	/* Im(out 5) */
+	       }
+	       {
+		    fftw_real tmp47;
+		    fftw_real tmp80;
+		    fftw_real tmp58;
+		    fftw_real tmp79;
+		    fftw_real tmp52;
+		    fftw_real tmp57;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp47 = tmp43 + tmp46;
+		    tmp80 = tmp77 + tmp76;
+		    tmp52 = tmp48 + tmp51;
+		    tmp57 = tmp53 - tmp56;
+		    tmp58 = K707106781 * (tmp52 + tmp57);	/* odd-point terms scaled by sqrt(2)/2 */
+		    tmp79 = K707106781 * (tmp57 - tmp52);
+		    c_re(inout[5 * iostride]) = tmp47 - tmp58;	/* Re(out 5) */
+		    c_re(inout[iostride]) = tmp47 + tmp58;	/* Re(out 1): equals Re of sum_k tk * exp(-2*pi*i*k/8) */
+		    c_im(inout[3 * iostride]) = tmp79 + tmp80;	/* Im(out 3) */
+		    c_im(inout[7 * iostride]) = tmp80 - tmp79;	/* Im(out 7) */
+	       }
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6, 7 };	/* seven entries, matching the seven W[] factors fftw_twiddle_8 reads per pass; presumably the factor exponents -- confirm against the planner */
+fftw_codelet_desc fftw_twiddle_8_desc = {	/* descriptor record for fftw_twiddle_8; the field layout is declared with fftw_codelet_desc, outside this file */
+     "fftw_twiddle_8",	/* same text as the function name below */
+     (void (*)()) fftw_twiddle_8,	/* the codelet, cast to a generic function pointer */
+     8,	/* number of points the codelet combines per pass */
+     FFTW_FORWARD,
+     FFTW_TWIDDLE,
+     176,	/* NOTE(review): meaning not visible here (equals 8 * 22); presumably a codelet signature -- confirm in fftw-int.h */
+     7,	/* equals the number of entries in twiddle_order and the per-pass advance of W */
+     twiddle_order,
+};
diff --git a/src/fftw/ftw_9.c b/src/fftw/ftw_9.c
new file mode 100644
index 0000000..3990a3c
--- /dev/null
+++ b/src/fftw/ftw_9.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:07:39 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddle 9 */
+
+/*
+ * This function contains 96 FP additions, 72 FP multiplications,
+ * (or, 60 additions, 36 multiplications, 36 fused multiply/add),
+ * 34 stack variables, and 36 memory accesses
+ */
+static const fftw_real K939692620 =
+FFTW_KONST(+0.939692620785908384054109277324731469936208134);	/* numerically cos(pi/9), i.e. cos(20 deg) */
+static const fftw_real K342020143 =
+FFTW_KONST(+0.342020143325668733044099614682259580763083368);	/* numerically sin(pi/9), i.e. sin(20 deg) */
+static const fftw_real K984807753 =
+FFTW_KONST(+0.984807753012208059366743024589523013670643252);	/* numerically sin(4*pi/9), i.e. sin(80 deg) */
+static const fftw_real K173648177 =
+FFTW_KONST(+0.173648177666930348851716626769314796000375677);	/* numerically cos(4*pi/9), i.e. cos(80 deg) */
+static const fftw_real K642787609 =
+FFTW_KONST(+0.642787609686539326322643409907263432907559884);	/* numerically sin(2*pi/9), i.e. sin(40 deg) */
+static const fftw_real K766044443 =
+FFTW_KONST(+0.766044443118978035202392650555416673935832457);	/* numerically cos(2*pi/9), i.e. cos(40 deg) */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);	/* 1/2 */
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);	/* numerically sqrt(3)/2, i.e. sin(60 deg) */
+
+/*
+ * Generator Id's : 
+ * $Id: ftw_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftw_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftw_twiddle_9(fftw_complex *A, const fftw_complex *W, int iostride,	/* size-9 twiddle codelet (per the "Generated by" note above: genfft -twiddle 9); reads and overwrites A in place */
+		    int m, int dist)	/* m: number of loop passes; dist: offset added to the data pointer after each pass */
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 8) {	/* each pass uses W[0..7] and the 9 elements inout[k * iostride], k = 0..8 (written in[k] in the notes below) */
+	  fftw_real tmp1;
+	  fftw_real tmp99;
+	  fftw_real tmp52;
+	  fftw_real tmp98;
+	  fftw_real tmp105;
+	  fftw_real tmp104;
+	  fftw_real tmp12;
+	  fftw_real tmp49;
+	  fftw_real tmp47;
+	  fftw_real tmp69;
+	  fftw_real tmp86;
+	  fftw_real tmp95;
+	  fftw_real tmp74;
+	  fftw_real tmp85;
+	  fftw_real tmp30;
+	  fftw_real tmp58;
+	  fftw_real tmp82;
+	  fftw_real tmp94;
+	  fftw_real tmp63;
+	  fftw_real tmp83;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {	/* load in[0] and the W-scaled in[3], in[6]; form their sums and differences */
+	       fftw_real tmp6;
+	       fftw_real tmp50;
+	       fftw_real tmp11;
+	       fftw_real tmp51;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);	/* in[0] is used without a twiddle multiply */
+	       tmp99 = c_im(inout[0]);
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[3 * iostride]);
+		    tmp5 = c_im(inout[3 * iostride]);
+		    tmp2 = c_re(W[2]);
+		    tmp4 = c_im(W[2]);
+		    tmp6 = (tmp2 * tmp3) - (tmp4 * tmp5);	/* real part of W[2] * in[3] */
+		    tmp50 = (tmp4 * tmp3) + (tmp2 * tmp5);	/* imaginary part of W[2] * in[3] */
+	       }
+	       {
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    fftw_real tmp7;
+		    fftw_real tmp9;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp8 = c_re(inout[6 * iostride]);
+		    tmp10 = c_im(inout[6 * iostride]);
+		    tmp7 = c_re(W[5]);
+		    tmp9 = c_im(W[5]);
+		    tmp11 = (tmp7 * tmp8) - (tmp9 * tmp10);	/* real part of W[5] * in[6] */
+		    tmp51 = (tmp9 * tmp8) + (tmp7 * tmp10);	/* imaginary part of W[5] * in[6] */
+	       }
+	       tmp52 = K866025403 * (tmp50 - tmp51);
+	       tmp98 = tmp50 + tmp51;
+	       tmp105 = tmp99 - (K500000000 * tmp98);
+	       tmp104 = K866025403 * (tmp11 - tmp6);
+	       tmp12 = tmp6 + tmp11;
+	       tmp49 = tmp1 - (K500000000 * tmp12);
+	  }
+	  {	/* load the W-scaled in[2], in[5], in[8] and combine them */
+	       fftw_real tmp35;
+	       fftw_real tmp71;
+	       fftw_real tmp40;
+	       fftw_real tmp66;
+	       fftw_real tmp45;
+	       fftw_real tmp67;
+	       fftw_real tmp46;
+	       fftw_real tmp72;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp32;
+		    fftw_real tmp34;
+		    fftw_real tmp31;
+		    fftw_real tmp33;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp32 = c_re(inout[2 * iostride]);
+		    tmp34 = c_im(inout[2 * iostride]);
+		    tmp31 = c_re(W[1]);
+		    tmp33 = c_im(W[1]);
+		    tmp35 = (tmp31 * tmp32) - (tmp33 * tmp34);	/* real part of W[1] * in[2] */
+		    tmp71 = (tmp33 * tmp32) + (tmp31 * tmp34);	/* imaginary part of W[1] * in[2] */
+	       }
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp39;
+		    fftw_real tmp36;
+		    fftw_real tmp38;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = c_re(inout[5 * iostride]);
+		    tmp39 = c_im(inout[5 * iostride]);
+		    tmp36 = c_re(W[4]);
+		    tmp38 = c_im(W[4]);
+		    tmp40 = (tmp36 * tmp37) - (tmp38 * tmp39);	/* real part of W[4] * in[5] */
+		    tmp66 = (tmp38 * tmp37) + (tmp36 * tmp39);	/* imaginary part of W[4] * in[5] */
+	       }
+	       {
+		    fftw_real tmp42;
+		    fftw_real tmp44;
+		    fftw_real tmp41;
+		    fftw_real tmp43;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp42 = c_re(inout[8 * iostride]);
+		    tmp44 = c_im(inout[8 * iostride]);
+		    tmp41 = c_re(W[7]);
+		    tmp43 = c_im(W[7]);
+		    tmp45 = (tmp41 * tmp42) - (tmp43 * tmp44);	/* real part of W[7] * in[8] */
+		    tmp67 = (tmp43 * tmp42) + (tmp41 * tmp44);	/* imaginary part of W[7] * in[8] */
+	       }
+	       tmp46 = tmp40 + tmp45;
+	       tmp72 = tmp66 + tmp67;
+	       {
+		    fftw_real tmp65;
+		    fftw_real tmp68;
+		    fftw_real tmp70;
+		    fftw_real tmp73;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp47 = tmp35 + tmp46;
+		    tmp65 = tmp35 - (K500000000 * tmp46);
+		    tmp68 = K866025403 * (tmp66 - tmp67);
+		    tmp69 = tmp65 + tmp68;
+		    tmp86 = tmp65 - tmp68;
+		    tmp95 = tmp71 + tmp72;
+		    tmp70 = K866025403 * (tmp45 - tmp40);
+		    tmp73 = tmp71 - (K500000000 * tmp72);
+		    tmp74 = tmp70 + tmp73;
+		    tmp85 = tmp73 - tmp70;
+	       }
+	  }
+	  {	/* load the W-scaled in[1], in[4], in[7] and combine them */
+	       fftw_real tmp18;
+	       fftw_real tmp60;
+	       fftw_real tmp23;
+	       fftw_real tmp55;
+	       fftw_real tmp28;
+	       fftw_real tmp56;
+	       fftw_real tmp29;
+	       fftw_real tmp61;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp15;
+		    fftw_real tmp17;
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp15 = c_re(inout[iostride]);
+		    tmp17 = c_im(inout[iostride]);
+		    tmp14 = c_re(W[0]);
+		    tmp16 = c_im(W[0]);
+		    tmp18 = (tmp14 * tmp15) - (tmp16 * tmp17);	/* real part of W[0] * in[1] */
+		    tmp60 = (tmp16 * tmp15) + (tmp14 * tmp17);	/* imaginary part of W[0] * in[1] */
+	       }
+	       {
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    fftw_real tmp19;
+		    fftw_real tmp21;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp20 = c_re(inout[4 * iostride]);
+		    tmp22 = c_im(inout[4 * iostride]);
+		    tmp19 = c_re(W[3]);
+		    tmp21 = c_im(W[3]);
+		    tmp23 = (tmp19 * tmp20) - (tmp21 * tmp22);	/* real part of W[3] * in[4] */
+		    tmp55 = (tmp21 * tmp20) + (tmp19 * tmp22);	/* imaginary part of W[3] * in[4] */
+	       }
+	       {
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    fftw_real tmp24;
+		    fftw_real tmp26;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp25 = c_re(inout[7 * iostride]);
+		    tmp27 = c_im(inout[7 * iostride]);
+		    tmp24 = c_re(W[6]);
+		    tmp26 = c_im(W[6]);
+		    tmp28 = (tmp24 * tmp25) - (tmp26 * tmp27);	/* real part of W[6] * in[7] */
+		    tmp56 = (tmp26 * tmp25) + (tmp24 * tmp27);	/* imaginary part of W[6] * in[7] */
+	       }
+	       tmp29 = tmp23 + tmp28;
+	       tmp61 = tmp55 + tmp56;
+	       {
+		    fftw_real tmp54;
+		    fftw_real tmp57;
+		    fftw_real tmp59;
+		    fftw_real tmp62;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp30 = tmp18 + tmp29;
+		    tmp54 = tmp18 - (K500000000 * tmp29);
+		    tmp57 = K866025403 * (tmp55 - tmp56);
+		    tmp58 = tmp54 + tmp57;
+		    tmp82 = tmp54 - tmp57;
+		    tmp94 = tmp60 + tmp61;
+		    tmp59 = K866025403 * (tmp28 - tmp23);
+		    tmp62 = tmp60 - (K500000000 * tmp61);
+		    tmp63 = tmp59 + tmp62;
+		    tmp83 = tmp62 - tmp59;
+	       }
+	  }
+	  {	/* store the real parts of outputs 0, 3, 6 */
+	       fftw_real tmp96;
+	       fftw_real tmp13;
+	       fftw_real tmp48;
+	       fftw_real tmp93;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp96 = K866025403 * (tmp94 - tmp95);
+	       tmp13 = tmp1 + tmp12;
+	       tmp48 = tmp30 + tmp47;
+	       tmp93 = tmp13 - (K500000000 * tmp48);
+	       c_re(inout[0]) = tmp13 + tmp48;	/* sum of the real parts of in[0] and all eight W-scaled inputs */
+	       c_re(inout[3 * iostride]) = tmp93 + tmp96;
+	       c_re(inout[6 * iostride]) = tmp93 - tmp96;
+	  }
+	  {	/* store the imaginary parts of outputs 0, 3, 6 */
+	       fftw_real tmp101;
+	       fftw_real tmp97;
+	       fftw_real tmp100;
+	       fftw_real tmp102;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp101 = K866025403 * (tmp47 - tmp30);
+	       tmp97 = tmp94 + tmp95;
+	       tmp100 = tmp98 + tmp99;
+	       tmp102 = tmp100 - (K500000000 * tmp97);
+	       c_im(inout[0]) = tmp97 + tmp100;	/* sum of the imaginary parts of in[0] and all eight W-scaled inputs */
+	       c_im(inout[6 * iostride]) = tmp102 - tmp101;
+	       c_im(inout[3 * iostride]) = tmp101 + tmp102;
+	  }
+	  {	/* store both parts of outputs 1, 4, 7 */
+	       fftw_real tmp53;
+	       fftw_real tmp106;
+	       fftw_real tmp76;
+	       fftw_real tmp107;
+	       fftw_real tmp80;
+	       fftw_real tmp103;
+	       fftw_real tmp77;
+	       fftw_real tmp108;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp53 = tmp49 + tmp52;
+	       tmp106 = tmp104 + tmp105;
+	       {
+		    fftw_real tmp64;
+		    fftw_real tmp75;
+		    fftw_real tmp78;
+		    fftw_real tmp79;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp64 = (K766044443 * tmp58) + (K642787609 * tmp63);	/* tmp64/tmp78: the pair (tmp58, tmp63) rotated by the fixed cos/sin constants */
+		    tmp75 = (K173648177 * tmp69) + (K984807753 * tmp74);	/* tmp75/tmp79: the pair (tmp69, tmp74) rotated likewise */
+		    tmp76 = tmp64 + tmp75;
+		    tmp107 = K866025403 * (tmp75 - tmp64);
+		    tmp78 = (K766044443 * tmp63) - (K642787609 * tmp58);
+		    tmp79 = (K173648177 * tmp74) - (K984807753 * tmp69);
+		    tmp80 = K866025403 * (tmp78 - tmp79);
+		    tmp103 = tmp78 + tmp79;
+	       }
+	       c_re(inout[iostride]) = tmp53 + tmp76;
+	       tmp77 = tmp53 - (K500000000 * tmp76);
+	       c_re(inout[7 * iostride]) = tmp77 - tmp80;
+	       c_re(inout[4 * iostride]) = tmp77 + tmp80;
+	       c_im(inout[iostride]) = tmp103 + tmp106;
+	       tmp108 = tmp106 - (K500000000 * tmp103);
+	       c_im(inout[4 * iostride]) = tmp107 + tmp108;
+	       c_im(inout[7 * iostride]) = tmp108 - tmp107;
+	  }
+	  {	/* store both parts of outputs 2, 5, 8 */
+	       fftw_real tmp81;
+	       fftw_real tmp110;
+	       fftw_real tmp88;
+	       fftw_real tmp111;
+	       fftw_real tmp92;
+	       fftw_real tmp109;
+	       fftw_real tmp89;
+	       fftw_real tmp112;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp81 = tmp49 - tmp52;
+	       tmp110 = tmp105 - tmp104;
+	       {
+		    fftw_real tmp84;
+		    fftw_real tmp87;
+		    fftw_real tmp90;
+		    fftw_real tmp91;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp84 = (K173648177 * tmp82) + (K984807753 * tmp83);
+		    tmp87 = (K342020143 * tmp85) - (K939692620 * tmp86);
+		    tmp88 = tmp84 + tmp87;
+		    tmp111 = K866025403 * (tmp87 - tmp84);
+		    tmp90 = (K173648177 * tmp83) - (K984807753 * tmp82);
+		    tmp91 = (K342020143 * tmp86) + (K939692620 * tmp85);
+		    tmp92 = K866025403 * (tmp90 + tmp91);
+		    tmp109 = tmp90 - tmp91;
+	       }
+	       c_re(inout[2 * iostride]) = tmp81 + tmp88;
+	       tmp89 = tmp81 - (K500000000 * tmp88);
+	       c_re(inout[8 * iostride]) = tmp89 - tmp92;
+	       c_re(inout[5 * iostride]) = tmp89 + tmp92;
+	       c_im(inout[2 * iostride]) = tmp109 + tmp110;
+	       tmp112 = tmp110 - (K500000000 * tmp109);
+	       c_im(inout[5 * iostride]) = tmp111 + tmp112;
+	       c_im(inout[8 * iostride]) = tmp112 - tmp111;
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6, 7, 8 };	/* 8 entries; referenced by the descriptor below */
+fftw_codelet_desc fftw_twiddle_9_desc = {	/* descriptor record for fftw_twiddle_9; field meanings noted below are presumed -- confirm against the fftw_codelet_desc declaration */
+     "fftw_twiddle_9",	/* codelet name as a string */
+     (void (*)()) fftw_twiddle_9,	/* codelet entry point, cast to an unprototyped function pointer */
+     9,	/* presumably the transform size */
+     FFTW_FORWARD,	/* direction tag */
+     FFTW_TWIDDLE,	/* codelet kind tag */
+     198,	/* NOTE(review): looks like a generator-assigned signature/id -- confirm */
+     8,	/* matches the W = W + 8 step in fftw_twiddle_9 and the length of twiddle_order */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_10.c b/src/fftw/ftwi_10.c
new file mode 100644
index 0000000..cd8da09
--- /dev/null
+++ b/src/fftw/ftwi_10.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:33 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 10 */
+
+/*
+ * This function contains 102 FP additions, 60 FP multiplications,
+ * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
+ * 42 stack variables, and 40 memory accesses
+ */
+static const fftw_real K951056516 =
+FFTW_KONST(+0.951056516295153572116439333379382143405698634);	/* numerically sin(2*pi/5), i.e. sin(72 deg) */
+static const fftw_real K587785252 =
+FFTW_KONST(+0.587785252292473129168705954639072768597652438);	/* numerically sin(pi/5), i.e. sin(36 deg) */
+static const fftw_real K250000000 =
+FFTW_KONST(+0.250000000000000000000000000000000000000000000);	/* 1/4 */
+static const fftw_real K559016994 =
+FFTW_KONST(+0.559016994374947424102293417182819058860154590);	/* numerically sqrt(5)/4 */
+
+/*
+ * Generator Id's : 
+ * $Id: ftwi_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_10.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_twiddle_10(fftw_complex *A, const fftw_complex *W, int iostride,	/* size-10 inverse twiddle codelet (per the "Generated by" note above: genfft -twiddleinv 10); reads and overwrites A in place */
+		      int m, int dist)	/* m: number of loop passes; dist: offset added to the data pointer after each pass */
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 9) {	/* each pass uses W[0..8] and the 10 elements inout[k * iostride], k = 0..9 (written in[k] in the notes below) */
+	  fftw_real tmp7;
+	  fftw_real tmp55;
+	  fftw_real tmp100;
+	  fftw_real tmp115;
+	  fftw_real tmp41;
+	  fftw_real tmp52;
+	  fftw_real tmp53;
+	  fftw_real tmp59;
+	  fftw_real tmp60;
+	  fftw_real tmp61;
+	  fftw_real tmp75;
+	  fftw_real tmp78;
+	  fftw_real tmp113;
+	  fftw_real tmp89;
+	  fftw_real tmp90;
+	  fftw_real tmp96;
+	  fftw_real tmp18;
+	  fftw_real tmp29;
+	  fftw_real tmp30;
+	  fftw_real tmp56;
+	  fftw_real tmp57;
+	  fftw_real tmp58;
+	  fftw_real tmp68;
+	  fftw_real tmp71;
+	  fftw_real tmp112;
+	  fftw_real tmp86;
+	  fftw_real tmp87;
+	  fftw_real tmp95;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {	/* load in[0] and the scaled in[5]; form their sum and difference */
+	       fftw_real tmp1;
+	       fftw_real tmp99;
+	       fftw_real tmp6;
+	       fftw_real tmp98;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);	/* in[0] is used without a twiddle multiply */
+	       tmp99 = c_im(inout[0]);
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[5 * iostride]);
+		    tmp5 = c_im(inout[5 * iostride]);
+		    tmp2 = c_re(W[4]);
+		    tmp4 = c_im(W[4]);
+		    tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5);	/* real part of conj(W[4]) * in[5] */
+		    tmp98 = (tmp2 * tmp5) - (tmp4 * tmp3);	/* imaginary part of conj(W[4]) * in[5] */
+	       }
+	       tmp7 = tmp1 - tmp6;
+	       tmp55 = tmp1 + tmp6;
+	       tmp100 = tmp98 + tmp99;
+	       tmp115 = tmp99 - tmp98;
+	  }
+	  {	/* load the scaled in[4], in[1], in[9], in[6] and form pairwise sums and differences */
+	       fftw_real tmp35;
+	       fftw_real tmp73;
+	       fftw_real tmp51;
+	       fftw_real tmp77;
+	       fftw_real tmp40;
+	       fftw_real tmp74;
+	       fftw_real tmp46;
+	       fftw_real tmp76;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp32;
+		    fftw_real tmp34;
+		    fftw_real tmp31;
+		    fftw_real tmp33;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp32 = c_re(inout[4 * iostride]);
+		    tmp34 = c_im(inout[4 * iostride]);
+		    tmp31 = c_re(W[3]);
+		    tmp33 = c_im(W[3]);
+		    tmp35 = (tmp31 * tmp32) + (tmp33 * tmp34);	/* real part of conj(W[3]) * in[4] */
+		    tmp73 = (tmp31 * tmp34) - (tmp33 * tmp32);	/* imaginary part of conj(W[3]) * in[4] */
+	       }
+	       {
+		    fftw_real tmp48;
+		    fftw_real tmp50;
+		    fftw_real tmp47;
+		    fftw_real tmp49;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp48 = c_re(inout[iostride]);
+		    tmp50 = c_im(inout[iostride]);
+		    tmp47 = c_re(W[0]);
+		    tmp49 = c_im(W[0]);
+		    tmp51 = (tmp47 * tmp48) + (tmp49 * tmp50);	/* real part of conj(W[0]) * in[1] */
+		    tmp77 = (tmp47 * tmp50) - (tmp49 * tmp48);	/* imaginary part of conj(W[0]) * in[1] */
+	       }
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp39;
+		    fftw_real tmp36;
+		    fftw_real tmp38;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = c_re(inout[9 * iostride]);
+		    tmp39 = c_im(inout[9 * iostride]);
+		    tmp36 = c_re(W[8]);
+		    tmp38 = c_im(W[8]);
+		    tmp40 = (tmp36 * tmp37) + (tmp38 * tmp39);	/* real part of conj(W[8]) * in[9] */
+		    tmp74 = (tmp36 * tmp39) - (tmp38 * tmp37);	/* imaginary part of conj(W[8]) * in[9] */
+	       }
+	       {
+		    fftw_real tmp43;
+		    fftw_real tmp45;
+		    fftw_real tmp42;
+		    fftw_real tmp44;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp43 = c_re(inout[6 * iostride]);
+		    tmp45 = c_im(inout[6 * iostride]);
+		    tmp42 = c_re(W[5]);
+		    tmp44 = c_im(W[5]);
+		    tmp46 = (tmp42 * tmp43) + (tmp44 * tmp45);	/* real part of conj(W[5]) * in[6] */
+		    tmp76 = (tmp42 * tmp45) - (tmp44 * tmp43);	/* imaginary part of conj(W[5]) * in[6] */
+	       }
+	       tmp41 = tmp35 - tmp40;
+	       tmp52 = tmp46 - tmp51;
+	       tmp53 = tmp41 + tmp52;
+	       tmp59 = tmp35 + tmp40;
+	       tmp60 = tmp46 + tmp51;
+	       tmp61 = tmp59 + tmp60;
+	       tmp75 = tmp73 - tmp74;
+	       tmp78 = tmp76 - tmp77;
+	       tmp113 = tmp75 + tmp78;
+	       tmp89 = tmp73 + tmp74;
+	       tmp90 = tmp76 + tmp77;
+	       tmp96 = tmp89 + tmp90;
+	  }
+	  {	/* load the scaled in[2], in[3], in[7], in[8] and form pairwise sums and differences */
+	       fftw_real tmp12;
+	       fftw_real tmp66;
+	       fftw_real tmp28;
+	       fftw_real tmp70;
+	       fftw_real tmp17;
+	       fftw_real tmp67;
+	       fftw_real tmp23;
+	       fftw_real tmp69;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp9;
+		    fftw_real tmp11;
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp9 = c_re(inout[2 * iostride]);
+		    tmp11 = c_im(inout[2 * iostride]);
+		    tmp8 = c_re(W[1]);
+		    tmp10 = c_im(W[1]);
+		    tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11);	/* real part of conj(W[1]) * in[2] */
+		    tmp66 = (tmp8 * tmp11) - (tmp10 * tmp9);	/* imaginary part of conj(W[1]) * in[2] */
+	       }
+	       {
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    fftw_real tmp24;
+		    fftw_real tmp26;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp25 = c_re(inout[3 * iostride]);
+		    tmp27 = c_im(inout[3 * iostride]);
+		    tmp24 = c_re(W[2]);
+		    tmp26 = c_im(W[2]);
+		    tmp28 = (tmp24 * tmp25) + (tmp26 * tmp27);	/* real part of conj(W[2]) * in[3] */
+		    tmp70 = (tmp24 * tmp27) - (tmp26 * tmp25);	/* imaginary part of conj(W[2]) * in[3] */
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[7 * iostride]);
+		    tmp16 = c_im(inout[7 * iostride]);
+		    tmp13 = c_re(W[6]);
+		    tmp15 = c_im(W[6]);
+		    tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16);	/* real part of conj(W[6]) * in[7] */
+		    tmp67 = (tmp13 * tmp16) - (tmp15 * tmp14);	/* imaginary part of conj(W[6]) * in[7] */
+	       }
+	       {
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    fftw_real tmp19;
+		    fftw_real tmp21;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp20 = c_re(inout[8 * iostride]);
+		    tmp22 = c_im(inout[8 * iostride]);
+		    tmp19 = c_re(W[7]);
+		    tmp21 = c_im(W[7]);
+		    tmp23 = (tmp19 * tmp20) + (tmp21 * tmp22);	/* real part of conj(W[7]) * in[8] */
+		    tmp69 = (tmp19 * tmp22) - (tmp21 * tmp20);	/* imaginary part of conj(W[7]) * in[8] */
+	       }
+	       tmp18 = tmp12 - tmp17;
+	       tmp29 = tmp23 - tmp28;
+	       tmp30 = tmp18 + tmp29;
+	       tmp56 = tmp12 + tmp17;
+	       tmp57 = tmp23 + tmp28;
+	       tmp58 = tmp56 + tmp57;
+	       tmp68 = tmp66 - tmp67;
+	       tmp71 = tmp69 - tmp70;
+	       tmp112 = tmp68 + tmp71;
+	       tmp86 = tmp66 + tmp67;
+	       tmp87 = tmp69 + tmp70;
+	       tmp95 = tmp86 + tmp87;
+	  }
+	  {	/* store the real parts of outputs 5, 1, 9, 7, 3 */
+	       fftw_real tmp64;
+	       fftw_real tmp54;
+	       fftw_real tmp63;
+	       fftw_real tmp80;
+	       fftw_real tmp82;
+	       fftw_real tmp72;
+	       fftw_real tmp79;
+	       fftw_real tmp81;
+	       fftw_real tmp65;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp64 = K559016994 * (tmp30 - tmp53);
+	       tmp54 = tmp30 + tmp53;
+	       tmp63 = tmp7 - (K250000000 * tmp54);
+	       tmp72 = tmp68 - tmp71;
+	       tmp79 = tmp75 - tmp78;
+	       tmp80 = (K587785252 * tmp72) - (K951056516 * tmp79);
+	       tmp82 = (K951056516 * tmp72) + (K587785252 * tmp79);
+	       c_re(inout[5 * iostride]) = tmp7 + tmp54;
+	       tmp81 = tmp64 + tmp63;
+	       c_re(inout[iostride]) = tmp81 - tmp82;
+	       c_re(inout[9 * iostride]) = tmp81 + tmp82;
+	       tmp65 = tmp63 - tmp64;
+	       c_re(inout[7 * iostride]) = tmp65 - tmp80;
+	       c_re(inout[3 * iostride]) = tmp65 + tmp80;
+	  }
+	  {	/* store the imaginary parts of outputs 5, 3, 7, 1, 9 */
+	       fftw_real tmp114;
+	       fftw_real tmp116;
+	       fftw_real tmp117;
+	       fftw_real tmp111;
+	       fftw_real tmp120;
+	       fftw_real tmp109;
+	       fftw_real tmp110;
+	       fftw_real tmp119;
+	       fftw_real tmp118;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp114 = K559016994 * (tmp112 - tmp113);
+	       tmp116 = tmp112 + tmp113;
+	       tmp117 = tmp115 - (K250000000 * tmp116);
+	       tmp109 = tmp18 - tmp29;
+	       tmp110 = tmp41 - tmp52;
+	       tmp111 = (K951056516 * tmp109) + (K587785252 * tmp110);
+	       tmp120 = (K587785252 * tmp109) - (K951056516 * tmp110);
+	       c_im(inout[5 * iostride]) = tmp116 + tmp115;
+	       tmp119 = tmp117 - tmp114;
+	       c_im(inout[3 * iostride]) = tmp119 - tmp120;
+	       c_im(inout[7 * iostride]) = tmp120 + tmp119;
+	       tmp118 = tmp114 + tmp117;
+	       c_im(inout[iostride]) = tmp111 + tmp118;
+	       c_im(inout[9 * iostride]) = tmp118 - tmp111;
+	  }
+	  {	/* store the real parts of outputs 0, 6, 4, 2, 8 */
+	       fftw_real tmp84;
+	       fftw_real tmp62;
+	       fftw_real tmp83;
+	       fftw_real tmp92;
+	       fftw_real tmp94;
+	       fftw_real tmp88;
+	       fftw_real tmp91;
+	       fftw_real tmp93;
+	       fftw_real tmp85;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp84 = K559016994 * (tmp58 - tmp61);
+	       tmp62 = tmp58 + tmp61;
+	       tmp83 = tmp55 - (K250000000 * tmp62);
+	       tmp88 = tmp86 - tmp87;
+	       tmp91 = tmp89 - tmp90;
+	       tmp92 = (K587785252 * tmp88) - (K951056516 * tmp91);
+	       tmp94 = (K951056516 * tmp88) + (K587785252 * tmp91);
+	       c_re(inout[0]) = tmp55 + tmp62;	/* sum of the real parts of in[0] and all nine scaled inputs */
+	       tmp93 = tmp84 + tmp83;
+	       c_re(inout[6 * iostride]) = tmp93 - tmp94;
+	       c_re(inout[4 * iostride]) = tmp93 + tmp94;
+	       tmp85 = tmp83 - tmp84;
+	       c_re(inout[2 * iostride]) = tmp85 - tmp92;
+	       c_re(inout[8 * iostride]) = tmp85 + tmp92;
+	  }
+	  {	/* store the imaginary parts of outputs 0, 4, 6, 2, 8 */
+	       fftw_real tmp105;
+	       fftw_real tmp97;
+	       fftw_real tmp104;
+	       fftw_real tmp103;
+	       fftw_real tmp108;
+	       fftw_real tmp101;
+	       fftw_real tmp102;
+	       fftw_real tmp107;
+	       fftw_real tmp106;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp105 = K559016994 * (tmp95 - tmp96);
+	       tmp97 = tmp95 + tmp96;
+	       tmp104 = tmp100 - (K250000000 * tmp97);
+	       tmp101 = tmp56 - tmp57;
+	       tmp102 = tmp59 - tmp60;
+	       tmp103 = (K587785252 * tmp101) - (K951056516 * tmp102);
+	       tmp108 = (K951056516 * tmp101) + (K587785252 * tmp102);
+	       c_im(inout[0]) = tmp97 + tmp100;	/* sum of the imaginary parts of in[0] and all nine scaled inputs */
+	       tmp107 = tmp105 + tmp104;
+	       c_im(inout[4 * iostride]) = tmp107 - tmp108;
+	       c_im(inout[6 * iostride]) = tmp108 + tmp107;
+	       tmp106 = tmp104 - tmp105;
+	       c_im(inout[2 * iostride]) = tmp103 + tmp106;
+	       c_im(inout[8 * iostride]) = tmp106 - tmp103;
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };	/* 9 entries; referenced by the descriptor below */
+fftw_codelet_desc fftwi_twiddle_10_desc = {	/* descriptor record for fftwi_twiddle_10; field meanings noted below are presumed -- confirm against the fftw_codelet_desc declaration */
+     "fftwi_twiddle_10",	/* codelet name as a string */
+     (void (*)()) fftwi_twiddle_10,	/* codelet entry point, cast to an unprototyped function pointer */
+     10,	/* presumably the transform size */
+     FFTW_BACKWARD,	/* direction tag */
+     FFTW_TWIDDLE,	/* codelet kind tag */
+     231,	/* NOTE(review): looks like a generator-assigned signature/id -- confirm */
+     9,	/* matches the W = W + 9 step in fftwi_twiddle_10 and the length of twiddle_order */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_16.c b/src/fftw/ftwi_16.c
new file mode 100644
index 0000000..da369de
--- /dev/null
+++ b/src/fftw/ftwi_16.c
@@ -0,0 +1,614 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:35 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 16 */
+
+/*
+ * This function contains 174 FP additions, 84 FP multiplications,
+ * (or, 136 additions, 46 multiplications, 38 fused multiply/add),
+ * 50 stack variables, and 64 memory accesses
+ */
+static const fftw_real K382683432 =
+FFTW_KONST(+0.382683432365089771728459984030398866761344562);	/* numerically sin(pi/8), i.e. sin(22.5 deg) */
+static const fftw_real K923879532 =
+FFTW_KONST(+0.923879532511286756128183189396788286822416626);	/* numerically cos(pi/8), i.e. cos(22.5 deg) */
+static const fftw_real K707106781 =
+FFTW_KONST(+0.707106781186547524400844362104849039284835938);	/* numerically sqrt(2)/2, i.e. cos(45 deg) */
+
+/*
+ * Generator Id's : 
+ * $Id: ftwi_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_16.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_twiddle_16(fftw_complex *A, const fftw_complex *W, int iostride,
+		      int m, int dist)
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 15) {
+	  fftw_real tmp7;
+	  fftw_real tmp91;
+	  fftw_real tmp180;
+	  fftw_real tmp194;
+	  fftw_real tmp18;
+	  fftw_real tmp193;
+	  fftw_real tmp94;
+	  fftw_real tmp177;
+	  fftw_real tmp77;
+	  fftw_real tmp88;
+	  fftw_real tmp161;
+	  fftw_real tmp117;
+	  fftw_real tmp141;
+	  fftw_real tmp162;
+	  fftw_real tmp163;
+	  fftw_real tmp164;
+	  fftw_real tmp112;
+	  fftw_real tmp140;
+	  fftw_real tmp30;
+	  fftw_real tmp153;
+	  fftw_real tmp100;
+	  fftw_real tmp137;
+	  fftw_real tmp41;
+	  fftw_real tmp152;
+	  fftw_real tmp105;
+	  fftw_real tmp136;
+	  fftw_real tmp54;
+	  fftw_real tmp65;
+	  fftw_real tmp156;
+	  fftw_real tmp128;
+	  fftw_real tmp144;
+	  fftw_real tmp157;
+	  fftw_real tmp158;
+	  fftw_real tmp159;
+	  fftw_real tmp123;
+	  fftw_real tmp143;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp1;
+	       fftw_real tmp179;
+	       fftw_real tmp6;
+	       fftw_real tmp178;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);
+	       tmp179 = c_im(inout[0]);
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[8 * iostride]);
+		    tmp5 = c_im(inout[8 * iostride]);
+		    tmp2 = c_re(W[7]);
+		    tmp4 = c_im(W[7]);
+		    tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5);
+		    tmp178 = (tmp2 * tmp5) - (tmp4 * tmp3);
+	       }
+	       tmp7 = tmp1 + tmp6;
+	       tmp91 = tmp1 - tmp6;
+	       tmp180 = tmp178 + tmp179;
+	       tmp194 = tmp179 - tmp178;
+	  }
+	  {
+	       fftw_real tmp12;
+	       fftw_real tmp92;
+	       fftw_real tmp17;
+	       fftw_real tmp93;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp9;
+		    fftw_real tmp11;
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp9 = c_re(inout[4 * iostride]);
+		    tmp11 = c_im(inout[4 * iostride]);
+		    tmp8 = c_re(W[3]);
+		    tmp10 = c_im(W[3]);
+		    tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11);
+		    tmp92 = (tmp8 * tmp11) - (tmp10 * tmp9);
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[12 * iostride]);
+		    tmp16 = c_im(inout[12 * iostride]);
+		    tmp13 = c_re(W[11]);
+		    tmp15 = c_im(W[11]);
+		    tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16);
+		    tmp93 = (tmp13 * tmp16) - (tmp15 * tmp14);
+	       }
+	       tmp18 = tmp12 + tmp17;
+	       tmp193 = tmp12 - tmp17;
+	       tmp94 = tmp92 - tmp93;
+	       tmp177 = tmp92 + tmp93;
+	  }
+	  {
+	       fftw_real tmp71;
+	       fftw_real tmp108;
+	       fftw_real tmp87;
+	       fftw_real tmp115;
+	       fftw_real tmp76;
+	       fftw_real tmp109;
+	       fftw_real tmp82;
+	       fftw_real tmp114;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp68;
+		    fftw_real tmp70;
+		    fftw_real tmp67;
+		    fftw_real tmp69;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp68 = c_re(inout[15 * iostride]);
+		    tmp70 = c_im(inout[15 * iostride]);
+		    tmp67 = c_re(W[14]);
+		    tmp69 = c_im(W[14]);
+		    tmp71 = (tmp67 * tmp68) + (tmp69 * tmp70);
+		    tmp108 = (tmp67 * tmp70) - (tmp69 * tmp68);
+	       }
+	       {
+		    fftw_real tmp84;
+		    fftw_real tmp86;
+		    fftw_real tmp83;
+		    fftw_real tmp85;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp84 = c_re(inout[11 * iostride]);
+		    tmp86 = c_im(inout[11 * iostride]);
+		    tmp83 = c_re(W[10]);
+		    tmp85 = c_im(W[10]);
+		    tmp87 = (tmp83 * tmp84) + (tmp85 * tmp86);
+		    tmp115 = (tmp83 * tmp86) - (tmp85 * tmp84);
+	       }
+	       {
+		    fftw_real tmp73;
+		    fftw_real tmp75;
+		    fftw_real tmp72;
+		    fftw_real tmp74;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp73 = c_re(inout[7 * iostride]);
+		    tmp75 = c_im(inout[7 * iostride]);
+		    tmp72 = c_re(W[6]);
+		    tmp74 = c_im(W[6]);
+		    tmp76 = (tmp72 * tmp73) + (tmp74 * tmp75);
+		    tmp109 = (tmp72 * tmp75) - (tmp74 * tmp73);
+	       }
+	       {
+		    fftw_real tmp79;
+		    fftw_real tmp81;
+		    fftw_real tmp78;
+		    fftw_real tmp80;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp79 = c_re(inout[3 * iostride]);
+		    tmp81 = c_im(inout[3 * iostride]);
+		    tmp78 = c_re(W[2]);
+		    tmp80 = c_im(W[2]);
+		    tmp82 = (tmp78 * tmp79) + (tmp80 * tmp81);
+		    tmp114 = (tmp78 * tmp81) - (tmp80 * tmp79);
+	       }
+	       {
+		    fftw_real tmp113;
+		    fftw_real tmp116;
+		    fftw_real tmp110;
+		    fftw_real tmp111;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp77 = tmp71 + tmp76;
+		    tmp88 = tmp82 + tmp87;
+		    tmp161 = tmp77 - tmp88;
+		    tmp113 = tmp71 - tmp76;
+		    tmp116 = tmp114 - tmp115;
+		    tmp117 = tmp113 - tmp116;
+		    tmp141 = tmp113 + tmp116;
+		    tmp162 = tmp108 + tmp109;
+		    tmp163 = tmp114 + tmp115;
+		    tmp164 = tmp162 - tmp163;
+		    tmp110 = tmp108 - tmp109;
+		    tmp111 = tmp82 - tmp87;
+		    tmp112 = tmp110 + tmp111;
+		    tmp140 = tmp110 - tmp111;
+	       }
+	  }
+	  {
+	       fftw_real tmp24;
+	       fftw_real tmp97;
+	       fftw_real tmp29;
+	       fftw_real tmp98;
+	       fftw_real tmp96;
+	       fftw_real tmp99;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp21;
+		    fftw_real tmp23;
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp21 = c_re(inout[2 * iostride]);
+		    tmp23 = c_im(inout[2 * iostride]);
+		    tmp20 = c_re(W[1]);
+		    tmp22 = c_im(W[1]);
+		    tmp24 = (tmp20 * tmp21) + (tmp22 * tmp23);
+		    tmp97 = (tmp20 * tmp23) - (tmp22 * tmp21);
+	       }
+	       {
+		    fftw_real tmp26;
+		    fftw_real tmp28;
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp26 = c_re(inout[10 * iostride]);
+		    tmp28 = c_im(inout[10 * iostride]);
+		    tmp25 = c_re(W[9]);
+		    tmp27 = c_im(W[9]);
+		    tmp29 = (tmp25 * tmp26) + (tmp27 * tmp28);
+		    tmp98 = (tmp25 * tmp28) - (tmp27 * tmp26);
+	       }
+	       tmp30 = tmp24 + tmp29;
+	       tmp153 = tmp97 + tmp98;
+	       tmp96 = tmp24 - tmp29;
+	       tmp99 = tmp97 - tmp98;
+	       tmp100 = tmp96 - tmp99;
+	       tmp137 = tmp96 + tmp99;
+	  }
+	  {
+	       fftw_real tmp35;
+	       fftw_real tmp102;
+	       fftw_real tmp40;
+	       fftw_real tmp103;
+	       fftw_real tmp101;
+	       fftw_real tmp104;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp32;
+		    fftw_real tmp34;
+		    fftw_real tmp31;
+		    fftw_real tmp33;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp32 = c_re(inout[14 * iostride]);
+		    tmp34 = c_im(inout[14 * iostride]);
+		    tmp31 = c_re(W[13]);
+		    tmp33 = c_im(W[13]);
+		    tmp35 = (tmp31 * tmp32) + (tmp33 * tmp34);
+		    tmp102 = (tmp31 * tmp34) - (tmp33 * tmp32);
+	       }
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp39;
+		    fftw_real tmp36;
+		    fftw_real tmp38;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = c_re(inout[6 * iostride]);
+		    tmp39 = c_im(inout[6 * iostride]);
+		    tmp36 = c_re(W[5]);
+		    tmp38 = c_im(W[5]);
+		    tmp40 = (tmp36 * tmp37) + (tmp38 * tmp39);
+		    tmp103 = (tmp36 * tmp39) - (tmp38 * tmp37);
+	       }
+	       tmp41 = tmp35 + tmp40;
+	       tmp152 = tmp102 + tmp103;
+	       tmp101 = tmp35 - tmp40;
+	       tmp104 = tmp102 - tmp103;
+	       tmp105 = tmp101 + tmp104;
+	       tmp136 = tmp104 - tmp101;
+	  }
+	  {
+	       fftw_real tmp48;
+	       fftw_real tmp119;
+	       fftw_real tmp64;
+	       fftw_real tmp126;
+	       fftw_real tmp53;
+	       fftw_real tmp120;
+	       fftw_real tmp59;
+	       fftw_real tmp125;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp45;
+		    fftw_real tmp47;
+		    fftw_real tmp44;
+		    fftw_real tmp46;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp45 = c_re(inout[iostride]);
+		    tmp47 = c_im(inout[iostride]);
+		    tmp44 = c_re(W[0]);
+		    tmp46 = c_im(W[0]);
+		    tmp48 = (tmp44 * tmp45) + (tmp46 * tmp47);
+		    tmp119 = (tmp44 * tmp47) - (tmp46 * tmp45);
+	       }
+	       {
+		    fftw_real tmp61;
+		    fftw_real tmp63;
+		    fftw_real tmp60;
+		    fftw_real tmp62;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp61 = c_re(inout[13 * iostride]);
+		    tmp63 = c_im(inout[13 * iostride]);
+		    tmp60 = c_re(W[12]);
+		    tmp62 = c_im(W[12]);
+		    tmp64 = (tmp60 * tmp61) + (tmp62 * tmp63);
+		    tmp126 = (tmp60 * tmp63) - (tmp62 * tmp61);
+	       }
+	       {
+		    fftw_real tmp50;
+		    fftw_real tmp52;
+		    fftw_real tmp49;
+		    fftw_real tmp51;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp50 = c_re(inout[9 * iostride]);
+		    tmp52 = c_im(inout[9 * iostride]);
+		    tmp49 = c_re(W[8]);
+		    tmp51 = c_im(W[8]);
+		    tmp53 = (tmp49 * tmp50) + (tmp51 * tmp52);
+		    tmp120 = (tmp49 * tmp52) - (tmp51 * tmp50);
+	       }
+	       {
+		    fftw_real tmp56;
+		    fftw_real tmp58;
+		    fftw_real tmp55;
+		    fftw_real tmp57;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp56 = c_re(inout[5 * iostride]);
+		    tmp58 = c_im(inout[5 * iostride]);
+		    tmp55 = c_re(W[4]);
+		    tmp57 = c_im(W[4]);
+		    tmp59 = (tmp55 * tmp56) + (tmp57 * tmp58);
+		    tmp125 = (tmp55 * tmp58) - (tmp57 * tmp56);
+	       }
+	       {
+		    fftw_real tmp124;
+		    fftw_real tmp127;
+		    fftw_real tmp121;
+		    fftw_real tmp122;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp54 = tmp48 + tmp53;
+		    tmp65 = tmp59 + tmp64;
+		    tmp156 = tmp54 - tmp65;
+		    tmp124 = tmp48 - tmp53;
+		    tmp127 = tmp125 - tmp126;
+		    tmp128 = tmp124 - tmp127;
+		    tmp144 = tmp124 + tmp127;
+		    tmp157 = tmp119 + tmp120;
+		    tmp158 = tmp125 + tmp126;
+		    tmp159 = tmp157 - tmp158;
+		    tmp121 = tmp119 - tmp120;
+		    tmp122 = tmp59 - tmp64;
+		    tmp123 = tmp121 + tmp122;
+		    tmp143 = tmp121 - tmp122;
+	       }
+	  }
+	  {
+	       fftw_real tmp107;
+	       fftw_real tmp131;
+	       fftw_real tmp196;
+	       fftw_real tmp198;
+	       fftw_real tmp130;
+	       fftw_real tmp191;
+	       fftw_real tmp134;
+	       fftw_real tmp197;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp95;
+		    fftw_real tmp106;
+		    fftw_real tmp192;
+		    fftw_real tmp195;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp95 = tmp91 - tmp94;
+		    tmp106 = K707106781 * (tmp100 + tmp105);
+		    tmp107 = tmp95 - tmp106;
+		    tmp131 = tmp95 + tmp106;
+		    tmp192 = K707106781 * (tmp137 + tmp136);
+		    tmp195 = tmp193 + tmp194;
+		    tmp196 = tmp192 + tmp195;
+		    tmp198 = tmp195 - tmp192;
+	       }
+	       {
+		    fftw_real tmp118;
+		    fftw_real tmp129;
+		    fftw_real tmp132;
+		    fftw_real tmp133;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp118 = (K923879532 * tmp112) - (K382683432 * tmp117);
+		    tmp129 = (K923879532 * tmp123) + (K382683432 * tmp128);
+		    tmp130 = tmp118 - tmp129;
+		    tmp191 = tmp129 + tmp118;
+		    tmp132 = (K923879532 * tmp128) - (K382683432 * tmp123);
+		    tmp133 = (K382683432 * tmp112) + (K923879532 * tmp117);
+		    tmp134 = tmp132 + tmp133;
+		    tmp197 = tmp132 - tmp133;
+	       }
+	       c_re(inout[13 * iostride]) = tmp107 - tmp130;
+	       c_re(inout[5 * iostride]) = tmp107 + tmp130;
+	       c_re(inout[9 * iostride]) = tmp131 - tmp134;
+	       c_re(inout[iostride]) = tmp131 + tmp134;
+	       c_im(inout[iostride]) = tmp191 + tmp196;
+	       c_im(inout[9 * iostride]) = tmp196 - tmp191;
+	       c_im(inout[5 * iostride]) = tmp197 + tmp198;
+	       c_im(inout[13 * iostride]) = tmp198 - tmp197;
+	  }
+	  {
+	       fftw_real tmp139;
+	       fftw_real tmp147;
+	       fftw_real tmp202;
+	       fftw_real tmp204;
+	       fftw_real tmp146;
+	       fftw_real tmp199;
+	       fftw_real tmp150;
+	       fftw_real tmp203;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp135;
+		    fftw_real tmp138;
+		    fftw_real tmp200;
+		    fftw_real tmp201;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp135 = tmp91 + tmp94;
+		    tmp138 = K707106781 * (tmp136 - tmp137);
+		    tmp139 = tmp135 - tmp138;
+		    tmp147 = tmp135 + tmp138;
+		    tmp200 = K707106781 * (tmp100 - tmp105);
+		    tmp201 = tmp194 - tmp193;
+		    tmp202 = tmp200 + tmp201;
+		    tmp204 = tmp201 - tmp200;
+	       }
+	       {
+		    fftw_real tmp142;
+		    fftw_real tmp145;
+		    fftw_real tmp148;
+		    fftw_real tmp149;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp142 = (K382683432 * tmp140) - (K923879532 * tmp141);
+		    tmp145 = (K382683432 * tmp143) + (K923879532 * tmp144);
+		    tmp146 = tmp142 - tmp145;
+		    tmp199 = tmp145 + tmp142;
+		    tmp148 = (K382683432 * tmp144) - (K923879532 * tmp143);
+		    tmp149 = (K923879532 * tmp140) + (K382683432 * tmp141);
+		    tmp150 = tmp148 + tmp149;
+		    tmp203 = tmp148 - tmp149;
+	       }
+	       c_re(inout[15 * iostride]) = tmp139 - tmp146;
+	       c_re(inout[7 * iostride]) = tmp139 + tmp146;
+	       c_re(inout[11 * iostride]) = tmp147 - tmp150;
+	       c_re(inout[3 * iostride]) = tmp147 + tmp150;
+	       c_im(inout[3 * iostride]) = tmp199 + tmp202;
+	       c_im(inout[11 * iostride]) = tmp202 - tmp199;
+	       c_im(inout[7 * iostride]) = tmp203 + tmp204;
+	       c_im(inout[15 * iostride]) = tmp204 - tmp203;
+	  }
+	  {
+	       fftw_real tmp155;
+	       fftw_real tmp167;
+	       fftw_real tmp188;
+	       fftw_real tmp190;
+	       fftw_real tmp166;
+	       fftw_real tmp189;
+	       fftw_real tmp170;
+	       fftw_real tmp185;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp151;
+		    fftw_real tmp154;
+		    fftw_real tmp186;
+		    fftw_real tmp187;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp151 = tmp7 - tmp18;
+		    tmp154 = tmp152 - tmp153;
+		    tmp155 = tmp151 + tmp154;
+		    tmp167 = tmp151 - tmp154;
+		    tmp186 = tmp30 - tmp41;
+		    tmp187 = tmp180 - tmp177;
+		    tmp188 = tmp186 + tmp187;
+		    tmp190 = tmp187 - tmp186;
+	       }
+	       {
+		    fftw_real tmp160;
+		    fftw_real tmp165;
+		    fftw_real tmp168;
+		    fftw_real tmp169;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp160 = tmp156 - tmp159;
+		    tmp165 = tmp161 + tmp164;
+		    tmp166 = K707106781 * (tmp160 + tmp165);
+		    tmp189 = K707106781 * (tmp160 - tmp165);
+		    tmp168 = tmp164 - tmp161;
+		    tmp169 = tmp156 + tmp159;
+		    tmp170 = K707106781 * (tmp168 - tmp169);
+		    tmp185 = K707106781 * (tmp169 + tmp168);
+	       }
+	       c_re(inout[10 * iostride]) = tmp155 - tmp166;
+	       c_re(inout[2 * iostride]) = tmp155 + tmp166;
+	       c_re(inout[14 * iostride]) = tmp167 - tmp170;
+	       c_re(inout[6 * iostride]) = tmp167 + tmp170;
+	       c_im(inout[2 * iostride]) = tmp185 + tmp188;
+	       c_im(inout[10 * iostride]) = tmp188 - tmp185;
+	       c_im(inout[6 * iostride]) = tmp189 + tmp190;
+	       c_im(inout[14 * iostride]) = tmp190 - tmp189;
+	  }
+	  {
+	       fftw_real tmp43;
+	       fftw_real tmp171;
+	       fftw_real tmp182;
+	       fftw_real tmp184;
+	       fftw_real tmp90;
+	       fftw_real tmp183;
+	       fftw_real tmp174;
+	       fftw_real tmp175;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp19;
+		    fftw_real tmp42;
+		    fftw_real tmp176;
+		    fftw_real tmp181;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp19 = tmp7 + tmp18;
+		    tmp42 = tmp30 + tmp41;
+		    tmp43 = tmp19 + tmp42;
+		    tmp171 = tmp19 - tmp42;
+		    tmp176 = tmp153 + tmp152;
+		    tmp181 = tmp177 + tmp180;
+		    tmp182 = tmp176 + tmp181;
+		    tmp184 = tmp181 - tmp176;
+	       }
+	       {
+		    fftw_real tmp66;
+		    fftw_real tmp89;
+		    fftw_real tmp172;
+		    fftw_real tmp173;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp66 = tmp54 + tmp65;
+		    tmp89 = tmp77 + tmp88;
+		    tmp90 = tmp66 + tmp89;
+		    tmp183 = tmp66 - tmp89;
+		    tmp172 = tmp162 + tmp163;
+		    tmp173 = tmp157 + tmp158;
+		    tmp174 = tmp172 - tmp173;
+		    tmp175 = tmp173 + tmp172;
+	       }
+	       c_re(inout[8 * iostride]) = tmp43 - tmp90;
+	       c_re(inout[0]) = tmp43 + tmp90;
+	       c_re(inout[12 * iostride]) = tmp171 - tmp174;
+	       c_re(inout[4 * iostride]) = tmp171 + tmp174;
+	       c_im(inout[0]) = tmp175 + tmp182;
+	       c_im(inout[8 * iostride]) = tmp182 - tmp175;
+	       c_im(inout[4 * iostride]) = tmp183 + tmp184;
+	       c_im(inout[12 * iostride]) = tmp184 - tmp183;
+	  }
+     }
+}
+
+static const int twiddle_order[] =
+    { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };	/* 15 entries, 1..15 ascending */
+fftw_codelet_desc fftwi_twiddle_16_desc = {	/* positional initializer; field names are declared outside this file */
+     "fftwi_twiddle_16",	/* same text as the function identifier below */
+     (void (*)()) fftwi_twiddle_16,	/* function pointer, cast to an unprototyped type */
+     16,	/* NOTE(review): presumably the transform size -- confirm against fftw_codelet_desc */
+     FFTW_BACKWARD,
+     FFTW_TWIDDLE,
+     363,	/* NOTE(review): meaning not visible here -- check fftw_codelet_desc */
+     15,	/* equals the number of entries in twiddle_order */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_2.c b/src/fftw/ftwi_2.c
new file mode 100644
index 0000000..cd0717c
--- /dev/null
+++ b/src/fftw/ftwi_2.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:24 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 2 */
+
+/*
+ * This function contains 6 FP additions, 4 FP multiplications,
+ * (or, 4 additions, 2 multiplications, 2 fused multiply/add),
+ * 10 stack variables, and 8 memory accesses
+ */
+
+/*
+ * Generator Id's : 
+ * $Id: ftwi_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+/*
+ * In-place radix-2 butterfly with twiddle multiplication.  Runs m times,
+ * advancing inout by dist and W by 1 each pass.  Element 1 (at iostride)
+ * is multiplied by conj(W[0]); element 0 then receives the sum and
+ * element 1 the difference.  Both are loaded before either is stored. */
+void fftwi_twiddle_2(fftw_complex *A, const fftw_complex *W, int iostride,
+		     int m, int dist)
+{
+     fftw_complex *inout = A;
+     int i;
+     for (i = m; i > 0; --i, inout += dist, ++W) {
+	  fftw_real x0r, x0i;	/* element 0 */
+	  fftw_real t1r, t1i;	/* conj(W[0]) * element 1 */
+	  ASSERT_ALIGNED_DOUBLE;
+	  x0r = c_re(inout[0]);
+	  x0i = c_im(inout[0]);
+	  {
+	       fftw_real xr, xi;
+	       fftw_real wr, wi;
+	       ASSERT_ALIGNED_DOUBLE;
+	       xr = c_re(inout[iostride]);
+	       xi = c_im(inout[iostride]);
+	       wr = c_re(W[0]);
+	       wi = c_im(W[0]);
+	       t1r = (wr * xr) + (wi * xi);
+	       t1i = (wr * xi) - (wi * xr);
+	  }
+	  c_re(inout[iostride]) = x0r - t1r;
+	  c_re(inout[0]) = x0r + t1r;
+	  c_im(inout[0]) = t1i + x0i;
+	  c_im(inout[iostride]) = x0i - t1i;
+     }
+}
+
+static const int twiddle_order[] = { 1 };	/* single entry */
+fftw_codelet_desc fftwi_twiddle_2_desc = {	/* positional initializer; field names are declared outside this file */
+     "fftwi_twiddle_2",	/* same text as the function identifier below */
+     (void (*)()) fftwi_twiddle_2,	/* function pointer, cast to an unprototyped type */
+     2,	/* NOTE(review): presumably the transform size -- confirm against fftw_codelet_desc */
+     FFTW_BACKWARD,
+     FFTW_TWIDDLE,
+     55,	/* NOTE(review): meaning not visible here -- check fftw_codelet_desc */
+     1,	/* equals the number of entries in twiddle_order */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_3.c b/src/fftw/ftwi_3.c
new file mode 100644
index 0000000..45808a3
--- /dev/null
+++ b/src/fftw/ftwi_3.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:24 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 3 */
+
+/*
+ * This function contains 16 FP additions, 12 FP multiplications,
+ * (or, 10 additions, 6 multiplications, 6 fused multiply/add),
+ * 14 stack variables, and 12 memory accesses
+ */
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);	/* sqrt(3)/2 */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);	/* 1/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: ftwi_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_3.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+/*
+ * In-place 3-point butterfly with twiddle multiplication.  Runs m times,
+ * advancing inout by dist and W by 2 each pass.  Elements 1 and 2 (at
+ * iostride and 2 * iostride) are multiplied by conj(W[0]) and conj(W[1])
+ * respectively (reading c_re/c_im as real and imaginary parts); element 0
+ * is used as is.
+ *
+ * All three elements are loaded before any is stored, so the in-place
+ * update never reads a slot it has already written.  Operand order inside
+ * each sum and product is kept fixed because it affects rounding.
+ */
+void fftwi_twiddle_3(fftw_complex *A, const fftw_complex *W, int iostride,
+		     int m, int dist)
+{
+     fftw_complex *inout = A;
+     int i;
+     for (i = m; i > 0; --i, inout += dist, W += 2) {
+	  fftw_real x0r, x0i;	/* element 0 */
+	  fftw_real t1r, t1i;	/* conj(W[0]) * element 1 */
+	  fftw_real t2r, t2i;	/* conj(W[1]) * element 2 */
+	  fftw_real sum_r, sum_i;	/* t1 + t2 */
+	  ASSERT_ALIGNED_DOUBLE;
+	  x0r = c_re(inout[0]);
+	  x0i = c_im(inout[0]);
+	  {
+	       fftw_real xr, xi;
+	       fftw_real wr, wi;
+	       ASSERT_ALIGNED_DOUBLE;
+	       xr = c_re(inout[iostride]);
+	       xi = c_im(inout[iostride]);
+	       wr = c_re(W[0]);
+	       wi = c_im(W[0]);
+	       t1r = (wr * xr) + (wi * xi);
+	       t1i = (wr * xi) - (wi * xr);
+	  }
+	  {
+	       fftw_real xr, xi;
+	       fftw_real wr, wi;
+	       ASSERT_ALIGNED_DOUBLE;
+	       xr = c_re(inout[2 * iostride]);
+	       xi = c_im(inout[2 * iostride]);
+	       wr = c_re(W[1]);
+	       wi = c_im(W[1]);
+	       t2r = (wr * xr) + (wi * xi);
+	       t2i = (wr * xi) - (wi * xr);
+	  }
+	  sum_r = t1r + t2r;
+	  sum_i = t1i + t2i;
+	  {
+	       fftw_real base_r, base_i;	/* element 0 minus half of sum */
+	       fftw_real rot_r, rot_i;	/* K866025403-scaled differences */
+	       ASSERT_ALIGNED_DOUBLE;
+	       base_r = x0r - (K500000000 * sum_r);
+	       rot_r = K866025403 * (t2i - t1i);
+	       rot_i = K866025403 * (t1r - t2r);
+	       base_i = x0i - (K500000000 * sum_i);
+	       c_re(inout[0]) = x0r + sum_r;
+	       c_re(inout[2 * iostride]) = base_r - rot_r;
+	       c_re(inout[iostride]) = base_r + rot_r;
+	       c_im(inout[0]) = sum_i + x0i;
+	       c_im(inout[iostride]) = rot_i + base_i;
+	       c_im(inout[2 * iostride]) = base_i - rot_i;
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2 };	/* 2 entries */
+fftw_codelet_desc fftwi_twiddle_3_desc = {	/* positional initializer; field names are declared outside this file */
+     "fftwi_twiddle_3",	/* same text as the function identifier below */
+     (void (*)()) fftwi_twiddle_3,	/* function pointer, cast to an unprototyped type */
+     3,	/* NOTE(review): presumably the transform size -- confirm against fftw_codelet_desc */
+     FFTW_BACKWARD,
+     FFTW_TWIDDLE,
+     77,	/* NOTE(review): meaning not visible here -- check fftw_codelet_desc */
+     2,	/* equals the number of entries in twiddle_order */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_32.c b/src/fftw/ftwi_32.c
new file mode 100644
index 0000000..a392746
--- /dev/null
+++ b/src/fftw/ftwi_32.c
@@ -0,0 +1,1398 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:35 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 32 */
+
+/*
+ * This function contains 434 FP additions, 208 FP multiplications,
+ * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
+ * 90 stack variables, and 128 memory accesses
+ */
+static const fftw_real K555570233 =
+FFTW_KONST(+0.555570233019602224742830813948532874374937191);	/* sin(3*pi/16) */
+static const fftw_real K831469612 =
+FFTW_KONST(+0.831469612302545237078788377617905756738560812);	/* cos(3*pi/16) */
+static const fftw_real K980785280 =
+FFTW_KONST(+0.980785280403230449126182236134239036973933731);	/* cos(pi/16) */
+static const fftw_real K195090322 =
+FFTW_KONST(+0.195090322016128267848284868477022240927691618);	/* sin(pi/16) */
+static const fftw_real K923879532 =
+FFTW_KONST(+0.923879532511286756128183189396788286822416626);	/* cos(pi/8) */
+static const fftw_real K382683432 =
+FFTW_KONST(+0.382683432365089771728459984030398866761344562);	/* sin(pi/8) */
+static const fftw_real K707106781 =
+FFTW_KONST(+0.707106781186547524400844362104849039284835938);	/* sqrt(2)/2 */
+
+/*
+ * Generator Id's : 
+ * $Id: ftwi_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_32.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_twiddle_32(fftw_complex *A, const fftw_complex *W, int iostride,
+		      int m, int dist)
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 31) {
+	  fftw_real tmp19;
+	  fftw_real tmp387;
+	  fftw_real tmp472;
+	  fftw_real tmp486;
+	  fftw_real tmp442;
+	  fftw_real tmp456;
+	  fftw_real tmp191;
+	  fftw_real tmp303;
+	  fftw_real tmp161;
+	  fftw_real tmp403;
+	  fftw_real tmp276;
+	  fftw_real tmp316;
+	  fftw_real tmp372;
+	  fftw_real tmp400;
+	  fftw_real tmp259;
+	  fftw_real tmp319;
+	  fftw_real tmp42;
+	  fftw_real tmp455;
+	  fftw_real tmp201;
+	  fftw_real tmp304;
+	  fftw_real tmp390;
+	  fftw_real tmp437;
+	  fftw_real tmp196;
+	  fftw_real tmp305;
+	  fftw_real tmp184;
+	  fftw_real tmp401;
+	  fftw_real tmp375;
+	  fftw_real tmp404;
+	  fftw_real tmp270;
+	  fftw_real tmp317;
+	  fftw_real tmp279;
+	  fftw_real tmp320;
+	  fftw_real tmp66;
+	  fftw_real tmp395;
+	  fftw_real tmp224;
+	  fftw_real tmp312;
+	  fftw_real tmp357;
+	  fftw_real tmp396;
+	  fftw_real tmp219;
+	  fftw_real tmp311;
+	  fftw_real tmp114;
+	  fftw_real tmp410;
+	  fftw_real tmp249;
+	  fftw_real tmp323;
+	  fftw_real tmp363;
+	  fftw_real tmp407;
+	  fftw_real tmp232;
+	  fftw_real tmp326;
+	  fftw_real tmp89;
+	  fftw_real tmp393;
+	  fftw_real tmp213;
+	  fftw_real tmp309;
+	  fftw_real tmp354;
+	  fftw_real tmp392;
+	  fftw_real tmp208;
+	  fftw_real tmp308;
+	  fftw_real tmp137;
+	  fftw_real tmp408;
+	  fftw_real tmp366;
+	  fftw_real tmp411;
+	  fftw_real tmp243;
+	  fftw_real tmp324;
+	  fftw_real tmp252;
+	  fftw_real tmp327;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp1;
+	       fftw_real tmp440;
+	       fftw_real tmp6;
+	       fftw_real tmp439;
+	       fftw_real tmp12;
+	       fftw_real tmp188;
+	       fftw_real tmp17;
+	       fftw_real tmp189;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);
+	       tmp440 = c_im(inout[0]);
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[16 * iostride]);
+		    tmp5 = c_im(inout[16 * iostride]);
+		    tmp2 = c_re(W[15]);
+		    tmp4 = c_im(W[15]);
+		    tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5);
+		    tmp439 = (tmp2 * tmp5) - (tmp4 * tmp3);
+	       }
+	       {
+		    fftw_real tmp9;
+		    fftw_real tmp11;
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp9 = c_re(inout[8 * iostride]);
+		    tmp11 = c_im(inout[8 * iostride]);
+		    tmp8 = c_re(W[7]);
+		    tmp10 = c_im(W[7]);
+		    tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11);
+		    tmp188 = (tmp8 * tmp11) - (tmp10 * tmp9);
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[24 * iostride]);
+		    tmp16 = c_im(inout[24 * iostride]);
+		    tmp13 = c_re(W[23]);
+		    tmp15 = c_im(W[23]);
+		    tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16);
+		    tmp189 = (tmp13 * tmp16) - (tmp15 * tmp14);
+	       }
+	       {
+		    fftw_real tmp7;
+		    fftw_real tmp18;
+		    fftw_real tmp470;
+		    fftw_real tmp471;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp7 = tmp1 + tmp6;
+		    tmp18 = tmp12 + tmp17;
+		    tmp19 = tmp7 + tmp18;
+		    tmp387 = tmp7 - tmp18;
+		    tmp470 = tmp12 - tmp17;
+		    tmp471 = tmp440 - tmp439;
+		    tmp472 = tmp470 + tmp471;
+		    tmp486 = tmp471 - tmp470;
+	       }
+	       {
+		    fftw_real tmp438;
+		    fftw_real tmp441;
+		    fftw_real tmp187;
+		    fftw_real tmp190;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp438 = tmp188 + tmp189;
+		    tmp441 = tmp439 + tmp440;
+		    tmp442 = tmp438 + tmp441;
+		    tmp456 = tmp441 - tmp438;
+		    tmp187 = tmp1 - tmp6;
+		    tmp190 = tmp188 - tmp189;
+		    tmp191 = tmp187 - tmp190;
+		    tmp303 = tmp187 + tmp190;
+	       }
+	  }
+	  {
+	       fftw_real tmp143;
+	       fftw_real tmp272;
+	       fftw_real tmp159;
+	       fftw_real tmp257;
+	       fftw_real tmp148;
+	       fftw_real tmp273;
+	       fftw_real tmp154;
+	       fftw_real tmp256;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp140;
+		    fftw_real tmp142;
+		    fftw_real tmp139;
+		    fftw_real tmp141;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp140 = c_re(inout[31 * iostride]);
+		    tmp142 = c_im(inout[31 * iostride]);
+		    tmp139 = c_re(W[30]);
+		    tmp141 = c_im(W[30]);
+		    tmp143 = (tmp139 * tmp140) + (tmp141 * tmp142);
+		    tmp272 = (tmp139 * tmp142) - (tmp141 * tmp140);
+	       }
+	       {
+		    fftw_real tmp156;
+		    fftw_real tmp158;
+		    fftw_real tmp155;
+		    fftw_real tmp157;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp156 = c_re(inout[23 * iostride]);
+		    tmp158 = c_im(inout[23 * iostride]);
+		    tmp155 = c_re(W[22]);
+		    tmp157 = c_im(W[22]);
+		    tmp159 = (tmp155 * tmp156) + (tmp157 * tmp158);
+		    tmp257 = (tmp155 * tmp158) - (tmp157 * tmp156);
+	       }
+	       {
+		    fftw_real tmp145;
+		    fftw_real tmp147;
+		    fftw_real tmp144;
+		    fftw_real tmp146;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp145 = c_re(inout[15 * iostride]);
+		    tmp147 = c_im(inout[15 * iostride]);
+		    tmp144 = c_re(W[14]);
+		    tmp146 = c_im(W[14]);
+		    tmp148 = (tmp144 * tmp145) + (tmp146 * tmp147);
+		    tmp273 = (tmp144 * tmp147) - (tmp146 * tmp145);
+	       }
+	       {
+		    fftw_real tmp151;
+		    fftw_real tmp153;
+		    fftw_real tmp150;
+		    fftw_real tmp152;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp151 = c_re(inout[7 * iostride]);
+		    tmp153 = c_im(inout[7 * iostride]);
+		    tmp150 = c_re(W[6]);
+		    tmp152 = c_im(W[6]);
+		    tmp154 = (tmp150 * tmp151) + (tmp152 * tmp153);
+		    tmp256 = (tmp150 * tmp153) - (tmp152 * tmp151);
+	       }
+	       {
+		    fftw_real tmp149;
+		    fftw_real tmp160;
+		    fftw_real tmp274;
+		    fftw_real tmp275;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp149 = tmp143 + tmp148;
+		    tmp160 = tmp154 + tmp159;
+		    tmp161 = tmp149 + tmp160;
+		    tmp403 = tmp149 - tmp160;
+		    tmp274 = tmp272 - tmp273;
+		    tmp275 = tmp154 - tmp159;
+		    tmp276 = tmp274 + tmp275;
+		    tmp316 = tmp274 - tmp275;
+	       }
+	       {
+		    fftw_real tmp370;
+		    fftw_real tmp371;
+		    fftw_real tmp255;
+		    fftw_real tmp258;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp370 = tmp272 + tmp273;
+		    tmp371 = tmp256 + tmp257;
+		    tmp372 = tmp370 + tmp371;
+		    tmp400 = tmp370 - tmp371;
+		    tmp255 = tmp143 - tmp148;
+		    tmp258 = tmp256 - tmp257;
+		    tmp259 = tmp255 - tmp258;
+		    tmp319 = tmp255 + tmp258;
+	       }
+	  }
+	  {
+	       fftw_real tmp24;
+	       fftw_real tmp193;
+	       fftw_real tmp40;
+	       fftw_real tmp199;
+	       fftw_real tmp29;
+	       fftw_real tmp194;
+	       fftw_real tmp35;
+	       fftw_real tmp198;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp21;
+		    fftw_real tmp23;
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp21 = c_re(inout[4 * iostride]);
+		    tmp23 = c_im(inout[4 * iostride]);
+		    tmp20 = c_re(W[3]);
+		    tmp22 = c_im(W[3]);
+		    tmp24 = (tmp20 * tmp21) + (tmp22 * tmp23);
+		    tmp193 = (tmp20 * tmp23) - (tmp22 * tmp21);
+	       }
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp39;
+		    fftw_real tmp36;
+		    fftw_real tmp38;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = c_re(inout[12 * iostride]);
+		    tmp39 = c_im(inout[12 * iostride]);
+		    tmp36 = c_re(W[11]);
+		    tmp38 = c_im(W[11]);
+		    tmp40 = (tmp36 * tmp37) + (tmp38 * tmp39);
+		    tmp199 = (tmp36 * tmp39) - (tmp38 * tmp37);
+	       }
+	       {
+		    fftw_real tmp26;
+		    fftw_real tmp28;
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp26 = c_re(inout[20 * iostride]);
+		    tmp28 = c_im(inout[20 * iostride]);
+		    tmp25 = c_re(W[19]);
+		    tmp27 = c_im(W[19]);
+		    tmp29 = (tmp25 * tmp26) + (tmp27 * tmp28);
+		    tmp194 = (tmp25 * tmp28) - (tmp27 * tmp26);
+	       }
+	       {
+		    fftw_real tmp32;
+		    fftw_real tmp34;
+		    fftw_real tmp31;
+		    fftw_real tmp33;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp32 = c_re(inout[28 * iostride]);
+		    tmp34 = c_im(inout[28 * iostride]);
+		    tmp31 = c_re(W[27]);
+		    tmp33 = c_im(W[27]);
+		    tmp35 = (tmp31 * tmp32) + (tmp33 * tmp34);
+		    tmp198 = (tmp31 * tmp34) - (tmp33 * tmp32);
+	       }
+	       {
+		    fftw_real tmp30;
+		    fftw_real tmp41;
+		    fftw_real tmp197;
+		    fftw_real tmp200;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp30 = tmp24 + tmp29;
+		    tmp41 = tmp35 + tmp40;
+		    tmp42 = tmp30 + tmp41;
+		    tmp455 = tmp30 - tmp41;
+		    tmp197 = tmp35 - tmp40;
+		    tmp200 = tmp198 - tmp199;
+		    tmp201 = tmp197 + tmp200;
+		    tmp304 = tmp200 - tmp197;
+	       }
+	       {
+		    fftw_real tmp388;
+		    fftw_real tmp389;
+		    fftw_real tmp192;
+		    fftw_real tmp195;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp388 = tmp198 + tmp199;
+		    tmp389 = tmp193 + tmp194;
+		    tmp390 = tmp388 - tmp389;
+		    tmp437 = tmp389 + tmp388;
+		    tmp192 = tmp24 - tmp29;
+		    tmp195 = tmp193 - tmp194;
+		    tmp196 = tmp192 - tmp195;
+		    tmp305 = tmp192 + tmp195;
+	       }
+	  }
+	  {
+	       fftw_real tmp166;
+	       fftw_real tmp261;
+	       fftw_real tmp171;
+	       fftw_real tmp262;
+	       fftw_real tmp260;
+	       fftw_real tmp263;
+	       fftw_real tmp177;
+	       fftw_real tmp266;
+	       fftw_real tmp182;
+	       fftw_real tmp267;
+	       fftw_real tmp265;
+	       fftw_real tmp268;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp163;
+		    fftw_real tmp165;
+		    fftw_real tmp162;
+		    fftw_real tmp164;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp163 = c_re(inout[3 * iostride]);
+		    tmp165 = c_im(inout[3 * iostride]);
+		    tmp162 = c_re(W[2]);
+		    tmp164 = c_im(W[2]);
+		    tmp166 = (tmp162 * tmp163) + (tmp164 * tmp165);
+		    tmp261 = (tmp162 * tmp165) - (tmp164 * tmp163);
+	       }
+	       {
+		    fftw_real tmp168;
+		    fftw_real tmp170;
+		    fftw_real tmp167;
+		    fftw_real tmp169;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp168 = c_re(inout[19 * iostride]);
+		    tmp170 = c_im(inout[19 * iostride]);
+		    tmp167 = c_re(W[18]);
+		    tmp169 = c_im(W[18]);
+		    tmp171 = (tmp167 * tmp168) + (tmp169 * tmp170);
+		    tmp262 = (tmp167 * tmp170) - (tmp169 * tmp168);
+	       }
+	       tmp260 = tmp166 - tmp171;
+	       tmp263 = tmp261 - tmp262;
+	       {
+		    fftw_real tmp174;
+		    fftw_real tmp176;
+		    fftw_real tmp173;
+		    fftw_real tmp175;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp174 = c_re(inout[27 * iostride]);
+		    tmp176 = c_im(inout[27 * iostride]);
+		    tmp173 = c_re(W[26]);
+		    tmp175 = c_im(W[26]);
+		    tmp177 = (tmp173 * tmp174) + (tmp175 * tmp176);
+		    tmp266 = (tmp173 * tmp176) - (tmp175 * tmp174);
+	       }
+	       {
+		    fftw_real tmp179;
+		    fftw_real tmp181;
+		    fftw_real tmp178;
+		    fftw_real tmp180;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp179 = c_re(inout[11 * iostride]);
+		    tmp181 = c_im(inout[11 * iostride]);
+		    tmp178 = c_re(W[10]);
+		    tmp180 = c_im(W[10]);
+		    tmp182 = (tmp178 * tmp179) + (tmp180 * tmp181);
+		    tmp267 = (tmp178 * tmp181) - (tmp180 * tmp179);
+	       }
+	       tmp265 = tmp177 - tmp182;
+	       tmp268 = tmp266 - tmp267;
+	       {
+		    fftw_real tmp172;
+		    fftw_real tmp183;
+		    fftw_real tmp373;
+		    fftw_real tmp374;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp172 = tmp166 + tmp171;
+		    tmp183 = tmp177 + tmp182;
+		    tmp184 = tmp172 + tmp183;
+		    tmp401 = tmp172 - tmp183;
+		    tmp373 = tmp261 + tmp262;
+		    tmp374 = tmp266 + tmp267;
+		    tmp375 = tmp373 + tmp374;
+		    tmp404 = tmp374 - tmp373;
+	       }
+	       {
+		    fftw_real tmp264;
+		    fftw_real tmp269;
+		    fftw_real tmp277;
+		    fftw_real tmp278;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp264 = tmp260 - tmp263;
+		    tmp269 = tmp265 + tmp268;
+		    tmp270 = K707106781 * (tmp264 + tmp269);
+		    tmp317 = K707106781 * (tmp264 - tmp269);
+		    tmp277 = tmp260 + tmp263;
+		    tmp278 = tmp268 - tmp265;
+		    tmp279 = K707106781 * (tmp277 + tmp278);
+		    tmp320 = K707106781 * (tmp278 - tmp277);
+	       }
+	  }
+	  {
+	       fftw_real tmp48;
+	       fftw_real tmp215;
+	       fftw_real tmp64;
+	       fftw_real tmp222;
+	       fftw_real tmp53;
+	       fftw_real tmp216;
+	       fftw_real tmp59;
+	       fftw_real tmp221;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp45;
+		    fftw_real tmp47;
+		    fftw_real tmp44;
+		    fftw_real tmp46;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp45 = c_re(inout[2 * iostride]);
+		    tmp47 = c_im(inout[2 * iostride]);
+		    tmp44 = c_re(W[1]);
+		    tmp46 = c_im(W[1]);
+		    tmp48 = (tmp44 * tmp45) + (tmp46 * tmp47);
+		    tmp215 = (tmp44 * tmp47) - (tmp46 * tmp45);
+	       }
+	       {
+		    fftw_real tmp61;
+		    fftw_real tmp63;
+		    fftw_real tmp60;
+		    fftw_real tmp62;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp61 = c_re(inout[26 * iostride]);
+		    tmp63 = c_im(inout[26 * iostride]);
+		    tmp60 = c_re(W[25]);
+		    tmp62 = c_im(W[25]);
+		    tmp64 = (tmp60 * tmp61) + (tmp62 * tmp63);
+		    tmp222 = (tmp60 * tmp63) - (tmp62 * tmp61);
+	       }
+	       {
+		    fftw_real tmp50;
+		    fftw_real tmp52;
+		    fftw_real tmp49;
+		    fftw_real tmp51;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp50 = c_re(inout[18 * iostride]);
+		    tmp52 = c_im(inout[18 * iostride]);
+		    tmp49 = c_re(W[17]);
+		    tmp51 = c_im(W[17]);
+		    tmp53 = (tmp49 * tmp50) + (tmp51 * tmp52);
+		    tmp216 = (tmp49 * tmp52) - (tmp51 * tmp50);
+	       }
+	       {
+		    fftw_real tmp56;
+		    fftw_real tmp58;
+		    fftw_real tmp55;
+		    fftw_real tmp57;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp56 = c_re(inout[10 * iostride]);
+		    tmp58 = c_im(inout[10 * iostride]);
+		    tmp55 = c_re(W[9]);
+		    tmp57 = c_im(W[9]);
+		    tmp59 = (tmp55 * tmp56) + (tmp57 * tmp58);
+		    tmp221 = (tmp55 * tmp58) - (tmp57 * tmp56);
+	       }
+	       {
+		    fftw_real tmp54;
+		    fftw_real tmp65;
+		    fftw_real tmp220;
+		    fftw_real tmp223;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp54 = tmp48 + tmp53;
+		    tmp65 = tmp59 + tmp64;
+		    tmp66 = tmp54 + tmp65;
+		    tmp395 = tmp54 - tmp65;
+		    tmp220 = tmp48 - tmp53;
+		    tmp223 = tmp221 - tmp222;
+		    tmp224 = tmp220 - tmp223;
+		    tmp312 = tmp220 + tmp223;
+	       }
+	       {
+		    fftw_real tmp355;
+		    fftw_real tmp356;
+		    fftw_real tmp217;
+		    fftw_real tmp218;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp355 = tmp215 + tmp216;
+		    tmp356 = tmp221 + tmp222;
+		    tmp357 = tmp355 + tmp356;
+		    tmp396 = tmp355 - tmp356;
+		    tmp217 = tmp215 - tmp216;
+		    tmp218 = tmp59 - tmp64;
+		    tmp219 = tmp217 + tmp218;
+		    tmp311 = tmp217 - tmp218;
+	       }
+	  }
+	  {
+	       fftw_real tmp96;
+	       fftw_real tmp245;
+	       fftw_real tmp112;
+	       fftw_real tmp230;
+	       fftw_real tmp101;
+	       fftw_real tmp246;
+	       fftw_real tmp107;
+	       fftw_real tmp229;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp93;
+		    fftw_real tmp95;
+		    fftw_real tmp92;
+		    fftw_real tmp94;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp93 = c_re(inout[iostride]);
+		    tmp95 = c_im(inout[iostride]);
+		    tmp92 = c_re(W[0]);
+		    tmp94 = c_im(W[0]);
+		    tmp96 = (tmp92 * tmp93) + (tmp94 * tmp95);
+		    tmp245 = (tmp92 * tmp95) - (tmp94 * tmp93);
+	       }
+	       {
+		    fftw_real tmp109;
+		    fftw_real tmp111;
+		    fftw_real tmp108;
+		    fftw_real tmp110;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp109 = c_re(inout[25 * iostride]);
+		    tmp111 = c_im(inout[25 * iostride]);
+		    tmp108 = c_re(W[24]);
+		    tmp110 = c_im(W[24]);
+		    tmp112 = (tmp108 * tmp109) + (tmp110 * tmp111);
+		    tmp230 = (tmp108 * tmp111) - (tmp110 * tmp109);
+	       }
+	       {
+		    fftw_real tmp98;
+		    fftw_real tmp100;
+		    fftw_real tmp97;
+		    fftw_real tmp99;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp98 = c_re(inout[17 * iostride]);
+		    tmp100 = c_im(inout[17 * iostride]);
+		    tmp97 = c_re(W[16]);
+		    tmp99 = c_im(W[16]);
+		    tmp101 = (tmp97 * tmp98) + (tmp99 * tmp100);
+		    tmp246 = (tmp97 * tmp100) - (tmp99 * tmp98);
+	       }
+	       {
+		    fftw_real tmp104;
+		    fftw_real tmp106;
+		    fftw_real tmp103;
+		    fftw_real tmp105;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp104 = c_re(inout[9 * iostride]);
+		    tmp106 = c_im(inout[9 * iostride]);
+		    tmp103 = c_re(W[8]);
+		    tmp105 = c_im(W[8]);
+		    tmp107 = (tmp103 * tmp104) + (tmp105 * tmp106);
+		    tmp229 = (tmp103 * tmp106) - (tmp105 * tmp104);
+	       }
+	       {
+		    fftw_real tmp102;
+		    fftw_real tmp113;
+		    fftw_real tmp247;
+		    fftw_real tmp248;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp102 = tmp96 + tmp101;
+		    tmp113 = tmp107 + tmp112;
+		    tmp114 = tmp102 + tmp113;
+		    tmp410 = tmp102 - tmp113;
+		    tmp247 = tmp245 - tmp246;
+		    tmp248 = tmp107 - tmp112;
+		    tmp249 = tmp247 + tmp248;
+		    tmp323 = tmp247 - tmp248;
+	       }
+	       {
+		    fftw_real tmp361;
+		    fftw_real tmp362;
+		    fftw_real tmp228;
+		    fftw_real tmp231;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp361 = tmp245 + tmp246;
+		    tmp362 = tmp229 + tmp230;
+		    tmp363 = tmp361 + tmp362;
+		    tmp407 = tmp361 - tmp362;
+		    tmp228 = tmp96 - tmp101;
+		    tmp231 = tmp229 - tmp230;
+		    tmp232 = tmp228 - tmp231;
+		    tmp326 = tmp228 + tmp231;
+	       }
+	  }
+	  {
+	       fftw_real tmp71;
+	       fftw_real tmp204;
+	       fftw_real tmp87;
+	       fftw_real tmp211;
+	       fftw_real tmp76;
+	       fftw_real tmp205;
+	       fftw_real tmp82;
+	       fftw_real tmp210;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp68;
+		    fftw_real tmp70;
+		    fftw_real tmp67;
+		    fftw_real tmp69;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp68 = c_re(inout[30 * iostride]);
+		    tmp70 = c_im(inout[30 * iostride]);
+		    tmp67 = c_re(W[29]);
+		    tmp69 = c_im(W[29]);
+		    tmp71 = (tmp67 * tmp68) + (tmp69 * tmp70);
+		    tmp204 = (tmp67 * tmp70) - (tmp69 * tmp68);
+	       }
+	       {
+		    fftw_real tmp84;
+		    fftw_real tmp86;
+		    fftw_real tmp83;
+		    fftw_real tmp85;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp84 = c_re(inout[22 * iostride]);
+		    tmp86 = c_im(inout[22 * iostride]);
+		    tmp83 = c_re(W[21]);
+		    tmp85 = c_im(W[21]);
+		    tmp87 = (tmp83 * tmp84) + (tmp85 * tmp86);
+		    tmp211 = (tmp83 * tmp86) - (tmp85 * tmp84);
+	       }
+	       {
+		    fftw_real tmp73;
+		    fftw_real tmp75;
+		    fftw_real tmp72;
+		    fftw_real tmp74;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp73 = c_re(inout[14 * iostride]);
+		    tmp75 = c_im(inout[14 * iostride]);
+		    tmp72 = c_re(W[13]);
+		    tmp74 = c_im(W[13]);
+		    tmp76 = (tmp72 * tmp73) + (tmp74 * tmp75);
+		    tmp205 = (tmp72 * tmp75) - (tmp74 * tmp73);
+	       }
+	       {
+		    fftw_real tmp79;
+		    fftw_real tmp81;
+		    fftw_real tmp78;
+		    fftw_real tmp80;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp79 = c_re(inout[6 * iostride]);
+		    tmp81 = c_im(inout[6 * iostride]);
+		    tmp78 = c_re(W[5]);
+		    tmp80 = c_im(W[5]);
+		    tmp82 = (tmp78 * tmp79) + (tmp80 * tmp81);
+		    tmp210 = (tmp78 * tmp81) - (tmp80 * tmp79);
+	       }
+	       {
+		    fftw_real tmp77;
+		    fftw_real tmp88;
+		    fftw_real tmp209;
+		    fftw_real tmp212;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp77 = tmp71 + tmp76;
+		    tmp88 = tmp82 + tmp87;
+		    tmp89 = tmp77 + tmp88;
+		    tmp393 = tmp77 - tmp88;
+		    tmp209 = tmp71 - tmp76;
+		    tmp212 = tmp210 - tmp211;
+		    tmp213 = tmp209 - tmp212;
+		    tmp309 = tmp209 + tmp212;
+	       }
+	       {
+		    fftw_real tmp352;
+		    fftw_real tmp353;
+		    fftw_real tmp206;
+		    fftw_real tmp207;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp352 = tmp204 + tmp205;
+		    tmp353 = tmp210 + tmp211;
+		    tmp354 = tmp352 + tmp353;
+		    tmp392 = tmp352 - tmp353;
+		    tmp206 = tmp204 - tmp205;
+		    tmp207 = tmp82 - tmp87;
+		    tmp208 = tmp206 + tmp207;
+		    tmp308 = tmp206 - tmp207;
+	       }
+	  }
+	  {
+	       fftw_real tmp119;
+	       fftw_real tmp234;
+	       fftw_real tmp124;
+	       fftw_real tmp235;
+	       fftw_real tmp233;
+	       fftw_real tmp236;
+	       fftw_real tmp130;
+	       fftw_real tmp239;
+	       fftw_real tmp135;
+	       fftw_real tmp240;
+	       fftw_real tmp238;
+	       fftw_real tmp241;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp116;
+		    fftw_real tmp118;
+		    fftw_real tmp115;
+		    fftw_real tmp117;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp116 = c_re(inout[5 * iostride]);
+		    tmp118 = c_im(inout[5 * iostride]);
+		    tmp115 = c_re(W[4]);
+		    tmp117 = c_im(W[4]);
+		    tmp119 = (tmp115 * tmp116) + (tmp117 * tmp118);
+		    tmp234 = (tmp115 * tmp118) - (tmp117 * tmp116);
+	       }
+	       {
+		    fftw_real tmp121;
+		    fftw_real tmp123;
+		    fftw_real tmp120;
+		    fftw_real tmp122;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp121 = c_re(inout[21 * iostride]);
+		    tmp123 = c_im(inout[21 * iostride]);
+		    tmp120 = c_re(W[20]);
+		    tmp122 = c_im(W[20]);
+		    tmp124 = (tmp120 * tmp121) + (tmp122 * tmp123);
+		    tmp235 = (tmp120 * tmp123) - (tmp122 * tmp121);
+	       }
+	       tmp233 = tmp119 - tmp124;
+	       tmp236 = tmp234 - tmp235;
+	       {
+		    fftw_real tmp127;
+		    fftw_real tmp129;
+		    fftw_real tmp126;
+		    fftw_real tmp128;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp127 = c_re(inout[29 * iostride]);
+		    tmp129 = c_im(inout[29 * iostride]);
+		    tmp126 = c_re(W[28]);
+		    tmp128 = c_im(W[28]);
+		    tmp130 = (tmp126 * tmp127) + (tmp128 * tmp129);
+		    tmp239 = (tmp126 * tmp129) - (tmp128 * tmp127);
+	       }
+	       {
+		    fftw_real tmp132;
+		    fftw_real tmp134;
+		    fftw_real tmp131;
+		    fftw_real tmp133;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp132 = c_re(inout[13 * iostride]);
+		    tmp134 = c_im(inout[13 * iostride]);
+		    tmp131 = c_re(W[12]);
+		    tmp133 = c_im(W[12]);
+		    tmp135 = (tmp131 * tmp132) + (tmp133 * tmp134);
+		    tmp240 = (tmp131 * tmp134) - (tmp133 * tmp132);
+	       }
+	       tmp238 = tmp130 - tmp135;
+	       tmp241 = tmp239 - tmp240;
+	       {
+		    fftw_real tmp125;
+		    fftw_real tmp136;
+		    fftw_real tmp364;
+		    fftw_real tmp365;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp125 = tmp119 + tmp124;
+		    tmp136 = tmp130 + tmp135;
+		    tmp137 = tmp125 + tmp136;
+		    tmp408 = tmp125 - tmp136;
+		    tmp364 = tmp234 + tmp235;
+		    tmp365 = tmp239 + tmp240;
+		    tmp366 = tmp364 + tmp365;
+		    tmp411 = tmp365 - tmp364;
+	       }
+	       {
+		    fftw_real tmp237;
+		    fftw_real tmp242;
+		    fftw_real tmp250;
+		    fftw_real tmp251;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp237 = tmp233 - tmp236;
+		    tmp242 = tmp238 + tmp241;
+		    tmp243 = K707106781 * (tmp237 + tmp242);
+		    tmp324 = K707106781 * (tmp237 - tmp242);
+		    tmp250 = tmp233 + tmp236;
+		    tmp251 = tmp241 - tmp238;
+		    tmp252 = K707106781 * (tmp250 + tmp251);
+		    tmp327 = K707106781 * (tmp251 - tmp250);
+	       }
+	  }
+	  {
+	       fftw_real tmp91;
+	       fftw_real tmp383;
+	       fftw_real tmp444;
+	       fftw_real tmp446;
+	       fftw_real tmp186;
+	       fftw_real tmp445;
+	       fftw_real tmp386;
+	       fftw_real tmp435;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp43;
+		    fftw_real tmp90;
+		    fftw_real tmp436;
+		    fftw_real tmp443;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp43 = tmp19 + tmp42;
+		    tmp90 = tmp66 + tmp89;
+		    tmp91 = tmp43 + tmp90;
+		    tmp383 = tmp43 - tmp90;
+		    tmp436 = tmp357 + tmp354;
+		    tmp443 = tmp437 + tmp442;
+		    tmp444 = tmp436 + tmp443;
+		    tmp446 = tmp443 - tmp436;
+	       }
+	       {
+		    fftw_real tmp138;
+		    fftw_real tmp185;
+		    fftw_real tmp384;
+		    fftw_real tmp385;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp138 = tmp114 + tmp137;
+		    tmp185 = tmp161 + tmp184;
+		    tmp186 = tmp138 + tmp185;
+		    tmp445 = tmp138 - tmp185;
+		    tmp384 = tmp372 + tmp375;
+		    tmp385 = tmp363 + tmp366;
+		    tmp386 = tmp384 - tmp385;
+		    tmp435 = tmp385 + tmp384;
+	       }
+	       c_re(inout[16 * iostride]) = tmp91 - tmp186;
+	       c_re(inout[0]) = tmp91 + tmp186;
+	       c_re(inout[24 * iostride]) = tmp383 - tmp386;
+	       c_re(inout[8 * iostride]) = tmp383 + tmp386;
+	       c_im(inout[0]) = tmp435 + tmp444;
+	       c_im(inout[16 * iostride]) = tmp444 - tmp435;
+	       c_im(inout[8 * iostride]) = tmp445 + tmp446;
+	       c_im(inout[24 * iostride]) = tmp446 - tmp445;
+	  }
+	  {
+	       fftw_real tmp359;
+	       fftw_real tmp379;
+	       fftw_real tmp450;
+	       fftw_real tmp452;
+	       fftw_real tmp368;
+	       fftw_real tmp381;
+	       fftw_real tmp377;
+	       fftw_real tmp380;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp351;
+		    fftw_real tmp358;
+		    fftw_real tmp448;
+		    fftw_real tmp449;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp351 = tmp19 - tmp42;
+		    tmp358 = tmp354 - tmp357;
+		    tmp359 = tmp351 + tmp358;
+		    tmp379 = tmp351 - tmp358;
+		    tmp448 = tmp66 - tmp89;
+		    tmp449 = tmp442 - tmp437;
+		    tmp450 = tmp448 + tmp449;
+		    tmp452 = tmp449 - tmp448;
+	       }
+	       {
+		    fftw_real tmp360;
+		    fftw_real tmp367;
+		    fftw_real tmp369;
+		    fftw_real tmp376;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp360 = tmp114 - tmp137;
+		    tmp367 = tmp363 - tmp366;
+		    tmp368 = tmp360 - tmp367;
+		    tmp381 = tmp360 + tmp367;
+		    tmp369 = tmp161 - tmp184;
+		    tmp376 = tmp372 - tmp375;
+		    tmp377 = tmp369 + tmp376;
+		    tmp380 = tmp376 - tmp369;
+	       }
+	       {
+		    fftw_real tmp378;
+		    fftw_real tmp451;
+		    fftw_real tmp382;
+		    fftw_real tmp447;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp378 = K707106781 * (tmp368 + tmp377);
+		    c_re(inout[20 * iostride]) = tmp359 - tmp378;
+		    c_re(inout[4 * iostride]) = tmp359 + tmp378;
+		    tmp451 = K707106781 * (tmp368 - tmp377);
+		    c_im(inout[12 * iostride]) = tmp451 + tmp452;
+		    c_im(inout[28 * iostride]) = tmp452 - tmp451;
+		    tmp382 = K707106781 * (tmp380 - tmp381);
+		    c_re(inout[28 * iostride]) = tmp379 - tmp382;
+		    c_re(inout[12 * iostride]) = tmp379 + tmp382;
+		    tmp447 = K707106781 * (tmp381 + tmp380);
+		    c_im(inout[4 * iostride]) = tmp447 + tmp450;
+		    c_im(inout[20 * iostride]) = tmp450 - tmp447;
+	       }
+	  }
+	  {
+	       fftw_real tmp391;
+	       fftw_real tmp419;
+	       fftw_real tmp398;
+	       fftw_real tmp454;
+	       fftw_real tmp422;
+	       fftw_real tmp462;
+	       fftw_real tmp406;
+	       fftw_real tmp417;
+	       fftw_real tmp457;
+	       fftw_real tmp463;
+	       fftw_real tmp426;
+	       fftw_real tmp433;
+	       fftw_real tmp413;
+	       fftw_real tmp416;
+	       fftw_real tmp429;
+	       fftw_real tmp432;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp394;
+		    fftw_real tmp397;
+		    fftw_real tmp424;
+		    fftw_real tmp425;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp391 = tmp387 - tmp390;
+		    tmp419 = tmp387 + tmp390;
+		    tmp394 = tmp392 - tmp393;
+		    tmp397 = tmp395 + tmp396;
+		    tmp398 = K707106781 * (tmp394 - tmp397);
+		    tmp454 = K707106781 * (tmp397 + tmp394);
+		    {
+			 fftw_real tmp420;
+			 fftw_real tmp421;
+			 fftw_real tmp402;
+			 fftw_real tmp405;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp420 = tmp395 - tmp396;
+			 tmp421 = tmp393 + tmp392;
+			 tmp422 = K707106781 * (tmp420 + tmp421);
+			 tmp462 = K707106781 * (tmp420 - tmp421);
+			 tmp402 = tmp400 - tmp401;
+			 tmp405 = tmp403 - tmp404;
+			 tmp406 =
+			     (K382683432 * tmp402) - (K923879532 * tmp405);
+			 tmp417 =
+			     (K923879532 * tmp402) + (K382683432 * tmp405);
+		    }
+		    tmp457 = tmp455 + tmp456;
+		    tmp463 = tmp456 - tmp455;
+		    tmp424 = tmp400 + tmp401;
+		    tmp425 = tmp403 + tmp404;
+		    tmp426 = (K923879532 * tmp424) - (K382683432 * tmp425);
+		    tmp433 = (K382683432 * tmp424) + (K923879532 * tmp425);
+		    {
+			 fftw_real tmp409;
+			 fftw_real tmp412;
+			 fftw_real tmp427;
+			 fftw_real tmp428;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp409 = tmp407 - tmp408;
+			 tmp412 = tmp410 - tmp411;
+			 tmp413 =
+			     (K382683432 * tmp409) + (K923879532 * tmp412);
+			 tmp416 =
+			     (K382683432 * tmp412) - (K923879532 * tmp409);
+			 tmp427 = tmp407 + tmp408;
+			 tmp428 = tmp410 + tmp411;
+			 tmp429 =
+			     (K923879532 * tmp427) + (K382683432 * tmp428);
+			 tmp432 =
+			     (K923879532 * tmp428) - (K382683432 * tmp427);
+		    }
+	       }
+	       {
+		    fftw_real tmp399;
+		    fftw_real tmp414;
+		    fftw_real tmp415;
+		    fftw_real tmp418;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp399 = tmp391 - tmp398;
+		    tmp414 = tmp406 - tmp413;
+		    c_re(inout[30 * iostride]) = tmp399 - tmp414;
+		    c_re(inout[14 * iostride]) = tmp399 + tmp414;
+		    tmp415 = tmp391 + tmp398;
+		    tmp418 = tmp416 + tmp417;
+		    c_re(inout[22 * iostride]) = tmp415 - tmp418;
+		    c_re(inout[6 * iostride]) = tmp415 + tmp418;
+	       }
+	       {
+		    fftw_real tmp465;
+		    fftw_real tmp466;
+		    fftw_real tmp461;
+		    fftw_real tmp464;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp465 = tmp416 - tmp417;
+		    tmp466 = tmp463 - tmp462;
+		    c_im(inout[14 * iostride]) = tmp465 + tmp466;
+		    c_im(inout[30 * iostride]) = tmp466 - tmp465;
+		    tmp461 = tmp413 + tmp406;
+		    tmp464 = tmp462 + tmp463;
+		    c_im(inout[6 * iostride]) = tmp461 + tmp464;
+		    c_im(inout[22 * iostride]) = tmp464 - tmp461;
+	       }
+	       {
+		    fftw_real tmp423;
+		    fftw_real tmp430;
+		    fftw_real tmp431;
+		    fftw_real tmp434;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp423 = tmp419 - tmp422;
+		    tmp430 = tmp426 - tmp429;
+		    c_re(inout[26 * iostride]) = tmp423 - tmp430;
+		    c_re(inout[10 * iostride]) = tmp423 + tmp430;
+		    tmp431 = tmp419 + tmp422;
+		    tmp434 = tmp432 + tmp433;
+		    c_re(inout[18 * iostride]) = tmp431 - tmp434;
+		    c_re(inout[2 * iostride]) = tmp431 + tmp434;
+	       }
+	       {
+		    fftw_real tmp459;
+		    fftw_real tmp460;
+		    fftw_real tmp453;
+		    fftw_real tmp458;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp459 = tmp432 - tmp433;
+		    tmp460 = tmp457 - tmp454;
+		    c_im(inout[10 * iostride]) = tmp459 + tmp460;
+		    c_im(inout[26 * iostride]) = tmp460 - tmp459;
+		    tmp453 = tmp429 + tmp426;
+		    tmp458 = tmp454 + tmp457;
+		    c_im(inout[2 * iostride]) = tmp453 + tmp458;
+		    c_im(inout[18 * iostride]) = tmp458 - tmp453;
+	       }
+	  }
+	  {
+	       fftw_real tmp307;
+	       fftw_real tmp335;
+	       fftw_real tmp338;
+	       fftw_real tmp492;
+	       fftw_real tmp487;
+	       fftw_real tmp493;
+	       fftw_real tmp314;
+	       fftw_real tmp484;
+	       fftw_real tmp322;
+	       fftw_real tmp333;
+	       fftw_real tmp342;
+	       fftw_real tmp349;
+	       fftw_real tmp329;
+	       fftw_real tmp332;
+	       fftw_real tmp345;
+	       fftw_real tmp348;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp306;
+		    fftw_real tmp336;
+		    fftw_real tmp337;
+		    fftw_real tmp485;
+		    fftw_real tmp310;
+		    fftw_real tmp313;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp306 = K707106781 * (tmp304 - tmp305);
+		    tmp307 = tmp303 - tmp306;
+		    tmp335 = tmp303 + tmp306;
+		    tmp336 = (K382683432 * tmp312) - (K923879532 * tmp311);
+		    tmp337 = (K923879532 * tmp308) + (K382683432 * tmp309);
+		    tmp338 = tmp336 + tmp337;
+		    tmp492 = tmp336 - tmp337;
+		    tmp485 = K707106781 * (tmp196 - tmp201);
+		    tmp487 = tmp485 + tmp486;
+		    tmp493 = tmp486 - tmp485;
+		    tmp310 = (K382683432 * tmp308) - (K923879532 * tmp309);
+		    tmp313 = (K382683432 * tmp311) + (K923879532 * tmp312);
+		    tmp314 = tmp310 - tmp313;
+		    tmp484 = tmp313 + tmp310;
+	       }
+	       {
+		    fftw_real tmp318;
+		    fftw_real tmp321;
+		    fftw_real tmp340;
+		    fftw_real tmp341;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp318 = tmp316 - tmp317;
+		    tmp321 = tmp319 - tmp320;
+		    tmp322 = (K195090322 * tmp318) - (K980785280 * tmp321);
+		    tmp333 = (K980785280 * tmp318) + (K195090322 * tmp321);
+		    tmp340 = tmp316 + tmp317;
+		    tmp341 = tmp319 + tmp320;
+		    tmp342 = (K831469612 * tmp340) - (K555570233 * tmp341);
+		    tmp349 = (K555570233 * tmp340) + (K831469612 * tmp341);
+	       }
+	       {
+		    fftw_real tmp325;
+		    fftw_real tmp328;
+		    fftw_real tmp343;
+		    fftw_real tmp344;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp325 = tmp323 - tmp324;
+		    tmp328 = tmp326 - tmp327;
+		    tmp329 = (K195090322 * tmp325) + (K980785280 * tmp328);
+		    tmp332 = (K195090322 * tmp328) - (K980785280 * tmp325);
+		    tmp343 = tmp323 + tmp324;
+		    tmp344 = tmp326 + tmp327;
+		    tmp345 = (K831469612 * tmp343) + (K555570233 * tmp344);
+		    tmp348 = (K831469612 * tmp344) - (K555570233 * tmp343);
+	       }
+	       {
+		    fftw_real tmp315;
+		    fftw_real tmp330;
+		    fftw_real tmp331;
+		    fftw_real tmp334;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp315 = tmp307 - tmp314;
+		    tmp330 = tmp322 - tmp329;
+		    c_re(inout[31 * iostride]) = tmp315 - tmp330;
+		    c_re(inout[15 * iostride]) = tmp315 + tmp330;
+		    tmp331 = tmp307 + tmp314;
+		    tmp334 = tmp332 + tmp333;
+		    c_re(inout[23 * iostride]) = tmp331 - tmp334;
+		    c_re(inout[7 * iostride]) = tmp331 + tmp334;
+	       }
+	       {
+		    fftw_real tmp495;
+		    fftw_real tmp496;
+		    fftw_real tmp491;
+		    fftw_real tmp494;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp495 = tmp332 - tmp333;
+		    tmp496 = tmp493 - tmp492;
+		    c_im(inout[15 * iostride]) = tmp495 + tmp496;
+		    c_im(inout[31 * iostride]) = tmp496 - tmp495;
+		    tmp491 = tmp329 + tmp322;
+		    tmp494 = tmp492 + tmp493;
+		    c_im(inout[7 * iostride]) = tmp491 + tmp494;
+		    c_im(inout[23 * iostride]) = tmp494 - tmp491;
+	       }
+	       {
+		    fftw_real tmp339;
+		    fftw_real tmp346;
+		    fftw_real tmp347;
+		    fftw_real tmp350;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp339 = tmp335 - tmp338;
+		    tmp346 = tmp342 - tmp345;
+		    c_re(inout[27 * iostride]) = tmp339 - tmp346;
+		    c_re(inout[11 * iostride]) = tmp339 + tmp346;
+		    tmp347 = tmp335 + tmp338;
+		    tmp350 = tmp348 + tmp349;
+		    c_re(inout[19 * iostride]) = tmp347 - tmp350;
+		    c_re(inout[3 * iostride]) = tmp347 + tmp350;
+	       }
+	       {
+		    fftw_real tmp489;
+		    fftw_real tmp490;
+		    fftw_real tmp483;
+		    fftw_real tmp488;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp489 = tmp348 - tmp349;
+		    tmp490 = tmp487 - tmp484;
+		    c_im(inout[11 * iostride]) = tmp489 + tmp490;
+		    c_im(inout[27 * iostride]) = tmp490 - tmp489;
+		    tmp483 = tmp345 + tmp342;
+		    tmp488 = tmp484 + tmp487;
+		    c_im(inout[3 * iostride]) = tmp483 + tmp488;
+		    c_im(inout[19 * iostride]) = tmp488 - tmp483;
+	       }
+	  }
+	  {
+	       fftw_real tmp203;
+	       fftw_real tmp287;
+	       fftw_real tmp290;
+	       fftw_real tmp478;
+	       fftw_real tmp473;
+	       fftw_real tmp479;
+	       fftw_real tmp226;
+	       fftw_real tmp468;
+	       fftw_real tmp254;
+	       fftw_real tmp285;
+	       fftw_real tmp294;
+	       fftw_real tmp301;
+	       fftw_real tmp281;
+	       fftw_real tmp284;
+	       fftw_real tmp297;
+	       fftw_real tmp300;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp202;
+		    fftw_real tmp288;
+		    fftw_real tmp289;
+		    fftw_real tmp469;
+		    fftw_real tmp214;
+		    fftw_real tmp225;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp202 = K707106781 * (tmp196 + tmp201);
+		    tmp203 = tmp191 - tmp202;
+		    tmp287 = tmp191 + tmp202;
+		    tmp288 = (K923879532 * tmp224) - (K382683432 * tmp219);
+		    tmp289 = (K382683432 * tmp208) + (K923879532 * tmp213);
+		    tmp290 = tmp288 + tmp289;
+		    tmp478 = tmp288 - tmp289;
+		    tmp469 = K707106781 * (tmp305 + tmp304);
+		    tmp473 = tmp469 + tmp472;
+		    tmp479 = tmp472 - tmp469;
+		    tmp214 = (K923879532 * tmp208) - (K382683432 * tmp213);
+		    tmp225 = (K923879532 * tmp219) + (K382683432 * tmp224);
+		    tmp226 = tmp214 - tmp225;
+		    tmp468 = tmp225 + tmp214;
+	       }
+	       {
+		    fftw_real tmp244;
+		    fftw_real tmp253;
+		    fftw_real tmp292;
+		    fftw_real tmp293;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp244 = tmp232 - tmp243;
+		    tmp253 = tmp249 - tmp252;
+		    tmp254 = (K555570233 * tmp244) - (K831469612 * tmp253);
+		    tmp285 = (K831469612 * tmp244) + (K555570233 * tmp253);
+		    tmp292 = tmp232 + tmp243;
+		    tmp293 = tmp249 + tmp252;
+		    tmp294 = (K980785280 * tmp292) - (K195090322 * tmp293);
+		    tmp301 = (K195090322 * tmp292) + (K980785280 * tmp293);
+	       }
+	       {
+		    fftw_real tmp271;
+		    fftw_real tmp280;
+		    fftw_real tmp295;
+		    fftw_real tmp296;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp271 = tmp259 - tmp270;
+		    tmp280 = tmp276 - tmp279;
+		    tmp281 = (K555570233 * tmp271) + (K831469612 * tmp280);
+		    tmp284 = (K555570233 * tmp280) - (K831469612 * tmp271);
+		    tmp295 = tmp259 + tmp270;
+		    tmp296 = tmp276 + tmp279;
+		    tmp297 = (K980785280 * tmp295) + (K195090322 * tmp296);
+		    tmp300 = (K980785280 * tmp296) - (K195090322 * tmp295);
+	       }
+	       {
+		    fftw_real tmp227;
+		    fftw_real tmp282;
+		    fftw_real tmp283;
+		    fftw_real tmp286;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp227 = tmp203 + tmp226;
+		    tmp282 = tmp254 + tmp281;
+		    c_re(inout[21 * iostride]) = tmp227 - tmp282;
+		    c_re(inout[5 * iostride]) = tmp227 + tmp282;
+		    tmp283 = tmp203 - tmp226;
+		    tmp286 = tmp284 - tmp285;
+		    c_re(inout[29 * iostride]) = tmp283 - tmp286;
+		    c_re(inout[13 * iostride]) = tmp283 + tmp286;
+	       }
+	       {
+		    fftw_real tmp477;
+		    fftw_real tmp480;
+		    fftw_real tmp481;
+		    fftw_real tmp482;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp477 = tmp285 + tmp284;
+		    tmp480 = tmp478 + tmp479;
+		    c_im(inout[5 * iostride]) = tmp477 + tmp480;
+		    c_im(inout[21 * iostride]) = tmp480 - tmp477;
+		    tmp481 = tmp254 - tmp281;
+		    tmp482 = tmp479 - tmp478;
+		    c_im(inout[13 * iostride]) = tmp481 + tmp482;
+		    c_im(inout[29 * iostride]) = tmp482 - tmp481;
+	       }
+	       {
+		    fftw_real tmp291;
+		    fftw_real tmp298;
+		    fftw_real tmp299;
+		    fftw_real tmp302;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp291 = tmp287 + tmp290;
+		    tmp298 = tmp294 + tmp297;
+		    c_re(inout[17 * iostride]) = tmp291 - tmp298;
+		    c_re(inout[iostride]) = tmp291 + tmp298;
+		    tmp299 = tmp287 - tmp290;
+		    tmp302 = tmp300 - tmp301;
+		    c_re(inout[25 * iostride]) = tmp299 - tmp302;
+		    c_re(inout[9 * iostride]) = tmp299 + tmp302;
+	       }
+	       {
+		    fftw_real tmp467;
+		    fftw_real tmp474;
+		    fftw_real tmp475;
+		    fftw_real tmp476;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp467 = tmp301 + tmp300;
+		    tmp474 = tmp468 + tmp473;
+		    c_im(inout[iostride]) = tmp467 + tmp474;
+		    c_im(inout[17 * iostride]) = tmp474 - tmp467;
+		    tmp475 = tmp294 - tmp297;
+		    tmp476 = tmp473 - tmp468;
+		    c_im(inout[9 * iostride]) = tmp475 + tmp476;
+		    c_im(inout[25 * iostride]) = tmp476 - tmp475;
+	       }
+	  }
+     }
+}
+
+static const int twiddle_order[] =
+    { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };  /* the integers 1..31 in ascending order (31 entries) */
+fftw_codelet_desc fftwi_twiddle_32_desc = {  /* descriptor record for fftwi_twiddle_32; field meanings come from the fftw_codelet_desc declaration, which is not in this file */
+     "fftwi_twiddle_32",  /* name string, spelled the same as the function identifier */
+     (void (*)()) fftwi_twiddle_32,  /* the codelet itself, cast to an unprototyped function pointer */
+     32,  /* presumably the transform size handled by the codelet -- confirm against fftw_codelet_desc */
+     FFTW_BACKWARD,
+     FFTW_TWIDDLE,
+     715,  /* NOTE(review): opaque integer; looks like a per-codelet identifier -- confirm in fftw.h */
+     31,  /* equals the number of entries in twiddle_order above */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_4.c b/src/fftw/ftwi_4.c
new file mode 100644
index 0000000..ee5eedb
--- /dev/null
+++ b/src/fftw/ftwi_4.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:25 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 4 */
+
+/*
+ * This function contains 22 FP additions, 12 FP multiplications,
+ * (or, 16 additions, 6 multiplications, 6 fused multiply/add),
+ * 14 stack variables, and 16 memory accesses
+ */
+
+/*
+ * Generator Id's : 
+ * $Id: ftwi_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_4.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_twiddle_4(fftw_complex *A, const fftw_complex *W, int iostride,  /* combines 4 strided elements of A in place, m times, using 3 W entries per pass */
+		     int m, int dist)  /* m: number of passes; dist: step applied to the A cursor between passes; iostride: spacing of the 4 elements */
+{
+     int i;  /* counts down from m to 1 */
+     fftw_complex *inout;  /* base of the 4 elements handled by the current pass */
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 3) {  /* each pass moves inout forward by dist and W forward by 3 */
+	  fftw_real tmp1;
+	  fftw_real tmp25;
+	  fftw_real tmp6;
+	  fftw_real tmp24;
+	  fftw_real tmp12;
+	  fftw_real tmp20;
+	  fftw_real tmp17;
+	  fftw_real tmp21;
+	  ASSERT_ALIGNED_DOUBLE;  /* macro from the FFTW headers; presumably the check requested by -magic-alignment-check -- confirm in fftw-int.h */
+	  tmp1 = c_re(inout[0]);  /* x0 = inout[0] is used as-is, with no W factor (assumes c_re/c_im select real/imaginary parts) */
+	  tmp25 = c_im(inout[0]);
+	  {
+	       fftw_real tmp3;
+	       fftw_real tmp5;
+	       fftw_real tmp2;
+	       fftw_real tmp4;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp3 = c_re(inout[2 * iostride]);
+	       tmp5 = c_im(inout[2 * iostride]);
+	       tmp2 = c_re(W[1]);
+	       tmp4 = c_im(W[1]);
+	       tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5);  /* Re(x2), where x2 = inout[2*iostride] * conj(W[1]) */
+	       tmp24 = (tmp2 * tmp5) - (tmp4 * tmp3);  /* Im(x2) */
+	  }
+	  {
+	       fftw_real tmp9;
+	       fftw_real tmp11;
+	       fftw_real tmp8;
+	       fftw_real tmp10;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp9 = c_re(inout[iostride]);
+	       tmp11 = c_im(inout[iostride]);
+	       tmp8 = c_re(W[0]);
+	       tmp10 = c_im(W[0]);
+	       tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11);  /* Re(x1), where x1 = inout[iostride] * conj(W[0]) */
+	       tmp20 = (tmp8 * tmp11) - (tmp10 * tmp9);  /* Im(x1) */
+	  }
+	  {
+	       fftw_real tmp14;
+	       fftw_real tmp16;
+	       fftw_real tmp13;
+	       fftw_real tmp15;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp14 = c_re(inout[3 * iostride]);
+	       tmp16 = c_im(inout[3 * iostride]);
+	       tmp13 = c_re(W[2]);
+	       tmp15 = c_im(W[2]);
+	       tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16);  /* Re(x3), where x3 = inout[3*iostride] * conj(W[2]) */
+	       tmp21 = (tmp13 * tmp16) - (tmp15 * tmp14);  /* Im(x3) */
+	  }
+	  {
+	       fftw_real tmp7;
+	       fftw_real tmp18;
+	       fftw_real tmp27;
+	       fftw_real tmp28;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp7 = tmp1 + tmp6;  /* Re(x0 + x2) */
+	       tmp18 = tmp12 + tmp17;  /* Re(x1 + x3) */
+	       c_re(inout[2 * iostride]) = tmp7 - tmp18;  /* Re((x0 + x2) - (x1 + x3)) */
+	       c_re(inout[0]) = tmp7 + tmp18;  /* Re(x0 + x1 + x2 + x3) */
+	       tmp27 = tmp12 - tmp17;  /* Re(x1 - x3) */
+	       tmp28 = tmp25 - tmp24;  /* Im(x0 - x2) */
+	       c_im(inout[iostride]) = tmp27 + tmp28;  /* Im((x0 - x2) + i*(x1 - x3)) */
+	       c_im(inout[3 * iostride]) = tmp28 - tmp27;  /* Im((x0 - x2) - i*(x1 - x3)) */
+	  }
+	  {
+	       fftw_real tmp23;
+	       fftw_real tmp26;
+	       fftw_real tmp19;
+	       fftw_real tmp22;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp23 = tmp20 + tmp21;  /* Im(x1 + x3) */
+	       tmp26 = tmp24 + tmp25;  /* Im(x0 + x2) */
+	       c_im(inout[0]) = tmp23 + tmp26;  /* Im(x0 + x1 + x2 + x3) */
+	       c_im(inout[2 * iostride]) = tmp26 - tmp23;  /* Im((x0 + x2) - (x1 + x3)) */
+	       tmp19 = tmp1 - tmp6;  /* Re(x0 - x2) */
+	       tmp22 = tmp20 - tmp21;  /* Im(x1 - x3) */
+	       c_re(inout[iostride]) = tmp19 - tmp22;  /* Re((x0 - x2) + i*(x1 - x3)) */
+	       c_re(inout[3 * iostride]) = tmp19 + tmp22;  /* Re((x0 - x2) - i*(x1 - x3)) */
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3 };  /* the integers 1..3 in ascending order (3 entries) */
+fftw_codelet_desc fftwi_twiddle_4_desc = {  /* descriptor record for fftwi_twiddle_4; field meanings come from the fftw_codelet_desc declaration, which is not in this file */
+     "fftwi_twiddle_4",  /* name string, spelled the same as the function identifier */
+     (void (*)()) fftwi_twiddle_4,  /* the codelet itself, cast to an unprototyped function pointer */
+     4,  /* presumably the transform size handled by the codelet -- confirm against fftw_codelet_desc */
+     FFTW_BACKWARD,
+     FFTW_TWIDDLE,
+     99,  /* NOTE(review): opaque integer; looks like a per-codelet identifier -- confirm in fftw.h */
+     3,  /* equals the length of twiddle_order and the per-pass W advance (W = W + 3) in fftwi_twiddle_4 */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_5.c b/src/fftw/ftwi_5.c
new file mode 100644
index 0000000..c27b152
--- /dev/null
+++ b/src/fftw/ftwi_5.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:25 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 5 */
+
+/*
+ * This function contains 40 FP additions, 28 FP multiplications,
+ * (or, 26 additions, 14 multiplications, 14 fused multiply/add),
+ * 26 stack variables, and 20 memory accesses
+ */
+static const fftw_real K559016994 =
+FFTW_KONST(+0.559016994374947424102293417182819058860154590);	/* sqrt(5)/4 */
+static const fftw_real K250000000 =
+FFTW_KONST(+0.250000000000000000000000000000000000000000000);	/* 1/4 */
+static const fftw_real K951056516 =
+FFTW_KONST(+0.951056516295153572116439333379382143405698634);	/* sin(2*pi/5) */
+static const fftw_real K587785252 =
+FFTW_KONST(+0.587785252292473129168705954639072768597652438);	/* sin(pi/5) */
+
+/*
+ * Generator Id's : 
+ * $Id: ftwi_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_5.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_twiddle_5(fftw_complex *A, const fftw_complex *W, int iostride,
+		     int m, int dist)
+{	/* 5-point pass performed in place on A; machine-generated code. c_re/c_im are read here as the real/imaginary accessors from the included headers. */
+     int i;	/* remaining group count, runs m..1 */
+     fftw_complex *inout;	/* element 0 of the group being processed */
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 4) {	/* m groups, dist elements apart; each group uses W[0..3] */
+	  fftw_real tmp1;	/* the twelve values declared here carry data from the load stage to the output stages */
+	  fftw_real tmp40;
+	  fftw_real tmp30;
+	  fftw_real tmp33;
+	  fftw_real tmp37;
+	  fftw_real tmp38;
+	  fftw_real tmp39;
+	  fftw_real tmp42;
+	  fftw_real tmp41;
+	  fftw_real tmp12;
+	  fftw_real tmp23;
+	  fftw_real tmp24;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(inout[0]);	/* element 0 is read without any W factor */
+	  tmp40 = c_im(inout[0]);
+	  {	/* load elements 1..4, multiply element k by conj(W[k-1]), then form sums and differences */
+	       fftw_real tmp6;
+	       fftw_real tmp28;
+	       fftw_real tmp22;
+	       fftw_real tmp32;
+	       fftw_real tmp11;
+	       fftw_real tmp29;
+	       fftw_real tmp17;
+	       fftw_real tmp31;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[iostride]);
+		    tmp5 = c_im(inout[iostride]);
+		    tmp2 = c_re(W[0]);
+		    tmp4 = c_im(W[0]);
+		    tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5);	/* Re(x1 * conj(W[0])), x1 = inout[iostride] */
+		    tmp28 = (tmp2 * tmp5) - (tmp4 * tmp3);	/* Im(x1 * conj(W[0])) */
+	       }
+	       {
+		    fftw_real tmp19;
+		    fftw_real tmp21;
+		    fftw_real tmp18;
+		    fftw_real tmp20;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp19 = c_re(inout[3 * iostride]);
+		    tmp21 = c_im(inout[3 * iostride]);
+		    tmp18 = c_re(W[2]);
+		    tmp20 = c_im(W[2]);
+		    tmp22 = (tmp18 * tmp19) + (tmp20 * tmp21);	/* Re(x3 * conj(W[2])) */
+		    tmp32 = (tmp18 * tmp21) - (tmp20 * tmp19);	/* Im(x3 * conj(W[2])) */
+	       }
+	       {
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    fftw_real tmp7;
+		    fftw_real tmp9;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp8 = c_re(inout[4 * iostride]);
+		    tmp10 = c_im(inout[4 * iostride]);
+		    tmp7 = c_re(W[3]);
+		    tmp9 = c_im(W[3]);
+		    tmp11 = (tmp7 * tmp8) + (tmp9 * tmp10);	/* Re(x4 * conj(W[3])) */
+		    tmp29 = (tmp7 * tmp10) - (tmp9 * tmp8);	/* Im(x4 * conj(W[3])) */
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[2 * iostride]);
+		    tmp16 = c_im(inout[2 * iostride]);
+		    tmp13 = c_re(W[1]);
+		    tmp15 = c_im(W[1]);
+		    tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16);	/* Re(x2 * conj(W[1])) */
+		    tmp31 = (tmp13 * tmp16) - (tmp15 * tmp14);	/* Im(x2 * conj(W[1])) */
+	       }
+	       tmp30 = tmp28 - tmp29;	/* Im(y1 - y4), where yk is element k after its W multiplication */
+	       tmp33 = tmp31 - tmp32;	/* Im(y2 - y3) */
+	       tmp37 = tmp28 + tmp29;	/* Im(y1 + y4) */
+	       tmp38 = tmp31 + tmp32;	/* Im(y2 + y3) */
+	       tmp39 = tmp37 + tmp38;	/* Im(y1 + y2 + y3 + y4) */
+	       tmp42 = tmp17 - tmp22;	/* Re(y2 - y3) */
+	       tmp41 = tmp6 - tmp11;	/* Re(y1 - y4) */
+	       tmp12 = tmp6 + tmp11;	/* Re(y1 + y4) */
+	       tmp23 = tmp17 + tmp22;	/* Re(y2 + y3) */
+	       tmp24 = tmp12 + tmp23;	/* Re(y1 + y2 + y3 + y4) */
+	  }
+	  c_re(inout[0]) = tmp1 + tmp24;	/* output 0, real part: sum over all five inputs */
+	  {	/* real parts of outputs 1..4 */
+	       fftw_real tmp34;
+	       fftw_real tmp36;
+	       fftw_real tmp27;
+	       fftw_real tmp35;
+	       fftw_real tmp25;
+	       fftw_real tmp26;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp34 = (K587785252 * tmp30) - (K951056516 * tmp33);
+	       tmp36 = (K951056516 * tmp30) + (K587785252 * tmp33);
+	       tmp25 = tmp1 - (K250000000 * tmp24);
+	       tmp26 = K559016994 * (tmp12 - tmp23);
+	       tmp27 = tmp25 - tmp26;
+	       tmp35 = tmp26 + tmp25;
+	       c_re(inout[2 * iostride]) = tmp27 - tmp34;
+	       c_re(inout[3 * iostride]) = tmp27 + tmp34;
+	       c_re(inout[iostride]) = tmp35 - tmp36;
+	       c_re(inout[4 * iostride]) = tmp35 + tmp36;
+	  }
+	  c_im(inout[0]) = tmp39 + tmp40;	/* output 0, imaginary part: sum over all five inputs */
+	  {	/* imaginary parts of outputs 1..4 */
+	       fftw_real tmp43;
+	       fftw_real tmp47;
+	       fftw_real tmp46;
+	       fftw_real tmp48;
+	       fftw_real tmp44;
+	       fftw_real tmp45;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp43 = (K951056516 * tmp41) + (K587785252 * tmp42);
+	       tmp47 = (K587785252 * tmp41) - (K951056516 * tmp42);
+	       tmp44 = K559016994 * (tmp37 - tmp38);
+	       tmp45 = tmp40 - (K250000000 * tmp39);
+	       tmp46 = tmp44 + tmp45;
+	       tmp48 = tmp45 - tmp44;
+	       c_im(inout[iostride]) = tmp43 + tmp46;
+	       c_im(inout[4 * iostride]) = tmp46 - tmp43;
+	       c_im(inout[2 * iostride]) = tmp47 + tmp48;
+	       c_im(inout[3 * iostride]) = tmp48 - tmp47;
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3, 4 };	/* four entries, matching the four W elements consumed per loop pass in fftwi_twiddle_5 */
+fftw_codelet_desc fftwi_twiddle_5_desc = {	/* positional initializer; field meanings come from fftw_codelet_desc, declared outside this file */
+     "fftwi_twiddle_5",	/* spelled the same as the function identifier */
+     (void (*)()) fftwi_twiddle_5,	/* function stored through an unprototyped pointer type */
+     5,	/* presumably the transform size -- confirm against fftw_codelet_desc */
+     FFTW_BACKWARD,
+     FFTW_TWIDDLE,
+     121,	/* NOTE(review): generator-assigned value; meaning not visible here */
+     4,	/* equals the number of entries in twiddle_order */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_6.c b/src/fftw/ftwi_6.c
new file mode 100644
index 0000000..c53b117
--- /dev/null
+++ b/src/fftw/ftwi_6.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:26 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 6 */
+
+/*
+ * This function contains 46 FP additions, 28 FP multiplications,
+ * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
+ * 22 stack variables, and 24 memory accesses
+ */
+static const fftw_real K500000000 =
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);	/* 1/2 */
+static const fftw_real K866025403 =
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);	/* sqrt(3)/2 = sin(pi/3) */
+
+/*
+ * Generator Id's : 
+ * $Id: ftwi_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_6.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_twiddle_6(fftw_complex *A, const fftw_complex *W, int iostride,
+		     int m, int dist)
+{	/* 6-point pass performed in place on A; machine-generated code. c_re/c_im are read here as the real/imaginary accessors from the included headers. */
+     int i;	/* remaining group count, runs m..1 */
+     fftw_complex *inout;	/* element 0 of the group being processed */
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 5) {	/* m groups, dist elements apart; each group uses W[0..4] */
+	  fftw_real tmp7;	/* the twelve values declared here carry data from the three load stages to the two output stages */
+	  fftw_real tmp31;
+	  fftw_real tmp50;
+	  fftw_real tmp54;
+	  fftw_real tmp29;
+	  fftw_real tmp33;
+	  fftw_real tmp38;
+	  fftw_real tmp44;
+	  fftw_real tmp18;
+	  fftw_real tmp32;
+	  fftw_real tmp41;
+	  fftw_real tmp45;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {	/* elements 0 and 3 */
+	       fftw_real tmp1;
+	       fftw_real tmp49;
+	       fftw_real tmp6;
+	       fftw_real tmp48;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);	/* element 0 is read without any W factor */
+	       tmp49 = c_im(inout[0]);
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[3 * iostride]);
+		    tmp5 = c_im(inout[3 * iostride]);
+		    tmp2 = c_re(W[2]);
+		    tmp4 = c_im(W[2]);
+		    tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5);	/* Re(x3 * conj(W[2])), x3 = inout[3 * iostride] */
+		    tmp48 = (tmp2 * tmp5) - (tmp4 * tmp3);	/* Im(x3 * conj(W[2])) */
+	       }
+	       tmp7 = tmp1 - tmp6;	/* Re(x0 - y3), where yk is element k times conj(W[k-1]) */
+	       tmp31 = tmp1 + tmp6;	/* Re(x0 + y3) */
+	       tmp50 = tmp48 + tmp49;	/* Im(x0 + y3) */
+	       tmp54 = tmp49 - tmp48;	/* Im(x0 - y3) */
+	  }
+	  {	/* elements 4 and 1 */
+	       fftw_real tmp23;
+	       fftw_real tmp36;
+	       fftw_real tmp28;
+	       fftw_real tmp37;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    fftw_real tmp19;
+		    fftw_real tmp21;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp20 = c_re(inout[4 * iostride]);
+		    tmp22 = c_im(inout[4 * iostride]);
+		    tmp19 = c_re(W[3]);
+		    tmp21 = c_im(W[3]);
+		    tmp23 = (tmp19 * tmp20) + (tmp21 * tmp22);	/* Re(y4) */
+		    tmp36 = (tmp19 * tmp22) - (tmp21 * tmp20);	/* Im(y4) */
+	       }
+	       {
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    fftw_real tmp24;
+		    fftw_real tmp26;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp25 = c_re(inout[iostride]);
+		    tmp27 = c_im(inout[iostride]);
+		    tmp24 = c_re(W[0]);
+		    tmp26 = c_im(W[0]);
+		    tmp28 = (tmp24 * tmp25) + (tmp26 * tmp27);	/* Re(y1) */
+		    tmp37 = (tmp24 * tmp27) - (tmp26 * tmp25);	/* Im(y1) */
+	       }
+	       tmp29 = tmp23 - tmp28;	/* Re(y4 - y1) */
+	       tmp33 = tmp23 + tmp28;	/* Re(y4 + y1) */
+	       tmp38 = tmp36 - tmp37;	/* Im(y4 - y1) */
+	       tmp44 = tmp36 + tmp37;	/* Im(y4 + y1) */
+	  }
+	  {	/* elements 2 and 5 */
+	       fftw_real tmp12;
+	       fftw_real tmp39;
+	       fftw_real tmp17;
+	       fftw_real tmp40;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp9;
+		    fftw_real tmp11;
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp9 = c_re(inout[2 * iostride]);
+		    tmp11 = c_im(inout[2 * iostride]);
+		    tmp8 = c_re(W[1]);
+		    tmp10 = c_im(W[1]);
+		    tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11);	/* Re(y2) */
+		    tmp39 = (tmp8 * tmp11) - (tmp10 * tmp9);	/* Im(y2) */
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[5 * iostride]);
+		    tmp16 = c_im(inout[5 * iostride]);
+		    tmp13 = c_re(W[4]);
+		    tmp15 = c_im(W[4]);
+		    tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16);	/* Re(y5) */
+		    tmp40 = (tmp13 * tmp16) - (tmp15 * tmp14);	/* Im(y5) */
+	       }
+	       tmp18 = tmp12 - tmp17;	/* Re(y2 - y5) */
+	       tmp32 = tmp12 + tmp17;	/* Re(y2 + y5) */
+	       tmp41 = tmp39 - tmp40;	/* Im(y2 - y5) */
+	       tmp45 = tmp39 + tmp40;	/* Im(y2 + y5) */
+	  }
+	  {	/* outputs 1, 3 and 5, built from the difference terms above */
+	       fftw_real tmp42;
+	       fftw_real tmp30;
+	       fftw_real tmp35;
+	       fftw_real tmp53;
+	       fftw_real tmp55;
+	       fftw_real tmp56;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp42 = K866025403 * (tmp38 - tmp41);
+	       tmp30 = tmp18 + tmp29;
+	       tmp35 = tmp7 - (K500000000 * tmp30);
+	       c_re(inout[3 * iostride]) = tmp7 + tmp30;
+	       c_re(inout[iostride]) = tmp35 + tmp42;
+	       c_re(inout[5 * iostride]) = tmp35 - tmp42;
+	       tmp53 = K866025403 * (tmp18 - tmp29);
+	       tmp55 = tmp41 + tmp38;
+	       tmp56 = tmp54 - (K500000000 * tmp55);
+	       c_im(inout[iostride]) = tmp53 + tmp56;
+	       c_im(inout[5 * iostride]) = tmp56 - tmp53;
+	       c_im(inout[3 * iostride]) = tmp55 + tmp54;
+	  }
+	  {	/* outputs 0, 2 and 4, built from the sum terms above */
+	       fftw_real tmp46;
+	       fftw_real tmp34;
+	       fftw_real tmp43;
+	       fftw_real tmp52;
+	       fftw_real tmp47;
+	       fftw_real tmp51;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp46 = K866025403 * (tmp44 - tmp45);
+	       tmp34 = tmp32 + tmp33;
+	       tmp43 = tmp31 - (K500000000 * tmp34);
+	       c_re(inout[0]) = tmp31 + tmp34;	/* output 0, real part: sum over all six inputs */
+	       c_re(inout[4 * iostride]) = tmp43 + tmp46;
+	       c_re(inout[2 * iostride]) = tmp43 - tmp46;
+	       tmp52 = K866025403 * (tmp32 - tmp33);
+	       tmp47 = tmp45 + tmp44;
+	       tmp51 = tmp50 - (K500000000 * tmp47);
+	       c_im(inout[0]) = tmp47 + tmp50;	/* output 0, imaginary part: sum over all six inputs */
+	       c_im(inout[4 * iostride]) = tmp52 + tmp51;
+	       c_im(inout[2 * iostride]) = tmp51 - tmp52;
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3, 4, 5 };	/* five entries, matching the five W elements consumed per loop pass in fftwi_twiddle_6 */
+fftw_codelet_desc fftwi_twiddle_6_desc = {	/* positional initializer; field meanings come from fftw_codelet_desc, declared outside this file */
+     "fftwi_twiddle_6",	/* spelled the same as the function identifier */
+     (void (*)()) fftwi_twiddle_6,	/* function stored through an unprototyped pointer type */
+     6,	/* presumably the transform size -- confirm against fftw_codelet_desc */
+     FFTW_BACKWARD,
+     FFTW_TWIDDLE,
+     143,	/* NOTE(review): generator-assigned value; meaning not visible here */
+     5,	/* equals the number of entries in twiddle_order */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_64.c b/src/fftw/ftwi_64.c
new file mode 100644
index 0000000..3c35cb9
--- /dev/null
+++ b/src/fftw/ftwi_64.c
@@ -0,0 +1,3207 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:40 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 64 */
+
+/*
+ * This function contains 1038 FP additions, 500 FP multiplications,
+ * (or, 808 additions, 270 multiplications, 230 fused multiply/add),
+ * 162 stack variables, and 256 memory accesses
+ */
+static const fftw_real K098017140 =
+FFTW_KONST(+0.098017140329560601994195563888641845861136673);	/* sin(pi/32) */
+static const fftw_real K995184726 =
+FFTW_KONST(+0.995184726672196886244836953109479921575474869);	/* cos(pi/32) */
+static const fftw_real K773010453 =
+FFTW_KONST(+0.773010453362736960810906609758469800971041293);	/* cos(7*pi/32) */
+static const fftw_real K634393284 =
+FFTW_KONST(+0.634393284163645498215171613225493370675687095);	/* sin(7*pi/32) */
+static const fftw_real K195090322 =
+FFTW_KONST(+0.195090322016128267848284868477022240927691618);	/* sin(pi/16) */
+static const fftw_real K980785280 =
+FFTW_KONST(+0.980785280403230449126182236134239036973933731);	/* cos(pi/16) */
+static const fftw_real K471396736 =
+FFTW_KONST(+0.471396736825997648556387625905254377657460319);	/* sin(5*pi/32) */
+static const fftw_real K881921264 =
+FFTW_KONST(+0.881921264348355029712756863660388349508442621);	/* cos(5*pi/32) */
+static const fftw_real K956940335 =
+FFTW_KONST(+0.956940335732208864935797886980269969482849206);	/* cos(3*pi/32) */
+static const fftw_real K290284677 =
+FFTW_KONST(+0.290284677254462367636192375817395274691476278);	/* sin(3*pi/32) */
+static const fftw_real K831469612 =
+FFTW_KONST(+0.831469612302545237078788377617905756738560812);	/* cos(3*pi/16) */
+static const fftw_real K555570233 =
+FFTW_KONST(+0.555570233019602224742830813948532874374937191);	/* sin(3*pi/16) */
+static const fftw_real K707106781 =
+FFTW_KONST(+0.707106781186547524400844362104849039284835938);	/* sqrt(2)/2 = cos(pi/4) */
+static const fftw_real K382683432 =
+FFTW_KONST(+0.382683432365089771728459984030398866761344562);	/* sin(pi/8) */
+static const fftw_real K923879532 =
+FFTW_KONST(+0.923879532511286756128183189396788286822416626);	/* cos(pi/8) */
+
+/*
+ * Generator Id's : 
+ * $Id: ftwi_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_64.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_twiddle_64(fftw_complex *A, const fftw_complex *W, int iostride,
+		      int m, int dist)
+{
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 63) {
+	  fftw_real tmp19;
+	  fftw_real tmp791;
+	  fftw_real tmp1109;
+	  fftw_real tmp1139;
+	  fftw_real tmp1047;
+	  fftw_real tmp1077;
+	  fftw_real tmp383;
+	  fftw_real tmp655;
+	  fftw_real tmp66;
+	  fftw_real tmp800;
+	  fftw_real tmp909;
+	  fftw_real tmp993;
+	  fftw_real tmp417;
+	  fftw_real tmp608;
+	  fftw_real tmp665;
+	  fftw_real tmp744;
+	  fftw_real tmp42;
+	  fftw_real tmp1076;
+	  fftw_real tmp794;
+	  fftw_real tmp1042;
+	  fftw_real tmp394;
+	  fftw_real tmp1138;
+	  fftw_real tmp658;
+	  fftw_real tmp1106;
+	  fftw_real tmp329;
+	  fftw_real tmp1007;
+	  fftw_real tmp863;
+	  fftw_real tmp923;
+	  fftw_real tmp976;
+	  fftw_real tmp1004;
+	  fftw_real tmp880;
+	  fftw_real tmp920;
+	  fftw_real tmp535;
+	  fftw_real tmp703;
+	  fftw_real tmp576;
+	  fftw_real tmp714;
+	  fftw_real tmp579;
+	  fftw_real tmp704;
+	  fftw_real tmp546;
+	  fftw_real tmp715;
+	  fftw_real tmp376;
+	  fftw_real tmp1005;
+	  fftw_real tmp868;
+	  fftw_real tmp881;
+	  fftw_real tmp979;
+	  fftw_real tmp1008;
+	  fftw_real tmp873;
+	  fftw_real tmp882;
+	  fftw_real tmp558;
+	  fftw_real tmp582;
+	  fftw_real tmp708;
+	  fftw_real tmp718;
+	  fftw_real tmp569;
+	  fftw_real tmp581;
+	  fftw_real tmp711;
+	  fftw_real tmp717;
+	  fftw_real tmp89;
+	  fftw_real tmp805;
+	  fftw_real tmp908;
+	  fftw_real tmp992;
+	  fftw_real tmp406;
+	  fftw_real tmp609;
+	  fftw_real tmp662;
+	  fftw_real tmp745;
+	  fftw_real tmp161;
+	  fftw_real tmp184;
+	  fftw_real tmp997;
+	  fftw_real tmp812;
+	  fftw_real tmp912;
+	  fftw_real tmp956;
+	  fftw_real tmp957;
+	  fftw_real tmp996;
+	  fftw_real tmp817;
+	  fftw_real tmp913;
+	  fftw_real tmp424;
+	  fftw_real tmp668;
+	  fftw_real tmp441;
+	  fftw_real tmp671;
+	  fftw_real tmp444;
+	  fftw_real tmp669;
+	  fftw_real tmp435;
+	  fftw_real tmp672;
+	  fftw_real tmp114;
+	  fftw_real tmp137;
+	  fftw_real tmp999;
+	  fftw_real tmp823;
+	  fftw_real tmp915;
+	  fftw_real tmp959;
+	  fftw_real tmp960;
+	  fftw_real tmp1000;
+	  fftw_real tmp828;
+	  fftw_real tmp916;
+	  fftw_real tmp451;
+	  fftw_real tmp678;
+	  fftw_real tmp468;
+	  fftw_real tmp675;
+	  fftw_real tmp471;
+	  fftw_real tmp679;
+	  fftw_real tmp462;
+	  fftw_real tmp676;
+	  fftw_real tmp234;
+	  fftw_real tmp1014;
+	  fftw_real tmp836;
+	  fftw_real tmp930;
+	  fftw_real tmp967;
+	  fftw_real tmp1011;
+	  fftw_real tmp853;
+	  fftw_real tmp927;
+	  fftw_real tmp480;
+	  fftw_real tmp684;
+	  fftw_real tmp521;
+	  fftw_real tmp695;
+	  fftw_real tmp524;
+	  fftw_real tmp685;
+	  fftw_real tmp491;
+	  fftw_real tmp696;
+	  fftw_real tmp281;
+	  fftw_real tmp1012;
+	  fftw_real tmp841;
+	  fftw_real tmp854;
+	  fftw_real tmp970;
+	  fftw_real tmp1015;
+	  fftw_real tmp846;
+	  fftw_real tmp855;
+	  fftw_real tmp503;
+	  fftw_real tmp527;
+	  fftw_real tmp689;
+	  fftw_real tmp699;
+	  fftw_real tmp514;
+	  fftw_real tmp526;
+	  fftw_real tmp692;
+	  fftw_real tmp698;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {
+	       fftw_real tmp1;
+	       fftw_real tmp1045;
+	       fftw_real tmp6;
+	       fftw_real tmp1044;
+	       fftw_real tmp12;
+	       fftw_real tmp380;
+	       fftw_real tmp17;
+	       fftw_real tmp381;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);
+	       tmp1045 = c_im(inout[0]);
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[32 * iostride]);
+		    tmp5 = c_im(inout[32 * iostride]);
+		    tmp2 = c_re(W[31]);
+		    tmp4 = c_im(W[31]);
+		    tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5);
+		    tmp1044 = (tmp2 * tmp5) - (tmp4 * tmp3);
+	       }
+	       {
+		    fftw_real tmp9;
+		    fftw_real tmp11;
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp9 = c_re(inout[16 * iostride]);
+		    tmp11 = c_im(inout[16 * iostride]);
+		    tmp8 = c_re(W[15]);
+		    tmp10 = c_im(W[15]);
+		    tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11);
+		    tmp380 = (tmp8 * tmp11) - (tmp10 * tmp9);
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[48 * iostride]);
+		    tmp16 = c_im(inout[48 * iostride]);
+		    tmp13 = c_re(W[47]);
+		    tmp15 = c_im(W[47]);
+		    tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16);
+		    tmp381 = (tmp13 * tmp16) - (tmp15 * tmp14);
+	       }
+	       {
+		    fftw_real tmp7;
+		    fftw_real tmp18;
+		    fftw_real tmp1107;
+		    fftw_real tmp1108;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp7 = tmp1 + tmp6;
+		    tmp18 = tmp12 + tmp17;
+		    tmp19 = tmp7 + tmp18;
+		    tmp791 = tmp7 - tmp18;
+		    tmp1107 = tmp12 - tmp17;
+		    tmp1108 = tmp1045 - tmp1044;
+		    tmp1109 = tmp1107 + tmp1108;
+		    tmp1139 = tmp1108 - tmp1107;
+	       }
+	       {
+		    fftw_real tmp1043;
+		    fftw_real tmp1046;
+		    fftw_real tmp379;
+		    fftw_real tmp382;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1043 = tmp380 + tmp381;
+		    tmp1046 = tmp1044 + tmp1045;
+		    tmp1047 = tmp1043 + tmp1046;
+		    tmp1077 = tmp1046 - tmp1043;
+		    tmp379 = tmp1 - tmp6;
+		    tmp382 = tmp380 - tmp381;
+		    tmp383 = tmp379 - tmp382;
+		    tmp655 = tmp379 + tmp382;
+	       }
+	  }
+	  {
+	       fftw_real tmp54;
+	       fftw_real tmp412;
+	       fftw_real tmp409;
+	       fftw_real tmp797;
+	       fftw_real tmp65;
+	       fftw_real tmp410;
+	       fftw_real tmp415;
+	       fftw_real tmp798;
+	       fftw_real tmp796;
+	       fftw_real tmp799;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp48;
+		    fftw_real tmp407;
+		    fftw_real tmp53;
+		    fftw_real tmp408;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp45;
+			 fftw_real tmp47;
+			 fftw_real tmp44;
+			 fftw_real tmp46;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp45 = c_re(inout[4 * iostride]);
+			 tmp47 = c_im(inout[4 * iostride]);
+			 tmp44 = c_re(W[3]);
+			 tmp46 = c_im(W[3]);
+			 tmp48 = (tmp44 * tmp45) + (tmp46 * tmp47);
+			 tmp407 = (tmp44 * tmp47) - (tmp46 * tmp45);
+		    }
+		    {
+			 fftw_real tmp50;
+			 fftw_real tmp52;
+			 fftw_real tmp49;
+			 fftw_real tmp51;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp50 = c_re(inout[36 * iostride]);
+			 tmp52 = c_im(inout[36 * iostride]);
+			 tmp49 = c_re(W[35]);
+			 tmp51 = c_im(W[35]);
+			 tmp53 = (tmp49 * tmp50) + (tmp51 * tmp52);
+			 tmp408 = (tmp49 * tmp52) - (tmp51 * tmp50);
+		    }
+		    tmp54 = tmp48 + tmp53;
+		    tmp412 = tmp48 - tmp53;
+		    tmp409 = tmp407 - tmp408;
+		    tmp797 = tmp407 + tmp408;
+	       }
+	       {
+		    fftw_real tmp59;
+		    fftw_real tmp413;
+		    fftw_real tmp64;
+		    fftw_real tmp414;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp56;
+			 fftw_real tmp58;
+			 fftw_real tmp55;
+			 fftw_real tmp57;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp56 = c_re(inout[20 * iostride]);
+			 tmp58 = c_im(inout[20 * iostride]);
+			 tmp55 = c_re(W[19]);
+			 tmp57 = c_im(W[19]);
+			 tmp59 = (tmp55 * tmp56) + (tmp57 * tmp58);
+			 tmp413 = (tmp55 * tmp58) - (tmp57 * tmp56);
+		    }
+		    {
+			 fftw_real tmp61;
+			 fftw_real tmp63;
+			 fftw_real tmp60;
+			 fftw_real tmp62;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp61 = c_re(inout[52 * iostride]);
+			 tmp63 = c_im(inout[52 * iostride]);
+			 tmp60 = c_re(W[51]);
+			 tmp62 = c_im(W[51]);
+			 tmp64 = (tmp60 * tmp61) + (tmp62 * tmp63);
+			 tmp414 = (tmp60 * tmp63) - (tmp62 * tmp61);
+		    }
+		    tmp65 = tmp59 + tmp64;
+		    tmp410 = tmp59 - tmp64;
+		    tmp415 = tmp413 - tmp414;
+		    tmp798 = tmp413 + tmp414;
+	       }
+	       tmp66 = tmp54 + tmp65;
+	       tmp796 = tmp54 - tmp65;
+	       tmp799 = tmp797 - tmp798;
+	       tmp800 = tmp796 - tmp799;
+	       tmp909 = tmp796 + tmp799;
+	       tmp993 = tmp797 + tmp798;
+	       {
+		    fftw_real tmp411;
+		    fftw_real tmp416;
+		    fftw_real tmp663;
+		    fftw_real tmp664;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp411 = tmp409 + tmp410;
+		    tmp416 = tmp412 - tmp415;
+		    tmp417 = (K923879532 * tmp411) + (K382683432 * tmp416);
+		    tmp608 = (K923879532 * tmp416) - (K382683432 * tmp411);
+		    tmp663 = tmp409 - tmp410;
+		    tmp664 = tmp412 + tmp415;
+		    tmp665 = (K382683432 * tmp663) + (K923879532 * tmp664);
+		    tmp744 = (K382683432 * tmp664) - (K923879532 * tmp663);
+	       }
+	  }
+	  {
+	       fftw_real tmp24;
+	       fftw_real tmp385;
+	       fftw_real tmp29;
+	       fftw_real tmp386;
+	       fftw_real tmp384;
+	       fftw_real tmp387;
+	       fftw_real tmp35;
+	       fftw_real tmp390;
+	       fftw_real tmp40;
+	       fftw_real tmp391;
+	       fftw_real tmp389;
+	       fftw_real tmp392;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp21;
+		    fftw_real tmp23;
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp21 = c_re(inout[8 * iostride]);
+		    tmp23 = c_im(inout[8 * iostride]);
+		    tmp20 = c_re(W[7]);
+		    tmp22 = c_im(W[7]);
+		    tmp24 = (tmp20 * tmp21) + (tmp22 * tmp23);
+		    tmp385 = (tmp20 * tmp23) - (tmp22 * tmp21);
+	       }
+	       {
+		    fftw_real tmp26;
+		    fftw_real tmp28;
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp26 = c_re(inout[40 * iostride]);
+		    tmp28 = c_im(inout[40 * iostride]);
+		    tmp25 = c_re(W[39]);
+		    tmp27 = c_im(W[39]);
+		    tmp29 = (tmp25 * tmp26) + (tmp27 * tmp28);
+		    tmp386 = (tmp25 * tmp28) - (tmp27 * tmp26);
+	       }
+	       tmp384 = tmp24 - tmp29;
+	       tmp387 = tmp385 - tmp386;
+	       {
+		    fftw_real tmp32;
+		    fftw_real tmp34;
+		    fftw_real tmp31;
+		    fftw_real tmp33;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp32 = c_re(inout[56 * iostride]);
+		    tmp34 = c_im(inout[56 * iostride]);
+		    tmp31 = c_re(W[55]);
+		    tmp33 = c_im(W[55]);
+		    tmp35 = (tmp31 * tmp32) + (tmp33 * tmp34);
+		    tmp390 = (tmp31 * tmp34) - (tmp33 * tmp32);
+	       }
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp39;
+		    fftw_real tmp36;
+		    fftw_real tmp38;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = c_re(inout[24 * iostride]);
+		    tmp39 = c_im(inout[24 * iostride]);
+		    tmp36 = c_re(W[23]);
+		    tmp38 = c_im(W[23]);
+		    tmp40 = (tmp36 * tmp37) + (tmp38 * tmp39);
+		    tmp391 = (tmp36 * tmp39) - (tmp38 * tmp37);
+	       }
+	       tmp389 = tmp35 - tmp40;
+	       tmp392 = tmp390 - tmp391;
+	       {
+		    fftw_real tmp30;
+		    fftw_real tmp41;
+		    fftw_real tmp792;
+		    fftw_real tmp793;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp30 = tmp24 + tmp29;
+		    tmp41 = tmp35 + tmp40;
+		    tmp42 = tmp30 + tmp41;
+		    tmp1076 = tmp30 - tmp41;
+		    tmp792 = tmp390 + tmp391;
+		    tmp793 = tmp385 + tmp386;
+		    tmp794 = tmp792 - tmp793;
+		    tmp1042 = tmp793 + tmp792;
+	       }
+	       {
+		    fftw_real tmp388;
+		    fftw_real tmp393;
+		    fftw_real tmp656;
+		    fftw_real tmp657;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp388 = tmp384 - tmp387;
+		    tmp393 = tmp389 + tmp392;
+		    tmp394 = K707106781 * (tmp388 + tmp393);
+		    tmp1138 = K707106781 * (tmp388 - tmp393);
+		    tmp656 = tmp392 - tmp389;
+		    tmp657 = tmp384 + tmp387;
+		    tmp658 = K707106781 * (tmp656 - tmp657);
+		    tmp1106 = K707106781 * (tmp657 + tmp656);
+	       }
+	  }
+	  {
+	       fftw_real tmp287;
+	       fftw_real tmp572;
+	       fftw_real tmp292;
+	       fftw_real tmp573;
+	       fftw_real tmp293;
+	       fftw_real tmp876;
+	       fftw_real tmp327;
+	       fftw_real tmp541;
+	       fftw_real tmp544;
+	       fftw_real tmp860;
+	       fftw_real tmp298;
+	       fftw_real tmp532;
+	       fftw_real tmp303;
+	       fftw_real tmp533;
+	       fftw_real tmp304;
+	       fftw_real tmp877;
+	       fftw_real tmp316;
+	       fftw_real tmp536;
+	       fftw_real tmp539;
+	       fftw_real tmp861;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp284;
+		    fftw_real tmp286;
+		    fftw_real tmp283;
+		    fftw_real tmp285;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp284 = c_re(inout[63 * iostride]);
+		    tmp286 = c_im(inout[63 * iostride]);
+		    tmp283 = c_re(W[62]);
+		    tmp285 = c_im(W[62]);
+		    tmp287 = (tmp283 * tmp284) + (tmp285 * tmp286);
+		    tmp572 = (tmp283 * tmp286) - (tmp285 * tmp284);
+	       }
+	       {
+		    fftw_real tmp289;
+		    fftw_real tmp291;
+		    fftw_real tmp288;
+		    fftw_real tmp290;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp289 = c_re(inout[31 * iostride]);
+		    tmp291 = c_im(inout[31 * iostride]);
+		    tmp288 = c_re(W[30]);
+		    tmp290 = c_im(W[30]);
+		    tmp292 = (tmp288 * tmp289) + (tmp290 * tmp291);
+		    tmp573 = (tmp288 * tmp291) - (tmp290 * tmp289);
+	       }
+	       tmp293 = tmp287 + tmp292;
+	       tmp876 = tmp572 + tmp573;
+	       {
+		    fftw_real tmp321;
+		    fftw_real tmp542;
+		    fftw_real tmp326;
+		    fftw_real tmp543;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp318;
+			 fftw_real tmp320;
+			 fftw_real tmp317;
+			 fftw_real tmp319;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp318 = c_re(inout[55 * iostride]);
+			 tmp320 = c_im(inout[55 * iostride]);
+			 tmp317 = c_re(W[54]);
+			 tmp319 = c_im(W[54]);
+			 tmp321 = (tmp317 * tmp318) + (tmp319 * tmp320);
+			 tmp542 = (tmp317 * tmp320) - (tmp319 * tmp318);
+		    }
+		    {
+			 fftw_real tmp323;
+			 fftw_real tmp325;
+			 fftw_real tmp322;
+			 fftw_real tmp324;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp323 = c_re(inout[23 * iostride]);
+			 tmp325 = c_im(inout[23 * iostride]);
+			 tmp322 = c_re(W[22]);
+			 tmp324 = c_im(W[22]);
+			 tmp326 = (tmp322 * tmp323) + (tmp324 * tmp325);
+			 tmp543 = (tmp322 * tmp325) - (tmp324 * tmp323);
+		    }
+		    tmp327 = tmp321 + tmp326;
+		    tmp541 = tmp321 - tmp326;
+		    tmp544 = tmp542 - tmp543;
+		    tmp860 = tmp542 + tmp543;
+	       }
+	       {
+		    fftw_real tmp295;
+		    fftw_real tmp297;
+		    fftw_real tmp294;
+		    fftw_real tmp296;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp295 = c_re(inout[15 * iostride]);
+		    tmp297 = c_im(inout[15 * iostride]);
+		    tmp294 = c_re(W[14]);
+		    tmp296 = c_im(W[14]);
+		    tmp298 = (tmp294 * tmp295) + (tmp296 * tmp297);
+		    tmp532 = (tmp294 * tmp297) - (tmp296 * tmp295);
+	       }
+	       {
+		    fftw_real tmp300;
+		    fftw_real tmp302;
+		    fftw_real tmp299;
+		    fftw_real tmp301;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp300 = c_re(inout[47 * iostride]);
+		    tmp302 = c_im(inout[47 * iostride]);
+		    tmp299 = c_re(W[46]);
+		    tmp301 = c_im(W[46]);
+		    tmp303 = (tmp299 * tmp300) + (tmp301 * tmp302);
+		    tmp533 = (tmp299 * tmp302) - (tmp301 * tmp300);
+	       }
+	       tmp304 = tmp298 + tmp303;
+	       tmp877 = tmp532 + tmp533;
+	       {
+		    fftw_real tmp310;
+		    fftw_real tmp537;
+		    fftw_real tmp315;
+		    fftw_real tmp538;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp307;
+			 fftw_real tmp309;
+			 fftw_real tmp306;
+			 fftw_real tmp308;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp307 = c_re(inout[7 * iostride]);
+			 tmp309 = c_im(inout[7 * iostride]);
+			 tmp306 = c_re(W[6]);
+			 tmp308 = c_im(W[6]);
+			 tmp310 = (tmp306 * tmp307) + (tmp308 * tmp309);
+			 tmp537 = (tmp306 * tmp309) - (tmp308 * tmp307);
+		    }
+		    {
+			 fftw_real tmp312;
+			 fftw_real tmp314;
+			 fftw_real tmp311;
+			 fftw_real tmp313;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp312 = c_re(inout[39 * iostride]);
+			 tmp314 = c_im(inout[39 * iostride]);
+			 tmp311 = c_re(W[38]);
+			 tmp313 = c_im(W[38]);
+			 tmp315 = (tmp311 * tmp312) + (tmp313 * tmp314);
+			 tmp538 = (tmp311 * tmp314) - (tmp313 * tmp312);
+		    }
+		    tmp316 = tmp310 + tmp315;
+		    tmp536 = tmp310 - tmp315;
+		    tmp539 = tmp537 - tmp538;
+		    tmp861 = tmp537 + tmp538;
+	       }
+	       {
+		    fftw_real tmp305;
+		    fftw_real tmp328;
+		    fftw_real tmp859;
+		    fftw_real tmp862;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp305 = tmp293 + tmp304;
+		    tmp328 = tmp316 + tmp327;
+		    tmp329 = tmp305 + tmp328;
+		    tmp1007 = tmp305 - tmp328;
+		    tmp859 = tmp293 - tmp304;
+		    tmp862 = tmp860 - tmp861;
+		    tmp863 = tmp859 + tmp862;
+		    tmp923 = tmp859 - tmp862;
+	       }
+	       {
+		    fftw_real tmp974;
+		    fftw_real tmp975;
+		    fftw_real tmp878;
+		    fftw_real tmp879;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp974 = tmp876 + tmp877;
+		    tmp975 = tmp861 + tmp860;
+		    tmp976 = tmp974 + tmp975;
+		    tmp1004 = tmp974 - tmp975;
+		    tmp878 = tmp876 - tmp877;
+		    tmp879 = tmp316 - tmp327;
+		    tmp880 = tmp878 + tmp879;
+		    tmp920 = tmp878 - tmp879;
+	       }
+	       {
+		    fftw_real tmp531;
+		    fftw_real tmp534;
+		    fftw_real tmp574;
+		    fftw_real tmp575;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp531 = tmp287 - tmp292;
+		    tmp534 = tmp532 - tmp533;
+		    tmp535 = tmp531 - tmp534;
+		    tmp703 = tmp531 + tmp534;
+		    tmp574 = tmp572 - tmp573;
+		    tmp575 = tmp298 - tmp303;
+		    tmp576 = tmp574 + tmp575;
+		    tmp714 = tmp574 - tmp575;
+	       }
+	       {
+		    fftw_real tmp577;
+		    fftw_real tmp578;
+		    fftw_real tmp540;
+		    fftw_real tmp545;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp577 = tmp536 + tmp539;
+		    tmp578 = tmp544 - tmp541;
+		    tmp579 = K707106781 * (tmp577 + tmp578);
+		    tmp704 = K707106781 * (tmp578 - tmp577);
+		    tmp540 = tmp536 - tmp539;
+		    tmp545 = tmp541 + tmp544;
+		    tmp546 = K707106781 * (tmp540 + tmp545);
+		    tmp715 = K707106781 * (tmp540 - tmp545);
+	       }
+	  }
+	  {
+	       fftw_real tmp340;
+	       fftw_real tmp564;
+	       fftw_real tmp561;
+	       fftw_real tmp865;
+	       fftw_real tmp374;
+	       fftw_real tmp551;
+	       fftw_real tmp556;
+	       fftw_real tmp871;
+	       fftw_real tmp351;
+	       fftw_real tmp562;
+	       fftw_real tmp567;
+	       fftw_real tmp866;
+	       fftw_real tmp363;
+	       fftw_real tmp553;
+	       fftw_real tmp550;
+	       fftw_real tmp870;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp334;
+		    fftw_real tmp559;
+		    fftw_real tmp339;
+		    fftw_real tmp560;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp331;
+			 fftw_real tmp333;
+			 fftw_real tmp330;
+			 fftw_real tmp332;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp331 = c_re(inout[3 * iostride]);
+			 tmp333 = c_im(inout[3 * iostride]);
+			 tmp330 = c_re(W[2]);
+			 tmp332 = c_im(W[2]);
+			 tmp334 = (tmp330 * tmp331) + (tmp332 * tmp333);
+			 tmp559 = (tmp330 * tmp333) - (tmp332 * tmp331);
+		    }
+		    {
+			 fftw_real tmp336;
+			 fftw_real tmp338;
+			 fftw_real tmp335;
+			 fftw_real tmp337;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp336 = c_re(inout[35 * iostride]);
+			 tmp338 = c_im(inout[35 * iostride]);
+			 tmp335 = c_re(W[34]);
+			 tmp337 = c_im(W[34]);
+			 tmp339 = (tmp335 * tmp336) + (tmp337 * tmp338);
+			 tmp560 = (tmp335 * tmp338) - (tmp337 * tmp336);
+		    }
+		    tmp340 = tmp334 + tmp339;
+		    tmp564 = tmp334 - tmp339;
+		    tmp561 = tmp559 - tmp560;
+		    tmp865 = tmp559 + tmp560;
+	       }
+	       {
+		    fftw_real tmp368;
+		    fftw_real tmp554;
+		    fftw_real tmp373;
+		    fftw_real tmp555;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp365;
+			 fftw_real tmp367;
+			 fftw_real tmp364;
+			 fftw_real tmp366;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp365 = c_re(inout[11 * iostride]);
+			 tmp367 = c_im(inout[11 * iostride]);
+			 tmp364 = c_re(W[10]);
+			 tmp366 = c_im(W[10]);
+			 tmp368 = (tmp364 * tmp365) + (tmp366 * tmp367);
+			 tmp554 = (tmp364 * tmp367) - (tmp366 * tmp365);
+		    }
+		    {
+			 fftw_real tmp370;
+			 fftw_real tmp372;
+			 fftw_real tmp369;
+			 fftw_real tmp371;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp370 = c_re(inout[43 * iostride]);
+			 tmp372 = c_im(inout[43 * iostride]);
+			 tmp369 = c_re(W[42]);
+			 tmp371 = c_im(W[42]);
+			 tmp373 = (tmp369 * tmp370) + (tmp371 * tmp372);
+			 tmp555 = (tmp369 * tmp372) - (tmp371 * tmp370);
+		    }
+		    tmp374 = tmp368 + tmp373;
+		    tmp551 = tmp368 - tmp373;
+		    tmp556 = tmp554 - tmp555;
+		    tmp871 = tmp554 + tmp555;
+	       }
+	       {
+		    fftw_real tmp345;
+		    fftw_real tmp565;
+		    fftw_real tmp350;
+		    fftw_real tmp566;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp342;
+			 fftw_real tmp344;
+			 fftw_real tmp341;
+			 fftw_real tmp343;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp342 = c_re(inout[19 * iostride]);
+			 tmp344 = c_im(inout[19 * iostride]);
+			 tmp341 = c_re(W[18]);
+			 tmp343 = c_im(W[18]);
+			 tmp345 = (tmp341 * tmp342) + (tmp343 * tmp344);
+			 tmp565 = (tmp341 * tmp344) - (tmp343 * tmp342);
+		    }
+		    {
+			 fftw_real tmp347;
+			 fftw_real tmp349;
+			 fftw_real tmp346;
+			 fftw_real tmp348;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp347 = c_re(inout[51 * iostride]);
+			 tmp349 = c_im(inout[51 * iostride]);
+			 tmp346 = c_re(W[50]);
+			 tmp348 = c_im(W[50]);
+			 tmp350 = (tmp346 * tmp347) + (tmp348 * tmp349);
+			 tmp566 = (tmp346 * tmp349) - (tmp348 * tmp347);
+		    }
+		    tmp351 = tmp345 + tmp350;
+		    tmp562 = tmp345 - tmp350;
+		    tmp567 = tmp565 - tmp566;
+		    tmp866 = tmp565 + tmp566;
+	       }
+	       {
+		    fftw_real tmp357;
+		    fftw_real tmp548;
+		    fftw_real tmp362;
+		    fftw_real tmp549;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp354;
+			 fftw_real tmp356;
+			 fftw_real tmp353;
+			 fftw_real tmp355;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp354 = c_re(inout[59 * iostride]);
+			 tmp356 = c_im(inout[59 * iostride]);
+			 tmp353 = c_re(W[58]);
+			 tmp355 = c_im(W[58]);
+			 tmp357 = (tmp353 * tmp354) + (tmp355 * tmp356);
+			 tmp548 = (tmp353 * tmp356) - (tmp355 * tmp354);
+		    }
+		    {
+			 fftw_real tmp359;
+			 fftw_real tmp361;
+			 fftw_real tmp358;
+			 fftw_real tmp360;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp359 = c_re(inout[27 * iostride]);
+			 tmp361 = c_im(inout[27 * iostride]);
+			 tmp358 = c_re(W[26]);
+			 tmp360 = c_im(W[26]);
+			 tmp362 = (tmp358 * tmp359) + (tmp360 * tmp361);
+			 tmp549 = (tmp358 * tmp361) - (tmp360 * tmp359);
+		    }
+		    tmp363 = tmp357 + tmp362;
+		    tmp553 = tmp357 - tmp362;
+		    tmp550 = tmp548 - tmp549;
+		    tmp870 = tmp548 + tmp549;
+	       }
+	       {
+		    fftw_real tmp352;
+		    fftw_real tmp375;
+		    fftw_real tmp864;
+		    fftw_real tmp867;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp352 = tmp340 + tmp351;
+		    tmp375 = tmp363 + tmp374;
+		    tmp376 = tmp352 + tmp375;
+		    tmp1005 = tmp352 - tmp375;
+		    tmp864 = tmp340 - tmp351;
+		    tmp867 = tmp865 - tmp866;
+		    tmp868 = tmp864 - tmp867;
+		    tmp881 = tmp864 + tmp867;
+	       }
+	       {
+		    fftw_real tmp977;
+		    fftw_real tmp978;
+		    fftw_real tmp869;
+		    fftw_real tmp872;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp977 = tmp865 + tmp866;
+		    tmp978 = tmp870 + tmp871;
+		    tmp979 = tmp977 + tmp978;
+		    tmp1008 = tmp978 - tmp977;
+		    tmp869 = tmp363 - tmp374;
+		    tmp872 = tmp870 - tmp871;
+		    tmp873 = tmp869 + tmp872;
+		    tmp882 = tmp872 - tmp869;
+	       }
+	       {
+		    fftw_real tmp552;
+		    fftw_real tmp557;
+		    fftw_real tmp706;
+		    fftw_real tmp707;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp552 = tmp550 + tmp551;
+		    tmp557 = tmp553 - tmp556;
+		    tmp558 = (K923879532 * tmp552) - (K382683432 * tmp557);
+		    tmp582 = (K382683432 * tmp552) + (K923879532 * tmp557);
+		    tmp706 = tmp550 - tmp551;
+		    tmp707 = tmp553 + tmp556;
+		    tmp708 = (K382683432 * tmp706) - (K923879532 * tmp707);
+		    tmp718 = (K923879532 * tmp706) + (K382683432 * tmp707);
+	       }
+	       {
+		    fftw_real tmp563;
+		    fftw_real tmp568;
+		    fftw_real tmp709;
+		    fftw_real tmp710;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp563 = tmp561 + tmp562;
+		    tmp568 = tmp564 - tmp567;
+		    tmp569 = (K923879532 * tmp563) + (K382683432 * tmp568);
+		    tmp581 = (K923879532 * tmp568) - (K382683432 * tmp563);
+		    tmp709 = tmp561 - tmp562;
+		    tmp710 = tmp564 + tmp567;
+		    tmp711 = (K382683432 * tmp709) + (K923879532 * tmp710);
+		    tmp717 = (K382683432 * tmp710) - (K923879532 * tmp709);
+	       }
+	  }
+	  {
+	       fftw_real tmp77;
+	       fftw_real tmp401;
+	       fftw_real tmp398;
+	       fftw_real tmp802;
+	       fftw_real tmp88;
+	       fftw_real tmp399;
+	       fftw_real tmp404;
+	       fftw_real tmp803;
+	       fftw_real tmp801;
+	       fftw_real tmp804;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp71;
+		    fftw_real tmp396;
+		    fftw_real tmp76;
+		    fftw_real tmp397;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp68;
+			 fftw_real tmp70;
+			 fftw_real tmp67;
+			 fftw_real tmp69;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp68 = c_re(inout[60 * iostride]);
+			 tmp70 = c_im(inout[60 * iostride]);
+			 tmp67 = c_re(W[59]);
+			 tmp69 = c_im(W[59]);
+			 tmp71 = (tmp67 * tmp68) + (tmp69 * tmp70);
+			 tmp396 = (tmp67 * tmp70) - (tmp69 * tmp68);
+		    }
+		    {
+			 fftw_real tmp73;
+			 fftw_real tmp75;
+			 fftw_real tmp72;
+			 fftw_real tmp74;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp73 = c_re(inout[28 * iostride]);
+			 tmp75 = c_im(inout[28 * iostride]);
+			 tmp72 = c_re(W[27]);
+			 tmp74 = c_im(W[27]);
+			 tmp76 = (tmp72 * tmp73) + (tmp74 * tmp75);
+			 tmp397 = (tmp72 * tmp75) - (tmp74 * tmp73);
+		    }
+		    tmp77 = tmp71 + tmp76;
+		    tmp401 = tmp71 - tmp76;
+		    tmp398 = tmp396 - tmp397;
+		    tmp802 = tmp396 + tmp397;
+	       }
+	       {
+		    fftw_real tmp82;
+		    fftw_real tmp402;
+		    fftw_real tmp87;
+		    fftw_real tmp403;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp79;
+			 fftw_real tmp81;
+			 fftw_real tmp78;
+			 fftw_real tmp80;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp79 = c_re(inout[12 * iostride]);
+			 tmp81 = c_im(inout[12 * iostride]);
+			 tmp78 = c_re(W[11]);
+			 tmp80 = c_im(W[11]);
+			 tmp82 = (tmp78 * tmp79) + (tmp80 * tmp81);
+			 tmp402 = (tmp78 * tmp81) - (tmp80 * tmp79);
+		    }
+		    {
+			 fftw_real tmp84;
+			 fftw_real tmp86;
+			 fftw_real tmp83;
+			 fftw_real tmp85;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp84 = c_re(inout[44 * iostride]);
+			 tmp86 = c_im(inout[44 * iostride]);
+			 tmp83 = c_re(W[43]);
+			 tmp85 = c_im(W[43]);
+			 tmp87 = (tmp83 * tmp84) + (tmp85 * tmp86);
+			 tmp403 = (tmp83 * tmp86) - (tmp85 * tmp84);
+		    }
+		    tmp88 = tmp82 + tmp87;
+		    tmp399 = tmp82 - tmp87;
+		    tmp404 = tmp402 - tmp403;
+		    tmp803 = tmp402 + tmp403;
+	       }
+	       tmp89 = tmp77 + tmp88;
+	       tmp801 = tmp77 - tmp88;
+	       tmp804 = tmp802 - tmp803;
+	       tmp805 = tmp801 + tmp804;
+	       tmp908 = tmp804 - tmp801;
+	       tmp992 = tmp802 + tmp803;
+	       {
+		    fftw_real tmp400;
+		    fftw_real tmp405;
+		    fftw_real tmp660;
+		    fftw_real tmp661;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp400 = tmp398 + tmp399;
+		    tmp405 = tmp401 - tmp404;
+		    tmp406 = (K923879532 * tmp400) - (K382683432 * tmp405);
+		    tmp609 = (K382683432 * tmp400) + (K923879532 * tmp405);
+		    tmp660 = tmp398 - tmp399;
+		    tmp661 = tmp401 + tmp404;
+		    tmp662 = (K382683432 * tmp660) - (K923879532 * tmp661);
+		    tmp745 = (K923879532 * tmp660) + (K382683432 * tmp661);
+	       }
+	  }
+	  {
+	       fftw_real tmp143;
+	       fftw_real tmp420;
+	       fftw_real tmp148;
+	       fftw_real tmp421;
+	       fftw_real tmp149;
+	       fftw_real tmp808;
+	       fftw_real tmp183;
+	       fftw_real tmp433;
+	       fftw_real tmp432;
+	       fftw_real tmp814;
+	       fftw_real tmp154;
+	       fftw_real tmp438;
+	       fftw_real tmp159;
+	       fftw_real tmp439;
+	       fftw_real tmp160;
+	       fftw_real tmp809;
+	       fftw_real tmp172;
+	       fftw_real tmp425;
+	       fftw_real tmp428;
+	       fftw_real tmp815;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp140;
+		    fftw_real tmp142;
+		    fftw_real tmp139;
+		    fftw_real tmp141;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp140 = c_re(inout[62 * iostride]);
+		    tmp142 = c_im(inout[62 * iostride]);
+		    tmp139 = c_re(W[61]);
+		    tmp141 = c_im(W[61]);
+		    tmp143 = (tmp139 * tmp140) + (tmp141 * tmp142);
+		    tmp420 = (tmp139 * tmp142) - (tmp141 * tmp140);
+	       }
+	       {
+		    fftw_real tmp145;
+		    fftw_real tmp147;
+		    fftw_real tmp144;
+		    fftw_real tmp146;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp145 = c_re(inout[30 * iostride]);
+		    tmp147 = c_im(inout[30 * iostride]);
+		    tmp144 = c_re(W[29]);
+		    tmp146 = c_im(W[29]);
+		    tmp148 = (tmp144 * tmp145) + (tmp146 * tmp147);
+		    tmp421 = (tmp144 * tmp147) - (tmp146 * tmp145);
+	       }
+	       tmp149 = tmp143 + tmp148;
+	       tmp808 = tmp420 + tmp421;
+	       {
+		    fftw_real tmp177;
+		    fftw_real tmp430;
+		    fftw_real tmp182;
+		    fftw_real tmp431;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp174;
+			 fftw_real tmp176;
+			 fftw_real tmp173;
+			 fftw_real tmp175;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp174 = c_re(inout[54 * iostride]);
+			 tmp176 = c_im(inout[54 * iostride]);
+			 tmp173 = c_re(W[53]);
+			 tmp175 = c_im(W[53]);
+			 tmp177 = (tmp173 * tmp174) + (tmp175 * tmp176);
+			 tmp430 = (tmp173 * tmp176) - (tmp175 * tmp174);
+		    }
+		    {
+			 fftw_real tmp179;
+			 fftw_real tmp181;
+			 fftw_real tmp178;
+			 fftw_real tmp180;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp179 = c_re(inout[22 * iostride]);
+			 tmp181 = c_im(inout[22 * iostride]);
+			 tmp178 = c_re(W[21]);
+			 tmp180 = c_im(W[21]);
+			 tmp182 = (tmp178 * tmp179) + (tmp180 * tmp181);
+			 tmp431 = (tmp178 * tmp181) - (tmp180 * tmp179);
+		    }
+		    tmp183 = tmp177 + tmp182;
+		    tmp433 = tmp177 - tmp182;
+		    tmp432 = tmp430 - tmp431;
+		    tmp814 = tmp430 + tmp431;
+	       }
+	       {
+		    fftw_real tmp151;
+		    fftw_real tmp153;
+		    fftw_real tmp150;
+		    fftw_real tmp152;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp151 = c_re(inout[14 * iostride]);
+		    tmp153 = c_im(inout[14 * iostride]);
+		    tmp150 = c_re(W[13]);
+		    tmp152 = c_im(W[13]);
+		    tmp154 = (tmp150 * tmp151) + (tmp152 * tmp153);
+		    tmp438 = (tmp150 * tmp153) - (tmp152 * tmp151);
+	       }
+	       {
+		    fftw_real tmp156;
+		    fftw_real tmp158;
+		    fftw_real tmp155;
+		    fftw_real tmp157;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp156 = c_re(inout[46 * iostride]);
+		    tmp158 = c_im(inout[46 * iostride]);
+		    tmp155 = c_re(W[45]);
+		    tmp157 = c_im(W[45]);
+		    tmp159 = (tmp155 * tmp156) + (tmp157 * tmp158);
+		    tmp439 = (tmp155 * tmp158) - (tmp157 * tmp156);
+	       }
+	       tmp160 = tmp154 + tmp159;
+	       tmp809 = tmp438 + tmp439;
+	       {
+		    fftw_real tmp166;
+		    fftw_real tmp426;
+		    fftw_real tmp171;
+		    fftw_real tmp427;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp163;
+			 fftw_real tmp165;
+			 fftw_real tmp162;
+			 fftw_real tmp164;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp163 = c_re(inout[6 * iostride]);
+			 tmp165 = c_im(inout[6 * iostride]);
+			 tmp162 = c_re(W[5]);
+			 tmp164 = c_im(W[5]);
+			 tmp166 = (tmp162 * tmp163) + (tmp164 * tmp165);
+			 tmp426 = (tmp162 * tmp165) - (tmp164 * tmp163);
+		    }
+		    {
+			 fftw_real tmp168;
+			 fftw_real tmp170;
+			 fftw_real tmp167;
+			 fftw_real tmp169;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp168 = c_re(inout[38 * iostride]);
+			 tmp170 = c_im(inout[38 * iostride]);
+			 tmp167 = c_re(W[37]);
+			 tmp169 = c_im(W[37]);
+			 tmp171 = (tmp167 * tmp168) + (tmp169 * tmp170);
+			 tmp427 = (tmp167 * tmp170) - (tmp169 * tmp168);
+		    }
+		    tmp172 = tmp166 + tmp171;
+		    tmp425 = tmp166 - tmp171;
+		    tmp428 = tmp426 - tmp427;
+		    tmp815 = tmp426 + tmp427;
+	       }
+	       {
+		    fftw_real tmp810;
+		    fftw_real tmp811;
+		    fftw_real tmp813;
+		    fftw_real tmp816;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp161 = tmp149 + tmp160;
+		    tmp184 = tmp172 + tmp183;
+		    tmp997 = tmp161 - tmp184;
+		    tmp810 = tmp808 - tmp809;
+		    tmp811 = tmp172 - tmp183;
+		    tmp812 = tmp810 + tmp811;
+		    tmp912 = tmp810 - tmp811;
+		    tmp956 = tmp808 + tmp809;
+		    tmp957 = tmp815 + tmp814;
+		    tmp996 = tmp956 - tmp957;
+		    tmp813 = tmp149 - tmp160;
+		    tmp816 = tmp814 - tmp815;
+		    tmp817 = tmp813 + tmp816;
+		    tmp913 = tmp813 - tmp816;
+	       }
+	       {
+		    fftw_real tmp422;
+		    fftw_real tmp423;
+		    fftw_real tmp437;
+		    fftw_real tmp440;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp422 = tmp420 - tmp421;
+		    tmp423 = tmp154 - tmp159;
+		    tmp424 = tmp422 + tmp423;
+		    tmp668 = tmp422 - tmp423;
+		    tmp437 = tmp143 - tmp148;
+		    tmp440 = tmp438 - tmp439;
+		    tmp441 = tmp437 - tmp440;
+		    tmp671 = tmp437 + tmp440;
+	       }
+	       {
+		    fftw_real tmp442;
+		    fftw_real tmp443;
+		    fftw_real tmp429;
+		    fftw_real tmp434;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp442 = tmp425 - tmp428;
+		    tmp443 = tmp433 + tmp432;
+		    tmp444 = K707106781 * (tmp442 + tmp443);
+		    tmp669 = K707106781 * (tmp442 - tmp443);
+		    tmp429 = tmp425 + tmp428;
+		    tmp434 = tmp432 - tmp433;
+		    tmp435 = K707106781 * (tmp429 + tmp434);
+		    tmp672 = K707106781 * (tmp434 - tmp429);
+	       }
+	  }
+	  {
+	       fftw_real tmp96;
+	       fftw_real tmp464;
+	       fftw_real tmp101;
+	       fftw_real tmp465;
+	       fftw_real tmp102;
+	       fftw_real tmp819;
+	       fftw_real tmp136;
+	       fftw_real tmp457;
+	       fftw_real tmp460;
+	       fftw_real tmp825;
+	       fftw_real tmp107;
+	       fftw_real tmp448;
+	       fftw_real tmp112;
+	       fftw_real tmp449;
+	       fftw_real tmp113;
+	       fftw_real tmp820;
+	       fftw_real tmp125;
+	       fftw_real tmp452;
+	       fftw_real tmp455;
+	       fftw_real tmp826;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp93;
+		    fftw_real tmp95;
+		    fftw_real tmp92;
+		    fftw_real tmp94;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp93 = c_re(inout[2 * iostride]);
+		    tmp95 = c_im(inout[2 * iostride]);
+		    tmp92 = c_re(W[1]);
+		    tmp94 = c_im(W[1]);
+		    tmp96 = (tmp92 * tmp93) + (tmp94 * tmp95);
+		    tmp464 = (tmp92 * tmp95) - (tmp94 * tmp93);
+	       }
+	       {
+		    fftw_real tmp98;
+		    fftw_real tmp100;
+		    fftw_real tmp97;
+		    fftw_real tmp99;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp98 = c_re(inout[34 * iostride]);
+		    tmp100 = c_im(inout[34 * iostride]);
+		    tmp97 = c_re(W[33]);
+		    tmp99 = c_im(W[33]);
+		    tmp101 = (tmp97 * tmp98) + (tmp99 * tmp100);
+		    tmp465 = (tmp97 * tmp100) - (tmp99 * tmp98);
+	       }
+	       tmp102 = tmp96 + tmp101;
+	       tmp819 = tmp464 + tmp465;
+	       {
+		    fftw_real tmp130;
+		    fftw_real tmp458;
+		    fftw_real tmp135;
+		    fftw_real tmp459;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp127;
+			 fftw_real tmp129;
+			 fftw_real tmp126;
+			 fftw_real tmp128;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp127 = c_re(inout[58 * iostride]);
+			 tmp129 = c_im(inout[58 * iostride]);
+			 tmp126 = c_re(W[57]);
+			 tmp128 = c_im(W[57]);
+			 tmp130 = (tmp126 * tmp127) + (tmp128 * tmp129);
+			 tmp458 = (tmp126 * tmp129) - (tmp128 * tmp127);
+		    }
+		    {
+			 fftw_real tmp132;
+			 fftw_real tmp134;
+			 fftw_real tmp131;
+			 fftw_real tmp133;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp132 = c_re(inout[26 * iostride]);
+			 tmp134 = c_im(inout[26 * iostride]);
+			 tmp131 = c_re(W[25]);
+			 tmp133 = c_im(W[25]);
+			 tmp135 = (tmp131 * tmp132) + (tmp133 * tmp134);
+			 tmp459 = (tmp131 * tmp134) - (tmp133 * tmp132);
+		    }
+		    tmp136 = tmp130 + tmp135;
+		    tmp457 = tmp130 - tmp135;
+		    tmp460 = tmp458 - tmp459;
+		    tmp825 = tmp458 + tmp459;
+	       }
+	       {
+		    fftw_real tmp104;
+		    fftw_real tmp106;
+		    fftw_real tmp103;
+		    fftw_real tmp105;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp104 = c_re(inout[18 * iostride]);
+		    tmp106 = c_im(inout[18 * iostride]);
+		    tmp103 = c_re(W[17]);
+		    tmp105 = c_im(W[17]);
+		    tmp107 = (tmp103 * tmp104) + (tmp105 * tmp106);
+		    tmp448 = (tmp103 * tmp106) - (tmp105 * tmp104);
+	       }
+	       {
+		    fftw_real tmp109;
+		    fftw_real tmp111;
+		    fftw_real tmp108;
+		    fftw_real tmp110;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp109 = c_re(inout[50 * iostride]);
+		    tmp111 = c_im(inout[50 * iostride]);
+		    tmp108 = c_re(W[49]);
+		    tmp110 = c_im(W[49]);
+		    tmp112 = (tmp108 * tmp109) + (tmp110 * tmp111);
+		    tmp449 = (tmp108 * tmp111) - (tmp110 * tmp109);
+	       }
+	       tmp113 = tmp107 + tmp112;
+	       tmp820 = tmp448 + tmp449;
+	       {
+		    fftw_real tmp119;
+		    fftw_real tmp453;
+		    fftw_real tmp124;
+		    fftw_real tmp454;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp116;
+			 fftw_real tmp118;
+			 fftw_real tmp115;
+			 fftw_real tmp117;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp116 = c_re(inout[10 * iostride]);
+			 tmp118 = c_im(inout[10 * iostride]);
+			 tmp115 = c_re(W[9]);
+			 tmp117 = c_im(W[9]);
+			 tmp119 = (tmp115 * tmp116) + (tmp117 * tmp118);
+			 tmp453 = (tmp115 * tmp118) - (tmp117 * tmp116);
+		    }
+		    {
+			 fftw_real tmp121;
+			 fftw_real tmp123;
+			 fftw_real tmp120;
+			 fftw_real tmp122;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp121 = c_re(inout[42 * iostride]);
+			 tmp123 = c_im(inout[42 * iostride]);
+			 tmp120 = c_re(W[41]);
+			 tmp122 = c_im(W[41]);
+			 tmp124 = (tmp120 * tmp121) + (tmp122 * tmp123);
+			 tmp454 = (tmp120 * tmp123) - (tmp122 * tmp121);
+		    }
+		    tmp125 = tmp119 + tmp124;
+		    tmp452 = tmp119 - tmp124;
+		    tmp455 = tmp453 - tmp454;
+		    tmp826 = tmp453 + tmp454;
+	       }
+	       {
+		    fftw_real tmp821;
+		    fftw_real tmp822;
+		    fftw_real tmp824;
+		    fftw_real tmp827;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp114 = tmp102 + tmp113;
+		    tmp137 = tmp125 + tmp136;
+		    tmp999 = tmp114 - tmp137;
+		    tmp821 = tmp819 - tmp820;
+		    tmp822 = tmp125 - tmp136;
+		    tmp823 = tmp821 + tmp822;
+		    tmp915 = tmp821 - tmp822;
+		    tmp959 = tmp819 + tmp820;
+		    tmp960 = tmp826 + tmp825;
+		    tmp1000 = tmp959 - tmp960;
+		    tmp824 = tmp102 - tmp113;
+		    tmp827 = tmp825 - tmp826;
+		    tmp828 = tmp824 + tmp827;
+		    tmp916 = tmp824 - tmp827;
+	       }
+	       {
+		    fftw_real tmp447;
+		    fftw_real tmp450;
+		    fftw_real tmp466;
+		    fftw_real tmp467;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp447 = tmp96 - tmp101;
+		    tmp450 = tmp448 - tmp449;
+		    tmp451 = tmp447 - tmp450;
+		    tmp678 = tmp447 + tmp450;
+		    tmp466 = tmp464 - tmp465;
+		    tmp467 = tmp107 - tmp112;
+		    tmp468 = tmp466 + tmp467;
+		    tmp675 = tmp466 - tmp467;
+	       }
+	       {
+		    fftw_real tmp469;
+		    fftw_real tmp470;
+		    fftw_real tmp456;
+		    fftw_real tmp461;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp469 = tmp452 + tmp455;
+		    tmp470 = tmp460 - tmp457;
+		    tmp471 = K707106781 * (tmp469 + tmp470);
+		    tmp679 = K707106781 * (tmp470 - tmp469);
+		    tmp456 = tmp452 - tmp455;
+		    tmp461 = tmp457 + tmp460;
+		    tmp462 = K707106781 * (tmp456 + tmp461);
+		    tmp676 = K707106781 * (tmp456 - tmp461);
+	       }
+	  }
+	  {
+	       fftw_real tmp192;
+	       fftw_real tmp517;
+	       fftw_real tmp197;
+	       fftw_real tmp518;
+	       fftw_real tmp198;
+	       fftw_real tmp849;
+	       fftw_real tmp232;
+	       fftw_real tmp486;
+	       fftw_real tmp489;
+	       fftw_real tmp833;
+	       fftw_real tmp203;
+	       fftw_real tmp477;
+	       fftw_real tmp208;
+	       fftw_real tmp478;
+	       fftw_real tmp209;
+	       fftw_real tmp850;
+	       fftw_real tmp221;
+	       fftw_real tmp481;
+	       fftw_real tmp484;
+	       fftw_real tmp834;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp189;
+		    fftw_real tmp191;
+		    fftw_real tmp188;
+		    fftw_real tmp190;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp189 = c_re(inout[iostride]);
+		    tmp191 = c_im(inout[iostride]);
+		    tmp188 = c_re(W[0]);
+		    tmp190 = c_im(W[0]);
+		    tmp192 = (tmp188 * tmp189) + (tmp190 * tmp191);
+		    tmp517 = (tmp188 * tmp191) - (tmp190 * tmp189);
+	       }
+	       {
+		    fftw_real tmp194;
+		    fftw_real tmp196;
+		    fftw_real tmp193;
+		    fftw_real tmp195;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp194 = c_re(inout[33 * iostride]);
+		    tmp196 = c_im(inout[33 * iostride]);
+		    tmp193 = c_re(W[32]);
+		    tmp195 = c_im(W[32]);
+		    tmp197 = (tmp193 * tmp194) + (tmp195 * tmp196);
+		    tmp518 = (tmp193 * tmp196) - (tmp195 * tmp194);
+	       }
+	       tmp198 = tmp192 + tmp197;
+	       tmp849 = tmp517 + tmp518;
+	       {
+		    fftw_real tmp226;
+		    fftw_real tmp487;
+		    fftw_real tmp231;
+		    fftw_real tmp488;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp223;
+			 fftw_real tmp225;
+			 fftw_real tmp222;
+			 fftw_real tmp224;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp223 = c_re(inout[57 * iostride]);
+			 tmp225 = c_im(inout[57 * iostride]);
+			 tmp222 = c_re(W[56]);
+			 tmp224 = c_im(W[56]);
+			 tmp226 = (tmp222 * tmp223) + (tmp224 * tmp225);
+			 tmp487 = (tmp222 * tmp225) - (tmp224 * tmp223);
+		    }
+		    {
+			 fftw_real tmp228;
+			 fftw_real tmp230;
+			 fftw_real tmp227;
+			 fftw_real tmp229;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp228 = c_re(inout[25 * iostride]);
+			 tmp230 = c_im(inout[25 * iostride]);
+			 tmp227 = c_re(W[24]);
+			 tmp229 = c_im(W[24]);
+			 tmp231 = (tmp227 * tmp228) + (tmp229 * tmp230);
+			 tmp488 = (tmp227 * tmp230) - (tmp229 * tmp228);
+		    }
+		    tmp232 = tmp226 + tmp231;
+		    tmp486 = tmp226 - tmp231;
+		    tmp489 = tmp487 - tmp488;
+		    tmp833 = tmp487 + tmp488;
+	       }
+	       {
+		    fftw_real tmp200;
+		    fftw_real tmp202;
+		    fftw_real tmp199;
+		    fftw_real tmp201;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp200 = c_re(inout[17 * iostride]);
+		    tmp202 = c_im(inout[17 * iostride]);
+		    tmp199 = c_re(W[16]);
+		    tmp201 = c_im(W[16]);
+		    tmp203 = (tmp199 * tmp200) + (tmp201 * tmp202);
+		    tmp477 = (tmp199 * tmp202) - (tmp201 * tmp200);
+	       }
+	       {
+		    fftw_real tmp205;
+		    fftw_real tmp207;
+		    fftw_real tmp204;
+		    fftw_real tmp206;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp205 = c_re(inout[49 * iostride]);
+		    tmp207 = c_im(inout[49 * iostride]);
+		    tmp204 = c_re(W[48]);
+		    tmp206 = c_im(W[48]);
+		    tmp208 = (tmp204 * tmp205) + (tmp206 * tmp207);
+		    tmp478 = (tmp204 * tmp207) - (tmp206 * tmp205);
+	       }
+	       tmp209 = tmp203 + tmp208;
+	       tmp850 = tmp477 + tmp478;
+	       {
+		    fftw_real tmp215;
+		    fftw_real tmp482;
+		    fftw_real tmp220;
+		    fftw_real tmp483;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp212;
+			 fftw_real tmp214;
+			 fftw_real tmp211;
+			 fftw_real tmp213;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp212 = c_re(inout[9 * iostride]);
+			 tmp214 = c_im(inout[9 * iostride]);
+			 tmp211 = c_re(W[8]);
+			 tmp213 = c_im(W[8]);
+			 tmp215 = (tmp211 * tmp212) + (tmp213 * tmp214);
+			 tmp482 = (tmp211 * tmp214) - (tmp213 * tmp212);
+		    }
+		    {
+			 fftw_real tmp217;
+			 fftw_real tmp219;
+			 fftw_real tmp216;
+			 fftw_real tmp218;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp217 = c_re(inout[41 * iostride]);
+			 tmp219 = c_im(inout[41 * iostride]);
+			 tmp216 = c_re(W[40]);
+			 tmp218 = c_im(W[40]);
+			 tmp220 = (tmp216 * tmp217) + (tmp218 * tmp219);
+			 tmp483 = (tmp216 * tmp219) - (tmp218 * tmp217);
+		    }
+		    tmp221 = tmp215 + tmp220;
+		    tmp481 = tmp215 - tmp220;
+		    tmp484 = tmp482 - tmp483;
+		    tmp834 = tmp482 + tmp483;
+	       }
+	       {
+		    fftw_real tmp210;
+		    fftw_real tmp233;
+		    fftw_real tmp832;
+		    fftw_real tmp835;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp210 = tmp198 + tmp209;
+		    tmp233 = tmp221 + tmp232;
+		    tmp234 = tmp210 + tmp233;
+		    tmp1014 = tmp210 - tmp233;
+		    tmp832 = tmp198 - tmp209;
+		    tmp835 = tmp833 - tmp834;
+		    tmp836 = tmp832 + tmp835;
+		    tmp930 = tmp832 - tmp835;
+	       }
+	       {
+		    fftw_real tmp965;
+		    fftw_real tmp966;
+		    fftw_real tmp851;
+		    fftw_real tmp852;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp965 = tmp849 + tmp850;
+		    tmp966 = tmp834 + tmp833;
+		    tmp967 = tmp965 + tmp966;
+		    tmp1011 = tmp965 - tmp966;
+		    tmp851 = tmp849 - tmp850;
+		    tmp852 = tmp221 - tmp232;
+		    tmp853 = tmp851 + tmp852;
+		    tmp927 = tmp851 - tmp852;
+	       }
+	       {
+		    fftw_real tmp476;
+		    fftw_real tmp479;
+		    fftw_real tmp519;
+		    fftw_real tmp520;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp476 = tmp192 - tmp197;
+		    tmp479 = tmp477 - tmp478;
+		    tmp480 = tmp476 - tmp479;
+		    tmp684 = tmp476 + tmp479;
+		    tmp519 = tmp517 - tmp518;
+		    tmp520 = tmp203 - tmp208;
+		    tmp521 = tmp519 + tmp520;
+		    tmp695 = tmp519 - tmp520;
+	       }
+	       {
+		    fftw_real tmp522;
+		    fftw_real tmp523;
+		    fftw_real tmp485;
+		    fftw_real tmp490;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp522 = tmp481 + tmp484;
+		    tmp523 = tmp489 - tmp486;
+		    tmp524 = K707106781 * (tmp522 + tmp523);
+		    tmp685 = K707106781 * (tmp523 - tmp522);
+		    tmp485 = tmp481 - tmp484;
+		    tmp490 = tmp486 + tmp489;
+		    tmp491 = K707106781 * (tmp485 + tmp490);
+		    tmp696 = K707106781 * (tmp485 - tmp490);
+	       }
+	  }
+	  {
+	       fftw_real tmp245;
+	       fftw_real tmp509;
+	       fftw_real tmp506;
+	       fftw_real tmp838;
+	       fftw_real tmp279;
+	       fftw_real tmp496;
+	       fftw_real tmp501;
+	       fftw_real tmp844;
+	       fftw_real tmp256;
+	       fftw_real tmp507;
+	       fftw_real tmp512;
+	       fftw_real tmp839;
+	       fftw_real tmp268;
+	       fftw_real tmp498;
+	       fftw_real tmp495;
+	       fftw_real tmp843;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp239;
+		    fftw_real tmp504;
+		    fftw_real tmp244;
+		    fftw_real tmp505;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp236;
+			 fftw_real tmp238;
+			 fftw_real tmp235;
+			 fftw_real tmp237;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp236 = c_re(inout[5 * iostride]);
+			 tmp238 = c_im(inout[5 * iostride]);
+			 tmp235 = c_re(W[4]);
+			 tmp237 = c_im(W[4]);
+			 tmp239 = (tmp235 * tmp236) + (tmp237 * tmp238);
+			 tmp504 = (tmp235 * tmp238) - (tmp237 * tmp236);
+		    }
+		    {
+			 fftw_real tmp241;
+			 fftw_real tmp243;
+			 fftw_real tmp240;
+			 fftw_real tmp242;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp241 = c_re(inout[37 * iostride]);
+			 tmp243 = c_im(inout[37 * iostride]);
+			 tmp240 = c_re(W[36]);
+			 tmp242 = c_im(W[36]);
+			 tmp244 = (tmp240 * tmp241) + (tmp242 * tmp243);
+			 tmp505 = (tmp240 * tmp243) - (tmp242 * tmp241);
+		    }
+		    tmp245 = tmp239 + tmp244;
+		    tmp509 = tmp239 - tmp244;
+		    tmp506 = tmp504 - tmp505;
+		    tmp838 = tmp504 + tmp505;
+	       }
+	       {
+		    fftw_real tmp273;
+		    fftw_real tmp499;
+		    fftw_real tmp278;
+		    fftw_real tmp500;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp270;
+			 fftw_real tmp272;
+			 fftw_real tmp269;
+			 fftw_real tmp271;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp270 = c_re(inout[13 * iostride]);
+			 tmp272 = c_im(inout[13 * iostride]);
+			 tmp269 = c_re(W[12]);
+			 tmp271 = c_im(W[12]);
+			 tmp273 = (tmp269 * tmp270) + (tmp271 * tmp272);
+			 tmp499 = (tmp269 * tmp272) - (tmp271 * tmp270);
+		    }
+		    {
+			 fftw_real tmp275;
+			 fftw_real tmp277;
+			 fftw_real tmp274;
+			 fftw_real tmp276;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp275 = c_re(inout[45 * iostride]);
+			 tmp277 = c_im(inout[45 * iostride]);
+			 tmp274 = c_re(W[44]);
+			 tmp276 = c_im(W[44]);
+			 tmp278 = (tmp274 * tmp275) + (tmp276 * tmp277);
+			 tmp500 = (tmp274 * tmp277) - (tmp276 * tmp275);
+		    }
+		    tmp279 = tmp273 + tmp278;
+		    tmp496 = tmp273 - tmp278;
+		    tmp501 = tmp499 - tmp500;
+		    tmp844 = tmp499 + tmp500;
+	       }
+	       {
+		    fftw_real tmp250;
+		    fftw_real tmp510;
+		    fftw_real tmp255;
+		    fftw_real tmp511;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp247;
+			 fftw_real tmp249;
+			 fftw_real tmp246;
+			 fftw_real tmp248;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp247 = c_re(inout[21 * iostride]);
+			 tmp249 = c_im(inout[21 * iostride]);
+			 tmp246 = c_re(W[20]);
+			 tmp248 = c_im(W[20]);
+			 tmp250 = (tmp246 * tmp247) + (tmp248 * tmp249);
+			 tmp510 = (tmp246 * tmp249) - (tmp248 * tmp247);
+		    }
+		    {
+			 fftw_real tmp252;
+			 fftw_real tmp254;
+			 fftw_real tmp251;
+			 fftw_real tmp253;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp252 = c_re(inout[53 * iostride]);
+			 tmp254 = c_im(inout[53 * iostride]);
+			 tmp251 = c_re(W[52]);
+			 tmp253 = c_im(W[52]);
+			 tmp255 = (tmp251 * tmp252) + (tmp253 * tmp254);
+			 tmp511 = (tmp251 * tmp254) - (tmp253 * tmp252);
+		    }
+		    tmp256 = tmp250 + tmp255;
+		    tmp507 = tmp250 - tmp255;
+		    tmp512 = tmp510 - tmp511;
+		    tmp839 = tmp510 + tmp511;
+	       }
+	       {
+		    fftw_real tmp262;
+		    fftw_real tmp493;
+		    fftw_real tmp267;
+		    fftw_real tmp494;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp259;
+			 fftw_real tmp261;
+			 fftw_real tmp258;
+			 fftw_real tmp260;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp259 = c_re(inout[61 * iostride]);
+			 tmp261 = c_im(inout[61 * iostride]);
+			 tmp258 = c_re(W[60]);
+			 tmp260 = c_im(W[60]);
+			 tmp262 = (tmp258 * tmp259) + (tmp260 * tmp261);
+			 tmp493 = (tmp258 * tmp261) - (tmp260 * tmp259);
+		    }
+		    {
+			 fftw_real tmp264;
+			 fftw_real tmp266;
+			 fftw_real tmp263;
+			 fftw_real tmp265;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp264 = c_re(inout[29 * iostride]);
+			 tmp266 = c_im(inout[29 * iostride]);
+			 tmp263 = c_re(W[28]);
+			 tmp265 = c_im(W[28]);
+			 tmp267 = (tmp263 * tmp264) + (tmp265 * tmp266);
+			 tmp494 = (tmp263 * tmp266) - (tmp265 * tmp264);
+		    }
+		    tmp268 = tmp262 + tmp267;
+		    tmp498 = tmp262 - tmp267;
+		    tmp495 = tmp493 - tmp494;
+		    tmp843 = tmp493 + tmp494;
+	       }
+	       {
+		    fftw_real tmp257;
+		    fftw_real tmp280;
+		    fftw_real tmp837;
+		    fftw_real tmp840;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp257 = tmp245 + tmp256;
+		    tmp280 = tmp268 + tmp279;
+		    tmp281 = tmp257 + tmp280;
+		    tmp1012 = tmp257 - tmp280;
+		    tmp837 = tmp245 - tmp256;
+		    tmp840 = tmp838 - tmp839;
+		    tmp841 = tmp837 - tmp840;
+		    tmp854 = tmp837 + tmp840;
+	       }
+	       {
+		    fftw_real tmp968;
+		    fftw_real tmp969;
+		    fftw_real tmp842;
+		    fftw_real tmp845;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp968 = tmp838 + tmp839;
+		    tmp969 = tmp843 + tmp844;
+		    tmp970 = tmp968 + tmp969;
+		    tmp1015 = tmp969 - tmp968;
+		    tmp842 = tmp268 - tmp279;
+		    tmp845 = tmp843 - tmp844;
+		    tmp846 = tmp842 + tmp845;
+		    tmp855 = tmp845 - tmp842;
+	       }
+	       {
+		    fftw_real tmp497;
+		    fftw_real tmp502;
+		    fftw_real tmp687;
+		    fftw_real tmp688;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp497 = tmp495 + tmp496;
+		    tmp502 = tmp498 - tmp501;
+		    tmp503 = (K923879532 * tmp497) - (K382683432 * tmp502);
+		    tmp527 = (K382683432 * tmp497) + (K923879532 * tmp502);
+		    tmp687 = tmp495 - tmp496;
+		    tmp688 = tmp498 + tmp501;
+		    tmp689 = (K382683432 * tmp687) - (K923879532 * tmp688);
+		    tmp699 = (K923879532 * tmp687) + (K382683432 * tmp688);
+	       }
+	       {
+		    fftw_real tmp508;
+		    fftw_real tmp513;
+		    fftw_real tmp690;
+		    fftw_real tmp691;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp508 = tmp506 + tmp507;
+		    tmp513 = tmp509 - tmp512;
+		    tmp514 = (K923879532 * tmp508) + (K382683432 * tmp513);
+		    tmp526 = (K923879532 * tmp513) - (K382683432 * tmp508);
+		    tmp690 = tmp506 - tmp507;
+		    tmp691 = tmp509 + tmp512;
+		    tmp692 = (K382683432 * tmp690) + (K923879532 * tmp691);
+		    tmp698 = (K382683432 * tmp691) - (K923879532 * tmp690);
+	       }
+	  }
+	  {
+	       fftw_real tmp91;
+	       fftw_real tmp955;
+	       fftw_real tmp990;
+	       fftw_real tmp1039;
+	       fftw_real tmp1049;
+	       fftw_real tmp1055;
+	       fftw_real tmp186;
+	       fftw_real tmp1054;
+	       fftw_real tmp972;
+	       fftw_real tmp985;
+	       fftw_real tmp378;
+	       fftw_real tmp1051;
+	       fftw_real tmp981;
+	       fftw_real tmp984;
+	       fftw_real tmp962;
+	       fftw_real tmp1040;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp43;
+		    fftw_real tmp90;
+		    fftw_real tmp988;
+		    fftw_real tmp989;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp43 = tmp19 + tmp42;
+		    tmp90 = tmp66 + tmp89;
+		    tmp91 = tmp43 + tmp90;
+		    tmp955 = tmp43 - tmp90;
+		    tmp988 = tmp976 + tmp979;
+		    tmp989 = tmp967 + tmp970;
+		    tmp990 = tmp988 - tmp989;
+		    tmp1039 = tmp989 + tmp988;
+	       }
+	       {
+		    fftw_real tmp1041;
+		    fftw_real tmp1048;
+		    fftw_real tmp138;
+		    fftw_real tmp185;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1041 = tmp993 + tmp992;
+		    tmp1048 = tmp1042 + tmp1047;
+		    tmp1049 = tmp1041 + tmp1048;
+		    tmp1055 = tmp1048 - tmp1041;
+		    tmp138 = tmp114 + tmp137;
+		    tmp185 = tmp161 + tmp184;
+		    tmp186 = tmp138 + tmp185;
+		    tmp1054 = tmp138 - tmp185;
+	       }
+	       {
+		    fftw_real tmp964;
+		    fftw_real tmp971;
+		    fftw_real tmp282;
+		    fftw_real tmp377;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp964 = tmp234 - tmp281;
+		    tmp971 = tmp967 - tmp970;
+		    tmp972 = tmp964 - tmp971;
+		    tmp985 = tmp964 + tmp971;
+		    tmp282 = tmp234 + tmp281;
+		    tmp377 = tmp329 + tmp376;
+		    tmp378 = tmp282 + tmp377;
+		    tmp1051 = tmp282 - tmp377;
+	       }
+	       {
+		    fftw_real tmp973;
+		    fftw_real tmp980;
+		    fftw_real tmp958;
+		    fftw_real tmp961;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp973 = tmp329 - tmp376;
+		    tmp980 = tmp976 - tmp979;
+		    tmp981 = tmp973 + tmp980;
+		    tmp984 = tmp980 - tmp973;
+		    tmp958 = tmp956 + tmp957;
+		    tmp961 = tmp959 + tmp960;
+		    tmp962 = tmp958 - tmp961;
+		    tmp1040 = tmp961 + tmp958;
+	       }
+	       {
+		    fftw_real tmp187;
+		    fftw_real tmp987;
+		    fftw_real tmp1050;
+		    fftw_real tmp1052;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp187 = tmp91 + tmp186;
+		    c_re(inout[32 * iostride]) = tmp187 - tmp378;
+		    c_re(inout[0]) = tmp187 + tmp378;
+		    tmp987 = tmp91 - tmp186;
+		    c_re(inout[48 * iostride]) = tmp987 - tmp990;
+		    c_re(inout[16 * iostride]) = tmp987 + tmp990;
+		    {
+			 fftw_real tmp963;
+			 fftw_real tmp982;
+			 fftw_real tmp1057;
+			 fftw_real tmp1058;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp963 = tmp955 + tmp962;
+			 tmp982 = K707106781 * (tmp972 + tmp981);
+			 c_re(inout[40 * iostride]) = tmp963 - tmp982;
+			 c_re(inout[8 * iostride]) = tmp963 + tmp982;
+			 tmp1057 = K707106781 * (tmp972 - tmp981);
+			 tmp1058 = tmp1055 - tmp1054;
+			 c_im(inout[24 * iostride]) = tmp1057 + tmp1058;
+			 c_im(inout[56 * iostride]) = tmp1058 - tmp1057;
+		    }
+		    tmp1050 = tmp1040 + tmp1049;
+		    c_im(inout[0]) = tmp1039 + tmp1050;
+		    c_im(inout[32 * iostride]) = tmp1050 - tmp1039;
+		    tmp1052 = tmp1049 - tmp1040;
+		    c_im(inout[16 * iostride]) = tmp1051 + tmp1052;
+		    c_im(inout[48 * iostride]) = tmp1052 - tmp1051;
+		    {
+			 fftw_real tmp1053;
+			 fftw_real tmp1056;
+			 fftw_real tmp983;
+			 fftw_real tmp986;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp1053 = K707106781 * (tmp985 + tmp984);
+			 tmp1056 = tmp1054 + tmp1055;
+			 c_im(inout[8 * iostride]) = tmp1053 + tmp1056;
+			 c_im(inout[40 * iostride]) = tmp1056 - tmp1053;
+			 tmp983 = tmp955 - tmp962;
+			 tmp986 = K707106781 * (tmp984 - tmp985);
+			 c_re(inout[56 * iostride]) = tmp983 - tmp986;
+			 c_re(inout[24 * iostride]) = tmp983 + tmp986;
+		    }
+	       }
+	  }
+	  {
+	       fftw_real tmp995;
+	       fftw_real tmp1023;
+	       fftw_real tmp1026;
+	       fftw_real tmp1068;
+	       fftw_real tmp1002;
+	       fftw_real tmp1060;
+	       fftw_real tmp1063;
+	       fftw_real tmp1069;
+	       fftw_real tmp1010;
+	       fftw_real tmp1021;
+	       fftw_real tmp1030;
+	       fftw_real tmp1037;
+	       fftw_real tmp1017;
+	       fftw_real tmp1020;
+	       fftw_real tmp1033;
+	       fftw_real tmp1036;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp991;
+		    fftw_real tmp994;
+		    fftw_real tmp1024;
+		    fftw_real tmp1025;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp991 = tmp19 - tmp42;
+		    tmp994 = tmp992 - tmp993;
+		    tmp995 = tmp991 - tmp994;
+		    tmp1023 = tmp991 + tmp994;
+		    tmp1024 = tmp999 - tmp1000;
+		    tmp1025 = tmp997 + tmp996;
+		    tmp1026 = K707106781 * (tmp1024 + tmp1025);
+		    tmp1068 = K707106781 * (tmp1024 - tmp1025);
+	       }
+	       {
+		    fftw_real tmp998;
+		    fftw_real tmp1001;
+		    fftw_real tmp1061;
+		    fftw_real tmp1062;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp998 = tmp996 - tmp997;
+		    tmp1001 = tmp999 + tmp1000;
+		    tmp1002 = K707106781 * (tmp998 - tmp1001);
+		    tmp1060 = K707106781 * (tmp1001 + tmp998);
+		    tmp1061 = tmp66 - tmp89;
+		    tmp1062 = tmp1047 - tmp1042;
+		    tmp1063 = tmp1061 + tmp1062;
+		    tmp1069 = tmp1062 - tmp1061;
+	       }
+	       {
+		    fftw_real tmp1006;
+		    fftw_real tmp1009;
+		    fftw_real tmp1028;
+		    fftw_real tmp1029;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1006 = tmp1004 - tmp1005;
+		    tmp1009 = tmp1007 - tmp1008;
+		    tmp1010 =
+			(K382683432 * tmp1006) - (K923879532 * tmp1009);
+		    tmp1021 =
+			(K923879532 * tmp1006) + (K382683432 * tmp1009);
+		    tmp1028 = tmp1004 + tmp1005;
+		    tmp1029 = tmp1007 + tmp1008;
+		    tmp1030 =
+			(K923879532 * tmp1028) - (K382683432 * tmp1029);
+		    tmp1037 =
+			(K382683432 * tmp1028) + (K923879532 * tmp1029);
+	       }
+	       {
+		    fftw_real tmp1013;
+		    fftw_real tmp1016;
+		    fftw_real tmp1031;
+		    fftw_real tmp1032;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1013 = tmp1011 - tmp1012;
+		    tmp1016 = tmp1014 - tmp1015;
+		    tmp1017 =
+			(K382683432 * tmp1013) + (K923879532 * tmp1016);
+		    tmp1020 =
+			(K382683432 * tmp1016) - (K923879532 * tmp1013);
+		    tmp1031 = tmp1011 + tmp1012;
+		    tmp1032 = tmp1014 + tmp1015;
+		    tmp1033 =
+			(K923879532 * tmp1031) + (K382683432 * tmp1032);
+		    tmp1036 =
+			(K923879532 * tmp1032) - (K382683432 * tmp1031);
+	       }
+	       {
+		    fftw_real tmp1003;
+		    fftw_real tmp1018;
+		    fftw_real tmp1019;
+		    fftw_real tmp1022;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1003 = tmp995 - tmp1002;
+		    tmp1018 = tmp1010 - tmp1017;
+		    c_re(inout[60 * iostride]) = tmp1003 - tmp1018;
+		    c_re(inout[28 * iostride]) = tmp1003 + tmp1018;
+		    tmp1019 = tmp995 + tmp1002;
+		    tmp1022 = tmp1020 + tmp1021;
+		    c_re(inout[44 * iostride]) = tmp1019 - tmp1022;
+		    c_re(inout[12 * iostride]) = tmp1019 + tmp1022;
+	       }
+	       {
+		    fftw_real tmp1071;
+		    fftw_real tmp1072;
+		    fftw_real tmp1067;
+		    fftw_real tmp1070;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1071 = tmp1020 - tmp1021;
+		    tmp1072 = tmp1069 - tmp1068;
+		    c_im(inout[28 * iostride]) = tmp1071 + tmp1072;
+		    c_im(inout[60 * iostride]) = tmp1072 - tmp1071;
+		    tmp1067 = tmp1017 + tmp1010;
+		    tmp1070 = tmp1068 + tmp1069;
+		    c_im(inout[12 * iostride]) = tmp1067 + tmp1070;
+		    c_im(inout[44 * iostride]) = tmp1070 - tmp1067;
+	       }
+	       {
+		    fftw_real tmp1027;
+		    fftw_real tmp1034;
+		    fftw_real tmp1035;
+		    fftw_real tmp1038;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1027 = tmp1023 - tmp1026;
+		    tmp1034 = tmp1030 - tmp1033;
+		    c_re(inout[52 * iostride]) = tmp1027 - tmp1034;
+		    c_re(inout[20 * iostride]) = tmp1027 + tmp1034;
+		    tmp1035 = tmp1023 + tmp1026;
+		    tmp1038 = tmp1036 + tmp1037;
+		    c_re(inout[36 * iostride]) = tmp1035 - tmp1038;
+		    c_re(inout[4 * iostride]) = tmp1035 + tmp1038;
+	       }
+	       {
+		    fftw_real tmp1065;
+		    fftw_real tmp1066;
+		    fftw_real tmp1059;
+		    fftw_real tmp1064;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1065 = tmp1036 - tmp1037;
+		    tmp1066 = tmp1063 - tmp1060;
+		    c_im(inout[20 * iostride]) = tmp1065 + tmp1066;
+		    c_im(inout[52 * iostride]) = tmp1066 - tmp1065;
+		    tmp1059 = tmp1033 + tmp1030;
+		    tmp1064 = tmp1060 + tmp1063;
+		    c_im(inout[4 * iostride]) = tmp1059 + tmp1064;
+		    c_im(inout[36 * iostride]) = tmp1064 - tmp1059;
+	       }
+	  }
+	  {
+	       fftw_real tmp419;
+	       fftw_real tmp591;
+	       fftw_real tmp1125;
+	       fftw_real tmp1131;
+	       fftw_real tmp474;
+	       fftw_real tmp1122;
+	       fftw_real tmp594;
+	       fftw_real tmp1130;
+	       fftw_real tmp530;
+	       fftw_real tmp589;
+	       fftw_real tmp598;
+	       fftw_real tmp605;
+	       fftw_real tmp585;
+	       fftw_real tmp588;
+	       fftw_real tmp601;
+	       fftw_real tmp604;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp395;
+		    fftw_real tmp418;
+		    fftw_real tmp1123;
+		    fftw_real tmp1124;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp395 = tmp383 - tmp394;
+		    tmp418 = tmp406 - tmp417;
+		    tmp419 = tmp395 - tmp418;
+		    tmp591 = tmp395 + tmp418;
+		    tmp1123 = tmp608 - tmp609;
+		    tmp1124 = tmp1109 - tmp1106;
+		    tmp1125 = tmp1123 + tmp1124;
+		    tmp1131 = tmp1124 - tmp1123;
+	       }
+	       {
+		    fftw_real tmp446;
+		    fftw_real tmp593;
+		    fftw_real tmp473;
+		    fftw_real tmp592;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp436;
+			 fftw_real tmp445;
+			 fftw_real tmp463;
+			 fftw_real tmp472;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp436 = tmp424 - tmp435;
+			 tmp445 = tmp441 - tmp444;
+			 tmp446 =
+			     (K555570233 * tmp436) - (K831469612 * tmp445);
+			 tmp593 =
+			     (K555570233 * tmp445) + (K831469612 * tmp436);
+			 tmp463 = tmp451 - tmp462;
+			 tmp472 = tmp468 - tmp471;
+			 tmp473 =
+			     (K831469612 * tmp463) + (K555570233 * tmp472);
+			 tmp592 =
+			     (K555570233 * tmp463) - (K831469612 * tmp472);
+		    }
+		    tmp474 = tmp446 - tmp473;
+		    tmp1122 = tmp473 + tmp446;
+		    tmp594 = tmp592 + tmp593;
+		    tmp1130 = tmp592 - tmp593;
+	       }
+	       {
+		    fftw_real tmp516;
+		    fftw_real tmp596;
+		    fftw_real tmp529;
+		    fftw_real tmp597;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp492;
+			 fftw_real tmp515;
+			 fftw_real tmp525;
+			 fftw_real tmp528;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp492 = tmp480 - tmp491;
+			 tmp515 = tmp503 - tmp514;
+			 tmp516 = tmp492 - tmp515;
+			 tmp596 = tmp492 + tmp515;
+			 tmp525 = tmp521 - tmp524;
+			 tmp528 = tmp526 - tmp527;
+			 tmp529 = tmp525 - tmp528;
+			 tmp597 = tmp525 + tmp528;
+		    }
+		    tmp530 = (K290284677 * tmp516) - (K956940335 * tmp529);
+		    tmp589 = (K956940335 * tmp516) + (K290284677 * tmp529);
+		    tmp598 = (K881921264 * tmp596) - (K471396736 * tmp597);
+		    tmp605 = (K471396736 * tmp596) + (K881921264 * tmp597);
+	       }
+	       {
+		    fftw_real tmp571;
+		    fftw_real tmp599;
+		    fftw_real tmp584;
+		    fftw_real tmp600;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp547;
+			 fftw_real tmp570;
+			 fftw_real tmp580;
+			 fftw_real tmp583;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp547 = tmp535 - tmp546;
+			 tmp570 = tmp558 - tmp569;
+			 tmp571 = tmp547 - tmp570;
+			 tmp599 = tmp547 + tmp570;
+			 tmp580 = tmp576 - tmp579;
+			 tmp583 = tmp581 - tmp582;
+			 tmp584 = tmp580 - tmp583;
+			 tmp600 = tmp580 + tmp583;
+		    }
+		    tmp585 = (K290284677 * tmp571) + (K956940335 * tmp584);
+		    tmp588 = (K290284677 * tmp584) - (K956940335 * tmp571);
+		    tmp601 = (K881921264 * tmp599) + (K471396736 * tmp600);
+		    tmp604 = (K881921264 * tmp600) - (K471396736 * tmp599);
+	       }
+	       {
+		    fftw_real tmp475;
+		    fftw_real tmp586;
+		    fftw_real tmp587;
+		    fftw_real tmp590;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp475 = tmp419 + tmp474;
+		    tmp586 = tmp530 + tmp585;
+		    c_re(inout[45 * iostride]) = tmp475 - tmp586;
+		    c_re(inout[13 * iostride]) = tmp475 + tmp586;
+		    tmp587 = tmp419 - tmp474;
+		    tmp590 = tmp588 - tmp589;
+		    c_re(inout[61 * iostride]) = tmp587 - tmp590;
+		    c_re(inout[29 * iostride]) = tmp587 + tmp590;
+	       }
+	       {
+		    fftw_real tmp1129;
+		    fftw_real tmp1132;
+		    fftw_real tmp1133;
+		    fftw_real tmp1134;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1129 = tmp589 + tmp588;
+		    tmp1132 = tmp1130 + tmp1131;
+		    c_im(inout[13 * iostride]) = tmp1129 + tmp1132;
+		    c_im(inout[45 * iostride]) = tmp1132 - tmp1129;
+		    tmp1133 = tmp530 - tmp585;
+		    tmp1134 = tmp1131 - tmp1130;
+		    c_im(inout[29 * iostride]) = tmp1133 + tmp1134;
+		    c_im(inout[61 * iostride]) = tmp1134 - tmp1133;
+	       }
+	       {
+		    fftw_real tmp595;
+		    fftw_real tmp602;
+		    fftw_real tmp603;
+		    fftw_real tmp606;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp595 = tmp591 + tmp594;
+		    tmp602 = tmp598 + tmp601;
+		    c_re(inout[37 * iostride]) = tmp595 - tmp602;
+		    c_re(inout[5 * iostride]) = tmp595 + tmp602;
+		    tmp603 = tmp591 - tmp594;
+		    tmp606 = tmp604 - tmp605;
+		    c_re(inout[53 * iostride]) = tmp603 - tmp606;
+		    c_re(inout[21 * iostride]) = tmp603 + tmp606;
+	       }
+	       {
+		    fftw_real tmp1121;
+		    fftw_real tmp1126;
+		    fftw_real tmp1127;
+		    fftw_real tmp1128;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1121 = tmp605 + tmp604;
+		    tmp1126 = tmp1122 + tmp1125;
+		    c_im(inout[5 * iostride]) = tmp1121 + tmp1126;
+		    c_im(inout[37 * iostride]) = tmp1126 - tmp1121;
+		    tmp1127 = tmp598 - tmp601;
+		    tmp1128 = tmp1125 - tmp1122;
+		    c_im(inout[21 * iostride]) = tmp1127 + tmp1128;
+		    c_im(inout[53 * iostride]) = tmp1128 - tmp1127;
+	       }
+	  }
+	  {
+	       fftw_real tmp611;
+	       fftw_real tmp639;
+	       fftw_real tmp1111;
+	       fftw_real tmp1117;
+	       fftw_real tmp618;
+	       fftw_real tmp1104;
+	       fftw_real tmp642;
+	       fftw_real tmp1116;
+	       fftw_real tmp626;
+	       fftw_real tmp637;
+	       fftw_real tmp646;
+	       fftw_real tmp653;
+	       fftw_real tmp633;
+	       fftw_real tmp636;
+	       fftw_real tmp649;
+	       fftw_real tmp652;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp607;
+		    fftw_real tmp610;
+		    fftw_real tmp1105;
+		    fftw_real tmp1110;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp607 = tmp383 + tmp394;
+		    tmp610 = tmp608 + tmp609;
+		    tmp611 = tmp607 - tmp610;
+		    tmp639 = tmp607 + tmp610;
+		    tmp1105 = tmp417 + tmp406;
+		    tmp1110 = tmp1106 + tmp1109;
+		    tmp1111 = tmp1105 + tmp1110;
+		    tmp1117 = tmp1110 - tmp1105;
+	       }
+	       {
+		    fftw_real tmp614;
+		    fftw_real tmp641;
+		    fftw_real tmp617;
+		    fftw_real tmp640;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp612;
+			 fftw_real tmp613;
+			 fftw_real tmp615;
+			 fftw_real tmp616;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp612 = tmp424 + tmp435;
+			 tmp613 = tmp441 + tmp444;
+			 tmp614 =
+			     (K980785280 * tmp612) - (K195090322 * tmp613);
+			 tmp641 =
+			     (K980785280 * tmp613) + (K195090322 * tmp612);
+			 tmp615 = tmp451 + tmp462;
+			 tmp616 = tmp468 + tmp471;
+			 tmp617 =
+			     (K195090322 * tmp615) + (K980785280 * tmp616);
+			 tmp640 =
+			     (K980785280 * tmp615) - (K195090322 * tmp616);
+		    }
+		    tmp618 = tmp614 - tmp617;
+		    tmp1104 = tmp617 + tmp614;
+		    tmp642 = tmp640 + tmp641;
+		    tmp1116 = tmp640 - tmp641;
+	       }
+	       {
+		    fftw_real tmp622;
+		    fftw_real tmp644;
+		    fftw_real tmp625;
+		    fftw_real tmp645;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp620;
+			 fftw_real tmp621;
+			 fftw_real tmp623;
+			 fftw_real tmp624;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp620 = tmp576 + tmp579;
+			 tmp621 = tmp569 + tmp558;
+			 tmp622 = tmp620 - tmp621;
+			 tmp644 = tmp620 + tmp621;
+			 tmp623 = tmp535 + tmp546;
+			 tmp624 = tmp581 + tmp582;
+			 tmp625 = tmp623 - tmp624;
+			 tmp645 = tmp623 + tmp624;
+		    }
+		    tmp626 = (K634393284 * tmp622) - (K773010453 * tmp625);
+		    tmp637 = (K773010453 * tmp622) + (K634393284 * tmp625);
+		    tmp646 = (K995184726 * tmp644) - (K098017140 * tmp645);
+		    tmp653 = (K098017140 * tmp644) + (K995184726 * tmp645);
+	       }
+	       {
+		    fftw_real tmp629;
+		    fftw_real tmp647;
+		    fftw_real tmp632;
+		    fftw_real tmp648;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp627;
+			 fftw_real tmp628;
+			 fftw_real tmp630;
+			 fftw_real tmp631;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp627 = tmp521 + tmp524;
+			 tmp628 = tmp514 + tmp503;
+			 tmp629 = tmp627 - tmp628;
+			 tmp647 = tmp627 + tmp628;
+			 tmp630 = tmp480 + tmp491;
+			 tmp631 = tmp526 + tmp527;
+			 tmp632 = tmp630 - tmp631;
+			 tmp648 = tmp630 + tmp631;
+		    }
+		    tmp633 = (K634393284 * tmp629) + (K773010453 * tmp632);
+		    tmp636 = (K634393284 * tmp632) - (K773010453 * tmp629);
+		    tmp649 = (K995184726 * tmp647) + (K098017140 * tmp648);
+		    tmp652 = (K995184726 * tmp648) - (K098017140 * tmp647);
+	       }
+	       {
+		    fftw_real tmp619;
+		    fftw_real tmp634;
+		    fftw_real tmp635;
+		    fftw_real tmp638;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp619 = tmp611 - tmp618;
+		    tmp634 = tmp626 - tmp633;
+		    c_re(inout[57 * iostride]) = tmp619 - tmp634;
+		    c_re(inout[25 * iostride]) = tmp619 + tmp634;
+		    tmp635 = tmp611 + tmp618;
+		    tmp638 = tmp636 + tmp637;
+		    c_re(inout[41 * iostride]) = tmp635 - tmp638;
+		    c_re(inout[9 * iostride]) = tmp635 + tmp638;
+	       }
+	       {
+		    fftw_real tmp1119;
+		    fftw_real tmp1120;
+		    fftw_real tmp1115;
+		    fftw_real tmp1118;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1119 = tmp636 - tmp637;
+		    tmp1120 = tmp1117 - tmp1116;
+		    c_im(inout[25 * iostride]) = tmp1119 + tmp1120;
+		    c_im(inout[57 * iostride]) = tmp1120 - tmp1119;
+		    tmp1115 = tmp633 + tmp626;
+		    tmp1118 = tmp1116 + tmp1117;
+		    c_im(inout[9 * iostride]) = tmp1115 + tmp1118;
+		    c_im(inout[41 * iostride]) = tmp1118 - tmp1115;
+	       }
+	       {
+		    fftw_real tmp643;
+		    fftw_real tmp650;
+		    fftw_real tmp651;
+		    fftw_real tmp654;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp643 = tmp639 - tmp642;
+		    tmp650 = tmp646 - tmp649;
+		    c_re(inout[49 * iostride]) = tmp643 - tmp650;
+		    c_re(inout[17 * iostride]) = tmp643 + tmp650;
+		    tmp651 = tmp639 + tmp642;
+		    tmp654 = tmp652 + tmp653;
+		    c_re(inout[33 * iostride]) = tmp651 - tmp654;
+		    c_re(inout[iostride]) = tmp651 + tmp654;
+	       }
+	       {
+		    fftw_real tmp1113;
+		    fftw_real tmp1114;
+		    fftw_real tmp1103;
+		    fftw_real tmp1112;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1113 = tmp652 - tmp653;
+		    tmp1114 = tmp1111 - tmp1104;
+		    c_im(inout[17 * iostride]) = tmp1113 + tmp1114;
+		    c_im(inout[49 * iostride]) = tmp1114 - tmp1113;
+		    tmp1103 = tmp649 + tmp646;
+		    tmp1112 = tmp1104 + tmp1111;
+		    c_im(inout[iostride]) = tmp1103 + tmp1112;
+		    c_im(inout[33 * iostride]) = tmp1112 - tmp1103;
+	       }
+	  }
+	  {
+	       fftw_real tmp807;
+	       fftw_real tmp891;
+	       fftw_real tmp830;
+	       fftw_real tmp1074;
+	       fftw_real tmp1079;
+	       fftw_real tmp1085;
+	       fftw_real tmp894;
+	       fftw_real tmp1084;
+	       fftw_real tmp885;
+	       fftw_real tmp888;
+	       fftw_real tmp901;
+	       fftw_real tmp904;
+	       fftw_real tmp858;
+	       fftw_real tmp889;
+	       fftw_real tmp898;
+	       fftw_real tmp905;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp795;
+		    fftw_real tmp806;
+		    fftw_real tmp892;
+		    fftw_real tmp893;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp795 = tmp791 + tmp794;
+		    tmp806 = K707106781 * (tmp800 + tmp805);
+		    tmp807 = tmp795 - tmp806;
+		    tmp891 = tmp795 + tmp806;
+		    {
+			 fftw_real tmp818;
+			 fftw_real tmp829;
+			 fftw_real tmp1075;
+			 fftw_real tmp1078;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp818 =
+			     (K923879532 * tmp812) - (K382683432 * tmp817);
+			 tmp829 =
+			     (K923879532 * tmp823) + (K382683432 * tmp828);
+			 tmp830 = tmp818 - tmp829;
+			 tmp1074 = tmp829 + tmp818;
+			 tmp1075 = K707106781 * (tmp909 + tmp908);
+			 tmp1078 = tmp1076 + tmp1077;
+			 tmp1079 = tmp1075 + tmp1078;
+			 tmp1085 = tmp1078 - tmp1075;
+		    }
+		    tmp892 = (K923879532 * tmp828) - (K382683432 * tmp823);
+		    tmp893 = (K382683432 * tmp812) + (K923879532 * tmp817);
+		    tmp894 = tmp892 + tmp893;
+		    tmp1084 = tmp892 - tmp893;
+		    {
+			 fftw_real tmp875;
+			 fftw_real tmp899;
+			 fftw_real tmp884;
+			 fftw_real tmp900;
+			 fftw_real tmp874;
+			 fftw_real tmp883;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp874 = K707106781 * (tmp868 + tmp873);
+			 tmp875 = tmp863 - tmp874;
+			 tmp899 = tmp863 + tmp874;
+			 tmp883 = K707106781 * (tmp881 + tmp882);
+			 tmp884 = tmp880 - tmp883;
+			 tmp900 = tmp880 + tmp883;
+			 tmp885 =
+			     (K555570233 * tmp875) + (K831469612 * tmp884);
+			 tmp888 =
+			     (K555570233 * tmp884) - (K831469612 * tmp875);
+			 tmp901 =
+			     (K980785280 * tmp899) + (K195090322 * tmp900);
+			 tmp904 =
+			     (K980785280 * tmp900) - (K195090322 * tmp899);
+		    }
+		    {
+			 fftw_real tmp848;
+			 fftw_real tmp896;
+			 fftw_real tmp857;
+			 fftw_real tmp897;
+			 fftw_real tmp847;
+			 fftw_real tmp856;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp847 = K707106781 * (tmp841 + tmp846);
+			 tmp848 = tmp836 - tmp847;
+			 tmp896 = tmp836 + tmp847;
+			 tmp856 = K707106781 * (tmp854 + tmp855);
+			 tmp857 = tmp853 - tmp856;
+			 tmp897 = tmp853 + tmp856;
+			 tmp858 =
+			     (K555570233 * tmp848) - (K831469612 * tmp857);
+			 tmp889 =
+			     (K831469612 * tmp848) + (K555570233 * tmp857);
+			 tmp898 =
+			     (K980785280 * tmp896) - (K195090322 * tmp897);
+			 tmp905 =
+			     (K195090322 * tmp896) + (K980785280 * tmp897);
+		    }
+	       }
+	       {
+		    fftw_real tmp831;
+		    fftw_real tmp886;
+		    fftw_real tmp887;
+		    fftw_real tmp890;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp831 = tmp807 + tmp830;
+		    tmp886 = tmp858 + tmp885;
+		    c_re(inout[42 * iostride]) = tmp831 - tmp886;
+		    c_re(inout[10 * iostride]) = tmp831 + tmp886;
+		    tmp887 = tmp807 - tmp830;
+		    tmp890 = tmp888 - tmp889;
+		    c_re(inout[58 * iostride]) = tmp887 - tmp890;
+		    c_re(inout[26 * iostride]) = tmp887 + tmp890;
+	       }
+	       {
+		    fftw_real tmp1083;
+		    fftw_real tmp1086;
+		    fftw_real tmp1087;
+		    fftw_real tmp1088;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1083 = tmp889 + tmp888;
+		    tmp1086 = tmp1084 + tmp1085;
+		    c_im(inout[10 * iostride]) = tmp1083 + tmp1086;
+		    c_im(inout[42 * iostride]) = tmp1086 - tmp1083;
+		    tmp1087 = tmp858 - tmp885;
+		    tmp1088 = tmp1085 - tmp1084;
+		    c_im(inout[26 * iostride]) = tmp1087 + tmp1088;
+		    c_im(inout[58 * iostride]) = tmp1088 - tmp1087;
+	       }
+	       {
+		    fftw_real tmp895;
+		    fftw_real tmp902;
+		    fftw_real tmp903;
+		    fftw_real tmp906;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp895 = tmp891 + tmp894;
+		    tmp902 = tmp898 + tmp901;
+		    c_re(inout[34 * iostride]) = tmp895 - tmp902;
+		    c_re(inout[2 * iostride]) = tmp895 + tmp902;
+		    tmp903 = tmp891 - tmp894;
+		    tmp906 = tmp904 - tmp905;
+		    c_re(inout[50 * iostride]) = tmp903 - tmp906;
+		    c_re(inout[18 * iostride]) = tmp903 + tmp906;
+	       }
+	       {
+		    fftw_real tmp1073;
+		    fftw_real tmp1080;
+		    fftw_real tmp1081;
+		    fftw_real tmp1082;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1073 = tmp905 + tmp904;
+		    tmp1080 = tmp1074 + tmp1079;
+		    c_im(inout[2 * iostride]) = tmp1073 + tmp1080;
+		    c_im(inout[34 * iostride]) = tmp1080 - tmp1073;
+		    tmp1081 = tmp898 - tmp901;
+		    tmp1082 = tmp1079 - tmp1074;
+		    c_im(inout[18 * iostride]) = tmp1081 + tmp1082;
+		    c_im(inout[50 * iostride]) = tmp1082 - tmp1081;
+	       }
+	  }
+	  {
+	       fftw_real tmp911;
+	       fftw_real tmp939;
+	       fftw_real tmp918;
+	       fftw_real tmp1090;
+	       fftw_real tmp1093;
+	       fftw_real tmp1099;
+	       fftw_real tmp942;
+	       fftw_real tmp1098;
+	       fftw_real tmp933;
+	       fftw_real tmp936;
+	       fftw_real tmp949;
+	       fftw_real tmp952;
+	       fftw_real tmp926;
+	       fftw_real tmp937;
+	       fftw_real tmp946;
+	       fftw_real tmp953;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp907;
+		    fftw_real tmp910;
+		    fftw_real tmp940;
+		    fftw_real tmp941;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp907 = tmp791 - tmp794;
+		    tmp910 = K707106781 * (tmp908 - tmp909);
+		    tmp911 = tmp907 - tmp910;
+		    tmp939 = tmp907 + tmp910;
+		    {
+			 fftw_real tmp914;
+			 fftw_real tmp917;
+			 fftw_real tmp1091;
+			 fftw_real tmp1092;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp914 =
+			     (K382683432 * tmp912) - (K923879532 * tmp913);
+			 tmp917 =
+			     (K382683432 * tmp915) + (K923879532 * tmp916);
+			 tmp918 = tmp914 - tmp917;
+			 tmp1090 = tmp917 + tmp914;
+			 tmp1091 = K707106781 * (tmp800 - tmp805);
+			 tmp1092 = tmp1077 - tmp1076;
+			 tmp1093 = tmp1091 + tmp1092;
+			 tmp1099 = tmp1092 - tmp1091;
+		    }
+		    tmp940 = (K382683432 * tmp916) - (K923879532 * tmp915);
+		    tmp941 = (K923879532 * tmp912) + (K382683432 * tmp913);
+		    tmp942 = tmp940 + tmp941;
+		    tmp1098 = tmp940 - tmp941;
+		    {
+			 fftw_real tmp929;
+			 fftw_real tmp947;
+			 fftw_real tmp932;
+			 fftw_real tmp948;
+			 fftw_real tmp928;
+			 fftw_real tmp931;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp928 = K707106781 * (tmp841 - tmp846);
+			 tmp929 = tmp927 - tmp928;
+			 tmp947 = tmp927 + tmp928;
+			 tmp931 = K707106781 * (tmp855 - tmp854);
+			 tmp932 = tmp930 - tmp931;
+			 tmp948 = tmp930 + tmp931;
+			 tmp933 =
+			     (K195090322 * tmp929) + (K980785280 * tmp932);
+			 tmp936 =
+			     (K195090322 * tmp932) - (K980785280 * tmp929);
+			 tmp949 =
+			     (K831469612 * tmp947) + (K555570233 * tmp948);
+			 tmp952 =
+			     (K831469612 * tmp948) - (K555570233 * tmp947);
+		    }
+		    {
+			 fftw_real tmp922;
+			 fftw_real tmp944;
+			 fftw_real tmp925;
+			 fftw_real tmp945;
+			 fftw_real tmp921;
+			 fftw_real tmp924;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp921 = K707106781 * (tmp868 - tmp873);
+			 tmp922 = tmp920 - tmp921;
+			 tmp944 = tmp920 + tmp921;
+			 tmp924 = K707106781 * (tmp882 - tmp881);
+			 tmp925 = tmp923 - tmp924;
+			 tmp945 = tmp923 + tmp924;
+			 tmp926 =
+			     (K195090322 * tmp922) - (K980785280 * tmp925);
+			 tmp937 =
+			     (K980785280 * tmp922) + (K195090322 * tmp925);
+			 tmp946 =
+			     (K831469612 * tmp944) - (K555570233 * tmp945);
+			 tmp953 =
+			     (K555570233 * tmp944) + (K831469612 * tmp945);
+		    }
+	       }
+	       {
+		    fftw_real tmp919;
+		    fftw_real tmp934;
+		    fftw_real tmp935;
+		    fftw_real tmp938;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp919 = tmp911 - tmp918;
+		    tmp934 = tmp926 - tmp933;
+		    c_re(inout[62 * iostride]) = tmp919 - tmp934;
+		    c_re(inout[30 * iostride]) = tmp919 + tmp934;
+		    tmp935 = tmp911 + tmp918;
+		    tmp938 = tmp936 + tmp937;
+		    c_re(inout[46 * iostride]) = tmp935 - tmp938;
+		    c_re(inout[14 * iostride]) = tmp935 + tmp938;
+	       }
+	       {
+		    fftw_real tmp1101;
+		    fftw_real tmp1102;
+		    fftw_real tmp1097;
+		    fftw_real tmp1100;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1101 = tmp936 - tmp937;
+		    tmp1102 = tmp1099 - tmp1098;
+		    c_im(inout[30 * iostride]) = tmp1101 + tmp1102;
+		    c_im(inout[62 * iostride]) = tmp1102 - tmp1101;
+		    tmp1097 = tmp933 + tmp926;
+		    tmp1100 = tmp1098 + tmp1099;
+		    c_im(inout[14 * iostride]) = tmp1097 + tmp1100;
+		    c_im(inout[46 * iostride]) = tmp1100 - tmp1097;
+	       }
+	       {
+		    fftw_real tmp943;
+		    fftw_real tmp950;
+		    fftw_real tmp951;
+		    fftw_real tmp954;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp943 = tmp939 - tmp942;
+		    tmp950 = tmp946 - tmp949;
+		    c_re(inout[54 * iostride]) = tmp943 - tmp950;
+		    c_re(inout[22 * iostride]) = tmp943 + tmp950;
+		    tmp951 = tmp939 + tmp942;
+		    tmp954 = tmp952 + tmp953;
+		    c_re(inout[38 * iostride]) = tmp951 - tmp954;
+		    c_re(inout[6 * iostride]) = tmp951 + tmp954;
+	       }
+	       {
+		    fftw_real tmp1095;
+		    fftw_real tmp1096;
+		    fftw_real tmp1089;
+		    fftw_real tmp1094;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1095 = tmp952 - tmp953;
+		    tmp1096 = tmp1093 - tmp1090;
+		    c_im(inout[22 * iostride]) = tmp1095 + tmp1096;
+		    c_im(inout[54 * iostride]) = tmp1096 - tmp1095;
+		    tmp1089 = tmp949 + tmp946;
+		    tmp1094 = tmp1090 + tmp1093;
+		    c_im(inout[6 * iostride]) = tmp1089 + tmp1094;
+		    c_im(inout[38 * iostride]) = tmp1094 - tmp1089;
+	       }
+	  }
+	  {
+	       fftw_real tmp667;
+	       fftw_real tmp727;
+	       fftw_real tmp1155;
+	       fftw_real tmp1161;
+	       fftw_real tmp682;
+	       fftw_real tmp1152;
+	       fftw_real tmp730;
+	       fftw_real tmp1160;
+	       fftw_real tmp702;
+	       fftw_real tmp725;
+	       fftw_real tmp734;
+	       fftw_real tmp741;
+	       fftw_real tmp721;
+	       fftw_real tmp724;
+	       fftw_real tmp737;
+	       fftw_real tmp740;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp659;
+		    fftw_real tmp666;
+		    fftw_real tmp1153;
+		    fftw_real tmp1154;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp659 = tmp655 - tmp658;
+		    tmp666 = tmp662 - tmp665;
+		    tmp667 = tmp659 - tmp666;
+		    tmp727 = tmp659 + tmp666;
+		    tmp1153 = tmp744 - tmp745;
+		    tmp1154 = tmp1139 - tmp1138;
+		    tmp1155 = tmp1153 + tmp1154;
+		    tmp1161 = tmp1154 - tmp1153;
+	       }
+	       {
+		    fftw_real tmp674;
+		    fftw_real tmp729;
+		    fftw_real tmp681;
+		    fftw_real tmp728;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp670;
+			 fftw_real tmp673;
+			 fftw_real tmp677;
+			 fftw_real tmp680;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp670 = tmp668 - tmp669;
+			 tmp673 = tmp671 - tmp672;
+			 tmp674 =
+			     (K195090322 * tmp670) - (K980785280 * tmp673);
+			 tmp729 =
+			     (K980785280 * tmp670) + (K195090322 * tmp673);
+			 tmp677 = tmp675 - tmp676;
+			 tmp680 = tmp678 - tmp679;
+			 tmp681 =
+			     (K195090322 * tmp677) + (K980785280 * tmp680);
+			 tmp728 =
+			     (K195090322 * tmp680) - (K980785280 * tmp677);
+		    }
+		    tmp682 = tmp674 - tmp681;
+		    tmp1152 = tmp681 + tmp674;
+		    tmp730 = tmp728 + tmp729;
+		    tmp1160 = tmp728 - tmp729;
+	       }
+	       {
+		    fftw_real tmp694;
+		    fftw_real tmp732;
+		    fftw_real tmp701;
+		    fftw_real tmp733;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp686;
+			 fftw_real tmp693;
+			 fftw_real tmp697;
+			 fftw_real tmp700;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp686 = tmp684 - tmp685;
+			 tmp693 = tmp689 - tmp692;
+			 tmp694 = tmp686 - tmp693;
+			 tmp732 = tmp686 + tmp693;
+			 tmp697 = tmp695 - tmp696;
+			 tmp700 = tmp698 - tmp699;
+			 tmp701 = tmp697 - tmp700;
+			 tmp733 = tmp697 + tmp700;
+		    }
+		    tmp702 = (K098017140 * tmp694) - (K995184726 * tmp701);
+		    tmp725 = (K995184726 * tmp694) + (K098017140 * tmp701);
+		    tmp734 = (K773010453 * tmp732) - (K634393284 * tmp733);
+		    tmp741 = (K634393284 * tmp732) + (K773010453 * tmp733);
+	       }
+	       {
+		    fftw_real tmp713;
+		    fftw_real tmp735;
+		    fftw_real tmp720;
+		    fftw_real tmp736;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp705;
+			 fftw_real tmp712;
+			 fftw_real tmp716;
+			 fftw_real tmp719;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp705 = tmp703 - tmp704;
+			 tmp712 = tmp708 - tmp711;
+			 tmp713 = tmp705 - tmp712;
+			 tmp735 = tmp705 + tmp712;
+			 tmp716 = tmp714 - tmp715;
+			 tmp719 = tmp717 - tmp718;
+			 tmp720 = tmp716 - tmp719;
+			 tmp736 = tmp716 + tmp719;
+		    }
+		    tmp721 = (K098017140 * tmp713) + (K995184726 * tmp720);
+		    tmp724 = (K098017140 * tmp720) - (K995184726 * tmp713);
+		    tmp737 = (K773010453 * tmp735) + (K634393284 * tmp736);
+		    tmp740 = (K773010453 * tmp736) - (K634393284 * tmp735);
+	       }
+	       {
+		    fftw_real tmp683;
+		    fftw_real tmp722;
+		    fftw_real tmp723;
+		    fftw_real tmp726;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp683 = tmp667 + tmp682;
+		    tmp722 = tmp702 + tmp721;
+		    c_re(inout[47 * iostride]) = tmp683 - tmp722;
+		    c_re(inout[15 * iostride]) = tmp683 + tmp722;
+		    tmp723 = tmp667 - tmp682;
+		    tmp726 = tmp724 - tmp725;
+		    c_re(inout[63 * iostride]) = tmp723 - tmp726;
+		    c_re(inout[31 * iostride]) = tmp723 + tmp726;
+	       }
+	       {
+		    fftw_real tmp1159;
+		    fftw_real tmp1162;
+		    fftw_real tmp1163;
+		    fftw_real tmp1164;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1159 = tmp725 + tmp724;
+		    tmp1162 = tmp1160 + tmp1161;
+		    c_im(inout[15 * iostride]) = tmp1159 + tmp1162;
+		    c_im(inout[47 * iostride]) = tmp1162 - tmp1159;
+		    tmp1163 = tmp702 - tmp721;
+		    tmp1164 = tmp1161 - tmp1160;
+		    c_im(inout[31 * iostride]) = tmp1163 + tmp1164;
+		    c_im(inout[63 * iostride]) = tmp1164 - tmp1163;
+	       }
+	       {
+		    fftw_real tmp731;
+		    fftw_real tmp738;
+		    fftw_real tmp739;
+		    fftw_real tmp742;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp731 = tmp727 + tmp730;
+		    tmp738 = tmp734 + tmp737;
+		    c_re(inout[39 * iostride]) = tmp731 - tmp738;
+		    c_re(inout[7 * iostride]) = tmp731 + tmp738;
+		    tmp739 = tmp727 - tmp730;
+		    tmp742 = tmp740 - tmp741;
+		    c_re(inout[55 * iostride]) = tmp739 - tmp742;
+		    c_re(inout[23 * iostride]) = tmp739 + tmp742;
+	       }
+	       {
+		    fftw_real tmp1151;
+		    fftw_real tmp1156;
+		    fftw_real tmp1157;
+		    fftw_real tmp1158;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1151 = tmp741 + tmp740;
+		    tmp1156 = tmp1152 + tmp1155;
+		    c_im(inout[7 * iostride]) = tmp1151 + tmp1156;
+		    c_im(inout[39 * iostride]) = tmp1156 - tmp1151;
+		    tmp1157 = tmp734 - tmp737;
+		    tmp1158 = tmp1155 - tmp1152;
+		    c_im(inout[23 * iostride]) = tmp1157 + tmp1158;
+		    c_im(inout[55 * iostride]) = tmp1158 - tmp1157;
+	       }
+	  }
+	  {
+	       fftw_real tmp747;
+	       fftw_real tmp775;
+	       fftw_real tmp1141;
+	       fftw_real tmp1147;
+	       fftw_real tmp754;
+	       fftw_real tmp1136;
+	       fftw_real tmp778;
+	       fftw_real tmp1146;
+	       fftw_real tmp762;
+	       fftw_real tmp773;
+	       fftw_real tmp782;
+	       fftw_real tmp789;
+	       fftw_real tmp769;
+	       fftw_real tmp772;
+	       fftw_real tmp785;
+	       fftw_real tmp788;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp743;
+		    fftw_real tmp746;
+		    fftw_real tmp1137;
+		    fftw_real tmp1140;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp743 = tmp655 + tmp658;
+		    tmp746 = tmp744 + tmp745;
+		    tmp747 = tmp743 - tmp746;
+		    tmp775 = tmp743 + tmp746;
+		    tmp1137 = tmp665 + tmp662;
+		    tmp1140 = tmp1138 + tmp1139;
+		    tmp1141 = tmp1137 + tmp1140;
+		    tmp1147 = tmp1140 - tmp1137;
+	       }
+	       {
+		    fftw_real tmp750;
+		    fftw_real tmp777;
+		    fftw_real tmp753;
+		    fftw_real tmp776;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp748;
+			 fftw_real tmp749;
+			 fftw_real tmp751;
+			 fftw_real tmp752;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp748 = tmp668 + tmp669;
+			 tmp749 = tmp671 + tmp672;
+			 tmp750 =
+			     (K831469612 * tmp748) - (K555570233 * tmp749);
+			 tmp777 =
+			     (K555570233 * tmp748) + (K831469612 * tmp749);
+			 tmp751 = tmp675 + tmp676;
+			 tmp752 = tmp678 + tmp679;
+			 tmp753 =
+			     (K831469612 * tmp751) + (K555570233 * tmp752);
+			 tmp776 =
+			     (K831469612 * tmp752) - (K555570233 * tmp751);
+		    }
+		    tmp754 = tmp750 - tmp753;
+		    tmp1136 = tmp753 + tmp750;
+		    tmp778 = tmp776 + tmp777;
+		    tmp1146 = tmp776 - tmp777;
+	       }
+	       {
+		    fftw_real tmp758;
+		    fftw_real tmp780;
+		    fftw_real tmp761;
+		    fftw_real tmp781;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp756;
+			 fftw_real tmp757;
+			 fftw_real tmp759;
+			 fftw_real tmp760;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp756 = tmp714 + tmp715;
+			 tmp757 = tmp711 + tmp708;
+			 tmp758 = tmp756 - tmp757;
+			 tmp780 = tmp756 + tmp757;
+			 tmp759 = tmp703 + tmp704;
+			 tmp760 = tmp717 + tmp718;
+			 tmp761 = tmp759 - tmp760;
+			 tmp781 = tmp759 + tmp760;
+		    }
+		    tmp762 = (K471396736 * tmp758) - (K881921264 * tmp761);
+		    tmp773 = (K881921264 * tmp758) + (K471396736 * tmp761);
+		    tmp782 = (K956940335 * tmp780) - (K290284677 * tmp781);
+		    tmp789 = (K290284677 * tmp780) + (K956940335 * tmp781);
+	       }
+	       {
+		    fftw_real tmp765;
+		    fftw_real tmp783;
+		    fftw_real tmp768;
+		    fftw_real tmp784;
+		    ASSERT_ALIGNED_DOUBLE;
+		    {
+			 fftw_real tmp763;
+			 fftw_real tmp764;
+			 fftw_real tmp766;
+			 fftw_real tmp767;
+			 ASSERT_ALIGNED_DOUBLE;
+			 tmp763 = tmp695 + tmp696;
+			 tmp764 = tmp692 + tmp689;
+			 tmp765 = tmp763 - tmp764;
+			 tmp783 = tmp763 + tmp764;
+			 tmp766 = tmp684 + tmp685;
+			 tmp767 = tmp698 + tmp699;
+			 tmp768 = tmp766 - tmp767;
+			 tmp784 = tmp766 + tmp767;
+		    }
+		    tmp769 = (K471396736 * tmp765) + (K881921264 * tmp768);
+		    tmp772 = (K471396736 * tmp768) - (K881921264 * tmp765);
+		    tmp785 = (K956940335 * tmp783) + (K290284677 * tmp784);
+		    tmp788 = (K956940335 * tmp784) - (K290284677 * tmp783);
+	       }
+	       {
+		    fftw_real tmp755;
+		    fftw_real tmp770;
+		    fftw_real tmp771;
+		    fftw_real tmp774;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp755 = tmp747 - tmp754;
+		    tmp770 = tmp762 - tmp769;
+		    c_re(inout[59 * iostride]) = tmp755 - tmp770;
+		    c_re(inout[27 * iostride]) = tmp755 + tmp770;
+		    tmp771 = tmp747 + tmp754;
+		    tmp774 = tmp772 + tmp773;
+		    c_re(inout[43 * iostride]) = tmp771 - tmp774;
+		    c_re(inout[11 * iostride]) = tmp771 + tmp774;
+	       }
+	       {
+		    fftw_real tmp1149;
+		    fftw_real tmp1150;
+		    fftw_real tmp1145;
+		    fftw_real tmp1148;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1149 = tmp772 - tmp773;
+		    tmp1150 = tmp1147 - tmp1146;
+		    c_im(inout[27 * iostride]) = tmp1149 + tmp1150;
+		    c_im(inout[59 * iostride]) = tmp1150 - tmp1149;
+		    tmp1145 = tmp769 + tmp762;
+		    tmp1148 = tmp1146 + tmp1147;
+		    c_im(inout[11 * iostride]) = tmp1145 + tmp1148;
+		    c_im(inout[43 * iostride]) = tmp1148 - tmp1145;
+	       }
+	       {
+		    fftw_real tmp779;
+		    fftw_real tmp786;
+		    fftw_real tmp787;
+		    fftw_real tmp790;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp779 = tmp775 - tmp778;
+		    tmp786 = tmp782 - tmp785;
+		    c_re(inout[51 * iostride]) = tmp779 - tmp786;
+		    c_re(inout[19 * iostride]) = tmp779 + tmp786;
+		    tmp787 = tmp775 + tmp778;
+		    tmp790 = tmp788 + tmp789;
+		    c_re(inout[35 * iostride]) = tmp787 - tmp790;
+		    c_re(inout[3 * iostride]) = tmp787 + tmp790;
+	       }
+	       {
+		    fftw_real tmp1143;
+		    fftw_real tmp1144;
+		    fftw_real tmp1135;
+		    fftw_real tmp1142;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp1143 = tmp788 - tmp789;
+		    tmp1144 = tmp1141 - tmp1136;
+		    c_im(inout[19 * iostride]) = tmp1143 + tmp1144;
+		    c_im(inout[51 * iostride]) = tmp1144 - tmp1143;
+		    tmp1135 = tmp785 + tmp782;
+		    tmp1142 = tmp1136 + tmp1141;
+		    c_im(inout[3 * iostride]) = tmp1135 + tmp1142;
+		    c_im(inout[35 * iostride]) = tmp1142 - tmp1135;
+	       }
+	  }
+     }
+}
+
+static const int twiddle_order[] =	/* the 63 indices 1..63 in ascending order */
+    { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 };
+fftw_codelet_desc fftwi_twiddle_64_desc = {	/* positional initializer; the field names are in the fftw_codelet_desc declaration, which is not in this file */
+     "fftwi_twiddle_64",	/* the codelet's name as a string */
+     (void (*)()) fftwi_twiddle_64,	/* the codelet itself, cast to an unprototyped function pointer */
+     64,	/* matches the 64 in the codelet name; presumably the transform size -- confirm in fftw.h */
+     FFTW_BACKWARD,
+     FFTW_TWIDDLE,
+     1419,	/* NOTE(review): generator-emitted number, presumably a codelet signature/id -- confirm in fftw.h */
+     63,	/* equals the number of entries in twiddle_order */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_7.c b/src/fftw/ftwi_7.c
new file mode 100644
index 0000000..ea3fbc3
--- /dev/null
+++ b/src/fftw/ftwi_7.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:26 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 7 */
+
+/*
+ * This function contains 72 FP additions, 60 FP multiplications,
+ * (or, 60 additions, 48 multiplications, 12 fused multiply/add),
+ * 24 stack variables, and 28 memory accesses
+ */
+static const fftw_real K222520933 =	/* each K name spells the leading decimals of its value */
+FFTW_KONST(+0.222520933956314404288902564496794759466355569);	/* cos(3*pi/7) = -cos(4*pi/7) */
+static const fftw_real K900968867 =
+FFTW_KONST(+0.900968867902419126236102319507445051165919162);	/* cos(pi/7) = -cos(6*pi/7) */
+static const fftw_real K623489801 =
+FFTW_KONST(+0.623489801858733530525004884004239810632274731);	/* cos(2*pi/7) */
+static const fftw_real K433883739 =
+FFTW_KONST(+0.433883739117558120475768332848358754609990728);	/* sin(pi/7) = sin(6*pi/7) */
+static const fftw_real K974927912 =
+FFTW_KONST(+0.974927912181823607018131682993931217232785801);	/* sin(3*pi/7) = sin(4*pi/7) */
+static const fftw_real K781831482 =
+FFTW_KONST(+0.781831482468029808708444526674057750232334519);	/* sin(2*pi/7) */
+
+/*
+ * Generator Id's : 
+ * $Id: ftwi_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_7.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_twiddle_7(fftw_complex *A, const fftw_complex *W, int iostride,	/* A: data updated in place; W: twiddle factors (read only); iostride: spacing between the 7 points */
+		     int m, int dist)	/* m: number of passes; dist: how far A advances between passes */
+{				/* In-place 7-point twiddle butterfly; registered in fftwi_twiddle_7_desc as FFTW_BACKWARD / FFTW_TWIDDLE. */
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 6) {	/* each pass uses W[0..5], then steps to the next group of points */
+	  fftw_real tmp1;
+	  fftw_real tmp53;
+	  fftw_real tmp12;
+	  fftw_real tmp54;
+	  fftw_real tmp38;
+	  fftw_real tmp50;
+	  fftw_real tmp23;
+	  fftw_real tmp55;
+	  fftw_real tmp44;
+	  fftw_real tmp51;
+	  fftw_real tmp34;
+	  fftw_real tmp56;
+	  fftw_real tmp41;
+	  fftw_real tmp52;
+	  ASSERT_ALIGNED_DOUBLE;
+	  tmp1 = c_re(inout[0]);	/* point 0 is taken as-is, without a twiddle factor */
+	  tmp53 = c_im(inout[0]);
+	  {			/* points 1 and 6, twiddled by W[0] and W[5] */
+	       fftw_real tmp6;
+	       fftw_real tmp37;
+	       fftw_real tmp11;
+	       fftw_real tmp36;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[iostride]);
+		    tmp5 = c_im(inout[iostride]);
+		    tmp2 = c_re(W[0]);
+		    tmp4 = c_im(W[0]);
+		    tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5);	/* real part of conj(W[0]) * point 1 */
+		    tmp37 = (tmp2 * tmp5) - (tmp4 * tmp3);	/* imaginary part of the same product */
+	       }
+	       {
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    fftw_real tmp7;
+		    fftw_real tmp9;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp8 = c_re(inout[6 * iostride]);
+		    tmp10 = c_im(inout[6 * iostride]);
+		    tmp7 = c_re(W[5]);
+		    tmp9 = c_im(W[5]);
+		    tmp11 = (tmp7 * tmp8) + (tmp9 * tmp10);	/* conj(W[5]) * point 6, real part */
+		    tmp36 = (tmp7 * tmp10) - (tmp9 * tmp8);	/* ... and imaginary part */
+	       }
+	       tmp12 = tmp6 + tmp11;	/* sum of the two twiddled real parts */
+	       tmp54 = tmp6 - tmp11;	/* difference of the real parts (point 1 minus point 6) */
+	       tmp38 = tmp36 - tmp37;	/* difference of the imaginary parts (point 6 minus point 1) */
+	       tmp50 = tmp37 + tmp36;	/* sum of the two twiddled imaginary parts */
+	  }
+	  {			/* points 2 and 5, twiddled by W[1] and W[4]; same pattern as above */
+	       fftw_real tmp17;
+	       fftw_real tmp43;
+	       fftw_real tmp22;
+	       fftw_real tmp42;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[2 * iostride]);
+		    tmp16 = c_im(inout[2 * iostride]);
+		    tmp13 = c_re(W[1]);
+		    tmp15 = c_im(W[1]);
+		    tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16);
+		    tmp43 = (tmp13 * tmp16) - (tmp15 * tmp14);
+	       }
+	       {
+		    fftw_real tmp19;
+		    fftw_real tmp21;
+		    fftw_real tmp18;
+		    fftw_real tmp20;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp19 = c_re(inout[5 * iostride]);
+		    tmp21 = c_im(inout[5 * iostride]);
+		    tmp18 = c_re(W[4]);
+		    tmp20 = c_im(W[4]);
+		    tmp22 = (tmp18 * tmp19) + (tmp20 * tmp21);
+		    tmp42 = (tmp18 * tmp21) - (tmp20 * tmp19);
+	       }
+	       tmp23 = tmp17 + tmp22;
+	       tmp55 = tmp17 - tmp22;
+	       tmp44 = tmp42 - tmp43;
+	       tmp51 = tmp43 + tmp42;
+	  }
+	  {			/* points 3 and 4, twiddled by W[2] and W[3]; same pattern as above */
+	       fftw_real tmp28;
+	       fftw_real tmp40;
+	       fftw_real tmp33;
+	       fftw_real tmp39;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    fftw_real tmp24;
+		    fftw_real tmp26;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp25 = c_re(inout[3 * iostride]);
+		    tmp27 = c_im(inout[3 * iostride]);
+		    tmp24 = c_re(W[2]);
+		    tmp26 = c_im(W[2]);
+		    tmp28 = (tmp24 * tmp25) + (tmp26 * tmp27);
+		    tmp40 = (tmp24 * tmp27) - (tmp26 * tmp25);
+	       }
+	       {
+		    fftw_real tmp30;
+		    fftw_real tmp32;
+		    fftw_real tmp29;
+		    fftw_real tmp31;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp30 = c_re(inout[4 * iostride]);
+		    tmp32 = c_im(inout[4 * iostride]);
+		    tmp29 = c_re(W[3]);
+		    tmp31 = c_im(W[3]);
+		    tmp33 = (tmp29 * tmp30) + (tmp31 * tmp32);
+		    tmp39 = (tmp29 * tmp32) - (tmp31 * tmp30);
+	       }
+	       tmp34 = tmp28 + tmp33;
+	       tmp56 = tmp28 - tmp33;
+	       tmp41 = tmp39 - tmp40;
+	       tmp52 = tmp40 + tmp39;
+	  }
+	  {			/* combine the three pairs and write all 7 results back over the inputs */
+	       fftw_real tmp47;
+	       fftw_real tmp46;
+	       fftw_real tmp59;
+	       fftw_real tmp60;
+	       ASSERT_ALIGNED_DOUBLE;
+	       c_re(inout[0]) = tmp1 + tmp12 + tmp23 + tmp34;	/* output 0, real: plain sum of all seven real parts */
+	       tmp47 =
+		   (K781831482 * tmp38) + (K974927912 * tmp44) +
+		   (K433883739 * tmp41);
+	       tmp46 =
+		   tmp1 + (K623489801 * tmp12) - (K900968867 * tmp34) -
+		   (K222520933 * tmp23);
+	       c_re(inout[6 * iostride]) = tmp46 - tmp47;	/* outputs 6 and 1 share tmp46 and differ only in the sign of tmp47 */
+	       c_re(inout[iostride]) = tmp46 + tmp47;
+	       {
+		    fftw_real tmp49;
+		    fftw_real tmp48;
+		    fftw_real tmp45;
+		    fftw_real tmp35;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp49 =
+			(K433883739 * tmp38) + (K974927912 * tmp41) -
+			(K781831482 * tmp44);
+		    tmp48 =
+			tmp1 + (K623489801 * tmp23) -
+			(K222520933 * tmp34) - (K900968867 * tmp12);
+		    c_re(inout[4 * iostride]) = tmp48 - tmp49;	/* real outputs 4 and 3 */
+		    c_re(inout[3 * iostride]) = tmp48 + tmp49;
+		    tmp45 =
+			(K974927912 * tmp38) - (K781831482 * tmp41) -
+			(K433883739 * tmp44);
+		    tmp35 =
+			tmp1 + (K623489801 * tmp34) -
+			(K900968867 * tmp23) - (K222520933 * tmp12);
+		    c_re(inout[5 * iostride]) = tmp35 - tmp45;	/* real outputs 5 and 2 */
+		    c_re(inout[2 * iostride]) = tmp35 + tmp45;
+	       }
+	       c_im(inout[0]) = tmp50 + tmp51 + tmp52 + tmp53;	/* output 0, imaginary: plain sum of all seven imaginary parts */
+	       tmp59 =
+		   (K974927912 * tmp54) - (K781831482 * tmp56) -
+		   (K433883739 * tmp55);
+	       tmp60 =
+		   (K623489801 * tmp52) + tmp53 - (K900968867 * tmp51) -
+		   (K222520933 * tmp50);
+	       c_im(inout[2 * iostride]) = tmp59 + tmp60;	/* imaginary outputs 2 and 5 */
+	       c_im(inout[5 * iostride]) = tmp60 - tmp59;
+	       {
+		    fftw_real tmp61;
+		    fftw_real tmp62;
+		    fftw_real tmp57;
+		    fftw_real tmp58;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp61 =
+			(K433883739 * tmp54) + (K974927912 * tmp56) -
+			(K781831482 * tmp55);
+		    tmp62 =
+			(K623489801 * tmp51) + tmp53 -
+			(K222520933 * tmp52) - (K900968867 * tmp50);
+		    c_im(inout[3 * iostride]) = tmp61 + tmp62;	/* imaginary outputs 3 and 4 */
+		    c_im(inout[4 * iostride]) = tmp62 - tmp61;
+		    tmp57 =
+			(K781831482 * tmp54) + (K974927912 * tmp55) +
+			(K433883739 * tmp56);
+		    tmp58 =
+			(K623489801 * tmp50) + tmp53 -
+			(K900968867 * tmp52) - (K222520933 * tmp51);
+		    c_im(inout[iostride]) = tmp57 + tmp58;	/* imaginary outputs 1 and 6 */
+		    c_im(inout[6 * iostride]) = tmp58 - tmp57;
+	       }
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6 };	/* six indices, 1..6 in ascending order */
+fftw_codelet_desc fftwi_twiddle_7_desc = {	/* positional initializer; the field names are in the fftw_codelet_desc declaration, which is not in this file */
+     "fftwi_twiddle_7",	/* the codelet's name as a string */
+     (void (*)()) fftwi_twiddle_7,	/* the codelet itself, cast to an unprototyped function pointer */
+     7,	/* number of points fftwi_twiddle_7 combines per pass */
+     FFTW_BACKWARD,
+     FFTW_TWIDDLE,
+     165,	/* NOTE(review): generator-emitted number, presumably a codelet signature/id -- confirm in fftw.h */
+     6,	/* number of entries in twiddle_order; also how far fftwi_twiddle_7 advances W per pass */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_8.c b/src/fftw/ftwi_8.c
new file mode 100644
index 0000000..ed8720a
--- /dev/null
+++ b/src/fftw/ftwi_8.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:32 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 8 */
+
+/*
+ * This function contains 66 FP additions, 32 FP multiplications,
+ * (or, 52 additions, 18 multiplications, 14 fused multiply/add),
+ * 28 stack variables, and 32 memory accesses
+ */
+static const fftw_real K707106781 =	/* the name spells the leading decimals of the value */
+FFTW_KONST(+0.707106781186547524400844362104849039284835938);	/* sqrt(1/2) = cos(pi/4) = sin(pi/4) */
+
+/*
+ * Generator Id's : 
+ * $Id: ftwi_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_8.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_twiddle_8(fftw_complex *A, const fftw_complex *W, int iostride,	/* A: data updated in place; W: twiddle factors (read only); iostride: spacing between the 8 points */
+		     int m, int dist)	/* m: number of passes; dist: how far A advances between passes */
+{				/* In-place 8-point twiddle butterfly; registered in fftwi_twiddle_8_desc as FFTW_BACKWARD / FFTW_TWIDDLE. */
+     int i;
+     fftw_complex *inout;
+     inout = A;
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 7) {	/* each pass uses W[0..6], then steps to the next group of points */
+	  fftw_real tmp7;
+	  fftw_real tmp43;
+	  fftw_real tmp71;
+	  fftw_real tmp77;
+	  fftw_real tmp41;
+	  fftw_real tmp53;
+	  fftw_real tmp56;
+	  fftw_real tmp64;
+	  fftw_real tmp18;
+	  fftw_real tmp76;
+	  fftw_real tmp46;
+	  fftw_real tmp68;
+	  fftw_real tmp30;
+	  fftw_real tmp48;
+	  fftw_real tmp51;
+	  fftw_real tmp65;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {			/* points 0 and 4; point 0 is taken as-is, point 4 is twiddled by W[3] */
+	       fftw_real tmp1;
+	       fftw_real tmp70;
+	       fftw_real tmp6;
+	       fftw_real tmp69;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);
+	       tmp70 = c_im(inout[0]);
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[4 * iostride]);
+		    tmp5 = c_im(inout[4 * iostride]);
+		    tmp2 = c_re(W[3]);
+		    tmp4 = c_im(W[3]);
+		    tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5);	/* real part of conj(W[3]) * point 4 */
+		    tmp69 = (tmp2 * tmp5) - (tmp4 * tmp3);	/* imaginary part of the same product */
+	       }
+	       tmp7 = tmp1 + tmp6;	/* real sum of points 0 and 4 */
+	       tmp43 = tmp1 - tmp6;	/* real difference */
+	       tmp71 = tmp69 + tmp70;	/* imaginary sum */
+	       tmp77 = tmp70 - tmp69;	/* imaginary difference (point 0 minus point 4) */
+	  }
+	  {			/* points 7 and 3, twiddled by W[6] and W[2] */
+	       fftw_real tmp35;
+	       fftw_real tmp54;
+	       fftw_real tmp40;
+	       fftw_real tmp55;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp32;
+		    fftw_real tmp34;
+		    fftw_real tmp31;
+		    fftw_real tmp33;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp32 = c_re(inout[7 * iostride]);
+		    tmp34 = c_im(inout[7 * iostride]);
+		    tmp31 = c_re(W[6]);
+		    tmp33 = c_im(W[6]);
+		    tmp35 = (tmp31 * tmp32) + (tmp33 * tmp34);
+		    tmp54 = (tmp31 * tmp34) - (tmp33 * tmp32);
+	       }
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp39;
+		    fftw_real tmp36;
+		    fftw_real tmp38;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = c_re(inout[3 * iostride]);
+		    tmp39 = c_im(inout[3 * iostride]);
+		    tmp36 = c_re(W[2]);
+		    tmp38 = c_im(W[2]);
+		    tmp40 = (tmp36 * tmp37) + (tmp38 * tmp39);
+		    tmp55 = (tmp36 * tmp39) - (tmp38 * tmp37);
+	       }
+	       tmp41 = tmp35 + tmp40;
+	       tmp53 = tmp35 - tmp40;
+	       tmp56 = tmp54 - tmp55;
+	       tmp64 = tmp54 + tmp55;
+	  }
+	  {			/* points 2 and 6, twiddled by W[1] and W[5] */
+	       fftw_real tmp12;
+	       fftw_real tmp44;
+	       fftw_real tmp17;
+	       fftw_real tmp45;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp9;
+		    fftw_real tmp11;
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp9 = c_re(inout[2 * iostride]);
+		    tmp11 = c_im(inout[2 * iostride]);
+		    tmp8 = c_re(W[1]);
+		    tmp10 = c_im(W[1]);
+		    tmp12 = (tmp8 * tmp9) + (tmp10 * tmp11);
+		    tmp44 = (tmp8 * tmp11) - (tmp10 * tmp9);
+	       }
+	       {
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    fftw_real tmp13;
+		    fftw_real tmp15;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp14 = c_re(inout[6 * iostride]);
+		    tmp16 = c_im(inout[6 * iostride]);
+		    tmp13 = c_re(W[5]);
+		    tmp15 = c_im(W[5]);
+		    tmp17 = (tmp13 * tmp14) + (tmp15 * tmp16);
+		    tmp45 = (tmp13 * tmp16) - (tmp15 * tmp14);
+	       }
+	       tmp18 = tmp12 + tmp17;
+	       tmp76 = tmp12 - tmp17;
+	       tmp46 = tmp44 - tmp45;
+	       tmp68 = tmp44 + tmp45;
+	  }
+	  {			/* points 1 and 5, twiddled by W[0] and W[4] */
+	       fftw_real tmp24;
+	       fftw_real tmp49;
+	       fftw_real tmp29;
+	       fftw_real tmp50;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp21;
+		    fftw_real tmp23;
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp21 = c_re(inout[iostride]);
+		    tmp23 = c_im(inout[iostride]);
+		    tmp20 = c_re(W[0]);
+		    tmp22 = c_im(W[0]);
+		    tmp24 = (tmp20 * tmp21) + (tmp22 * tmp23);
+		    tmp49 = (tmp20 * tmp23) - (tmp22 * tmp21);
+	       }
+	       {
+		    fftw_real tmp26;
+		    fftw_real tmp28;
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp26 = c_re(inout[5 * iostride]);
+		    tmp28 = c_im(inout[5 * iostride]);
+		    tmp25 = c_re(W[4]);
+		    tmp27 = c_im(W[4]);
+		    tmp29 = (tmp25 * tmp26) + (tmp27 * tmp28);
+		    tmp50 = (tmp25 * tmp28) - (tmp27 * tmp26);
+	       }
+	       tmp30 = tmp24 + tmp29;
+	       tmp48 = tmp24 - tmp29;
+	       tmp51 = tmp49 - tmp50;
+	       tmp65 = tmp49 + tmp50;
+	  }
+	  {			/* combine the four pairs and write all 8 results back over the inputs */
+	       fftw_real tmp19;
+	       fftw_real tmp42;
+	       fftw_real tmp63;
+	       fftw_real tmp66;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp19 = tmp7 + tmp18;
+	       tmp42 = tmp30 + tmp41;
+	       c_re(inout[4 * iostride]) = tmp19 - tmp42;	/* real outputs 4 and 0 */
+	       c_re(inout[0]) = tmp19 + tmp42;
+	       {
+		    fftw_real tmp73;
+		    fftw_real tmp74;
+		    fftw_real tmp67;
+		    fftw_real tmp72;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp73 = tmp30 - tmp41;
+		    tmp74 = tmp71 - tmp68;
+		    c_im(inout[2 * iostride]) = tmp73 + tmp74;	/* imaginary outputs 2 and 6 */
+		    c_im(inout[6 * iostride]) = tmp74 - tmp73;
+		    tmp67 = tmp65 + tmp64;
+		    tmp72 = tmp68 + tmp71;
+		    c_im(inout[0]) = tmp67 + tmp72;	/* imaginary outputs 0 and 4 */
+		    c_im(inout[4 * iostride]) = tmp72 - tmp67;
+	       }
+	       tmp63 = tmp7 - tmp18;
+	       tmp66 = tmp64 - tmp65;
+	       c_re(inout[6 * iostride]) = tmp63 - tmp66;	/* real outputs 6 and 2 */
+	       c_re(inout[2 * iostride]) = tmp63 + tmp66;
+	       {		/* odd-numbered outputs: these are the only ones scaled by K707106781 */
+		    fftw_real tmp59;
+		    fftw_real tmp78;
+		    fftw_real tmp62;
+		    fftw_real tmp75;
+		    fftw_real tmp60;
+		    fftw_real tmp61;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp59 = tmp43 + tmp46;
+		    tmp78 = tmp76 + tmp77;
+		    tmp60 = tmp56 - tmp53;
+		    tmp61 = tmp48 + tmp51;
+		    tmp62 = K707106781 * (tmp60 - tmp61);
+		    tmp75 = K707106781 * (tmp61 + tmp60);
+		    c_re(inout[7 * iostride]) = tmp59 - tmp62;	/* real outputs 7 and 3 */
+		    c_re(inout[3 * iostride]) = tmp59 + tmp62;
+		    c_im(inout[iostride]) = tmp75 + tmp78;	/* imaginary outputs 1 and 5 */
+		    c_im(inout[5 * iostride]) = tmp78 - tmp75;
+	       }
+	       {		/* remaining odd-numbered outputs, again scaled by K707106781 */
+		    fftw_real tmp47;
+		    fftw_real tmp80;
+		    fftw_real tmp58;
+		    fftw_real tmp79;
+		    fftw_real tmp52;
+		    fftw_real tmp57;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp47 = tmp43 - tmp46;
+		    tmp80 = tmp77 - tmp76;
+		    tmp52 = tmp48 - tmp51;
+		    tmp57 = tmp53 + tmp56;
+		    tmp58 = K707106781 * (tmp52 + tmp57);
+		    tmp79 = K707106781 * (tmp52 - tmp57);
+		    c_re(inout[5 * iostride]) = tmp47 - tmp58;	/* real outputs 5 and 1 */
+		    c_re(inout[iostride]) = tmp47 + tmp58;
+		    c_im(inout[3 * iostride]) = tmp79 + tmp80;	/* imaginary outputs 3 and 7 */
+		    c_im(inout[7 * iostride]) = tmp80 - tmp79;
+	       }
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6, 7 };	/* seven indices, 1..7 in ascending order */
+fftw_codelet_desc fftwi_twiddle_8_desc = {	/* positional initializer; the field names are in the fftw_codelet_desc declaration, which is not in this file */
+     "fftwi_twiddle_8",	/* the codelet's name as a string */
+     (void (*)()) fftwi_twiddle_8,	/* the codelet itself, cast to an unprototyped function pointer */
+     8,	/* number of points fftwi_twiddle_8 combines per pass */
+     FFTW_BACKWARD,
+     FFTW_TWIDDLE,
+     187,	/* NOTE(review): generator-emitted number, presumably a codelet signature/id -- confirm in fftw.h */
+     7,	/* number of entries in twiddle_order; also how far fftwi_twiddle_8 advances W per pass */
+     twiddle_order,
+};
diff --git a/src/fftw/ftwi_9.c b/src/fftw/ftwi_9.c
new file mode 100644
index 0000000..a972cd3
--- /dev/null
+++ b/src/fftw/ftwi_9.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Mon Mar 24 02:08:32 EST 2003 */
+
+#include "fftw-int.h"
+#include "fftw.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -twiddleinv 9 */
+
+/*
+ * This function contains 96 FP additions, 72 FP multiplications,
+ * (or, 60 additions, 36 multiplications, 36 fused multiply/add),
+ * 34 stack variables, and 36 memory accesses
+ */
+static const fftw_real K642787609 =	/* sin(2 * pi / 9) */
+FFTW_KONST(+0.642787609686539326322643409907263432907559884);
+static const fftw_real K766044443 =	/* cos(2 * pi / 9) */
+FFTW_KONST(+0.766044443118978035202392650555416673935832457);
+static const fftw_real K939692620 =	/* cos(pi / 9) */
+FFTW_KONST(+0.939692620785908384054109277324731469936208134);
+static const fftw_real K342020143 =	/* sin(pi / 9) */
+FFTW_KONST(+0.342020143325668733044099614682259580763083368);
+static const fftw_real K984807753 =	/* sin(4 * pi / 9) */
+FFTW_KONST(+0.984807753012208059366743024589523013670643252);
+static const fftw_real K173648177 =	/* cos(4 * pi / 9) */
+FFTW_KONST(+0.173648177666930348851716626769314796000375677);
+static const fftw_real K500000000 =	/* 1 / 2 */
+FFTW_KONST(+0.500000000000000000000000000000000000000000000);
+static const fftw_real K866025403 =	/* sqrt(3) / 2 */
+FFTW_KONST(+0.866025403784438646763723170752936183471402627);
+
+/*
+ * Generator Id's : 
+ * $Id: ftwi_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ * $Id: ftwi_9.c,v 1.1 2008/10/17 06:13:18 scuri Exp $
+ */
+
+void fftwi_twiddle_9(fftw_complex *A, const fftw_complex *W, int iostride,
+		     int m, int dist)
+{
+     int i;			/* groups still to process; counts down from m */
+     fftw_complex *inout;	/* current group: 9 elements, iostride apart, updated in place */
+     inout = A;			/* every pass of the loop advances inout by dist and W by 8 */
+     for (i = m; i > 0; i = i - 1, inout = inout + dist, W = W + 8) {
+	  fftw_real tmp1;
+	  fftw_real tmp99;
+	  fftw_real tmp64;
+	  fftw_real tmp98;
+	  fftw_real tmp105;
+	  fftw_real tmp104;
+	  fftw_real tmp12;
+	  fftw_real tmp61;
+	  fftw_real tmp47;
+	  fftw_real tmp78;
+	  fftw_real tmp89;
+	  fftw_real tmp54;
+	  fftw_real tmp75;
+	  fftw_real tmp90;
+	  fftw_real tmp30;
+	  fftw_real tmp68;
+	  fftw_real tmp86;
+	  fftw_real tmp59;
+	  fftw_real tmp71;
+	  fftw_real tmp87;
+	  ASSERT_ALIGNED_DOUBLE;
+	  {			/* elements 0, 3, 6; elements 3 and 6 are multiplied by conj(W[2]), conj(W[5]) */
+	       fftw_real tmp6;
+	       fftw_real tmp63;
+	       fftw_real tmp11;
+	       fftw_real tmp62;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp1 = c_re(inout[0]);
+	       tmp99 = c_im(inout[0]);
+	       {
+		    fftw_real tmp3;
+		    fftw_real tmp5;
+		    fftw_real tmp2;
+		    fftw_real tmp4;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp3 = c_re(inout[3 * iostride]);
+		    tmp5 = c_im(inout[3 * iostride]);
+		    tmp2 = c_re(W[2]);
+		    tmp4 = c_im(W[2]);
+		    tmp6 = (tmp2 * tmp3) + (tmp4 * tmp5);	/* real part of inout[3 * iostride] * conj(W[2]) */
+		    tmp63 = (tmp2 * tmp5) - (tmp4 * tmp3);	/* imaginary part of the same product */
+	       }
+	       {
+		    fftw_real tmp8;
+		    fftw_real tmp10;
+		    fftw_real tmp7;
+		    fftw_real tmp9;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp8 = c_re(inout[6 * iostride]);
+		    tmp10 = c_im(inout[6 * iostride]);
+		    tmp7 = c_re(W[5]);
+		    tmp9 = c_im(W[5]);
+		    tmp11 = (tmp7 * tmp8) + (tmp9 * tmp10);
+		    tmp62 = (tmp7 * tmp10) - (tmp9 * tmp8);
+	       }
+	       tmp64 = K866025403 * (tmp62 - tmp63);
+	       tmp98 = tmp63 + tmp62;
+	       tmp105 = tmp99 - (K500000000 * tmp98);
+	       tmp104 = K866025403 * (tmp6 - tmp11);
+	       tmp12 = tmp6 + tmp11;
+	       tmp61 = tmp1 - (K500000000 * tmp12);
+	  }
+	  {			/* elements 2, 5, 8, multiplied by conj(W[1]), conj(W[4]), conj(W[7]) */
+	       fftw_real tmp35;
+	       fftw_real tmp50;
+	       fftw_real tmp40;
+	       fftw_real tmp51;
+	       fftw_real tmp45;
+	       fftw_real tmp52;
+	       fftw_real tmp46;
+	       fftw_real tmp53;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp32;
+		    fftw_real tmp34;
+		    fftw_real tmp31;
+		    fftw_real tmp33;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp32 = c_re(inout[2 * iostride]);
+		    tmp34 = c_im(inout[2 * iostride]);
+		    tmp31 = c_re(W[1]);
+		    tmp33 = c_im(W[1]);
+		    tmp35 = (tmp31 * tmp32) + (tmp33 * tmp34);
+		    tmp50 = (tmp31 * tmp34) - (tmp33 * tmp32);
+	       }
+	       {
+		    fftw_real tmp37;
+		    fftw_real tmp39;
+		    fftw_real tmp36;
+		    fftw_real tmp38;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp37 = c_re(inout[5 * iostride]);
+		    tmp39 = c_im(inout[5 * iostride]);
+		    tmp36 = c_re(W[4]);
+		    tmp38 = c_im(W[4]);
+		    tmp40 = (tmp36 * tmp37) + (tmp38 * tmp39);
+		    tmp51 = (tmp36 * tmp39) - (tmp38 * tmp37);
+	       }
+	       {
+		    fftw_real tmp42;
+		    fftw_real tmp44;
+		    fftw_real tmp41;
+		    fftw_real tmp43;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp42 = c_re(inout[8 * iostride]);
+		    tmp44 = c_im(inout[8 * iostride]);
+		    tmp41 = c_re(W[7]);
+		    tmp43 = c_im(W[7]);
+		    tmp45 = (tmp41 * tmp42) + (tmp43 * tmp44);
+		    tmp52 = (tmp41 * tmp44) - (tmp43 * tmp42);
+	       }
+	       tmp46 = tmp40 + tmp45;
+	       tmp53 = tmp51 + tmp52;
+	       {
+		    fftw_real tmp76;
+		    fftw_real tmp77;
+		    fftw_real tmp73;
+		    fftw_real tmp74;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp47 = tmp35 + tmp46;
+		    tmp76 = tmp35 - (K500000000 * tmp46);
+		    tmp77 = K866025403 * (tmp52 - tmp51);
+		    tmp78 = tmp76 - tmp77;
+		    tmp89 = tmp76 + tmp77;
+		    tmp54 = tmp50 + tmp53;
+		    tmp73 = tmp50 - (K500000000 * tmp53);
+		    tmp74 = K866025403 * (tmp40 - tmp45);
+		    tmp75 = tmp73 - tmp74;
+		    tmp90 = tmp74 + tmp73;
+	       }
+	  }
+	  {			/* elements 1, 4, 7, multiplied by conj(W[0]), conj(W[3]), conj(W[6]) */
+	       fftw_real tmp18;
+	       fftw_real tmp55;
+	       fftw_real tmp23;
+	       fftw_real tmp56;
+	       fftw_real tmp28;
+	       fftw_real tmp57;
+	       fftw_real tmp29;
+	       fftw_real tmp58;
+	       ASSERT_ALIGNED_DOUBLE;
+	       {
+		    fftw_real tmp15;
+		    fftw_real tmp17;
+		    fftw_real tmp14;
+		    fftw_real tmp16;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp15 = c_re(inout[iostride]);
+		    tmp17 = c_im(inout[iostride]);
+		    tmp14 = c_re(W[0]);
+		    tmp16 = c_im(W[0]);
+		    tmp18 = (tmp14 * tmp15) + (tmp16 * tmp17);
+		    tmp55 = (tmp14 * tmp17) - (tmp16 * tmp15);
+	       }
+	       {
+		    fftw_real tmp20;
+		    fftw_real tmp22;
+		    fftw_real tmp19;
+		    fftw_real tmp21;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp20 = c_re(inout[4 * iostride]);
+		    tmp22 = c_im(inout[4 * iostride]);
+		    tmp19 = c_re(W[3]);
+		    tmp21 = c_im(W[3]);
+		    tmp23 = (tmp19 * tmp20) + (tmp21 * tmp22);
+		    tmp56 = (tmp19 * tmp22) - (tmp21 * tmp20);
+	       }
+	       {
+		    fftw_real tmp25;
+		    fftw_real tmp27;
+		    fftw_real tmp24;
+		    fftw_real tmp26;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp25 = c_re(inout[7 * iostride]);
+		    tmp27 = c_im(inout[7 * iostride]);
+		    tmp24 = c_re(W[6]);
+		    tmp26 = c_im(W[6]);
+		    tmp28 = (tmp24 * tmp25) + (tmp26 * tmp27);
+		    tmp57 = (tmp24 * tmp27) - (tmp26 * tmp25);
+	       }
+	       tmp29 = tmp23 + tmp28;
+	       tmp58 = tmp56 + tmp57;
+	       {
+		    fftw_real tmp66;
+		    fftw_real tmp67;
+		    fftw_real tmp69;
+		    fftw_real tmp70;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp30 = tmp18 + tmp29;
+		    tmp66 = tmp18 - (K500000000 * tmp29);
+		    tmp67 = K866025403 * (tmp57 - tmp56);
+		    tmp68 = tmp66 - tmp67;
+		    tmp86 = tmp66 + tmp67;
+		    tmp59 = tmp55 + tmp58;
+		    tmp69 = tmp55 - (K500000000 * tmp58);
+		    tmp70 = K866025403 * (tmp23 - tmp28);
+		    tmp71 = tmp69 - tmp70;
+		    tmp87 = tmp70 + tmp69;
+	       }
+	  }
+	  {			/* real parts of outputs 0, 3, 6 */
+	       fftw_real tmp60;
+	       fftw_real tmp13;
+	       fftw_real tmp48;
+	       fftw_real tmp49;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp60 = K866025403 * (tmp54 - tmp59);
+	       tmp13 = tmp1 + tmp12;
+	       tmp48 = tmp30 + tmp47;
+	       tmp49 = tmp13 - (K500000000 * tmp48);
+	       c_re(inout[0]) = tmp13 + tmp48;
+	       c_re(inout[3 * iostride]) = tmp49 + tmp60;
+	       c_re(inout[6 * iostride]) = tmp49 - tmp60;
+	  }
+	  {			/* imaginary parts of outputs 0, 3, 6 */
+	       fftw_real tmp101;
+	       fftw_real tmp97;
+	       fftw_real tmp100;
+	       fftw_real tmp102;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp101 = K866025403 * (tmp30 - tmp47);
+	       tmp97 = tmp59 + tmp54;
+	       tmp100 = tmp98 + tmp99;
+	       tmp102 = tmp100 - (K500000000 * tmp97);
+	       c_im(inout[0]) = tmp97 + tmp100;
+	       c_im(inout[6 * iostride]) = tmp102 - tmp101;
+	       c_im(inout[3 * iostride]) = tmp101 + tmp102;
+	  }
+	  {			/* real and imaginary parts of outputs 2, 5, 8 */
+	       fftw_real tmp65;
+	       fftw_real tmp110;
+	       fftw_real tmp80;
+	       fftw_real tmp111;
+	       fftw_real tmp84;
+	       fftw_real tmp109;
+	       fftw_real tmp81;
+	       fftw_real tmp112;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp65 = tmp61 - tmp64;
+	       tmp110 = tmp105 - tmp104;
+	       {
+		    fftw_real tmp72;
+		    fftw_real tmp79;
+		    fftw_real tmp82;
+		    fftw_real tmp83;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp72 = (K173648177 * tmp68) - (K984807753 * tmp71);
+		    tmp79 = (K342020143 * tmp75) + (K939692620 * tmp78);
+		    tmp80 = tmp72 - tmp79;
+		    tmp111 = K866025403 * (tmp72 + tmp79);
+		    tmp82 = (K342020143 * tmp78) - (K939692620 * tmp75);
+		    tmp83 = (K173648177 * tmp71) + (K984807753 * tmp68);
+		    tmp84 = K866025403 * (tmp82 - tmp83);
+		    tmp109 = tmp83 + tmp82;
+	       }
+	       c_re(inout[2 * iostride]) = tmp65 + tmp80;
+	       tmp81 = tmp65 - (K500000000 * tmp80);
+	       c_re(inout[8 * iostride]) = tmp81 - tmp84;
+	       c_re(inout[5 * iostride]) = tmp81 + tmp84;
+	       c_im(inout[2 * iostride]) = tmp109 + tmp110;
+	       tmp112 = tmp110 - (K500000000 * tmp109);
+	       c_im(inout[5 * iostride]) = tmp111 + tmp112;
+	       c_im(inout[8 * iostride]) = tmp112 - tmp111;
+	  }
+	  {			/* real and imaginary parts of outputs 1, 4, 7 */
+	       fftw_real tmp85;
+	       fftw_real tmp106;
+	       fftw_real tmp92;
+	       fftw_real tmp107;
+	       fftw_real tmp96;
+	       fftw_real tmp103;
+	       fftw_real tmp93;
+	       fftw_real tmp108;
+	       ASSERT_ALIGNED_DOUBLE;
+	       tmp85 = tmp61 + tmp64;
+	       tmp106 = tmp104 + tmp105;
+	       {
+		    fftw_real tmp88;
+		    fftw_real tmp91;
+		    fftw_real tmp94;
+		    fftw_real tmp95;
+		    ASSERT_ALIGNED_DOUBLE;
+		    tmp88 = (K766044443 * tmp86) - (K642787609 * tmp87);
+		    tmp91 = (K173648177 * tmp89) - (K984807753 * tmp90);
+		    tmp92 = tmp88 + tmp91;
+		    tmp107 = K866025403 * (tmp88 - tmp91);
+		    tmp94 = (K173648177 * tmp90) + (K984807753 * tmp89);
+		    tmp95 = (K766044443 * tmp87) + (K642787609 * tmp86);
+		    tmp96 = K866025403 * (tmp94 - tmp95);
+		    tmp103 = tmp95 + tmp94;
+	       }
+	       c_re(inout[iostride]) = tmp85 + tmp92;
+	       tmp93 = tmp85 - (K500000000 * tmp92);
+	       c_re(inout[7 * iostride]) = tmp93 - tmp96;
+	       c_re(inout[4 * iostride]) = tmp93 + tmp96;
+	       c_im(inout[iostride]) = tmp103 + tmp106;
+	       tmp108 = tmp106 - (K500000000 * tmp103);
+	       c_im(inout[4 * iostride]) = tmp107 + tmp108;
+	       c_im(inout[7 * iostride]) = tmp108 - tmp107;
+	  }
+     }
+}
+
+static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6, 7, 8 };	/* referenced by the descriptor below */
+fftw_codelet_desc fftwi_twiddle_9_desc = {	/* descriptor entry for fftwi_twiddle_9 */
+     "fftwi_twiddle_9",		/* codelet name as a string */
+     (void (*)()) fftwi_twiddle_9,	/* the codelet, cast to a generic function pointer */
+     9,				/* presumably the codelet size (radix) -- confirm against fftw_codelet_desc */
+     FFTW_BACKWARD,
+     FFTW_TWIDDLE,
+     209,			/* presumably the codelet signature -- confirm against fftw_codelet_desc */
+     8,				/* equals the number of entries in twiddle_order and the W stride of the codelet loop */
+     twiddle_order,
+};
diff --git a/src/fftw/generic.c b/src/fftw/generic.c
new file mode 100644
index 0000000..93ad3ec
--- /dev/null
+++ b/src/fftw/generic.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ *
+ * generic.c -- "generic" codelets.  They work for all n (and they are
+ * slow)
+ */
+#include "fftw-int.h"
+#include <stdlib.h>
+
+void fftw_twiddle_generic(fftw_complex *A, const fftw_complex *W,
+			  int m, int r, int n, int stride)
+{
+     /* For each i < m, replace the r elements A[(i + j * m) * stride] by
+        r sums over j of element * W[l]; for output k, l starts at 0 and
+        advances by i + m * k, dropping by n whenever it reaches n. */
+     const int step = m * stride;	/* spacing of one group's elements */
+     fftw_complex *scratch = (fftw_complex *)
+     fftw_malloc(r * sizeof(fftw_complex));
+     fftw_complex *base, *dst;
+     int col, out, term;
+
+     for (col = 0; col < m; ++col) {
+	  base = A + col * stride;
+	  for (out = 0; out < r; ++out) {
+	       const int winc = col + m * out;	/* twiddle index step */
+	       const fftw_complex *src = base;
+	       fftw_real sum_re = 0.0, sum_im = 0.0;
+	       int widx = 0;
+	       for (term = 0; term < r; ++term, src += step) {
+		    const fftw_real wr = c_re(W[widx]), wi = c_im(W[widx]);
+		    const fftw_real xr = c_re(*src), xi = c_im(*src);
+		    sum_re += xr * wr - xi * wi;
+		    sum_im += xr * wi + xi * wr;
+		    widx += winc;
+		    if (widx >= n)
+			 widx -= n;
+	       }
+	       c_re(scratch[out]) = sum_re;
+	       c_im(scratch[out]) = sum_im;
+	  }
+	  /* write back only once every output of this group is known */
+	  for (out = 0, dst = base; out < r; ++out, dst += step)
+	       *dst = scratch[out];
+     }
+     fftw_free(scratch);
+}
+
+void fftwi_twiddle_generic(fftw_complex *A, const fftw_complex *W,
+			   int m, int r, int n, int stride)
+{
+     /* For each i < m, replace the r elements A[(i + j * m) * stride] by
+        r sums over j of element * conj(W[l]); for output k, l starts at
+        0 and advances by i + m * k, dropping by n whenever it reaches n. */
+     const int step = m * stride;	/* spacing of one group's elements */
+     fftw_complex *scratch = (fftw_complex *)
+     fftw_malloc(r * sizeof(fftw_complex));
+     fftw_complex *base, *dst;
+     int col, out, term;
+
+     for (col = 0; col < m; ++col) {
+	  base = A + col * stride;
+	  for (out = 0; out < r; ++out) {
+	       const int winc = col + m * out;	/* twiddle index step */
+	       const fftw_complex *src = base;
+	       fftw_real sum_re = 0.0, sum_im = 0.0;
+	       int widx = 0;
+	       for (term = 0; term < r; ++term, src += step) {
+		    const fftw_real wr = c_re(W[widx]), wi = c_im(W[widx]);
+		    const fftw_real xr = c_re(*src), xi = c_im(*src);
+		    sum_re += xr * wr + xi * wi;
+		    sum_im += xi * wr - xr * wi;
+		    widx += winc;
+		    if (widx >= n)
+			 widx -= n;
+	       }
+	       c_re(scratch[out]) = sum_re;
+	       c_im(scratch[out]) = sum_im;
+	  }
+	  /* write back only once every output of this group is known */
+	  for (out = 0, dst = base; out < r; ++out, dst += step)
+	       *dst = scratch[out];
+     }
+     fftw_free(scratch);
+}
diff --git a/src/fftw/malloc.c b/src/fftw/malloc.c
new file mode 100644
index 0000000..7ae22e4
--- /dev/null
+++ b/src/fftw/malloc.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ * malloc.c -- memory allocation related functions
+ */
+
+/* $Id: malloc.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+#include "fftw-int.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+
+fftw_malloc_type_function fftw_malloc_hook = 0;	/* if set, the non-debugging fftw_malloc() calls it instead of malloc() */
+fftw_free_type_function fftw_free_hook = 0;	/* if set, the non-debugging fftw_free() calls it instead of free() */
+fftw_die_type_function fftw_die_hook = 0;	/* if set, fftw_die() calls it before printing and exiting */
+
+/**********************************************************
+ *   DEBUGGING CODE
+ **********************************************************/
+#ifdef FFTW_DEBUG
+static int fftw_malloc_cnt = 0;	/* blocks currently allocated */
+
+/*
+ * debugging malloc/free.  Fill every malloced and freed area with a
+ * known byte pattern, just to make sure we are not using uninitialized
+ * pointers.  Also check for writes past the ends of allocated blocks
+ * (each block is over-allocated by PAD_FACTOR), track the number of
+ * live blocks and bytes together with their maxima, and verify a magic
+ * word on free.  This code is a quick and dirty hack -- use at your own risk.
+ */
+
+static int fftw_malloc_total = 0, fftw_malloc_max = 0, fftw_malloc_cnt_max = 0;
+
+#define MAGIC 0xABadCafe	/* stored in the second header int of a live block */
+#define PAD_FACTOR 2		/* PAD_FACTOR * n bytes follow the header for a request of n */
+#define TWOINTS (2 * sizeof(int))	/* header size: stored size plus magic word */
+
+#define VERBOSE_ALLOCATION 0	/* nonzero: fftw_free() prints the size of each freed block */
+
+#if VERBOSE_ALLOCATION
+#define WHEN_VERBOSE(a) a
+#else
+#define WHEN_VERBOSE(a)
+#endif
+
+void *fftw_malloc(size_t n)
+{
+     /*
+      * Debugging allocator.  Block layout: [size][MAGIC][n user bytes][pad];
+      * all PAD_FACTOR * n bytes after the header are pre-filled with a
+      * pattern that fftw_free() re-checks in the pad to catch overruns.
+      */
+     char *p;
+     size_t i;			/* size_t: an int index could overflow before reaching PAD_FACTOR * n */
+     /* Like the non-debugging allocator, treat an empty request as 1 byte:
+        a live 0-byte block would leave fftw_malloc_cnt > 0 while
+        fftw_malloc_total == 0 and trip the consistency check in fftw_free(). */
+     if (n == 0)
+	  n = 1;
+     fftw_malloc_total += n;
+     if (fftw_malloc_total > fftw_malloc_max)
+	  fftw_malloc_max = fftw_malloc_total;
+     p = (char *) malloc(PAD_FACTOR * n + TWOINTS);
+     if (!p)
+	  fftw_die("fftw_malloc: out of memory\n");
+     ((int *) p)[0] = (int) n;	/* size header, read back by fftw_free() */
+     ((int *) p)[1] = MAGIC;
+     for (i = 0; i < PAD_FACTOR * n; ++i)
+	  p[i + TWOINTS] = (char) (i ^ 0xDEADBEEF);
+     if (++fftw_malloc_cnt > fftw_malloc_cnt_max)
+	  fftw_malloc_cnt_max = fftw_malloc_cnt;
+     return (void *) (p + TWOINTS);	/* skip the header */
+}
+
+void fftw_free(void *p)
+{
+     /* debugging free: checks header and padding, then overwrites and frees */
+     char *blk;
+     int *hdr;
+     int size, magic, i;
+
+     if (!p)
+	  return;
+
+     blk = ((char *) p) - TWOINTS;
+     if (!blk)
+	  fftw_die("fftw_free: tried to free NULL+TWOINTS pointer!\n");
+
+     hdr = (int *) blk;
+     size = hdr[0];
+     magic = hdr[1];
+
+     WHEN_VERBOSE( {
+		  printf("FFTW_FREE %d\n", size);
+		  fflush(stdout);
+		  })
+
+     hdr[0] = 0;		/* set to zero to detect duplicate free's */
+
+     if (magic != MAGIC)
+	  fftw_die("Wrong magic in fftw_free()!\n");
+     hdr[1] = ~MAGIC;
+
+     if (size < 0)
+	  fftw_die("Tried to free block with corrupt size descriptor!\n");
+
+     fftw_malloc_total -= size;
+     if (fftw_malloc_total < 0)
+	  fftw_die("fftw_malloc_total went negative!\n");
+
+     /* the padding must still hold the pattern written at allocation */
+     for (i = size; i < PAD_FACTOR * size; ++i) {
+	  if (blk[i + TWOINTS] == (char) (i ^ 0xDEADBEEF))
+	       continue;
+	  fflush(stdout);
+	  fprintf(stderr, "Byte %d past end of array has changed!\n",
+		  i - size + 1);
+	  fftw_die("Array bounds overwritten!\n");
+     }
+     /* overwrite user area and padding with a different pattern */
+     for (i = 0; i < PAD_FACTOR * size; ++i)
+	  blk[i + TWOINTS] = (char) (i ^ 0xBEEFDEAD);
+
+     --fftw_malloc_cnt;
+     if (fftw_malloc_cnt < 0)
+	  fftw_die("fftw_malloc_cnt went negative!\n");
+     if ((fftw_malloc_cnt == 0 && fftw_malloc_total > 0) ||
+	 (fftw_malloc_cnt > 0 && fftw_malloc_total == 0))
+	  fftw_die("fftw_malloc_cnt/total not zero at the same time!\n");
+
+     free(blk);
+}
+
+#else
+/**********************************************************
+ *   NON DEBUGGING CODE
+ **********************************************************/
+/* production version, no hacks */
+
+void *fftw_malloc(size_t n)
+{
+     /*
+      * Production allocator: a user hook, when installed, handles the
+      * request entirely; otherwise at least one byte is requested from
+      * malloc() and failure is reported through fftw_die().
+      */
+     void *block;
+
+     if (fftw_malloc_hook)
+	  return fftw_malloc_hook(n);
+
+     block = malloc(n == 0 ? 1 : n);
+     if (!block)
+	  fftw_die("fftw_malloc: out of memory\n");
+     return block;
+}
+
+void fftw_free(void *p)
+{
+     /* NULL is ignored; an installed hook is called in place of free() */
+     if (!p)
+	  return;
+     if (fftw_free_hook)
+	  fftw_free_hook(p);
+     else
+	  free(p);
+}
+
+#endif
+
+/* die when fatal errors occur */
+void fftw_die(const char *s)
+{
+     /* the hook runs first; if it returns, report on stderr and exit */
+     if (fftw_die_hook != 0)
+	  (*fftw_die_hook) (s);
+     fflush(stdout);
+     fprintf(stderr, "fftw: %s", s);
+     exit(EXIT_FAILURE);
+}
+
+/* check for memory leaks when debugging */
+void fftw_check_memory_leaks(void)
+{
+     extern int fftw_node_cnt, fftw_plan_cnt, fftw_twiddle_size;
+     int leaked = fftw_node_cnt || fftw_plan_cnt || fftw_twiddle_size;
+
+#ifdef FFTW_DEBUG
+     /* the debugging allocator also tracks live blocks and bytes */
+     leaked = fftw_malloc_cnt || fftw_malloc_total || leaked;
+#endif
+     if (!leaked)
+	  return;
+     fflush(stdout);
+#ifdef FFTW_DEBUG
+     fprintf(stderr,
+	     "MEMORY LEAK!!!\n"
+	     "fftw_malloc = %d"
+	     " node=%d plan=%d twiddle=%d\n"
+	     "fftw_malloc_total = %d\n",
+	     fftw_malloc_cnt,
+	     fftw_node_cnt, fftw_plan_cnt, fftw_twiddle_size,
+	     fftw_malloc_total);
+#else
+     fprintf(stderr,
+	     "MEMORY LEAK!!!\n"
+	     " node=%d plan=%d twiddle=%d\n",
+	     fftw_node_cnt, fftw_plan_cnt, fftw_twiddle_size);
+#endif
+     exit(EXIT_FAILURE);
+}
+
+void fftw_print_max_memory_usage(void)
+{				/* prints the allocation maxima; does nothing unless FFTW_DEBUG is defined */
+#ifdef FFTW_DEBUG
+     const double peak_kb = fftw_malloc_max / 1024.0;
+     printf("\nMaximum number of blocks allocated = %d\n"
+	    "Maximum number of bytes allocated  = %0.3f kB\n", fftw_malloc_cnt_max, peak_kb);
+#endif
+}
diff --git a/src/fftw/planner.c b/src/fftw/planner.c
new file mode 100644
index 0000000..30217d2
--- /dev/null
+++ b/src/fftw/planner.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ * planner.c -- find the optimal plan
+ */
+
+/* $Id: planner.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+#include "fftw-int.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+extern fftw_generic_codelet fftw_twiddle_generic;
+extern fftw_generic_codelet fftwi_twiddle_generic;
+extern fftw_codelet_desc *fftw_config[];
+
+fftw_plan_hook_ptr fftw_plan_hook = (fftw_plan_hook_ptr) NULL;	/* if set, run_plan_hooks() calls it with each plan it is given */
+
+static void init_test_array(fftw_complex *arr, int stride, int n)
+{
+     int idx;			/* zeroes arr[stride * idx] for idx = 0 .. n - 1 */
+     for (idx = 0; idx < n; ++idx) {
+	  fftw_complex *elem = &arr[stride * idx];
+	  c_re(*elem) = 0.0;
+	  c_im(*elem) = 0.0;
+     }
+}
+
+/*
+ * The timer keeps doubling the number of iterations
+ * until the program runs for more than FFTW_TIME_MIN
+ */
+static double fftw_measure_runtime(fftw_plan plan,
+				   fftw_complex *in, int istride,
+				   fftw_complex *out, int ostride)
+{
+     /*
+      * Times batches of reps calls to fftw(), up to FFTW_TIME_REPEAT
+      * batches per round, doubling reps until the fastest batch of a
+      * round takes at least FFTW_TIME_MIN.  Returns that fastest batch
+      * time divided by reps.
+      */
+     const int howmany = plan->vector_size;
+     const int total = plan->n * howmany;	/* elements cleared per round */
+     fftw_time round_start, batch_start, batch_end;
+     double elapsed, fastest, slowest;
+     int reps, attempt, call;
+
+     for (reps = 1;; reps *= 2) {
+	  fastest = 1.0E10;
+	  slowest = -1.0E10;
+	  init_test_array(in, istride, total);
+
+	  round_start = fftw_get_time();
+	  for (attempt = 0; attempt < FFTW_TIME_REPEAT; ++attempt) {
+	       batch_start = fftw_get_time();
+	       for (call = 0; call < reps; ++call)
+		    fftw(plan, howmany, in, istride, istride,
+			 out, ostride, ostride);
+	       batch_end = fftw_get_time();
+
+	       elapsed = fftw_time_to_sec(fftw_time_diff(batch_end,
+							 batch_start));
+	       if (elapsed < fastest)
+		    fastest = elapsed;
+	       if (elapsed > slowest)
+		    slowest = elapsed;
+
+	       /* stop this round early once it has run for too long */
+	       elapsed = fftw_time_to_sec(fftw_time_diff(batch_end,
+							 round_start));
+	       if (elapsed > FFTW_TIME_LIMIT)
+		    break;
+	  }
+
+	  if (fastest >= FFTW_TIME_MIN)
+	       break;
+     }
+
+     /* slowest is tracked and scaled, but is not part of the result */
+     slowest /= (double) reps;
+     fastest /= (double) reps;
+
+     return fastest;
+}
+
+/* auxiliary functions */
+static void compute_cost(fftw_plan plan,
+			 fftw_complex *in, int istride,
+			 fftw_complex *out, int ostride)
+{
+     /* plan->cost: measured run time, or else a static estimate */
+     if (!(plan->flags & FFTW_MEASURE)) {
+	  const double estimate =
+	      plan->n * fftw_estimate_node(plan->root) * plan->vector_size;
+	  plan->cost = estimate;
+     } else
+	  plan->cost = fftw_measure_runtime(plan, in, istride, out, ostride);
+}
+
+static void run_plan_hooks(fftw_plan p)
+{
+     if (!fftw_plan_hook || !p)	/* needs both an installed hook and a plan */
+	  return;
+     fftw_complete_twiddle(p->root, p->n);	/* done before the hook sees p */
+     fftw_plan_hook(p);
+}
+
+
+/* declares __q and p, then visits each fftw_config[] entry up to the first null one; it opens with declarations */
+#define FOR_ALL_CODELETS(p) \
+   fftw_codelet_desc **__q, *p;                         \
+   for (__q = &fftw_config[0]; (p = (*__q)); ++__q)
+
+/******************************************
+ *      Recursive planner                 *
+ ******************************************/
+static fftw_plan planner(fftw_plan *table, int n, fftw_direction dir, 
+			 int flags, int vector_size,
+			 fftw_complex *, int, fftw_complex *, int);
+
+/*
+ * the planner consists of two parts: one that tries to
+ * use accumulated wisdom, and one that does not.
+ * A small driver invokes both parts in sequence
+ */
+
+/* planner with wisdom: build the plan the wisdom suggests; returns 0 if there is no wisdom or no codelet matches it */
+static fftw_plan planner_wisdom(fftw_plan *table, int n,
+				fftw_direction dir, int flags,
+				int vector_size,
+				fftw_complex *in, int istride,
+				fftw_complex *out, int ostride)
+{
+     fftw_plan best = (fftw_plan) 0;	/* stays 0 unless a codelet below matches the wisdom */
+     fftw_plan_node *node;
+     int have_wisdom;
+     enum fftw_node_type wisdom_type;
+     int wisdom_signature;
+     fftw_recurse_kind wisdom_recurse_kind;
+
+     /* see if we remember any wisdom for this case */
+     have_wisdom = fftw_wisdom_lookup(n, flags, dir, FFTW_WISDOM,
+				      istride, ostride,
+				      &wisdom_type, &wisdom_signature,
+				      &wisdom_recurse_kind, 0);
+
+     if (!have_wisdom)
+	  return best;
+
+     if (wisdom_type == FFTW_NOTW) {	/* wisdom names a no-twiddle codelet: it must have size n */
+	  FOR_ALL_CODELETS(p) {
+	       if (p->dir == dir && p->type == wisdom_type) {
+		    /* see if wisdom applies */
+		    if (wisdom_signature == p->signature &&
+			p->size == n) {
+			 node = fftw_make_node_notw(n, p);
+			 best = fftw_make_plan(n, dir, node, flags,
+					       p->type, p->signature,
+					       FFTW_NORMAL_RECURSE,
+					       vector_size);
+			 fftw_use_plan(best);
+			 run_plan_hooks(best);
+			 return best;
+		    }
+	       }
+	  }
+     }
+     if (wisdom_type == FFTW_TWIDDLE) {	/* wisdom names a twiddle codelet: plan n / p->size recursively */
+	  FOR_ALL_CODELETS(p) {
+	       if (p->dir == dir && p->type == wisdom_type) {
+
+		    /* see if wisdom applies */
+		    if (wisdom_signature == p->signature &&
+			p->size > 1 &&
+			(n % p->size) == 0) {
+			 fftw_plan r = planner(table, n / p->size, dir, 
+					       flags | FFTW_NO_VECTOR_RECURSE,
+					       wisdom_recurse_kind ==
+					       FFTW_VECTOR_RECURSE ?
+					       p->size : vector_size,	/* sub-plan vector size is p->size under vector recursion */
+					       in, istride, out, ostride);
+			 node = fftw_make_node_twiddle(n, p,
+						       r->root, flags);
+			 best = fftw_make_plan(n, dir, node, flags,
+					       p->type, p->signature,
+					       wisdom_recurse_kind, 
+					       vector_size);
+			 fftw_use_plan(best);
+			 run_plan_hooks(best);
+			 fftw_destroy_plan_internal(r);	/* r itself is no longer needed; only r->root was handed to the new node */
+			 return best;
+		    }
+	       }
+	  }
+     }
+     /* 
+      * BUG (or: TODO)  Can we have generic wisdom? This is probably
+      * an academic question
+      */
+
+     return best;
+}
+
+/*
+ * planner with no wisdom: try all combinations and pick the best.
+ * Calls fftw_die() if no candidate plan at all could be built.
+ */
+static fftw_plan planner_normal(fftw_plan *table, int n, fftw_direction dir,
+				int flags, int vector_size,
+				fftw_complex *in, int istride,
+				fftw_complex *out, int ostride)
+{
+     fftw_plan best = (fftw_plan) 0;	/* best candidate so far, chosen by fftw_pick_better() */
+     fftw_plan newplan;
+     fftw_plan_node *node;
+
+     /* see if we have any codelet that solves the problem */
+     {
+	  FOR_ALL_CODELETS(p) {
+	       if (p->dir == dir && p->type == FFTW_NOTW) {
+		    if (p->size == n) {
+			 node = fftw_make_node_notw(n, p);
+			 newplan = fftw_make_plan(n, dir, node, flags,
+						  p->type, p->signature,
+						  FFTW_NORMAL_RECURSE,
+						  vector_size);
+			 fftw_use_plan(newplan);
+			 compute_cost(newplan, in, istride, out, ostride);
+			 run_plan_hooks(newplan);
+			 best = fftw_pick_better(newplan, best);
+		    }
+	       }
+	  }
+     }
+
+     /* Then, try all available twiddle codelets */
+     {
+	  FOR_ALL_CODELETS(p) {
+	       if (p->dir == dir && p->type == FFTW_TWIDDLE) {
+		    /* test the size first so that the % never divides by zero;
+		       a codelet of size n is only tried while no plan exists */
+		    if (p->size > 1 &&
+			(n % p->size) == 0 &&
+			(!best || n != p->size)) {
+			 fftw_plan r = planner(table, n / p->size, dir,
+					       flags | FFTW_NO_VECTOR_RECURSE,
+					       vector_size,
+					       in, istride, out, ostride);
+			 node = fftw_make_node_twiddle(n, p,
+						       r->root, flags);
+			 newplan = fftw_make_plan(n, dir, node, flags,
+						  p->type, p->signature,
+						  FFTW_NORMAL_RECURSE,
+						  vector_size);
+			 fftw_use_plan(newplan);
+			 fftw_destroy_plan_internal(r);
+			 compute_cost(newplan, in, istride, out, ostride);
+			 run_plan_hooks(newplan);
+			 best = fftw_pick_better(newplan, best);
+		    }
+	       }
+	  }
+     }
+     /* try vector recursion unless prohibited by the flags: */
+     if (! (flags & FFTW_NO_VECTOR_RECURSE)) {
+	  FOR_ALL_CODELETS(p) {
+	       if (p->dir == dir && p->type == FFTW_TWIDDLE) {
+		    /* same guard order as above; here the sub-plan is
+		       planned with vector size p->size */
+		    if (p->size > 1 &&
+			(n % p->size) == 0 &&
+			(!best || n != p->size)) {
+			 fftw_plan r = planner(table, n / p->size, dir,
+					       flags | FFTW_NO_VECTOR_RECURSE,
+					       p->size,
+					       in, istride, out, ostride);
+			 node = fftw_make_node_twiddle(n, p,
+						       r->root, flags);
+			 newplan = fftw_make_plan(n, dir, node, flags,
+						  p->type, p->signature,
+						  FFTW_VECTOR_RECURSE,
+						  vector_size);
+			 fftw_use_plan(newplan);
+			 fftw_destroy_plan_internal(r);
+			 compute_cost(newplan, in, istride, out, ostride);
+			 run_plan_hooks(newplan);
+			 best = fftw_pick_better(newplan, best);
+		    }
+	       }
+	  }
+     }
+     /*
+      * resort to generic or rader codelets for unknown factors
+      */
+     {
+	  fftw_generic_codelet *codelet = (dir == FFTW_FORWARD ?
+					   fftw_twiddle_generic :
+					   fftwi_twiddle_generic);
+	  int size, prev_size = 0, remaining_factors = n;
+	  fftw_plan r;
+
+	  while (remaining_factors > 1) {
+	       size = fftw_factor(remaining_factors);
+	       remaining_factors /= size;
+
+	       /* don't try the same factor more than once */
+	       if (size == prev_size)
+		    continue;
+	       prev_size = size;
+
+	       /* Look for codelets corresponding to this factor. */
+	       {
+		    FOR_ALL_CODELETS(p) {
+			 if (p->dir == dir && p->type == FFTW_TWIDDLE
+			     && p->size == size) {
+			      size = 0;	/* marks the factor as covered */
+			      break;
+			 }
+		    }
+	       }
+
+	       /*
+		* only try a generic/rader codelet if there were no
+		* twiddle codelets for this factor
+		*/
+	       if (!size)
+		    continue;
+
+	       r = planner(table, n / size, dir,
+			   flags | FFTW_NO_VECTOR_RECURSE,
+			   vector_size,
+			   in, istride, out, ostride);
+
+	       /* Try Rader codelet: */
+	       node = fftw_make_node_rader(n, size, dir, r->root, flags);
+	       newplan = fftw_make_plan(n, dir, node, flags, FFTW_RADER, 0,
+					FFTW_NORMAL_RECURSE, vector_size);
+	       fftw_use_plan(newplan);
+	       compute_cost(newplan, in, istride, out, ostride);
+	       run_plan_hooks(newplan);
+	       best = fftw_pick_better(newplan, best);
+
+	       if (size < 100) {	/*
+					 * only try generic for small
+					 * sizes
+					 */
+		    /* Try generic codelet: */
+		    node = fftw_make_node_generic(n, size, codelet,
+						  r->root, flags);
+		    newplan = fftw_make_plan(n, dir, node, flags,
+					     FFTW_GENERIC, 0,
+					     FFTW_NORMAL_RECURSE, vector_size);
+		    fftw_use_plan(newplan);
+		    compute_cost(newplan, in, istride, out, ostride);
+		    run_plan_hooks(newplan);
+		    best = fftw_pick_better(newplan, best);
+	       }
+	       fftw_destroy_plan_internal(r);	/* r was shared by the Rader and generic candidates above */
+	  }
+     }
+     if (!best)
+	  fftw_die("bug in planner\n");
+
+     return best;
+}
+
+static fftw_plan planner(fftw_plan *table, int n, fftw_direction dir,
+			 int flags, int vector_size,
+			 fftw_complex *in, int istride,
+			 fftw_complex *out, int ostride)
+{
+     /*
+      * Driver: return the matching plan from table if there is one;
+      * otherwise build a plan from wisdom or, failing that, by search,
+      * then enter it into table and record it as wisdom.
+      */
+     fftw_plan found;
+
+     /* a vector_size above 1 always rules out vector recursion */
+     if (vector_size > 1)
+	  flags |= FFTW_NO_VECTOR_RECURSE;
+
+     found = fftw_lookup(table, n, flags, vector_size);
+     if (found) {
+	  /* already computed: register this use and return it */
+	  fftw_use_plan(found);
+	  return found;
+     }
+
+     found = planner_wisdom(table, n, dir, flags, vector_size,
+			    in, istride, out, ostride);
+     if (!found)
+	  found = planner_normal(table, n, dir, flags, vector_size,
+				 in, istride, out, ostride);
+     if (!found)
+	  return found;
+
+     fftw_insert(table, found);
+     fftw_wisdom_add(n, flags, dir, FFTW_WISDOM, istride, ostride,
+		     found->wisdom_type, found->wisdom_signature,
+		     found->recurse_kind);
+     return found;
+}
+
+fftw_plan fftw_create_plan_specific(int n, fftw_direction dir, int flags,
+				    fftw_complex *in, int istride,
+				    fftw_complex *out, int ostride)
+{
+     /*
+      * Plan a size-n transform for the given arrays and strides.
+      * Returns 0 when n is not positive or dir is neither FFTW_FORWARD
+      * nor FFTW_BACKWARD.
+      */
+     fftw_plan memo;		/* plan table, alive only during this call */
+     fftw_plan result;
+
+     if (n <= 0 || (dir != FFTW_FORWARD && dir != FFTW_BACKWARD))
+	  return (fftw_plan) 0;
+
+#ifndef FFTW_ENABLE_VECTOR_RECURSE
+     /* TEMPORARY: disable vector recursion until it is more tested. */
+     flags |= FFTW_NO_VECTOR_RECURSE;
+#endif
+
+     fftw_make_empty_table(&memo);
+     result = planner(&memo, n, dir, flags, 1, in, istride, out, ostride);
+     fftw_destroy_table(&memo);
+     if (!result)
+	  return result;
+     fftw_complete_twiddle(result->root, n);
+     return result;
+}
+
+fftw_plan fftw_create_plan(int n, fftw_direction dir, int flags)
+{
+     /* Plan a size-n transform.  With FFTW_MEASURE the planner works on a
+        scratch buffer (n inputs followed by n outputs) that is released
+        before returning.  Returns 0 when n is not positive. */
+     fftw_complex *tmp_in;
+     fftw_plan p;
+     if (!(flags & FFTW_MEASURE))
+	  return fftw_create_plan_specific(n, dir, flags,
+			   (fftw_complex *) 0, 1, (fftw_complex *) 0, 1);
+     if (n <= 0)		/* a negative n would wrap to a huge request and abort */
+	  return (fftw_plan) 0;
+     /* widen before multiplying so that 2 * n cannot overflow int */
+     tmp_in = (fftw_complex *) fftw_malloc(2 * (size_t) n * sizeof(fftw_complex));
+     if (!tmp_in)
+	  return 0;
+     p = fftw_create_plan_specific(n, dir, flags, tmp_in, 1, tmp_in + n, 1);
+     fftw_free(tmp_in);
+     return p;
+}
+
+
+void fftw_destroy_plan(fftw_plan plan)
+{				/* thin wrapper: forwards plan unchanged to fftw_destroy_plan_internal() */
+     fftw_destroy_plan_internal(plan);
+}
diff --git a/src/fftw/putils.c b/src/fftw/putils.c
new file mode 100644
index 0000000..7cbe87d
--- /dev/null
+++ b/src/fftw/putils.c
@@ -0,0 +1,555 @@
+
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ * putils.c -- plan utilities shared by planner.c and rplanner.c
+ */
+
+/* $Id: putils.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+#include "fftw-int.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+int fftw_node_cnt = 0;	/* ++ in fftw_make_node, -- when destroy_tree frees */
+int fftw_plan_cnt = 0;	/* ++ in fftw_make_plan, -- when a plan is freed */
+
+/*
+ * These two constants are used for the FFTW_ESTIMATE flag to help
+ * create a heuristic plan.  They don't affect FFTW_MEASURE.
+ */
+#define NOTW_OPTIMAL_SIZE 32
+#define TWIDDLE_OPTIMAL_SIZE 12
+
+#define IS_POWER_OF_TWO(n) (((n) & ((n) - 1)) == 0)	/* also true for 0 */
+
+/* constructors --- I wish I had ML */
+fftw_plan_node *fftw_make_node(void)
+{
+     /* fresh node with no references yet; bump the global node count */
+     fftw_plan_node *node = (fftw_plan_node *) fftw_malloc(sizeof *node);
+     ++fftw_node_cnt;
+     node->refcnt = 0;
+     return node;
+}
+
+void fftw_use_node(fftw_plan_node *p)
+{
+     p->refcnt += 1;		/* take one more reference to the node */
+}
+
+fftw_plan_node *fftw_make_node_notw(int size, const fftw_codelet_desc *config)
+{
+     fftw_plan_node *node = fftw_make_node();
+     /* record the codelet, its descriptor and the caller's size */
+     node->nodeu.notw.codelet_desc = config;
+     node->nodeu.notw.codelet = (fftw_notw_codelet *) config->codelet;
+     node->nodeu.notw.size = size;
+     node->type = config->type;
+     return node;
+}
+
+fftw_plan_node *fftw_make_node_real2hc(int size,
+				       const fftw_codelet_desc *config)
+{
+     fftw_plan_node *node = fftw_make_node();
+     /* record the codelet, its descriptor and the caller's size */
+     node->nodeu.real2hc.codelet_desc = config;
+     node->nodeu.real2hc.codelet = (fftw_real2hc_codelet *) config->codelet;
+     node->nodeu.real2hc.size = size;
+     node->type = config->type;
+     return node;
+}
+
+fftw_plan_node *fftw_make_node_hc2real(int size,
+				       const fftw_codelet_desc *config)
+{
+     fftw_plan_node *node = fftw_make_node();
+     /* record the codelet, its descriptor and the caller's size */
+     node->nodeu.hc2real.codelet_desc = config;
+     node->nodeu.hc2real.codelet = (fftw_hc2real_codelet *) config->codelet;
+     node->nodeu.hc2real.size = size;
+     node->type = config->type;
+     return node;
+}
+
+fftw_plan_node *fftw_make_node_twiddle(int n,
+				       const fftw_codelet_desc *config,
+				       fftw_plan_node *recurse,
+				       int flags)
+{
+     fftw_plan_node *node = fftw_make_node();
+
+     node->type = config->type;
+     node->nodeu.twiddle.codelet_desc = config;
+     node->nodeu.twiddle.codelet = (fftw_twiddle_codelet *) config->codelet;
+     node->nodeu.twiddle.size = config->size;
+     /* the new node holds a reference to its child */
+     node->nodeu.twiddle.recurse = recurse;
+     fftw_use_node(recurse);
+     /* twiddles are built now only when measuring; otherwise left null */
+     node->nodeu.twiddle.tw =
+	 (flags & FFTW_MEASURE) ? fftw_create_twiddle(n, config) : 0;
+     return node;
+}
+
+fftw_plan_node *fftw_make_node_hc2hc(int n, fftw_direction dir,
+				     const fftw_codelet_desc *config,
+				     fftw_plan_node *recurse,
+				     int flags)
+{
+     fftw_plan_node *node = fftw_make_node();
+
+     node->type = config->type;
+     node->nodeu.hc2hc.codelet_desc = config;
+     node->nodeu.hc2hc.codelet = (fftw_hc2hc_codelet *) config->codelet;
+     node->nodeu.hc2hc.size = config->size;
+     node->nodeu.hc2hc.dir = dir;
+     /* the new node holds a reference to its child */
+     node->nodeu.hc2hc.recurse = recurse;
+     fftw_use_node(recurse);
+     /* twiddles are built now only when measuring; otherwise left null */
+     node->nodeu.hc2hc.tw =
+	 (flags & FFTW_MEASURE) ? fftw_create_twiddle(n, config) : 0;
+     return node;
+}
+
+fftw_plan_node *fftw_make_node_generic(int n, int size,
+				       fftw_generic_codelet *codelet,
+				       fftw_plan_node *recurse,
+				       int flags)
+{
+     fftw_plan_node *node = fftw_make_node();
+
+     node->type = FFTW_GENERIC;
+     node->nodeu.generic.codelet = codelet;
+     node->nodeu.generic.size = size;
+     /* the new node holds a reference to its child */
+     node->nodeu.generic.recurse = recurse;
+     fftw_use_node(recurse);
+
+     /* no codelet descriptor here: the twiddle request passes a null one */
+     node->nodeu.generic.tw = (flags & FFTW_MEASURE)
+	 ? fftw_create_twiddle(n, (const fftw_codelet_desc *) 0)
+	 : 0;
+     return node;
+}
+
+fftw_plan_node *fftw_make_node_rgeneric(int n, int size,
+					fftw_direction dir,
+					fftw_rgeneric_codelet *codelet,
+					fftw_plan_node *recurse,
+					int flags)
+{
+     fftw_plan_node *node = fftw_make_node();
+
+     /* both size and the remaining length n / size have to be odd */
+     if (size % 2 == 0 || (n / size) % 2 == 0)
+	  fftw_die("invalid size for rgeneric codelet\n");
+
+     node->type = FFTW_RGENERIC;
+     node->nodeu.rgeneric.codelet = codelet;
+     node->nodeu.rgeneric.dir = dir;
+     node->nodeu.rgeneric.size = size;
+     node->nodeu.rgeneric.recurse = recurse;
+     fftw_use_node(recurse);
+
+     /* twiddles are built now only when measuring; otherwise left null */
+     node->nodeu.rgeneric.tw = (flags & FFTW_MEASURE)
+	 ? fftw_create_twiddle(n, (const fftw_codelet_desc *) 0)
+	 : 0;
+     return node;
+}
+
+/* 
+ * Note that these two Rader-related things must go here, rather than
+ * in rader.c, in order that putils.c (and rplanner.c) won't depend
+ * upon rader.c. 
+ */
+
+fftw_rader_data *fftw_rader_top = NULL;	/* head of the list of Rader data */
+
+static void fftw_destroy_rader(fftw_rader_data * d)
+{
+     fftw_rader_data **link;
+
+     /*
+      * Nothing to release for a null pointer, or while other users
+      * still hold a reference.
+      */
+     if (!d)
+	  return;
+     if (--d->refcount > 0)
+	  return;
+
+     /* walk the global list to the link that points at d */
+     for (link = &fftw_rader_top; *link && *link != d; link = &(*link)->next)
+	  ;
+     if (!*link)
+	  fftw_die("invalid Rader data pointer\n");
+
+     /* unlink, then free the sub-plan, the tables and the record itself */
+     *link = d->next;
+     fftw_destroy_plan_internal(d->plan);
+     fftw_free(d->omega);
+     fftw_free(d->cdesc);
+     fftw_free(d);
+}
+
+static void destroy_tree(fftw_plan_node *p)
+{
+     if (p) {
+	  --p->refcnt;		/* give up one reference to this node */
+	  if (p->refcnt == 0) {	/* that was the last one: tear it down */
+	       switch (p->type) {
+		   case FFTW_NOTW:
+		   case FFTW_REAL2HC:
+		   case FFTW_HC2REAL:
+			break;	/* no twiddles and no child to release */
+		   /* other types: free twiddles if present, then drop the child */
+		   case FFTW_TWIDDLE:
+			if (p->nodeu.twiddle.tw)
+			     fftw_destroy_twiddle(p->nodeu.twiddle.tw);
+			destroy_tree(p->nodeu.twiddle.recurse);
+			break;
+
+		   case FFTW_HC2HC:
+			if (p->nodeu.hc2hc.tw)
+			     fftw_destroy_twiddle(p->nodeu.hc2hc.tw);
+			destroy_tree(p->nodeu.hc2hc.recurse);
+			break;
+
+		   case FFTW_GENERIC:
+			if (p->nodeu.generic.tw)
+			     fftw_destroy_twiddle(p->nodeu.generic.tw);
+			destroy_tree(p->nodeu.generic.recurse);
+			break;
+
+		   case FFTW_RADER:
+			if (p->nodeu.rader.tw)
+			     fftw_destroy_twiddle(p->nodeu.rader.tw);
+			if (p->nodeu.rader.rader_data)	/* shared, refcounted */
+			     fftw_destroy_rader(p->nodeu.rader.rader_data);
+			destroy_tree(p->nodeu.rader.recurse);
+			break;
+
+		   case FFTW_RGENERIC:
+			if (p->nodeu.rgeneric.tw)
+			     fftw_destroy_twiddle(p->nodeu.rgeneric.tw);
+			destroy_tree(p->nodeu.rgeneric.recurse);
+			break;
+	       }
+	       /* finally the node's own storage, and the global node count */
+	       fftw_free(p);
+	       fftw_node_cnt--;
+	  }
+     }
+}
+
+/* create a plan with twiddle factors, and other bells and whistles */
+fftw_plan fftw_make_plan(int n, fftw_direction dir,
+			 fftw_plan_node *root, int flags,
+			 enum fftw_node_type wisdom_type,
+			 int wisdom_signature,
+			 fftw_recurse_kind recurse_kind, int vector_size)
+{
+     fftw_plan plan = (fftw_plan) fftw_malloc(sizeof(struct fftw_plan_struct));
+     /* the new plan keeps a reference to its root node */
+     fftw_use_node(root);
+     plan->root = root;
+     plan->n = n;
+     plan->dir = dir;
+     plan->flags = flags;
+     plan->vector_size = vector_size;
+     plan->recurse_kind = recurse_kind;
+     plan->wisdom_type = wisdom_type;
+     plan->wisdom_signature = wisdom_signature;
+     if (recurse_kind == FFTW_VECTOR_RECURSE && vector_size > 1)
+	  fftw_die("invalid vector-recurse plan attempted\n");
+     plan->cost = 0.0;
+     plan->next = (fftw_plan) 0;
+     plan->refcnt = 0;
+     ++fftw_plan_cnt;
+     return plan;
+}
+
+/*
+ * complete with twiddle factors (because nodes don't have
+ * them when FFTW_ESTIMATE is set)
+ */
+void fftw_complete_twiddle(fftw_plan_node *p, int n)
+{
+     int r;			/* size of this node; the child gets n / r */
+     switch (p->type) {
+	 case FFTW_NOTW:
+	 case FFTW_REAL2HC:
+	 case FFTW_HC2REAL:
+	      break;		/* no twiddles and no child: recursion ends */
+
+	 case FFTW_TWIDDLE:
+	      r = p->nodeu.twiddle.size;
+	      if (!p->nodeu.twiddle.tw)	/* create only what is missing */
+		   p->nodeu.twiddle.tw =
+		       fftw_create_twiddle(n, p->nodeu.twiddle.codelet_desc);
+	      fftw_complete_twiddle(p->nodeu.twiddle.recurse, n / r);
+	      break;
+
+	 case FFTW_HC2HC:
+	      r = p->nodeu.hc2hc.size;
+	      if (!p->nodeu.hc2hc.tw)
+		   p->nodeu.hc2hc.tw =
+		       fftw_create_twiddle(n, p->nodeu.hc2hc.codelet_desc);
+	      fftw_complete_twiddle(p->nodeu.hc2hc.recurse, n / r);
+	      break;
+
+	 case FFTW_GENERIC:
+	      r = p->nodeu.generic.size;
+	      if (!p->nodeu.generic.tw)	/* generic nodes pass no descriptor */
+		   p->nodeu.generic.tw =
+		       fftw_create_twiddle(n, (const fftw_codelet_desc *) 0);
+	      fftw_complete_twiddle(p->nodeu.generic.recurse, n / r);
+	      break;
+
+	 case FFTW_RADER:
+	      r = p->nodeu.rader.size;
+	      if (!p->nodeu.rader.tw)	/* descriptor lives in the Rader data */
+		   p->nodeu.rader.tw =
+		       fftw_create_twiddle(n, p->nodeu.rader.rader_data->cdesc);
+	      fftw_complete_twiddle(p->nodeu.rader.recurse, n / r);
+	      break;
+
+	 case FFTW_RGENERIC:
+	      r = p->nodeu.rgeneric.size;
+	      if (!p->nodeu.rgeneric.tw)
+		   p->nodeu.rgeneric.tw =
+		       fftw_create_twiddle(n, (const fftw_codelet_desc *) 0);
+	      fftw_complete_twiddle(p->nodeu.rgeneric.recurse, n / r);
+	      break;
+
+     }
+}
+
+void fftw_use_plan(fftw_plan p)
+{
+     p->refcnt += 1;		/* take one more reference to the plan */
+}
+
+void fftw_destroy_plan_internal(fftw_plan p)
+{
+     /* Drop one reference; a null plan is ignored, as destroy_tree and
+      * fftw_destroy_rader already do.  The last reference frees it. */
+     if (p && --p->refcnt == 0) {
+	  destroy_tree(p->root);
+	  fftw_plan_cnt--;
+	  fftw_free(p);
+     }
+}
+
+/* end of constructors */
+
+/* management of plan tables */
+void fftw_make_empty_table(fftw_plan *table)
+{
+     *table = (fftw_plan) 0;	/* an empty table is a null list head */
+}
+
+void fftw_insert(fftw_plan *table, fftw_plan this_plan)
+{
+     this_plan->next = *table;	/* push on the front of the list */
+     *table = this_plan;
+     fftw_use_plan(this_plan);	/* the table holds a reference */
+}
+
+fftw_plan fftw_lookup(fftw_plan *table, int n, int flags, int vector_size)
+{
+     fftw_plan cur = *table;
+
+     /* first entry matching size, flags and vector size, or null */
+     while (cur && !(cur->n == n && cur->flags == flags &&
+		     cur->vector_size == vector_size))
+	  cur = cur->next;
+     return cur;
+}
+
+void fftw_destroy_table(fftw_plan *table)
+{
+     fftw_plan cur = *table;
+     while (cur) {
+	  fftw_plan following = cur->next;	/* read before cur may be freed */
+	  fftw_destroy_plan_internal(cur);
+	  cur = following;
+     }
+}
+
+double fftw_estimate_node(fftw_plan_node *p)
+{
+     int k;			/* size stored in the current node */
+     /* heuristic cost of the tree at p: this node plus, if any, its child */
+     switch (p->type) {
+	 case FFTW_NOTW:
+	      k = p->nodeu.notw.size;
+	      goto common1;
+
+	 case FFTW_REAL2HC:
+	      k = p->nodeu.real2hc.size;
+	      goto common1;
+
+	 case FFTW_HC2REAL:
+	      k = p->nodeu.hc2real.size;
+	    common1:		/* one formula for the three childless types */
+	      return 1.0 + 0.1 * (k - NOTW_OPTIMAL_SIZE) *
+		  (k - NOTW_OPTIMAL_SIZE);
+
+	 case FFTW_TWIDDLE:
+	      k = p->nodeu.twiddle.size;
+	      return 1.0 + 0.1 * (k - TWIDDLE_OPTIMAL_SIZE) *
+		  (k - TWIDDLE_OPTIMAL_SIZE)
+		  + fftw_estimate_node(p->nodeu.twiddle.recurse);
+
+	 case FFTW_HC2HC:
+	      k = p->nodeu.hc2hc.size;
+	      return 1.0 + 0.1 * (k - TWIDDLE_OPTIMAL_SIZE) *
+		  (k - TWIDDLE_OPTIMAL_SIZE)
+		  + fftw_estimate_node(p->nodeu.hc2hc.recurse);
+
+	 case FFTW_GENERIC:
+	      k = p->nodeu.generic.size;
+	      return 10.0 + k * k	/* grows with the square of the size */
+		  + fftw_estimate_node(p->nodeu.generic.recurse);
+
+	 case FFTW_RADER:
+	      k = p->nodeu.rader.size;
+	      return 10.0 + 10 * k	/* grows linearly with the size */
+		  + fftw_estimate_node(p->nodeu.rader.recurse);
+
+	 case FFTW_RGENERIC:
+	      k = p->nodeu.rgeneric.size;
+	      return 10.0 + k * k
+		  + fftw_estimate_node(p->nodeu.rgeneric.recurse);
+     }
+     return 1.0E20;		/* reached only for a type not listed above */
+}
+
+/* pick the better of two plans and destroy the other one. */
+fftw_plan fftw_pick_better(fftw_plan p1, fftw_plan p2)
+{
+     fftw_plan winner, loser;
+
+     /* a missing plan loses automatically; nothing is destroyed */
+     if (!p1)
+	  return p2;
+     if (!p2)
+	  return p1;
+
+     /* ties go to p1: p2 wins only when it is strictly cheaper */
+     winner = (p1->cost > p2->cost) ? p2 : p1;
+     loser = (winner == p1) ? p2 : p1;
+     fftw_destroy_plan_internal(loser);
+     return winner;
+}
+
+/* find the smallest prime factor of n */
+int fftw_factor(int n)
+{
+     int r;
+
+     /* try 2 */
+     if ((n & 1) == 0)
+	  return 2;
+
+     /* try odd numbers up to sqrt(n); r <= n / r cannot overflow like r * r */
+     for (r = 3; r <= n / r; r += 2)
+	  if (n % r == 0)
+	       return r;
+
+     /* n is prime */
+     return n;
+}
+
+static void print_node(FILE *f, fftw_plan_node *p, int indent)
+{
+     if (p) {			/* a null node prints nothing */
+	  switch (p->type) {
+	      case FFTW_NOTW:
+		   fprintf(f, "%*sFFTW_NOTW %d\n", indent, "",
+			   p->nodeu.notw.size);
+		   break;
+	      case FFTW_REAL2HC:
+		   fprintf(f, "%*sFFTW_REAL2HC %d\n", indent, "",
+			   p->nodeu.real2hc.size);
+		   break;
+	      case FFTW_HC2REAL:
+		   fprintf(f, "%*sFFTW_HC2REAL %d\n", indent, "",
+			   p->nodeu.hc2real.size);
+		   break;
+	      case FFTW_TWIDDLE:
+		   fprintf(f, "%*sFFTW_TWIDDLE %d\n", indent, "",
+			   p->nodeu.twiddle.size);
+		   print_node(f, p->nodeu.twiddle.recurse, indent);	/* same indent */
+		   break;
+	      case FFTW_HC2HC:
+		   fprintf(f, "%*sFFTW_HC2HC %d\n", indent, "",
+			   p->nodeu.hc2hc.size);
+		   print_node(f, p->nodeu.hc2hc.recurse, indent);
+		   break;
+	      case FFTW_GENERIC:
+		   fprintf(f, "%*sFFTW_GENERIC %d\n", indent, "",
+			   p->nodeu.generic.size);
+		   print_node(f, p->nodeu.generic.recurse, indent);
+		   break;
+	      case FFTW_RADER:
+		   fprintf(f, "%*sFFTW_RADER %d\n", indent, "",
+			   p->nodeu.rader.size);
+		   /* also show the plan kept in the Rader data, indented */
+		   fprintf(f, "%*splan for size %d convolution:\n",
+			   indent + 4, "", p->nodeu.rader.size - 1);
+		   print_node(f, p->nodeu.rader.rader_data->plan->root,
+			      indent + 6);
+
+		   print_node(f, p->nodeu.rader.recurse, indent);
+		   break;
+	      case FFTW_RGENERIC:
+		   fprintf(f, "%*sFFTW_RGENERIC %d\n", indent, "",
+			   p->nodeu.rgeneric.size);
+		   print_node(f, p->nodeu.rgeneric.recurse, indent);
+		   break;
+	  }
+     }
+}
+
+void fftw_fprint_plan(FILE *f, fftw_plan p)
+{
+     /* cost line, an optional recursion / vector-size note, then the tree */
+     fprintf(f, "plan: (cost = %e)\n", p->cost);
+     if (p->recurse_kind == FFTW_VECTOR_RECURSE)
+	  fprintf(f, "(vector recursion)\n");
+     else if (p->vector_size > 1)
+	  fprintf(f, "(vector-size %d)\n", p->vector_size);
+     print_node(f, p->root, 0);
+}
+
+void fftw_print_plan(fftw_plan p)
+{
+     fftw_fprint_plan(stdout, p);	/* same output, sent to stdout */
+}
+
+size_t fftw_sizeof_fftw_real(void)
+{
+     return(sizeof(fftw_real));	/* size of fftw_real as this file was compiled */
+}
diff --git a/src/fftw/rader.c b/src/fftw/rader.c
new file mode 100644
index 0000000..156529b
--- /dev/null
+++ b/src/fftw/rader.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ * Compute transforms of prime sizes using Rader's trick: turn them
+ * into convolutions of size n - 1, which you then perform via a pair
+ * of FFTs. 
+ */
+
+#include <stdlib.h>
+#include <math.h>
+
+#include "fftw-int.h"
+
+#ifdef FFTW_DEBUG
+#define WHEN_DEBUG(a) a	/* debug builds keep the wrapped code */
+#else
+#define WHEN_DEBUG(a)	/* otherwise it expands to nothing */
+#endif
+
+/* compute n^m mod p, where m >= 0 and p > 0. */
+static int power_mod(int n, int m, int p)
+{
+     int half;
+
+     if (m == 0)			/* n^0 */
+	  return 1;
+     if (m % 2 != 0)		/* odd: peel off one factor of n */
+	  return MULMOD(n, power_mod(n, m - 1, p), p);
+     half = power_mod(n, m / 2, p);	/* even: square n^(m/2) */
+     return MULMOD(half, half, p);
+}
+
+/*
+ * Find the period of n in the multiplicative group mod p (p prime).
+ * That is, return the smallest m such that n^m == 1 mod p.
+ */
+static int period(int n, int p)
+{
+     int prod = n, steps = 1;
+     /* for prime p the loop ends within p steps; anything else is rejected */
+     while (prod != 1) {
+	  prod = MULMOD(prod, n, p);
+	  ++steps;
+	  if (prod == 0 || steps > p)
+	       fftw_die("non-prime order in Rader\n");
+     }
+     return steps;
+}
+
+/* find a generator for the multiplicative group mod p, where p is prime */
+static int find_generator(int p)
+{
+     int g = 1;
+
+     /* a generator is an element whose period is the full p - 1 */
+     while (g < p && period(g, p) != p - 1)
+	  ++g;
+     if (g == p)
+	  fftw_die("couldn't find generator for Rader\n");
+     return g;
+}
+
+/***************************************************************************/
+
+static fftw_rader_data *create_rader_aux(int p, int flags)
+{
+     fftw_complex *omega, *work;
+     int g, ginv, gpower;
+     int i;
+     FFTW_TRIG_REAL twoPiOverN;
+     fftw_real scale = 1.0 / (p - 1);	/* for convolution */
+     fftw_plan plan;
+     fftw_rader_data *d;
+     /* builds a new, unlinked record for size p with refcount 1 */
+     if (p < 2)
+	  fftw_die("non-prime order in Rader\n");
+
+     flags &= ~FFTW_IN_PLACE;
+
+     d = (fftw_rader_data *) fftw_malloc(sizeof(fftw_rader_data));
+     /* g generates the multiplicative group mod p; ginv = g^(p-2) mod p */
+     g = find_generator(p);
+     ginv = power_mod(g, p - 2, p);
+
+     omega = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex));
+     /* forward plan of size p - 1, kept in d->plan for the Rader codelets */
+     plan = fftw_create_plan(p - 1, FFTW_FORWARD,
+			     flags & ~FFTW_NO_VECTOR_RECURSE);
+
+     work = (fftw_complex *) fftw_malloc((p - 1) * sizeof(fftw_complex));
+     /* work[i]: scaled cos/sin of 2*pi*gpower/p, with gpower = ginv^i mod p */
+     twoPiOverN = FFTW_K2PI / (FFTW_TRIG_REAL) p;
+     gpower = 1;
+     for (i = 0; i < p - 1; ++i) {
+	  c_re(work[i]) = scale * FFTW_TRIG_COS(twoPiOverN * gpower);
+	  c_im(work[i]) = FFTW_FORWARD * scale * FFTW_TRIG_SIN(twoPiOverN 
+							       * gpower);
+	  gpower = MULMOD(gpower, ginv, p);
+     }
+
+     /* fft permuted roots of unity */
+     fftw_executor_simple(p - 1, work, omega, plan->root, 1, 1,
+			  plan->recurse_kind);
+
+     fftw_free(work);
+     /* record everything in d; linking into the list is left to the caller */
+     d->plan = plan;
+     d->omega = omega;
+     d->g = g;
+     d->ginv = ginv;
+     d->p = p;
+     d->flags = flags;
+     d->refcount = 1;
+     d->next = NULL;
+     /* private descriptor, later passed to fftw_create_twiddle for this p */
+     d->cdesc = (fftw_codelet_desc *) fftw_malloc(sizeof(fftw_codelet_desc));
+     d->cdesc->name = NULL;
+     d->cdesc->codelet = NULL;
+     d->cdesc->size = p;
+     d->cdesc->dir = FFTW_FORWARD;
+     d->cdesc->type = FFTW_RADER;
+     d->cdesc->signature = g;
+     d->cdesc->ntwiddle = 0;
+     d->cdesc->twiddle_order = NULL;
+     return d;
+}
+
+/***************************************************************************/
+
+static fftw_rader_data *fftw_create_rader(int p, int flags)
+{
+     fftw_rader_data *d;
+     /* reuse an existing record for the same p and flags if there is one */
+     flags &= ~FFTW_IN_PLACE;
+     for (d = fftw_rader_top; d; d = d->next)
+	  if (d->p == p && d->flags == flags) {
+	       ++d->refcount;
+	       return d;
+	  }
+     /* otherwise build a new record and push it on the list */
+     d = create_rader_aux(p, flags);
+     d->next = fftw_rader_top;
+     fftw_rader_top = d;
+     return d;
+}
+
+/***************************************************************************/
+
+/* Compute the prime FFTs, premultiplied by twiddle factors.  Below, we
+ * extensively use the identity that fft(x*)* = ifft(x) in order to
+ * share data between forward and backward transforms and to obviate
+ * the necessity of having separate forward and backward plans. */
+
+void fftw_twiddle_rader(fftw_complex *A, const fftw_complex *W,
+			int m, int r, int stride,
+			fftw_rader_data * d)
+{
+     fftw_complex *tmp = (fftw_complex *)
+     fftw_malloc((r - 1) * sizeof(fftw_complex));	/* scratch, freed on exit */
+     int i, k, gpower = 1, g = d->g, ginv = d->ginv;
+     fftw_real a0r, a0i;	/* A[0] as it was on entry to each pass */
+     fftw_complex *omega = d->omega;
+     /* m passes; gpower should be 1 again after each loop (debug-checked) */
+     for (i = 0; i < m; ++i, A += stride, W += r - 1) {
+	  /*
+	   * Here, we fft W[k-1] * A[k*(m*stride)], using Rader.
+	   * (Actually, W is pre-permuted to match the permutation that we
+	   * will do on A.)
+	   */
+
+	  /* First, permute the input and multiply by W, storing in tmp: */
+	  /* gpower == g^k mod r in the following loop */
+	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
+	       fftw_real rA, iA, rW, iW;
+	       rW = c_re(W[k]);
+	       iW = c_im(W[k]);
+	       rA = c_re(A[gpower * (m * stride)]);
+	       iA = c_im(A[gpower * (m * stride)]);
+	       c_re(tmp[k]) = rW * rA - iW * iA;
+	       c_im(tmp[k]) = rW * iA + iW * rA;
+	  }
+
+	  WHEN_DEBUG( {
+		     if (gpower != 1)
+		     fftw_die("incorrect generator in Rader\n");
+		     }
+	  );
+
+	  /* FFT tmp to A: */
+	  fftw_executor_simple(r - 1, tmp, A + (m * stride),
+			       d->plan->root, 1, m * stride,
+			       d->plan->recurse_kind);
+
+	  /* set output DC component: */
+	  a0r = c_re(A[0]);
+	  a0i = c_im(A[0]);
+	  c_re(A[0]) += c_re(A[(m * stride)]);
+	  c_im(A[0]) += c_im(A[(m * stride)]);
+
+	  /* now, multiply by omega: */
+	  for (k = 0; k < r - 1; ++k) {
+	       fftw_real rA, iA, rW, iW;
+	       rW = c_re(omega[k]);
+	       iW = c_im(omega[k]);
+	       rA = c_re(A[(k + 1) * (m * stride)]);
+	       iA = c_im(A[(k + 1) * (m * stride)]);
+	       c_re(A[(k + 1) * (m * stride)]) = rW * rA - iW * iA;
+	       c_im(A[(k + 1) * (m * stride)]) = -(rW * iA + iW * rA);	/* conjugated */
+	  }
+
+	  /* this will add A[0] to all of the outputs after the ifft */
+	  c_re(A[(m * stride)]) += a0r;
+	  c_im(A[(m * stride)]) -= a0i;	/* minus: the data is conjugated here */
+
+	  /* inverse FFT: */
+	  fftw_executor_simple(r - 1, A + (m * stride), tmp,
+			       d->plan->root, m * stride, 1,
+			       d->plan->recurse_kind);
+
+	  /* finally, do inverse permutation to unshuffle the output: */
+	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
+	       c_re(A[gpower * (m * stride)]) = c_re(tmp[k]);
+	       c_im(A[gpower * (m * stride)]) = -c_im(tmp[k]);	/* undo conjugation */
+	  }
+
+	  WHEN_DEBUG( {
+		     if (gpower != 1)
+		     fftw_die("incorrect generator in Rader\n");
+		     }
+	  );
+
+     }
+
+     fftw_free(tmp);
+}
+
+void fftwi_twiddle_rader(fftw_complex *A, const fftw_complex *W,
+			 int m, int r, int stride,
+			 fftw_rader_data * d)
+{
+     fftw_complex *tmp = (fftw_complex *)
+     fftw_malloc((r - 1) * sizeof(fftw_complex));	/* scratch, freed on exit */
+     int i, k, gpower = 1, g = d->g, ginv = d->ginv;
+     fftw_real a0r, a0i;	/* A[0] as it was on entry to each pass */
+     fftw_complex *omega = d->omega;
+     /* m passes; gpower should be 1 again after each loop (debug-checked) */
+     for (i = 0; i < m; ++i, A += stride, W += r - 1) {
+	  /*
+	   * Here, we fft W[k-1]* * A[k*(m*stride)], using Rader.
+	   * (Actually, W is pre-permuted to match the permutation that
+	   * we will do on A.)
+	   */
+
+	  /* First, permute the input and multiply by W*, storing in tmp: */
+	  /* gpower == g^k mod r in the following loop */
+	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
+	       fftw_real rA, iA, rW, iW;
+	       rW = c_re(W[k]);
+	       iW = c_im(W[k]);
+	       rA = c_re(A[gpower * (m * stride)]);
+	       iA = c_im(A[gpower * (m * stride)]);
+	       c_re(tmp[k]) = rW * rA + iW * iA;
+	       c_im(tmp[k]) = iW * rA - rW * iA;	/* conjugate of W* times A */
+	  }
+
+	  WHEN_DEBUG( {
+		     if (gpower != 1)
+		     fftw_die("incorrect generator in Rader\n");
+		     }
+	  );
+
+	  /* FFT tmp to A: */
+	  fftw_executor_simple(r - 1, tmp, A + (m * stride),
+			       d->plan->root, 1, m * stride,
+			       d->plan->recurse_kind);
+
+	  /* set output DC component: */
+	  a0r = c_re(A[0]);
+	  a0i = c_im(A[0]);
+	  c_re(A[0]) += c_re(A[(m * stride)]);
+	  c_im(A[0]) -= c_im(A[(m * stride)]);	/* minus: the data is conjugated */
+
+	  /* now, multiply by omega: */
+	  for (k = 0; k < r - 1; ++k) {
+	       fftw_real rA, iA, rW, iW;
+	       rW = c_re(omega[k]);
+	       iW = c_im(omega[k]);
+	       rA = c_re(A[(k + 1) * (m * stride)]);
+	       iA = c_im(A[(k + 1) * (m * stride)]);
+	       c_re(A[(k + 1) * (m * stride)]) = rW * rA - iW * iA;
+	       c_im(A[(k + 1) * (m * stride)]) = -(rW * iA + iW * rA);	/* conjugated */
+	  }
+
+	  /* this will add A[0] to all of the outputs after the ifft */
+	  c_re(A[(m * stride)]) += a0r;
+	  c_im(A[(m * stride)]) += a0i;
+
+	  /* inverse FFT: */
+	  fftw_executor_simple(r - 1, A + (m * stride), tmp,
+			       d->plan->root, m * stride, 1,
+			       d->plan->recurse_kind);
+
+	  /* finally, do inverse permutation to unshuffle the output: */
+	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
+	       A[gpower * (m * stride)] = tmp[k];	/* copied as is, no sign flip */
+	  }
+
+	  WHEN_DEBUG( {
+		     if (gpower != 1)
+		     fftw_die("incorrect generator in Rader\n");
+		     }
+	  );
+     }
+
+     fftw_free(tmp);
+}
+
+/***************************************************************************/
+
+/*
+ * Make an FFTW_RADER plan node.  Note that this function must go
+ * here, rather than in putils.c, because it indirectly calls the
+ * fftw_planner.  If we included it in putils.c, which is also used
+ * by rfftw, then any program using rfftw would be linked with all
+ * of the FFTW codelets, even if they were not needed.   I wish that the
+ * darn linkers operated on a function rather than a file granularity. 
+ */
+fftw_plan_node *fftw_make_node_rader(int n, int size, fftw_direction dir,
+				     fftw_plan_node *recurse,
+				     int flags)
+{
+     fftw_plan_node *node = fftw_make_node();
+
+     node->type = FFTW_RADER;
+     node->nodeu.rader.size = size;
+     if (dir == FFTW_FORWARD)
+	  node->nodeu.rader.codelet = fftw_twiddle_rader;
+     else
+	  node->nodeu.rader.codelet = fftwi_twiddle_rader;
+     node->nodeu.rader.rader_data = fftw_create_rader(size, flags);
+     node->nodeu.rader.recurse = recurse;
+     fftw_use_node(recurse);
+     /* twiddles are built now only when measuring; otherwise left null */
+     node->nodeu.rader.tw = (flags & FFTW_MEASURE)
+	 ? fftw_create_twiddle(n, node->nodeu.rader.rader_data->cdesc)
+	 : 0;
+     return node;
+}
diff --git a/src/fftw/timer.c b/src/fftw/timer.c
new file mode 100644
index 0000000..a7d05a4
--- /dev/null
+++ b/src/fftw/timer.c
@@ -0,0 +1,164 @@
+
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ * timer.c -- this file measures the execution time of 
+ *            ffts.  This information is used by the planner.
+ */
+
+/* $Id: timer.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+#include <time.h>
+#include "fftw-int.h"
+#include <math.h>
+#include <stdlib.h>
+
+/********************* System-specific Timing Support *********************/
+
+#if defined(HAVE_MAC_TIMER) && !defined(HAVE_MAC_PCI_TIMER)
+
+/* Use Macintosh Time Manager to get the time: */
+
+/*
+ * make sure compiler (CW) recognizes the pascal keywords that are in
+ * Timer.h
+ */
+#pragma only_std_keywords off	
+
+#include <Timer.h>
+
+#pragma only_std_keywords reset
+
+fftw_time get_Mac_microseconds(void)
+{
+     /*
+      * The Time Manager reports a 64 bit unsigned microsecond count
+      * as two unsigned long halves, lo and hi.
+      */
+     UnsignedWide now;
+     fftw_time stamp;
+
+     Microseconds(&now);
+
+     /* copy both halves into our own time structure */
+     stamp.hi = now.hi;
+     stamp.lo = now.lo;
+
+     return stamp;
+}
+
+fftw_time fftw_time_diff(fftw_time t1, fftw_time t2)
+/*
+ * Returns t1 - t2, where each time is a 64 bit integer held as a
+ * 32 bit lo word and a 32 bit hi word.
+ * If t1 < t2 the result is 0 instead.
+ */
+{
+     fftw_time diff;
+
+     /*
+      * t1 precedes t2 when its hi word is smaller, or when the hi words
+      * tie and its lo word is smaller; report zero in that case.
+      */
+     if (t1.hi < t2.hi || (t1.hi == t2.hi && t1.lo < t2.lo)) {
+	  diff.hi = diff.lo = 0;
+	  return diff;
+     }
+
+     /* subtract word by word, borrowing from hi when lo is too small */
+     diff.hi = t1.hi - t2.hi;
+     if (t1.lo < t2.lo)
+	  diff.hi -= 1;
+     diff.lo = t1.lo - t2.lo;
+
+     return diff;
+}
+
+
+#endif
+
+#ifdef HAVE_WIN32_TIMER
+#include <windows.h>
+
+static LARGE_INTEGER gFreq;	/* filled in by QueryPerformanceFrequency */
+static int gHaveHiResTimer = 0;	/* set once that query succeeds */
+static int gFirstTime = 1;	/* cleared on the first GetPerfTime call */
+
+unsigned long GetPerfTime(void)
+{
+     LARGE_INTEGER ticks;
+
+     /* probe once for the high-resolution counter and cache the answer */
+     if (gFirstTime) {
+	  gFirstTime = 0;
+	  if (QueryPerformanceFrequency(&gFreq))
+	       gHaveHiResTimer = 1;
+     }
+     /* without it, fall back on the C library clock() */
+     if (!gHaveHiResTimer)
+	  return (unsigned long) clock();
+
+     /* only the low part of the counter is reported */
+     QueryPerformanceCounter(&ticks);
+     return ticks.u.LowPart;
+}
+
+double GetPerfSec(double pTime)
+{
+     /* ticks -> seconds, for whichever clock GetPerfTime is reading */
+     if (!gHaveHiResTimer)
+	  return pTime / CLOCKS_PER_SEC;
+
+     /* only the low part of the frequency is used: assumes HighPart==0 */
+     return pTime / gFreq.u.LowPart;
+}
+
+#endif				/* HAVE_WIN32_TIMER */
+
+#if defined(FFTW_USE_GETTIMEOFDAY)
+
+/* timer support routines for systems having gettimeofday */
+
+#if defined(HAVE_BSDGETTIMEOFDAY) && ! defined(HAVE_GETTIMEOFDAY)
+#define gettimeofday BSDgettimeofday
+#endif
+
+fftw_time fftw_gettimeofday_get_time(void)
+{
+     struct timeval now;
+     gettimeofday(&now, 0);	/* second argument is not used here */
+     return now;
+}
+
+fftw_time fftw_gettimeofday_time_diff(fftw_time t1, fftw_time t2)
+{
+     fftw_time delta;
+
+     /* field-by-field difference t1 - t2 */
+     delta.tv_usec = t1.tv_usec - t2.tv_usec;
+     delta.tv_sec = t1.tv_sec - t2.tv_sec;
+
+     /* borrow whole seconds until the microsecond field is non-negative */
+     for (; delta.tv_usec < 0; delta.tv_usec += 1000000L)
+	  delta.tv_sec -= 1;
+
+     return delta;
+}
+#endif
diff --git a/src/fftw/twiddle.c b/src/fftw/twiddle.c
new file mode 100644
index 0000000..16e9fd0
--- /dev/null
+++ b/src/fftw/twiddle.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ * twiddle.c -- compute twiddle factors
+ * These are the twiddle factors for *direct* fft.  Flip sign to get
+ * the inverse
+ */
+
+/* $Id: twiddle.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+#include "fftw-int.h"
+#include <math.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#ifndef TRUE
+#define TRUE (1 == 1)
+#endif
+
+#ifndef FALSE
+#define FALSE (1 == 0)
+#endif
+
+#ifdef USE_FFTW_SAFE_MULMOD
+/* compute (x * y) mod p, but watch out for integer overflows; we must
+   have x, y >= 0, p > 0.  This routine is slow (it recurses on y). */
+int fftw_safe_mulmod(int x, int y, int p)
+{
+     if (y == 0 || x <= INT_MAX / y)
+	  return((x * y) % p);
+     else {
+	  int y2 = y/2, a = fftw_safe_mulmod(x, y2, p);
+	  int room = p - fftw_safe_mulmod(x, y - y2, p);	/* p - b, in (0, p] */
+	  return (a >= room) ? a - room : a + (p - room);	/* (a + b) mod p, no overflow */
+     }
+}
+#endif /* USE_FFTW_SAFE_MULMOD */
+
+static fftw_complex *fftw_compute_rader_twiddle(int n, int r, int g)
+{
+     /* m rows of r-1 entries; gpow steps through MULMOD(gpow, g, r) */
+     const FFTW_TRIG_REAL theta = FFTW_K2PI / (FFTW_TRIG_REAL) n;
+     const int m = n / r;
+     fftw_complex *tab, *out;
+     int row, col, gpow;
+
+     tab = (fftw_complex *) fftw_malloc((r - 1) * m * sizeof(fftw_complex));
+     out = tab;
+     for (row = 0; row < m; ++row) {
+	  gpow = 1;		/* every row restarts from 1 */
+	  for (col = 0; col < r - 1; ++col, ++out) {
+	       FFTW_TRIG_REAL ij = (FFTW_TRIG_REAL) (row * gpow);
+	       c_re(*out) = FFTW_TRIG_COS(theta * ij);
+	       c_im(*out) = FFTW_FORWARD * FFTW_TRIG_SIN(theta * ij);
+	       gpow = MULMOD(gpow, g, r);
+	  }
+     }
+     return tab;
+}
+
+/*
+ * compute the W coefficients (that is, powers of the root of 1) for
+ * size n and codelet d, and store them into a newly fftw_malloc'ed array.
+ */
+static fftw_complex *fftw_compute_twiddle(int n, const fftw_codelet_desc *d)
+{
+     FFTW_TRIG_REAL twoPiOverN;
+     int i, j;
+     fftw_complex *W;
+
+     twoPiOverN = FFTW_K2PI / (FFTW_TRIG_REAL) n;
+
+     if (!d) {
+	  /* generic codelet, needs all twiddles in order */
+	  W = (fftw_complex *) fftw_malloc(n * sizeof(fftw_complex));
+	  for (i = 0; i < n; ++i) {
+	       c_re(W[i]) = FFTW_TRIG_COS(twoPiOverN * (FFTW_TRIG_REAL) i);
+	       c_im(W[i]) = FFTW_FORWARD * FFTW_TRIG_SIN(twoPiOverN * (FFTW_TRIG_REAL) i);
+	  }
+     } else if (d->type == FFTW_RADER)
+	  W = fftw_compute_rader_twiddle(n, d->size, d->signature);
+     else {
+	  int r = d->size;
+	  int m = n / r, m_alloc;
+	  int r1 = d->ntwiddle;	/* entries stored per row */
+	  int istart;
+
+	  if (d->type == FFTW_TWIDDLE) {
+	       istart = 0;
+	       m_alloc = m;
+	  } else if (d->type == FFTW_HC2HC) {
+	       /*
+		* This is tricky, do not change lightly.
+		*/
+	       m = (m + 1) / 2;
+	       m_alloc = m - 1;	/* only rows istart..m-1 are stored */
+	       istart = 1;	/* row 0 is not stored */
+	  } else {
+	       fftw_die("compute_twiddle: invalid argument\n");
+	       /* paranoia for gcc */
+	       m_alloc = 0;
+	       istart = 0;
+	  }
+
+	  W = (fftw_complex *) fftw_malloc(r1 * m_alloc * sizeof(fftw_complex));
+	  for (i = istart; i < m; ++i)
+	       for (j = 0; j < r1; ++j) {
+		    int k = (i - istart) * r1 + j;	/* rows are packed from istart */
+		    FFTW_TRIG_REAL
+			ij = (FFTW_TRIG_REAL) (i * d->twiddle_order[j]);
+		    c_re(W[k]) = FFTW_TRIG_COS(twoPiOverN * ij);
+		    c_im(W[k]) = FFTW_FORWARD * FFTW_TRIG_SIN(twoPiOverN * ij);
+	       }
+     }
+
+     return W;
+}
+
+/*
+ * these routines implement a simple reference-count-based 
+ * management of twiddle structures
+ */
+static fftw_twiddle *twlist = (fftw_twiddle *) 0;
+int fftw_twiddle_size = 0;	/* total allocated size, for debugging */
+
+/* true if the two codelets can share the same twiddle factors */
+static int compatible(const fftw_codelet_desc *d1, const fftw_codelet_desc *d2)
+{
+     int idx;
+
+     /* identical descriptors (including two null ones) trivially match */
+     if (d1 == d2)
+	  return TRUE;
+
+     /* exactly one of them is null */
+     if (!(d1 && d2))
+	  return FALSE;
+
+     /*
+      * size, type (FFTW_TWIDDLE/FFTW_HC2HC/FFTW_RADER) and number of
+      * twiddles must all agree
+      */
+     if (d1->size != d2->size || d1->type != d2->type ||
+	 d1->ntwiddle != d2->ntwiddle)
+	  return FALSE;
+
+     /* finally the twiddle orders must agree element by element */
+     idx = 0;
+     while (idx < d1->ntwiddle &&
+	    d1->twiddle_order[idx] == d2->twiddle_order[idx])
+	  ++idx;
+
+     /*
+      * the scan stops short of ntwiddle only on a mismatch
+      */
+     return (idx < d1->ntwiddle) ? FALSE : TRUE;
+}
+
+fftw_twiddle *fftw_create_twiddle(int n, const fftw_codelet_desc *d)
+{
+     fftw_twiddle *node = twlist;
+
+     /* search the list for a shareable table of the same length */
+     while (node && !(n == node->n && compatible(d, node->cdesc)))
+	  node = node->next;
+
+     if (node) {
+	  /* found: hand out another reference */
+	  ++node->refcnt;
+	  return node;
+     }
+
+     /* not found: build a fresh entry holding a single reference */
+     node = (fftw_twiddle *) fftw_malloc(sizeof(fftw_twiddle));
+     fftw_twiddle_size += n;
+     node->n = n;
+     node->cdesc = d;
+     node->twarray = fftw_compute_twiddle(n, d);
+     node->refcnt = 1;
+     node->next = twlist;	/* push onto the head of the list */
+     twlist = node;
+     return node;
+}
+
+void fftw_destroy_twiddle(fftw_twiddle * tw)
+{
+     fftw_twiddle **p;		/* address of the link currently examined */
+     --tw->refcnt;
+
+     if (tw->refcnt == 0) {
+	  /* last reference gone: unlink from twlist and release storage */
+	  for (p = &twlist; *p; p = &((*p)->next))	/* stop at the null link */
+	       if (*p == tw) {
+		    *p = tw->next;
+		    fftw_twiddle_size -= tw->n;
+		    fftw_free(tw->twarray);
+		    fftw_free(tw);
+		    return;
+	       }
+	  fftw_die("BUG in fftw_destroy_twiddle\n");	/* tw was not in the list */
+     }
+}
diff --git a/src/fftw/wisdom.c b/src/fftw/wisdom.c
new file mode 100644
index 0000000..b487ea8
--- /dev/null
+++ b/src/fftw/wisdom.c
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ * wisdom.c -- manage the wisdom
+ */
+
+#include "fftw-int.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+struct wisdom {
+     int n;			      /* lookup key */
+     int flags;			      /* lookup key */
+     fftw_direction dir;	      /* lookup key */
+     enum fftw_wisdom_category category;	/* lookup key */
+     int istride;		      /* lookup key */
+     int ostride;		      /* lookup key */
+     int vector_size;		      /* neither set nor compared in this file */
+     enum fftw_node_type type;	      /* this is the wisdom */
+     int signature;		      /* this is the wisdom */
+     fftw_recurse_kind recurse_kind;  /* this is the wisdom */
+     struct wisdom *next;	      /* next entry of wisdom_list */
+};
+
+/* list of wisdom */
+static struct wisdom *wisdom_list = (struct wisdom *) 0;
+
+int fftw_wisdom_lookup(int n, int flags, fftw_direction dir,
+		       enum fftw_wisdom_category category,
+		       int istride, int ostride,
+		       enum fftw_node_type *type,
+		       int *signature, fftw_recurse_kind *recurse_kind,
+		       int replacep)
+{
+     struct wisdom *w;
+
+     /* with wisdom disabled there is nothing to look up */
+     if ((flags & FFTW_USE_WISDOM) == 0)
+	  return 0;
+
+     /* always use (only) wisdom from measurements */
+     flags |= FFTW_MEASURE;
+
+     for (w = wisdom_list; w; w = w->next) {
+	  /* skip entries whose key differs in any field */
+	  if (w->n != n || w->flags != flags || w->dir != dir ||
+	      w->istride != istride || w->ostride != ostride ||
+	      w->category != category)
+	       continue;
+
+	  if (!replacep) {
+	       /* read mode: copy the stored wisdom out to the caller */
+	       *type = w->type;
+	       *signature = w->signature;
+	       *recurse_kind = w->recurse_kind;
+	  } else {
+	       /* replace mode: overwrite the stored wisdom */
+	       w->type = *type;
+	       w->signature = *signature;
+	       w->recurse_kind = *recurse_kind;
+	  }
+	  return 1;
+     }
+     return 0;
+}
+
+void fftw_wisdom_add(int n, int flags, fftw_direction dir,
+		     enum fftw_wisdom_category category,
+		     int istride, int ostride,
+		     enum fftw_node_type type,
+		     int signature,
+		     fftw_recurse_kind recurse_kind)
+{
+     struct wisdom *entry;
+
+     /* a vector recursion must not show up when the flags forbid it */
+     if (recurse_kind == FFTW_VECTOR_RECURSE &&
+	 (flags & FFTW_NO_VECTOR_RECURSE) != 0)
+	  fftw_die("bug in planner (conflicting plan options)\n");
+
+     /* wisdom disabled, or not a measurement: nothing to record */
+     if ((flags & FFTW_USE_WISDOM) == 0 || (flags & FFTW_MEASURE) == 0)
+	  return;
+
+     /* an existing entry with the same key is overwritten in place */
+     if (fftw_wisdom_lookup(n, flags, dir, category, istride, ostride,
+			    &type, &signature, &recurse_kind, 1))
+	  return;
+
+     /* otherwise create a new entry: key fields first, then the wisdom */
+     entry = (struct wisdom *) fftw_malloc(sizeof(struct wisdom));
+     entry->n = n;
+     entry->flags = flags;
+     entry->dir = dir;
+     entry->category = category;
+     entry->istride = istride;
+     entry->ostride = ostride;
+     entry->type = type;
+     entry->signature = signature;
+     entry->recurse_kind = recurse_kind;
+
+     /* push it onto the head of the list */
+     entry->next = wisdom_list;
+     wisdom_list = entry;
+}
+
+void fftw_forget_wisdom(void)
+{
+     struct wisdom *victim;
+
+     /* pop and free entries until the list is empty */
+     while ((victim = wisdom_list) != 0) {
+	  wisdom_list = victim->next;
+	  fftw_free(victim);
+     }
+}
+
+/*
+ * user-visible routines, to convert wisdom into strings etc.
+ */
+static const char *WISDOM_FORMAT_VERSION = "FFTW-" FFTW_VERSION;
+
+static void (*emit) (char c, void *data);
+
+static void emit_string(const char *s, void *data)
+{
+     for (; *s != '\0'; ++s)
+	  emit(*s, data);	/* one character at a time, terminator excluded */
+}
+
+static void emit_int(int n, void *data)
+{
+     char digits[128];	/* decimal text of n */
+     const char *c;
+     sprintf(digits, "%d", n);
+     for (c = digits; *c; ++c) emit(*c, data);
+}
+
+/* dump wisdom in lisp-like format */
+void fftw_export_wisdom(void (*emitter) (char c, void *), void *data)
+{
+     struct wisdom *w;
+     int field[9];
+     int k;
+
+     /* every emit_* helper writes through this handler */
+     emit = emitter;
+
+     emit('(', data);
+     emit_string(WISDOM_FORMAT_VERSION, data);
+
+     for (w = wisdom_list; w; w = w->next) {
+	  /* one parenthesized record of nine integers per entry */
+	  field[0] = (int) w->n;
+	  field[1] = (int) w->flags;
+	  field[2] = (int) w->dir;
+	  field[3] = (int) w->category;
+	  field[4] = (int) w->istride;
+	  field[5] = (int) w->ostride;
+	  field[6] = (int) w->type;
+	  field[7] = (int) w->signature;
+	  field[8] = (int) w->recurse_kind;
+	  emit(' ', data);	/* separator to make the output nicer */
+	  emit('(', data);
+	  for (k = 0; k < 9; ++k) {
+	       if (k > 0)
+		    emit(' ', data);
+	       emit_int(field[k], data);
+	  }
+	  emit(')', data);
+     }
+     emit(')', data);
+}
+
+/* input part */
+static int next_char;
+static int (*get_input) (void *data);
+static fftw_status input_error;
+
+static void read_char(void *data)
+{
+     const int c = get_input(data);
+     next_char = c;
+     if (c == EOF || c == 0)	/* either value marks the end of the input */
+	  input_error = FFTW_FAILURE;
+}
+
+/* skip blanks, newlines, tabs, etc */
+static void eat_blanks(void *data)
+{
+     for (; isspace(next_char); read_char(data))
+	  ;	/* discard the whitespace character */
+}
+
+static int read_int(void *data)
+{
+     int negative = 0;
+     int value = 0;
+
+     /* optional '-' sign; blanks may surround it */
+     eat_blanks(data);
+     if (next_char == '-') {
+	  negative = 1;
+	  read_char(data);
+	  eat_blanks(data);
+     }
+
+     /* at least one digit is mandatory */
+     if (!isdigit(next_char)) {
+	  input_error = FFTW_FAILURE;
+	  return 0;
+     }
+     for (; isdigit(next_char); read_char(data))
+	  value = value * 10 + (next_char - '0');
+
+     return negative ? -value : value;
+}
+
+#define EXPECT(c)	/* consume c (after blanks) or fail the import */ \
+do {				      \
+     eat_blanks(data);		      \
+     if (input_error == FFTW_FAILURE || \
+         next_char != (c))	      \
+	  return FFTW_FAILURE;	      \
+     read_char(data);		      \
+} while (0)
+
+#define EXPECT_INT(n)	/* read an integer into n or fail the import */ \
+do {				                      \
+     (n) = read_int(data);	                      \
+     if (input_error == FFTW_FAILURE)                 \
+	  return FFTW_FAILURE;		              \
+} while (0)
+
+#define EXPECT_STRING(s) /* EXPECT each character of s in turn */ \
+do {                                 \
+     const char *s1 = (s);	     \
+     while (*s1) {		     \
+	  EXPECT(*s1);		     \
+	  ++s1;			     \
+     }				     \
+} while (0)
+
+fftw_status fftw_import_wisdom(int (*g) (void *), void *data)
+{
+     /* raw integers, in the order they appear inside one record */
+     int n, flags;
+     int dir_int, category_int;
+     int istride, ostride;
+     int type_int, signature, recurse_kind_int;
+
+     /* install the input handler and clear any stale error state */
+     get_input = g;
+     input_error = FFTW_SUCCESS;
+
+     /* prime the one-character lookahead */
+     read_char(data);
+
+     /* header: '(' followed by the format version tag */
+     eat_blanks(data);
+     EXPECT('(');
+     eat_blanks(data);
+     EXPECT_STRING(WISDOM_FORMAT_VERSION);
+     eat_blanks(data);
+
+     /*
+      * records follow until the ')' that closes the outer list; any
+      * EXPECT* failure returns FFTW_FAILURE immediately, leaving the
+      * records already added in place
+      */
+     for (; next_char != ')'; eat_blanks(data)) {
+	  EXPECT('(');
+	  EXPECT_INT(n);
+	  EXPECT_INT(flags);
+	  EXPECT_INT(dir_int);
+	  EXPECT_INT(category_int);
+	  EXPECT_INT(istride);
+	  EXPECT_INT(ostride);
+	  EXPECT_INT(type_int);
+	  EXPECT_INT(signature);
+	  EXPECT_INT(recurse_kind_int);
+	  eat_blanks(data);
+	  EXPECT(')');
+
+	  /*
+	   * the record has been read properly: add it, converting the
+	   * integers to their enumerated types explicitly
+	   */
+	  fftw_wisdom_add(n, flags,
+			  (fftw_direction) dir_int,
+			  (enum fftw_wisdom_category) category_int,
+			  istride, ostride,
+			  (enum fftw_node_type) type_int,
+			  signature,
+			  (fftw_recurse_kind) recurse_kind_int);
+     }
+
+     return FFTW_SUCCESS;
+}
diff --git a/src/fftw/wisdomio.c b/src/fftw/wisdomio.c
new file mode 100644
index 0000000..a085151
--- /dev/null
+++ b/src/fftw/wisdomio.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "fftw-int.h"
+
+/**************** import/export using file ***************/
+
+static void file_emitter(char c, void *data)
+{
+     fputc(c, (FILE *) data);	/* data is the destination stream */
+}
+
+void fftw_export_wisdom_to_file(FILE *output_file)
+{
+     if (!output_file) return;	/* no stream: silently do nothing */
+     fftw_export_wisdom(file_emitter, (void *) output_file);
+}
+
+static int file_get_input(void *data)
+{
+     return fgetc((FILE *) data);	/* next character, or EOF */
+}
+
+fftw_status fftw_import_wisdom_from_file(FILE *input_file)
+{
+     return input_file
+	  ? fftw_import_wisdom(file_get_input, (void *) input_file)
+	  : FFTW_FAILURE;	/* a null stream is reported as failure */
+}
+
+/*************** import/export using string **************/
+
+static void emission_counter(char c, void *data)
+{
+     /* the character itself is ignored; only the count matters */
+     (void) c;
+     *(int *) data += 1;
+}
+
+static void string_emitter(char c, void *data)
+{
+     char **cursor = (char **) data;
+     char *dst = *cursor;
+     dst[0] = c; dst[1] = 0;	/* store c and keep the buffer terminated */
+     *cursor = dst + 1;
+}
+
+char *fftw_export_wisdom_to_string(void)
+{
+     int length = 0;
+     char *buf, *cursor;
+
+     /* first pass: count the characters the export would produce */
+     fftw_export_wisdom(emission_counter, (void *) &length);
+
+     buf = (char *) fftw_malloc(sizeof(char) * (length + 1));
+     if (buf == 0)
+	  return 0;
+
+     /* second pass: emit into buf; cursor must end on the terminator */
+     cursor = buf;
+     fftw_export_wisdom(string_emitter, (void *) &cursor);
+     if (cursor != buf + length)
+	  fftw_die("Unexpected output string length!\n");
+     return buf;
+}
+
+static int string_get_input(void *data)
+{
+     const char **input_string = (const char **) data;
+     /* 0 at the terminator (cursor not advanced); otherwise the byte as */
+     /* an unsigned char, like getc(), so it is safe for <ctype.h> tests */
+     if (**input_string)
+	  return (unsigned char) *((*input_string)++);
+     return 0;
+}
+
+fftw_status fftw_import_wisdom_from_string(const char *input_string)
+{
+     /* local cursor that string_get_input advances through the text */
+     const char *cursor = input_string;
+
+     return input_string ? fftw_import_wisdom(string_get_input,
+					      (void *) &cursor) : FFTW_FAILURE;
+}
diff --git a/src/fftw3/api/api.h b/src/fftw3/api/api.h
new file mode 100644
index 0000000..6b37151
--- /dev/null
+++ b/src/fftw3/api/api.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* internal API definitions */
+#ifndef __API_H__
+#define __API_H__
+
+/* just in case: force <fftw3.h> not to use C99 complex numbers */
+#undef _Complex_I
+
+#include "fftw3.h"
+#include "ifftw.h"
+
+/* the API ``plan'' contains both the kernel plan and problem */
+struct X(plan_s) {
+     plan *pln;		/* kernel plan */
+     problem *prb;	/* problem the plan was created for */
+     int sign;		/* sign value stored by X(mkapiplan) */
+};
+
+/* shorthand */
+typedef struct X(plan_s) apiplan;
+
+/* complex type for internal use */
+typedef R C[2];
+
+void X(extract_reim)(int sign, C *c, R **r, R **i);
+
+#define TAINT_UNALIGNED(p, flg) TAINT(p, ((flg) & FFTW_UNALIGNED) != 0)
+
+tensor *X(mktensor_rowmajor)(int rnk, const int *n,
+			     const int *niphys, const int *nophys,
+			     int is, int os);
+
+tensor *X(mktensor_iodims)(int rank, const X(iodim) *dims, int is, int os);
+const int *X(rdft2_pad)(int rnk, const int *n, const int *nembed,
+			int inplace, int cmplx, int **nfree);
+
+int X(many_kosherp)(int rnk, const int *n, int howmany);
+int X(guru_kosherp)(int rank, const X(iodim) *dims,
+		    int howmany_rank, const X(iodim) *howmany_dims);
+
+
+printer *X(mkprinter_file)(FILE *f);
+
+planner *X(the_planner)(void);
+void X(configure_planner)(planner *plnr);
+
+void X(mapflags)(planner *, unsigned);
+
+apiplan *X(mkapiplan)(int sign, unsigned flags, problem *prb);
+
+#endif				/* __API_H__ */
diff --git a/src/fftw3/api/apiplan.c b/src/fftw3/api/apiplan.c
new file mode 100644
index 0000000..b9fee6b
--- /dev/null
+++ b/src/fftw3/api/apiplan.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+
+apiplan *X(mkapiplan)(int sign, unsigned flags, problem *prb)
+WITH_ALIGNED_STACK({
+     plan *pln;
+     plan *pln0;
+     apiplan *p = 0;		/* stays 0 (the return value) when planning fails */
+     planner *plnr = X(the_planner)();
+     
+     /* map API flags into FFTW flags */
+     X(mapflags)(plnr, flags);
+     
+     /* create plan */
+     plnr->planner_flags &= ~BLESSING;
+     pln = plnr->adt->mkplan(plnr, prb);
+     
+     if (pln) {
+	  AWAKE(pln, 1);
+	  
+	  /* build apiplan */
+	  p = (apiplan *) MALLOC(sizeof(apiplan), PLANS);
+	  p->pln = pln;
+	  p->prb = prb;		/* the apiplan keeps prb from here on */
+	  p->sign = sign; /* cache for execute_dft */
+	  
+	  /* blessing protocol */
+	  plnr->planner_flags |= BLESSING;
+	  pln0 = plnr->adt->mkplan(plnr, prb);	/* planned again with BLESSING set... */
+	  X(plan_destroy_internal)(pln0);	/* ...and that second plan is discarded */
+     } else {
+	  X(problem_destroy)(prb);	/* no plan was found: prb is destroyed here */
+     }
+     
+     /* discard all information not necessary to reconstruct the
+	plan */
+     plnr->adt->forget(plnr, FORGET_ACCURSED);
+     
+     return p;
+})
+
+void X(destroy_plan)(X(plan) p)
+{
+     if (!p)
+          return;		/* destroying a null plan is a no-op */
+     AWAKE(p->pln, 0);
+     X(plan_destroy_internal)(p->pln);
+     X(problem_destroy)(p->prb);
+     X(ifree)(p);
+}
diff --git a/src/fftw3/api/config.h b/src/fftw3/api/config.h
new file mode 100644
index 0000000..e97bf0d
--- /dev/null
+++ b/src/fftw3/api/config.h
@@ -0,0 +1,301 @@
+/* configuration file for the IM_FFTW3 library */
+
+/* extra CFLAGS for codelets */
+#define CODELET_OPTIM	""
+
+/* Define to a macro mangling the given C identifier (in lower and upper
+   case), which must not contain underscores, for linking with Fortran. */
+#undef F77_FUNC
+
+/* As F77_FUNC, but for C identifiers containing underscores. */
+#undef F77_FUNC_
+
+/* Define if F77_FUNC and F77_FUNC_ are equivalent. */
+#undef F77_FUNC_EQUIV
+
+/* C compiler name and flags */
+#define FFTW_CC	""
+
+/* Define to enable extra FFTW debugging code. */
+#undef FFTW_DEBUG
+
+/* Define to enable alignment debugging hacks. */
+#undef FFTW_DEBUG_ALIGNMENT
+
+/* Define to enable debugging malloc. */
+#undef FFTW_DEBUG_MALLOC
+
+/* enable fast, unsafe modular multiplications, risking overflow for large
+   prime sizes */
+#undef FFTW_ENABLE_UNSAFE_MULMOD
+
+/* Define to compile in long-double precision. */
+#undef FFTW_LDOUBLE
+
+/* Define to compile in single precision. */
+#define FFTW_SINGLE 1
+
+/* Define to enable 3DNow! optimizations. */
+#undef HAVE_3DNOW
+
+/* Define to 1 if you have `alloca', as a function or macro. */
+#undef HAVE_ALLOCA
+
+/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
+   */
+#undef HAVE_ALLOCA_H
+
+/* Define to enable Altivec optimizations. */
+#undef HAVE_ALTIVEC
+
+/* Define to 1 if you have the `BSDgettimeofday' function. */
+#undef HAVE_BSDGETTIMEOFDAY
+
+/* Define to 1 if you have the `clock_gettime' function. */
+#undef HAVE_CLOCK_GETTIME
+
+/* Define to 1 if you have the `cosl' function. */
+#undef HAVE_COSL
+
+/* Define to 1 if you have the <c_asm.h> header file. */
+#undef HAVE_C_ASM_H
+
+/* Define to 1 if you have the declaration of `drand48', and to 0 if you don't. */
+#define HAVE_DECL_DRAND48	0
+
+/* Define to 1 if you have the declaration of `memalign', and to 0 if you
+   don't. */
+#define HAVE_DECL_MEMALIGN	0
+
+/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if
+   you don't. */
+#define HAVE_DECL_POSIX_MEMALIGN	0
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */
+#undef HAVE_DOPRNT
+
+/* Define to 1 if you have the `drand48' function. */
+#undef HAVE_DRAND48
+
+/* Define to 1 if you have the `gethrtime' function. */
+#undef HAVE_GETHRTIME
+
+/* Define to 1 if you have the `gettimeofday' function. */
+#undef HAVE_GETTIMEOFDAY
+
+/* Define to 1 if hrtime_t is defined in <sys/time.h> */
+#undef HAVE_HRTIME_T
+
+/* Define to 1 if you have the <intrinsics.h> header file. */
+#undef HAVE_INTRINSICS_H
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define if the isnan() function/macro is available. */
+#define HAVE_ISNAN	1
+
+/* Define to enable AMD K7 optimizations. */
+#undef HAVE_K7
+
+/* Define to 1 if you have the <libintl.h> header file. */
+#undef HAVE_LIBINTL_H
+
+/* Define to 1 if you have the `m' library (-lm). */
+#undef HAVE_LIBM
+
+/* Define to 1 if you have the <limits.h> header file. */
+#define HAVE_LIMITS_H	1
+
+/* Define to 1 if you have the <malloc.h> header file. */
+/* #define HAVE_MALLOC_H	1 */
+
+/* Define to 1 if you have the `memalign' function. */
+#undef HAVE_MEMALIGN
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H	1
+
+/* Define to 1 if you have the `memset' function. */
+#define HAVE_MEMSET	1
+
+/* Define to 1 if you have the `posix_memalign' function. */
+#undef HAVE_POSIX_MEMALIGN
+
+/* Define if you have POSIX threads libraries and header files. */
+#undef HAVE_PTHREAD
+
+/* Define to 1 if you have the `read_real_time' function. */
+#undef HAVE_READ_REAL_TIME
+
+/* Define to 1 if you have the `sinl' function. */
+#define HAVE_SINL	1
+
+/* Define to 1 if you have the `sqrt' function. */
+#define HAVE_SQRT	1
+
+/* Define to enable SSE optimizations. */
+#undef HAVE_SSE
+/*#define HAVE_SSE 1 */
+
+/* Define to enable SSE2 optimizations. */
+#undef HAVE_SSE2
+
+/* Define to 1 if you have the <stddef.h> header file. */
+#define HAVE_STDDEF_H	1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H	1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H	1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H	1
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#undef HAVE_SYS_TIME_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H	1
+
+/* Define to 1 if you have the `tanl' function. */
+#define HAVE_TANL	1
+
+/* Define if we have a threads library. */
+#undef HAVE_THREADS
+
+/* Define to 1 if you have the `time_base_to_time' function. */
+#undef HAVE_TIME_BASE_TO_TIME
+
+/* Define to 1 if the system has the type `uintptr_t'. */
+/* #undef HAVE_UINTPTR_T */
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to 1 if you have the `vprintf' function. */
+#define HAVE_VPRINTF	1
+
+/* Define to 1 if you have the `_mm_free' function. */
+#undef HAVE__MM_FREE
+
+/* Define to 1 if you have the `_mm_malloc' function. */
+#undef HAVE__MM_MALLOC
+
+/* Define if you have the UNICOS _rtc() intrinsic. */
+#undef HAVE__RTC
+
+/* Name of package */
+#define PACKAGE	"FFTW"
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME	"FFTW_PACKAGENAME"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING	"FFTW V3"
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION	3
+
+/* Define to the necessary symbol if this constant uses a non-standard name on
+   your system. */
+#undef PTHREAD_CREATE_JOINABLE
+
+/* The size of a `double', as computed by sizeof. */
+#define SIZEOF_DOUBLE	8
+
+/* The size of a `int', as computed by sizeof. */
+#define SIZEOF_INT	4
+
+/* The size of a `long', as computed by sizeof. */
+#define SIZEOF_LONG	4
+
+/* The size of a `long double', as computed by sizeof. */
+#define SIZEOF_LONG_DOUBLE	8
+
+/* The size of a `long long', as computed by sizeof. */
+#undef SIZEOF_LONG_LONG	/* NO LONG LONG IN VC++ */
+
+/* The size of a `unsigned int', as computed by sizeof. */
+#define SIZEOF_UNSIGNED_INT	4
+
+/* The size of a `unsigned long', as computed by sizeof. */
+#define SIZEOF_UNSIGNED_LONG	4
+
+/* The size of a `unsigned long long', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_LONG_LONG
+
+/* The size of a `void *', as computed by sizeof. */
+#define SIZEOF_VOID_P	4
+
+/* If using the C implementation of alloca, define if you know the
+   direction of stack growth for your system; otherwise it will be
+   automatically deduced at run-time.
+        STACK_DIRECTION > 0 => grows toward higher addresses
+        STACK_DIRECTION < 0 => grows toward lower addresses
+        STACK_DIRECTION = 0 => direction of growth unknown */
+#undef STACK_DIRECTION
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS	1
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#undef TIME_WITH_SYS_TIME
+
+/* Define if we have and are using OpenMP multithreading directives */
+#undef USING_OPENMP_THREADS
+
+/* Define if we have and are using POSIX threads. */
+#undef USING_POSIX_THREADS
+
+/* Version number of package */
+#define VERSION	"3"
+
+/* Use common Windows Fortran mangling styles for the Fortran interfaces. */
+#undef WINDOWS_F77_MANGLING
+
+/* Define to build without the cycle counter (presumably; the text that
+   stood here duplicated the WITH_OUR_MALLOC16 description below). */
+#undef WITHOUT_CYCLE_COUNTER
+
+/* Use our own 16-byte aligned malloc routine; mainly helpful for Windows
+   systems lacking aligned allocation system-library routines. */
+#define WITH_OUR_MALLOC16	1
+
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
+
+/* Define as `__inline' if that's what the C compiler calls it, or to nothing
+   if it is not supported. */
+#define inline	__inline
+
+/* Define to `unsigned' if <sys/types.h> does not define. */
+#undef size_t
+
+/*			 VC++ specific parameters  */
+/*			added May14th 2003, S.Ruel */
+#define		HAVE_CONFIG_H		1
+#define		isnan				_isnan
+#define		__inline__			__inline
+#define		__asm__				__asm
+
+/* The intel and processor pack libraries should be compatible */
+#ifdef _MSC_VER
+#define		__ICC				1
+#endif
diff --git a/src/fftw3/api/configure.c b/src/fftw3/api/configure.c
new file mode 100644
index 0000000..995281b
--- /dev/null
+++ b/src/fftw3/api/configure.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "dft.h"
+#include "rdft.h"
+#include "reodft.h"
+
+void X(configure_planner)(planner *plnr)
+{				/* run the three *_conf_standard routines on plnr */
+     X(dft_conf_standard)(plnr);
+     X(rdft_conf_standard)(plnr);
+     X(reodft_conf_standard)(plnr);
+}
diff --git a/src/fftw3/api/execute-dft-c2r.c b/src/fftw3/api/execute-dft-c2r.c
new file mode 100644
index 0000000..18b0749
--- /dev/null
+++ b/src/fftw3/api/execute-dft-c2r.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+/* Guru-interface c2r execute on the given arrays; caller must take care of alignment, r - i, etc. */
+void X(execute_dft_c2r)(const X(plan) p, C *in, R *out)
+WITH_ALIGNED_STACK({
+     plan_rdft2 *rdft2 = (plan_rdft2 *) p->pln;
+     rdft2->apply((plan *) rdft2, out, *in, *in + 1);	/* real array first, then the two parts of in */
+})
diff --git a/src/fftw3/api/execute-dft-r2c.c b/src/fftw3/api/execute-dft-r2c.c
new file mode 100644
index 0000000..1f4818f
--- /dev/null
+++ b/src/fftw3/api/execute-dft-r2c.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+/* Guru-interface r2c execute on the given arrays; caller must take care of alignment, r - i, etc. */
+void X(execute_dft_r2c)(const X(plan) p, R *in, C *out)
+WITH_ALIGNED_STACK({
+     plan_rdft2 *rdft2 = (plan_rdft2 *) p->pln;
+     rdft2->apply((plan *) rdft2, in, *out, *out + 1);	/* real array first, then the two parts of out */
+})
diff --git a/src/fftw3/api/execute-dft.c b/src/fftw3/api/execute-dft.c
new file mode 100644
index 0000000..67fc9b8
--- /dev/null
+++ b/src/fftw3/api/execute-dft.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "dft.h"
+
+/* Guru-interface complex DFT execute on the given arrays; caller must take care of alignment etc. */
+void X(execute_dft)(const X(plan) p, C *in, C *out)
+WITH_ALIGNED_STACK({
+     plan_dft *dft = (plan_dft *) p->pln;
+     if (p->sign != FFT_SIGN)	/* other sign: pass the two components in swapped order */
+	  dft->apply((plan *) dft, *in + 1, *in, *out + 1, *out);
+     else
+	  dft->apply((plan *) dft, *in, *in + 1, *out, *out + 1);
+})
diff --git a/src/fftw3/api/execute-r2r.c b/src/fftw3/api/execute-r2r.c
new file mode 100644
index 0000000..d05f4ef
--- /dev/null
+++ b/src/fftw3/api/execute-r2r.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+/* Guru-interface r2r execute on the given arrays; caller must take care of alignment etc. */
+void X(execute_r2r)(const X(plan) p, R *in, R *out)
+WITH_ALIGNED_STACK({
+     plan_rdft *rdft = (plan_rdft *) p->pln;
+     rdft->apply((plan *) rdft, in, out);
+})
diff --git a/src/fftw3/api/execute-split-dft-c2r.c b/src/fftw3/api/execute-split-dft-c2r.c
new file mode 100644
index 0000000..5941436
--- /dev/null
+++ b/src/fftw3/api/execute-split-dft-c2r.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+/* Guru-interface split-format c2r execute; caller must take care of alignment, r - i, etc. */
+void X(execute_split_dft_c2r)(const X(plan) p, R *ri, R *ii, R *out)
+WITH_ALIGNED_STACK({
+     plan_rdft2 *rdft2 = (plan_rdft2 *) p->pln;
+     rdft2->apply((plan *) rdft2, out, ri, ii);	/* real array goes first, as in the r2c case */
+})
diff --git a/src/fftw3/api/execute-split-dft-r2c.c b/src/fftw3/api/execute-split-dft-r2c.c
new file mode 100644
index 0000000..72eccb8
--- /dev/null
+++ b/src/fftw3/api/execute-split-dft-r2c.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+/* Guru-interface split-format r2c execute; caller must take care of alignment, r - i, etc. */
+void X(execute_split_dft_r2c)(const X(plan) p, R *in, R *ro, R *io)
+WITH_ALIGNED_STACK({
+     plan_rdft2 *rdft2 = (plan_rdft2 *) p->pln;
+     rdft2->apply((plan *) rdft2, in, ro, io);
+})
diff --git a/src/fftw3/api/execute-split-dft.c b/src/fftw3/api/execute-split-dft.c
new file mode 100644
index 0000000..4c226a9
--- /dev/null
+++ b/src/fftw3/api/execute-split-dft.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "dft.h"
+
+/* Guru-interface split-format complex DFT execute; caller must take care of alignment, r - i, etc. */
+void X(execute_split_dft)(const X(plan) p, R *ri, R *ii, R *ro, R *io)
+WITH_ALIGNED_STACK({
+     plan_dft *dft = (plan_dft *) p->pln;
+     dft->apply((plan *) dft, ri, ii, ro, io);
+})
diff --git a/src/fftw3/api/execute.c b/src/fftw3/api/execute.c
new file mode 100644
index 0000000..f995c1c
--- /dev/null
+++ b/src/fftw3/api/execute.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+void X(execute)(const X(plan) p)
+WITH_ALIGNED_STACK({
+     /* run the plan held in p on the problem held in p */
+     p->pln->adt->solve(p->pln, p->prb);
+})
diff --git a/src/fftw3/api/export-wisdom-to-file.c b/src/fftw3/api/export-wisdom-to-file.c
new file mode 100644
index 0000000..769f1b4
--- /dev/null
+++ b/src/fftw3/api/export-wisdom-to-file.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+void X(export_wisdom_to_file)(FILE *output_file)
+{
+     printer *out = X(mkprinter_file)(output_file);	/* printer made from output_file */
+     planner *the_plnr = X(the_planner)();
+     the_plnr->adt->exprt(the_plnr, out);	/* planner's export hook, given that printer */
+     X(printer_destroy)(out);
+}
diff --git a/src/fftw3/api/export-wisdom-to-string.c b/src/fftw3/api/export-wisdom-to-string.c
new file mode 100644
index 0000000..df38b51
--- /dev/null
+++ b/src/fftw3/api/export-wisdom-to-string.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+typedef struct {
+     printer super;	/* first member: putchr_cnt casts the printer * it receives to P_cnt * */
+     int *cnt;		/* counter that putchr_cnt increments once per character */
+} P_cnt;
+
+static void putchr_cnt(printer * p_, char c)
+{
+     int *counter = ((P_cnt *) p_)->cnt;
+     UNUSED(c);	/* the character itself is ignored; only the count is kept */
+     *counter += 1;
+}
+
+static printer *mkprinter_cnt(int *cnt)
+{
+     P_cnt *counting = (P_cnt *) X(mkprinter)(sizeof(P_cnt), putchr_cnt, 0);
+     *cnt = 0;			/* start the caller's counter at zero */
+     counting->cnt = cnt;
+     return &counting->super;
+}
+
+typedef struct {
+     printer super;	/* first member: putchr_str casts the printer * it receives to P_str * */
+     char *s;		/* write position; putchr_str stores a character here and advances it */
+} P_str;
+
+static void putchr_str(printer * p_, char c)
+{
+     P_str *sp = (P_str *) p_;
+     sp->s[0] = c;	/* store the character at the current write position */
+     *++sp->s = 0;	/* advance, and keep the string terminated after every character */
+}
+
+static printer *mkprinter_str(char *s)
+{
+     P_str *writer = (P_str *) X(mkprinter)(sizeof(P_str), putchr_str, 0);
+     *s = 0;			/* buffer starts out as the empty string */
+     writer->s = s;
+     return &writer->super;
+}
+
+char *X(export_wisdom_to_string)(void)
+{
+     planner *plnr = X(the_planner)();
+     printer *pr;
+     char *buf;
+     int nchars;
+     /* Two passes: count the exported characters, then export into a buffer. */
+     pr = mkprinter_cnt(&nchars);
+     plnr->adt->exprt(plnr, pr);
+     X(printer_destroy)(pr);
+
+     buf = (char *) NATIVE_MALLOC(sizeof(char) * (nchars + 1), OTHER);
+     if (buf) {	/* second pass only if the allocation succeeded */
+          pr = mkprinter_str(buf);
+          plnr->adt->exprt(plnr, pr);
+          X(printer_destroy)(pr);
+     }
+
+     return buf;	/* null when NATIVE_MALLOC returned null */
+}
diff --git a/src/fftw3/api/export-wisdom.c b/src/fftw3/api/export-wisdom.c
new file mode 100644
index 0000000..d05c067
--- /dev/null
+++ b/src/fftw3/api/export-wisdom.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+typedef struct {
+     printer super;	/* first member: putchr_generic casts the printer * it receives to P * */
+     void (*write_char)(char c, void *);	/* callback invoked once per character */
+     void *data;	/* passed unchanged as the callback's second argument */
+} P;
+
+static void putchr_generic(printer * p_, char c)
+{
+     P *self = (P *) p_;
+     self->write_char(c, self->data);	/* hand the character to the stored callback */
+}
+
+void X(export_wisdom)(void (*write_char)(char c, void *), void *data)
+{
+     P *gp = (P *) X(mkprinter)(sizeof(P), putchr_generic, 0);
+     planner *the_plnr = X(the_planner)();
+     /* store the callback and its data, then export through this printer */
+     gp->data = data;
+     gp->write_char = write_char;
+     the_plnr->adt->exprt(the_plnr, &gp->super);
+     X(printer_destroy)(&gp->super);
+}
diff --git a/src/fftw3/api/extract-reim.c b/src/fftw3/api/extract-reim.c
new file mode 100644
index 0000000..3a90ab5
--- /dev/null
+++ b/src/fftw3/api/extract-reim.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+/* decompose complex pointer into real and imaginary parts.
+   Flip real and imaginary if the sign does not match
+   FFTW's idea of what the sign should be */
+
+void X(extract_reim)(int sign, C *c, R **r, R **i)
+{
+     /* flip is 1 when sign differs from FFT_SIGN; the two components
+        of c[0] are then handed back in swapped order */
+     const int flip = (sign != FFT_SIGN);
+     /* *r gets component 0 normally, component 1 when flipped */
+     *r = c[0] + flip;
+     /* *i gets the other component */
+     *i = c[0] + (1 - flip);
+}
diff --git a/src/fftw3/api/f77api.c b/src/fftw3/api/f77api.c
new file mode 100644
index 0000000..3f70716
--- /dev/null
+++ b/src/fftw3/api/f77api.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "dft.h"
+#include "rdft.h"
+
+/* if neither F77_FUNC nor WINDOWS_F77_MANGLING is defined, we don't know how
+   to mangle identifiers for the Fortran linker, and we must omit the f77 API. */
+#if defined(F77_FUNC) || defined(WINDOWS_F77_MANGLING)
+
+/*-----------------------------------------------------------------------*/
+/* some internal functions used by the f77 api */
+
+/* Fortran arrays are column-major, i.e. the dimensions come in the reverse
+   of C's row-major order: return a newly allocated reversed copy of n[0..rnk-1] */
+static int *reverse_n(int rnk, const int *n)
+{
+     int *rev;
+     int k;
+     A(FINITE_RNK(rnk));
+     rev = (int *) MALLOC(sizeof(int) * rnk, PROBLEMS);
+     for (k = 0; k < rnk; ++k)
+          rev[k] = n[rnk - 1 - k];	/* output slot k takes the mirrored input */
+     return rev;
+}
+
+/* f77 has no data structures, so iodims arrive as three parallel arrays;
+   element k of n, is and os is packed into iodim k of a new array */
+static X(iodim) *make_dims(int rnk, const int *n,
+			   const int *is, const int *os)
+{
+     X(iodim) *packed;
+     int k;
+     A(FINITE_RNK(rnk));
+     packed = (X(iodim) *) MALLOC(sizeof(X(iodim)) * rnk, PROBLEMS);
+     for (k = 0; k < rnk; ++k) {
+          packed[k].os = os[k];
+          packed[k].is = is[k];
+          packed[k].n = n[k];
+     }
+     return packed;
+}
+
+typedef struct {
+     void (*f77_write_char)(char *, void *);	/* callback; receives the character by address */
+     void *data;	/* passed unchanged as the callback's second argument */
+} write_char_data;
+
+static void write_char(char c, void *d)
+{
+     write_char_data *cb = (write_char_data *) d;
+     (*cb->f77_write_char)(&c, cb->data);	/* the callback takes the character by address */
+}
+
+typedef struct {
+     void (*f77_read_char)(int *, void *);	/* callback; stores the character read through its first argument */
+     void *data;	/* passed unchanged as the callback's second argument */
+} read_char_data;
+
+static int read_char(void *d)
+{
+     read_char_data *cb = (read_char_data *) d;
+     int ch;
+     (*cb->f77_read_char)(&ch, cb->data);
+     return (ch >= 0 ? ch : EOF);	/* any negative value from the callback is reported as EOF */
+}
+
+static X(r2r_kind) *ints2kinds(int rnk, const int *ik)
+{
+     X(r2r_kind) *kinds;
+     int j;
+
+     /* no array is produced for a non-finite or zero rank */
+     if (!FINITE_RNK(rnk) || rnk == 0)
+	  return 0;
+
+     kinds = (X(r2r_kind) *) MALLOC(sizeof(X(r2r_kind)) * rnk, PROBLEMS);
+     /* Fortran -> C: entries are copied in reverse order */
+     for (j = 0; j < rnk; ++j)
+	  kinds[j] = (X(r2r_kind)) ik[rnk - 1 - j];
+     return kinds;
+}
+
+/*-----------------------------------------------------------------------*/
+
+#include "x77.h"
+
+#define F77(a, A) F77x(x77(a), X77(A))
+
+#ifndef WINDOWS_F77_MANGLING
+
+#if defined(F77_FUNC)
+#  define F77x(a, A) F77_FUNC(a, A)
+#  include "f77funcs.h"
+#endif
+
+/* If identifiers with underscores are mangled differently than those
+   without underscores, then we include *both* mangling versions.  The
+   reason is that the only Fortran compiler that does such differing
+   mangling is currently g77 (which adds an extra underscore to names
+   with underscores), whereas other compilers running on the same
+   machine are likely to use g77's non-underscored mangling.  (I'm sick
+   of users complaining that FFTW works with g77 but not with e.g.
+   pgf77 or ifc on the same machine.)  Note that all FFTW identifiers
+   contain underscores, and configure picks g77 by default. */
+#if defined(F77_FUNC_) && !defined(F77_FUNC_EQUIV)
+#  undef F77x
+#  define F77x(a, A) F77_FUNC_(a, A)
+#  include "f77funcs.h"
+#endif
+
+#else /* WINDOWS_F77_MANGLING */
+
+/* Various mangling conventions common (?) under Windows. */
+
+/* g77 */
+#  define WINDOWS_F77_FUNC(a, A) a ## __
+#  define F77x(a, A) WINDOWS_F77_FUNC(a, A)
+#  include "f77funcs.h"
+
+/* Digital/Compaq/HP Visual Fortran, Intel Fortran.  stdcall attribute
+   is apparently required to adjust for calling conventions (callee
+   pops stack in stdcall).  See also:
+       http://msdn.microsoft.com/library/en-us/vccore98/html/_core_mixed.2d.language_programming.3a_.overview.asp
+*/
+#  undef WINDOWS_F77_FUNC
+#  if defined(__GNUC__)
+#    define WINDOWS_F77_FUNC(a, A) __attribute__((stdcall)) A
+#  elif defined(_MSC_VER) || defined(_ICC) || defined(_STDCALL_SUPPORTED)
+#    define WINDOWS_F77_FUNC(a, A) __stdcall A
+#  else
+#    define WINDOWS_F77_FUNC(a, A) A /* oh well */
+#  endif
+#  include "f77funcs.h"
+
+#endif /* WINDOWS_F77_MANGLING */
+
+#endif				/* F77_FUNC */
diff --git a/src/fftw3/api/f77funcs.h b/src/fftw3/api/f77funcs.h
new file mode 100644
index 0000000..ea0dd5c
--- /dev/null
+++ b/src/fftw3/api/f77funcs.h
@@ -0,0 +1,438 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* Functions in the FFTW Fortran API, mangled according to the
+   F77(...) macro.  This file is designed to be #included by
+   f77api.c, possibly multiple times in order to support multiple
+   compiler manglings (via redefinition of F77x, which F77 expands to). */
+
+void F77(execute, EXECUTE)(X(plan) * const p)
+WITH_ALIGNED_STACK({
+     X(plan) handle = *p;	/* p points at the plan handle */
+     handle->pln->adt->solve(handle->pln, handle->prb);
+})
+
+void F77(destroy_plan, DESTROY_PLAN)(X(plan) *p)
+{
+     X(destroy_plan)(*p);	/* p points at the plan handle; pass the handle itself on */
+}
+
+void F77(cleanup, CLEANUP)(void)
+{
+     X(cleanup)();	/* plain forward to the C routine; nothing to convert */
+}
+
+void F77(forget_wisdom, FORGET_WISDOM)(void)
+{
+     X(forget_wisdom)();	/* plain forward to the C routine; nothing to convert */
+}
+
+void F77(export_wisdom, EXPORT_WISDOM)(void (*f77_write_char)(char *, void *),
+				       void *data)
+{
+     write_char_data cb;	/* bundles the callback with its data for write_char */
+     cb.data = data;
+     cb.f77_write_char = f77_write_char;
+     X(export_wisdom)(write_char, (void *) &cb);
+}
+
+void F77(import_wisdom, IMPORT_WISDOM)(int *isuccess,
+				       void (*f77_read_char)(int *, void *),
+				       void *data)
+{
+     read_char_data cb;	/* bundles the callback with its data for read_char */
+     cb.data = data;
+     cb.f77_read_char = f77_read_char;
+     *isuccess = X(import_wisdom)(read_char, (void *) &cb);	/* result returned through isuccess */
+}
+
+void F77(import_system_wisdom, IMPORT_SYSTEM_WISDOM)(int *isuccess)
+{
+     *isuccess = X(import_system_wisdom)();	/* C return value is handed back through isuccess */
+}
+
+void F77(print_plan, PRINT_PLAN)(X(plan) * const p)
+{
+     X(print_plan)(*p);	/* p points at the plan handle; pass the handle itself on */
+}
+
+void F77(flops,FLOPS)(X(plan) *p, double *add, double *mul, double *fma)
+{
+     X(flops)(*p, add, mul, fma);	/* only the plan is dereferenced; the three double pointers pass straight through */
+}
+
+/******************************** DFT ***********************************/
+
+void F77(plan_dft, PLAN_DFT)(X(plan) *p, int *rank, const int *n,
+			     C *in, C *out, int *sign, int *flags)
+{
+     int *n_rev = reverse_n(*rank, n);	/* temporary reversed copy of n */
+     *p = X(plan_dft)(*rank, n_rev, in, out, *sign, *flags);
+     X(ifree0)(n_rev);
+}
+
+void F77(plan_dft_1d, PLAN_DFT_1D)(X(plan) *p, int *n, C *in, C *out,
+				   int *sign, int *flags)
+{
+     *p = X(plan_dft_1d)(*n, in, out, *sign, *flags);	/* scalars arrive by pointer; new plan is stored in *p */
+}
+
+void F77(plan_dft_2d, PLAN_DFT_2D)(X(plan) *p, int *nx, int *ny,
+				   C *in, C *out, int *sign, int *flags)
+{
+     *p = X(plan_dft_2d)(*ny, *nx, in, out, *sign, *flags);	/* note: dimensions are passed as (ny, nx) */
+}
+
+void F77(plan_dft_3d, PLAN_DFT_3D)(X(plan) *p, int *nx, int *ny, int *nz,
+				   C *in, C *out,
+				   int *sign, int *flags)
+{
+     *p = X(plan_dft_3d)(*nz, *ny, *nx, in, out, *sign, *flags);	/* note: dimensions are passed as (nz, ny, nx) */
+}
+
+void F77(plan_many_dft, PLAN_MANY_DFT)(X(plan) *p, int *rank, const int *n,
+				       int *howmany,
+				       C *in, const int *inembed,
+				       int *istride, int *idist,
+				       C *out, const int *onembed,
+				       int *ostride, int *odist,
+				       int *sign, int *flags)
+{
+     int *n_rev = reverse_n(*rank, n);	/* all three dimension arrays are reversed */
+     int *in_rev = reverse_n(*rank, inembed);
+     int *on_rev = reverse_n(*rank, onembed);
+     *p = X(plan_many_dft)(*rank, n_rev, *howmany,
+			   in, in_rev, *istride, *idist,
+			   out, on_rev, *ostride, *odist,
+			   *sign, *flags);
+     X(ifree0)(on_rev);	/* temporaries are released after planning */
+     X(ifree0)(in_rev);
+     X(ifree0)(n_rev);
+}
+
+void F77(plan_guru_dft, PLAN_GURU_DFT)(X(plan) *p, int *rank, const int *n,
+				       const int *is, const int *os,
+				       int *howmany_rank, const int *h_n,
+				       const int *h_is, const int *h_os,
+				       C *in, C *out, int *sign, int *flags)
+{
+     X(iodim) *xform = make_dims(*rank, n, is, os);	/* parallel arrays packed into iodims */
+     X(iodim) *batch = make_dims(*howmany_rank, h_n, h_is, h_os);
+     *p = X(plan_guru_dft)(*rank, xform, *howmany_rank, batch,
+			   in, out, *sign, *flags);
+     X(ifree0)(batch);
+     X(ifree0)(xform);
+}
+
+void F77(plan_guru_split_dft, PLAN_GURU_SPLIT_DFT)(X(plan) *p, int *rank, const int *n,
+				       const int *is, const int *os,
+				       int *howmany_rank, const int *h_n,
+				       const int *h_is, const int *h_os,
+				       R *ri, R *ii, R *ro, R *io, int *flags)
+{
+     X(iodim) *xform = make_dims(*rank, n, is, os);	/* parallel arrays packed into iodims */
+     X(iodim) *batch = make_dims(*howmany_rank, h_n, h_is, h_os);
+     *p = X(plan_guru_split_dft)(*rank, xform, *howmany_rank, batch,
+			   ri, ii, ro, io, *flags);
+     X(ifree0)(batch);
+     X(ifree0)(xform);
+}
+
+void F77(execute_dft, EXECUTE_DFT)(X(plan) * const p, C *in, C *out)
+WITH_ALIGNED_STACK({
+     plan_dft *dft = (plan_dft *) (*p)->pln;
+     if ((*p)->sign != FFT_SIGN)	/* other sign: pass the two components in swapped order */
+          dft->apply((plan *) dft, *in + 1, *in, *out + 1, *out);
+     else
+          dft->apply((plan *) dft, *in, *in + 1, *out, *out + 1);
+})
+
+void F77(execute_split_dft, EXECUTE_SPLIT_DFT)(X(plan) * const p,
+					       R *ri, R *ii, R *ro, R *io)
+WITH_ALIGNED_STACK({
+     plan_dft *dft = (plan_dft *) (*p)->pln;	/* p points at the plan handle */
+     dft->apply((plan *) dft, ri, ii, ro, io);
+})
+
+/****************************** DFT r2c *********************************/
+
+void F77(plan_dft_r2c, PLAN_DFT_R2C)(X(plan) *p, int *rank, const int *n,
+				     R *in, C *out, int *flags)
+{
+     int *n_rev = reverse_n(*rank, n);	/* temporary reversed copy of n */
+     *p = X(plan_dft_r2c)(*rank, n_rev, in, out, *flags);
+     X(ifree0)(n_rev);
+}
+
+void F77(plan_dft_r2c_1d, PLAN_DFT_R2C_1D)(X(plan) *p, int *n, R *in, C *out,
+					   int *flags)
+{
+     *p = X(plan_dft_r2c_1d)(*n, in, out, *flags);	/* scalars arrive by pointer; new plan is stored in *p */
+}
+
+void F77(plan_dft_r2c_2d, PLAN_DFT_R2C_2D)(X(plan) *p, int *nx, int *ny,
+					   R *in, C *out, int *flags)
+{
+     *p = X(plan_dft_r2c_2d)(*ny, *nx, in, out, *flags);	/* note: dimensions are passed as (ny, nx) */
+}
+
+void F77(plan_dft_r2c_3d, PLAN_DFT_R2C_3D)(X(plan) *p,
+					   int *nx, int *ny, int *nz,
+					   R *in, C *out,
+					   int *flags)
+{
+     *p = X(plan_dft_r2c_3d)(*nz, *ny, *nx, in, out, *flags);	/* note: dimensions are passed as (nz, ny, nx) */
+}
+
+void F77(plan_many_dft_r2c, PLAN_MANY_DFT_R2C)(
+     X(plan) *p, int *rank, const int *n,
+     int *howmany,
+     R *in, const int *inembed, int *istride, int *idist,
+     C *out, const int *onembed, int *ostride, int *odist,
+     int *flags)
+{
+     int *n_rev = reverse_n(*rank, n);	/* all three dimension arrays are reversed */
+     int *in_rev = reverse_n(*rank, inembed);
+     int *on_rev = reverse_n(*rank, onembed);
+     *p = X(plan_many_dft_r2c)(*rank, n_rev, *howmany,
+			       in, in_rev, *istride, *idist,
+			       out, on_rev, *ostride, *odist,
+			       *flags);
+     X(ifree0)(on_rev);	/* temporaries are released after planning */
+     X(ifree0)(in_rev);
+     X(ifree0)(n_rev);
+}
+
+void F77(plan_guru_dft_r2c, PLAN_GURU_DFT_R2C)(
+     X(plan) *p, int *rank, const int *n,
+     const int *is, const int *os,
+     int *howmany_rank, const int *h_n,
+     const int *h_is, const int *h_os,
+     R *in, C *out, int *flags)
+{
+     X(iodim) *xform = make_dims(*rank, n, is, os);	/* parallel arrays packed into iodims */
+     X(iodim) *batch = make_dims(*howmany_rank, h_n, h_is, h_os);
+     *p = X(plan_guru_dft_r2c)(*rank, xform, *howmany_rank, batch,
+			       in, out, *flags);
+     X(ifree0)(batch);
+     X(ifree0)(xform);
+}
+
+void F77(plan_guru_split_dft_r2c, PLAN_GURU_SPLIT_DFT_R2C)(
+     X(plan) *p, int *rank, const int *n,
+     const int *is, const int *os,
+     int *howmany_rank, const int *h_n,
+     const int *h_is, const int *h_os,
+     R *in, R *ro, R *io, int *flags)
+{
+     X(iodim) *xform = make_dims(*rank, n, is, os);	/* parallel arrays packed into iodims */
+     X(iodim) *batch = make_dims(*howmany_rank, h_n, h_is, h_os);
+     *p = X(plan_guru_split_dft_r2c)(*rank, xform, *howmany_rank, batch,
+			       in, ro, io, *flags);
+     X(ifree0)(batch);
+     X(ifree0)(xform);
+}
+
+void F77(execute_dft_r2c, EXECUTE_DFT_R2C)(X(plan) * const p, R *in, C *out)
+WITH_ALIGNED_STACK({
+     plan_rdft2 *rdft2 = (plan_rdft2 *) (*p)->pln;	/* p points at the plan handle */
+     rdft2->apply((plan *) rdft2, in, *out, *out + 1);
+})
+
+void F77(execute_split_dft_r2c, EXECUTE_SPLIT_DFT_R2C)(X(plan) * const p,
+						       R *in, R *ro, R *io)
+WITH_ALIGNED_STACK({
+     plan_rdft2 *rdft2 = (plan_rdft2 *) (*p)->pln;	/* p points at the plan handle */
+     rdft2->apply((plan *) rdft2, in, ro, io);
+})
+
+/****************************** DFT c2r *********************************/
+
+void F77(plan_dft_c2r, PLAN_DFT_C2R)(X(plan) *p, int *rank, const int *n,
+				     C *in, R *out, int *flags)
+{
+     int *n_rev = reverse_n(*rank, n);	/* temporary reversed copy of n */
+     *p = X(plan_dft_c2r)(*rank, n_rev, in, out, *flags);
+     X(ifree0)(n_rev);
+}
+
+void F77(plan_dft_c2r_1d, PLAN_DFT_C2R_1D)(X(plan) *p, int *n, C *in, R *out,
+					   int *flags)
+{
+     *p = X(plan_dft_c2r_1d)(*n, in, out, *flags);	/* scalars arrive by pointer; new plan is stored in *p */
+}
+
+void F77(plan_dft_c2r_2d, PLAN_DFT_C2R_2D)(X(plan) *p, int *nx, int *ny,
+					   C *in, R *out, int *flags)
+{
+     *p = X(plan_dft_c2r_2d)(*ny, *nx, in, out, *flags);	/* note: dimensions are passed as (ny, nx) */
+}
+
+void F77(plan_dft_c2r_3d, PLAN_DFT_C2R_3D)(X(plan) *p,
+					   int *nx, int *ny, int *nz,
+					   C *in, R *out,
+					   int *flags)
+{
+     *p = X(plan_dft_c2r_3d)(*nz, *ny, *nx, in, out, *flags);	/* note: dimensions are passed as (nz, ny, nx) */
+}
+
+void F77(plan_many_dft_c2r, PLAN_MANY_DFT_C2R)(
+     X(plan) *p, int *rank, const int *n,
+     int *howmany,
+     C *in, const int *inembed, int *istride, int *idist,
+     R *out, const int *onembed, int *ostride, int *odist,
+     int *flags)
+{
+     int *n_rev = reverse_n(*rank, n);	/* all three dimension arrays are reversed */
+     int *in_rev = reverse_n(*rank, inembed);
+     int *on_rev = reverse_n(*rank, onembed);
+     *p = X(plan_many_dft_c2r)(*rank, n_rev, *howmany,
+			       in, in_rev, *istride, *idist,
+			       out, on_rev, *ostride, *odist,
+			       *flags);
+     X(ifree0)(on_rev);	/* temporaries are released after planning */
+     X(ifree0)(in_rev);
+     X(ifree0)(n_rev);
+}
+
+/* Fortran wrapper for plan_guru_dft_c2r: builds iodim arrays with make_dims
+   for the transform and howmany loops, plans, then frees both arrays. */
+void F77(plan_guru_dft_c2r, PLAN_GURU_DFT_C2R)(
+     X(plan) *p, int *rank, const int *n, const int *is, const int *os,
+     int *howmany_rank, const int *h_n, const int *h_is, const int *h_os,
+     C *in, R *out, int *flags)
+{
+     X(iodim) *xform = make_dims(*rank, n, is, os);
+     X(iodim) *loops = make_dims(*howmany_rank, h_n, h_is, h_os);
+     *p = X(plan_guru_dft_c2r)(*rank, xform, *howmany_rank, loops,
+			       in, out, *flags);
+     X(ifree0)(loops);
+     X(ifree0)(xform);
+}
+
+/* Fortran wrapper for plan_guru_split_dft_c2r: builds iodim arrays with
+   make_dims for the transform and howmany loops, plans, then frees both. */
+void F77(plan_guru_split_dft_c2r, PLAN_GURU_SPLIT_DFT_C2R)(
+     X(plan) *p, int *rank, const int *n, const int *is, const int *os,
+     int *howmany_rank, const int *h_n, const int *h_is, const int *h_os,
+     R *ri, R *ii, R *out, int *flags)
+{
+     X(iodim) *xform = make_dims(*rank, n, is, os);
+     X(iodim) *loops = make_dims(*howmany_rank, h_n, h_is, h_os);
+     *p = X(plan_guru_split_dft_c2r)(*rank, xform, *howmany_rank, loops,
+				     ri, ii, out, *flags);
+     X(ifree0)(loops);
+     X(ifree0)(xform);
+}
+
+void F77(execute_dft_c2r, EXECUTE_DFT_C2R)(X(plan) * const p, C *in, R *out)
+WITH_ALIGNED_STACK({ /* apply the rdft2 plan held by *p to the caller's in/out */
+     plan_rdft2 *rdft2 = (plan_rdft2 *) (*p)->pln;
+     rdft2->apply((plan *) rdft2, out, *in, *in + 1); /* *in is in[0] */
+})
+
+/* Fortran entry point: run the rdft2 plan in *p, handing out, ri, ii to its apply hook. */
+void F77(execute_split_dft_c2r, EXECUTE_SPLIT_DFT_C2R)(X(plan) * const p, R *ri, R *ii, R *out)
+WITH_ALIGNED_STACK({
+     plan_rdft2 *rdft2 = (plan_rdft2 *) (*p)->pln;
+     rdft2->apply((plan *) rdft2, out, ri, ii);
+})
+
+/****************************** r2r *********************************/
+
+/* Fortran wrapper for plan_r2r: n goes through reverse_n, kind through ints2kinds. */
+void F77(plan_r2r, PLAN_R2R)(X(plan) *p, int *rank, const int *n, R *in, R *out,
+			     int *kind, int *flags)
+{
+     int *n_c = reverse_n(*rank, n);
+     X(r2r_kind) *kinds_c = ints2kinds(*rank, kind);
+     *p = X(plan_r2r)(*rank, n_c, in, out, kinds_c, *flags);
+     X(ifree0)(kinds_c);
+     X(ifree0)(n_c);
+}
+
+/* Fortran wrapper for plan_r2r_1d: by-reference scalars are dereferenced, *kind is cast. */
+void F77(plan_r2r_1d, PLAN_R2R_1D)(X(plan) *p, int *n, R *in, R *out, int *kind, int *flags)
+{
+     *p = X(plan_r2r_1d)(*n, in, out, (X(r2r_kind)) *kind, *flags);
+}
+
+/* Fortran wrapper for plan_r2r_2d; sizes and kinds are both passed in (y, x) order. */
+void F77(plan_r2r_2d, PLAN_R2R_2D)(X(plan) *p, int *nx, int *ny, R *in, R *out,
+				   int *kindx, int *kindy, int *flags)
+{
+     const X(r2r_kind) ky = (X(r2r_kind)) *kindy, kx = (X(r2r_kind)) *kindx;
+     *p = X(plan_r2r_2d)(*ny, *nx, in, out, ky, kx, *flags);
+}
+
+/* Fortran wrapper for plan_r2r_3d; sizes and kinds are both handed to the C
+   planner in (z, y, x) order. */
+void F77(plan_r2r_3d, PLAN_R2R_3D)(X(plan) *p, int *nx, int *ny, int *nz,
+				   R *in, R *out,
+				   int *kindx, int *kindy, int *kindz, int *flags)
+{
+     const X(r2r_kind) kz = (X(r2r_kind)) *kindz;
+     const X(r2r_kind) ky = (X(r2r_kind)) *kindy, kx = (X(r2r_kind)) *kindx;
+     *p = X(plan_r2r_3d)(*nz, *ny, *nx, in, out, kz, ky, kx, *flags);
+}
+
+/* Fortran wrapper for plan_many_r2r.  n, inembed and onembed go through
+   reverse_n and kind through ints2kinds before the C planner is called;
+   all four temporaries are released afterwards. */
+void F77(plan_many_r2r, PLAN_MANY_R2R)(
+     X(plan) *p, int *rank, const int *n, int *howmany,
+     R *in, const int *inembed, int *istride, int *idist,
+     R *out, const int *onembed, int *ostride, int *odist, int *kind, int *flags)
+{
+     int *n_c = reverse_n(*rank, n);
+     int *iembed_c = reverse_n(*rank, inembed);
+     int *oembed_c = reverse_n(*rank, onembed);
+     X(r2r_kind) *kinds_c = ints2kinds(*rank, kind);
+     *p = X(plan_many_r2r)(*rank, n_c, *howmany,
+			   in, iembed_c, *istride, *idist,
+			   out, oembed_c, *ostride, *odist, kinds_c, *flags);
+     X(ifree0)(kinds_c);
+     X(ifree0)(oembed_c);
+     X(ifree0)(iembed_c);
+     X(ifree0)(n_c);
+}
+
+/* Fortran wrapper for plan_guru_r2r: iodim arrays come from make_dims, the
+   kind array from ints2kinds; all three are freed once the plan is made. */
+void F77(plan_guru_r2r, PLAN_GURU_R2R)(
+     X(plan) *p, int *rank, const int *n, const int *is, const int *os,
+     int *howmany_rank, const int *h_n, const int *h_is, const int *h_os,
+     R *in, R *out, int *kind, int *flags)
+{
+     X(iodim) *xform = make_dims(*rank, n, is, os);
+     X(iodim) *loops = make_dims(*howmany_rank, h_n, h_is, h_os);
+     X(r2r_kind) *kinds_c = ints2kinds(*rank, kind);
+     *p = X(plan_guru_r2r)(*rank, xform, *howmany_rank, loops,
+			   in, out, kinds_c, *flags);
+     X(ifree0)(kinds_c);
+     X(ifree0)(loops);
+     X(ifree0)(xform);
+}
+
+void F77(execute_r2r, EXECUTE_R2R)(X(plan) * const p, R *in, R *out)
+WITH_ALIGNED_STACK({ /* apply the rdft plan held by *p to the caller's in/out */
+     plan_rdft *rdft = (plan_rdft *) (*p)->pln;
+     rdft->apply((plan *) rdft, in, out);
+})
diff --git a/src/fftw3/api/fftw3.h b/src/fftw3/api/fftw3.h
new file mode 100644
index 0000000..6cb3254
--- /dev/null
+++ b/src/fftw3/api/fftw3.h
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* header file for fftw3 */
+/* $Id: fftw3.h,v 1.1 2008/10/17 06:10:47 scuri Exp $ */
+
+#ifndef FFTW3_H
+#define FFTW3_H
+
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif /* __cplusplus */
+
+/* If <complex.h> is included, use the C99 complex type.  Otherwise
+   define a type bit-compatible with C99 complex */
+#ifdef _Complex_I /* macro tested as the sign that <complex.h> was included */
+#  define FFTW_DEFINE_COMPLEX(R, C) typedef R _Complex C
+#else /* no <complex.h>: a complex number is an array of two R */
+#  define FFTW_DEFINE_COMPLEX(R, C) typedef R C[2]
+#endif /* _Complex_I */
+/* name mangling: each precision pastes its own prefix onto the API name */
+#define FFTW_CONCAT(prefix, name) prefix ## name
+#define FFTW_MANGLE_DOUBLE(name) FFTW_CONCAT(fftw_, name)
+#define FFTW_MANGLE_FLOAT(name) FFTW_CONCAT(fftwf_, name)
+#define FFTW_MANGLE_LONG_DOUBLE(name) FFTW_CONCAT(fftwl_, name)
+
+
+enum fftw_r2r_kind_do_not_use_me { /* exposed per precision as X(r2r_kind) by FFTW_DEFINE_API */
+     FFTW_R2HC=0, FFTW_HC2R=1, FFTW_DHT=2,
+     FFTW_REDFT00=3, FFTW_REDFT01=4, FFTW_REDFT10=5, FFTW_REDFT11=6,
+     FFTW_RODFT00=7, FFTW_RODFT01=8, FFTW_RODFT10=9, FFTW_RODFT11=10
+}; /* values are explicit and contiguous, 0 through 10 */
+
+struct fftw_iodim_do_not_use_me { /* exposed per precision as X(iodim) by FFTW_DEFINE_API */
+     int n;                     /* dimension size */
+     int is;			/* input stride */
+     int os;			/* output stride */
+};
+
+/*
+  huge second-order macro that defines prototypes for all API
+  functions.  We expand this macro for each supported precision
+ 
+  X: name-mangling macro
+  R: real data type
+  C: complex data type
+*/
+
+#define FFTW_DEFINE_API(X, R, C)					\
+									\
+FFTW_DEFINE_COMPLEX(R, C);						\
+/* opaque plan handle and per-precision aliases of the shared types */	\
+typedef struct X(plan_s) *X(plan);					\
+									\
+typedef struct fftw_iodim_do_not_use_me X(iodim);			\
+									\
+typedef enum fftw_r2r_kind_do_not_use_me X(r2r_kind);			\
+									\
+void X(execute)(const X(plan) p);					\
+/* complex DFT planners */						\
+X(plan) X(plan_dft)(int rank, const int *n,				\
+		    C *in, C *out, int sign, unsigned flags);		\
+									\
+X(plan) X(plan_dft_1d)(int n, C *in, C *out, int sign,			\
+		       unsigned flags);					\
+X(plan) X(plan_dft_2d)(int nx, int ny,					\
+		       C *in, C *out, int sign, unsigned flags);	\
+X(plan) X(plan_dft_3d)(int nx, int ny, int nz,				\
+		       C *in, C *out, int sign, unsigned flags);	\
+									\
+X(plan) X(plan_many_dft)(int rank, const int *n,			\
+                         int howmany,					\
+                         C *in, const int *inembed,			\
+                         int istride, int idist,			\
+                         C *out, const int *onembed,			\
+                         int ostride, int odist,			\
+                         int sign, unsigned flags);			\
+									\
+X(plan) X(plan_guru_dft)(int rank, const X(iodim) *dims,		\
+			 int howmany_rank,				\
+			 const X(iodim) *howmany_dims,			\
+			 C *in, C *out,					\
+			 int sign, unsigned flags);			\
+X(plan) X(plan_guru_split_dft)(int rank, const X(iodim) *dims,		\
+			 int howmany_rank,				\
+			 const X(iodim) *howmany_dims,			\
+			 R *ri, R *ii, R *ro, R *io,			\
+			 unsigned flags);				\
+/* execute variants that take the arrays as explicit arguments */	\
+void X(execute_dft)(const X(plan) p, C *in, C *out);			\
+void X(execute_split_dft)(const X(plan) p, R *ri, R *ii, R *ro, R *io);	\
+/* real-input, complex-output (r2c) planners */				\
+X(plan) X(plan_many_dft_r2c)(int rank, const int *n,			\
+                             int howmany,				\
+                             R *in, const int *inembed,			\
+                             int istride, int idist,			\
+                             C *out, const int *onembed,		\
+                             int ostride, int odist,			\
+                             unsigned flags);				\
+									\
+X(plan) X(plan_dft_r2c)(int rank, const int *n,				\
+                        R *in, C *out, unsigned flags);			\
+									\
+X(plan) X(plan_dft_r2c_1d)(int n,R *in,C *out,unsigned flags);		\
+X(plan) X(plan_dft_r2c_2d)(int nx, int ny,				\
+			   R *in, C *out, unsigned flags);		\
+X(plan) X(plan_dft_r2c_3d)(int nx, int ny,				\
+			   int nz,					\
+			   R *in, C *out, unsigned flags);		\
+									\
+/* complex-input, real-output (c2r) planners */				\
+X(plan) X(plan_many_dft_c2r)(int rank, const int *n,			\
+			     int howmany,				\
+			     C *in, const int *inembed,			\
+			     int istride, int idist,			\
+			     R *out, const int *onembed,		\
+			     int ostride, int odist,			\
+			     unsigned flags);				\
+									\
+X(plan) X(plan_dft_c2r)(int rank, const int *n,				\
+                        C *in, R *out, unsigned flags);			\
+									\
+X(plan) X(plan_dft_c2r_1d)(int n,C *in,R *out,unsigned flags);		\
+X(plan) X(plan_dft_c2r_2d)(int nx, int ny,				\
+			   C *in, R *out, unsigned flags);		\
+X(plan) X(plan_dft_c2r_3d)(int nx, int ny,				\
+			   int nz,					\
+			   C *in, R *out, unsigned flags);		\
+/* guru r2c/c2r planners, with C arrays or split R arrays */		\
+X(plan) X(plan_guru_dft_r2c)(int rank, const X(iodim) *dims,		\
+			     int howmany_rank,				\
+			     const X(iodim) *howmany_dims,		\
+			     R *in, C *out,				\
+			     unsigned flags);				\
+X(plan) X(plan_guru_dft_c2r)(int rank, const X(iodim) *dims,		\
+			     int howmany_rank,				\
+			     const X(iodim) *howmany_dims,		\
+			     C *in, R *out,				\
+			     unsigned flags);				\
+									\
+X(plan) X(plan_guru_split_dft_r2c)(int rank, const X(iodim) *dims,	\
+			     int howmany_rank,				\
+			     const X(iodim) *howmany_dims,		\
+			     R *in, R *ro, R *io,			\
+			     unsigned flags);				\
+X(plan) X(plan_guru_split_dft_c2r)(int rank, const X(iodim) *dims,	\
+			     int howmany_rank,				\
+			     const X(iodim) *howmany_dims,		\
+			     R *ri, R *ii, R *out,			\
+			     unsigned flags);				\
+									\
+void X(execute_dft_r2c)(const X(plan) p, R *in, C *out);		\
+void X(execute_dft_c2r)(const X(plan) p, C *in, R *out);		\
+									\
+void X(execute_split_dft_r2c)(const X(plan) p, R *in, R *ro, R *io);	\
+void X(execute_split_dft_c2r)(const X(plan) p, R *ri, R *ii, R *out);	\
+/* real-to-real (r2r) planners; kinds are X(r2r_kind) values */		\
+X(plan) X(plan_many_r2r)(int rank, const int *n,			\
+                         int howmany,					\
+                         R *in, const int *inembed,			\
+                         int istride, int idist,			\
+                         R *out, const int *onembed,			\
+                         int ostride, int odist,			\
+                         const X(r2r_kind) *kind, unsigned flags);	\
+									\
+X(plan) X(plan_r2r)(int rank, const int *n, R *in, R *out,		\
+                    const X(r2r_kind) *kind, unsigned flags);		\
+									\
+X(plan) X(plan_r2r_1d)(int n, R *in, R *out,				\
+                       X(r2r_kind) kind, unsigned flags);		\
+X(plan) X(plan_r2r_2d)(int nx, int ny, R *in, R *out,			\
+                       X(r2r_kind) kindx, X(r2r_kind) kindy,		\
+                       unsigned flags);					\
+X(plan) X(plan_r2r_3d)(int nx, int ny, int nz,				\
+                       R *in, R *out, X(r2r_kind) kindx,		\
+                       X(r2r_kind) kindy, X(r2r_kind) kindz,		\
+                       unsigned flags);					\
+									\
+X(plan) X(plan_guru_r2r)(int rank, const X(iodim) *dims,		\
+                         int howmany_rank,				\
+                         const X(iodim) *howmany_dims,			\
+                         R *in, R *out,					\
+                         const X(r2r_kind) *kind, unsigned flags);	\
+void X(execute_r2r)(const X(plan) p, R *in, R *out);			\
+/* plan destruction, wisdom reset, cleanup */				\
+void X(destroy_plan)(X(plan) p);					\
+void X(forget_wisdom)(void);						\
+void X(cleanup)(void);							\
+/* thread support entry points */					\
+void X(plan_with_nthreads)(int nthreads);				\
+int X(init_threads)(void);						\
+void X(cleanup_threads)(void);						\
+/* wisdom export and import: FILE, string, or callback based */	\
+void X(export_wisdom_to_file)(FILE *output_file);			\
+char *X(export_wisdom_to_string)(void);					\
+void X(export_wisdom)(void (*write_char)(char c, void *), void *data);	\
+int X(import_system_wisdom)(void);					\
+int X(import_wisdom_from_file)(FILE *input_file);			\
+int X(import_wisdom_from_string)(const char *input_string);		\
+int X(import_wisdom)(int (*read_char)(void *), void *data);		\
+/* plan printing */							\
+void X(fprint_plan)(const X(plan) p, FILE *output_file);		\
+void X(print_plan)(const X(plan) p);					\
+/* allocation pair */							\
+void *X(malloc)(size_t n);						\
+void X(free)(void *p);							\
+/* operation counts of a plan, returned through the three pointers */	\
+void X(flops)(const X(plan) p, double *add, double *mul, double *fma);	\
+/* string constants defined elsewhere in the library */		\
+extern const char X(version)[];						\
+extern const char X(cc)[];						\
+extern const char X(codelet_optim)[];
+
+
+/* end of FFTW_DEFINE_API macro */
+
+FFTW_DEFINE_API(FFTW_MANGLE_DOUBLE, double, fftw_complex) /* fftw_* names */
+FFTW_DEFINE_API(FFTW_MANGLE_FLOAT, float, fftwf_complex) /* fftwf_* names */
+FFTW_DEFINE_API(FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex) /* fftwl_* names */
+
+#define FFTW_FORWARD (-1) /* presumably values for the planners' `sign` argument */
+#define FFTW_BACKWARD (+1)
+
+/* documented flags */
+#define FFTW_MEASURE (0U) /* no bits set */
+#define FFTW_DESTROY_INPUT (1U << 0)
+#define FFTW_UNALIGNED (1U << 1)
+#define FFTW_CONSERVE_MEMORY (1U << 2)
+#define FFTW_EXHAUSTIVE (1U << 3) /* NO_EXHAUSTIVE is default */
+#define FFTW_PRESERVE_INPUT (1U << 4) /* cancels FFTW_DESTROY_INPUT */
+#define FFTW_PATIENT (1U << 5) /* IMPATIENT is default */
+#define FFTW_ESTIMATE (1U << 6) /* mapflags: clears PATIENT, sets ESTIMATE_PATIENT | NO_INDIRECT_OP */
+
+/* undocumented beyond-guru flags */
+#define FFTW_ESTIMATE_PATIENT (1U << 7)
+#define FFTW_BELIEVE_PCOST (1U << 8)
+#define FFTW_DFT_R2HC_ICKY (1U << 9)
+#define FFTW_NONTHREADED_ICKY (1U << 10)
+#define FFTW_NO_BUFFERING (1U << 11)
+#define FFTW_NO_INDIRECT_OP (1U << 12)
+#define FFTW_ALLOW_LARGE_GENERIC (1U << 13) /* NO_LARGE_GENERIC is default */
+#define FFTW_NO_RANK_SPLITS (1U << 14)
+#define FFTW_NO_VRANK_SPLITS (1U << 15)
+#define FFTW_NO_VRECURSE (1U << 16)
+
+#define FFTW_NO_SIMD (1U << 17) /* mapped to the internal NO_SIMD problem flag by mapflags */
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* FFTW3_H */
diff --git a/src/fftw3/api/flops.c b/src/fftw3/api/flops.c
new file mode 100644
index 0000000..43e8891
--- /dev/null
+++ b/src/fftw3/api/flops.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+/* Copy the add/mul/fma operation counts stored in p->pln->ops into the outputs. */
+void X(flops)(const X(plan) p, double *add, double *mul, double *fma)
+{
+     *add = p->pln->ops.add; *mul = p->pln->ops.mul; *fma = p->pln->ops.fma;
+}
diff --git a/src/fftw3/api/forget-wisdom.c b/src/fftw3/api/forget-wisdom.c
new file mode 100644
index 0000000..10ea580
--- /dev/null
+++ b/src/fftw3/api/forget-wisdom.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+void X(forget_wisdom)(void)
+{
+     planner *the_plnr = X(the_planner)(); /* planner returned by the_planner() */
+     the_plnr->adt->forget(the_plnr, FORGET_EVERYTHING); /* via its adt->forget hook */
+}
diff --git a/src/fftw3/api/import-system-wisdom.c b/src/fftw3/api/import-system-wisdom.c
new file mode 100644
index 0000000..393b3a6
--- /dev/null
+++ b/src/fftw3/api/import-system-wisdom.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+#if defined(FFTW_SINGLE) /* file name under /etc/fftw/ depends on the precision macro */
+#  define WISDOM_NAME "wisdomf"
+#elif defined(FFTW_LDOUBLE)
+#  define WISDOM_NAME "wisdoml"
+#else /* neither FFTW_SINGLE nor FFTW_LDOUBLE is defined */
+#  define WISDOM_NAME "wisdom"
+#endif
+
+/* Import wisdom from /etc/fftw/WISDOM_NAME.  Returns the result of
+   import_wisdom_from_file, or 0 if the file cannot be opened (always 0
+   on Windows builds, where nothing is attempted). */
+int X(import_system_wisdom)(void)
+{
+#if defined(__WIN32__) || defined(WIN32) || defined(_WINDOWS)
+     return 0; /* TODO? */
+#else
+     int status;
+     FILE *wisdom_file = fopen("/etc/fftw/" WISDOM_NAME, "r");
+     if (!wisdom_file) return 0;
+     status = X(import_wisdom_from_file)(wisdom_file);
+     fclose(wisdom_file);
+     return status;
+#endif
+}
diff --git a/src/fftw3/api/import-wisdom-from-file.c b/src/fftw3/api/import-wisdom-from-file.c
new file mode 100644
index 0000000..e682ab9
--- /dev/null
+++ b/src/fftw3/api/import-wisdom-from-file.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include <stdio.h>
+
+/* getc()/putc() are *unbelievably* slow on linux.  Looks like glibc
+   is grabbing a lock for each call to getc()/putc(), or something
+   like that.  You pay the price for these idiotic posix threads
+   whether you use them or not.
+
+   So, we do our own buffering.  This completely defeats the purpose
+   of having stdio in the first place, of course.
+*/
+  
+#define BUFSZ 256 /* bytes requested per fread in getchr_file */
+
+typedef struct {
+     scanner super; /* handed out to callers as &sc->super */
+     FILE *f; /* stream being read */
+     char buf[BUFSZ]; /* private read buffer */
+     char *bufr, *bufw; /* next unread byte / one past the last byte read */
+} S;
+
+/* Scanner callback: return the next byte of sc->f as a non-negative int, or
+   EOF once fread delivers no more data.  Reads go through sc->buf. */
+static int getchr_file(scanner * sc_)
+{
+     S *sc = (S *) sc_;
+     if (sc->bufr >= sc->bufw) {
+	  /* buffer drained: refill; an empty refill means end of input */
+	  sc->bufr = sc->buf;
+	  sc->bufw = sc->buf + fread(sc->buf, 1, BUFSZ, sc->f);
+	  if (sc->bufr >= sc->bufw) return EOF;
+     }
+     return (unsigned char) *(sc->bufr++); /* like getc: byte 0xFF must not look like EOF */
+}
+
+/* Create a scanner that reads from f via getchr_file; its buffer starts empty. */
+static scanner *mkscanner_file(FILE *f)
+{
+     S *fs = (S *) X(mkscanner)(sizeof(S), getchr_file);
+     fs->bufw = fs->buf; fs->bufr = fs->buf; fs->f = f;
+     return &fs->super;
+}
+
+int X(import_wisdom_from_file)(FILE *input_file)
+{
+     scanner *src = mkscanner_file(input_file); /* buffered reader over input_file */
+     planner *the_plnr = X(the_planner)();
+     const int status = the_plnr->adt->imprt(the_plnr, src); /* planner's import hook */
+     X(scanner_destroy)(src); /* note: this function never calls fclose on input_file */
+     return status;
+}
diff --git a/src/fftw3/api/import-wisdom-from-string.c b/src/fftw3/api/import-wisdom-from-string.c
new file mode 100644
index 0000000..d1ca6b8
--- /dev/null
+++ b/src/fftw3/api/import-wisdom-from-string.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+typedef struct { /* scanner reading from an in-memory string */
+     scanner super; /* handed out to callers as &sc->super */
+     const char *s; /* read cursor; getchr_str stops at the NUL */
+} S_str;
+
+/* Scanner callback: return the next character of the string as a non-negative
+   int, or EOF at the terminating NUL (the cursor is not advanced past it). */
+static int getchr_str(scanner * sc_)
+{
+     S_str *sc = (S_str *) sc_;
+     return *sc->s ? (int) (unsigned char) *sc->s++ : EOF; /* no sign extension */
+}
+
+static scanner *mkscanner_str(const char *s)
+{
+     S_str *str_sc = (S_str *) X(mkscanner)(sizeof(S_str), getchr_str); /* reads via getchr_str */
+     str_sc->s = s; /* cursor starts at the first character */
+     return &str_sc->super;
+}
+
+int X(import_wisdom_from_string)(const char *input_string)
+{
+     scanner *src = mkscanner_str(input_string); /* reader over the caller's string */
+     planner *the_plnr = X(the_planner)();
+     const int status = the_plnr->adt->imprt(the_plnr, src); /* planner's import hook */
+     X(scanner_destroy)(src);
+     return status;
+}
diff --git a/src/fftw3/api/import-wisdom.c b/src/fftw3/api/import-wisdom.c
new file mode 100644
index 0000000..505ebab
--- /dev/null
+++ b/src/fftw3/api/import-wisdom.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+typedef struct { /* scanner that pulls characters from a user callback */
+     scanner super; /* first member: the code casts S * to scanner * */
+     int (*read_char)(void *); /* caller-supplied reader */
+     void *data; /* passed unchanged to read_char */
+} S;
+
+/* Scanner callback: delegate to the user-supplied read_char with its data pointer. */
+static int getchr_generic(scanner * s_)
+{
+     return ((S *) s_)->read_char(((S *) s_)->data);
+}
+
+/* Import wisdom via a caller-supplied reader: getchr_generic calls read_char(data). */
+int X(import_wisdom)(int (*read_char)(void *), void *data)
+{
+     S *gen = (S *) X(mkscanner)(sizeof(S), getchr_generic);
+     planner *the_plnr = X(the_planner)();
+     int status;
+     gen->data = data;
+     gen->read_char = read_char;
+     status = the_plnr->adt->imprt(the_plnr, &gen->super);
+     X(scanner_destroy)(&gen->super);
+     return status;
+}
diff --git a/src/fftw3/api/map-r2r-kind.c b/src/fftw3/api/map-r2r-kind.c
new file mode 100644
index 0000000..adba2ce
--- /dev/null
+++ b/src/fftw3/api/map-r2r-kind.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+/* Translate an array of `rank` public r2r kinds into a newly MALLOC'ed
+   array of the internal rdft_kind with the matching name.  rank must be
+   finite (asserted); an unknown kind maps to R2HC and trips A(0). */
+rdft_kind *X(map_r2r_kind)(int rank, const X(r2r_kind) * kind)
+{
+     rdft_kind *out;
+     int d;
+     A(FINITE_RNK(rank));
+     out = (rdft_kind *) MALLOC(rank * sizeof(rdft_kind), PROBLEMS);
+     for (d = 0; d < rank; ++d) {
+          switch (kind[d]) {
+	      case FFTW_R2HC: out[d] = R2HC; break;
+	      case FFTW_HC2R: out[d] = HC2R; break;
+	      case FFTW_DHT: out[d] = DHT; break;
+	      case FFTW_REDFT00: out[d] = REDFT00; break;
+	      case FFTW_REDFT01: out[d] = REDFT01; break;
+	      case FFTW_REDFT10: out[d] = REDFT10; break;
+	      case FFTW_REDFT11: out[d] = REDFT11; break;
+	      case FFTW_RODFT00: out[d] = RODFT00; break;
+	      case FFTW_RODFT01: out[d] = RODFT01; break;
+	      case FFTW_RODFT10: out[d] = RODFT10; break;
+	      case FFTW_RODFT11: out[d] = RODFT11; break;
+	      default: out[d] = R2HC; A(0);
+          }
+     }
+     return out;
+}
diff --git a/src/fftw3/api/mapflags.c b/src/fftw3/api/mapflags.c
new file mode 100644
index 0000000..abb4e6b
--- /dev/null
+++ b/src/fftw3/api/mapflags.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+/* a flag operation: x is either a flag, in which case xm == 0, or
+   a mask, in which case xm == x; using this we can compactly code
+   the various bit operations via (flags & x) ^ xm or (flags | x) ^ xm. */
+typedef struct {
+     unsigned x, xm; /* built by YES(x) as {x, 0} and by NO(x) as {x, x} */
+} flagmask;
+
+typedef struct { /* one rule: if `flag` holds, apply `op` */
+     flagmask flag; /* predicate, evaluated with FLAGP */
+     flagmask op; /* operation, applied with OP */
+} flagop;
+
+#define FLAGP(f, msk)(((f) & (msk).x) ^ (msk).xm) /* YES: some bit of x set in f; NO: some bit of x clear */
+#define OP(f, msk)(((f) | (msk).x) ^ (msk).xm) /* YES: set the bits of x; NO: clear them */
+
+#define YES(x) {x, 0}
+#define NO(x) {x, x}
+#define IMPLIES(predicate, consequence) { predicate, consequence }
+#define EQV(a, b) IMPLIES(YES(a), YES(b)), IMPLIES(NO(a), NO(b)) /* expands to two rules */
+#define NEQV(a, b) IMPLIES(YES(a), NO(b)), IMPLIES(NO(a), YES(b)) /* expands to two rules */
+
+/* Apply each rule of flagmap in order: when a rule's predicate holds for *iflags,
+   its operation is applied to *oflags.  Both may alias, so *iflags is re-read. */
+static void map_flags(unsigned *iflags, unsigned *oflags, const flagop flagmap[], int nmap)
+{
+     int r;
+     for (r = 0; r < nmap; ++r)
+          if (FLAGP(*iflags, flagmap[r].flag)) *oflags = OP(*oflags, flagmap[r].op);
+}
+
+#define NELEM(array)(sizeof(array) / sizeof((array)[0]))
+
+void X(mapflags)(planner *plnr, unsigned flags)
+{
+     unsigned tmpflags;
+     /* Translate public FFTW_* flags into plnr->problem_flags and plnr->planner_flags. */
+     /* map of api flags -> api flags, to implement consistency rules
+        and combination flags */
+     const flagop self_flagmap[] = {
+	  /* in some cases (notably for halfcomplex->real transforms),
+	     DESTROY_INPUT is the default, so we need to support
+	     an inverse flag to disable it: */
+	  IMPLIES(YES(FFTW_PRESERVE_INPUT), NO(FFTW_DESTROY_INPUT)),
+
+	  IMPLIES(YES(FFTW_EXHAUSTIVE), YES(FFTW_PATIENT)),
+
+	  IMPLIES(YES(FFTW_ESTIMATE), NO(FFTW_PATIENT)),
+	  IMPLIES(YES(FFTW_ESTIMATE),
+		  YES(FFTW_ESTIMATE_PATIENT | FFTW_NO_INDIRECT_OP)),
+
+	  /* a canonical set of fftw2-like impatience flags */
+	  IMPLIES(NO(FFTW_PATIENT),
+		  YES(FFTW_NO_VRECURSE
+		      | FFTW_NO_RANK_SPLITS
+		      | FFTW_NO_VRANK_SPLITS
+		      | FFTW_NONTHREADED_ICKY
+		      | FFTW_DFT_R2HC_ICKY
+		      | FFTW_BELIEVE_PCOST))
+     };
+
+     /* map of (processed) api flags to internal problem/planner flags */
+     const flagop problem_flagmap[] = {
+	  EQV(FFTW_DESTROY_INPUT, DESTROY_INPUT),
+	  EQV(FFTW_NO_SIMD, NO_SIMD),
+	  EQV(FFTW_CONSERVE_MEMORY, CONSERVE_MEMORY)
+     };
+     const flagop planner_flagmap[] = {
+	  NEQV(FFTW_EXHAUSTIVE, NO_EXHAUSTIVE),
+
+	  /* the following are undocumented, "beyond-guru" flags that
+	     require some understanding of FFTW internals */
+	  EQV(FFTW_ESTIMATE_PATIENT, ESTIMATE),
+	  EQV(FFTW_BELIEVE_PCOST, BELIEVE_PCOST),
+	  EQV(FFTW_DFT_R2HC_ICKY, DFT_R2HC_ICKY),
+	  EQV(FFTW_NONTHREADED_ICKY, NONTHREADED_ICKY),
+	  EQV(FFTW_NO_BUFFERING, NO_BUFFERING),
+	  EQV(FFTW_NO_INDIRECT_OP, NO_INDIRECT_OP),
+	  NEQV(FFTW_ALLOW_LARGE_GENERIC, NO_LARGE_GENERIC),
+	  EQV(FFTW_NO_RANK_SPLITS, NO_RANK_SPLITS),
+	  EQV(FFTW_NO_VRANK_SPLITS, NO_VRANK_SPLITS),
+	  EQV(FFTW_NO_VRECURSE, NO_VRECURSE)
+     };
+     /* step 1: rewrite the api flags in place (input and output are the same word) */
+     map_flags(&flags, &flags, self_flagmap, NELEM(self_flagmap));
+     /* step 2: derive the problem flags, starting from an all-zero word */
+     tmpflags = 0;
+     map_flags(&flags, &tmpflags, problem_flagmap, NELEM(problem_flagmap));
+     plnr->problem_flags = tmpflags;
+     /* step 3: derive the planner flags the same way */
+     tmpflags = 0;
+     map_flags(&flags, &tmpflags, planner_flagmap, NELEM(planner_flagmap));
+     plnr->planner_flags = tmpflags;
+}
diff --git a/src/fftw3/api/mkprinter-file.c b/src/fftw3/api/mkprinter-file.c
new file mode 100644
index 0000000..24476f9
--- /dev/null
+++ b/src/fftw3/api/mkprinter-file.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include <stdio.h>
+
+#define BUFSZ 256 /* capacity of the output buffer in P */
+
+typedef struct {
+     printer super; /* handed out to callers as &p->super */
+     FILE *f; /* destination stream for myflush */
+     char buf[BUFSZ]; /* pending output */
+     char *bufw; /* next free slot in buf */
+} P;
+
+static void myflush(P *p)
+{
+     fwrite(p->buf, 1, (size_t) (p->bufw - p->buf), p->f); /* write the pending bytes */
+     p->bufw = p->buf; /* the buffer is empty again */
+}
+
+/* Printer callback: append c to the buffer, flushing first when it is full. */
+static void myputchr(printer *p_, char c)
+{
+     P *fp = (P *) p_;
+     if (fp->bufw - fp->buf >= BUFSZ) myflush(fp);
+     *fp->bufw++ = c;
+}
+
+/* Printer cleanup hook: push out whatever is still buffered. */
+static void mycleanup(printer *p_)
+{
+     myflush((P *) p_);
+}
+
+/* Create a printer that buffers characters (myputchr) and writes them to f (myflush). */
+printer *X(mkprinter_file)(FILE *f)
+{
+     P *fp = (P *) X(mkprinter)(sizeof(P), myputchr, mycleanup);
+     fp->bufw = fp->buf; fp->f = f;
+     return &fp->super;
+}
diff --git a/src/fftw3/api/mktensor-iodims.c b/src/fftw3/api/mktensor-iodims.c
new file mode 100644
index 0000000..99ce49e
--- /dev/null
+++ b/src/fftw3/api/mktensor-iodims.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+tensor *X(mktensor_iodims)(int rank, const X(iodim) *dims, int is, int os)
+{
+     tensor *t = X(mktensor)(rank);
+     int k;
+
+     /* nothing is copied for a non-finite rank */
+     if (!FINITE_RNK(rank)) return t;
+     for (k = 0; k < rank; ++k) {
+          t->dims[k].n = dims[k].n;
+          t->dims[k].is = is * dims[k].is;   /* caller's stride, scaled by is */
+          t->dims[k].os = os * dims[k].os;   /* caller's stride, scaled by os */
+     }
+     return t;
+}
+
+static int iodims_kosherp(int rank, const X(iodim) *dims, int allow_minfty)
+{
+     int k;
+     /* smallest acceptable extent: 0 when allow_minfty is set, otherwise 1 */
+     int nmin = allow_minfty ? 0 : 1;
+
+     if (rank < 0)
+          return 0;
+     /* a non-finite rank is acceptable only when allow_minfty is set */
+     if (!FINITE_RNK(rank))
+          return allow_minfty ? 1 : 0;
+
+     for (k = 0; k < rank; ++k) {
+          if (dims[k].n < nmin)
+               return 0;
+     }
+     return 1;
+}
+
+int X(guru_kosherp)(int rank, const X(iodim) *dims,
+		    int howmany_rank, const X(iodim) *howmany_dims)
+{
+     /* transform dims are checked strictly; howmany dims use the lax mode */
+     if (!iodims_kosherp(rank, dims, 0)) return 0;
+     return iodims_kosherp(howmany_rank, howmany_dims, 1);
+}
diff --git a/src/fftw3/api/mktensor-rowmajor.c b/src/fftw3/api/mktensor-rowmajor.c
new file mode 100644
index 0000000..84b87ca
--- /dev/null
+++ b/src/fftw3/api/mktensor-rowmajor.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+tensor *X(mktensor_rowmajor)(int rnk, const int *n,
+			     const int *niphys, const int *nophys,
+			     int is, int os)
+{
+     tensor *t = X(mktensor)(rnk);
+     int d;
+
+     if (!FINITE_RNK(rnk) || rnk <= 0)
+          return t;
+     A(n && niphys && nophys);
+     t->dims[rnk - 1].n = n[rnk - 1];
+     t->dims[rnk - 1].is = is;
+     t->dims[rnk - 1].os = os;
+     /* each earlier stride = successor's stride * successor's physical size */
+     for (d = rnk - 2; d >= 0; --d) {
+          t->dims[d].n = n[d];
+          t->dims[d].is = t->dims[d + 1].is * niphys[d + 1];
+          t->dims[d].os = t->dims[d + 1].os * nophys[d + 1];
+     }
+     return t;
+}
+
+static int rowmajor_kosherp(int rnk, const int *n)
+{
+     int k = 0;
+
+     /* the rank must be finite and non-negative */
+     if (rnk < 0 || !FINITE_RNK(rnk)) return 0;
+
+     /* every extent must be strictly positive */
+     while (k < rnk)
+          if (n[k++] < 1) return 0;
+     return 1;
+}
+
+int X(many_kosherp)(int rnk, const int *n, int howmany)
+{    /* a negative howmany is rejected before the dims are examined */
+     return howmany < 0 ? 0 : rowmajor_kosherp(rnk, n);
+}
diff --git a/src/fftw3/api/plan-dft-1d.c b/src/fftw3/api/plan-dft-1d.c
new file mode 100644
index 0000000..02ff69d
--- /dev/null
+++ b/src/fftw3/api/plan-dft-1d.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "dft.h"
+
+X(plan) X(plan_dft_1d)(int n, C *in, C *out, int sign, unsigned flags)
+{    /* rank-1 convenience form: forwards &n as a one-element size array */
+     return X(plan_dft)(1, &n, in, out, sign, flags);
+}
diff --git a/src/fftw3/api/plan-dft-2d.c b/src/fftw3/api/plan-dft-2d.c
new file mode 100644
index 0000000..f41e7e3
--- /dev/null
+++ b/src/fftw3/api/plan-dft-2d.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "dft.h"
+
+X(plan) X(plan_dft_2d)(int nx, int ny, C *in, C *out, int sign, unsigned flags)
+{
+     int extents[2]; /* dimension sizes in argument order: { nx, ny } */
+     extents[0] = nx;
+     extents[1] = ny;
+     return X(plan_dft)(2, extents, in, out, sign, flags);
+}
diff --git a/src/fftw3/api/plan-dft-3d.c b/src/fftw3/api/plan-dft-3d.c
new file mode 100644
index 0000000..a605a46
--- /dev/null
+++ b/src/fftw3/api/plan-dft-3d.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "dft.h"
+
+X(plan) X(plan_dft_3d)(int nx, int ny, int nz,
+		       C *in, C *out, int sign, unsigned flags)
+{
+     int extents[3]; /* dimension sizes in argument order: { nx, ny, nz } */
+     extents[0] = nx;
+     extents[1] = ny;
+     extents[2] = nz;
+     return X(plan_dft)(3, extents, in, out, sign, flags);
+}
diff --git a/src/fftw3/api/plan-dft-c2r-1d.c b/src/fftw3/api/plan-dft-c2r-1d.c
new file mode 100644
index 0000000..7dc9a93
--- /dev/null
+++ b/src/fftw3/api/plan-dft-c2r-1d.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+X(plan) X(plan_dft_c2r_1d)(int n, C *in, R *out, unsigned flags)
+{    /* rank-1 convenience form: forwards &n as a one-element size array */
+     return X(plan_dft_c2r)(1, &n, in, out, flags);
+}
diff --git a/src/fftw3/api/plan-dft-c2r-2d.c b/src/fftw3/api/plan-dft-c2r-2d.c
new file mode 100644
index 0000000..37d39bb
--- /dev/null
+++ b/src/fftw3/api/plan-dft-c2r-2d.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+X(plan) X(plan_dft_c2r_2d)(int nx, int ny, C *in, R *out, unsigned flags)
+{
+     int extents[2]; /* dimension sizes in argument order: { nx, ny } */
+     extents[0] = nx;
+     extents[1] = ny;
+     return X(plan_dft_c2r)(2, extents, in, out, flags);
+}
diff --git a/src/fftw3/api/plan-dft-c2r-3d.c b/src/fftw3/api/plan-dft-c2r-3d.c
new file mode 100644
index 0000000..4644126
--- /dev/null
+++ b/src/fftw3/api/plan-dft-c2r-3d.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+X(plan) X(plan_dft_c2r_3d)(int nx, int ny, int nz,
+			   C *in, R *out, unsigned flags)
+{
+     int extents[3]; /* dimension sizes in argument order: { nx, ny, nz } */
+     extents[0] = nx;
+     extents[1] = ny;
+     extents[2] = nz;
+     return X(plan_dft_c2r)(3, extents, in, out, flags);
+}
diff --git a/src/fftw3/api/plan-dft-c2r.c b/src/fftw3/api/plan-dft-c2r.c
new file mode 100644
index 0000000..118c1cb
--- /dev/null
+++ b/src/fftw3/api/plan-dft-c2r.c
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+X(plan) X(plan_dft_c2r)(int rank, const int *n, C *in, R *out, unsigned flags)
+{
+     return X(plan_many_dft_c2r)(rank, n, 1,   /* howmany = 1 */
+				 in, 0, 1, 1, out, 0, 1, 1, flags); /* embed = 0, stride = dist = 1 */
+}
diff --git a/src/fftw3/api/plan-dft-r2c-1d.c b/src/fftw3/api/plan-dft-r2c-1d.c
new file mode 100644
index 0000000..5d4865a
--- /dev/null
+++ b/src/fftw3/api/plan-dft-r2c-1d.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+X(plan) X(plan_dft_r2c_1d)(int n, R *in, C *out, unsigned flags)
+{    /* rank-1 convenience form: forwards &n as a one-element size array */
+     return X(plan_dft_r2c)(1, &n, in, out, flags);
+}
diff --git a/src/fftw3/api/plan-dft-r2c-2d.c b/src/fftw3/api/plan-dft-r2c-2d.c
new file mode 100644
index 0000000..289841c
--- /dev/null
+++ b/src/fftw3/api/plan-dft-r2c-2d.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+X(plan) X(plan_dft_r2c_2d)(int nx, int ny, R *in, C *out, unsigned flags)
+{
+     int extents[2]; /* dimension sizes in argument order: { nx, ny } */
+     extents[0] = nx;
+     extents[1] = ny;
+     return X(plan_dft_r2c)(2, extents, in, out, flags);
+}
diff --git a/src/fftw3/api/plan-dft-r2c-3d.c b/src/fftw3/api/plan-dft-r2c-3d.c
new file mode 100644
index 0000000..c25d04f
--- /dev/null
+++ b/src/fftw3/api/plan-dft-r2c-3d.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+X(plan) X(plan_dft_r2c_3d)(int nx, int ny, int nz,
+			   R *in, C *out, unsigned flags)
+{
+     int extents[3]; /* dimension sizes in argument order: { nx, ny, nz } */
+     extents[0] = nx;
+     extents[1] = ny;
+     extents[2] = nz;
+     return X(plan_dft_r2c)(3, extents, in, out, flags);
+}
diff --git a/src/fftw3/api/plan-dft-r2c.c b/src/fftw3/api/plan-dft-r2c.c
new file mode 100644
index 0000000..8624995
--- /dev/null
+++ b/src/fftw3/api/plan-dft-r2c.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+X(plan) X(plan_dft_r2c)(int rank, const int *n, R *in, C *out, unsigned flags)
+{
+     return X(plan_many_dft_r2c)(rank, n, 1,   /* NOTE(review): presumably howmany = 1 - confirm */
+				 in, 0, 1, 1,    /* presumably inembed, istride, idist - confirm */
+				 out, 0, 1, 1,   /* presumably onembed, ostride, odist - confirm */
+				 flags);
+}
diff --git a/src/fftw3/api/plan-dft.c b/src/fftw3/api/plan-dft.c
new file mode 100644
index 0000000..f9c8756
--- /dev/null
+++ b/src/fftw3/api/plan-dft.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+X(plan) X(plan_dft)(int rank, const int *n,
+		    C *in, C *out, int sign, unsigned flags)
+{
+     return X(plan_many_dft)(rank, n, 1,   /* NOTE(review): presumably howmany = 1 - confirm */
+			     in, 0, 1, 1,    /* presumably inembed, istride, idist - confirm */
+			     out, 0, 1, 1,   /* presumably onembed, ostride, odist - confirm */
+			     sign, flags);
+}
diff --git a/src/fftw3/api/plan-guru-dft-c2r.c b/src/fftw3/api/plan-guru-dft-c2r.c
new file mode 100644
index 0000000..0ab3a9c
--- /dev/null
+++ b/src/fftw3/api/plan-guru-dft-c2r.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+X(plan) X(plan_guru_dft_c2r)(int rank, const X(iodim) *dims,
+			     int howmany_rank, const X(iodim) *howmany_dims,
+			     C *in, R *out, unsigned flags)
+{
+     R *ri, *ii;   /* filled in from `in` by X(extract_reim) below */
+     /* an invalid rank/dims description returns 0 before any planning */
+     if (!X(guru_kosherp)(rank, dims, howmany_rank, howmany_dims)) return 0;
+
+     X(extract_reim)(FFT_SIGN, in, &ri, &ii);
+     /* when `out` differs from ri, FFTW_DESTROY_INPUT is forced on */
+     if (out != ri)
+	  flags |= FFTW_DESTROY_INPUT;
+     return X(mkapiplan)(
+	  0, flags, 
+	  X(mkproblem_rdft2_d)(X(mktensor_iodims)(rank, dims, 2, 1),
+			       X(mktensor_iodims)(howmany_rank, howmany_dims,
+						  2, 1),   /* input strides x2, output x1 */
+			       TAINT_UNALIGNED(out, flags),
+			       TAINT_UNALIGNED(ri, flags),
+			       TAINT_UNALIGNED(ii, flags), HC2R));
+}
diff --git a/src/fftw3/api/plan-guru-dft-r2c.c b/src/fftw3/api/plan-guru-dft-r2c.c
new file mode 100644
index 0000000..c4e4d41
--- /dev/null
+++ b/src/fftw3/api/plan-guru-dft-r2c.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+X(plan) X(plan_guru_dft_r2c)(int rank, const X(iodim) *dims,
+			     int howmany_rank,
+			     const X(iodim) *howmany_dims,
+			     R *in, C *out, unsigned flags)
+{
+     R *ro, *io;   /* filled in from `out` by X(extract_reim) below */
+     /* an invalid rank/dims description returns 0 before any planning */
+     if (!X(guru_kosherp)(rank, dims, howmany_rank, howmany_dims)) return 0;
+
+     X(extract_reim)(FFT_SIGN, out, &ro, &io);
+
+     return X(mkapiplan)(
+	  0, flags,
+	  X(mkproblem_rdft2_d)(X(mktensor_iodims)(rank, dims, 1, 2),
+			       X(mktensor_iodims)(howmany_rank, howmany_dims,
+						  1, 2),   /* input strides x1, output x2 */
+			       TAINT_UNALIGNED(in, flags),
+			       TAINT_UNALIGNED(ro, flags),
+			       TAINT_UNALIGNED(io, flags), R2HC));
+}
diff --git a/src/fftw3/api/plan-guru-dft.c b/src/fftw3/api/plan-guru-dft.c
new file mode 100644
index 0000000..32c894b
--- /dev/null
+++ b/src/fftw3/api/plan-guru-dft.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "dft.h"
+
+X(plan) X(plan_guru_dft)(int rank, const X(iodim) *dims,
+			 int howmany_rank, const X(iodim) *howmany_dims,
+			 C *in, C *out, int sign, unsigned flags)
+{
+     R *ri, *ii, *ro, *io;   /* filled in by the X(extract_reim) calls below */
+     /* an invalid rank/dims description returns 0 before any planning */
+     if (!X(guru_kosherp)(rank, dims, howmany_rank, howmany_dims)) return 0;
+
+     X(extract_reim)(sign, in, &ri, &ii);
+     X(extract_reim)(sign, out, &ro, &io);
+
+     return X(mkapiplan)(
+	  sign, flags,
+	  X(mkproblem_dft_d)(X(mktensor_iodims)(rank, dims, 2, 2),
+			     X(mktensor_iodims)(howmany_rank, howmany_dims,
+						2, 2),   /* input and output strides both x2 */
+			     TAINT_UNALIGNED(ri, flags),
+			     TAINT_UNALIGNED(ii, flags), 
+			     TAINT_UNALIGNED(ro, flags),
+			     TAINT_UNALIGNED(io, flags)));
+}
diff --git a/src/fftw3/api/plan-guru-r2r.c b/src/fftw3/api/plan-guru-r2r.c
new file mode 100644
index 0000000..d5f2120
--- /dev/null
+++ b/src/fftw3/api/plan-guru-r2r.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+rdft_kind *X(map_r2r_kind)(int rank, const X(r2r_kind) * kind); /* defined outside this file */
+
+X(plan) X(plan_guru_r2r)(int rank, const X(iodim) *dims,
+			 int howmany_rank,
+			 const X(iodim) *howmany_dims,
+			 R *in, R *out,
+			 const X(r2r_kind) * kind, unsigned flags)
+{
+     X(plan) p;
+     rdft_kind *k;   /* result of X(map_r2r_kind); passed to X(ifree0) below */
+     /* an invalid rank/dims description returns 0 before k is obtained */
+     if (!X(guru_kosherp)(rank, dims, howmany_rank, howmany_dims)) return 0;
+
+     k = X(map_r2r_kind)(rank, kind);
+     p = X(mkapiplan)(
+	  0, flags,
+	  X(mkproblem_rdft_d)(X(mktensor_iodims)(rank, dims, 1, 1),
+			      X(mktensor_iodims)(howmany_rank, howmany_dims,
+						 1, 1), 
+			      TAINT_UNALIGNED(in, flags),
+			      TAINT_UNALIGNED(out, flags), k));
+     X(ifree0)(k);   /* k is handed to X(ifree0) once the plan call has returned */
+     return p;
+}
diff --git a/src/fftw3/api/plan-guru-split-dft-c2r.c b/src/fftw3/api/plan-guru-split-dft-c2r.c
new file mode 100644
index 0000000..f51bc9a
--- /dev/null
+++ b/src/fftw3/api/plan-guru-split-dft-c2r.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+X(plan) X(plan_guru_split_dft_c2r)(int rank, const X(iodim) *dims,
+			     int howmany_rank, const X(iodim) *howmany_dims,
+			     R *ri, R *ii, R *out, unsigned flags)
+{    /* returns 0 for an invalid rank/dims description */
+     if (!X(guru_kosherp)(rank, dims, howmany_rank, howmany_dims)) return 0;
+     /* when `out` differs from ri, FFTW_DESTROY_INPUT is forced on */
+     if (out != ri)
+	  flags |= FFTW_DESTROY_INPUT;
+     return X(mkapiplan)(
+	  0, flags, 
+	  X(mkproblem_rdft2_d)(X(mktensor_iodims)(rank, dims, 1, 1),
+			       X(mktensor_iodims)(howmany_rank, howmany_dims,
+						  1, 1),   /* strides passed through unscaled */
+			       TAINT_UNALIGNED(out, flags),
+			       TAINT_UNALIGNED(ri, flags),
+			       TAINT_UNALIGNED(ii, flags), HC2R));
+}
diff --git a/src/fftw3/api/plan-guru-split-dft-r2c.c b/src/fftw3/api/plan-guru-split-dft-r2c.c
new file mode 100644
index 0000000..3c5eafd
--- /dev/null
+++ b/src/fftw3/api/plan-guru-split-dft-r2c.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+X(plan) X(plan_guru_split_dft_r2c)(int rank, const X(iodim) *dims,
+				   int howmany_rank,
+				   const X(iodim) *howmany_dims,
+				   R *in, R *ro, R *io, unsigned flags)
+{    /* returns 0 for an invalid rank/dims description */
+     if (!X(guru_kosherp)(rank, dims, howmany_rank, howmany_dims)) return 0;
+
+     return X(mkapiplan)(
+	  0, flags,
+	  X(mkproblem_rdft2_d)(X(mktensor_iodims)(rank, dims, 1, 1),
+			       X(mktensor_iodims)(howmany_rank, howmany_dims,
+						  1, 1),   /* strides passed through unscaled */
+			       TAINT_UNALIGNED(in, flags),
+			       TAINT_UNALIGNED(ro, flags),
+			       TAINT_UNALIGNED(io, flags), R2HC));
+}
diff --git a/src/fftw3/api/plan-guru-split-dft.c b/src/fftw3/api/plan-guru-split-dft.c
new file mode 100644
index 0000000..62437c2
--- /dev/null
+++ b/src/fftw3/api/plan-guru-split-dft.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "dft.h"
+
+X(plan) X(plan_guru_split_dft)(int rank, const X(iodim) *dims,
+			       int howmany_rank, const X(iodim) *howmany_dims,
+			       R *ri, R *ii, R *ro, R *io, unsigned flags)
+{    /* returns 0 for an invalid rank/dims description */
+     if (!X(guru_kosherp)(rank, dims, howmany_rank, howmany_dims)) return 0;
+     /* sign is FFT_SIGN only if ii == ri + 1 and io == ro + 1, else -FFT_SIGN */
+     return X(mkapiplan)(
+	  ii - ri == 1 && io - ro == 1 ? FFT_SIGN : -FFT_SIGN, flags,
+	  X(mkproblem_dft_d)(X(mktensor_iodims)(rank, dims, 1, 1),
+			     X(mktensor_iodims)(howmany_rank, howmany_dims,
+						1, 1),   /* strides passed through unscaled */
+			     TAINT_UNALIGNED(ri, flags),
+			     TAINT_UNALIGNED(ii, flags), 
+			     TAINT_UNALIGNED(ro, flags),
+			     TAINT_UNALIGNED(io, flags)));
+}
diff --git a/src/fftw3/api/plan-many-dft-c2r.c b/src/fftw3/api/plan-many-dft-c2r.c
new file mode 100644
index 0000000..81b6338
--- /dev/null
+++ b/src/fftw3/api/plan-many-dft-c2r.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+X(plan) X(plan_many_dft_c2r)(int rank, const int *n,
+			     int howmany,
+			     C *in, const int *inembed,
+			     int istride, int idist,
+			     R *out, const int *onembed,
+			     int ostride, int odist, unsigned flags)
+{
+     R *re, *im;
+     int *in_alloc, *out_alloc;
+     const int *in_embed, *out_embed;
+     int same_array;
+     X(plan) result;
+
+     if (!X(many_kosherp)(rank, n, howmany))
+	  return 0;
+
+     X(extract_reim)(FFT_SIGN, in, &re, &im);
+     same_array = (out == re);
+     if (!same_array)
+	  flags |= FFTW_DESTROY_INPUT;	/* added for every out-of-place plan */
+
+     /* pad the default embeddings: fifth argument 1 for the complex input,
+	0 for the real output; what rdft2_pad stores via &..._alloc is freed below */
+     in_embed = X(rdft2_pad)(rank, n, inembed, same_array, 1, &in_alloc);
+     out_embed = X(rdft2_pad)(rank, n, onembed, same_array, 0, &out_alloc);
+     result = X(mkapiplan)(0, flags, X(mkproblem_rdft2_d)(
+	  X(mktensor_rowmajor)(rank, n, in_embed, out_embed,
+			       2 * istride, ostride),
+	  X(mktensor_1d)(howmany, 2 * idist, odist),
+	  TAINT_UNALIGNED(out, flags),
+	  TAINT_UNALIGNED(re, flags), TAINT_UNALIGNED(im, flags), HC2R));
+
+     X(ifree0)(in_alloc);
+     X(ifree0)(out_alloc);
+     return result;
+}
diff --git a/src/fftw3/api/plan-many-dft-r2c.c b/src/fftw3/api/plan-many-dft-r2c.c
new file mode 100644
index 0000000..093d9ba
--- /dev/null
+++ b/src/fftw3/api/plan-many-dft-r2c.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+X(plan) X(plan_many_dft_r2c)(int rank, const int *n,
+			     int howmany,
+			     R *in, const int *inembed,
+			     int istride, int idist,
+			     C *out, const int *onembed,
+			     int ostride, int odist, unsigned flags)
+{
+     R *re, *im;
+     int *in_alloc, *out_alloc;
+     const int *in_embed, *out_embed;
+     int same_array;
+     X(plan) result;
+
+     if (!X(many_kosherp)(rank, n, howmany))
+	  return 0;
+
+     X(extract_reim)(FFT_SIGN, out, &re, &im);
+     same_array = (in == re);
+
+     /* pad the default embeddings: fifth argument 0 for the real input,
+	1 for the complex output */
+     in_embed = X(rdft2_pad)(rank, n, inembed, same_array, 0, &in_alloc);
+     out_embed = X(rdft2_pad)(rank, n, onembed, same_array, 1, &out_alloc);
+     result = X(mkapiplan)(0, flags, X(mkproblem_rdft2_d)(
+	  X(mktensor_rowmajor)(rank, n, in_embed, out_embed,
+			       istride, 2 * ostride),
+	  X(mktensor_1d)(howmany, idist, 2 * odist),
+	  TAINT_UNALIGNED(in, flags),
+	  TAINT_UNALIGNED(re, flags), TAINT_UNALIGNED(im, flags), R2HC));
+
+     X(ifree0)(in_alloc);
+     X(ifree0)(out_alloc);
+     return result;
+}
diff --git a/src/fftw3/api/plan-many-dft.c b/src/fftw3/api/plan-many-dft.c
new file mode 100644
index 0000000..56f0ed4
--- /dev/null
+++ b/src/fftw3/api/plan-many-dft.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "dft.h"
+
+/* N0(nembed): nembed when non-null, otherwise the n visible at the call site */
+#define N0(nembed)((nembed) ? (nembed) : n)
+
+X(plan) X(plan_many_dft)(int rank, const int *n,
+			 int howmany,
+			 C *in, const int *inembed,
+			 int istride, int idist,
+			 C *out, const int *onembed,
+			 int ostride, int odist, int sign, unsigned flags)
+{
+     R *in_re, *in_im, *out_re, *out_im;
+
+     if (!X(many_kosherp)(rank, n, howmany))
+	  return 0;
+
+     /* obtain separate re/im pointers for each C array via extract_reim */
+     X(extract_reim)(sign, in, &in_re, &in_im);
+     X(extract_reim)(sign, out, &out_re, &out_im);
+
+     return X(mkapiplan)(
+	  sign, flags,
+	  X(mkproblem_dft_d)(
+	       X(mktensor_rowmajor)(rank, n, N0(inembed), N0(onembed),
+				    2 * istride, 2 * ostride),
+	       X(mktensor_1d)(howmany, 2 * idist, 2 * odist),
+	       TAINT_UNALIGNED(in_re, flags), TAINT_UNALIGNED(in_im, flags),
+	       TAINT_UNALIGNED(out_re, flags), TAINT_UNALIGNED(out_im, flags)));
+}
diff --git a/src/fftw3/api/plan-many-r2r.c b/src/fftw3/api/plan-many-r2r.c
new file mode 100644
index 0000000..0a6815f
--- /dev/null
+++ b/src/fftw3/api/plan-many-r2r.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+#include "rdft.h"
+
+/* called below; only declared in this file, the definition lives elsewhere */
+rdft_kind *X(map_r2r_kind)(int rank, const X(r2r_kind) * kind);
+
+#define N0(nembed)((nembed) ? (nembed) : n)
+
+X(plan) X(plan_many_r2r)(int rank, const int *n,
+			 int howmany,
+			 R *in, const int *inembed,
+			 int istride, int idist,
+			 R *out, const int *onembed,
+			 int ostride, int odist,
+			 const X(r2r_kind) * kind, unsigned flags)
+{
+     rdft_kind *kinds;
+     X(plan) result;
+
+     if (!X(many_kosherp)(rank, n, howmany))
+	  return 0;
+
+     kinds = X(map_r2r_kind)(rank, kind);
+     result = X(mkapiplan)(0, flags,
+	  X(mkproblem_rdft_d)(
+	       X(mktensor_rowmajor)(rank, n, N0(inembed), N0(onembed),
+				    istride, ostride),
+	       X(mktensor_1d)(howmany, idist, odist),
+	       TAINT_UNALIGNED(in, flags), TAINT_UNALIGNED(out, flags), kinds));
+     X(ifree0)(kinds);
+     return result;
+}
diff --git a/src/fftw3/api/plan-r2r-1d.c b/src/fftw3/api/plan-r2r-1d.c
new file mode 100644
index 0000000..372d1c0
--- /dev/null
+++ b/src/fftw3/api/plan-r2r-1d.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+/* rank-1 wrapper: n and kind are handed to plan_r2r as 1-element arrays */
+X(plan) X(plan_r2r_1d)(int n, R *in, R *out, X(r2r_kind) kind, unsigned flags)
+{
+     return X(plan_r2r)(1, &n, in, out, &kind, flags);
+}
diff --git a/src/fftw3/api/plan-r2r-2d.c b/src/fftw3/api/plan-r2r-2d.c
new file mode 100644
index 0000000..7702560
--- /dev/null
+++ b/src/fftw3/api/plan-r2r-2d.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+X(plan) X(plan_r2r_2d)(int nx, int ny, R *in, R *out,
+		       X(r2r_kind) kindx, X(r2r_kind) kindy, unsigned flags)
+{
+     /* pack the per-dimension arguments into arrays, x first, for plan_r2r */
+     int dims[2];
+     X(r2r_kind) kinds[2];
+
+     dims[0] = nx; kinds[0] = kindx;
+     dims[1] = ny; kinds[1] = kindy;
+     return X(plan_r2r)(2, dims, in, out, kinds, flags);
+}
diff --git a/src/fftw3/api/plan-r2r-3d.c b/src/fftw3/api/plan-r2r-3d.c
new file mode 100644
index 0000000..84ae651
--- /dev/null
+++ b/src/fftw3/api/plan-r2r-3d.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+X(plan) X(plan_r2r_3d)(int nx, int ny, int nz,
+		       R *in, R *out, X(r2r_kind) kindx,
+		       X(r2r_kind) kindy, X(r2r_kind) kindz, unsigned flags)
+{
+     /* pack the per-dimension arguments into arrays in x, y, z order */
+     int dims[3];
+     X(r2r_kind) kinds[3];
+
+     dims[0] = nx; kinds[0] = kindx;
+     dims[1] = ny; kinds[1] = kindy;
+     dims[2] = nz; kinds[2] = kindz;
+
+     return X(plan_r2r)(3, dims, in, out, kinds, flags);
+}
diff --git a/src/fftw3/api/plan-r2r.c b/src/fftw3/api/plan-r2r.c
new file mode 100644
index 0000000..ba8f971
--- /dev/null
+++ b/src/fftw3/api/plan-r2r.c
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+/* plan_many_r2r with howmany = 1, null inembed/onembed, stride and dist 1 */
+X(plan) X(plan_r2r)(int rank, const int *n, R *in, R *out,
+		    const X(r2r_kind) * kind, unsigned flags)
+{
+     return X(plan_many_r2r)(rank, n, 1, in, 0, 1, 1, out, 0, 1, 1, kind,
+			     flags);
+}
diff --git a/src/fftw3/api/print-plan.c b/src/fftw3/api/print-plan.c
new file mode 100644
index 0000000..094446d
--- /dev/null
+++ b/src/fftw3/api/print-plan.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+void X(fprint_plan)(const X(plan) p, FILE *output_file)
+{
+     /* wrap the stream in a printer, call the plan's own print hook, clean up */
+     printer *out = X(mkprinter_file)(output_file);
+     p->pln->adt->print(p->pln, out);
+     X(printer_destroy)(out);
+}
+
+void X(print_plan)(const X(plan) p)
+{
+     X(fprint_plan)(p, stdout);	/* same output as fprint_plan, on stdout */
+}
diff --git a/src/fftw3/api/rdft2-pad.c b/src/fftw3/api/rdft2-pad.c
new file mode 100644
index 0000000..499a55d
--- /dev/null
+++ b/src/fftw3/api/rdft2-pad.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <string.h>
+#include "api.h"
+
+const int *X(rdft2_pad)(int rnk, const int *n, const int *nembed,
+			int inplace, int cmplx, int **nfree)
+{
+     int *padded;
+
+     A(FINITE_RNK(rnk));
+     *nfree = 0;
+     if (nembed || rnk <= 0) return nembed;	/* nothing to derive */
+     if (!inplace && !cmplx) return n;	/* n itself is the embedding */
+
+     /* copy n, replacing the last entry by n/2+1 (doubled when !cmplx);
+	the copy is also returned through *nfree */
+     padded = (int *) MALLOC(sizeof(int) * rnk, PROBLEMS);
+     memcpy(padded, n, sizeof(int) * rnk);
+     padded[rnk - 1] = (n[rnk - 1] / 2 + 1) * (cmplx ? 1 : 2);
+     return *nfree = padded;
+}
diff --git a/src/fftw3/api/the-planner.c b/src/fftw3/api/the-planner.c
new file mode 100644
index 0000000..8f6d546
--- /dev/null
+++ b/src/fftw3/api/the-planner.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "api.h"
+
+static planner *plnr = 0;	/* the single planner; 0 until first use */
+
+/* return the API-wide planner, creating and configuring it on first use */
+planner *X(the_planner)(void)
+{
+     if (plnr)
+	  return plnr;
+
+     plnr = X(mkplanner)();
+     X(configure_planner)(plnr);
+     return plnr;
+}
+
+void X(cleanup)(void)
+{
+     /* nothing to do while no planner exists (plnr is still 0) */
+     if (!plnr)
+	  return;
+     X(planner_destroy)(plnr);
+     plnr = 0;
+}
diff --git a/src/fftw3/api/version.c b/src/fftw3/api/version.c
new file mode 100644
index 0000000..2abf1ec
--- /dev/null
+++ b/src/fftw3/api/version.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: version.c,v 1.1 2008/10/17 06:10:48 scuri Exp $ */
+
+#include "api.h"
+
+const char X(cc)[] = FFTW_CC;
+const char X(codelet_optim)[] = CODELET_OPTIM;
+
+const char X(version)[] = PACKAGE "-" VERSION
+/* adjacent literals concatenate: one suffix per HAVE_* macro that is set */
+#if HAVE_SSE
+   "-sse"
+#endif
+
+#if HAVE_SSE2
+   "-sse2"
+#endif
+
+#if HAVE_ALTIVEC
+   "-altivec"
+#endif
+
+#if HAVE_3DNOW
+   "-3dnow"
+#endif
+
+#if HAVE_K7
+   "-k7"
+#endif
+;
diff --git a/src/fftw3/api/x77.h b/src/fftw3/api/x77.h
new file mode 100644
index 0000000..74513b3
--- /dev/null
+++ b/src/fftw3/api/x77.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* BLAS-style type prefixes for F77 names: x77() lower case, X77() upper case */
+#if defined(FFTW_SINGLE)
+#  define x77(name) CONCAT(sfftw_, name)
+#  define X77(NAME) CONCAT(SFFTW_, NAME)
+#elif defined(FFTW_LDOUBLE)
+/* FIXME: what is best?  BLAS uses D..._X, apparently.  Ugh. */
+#  define x77(name) CONCAT(lfftw_, name)
+#  define X77(NAME) CONCAT(LFFTW_, NAME)
+#else /* neither FFTW_SINGLE nor FFTW_LDOUBLE is defined */
+#  define x77(name) CONCAT(dfftw_, name)
+#  define X77(NAME) CONCAT(DFFTW_, NAME)
+#endif
diff --git a/src/fftw3/dft/buffered.c b/src/fftw3/dft/buffered.c
new file mode 100644
index 0000000..763ebe3
--- /dev/null
+++ b/src/fftw3/dft/buffered.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: buffered.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+#include "dft.h"
+
+typedef struct {
+     int nbuf;			/* passed to X(compute_nbuf) with maxbufsz */
+     int maxbufsz;		/* toobig() reports n > maxbufsz */
+     int skew_alignment;	/* modulus used when mkplan computes bufdist */
+     int skew;			/* wanted value of bufdist % skew_alignment */
+     const char *nam;		/* solver name shown by print() */
+} bufadt;
+
+typedef struct {
+     solver super;
+     const bufadt *adt;	/* parameters, assigned once in mksolver() */
+} S;
+
+typedef struct {
+     plan_dft super;
+     /* children: transform into bufs, copy bufs to output, leftover transforms */
+     plan *cld, *cldcpy, *cldrest;
+     int n, vl, nbuf, bufdist;
+     int ivs, ovs;		/* stored already multiplied by nbuf in mkplan() */
+     int roffset, ioffset;	/* 0 or 1: where re and im start inside bufs */
+
+     const S *slv;
+} P;
+
+/* transform vl vectors, nbuf at a time, through a temporary buffer */
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P *ego = (const P *) ego_;
+     int nbuf = ego->nbuf;
+     R *bufs = (R *)MALLOC(sizeof(R) * nbuf * ego->bufdist * 2, BUFFERS);
+     /* bufs holds nbuf * bufdist * 2 values of type R; freed before returning */
+     plan_dft *cld = (plan_dft *) ego->cld;
+     plan_dft *cldcpy = (plan_dft *) ego->cldcpy;
+     plan_dft *cldrest;
+     int i, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     int roffset = ego->roffset, ioffset = ego->ioffset;
+
+     /* i is the number of transforms done once the current batch ends,
+	so the body runs vl / nbuf times.  The original note here warns
+	that   for (i = 0; i <= vl - nbuf; i += nbuf)   is wrong for an
+	unsigned i (vl - nbuf would wrap when vl < nbuf); i is declared
+	int in this version. */
+     for (i = nbuf; i <= vl; i += nbuf) {
+          /* transform to bufs: */
+          cld->apply((plan *) cld, ri, ii, bufs + roffset, bufs + ioffset);
+	  ri += ivs; ii += ivs;	/* ivs/ovs already include the factor nbuf */
+
+          /* copy back */
+          cldcpy->apply((plan *) cldcpy, bufs+roffset, bufs+ioffset, ro, io);
+	  ro += ovs; io += ovs;
+     }
+
+     /* Do the remaining vl % nbuf transforms, if any: */
+     cldrest = (plan_dft *) ego->cldrest;
+     cldrest->apply((plan *) cldrest, ri, ii, ro, io);
+
+     X(ifree)(bufs);
+}
+
+
+static void awake(plan *ego_, int flg)
+{
+     /* pass flg on to each of the three child plans through AWAKE */
+     P *self = (P *) ego_;
+     AWAKE(self->cld, flg);
+     AWAKE(self->cldcpy, flg);
+     AWAKE(self->cldrest, flg);
+}
+
+static void destroy(plan *ego_)
+{
+     /* release the child plans in the reverse of the order mkplan makes them */
+     X(plan_destroy_internal)(((P *) ego_)->cldrest);
+     X(plan_destroy_internal)(((P *) ego_)->cldcpy);
+     X(plan_destroy_internal)(((P *) ego_)->cld);
+}
+
+static void print(const plan *ego_, printer *p)
+{
+     /* solver name, n, nbuf, vl, bufdist mod n, then the three child plans */
+     const P *self = (const P *) ego_;
+     int dist_mod_n = self->bufdist % self->n;
+     p->print(p, "(%s-%d%v/%d-%d%(%p%)%(%p%)%(%p%))",
+              self->slv->adt->nam, self->n, self->nbuf, self->vl, dist_mod_n,
+              self->cld, self->cldcpy, self->cldrest);
+}
+/* buffer count for this solver: X(compute_nbuf) with the adt's two limits */
+static int compute_nbuf(int n, int vl, const S *ego)
+{
+     return X(compute_nbuf)(n, vl, ego->adt->nbuf, ego->adt->maxbufsz);
+}
+
+static int toobig(int n, const S *ego)
+{
+     return ego->adt->maxbufsz < n;	/* nonzero iff n exceeds maxbufsz */
+}
+
+static int applicable0(const problem *p_, const S *ego, const planner *plnr)
+{
+     if (DFTP(p_)) {
+          const problem_dft *p = (const problem_dft *) p_;
+          const iodim *d = p->sz->dims;
+          /* only rank-1 transforms, alone or inside a rank-1 vector loop */
+          if (1
+	      && p->vecsz->rnk <= 1
+	      && p->sz->rnk == 1
+	       ) {
+	       /* oversized transforms are refused when conserving memory */
+	       if (toobig(p->sz->dims[0].n, ego) && CONSERVE_MEMORYP(plnr))
+                    return 0;
+
+               /*
+		 In principle, the buffered transforms might be useful
+		 when working out of place.  However, in order to
+		 prevent infinite loops in the planner, we require
+		 that the output stride of the problem be greater
+		 than 2 (mkplan's child problem writes with stride 2).
+               */
+               if (p->ri != p->ro)
+                    return (d[0].os > 2);
+
+               /* We can always do a single transform in-place */
+               if (p->vecsz->rnk == 0)
+                    return 1;
+
+               /*
+		* If the problem is in place, the input/output strides must
+		* be the same, or compute_nbuf must equal the vector length
+		* (every transform of the loop fits in the buffers at once).
+		*/
+               return ((X(tensor_inplace_strides2)(p->sz, p->vecsz))
+                       || (compute_nbuf(d[0].n, p->vecsz->dims[0].n, ego)
+                           == p->vecsz->dims[0].n));
+          }
+     }
+     return 0;	/* not a DFT problem, or the ranks do not match */
+}
+
+static int applicable(const problem *p_, const S *ego, const planner *plnr)
+{
+     const problem_dft *p = (const problem_dft *) p_;
+
+     if (NO_BUFFERINGP(plnr) || !applicable0(p_, ego, plnr))
+	  return 0;
+
+     /* under NO_UGLY, accept only in-place problems whose size does
+	not exceed the buffer limit */
+     if (NO_UGLYP(plnr))
+	  return p->ri == p->ro && !toobig(p->sz->dims[0].n, ego);
+     return 1;
+}
+/* plan = child into buffers (cld) + copy-out (cldcpy) + leftovers (cldrest) */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     const S *ego = (const S *) ego_;
+     const bufadt *adt = ego->adt;
+     P *pln;
+     plan *cld = (plan *) 0;
+     plan *cldcpy = (plan *) 0;
+     plan *cldrest = (plan *) 0;
+     const problem_dft *p = (const problem_dft *) p_;
+     R *bufs = (R *) 0;
+     int nbuf = 0, bufdist, n, vl;
+     int ivs, ovs, roffset, ioffset;
+
+     static const plan_adt padt = {
+	  X(dft_solve), awake, print, destroy
+     };
+
+     if (!applicable(p_, ego, plnr))
+          goto nada;
+
+     n = X(tensor_sz)(p->sz);
+
+     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
+
+     nbuf = compute_nbuf(n, vl, ego);
+     A(nbuf > 0);
+
+     /*
+      * Determine BUFDIST, the offset between successive array bufs.
+      * bufdist = n + skew, where skew is chosen such that bufdist %
+      * skew_alignment = skew.
+      */
+     if (vl == 1) {
+          bufdist = n;
+     } else {
+          bufdist =
+               n + ((adt->skew_alignment + adt->skew - n % adt->skew_alignment)
+                    % adt->skew_alignment);
+          A(p->vecsz->rnk == 1);
+     }
+
+     /* attempt to keep real and imaginary part in the same order,
+	so as to allow optimizations in the copy plan */
+     roffset = (p->ri - p->ii > 0) ? 1 : 0;
+     ioffset = 1 - roffset;
+
+     /* initial allocation for the purpose of planning */
+     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist * 2, BUFFERS);
+
+     cld = X(mkplan_d)(plnr,
+		       X(mkproblem_dft_d)(
+			    X(mktensor_1d)(n, p->sz->dims[0].is, 2),
+			    X(mktensor_1d)(nbuf, ivs, bufdist * 2),
+			    TAINT(p->ri, ivs * nbuf),
+			    TAINT(p->ii, ivs * nbuf),
+			    bufs + roffset, 
+			    bufs + ioffset));
+     if (!cld)
+          goto nada;
+
+     /* copying back from the buffer is a rank-0 transform: */
+     cldcpy = X(mkplan_d)(plnr,
+			  X(mkproblem_dft_d)(
+			       X(mktensor_0d)(),
+			       X(mktensor_2d)(nbuf, bufdist * 2, ovs,
+					      n, 2, p->sz->dims[0].os),
+			       bufs + roffset, 
+			       bufs + ioffset, 
+			       TAINT(p->ro, ovs * nbuf), 
+			       TAINT(p->io, ovs * nbuf)));
+     if (!cldcpy)
+          goto nada;
+
+     /* deallocate buffers, let apply() allocate them for real */
+     X(ifree)(bufs);
+     bufs = 0;
+
+     /* plan the vl % nbuf leftover transforms (cldrest), without buffers: */
+     {
+	  int id = ivs * (nbuf * (vl / nbuf));
+	  int od = ovs * (nbuf * (vl / nbuf));
+	  cldrest = X(mkplan_d)(plnr, 
+				X(mkproblem_dft_d)(
+				     X(tensor_copy)(p->sz),
+				     X(mktensor_1d)(vl % nbuf, ivs, ovs),
+				     p->ri+id, p->ii+id, p->ro+od, p->io+od));
+     }
+     if (!cldrest)
+          goto nada;
+
+     pln = MKPLAN_DFT(P, &padt, apply);
+     pln->cld = cld;
+     pln->cldcpy = cldcpy;
+     pln->cldrest = cldrest;
+     pln->slv = ego;
+     pln->n = n;
+     pln->vl = vl;
+     pln->ivs = ivs * nbuf;	/* apply() steps over one batch of nbuf at a time */
+     pln->ovs = ovs * nbuf;
+     pln->roffset = roffset;
+     pln->ioffset = ioffset;
+
+     pln->nbuf = nbuf;
+     pln->bufdist = bufdist;
+
+     {
+	  opcnt t;
+	  X(ops_add)(&cld->ops, &cldcpy->ops, &t);
+	  X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
+     }
+
+     return &(pln->super.super);
+     /* failure path: release whatever was created before the jump */
+ nada:
+     X(ifree0)(bufs);
+     X(plan_destroy_internal)(cldrest);
+     X(plan_destroy_internal)(cldcpy);
+     X(plan_destroy_internal)(cld);
+     return (plan *) 0;
+}
+
+static solver *mksolver(const bufadt *adt)
+{
+     static const solver_adt vtable = { mkplan };
+     S *self = MKSOLVER(S, &vtable);
+     self->adt = adt;	/* keep the parameter table with the solver */
+     return &self->super;
+}
+
+
+void X(dft_buffered_register)(planner *p)
+{
+     /* FIXME: what are good defaults? */
+     static const bufadt adt = {
+	  /* nbuf */           8,
+	  /* maxbufsz */       (65536 / sizeof(R)),	/* R's per 65536 bytes */
+	  /* skew_alignment */ 8,
+#if HAVE_SIMD  /* an odd skew such as 5 screws up the alignment: use 6 */
+	  /* skew */           6,
+#else
+	  /* skew */           5,
+#endif
+	  /* nam */            "dft-buffered"
+     };
+     /* register one solver built from the defaults above */
+     REGISTER_SOLVER(p, mksolver(&adt));
+}
diff --git a/src/fftw3/dft/codelet-dft.h b/src/fftw3/dft/codelet-dft.h
new file mode 100644
index 0000000..1a5d376
--- /dev/null
+++ b/src/fftw3/dft/codelet-dft.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: codelet-dft.h,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+/*
+ * This header file must include every file or define every
+ * type or macro which is required to compile a codelet.
+ */
+
+#ifndef __CODELET_H__
+#define __CODELET_H__
+
+#include "ifftw.h"
+
+/**************************************************************
+ * types of codelets
+ **************************************************************/
+
+/* DFT codelets */
+typedef struct kdft_desc_s kdft_desc;
+/* genus: an okp() predicate plus a vl value, pointed to by each kdft_desc */
+typedef struct {
+     int (*okp)(
+	  const kdft_desc *desc,
+	  const R *ri, const R *ii, const R *ro, const R *io,
+	  int is, int os, int vl, int ivs, int ovs,
+	  const planner *plnr);
+     int vl;
+} kdft_genus;
+/* descriptor handed to X(kdft_register) together with the codelet itself */
+struct kdft_desc_s {
+     int sz;    /* size of transform computed */
+     const char *nam;
+     opcnt ops;
+     const kdft_genus *genus;
+     int is;
+     int os;
+     int ivs;
+     int ovs;
+};
+/* codelet entry point; strides are passed as `stride`, not int as in okp() */
+typedef void (*kdft) (const R *ri, const R *ii, R *ro, R *io,
+                      stride is, stride os, int vl, int ivs, int ovs);
+void X(kdft_register)(planner *p, kdft codelet, const kdft_desc *desc);
+
+
+typedef struct ct_desc_s ct_desc;
+/* genus for ct_desc: an okp() predicate plus a vl value */
+typedef struct {
+     int (*okp)(
+	  const struct ct_desc_s *desc,
+	  const R *rio, const R *iio, int ios, int vs, int m, int dist,
+	  const planner *plnr);
+     int vl;
+} ct_genus;
+/* descriptor taken by the dit, difsq and dif registration functions below */
+struct ct_desc_s {
+     int radix;
+     const char *nam;
+     const tw_instr *tw;
+     opcnt ops;
+     const ct_genus *genus;
+     int s1;
+     int s2;
+     int dist;
+};
+
+typedef const R *(*kdft_dit) (R *rioarray, R *iioarray, const R *W,
+                              stride ios, int m, int dist);
+void X(kdft_dit_register)(planner *p, kdft_dit codelet, const ct_desc *desc);
+
+/* difsq variant: takes two strides (is, vs) where dit/dif take one (ios) */
+typedef const R *(*kdft_difsq) (R *rioarray, R *iioarray,
+                                const R *W, stride is, stride vs,
+                                int m, int dist);
+void X(kdft_difsq_register)(planner *p, kdft_difsq codelet,
+                            const ct_desc *desc);
+
+
+typedef const R *(*kdft_dif) (R *rioarray, R *iioarray, const R *W,
+                              stride ios, int m, int dist);
+void X(kdft_dif_register)(planner *p, kdft_dif codelet, const ct_desc *desc);
+
+extern const solvtab X(solvtab_dft_standard);
+extern const solvtab X(solvtab_dft_inplace);
+/* the tables below are declared only when the matching HAVE_* macro is set */
+#if HAVE_K7
+extern const solvtab X(solvtab_dft_k7);
+#endif
+
+#if HAVE_SIMD
+extern const solvtab X(solvtab_dft_simd);
+#endif
+
+#endif				/* __CODELET_H__ */
diff --git a/src/fftw3/dft/codelets/f.h b/src/fftw3/dft/codelets/f.h
new file mode 100644
index 0000000..6a9dd0b
--- /dev/null
+++ b/src/fftw3/dft/codelets/f.h
@@ -0,0 +1 @@
+#include "t.h"  /* same stuff, no need to duplicate */
diff --git a/src/fftw3/dft/codelets/inplace/icodlist.c b/src/fftw3/dft/codelets/inplace/icodlist.c
new file mode 100644
index 0000000..708755e
--- /dev/null
+++ b/src/fftw3/dft/codelets/inplace/icodlist.c
@@ -0,0 +1,20 @@
+#include "ifftw.h"
+
+extern void X(codelet_q1_2)(planner *);  /* prototypes of the q1_N entry points listed in the table below; each takes a planner pointer */
+extern void X(codelet_q1_4)(planner *);
+extern void X(codelet_q1_8)(planner *);
+extern void X(codelet_q1_3)(planner *);
+extern void X(codelet_q1_5)(planner *);
+extern void X(codelet_q1_6)(planner *);
+
+
+extern const solvtab X(solvtab_dft_inplace);  /* declaration placed ahead of the definition; this file includes only ifftw.h */
+const solvtab X(solvtab_dft_inplace) = {  /* table of the q1_N codelet entry points declared above */
+   SOLVTAB(X(codelet_q1_2)),
+   SOLVTAB(X(codelet_q1_4)),
+   SOLVTAB(X(codelet_q1_8)),
+   SOLVTAB(X(codelet_q1_3)),
+   SOLVTAB(X(codelet_q1_5)),
+   SOLVTAB(X(codelet_q1_6)),
+   SOLVTAB_END  /* final entry of the table */
+};
diff --git a/src/fftw3/dft/codelets/inplace/q1_2.c b/src/fftw3/dft/codelets/inplace/q1_2.c
new file mode 100644
index 0000000..6057fe6
--- /dev/null
+++ b/src/fftw3/dft/codelets/inplace/q1_2.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:39:14 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twidsq -compact -variables 4 -reload-twiddle -dif -n 2 -name q1_2 -include q.h */
+
+/*
+ * This function contains 12 FP additions, 8 FP multiplications,
+ * (or, 8 additions, 4 multiplications, 4 fused multiply/add),
+ * 17 stack variables, and 16 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: q1_2.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ * $Id: q1_2.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ * $Id: q1_2.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ */
+
+#include "q.h"
+
+static const R *q1_2(R *rio, R *iio, const R *W, stride is, stride vs, int m, int dist)
+{  /* Generated in-place 2 x 2 codelet: per iteration it reads 2 rows (offsets WS(vs, v)) of 2 complex points (offsets WS(is, k)) from rio/iio and overwrites them. */
+     int i;  /* counts down the m iterations; rio and iio step by dist and W by 2 each time */
+     for (i = m; i > 0; i = i - 1, rio = rio + dist, iio = iio + dist, W = W + 2) {
+	  E T1, T2, T4, T6, T7, T8, T9, Ta, Tc, Te, Tf, Tg;
+	  T1 = rio[0];  /* load phase: every input is read before any output is stored (the update is in place) */
+	  T2 = rio[WS(is, 1)];
+	  T4 = T1 - T2;
+	  T6 = iio[0];
+	  T7 = iio[WS(is, 1)];
+	  T8 = T6 - T7;
+	  T9 = rio[WS(vs, 1)];
+	  Ta = rio[WS(vs, 1) + WS(is, 1)];
+	  Tc = T9 - Ta;
+	  Te = iio[WS(vs, 1)];
+	  Tf = iio[WS(vs, 1) + WS(is, 1)];
+	  Tg = Te - Tf;
+	  rio[0] = T1 + T2;  /* un-twiddled sums: the sum over row v is stored at offset WS(is, v) */
+	  iio[0] = T6 + T7;
+	  rio[WS(is, 1)] = T9 + Ta;
+	  iio[WS(is, 1)] = Te + Tf;
+	  {  /* differences: the row-v difference is combined with the pair (W[0], W[1]) and stored at WS(vs, 1) + WS(is, v) */
+	       E Tb, Td, T3, T5;
+	       Tb = W[0];
+	       Td = W[1];
+	       rio[WS(vs, 1) + WS(is, 1)] = FMA(Tb, Tc, Td * Tg);  /* NOTE(review): FMA/FNMS are macros defined elsewhere; presumably a*b+c and c-a*b -- confirm */
+	       iio[WS(vs, 1) + WS(is, 1)] = FNMS(Td, Tc, Tb * Tg);
+	       T3 = W[0];  /* same W entries read again (the generator was run with -reload-twiddle) */
+	       T5 = W[1];
+	       rio[WS(vs, 1)] = FMA(T3, T4, T5 * T8);
+	       iio[WS(vs, 1)] = FNMS(T5, T4, T3 * T8);
+	  }
+     }
+     return W;  /* W as advanced by the loop: 2 entries per iteration */
+}
+
+static const tw_instr twinstr[] = {  /* tw_instr entries referenced by desc below; TW_FULL/TW_NEXT are defined elsewhere */
+     {TW_FULL, 0, 2},
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 2, "q1_2", twinstr, {8, 4, 4, 0}, &GENUS, 0, 0, 0 };  /* field order: radix, nam, tw, ops, genus, s1, s2, dist */
+
+void X(codelet_q1_2) (planner *p) {  /* hands q1_2 and its descriptor to X(kdft_difsq_register) for planner p */
+     X(kdft_difsq_register) (p, q1_2, &desc);
+}
diff --git a/src/fftw3/dft/codelets/inplace/q1_3.c b/src/fftw3/dft/codelets/inplace/q1_3.c
new file mode 100644
index 0000000..a05996f
--- /dev/null
+++ b/src/fftw3/dft/codelets/inplace/q1_3.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:39:14 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twidsq -compact -variables 4 -reload-twiddle -dif -n 3 -name q1_3 -include q.h */
+
+/*
+ * This function contains 48 FP additions, 36 FP multiplications,
+ * (or, 30 additions, 18 multiplications, 18 fused multiply/add),
+ * 35 stack variables, and 36 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: q1_3.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ * $Id: q1_3.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ * $Id: q1_3.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ */
+
+#include "q.h"
+
+static const R *q1_3(R *rio, R *iio, const R *W, stride is, stride vs, int m, int dist)
+{  /* Generated in-place 3 x 3 codelet: per iteration it reads 3 rows (offsets WS(vs, v)) of 3 complex points (offsets WS(is, k)) from rio/iio and overwrites them. */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627);  /* numerically sqrt(3)/2 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);  /* 1/2 */
+     int i;  /* counts down the m iterations; rio and iio step by dist and W by 4 each time */
+     for (i = m; i > 0; i = i - 1, rio = rio + dist, iio = iio + dist, W = W + 4) {
+	  E T1, T4, T6, Tc, Td, Te, T9, Tf, Tl, To, Tq, Tw, Tx, Ty, Tt;
+	  E Tz, TR, TS, TN, TT, TF, TI, TK, TQ;
+	  {
+	       E T2, T3, Tr, Ts;
+	       T1 = rio[0];  /* load phase: every input is read before any output is stored (the update is in place) */
+	       T2 = rio[WS(is, 1)];
+	       T3 = rio[WS(is, 2)];
+	       T4 = T2 + T3;
+	       T6 = FNMS(KP500000000, T4, T1);
+	       Tc = KP866025403 * (T3 - T2);
+	       {
+		    E T7, T8, Tm, Tn;
+		    Td = iio[0];
+		    T7 = iio[WS(is, 1)];
+		    T8 = iio[WS(is, 2)];
+		    Te = T7 + T8;
+		    T9 = KP866025403 * (T7 - T8);
+		    Tf = FNMS(KP500000000, Te, Td);
+		    Tl = rio[WS(vs, 1)];
+		    Tm = rio[WS(vs, 1) + WS(is, 1)];
+		    Tn = rio[WS(vs, 1) + WS(is, 2)];
+		    To = Tm + Tn;
+		    Tq = FNMS(KP500000000, To, Tl);
+		    Tw = KP866025403 * (Tn - Tm);
+	       }
+	       Tx = iio[WS(vs, 1)];
+	       Tr = iio[WS(vs, 1) + WS(is, 1)];
+	       Ts = iio[WS(vs, 1) + WS(is, 2)];
+	       Ty = Tr + Ts;
+	       Tt = KP866025403 * (Tr - Ts);
+	       Tz = FNMS(KP500000000, Ty, Tx);
+	       {
+		    E TL, TM, TG, TH;
+		    TR = iio[WS(vs, 2)];
+		    TL = iio[WS(vs, 2) + WS(is, 1)];
+		    TM = iio[WS(vs, 2) + WS(is, 2)];
+		    TS = TL + TM;
+		    TN = KP866025403 * (TL - TM);
+		    TT = FNMS(KP500000000, TS, TR);
+		    TF = rio[WS(vs, 2)];
+		    TG = rio[WS(vs, 2) + WS(is, 1)];
+		    TH = rio[WS(vs, 2) + WS(is, 2)];
+		    TI = TG + TH;
+		    TK = FNMS(KP500000000, TI, TF);
+		    TQ = KP866025403 * (TH - TG);
+	       }
+	  }
+	  rio[0] = T1 + T4;  /* un-twiddled sums: the sum over row v is stored at offset WS(is, v) */
+	  iio[0] = Td + Te;
+	  rio[WS(is, 1)] = Tl + To;
+	  iio[WS(is, 1)] = Tx + Ty;
+	  iio[WS(is, 2)] = TR + TS;
+	  rio[WS(is, 2)] = TF + TI;
+	  {  /* remaining results: the row-v value combined with the pair (W[2k-2], W[2k-1]) is stored at WS(vs, k) + WS(is, v), k = 1..2 */
+	       E Ta, Tg, T5, Tb;
+	       Ta = T6 + T9;
+	       Tg = Tc + Tf;
+	       T5 = W[0];
+	       Tb = W[1];
+	       rio[WS(vs, 1)] = FMA(T5, Ta, Tb * Tg);  /* NOTE(review): FMA/FNMS are macros defined elsewhere; presumably a*b+c and c-a*b -- confirm */
+	       iio[WS(vs, 1)] = FNMS(Tb, Ta, T5 * Tg);
+	  }
+	  {
+	       E TW, TY, TV, TX;
+	       TW = TK - TN;
+	       TY = TT - TQ;
+	       TV = W[2];
+	       TX = W[3];
+	       rio[WS(vs, 2) + WS(is, 2)] = FMA(TV, TW, TX * TY);
+	       iio[WS(vs, 2) + WS(is, 2)] = FNMS(TX, TW, TV * TY);
+	  }
+	  {
+	       E TC, TE, TB, TD;
+	       TC = Tq - Tt;
+	       TE = Tz - Tw;
+	       TB = W[2];
+	       TD = W[3];
+	       rio[WS(vs, 2) + WS(is, 1)] = FMA(TB, TC, TD * TE);
+	       iio[WS(vs, 2) + WS(is, 1)] = FNMS(TD, TC, TB * TE);
+	  }
+	  {
+	       E Tu, TA, Tp, Tv;
+	       Tu = Tq + Tt;
+	       TA = Tw + Tz;
+	       Tp = W[0];
+	       Tv = W[1];
+	       rio[WS(vs, 1) + WS(is, 1)] = FMA(Tp, Tu, Tv * TA);
+	       iio[WS(vs, 1) + WS(is, 1)] = FNMS(Tv, Tu, Tp * TA);
+	  }
+	  {
+	       E TO, TU, TJ, TP;
+	       TO = TK + TN;
+	       TU = TQ + TT;
+	       TJ = W[0];
+	       TP = W[1];
+	       rio[WS(vs, 1) + WS(is, 2)] = FMA(TJ, TO, TP * TU);
+	       iio[WS(vs, 1) + WS(is, 2)] = FNMS(TP, TO, TJ * TU);
+	  }
+	  {
+	       E Ti, Tk, Th, Tj;
+	       Ti = T6 - T9;
+	       Tk = Tf - Tc;
+	       Th = W[2];
+	       Tj = W[3];
+	       rio[WS(vs, 2)] = FMA(Th, Ti, Tj * Tk);
+	       iio[WS(vs, 2)] = FNMS(Tj, Ti, Th * Tk);
+	  }
+     }
+     return W;  /* W as advanced by the loop: 4 entries per iteration */
+}
+
+static const tw_instr twinstr[] = {  /* tw_instr entries referenced by desc below; TW_FULL/TW_NEXT are defined elsewhere */
+     {TW_FULL, 0, 3},
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 3, "q1_3", twinstr, {30, 18, 18, 0}, &GENUS, 0, 0, 0 };  /* field order: radix, nam, tw, ops, genus, s1, s2, dist */
+
+void X(codelet_q1_3) (planner *p) {  /* hands q1_3 and its descriptor to X(kdft_difsq_register) for planner p */
+     X(kdft_difsq_register) (p, q1_3, &desc);
+}
diff --git a/src/fftw3/dft/codelets/inplace/q1_4.c b/src/fftw3/dft/codelets/inplace/q1_4.c
new file mode 100644
index 0000000..c4300f5
--- /dev/null
+++ b/src/fftw3/dft/codelets/inplace/q1_4.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:39:14 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twidsq -compact -variables 4 -reload-twiddle -dif -n 4 -name q1_4 -include q.h */
+
+/*
+ * This function contains 88 FP additions, 48 FP multiplications,
+ * (or, 64 additions, 24 multiplications, 24 fused multiply/add),
+ * 37 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: q1_4.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ * $Id: q1_4.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ * $Id: q1_4.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ */
+
+#include "q.h"
+
+static const R *q1_4(R *rio, R *iio, const R *W, stride is, stride vs, int m, int dist)
+{  /* Generated in-place 4 x 4 codelet: per iteration it reads 4 rows (offsets WS(vs, v)) of 4 complex points (offsets WS(is, k)) from rio/iio and overwrites them. */
+     int i;  /* counts down the m iterations; rio and iio step by dist and W by 6 each time */
+     for (i = m; i > 0; i = i - 1, rio = rio + dist, iio = iio + dist, W = W + 6) {
+	  E T3, Te, Tb, Tq, T6, T8, Th, Tr, Tv, TG, TD, TS, Ty, TA, TJ;
+	  E TT, TX, T18, T15, T1k, T10, T12, T1b, T1l, T1p, T1A, T1x, T1M, T1s, T1u;
+	  E T1D, T1N;
+	  {
+	       E T1, T2, T9, Ta;
+	       T1 = rio[0];  /* load phase: every input is read before any output is stored (the update is in place) */
+	       T2 = rio[WS(is, 2)];
+	       T3 = T1 + T2;
+	       Te = T1 - T2;
+	       T9 = iio[0];
+	       Ta = iio[WS(is, 2)];
+	       Tb = T9 - Ta;
+	       Tq = T9 + Ta;
+	  }
+	  {
+	       E T4, T5, Tf, Tg;
+	       T4 = rio[WS(is, 1)];
+	       T5 = rio[WS(is, 3)];
+	       T6 = T4 + T5;
+	       T8 = T4 - T5;
+	       Tf = iio[WS(is, 1)];
+	       Tg = iio[WS(is, 3)];
+	       Th = Tf - Tg;
+	       Tr = Tf + Tg;
+	  }
+	  {
+	       E Tt, Tu, TB, TC;
+	       Tt = rio[WS(vs, 1)];
+	       Tu = rio[WS(vs, 1) + WS(is, 2)];
+	       Tv = Tt + Tu;
+	       TG = Tt - Tu;
+	       TB = iio[WS(vs, 1)];
+	       TC = iio[WS(vs, 1) + WS(is, 2)];
+	       TD = TB - TC;
+	       TS = TB + TC;
+	  }
+	  {
+	       E Tw, Tx, TH, TI;
+	       Tw = rio[WS(vs, 1) + WS(is, 1)];
+	       Tx = rio[WS(vs, 1) + WS(is, 3)];
+	       Ty = Tw + Tx;
+	       TA = Tw - Tx;
+	       TH = iio[WS(vs, 1) + WS(is, 1)];
+	       TI = iio[WS(vs, 1) + WS(is, 3)];
+	       TJ = TH - TI;
+	       TT = TH + TI;
+	  }
+	  {
+	       E TV, TW, T13, T14;
+	       TV = rio[WS(vs, 2)];
+	       TW = rio[WS(vs, 2) + WS(is, 2)];
+	       TX = TV + TW;
+	       T18 = TV - TW;
+	       T13 = iio[WS(vs, 2)];
+	       T14 = iio[WS(vs, 2) + WS(is, 2)];
+	       T15 = T13 - T14;
+	       T1k = T13 + T14;
+	  }
+	  {
+	       E TY, TZ, T19, T1a;
+	       TY = rio[WS(vs, 2) + WS(is, 1)];
+	       TZ = rio[WS(vs, 2) + WS(is, 3)];
+	       T10 = TY + TZ;
+	       T12 = TY - TZ;
+	       T19 = iio[WS(vs, 2) + WS(is, 1)];
+	       T1a = iio[WS(vs, 2) + WS(is, 3)];
+	       T1b = T19 - T1a;
+	       T1l = T19 + T1a;
+	  }
+	  {
+	       E T1n, T1o, T1v, T1w;
+	       T1n = rio[WS(vs, 3)];
+	       T1o = rio[WS(vs, 3) + WS(is, 2)];
+	       T1p = T1n + T1o;
+	       T1A = T1n - T1o;
+	       T1v = iio[WS(vs, 3)];
+	       T1w = iio[WS(vs, 3) + WS(is, 2)];
+	       T1x = T1v - T1w;
+	       T1M = T1v + T1w;
+	  }
+	  {
+	       E T1q, T1r, T1B, T1C;
+	       T1q = rio[WS(vs, 3) + WS(is, 1)];
+	       T1r = rio[WS(vs, 3) + WS(is, 3)];
+	       T1s = T1q + T1r;
+	       T1u = T1q - T1r;
+	       T1B = iio[WS(vs, 3) + WS(is, 1)];
+	       T1C = iio[WS(vs, 3) + WS(is, 3)];
+	       T1D = T1B - T1C;
+	       T1N = T1B + T1C;
+	  }
+	  rio[0] = T3 + T6;  /* un-twiddled sums: the sum over row v is stored at offset WS(is, v) */
+	  iio[0] = Tq + Tr;
+	  rio[WS(is, 1)] = Tv + Ty;
+	  iio[WS(is, 1)] = TS + TT;
+	  rio[WS(is, 2)] = TX + T10;
+	  iio[WS(is, 2)] = T1k + T1l;
+	  iio[WS(is, 3)] = T1M + T1N;
+	  rio[WS(is, 3)] = T1p + T1s;
+	  {  /* remaining results: the row-v value combined with the pair (W[2k-2], W[2k-1]) is stored at WS(vs, k) + WS(is, v), k = 1..3 */
+	       E Tc, Ti, T7, Td;
+	       Tc = T8 + Tb;
+	       Ti = Te - Th;
+	       T7 = W[4];
+	       Td = W[5];
+	       iio[WS(vs, 3)] = FNMS(Td, Ti, T7 * Tc);  /* NOTE(review): FMA/FNMS are macros defined elsewhere; presumably a*b+c and c-a*b -- confirm */
+	       rio[WS(vs, 3)] = FMA(Td, Tc, T7 * Ti);
+	  }
+	  {
+	       E T1K, T1O, T1J, T1L;
+	       T1K = T1p - T1s;
+	       T1O = T1M - T1N;
+	       T1J = W[2];
+	       T1L = W[3];
+	       rio[WS(vs, 2) + WS(is, 3)] = FMA(T1J, T1K, T1L * T1O);
+	       iio[WS(vs, 2) + WS(is, 3)] = FNMS(T1L, T1K, T1J * T1O);
+	  }
+	  {
+	       E Tk, Tm, Tj, Tl;
+	       Tk = Tb - T8;
+	       Tm = Te + Th;
+	       Tj = W[0];
+	       Tl = W[1];
+	       iio[WS(vs, 1)] = FNMS(Tl, Tm, Tj * Tk);
+	       rio[WS(vs, 1)] = FMA(Tl, Tk, Tj * Tm);
+	  }
+	  {
+	       E To, Ts, Tn, Tp;
+	       To = T3 - T6;
+	       Ts = Tq - Tr;
+	       Tn = W[2];
+	       Tp = W[3];
+	       rio[WS(vs, 2)] = FMA(Tn, To, Tp * Ts);
+	       iio[WS(vs, 2)] = FNMS(Tp, To, Tn * Ts);
+	  }
+	  {
+	       E T16, T1c, T11, T17;
+	       T16 = T12 + T15;
+	       T1c = T18 - T1b;
+	       T11 = W[4];
+	       T17 = W[5];
+	       iio[WS(vs, 3) + WS(is, 2)] = FNMS(T17, T1c, T11 * T16);
+	       rio[WS(vs, 3) + WS(is, 2)] = FMA(T17, T16, T11 * T1c);
+	  }
+	  {
+	       E T1G, T1I, T1F, T1H;
+	       T1G = T1x - T1u;
+	       T1I = T1A + T1D;
+	       T1F = W[0];
+	       T1H = W[1];
+	       iio[WS(vs, 1) + WS(is, 3)] = FNMS(T1H, T1I, T1F * T1G);
+	       rio[WS(vs, 1) + WS(is, 3)] = FMA(T1H, T1G, T1F * T1I);
+	  }
+	  {
+	       E TQ, TU, TP, TR;
+	       TQ = Tv - Ty;
+	       TU = TS - TT;
+	       TP = W[2];
+	       TR = W[3];
+	       rio[WS(vs, 2) + WS(is, 1)] = FMA(TP, TQ, TR * TU);
+	       iio[WS(vs, 2) + WS(is, 1)] = FNMS(TR, TQ, TP * TU);
+	  }
+	  {
+	       E T1e, T1g, T1d, T1f;
+	       T1e = T15 - T12;
+	       T1g = T18 + T1b;
+	       T1d = W[0];
+	       T1f = W[1];
+	       iio[WS(vs, 1) + WS(is, 2)] = FNMS(T1f, T1g, T1d * T1e);
+	       rio[WS(vs, 1) + WS(is, 2)] = FMA(T1f, T1e, T1d * T1g);
+	  }
+	  {
+	       E T1i, T1m, T1h, T1j;
+	       T1i = TX - T10;
+	       T1m = T1k - T1l;
+	       T1h = W[2];
+	       T1j = W[3];
+	       rio[WS(vs, 2) + WS(is, 2)] = FMA(T1h, T1i, T1j * T1m);
+	       iio[WS(vs, 2) + WS(is, 2)] = FNMS(T1j, T1i, T1h * T1m);
+	  }
+	  {
+	       E T1y, T1E, T1t, T1z;
+	       T1y = T1u + T1x;
+	       T1E = T1A - T1D;
+	       T1t = W[4];
+	       T1z = W[5];
+	       iio[WS(vs, 3) + WS(is, 3)] = FNMS(T1z, T1E, T1t * T1y);
+	       rio[WS(vs, 3) + WS(is, 3)] = FMA(T1z, T1y, T1t * T1E);
+	  }
+	  {
+	       E TM, TO, TL, TN;
+	       TM = TD - TA;
+	       TO = TG + TJ;
+	       TL = W[0];
+	       TN = W[1];
+	       iio[WS(vs, 1) + WS(is, 1)] = FNMS(TN, TO, TL * TM);
+	       rio[WS(vs, 1) + WS(is, 1)] = FMA(TN, TM, TL * TO);
+	  }
+	  {
+	       E TE, TK, Tz, TF;
+	       TE = TA + TD;
+	       TK = TG - TJ;
+	       Tz = W[4];
+	       TF = W[5];
+	       iio[WS(vs, 3) + WS(is, 1)] = FNMS(TF, TK, Tz * TE);
+	       rio[WS(vs, 3) + WS(is, 1)] = FMA(TF, TE, Tz * TK);
+	  }
+     }
+     return W;  /* W as advanced by the loop: 6 entries per iteration */
+}
+
+static const tw_instr twinstr[] = {  /* tw_instr entries referenced by desc below; TW_FULL/TW_NEXT are defined elsewhere */
+     {TW_FULL, 0, 4},
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 4, "q1_4", twinstr, {64, 24, 24, 0}, &GENUS, 0, 0, 0 };  /* field order: radix, nam, tw, ops, genus, s1, s2, dist */
+
+void X(codelet_q1_4) (planner *p) {  /* hands q1_4 and its descriptor to X(kdft_difsq_register) for planner p */
+     X(kdft_difsq_register) (p, q1_4, &desc);
+}
diff --git a/src/fftw3/dft/codelets/inplace/q1_5.c b/src/fftw3/dft/codelets/inplace/q1_5.c
new file mode 100644
index 0000000..144dcb8
--- /dev/null
+++ b/src/fftw3/dft/codelets/inplace/q1_5.c
@@ -0,0 +1,477 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:39:14 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twidsq -compact -variables 4 -reload-twiddle -dif -n 5 -name q1_5 -include q.h */
+
+/*
+ * This function contains 200 FP additions, 140 FP multiplications,
+ * (or, 130 additions, 70 multiplications, 70 fused multiply/add),
+ * 75 stack variables, and 100 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: q1_5.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ * $Id: q1_5.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ * $Id: q1_5.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ */
+
+#include "q.h"
+
+static const R *q1_5(R *rio, R *iio, const R *W, stride is, stride vs, int m, int dist)
+{  /* Generated in-place 5 x 5 codelet: per iteration it reads 5 rows (offsets WS(vs, v)) of 5 complex points (offsets WS(is, k)) from rio/iio and overwrites them. */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000);  /* 1/4 */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438);  /* numerically sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634);  /* numerically sin(2*pi/5) */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590);  /* numerically sqrt(5)/4 */
+     int i;  /* counts down the m iterations; rio and iio step by dist and W by 8 each time */
+     for (i = m; i > 0; i = i - 1, rio = rio + dist, iio = iio + dist, W = W + 8) {
+	  E T1, Ta, TG, Tv, T8, Tb, Tp, Tj, TD, To, Tq, Tr, TN, TW, T1s;
+	  E T1h, TU, TX, T1b, T15, T1p, T1a, T1c, T1d, T1z, T1I, T2e, T23, T1G, T1J;
+	  E T1X, T1R, T2b, T1W, T1Y, T1Z, T3v, T3p, T3J, T3u, T3w, T3x, T37, T3g, T3M;
+	  E T3B, T3e, T3h, T2l, T2u, T30, T2P, T2s, T2v, T2J, T2D, T2X, T2I, T2K, T2L;
+	  {
+	       E T7, Tu, T4, Tt;
+	       T1 = rio[0];  /* load phase: every input is read before any output is stored (the update is in place) */
+	       {
+		    E T5, T6, T2, T3;
+		    T5 = rio[WS(is, 2)];
+		    T6 = rio[WS(is, 3)];
+		    T7 = T5 + T6;
+		    Tu = T5 - T6;
+		    T2 = rio[WS(is, 1)];
+		    T3 = rio[WS(is, 4)];
+		    T4 = T2 + T3;
+		    Tt = T2 - T3;
+	       }
+	       Ta = KP559016994 * (T4 - T7);
+	       TG = FNMS(KP587785252, Tt, KP951056516 * Tu);
+	       Tv = FMA(KP951056516, Tt, KP587785252 * Tu);
+	       T8 = T4 + T7;
+	       Tb = FNMS(KP250000000, T8, T1);
+	  }
+	  {
+	       E Ti, Tn, Tf, Tm;
+	       Tp = iio[0];
+	       {
+		    E Tg, Th, Td, Te;
+		    Tg = iio[WS(is, 2)];
+		    Th = iio[WS(is, 3)];
+		    Ti = Tg - Th;
+		    Tn = Tg + Th;
+		    Td = iio[WS(is, 1)];
+		    Te = iio[WS(is, 4)];
+		    Tf = Td - Te;
+		    Tm = Td + Te;
+	       }
+	       Tj = FMA(KP951056516, Tf, KP587785252 * Ti);
+	       TD = FNMS(KP587785252, Tf, KP951056516 * Ti);
+	       To = KP559016994 * (Tm - Tn);
+	       Tq = Tm + Tn;
+	       Tr = FNMS(KP250000000, Tq, Tp);
+	  }
+	  {
+	       E TT, T1g, TQ, T1f;
+	       TN = rio[WS(vs, 1)];
+	       {
+		    E TR, TS, TO, TP;
+		    TR = rio[WS(vs, 1) + WS(is, 2)];
+		    TS = rio[WS(vs, 1) + WS(is, 3)];
+		    TT = TR + TS;
+		    T1g = TR - TS;
+		    TO = rio[WS(vs, 1) + WS(is, 1)];
+		    TP = rio[WS(vs, 1) + WS(is, 4)];
+		    TQ = TO + TP;
+		    T1f = TO - TP;
+	       }
+	       TW = KP559016994 * (TQ - TT);
+	       T1s = FNMS(KP587785252, T1f, KP951056516 * T1g);
+	       T1h = FMA(KP951056516, T1f, KP587785252 * T1g);
+	       TU = TQ + TT;
+	       TX = FNMS(KP250000000, TU, TN);
+	  }
+	  {
+	       E T14, T19, T11, T18;
+	       T1b = iio[WS(vs, 1)];
+	       {
+		    E T12, T13, TZ, T10;
+		    T12 = iio[WS(vs, 1) + WS(is, 2)];
+		    T13 = iio[WS(vs, 1) + WS(is, 3)];
+		    T14 = T12 - T13;
+		    T19 = T12 + T13;
+		    TZ = iio[WS(vs, 1) + WS(is, 1)];
+		    T10 = iio[WS(vs, 1) + WS(is, 4)];
+		    T11 = TZ - T10;
+		    T18 = TZ + T10;
+	       }
+	       T15 = FMA(KP951056516, T11, KP587785252 * T14);
+	       T1p = FNMS(KP587785252, T11, KP951056516 * T14);
+	       T1a = KP559016994 * (T18 - T19);
+	       T1c = T18 + T19;
+	       T1d = FNMS(KP250000000, T1c, T1b);
+	  }
+	  {
+	       E T1F, T22, T1C, T21;
+	       T1z = rio[WS(vs, 2)];
+	       {
+		    E T1D, T1E, T1A, T1B;
+		    T1D = rio[WS(vs, 2) + WS(is, 2)];
+		    T1E = rio[WS(vs, 2) + WS(is, 3)];
+		    T1F = T1D + T1E;
+		    T22 = T1D - T1E;
+		    T1A = rio[WS(vs, 2) + WS(is, 1)];
+		    T1B = rio[WS(vs, 2) + WS(is, 4)];
+		    T1C = T1A + T1B;
+		    T21 = T1A - T1B;
+	       }
+	       T1I = KP559016994 * (T1C - T1F);
+	       T2e = FNMS(KP587785252, T21, KP951056516 * T22);
+	       T23 = FMA(KP951056516, T21, KP587785252 * T22);
+	       T1G = T1C + T1F;
+	       T1J = FNMS(KP250000000, T1G, T1z);
+	  }
+	  {
+	       E T1Q, T1V, T1N, T1U;
+	       T1X = iio[WS(vs, 2)];
+	       {
+		    E T1O, T1P, T1L, T1M;
+		    T1O = iio[WS(vs, 2) + WS(is, 2)];
+		    T1P = iio[WS(vs, 2) + WS(is, 3)];
+		    T1Q = T1O - T1P;
+		    T1V = T1O + T1P;
+		    T1L = iio[WS(vs, 2) + WS(is, 1)];
+		    T1M = iio[WS(vs, 2) + WS(is, 4)];
+		    T1N = T1L - T1M;
+		    T1U = T1L + T1M;
+	       }
+	       T1R = FMA(KP951056516, T1N, KP587785252 * T1Q);
+	       T2b = FNMS(KP587785252, T1N, KP951056516 * T1Q);
+	       T1W = KP559016994 * (T1U - T1V);
+	       T1Y = T1U + T1V;
+	       T1Z = FNMS(KP250000000, T1Y, T1X);
+	  }
+	  {
+	       E T3o, T3t, T3l, T3s;
+	       T3v = iio[WS(vs, 4)];
+	       {
+		    E T3m, T3n, T3j, T3k;
+		    T3m = iio[WS(vs, 4) + WS(is, 2)];
+		    T3n = iio[WS(vs, 4) + WS(is, 3)];
+		    T3o = T3m - T3n;
+		    T3t = T3m + T3n;
+		    T3j = iio[WS(vs, 4) + WS(is, 1)];
+		    T3k = iio[WS(vs, 4) + WS(is, 4)];
+		    T3l = T3j - T3k;
+		    T3s = T3j + T3k;
+	       }
+	       T3p = FMA(KP951056516, T3l, KP587785252 * T3o);
+	       T3J = FNMS(KP587785252, T3l, KP951056516 * T3o);
+	       T3u = KP559016994 * (T3s - T3t);
+	       T3w = T3s + T3t;
+	       T3x = FNMS(KP250000000, T3w, T3v);
+	  }
+	  {
+	       E T3d, T3A, T3a, T3z;
+	       T37 = rio[WS(vs, 4)];
+	       {
+		    E T3b, T3c, T38, T39;
+		    T3b = rio[WS(vs, 4) + WS(is, 2)];
+		    T3c = rio[WS(vs, 4) + WS(is, 3)];
+		    T3d = T3b + T3c;
+		    T3A = T3b - T3c;
+		    T38 = rio[WS(vs, 4) + WS(is, 1)];
+		    T39 = rio[WS(vs, 4) + WS(is, 4)];
+		    T3a = T38 + T39;
+		    T3z = T38 - T39;
+	       }
+	       T3g = KP559016994 * (T3a - T3d);
+	       T3M = FNMS(KP587785252, T3z, KP951056516 * T3A);
+	       T3B = FMA(KP951056516, T3z, KP587785252 * T3A);
+	       T3e = T3a + T3d;
+	       T3h = FNMS(KP250000000, T3e, T37);
+	  }
+	  {
+	       E T2r, T2O, T2o, T2N;
+	       T2l = rio[WS(vs, 3)];
+	       {
+		    E T2p, T2q, T2m, T2n;
+		    T2p = rio[WS(vs, 3) + WS(is, 2)];
+		    T2q = rio[WS(vs, 3) + WS(is, 3)];
+		    T2r = T2p + T2q;
+		    T2O = T2p - T2q;
+		    T2m = rio[WS(vs, 3) + WS(is, 1)];
+		    T2n = rio[WS(vs, 3) + WS(is, 4)];
+		    T2o = T2m + T2n;
+		    T2N = T2m - T2n;
+	       }
+	       T2u = KP559016994 * (T2o - T2r);
+	       T30 = FNMS(KP587785252, T2N, KP951056516 * T2O);
+	       T2P = FMA(KP951056516, T2N, KP587785252 * T2O);
+	       T2s = T2o + T2r;
+	       T2v = FNMS(KP250000000, T2s, T2l);
+	  }
+	  {
+	       E T2C, T2H, T2z, T2G;
+	       T2J = iio[WS(vs, 3)];
+	       {
+		    E T2A, T2B, T2x, T2y;
+		    T2A = iio[WS(vs, 3) + WS(is, 2)];
+		    T2B = iio[WS(vs, 3) + WS(is, 3)];
+		    T2C = T2A - T2B;
+		    T2H = T2A + T2B;
+		    T2x = iio[WS(vs, 3) + WS(is, 1)];
+		    T2y = iio[WS(vs, 3) + WS(is, 4)];
+		    T2z = T2x - T2y;
+		    T2G = T2x + T2y;
+	       }
+	       T2D = FMA(KP951056516, T2z, KP587785252 * T2C);
+	       T2X = FNMS(KP587785252, T2z, KP951056516 * T2C);
+	       T2I = KP559016994 * (T2G - T2H);
+	       T2K = T2G + T2H;
+	       T2L = FNMS(KP250000000, T2K, T2J);
+	  }
+	  rio[0] = T1 + T8;  /* un-twiddled sums: the sum over row v is stored at offset WS(is, v) */
+	  iio[0] = Tp + Tq;
+	  rio[WS(is, 1)] = TN + TU;
+	  iio[WS(is, 1)] = T1b + T1c;
+	  rio[WS(is, 2)] = T1z + T1G;
+	  iio[WS(is, 2)] = T1X + T1Y;
+	  iio[WS(is, 4)] = T3v + T3w;
+	  rio[WS(is, 4)] = T37 + T3e;
+	  rio[WS(is, 3)] = T2l + T2s;
+	  iio[WS(is, 3)] = T2J + T2K;
+	  {  /* remaining results: the row-v value combined with the pair (W[2k-2], W[2k-1]) is stored at WS(vs, k) + WS(is, v), k = 1..4 */
+	       E Tk, Ty, Tw, TA, Tc, Ts;
+	       Tc = Ta + Tb;
+	       Tk = Tc + Tj;
+	       Ty = Tc - Tj;
+	       Ts = To + Tr;
+	       Tw = Ts - Tv;
+	       TA = Tv + Ts;
+	       {
+		    E T9, Tl, Tx, Tz;
+		    T9 = W[0];
+		    Tl = W[1];
+		    rio[WS(vs, 1)] = FMA(T9, Tk, Tl * Tw);  /* NOTE(review): FMA/FNMS are macros defined elsewhere; presumably a*b+c and c-a*b -- confirm */
+		    iio[WS(vs, 1)] = FNMS(Tl, Tk, T9 * Tw);
+		    Tx = W[6];
+		    Tz = W[7];
+		    rio[WS(vs, 4)] = FMA(Tx, Ty, Tz * TA);
+		    iio[WS(vs, 4)] = FNMS(Tz, Ty, Tx * TA);
+	       }
+	  }
+	  {
+	       E TE, TK, TI, TM, TC, TH;
+	       TC = Tb - Ta;
+	       TE = TC - TD;
+	       TK = TC + TD;
+	       TH = Tr - To;
+	       TI = TG + TH;
+	       TM = TH - TG;
+	       {
+		    E TB, TF, TJ, TL;
+		    TB = W[2];
+		    TF = W[3];
+		    rio[WS(vs, 2)] = FMA(TB, TE, TF * TI);
+		    iio[WS(vs, 2)] = FNMS(TF, TE, TB * TI);
+		    TJ = W[4];
+		    TL = W[5];
+		    rio[WS(vs, 3)] = FMA(TJ, TK, TL * TM);
+		    iio[WS(vs, 3)] = FNMS(TL, TK, TJ * TM);
+	       }
+	  }
+	  {
+	       E T2c, T2i, T2g, T2k, T2a, T2f;
+	       T2a = T1J - T1I;
+	       T2c = T2a - T2b;
+	       T2i = T2a + T2b;
+	       T2f = T1Z - T1W;
+	       T2g = T2e + T2f;
+	       T2k = T2f - T2e;
+	       {
+		    E T29, T2d, T2h, T2j;
+		    T29 = W[2];
+		    T2d = W[3];
+		    rio[WS(vs, 2) + WS(is, 2)] = FMA(T29, T2c, T2d * T2g);
+		    iio[WS(vs, 2) + WS(is, 2)] = FNMS(T2d, T2c, T29 * T2g);
+		    T2h = W[4];
+		    T2j = W[5];
+		    rio[WS(vs, 3) + WS(is, 2)] = FMA(T2h, T2i, T2j * T2k);
+		    iio[WS(vs, 3) + WS(is, 2)] = FNMS(T2j, T2i, T2h * T2k);
+	       }
+	  }
+	  {
+	       E T3K, T3Q, T3O, T3S, T3I, T3N;
+	       T3I = T3h - T3g;
+	       T3K = T3I - T3J;
+	       T3Q = T3I + T3J;
+	       T3N = T3x - T3u;
+	       T3O = T3M + T3N;
+	       T3S = T3N - T3M;
+	       {
+		    E T3H, T3L, T3P, T3R;
+		    T3H = W[2];
+		    T3L = W[3];
+		    rio[WS(vs, 2) + WS(is, 4)] = FMA(T3H, T3K, T3L * T3O);
+		    iio[WS(vs, 2) + WS(is, 4)] = FNMS(T3L, T3K, T3H * T3O);
+		    T3P = W[4];
+		    T3R = W[5];
+		    rio[WS(vs, 3) + WS(is, 4)] = FMA(T3P, T3Q, T3R * T3S);
+		    iio[WS(vs, 3) + WS(is, 4)] = FNMS(T3R, T3Q, T3P * T3S);
+	       }
+	  }
+	  {
+	       E T1S, T26, T24, T28, T1K, T20;
+	       T1K = T1I + T1J;
+	       T1S = T1K + T1R;
+	       T26 = T1K - T1R;
+	       T20 = T1W + T1Z;
+	       T24 = T20 - T23;
+	       T28 = T23 + T20;
+	       {
+		    E T1H, T1T, T25, T27;
+		    T1H = W[0];
+		    T1T = W[1];
+		    rio[WS(vs, 1) + WS(is, 2)] = FMA(T1H, T1S, T1T * T24);
+		    iio[WS(vs, 1) + WS(is, 2)] = FNMS(T1T, T1S, T1H * T24);
+		    T25 = W[6];
+		    T27 = W[7];
+		    rio[WS(vs, 4) + WS(is, 2)] = FMA(T25, T26, T27 * T28);
+		    iio[WS(vs, 4) + WS(is, 2)] = FNMS(T27, T26, T25 * T28);
+	       }
+	  }
+	  {
+	       E T2E, T2S, T2Q, T2U, T2w, T2M;
+	       T2w = T2u + T2v;
+	       T2E = T2w + T2D;
+	       T2S = T2w - T2D;
+	       T2M = T2I + T2L;
+	       T2Q = T2M - T2P;
+	       T2U = T2P + T2M;
+	       {
+		    E T2t, T2F, T2R, T2T;
+		    T2t = W[0];
+		    T2F = W[1];
+		    rio[WS(vs, 1) + WS(is, 3)] = FMA(T2t, T2E, T2F * T2Q);
+		    iio[WS(vs, 1) + WS(is, 3)] = FNMS(T2F, T2E, T2t * T2Q);
+		    T2R = W[6];
+		    T2T = W[7];
+		    rio[WS(vs, 4) + WS(is, 3)] = FMA(T2R, T2S, T2T * T2U);
+		    iio[WS(vs, 4) + WS(is, 3)] = FNMS(T2T, T2S, T2R * T2U);
+	       }
+	  }
+	  {
+	       E T2Y, T34, T32, T36, T2W, T31;
+	       T2W = T2v - T2u;
+	       T2Y = T2W - T2X;
+	       T34 = T2W + T2X;
+	       T31 = T2L - T2I;
+	       T32 = T30 + T31;
+	       T36 = T31 - T30;
+	       {
+		    E T2V, T2Z, T33, T35;
+		    T2V = W[2];
+		    T2Z = W[3];
+		    rio[WS(vs, 2) + WS(is, 3)] = FMA(T2V, T2Y, T2Z * T32);
+		    iio[WS(vs, 2) + WS(is, 3)] = FNMS(T2Z, T2Y, T2V * T32);
+		    T33 = W[4];
+		    T35 = W[5];
+		    rio[WS(vs, 3) + WS(is, 3)] = FMA(T33, T34, T35 * T36);
+		    iio[WS(vs, 3) + WS(is, 3)] = FNMS(T35, T34, T33 * T36);
+	       }
+	  }
+	  {
+	       E T3q, T3E, T3C, T3G, T3i, T3y;
+	       T3i = T3g + T3h;
+	       T3q = T3i + T3p;
+	       T3E = T3i - T3p;
+	       T3y = T3u + T3x;
+	       T3C = T3y - T3B;
+	       T3G = T3B + T3y;
+	       {
+		    E T3f, T3r, T3D, T3F;
+		    T3f = W[0];
+		    T3r = W[1];
+		    rio[WS(vs, 1) + WS(is, 4)] = FMA(T3f, T3q, T3r * T3C);
+		    iio[WS(vs, 1) + WS(is, 4)] = FNMS(T3r, T3q, T3f * T3C);
+		    T3D = W[6];
+		    T3F = W[7];
+		    rio[WS(vs, 4) + WS(is, 4)] = FMA(T3D, T3E, T3F * T3G);
+		    iio[WS(vs, 4) + WS(is, 4)] = FNMS(T3F, T3E, T3D * T3G);
+	       }
+	  }
+	  {
+	       E T1q, T1w, T1u, T1y, T1o, T1t;
+	       T1o = TX - TW;
+	       T1q = T1o - T1p;
+	       T1w = T1o + T1p;
+	       T1t = T1d - T1a;
+	       T1u = T1s + T1t;
+	       T1y = T1t - T1s;
+	       {
+		    E T1n, T1r, T1v, T1x;
+		    T1n = W[2];
+		    T1r = W[3];
+		    rio[WS(vs, 2) + WS(is, 1)] = FMA(T1n, T1q, T1r * T1u);
+		    iio[WS(vs, 2) + WS(is, 1)] = FNMS(T1r, T1q, T1n * T1u);
+		    T1v = W[4];
+		    T1x = W[5];
+		    rio[WS(vs, 3) + WS(is, 1)] = FMA(T1v, T1w, T1x * T1y);
+		    iio[WS(vs, 3) + WS(is, 1)] = FNMS(T1x, T1w, T1v * T1y);
+	       }
+	  }
+	  {
+	       E T16, T1k, T1i, T1m, TY, T1e;
+	       TY = TW + TX;
+	       T16 = TY + T15;
+	       T1k = TY - T15;
+	       T1e = T1a + T1d;
+	       T1i = T1e - T1h;
+	       T1m = T1h + T1e;
+	       {
+		    E TV, T17, T1j, T1l;
+		    TV = W[0];
+		    T17 = W[1];
+		    rio[WS(vs, 1) + WS(is, 1)] = FMA(TV, T16, T17 * T1i);
+		    iio[WS(vs, 1) + WS(is, 1)] = FNMS(T17, T16, TV * T1i);
+		    T1j = W[6];
+		    T1l = W[7];
+		    rio[WS(vs, 4) + WS(is, 1)] = FMA(T1j, T1k, T1l * T1m);
+		    iio[WS(vs, 4) + WS(is, 1)] = FNMS(T1l, T1k, T1j * T1m);
+	       }
+	  }
+     }
+     return W;  /* W as advanced by the loop: 8 entries per iteration */
+}
+
+static const tw_instr twinstr[] = {  /* tw_instr entries referenced by desc below; TW_FULL/TW_NEXT are defined elsewhere */
+     {TW_FULL, 0, 5},
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 5, "q1_5", twinstr, {130, 70, 70, 0}, &GENUS, 0, 0, 0 };  /* field order: radix, nam, tw, ops, genus, s1, s2, dist */
+
+void X(codelet_q1_5) (planner *p) {  /* hands q1_5 and its descriptor to X(kdft_difsq_register) for planner p */
+     X(kdft_difsq_register) (p, q1_5, &desc);
+}
diff --git a/src/fftw3/dft/codelets/inplace/q1_6.c b/src/fftw3/dft/codelets/inplace/q1_6.c
new file mode 100644
index 0000000..82bdac3
--- /dev/null
+++ b/src/fftw3/dft/codelets/inplace/q1_6.c
@@ -0,0 +1,654 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:39:14 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twidsq -compact -variables 4 -reload-twiddle -dif -n 6 -name q1_6 -include q.h */
+
+/*
+ * This function contains 276 FP additions, 168 FP multiplications,
+ * (or, 192 additions, 84 multiplications, 84 fused multiply/add),
+ * 85 stack variables, and 144 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: q1_6.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ * $Id: q1_6.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ * $Id: q1_6.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ */
+
+#include "q.h"
+
+static const R *q1_6(R *rio, R *iio, const R *W, stride is, stride vs, int m, int dist)
+{ /* Generated by gen_twidsq (-dif -n 6, see file header). Each pass reads the 36 rio/iio pairs at [WS(vs, v) + WS(is, k)], v,k = 0..5, combines the six entries of each row v into six results j = 0..5, and stores result j at [WS(vs, j) + WS(is, v)]. rio/iio are presumably real/imaginary parts -- TODO confirm. */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
+     int i;
+     for (i = m; i > 0; i = i - 1, rio = rio + dist, iio = iio + dist, W = W + 10) { /* m passes; each pass steps rio/iio by dist and W by 10 entries (W[0]..W[9] are the ones read below) */
+	  E T3, Tc, Tt, TM, TX, T16, T1n, T1G, T2h, T2A, T1R, T20, T2L, T2U, T3b; /* pass-scope temporaries: written in the load blocks, consumed by the stores further down */
+	  E T3u, T3F, T3O, T45, T4o, T4Z, T5i, T4z, T4I, Ta, TP, Tf, Tq, Tn, TN;
+	  E Tu, TJ, T14, T1J, T19, T1k, T1h, T1H, T1o, T1D, T2b, T2B, T2i, T2x, T1Y;
+	  E T2D, T23, T2e, T2S, T3x, T2X, T38, T35, T3v, T3c, T3r, T3M, T4r, T3R, T42;
+	  E T3Z, T4p, T46, T4l, T4T, T5j, T50, T5f, T4G, T5l, T4L, T4W;
+	  { /* rows v = 0..2: load entries k = 0 and k = 3, keep their sum and difference */
+	       E T1, T2, T1l, T1m;
+	       T1 = rio[0];
+	       T2 = rio[WS(is, 3)];
+	       T3 = T1 + T2;
+	       Tc = T1 - T2;
+	       {
+		    E Tr, Ts, TV, TW;
+		    Tr = iio[0];
+		    Ts = iio[WS(is, 3)];
+		    Tt = Tr - Ts;
+		    TM = Tr + Ts;
+		    TV = rio[WS(vs, 1)];
+		    TW = rio[WS(vs, 1) + WS(is, 3)];
+		    TX = TV + TW;
+		    T16 = TV - TW;
+	       }
+	       T1l = iio[WS(vs, 1)];
+	       T1m = iio[WS(vs, 1) + WS(is, 3)];
+	       T1n = T1l - T1m;
+	       T1G = T1l + T1m;
+	       {
+		    E T2f, T2g, T1P, T1Q;
+		    T2f = iio[WS(vs, 2)];
+		    T2g = iio[WS(vs, 2) + WS(is, 3)];
+		    T2h = T2f - T2g;
+		    T2A = T2f + T2g;
+		    T1P = rio[WS(vs, 2)];
+		    T1Q = rio[WS(vs, 2) + WS(is, 3)];
+		    T1R = T1P + T1Q;
+		    T20 = T1P - T1Q;
+	       }
+	  }
+	  { /* rows v = 3..5: same treatment of entries k = 0 and k = 3 */
+	       E T2J, T2K, T43, T44;
+	       T2J = rio[WS(vs, 3)];
+	       T2K = rio[WS(vs, 3) + WS(is, 3)];
+	       T2L = T2J + T2K;
+	       T2U = T2J - T2K;
+	       {
+		    E T39, T3a, T3D, T3E;
+		    T39 = iio[WS(vs, 3)];
+		    T3a = iio[WS(vs, 3) + WS(is, 3)];
+		    T3b = T39 - T3a;
+		    T3u = T39 + T3a;
+		    T3D = rio[WS(vs, 4)];
+		    T3E = rio[WS(vs, 4) + WS(is, 3)];
+		    T3F = T3D + T3E;
+		    T3O = T3D - T3E;
+	       }
+	       T43 = iio[WS(vs, 4)];
+	       T44 = iio[WS(vs, 4) + WS(is, 3)];
+	       T45 = T43 - T44;
+	       T4o = T43 + T44;
+	       {
+		    E T4X, T4Y, T4x, T4y;
+		    T4X = iio[WS(vs, 5)];
+		    T4Y = iio[WS(vs, 5) + WS(is, 3)];
+		    T4Z = T4X - T4Y;
+		    T5i = T4X + T4Y;
+		    T4x = rio[WS(vs, 5)];
+		    T4y = rio[WS(vs, 5) + WS(is, 3)];
+		    T4z = T4x + T4y;
+		    T4I = T4x - T4y;
+	       }
+	  }
+	  { /* this block and the next eleven load entries k = 2, 5, 4, 1 of one row (rio and iio in separate blocks) and form pairwise sums, differences and KP866025403-scaled terms */
+	       E T6, Td, T9, Te;
+	       {
+		    E T4, T5, T7, T8;
+		    T4 = rio[WS(is, 2)];
+		    T5 = rio[WS(is, 5)];
+		    T6 = T4 + T5;
+		    Td = T4 - T5;
+		    T7 = rio[WS(is, 4)];
+		    T8 = rio[WS(is, 1)];
+		    T9 = T7 + T8;
+		    Te = T7 - T8;
+	       }
+	       Ta = T6 + T9;
+	       TP = KP866025403 * (T9 - T6);
+	       Tf = Td + Te;
+	       Tq = KP866025403 * (Te - Td);
+	  }
+	  {
+	       E Tj, TH, Tm, TI;
+	       {
+		    E Th, Ti, Tk, Tl;
+		    Th = iio[WS(is, 2)];
+		    Ti = iio[WS(is, 5)];
+		    Tj = Th - Ti;
+		    TH = Th + Ti;
+		    Tk = iio[WS(is, 4)];
+		    Tl = iio[WS(is, 1)];
+		    Tm = Tk - Tl;
+		    TI = Tk + Tl;
+	       }
+	       Tn = KP866025403 * (Tj - Tm);
+	       TN = TH + TI;
+	       Tu = Tj + Tm;
+	       TJ = KP866025403 * (TH - TI);
+	  }
+	  {
+	       E T10, T17, T13, T18;
+	       {
+		    E TY, TZ, T11, T12;
+		    TY = rio[WS(vs, 1) + WS(is, 2)];
+		    TZ = rio[WS(vs, 1) + WS(is, 5)];
+		    T10 = TY + TZ;
+		    T17 = TY - TZ;
+		    T11 = rio[WS(vs, 1) + WS(is, 4)];
+		    T12 = rio[WS(vs, 1) + WS(is, 1)];
+		    T13 = T11 + T12;
+		    T18 = T11 - T12;
+	       }
+	       T14 = T10 + T13;
+	       T1J = KP866025403 * (T13 - T10);
+	       T19 = T17 + T18;
+	       T1k = KP866025403 * (T18 - T17);
+	  }
+	  {
+	       E T1d, T1B, T1g, T1C;
+	       {
+		    E T1b, T1c, T1e, T1f;
+		    T1b = iio[WS(vs, 1) + WS(is, 2)];
+		    T1c = iio[WS(vs, 1) + WS(is, 5)];
+		    T1d = T1b - T1c;
+		    T1B = T1b + T1c;
+		    T1e = iio[WS(vs, 1) + WS(is, 4)];
+		    T1f = iio[WS(vs, 1) + WS(is, 1)];
+		    T1g = T1e - T1f;
+		    T1C = T1e + T1f;
+	       }
+	       T1h = KP866025403 * (T1d - T1g);
+	       T1H = T1B + T1C;
+	       T1o = T1d + T1g;
+	       T1D = KP866025403 * (T1B - T1C);
+	  }
+	  {
+	       E T27, T2v, T2a, T2w;
+	       {
+		    E T25, T26, T28, T29;
+		    T25 = iio[WS(vs, 2) + WS(is, 2)];
+		    T26 = iio[WS(vs, 2) + WS(is, 5)];
+		    T27 = T25 - T26;
+		    T2v = T25 + T26;
+		    T28 = iio[WS(vs, 2) + WS(is, 4)];
+		    T29 = iio[WS(vs, 2) + WS(is, 1)];
+		    T2a = T28 - T29;
+		    T2w = T28 + T29;
+	       }
+	       T2b = KP866025403 * (T27 - T2a);
+	       T2B = T2v + T2w;
+	       T2i = T27 + T2a;
+	       T2x = KP866025403 * (T2v - T2w);
+	  }
+	  {
+	       E T1U, T21, T1X, T22;
+	       {
+		    E T1S, T1T, T1V, T1W;
+		    T1S = rio[WS(vs, 2) + WS(is, 2)];
+		    T1T = rio[WS(vs, 2) + WS(is, 5)];
+		    T1U = T1S + T1T;
+		    T21 = T1S - T1T;
+		    T1V = rio[WS(vs, 2) + WS(is, 4)];
+		    T1W = rio[WS(vs, 2) + WS(is, 1)];
+		    T1X = T1V + T1W;
+		    T22 = T1V - T1W;
+	       }
+	       T1Y = T1U + T1X;
+	       T2D = KP866025403 * (T1X - T1U);
+	       T23 = T21 + T22;
+	       T2e = KP866025403 * (T22 - T21);
+	  }
+	  {
+	       E T2O, T2V, T2R, T2W;
+	       {
+		    E T2M, T2N, T2P, T2Q;
+		    T2M = rio[WS(vs, 3) + WS(is, 2)];
+		    T2N = rio[WS(vs, 3) + WS(is, 5)];
+		    T2O = T2M + T2N;
+		    T2V = T2M - T2N;
+		    T2P = rio[WS(vs, 3) + WS(is, 4)];
+		    T2Q = rio[WS(vs, 3) + WS(is, 1)];
+		    T2R = T2P + T2Q;
+		    T2W = T2P - T2Q;
+	       }
+	       T2S = T2O + T2R;
+	       T3x = KP866025403 * (T2R - T2O);
+	       T2X = T2V + T2W;
+	       T38 = KP866025403 * (T2W - T2V);
+	  }
+	  {
+	       E T31, T3p, T34, T3q;
+	       {
+		    E T2Z, T30, T32, T33;
+		    T2Z = iio[WS(vs, 3) + WS(is, 2)];
+		    T30 = iio[WS(vs, 3) + WS(is, 5)];
+		    T31 = T2Z - T30;
+		    T3p = T2Z + T30;
+		    T32 = iio[WS(vs, 3) + WS(is, 4)];
+		    T33 = iio[WS(vs, 3) + WS(is, 1)];
+		    T34 = T32 - T33;
+		    T3q = T32 + T33;
+	       }
+	       T35 = KP866025403 * (T31 - T34);
+	       T3v = T3p + T3q;
+	       T3c = T31 + T34;
+	       T3r = KP866025403 * (T3p - T3q);
+	  }
+	  {
+	       E T3I, T3P, T3L, T3Q;
+	       {
+		    E T3G, T3H, T3J, T3K;
+		    T3G = rio[WS(vs, 4) + WS(is, 2)];
+		    T3H = rio[WS(vs, 4) + WS(is, 5)];
+		    T3I = T3G + T3H;
+		    T3P = T3G - T3H;
+		    T3J = rio[WS(vs, 4) + WS(is, 4)];
+		    T3K = rio[WS(vs, 4) + WS(is, 1)];
+		    T3L = T3J + T3K;
+		    T3Q = T3J - T3K;
+	       }
+	       T3M = T3I + T3L;
+	       T4r = KP866025403 * (T3L - T3I);
+	       T3R = T3P + T3Q;
+	       T42 = KP866025403 * (T3Q - T3P);
+	  }
+	  {
+	       E T3V, T4j, T3Y, T4k;
+	       {
+		    E T3T, T3U, T3W, T3X;
+		    T3T = iio[WS(vs, 4) + WS(is, 2)];
+		    T3U = iio[WS(vs, 4) + WS(is, 5)];
+		    T3V = T3T - T3U;
+		    T4j = T3T + T3U;
+		    T3W = iio[WS(vs, 4) + WS(is, 4)];
+		    T3X = iio[WS(vs, 4) + WS(is, 1)];
+		    T3Y = T3W - T3X;
+		    T4k = T3W + T3X;
+	       }
+	       T3Z = KP866025403 * (T3V - T3Y);
+	       T4p = T4j + T4k;
+	       T46 = T3V + T3Y;
+	       T4l = KP866025403 * (T4j - T4k);
+	  }
+	  {
+	       E T4P, T5d, T4S, T5e;
+	       {
+		    E T4N, T4O, T4Q, T4R;
+		    T4N = iio[WS(vs, 5) + WS(is, 2)];
+		    T4O = iio[WS(vs, 5) + WS(is, 5)];
+		    T4P = T4N - T4O;
+		    T5d = T4N + T4O;
+		    T4Q = iio[WS(vs, 5) + WS(is, 4)];
+		    T4R = iio[WS(vs, 5) + WS(is, 1)];
+		    T4S = T4Q - T4R;
+		    T5e = T4Q + T4R;
+	       }
+	       T4T = KP866025403 * (T4P - T4S);
+	       T5j = T5d + T5e;
+	       T50 = T4P + T4S;
+	       T5f = KP866025403 * (T5d - T5e);
+	  }
+	  {
+	       E T4C, T4J, T4F, T4K;
+	       {
+		    E T4A, T4B, T4D, T4E;
+		    T4A = rio[WS(vs, 5) + WS(is, 2)];
+		    T4B = rio[WS(vs, 5) + WS(is, 5)];
+		    T4C = T4A + T4B;
+		    T4J = T4A - T4B;
+		    T4D = rio[WS(vs, 5) + WS(is, 4)];
+		    T4E = rio[WS(vs, 5) + WS(is, 1)];
+		    T4F = T4D + T4E;
+		    T4K = T4D - T4E;
+	       }
+	       T4G = T4C + T4F;
+	       T5l = KP866025403 * (T4F - T4C);
+	       T4L = T4J + T4K;
+	       T4W = KP866025403 * (T4K - T4J);
+	  }
+	  rio[0] = T3 + Ta; /* first store: every load of this pass is above this line, and stores overwrite slots that were inputs of other rows (e.g. rio[WS(is, 1)] below was read as T8 above). These twelve stores put result j = 0 of row v at [WS(is, v)] with no W factor. */
+	  iio[0] = TM + TN;
+	  rio[WS(is, 1)] = TX + T14;
+	  iio[WS(is, 1)] = T1G + T1H;
+	  rio[WS(is, 3)] = T2L + T2S;
+	  rio[WS(is, 2)] = T1R + T1Y;
+	  iio[WS(is, 2)] = T2A + T2B;
+	  iio[WS(is, 3)] = T3u + T3v;
+	  iio[WS(is, 4)] = T4o + T4p;
+	  iio[WS(is, 5)] = T5i + T5j;
+	  rio[WS(is, 5)] = T4z + T4G;
+	  rio[WS(is, 4)] = T3F + T3M;
+	  { /* result j = 3 of row v = 1, combined with W[4], W[5]; the next five blocks do the same for rows 5, 0, 4, 3, 2. NOTE(review): FMA/FNMS are defined outside this file; if FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b, each store pair multiplies (re, im) by (W[4] - i*W[5]) -- confirm. */
+	       E T1w, T1y, T1v, T1x;
+	       T1w = T16 + T19;
+	       T1y = T1n + T1o;
+	       T1v = W[4];
+	       T1x = W[5];
+	       rio[WS(vs, 3) + WS(is, 1)] = FMA(T1v, T1w, T1x * T1y);
+	       iio[WS(vs, 3) + WS(is, 1)] = FNMS(T1x, T1w, T1v * T1y);
+	  }
+	  {
+	       E T58, T5a, T57, T59;
+	       T58 = T4I + T4L;
+	       T5a = T4Z + T50;
+	       T57 = W[4];
+	       T59 = W[5];
+	       rio[WS(vs, 3) + WS(is, 5)] = FMA(T57, T58, T59 * T5a);
+	       iio[WS(vs, 3) + WS(is, 5)] = FNMS(T59, T58, T57 * T5a);
+	  }
+	  {
+	       E TC, TE, TB, TD;
+	       TC = Tc + Tf;
+	       TE = Tt + Tu;
+	       TB = W[4];
+	       TD = W[5];
+	       rio[WS(vs, 3)] = FMA(TB, TC, TD * TE);
+	       iio[WS(vs, 3)] = FNMS(TD, TC, TB * TE);
+	  }
+	  {
+	       E T4e, T4g, T4d, T4f;
+	       T4e = T3O + T3R;
+	       T4g = T45 + T46;
+	       T4d = W[4];
+	       T4f = W[5];
+	       rio[WS(vs, 3) + WS(is, 4)] = FMA(T4d, T4e, T4f * T4g);
+	       iio[WS(vs, 3) + WS(is, 4)] = FNMS(T4f, T4e, T4d * T4g);
+	  }
+	  {
+	       E T3k, T3m, T3j, T3l;
+	       T3k = T2U + T2X;
+	       T3m = T3b + T3c;
+	       T3j = W[4];
+	       T3l = W[5];
+	       rio[WS(vs, 3) + WS(is, 3)] = FMA(T3j, T3k, T3l * T3m);
+	       iio[WS(vs, 3) + WS(is, 3)] = FNMS(T3l, T3k, T3j * T3m);
+	  }
+	  {
+	       E T2q, T2s, T2p, T2r;
+	       T2q = T20 + T23;
+	       T2s = T2h + T2i;
+	       T2p = W[4];
+	       T2r = W[5];
+	       rio[WS(vs, 3) + WS(is, 2)] = FMA(T2p, T2q, T2r * T2s);
+	       iio[WS(vs, 3) + WS(is, 2)] = FNMS(T2r, T2q, T2p * T2s);
+	  }
+	  { /* from here to the end of the pass, each block finishes one row's results j = 2 and j = 4 (W[2..3], W[6..7]) or j = 1 and j = 5 (W[0..1], W[8..9]); this one is row v = 5, j = 2 and 4 */
+	       E T5g, T5o, T5m, T5q, T5c, T5k;
+	       T5c = FNMS(KP500000000, T4G, T4z);
+	       T5g = T5c - T5f;
+	       T5o = T5c + T5f;
+	       T5k = FNMS(KP500000000, T5j, T5i);
+	       T5m = T5k - T5l;
+	       T5q = T5l + T5k;
+	       {
+		    E T5b, T5h, T5n, T5p;
+		    T5b = W[2];
+		    T5h = W[3];
+		    rio[WS(vs, 2) + WS(is, 5)] = FMA(T5b, T5g, T5h * T5m);
+		    iio[WS(vs, 2) + WS(is, 5)] = FNMS(T5h, T5g, T5b * T5m);
+		    T5n = W[6];
+		    T5p = W[7];
+		    rio[WS(vs, 4) + WS(is, 5)] = FMA(T5n, T5o, T5p * T5q);
+		    iio[WS(vs, 4) + WS(is, 5)] = FNMS(T5p, T5o, T5n * T5q);
+	       }
+	  }
+	  {
+	       E To, Ty, Tw, TA, Tg, Tv;
+	       Tg = FNMS(KP500000000, Tf, Tc);
+	       To = Tg + Tn;
+	       Ty = Tg - Tn;
+	       Tv = FNMS(KP500000000, Tu, Tt);
+	       Tw = Tq + Tv;
+	       TA = Tv - Tq;
+	       {
+		    E Tb, Tp, Tx, Tz;
+		    Tb = W[0];
+		    Tp = W[1];
+		    rio[WS(vs, 1)] = FMA(Tb, To, Tp * Tw);
+		    iio[WS(vs, 1)] = FNMS(Tp, To, Tb * Tw);
+		    Tx = W[8];
+		    Tz = W[9];
+		    rio[WS(vs, 5)] = FMA(Tx, Ty, Tz * TA);
+		    iio[WS(vs, 5)] = FNMS(Tz, Ty, Tx * TA);
+	       }
+	  }
+	  {
+	       E T36, T3g, T3e, T3i, T2Y, T3d;
+	       T2Y = FNMS(KP500000000, T2X, T2U);
+	       T36 = T2Y + T35;
+	       T3g = T2Y - T35;
+	       T3d = FNMS(KP500000000, T3c, T3b);
+	       T3e = T38 + T3d;
+	       T3i = T3d - T38;
+	       {
+		    E T2T, T37, T3f, T3h;
+		    T2T = W[0];
+		    T37 = W[1];
+		    rio[WS(vs, 1) + WS(is, 3)] = FMA(T2T, T36, T37 * T3e);
+		    iio[WS(vs, 1) + WS(is, 3)] = FNMS(T37, T36, T2T * T3e);
+		    T3f = W[8];
+		    T3h = W[9];
+		    rio[WS(vs, 5) + WS(is, 3)] = FMA(T3f, T3g, T3h * T3i);
+		    iio[WS(vs, 5) + WS(is, 3)] = FNMS(T3h, T3g, T3f * T3i);
+	       }
+	  }
+	  {
+	       E T2y, T2G, T2E, T2I, T2u, T2C;
+	       T2u = FNMS(KP500000000, T1Y, T1R);
+	       T2y = T2u - T2x;
+	       T2G = T2u + T2x;
+	       T2C = FNMS(KP500000000, T2B, T2A);
+	       T2E = T2C - T2D;
+	       T2I = T2D + T2C;
+	       {
+		    E T2t, T2z, T2F, T2H;
+		    T2t = W[2];
+		    T2z = W[3];
+		    rio[WS(vs, 2) + WS(is, 2)] = FMA(T2t, T2y, T2z * T2E);
+		    iio[WS(vs, 2) + WS(is, 2)] = FNMS(T2z, T2y, T2t * T2E);
+		    T2F = W[6];
+		    T2H = W[7];
+		    rio[WS(vs, 4) + WS(is, 2)] = FMA(T2F, T2G, T2H * T2I);
+		    iio[WS(vs, 4) + WS(is, 2)] = FNMS(T2H, T2G, T2F * T2I);
+	       }
+	  }
+	  {
+	       E T3s, T3A, T3y, T3C, T3o, T3w;
+	       T3o = FNMS(KP500000000, T2S, T2L);
+	       T3s = T3o - T3r;
+	       T3A = T3o + T3r;
+	       T3w = FNMS(KP500000000, T3v, T3u);
+	       T3y = T3w - T3x;
+	       T3C = T3x + T3w;
+	       {
+		    E T3n, T3t, T3z, T3B;
+		    T3n = W[2];
+		    T3t = W[3];
+		    rio[WS(vs, 2) + WS(is, 3)] = FMA(T3n, T3s, T3t * T3y);
+		    iio[WS(vs, 2) + WS(is, 3)] = FNMS(T3t, T3s, T3n * T3y);
+		    T3z = W[6];
+		    T3B = W[7];
+		    rio[WS(vs, 4) + WS(is, 3)] = FMA(T3z, T3A, T3B * T3C);
+		    iio[WS(vs, 4) + WS(is, 3)] = FNMS(T3B, T3A, T3z * T3C);
+	       }
+	  }
+	  {
+	       E T1E, T1M, T1K, T1O, T1A, T1I;
+	       T1A = FNMS(KP500000000, T14, TX);
+	       T1E = T1A - T1D;
+	       T1M = T1A + T1D;
+	       T1I = FNMS(KP500000000, T1H, T1G);
+	       T1K = T1I - T1J;
+	       T1O = T1J + T1I;
+	       {
+		    E T1z, T1F, T1L, T1N;
+		    T1z = W[2];
+		    T1F = W[3];
+		    rio[WS(vs, 2) + WS(is, 1)] = FMA(T1z, T1E, T1F * T1K);
+		    iio[WS(vs, 2) + WS(is, 1)] = FNMS(T1F, T1E, T1z * T1K);
+		    T1L = W[6];
+		    T1N = W[7];
+		    rio[WS(vs, 4) + WS(is, 1)] = FMA(T1L, T1M, T1N * T1O);
+		    iio[WS(vs, 4) + WS(is, 1)] = FNMS(T1N, T1M, T1L * T1O);
+	       }
+	  }
+	  {
+	       E T4m, T4u, T4s, T4w, T4i, T4q;
+	       T4i = FNMS(KP500000000, T3M, T3F);
+	       T4m = T4i - T4l;
+	       T4u = T4i + T4l;
+	       T4q = FNMS(KP500000000, T4p, T4o);
+	       T4s = T4q - T4r;
+	       T4w = T4r + T4q;
+	       {
+		    E T4h, T4n, T4t, T4v;
+		    T4h = W[2];
+		    T4n = W[3];
+		    rio[WS(vs, 2) + WS(is, 4)] = FMA(T4h, T4m, T4n * T4s);
+		    iio[WS(vs, 2) + WS(is, 4)] = FNMS(T4n, T4m, T4h * T4s);
+		    T4t = W[6];
+		    T4v = W[7];
+		    rio[WS(vs, 4) + WS(is, 4)] = FMA(T4t, T4u, T4v * T4w);
+		    iio[WS(vs, 4) + WS(is, 4)] = FNMS(T4v, T4u, T4t * T4w);
+	       }
+	  }
+	  {
+	       E TK, TS, TQ, TU, TG, TO;
+	       TG = FNMS(KP500000000, Ta, T3);
+	       TK = TG - TJ;
+	       TS = TG + TJ;
+	       TO = FNMS(KP500000000, TN, TM);
+	       TQ = TO - TP;
+	       TU = TP + TO;
+	       {
+		    E TF, TL, TR, TT;
+		    TF = W[2];
+		    TL = W[3];
+		    rio[WS(vs, 2)] = FMA(TF, TK, TL * TQ);
+		    iio[WS(vs, 2)] = FNMS(TL, TK, TF * TQ);
+		    TR = W[6];
+		    TT = W[7];
+		    rio[WS(vs, 4)] = FMA(TR, TS, TT * TU);
+		    iio[WS(vs, 4)] = FNMS(TT, TS, TR * TU);
+	       }
+	  }
+	  {
+	       E T2c, T2m, T2k, T2o, T24, T2j;
+	       T24 = FNMS(KP500000000, T23, T20);
+	       T2c = T24 + T2b;
+	       T2m = T24 - T2b;
+	       T2j = FNMS(KP500000000, T2i, T2h);
+	       T2k = T2e + T2j;
+	       T2o = T2j - T2e;
+	       {
+		    E T1Z, T2d, T2l, T2n;
+		    T1Z = W[0];
+		    T2d = W[1];
+		    rio[WS(vs, 1) + WS(is, 2)] = FMA(T1Z, T2c, T2d * T2k);
+		    iio[WS(vs, 1) + WS(is, 2)] = FNMS(T2d, T2c, T1Z * T2k);
+		    T2l = W[8];
+		    T2n = W[9];
+		    rio[WS(vs, 5) + WS(is, 2)] = FMA(T2l, T2m, T2n * T2o);
+		    iio[WS(vs, 5) + WS(is, 2)] = FNMS(T2n, T2m, T2l * T2o);
+	       }
+	  }
+	  {
+	       E T40, T4a, T48, T4c, T3S, T47;
+	       T3S = FNMS(KP500000000, T3R, T3O);
+	       T40 = T3S + T3Z;
+	       T4a = T3S - T3Z;
+	       T47 = FNMS(KP500000000, T46, T45);
+	       T48 = T42 + T47;
+	       T4c = T47 - T42;
+	       {
+		    E T3N, T41, T49, T4b;
+		    T3N = W[0];
+		    T41 = W[1];
+		    rio[WS(vs, 1) + WS(is, 4)] = FMA(T3N, T40, T41 * T48);
+		    iio[WS(vs, 1) + WS(is, 4)] = FNMS(T41, T40, T3N * T48);
+		    T49 = W[8];
+		    T4b = W[9];
+		    rio[WS(vs, 5) + WS(is, 4)] = FMA(T49, T4a, T4b * T4c);
+		    iio[WS(vs, 5) + WS(is, 4)] = FNMS(T4b, T4a, T49 * T4c);
+	       }
+	  }
+	  {
+	       E T1i, T1s, T1q, T1u, T1a, T1p;
+	       T1a = FNMS(KP500000000, T19, T16);
+	       T1i = T1a + T1h;
+	       T1s = T1a - T1h;
+	       T1p = FNMS(KP500000000, T1o, T1n);
+	       T1q = T1k + T1p;
+	       T1u = T1p - T1k;
+	       {
+		    E T15, T1j, T1r, T1t;
+		    T15 = W[0];
+		    T1j = W[1];
+		    rio[WS(vs, 1) + WS(is, 1)] = FMA(T15, T1i, T1j * T1q);
+		    iio[WS(vs, 1) + WS(is, 1)] = FNMS(T1j, T1i, T15 * T1q);
+		    T1r = W[8];
+		    T1t = W[9];
+		    rio[WS(vs, 5) + WS(is, 1)] = FMA(T1r, T1s, T1t * T1u);
+		    iio[WS(vs, 5) + WS(is, 1)] = FNMS(T1t, T1s, T1r * T1u);
+	       }
+	  }
+	  {
+	       E T4U, T54, T52, T56, T4M, T51;
+	       T4M = FNMS(KP500000000, T4L, T4I);
+	       T4U = T4M + T4T;
+	       T54 = T4M - T4T;
+	       T51 = FNMS(KP500000000, T50, T4Z);
+	       T52 = T4W + T51;
+	       T56 = T51 - T4W;
+	       {
+		    E T4H, T4V, T53, T55;
+		    T4H = W[0];
+		    T4V = W[1];
+		    rio[WS(vs, 1) + WS(is, 5)] = FMA(T4H, T4U, T4V * T52);
+		    iio[WS(vs, 1) + WS(is, 5)] = FNMS(T4V, T4U, T4H * T52);
+		    T53 = W[8];
+		    T55 = W[9];
+		    rio[WS(vs, 5) + WS(is, 5)] = FMA(T53, T54, T55 * T56);
+		    iio[WS(vs, 5) + WS(is, 5)] = FNMS(T55, T54, T53 * T56);
+	       }
+	  }
+     }
+     return W; /* W was advanced by 10 per pass, so this points just past the last entries read */
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction list; reaches the planner through desc.twinstr */
+     {TW_FULL, 0, 6}, /* NOTE(review): 6 equals desc's first field; q1_6 itself reads W[0]..W[9] and steps W by 10 per pass -- confirm TW_FULL semantics where tw_instr is defined */
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 6, "q1_6", twinstr, {192, 84, 84, 0}, &GENUS, 0, 0, 0 }; /* descriptor passed to the registration call below; 192, 84, 84 equal the add / multiply / fused multiply-add counts quoted in this file's header comment. NOTE(review): meaning of the remaining fields is set by ct_desc, defined elsewhere. */
+
+void X(codelet_q1_6) (planner *p) { /* entry point for this file: forwards the q1_6 kernel and its descriptor for planner p */
+     X(kdft_difsq_register) (p, q1_6, &desc);
+}
diff --git a/src/fftw3/dft/codelets/inplace/q1_8.c b/src/fftw3/dft/codelets/inplace/q1_8.c
new file mode 100644
index 0000000..84409a7
--- /dev/null
+++ b/src/fftw3/dft/codelets/inplace/q1_8.c
@@ -0,0 +1,1149 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:39:14 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twidsq -compact -variables 4 -reload-twiddle -dif -n 8 -name q1_8 -include q.h */
+
+/*
+ * This function contains 528 FP additions, 256 FP multiplications,
+ * (or, 416 additions, 144 multiplications, 112 fused multiply/add),
+ * 142 stack variables, and 256 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: q1_8.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ * $Id: q1_8.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ * $Id: q1_8.c,v 1.1 2008/10/17 06:11:08 scuri Exp $
+ */
+
+#include "q.h"
+
+static const R *q1_8(R *rio, R *iio, const R *W, stride is, stride vs, int m, int dist)
+{
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     int i;
+     for (i = m; i > 0; i = i - 1, rio = rio + dist, iio = iio + dist, W = W + 14) {
+	  E T7, T14, T1g, Tk, TC, TQ, T10, TM, T1w, T2p, T2z, T1H, T1M, T1W, T2j;
+	  E T1V, T7R, T8O, T90, T84, T8m, T8A, T8K, T8w, T9g, Ta9, Taj, T9r, T9w, T9G;
+	  E Ta3, T9F, Te, T17, T1h, Tp, Tu, TE, T11, TD, T1p, T2m, T2y, T1C, T1U;
+	  E T28, T2i, T24, T7Y, T8R, T91, T89, T8e, T8o, T8L, T8n, T99, Ta6, Tai, T9m;
+	  E T9E, T9S, Ta2, T9O, T2H, T3E, T3Q, T2U, T3c, T3q, T3A, T3m, T46, T4Z, T59;
+	  E T4h, T4m, T4w, T4T, T4v, T5h, T6e, T6q, T5u, T5M, T60, T6a, T5W, T6G, T7z;
+	  E T7J, T6R, T6W, T76, T7t, T75, T2O, T3H, T3R, T2Z, T34, T3e, T3B, T3d, T3Z;
+	  E T4W, T58, T4c, T4u, T4I, T4S, T4E, T5o, T6h, T6r, T5z, T5E, T5O, T6b, T5N;
+	  E T6z, T7w, T7I, T6M, T74, T7i, T7s, T7e;
+	  {
+	       E T3, Ty, Tj, TY, T6, Tg, TB, TZ;
+	       {
+		    E T1, T2, Th, Ti;
+		    T1 = rio[0];
+		    T2 = rio[WS(is, 4)];
+		    T3 = T1 + T2;
+		    Ty = T1 - T2;
+		    Th = iio[0];
+		    Ti = iio[WS(is, 4)];
+		    Tj = Th - Ti;
+		    TY = Th + Ti;
+	       }
+	       {
+		    E T4, T5, Tz, TA;
+		    T4 = rio[WS(is, 2)];
+		    T5 = rio[WS(is, 6)];
+		    T6 = T4 + T5;
+		    Tg = T4 - T5;
+		    Tz = iio[WS(is, 2)];
+		    TA = iio[WS(is, 6)];
+		    TB = Tz - TA;
+		    TZ = Tz + TA;
+	       }
+	       T7 = T3 + T6;
+	       T14 = T3 - T6;
+	       T1g = TY + TZ;
+	       Tk = Tg + Tj;
+	       TC = Ty - TB;
+	       TQ = Tj - Tg;
+	       T10 = TY - TZ;
+	       TM = Ty + TB;
+	  }
+	  {
+	       E T1s, T1I, T1L, T2n, T1v, T1D, T1G, T2o;
+	       {
+		    E T1q, T1r, T1J, T1K;
+		    T1q = rio[WS(vs, 1) + WS(is, 1)];
+		    T1r = rio[WS(vs, 1) + WS(is, 5)];
+		    T1s = T1q + T1r;
+		    T1I = T1q - T1r;
+		    T1J = iio[WS(vs, 1) + WS(is, 1)];
+		    T1K = iio[WS(vs, 1) + WS(is, 5)];
+		    T1L = T1J - T1K;
+		    T2n = T1J + T1K;
+	       }
+	       {
+		    E T1t, T1u, T1E, T1F;
+		    T1t = rio[WS(vs, 1) + WS(is, 7)];
+		    T1u = rio[WS(vs, 1) + WS(is, 3)];
+		    T1v = T1t + T1u;
+		    T1D = T1t - T1u;
+		    T1E = iio[WS(vs, 1) + WS(is, 7)];
+		    T1F = iio[WS(vs, 1) + WS(is, 3)];
+		    T1G = T1E - T1F;
+		    T2o = T1E + T1F;
+	       }
+	       T1w = T1s + T1v;
+	       T2p = T2n - T2o;
+	       T2z = T2n + T2o;
+	       T1H = T1D - T1G;
+	       T1M = T1I + T1L;
+	       T1W = T1D + T1G;
+	       T2j = T1v - T1s;
+	       T1V = T1L - T1I;
+	  }
+	  {
+	       E T7N, T8i, T83, T8I, T7Q, T80, T8l, T8J;
+	       {
+		    E T7L, T7M, T81, T82;
+		    T7L = rio[WS(vs, 6)];
+		    T7M = rio[WS(vs, 6) + WS(is, 4)];
+		    T7N = T7L + T7M;
+		    T8i = T7L - T7M;
+		    T81 = iio[WS(vs, 6)];
+		    T82 = iio[WS(vs, 6) + WS(is, 4)];
+		    T83 = T81 - T82;
+		    T8I = T81 + T82;
+	       }
+	       {
+		    E T7O, T7P, T8j, T8k;
+		    T7O = rio[WS(vs, 6) + WS(is, 2)];
+		    T7P = rio[WS(vs, 6) + WS(is, 6)];
+		    T7Q = T7O + T7P;
+		    T80 = T7O - T7P;
+		    T8j = iio[WS(vs, 6) + WS(is, 2)];
+		    T8k = iio[WS(vs, 6) + WS(is, 6)];
+		    T8l = T8j - T8k;
+		    T8J = T8j + T8k;
+	       }
+	       T7R = T7N + T7Q;
+	       T8O = T7N - T7Q;
+	       T90 = T8I + T8J;
+	       T84 = T80 + T83;
+	       T8m = T8i - T8l;
+	       T8A = T83 - T80;
+	       T8K = T8I - T8J;
+	       T8w = T8i + T8l;
+	  }
+	  {
+	       E T9c, T9s, T9v, Ta7, T9f, T9n, T9q, Ta8;
+	       {
+		    E T9a, T9b, T9t, T9u;
+		    T9a = rio[WS(vs, 7) + WS(is, 1)];
+		    T9b = rio[WS(vs, 7) + WS(is, 5)];
+		    T9c = T9a + T9b;
+		    T9s = T9a - T9b;
+		    T9t = iio[WS(vs, 7) + WS(is, 1)];
+		    T9u = iio[WS(vs, 7) + WS(is, 5)];
+		    T9v = T9t - T9u;
+		    Ta7 = T9t + T9u;
+	       }
+	       {
+		    E T9d, T9e, T9o, T9p;
+		    T9d = rio[WS(vs, 7) + WS(is, 7)];
+		    T9e = rio[WS(vs, 7) + WS(is, 3)];
+		    T9f = T9d + T9e;
+		    T9n = T9d - T9e;
+		    T9o = iio[WS(vs, 7) + WS(is, 7)];
+		    T9p = iio[WS(vs, 7) + WS(is, 3)];
+		    T9q = T9o - T9p;
+		    Ta8 = T9o + T9p;
+	       }
+	       T9g = T9c + T9f;
+	       Ta9 = Ta7 - Ta8;
+	       Taj = Ta7 + Ta8;
+	       T9r = T9n - T9q;
+	       T9w = T9s + T9v;
+	       T9G = T9n + T9q;
+	       Ta3 = T9f - T9c;
+	       T9F = T9v - T9s;
+	  }
+	  {
+	       E Ta, Tq, Tt, T15, Td, Tl, To, T16;
+	       {
+		    E T8, T9, Tr, Ts;
+		    T8 = rio[WS(is, 1)];
+		    T9 = rio[WS(is, 5)];
+		    Ta = T8 + T9;
+		    Tq = T8 - T9;
+		    Tr = iio[WS(is, 1)];
+		    Ts = iio[WS(is, 5)];
+		    Tt = Tr - Ts;
+		    T15 = Tr + Ts;
+	       }
+	       {
+		    E Tb, Tc, Tm, Tn;
+		    Tb = rio[WS(is, 7)];
+		    Tc = rio[WS(is, 3)];
+		    Td = Tb + Tc;
+		    Tl = Tb - Tc;
+		    Tm = iio[WS(is, 7)];
+		    Tn = iio[WS(is, 3)];
+		    To = Tm - Tn;
+		    T16 = Tm + Tn;
+	       }
+	       Te = Ta + Td;
+	       T17 = T15 - T16;
+	       T1h = T15 + T16;
+	       Tp = Tl - To;
+	       Tu = Tq + Tt;
+	       TE = Tl + To;
+	       T11 = Td - Ta;
+	       TD = Tt - Tq;
+	  }
+	  {
+	       E T1l, T1Q, T1B, T2g, T1o, T1y, T1T, T2h;
+	       {
+		    E T1j, T1k, T1z, T1A;
+		    T1j = rio[WS(vs, 1)];
+		    T1k = rio[WS(vs, 1) + WS(is, 4)];
+		    T1l = T1j + T1k;
+		    T1Q = T1j - T1k;
+		    T1z = iio[WS(vs, 1)];
+		    T1A = iio[WS(vs, 1) + WS(is, 4)];
+		    T1B = T1z - T1A;
+		    T2g = T1z + T1A;
+	       }
+	       {
+		    E T1m, T1n, T1R, T1S;
+		    T1m = rio[WS(vs, 1) + WS(is, 2)];
+		    T1n = rio[WS(vs, 1) + WS(is, 6)];
+		    T1o = T1m + T1n;
+		    T1y = T1m - T1n;
+		    T1R = iio[WS(vs, 1) + WS(is, 2)];
+		    T1S = iio[WS(vs, 1) + WS(is, 6)];
+		    T1T = T1R - T1S;
+		    T2h = T1R + T1S;
+	       }
+	       T1p = T1l + T1o;
+	       T2m = T1l - T1o;
+	       T2y = T2g + T2h;
+	       T1C = T1y + T1B;
+	       T1U = T1Q - T1T;
+	       T28 = T1B - T1y;
+	       T2i = T2g - T2h;
+	       T24 = T1Q + T1T;
+	  }
+	  {
+	       E T7U, T8a, T8d, T8P, T7X, T85, T88, T8Q;
+	       {
+		    E T7S, T7T, T8b, T8c;
+		    T7S = rio[WS(vs, 6) + WS(is, 1)];
+		    T7T = rio[WS(vs, 6) + WS(is, 5)];
+		    T7U = T7S + T7T;
+		    T8a = T7S - T7T;
+		    T8b = iio[WS(vs, 6) + WS(is, 1)];
+		    T8c = iio[WS(vs, 6) + WS(is, 5)];
+		    T8d = T8b - T8c;
+		    T8P = T8b + T8c;
+	       }
+	       {
+		    E T7V, T7W, T86, T87;
+		    T7V = rio[WS(vs, 6) + WS(is, 7)];
+		    T7W = rio[WS(vs, 6) + WS(is, 3)];
+		    T7X = T7V + T7W;
+		    T85 = T7V - T7W;
+		    T86 = iio[WS(vs, 6) + WS(is, 7)];
+		    T87 = iio[WS(vs, 6) + WS(is, 3)];
+		    T88 = T86 - T87;
+		    T8Q = T86 + T87;
+	       }
+	       T7Y = T7U + T7X;
+	       T8R = T8P - T8Q;
+	       T91 = T8P + T8Q;
+	       T89 = T85 - T88;
+	       T8e = T8a + T8d;
+	       T8o = T85 + T88;
+	       T8L = T7X - T7U;
+	       T8n = T8d - T8a;
+	  }
+	  {
+	       E T95, T9A, T9l, Ta0, T98, T9i, T9D, Ta1;
+	       {
+		    E T93, T94, T9j, T9k;
+		    T93 = rio[WS(vs, 7)];
+		    T94 = rio[WS(vs, 7) + WS(is, 4)];
+		    T95 = T93 + T94;
+		    T9A = T93 - T94;
+		    T9j = iio[WS(vs, 7)];
+		    T9k = iio[WS(vs, 7) + WS(is, 4)];
+		    T9l = T9j - T9k;
+		    Ta0 = T9j + T9k;
+	       }
+	       {
+		    E T96, T97, T9B, T9C;
+		    T96 = rio[WS(vs, 7) + WS(is, 2)];
+		    T97 = rio[WS(vs, 7) + WS(is, 6)];
+		    T98 = T96 + T97;
+		    T9i = T96 - T97;
+		    T9B = iio[WS(vs, 7) + WS(is, 2)];
+		    T9C = iio[WS(vs, 7) + WS(is, 6)];
+		    T9D = T9B - T9C;
+		    Ta1 = T9B + T9C;
+	       }
+	       T99 = T95 + T98;
+	       Ta6 = T95 - T98;
+	       Tai = Ta0 + Ta1;
+	       T9m = T9i + T9l;
+	       T9E = T9A - T9D;
+	       T9S = T9l - T9i;
+	       Ta2 = Ta0 - Ta1;
+	       T9O = T9A + T9D;
+	  }
+	  {
+	       E T2D, T38, T2T, T3y, T2G, T2Q, T3b, T3z;
+	       {
+		    E T2B, T2C, T2R, T2S;
+		    T2B = rio[WS(vs, 2)];
+		    T2C = rio[WS(vs, 2) + WS(is, 4)];
+		    T2D = T2B + T2C;
+		    T38 = T2B - T2C;
+		    T2R = iio[WS(vs, 2)];
+		    T2S = iio[WS(vs, 2) + WS(is, 4)];
+		    T2T = T2R - T2S;
+		    T3y = T2R + T2S;
+	       }
+	       {
+		    E T2E, T2F, T39, T3a;
+		    T2E = rio[WS(vs, 2) + WS(is, 2)];
+		    T2F = rio[WS(vs, 2) + WS(is, 6)];
+		    T2G = T2E + T2F;
+		    T2Q = T2E - T2F;
+		    T39 = iio[WS(vs, 2) + WS(is, 2)];
+		    T3a = iio[WS(vs, 2) + WS(is, 6)];
+		    T3b = T39 - T3a;
+		    T3z = T39 + T3a;
+	       }
+	       T2H = T2D + T2G;
+	       T3E = T2D - T2G;
+	       T3Q = T3y + T3z;
+	       T2U = T2Q + T2T;
+	       T3c = T38 - T3b;
+	       T3q = T2T - T2Q;
+	       T3A = T3y - T3z;
+	       T3m = T38 + T3b;
+	  }
+	  {
+	       E T42, T4i, T4l, T4X, T45, T4d, T4g, T4Y;
+	       {
+		    E T40, T41, T4j, T4k;
+		    T40 = rio[WS(vs, 3) + WS(is, 1)];
+		    T41 = rio[WS(vs, 3) + WS(is, 5)];
+		    T42 = T40 + T41;
+		    T4i = T40 - T41;
+		    T4j = iio[WS(vs, 3) + WS(is, 1)];
+		    T4k = iio[WS(vs, 3) + WS(is, 5)];
+		    T4l = T4j - T4k;
+		    T4X = T4j + T4k;
+	       }
+	       {
+		    E T43, T44, T4e, T4f;
+		    T43 = rio[WS(vs, 3) + WS(is, 7)];
+		    T44 = rio[WS(vs, 3) + WS(is, 3)];
+		    T45 = T43 + T44;
+		    T4d = T43 - T44;
+		    T4e = iio[WS(vs, 3) + WS(is, 7)];
+		    T4f = iio[WS(vs, 3) + WS(is, 3)];
+		    T4g = T4e - T4f;
+		    T4Y = T4e + T4f;
+	       }
+	       T46 = T42 + T45;
+	       T4Z = T4X - T4Y;
+	       T59 = T4X + T4Y;
+	       T4h = T4d - T4g;
+	       T4m = T4i + T4l;
+	       T4w = T4d + T4g;
+	       T4T = T45 - T42;
+	       T4v = T4l - T4i;
+	  }
+	  {
+	       E T5d, T5I, T5t, T68, T5g, T5q, T5L, T69;
+	       {
+		    E T5b, T5c, T5r, T5s;
+		    T5b = rio[WS(vs, 4)];
+		    T5c = rio[WS(vs, 4) + WS(is, 4)];
+		    T5d = T5b + T5c;
+		    T5I = T5b - T5c;
+		    T5r = iio[WS(vs, 4)];
+		    T5s = iio[WS(vs, 4) + WS(is, 4)];
+		    T5t = T5r - T5s;
+		    T68 = T5r + T5s;
+	       }
+	       {
+		    E T5e, T5f, T5J, T5K;
+		    T5e = rio[WS(vs, 4) + WS(is, 2)];
+		    T5f = rio[WS(vs, 4) + WS(is, 6)];
+		    T5g = T5e + T5f;
+		    T5q = T5e - T5f;
+		    T5J = iio[WS(vs, 4) + WS(is, 2)];
+		    T5K = iio[WS(vs, 4) + WS(is, 6)];
+		    T5L = T5J - T5K;
+		    T69 = T5J + T5K;
+	       }
+	       T5h = T5d + T5g;
+	       T6e = T5d - T5g;
+	       T6q = T68 + T69;
+	       T5u = T5q + T5t;
+	       T5M = T5I - T5L;
+	       T60 = T5t - T5q;
+	       T6a = T68 - T69;
+	       T5W = T5I + T5L;
+	  }
+	  {
+	       E T6C, T6S, T6V, T7x, T6F, T6N, T6Q, T7y;
+	       {
+		    E T6A, T6B, T6T, T6U;
+		    T6A = rio[WS(vs, 5) + WS(is, 1)];
+		    T6B = rio[WS(vs, 5) + WS(is, 5)];
+		    T6C = T6A + T6B;
+		    T6S = T6A - T6B;
+		    T6T = iio[WS(vs, 5) + WS(is, 1)];
+		    T6U = iio[WS(vs, 5) + WS(is, 5)];
+		    T6V = T6T - T6U;
+		    T7x = T6T + T6U;
+	       }
+	       {
+		    E T6D, T6E, T6O, T6P;
+		    T6D = rio[WS(vs, 5) + WS(is, 7)];
+		    T6E = rio[WS(vs, 5) + WS(is, 3)];
+		    T6F = T6D + T6E;
+		    T6N = T6D - T6E;
+		    T6O = iio[WS(vs, 5) + WS(is, 7)];
+		    T6P = iio[WS(vs, 5) + WS(is, 3)];
+		    T6Q = T6O - T6P;
+		    T7y = T6O + T6P;
+	       }
+	       T6G = T6C + T6F;
+	       T7z = T7x - T7y;
+	       T7J = T7x + T7y;
+	       T6R = T6N - T6Q;
+	       T6W = T6S + T6V;
+	       T76 = T6N + T6Q;
+	       T7t = T6F - T6C;
+	       T75 = T6V - T6S;
+	  }
+	  {
+	       E T2K, T30, T33, T3F, T2N, T2V, T2Y, T3G;
+	       {
+		    E T2I, T2J, T31, T32;
+		    T2I = rio[WS(vs, 2) + WS(is, 1)];
+		    T2J = rio[WS(vs, 2) + WS(is, 5)];
+		    T2K = T2I + T2J;
+		    T30 = T2I - T2J;
+		    T31 = iio[WS(vs, 2) + WS(is, 1)];
+		    T32 = iio[WS(vs, 2) + WS(is, 5)];
+		    T33 = T31 - T32;
+		    T3F = T31 + T32;
+	       }
+	       {
+		    E T2L, T2M, T2W, T2X;
+		    T2L = rio[WS(vs, 2) + WS(is, 7)];
+		    T2M = rio[WS(vs, 2) + WS(is, 3)];
+		    T2N = T2L + T2M;
+		    T2V = T2L - T2M;
+		    T2W = iio[WS(vs, 2) + WS(is, 7)];
+		    T2X = iio[WS(vs, 2) + WS(is, 3)];
+		    T2Y = T2W - T2X;
+		    T3G = T2W + T2X;
+	       }
+	       T2O = T2K + T2N;
+	       T3H = T3F - T3G;
+	       T3R = T3F + T3G;
+	       T2Z = T2V - T2Y;
+	       T34 = T30 + T33;
+	       T3e = T2V + T2Y;
+	       T3B = T2N - T2K;
+	       T3d = T33 - T30;
+	  }
+	  {
+	       E T3V, T4q, T4b, T4Q, T3Y, T48, T4t, T4R;
+	       {
+		    E T3T, T3U, T49, T4a;
+		    T3T = rio[WS(vs, 3)];
+		    T3U = rio[WS(vs, 3) + WS(is, 4)];
+		    T3V = T3T + T3U;
+		    T4q = T3T - T3U;
+		    T49 = iio[WS(vs, 3)];
+		    T4a = iio[WS(vs, 3) + WS(is, 4)];
+		    T4b = T49 - T4a;
+		    T4Q = T49 + T4a;
+	       }
+	       {
+		    E T3W, T3X, T4r, T4s;
+		    T3W = rio[WS(vs, 3) + WS(is, 2)];
+		    T3X = rio[WS(vs, 3) + WS(is, 6)];
+		    T3Y = T3W + T3X;
+		    T48 = T3W - T3X;
+		    T4r = iio[WS(vs, 3) + WS(is, 2)];
+		    T4s = iio[WS(vs, 3) + WS(is, 6)];
+		    T4t = T4r - T4s;
+		    T4R = T4r + T4s;
+	       }
+	       T3Z = T3V + T3Y;
+	       T4W = T3V - T3Y;
+	       T58 = T4Q + T4R;
+	       T4c = T48 + T4b;
+	       T4u = T4q - T4t;
+	       T4I = T4b - T48;
+	       T4S = T4Q - T4R;
+	       T4E = T4q + T4t;
+	  }
+	  {
+	       E T5k, T5A, T5D, T6f, T5n, T5v, T5y, T6g;
+	       {
+		    E T5i, T5j, T5B, T5C;
+		    T5i = rio[WS(vs, 4) + WS(is, 1)];
+		    T5j = rio[WS(vs, 4) + WS(is, 5)];
+		    T5k = T5i + T5j;
+		    T5A = T5i - T5j;
+		    T5B = iio[WS(vs, 4) + WS(is, 1)];
+		    T5C = iio[WS(vs, 4) + WS(is, 5)];
+		    T5D = T5B - T5C;
+		    T6f = T5B + T5C;
+	       }
+	       {
+		    E T5l, T5m, T5w, T5x;
+		    T5l = rio[WS(vs, 4) + WS(is, 7)];
+		    T5m = rio[WS(vs, 4) + WS(is, 3)];
+		    T5n = T5l + T5m;
+		    T5v = T5l - T5m;
+		    T5w = iio[WS(vs, 4) + WS(is, 7)];
+		    T5x = iio[WS(vs, 4) + WS(is, 3)];
+		    T5y = T5w - T5x;
+		    T6g = T5w + T5x;
+	       }
+	       T5o = T5k + T5n;
+	       T6h = T6f - T6g;
+	       T6r = T6f + T6g;
+	       T5z = T5v - T5y;
+	       T5E = T5A + T5D;
+	       T5O = T5v + T5y;
+	       T6b = T5n - T5k;
+	       T5N = T5D - T5A;
+	  }
+	  {
+	       E T6v, T70, T6L, T7q, T6y, T6I, T73, T7r;
+	       {
+		    E T6t, T6u, T6J, T6K;
+		    T6t = rio[WS(vs, 5)];
+		    T6u = rio[WS(vs, 5) + WS(is, 4)];
+		    T6v = T6t + T6u;
+		    T70 = T6t - T6u;
+		    T6J = iio[WS(vs, 5)];
+		    T6K = iio[WS(vs, 5) + WS(is, 4)];
+		    T6L = T6J - T6K;
+		    T7q = T6J + T6K;
+	       }
+	       {
+		    E T6w, T6x, T71, T72;
+		    T6w = rio[WS(vs, 5) + WS(is, 2)];
+		    T6x = rio[WS(vs, 5) + WS(is, 6)];
+		    T6y = T6w + T6x;
+		    T6I = T6w - T6x;
+		    T71 = iio[WS(vs, 5) + WS(is, 2)];
+		    T72 = iio[WS(vs, 5) + WS(is, 6)];
+		    T73 = T71 - T72;
+		    T7r = T71 + T72;
+	       }
+	       T6z = T6v + T6y;
+	       T7w = T6v - T6y;
+	       T7I = T7q + T7r;
+	       T6M = T6I + T6L;
+	       T74 = T70 - T73;
+	       T7i = T6L - T6I;
+	       T7s = T7q - T7r;
+	       T7e = T70 + T73;
+	  }
+	  rio[0] = T7 + Te;
+	  iio[0] = T1g + T1h;
+	  rio[WS(is, 1)] = T1p + T1w;
+	  iio[WS(is, 1)] = T2y + T2z;
+	  rio[WS(is, 3)] = T3Z + T46;
+	  rio[WS(is, 2)] = T2H + T2O;
+	  iio[WS(is, 2)] = T3Q + T3R;
+	  iio[WS(is, 3)] = T58 + T59;
+	  rio[WS(is, 6)] = T7R + T7Y;
+	  iio[WS(is, 6)] = T90 + T91;
+	  iio[WS(is, 5)] = T7I + T7J;
+	  rio[WS(is, 5)] = T6z + T6G;
+	  iio[WS(is, 4)] = T6q + T6r;
+	  rio[WS(is, 4)] = T5h + T5o;
+	  rio[WS(is, 7)] = T99 + T9g;
+	  iio[WS(is, 7)] = Tai + Taj;
+	  {
+	       E T12, T18, TX, T13;
+	       T12 = T10 - T11;
+	       T18 = T14 - T17;
+	       TX = W[10];
+	       T13 = W[11];
+	       iio[WS(vs, 6)] = FNMS(T13, T18, TX * T12);
+	       rio[WS(vs, 6)] = FMA(T13, T12, TX * T18);
+	  }
+	  {
+	       E Tag, Tak, Taf, Tah;
+	       Tag = T99 - T9g;
+	       Tak = Tai - Taj;
+	       Taf = W[6];
+	       Tah = W[7];
+	       rio[WS(vs, 4) + WS(is, 7)] = FMA(Taf, Tag, Tah * Tak);
+	       iio[WS(vs, 4) + WS(is, 7)] = FNMS(Tah, Tag, Taf * Tak);
+	  }
+	  {
+	       E T8M, T8S, T8H, T8N;
+	       T8M = T8K - T8L;
+	       T8S = T8O - T8R;
+	       T8H = W[10];
+	       T8N = W[11];
+	       iio[WS(vs, 6) + WS(is, 6)] = FNMS(T8N, T8S, T8H * T8M);
+	       rio[WS(vs, 6) + WS(is, 6)] = FMA(T8N, T8M, T8H * T8S);
+	  }
+	  {
+	       E T2k, T2q, T2f, T2l;
+	       T2k = T2i - T2j;
+	       T2q = T2m - T2p;
+	       T2f = W[10];
+	       T2l = W[11];
+	       iio[WS(vs, 6) + WS(is, 1)] = FNMS(T2l, T2q, T2f * T2k);
+	       rio[WS(vs, 6) + WS(is, 1)] = FMA(T2l, T2k, T2f * T2q);
+	  }
+	  {
+	       E Ta4, Taa, T9Z, Ta5;
+	       Ta4 = Ta2 - Ta3;
+	       Taa = Ta6 - Ta9;
+	       T9Z = W[10];
+	       Ta5 = W[11];
+	       iio[WS(vs, 6) + WS(is, 7)] = FNMS(Ta5, Taa, T9Z * Ta4);
+	       rio[WS(vs, 6) + WS(is, 7)] = FMA(Ta5, Ta4, T9Z * Taa);
+	  }
+	  {
+	       E T8Y, T92, T8X, T8Z;
+	       T8Y = T7R - T7Y;
+	       T92 = T90 - T91;
+	       T8X = W[6];
+	       T8Z = W[7];
+	       rio[WS(vs, 4) + WS(is, 6)] = FMA(T8X, T8Y, T8Z * T92);
+	       iio[WS(vs, 4) + WS(is, 6)] = FNMS(T8Z, T8Y, T8X * T92);
+	  }
+	  {
+	       E T2w, T2A, T2v, T2x;
+	       T2w = T1p - T1w;
+	       T2A = T2y - T2z;
+	       T2v = W[6];
+	       T2x = W[7];
+	       rio[WS(vs, 4) + WS(is, 1)] = FMA(T2v, T2w, T2x * T2A);
+	       iio[WS(vs, 4) + WS(is, 1)] = FNMS(T2x, T2w, T2v * T2A);
+	  }
+	  {
+	       E Tac, Tae, Tab, Tad;
+	       Tac = Ta3 + Ta2;
+	       Tae = Ta6 + Ta9;
+	       Tab = W[2];
+	       Tad = W[3];
+	       iio[WS(vs, 2) + WS(is, 7)] = FNMS(Tad, Tae, Tab * Tac);
+	       rio[WS(vs, 2) + WS(is, 7)] = FMA(Tad, Tac, Tab * Tae);
+	  }
+	  {
+	       E T8U, T8W, T8T, T8V;
+	       T8U = T8L + T8K;
+	       T8W = T8O + T8R;
+	       T8T = W[2];
+	       T8V = W[3];
+	       iio[WS(vs, 2) + WS(is, 6)] = FNMS(T8V, T8W, T8T * T8U);
+	       rio[WS(vs, 2) + WS(is, 6)] = FMA(T8V, T8U, T8T * T8W);
+	  }
+	  {
+	       E T1a, T1c, T19, T1b;
+	       T1a = T11 + T10;
+	       T1c = T14 + T17;
+	       T19 = W[2];
+	       T1b = W[3];
+	       iio[WS(vs, 2)] = FNMS(T1b, T1c, T19 * T1a);
+	       rio[WS(vs, 2)] = FMA(T1b, T1a, T19 * T1c);
+	  }
+	  {
+	       E T1e, T1i, T1d, T1f;
+	       T1e = T7 - Te;
+	       T1i = T1g - T1h;
+	       T1d = W[6];
+	       T1f = W[7];
+	       rio[WS(vs, 4)] = FMA(T1d, T1e, T1f * T1i);
+	       iio[WS(vs, 4)] = FNMS(T1f, T1e, T1d * T1i);
+	  }
+	  {
+	       E T2s, T2u, T2r, T2t;
+	       T2s = T2j + T2i;
+	       T2u = T2m + T2p;
+	       T2r = W[2];
+	       T2t = W[3];
+	       iio[WS(vs, 2) + WS(is, 1)] = FNMS(T2t, T2u, T2r * T2s);
+	       rio[WS(vs, 2) + WS(is, 1)] = FMA(T2t, T2s, T2r * T2u);
+	  }
+	  {
+	       E T3C, T3I, T3x, T3D;
+	       T3C = T3A - T3B;
+	       T3I = T3E - T3H;
+	       T3x = W[10];
+	       T3D = W[11];
+	       iio[WS(vs, 6) + WS(is, 2)] = FNMS(T3D, T3I, T3x * T3C);
+	       rio[WS(vs, 6) + WS(is, 2)] = FMA(T3D, T3C, T3x * T3I);
+	  }
+	  {
+	       E T4U, T50, T4P, T4V;
+	       T4U = T4S - T4T;
+	       T50 = T4W - T4Z;
+	       T4P = W[10];
+	       T4V = W[11];
+	       iio[WS(vs, 6) + WS(is, 3)] = FNMS(T4V, T50, T4P * T4U);
+	       rio[WS(vs, 6) + WS(is, 3)] = FMA(T4V, T4U, T4P * T50);
+	  }
+	  {
+	       E T56, T5a, T55, T57;
+	       T56 = T3Z - T46;
+	       T5a = T58 - T59;
+	       T55 = W[6];
+	       T57 = W[7];
+	       rio[WS(vs, 4) + WS(is, 3)] = FMA(T55, T56, T57 * T5a);
+	       iio[WS(vs, 4) + WS(is, 3)] = FNMS(T57, T56, T55 * T5a);
+	  }
+	  {
+	       E T6o, T6s, T6n, T6p;
+	       T6o = T5h - T5o;
+	       T6s = T6q - T6r;
+	       T6n = W[6];
+	       T6p = W[7];
+	       rio[WS(vs, 4) + WS(is, 4)] = FMA(T6n, T6o, T6p * T6s);
+	       iio[WS(vs, 4) + WS(is, 4)] = FNMS(T6p, T6o, T6n * T6s);
+	  }
+	  {
+	       E T7u, T7A, T7p, T7v;
+	       T7u = T7s - T7t;
+	       T7A = T7w - T7z;
+	       T7p = W[10];
+	       T7v = W[11];
+	       iio[WS(vs, 6) + WS(is, 5)] = FNMS(T7v, T7A, T7p * T7u);
+	       rio[WS(vs, 6) + WS(is, 5)] = FMA(T7v, T7u, T7p * T7A);
+	  }
+	  {
+	       E T6c, T6i, T67, T6d;
+	       T6c = T6a - T6b;
+	       T6i = T6e - T6h;
+	       T67 = W[10];
+	       T6d = W[11];
+	       iio[WS(vs, 6) + WS(is, 4)] = FNMS(T6d, T6i, T67 * T6c);
+	       rio[WS(vs, 6) + WS(is, 4)] = FMA(T6d, T6c, T67 * T6i);
+	  }
+	  {
+	       E T7G, T7K, T7F, T7H;
+	       T7G = T6z - T6G;
+	       T7K = T7I - T7J;
+	       T7F = W[6];
+	       T7H = W[7];
+	       rio[WS(vs, 4) + WS(is, 5)] = FMA(T7F, T7G, T7H * T7K);
+	       iio[WS(vs, 4) + WS(is, 5)] = FNMS(T7H, T7G, T7F * T7K);
+	  }
+	  {
+	       E T3O, T3S, T3N, T3P;
+	       T3O = T2H - T2O;
+	       T3S = T3Q - T3R;
+	       T3N = W[6];
+	       T3P = W[7];
+	       rio[WS(vs, 4) + WS(is, 2)] = FMA(T3N, T3O, T3P * T3S);
+	       iio[WS(vs, 4) + WS(is, 2)] = FNMS(T3P, T3O, T3N * T3S);
+	  }
+	  {
+	       E T3K, T3M, T3J, T3L;
+	       T3K = T3B + T3A;
+	       T3M = T3E + T3H;
+	       T3J = W[2];
+	       T3L = W[3];
+	       iio[WS(vs, 2) + WS(is, 2)] = FNMS(T3L, T3M, T3J * T3K);
+	       rio[WS(vs, 2) + WS(is, 2)] = FMA(T3L, T3K, T3J * T3M);
+	  }
+	  {
+	       E T7C, T7E, T7B, T7D;
+	       T7C = T7t + T7s;
+	       T7E = T7w + T7z;
+	       T7B = W[2];
+	       T7D = W[3];
+	       iio[WS(vs, 2) + WS(is, 5)] = FNMS(T7D, T7E, T7B * T7C);
+	       rio[WS(vs, 2) + WS(is, 5)] = FMA(T7D, T7C, T7B * T7E);
+	  }
+	  {
+	       E T6k, T6m, T6j, T6l;
+	       T6k = T6b + T6a;
+	       T6m = T6e + T6h;
+	       T6j = W[2];
+	       T6l = W[3];
+	       iio[WS(vs, 2) + WS(is, 4)] = FNMS(T6l, T6m, T6j * T6k);
+	       rio[WS(vs, 2) + WS(is, 4)] = FMA(T6l, T6k, T6j * T6m);
+	  }
+	  {
+	       E T52, T54, T51, T53;
+	       T52 = T4T + T4S;
+	       T54 = T4W + T4Z;
+	       T51 = W[2];
+	       T53 = W[3];
+	       iio[WS(vs, 2) + WS(is, 3)] = FNMS(T53, T54, T51 * T52);
+	       rio[WS(vs, 2) + WS(is, 3)] = FMA(T53, T52, T51 * T54);
+	  }
+	  {
+	       E T5G, T5S, T5Q, T5U, T5F, T5P;
+	       T5F = KP707106781 * (T5z - T5E);
+	       T5G = T5u - T5F;
+	       T5S = T5u + T5F;
+	       T5P = KP707106781 * (T5N - T5O);
+	       T5Q = T5M - T5P;
+	       T5U = T5M + T5P;
+	       {
+		    E T5p, T5H, T5R, T5T;
+		    T5p = W[12];
+		    T5H = W[13];
+		    iio[WS(vs, 7) + WS(is, 4)] = FNMS(T5H, T5Q, T5p * T5G);
+		    rio[WS(vs, 7) + WS(is, 4)] = FMA(T5H, T5G, T5p * T5Q);
+		    T5R = W[4];
+		    T5T = W[5];
+		    iio[WS(vs, 3) + WS(is, 4)] = FNMS(T5T, T5U, T5R * T5S);
+		    rio[WS(vs, 3) + WS(is, 4)] = FMA(T5T, T5S, T5R * T5U);
+	       }
+	  }
+	  {
+	       E Tw, TI, TG, TK, Tv, TF;
+	       Tv = KP707106781 * (Tp - Tu);
+	       Tw = Tk - Tv;
+	       TI = Tk + Tv;
+	       TF = KP707106781 * (TD - TE);
+	       TG = TC - TF;
+	       TK = TC + TF;
+	       {
+		    E Tf, Tx, TH, TJ;
+		    Tf = W[12];
+		    Tx = W[13];
+		    iio[WS(vs, 7)] = FNMS(Tx, TG, Tf * Tw);
+		    rio[WS(vs, 7)] = FMA(Tx, Tw, Tf * TG);
+		    TH = W[4];
+		    TJ = W[5];
+		    iio[WS(vs, 3)] = FNMS(TJ, TK, TH * TI);
+		    rio[WS(vs, 3)] = FMA(TJ, TI, TH * TK);
+	       }
+	  }
+	  {
+	       E T9Q, T9W, T9U, T9Y, T9P, T9T;
+	       T9P = KP707106781 * (T9w + T9r);
+	       T9Q = T9O - T9P;
+	       T9W = T9O + T9P;
+	       T9T = KP707106781 * (T9F + T9G);
+	       T9U = T9S - T9T;
+	       T9Y = T9S + T9T;
+	       {
+		    E T9N, T9R, T9V, T9X;
+		    T9N = W[8];
+		    T9R = W[9];
+		    rio[WS(vs, 5) + WS(is, 7)] = FMA(T9N, T9Q, T9R * T9U);
+		    iio[WS(vs, 5) + WS(is, 7)] = FNMS(T9R, T9Q, T9N * T9U);
+		    T9V = W[0];
+		    T9X = W[1];
+		    rio[WS(vs, 1) + WS(is, 7)] = FMA(T9V, T9W, T9X * T9Y);
+		    iio[WS(vs, 1) + WS(is, 7)] = FNMS(T9X, T9W, T9V * T9Y);
+	       }
+	  }
+	  {
+	       E T36, T3i, T3g, T3k, T35, T3f;
+	       T35 = KP707106781 * (T2Z - T34);
+	       T36 = T2U - T35;
+	       T3i = T2U + T35;
+	       T3f = KP707106781 * (T3d - T3e);
+	       T3g = T3c - T3f;
+	       T3k = T3c + T3f;
+	       {
+		    E T2P, T37, T3h, T3j;
+		    T2P = W[12];
+		    T37 = W[13];
+		    iio[WS(vs, 7) + WS(is, 2)] = FNMS(T37, T3g, T2P * T36);
+		    rio[WS(vs, 7) + WS(is, 2)] = FMA(T37, T36, T2P * T3g);
+		    T3h = W[4];
+		    T3j = W[5];
+		    iio[WS(vs, 3) + WS(is, 2)] = FNMS(T3j, T3k, T3h * T3i);
+		    rio[WS(vs, 3) + WS(is, 2)] = FMA(T3j, T3i, T3h * T3k);
+	       }
+	  }
+	  {
+	       E T5Y, T64, T62, T66, T5X, T61;
+	       T5X = KP707106781 * (T5E + T5z);
+	       T5Y = T5W - T5X;
+	       T64 = T5W + T5X;
+	       T61 = KP707106781 * (T5N + T5O);
+	       T62 = T60 - T61;
+	       T66 = T60 + T61;
+	       {
+		    E T5V, T5Z, T63, T65;
+		    T5V = W[8];
+		    T5Z = W[9];
+		    rio[WS(vs, 5) + WS(is, 4)] = FMA(T5V, T5Y, T5Z * T62);
+		    iio[WS(vs, 5) + WS(is, 4)] = FNMS(T5Z, T5Y, T5V * T62);
+		    T63 = W[0];
+		    T65 = W[1];
+		    rio[WS(vs, 1) + WS(is, 4)] = FMA(T63, T64, T65 * T66);
+		    iio[WS(vs, 1) + WS(is, 4)] = FNMS(T65, T64, T63 * T66);
+	       }
+	  }
+	  {
+	       E T7g, T7m, T7k, T7o, T7f, T7j;
+	       T7f = KP707106781 * (T6W + T6R);
+	       T7g = T7e - T7f;
+	       T7m = T7e + T7f;
+	       T7j = KP707106781 * (T75 + T76);
+	       T7k = T7i - T7j;
+	       T7o = T7i + T7j;
+	       {
+		    E T7d, T7h, T7l, T7n;
+		    T7d = W[8];
+		    T7h = W[9];
+		    rio[WS(vs, 5) + WS(is, 5)] = FMA(T7d, T7g, T7h * T7k);
+		    iio[WS(vs, 5) + WS(is, 5)] = FNMS(T7h, T7g, T7d * T7k);
+		    T7l = W[0];
+		    T7n = W[1];
+		    rio[WS(vs, 1) + WS(is, 5)] = FMA(T7l, T7m, T7n * T7o);
+		    iio[WS(vs, 1) + WS(is, 5)] = FNMS(T7n, T7m, T7l * T7o);
+	       }
+	  }
+	  {
+	       E T8g, T8s, T8q, T8u, T8f, T8p;
+	       T8f = KP707106781 * (T89 - T8e);
+	       T8g = T84 - T8f;
+	       T8s = T84 + T8f;
+	       T8p = KP707106781 * (T8n - T8o);
+	       T8q = T8m - T8p;
+	       T8u = T8m + T8p;
+	       {
+		    E T7Z, T8h, T8r, T8t;
+		    T7Z = W[12];
+		    T8h = W[13];
+		    iio[WS(vs, 7) + WS(is, 6)] = FNMS(T8h, T8q, T7Z * T8g);
+		    rio[WS(vs, 7) + WS(is, 6)] = FMA(T8h, T8g, T7Z * T8q);
+		    T8r = W[4];
+		    T8t = W[5];
+		    iio[WS(vs, 3) + WS(is, 6)] = FNMS(T8t, T8u, T8r * T8s);
+		    rio[WS(vs, 3) + WS(is, 6)] = FMA(T8t, T8s, T8r * T8u);
+	       }
+	  }
+	  {
+	       E T4G, T4M, T4K, T4O, T4F, T4J;
+	       T4F = KP707106781 * (T4m + T4h);
+	       T4G = T4E - T4F;
+	       T4M = T4E + T4F;
+	       T4J = KP707106781 * (T4v + T4w);
+	       T4K = T4I - T4J;
+	       T4O = T4I + T4J;
+	       {
+		    E T4D, T4H, T4L, T4N;
+		    T4D = W[8];
+		    T4H = W[9];
+		    rio[WS(vs, 5) + WS(is, 3)] = FMA(T4D, T4G, T4H * T4K);
+		    iio[WS(vs, 5) + WS(is, 3)] = FNMS(T4H, T4G, T4D * T4K);
+		    T4L = W[0];
+		    T4N = W[1];
+		    rio[WS(vs, 1) + WS(is, 3)] = FMA(T4L, T4M, T4N * T4O);
+		    iio[WS(vs, 1) + WS(is, 3)] = FNMS(T4N, T4M, T4L * T4O);
+	       }
+	  }
+	  {
+	       E TO, TU, TS, TW, TN, TR;
+	       TN = KP707106781 * (Tu + Tp);
+	       TO = TM - TN;
+	       TU = TM + TN;
+	       TR = KP707106781 * (TD + TE);
+	       TS = TQ - TR;
+	       TW = TQ + TR;
+	       {
+		    E TL, TP, TT, TV;
+		    TL = W[8];
+		    TP = W[9];
+		    rio[WS(vs, 5)] = FMA(TL, TO, TP * TS);
+		    iio[WS(vs, 5)] = FNMS(TP, TO, TL * TS);
+		    TT = W[0];
+		    TV = W[1];
+		    rio[WS(vs, 1)] = FMA(TT, TU, TV * TW);
+		    iio[WS(vs, 1)] = FNMS(TV, TU, TT * TW);
+	       }
+	  }
+	  {
+	       E T26, T2c, T2a, T2e, T25, T29;
+	       T25 = KP707106781 * (T1M + T1H);
+	       T26 = T24 - T25;
+	       T2c = T24 + T25;
+	       T29 = KP707106781 * (T1V + T1W);
+	       T2a = T28 - T29;
+	       T2e = T28 + T29;
+	       {
+		    E T23, T27, T2b, T2d;
+		    T23 = W[8];
+		    T27 = W[9];
+		    rio[WS(vs, 5) + WS(is, 1)] = FMA(T23, T26, T27 * T2a);
+		    iio[WS(vs, 5) + WS(is, 1)] = FNMS(T27, T26, T23 * T2a);
+		    T2b = W[0];
+		    T2d = W[1];
+		    rio[WS(vs, 1) + WS(is, 1)] = FMA(T2b, T2c, T2d * T2e);
+		    iio[WS(vs, 1) + WS(is, 1)] = FNMS(T2d, T2c, T2b * T2e);
+	       }
+	  }
+	  {
+	       E T9y, T9K, T9I, T9M, T9x, T9H;
+	       T9x = KP707106781 * (T9r - T9w);
+	       T9y = T9m - T9x;
+	       T9K = T9m + T9x;
+	       T9H = KP707106781 * (T9F - T9G);
+	       T9I = T9E - T9H;
+	       T9M = T9E + T9H;
+	       {
+		    E T9h, T9z, T9J, T9L;
+		    T9h = W[12];
+		    T9z = W[13];
+		    iio[WS(vs, 7) + WS(is, 7)] = FNMS(T9z, T9I, T9h * T9y);
+		    rio[WS(vs, 7) + WS(is, 7)] = FMA(T9z, T9y, T9h * T9I);
+		    T9J = W[4];
+		    T9L = W[5];
+		    iio[WS(vs, 3) + WS(is, 7)] = FNMS(T9L, T9M, T9J * T9K);
+		    rio[WS(vs, 3) + WS(is, 7)] = FMA(T9L, T9K, T9J * T9M);
+	       }
+	  }
+	  {
+	       E T6Y, T7a, T78, T7c, T6X, T77;
+	       T6X = KP707106781 * (T6R - T6W);
+	       T6Y = T6M - T6X;
+	       T7a = T6M + T6X;
+	       T77 = KP707106781 * (T75 - T76);
+	       T78 = T74 - T77;
+	       T7c = T74 + T77;
+	       {
+		    E T6H, T6Z, T79, T7b;
+		    T6H = W[12];
+		    T6Z = W[13];
+		    iio[WS(vs, 7) + WS(is, 5)] = FNMS(T6Z, T78, T6H * T6Y);
+		    rio[WS(vs, 7) + WS(is, 5)] = FMA(T6Z, T6Y, T6H * T78);
+		    T79 = W[4];
+		    T7b = W[5];
+		    iio[WS(vs, 3) + WS(is, 5)] = FNMS(T7b, T7c, T79 * T7a);
+		    rio[WS(vs, 3) + WS(is, 5)] = FMA(T7b, T7a, T79 * T7c);
+	       }
+	  }
+	  {
+	       E T1O, T20, T1Y, T22, T1N, T1X;
+	       T1N = KP707106781 * (T1H - T1M);
+	       T1O = T1C - T1N;
+	       T20 = T1C + T1N;
+	       T1X = KP707106781 * (T1V - T1W);
+	       T1Y = T1U - T1X;
+	       T22 = T1U + T1X;
+	       {
+		    E T1x, T1P, T1Z, T21;
+		    T1x = W[12];
+		    T1P = W[13];
+		    iio[WS(vs, 7) + WS(is, 1)] = FNMS(T1P, T1Y, T1x * T1O);
+		    rio[WS(vs, 7) + WS(is, 1)] = FMA(T1P, T1O, T1x * T1Y);
+		    T1Z = W[4];
+		    T21 = W[5];
+		    iio[WS(vs, 3) + WS(is, 1)] = FNMS(T21, T22, T1Z * T20);
+		    rio[WS(vs, 3) + WS(is, 1)] = FMA(T21, T20, T1Z * T22);
+	       }
+	  }
+	  {
+	       E T4o, T4A, T4y, T4C, T4n, T4x;
+	       T4n = KP707106781 * (T4h - T4m);
+	       T4o = T4c - T4n;
+	       T4A = T4c + T4n;
+	       T4x = KP707106781 * (T4v - T4w);
+	       T4y = T4u - T4x;
+	       T4C = T4u + T4x;
+	       {
+		    E T47, T4p, T4z, T4B;
+		    T47 = W[12];
+		    T4p = W[13];
+		    iio[WS(vs, 7) + WS(is, 3)] = FNMS(T4p, T4y, T47 * T4o);
+		    rio[WS(vs, 7) + WS(is, 3)] = FMA(T4p, T4o, T47 * T4y);
+		    T4z = W[4];
+		    T4B = W[5];
+		    iio[WS(vs, 3) + WS(is, 3)] = FNMS(T4B, T4C, T4z * T4A);
+		    rio[WS(vs, 3) + WS(is, 3)] = FMA(T4B, T4A, T4z * T4C);
+	       }
+	  }
+	  {
+	       E T3o, T3u, T3s, T3w, T3n, T3r;
+	       T3n = KP707106781 * (T34 + T2Z);
+	       T3o = T3m - T3n;
+	       T3u = T3m + T3n;
+	       T3r = KP707106781 * (T3d + T3e);
+	       T3s = T3q - T3r;
+	       T3w = T3q + T3r;
+	       {
+		    E T3l, T3p, T3t, T3v;
+		    T3l = W[8];
+		    T3p = W[9];
+		    rio[WS(vs, 5) + WS(is, 2)] = FMA(T3l, T3o, T3p * T3s);
+		    iio[WS(vs, 5) + WS(is, 2)] = FNMS(T3p, T3o, T3l * T3s);
+		    T3t = W[0];
+		    T3v = W[1];
+		    rio[WS(vs, 1) + WS(is, 2)] = FMA(T3t, T3u, T3v * T3w);
+		    iio[WS(vs, 1) + WS(is, 2)] = FNMS(T3v, T3u, T3t * T3w);
+	       }
+	  }
+	  {
+	       E T8y, T8E, T8C, T8G, T8x, T8B;
+	       T8x = KP707106781 * (T8e + T89);
+	       T8y = T8w - T8x;
+	       T8E = T8w + T8x;
+	       T8B = KP707106781 * (T8n + T8o);
+	       T8C = T8A - T8B;
+	       T8G = T8A + T8B;
+	       {
+		    E T8v, T8z, T8D, T8F;
+		    T8v = W[8];
+		    T8z = W[9];
+		    rio[WS(vs, 5) + WS(is, 6)] = FMA(T8v, T8y, T8z * T8C);
+		    iio[WS(vs, 5) + WS(is, 6)] = FNMS(T8z, T8y, T8v * T8C);
+		    T8D = W[0];
+		    T8F = W[1];
+		    rio[WS(vs, 1) + WS(is, 6)] = FMA(T8D, T8E, T8F * T8G);
+		    iio[WS(vs, 1) + WS(is, 6)] = FNMS(T8F, T8E, T8D * T8G);
+	       }
+	  }
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = {  /* table handed to desc below */
+     {TW_FULL, 0, 8},  /* NOTE(review): presumably "full twiddle set, radix 8" -- confirm against tw_instr */
+     {TW_NEXT, 1, 0}  /* NOTE(review): presumably the end-of-table entry -- confirm against tw_instr */
+};
+
+static const ct_desc desc = { 8, "q1_8", twinstr, {416, 144, 112, 0}, &GENUS, 0, 0, 0 };  /* NOTE(review): {416, 144, 112, 0} presumably the generator's op counts -- confirm */
+
+void X(codelet_q1_8) (planner *p) {  /* passes q1_8 and &desc to X(kdft_difsq_register) for planner p */
+     X(kdft_difsq_register) (p, q1_8, &desc);
+}
diff --git a/src/fftw3/dft/codelets/n.c b/src/fftw3/dft/codelets/n.c
new file mode 100644
index 0000000..22f0528
--- /dev/null
+++ b/src/fftw3/dft/codelets/n.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "codelet-dft.h"
+#include "n.h"
+
+static int okp(const kdft_desc *d, const R *ri, const R *ii,
+	       const R *ro, const R *io,
+	       int is, int os, int vl, int ivs, int ovs, const planner *plnr)
+{
+     /* Returns 1 when each of d->is, d->os, d->ivs and d->ovs is either zero
+      * or equal to the same-named argument, and 0 otherwise.  The data
+      * pointers, vl and plnr take no part in the decision. */
+     UNUSED(ri); UNUSED(ii); UNUSED(ro); UNUSED(io); UNUSED(vl); UNUSED(plnr);
+     if (d->is && d->is != is) return 0;
+     if (d->os && d->os != os) return 0;
+     if (d->ivs && d->ivs != ivs) return 0;
+     if (d->ovs && d->ovs != ovs) return 0;
+     return 1;
+}
+
+const kdft_genus GENUS = { okp, 1 };  /* okp is the check defined above; NOTE(review): meaning of the 1 is set by kdft_genus -- confirm */
diff --git a/src/fftw3/dft/codelets/n.h b/src/fftw3/dft/codelets/n.h
new file mode 100644
index 0000000..249c1c9
--- /dev/null
+++ b/src/fftw3/dft/codelets/n.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+
+#define GENUS X(dft_n_genus)  /* GENUS is shorthand for the X()-wrapped name dft_n_genus */
+extern const kdft_genus GENUS;  /* declaration only; the object is defined in n.c */
diff --git a/src/fftw3/dft/codelets/q.h b/src/fftw3/dft/codelets/q.h
new file mode 100644
index 0000000..6a9dd0b
--- /dev/null
+++ b/src/fftw3/dft/codelets/q.h
@@ -0,0 +1 @@
+#include "t.h"  /* same stuff, no need to duplicate */
diff --git a/src/fftw3/dft/codelets/standard/m1_16.c b/src/fftw3/dft/codelets/standard/m1_16.c
new file mode 100644
index 0000000..9882bce
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/m1_16.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:37 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw_noinline -compact -variables 4 -n 16 -name m1_16 -include n.h */
+
+/*
+ * This function contains 144 FP additions, 24 FP multiplications,
+ * (or, 136 additions, 16 multiplications, 8 fused multiply/add),
+ * 49 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: m1_16.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: m1_16.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: m1_16.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void m1_16_0(const R *ri, const R *ii, R *ro, R *io, stride is, stride os)
+{  /* Straight-line kernel: loads ri/ii at WS(is, 0..15), stores ro/io at WS(os, 0..15). */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);  /* sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);  /* cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);  /* sqrt(1/2) */
+     {  /* the four load blocks below all run before the first store */
+	  E T7, T1R, T25, TC, TN, T1x, T1H, T1l, Tt, T22, T2h, T1b, T1g, T1E, T1Z;  /* set by the load blocks, read by the store blocks */
+	  E T1D, Te, T1S, T26, TJ, TQ, T1m, T1n, TT, Tm, T1X, T2g, T10, T15, T1B;
+	  E T1U, T1A;
+	  {  /* loads at WS(is, k), k = 0, 8, 4, 12 */
+	       E T3, TL, Ty, T1k, T6, T1j, TB, TM;
+	       {
+		    E T1, T2, Tw, Tx;
+		    T1 = ri[0];
+		    T2 = ri[WS(is, 8)];
+		    T3 = T1 + T2;
+		    TL = T1 - T2;
+		    Tw = ii[0];
+		    Tx = ii[WS(is, 8)];
+		    Ty = Tw + Tx;
+		    T1k = Tw - Tx;
+	       }
+	       {
+		    E T4, T5, Tz, TA;
+		    T4 = ri[WS(is, 4)];
+		    T5 = ri[WS(is, 12)];
+		    T6 = T4 + T5;
+		    T1j = T4 - T5;
+		    Tz = ii[WS(is, 4)];
+		    TA = ii[WS(is, 12)];
+		    TB = Tz + TA;
+		    TM = Tz - TA;
+	       }
+	       T7 = T3 + T6;
+	       T1R = T3 - T6;
+	       T25 = Ty - TB;
+	       TC = Ty + TB;
+	       TN = TL - TM;
+	       T1x = TL + TM;
+	       T1H = T1k - T1j;
+	       T1l = T1j + T1k;
+	  }
+	  {  /* loads at WS(is, k), k = 15, 7, 3, 11 */
+	       E Tp, T17, T1f, T20, Ts, T1c, T1a, T21;
+	       {
+		    E Tn, To, T1d, T1e;
+		    Tn = ri[WS(is, 15)];
+		    To = ri[WS(is, 7)];
+		    Tp = Tn + To;
+		    T17 = Tn - To;
+		    T1d = ii[WS(is, 15)];
+		    T1e = ii[WS(is, 7)];
+		    T1f = T1d - T1e;
+		    T20 = T1d + T1e;
+	       }
+	       {
+		    E Tq, Tr, T18, T19;
+		    Tq = ri[WS(is, 3)];
+		    Tr = ri[WS(is, 11)];
+		    Ts = Tq + Tr;
+		    T1c = Tq - Tr;
+		    T18 = ii[WS(is, 3)];
+		    T19 = ii[WS(is, 11)];
+		    T1a = T18 - T19;
+		    T21 = T18 + T19;
+	       }
+	       Tt = Tp + Ts;
+	       T22 = T20 - T21;
+	       T2h = T20 + T21;
+	       T1b = T17 - T1a;
+	       T1g = T1c + T1f;
+	       T1E = T1f - T1c;
+	       T1Z = Tp - Ts;
+	       T1D = T17 + T1a;
+	  }
+	  {  /* loads at WS(is, k), k = 2, 10, 14, 6 */
+	       E Ta, TP, TF, TO, Td, TR, TI, TS;
+	       {
+		    E T8, T9, TD, TE;
+		    T8 = ri[WS(is, 2)];
+		    T9 = ri[WS(is, 10)];
+		    Ta = T8 + T9;
+		    TP = T8 - T9;
+		    TD = ii[WS(is, 2)];
+		    TE = ii[WS(is, 10)];
+		    TF = TD + TE;
+		    TO = TD - TE;
+	       }
+	       {
+		    E Tb, Tc, TG, TH;
+		    Tb = ri[WS(is, 14)];
+		    Tc = ri[WS(is, 6)];
+		    Td = Tb + Tc;
+		    TR = Tb - Tc;
+		    TG = ii[WS(is, 14)];
+		    TH = ii[WS(is, 6)];
+		    TI = TG + TH;
+		    TS = TG - TH;
+	       }
+	       Te = Ta + Td;
+	       T1S = TF - TI;
+	       T26 = Td - Ta;
+	       TJ = TF + TI;
+	       TQ = TO - TP;
+	       T1m = TR - TS;
+	       T1n = TP + TO;
+	       TT = TR + TS;
+	  }
+	  {  /* loads at WS(is, k), k = 1, 9, 5, 13 */
+	       E Ti, T11, TZ, T1V, Tl, TW, T14, T1W;
+	       {
+		    E Tg, Th, TX, TY;
+		    Tg = ri[WS(is, 1)];
+		    Th = ri[WS(is, 9)];
+		    Ti = Tg + Th;
+		    T11 = Tg - Th;
+		    TX = ii[WS(is, 1)];
+		    TY = ii[WS(is, 9)];
+		    TZ = TX - TY;
+		    T1V = TX + TY;
+	       }
+	       {
+		    E Tj, Tk, T12, T13;
+		    Tj = ri[WS(is, 5)];
+		    Tk = ri[WS(is, 13)];
+		    Tl = Tj + Tk;
+		    TW = Tj - Tk;
+		    T12 = ii[WS(is, 5)];
+		    T13 = ii[WS(is, 13)];
+		    T14 = T12 - T13;
+		    T1W = T12 + T13;
+	       }
+	       Tm = Ti + Tl;
+	       T1X = T1V - T1W;
+	       T2g = T1V + T1W;
+	       T10 = TW + TZ;
+	       T15 = T11 - T14;
+	       T1B = T11 + T14;
+	       T1U = Ti - Tl;
+	       T1A = TZ - TW;
+	  }
+	  {  /* stores ro and io at WS(os, 8) and at 0 */
+	       E Tf, Tu, T2j, T2k;
+	       Tf = T7 + Te;
+	       Tu = Tm + Tt;
+	       ro[WS(os, 8)] = Tf - Tu;
+	       ro[0] = Tf + Tu;
+	       T2j = TC + TJ;
+	       T2k = T2g + T2h;
+	       io[WS(os, 8)] = T2j - T2k;
+	       io[0] = T2j + T2k;
+	  }
+	  {  /* stores ro and io at WS(os, 4) and WS(os, 12) */
+	       E Tv, TK, T2f, T2i;
+	       Tv = Tt - Tm;
+	       TK = TC - TJ;
+	       io[WS(os, 4)] = Tv + TK;
+	       io[WS(os, 12)] = TK - Tv;
+	       T2f = T7 - Te;
+	       T2i = T2g - T2h;
+	       ro[WS(os, 12)] = T2f - T2i;
+	       ro[WS(os, 4)] = T2f + T2i;
+	  }
+	  {  /* stores ro at WS(os, 2) and 10, io at WS(os, 6) and 14 */
+	       E T1T, T27, T24, T28, T1Y, T23;
+	       T1T = T1R + T1S;
+	       T27 = T25 - T26;
+	       T1Y = T1U + T1X;
+	       T23 = T1Z - T22;
+	       T24 = KP707106781 * (T1Y + T23);
+	       T28 = KP707106781 * (T23 - T1Y);
+	       ro[WS(os, 10)] = T1T - T24;
+	       io[WS(os, 6)] = T27 + T28;
+	       ro[WS(os, 2)] = T1T + T24;
+	       io[WS(os, 14)] = T27 - T28;
+	  }
+	  {  /* stores ro at WS(os, 6) and 14, io at WS(os, 2) and 10 */
+	       E T29, T2d, T2c, T2e, T2a, T2b;
+	       T29 = T1R - T1S;
+	       T2d = T26 + T25;
+	       T2a = T1X - T1U;
+	       T2b = T1Z + T22;
+	       T2c = KP707106781 * (T2a - T2b);
+	       T2e = KP707106781 * (T2a + T2b);
+	       ro[WS(os, 14)] = T29 - T2c;
+	       io[WS(os, 2)] = T2d + T2e;
+	       ro[WS(os, 6)] = T29 + T2c;
+	       io[WS(os, 10)] = T2d - T2e;
+	  }
+	  {  /* stores ro and io at WS(os, k), k = 3, 7, 11, 15 */
+	       E TV, T1r, T1p, T1v, T1i, T1q, T1u, T1w, TU, T1o;
+	       TU = KP707106781 * (TQ - TT);
+	       TV = TN + TU;
+	       T1r = TN - TU;
+	       T1o = KP707106781 * (T1m - T1n);
+	       T1p = T1l - T1o;
+	       T1v = T1l + T1o;
+	       {  /* mixes terms with the pi/8 constants; FMA/FNMS are project macros not defined in this file */
+		    E T16, T1h, T1s, T1t;
+		    T16 = FMA(KP923879532, T10, KP382683432 * T15);
+		    T1h = FNMS(KP923879532, T1g, KP382683432 * T1b);
+		    T1i = T16 + T1h;
+		    T1q = T1h - T16;
+		    T1s = FNMS(KP923879532, T15, KP382683432 * T10);
+		    T1t = FMA(KP382683432, T1g, KP923879532 * T1b);
+		    T1u = T1s - T1t;
+		    T1w = T1s + T1t;
+	       }
+	       ro[WS(os, 11)] = TV - T1i;
+	       io[WS(os, 11)] = T1v - T1w;
+	       ro[WS(os, 3)] = TV + T1i;
+	       io[WS(os, 3)] = T1v + T1w;
+	       io[WS(os, 15)] = T1p - T1q;
+	       ro[WS(os, 15)] = T1r - T1u;
+	       io[WS(os, 7)] = T1p + T1q;
+	       ro[WS(os, 7)] = T1r + T1u;
+	  }
+	  {  /* stores ro and io at WS(os, k), k = 1, 5, 9, 13 */
+	       E T1z, T1L, T1J, T1P, T1G, T1K, T1O, T1Q, T1y, T1I;
+	       T1y = KP707106781 * (T1n + T1m);
+	       T1z = T1x + T1y;
+	       T1L = T1x - T1y;
+	       T1I = KP707106781 * (TQ + TT);
+	       T1J = T1H - T1I;
+	       T1P = T1H + T1I;
+	       {  /* same pattern as the previous store block, with the two pi/8 constants exchanged */
+		    E T1C, T1F, T1M, T1N;
+		    T1C = FMA(KP382683432, T1A, KP923879532 * T1B);
+		    T1F = FNMS(KP382683432, T1E, KP923879532 * T1D);
+		    T1G = T1C + T1F;
+		    T1K = T1F - T1C;
+		    T1M = FNMS(KP382683432, T1B, KP923879532 * T1A);
+		    T1N = FMA(KP923879532, T1E, KP382683432 * T1D);
+		    T1O = T1M - T1N;
+		    T1Q = T1M + T1N;
+	       }
+	       ro[WS(os, 9)] = T1z - T1G;
+	       io[WS(os, 9)] = T1P - T1Q;
+	       ro[WS(os, 1)] = T1z + T1G;
+	       io[WS(os, 1)] = T1P + T1Q;
+	       io[WS(os, 13)] = T1J - T1K;
+	       ro[WS(os, 13)] = T1L - T1O;
+	       io[WS(os, 5)] = T1J + T1K;
+	       ro[WS(os, 5)] = T1L + T1O;
+	  }
+     }
+}
+
+static void m1_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
+{
+     /* Calls m1_16_0 v times (not at all when v <= 0); after every call */
+     /* ri and ii move forward by ivs, and ro and io move forward by ovs. */
+     int remaining;
+     for (remaining = v; remaining > 0; --remaining) {
+	  m1_16_0(ri, ii, ro, io, is, os);
+	  ri += ivs, ii += ivs;
+	  ro += ovs, io += ovs;
+     }
+}
+
+static const kdft_desc desc = { 16, "m1_16", {136, 16, 8, 0}, &GENUS, 0, 0, 0, 0 };  /* 136, 16, 8 equal the add / mul / fused multiply-add counts quoted in this file's generated header comment */
+void X(codelet_m1_16) (planner *p) {  /* passes m1_16 and &desc to X(kdft_register) for planner p */
+     X(kdft_register) (p, m1_16, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/m1_32.c b/src/fftw3/dft/codelets/standard/m1_32.c
new file mode 100644
index 0000000..aa75d21
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/m1_32.c
@@ -0,0 +1,684 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:38 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw_noinline -compact -variables 4 -n 32 -name m1_32 -include n.h */
+
+/*
+ * This function contains 372 FP additions, 84 FP multiplications,
+ * (or, 340 additions, 52 multiplications, 32 fused multiply/add),
+ * 99 stack variables, and 128 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: m1_32.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: m1_32.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: m1_32.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void m1_32_0(const R *ri, const R *ii, R *ro, R *io, stride is, stride os)
+{ /* Fully unrolled 32-point kernel: reads ri/ii at WS(is, 0..31) and writes ro/io at WS(os, 0..31). */
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191); /* sin(3*pi/16) */
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618); /* sin(pi/16) */
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* cos(pi/16) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 = cos(pi/4) */
+     { /* the temporaries declared next carry partial results from the six input stages to the eight output stages */
+	  E T7, T4r, T4Z, T18, T1z, T3t, T3T, T2T, Te, T1f, T50, T4s, T2W, T3u, T1G;
+	  E T3U, Tm, T1n, T1O, T2Z, T3y, T3X, T4w, T53, Tt, T1u, T1V, T2Y, T3B, T3W;
+	  E T4z, T52, T2t, T3L, T3O, T2K, TR, TY, T5F, T5G, T5H, T5I, T4R, T5j, T2E;
+	  E T3P, T4W, T5k, T2N, T3M, T22, T3E, T3H, T2j, TC, TJ, T5A, T5B, T5C, T5D;
+	  E T4G, T5g, T2d, T3F, T4L, T5h, T2m, T3I;
+	  { /* input stage: sums and differences of inputs 0, 16, 8, 24 */
+	       E T3, T1x, T14, T2S, T6, T2R, T17, T1y;
+	       {
+		    E T1, T2, T12, T13;
+		    T1 = ri[0];
+		    T2 = ri[WS(is, 16)];
+		    T3 = T1 + T2;
+		    T1x = T1 - T2;
+		    T12 = ii[0];
+		    T13 = ii[WS(is, 16)];
+		    T14 = T12 + T13;
+		    T2S = T12 - T13;
+	       }
+	       {
+		    E T4, T5, T15, T16;
+		    T4 = ri[WS(is, 8)];
+		    T5 = ri[WS(is, 24)];
+		    T6 = T4 + T5;
+		    T2R = T4 - T5;
+		    T15 = ii[WS(is, 8)];
+		    T16 = ii[WS(is, 24)];
+		    T17 = T15 + T16;
+		    T1y = T15 - T16;
+	       }
+	       T7 = T3 + T6;
+	       T4r = T3 - T6;
+	       T4Z = T14 - T17;
+	       T18 = T14 + T17;
+	       T1z = T1x - T1y;
+	       T3t = T1x + T1y;
+	       T3T = T2S - T2R;
+	       T2T = T2R + T2S;
+	  }
+	  { /* input stage: inputs 4, 20, 28, 12 */
+	       E Ta, T1B, T1b, T1A, Td, T1D, T1e, T1E;
+	       {
+		    E T8, T9, T19, T1a;
+		    T8 = ri[WS(is, 4)];
+		    T9 = ri[WS(is, 20)];
+		    Ta = T8 + T9;
+		    T1B = T8 - T9;
+		    T19 = ii[WS(is, 4)];
+		    T1a = ii[WS(is, 20)];
+		    T1b = T19 + T1a;
+		    T1A = T19 - T1a;
+	       }
+	       {
+		    E Tb, Tc, T1c, T1d;
+		    Tb = ri[WS(is, 28)];
+		    Tc = ri[WS(is, 12)];
+		    Td = Tb + Tc;
+		    T1D = Tb - Tc;
+		    T1c = ii[WS(is, 28)];
+		    T1d = ii[WS(is, 12)];
+		    T1e = T1c + T1d;
+		    T1E = T1c - T1d;
+	       }
+	       Te = Ta + Td;
+	       T1f = T1b + T1e;
+	       T50 = Td - Ta;
+	       T4s = T1b - T1e;
+	       {
+		    E T2U, T2V, T1C, T1F;
+		    T2U = T1D - T1E;
+		    T2V = T1B + T1A;
+		    T2W = KP707106781 * (T2U - T2V);
+		    T3u = KP707106781 * (T2V + T2U);
+		    T1C = T1A - T1B;
+		    T1F = T1D + T1E;
+		    T1G = KP707106781 * (T1C - T1F);
+		    T3U = KP707106781 * (T1C + T1F);
+	       }
+	  }
+	  { /* input stage: inputs 2, 18, 10, 26 */
+	       E Ti, T1L, T1j, T1J, Tl, T1I, T1m, T1M, T1K, T1N;
+	       {
+		    E Tg, Th, T1h, T1i;
+		    Tg = ri[WS(is, 2)];
+		    Th = ri[WS(is, 18)];
+		    Ti = Tg + Th;
+		    T1L = Tg - Th;
+		    T1h = ii[WS(is, 2)];
+		    T1i = ii[WS(is, 18)];
+		    T1j = T1h + T1i;
+		    T1J = T1h - T1i;
+	       }
+	       {
+		    E Tj, Tk, T1k, T1l;
+		    Tj = ri[WS(is, 10)];
+		    Tk = ri[WS(is, 26)];
+		    Tl = Tj + Tk;
+		    T1I = Tj - Tk;
+		    T1k = ii[WS(is, 10)];
+		    T1l = ii[WS(is, 26)];
+		    T1m = T1k + T1l;
+		    T1M = T1k - T1l;
+	       }
+	       Tm = Ti + Tl;
+	       T1n = T1j + T1m;
+	       T1K = T1I + T1J;
+	       T1N = T1L - T1M;
+	       T1O = FNMS(KP923879532, T1N, KP382683432 * T1K);
+	       T2Z = FMA(KP923879532, T1K, KP382683432 * T1N);
+	       {
+		    E T3w, T3x, T4u, T4v;
+		    T3w = T1J - T1I;
+		    T3x = T1L + T1M;
+		    T3y = FNMS(KP382683432, T3x, KP923879532 * T3w);
+		    T3X = FMA(KP382683432, T3w, KP923879532 * T3x);
+		    T4u = T1j - T1m;
+		    T4v = Ti - Tl;
+		    T4w = T4u - T4v;
+		    T53 = T4v + T4u;
+	       }
+	  }
+	  { /* input stage: inputs 30, 14, 6, 22 */
+	       E Tp, T1S, T1q, T1Q, Ts, T1P, T1t, T1T, T1R, T1U;
+	       {
+		    E Tn, To, T1o, T1p;
+		    Tn = ri[WS(is, 30)];
+		    To = ri[WS(is, 14)];
+		    Tp = Tn + To;
+		    T1S = Tn - To;
+		    T1o = ii[WS(is, 30)];
+		    T1p = ii[WS(is, 14)];
+		    T1q = T1o + T1p;
+		    T1Q = T1o - T1p;
+	       }
+	       {
+		    E Tq, Tr, T1r, T1s;
+		    Tq = ri[WS(is, 6)];
+		    Tr = ri[WS(is, 22)];
+		    Ts = Tq + Tr;
+		    T1P = Tq - Tr;
+		    T1r = ii[WS(is, 6)];
+		    T1s = ii[WS(is, 22)];
+		    T1t = T1r + T1s;
+		    T1T = T1r - T1s;
+	       }
+	       Tt = Tp + Ts;
+	       T1u = T1q + T1t;
+	       T1R = T1P + T1Q;
+	       T1U = T1S - T1T;
+	       T1V = FMA(KP382683432, T1R, KP923879532 * T1U);
+	       T2Y = FNMS(KP923879532, T1R, KP382683432 * T1U);
+	       {
+		    E T3z, T3A, T4x, T4y;
+		    T3z = T1Q - T1P;
+		    T3A = T1S + T1T;
+		    T3B = FMA(KP923879532, T3z, KP382683432 * T3A);
+		    T3W = FNMS(KP382683432, T3z, KP923879532 * T3A);
+		    T4x = Tp - Ts;
+		    T4y = T1q - T1t;
+		    T4z = T4x + T4y;
+		    T52 = T4x - T4y;
+	       }
+	  }
+	  { /* input stage: odd inputs 31, 15, 7, 23, 3, 19, 27, 11 */
+	       E TN, T2p, T2J, T4S, TQ, T2G, T2s, T4T, TU, T2x, T2w, T4O, TX, T2z, T2C;
+	       E T4P;
+	       {
+		    E TL, TM, T2H, T2I;
+		    TL = ri[WS(is, 31)];
+		    TM = ri[WS(is, 15)];
+		    TN = TL + TM;
+		    T2p = TL - TM;
+		    T2H = ii[WS(is, 31)];
+		    T2I = ii[WS(is, 15)];
+		    T2J = T2H - T2I;
+		    T4S = T2H + T2I;
+	       }
+	       {
+		    E TO, TP, T2q, T2r;
+		    TO = ri[WS(is, 7)];
+		    TP = ri[WS(is, 23)];
+		    TQ = TO + TP;
+		    T2G = TO - TP;
+		    T2q = ii[WS(is, 7)];
+		    T2r = ii[WS(is, 23)];
+		    T2s = T2q - T2r;
+		    T4T = T2q + T2r;
+	       }
+	       {
+		    E TS, TT, T2u, T2v;
+		    TS = ri[WS(is, 3)];
+		    TT = ri[WS(is, 19)];
+		    TU = TS + TT;
+		    T2x = TS - TT;
+		    T2u = ii[WS(is, 3)];
+		    T2v = ii[WS(is, 19)];
+		    T2w = T2u - T2v;
+		    T4O = T2u + T2v;
+	       }
+	       {
+		    E TV, TW, T2A, T2B;
+		    TV = ri[WS(is, 27)];
+		    TW = ri[WS(is, 11)];
+		    TX = TV + TW;
+		    T2z = TV - TW;
+		    T2A = ii[WS(is, 27)];
+		    T2B = ii[WS(is, 11)];
+		    T2C = T2A - T2B;
+		    T4P = T2A + T2B;
+	       }
+	       T2t = T2p - T2s;
+	       T3L = T2p + T2s;
+	       T3O = T2J - T2G;
+	       T2K = T2G + T2J;
+	       TR = TN + TQ;
+	       TY = TU + TX;
+	       T5F = TR - TY;
+	       {
+		    E T4N, T4Q, T2y, T2D;
+		    T5G = T4S + T4T;
+		    T5H = T4O + T4P;
+		    T5I = T5G - T5H;
+		    T4N = TN - TQ;
+		    T4Q = T4O - T4P;
+		    T4R = T4N - T4Q;
+		    T5j = T4N + T4Q;
+		    T2y = T2w - T2x;
+		    T2D = T2z + T2C;
+		    T2E = KP707106781 * (T2y - T2D);
+		    T3P = KP707106781 * (T2y + T2D);
+		    {
+			 E T4U, T4V, T2L, T2M;
+			 T4U = T4S - T4T;
+			 T4V = TX - TU;
+			 T4W = T4U - T4V;
+			 T5k = T4V + T4U;
+			 T2L = T2z - T2C;
+			 T2M = T2x + T2w;
+			 T2N = KP707106781 * (T2L - T2M);
+			 T3M = KP707106781 * (T2M + T2L);
+		    }
+	       }
+	  }
+	  { /* input stage: odd inputs 1, 17, 9, 25, 5, 21, 29, 13 */
+	       E Ty, T2f, T21, T4C, TB, T1Y, T2i, T4D, TF, T28, T2b, T4I, TI, T23, T26;
+	       E T4J;
+	       {
+		    E Tw, Tx, T1Z, T20;
+		    Tw = ri[WS(is, 1)];
+		    Tx = ri[WS(is, 17)];
+		    Ty = Tw + Tx;
+		    T2f = Tw - Tx;
+		    T1Z = ii[WS(is, 1)];
+		    T20 = ii[WS(is, 17)];
+		    T21 = T1Z - T20;
+		    T4C = T1Z + T20;
+	       }
+	       {
+		    E Tz, TA, T2g, T2h;
+		    Tz = ri[WS(is, 9)];
+		    TA = ri[WS(is, 25)];
+		    TB = Tz + TA;
+		    T1Y = Tz - TA;
+		    T2g = ii[WS(is, 9)];
+		    T2h = ii[WS(is, 25)];
+		    T2i = T2g - T2h;
+		    T4D = T2g + T2h;
+	       }
+	       {
+		    E TD, TE, T29, T2a;
+		    TD = ri[WS(is, 5)];
+		    TE = ri[WS(is, 21)];
+		    TF = TD + TE;
+		    T28 = TD - TE;
+		    T29 = ii[WS(is, 5)];
+		    T2a = ii[WS(is, 21)];
+		    T2b = T29 - T2a;
+		    T4I = T29 + T2a;
+	       }
+	       {
+		    E TG, TH, T24, T25;
+		    TG = ri[WS(is, 29)];
+		    TH = ri[WS(is, 13)];
+		    TI = TG + TH;
+		    T23 = TG - TH;
+		    T24 = ii[WS(is, 29)];
+		    T25 = ii[WS(is, 13)];
+		    T26 = T24 - T25;
+		    T4J = T24 + T25;
+	       }
+	       T22 = T1Y + T21;
+	       T3E = T2f + T2i;
+	       T3H = T21 - T1Y;
+	       T2j = T2f - T2i;
+	       TC = Ty + TB;
+	       TJ = TF + TI;
+	       T5A = TC - TJ;
+	       {
+		    E T4E, T4F, T27, T2c;
+		    T5B = T4C + T4D;
+		    T5C = T4I + T4J;
+		    T5D = T5B - T5C;
+		    T4E = T4C - T4D;
+		    T4F = TI - TF;
+		    T4G = T4E - T4F;
+		    T5g = T4F + T4E;
+		    T27 = T23 - T26;
+		    T2c = T28 + T2b;
+		    T2d = KP707106781 * (T27 - T2c);
+		    T3F = KP707106781 * (T2c + T27);
+		    {
+			 E T4H, T4K, T2k, T2l;
+			 T4H = Ty - TB;
+			 T4K = T4I - T4J;
+			 T4L = T4H - T4K;
+			 T5h = T4H + T4K;
+			 T2k = T2b - T28;
+			 T2l = T23 + T26;
+			 T2m = KP707106781 * (T2k - T2l);
+			 T3I = KP707106781 * (T2k + T2l);
+		    }
+	       }
+	  }
+	  { /* output stage: writes outputs 22, 6, 30, 14 */
+	       E T4B, T57, T5a, T5c, T4Y, T56, T55, T5b;
+	       {
+		    E T4t, T4A, T58, T59;
+		    T4t = T4r - T4s;
+		    T4A = KP707106781 * (T4w - T4z);
+		    T4B = T4t + T4A;
+		    T57 = T4t - T4A;
+		    T58 = FNMS(KP923879532, T4L, KP382683432 * T4G);
+		    T59 = FMA(KP382683432, T4W, KP923879532 * T4R);
+		    T5a = T58 - T59;
+		    T5c = T58 + T59;
+	       }
+	       {
+		    E T4M, T4X, T51, T54;
+		    T4M = FMA(KP923879532, T4G, KP382683432 * T4L);
+		    T4X = FNMS(KP923879532, T4W, KP382683432 * T4R);
+		    T4Y = T4M + T4X;
+		    T56 = T4X - T4M;
+		    T51 = T4Z - T50;
+		    T54 = KP707106781 * (T52 - T53);
+		    T55 = T51 - T54;
+		    T5b = T51 + T54;
+	       }
+	       ro[WS(os, 22)] = T4B - T4Y;
+	       io[WS(os, 22)] = T5b - T5c;
+	       ro[WS(os, 6)] = T4B + T4Y;
+	       io[WS(os, 6)] = T5b + T5c;
+	       io[WS(os, 30)] = T55 - T56;
+	       ro[WS(os, 30)] = T57 - T5a;
+	       io[WS(os, 14)] = T55 + T56;
+	       ro[WS(os, 14)] = T57 + T5a;
+	  }
+	  { /* output stage: writes outputs 18, 2, 26, 10 */
+	       E T5f, T5r, T5u, T5w, T5m, T5q, T5p, T5v;
+	       {
+		    E T5d, T5e, T5s, T5t;
+		    T5d = T4r + T4s;
+		    T5e = KP707106781 * (T53 + T52);
+		    T5f = T5d + T5e;
+		    T5r = T5d - T5e;
+		    T5s = FNMS(KP382683432, T5h, KP923879532 * T5g);
+		    T5t = FMA(KP923879532, T5k, KP382683432 * T5j);
+		    T5u = T5s - T5t;
+		    T5w = T5s + T5t;
+	       }
+	       {
+		    E T5i, T5l, T5n, T5o;
+		    T5i = FMA(KP382683432, T5g, KP923879532 * T5h);
+		    T5l = FNMS(KP382683432, T5k, KP923879532 * T5j);
+		    T5m = T5i + T5l;
+		    T5q = T5l - T5i;
+		    T5n = T50 + T4Z;
+		    T5o = KP707106781 * (T4w + T4z);
+		    T5p = T5n - T5o;
+		    T5v = T5n + T5o;
+	       }
+	       ro[WS(os, 18)] = T5f - T5m;
+	       io[WS(os, 18)] = T5v - T5w;
+	       ro[WS(os, 2)] = T5f + T5m;
+	       io[WS(os, 2)] = T5v + T5w;
+	       io[WS(os, 26)] = T5p - T5q;
+	       ro[WS(os, 26)] = T5r - T5u;
+	       io[WS(os, 10)] = T5p + T5q;
+	       ro[WS(os, 10)] = T5r + T5u;
+	  }
+	  { /* output stage: writes outputs 20, 4, 28, 12 */
+	       E T5z, T5P, T5S, T5U, T5K, T5O, T5N, T5T;
+	       {
+		    E T5x, T5y, T5Q, T5R;
+		    T5x = T7 - Te;
+		    T5y = T1n - T1u;
+		    T5z = T5x + T5y;
+		    T5P = T5x - T5y;
+		    T5Q = T5D - T5A;
+		    T5R = T5F + T5I;
+		    T5S = KP707106781 * (T5Q - T5R);
+		    T5U = KP707106781 * (T5Q + T5R);
+	       }
+	       {
+		    E T5E, T5J, T5L, T5M;
+		    T5E = T5A + T5D;
+		    T5J = T5F - T5I;
+		    T5K = KP707106781 * (T5E + T5J);
+		    T5O = KP707106781 * (T5J - T5E);
+		    T5L = T18 - T1f;
+		    T5M = Tt - Tm;
+		    T5N = T5L - T5M;
+		    T5T = T5M + T5L;
+	       }
+	       ro[WS(os, 20)] = T5z - T5K;
+	       io[WS(os, 20)] = T5T - T5U;
+	       ro[WS(os, 4)] = T5z + T5K;
+	       io[WS(os, 4)] = T5T + T5U;
+	       io[WS(os, 28)] = T5N - T5O;
+	       ro[WS(os, 28)] = T5P - T5S;
+	       io[WS(os, 12)] = T5N + T5O;
+	       ro[WS(os, 12)] = T5P + T5S;
+	  }
+	  { /* output stage: writes outputs 16, 0, 8, 24; output 0 is the plain sum of all inputs and output 8 weights input n by (-i)^n, i.e. a negative-exponent transform */
+	       E Tv, T5V, T5Y, T60, T10, T11, T1w, T5Z;
+	       {
+		    E Tf, Tu, T5W, T5X;
+		    Tf = T7 + Te;
+		    Tu = Tm + Tt;
+		    Tv = Tf + Tu;
+		    T5V = Tf - Tu;
+		    T5W = T5B + T5C;
+		    T5X = T5G + T5H;
+		    T5Y = T5W - T5X;
+		    T60 = T5W + T5X;
+	       }
+	       {
+		    E TK, TZ, T1g, T1v;
+		    TK = TC + TJ;
+		    TZ = TR + TY;
+		    T10 = TK + TZ;
+		    T11 = TZ - TK;
+		    T1g = T18 + T1f;
+		    T1v = T1n + T1u;
+		    T1w = T1g - T1v;
+		    T5Z = T1g + T1v;
+	       }
+	       ro[WS(os, 16)] = Tv - T10;
+	       io[WS(os, 16)] = T5Z - T60;
+	       ro[0] = Tv + T10;
+	       io[0] = T5Z + T60;
+	       io[WS(os, 8)] = T11 + T1w;
+	       ro[WS(os, 8)] = T5V + T5Y;
+	       io[WS(os, 24)] = T1w - T11;
+	       ro[WS(os, 24)] = T5V - T5Y;
+	  }
+	  { /* output stage: writes outputs 23, 7, 31, 15 */
+	       E T1X, T33, T31, T37, T2o, T34, T2P, T35;
+	       {
+		    E T1H, T1W, T2X, T30;
+		    T1H = T1z - T1G;
+		    T1W = T1O - T1V;
+		    T1X = T1H + T1W;
+		    T33 = T1H - T1W;
+		    T2X = T2T - T2W;
+		    T30 = T2Y - T2Z;
+		    T31 = T2X - T30;
+		    T37 = T2X + T30;
+	       }
+	       {
+		    E T2e, T2n, T2F, T2O;
+		    T2e = T22 - T2d;
+		    T2n = T2j - T2m;
+		    T2o = FMA(KP980785280, T2e, KP195090322 * T2n);
+		    T34 = FNMS(KP980785280, T2n, KP195090322 * T2e);
+		    T2F = T2t - T2E;
+		    T2O = T2K - T2N;
+		    T2P = FNMS(KP980785280, T2O, KP195090322 * T2F);
+		    T35 = FMA(KP195090322, T2O, KP980785280 * T2F);
+	       }
+	       {
+		    E T2Q, T38, T32, T36;
+		    T2Q = T2o + T2P;
+		    ro[WS(os, 23)] = T1X - T2Q;
+		    ro[WS(os, 7)] = T1X + T2Q;
+		    T38 = T34 + T35;
+		    io[WS(os, 23)] = T37 - T38;
+		    io[WS(os, 7)] = T37 + T38;
+		    T32 = T2P - T2o;
+		    io[WS(os, 31)] = T31 - T32;
+		    io[WS(os, 15)] = T31 + T32;
+		    T36 = T34 - T35;
+		    ro[WS(os, 31)] = T33 - T36;
+		    ro[WS(os, 15)] = T33 + T36;
+	       }
+	  }
+	  { /* output stage: writes outputs 21, 5, 29, 13 */
+	       E T3D, T41, T3Z, T45, T3K, T42, T3R, T43;
+	       {
+		    E T3v, T3C, T3V, T3Y;
+		    T3v = T3t - T3u;
+		    T3C = T3y - T3B;
+		    T3D = T3v + T3C;
+		    T41 = T3v - T3C;
+		    T3V = T3T - T3U;
+		    T3Y = T3W - T3X;
+		    T3Z = T3V - T3Y;
+		    T45 = T3V + T3Y;
+	       }
+	       {
+		    E T3G, T3J, T3N, T3Q;
+		    T3G = T3E - T3F;
+		    T3J = T3H - T3I;
+		    T3K = FMA(KP555570233, T3G, KP831469612 * T3J);
+		    T42 = FNMS(KP831469612, T3G, KP555570233 * T3J);
+		    T3N = T3L - T3M;
+		    T3Q = T3O - T3P;
+		    T3R = FNMS(KP831469612, T3Q, KP555570233 * T3N);
+		    T43 = FMA(KP831469612, T3N, KP555570233 * T3Q);
+	       }
+	       {
+		    E T3S, T46, T40, T44;
+		    T3S = T3K + T3R;
+		    ro[WS(os, 21)] = T3D - T3S;
+		    ro[WS(os, 5)] = T3D + T3S;
+		    T46 = T42 + T43;
+		    io[WS(os, 21)] = T45 - T46;
+		    io[WS(os, 5)] = T45 + T46;
+		    T40 = T3R - T3K;
+		    io[WS(os, 29)] = T3Z - T40;
+		    io[WS(os, 13)] = T3Z + T40;
+		    T44 = T42 - T43;
+		    ro[WS(os, 29)] = T41 - T44;
+		    ro[WS(os, 13)] = T41 + T44;
+	       }
+	  }
+	  { /* output stage: writes outputs 17, 1, 25, 9 */
+	       E T49, T4l, T4j, T4p, T4c, T4m, T4f, T4n;
+	       {
+		    E T47, T48, T4h, T4i;
+		    T47 = T3t + T3u;
+		    T48 = T3X + T3W;
+		    T49 = T47 + T48;
+		    T4l = T47 - T48;
+		    T4h = T3T + T3U;
+		    T4i = T3y + T3B;
+		    T4j = T4h - T4i;
+		    T4p = T4h + T4i;
+	       }
+	       {
+		    E T4a, T4b, T4d, T4e;
+		    T4a = T3E + T3F;
+		    T4b = T3H + T3I;
+		    T4c = FMA(KP980785280, T4a, KP195090322 * T4b);
+		    T4m = FNMS(KP195090322, T4a, KP980785280 * T4b);
+		    T4d = T3L + T3M;
+		    T4e = T3O + T3P;
+		    T4f = FNMS(KP195090322, T4e, KP980785280 * T4d);
+		    T4n = FMA(KP195090322, T4d, KP980785280 * T4e);
+	       }
+	       {
+		    E T4g, T4q, T4k, T4o;
+		    T4g = T4c + T4f;
+		    ro[WS(os, 17)] = T49 - T4g;
+		    ro[WS(os, 1)] = T49 + T4g;
+		    T4q = T4m + T4n;
+		    io[WS(os, 17)] = T4p - T4q;
+		    io[WS(os, 1)] = T4p + T4q;
+		    T4k = T4f - T4c;
+		    io[WS(os, 25)] = T4j - T4k;
+		    io[WS(os, 9)] = T4j + T4k;
+		    T4o = T4m - T4n;
+		    ro[WS(os, 25)] = T4l - T4o;
+		    ro[WS(os, 9)] = T4l + T4o;
+	       }
+	  }
+	  { /* output stage: writes outputs 19, 3, 27, 11 */
+	       E T3b, T3n, T3l, T3r, T3e, T3o, T3h, T3p;
+	       {
+		    E T39, T3a, T3j, T3k;
+		    T39 = T1z + T1G;
+		    T3a = T2Z + T2Y;
+		    T3b = T39 + T3a;
+		    T3n = T39 - T3a;
+		    T3j = T2T + T2W;
+		    T3k = T1O + T1V;
+		    T3l = T3j - T3k;
+		    T3r = T3j + T3k;
+	       }
+	       {
+		    E T3c, T3d, T3f, T3g;
+		    T3c = T22 + T2d;
+		    T3d = T2j + T2m;
+		    T3e = FMA(KP555570233, T3c, KP831469612 * T3d);
+		    T3o = FNMS(KP555570233, T3d, KP831469612 * T3c);
+		    T3f = T2t + T2E;
+		    T3g = T2K + T2N;
+		    T3h = FNMS(KP555570233, T3g, KP831469612 * T3f);
+		    T3p = FMA(KP831469612, T3g, KP555570233 * T3f);
+	       }
+	       {
+		    E T3i, T3s, T3m, T3q;
+		    T3i = T3e + T3h;
+		    ro[WS(os, 19)] = T3b - T3i;
+		    ro[WS(os, 3)] = T3b + T3i;
+		    T3s = T3o + T3p;
+		    io[WS(os, 19)] = T3r - T3s;
+		    io[WS(os, 3)] = T3r + T3s;
+		    T3m = T3h - T3e;
+		    io[WS(os, 27)] = T3l - T3m;
+		    io[WS(os, 11)] = T3l + T3m;
+		    T3q = T3o - T3p;
+		    ro[WS(os, 27)] = T3n - T3q;
+		    ro[WS(os, 11)] = T3n + T3q;
+	       }
+	  }
+     }
+}
+
+static void m1_32(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
+{
+     /* Run the 32-point kernel v times; inputs advance by ivs and outputs by ovs after each call. */
+     int remaining = v;
+     while (remaining > 0) {
+	  m1_32_0(ri, ii, ro, io, is, os);
+	  ri += ivs; ii += ivs;
+	  ro += ovs; io += ovs;
+	  remaining -= 1;
+     }
+}
+
+static const kdft_desc desc = { 32, "m1_32", {340, 52, 32, 0}, &GENUS, 0, 0, 0, 0 }; /* descriptor for the m1_32 kernel; 340/52/32 equal the add/multiply/fused-multiply-add tally in this file's header comment; NOTE(review): meaning of the remaining fields is set by kdft_desc -- confirm there */
+void X(codelet_m1_32) (planner *p) { /* hands the m1_32 kernel and its descriptor to the planner p */
+     X(kdft_register) (p, m1_32, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/m1_64.c b/src/fftw3/dft/codelets/standard/m1_64.c
new file mode 100644
index 0000000..72047bf
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/m1_64.c
@@ -0,0 +1,1568 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:42 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw_noinline -compact -variables 4 -n 64 -name m1_64 -include n.h */
+
+/*
+ * This function contains 912 FP additions, 248 FP multiplications,
+ * (or, 808 additions, 144 multiplications, 104 fused multiply/add),
+ * 171 stack variables, and 256 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: m1_64.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: m1_64.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: m1_64.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void m1_64_0(const R *ri, const R *ii, R *ro, R *io, stride is, stride os)
+{
+     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
+     DK(KP634393284, +0.634393284163645498215171613225493370675687095);
+     DK(KP098017140, +0.098017140329560601994195563888641845861136673);
+     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
+     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
+     DK(KP471396736, +0.471396736825997648556387625905254377657460319);
+     DK(KP290284677, +0.290284677254462367636192375817395274691476278);
+     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     {
+	  E T37, T7B, T8F, T5Z, Tf, Td9, TbB, TcB, T62, T7C, T2i, TdH, Tah, Tcb, T3e;
+	  E T8G, Tu, TdI, Tak, TbD, Tan, TbC, T2x, Tda, T3m, T65, T7G, T8J, T7J, T8I;
+	  E T3t, T64, TK, Tdd, Tas, Tce, Tav, Tcf, T2N, Tdc, T3G, T6G, T7O, T9k, T7R;
+	  E T9l, T3N, T6H, T1L, Tdv, Tbs, Tcw, TdC, Teo, T5j, T6V, T5Q, T6Y, T8y, T9C;
+	  E Tbb, Tct, T8n, T9z, TZ, Tdf, Taz, Tch, TaC, Tci, T32, Tdg, T3Z, T6J, T7V;
+	  E T9n, T7Y, T9o, T46, T6K, T1g, Tdp, Tb1, Tcm, Tdm, Tej, T4q, T6R, T4X, T6O;
+	  E T8f, T9s, TaK, Tcp, T84, T9v, T1v, Tdn, Tb4, Tcq, Tds, Tek, T4N, T6P, T50;
+	  E T6S, T8i, T9w, TaV, Tcn, T8b, T9t, T20, TdD, Tbv, Tcu, Tdy, Tep, T5G, T6Z;
+	  E T5T, T6W, T8B, T9A, Tbm, Tcx, T8u, T9D;
+	  {
+	       E T3, T35, T26, T5Y, T6, T5X, T29, T36, Ta, T39, T2d, T38, Td, T3b, T2g;
+	       E T3c;
+	       {
+		    E T1, T2, T24, T25;
+		    T1 = ri[0];
+		    T2 = ri[WS(is, 32)];
+		    T3 = T1 + T2;
+		    T35 = T1 - T2;
+		    T24 = ii[0];
+		    T25 = ii[WS(is, 32)];
+		    T26 = T24 + T25;
+		    T5Y = T24 - T25;
+	       }
+	       {
+		    E T4, T5, T27, T28;
+		    T4 = ri[WS(is, 16)];
+		    T5 = ri[WS(is, 48)];
+		    T6 = T4 + T5;
+		    T5X = T4 - T5;
+		    T27 = ii[WS(is, 16)];
+		    T28 = ii[WS(is, 48)];
+		    T29 = T27 + T28;
+		    T36 = T27 - T28;
+	       }
+	       {
+		    E T8, T9, T2b, T2c;
+		    T8 = ri[WS(is, 8)];
+		    T9 = ri[WS(is, 40)];
+		    Ta = T8 + T9;
+		    T39 = T8 - T9;
+		    T2b = ii[WS(is, 8)];
+		    T2c = ii[WS(is, 40)];
+		    T2d = T2b + T2c;
+		    T38 = T2b - T2c;
+	       }
+	       {
+		    E Tb, Tc, T2e, T2f;
+		    Tb = ri[WS(is, 56)];
+		    Tc = ri[WS(is, 24)];
+		    Td = Tb + Tc;
+		    T3b = Tb - Tc;
+		    T2e = ii[WS(is, 56)];
+		    T2f = ii[WS(is, 24)];
+		    T2g = T2e + T2f;
+		    T3c = T2e - T2f;
+	       }
+	       {
+		    E T7, Te, T2a, T2h;
+		    T37 = T35 - T36;
+		    T7B = T35 + T36;
+		    T8F = T5Y - T5X;
+		    T5Z = T5X + T5Y;
+		    T7 = T3 + T6;
+		    Te = Ta + Td;
+		    Tf = T7 + Te;
+		    Td9 = T7 - Te;
+		    {
+			 E Tbz, TbA, T60, T61;
+			 Tbz = T26 - T29;
+			 TbA = Td - Ta;
+			 TbB = Tbz - TbA;
+			 TcB = TbA + Tbz;
+			 T60 = T3b - T3c;
+			 T61 = T39 + T38;
+			 T62 = KP707106781 * (T60 - T61);
+			 T7C = KP707106781 * (T61 + T60);
+		    }
+		    T2a = T26 + T29;
+		    T2h = T2d + T2g;
+		    T2i = T2a + T2h;
+		    TdH = T2a - T2h;
+		    {
+			 E Taf, Tag, T3a, T3d;
+			 Taf = T3 - T6;
+			 Tag = T2d - T2g;
+			 Tah = Taf - Tag;
+			 Tcb = Taf + Tag;
+			 T3a = T38 - T39;
+			 T3d = T3b + T3c;
+			 T3e = KP707106781 * (T3a - T3d);
+			 T8G = KP707106781 * (T3a + T3d);
+		    }
+	       }
+	  }
+	  {
+	       E Ti, T3j, T2l, T3h, Tl, T3g, T2o, T3k, Tp, T3q, T2s, T3o, Ts, T3n, T2v;
+	       E T3r;
+	       {
+		    E Tg, Th, T2j, T2k;
+		    Tg = ri[WS(is, 4)];
+		    Th = ri[WS(is, 36)];
+		    Ti = Tg + Th;
+		    T3j = Tg - Th;
+		    T2j = ii[WS(is, 4)];
+		    T2k = ii[WS(is, 36)];
+		    T2l = T2j + T2k;
+		    T3h = T2j - T2k;
+	       }
+	       {
+		    E Tj, Tk, T2m, T2n;
+		    Tj = ri[WS(is, 20)];
+		    Tk = ri[WS(is, 52)];
+		    Tl = Tj + Tk;
+		    T3g = Tj - Tk;
+		    T2m = ii[WS(is, 20)];
+		    T2n = ii[WS(is, 52)];
+		    T2o = T2m + T2n;
+		    T3k = T2m - T2n;
+	       }
+	       {
+		    E Tn, To, T2q, T2r;
+		    Tn = ri[WS(is, 60)];
+		    To = ri[WS(is, 28)];
+		    Tp = Tn + To;
+		    T3q = Tn - To;
+		    T2q = ii[WS(is, 60)];
+		    T2r = ii[WS(is, 28)];
+		    T2s = T2q + T2r;
+		    T3o = T2q - T2r;
+	       }
+	       {
+		    E Tq, Tr, T2t, T2u;
+		    Tq = ri[WS(is, 12)];
+		    Tr = ri[WS(is, 44)];
+		    Ts = Tq + Tr;
+		    T3n = Tq - Tr;
+		    T2t = ii[WS(is, 12)];
+		    T2u = ii[WS(is, 44)];
+		    T2v = T2t + T2u;
+		    T3r = T2t - T2u;
+	       }
+	       {
+		    E Tm, Tt, Tai, Taj;
+		    Tm = Ti + Tl;
+		    Tt = Tp + Ts;
+		    Tu = Tm + Tt;
+		    TdI = Tt - Tm;
+		    Tai = T2l - T2o;
+		    Taj = Ti - Tl;
+		    Tak = Tai - Taj;
+		    TbD = Taj + Tai;
+	       }
+	       {
+		    E Tal, Tam, T2p, T2w;
+		    Tal = Tp - Ts;
+		    Tam = T2s - T2v;
+		    Tan = Tal + Tam;
+		    TbC = Tal - Tam;
+		    T2p = T2l + T2o;
+		    T2w = T2s + T2v;
+		    T2x = T2p + T2w;
+		    Tda = T2p - T2w;
+	       }
+	       {
+		    E T3i, T3l, T7E, T7F;
+		    T3i = T3g + T3h;
+		    T3l = T3j - T3k;
+		    T3m = FNMS(KP923879532, T3l, KP382683432 * T3i);
+		    T65 = FMA(KP923879532, T3i, KP382683432 * T3l);
+		    T7E = T3h - T3g;
+		    T7F = T3j + T3k;
+		    T7G = FNMS(KP382683432, T7F, KP923879532 * T7E);
+		    T8J = FMA(KP382683432, T7E, KP923879532 * T7F);
+	       }
+	       {
+		    E T7H, T7I, T3p, T3s;
+		    T7H = T3o - T3n;
+		    T7I = T3q + T3r;
+		    T7J = FMA(KP923879532, T7H, KP382683432 * T7I);
+		    T8I = FNMS(KP382683432, T7H, KP923879532 * T7I);
+		    T3p = T3n + T3o;
+		    T3s = T3q - T3r;
+		    T3t = FMA(KP382683432, T3p, KP923879532 * T3s);
+		    T64 = FNMS(KP923879532, T3p, KP382683432 * T3s);
+	       }
+	  }
+	  {
+	       E Ty, T3H, T2B, T3x, TB, T3w, T2E, T3I, TI, T3L, T2L, T3B, TF, T3K, T2I;
+	       E T3E;
+	       {
+		    E Tw, Tx, T2C, T2D;
+		    Tw = ri[WS(is, 2)];
+		    Tx = ri[WS(is, 34)];
+		    Ty = Tw + Tx;
+		    T3H = Tw - Tx;
+		    {
+			 E T2z, T2A, Tz, TA;
+			 T2z = ii[WS(is, 2)];
+			 T2A = ii[WS(is, 34)];
+			 T2B = T2z + T2A;
+			 T3x = T2z - T2A;
+			 Tz = ri[WS(is, 18)];
+			 TA = ri[WS(is, 50)];
+			 TB = Tz + TA;
+			 T3w = Tz - TA;
+		    }
+		    T2C = ii[WS(is, 18)];
+		    T2D = ii[WS(is, 50)];
+		    T2E = T2C + T2D;
+		    T3I = T2C - T2D;
+		    {
+			 E TG, TH, T3z, T2J, T2K, T3A;
+			 TG = ri[WS(is, 58)];
+			 TH = ri[WS(is, 26)];
+			 T3z = TG - TH;
+			 T2J = ii[WS(is, 58)];
+			 T2K = ii[WS(is, 26)];
+			 T3A = T2J - T2K;
+			 TI = TG + TH;
+			 T3L = T3z + T3A;
+			 T2L = T2J + T2K;
+			 T3B = T3z - T3A;
+		    }
+		    {
+			 E TD, TE, T3C, T2G, T2H, T3D;
+			 TD = ri[WS(is, 10)];
+			 TE = ri[WS(is, 42)];
+			 T3C = TD - TE;
+			 T2G = ii[WS(is, 10)];
+			 T2H = ii[WS(is, 42)];
+			 T3D = T2G - T2H;
+			 TF = TD + TE;
+			 T3K = T3D - T3C;
+			 T2I = T2G + T2H;
+			 T3E = T3C + T3D;
+		    }
+	       }
+	       {
+		    E TC, TJ, Taq, Tar;
+		    TC = Ty + TB;
+		    TJ = TF + TI;
+		    TK = TC + TJ;
+		    Tdd = TC - TJ;
+		    Taq = T2B - T2E;
+		    Tar = TI - TF;
+		    Tas = Taq - Tar;
+		    Tce = Tar + Taq;
+	       }
+	       {
+		    E Tat, Tau, T2F, T2M;
+		    Tat = Ty - TB;
+		    Tau = T2I - T2L;
+		    Tav = Tat - Tau;
+		    Tcf = Tat + Tau;
+		    T2F = T2B + T2E;
+		    T2M = T2I + T2L;
+		    T2N = T2F + T2M;
+		    Tdc = T2F - T2M;
+	       }
+	       {
+		    E T3y, T3F, T7M, T7N;
+		    T3y = T3w + T3x;
+		    T3F = KP707106781 * (T3B - T3E);
+		    T3G = T3y - T3F;
+		    T6G = T3y + T3F;
+		    T7M = T3x - T3w;
+		    T7N = KP707106781 * (T3K + T3L);
+		    T7O = T7M - T7N;
+		    T9k = T7M + T7N;
+	       }
+	       {
+		    E T7P, T7Q, T3J, T3M;
+		    T7P = T3H + T3I;
+		    T7Q = KP707106781 * (T3E + T3B);
+		    T7R = T7P - T7Q;
+		    T9l = T7P + T7Q;
+		    T3J = T3H - T3I;
+		    T3M = KP707106781 * (T3K - T3L);
+		    T3N = T3J - T3M;
+		    T6H = T3J + T3M;
+	       }
+	  }
+	  {
+	       E T1z, T53, T5L, Tbo, T1C, T5I, T56, Tbp, T1J, Tb9, T5h, T5N, T1G, Tb8, T5c;
+	       E T5O;
+	       {
+		    E T1x, T1y, T54, T55;
+		    T1x = ri[WS(is, 63)];
+		    T1y = ri[WS(is, 31)];
+		    T1z = T1x + T1y;
+		    T53 = T1x - T1y;
+		    {
+			 E T5J, T5K, T1A, T1B;
+			 T5J = ii[WS(is, 63)];
+			 T5K = ii[WS(is, 31)];
+			 T5L = T5J - T5K;
+			 Tbo = T5J + T5K;
+			 T1A = ri[WS(is, 15)];
+			 T1B = ri[WS(is, 47)];
+			 T1C = T1A + T1B;
+			 T5I = T1A - T1B;
+		    }
+		    T54 = ii[WS(is, 15)];
+		    T55 = ii[WS(is, 47)];
+		    T56 = T54 - T55;
+		    Tbp = T54 + T55;
+		    {
+			 E T1H, T1I, T5d, T5e, T5f, T5g;
+			 T1H = ri[WS(is, 55)];
+			 T1I = ri[WS(is, 23)];
+			 T5d = T1H - T1I;
+			 T5e = ii[WS(is, 55)];
+			 T5f = ii[WS(is, 23)];
+			 T5g = T5e - T5f;
+			 T1J = T1H + T1I;
+			 Tb9 = T5e + T5f;
+			 T5h = T5d + T5g;
+			 T5N = T5d - T5g;
+		    }
+		    {
+			 E T1E, T1F, T5b, T58, T59, T5a;
+			 T1E = ri[WS(is, 7)];
+			 T1F = ri[WS(is, 39)];
+			 T5b = T1E - T1F;
+			 T58 = ii[WS(is, 7)];
+			 T59 = ii[WS(is, 39)];
+			 T5a = T58 - T59;
+			 T1G = T1E + T1F;
+			 Tb8 = T58 + T59;
+			 T5c = T5a - T5b;
+			 T5O = T5b + T5a;
+		    }
+	       }
+	       {
+		    E T1D, T1K, Tbq, Tbr;
+		    T1D = T1z + T1C;
+		    T1K = T1G + T1J;
+		    T1L = T1D + T1K;
+		    Tdv = T1D - T1K;
+		    Tbq = Tbo - Tbp;
+		    Tbr = T1J - T1G;
+		    Tbs = Tbq - Tbr;
+		    Tcw = Tbr + Tbq;
+	       }
+	       {
+		    E TdA, TdB, T57, T5i;
+		    TdA = Tbo + Tbp;
+		    TdB = Tb8 + Tb9;
+		    TdC = TdA - TdB;
+		    Teo = TdA + TdB;
+		    T57 = T53 - T56;
+		    T5i = KP707106781 * (T5c - T5h);
+		    T5j = T57 - T5i;
+		    T6V = T57 + T5i;
+	       }
+	       {
+		    E T5M, T5P, T8w, T8x;
+		    T5M = T5I + T5L;
+		    T5P = KP707106781 * (T5N - T5O);
+		    T5Q = T5M - T5P;
+		    T6Y = T5M + T5P;
+		    T8w = T5L - T5I;
+		    T8x = KP707106781 * (T5c + T5h);
+		    T8y = T8w - T8x;
+		    T9C = T8w + T8x;
+	       }
+	       {
+		    E Tb7, Tba, T8l, T8m;
+		    Tb7 = T1z - T1C;
+		    Tba = Tb8 - Tb9;
+		    Tbb = Tb7 - Tba;
+		    Tct = Tb7 + Tba;
+		    T8l = T53 + T56;
+		    T8m = KP707106781 * (T5O + T5N);
+		    T8n = T8l - T8m;
+		    T9z = T8l + T8m;
+	       }
+	  }
+	  {
+	       E TN, T40, T2Q, T3Q, TQ, T3P, T2T, T41, TX, T44, T30, T3U, TU, T43, T2X;
+	       E T3X;
+	       {
+		    E TL, TM, T2R, T2S;
+		    TL = ri[WS(is, 62)];
+		    TM = ri[WS(is, 30)];
+		    TN = TL + TM;
+		    T40 = TL - TM;
+		    {
+			 E T2O, T2P, TO, TP;
+			 T2O = ii[WS(is, 62)];
+			 T2P = ii[WS(is, 30)];
+			 T2Q = T2O + T2P;
+			 T3Q = T2O - T2P;
+			 TO = ri[WS(is, 14)];
+			 TP = ri[WS(is, 46)];
+			 TQ = TO + TP;
+			 T3P = TO - TP;
+		    }
+		    T2R = ii[WS(is, 14)];
+		    T2S = ii[WS(is, 46)];
+		    T2T = T2R + T2S;
+		    T41 = T2R - T2S;
+		    {
+			 E TV, TW, T3S, T2Y, T2Z, T3T;
+			 TV = ri[WS(is, 54)];
+			 TW = ri[WS(is, 22)];
+			 T3S = TV - TW;
+			 T2Y = ii[WS(is, 54)];
+			 T2Z = ii[WS(is, 22)];
+			 T3T = T2Y - T2Z;
+			 TX = TV + TW;
+			 T44 = T3S + T3T;
+			 T30 = T2Y + T2Z;
+			 T3U = T3S - T3T;
+		    }
+		    {
+			 E TS, TT, T3V, T2V, T2W, T3W;
+			 TS = ri[WS(is, 6)];
+			 TT = ri[WS(is, 38)];
+			 T3V = TS - TT;
+			 T2V = ii[WS(is, 6)];
+			 T2W = ii[WS(is, 38)];
+			 T3W = T2V - T2W;
+			 TU = TS + TT;
+			 T43 = T3W - T3V;
+			 T2X = T2V + T2W;
+			 T3X = T3V + T3W;
+		    }
+	       }
+	       {
+		    E TR, TY, Tax, Tay;
+		    TR = TN + TQ;
+		    TY = TU + TX;
+		    TZ = TR + TY;
+		    Tdf = TR - TY;
+		    Tax = T2Q - T2T;
+		    Tay = TX - TU;
+		    Taz = Tax - Tay;
+		    Tch = Tay + Tax;
+	       }
+	       {
+		    E TaA, TaB, T2U, T31;
+		    TaA = TN - TQ;
+		    TaB = T2X - T30;
+		    TaC = TaA - TaB;
+		    Tci = TaA + TaB;
+		    T2U = T2Q + T2T;
+		    T31 = T2X + T30;
+		    T32 = T2U + T31;
+		    Tdg = T2U - T31;
+	       }
+	       {
+		    E T3R, T3Y, T7T, T7U;
+		    T3R = T3P + T3Q;
+		    T3Y = KP707106781 * (T3U - T3X);
+		    T3Z = T3R - T3Y;
+		    T6J = T3R + T3Y;
+		    T7T = T40 + T41;
+		    T7U = KP707106781 * (T3X + T3U);
+		    T7V = T7T - T7U;
+		    T9n = T7T + T7U;
+	       }
+	       {
+		    E T7W, T7X, T42, T45;
+		    T7W = T3Q - T3P;
+		    T7X = KP707106781 * (T43 + T44);
+		    T7Y = T7W - T7X;
+		    T9o = T7W + T7X;
+		    T42 = T40 - T41;
+		    T45 = KP707106781 * (T43 - T44);
+		    T46 = T42 - T45;
+		    T6K = T42 + T45;
+	       }
+	  }
+	  {
+	       E T14, T4P, T4d, TaG, T17, T4a, T4S, TaH, T1e, TaZ, T4j, T4V, T1b, TaY, T4o;
+	       E T4U;
+	       {
+		    E T12, T13, T4Q, T4R;
+		    T12 = ri[WS(is, 1)];
+		    T13 = ri[WS(is, 33)];
+		    T14 = T12 + T13;
+		    T4P = T12 - T13;
+		    {
+			 E T4b, T4c, T15, T16;
+			 T4b = ii[WS(is, 1)];
+			 T4c = ii[WS(is, 33)];
+			 T4d = T4b - T4c;
+			 TaG = T4b + T4c;
+			 T15 = ri[WS(is, 17)];
+			 T16 = ri[WS(is, 49)];
+			 T17 = T15 + T16;
+			 T4a = T15 - T16;
+		    }
+		    T4Q = ii[WS(is, 17)];
+		    T4R = ii[WS(is, 49)];
+		    T4S = T4Q - T4R;
+		    TaH = T4Q + T4R;
+		    {
+			 E T1c, T1d, T4f, T4g, T4h, T4i;
+			 T1c = ri[WS(is, 57)];
+			 T1d = ri[WS(is, 25)];
+			 T4f = T1c - T1d;
+			 T4g = ii[WS(is, 57)];
+			 T4h = ii[WS(is, 25)];
+			 T4i = T4g - T4h;
+			 T1e = T1c + T1d;
+			 TaZ = T4g + T4h;
+			 T4j = T4f - T4i;
+			 T4V = T4f + T4i;
+		    }
+		    {
+			 E T19, T1a, T4k, T4l, T4m, T4n;
+			 T19 = ri[WS(is, 9)];
+			 T1a = ri[WS(is, 41)];
+			 T4k = T19 - T1a;
+			 T4l = ii[WS(is, 9)];
+			 T4m = ii[WS(is, 41)];
+			 T4n = T4l - T4m;
+			 T1b = T19 + T1a;
+			 TaY = T4l + T4m;
+			 T4o = T4k + T4n;
+			 T4U = T4n - T4k;
+		    }
+	       }
+	       {
+		    E T18, T1f, TaX, Tb0;
+		    T18 = T14 + T17;
+		    T1f = T1b + T1e;
+		    T1g = T18 + T1f;
+		    Tdp = T18 - T1f;
+		    TaX = T14 - T17;
+		    Tb0 = TaY - TaZ;
+		    Tb1 = TaX - Tb0;
+		    Tcm = TaX + Tb0;
+	       }
+	       {
+		    E Tdk, Tdl, T4e, T4p;
+		    Tdk = TaG + TaH;
+		    Tdl = TaY + TaZ;
+		    Tdm = Tdk - Tdl;
+		    Tej = Tdk + Tdl;
+		    T4e = T4a + T4d;
+		    T4p = KP707106781 * (T4j - T4o);
+		    T4q = T4e - T4p;
+		    T6R = T4e + T4p;
+	       }
+	       {
+		    E T4T, T4W, T8d, T8e;
+		    T4T = T4P - T4S;
+		    T4W = KP707106781 * (T4U - T4V);
+		    T4X = T4T - T4W;
+		    T6O = T4T + T4W;
+		    T8d = T4P + T4S;
+		    T8e = KP707106781 * (T4o + T4j);
+		    T8f = T8d - T8e;
+		    T9s = T8d + T8e;
+	       }
+	       {
+		    E TaI, TaJ, T82, T83;
+		    TaI = TaG - TaH;
+		    TaJ = T1e - T1b;
+		    TaK = TaI - TaJ;
+		    Tcp = TaJ + TaI;
+		    T82 = T4d - T4a;
+		    T83 = KP707106781 * (T4U + T4V);
+		    T84 = T82 - T83;
+		    T9v = T82 + T83;
+	       }
+	  }
+	  {
+	       E T1j, TaR, T1m, TaS, T4G, T4L, TaT, TaQ, T89, T88, T1q, TaM, T1t, TaN, T4v;
+	       E T4A, TaO, TaL, T86, T85;
+	       {
+		    E T4H, T4F, T4C, T4K;
+		    {
+			 E T1h, T1i, T4D, T4E;
+			 T1h = ri[WS(is, 5)];
+			 T1i = ri[WS(is, 37)];
+			 T1j = T1h + T1i;
+			 T4H = T1h - T1i;
+			 T4D = ii[WS(is, 5)];
+			 T4E = ii[WS(is, 37)];
+			 T4F = T4D - T4E;
+			 TaR = T4D + T4E;
+		    }
+		    {
+			 E T1k, T1l, T4I, T4J;
+			 T1k = ri[WS(is, 21)];
+			 T1l = ri[WS(is, 53)];
+			 T1m = T1k + T1l;
+			 T4C = T1k - T1l;
+			 T4I = ii[WS(is, 21)];
+			 T4J = ii[WS(is, 53)];
+			 T4K = T4I - T4J;
+			 TaS = T4I + T4J;
+		    }
+		    T4G = T4C + T4F;
+		    T4L = T4H - T4K;
+		    TaT = TaR - TaS;
+		    TaQ = T1j - T1m;
+		    T89 = T4H + T4K;
+		    T88 = T4F - T4C;
+	       }
+	       {
+		    E T4r, T4z, T4w, T4u;
+		    {
+			 E T1o, T1p, T4x, T4y;
+			 T1o = ri[WS(is, 61)];
+			 T1p = ri[WS(is, 29)];
+			 T1q = T1o + T1p;
+			 T4r = T1o - T1p;
+			 T4x = ii[WS(is, 61)];
+			 T4y = ii[WS(is, 29)];
+			 T4z = T4x - T4y;
+			 TaM = T4x + T4y;
+		    }
+		    {
+			 E T1r, T1s, T4s, T4t;
+			 T1r = ri[WS(is, 13)];
+			 T1s = ri[WS(is, 45)];
+			 T1t = T1r + T1s;
+			 T4w = T1r - T1s;
+			 T4s = ii[WS(is, 13)];
+			 T4t = ii[WS(is, 45)];
+			 T4u = T4s - T4t;
+			 TaN = T4s + T4t;
+		    }
+		    T4v = T4r - T4u;
+		    T4A = T4w + T4z;
+		    TaO = TaM - TaN;
+		    TaL = T1q - T1t;
+		    T86 = T4z - T4w;
+		    T85 = T4r + T4u;
+	       }
+	       {
+		    E T1n, T1u, Tb2, Tb3;
+		    T1n = T1j + T1m;
+		    T1u = T1q + T1t;
+		    T1v = T1n + T1u;
+		    Tdn = T1u - T1n;
+		    Tb2 = TaT - TaQ;
+		    Tb3 = TaL + TaO;
+		    Tb4 = KP707106781 * (Tb2 - Tb3);
+		    Tcq = KP707106781 * (Tb2 + Tb3);
+	       }
+	       {
+		    E Tdq, Tdr, T4B, T4M;
+		    Tdq = TaR + TaS;
+		    Tdr = TaM + TaN;
+		    Tds = Tdq - Tdr;
+		    Tek = Tdq + Tdr;
+		    T4B = FNMS(KP923879532, T4A, KP382683432 * T4v);
+		    T4M = FMA(KP923879532, T4G, KP382683432 * T4L);
+		    T4N = T4B - T4M;
+		    T6P = T4M + T4B;
+	       }
+	       {
+		    E T4Y, T4Z, T8g, T8h;
+		    T4Y = FNMS(KP923879532, T4L, KP382683432 * T4G);
+		    T4Z = FMA(KP382683432, T4A, KP923879532 * T4v);
+		    T50 = T4Y - T4Z;
+		    T6S = T4Y + T4Z;
+		    T8g = FNMS(KP382683432, T89, KP923879532 * T88);
+		    T8h = FMA(KP923879532, T86, KP382683432 * T85);
+		    T8i = T8g - T8h;
+		    T9w = T8g + T8h;
+	       }
+	       {
+		    E TaP, TaU, T87, T8a;
+		    TaP = TaL - TaO;
+		    TaU = TaQ + TaT;
+		    TaV = KP707106781 * (TaP - TaU);
+		    Tcn = KP707106781 * (TaU + TaP);
+		    T87 = FNMS(KP382683432, T86, KP923879532 * T85);
+		    T8a = FMA(KP382683432, T88, KP923879532 * T89);
+		    T8b = T87 - T8a;
+		    T9t = T8a + T87;
+	       }
+	  }
+	  {
+	       E T1O, Tbc, T1R, Tbd, T5o, T5t, Tbf, Tbe, T8p, T8o, T1V, Tbi, T1Y, Tbj, T5z;
+	       E T5E, Tbk, Tbh, T8s, T8r;
+	       {
+		    E T5p, T5n, T5k, T5s;
+		    {
+			 E T1M, T1N, T5l, T5m;
+			 T1M = ri[WS(is, 3)];
+			 T1N = ri[WS(is, 35)];
+			 T1O = T1M + T1N;
+			 T5p = T1M - T1N;
+			 T5l = ii[WS(is, 3)];
+			 T5m = ii[WS(is, 35)];
+			 T5n = T5l - T5m;
+			 Tbc = T5l + T5m;
+		    }
+		    {
+			 E T1P, T1Q, T5q, T5r;
+			 T1P = ri[WS(is, 19)];
+			 T1Q = ri[WS(is, 51)];
+			 T1R = T1P + T1Q;
+			 T5k = T1P - T1Q;
+			 T5q = ii[WS(is, 19)];
+			 T5r = ii[WS(is, 51)];
+			 T5s = T5q - T5r;
+			 Tbd = T5q + T5r;
+		    }
+		    T5o = T5k + T5n;
+		    T5t = T5p - T5s;
+		    Tbf = T1O - T1R;
+		    Tbe = Tbc - Tbd;
+		    T8p = T5p + T5s;
+		    T8o = T5n - T5k;
+	       }
+	       {
+		    E T5A, T5y, T5v, T5D;
+		    {
+			 E T1T, T1U, T5w, T5x;
+			 T1T = ri[WS(is, 59)];
+			 T1U = ri[WS(is, 27)];
+			 T1V = T1T + T1U;
+			 T5A = T1T - T1U;
+			 T5w = ii[WS(is, 59)];
+			 T5x = ii[WS(is, 27)];
+			 T5y = T5w - T5x;
+			 Tbi = T5w + T5x;
+		    }
+		    {
+			 E T1W, T1X, T5B, T5C;
+			 T1W = ri[WS(is, 11)];
+			 T1X = ri[WS(is, 43)];
+			 T1Y = T1W + T1X;
+			 T5v = T1W - T1X;
+			 T5B = ii[WS(is, 11)];
+			 T5C = ii[WS(is, 43)];
+			 T5D = T5B - T5C;
+			 Tbj = T5B + T5C;
+		    }
+		    T5z = T5v + T5y;
+		    T5E = T5A - T5D;
+		    Tbk = Tbi - Tbj;
+		    Tbh = T1V - T1Y;
+		    T8s = T5A + T5D;
+		    T8r = T5y - T5v;
+	       }
+	       {
+		    E T1S, T1Z, Tbt, Tbu;
+		    T1S = T1O + T1R;
+		    T1Z = T1V + T1Y;
+		    T20 = T1S + T1Z;
+		    TdD = T1Z - T1S;
+		    Tbt = Tbh - Tbk;
+		    Tbu = Tbf + Tbe;
+		    Tbv = KP707106781 * (Tbt - Tbu);
+		    Tcu = KP707106781 * (Tbu + Tbt);
+	       }
+	       {
+		    E Tdw, Tdx, T5u, T5F;
+		    Tdw = Tbc + Tbd;
+		    Tdx = Tbi + Tbj;
+		    Tdy = Tdw - Tdx;
+		    Tep = Tdw + Tdx;
+		    T5u = FNMS(KP923879532, T5t, KP382683432 * T5o);
+		    T5F = FMA(KP382683432, T5z, KP923879532 * T5E);
+		    T5G = T5u - T5F;
+		    T6Z = T5u + T5F;
+	       }
+	       {
+		    E T5R, T5S, T8z, T8A;
+		    T5R = FNMS(KP923879532, T5z, KP382683432 * T5E);
+		    T5S = FMA(KP923879532, T5o, KP382683432 * T5t);
+		    T5T = T5R - T5S;
+		    T6W = T5S + T5R;
+		    T8z = FNMS(KP382683432, T8r, KP923879532 * T8s);
+		    T8A = FMA(KP382683432, T8o, KP923879532 * T8p);
+		    T8B = T8z - T8A;
+		    T9A = T8A + T8z;
+	       }
+	       {
+		    E Tbg, Tbl, T8q, T8t;
+		    Tbg = Tbe - Tbf;
+		    Tbl = Tbh + Tbk;
+		    Tbm = KP707106781 * (Tbg - Tbl);
+		    Tcx = KP707106781 * (Tbg + Tbl);
+		    T8q = FNMS(KP382683432, T8p, KP923879532 * T8o);
+		    T8t = FMA(KP923879532, T8r, KP382683432 * T8s);
+		    T8u = T8q - T8t;
+		    T9D = T8q + T8t;
+	       }
+	  }
+	  {
+	       E T11, TeD, TeG, TeI, T22, T23, T34, TeH;
+	       {
+		    E Tv, T10, TeE, TeF;
+		    Tv = Tf + Tu;
+		    T10 = TK + TZ;
+		    T11 = Tv + T10;
+		    TeD = Tv - T10;
+		    TeE = Tej + Tek;
+		    TeF = Teo + Tep;
+		    TeG = TeE - TeF;
+		    TeI = TeE + TeF;
+	       }
+	       {
+		    E T1w, T21, T2y, T33;
+		    T1w = T1g + T1v;
+		    T21 = T1L + T20;
+		    T22 = T1w + T21;
+		    T23 = T21 - T1w;
+		    T2y = T2i + T2x;
+		    T33 = T2N + T32;
+		    T34 = T2y - T33;
+		    TeH = T2y + T33;
+	       }
+	       ro[WS(os, 32)] = T11 - T22;
+	       io[WS(os, 32)] = TeH - TeI;
+	       ro[0] = T11 + T22;
+	       io[0] = TeH + TeI;
+	       io[WS(os, 16)] = T23 + T34;
+	       ro[WS(os, 16)] = TeD + TeG;
+	       io[WS(os, 48)] = T34 - T23;
+	       ro[WS(os, 48)] = TeD - TeG;
+	  }
+	  {
+	       E Teh, Tex, Tev, TeB, Tem, Tey, Ter, Tez;
+	       {
+		    E Tef, Teg, Tet, Teu;
+		    Tef = Tf - Tu;
+		    Teg = T2N - T32;
+		    Teh = Tef + Teg;
+		    Tex = Tef - Teg;
+		    Tet = T2i - T2x;
+		    Teu = TZ - TK;
+		    Tev = Tet - Teu;
+		    TeB = Teu + Tet;
+	       }
+	       {
+		    E Tei, Tel, Ten, Teq;
+		    Tei = T1g - T1v;
+		    Tel = Tej - Tek;
+		    Tem = Tei + Tel;
+		    Tey = Tel - Tei;
+		    Ten = T1L - T20;
+		    Teq = Teo - Tep;
+		    Ter = Ten - Teq;
+		    Tez = Ten + Teq;
+	       }
+	       {
+		    E Tes, TeC, Tew, TeA;
+		    Tes = KP707106781 * (Tem + Ter);
+		    ro[WS(os, 40)] = Teh - Tes;
+		    ro[WS(os, 8)] = Teh + Tes;
+		    TeC = KP707106781 * (Tey + Tez);
+		    io[WS(os, 40)] = TeB - TeC;
+		    io[WS(os, 8)] = TeB + TeC;
+		    Tew = KP707106781 * (Ter - Tem);
+		    io[WS(os, 56)] = Tev - Tew;
+		    io[WS(os, 24)] = Tev + Tew;
+		    TeA = KP707106781 * (Tey - Tez);
+		    ro[WS(os, 56)] = Tex - TeA;
+		    ro[WS(os, 24)] = Tex + TeA;
+	       }
+	  }
+	  {
+	       E Tdb, TdV, Te5, TdJ, Tdi, Te6, Te3, Teb, TdM, TdW, Tdu, TdQ, Te0, Tea, TdF;
+	       E TdR;
+	       {
+		    E Tde, Tdh, Tdo, Tdt;
+		    Tdb = Td9 - Tda;
+		    TdV = Td9 + Tda;
+		    Te5 = TdI + TdH;
+		    TdJ = TdH - TdI;
+		    Tde = Tdc - Tdd;
+		    Tdh = Tdf + Tdg;
+		    Tdi = KP707106781 * (Tde - Tdh);
+		    Te6 = KP707106781 * (Tde + Tdh);
+		    {
+			 E Te1, Te2, TdK, TdL;
+			 Te1 = Tdv + Tdy;
+			 Te2 = TdD + TdC;
+			 Te3 = FNMS(KP382683432, Te2, KP923879532 * Te1);
+			 Teb = FMA(KP923879532, Te2, KP382683432 * Te1);
+			 TdK = Tdf - Tdg;
+			 TdL = Tdd + Tdc;
+			 TdM = KP707106781 * (TdK - TdL);
+			 TdW = KP707106781 * (TdL + TdK);
+		    }
+		    Tdo = Tdm - Tdn;
+		    Tdt = Tdp - Tds;
+		    Tdu = FMA(KP923879532, Tdo, KP382683432 * Tdt);
+		    TdQ = FNMS(KP923879532, Tdt, KP382683432 * Tdo);
+		    {
+			 E TdY, TdZ, Tdz, TdE;
+			 TdY = Tdn + Tdm;
+			 TdZ = Tdp + Tds;
+			 Te0 = FMA(KP382683432, TdY, KP923879532 * TdZ);
+			 Tea = FNMS(KP382683432, TdZ, KP923879532 * TdY);
+			 Tdz = Tdv - Tdy;
+			 TdE = TdC - TdD;
+			 TdF = FNMS(KP923879532, TdE, KP382683432 * Tdz);
+			 TdR = FMA(KP382683432, TdE, KP923879532 * Tdz);
+		    }
+	       }
+	       {
+		    E Tdj, TdG, TdT, TdU;
+		    Tdj = Tdb + Tdi;
+		    TdG = Tdu + TdF;
+		    ro[WS(os, 44)] = Tdj - TdG;
+		    ro[WS(os, 12)] = Tdj + TdG;
+		    TdT = TdJ + TdM;
+		    TdU = TdQ + TdR;
+		    io[WS(os, 44)] = TdT - TdU;
+		    io[WS(os, 12)] = TdT + TdU;
+	       }
+	       {
+		    E TdN, TdO, TdP, TdS;
+		    TdN = TdJ - TdM;
+		    TdO = TdF - Tdu;
+		    io[WS(os, 60)] = TdN - TdO;
+		    io[WS(os, 28)] = TdN + TdO;
+		    TdP = Tdb - Tdi;
+		    TdS = TdQ - TdR;
+		    ro[WS(os, 60)] = TdP - TdS;
+		    ro[WS(os, 28)] = TdP + TdS;
+	       }
+	       {
+		    E TdX, Te4, Ted, Tee;
+		    TdX = TdV + TdW;
+		    Te4 = Te0 + Te3;
+		    ro[WS(os, 36)] = TdX - Te4;
+		    ro[WS(os, 4)] = TdX + Te4;
+		    Ted = Te5 + Te6;
+		    Tee = Tea + Teb;
+		    io[WS(os, 36)] = Ted - Tee;
+		    io[WS(os, 4)] = Ted + Tee;
+	       }
+	       {
+		    E Te7, Te8, Te9, Tec;
+		    Te7 = Te5 - Te6;
+		    Te8 = Te3 - Te0;
+		    io[WS(os, 52)] = Te7 - Te8;
+		    io[WS(os, 20)] = Te7 + Te8;
+		    Te9 = TdV - TdW;
+		    Tec = Tea - Teb;
+		    ro[WS(os, 52)] = Te9 - Tec;
+		    ro[WS(os, 20)] = Te9 + Tec;
+	       }
+	  }
+	  {
+	       E Tcd, TcP, TcD, TcZ, Tck, Td0, TcX, Td5, Tcs, TcK, TcG, TcQ, TcU, Td4, Tcz;
+	       E TcL, Tcc, TcC;
+	       Tcc = KP707106781 * (TbD + TbC);
+	       Tcd = Tcb - Tcc;
+	       TcP = Tcb + Tcc;
+	       TcC = KP707106781 * (Tak + Tan);
+	       TcD = TcB - TcC;
+	       TcZ = TcB + TcC;
+	       {
+		    E Tcg, Tcj, TcV, TcW;
+		    Tcg = FNMS(KP382683432, Tcf, KP923879532 * Tce);
+		    Tcj = FMA(KP923879532, Tch, KP382683432 * Tci);
+		    Tck = Tcg - Tcj;
+		    Td0 = Tcg + Tcj;
+		    TcV = Tct + Tcu;
+		    TcW = Tcw + Tcx;
+		    TcX = FNMS(KP195090322, TcW, KP980785280 * TcV);
+		    Td5 = FMA(KP195090322, TcV, KP980785280 * TcW);
+	       }
+	       {
+		    E Tco, Tcr, TcE, TcF;
+		    Tco = Tcm - Tcn;
+		    Tcr = Tcp - Tcq;
+		    Tcs = FMA(KP555570233, Tco, KP831469612 * Tcr);
+		    TcK = FNMS(KP831469612, Tco, KP555570233 * Tcr);
+		    TcE = FNMS(KP382683432, Tch, KP923879532 * Tci);
+		    TcF = FMA(KP382683432, Tce, KP923879532 * Tcf);
+		    TcG = TcE - TcF;
+		    TcQ = TcF + TcE;
+	       }
+	       {
+		    E TcS, TcT, Tcv, Tcy;
+		    TcS = Tcm + Tcn;
+		    TcT = Tcp + Tcq;
+		    TcU = FMA(KP980785280, TcS, KP195090322 * TcT);
+		    Td4 = FNMS(KP195090322, TcS, KP980785280 * TcT);
+		    Tcv = Tct - Tcu;
+		    Tcy = Tcw - Tcx;
+		    Tcz = FNMS(KP831469612, Tcy, KP555570233 * Tcv);
+		    TcL = FMA(KP831469612, Tcv, KP555570233 * Tcy);
+	       }
+	       {
+		    E Tcl, TcA, TcN, TcO;
+		    Tcl = Tcd + Tck;
+		    TcA = Tcs + Tcz;
+		    ro[WS(os, 42)] = Tcl - TcA;
+		    ro[WS(os, 10)] = Tcl + TcA;
+		    TcN = TcD + TcG;
+		    TcO = TcK + TcL;
+		    io[WS(os, 42)] = TcN - TcO;
+		    io[WS(os, 10)] = TcN + TcO;
+	       }
+	       {
+		    E TcH, TcI, TcJ, TcM;
+		    TcH = TcD - TcG;
+		    TcI = Tcz - Tcs;
+		    io[WS(os, 58)] = TcH - TcI;
+		    io[WS(os, 26)] = TcH + TcI;
+		    TcJ = Tcd - Tck;
+		    TcM = TcK - TcL;
+		    ro[WS(os, 58)] = TcJ - TcM;
+		    ro[WS(os, 26)] = TcJ + TcM;
+	       }
+	       {
+		    E TcR, TcY, Td7, Td8;
+		    TcR = TcP + TcQ;
+		    TcY = TcU + TcX;
+		    ro[WS(os, 34)] = TcR - TcY;
+		    ro[WS(os, 2)] = TcR + TcY;
+		    Td7 = TcZ + Td0;
+		    Td8 = Td4 + Td5;
+		    io[WS(os, 34)] = Td7 - Td8;
+		    io[WS(os, 2)] = Td7 + Td8;
+	       }
+	       {
+		    E Td1, Td2, Td3, Td6;
+		    Td1 = TcZ - Td0;
+		    Td2 = TcX - TcU;
+		    io[WS(os, 50)] = Td1 - Td2;
+		    io[WS(os, 18)] = Td1 + Td2;
+		    Td3 = TcP - TcQ;
+		    Td6 = Td4 - Td5;
+		    ro[WS(os, 50)] = Td3 - Td6;
+		    ro[WS(os, 18)] = Td3 + Td6;
+	       }
+	  }
+	  {
+	       E Tap, TbR, TbF, Tc1, TaE, Tc2, TbZ, Tc7, Tb6, TbM, TbI, TbS, TbW, Tc6, Tbx;
+	       E TbN, Tao, TbE;
+	       Tao = KP707106781 * (Tak - Tan);
+	       Tap = Tah - Tao;
+	       TbR = Tah + Tao;
+	       TbE = KP707106781 * (TbC - TbD);
+	       TbF = TbB - TbE;
+	       Tc1 = TbB + TbE;
+	       {
+		    E Taw, TaD, TbX, TbY;
+		    Taw = FNMS(KP923879532, Tav, KP382683432 * Tas);
+		    TaD = FMA(KP382683432, Taz, KP923879532 * TaC);
+		    TaE = Taw - TaD;
+		    Tc2 = Taw + TaD;
+		    TbX = Tbb + Tbm;
+		    TbY = Tbs + Tbv;
+		    TbZ = FNMS(KP555570233, TbY, KP831469612 * TbX);
+		    Tc7 = FMA(KP831469612, TbY, KP555570233 * TbX);
+	       }
+	       {
+		    E TaW, Tb5, TbG, TbH;
+		    TaW = TaK - TaV;
+		    Tb5 = Tb1 - Tb4;
+		    Tb6 = FMA(KP980785280, TaW, KP195090322 * Tb5);
+		    TbM = FNMS(KP980785280, Tb5, KP195090322 * TaW);
+		    TbG = FNMS(KP923879532, Taz, KP382683432 * TaC);
+		    TbH = FMA(KP923879532, Tas, KP382683432 * Tav);
+		    TbI = TbG - TbH;
+		    TbS = TbH + TbG;
+	       }
+	       {
+		    E TbU, TbV, Tbn, Tbw;
+		    TbU = TaK + TaV;
+		    TbV = Tb1 + Tb4;
+		    TbW = FMA(KP555570233, TbU, KP831469612 * TbV);
+		    Tc6 = FNMS(KP555570233, TbV, KP831469612 * TbU);
+		    Tbn = Tbb - Tbm;
+		    Tbw = Tbs - Tbv;
+		    Tbx = FNMS(KP980785280, Tbw, KP195090322 * Tbn);
+		    TbN = FMA(KP195090322, Tbw, KP980785280 * Tbn);
+	       }
+	       {
+		    E TaF, Tby, TbP, TbQ;
+		    TaF = Tap + TaE;
+		    Tby = Tb6 + Tbx;
+		    ro[WS(os, 46)] = TaF - Tby;
+		    ro[WS(os, 14)] = TaF + Tby;
+		    TbP = TbF + TbI;
+		    TbQ = TbM + TbN;
+		    io[WS(os, 46)] = TbP - TbQ;
+		    io[WS(os, 14)] = TbP + TbQ;
+	       }
+	       {
+		    E TbJ, TbK, TbL, TbO;
+		    TbJ = TbF - TbI;
+		    TbK = Tbx - Tb6;
+		    io[WS(os, 62)] = TbJ - TbK;
+		    io[WS(os, 30)] = TbJ + TbK;
+		    TbL = Tap - TaE;
+		    TbO = TbM - TbN;
+		    ro[WS(os, 62)] = TbL - TbO;
+		    ro[WS(os, 30)] = TbL + TbO;
+	       }
+	       {
+		    E TbT, Tc0, Tc9, Tca;
+		    TbT = TbR + TbS;
+		    Tc0 = TbW + TbZ;
+		    ro[WS(os, 38)] = TbT - Tc0;
+		    ro[WS(os, 6)] = TbT + Tc0;
+		    Tc9 = Tc1 + Tc2;
+		    Tca = Tc6 + Tc7;
+		    io[WS(os, 38)] = Tc9 - Tca;
+		    io[WS(os, 6)] = Tc9 + Tca;
+	       }
+	       {
+		    E Tc3, Tc4, Tc5, Tc8;
+		    Tc3 = Tc1 - Tc2;
+		    Tc4 = TbZ - TbW;
+		    io[WS(os, 54)] = Tc3 - Tc4;
+		    io[WS(os, 22)] = Tc3 + Tc4;
+		    Tc5 = TbR - TbS;
+		    Tc8 = Tc6 - Tc7;
+		    ro[WS(os, 54)] = Tc5 - Tc8;
+		    ro[WS(os, 22)] = Tc5 + Tc8;
+	       }
+	  }
+	  {
+	       E T6F, T7h, T7m, T7w, T7p, T7x, T6M, T7s, T6U, T7c, T75, T7r, T78, T7i, T71;
+	       E T7d;
+	       {
+		    E T6D, T6E, T7k, T7l;
+		    T6D = T37 + T3e;
+		    T6E = T65 + T64;
+		    T6F = T6D - T6E;
+		    T7h = T6D + T6E;
+		    T7k = T6O + T6P;
+		    T7l = T6R + T6S;
+		    T7m = FMA(KP956940335, T7k, KP290284677 * T7l);
+		    T7w = FNMS(KP290284677, T7k, KP956940335 * T7l);
+	       }
+	       {
+		    E T7n, T7o, T6I, T6L;
+		    T7n = T6V + T6W;
+		    T7o = T6Y + T6Z;
+		    T7p = FNMS(KP290284677, T7o, KP956940335 * T7n);
+		    T7x = FMA(KP290284677, T7n, KP956940335 * T7o);
+		    T6I = FNMS(KP555570233, T6H, KP831469612 * T6G);
+		    T6L = FMA(KP831469612, T6J, KP555570233 * T6K);
+		    T6M = T6I - T6L;
+		    T7s = T6I + T6L;
+	       }
+	       {
+		    E T6Q, T6T, T73, T74;
+		    T6Q = T6O - T6P;
+		    T6T = T6R - T6S;
+		    T6U = FMA(KP471396736, T6Q, KP881921264 * T6T);
+		    T7c = FNMS(KP881921264, T6Q, KP471396736 * T6T);
+		    T73 = T5Z + T62;
+		    T74 = T3m + T3t;
+		    T75 = T73 - T74;
+		    T7r = T73 + T74;
+	       }
+	       {
+		    E T76, T77, T6X, T70;
+		    T76 = FNMS(KP555570233, T6J, KP831469612 * T6K);
+		    T77 = FMA(KP555570233, T6G, KP831469612 * T6H);
+		    T78 = T76 - T77;
+		    T7i = T77 + T76;
+		    T6X = T6V - T6W;
+		    T70 = T6Y - T6Z;
+		    T71 = FNMS(KP881921264, T70, KP471396736 * T6X);
+		    T7d = FMA(KP881921264, T6X, KP471396736 * T70);
+	       }
+	       {
+		    E T6N, T72, T7f, T7g;
+		    T6N = T6F + T6M;
+		    T72 = T6U + T71;
+		    ro[WS(os, 43)] = T6N - T72;
+		    ro[WS(os, 11)] = T6N + T72;
+		    T7f = T75 + T78;
+		    T7g = T7c + T7d;
+		    io[WS(os, 43)] = T7f - T7g;
+		    io[WS(os, 11)] = T7f + T7g;
+	       }
+	       {
+		    E T79, T7a, T7b, T7e;
+		    T79 = T75 - T78;
+		    T7a = T71 - T6U;
+		    io[WS(os, 59)] = T79 - T7a;
+		    io[WS(os, 27)] = T79 + T7a;
+		    T7b = T6F - T6M;
+		    T7e = T7c - T7d;
+		    ro[WS(os, 59)] = T7b - T7e;
+		    ro[WS(os, 27)] = T7b + T7e;
+	       }
+	       {
+		    E T7j, T7q, T7z, T7A;
+		    T7j = T7h + T7i;
+		    T7q = T7m + T7p;
+		    ro[WS(os, 35)] = T7j - T7q;
+		    ro[WS(os, 3)] = T7j + T7q;
+		    T7z = T7r + T7s;
+		    T7A = T7w + T7x;
+		    io[WS(os, 35)] = T7z - T7A;
+		    io[WS(os, 3)] = T7z + T7A;
+	       }
+	       {
+		    E T7t, T7u, T7v, T7y;
+		    T7t = T7r - T7s;
+		    T7u = T7p - T7m;
+		    io[WS(os, 51)] = T7t - T7u;
+		    io[WS(os, 19)] = T7t + T7u;
+		    T7v = T7h - T7i;
+		    T7y = T7w - T7x;
+		    ro[WS(os, 51)] = T7v - T7y;
+		    ro[WS(os, 19)] = T7v + T7y;
+	       }
+	  }
+	  {
+	       E T9j, T9V, Ta0, Taa, Ta3, Tab, T9q, Ta6, T9y, T9Q, T9J, Ta5, T9M, T9W, T9F;
+	       E T9R;
+	       {
+		    E T9h, T9i, T9Y, T9Z;
+		    T9h = T7B + T7C;
+		    T9i = T8J + T8I;
+		    T9j = T9h - T9i;
+		    T9V = T9h + T9i;
+		    T9Y = T9s + T9t;
+		    T9Z = T9v + T9w;
+		    Ta0 = FMA(KP995184726, T9Y, KP098017140 * T9Z);
+		    Taa = FNMS(KP098017140, T9Y, KP995184726 * T9Z);
+	       }
+	       {
+		    E Ta1, Ta2, T9m, T9p;
+		    Ta1 = T9z + T9A;
+		    Ta2 = T9C + T9D;
+		    Ta3 = FNMS(KP098017140, Ta2, KP995184726 * Ta1);
+		    Tab = FMA(KP098017140, Ta1, KP995184726 * Ta2);
+		    T9m = FNMS(KP195090322, T9l, KP980785280 * T9k);
+		    T9p = FMA(KP195090322, T9n, KP980785280 * T9o);
+		    T9q = T9m - T9p;
+		    Ta6 = T9m + T9p;
+	       }
+	       {
+		    E T9u, T9x, T9H, T9I;
+		    T9u = T9s - T9t;
+		    T9x = T9v - T9w;
+		    T9y = FMA(KP634393284, T9u, KP773010453 * T9x);
+		    T9Q = FNMS(KP773010453, T9u, KP634393284 * T9x);
+		    T9H = T8F + T8G;
+		    T9I = T7G + T7J;
+		    T9J = T9H - T9I;
+		    Ta5 = T9H + T9I;
+	       }
+	       {
+		    E T9K, T9L, T9B, T9E;
+		    T9K = FNMS(KP195090322, T9o, KP980785280 * T9n);
+		    T9L = FMA(KP980785280, T9l, KP195090322 * T9k);
+		    T9M = T9K - T9L;
+		    T9W = T9L + T9K;
+		    T9B = T9z - T9A;
+		    T9E = T9C - T9D;
+		    T9F = FNMS(KP773010453, T9E, KP634393284 * T9B);
+		    T9R = FMA(KP773010453, T9B, KP634393284 * T9E);
+	       }
+	       {
+		    E T9r, T9G, T9T, T9U;
+		    T9r = T9j + T9q;
+		    T9G = T9y + T9F;
+		    ro[WS(os, 41)] = T9r - T9G;
+		    ro[WS(os, 9)] = T9r + T9G;
+		    T9T = T9J + T9M;
+		    T9U = T9Q + T9R;
+		    io[WS(os, 41)] = T9T - T9U;
+		    io[WS(os, 9)] = T9T + T9U;
+	       }
+	       {
+		    E T9N, T9O, T9P, T9S;
+		    T9N = T9J - T9M;
+		    T9O = T9F - T9y;
+		    io[WS(os, 57)] = T9N - T9O;
+		    io[WS(os, 25)] = T9N + T9O;
+		    T9P = T9j - T9q;
+		    T9S = T9Q - T9R;
+		    ro[WS(os, 57)] = T9P - T9S;
+		    ro[WS(os, 25)] = T9P + T9S;
+	       }
+	       {
+		    E T9X, Ta4, Tad, Tae;
+		    T9X = T9V + T9W;
+		    Ta4 = Ta0 + Ta3;
+		    ro[WS(os, 33)] = T9X - Ta4;
+		    ro[WS(os, 1)] = T9X + Ta4;
+		    Tad = Ta5 + Ta6;
+		    Tae = Taa + Tab;
+		    io[WS(os, 33)] = Tad - Tae;
+		    io[WS(os, 1)] = Tad + Tae;
+	       }
+	       {
+		    E Ta7, Ta8, Ta9, Tac;
+		    Ta7 = Ta5 - Ta6;
+		    Ta8 = Ta3 - Ta0;
+		    io[WS(os, 49)] = Ta7 - Ta8;
+		    io[WS(os, 17)] = Ta7 + Ta8;
+		    Ta9 = T9V - T9W;
+		    Tac = Taa - Tab;
+		    ro[WS(os, 49)] = Ta9 - Tac;
+		    ro[WS(os, 17)] = Ta9 + Tac;
+	       }
+	  }
+	  {
+	       E T3v, T6j, T6o, T6y, T6r, T6z, T48, T6u, T52, T6e, T67, T6t, T6a, T6k, T5V;
+	       E T6f;
+	       {
+		    E T3f, T3u, T6m, T6n;
+		    T3f = T37 - T3e;
+		    T3u = T3m - T3t;
+		    T3v = T3f - T3u;
+		    T6j = T3f + T3u;
+		    T6m = T4q + T4N;
+		    T6n = T4X + T50;
+		    T6o = FMA(KP634393284, T6m, KP773010453 * T6n);
+		    T6y = FNMS(KP634393284, T6n, KP773010453 * T6m);
+	       }
+	       {
+		    E T6p, T6q, T3O, T47;
+		    T6p = T5j + T5G;
+		    T6q = T5Q + T5T;
+		    T6r = FNMS(KP634393284, T6q, KP773010453 * T6p);
+		    T6z = FMA(KP773010453, T6q, KP634393284 * T6p);
+		    T3O = FNMS(KP980785280, T3N, KP195090322 * T3G);
+		    T47 = FMA(KP195090322, T3Z, KP980785280 * T46);
+		    T48 = T3O - T47;
+		    T6u = T3O + T47;
+	       }
+	       {
+		    E T4O, T51, T63, T66;
+		    T4O = T4q - T4N;
+		    T51 = T4X - T50;
+		    T52 = FMA(KP995184726, T4O, KP098017140 * T51);
+		    T6e = FNMS(KP995184726, T51, KP098017140 * T4O);
+		    T63 = T5Z - T62;
+		    T66 = T64 - T65;
+		    T67 = T63 - T66;
+		    T6t = T63 + T66;
+	       }
+	       {
+		    E T68, T69, T5H, T5U;
+		    T68 = FNMS(KP980785280, T3Z, KP195090322 * T46);
+		    T69 = FMA(KP980785280, T3G, KP195090322 * T3N);
+		    T6a = T68 - T69;
+		    T6k = T69 + T68;
+		    T5H = T5j - T5G;
+		    T5U = T5Q - T5T;
+		    T5V = FNMS(KP995184726, T5U, KP098017140 * T5H);
+		    T6f = FMA(KP098017140, T5U, KP995184726 * T5H);
+	       }
+	       {
+		    E T49, T5W, T6h, T6i;
+		    T49 = T3v + T48;
+		    T5W = T52 + T5V;
+		    ro[WS(os, 47)] = T49 - T5W;
+		    ro[WS(os, 15)] = T49 + T5W;
+		    T6h = T67 + T6a;
+		    T6i = T6e + T6f;
+		    io[WS(os, 47)] = T6h - T6i;
+		    io[WS(os, 15)] = T6h + T6i;
+	       }
+	       {
+		    E T6b, T6c, T6d, T6g;
+		    T6b = T67 - T6a;
+		    T6c = T5V - T52;
+		    io[WS(os, 63)] = T6b - T6c;
+		    io[WS(os, 31)] = T6b + T6c;
+		    T6d = T3v - T48;
+		    T6g = T6e - T6f;
+		    ro[WS(os, 63)] = T6d - T6g;
+		    ro[WS(os, 31)] = T6d + T6g;
+	       }
+	       {
+		    E T6l, T6s, T6B, T6C;
+		    T6l = T6j + T6k;
+		    T6s = T6o + T6r;
+		    ro[WS(os, 39)] = T6l - T6s;
+		    ro[WS(os, 7)] = T6l + T6s;
+		    T6B = T6t + T6u;
+		    T6C = T6y + T6z;
+		    io[WS(os, 39)] = T6B - T6C;
+		    io[WS(os, 7)] = T6B + T6C;
+	       }
+	       {
+		    E T6v, T6w, T6x, T6A;
+		    T6v = T6t - T6u;
+		    T6w = T6r - T6o;
+		    io[WS(os, 55)] = T6v - T6w;
+		    io[WS(os, 23)] = T6v + T6w;
+		    T6x = T6j - T6k;
+		    T6A = T6y - T6z;
+		    ro[WS(os, 55)] = T6x - T6A;
+		    ro[WS(os, 23)] = T6x + T6A;
+	       }
+	  }
+	  {
+	       E T7L, T8X, T92, T9c, T95, T9d, T80, T98, T8k, T8S, T8L, T97, T8O, T8Y, T8D;
+	       E T8T;
+	       {
+		    E T7D, T7K, T90, T91;
+		    T7D = T7B - T7C;
+		    T7K = T7G - T7J;
+		    T7L = T7D - T7K;
+		    T8X = T7D + T7K;
+		    T90 = T84 + T8b;
+		    T91 = T8f + T8i;
+		    T92 = FMA(KP471396736, T90, KP881921264 * T91);
+		    T9c = FNMS(KP471396736, T91, KP881921264 * T90);
+	       }
+	       {
+		    E T93, T94, T7S, T7Z;
+		    T93 = T8n + T8u;
+		    T94 = T8y + T8B;
+		    T95 = FNMS(KP471396736, T94, KP881921264 * T93);
+		    T9d = FMA(KP881921264, T94, KP471396736 * T93);
+		    T7S = FNMS(KP831469612, T7R, KP555570233 * T7O);
+		    T7Z = FMA(KP831469612, T7V, KP555570233 * T7Y);
+		    T80 = T7S - T7Z;
+		    T98 = T7S + T7Z;
+	       }
+	       {
+		    E T8c, T8j, T8H, T8K;
+		    T8c = T84 - T8b;
+		    T8j = T8f - T8i;
+		    T8k = FMA(KP956940335, T8c, KP290284677 * T8j);
+		    T8S = FNMS(KP956940335, T8j, KP290284677 * T8c);
+		    T8H = T8F - T8G;
+		    T8K = T8I - T8J;
+		    T8L = T8H - T8K;
+		    T97 = T8H + T8K;
+	       }
+	       {
+		    E T8M, T8N, T8v, T8C;
+		    T8M = FNMS(KP831469612, T7Y, KP555570233 * T7V);
+		    T8N = FMA(KP555570233, T7R, KP831469612 * T7O);
+		    T8O = T8M - T8N;
+		    T8Y = T8N + T8M;
+		    T8v = T8n - T8u;
+		    T8C = T8y - T8B;
+		    T8D = FNMS(KP956940335, T8C, KP290284677 * T8v);
+		    T8T = FMA(KP290284677, T8C, KP956940335 * T8v);
+	       }
+	       {
+		    E T81, T8E, T8V, T8W;
+		    T81 = T7L + T80;
+		    T8E = T8k + T8D;
+		    ro[WS(os, 45)] = T81 - T8E;
+		    ro[WS(os, 13)] = T81 + T8E;
+		    T8V = T8L + T8O;
+		    T8W = T8S + T8T;
+		    io[WS(os, 45)] = T8V - T8W;
+		    io[WS(os, 13)] = T8V + T8W;
+	       }
+	       {
+		    E T8P, T8Q, T8R, T8U;
+		    T8P = T8L - T8O;
+		    T8Q = T8D - T8k;
+		    io[WS(os, 61)] = T8P - T8Q;
+		    io[WS(os, 29)] = T8P + T8Q;
+		    T8R = T7L - T80;
+		    T8U = T8S - T8T;
+		    ro[WS(os, 61)] = T8R - T8U;
+		    ro[WS(os, 29)] = T8R + T8U;
+	       }
+	       {
+		    E T8Z, T96, T9f, T9g;
+		    T8Z = T8X + T8Y;
+		    T96 = T92 + T95;
+		    ro[WS(os, 37)] = T8Z - T96;
+		    ro[WS(os, 5)] = T8Z + T96;
+		    T9f = T97 + T98;
+		    T9g = T9c + T9d;
+		    io[WS(os, 37)] = T9f - T9g;
+		    io[WS(os, 5)] = T9f + T9g;
+	       }
+	       {
+		    E T99, T9a, T9b, T9e;
+		    T99 = T97 - T98;
+		    T9a = T95 - T92;
+		    io[WS(os, 53)] = T99 - T9a;
+		    io[WS(os, 21)] = T99 + T9a;
+		    T9b = T8X - T8Y;
+		    T9e = T9c - T9d;
+		    ro[WS(os, 53)] = T9b - T9e;
+		    ro[WS(os, 21)] = T9b + T9e;
+	       }
+	  }
+     }
+}
+
+static void m1_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
+{ /* Driver loop: calls m1_64_0 v times, moving all four pointers between calls; does nothing when v <= 0. */
+     int i;
+     for (i = v; i > 0; i -= 1) { /* counts down from v; i is used only as the trip counter */
+	  m1_64_0(ri, ii, ro, io, is, os); /* ri/ii are read-only inputs, ro/io are outputs; is/os are passed through unchanged */
+	  ri += ivs; /* both input pointers advance by ivs elements per iteration */
+	  ii += ivs;
+	  ro += ovs; /* both output pointers advance by ovs elements per iteration */
+	  io += ovs;
+     }
+}
+
+static const kdft_desc desc = { 64, "m1_64", {808, 144, 104, 0}, &GENUS, 0, 0, 0, 0 }; /* descriptor passed along with m1_64 at registration; NOTE(review): 64 is presumably the transform size and {808, 144, 104, 0} the generator's operation counts -- confirm against the kdft_desc declaration */
+void X(codelet_m1_64) (planner *p) { /* Entry point for this codelet: hands m1_64 and its descriptor to the planner p. */
+     X(kdft_register) (p, m1_64, &desc); /* desc is the file-local static descriptor, so its address stays valid after this call returns */
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_10.c b/src/fftw3/dft/codelets/standard/n1_10.c
new file mode 100644
index 0000000..95fb984
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_10.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:32 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 10 -name n1_10 -include n.h */
+
+/*
+ * This function contains 84 FP additions, 24 FP multiplications,
+ * (or, 72 additions, 12 multiplications, 12 fused multiply/add),
+ * 41 stack variables, and 40 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_10.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_10.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_10.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
+{ /* Generated DFT codelet (gen_notw -n 10, see the file header): every loop pass reads ri/ii at indices 0..9 and writes ro/io at indices 0..9. */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* ~ sqrt(5)/4 */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* ~ sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* ~ sin(2*pi/5) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; input pointers step by ivs and output pointers by ovs after each pass */
+	  E T3, Tj, TQ, T1e, TU, TV, T1c, T1b, Tm, Tp, Tq, Ta, Th, Ti, TA; /* intermediates shared between the nested blocks below */
+	  E TH, T17, T14, T1f, T1g, T1h, TL, TM, TR;
+	  { /* inputs 0 and 5: difference and sum of the real parts (T3, Tj) and of the imaginary parts (TQ, T1e) */
+	       E T1, T2, TO, TP;
+	       T1 = ri[0];
+	       T2 = ri[WS(is, 5)]; /* WS(is, k): presumably the array offset of logical index k under stride is (macro defined outside this file) */
+	       T3 = T1 - T2;
+	       Tj = T1 + T2;
+	       TO = ii[0];
+	       TP = ii[WS(is, 5)];
+	       TQ = TO - TP;
+	       T1e = TO + TP;
+	  }
+	  { /* real inputs paired as (2,7), (6,1), (8,3), (4,9): each pair yields a difference and a sum */
+	       E T6, Tk, Tg, To, T9, Tl, Td, Tn;
+	       {
+		    E T4, T5, Te, Tf;
+		    T4 = ri[WS(is, 2)];
+		    T5 = ri[WS(is, 7)];
+		    T6 = T4 - T5;
+		    Tk = T4 + T5;
+		    Te = ri[WS(is, 6)];
+		    Tf = ri[WS(is, 1)];
+		    Tg = Te - Tf;
+		    To = Te + Tf;
+	       }
+	       {
+		    E T7, T8, Tb, Tc;
+		    T7 = ri[WS(is, 8)];
+		    T8 = ri[WS(is, 3)];
+		    T9 = T7 - T8;
+		    Tl = T7 + T8;
+		    Tb = ri[WS(is, 4)];
+		    Tc = ri[WS(is, 9)];
+		    Td = Tb - Tc;
+		    Tn = Tb + Tc;
+	       }
+	       TU = T6 - T9;
+	       TV = Td - Tg;
+	       T1c = Tk - Tl;
+	       T1b = Tn - To;
+	       Tm = Tk + Tl;
+	       Tp = Tn + To;
+	       Tq = Tm + Tp; /* sum of real inputs 1..4 and 6..9 */
+	       Ta = T6 + T9;
+	       Th = Td + Tg;
+	       Ti = Ta + Th; /* real inputs 2 + 4 + 6 + 8 minus real inputs 1 + 3 + 7 + 9 */
+	  }
+	  { /* same pairing applied to the imaginary inputs */
+	       E Tw, T15, TG, T13, Tz, T16, TD, T12;
+	       {
+		    E Tu, Tv, TE, TF;
+		    Tu = ii[WS(is, 2)];
+		    Tv = ii[WS(is, 7)];
+		    Tw = Tu - Tv;
+		    T15 = Tu + Tv;
+		    TE = ii[WS(is, 6)];
+		    TF = ii[WS(is, 1)];
+		    TG = TE - TF;
+		    T13 = TE + TF;
+	       }
+	       {
+		    E Tx, Ty, TB, TC;
+		    Tx = ii[WS(is, 8)];
+		    Ty = ii[WS(is, 3)];
+		    Tz = Tx - Ty;
+		    T16 = Tx + Ty;
+		    TB = ii[WS(is, 4)];
+		    TC = ii[WS(is, 9)];
+		    TD = TB - TC;
+		    T12 = TB + TC;
+	       }
+	       TA = Tw - Tz;
+	       TH = TD - TG;
+	       T17 = T15 - T16;
+	       T14 = T12 - T13;
+	       T1f = T15 + T16;
+	       T1g = T12 + T13;
+	       T1h = T1f + T1g; /* sum of imaginary inputs 1..4 and 6..9 */
+	       TL = Tw + Tz;
+	       TM = TD + TG;
+	       TR = TL + TM; /* imaginary inputs 2 + 4 + 6 + 8 minus imaginary inputs 1 + 3 + 7 + 9 */
+	  }
+	  ro[WS(os, 5)] = T3 + Ti; /* even-indexed real inputs minus odd-indexed real inputs */
+	  io[WS(os, 5)] = TQ + TR; /* even-indexed imaginary inputs minus odd-indexed imaginary inputs */
+	  ro[0] = Tj + Tq; /* sum of all ten real inputs */
+	  io[0] = T1e + T1h; /* sum of all ten imaginary inputs */
+	  { /* real outputs 1, 3, 7, 9 */
+	       E TI, TK, Tt, TJ, Tr, Ts;
+	       TI = FMA(KP951056516, TA, KP587785252 * TH);
+	       TK = FNMS(KP587785252, TA, KP951056516 * TH);
+	       Tr = KP559016994 * (Ta - Th);
+	       Ts = FNMS(KP250000000, Ti, T3);
+	       Tt = Tr + Ts;
+	       TJ = Ts - Tr;
+	       ro[WS(os, 9)] = Tt - TI;
+	       ro[WS(os, 3)] = TJ + TK;
+	       ro[WS(os, 1)] = Tt + TI;
+	       ro[WS(os, 7)] = TJ - TK;
+	  }
+	  { /* imaginary outputs 1, 3, 7, 9 */
+	       E TW, TY, TT, TX, TN, TS;
+	       TW = FMA(KP951056516, TU, KP587785252 * TV);
+	       TY = FNMS(KP587785252, TU, KP951056516 * TV);
+	       TN = KP559016994 * (TL - TM);
+	       TS = FNMS(KP250000000, TR, TQ);
+	       TT = TN + TS;
+	       TX = TS - TN;
+	       io[WS(os, 1)] = TT - TW;
+	       io[WS(os, 7)] = TY + TX;
+	       io[WS(os, 9)] = TW + TT;
+	       io[WS(os, 3)] = TX - TY;
+	  }
+	  { /* real outputs 2, 4, 6, 8 */
+	       E T18, T1a, T11, T19, TZ, T10;
+	       T18 = FNMS(KP587785252, T17, KP951056516 * T14);
+	       T1a = FMA(KP951056516, T17, KP587785252 * T14);
+	       TZ = FNMS(KP250000000, Tq, Tj);
+	       T10 = KP559016994 * (Tm - Tp);
+	       T11 = TZ - T10;
+	       T19 = T10 + TZ;
+	       ro[WS(os, 2)] = T11 - T18;
+	       ro[WS(os, 6)] = T19 + T1a;
+	       ro[WS(os, 8)] = T11 + T18;
+	       ro[WS(os, 4)] = T19 - T1a;
+	  }
+	  { /* imaginary outputs 2, 4, 6, 8 */
+	       E T1d, T1l, T1k, T1m, T1i, T1j;
+	       T1d = FNMS(KP587785252, T1c, KP951056516 * T1b);
+	       T1l = FMA(KP951056516, T1c, KP587785252 * T1b);
+	       T1i = FNMS(KP250000000, T1h, T1e);
+	       T1j = KP559016994 * (T1f - T1g);
+	       T1k = T1i - T1j;
+	       T1m = T1j + T1i;
+	       io[WS(os, 2)] = T1d + T1k;
+	       io[WS(os, 6)] = T1m - T1l;
+	       io[WS(os, 8)] = T1k - T1d;
+	       io[WS(os, 4)] = T1l + T1m;
+	  }
+     }
+}
+
+static const kdft_desc desc = { 10, "n1_10", {72, 12, 12, 0}, &GENUS, 0, 0, 0, 0 }; /* 72, 12, 12 equal the addition / multiplication / fused multiply-add counts quoted in this file's header comment; meaning of the remaining fields is not visible here */
+void X(codelet_n1_10) (planner *p) { /* registration hook: passes n1_10 and its descriptor to X(kdft_register) for planner p */
+     X(kdft_register) (p, n1_10, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_11.c b/src/fftw3/dft/codelets/standard/n1_11.c
new file mode 100644
index 0000000..29e8049
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_11.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:32 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 11 -name n1_11 -include n.h */
+
+/*
+ * This function contains 140 FP additions, 100 FP multiplications,
+ * (or, 60 additions, 20 multiplications, 80 fused multiply/add),
+ * 41 stack variables, and 44 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_11.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_11.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_11.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_11(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
+{ /* Generated DFT codelet (gen_notw -n 11, see the file header): every loop pass reads ri/ii at indices 0..10 and writes ro/io at indices 0..10. */
+     DK(KP654860733, +0.654860733945285064056925072466293553183791199); /* ~ |cos(8*pi/11)| */
+     DK(KP142314838, +0.142314838273285140443792668616369668791051361); /* ~ |cos(6*pi/11)| */
+     DK(KP959492973, +0.959492973614497389890368057066327699062454848); /* ~ |cos(10*pi/11)| */
+     DK(KP415415013, +0.415415013001886425529274149229623203524004910); /* ~ cos(4*pi/11) */
+     DK(KP841253532, +0.841253532831181168861811648919367717513292498); /* ~ cos(2*pi/11) */
+     DK(KP989821441, +0.989821441880932732376092037776718787376519372); /* ~ sin(6*pi/11) */
+     DK(KP909631995, +0.909631995354518371411715383079028460060241051); /* ~ sin(4*pi/11) */
+     DK(KP281732556, +0.281732556841429697711417915346616899035777899); /* ~ sin(10*pi/11) */
+     DK(KP540640817, +0.540640817455597582107635954318691695431770608); /* ~ sin(2*pi/11) */
+     DK(KP755749574, +0.755749574354258283774035843972344420179717445); /* ~ sin(8*pi/11) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; input pointers step by ivs and output pointers by ovs after each pass */
+	  E T1, TM, T4, TG, Tk, TR, Tw, TN, T7, TK, Ta, TH, Tn, TQ, Td; /* intermediates shared between the nested blocks below */
+	  E TJ, Tq, TO, Tt, TP, Tg, TI;
+	  { /* input 0 is kept as T1/TM; input pairs (1,10) and (2,9) are reduced to a sum and a difference each */
+	       E T2, T3, Ti, Tj;
+	       T1 = ri[0];
+	       TM = ii[0];
+	       T2 = ri[WS(is, 1)]; /* WS(is, k): presumably the array offset of logical index k under stride is (macro defined outside this file) */
+	       T3 = ri[WS(is, 10)];
+	       T4 = T2 + T3;
+	       TG = T3 - T2; /* real differences are taken as (higher index) - (lower index) ... */
+	       Ti = ii[WS(is, 1)];
+	       Tj = ii[WS(is, 10)];
+	       Tk = Ti - Tj; /* ... imaginary differences as (lower index) - (higher index) */
+	       TR = Ti + Tj;
+	       {
+		    E Tu, Tv, T5, T6;
+		    Tu = ii[WS(is, 2)];
+		    Tv = ii[WS(is, 9)];
+		    Tw = Tu - Tv;
+		    TN = Tu + Tv;
+		    T5 = ri[WS(is, 2)];
+		    T6 = ri[WS(is, 9)];
+		    T7 = T5 + T6;
+		    TK = T6 - T5;
+	       }
+	  }
+	  { /* input pairs (3,8), (4,7) and (5,6), reduced the same way */
+	       E T8, T9, To, Tp;
+	       T8 = ri[WS(is, 3)];
+	       T9 = ri[WS(is, 8)];
+	       Ta = T8 + T9;
+	       TH = T9 - T8;
+	       {
+		    E Tl, Tm, Tb, Tc;
+		    Tl = ii[WS(is, 3)];
+		    Tm = ii[WS(is, 8)];
+		    Tn = Tl - Tm;
+		    TQ = Tl + Tm;
+		    Tb = ri[WS(is, 4)];
+		    Tc = ri[WS(is, 7)];
+		    Td = Tb + Tc;
+		    TJ = Tc - Tb;
+	       }
+	       To = ii[WS(is, 4)];
+	       Tp = ii[WS(is, 7)];
+	       Tq = To - Tp;
+	       TO = To + Tp;
+	       {
+		    E Tr, Ts, Te, Tf;
+		    Tr = ii[WS(is, 5)];
+		    Ts = ii[WS(is, 6)];
+		    Tt = Tr - Ts;
+		    TP = Tr + Ts;
+		    Te = ri[WS(is, 5)];
+		    Tf = ri[WS(is, 6)];
+		    Tg = Te + Tf;
+		    TI = Tf - Te;
+	       }
+	  }
+	  { /* outputs 0, then output pairs (4,7) and (2,9); each pair is the sum and difference of two shared intermediates */
+	       E Tx, Th, TZ, T10;
+	       ro[0] = T1 + T4 + T7 + Ta + Td + Tg; /* sum of all eleven real inputs */
+	       io[0] = TM + TR + TN + TQ + TO + TP; /* sum of all eleven imaginary inputs */
+	       Tx = FMA(KP755749574, Tk, KP540640817 * Tn) + FNMS(KP909631995, Tt, KP281732556 * Tq) - (KP989821441 * Tw);
+	       Th = FMA(KP841253532, Ta, T1) + FNMS(KP959492973, Td, KP415415013 * Tg) + FNMA(KP142314838, T7, KP654860733 * T4);
+	       ro[WS(os, 7)] = Th - Tx;
+	       ro[WS(os, 4)] = Th + Tx;
+	       TZ = FMA(KP755749574, TG, KP540640817 * TH) + FNMS(KP909631995, TI, KP281732556 * TJ) - (KP989821441 * TK);
+	       T10 = FMA(KP841253532, TQ, TM) + FNMS(KP959492973, TO, KP415415013 * TP) + FNMA(KP142314838, TN, KP654860733 * TR);
+	       io[WS(os, 4)] = TZ + T10;
+	       io[WS(os, 7)] = T10 - TZ;
+	       {
+		    E TX, TY, Tz, Ty;
+		    TX = FMA(KP909631995, TG, KP755749574 * TK) + FNMA(KP540640817, TI, KP989821441 * TJ) - (KP281732556 * TH);
+		    TY = FMA(KP415415013, TR, TM) + FNMS(KP142314838, TO, KP841253532 * TP) + FNMA(KP959492973, TQ, KP654860733 * TN);
+		    io[WS(os, 2)] = TX + TY;
+		    io[WS(os, 9)] = TY - TX;
+		    Tz = FMA(KP909631995, Tk, KP755749574 * Tw) + FNMA(KP540640817, Tt, KP989821441 * Tq) - (KP281732556 * Tn);
+		    Ty = FMA(KP415415013, T4, T1) + FNMS(KP142314838, Td, KP841253532 * Tg) + FNMA(KP959492973, Ta, KP654860733 * T7);
+		    ro[WS(os, 9)] = Ty - Tz;
+		    ro[WS(os, 2)] = Ty + Tz;
+	       }
+	  }
+	  { /* output pairs (1,10), (3,8) and (5,6), formed the same way */
+	       E TB, TA, TT, TU;
+	       TB = FMA(KP540640817, Tk, KP909631995 * Tw) + FMA(KP989821441, Tn, KP755749574 * Tq) + (KP281732556 * Tt);
+	       TA = FMA(KP841253532, T4, T1) + FNMS(KP959492973, Tg, KP415415013 * T7) + FNMA(KP654860733, Td, KP142314838 * Ta);
+	       ro[WS(os, 10)] = TA - TB;
+	       ro[WS(os, 1)] = TA + TB;
+	       {
+		    E TV, TW, TD, TC;
+		    TV = FMA(KP540640817, TG, KP909631995 * TK) + FMA(KP989821441, TH, KP755749574 * TJ) + (KP281732556 * TI);
+		    TW = FMA(KP841253532, TR, TM) + FNMS(KP959492973, TP, KP415415013 * TN) + FNMA(KP654860733, TO, KP142314838 * TQ);
+		    io[WS(os, 1)] = TV + TW;
+		    io[WS(os, 10)] = TW - TV;
+		    TD = FMA(KP989821441, Tk, KP540640817 * Tq) + FNMS(KP909631995, Tn, KP755749574 * Tt) - (KP281732556 * Tw);
+		    TC = FMA(KP415415013, Ta, T1) + FNMS(KP654860733, Tg, KP841253532 * Td) + FNMA(KP959492973, T7, KP142314838 * T4);
+		    ro[WS(os, 8)] = TC - TD;
+		    ro[WS(os, 3)] = TC + TD;
+	       }
+	       TT = FMA(KP989821441, TG, KP540640817 * TJ) + FNMS(KP909631995, TH, KP755749574 * TI) - (KP281732556 * TK);
+	       TU = FMA(KP415415013, TQ, TM) + FNMS(KP654860733, TP, KP841253532 * TO) + FNMA(KP959492973, TN, KP142314838 * TR);
+	       io[WS(os, 3)] = TT + TU;
+	       io[WS(os, 8)] = TU - TT;
+	       {
+		    E TL, TS, TF, TE;
+		    TL = FMA(KP281732556, TG, KP755749574 * TH) + FNMS(KP909631995, TJ, KP989821441 * TI) - (KP540640817 * TK);
+		    TS = FMA(KP841253532, TN, TM) + FNMS(KP142314838, TP, KP415415013 * TO) + FNMA(KP654860733, TQ, KP959492973 * TR);
+		    io[WS(os, 5)] = TL + TS;
+		    io[WS(os, 6)] = TS - TL;
+		    TF = FMA(KP281732556, Tk, KP755749574 * Tn) + FNMS(KP909631995, Tq, KP989821441 * Tt) - (KP540640817 * Tw);
+		    TE = FMA(KP841253532, T7, T1) + FNMS(KP142314838, Tg, KP415415013 * Td) + FNMA(KP654860733, Ta, KP959492973 * T4);
+		    ro[WS(os, 6)] = TE - TF;
+		    ro[WS(os, 5)] = TE + TF;
+	       }
+	  }
+     }
+}
+
+static const kdft_desc desc = { 11, "n1_11", {60, 20, 80, 0}, &GENUS, 0, 0, 0, 0 }; /* 60, 20, 80 equal the addition / multiplication / fused multiply-add counts quoted in this file's header comment; meaning of the remaining fields is not visible here */
+void X(codelet_n1_11) (planner *p) { /* registration hook: passes n1_11 and its descriptor to X(kdft_register) for planner p */
+     X(kdft_register) (p, n1_11, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_12.c b/src/fftw3/dft/codelets/standard/n1_12.c
new file mode 100644
index 0000000..cced824
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_12.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:32 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 12 -name n1_12 -include n.h */
+
+/*
+ * This function contains 96 FP additions, 16 FP multiplications,
+ * (or, 88 additions, 8 multiplications, 8 fused multiply/add),
+ * 43 stack variables, and 48 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_12.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_12.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_12.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
+{ /* Generated DFT codelet (gen_notw -n 12, see the file header): every loop pass reads ri/ii at indices 0..11 and writes ro/io at indices 0..11. */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* ~ sqrt(3)/2 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; input pointers step by ivs and output pointers by ovs after each pass */
+	  E T5, TR, TA, Ts, TS, Tz, Ta, TU, TD, Tx, TV, TC, Tg, T1a, TG; /* three intermediates per input triple, shared with the output stage */
+	  E TJ, T1u, T1d, Tl, T1f, TL, TO, T1v, T1i;
+	  { /* real inputs 0, 4, 8: T5 is their sum; TR and TA are the two other combinations used later */
+	       E T1, T2, T3, T4;
+	       T1 = ri[0];
+	       T2 = ri[WS(is, 4)]; /* WS(is, k): presumably the array offset of logical index k under stride is (macro defined outside this file) */
+	       T3 = ri[WS(is, 8)];
+	       T4 = T2 + T3;
+	       T5 = T1 + T4;
+	       TR = FNMS(KP500000000, T4, T1);
+	       TA = KP866025403 * (T3 - T2);
+	  }
+	  { /* imaginary inputs 0, 4, 8: Ts is their sum */
+	       E To, Tp, Tq, Tr;
+	       To = ii[0];
+	       Tp = ii[WS(is, 4)];
+	       Tq = ii[WS(is, 8)];
+	       Tr = Tp + Tq;
+	       Ts = To + Tr;
+	       TS = KP866025403 * (Tp - Tq);
+	       Tz = FNMS(KP500000000, Tr, To);
+	  }
+	  { /* real inputs 6, 10, 2: Ta is their sum */
+	       E T6, T7, T8, T9;
+	       T6 = ri[WS(is, 6)];
+	       T7 = ri[WS(is, 10)];
+	       T8 = ri[WS(is, 2)];
+	       T9 = T7 + T8;
+	       Ta = T6 + T9;
+	       TU = FNMS(KP500000000, T9, T6);
+	       TD = KP866025403 * (T8 - T7);
+	  }
+	  { /* imaginary inputs 6, 10, 2: Tx is their sum */
+	       E Tt, Tu, Tv, Tw;
+	       Tt = ii[WS(is, 6)];
+	       Tu = ii[WS(is, 10)];
+	       Tv = ii[WS(is, 2)];
+	       Tw = Tu + Tv;
+	       Tx = Tt + Tw;
+	       TV = KP866025403 * (Tu - Tv);
+	       TC = FNMS(KP500000000, Tw, Tt);
+	  }
+	  { /* real inputs 3, 7, 11: Tg is their sum */
+	       E Tc, Td, Te, Tf;
+	       Tc = ri[WS(is, 3)];
+	       Td = ri[WS(is, 7)];
+	       Te = ri[WS(is, 11)];
+	       Tf = Td + Te;
+	       Tg = Tc + Tf;
+	       T1a = KP866025403 * (Te - Td);
+	       TG = FNMS(KP500000000, Tf, Tc);
+	  }
+	  { /* imaginary inputs 3, 7, 11: T1u is their sum */
+	       E T1b, TH, TI, T1c;
+	       T1b = ii[WS(is, 3)];
+	       TH = ii[WS(is, 7)];
+	       TI = ii[WS(is, 11)];
+	       T1c = TH + TI;
+	       TJ = KP866025403 * (TH - TI);
+	       T1u = T1b + T1c;
+	       T1d = FNMS(KP500000000, T1c, T1b);
+	  }
+	  { /* real inputs 9, 1, 5: Tl is their sum */
+	       E Th, Ti, Tj, Tk;
+	       Th = ri[WS(is, 9)];
+	       Ti = ri[WS(is, 1)];
+	       Tj = ri[WS(is, 5)];
+	       Tk = Ti + Tj;
+	       Tl = Th + Tk;
+	       T1f = KP866025403 * (Tj - Ti);
+	       TL = FNMS(KP500000000, Tk, Th);
+	  }
+	  { /* imaginary inputs 9, 1, 5: T1v is their sum */
+	       E T1g, TM, TN, T1h;
+	       T1g = ii[WS(is, 9)];
+	       TM = ii[WS(is, 1)];
+	       TN = ii[WS(is, 5)];
+	       T1h = TM + TN;
+	       TO = KP866025403 * (TM - TN);
+	       T1v = T1g + T1h;
+	       T1i = FNMS(KP500000000, T1h, T1g);
+	  }
+	  { /* output stage: outputs 0, 3, 6, 9 come from the triple sums only; the remaining eight use the other intermediates */
+	       E Tb, Tm, T1t, T1w;
+	       Tb = T5 + Ta; /* sum of the even-indexed real inputs */
+	       Tm = Tg + Tl; /* sum of the odd-indexed real inputs */
+	       ro[WS(os, 6)] = Tb - Tm;
+	       ro[0] = Tb + Tm; /* sum of all twelve real inputs */
+	       {
+		    E T1x, T1y, Tn, Ty;
+		    T1x = Ts + Tx; /* sum of the even-indexed imaginary inputs */
+		    T1y = T1u + T1v; /* sum of the odd-indexed imaginary inputs */
+		    io[WS(os, 6)] = T1x - T1y;
+		    io[0] = T1x + T1y; /* sum of all twelve imaginary inputs */
+		    Tn = Tg - Tl;
+		    Ty = Ts - Tx;
+		    io[WS(os, 3)] = Tn + Ty;
+		    io[WS(os, 9)] = Ty - Tn;
+	       }
+	       T1t = T5 - Ta;
+	       T1w = T1u - T1v;
+	       ro[WS(os, 3)] = T1t - T1w;
+	       ro[WS(os, 9)] = T1t + T1w;
+	       { /* outputs 1, 4, 7, 10 (real and imaginary) */
+		    E T11, T1l, T1k, T1m, T14, T18, T17, T19;
+		    {
+			 E TZ, T10, T1e, T1j;
+			 TZ = TA + Tz;
+			 T10 = TD + TC;
+			 T11 = TZ - T10;
+			 T1l = TZ + T10;
+			 T1e = T1a + T1d;
+			 T1j = T1f + T1i;
+			 T1k = T1e - T1j;
+			 T1m = T1e + T1j;
+		    }
+		    {
+			 E T12, T13, T15, T16;
+			 T12 = TG + TJ;
+			 T13 = TL + TO;
+			 T14 = T12 - T13;
+			 T18 = T12 + T13;
+			 T15 = TR + TS;
+			 T16 = TU + TV;
+			 T17 = T15 + T16;
+			 T19 = T15 - T16;
+		    }
+		    io[WS(os, 1)] = T11 - T14;
+		    ro[WS(os, 1)] = T19 + T1k;
+		    io[WS(os, 7)] = T11 + T14;
+		    ro[WS(os, 7)] = T19 - T1k;
+		    ro[WS(os, 10)] = T17 - T18;
+		    io[WS(os, 10)] = T1l - T1m;
+		    ro[WS(os, 4)] = T17 + T18;
+		    io[WS(os, 4)] = T1l + T1m;
+	       }
+	       { /* outputs 2, 5, 8, 11 (real and imaginary): same shape as the block above with the opposite signs on TA/TD/T1a/T1f/TJ/TO/TS/TV */
+		    E TF, T1r, T1q, T1s, TQ, TY, TX, T1n;
+		    {
+			 E TB, TE, T1o, T1p;
+			 TB = Tz - TA;
+			 TE = TC - TD;
+			 TF = TB - TE;
+			 T1r = TB + TE;
+			 T1o = T1d - T1a;
+			 T1p = T1i - T1f;
+			 T1q = T1o - T1p;
+			 T1s = T1o + T1p;
+		    }
+		    {
+			 E TK, TP, TT, TW;
+			 TK = TG - TJ;
+			 TP = TL - TO;
+			 TQ = TK - TP;
+			 TY = TK + TP;
+			 TT = TR - TS;
+			 TW = TU - TV;
+			 TX = TT + TW;
+			 T1n = TT - TW;
+		    }
+		    io[WS(os, 5)] = TF - TQ;
+		    ro[WS(os, 5)] = T1n + T1q;
+		    io[WS(os, 11)] = TF + TQ;
+		    ro[WS(os, 11)] = T1n - T1q;
+		    ro[WS(os, 2)] = TX - TY;
+		    io[WS(os, 2)] = T1r - T1s;
+		    ro[WS(os, 8)] = TX + TY;
+		    io[WS(os, 8)] = T1r + T1s;
+	       }
+	  }
+     }
+}
+
+static const kdft_desc desc = { 12, "n1_12", {88, 8, 8, 0}, &GENUS, 0, 0, 0, 0 }; /* 88, 8, 8 equal the addition / multiplication / fused multiply-add counts quoted in this file's header comment; meaning of the remaining fields is not visible here */
+void X(codelet_n1_12) (planner *p) { /* registration hook: passes n1_12 and its descriptor to X(kdft_register) for planner p */
+     X(kdft_register) (p, n1_12, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_13.c b/src/fftw3/dft/codelets/standard/n1_13.c
new file mode 100644
index 0000000..3f8c804
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_13.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:33 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 13 -name n1_13 -include n.h */
+
+/*
+ * This function contains 176 FP additions, 68 FP multiplications,
+ * (or, 138 additions, 30 multiplications, 38 fused multiply/add),
+ * 71 stack variables, and 52 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_13.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_13.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_13.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
+{ /* Generated DFT codelet (gen_notw -n 13, see the file header): every loop pass reads ri/ii at indices 0..12 and writes ro/io at indices 0..12. */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); /* 2 */
+     DK(KP083333333, +0.083333333333333333333333333333333333333333333); /* 1/12 */
+     DK(KP251768516, +0.251768516431883313623436926934233488546674281);
+     DK(KP075902986, +0.075902986037193865983102897245103540356428373);
+     DK(KP132983124, +0.132983124607418643793760531921092974399165133);
+     DK(KP258260390, +0.258260390311744861420450644284508567852516811);
+     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* ~ sqrt(3) */
+     DK(KP300238635, +0.300238635966332641462884626667381504676006424);
+     DK(KP011599105, +0.011599105605768290721655456654083252189827041);
+     DK(KP156891391, +0.156891391051584611046832726756003269660212636);
+     DK(KP256247671, +0.256247671582936600958684654061725059144125175);
+     DK(KP174138601, +0.174138601152135905005660794929264742616964676);
+     DK(KP575140729, +0.575140729474003121368385547455453388461001608);
+     DK(KP503537032, +0.503537032863766627246873853868466977093348562);
+     DK(KP113854479, +0.113854479055790798974654345867655310534642560);
+     DK(KP265966249, +0.265966249214837287587521063842185948798330267);
+     DK(KP387390585, +0.387390585467617292130675966426762851778775217);
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* ~ sqrt(3)/2 */
+     DK(KP300462606, +0.300462606288665774426601772289207995520941381);
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; input pointers step by ivs and output pointers by ovs after each pass */
+	  E T1, T1q, Tt, Tu, To, T22, T20, T24, TF, TH, TA, TI, T1X, T25, T2a; /* intermediates carried from the two input stages to the two output stages */
+	  E T2d, T18, T1n, T2k, T2n, T1l, T1r, T1f, T1o, T2h, T2m;
+	  T1 = ri[0];
+	  T1q = ii[0];
+	  { /* input stage for real inputs 1..12: pair (8,5), triples (12,10,4) and (1,3,9), pairs (11,6) and (7,2) */
+	       E Tf, Tp, Tb, TC, Tx, T6, TB, Tw, Ti, Tq, Tl, Tr, Tm, Ts, Td;
+	       E Te, Tc, Tn;
+	       Td = ri[WS(is, 8)]; /* WS(is, k): presumably the array offset of logical index k under stride is (macro defined outside this file) */
+	       Te = ri[WS(is, 5)];
+	       Tf = Td + Te;
+	       Tp = Td - Te;
+	       {
+		    E T7, T8, T9, Ta;
+		    T7 = ri[WS(is, 12)];
+		    T8 = ri[WS(is, 10)];
+		    T9 = ri[WS(is, 4)];
+		    Ta = T8 + T9;
+		    Tb = T7 + Ta;
+		    TC = T8 - T9;
+		    Tx = FNMS(KP500000000, Ta, T7);
+	       }
+	       {
+		    E T2, T3, T4, T5;
+		    T2 = ri[WS(is, 1)];
+		    T3 = ri[WS(is, 3)];
+		    T4 = ri[WS(is, 9)];
+		    T5 = T3 + T4;
+		    T6 = T2 + T5;
+		    TB = T3 - T4;
+		    Tw = FNMS(KP500000000, T5, T2);
+	       }
+	       {
+		    E Tg, Th, Tj, Tk;
+		    Tg = ri[WS(is, 11)];
+		    Th = ri[WS(is, 6)];
+		    Ti = Tg + Th;
+		    Tq = Tg - Th;
+		    Tj = ri[WS(is, 7)];
+		    Tk = ri[WS(is, 2)];
+		    Tl = Tj + Tk;
+		    Tr = Tj - Tk;
+	       }
+	       Tm = Ti + Tl;
+	       Ts = Tq + Tr;
+	       Tt = Tp + Ts;
+	       Tu = T6 - Tb;
+	       Tc = T6 + Tb;
+	       Tn = Tf + Tm;
+	       To = Tc + Tn; /* sum of real inputs 1..12 */
+	       T22 = KP300462606 * (Tc - Tn);
+	       {
+		    E T1Y, T1Z, TD, TE;
+		    T1Y = TB + TC;
+		    T1Z = Tq - Tr;
+		    T20 = T1Y - T1Z;
+		    T24 = T1Y + T1Z;
+		    TD = KP866025403 * (TB - TC);
+		    TE = FNMS(KP500000000, Ts, Tp);
+		    TF = TD - TE;
+		    TH = TD + TE;
+	       }
+	       {
+		    E Ty, Tz, T1V, T1W;
+		    Ty = Tw - Tx;
+		    Tz = KP866025403 * (Ti - Tl);
+		    TA = Ty + Tz;
+		    TI = Ty - Tz;
+		    T1V = Tw + Tx;
+		    T1W = FNMS(KP500000000, Tm, Tf);
+		    T1X = T1V - T1W;
+		    T25 = T1V + T1W;
+	       }
+	  }
+	  { /* input stage for imaginary inputs 1..12, using the same index grouping as the real stage */
+	       E TZ, T2b, TV, T1i, T1a, TQ, T1h, T19, T12, T1d, T15, T1c, T16, T2c, TX;
+	       E TY, TW, T17;
+	       TX = ii[WS(is, 8)];
+	       TY = ii[WS(is, 5)];
+	       TZ = TX + TY;
+	       T2b = TX - TY;
+	       {
+		    E TR, TS, TT, TU;
+		    TR = ii[WS(is, 12)];
+		    TS = ii[WS(is, 10)];
+		    TT = ii[WS(is, 4)];
+		    TU = TS + TT;
+		    TV = FNMS(KP500000000, TU, TR);
+		    T1i = TR + TU;
+		    T1a = TS - TT;
+	       }
+	       {
+		    E TM, TN, TO, TP;
+		    TM = ii[WS(is, 1)];
+		    TN = ii[WS(is, 3)];
+		    TO = ii[WS(is, 9)];
+		    TP = TN + TO;
+		    TQ = FNMS(KP500000000, TP, TM);
+		    T1h = TM + TP;
+		    T19 = TN - TO;
+	       }
+	       {
+		    E T10, T11, T13, T14;
+		    T10 = ii[WS(is, 11)];
+		    T11 = ii[WS(is, 6)];
+		    T12 = T10 + T11;
+		    T1d = T10 - T11;
+		    T13 = ii[WS(is, 7)];
+		    T14 = ii[WS(is, 2)];
+		    T15 = T13 + T14;
+		    T1c = T13 - T14;
+	       }
+	       T16 = T12 + T15;
+	       T2c = T1d + T1c;
+	       T2a = T1h - T1i;
+	       T2d = T2b + T2c;
+	       TW = TQ + TV;
+	       T17 = FNMS(KP500000000, T16, TZ);
+	       T18 = TW - T17;
+	       T1n = TW + T17;
+	       {
+		    E T2i, T2j, T1j, T1k;
+		    T2i = TQ - TV;
+		    T2j = KP866025403 * (T15 - T12);
+		    T2k = T2i + T2j;
+		    T2n = T2i - T2j;
+		    T1j = T1h + T1i;
+		    T1k = TZ + T16;
+		    T1l = KP300462606 * (T1j - T1k);
+		    T1r = T1j + T1k; /* sum of imaginary inputs 1..12 */
+	       }
+	       {
+		    E T1b, T1e, T2f, T2g;
+		    T1b = T19 + T1a;
+		    T1e = T1c - T1d;
+		    T1f = T1b + T1e;
+		    T1o = T1e - T1b;
+		    T2f = FNMS(KP500000000, T2c, T2b);
+		    T2g = KP866025403 * (T1a - T19);
+		    T2h = T2f - T2g;
+		    T2m = T2g + T2f;
+	       }
+	  }
+	  ro[0] = T1 + To; /* sum of all thirteen real inputs */
+	  io[0] = T1q + T1r; /* sum of all thirteen imaginary inputs */
+	  { /* output stage for io[1..12]; combines intermediates from both input stages */
+	       E T1D, T1N, T1y, T1x, T1E, T1O, Tv, TK, T1J, T1Q, T1m, T1R, T1t, T1I, TG;
+	       E TJ;
+	       {
+		    E T1B, T1C, T1v, T1w;
+		    T1B = FMA(KP387390585, T1f, KP265966249 * T18);
+		    T1C = FMA(KP113854479, T1o, KP503537032 * T1n);
+		    T1D = T1B + T1C;
+		    T1N = T1C - T1B;
+		    T1y = FMA(KP575140729, Tu, KP174138601 * Tt);
+		    T1v = FNMS(KP156891391, TH, KP256247671 * TI);
+		    T1w = FMA(KP011599105, TF, KP300238635 * TA);
+		    T1x = T1v - T1w;
+		    T1E = T1y + T1x;
+		    T1O = KP1_732050807 * (T1v + T1w);
+	       }
+	       Tv = FNMS(KP174138601, Tu, KP575140729 * Tt);
+	       TG = FNMS(KP300238635, TF, KP011599105 * TA);
+	       TJ = FMA(KP256247671, TH, KP156891391 * TI);
+	       TK = TG - TJ;
+	       T1J = KP1_732050807 * (TJ + TG);
+	       T1Q = Tv - TK;
+	       {
+		    E T1g, T1H, T1p, T1s, T1G;
+		    T1g = FNMS(KP132983124, T1f, KP258260390 * T18);
+		    T1H = T1l - T1g;
+		    T1p = FNMS(KP251768516, T1o, KP075902986 * T1n);
+		    T1s = FNMS(KP083333333, T1r, T1q);
+		    T1G = T1s - T1p;
+		    T1m = FMA(KP2_000000000, T1g, T1l);
+		    T1R = T1H + T1G;
+		    T1t = FMA(KP2_000000000, T1p, T1s);
+		    T1I = T1G - T1H;
+	       }
+	       { /* stores: index pairs (1,12), (5,8), (4,10), (3,9), (6,11), (2,7) are each written as a sum and a difference */
+		    E TL, T1u, T1P, T1S;
+		    TL = FMA(KP2_000000000, TK, Tv);
+		    T1u = T1m + T1t;
+		    io[WS(os, 1)] = TL + T1u;
+		    io[WS(os, 12)] = T1u - TL;
+		    {
+			 E T1z, T1A, T1T, T1U;
+			 T1z = FMS(KP2_000000000, T1x, T1y);
+			 T1A = T1t - T1m;
+			 io[WS(os, 5)] = T1z + T1A;
+			 io[WS(os, 8)] = T1A - T1z;
+			 T1T = T1R - T1Q;
+			 T1U = T1O + T1N;
+			 io[WS(os, 4)] = T1T - T1U;
+			 io[WS(os, 10)] = T1U + T1T;
+		    }
+		    T1P = T1N - T1O;
+		    T1S = T1Q + T1R;
+		    io[WS(os, 3)] = T1P + T1S;
+		    io[WS(os, 9)] = T1S - T1P;
+		    {
+			 E T1L, T1M, T1F, T1K;
+			 T1L = T1J + T1I;
+			 T1M = T1E + T1D;
+			 io[WS(os, 6)] = T1L - T1M;
+			 io[WS(os, 11)] = T1M + T1L;
+			 T1F = T1D - T1E;
+			 T1K = T1I - T1J;
+			 io[WS(os, 2)] = T1F + T1K;
+			 io[WS(os, 7)] = T1K - T1F;
+		    }
+	       }
+	  }
+	  { /* output stage for ro[1..12]; combines intermediates from both input stages */
+	       E T2y, T2I, T2J, T2K, T2B, T2L, T2e, T2p, T2u, T2G, T23, T2F, T28, T2t, T2l;
+	       E T2o;
+	       {
+		    E T2w, T2x, T2z, T2A;
+		    T2w = FMA(KP387390585, T20, KP265966249 * T1X);
+		    T2x = FNMS(KP503537032, T25, KP113854479 * T24);
+		    T2y = T2w + T2x;
+		    T2I = T2w - T2x;
+		    T2J = FMA(KP575140729, T2a, KP174138601 * T2d);
+		    T2z = FNMS(KP300238635, T2n, KP011599105 * T2m);
+		    T2A = FNMS(KP156891391, T2h, KP256247671 * T2k);
+		    T2K = T2z + T2A;
+		    T2B = KP1_732050807 * (T2z - T2A);
+		    T2L = T2J + T2K;
+	       }
+	       T2e = FNMS(KP575140729, T2d, KP174138601 * T2a);
+	       T2l = FMA(KP256247671, T2h, KP156891391 * T2k);
+	       T2o = FMA(KP300238635, T2m, KP011599105 * T2n);
+	       T2p = T2l - T2o;
+	       T2u = T2e - T2p;
+	       T2G = KP1_732050807 * (T2o + T2l);
+	       {
+		    E T21, T2r, T26, T27, T2s;
+		    T21 = FNMS(KP132983124, T20, KP258260390 * T1X);
+		    T2r = T22 - T21;
+		    T26 = FMA(KP251768516, T24, KP075902986 * T25);
+		    T27 = FNMS(KP083333333, To, T1);
+		    T2s = T27 - T26;
+		    T23 = FMA(KP2_000000000, T21, T22);
+		    T2F = T2s - T2r;
+		    T28 = FMA(KP2_000000000, T26, T27);
+		    T2t = T2r + T2s;
+	       }
+	       { /* stores: index pairs (1,12), (4,10), (5,8), (6,11), (2,7), (3,9) are each written as a sum and a difference */
+		    E T29, T2q, T2N, T2O;
+		    T29 = T23 + T28;
+		    T2q = FMA(KP2_000000000, T2p, T2e);
+		    ro[WS(os, 12)] = T29 - T2q;
+		    ro[WS(os, 1)] = T29 + T2q;
+		    {
+			 E T2v, T2C, T2P, T2Q;
+			 T2v = T2t - T2u;
+			 T2C = T2y - T2B;
+			 ro[WS(os, 10)] = T2v - T2C;
+			 ro[WS(os, 4)] = T2v + T2C;
+			 T2P = T28 - T23;
+			 T2Q = FMS(KP2_000000000, T2K, T2J);
+			 ro[WS(os, 5)] = T2P - T2Q;
+			 ro[WS(os, 8)] = T2P + T2Q;
+		    }
+		    T2N = T2F - T2G;
+		    T2O = T2L - T2I;
+		    ro[WS(os, 11)] = T2N - T2O;
+		    ro[WS(os, 6)] = T2N + T2O;
+		    {
+			 E T2H, T2M, T2D, T2E;
+			 T2H = T2F + T2G;
+			 T2M = T2I + T2L;
+			 ro[WS(os, 7)] = T2H - T2M;
+			 ro[WS(os, 2)] = T2H + T2M;
+			 T2D = T2t + T2u;
+			 T2E = T2y + T2B;
+			 ro[WS(os, 3)] = T2D - T2E;
+			 ro[WS(os, 9)] = T2D + T2E;
+		    }
+	       }
+	  }
+     }
+}
+
+static const kdft_desc desc = { 13, "n1_13", {138, 30, 38, 0}, &GENUS, 0, 0, 0, 0 }; /* 138, 30, 38 equal the addition / multiplication / fused multiply-add counts quoted in this file's header comment; meaning of the remaining fields is not visible here */
+void X(codelet_n1_13) (planner *p) { /* registration hook: passes n1_13 and its descriptor to X(kdft_register) for planner p */
+     X(kdft_register) (p, n1_13, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_14.c b/src/fftw3/dft/codelets/standard/n1_14.c
new file mode 100644
index 0000000..a5d5928
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_14.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:34 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 14 -name n1_14 -include n.h */
+
+/*
+ * This function contains 148 FP additions, 72 FP multiplications,
+ * (or, 100 additions, 24 multiplications, 48 fused multiply/add),
+ * 43 stack variables, and 56 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_14.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_14.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_14.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_14(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
+{ /* 14-point complex DFT on split arrays: reads real parts from ri and imaginary parts from ii (slots 0..13), writes ro/io (slots 0..13), repeated v times */
+     DK(KP222520933, +0.222520933956314404288902564496794759466355569); /* cos(3*pi/7) */
+     DK(KP900968867, +0.900968867902419126236102319507445051165919162); /* cos(pi/7) */
+     DK(KP623489801, +0.623489801858733530525004884004239810632274731); /* cos(2*pi/7) */
+     DK(KP433883739, +0.433883739117558120475768332848358754609990728); /* sin(pi/7) */
+     DK(KP781831482, +0.781831482468029808708444526674057750232334519); /* sin(2*pi/7) */
+     DK(KP974927912, +0.974927912181823607018131682993931217232785801); /* sin(3*pi/7) */
+     int i; /* number of transforms still to do */
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* one transform per pass; inputs step by ivs, outputs by ovs */
+	  E T3, Tp, T16, T1f, Ta, T1q, Ts, T10, TG, T1z, T19, T1i, Th, T1s, Tv;
+	  E T12, TU, T1B, T17, T1o, To, T1r, Ty, T11, TN, T1A, T18, T1l;
+	  { /* inputs 0 and 7: the differences (T3, T16) feed the odd outputs, the sums (Tp, T1f) feed the even outputs */
+	       E T1, T2, T14, T15;
+	       T1 = ri[0];
+	       T2 = ri[WS(is, 7)]; /* WS(is, 7): presumably the offset of slot 7 for stride is - macro defined outside this file */
+	       T3 = T1 - T2;
+	       Tp = T1 + T2;
+	       T14 = ii[0];
+	       T15 = ii[WS(is, 7)];
+	       T16 = T14 - T15;
+	       T1f = T14 + T15;
+	  }
+	  { /* real parts of inputs 2, 9, 12, 5 */
+	       E T6, Tq, T9, Tr;
+	       {
+		    E T4, T5, T7, T8;
+		    T4 = ri[WS(is, 2)];
+		    T5 = ri[WS(is, 9)];
+		    T6 = T4 - T5;
+		    Tq = T4 + T5;
+		    T7 = ri[WS(is, 12)];
+		    T8 = ri[WS(is, 5)];
+		    T9 = T7 - T8;
+		    Tr = T7 + T8;
+	       }
+	       Ta = T6 + T9;
+	       T1q = Tr - Tq;
+	       Ts = Tq + Tr;
+	       T10 = T9 - T6;
+	  }
+	  { /* imaginary parts of inputs 2, 9, 12, 5 */
+	       E TC, T1g, TF, T1h;
+	       {
+		    E TA, TB, TD, TE;
+		    TA = ii[WS(is, 2)];
+		    TB = ii[WS(is, 9)];
+		    TC = TA - TB;
+		    T1g = TA + TB;
+		    TD = ii[WS(is, 12)];
+		    TE = ii[WS(is, 5)];
+		    TF = TD - TE;
+		    T1h = TD + TE;
+	       }
+	       TG = TC - TF;
+	       T1z = T1g - T1h;
+	       T19 = TC + TF;
+	       T1i = T1g + T1h;
+	  }
+	  { /* real parts of inputs 4, 11, 10, 3 */
+	       E Td, Tt, Tg, Tu;
+	       {
+		    E Tb, Tc, Te, Tf;
+		    Tb = ri[WS(is, 4)];
+		    Tc = ri[WS(is, 11)];
+		    Td = Tb - Tc;
+		    Tt = Tb + Tc;
+		    Te = ri[WS(is, 10)];
+		    Tf = ri[WS(is, 3)];
+		    Tg = Te - Tf;
+		    Tu = Te + Tf;
+	       }
+	       Th = Td + Tg;
+	       T1s = Tt - Tu;
+	       Tv = Tt + Tu;
+	       T12 = Tg - Td;
+	  }
+	  { /* imaginary parts of inputs 4, 11, 10, 3 */
+	       E TQ, T1m, TT, T1n;
+	       {
+		    E TO, TP, TR, TS;
+		    TO = ii[WS(is, 4)];
+		    TP = ii[WS(is, 11)];
+		    TQ = TO - TP;
+		    T1m = TO + TP;
+		    TR = ii[WS(is, 10)];
+		    TS = ii[WS(is, 3)];
+		    TT = TR - TS;
+		    T1n = TR + TS;
+	       }
+	       TU = TQ - TT;
+	       T1B = T1n - T1m;
+	       T17 = TQ + TT;
+	       T1o = T1m + T1n;
+	  }
+	  { /* real parts of inputs 6, 13, 8, 1 */
+	       E Tk, Tw, Tn, Tx;
+	       {
+		    E Ti, Tj, Tl, Tm;
+		    Ti = ri[WS(is, 6)];
+		    Tj = ri[WS(is, 13)];
+		    Tk = Ti - Tj;
+		    Tw = Ti + Tj;
+		    Tl = ri[WS(is, 8)];
+		    Tm = ri[WS(is, 1)];
+		    Tn = Tl - Tm;
+		    Tx = Tl + Tm;
+	       }
+	       To = Tk + Tn;
+	       T1r = Tw - Tx;
+	       Ty = Tw + Tx;
+	       T11 = Tn - Tk;
+	  }
+	  { /* imaginary parts of inputs 6, 13, 8, 1 */
+	       E TJ, T1j, TM, T1k;
+	       {
+		    E TH, TI, TK, TL;
+		    TH = ii[WS(is, 6)];
+		    TI = ii[WS(is, 13)];
+		    TJ = TH - TI;
+		    T1j = TH + TI;
+		    TK = ii[WS(is, 8)];
+		    TL = ii[WS(is, 1)];
+		    TM = TK - TL;
+		    T1k = TK + TL;
+	       }
+	       TN = TJ - TM;
+	       T1A = T1k - T1j;
+	       T18 = TJ + TM;
+	       T1l = T1j + T1k;
+	  }
+	  ro[WS(os, 7)] = T3 + Ta + Th + To; /* output 7 needs no multiplications: it is the sum of the pairwise differences */
+	  io[WS(os, 7)] = T16 + T19 + T17 + T18;
+	  ro[0] = Tp + Ts + Tv + Ty; /* output 0 is the plain sum of all 14 inputs */
+	  io[0] = T1f + T1i + T1o + T1l;
+	  { /* outputs 5 and 9, built from the difference terms */
+	       E TV, Tz, T1e, T1d;
+	       TV = FNMS(KP781831482, TN, KP974927912 * TG) - (KP433883739 * TU);
+	       Tz = FMA(KP623489801, To, T3) + FNMA(KP900968867, Th, KP222520933 * Ta);
+	       ro[WS(os, 5)] = Tz - TV;
+	       ro[WS(os, 9)] = Tz + TV;
+	       T1e = FNMS(KP781831482, T11, KP974927912 * T10) - (KP433883739 * T12);
+	       T1d = FMA(KP623489801, T18, T16) + FNMA(KP900968867, T17, KP222520933 * T19);
+	       io[WS(os, 5)] = T1d - T1e;
+	       io[WS(os, 9)] = T1e + T1d;
+	  }
+	  { /* outputs 13 and 1, built from the difference terms */
+	       E TX, TW, T1b, T1c;
+	       TX = FMA(KP781831482, TG, KP974927912 * TU) + (KP433883739 * TN);
+	       TW = FMA(KP623489801, Ta, T3) + FNMA(KP900968867, To, KP222520933 * Th);
+	       ro[WS(os, 13)] = TW - TX;
+	       ro[WS(os, 1)] = TW + TX;
+	       T1b = FMA(KP781831482, T10, KP974927912 * T12) + (KP433883739 * T11);
+	       T1c = FMA(KP623489801, T19, T16) + FNMA(KP900968867, T18, KP222520933 * T17);
+	       io[WS(os, 1)] = T1b + T1c;
+	       io[WS(os, 13)] = T1c - T1b;
+	  }
+	  { /* outputs 11 and 3, built from the difference terms */
+	       E TZ, TY, T13, T1a;
+	       TZ = FMA(KP433883739, TG, KP974927912 * TN) - (KP781831482 * TU);
+	       TY = FMA(KP623489801, Th, T3) + FNMA(KP222520933, To, KP900968867 * Ta);
+	       ro[WS(os, 11)] = TY - TZ;
+	       ro[WS(os, 3)] = TY + TZ;
+	       T13 = FMA(KP433883739, T10, KP974927912 * T11) - (KP781831482 * T12);
+	       T1a = FMA(KP623489801, T17, T16) + FNMA(KP222520933, T18, KP900968867 * T19);
+	       io[WS(os, 3)] = T13 + T1a;
+	       io[WS(os, 11)] = T1a - T13;
+	  }
+	  { /* outputs 6 and 8, built from the sum terms */
+	       E T1t, T1p, T1C, T1y;
+	       T1t = FNMS(KP433883739, T1r, KP781831482 * T1q) - (KP974927912 * T1s);
+	       T1p = FMA(KP623489801, T1i, T1f) + FNMA(KP900968867, T1l, KP222520933 * T1o);
+	       io[WS(os, 6)] = T1p - T1t;
+	       io[WS(os, 8)] = T1t + T1p;
+	       T1C = FNMS(KP433883739, T1A, KP781831482 * T1z) - (KP974927912 * T1B);
+	       T1y = FMA(KP623489801, Ts, Tp) + FNMA(KP900968867, Ty, KP222520933 * Tv);
+	       ro[WS(os, 6)] = T1y - T1C;
+	       ro[WS(os, 8)] = T1y + T1C;
+	  }
+	  { /* outputs 4 and 10, built from the sum terms */
+	       E T1v, T1u, T1E, T1D;
+	       T1v = FMA(KP433883739, T1q, KP781831482 * T1s) - (KP974927912 * T1r);
+	       T1u = FMA(KP623489801, T1o, T1f) + FNMA(KP222520933, T1l, KP900968867 * T1i);
+	       io[WS(os, 4)] = T1u - T1v;
+	       io[WS(os, 10)] = T1v + T1u;
+	       T1E = FMA(KP433883739, T1z, KP781831482 * T1B) - (KP974927912 * T1A);
+	       T1D = FMA(KP623489801, Tv, Tp) + FNMA(KP222520933, Ty, KP900968867 * Ts);
+	       ro[WS(os, 4)] = T1D - T1E;
+	       ro[WS(os, 10)] = T1D + T1E;
+	  }
+	  { /* outputs 2 and 12, built from the sum terms */
+	       E T1w, T1x, T1G, T1F;
+	       T1w = FMA(KP974927912, T1q, KP433883739 * T1s) + (KP781831482 * T1r);
+	       T1x = FMA(KP623489801, T1l, T1f) + FNMA(KP900968867, T1o, KP222520933 * T1i);
+	       io[WS(os, 2)] = T1w + T1x;
+	       io[WS(os, 12)] = T1x - T1w;
+	       T1G = FMA(KP974927912, T1z, KP433883739 * T1B) + (KP781831482 * T1A);
+	       T1F = FMA(KP623489801, Ty, Tp) + FNMA(KP900968867, Tv, KP222520933 * Ts);
+	       ro[WS(os, 12)] = T1F - T1G;
+	       ro[WS(os, 2)] = T1F + T1G;
+	  }
+     }
+}
+
+static const kdft_desc desc = { 14, "n1_14", {100, 24, 48, 0}, &GENUS, 0, 0, 0, 0 }; /* descriptor for n1_14: size 14, name string; {100, 24, 48, ...} repeats the add/mul/fma counts from the generated header comment; GENUS and the trailing zeros are defined outside this file */
+void X(codelet_n1_14) (planner *p) { /* X(...) is a naming macro defined outside this file */
+     X(kdft_register) (p, n1_14, &desc); /* passes the n1_14 kernel and its descriptor to kdft_register for planner p */
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_15.c b/src/fftw3/dft/codelets/standard/n1_15.c
new file mode 100644
index 0000000..5fbb0f4
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_15.c
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:34 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 15 -name n1_15 -include n.h */
+
+/*
+ * This function contains 156 FP additions, 56 FP multiplications,
+ * (or, 128 additions, 28 multiplications, 28 fused multiply/add),
+ * 69 stack variables, and 60 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_15.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_15.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_15.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_15(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
+{ /* 15-point complex DFT on split arrays: five 3-input butterflies followed by three 5-output combinations, repeated v times */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* sin(2*pi/5) */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* sqrt(5)/4 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
+     int i; /* number of transforms still to do */
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* one transform per pass; inputs step by ivs, outputs by ovs */
+	  E T5, T2l, Tx, TV, T1C, T20, Tl, Tq, Tr, TN, TS, TT, T2c, T2d, T2n;
+	  E T1O, T1P, T22, T1l, T1q, T1w, TZ, T10, T11, Ta, Tf, Tg, TC, TH, TI;
+	  E T2f, T2g, T2m, T1R, T1S, T21, T1a, T1f, T1v, TW, TX, TY;
+	  { /* 3-input butterfly on inputs 0, 5, 10 */
+	       E T1, T1z, T4, T1y, Tw, T1A, Tt, T1B;
+	       T1 = ri[0];
+	       T1z = ii[0];
+	       {
+		    E T2, T3, Tu, Tv;
+		    T2 = ri[WS(is, 5)]; /* WS(is, 5): presumably the offset of slot 5 for stride is - macro defined outside this file */
+		    T3 = ri[WS(is, 10)];
+		    T4 = T2 + T3;
+		    T1y = KP866025403 * (T3 - T2);
+		    Tu = ii[WS(is, 5)];
+		    Tv = ii[WS(is, 10)];
+		    Tw = KP866025403 * (Tu - Tv);
+		    T1A = Tu + Tv;
+	       }
+	       T5 = T1 + T4;
+	       T2l = T1z + T1A;
+	       Tt = FNMS(KP500000000, T4, T1);
+	       Tx = Tt - Tw;
+	       TV = Tt + Tw;
+	       T1B = FNMS(KP500000000, T1A, T1z);
+	       T1C = T1y + T1B;
+	       T20 = T1B - T1y;
+	  }
+	  { /* 3-input butterflies on inputs (6, 11, 1) and (9, 14, 4), then their sums */
+	       E Th, Tk, TJ, T1h, T1i, T1j, TM, T1k, Tm, Tp, TO, T1m, T1n, T1o, TR;
+	       E T1p;
+	       {
+		    E Ti, Tj, TK, TL;
+		    Th = ri[WS(is, 6)];
+		    Ti = ri[WS(is, 11)];
+		    Tj = ri[WS(is, 1)];
+		    Tk = Ti + Tj;
+		    TJ = FNMS(KP500000000, Tk, Th);
+		    T1h = KP866025403 * (Tj - Ti);
+		    T1i = ii[WS(is, 6)];
+		    TK = ii[WS(is, 11)];
+		    TL = ii[WS(is, 1)];
+		    T1j = TK + TL;
+		    TM = KP866025403 * (TK - TL);
+		    T1k = FNMS(KP500000000, T1j, T1i);
+	       }
+	       {
+		    E Tn, To, TP, TQ;
+		    Tm = ri[WS(is, 9)];
+		    Tn = ri[WS(is, 14)];
+		    To = ri[WS(is, 4)];
+		    Tp = Tn + To;
+		    TO = FNMS(KP500000000, Tp, Tm);
+		    T1m = KP866025403 * (To - Tn);
+		    T1n = ii[WS(is, 9)];
+		    TP = ii[WS(is, 14)];
+		    TQ = ii[WS(is, 4)];
+		    T1o = TP + TQ;
+		    TR = KP866025403 * (TP - TQ);
+		    T1p = FNMS(KP500000000, T1o, T1n);
+	       }
+	       Tl = Th + Tk;
+	       Tq = Tm + Tp;
+	       Tr = Tl + Tq;
+	       TN = TJ - TM;
+	       TS = TO - TR;
+	       TT = TN + TS;
+	       T2c = T1i + T1j;
+	       T2d = T1n + T1o;
+	       T2n = T2c + T2d;
+	       T1O = T1k - T1h;
+	       T1P = T1p - T1m;
+	       T22 = T1O + T1P;
+	       T1l = T1h + T1k;
+	       T1q = T1m + T1p;
+	       T1w = T1l + T1q;
+	       TZ = TJ + TM;
+	       T10 = TO + TR;
+	       T11 = TZ + T10;
+	  }
+	  { /* 3-input butterflies on inputs (3, 8, 13) and (12, 2, 7), then their sums */
+	       E T6, T9, Ty, T16, T17, T18, TB, T19, Tb, Te, TD, T1b, T1c, T1d, TG;
+	       E T1e;
+	       {
+		    E T7, T8, Tz, TA;
+		    T6 = ri[WS(is, 3)];
+		    T7 = ri[WS(is, 8)];
+		    T8 = ri[WS(is, 13)];
+		    T9 = T7 + T8;
+		    Ty = FNMS(KP500000000, T9, T6);
+		    T16 = KP866025403 * (T8 - T7);
+		    T17 = ii[WS(is, 3)];
+		    Tz = ii[WS(is, 8)];
+		    TA = ii[WS(is, 13)];
+		    T18 = Tz + TA;
+		    TB = KP866025403 * (Tz - TA);
+		    T19 = FNMS(KP500000000, T18, T17);
+	       }
+	       {
+		    E Tc, Td, TE, TF;
+		    Tb = ri[WS(is, 12)];
+		    Tc = ri[WS(is, 2)];
+		    Td = ri[WS(is, 7)];
+		    Te = Tc + Td;
+		    TD = FNMS(KP500000000, Te, Tb);
+		    T1b = KP866025403 * (Td - Tc);
+		    T1c = ii[WS(is, 12)];
+		    TE = ii[WS(is, 2)];
+		    TF = ii[WS(is, 7)];
+		    T1d = TE + TF;
+		    TG = KP866025403 * (TE - TF);
+		    T1e = FNMS(KP500000000, T1d, T1c);
+	       }
+	       Ta = T6 + T9;
+	       Tf = Tb + Te;
+	       Tg = Ta + Tf;
+	       TC = Ty - TB;
+	       TH = TD - TG;
+	       TI = TC + TH;
+	       T2f = T17 + T18;
+	       T2g = T1c + T1d;
+	       T2m = T2f + T2g;
+	       T1R = T19 - T16;
+	       T1S = T1e - T1b;
+	       T21 = T1R + T1S;
+	       T1a = T16 + T19;
+	       T1f = T1b + T1e;
+	       T1v = T1a + T1f;
+	       TW = Ty + TB;
+	       TX = TD + TG;
+	       TY = TW + TX;
+	  }
+	  { /* real parts of outputs 0, 9, 6, 12, 3 */
+	       E T2a, Ts, T29, T2i, T2k, T2e, T2h, T2j, T2b;
+	       T2a = KP559016994 * (Tg - Tr);
+	       Ts = Tg + Tr;
+	       T29 = FNMS(KP250000000, Ts, T5);
+	       T2e = T2c - T2d;
+	       T2h = T2f - T2g;
+	       T2i = FNMS(KP587785252, T2h, KP951056516 * T2e);
+	       T2k = FMA(KP951056516, T2h, KP587785252 * T2e);
+	       ro[0] = T5 + Ts; /* output 0 (real): sum of all 15 real inputs */
+	       T2j = T2a + T29;
+	       ro[WS(os, 9)] = T2j - T2k;
+	       ro[WS(os, 6)] = T2j + T2k;
+	       T2b = T29 - T2a;
+	       ro[WS(os, 12)] = T2b - T2i;
+	       ro[WS(os, 3)] = T2b + T2i;
+	  }
+	  { /* imaginary parts of outputs 0, 6, 9, 3, 12 */
+	       E T2q, T2o, T2p, T2u, T2w, T2s, T2t, T2v, T2r;
+	       T2q = KP559016994 * (T2m - T2n);
+	       T2o = T2m + T2n;
+	       T2p = FNMS(KP250000000, T2o, T2l);
+	       T2s = Tl - Tq;
+	       T2t = Ta - Tf;
+	       T2u = FNMS(KP587785252, T2t, KP951056516 * T2s);
+	       T2w = FMA(KP951056516, T2t, KP587785252 * T2s);
+	       io[0] = T2l + T2o; /* output 0 (imaginary): sum of all 15 imaginary inputs */
+	       T2v = T2q + T2p;
+	       io[WS(os, 6)] = T2v - T2w;
+	       io[WS(os, 9)] = T2w + T2v;
+	       T2r = T2p - T2q;
+	       io[WS(os, 3)] = T2r - T2u;
+	       io[WS(os, 12)] = T2u + T2r;
+	  }
+	  { /* real parts of outputs 5, 14, 11, 2, 8 */
+	       E T1M, TU, T1L, T1U, T1W, T1Q, T1T, T1V, T1N;
+	       T1M = KP559016994 * (TI - TT);
+	       TU = TI + TT;
+	       T1L = FNMS(KP250000000, TU, Tx);
+	       T1Q = T1O - T1P;
+	       T1T = T1R - T1S;
+	       T1U = FNMS(KP587785252, T1T, KP951056516 * T1Q);
+	       T1W = FMA(KP951056516, T1T, KP587785252 * T1Q);
+	       ro[WS(os, 5)] = Tx + TU;
+	       T1V = T1M + T1L;
+	       ro[WS(os, 14)] = T1V - T1W;
+	       ro[WS(os, 11)] = T1V + T1W;
+	       T1N = T1L - T1M;
+	       ro[WS(os, 2)] = T1N - T1U;
+	       ro[WS(os, 8)] = T1N + T1U;
+	  }
+	  { /* imaginary parts of outputs 5, 11, 14, 2, 8 */
+	       E T25, T23, T24, T1Z, T28, T1X, T1Y, T27, T26;
+	       T25 = KP559016994 * (T21 - T22);
+	       T23 = T21 + T22;
+	       T24 = FNMS(KP250000000, T23, T20);
+	       T1X = TN - TS;
+	       T1Y = TC - TH;
+	       T1Z = FNMS(KP587785252, T1Y, KP951056516 * T1X);
+	       T28 = FMA(KP951056516, T1Y, KP587785252 * T1X);
+	       io[WS(os, 5)] = T20 + T23;
+	       T27 = T25 + T24;
+	       io[WS(os, 11)] = T27 - T28;
+	       io[WS(os, 14)] = T28 + T27;
+	       T26 = T24 - T25;
+	       io[WS(os, 2)] = T1Z + T26;
+	       io[WS(os, 8)] = T26 - T1Z;
+	  }
+	  { /* imaginary parts of outputs 10, 7, 13, 1, 4 */
+	       E T1x, T1D, T1E, T1I, T1J, T1G, T1H, T1K, T1F;
+	       T1x = KP559016994 * (T1v - T1w);
+	       T1D = T1v + T1w;
+	       T1E = FNMS(KP250000000, T1D, T1C);
+	       T1G = TW - TX;
+	       T1H = TZ - T10;
+	       T1I = FMA(KP951056516, T1G, KP587785252 * T1H);
+	       T1J = FNMS(KP587785252, T1G, KP951056516 * T1H);
+	       io[WS(os, 10)] = T1C + T1D;
+	       T1K = T1E - T1x;
+	       io[WS(os, 7)] = T1J + T1K;
+	       io[WS(os, 13)] = T1K - T1J;
+	       T1F = T1x + T1E;
+	       io[WS(os, 1)] = T1F - T1I;
+	       io[WS(os, 4)] = T1I + T1F;
+	  }
+	  { /* real parts of outputs 10, 7, 13, 4, 1 */
+	       E T13, T12, T14, T1s, T1u, T1g, T1r, T1t, T15;
+	       T13 = KP559016994 * (TY - T11);
+	       T12 = TY + T11;
+	       T14 = FNMS(KP250000000, T12, TV);
+	       T1g = T1a - T1f;
+	       T1r = T1l - T1q;
+	       T1s = FMA(KP951056516, T1g, KP587785252 * T1r);
+	       T1u = FNMS(KP587785252, T1g, KP951056516 * T1r);
+	       ro[WS(os, 10)] = TV + T12;
+	       T1t = T14 - T13;
+	       ro[WS(os, 7)] = T1t - T1u;
+	       ro[WS(os, 13)] = T1t + T1u;
+	       T15 = T13 + T14;
+	       ro[WS(os, 4)] = T15 - T1s;
+	       ro[WS(os, 1)] = T15 + T1s;
+	  }
+     }
+}
+
+static const kdft_desc desc = { 15, "n1_15", {128, 28, 28, 0}, &GENUS, 0, 0, 0, 0 }; /* descriptor for n1_15: size 15, name string; {128, 28, 28, ...} repeats the add/mul/fma counts from the generated header comment; GENUS and the trailing zeros are defined outside this file */
+void X(codelet_n1_15) (planner *p) { /* X(...) is a naming macro defined outside this file */
+     X(kdft_register) (p, n1_15, &desc); /* passes the n1_15 kernel and its descriptor to kdft_register for planner p */
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_16.c b/src/fftw3/dft/codelets/standard/n1_16.c
new file mode 100644
index 0000000..6c813a1
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_16.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:37 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 16 -name n1_16 -include n.h */
+
+/*
+ * This function contains 144 FP additions, 24 FP multiplications,
+ * (or, 136 additions, 16 multiplications, 8 fused multiply/add),
+ * 50 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_16.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_16.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_16.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_16(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
+{ /* 16-point complex DFT on split arrays: reads ri/ii at slots 0..15, writes ro/io at slots 0..15, repeated v times */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
+     int i; /* number of transforms still to do */
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* one transform per pass; inputs step by ivs, outputs by ovs */
+	  E T7, T1R, T25, TC, TN, T1x, T1H, T1l, Tt, T22, T2h, T1b, T1g, T1E, T1Z;
+	  E T1D, Te, T1S, T26, TJ, TQ, T1m, T1n, TT, Tm, T1X, T2g, T10, T15, T1B;
+	  E T1U, T1A;
+	  { /* inputs 0, 8, 4, 12 */
+	       E T3, TL, Ty, T1k, T6, T1j, TB, TM;
+	       {
+		    E T1, T2, Tw, Tx;
+		    T1 = ri[0];
+		    T2 = ri[WS(is, 8)]; /* WS(is, 8): presumably the offset of slot 8 for stride is - macro defined outside this file */
+		    T3 = T1 + T2;
+		    TL = T1 - T2;
+		    Tw = ii[0];
+		    Tx = ii[WS(is, 8)];
+		    Ty = Tw + Tx;
+		    T1k = Tw - Tx;
+	       }
+	       {
+		    E T4, T5, Tz, TA;
+		    T4 = ri[WS(is, 4)];
+		    T5 = ri[WS(is, 12)];
+		    T6 = T4 + T5;
+		    T1j = T4 - T5;
+		    Tz = ii[WS(is, 4)];
+		    TA = ii[WS(is, 12)];
+		    TB = Tz + TA;
+		    TM = Tz - TA;
+	       }
+	       T7 = T3 + T6;
+	       T1R = T3 - T6;
+	       T25 = Ty - TB;
+	       TC = Ty + TB;
+	       TN = TL - TM;
+	       T1x = TL + TM;
+	       T1H = T1k - T1j;
+	       T1l = T1j + T1k;
+	  }
+	  { /* inputs 15, 7, 3, 11 */
+	       E Tp, T17, T1f, T20, Ts, T1c, T1a, T21;
+	       {
+		    E Tn, To, T1d, T1e;
+		    Tn = ri[WS(is, 15)];
+		    To = ri[WS(is, 7)];
+		    Tp = Tn + To;
+		    T17 = Tn - To;
+		    T1d = ii[WS(is, 15)];
+		    T1e = ii[WS(is, 7)];
+		    T1f = T1d - T1e;
+		    T20 = T1d + T1e;
+	       }
+	       {
+		    E Tq, Tr, T18, T19;
+		    Tq = ri[WS(is, 3)];
+		    Tr = ri[WS(is, 11)];
+		    Ts = Tq + Tr;
+		    T1c = Tq - Tr;
+		    T18 = ii[WS(is, 3)];
+		    T19 = ii[WS(is, 11)];
+		    T1a = T18 - T19;
+		    T21 = T18 + T19;
+	       }
+	       Tt = Tp + Ts;
+	       T22 = T20 - T21;
+	       T2h = T20 + T21;
+	       T1b = T17 - T1a;
+	       T1g = T1c + T1f;
+	       T1E = T1f - T1c;
+	       T1Z = Tp - Ts;
+	       T1D = T17 + T1a;
+	  }
+	  { /* inputs 2, 10, 14, 6 */
+	       E Ta, TP, TF, TO, Td, TR, TI, TS;
+	       {
+		    E T8, T9, TD, TE;
+		    T8 = ri[WS(is, 2)];
+		    T9 = ri[WS(is, 10)];
+		    Ta = T8 + T9;
+		    TP = T8 - T9;
+		    TD = ii[WS(is, 2)];
+		    TE = ii[WS(is, 10)];
+		    TF = TD + TE;
+		    TO = TD - TE;
+	       }
+	       {
+		    E Tb, Tc, TG, TH;
+		    Tb = ri[WS(is, 14)];
+		    Tc = ri[WS(is, 6)];
+		    Td = Tb + Tc;
+		    TR = Tb - Tc;
+		    TG = ii[WS(is, 14)];
+		    TH = ii[WS(is, 6)];
+		    TI = TG + TH;
+		    TS = TG - TH;
+	       }
+	       Te = Ta + Td;
+	       T1S = TF - TI;
+	       T26 = Td - Ta;
+	       TJ = TF + TI;
+	       TQ = TO - TP;
+	       T1m = TR - TS;
+	       T1n = TP + TO;
+	       TT = TR + TS;
+	  }
+	  { /* inputs 1, 9, 5, 13 */
+	       E Ti, T11, TZ, T1V, Tl, TW, T14, T1W;
+	       {
+		    E Tg, Th, TX, TY;
+		    Tg = ri[WS(is, 1)];
+		    Th = ri[WS(is, 9)];
+		    Ti = Tg + Th;
+		    T11 = Tg - Th;
+		    TX = ii[WS(is, 1)];
+		    TY = ii[WS(is, 9)];
+		    TZ = TX - TY;
+		    T1V = TX + TY;
+	       }
+	       {
+		    E Tj, Tk, T12, T13;
+		    Tj = ri[WS(is, 5)];
+		    Tk = ri[WS(is, 13)];
+		    Tl = Tj + Tk;
+		    TW = Tj - Tk;
+		    T12 = ii[WS(is, 5)];
+		    T13 = ii[WS(is, 13)];
+		    T14 = T12 - T13;
+		    T1W = T12 + T13;
+	       }
+	       Tm = Ti + Tl;
+	       T1X = T1V - T1W;
+	       T2g = T1V + T1W;
+	       T10 = TW + TZ;
+	       T15 = T11 - T14;
+	       T1B = T11 + T14;
+	       T1U = Ti - Tl;
+	       T1A = TZ - TW;
+	  }
+	  { /* outputs 0 and 8: additions and subtractions only */
+	       E Tf, Tu, T2j, T2k;
+	       Tf = T7 + Te;
+	       Tu = Tm + Tt;
+	       ro[WS(os, 8)] = Tf - Tu;
+	       ro[0] = Tf + Tu;
+	       T2j = TC + TJ;
+	       T2k = T2g + T2h;
+	       io[WS(os, 8)] = T2j - T2k;
+	       io[0] = T2j + T2k;
+	  }
+	  { /* outputs 4 and 12: additions and subtractions only */
+	       E Tv, TK, T2f, T2i;
+	       Tv = Tt - Tm;
+	       TK = TC - TJ;
+	       io[WS(os, 4)] = Tv + TK;
+	       io[WS(os, 12)] = TK - Tv;
+	       T2f = T7 - Te;
+	       T2i = T2g - T2h;
+	       ro[WS(os, 12)] = T2f - T2i;
+	       ro[WS(os, 4)] = T2f + T2i;
+	  }
+	  { /* real parts of outputs 10 and 2, imaginary parts of outputs 6 and 14 */
+	       E T1T, T27, T24, T28, T1Y, T23;
+	       T1T = T1R + T1S;
+	       T27 = T25 - T26;
+	       T1Y = T1U + T1X;
+	       T23 = T1Z - T22;
+	       T24 = KP707106781 * (T1Y + T23);
+	       T28 = KP707106781 * (T23 - T1Y);
+	       ro[WS(os, 10)] = T1T - T24;
+	       io[WS(os, 6)] = T27 + T28;
+	       ro[WS(os, 2)] = T1T + T24;
+	       io[WS(os, 14)] = T27 - T28;
+	  }
+	  { /* real parts of outputs 14 and 6, imaginary parts of outputs 2 and 10 */
+	       E T29, T2d, T2c, T2e, T2a, T2b;
+	       T29 = T1R - T1S;
+	       T2d = T26 + T25;
+	       T2a = T1X - T1U;
+	       T2b = T1Z + T22;
+	       T2c = KP707106781 * (T2a - T2b);
+	       T2e = KP707106781 * (T2a + T2b);
+	       ro[WS(os, 14)] = T29 - T2c;
+	       io[WS(os, 2)] = T2d + T2e;
+	       ro[WS(os, 6)] = T29 + T2c;
+	       io[WS(os, 10)] = T2d - T2e;
+	  }
+	  { /* outputs 3, 11, 7, 15 */
+	       E TV, T1r, T1p, T1v, T1i, T1q, T1u, T1w, TU, T1o;
+	       TU = KP707106781 * (TQ - TT);
+	       TV = TN + TU;
+	       T1r = TN - TU;
+	       T1o = KP707106781 * (T1m - T1n);
+	       T1p = T1l - T1o;
+	       T1v = T1l + T1o;
+	       {
+		    E T16, T1h, T1s, T1t;
+		    T16 = FMA(KP923879532, T10, KP382683432 * T15);
+		    T1h = FNMS(KP923879532, T1g, KP382683432 * T1b);
+		    T1i = T16 + T1h;
+		    T1q = T1h - T16;
+		    T1s = FNMS(KP923879532, T15, KP382683432 * T10);
+		    T1t = FMA(KP382683432, T1g, KP923879532 * T1b);
+		    T1u = T1s - T1t;
+		    T1w = T1s + T1t;
+	       }
+	       ro[WS(os, 11)] = TV - T1i;
+	       io[WS(os, 11)] = T1v - T1w;
+	       ro[WS(os, 3)] = TV + T1i;
+	       io[WS(os, 3)] = T1v + T1w;
+	       io[WS(os, 15)] = T1p - T1q;
+	       ro[WS(os, 15)] = T1r - T1u;
+	       io[WS(os, 7)] = T1p + T1q;
+	       ro[WS(os, 7)] = T1r + T1u;
+	  }
+	  { /* outputs 1, 9, 5, 13 */
+	       E T1z, T1L, T1J, T1P, T1G, T1K, T1O, T1Q, T1y, T1I;
+	       T1y = KP707106781 * (T1n + T1m);
+	       T1z = T1x + T1y;
+	       T1L = T1x - T1y;
+	       T1I = KP707106781 * (TQ + TT);
+	       T1J = T1H - T1I;
+	       T1P = T1H + T1I;
+	       {
+		    E T1C, T1F, T1M, T1N;
+		    T1C = FMA(KP382683432, T1A, KP923879532 * T1B);
+		    T1F = FNMS(KP382683432, T1E, KP923879532 * T1D);
+		    T1G = T1C + T1F;
+		    T1K = T1F - T1C;
+		    T1M = FNMS(KP382683432, T1B, KP923879532 * T1A);
+		    T1N = FMA(KP923879532, T1E, KP382683432 * T1D);
+		    T1O = T1M - T1N;
+		    T1Q = T1M + T1N;
+	       }
+	       ro[WS(os, 9)] = T1z - T1G;
+	       io[WS(os, 9)] = T1P - T1Q;
+	       ro[WS(os, 1)] = T1z + T1G;
+	       io[WS(os, 1)] = T1P + T1Q;
+	       io[WS(os, 13)] = T1J - T1K;
+	       ro[WS(os, 13)] = T1L - T1O;
+	       io[WS(os, 5)] = T1J + T1K;
+	       ro[WS(os, 5)] = T1L + T1O;
+	  }
+     }
+}
+
+static const kdft_desc desc = { 16, "n1_16", {136, 16, 8, 0}, &GENUS, 0, 0, 0, 0 }; /* descriptor for n1_16: size 16, name string; {136, 16, 8, ...} repeats the add/mul/fma counts from the generated header comment; GENUS and the trailing zeros are defined outside this file */
+void X(codelet_n1_16) (planner *p) { /* X(...) is a naming macro defined outside this file */
+     X(kdft_register) (p, n1_16, &desc); /* passes the n1_16 kernel and its descriptor to kdft_register for planner p */
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_2.c b/src/fftw3/dft/codelets/standard/n1_2.c
new file mode 100644
index 0000000..8819b53
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_2.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:31 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 2 -name n1_2 -include n.h */
+
+/*
+ * This function contains 4 FP additions, 0 FP multiplications,
+ * (or, 4 additions, 0 multiplications, 0 fused multiply/add),
+ * 5 stack variables, and 8 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_2.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_2.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_2.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_2(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
+{ /* 2-point complex DFT on split arrays: output 0 is the sum and output 1 the difference of the two inputs, repeated v times */
+     int i; /* number of transforms still to do */
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* one transform per pass; inputs step by ivs, outputs by ovs */
+	  E T1, T2, T3, T4;
+	  T1 = ri[0];
+	  T2 = ri[WS(is, 1)]; /* WS(is, 1): presumably the offset of slot 1 for stride is - macro defined outside this file */
+	  ro[WS(os, 1)] = T1 - T2; /* both inputs are loaded before either output is stored */
+	  ro[0] = T1 + T2;
+	  T3 = ii[0];
+	  T4 = ii[WS(is, 1)];
+	  io[WS(os, 1)] = T3 - T4;
+	  io[0] = T3 + T4;
+     }
+}
+
+static const kdft_desc desc = { 2, "n1_2", {4, 0, 0, 0}, &GENUS, 0, 0, 0, 0 }; /* descriptor for n1_2: size 2, name string; {4, 0, 0, ...} repeats the add/mul/fma counts from the generated header comment; GENUS and the trailing zeros are defined outside this file */
+void X(codelet_n1_2) (planner *p) { /* X(...) is a naming macro defined outside this file */
+     X(kdft_register) (p, n1_2, &desc); /* passes the n1_2 kernel and its descriptor to kdft_register for planner p */
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_3.c b/src/fftw3/dft/codelets/standard/n1_3.c
new file mode 100644
index 0000000..f639ac1
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_3.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:32 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 3 -name n1_3 -include n.h */
+
+/*
+ * This function contains 12 FP additions, 4 FP multiplications,
+ * (or, 10 additions, 2 multiplications, 2 fused multiply/add),
+ * 15 stack variables, and 12 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_3.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_3.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_3.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_3(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
+{ /* 3-point complex DFT on split arrays: reads ri/ii at slots 0..2, writes ro/io at slots 0..2, repeated v times */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
+     int i; /* number of transforms still to do */
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* one transform per pass; inputs step by ivs, outputs by ovs */
+	  E T1, Ta, T4, T9, T8, Tb, T5, Tc;
+	  T1 = ri[0];
+	  Ta = ii[0];
+	  { /* sums and scaled differences of inputs 1 and 2 */
+	       E T2, T3, T6, T7;
+	       T2 = ri[WS(is, 1)]; /* WS(is, 1): presumably the offset of slot 1 for stride is - macro defined outside this file */
+	       T3 = ri[WS(is, 2)];
+	       T4 = T2 + T3;
+	       T9 = KP866025403 * (T3 - T2);
+	       T6 = ii[WS(is, 1)];
+	       T7 = ii[WS(is, 2)];
+	       T8 = KP866025403 * (T6 - T7);
+	       Tb = T6 + T7;
+	  }
+	  ro[0] = T1 + T4; /* output 0: sum of the three inputs */
+	  io[0] = Ta + Tb;
+	  T5 = FNMS(KP500000000, T4, T1); /* FNMS is defined outside this file; presumably T1 - T4/2 - TODO confirm */
+	  ro[WS(os, 2)] = T5 - T8; /* real outputs take the scaled imaginary difference T8 */
+	  ro[WS(os, 1)] = T5 + T8;
+	  Tc = FNMS(KP500000000, Tb, Ta);
+	  io[WS(os, 1)] = T9 + Tc; /* imaginary outputs take the scaled real difference T9 */
+	  io[WS(os, 2)] = Tc - T9;
+     }
+}
+
+static const kdft_desc desc = { 3, "n1_3", {10, 2, 2, 0}, &GENUS, 0, 0, 0, 0 }; /* descriptor for n1_3: size 3, name string; {10, 2, 2, ...} repeats the add/mul/fma counts from the generated header comment; GENUS and the trailing zeros are defined outside this file */
+void X(codelet_n1_3) (planner *p) { /* X(...) is a naming macro defined outside this file */
+     X(kdft_register) (p, n1_3, &desc); /* passes the n1_3 kernel and its descriptor to kdft_register for planner p */
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_4.c b/src/fftw3/dft/codelets/standard/n1_4.c
new file mode 100644
index 0000000..402046a
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_4.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:32 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 4 -name n1_4 -include n.h */
+
+/*
+ * This function contains 16 FP additions, 0 FP multiplications,
+ * (or, 16 additions, 0 multiplications, 0 fused multiply/add),
+ * 13 stack variables, and 16 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_4.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_4.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_4.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_4(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs) /* 4-point kernel; body uses only additions and subtractions */
+{
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; input pointers step by ivs, output pointers by ovs */
+	  E T3, Tb, T9, Tf, T6, Ta, Te, Tg;
+	  { /* sums and differences of inputs 0 and 2 */
+	       E T1, T2, T7, T8;
+	       T1 = ri[0];
+	       T2 = ri[WS(is, 2)];
+	       T3 = T1 + T2;
+	       Tb = T1 - T2;
+	       T7 = ii[0];
+	       T8 = ii[WS(is, 2)];
+	       T9 = T7 - T8;
+	       Tf = T7 + T8;
+	  }
+	  { /* sums and differences of inputs 1 and 3 */
+	       E T4, T5, Tc, Td;
+	       T4 = ri[WS(is, 1)];
+	       T5 = ri[WS(is, 3)];
+	       T6 = T4 + T5;
+	       Ta = T4 - T5;
+	       Tc = ii[WS(is, 1)];
+	       Td = ii[WS(is, 3)];
+	       Te = Tc - Td;
+	       Tg = Tc + Td;
+	  }
+	  ro[WS(os, 2)] = T3 - T6;
+	  io[WS(os, 2)] = Tf - Tg;
+	  ro[0] = T3 + T6; /* output 0 of ro: sum of all four ri inputs */
+	  io[0] = Tf + Tg; /* output 0 of io: sum of all four ii inputs */
+	  io[WS(os, 1)] = T9 - Ta; /* outputs 1 and 3 mix ri differences into io and ii differences into ro */
+	  ro[WS(os, 1)] = Tb + Te;
+	  io[WS(os, 3)] = Ta + T9;
+	  ro[WS(os, 3)] = Tb - Te;
+     }
+}
+
+static const kdft_desc desc = { 4, "n1_4", {16, 0, 0, 0}, &GENUS, 0, 0, 0, 0 }; /* 16/0/0 repeats the add/mul/fma counts from this file's generator comment */
+void X(codelet_n1_4) (planner *p) { /* passes n1_4 and its descriptor to kdft_register for planner p */
+     X(kdft_register) (p, n1_4, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_5.c b/src/fftw3/dft/codelets/standard/n1_5.c
new file mode 100644
index 0000000..8ac5abc
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_5.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:32 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 5 -name n1_5 -include n.h */
+
+/*
+ * This function contains 32 FP additions, 12 FP multiplications,
+ * (or, 26 additions, 6 multiplications, 6 fused multiply/add),
+ * 21 stack variables, and 20 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_5.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_5.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_5.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_5(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs) /* 5-point kernel: reads ri/ii at 0 and WS(is,1..4); writes ro/io at 0 and WS(os,1..4) */
+{
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; input pointers step by ivs, output pointers by ovs */
+	  E T1, To, T8, Tt, T9, Ts, Te, Tp, Th, Tn;
+	  T1 = ri[0];
+	  To = ii[0];
+	  { /* ri inputs paired as (1,4) and (2,3) */
+	       E T2, T3, T4, T5, T6, T7;
+	       T2 = ri[WS(is, 1)];
+	       T3 = ri[WS(is, 4)];
+	       T4 = T2 + T3;
+	       T5 = ri[WS(is, 2)];
+	       T6 = ri[WS(is, 3)];
+	       T7 = T5 + T6;
+	       T8 = T4 + T7; /* sum of ri inputs 1..4 */
+	       Tt = T5 - T6;
+	       T9 = KP559016994 * (T4 - T7);
+	       Ts = T2 - T3;
+	  }
+	  { /* ii inputs paired as (1,4) and (2,3) */
+	       E Tc, Td, Tl, Tf, Tg, Tm;
+	       Tc = ii[WS(is, 1)];
+	       Td = ii[WS(is, 4)];
+	       Tl = Tc + Td;
+	       Tf = ii[WS(is, 2)];
+	       Tg = ii[WS(is, 3)];
+	       Tm = Tf + Tg;
+	       Te = Tc - Td;
+	       Tp = Tl + Tm; /* sum of ii inputs 1..4 */
+	       Th = Tf - Tg;
+	       Tn = KP559016994 * (Tl - Tm);
+	  }
+	  ro[0] = T1 + T8; /* output 0 of ro: sum of all five ri inputs */
+	  io[0] = To + Tp; /* output 0 of io: sum of all five ii inputs */
+	  { /* ro outputs 1..4, built from the ri sums and the ii differences */
+	       E Ti, Tk, Tb, Tj, Ta;
+	       Ti = FMA(KP951056516, Te, KP587785252 * Th); /* FMA/FNMS are defined in an included header; exact semantics not visible here */
+	       Tk = FNMS(KP587785252, Te, KP951056516 * Th);
+	       Ta = FNMS(KP250000000, T8, T1);
+	       Tb = T9 + Ta;
+	       Tj = Ta - T9;
+	       ro[WS(os, 4)] = Tb - Ti;
+	       ro[WS(os, 3)] = Tj + Tk;
+	       ro[WS(os, 1)] = Tb + Ti;
+	       ro[WS(os, 2)] = Tj - Tk;
+	  }
+	  { /* io outputs 1..4, built from the ii sums and the ri differences */
+	       E Tu, Tv, Tr, Tw, Tq;
+	       Tu = FMA(KP951056516, Ts, KP587785252 * Tt);
+	       Tv = FNMS(KP587785252, Ts, KP951056516 * Tt);
+	       Tq = FNMS(KP250000000, Tp, To);
+	       Tr = Tn + Tq;
+	       Tw = Tq - Tn;
+	       io[WS(os, 1)] = Tr - Tu;
+	       io[WS(os, 3)] = Tw - Tv;
+	       io[WS(os, 4)] = Tu + Tr;
+	       io[WS(os, 2)] = Tv + Tw;
+	  }
+     }
+}
+
+static const kdft_desc desc = { 5, "n1_5", {26, 6, 6, 0}, &GENUS, 0, 0, 0, 0 }; /* 26/6/6 repeats the add/mul/fma counts from this file's generator comment */
+void X(codelet_n1_5) (planner *p) { /* passes n1_5 and its descriptor to kdft_register for planner p */
+     X(kdft_register) (p, n1_5, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_6.c b/src/fftw3/dft/codelets/standard/n1_6.c
new file mode 100644
index 0000000..114679d
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_6.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:32 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 6 -name n1_6 -include n.h */
+
+/*
+ * This function contains 36 FP additions, 8 FP multiplications,
+ * (or, 32 additions, 4 multiplications, 4 fused multiply/add),
+ * 23 stack variables, and 24 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_6.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_6.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_6.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_6(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs) /* 6-point kernel: reads ri/ii at 0 and WS(is,1..5); writes ro/io at 0 and WS(os,1..5) */
+{
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; input pointers step by ivs, output pointers by ovs */
+	  E T3, Tb, Tq, Tx, T6, Tc, T9, Td, Ta, Te, Ti, Tu, Tl, Tv, Tr;
+	  E Ty;
+	  { /* sums and differences of inputs 0 and 3 */
+	       E T1, T2, To, Tp;
+	       T1 = ri[0];
+	       T2 = ri[WS(is, 3)];
+	       T3 = T1 - T2;
+	       Tb = T1 + T2;
+	       To = ii[0];
+	       Tp = ii[WS(is, 3)];
+	       Tq = To - Tp;
+	       Tx = To + Tp;
+	  }
+	  { /* ri inputs paired as (2,5) and (4,1) */
+	       E T4, T5, T7, T8;
+	       T4 = ri[WS(is, 2)];
+	       T5 = ri[WS(is, 5)];
+	       T6 = T4 - T5;
+	       Tc = T4 + T5;
+	       T7 = ri[WS(is, 4)];
+	       T8 = ri[WS(is, 1)];
+	       T9 = T7 - T8;
+	       Td = T7 + T8;
+	  }
+	  Ta = T6 + T9;
+	  Te = Tc + Td; /* sum of ri inputs 1, 2, 4 and 5 */
+	  { /* ii inputs paired as (2,5) and (4,1) */
+	       E Tg, Th, Tj, Tk;
+	       Tg = ii[WS(is, 2)];
+	       Th = ii[WS(is, 5)];
+	       Ti = Tg - Th;
+	       Tu = Tg + Th;
+	       Tj = ii[WS(is, 4)];
+	       Tk = ii[WS(is, 1)];
+	       Tl = Tj - Tk;
+	       Tv = Tj + Tk;
+	  }
+	  Tr = Ti + Tl;
+	  Ty = Tu + Tv; /* sum of ii inputs 1, 2, 4 and 5 */
+	  ro[WS(os, 3)] = T3 + Ta;
+	  io[WS(os, 3)] = Tq + Tr;
+	  ro[0] = Tb + Te; /* output 0 of ro: sum of all six ri inputs */
+	  io[0] = Tx + Ty; /* output 0 of io: sum of all six ii inputs */
+	  { /* outputs 1 and 5, derived from the difference terms */
+	       E Tf, Tm, Tn, Ts;
+	       Tf = FNMS(KP500000000, Ta, T3); /* FNMS is defined in an included header; exact semantics not visible here */
+	       Tm = KP866025403 * (Ti - Tl);
+	       ro[WS(os, 5)] = Tf - Tm;
+	       ro[WS(os, 1)] = Tf + Tm;
+	       Tn = KP866025403 * (T9 - T6);
+	       Ts = FNMS(KP500000000, Tr, Tq);
+	       io[WS(os, 1)] = Tn + Ts;
+	       io[WS(os, 5)] = Ts - Tn;
+	  }
+	  { /* outputs 2 and 4, derived from the sum terms */
+	       E Tt, Tw, Tz, TA;
+	       Tt = FNMS(KP500000000, Te, Tb);
+	       Tw = KP866025403 * (Tu - Tv);
+	       ro[WS(os, 2)] = Tt - Tw;
+	       ro[WS(os, 4)] = Tt + Tw;
+	       Tz = FNMS(KP500000000, Ty, Tx);
+	       TA = KP866025403 * (Td - Tc);
+	       io[WS(os, 2)] = Tz - TA;
+	       io[WS(os, 4)] = TA + Tz;
+	  }
+     }
+}
+
+static const kdft_desc desc = { 6, "n1_6", {32, 4, 4, 0}, &GENUS, 0, 0, 0, 0 }; /* 32/4/4 repeats the add/mul/fma counts from this file's generator comment */
+void X(codelet_n1_6) (planner *p) { /* passes n1_6 and its descriptor to kdft_register for planner p */
+     X(kdft_register) (p, n1_6, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_7.c b/src/fftw3/dft/codelets/standard/n1_7.c
new file mode 100644
index 0000000..03c892d
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_7.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:32 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 7 -name n1_7 -include n.h */
+
+/*
+ * This function contains 60 FP additions, 36 FP multiplications,
+ * (or, 36 additions, 12 multiplications, 24 fused multiply/add),
+ * 25 stack variables, and 28 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_7.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_7.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_7.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_7(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs) /* 7-point kernel: reads ri/ii at 0 and WS(is,1..6); writes ro/io at 0 and WS(os,1..6) */
+{
+     DK(KP222520933, +0.222520933956314404288902564496794759466355569); /* numerically cos(3*pi/7) */
+     DK(KP900968867, +0.900968867902419126236102319507445051165919162); /* numerically cos(pi/7) */
+     DK(KP623489801, +0.623489801858733530525004884004239810632274731); /* numerically cos(2*pi/7) */
+     DK(KP433883739, +0.433883739117558120475768332848358754609990728); /* numerically sin(pi/7) */
+     DK(KP781831482, +0.781831482468029808708444526674057750232334519); /* numerically sin(2*pi/7) */
+     DK(KP974927912, +0.974927912181823607018131682993931217232785801); /* numerically sin(3*pi/7) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; input pointers step by ivs, output pointers by ovs */
+	  E T1, Tu, T4, Tq, Te, Tx, T7, Ts, Tk, Tv, Ta, Tr, Th, Tw;
+	  T1 = ri[0];
+	  Tu = ii[0];
+	  { /* sums and differences of inputs 1 and 6 */
+	       E T2, T3, Tc, Td;
+	       T2 = ri[WS(is, 1)];
+	       T3 = ri[WS(is, 6)];
+	       T4 = T2 + T3;
+	       Tq = T3 - T2;
+	       Tc = ii[WS(is, 1)];
+	       Td = ii[WS(is, 6)];
+	       Te = Tc - Td;
+	       Tx = Tc + Td;
+	  }
+	  { /* sums and differences of inputs 2 and 5 */
+	       E T5, T6, Ti, Tj;
+	       T5 = ri[WS(is, 2)];
+	       T6 = ri[WS(is, 5)];
+	       T7 = T5 + T6;
+	       Ts = T6 - T5;
+	       Ti = ii[WS(is, 2)];
+	       Tj = ii[WS(is, 5)];
+	       Tk = Ti - Tj;
+	       Tv = Ti + Tj;
+	  }
+	  { /* sums and differences of inputs 3 and 4 */
+	       E T8, T9, Tf, Tg;
+	       T8 = ri[WS(is, 3)];
+	       T9 = ri[WS(is, 4)];
+	       Ta = T8 + T9;
+	       Tr = T9 - T8;
+	       Tf = ii[WS(is, 3)];
+	       Tg = ii[WS(is, 4)];
+	       Th = Tf - Tg;
+	       Tw = Tf + Tg;
+	  }
+	  ro[0] = T1 + T4 + T7 + Ta; /* output 0 of ro: sum of all seven ri inputs */
+	  io[0] = Tu + Tx + Tv + Tw; /* output 0 of io: sum of all seven ii inputs */
+	  { /* outputs 2 and 5 */
+	       E Tl, Tb, TB, TC;
+	       Tl = FNMS(KP781831482, Th, KP974927912 * Te) - (KP433883739 * Tk); /* FMA/FNMS/FNMA are defined in an included header; exact semantics not visible here */
+	       Tb = FMA(KP623489801, Ta, T1) + FNMA(KP900968867, T7, KP222520933 * T4);
+	       ro[WS(os, 5)] = Tb - Tl;
+	       ro[WS(os, 2)] = Tb + Tl;
+	       TB = FNMS(KP781831482, Tr, KP974927912 * Tq) - (KP433883739 * Ts);
+	       TC = FMA(KP623489801, Tw, Tu) + FNMA(KP900968867, Tv, KP222520933 * Tx);
+	       io[WS(os, 2)] = TB + TC;
+	       io[WS(os, 5)] = TC - TB;
+	  }
+	  { /* outputs 1 and 6 */
+	       E Tn, Tm, Tz, TA;
+	       Tn = FMA(KP781831482, Te, KP974927912 * Tk) + (KP433883739 * Th);
+	       Tm = FMA(KP623489801, T4, T1) + FNMA(KP900968867, Ta, KP222520933 * T7);
+	       ro[WS(os, 6)] = Tm - Tn;
+	       ro[WS(os, 1)] = Tm + Tn;
+	       Tz = FMA(KP781831482, Tq, KP974927912 * Ts) + (KP433883739 * Tr);
+	       TA = FMA(KP623489801, Tx, Tu) + FNMA(KP900968867, Tw, KP222520933 * Tv);
+	       io[WS(os, 1)] = Tz + TA;
+	       io[WS(os, 6)] = TA - Tz;
+	  }
+	  { /* outputs 3 and 4 */
+	       E Tp, To, Tt, Ty;
+	       Tp = FMA(KP433883739, Te, KP974927912 * Th) - (KP781831482 * Tk);
+	       To = FMA(KP623489801, T7, T1) + FNMA(KP222520933, Ta, KP900968867 * T4);
+	       ro[WS(os, 4)] = To - Tp;
+	       ro[WS(os, 3)] = To + Tp;
+	       Tt = FMA(KP433883739, Tq, KP974927912 * Tr) - (KP781831482 * Ts);
+	       Ty = FMA(KP623489801, Tv, Tu) + FNMA(KP222520933, Tw, KP900968867 * Tx);
+	       io[WS(os, 3)] = Tt + Ty;
+	       io[WS(os, 4)] = Ty - Tt;
+	  }
+     }
+}
+
+static const kdft_desc desc = { 7, "n1_7", {36, 12, 24, 0}, &GENUS, 0, 0, 0, 0 }; /* 36/12/24 repeats the add/mul/fma counts from this file's generator comment */
+void X(codelet_n1_7) (planner *p) { /* passes n1_7 and its descriptor to kdft_register for planner p */
+     X(kdft_register) (p, n1_7, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_8.c b/src/fftw3/dft/codelets/standard/n1_8.c
new file mode 100644
index 0000000..114592d
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_8.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:32 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 8 -name n1_8 -include n.h */
+
+/*
+ * This function contains 52 FP additions, 4 FP multiplications,
+ * (or, 52 additions, 4 multiplications, 0 fused multiply/add),
+ * 28 stack variables, and 32 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_8(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs) /* 8-point kernel: reads ri/ii at 0 and WS(is,1..7); writes ro/io at 0 and WS(os,1..7) */
+{
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; input pointers step by ivs, output pointers by ovs */
+	  E T3, Tn, Ti, TC, T6, TB, Tl, To, Td, TN, Tz, TH, Ta, TM, Tu;
+	  E TG;
+	  { /* load all eight inputs, paired as (0,4), (2,6), (7,3) and (1,5) */
+	       E T1, T2, Tj, Tk;
+	       T1 = ri[0];
+	       T2 = ri[WS(is, 4)];
+	       T3 = T1 + T2;
+	       Tn = T1 - T2;
+	       {
+		    E Tg, Th, T4, T5;
+		    Tg = ii[0];
+		    Th = ii[WS(is, 4)];
+		    Ti = Tg + Th;
+		    TC = Tg - Th;
+		    T4 = ri[WS(is, 2)];
+		    T5 = ri[WS(is, 6)];
+		    T6 = T4 + T5;
+		    TB = T4 - T5;
+	       }
+	       Tj = ii[WS(is, 2)];
+	       Tk = ii[WS(is, 6)];
+	       Tl = Tj + Tk;
+	       To = Tj - Tk;
+	       { /* inputs 7 and 3 */
+		    E Tb, Tc, Tv, Tw, Tx, Ty;
+		    Tb = ri[WS(is, 7)];
+		    Tc = ri[WS(is, 3)];
+		    Tv = Tb - Tc;
+		    Tw = ii[WS(is, 7)];
+		    Tx = ii[WS(is, 3)];
+		    Ty = Tw - Tx;
+		    Td = Tb + Tc;
+		    TN = Tw + Tx;
+		    Tz = Tv - Ty;
+		    TH = Tv + Ty;
+	       }
+	       { /* inputs 1 and 5 */
+		    E T8, T9, Tq, Tr, Ts, Tt;
+		    T8 = ri[WS(is, 1)];
+		    T9 = ri[WS(is, 5)];
+		    Tq = T8 - T9;
+		    Tr = ii[WS(is, 1)];
+		    Ts = ii[WS(is, 5)];
+		    Tt = Tr - Ts;
+		    Ta = T8 + T9;
+		    TM = Tr + Ts;
+		    Tu = Tq + Tt;
+		    TG = Tt - Tq;
+	       }
+	  }
+	  { /* outputs 0 and 4 */
+	       E T7, Te, TP, TQ;
+	       T7 = T3 + T6;
+	       Te = Ta + Td;
+	       ro[WS(os, 4)] = T7 - Te;
+	       ro[0] = T7 + Te; /* output 0 of ro: sum of all eight ri inputs */
+	       TP = Ti + Tl;
+	       TQ = TM + TN;
+	       io[WS(os, 4)] = TP - TQ;
+	       io[0] = TP + TQ; /* output 0 of io: sum of all eight ii inputs */
+	  }
+	  { /* outputs 2 and 6 */
+	       E Tf, Tm, TL, TO;
+	       Tf = Td - Ta;
+	       Tm = Ti - Tl;
+	       io[WS(os, 2)] = Tf + Tm;
+	       io[WS(os, 6)] = Tm - Tf;
+	       TL = T3 - T6;
+	       TO = TM - TN;
+	       ro[WS(os, 6)] = TL - TO;
+	       ro[WS(os, 2)] = TL + TO;
+	  }
+	  { /* outputs 1 and 5; this is where the KP707106781 scaling first appears */
+	       E Tp, TA, TJ, TK;
+	       Tp = Tn + To;
+	       TA = KP707106781 * (Tu + Tz);
+	       ro[WS(os, 5)] = Tp - TA;
+	       ro[WS(os, 1)] = Tp + TA;
+	       TJ = TC - TB;
+	       TK = KP707106781 * (TG + TH);
+	       io[WS(os, 5)] = TJ - TK;
+	       io[WS(os, 1)] = TJ + TK;
+	  }
+	  { /* outputs 3 and 7 */
+	       E TD, TE, TF, TI;
+	       TD = TB + TC;
+	       TE = KP707106781 * (Tz - Tu);
+	       io[WS(os, 7)] = TD - TE;
+	       io[WS(os, 3)] = TD + TE;
+	       TF = Tn - To;
+	       TI = KP707106781 * (TG - TH);
+	       ro[WS(os, 7)] = TF - TI;
+	       ro[WS(os, 3)] = TF + TI;
+	  }
+     }
+}
+
+static const kdft_desc desc = { 8, "n1_8", {52, 4, 0, 0}, &GENUS, 0, 0, 0, 0 }; /* 52/4/0 repeats the add/mul/fma counts from this file's generator comment */
+void X(codelet_n1_8) (planner *p) { /* passes n1_8 and its descriptor to kdft_register for planner p */
+     X(kdft_register) (p, n1_8, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/n1_9.c b/src/fftw3/dft/codelets/standard/n1_9.c
new file mode 100644
index 0000000..07308dc
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/n1_9.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:32 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_notw -compact -variables 4 -n 9 -name n1_9 -include n.h */
+
+/*
+ * This function contains 80 FP additions, 40 FP multiplications,
+ * (or, 60 additions, 20 multiplications, 20 fused multiply/add),
+ * 39 stack variables, and 36 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: n1_9.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_9.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: n1_9.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "n.h"
+
+static void n1_9(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs) /* 9-point kernel: reads ri/ii at 0 and WS(is,1..8); writes ro/io at 0 and WS(os,1..8) */
+{
+     DK(KP939692620, +0.939692620785908384054109277324731469936208134); /* numerically cos(pi/9) */
+     DK(KP342020143, +0.342020143325668733044099614682259580763083368); /* numerically sin(pi/9) */
+     DK(KP984807753, +0.984807753012208059366743024589523013670643252); /* numerically cos(pi/18) */
+     DK(KP173648177, +0.173648177666930348851716626769314796000375677); /* numerically sin(pi/18) */
+     DK(KP642787609, +0.642787609686539326322643409907263432907559884); /* numerically sin(2*pi/9) */
+     DK(KP766044443, +0.766044443118978035202392650555416673935832457); /* numerically cos(2*pi/9) */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; input pointers step by ivs, output pointers by ovs */
+	  E T5, TO, Th, Tk, T1g, TR, Ta, T1c, Tq, TW, Tv, TX, Tf, T1d, TB;
+	  E T10, TG, TZ;
+	  { /* ri inputs 0, 3 and 6 */
+	       E T1, T2, T3, T4;
+	       T1 = ri[0];
+	       T2 = ri[WS(is, 3)];
+	       T3 = ri[WS(is, 6)];
+	       T4 = T2 + T3;
+	       T5 = T1 + T4;
+	       TO = KP866025403 * (T3 - T2);
+	       Th = FNMS(KP500000000, T4, T1); /* FMA/FNMS are defined in an included header; exact semantics not visible here */
+	  }
+	  { /* ii inputs 0, 3 and 6 */
+	       E TP, Ti, Tj, TQ;
+	       TP = ii[0];
+	       Ti = ii[WS(is, 3)];
+	       Tj = ii[WS(is, 6)];
+	       TQ = Ti + Tj;
+	       Tk = KP866025403 * (Ti - Tj);
+	       T1g = TP + TQ;
+	       TR = FNMS(KP500000000, TQ, TP);
+	  }
+	  { /* inputs 1, 4 and 7, combined with the same pattern as the two groups above */
+	       E T6, Ts, T9, Tr, Tp, Tt, Tm, Tu;
+	       T6 = ri[WS(is, 1)];
+	       Ts = ii[WS(is, 1)];
+	       {
+		    E T7, T8, Tn, To;
+		    T7 = ri[WS(is, 4)];
+		    T8 = ri[WS(is, 7)];
+		    T9 = T7 + T8;
+		    Tr = KP866025403 * (T8 - T7);
+		    Tn = ii[WS(is, 4)];
+		    To = ii[WS(is, 7)];
+		    Tp = KP866025403 * (Tn - To);
+		    Tt = Tn + To;
+	       }
+	       Ta = T6 + T9;
+	       T1c = Ts + Tt;
+	       Tm = FNMS(KP500000000, T9, T6);
+	       Tq = Tm + Tp;
+	       TW = Tm - Tp;
+	       Tu = FNMS(KP500000000, Tt, Ts);
+	       Tv = Tr + Tu;
+	       TX = Tu - Tr;
+	  }
+	  { /* inputs 2, 5 and 8, combined with the same pattern */
+	       E Tb, TD, Te, TC, TA, TE, Tx, TF;
+	       Tb = ri[WS(is, 2)];
+	       TD = ii[WS(is, 2)];
+	       {
+		    E Tc, Td, Ty, Tz;
+		    Tc = ri[WS(is, 5)];
+		    Td = ri[WS(is, 8)];
+		    Te = Tc + Td;
+		    TC = KP866025403 * (Td - Tc);
+		    Ty = ii[WS(is, 5)];
+		    Tz = ii[WS(is, 8)];
+		    TA = KP866025403 * (Ty - Tz);
+		    TE = Ty + Tz;
+	       }
+	       Tf = Tb + Te;
+	       T1d = TD + TE;
+	       Tx = FNMS(KP500000000, Te, Tb);
+	       TB = Tx + TA;
+	       T10 = Tx - TA;
+	       TF = FNMS(KP500000000, TE, TD);
+	       TG = TC + TF;
+	       TZ = TF - TC;
+	  }
+	  { /* outputs 0, 3 and 6, formed from the three group sums */
+	       E T1e, Tg, T1b, T1f, T1h, T1i;
+	       T1e = KP866025403 * (T1c - T1d);
+	       Tg = Ta + Tf;
+	       T1b = FNMS(KP500000000, Tg, T5);
+	       ro[0] = T5 + Tg; /* output 0 of ro: sum of all nine ri inputs */
+	       ro[WS(os, 3)] = T1b + T1e;
+	       ro[WS(os, 6)] = T1b - T1e;
+	       T1f = KP866025403 * (Tf - Ta);
+	       T1h = T1c + T1d;
+	       T1i = FNMS(KP500000000, T1h, T1g);
+	       io[WS(os, 3)] = T1f + T1i;
+	       io[0] = T1g + T1h; /* output 0 of io: sum of all nine ii inputs */
+	       io[WS(os, 6)] = T1i - T1f;
+	  }
+	  { /* outputs 1, 4 and 7 */
+	       E Tl, TS, TI, TN, TM, TT, TJ, TU;
+	       Tl = Th + Tk;
+	       TS = TO + TR;
+	       {
+		    E Tw, TH, TK, TL;
+		    Tw = FMA(KP766044443, Tq, KP642787609 * Tv);
+		    TH = FMA(KP173648177, TB, KP984807753 * TG);
+		    TI = Tw + TH;
+		    TN = KP866025403 * (TH - Tw);
+		    TK = FNMS(KP642787609, Tq, KP766044443 * Tv);
+		    TL = FNMS(KP984807753, TB, KP173648177 * TG);
+		    TM = KP866025403 * (TK - TL);
+		    TT = TK + TL;
+	       }
+	       ro[WS(os, 1)] = Tl + TI;
+	       io[WS(os, 1)] = TS + TT;
+	       TJ = FNMS(KP500000000, TI, Tl);
+	       ro[WS(os, 7)] = TJ - TM;
+	       ro[WS(os, 4)] = TJ + TM;
+	       TU = FNMS(KP500000000, TT, TS);
+	       io[WS(os, 4)] = TN + TU;
+	       io[WS(os, 7)] = TU - TN;
+	  }
+	  { /* outputs 2, 5 and 8 */
+	       E TV, T14, T12, T13, T17, T1a, T18, T19;
+	       TV = Th - Tk;
+	       T14 = TR - TO;
+	       {
+		    E TY, T11, T15, T16;
+		    TY = FMA(KP173648177, TW, KP984807753 * TX);
+		    T11 = FNMS(KP939692620, T10, KP342020143 * TZ);
+		    T12 = TY + T11;
+		    T13 = KP866025403 * (T11 - TY);
+		    T15 = FNMS(KP984807753, TW, KP173648177 * TX);
+		    T16 = FMA(KP342020143, T10, KP939692620 * TZ);
+		    T17 = T15 - T16;
+		    T1a = KP866025403 * (T15 + T16);
+	       }
+	       ro[WS(os, 2)] = TV + T12;
+	       io[WS(os, 2)] = T14 + T17;
+	       T18 = FNMS(KP500000000, T17, T14);
+	       io[WS(os, 5)] = T13 + T18;
+	       io[WS(os, 8)] = T18 - T13;
+	       T19 = FNMS(KP500000000, T12, TV);
+	       ro[WS(os, 8)] = T19 - T1a;
+	       ro[WS(os, 5)] = T19 + T1a;
+	  }
+     }
+}
+
+static const kdft_desc desc = { 9, "n1_9", {60, 20, 20, 0}, &GENUS, 0, 0, 0, 0 }; /* 60/20/20 repeats the add/mul/fma counts from this file's generator comment */
+void X(codelet_n1_9) (planner *p) { /* passes n1_9 and its descriptor to kdft_register for planner p */
+     X(kdft_register) (p, n1_9, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/scodlist.c b/src/fftw3/dft/codelets/standard/scodlist.c
new file mode 100644
index 0000000..9524d36
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/scodlist.c
@@ -0,0 +1,82 @@
+#include "ifftw.h"
+
+extern void X(codelet_n1_2)(planner *); /* prototypes for the per-codelet registration functions listed in the table below */
+extern void X(codelet_n1_3)(planner *);
+extern void X(codelet_n1_4)(planner *);
+extern void X(codelet_n1_5)(planner *);
+extern void X(codelet_n1_6)(planner *);
+extern void X(codelet_n1_7)(planner *);
+extern void X(codelet_n1_8)(planner *);
+extern void X(codelet_n1_9)(planner *);
+extern void X(codelet_n1_10)(planner *);
+extern void X(codelet_n1_11)(planner *);
+extern void X(codelet_n1_12)(planner *);
+extern void X(codelet_n1_13)(planner *);
+extern void X(codelet_n1_14)(planner *);
+extern void X(codelet_n1_15)(planner *);
+extern void X(codelet_n1_16)(planner *);
+extern void X(codelet_m1_16)(planner *); /* m1 family: only 16, 32 and 64 are declared */
+extern void X(codelet_m1_32)(planner *);
+extern void X(codelet_m1_64)(planner *);
+extern void X(codelet_t1_2)(planner *); /* t1 family: no 11, 13 or 14 is declared, unlike n1 */
+extern void X(codelet_t1_3)(planner *);
+extern void X(codelet_t1_4)(planner *);
+extern void X(codelet_t1_5)(planner *);
+extern void X(codelet_t1_6)(planner *);
+extern void X(codelet_t1_7)(planner *);
+extern void X(codelet_t1_8)(planner *);
+extern void X(codelet_t1_9)(planner *);
+extern void X(codelet_t1_10)(planner *);
+extern void X(codelet_t1_12)(planner *);
+extern void X(codelet_t1_15)(planner *);
+extern void X(codelet_t1_16)(planner *);
+extern void X(codelet_t1_32)(planner *);
+extern void X(codelet_t1_64)(planner *);
+extern void X(codelet_t2_4)(planner *); /* t2 family: only 4, 8, 16, 32 and 64 are declared */
+extern void X(codelet_t2_8)(planner *);
+extern void X(codelet_t2_16)(planner *);
+extern void X(codelet_t2_32)(planner *);
+extern void X(codelet_t2_64)(planner *);
+
+
+extern const solvtab X(solvtab_dft_standard); /* declaration immediately ahead of the definition on the next line */
+const solvtab X(solvtab_dft_standard) = { /* one SOLVTAB entry per registration function declared in this file, in declaration order */
+   SOLVTAB(X(codelet_n1_2)),
+   SOLVTAB(X(codelet_n1_3)),
+   SOLVTAB(X(codelet_n1_4)),
+   SOLVTAB(X(codelet_n1_5)),
+   SOLVTAB(X(codelet_n1_6)),
+   SOLVTAB(X(codelet_n1_7)),
+   SOLVTAB(X(codelet_n1_8)),
+   SOLVTAB(X(codelet_n1_9)),
+   SOLVTAB(X(codelet_n1_10)),
+   SOLVTAB(X(codelet_n1_11)),
+   SOLVTAB(X(codelet_n1_12)),
+   SOLVTAB(X(codelet_n1_13)),
+   SOLVTAB(X(codelet_n1_14)),
+   SOLVTAB(X(codelet_n1_15)),
+   SOLVTAB(X(codelet_n1_16)),
+   SOLVTAB(X(codelet_m1_16)),
+   SOLVTAB(X(codelet_m1_32)),
+   SOLVTAB(X(codelet_m1_64)),
+   SOLVTAB(X(codelet_t1_2)),
+   SOLVTAB(X(codelet_t1_3)),
+   SOLVTAB(X(codelet_t1_4)),
+   SOLVTAB(X(codelet_t1_5)),
+   SOLVTAB(X(codelet_t1_6)),
+   SOLVTAB(X(codelet_t1_7)),
+   SOLVTAB(X(codelet_t1_8)),
+   SOLVTAB(X(codelet_t1_9)),
+   SOLVTAB(X(codelet_t1_10)),
+   SOLVTAB(X(codelet_t1_12)),
+   SOLVTAB(X(codelet_t1_15)),
+   SOLVTAB(X(codelet_t1_16)),
+   SOLVTAB(X(codelet_t1_32)),
+   SOLVTAB(X(codelet_t1_64)),
+   SOLVTAB(X(codelet_t2_4)),
+   SOLVTAB(X(codelet_t2_8)),
+   SOLVTAB(X(codelet_t2_16)),
+   SOLVTAB(X(codelet_t2_32)),
+   SOLVTAB(X(codelet_t2_64)),
+   SOLVTAB_END /* SOLVTAB_END closes the list; its definition is in a header outside this file */
+};
diff --git a/src/fftw3/dft/codelets/standard/t1_10.c b/src/fftw3/dft/codelets/standard/t1_10.c
new file mode 100644
index 0000000..cd11522
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_10.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:30:00 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 10 -name t1_10 -include t.h */
+
+/*
+ * This function contains 102 FP additions, 60 FP multiplications,
+ * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
+ * 45 stack variables, and 40 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_10.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_10.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_10.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t1_10(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{ /* Generated twiddle codelet for n = 10 (see the gen_twiddle command recorded above); reads and overwrites ri/ii in place. */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 18) { /* m passes; each pass advances ri/ii by dist and W by 18 (nine pairs W[2k-2], W[2k-1]) */
+	  E T7, T1O, TT, T1C, TF, TQ, TR, T1o, T1p, T1y, TX, TY, TZ, T1d, T1g;
+	  E T1M, Ti, Tt, Tu, T1r, T1s, T1x, TU, TV, TW, T16, T19, T1L;
+	  { /* inputs 0 and 5 */
+	       E T1, T1B, T6, T1A;
+	       T1 = ri[0]; /* element 0 is used without any W factor */
+	       T1B = ii[0];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = ri[WS(ios, 5)];
+		    T5 = ii[WS(ios, 5)];
+		    T2 = W[8];
+		    T4 = W[9];
+		    T6 = FMA(T2, T3, T4 * T5); /* NOTE(review): if FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b (macros defined outside this file), T6 and T1A are the re/im parts of (T3 + i*T5) * (T2 - i*T4) - confirm */
+		    T1A = FNMS(T4, T3, T2 * T5);
+	       }
+	       T7 = T1 - T6;
+	       T1O = T1B - T1A;
+	       TT = T1 + T6;
+	       T1C = T1A + T1B;
+	  }
+	  { /* inputs 4, 1, 9, 6, each combined with its W pair the same way as input 5 */
+	       E Tz, T1b, TP, T1f, TE, T1c, TK, T1e;
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = ri[WS(ios, 4)];
+		    Ty = ii[WS(ios, 4)];
+		    Tv = W[6];
+		    Tx = W[7];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    T1b = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E TM, TO, TL, TN;
+		    TM = ri[WS(ios, 1)];
+		    TO = ii[WS(ios, 1)];
+		    TL = W[0];
+		    TN = W[1];
+		    TP = FMA(TL, TM, TN * TO);
+		    T1f = FNMS(TN, TM, TL * TO);
+	       }
+	       {
+		    E TB, TD, TA, TC;
+		    TB = ri[WS(ios, 9)];
+		    TD = ii[WS(ios, 9)];
+		    TA = W[16];
+		    TC = W[17];
+		    TE = FMA(TA, TB, TC * TD);
+		    T1c = FNMS(TC, TB, TA * TD);
+	       }
+	       {
+		    E TH, TJ, TG, TI;
+		    TH = ri[WS(ios, 6)];
+		    TJ = ii[WS(ios, 6)];
+		    TG = W[10];
+		    TI = W[11];
+		    TK = FMA(TG, TH, TI * TJ);
+		    T1e = FNMS(TI, TH, TG * TJ);
+	       }
+	       TF = Tz - TE;
+	       TQ = TK - TP;
+	       TR = TF + TQ;
+	       T1o = T1b + T1c;
+	       T1p = T1e + T1f;
+	       T1y = T1o + T1p;
+	       TX = Tz + TE;
+	       TY = TK + TP;
+	       TZ = TX + TY;
+	       T1d = T1b - T1c;
+	       T1g = T1e - T1f;
+	       T1M = T1d + T1g;
+	  }
+	  { /* inputs 2, 3, 7, 8 */
+	       E Tc, T14, Ts, T18, Th, T15, Tn, T17;
+	       {
+		    E T9, Tb, T8, Ta;
+		    T9 = ri[WS(ios, 2)];
+		    Tb = ii[WS(ios, 2)];
+		    T8 = W[2];
+		    Ta = W[3];
+		    Tc = FMA(T8, T9, Ta * Tb);
+		    T14 = FNMS(Ta, T9, T8 * Tb);
+	       }
+	       {
+		    E Tp, Tr, To, Tq;
+		    Tp = ri[WS(ios, 3)];
+		    Tr = ii[WS(ios, 3)];
+		    To = W[4];
+		    Tq = W[5];
+		    Ts = FMA(To, Tp, Tq * Tr);
+		    T18 = FNMS(Tq, Tp, To * Tr);
+	       }
+	       {
+		    E Te, Tg, Td, Tf;
+		    Te = ri[WS(ios, 7)];
+		    Tg = ii[WS(ios, 7)];
+		    Td = W[12];
+		    Tf = W[13];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    T15 = FNMS(Tf, Te, Td * Tg);
+	       }
+	       {
+		    E Tk, Tm, Tj, Tl;
+		    Tk = ri[WS(ios, 8)];
+		    Tm = ii[WS(ios, 8)];
+		    Tj = W[14];
+		    Tl = W[15];
+		    Tn = FMA(Tj, Tk, Tl * Tm);
+		    T17 = FNMS(Tl, Tk, Tj * Tm);
+	       }
+	       Ti = Tc - Th;
+	       Tt = Tn - Ts;
+	       Tu = Ti + Tt;
+	       T1r = T14 + T15;
+	       T1s = T17 + T18;
+	       T1x = T1r + T1s;
+	       TU = Tc + Th;
+	       TV = Tn + Ts;
+	       TW = TU + TV;
+	       T16 = T14 - T15;
+	       T19 = T17 - T18;
+	       T1L = T16 + T19;
+	  }
+	  { /* stores ri at indices 5, 7, 3, 9, 1 */
+	       E T11, TS, T12, T1i, T1k, T1a, T1h, T1j, T13;
+	       T11 = KP559016994 * (Tu - TR);
+	       TS = Tu + TR;
+	       T12 = FNMS(KP250000000, TS, T7);
+	       T1a = T16 - T19;
+	       T1h = T1d - T1g;
+	       T1i = FMA(KP951056516, T1a, KP587785252 * T1h);
+	       T1k = FNMS(KP587785252, T1a, KP951056516 * T1h);
+	       ri[WS(ios, 5)] = T7 + TS;
+	       T1j = T12 - T11;
+	       ri[WS(ios, 7)] = T1j - T1k;
+	       ri[WS(ios, 3)] = T1j + T1k;
+	       T13 = T11 + T12;
+	       ri[WS(ios, 9)] = T13 - T1i;
+	       ri[WS(ios, 1)] = T13 + T1i;
+	  }
+	  { /* stores ii at indices 5, 3, 7, 1, 9 */
+	       E T1N, T1P, T1Q, T1U, T1W, T1S, T1T, T1V, T1R;
+	       T1N = KP559016994 * (T1L - T1M);
+	       T1P = T1L + T1M;
+	       T1Q = FNMS(KP250000000, T1P, T1O);
+	       T1S = Ti - Tt;
+	       T1T = TF - TQ;
+	       T1U = FMA(KP951056516, T1S, KP587785252 * T1T);
+	       T1W = FNMS(KP587785252, T1S, KP951056516 * T1T);
+	       ii[WS(ios, 5)] = T1P + T1O;
+	       T1V = T1Q - T1N;
+	       ii[WS(ios, 3)] = T1V - T1W;
+	       ii[WS(ios, 7)] = T1W + T1V;
+	       T1R = T1N + T1Q;
+	       ii[WS(ios, 1)] = T1R - T1U;
+	       ii[WS(ios, 9)] = T1U + T1R;
+	  }
+	  { /* stores ri at indices 0, 4, 6, 2, 8 */
+	       E T1m, T10, T1l, T1u, T1w, T1q, T1t, T1v, T1n;
+	       T1m = KP559016994 * (TW - TZ);
+	       T10 = TW + TZ;
+	       T1l = FNMS(KP250000000, T10, TT);
+	       T1q = T1o - T1p;
+	       T1t = T1r - T1s;
+	       T1u = FNMS(KP587785252, T1t, KP951056516 * T1q);
+	       T1w = FMA(KP951056516, T1t, KP587785252 * T1q);
+	       ri[0] = TT + T10;
+	       T1v = T1m + T1l;
+	       ri[WS(ios, 4)] = T1v - T1w;
+	       ri[WS(ios, 6)] = T1v + T1w;
+	       T1n = T1l - T1m;
+	       ri[WS(ios, 2)] = T1n - T1u;
+	       ri[WS(ios, 8)] = T1n + T1u;
+	  }
+	  { /* stores ii at indices 0, 4, 6, 2, 8 */
+	       E T1H, T1z, T1G, T1F, T1J, T1D, T1E, T1K, T1I;
+	       T1H = KP559016994 * (T1x - T1y);
+	       T1z = T1x + T1y;
+	       T1G = FNMS(KP250000000, T1z, T1C);
+	       T1D = TX - TY;
+	       T1E = TU - TV;
+	       T1F = FNMS(KP587785252, T1E, KP951056516 * T1D);
+	       T1J = FMA(KP951056516, T1E, KP587785252 * T1D);
+	       ii[0] = T1z + T1C;
+	       T1K = T1H + T1G;
+	       ii[WS(ios, 4)] = T1J + T1K;
+	       ii[WS(ios, 6)] = T1K - T1J;
+	       T1I = T1G - T1H;
+	       ii[WS(ios, 2)] = T1F + T1I;
+	       ii[WS(ios, 8)] = T1I - T1F;
+	  }
+     }
+     return W; /* W as left by the loop, i.e. advanced by 18 per pass */
+}
+
+static const tw_instr twinstr[] = { /* two-entry tw_instr table handed to desc below; NOTE(review): TW_FULL/TW_NEXT semantics are defined outside this file - confirm */
+     {TW_FULL, 0, 10},
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 10, "t1_10", twinstr, {72, 30, 30, 0}, &GENUS, 0, 0, 0 }; /* 10 and "t1_10" mirror -n/-name of the generator command; 72/30/30 mirror the add/mul/fma counts in the header comment */
+
+void X(codelet_t1_10) (planner *p) { /* hands t1_10 and its descriptor to X(kdft_dit_register) for planner p */
+     X(kdft_dit_register) (p, t1_10, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t1_12.c b/src/fftw3/dft/codelets/standard/t1_12.c
new file mode 100644
index 0000000..e0ea2eb
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_12.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:30:03 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 12 -name t1_12 -include t.h */
+
+/*
+ * This function contains 118 FP additions, 60 FP multiplications,
+ * (or, 88 additions, 30 multiplications, 30 fused multiply/add),
+ * 47 stack variables, and 48 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_12.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_12.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_12.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t1_12(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{ /* Generated twiddle codelet for n = 12 (see the gen_twiddle command recorded above); reads and overwrites ri/ii in place. */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 22) { /* m passes; each pass advances ri/ii by dist and W by 22 (eleven pairs W[2k-2], W[2k-1]) */
+	  E T1, T1W, T18, T21, Tc, T15, T1V, T22, TR, T1E, T1o, T1D, T12, T1l, T1F;
+	  E T1G, Ti, T1S, T1d, T24, Tt, T1a, T1T, T25, TA, T1z, T1j, T1y, TL, T1g;
+	  E T1A, T1B;
+	  { /* inputs 0, 4, 8 */
+	       E T6, T16, Tb, T17;
+	       T1 = ri[0]; /* element 0 is used without any W factor */
+	       T1W = ii[0];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = ri[WS(ios, 4)];
+		    T5 = ii[WS(ios, 4)];
+		    T2 = W[6];
+		    T4 = W[7];
+		    T6 = FMA(T2, T3, T4 * T5); /* NOTE(review): if FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b (macros defined outside this file), T6 and T16 are the re/im parts of (T3 + i*T5) * (T2 - i*T4) - confirm */
+		    T16 = FNMS(T4, T3, T2 * T5);
+	       }
+	       {
+		    E T8, Ta, T7, T9;
+		    T8 = ri[WS(ios, 8)];
+		    Ta = ii[WS(ios, 8)];
+		    T7 = W[14];
+		    T9 = W[15];
+		    Tb = FMA(T7, T8, T9 * Ta);
+		    T17 = FNMS(T9, T8, T7 * Ta);
+	       }
+	       T18 = KP866025403 * (T16 - T17);
+	       T21 = KP866025403 * (Tb - T6);
+	       Tc = T6 + Tb;
+	       T15 = FNMS(KP500000000, Tc, T1);
+	       T1V = T16 + T17;
+	       T22 = FNMS(KP500000000, T1V, T1W);
+	  }
+	  { /* inputs 9, 5, 1 */
+	       E T11, T1n, TW, T1m;
+	       {
+		    E TO, TQ, TN, TP;
+		    TO = ri[WS(ios, 9)];
+		    TQ = ii[WS(ios, 9)];
+		    TN = W[16];
+		    TP = W[17];
+		    TR = FMA(TN, TO, TP * TQ);
+		    T1E = FNMS(TP, TO, TN * TQ);
+	       }
+	       {
+		    E TY, T10, TX, TZ;
+		    TY = ri[WS(ios, 5)];
+		    T10 = ii[WS(ios, 5)];
+		    TX = W[8];
+		    TZ = W[9];
+		    T11 = FMA(TX, TY, TZ * T10);
+		    T1n = FNMS(TZ, TY, TX * T10);
+	       }
+	       {
+		    E TT, TV, TS, TU;
+		    TT = ri[WS(ios, 1)];
+		    TV = ii[WS(ios, 1)];
+		    TS = W[0];
+		    TU = W[1];
+		    TW = FMA(TS, TT, TU * TV);
+		    T1m = FNMS(TU, TT, TS * TV);
+	       }
+	       T1o = KP866025403 * (T1m - T1n);
+	       T1D = KP866025403 * (T11 - TW);
+	       T12 = TW + T11;
+	       T1l = FNMS(KP500000000, T12, TR);
+	       T1F = T1m + T1n;
+	       T1G = FNMS(KP500000000, T1F, T1E);
+	  }
+	  { /* inputs 6, 2, 10 */
+	       E Ts, T1c, Tn, T1b;
+	       {
+		    E Tf, Th, Te, Tg;
+		    Tf = ri[WS(ios, 6)];
+		    Th = ii[WS(ios, 6)];
+		    Te = W[10];
+		    Tg = W[11];
+		    Ti = FMA(Te, Tf, Tg * Th);
+		    T1S = FNMS(Tg, Tf, Te * Th);
+	       }
+	       {
+		    E Tp, Tr, To, Tq;
+		    Tp = ri[WS(ios, 2)];
+		    Tr = ii[WS(ios, 2)];
+		    To = W[2];
+		    Tq = W[3];
+		    Ts = FMA(To, Tp, Tq * Tr);
+		    T1c = FNMS(Tq, Tp, To * Tr);
+	       }
+	       {
+		    E Tk, Tm, Tj, Tl;
+		    Tk = ri[WS(ios, 10)];
+		    Tm = ii[WS(ios, 10)];
+		    Tj = W[18];
+		    Tl = W[19];
+		    Tn = FMA(Tj, Tk, Tl * Tm);
+		    T1b = FNMS(Tl, Tk, Tj * Tm);
+	       }
+	       T1d = KP866025403 * (T1b - T1c);
+	       T24 = KP866025403 * (Ts - Tn);
+	       Tt = Tn + Ts;
+	       T1a = FNMS(KP500000000, Tt, Ti);
+	       T1T = T1b + T1c;
+	       T25 = FNMS(KP500000000, T1T, T1S);
+	  }
+	  { /* inputs 3, 11, 7 */
+	       E TK, T1i, TF, T1h;
+	       {
+		    E Tx, Tz, Tw, Ty;
+		    Tx = ri[WS(ios, 3)];
+		    Tz = ii[WS(ios, 3)];
+		    Tw = W[4];
+		    Ty = W[5];
+		    TA = FMA(Tw, Tx, Ty * Tz);
+		    T1z = FNMS(Ty, Tx, Tw * Tz);
+	       }
+	       {
+		    E TH, TJ, TG, TI;
+		    TH = ri[WS(ios, 11)];
+		    TJ = ii[WS(ios, 11)];
+		    TG = W[20];
+		    TI = W[21];
+		    TK = FMA(TG, TH, TI * TJ);
+		    T1i = FNMS(TI, TH, TG * TJ);
+	       }
+	       {
+		    E TC, TE, TB, TD;
+		    TC = ri[WS(ios, 7)];
+		    TE = ii[WS(ios, 7)];
+		    TB = W[12];
+		    TD = W[13];
+		    TF = FMA(TB, TC, TD * TE);
+		    T1h = FNMS(TD, TC, TB * TE);
+	       }
+	       T1j = KP866025403 * (T1h - T1i);
+	       T1y = KP866025403 * (TK - TF);
+	       TL = TF + TK;
+	       T1g = FNMS(KP500000000, TL, TA);
+	       T1A = T1h + T1i;
+	       T1B = FNMS(KP500000000, T1A, T1z);
+	  }
+	  { /* stores ri/ii at indices 6, 0, 3, 9 */
+	       E Tv, T1N, T1Y, T20, T14, T1Z, T1Q, T1R;
+	       {
+		    E Td, Tu, T1U, T1X;
+		    Td = T1 + Tc;
+		    Tu = Ti + Tt;
+		    Tv = Td + Tu;
+		    T1N = Td - Tu;
+		    T1U = T1S + T1T;
+		    T1X = T1V + T1W;
+		    T1Y = T1U + T1X;
+		    T20 = T1X - T1U;
+	       }
+	       {
+		    E TM, T13, T1O, T1P;
+		    TM = TA + TL;
+		    T13 = TR + T12;
+		    T14 = TM + T13;
+		    T1Z = TM - T13;
+		    T1O = T1z + T1A;
+		    T1P = T1E + T1F;
+		    T1Q = T1O - T1P;
+		    T1R = T1O + T1P;
+	       }
+	       ri[WS(ios, 6)] = Tv - T14;
+	       ii[WS(ios, 6)] = T1Y - T1R;
+	       ri[0] = Tv + T14;
+	       ii[0] = T1R + T1Y;
+	       ri[WS(ios, 3)] = T1N - T1Q;
+	       ii[WS(ios, 3)] = T1Z + T20;
+	       ri[WS(ios, 9)] = T1N + T1Q;
+	       ii[WS(ios, 9)] = T20 - T1Z;
+	  }
+	  { /* stores ri/ii at indices 10, 4, 7, 1 */
+	       E T1t, T1x, T27, T2a, T1w, T28, T1I, T29;
+	       {
+		    E T1r, T1s, T23, T26;
+		    T1r = T15 + T18;
+		    T1s = T1a + T1d;
+		    T1t = T1r + T1s;
+		    T1x = T1r - T1s;
+		    T23 = T21 + T22;
+		    T26 = T24 + T25;
+		    T27 = T23 - T26;
+		    T2a = T26 + T23;
+	       }
+	       {
+		    E T1u, T1v, T1C, T1H;
+		    T1u = T1g + T1j;
+		    T1v = T1l + T1o;
+		    T1w = T1u + T1v;
+		    T28 = T1u - T1v;
+		    T1C = T1y + T1B;
+		    T1H = T1D + T1G;
+		    T1I = T1C - T1H;
+		    T29 = T1C + T1H;
+	       }
+	       ri[WS(ios, 10)] = T1t - T1w;
+	       ii[WS(ios, 10)] = T2a - T29;
+	       ri[WS(ios, 4)] = T1t + T1w;
+	       ii[WS(ios, 4)] = T29 + T2a;
+	       ri[WS(ios, 7)] = T1x - T1I;
+	       ii[WS(ios, 7)] = T28 + T27;
+	       ri[WS(ios, 1)] = T1x + T1I;
+	       ii[WS(ios, 1)] = T27 - T28;
+	  }
+	  { /* stores ri/ii at indices 2, 8, 11, 5 */
+	       E T1f, T1J, T2d, T2f, T1q, T2g, T1M, T2e;
+	       {
+		    E T19, T1e, T2b, T2c;
+		    T19 = T15 - T18;
+		    T1e = T1a - T1d;
+		    T1f = T19 + T1e;
+		    T1J = T19 - T1e;
+		    T2b = T25 - T24;
+		    T2c = T22 - T21;
+		    T2d = T2b + T2c;
+		    T2f = T2c - T2b;
+	       }
+	       {
+		    E T1k, T1p, T1K, T1L;
+		    T1k = T1g - T1j;
+		    T1p = T1l - T1o;
+		    T1q = T1k + T1p;
+		    T2g = T1k - T1p;
+		    T1K = T1B - T1y;
+		    T1L = T1G - T1D;
+		    T1M = T1K - T1L;
+		    T2e = T1K + T1L;
+	       }
+	       ri[WS(ios, 2)] = T1f - T1q;
+	       ii[WS(ios, 2)] = T2d - T2e;
+	       ri[WS(ios, 8)] = T1f + T1q;
+	       ii[WS(ios, 8)] = T2e + T2d;
+	       ri[WS(ios, 11)] = T1J - T1M;
+	       ii[WS(ios, 11)] = T2g + T2f;
+	       ri[WS(ios, 5)] = T1J + T1M;
+	       ii[WS(ios, 5)] = T2f - T2g;
+	  }
+     }
+     return W; /* W as left by the loop, i.e. advanced by 22 per pass */
+}
+
+static const tw_instr twinstr[] = { /* two-entry tw_instr table handed to desc below; NOTE(review): TW_FULL/TW_NEXT semantics are defined outside this file - confirm */
+     {TW_FULL, 0, 12},
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 12, "t1_12", twinstr, {88, 30, 30, 0}, &GENUS, 0, 0, 0 }; /* 12 and "t1_12" mirror -n/-name of the generator command; 88/30/30 mirror the add/mul/fma counts in the header comment */
+
+void X(codelet_t1_12) (planner *p) { /* hands t1_12 and its descriptor to X(kdft_dit_register) for planner p */
+     X(kdft_dit_register) (p, t1_12, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t1_15.c b/src/fftw3/dft/codelets/standard/t1_15.c
new file mode 100644
index 0000000..1c721cc
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_15.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:30:07 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 15 -name t1_15 -include t.h */
+
+/*
+ * This function contains 184 FP additions, 112 FP multiplications,
+ * (or, 128 additions, 56 multiplications, 56 fused multiply/add),
+ * 65 stack variables, and 60 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_15.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_15.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_15.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t1_15(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{ /* Generated twiddle codelet for n = 15 (see the gen_twiddle command recorded above); reads and overwrites ri/ii in place. */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 28) { /* m passes; each pass advances ri/ii by dist and W by 28 (fourteen pairs W[2k-2], W[2k-1]) */
+	  E T1q, T34, Td, T1n, T2S, T35, T13, T1k, T1l, T2E, T2F, T2O, T1H, T1T, T2k;
+	  E T2t, T2f, T2s, T1M, T1U, Tu, TL, TM, T2H, T2I, T2N, T1w, T1Q, T29, T2w;
+	  E T24, T2v, T1B, T1R;
+	  { /* inputs 0, 5, 10 */
+	       E T1, T2R, T6, T1o, Tb, T1p, Tc, T2Q;
+	       T1 = ri[0]; /* element 0 is used without any W factor */
+	       T2R = ii[0];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = ri[WS(ios, 5)];
+		    T5 = ii[WS(ios, 5)];
+		    T2 = W[8];
+		    T4 = W[9];
+		    T6 = FMA(T2, T3, T4 * T5); /* NOTE(review): if FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b (macros defined outside this file), T6 and T1o are the re/im parts of (T3 + i*T5) * (T2 - i*T4) - confirm */
+		    T1o = FNMS(T4, T3, T2 * T5);
+	       }
+	       {
+		    E T8, Ta, T7, T9;
+		    T8 = ri[WS(ios, 10)];
+		    Ta = ii[WS(ios, 10)];
+		    T7 = W[18];
+		    T9 = W[19];
+		    Tb = FMA(T7, T8, T9 * Ta);
+		    T1p = FNMS(T9, T8, T7 * Ta);
+	       }
+	       T1q = KP866025403 * (T1o - T1p);
+	       T34 = KP866025403 * (Tb - T6);
+	       Tc = T6 + Tb;
+	       Td = T1 + Tc;
+	       T1n = FNMS(KP500000000, Tc, T1);
+	       T2Q = T1o + T1p;
+	       T2S = T2Q + T2R;
+	       T35 = FNMS(KP500000000, T2Q, T2R);
+	  }
+	  { /* inputs 6, 9, 11, 1, 14, 4 */
+	       E TR, T2c, T18, T2h, TW, T1E, T11, T1F, T12, T2d, T1d, T1J, T1i, T1K, T1j;
+	       E T2i;
+	       {
+		    E TO, TQ, TN, TP;
+		    TO = ri[WS(ios, 6)];
+		    TQ = ii[WS(ios, 6)];
+		    TN = W[10];
+		    TP = W[11];
+		    TR = FMA(TN, TO, TP * TQ);
+		    T2c = FNMS(TP, TO, TN * TQ);
+	       }
+	       {
+		    E T15, T17, T14, T16;
+		    T15 = ri[WS(ios, 9)];
+		    T17 = ii[WS(ios, 9)];
+		    T14 = W[16];
+		    T16 = W[17];
+		    T18 = FMA(T14, T15, T16 * T17);
+		    T2h = FNMS(T16, T15, T14 * T17);
+	       }
+	       {
+		    E TT, TV, TS, TU;
+		    TT = ri[WS(ios, 11)];
+		    TV = ii[WS(ios, 11)];
+		    TS = W[20];
+		    TU = W[21];
+		    TW = FMA(TS, TT, TU * TV);
+		    T1E = FNMS(TU, TT, TS * TV);
+	       }
+	       {
+		    E TY, T10, TX, TZ;
+		    TY = ri[WS(ios, 1)];
+		    T10 = ii[WS(ios, 1)];
+		    TX = W[0];
+		    TZ = W[1];
+		    T11 = FMA(TX, TY, TZ * T10);
+		    T1F = FNMS(TZ, TY, TX * T10);
+	       }
+	       T12 = TW + T11;
+	       T2d = T1E + T1F;
+	       {
+		    E T1a, T1c, T19, T1b;
+		    T1a = ri[WS(ios, 14)];
+		    T1c = ii[WS(ios, 14)];
+		    T19 = W[26];
+		    T1b = W[27];
+		    T1d = FMA(T19, T1a, T1b * T1c);
+		    T1J = FNMS(T1b, T1a, T19 * T1c);
+	       }
+	       {
+		    E T1f, T1h, T1e, T1g;
+		    T1f = ri[WS(ios, 4)];
+		    T1h = ii[WS(ios, 4)];
+		    T1e = W[6];
+		    T1g = W[7];
+		    T1i = FMA(T1e, T1f, T1g * T1h);
+		    T1K = FNMS(T1g, T1f, T1e * T1h);
+	       }
+	       T1j = T1d + T1i;
+	       T2i = T1J + T1K;
+	       {
+		    E T1D, T1G, T2g, T2j;
+		    T13 = TR + T12;
+		    T1k = T18 + T1j;
+		    T1l = T13 + T1k;
+		    T2E = T2c + T2d;
+		    T2F = T2h + T2i;
+		    T2O = T2E + T2F;
+		    T1D = FNMS(KP500000000, T12, TR);
+		    T1G = KP866025403 * (T1E - T1F);
+		    T1H = T1D - T1G;
+		    T1T = T1D + T1G;
+		    T2g = KP866025403 * (T1i - T1d);
+		    T2j = FNMS(KP500000000, T2i, T2h);
+		    T2k = T2g + T2j;
+		    T2t = T2j - T2g;
+		    {
+			 E T2b, T2e, T1I, T1L;
+			 T2b = KP866025403 * (T11 - TW);
+			 T2e = FNMS(KP500000000, T2d, T2c);
+			 T2f = T2b + T2e;
+			 T2s = T2e - T2b;
+			 T1I = FNMS(KP500000000, T1j, T18);
+			 T1L = KP866025403 * (T1J - T1K);
+			 T1M = T1I - T1L;
+			 T1U = T1I + T1L;
+		    }
+	       }
+	  }
+	  { /* inputs 3, 12, 8, 13, 2, 7 */
+	       E Ti, T21, Tz, T26, Tn, T1t, Ts, T1u, Tt, T22, TE, T1y, TJ, T1z, TK;
+	       E T27;
+	       {
+		    E Tf, Th, Te, Tg;
+		    Tf = ri[WS(ios, 3)];
+		    Th = ii[WS(ios, 3)];
+		    Te = W[4];
+		    Tg = W[5];
+		    Ti = FMA(Te, Tf, Tg * Th);
+		    T21 = FNMS(Tg, Tf, Te * Th);
+	       }
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = ri[WS(ios, 12)];
+		    Ty = ii[WS(ios, 12)];
+		    Tv = W[22];
+		    Tx = W[23];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    T26 = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E Tk, Tm, Tj, Tl;
+		    Tk = ri[WS(ios, 8)];
+		    Tm = ii[WS(ios, 8)];
+		    Tj = W[14];
+		    Tl = W[15];
+		    Tn = FMA(Tj, Tk, Tl * Tm);
+		    T1t = FNMS(Tl, Tk, Tj * Tm);
+	       }
+	       {
+		    E Tp, Tr, To, Tq;
+		    Tp = ri[WS(ios, 13)];
+		    Tr = ii[WS(ios, 13)];
+		    To = W[24];
+		    Tq = W[25];
+		    Ts = FMA(To, Tp, Tq * Tr);
+		    T1u = FNMS(Tq, Tp, To * Tr);
+	       }
+	       Tt = Tn + Ts;
+	       T22 = T1t + T1u;
+	       {
+		    E TB, TD, TA, TC;
+		    TB = ri[WS(ios, 2)];
+		    TD = ii[WS(ios, 2)];
+		    TA = W[2];
+		    TC = W[3];
+		    TE = FMA(TA, TB, TC * TD);
+		    T1y = FNMS(TC, TB, TA * TD);
+	       }
+	       {
+		    E TG, TI, TF, TH;
+		    TG = ri[WS(ios, 7)];
+		    TI = ii[WS(ios, 7)];
+		    TF = W[12];
+		    TH = W[13];
+		    TJ = FMA(TF, TG, TH * TI);
+		    T1z = FNMS(TH, TG, TF * TI);
+	       }
+	       TK = TE + TJ;
+	       T27 = T1y + T1z;
+	       {
+		    E T1s, T1v, T25, T28;
+		    Tu = Ti + Tt;
+		    TL = Tz + TK;
+		    TM = Tu + TL;
+		    T2H = T21 + T22;
+		    T2I = T26 + T27;
+		    T2N = T2H + T2I;
+		    T1s = FNMS(KP500000000, Tt, Ti);
+		    T1v = KP866025403 * (T1t - T1u);
+		    T1w = T1s - T1v;
+		    T1Q = T1s + T1v;
+		    T25 = KP866025403 * (TJ - TE);
+		    T28 = FNMS(KP500000000, T27, T26);
+		    T29 = T25 + T28;
+		    T2w = T28 - T25;
+		    {
+			 E T20, T23, T1x, T1A;
+			 T20 = KP866025403 * (Ts - Tn);
+			 T23 = FNMS(KP500000000, T22, T21);
+			 T24 = T20 + T23;
+			 T2v = T23 - T20;
+			 T1x = FNMS(KP500000000, TK, Tz);
+			 T1A = KP866025403 * (T1y - T1z);
+			 T1B = T1x - T1A;
+			 T1R = T1x + T1A;
+		    }
+	       }
+	  }
+	  { /* stores ri at indices 0, 9, 6, 12, 3 */
+	       E T2C, T1m, T2B, T2K, T2M, T2G, T2J, T2L, T2D;
+	       T2C = KP559016994 * (TM - T1l);
+	       T1m = TM + T1l;
+	       T2B = FNMS(KP250000000, T1m, Td);
+	       T2G = T2E - T2F;
+	       T2J = T2H - T2I;
+	       T2K = FNMS(KP587785252, T2J, KP951056516 * T2G);
+	       T2M = FMA(KP951056516, T2J, KP587785252 * T2G);
+	       ri[0] = Td + T1m;
+	       T2L = T2C + T2B;
+	       ri[WS(ios, 9)] = T2L - T2M;
+	       ri[WS(ios, 6)] = T2L + T2M;
+	       T2D = T2B - T2C;
+	       ri[WS(ios, 12)] = T2D - T2K;
+	       ri[WS(ios, 3)] = T2D + T2K;
+	  }
+	  { /* stores ii at indices 0, 6, 9, 3, 12 */
+	       E T2U, T2P, T2T, T2Y, T30, T2W, T2X, T2Z, T2V;
+	       T2U = KP559016994 * (T2N - T2O);
+	       T2P = T2N + T2O;
+	       T2T = FNMS(KP250000000, T2P, T2S);
+	       T2W = T13 - T1k;
+	       T2X = Tu - TL;
+	       T2Y = FNMS(KP587785252, T2X, KP951056516 * T2W);
+	       T30 = FMA(KP951056516, T2X, KP587785252 * T2W);
+	       ii[0] = T2P + T2S;
+	       T2Z = T2U + T2T;
+	       ii[WS(ios, 6)] = T2Z - T30;
+	       ii[WS(ios, 9)] = T30 + T2Z;
+	       T2V = T2T - T2U;
+	       ii[WS(ios, 3)] = T2V - T2Y;
+	       ii[WS(ios, 12)] = T2Y + T2V;
+	  }
+	  { /* stores ri at indices 5, 14, 11, 2, 8 */
+	       E T2y, T2A, T1r, T1O, T2p, T2q, T2z, T2r;
+	       {
+		    E T2u, T2x, T1C, T1N;
+		    T2u = T2s - T2t;
+		    T2x = T2v - T2w;
+		    T2y = FNMS(KP587785252, T2x, KP951056516 * T2u);
+		    T2A = FMA(KP951056516, T2x, KP587785252 * T2u);
+		    T1r = T1n - T1q;
+		    T1C = T1w + T1B;
+		    T1N = T1H + T1M;
+		    T1O = T1C + T1N;
+		    T2p = FNMS(KP250000000, T1O, T1r);
+		    T2q = KP559016994 * (T1C - T1N);
+	       }
+	       ri[WS(ios, 5)] = T1r + T1O;
+	       T2z = T2q + T2p;
+	       ri[WS(ios, 14)] = T2z - T2A;
+	       ri[WS(ios, 11)] = T2z + T2A;
+	       T2r = T2p - T2q;
+	       ri[WS(ios, 2)] = T2r - T2y;
+	       ri[WS(ios, 8)] = T2r + T2y;
+	  }
+	  { /* stores ii at indices 5, 11, 14, 2, 8 */
+	       E T3h, T3q, T3i, T3l, T3m, T3n, T3p, T3o;
+	       {
+		    E T3f, T3g, T3j, T3k;
+		    T3f = T1H - T1M;
+		    T3g = T1w - T1B;
+		    T3h = FNMS(KP587785252, T3g, KP951056516 * T3f);
+		    T3q = FMA(KP951056516, T3g, KP587785252 * T3f);
+		    T3i = T35 - T34;
+		    T3j = T2v + T2w;
+		    T3k = T2s + T2t;
+		    T3l = T3j + T3k;
+		    T3m = FNMS(KP250000000, T3l, T3i);
+		    T3n = KP559016994 * (T3j - T3k);
+	       }
+	       ii[WS(ios, 5)] = T3l + T3i;
+	       T3p = T3n + T3m;
+	       ii[WS(ios, 11)] = T3p - T3q;
+	       ii[WS(ios, 14)] = T3q + T3p;
+	       T3o = T3m - T3n;
+	       ii[WS(ios, 2)] = T3h + T3o;
+	       ii[WS(ios, 8)] = T3o - T3h;
+	  }
+	  { /* stores ii at indices 10, 7, 13, 1, 4 */
+	       E T3c, T3d, T36, T37, T33, T38, T3e, T39;
+	       {
+		    E T3a, T3b, T31, T32;
+		    T3a = T1Q - T1R;
+		    T3b = T1T - T1U;
+		    T3c = FMA(KP951056516, T3a, KP587785252 * T3b);
+		    T3d = FNMS(KP587785252, T3a, KP951056516 * T3b);
+		    T36 = T34 + T35;
+		    T31 = T24 + T29;
+		    T32 = T2f + T2k;
+		    T37 = T31 + T32;
+		    T33 = KP559016994 * (T31 - T32);
+		    T38 = FNMS(KP250000000, T37, T36);
+	       }
+	       ii[WS(ios, 10)] = T37 + T36;
+	       T3e = T38 - T33;
+	       ii[WS(ios, 7)] = T3d + T3e;
+	       ii[WS(ios, 13)] = T3e - T3d;
+	       T39 = T33 + T38;
+	       ii[WS(ios, 1)] = T39 - T3c;
+	       ii[WS(ios, 4)] = T3c + T39;
+	  }
+	  { /* stores ri at indices 10, 7, 13, 4, 1 */
+	       E T2m, T2o, T1P, T1W, T1X, T1Y, T2n, T1Z;
+	       {
+		    E T2a, T2l, T1S, T1V;
+		    T2a = T24 - T29;
+		    T2l = T2f - T2k;
+		    T2m = FMA(KP951056516, T2a, KP587785252 * T2l);
+		    T2o = FNMS(KP587785252, T2a, KP951056516 * T2l);
+		    T1P = T1n + T1q;
+		    T1S = T1Q + T1R;
+		    T1V = T1T + T1U;
+		    T1W = T1S + T1V;
+		    T1X = KP559016994 * (T1S - T1V);
+		    T1Y = FNMS(KP250000000, T1W, T1P);
+	       }
+	       ri[WS(ios, 10)] = T1P + T1W;
+	       T2n = T1Y - T1X;
+	       ri[WS(ios, 7)] = T2n - T2o;
+	       ri[WS(ios, 13)] = T2n + T2o;
+	       T1Z = T1X + T1Y;
+	       ri[WS(ios, 4)] = T1Z - T2m;
+	       ri[WS(ios, 1)] = T1Z + T2m;
+	  }
+     }
+     return W; /* W as left by the loop, i.e. advanced by 28 per pass */
+}
+
+static const tw_instr twinstr[] = { /* two-entry tw_instr table handed to desc below; NOTE(review): TW_FULL/TW_NEXT semantics are defined outside this file - confirm */
+     {TW_FULL, 0, 15},
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 15, "t1_15", twinstr, {128, 56, 56, 0}, &GENUS, 0, 0, 0 }; /* 15 and "t1_15" mirror -n/-name of the generator command; 128/56/56 mirror the add/mul/fma counts in the header comment */
+
+void X(codelet_t1_15) (planner *p) { /* hands t1_15 and its descriptor to X(kdft_dit_register) for planner p */
+     X(kdft_dit_register) (p, t1_15, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t1_16.c b/src/fftw3/dft/codelets/standard/t1_16.c
new file mode 100644
index 0000000..6dc13f9
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_16.c
@@ -0,0 +1,406 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:30:07 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 16 -name t1_16 -include t.h */
+
+/*
+ * This function contains 174 FP additions, 84 FP multiplications,
+ * (or, 136 additions, 46 multiplications, 38 fused multiply/add),
+ * 52 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_16.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_16.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_16.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t1_16(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{ /* size-16 twiddle pass, repeated m times: inputs 0..15 of ri/ii are all read (input k >= 1 together with W[2k-2], W[2k-1]) before any of the 16 outputs is stored back; returns the advanced W */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* numerically sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 30) { /* per pass: ri and ii advance by dist, W by the 30 entries read in the body */
+	  E T7, T37, T1t, T2U, Ti, T38, T1w, T2R, Tu, T2s, T1C, T2c, TF, T2t, T1H;
+	  E T2d, T1f, T1q, T2B, T2C, T2D, T2E, T1Z, T2j, T24, T2k, TS, T13, T2w, T2x;
+	  E T2y, T2z, T1O, T2g, T1T, T2h;
+	  { /* input 0 (used as is) and input 8 (with W[14], W[15]) */
+	       E T1, T2T, T6, T2S;
+	       T1 = ri[0];
+	       T2T = ii[0];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = ri[WS(ios, 8)];
+		    T5 = ii[WS(ios, 8)];
+		    T2 = W[14];
+		    T4 = W[15];
+		    T6 = FMA(T2, T3, T4 * T5);
+		    T2S = FNMS(T4, T3, T2 * T5);
+	       }
+	       T7 = T1 + T6;
+	       T37 = T2T - T2S;
+	       T1t = T1 - T6;
+	       T2U = T2S + T2T;
+	  }
+	  { /* input 4 (with W[6], W[7]) and input 12 (with W[22], W[23]) */
+	       E Tc, T1u, Th, T1v;
+	       {
+		    E T9, Tb, T8, Ta;
+		    T9 = ri[WS(ios, 4)];
+		    Tb = ii[WS(ios, 4)];
+		    T8 = W[6];
+		    Ta = W[7];
+		    Tc = FMA(T8, T9, Ta * Tb);
+		    T1u = FNMS(Ta, T9, T8 * Tb);
+	       }
+	       {
+		    E Te, Tg, Td, Tf;
+		    Te = ri[WS(ios, 12)];
+		    Tg = ii[WS(ios, 12)];
+		    Td = W[22];
+		    Tf = W[23];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    T1v = FNMS(Tf, Te, Td * Tg);
+	       }
+	       Ti = Tc + Th;
+	       T38 = Tc - Th;
+	       T1w = T1u - T1v;
+	       T2R = T1u + T1v;
+	  }
+	  { /* input 2 (with W[2], W[3]) and input 10 (with W[18], W[19]) */
+	       E To, T1y, Tt, T1z, T1A, T1B;
+	       {
+		    E Tl, Tn, Tk, Tm;
+		    Tl = ri[WS(ios, 2)];
+		    Tn = ii[WS(ios, 2)];
+		    Tk = W[2];
+		    Tm = W[3];
+		    To = FMA(Tk, Tl, Tm * Tn);
+		    T1y = FNMS(Tm, Tl, Tk * Tn);
+	       }
+	       {
+		    E Tq, Ts, Tp, Tr;
+		    Tq = ri[WS(ios, 10)];
+		    Ts = ii[WS(ios, 10)];
+		    Tp = W[18];
+		    Tr = W[19];
+		    Tt = FMA(Tp, Tq, Tr * Ts);
+		    T1z = FNMS(Tr, Tq, Tp * Ts);
+	       }
+	       Tu = To + Tt;
+	       T2s = T1y + T1z;
+	       T1A = T1y - T1z;
+	       T1B = To - Tt;
+	       T1C = T1A - T1B;
+	       T2c = T1B + T1A;
+	  }
+	  { /* input 14 (with W[26], W[27]) and input 6 (with W[10], W[11]) */
+	       E Tz, T1E, TE, T1F, T1D, T1G;
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = ri[WS(ios, 14)];
+		    Ty = ii[WS(ios, 14)];
+		    Tv = W[26];
+		    Tx = W[27];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    T1E = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E TB, TD, TA, TC;
+		    TB = ri[WS(ios, 6)];
+		    TD = ii[WS(ios, 6)];
+		    TA = W[10];
+		    TC = W[11];
+		    TE = FMA(TA, TB, TC * TD);
+		    T1F = FNMS(TC, TB, TA * TD);
+	       }
+	       TF = Tz + TE;
+	       T2t = T1E + T1F;
+	       T1D = Tz - TE;
+	       T1G = T1E - T1F;
+	       T1H = T1D + T1G;
+	       T2d = T1D - T1G;
+	  }
+	  { /* inputs 15, 11, 7 and 3, with W[28..29], W[20..21], W[12..13] and W[4..5] */
+	       E T19, T20, T1p, T1X, T1e, T21, T1k, T1W;
+	       {
+		    E T16, T18, T15, T17;
+		    T16 = ri[WS(ios, 15)];
+		    T18 = ii[WS(ios, 15)];
+		    T15 = W[28];
+		    T17 = W[29];
+		    T19 = FMA(T15, T16, T17 * T18);
+		    T20 = FNMS(T17, T16, T15 * T18);
+	       }
+	       {
+		    E T1m, T1o, T1l, T1n;
+		    T1m = ri[WS(ios, 11)];
+		    T1o = ii[WS(ios, 11)];
+		    T1l = W[20];
+		    T1n = W[21];
+		    T1p = FMA(T1l, T1m, T1n * T1o);
+		    T1X = FNMS(T1n, T1m, T1l * T1o);
+	       }
+	       {
+		    E T1b, T1d, T1a, T1c;
+		    T1b = ri[WS(ios, 7)];
+		    T1d = ii[WS(ios, 7)];
+		    T1a = W[12];
+		    T1c = W[13];
+		    T1e = FMA(T1a, T1b, T1c * T1d);
+		    T21 = FNMS(T1c, T1b, T1a * T1d);
+	       }
+	       {
+		    E T1h, T1j, T1g, T1i;
+		    T1h = ri[WS(ios, 3)];
+		    T1j = ii[WS(ios, 3)];
+		    T1g = W[4];
+		    T1i = W[5];
+		    T1k = FMA(T1g, T1h, T1i * T1j);
+		    T1W = FNMS(T1i, T1h, T1g * T1j);
+	       }
+	       T1f = T19 + T1e;
+	       T1q = T1k + T1p;
+	       T2B = T1f - T1q;
+	       T2C = T20 + T21;
+	       T2D = T1W + T1X;
+	       T2E = T2C - T2D;
+	       {
+		    E T1V, T1Y, T22, T23;
+		    T1V = T19 - T1e;
+		    T1Y = T1W - T1X;
+		    T1Z = T1V - T1Y;
+		    T2j = T1V + T1Y;
+		    T22 = T20 - T21;
+		    T23 = T1k - T1p;
+		    T24 = T22 + T23;
+		    T2k = T22 - T23;
+	       }
+	  }
+	  { /* inputs 1, 13, 9 and 5, with W[0..1], W[24..25], W[16..17] and W[8..9] */
+	       E TM, T1K, T12, T1R, TR, T1L, TX, T1Q;
+	       {
+		    E TJ, TL, TI, TK;
+		    TJ = ri[WS(ios, 1)];
+		    TL = ii[WS(ios, 1)];
+		    TI = W[0];
+		    TK = W[1];
+		    TM = FMA(TI, TJ, TK * TL);
+		    T1K = FNMS(TK, TJ, TI * TL);
+	       }
+	       {
+		    E TZ, T11, TY, T10;
+		    TZ = ri[WS(ios, 13)];
+		    T11 = ii[WS(ios, 13)];
+		    TY = W[24];
+		    T10 = W[25];
+		    T12 = FMA(TY, TZ, T10 * T11);
+		    T1R = FNMS(T10, TZ, TY * T11);
+	       }
+	       {
+		    E TO, TQ, TN, TP;
+		    TO = ri[WS(ios, 9)];
+		    TQ = ii[WS(ios, 9)];
+		    TN = W[16];
+		    TP = W[17];
+		    TR = FMA(TN, TO, TP * TQ);
+		    T1L = FNMS(TP, TO, TN * TQ);
+	       }
+	       {
+		    E TU, TW, TT, TV;
+		    TU = ri[WS(ios, 5)];
+		    TW = ii[WS(ios, 5)];
+		    TT = W[8];
+		    TV = W[9];
+		    TX = FMA(TT, TU, TV * TW);
+		    T1Q = FNMS(TV, TU, TT * TW);
+	       }
+	       TS = TM + TR;
+	       T13 = TX + T12;
+	       T2w = TS - T13;
+	       T2x = T1K + T1L;
+	       T2y = T1Q + T1R;
+	       T2z = T2x - T2y;
+	       {
+		    E T1M, T1N, T1P, T1S;
+		    T1M = T1K - T1L;
+		    T1N = TX - T12;
+		    T1O = T1M + T1N;
+		    T2g = T1M - T1N;
+		    T1P = TM - TR;
+		    T1S = T1Q - T1R;
+		    T1T = T1P - T1S;
+		    T2h = T1P + T1S;
+	       }
+	  }
+	  { /* first store group: outputs 11, 3, 15 and 7 (every input has been read by this point) */
+	       E T1J, T27, T3g, T3i, T26, T3h, T2a, T3d;
+	       {
+		    E T1x, T1I, T3e, T3f;
+		    T1x = T1t - T1w;
+		    T1I = KP707106781 * (T1C - T1H);
+		    T1J = T1x + T1I;
+		    T27 = T1x - T1I;
+		    T3e = KP707106781 * (T2d - T2c);
+		    T3f = T38 + T37;
+		    T3g = T3e + T3f;
+		    T3i = T3f - T3e;
+	       }
+	       {
+		    E T1U, T25, T28, T29;
+		    T1U = FMA(KP923879532, T1O, KP382683432 * T1T);
+		    T25 = FNMS(KP923879532, T24, KP382683432 * T1Z);
+		    T26 = T1U + T25;
+		    T3h = T25 - T1U;
+		    T28 = FNMS(KP923879532, T1T, KP382683432 * T1O);
+		    T29 = FMA(KP382683432, T24, KP923879532 * T1Z);
+		    T2a = T28 - T29;
+		    T3d = T28 + T29;
+	       }
+	       ri[WS(ios, 11)] = T1J - T26;
+	       ii[WS(ios, 11)] = T3g - T3d;
+	       ri[WS(ios, 3)] = T1J + T26;
+	       ii[WS(ios, 3)] = T3d + T3g;
+	       ri[WS(ios, 15)] = T27 - T2a;
+	       ii[WS(ios, 15)] = T3i - T3h;
+	       ri[WS(ios, 7)] = T27 + T2a;
+	       ii[WS(ios, 7)] = T3h + T3i;
+	  }
+	  { /* outputs 10, 2, 14 and 6 */
+	       E T2v, T2H, T32, T34, T2G, T33, T2K, T2Z;
+	       {
+		    E T2r, T2u, T30, T31;
+		    T2r = T7 - Ti;
+		    T2u = T2s - T2t;
+		    T2v = T2r + T2u;
+		    T2H = T2r - T2u;
+		    T30 = TF - Tu;
+		    T31 = T2U - T2R;
+		    T32 = T30 + T31;
+		    T34 = T31 - T30;
+	       }
+	       {
+		    E T2A, T2F, T2I, T2J;
+		    T2A = T2w + T2z;
+		    T2F = T2B - T2E;
+		    T2G = KP707106781 * (T2A + T2F);
+		    T33 = KP707106781 * (T2F - T2A);
+		    T2I = T2z - T2w;
+		    T2J = T2B + T2E;
+		    T2K = KP707106781 * (T2I - T2J);
+		    T2Z = KP707106781 * (T2I + T2J);
+	       }
+	       ri[WS(ios, 10)] = T2v - T2G;
+	       ii[WS(ios, 10)] = T32 - T2Z;
+	       ri[WS(ios, 2)] = T2v + T2G;
+	       ii[WS(ios, 2)] = T2Z + T32;
+	       ri[WS(ios, 14)] = T2H - T2K;
+	       ii[WS(ios, 14)] = T34 - T33;
+	       ri[WS(ios, 6)] = T2H + T2K;
+	       ii[WS(ios, 6)] = T33 + T34;
+	  }
+	  { /* outputs 9, 1, 13 and 5 */
+	       E T2f, T2n, T3a, T3c, T2m, T3b, T2q, T35;
+	       {
+		    E T2b, T2e, T36, T39;
+		    T2b = T1t + T1w;
+		    T2e = KP707106781 * (T2c + T2d);
+		    T2f = T2b + T2e;
+		    T2n = T2b - T2e;
+		    T36 = KP707106781 * (T1C + T1H);
+		    T39 = T37 - T38;
+		    T3a = T36 + T39;
+		    T3c = T39 - T36;
+	       }
+	       {
+		    E T2i, T2l, T2o, T2p;
+		    T2i = FMA(KP382683432, T2g, KP923879532 * T2h);
+		    T2l = FNMS(KP382683432, T2k, KP923879532 * T2j);
+		    T2m = T2i + T2l;
+		    T3b = T2l - T2i;
+		    T2o = FNMS(KP382683432, T2h, KP923879532 * T2g);
+		    T2p = FMA(KP923879532, T2k, KP382683432 * T2j);
+		    T2q = T2o - T2p;
+		    T35 = T2o + T2p;
+	       }
+	       ri[WS(ios, 9)] = T2f - T2m;
+	       ii[WS(ios, 9)] = T3a - T35;
+	       ri[WS(ios, 1)] = T2f + T2m;
+	       ii[WS(ios, 1)] = T35 + T3a;
+	       ri[WS(ios, 13)] = T2n - T2q;
+	       ii[WS(ios, 13)] = T3c - T3b;
+	       ri[WS(ios, 5)] = T2n + T2q;
+	       ii[WS(ios, 5)] = T3b + T3c;
+	  }
+	  { /* outputs 8, 0, 12 and 4 (additions and subtractions only, no KP constants) */
+	       E TH, T2L, T2W, T2Y, T1s, T2X, T2O, T2P;
+	       {
+		    E Tj, TG, T2Q, T2V;
+		    Tj = T7 + Ti;
+		    TG = Tu + TF;
+		    TH = Tj + TG;
+		    T2L = Tj - TG;
+		    T2Q = T2s + T2t;
+		    T2V = T2R + T2U;
+		    T2W = T2Q + T2V;
+		    T2Y = T2V - T2Q;
+	       }
+	       {
+		    E T14, T1r, T2M, T2N;
+		    T14 = TS + T13;
+		    T1r = T1f + T1q;
+		    T1s = T14 + T1r;
+		    T2X = T1r - T14;
+		    T2M = T2x + T2y;
+		    T2N = T2C + T2D;
+		    T2O = T2M - T2N;
+		    T2P = T2M + T2N;
+	       }
+	       ri[WS(ios, 8)] = TH - T1s;
+	       ii[WS(ios, 8)] = T2W - T2P;
+	       ri[0] = TH + T1s;
+	       ii[0] = T2P + T2W;
+	       ri[WS(ios, 12)] = T2L - T2O;
+	       ii[WS(ios, 12)] = T2Y - T2X;
+	       ri[WS(ios, 4)] = T2L + T2O;
+	       ii[WS(ios, 4)] = T2X + T2Y;
+	  }
+     }
+     return W; /* W as left by the loop: advanced by 30 for every pass executed */
+}
+
+static const tw_instr twinstr[] = { /* two-entry table referenced by desc below; TW_FULL / TW_NEXT are defined outside this file */
+     {TW_FULL, 0, 16},
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 16, "t1_16", twinstr, {136, 46, 38, 0}, &GENUS, 0, 0, 0 }; /* 16 is the -n value; {136, 46, 38} equal the add / mul / fused counts quoted in this file's header comment */
+
+void X(codelet_t1_16) (planner *p) { /* hands t1_16 and its descriptor to X(kdft_dit_register) for planner p */
+     X(kdft_dit_register) (p, t1_16, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t1_2.c b/src/fftw3/dft/codelets/standard/t1_2.c
new file mode 100644
index 0000000..9213d04
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_2.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:51 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 2 -name t1_2 -include t.h */
+
+/*
+ * This function contains 6 FP additions, 4 FP multiplications,
+ * (or, 4 additions, 2 multiplications, 2 fused multiply/add),
+ * 9 stack variables, and 8 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_2.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_2.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_2.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+/* t1_2: size-2 twiddle pass over ri/ii, done m times; W moves on by 2 after each pass. */
+static const R *t1_2(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{
+     int pass;
+     for (pass = 0; pass < m; ++pass, ri += dist, ii += dist, W += 2) {
+	  /* Every input is copied into a local first, because the stores below reuse the same slots. */
+	  E re0 = ri[0];
+	  E im0 = ii[0];
+	  E re1 = ri[WS(ios, 1)];
+	  E im1 = ii[WS(ios, 1)];
+	  E wa = W[0];
+	  E wb = W[1];
+	  /* Element 1 combined with (W[0], W[1]); FMA and FNMS are macros defined outside this file. */
+	  E tw_re = FMA(wa, re1, wb * im1);
+	  E tw_im = FNMS(wb, re1, wa * im1);
+
+	  ri[WS(ios, 1)] = re0 - tw_re;
+	  ii[WS(ios, 1)] = im0 - tw_im;
+	  ri[0] = re0 + tw_re;
+	  ii[0] = tw_im + im0;
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = { /* two-entry table referenced by desc below; TW_FULL / TW_NEXT are defined outside this file */
+     {TW_FULL, 0, 2},
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 2, "t1_2", twinstr, {4, 2, 2, 0}, &GENUS, 0, 0, 0 }; /* 2 is the -n value; {4, 2, 2} equal the add / mul / fused counts quoted in this file's header comment */
+
+void X(codelet_t1_2) (planner *p) { /* hands t1_2 and its descriptor to X(kdft_dit_register) for planner p */
+     X(kdft_dit_register) (p, t1_2, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t1_3.c b/src/fftw3/dft/codelets/standard/t1_3.c
new file mode 100644
index 0000000..e8cfbc6
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_3.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:51 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 3 -name t1_3 -include t.h */
+
+/*
+ * This function contains 16 FP additions, 12 FP multiplications,
+ * (or, 10 additions, 6 multiplications, 6 fused multiply/add),
+ * 15 stack variables, and 12 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_3.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_3.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_3.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+/*
+ * t1_3: size-3 twiddle pass over ri/ii, done m times.  Elements 1 and 2 are
+ * combined with W[0..1] and W[2..3]; W moves on by 4 after each pass.
+ */
+static const R *t1_3(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     int pass;
+     for (pass = 0; pass < m; ++pass, ri += dist, ii += dist, W += 4) {
+	  E re0, im0, re1, im1, re2, im2;
+	  E sum_re, sum_im, base_re, base_im, off_re, off_im;
+	  re0 = ri[0];
+	  im0 = ii[0];
+	  {
+	       /* element 1 with W[0], W[1] */
+	       E xr = ri[WS(ios, 1)], xi = ii[WS(ios, 1)];
+	       E wa = W[0], wb = W[1];
+	       re1 = FMA(wa, xr, wb * xi);
+	       im1 = FNMS(wb, xr, wa * xi);
+	  }
+	  {
+	       /* element 2 with W[2], W[3] */
+	       E xr = ri[WS(ios, 2)], xi = ii[WS(ios, 2)];
+	       E wa = W[2], wb = W[3];
+	       re2 = FMA(wa, xr, wb * xi);
+	       im2 = FNMS(wb, xr, wa * xi);
+	  }
+	  sum_re = re1 + re2;
+	  sum_im = im1 + im2;
+	  base_re = FNMS(KP500000000, sum_re, re0);
+	  off_re = KP866025403 * (im1 - im2);
+	  off_im = KP866025403 * (re2 - re1);
+	  base_im = FNMS(KP500000000, sum_im, im0);
+
+	  /* Results are stored only after every input has been read. */
+	  ri[0] = re0 + sum_re;
+	  ii[0] = sum_im + im0;
+	  ri[WS(ios, 2)] = base_re - off_re;
+	  ri[WS(ios, 1)] = base_re + off_re;
+	  ii[WS(ios, 1)] = off_im + base_im;
+	  ii[WS(ios, 2)] = base_im - off_im;
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = { /* two-entry table referenced by desc below; TW_FULL / TW_NEXT are defined outside this file */
+     {TW_FULL, 0, 3},
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 3, "t1_3", twinstr, {10, 6, 6, 0}, &GENUS, 0, 0, 0 }; /* 3 is the -n value; {10, 6, 6} equal the add / mul / fused counts quoted in this file's header comment */
+
+void X(codelet_t1_3) (planner *p) { /* hands t1_3 and its descriptor to X(kdft_dit_register) for planner p */
+     X(kdft_dit_register) (p, t1_3, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t1_32.c b/src/fftw3/dft/codelets/standard/t1_32.c
new file mode 100644
index 0000000..387b955
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_32.c
@@ -0,0 +1,892 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:30:07 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 32 -name t1_32 -include t.h */
+
+/*
+ * This function contains 434 FP additions, 208 FP multiplications,
+ * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
+ * 96 stack variables, and 128 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_32.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_32.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_32.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t1_32(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 62) {
+	  E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T59, T41;
+	  E T56, T2B, T67, T6e, T6O, T4b, T5d, T4s, T5g, TG, T7l, T5I, T73, T3a, T4U;
+	  E T3f, T4V, T14, T5N, T5M, T6E, T3m, T4Y, T3r, T4Z, T1r, T5P, T5S, T6F, T3x;
+	  E T51, T3C, T52, T2d, T5Z, T64, T6K, T3V, T57, T44, T5a, T2Y, T6f, T6a, T6P;
+	  E T4m, T5h, T4v, T5e;
+	  {
+	       E T1, T76, T6, T75, Tc, T32, Th, T33;
+	       T1 = ri[0];
+	       T76 = ii[0];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = ri[WS(ios, 16)];
+		    T5 = ii[WS(ios, 16)];
+		    T2 = W[30];
+		    T4 = W[31];
+		    T6 = FMA(T2, T3, T4 * T5);
+		    T75 = FNMS(T4, T3, T2 * T5);
+	       }
+	       {
+		    E T9, Tb, T8, Ta;
+		    T9 = ri[WS(ios, 8)];
+		    Tb = ii[WS(ios, 8)];
+		    T8 = W[14];
+		    Ta = W[15];
+		    Tc = FMA(T8, T9, Ta * Tb);
+		    T32 = FNMS(Ta, T9, T8 * Tb);
+	       }
+	       {
+		    E Te, Tg, Td, Tf;
+		    Te = ri[WS(ios, 24)];
+		    Tg = ii[WS(ios, 24)];
+		    Td = W[46];
+		    Tf = W[47];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    T33 = FNMS(Tf, Te, Td * Tg);
+	       }
+	       {
+		    E T7, Ti, T7A, T7B;
+		    T7 = T1 + T6;
+		    Ti = Tc + Th;
+		    Tj = T7 + Ti;
+		    T5F = T7 - Ti;
+		    T7A = T76 - T75;
+		    T7B = Tc - Th;
+		    T7C = T7A - T7B;
+		    T7Q = T7B + T7A;
+	       }
+	       {
+		    E T31, T34, T74, T77;
+		    T31 = T1 - T6;
+		    T34 = T32 - T33;
+		    T35 = T31 - T34;
+		    T4T = T31 + T34;
+		    T74 = T32 + T33;
+		    T77 = T75 + T76;
+		    T78 = T74 + T77;
+		    T7m = T77 - T74;
+	       }
+	  }
+	  {
+	       E T1y, T3G, T1O, T3Z, T1D, T3H, T1J, T3Y;
+	       {
+		    E T1v, T1x, T1u, T1w;
+		    T1v = ri[WS(ios, 1)];
+		    T1x = ii[WS(ios, 1)];
+		    T1u = W[0];
+		    T1w = W[1];
+		    T1y = FMA(T1u, T1v, T1w * T1x);
+		    T3G = FNMS(T1w, T1v, T1u * T1x);
+	       }
+	       {
+		    E T1L, T1N, T1K, T1M;
+		    T1L = ri[WS(ios, 25)];
+		    T1N = ii[WS(ios, 25)];
+		    T1K = W[48];
+		    T1M = W[49];
+		    T1O = FMA(T1K, T1L, T1M * T1N);
+		    T3Z = FNMS(T1M, T1L, T1K * T1N);
+	       }
+	       {
+		    E T1A, T1C, T1z, T1B;
+		    T1A = ri[WS(ios, 17)];
+		    T1C = ii[WS(ios, 17)];
+		    T1z = W[32];
+		    T1B = W[33];
+		    T1D = FMA(T1z, T1A, T1B * T1C);
+		    T3H = FNMS(T1B, T1A, T1z * T1C);
+	       }
+	       {
+		    E T1G, T1I, T1F, T1H;
+		    T1G = ri[WS(ios, 9)];
+		    T1I = ii[WS(ios, 9)];
+		    T1F = W[16];
+		    T1H = W[17];
+		    T1J = FMA(T1F, T1G, T1H * T1I);
+		    T3Y = FNMS(T1H, T1G, T1F * T1I);
+	       }
+	       {
+		    E T1E, T1P, T5W, T5X;
+		    T1E = T1y + T1D;
+		    T1P = T1J + T1O;
+		    T1Q = T1E + T1P;
+		    T61 = T1E - T1P;
+		    T5W = T3G + T3H;
+		    T5X = T3Y + T3Z;
+		    T5Y = T5W - T5X;
+		    T6J = T5W + T5X;
+	       }
+	       {
+		    E T3I, T3J, T3X, T40;
+		    T3I = T3G - T3H;
+		    T3J = T1J - T1O;
+		    T3K = T3I + T3J;
+		    T59 = T3I - T3J;
+		    T3X = T1y - T1D;
+		    T40 = T3Y - T3Z;
+		    T41 = T3X - T40;
+		    T56 = T3X + T40;
+	       }
+	  }
+	  {
+	       E T2j, T4o, T2z, T49, T2o, T4p, T2u, T48;
+	       {
+		    E T2g, T2i, T2f, T2h;
+		    T2g = ri[WS(ios, 31)];
+		    T2i = ii[WS(ios, 31)];
+		    T2f = W[60];
+		    T2h = W[61];
+		    T2j = FMA(T2f, T2g, T2h * T2i);
+		    T4o = FNMS(T2h, T2g, T2f * T2i);
+	       }
+	       {
+		    E T2w, T2y, T2v, T2x;
+		    T2w = ri[WS(ios, 23)];
+		    T2y = ii[WS(ios, 23)];
+		    T2v = W[44];
+		    T2x = W[45];
+		    T2z = FMA(T2v, T2w, T2x * T2y);
+		    T49 = FNMS(T2x, T2w, T2v * T2y);
+	       }
+	       {
+		    E T2l, T2n, T2k, T2m;
+		    T2l = ri[WS(ios, 15)];
+		    T2n = ii[WS(ios, 15)];
+		    T2k = W[28];
+		    T2m = W[29];
+		    T2o = FMA(T2k, T2l, T2m * T2n);
+		    T4p = FNMS(T2m, T2l, T2k * T2n);
+	       }
+	       {
+		    E T2r, T2t, T2q, T2s;
+		    T2r = ri[WS(ios, 7)];
+		    T2t = ii[WS(ios, 7)];
+		    T2q = W[12];
+		    T2s = W[13];
+		    T2u = FMA(T2q, T2r, T2s * T2t);
+		    T48 = FNMS(T2s, T2r, T2q * T2t);
+	       }
+	       {
+		    E T2p, T2A, T6c, T6d;
+		    T2p = T2j + T2o;
+		    T2A = T2u + T2z;
+		    T2B = T2p + T2A;
+		    T67 = T2p - T2A;
+		    T6c = T4o + T4p;
+		    T6d = T48 + T49;
+		    T6e = T6c - T6d;
+		    T6O = T6c + T6d;
+	       }
+	       {
+		    E T47, T4a, T4q, T4r;
+		    T47 = T2j - T2o;
+		    T4a = T48 - T49;
+		    T4b = T47 - T4a;
+		    T5d = T47 + T4a;
+		    T4q = T4o - T4p;
+		    T4r = T2u - T2z;
+		    T4s = T4q + T4r;
+		    T5g = T4q - T4r;
+	       }
+	  }
+	  {
+	       E To, T36, TE, T3d, Tt, T37, Tz, T3c;
+	       {
+		    E Tl, Tn, Tk, Tm;
+		    Tl = ri[WS(ios, 4)];
+		    Tn = ii[WS(ios, 4)];
+		    Tk = W[6];
+		    Tm = W[7];
+		    To = FMA(Tk, Tl, Tm * Tn);
+		    T36 = FNMS(Tm, Tl, Tk * Tn);
+	       }
+	       {
+		    E TB, TD, TA, TC;
+		    TB = ri[WS(ios, 12)];
+		    TD = ii[WS(ios, 12)];
+		    TA = W[22];
+		    TC = W[23];
+		    TE = FMA(TA, TB, TC * TD);
+		    T3d = FNMS(TC, TB, TA * TD);
+	       }
+	       {
+		    E Tq, Ts, Tp, Tr;
+		    Tq = ri[WS(ios, 20)];
+		    Ts = ii[WS(ios, 20)];
+		    Tp = W[38];
+		    Tr = W[39];
+		    Tt = FMA(Tp, Tq, Tr * Ts);
+		    T37 = FNMS(Tr, Tq, Tp * Ts);
+	       }
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = ri[WS(ios, 28)];
+		    Ty = ii[WS(ios, 28)];
+		    Tv = W[54];
+		    Tx = W[55];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    T3c = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E Tu, TF, T5G, T5H;
+		    Tu = To + Tt;
+		    TF = Tz + TE;
+		    TG = Tu + TF;
+		    T7l = TF - Tu;
+		    T5G = T36 + T37;
+		    T5H = T3c + T3d;
+		    T5I = T5G - T5H;
+		    T73 = T5G + T5H;
+	       }
+	       {
+		    E T38, T39, T3b, T3e;
+		    T38 = T36 - T37;
+		    T39 = To - Tt;
+		    T3a = T38 - T39;
+		    T4U = T39 + T38;
+		    T3b = Tz - TE;
+		    T3e = T3c - T3d;
+		    T3f = T3b + T3e;
+		    T4V = T3b - T3e;
+	       }
+	  }
+	  {
+	       E TM, T3i, T12, T3p, TR, T3j, TX, T3o;
+	       {
+		    E TJ, TL, TI, TK;
+		    TJ = ri[WS(ios, 2)];
+		    TL = ii[WS(ios, 2)];
+		    TI = W[2];
+		    TK = W[3];
+		    TM = FMA(TI, TJ, TK * TL);
+		    T3i = FNMS(TK, TJ, TI * TL);
+	       }
+	       {
+		    E TZ, T11, TY, T10;
+		    TZ = ri[WS(ios, 26)];
+		    T11 = ii[WS(ios, 26)];
+		    TY = W[50];
+		    T10 = W[51];
+		    T12 = FMA(TY, TZ, T10 * T11);
+		    T3p = FNMS(T10, TZ, TY * T11);
+	       }
+	       {
+		    E TO, TQ, TN, TP;
+		    TO = ri[WS(ios, 18)];
+		    TQ = ii[WS(ios, 18)];
+		    TN = W[34];
+		    TP = W[35];
+		    TR = FMA(TN, TO, TP * TQ);
+		    T3j = FNMS(TP, TO, TN * TQ);
+	       }
+	       {
+		    E TU, TW, TT, TV;
+		    TU = ri[WS(ios, 10)];
+		    TW = ii[WS(ios, 10)];
+		    TT = W[18];
+		    TV = W[19];
+		    TX = FMA(TT, TU, TV * TW);
+		    T3o = FNMS(TV, TU, TT * TW);
+	       }
+	       {
+		    E TS, T13, T5K, T5L;
+		    TS = TM + TR;
+		    T13 = TX + T12;
+		    T14 = TS + T13;
+		    T5N = TS - T13;
+		    T5K = T3i + T3j;
+		    T5L = T3o + T3p;
+		    T5M = T5K - T5L;
+		    T6E = T5K + T5L;
+	       }
+	       {
+		    E T3k, T3l, T3n, T3q;
+		    T3k = T3i - T3j;
+		    T3l = TX - T12;
+		    T3m = T3k + T3l;
+		    T4Y = T3k - T3l;
+		    T3n = TM - TR;
+		    T3q = T3o - T3p;
+		    T3r = T3n - T3q;
+		    T4Z = T3n + T3q;
+	       }
+	  }
+	  {
+	       E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z;
+	       {
+		    E T16, T18, T15, T17;
+		    T16 = ri[WS(ios, 30)];
+		    T18 = ii[WS(ios, 30)];
+		    T15 = W[58];
+		    T17 = W[59];
+		    T19 = FMA(T15, T16, T17 * T18);
+		    T3t = FNMS(T17, T16, T15 * T18);
+	       }
+	       {
+		    E T1m, T1o, T1l, T1n;
+		    T1m = ri[WS(ios, 22)];
+		    T1o = ii[WS(ios, 22)];
+		    T1l = W[42];
+		    T1n = W[43];
+		    T1p = FMA(T1l, T1m, T1n * T1o);
+		    T3A = FNMS(T1n, T1m, T1l * T1o);
+	       }
+	       {
+		    E T1b, T1d, T1a, T1c;
+		    T1b = ri[WS(ios, 14)];
+		    T1d = ii[WS(ios, 14)];
+		    T1a = W[26];
+		    T1c = W[27];
+		    T1e = FMA(T1a, T1b, T1c * T1d);
+		    T3u = FNMS(T1c, T1b, T1a * T1d);
+	       }
+	       {
+		    E T1h, T1j, T1g, T1i;
+		    T1h = ri[WS(ios, 6)];
+		    T1j = ii[WS(ios, 6)];
+		    T1g = W[10];
+		    T1i = W[11];
+		    T1k = FMA(T1g, T1h, T1i * T1j);
+		    T3z = FNMS(T1i, T1h, T1g * T1j);
+	       }
+	       {
+		    E T1f, T1q, T5Q, T5R;
+		    T1f = T19 + T1e;
+		    T1q = T1k + T1p;
+		    T1r = T1f + T1q;
+		    T5P = T1f - T1q;
+		    T5Q = T3t + T3u;
+		    T5R = T3z + T3A;
+		    T5S = T5Q - T5R;
+		    T6F = T5Q + T5R;
+	       }
+	       {
+		    E T3v, T3w, T3y, T3B;
+		    T3v = T3t - T3u;
+		    T3w = T1k - T1p;
+		    T3x = T3v + T3w;
+		    T51 = T3v - T3w;
+		    T3y = T19 - T1e;
+		    T3B = T3z - T3A;
+		    T3C = T3y - T3B;
+		    T52 = T3y + T3B;
+	       }
+	  }
+	  {
+	       E T1V, T3R, T20, T3S, T3Q, T3T, T26, T3M, T2b, T3N, T3L, T3O;
+	       {
+		    E T1S, T1U, T1R, T1T;
+		    T1S = ri[WS(ios, 5)];
+		    T1U = ii[WS(ios, 5)];
+		    T1R = W[8];
+		    T1T = W[9];
+		    T1V = FMA(T1R, T1S, T1T * T1U);
+		    T3R = FNMS(T1T, T1S, T1R * T1U);
+	       }
+	       {
+		    E T1X, T1Z, T1W, T1Y;
+		    T1X = ri[WS(ios, 21)];
+		    T1Z = ii[WS(ios, 21)];
+		    T1W = W[40];
+		    T1Y = W[41];
+		    T20 = FMA(T1W, T1X, T1Y * T1Z);
+		    T3S = FNMS(T1Y, T1X, T1W * T1Z);
+	       }
+	       T3Q = T1V - T20;
+	       T3T = T3R - T3S;
+	       {
+		    E T23, T25, T22, T24;
+		    T23 = ri[WS(ios, 29)];
+		    T25 = ii[WS(ios, 29)];
+		    T22 = W[56];
+		    T24 = W[57];
+		    T26 = FMA(T22, T23, T24 * T25);
+		    T3M = FNMS(T24, T23, T22 * T25);
+	       }
+	       {
+		    E T28, T2a, T27, T29;
+		    T28 = ri[WS(ios, 13)];
+		    T2a = ii[WS(ios, 13)];
+		    T27 = W[24];
+		    T29 = W[25];
+		    T2b = FMA(T27, T28, T29 * T2a);
+		    T3N = FNMS(T29, T28, T27 * T2a);
+	       }
+	       T3L = T26 - T2b;
+	       T3O = T3M - T3N;
+	       {
+		    E T21, T2c, T62, T63;
+		    T21 = T1V + T20;
+		    T2c = T26 + T2b;
+		    T2d = T21 + T2c;
+		    T5Z = T2c - T21;
+		    T62 = T3R + T3S;
+		    T63 = T3M + T3N;
+		    T64 = T62 - T63;
+		    T6K = T62 + T63;
+	       }
+	       {
+		    E T3P, T3U, T42, T43;
+		    T3P = T3L - T3O;
+		    T3U = T3Q + T3T;
+		    T3V = KP707106781 * (T3P - T3U);
+		    T57 = KP707106781 * (T3U + T3P);
+		    T42 = T3T - T3Q;
+		    T43 = T3L + T3O;
+		    T44 = KP707106781 * (T42 - T43);
+		    T5a = KP707106781 * (T42 + T43);
+	       }
+	  }
+	  {
+	       E T2G, T4c, T2L, T4d, T4e, T4f, T2R, T4i, T2W, T4j, T4h, T4k;
+	       {
+		    E T2D, T2F, T2C, T2E;
+		    T2D = ri[WS(ios, 3)];
+		    T2F = ii[WS(ios, 3)];
+		    T2C = W[4];
+		    T2E = W[5];
+		    T2G = FMA(T2C, T2D, T2E * T2F);
+		    T4c = FNMS(T2E, T2D, T2C * T2F);
+	       }
+	       {
+		    E T2I, T2K, T2H, T2J;
+		    T2I = ri[WS(ios, 19)];
+		    T2K = ii[WS(ios, 19)];
+		    T2H = W[36];
+		    T2J = W[37];
+		    T2L = FMA(T2H, T2I, T2J * T2K);
+		    T4d = FNMS(T2J, T2I, T2H * T2K);
+	       }
+	       T4e = T4c - T4d;
+	       T4f = T2G - T2L;
+	       {
+		    E T2O, T2Q, T2N, T2P;
+		    T2O = ri[WS(ios, 27)];
+		    T2Q = ii[WS(ios, 27)];
+		    T2N = W[52];
+		    T2P = W[53];
+		    T2R = FMA(T2N, T2O, T2P * T2Q);
+		    T4i = FNMS(T2P, T2O, T2N * T2Q);
+	       }
+	       {
+		    E T2T, T2V, T2S, T2U;
+		    T2T = ri[WS(ios, 11)];
+		    T2V = ii[WS(ios, 11)];
+		    T2S = W[20];
+		    T2U = W[21];
+		    T2W = FMA(T2S, T2T, T2U * T2V);
+		    T4j = FNMS(T2U, T2T, T2S * T2V);
+	       }
+	       T4h = T2R - T2W;
+	       T4k = T4i - T4j;
+	       {
+		    E T2M, T2X, T68, T69;
+		    T2M = T2G + T2L;
+		    T2X = T2R + T2W;
+		    T2Y = T2M + T2X;
+		    T6f = T2X - T2M;
+		    T68 = T4c + T4d;
+		    T69 = T4i + T4j;
+		    T6a = T68 - T69;
+		    T6P = T68 + T69;
+	       }
+	       {
+		    E T4g, T4l, T4t, T4u;
+		    T4g = T4e - T4f;
+		    T4l = T4h + T4k;
+		    T4m = KP707106781 * (T4g - T4l);
+		    T5h = KP707106781 * (T4g + T4l);
+		    T4t = T4h - T4k;
+		    T4u = T4f + T4e;
+		    T4v = KP707106781 * (T4t - T4u);
+		    T5e = KP707106781 * (T4u + T4t);
+	       }
+	  }
+	  {
+	       E T1t, T6X, T7a, T7c, T30, T7b, T70, T71;
+	       {
+		    E TH, T1s, T72, T79;
+		    TH = Tj + TG;
+		    T1s = T14 + T1r;
+		    T1t = TH + T1s;
+		    T6X = TH - T1s;
+		    T72 = T6E + T6F;
+		    T79 = T73 + T78;
+		    T7a = T72 + T79;
+		    T7c = T79 - T72;
+	       }
+	       {
+		    E T2e, T2Z, T6Y, T6Z;
+		    T2e = T1Q + T2d;
+		    T2Z = T2B + T2Y;
+		    T30 = T2e + T2Z;
+		    T7b = T2Z - T2e;
+		    T6Y = T6J + T6K;
+		    T6Z = T6O + T6P;
+		    T70 = T6Y - T6Z;
+		    T71 = T6Y + T6Z;
+	       }
+	       ri[WS(ios, 16)] = T1t - T30;
+	       ii[WS(ios, 16)] = T7a - T71;
+	       ri[0] = T1t + T30;
+	       ii[0] = T71 + T7a;
+	       ri[WS(ios, 24)] = T6X - T70;
+	       ii[WS(ios, 24)] = T7c - T7b;
+	       ri[WS(ios, 8)] = T6X + T70;
+	       ii[WS(ios, 8)] = T7b + T7c;
+	  }
+	  {
+	       E T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V;
+	       {
+		    E T6D, T6G, T7e, T7f;
+		    T6D = Tj - TG;
+		    T6G = T6E - T6F;
+		    T6H = T6D + T6G;
+		    T6T = T6D - T6G;
+		    T7e = T1r - T14;
+		    T7f = T78 - T73;
+		    T7g = T7e + T7f;
+		    T7i = T7f - T7e;
+	       }
+	       {
+		    E T6I, T6L, T6N, T6Q;
+		    T6I = T1Q - T2d;
+		    T6L = T6J - T6K;
+		    T6M = T6I + T6L;
+		    T6U = T6L - T6I;
+		    T6N = T2B - T2Y;
+		    T6Q = T6O - T6P;
+		    T6R = T6N - T6Q;
+		    T6V = T6N + T6Q;
+	       }
+	       {
+		    E T6S, T7d, T6W, T7h;
+		    T6S = KP707106781 * (T6M + T6R);
+		    ri[WS(ios, 20)] = T6H - T6S;
+		    ri[WS(ios, 4)] = T6H + T6S;
+		    T7d = KP707106781 * (T6U + T6V);
+		    ii[WS(ios, 4)] = T7d + T7g;
+		    ii[WS(ios, 20)] = T7g - T7d;
+		    T6W = KP707106781 * (T6U - T6V);
+		    ri[WS(ios, 28)] = T6T - T6W;
+		    ri[WS(ios, 12)] = T6T + T6W;
+		    T7h = KP707106781 * (T6R - T6M);
+		    ii[WS(ios, 12)] = T7h + T7i;
+		    ii[WS(ios, 28)] = T7i - T7h;
+	       }
+	  }
+	  {
+	       E T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h;
+	       E T6l;
+	       {
+		    E T5O, T5T, T60, T65;
+		    T5J = T5F - T5I;
+		    T7n = T7l + T7m;
+		    T7t = T7m - T7l;
+		    T6n = T5F + T5I;
+		    T5O = T5M - T5N;
+		    T5T = T5P + T5S;
+		    T5U = KP707106781 * (T5O - T5T);
+		    T7k = KP707106781 * (T5O + T5T);
+		    {
+			 E T6v, T6w, T6o, T6p;
+			 T6v = T67 + T6a;
+			 T6w = T6e + T6f;
+			 T6x = FNMS(KP382683432, T6w, KP923879532 * T6v);
+			 T6B = FMA(KP923879532, T6w, KP382683432 * T6v);
+			 T6o = T5N + T5M;
+			 T6p = T5P - T5S;
+			 T6q = KP707106781 * (T6o + T6p);
+			 T7s = KP707106781 * (T6p - T6o);
+		    }
+		    T60 = T5Y - T5Z;
+		    T65 = T61 - T64;
+		    T66 = FMA(KP923879532, T60, KP382683432 * T65);
+		    T6k = FNMS(KP923879532, T65, KP382683432 * T60);
+		    {
+			 E T6s, T6t, T6b, T6g;
+			 T6s = T5Y + T5Z;
+			 T6t = T61 + T64;
+			 T6u = FMA(KP382683432, T6s, KP923879532 * T6t);
+			 T6A = FNMS(KP382683432, T6t, KP923879532 * T6s);
+			 T6b = T67 - T6a;
+			 T6g = T6e - T6f;
+			 T6h = FNMS(KP923879532, T6g, KP382683432 * T6b);
+			 T6l = FMA(KP382683432, T6g, KP923879532 * T6b);
+		    }
+	       }
+	       {
+		    E T5V, T6i, T7r, T7u;
+		    T5V = T5J + T5U;
+		    T6i = T66 + T6h;
+		    ri[WS(ios, 22)] = T5V - T6i;
+		    ri[WS(ios, 6)] = T5V + T6i;
+		    T7r = T6k + T6l;
+		    T7u = T7s + T7t;
+		    ii[WS(ios, 6)] = T7r + T7u;
+		    ii[WS(ios, 22)] = T7u - T7r;
+	       }
+	       {
+		    E T6j, T6m, T7v, T7w;
+		    T6j = T5J - T5U;
+		    T6m = T6k - T6l;
+		    ri[WS(ios, 30)] = T6j - T6m;
+		    ri[WS(ios, 14)] = T6j + T6m;
+		    T7v = T6h - T66;
+		    T7w = T7t - T7s;
+		    ii[WS(ios, 14)] = T7v + T7w;
+		    ii[WS(ios, 30)] = T7w - T7v;
+	       }
+	       {
+		    E T6r, T6y, T7j, T7o;
+		    T6r = T6n + T6q;
+		    T6y = T6u + T6x;
+		    ri[WS(ios, 18)] = T6r - T6y;
+		    ri[WS(ios, 2)] = T6r + T6y;
+		    T7j = T6A + T6B;
+		    T7o = T7k + T7n;
+		    ii[WS(ios, 2)] = T7j + T7o;
+		    ii[WS(ios, 18)] = T7o - T7j;
+	       }
+	       {
+		    E T6z, T6C, T7p, T7q;
+		    T6z = T6n - T6q;
+		    T6C = T6A - T6B;
+		    ri[WS(ios, 26)] = T6z - T6C;
+		    ri[WS(ios, 10)] = T6z + T6C;
+		    T7p = T6x - T6u;
+		    T7q = T7n - T7k;
+		    ii[WS(ios, 10)] = T7p + T7q;
+		    ii[WS(ios, 26)] = T7q - T7p;
+	       }
+	  }
+	  {
+	       E T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x;
+	       E T4B, T3g, T7P;
+	       T3g = KP707106781 * (T3a - T3f);
+	       T3h = T35 - T3g;
+	       T4D = T35 + T3g;
+	       T7P = KP707106781 * (T4V - T4U);
+	       T7R = T7P + T7Q;
+	       T7X = T7Q - T7P;
+	       {
+		    E T3s, T3D, T4L, T4M;
+		    T3s = FNMS(KP923879532, T3r, KP382683432 * T3m);
+		    T3D = FMA(KP382683432, T3x, KP923879532 * T3C);
+		    T3E = T3s - T3D;
+		    T7O = T3s + T3D;
+		    T4L = T4b + T4m;
+		    T4M = T4s + T4v;
+		    T4N = FNMS(KP555570233, T4M, KP831469612 * T4L);
+		    T4R = FMA(KP831469612, T4M, KP555570233 * T4L);
+	       }
+	       {
+		    E T3W, T45, T4E, T4F;
+		    T3W = T3K - T3V;
+		    T45 = T41 - T44;
+		    T46 = FMA(KP980785280, T3W, KP195090322 * T45);
+		    T4A = FNMS(KP980785280, T45, KP195090322 * T3W);
+		    T4E = FMA(KP923879532, T3m, KP382683432 * T3r);
+		    T4F = FNMS(KP923879532, T3x, KP382683432 * T3C);
+		    T4G = T4E + T4F;
+		    T7W = T4F - T4E;
+	       }
+	       {
+		    E T4I, T4J, T4n, T4w;
+		    T4I = T3K + T3V;
+		    T4J = T41 + T44;
+		    T4K = FMA(KP555570233, T4I, KP831469612 * T4J);
+		    T4Q = FNMS(KP555570233, T4J, KP831469612 * T4I);
+		    T4n = T4b - T4m;
+		    T4w = T4s - T4v;
+		    T4x = FNMS(KP980785280, T4w, KP195090322 * T4n);
+		    T4B = FMA(KP195090322, T4w, KP980785280 * T4n);
+	       }
+	       {
+		    E T3F, T4y, T7V, T7Y;
+		    T3F = T3h + T3E;
+		    T4y = T46 + T4x;
+		    ri[WS(ios, 23)] = T3F - T4y;
+		    ri[WS(ios, 7)] = T3F + T4y;
+		    T7V = T4A + T4B;
+		    T7Y = T7W + T7X;
+		    ii[WS(ios, 7)] = T7V + T7Y;
+		    ii[WS(ios, 23)] = T7Y - T7V;
+	       }
+	       {
+		    E T4z, T4C, T7Z, T80;
+		    T4z = T3h - T3E;
+		    T4C = T4A - T4B;
+		    ri[WS(ios, 31)] = T4z - T4C;
+		    ri[WS(ios, 15)] = T4z + T4C;
+		    T7Z = T4x - T46;
+		    T80 = T7X - T7W;
+		    ii[WS(ios, 15)] = T7Z + T80;
+		    ii[WS(ios, 31)] = T80 - T7Z;
+	       }
+	       {
+		    E T4H, T4O, T7N, T7S;
+		    T4H = T4D + T4G;
+		    T4O = T4K + T4N;
+		    ri[WS(ios, 19)] = T4H - T4O;
+		    ri[WS(ios, 3)] = T4H + T4O;
+		    T7N = T4Q + T4R;
+		    T7S = T7O + T7R;
+		    ii[WS(ios, 3)] = T7N + T7S;
+		    ii[WS(ios, 19)] = T7S - T7N;
+	       }
+	       {
+		    E T4P, T4S, T7T, T7U;
+		    T4P = T4D - T4G;
+		    T4S = T4Q - T4R;
+		    ri[WS(ios, 27)] = T4P - T4S;
+		    ri[WS(ios, 11)] = T4P + T4S;
+		    T7T = T4N - T4K;
+		    T7U = T7R - T7O;
+		    ii[WS(ios, 11)] = T7T + T7U;
+		    ii[WS(ios, 27)] = T7U - T7T;
+	       }
+	  }
+	  {
+	       E T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j;
+	       E T5n, T4W, T7z;
+	       T4W = KP707106781 * (T4U + T4V);
+	       T4X = T4T - T4W;
+	       T5p = T4T + T4W;
+	       T7z = KP707106781 * (T3a + T3f);
+	       T7D = T7z + T7C;
+	       T7J = T7C - T7z;
+	       {
+		    E T50, T53, T5x, T5y;
+		    T50 = FNMS(KP382683432, T4Z, KP923879532 * T4Y);
+		    T53 = FMA(KP923879532, T51, KP382683432 * T52);
+		    T54 = T50 - T53;
+		    T7y = T50 + T53;
+		    T5x = T5d + T5e;
+		    T5y = T5g + T5h;
+		    T5z = FNMS(KP195090322, T5y, KP980785280 * T5x);
+		    T5D = FMA(KP195090322, T5x, KP980785280 * T5y);
+	       }
+	       {
+		    E T58, T5b, T5q, T5r;
+		    T58 = T56 - T57;
+		    T5b = T59 - T5a;
+		    T5c = FMA(KP555570233, T58, KP831469612 * T5b);
+		    T5m = FNMS(KP831469612, T58, KP555570233 * T5b);
+		    T5q = FMA(KP382683432, T4Y, KP923879532 * T4Z);
+		    T5r = FNMS(KP382683432, T51, KP923879532 * T52);
+		    T5s = T5q + T5r;
+		    T7I = T5r - T5q;
+	       }
+	       {
+		    E T5u, T5v, T5f, T5i;
+		    T5u = T56 + T57;
+		    T5v = T59 + T5a;
+		    T5w = FMA(KP980785280, T5u, KP195090322 * T5v);
+		    T5C = FNMS(KP195090322, T5u, KP980785280 * T5v);
+		    T5f = T5d - T5e;
+		    T5i = T5g - T5h;
+		    T5j = FNMS(KP831469612, T5i, KP555570233 * T5f);
+		    T5n = FMA(KP831469612, T5f, KP555570233 * T5i);
+	       }
+	       {
+		    E T55, T5k, T7H, T7K;
+		    T55 = T4X + T54;
+		    T5k = T5c + T5j;
+		    ri[WS(ios, 21)] = T55 - T5k;
+		    ri[WS(ios, 5)] = T55 + T5k;
+		    T7H = T5m + T5n;
+		    T7K = T7I + T7J;
+		    ii[WS(ios, 5)] = T7H + T7K;
+		    ii[WS(ios, 21)] = T7K - T7H;
+	       }
+	       {
+		    E T5l, T5o, T7L, T7M;
+		    T5l = T4X - T54;
+		    T5o = T5m - T5n;
+		    ri[WS(ios, 29)] = T5l - T5o;
+		    ri[WS(ios, 13)] = T5l + T5o;
+		    T7L = T5j - T5c;
+		    T7M = T7J - T7I;
+		    ii[WS(ios, 13)] = T7L + T7M;
+		    ii[WS(ios, 29)] = T7M - T7L;
+	       }
+	       {
+		    E T5t, T5A, T7x, T7E;
+		    T5t = T5p + T5s;
+		    T5A = T5w + T5z;
+		    ri[WS(ios, 17)] = T5t - T5A;
+		    ri[WS(ios, 1)] = T5t + T5A;
+		    T7x = T5C + T5D;
+		    T7E = T7y + T7D;
+		    ii[WS(ios, 1)] = T7x + T7E;
+		    ii[WS(ios, 17)] = T7E - T7x;
+	       }
+	       {
+		    E T5B, T5E, T7F, T7G;
+		    T5B = T5p - T5s;
+		    T5E = T5C - T5D;
+		    ri[WS(ios, 25)] = T5B - T5E;
+		    ri[WS(ios, 9)] = T5B + T5E;
+		    T7F = T5z - T5w;
+		    T7G = T7D - T7y;
+		    ii[WS(ios, 9)] = T7F + T7G;
+		    ii[WS(ios, 25)] = T7G - T7F;
+	       }
+	  }
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction list; referenced by desc below */
+     {TW_FULL, 0, 32}, /* NOTE(review): TW_FULL/TW_NEXT are defined outside this file; the 32 matches the codelet size in desc */
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 32, "t1_32", twinstr, {340, 114, 94, 0}, &GENUS, 0, 0, 0 }; /* size, name, twiddle list; {340, 114, 94, 0} are presumably add/mul/fma operation counts -- confirm against ct_desc */
+
+void X(codelet_t1_32) (planner *p) { /* Hands the t1_32 kernel and its descriptor to the planner p. */
+     X(kdft_dit_register) (p, t1_32, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t1_4.c b/src/fftw3/dft/codelets/standard/t1_4.c
new file mode 100644
index 0000000..cc869e4
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_4.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:52 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 4 -name t1_4 -include t.h */
+
+/*
+ * This function contains 22 FP additions, 12 FP multiplications,
+ * (or, 16 additions, 6 multiplications, 6 fused multiply/add),
+ * 13 stack variables, and 16 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_4.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_4.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_4.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t1_4(R *ri, R *ii, const R *W, stride ios, int m, int dist) /* In-place 4-point twiddle butterfly applied m times; returns the advanced W pointer. */
+{ /* Each pass reads and overwrites ri[WS(ios, k)] and ii[WS(ios, k)] for k = 0..3; ri/ii presumably hold real and imaginary parts -- confirm against codelet-dft.h. */
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 6) { /* per pass: step ri/ii by dist and consume 6 entries of W */
+	  E T1, Tp, T6, To, Tc, Tk, Th, Tl;
+	  T1 = ri[0]; /* input 0 is used without a twiddle factor */
+	  Tp = ii[0];
+	  { /* input 2 combined with the pair W[2], W[3] */
+	       E T3, T5, T2, T4;
+	       T3 = ri[WS(ios, 2)];
+	       T5 = ii[WS(ios, 2)];
+	       T2 = W[2];
+	       T4 = W[3];
+	       T6 = FMA(T2, T3, T4 * T5); /* NOTE(review): FMA/FNMS come from outside this file; presumably a*b+c and c-a*b, making (T6, To) = input * conj(W[2] + i*W[3]) -- confirm */
+	       To = FNMS(T4, T3, T2 * T5);
+	  }
+	  { /* input 1 combined with the pair W[0], W[1] */
+	       E T9, Tb, T8, Ta;
+	       T9 = ri[WS(ios, 1)];
+	       Tb = ii[WS(ios, 1)];
+	       T8 = W[0];
+	       Ta = W[1];
+	       Tc = FMA(T8, T9, Ta * Tb);
+	       Tk = FNMS(Ta, T9, T8 * Tb);
+	  }
+	  { /* input 3 combined with the pair W[4], W[5] */
+	       E Te, Tg, Td, Tf;
+	       Te = ri[WS(ios, 3)];
+	       Tg = ii[WS(ios, 3)];
+	       Td = W[4];
+	       Tf = W[5];
+	       Th = FMA(Td, Te, Tf * Tg);
+	       Tl = FNMS(Tf, Te, Td * Tg);
+	  }
+	  { /* outputs 0 and 2: (in0 + in2) plus/minus (in1 + in3), separately for ri and ii */
+	       E T7, Ti, Tn, Tq;
+	       T7 = T1 + T6;
+	       Ti = Tc + Th;
+	       ri[WS(ios, 2)] = T7 - Ti;
+	       ri[0] = T7 + Ti;
+	       Tn = Tk + Tl;
+	       Tq = To + Tp;
+	       ii[0] = Tn + Tq;
+	       ii[WS(ios, 2)] = Tq - Tn;
+	  }
+	  { /* outputs 1 and 3: (in0 - in2) crossed with (in1 - in3); the ri results use the ii difference and vice versa */
+	       E Tj, Tm, Tr, Ts;
+	       Tj = T1 - T6;
+	       Tm = Tk - Tl;
+	       ri[WS(ios, 3)] = Tj - Tm;
+	       ri[WS(ios, 1)] = Tj + Tm;
+	       Tr = Tp - To;
+	       Ts = Tc - Th;
+	       ii[WS(ios, 1)] = Tr - Ts;
+	       ii[WS(ios, 3)] = Ts + Tr;
+	  }
+     }
+     return W; /* W was advanced by 6 for every pass executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction list; referenced by desc below */
+     {TW_FULL, 0, 4}, /* NOTE(review): TW_FULL/TW_NEXT are defined outside this file; the 4 matches the codelet size in desc */
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 4, "t1_4", twinstr, {16, 6, 6, 0}, &GENUS, 0, 0, 0 }; /* size, name, twiddle list; {16, 6, 6, ...} equals the add/mul/fma counts quoted in this file's header comment */
+
+void X(codelet_t1_4) (planner *p) { /* Hands the t1_4 kernel and its descriptor to the planner p. */
+     X(kdft_dit_register) (p, t1_4, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t1_5.c b/src/fftw3/dft/codelets/standard/t1_5.c
new file mode 100644
index 0000000..87feed7
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_5.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:52 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 5 -name t1_5 -include t.h */
+
+/*
+ * This function contains 40 FP additions, 28 FP multiplications,
+ * (or, 26 additions, 14 multiplications, 14 fused multiply/add),
+ * 29 stack variables, and 20 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_5.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_5.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_5.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t1_5(R *ri, R *ii, const R *W, stride ios, int m, int dist) /* In-place 5-point twiddle butterfly applied m times; returns the advanced W pointer. */
+{ /* Each pass reads and overwrites ri[WS(ios, k)] and ii[WS(ios, k)] for k = 0..4; ri/ii presumably hold real and imaginary parts -- confirm against codelet-dft.h. */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4; NOTE(review): DK is defined outside this file, presumably declaring a named constant */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 8) { /* per pass: step ri/ii by dist and consume 8 entries of W */
+	  E T1, TE, Tu, Tx, TJ, TI, TB, TC, TD, Tc, Tn, To;
+	  T1 = ri[0]; /* input 0 is used without a twiddle factor */
+	  TE = ii[0];
+	  { /* twiddle inputs 1..4, then form pairwise sums and differences of (1,4) and (2,3) */
+	       E T6, Ts, Tm, Tw, Tb, Tt, Th, Tv;
+	       { /* input 1 combined with the pair W[0], W[1] */
+		    E T3, T5, T2, T4;
+		    T3 = ri[WS(ios, 1)];
+		    T5 = ii[WS(ios, 1)];
+		    T2 = W[0];
+		    T4 = W[1];
+		    T6 = FMA(T2, T3, T4 * T5); /* NOTE(review): FMA/FNMS come from outside this file; presumably a*b+c and c-a*b, making (T6, Ts) = input * conj(W[0] + i*W[1]) -- confirm */
+		    Ts = FNMS(T4, T3, T2 * T5);
+	       }
+	       { /* input 3 combined with the pair W[4], W[5] */
+		    E Tj, Tl, Ti, Tk;
+		    Tj = ri[WS(ios, 3)];
+		    Tl = ii[WS(ios, 3)];
+		    Ti = W[4];
+		    Tk = W[5];
+		    Tm = FMA(Ti, Tj, Tk * Tl);
+		    Tw = FNMS(Tk, Tj, Ti * Tl);
+	       }
+	       { /* input 4 combined with the pair W[6], W[7] */
+		    E T8, Ta, T7, T9;
+		    T8 = ri[WS(ios, 4)];
+		    Ta = ii[WS(ios, 4)];
+		    T7 = W[6];
+		    T9 = W[7];
+		    Tb = FMA(T7, T8, T9 * Ta);
+		    Tt = FNMS(T9, T8, T7 * Ta);
+	       }
+	       { /* input 2 combined with the pair W[2], W[3] */
+		    E Te, Tg, Td, Tf;
+		    Te = ri[WS(ios, 2)];
+		    Tg = ii[WS(ios, 2)];
+		    Td = W[2];
+		    Tf = W[3];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    Tv = FNMS(Tf, Te, Td * Tg);
+	       }
+	       Tu = Ts - Tt; /* ii-side difference of inputs 1 and 4 */
+	       Tx = Tv - Tw; /* ii-side difference of inputs 2 and 3 */
+	       TJ = Th - Tm; /* ri-side difference of inputs 2 and 3 */
+	       TI = T6 - Tb; /* ri-side difference of inputs 1 and 4 */
+	       TB = Ts + Tt;
+	       TC = Tv + Tw;
+	       TD = TB + TC; /* ii-side sum of inputs 1..4 */
+	       Tc = T6 + Tb;
+	       Tn = Th + Tm;
+	       To = Tc + Tn; /* ri-side sum of inputs 1..4 */
+	  }
+	  ri[0] = T1 + To; /* output 0 is the plain sum of all five (twiddled) inputs */
+	  ii[0] = TD + TE;
+	  { /* ri parts of outputs 1..4 */
+	       E Ty, TA, Tr, Tz, Tp, Tq;
+	       Ty = FMA(KP951056516, Tu, KP587785252 * Tx);
+	       TA = FNMS(KP587785252, Tu, KP951056516 * Tx);
+	       Tp = KP559016994 * (Tc - Tn);
+	       Tq = FNMS(KP250000000, To, T1); /* presumably T1 - To/4, given the FNMS assumption noted earlier */
+	       Tr = Tp + Tq;
+	       Tz = Tq - Tp;
+	       ri[WS(ios, 4)] = Tr - Ty;
+	       ri[WS(ios, 3)] = Tz + TA;
+	       ri[WS(ios, 1)] = Tr + Ty;
+	       ri[WS(ios, 2)] = Tz - TA;
+	  }
+	  { /* ii parts of outputs 1..4: same constants, with the ri-side differences TI/TJ as the cross terms */
+	       E TK, TL, TH, TM, TF, TG;
+	       TK = FMA(KP951056516, TI, KP587785252 * TJ);
+	       TL = FNMS(KP587785252, TI, KP951056516 * TJ);
+	       TF = KP559016994 * (TB - TC);
+	       TG = FNMS(KP250000000, TD, TE);
+	       TH = TF + TG;
+	       TM = TG - TF;
+	       ii[WS(ios, 1)] = TH - TK;
+	       ii[WS(ios, 3)] = TM - TL;
+	       ii[WS(ios, 4)] = TK + TH;
+	       ii[WS(ios, 2)] = TL + TM;
+	  }
+     }
+     return W; /* W was advanced by 8 for every pass executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction list; referenced by desc below */
+     {TW_FULL, 0, 5}, /* NOTE(review): TW_FULL/TW_NEXT are defined outside this file; the 5 matches the codelet size in desc */
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 5, "t1_5", twinstr, {26, 14, 14, 0}, &GENUS, 0, 0, 0 }; /* size, name, twiddle list; {26, 14, 14, ...} equals the add/mul/fma counts quoted in this file's header comment */
+
+void X(codelet_t1_5) (planner *p) { /* Hands the t1_5 kernel and its descriptor to the planner p. */
+     X(kdft_dit_register) (p, t1_5, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t1_6.c b/src/fftw3/dft/codelets/standard/t1_6.c
new file mode 100644
index 0000000..d474acd
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_6.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:55 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 6 -name t1_6 -include t.h */
+
+/*
+ * This function contains 46 FP additions, 28 FP multiplications,
+ * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
+ * 23 stack variables, and 24 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_6.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_6.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_6.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t1_6(R *ri, R *ii, const R *W, stride ios, int m, int dist) /* In-place 6-point twiddle butterfly applied m times; returns the advanced W pointer. */
+{ /* Each pass reads and overwrites ri[WS(ios, k)] and ii[WS(ios, k)] for k = 0..5; ri/ii presumably hold real and imaginary parts -- confirm against codelet-dft.h. */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2; NOTE(review): DK is defined outside this file, presumably declaring a named constant */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 10) { /* per pass: step ri/ii by dist and consume 10 entries of W */
+	  E T7, TS, Tv, TO, Tt, TJ, Tx, TF, Ti, TI, Tw, TC;
+	  { /* pair (0, 3): input 0 untwiddled, input 3 combined with W[4], W[5] */
+	       E T1, TN, T6, TM;
+	       T1 = ri[0];
+	       TN = ii[0];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = ri[WS(ios, 3)];
+		    T5 = ii[WS(ios, 3)];
+		    T2 = W[4];
+		    T4 = W[5];
+		    T6 = FMA(T2, T3, T4 * T5); /* NOTE(review): FMA/FNMS come from outside this file; presumably a*b+c and c-a*b, making (T6, TM) = input * conj(W[4] + i*W[5]) -- confirm */
+		    TM = FNMS(T4, T3, T2 * T5);
+	       }
+	       T7 = T1 - T6; /* differences (T7, TS) feed the odd outputs */
+	       TS = TN - TM;
+	       Tv = T1 + T6; /* sums (Tv, TO) feed the even outputs */
+	       TO = TM + TN;
+	  }
+	  { /* pair (4, 1): input 4 with W[6], W[7]; input 1 with W[0], W[1] */
+	       E Tn, TD, Ts, TE;
+	       {
+		    E Tk, Tm, Tj, Tl;
+		    Tk = ri[WS(ios, 4)];
+		    Tm = ii[WS(ios, 4)];
+		    Tj = W[6];
+		    Tl = W[7];
+		    Tn = FMA(Tj, Tk, Tl * Tm);
+		    TD = FNMS(Tl, Tk, Tj * Tm);
+	       }
+	       {
+		    E Tp, Tr, To, Tq;
+		    Tp = ri[WS(ios, 1)];
+		    Tr = ii[WS(ios, 1)];
+		    To = W[0];
+		    Tq = W[1];
+		    Ts = FMA(To, Tp, Tq * Tr);
+		    TE = FNMS(Tq, Tp, To * Tr);
+	       }
+	       Tt = Tn - Ts;
+	       TJ = TD + TE;
+	       Tx = Tn + Ts;
+	       TF = TD - TE;
+	  }
+	  { /* pair (2, 5): input 2 with W[2], W[3]; input 5 with W[8], W[9] */
+	       E Tc, TA, Th, TB;
+	       {
+		    E T9, Tb, T8, Ta;
+		    T9 = ri[WS(ios, 2)];
+		    Tb = ii[WS(ios, 2)];
+		    T8 = W[2];
+		    Ta = W[3];
+		    Tc = FMA(T8, T9, Ta * Tb);
+		    TA = FNMS(Ta, T9, T8 * Tb);
+	       }
+	       {
+		    E Te, Tg, Td, Tf;
+		    Te = ri[WS(ios, 5)];
+		    Tg = ii[WS(ios, 5)];
+		    Td = W[8];
+		    Tf = W[9];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    TB = FNMS(Tf, Te, Td * Tg);
+	       }
+	       Ti = Tc - Th;
+	       TI = TA + TB;
+	       Tw = Tc + Th;
+	       TC = TA - TB;
+	  }
+	  { /* outputs 1, 3, 5: three-way combination of the pair differences */
+	       E TG, Tu, Tz, TR, TT, TU;
+	       TG = KP866025403 * (TC - TF);
+	       Tu = Ti + Tt;
+	       Tz = FNMS(KP500000000, Tu, T7); /* presumably T7 - Tu/2, given the FNMS assumption noted earlier */
+	       ri[WS(ios, 3)] = T7 + Tu;
+	       ri[WS(ios, 1)] = Tz + TG;
+	       ri[WS(ios, 5)] = Tz - TG;
+	       TR = KP866025403 * (Tt - Ti);
+	       TT = TC + TF;
+	       TU = FNMS(KP500000000, TT, TS);
+	       ii[WS(ios, 1)] = TR + TU;
+	       ii[WS(ios, 3)] = TT + TS;
+	       ii[WS(ios, 5)] = TU - TR;
+	  }
+	  { /* outputs 0, 2, 4: three-way combination of the pair sums */
+	       E TK, Ty, TH, TQ, TL, TP;
+	       TK = KP866025403 * (TI - TJ);
+	       Ty = Tw + Tx;
+	       TH = FNMS(KP500000000, Ty, Tv);
+	       ri[0] = Tv + Ty;
+	       ri[WS(ios, 4)] = TH + TK;
+	       ri[WS(ios, 2)] = TH - TK;
+	       TQ = KP866025403 * (Tx - Tw);
+	       TL = TI + TJ;
+	       TP = FNMS(KP500000000, TL, TO);
+	       ii[0] = TL + TO;
+	       ii[WS(ios, 4)] = TQ + TP;
+	       ii[WS(ios, 2)] = TP - TQ;
+	  }
+     }
+     return W; /* W was advanced by 10 for every pass executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction list; referenced by desc below */
+     {TW_FULL, 0, 6}, /* NOTE(review): TW_FULL/TW_NEXT are defined outside this file; the 6 matches the codelet size in desc */
+     {TW_NEXT, 1, 0}
+};
+
+static const ct_desc desc = { 6, "t1_6", twinstr, {32, 14, 14, 0}, &GENUS, 0, 0, 0 }; /* size, name, twiddle list; {32, 14, 14, ...} equals the add/mul/fma counts quoted in this file's header comment */
+
+void X(codelet_t1_6) (planner *p) { /* Hands the t1_6 kernel and its descriptor to the planner p. */
+     X(kdft_dit_register) (p, t1_6, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t1_64.c b/src/fftw3/dft/codelets/standard/t1_64.c
new file mode 100644
index 0000000..a03697b
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_64.c
@@ -0,0 +1,2001 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:30:08 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 64 -name t1_64 -include t.h */
+
+/*
+ * This function contains 1038 FP additions, 500 FP multiplications,
+ * (or, 808 additions, 270 multiplications, 230 fused multiply/add),
+ * 176 stack variables, and 256 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_64.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_64.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_64.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t1_64(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{
+     DK(KP471396736, +0.471396736825997648556387625905254377657460319);
+     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
+     DK(KP290284677, +0.290284677254462367636192375817395274691476278);
+     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
+     DK(KP634393284, +0.634393284163645498215171613225493370675687095);
+     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
+     DK(KP098017140, +0.098017140329560601994195563888641845861136673);
+     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 126) {
+	  E Tj, TcL, ThT, Tin, T6b, Taz, TgT, Thn, TG, Thm, TcO, TgO, T6m, ThQ, TaC;
+	  E Tim, T14, Tfq, T6y, T9O, TaG, Tc0, TcU, TeE, T1r, Tfr, T6J, T9P, TaJ, Tc1;
+	  E TcZ, TeF, T1Q, T2d, Tfx, Tfu, Tfv, Tfw, T6Q, TaM, Tdb, TeJ, T71, TaQ, T7a;
+	  E TaN, Td6, TeI, T77, TaP, T2B, T2Y, Tfz, TfA, TfB, TfC, T7h, TaW, Tdm, TeM;
+	  E T7s, TaU, T7B, TaX, Tdh, TeL, T7y, TaT, T5j, TfR, Tec, Tf0, TfY, Tgy, T8D;
+	  E Tbl, T8O, Tbx, T9l, Tbm, TdV, TeX, T9i, Tbw, T3M, TfL, TdL, TeQ, TfI, Tgt;
+	  E T7K, Tb2, T7V, Tbe, T8s, Tb3, Tdu, TeT, T8p, Tbd, T4x, TfJ, TdE, TdM, TfO;
+	  E Tgu, T87, T8v, T8i, T8u, Tba, Tbg, Tdz, TdN, Tb7, Tbh, T64, TfZ, Te5, Ted;
+	  E TfU, Tgz, T90, T9o, T9b, T9n, Tbt, Tbz, Te0, Tee, Tbq, TbA;
+	  {
+	       E T1, TgR, T6, TgQ, Tc, T68, Th, T69;
+	       T1 = ri[0];
+	       TgR = ii[0];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = ri[WS(ios, 32)];
+		    T5 = ii[WS(ios, 32)];
+		    T2 = W[62];
+		    T4 = W[63];
+		    T6 = FMA(T2, T3, T4 * T5);
+		    TgQ = FNMS(T4, T3, T2 * T5);
+	       }
+	       {
+		    E T9, Tb, T8, Ta;
+		    T9 = ri[WS(ios, 16)];
+		    Tb = ii[WS(ios, 16)];
+		    T8 = W[30];
+		    Ta = W[31];
+		    Tc = FMA(T8, T9, Ta * Tb);
+		    T68 = FNMS(Ta, T9, T8 * Tb);
+	       }
+	       {
+		    E Te, Tg, Td, Tf;
+		    Te = ri[WS(ios, 48)];
+		    Tg = ii[WS(ios, 48)];
+		    Td = W[94];
+		    Tf = W[95];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    T69 = FNMS(Tf, Te, Td * Tg);
+	       }
+	       {
+		    E T7, Ti, ThR, ThS;
+		    T7 = T1 + T6;
+		    Ti = Tc + Th;
+		    Tj = T7 + Ti;
+		    TcL = T7 - Ti;
+		    ThR = TgR - TgQ;
+		    ThS = Tc - Th;
+		    ThT = ThR - ThS;
+		    Tin = ThS + ThR;
+	       }
+	       {
+		    E T67, T6a, TgP, TgS;
+		    T67 = T1 - T6;
+		    T6a = T68 - T69;
+		    T6b = T67 - T6a;
+		    Taz = T67 + T6a;
+		    TgP = T68 + T69;
+		    TgS = TgQ + TgR;
+		    TgT = TgP + TgS;
+		    Thn = TgS - TgP;
+	       }
+	  }
+	  {
+	       E To, T6c, Tt, T6d, T6e, T6f, Tz, T6i, TE, T6j, T6h, T6k;
+	       {
+		    E Tl, Tn, Tk, Tm;
+		    Tl = ri[WS(ios, 8)];
+		    Tn = ii[WS(ios, 8)];
+		    Tk = W[14];
+		    Tm = W[15];
+		    To = FMA(Tk, Tl, Tm * Tn);
+		    T6c = FNMS(Tm, Tl, Tk * Tn);
+	       }
+	       {
+		    E Tq, Ts, Tp, Tr;
+		    Tq = ri[WS(ios, 40)];
+		    Ts = ii[WS(ios, 40)];
+		    Tp = W[78];
+		    Tr = W[79];
+		    Tt = FMA(Tp, Tq, Tr * Ts);
+		    T6d = FNMS(Tr, Tq, Tp * Ts);
+	       }
+	       T6e = T6c - T6d;
+	       T6f = To - Tt;
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = ri[WS(ios, 56)];
+		    Ty = ii[WS(ios, 56)];
+		    Tv = W[110];
+		    Tx = W[111];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    T6i = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E TB, TD, TA, TC;
+		    TB = ri[WS(ios, 24)];
+		    TD = ii[WS(ios, 24)];
+		    TA = W[46];
+		    TC = W[47];
+		    TE = FMA(TA, TB, TC * TD);
+		    T6j = FNMS(TC, TB, TA * TD);
+	       }
+	       T6h = Tz - TE;
+	       T6k = T6i - T6j;
+	       {
+		    E Tu, TF, TcM, TcN;
+		    Tu = To + Tt;
+		    TF = Tz + TE;
+		    TG = Tu + TF;
+		    Thm = TF - Tu;
+		    TcM = T6c + T6d;
+		    TcN = T6i + T6j;
+		    TcO = TcM - TcN;
+		    TgO = TcM + TcN;
+	       }
+	       {
+		    E T6g, T6l, TaA, TaB;
+		    T6g = T6e - T6f;
+		    T6l = T6h + T6k;
+		    T6m = KP707106781 * (T6g - T6l);
+		    ThQ = KP707106781 * (T6g + T6l);
+		    TaA = T6f + T6e;
+		    TaB = T6h - T6k;
+		    TaC = KP707106781 * (TaA + TaB);
+		    Tim = KP707106781 * (TaB - TaA);
+	       }
+	  }
+	  {
+	       E TS, TcQ, T6q, T6t, T13, TcR, T6r, T6w, T6s, T6x;
+	       {
+		    E TM, T6o, TR, T6p;
+		    {
+			 E TJ, TL, TI, TK;
+			 TJ = ri[WS(ios, 4)];
+			 TL = ii[WS(ios, 4)];
+			 TI = W[6];
+			 TK = W[7];
+			 TM = FMA(TI, TJ, TK * TL);
+			 T6o = FNMS(TK, TJ, TI * TL);
+		    }
+		    {
+			 E TO, TQ, TN, TP;
+			 TO = ri[WS(ios, 36)];
+			 TQ = ii[WS(ios, 36)];
+			 TN = W[70];
+			 TP = W[71];
+			 TR = FMA(TN, TO, TP * TQ);
+			 T6p = FNMS(TP, TO, TN * TQ);
+		    }
+		    TS = TM + TR;
+		    TcQ = T6o + T6p;
+		    T6q = T6o - T6p;
+		    T6t = TM - TR;
+	       }
+	       {
+		    E TX, T6u, T12, T6v;
+		    {
+			 E TU, TW, TT, TV;
+			 TU = ri[WS(ios, 20)];
+			 TW = ii[WS(ios, 20)];
+			 TT = W[38];
+			 TV = W[39];
+			 TX = FMA(TT, TU, TV * TW);
+			 T6u = FNMS(TV, TU, TT * TW);
+		    }
+		    {
+			 E TZ, T11, TY, T10;
+			 TZ = ri[WS(ios, 52)];
+			 T11 = ii[WS(ios, 52)];
+			 TY = W[102];
+			 T10 = W[103];
+			 T12 = FMA(TY, TZ, T10 * T11);
+			 T6v = FNMS(T10, TZ, TY * T11);
+		    }
+		    T13 = TX + T12;
+		    TcR = T6u + T6v;
+		    T6r = TX - T12;
+		    T6w = T6u - T6v;
+	       }
+	       T14 = TS + T13;
+	       Tfq = TcQ + TcR;
+	       T6s = T6q + T6r;
+	       T6x = T6t - T6w;
+	       T6y = FNMS(KP923879532, T6x, KP382683432 * T6s);
+	       T9O = FMA(KP923879532, T6s, KP382683432 * T6x);
+	       {
+		    E TaE, TaF, TcS, TcT;
+		    TaE = T6q - T6r;
+		    TaF = T6t + T6w;
+		    TaG = FNMS(KP382683432, TaF, KP923879532 * TaE);
+		    Tc0 = FMA(KP382683432, TaE, KP923879532 * TaF);
+		    TcS = TcQ - TcR;
+		    TcT = TS - T13;
+		    TcU = TcS - TcT;
+		    TeE = TcT + TcS;
+	       }
+	  }
+	  {
+	       E T1f, TcW, T6B, T6E, T1q, TcX, T6C, T6H, T6D, T6I;
+	       {
+		    E T19, T6z, T1e, T6A;
+		    {
+			 E T16, T18, T15, T17;
+			 T16 = ri[WS(ios, 60)];
+			 T18 = ii[WS(ios, 60)];
+			 T15 = W[118];
+			 T17 = W[119];
+			 T19 = FMA(T15, T16, T17 * T18);
+			 T6z = FNMS(T17, T16, T15 * T18);
+		    }
+		    {
+			 E T1b, T1d, T1a, T1c;
+			 T1b = ri[WS(ios, 28)];
+			 T1d = ii[WS(ios, 28)];
+			 T1a = W[54];
+			 T1c = W[55];
+			 T1e = FMA(T1a, T1b, T1c * T1d);
+			 T6A = FNMS(T1c, T1b, T1a * T1d);
+		    }
+		    T1f = T19 + T1e;
+		    TcW = T6z + T6A;
+		    T6B = T6z - T6A;
+		    T6E = T19 - T1e;
+	       }
+	       {
+		    E T1k, T6F, T1p, T6G;
+		    {
+			 E T1h, T1j, T1g, T1i;
+			 T1h = ri[WS(ios, 12)];
+			 T1j = ii[WS(ios, 12)];
+			 T1g = W[22];
+			 T1i = W[23];
+			 T1k = FMA(T1g, T1h, T1i * T1j);
+			 T6F = FNMS(T1i, T1h, T1g * T1j);
+		    }
+		    {
+			 E T1m, T1o, T1l, T1n;
+			 T1m = ri[WS(ios, 44)];
+			 T1o = ii[WS(ios, 44)];
+			 T1l = W[86];
+			 T1n = W[87];
+			 T1p = FMA(T1l, T1m, T1n * T1o);
+			 T6G = FNMS(T1n, T1m, T1l * T1o);
+		    }
+		    T1q = T1k + T1p;
+		    TcX = T6F + T6G;
+		    T6C = T1k - T1p;
+		    T6H = T6F - T6G;
+	       }
+	       T1r = T1f + T1q;
+	       Tfr = TcW + TcX;
+	       T6D = T6B + T6C;
+	       T6I = T6E - T6H;
+	       T6J = FMA(KP382683432, T6D, KP923879532 * T6I);
+	       T9P = FNMS(KP923879532, T6D, KP382683432 * T6I);
+	       {
+		    E TaH, TaI, TcV, TcY;
+		    TaH = T6B - T6C;
+		    TaI = T6E + T6H;
+		    TaJ = FMA(KP923879532, TaH, KP382683432 * TaI);
+		    Tc1 = FNMS(KP382683432, TaH, KP923879532 * TaI);
+		    TcV = T1f - T1q;
+		    TcY = TcW - TcX;
+		    TcZ = TcV + TcY;
+		    TeF = TcV - TcY;
+	       }
+	  }
+	  {
+	       E T1y, T6M, T1D, T6N, T1E, Td2, T1J, T74, T1O, T75, T1P, Td3, T21, Td8, T6W;
+	       E T6Z, T2c, Td9, T6R, T6U;
+	       {
+		    E T1v, T1x, T1u, T1w;
+		    T1v = ri[WS(ios, 2)];
+		    T1x = ii[WS(ios, 2)];
+		    T1u = W[2];
+		    T1w = W[3];
+		    T1y = FMA(T1u, T1v, T1w * T1x);
+		    T6M = FNMS(T1w, T1v, T1u * T1x);
+	       }
+	       {
+		    E T1A, T1C, T1z, T1B;
+		    T1A = ri[WS(ios, 34)];
+		    T1C = ii[WS(ios, 34)];
+		    T1z = W[66];
+		    T1B = W[67];
+		    T1D = FMA(T1z, T1A, T1B * T1C);
+		    T6N = FNMS(T1B, T1A, T1z * T1C);
+	       }
+	       T1E = T1y + T1D;
+	       Td2 = T6M + T6N;
+	       {
+		    E T1G, T1I, T1F, T1H;
+		    T1G = ri[WS(ios, 18)];
+		    T1I = ii[WS(ios, 18)];
+		    T1F = W[34];
+		    T1H = W[35];
+		    T1J = FMA(T1F, T1G, T1H * T1I);
+		    T74 = FNMS(T1H, T1G, T1F * T1I);
+	       }
+	       {
+		    E T1L, T1N, T1K, T1M;
+		    T1L = ri[WS(ios, 50)];
+		    T1N = ii[WS(ios, 50)];
+		    T1K = W[98];
+		    T1M = W[99];
+		    T1O = FMA(T1K, T1L, T1M * T1N);
+		    T75 = FNMS(T1M, T1L, T1K * T1N);
+	       }
+	       T1P = T1J + T1O;
+	       Td3 = T74 + T75;
+	       {
+		    E T1V, T6X, T20, T6Y;
+		    {
+			 E T1S, T1U, T1R, T1T;
+			 T1S = ri[WS(ios, 10)];
+			 T1U = ii[WS(ios, 10)];
+			 T1R = W[18];
+			 T1T = W[19];
+			 T1V = FMA(T1R, T1S, T1T * T1U);
+			 T6X = FNMS(T1T, T1S, T1R * T1U);
+		    }
+		    {
+			 E T1X, T1Z, T1W, T1Y;
+			 T1X = ri[WS(ios, 42)];
+			 T1Z = ii[WS(ios, 42)];
+			 T1W = W[82];
+			 T1Y = W[83];
+			 T20 = FMA(T1W, T1X, T1Y * T1Z);
+			 T6Y = FNMS(T1Y, T1X, T1W * T1Z);
+		    }
+		    T21 = T1V + T20;
+		    Td8 = T6X + T6Y;
+		    T6W = T1V - T20;
+		    T6Z = T6X - T6Y;
+	       }
+	       {
+		    E T26, T6S, T2b, T6T;
+		    {
+			 E T23, T25, T22, T24;
+			 T23 = ri[WS(ios, 58)];
+			 T25 = ii[WS(ios, 58)];
+			 T22 = W[114];
+			 T24 = W[115];
+			 T26 = FMA(T22, T23, T24 * T25);
+			 T6S = FNMS(T24, T23, T22 * T25);
+		    }
+		    {
+			 E T28, T2a, T27, T29;
+			 T28 = ri[WS(ios, 26)];
+			 T2a = ii[WS(ios, 26)];
+			 T27 = W[50];
+			 T29 = W[51];
+			 T2b = FMA(T27, T28, T29 * T2a);
+			 T6T = FNMS(T29, T28, T27 * T2a);
+		    }
+		    T2c = T26 + T2b;
+		    Td9 = T6S + T6T;
+		    T6R = T26 - T2b;
+		    T6U = T6S - T6T;
+	       }
+	       T1Q = T1E + T1P;
+	       T2d = T21 + T2c;
+	       Tfx = T1Q - T2d;
+	       Tfu = Td2 + Td3;
+	       Tfv = Td8 + Td9;
+	       Tfw = Tfu - Tfv;
+	       {
+		    E T6O, T6P, Td7, Tda;
+		    T6O = T6M - T6N;
+		    T6P = T1J - T1O;
+		    T6Q = T6O + T6P;
+		    TaM = T6O - T6P;
+		    Td7 = T1E - T1P;
+		    Tda = Td8 - Td9;
+		    Tdb = Td7 - Tda;
+		    TeJ = Td7 + Tda;
+	       }
+	       {
+		    E T6V, T70, T78, T79;
+		    T6V = T6R - T6U;
+		    T70 = T6W + T6Z;
+		    T71 = KP707106781 * (T6V - T70);
+		    TaQ = KP707106781 * (T70 + T6V);
+		    T78 = T6Z - T6W;
+		    T79 = T6R + T6U;
+		    T7a = KP707106781 * (T78 - T79);
+		    TaN = KP707106781 * (T78 + T79);
+	       }
+	       {
+		    E Td4, Td5, T73, T76;
+		    Td4 = Td2 - Td3;
+		    Td5 = T2c - T21;
+		    Td6 = Td4 - Td5;
+		    TeI = Td4 + Td5;
+		    T73 = T1y - T1D;
+		    T76 = T74 - T75;
+		    T77 = T73 - T76;
+		    TaP = T73 + T76;
+	       }
+	  }
+	  {
+	       E T2j, T7d, T2o, T7e, T2p, Tdd, T2u, T7v, T2z, T7w, T2A, Tde, T2M, Tdj, T7n;
+	       E T7q, T2X, Tdk, T7i, T7l;
+	       {
+		    E T2g, T2i, T2f, T2h;
+		    T2g = ri[WS(ios, 62)];
+		    T2i = ii[WS(ios, 62)];
+		    T2f = W[122];
+		    T2h = W[123];
+		    T2j = FMA(T2f, T2g, T2h * T2i);
+		    T7d = FNMS(T2h, T2g, T2f * T2i);
+	       }
+	       {
+		    E T2l, T2n, T2k, T2m;
+		    T2l = ri[WS(ios, 30)];
+		    T2n = ii[WS(ios, 30)];
+		    T2k = W[58];
+		    T2m = W[59];
+		    T2o = FMA(T2k, T2l, T2m * T2n);
+		    T7e = FNMS(T2m, T2l, T2k * T2n);
+	       }
+	       T2p = T2j + T2o;
+	       Tdd = T7d + T7e;
+	       {
+		    E T2r, T2t, T2q, T2s;
+		    T2r = ri[WS(ios, 14)];
+		    T2t = ii[WS(ios, 14)];
+		    T2q = W[26];
+		    T2s = W[27];
+		    T2u = FMA(T2q, T2r, T2s * T2t);
+		    T7v = FNMS(T2s, T2r, T2q * T2t);
+	       }
+	       {
+		    E T2w, T2y, T2v, T2x;
+		    T2w = ri[WS(ios, 46)];
+		    T2y = ii[WS(ios, 46)];
+		    T2v = W[90];
+		    T2x = W[91];
+		    T2z = FMA(T2v, T2w, T2x * T2y);
+		    T7w = FNMS(T2x, T2w, T2v * T2y);
+	       }
+	       T2A = T2u + T2z;
+	       Tde = T7v + T7w;
+	       {
+		    E T2G, T7o, T2L, T7p;
+		    {
+			 E T2D, T2F, T2C, T2E;
+			 T2D = ri[WS(ios, 6)];
+			 T2F = ii[WS(ios, 6)];
+			 T2C = W[10];
+			 T2E = W[11];
+			 T2G = FMA(T2C, T2D, T2E * T2F);
+			 T7o = FNMS(T2E, T2D, T2C * T2F);
+		    }
+		    {
+			 E T2I, T2K, T2H, T2J;
+			 T2I = ri[WS(ios, 38)];
+			 T2K = ii[WS(ios, 38)];
+			 T2H = W[74];
+			 T2J = W[75];
+			 T2L = FMA(T2H, T2I, T2J * T2K);
+			 T7p = FNMS(T2J, T2I, T2H * T2K);
+		    }
+		    T2M = T2G + T2L;
+		    Tdj = T7o + T7p;
+		    T7n = T2G - T2L;
+		    T7q = T7o - T7p;
+	       }
+	       {
+		    E T2R, T7j, T2W, T7k;
+		    {
+			 E T2O, T2Q, T2N, T2P;
+			 T2O = ri[WS(ios, 54)];
+			 T2Q = ii[WS(ios, 54)];
+			 T2N = W[106];
+			 T2P = W[107];
+			 T2R = FMA(T2N, T2O, T2P * T2Q);
+			 T7j = FNMS(T2P, T2O, T2N * T2Q);
+		    }
+		    {
+			 E T2T, T2V, T2S, T2U;
+			 T2T = ri[WS(ios, 22)];
+			 T2V = ii[WS(ios, 22)];
+			 T2S = W[42];
+			 T2U = W[43];
+			 T2W = FMA(T2S, T2T, T2U * T2V);
+			 T7k = FNMS(T2U, T2T, T2S * T2V);
+		    }
+		    T2X = T2R + T2W;
+		    Tdk = T7j + T7k;
+		    T7i = T2R - T2W;
+		    T7l = T7j - T7k;
+	       }
+	       T2B = T2p + T2A;
+	       T2Y = T2M + T2X;
+	       Tfz = T2B - T2Y;
+	       TfA = Tdd + Tde;
+	       TfB = Tdj + Tdk;
+	       TfC = TfA - TfB;
+	       {
+		    E T7f, T7g, Tdi, Tdl;
+		    T7f = T7d - T7e;
+		    T7g = T2u - T2z;
+		    T7h = T7f + T7g;
+		    TaW = T7f - T7g;
+		    Tdi = T2p - T2A;
+		    Tdl = Tdj - Tdk;
+		    Tdm = Tdi - Tdl;
+		    TeM = Tdi + Tdl;
+	       }
+	       {
+		    E T7m, T7r, T7z, T7A;
+		    T7m = T7i - T7l;
+		    T7r = T7n + T7q;
+		    T7s = KP707106781 * (T7m - T7r);
+		    TaU = KP707106781 * (T7r + T7m);
+		    T7z = T7q - T7n;
+		    T7A = T7i + T7l;
+		    T7B = KP707106781 * (T7z - T7A);
+		    TaX = KP707106781 * (T7z + T7A);
+	       }
+	       {
+		    E Tdf, Tdg, T7u, T7x;
+		    Tdf = Tdd - Tde;
+		    Tdg = T2X - T2M;
+		    Tdh = Tdf - Tdg;
+		    TeL = Tdf + Tdg;
+		    T7u = T2j - T2o;
+		    T7x = T7v - T7w;
+		    T7y = T7u - T7x;
+		    TaT = T7u + T7x;
+	       }
+	  }
+	  {
+	       E T4D, T9e, T4I, T9f, T4J, Te8, T4O, T8A, T4T, T8B, T4U, Te9, T56, TdS, T8G;
+	       E T8H, T5h, TdT, T8J, T8M;
+	       {
+		    E T4A, T4C, T4z, T4B;
+		    T4A = ri[WS(ios, 63)];
+		    T4C = ii[WS(ios, 63)];
+		    T4z = W[124];
+		    T4B = W[125];
+		    T4D = FMA(T4z, T4A, T4B * T4C);
+		    T9e = FNMS(T4B, T4A, T4z * T4C);
+	       }
+	       {
+		    E T4F, T4H, T4E, T4G;
+		    T4F = ri[WS(ios, 31)];
+		    T4H = ii[WS(ios, 31)];
+		    T4E = W[60];
+		    T4G = W[61];
+		    T4I = FMA(T4E, T4F, T4G * T4H);
+		    T9f = FNMS(T4G, T4F, T4E * T4H);
+	       }
+	       T4J = T4D + T4I;
+	       Te8 = T9e + T9f;
+	       {
+		    E T4L, T4N, T4K, T4M;
+		    T4L = ri[WS(ios, 15)];
+		    T4N = ii[WS(ios, 15)];
+		    T4K = W[28];
+		    T4M = W[29];
+		    T4O = FMA(T4K, T4L, T4M * T4N);
+		    T8A = FNMS(T4M, T4L, T4K * T4N);
+	       }
+	       {
+		    E T4Q, T4S, T4P, T4R;
+		    T4Q = ri[WS(ios, 47)];
+		    T4S = ii[WS(ios, 47)];
+		    T4P = W[92];
+		    T4R = W[93];
+		    T4T = FMA(T4P, T4Q, T4R * T4S);
+		    T8B = FNMS(T4R, T4Q, T4P * T4S);
+	       }
+	       T4U = T4O + T4T;
+	       Te9 = T8A + T8B;
+	       {
+		    E T50, T8E, T55, T8F;
+		    {
+			 E T4X, T4Z, T4W, T4Y;
+			 T4X = ri[WS(ios, 7)];
+			 T4Z = ii[WS(ios, 7)];
+			 T4W = W[12];
+			 T4Y = W[13];
+			 T50 = FMA(T4W, T4X, T4Y * T4Z);
+			 T8E = FNMS(T4Y, T4X, T4W * T4Z);
+		    }
+		    {
+			 E T52, T54, T51, T53;
+			 T52 = ri[WS(ios, 39)];
+			 T54 = ii[WS(ios, 39)];
+			 T51 = W[76];
+			 T53 = W[77];
+			 T55 = FMA(T51, T52, T53 * T54);
+			 T8F = FNMS(T53, T52, T51 * T54);
+		    }
+		    T56 = T50 + T55;
+		    TdS = T8E + T8F;
+		    T8G = T8E - T8F;
+		    T8H = T50 - T55;
+	       }
+	       {
+		    E T5b, T8K, T5g, T8L;
+		    {
+			 E T58, T5a, T57, T59;
+			 T58 = ri[WS(ios, 55)];
+			 T5a = ii[WS(ios, 55)];
+			 T57 = W[108];
+			 T59 = W[109];
+			 T5b = FMA(T57, T58, T59 * T5a);
+			 T8K = FNMS(T59, T58, T57 * T5a);
+		    }
+		    {
+			 E T5d, T5f, T5c, T5e;
+			 T5d = ri[WS(ios, 23)];
+			 T5f = ii[WS(ios, 23)];
+			 T5c = W[44];
+			 T5e = W[45];
+			 T5g = FMA(T5c, T5d, T5e * T5f);
+			 T8L = FNMS(T5e, T5d, T5c * T5f);
+		    }
+		    T5h = T5b + T5g;
+		    TdT = T8K + T8L;
+		    T8J = T5b - T5g;
+		    T8M = T8K - T8L;
+	       }
+	       {
+		    E T4V, T5i, Tea, Teb;
+		    T4V = T4J + T4U;
+		    T5i = T56 + T5h;
+		    T5j = T4V + T5i;
+		    TfR = T4V - T5i;
+		    Tea = Te8 - Te9;
+		    Teb = T5h - T56;
+		    Tec = Tea - Teb;
+		    Tf0 = Tea + Teb;
+	       }
+	       {
+		    E TfW, TfX, T8z, T8C;
+		    TfW = Te8 + Te9;
+		    TfX = TdS + TdT;
+		    TfY = TfW - TfX;
+		    Tgy = TfW + TfX;
+		    T8z = T4D - T4I;
+		    T8C = T8A - T8B;
+		    T8D = T8z - T8C;
+		    Tbl = T8z + T8C;
+	       }
+	       {
+		    E T8I, T8N, T9j, T9k;
+		    T8I = T8G - T8H;
+		    T8N = T8J + T8M;
+		    T8O = KP707106781 * (T8I - T8N);
+		    Tbx = KP707106781 * (T8I + T8N);
+		    T9j = T8J - T8M;
+		    T9k = T8H + T8G;
+		    T9l = KP707106781 * (T9j - T9k);
+		    Tbm = KP707106781 * (T9k + T9j);
+	       }
+	       {
+		    E TdR, TdU, T9g, T9h;
+		    TdR = T4J - T4U;
+		    TdU = TdS - TdT;
+		    TdV = TdR - TdU;
+		    TeX = TdR + TdU;
+		    T9g = T9e - T9f;
+		    T9h = T4O - T4T;
+		    T9i = T9g + T9h;
+		    Tbw = T9g - T9h;
+	       }
+	  }
+	  {
+	       E T36, T7G, T3b, T7H, T3c, Tdq, T3h, T8m, T3m, T8n, T3n, Tdr, T3z, TdI, T7Q;
+	       E T7T, T3K, TdJ, T7L, T7O;
+	       {
+		    E T33, T35, T32, T34;
+		    T33 = ri[WS(ios, 1)];
+		    T35 = ii[WS(ios, 1)];
+		    T32 = W[0];
+		    T34 = W[1];
+		    T36 = FMA(T32, T33, T34 * T35);
+		    T7G = FNMS(T34, T33, T32 * T35);
+	       }
+	       {
+		    E T38, T3a, T37, T39;
+		    T38 = ri[WS(ios, 33)];
+		    T3a = ii[WS(ios, 33)];
+		    T37 = W[64];
+		    T39 = W[65];
+		    T3b = FMA(T37, T38, T39 * T3a);
+		    T7H = FNMS(T39, T38, T37 * T3a);
+	       }
+	       T3c = T36 + T3b;
+	       Tdq = T7G + T7H;
+	       {
+		    E T3e, T3g, T3d, T3f;
+		    T3e = ri[WS(ios, 17)];
+		    T3g = ii[WS(ios, 17)];
+		    T3d = W[32];
+		    T3f = W[33];
+		    T3h = FMA(T3d, T3e, T3f * T3g);
+		    T8m = FNMS(T3f, T3e, T3d * T3g);
+	       }
+	       {
+		    E T3j, T3l, T3i, T3k;
+		    T3j = ri[WS(ios, 49)];
+		    T3l = ii[WS(ios, 49)];
+		    T3i = W[96];
+		    T3k = W[97];
+		    T3m = FMA(T3i, T3j, T3k * T3l);
+		    T8n = FNMS(T3k, T3j, T3i * T3l);
+	       }
+	       T3n = T3h + T3m;
+	       Tdr = T8m + T8n;
+	       {
+		    E T3t, T7R, T3y, T7S;
+		    {
+			 E T3q, T3s, T3p, T3r;
+			 T3q = ri[WS(ios, 9)];
+			 T3s = ii[WS(ios, 9)];
+			 T3p = W[16];
+			 T3r = W[17];
+			 T3t = FMA(T3p, T3q, T3r * T3s);
+			 T7R = FNMS(T3r, T3q, T3p * T3s);
+		    }
+		    {
+			 E T3v, T3x, T3u, T3w;
+			 T3v = ri[WS(ios, 41)];
+			 T3x = ii[WS(ios, 41)];
+			 T3u = W[80];
+			 T3w = W[81];
+			 T3y = FMA(T3u, T3v, T3w * T3x);
+			 T7S = FNMS(T3w, T3v, T3u * T3x);
+		    }
+		    T3z = T3t + T3y;
+		    TdI = T7R + T7S;
+		    T7Q = T3t - T3y;
+		    T7T = T7R - T7S;
+	       }
+	       {
+		    E T3E, T7M, T3J, T7N;
+		    {
+			 E T3B, T3D, T3A, T3C;
+			 T3B = ri[WS(ios, 57)];
+			 T3D = ii[WS(ios, 57)];
+			 T3A = W[112];
+			 T3C = W[113];
+			 T3E = FMA(T3A, T3B, T3C * T3D);
+			 T7M = FNMS(T3C, T3B, T3A * T3D);
+		    }
+		    {
+			 E T3G, T3I, T3F, T3H;
+			 T3G = ri[WS(ios, 25)];
+			 T3I = ii[WS(ios, 25)];
+			 T3F = W[48];
+			 T3H = W[49];
+			 T3J = FMA(T3F, T3G, T3H * T3I);
+			 T7N = FNMS(T3H, T3G, T3F * T3I);
+		    }
+		    T3K = T3E + T3J;
+		    TdJ = T7M + T7N;
+		    T7L = T3E - T3J;
+		    T7O = T7M - T7N;
+	       }
+	       {
+		    E T3o, T3L, TdH, TdK;
+		    T3o = T3c + T3n;
+		    T3L = T3z + T3K;
+		    T3M = T3o + T3L;
+		    TfL = T3o - T3L;
+		    TdH = T3c - T3n;
+		    TdK = TdI - TdJ;
+		    TdL = TdH - TdK;
+		    TeQ = TdH + TdK;
+	       }
+	       {
+		    E TfG, TfH, T7I, T7J;
+		    TfG = Tdq + Tdr;
+		    TfH = TdI + TdJ;
+		    TfI = TfG - TfH;
+		    Tgt = TfG + TfH;
+		    T7I = T7G - T7H;
+		    T7J = T3h - T3m;
+		    T7K = T7I + T7J;
+		    Tb2 = T7I - T7J;
+	       }
+	       {
+		    E T7P, T7U, T8q, T8r;
+		    T7P = T7L - T7O;
+		    T7U = T7Q + T7T;
+		    T7V = KP707106781 * (T7P - T7U);
+		    Tbe = KP707106781 * (T7U + T7P);
+		    T8q = T7T - T7Q;
+		    T8r = T7L + T7O;
+		    T8s = KP707106781 * (T8q - T8r);
+		    Tb3 = KP707106781 * (T8q + T8r);
+	       }
+	       {
+		    E Tds, Tdt, T8l, T8o;
+		    Tds = Tdq - Tdr;
+		    Tdt = T3K - T3z;
+		    Tdu = Tds - Tdt;
+		    TeT = Tds + Tdt;
+		    T8l = T36 - T3b;
+		    T8o = T8m - T8n;
+		    T8p = T8l - T8o;
+		    Tbd = T8l + T8o;
+	       }
+	  }
+	  {
+	       E T3X, TdB, T8a, T8d, T4v, Tdx, T80, T85, T48, TdC, T8b, T8g, T4k, Tdw, T7X;
+	       E T84;
+	       {
+		    E T3R, T88, T3W, T89;
+		    {
+			 E T3O, T3Q, T3N, T3P;
+			 T3O = ri[WS(ios, 5)];
+			 T3Q = ii[WS(ios, 5)];
+			 T3N = W[8];
+			 T3P = W[9];
+			 T3R = FMA(T3N, T3O, T3P * T3Q);
+			 T88 = FNMS(T3P, T3O, T3N * T3Q);
+		    }
+		    {
+			 E T3T, T3V, T3S, T3U;
+			 T3T = ri[WS(ios, 37)];
+			 T3V = ii[WS(ios, 37)];
+			 T3S = W[72];
+			 T3U = W[73];
+			 T3W = FMA(T3S, T3T, T3U * T3V);
+			 T89 = FNMS(T3U, T3T, T3S * T3V);
+		    }
+		    T3X = T3R + T3W;
+		    TdB = T88 + T89;
+		    T8a = T88 - T89;
+		    T8d = T3R - T3W;
+	       }
+	       {
+		    E T4p, T7Y, T4u, T7Z;
+		    {
+			 E T4m, T4o, T4l, T4n;
+			 T4m = ri[WS(ios, 13)];
+			 T4o = ii[WS(ios, 13)];
+			 T4l = W[24];
+			 T4n = W[25];
+			 T4p = FMA(T4l, T4m, T4n * T4o);
+			 T7Y = FNMS(T4n, T4m, T4l * T4o);
+		    }
+		    {
+			 E T4r, T4t, T4q, T4s;
+			 T4r = ri[WS(ios, 45)];
+			 T4t = ii[WS(ios, 45)];
+			 T4q = W[88];
+			 T4s = W[89];
+			 T4u = FMA(T4q, T4r, T4s * T4t);
+			 T7Z = FNMS(T4s, T4r, T4q * T4t);
+		    }
+		    T4v = T4p + T4u;
+		    Tdx = T7Y + T7Z;
+		    T80 = T7Y - T7Z;
+		    T85 = T4p - T4u;
+	       }
+	       {
+		    E T42, T8e, T47, T8f;
+		    {
+			 E T3Z, T41, T3Y, T40;
+			 T3Z = ri[WS(ios, 21)];
+			 T41 = ii[WS(ios, 21)];
+			 T3Y = W[40];
+			 T40 = W[41];
+			 T42 = FMA(T3Y, T3Z, T40 * T41);
+			 T8e = FNMS(T40, T3Z, T3Y * T41);
+		    }
+		    {
+			 E T44, T46, T43, T45;
+			 T44 = ri[WS(ios, 53)];
+			 T46 = ii[WS(ios, 53)];
+			 T43 = W[104];
+			 T45 = W[105];
+			 T47 = FMA(T43, T44, T45 * T46);
+			 T8f = FNMS(T45, T44, T43 * T46);
+		    }
+		    T48 = T42 + T47;
+		    TdC = T8e + T8f;
+		    T8b = T42 - T47;
+		    T8g = T8e - T8f;
+	       }
+	       {
+		    E T4e, T82, T4j, T83;
+		    {
+			 E T4b, T4d, T4a, T4c;
+			 T4b = ri[WS(ios, 61)];
+			 T4d = ii[WS(ios, 61)];
+			 T4a = W[120];
+			 T4c = W[121];
+			 T4e = FMA(T4a, T4b, T4c * T4d);
+			 T82 = FNMS(T4c, T4b, T4a * T4d);
+		    }
+		    {
+			 E T4g, T4i, T4f, T4h;
+			 T4g = ri[WS(ios, 29)];
+			 T4i = ii[WS(ios, 29)];
+			 T4f = W[56];
+			 T4h = W[57];
+			 T4j = FMA(T4f, T4g, T4h * T4i);
+			 T83 = FNMS(T4h, T4g, T4f * T4i);
+		    }
+		    T4k = T4e + T4j;
+		    Tdw = T82 + T83;
+		    T7X = T4e - T4j;
+		    T84 = T82 - T83;
+	       }
+	       {
+		    E T49, T4w, TdA, TdD;
+		    T49 = T3X + T48;
+		    T4w = T4k + T4v;
+		    T4x = T49 + T4w;
+		    TfJ = T4w - T49;
+		    TdA = T3X - T48;
+		    TdD = TdB - TdC;
+		    TdE = TdA + TdD;
+		    TdM = TdD - TdA;
+	       }
+	       {
+		    E TfM, TfN, T81, T86;
+		    TfM = TdB + TdC;
+		    TfN = Tdw + Tdx;
+		    TfO = TfM - TfN;
+		    Tgu = TfM + TfN;
+		    T81 = T7X - T80;
+		    T86 = T84 + T85;
+		    T87 = FNMS(KP923879532, T86, KP382683432 * T81);
+		    T8v = FMA(KP382683432, T86, KP923879532 * T81);
+	       }
+	       {
+		    E T8c, T8h, Tb8, Tb9;
+		    T8c = T8a + T8b;
+		    T8h = T8d - T8g;
+		    T8i = FMA(KP923879532, T8c, KP382683432 * T8h);
+		    T8u = FNMS(KP923879532, T8h, KP382683432 * T8c);
+		    Tb8 = T8a - T8b;
+		    Tb9 = T8d + T8g;
+		    Tba = FMA(KP382683432, Tb8, KP923879532 * Tb9);
+		    Tbg = FNMS(KP382683432, Tb9, KP923879532 * Tb8);
+	       }
+	       {
+		    E Tdv, Tdy, Tb5, Tb6;
+		    Tdv = T4k - T4v;
+		    Tdy = Tdw - Tdx;
+		    Tdz = Tdv - Tdy;
+		    TdN = Tdv + Tdy;
+		    Tb5 = T7X + T80;
+		    Tb6 = T84 - T85;
+		    Tb7 = FNMS(KP382683432, Tb6, KP923879532 * Tb5);
+		    Tbh = FMA(KP923879532, Tb6, KP382683432 * Tb5);
+	       }
+	  }
+	  {
+	       E T5u, TdW, T8S, T8V, T62, Te3, T94, T99, T5F, TdX, T8T, T8Y, T5R, Te2, T93;
+	       E T96;
+	       {
+		    E T5o, T8Q, T5t, T8R;
+		    {
+			 E T5l, T5n, T5k, T5m;
+			 T5l = ri[WS(ios, 3)];
+			 T5n = ii[WS(ios, 3)];
+			 T5k = W[4];
+			 T5m = W[5];
+			 T5o = FMA(T5k, T5l, T5m * T5n);
+			 T8Q = FNMS(T5m, T5l, T5k * T5n);
+		    }
+		    {
+			 E T5q, T5s, T5p, T5r;
+			 T5q = ri[WS(ios, 35)];
+			 T5s = ii[WS(ios, 35)];
+			 T5p = W[68];
+			 T5r = W[69];
+			 T5t = FMA(T5p, T5q, T5r * T5s);
+			 T8R = FNMS(T5r, T5q, T5p * T5s);
+		    }
+		    T5u = T5o + T5t;
+		    TdW = T8Q + T8R;
+		    T8S = T8Q - T8R;
+		    T8V = T5o - T5t;
+	       }
+	       {
+		    E T5W, T97, T61, T98;
+		    {
+			 E T5T, T5V, T5S, T5U;
+			 T5T = ri[WS(ios, 11)];
+			 T5V = ii[WS(ios, 11)];
+			 T5S = W[20];
+			 T5U = W[21];
+			 T5W = FMA(T5S, T5T, T5U * T5V);
+			 T97 = FNMS(T5U, T5T, T5S * T5V);
+		    }
+		    {
+			 E T5Y, T60, T5X, T5Z;
+			 T5Y = ri[WS(ios, 43)];
+			 T60 = ii[WS(ios, 43)];
+			 T5X = W[84];
+			 T5Z = W[85];
+			 T61 = FMA(T5X, T5Y, T5Z * T60);
+			 T98 = FNMS(T5Z, T5Y, T5X * T60);
+		    }
+		    T62 = T5W + T61;
+		    Te3 = T97 + T98;
+		    T94 = T5W - T61;
+		    T99 = T97 - T98;
+	       }
+	       {
+		    E T5z, T8W, T5E, T8X;
+		    {
+			 E T5w, T5y, T5v, T5x;
+			 T5w = ri[WS(ios, 19)];
+			 T5y = ii[WS(ios, 19)];
+			 T5v = W[36];
+			 T5x = W[37];
+			 T5z = FMA(T5v, T5w, T5x * T5y);
+			 T8W = FNMS(T5x, T5w, T5v * T5y);
+		    }
+		    {
+			 E T5B, T5D, T5A, T5C;
+			 T5B = ri[WS(ios, 51)];
+			 T5D = ii[WS(ios, 51)];
+			 T5A = W[100];
+			 T5C = W[101];
+			 T5E = FMA(T5A, T5B, T5C * T5D);
+			 T8X = FNMS(T5C, T5B, T5A * T5D);
+		    }
+		    T5F = T5z + T5E;
+		    TdX = T8W + T8X;
+		    T8T = T5z - T5E;
+		    T8Y = T8W - T8X;
+	       }
+	       {
+		    E T5L, T91, T5Q, T92;
+		    {
+			 E T5I, T5K, T5H, T5J;
+			 T5I = ri[WS(ios, 59)];
+			 T5K = ii[WS(ios, 59)];
+			 T5H = W[116];
+			 T5J = W[117];
+			 T5L = FMA(T5H, T5I, T5J * T5K);
+			 T91 = FNMS(T5J, T5I, T5H * T5K);
+		    }
+		    {
+			 E T5N, T5P, T5M, T5O;
+			 T5N = ri[WS(ios, 27)];
+			 T5P = ii[WS(ios, 27)];
+			 T5M = W[52];
+			 T5O = W[53];
+			 T5Q = FMA(T5M, T5N, T5O * T5P);
+			 T92 = FNMS(T5O, T5N, T5M * T5P);
+		    }
+		    T5R = T5L + T5Q;
+		    Te2 = T91 + T92;
+		    T93 = T91 - T92;
+		    T96 = T5L - T5Q;
+	       }
+	       {
+		    E T5G, T63, Te1, Te4;
+		    T5G = T5u + T5F;
+		    T63 = T5R + T62;
+		    T64 = T5G + T63;
+		    TfZ = T63 - T5G;
+		    Te1 = T5R - T62;
+		    Te4 = Te2 - Te3;
+		    Te5 = Te1 + Te4;
+		    Ted = Te1 - Te4;
+	       }
+	       {
+		    E TfS, TfT, T8U, T8Z;
+		    TfS = TdW + TdX;
+		    TfT = Te2 + Te3;
+		    TfU = TfS - TfT;
+		    Tgz = TfS + TfT;
+		    T8U = T8S + T8T;
+		    T8Z = T8V - T8Y;
+		    T90 = FNMS(KP923879532, T8Z, KP382683432 * T8U);
+		    T9o = FMA(KP923879532, T8U, KP382683432 * T8Z);
+	       }
+	       {
+		    E T95, T9a, Tbr, Tbs;
+		    T95 = T93 + T94;
+		    T9a = T96 - T99;
+		    T9b = FMA(KP382683432, T95, KP923879532 * T9a);
+		    T9n = FNMS(KP923879532, T95, KP382683432 * T9a);
+		    Tbr = T93 - T94;
+		    Tbs = T96 + T99;
+		    Tbt = FMA(KP923879532, Tbr, KP382683432 * Tbs);
+		    Tbz = FNMS(KP382683432, Tbr, KP923879532 * Tbs);
+	       }
+	       {
+		    E TdY, TdZ, Tbo, Tbp;
+		    TdY = TdW - TdX;
+		    TdZ = T5u - T5F;
+		    Te0 = TdY - TdZ;
+		    Tee = TdZ + TdY;
+		    Tbo = T8S - T8T;
+		    Tbp = T8V + T8Y;
+		    Tbq = FNMS(KP382683432, Tbp, KP923879532 * Tbo);
+		    TbA = FMA(KP382683432, Tbo, KP923879532 * Tbp);
+	       }
+	  }
+	  {
+	       E T1t, Tgn, TgK, TgL, TgV, Th1, T30, Th0, T66, TgX, Tgw, TgE, TgB, TgF, Tgq;
+	       E TgM;
+	       {
+		    E TH, T1s, TgI, TgJ;
+		    TH = Tj + TG;
+		    T1s = T14 + T1r;
+		    T1t = TH + T1s;
+		    Tgn = TH - T1s;
+		    TgI = Tgt + Tgu;
+		    TgJ = Tgy + Tgz;
+		    TgK = TgI - TgJ;
+		    TgL = TgI + TgJ;
+	       }
+	       {
+		    E TgN, TgU, T2e, T2Z;
+		    TgN = Tfq + Tfr;
+		    TgU = TgO + TgT;
+		    TgV = TgN + TgU;
+		    Th1 = TgU - TgN;
+		    T2e = T1Q + T2d;
+		    T2Z = T2B + T2Y;
+		    T30 = T2e + T2Z;
+		    Th0 = T2Z - T2e;
+	       }
+	       {
+		    E T4y, T65, Tgs, Tgv;
+		    T4y = T3M + T4x;
+		    T65 = T5j + T64;
+		    T66 = T4y + T65;
+		    TgX = T65 - T4y;
+		    Tgs = T3M - T4x;
+		    Tgv = Tgt - Tgu;
+		    Tgw = Tgs + Tgv;
+		    TgE = Tgv - Tgs;
+	       }
+	       {
+		    E Tgx, TgA, Tgo, Tgp;
+		    Tgx = T5j - T64;
+		    TgA = Tgy - Tgz;
+		    TgB = Tgx - TgA;
+		    TgF = Tgx + TgA;
+		    Tgo = Tfu + Tfv;
+		    Tgp = TfA + TfB;
+		    Tgq = Tgo - Tgp;
+		    TgM = Tgo + Tgp;
+	       }
+	       {
+		    E T31, TgW, TgH, TgY;
+		    T31 = T1t + T30;
+		    ri[WS(ios, 32)] = T31 - T66;
+		    ri[0] = T31 + T66;
+		    TgW = TgM + TgV;
+		    ii[0] = TgL + TgW;
+		    ii[WS(ios, 32)] = TgW - TgL;
+		    TgH = T1t - T30;
+		    ri[WS(ios, 48)] = TgH - TgK;
+		    ri[WS(ios, 16)] = TgH + TgK;
+		    TgY = TgV - TgM;
+		    ii[WS(ios, 16)] = TgX + TgY;
+		    ii[WS(ios, 48)] = TgY - TgX;
+	       }
+	       {
+		    E Tgr, TgC, TgZ, Th2;
+		    Tgr = Tgn + Tgq;
+		    TgC = KP707106781 * (Tgw + TgB);
+		    ri[WS(ios, 40)] = Tgr - TgC;
+		    ri[WS(ios, 8)] = Tgr + TgC;
+		    TgZ = KP707106781 * (TgE + TgF);
+		    Th2 = Th0 + Th1;
+		    ii[WS(ios, 8)] = TgZ + Th2;
+		    ii[WS(ios, 40)] = Th2 - TgZ;
+	       }
+	       {
+		    E TgD, TgG, Th3, Th4;
+		    TgD = Tgn - Tgq;
+		    TgG = KP707106781 * (TgE - TgF);
+		    ri[WS(ios, 56)] = TgD - TgG;
+		    ri[WS(ios, 24)] = TgD + TgG;
+		    Th3 = KP707106781 * (TgB - Tgw);
+		    Th4 = Th1 - Th0;
+		    ii[WS(ios, 24)] = Th3 + Th4;
+		    ii[WS(ios, 56)] = Th4 - Th3;
+	       }
+	  }
+	  {
+	       E Tft, Tg7, Tgh, Tgl, Th9, Thf, TfE, Th6, TfQ, Tg4, Tga, The, Tge, Tgk, Tg1;
+	       E Tg5;
+	       {
+		    E Tfp, Tfs, Tgf, Tgg;
+		    Tfp = Tj - TG;
+		    Tfs = Tfq - Tfr;
+		    Tft = Tfp - Tfs;
+		    Tg7 = Tfp + Tfs;
+		    Tgf = TfR + TfU;
+		    Tgg = TfY + TfZ;
+		    Tgh = FNMS(KP382683432, Tgg, KP923879532 * Tgf);
+		    Tgl = FMA(KP923879532, Tgg, KP382683432 * Tgf);
+	       }
+	       {
+		    E Th7, Th8, Tfy, TfD;
+		    Th7 = T1r - T14;
+		    Th8 = TgT - TgO;
+		    Th9 = Th7 + Th8;
+		    Thf = Th8 - Th7;
+		    Tfy = Tfw - Tfx;
+		    TfD = Tfz + TfC;
+		    TfE = KP707106781 * (Tfy - TfD);
+		    Th6 = KP707106781 * (Tfy + TfD);
+	       }
+	       {
+		    E TfK, TfP, Tg8, Tg9;
+		    TfK = TfI - TfJ;
+		    TfP = TfL - TfO;
+		    TfQ = FMA(KP923879532, TfK, KP382683432 * TfP);
+		    Tg4 = FNMS(KP923879532, TfP, KP382683432 * TfK);
+		    Tg8 = Tfx + Tfw;
+		    Tg9 = Tfz - TfC;
+		    Tga = KP707106781 * (Tg8 + Tg9);
+		    The = KP707106781 * (Tg9 - Tg8);
+	       }
+	       {
+		    E Tgc, Tgd, TfV, Tg0;
+		    Tgc = TfI + TfJ;
+		    Tgd = TfL + TfO;
+		    Tge = FMA(KP382683432, Tgc, KP923879532 * Tgd);
+		    Tgk = FNMS(KP382683432, Tgd, KP923879532 * Tgc);
+		    TfV = TfR - TfU;
+		    Tg0 = TfY - TfZ;
+		    Tg1 = FNMS(KP923879532, Tg0, KP382683432 * TfV);
+		    Tg5 = FMA(KP382683432, Tg0, KP923879532 * TfV);
+	       }
+	       {
+		    E TfF, Tg2, Thd, Thg;
+		    TfF = Tft + TfE;
+		    Tg2 = TfQ + Tg1;
+		    ri[WS(ios, 44)] = TfF - Tg2;
+		    ri[WS(ios, 12)] = TfF + Tg2;
+		    Thd = Tg4 + Tg5;
+		    Thg = The + Thf;
+		    ii[WS(ios, 12)] = Thd + Thg;
+		    ii[WS(ios, 44)] = Thg - Thd;
+	       }
+	       {
+		    E Tg3, Tg6, Thh, Thi;
+		    Tg3 = Tft - TfE;
+		    Tg6 = Tg4 - Tg5;
+		    ri[WS(ios, 60)] = Tg3 - Tg6;
+		    ri[WS(ios, 28)] = Tg3 + Tg6;
+		    Thh = Tg1 - TfQ;
+		    Thi = Thf - The;
+		    ii[WS(ios, 28)] = Thh + Thi;
+		    ii[WS(ios, 60)] = Thi - Thh;
+	       }
+	       {
+		    E Tgb, Tgi, Th5, Tha;
+		    Tgb = Tg7 + Tga;
+		    Tgi = Tge + Tgh;
+		    ri[WS(ios, 36)] = Tgb - Tgi;
+		    ri[WS(ios, 4)] = Tgb + Tgi;
+		    Th5 = Tgk + Tgl;
+		    Tha = Th6 + Th9;
+		    ii[WS(ios, 4)] = Th5 + Tha;
+		    ii[WS(ios, 36)] = Tha - Th5;
+	       }
+	       {
+		    E Tgj, Tgm, Thb, Thc;
+		    Tgj = Tg7 - Tga;
+		    Tgm = Tgk - Tgl;
+		    ri[WS(ios, 52)] = Tgj - Tgm;
+		    ri[WS(ios, 20)] = Tgj + Tgm;
+		    Thb = Tgh - Tge;
+		    Thc = Th9 - Th6;
+		    ii[WS(ios, 20)] = Thb + Thc;
+		    ii[WS(ios, 52)] = Thc - Thb;
+	       }
+	  }
+	  {
+	       E Td1, Ten, Tdo, ThA, ThD, ThJ, Teq, ThI, Teh, TeB, Tel, Tex, TdQ, TeA, Tek;
+	       E Teu;
+	       {
+		    E TcP, Td0, Teo, Tep;
+		    TcP = TcL - TcO;
+		    Td0 = KP707106781 * (TcU - TcZ);
+		    Td1 = TcP - Td0;
+		    Ten = TcP + Td0;
+		    {
+			 E Tdc, Tdn, ThB, ThC;
+			 Tdc = FNMS(KP923879532, Tdb, KP382683432 * Td6);
+			 Tdn = FMA(KP382683432, Tdh, KP923879532 * Tdm);
+			 Tdo = Tdc - Tdn;
+			 ThA = Tdc + Tdn;
+			 ThB = KP707106781 * (TeF - TeE);
+			 ThC = Thn - Thm;
+			 ThD = ThB + ThC;
+			 ThJ = ThC - ThB;
+		    }
+		    Teo = FMA(KP923879532, Td6, KP382683432 * Tdb);
+		    Tep = FNMS(KP923879532, Tdh, KP382683432 * Tdm);
+		    Teq = Teo + Tep;
+		    ThI = Tep - Teo;
+		    {
+			 E Te7, Tev, Teg, Tew, Te6, Tef;
+			 Te6 = KP707106781 * (Te0 - Te5);
+			 Te7 = TdV - Te6;
+			 Tev = TdV + Te6;
+			 Tef = KP707106781 * (Ted - Tee);
+			 Teg = Tec - Tef;
+			 Tew = Tec + Tef;
+			 Teh = FNMS(KP980785280, Teg, KP195090322 * Te7);
+			 TeB = FMA(KP831469612, Tew, KP555570233 * Tev);
+			 Tel = FMA(KP195090322, Teg, KP980785280 * Te7);
+			 Tex = FNMS(KP555570233, Tew, KP831469612 * Tev);
+		    }
+		    {
+			 E TdG, Tes, TdP, Tet, TdF, TdO;
+			 TdF = KP707106781 * (Tdz - TdE);
+			 TdG = Tdu - TdF;
+			 Tes = Tdu + TdF;
+			 TdO = KP707106781 * (TdM - TdN);
+			 TdP = TdL - TdO;
+			 Tet = TdL + TdO;
+			 TdQ = FMA(KP980785280, TdG, KP195090322 * TdP);
+			 TeA = FNMS(KP555570233, Tet, KP831469612 * Tes);
+			 Tek = FNMS(KP980785280, TdP, KP195090322 * TdG);
+			 Teu = FMA(KP555570233, Tes, KP831469612 * Tet);
+		    }
+	       }
+	       {
+		    E Tdp, Tei, ThH, ThK;
+		    Tdp = Td1 + Tdo;
+		    Tei = TdQ + Teh;
+		    ri[WS(ios, 46)] = Tdp - Tei;
+		    ri[WS(ios, 14)] = Tdp + Tei;
+		    ThH = Tek + Tel;
+		    ThK = ThI + ThJ;
+		    ii[WS(ios, 14)] = ThH + ThK;
+		    ii[WS(ios, 46)] = ThK - ThH;
+	       }
+	       {
+		    E Tej, Tem, ThL, ThM;
+		    Tej = Td1 - Tdo;
+		    Tem = Tek - Tel;
+		    ri[WS(ios, 62)] = Tej - Tem;
+		    ri[WS(ios, 30)] = Tej + Tem;
+		    ThL = Teh - TdQ;
+		    ThM = ThJ - ThI;
+		    ii[WS(ios, 30)] = ThL + ThM;
+		    ii[WS(ios, 62)] = ThM - ThL;
+	       }
+	       {
+		    E Ter, Tey, Thz, ThE;
+		    Ter = Ten + Teq;
+		    Tey = Teu + Tex;
+		    ri[WS(ios, 38)] = Ter - Tey;
+		    ri[WS(ios, 6)] = Ter + Tey;
+		    Thz = TeA + TeB;
+		    ThE = ThA + ThD;
+		    ii[WS(ios, 6)] = Thz + ThE;
+		    ii[WS(ios, 38)] = ThE - Thz;
+	       }
+	       {
+		    E Tez, TeC, ThF, ThG;
+		    Tez = Ten - Teq;
+		    TeC = TeA - TeB;
+		    ri[WS(ios, 54)] = Tez - TeC;
+		    ri[WS(ios, 22)] = Tez + TeC;
+		    ThF = Tex - Teu;
+		    ThG = ThD - ThA;
+		    ii[WS(ios, 22)] = ThF + ThG;
+		    ii[WS(ios, 54)] = ThG - ThF;
+	       }
+	  }
+	  {
+	       E TeH, Tf9, TeO, Thk, Thp, Thv, Tfc, Thu, Tf3, Tfn, Tf7, Tfj, TeW, Tfm, Tf6;
+	       E Tfg;
+	       {
+		    E TeD, TeG, Tfa, Tfb;
+		    TeD = TcL + TcO;
+		    TeG = KP707106781 * (TeE + TeF);
+		    TeH = TeD - TeG;
+		    Tf9 = TeD + TeG;
+		    {
+			 E TeK, TeN, Thl, Tho;
+			 TeK = FNMS(KP382683432, TeJ, KP923879532 * TeI);
+			 TeN = FMA(KP923879532, TeL, KP382683432 * TeM);
+			 TeO = TeK - TeN;
+			 Thk = TeK + TeN;
+			 Thl = KP707106781 * (TcU + TcZ);
+			 Tho = Thm + Thn;
+			 Thp = Thl + Tho;
+			 Thv = Tho - Thl;
+		    }
+		    Tfa = FMA(KP382683432, TeI, KP923879532 * TeJ);
+		    Tfb = FNMS(KP382683432, TeL, KP923879532 * TeM);
+		    Tfc = Tfa + Tfb;
+		    Thu = Tfb - Tfa;
+		    {
+			 E TeZ, Tfh, Tf2, Tfi, TeY, Tf1;
+			 TeY = KP707106781 * (Tee + Ted);
+			 TeZ = TeX - TeY;
+			 Tfh = TeX + TeY;
+			 Tf1 = KP707106781 * (Te0 + Te5);
+			 Tf2 = Tf0 - Tf1;
+			 Tfi = Tf0 + Tf1;
+			 Tf3 = FNMS(KP831469612, Tf2, KP555570233 * TeZ);
+			 Tfn = FMA(KP195090322, Tfh, KP980785280 * Tfi);
+			 Tf7 = FMA(KP831469612, TeZ, KP555570233 * Tf2);
+			 Tfj = FNMS(KP195090322, Tfi, KP980785280 * Tfh);
+		    }
+		    {
+			 E TeS, Tfe, TeV, Tff, TeR, TeU;
+			 TeR = KP707106781 * (TdE + Tdz);
+			 TeS = TeQ - TeR;
+			 Tfe = TeQ + TeR;
+			 TeU = KP707106781 * (TdM + TdN);
+			 TeV = TeT - TeU;
+			 Tff = TeT + TeU;
+			 TeW = FMA(KP555570233, TeS, KP831469612 * TeV);
+			 Tfm = FNMS(KP195090322, Tfe, KP980785280 * Tff);
+			 Tf6 = FNMS(KP831469612, TeS, KP555570233 * TeV);
+			 Tfg = FMA(KP980785280, Tfe, KP195090322 * Tff);
+		    }
+	       }
+	       {
+		    E TeP, Tf4, Tht, Thw;
+		    TeP = TeH + TeO;
+		    Tf4 = TeW + Tf3;
+		    ri[WS(ios, 42)] = TeP - Tf4;
+		    ri[WS(ios, 10)] = TeP + Tf4;
+		    Tht = Tf6 + Tf7;
+		    Thw = Thu + Thv;
+		    ii[WS(ios, 10)] = Tht + Thw;
+		    ii[WS(ios, 42)] = Thw - Tht;
+	       }
+	       {
+		    E Tf5, Tf8, Thx, Thy;
+		    Tf5 = TeH - TeO;
+		    Tf8 = Tf6 - Tf7;
+		    ri[WS(ios, 58)] = Tf5 - Tf8;
+		    ri[WS(ios, 26)] = Tf5 + Tf8;
+		    Thx = Tf3 - TeW;
+		    Thy = Thv - Thu;
+		    ii[WS(ios, 26)] = Thx + Thy;
+		    ii[WS(ios, 58)] = Thy - Thx;
+	       }
+	       {
+		    E Tfd, Tfk, Thj, Thq;
+		    Tfd = Tf9 + Tfc;
+		    Tfk = Tfg + Tfj;
+		    ri[WS(ios, 34)] = Tfd - Tfk;
+		    ri[WS(ios, 2)] = Tfd + Tfk;
+		    Thj = Tfm + Tfn;
+		    Thq = Thk + Thp;
+		    ii[WS(ios, 2)] = Thj + Thq;
+		    ii[WS(ios, 34)] = Thq - Thj;
+	       }
+	       {
+		    E Tfl, Tfo, Thr, Ths;
+		    Tfl = Tf9 - Tfc;
+		    Tfo = Tfm - Tfn;
+		    ri[WS(ios, 50)] = Tfl - Tfo;
+		    ri[WS(ios, 18)] = Tfl + Tfo;
+		    Thr = Tfj - Tfg;
+		    Ths = Thp - Thk;
+		    ii[WS(ios, 18)] = Thr + Ths;
+		    ii[WS(ios, 50)] = Ths - Thr;
+	       }
+	  }
+	  {
+	       E T6L, T9x, TiD, TiJ, T7E, TiI, T9A, TiA, T8y, T9K, T9u, T9E, T9r, T9L, T9v;
+	       E T9H;
+	       {
+		    E T6n, T6K, TiB, TiC;
+		    T6n = T6b - T6m;
+		    T6K = T6y - T6J;
+		    T6L = T6n - T6K;
+		    T9x = T6n + T6K;
+		    TiB = T9P - T9O;
+		    TiC = Tin - Tim;
+		    TiD = TiB + TiC;
+		    TiJ = TiC - TiB;
+	       }
+	       {
+		    E T7c, T9y, T7D, T9z;
+		    {
+			 E T72, T7b, T7t, T7C;
+			 T72 = T6Q - T71;
+			 T7b = T77 - T7a;
+			 T7c = FNMS(KP980785280, T7b, KP195090322 * T72);
+			 T9y = FMA(KP980785280, T72, KP195090322 * T7b);
+			 T7t = T7h - T7s;
+			 T7C = T7y - T7B;
+			 T7D = FMA(KP195090322, T7t, KP980785280 * T7C);
+			 T9z = FNMS(KP980785280, T7t, KP195090322 * T7C);
+		    }
+		    T7E = T7c - T7D;
+		    TiI = T9z - T9y;
+		    T9A = T9y + T9z;
+		    TiA = T7c + T7D;
+	       }
+	       {
+		    E T8k, T9C, T8x, T9D;
+		    {
+			 E T7W, T8j, T8t, T8w;
+			 T7W = T7K - T7V;
+			 T8j = T87 - T8i;
+			 T8k = T7W - T8j;
+			 T9C = T7W + T8j;
+			 T8t = T8p - T8s;
+			 T8w = T8u - T8v;
+			 T8x = T8t - T8w;
+			 T9D = T8t + T8w;
+		    }
+		    T8y = FMA(KP995184726, T8k, KP098017140 * T8x);
+		    T9K = FNMS(KP634393284, T9D, KP773010453 * T9C);
+		    T9u = FNMS(KP995184726, T8x, KP098017140 * T8k);
+		    T9E = FMA(KP634393284, T9C, KP773010453 * T9D);
+	       }
+	       {
+		    E T9d, T9F, T9q, T9G;
+		    {
+			 E T8P, T9c, T9m, T9p;
+			 T8P = T8D - T8O;
+			 T9c = T90 - T9b;
+			 T9d = T8P - T9c;
+			 T9F = T8P + T9c;
+			 T9m = T9i - T9l;
+			 T9p = T9n - T9o;
+			 T9q = T9m - T9p;
+			 T9G = T9m + T9p;
+		    }
+		    T9r = FNMS(KP995184726, T9q, KP098017140 * T9d);
+		    T9L = FMA(KP773010453, T9G, KP634393284 * T9F);
+		    T9v = FMA(KP098017140, T9q, KP995184726 * T9d);
+		    T9H = FNMS(KP634393284, T9G, KP773010453 * T9F);
+	       }
+	       {
+		    E T7F, T9s, TiH, TiK;
+		    T7F = T6L + T7E;
+		    T9s = T8y + T9r;
+		    ri[WS(ios, 47)] = T7F - T9s;
+		    ri[WS(ios, 15)] = T7F + T9s;
+		    TiH = T9u + T9v;
+		    TiK = TiI + TiJ;
+		    ii[WS(ios, 15)] = TiH + TiK;
+		    ii[WS(ios, 47)] = TiK - TiH;
+	       }
+	       {
+		    E T9t, T9w, TiL, TiM;
+		    T9t = T6L - T7E;
+		    T9w = T9u - T9v;
+		    ri[WS(ios, 63)] = T9t - T9w;
+		    ri[WS(ios, 31)] = T9t + T9w;
+		    TiL = T9r - T8y;
+		    TiM = TiJ - TiI;
+		    ii[WS(ios, 31)] = TiL + TiM;
+		    ii[WS(ios, 63)] = TiM - TiL;
+	       }
+	       {
+		    E T9B, T9I, Tiz, TiE;
+		    T9B = T9x + T9A;
+		    T9I = T9E + T9H;
+		    ri[WS(ios, 39)] = T9B - T9I;
+		    ri[WS(ios, 7)] = T9B + T9I;
+		    Tiz = T9K + T9L;
+		    TiE = TiA + TiD;
+		    ii[WS(ios, 7)] = Tiz + TiE;
+		    ii[WS(ios, 39)] = TiE - Tiz;
+	       }
+	       {
+		    E T9J, T9M, TiF, TiG;
+		    T9J = T9x - T9A;
+		    T9M = T9K - T9L;
+		    ri[WS(ios, 55)] = T9J - T9M;
+		    ri[WS(ios, 23)] = T9J + T9M;
+		    TiF = T9H - T9E;
+		    TiG = TiD - TiA;
+		    ii[WS(ios, 23)] = TiF + TiG;
+		    ii[WS(ios, 55)] = TiG - TiF;
+	       }
+	  }
+	  {
+	       E TaL, TbJ, Ti9, Tif, Tb0, Tie, TbM, Ti6, Tbk, TbW, TbG, TbQ, TbD, TbX, TbH;
+	       E TbT;
+	       {
+		    E TaD, TaK, Ti7, Ti8;
+		    TaD = Taz - TaC;
+		    TaK = TaG - TaJ;
+		    TaL = TaD - TaK;
+		    TbJ = TaD + TaK;
+		    Ti7 = Tc1 - Tc0;
+		    Ti8 = ThT - ThQ;
+		    Ti9 = Ti7 + Ti8;
+		    Tif = Ti8 - Ti7;
+	       }
+	       {
+		    E TaS, TbK, TaZ, TbL;
+		    {
+			 E TaO, TaR, TaV, TaY;
+			 TaO = TaM - TaN;
+			 TaR = TaP - TaQ;
+			 TaS = FNMS(KP831469612, TaR, KP555570233 * TaO);
+			 TbK = FMA(KP555570233, TaR, KP831469612 * TaO);
+			 TaV = TaT - TaU;
+			 TaY = TaW - TaX;
+			 TaZ = FMA(KP831469612, TaV, KP555570233 * TaY);
+			 TbL = FNMS(KP831469612, TaY, KP555570233 * TaV);
+		    }
+		    Tb0 = TaS - TaZ;
+		    Tie = TbL - TbK;
+		    TbM = TbK + TbL;
+		    Ti6 = TaS + TaZ;
+	       }
+	       {
+		    E Tbc, TbO, Tbj, TbP;
+		    {
+			 E Tb4, Tbb, Tbf, Tbi;
+			 Tb4 = Tb2 - Tb3;
+			 Tbb = Tb7 - Tba;
+			 Tbc = Tb4 - Tbb;
+			 TbO = Tb4 + Tbb;
+			 Tbf = Tbd - Tbe;
+			 Tbi = Tbg - Tbh;
+			 Tbj = Tbf - Tbi;
+			 TbP = Tbf + Tbi;
+		    }
+		    Tbk = FMA(KP956940335, Tbc, KP290284677 * Tbj);
+		    TbW = FNMS(KP471396736, TbP, KP881921264 * TbO);
+		    TbG = FNMS(KP956940335, Tbj, KP290284677 * Tbc);
+		    TbQ = FMA(KP471396736, TbO, KP881921264 * TbP);
+	       }
+	       {
+		    E Tbv, TbR, TbC, TbS;
+		    {
+			 E Tbn, Tbu, Tby, TbB;
+			 Tbn = Tbl - Tbm;
+			 Tbu = Tbq - Tbt;
+			 Tbv = Tbn - Tbu;
+			 TbR = Tbn + Tbu;
+			 Tby = Tbw - Tbx;
+			 TbB = Tbz - TbA;
+			 TbC = Tby - TbB;
+			 TbS = Tby + TbB;
+		    }
+		    TbD = FNMS(KP956940335, TbC, KP290284677 * Tbv);
+		    TbX = FMA(KP881921264, TbS, KP471396736 * TbR);
+		    TbH = FMA(KP290284677, TbC, KP956940335 * Tbv);
+		    TbT = FNMS(KP471396736, TbS, KP881921264 * TbR);
+	       }
+	       {
+		    E Tb1, TbE, Tid, Tig;
+		    Tb1 = TaL + Tb0;
+		    TbE = Tbk + TbD;
+		    ri[WS(ios, 45)] = Tb1 - TbE;
+		    ri[WS(ios, 13)] = Tb1 + TbE;
+		    Tid = TbG + TbH;
+		    Tig = Tie + Tif;
+		    ii[WS(ios, 13)] = Tid + Tig;
+		    ii[WS(ios, 45)] = Tig - Tid;
+	       }
+	       {
+		    E TbF, TbI, Tih, Tii;
+		    TbF = TaL - Tb0;
+		    TbI = TbG - TbH;
+		    ri[WS(ios, 61)] = TbF - TbI;
+		    ri[WS(ios, 29)] = TbF + TbI;
+		    Tih = TbD - Tbk;
+		    Tii = Tif - Tie;
+		    ii[WS(ios, 29)] = Tih + Tii;
+		    ii[WS(ios, 61)] = Tii - Tih;
+	       }
+	       {
+		    E TbN, TbU, Ti5, Tia;
+		    TbN = TbJ + TbM;
+		    TbU = TbQ + TbT;
+		    ri[WS(ios, 37)] = TbN - TbU;
+		    ri[WS(ios, 5)] = TbN + TbU;
+		    Ti5 = TbW + TbX;
+		    Tia = Ti6 + Ti9;
+		    ii[WS(ios, 5)] = Ti5 + Tia;
+		    ii[WS(ios, 37)] = Tia - Ti5;
+	       }
+	       {
+		    E TbV, TbY, Tib, Tic;
+		    TbV = TbJ - TbM;
+		    TbY = TbW - TbX;
+		    ri[WS(ios, 53)] = TbV - TbY;
+		    ri[WS(ios, 21)] = TbV + TbY;
+		    Tib = TbT - TbQ;
+		    Tic = Ti9 - Ti6;
+		    ii[WS(ios, 21)] = Tib + Tic;
+		    ii[WS(ios, 53)] = Tic - Tib;
+	       }
+	  }
+	  {
+	       E Tc3, Tcv, ThV, Ti1, Tca, Ti0, Tcy, ThO, Tci, TcI, Tcs, TcC, Tcp, TcJ, Tct;
+	       E TcF;
+	       {
+		    E TbZ, Tc2, ThP, ThU;
+		    TbZ = Taz + TaC;
+		    Tc2 = Tc0 + Tc1;
+		    Tc3 = TbZ - Tc2;
+		    Tcv = TbZ + Tc2;
+		    ThP = TaG + TaJ;
+		    ThU = ThQ + ThT;
+		    ThV = ThP + ThU;
+		    Ti1 = ThU - ThP;
+	       }
+	       {
+		    E Tc6, Tcw, Tc9, Tcx;
+		    {
+			 E Tc4, Tc5, Tc7, Tc8;
+			 Tc4 = TaM + TaN;
+			 Tc5 = TaP + TaQ;
+			 Tc6 = FNMS(KP195090322, Tc5, KP980785280 * Tc4);
+			 Tcw = FMA(KP980785280, Tc5, KP195090322 * Tc4);
+			 Tc7 = TaT + TaU;
+			 Tc8 = TaW + TaX;
+			 Tc9 = FMA(KP195090322, Tc7, KP980785280 * Tc8);
+			 Tcx = FNMS(KP195090322, Tc8, KP980785280 * Tc7);
+		    }
+		    Tca = Tc6 - Tc9;
+		    Ti0 = Tcx - Tcw;
+		    Tcy = Tcw + Tcx;
+		    ThO = Tc6 + Tc9;
+	       }
+	       {
+		    E Tce, TcA, Tch, TcB;
+		    {
+			 E Tcc, Tcd, Tcf, Tcg;
+			 Tcc = Tbd + Tbe;
+			 Tcd = Tba + Tb7;
+			 Tce = Tcc - Tcd;
+			 TcA = Tcc + Tcd;
+			 Tcf = Tb2 + Tb3;
+			 Tcg = Tbg + Tbh;
+			 Tch = Tcf - Tcg;
+			 TcB = Tcf + Tcg;
+		    }
+		    Tci = FMA(KP634393284, Tce, KP773010453 * Tch);
+		    TcI = FNMS(KP098017140, TcA, KP995184726 * TcB);
+		    Tcs = FNMS(KP773010453, Tce, KP634393284 * Tch);
+		    TcC = FMA(KP995184726, TcA, KP098017140 * TcB);
+	       }
+	       {
+		    E Tcl, TcD, Tco, TcE;
+		    {
+			 E Tcj, Tck, Tcm, Tcn;
+			 Tcj = Tbl + Tbm;
+			 Tck = TbA + Tbz;
+			 Tcl = Tcj - Tck;
+			 TcD = Tcj + Tck;
+			 Tcm = Tbw + Tbx;
+			 Tcn = Tbq + Tbt;
+			 Tco = Tcm - Tcn;
+			 TcE = Tcm + Tcn;
+		    }
+		    Tcp = FNMS(KP773010453, Tco, KP634393284 * Tcl);
+		    TcJ = FMA(KP098017140, TcD, KP995184726 * TcE);
+		    Tct = FMA(KP773010453, Tcl, KP634393284 * Tco);
+		    TcF = FNMS(KP098017140, TcE, KP995184726 * TcD);
+	       }
+	       {
+		    E Tcb, Tcq, ThZ, Ti2;
+		    Tcb = Tc3 + Tca;
+		    Tcq = Tci + Tcp;
+		    ri[WS(ios, 41)] = Tcb - Tcq;
+		    ri[WS(ios, 9)] = Tcb + Tcq;
+		    ThZ = Tcs + Tct;
+		    Ti2 = Ti0 + Ti1;
+		    ii[WS(ios, 9)] = ThZ + Ti2;
+		    ii[WS(ios, 41)] = Ti2 - ThZ;
+	       }
+	       {
+		    E Tcr, Tcu, Ti3, Ti4;
+		    Tcr = Tc3 - Tca;
+		    Tcu = Tcs - Tct;
+		    ri[WS(ios, 57)] = Tcr - Tcu;
+		    ri[WS(ios, 25)] = Tcr + Tcu;
+		    Ti3 = Tcp - Tci;
+		    Ti4 = Ti1 - Ti0;
+		    ii[WS(ios, 25)] = Ti3 + Ti4;
+		    ii[WS(ios, 57)] = Ti4 - Ti3;
+	       }
+	       {
+		    E Tcz, TcG, ThN, ThW;
+		    Tcz = Tcv + Tcy;
+		    TcG = TcC + TcF;
+		    ri[WS(ios, 33)] = Tcz - TcG;
+		    ri[WS(ios, 1)] = Tcz + TcG;
+		    ThN = TcI + TcJ;
+		    ThW = ThO + ThV;
+		    ii[WS(ios, 1)] = ThN + ThW;
+		    ii[WS(ios, 33)] = ThW - ThN;
+	       }
+	       {
+		    E TcH, TcK, ThX, ThY;
+		    TcH = Tcv - Tcy;
+		    TcK = TcI - TcJ;
+		    ri[WS(ios, 49)] = TcH - TcK;
+		    ri[WS(ios, 17)] = TcH + TcK;
+		    ThX = TcF - TcC;
+		    ThY = ThV - ThO;
+		    ii[WS(ios, 17)] = ThX + ThY;
+		    ii[WS(ios, 49)] = ThY - ThX;
+	       }
+	  }
+	  {
+	       E T9R, Taj, Tip, Tiv, T9Y, Tiu, Tam, Tik, Ta6, Taw, Tag, Taq, Tad, Tax, Tah;
+	       E Tat;
+	       {
+		    E T9N, T9Q, Til, Tio;
+		    T9N = T6b + T6m;
+		    T9Q = T9O + T9P;
+		    T9R = T9N - T9Q;
+		    Taj = T9N + T9Q;
+		    Til = T6y + T6J;
+		    Tio = Tim + Tin;
+		    Tip = Til + Tio;
+		    Tiv = Tio - Til;
+	       }
+	       {
+		    E T9U, Tak, T9X, Tal;
+		    {
+			 E T9S, T9T, T9V, T9W;
+			 T9S = T6Q + T71;
+			 T9T = T77 + T7a;
+			 T9U = FNMS(KP555570233, T9T, KP831469612 * T9S);
+			 Tak = FMA(KP555570233, T9S, KP831469612 * T9T);
+			 T9V = T7h + T7s;
+			 T9W = T7y + T7B;
+			 T9X = FMA(KP831469612, T9V, KP555570233 * T9W);
+			 Tal = FNMS(KP555570233, T9V, KP831469612 * T9W);
+		    }
+		    T9Y = T9U - T9X;
+		    Tiu = Tal - Tak;
+		    Tam = Tak + Tal;
+		    Tik = T9U + T9X;
+	       }
+	       {
+		    E Ta2, Tao, Ta5, Tap;
+		    {
+			 E Ta0, Ta1, Ta3, Ta4;
+			 Ta0 = T8p + T8s;
+			 Ta1 = T8i + T87;
+			 Ta2 = Ta0 - Ta1;
+			 Tao = Ta0 + Ta1;
+			 Ta3 = T7K + T7V;
+			 Ta4 = T8u + T8v;
+			 Ta5 = Ta3 - Ta4;
+			 Tap = Ta3 + Ta4;
+		    }
+		    Ta6 = FMA(KP471396736, Ta2, KP881921264 * Ta5);
+		    Taw = FNMS(KP290284677, Tao, KP956940335 * Tap);
+		    Tag = FNMS(KP881921264, Ta2, KP471396736 * Ta5);
+		    Taq = FMA(KP956940335, Tao, KP290284677 * Tap);
+	       }
+	       {
+		    E Ta9, Tar, Tac, Tas;
+		    {
+			 E Ta7, Ta8, Taa, Tab;
+			 Ta7 = T8D + T8O;
+			 Ta8 = T9o + T9n;
+			 Ta9 = Ta7 - Ta8;
+			 Tar = Ta7 + Ta8;
+			 Taa = T9i + T9l;
+			 Tab = T90 + T9b;
+			 Tac = Taa - Tab;
+			 Tas = Taa + Tab;
+		    }
+		    Tad = FNMS(KP881921264, Tac, KP471396736 * Ta9);
+		    Tax = FMA(KP290284677, Tar, KP956940335 * Tas);
+		    Tah = FMA(KP881921264, Ta9, KP471396736 * Tac);
+		    Tat = FNMS(KP290284677, Tas, KP956940335 * Tar);
+	       }
+	       {
+		    E T9Z, Tae, Tit, Tiw;
+		    T9Z = T9R + T9Y;
+		    Tae = Ta6 + Tad;
+		    ri[WS(ios, 43)] = T9Z - Tae;
+		    ri[WS(ios, 11)] = T9Z + Tae;
+		    Tit = Tag + Tah;
+		    Tiw = Tiu + Tiv;
+		    ii[WS(ios, 11)] = Tit + Tiw;
+		    ii[WS(ios, 43)] = Tiw - Tit;
+	       }
+	       {
+		    E Taf, Tai, Tix, Tiy;
+		    Taf = T9R - T9Y;
+		    Tai = Tag - Tah;
+		    ri[WS(ios, 59)] = Taf - Tai;
+		    ri[WS(ios, 27)] = Taf + Tai;
+		    Tix = Tad - Ta6;
+		    Tiy = Tiv - Tiu;
+		    ii[WS(ios, 27)] = Tix + Tiy;
+		    ii[WS(ios, 59)] = Tiy - Tix;
+	       }
+	       {
+		    E Tan, Tau, Tij, Tiq;
+		    Tan = Taj + Tam;
+		    Tau = Taq + Tat;
+		    ri[WS(ios, 35)] = Tan - Tau;
+		    ri[WS(ios, 3)] = Tan + Tau;
+		    Tij = Taw + Tax;
+		    Tiq = Tik + Tip;
+		    ii[WS(ios, 3)] = Tij + Tiq;
+		    ii[WS(ios, 35)] = Tiq - Tij;
+	       }
+	       {
+		    E Tav, Tay, Tir, Tis;
+		    Tav = Taj - Tam;
+		    Tay = Taw - Tax;
+		    ri[WS(ios, 51)] = Tav - Tay;
+		    ri[WS(ios, 19)] = Tav + Tay;
+		    Tir = Tat - Taq;
+		    Tis = Tip - Tik;
+		    ii[WS(ios, 19)] = Tir + Tis;
+		    ii[WS(ios, 51)] = Tis - Tir;
+	       }
+	  }
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = { /* twiddle-instruction table referenced by desc below; tw_instr semantics are defined outside this file */
+     {TW_FULL, 0, 64}, /* NOTE(review): presumably requests the full twiddle set for size 64 -- confirm */
+     {TW_NEXT, 1, 0} /* NOTE(review): presumably the end-of-list / advance marker -- confirm */
+};
+
+static const ct_desc desc = { 64, "t1_64", twinstr, {808, 270, 230, 0}, &GENUS, 0, 0, 0 }; /* descriptor passed at registration; NOTE(review): {808, 270, 230, 0} presumably operation counts (adds, muls, fmas, other) -- confirm against ct_desc */
+
+void X(codelet_t1_64) (planner *p) { /* hands the t1_64 kernel and its descriptor to planner p */
+     X(kdft_dit_register) (p, t1_64, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t1_7.c b/src/fftw3/dft/codelets/standard/t1_7.c
new file mode 100644
index 0000000..d52c5eb
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_7.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:56 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 7 -name t1_7 -include t.h */
+
+/*
+ * This function contains 72 FP additions, 60 FP multiplications,
+ * (or, 36 additions, 24 multiplications, 36 fused multiply/add),
+ * 29 stack variables, and 28 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_7.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_7.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_7.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t1_7(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{ /* Size-7 twiddle kernel: per iteration reads 7 (ri, ii) values, combines inputs 1..6 with W, writes 7 results in place; returns the advanced W. */
+     DK(KP222520933, +0.222520933956314404288902564496794759466355569); /* value equals -cos(4*pi/7) */
+     DK(KP900968867, +0.900968867902419126236102319507445051165919162); /* value equals -cos(6*pi/7) */
+     DK(KP623489801, +0.623489801858733530525004884004239810632274731); /* value equals cos(2*pi/7) */
+     DK(KP433883739, +0.433883739117558120475768332848358754609990728); /* value equals sin(6*pi/7) */
+     DK(KP781831482, +0.781831482468029808708444526674057750232334519); /* value equals sin(2*pi/7) */
+     DK(KP974927912, +0.974927912181823607018131682993931217232785801); /* value equals sin(4*pi/7) */
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 12) { /* m iterations; each one reads W[0..11], i.e. 6 twiddle pairs */
+	  E T1, TR, Tc, TS, TC, TO, Tn, TT, TI, TP, Ty, TU, TF, TQ;
+	  T1 = ri[0]; /* input 0 is used as-is, with no W factor */
+	  TR = ii[0];
+	  {
+	       E T6, TA, Tb, TB; /* inputs 1 and 6 after combining with W[0..1] and W[10..11] */
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = ri[WS(ios, 1)]; /* NOTE(review): WS(ios, k) presumably is the offset of element k for stride ios -- defined outside this file */
+		    T5 = ii[WS(ios, 1)];
+		    T2 = W[0];
+		    T4 = W[1];
+		    T6 = FMA(T2, T3, T4 * T5); /* NOTE(review): presumably FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b, making (T6, TA) = input 1 times (W[0] - i*W[1]) -- confirm macro definitions */
+		    TA = FNMS(T4, T3, T2 * T5);
+	       }
+	       {
+		    E T8, Ta, T7, T9;
+		    T8 = ri[WS(ios, 6)];
+		    Ta = ii[WS(ios, 6)];
+		    T7 = W[10];
+		    T9 = W[11];
+		    Tb = FMA(T7, T8, T9 * Ta);
+		    TB = FNMS(T9, T8, T7 * Ta);
+	       }
+	       Tc = T6 + Tb; /* sum and difference terms of the 1/6 pair, reused by every output below */
+	       TS = Tb - T6;
+	       TC = TA - TB;
+	       TO = TA + TB;
+	  }
+	  {
+	       E Th, TG, Tm, TH; /* inputs 2 and 5 after combining with W[2..3] and W[8..9] */
+	       {
+		    E Te, Tg, Td, Tf;
+		    Te = ri[WS(ios, 2)];
+		    Tg = ii[WS(ios, 2)];
+		    Td = W[2];
+		    Tf = W[3];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    TG = FNMS(Tf, Te, Td * Tg);
+	       }
+	       {
+		    E Tj, Tl, Ti, Tk;
+		    Tj = ri[WS(ios, 5)];
+		    Tl = ii[WS(ios, 5)];
+		    Ti = W[8];
+		    Tk = W[9];
+		    Tm = FMA(Ti, Tj, Tk * Tl);
+		    TH = FNMS(Tk, Tj, Ti * Tl);
+	       }
+	       Tn = Th + Tm; /* sum and difference terms of the 2/5 pair */
+	       TT = Tm - Th;
+	       TI = TG - TH;
+	       TP = TG + TH;
+	  }
+	  {
+	       E Ts, TD, Tx, TE; /* inputs 3 and 4 after combining with W[4..5] and W[6..7] */
+	       {
+		    E Tp, Tr, To, Tq;
+		    Tp = ri[WS(ios, 3)];
+		    Tr = ii[WS(ios, 3)];
+		    To = W[4];
+		    Tq = W[5];
+		    Ts = FMA(To, Tp, Tq * Tr);
+		    TD = FNMS(Tq, Tp, To * Tr);
+	       }
+	       {
+		    E Tu, Tw, Tt, Tv;
+		    Tu = ri[WS(ios, 4)];
+		    Tw = ii[WS(ios, 4)];
+		    Tt = W[6];
+		    Tv = W[7];
+		    Tx = FMA(Tt, Tu, Tv * Tw);
+		    TE = FNMS(Tv, Tu, Tt * Tw);
+	       }
+	       Ty = Ts + Tx; /* sum and difference terms of the 3/4 pair */
+	       TU = Tx - Ts;
+	       TF = TD - TE;
+	       TQ = TD + TE;
+	  }
+	  ri[0] = T1 + Tc + Tn + Ty; /* output 0: plain sum of input 0 and the three pair sums (real part) */
+	  ii[0] = TO + TP + TQ + TR; /* output 0, imaginary part */
+	  {
+	       E TJ, Tz, TX, TY; /* outputs 2 and 5: Tz +/- TJ for ri, TY +/- TX for ii */
+	       TJ = FNMS(KP781831482, TF, KP974927912 * TC) - (KP433883739 * TI);
+	       Tz = FMA(KP623489801, Ty, T1) + FNMA(KP900968867, Tn, KP222520933 * Tc); /* NOTE(review): FNMA(a,b,c) presumably is -(a*b) - c -- confirm macro definition */
+	       ri[WS(ios, 5)] = Tz - TJ;
+	       ri[WS(ios, 2)] = Tz + TJ;
+	       TX = FNMS(KP781831482, TU, KP974927912 * TS) - (KP433883739 * TT);
+	       TY = FMA(KP623489801, TQ, TR) + FNMA(KP900968867, TP, KP222520933 * TO);
+	       ii[WS(ios, 2)] = TX + TY;
+	       ii[WS(ios, 5)] = TY - TX;
+	  }
+	  {
+	       E TL, TK, TV, TW; /* outputs 1 and 6: TK +/- TL for ri, TW +/- TV for ii */
+	       TL = FMA(KP781831482, TC, KP974927912 * TI) + (KP433883739 * TF);
+	       TK = FMA(KP623489801, Tc, T1) + FNMA(KP900968867, Ty, KP222520933 * Tn);
+	       ri[WS(ios, 6)] = TK - TL;
+	       ri[WS(ios, 1)] = TK + TL;
+	       TV = FMA(KP781831482, TS, KP974927912 * TT) + (KP433883739 * TU);
+	       TW = FMA(KP623489801, TO, TR) + FNMA(KP900968867, TQ, KP222520933 * TP);
+	       ii[WS(ios, 1)] = TV + TW;
+	       ii[WS(ios, 6)] = TW - TV;
+	  }
+	  {
+	       E TN, TM, TZ, T10; /* outputs 3 and 4: TM +/- TN for ri, T10 +/- TZ for ii */
+	       TN = FMA(KP433883739, TC, KP974927912 * TF) - (KP781831482 * TI);
+	       TM = FMA(KP623489801, Tn, T1) + FNMA(KP222520933, Ty, KP900968867 * Tc);
+	       ri[WS(ios, 4)] = TM - TN;
+	       ri[WS(ios, 3)] = TM + TN;
+	       TZ = FMA(KP433883739, TS, KP974927912 * TU) - (KP781831482 * TT);
+	       T10 = FMA(KP623489801, TP, TR) + FNMA(KP222520933, TQ, KP900968867 * TO);
+	       ii[WS(ios, 3)] = TZ + T10;
+	       ii[WS(ios, 4)] = T10 - TZ;
+	  }
+     }
+     return W; /* W has been advanced by 12 for each of the m iterations */
+}
+
+static const tw_instr twinstr[] = { /* twiddle-instruction table referenced by desc below; tw_instr semantics are defined outside this file */
+     {TW_FULL, 0, 7}, /* NOTE(review): presumably requests the full twiddle set for size 7 -- confirm */
+     {TW_NEXT, 1, 0} /* NOTE(review): presumably the end-of-list / advance marker -- confirm */
+};
+
+static const ct_desc desc = { 7, "t1_7", twinstr, {36, 24, 36, 0}, &GENUS, 0, 0, 0 }; /* {36, 24, 36, ...} matches the add / mul / fused-multiply-add counts stated in this file's header comment; other field meanings are defined by ct_desc elsewhere */
+
+void X(codelet_t1_7) (planner *p) { /* hands the t1_7 kernel and its descriptor to planner p */
+     X(kdft_dit_register) (p, t1_7, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t1_8.c b/src/fftw3/dft/codelets/standard/t1_8.c
new file mode 100644
index 0000000..b9cb995
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_8.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:29:59 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 8 -name t1_8 -include t.h */
+
+/*
+ * This function contains 66 FP additions, 32 FP multiplications,
+ * (or, 52 additions, 18 multiplications, 14 fused multiply/add),
+ * 28 stack variables, and 32 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t1_8(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{ /* Size-8 twiddle kernel: per iteration reads 8 (ri, ii) values, combines inputs 1..7 with W, writes 8 results in place; returns the advanced W. */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* value equals sqrt(2)/2 */
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 14) { /* m iterations; each one reads W[0..13], i.e. 7 twiddle pairs */
+	  E T7, T1e, TH, T19, TF, T13, TR, TU, Ti, T1f, TK, T16, Tu, T12, TM;
+	  E TP;
+	  {
+	       E T1, T18, T6, T17; /* input 0 (used as-is) paired with input 4 (combined with W[6..7]) */
+	       T1 = ri[0];
+	       T18 = ii[0];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = ri[WS(ios, 4)]; /* NOTE(review): WS(ios, k) presumably is the offset of element k for stride ios -- defined outside this file */
+		    T5 = ii[WS(ios, 4)];
+		    T2 = W[6];
+		    T4 = W[7];
+		    T6 = FMA(T2, T3, T4 * T5); /* NOTE(review): presumably FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b, making (T6, T17) = input 4 times (W[6] - i*W[7]) -- confirm macro definitions */
+		    T17 = FNMS(T4, T3, T2 * T5);
+	       }
+	       T7 = T1 + T6;
+	       T1e = T18 - T17;
+	       TH = T1 - T6;
+	       T19 = T17 + T18;
+	  }
+	  {
+	       E Tz, TS, TE, TT; /* inputs 7 and 3, combined with W[12..13] and W[4..5] */
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = ri[WS(ios, 7)];
+		    Ty = ii[WS(ios, 7)];
+		    Tv = W[12];
+		    Tx = W[13];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    TS = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E TB, TD, TA, TC;
+		    TB = ri[WS(ios, 3)];
+		    TD = ii[WS(ios, 3)];
+		    TA = W[4];
+		    TC = W[5];
+		    TE = FMA(TA, TB, TC * TD);
+		    TT = FNMS(TC, TB, TA * TD);
+	       }
+	       TF = Tz + TE;
+	       T13 = TS + TT;
+	       TR = Tz - TE;
+	       TU = TS - TT;
+	  }
+	  {
+	       E Tc, TI, Th, TJ; /* inputs 2 and 6, combined with W[2..3] and W[10..11] */
+	       {
+		    E T9, Tb, T8, Ta;
+		    T9 = ri[WS(ios, 2)];
+		    Tb = ii[WS(ios, 2)];
+		    T8 = W[2];
+		    Ta = W[3];
+		    Tc = FMA(T8, T9, Ta * Tb);
+		    TI = FNMS(Ta, T9, T8 * Tb);
+	       }
+	       {
+		    E Te, Tg, Td, Tf;
+		    Te = ri[WS(ios, 6)];
+		    Tg = ii[WS(ios, 6)];
+		    Td = W[10];
+		    Tf = W[11];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    TJ = FNMS(Tf, Te, Td * Tg);
+	       }
+	       Ti = Tc + Th;
+	       T1f = Tc - Th;
+	       TK = TI - TJ;
+	       T16 = TI + TJ;
+	  }
+	  {
+	       E To, TN, Tt, TO; /* inputs 1 and 5, combined with W[0..1] and W[8..9] */
+	       {
+		    E Tl, Tn, Tk, Tm;
+		    Tl = ri[WS(ios, 1)];
+		    Tn = ii[WS(ios, 1)];
+		    Tk = W[0];
+		    Tm = W[1];
+		    To = FMA(Tk, Tl, Tm * Tn);
+		    TN = FNMS(Tm, Tl, Tk * Tn);
+	       }
+	       {
+		    E Tq, Ts, Tp, Tr;
+		    Tq = ri[WS(ios, 5)];
+		    Ts = ii[WS(ios, 5)];
+		    Tp = W[8];
+		    Tr = W[9];
+		    Tt = FMA(Tp, Tq, Tr * Ts);
+		    TO = FNMS(Tr, Tq, Tp * Ts);
+	       }
+	       Tu = To + Tt;
+	       T12 = TN + TO;
+	       TM = To - Tt;
+	       TP = TN - TO;
+	  }
+	  {
+	       E Tj, TG, T1b, T1c; /* even-indexed outputs 0, 2, 4, 6 are formed from sums and differences only */
+	       Tj = T7 + Ti;
+	       TG = Tu + TF;
+	       ri[WS(ios, 4)] = Tj - TG;
+	       ri[0] = Tj + TG;
+	       {
+		    E T15, T1a, T11, T14;
+		    T15 = T12 + T13;
+		    T1a = T16 + T19;
+		    ii[0] = T15 + T1a;
+		    ii[WS(ios, 4)] = T1a - T15;
+		    T11 = T7 - Ti;
+		    T14 = T12 - T13;
+		    ri[WS(ios, 6)] = T11 - T14;
+		    ri[WS(ios, 2)] = T11 + T14;
+	       }
+	       T1b = TF - Tu;
+	       T1c = T19 - T16;
+	       ii[WS(ios, 2)] = T1b + T1c;
+	       ii[WS(ios, 6)] = T1c - T1b;
+	       {
+		    E TX, T1g, T10, T1d, TY, TZ; /* ri outputs 3 and 7, ii outputs 1 and 5: cross terms scaled by KP707106781 */
+		    TX = TH - TK;
+		    T1g = T1e - T1f;
+		    TY = TP - TM;
+		    TZ = TR + TU;
+		    T10 = KP707106781 * (TY - TZ);
+		    T1d = KP707106781 * (TY + TZ);
+		    ri[WS(ios, 7)] = TX - T10;
+		    ii[WS(ios, 5)] = T1g - T1d;
+		    ri[WS(ios, 3)] = TX + T10;
+		    ii[WS(ios, 1)] = T1d + T1g;
+	       }
+	       {
+		    E TL, T1i, TW, T1h, TQ, TV; /* ri outputs 1 and 5, ii outputs 3 and 7: cross terms scaled by KP707106781 */
+		    TL = TH + TK;
+		    T1i = T1f + T1e;
+		    TQ = TM + TP;
+		    TV = TR - TU;
+		    TW = KP707106781 * (TQ + TV);
+		    T1h = KP707106781 * (TV - TQ);
+		    ri[WS(ios, 5)] = TL - TW;
+		    ii[WS(ios, 7)] = T1i - T1h;
+		    ri[WS(ios, 1)] = TL + TW;
+		    ii[WS(ios, 3)] = T1h + T1i;
+	       }
+	  }
+     }
+     return W; /* W has been advanced by 14 for each of the m iterations */
+}
+
+static const tw_instr twinstr[] = { /* twiddle-instruction table referenced by desc below; tw_instr semantics are defined outside this file */
+     {TW_FULL, 0, 8}, /* NOTE(review): presumably requests the full twiddle set for size 8 -- confirm */
+     {TW_NEXT, 1, 0} /* NOTE(review): presumably the end-of-list / advance marker -- confirm */
+};
+
+static const ct_desc desc = { 8, "t1_8", twinstr, {52, 18, 14, 0}, &GENUS, 0, 0, 0 }; /* {52, 18, 14, ...} matches the add / mul / fused-multiply-add counts stated in this file's header comment; other field meanings are defined by ct_desc elsewhere */
+
+void X(codelet_t1_8) (planner *p) { /* hands the t1_8 kernel and its descriptor to planner p */
+     X(kdft_dit_register) (p, t1_8, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t1_9.c b/src/fftw3/dft/codelets/standard/t1_9.c
new file mode 100644
index 0000000..924e456
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t1_9.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:30:00 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -n 9 -name t1_9 -include t.h */
+
+/*
+ * This function contains 96 FP additions, 72 FP multiplications,
+ * (or, 60 additions, 36 multiplications, 36 fused multiply/add),
+ * 41 stack variables, and 36 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t1_9.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_9.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t1_9.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t1_9(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{ /* Size-9 twiddle kernel: per iteration reads 9 (ri, ii) values, combines inputs 1..8 with W, writes 9 results in place; returns the advanced W. */
+     DK(KP939692620, +0.939692620785908384054109277324731469936208134); /* value equals cos(pi/9) */
+     DK(KP342020143, +0.342020143325668733044099614682259580763083368); /* value equals sin(pi/9) */
+     DK(KP984807753, +0.984807753012208059366743024589523013670643252); /* value equals sin(4*pi/9) */
+     DK(KP173648177, +0.173648177666930348851716626769314796000375677); /* value equals cos(4*pi/9) */
+     DK(KP642787609, +0.642787609686539326322643409907263432907559884); /* value equals sin(2*pi/9) */
+     DK(KP766044443, +0.766044443118978035202392650555416673935832457); /* value equals cos(2*pi/9) */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* one half */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* value equals sqrt(3)/2 */
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 16) { /* m iterations; each one reads W[0..15], i.e. 8 twiddle pairs */
+	  E T1, T1B, TQ, T1G, Tc, TN, T1A, T1H, TL, T1x, T17, T1o, T1c, T1n, Tu;
+	  E T1w, TW, T1k, T11, T1l;
+	  {
+	       E T6, TO, Tb, TP; /* group of inputs 0, 3, 6: input 0 as-is, 3 and 6 combined with W[4..5] and W[10..11] */
+	       T1 = ri[0];
+	       T1B = ii[0];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = ri[WS(ios, 3)]; /* NOTE(review): WS(ios, k) presumably is the offset of element k for stride ios -- defined outside this file */
+		    T5 = ii[WS(ios, 3)];
+		    T2 = W[4];
+		    T4 = W[5];
+		    T6 = FMA(T2, T3, T4 * T5); /* NOTE(review): presumably FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b, making (T6, TO) = input 3 times (W[4] - i*W[5]) -- confirm macro definitions */
+		    TO = FNMS(T4, T3, T2 * T5);
+	       }
+	       {
+		    E T8, Ta, T7, T9;
+		    T8 = ri[WS(ios, 6)];
+		    Ta = ii[WS(ios, 6)];
+		    T7 = W[10];
+		    T9 = W[11];
+		    Tb = FMA(T7, T8, T9 * Ta);
+		    TP = FNMS(T9, T8, T7 * Ta);
+	       }
+	       TQ = KP866025403 * (TO - TP);
+	       T1G = KP866025403 * (Tb - T6);
+	       Tc = T6 + Tb;
+	       TN = FNMS(KP500000000, Tc, T1);
+	       T1A = TO + TP;
+	       T1H = FNMS(KP500000000, T1A, T1B);
+	  }
+	  {
+	       E Tz, T19, TE, T14, TJ, T15, TK, T1a; /* group of inputs 2, 5, 8, combined with W[2..3], W[8..9], W[14..15] */
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = ri[WS(ios, 2)];
+		    Ty = ii[WS(ios, 2)];
+		    Tv = W[2];
+		    Tx = W[3];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    T19 = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E TB, TD, TA, TC;
+		    TB = ri[WS(ios, 5)];
+		    TD = ii[WS(ios, 5)];
+		    TA = W[8];
+		    TC = W[9];
+		    TE = FMA(TA, TB, TC * TD);
+		    T14 = FNMS(TC, TB, TA * TD);
+	       }
+	       {
+		    E TG, TI, TF, TH;
+		    TG = ri[WS(ios, 8)];
+		    TI = ii[WS(ios, 8)];
+		    TF = W[14];
+		    TH = W[15];
+		    TJ = FMA(TF, TG, TH * TI);
+		    T15 = FNMS(TH, TG, TF * TI);
+	       }
+	       TK = TE + TJ;
+	       T1a = T14 + T15;
+	       TL = Tz + TK; /* (TL, T1x): plain sum of the three members of this group */
+	       T1x = T19 + T1a;
+	       {
+		    E T13, T16, T18, T1b; /* remaining two combinations of this group, using the 1/2 and sqrt(3)/2 constants */
+		    T13 = FNMS(KP500000000, TK, Tz);
+		    T16 = KP866025403 * (T14 - T15);
+		    T17 = T13 + T16;
+		    T1o = T13 - T16;
+		    T18 = KP866025403 * (TJ - TE);
+		    T1b = FNMS(KP500000000, T1a, T19);
+		    T1c = T18 + T1b;
+		    T1n = T1b - T18;
+	       }
+	  }
+	  {
+	       E Ti, TY, Tn, TT, Ts, TU, Tt, TZ; /* group of inputs 1, 4, 7, combined with W[0..1], W[6..7], W[12..13] */
+	       {
+		    E Tf, Th, Te, Tg;
+		    Tf = ri[WS(ios, 1)];
+		    Th = ii[WS(ios, 1)];
+		    Te = W[0];
+		    Tg = W[1];
+		    Ti = FMA(Te, Tf, Tg * Th);
+		    TY = FNMS(Tg, Tf, Te * Th);
+	       }
+	       {
+		    E Tk, Tm, Tj, Tl;
+		    Tk = ri[WS(ios, 4)];
+		    Tm = ii[WS(ios, 4)];
+		    Tj = W[6];
+		    Tl = W[7];
+		    Tn = FMA(Tj, Tk, Tl * Tm);
+		    TT = FNMS(Tl, Tk, Tj * Tm);
+	       }
+	       {
+		    E Tp, Tr, To, Tq;
+		    Tp = ri[WS(ios, 7)];
+		    Tr = ii[WS(ios, 7)];
+		    To = W[12];
+		    Tq = W[13];
+		    Ts = FMA(To, Tp, Tq * Tr);
+		    TU = FNMS(Tq, Tp, To * Tr);
+	       }
+	       Tt = Tn + Ts;
+	       TZ = TT + TU;
+	       Tu = Ti + Tt; /* (Tu, T1w): plain sum of the three members of this group */
+	       T1w = TY + TZ;
+	       {
+		    E TS, TV, TX, T10; /* remaining two combinations of this group, using the 1/2 and sqrt(3)/2 constants */
+		    TS = FNMS(KP500000000, Tt, Ti);
+		    TV = KP866025403 * (TT - TU);
+		    TW = TS + TV;
+		    T1k = TS - TV;
+		    TX = KP866025403 * (Ts - Tn);
+		    T10 = FNMS(KP500000000, TZ, TY);
+		    T11 = TX + T10;
+		    T1l = T10 - TX;
+	       }
+	  }
+	  {
+	       E T1y, Td, TM, T1v; /* real parts of outputs 0, 3, 6, built from the three group sums */
+	       T1y = KP866025403 * (T1w - T1x);
+	       Td = T1 + Tc;
+	       TM = Tu + TL;
+	       T1v = FNMS(KP500000000, TM, Td);
+	       ri[0] = Td + TM;
+	       ri[WS(ios, 3)] = T1v + T1y;
+	       ri[WS(ios, 6)] = T1v - T1y;
+	  }
+	  {
+	       E T1D, T1z, T1C, T1E; /* imaginary parts of outputs 0, 3, 6 */
+	       T1D = KP866025403 * (TL - Tu);
+	       T1z = T1w + T1x;
+	       T1C = T1A + T1B;
+	       T1E = FNMS(KP500000000, T1z, T1C);
+	       ii[0] = T1z + T1C;
+	       ii[WS(ios, 6)] = T1E - T1D;
+	       ii[WS(ios, 3)] = T1D + T1E;
+	  }
+	  {
+	       E TR, T1I, T1e, T1J, T1i, T1F, T1f, T1K; /* outputs 1, 4, 7 */
+	       TR = TN + TQ;
+	       T1I = T1G + T1H;
+	       {
+		    E T12, T1d, T1g, T1h; /* group terms (TW, T11) and (T17, T1c) weighted by the 2*pi/9 and 4*pi/9 constants */
+		    T12 = FMA(KP766044443, TW, KP642787609 * T11);
+		    T1d = FMA(KP173648177, T17, KP984807753 * T1c);
+		    T1e = T12 + T1d;
+		    T1J = KP866025403 * (T1d - T12);
+		    T1g = FNMS(KP642787609, TW, KP766044443 * T11);
+		    T1h = FNMS(KP984807753, T17, KP173648177 * T1c);
+		    T1i = KP866025403 * (T1g - T1h);
+		    T1F = T1g + T1h;
+	       }
+	       ri[WS(ios, 1)] = TR + T1e;
+	       ii[WS(ios, 1)] = T1F + T1I;
+	       T1f = FNMS(KP500000000, T1e, TR);
+	       ri[WS(ios, 7)] = T1f - T1i;
+	       ri[WS(ios, 4)] = T1f + T1i;
+	       T1K = FNMS(KP500000000, T1F, T1I);
+	       ii[WS(ios, 4)] = T1J + T1K;
+	       ii[WS(ios, 7)] = T1K - T1J;
+	  }
+	  {
+	       E T1j, T1M, T1q, T1N, T1u, T1L, T1r, T1O; /* outputs 2, 5, 8 */
+	       T1j = TN - TQ;
+	       T1M = T1H - T1G;
+	       {
+		    E T1m, T1p, T1s, T1t; /* group terms (T1k, T1l) and (T1o, T1n) weighted by the 4*pi/9 and pi/9 constants */
+		    T1m = FMA(KP173648177, T1k, KP984807753 * T1l);
+		    T1p = FNMS(KP939692620, T1o, KP342020143 * T1n);
+		    T1q = T1m + T1p;
+		    T1N = KP866025403 * (T1p - T1m);
+		    T1s = FNMS(KP984807753, T1k, KP173648177 * T1l);
+		    T1t = FMA(KP342020143, T1o, KP939692620 * T1n);
+		    T1u = KP866025403 * (T1s + T1t);
+		    T1L = T1s - T1t;
+	       }
+	       ri[WS(ios, 2)] = T1j + T1q;
+	       ii[WS(ios, 2)] = T1L + T1M;
+	       T1r = FNMS(KP500000000, T1q, T1j);
+	       ri[WS(ios, 8)] = T1r - T1u;
+	       ri[WS(ios, 5)] = T1r + T1u;
+	       T1O = FNMS(KP500000000, T1L, T1M);
+	       ii[WS(ios, 5)] = T1N + T1O;
+	       ii[WS(ios, 8)] = T1O - T1N;
+	  }
+     }
+     return W; /* W has been advanced by 16 for each of the m iterations */
+}
+
+static const tw_instr twinstr[] = { /* twiddle-instruction table referenced by desc below; tw_instr semantics are defined outside this file */
+     {TW_FULL, 0, 9}, /* NOTE(review): presumably requests the full twiddle set for size 9 -- confirm */
+     {TW_NEXT, 1, 0} /* NOTE(review): presumably the end-of-list / advance marker -- confirm */
+};
+
+static const ct_desc desc = { 9, "t1_9", twinstr, {60, 36, 36, 0}, &GENUS, 0, 0, 0 }; /* {60, 36, 36, ...} matches the add / mul / fused-multiply-add counts stated in this file's header comment; other field meanings are defined by ct_desc elsewhere */
+
+void X(codelet_t1_9) (planner *p) { /* hands the t1_9 kernel and its descriptor to planner p */
+     X(kdft_dit_register) (p, t1_9, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t2_16.c b/src/fftw3/dft/codelets/standard/t2_16.c
new file mode 100644
index 0000000..46d4bdb
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t2_16.c
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:30:12 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -twiddle-log3 -n 16 -name t2_16 -include t.h */
+
+/*
+ * This function contains 196 FP additions, 108 FP multiplications,
+ * (or, 156 additions, 68 multiplications, 40 fused multiply/add),
+ * 104 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t2_16.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t2_16.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t2_16.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t2_16(R *ri, R *ii, const R *W, stride ios, int m, int dist) /* in-place size-16 pass (genfft -n 16): m iterations, each reading and rewriting ri/ii at WS(ios, 0..15); returns the advanced W */
+{
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* numerically sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 8) { /* each iteration steps ri and ii by dist and W by 8 */
+	  E T1, T3d, T18, T26, T29, T2R, Tq, T1r, T1E, T2k, T2g, T1O, Te, T3c, Tz;
+	  E T1P, T1S, T1T, T1U, TG, TL, T1V, T1Y, T1Z, T20, TT, TY, T1X, T1A, T2l;
+	  E T1J, T2h, T1h, T2b, T1m, T2a;
+	  T1 = ri[0]; /* element 0 (T1, T3d) is used below without being multiplied by any W value */
+	  T3d = ii[0];
+	  {
+	       E T9, Td, Tl, Tp, Ty, Tu, TD, TF, TI, TK, TV, TQ, TS, TX, T1z;
+	       E T1v, T1C, T1D, T1G, T1I, T1q, T1p, T1l, T1j, T1c, T1g, T2, T5, Ti, Tg;
+	       E T4, Tw, Ts, Ta, Tv, T7, Tb, Tr, Tk, TW, TJ, TC, TU, To, TE;
+	       E TH, T14, T24, T17, T25, TN, TO, TP, TR;
+	       T9 = ri[WS(ios, 8)]; /* load elements 2..8 and 10..15 here; elements 1 and 9 are loaded in the nested block */
+	       Td = ii[WS(ios, 8)];
+	       Tl = ri[WS(ios, 4)];
+	       Tp = ii[WS(ios, 4)];
+	       Ty = ii[WS(ios, 12)];
+	       Tu = ri[WS(ios, 12)];
+	       TD = ri[WS(ios, 2)];
+	       TF = ii[WS(ios, 2)];
+	       TI = ri[WS(ios, 10)];
+	       TK = ii[WS(ios, 10)];
+	       TV = ri[WS(ios, 6)];
+	       TQ = ri[WS(ios, 14)];
+	       TS = ii[WS(ios, 14)];
+	       TX = ii[WS(ios, 6)];
+	       T1z = ii[WS(ios, 7)];
+	       T1v = ri[WS(ios, 7)];
+	       T1C = ri[WS(ios, 3)];
+	       T1D = ii[WS(ios, 3)];
+	       T1G = ri[WS(ios, 11)];
+	       T1I = ii[WS(ios, 11)];
+	       T1q = ii[WS(ios, 15)];
+	       T1p = ri[WS(ios, 15)];
+	       T1l = ii[WS(ios, 13)];
+	       T1j = ri[WS(ios, 13)];
+	       T1c = ri[WS(ios, 5)];
+	       T1g = ii[WS(ios, 5)];
+	       {
+		    E T12, T13, T15, T16, T3, T6, Tm, Tj, Tn, Th;
+		    T12 = ri[WS(ios, 1)];
+		    T13 = ii[WS(ios, 1)];
+		    T15 = ri[WS(ios, 9)];
+		    T16 = ii[WS(ios, 9)];
+		    T2 = W[4]; /* W[0]..W[5] are read here; W[6] and W[7] are read into TN/TO further down */
+		    T5 = W[5];
+		    T3 = W[0];
+		    T6 = W[1];
+		    Ti = W[3];
+		    Tg = W[2];
+		    T4 = T2 * T3; /* pairwise products of W values, combined below into multipliers for elements that have no W entry of their own */
+		    Tw = T5 * Tg;
+		    Ts = T5 * Ti;
+		    Ta = T2 * T6;
+		    Tv = T2 * Ti;
+		    T7 = T5 * T6;
+		    Tb = T5 * T3;
+		    Tr = T2 * Tg;
+		    Tm = Tg * T6;
+		    Tj = Ti * T6;
+		    Tn = Ti * T3;
+		    Th = Tg * T3;
+		    Tk = Th - Tj; /* (Tk, To) is later applied to element 4 (Tl, Tp) */
+		    TW = Tv - Tw;
+		    TJ = Ta + Tb;
+		    TC = Th + Tj;
+		    TU = Tr + Ts;
+		    To = Tm + Tn;
+		    TE = Tm - Tn;
+		    TH = T4 - T7;
+		    T14 = FMA(T3, T12, T6 * T13); /* element 1 combined with (W[0], W[1]); FMA/FNMS are macros defined outside this view */
+		    T24 = FNMS(T6, T12, T3 * T13);
+		    T17 = FMA(T2, T15, T5 * T16); /* element 9 combined with (W[4], W[5]) */
+		    T25 = FNMS(T5, T15, T2 * T16);
+		    TN = W[6];
+		    TO = W[7];
+		    TP = FMA(TN, T3, TO * T6);
+		    TR = FNMS(TO, T3, TN * T6);
+	       }
+	       T18 = T14 + T17;
+	       T26 = T24 - T25;
+	       T29 = T14 - T17;
+	       T2R = T24 + T25;
+	       Tq = FMA(Tk, Tl, To * Tp);
+	       T1r = FMA(TN, T1p, TO * T1q); /* element 15 combined with (W[6], W[7]) */
+	       T1E = FMA(Tg, T1C, Ti * T1D); /* element 3 combined with (W[2], W[3]) */
+	       T2k = FNMS(TO, T1p, TN * T1q);
+	       T2g = FNMS(Ti, T1C, Tg * T1D);
+	       {
+		    E T8, Tc, Tt, Tx;
+		    T1O = FNMS(To, Tl, Tk * Tp);
+		    T8 = T4 + T7;
+		    Tc = Ta - Tb;
+		    Te = FNMS(Tc, Td, T8 * T9);
+		    T3c = FMA(Tc, T9, T8 * Td);
+		    Tt = Tr - Ts;
+		    Tx = Tv + Tw;
+		    Tz = FMA(Tt, Tu, Tx * Ty);
+		    T1P = FNMS(Tx, Tu, Tt * Ty);
+		    T1S = FMA(TE, TD, TC * TF);
+		    T1T = FNMS(TJ, TI, TH * TK);
+		    T1U = T1S - T1T;
+	       }
+	       TG = FNMS(TE, TF, TC * TD);
+	       TL = FMA(TH, TI, TJ * TK);
+	       T1V = TG - TL;
+	       T1Y = FMA(TR, TQ, TP * TS);
+	       T1Z = FMA(TW, TV, TU * TX);
+	       T20 = T1Y - T1Z;
+	       TT = FNMS(TR, TS, TP * TQ);
+	       TY = FNMS(TW, TX, TU * TV);
+	       T1X = TT - TY;
+	       {
+		    E T1u, T1F, T1y, T1H;
+		    {
+			 E T1s, T1t, T1w, T1x;
+			 T1s = T2 * TC; /* second-level products: (W[4], W[5]) times the derived pair (TC, TE); used for elements 7 and 11 */
+			 T1t = T5 * TE;
+			 T1u = T1s - T1t;
+			 T1F = T1s + T1t;
+			 T1w = T2 * TE;
+			 T1x = T5 * TC;
+			 T1y = T1w + T1x;
+			 T1H = T1w - T1x;
+		    }
+		    T1A = FMA(T1u, T1v, T1y * T1z);
+		    T2l = FNMS(T1y, T1v, T1u * T1z);
+		    T1J = FNMS(T1H, T1I, T1F * T1G);
+		    T2h = FMA(T1H, T1G, T1F * T1I);
+	       }
+	       {
+		    E T1b, T1i, T1f, T1k;
+		    {
+			 E T19, T1a, T1d, T1e;
+			 T19 = T2 * Tk; /* second-level products: (W[4], W[5]) times the derived pair (Tk, To); used for elements 5 and 13 */
+			 T1a = T5 * To;
+			 T1b = T19 + T1a;
+			 T1i = T19 - T1a;
+			 T1d = T2 * To;
+			 T1e = T5 * Tk;
+			 T1f = T1d - T1e;
+			 T1k = T1d + T1e;
+		    }
+		    T1h = FNMS(T1f, T1g, T1b * T1c);
+		    T2b = FNMS(T1k, T1j, T1i * T1l);
+		    T1m = FMA(T1i, T1j, T1k * T1l);
+		    T2a = FMA(T1f, T1c, T1b * T1g);
+	       }
+	  }
+	  { /* first output group: writes the even-indexed outputs 0, 2, ..., 14 */
+	       E TB, T2L, T10, T3k, T3f, T3l, T2O, T3a, T1o, T36, T2U, T32, T1L, T37, T2Z;
+	       E T33;
+	       {
+		    E Tf, TA, T2M, T2N;
+		    Tf = T1 + Te;
+		    TA = Tq + Tz;
+		    TB = Tf + TA;
+		    T2L = Tf - TA;
+		    {
+			 E TM, TZ, T3b, T3e;
+			 TM = TG + TL;
+			 TZ = TT + TY;
+			 T10 = TM + TZ;
+			 T3k = TZ - TM;
+			 T3b = T1O + T1P;
+			 T3e = T3c + T3d;
+			 T3f = T3b + T3e;
+			 T3l = T3e - T3b;
+		    }
+		    T2M = T1S + T1T;
+		    T2N = T1Y + T1Z;
+		    T2O = T2M - T2N;
+		    T3a = T2M + T2N;
+		    {
+			 E T1n, T2Q, T2S, T2T;
+			 T1n = T1h + T1m;
+			 T2Q = T18 - T1n;
+			 T2S = T2a + T2b;
+			 T2T = T2R - T2S;
+			 T1o = T18 + T1n;
+			 T36 = T2R + T2S;
+			 T2U = T2Q + T2T;
+			 T32 = T2T - T2Q;
+		    }
+		    {
+			 E T1B, T1K, T2V, T2W, T2X, T2Y;
+			 T1B = T1r + T1A;
+			 T1K = T1E + T1J;
+			 T2V = T1B - T1K;
+			 T2W = T2k + T2l;
+			 T2X = T2g + T2h;
+			 T2Y = T2W - T2X;
+			 T1L = T1B + T1K;
+			 T37 = T2W + T2X;
+			 T2Z = T2V - T2Y;
+			 T33 = T2V + T2Y;
+		    }
+	       }
+	       {
+		    E T11, T1M, T39, T3g;
+		    T11 = TB + T10;
+		    T1M = T1o + T1L;
+		    ri[WS(ios, 8)] = T11 - T1M; /* outputs 0 and 8 */
+		    ri[0] = T11 + T1M;
+		    T39 = T36 + T37;
+		    T3g = T3a + T3f;
+		    ii[0] = T39 + T3g;
+		    ii[WS(ios, 8)] = T3g - T39;
+	       }
+	       {
+		    E T2P, T30, T3j, T3m;
+		    T2P = T2L + T2O;
+		    T30 = KP707106781 * (T2U + T2Z);
+		    ri[WS(ios, 10)] = T2P - T30; /* outputs 2 and 10 */
+		    ri[WS(ios, 2)] = T2P + T30;
+		    T3j = KP707106781 * (T32 + T33);
+		    T3m = T3k + T3l;
+		    ii[WS(ios, 2)] = T3j + T3m;
+		    ii[WS(ios, 10)] = T3m - T3j;
+	       }
+	       {
+		    E T31, T34, T3n, T3o;
+		    T31 = T2L - T2O;
+		    T34 = KP707106781 * (T32 - T33);
+		    ri[WS(ios, 14)] = T31 - T34; /* outputs 6 and 14 */
+		    ri[WS(ios, 6)] = T31 + T34;
+		    T3n = KP707106781 * (T2Z - T2U);
+		    T3o = T3l - T3k;
+		    ii[WS(ios, 6)] = T3n + T3o;
+		    ii[WS(ios, 14)] = T3o - T3n;
+	       }
+	       {
+		    E T35, T38, T3h, T3i;
+		    T35 = TB - T10;
+		    T38 = T36 - T37;
+		    ri[WS(ios, 12)] = T35 - T38; /* outputs 4 and 12 */
+		    ri[WS(ios, 4)] = T35 + T38;
+		    T3h = T1L - T1o;
+		    T3i = T3f - T3a;
+		    ii[WS(ios, 4)] = T3h + T3i;
+		    ii[WS(ios, 12)] = T3i - T3h;
+	       }
+	  }
+	  { /* second output group: writes the odd-indexed outputs 1, 3, ..., 15 */
+	       E T1R, T2v, T22, T3q, T3t, T3z, T2y, T3y, T2e, T2I, T2s, T2C, T2p, T2J, T2t;
+	       E T2F;
+	       {
+		    E T1N, T1Q, T2w, T2x;
+		    T1N = T1 - Te;
+		    T1Q = T1O - T1P;
+		    T1R = T1N - T1Q;
+		    T2v = T1N + T1Q;
+		    {
+			 E T1W, T21, T3r, T3s;
+			 T1W = T1U - T1V;
+			 T21 = T1X + T20;
+			 T22 = KP707106781 * (T1W - T21);
+			 T3q = KP707106781 * (T1W + T21);
+			 T3r = T3d - T3c;
+			 T3s = Tq - Tz;
+			 T3t = T3r - T3s;
+			 T3z = T3s + T3r;
+		    }
+		    T2w = T1V + T1U;
+		    T2x = T1X - T20;
+		    T2y = KP707106781 * (T2w + T2x);
+		    T3y = KP707106781 * (T2x - T2w);
+		    {
+			 E T28, T2A, T2d, T2B, T27, T2c;
+			 T27 = T1h - T1m;
+			 T28 = T26 + T27;
+			 T2A = T26 - T27;
+			 T2c = T2a - T2b;
+			 T2d = T29 - T2c;
+			 T2B = T29 + T2c;
+			 T2e = FMA(KP923879532, T28, KP382683432 * T2d);
+			 T2I = FNMS(KP382683432, T2B, KP923879532 * T2A);
+			 T2s = FNMS(KP923879532, T2d, KP382683432 * T28);
+			 T2C = FMA(KP382683432, T2A, KP923879532 * T2B);
+		    }
+		    {
+			 E T2j, T2D, T2o, T2E;
+			 {
+			      E T2f, T2i, T2m, T2n;
+			      T2f = T1r - T1A;
+			      T2i = T2g - T2h;
+			      T2j = T2f - T2i;
+			      T2D = T2f + T2i;
+			      T2m = T2k - T2l;
+			      T2n = T1E - T1J;
+			      T2o = T2m + T2n;
+			      T2E = T2m - T2n;
+			 }
+			 T2p = FNMS(KP923879532, T2o, KP382683432 * T2j);
+			 T2J = FMA(KP923879532, T2E, KP382683432 * T2D);
+			 T2t = FMA(KP382683432, T2o, KP923879532 * T2j);
+			 T2F = FNMS(KP382683432, T2E, KP923879532 * T2D);
+		    }
+	       }
+	       {
+		    E T23, T2q, T3x, T3A;
+		    T23 = T1R + T22;
+		    T2q = T2e + T2p;
+		    ri[WS(ios, 11)] = T23 - T2q; /* outputs 3 and 11 */
+		    ri[WS(ios, 3)] = T23 + T2q;
+		    T3x = T2s + T2t;
+		    T3A = T3y + T3z;
+		    ii[WS(ios, 3)] = T3x + T3A;
+		    ii[WS(ios, 11)] = T3A - T3x;
+	       }
+	       {
+		    E T2r, T2u, T3B, T3C;
+		    T2r = T1R - T22;
+		    T2u = T2s - T2t;
+		    ri[WS(ios, 15)] = T2r - T2u; /* outputs 7 and 15 */
+		    ri[WS(ios, 7)] = T2r + T2u;
+		    T3B = T2p - T2e;
+		    T3C = T3z - T3y;
+		    ii[WS(ios, 7)] = T3B + T3C;
+		    ii[WS(ios, 15)] = T3C - T3B;
+	       }
+	       {
+		    E T2z, T2G, T3p, T3u;
+		    T2z = T2v + T2y;
+		    T2G = T2C + T2F;
+		    ri[WS(ios, 9)] = T2z - T2G; /* outputs 1 and 9 */
+		    ri[WS(ios, 1)] = T2z + T2G;
+		    T3p = T2I + T2J;
+		    T3u = T3q + T3t;
+		    ii[WS(ios, 1)] = T3p + T3u;
+		    ii[WS(ios, 9)] = T3u - T3p;
+	       }
+	       {
+		    E T2H, T2K, T3v, T3w;
+		    T2H = T2v - T2y;
+		    T2K = T2I - T2J;
+		    ri[WS(ios, 13)] = T2H - T2K; /* outputs 5 and 13 */
+		    ri[WS(ios, 5)] = T2H + T2K;
+		    T3v = T2F - T2C;
+		    T3w = T3t - T3q;
+		    ii[WS(ios, 5)] = T3v + T3w;
+		    ii[WS(ios, 13)] = T3w - T3v;
+	       }
+	  }
+     }
+     return W; /* W as left by the loop: advanced by 8 for every iteration executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction list for t2_16; referenced by desc below */
+     {TW_COS, 0, 1}, /* eight COS/SIN entries precede TW_NEXT; t2_16 reads W[0]..W[7] and steps W by 8 per iteration */
+     {TW_SIN, 0, 1}, /* NOTE(review): presumably entry k describes the value stored at W[k] -- confirm against the tw_instr definition */
+     {TW_COS, 0, 3},
+     {TW_SIN, 0, 3},
+     {TW_COS, 0, 9},
+     {TW_SIN, 0, 9},
+     {TW_COS, 0, 15},
+     {TW_SIN, 0, 15},
+     {TW_NEXT, 1, 0} /* final entry; NOTE(review): presumably the list terminator -- confirm */
+};
+
+static const ct_desc desc = { 16, "t2_16", twinstr, {156, 68, 40, 0}, &GENUS, 0, 0, 0 }; /* descriptor handed to X(kdft_dit_register) below; 156/68/40 match the additions, multiplications and fused multiply/adds listed in this file's header comment */
+
+void X(codelet_t2_16) (planner *p) { /* entry point: hands t2_16 and its descriptor to the DIT registration routine for planner p */
+     X(kdft_dit_register) (p, t2_16, &desc);
+}
diff --git a/src/fftw3/dft/codelets/standard/t2_32.c b/src/fftw3/dft/codelets/standard/t2_32.c
new file mode 100644
index 0000000..b065ecb
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t2_32.c
@@ -0,0 +1,853 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:30:26 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -twiddle-log3 -n 32 -name t2_32 -include t.h */
+
+/*
+ * This function contains 488 FP additions, 280 FP multiplications,
+ * (or, 376 additions, 168 multiplications, 112 fused multiply/add),
+ * 204 stack variables, and 128 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t2_32.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t2_32.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t2_32.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t2_32(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 8) {
+	  E T1, T7G, Tn, Tp, T3t, T4S, TQ, T3G, T49, T20, T2n, T4y, T1J, T43, T2w;
+	  E T4z, T36, T4Z, TK, T8b, T40, T6l, T3U, T6k, T1h, T3L, T1D, T3V, T1s, T3X;
+	  E T3E, T7E, T3O, T6h, T2k, T6w, T4i, T4x, T3q, T6I, T4O, T4P, T3w, T4T, T4R;
+	  E T4U, Tm, To, TX, T4I, T3a, T3H, T31, T4Y, T3f, T4J, T2G, T4s, T4r, T2B;
+	  E T4q, T4t, T27, T4a, T2M, T4m, T4n, T2P, T4l, T4o, T1U, T44;
+	  T1 = ri[0];
+	  T7G = ii[0];
+	  Tn = ri[WS(ios, 16)];
+	  Tp = ii[WS(ios, 16)];
+	  {
+	       E Tv, Tz, TE, TI, TP, TN, TU, TW, T12, T16, T1k, T1b, T1f, T1l, T24;
+	       E T1z, T1w, T1u, T1q, T1o, T1B, T1X, T1Z, T1T, T1R, T1I, T1G, T26, T2O, T3e;
+	       E T3m, T3o, T3u, T3v, T3c, T30, T2W, T33, T35, T38, T39, T2N, T2r, T2v, T2m;
+	       E T2l, T2i, T2g, T2z, T2A, T2D, T2F, T2L, T2J, T2, Ti, T3, Tc, TF, TC;
+	       E TG, TB, Tu, T1a, T15, Ty, T1t, T1Y, T1W, T1v, TH, T1y, T11, TD, T1A;
+	       E T1e, T4g, T3k, T1n, T1p, T2e, T4M, TM, T1K, T1O, TO, T1L, T1N, Ta, Tb;
+	       E T2t, Tk, T2o, Tf, Tg, T2s, Tj, T2p;
+	       Tv = ri[WS(ios, 8)];
+	       Tz = ii[WS(ios, 8)];
+	       TE = ri[WS(ios, 24)];
+	       TI = ii[WS(ios, 24)];
+	       TP = ii[WS(ios, 4)];
+	       TN = ri[WS(ios, 4)];
+	       TU = ri[WS(ios, 20)];
+	       TW = ii[WS(ios, 20)];
+	       T12 = ri[WS(ios, 28)];
+	       T16 = ii[WS(ios, 28)];
+	       T1k = ri[WS(ios, 2)];
+	       T1b = ri[WS(ios, 12)];
+	       T1f = ii[WS(ios, 12)];
+	       T1l = ii[WS(ios, 2)];
+	       T24 = ri[WS(ios, 22)];
+	       T1z = ri[WS(ios, 26)];
+	       T1w = ii[WS(ios, 10)];
+	       T1u = ri[WS(ios, 10)];
+	       T1q = ii[WS(ios, 18)];
+	       T1o = ri[WS(ios, 18)];
+	       T1B = ii[WS(ios, 26)];
+	       T1X = ri[WS(ios, 6)];
+	       T1Z = ii[WS(ios, 6)];
+	       T1T = ii[WS(ios, 14)];
+	       T1R = ri[WS(ios, 14)];
+	       T1I = ii[WS(ios, 30)];
+	       T1G = ri[WS(ios, 30)];
+	       T26 = ii[WS(ios, 22)];
+	       T2O = ii[WS(ios, 13)];
+	       T3e = ii[WS(ios, 23)];
+	       T3m = ri[WS(ios, 19)];
+	       T3o = ii[WS(ios, 19)];
+	       T3u = ri[WS(ios, 11)];
+	       T3v = ii[WS(ios, 11)];
+	       T3c = ri[WS(ios, 23)];
+	       T30 = ii[WS(ios, 31)];
+	       T2W = ri[WS(ios, 31)];
+	       T33 = ri[WS(ios, 15)];
+	       T35 = ii[WS(ios, 15)];
+	       T38 = ri[WS(ios, 7)];
+	       T39 = ii[WS(ios, 7)];
+	       T2N = ri[WS(ios, 13)];
+	       T2r = ri[WS(ios, 25)];
+	       T2v = ii[WS(ios, 25)];
+	       T2m = ii[WS(ios, 9)];
+	       T2l = ri[WS(ios, 9)];
+	       T2i = ii[WS(ios, 17)];
+	       T2g = ri[WS(ios, 17)];
+	       T2z = ri[WS(ios, 5)];
+	       T2A = ii[WS(ios, 5)];
+	       T2D = ri[WS(ios, 21)];
+	       T2F = ii[WS(ios, 21)];
+	       T2L = ii[WS(ios, 29)];
+	       T2J = ri[WS(ios, 29)];
+	       {
+		    E T2c, T2d, T3i, T3j, T3s, T3r, T4, T7, T5, T8, T6, T9, T14, T1d, Ts;
+		    E T18, T19, T1c, Te, Td, Tt, Tw, T13, TZ, T10, Tx;
+		    T2c = ri[WS(ios, 1)];
+		    T2d = ii[WS(ios, 1)];
+		    T3i = ri[WS(ios, 3)];
+		    T3j = ii[WS(ios, 3)];
+		    T3s = ii[WS(ios, 27)];
+		    T3r = ri[WS(ios, 27)];
+		    T2 = W[6];
+		    Ti = W[7];
+		    T3 = W[4];
+		    Tc = W[5];
+		    T4 = W[2];
+		    T7 = W[3];
+		    T5 = W[0];
+		    T8 = W[1];
+		    T6 = T4 * T5;
+		    T9 = T7 * T8;
+		    T14 = Ti * T5;
+		    T1d = Tc * T4;
+		    Ts = T3 * T5;
+		    T18 = T3 * T4;
+		    T19 = Tc * T7;
+		    T1c = T3 * T7;
+		    Te = T7 * T5;
+		    Td = T4 * T8;
+		    Tt = Tc * T8;
+		    Tw = T3 * T8;
+		    TF = T2 * T7;
+		    T13 = T2 * T8;
+		    TC = Ti * T7;
+		    TG = Ti * T4;
+		    TZ = T2 * T5;
+		    T10 = Ti * T8;
+		    TB = T2 * T4;
+		    Tx = Tc * T5;
+		    Tu = Ts + Tt;
+		    T1a = T18 - T19;
+		    T15 = T13 + T14;
+		    Ty = Tw - Tx;
+		    T1t = Ts - Tt;
+		    T1Y = T1c - T1d;
+		    T1W = T18 + T19;
+		    T1v = Tw + Tx;
+		    TH = TF - TG;
+		    T1y = TZ + T10;
+		    T11 = TZ - T10;
+		    TD = TB + TC;
+		    T1A = T13 - T14;
+		    T1e = T1c + T1d;
+		    T3t = FMA(T2, T3r, Ti * T3s);
+		    T4g = FNMS(T8, T2c, T5 * T2d);
+		    T4S = FNMS(Ti, T3r, T2 * T3s);
+		    T3k = FMA(T4, T3i, T7 * T3j);
+		    T1n = FMA(T2, T3, Ti * Tc);
+		    T1p = FNMS(Ti, T3, T2 * Tc);
+		    T2e = FMA(T5, T2c, T8 * T2d);
+		    T4M = FNMS(T7, T3i, T4 * T3j);
+		    TM = T6 - T9;
+		    T1K = T3 * TM;
+		    T1O = Tc * TM;
+		    TO = Td + Te;
+		    T1L = Tc * TO;
+		    T1N = T3 * TO;
+		    Ta = T6 + T9;
+		    Tb = T3 * Ta;
+		    T2t = Ti * Ta;
+		    Tk = Tc * Ta;
+		    T2o = T2 * Ta;
+		    Tf = Td - Te;
+		    Tg = Tc * Tf;
+		    T2s = T2 * Tf;
+		    Tj = T3 * Tf;
+		    T2p = Ti * Tf;
+	       }
+	       TQ = FMA(TM, TN, TO * TP);
+	       T3G = FNMS(TO, TN, TM * TP);
+	       T49 = FMA(T1Y, T1X, T1W * T1Z);
+	       T20 = FNMS(T1Y, T1Z, T1W * T1X);
+	       T2n = FMA(T3, T2l, Tc * T2m);
+	       T4y = FNMS(Tc, T2l, T3 * T2m);
+	       {
+		    E T1F, T1H, TA, TJ;
+		    T1F = TB - TC;
+		    T1H = TF + TG;
+		    T1J = FMA(T1F, T1G, T1H * T1I);
+		    T43 = FNMS(T1H, T1G, T1F * T1I);
+		    {
+			 E T2q, T2u, T32, T34;
+			 T2q = T2o - T2p;
+			 T2u = T2s + T2t;
+			 T2w = FMA(T2q, T2r, T2u * T2v);
+			 T4z = FNMS(T2u, T2r, T2q * T2v);
+			 T32 = FMA(T2, T1a, Ti * T1e);
+			 T34 = FNMS(Ti, T1a, T2 * T1e);
+			 T36 = FNMS(T34, T35, T32 * T33);
+			 T4Z = FMA(T34, T33, T32 * T35);
+		    }
+		    TA = FNMS(Ty, Tz, Tu * Tv);
+		    TJ = FNMS(TH, TI, TD * TE);
+		    TK = TA + TJ;
+		    T8b = TA - TJ;
+		    {
+			 E T3Y, T3Z, T3S, T3T;
+			 T3Y = FNMS(T1v, T1u, T1t * T1w);
+			 T3Z = FMA(T1A, T1z, T1y * T1B);
+			 T40 = T3Y - T3Z;
+			 T6l = T3Y + T3Z;
+			 T3S = FMA(Tf, T1k, Ta * T1l);
+			 T3T = FMA(T1p, T1o, T1n * T1q);
+			 T3U = T3S - T3T;
+			 T6k = T3S + T3T;
+		    }
+	       }
+	       {
+		    E T17, T1g, Th, Tl;
+		    T17 = FMA(T11, T12, T15 * T16);
+		    T1g = FMA(T1a, T1b, T1e * T1f);
+		    T1h = T17 + T1g;
+		    T3L = T17 - T1g;
+		    {
+			 E T1x, T1C, T1m, T1r;
+			 T1x = FMA(T1t, T1u, T1v * T1w);
+			 T1C = FNMS(T1A, T1B, T1y * T1z);
+			 T1D = T1x + T1C;
+			 T3V = T1x - T1C;
+			 T1m = FNMS(Tf, T1l, Ta * T1k);
+			 T1r = FNMS(T1p, T1q, T1n * T1o);
+			 T1s = T1m + T1r;
+			 T3X = T1m - T1r;
+		    }
+		    {
+			 E T3C, T3D, T3M, T3N;
+			 T3C = FMA(Ty, Tv, Tu * Tz);
+			 T3D = FMA(TH, TE, TD * TI);
+			 T3E = T3C - T3D;
+			 T7E = T3C + T3D;
+			 T3M = FNMS(T15, T12, T11 * T16);
+			 T3N = FNMS(T1e, T1b, T1a * T1f);
+			 T3O = T3M - T3N;
+			 T6h = T3M + T3N;
+			 {
+			      E T2j, T4h, T2f, T2h;
+			      T2f = FMA(T2, T1t, Ti * T1v);
+			      T2h = FNMS(Ti, T1t, T2 * T1v);
+			      T2j = FNMS(T2h, T2i, T2f * T2g);
+			      T4h = FMA(T2h, T2g, T2f * T2i);
+			      T2k = T2e + T2j;
+			      T6w = T4g + T4h;
+			      T4i = T4g - T4h;
+			      T4x = T2e - T2j;
+			 }
+		    }
+		    {
+			 E T3p, T4N, T3l, T3n;
+			 T3l = FNMS(Ti, Ty, T2 * Tu);
+			 T3n = FMA(T2, Ty, Ti * Tu);
+			 T3p = FMA(T3l, T3m, T3n * T3o);
+			 T4N = FNMS(T3n, T3m, T3l * T3o);
+			 T3q = T3k + T3p;
+			 T6I = T4M + T4N;
+			 T4O = T4M - T4N;
+			 T4P = T3k - T3p;
+		    }
+		    Th = Tb + Tg;
+		    Tl = Tj - Tk;
+		    T3w = FNMS(Tl, T3v, Th * T3u);
+		    T4T = FMA(Tl, T3u, Th * T3v);
+		    T4R = T3t - T3w;
+		    T4U = T4S - T4T;
+		    Tm = FNMS(Ti, Tl, T2 * Th);
+		    To = FMA(T2, Tl, Ti * Th);
+		    {
+			 E TR, TS, TT, TV;
+			 TR = Tb - Tg;
+			 TS = Tj + Tk;
+			 TT = FMA(T2, TR, Ti * TS);
+			 TV = FNMS(Ti, TR, T2 * TS);
+			 TX = FNMS(TV, TW, TT * TU);
+			 T4I = FNMS(TS, T38, TR * T39);
+			 T3a = FMA(TR, T38, TS * T39);
+			 T3H = FMA(TV, TU, TT * TW);
+		    }
+		    {
+			 E T2V, T3b, T2Z, T3d;
+			 {
+			      E T2T, T2U, T2X, T2Y;
+			      T2T = T2 * TM;
+			      T2U = Ti * TO;
+			      T2V = T2T - T2U;
+			      T3b = T2T + T2U;
+			      T2X = T2 * TO;
+			      T2Y = Ti * TM;
+			      T2Z = T2X + T2Y;
+			      T3d = T2X - T2Y;
+			 }
+			 T31 = FMA(T2V, T2W, T2Z * T30);
+			 T4Y = FNMS(T2Z, T2W, T2V * T30);
+			 T3f = FNMS(T3d, T3e, T3b * T3c);
+			 T4J = FMA(T3d, T3c, T3b * T3e);
+		    }
+		    {
+			 E T23, T25, T1Q, T1S;
+			 {
+			      E T2C, T2E, T21, T22;
+			      T2C = FNMS(Ti, T1Y, T2 * T1W);
+			      T2E = FMA(T2, T1Y, Ti * T1W);
+			      T2G = FMA(T2C, T2D, T2E * T2F);
+			      T4s = FNMS(T2E, T2D, T2C * T2F);
+			      T21 = T1K + T1L;
+			      T22 = T1N - T1O;
+			      T23 = FNMS(Ti, T22, T2 * T21);
+			      T4r = FMA(T22, T2z, T21 * T2A);
+			      T25 = FMA(T2, T22, Ti * T21);
+			      T2B = FNMS(T22, T2A, T21 * T2z);
+			 }
+			 T4q = T2B - T2G;
+			 T4t = T4r - T4s;
+			 T27 = FMA(T23, T24, T25 * T26);
+			 T4a = FNMS(T25, T24, T23 * T26);
+			 {
+			      E T2I, T2K, T1M, T1P;
+			      T2I = T2o + T2p;
+			      T2K = T2s - T2t;
+			      T2M = FNMS(T2K, T2L, T2I * T2J);
+			      T4m = FMA(T2K, T2J, T2I * T2L);
+			      T1M = T1K - T1L;
+			      T1P = T1N + T1O;
+			      T1Q = FMA(T2, T1M, Ti * T1P);
+			      T4n = FNMS(T1P, T2N, T1M * T2O);
+			      T1S = FNMS(Ti, T1M, T2 * T1P);
+			      T2P = FMA(T1M, T2N, T1P * T2O);
+			 }
+			 T4l = T2M - T2P;
+			 T4o = T4m - T4n;
+			 T1U = FNMS(T1S, T1T, T1Q * T1R);
+			 T44 = FMA(T1S, T1R, T1Q * T1T);
+		    }
+	       }
+	  }
+	  {
+	       E T1i, T7V, T6i, T7D, T42, T5e, T5A, T60, T6o, T6Y, TL, T6f, T3F, T5t, T7I;
+	       E T8q, T7W, T8c, T3Q, T8p, T5w, T89, T4d, T61, T5f, T5D, T2a, T6t, T7O, T7C;
+	       E T7g, T6Z, T4w, T64, T65, T4F, T5i, T5I, T5L, T5j, T2S, T7l, T7y, T6A, T6F;
+	       E T73, T7i, T72, T4X, T67, T68, T56, T5l, T5P, T5S, T5m, T3z, T7q, T7z, T6L;
+	       E T6Q, T76, T7n, T75;
+	       {
+		    E TY, T6g, T3W, T41;
+		    TY = TQ + TX;
+		    T1i = TY + T1h;
+		    T7V = T1h - TY;
+		    T6g = T3G + T3H;
+		    T6i = T6g - T6h;
+		    T7D = T6g + T6h;
+		    T3W = T3U + T3V;
+		    T41 = T3X - T40;
+		    T42 = FNMS(KP923879532, T41, KP382683432 * T3W);
+		    T5e = FMA(KP923879532, T3W, KP382683432 * T41);
+	       }
+	       {
+		    E T5y, T5z, T6m, T6n;
+		    T5y = T3U - T3V;
+		    T5z = T3X + T40;
+		    T5A = FNMS(KP382683432, T5z, KP923879532 * T5y);
+		    T60 = FMA(KP382683432, T5y, KP923879532 * T5z);
+		    T6m = T6k - T6l;
+		    T6n = T1s - T1D;
+		    T6o = T6m - T6n;
+		    T6Y = T6n + T6m;
+	       }
+	       {
+		    E Tr, T3B, Tq, T7H, T8a, T7F;
+		    Tq = FMA(Tm, Tn, To * Tp);
+		    Tr = T1 + Tq;
+		    T3B = T1 - Tq;
+		    TL = Tr + TK;
+		    T6f = Tr - TK;
+		    T3F = T3B - T3E;
+		    T5t = T3B + T3E;
+		    T7F = FNMS(To, Tn, Tm * Tp);
+		    T7H = T7F + T7G;
+		    T8a = T7G - T7F;
+		    T7I = T7E + T7H;
+		    T8q = T8b + T8a;
+		    T7W = T7H - T7E;
+		    T8c = T8a - T8b;
+	       }
+	       {
+		    E T3P, T5v, T3K, T5u, T3I, T3J;
+		    T3P = T3L + T3O;
+		    T5v = T3L - T3O;
+		    T3I = T3G - T3H;
+		    T3J = TQ - TX;
+		    T3K = T3I - T3J;
+		    T5u = T3J + T3I;
+		    T3Q = KP707106781 * (T3K - T3P);
+		    T8p = KP707106781 * (T5v - T5u);
+		    T5w = KP707106781 * (T5u + T5v);
+		    T89 = KP707106781 * (T3K + T3P);
+	       }
+	       {
+		    E T47, T5B, T4c, T5C;
+		    {
+			 E T45, T46, T48, T4b;
+			 T45 = T43 - T44;
+			 T46 = T20 - T27;
+			 T47 = T45 + T46;
+			 T5B = T45 - T46;
+			 T48 = T1J - T1U;
+			 T4b = T49 - T4a;
+			 T4c = T48 - T4b;
+			 T5C = T48 + T4b;
+		    }
+		    T4d = FMA(KP382683432, T47, KP923879532 * T4c);
+		    T61 = FNMS(KP382683432, T5B, KP923879532 * T5C);
+		    T5f = FNMS(KP923879532, T47, KP382683432 * T4c);
+		    T5D = FMA(KP923879532, T5B, KP382683432 * T5C);
+	       }
+	       {
+		    E T1E, T7e, T29, T6p, T6s, T7f;
+		    T1E = T1s + T1D;
+		    T7e = T6k + T6l;
+		    {
+			 E T1V, T28, T6q, T6r;
+			 T1V = T1J + T1U;
+			 T28 = T20 + T27;
+			 T29 = T1V + T28;
+			 T6p = T1V - T28;
+			 T6q = T43 + T44;
+			 T6r = T49 + T4a;
+			 T6s = T6q - T6r;
+			 T7f = T6q + T6r;
+		    }
+		    T2a = T1E + T29;
+		    T6t = T6p + T6s;
+		    T7O = T29 - T1E;
+		    T7C = T7e + T7f;
+		    T7g = T7e - T7f;
+		    T6Z = T6p - T6s;
+	       }
+	       {
+		    E T4k, T5J, T4B, T5G, T4v, T5H, T4E, T5K, T4j, T4A;
+		    T4j = T2n - T2w;
+		    T4k = T4i + T4j;
+		    T5J = T4i - T4j;
+		    T4A = T4y - T4z;
+		    T4B = T4x - T4A;
+		    T5G = T4x + T4A;
+		    {
+			 E T4p, T4u, T4C, T4D;
+			 T4p = T4l - T4o;
+			 T4u = T4q + T4t;
+			 T4v = KP707106781 * (T4p - T4u);
+			 T5H = KP707106781 * (T4u + T4p);
+			 T4C = T4t - T4q;
+			 T4D = T4l + T4o;
+			 T4E = KP707106781 * (T4C - T4D);
+			 T5K = KP707106781 * (T4C + T4D);
+		    }
+		    T4w = T4k - T4v;
+		    T64 = T5G + T5H;
+		    T65 = T5J + T5K;
+		    T4F = T4B - T4E;
+		    T5i = T4k + T4v;
+		    T5I = T5G - T5H;
+		    T5L = T5J - T5K;
+		    T5j = T4B + T4E;
+	       }
+	       {
+		    E T2y, T6B, T6y, T7j, T2R, T6z, T6E, T7k, T2x, T6x;
+		    T2x = T2n + T2w;
+		    T2y = T2k + T2x;
+		    T6B = T2k - T2x;
+		    T6x = T4y + T4z;
+		    T6y = T6w - T6x;
+		    T7j = T6w + T6x;
+		    {
+			 E T2H, T2Q, T6C, T6D;
+			 T2H = T2B + T2G;
+			 T2Q = T2M + T2P;
+			 T2R = T2H + T2Q;
+			 T6z = T2Q - T2H;
+			 T6C = T4r + T4s;
+			 T6D = T4m + T4n;
+			 T6E = T6C - T6D;
+			 T7k = T6C + T6D;
+		    }
+		    T2S = T2y + T2R;
+		    T7l = T7j - T7k;
+		    T7y = T7j + T7k;
+		    T6A = T6y - T6z;
+		    T6F = T6B - T6E;
+		    T73 = T6B + T6E;
+		    T7i = T2y - T2R;
+		    T72 = T6y + T6z;
+	       }
+	       {
+		    E T4L, T5N, T55, T5O, T4W, T5R, T52, T5Q;
+		    {
+			 E T4H, T4K, T53, T54;
+			 T4H = T31 - T36;
+			 T4K = T4I - T4J;
+			 T4L = T4H - T4K;
+			 T5N = T4H + T4K;
+			 T53 = T4R - T4U;
+			 T54 = T4P + T4O;
+			 T55 = KP707106781 * (T53 - T54);
+			 T5O = KP707106781 * (T54 + T53);
+		    }
+		    {
+			 E T4Q, T4V, T50, T51;
+			 T4Q = T4O - T4P;
+			 T4V = T4R + T4U;
+			 T4W = KP707106781 * (T4Q - T4V);
+			 T5R = KP707106781 * (T4Q + T4V);
+			 T50 = T4Y - T4Z;
+			 T51 = T3a - T3f;
+			 T52 = T50 + T51;
+			 T5Q = T50 - T51;
+		    }
+		    T4X = T4L - T4W;
+		    T67 = T5N + T5O;
+		    T68 = T5Q + T5R;
+		    T56 = T52 - T55;
+		    T5l = T4L + T4W;
+		    T5P = T5N - T5O;
+		    T5S = T5Q - T5R;
+		    T5m = T52 + T55;
+	       }
+	       {
+		    E T3y, T6P, T6K, T7p, T3h, T6H, T6O, T7o, T3x, T6J;
+		    T3x = T3t + T3w;
+		    T3y = T3q + T3x;
+		    T6P = T3x - T3q;
+		    T6J = T4S + T4T;
+		    T6K = T6I - T6J;
+		    T7p = T6I + T6J;
+		    {
+			 E T37, T3g, T6M, T6N;
+			 T37 = T31 + T36;
+			 T3g = T3a + T3f;
+			 T3h = T37 + T3g;
+			 T6H = T37 - T3g;
+			 T6M = T4Y + T4Z;
+			 T6N = T4I + T4J;
+			 T6O = T6M - T6N;
+			 T7o = T6M + T6N;
+		    }
+		    T3z = T3h + T3y;
+		    T7q = T7o - T7p;
+		    T7z = T7o + T7p;
+		    T6L = T6H - T6K;
+		    T6Q = T6O - T6P;
+		    T76 = T6O + T6P;
+		    T7n = T3h - T3y;
+		    T75 = T6H + T6K;
+	       }
+	       {
+		    E T3A, T7A, T2b, T7x, T1j;
+		    T3A = T2S + T3z;
+		    T7A = T7y - T7z;
+		    T1j = TL + T1i;
+		    T2b = T1j + T2a;
+		    T7x = T1j - T2a;
+		    ri[WS(ios, 16)] = T2b - T3A;
+		    ri[WS(ios, 8)] = T7x + T7A;
+		    ri[0] = T2b + T3A;
+		    ri[WS(ios, 24)] = T7x - T7A;
+	       }
+	       {
+		    E T7B, T7L, T7K, T7M, T7J;
+		    T7B = T7y + T7z;
+		    T7L = T3z - T2S;
+		    T7J = T7D + T7I;
+		    T7K = T7C + T7J;
+		    T7M = T7J - T7C;
+		    ii[0] = T7B + T7K;
+		    ii[WS(ios, 24)] = T7M - T7L;
+		    ii[WS(ios, 16)] = T7K - T7B;
+		    ii[WS(ios, 8)] = T7L + T7M;
+	       }
+	       {
+		    E T7h, T7t, T7Q, T7S, T7s, T7R, T7w, T7N, T7d, T7P;
+		    T7d = TL - T1i;
+		    T7h = T7d + T7g;
+		    T7t = T7d - T7g;
+		    T7P = T7I - T7D;
+		    T7Q = T7O + T7P;
+		    T7S = T7P - T7O;
+		    {
+			 E T7m, T7r, T7u, T7v;
+			 T7m = T7i + T7l;
+			 T7r = T7n - T7q;
+			 T7s = KP707106781 * (T7m + T7r);
+			 T7R = KP707106781 * (T7r - T7m);
+			 T7u = T7l - T7i;
+			 T7v = T7n + T7q;
+			 T7w = KP707106781 * (T7u - T7v);
+			 T7N = KP707106781 * (T7u + T7v);
+		    }
+		    ri[WS(ios, 20)] = T7h - T7s;
+		    ii[WS(ios, 20)] = T7Q - T7N;
+		    ri[WS(ios, 4)] = T7h + T7s;
+		    ii[WS(ios, 4)] = T7N + T7Q;
+		    ri[WS(ios, 28)] = T7t - T7w;
+		    ii[WS(ios, 28)] = T7S - T7R;
+		    ri[WS(ios, 12)] = T7t + T7w;
+		    ii[WS(ios, 12)] = T7R + T7S;
+	       }
+	       {
+		    E T71, T79, T7Y, T80, T78, T7Z, T7c, T7T;
+		    {
+			 E T6X, T70, T7U, T7X;
+			 T6X = T6f + T6i;
+			 T70 = KP707106781 * (T6Y + T6Z);
+			 T71 = T6X + T70;
+			 T79 = T6X - T70;
+			 T7U = KP707106781 * (T6o + T6t);
+			 T7X = T7V + T7W;
+			 T7Y = T7U + T7X;
+			 T80 = T7X - T7U;
+		    }
+		    {
+			 E T74, T77, T7a, T7b;
+			 T74 = FMA(KP382683432, T72, KP923879532 * T73);
+			 T77 = FNMS(KP382683432, T76, KP923879532 * T75);
+			 T78 = T74 + T77;
+			 T7Z = T77 - T74;
+			 T7a = FNMS(KP382683432, T73, KP923879532 * T72);
+			 T7b = FMA(KP923879532, T76, KP382683432 * T75);
+			 T7c = T7a - T7b;
+			 T7T = T7a + T7b;
+		    }
+		    ri[WS(ios, 18)] = T71 - T78;
+		    ii[WS(ios, 18)] = T7Y - T7T;
+		    ri[WS(ios, 2)] = T71 + T78;
+		    ii[WS(ios, 2)] = T7T + T7Y;
+		    ri[WS(ios, 26)] = T79 - T7c;
+		    ii[WS(ios, 26)] = T80 - T7Z;
+		    ri[WS(ios, 10)] = T79 + T7c;
+		    ii[WS(ios, 10)] = T7Z + T80;
+	       }
+	       {
+		    E T4f, T59, T8y, T8A, T58, T8z, T5c, T8v;
+		    {
+			 E T3R, T4e, T8w, T8x;
+			 T3R = T3F - T3Q;
+			 T4e = T42 - T4d;
+			 T4f = T3R + T4e;
+			 T59 = T3R - T4e;
+			 T8w = T5f - T5e;
+			 T8x = T8q - T8p;
+			 T8y = T8w + T8x;
+			 T8A = T8x - T8w;
+		    }
+		    {
+			 E T4G, T57, T5a, T5b;
+			 T4G = FMA(KP980785280, T4w, KP195090322 * T4F);
+			 T57 = FNMS(KP980785280, T56, KP195090322 * T4X);
+			 T58 = T4G + T57;
+			 T8z = T57 - T4G;
+			 T5a = FNMS(KP980785280, T4F, KP195090322 * T4w);
+			 T5b = FMA(KP195090322, T56, KP980785280 * T4X);
+			 T5c = T5a - T5b;
+			 T8v = T5a + T5b;
+		    }
+		    ri[WS(ios, 23)] = T4f - T58;
+		    ii[WS(ios, 23)] = T8y - T8v;
+		    ri[WS(ios, 7)] = T4f + T58;
+		    ii[WS(ios, 7)] = T8v + T8y;
+		    ri[WS(ios, 31)] = T59 - T5c;
+		    ii[WS(ios, 31)] = T8A - T8z;
+		    ri[WS(ios, 15)] = T59 + T5c;
+		    ii[WS(ios, 15)] = T8z + T8A;
+	       }
+	       {
+		    E T5F, T5V, T8k, T8m, T5U, T8l, T5Y, T8h;
+		    {
+			 E T5x, T5E, T8i, T8j;
+			 T5x = T5t - T5w;
+			 T5E = T5A - T5D;
+			 T5F = T5x + T5E;
+			 T5V = T5x - T5E;
+			 T8i = T61 - T60;
+			 T8j = T8c - T89;
+			 T8k = T8i + T8j;
+			 T8m = T8j - T8i;
+		    }
+		    {
+			 E T5M, T5T, T5W, T5X;
+			 T5M = FMA(KP555570233, T5I, KP831469612 * T5L);
+			 T5T = FNMS(KP831469612, T5S, KP555570233 * T5P);
+			 T5U = T5M + T5T;
+			 T8l = T5T - T5M;
+			 T5W = FNMS(KP831469612, T5I, KP555570233 * T5L);
+			 T5X = FMA(KP831469612, T5P, KP555570233 * T5S);
+			 T5Y = T5W - T5X;
+			 T8h = T5W + T5X;
+		    }
+		    ri[WS(ios, 21)] = T5F - T5U;
+		    ii[WS(ios, 21)] = T8k - T8h;
+		    ri[WS(ios, 5)] = T5F + T5U;
+		    ii[WS(ios, 5)] = T8h + T8k;
+		    ri[WS(ios, 29)] = T5V - T5Y;
+		    ii[WS(ios, 29)] = T8m - T8l;
+		    ri[WS(ios, 13)] = T5V + T5Y;
+		    ii[WS(ios, 13)] = T8l + T8m;
+	       }
+	       {
+		    E T6v, T6T, T84, T86, T6S, T85, T6W, T81;
+		    {
+			 E T6j, T6u, T82, T83;
+			 T6j = T6f - T6i;
+			 T6u = KP707106781 * (T6o - T6t);
+			 T6v = T6j + T6u;
+			 T6T = T6j - T6u;
+			 T82 = KP707106781 * (T6Z - T6Y);
+			 T83 = T7W - T7V;
+			 T84 = T82 + T83;
+			 T86 = T83 - T82;
+		    }
+		    {
+			 E T6G, T6R, T6U, T6V;
+			 T6G = FMA(KP923879532, T6A, KP382683432 * T6F);
+			 T6R = FNMS(KP923879532, T6Q, KP382683432 * T6L);
+			 T6S = T6G + T6R;
+			 T85 = T6R - T6G;
+			 T6U = FNMS(KP923879532, T6F, KP382683432 * T6A);
+			 T6V = FMA(KP382683432, T6Q, KP923879532 * T6L);
+			 T6W = T6U - T6V;
+			 T81 = T6U + T6V;
+		    }
+		    ri[WS(ios, 22)] = T6v - T6S;
+		    ii[WS(ios, 22)] = T84 - T81;
+		    ri[WS(ios, 6)] = T6v + T6S;
+		    ii[WS(ios, 6)] = T81 + T84;
+		    ri[WS(ios, 30)] = T6T - T6W;
+		    ii[WS(ios, 30)] = T86 - T85;
+		    ri[WS(ios, 14)] = T6T + T6W;
+		    ii[WS(ios, 14)] = T85 + T86;
+	       }
+	       {
+		    E T5h, T5p, T8s, T8u, T5o, T8t, T5s, T8n;
+		    {
+			 E T5d, T5g, T8o, T8r;
+			 T5d = T3F + T3Q;
+			 T5g = T5e + T5f;
+			 T5h = T5d + T5g;
+			 T5p = T5d - T5g;
+			 T8o = T42 + T4d;
+			 T8r = T8p + T8q;
+			 T8s = T8o + T8r;
+			 T8u = T8r - T8o;
+		    }
+		    {
+			 E T5k, T5n, T5q, T5r;
+			 T5k = FMA(KP555570233, T5i, KP831469612 * T5j);
+			 T5n = FNMS(KP555570233, T5m, KP831469612 * T5l);
+			 T5o = T5k + T5n;
+			 T8t = T5n - T5k;
+			 T5q = FNMS(KP555570233, T5j, KP831469612 * T5i);
+			 T5r = FMA(KP831469612, T5m, KP555570233 * T5l);
+			 T5s = T5q - T5r;
+			 T8n = T5q + T5r;
+		    }
+		    ri[WS(ios, 19)] = T5h - T5o;
+		    ii[WS(ios, 19)] = T8s - T8n;
+		    ri[WS(ios, 3)] = T5h + T5o;
+		    ii[WS(ios, 3)] = T8n + T8s;
+		    ri[WS(ios, 27)] = T5p - T5s;
+		    ii[WS(ios, 27)] = T8u - T8t;
+		    ri[WS(ios, 11)] = T5p + T5s;
+		    ii[WS(ios, 11)] = T8t + T8u;
+	       }
+	       {
+		    E T63, T6b, T8e, T8g, T6a, T8f, T6e, T87;
+		    {
+			 E T5Z, T62, T88, T8d;
+			 T5Z = T5t + T5w;
+			 T62 = T60 + T61;
+			 T63 = T5Z + T62;
+			 T6b = T5Z - T62;
+			 T88 = T5A + T5D;
+			 T8d = T89 + T8c;
+			 T8e = T88 + T8d;
+			 T8g = T8d - T88;
+		    }
+		    {
+			 E T66, T69, T6c, T6d;
+			 T66 = FMA(KP980785280, T64, KP195090322 * T65);
+			 T69 = FNMS(KP195090322, T68, KP980785280 * T67);
+			 T6a = T66 + T69;
+			 T8f = T69 - T66;
+			 T6c = FNMS(KP195090322, T64, KP980785280 * T65);
+			 T6d = FMA(KP195090322, T67, KP980785280 * T68);
+			 T6e = T6c - T6d;
+			 T87 = T6c + T6d;
+		    }
+		    ri[WS(ios, 17)] = T63 - T6a;
+		    ii[WS(ios, 17)] = T8e - T87;
+		    ri[WS(ios, 1)] = T63 + T6a;
+		    ii[WS(ios, 1)] = T87 + T8e;
+		    ri[WS(ios, 25)] = T6b - T6e;
+		    ii[WS(ios, 25)] = T8g - T8f;
+		    ri[WS(ios, 9)] = T6b + T6e;
+		    ii[WS(ios, 9)] = T8f + T8g;
+	       }
+	  }
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = { /* table referenced by desc below; NOTE(review): presumably describes the W values the t2_32 kernel reads -- confirm against the tw_instr definition */
+     {TW_COS, 0, 1}, /* entries come as a TW_COS/TW_SIN pair sharing the same last field */
+     {TW_SIN, 0, 1},
+     {TW_COS, 0, 3},
+     {TW_SIN, 0, 3},
+     {TW_COS, 0, 9},
+     {TW_SIN, 0, 9},
+     {TW_COS, 0, 27}, /* the last fields used are 1, 3, 9 and 27, i.e. successive powers of three */
+     {TW_SIN, 0, 27},
+     {TW_NEXT, 1, 0} /* closing entry after the eight COS/SIN entries; NOTE(review): exact meaning of its fields is defined outside this file */
+};
+
+static const ct_desc desc = { 32, "t2_32", twinstr, {376, 168, 112, 0}, &GENUS, 0, 0, 0 }; /* descriptor passed to kdft_dit_register below: the number 32, the name string, the twinstr table, a four-number group and &GENUS; NOTE(review): the group presumably holds operation counts and the trailing zeros are unexplained here -- confirm against the ct_desc definition */
+
+void X(codelet_t2_32) (planner *p) { /* registration entry point for this file's kernel; p is only forwarded */
+     X(kdft_dit_register) (p, t2_32, &desc); /* passes p, the t2_32 function and the address of desc to kdft_dit_register */
+}
diff --git a/src/fftw3/dft/codelets/standard/t2_4.c b/src/fftw3/dft/codelets/standard/t2_4.c
new file mode 100644
index 0000000..ced1a63
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t2_4.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:30:08 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -twiddle-log3 -n 4 -name t2_4 -include t.h */
+
+/*
+ * This function contains 24 FP additions, 16 FP multiplications,
+ * (or, 16 additions, 8 multiplications, 8 fused multiply/add),
+ * 21 stack variables, and 16 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t2_4.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t2_4.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t2_4.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t2_4(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{ /* t2_4: runs m in-place 4-point butterflies on ri/ii, scaling elements 1..3 by factors built from W; returns W after the last advance */
+     int i; /* iterations left; the loop body never runs when m <= 0, and W is then returned unchanged */
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 4) { /* after each pass ri and ii move by dist and W moves past the 4 values read from it */
+	  E T1, Tp, Ta, Te, To, Tl, Tk, Th; /* element 0 (T1, Tp) and the scaled elements 2 (Ta, To), 1 (Te, Tk), 3 (Th, Tl) */
+	  T1 = ri[0]; /* element 0 is used exactly as loaded; no W factor is applied to it */
+	  Tp = ii[0];
+	  {
+	       E T7, T9, Tc, Td, Tg, Tf, T2, T4, T3, T5, T6, T8;
+	       T7 = ri[WS(ios, 2)]; /* elements are addressed through WS(ios, k); every input of this pass is read before the first store */
+	       T9 = ii[WS(ios, 2)];
+	       Tc = ri[WS(ios, 1)];
+	       Td = ii[WS(ios, 1)];
+	       Tg = ii[WS(ios, 3)];
+	       Tf = ri[WS(ios, 3)];
+	       T2 = W[2]; /* (W[2], W[3]) is the factor pair applied to element 3 */
+	       T4 = W[3];
+	       T3 = W[0]; /* (W[0], W[1]) is the factor pair applied to element 1 */
+	       T5 = W[1];
+	       T6 = FMA(T2, T3, T4 * T5); /* NOTE(review): formulas in these comments assume FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b -- confirm in the codelet headers. T6 = W[2]*W[0] + W[3]*W[1] */
+	       T8 = FNMS(T4, T3, T2 * T5); /* T8 = W[2]*W[1] - W[3]*W[0]; (T6, T8) is the factor for element 2, computed from the two loaded pairs instead of being read from W */
+	       Ta = FNMS(T8, T9, T6 * T7); /* Ta = T6*T7 - T8*T9: ri-side of element 2 after scaling */
+	       Te = FMA(T3, Tc, T5 * Td); /* Te = W[0]*Tc + W[1]*Td: ri-side of element 1 after scaling */
+	       To = FMA(T8, T7, T6 * T9); /* To = T8*T7 + T6*T9: ii-side of element 2 after scaling */
+	       Tl = FNMS(T4, Tf, T2 * Tg); /* Tl = W[2]*Tg - W[3]*Tf: ii-side of element 3 after scaling */
+	       Tk = FNMS(T5, Tc, T3 * Td); /* Tk = W[0]*Td - W[1]*Tc: ii-side of element 1 after scaling */
+	       Th = FMA(T2, Tf, T4 * Tg); /* Th = W[2]*Tf + W[3]*Tg: ri-side of element 3 after scaling */
+	  }
+	  { /* outputs 0 and 2: sum and difference of (element 0 + element 2) and (element 1 + element 3) */
+	       E Tb, Ti, Tn, Tq;
+	       Tb = T1 + Ta;
+	       Ti = Te + Th;
+	       ri[WS(ios, 2)] = Tb - Ti;
+	       ri[0] = Tb + Ti;
+	       Tn = Tk + Tl;
+	       Tq = To + Tp;
+	       ii[0] = Tn + Tq;
+	       ii[WS(ios, 2)] = Tq - Tn;
+	  }
+	  { /* outputs 1 and 3: (element 0 - element 2) combined with the cross-wired difference (element 1 - element 3) */
+	       E Tj, Tm, Tr, Ts;
+	       Tj = T1 - Ta;
+	       Tm = Tk - Tl; /* an ii-side difference that feeds the ri outputs */
+	       ri[WS(ios, 3)] = Tj - Tm;
+	       ri[WS(ios, 1)] = Tj + Tm;
+	       Tr = Tp - To;
+	       Ts = Te - Th; /* an ri-side difference that feeds the ii outputs, with the opposite sign pattern to Tm above */
+	       ii[WS(ios, 1)] = Tr - Ts;
+	       ii[WS(ios, 3)] = Ts + Tr;
+	  }
+     }
+     return W; /* W has been advanced by 4 for every pass executed */
+}
+
+static const tw_instr twinstr[] = { /* table referenced by desc below; it has four COS/SIN entries, the same number of W values t2_4 reads (W[0]..W[3]) and skips (W + 4) per pass */
+     {TW_COS, 0, 1}, /* NOTE(review): presumably entry k supplies W[k]; the tw_instr field meanings are defined outside this file */
+     {TW_SIN, 0, 1},
+     {TW_COS, 0, 3}, /* only last fields 1 and 3 are listed; t2_4 computes its element-2 factor from these two pairs */
+     {TW_SIN, 0, 3},
+     {TW_NEXT, 1, 0} /* closing entry after the COS/SIN entries */
+};
+
+static const ct_desc desc = { 4, "t2_4", twinstr, {16, 8, 8, 0}, &GENUS, 0, 0, 0 }; /* descriptor passed to kdft_dit_register below; the first three numbers of {16, 8, 8, 0} equal the additions, multiplications and fused multiply/adds stated in this file's header comment; NOTE(review): meaning of the remaining fields should be confirmed against the ct_desc definition */
+
+void X(codelet_t2_4) (planner *p) { /* registration entry point for this file's kernel; p is only forwarded */
+     X(kdft_dit_register) (p, t2_4, &desc); /* passes p, the t2_4 function and the address of desc to kdft_dit_register */
+}
diff --git a/src/fftw3/dft/codelets/standard/t2_64.c b/src/fftw3/dft/codelets/standard/t2_64.c
new file mode 100644
index 0000000..6fc7efd
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t2_64.c
@@ -0,0 +1,1906 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:30:32 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -twiddle-log3 -n 64 -name t2_64 -include t.h */
+
+/*
+ * This function contains 1154 FP additions, 660 FP multiplications,
+ * (or, 880 additions, 386 multiplications, 274 fused multiply/add),
+ * 382 stack variables, and 256 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t2_64.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t2_64.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t2_64.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+
+static const R *t2_64(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{
+     DK(KP290284677, +0.290284677254462367636192375817395274691476278);
+     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
+     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
+     DK(KP471396736, +0.471396736825997648556387625905254377657460319);
+     DK(KP098017140, +0.098017140329560601994195563888641845861136673);
+     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
+     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
+     DK(KP634393284, +0.634393284163645498215171613225493370675687095);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     int i;
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 10) {
+	  E T1, Ti1, Tp, Tt, TH, TL, T6a, T6c, T4J, T4H, T1g, T91, T7W, T7m, T2O;
+	  E T4j, T7P, T4P, T8y, T2w, T8t, T2Z, T8e, T48, T1z, T7s, T1I, T7t, T8p, Ten;
+	  E T1Y, T7D, T2t, T7O, T7L, Te6, T3N, T8E, T7A, Te0, T4C, TeA, T8S, T9v, T65;
+	  E Tfi, T9J, Taq, T6K, Tf6, Ta2, Ta5, T73, Tfc, Tad, Tag, T3z, T83, T3u, T82;
+	  E T81, T84, T15, T9K, T68, T7j, T43, T9w, T4F, T8G, T5l, TeL, T9k, T9n, T6o;
+	  E Tf2, T9Q, T9R, T6z, Tf3, T9T, T9W, To, Ts, T4o, T8u, T4U, T92, T5a, TeT;
+	  E T8V, T8Y, T5G, TeG, T97, T9e, T27, T7X, T2T, T7E, T7b, Tai, T6T, Ta3, Tf7;
+	  E Ta8, T7Q, T2H, T2c, T76, Tah, T7F, T4d, T8z, TG, TK, T69, T6b, T3b, T87;
+	  E T5u, T9l, TeM, T9q, T88, T89, T3o, T86, T5P, T9f, TeH, T9a, T34, T8f, T1r;
+	  E T7n, T3S, T8F, T4G, T4I;
+	  T1 = ri[0];
+	  Ti1 = ii[0];
+	  Tp = ri[WS(ios, 32)];
+	  Tt = ii[WS(ios, 32)];
+	  TH = ri[WS(ios, 16)];
+	  TL = ii[WS(ios, 16)];
+	  T6a = ri[WS(ios, 47)];
+	  T6c = ii[WS(ios, 47)];
+	  T4J = ii[WS(ios, 49)];
+	  T4H = ri[WS(ios, 49)];
+	  {
+	       E T12, T14, T1b, T1f, T1q, T1m, T1w, T1y, T1D, T1H, T1S, T1M, T1N, T1W, T2M;
+	       E T2g, T2b, T29, T26, T22, T2i, T2E, T2G, T2v, T2u, T2r, T2n, T2N, T3a, T38;
+	       E T3l, T3n, T3r, T3t, T33, T31, T2Y, T2W, T4g, T2S, T2Q, T3w, T3y, T3E, T3G;
+	       E T3P, T3J, T3L, T3R, T4a, T4c, T47, T46, T42, T40, T4i, T6P, T6R, T6M, T6L;
+	       E T6I, T6G, T6W, T6Y, T74, T75, T5I, T78, T7a, T6x, T6v, T6s, T6q, T6h, T6m;
+	       E T6k, T6g, T5N, T5L, T5Z, T63, T66, T67, T5H, T54, T4D, T4A, T4y, T4n, T4l;
+	       E T4E, T4X, T4Z, T4T, T4R, T4O, T4N, T58, T5s, T5q, T5x, T5z, T5C, T5E, T5n;
+	       E T5m, T5j, T5h, T5d, T5e, Ta, Ty, Tf, Tw, T2, Tj, T3, Tc, T1E, T1B;
+	       E T1F, T1A, T1R, T3x, T2m, T3K, T61, T1V, T60, T3I, T51, T52, T2V, T56, T5X;
+	       E T3v, T55, T2X, T2q, T5W, T4w, T6E, Ta0, T8Q, Tac, T72, Tb, Tg, Th, T3e;
+	       E T3f, T3h, T1a, T2x, T2B, TU, TV, TY, T1e, T2y, T2A, TC, TD, T1u, Tk;
+	       E Tl, Tm, T39, T3U, T3W, T37, T3T, T3X, TQ, TR, TZ, T3c, T3d, T3i, Tx;
+	       E Tz, T1t, TN, TX, T2f, T5V, Tao, T2h, T3D, T4f, T4h, T3F, T3q, T3s;
+	       T12 = ri[WS(ios, 48)];
+	       T14 = ii[WS(ios, 48)];
+	       T1b = ri[WS(ios, 8)];
+	       T1f = ii[WS(ios, 8)];
+	       T1q = ii[WS(ios, 40)];
+	       T1m = ri[WS(ios, 40)];
+	       T1w = ri[WS(ios, 56)];
+	       T1y = ii[WS(ios, 56)];
+	       T1D = ri[WS(ios, 24)];
+	       T1H = ii[WS(ios, 24)];
+	       T1S = ri[WS(ios, 36)];
+	       T1M = ri[WS(ios, 4)];
+	       T1N = ii[WS(ios, 4)];
+	       T1W = ii[WS(ios, 36)];
+	       T2M = ri[WS(ios, 2)];
+	       T2g = ri[WS(ios, 60)];
+	       T2b = ii[WS(ios, 52)];
+	       T29 = ri[WS(ios, 52)];
+	       T26 = ii[WS(ios, 20)];
+	       T22 = ri[WS(ios, 20)];
+	       T2i = ii[WS(ios, 60)];
+	       T2E = ri[WS(ios, 44)];
+	       T2G = ii[WS(ios, 44)];
+	       T2v = ii[WS(ios, 12)];
+	       T2u = ri[WS(ios, 12)];
+	       T2r = ii[WS(ios, 28)];
+	       T2n = ri[WS(ios, 28)];
+	       T2N = ii[WS(ios, 2)];
+	       T3a = ii[WS(ios, 10)];
+	       T38 = ri[WS(ios, 10)];
+	       T3l = ri[WS(ios, 42)];
+	       T3n = ii[WS(ios, 42)];
+	       T3r = ri[WS(ios, 58)];
+	       T3t = ii[WS(ios, 58)];
+	       T33 = ii[WS(ios, 50)];
+	       T31 = ri[WS(ios, 50)];
+	       T2Y = ii[WS(ios, 18)];
+	       T2W = ri[WS(ios, 18)];
+	       T4g = ri[WS(ios, 54)];
+	       T2S = ii[WS(ios, 34)];
+	       T2Q = ri[WS(ios, 34)];
+	       T3w = ri[WS(ios, 26)];
+	       T3y = ii[WS(ios, 26)];
+	       T3E = ri[WS(ios, 62)];
+	       T3G = ii[WS(ios, 62)];
+	       T3P = ri[WS(ios, 14)];
+	       T3J = ri[WS(ios, 30)];
+	       T3L = ii[WS(ios, 30)];
+	       T3R = ii[WS(ios, 14)];
+	       T4a = ri[WS(ios, 38)];
+	       T4c = ii[WS(ios, 38)];
+	       T47 = ii[WS(ios, 6)];
+	       T46 = ri[WS(ios, 6)];
+	       T42 = ii[WS(ios, 46)];
+	       T40 = ri[WS(ios, 46)];
+	       T4i = ii[WS(ios, 54)];
+	       T6P = ri[WS(ios, 51)];
+	       T6R = ii[WS(ios, 51)];
+	       T6M = ii[WS(ios, 19)];
+	       T6L = ri[WS(ios, 19)];
+	       T6I = ii[WS(ios, 35)];
+	       T6G = ri[WS(ios, 35)];
+	       T6W = ri[WS(ios, 59)];
+	       T6Y = ii[WS(ios, 59)];
+	       T74 = ri[WS(ios, 11)];
+	       T75 = ii[WS(ios, 11)];
+	       T5I = ii[WS(ios, 13)];
+	       T78 = ri[WS(ios, 43)];
+	       T7a = ii[WS(ios, 43)];
+	       T6x = ii[WS(ios, 23)];
+	       T6v = ri[WS(ios, 23)];
+	       T6s = ii[WS(ios, 55)];
+	       T6q = ri[WS(ios, 55)];
+	       T6h = ii[WS(ios, 7)];
+	       T6m = ii[WS(ios, 39)];
+	       T6k = ri[WS(ios, 39)];
+	       T6g = ri[WS(ios, 7)];
+	       T5N = ii[WS(ios, 45)];
+	       T5L = ri[WS(ios, 45)];
+	       T5Z = ri[WS(ios, 31)];
+	       T63 = ii[WS(ios, 31)];
+	       T66 = ri[WS(ios, 15)];
+	       T67 = ii[WS(ios, 15)];
+	       T5H = ri[WS(ios, 13)];
+	       T54 = ri[WS(ios, 25)];
+	       T4D = ri[WS(ios, 17)];
+	       T4A = ii[WS(ios, 33)];
+	       T4y = ri[WS(ios, 33)];
+	       T4n = ii[WS(ios, 22)];
+	       T4l = ri[WS(ios, 22)];
+	       T4E = ii[WS(ios, 17)];
+	       T4X = ri[WS(ios, 57)];
+	       T4Z = ii[WS(ios, 57)];
+	       T4T = ii[WS(ios, 41)];
+	       T4R = ri[WS(ios, 41)];
+	       T4O = ii[WS(ios, 9)];
+	       T4N = ri[WS(ios, 9)];
+	       T58 = ii[WS(ios, 25)];
+	       T5s = ii[WS(ios, 53)];
+	       T5q = ri[WS(ios, 53)];
+	       T5x = ri[WS(ios, 61)];
+	       T5z = ii[WS(ios, 61)];
+	       T5C = ri[WS(ios, 29)];
+	       T5E = ii[WS(ios, 29)];
+	       T5n = ii[WS(ios, 21)];
+	       T5m = ri[WS(ios, 21)];
+	       T5j = ii[WS(ios, 37)];
+	       T5h = ri[WS(ios, 37)];
+	       T5d = ri[WS(ios, 5)];
+	       T5e = ii[WS(ios, 5)];
+	       {
+		    E T4u, T4v, T5T, T5U, T6C, T6D, T70, T71, T4, T7, T5, T8, TO, TP, T1U;
+		    E T2p, T18, T2k, T2l, T2o, TT, TS, T19, T1c, T1T, T1P, T1Q, T1d;
+		    T4u = ri[WS(ios, 1)];
+		    T4v = ii[WS(ios, 1)];
+		    T5T = ri[WS(ios, 63)];
+		    T5U = ii[WS(ios, 63)];
+		    T6C = ri[WS(ios, 3)];
+		    T6D = ii[WS(ios, 3)];
+		    T70 = ri[WS(ios, 27)];
+		    T71 = ii[WS(ios, 27)];
+		    {
+			 E T6, Te, T9, Td;
+			 T4 = W[2];
+			 T7 = W[3];
+			 T5 = W[0];
+			 T8 = W[1];
+			 T6 = T4 * T5;
+			 Te = T7 * T5;
+			 T9 = T7 * T8;
+			 Td = T4 * T8;
+			 Ta = T6 - T9;
+			 Ty = Td - Te;
+			 Tf = Td + Te;
+			 Tw = T6 + T9;
+			 T2 = W[6];
+			 Tj = W[7];
+			 T3 = W[4];
+			 Tc = W[5];
+			 TO = T3 * T4;
+			 TP = Tc * T7;
+			 T1U = Tj * T3;
+			 T2p = Tj * T5;
+			 T18 = T3 * T5;
+			 T2k = T2 * T5;
+			 T2l = Tj * T8;
+			 T2o = T2 * T8;
+			 TT = Tc * T4;
+			 TS = T3 * T7;
+			 T19 = Tc * T8;
+			 T1c = T3 * T8;
+			 T1E = T2 * T7;
+			 T1T = T2 * Tc;
+			 T1B = Tj * T7;
+			 T1F = Tj * T4;
+			 T1P = T2 * T3;
+			 T1Q = Tj * Tc;
+			 T1A = T2 * T4;
+			 T1d = Tc * T5;
+		    }
+		    T1R = T1P - T1Q;
+		    T3x = T2o - T2p;
+		    T2m = T2k - T2l;
+		    T3K = T1E + T1F;
+		    T61 = Tj * Ta;
+		    T1V = T1T + T1U;
+		    T60 = T2 * Tf;
+		    T3I = T1A - T1B;
+		    T51 = T2 * Tw;
+		    T52 = Tj * Ty;
+		    T2V = T1P + T1Q;
+		    T56 = Tj * Tw;
+		    T5X = Tj * Tf;
+		    T3v = T2k + T2l;
+		    T55 = T2 * Ty;
+		    T2X = T1T - T1U;
+		    T2q = T2o + T2p;
+		    T5W = T2 * Ta;
+		    T4w = FMA(T5, T4u, T8 * T4v);
+		    T6E = FMA(T4, T6C, T7 * T6D);
+		    Ta0 = FNMS(T7, T6C, T4 * T6D);
+		    T8Q = FNMS(T8, T4u, T5 * T4v);
+		    Tac = FNMS(Tj, T70, T2 * T71);
+		    T72 = FMA(T2, T70, Tj * T71);
+		    Tb = T3 * Ta;
+		    Tg = Tc * Tf;
+		    Th = Tb + Tg;
+		    T3e = TS - TT;
+		    T3f = Tj * T3e;
+		    T3h = T2 * T3e;
+		    T1a = T18 + T19;
+		    T2x = T2 * T1a;
+		    T2B = Tj * T1a;
+		    TU = TS + TT;
+		    TV = Tj * TU;
+		    TY = T2 * TU;
+		    T1e = T1c - T1d;
+		    T2y = Tj * T1e;
+		    T2A = T2 * T1e;
+		    TC = T3 * Ty;
+		    TD = Tc * Tw;
+		    T1u = TC + TD;
+		    Tk = T3 * Tf;
+		    Tl = Tc * Ta;
+		    Tm = Tk - Tl;
+		    T39 = T1c + T1d;
+		    T3U = Tj * T39;
+		    T3W = T2 * T39;
+		    T37 = T18 - T19;
+		    T3T = T2 * T37;
+		    T3X = Tj * T37;
+		    TQ = TO - TP;
+		    TR = T2 * TQ;
+		    TZ = Tj * TQ;
+		    T3c = TO + TP;
+		    T3d = T2 * T3c;
+		    T3i = Tj * T3c;
+		    Tx = T3 * Tw;
+		    Tz = Tc * Ty;
+		    T1t = Tx - Tz;
+		    TN = W[8];
+		    TX = W[9];
+		    T2f = FMA(TN, T4, TX * T7);
+		    T5V = FMA(TN, T5T, TX * T5U);
+		    Tao = FNMS(TX, T5T, TN * T5U);
+		    T2h = FNMS(TX, T4, TN * T7);
+		    T3D = FMA(TN, T5, TX * T8);
+		    T4f = FMA(TN, T3, TX * Tc);
+		    T4h = FNMS(TX, T3, TN * Tc);
+		    T3F = FNMS(TX, T5, TN * T8);
+	       }
+	       T1g = FNMS(T1e, T1f, T1a * T1b);
+	       T91 = FNMS(Tc, T4N, T3 * T4O);
+	       T7W = FMA(Ty, T2M, Tw * T2N);
+	       T7m = FMA(T1e, T1b, T1a * T1f);
+	       T2O = FNMS(Ty, T2N, Tw * T2M);
+	       T4j = FNMS(T4h, T4i, T4f * T4g);
+	       T7P = FNMS(TU, T2u, TQ * T2v);
+	       T4P = FMA(T3, T4N, Tc * T4O);
+	       T8y = FMA(T3e, T46, T3c * T47);
+	       T2w = FMA(TQ, T2u, TU * T2v);
+	       {
+		    E T1v, T1x, T1O, T1X;
+		    T8t = FMA(T4h, T4g, T4f * T4i);
+		    T2Z = FNMS(T2X, T2Y, T2V * T2W);
+		    T8e = FMA(T2X, T2W, T2V * T2Y);
+		    T48 = FNMS(T3e, T47, T3c * T46);
+		    T1v = FMA(TN, T1t, TX * T1u);
+		    T1x = FNMS(TX, T1t, TN * T1u);
+		    T1z = FNMS(T1x, T1y, T1v * T1w);
+		    T7s = FMA(T1x, T1w, T1v * T1y);
+		    {
+			 E T1C, T1G, T8n, T8o;
+			 T1C = T1A + T1B;
+			 T1G = T1E - T1F;
+			 T1I = FNMS(T1G, T1H, T1C * T1D);
+			 T7t = FMA(T1G, T1D, T1C * T1H);
+			 T8n = FMA(T3F, T3E, T3D * T3G);
+			 T8o = FNMS(T3K, T3J, T3I * T3L);
+			 T8p = T8n - T8o;
+			 Ten = T8n + T8o;
+		    }
+		    T1O = FMA(Ta, T1M, Tf * T1N);
+		    T1X = FMA(T1R, T1S, T1V * T1W);
+		    T1Y = T1O + T1X;
+		    T7D = T1O - T1X;
+		    {
+			 E T2j, T2s, T7J, T7K;
+			 T2j = FNMS(T2h, T2i, T2f * T2g);
+			 T2s = FMA(T2m, T2n, T2q * T2r);
+			 T2t = T2j + T2s;
+			 T7O = T2j - T2s;
+			 T7J = FMA(T2h, T2g, T2f * T2i);
+			 T7K = FNMS(T2q, T2n, T2m * T2r);
+			 T7L = T7J - T7K;
+			 Te6 = T7J + T7K;
+		    }
+	       }
+	       {
+		    E T3H, T3M, T7y, T7z;
+		    T3H = FNMS(T3F, T3G, T3D * T3E);
+		    T3M = FMA(T3I, T3J, T3K * T3L);
+		    T3N = T3H + T3M;
+		    T8E = T3H - T3M;
+		    T7y = FNMS(Tf, T1M, Ta * T1N);
+		    T7z = FNMS(T1V, T1S, T1R * T1W);
+		    T7A = T7y - T7z;
+		    Te0 = T7y + T7z;
+	       }
+	       {
+		    E T4B, T8R, T4x, T4z;
+		    T4x = T3d + T3f;
+		    T4z = T3h - T3i;
+		    T4B = FNMS(T4z, T4A, T4x * T4y);
+		    T8R = FMA(T4z, T4y, T4x * T4A);
+		    T4C = T4w + T4B;
+		    TeA = T8Q + T8R;
+		    T8S = T8Q - T8R;
+		    T9v = T4w - T4B;
+	       }
+	       {
+		    E T64, Tap, T5Y, T62;
+		    T5Y = T5W - T5X;
+		    T62 = T60 + T61;
+		    T64 = FMA(T5Y, T5Z, T62 * T63);
+		    Tap = FNMS(T62, T5Z, T5Y * T63);
+		    T65 = T5V + T64;
+		    Tfi = Tao + Tap;
+		    T9J = T5V - T64;
+		    Taq = Tao - Tap;
+	       }
+	       {
+		    E T6J, Ta1, T6F, T6H;
+		    T6F = T2x + T2y;
+		    T6H = T2A - T2B;
+		    T6J = FNMS(T6H, T6I, T6F * T6G);
+		    Ta1 = FMA(T6H, T6G, T6F * T6I);
+		    T6K = T6E + T6J;
+		    Tf6 = Ta0 + Ta1;
+		    Ta2 = Ta0 - Ta1;
+		    Ta5 = T6E - T6J;
+	       }
+	       {
+		    E T6Z, Tab, T6V, T6X;
+		    T6V = FMA(TN, Ta, TX * Tf);
+		    T6X = FNMS(TX, Ta, TN * Tf);
+		    T6Z = FNMS(T6X, T6Y, T6V * T6W);
+		    Tab = FMA(T6X, T6W, T6V * T6Y);
+		    T73 = T6Z + T72;
+		    Tfc = Tab + Tac;
+		    Tad = Tab - Tac;
+		    Tag = T6Z - T72;
+	       }
+	       T3z = FNMS(T3x, T3y, T3v * T3w);
+	       T83 = FMA(T3x, T3w, T3v * T3y);
+	       T3q = FNMS(TX, Tm, TN * Th);
+	       T3s = FMA(TN, Tm, TX * Th);
+	       T3u = FMA(T3q, T3r, T3s * T3t);
+	       T82 = FNMS(T3s, T3r, T3q * T3t);
+	       T81 = T3u - T3z;
+	       T84 = T82 - T83;
+	       {
+		    E TW, T10, T11, T13;
+		    TW = TR + TV;
+		    T10 = TY - TZ;
+		    T11 = FNMS(TX, T10, TN * TW);
+		    T13 = FMA(TN, T10, TX * TW);
+		    T15 = FMA(T11, T12, T13 * T14);
+		    T9K = FMA(T10, T66, TW * T67);
+		    T68 = FNMS(T10, T67, TW * T66);
+		    T7j = FNMS(T13, T12, T11 * T14);
+	       }
+	       {
+		    E T3V, T3Y, T3Z, T41;
+		    T3V = T3T + T3U;
+		    T3Y = T3W - T3X;
+		    T3Z = FNMS(TX, T3Y, TN * T3V);
+		    T41 = FMA(TN, T3Y, TX * T3V);
+		    T43 = FMA(T3Z, T40, T41 * T42);
+		    T9w = FMA(T3Y, T4D, T3V * T4E);
+		    T4F = FNMS(T3Y, T4E, T3V * T4D);
+		    T8G = FNMS(T41, T40, T3Z * T42);
+	       }
+	       {
+		    E T5f, T9i, T5k, T9j, T5g, T5i;
+		    T5f = FNMS(Tm, T5e, Th * T5d);
+		    T9i = FMA(Tm, T5d, Th * T5e);
+		    T5g = T3T - T3U;
+		    T5i = T3W + T3X;
+		    T5k = FMA(T5g, T5h, T5i * T5j);
+		    T9j = FNMS(T5i, T5h, T5g * T5j);
+		    T5l = T5f + T5k;
+		    TeL = T9i + T9j;
+		    T9k = T9i - T9j;
+		    T9n = T5f - T5k;
+	       }
+	       {
+		    E T6i, T9O, T6n, T9P, T6j, T6l;
+		    T6i = FMA(T1t, T6g, T1u * T6h);
+		    T9O = FNMS(T1u, T6g, T1t * T6h);
+		    T6j = TR - TV;
+		    T6l = TY + TZ;
+		    T6n = FMA(T6j, T6k, T6l * T6m);
+		    T9P = FNMS(T6l, T6k, T6j * T6m);
+		    T6o = T6i + T6n;
+		    Tf2 = T9O + T9P;
+		    T9Q = T9O - T9P;
+		    T9R = T6i - T6n;
+	       }
+	       {
+		    E T6t, T9U, T6y, T9V;
+		    {
+			 E T6p, T6r, T6u, T6w;
+			 T6p = FNMS(TX, T1e, TN * T1a);
+			 T6r = FMA(TN, T1e, TX * T1a);
+			 T6t = FMA(T6p, T6q, T6r * T6s);
+			 T9U = FNMS(T6r, T6q, T6p * T6s);
+			 T6u = T5W + T5X;
+			 T6w = T60 - T61;
+			 T6y = FNMS(T6w, T6x, T6u * T6v);
+			 T9V = FMA(T6w, T6v, T6u * T6x);
+		    }
+		    T6z = T6t + T6y;
+		    Tf3 = T9U + T9V;
+		    T9T = T6t - T6y;
+		    T9W = T9U - T9V;
+	       }
+	       {
+		    E Ti, Tn, T4k, Tq, Tr, T4m, T4Q, T4S;
+		    Ti = T2 * Th;
+		    Tn = Tj * Tm;
+		    T4k = Ti - Tn;
+		    Tq = T2 * Tm;
+		    Tr = Tj * Th;
+		    T4m = Tq + Tr;
+		    To = Ti + Tn;
+		    Ts = Tq - Tr;
+		    T4o = FMA(T4k, T4l, T4m * T4n);
+		    T8u = FNMS(T4m, T4l, T4k * T4n);
+		    T4Q = FMA(TN, T4k, TX * T4m);
+		    T4S = FNMS(TX, T4k, TN * T4m);
+		    T4U = FNMS(T4S, T4T, T4Q * T4R);
+		    T92 = FMA(T4S, T4R, T4Q * T4T);
+	       }
+	       {
+		    E T50, T8W, T59, T8X;
+		    {
+			 E T4W, T4Y, T53, T57;
+			 T4W = FNMS(TX, T3e, TN * T3c);
+			 T4Y = FMA(TN, T3e, TX * T3c);
+			 T50 = FMA(T4W, T4X, T4Y * T4Z);
+			 T8W = FNMS(T4Y, T4X, T4W * T4Z);
+			 T53 = T51 - T52;
+			 T57 = T55 + T56;
+			 T59 = FMA(T53, T54, T57 * T58);
+			 T8X = FNMS(T57, T54, T53 * T58);
+		    }
+		    T5a = T50 + T59;
+		    TeT = T8W + T8X;
+		    T8V = T50 - T59;
+		    T8Y = T8W - T8X;
+	       }
+	       {
+		    E T5A, T9c, T5F, T9d;
+		    {
+			 E T5w, T5y, T5B, T5D;
+			 T5w = FNMS(TX, Ty, TN * Tw);
+			 T5y = FMA(TN, Ty, TX * Tw);
+			 T5A = FMA(T5w, T5x, T5y * T5z);
+			 T9c = FNMS(T5y, T5x, T5w * T5z);
+			 T5B = T51 + T52;
+			 T5D = T55 - T56;
+			 T5F = FNMS(T5D, T5E, T5B * T5C);
+			 T9d = FMA(T5D, T5C, T5B * T5E);
+		    }
+		    T5G = T5A + T5F;
+		    TeG = T9c + T9d;
+		    T97 = T5A - T5F;
+		    T9e = T9c - T9d;
+	       }
+	       {
+		    E T21, T2P, T25, T2R, T77, T79;
+		    {
+			 E T1Z, T20, T23, T24;
+			 T1Z = T2 * T1t;
+			 T20 = Tj * T1u;
+			 T21 = T1Z + T20;
+			 T2P = T1Z - T20;
+			 T23 = T2 * T1u;
+			 T24 = Tj * T1t;
+			 T25 = T23 - T24;
+			 T2R = T23 + T24;
+		    }
+		    T27 = FNMS(T25, T26, T21 * T22);
+		    T7X = FNMS(T2R, T2Q, T2P * T2S);
+		    T2T = FMA(T2P, T2Q, T2R * T2S);
+		    T7E = FMA(T25, T22, T21 * T26);
+		    T77 = FNMS(TX, T25, TN * T21);
+		    T79 = FMA(TN, T25, TX * T21);
+		    T7b = FMA(T77, T78, T79 * T7a);
+		    Tai = FNMS(T79, T78, T77 * T7a);
+	       }
+	       {
+		    E T6S, Ta7, T2D, Ta6, T2F, T6N;
+		    {
+			 E T6O, T6Q, T2z, T2C;
+			 T6O = FMA(TN, TQ, TX * TU);
+			 T6Q = FNMS(TX, TQ, TN * TU);
+			 T6S = FNMS(T6Q, T6R, T6O * T6P);
+			 Ta7 = FMA(T6Q, T6P, T6O * T6R);
+			 T2z = T2x - T2y;
+			 T2C = T2A + T2B;
+			 T2D = FMA(TN, T2z, TX * T2C);
+			 Ta6 = FNMS(T2C, T6L, T2z * T6M);
+			 T2F = FNMS(TX, T2z, TN * T2C);
+			 T6N = FMA(T2z, T6L, T2C * T6M);
+		    }
+		    T6T = T6N + T6S;
+		    Ta3 = T6N - T6S;
+		    Tf7 = Ta6 + Ta7;
+		    Ta8 = Ta6 - Ta7;
+		    T7Q = FMA(T2F, T2E, T2D * T2G);
+		    T2H = FNMS(T2F, T2G, T2D * T2E);
+	       }
+	       {
+		    E TA, TE, TB, TF, TJ, TI, T2a, T28, T49, T4b;
+		    TA = Tx + Tz;
+		    TE = TC - TD;
+		    TB = T2 * TA;
+		    TF = Tj * TE;
+		    TJ = Tj * TA;
+		    TI = T2 * TE;
+		    T2a = FMA(TN, TE, TX * TA);
+		    T28 = FNMS(TX, TE, TN * TA);
+		    T2c = FMA(T28, T29, T2a * T2b);
+		    T76 = FNMS(TE, T75, TA * T74);
+		    Tah = FMA(TE, T74, TA * T75);
+		    T7F = FNMS(T2a, T29, T28 * T2b);
+		    T49 = TB + TF;
+		    T4b = TI - TJ;
+		    T4d = FNMS(T4b, T4c, T49 * T4a);
+		    T8z = FMA(T4b, T4a, T49 * T4c);
+		    TG = TB - TF;
+		    TK = TI + TJ;
+		    T69 = FMA(TN, TG, TX * TK);
+		    T6b = FNMS(TX, TG, TN * TK);
+	       }
+	       {
+		    E T5t, T9p, T3k, T9o, T3m, T5o;
+		    T3b = FMA(T37, T38, T39 * T3a);
+		    T87 = FNMS(T39, T38, T37 * T3a);
+		    {
+			 E T5p, T5r, T3g, T3j;
+			 T5p = FMA(TN, T37, TX * T39);
+			 T5r = FNMS(TX, T37, TN * T39);
+			 T5t = FNMS(T5r, T5s, T5p * T5q);
+			 T9p = FMA(T5r, T5q, T5p * T5s);
+			 T3g = T3d - T3f;
+			 T3j = T3h + T3i;
+			 T3k = FMA(TN, T3g, TX * T3j);
+			 T9o = FNMS(T3j, T5m, T3g * T5n);
+			 T3m = FNMS(TX, T3g, TN * T3j);
+			 T5o = FMA(T3g, T5m, T3j * T5n);
+		    }
+		    T5u = T5o + T5t;
+		    T9l = T5o - T5t;
+		    TeM = T9o + T9p;
+		    T9q = T9o - T9p;
+		    T88 = FMA(T3m, T3l, T3k * T3n);
+		    T89 = T87 - T88;
+		    T3o = FNMS(T3m, T3n, T3k * T3l);
+		    T86 = T3b - T3o;
+	       }
+	       {
+		    E T5O, T99, T1i, T1n, T1o, T1k, T30, T5J, T98, T32;
+		    {
+			 E T5K, T5M, T1h, T1j;
+			 T5K = FNMS(TX, T2X, TN * T2V);
+			 T5M = FMA(TN, T2X, TX * T2V);
+			 T5O = FMA(T5K, T5L, T5M * T5N);
+			 T99 = FNMS(T5M, T5L, T5K * T5N);
+			 T1h = Tb - Tg;
+			 T1j = Tk + Tl;
+			 T1i = T2 * T1h;
+			 T1n = T2 * T1j;
+			 T1o = Tj * T1h;
+			 T1k = Tj * T1j;
+			 T30 = FMA(TN, T1h, TX * T1j);
+			 T5J = FMA(T1h, T5H, T1j * T5I);
+			 T98 = FNMS(T1j, T5H, T1h * T5I);
+			 T32 = FNMS(TX, T1h, TN * T1j);
+		    }
+		    T5P = T5J + T5O;
+		    T9f = T5J - T5O;
+		    TeH = T98 + T99;
+		    T9a = T98 - T99;
+		    T34 = FNMS(T32, T33, T30 * T31);
+		    T8f = FMA(T32, T31, T30 * T33);
+		    {
+			 E T1l, T1p, T3O, T3Q;
+			 T1l = T1i - T1k;
+			 T1p = T1n + T1o;
+			 T1r = FMA(T1l, T1m, T1p * T1q);
+			 T7n = FNMS(T1p, T1m, T1l * T1q);
+			 T3O = T1i + T1k;
+			 T3Q = T1n - T1o;
+			 T3S = FNMS(T3Q, T3R, T3O * T3P);
+			 T8F = FMA(T3Q, T3P, T3O * T3R);
+			 T4G = FNMS(TX, T3Q, TN * T3O);
+			 T4I = FMA(TN, T3Q, TX * T3O);
+		    }
+	       }
+	  }
+	  {
+	       E T5R, TgT, TgY, ThE, T9t, Tbe, T9G, Tbb, Tcl, Tdq, Tcs, Tdn, TeP, Tg4, TeY;
+	       E Tg1, T7e, Th4, ThJ, Th9, Tfp, Tg8, Tfg, Tgb, T2K, TgC, Tih, ThX, TfQ, TiL;
+	       E Tea, Tiv, Tam, Tbl, TcL, Tdu, Taz, Tbi, TcE, Tdx, T7U, Tjv, Tdc, Tjh, Tb0;
+	       E TjL, TbU, TiZ, T8D, Tb5, Tc8, Tdi, T8M, Tb6, Tc5, Tdh, T4r, Thz, Tex, Tfz;
+	       E TfX, Tgl, TgN, Thj, T8m, TaI, Tdg, TdG, Tb4, Tbu, Tc2, TcU, T3C, Thy, Tem;
+	       E Tfy, TfU, Tgk, TgI, Thi, T6B, Th1, Tfm, Tga, Th8, ThI, T9Z, Tbh, Taw, Tbk;
+	       E TcI, Tdw, Tf5, Tg7, Tcx, Tdt, T5c, TgV, TeV, Tg0, TgS, ThD, TeE, Tg3, T96;
+	       E Tbd, Tce, Tdp, Tcp, Tdm, T9D, Tba, T1L, Tgz, Ti4, Tii, Tiy, TiM, TdZ, TfN;
+	       E T7x, TaX, Tj4, Tji, Tjy, TjM, TbN, Td9;
+	       {
+		    E T5v, T5Q, TgW, TgX;
+		    T5v = T5l + T5u;
+		    T5Q = T5G + T5P;
+		    T5R = T5v + T5Q;
+		    TgT = T5Q - T5v;
+		    TgW = TeL + TeM;
+		    TgX = TeG + TeH;
+		    TgY = TgW - TgX;
+		    ThE = TgW + TgX;
+	       }
+	       {
+		    E T9h, T9F, T9s, T9E;
+		    {
+			 E T9b, T9g, T9m, T9r;
+			 T9b = T97 - T9a;
+			 T9g = T9e + T9f;
+			 T9h = FNMS(KP923879532, T9g, KP382683432 * T9b);
+			 T9F = FMA(KP382683432, T9g, KP923879532 * T9b);
+			 T9m = T9k + T9l;
+			 T9r = T9n - T9q;
+			 T9s = FMA(KP923879532, T9m, KP382683432 * T9r);
+			 T9E = FNMS(KP923879532, T9r, KP382683432 * T9m);
+		    }
+		    T9t = T9h - T9s;
+		    Tbe = T9E + T9F;
+		    T9G = T9E - T9F;
+		    Tbb = T9s + T9h;
+	       }
+	       {
+		    E Tch, Tcr, Tck, Tcq;
+		    {
+			 E Tcf, Tcg, Tci, Tcj;
+			 Tcf = T97 + T9a;
+			 Tcg = T9e - T9f;
+			 Tch = FNMS(KP382683432, Tcg, KP923879532 * Tcf);
+			 Tcr = FMA(KP923879532, Tcg, KP382683432 * Tcf);
+			 Tci = T9k - T9l;
+			 Tcj = T9n + T9q;
+			 Tck = FMA(KP382683432, Tci, KP923879532 * Tcj);
+			 Tcq = FNMS(KP382683432, Tcj, KP923879532 * Tci);
+		    }
+		    Tcl = Tch - Tck;
+		    Tdq = Tcq + Tcr;
+		    Tcs = Tcq - Tcr;
+		    Tdn = Tck + Tch;
+	       }
+	       {
+		    E TeJ, TeX, TeO, TeW;
+		    {
+			 E TeF, TeI, TeK, TeN;
+			 TeF = T5G - T5P;
+			 TeI = TeG - TeH;
+			 TeJ = TeF - TeI;
+			 TeX = TeF + TeI;
+			 TeK = T5l - T5u;
+			 TeN = TeL - TeM;
+			 TeO = TeK + TeN;
+			 TeW = TeN - TeK;
+		    }
+		    TeP = KP707106781 * (TeJ - TeO);
+		    Tg4 = KP707106781 * (TeW + TeX);
+		    TeY = KP707106781 * (TeW - TeX);
+		    Tg1 = KP707106781 * (TeO + TeJ);
+	       }
+	       {
+		    E T6U, Th2, T7d, Tfb, Tfe, Th3, Tfa, Tfo, Tfn, Tff;
+		    T6U = T6K + T6T;
+		    Th2 = Tf6 + Tf7;
+		    {
+			 E T7c, Tfd, Tf8, Tf9;
+			 T7c = T76 + T7b;
+			 T7d = T73 + T7c;
+			 Tfb = T73 - T7c;
+			 Tfd = Tah + Tai;
+			 Tfe = Tfc - Tfd;
+			 Th3 = Tfc + Tfd;
+			 Tf8 = Tf6 - Tf7;
+			 Tf9 = T6K - T6T;
+			 Tfa = Tf8 - Tf9;
+			 Tfo = Tf9 + Tf8;
+		    }
+		    T7e = T6U + T7d;
+		    Th4 = Th2 - Th3;
+		    ThJ = Th2 + Th3;
+		    Th9 = T7d - T6U;
+		    Tfn = Tfb - Tfe;
+		    Tfp = KP707106781 * (Tfn - Tfo);
+		    Tg8 = KP707106781 * (Tfo + Tfn);
+		    Tff = Tfb + Tfe;
+		    Tfg = KP707106781 * (Tfa - Tff);
+		    Tgb = KP707106781 * (Tfa + Tff);
+	       }
+	       {
+		    E T2e, Te3, Te8, TgB, T2J, Te5, Te2, TgA;
+		    {
+			 E T2d, Te7, T2I, Te1;
+			 T2d = T27 + T2c;
+			 T2e = T1Y + T2d;
+			 Te3 = T1Y - T2d;
+			 Te7 = T7P + T7Q;
+			 Te8 = Te6 - Te7;
+			 TgB = Te6 + Te7;
+			 T2I = T2w + T2H;
+			 T2J = T2t + T2I;
+			 Te5 = T2t - T2I;
+			 Te1 = T7E + T7F;
+			 Te2 = Te0 - Te1;
+			 TgA = Te0 + Te1;
+		    }
+		    T2K = T2e + T2J;
+		    TgC = TgA - TgB;
+		    Tih = T2J - T2e;
+		    ThX = TgA + TgB;
+		    {
+			 E TfO, TfP, Te4, Te9;
+			 TfO = Te3 + Te2;
+			 TfP = Te5 - Te8;
+			 TfQ = KP707106781 * (TfO + TfP);
+			 TiL = KP707106781 * (TfP - TfO);
+			 Te4 = Te2 - Te3;
+			 Te9 = Te5 + Te8;
+			 Tea = KP707106781 * (Te4 - Te9);
+			 Tiv = KP707106781 * (Te4 + Te9);
+		    }
+	       }
+	       {
+		    E Taf, TcB, Tak, TcC, Taa, Tay, TcA, TcK, Tae, Taj;
+		    Tae = T76 - T7b;
+		    Taf = Tad + Tae;
+		    TcB = Tad - Tae;
+		    Taj = Tah - Tai;
+		    Tak = Tag - Taj;
+		    TcC = Tag + Taj;
+		    {
+			 E Ta4, Ta9, Tcy, Tcz;
+			 Ta4 = Ta2 + Ta3;
+			 Ta9 = Ta5 - Ta8;
+			 Taa = FNMS(KP923879532, Ta9, KP382683432 * Ta4);
+			 Tay = FMA(KP923879532, Ta4, KP382683432 * Ta9);
+			 Tcy = Ta2 - Ta3;
+			 Tcz = Ta5 + Ta8;
+			 TcA = FNMS(KP382683432, Tcz, KP923879532 * Tcy);
+			 TcK = FMA(KP382683432, Tcy, KP923879532 * Tcz);
+		    }
+		    {
+			 E Tal, TcJ, Tax, TcD;
+			 Tal = FMA(KP382683432, Taf, KP923879532 * Tak);
+			 Tam = Taa - Tal;
+			 Tbl = Taa + Tal;
+			 TcJ = FNMS(KP382683432, TcB, KP923879532 * TcC);
+			 TcL = TcJ - TcK;
+			 Tdu = TcK + TcJ;
+			 Tax = FNMS(KP923879532, Taf, KP382683432 * Tak);
+			 Taz = Tax - Tay;
+			 Tbi = Tay + Tax;
+			 TcD = FMA(KP923879532, TcB, KP382683432 * TcC);
+			 TcE = TcA - TcD;
+			 Tdx = TcA + TcD;
+		    }
+	       }
+	       {
+		    E T7C, TbO, T7S, TbS, T7H, TbP, T7N, TbR;
+		    {
+			 E T7B, T7R, T7G, T7M;
+			 T7B = T27 - T2c;
+			 T7C = T7A + T7B;
+			 TbO = T7A - T7B;
+			 T7R = T7P - T7Q;
+			 T7S = T7O - T7R;
+			 TbS = T7O + T7R;
+			 T7G = T7E - T7F;
+			 T7H = T7D - T7G;
+			 TbP = T7D + T7G;
+			 T7M = T2w - T2H;
+			 T7N = T7L + T7M;
+			 TbR = T7L - T7M;
+		    }
+		    {
+			 E T7I, T7T, Tda, Tdb;
+			 T7I = FNMS(KP923879532, T7H, KP382683432 * T7C);
+			 T7T = FMA(KP382683432, T7N, KP923879532 * T7S);
+			 T7U = T7I - T7T;
+			 Tjv = T7I + T7T;
+			 Tda = FMA(KP382683432, TbO, KP923879532 * TbP);
+			 Tdb = FNMS(KP382683432, TbR, KP923879532 * TbS);
+			 Tdc = Tda + Tdb;
+			 Tjh = Tdb - Tda;
+		    }
+		    {
+			 E TaY, TaZ, TbQ, TbT;
+			 TaY = FMA(KP923879532, T7C, KP382683432 * T7H);
+			 TaZ = FNMS(KP923879532, T7N, KP382683432 * T7S);
+			 Tb0 = TaY + TaZ;
+			 TjL = TaZ - TaY;
+			 TbQ = FNMS(KP382683432, TbP, KP923879532 * TbO);
+			 TbT = FMA(KP923879532, TbR, KP382683432 * TbS);
+			 TbU = TbQ - TbT;
+			 TiZ = TbQ + TbT;
+		    }
+	       }
+	       {
+		    E T8r, Tc6, T8I, Tc3, T8w, T8K, T8B, T8J, T8q, T8H;
+		    T8q = T3S - T43;
+		    T8r = T8p + T8q;
+		    Tc6 = T8p - T8q;
+		    T8H = T8F - T8G;
+		    T8I = T8E - T8H;
+		    Tc3 = T8E + T8H;
+		    {
+			 E T8s, T8v, T8x, T8A;
+			 T8s = T4j - T4o;
+			 T8v = T8t - T8u;
+			 T8w = T8s - T8v;
+			 T8K = T8s + T8v;
+			 T8x = T48 - T4d;
+			 T8A = T8y - T8z;
+			 T8B = T8x + T8A;
+			 T8J = T8A - T8x;
+		    }
+		    {
+			 E T8C, Tc7, T8L, Tc4;
+			 T8C = KP707106781 * (T8w - T8B);
+			 T8D = T8r - T8C;
+			 Tb5 = T8r + T8C;
+			 Tc7 = KP707106781 * (T8J + T8K);
+			 Tc8 = Tc6 - Tc7;
+			 Tdi = Tc6 + Tc7;
+			 T8L = KP707106781 * (T8J - T8K);
+			 T8M = T8I - T8L;
+			 Tb6 = T8I + T8L;
+			 Tc4 = KP707106781 * (T8B + T8w);
+			 Tc5 = Tc3 - Tc4;
+			 Tdh = Tc3 + Tc4;
+		    }
+	       }
+	       {
+		    E T45, Tes, Tep, TgK, T4q, Teq, Tev, TgL, T44, Teo, Ter, Tew;
+		    T44 = T3S + T43;
+		    T45 = T3N + T44;
+		    Tes = T3N - T44;
+		    Teo = T8F + T8G;
+		    Tep = Ten - Teo;
+		    TgK = Ten + Teo;
+		    {
+			 E T4e, T4p, Tet, Teu;
+			 T4e = T48 + T4d;
+			 T4p = T4j + T4o;
+			 T4q = T4e + T4p;
+			 Teq = T4p - T4e;
+			 Tet = T8y + T8z;
+			 Teu = T8t + T8u;
+			 Tev = Tet - Teu;
+			 TgL = Tet + Teu;
+		    }
+		    T4r = T45 + T4q;
+		    Thz = TgK + TgL;
+		    Ter = Tep - Teq;
+		    Tew = Tes - Tev;
+		    Tex = FMA(KP382683432, Ter, KP923879532 * Tew);
+		    Tfz = FNMS(KP923879532, Ter, KP382683432 * Tew);
+		    {
+			 E TfV, TfW, TgJ, TgM;
+			 TfV = Tep + Teq;
+			 TfW = Tes + Tev;
+			 TfX = FMA(KP923879532, TfV, KP382683432 * TfW);
+			 Tgl = FNMS(KP382683432, TfV, KP923879532 * TfW);
+			 TgJ = T45 - T4q;
+			 TgM = TgK - TgL;
+			 TgN = TgJ + TgM;
+			 Thj = TgJ - TgM;
+		    }
+	       }
+	       {
+		    E T80, TbW, T8k, TbX, T8b, Tc0, T8h, TbZ;
+		    {
+			 E T7Y, T7Z, T8i, T8j;
+			 T7Y = T7W - T7X;
+			 T7Z = T2Z - T34;
+			 T80 = T7Y + T7Z;
+			 TbW = T7Y - T7Z;
+			 T8i = T89 - T86;
+			 T8j = T81 + T84;
+			 T8k = KP707106781 * (T8i - T8j);
+			 TbX = KP707106781 * (T8i + T8j);
+		    }
+		    {
+			 E T85, T8a, T8d, T8g;
+			 T85 = T81 - T84;
+			 T8a = T86 + T89;
+			 T8b = KP707106781 * (T85 - T8a);
+			 Tc0 = KP707106781 * (T8a + T85);
+			 T8d = T2O - T2T;
+			 T8g = T8e - T8f;
+			 T8h = T8d - T8g;
+			 TbZ = T8d + T8g;
+		    }
+		    {
+			 E T8c, T8l, Tde, Tdf;
+			 T8c = T80 - T8b;
+			 T8l = T8h - T8k;
+			 T8m = FNMS(KP980785280, T8l, KP195090322 * T8c);
+			 TaI = FMA(KP980785280, T8c, KP195090322 * T8l);
+			 Tde = TbW + TbX;
+			 Tdf = TbZ + Tc0;
+			 Tdg = FNMS(KP195090322, Tdf, KP980785280 * Tde);
+			 TdG = FMA(KP980785280, Tdf, KP195090322 * Tde);
+		    }
+		    {
+			 E Tb2, Tb3, TbY, Tc1;
+			 Tb2 = T80 + T8b;
+			 Tb3 = T8h + T8k;
+			 Tb4 = FNMS(KP555570233, Tb3, KP831469612 * Tb2);
+			 Tbu = FMA(KP555570233, Tb2, KP831469612 * Tb3);
+			 TbY = TbW - TbX;
+			 Tc1 = TbZ - Tc0;
+			 Tc2 = FNMS(KP831469612, Tc1, KP555570233 * TbY);
+			 TcU = FMA(KP555570233, Tc1, KP831469612 * TbY);
+		    }
+	       }
+	       {
+		    E T36, Teh, Tek, TgF, T3B, Tef, Tee, TgE, Teg, Tel;
+		    {
+			 E T2U, T35, Tei, Tej;
+			 T2U = T2O + T2T;
+			 T35 = T2Z + T34;
+			 T36 = T2U + T35;
+			 Teh = T2U - T35;
+			 Tei = T87 + T88;
+			 Tej = T82 + T83;
+			 Tek = Tei - Tej;
+			 TgF = Tei + Tej;
+		    }
+		    {
+			 E T3p, T3A, Tec, Ted;
+			 T3p = T3b + T3o;
+			 T3A = T3u + T3z;
+			 T3B = T3p + T3A;
+			 Tef = T3A - T3p;
+			 Tec = T7W + T7X;
+			 Ted = T8e + T8f;
+			 Tee = Tec - Ted;
+			 TgE = Tec + Ted;
+		    }
+		    T3C = T36 + T3B;
+		    Thy = TgE + TgF;
+		    Teg = Tee - Tef;
+		    Tel = Teh - Tek;
+		    Tem = FNMS(KP923879532, Tel, KP382683432 * Teg);
+		    Tfy = FMA(KP923879532, Teg, KP382683432 * Tel);
+		    {
+			 E TfS, TfT, TgG, TgH;
+			 TfS = Tee + Tef;
+			 TfT = Teh + Tek;
+			 TfU = FNMS(KP382683432, TfT, KP923879532 * TfS);
+			 Tgk = FMA(KP382683432, TfS, KP923879532 * TfT);
+			 TgG = TgE - TgF;
+			 TgH = T36 - T3B;
+			 TgI = TgG - TgH;
+			 Thi = TgH + TgG;
+		    }
+	       }
+	       {
+		    E T6A, Tfl, Th7, Tf4, T6e, Tar, T9Y, TcH, Tav, Tcw, T9M, Tfj;
+		    T6A = T6o + T6z;
+		    Tfl = T6z - T6o;
+		    Th7 = Tf2 + Tf3;
+		    Tf4 = Tf2 - Tf3;
+		    {
+			 E T6d, T9S, T9X, Tat, Tau, T9L;
+			 T6d = FNMS(T6b, T6c, T69 * T6a);
+			 T6e = T68 + T6d;
+			 Tar = T68 - T6d;
+			 T9S = T9Q - T9R;
+			 T9X = T9T + T9W;
+			 T9Y = KP707106781 * (T9S - T9X);
+			 TcH = KP707106781 * (T9S + T9X);
+			 Tat = T9T - T9W;
+			 Tau = T9R + T9Q;
+			 Tav = KP707106781 * (Tat - Tau);
+			 Tcw = KP707106781 * (Tau + Tat);
+			 T9L = FMA(T6b, T6a, T69 * T6c);
+			 T9M = T9K - T9L;
+			 Tfj = T9K + T9L;
+		    }
+		    {
+			 E T6f, Tfk, Th6, T9N;
+			 T6f = T65 + T6e;
+			 T6B = T6f + T6A;
+			 Th1 = T6f - T6A;
+			 Tfk = Tfi - Tfj;
+			 Tfm = Tfk - Tfl;
+			 Tga = Tfk + Tfl;
+			 Th6 = Tfi + Tfj;
+			 Th8 = Th6 - Th7;
+			 ThI = Th6 + Th7;
+			 T9N = T9J - T9M;
+			 T9Z = T9N - T9Y;
+			 Tbh = T9N + T9Y;
+		    }
+		    {
+			 E Tas, TcG, Tf1, Tcv;
+			 Tas = Taq + Tar;
+			 Taw = Tas - Tav;
+			 Tbk = Tas + Tav;
+			 TcG = Taq - Tar;
+			 TcI = TcG - TcH;
+			 Tdw = TcG + TcH;
+			 Tf1 = T65 - T6e;
+			 Tf5 = Tf1 - Tf4;
+			 Tg7 = Tf1 + Tf4;
+			 Tcv = T9J + T9M;
+			 Tcx = Tcv - Tcw;
+			 Tdt = Tcv + Tcw;
+		    }
+	       }
+	       {
+		    E T8Z, T9B, T5b, TeD, TeU, TgR, T94, T9A, T4L, T8T, T9y, TeB, T4V;
+		    T8Z = T8V - T8Y;
+		    T9B = T8V + T8Y;
+		    T4V = T4P + T4U;
+		    T5b = T4V + T5a;
+		    TeD = T5a - T4V;
+		    {
+			 E TeS, T90, T93, T4K, T9x;
+			 TeS = T91 + T92;
+			 TeU = TeS - TeT;
+			 TgR = TeS + TeT;
+			 T90 = T4P - T4U;
+			 T93 = T91 - T92;
+			 T94 = T90 + T93;
+			 T9A = T93 - T90;
+			 T4K = FMA(T4G, T4H, T4I * T4J);
+			 T4L = T4F + T4K;
+			 T8T = T4F - T4K;
+			 T9x = FNMS(T4I, T4H, T4G * T4J);
+			 T9y = T9w - T9x;
+			 TeB = T9w + T9x;
+		    }
+		    {
+			 E T4M, TeR, TgQ, TeC;
+			 T4M = T4C + T4L;
+			 T5c = T4M + T5b;
+			 TgV = T4M - T5b;
+			 TeR = T4C - T4L;
+			 TeV = TeR - TeU;
+			 Tg0 = TeR + TeU;
+			 TgQ = TeA + TeB;
+			 TgS = TgQ - TgR;
+			 ThD = TgQ + TgR;
+			 TeC = TeA - TeB;
+			 TeE = TeC - TeD;
+			 Tg3 = TeC + TeD;
+		    }
+		    {
+			 E T8U, T95, Tcc, Tcd;
+			 T8U = T8S + T8T;
+			 T95 = KP707106781 * (T8Z - T94);
+			 T96 = T8U - T95;
+			 Tbd = T8U + T95;
+			 Tcc = T8S - T8T;
+			 Tcd = KP707106781 * (T9A + T9B);
+			 Tce = Tcc - Tcd;
+			 Tdp = Tcc + Tcd;
+		    }
+		    {
+			 E Tcn, Tco, T9z, T9C;
+			 Tcn = T9v + T9y;
+			 Tco = KP707106781 * (T94 + T8Z);
+			 Tcp = Tcn - Tco;
+			 Tdm = Tcn + Tco;
+			 T9z = T9v - T9y;
+			 T9C = KP707106781 * (T9A - T9B);
+			 T9D = T9z - T9C;
+			 Tba = T9z + T9C;
+		    }
+	       }
+	       {
+		    E Tv, T7h, TdY, ThY, Ti2, Tj1, T16, Tj2, T1K, Tiw, T7q, TbK, T7v, TbL, T7k;
+		    E ThZ, T7r, T7u, T7i;
+		    {
+			 E Tu, TdW, TdX, Ti0, TM;
+			 Tu = FNMS(Ts, Tt, To * Tp);
+			 Tv = T1 + Tu;
+			 T7h = T1 - Tu;
+			 TdW = T7m + T7n;
+			 TdX = T7s + T7t;
+			 TdY = TdW - TdX;
+			 ThY = TdW + TdX;
+			 Ti0 = FMA(Ts, Tp, To * Tt);
+			 Ti2 = Ti0 + Ti1;
+			 Tj1 = Ti1 - Ti0;
+			 TM = FMA(TG, TH, TK * TL);
+			 T16 = TM + T15;
+			 Tj2 = TM - T15;
+		    }
+		    {
+			 E T1s, T1J, T7o, T7p;
+			 T1s = T1g + T1r;
+			 T1J = T1z + T1I;
+			 T1K = T1s + T1J;
+			 Tiw = T1J - T1s;
+			 T7o = T7m - T7n;
+			 T7p = T1g - T1r;
+			 T7q = T7o - T7p;
+			 TbK = T7p + T7o;
+		    }
+		    T7r = T1z - T1I;
+		    T7u = T7s - T7t;
+		    T7v = T7r + T7u;
+		    TbL = T7r - T7u;
+		    T7i = FNMS(TK, TH, TG * TL);
+		    T7k = T7i - T7j;
+		    ThZ = T7i + T7j;
+		    {
+			 E T17, Ti3, Tix, TdV;
+			 T17 = Tv + T16;
+			 T1L = T17 + T1K;
+			 Tgz = T17 - T1K;
+			 Ti3 = ThZ + Ti2;
+			 Ti4 = ThY + Ti3;
+			 Tii = Ti3 - ThY;
+			 Tix = Ti2 - ThZ;
+			 Tiy = Tiw + Tix;
+			 TiM = Tix - Tiw;
+			 TdV = Tv - T16;
+			 TdZ = TdV - TdY;
+			 TfN = TdV + TdY;
+		    }
+		    {
+			 E T7l, T7w, Tj0, Tj3;
+			 T7l = T7h - T7k;
+			 T7w = KP707106781 * (T7q - T7v);
+			 T7x = T7l - T7w;
+			 TaX = T7l + T7w;
+			 Tj0 = KP707106781 * (T7q + T7v);
+			 Tj3 = Tj1 - Tj2;
+			 Tj4 = Tj0 + Tj3;
+			 Tji = Tj3 - Tj0;
+		    }
+		    {
+			 E Tjw, Tjx, TbJ, TbM;
+			 Tjw = KP707106781 * (TbL - TbK);
+			 Tjx = Tj2 + Tj1;
+			 Tjy = Tjw + Tjx;
+			 TjM = Tjx - Tjw;
+			 TbJ = T7h + T7k;
+			 TbM = KP707106781 * (TbK + TbL);
+			 TbN = TbJ - TbM;
+			 Td9 = TbJ + TbM;
+		    }
+	       }
+	       {
+		    E T4t, ThR, Ti6, Ti8, T7g, Ti7, ThU, ThV;
+		    {
+			 E T2L, T4s, ThW, Ti5;
+			 T2L = T1L + T2K;
+			 T4s = T3C + T4r;
+			 T4t = T2L + T4s;
+			 ThR = T2L - T4s;
+			 ThW = Thy + Thz;
+			 Ti5 = ThX + Ti4;
+			 Ti6 = ThW + Ti5;
+			 Ti8 = Ti5 - ThW;
+		    }
+		    {
+			 E T5S, T7f, ThS, ThT;
+			 T5S = T5c + T5R;
+			 T7f = T6B + T7e;
+			 T7g = T5S + T7f;
+			 Ti7 = T7f - T5S;
+			 ThS = ThD + ThE;
+			 ThT = ThI + ThJ;
+			 ThU = ThS - ThT;
+			 ThV = ThS + ThT;
+		    }
+		    ri[WS(ios, 32)] = T4t - T7g;
+		    ii[WS(ios, 32)] = Ti6 - ThV;
+		    ri[0] = T4t + T7g;
+		    ii[0] = ThV + Ti6;
+		    ri[WS(ios, 48)] = ThR - ThU;
+		    ii[WS(ios, 48)] = Ti8 - Ti7;
+		    ri[WS(ios, 16)] = ThR + ThU;
+		    ii[WS(ios, 16)] = Ti7 + Ti8;
+	       }
+	       {
+		    E ThB, ThN, Tic, Tie, ThG, ThO, ThL, ThP;
+		    {
+			 E Thx, ThA, Tia, Tib;
+			 Thx = T1L - T2K;
+			 ThA = Thy - Thz;
+			 ThB = Thx + ThA;
+			 ThN = Thx - ThA;
+			 Tia = T4r - T3C;
+			 Tib = Ti4 - ThX;
+			 Tic = Tia + Tib;
+			 Tie = Tib - Tia;
+		    }
+		    {
+			 E ThC, ThF, ThH, ThK;
+			 ThC = T5c - T5R;
+			 ThF = ThD - ThE;
+			 ThG = ThC + ThF;
+			 ThO = ThF - ThC;
+			 ThH = T6B - T7e;
+			 ThK = ThI - ThJ;
+			 ThL = ThH - ThK;
+			 ThP = ThH + ThK;
+		    }
+		    {
+			 E ThM, Ti9, ThQ, Tid;
+			 ThM = KP707106781 * (ThG + ThL);
+			 ri[WS(ios, 40)] = ThB - ThM;
+			 ri[WS(ios, 8)] = ThB + ThM;
+			 Ti9 = KP707106781 * (ThO + ThP);
+			 ii[WS(ios, 8)] = Ti9 + Tic;
+			 ii[WS(ios, 40)] = Tic - Ti9;
+			 ThQ = KP707106781 * (ThO - ThP);
+			 ri[WS(ios, 56)] = ThN - ThQ;
+			 ri[WS(ios, 24)] = ThN + ThQ;
+			 Tid = KP707106781 * (ThL - ThG);
+			 ii[WS(ios, 24)] = Tid + Tie;
+			 ii[WS(ios, 56)] = Tie - Tid;
+		    }
+	       }
+	       {
+		    E TgP, Thd, Tiq, Tis, Th0, The, Thb, Thf;
+		    {
+			 E TgD, TgO, Tio, Tip;
+			 TgD = Tgz - TgC;
+			 TgO = KP707106781 * (TgI - TgN);
+			 TgP = TgD + TgO;
+			 Thd = TgD - TgO;
+			 Tio = KP707106781 * (Thj - Thi);
+			 Tip = Tii - Tih;
+			 Tiq = Tio + Tip;
+			 Tis = Tip - Tio;
+		    }
+		    {
+			 E TgU, TgZ, Th5, Tha;
+			 TgU = TgS - TgT;
+			 TgZ = TgV - TgY;
+			 Th0 = FMA(KP923879532, TgU, KP382683432 * TgZ);
+			 The = FNMS(KP923879532, TgZ, KP382683432 * TgU);
+			 Th5 = Th1 - Th4;
+			 Tha = Th8 - Th9;
+			 Thb = FNMS(KP923879532, Tha, KP382683432 * Th5);
+			 Thf = FMA(KP382683432, Tha, KP923879532 * Th5);
+		    }
+		    {
+			 E Thc, Tin, Thg, Tir;
+			 Thc = Th0 + Thb;
+			 ri[WS(ios, 44)] = TgP - Thc;
+			 ri[WS(ios, 12)] = TgP + Thc;
+			 Tin = The + Thf;
+			 ii[WS(ios, 12)] = Tin + Tiq;
+			 ii[WS(ios, 44)] = Tiq - Tin;
+			 Thg = The - Thf;
+			 ri[WS(ios, 60)] = Thd - Thg;
+			 ri[WS(ios, 28)] = Thd + Thg;
+			 Tir = Thb - Th0;
+			 ii[WS(ios, 28)] = Tir + Tis;
+			 ii[WS(ios, 60)] = Tis - Tir;
+		    }
+	       }
+	       {
+		    E TfB, TfJ, TiO, TiQ, TfE, TfK, TfH, TfL;
+		    {
+			 E Tfx, TfA, TiK, TiN;
+			 Tfx = TdZ + Tea;
+			 TfA = Tfy + Tfz;
+			 TfB = Tfx + TfA;
+			 TfJ = Tfx - TfA;
+			 TiK = Tem + Tex;
+			 TiN = TiL + TiM;
+			 TiO = TiK + TiN;
+			 TiQ = TiN - TiK;
+		    }
+		    {
+			 E TfC, TfD, TfF, TfG;
+			 TfC = TeE + TeP;
+			 TfD = TeV + TeY;
+			 TfE = FMA(KP555570233, TfC, KP831469612 * TfD);
+			 TfK = FNMS(KP555570233, TfD, KP831469612 * TfC);
+			 TfF = Tf5 + Tfg;
+			 TfG = Tfm + Tfp;
+			 TfH = FNMS(KP555570233, TfG, KP831469612 * TfF);
+			 TfL = FMA(KP831469612, TfG, KP555570233 * TfF);
+		    }
+		    {
+			 E TfI, TiJ, TfM, TiP;
+			 TfI = TfE + TfH;
+			 ri[WS(ios, 38)] = TfB - TfI;
+			 ri[WS(ios, 6)] = TfB + TfI;
+			 TiJ = TfK + TfL;
+			 ii[WS(ios, 6)] = TiJ + TiO;
+			 ii[WS(ios, 38)] = TiO - TiJ;
+			 TfM = TfK - TfL;
+			 ri[WS(ios, 54)] = TfJ - TfM;
+			 ri[WS(ios, 22)] = TfJ + TfM;
+			 TiP = TfH - TfE;
+			 ii[WS(ios, 22)] = TiP + TiQ;
+			 ii[WS(ios, 54)] = TiQ - TiP;
+		    }
+	       }
+	       {
+		    E Thl, Tht, Tik, Tim, Tho, Thu, Thr, Thv;
+		    {
+			 E Thh, Thk, Tig, Tij;
+			 Thh = Tgz + TgC;
+			 Thk = KP707106781 * (Thi + Thj);
+			 Thl = Thh + Thk;
+			 Tht = Thh - Thk;
+			 Tig = KP707106781 * (TgI + TgN);
+			 Tij = Tih + Tii;
+			 Tik = Tig + Tij;
+			 Tim = Tij - Tig;
+		    }
+		    {
+			 E Thm, Thn, Thp, Thq;
+			 Thm = TgS + TgT;
+			 Thn = TgV + TgY;
+			 Tho = FMA(KP382683432, Thm, KP923879532 * Thn);
+			 Thu = FNMS(KP382683432, Thn, KP923879532 * Thm);
+			 Thp = Th1 + Th4;
+			 Thq = Th8 + Th9;
+			 Thr = FNMS(KP382683432, Thq, KP923879532 * Thp);
+			 Thv = FMA(KP923879532, Thq, KP382683432 * Thp);
+		    }
+		    {
+			 E Ths, Tif, Thw, Til;
+			 Ths = Tho + Thr;
+			 ri[WS(ios, 36)] = Thl - Ths;
+			 ri[WS(ios, 4)] = Thl + Ths;
+			 Tif = Thu + Thv;
+			 ii[WS(ios, 4)] = Tif + Tik;
+			 ii[WS(ios, 36)] = Tik - Tif;
+			 Thw = Thu - Thv;
+			 ri[WS(ios, 52)] = Tht - Thw;
+			 ri[WS(ios, 20)] = Tht + Thw;
+			 Til = Thr - Tho;
+			 ii[WS(ios, 20)] = Til + Tim;
+			 ii[WS(ios, 52)] = Tim - Til;
+		    }
+	       }
+	       {
+		    E Tez, Tft, TiU, TiW, Tf0, Tfu, Tfr, Tfv;
+		    {
+			 E Teb, Tey, TiS, TiT;
+			 Teb = TdZ - Tea;
+			 Tey = Tem - Tex;
+			 Tez = Teb + Tey;
+			 Tft = Teb - Tey;
+			 TiS = Tfz - Tfy;
+			 TiT = TiM - TiL;
+			 TiU = TiS + TiT;
+			 TiW = TiT - TiS;
+		    }
+		    {
+			 E TeQ, TeZ, Tfh, Tfq;
+			 TeQ = TeE - TeP;
+			 TeZ = TeV - TeY;
+			 Tf0 = FMA(KP980785280, TeQ, KP195090322 * TeZ);
+			 Tfu = FNMS(KP980785280, TeZ, KP195090322 * TeQ);
+			 Tfh = Tf5 - Tfg;
+			 Tfq = Tfm - Tfp;
+			 Tfr = FNMS(KP980785280, Tfq, KP195090322 * Tfh);
+			 Tfv = FMA(KP195090322, Tfq, KP980785280 * Tfh);
+		    }
+		    {
+			 E Tfs, TiR, Tfw, TiV;
+			 Tfs = Tf0 + Tfr;
+			 ri[WS(ios, 46)] = Tez - Tfs;
+			 ri[WS(ios, 14)] = Tez + Tfs;
+			 TiR = Tfu + Tfv;
+			 ii[WS(ios, 14)] = TiR + TiU;
+			 ii[WS(ios, 46)] = TiU - TiR;
+			 Tfw = Tfu - Tfv;
+			 ri[WS(ios, 62)] = Tft - Tfw;
+			 ri[WS(ios, 30)] = Tft + Tfw;
+			 TiV = Tfr - Tf0;
+			 ii[WS(ios, 30)] = TiV + TiW;
+			 ii[WS(ios, 62)] = TiW - TiV;
+		    }
+	       }
+	       {
+		    E TfZ, Tgf, TiG, TiI, Tg6, Tgg, Tgd, Tgh;
+		    {
+			 E TfR, TfY, TiE, TiF;
+			 TfR = TfN - TfQ;
+			 TfY = TfU - TfX;
+			 TfZ = TfR + TfY;
+			 Tgf = TfR - TfY;
+			 TiE = Tgl - Tgk;
+			 TiF = Tiy - Tiv;
+			 TiG = TiE + TiF;
+			 TiI = TiF - TiE;
+		    }
+		    {
+			 E Tg2, Tg5, Tg9, Tgc;
+			 Tg2 = Tg0 - Tg1;
+			 Tg5 = Tg3 - Tg4;
+			 Tg6 = FMA(KP555570233, Tg2, KP831469612 * Tg5);
+			 Tgg = FNMS(KP831469612, Tg2, KP555570233 * Tg5);
+			 Tg9 = Tg7 - Tg8;
+			 Tgc = Tga - Tgb;
+			 Tgd = FNMS(KP831469612, Tgc, KP555570233 * Tg9);
+			 Tgh = FMA(KP831469612, Tg9, KP555570233 * Tgc);
+		    }
+		    {
+			 E Tge, TiD, Tgi, TiH;
+			 Tge = Tg6 + Tgd;
+			 ri[WS(ios, 42)] = TfZ - Tge;
+			 ri[WS(ios, 10)] = TfZ + Tge;
+			 TiD = Tgg + Tgh;
+			 ii[WS(ios, 10)] = TiD + TiG;
+			 ii[WS(ios, 42)] = TiG - TiD;
+			 Tgi = Tgg - Tgh;
+			 ri[WS(ios, 58)] = Tgf - Tgi;
+			 ri[WS(ios, 26)] = Tgf + Tgi;
+			 TiH = Tgd - Tg6;
+			 ii[WS(ios, 26)] = TiH + TiI;
+			 ii[WS(ios, 58)] = TiI - TiH;
+		    }
+	       }
+	       {
+		    E Tgn, Tgv, TiA, TiC, Tgq, Tgw, Tgt, Tgx;
+		    {
+			 E Tgj, Tgm, Tiu, Tiz;
+			 Tgj = TfN + TfQ;
+			 Tgm = Tgk + Tgl;
+			 Tgn = Tgj + Tgm;
+			 Tgv = Tgj - Tgm;
+			 Tiu = TfU + TfX;
+			 Tiz = Tiv + Tiy;
+			 TiA = Tiu + Tiz;
+			 TiC = Tiz - Tiu;
+		    }
+		    {
+			 E Tgo, Tgp, Tgr, Tgs;
+			 Tgo = Tg0 + Tg1;
+			 Tgp = Tg3 + Tg4;
+			 Tgq = FMA(KP980785280, Tgo, KP195090322 * Tgp);
+			 Tgw = FNMS(KP195090322, Tgo, KP980785280 * Tgp);
+			 Tgr = Tg7 + Tg8;
+			 Tgs = Tga + Tgb;
+			 Tgt = FNMS(KP195090322, Tgs, KP980785280 * Tgr);
+			 Tgx = FMA(KP195090322, Tgr, KP980785280 * Tgs);
+		    }
+		    {
+			 E Tgu, Tit, Tgy, TiB;
+			 Tgu = Tgq + Tgt;
+			 ri[WS(ios, 34)] = Tgn - Tgu;
+			 ri[WS(ios, 2)] = Tgn + Tgu;
+			 Tit = Tgw + Tgx;
+			 ii[WS(ios, 2)] = Tit + TiA;
+			 ii[WS(ios, 34)] = TiA - Tit;
+			 Tgy = Tgw - Tgx;
+			 ri[WS(ios, 50)] = Tgv - Tgy;
+			 ri[WS(ios, 18)] = Tgv + Tgy;
+			 TiB = Tgt - Tgq;
+			 ii[WS(ios, 18)] = TiB + TiC;
+			 ii[WS(ios, 50)] = TiC - TiB;
+		    }
+	       }
+	       {
+		    E T7V, TjN, TjT, TaH, T8O, TjK, TaK, TjS, TaO, TaU, T9I, TaE, TaR, TaV, TaB;
+		    E TaF, T8N;
+		    T7V = T7x - T7U;
+		    TjN = TjL + TjM;
+		    TjT = TjM - TjL;
+		    TaH = T7x + T7U;
+		    T8N = FMA(KP195090322, T8D, KP980785280 * T8M);
+		    T8O = T8m - T8N;
+		    TjK = T8m + T8N;
+		    {
+			 E TaJ, TaM, TaN, T9u, T9H;
+			 TaJ = FNMS(KP980785280, T8D, KP195090322 * T8M);
+			 TaK = TaI + TaJ;
+			 TjS = TaJ - TaI;
+			 TaM = T96 + T9t;
+			 TaN = T9D + T9G;
+			 TaO = FMA(KP634393284, TaM, KP773010453 * TaN);
+			 TaU = FNMS(KP634393284, TaN, KP773010453 * TaM);
+			 T9u = T96 - T9t;
+			 T9H = T9D - T9G;
+			 T9I = FMA(KP995184726, T9u, KP098017140 * T9H);
+			 TaE = FNMS(KP995184726, T9H, KP098017140 * T9u);
+			 {
+			      E TaP, TaQ, Tan, TaA;
+			      TaP = T9Z + Tam;
+			      TaQ = Taw + Taz;
+			      TaR = FNMS(KP634393284, TaQ, KP773010453 * TaP);
+			      TaV = FMA(KP773010453, TaQ, KP634393284 * TaP);
+			      Tan = T9Z - Tam;
+			      TaA = Taw - Taz;
+			      TaB = FNMS(KP995184726, TaA, KP098017140 * Tan);
+			      TaF = FMA(KP098017140, TaA, KP995184726 * Tan);
+			 }
+		    }
+		    {
+			 E T8P, TaC, TjR, TjU;
+			 T8P = T7V + T8O;
+			 TaC = T9I + TaB;
+			 ri[WS(ios, 47)] = T8P - TaC;
+			 ri[WS(ios, 15)] = T8P + TaC;
+			 TjR = TaE + TaF;
+			 TjU = TjS + TjT;
+			 ii[WS(ios, 15)] = TjR + TjU;
+			 ii[WS(ios, 47)] = TjU - TjR;
+		    }
+		    {
+			 E TaD, TaG, TjV, TjW;
+			 TaD = T7V - T8O;
+			 TaG = TaE - TaF;
+			 ri[WS(ios, 63)] = TaD - TaG;
+			 ri[WS(ios, 31)] = TaD + TaG;
+			 TjV = TaB - T9I;
+			 TjW = TjT - TjS;
+			 ii[WS(ios, 31)] = TjV + TjW;
+			 ii[WS(ios, 63)] = TjW - TjV;
+		    }
+		    {
+			 E TaL, TaS, TjJ, TjO;
+			 TaL = TaH + TaK;
+			 TaS = TaO + TaR;
+			 ri[WS(ios, 39)] = TaL - TaS;
+			 ri[WS(ios, 7)] = TaL + TaS;
+			 TjJ = TaU + TaV;
+			 TjO = TjK + TjN;
+			 ii[WS(ios, 7)] = TjJ + TjO;
+			 ii[WS(ios, 39)] = TjO - TjJ;
+		    }
+		    {
+			 E TaT, TaW, TjP, TjQ;
+			 TaT = TaH - TaK;
+			 TaW = TaU - TaV;
+			 ri[WS(ios, 55)] = TaT - TaW;
+			 ri[WS(ios, 23)] = TaT + TaW;
+			 TjP = TaR - TaO;
+			 TjQ = TjN - TjK;
+			 ii[WS(ios, 23)] = TjP + TjQ;
+			 ii[WS(ios, 55)] = TjQ - TjP;
+		    }
+	       }
+	       {
+		    E TbV, Tjj, Tjp, TcT, Tca, Tjg, TcW, Tjo, Td0, Td6, Tcu, TcQ, Td3, Td7, TcN;
+		    E TcR, Tc9;
+		    TbV = TbN - TbU;
+		    Tjj = Tjh + Tji;
+		    Tjp = Tji - Tjh;
+		    TcT = TbN + TbU;
+		    Tc9 = FMA(KP831469612, Tc5, KP555570233 * Tc8);
+		    Tca = Tc2 - Tc9;
+		    Tjg = Tc2 + Tc9;
+		    {
+			 E TcV, TcY, TcZ, Tcm, Tct;
+			 TcV = FNMS(KP831469612, Tc8, KP555570233 * Tc5);
+			 TcW = TcU + TcV;
+			 Tjo = TcV - TcU;
+			 TcY = Tce + Tcl;
+			 TcZ = Tcp + Tcs;
+			 Td0 = FMA(KP471396736, TcY, KP881921264 * TcZ);
+			 Td6 = FNMS(KP471396736, TcZ, KP881921264 * TcY);
+			 Tcm = Tce - Tcl;
+			 Tct = Tcp - Tcs;
+			 Tcu = FMA(KP956940335, Tcm, KP290284677 * Tct);
+			 TcQ = FNMS(KP956940335, Tct, KP290284677 * Tcm);
+			 {
+			      E Td1, Td2, TcF, TcM;
+			      Td1 = Tcx + TcE;
+			      Td2 = TcI + TcL;
+			      Td3 = FNMS(KP471396736, Td2, KP881921264 * Td1);
+			      Td7 = FMA(KP881921264, Td2, KP471396736 * Td1);
+			      TcF = Tcx - TcE;
+			      TcM = TcI - TcL;
+			      TcN = FNMS(KP956940335, TcM, KP290284677 * TcF);
+			      TcR = FMA(KP290284677, TcM, KP956940335 * TcF);
+			 }
+		    }
+		    {
+			 E Tcb, TcO, Tjn, Tjq;
+			 Tcb = TbV + Tca;
+			 TcO = Tcu + TcN;
+			 ri[WS(ios, 45)] = Tcb - TcO;
+			 ri[WS(ios, 13)] = Tcb + TcO;
+			 Tjn = TcQ + TcR;
+			 Tjq = Tjo + Tjp;
+			 ii[WS(ios, 13)] = Tjn + Tjq;
+			 ii[WS(ios, 45)] = Tjq - Tjn;
+		    }
+		    {
+			 E TcP, TcS, Tjr, Tjs;
+			 TcP = TbV - Tca;
+			 TcS = TcQ - TcR;
+			 ri[WS(ios, 61)] = TcP - TcS;
+			 ri[WS(ios, 29)] = TcP + TcS;
+			 Tjr = TcN - Tcu;
+			 Tjs = Tjp - Tjo;
+			 ii[WS(ios, 29)] = Tjr + Tjs;
+			 ii[WS(ios, 61)] = Tjs - Tjr;
+		    }
+		    {
+			 E TcX, Td4, Tjf, Tjk;
+			 TcX = TcT + TcW;
+			 Td4 = Td0 + Td3;
+			 ri[WS(ios, 37)] = TcX - Td4;
+			 ri[WS(ios, 5)] = TcX + Td4;
+			 Tjf = Td6 + Td7;
+			 Tjk = Tjg + Tjj;
+			 ii[WS(ios, 5)] = Tjf + Tjk;
+			 ii[WS(ios, 37)] = Tjk - Tjf;
+		    }
+		    {
+			 E Td5, Td8, Tjl, Tjm;
+			 Td5 = TcT - TcW;
+			 Td8 = Td6 - Td7;
+			 ri[WS(ios, 53)] = Td5 - Td8;
+			 ri[WS(ios, 21)] = Td5 + Td8;
+			 Tjl = Td3 - Td0;
+			 Tjm = Tjj - Tjg;
+			 ii[WS(ios, 21)] = Tjl + Tjm;
+			 ii[WS(ios, 53)] = Tjm - Tjl;
+		    }
+	       }
+	       {
+		    E Tb1, Tjz, TjF, Tbt, Tb8, Tju, Tbw, TjE, TbA, TbG, Tbg, Tbq, TbD, TbH, Tbn;
+		    E Tbr, Tb7;
+		    Tb1 = TaX - Tb0;
+		    Tjz = Tjv + Tjy;
+		    TjF = Tjy - Tjv;
+		    Tbt = TaX + Tb0;
+		    Tb7 = FMA(KP831469612, Tb5, KP555570233 * Tb6);
+		    Tb8 = Tb4 - Tb7;
+		    Tju = Tb4 + Tb7;
+		    {
+			 E Tbv, Tby, Tbz, Tbc, Tbf;
+			 Tbv = FNMS(KP555570233, Tb5, KP831469612 * Tb6);
+			 Tbw = Tbu + Tbv;
+			 TjE = Tbv - Tbu;
+			 Tby = Tba + Tbb;
+			 Tbz = Tbd + Tbe;
+			 TbA = FMA(KP956940335, Tby, KP290284677 * Tbz);
+			 TbG = FNMS(KP290284677, Tby, KP956940335 * Tbz);
+			 Tbc = Tba - Tbb;
+			 Tbf = Tbd - Tbe;
+			 Tbg = FMA(KP471396736, Tbc, KP881921264 * Tbf);
+			 Tbq = FNMS(KP881921264, Tbc, KP471396736 * Tbf);
+			 {
+			      E TbB, TbC, Tbj, Tbm;
+			      TbB = Tbh + Tbi;
+			      TbC = Tbk + Tbl;
+			      TbD = FNMS(KP290284677, TbC, KP956940335 * TbB);
+			      TbH = FMA(KP290284677, TbB, KP956940335 * TbC);
+			      Tbj = Tbh - Tbi;
+			      Tbm = Tbk - Tbl;
+			      Tbn = FNMS(KP881921264, Tbm, KP471396736 * Tbj);
+			      Tbr = FMA(KP881921264, Tbj, KP471396736 * Tbm);
+			 }
+		    }
+		    {
+			 E Tb9, Tbo, TjD, TjG;
+			 Tb9 = Tb1 + Tb8;
+			 Tbo = Tbg + Tbn;
+			 ri[WS(ios, 43)] = Tb9 - Tbo;
+			 ri[WS(ios, 11)] = Tb9 + Tbo;
+			 TjD = Tbq + Tbr;
+			 TjG = TjE + TjF;
+			 ii[WS(ios, 11)] = TjD + TjG;
+			 ii[WS(ios, 43)] = TjG - TjD;
+		    }
+		    {
+			 E Tbp, Tbs, TjH, TjI;
+			 Tbp = Tb1 - Tb8;
+			 Tbs = Tbq - Tbr;
+			 ri[WS(ios, 59)] = Tbp - Tbs;
+			 ri[WS(ios, 27)] = Tbp + Tbs;
+			 TjH = Tbn - Tbg;
+			 TjI = TjF - TjE;
+			 ii[WS(ios, 27)] = TjH + TjI;
+			 ii[WS(ios, 59)] = TjI - TjH;
+		    }
+		    {
+			 E Tbx, TbE, Tjt, TjA;
+			 Tbx = Tbt + Tbw;
+			 TbE = TbA + TbD;
+			 ri[WS(ios, 35)] = Tbx - TbE;
+			 ri[WS(ios, 3)] = Tbx + TbE;
+			 Tjt = TbG + TbH;
+			 TjA = Tju + Tjz;
+			 ii[WS(ios, 3)] = Tjt + TjA;
+			 ii[WS(ios, 35)] = TjA - Tjt;
+		    }
+		    {
+			 E TbF, TbI, TjB, TjC;
+			 TbF = Tbt - Tbw;
+			 TbI = TbG - TbH;
+			 ri[WS(ios, 51)] = TbF - TbI;
+			 ri[WS(ios, 19)] = TbF + TbI;
+			 TjB = TbD - TbA;
+			 TjC = Tjz - Tju;
+			 ii[WS(ios, 19)] = TjB + TjC;
+			 ii[WS(ios, 51)] = TjC - TjB;
+		    }
+	       }
+	       {
+		    E Tdd, Tj5, Tjb, TdF, Tdk, TiY, TdI, Tja, TdM, TdS, Tds, TdC, TdP, TdT, Tdz;
+		    E TdD, Tdj;
+		    Tdd = Td9 - Tdc;
+		    Tj5 = TiZ + Tj4;
+		    Tjb = Tj4 - TiZ;
+		    TdF = Td9 + Tdc;
+		    Tdj = FMA(KP195090322, Tdh, KP980785280 * Tdi);
+		    Tdk = Tdg - Tdj;
+		    TiY = Tdg + Tdj;
+		    {
+			 E TdH, TdK, TdL, Tdo, Tdr;
+			 TdH = FNMS(KP195090322, Tdi, KP980785280 * Tdh);
+			 TdI = TdG + TdH;
+			 Tja = TdH - TdG;
+			 TdK = Tdm + Tdn;
+			 TdL = Tdp + Tdq;
+			 TdM = FMA(KP995184726, TdK, KP098017140 * TdL);
+			 TdS = FNMS(KP098017140, TdK, KP995184726 * TdL);
+			 Tdo = Tdm - Tdn;
+			 Tdr = Tdp - Tdq;
+			 Tds = FMA(KP634393284, Tdo, KP773010453 * Tdr);
+			 TdC = FNMS(KP773010453, Tdo, KP634393284 * Tdr);
+			 {
+			      E TdN, TdO, Tdv, Tdy;
+			      TdN = Tdt + Tdu;
+			      TdO = Tdw + Tdx;
+			      TdP = FNMS(KP098017140, TdO, KP995184726 * TdN);
+			      TdT = FMA(KP098017140, TdN, KP995184726 * TdO);
+			      Tdv = Tdt - Tdu;
+			      Tdy = Tdw - Tdx;
+			      Tdz = FNMS(KP773010453, Tdy, KP634393284 * Tdv);
+			      TdD = FMA(KP773010453, Tdv, KP634393284 * Tdy);
+			 }
+		    }
+		    {
+			 E Tdl, TdA, Tj9, Tjc;
+			 Tdl = Tdd + Tdk;
+			 TdA = Tds + Tdz;
+			 ri[WS(ios, 41)] = Tdl - TdA;
+			 ri[WS(ios, 9)] = Tdl + TdA;
+			 Tj9 = TdC + TdD;
+			 Tjc = Tja + Tjb;
+			 ii[WS(ios, 9)] = Tj9 + Tjc;
+			 ii[WS(ios, 41)] = Tjc - Tj9;
+		    }
+		    {
+			 E TdB, TdE, Tjd, Tje;
+			 TdB = Tdd - Tdk;
+			 TdE = TdC - TdD;
+			 ri[WS(ios, 57)] = TdB - TdE;
+			 ri[WS(ios, 25)] = TdB + TdE;
+			 Tjd = Tdz - Tds;
+			 Tje = Tjb - Tja;
+			 ii[WS(ios, 25)] = Tjd + Tje;
+			 ii[WS(ios, 57)] = Tje - Tjd;
+		    }
+		    {
+			 E TdJ, TdQ, TiX, Tj6;
+			 TdJ = TdF + TdI;
+			 TdQ = TdM + TdP;
+			 ri[WS(ios, 33)] = TdJ - TdQ;
+			 ri[WS(ios, 1)] = TdJ + TdQ;
+			 TiX = TdS + TdT;
+			 Tj6 = TiY + Tj5;
+			 ii[WS(ios, 1)] = TiX + Tj6;
+			 ii[WS(ios, 33)] = Tj6 - TiX;
+		    }
+		    {
+			 E TdR, TdU, Tj7, Tj8;
+			 TdR = TdF - TdI;
+			 TdU = TdS - TdT;
+			 ri[WS(ios, 49)] = TdR - TdU;
+			 ri[WS(ios, 17)] = TdR + TdU;
+			 Tj7 = TdP - TdM;
+			 Tj8 = Tj5 - TiY;
+			 ii[WS(ios, 17)] = Tj7 + Tj8;
+			 ii[WS(ios, 49)] = Tj8 - Tj7;
+		    }
+	       }
+	  }
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = {	/* twiddle instruction table; referenced by desc below */
+     {TW_COS, 0, 1},	/* entries come in TW_COS/TW_SIN pairs sharing the same last field */
+     {TW_SIN, 0, 1},
+     {TW_COS, 0, 3},	/* last field takes the values 1, 3, 9, 27, 63 across the five pairs */
+     {TW_SIN, 0, 3},
+     {TW_COS, 0, 9},	/* NOTE(review): last field is presumably the twiddle exponent -- confirm against tw_instr */
+     {TW_SIN, 0, 9},
+     {TW_COS, 0, 27},
+     {TW_SIN, 0, 27},
+     {TW_COS, 0, 63},
+     {TW_SIN, 0, 63},
+     {TW_NEXT, 1, 0}	/* final entry; presumably terminates the list -- TODO confirm against tw_instr */
+};
+
+static const ct_desc desc = { 64, "t2_64", twinstr, {880, 386, 274, 0}, &GENUS, 0, 0, 0 };	/* descriptor passed with t2_64 to the registration call below; 64 / "t2_64" are presumably size and name, {880, 386, 274, 0} presumably an operation count -- TODO confirm against ct_desc */
+
+void X(codelet_t2_64) (planner *p) {	/* entry point for this codelet; p is forwarded unchanged */
+     X(kdft_dit_register) (p, t2_64, &desc);	/* hands the t2_64 function and its descriptor desc to the registration routine */
+}
diff --git a/src/fftw3/dft/codelets/standard/t2_8.c b/src/fftw3/dft/codelets/standard/t2_8.c
new file mode 100644
index 0000000..d9aec9a
--- /dev/null
+++ b/src/fftw3/dft/codelets/standard/t2_8.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:30:08 EDT 2003 */
+
+#include "codelet-dft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_twiddle -compact -variables 4 -twiddle-log3 -n 8 -name t2_8 -include t.h */
+
+/*
+ * This function contains 74 FP additions, 44 FP multiplications,
+ * (or, 56 additions, 26 multiplications, 18 fused multiply/add),
+ * 50 stack variables, and 32 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: t2_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t2_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ * $Id: t2_8.c,v 1.1 2008/10/17 06:11:09 scuri Exp $
+ */
+
+#include "t.h"
+/* Generated size-8 twiddle codelet: transforms ri/ii in place, m iterations; returns the advanced W pointer. */
+static const R *t2_8(R *ri, R *ii, const R *W, stride ios, int m, int dist)
+{
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* 1/sqrt(2) */
+     int i; /* the loop below runs m times, stepping ri and ii by dist and W by 6 */
+     for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 6) {
+	  E T1, T1c, TJ, T17, TY, TV, TR, Tk, Tr, TN, TM, Tw, TB, TS, Te;
+	  E T1b;
+	  T1 = ri[0]; /* element 0 is used below without a twiddle multiplication */
+	  T1c = ii[0];
+	  {
+	       E T9, Td, Th, Tj, To, Tq, Ty, TA, Tv, Tu, T3, T6, T4, Tb, T7;
+	       E Ta, Tg, Ti, TI, TX, Tl, Tm, Tn, TW, Tp, TF;
+	       T9 = ri[WS(ios, 4)]; /* load inputs 1..7 (real and imaginary parts) */
+	       Td = ii[WS(ios, 4)];
+	       Th = ri[WS(ios, 2)];
+	       Tj = ii[WS(ios, 2)];
+	       To = ri[WS(ios, 6)];
+	       Tq = ii[WS(ios, 6)];
+	       Ty = ri[WS(ios, 5)];
+	       TA = ii[WS(ios, 5)];
+	       Tv = ii[WS(ios, 1)];
+	       Tu = ri[WS(ios, 1)];
+	       {
+		    E TD, TE, TG, TH, T2, T5;
+		    TD = ri[WS(ios, 7)];
+		    TE = ii[WS(ios, 7)];
+		    TG = ri[WS(ios, 3)];
+		    TH = ii[WS(ios, 3)];
+		    T2 = W[2]; /* W[0..5] are the only twiddle values read in an iteration */
+		    T5 = W[3];
+		    T3 = W[0];
+		    T6 = W[1];
+		    T4 = T2 * T3;
+		    Tb = T5 * T3;
+		    T7 = T5 * T6;
+		    Ta = T2 * T6;
+		    Tg = T4 + T7; /* Tg, Ti (and T8, Tc, Tx, Tz later) are built from products of the loaded W values */
+		    Ti = Ta - Tb;
+		    TI = FMA(T2, TG, T5 * TH);
+		    TX = FNMS(T5, TG, T2 * TH);
+		    Tl = W[4];
+		    Tm = W[5];
+		    Tn = FMA(Tl, T3, Tm * T6);
+		    TW = FNMS(Tm, TD, Tl * TE);
+		    Tp = FNMS(Tm, T3, Tl * T6);
+		    TF = FMA(Tl, TD, Tm * TE);
+	       }
+	       TJ = TF + TI;
+	       T17 = TW + TX;
+	       TY = TW - TX;
+	       TV = TF - TI;
+	       TR = FNMS(T6, Tu, T3 * Tv);
+	       Tk = FNMS(Ti, Tj, Tg * Th);
+	       Tr = FNMS(Tp, Tq, Tn * To);
+	       TN = FMA(Tp, To, Tn * Tq);
+	       TM = FMA(Ti, Th, Tg * Tj);
+	       Tw = FMA(T3, Tu, T6 * Tv);
+	       {
+		    E Tx, Tz, T8, Tc;
+		    Tx = FNMS(Tm, Ti, Tl * Tg);
+		    Tz = FMA(Tl, Ti, Tm * Tg);
+		    TB = FMA(Tx, Ty, Tz * TA);
+		    TS = FNMS(Tz, Ty, Tx * TA);
+		    T8 = T4 - T7;
+		    Tc = Ta + Tb;
+		    Te = FMA(T8, T9, Tc * Td);
+		    T1b = FNMS(Tc, T9, T8 * Td);
+	       }
+	  }
+	  {
+	       E TK, T1f, T18, T19, Tt, T15, T1e, T1g, TC, T16;
+	       TC = Tw + TB;
+	       TK = TC + TJ;
+	       T1f = TJ - TC;
+	       T16 = TR + TS;
+	       T18 = T16 - T17;
+	       T19 = T16 + T17;
+	       {
+		    E Tf, Ts, T1a, T1d;
+		    Tf = T1 + Te;
+		    Ts = Tk + Tr;
+		    Tt = Tf + Ts;
+		    T15 = Tf - Ts;
+		    T1a = TM + TN;
+		    T1d = T1b + T1c;
+		    T1e = T1a + T1d;
+		    T1g = T1d - T1a;
+	       }
+	       ri[WS(ios, 4)] = Tt - TK; /* this group stores outputs 0, 2, 4 and 6 */
+	       ii[WS(ios, 4)] = T1e - T19;
+	       ri[0] = Tt + TK;
+	       ii[0] = T19 + T1e;
+	       ri[WS(ios, 6)] = T15 - T18;
+	       ii[WS(ios, 6)] = T1g - T1f;
+	       ri[WS(ios, 2)] = T15 + T18;
+	       ii[WS(ios, 2)] = T1f + T1g;
+	  }
+	  {
+	       E TZ, T13, TP, T11, TU, T12, T1k, T1m, TL, TO;
+	       TZ = TV - TY;
+	       T13 = TV + TY;
+	       TL = T1 - Te;
+	       TO = TM - TN;
+	       TP = TL + TO;
+	       T11 = TL - TO;
+	       {
+		    E TQ, TT, T1i, T1j;
+		    TQ = Tw - TB;
+		    TT = TR - TS;
+		    TU = TQ + TT;
+		    T12 = TT - TQ;
+		    T1i = T1c - T1b;
+		    T1j = Tk - Tr;
+		    T1k = T1i - T1j;
+		    T1m = T1j + T1i;
+	       }
+	       {
+		    E T10, T1h, T14, T1l;
+		    T10 = KP707106781 * (TU + TZ); /* this group stores outputs 1, 3, 5 and 7 */
+		    ri[WS(ios, 5)] = TP - T10;
+		    ri[WS(ios, 1)] = TP + T10;
+		    T1h = KP707106781 * (T12 + T13);
+		    ii[WS(ios, 1)] = T1h + T1k;
+		    ii[WS(ios, 5)] = T1k - T1h;
+		    T14 = KP707106781 * (T12 - T13);
+		    ri[WS(ios, 7)] = T11 - T14;
+		    ri[WS(ios, 3)] = T11 + T14;
+		    T1l = KP707106781 * (TZ - TU);
+		    ii[WS(ios, 3)] = T1l + T1m;
+		    ii[WS(ios, 7)] = T1m - T1l;
+	       }
+	  }
+     }
+     return W; /* W has moved forward by 6 for each of the m iterations */
+}
+/* Twiddle list for t2_8: three TW_COS/TW_SIN pairs (last field 1, 3, 7), six values like the W[0..5] t2_8 reads. */
+static const tw_instr twinstr[] = {
+     {TW_COS, 0, 1},
+     {TW_SIN, 0, 1},
+     {TW_COS, 0, 3},
+     {TW_SIN, 0, 3},
+     {TW_COS, 0, 7},
+     {TW_SIN, 0, 7},
+     {TW_NEXT, 1, 0}  /* closing entry of the list */
+};
+/* Descriptor registered with t2_8; {56, 26, 18, 0} repeats the add / mul / fused counts quoted in the file header. */
+static const ct_desc desc = { 8, "t2_8", twinstr, {56, 26, 18, 0}, &GENUS, 0, 0, 0 };
+/* Registers the t2_8 codelet together with its descriptor on planner p. */
+void X(codelet_t2_8) (planner *p) {
+     X(kdft_dit_register) (p, t2_8, &desc);
+}
diff --git a/src/fftw3/dft/codelets/t.c b/src/fftw3/dft/codelets/t.c
new file mode 100644
index 0000000..b3fe55b
--- /dev/null
+++ b/src/fftw3/dft/codelets/t.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "codelet-dft.h"
+#include "t.h"
+/* Genus test: every nonzero s1, s2 or dist in descriptor d must equal the ios, vs or dist asked about. */
+static int okp(const ct_desc *d,
+	       const R *rio, const R *iio, 
+	       int ios, int vs, int m, int dist, const planner *plnr)
+{
+     UNUSED(rio); UNUSED(iio); UNUSED(m); UNUSED(plnr); /* none of these affects the answer */
+     return (1
+	     && (!d->s1 || (d->s1 == ios)) /* a zero field in d places no constraint */
+	     && (!d->s2 || (d->s2 == vs))
+	     && (!d->dist || (d->dist == dist))
+	  );
+}
+/* Genus object referenced by the codelet descriptors: the okp test plus the value 1 (presumably genus->vl -- confirm in ct_genus). */
+const ct_genus GENUS = { okp, 1 };
diff --git a/src/fftw3/dft/codelets/t.h b/src/fftw3/dft/codelets/t.h
new file mode 100644
index 0000000..ecb5abc
--- /dev/null
+++ b/src/fftw3/dft/codelets/t.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* GENUS is the name-mangled X(dft_t_genus); t.c supplies the definition. */
+#define GENUS X(dft_t_genus)
+extern const ct_genus GENUS;
diff --git a/src/fftw3/dft/conf.c b/src/fftw3/dft/conf.c
new file mode 100644
index 0000000..30edb41
--- /dev/null
+++ b/src/fftw3/dft/conf.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: conf.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+#include "dft.h"
+/* Registration functions that X(dft_conf_standard) runs first; the list ends at SOLVTAB_END. */
+static const solvtab s =
+{
+     SOLVTAB(X(dft_indirect_register)),
+     SOLVTAB(X(dft_rank0_register)),
+     SOLVTAB(X(dft_rank_geq2_register)),
+     SOLVTAB(X(dft_vrank_geq1_register)),
+     SOLVTAB(X(dft_vrank2_transpose_register)),
+     SOLVTAB(X(dft_vrank3_transpose_register)),
+     SOLVTAB(X(dft_buffered_register)),
+     SOLVTAB(X(dft_generic_register)),
+     SOLVTAB(X(dft_rader_register)),
+     SOLVTAB(X(dft_nop_register)),
+     SOLVTAB_END
+};
+/* Executes the local table s, then the dft_standard and dft_inplace tables (plus K7 / SIMD ones when enabled) on p. */
+void X(dft_conf_standard)(planner *p)
+{
+     X(solvtab_exec)(s, p);
+     X(solvtab_exec)(X(solvtab_dft_standard), p);
+     X(solvtab_exec)(X(solvtab_dft_inplace), p);
+#if HAVE_K7
+     X(solvtab_exec)(X(solvtab_dft_k7), p); /* only compiled in when HAVE_K7 is nonzero */
+#endif
+#if HAVE_SIMD
+     X(solvtab_exec)(X(solvtab_dft_simd), p); /* only compiled in when HAVE_SIMD is nonzero */
+#endif
+}
diff --git a/src/fftw3/dft/ct-dif.c b/src/fftw3/dft/ct-dif.c
new file mode 100644
index 0000000..490dfc9
--- /dev/null
+++ b/src/fftw3/dft/ct-dif.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: ct-dif.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+/* decimation in frequency Cooley-Tukey */
+#include "dft.h"
+#include "ct.h"
+/* Run a DIF plan: twiddle codelet over each of the vl input vectors first, child plan afterwards. */
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const plan_ct *ego = (const plan_ct *) ego_;
+     /* codelet pass: it is handed ri/ii themselves, so the input arrays are what it works on */
+     {
+          int i, m = ego->m, vl = ego->vl;
+          int is = ego->is, ivs = ego->ivs;
+          /* vector i starts i * ivs elements into the input */
+          for (i = 0; i < vl; ++i)
+               ego->k.dif(ri + i * ivs, ii + i * ivs, ego->td->W,
+			  ego->ios, m, is);
+     }
+
+     /* two-dimensional r x vl sub-transform: */
+     {
+	  plan *cld0 = ego->cld;
+	  plan_dft *cld = (plan_dft *) cld0;
+	  cld->apply(cld0, ri, ii, ro, io);
+     }
+}
+/* Core test: shared Cooley-Tukey conditions, input may be overwritten, and the codelet genus accepts the strides. */
+static int applicable0(const solver_ct *ego, const problem *p_,
+		       const planner *plnr)
+{
+     if (X(dft_ct_applicable)(ego, p_)) {
+	  int ivs, ovs;
+	  int vl;
+          const ct_desc *e = ego->desc;
+          const problem_dft *p = (const problem_dft *) p_;
+          iodim *d = p->sz->dims;
+	  int m = d[0].n / e->radix; /* transform length divided by the codelet radix */
+	  X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); /* fills vl, ivs, ovs from the vector tensor */
+          return (1
+                  /* DIF destroys the input and we don't like it */
+                  && (p->ri == p->ro || DESTROY_INPUTP(plnr))
+                  /* okp is asked about the first vector and about the one ivs further on */
+		  && (e->genus->okp(e, p->ri, p->ii,
+				    (int)m * d[0].is, 0, m, d[0].is, plnr))
+		  && (e->genus->okp(e, p->ri + ivs, p->ii + ivs,
+				    (int)m * d[0].is, 0, m, d[0].is, plnr))
+	       );
+     }
+     return 0;
+}
+/* Full test: applicable0 plus the planner-flag restrictions below. */
+static int applicable(const solver_ct *ego, const problem *p_,
+		 const planner *plnr)
+{
+     const problem_dft *p;
+     
+     if (!applicable0(ego, p_, plnr))  return 0;
+
+     p = (const problem_dft *) p_;
+
+     /* emulate fftw2 behavior */
+     if (NO_VRECURSEP(plnr) && (p->vecsz->rnk > 0)) return 0;
+     /* under NO_UGLYP, refuse whatever X(ct_uglyp) reports for this length / radix (first argument 16) */
+     if (NO_UGLYP(plnr) && X(ct_uglyp)(16, p->sz->dims[0].n, ego->desc->radix))
+	  return 0;
+
+     return 1;
+}
+/* Completes a new DIF plan: creates the codelet stride ego->ios and fills in the operation counts. */
+static void finish(plan_ct *ego)
+{
+     const ct_desc *d = ego->slv->desc;
+     ego->ios = X(mkstride)(ego->r, ego->m * ego->is); /* built from the input stride (DIT uses os) */
+     X(ops_madd)(ego->vl * ego->m / d->genus->vl, &d->ops, &ego->cld->ops,
+		 &ego->super.super.ops); /* presumably count * codelet ops + child ops -- confirm in X(ops_madd) */
+}
+/* Solver entry point: forwards to the shared Cooley-Tukey plan builder with the DIF callbacks. */
+static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
+{
+     static const ctadt adt = {
+	  sizeof(plan_ct), X(dft_mkcld_dif), finish, applicable, apply /* pln_size, mkcld, finish, applicable, apply */
+     };
+     return X(mkplan_dft_ct)((const solver_ct *) ego, p, plnr, &adt);
+}
+
+/* Creates the "dft-dif" solver around the given DIF codelet and its descriptor. */
+solver *X(mksolver_dft_ct_dif)(kdft_dif codelet, const ct_desc *desc)
+{
+     static const solver_adt sadt = { mkplan };
+     static const char name[] = "dft-dif";
+     union kct k;
+     k.dif = codelet; /* only the dif member of the union is set */
+
+     return X(mksolver_dft_ct)(k, desc, name, &sadt);
+}
diff --git a/src/fftw3/dft/ct-dit.c b/src/fftw3/dft/ct-dit.c
new file mode 100644
index 0000000..3251c20
--- /dev/null
+++ b/src/fftw3/dft/ct-dit.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: ct-dit.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+/* decimation in time Cooley-Tukey */
+#include "dft.h"
+#include "ct.h"
+/* Run a DIT plan: child plan first (input to output), then the twiddle codelet over each output vector. */
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const plan_ct *ego = (const plan_ct *) ego_;
+     plan *cld0 = ego->cld;
+     plan_dft *cld = (plan_dft *) cld0;
+
+     /* two-dimensional r x vl sub-transform: */
+     cld->apply(cld0, ri, ii, ro, io);
+     /* codelet pass: it is handed ro/io themselves, so it works on the output arrays */
+     {
+          int i, m = ego->m, vl = ego->vl;
+          int os = ego->os, ovs = ego->ovs;
+          /* vector i starts i * ovs elements into the output */
+          for (i = 0; i < vl; ++i)
+               ego->k.dit(ro + i * ovs, io + i * ovs, ego->td->W,
+			  ego->ios, m, os);
+     }
+}
+/* Core test: shared Cooley-Tukey conditions, and the codelet genus accepts the output strides. */
+static int applicable0(const solver_ct *ego, const problem *p_,
+		       const planner *plnr)
+{
+     UNUSED(plnr); /* plnr is nevertheless forwarded to okp below */
+     if (X(dft_ct_applicable)(ego, p_)) {
+	  int ivs, ovs;
+	  int vl;
+          const ct_desc *e = ego->desc;
+          const problem_dft *p = (const problem_dft *) p_;
+          iodim *d = p->sz->dims;
+	  int m = d[0].n / e->radix; /* transform length divided by the codelet radix */
+	  X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); /* fills vl, ivs, ovs from the vector tensor */
+          return (1
+		  && (e->genus->okp(e, p->ro, p->io, /* first output vector */
+				    (int)m * d[0].os, 0, m, d[0].os, plnr))
+		  && (e->genus->okp(e, p->ro + ovs, p->io + ovs, /* the vector ovs further on */
+				    (int)m * d[0].os, 0, m, d[0].os, plnr))
+	       );
+     }
+     return 0;
+}
+/* Full test: applicable0 plus the planner-flag restrictions below. */
+static int applicable(const solver_ct *ego, const problem *p_,
+		      const planner *plnr)
+{
+     const problem_dft *p;
+
+     if (!applicable0(ego, p_, plnr))
+          return 0;
+
+     p = (const problem_dft *) p_;
+
+     /* emulate fftw2 behavior */
+     if (NO_VRECURSEP(plnr) && (p->vecsz->rnk > 0))  return 0;
+     /* both rejections below apply only when NO_UGLYP(plnr) holds */
+     if (NO_UGLYP(plnr)) {
+	  if (X(ct_uglyp)(16, p->sz->dims[0].n, ego->desc->radix)) return 0;
+	  if (NONTHREADED_ICKYP(plnr))
+	       return 0; /* prefer threaded version */
+     }
+
+     return 1;
+}
+
+/* Completes a new DIT plan: creates the codelet stride ego->ios and fills in the operation counts. */
+static void finish(plan_ct *ego)
+{
+     const ct_desc *d = ego->slv->desc;
+     ego->ios = X(mkstride)(ego->r, ego->m * ego->os); /* built from the output stride (DIF uses is) */
+     X(ops_madd)(ego->vl * ego->m / d->genus->vl, &d->ops, &ego->cld->ops,
+		 &ego->super.super.ops); /* presumably count * codelet ops + child ops -- confirm in X(ops_madd) */
+}
+/* Solver entry point: forwards to the shared Cooley-Tukey plan builder with the DIT callbacks. */
+static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
+{
+     static const ctadt adt = {
+	  sizeof(plan_ct), X(dft_mkcld_dit), finish, applicable, apply /* pln_size, mkcld, finish, applicable, apply */
+     };
+     return X(mkplan_dft_ct)((const solver_ct *) ego, p, plnr, &adt);
+}
+
+/* Creates the "dft-dit" solver around the given DIT codelet and its descriptor. */
+solver *X(mksolver_dft_ct_dit)(kdft_dit codelet, const ct_desc *desc)
+{
+     static const solver_adt sadt = { mkplan };
+     static const char name[] = "dft-dit";
+     union kct k;
+     k.dit = codelet; /* only the dit member of the union is set */
+
+     return X(mksolver_dft_ct)(k, desc, name, &sadt);
+}
diff --git a/src/fftw3/dft/ct-ditbuf.c b/src/fftw3/dft/ct-ditbuf.c
new file mode 100644
index 0000000..2f7b57f
--- /dev/null
+++ b/src/fftw3/dft/ct-ditbuf.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: ct-ditbuf.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+/* decimation in time Cooley-Tukey.  Codelet operates on
+   contiguous buffer rather than directly on the output array.  */
+
+/* FIXME: find a way to use rank-0 transforms for this stuff */
+
+#include "dft.h"
+#include "ct.h"
+
+/*
+   Copy A -> B, where A and B are n0 x n1 complex matrices such that the (i0, i1)
+   element has index (i0 * s0 + i1 * s1); imaginary parts sit iA - rA (resp. iB - rB) past the real ones.
+*/
+static void cpy(int n0, int n1, 
+		const R *rA, const R *iA, int sa0, int sa1, 
+		R *rB, R *iB, int sb0, int sb1)
+{
+     int i0, i1;
+     int ima = iA - rA, imb = iB - rB; /* real-to-imaginary offsets, so one pointer per matrix suffices */
+
+     for (i0 = 0; i0 < n0; ++i0) {
+	  const R *pa; 
+	  R *pb;
+
+	  pa = rA; rA += sa0; /* row starts advance by s0, elements within a row by s1 */
+	  pb = rB; rB += sb0;
+	  for (i1 = 0; i1 < n1; ++i1) {
+	       R xr = pa[0], xi = pa[ima]; /* both parts are read before either is stored */
+	       pb[0] = xr; pb[imb] = xi; 
+	       pa += sa1; pb += sb1;
+	  }
+     }
+}
+/* Gather a batch into buf, run codelet k on the packed copy, scatter the result back; returns k's W pointer. */
+static const R *doit(kdft_dit k, R *rA, R *iA, const R *W, int ios, int dist, 
+		     int r, int batchsz, R *buf, stride bufstride)
+{
+     cpy(r, batchsz, rA, iA, ios, dist, buf, buf + 1, 2, 2 * r); /* buf: re at buf, im at buf + 1; strides 2 and 2 * r */
+     W = k(buf, buf + 1, W, bufstride, batchsz, 2 * r); /* batchsz codelet iterations, 2 * r apart */
+     cpy(r, batchsz, buf, buf + 1, 2, 2 * r, rA, iA, ios, dist); /* same layouts, opposite direction */
+     return W;
+}
+/* Number of codelet iterations packed into the buffer for each full doit() call in apply(). */
+#define BATCHSZ 4 /* FIXME: parametrize? */
+/* Run a buffered DIT plan: child plan first, then the codelet on BATCHSZ-sized packed copies of the output. */
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const plan_ct *ego = (const plan_ct *) ego_;
+     plan *cld0 = ego->cld;
+     plan_dft *cld = (plan_dft *) cld0;
+
+     /* two-dimensional r x vl sub-transform: */
+     cld->apply(cld0, ri, ii, ro, io);
+
+     {
+          int i, j, m = ego->m, vl = ego->vl, r = ego->r;
+          int os = ego->os, ovs = ego->ovs, ios = ego->iios; /* iios is the int stride set in finish() */
+	  R *buf;
+
+	  STACK_MALLOC(R *, buf, r * BATCHSZ * 2 * sizeof(R)); /* room for one full batch: r * BATCHSZ complex values */
+
+          for (i = 0; i < vl; ++i) {
+	       R *rA = ro + i * ovs, *iA = io + i * ovs;
+	       const R *W = ego->td->W; /* every vector starts again from the head of the twiddle array */
+	       /* full batches: W carries on from the value the previous doit() returned */
+	       for (j = m; j >= BATCHSZ; j -= BATCHSZ) {
+		    W = doit(ego->k.dit, rA, iA, W, ios, os, r, 
+			     BATCHSZ, buf, ego->vs);
+		    rA += os * (int)BATCHSZ;
+		    iA += os * (int)BATCHSZ;
+	       }
+
+	       /* do remaining j calls, if any */
+	       if (j > 0)
+		    doit(ego->k.dit, rA, iA, W, ios, os, r, j, buf, ego->vs);
+
+	  }
+
+	  STACK_FREE(buf);
+     }
+}
+/* Core test: shared Cooley-Tukey conditions, and the genus accepts the packed-buffer layout for both batch sizes. */
+static int applicable0(const solver_ct *ego, const problem *p_,
+		       const planner *plnr)
+{
+     UNUSED(plnr); /* plnr is nevertheless forwarded to okp below */
+     if (X(dft_ct_applicable)(ego, p_)) {
+          const ct_desc *e = ego->desc;
+          const problem_dft *p = (const problem_dft *) p_;
+          iodim *d = p->sz->dims;
+	  int m = d[0].n / e->radix; /* transform length divided by the codelet radix */
+          return (1
+                  /* okp gets the buffer's geometry (re at 0, im at +1, stride 2, dist 2 * radix), not the caller's arrays */
+                  /* check both batch size and remainder */
+		  && (m < BATCHSZ ||
+		      (e->genus->okp(e, 0, ((const R *)0)+1, 2, 0, BATCHSZ,
+				     2 * e->radix, plnr)))
+		  && (e->genus->okp(e, 0, ((const R *)0)+1, 2, 0, m % BATCHSZ,
+				    2 * e->radix, plnr))
+	       );
+     }
+     return 0;
+}
+/* Full test: applicable0 plus the planner-flag restrictions below. */
+static int applicable(const solver_ct *ego, const problem *p_,
+		      const planner *plnr)
+{
+     const problem_dft *p;
+
+     if (!applicable0(ego, p_, plnr)) return 0;
+
+     p = (const problem_dft *) p_;
+
+     /* emulate fftw2 behavior */
+     if (NO_VRECURSEP(plnr) && (p->vecsz->rnk > 0))  return 0;
+     /* same ugliness check as the unbuffered solvers, but with 512 instead of 16 as first argument */
+     if (NO_UGLYP(plnr) && X(ct_uglyp)(512, p->sz->dims[0].n,
+				       ego->desc->radix))
+	  return 0;
+
+     return 1;
+}
+
+/* Completes a new buffered plan: int stride for cpy(), stride object for the buffer, operation counts. */
+static void finish(plan_ct *ego)
+{
+     const ct_desc *d = ego->slv->desc;
+     ego->iios = ego->m * ego->os; /* plain int, passed to cpy() through doit() */
+     ego->vs = X(mkstride)(ego->r, 2); /* for the packed buffer, whose entries are 2 apart */
+     X(ops_madd)(ego->vl * ego->m / d->genus->vl, &d->ops, &ego->cld->ops,
+		 &ego->super.super.ops); /* presumably count * codelet ops + child ops -- confirm in X(ops_madd) */
+     /* the copies into and out of the buffer are charged on top: */
+     /* 4 load/stores * N * VL */
+     ego->super.super.ops.other += 4 * ego->r * ego->m * ego->vl;
+}
+/* Solver entry point: forwards to the shared Cooley-Tukey plan builder with the buffered-DIT callbacks. */
+static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
+{
+     static const ctadt adt = {
+	  sizeof(plan_ct), X(dft_mkcld_dit), finish, applicable, apply /* pln_size, mkcld, finish, applicable, apply */
+     };
+     return X(mkplan_dft_ct)((const solver_ct *) ego, p, plnr, &adt);
+}
+
+/* Creates the "dft-ditbuf" solver around the given DIT codelet and its descriptor. */
+solver *X(mksolver_dft_ct_ditbuf)(kdft_dit codelet, const ct_desc *desc)
+{
+     static const solver_adt sadt = { mkplan };
+     static const char name[] = "dft-ditbuf";
+     union kct k;
+     k.dit = codelet; /* only the dit member of the union is set */
+
+     return X(mksolver_dft_ct)(k, desc, name, &sadt);
+}
diff --git a/src/fftw3/dft/ct-ditf.c b/src/fftw3/dft/ct-ditf.c
new file mode 100644
index 0000000..10823ab
--- /dev/null
+++ b/src/fftw3/dft/ct-ditf.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: ct-ditf.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+/* decimation in time Cooley-Tukey, in-place only, built on a difsq codelet */
+#include "dft.h"
+#include "ct.h"
+/* Run a "ditf" plan: one difsq codelet call on the in-place arrays, then the child plan on the same arrays. */
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const plan_ct *ego = (const plan_ct *) ego_;
+     plan *cld0 = ego->cld;
+     plan_dft *cld = (plan_dft *) cld0;
+
+     UNUSED(ro);  /* == ri */
+     UNUSED(io);  /* == ii */
+     ego->k.difsq(ri, ii, ego->td->W, ego->ios, ego->vs, ego->m, ego->is); /* single call; no loop over vl here */
+
+     /* two-dimensional r x vl sub-transform: */
+     cld->apply(cld0, ri, ii, ri, ii);
+}
+/* Accepts in-place problems whose single vector dimension has radix entries and whose strides satisfy the relations below. */
+static int applicable(const solver_ct *ego, const problem *p_,
+		      const planner *plnr)
+{
+     UNUSED(plnr); /* plnr is nevertheless forwarded to okp below */
+     if (X(dft_ct_applicable)(ego, p_)) {
+          const ct_desc *e = ego->desc;
+          const problem_dft *p = (const problem_dft *) p_;
+          iodim *d = p->sz->dims, *vd = p->vecsz->dims;
+	  int m = d[0].n / e->radix; /* transform length divided by the codelet radix */
+
+          return (1
+                  && p->ri == p->ro  /* inplace only */
+                  && p->vecsz->rnk == 1
+                  && vd[0].n == e->radix
+                  && d[0].os == vd[0].is
+                  && d[0].is == (int)e->radix * vd[0].is
+                  && vd[0].os == (int)d[0].n * vd[0].is
+                  /* finally the genus is consulted with ios = vd[0].os and vs = vd[0].is */
+		  && (e->genus->okp(e, p->ri, p->ii, 
+				    vd[0].os, vd[0].is, m, d[0].is, plnr))
+	       );
+     }
+     return 0;
+}
+/* Completes a new "ditf" plan: creates both stride objects and fills in the operation counts. */
+static void finish(plan_ct *ego)
+{
+     const ct_desc *d = ego->slv->desc;
+     ego->ios = X(mkstride)(ego->r, ego->ovs); /* from the vector output stride */
+     ego->vs = X(mkstride)(ego->r, ego->ivs); /* from the vector input stride */
+     X(ops_madd)(ego->m / d->genus->vl, &ego->slv->desc->ops, /* ego->slv->desc is the same pointer as d */
+		 &ego->cld->ops, &ego->super.super.ops);
+}
+/* Builds the child problem: in place on p->ro/p->io, length n / radix, with a two-dimensional vector tensor. */
+static problem *mkcld(const solver_ct *ego, const problem_dft *p)
+{
+     iodim *d = p->sz->dims;
+     iodim *vd = p->vecsz->dims;
+     const ct_desc *e = ego->desc;
+
+     return X(mkproblem_dft_d)(
+	  X(mktensor_1d)(d[0].n / e->radix, d[0].is, d[0].is), /* same stride given for input and output */
+	  X(mktensor_2d)(vd[0].n, vd[0].os, vd[0].os,
+			 e->radix, vd[0].is,vd[0].is),
+	  p->ro, p->io, p->ro, p->io); /* output arrays serve as both input and output */
+}
+/* Solver entry point: forwards to the shared Cooley-Tukey plan builder with this file's callbacks. */
+static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
+{
+     static const ctadt adt = {
+	  sizeof(plan_ct), mkcld, finish, applicable, apply /* pln_size, mkcld, finish, applicable, apply */
+     };
+     return X(mkplan_dft_ct)((const solver_ct *) ego, p, plnr, &adt);
+}
+
+/* Creates the "dft-ditf" solver around the given difsq codelet and its descriptor. */
+solver *X(mksolver_dft_ct_ditf)(kdft_difsq codelet, const ct_desc *desc)
+{
+     static const solver_adt sadt = { mkplan };
+     static const char name[] = "dft-ditf";
+     union kct k;
+     k.difsq = codelet; /* only the difsq member of the union is set */
+
+     return X(mksolver_dft_ct)(k, desc, name, &sadt);
+}
diff --git a/src/fftw3/dft/ct.c b/src/fftw3/dft/ct.c
new file mode 100644
index 0000000..1cee970
--- /dev/null
+++ b/src/fftw3/dft/ct.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: ct.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+/* generic Cooley-Tukey routines */
+#include "dft.h"
+#include "ct.h"
+/* Plan destructor: hands the child plan and both stride members to their destroy routines. */
+static void destroy(plan *ego_)
+{
+     plan_ct *ego = (plan_ct *) ego_;
+
+     X(plan_destroy_internal)(ego->cld);
+     X(stride_destroy)(ego->ios); /* ios or vs is still the 0 from X(mkplan_dft_ct) when finish() did not set it */
+     X(stride_destroy)(ego->vs);
+}
+/* Passes flg on to the child plan and to X(twiddle_awake) for this plan's twiddle pointer ego->td. */
+static void awake(plan *ego_, int flg)
+{
+     plan_ct *ego = (plan_ct *) ego_;
+     plan *cld = ego->cld;
+
+     AWAKE(cld, flg);
+     X(twiddle_awake)(flg, &ego->td, ego->slv->desc->tw, 
+		      ego->r * ego->m, ego->r, ego->m); /* r * m is the full transform length n */
+}
+/* Prints the solver name, radix, twiddle length, vector length, codelet name and child plan. */
+static void print(const plan *ego_, printer *p)
+{
+     const plan_ct *ego = (const plan_ct *) ego_;
+     const solver_ct *slv = ego->slv;
+     const ct_desc *e = slv->desc;
+
+     p->print(p, "(%s-%d/%d%v \"%s\"%(%p%))",
+              slv->nam, ego->r, X(twiddle_length)(ego->r, e->tw),
+	      ego->vl, e->nam, ego->cld); /* arguments in the order the format consumes them */
+}
+/* Nonzero when a divides b; both operands are cast to int first. */
+#define divides(a, b) (((int)(b) % (int)(a)) == 0)
+/* Precondition shared by all Cooley-Tukey solvers here: a DFT problem of rank 1 with vector rank at most 1. */
+int X(dft_ct_applicable)(const solver_ct *ego, const problem *p_)
+{
+     if (DFTP(p_)) {
+          const problem_dft *p = (const problem_dft *) p_;
+          const ct_desc *d = ego->desc;
+          return (1
+                  && p->sz->rnk == 1
+                  && p->vecsz->rnk <= 1
+                  && divides(d->radix, p->sz->dims[0].n) /* the codelet radix must divide the length */
+	       );
+     }
+     return 0;
+}
+
+/* Plan method table installed by X(mkplan_dft_ct): X(dft_solve) plus the awake, print and destroy above. */
+static const plan_adt padt =
+{
+     X(dft_solve),
+     awake,
+     print,
+     destroy
+};
+
+/* Shared plan builder: checks applicability, plans the child, fills a plan_ct; returns a null plan on failure. */
+plan *X(mkplan_dft_ct)(const solver_ct *ego,
+                       const problem *p_,
+                       planner *plnr,
+                       const ctadt *adt)
+{
+     plan_ct *pln;
+     plan *cld;
+     int n, r, m;
+     iodim *d;
+     const problem_dft *p;
+     const ct_desc *e = ego->desc;
+
+     if (!adt->applicable(ego, p_, plnr))
+          return (plan *) 0;
+
+     p = (const problem_dft *) p_;
+     d = p->sz->dims;
+     n = d[0].n;
+     r = e->radix;
+     m = n / r; /* exact: adt->applicable has just accepted the problem */
+     /* the variant's mkcld callback builds the child problem, which is then planned */
+     cld = X(mkplan_d)(plnr, adt->mkcld(ego, p));
+
+     if (!cld)
+          return (plan *) 0;
+
+     A(adt->pln_size >= sizeof(plan_ct)); /* a variant may ask for a larger plan, never a smaller one */
+     pln = (plan_ct *) X(mkplan_dft)(adt->pln_size, &padt, adt->apply);
+
+     pln->slv = ego;
+     pln->cld = cld;
+     pln->k = ego->k;
+     pln->r = r;
+     pln->m = m;
+
+     pln->is = d[0].is;
+     pln->os = d[0].os;
+
+     pln->ios = pln->vs = 0; /* adt->finish fills in whichever of these the variant uses */
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+
+     pln->td = 0; /* handed to X(twiddle_awake) later, in awake() */
+     adt->finish(pln);
+
+     return &(pln->super.super);
+}
+/* Shared solver constructor: creates a solver_ct with MKSOLVER and stores desc, k and nam in it. */
+solver *X(mksolver_dft_ct)(union kct k, const ct_desc *desc,
+                           const char *nam, const solver_adt *adt)
+{
+     solver_ct *slv;
+
+     slv = MKSOLVER(solver_ct, adt);
+
+     slv->desc = desc;
+     slv->k = k;
+     slv->nam = nam; /* the pointer is stored, not a copy of the string */
+     return &(slv->super);
+}
+
+/* routines to create children are shared by many solvers; DIT child: length m, a radix-long dimension put before p->vecsz */
+problem *X(dft_mkcld_dit)(const solver_ct *ego, const problem_dft *p)
+{
+     iodim *d = p->sz->dims;
+     const ct_desc *e = ego->desc;
+     int m = d[0].n / e->radix;
+
+     tensor *radix = X(mktensor_1d)(e->radix, d[0].is, m * d[0].os); /* radix entries: input stride is, output stride m * os */
+     tensor *cld_vec = X(tensor_append)(radix, p->vecsz);
+     X(tensor_destroy)(radix); /* the temporary is released once cld_vec exists */
+
+     return X(mkproblem_dft_d)(X(mktensor_1d)(m, e->radix * d[0].is, d[0].os),
+			       cld_vec, p->ri, p->ii, p->ro, p->io);
+}
+/* DIF counterpart of X(dft_mkcld_dit): same construction with the stride multipliers moved between input and output. */
+problem *X(dft_mkcld_dif)(const solver_ct *ego, const problem_dft *p)
+{
+     iodim *d = p->sz->dims;
+     const ct_desc *e = ego->desc;
+     int m = d[0].n / e->radix;
+
+     tensor *radix = X(mktensor_1d)(e->radix, m * d[0].is, d[0].os); /* radix entries: input stride m * is, output stride os */
+     tensor *cld_vec = X(tensor_append)(radix, p->vecsz);
+     X(tensor_destroy)(radix); /* the temporary is released once cld_vec exists */
+
+     return X(mkproblem_dft_d)(X(mktensor_1d)(m, d[0].is, e->radix * d[0].os),
+			       cld_vec, p->ri, p->ii, p->ro, p->io);
+}
diff --git a/src/fftw3/dft/ct.h b/src/fftw3/dft/ct.h
new file mode 100644
index 0000000..7ee8ff7
--- /dev/null
+++ b/src/fftw3/dft/ct.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: ct.h,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+/* Cooley-Tukey variants: one codelet pointer; the member used depends on the solver */
+union kct {
+     kdft_dit dit;      /* codelet registered through X(kdft_dit_register) */
+     kdft_dif dif;      /* codelet registered through X(kdft_dif_register) */
+     kdft_difsq difsq;  /* codelet registered through X(kdft_difsq_register) */
+};
+
+typedef struct {
+     solver super;         /* base object; X(mksolver_dft_ct) returns &super */
+     const char *nam;      /* solver name given to X(mksolver_dft_ct) */
+     const ct_desc *desc;  /* codelet descriptor; desc->radix drives the mkcld routines */
+     union kct k;          /* the codelet itself */
+} solver_ct;
+
+typedef struct {
+     plan_dft super;
+     union kct k;           /* codelet, copied from the solver */
+     plan *cld;             /* child plan built from adt->mkcld */
+     twid *td;              /* set to 0 by X(mkplan_dft_ct) */
+     int r, m, vl;          /* r, m = n / r, and the vector length taken from vecsz */
+     int is, os, ivs, ovs, iios;  /* is/os from the size dimension, ivs/ovs from vecsz */
+     stride ios, vs;        /* both set to 0 by X(mkplan_dft_ct) before adt->finish runs */
+     const solver_ct *slv;  /* solver that produced this plan */
+} plan_ct;
+
+/* data type describing a generic Cooley-Tukey solver */
+typedef struct
+{
+     size_t pln_size;  /* size of the plan to create; asserted >= sizeof(plan_ct) */
+     problem *(*mkcld)(const solver_ct *, const problem_dft *p);  /* builds the child problem */
+     void (*finish)(plan_ct *ego);  /* last call made by X(mkplan_dft_ct) on the new plan */
+     int (*applicable)(const solver_ct *ego, const problem *p,
+		       const planner *plnr);
+     dftapply apply;   /* apply function installed in the new plan */
+} ctadt;
+
+int X(dft_ct_applicable)(const solver_ct *ego, const problem *p_);
+/* generic Cooley-Tukey plan constructor; adt supplies the variant's hooks */
+plan *X(mkplan_dft_ct)(const solver_ct *ego,
+                       const problem *p_,
+                       planner *plnr,
+                       const ctadt *adt);
+/* creates a solver_ct that stores k, desc and nam */
+solver *X(mksolver_dft_ct)(union kct k, const ct_desc *desc,
+                           const char *nam, const solver_adt *adt);
+/* child-problem constructors, shared by many solvers */
+problem *X(dft_mkcld_dit)(const solver_ct *ego, const problem_dft *p);
+problem *X(dft_mkcld_dif)(const solver_ct *ego, const problem_dft *p);
diff --git a/src/fftw3/dft/dft.h b/src/fftw3/dft/dft.h
new file mode 100644
index 0000000..cc88a1d
--- /dev/null
+++ b/src/fftw3/dft/dft.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: dft.h,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+#ifndef __DFT_H__
+#define __DFT_H__
+
+#include "ifftw.h"
+#include "codelet-dft.h"
+
+/* problem.c: */
+typedef struct {
+     problem super;
+     tensor *sz, *vecsz;    /* transform dimensions and vector (loop) dimensions */
+     R *ri, *ii, *ro, *io;  /* real/imaginary input and real/imaginary output arrays */
+} problem_dft;
+
+int X(problem_dft_p)(const problem *p);  /* solvers test this before downcasting to problem_dft */
+#define DFTP X(problem_dft_p)  /* shorthand */
+/* NOTE(review): callers hand freshly built tensors to the _d variant and never free them, so it presumably takes ownership -- confirm */
+void X(dft_zerotens)(tensor *sz, R *ri, R *ii);
+problem *X(mkproblem_dft)(const tensor *sz, const tensor *vecsz,
+                          R *ri, R *ii, R *ro, R *io);
+problem *X(mkproblem_dft_d)(tensor *sz, tensor *vecsz,
+                            R *ri, R *ii, R *ro, R *io);
+
+/* solve.c: */
+void X(dft_solve)(const plan *ego_, const problem *p_);
+
+/* plan.c: the base type of every DFT plan, and its constructor */
+typedef void (*dftapply) (const plan *ego, R *ri, R *ii, R *ro, R *io);
+
+typedef struct {
+     plan super;
+     dftapply apply;  /* executes the plan on the given input/output arrays */
+} plan_dft;
+/* size is the size of the whole derived plan structure, not just of plan_dft */
+plan *X(mkplan_dft)(size_t size, const plan_adt *adt, dftapply apply);
+/* typed wrapper: creates a plan of sizeof(type) and casts the result to type * */
+#define MKPLAN_DFT(type, adt, apply) \
+  (type *)X(mkplan_dft)(sizeof(type), adt, apply)
+
+/* various solvers: constructors that wrap a codelet into a solver */
+solver *X(mksolver_dft_direct)(kdft k, const kdft_desc *desc);
+solver *X(mksolver_dft_ct_dit)(kdft_dit codelet, const ct_desc *desc);
+solver *X(mksolver_dft_ct_ditbuf)(kdft_dit codelet, const ct_desc *desc);
+solver *X(mksolver_dft_ct_dif)(kdft_dif codelet, const ct_desc *desc);
+solver *X(mksolver_dft_ct_ditf)(kdft_difsq codelet, const ct_desc *desc);
+/* optional hooks, 0 by default; the kdft_dit/kdft_dif register routines call them when set */
+extern void (*X(kdft_dit_register_hook))(planner *, kdft_dit, const ct_desc *);
+extern void (*X(kdft_dif_register_hook))(planner *, kdft_dif, const ct_desc *);
+/* registration entry points: each adds one file's solver(s) to planner p */
+void X(dft_rank0_register)(planner *p);
+void X(dft_rank_geq2_register)(planner *p);
+void X(dft_indirect_register)(planner *p);
+void X(dft_vrank_geq1_register)(planner *p);
+void X(dft_vrank2_transpose_register)(planner *p);
+void X(dft_vrank3_transpose_register)(planner *p);
+void X(dft_buffered_register)(planner *p);
+void X(dft_generic_register)(planner *p);
+void X(dft_rader_register)(planner *p);
+void X(dft_nop_register)(planner *p);
+
+/* rader-omega.c: auxiliary stuff for rader */
+R *X(dft_rader_mkomega)(plan *p_, int n, int ginv);
+void X(dft_rader_free_omega)(R **omega);
+
+/* configurations */
+void X(dft_conf_standard)(planner *p);
+
+#endif /* __DFT_H__ */
diff --git a/src/fftw3/dft/direct.c b/src/fftw3/dft/direct.c
new file mode 100644
index 0000000..cb11c88
--- /dev/null
+++ b/src/fftw3/dft/direct.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: direct.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+/* direct DFT solver, if we have a codelet */
+
+#include "dft.h"
+
+typedef struct {
+     solver super;
+     const kdft_desc *desc;  /* codelet descriptor (sz, nam, genus, ops are read here) */
+     kdft k;                 /* the codelet */
+} S;
+
+typedef struct {
+     plan_dft super;
+     /* strides created with X(mkstride) in mkplan and released in destroy */
+     stride is, os;
+     int vl;                 /* vector length, from tensor_tornk1(p->vecsz) */
+     int ivs, ovs;           /* input/output vector strides, same origin */
+     kdft k;                 /* codelet copied from the solver */
+     const S *slv;           /* owning solver, read by print */
+} P;
+
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P *self = (const P *) ego_;  /* downcast to this file's plan type */
+     ASSERT_ALIGNED_DOUBLE;
+     self->k(ri, ii, ro, io, self->is, self->os, self->vl, self->ivs, self->ovs);
+}
+
+static void destroy(plan *ego_)
+{
+     P *self = (P *) ego_;
+     X(stride_destroy)(self->is);  /* input stride object made in mkplan */
+     X(stride_destroy)(self->os);  /* output stride object made in mkplan */
+}
+
+static void print(const plan *ego_, printer *p)
+{
+     const P *self = (const P *) ego_;
+     const kdft_desc *desc = self->slv->desc;
+
+     /* prints the codelet size, the vector length and the codelet name */
+     p->print(p, "(dft-direct-%d%v \"%s\")", desc->sz, self->vl, desc->nam);
+}
+
+static int applicable(const solver *ego_, const problem *p_,
+                      const planner *plnr)
+{
+     const S *ego = (const S *) ego_;
+     const problem_dft *p = (const problem_dft *) p_;
+     const kdft_desc *d = ego->desc;
+     const iodim *dim;
+     int vl, ivs, ovs;
+
+     /* only DFT problems are candidates */
+     if (!DFTP(p_))
+          return 0;
+
+     /* exactly one transform dimension, whose length is the codelet
+        size, and at most one vector (loop) dimension */
+     if (p->sz->rnk != 1 || p->vecsz->rnk > 1)
+          return 0;
+     dim = p->sz->dims;
+     if (dim[0].n != d->sz)
+          return 0;
+
+     /* check strides etc */
+     if (!X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs))
+          return 0;
+
+     /* the codelet genus accepts or rejects this particular layout */
+     if (!d->genus->okp(d, p->ri, p->ii, p->ro, p->io,
+                        dim[0].is, dim[0].os, vl, ivs, ovs, plnr))
+          return 0;
+
+     /* can operate out-of-place */
+     if (p->ri != p->ro)
+          return 1;
+
+     /* can compute one transform in-place, no matter what the strides are */
+     if (p->vecsz->rnk == 0)
+          return 1;
+
+     /* can operate in-place as long as strides are the same */
+     return X(tensor_inplace_strides2)(p->sz, p->vecsz) ? 1 : 0;
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     const S *ego = (const S *) ego_;
+     P *pln;
+     const problem_dft *p;
+     iodim *d;
+     const kdft_desc *e = ego->desc;
+
+     static const plan_adt padt = {
+	  X(dft_solve), X(null_awake), print, destroy
+     };
+
+     /* plnr is forwarded to applicable(), which hands it to the okp hook */
+     if (!applicable(ego_, p_, plnr))
+          return (plan *)0;
+
+     /* applicable() has verified that p_ is a DFT problem */
+     p = (const problem_dft *) p_;
+     d = p->sz->dims;
+
+     pln = MKPLAN_DFT(P, &padt, apply);
+     pln->k = ego->k;
+     pln->slv = ego;
+
+     /* stride objects handed to the codelet; destroy() releases them */
+     pln->is = X(mkstride)(e->sz, d[0].is);
+     pln->os = X(mkstride)(e->sz, d[0].os);
+
+     /* same call that applicable() has already made successfully */
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+     /* op count: the codelet's ops, weighted by vl / genus->vl */
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(pln->vl / e->genus->vl, &e->ops, &pln->super.super.ops);
+     return &(pln->super.super);
+}
+
+/* constructor */
+solver *X(mksolver_dft_direct)(kdft k, const kdft_desc *desc)
+{
+     static const solver_adt sadt = { mkplan };
+     S *self = MKSOLVER(S, &sadt);  /* solver whose adt holds only mkplan */
+     self->desc = desc;
+     self->k = k;
+     return &self->super;
+}
diff --git a/src/fftw3/dft/generic.c b/src/fftw3/dft/generic.c
new file mode 100644
index 0000000..eec32bd
--- /dev/null
+++ b/src/fftw3/dft/generic.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "dft.h"
+
+typedef struct {
+     solver super;  /* this solver carries no state of its own */
+} S;
+
+typedef struct {
+     plan_dft super;
+     plan *cld;   /* child plan: a vector of r size-m DFTs (built in mkplan) */
+     twid *td;    /* twiddle table, created/released by awake */
+     int os;      /* output stride of the transform dimension */
+     int r, m;    /* r = first divisor of n, m = n / r */
+} P;
+
+/***************************************************************************/
+
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P *ego = (const P *) ego_;
+     int n, m, r, j;
+     int os, osm;
+     E *buf;
+     const R *W;
+     /* step 1: the child plan computes the r size-m DFTs into ro/io */
+     {
+	  plan_dft *cld = (plan_dft *) ego->cld;
+	  cld->apply((plan *) cld, ri, ii, ro, io);
+     }
+     /* step 2: combine them in place, with O(r^2) work for each of the m columns */
+     r = ego->r;
+     /* scratch space for the r complex results of one column */
+     STACK_MALLOC(E *, buf, r * 2 * sizeof(E));
+     /* osm = m * os: output distance between consecutive sub-transforms */
+     osm = (m = ego->m) * (os = ego->os);
+     n = m * r;
+     W = ego->td->W;
+     for (j = 0; j < m; ++j, ro += os, io += os) {
+	  int k;
+	  for (k = 0; k < r; ++k) {
+	       E rb = ro[0], ib = io[0];  /* the i = 0 term is added unmultiplied */
+	       int i, iw, iw_inc = j + m * k;
+	       for (i = 1, iw = iw_inc; i < r; ++i) {
+		    E xr = ro[i*osm], xi = io[i*osm];
+		    E wr = W[2*iw], wi = W[2*iw+1];
+		    /* note that W[iw] is the product of the DIT twiddle
+		       factor and the size-r DFT twiddle factor */
+		    rb += xr * wr - xi * wi;
+		    ib += xr * wi + xi * wr;
+		    iw += iw_inc;  /* iw tracks (i * iw_inc) mod n */
+		    if (iw >= n)
+			 iw -= n;
+	       }
+	       buf[2*k] = rb;
+	       buf[2*k+1] = ib;
+	  }
+	  for (k = 0; k < r; ++k) {  /* write back only after all r sums were read */
+	       ro[k*osm] = buf[2*k];
+	       io[k*osm] = buf[2*k+1];
+	  }
+     }
+
+     STACK_FREE(buf);
+}
+
+/***************************************************************************/
+
+static void awake(plan *ego_, int flg)
+{
+     static const tw_instr generic_tw[] = {
+	  { TW_GENERIC, 0, 0 },
+	  { TW_NEXT, 1, 0 }
+     };
+     P *self = (P *) ego_;
+     int n = self->r * self->m;  /* full transform length */
+
+     AWAKE(self->cld, flg);
+     X(twiddle_awake)(flg, &self->td, generic_tw, n, self->r, self->m);
+}
+
+static void destroy(plan *ego_)
+{
+     /* the child plan is the only thing released here */
+     X(plan_destroy_internal)(((P *) ego_)->cld);
+}
+
+static void print(const plan *ego_, printer *p)
+{
+     const P *self = (const P *) ego_;
+     /* prints r, followed by the nested child plan */
+     p->print(p, "(dft-generic-dit-%d%(%p%))", self->r, self->cld);
+}
+
+static int applicable0(const problem *p_)
+{
+     const problem_dft *p = (const problem_dft *) p_;
+
+     /* not a DFT problem at all */
+     if (!DFTP(p_))
+          return 0;
+     /* a single 1d transform, with no vector loop */
+     if (p->sz->rnk != 1 || p->vecsz->rnk != 0)
+          return 0;
+     /* of nontrivial length */
+     return p->sz->dims[0].n > 1;
+}
+
+static int applicable(const solver *ego, const problem *p_,
+                      const planner *plnr)
+{
+     const problem_dft *p = (const problem_dft *) p_;
+
+     UNUSED(ego);
+     /* always ugly: refuse whenever the planner excludes ugly plans */
+     if (NO_UGLYP(plnr) || !applicable0(p_))
+          return 0;
+     /* on request, refuse sizes whose first divisor is too large */
+     return !(NO_LARGE_GENERICP(plnr)
+              && X(first_divisor)(p->sz->dims[0].n) >= GENERIC_MIN_BAD);
+}
+
+static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
+{
+     static const plan_adt padt = {
+	  X(dft_solve), awake, print, destroy
+     };
+     const problem_dft *p = (const problem_dft *) p_;
+     plan *cld = (plan *) 0;
+     P *pln = 0;
+     int n, r, m, is, os;
+
+     if (!applicable(ego, p_, plnr))
+          goto nada;
+
+     n = p->sz->dims[0].n;
+     is = p->sz->dims[0].is;
+     os = p->sz->dims[0].os;
+
+     /* factor n = r * m, r being the first divisor of n */
+     r = X(first_divisor)(n);
+     m = n / r;
+
+     /* child problem: a vector of r size-m DFTs; each reads with
+        stride r * is and writes with stride os, m * os apart */
+     cld = X(mkplan_d)(plnr,
+                       X(mkproblem_dft_d)(X(mktensor_1d)(m, r * is, os),
+                                          X(mktensor_1d)(r, is, m * os),
+                                          p->ri, p->ii, p->ro, p->io));
+     if (!cld)
+          goto nada;
+
+     pln = MKPLAN_DFT(P, &padt, apply);
+     pln->cld = cld;
+     pln->td = 0;  /* the twiddle table is set up later, by awake */
+     pln->r = r;
+     pln->m = m;
+     pln->os = os;
+
+     /* op count of the generic step, combined with the child's count */
+     X(ops_zero)(&pln->super.super.ops);
+     pln->super.super.ops.mul = 4 * r * (r-1);
+     pln->super.super.ops.add = 4 * r * (r-1);
+     /* loads + stores, minus loads + stores for all DIT codelets */
+     pln->super.super.ops.other = 4 * r + 4 * r * r - (6*r - 2);
+     X(ops_madd)(m, &pln->super.super.ops, &cld->ops, &pln->super.super.ops);
+     return &(pln->super.super);
+
+ nada:
+     X(plan_destroy_internal)(cld);
+     X(ifree0)(pln);
+     return (plan *) 0;
+}
+
+/* constructors */
+
+static solver *mksolver(void)
+{
+     static const solver_adt sadt = { mkplan };
+     /* S has no fields besides its base, so nothing else needs to be set */
+     return &(MKSOLVER(S, &sadt))->super;
+}
+
+void X(dft_generic_register)(planner *p)  /* adds the generic solver to planner p */
+{
+     REGISTER_SOLVER(p, mksolver());  /* a single, parameterless solver instance */
+}
diff --git a/src/fftw3/dft/indirect.c b/src/fftw3/dft/indirect.c
new file mode 100644
index 0000000..e00eabf
--- /dev/null
+++ b/src/fftw3/dft/indirect.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: indirect.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+
+/* solvers/plans for vectors of small DFT's that cannot be done
+   in-place directly.  Use a rank-0 plan to rearrange the data
+   before or after the transform.  Can also change an out-of-place
+   plan into a copy + in-place (where the in-place transform
+   is e.g. unit stride). */
+
+/* FIXME: merge with rank-geq2.c(?), since this is just a special case
+   of a rank split where the first/second transform has rank 0. */
+
+#include "dft.h"
+
+typedef problem *(*mkcld_t) (const problem_dft *p);  /* same type as ndrct_adt.mkcld */
+
+typedef struct {
+     dftapply apply;   /* apply_before or apply_after */
+     problem *(*mkcld)(const problem_dft *p);  /* builds the in-place transform problem */
+     const char *nam;  /* name shown by print */
+} ndrct_adt;
+
+typedef struct {
+     solver super;
+     const ndrct_adt *adt;  /* the variant this solver implements */
+} S;
+
+typedef struct {
+     plan_dft super;
+     plan *cldcpy, *cld;    /* rank-0 rearranging child; in-place transform child */
+     const S *slv;          /* owning solver, read by print */
+} P;
+
+/*-----------------------------------------------------------------------*/
+/* first rearrange, then transform */
+static void apply_before(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P *self = (const P *) ego_;
+     plan_dft *copy = (plan_dft *) self->cldcpy;
+     plan_dft *xform = (plan_dft *) self->cld;
+
+     /* step 1: the rank-0 child moves the input into the
+        output arrays */
+     copy->apply(self->cldcpy, ri, ii, ro, io);
+
+     /* step 2: transform the output arrays in place */
+     xform->apply(self->cld, ro, io, ro, io);
+}
+
+static problem *mkcld_before(const problem_dft *p)
+{
+     tensor *sz = X(tensor_copy_inplace)(p->sz, INPLACE_OS);
+     tensor *vecsz = X(tensor_copy_inplace)(p->vecsz, INPLACE_OS);
+     return X(mkproblem_dft_d)(sz, vecsz, p->ro, p->io, p->ro, p->io);  /* in place on ro/io */
+}
+
+static const ndrct_adt adt_before =  /* variant: rearrange first, then transform */
+{
+     apply_before, mkcld_before, "dft-indirect-before"
+};
+
+/*-----------------------------------------------------------------------*/
+/* first transform, then rearrange */
+
+static void apply_after(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P *self = (const P *) ego_;
+     plan_dft *xform = (plan_dft *) self->cld;
+     plan_dft *copy = (plan_dft *) self->cldcpy;
+
+     /* step 1: transform in place on the input arrays
+        (their contents are overwritten) */
+     xform->apply(self->cld, ri, ii, ri, ii);
+
+     /* step 2: the rank-0 child moves the result to the output arrays */
+     copy->apply(self->cldcpy, ri, ii, ro, io);
+}
+
+static problem *mkcld_after(const problem_dft *p)
+{
+     tensor *sz = X(tensor_copy_inplace)(p->sz, INPLACE_IS);
+     tensor *vecsz = X(tensor_copy_inplace)(p->vecsz, INPLACE_IS);
+     return X(mkproblem_dft_d)(sz, vecsz, p->ri, p->ii, p->ri, p->ii);  /* in place on ri/ii */
+}
+
+static const ndrct_adt adt_after =  /* variant: transform first, then rearrange */
+{
+     apply_after, mkcld_after, "dft-indirect-after"
+};
+
+/*-----------------------------------------------------------------------*/
+static void destroy(plan *ego_)
+{
+     P *self = (P *) ego_;
+     X(plan_destroy_internal)(self->cld);     /* the transform child */
+     X(plan_destroy_internal)(self->cldcpy);  /* the rearranging child */
+}
+
+static void awake(plan *ego_, int flg)
+{
+     P *self = (P *) ego_;
+     AWAKE(self->cldcpy, flg);  /* the flag is forwarded to both children */
+     AWAKE(self->cld, flg);
+}
+
+static void print(const plan *ego_, printer *p)
+{
+     const P *self = (const P *) ego_;
+     /* variant name, then the transform child, then the copy child */
+     p->print(p, "(%s%(%p%)%(%p%))", self->slv->adt->nam, self->cld, self->cldcpy);
+}
+
+static int applicable0(const solver *ego_, const problem *p_,
+                       const planner *plnr)
+{
+     const S *ego = (const S *) ego_;
+     const problem_dft *p = (const problem_dft *) p_;
+
+     if (!DFTP(p_))
+          return 0;
+
+     if (!FINITE_RNK(p->vecsz->rnk))
+          return 0;
+
+     /* problem must be a nontrivial transform, not just a copy */
+     if (!(p->sz->rnk > 0))
+          return 0;
+
+     /* problem must be in-place & require some
+        rearrangement of the data */
+     if (p->ri == p->ro)
+          return !(X(tensor_inplace_strides2)(p->sz, p->vecsz));
+
+     /* or problem must be out of place, transforming
+        from stride 1/2 to bigger stride, for apply_after */
+     if (ego->adt->apply == apply_after)
+          return (DESTROY_INPUTP(plnr)
+                  && X(tensor_min_istride)(p->sz) <= 2
+                  && X(tensor_min_ostride)(p->sz) > 2);
+
+     /* or problem must be out of place, transforming
+        to stride 1/2 from bigger stride, for apply_before */
+     if (ego->adt->apply == apply_before)
+          return (X(tensor_min_ostride)(p->sz) <= 2
+                  && X(tensor_min_istride)(p->sz) > 2);
+
+     /* neither variant: not applicable */
+     return 0;
+}
+
+static int applicable(const solver *ego_, const problem *p_,
+                      const planner *plnr)
+{
+     const problem_dft *p = (const problem_dft *) p_;
+
+     if (!applicable0(ego_, p_, plnr))
+          return 0;
+     /* the planner may rule out the out-of-place use of this solver */
+     return !(NO_INDIRECT_OP_P(plnr) && p->ri != p->ro);
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     static const plan_adt padt = {
+	  X(dft_solve), awake, print, destroy
+     };
+     const S *ego = (const S *) ego_;
+     const problem_dft *p = (const problem_dft *) p_;
+     plan *cldcpy = 0, *cld = 0;
+     P *pln;
+
+     if (!applicable(ego_, p_, plnr))
+          return (plan *) 0;
+     /* both children are planned with the NO_BUFFERING flag set */
+     plnr->planner_flags |= NO_BUFFERING;
+
+     /* rank-0 child: moves data from ri/ii to ro/io over vecsz + sz */
+     cldcpy = X(mkplan_d)(plnr,
+                          X(mkproblem_dft_d)(X(mktensor_0d)(),
+                                             X(tensor_append)(p->vecsz, p->sz),
+                                             p->ri, p->ii, p->ro, p->io));
+     if (!cldcpy)
+          goto nada;
+     cld = X(mkplan_d)(plnr, ego->adt->mkcld(p));  /* in-place transform child */
+     if (!cld)
+          goto nada;
+
+     pln = MKPLAN_DFT(P, &padt, ego->adt->apply);
+     pln->slv = ego;
+     pln->cldcpy = cldcpy;
+     pln->cld = cld;
+     X(ops_add)(&cld->ops, &cldcpy->ops, &pln->super.super.ops);
+     return &(pln->super.super);
+
+ nada:
+     X(plan_destroy_internal)(cld);
+     X(plan_destroy_internal)(cldcpy);
+     return (plan *)0;
+}
+
+static solver *mksolver(const ndrct_adt *adt)
+{
+     static const solver_adt sadt = { mkplan };
+     S *self = MKSOLVER(S, &sadt);
+     self->adt = adt;  /* selects the "before" or "after" variant */
+     return &self->super;
+}
+
+void X(dft_indirect_register)(planner *p)
+{
+     static const ndrct_adt *const adts[] = {
+	  &adt_before, &adt_after
+     };
+     const unsigned nadts = sizeof(adts) / sizeof(adts[0]);
+     unsigned i;
+     for (i = 0; i != nadts; ++i)  /* one solver per variant, in table order */
+          REGISTER_SOLVER(p, mksolver(adts[i]));
+}
diff --git a/src/fftw3/dft/kdft-dif.c b/src/fftw3/dft/kdft-dif.c
new file mode 100644
index 0000000..c6fad12
--- /dev/null
+++ b/src/fftw3/dft/kdft-dif.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: kdft-dif.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+#include "dft.h"
+/* optional hook (0 = none), called by X(kdft_dif_register); type as declared in dft.h */
+void (*X(kdft_dif_register_hook))(planner *, kdft_dif, const ct_desc *) = 0;
+
+void X(kdft_dif_register)(planner *p, kdft_dif codelet, const ct_desc *desc)
+{
+     REGISTER_SOLVER(p, X(mksolver_dft_ct_dif)(codelet, desc));
+     if (X(kdft_dif_register_hook) != 0)  /* optional hook sees the same codelet */
+          (*X(kdft_dif_register_hook))(p, codelet, desc);
+}
diff --git a/src/fftw3/dft/kdft-difsq.c b/src/fftw3/dft/kdft-difsq.c
new file mode 100644
index 0000000..aa930ba
--- /dev/null
+++ b/src/fftw3/dft/kdft-difsq.c
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: kdft-difsq.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+#include "dft.h"
+
+void X(kdft_difsq_register)(planner *p, kdft_difsq k, const ct_desc *desc)
+{
+     REGISTER_SOLVER(p, X(mksolver_dft_ct_ditf)(k, desc));  /* difsq codelets go to the "ditf" solver */
+}
diff --git a/src/fftw3/dft/kdft-dit.c b/src/fftw3/dft/kdft-dit.c
new file mode 100644
index 0000000..054496c
--- /dev/null
+++ b/src/fftw3/dft/kdft-dit.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: kdft-dit.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+#include "dft.h"
+/* optional hook (0 = none), called by X(kdft_dit_register) for each codelet */
+void (*X(kdft_dit_register_hook))(planner *, kdft_dit, const ct_desc *) = 0;
+
+void X(kdft_dit_register)(planner *p, kdft_dit codelet, const ct_desc *desc)
+{
+     REGISTER_SOLVER(p, X(mksolver_dft_ct_dit)(codelet, desc));
+     REGISTER_SOLVER(p, X(mksolver_dft_ct_ditbuf)(codelet, desc));
+     if (X(kdft_dit_register_hook) != 0)  /* optional hook sees the same codelet */
+          (*X(kdft_dit_register_hook))(p, codelet, desc);
+}
diff --git a/src/fftw3/dft/kdft.c b/src/fftw3/dft/kdft.c
new file mode 100644
index 0000000..1f0f57f
--- /dev/null
+++ b/src/fftw3/dft/kdft.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: kdft.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+#include "dft.h"
+
+void X(kdft_register)(planner *p, kdft codelet, const kdft_desc *desc)
+{
+     /* wrap the codelet in a direct solver and hand it to the planner */
+     REGISTER_SOLVER(p, X(mksolver_dft_direct)(codelet, desc));
+}
diff --git a/src/fftw3/dft/nop.c b/src/fftw3/dft/nop.c
new file mode 100644
index 0000000..9ff8161
--- /dev/null
+++ b/src/fftw3/dft/nop.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: nop.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+/* plans for vrank -infty DFTs and in-place rank-0 DFTs (nothing to do) */
+
+#include "dft.h"
+
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     /* nothing to compute: every argument is
+        only marked as used */
+     UNUSED(ego_);
+     UNUSED(ri); UNUSED(ii);
+     UNUSED(ro); UNUSED(io);
+}
+
+static int applicable(const solver *ego_, const problem *p_)
+{
+     const problem_dft *p = (const problem_dft *) p_;
+
+     UNUSED(ego_);
+     if (!DFTP(p_))
+          return 0;
+
+     /* case 1 : -infty vector rank */
+     if (!FINITE_RNK(p->vecsz->rnk))
+          return 1;
+
+     /* case 2 : rank-0 in-place dft (the vector rank is
+        known to be finite at this point) */
+     if (p->sz->rnk != 0 || p->ro != p->ri)
+          return 0;
+
+     return X(tensor_inplace_strides)(p->vecsz) != 0;
+}
+
+static void print(const plan *ego, printer *p)
+{
+     UNUSED(ego);  /* the plan has no state worth printing */
+     p->print(p, "(dft-nop)");
+}
+
+static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
+{
+     static const plan_adt padt = {
+	  X(dft_solve), X(null_awake), print, X(plan_null_destroy)
+     };
+     plan_dft *pln = 0;
+
+     UNUSED(plnr);
+     if (applicable(ego, p)) {
+          /* a bare plan_dft with a zero operation count */
+          pln = MKPLAN_DFT(plan_dft, &padt, apply);
+          X(ops_zero)(&pln->super.ops);
+     }
+
+     return pln ? &(pln->super) : (plan *) 0;
+}
+
+static solver *mksolver(void)
+{
+     static const solver_adt sadt = { mkplan };
+     return MKSOLVER(solver, &sadt);  /* no private state: the base solver type is used directly */
+}
+
+void X(dft_nop_register)(planner *p)  /* adds the no-op solver to planner p */
+{
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/dft/plan.c b/src/fftw3/dft/plan.c
new file mode 100644
index 0000000..090a5fb
--- /dev/null
+++ b/src/fftw3/dft/plan.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: plan.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+#include "dft.h"
+
+plan *X(mkplan_dft)(size_t size, const plan_adt *adt, dftapply apply)
+{
+     /* X(mkplan) creates the plan from size and adt; only the
+        apply hook is filled in here */
+     plan_dft *pln = (plan_dft *) X(mkplan)(size, adt);
+
+     pln->apply = apply;
+     return &pln->super;
+}
diff --git a/src/fftw3/dft/problem.c b/src/fftw3/dft/problem.c
new file mode 100644
index 0000000..51822ad
--- /dev/null
+++ b/src/fftw3/dft/problem.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: problem.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+#include "dft.h"
+#include <stddef.h>
+
+static void destroy(problem *ego_) /* adt hook: free tensors, then ego_ */
+{
+     problem_dft *dp = (problem_dft *) ego_;
+     X(tensor_destroy2)(dp->vecsz, dp->sz);
+     X(ifree)(ego_);
+}
+
+static void hash(const problem *p_, md5 *m) /* adt hook: digest a problem */
+{
+     const problem_dft *dp = (const problem_dft *) p_;
+     X(md5puts)(m, "dft");                     /* problem-kind tag */
+     X(md5int)(m, dp->ri == dp->ro);           /* in place or not */
+     X(md5ptrdiff)(m, dp->ii - dp->ri);        /* imag - real, input */
+     X(md5ptrdiff)(m, dp->io - dp->ro);        /* imag - real, output */
+     X(md5int)(m, X(alignment_of)(dp->ri));    /* alignment, four arrays */
+     X(md5int)(m, X(alignment_of)(dp->ii));
+     X(md5int)(m, X(alignment_of)(dp->ro));
+     X(md5int)(m, X(alignment_of)(dp->io));
+     X(tensor_md5)(m, dp->sz);                 /* transform dimensions */
+     X(tensor_md5)(m, dp->vecsz);              /* vector dimensions */
+}
+
+/* Print the problem as "(dft ...)": in-place flag, X(alignment_of) of ri
+   and ro, the two imaginary-minus-real offsets, then sz and vecsz. */
+static void print(problem *ego_, printer *p)
+{
+     const problem_dft *dp = (const problem_dft *) ego_;
+     p->print(p, "(dft %d %d %d %td %td %T %T)",
+              dp->ri == dp->ro,
+              X(alignment_of)(dp->ri), X(alignment_of)(dp->ro),
+              dp->ii - dp->ri, dp->io - dp->ro,
+              dp->sz,
+              dp->vecsz);
+}
+
+static void zero(const problem *ego_) /* adt hook: X(dft_zerotens) on ri/ii */
+{
+     const problem_dft *dp = (const problem_dft *) ego_;
+     tensor *whole = X(tensor_append)(dp->vecsz, dp->sz);
+     X(dft_zerotens)(whole, UNTAINT(dp->ri), UNTAINT(dp->ii));
+     X(tensor_destroy)(whole); /* the appended tensor was only temporary */
+}
+
+static const problem_adt padt =  /* method table given to every dft problem */
+{
+     hash,
+     zero,
+     print,
+     destroy
+};
+
+int X(problem_dft_p)(const problem *p)
+{    /* a dft problem is recognized by carrying this file's method table */
+     return (&padt == p->adt);
+}
+
+/* Build a DFT problem from the four (possibly tainted) array pointers
+   and from compressed versions of sz and vecsz.  The tensors passed in
+   are not destroyed here. */
+problem *X(mkproblem_dft)(const tensor *sz, const tensor *vecsz,
+                          R *ri, R *ii, R *ro, R *io)
+{
+     problem_dft *dp =
+          (problem_dft *) X(mkproblem)(sizeof(problem_dft), &padt);
+
+     /* real and imaginary parts: both in place or both out of place */
+     A((ri == ro) == (ii == io));
+     A(X(tensor_kosherp)(sz));
+     A(X(tensor_kosherp)(vecsz));
+
+     /* pointers that agree once untainted are forced to be identical */
+     if (UNTAINT(ri) == UNTAINT(ro)) { ri = JOIN_TAINT(ri, ro); ro = ri; }
+     if (UNTAINT(ii) == UNTAINT(io)) { ii = JOIN_TAINT(ii, io); io = ii; }
+
+     /* the input pair and the output pair must each share one taint */
+     A(TAINTOF(ri) == TAINTOF(ii));
+     A(TAINTOF(ro) == TAINTOF(io));
+
+     dp->ri = ri; dp->ii = ii;
+     dp->ro = ro; dp->io = io;
+     dp->sz = X(tensor_compress)(sz);
+     dp->vecsz = X(tensor_compress_contiguous)(vecsz);
+
+     A(FINITE_RNK(dp->sz->rnk));
+     return &(dp->super);
+}
+
+/* Destructive variant of X(mkproblem_dft): once the problem has been
+   built, the caller's sz and vecsz tensors are destroyed. */
+problem *X(mkproblem_dft_d)(tensor *sz, tensor *vecsz,
+                            R *ri, R *ii, R *ro, R *io)
+{
+     problem *made = X(mkproblem_dft)(sz, vecsz, ri, ii, ro, io);
+     X(tensor_destroy2)(vecsz, sz);
+     return made;
+}
diff --git a/src/fftw3/dft/rader-omega.c b/src/fftw3/dft/rader-omega.c
new file mode 100644
index 0000000..7fd86b9
--- /dev/null
+++ b/src/fftw3/dft/rader-omega.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "dft.h"
+
+static rader_tl *omegas = 0; /* list searched/extended by the functions below */
+
+/* Return the omega table for (n, ginv), taken from the `omegas' list when
+   it is already there.  Otherwise fill 2*(n-1) reals from cos2pi/sin2pi of
+   the successive powers of ginv mod n, scaled by 1/(n-1), and apply p_. */
+R *X(dft_rader_mkomega)(plan *p_, int n, int ginv)
+{
+     plan_dft *cld = (plan_dft *) p_;
+     R *w = X(rader_tl_find)(n, n, ginv, omegas);
+     trigreal scale = n - 1.0; /* normalization for convolution */
+     int k, gk = 1;
+
+     if (w) return w;
+     w = (R *) MALLOC(sizeof(R) * (n - 1) * 2, TWIDDLES);
+     for (k = 0; k < n - 1; ++k) {
+          w[2*k] = X(cos2pi)(gk, n) / scale;
+          w[2*k + 1] = FFT_SIGN * X(sin2pi)(gk, n) / scale;
+          gk = MULMOD(gk, ginv, n);
+     }
+     A(gk == 1);
+
+     /* transform the table in place; the plan is awake only for the call */
+     AWAKE(p_, 1);
+     cld->apply(p_, w, w + 1, w, w + 1);
+     AWAKE(p_, 0);
+
+     X(rader_tl_insert)(n, n, ginv, w, &omegas);
+     return w;
+}
+
+void X(dft_rader_free_omega)(R **omega)
+{    /* give *omega back to the `omegas' list, then clear the caller's pointer */
+     X(rader_tl_delete)(*omega, &omegas);
+     *omega = 0;
+}
diff --git a/src/fftw3/dft/rader.c b/src/fftw3/dft/rader.c
new file mode 100644
index 0000000..f31f370
--- /dev/null
+++ b/src/fftw3/dft/rader.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "dft.h"
+
+/*
+ * Compute transforms of prime sizes using Rader's trick: turn them
+ * into convolutions of size n - 1, which you then perform via a pair
+ * of FFTs.   This file contains both nontwiddle (direct) and 
+ * twiddle (DIT Cooley-Tukey) solvers.
+ */
+
+typedef struct {
+     solver super;
+} S; /* solver type used by both mksolver() and mksolver_dit() */
+
+typedef struct {
+     plan_dft super;
+
+     plan *cld1, *cld2;   /* children applied first and second in apply_aux */
+     R *omega;            /* table from X(dft_rader_mkomega); 0 while asleep */
+     int n, g, ginv;      /* size, generator for n, and its inverse mod n */
+     int is, os;          /* input and output strides */
+     plan *cld_omega;     /* plan handed to X(dft_rader_mkomega) in awake() */
+} P;
+
+typedef struct {
+     P super;
+     plan *cld;     /* child applied first in apply_dit */
+     R *W;          /* twiddle table from mktwiddle; 0 while asleep */
+     int os;        /* output step between the m size-r sub-transforms */
+     int m;         /* number of size-r sub-transforms (n / r) */
+} P_dit;
+
+
+static rader_tl *twiddles = 0; /* list used by mktwiddle() and free_twiddle() */
+
+/***************************************************************************/
+
+/* Below, we extensively use the identity that fft(x*)* = ifft(x) in
+   order to share data between forward and backward transforms and to
+   obviate the necessity of having separate forward and backward
+   plans.  (Although we often compute separate plans these days anyway
+   due to the differing strides, etcetera.)
+
+   Of course, since the new FFTW gives us separate pointers to
+   the real and imaginary parts, we could have instead used the
+   fft(r,i) = ifft(i,r) form of this identity, but it was easier to
+   reuse the code from our old version. */
+
+/* Core of the Rader step shared by apply() and apply_dit(): buf holds the
+   r - 1 permuted inputs (interleaved re/im) and r0/i0 is input number 0. */
+static void apply_aux(int r, int ginv, plan *cld1,plan *cld2, const R *omega,
+		      R *buf, R r0, R i0, R *ro, R *io, int os)
+{
+     int gpower, k;
+
+     /* compute DFT of buf, storing in output (except DC): */
+     {
+	    plan_dft *cld = (plan_dft *) cld1;
+	    cld->apply(cld1, buf, buf+1, ro+os, io+os);
+     }
+
+     /* set output DC component: */
+     ro[0] = r0 + ro[os];
+     io[0] = i0 + io[os];
+
+     /* now, multiply by omega: */
+     for (k = 0; k < r - 1; ++k) {
+	  E rB, iB, rW, iW;
+	  rW = omega[2*k];
+	  iW = omega[2*k+1];
+	  rB = ro[(k+1)*os];
+	  iB = io[(k+1)*os];
+	  ro[(k+1)*os] = rW * rB - iW * iB;
+	  io[(k+1)*os] = -(rW * iB + iW * rB); /* product is stored conjugated */
+     }
+     /* this will add input[0] to all of the outputs after the ifft */
+     ro[os] += r0;
+     io[os] -= i0;
+
+     /* inverse FFT (the data was conjugated above, see fft(x*)* = ifft(x)): */
+     {
+	    plan_dft *cld = (plan_dft *) cld2;
+	    cld->apply(cld2, ro+os, io+os, buf, buf+1);
+     }
+     /* finally, do inverse permutation to unshuffle the output: */
+     gpower = 1;
+     for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
+	  ro[gpower * os] = buf[2*k];
+	  io[gpower * os] = -buf[2*k+1];
+     }
+     A(gpower == 1);
+}
+
+/* Direct Rader transform: gather the inputs with nonzero index into a
+   scratch buffer, in the order given by the powers of the generator,
+   and let apply_aux do the rest. */
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P *pln = (const P *) ego_;
+     const int n = pln->n, istride = pln->is, gen = pln->g;
+     R *scratch = (R *) MALLOC(sizeof(R) * (n - 1) * 2, BUFFERS);
+     int j, gj = 1;
+
+     /* scratch[2j], scratch[2j+1] <- input element number gen^j mod n */
+     for (j = 0; j < n - 1; ++j) {
+          int src = gj * istride;
+          scratch[2*j] = ri[src];
+          scratch[2*j + 1] = ii[src];
+          gj = MULMOD(gj, gen, n);
+     }
+     /* here gj == gen^(n-1) mod n == 1 */
+
+     apply_aux(n, pln->ginv, pln->cld1, pln->cld2, pln->omega,
+               scratch, ri[0], ii[0], ro, io, pln->os);
+
+     X(ifree)(scratch);
+}
+
+/* Twiddle (DIT) variant: run child cld from input to output, then perform
+   m twiddled size-r Rader steps in place on the output arrays. */
+static void apply_dit(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P_dit *ego_dit = (const P_dit *) ego_;
+     const P *ego;
+     plan *cld1, *cld2;
+     int os, osm;
+     int j, k, gpower, g, ginv, r, m;
+     R *buf;
+     const R *omega, *W;
+     /* step 1: the child plan carries the input into the output arrays */
+     {
+	   plan *cld0 = ego_dit->cld;
+	   plan_dft *cld = (plan_dft *) cld0;
+	   cld->apply(cld0, ri, ii, ro, io);
+     }
+
+     ego = (const P *) ego_;
+     cld1 = ego->cld1;
+     cld2 = ego->cld2;
+     r = ego->n;
+     m = ego_dit->m;
+     g = ego->g; 
+     ginv = ego->ginv;
+     omega = ego->omega;
+     W = ego_dit->W;
+     os = ego_dit->os;   /* step from one sub-transform to the next */
+     osm = ego->os;      /* stride inside one sub-transform */
+     gpower = 1;
+     buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS);
+
+     for (j = 0; j < m; ++j, ro += os, io += os, W += 2*(r - 1)) {
+	  /* First, permute the input and multiply by W, storing in buf: */
+	  A(gpower == 1);
+	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
+	       E rA, iA, rW, iW;
+	       rA = ro[gpower * osm];
+	       iA = io[gpower * osm];
+	       rW = W[2*k];
+	       iW = W[2*k+1];
+	       buf[2*k] = rW * rA - iW * iA;
+	       buf[2*k + 1] = rW * iA + iW * rA;
+	  }
+	  /* gpower == g^(r-1) mod r == 1 */;
+	  apply_aux(r, ginv, cld1, cld2, omega, 
+		    buf, ro[0], io[0], ro, io, osm);
+     }
+
+     X(ifree)(buf);
+}
+
+/* Twiddle table for apply_dit, shared through the `twiddles' list: row i
+   (i < m) holds cos2pi and FFT_SIGN*sin2pi of (i * (g^j mod r), r * m). */
+static R *mktwiddle(int m, int r, int g)
+{
+     const int n = r * m;
+     int row, col, gj;
+     R *tw = X(rader_tl_find)(m, r, g, twiddles);
+
+     if (tw) return tw;
+
+     tw = (R *) MALLOC(sizeof(R) * (r - 1) * m * 2, TWIDDLES);
+     for (row = 0; row < m; ++row) {
+          R *dst = tw + 2 * row * (r - 1);
+          for (col = 0, gj = 1; col < r - 1; ++col, dst += 2) {
+               dst[0] = X(cos2pi)(row * gj, n);
+               dst[1] = FFT_SIGN * X(sin2pi)(row * gj, n);
+               gj = MULMOD(gj, g, r);
+          }
+          A(gj == 1);
+     }
+     X(rader_tl_insert)(m, r, g, tw, &twiddles);
+     return tw;
+}
+
+static void free_twiddle(R *twiddle)
+{    /* counterpart of mktwiddle(): hand the table back to the list */
+     X(rader_tl_delete)(twiddle, &twiddles);
+}
+
+/***************************************************************************/
+
+/* Wake or put to sleep both children; omega is built when waking (if it
+   is not there yet) and released when going to sleep. */
+static void awake(plan *ego_, int flg)
+{
+     P *pln = (P *) ego_;
+
+     AWAKE(pln->cld1, flg);
+     AWAKE(pln->cld2, flg);
+
+     if (!flg)
+          X(dft_rader_free_omega)(&pln->omega);
+     else if (!pln->omega)
+          pln->omega =
+               X(dft_rader_mkomega)(pln->cld_omega, pln->n, pln->ginv);
+}
+
+/* DIT flavor of awake: handles the extra child and the twiddle table W,
+   then defers to awake() for the embedded P. */
+static void awake_dit(plan *ego_, int flg)
+{
+     P_dit *dit = (P_dit *) ego_;
+
+     AWAKE(dit->cld, flg);
+     if (!flg) {
+          free_twiddle(dit->W);
+          dit->W = 0;
+     } else
+          dit->W = mktwiddle(dit->m, dit->super.n, dit->super.g);
+     awake(ego_, flg);
+}
+
+static void destroy(plan *ego_) /* adt hook: destroy the three children */
+{
+     P *pln = (P *) ego_;
+     X(plan_destroy_internal)(pln->cld_omega);
+     X(plan_destroy_internal)(pln->cld2);
+     X(plan_destroy_internal)(pln->cld1);
+}
+
+static void destroy_dit(plan *ego_) /* extra child first, then destroy() */
+{
+     P_dit *dit = (P_dit *) ego_;
+     X(plan_destroy_internal)(dit->cld);
+     destroy(ego_);
+}
+
+/* Print the opening part shared by both rader plans; a child plan that
+   is pointer-equal to an earlier one is not printed again.  The closing
+   parenthesis is left to the caller. */
+static void print_aux(const char *name, const P *ego, printer *p)
+{
+     plan *c1 = ego->cld1, *c2 = ego->cld2, *cw = ego->cld_omega;
+     p->print(p, "(%s-%d%ois=%oos=%(%p%)", name, ego->n, ego->is, ego->os, c1);
+     if (c2 != c1) p->print(p, "%(%p%)", c2);
+     if (cw != c1 && cw != c2) p->print(p, "%(%p%)", cw);
+}
+
+static void print(const plan *ego_, printer *p)
+{    /* common part under the name "dft-rader", then the closing ')' */
+     print_aux("dft-rader", (const P *) ego_, p);
+     p->putchr(p, ')');
+}
+
+/* Common part under the name "dft-rader-dit", then the extra child cld
+   and the closing parenthesis. */
+static void print_dit(const plan *ego_, printer *p)
+{
+     print_aux("dft-rader-dit", (const P *) ego_, p);
+     p->print(p, "%(%p%))", ((const P_dit *) ego_)->cld);
+}
+
+/* Nonzero iff p_ is a DFT problem of rank 1, without a vector loop
+   (vecsz of rank 0), whose size is accepted by X(is_prime). */
+static int applicable0(const solver *ego_, const problem *p_)
+{
+     const problem_dft *dp = (const problem_dft *) p_;
+
+     UNUSED(ego_);
+     if (!DFTP(p_))
+          return 0;
+     if (dp->sz->rnk != 1 || dp->vecsz->rnk != 0)
+          return 0;
+
+     return (X(is_prime)(dp->sz->dims[0].n) != 0);
+}
+
+/* Nonzero iff p_ is a DFT problem of rank 1, without a vector loop
+   (vecsz of rank 0), of size greater than 1; no primality test here. */
+static int applicable0_dit(const solver *ego_, const problem *p_)
+{
+     const problem_dft *dp = (const problem_dft *) p_;
+
+     UNUSED(ego_);
+     if (!DFTP(p_))
+          return 0;
+     if (dp->sz->rnk != 1 || dp->vecsz->rnk != 0)
+          return 0;
+
+     return (dp->sz->dims[0].n > 1);
+}
+
+static int applicable(const solver *ego_, const problem *p_,
+                      const planner *plnr)
+{    /* never applicable while NO_UGLYP(plnr) holds */
+     return NO_UGLYP(plnr) ? 0 : (applicable0(ego_, p_) != 0);
+}
+
+static int applicable_dit(const solver *ego_, const problem *p_,
+                          const planner *plnr)
+{    /* never applicable while NO_UGLYP(plnr) holds */
+     return NO_UGLYP(plnr) ? 0 : (applicable0_dit(ego_, p_) != 0);
+}
+
+/* Rader part of a plan: plans cld1 (buffer -> output), cld2 (output ->
+   buffer) and cld_omega (in place), then records n, strides, generator
+   and cost.  Returns 1, or 0 after releasing what was planned so far. */
+static int mkP(P *pln, int n, int is, int os, R *ro, R *io,
+	       planner *plnr)
+{
+     plan *cld1 = (plan *) 0;
+     plan *cld2 = (plan *) 0;
+     plan *cld_omega = (plan *) 0;
+     R *buf = (R *) 0;
+
+     /* initial allocation for the purpose of planning */
+     buf = (R *) MALLOC(sizeof(R) * (n - 1) * 2, BUFFERS);
+
+     cld1 = X(mkplan_d)(plnr, 
+			X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, os),
+					   X(mktensor_1d)(1, 0, 0),
+  					   buf, buf + 1, ro + os, io + os));
+     if (!cld1) goto nada;
+
+     cld2 = X(mkplan_d)(plnr, 
+			X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, os, 2),
+					   X(mktensor_1d)(1, 0, 0),
+  					   ro + os, io + os, buf, buf + 1));
+     if (!cld2) goto nada;
+
+     /* plan for omega array.  NOTE(review): ESTIMATE stays set in
+        plnr->planner_flags after this point; confirm the caller restores it */
+     plnr->planner_flags |= ESTIMATE;
+     cld_omega = X(mkplan_d)(plnr, 
+			     X(mkproblem_dft_d)(X(mktensor_1d)(n - 1, 2, 2),
+						X(mktensor_1d)(1, 0, 0),
+						buf, buf + 1, buf, buf + 1));
+     if (!cld_omega) goto nada;
+
+     /* deallocate buffers; let awake() or apply() allocate them for real */
+     X(ifree)(buf);
+     buf = 0;
+     pln->cld1 = cld1;
+     pln->cld2 = cld2;
+     pln->cld_omega = cld_omega;
+     pln->omega = 0;      /* built later, in awake() */
+     pln->n = n;
+     pln->is = is;
+     pln->os = os;
+     pln->g = X(find_generator)(n);
+     pln->ginv = X(power_mod)(pln->g, n - 2, n);
+     A(MULMOD(pln->g, pln->ginv, n) == 1);
+     /* cost: both children plus the per-element work done in apply_aux */
+     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
+     pln->super.super.ops.other += (n - 1) * (4 * 2 + 6) + 6;
+     pln->super.super.ops.add += (n - 1) * 2 + 4;
+     pln->super.super.ops.mul += (n - 1) * 4;
+     return 1;
+
+ nada:   /* reached with any of buf and the three plans possibly still null */
+     X(ifree0)(buf);
+     X(plan_destroy_internal)(cld_omega);
+     X(plan_destroy_internal)(cld2);
+     X(plan_destroy_internal)(cld1);
+     return 0;
+}
+
+/* Solver hook for the direct Rader plan: when the problem is applicable,
+   allocate a P and let mkP plan its children.  A null plan is returned
+   if the problem is rejected or mkP fails. */
+static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
+{
+     static const plan_adt padt = {
+          X(dft_solve), awake, print, destroy
+     };
+
+     const problem_dft *dp = (const problem_dft *) p_;
+     P *pln;
+
+     if (!applicable(ego, p_, plnr))
+          return (plan *) 0;
+
+     /* size and strides come from the only dimension of sz */
+     pln = MKPLAN_DFT(P, &padt, apply);
+     if (mkP(pln, dp->sz->dims[0].n, dp->sz->dims[0].is, dp->sz->dims[0].os,
+             dp->ro, dp->io, plnr))
+          return &(pln->super.super);
+
+     /* the children could not be planned: release the plan itself */
+     X(ifree)(pln);
+     return (plan *) 0;
+}
+
+/* Solver hook for the twiddle (DIT) variant: n is written as r * m with
+   r = X(first_divisor)(n); child cld is planned from input to output and
+   mkP plans the size-r Rader part on the output with stride os*m. */
+static plan *mkplan_dit(const solver *ego, const problem *p_, planner *plnr)
+{
+     const problem_dft *p = (const problem_dft *) p_;
+     P_dit *pln = 0;
+     int n, r, m;
+     int is, os;
+     plan *cld = (plan *) 0;
+
+     static const plan_adt padt = {
+	  X(dft_solve), awake_dit, print_dit, destroy_dit
+     };
+
+     if (!applicable_dit(ego, p_, plnr))
+          goto nada;
+
+     n = p->sz->dims[0].n;
+     is = p->sz->dims[0].is;
+     os = p->sz->dims[0].os;
+     r = X(first_divisor)(n);
+     m = n / r;
+
+     cld = X(mkplan_d)(plnr, 
+		       X(mkproblem_dft_d)(X(mktensor_1d)(m, r * is, os),
+					  X(mktensor_1d)(r, is, m * os),
+					  p->ri, p->ii, p->ro, p->io));
+     if (!cld) goto nada;
+
+     pln = MKPLAN_DFT(P_dit, &padt, apply_dit);
+     if (!mkP(&pln->super, r, os*m, os*m, p->ro, p->io, plnr))
+	  goto nada;
+
+     pln->os = os;
+     pln->m = m;
+     pln->cld = cld;
+     pln->W = 0;          /* built later, in awake_dit() */
+     pln->super.super.super.ops.add += 2 * (r-1);
+     pln->super.super.super.ops.mul += 4 * (r-1);
+     X(ops_madd)(m, &pln->super.super.super.ops, &cld->ops,
+		 &pln->super.super.super.ops);
+     return &(pln->super.super.super);
+
+ nada:   /* cld and pln may both still be null when control arrives here */
+     X(plan_destroy_internal)(cld);
+     X(ifree0)(pln);
+     return (plan *) 0;
+}
+
+/* constructors */
+
+static solver *mksolver(void) /* solver whose plans come from mkplan */
+{
+     static const solver_adt direct_adt = { mkplan };
+     S *made = MKSOLVER(S, &direct_adt);
+     return &(made->super);
+}
+
+static solver *mksolver_dit(void) /* solver whose plans come from mkplan_dit */
+{
+     static const solver_adt dit_adt = { mkplan_dit };
+     S *made = MKSOLVER(S, &dit_adt);
+     return &(made->super);
+}
+
+void X(dft_rader_register)(planner *p)
+{    /* the direct solver is registered before the DIT one */
+     REGISTER_SOLVER(p, mksolver());
+     REGISTER_SOLVER(p, mksolver_dit());
+}
diff --git a/src/fftw3/dft/rank-geq2.c b/src/fftw3/dft/rank-geq2.c
new file mode 100644
index 0000000..81dffd9
--- /dev/null
+++ b/src/fftw3/dft/rank-geq2.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rank-geq2.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+/* plans for DFT of rank >= 2 (multidimensional) */
+
+#include "dft.h"
+
+typedef struct {
+     solver super;
+     int spltrnk;          /* this solver's split rank; passed to X(pickdim) */
+     const int *buddies;   /* all registered split ranks (not owned) */
+     int nbuddies;         /* number of entries in buddies */
+} S;
+
+typedef struct {
+     plan_dft super;
+
+     plan *cld1, *cld2;   /* applied in this order by apply() */
+     const S *solver;     /* solver that made the plan; read by print() */
+} P;
+
+/* Multi-dimensional DFT in two passes of lower-rank DFTs: cld1 goes
+   from the input to the output, then cld2 works on the output in place. */
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P *pln = (const P *) ego_;
+     plan *first = pln->cld1;
+     plan *second = pln->cld2;
+
+     /* pass 1: ri/ii -> ro/io */
+     ((plan_dft *) first)->apply(first, ri, ii, ro, io);
+     /* pass 2: ro/io -> ro/io */
+     ((plan_dft *) second)->apply(second, ro, io, ro, io);
+}
+
+
+static void awake(plan *ego_, int flg) /* adt hook: forward flg to children */
+{
+     P *pln = (P *) ego_;
+     AWAKE(pln->cld1, flg);
+     AWAKE(pln->cld2, flg);
+}
+
+static void destroy(plan *ego_) /* adt hook: destroy cld2, then cld1 */
+{
+     P *pln = (P *) ego_;
+     X(plan_destroy_internal)(pln->cld2);
+     X(plan_destroy_internal)(pln->cld1);
+}
+
+/* Prints the solver's spltrnk followed by both child plans. */
+static void print(const plan *ego_, printer *p)
+{
+     const P *pln = (const P *) ego_;
+     p->print(p, "(dft-rank>=2/%d%(%p%)%(%p%))",
+              pln->solver->spltrnk, pln->cld1, pln->cld2);
+}
+
+/* Choose the rank at which sz is split and store it in *rp.  Returns 0
+   when X(pickdim) fails or the split would not reduce the rank. */
+static int picksplit(const S *ego, const tensor *sz, int *rp)
+{
+     A(sz->rnk > 1); /* a tensor of rank <= 1 cannot be split */
+     if (!X(pickdim)(ego->spltrnk, ego->buddies, ego->nbuddies, sz, 1, rp))
+          return 0;
+     ++*rp; /* pickdim yields a dimension index; a rank is wanted */
+     return (*rp < sz->rnk);
+}
+
+/* Nonzero iff p_ is a DFT problem of rank >= 2 for which picksplit
+   succeeds. */
+static int applicable0(const solver *ego_, const problem *p_, int *rp)
+{
+     const problem_dft *dp = (const problem_dft *) p_;
+
+     if (!DFTP(p_))
+          return 0;
+     if (dp->sz->rnk < 2)
+          return 0;
+     /* the split rank is written through rp by picksplit */
+     return (picksplit((const S *) ego_, dp->sz, rp) != 0);
+}
+
+/* TODO: revise this. */
+static int applicable(const solver *ego_, const problem *p_,
+                      const planner *plnr, int *rp)
+{
+     const S *slv = (const S *) ego_;
+     const problem_dft *dp = (const problem_dft *) p_;
+
+     if (!applicable0(ego_, p_, rp))
+          return 0;
+
+     /* With NO_RANK_SPLITS only the default split rank, buddies[0], is
+        tried.  (Unlike fftw2's spltrnk=1, the default is spltrnk=0, an
+        asymptotic "theoretical optimum" for an ideal cache; the two are
+        equivalent for rnk < 4.) */
+     if (NO_RANK_SPLITSP(plnr) && slv->spltrnk != slv->buddies[0])
+          return 0;
+
+     /* Heuristic: if the vector stride is greater than the transform
+        sz, prefer doing the vector loop first with a vrank-geq1 plan. */
+     if (NO_UGLYP(plnr) && dp->vecsz->rnk > 0 &&
+         X(tensor_min_stride)(dp->vecsz) > X(tensor_max_index)(dp->sz))
+          return 0;
+     return 1;
+}
+
+/* Solver hook: split sz at the rank chosen by applicable() into sz1 and
+   sz2, plan cld1 (sz2 transforms, input -> output) and cld2 (sz1
+   transforms, in place on the output), and combine them in a P. */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     const S *ego = (const S *) ego_;
+     const problem_dft *p;
+     P *pln;
+     plan *cld1 = 0, *cld2 = 0;
+     tensor *sz1, *sz2, *vecszi, *sz2i;
+     int spltrnk;
+
+     static const plan_adt padt = {
+	  X(dft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr, &spltrnk))
+          return (plan *) 0;
+
+     p = (const problem_dft *) p_;
+     X(tensor_split)(p->sz, &sz1, spltrnk, &sz2);
+     vecszi = X(tensor_copy_inplace)(p->vecsz, INPLACE_OS);
+     sz2i = X(tensor_copy_inplace)(sz2, INPLACE_OS);
+
+     cld1 = X(mkplan_d)(plnr, 
+			X(mkproblem_dft_d)(X(tensor_copy)(sz2),
+					   X(tensor_append)(p->vecsz, sz1),
+					   p->ri, p->ii, p->ro, p->io));
+     if (!cld1) goto nada;
+
+     cld2 = X(mkplan_d)(plnr, 
+			X(mkproblem_dft_d)(
+			     X(tensor_copy_inplace)(sz1, INPLACE_OS),
+			     X(tensor_append)(vecszi, sz2i),
+			     p->ro, p->io, p->ro, p->io));
+     if (!cld2) goto nada;
+
+     pln = MKPLAN_DFT(P, &padt, apply);
+     pln->cld1 = cld1;
+     pln->cld2 = cld2;
+     pln->solver = ego;
+     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops);
+     X(tensor_destroy4)(sz1, sz2, vecszi, sz2i); /* temporaries made above */
+
+     return &(pln->super.super);
+
+ nada:   /* cld1 and cld2 may still be null; the four tensors always exist */
+     X(plan_destroy_internal)(cld2);
+     X(plan_destroy_internal)(cld1);
+     X(tensor_destroy4)(sz1, sz2, vecszi, sz2i);
+     return (plan *) 0;
+}
+
+/* One solver per split rank; the buddies pointer is stored, not copied. */
+static solver *mksolver(int spltrnk, const int *buddies, int nbuddies)
+{
+     static const solver_adt geq2_adt = { mkplan };
+     S *made = MKSOLVER(S, &geq2_adt);
+     made->nbuddies = nbuddies; made->buddies = buddies;
+     made->spltrnk = spltrnk;
+     return &(made->super);
+}
+
+void X(dft_rank_geq2_register)(planner *p)
+{
+     /* candidate split ranks; one solver is registered for each */
+     static const int buddies[] = { 0, 1, -2 };
+     const int nbuddies = sizeof(buddies) / sizeof(buddies[0]);
+     int which = 0;
+
+     while (which < nbuddies) {
+          REGISTER_SOLVER(p, mksolver(buddies[which], buddies, nbuddies));
+          ++which;
+     }
+     /* FIXME:
+        Should we try more buddies?
+
+        Another possible variant is to swap cld1 and cld2 (or rather,
+        to swap their problems; they are not interchangeable because
+        cld2 must be in-place).  In past versions of FFTW, however, I
+        seem to recall that such rearrangements have made little or no
+        difference.
+     */
+}
diff --git a/src/fftw3/dft/rank0.c b/src/fftw3/dft/rank0.c
new file mode 100644
index 0000000..bef719f
--- /dev/null
+++ b/src/fftw3/dft/rank0.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rank0.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+/* plans for rank-0 DFTs (copy operations) */
+
+#include "dft.h"
+
+#ifdef HAVE_STRING_H
+#include <string.h>		/* for memcpy() */
+#endif
+
+
+typedef struct {
+     dftapply apply;                           /* copy routine of the variant */
+     int (*applicable)(const problem_dft *p);  /* variant-specific test */
+     const char *nam;                          /* name shown by print() */
+} rnk0adt;
+
+typedef struct {
+     solver super;
+     const rnk0adt *adt;   /* the copy variant this solver stands for */
+} S;
+
+typedef struct {
+     plan_dft super;
+     int vl;               /* vl, ivs, ovs: filled by X(tensor_tornk1) in mkplan */
+     int ivs, ovs;         /* input and output steps used by apply_vec */
+     const S *slv;         /* solver that made the plan; read by print() */
+} P;
+
+/* generic applicability function: an out-of-place DFT problem of rank 0
+   that the variant's own applicable() test also accepts */
+static int applicable(const solver *ego_, const problem *p_)
+{
+     const S *slv = (const S *) ego_;
+     const problem_dft *dp = (const problem_dft *) p_;
+
+     if (!DFTP(p_))
+          return 0;
+     if (dp->ri == dp->ro || dp->sz->rnk != 0)
+          return 0;
+     /* the remaining conditions belong to the copy variant */
+     return (slv->adt->applicable(dp) != 0);
+}
+
+/*-----------------------------------------------------------------------*/
+/* rank-0 dft, vl == 1: copy one real and one imaginary value */
+static void apply_1(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     UNUSED(ego_);
+     ro[0] = ri[0];
+     io[0] = ii[0];
+}
+
+static int applicable_1(const problem_dft *p)
+{    /* only problems without a vector loop */
+     return (0 == p->vecsz->rnk);
+}
+
+static const rnk0adt adt_cpy1 =  /* variant: single-element copy */
+{
+     apply_1, applicable_1, "dft-rank0-cpy1"
+};
+
+/*-----------------------------------------------------------------------*/
+/* rank-0 dft, vl > 1: plain strided copy loop (not unrolled).  Each
+   imaginary array is walked through its own pointer, because keeping
+   ii - ri in an int truncates when split arrays lie far apart. */
+static void apply_vec(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P *ego = (const P *) ego_;
+     int i, ivs = ego->ivs, ovs = ego->ovs;
+     for (i = ego->vl; i > 0; --i) {
+          R r0 = ri[0], i0 = ii[0]; /* load both parts before storing */
+          ro[0] = r0; io[0] = i0;
+          ri += ivs; ii += ivs; ro += ovs; io += ovs;
+     }
+}
+
+static int applicable_vec(const problem_dft *p)
+{    /* exactly one vector dimension, and out of place */
+     return !(p->vecsz->rnk != 1 || p->ro == p->ri);
+}
+
+static const rnk0adt adt_vec =  /* variant: strided copy loop */
+{
+     apply_vec, applicable_vec, "dft-rank0-vec"
+};
+
+/*-----------------------------------------------------------------------*/
+/* rank-0 dft, vl > 1, [io]vs == 1: the real and the imaginary array
+   each go through one memcpy of vl elements */
+static void apply_io1(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const size_t nbytes = ((const P *) ego_)->vl * sizeof(R);
+     memcpy(ro, ri, nbytes);
+     memcpy(io, ii, nbytes);
+}
+
+/* applicable_vec plus unit input and output vector strides */
+static int applicable_io1(const problem_dft *p)
+{
+     if (!applicable_vec(p))
+          return 0;
+     return (p->vecsz->dims[0].is == 1
+             && p->vecsz->dims[0].os == 1);
+}
+
+static const rnk0adt adt_io1 =  /* variant: two memcpy calls, stride 1 */
+{
+     apply_io1, applicable_io1, "dft-rank0-io1-memcpy"
+};
+
+/*-----------------------------------------------------------------------*/
+/* rank-0 dft, vl > 1, [io]vs == 2 (interleaved, real part first):
+   a single memcpy starting at the real pointers moves both parts */
+static void apply_io2r(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const size_t nbytes = ((const P *) ego_)->vl * sizeof(R) * 2;
+     UNUSED(ii);
+     UNUSED(io);              /* i{in,out}put == r{in,out}put + 1 */
+     memcpy(ro, ri, nbytes);
+}
+
+/* applicable_vec, both vector strides equal to 2, and each imaginary
+   pointer sitting one element after its real pointer */
+static int applicable_io2r(const problem_dft *p)
+{
+     if (!applicable_vec(p))
+          return 0;
+     if (p->vecsz->dims[0].is != 2 || p->vecsz->dims[0].os != 2) return 0;
+     return (p->ii == p->ri + 1 && p->io == p->ro + 1);
+}
+
+static const rnk0adt adt_io2r =  /* variant: interleaved, real part first */
+{
+     apply_io2r, applicable_io2r, "dft-rank0-io2r-memcpy"
+};
+
+/* interleaved with the imaginary part first: one memcpy from ii to io */
+static void apply_io2i(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const size_t nbytes = ((const P *) ego_)->vl * sizeof(R) * 2;
+     UNUSED(ri);
+     UNUSED(ro);              /* r{in,out}put == i{in,out}put + 1 */
+     memcpy(io, ii, nbytes);
+}
+
+/* applicable_vec, both vector strides equal to 2, and each real
+   pointer sitting one element after its imaginary pointer */
+static int applicable_io2i(const problem_dft *p)
+{
+     if (!applicable_vec(p))
+          return 0;
+     if (p->vecsz->dims[0].is != 2 || p->vecsz->dims[0].os != 2) return 0;
+     return (p->ri == p->ii + 1 && p->ro == p->io + 1);
+}
+
+static const rnk0adt adt_io2i =  /* variant: interleaved, imaginary part first */
+{
+     apply_io2i, applicable_io2i, "dft-rank0-io2i-memcpy"
+};
+
+/*-----------------------------------------------------------------------*/
+/* generic stuff: */
+
+static void print(const plan *ego_, printer *p) /* variant name, then vl */
+{
+     const P *pln = (const P *) ego_;
+     p->print(p, "(%s%v)", pln->slv->adt->nam, pln->vl);
+}
+
+/* Solver hook: for an applicable problem, build a plan that runs this
+   solver's copy routine; a null plan is returned otherwise. */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     static const plan_adt padt = {
+          X(dft_solve), X(null_awake), print, X(plan_null_destroy)
+     };
+     const S *slv = (const S *) ego_;
+     const problem_dft *dp = (const problem_dft *) p_;
+     P *pln;
+
+     UNUSED(plnr);
+     if (!applicable(ego_, p_))
+          return (plan *) 0;
+
+     pln = MKPLAN_DFT(P, &padt, slv->adt->apply);
+     pln->slv = slv;
+
+     /* vl, ivs and ovs are filled in from the vector tensor */
+     X(tensor_tornk1)(dp->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+
+     /* cost estimate: 2*vl loads, 2*vl stores */
+     X(ops_other)(4 * pln->vl, &pln->super.super.ops);
+     return &(pln->super.super);
+}
+
+/* Make the solver that stands for copy variant `adt'. */
+static solver *mksolver(const rnk0adt *adt)
+{
+     static const solver_adt rank0_adt = { mkplan };
+     S *made = MKSOLVER(S, &rank0_adt); made->adt = adt;
+     return &(made->super);
+}
+
+void X(dft_rank0_register)(planner *p) /* one solver per copy variant */
+{
+     static const rnk0adt *const adts[] = {
+          &adt_cpy1, &adt_vec, &adt_io1, &adt_io2r, &adt_io2i
+     };
+     const unsigned nadts = sizeof(adts) / sizeof(adts[0]);
+     unsigned which;
+     for (which = 0; which < nadts; ++which)
+          REGISTER_SOLVER(p, mksolver(adts[which]));
+}
diff --git a/src/fftw3/dft/solve.c b/src/fftw3/dft/solve.c
new file mode 100644
index 0000000..abcad7f
--- /dev/null
+++ b/src/fftw3/dft/solve.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: solve.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+#include "dft.h"
+
+/* use the apply() operation for DFT problems */
+void X(dft_solve)(const plan *ego_, const problem *p_)
+{
+     const problem_dft *prob = (const problem_dft *) p_;
+     const plan_dft *pln = (const plan_dft *) ego_;
+     /* hand apply() the problem's four arrays with UNTAINT applied to each */
+     pln->apply(ego_, UNTAINT(prob->ri), UNTAINT(prob->ii),
+		UNTAINT(prob->ro), UNTAINT(prob->io));
+}
diff --git a/src/fftw3/dft/vrank-geq1.c b/src/fftw3/dft/vrank-geq1.c
new file mode 100644
index 0000000..92a5776
--- /dev/null
+++ b/src/fftw3/dft/vrank-geq1.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: vrank-geq1.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+
+/* Plans for handling vector transform loops.  These are *just* the
+   loops, and rely on child plans for the actual DFTs.
+ 
+   They form a wrapper around solvers that don't have apply functions
+   for non-null vectors.
+ 
+   vrank-geq1 plans also recursively handle the case of multi-dimensional
+   vectors, obviating the need for most solvers to deal with this.  We
+   can also play games here, such as reordering the vector loops.
+ 
+   Each vrank-geq1 plan reduces the vector rank by 1, picking out a
+   dimension determined by the vecloop_dim field of the solver. */
+
+#include "dft.h"
+
+typedef struct {
+     solver super;
+     int vecloop_dim;	/* this solver's loop-dimension request, passed to X(pickdim) */
+     const int *buddies;	/* vecloop_dim values of all solvers registered together */
+     int nbuddies;	/* number of entries in buddies */
+} S;
+
+typedef struct {
+     plan_dft super;
+
+     plan *cld;		/* child plan, applied once per loop iteration */
+     int vl;		/* loop count: n of the dimension peeled off by mkplan */
+     int ivs, ovs;	/* input / output stride between iterations */
+     const S *solver;	/* solver that created the plan; read by print() */
+} P;
+
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     /* run the child plan vl times, stepping input by ivs and output by ovs */
+     const P *self = (const P *) ego_;
+     plan *child = self->cld;
+     dftapply run = ((plan_dft *) child)->apply;
+     const int count = self->vl, istep = self->ivs, ostep = self->ovs;
+     int k;
+
+     for (k = 0; k < count; ++k)
+          run(child, ri + k * istep, ii + k * istep, ro + k * ostep, io + k * ostep);
+}
+
+static void awake(plan *ego_, int flg)
+{    /* no state of our own: the flag is simply forwarded to the child */
+     P *self = (P *) ego_;
+     AWAKE(self->cld, flg);
+}
+
+static void destroy(plan *ego_)
+{    /* the child plan is the only thing released here */
+     P *self = (P *) ego_;
+     X(plan_destroy_internal)(self->cld);
+}
+
+static void print(const plan *ego_, printer *p)
+{
+     /* arguments: loop count, the solver's vecloop_dim, the child plan */
+     const P *self = (const P *) ego_;
+     const int which = self->solver->vecloop_dim;
+     p->print(p, "(dft-vrank>=1-x%d/%d%(%p%))", self->vl, which, self->cld);
+}
+
+static int pickdim(const S *ego, const tensor *vecsz, int oop, int *dp)
+{    /* thin wrapper: pass this solver's preferences on to X(pickdim) */
+     const int want = ego->vecloop_dim, nbud = ego->nbuddies;
+     return X(pickdim)(want, ego->buddies, nbud, vecsz, oop, dp);
+}
+
+static int applicable0(const solver *ego_, const problem *p_, int *dp)
+{
+     const S *self = (const S *) ego_;
+     const problem_dft *prob;
+     int out_of_place;
+
+     if (!DFTP(p_)) return 0;		/* only DFT problems qualify */
+     prob = (const problem_dft *) p_;
+
+     /* need a finite, positive vector rank to have a dimension to pick */
+     if (!FINITE_RNK(prob->vecsz->rnk) || prob->vecsz->rnk <= 0) return 0;
+
+     out_of_place = (prob->ri != prob->ro);
+     return pickdim(self, prob->vecsz, out_of_place, dp) ? 1 : 0;
+}
+
+static int applicable(const solver *ego_, const problem *p_, 
+		      const planner *plnr, int *dp)
+{
+     const S *self = (const S *) ego_;
+     const problem_dft *prob = (const problem_dft *) p_;
+     const iodim *loopdim;
+
+     /* structural test first; on success *dp holds the chosen dimension */
+     if (!applicable0(ego_, p_, dp)) return 0;
+
+     /* fftw2 behavior: when NO_VRANK_SPLITSP(plnr) holds, only the
+	solver whose vecloop_dim equals buddies[0] may be used */
+     if (NO_VRANK_SPLITSP(plnr) && self->vecloop_dim != self->buddies[0])
+	  return 0;
+
+     /* everything below is a heuristic that applies only under NO_UGLYP */
+     if (!NO_UGLYP(plnr))
+	  return 1;
+
+     /* Heuristic: for a multi-dimensional transform whose vector stride
+	is smaller than the transform size, decline so that a rank>=2
+	plan can first combine this vector with the transform dimensions. */
+     loopdim = prob->vecsz->dims + *dp;
+     if (prob->sz->rnk > 1
+	 && X(imin)(X(iabs)(loopdim->is), X(iabs)(loopdim->os))
+	    < X(tensor_max_index)(prob->sz))
+	  return 0;
+
+     /* Heuristic: rank-0 transforms over a rank-1 vector are better
+	handled by the rank-0 solvers. */
+     if (prob->sz->rnk == 0 && prob->vecsz->rnk == 1)
+	  return 0;
+
+     /* prefer threaded version */
+     if (NONTHREADED_ICKYP(plnr))
+	  return 0;
+
+     return 1;
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{    /* returns a loop plan around a child plan, or 0 if none can be made */
+     const S *ego = (const S *) ego_;
+     const problem_dft *p;
+     P *pln;
+     plan *cld;
+     int vdim;			/* vector dimension chosen by applicable() */
+     iodim *d;			/* that dimension's n / is / os */
+
+     static const plan_adt padt = {
+	  X(dft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr, &vdim))
+          return (plan *) 0;
+     p = (const problem_dft *) p_;
+
+     d = p->vecsz->dims + vdim;
+
+     A(d->n > 1);
+     cld = X(mkplan_d)(plnr,	/* child: same sz, vecsz without dimension vdim */
+		       X(mkproblem_dft_d)(
+			    X(tensor_copy)(p->sz),
+			    X(tensor_copy_except)(p->vecsz, vdim),
+			    TAINT(p->ri, d->is), TAINT(p->ii, d->is),
+			    TAINT(p->ro, d->os), TAINT(p->io, d->os)));
+     if (!cld) return (plan *) 0;	/* no plan found for the child problem */
+
+     pln = MKPLAN_DFT(P, &padt, apply);
+
+     pln->cld = cld;
+     pln->vl = d->n;		/* loop count and strides used by apply() */
+     pln->ivs = d->is;
+     pln->ovs = d->os;
+
+     pln->solver = ego;
+     X(ops_zero)(&pln->super.super.ops);
+     pln->super.super.ops.other = 3.14159; /* magic to prefer codelet loops */
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); /* presumably adds vl * child ops -- confirm in ops code */
+     pln->super.super.pcost = pln->vl * cld->pcost;	/* vl times the child's pcost */
+
+     return &(pln->super.super);
+}
+
+static solver *mksolver(int vecloop_dim, const int *buddies, int nbuddies)
+{    /* buddies is stored by pointer, not copied: the array must stay valid */
+     static const solver_adt vtable = { mkplan };
+     S *self = MKSOLVER(S, &vtable);
+     self->nbuddies = nbuddies;
+     self->buddies = buddies;
+     self->vecloop_dim = vecloop_dim;
+     return &self->super;
+}
+
+void X(dft_vrank_geq1_register)(planner *p)
+{
+     /* FIXME: Should we try other vecloop_dim values? */
+     static const int buddies[] = { 1, -1 };
+     const int nbuddies = sizeof(buddies) / sizeof(buddies[0]);
+     const int *dim;
+
+     /* one solver per listed value; all of them share the same buddy list */
+     for (dim = buddies; dim != buddies + nbuddies; ++dim) {
+          REGISTER_SOLVER(p, mksolver(*dim, buddies, nbuddies));
+     }
+}
diff --git a/src/fftw3/dft/vrank2-transpose.c b/src/fftw3/dft/vrank2-transpose.c
new file mode 100644
index 0000000..ebed1ff
--- /dev/null
+++ b/src/fftw3/dft/vrank2-transpose.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: vrank2-transpose.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+/* rank-0, vector-rank-2, in-place transposition (square or non-square) */
+
+#include "dft.h"
+
+/* transposition routine. TODO: optimize? */
+static void t(R *rA, R *iA, int n, int is, int js)
+{    /* in-place swap of elements (i, j) and (j, i) for all j < i < n */
+     int i, j;
+     int im = iA - rA;		/* offset from a real part to its imaginary part */
+
+     for (i = 1; i < n; ++i) {
+	  R *p0 = rA + i * is;	/* starts at i*is and advances by js per j */
+	  R *p1 = rA + i * js;	/* starts at i*js and advances by is per j */
+          for (j = 0; j < i; ++j) {
+               R ar = p0[0], ai = p0[im];	/* read both values before writing */
+               R br = p1[0], bi = p1[im];
+               p1[0] = ar; p1[im] = ai; p1 += is;
+               p0[0] = br; p0[im] = bi; p0 += js;
+          }
+     }
+}
+
+typedef solver S;	/* the solver carries no extra fields */
+
+typedef struct {
+     plan_dft super;
+     int n;		/* n and m are filled in by X(transpose_dims) in mkplan */
+     int s0, s1;	/* is and os of the first vector dimension */
+     int m;
+     int offset;	/* -1 when ri == ii + 1, else 0; added to ri before transposing */
+     int nd, md, d; /* d = gcd(n,m), nd = n / d, md = m / d */
+} P;
+
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     /* square case only: t() swaps in place, ro and io are ignored */
+     const P *self = (const P *) ego_;
+     UNUSED(ro); UNUSED(io);
+     A(self->n == self->m);
+     t(ri, ii, self->n, self->s0, self->s1);
+}
+
+static void apply_general(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P *self = (const P *) ego_;
+     const int nd = self->nd, md = self->md, d = self->d;
+     R *scratch;	/* 2 * nd * md * d reals, released before returning */
+     UNUSED(ii); UNUSED(ro); UNUSED(io);
+     scratch = (R *) MALLOC((sizeof(R) * 2) * nd * md * d, BUFFERS);
+     X(transpose)(ri + self->offset, nd, md, d, 2, scratch);
+     X(ifree)(scratch);
+}
+
+static void apply_slow(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P *self = (const P *) ego_;
+     const int n = self->n, m = self->m;
+     const int move_size = (n + m) / 2;
+     char *move;	/* work array of move_size entries for X(transpose_slow) */
+     R buf[4];		/* four reals of scratch, also passed to X(transpose_slow) */
+
+     UNUSED(ii); UNUSED(ro); UNUSED(io);
+     STACK_MALLOC(char *, move, move_size);
+     X(transpose_slow)(ri + self->offset, n, m, 2, move, move_size, buf);
+     STACK_FREE(move);
+}
+
+static int applicable(const problem *p_, const planner *plnr)
+{
+     const problem_dft *prob = (const problem_dft *) p_;
+     const iodim *dims;
+
+     /* in-place, rank-0 DFT problem over a rank-2 vector */
+     if (!DFTP(p_) || prob->ri != prob->ro) return 0;
+     if (prob->sz->rnk != 0 || prob->vecsz->rnk != 2) return 0;
+
+     dims = prob->vecsz->dims;
+     if (!X(transposable)(dims, dims + 1, 1, X(imin)(dims[0].is, dims[0].os),
+			  prob->ri, prob->ii))
+	  return 0;
+     /* when NO_UGLYP holds, only equal extents (square) are accepted */
+     return !NO_UGLYP(plnr) || dims[0].n == dims[1].n;
+}
+
+static void print(const plan *ego_, printer *p)
+{    /* arguments: the two extents n and m stored in the plan */
+     const P *self = (const P *) ego_;
+     p->print(p, "(dft-transpose-%dx%d)", self->n, self->m);
+}
+
+static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
+{    /* returns a transpose plan, or 0 when applicable() rejects the problem */
+     const problem_dft *p;
+     P *pln;
+     const iodim *d;		/* the two vector dimensions, d[0] and d[1] */
+
+     static const plan_adt padt = {
+	  X(dft_solve), X(null_awake), print, X(plan_null_destroy)
+     };
+
+     UNUSED(ego);
+
+     if (!applicable(p_, plnr))
+          return (plan *) 0;
+     p = (const problem_dft *) p_;
+
+     d = p->vecsz->dims;	/* apply routine below: apply, apply_slow or apply_general */
+     pln = MKPLAN_DFT(P, &padt, 
+		      X(transpose_simplep)(d, d+1, 1, X(imin)(d[0].is,d[0].os),
+					   p->ri, p->ii) ? apply :
+		      (X(transpose_slowp)(d, d+1, 2) ? apply_slow : 
+		       apply_general));
+     X(transpose_dims)(d, d+1, &pln->n, &pln->m, &pln->d, &pln->nd, &pln->md);
+     pln->offset = (p->ri - p->ii == 1) ? -1 : 0;	/* -1 when ri is one past ii */
+     pln->s0 = d[0].is;
+     pln->s1 = d[0].os;
+
+     /* (4 loads + 4 stores) * (pln->n \choose 2)
+        (FIXME? underestimate for non-square) */
+     X(ops_other)(4 * pln->n * (pln->m - 1), &pln->super.super.ops);
+     return &(pln->super.super);
+}
+
+static solver *mksolver(void)
+{    /* S is plain `solver` in this file, so there is nothing else to set */
+     static const solver_adt vtable = { mkplan };
+     return MKSOLVER(S, &vtable);
+}
+
+void X(dft_vrank2_transpose_register)(planner *p)
+{    /* registers the single solver built by mksolver() with planner p */
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/dft/vrank3-transpose.c b/src/fftw3/dft/vrank3-transpose.c
new file mode 100644
index 0000000..3e2157a
--- /dev/null
+++ b/src/fftw3/dft/vrank3-transpose.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: vrank3-transpose.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+/* rank-0, vector-rank-3, in-place transposition (square or non-square) */
+
+#include "dft.h"
+
+/* transposition routine. TODO: optimize? */
+static void t(R *rA, R *iA, int n, int is, int js, int vn, int vs)
+{
+     /* swap block (row, col) with (col, row); a block is vn values, vs apart */
+     const int imag = iA - rA;
+     int row, col, k;
+
+     for (row = 1; row < n; ++row)
+          for (col = 0; col < row; ++col) {
+	       R *lo = rA + row * is + col * js;
+	       R *up = rA + col * is + row * js;
+               for (k = 0; k < vn; ++k, lo += vs, up += vs) {
+                    const R ar = lo[0], ai = lo[imag];
+                    const R br = up[0], bi = up[imag];
+                    up[0] = ar; up[imag] = ai;
+                    lo[0] = br; lo[imag] = bi;
+               }
+          }
+}
+
+typedef solver S;	/* the solver carries no extra fields */
+
+typedef struct {
+     plan_dft super;
+     int n, vl;		/* vl: n of the third (non-transposed) dimension */
+     int s0, s1, vs;	/* is / os of the first transposed dimension; vs: is of the third */
+     int m;		/* n and m are filled in by X(transpose_dims) in mkplan */
+     int offset;	/* -1 when ri == ii + 1, else 0; added to ri before transposing */
+     int nd, md, d; /* d = gcd(n,m), nd = n / d, md = m / d */
+} P;
+
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     /* square case only: t() swaps in place, ro and io are ignored */
+     const P *self = (const P *) ego_;
+     UNUSED(ro); UNUSED(io);
+     A(self->n == self->m);
+     t(ri, ii, self->n, self->s0, self->s1, self->vl, self->vs);
+}
+
+static void apply_general(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P *self = (const P *) ego_;
+     const int nd = self->nd, md = self->md, d = self->d, vl = self->vl;
+     R *scratch;	/* 2 * vl * nd * md * d reals, released before returning */
+     UNUSED(ii); UNUSED(ro); UNUSED(io);
+     scratch = (R *) MALLOC((sizeof(R) * 2) * vl * nd * md * d, BUFFERS);
+     X(transpose)(ri + self->offset, nd, md, d, 2*vl, scratch);
+     X(ifree)(scratch);
+}
+
+static void apply_slow(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P *self = (const P *) ego_;
+     const int n = self->n, m = self->m, vl = self->vl;
+     const int move_size = (n + m) / 2;
+     char *move;	/* work array of move_size entries for X(transpose_slow) */
+     R *buf;		/* 4 * vl reals of scratch, released before returning */
+     UNUSED(ii); UNUSED(ro); UNUSED(io);
+     buf = (R *) MALLOC((sizeof(R) * 4) * vl, BUFFERS);
+     STACK_MALLOC(char *, move, move_size);
+     X(transpose_slow)(ri + self->offset, n, m, 2*vl, move, move_size, buf);
+     STACK_FREE(move);
+     X(ifree)(buf);
+}
+
+static int pickdim(const tensor *s, int *pdim0, int *pdim1, int *pdim2,
+		   R *ri, R *ii)
+{
+     /* Try each pair (a, b), a < b, as the transposed dimensions; the
+	leftover index 3 - a - b is only meaningful when s->rnk == 3. */
+     int a, b;
+     for (a = 0; a < s->rnk; ++a) {
+          for (b = a + 1; b < s->rnk; ++b) {
+	       const int rest = 3 - a - b;
+	       const iodim *loop = s->dims + rest;
+
+	       if (loop->is != loop->os) continue;
+	       if (!X(transposable)(s->dims + a, s->dims + b,
+				    loop->n, loop->is, ri, ii)) continue;
+	       *pdim0 = a; *pdim1 = b; *pdim2 = rest;
+	       return 1;
+	  }
+     }
+     return 0;
+}
+
+static int applicable0(const problem *p_, int *dim0, int *dim1, int *dim2)
+{
+     const problem_dft *prob = (const problem_dft *) p_;
+
+     if (!DFTP(p_)) return 0;
+     /* must be in-place, rank 0, with exactly three vector dimensions */
+     if (prob->ri != prob->ro) return 0;
+     if (prob->sz->rnk != 0) return 0;
+     if (prob->vecsz->rnk != 3) return 0;
+
+     /* on success pickdim() stores the chosen dimensions in dim0..dim2 */
+     return pickdim(prob->vecsz, dim0, dim1, dim2, prob->ri, prob->ii) != 0;
+}
+
+static int applicable(const problem *p_, const planner *plnr, 
+		      int *dim0, int *dim1, int *dim2)
+{
+     const iodim *dims, *t0;
+
+     if (!applicable0(p_, dim0, dim1, dim2)) return 0;
+     if (!NO_UGLYP(plnr)) return 1;	/* the rest applies only under NO_UGLYP */
+
+     dims = ((const problem_dft *) p_)->vecsz->dims;
+     t0 = dims + *dim0;
+
+     /* loops are in the wrong order for locality */
+     if (dims[*dim2].is > X(imax)(t0->is, t0->os))
+	  return 0;
+
+     /* the two transposed extents must also be equal */
+     if (t0->n != dims[*dim1].n)
+	  return 0;
+
+     return 1;
+}
+
+static void print(const plan *ego_, printer *p)
+{    /* arguments: the two extents n and m, then the plan's vl */
+     const P *self = (const P *) ego_;
+     p->print(p, "(dft-transpose-%dx%d%v)", self->n, self->m, self->vl);
+}
+
+static plan *mkplan(const solver *ego, const problem *p_, planner *plnr)
+{    /* returns a transpose plan, or 0 when applicable() rejects the problem */
+     const problem_dft *p;
+     P *pln;
+     const iodim *d;
+     int dim0, dim1, dim2;	/* dim0/dim1 are transposed, dim2 is the inner loop */
+     int vl;
+
+     static const plan_adt padt = {
+	  X(dft_solve), X(null_awake), print, X(plan_null_destroy)
+     };
+
+     /* plnr is consulted by applicable() below; only the solver is unused */
+     UNUSED(ego);
+
+     if (!applicable(p_, plnr, &dim0, &dim1, &dim2))
+          return (plan *) 0;
+     p = (const problem_dft *) p_;
+
+     d = p->vecsz->dims;
+     vl = d[dim2].n;
+     pln = MKPLAN_DFT(P, &padt,
+                      X(transpose_simplep)(d+dim0, d+dim1,
+					   vl, d[dim2].is,
+					   p->ri, p->ii) ? apply :
+                      (X(transpose_slowp)(d+dim0, d+dim1, 2*vl) ? apply_slow
+                       : apply_general));
+     X(transpose_dims)(d+dim0, d+dim1, 
+		       &pln->n, &pln->m, &pln->d, &pln->nd, &pln->md);
+     pln->offset = (p->ri - p->ii == 1) ? -1 : 0;	/* -1 when ri is one past ii */
+     pln->s0 = d[dim0].is;
+     pln->s1 = d[dim0].os;
+     pln->vl = vl;
+     pln->vs = d[dim2].is; /* == os */
+
+     /* pln->vl * (4 loads + 4 stores) * (pln->n \choose 2) 
+        (FIXME? underestimate for non-square) */
+     X(ops_other)(4 * pln->vl * pln->n * (pln->m - 1), &pln->super.super.ops);
+
+     return &(pln->super.super);
+}
+
+static solver *mksolver(void)
+{    /* S is plain `solver` in this file, so there is nothing else to set */
+     static const solver_adt vtable = { mkplan };
+     return MKSOLVER(S, &vtable);
+}
+
+void X(dft_vrank3_transpose_register)(planner *p)
+{    /* registers the single solver built by mksolver() with planner p */
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/dft/zero.c b/src/fftw3/dft/zero.c
new file mode 100644
index 0000000..df34e0e
--- /dev/null
+++ b/src/fftw3/dft/zero.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: zero.c,v 1.1 2008/10/17 06:11:08 scuri Exp $ */
+
+#include "dft.h"
+
+/* fill a complex array with zeros. */
+static void recur(const iodim *dims, int rnk, R *ri, R *ii)
+{
+     int k, count, stride;
+
+     /* RNK_MINFTY and negative ranks leave the arrays untouched */
+     if (rnk == RNK_MINFTY || rnk < 0) return;
+     if (rnk == 0) {		/* a single complex element */
+	  ri[0] = ii[0] = K(0.0);
+	  return;
+     }
+
+     count = dims[0].n;
+     stride = dims[0].is;
+     for (k = 0; k < count; ++k) {
+	  if (rnk == 1)		/* this case is redundant but faster */
+	       ri[k * stride] = ii[k * stride] = K(0.0);
+	  else
+	       recur(dims + 1, rnk - 1, ri + k * stride, ii + k * stride);
+     }
+}
+
+
+void X(dft_zerotens)(tensor *sz, R *ri, R *ii)
+{    /* zeroes ri/ii at every index of sz, using the dims' input strides */
+     recur(sz->dims, sz->rnk, ri, ii);
+}
diff --git a/src/fftw3/kernel/align.c b/src/fftw3/kernel/align.c
new file mode 100644
index 0000000..7f475d5
--- /dev/null
+++ b/src/fftw3/kernel/align.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: align.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+#if HAVE_3DNOW		/* ALGN: modulus used by X(alignment_of) below */
+#  define ALGN 8
+#elif HAVE_SIMD
+#  define ALGN 16
+#elif HAVE_K7
+#  define ALGN 8
+#else			/* none of the above: fall back to the size of one R */
+#  define ALGN (sizeof(R))
+#endif
+
+/* NONPORTABLE */
+int X(alignment_of)(R *p)
+{    /* remainder of the address modulo ALGN; 0 when p is a multiple of ALGN */
+     return (int) ((uintptr_t) p % ALGN);
+}
diff --git a/src/fftw3/kernel/alloc.c b/src/fftw3/kernel/alloc.c
new file mode 100644
index 0000000..a95e0e8
--- /dev/null
+++ b/src/fftw3/kernel/alloc.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: alloc.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+#if defined(HAVE_DECL_MEMALIGN) && !HAVE_DECL_MEMALIGN
+#  if defined(HAVE_MALLOC_H)
+#    include <malloc.h>
+#  else
+extern void *memalign(size_t, size_t);
+#  endif
+#endif
+
+#if defined(HAVE_DECL_POSIX_MEMALIGN) && !HAVE_DECL_POSIX_MEMALIGN
+extern int posix_memalign(void **, size_t, size_t);
+#endif
+
+#if defined(macintosh) /* MacOS 9 */
+#  include <Multiprocessing.h>
+#endif
+
+#define real_malloc X(malloc)
+#define real_free free /* memalign and malloc use ordinary free */
+
+#if defined(WITH_OUR_MALLOC16) && (MIN_ALIGNMENT == 16)
+/* Our own 16-byte aligned malloc/free.  Assumes sizeof(void*) is a
+   power of two <= 8 and that malloc is at least sizeof(void*)-aligned.
+
+   The main reason for this routine is that, as of this writing,
+   Windows does not include any aligned allocation routines in its
+   system libraries, and instead provides an implementation with a
+   Visual C++ "Processor Pack" that you have to statically link into
+   your program.  We do not want to require users to have VC++
+   (e.g. gcc/MinGW should be fine).  Our code should be at least as good
+   as the MS _aligned_malloc, in any case, according to second-hand
+   reports of the algorithm it employs (also based on plain malloc). */
+static void *our_malloc16(size_t n)
+{    /* NULL on failure or when n + 16 would wrap; else a 16-byte-aligned block */
+     void *p0, *p;
+     if (n > (size_t) -1 - 16 || !(p0 = malloc(n + 16))) return (void *) 0;
+     p = (void *) (((uintptr_t) p0 + 16) & (~((uintptr_t) 15)));
+     *((void **) p - 1) = p0;	/* our_free16() reads the raw pointer back */
+     return p;
+}
+static void our_free16(void *p)
+{    /* frees the raw pointer our_malloc16() stored just below p; null is a no-op */
+     if (p) free(*((void **) p - 1));
+}
+#endif
+
+/* part of user-callable API */
+void *X(malloc)(size_t n)
+{    /* allocates n bytes with whichever allocator the build configuration selects */
+     void *p;
+
+#if defined(MIN_ALIGNMENT)
+
+#  if defined(WITH_OUR_MALLOC16) && (MIN_ALIGNMENT == 16)
+     p = our_malloc16(n);
+#    undef real_free	/* code after this point, e.g. X(free), sees the new real_free */
+#    define real_free our_free16
+
+#  elif defined(HAVE_MEMALIGN)
+     p = memalign(MIN_ALIGNMENT, n);
+
+#  elif defined(HAVE_POSIX_MEMALIGN)
+     /* note: posix_memalign is broken in glibc 2.2.5: it constrains
+	the size, not the alignment, to be (power of two) * sizeof(void*).
+        The bug seems to have been fixed as of glibc 2.3.1. */
+     if (posix_memalign(&p, MIN_ALIGNMENT, n))
+	  p = (void*) 0;
+
+#  elif defined(__ICC) || defined(__INTEL_COMPILER) || defined(HAVE__MM_MALLOC)
+     /* Intel's C compiler defines _mm_malloc and _mm_free intrinsics */
+     p = (void *) _mm_malloc(n, MIN_ALIGNMENT);
+#    undef real_free
+#    define real_free _mm_free
+
+#  elif defined(_MSC_VER)
+     /* MS Visual C++ 6.0 with a "Processor Pack" supports SIMD
+	and _aligned_malloc/free (uses malloc.h) */
+     p = (void *) _aligned_malloc(n, MIN_ALIGNMENT);
+#    undef real_free
+#    define real_free _aligned_free
+
+#  elif (defined(__MACOSX__) || defined(__APPLE__)) && (MIN_ALIGNMENT <= 16)
+     /* MacOS X malloc is already 16-byte aligned */
+     p = malloc(n);
+
+#  elif defined(macintosh) /* MacOS 9 */
+     p = (void *) MPAllocateAligned(n,
+#    if MIN_ALIGNMENT == 8
+				    kMPAllocate8ByteAligned,
+#    elif MIN_ALIGNMENT == 16
+				    kMPAllocate16ByteAligned,
+#    elif MIN_ALIGNMENT == 32
+				    kMPAllocate32ByteAligned,
+#    else
+#      error "Unknown alignment for MPAllocateAligned"
+#    endif
+				    0);
+#    undef real_free
+#    define real_free MPFree
+
+#  else
+     /* Add your machine here and send a patch to fftw@fftw.org 
+        or (e.g. for Windows) configure --with-our-malloc16 */
+#    error "Don't know how to malloc() aligned memory."
+#  endif
+
+#else /* !defined(MIN_ALIGNMENT) */
+     p = malloc(n);	/* no alignment requirement: plain malloc */
+#endif
+
+     return p;		/* may be null: failure is not checked here */
+}
+
+/* part of user-callable API */
+void X(free)(void *p)
+{    /* real_free is whatever macro definition is in effect at this point */
+     real_free(p);
+}
+
+/**********************************************************
+ *   DEBUGGING CODE
+ **********************************************************/
+#if defined(FFTW_DEBUG_MALLOC)
+
+#include <stdio.h>
+
+/*
+  debugging malloc/free. 
+ 
+  1) Initialize every malloced and freed area to random values, just
+  to make sure we are not using uninitialized pointers.
+ 
+  2) check for blocks freed twice.
+ 
+  3) Check for writes past the ends of allocated blocks
+ 
+  4) destroy contents of freed blocks in order to detect incorrect reuse.
+ 
+  5) keep track of who allocates what and report memory leaks
+ 
+  This code is a quick and dirty hack.  May be nonportable. 
+  Use at your own risk.
+ 
+*/
+
+#define MAGIC ((size_t)0xABadCafe)
+#define PAD_FACTOR 2
+#define SZ_HEADER (4 * sizeof(size_t))
+#define HASHSZ 1031
+
+static unsigned int hashaddr(void *p)
+{    /* bucket index in [0, HASHSZ) computed from the pointer value */
+     return (unsigned int) ((unsigned long) p % HASHSZ);
+}
+
+struct mstat {
+     int siz;		/* sum of the requested sizes of live allocations */
+     int maxsiz;	/* largest value siz has reached */
+     int cnt;		/* number of live allocations */
+     int maxcnt;	/* largest value cnt has reached */
+};
+
+static struct mstat mstat[MALLOC_WHAT_LAST];
+
+struct minfo {
+     const char *file;	/* file and line passed to X(malloc_debug) */
+     int line;
+     size_t n;		/* requested size in bytes */
+     void *p;		/* pointer returned to the caller (just past the header) */
+     struct minfo *next;	/* next entry in the same hash bucket */
+};
+
+static struct minfo *minfo[HASHSZ] = {0};
+
+#ifdef HAVE_THREADS
+int X(in_thread) = 0;
+#endif
+
+void *X(malloc_debug)(size_t n, enum malloc_tag what,
+                      const char *file, int line)
+{    /* debugging allocator: header + junk-filled padded block + bookkeeping */
+     char *p;
+     size_t i;
+     struct minfo *info;
+     struct mstat *stat = mstat + what;		/* counters for this tag */
+     struct mstat *estat = mstat + EVERYTHING;	/* counters for all tags */
+
+     if (n == 0)
+          n = 1;
+
+     if (!IN_THREAD) {
+	  stat->siz += n;
+	  if (stat->siz > stat->maxsiz)
+	       stat->maxsiz = stat->siz;
+	  estat->siz += n;
+	  if (estat->siz > estat->maxsiz)
+	       estat->maxsiz = estat->siz;
+     }
+
+     p = (char *) real_malloc(PAD_FACTOR * n + SZ_HEADER);
+     A(p);
+
+     /* header words: [0] requested size, [1] MAGIC, [2] allocation tag */
+     ((size_t *) p)[0] = n;
+     ((size_t *) p)[1] = MAGIC;
+     ((size_t *) p)[2] = what;
+
+     /* fill with junk; X(ifree) re-checks the bytes beyond the first n */
+     for (i = 0; i < PAD_FACTOR * n; i++)
+          p[i + SZ_HEADER] = (char) (i ^ 0xEF);
+
+     if (!IN_THREAD) {
+	  ++stat->cnt;
+	  ++estat->cnt;
+	  
+	  if (stat->cnt > stat->maxcnt)
+	       stat->maxcnt = stat->cnt;
+	  if (estat->cnt > estat->maxcnt)
+	       estat->maxcnt = estat->cnt;
+     }
+
+     /* skip the info we stored previously */
+     p = p + SZ_HEADER;
+
+     if (!IN_THREAD) {
+	  unsigned int h = hashaddr(p);
+	  info = (struct minfo *) malloc(sizeof(struct minfo));
+	  CK(info);	/* list node must exist before it is filled in below */
+	  info->n = n;
+	  info->file = file;
+	  info->line = line;
+	  info->p = p;
+	  info->next = minfo[h];	/* push onto the front of bucket h */
+	  minfo[h] = info;
+     }
+
+     return (void *) p;
+}
+
+void X(ifree)(void *p)
+{    /* debugging free: validates the block written by X(malloc_debug) */
+     char *q;
+
+     A(p);
+
+     q = ((char *) p) - SZ_HEADER;	/* back up to the start of the header */
+     A(q);
+
+     {
+          size_t n = ((size_t *) q)[0];		/* requested size */
+          size_t magic = ((size_t *) q)[1];
+          int what = ((size_t *) q)[2];		/* allocation tag */
+          size_t i;
+          struct mstat *stat = mstat + what;
+          struct mstat *estat = mstat + EVERYTHING;
+
+          /* set to zero to detect duplicate free's */
+          ((size_t *) q)[0] = 0;
+
+          A(magic == MAGIC);
+          ((size_t *) q)[1] = ~MAGIC;	/* a later free of this block fails the check above */
+
+	  if (!IN_THREAD) {
+	       stat->siz -= n;
+	       A(stat->siz >= 0);
+	       estat->siz -= n;
+	       A(estat->siz >= 0);
+	  }
+
+          /* check for writing past end of array: */
+          for (i = n; i < PAD_FACTOR * n; ++i)
+               if (q[i + SZ_HEADER] != (char) (i ^ 0xEF)) {
+                    A(0 /* array bounds overwritten */ );
+               }
+          for (i = 0; i < PAD_FACTOR * n; ++i)	/* overwrite the whole payload */
+               q[i + SZ_HEADER] = (char) (i ^ 0xAD);
+
+	  if (!IN_THREAD) {
+	       --stat->cnt;
+	       --estat->cnt;
+	       
+	       A(stat->cnt >= 0);
+	       A((stat->cnt == 0 && stat->siz == 0) ||
+		 (stat->cnt > 0 && stat->siz > 0));
+	       A(estat->cnt >= 0);
+	       A((estat->cnt == 0 && estat->siz == 0) ||
+		 (estat->cnt > 0 && estat->siz > 0));
+	  }
+
+          real_free(q);
+     }
+
+     if (!IN_THREAD) {
+          /* delete minfo entry */
+	  unsigned int h = hashaddr(p);
+          struct minfo **i;	/* address of the link that points at the current node */
+
+          for (i = minfo + h; *i; i = &((*i)->next)) {
+               if ((*i)->p == p) {
+                    struct minfo *i0 = (*i)->next;
+                    free(*i);
+                    *i = i0;	/* unlink the freed node */
+                    return;
+               }
+          }
+
+          A(0 /* no entry in minfo list */ );
+     }
+}
+
+void X(malloc_print_minfo)(int verbose)
+{    /* prints per-tag statistics (if verbose) and every unfreed allocation */
+     struct minfo *info;
+     int what;
+     unsigned int h;
+
+     if (verbose) {
+	  static const char *names[MALLOC_WHAT_LAST] = {
+	       "EVERYTHING",
+	       "PLANS", "SOLVERS", "PROBLEMS", "BUFFERS",
+	       "HASHT", "TENSORS", "PLANNERS", "SLVDSC", "TWIDDLES",
+	       "STRIDES", "OTHER"
+	  };
+
+	  printf("%12s %8s %8s %10s %10s\n",
+		 "what", "cnt", "maxcnt", "siz", "maxsiz");
+
+	  for (what = 0; what < MALLOC_WHAT_LAST; ++what) {
+	       struct mstat *stat = mstat + what;
+	       printf("%12s %8d %8d %10d %10d\n",
+		      names[what], stat->cnt, stat->maxcnt,
+		      stat->siz, stat->maxsiz);
+	  }
+     }
+
+     for (h = 0; h < HASHSZ; ++h) 	/* heading only if some bucket is non-empty */
+	  if (minfo[h]) {
+	       printf("\nUnfreed allocations:\n");
+	       break;
+	  }
+
+     for (h = 0; h < HASHSZ; ++h) 
+	  for (info = minfo[h]; info; info = info->next) {
+	       printf("%s:%d:  %lu bytes at %p\n",	/* n is a size_t: print via unsigned long */
+		      info->file, info->line, (unsigned long) info->n, info->p);
+	  }
+}
+
+#else
+/**********************************************************
+ *   NON DEBUGGING CODE
+ **********************************************************/
+/* production version, no hacks */
+
+void *X(malloc_plain)(size_t n)
+{    /* production allocator: never returns null (CK fails on exhaustion) */
+     void *p;
+     if (n == 0)
+          n = 1;	/* a zero-byte request is served as one byte */
+     p = real_malloc(n);
+     CK(p);
+
+#ifdef MIN_ALIGNMENT
+     A((((uintptr_t)p) % MIN_ALIGNMENT) == 0);
+#endif
+
+     return p;
+}
+
+void X(ifree)(void *p)
+{    /* production version: hands p straight to real_free, with no checks */
+     real_free(p);
+}
+
+#endif
+
+void X(ifree0)(void *p)
+{    /* common pattern: call X(ifree) only when the pointer is non-null */
+     if (!p) return;
+     X(ifree)(p);
+}
diff --git a/src/fftw3/kernel/assert.c b/src/fftw3/kernel/assert.c
new file mode 100644
index 0000000..a066db0
--- /dev/null
+++ b/src/fftw3/kernel/assert.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: assert.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+#include "ifftw.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+void X(assertion_failed)(const char *s, int line, const char *file)
+{
+     fflush(stdout); /* push out pending stdout text before the stderr report */
+     fprintf(stderr, "fftw: %s:%d: assertion failed: %s\n", file, line, s);
+     exit(EXIT_FAILURE); /* terminates the process; this function never returns */
+}
diff --git a/src/fftw3/kernel/awake.c b/src/fftw3/kernel/awake.c
new file mode 100644
index 0000000..7ee716f
--- /dev/null
+++ b/src/fftw3/kernel/awake.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: awake.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+void X(null_awake)(plan *ego, int awake)
+{
+     UNUSED(ego);
+     UNUSED(awake);
+     /* do nothing: both arguments are deliberately ignored */
+}
diff --git a/src/fftw3/kernel/cycle.h b/src/fftw3/kernel/cycle.h
new file mode 100644
index 0000000..a324c2a
--- /dev/null
+++ b/src/fftw3/kernel/cycle.h
@@ -0,0 +1,420 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/* $Id: cycle.h,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* machine-dependent cycle counters code. Needs to be inlined. */
+
+/***************************************************************************/
+/* To use the cycle counters in your code, simply #include "cycle.h" (this
+   file), and then use the functions/macros:
+
+                 ticks getticks(void);
+
+   ticks is an opaque typedef defined below, representing the current time.
+   You extract the elapsed time between two calls to getticks() via:
+
+                 double elapsed(ticks t1, ticks t0);
+
+   which returns a double-precision variable in arbitrary units.  You
+   are not expected to convert this into human units like seconds; it
+   is intended only for *comparisons* of time intervals.
+
+   (In order to use some of the OS-dependent timer routines like
+   Solaris' gethrtime, you need to paste the autoconf snippet below
+   into your configure.ac file and #include "config.h" before cycle.h,
+   or define the relevant macros manually if you are not using autoconf.)
+*/
+
+/***************************************************************************/
+/* This file uses macros like HAVE_GETHRTIME that are assumed to be
+   defined according to whether the corresponding function/type/header
+   is available on your system.  The necessary macros are most
+   conveniently defined if you are using GNU autoconf, via the tests:
+   
+   dnl ---------------------------------------------------------------------
+
+   AC_C_INLINE
+   AC_HEADER_TIME
+   AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h])
+
+   AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif])
+
+   AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime])
+
+   dnl Cray UNICOS _rtc() (real-time clock) intrinsic
+   AC_MSG_CHECKING([for _rtc intrinsic])
+   rtc_ok=yes
+   AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H
+#include <intrinsics.h>
+#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no])
+   AC_MSG_RESULT($rtc_ok)
+
+   dnl ---------------------------------------------------------------------
+*/
+
+/***************************************************************************/
+
+#if TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+#  include <sys/time.h>
+# else
+#  include <time.h>
+# endif
+#endif
+
+#define INLINE_ELAPSED(INL) static INL double elapsed(ticks t1, ticks t0) \
+{  /* default elapsed() for arithmetic tick types; INL = inline keyword */ \
+     return (double)(t1 - t0);						  \
+}
+
+/*----------------------------------------------------------------*/
+/* Solaris */
+#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER)
+typedef hrtime_t ticks;
+/* getticks() is simply another name for gethrtime() here */
+#define getticks gethrtime
+
+INLINE_ELAPSED(inline)
+/* mark a counter as chosen so that every later candidate section is skipped */
+#define HAVE_TICK_COUNTER
+#endif
+
+/*----------------------------------------------------------------*/
+/* AIX v. 4+ routines to read the real-time clock or time-base register */
+#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER)
+typedef timebasestruct_t ticks;
+
+static inline ticks getticks(void)
+{
+     ticks t;
+     read_real_time(&t, TIMEBASE_SZ); /* sample the clock into t */
+     return t;
+}
+
+static inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */
+{
+     time_base_to_time(&t1, TIMEBASE_SZ); /* convert both samples in place */
+     time_base_to_time(&t0, TIMEBASE_SZ); /* subtract as double below: the unsigned tb_* fields would wrap */
+     return (((double)t1.tb_high - (double)t0.tb_high) * 1e9 + ((double)t1.tb_low - (double)t0.tb_low));
+}
+
+#define HAVE_TICK_COUNTER
+#endif
+
+/*----------------------------------------------------------------*/
+/*
+ * PowerPC ``cycle'' counter using the time base register.
+ */
+#if ((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh)))  && !defined(HAVE_TICK_COUNTER)
+typedef unsigned long long ticks;
+
+static __inline__ ticks getticks(void)
+{
+     unsigned int tbl, tbu0, tbu1;
+     /* read upper, lower, upper; retry if the upper half changed in between */
+     do {
+	  __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
+	  __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
+	  __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
+     } while (tbu0 != tbu1);
+     /* tbu0 and tbl now belong to the same 64-bit sample */
+     return (((unsigned long long)tbu0) << 32) | tbl;
+}
+
+INLINE_ELAPSED(__inline__)
+
+#define HAVE_TICK_COUNTER
+#endif
+/*----------------------------------------------------------------*/
+/*
+ * Pentium cycle counter 
+ */
+#if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__)  && !defined(HAVE_TICK_COUNTER)
+typedef unsigned long long ticks;
+
+static __inline__ ticks getticks(void)
+{
+     ticks ret;
+     /* "=A" binds the 64-bit result to the edx:eax register pair on i386 */
+     __asm__ __volatile__("rdtsc": "=A" (ret));
+     /* no input, nothing else clobbered */
+     return ret;
+}
+
+INLINE_ELAPSED(__inline__)
+
+#define HAVE_TICK_COUNTER
+#endif
+
+/* Visual C++ -- thanks to Morten Nissov for his help with this */
+#if _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER)
+#include <windows.h>
+typedef LARGE_INTEGER ticks;
+#define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */
+
+static __inline ticks getticks(void)
+{
+     ticks ret;
+     /* RDTSC is emitted as raw opcode bytes; its result is read from edx:eax */
+     __asm {
+	  RDTSC
+	  mov ret.HighPart, edx
+	  mov ret.LowPart, eax
+     }
+     return ret;
+}
+
+static __inline double elapsed(ticks t1, ticks t0)
+{  
+     return (double)(t1.QuadPart - t0.QuadPart); /* difference of the 64-bit views */
+}  
+
+#define HAVE_TICK_COUNTER
+#endif
+
+/*----------------------------------------------------------------*/
+/*
+ * X86-64 cycle counter
+ */
+#if defined(__GNUC__) && defined(__x86_64__)  && !defined(HAVE_TICK_COUNTER)
+typedef unsigned long long ticks;
+
+static __inline__ ticks getticks(void)
+{
+     unsigned a, d; /* low (eax) and high (edx) 32 bits of the counter */
+     __asm__ __volatile__("rdtsc" : "=a" (a), "=d" (d)); /* __asm__, not asm: also valid under -ansi/-std=c99 */
+     return ((ticks)a) | (((ticks)d) << 32);
+}
+
+INLINE_ELAPSED(__inline__)
+
+#define HAVE_TICK_COUNTER
+#endif
+
+/* Visual C++ (FIXME: how to detect compilation for x86-64?) */
+#if _MSC_VER >= 1400 && !defined(HAVE_TICK_COUNTER)
+typedef ULONG64 ticks;
+/* NOTE(review): no header is included in this section for ULONG64 or __rdtsc -- confirm they are visible here */
+#define getticks __rdtsc
+
+INLINE_ELAPSED(__inline)
+
+#define HAVE_TICK_COUNTER
+#endif
+
+/*----------------------------------------------------------------*/
+/*
+ * IA64 cycle counter
+ */
+#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
+typedef unsigned long ticks;
+
+static __inline__ ticks getticks(void)
+{
+     ticks ret;
+     /* copy application register ar.itc into ret */
+     __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
+     return ret;
+}
+
+INLINE_ELAPSED(__inline__)
+
+#define HAVE_TICK_COUNTER
+#endif
+
+/* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */
+#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER)
+#include <machine/sys/inline.h>
+typedef unsigned long ticks;
+
+static inline ticks getticks(void)
+{
+     ticks ret;
+     /* read the register named _AREG_ITC; presumably a <machine/sys/inline.h> intrinsic */
+     ret = _Asm_mov_from_ar (_AREG_ITC);
+     return ret;
+}
+
+INLINE_ELAPSED(inline)
+
+#define HAVE_TICK_COUNTER
+#endif
+
+/* intel's ecc compiler */
+#if defined(__ECC) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
+typedef unsigned long ticks;
+#include <ia64intrin.h>
+
+static __inline__ ticks getticks(void)
+{
+     return __getReg(_IA64_REG_AR_ITC); /* presumably an <ia64intrin.h> intrinsic; reads the AR_ITC register */
+}
+ 
+INLINE_ELAPSED(__inline__)
+ 
+#define HAVE_TICK_COUNTER
+#endif
+
+/*----------------------------------------------------------------*/
+/*
+ * PA-RISC cycle counter 
+ */
+#if (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER)
+typedef unsigned long ticks;
+/* the || is parenthesised on purpose: && binds tighter than || in #if */
+#  ifdef __GNUC__
+static __inline__ ticks getticks(void)
+{
+     ticks ret;
+     /* read control register 16 into ret */
+     __asm__ __volatile__("mfctl 16, %0": "=r" (ret));
+     /* no input, nothing else clobbered */
+     return ret;
+}
+#  else
+#  include <machine/inline.h>
+static inline ticks getticks(void)
+{
+     register ticks ret;
+     _MFCTL(16, ret); /* same control register, through the vendor macro */
+     return ret;
+}
+#  endif
+
+INLINE_ELAPSED(inline)
+
+#define HAVE_TICK_COUNTER
+#endif
+
+/*----------------------------------------------------------------*/
+/* S390, courtesy of James Treacy */
+#if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER)
+typedef unsigned long long ticks;
+
+static __inline__ ticks getticks(void)
+{
+     ticks cycles; /* filled in through its address by the stck instruction */
+     __asm__("stck 0(%0)" : : "a" (&(cycles)) : "memory", "cc");
+     return cycles;
+}
+
+INLINE_ELAPSED(__inline__)
+
+#define HAVE_TICK_COUNTER
+#endif
+/*----------------------------------------------------------------*/
+#if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER)
+/*
+ * The 32-bit cycle counter on alpha overflows pretty quickly, 
+ * unfortunately.  A 1GHz machine overflows in 4 seconds.
+ */
+typedef unsigned int ticks;
+
+static __inline__ ticks getticks(void)
+{
+     unsigned long cc;
+     __asm__ __volatile__ ("rpcc %0" : "=r"(cc));
+     return (cc & 0xFFFFFFFF); /* keep only the low 32 bits of the rpcc result */
+}
+
+INLINE_ELAPSED(__inline__)
+
+#define HAVE_TICK_COUNTER
+#endif
+
+/*----------------------------------------------------------------*/
+#if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER)
+typedef unsigned long ticks;
+
+static __inline__ ticks getticks(void)
+{
+     ticks ret; /* volatile below: GCC must not merge or hoist repeated reads of %tick */
+     __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
+     return ret;
+}
+
+INLINE_ELAPSED(__inline__)
+
+#define HAVE_TICK_COUNTER
+#endif
+
+/*----------------------------------------------------------------*/
+#if defined(__DECC) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER)
+#  include <c_asm.h>
+typedef unsigned int ticks;
+
+static __inline ticks getticks(void)
+{
+     unsigned long cc;
+     cc = asm("rpcc %v0");
+     return (cc & 0xFFFFFFFF); /* keep only the low 32 bits of the rpcc result */
+}
+
+INLINE_ELAPSED(__inline)
+
+#define HAVE_TICK_COUNTER
+#endif
+/*----------------------------------------------------------------*/
+/* SGI/Irix */
+#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER)
+typedef struct timespec ticks;
+
+static inline ticks getticks(void)
+{
+     struct timespec t;
+     clock_gettime(CLOCK_SGI_CYCLE, &t); /* return value is not checked */
+     return t;
+}
+
+static inline double elapsed(ticks t1, ticks t0)
+{
+     return (double)(t1.tv_sec - t0.tv_sec) * 1.0E9 + /* seconds scaled to ns */
+	  (double)(t1.tv_nsec - t0.tv_nsec);
+}
+#define HAVE_TICK_COUNTER
+#endif
+
+/*----------------------------------------------------------------*/
+/* Cray UNICOS _rtc() intrinsic function */
+#if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER)
+#ifdef HAVE_INTRINSICS_H
+#  include <intrinsics.h>
+#endif
+
+typedef long long ticks;
+/* getticks() is simply another name for the _rtc() intrinsic here */
+#define getticks _rtc
+
+INLINE_ELAPSED(inline)
+
+#define HAVE_TICK_COUNTER
+#endif
+
diff --git a/src/fftw3/kernel/debug.c b/src/fftw3/kernel/debug.c
new file mode 100644
index 0000000..63e4a22
--- /dev/null
+++ b/src/fftw3/kernel/debug.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: debug.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+#include "ifftw.h"
+
+#ifdef FFTW_DEBUG
+#include <stdio.h>
+
+typedef struct {
+     printer super; /* first member: code below casts printer * to P_file * */
+     FILE *f;       /* stream that putchr_file writes to */
+} P_file;
+
+/* putchr callback: write one character to the stream stored in the P_file */
+static void putchr_file(printer *p_, char c)
+{
+     fputc(c, ((P_file *) p_)->f);
+}
+
+static printer *mkprinter_file(FILE *f)
+{
+     printer *base = X(mkprinter)(sizeof(P_file), putchr_file, 0); /* 0: no cleanup callback */
+     ((P_file *) base)->f = f; /* super is the first member of P_file, so base is the P_file */
+     return base;
+}
+
+void X(debug)(const char *format, ...)
+{
+     va_list args;
+     printer *out = mkprinter_file(stderr); /* temporary printer, destroyed below */
+     va_start(args, format);
+     out->vprint(out, format, args);
+     va_end(args);
+     X(printer_destroy)(out);
+}
+#endif
diff --git a/src/fftw3/kernel/hash.c b/src/fftw3/kernel/hash.c
new file mode 100644
index 0000000..f12d5a4
--- /dev/null
+++ b/src/fftw3/kernel/hash.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "ifftw.h"
+
+unsigned X(hash)(const char *s)
+{
+     unsigned acc = 0xDEADBEEFu;
+     for (;;) {
+          acc = acc * 17 + (int)*s; /* the terminating NUL is mixed in as well */
+          if (*s++ == '\0') return acc;
+     }
+}
+
diff --git a/src/fftw3/kernel/iabs.c b/src/fftw3/kernel/iabs.c
new file mode 100644
index 0000000..d2bdb7c
--- /dev/null
+++ b/src/fftw3/kernel/iabs.c
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: iabs.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+int X(iabs)(int a)
+{
+     return (a >= 0) ? a : -a; /* NB: -a overflows when a is INT_MIN */
+}
diff --git a/src/fftw3/kernel/ifftw.h b/src/fftw3/kernel/ifftw.h
new file mode 100644
index 0000000..0269e18
--- /dev/null
+++ b/src/fftw3/kernel/ifftw.h
@@ -0,0 +1,848 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: ifftw.h,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* FFTW internal header file */
+#ifndef __IFFTW_H__
+#define __IFFTW_H__
+
+#include "config.h"
+
+#include <stdlib.h>		/* size_t */
+#include <stdarg.h>		/* va_list */
+#include <stddef.h>             /* ptrdiff_t */
+
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+
+#if HAVE_STDINT_H
+# include <stdint.h>             /* uintptr_t, maybe */
+#endif
+
+#if HAVE_INTTYPES_H
+# include <inttypes.h>           /* uintptr_t, maybe */
+#endif
+
+/* determine precision and name-mangling scheme */
+#define CONCAT(prefix, name) prefix ## name
+#if defined(FFTW_SINGLE)
+typedef float R;
+#define X(name) CONCAT(fftwf_, name)
+#elif defined(FFTW_LDOUBLE)
+typedef long double R;
+#define X(name) CONCAT(fftwl_, name)
+#else
+typedef double R;
+#define X(name) CONCAT(fftw_, name)
+#endif
+
+/* dummy use of unused parameters to silence compiler warnings */
+#define UNUSED(x) (void)x
+
+#define FFT_SIGN (-1)  /* sign convention for forward transforms */
+
+/* get rid of that object-oriented stink: */
+#define REGISTER_SOLVER(p, s) X(solver_register)(p, s)
+
+#define STRINGIZEx(x) #x
+#define STRINGIZE(x) STRINGIZEx(x)
+
+#ifndef HAVE_K7
+#define HAVE_K7 0
+#endif
+
+#if defined(HAVE_SSE) || defined(HAVE_SSE2) || defined(HAVE_ALTIVEC) || defined(HAVE_3DNOW)
+#define HAVE_SIMD 1
+#else
+#define HAVE_SIMD 0
+#endif
+
+/* forward declarations */
+typedef struct problem_s problem;
+typedef struct plan_s plan;
+typedef struct solver_s solver;
+typedef struct planner_s planner;
+typedef struct printer_s printer;
+typedef struct scanner_s scanner;
+
+/*-----------------------------------------------------------------------*/
+/* alloca: */
+#if HAVE_SIMD
+#define MIN_ALIGNMENT 16
+#endif
+
+#ifdef HAVE_ALLOCA
+   /* use alloca if available */
+
+#ifndef alloca
+#ifdef __GNUC__
+# define alloca __builtin_alloca
+#else
+# ifdef _MSC_VER
+#  include <malloc.h>
+#  define alloca _alloca
+# else
+#  if HAVE_ALLOCA_H
+#   include <alloca.h>
+#  else
+#   ifdef _AIX
+ #pragma alloca
+#   else
+#    ifndef alloca /* predefined by HP cc +Olibcalls */
+void *alloca(size_t);
+#    endif
+#   endif
+#  endif
+# endif
+#endif
+#endif
+
+#  ifdef MIN_ALIGNMENT
+#    define STACK_MALLOC(T, p, x)				\
+     {								\
+         p = (T)alloca((x) + MIN_ALIGNMENT);			\
+         p = (T)(((uintptr_t)p + (MIN_ALIGNMENT - 1)) &	\
+               (~(uintptr_t)(MIN_ALIGNMENT - 1)));		\
+     }
+#    define STACK_FREE(x) 
+#  else /* HAVE_ALLOCA && !defined(MIN_ALIGNMENT) */
+#    define STACK_MALLOC(T, p, x) p = (T)alloca(x) 
+#    define STACK_FREE(x) 
+#  endif
+
+#else /* ! HAVE_ALLOCA */
+   /* use malloc instead of alloca */
+#  define STACK_MALLOC(T, p, x) p = (T)MALLOC(x, OTHER)
+#  define STACK_FREE(x) X(ifree)(x)
+#endif /* ! HAVE_ALLOCA */
+
+/*-----------------------------------------------------------------------*/
+/* define uintptr_t if it is not already defined */
+
+#ifndef HAVE_UINTPTR_T
+#  if SIZEOF_VOID_P == 0
+#    error sizeof void* is unknown!
+#  elif SIZEOF_UNSIGNED_INT == SIZEOF_VOID_P
+     typedef unsigned int uintptr_t;
+#  elif SIZEOF_UNSIGNED_LONG == SIZEOF_VOID_P
+     typedef unsigned long uintptr_t;
+#  elif SIZEOF_UNSIGNED_LONG_LONG == SIZEOF_VOID_P
+     typedef unsigned long long uintptr_t;
+#  else
+#    error no unsigned integer type matches void* sizeof!
+#  endif
+#endif
+
+/*-----------------------------------------------------------------------*/
+/* assert.c: */
+extern void X(assertion_failed)(const char *s, int line, const char *file);
+
+/* always check: ex is evaluated in every build; failure calls assertion_failed */
+#define CK(ex)						 \
+      (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0))
+
+#ifdef FFTW_DEBUG
+/* check only if debug enabled; otherwise A(ex) is empty and ex is not evaluated */
+#define A(ex)						 \
+      (void)((ex) || (X(assertion_failed)(#ex, __LINE__, __FILE__), 0))
+#else
+#define A(ex) /* nothing */
+#endif
+
+extern void X(debug)(const char *format, ...);
+#define D X(debug)
+
+/*-----------------------------------------------------------------------*/
+/* alloc.c: */
+
+/* objects allocated by malloc, for statistical purposes */
+enum malloc_tag {
+     EVERYTHING,
+     PLANS,
+     SOLVERS,
+     PROBLEMS,
+     BUFFERS,
+     HASHT,
+     TENSORS,
+     PLANNERS,
+     SLVDESCS,			/* labelled "SLVDSC" in malloc_print_minfo */
+     TWIDDLES,
+     STRIDES,
+     OTHER,
+     MALLOC_WHAT_LAST		/* must be last: it is the number of tags */
+};
+
+extern void X(ifree)(void *ptr);
+extern void X(ifree0)(void *ptr);
+
+#ifdef FFTW_DEBUG_MALLOC
+/* debug allocator: MALLOC passes the tag and the call site (__FILE__, __LINE__) */
+extern void *X(malloc_debug)(size_t n, enum malloc_tag what,
+			     const char *file, int line);
+#define MALLOC(n, what) X(malloc_debug)(n, what, __FILE__, __LINE__)
+#define NATIVE_MALLOC(n, what) MALLOC(n, what)
+void X(malloc_print_minfo)(int verbose);
+
+#else /* ! FFTW_DEBUG_MALLOC */
+/* production allocator: the tag argument of MALLOC/NATIVE_MALLOC is discarded */
+extern void *X(malloc_plain)(size_t n);
+#define MALLOC(n, what)  X(malloc_plain)(n)
+#define NATIVE_MALLOC(n, what) malloc(n)
+
+#endif
+
+#if defined(FFTW_DEBUG) && defined(FFTW_DEBUG_MALLOC) && defined(HAVE_THREADS)
+extern int X(in_thread); /* saved and set by THREAD_ON, restored by THREAD_OFF */
+#  define IN_THREAD X(in_thread)
+#  define THREAD_ON { int in_thread_save = X(in_thread); X(in_thread) = 1 /* opens a block ... */
+#  define THREAD_OFF X(in_thread) = in_thread_save; } /* ... that this closes: use as a pair */
+#else /* no tracking: IN_THREAD is constant 0 and the markers expand to nothing */
+#  define IN_THREAD 0
+#  define THREAD_ON 
+#  define THREAD_OFF 
+#endif
+
+/*-----------------------------------------------------------------------*/
+/* ops.c: */
+/*
+ * ops counter.  The total number of additions is add + fma
+ * and the total number of multiplications is mul + fma.
+ * Total flops = add + mul + 2 * fma
+ */
+typedef struct {
+     double add;
+     double mul;
+     double fma;
+     double other;
+} opcnt;
+
+void X(ops_zero)(opcnt *dst);
+void X(ops_other)(int o, opcnt *dst);
+void X(ops_cpy)(const opcnt *src, opcnt *dst);
+
+void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst);
+void X(ops_add2)(const opcnt *a, opcnt *dst);
+
+/* dst = m * a + b */
+void X(ops_madd)(int m, const opcnt *a, const opcnt *b, opcnt *dst);
+
+/* dst += m * a */
+void X(ops_madd2)(int m, const opcnt *a, opcnt *dst);
+
+
+/*-----------------------------------------------------------------------*/
+/* minmax.c: */
+int X(imax)(int a, int b);
+int X(imin)(int a, int b);
+
+/*-----------------------------------------------------------------------*/
+/* iabs.c: */
+int X(iabs)(int a);
+
+/*-----------------------------------------------------------------------*/
+/* md5.c */
+
+#if SIZEOF_UNSIGNED_INT >= 4
+typedef unsigned int md5uint;
+#else
+typedef unsigned long md5uint; /* at least 32 bits as per C standard */
+#endif
+
+typedef md5uint md5sig[4];
+
+typedef struct {
+     md5sig s; /* state and signature */
+
+     /* fields not meant to be used outside md5.c: */
+     unsigned char c[64]; /* stuff not yet processed */
+     unsigned l;  /* total length.  Should be 64 bits long, but this is
+		     good enough for us */
+} md5;
+
+void X(md5begin)(md5 *p);
+void X(md5putb)(md5 *p, const void *d_, int len);
+void X(md5puts)(md5 *p, const char *s);
+void X(md5putc)(md5 *p, unsigned char c);
+void X(md5int)(md5 *p, int i);
+void X(md5unsigned)(md5 *p, unsigned i);
+void X(md5ptrdiff)(md5 *p, ptrdiff_t d);
+void X(md5end)(md5 *p);
+
+/*-----------------------------------------------------------------------*/
+/* tensor.c: */
+#define STRUCT_HACK_KR
+#undef STRUCT_HACK_C99
+
+typedef struct {
+     int n;
+     int is;			/* input stride */
+     int os;			/* output stride */
+} iodim;
+
+typedef struct {
+     int rnk;
+#if defined(STRUCT_HACK_KR)
+     iodim dims[1];
+#elif defined(STRUCT_HACK_C99)
+     iodim dims[];
+#else
+     iodim *dims;
+#endif
+} tensor;
+
+/*
+  Definition of rank -infinity.
+  This definition has the property that if you want rank 0 or 1,
+  you can simply test for rank <= 1.  This is a common case.
+ 
+  A tensor of rank -infinity has size 0; the rank is encoded as a huge positive int.
+*/
+#define RNK_MINFTY  ((int)(((unsigned) -1) >> 1))
+#define FINITE_RNK(rnk) ((rnk) != RNK_MINFTY)
+
+typedef enum { INPLACE_IS, INPLACE_OS } inplace_kind;
+
+tensor *X(mktensor)(int rnk);
+tensor *X(mktensor_0d)(void);
+tensor *X(mktensor_1d)(int n, int is, int os);
+tensor *X(mktensor_2d)(int n0, int is0, int os0,
+                      int n1, int is1, int os1);
+int X(tensor_sz)(const tensor *sz);
+void X(tensor_md5)(md5 *p, const tensor *t);
+int X(tensor_max_index)(const tensor *sz);
+int X(tensor_min_istride)(const tensor *sz);
+int X(tensor_min_ostride)(const tensor *sz);
+int X(tensor_min_stride)(const tensor *sz);
+int X(tensor_inplace_strides)(const tensor *sz);
+int X(tensor_inplace_strides2)(const tensor *a, const tensor *b);
+tensor *X(tensor_copy)(const tensor *sz);
+int X(tensor_kosherp)(const tensor *x);
+
+tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k);
+tensor *X(tensor_copy_except)(const tensor *sz, int except_dim);
+tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk);
+tensor *X(tensor_compress)(const tensor *sz);
+tensor *X(tensor_compress_contiguous)(const tensor *sz);
+tensor *X(tensor_append)(const tensor *a, const tensor *b);
+void X(tensor_split)(const tensor *sz, tensor **a, int a_rnk, tensor **b);
+int X(tensor_tornk1)(const tensor *t, int *n, int *is, int *os);
+void X(tensor_destroy)(tensor *sz);
+void X(tensor_destroy2)(tensor *a, tensor *b);
+void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d);
+void X(tensor_print)(const tensor *sz, printer *p);
+int X(dimcmp)(const iodim *a, const iodim *b);
+
+/*-----------------------------------------------------------------------*/
+/* problem.c: */
+typedef struct {
+     void (*hash) (const problem *ego, md5 *p);
+     void (*zero) (const problem *ego);
+     void (*print) (problem *ego, printer *p);
+     void (*destroy) (problem *ego);
+} problem_adt;
+
+struct problem_s {
+     const problem_adt *adt;
+};
+
+problem *X(mkproblem)(size_t sz, const problem_adt *adt);
+void X(problem_destroy)(problem *ego);
+
+/*-----------------------------------------------------------------------*/
+/* print.c */
+struct printer_s {
+     void (*print)(printer *p, const char *format, ...);
+     void (*vprint)(printer *p, const char *format, va_list ap);
+     void (*putchr)(printer *p, char c);
+     void (*cleanup)(printer *p);
+     int indent;
+     int indent_incr;
+};
+
+printer *X(mkprinter)(size_t size, 
+		      void (*putchr)(printer *p, char c),
+		      void (*cleanup)(printer *p));
+void X(printer_destroy)(printer *p);
+
+/*-----------------------------------------------------------------------*/
+/* scan.c */
+struct scanner_s {
+     int (*scan)(scanner *sc, const char *format, ...);
+     int (*vscan)(scanner *sc, const char *format, va_list ap);
+     int (*getchr)(scanner *sc);
+     int ungotc;
+};
+
+scanner *X(mkscanner)(size_t size, int (*getchr)(scanner *sc));
+void X(scanner_destroy)(scanner *sc);
+
+/*-----------------------------------------------------------------------*/
+/* plan.c: */
+typedef struct {
+     void (*solve)(const plan *ego, const problem *p);
+     void (*awake)(plan *ego, int flag);
+     void (*print)(const plan *ego, printer *p);
+     void (*destroy)(plan *ego);
+} plan_adt;
+
+struct plan_s {
+     const plan_adt *adt;
+     int awake_refcnt;
+     opcnt ops;
+     double pcost;
+};
+
+plan *X(mkplan)(size_t size, const plan_adt *adt);
+void X(plan_destroy_internal)(plan *ego);
+void X(plan_awake)(plan *ego, int flag);
+#define AWAKE(plan, flag) X(plan_awake)(plan, flag)
+void X(plan_null_destroy)(plan *ego);
+
+/*-----------------------------------------------------------------------*/
+/* solver.c: */
+typedef struct {
+     plan *(*mkplan)(const solver *ego, const problem *p, planner *plnr);
+} solver_adt;
+
+struct solver_s {
+     const solver_adt *adt;
+     int refcnt;
+};
+
+solver *X(mksolver)(size_t size, const solver_adt *adt);
+void X(solver_use)(solver *ego);
+void X(solver_destroy)(solver *ego);
+void X(solver_register)(planner *plnr, solver *s);
+
+/* shorthand */
+#define MKSOLVER(type, adt) (type *)X(mksolver)(sizeof(type), adt)
+
+/*-----------------------------------------------------------------------*/
+/* planner.c */
+
+typedef struct slvdesc_s {
+     solver *slv;
+     const char *reg_nam;
+     unsigned nam_hash;
+     int reg_id;
+} slvdesc;
+
+typedef struct solution_s solution; /* opaque */
+
+/* values for problem_flags: */
+enum { 
+     DESTROY_INPUT = 0x1,
+     NO_SIMD = 0x2,
+     CONSERVE_MEMORY = 0x4,
+     NO_DHT_R2HC = 0x8
+};
+
+#define DESTROY_INPUTP(plnr) ((plnr)->problem_flags & DESTROY_INPUT)
+#define NO_SIMDP(plnr) ((plnr)->problem_flags & NO_SIMD)
+#define CONSERVE_MEMORYP(plnr) ((plnr)->problem_flags & CONSERVE_MEMORY)
+#define NO_DHT_R2HCP(plnr) ((plnr)->problem_flags & NO_DHT_R2HC)
+
+/* values for planner_flags (bit masks kept in an unsigned short): */
+enum {
+     /* impatience flags  */
+
+     BELIEVE_PCOST = 0x1,
+     DFT_R2HC_ICKY = 0x2,
+     NONTHREADED_ICKY = 0x4,
+     NO_BUFFERING = 0x8,
+     NO_EXHAUSTIVE = 0x10,
+     NO_INDIRECT_OP = 0x20,
+     NO_LARGE_GENERIC = 0x40,
+     NO_RANK_SPLITS = 0x80,
+     NO_VRANK_SPLITS = 0x100,
+     NO_VRECURSE = 0x200,
+     
+     /* flags that control the search */
+     NO_UGLY = 0x400,  /* avoid plans we are 99% sure are suboptimal */
+     NO_SEARCH = 0x800,  /* avoid searching altogether---use wisdom entries
+                            only */
+
+     ESTIMATE = 0x1000,
+     IMPATIENCE_FLAGS = (ESTIMATE | (ESTIMATE - 1)), /* ESTIMATE and all lower bits */
+     
+     BLESSING = 0x4000,  /* save this entry */
+     H_VALID = 0x8000,    /* valid hash table entry */
+     NONIMPATIENCE_FLAGS = BLESSING
+};
+
+#define BELIEVE_PCOSTP(plnr) ((plnr)->planner_flags & BELIEVE_PCOST)
+#define DFT_R2HC_ICKYP(plnr) ((plnr)->planner_flags & DFT_R2HC_ICKY)
+#define ESTIMATEP(plnr) ((plnr)->planner_flags & ESTIMATE)
+#define NONTHREADED_ICKYP(plnr) (((plnr)->planner_flags & NONTHREADED_ICKY) \
+                                  && (plnr)->nthr > 1)
+#define NO_BUFFERINGP(plnr) ((plnr)->planner_flags & NO_BUFFERING)
+#define NO_EXHAUSTIVEP(plnr) ((plnr)->planner_flags & NO_EXHAUSTIVE)
+#define NO_INDIRECT_OP_P(plnr) ((plnr)->planner_flags & NO_INDIRECT_OP)
+#define NO_LARGE_GENERICP(plnr) ((plnr)->planner_flags & NO_LARGE_GENERIC)
+#define NO_RANK_SPLITSP(plnr) ((plnr)->planner_flags & NO_RANK_SPLITS)
+#define NO_UGLYP(plnr) ((plnr)->planner_flags & NO_UGLY)
+#define NO_SEARCHP(plnr) ((plnr)->planner_flags & NO_SEARCH)
+#define NO_VRANK_SPLITSP(plnr) ((plnr)->planner_flags & NO_VRANK_SPLITS)
+#define NO_VRECURSEP(plnr) ((plnr)->planner_flags & NO_VRECURSE)
+
+typedef enum { FORGET_ACCURSED, FORGET_EVERYTHING } amnesia;
+
+typedef struct {
+     void (*register_solver)(planner *ego, solver *s);
+     plan *(*mkplan)(planner *ego, problem *p);
+     void (*forget)(planner *ego, amnesia a);
+     void (*exprt)(planner *ego, printer *p); /* ``export'' is a reserved
+						 word in C++. */
+     int (*imprt)(planner *ego, scanner *sc);
+} planner_adt;
+
+struct planner_s {
+     const planner_adt *adt;
+     void (*hook)(plan *pln, const problem *p, int optimalp);
+
+     /* solver descriptors */
+     slvdesc *slvdescs;
+     unsigned nslvdesc, slvdescsiz;
+     const char *cur_reg_nam;
+     int cur_reg_id;
+
+     /* hash table of solutions */
+     solution *solutions;
+     unsigned hashsiz, nelem;
+
+     int nthr;
+     unsigned problem_flags;
+     unsigned short planner_flags; /* matches type of solution.flags in
+				      planner.c */
+     /* various statistics */
+     int nplan;    /* number of plans evaluated */
+     double pcost, epcost; /* total pcost of measured/estimated plans */
+     int nprob;    /* number of problems evaluated */
+     int lookup, succ_lookup, lookup_iter;
+     int insert, insert_iter, insert_unknown;
+     int nrehash;
+};
+
+planner *X(mkplanner)(void);
+void X(planner_destroy)(planner *ego);
+
+#ifdef FFTW_DEBUG
+void X(planner_dump)(planner *ego, int vrbose);
+#endif
+
+/*
+  Iterate over all solvers.   Read:
+ 
+  @article{ baker93iterators,
+  author = "Henry G. Baker, Jr.",
+  title = "Iterators: Signs of Weakness in Object-Oriented Languages",
+  journal = "{ACM} {OOPS} Messenger",
+  volume = "4",
+  number = "3",
+  pages = "18--25"
+  }
+*/
+#define FORALL_SOLVERS(ego, s, p, what)			\
+{							\
+     unsigned _cnt;					\
+     for (_cnt = 0; _cnt < ego->nslvdesc; ++_cnt) {	\
+	  slvdesc *p = ego->slvdescs + _cnt;		\
+	  solver *s = p->slv;				\
+	  what;						\
+     }							\
+}
+
+/* make plan, destroy problem */
+plan *X(mkplan_d)(planner *ego, problem *p);
+
+/*-----------------------------------------------------------------------*/
+/* stride.c: */
+
+/* If PRECOMPUTE_ARRAY_INDICES is defined, precompute all strides. */
+#if (defined(__i386__) || _M_IX86 >= 500) && !HAVE_K7 && !defined(FFTW_LDOUBLE)
+#define PRECOMPUTE_ARRAY_INDICES
+#endif
+
+#ifdef PRECOMPUTE_ARRAY_INDICES
+typedef int *stride;
+#define WS(stride, i)  (stride[i])
+extern stride X(mkstride)(int n, int s);
+void X(stride_destroy)(stride p);
+
+#else
+
+typedef int stride;  /* no precomputed table: WS() multiplies on demand */
+#define WS(stride, i)  ((stride) * (i))  /* args may be expressions */
+#define fftwf_mkstride(n, stride) stride
+#define fftw_mkstride(n, stride) stride
+#define fftwl_mkstride(n, stride) stride
+#define fftwf_stride_destroy(p) ((void) p)
+#define fftw_stride_destroy(p) ((void) p)
+#define fftwl_stride_destroy(p) ((void) p)
+
+#endif /* PRECOMPUTE_ARRAY_INDICES */
+
+/*-----------------------------------------------------------------------*/
+/* solvtab.c */
+
+struct solvtab_s { void (*reg)(planner *); const char *reg_nam; };
+typedef struct solvtab_s solvtab[];
+void X(solvtab_exec)(solvtab tbl, planner *p);
+#define SOLVTAB(s) { s, STRINGIZE(s) }
+#define SOLVTAB_END { 0, 0 }
+
+/*-----------------------------------------------------------------------*/
+/* pickdim.c */
+int X(pickdim)(int which_dim, const int *buddies, int nbuddies,
+	       const tensor *sz, int oop, int *dp);
+
+/*-----------------------------------------------------------------------*/
+/* twiddle.c */
+/* little language to express twiddle factors computation */
+enum { TW_COS = 0, TW_SIN = 1, TW_TAN = 2, TW_NEXT = 3,
+       TW_FULL = 4, TW_GENERIC = 5 };
+
+typedef struct {
+     unsigned char op;
+     unsigned char v;
+     short i;
+} tw_instr;
+
+typedef struct twid_s {
+     R *W;                     /* array of twiddle factors */
+     int n, r, m;                /* transform order, radix, # twiddle rows */
+     int refcnt;
+     const tw_instr *instr;
+     struct twid_s *cdr;
+} twid;
+
+void X(mktwiddle)(twid **pp, const tw_instr *instr, int n, int r, int m);
+void X(twiddle_destroy)(twid **pp);
+int X(twiddle_length)(int r, const tw_instr *p);
+void X(twiddle_awake)(int flg, twid **pp, 
+		      const tw_instr *instr, int n, int r, int m);
+
+/*-----------------------------------------------------------------------*/
+/* trig.c */
+#ifdef FFTW_LDOUBLE
+typedef long double trigreal;
+#else
+typedef double trigreal;
+#endif
+
+extern trigreal X(cos2pi)(int, int);
+extern trigreal X(sin2pi)(int, int);
+extern trigreal X(tan2pi)(int, int);
+extern trigreal X(sincos)(trigreal m, trigreal n, int sinp);
+
+/*-----------------------------------------------------------------------*/
+/* primes.c: */
+
+#if defined(FFTW_ENABLE_UNSAFE_MULMOD)
+#  define MULMOD(x,y,p) (((x) * (y)) % (p))
+#elif ((SIZEOF_INT != 0) && (SIZEOF_LONG >= 2 * SIZEOF_INT))
+#  define MULMOD(x,y,p) ((int) ((((long) (x)) * ((long) (y))) % ((long) (p))))
+#elif ((SIZEOF_INT != 0) && (SIZEOF_LONG_LONG >= 2 * SIZEOF_INT))
+#  define MULMOD(x,y,p) ((int) ((((long long) (x)) * ((long long) (y))) \
+				 % ((long long) (p))))
+#elif defined(_MSC_VER)
+#  define MULMOD(x,y,p) ((int) ((((__int64) (x)) * ((__int64) (y))) \
+                                 % ((__int64) (p))))
+#else /* 'long long' unavailable */
+#  define SAFE_MULMOD 1
+int X(safe_mulmod)(int x, int y, int p);
+#  define MULMOD(x,y,p) X(safe_mulmod)(x,y,p)
+#endif
+
+int X(power_mod)(int n, int m, int p);
+int X(find_generator)(int p);
+int X(first_divisor)(int n);
+int X(is_prime)(int n);
+int X(next_prime)(int n);
+
+#define GENERIC_MIN_BAD 71 /* min prime for which generic becomes bad */
+
+/*-----------------------------------------------------------------------*/
+/* rader.c: */
+typedef struct rader_tls rader_tl;
+
+void X(rader_tl_insert)(int k1, int k2, int k3, R *W, rader_tl **tl);
+R *X(rader_tl_find)(int k1, int k2, int k3, rader_tl *t);
+void X(rader_tl_delete)(R *W, rader_tl **tl);
+
+/*-----------------------------------------------------------------------*/
+/* transpose.c: */
+
+void X(transpose)(R *A, int n, int m, int d, int N, R *buf);
+void X(transpose_slow)(R *a, int nx, int ny, int N,
+		       char *move, int move_size, R *buf);
+int X(transposable)(const iodim *a, const iodim *b,
+		    int vl, int s, R *ri, R *ii);
+void X(transpose_dims)(const iodim *a, const iodim *b,
+                       int *n, int *m, int *d, int *nd, int *md);
+int X(transpose_simplep)(const iodim *a, const iodim *b, int vl, int s,
+			 R *ri, R *ii);
+int X(transpose_slowp)(const iodim *a, const iodim *b, int N);
+
+/*-----------------------------------------------------------------------*/
+/* misc stuff */
+void X(null_awake)(plan *ego, int awake);
+int X(square)(int x);
+double X(measure_execution_time)(plan *pln, const problem *p);
+int X(alignment_of)(R *p);
+unsigned X(hash)(const char *s);
+int X(compute_nbuf)(int n, int vl, int nbuf, int maxbufsz);
+int X(ct_uglyp)(int min_n, int n, int r);
+
+#if HAVE_SIMD
+R *X(taint)(R *p, int s);
+R *X(join_taint)(R *p1, R *p2);
+#define TAINT(p, s) X(taint)(p, s)
+#define UNTAINT(p) ((R *) (((uintptr_t) (p)) & ~(uintptr_t)3))
+#define TAINTOF(p) (((uintptr_t)(p)) & 3)
+#define JOIN_TAINT(p1, p2) X(join_taint)(p1, p2)
+#else
+#define TAINT(p, s) (p)
+#define UNTAINT(p) (p)
+#define TAINTOF(p) 0
+#define JOIN_TAINT(p1, p2) p1
+#endif
+
+#ifdef FFTW_DEBUG_ALIGNMENT
+#  define ASSERT_ALIGNED_DOUBLE {		\
+     double __foo;				\
+     CK(!(((uintptr_t) &__foo) & 0x7));		\
+}
+#else
+#  define ASSERT_ALIGNED_DOUBLE 
+#endif /* FFTW_DEBUG_ALIGNMENT */
+
+
+
+/*-----------------------------------------------------------------------*/
+/* macros used in codelets to reduce source code size */
+
+typedef R E;  /* internal precision of codelets. */
+
+#ifdef FFTW_LDOUBLE
+#  define K(x) ((E) x##L)
+#else
+#  define K(x) ((E) x)
+#endif
+#define DK(name, value) const E name = K(value)
+
+/* FMA macros */
+
+#if defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))
+/* this peculiar coding seems to do the right thing on all of
+   gcc-2.95, gcc-3.1, and gcc-3.2.  
+
+   The obvious expression a * b + c does not work.  If both x = a * b
+   + c and y = a * b - c appear in the source, gcc computes t = a * b,
+   x = t + c, y = t - c, thus destroying the fma.
+*/
+static __inline__ E FMA(E a, E b, E c)
+{
+     E x = a * b;
+     x = x + c;
+     return x;
+}
+
+static __inline__ E FMS(E a, E b, E c)
+{
+     E x = a * b;
+     x = x - c;
+     return x;
+}
+
+static __inline__ E FNMA(E a, E b, E c)
+{
+     E x = a * b;
+     x = - (x + c);
+     return x;
+}
+
+static __inline__ E FNMS(E a, E b, E c)
+{
+     E x = a * b;
+     x = - (x - c);
+     return x;
+}
+#else
+#define FMA(a, b, c) (((a) * (b)) + (c))
+#define FMS(a, b, c) (((a) * (b)) - (c))
+#define FNMA(a, b, c) (- (((a) * (b)) + (c)))
+#define FNMS(a, b, c) ((c) - ((a) * (b)))
+#endif
+
+
+/* stack-alignment hackery */
+#if defined(__GNUC__) && defined(__i386__)
+/*
+ * horrible hack to align the stack to a 16-byte boundary.
+ *
+ * We assume a gcc version >= 2.95 so that
+ * -mpreferred-stack-boundary works.  Otherwise, all bets are
+ * off.  However, -mpreferred-stack-boundary does not create a
+ * stack alignment, but it only preserves it.  Unfortunately,
+ * many versions of libc on linux call main() with the wrong
+ * initial stack alignment, with the result that the code is now
+ * pessimally aligned instead of having a 50% chance of being
+ * correct.
+ */
+
+#define WITH_ALIGNED_STACK(what)				\
+{								\
+     /*								\
+      * Use alloca to allocate some memory on the stack.	\
+      * This alerts gcc that something funny is going		\
+      * on, so that it does not omit the frame pointer		\
+      * etc.							\
+      */							\
+     (void)__builtin_alloca(16);				\
+								\
+     /*								\
+      * Now align the stack pointer				\
+      */							\
+     __asm__ __volatile__ ("andl $-16, %esp");			\
+								\
+     what							\
+}
+#endif
+
+#ifdef __ICC /* Intel's compiler for ia32 */
+#define WITH_ALIGNED_STACK(what)				\
+{								\
+     /*								\
+      * Simply calling alloca seems to do the right thing.	\
+      * The size of the allocated block seems to be irrelevant.	\
+      */							\
+     _alloca(16);						\
+     what							\
+}
+#endif
+
+#ifndef WITH_ALIGNED_STACK
+#define WITH_ALIGNED_STACK(what) what
+#endif
+
+#endif /* __IFFTW_H__ */
diff --git a/src/fftw3/kernel/kbuffered.c b/src/fftw3/kernel/kbuffered.c
new file mode 100644
index 0000000..d20403f
--- /dev/null
+++ b/src/fftw3/kernel/kbuffered.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* routines shared by the various buffered solvers */
+
+#include "ifftw.h"
+
+int X(compute_nbuf)(int n, int vl, int nbuf, int maxbufsz)
+{
+     int cand;
+
+     /* cap nbuf at maxbufsz / n buffers, but never go below one buffer */
+     if (nbuf * n > maxbufsz)
+          nbuf = X(imax)((int)1, maxbufsz / n);
+
+     /* prefer the smallest count in [nbuf, 2 * nbuf) below vl that divides
+        vl, in order that we only need one child plan */
+     cand = nbuf;
+     while (cand < vl && cand < 2 * nbuf) {
+          if (vl % cand == 0)
+               return cand;
+          ++cand;
+     }
+     /* no such divisor: settle for the smaller of nbuf and vl */
+     return X(imin)(nbuf, vl);
+}
+
diff --git a/src/fftw3/kernel/kct.c b/src/fftw3/kernel/kct.c
new file mode 100644
index 0000000..ba3bf68
--- /dev/null
+++ b/src/fftw3/kernel/kct.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* common routines for Cooley-Tukey algorithms */
+
+#include "ifftw.h"
+
+#define POW2P(n) (((n) > 0) && (((n) & ((n) - 1)) == 0))
+
+/* TRUE if radix-r is ugly for size n: n <= min_n, or n = 2^k and n/r <= 4 */
+int X(ct_uglyp)(int min_n, int n, int r)
+{
+     return (n <= min_n) ? 1 : (POW2P(n) && (n / r) <= 4);
+}
diff --git a/src/fftw3/kernel/kplan.c b/src/fftw3/kernel/kplan.c
new file mode 100644
index 0000000..4c17107
--- /dev/null
+++ b/src/fftw3/kernel/kplan.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: kplan.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+/* "Plan: To bother about the best method of accomplishing an
+   accidental result."  (Ambrose Bierce, The Enlarged Devil's
+   Dictionary). */
+
+plan *X(mkplan)(size_t size, const plan_adt *adt)
+{
+     /* generic constructor: allocate size bytes and reset the plan header */
+     plan *pln = (plan *)MALLOC(size, PLANS);
+
+     A(adt->destroy);
+     pln->adt = adt;
+     pln->awake_refcnt = 0;   /* X(plan_awake) treats 0 as "asleep" */
+     pln->pcost = 0.0;
+     X(ops_zero)(&pln->ops);
+     return pln;
+}
+
+/* destroy a plan (a null pointer is ignored): put it to sleep if it is
+   still awake, run its type-specific destructor, then free the storage */
+void X(plan_destroy_internal)(plan *ego)
+{
+     if (!ego)
+          return;
+
+     if (ego->awake_refcnt > 0)
+          ego->adt->awake(ego, 0);
+     ego->adt->destroy(ego);
+     X(ifree)(ego);
+}
+
+/* dummy destroy routine for plans with no local state: does nothing */
+void X(plan_null_destroy)(plan *ego)
+{
+     UNUSED(ego);
+     /* nothing to release */
+}
+
+/* refcounted wake/sleep: the awake hook fires only on 0 -> 1 and 1 -> 0 */
+void X(plan_awake)(plan *ego, int flag)
+{
+     if (flag) {
+          if (!ego->awake_refcnt)
+               ego->adt->awake(ego, flag);
+          ++ego->awake_refcnt;
+     } else if (ego->awake_refcnt > 0) {  /* ignore sleep when not awake */
+          if (!--ego->awake_refcnt)
+               ego->adt->awake(ego, flag);
+     }
+}
+
diff --git a/src/fftw3/kernel/kproblem.c b/src/fftw3/kernel/kproblem.c
new file mode 100644
index 0000000..4686692
--- /dev/null
+++ b/src/fftw3/kernel/kproblem.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: kproblem.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+/* constructor: allocate sz bytes for a problem and attach its method table */
+problem *X(mkproblem)(size_t sz, const problem_adt *adt)
+{
+     problem *prb;
+     prb = (problem *)MALLOC(sz, PROBLEMS);
+     prb->adt = adt;
+     return prb;
+}
+
+/* destructor: dispatch to the problem's own destroy method; null is a no-op */
+void X(problem_destroy)(problem *ego)
+{
+     if (!ego) return;
+     ego->adt->destroy(ego);
+}
+
diff --git a/src/fftw3/kernel/krader.c b/src/fftw3/kernel/krader.c
new file mode 100644
index 0000000..354edfc
--- /dev/null
+++ b/src/fftw3/kernel/krader.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "ifftw.h"
+
+/*
+  common routines for Rader solvers 
+*/
+
+
+/* shared twiddle and omega lists, keyed by two/three integers. */
+struct rader_tls {
+     int k1, k2, k3;   /* lookup key compared by X(rader_tl_find) */
+     R *W;             /* shared array; freed together with the node */
+     int refcnt;       /* 1 on insert, +1 per successful find */
+     rader_tl *cdr;    /* next node in the list, or 0 */
+};
+/* push a node holding W under key (k1,k2,k3), refcnt 1, onto list *tl */
+void X(rader_tl_insert)(int k1, int k2, int k3, R *W, rader_tl **tl)
+{
+     rader_tl *node = (rader_tl *) MALLOC(sizeof(rader_tl), TWIDDLES);
+     node->W = W; node->k1 = k1; node->k2 = k2; node->k3 = k3;
+     node->refcnt = 1; node->cdr = *tl; *tl = node;
+}
+
+R *X(rader_tl_find)(int k1, int k2, int k3, rader_tl *t)
+{
+     /* look up key (k1,k2,k3): a hit bumps refcnt and yields W, a miss 0 */
+     for (; t; t = t->cdr)
+          if (t->k1 == k1 && t->k2 == k2 && t->k3 == k3) {
+               ++t->refcnt;
+               return t->W;
+          }
+     return 0;
+}
+
+void X(rader_tl_delete)(R *W, rader_tl **tl)
+{
+     rader_tl *node;
+
+     /* drop one reference to W; once the last one is gone, unlink the
+        node and free both W and the node.  W == 0 is a no-op. */
+     if (!W) return;
+     while ((node = *tl) != 0 && node->W != W)
+          tl = &node->cdr;       /* tl tracks the link that points at node */
+     if (!node || --node->refcnt > 0)
+          return;
+     *tl = node->cdr;
+     X(ifree)(node->W);
+     X(ifree)(node);
+}
diff --git a/src/fftw3/kernel/md5-1.c b/src/fftw3/kernel/md5-1.c
new file mode 100644
index 0000000..a2e87a1
--- /dev/null
+++ b/src/fftw3/kernel/md5-1.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "ifftw.h"
+
+
+void X(md5putb)(md5 *p, const void *d_, int len)
+{
+     /* hash len raw bytes starting at d_, one X(md5putc) call per byte */
+     const unsigned char *cur = (const unsigned char *)d_;
+     int left;
+     for (left = len; left > 0; --left) X(md5putc)(p, *cur++);
+}
+
+void X(md5puts)(md5 *p, const char *s)
+{
+     /* hash the characters of s one by one; the loop test runs after
+        the call, so the terminating '\0' is hashed as well */
+     do X(md5putc)(p, *s);
+     while (*s++);
+}
+
+void X(md5int)(md5 *p, int i)   /* hash the in-memory bytes of an int */
+{
+     X(md5putb)(p, &i, sizeof(i));
+}
+
+void X(md5unsigned)(md5 *p, unsigned i)   /* hash the bytes of an unsigned */
+{
+     X(md5putb)(p, &i, sizeof(i));
+}
+
+void X(md5ptrdiff)(md5 *p, ptrdiff_t d)   /* hash the bytes of a ptrdiff_t */
+{
+     X(md5putb)(p, &d, sizeof(d));
+}
+
diff --git a/src/fftw3/kernel/md5.c b/src/fftw3/kernel/md5.c
new file mode 100644
index 0000000..edbd811
--- /dev/null
+++ b/src/fftw3/kernel/md5.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* 
+   independent implementation of Ron Rivest's MD5 message-digest
+   algorithm, based on rfc 1321.
+
+   Optimized for small code size, not speed.  Works as long as
+   sizeof(md5uint) >= 4.
+*/
+
+#include "ifftw.h"
+
+/* sintab[i] = 4294967296.0 * abs(sin((double)(i + 1))) */
+static const md5uint sintab[64] = {
+     0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
+     0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+     0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+     0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+     0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
+     0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
+     0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
+     0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+     0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
+     0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+     0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
+     0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+     0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
+     0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+     0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+     0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
+}; 
+
+/* see rfc 1321 section 3.4 */
+static const struct roundtab {
+     char k; 
+     char s;
+} roundtab[64] = {
+     {  0,  7}, {  1, 12}, {  2, 17}, {  3, 22},
+     {  4,  7}, {  5, 12}, {  6, 17}, {  7, 22},
+     {  8,  7}, {  9, 12}, { 10, 17}, { 11, 22},
+     { 12,  7}, { 13, 12}, { 14, 17}, { 15, 22},
+     {  1,  5}, {  6,  9}, { 11, 14}, {  0, 20},
+     {  5,  5}, { 10,  9}, { 15, 14}, {  4, 20},
+     {  9,  5}, { 14,  9}, {  3, 14}, {  8, 20},
+     { 13,  5}, {  2,  9}, {  7, 14}, { 12, 20},
+     {  5,  4}, {  8, 11}, { 11, 16}, { 14, 23},
+     {  1,  4}, {  4, 11}, {  7, 16}, { 10, 23},
+     { 13,  4}, {  0, 11}, {  3, 16}, {  6, 23},
+     {  9,  4}, { 12, 11}, { 15, 16}, {  2, 23},
+     {  0,  6}, {  7, 10}, { 14, 15}, {  5, 21},
+     { 12,  6}, {  3, 10}, { 10, 15}, {  1, 21},
+     {  8,  6}, { 15, 10}, {  6, 15}, { 13, 21},
+     {  4,  6}, { 11, 10}, {  2, 15}, {  9, 21}
+};
+
+#define rol(a, s) (((a) << (int)(s)) | ((a) >> (32 - (int)(s))))
+
+/* rfc 1321 block transform: mix the 64 bytes at data into state[0..3] */
+static void doblock(md5sig state, const unsigned char *data)
+{
+     md5uint a, b, c, d, t, x[16];
+     const md5uint msk = 0xffffffffUL;
+     int i;
+
+     /* encode input bytes into md5uint, little endian; each byte is
+        widened first so that no shift can overflow a signed int */
+     for (i = 0; i < 16; ++i, data += 4)
+          x[i] = (md5uint)data[0] | ((md5uint)data[1] << 8)
+               | ((md5uint)data[2] << 16) | ((md5uint)data[3] << 24);
+
+     a = state[0]; b = state[1]; c = state[2]; d = state[3];
+     for (i = 0; i < 64; ++i) {
+          const struct roundtab *p = roundtab + i;
+          switch (i / 16) { /* four rounds of 16 steps each */
+              case 0: a += (b & c) | (~b & d); break;
+              case 1: a += (b & d) | (c & ~d); break;
+              case 2: a += b ^ c ^ d; break;
+              case 3: a += c ^ (b | ~d); break;
+          }
+          a += sintab[i];
+          a += x[(int)(p->k)];
+          a &= msk; /* md5uint may be wider than 32 bits: trim before rol */
+          t = b + rol(a, p->s);
+          a = d; d = c; c = b; b = t;
+     }
+     state[0] = (state[0] + a) & msk;
+     state[1] = (state[1] + b) & msk;
+     state[2] = (state[2] + c) & msk;
+     state[3] = (state[3] + d) & msk;
+}
+
+
+void X(md5begin)(md5 *p)
+{
+     /* fresh digest: no bytes consumed yet, state words preset to the
+        rfc 1321 section 3.3 initial values */
+     p->l = 0;
+     p->s[0] = 0x67452301; p->s[1] = 0xefcdab89;
+     p->s[2] = 0x98badcfe; p->s[3] = 0x10325476;
+}
+
+void X(md5putc)(md5 *p, unsigned char c)
+{
+     p->c[p->l++ % 64] = c;        /* buffer the byte and count it */
+     if ((p->l % 64) == 0) doblock(p->s, p->c);  /* block full: digest it */
+}
+
+void X(md5end)(md5 *p)
+{
+     unsigned nbits, k;
+
+     nbits = 8 * p->l; /* message length in bits, sampled before padding */
+
+     /* padding (rfc 1321 section 3.1): one 0x80 byte, then zero bytes
+        until exactly 8 bytes are left in the current 64-byte block */
+     X(md5putc)(p, 0x80);
+     while ((p->l % 64) != 56)
+          X(md5putc)(p, 0x00);
+
+     /* length (rfc 1321 section 3.2): 8 bytes, least significant first */
+     for (k = 8; k > 0; --k, nbits >>= 8)
+          X(md5putc)(p, nbits & 0xFF);
+
+     /* the last byte completed a block: the signature is now in p->s */
+}
diff --git a/src/fftw3/kernel/minmax.c b/src/fftw3/kernel/minmax.c
new file mode 100644
index 0000000..fc6f3a3
--- /dev/null
+++ b/src/fftw3/kernel/minmax.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: minmax.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+int X(imax)(int a, int b)      /* larger of a and b */
+{
+     return (b < a) ? a : b;
+}
+
+int X(imin)(int a, int b)      /* smaller of a and b */
+{
+     return (b > a) ? a : b;
+}
diff --git a/src/fftw3/kernel/ops.c b/src/fftw3/kernel/ops.c
new file mode 100644
index 0000000..9e927c1
--- /dev/null
+++ b/src/fftw3/kernel/ops.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: ops.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+void X(ops_zero)(opcnt *dst)   /* clear all four counters */
+{
+     dst->other = 0; dst->fma = 0; dst->mul = 0; dst->add = 0;
+}
+
+void X(ops_cpy)(const opcnt *src, opcnt *dst)   /* dst = src */
+{
+     *dst = *src;
+}
+
+void X(ops_other)(int o, opcnt *dst)   /* dst: other = o, the rest 0 */
+{
+     dst->add = dst->mul = dst->fma = 0;
+     dst->other = o;
+}
+
+void X(ops_madd)(int m, const opcnt *a, const opcnt *b, opcnt *dst)
+{
+     /* dst = m * a + b, one counter at a time; each field is read
+        before it is written, so dst may alias a or b */
+     dst->add = b->add + m * a->add;     dst->mul = b->mul + m * a->mul;
+     dst->fma = b->fma + m * a->fma;     dst->other = b->other + m * a->other;
+}
+
+void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst) /* dst = a + b */
+{
+     X(ops_madd)(1, a, b, dst);
+}
+
+void X(ops_add2)(const opcnt *a, opcnt *dst)   /* dst += a */
+{
+     X(ops_add)(a, dst, dst);
+}
+
+void X(ops_madd2)(int m, const opcnt *a, opcnt *dst)   /* dst += m * a */
+{
+     X(ops_madd)(m, a, dst, dst);
+}
+
diff --git a/src/fftw3/kernel/pickdim.c b/src/fftw3/kernel/pickdim.c
new file mode 100644
index 0000000..034e6db
--- /dev/null
+++ b/src/fftw3/kernel/pickdim.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "ifftw.h"
+
+/* $Id: pickdim.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* Map a solver's which_dim onto an index into sz->dims, stored in *dp.
+   which_dim > 0 picks the which_dim'th valid dimension counting from
+   the front, which_dim < 0 the |which_dim|'th counting from the back,
+   and 0 the middle dimension.  A dimension is valid if oop is set or
+   its is and os fields are equal.  Returns 1 on success, 0 otherwise. */
+static int really_pickdim(int which_dim, const tensor *sz, int oop, int *dp)
+{
+     int i, count_ok = 0;
+     if (which_dim > 0) {
+          for (i = 0; i < sz->rnk; ++i) {
+               if (oop || sz->dims[i].is == sz->dims[i].os)
+                    if (++count_ok == which_dim) {
+                         *dp = i;
+                         return 1;
+                    }
+          }
+     }
+     else if (which_dim < 0) {
+          for (i = sz->rnk; i > 0; --i) {
+               if (oop || sz->dims[i - 1].is == sz->dims[i - 1].os)
+                    if (++count_ok == -which_dim) {
+                         *dp = i - 1;
+                         return 1;
+                    }
+          }
+     }
+     else { /* zero: pick the middle, if valid */
+          i = (sz->rnk - 1) / 2;  /* 0 for rnk 1-2, 1 for rnk 3-4, ... */
+          if (i >= 0 && i < sz->rnk  /* rejects rnk 0: no negative index */
+              && (oop || sz->dims[i].is == sz->dims[i].os)) {
+               *dp = i;
+               return 1;
+          }
+     }
+     return 0;
+}
+
+/* Same as really_pickdim, except that the answer is 0 whenever a buddy
+   listed before which_dim in buddies[] resolves to the same dimension. */
+int X(pickdim)(int which_dim, const int *buddies, int nbuddies,
+               const tensor *sz, int oop, int *dp)
+{
+     int k;
+     int other;
+
+     if (!really_pickdim(which_dim, sz, oop, dp))
+          return 0;
+
+     /* Walk the buddies listed ahead of which_dim (the scan stops at
+        our own entry).  If one of them resolves to the dimension we
+        picked, this solver is inapplicable and that buddy handles it,
+        so only the smallest-indexed buddy for a dimension applies. */
+     for (k = 0; k < nbuddies && buddies[k] != which_dim; ++k) {
+          if (really_pickdim(buddies[k], sz, oop, &other) && other == *dp)
+               return 0; /* found equivalent buddy */
+     }
+     return 1;
+}
diff --git a/src/fftw3/kernel/planner.c b/src/fftw3/kernel/planner.c
new file mode 100644
index 0000000..1d01cb9
--- /dev/null
+++ b/src/fftw3/kernel/planner.c
@@ -0,0 +1,695 @@
+/*
+ * Copyright (c) 2000 Matteo Frigo
+ * Copyright (c) 2000 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: planner.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+#include "ifftw.h"
+#include <string.h>
+
+/* GNU Coding Standards, Sec. 5.2: "Please write the comments in a GNU
+   program in English, because English is the one language that nearly
+   all programmers in all countries can read."
+
+                    ingemisco tanquam reus
+		    culpa rubet vultus meus
+		    supplicanti parce [rms]
+*/
+
+#define BLESSEDP(solution) ((solution)->flags & BLESSING) /* nonzero iff blessed */
+#define VALIDP(solution) ((solution)->flags & H_VALID) /* nonzero iff slot in use */
+#define UNBLESS(flags)  ((flags) &= ~BLESSING) /* parenthesized: safe in any expression */
+
+#define MAXNAM 64  /* maximum length of registrar's name.
+		      Used for reading wisdom.  There is no point
+		      in doing this right */
+
+/* Flags f1 subsumes flags f2 iff f1 is less/equally impatient than
+   f2, defining a partial ordering. */
+#define IMPATIENCE(flags) ((flags) & IMPATIENCE_FLAGS)
+#define NONIMPATIENCE(flags) ((flags) & NONIMPATIENCE_FLAGS)
+#define ORDERED(f1, f2) (SUBSUMES(f1, f2) || SUBSUMES(f2, f1))
+#define SUBSUMES(f1, f2) ((IMPATIENCE(f1) & (f2)) == IMPATIENCE(f1))
+
+static unsigned addmod(unsigned a, unsigned b, unsigned p)
+{
+     /* (a + b) mod p.  gcc-2.95/sparc miscompiles the fast version below. */
+#if defined(__sparc__) && defined(__GNUC__)
+     /* slow version: plain remainder */
+     return (a + b) % p;
+#else
+     /* faster version; equals the slow one only while a + b < 2 * p */
+     unsigned c = a + b;
+     return c >= p ? c - p : c;
+#endif
+}
+
+/*
+  slvdesc management: sgrow enlarges the solver-descriptor table.
+*/
+static void sgrow(planner *ego)
+{
+     slvdesc *old_tab = ego->slvdescs;
+     unsigned old_cap = ego->slvdescsiz, new_cap = 1 + old_cap + old_cap / 4;
+     slvdesc *new_tab = (slvdesc *)MALLOC(new_cap * sizeof(slvdesc), SLVDESCS);
+     unsigned k;
+
+     for (k = 0; k < old_cap; ++k) /* carry over the existing descriptors */
+	  new_tab[k] = old_tab[k];
+     ego->slvdescsiz = new_cap;
+     ego->slvdescs = new_tab;
+     X(ifree0)(old_tab);
+}
+
+static void register_solver(planner *ego, solver *s)
+{
+     slvdesc *n;
+     if (s) { /* add s to solver list */
+	  X(solver_use)(s);
+	  /* grow the descriptor table when it is full */
+	  if (ego->nslvdesc >= ego->slvdescsiz)
+	       sgrow(ego);
+
+	  n = ego->slvdescs + ego->nslvdesc++;
+
+	  n->slv = s;
+	  n->reg_nam = ego->cur_reg_nam;
+	  n->reg_id = ego->cur_reg_id++;
+	  /* names must fit the MAXNAM-sized buffer used when reading wisdom */
+	  A(strlen(n->reg_nam) < MAXNAM);
+	  n->nam_hash = X(hash)(n->reg_nam);
+     }
+}
+
+static int slookup(planner *ego, char *nam, int id)
+{
+     unsigned h = X(hash)(nam); /* used to avoid strcmp in the common case */
+     FORALL_SOLVERS(ego, s, sp, {
+	  UNUSED(s);
+	  if (sp->reg_id == id && sp->nam_hash == h
+	      && !strcmp(sp->reg_nam, nam))
+	       return sp - ego->slvdescs; /* index of the matching descriptor */
+     });
+     return -1; /* no descriptor has this name and id */
+}
+
+/*
+  md5-related stuff:
+*/
+
+/* first hash function: initial slot, in [0, hashsiz - 1] */
+static unsigned h1(planner *ego, const md5sig s)
+{
+     return s[0] % ego->hashsiz;
+}
+
+/* second hash function (for double hashing): stride in [1, hashsiz - 1] */
+static unsigned h2(planner *ego, const md5sig s)
+{
+     return 1U + s[1] % (ego->hashsiz - 1);
+}
+
+static void md5hash(md5 *m, const problem *p, const planner *plnr)
+{
+     X(md5begin)(m);
+     X(md5unsigned)(m, sizeof(R)); /* so we don't mix different precisions */
+     X(md5unsigned)(m, plnr->problem_flags); /* flags are part of the key */
+     X(md5int)(m, plnr->nthr); /* ...and so is the thread count */
+     p->adt->hash(p, m); /* the problem adds its own contribution */
+     X(md5end)(m);
+}
+
+static int md5eq(const md5sig a, const md5sig b)
+{
+     return a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3]; /* all 4 words */
+}
+
+static void sigcpy(const md5sig a, md5sig b)
+{
+     b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3]; /* note: source a, destination b */
+}
+
+/*
+  memoization routines :
+*/
+
+/*
+   "The written book shall be brought forth, in which all is
+   contained, whence the world shall be judged" (Dies Irae).
+   One hash-table slot of the planner's memo of solutions.
+*/
+struct solution_s {
+     md5sig s; /* signature compared by md5eq */
+     unsigned short flags; /* planner flags plus H_VALID (and BLESSING) */
+     short slvndx; /* index into slvdescs; negative = known infeasible */
+};
+
+static solution *hlookup(planner *ego, const md5sig s, unsigned short flags)
+{
+     unsigned g, h = h1(ego, s), d = h2(ego, s);
+     /* probe from h in steps of d until a match or an empty slot */
+     ++ego->lookup;
+
+     for (g = h; ; g = addmod(g, d, ego->hashsiz)) {
+	  solution *l = ego->solutions + g;
+	  ++ego->lookup_iter;
+	  if (VALIDP(l)) {
+	       if (md5eq(s, l->s) && ORDERED(l->flags, flags)) { 
+		    ++ego->succ_lookup;
+		    return l; /* same signature, comparable flags */
+	       }
+	  } else {
+	       return 0; /* reached an empty slot: not present */
+	  }
+	  A((g + d) % ego->hashsiz != h); /* must not wrap back to the start */
+     }
+}
+
+/* store (s, flags, slvndx) in slot l, or in the first free probe slot if !l */
+static void hinsert0(planner *ego, const md5sig s, unsigned short flags,
+		     int slvndx, solution *l)
+{
+     ++ego->insert;
+     if (!l) { /* the caller supplied no slot */
+	  /* search for nonfull slot */
+	  unsigned g, h = h1(ego, s), d = h2(ego, s); 
+	  ++ego->insert_unknown;
+	  for (g = h; ; g = addmod(g, d, ego->hashsiz)) {
+	       ++ego->insert_iter;
+	       l = ego->solutions + g;
+	       if (!VALIDP(l)) break;
+	       A((g + d) % ego->hashsiz != h);
+	  }
+     }
+
+     /* fill slot */
+     l->flags = flags | H_VALID;
+     l->slvndx = (short)slvndx; /* narrowed to the short field */
+     sigcpy(s, l->s);
+}
+
+static void rehash(planner *ego, unsigned nsiz)
+{
+     unsigned osiz = ego->hashsiz, h;
+     solution *osol = ego->solutions, *nsol;
+     /* replace the table by a fresh one whose size is a prime >= nsiz */
+     nsiz = (unsigned)X(next_prime)((int)nsiz);
+     nsol = (solution *)MALLOC(nsiz * sizeof(solution), HASHT);
+     ++ego->nrehash;
+
+     /* init new table */
+     for (h = 0; h < nsiz; ++h) 
+	  nsol[h].flags = 0;
+
+     /* install new table */
+     ego->hashsiz = nsiz;
+     ego->solutions = nsol;
+
+     /* copy table */
+     for (h = 0; h < osiz; ++h) {
+	  solution *l = osol + h;
+	  if (VALIDP(l)) /* invalid (deleted or empty) slots are dropped */
+	       hinsert0(ego, l->s, l->flags, l->slvndx, 0);
+     }
+
+     X(ifree0)(osol);
+}
+
+static unsigned minsz(unsigned nelem)
+{
+     return 1U + nelem + nelem / 8U; /* nelem plus one eighth, plus one */
+}
+
+static unsigned nextsz(unsigned nelem)
+{
+     return minsz(minsz(nelem)); /* minsz applied twice: size requested on rehash */
+}
+
+static void hgrow(planner *ego)
+{
+     /* rehash once the minimum size for nelem reaches the table size */
+     if (ego->hashsiz <= minsz(ego->nelem))
+	  rehash(ego, nextsz(ego->nelem));
+}
+
+static void hshrink(planner *ego)
+{
+     /* always rehash after deletions */
+     unsigned target = nextsz(ego->nelem);
+     rehash(ego, target);
+}
+
+/* inherit blessing, but only if the solver is the same */
+static unsigned short merge_flags(unsigned short dstflags, int dstndx,
+				  unsigned short srcflags, int srcndx)
+{
+     if (srcndx == dstndx)
+	  dstflags |= (srcflags & BLESSING); /* copy only src's BLESSING bit */
+     return dstflags;
+}
+
+static void hinsert(planner *ego, const md5sig s, 
+		    unsigned short flags, int slvndx)
+{
+     solution *l;
+     /* add (s, flags, slvndx), or reconcile it with an entry hlookup finds */
+     if ((l = hlookup(ego, s, flags))) {
+	  if (SUBSUMES(flags, l->flags)) {
+	       /* overwrite old solution */
+	       flags = merge_flags(flags, slvndx, l->flags, l->slvndx);
+	  } else {
+	       A(SUBSUMES(l->flags, flags));
+	       l->flags = merge_flags(l->flags, l->slvndx, flags, slvndx);
+	       return; /* the existing entry subsumes the new one: keep it */
+	  }
+     } else {
+	  ++ego->nelem;
+	  hgrow(ego); /* may rehash; l is 0 on this path */
+     }
+     hinsert0(ego, s, flags, slvndx, l); /* l == 0 means "find a free slot" */
+}
+
+static void hcurse_subsumed(planner *ego)
+{
+     unsigned h;
+
+     /* unbless any entries that are unreachable because they
+        are subsumed by less-impatient ones.  */
+     for (h = 0; h < ego->hashsiz; ++h) {
+	  solution *l = ego->solutions + h;
+	  if (VALIDP(l)) {
+	       unsigned d = h2(ego, l->s), g = addmod(h, d, ego->hashsiz);
+	       for (; ; g = addmod(g, d, ego->hashsiz)) {
+		    solution *m = ego->solutions + g;
+		    if (VALIDP(m)) {
+			 if (md5eq(l->s, m->s) &&
+			     SUBSUMES(l->flags, m->flags)) {
+			      /* l takes over m's blessing (same solver only) */
+			      l->flags = merge_flags(l->flags, l->slvndx,
+						     m->flags, m->slvndx);
+
+			      /* m is subsumed by l, so it loses its blessing */
+			      UNBLESS(m->flags);
+			 }
+		    }
+		    else break;
+		    A((g + d) % ego->hashsiz != h);
+	       }
+	  }
+     }
+}
+
+
+static void invoke_hook(planner *ego, plan *pln, const problem *p, 
+			int optimalp)
+{
+     if (!ego->hook) return; /* no hook installed */
+     ego->hook(pln, p, optimalp);
+}
+
+static void evaluate_plan(planner *ego, plan *pln, const problem *p)
+{
+     if (!BELIEVE_PCOSTP(ego) || pln->pcost == 0.0) {
+	  ego->nplan++;
+	  /* cost the plan: by op-count estimate, or by timing it */
+	  if (ESTIMATEP(ego)) {
+	  estimate:
+	       /* heuristic */
+	       pln->pcost = 0.0
+		    + pln->ops.add
+		    + pln->ops.mul
+		    + 2 * pln->ops.fma
+		    + pln->ops.other;
+	       ego->epcost += pln->pcost;
+	  } else {
+	       double t = X(measure_execution_time)(pln, p);
+
+	       if (t < 0) {  /* unavailable cycle counter */
+		    /* Real programmers can write FORTRAN in any language */
+		    goto estimate;
+	       }
+
+	       pln->pcost = t;
+	       ego->pcost += t;
+	  }
+     }
+     /* the hook runs even when the existing pcost was believed */
+     invoke_hook(ego, pln, p, 0);
+}
+
+/* call s's mkplan with planner_flags = nflags, then restore flags, nthr: */
+static plan *invoke_solver(planner *ego, problem *p, solver *s, 
+			   unsigned short nflags)
+{
+     unsigned short planner_flags = ego->planner_flags;
+     unsigned problem_flags = ego->problem_flags;
+     int nthr = ego->nthr;
+     plan *pln;
+     ego->planner_flags = nflags;
+     pln = s->adt->mkplan(s, p, ego);
+     ego->problem_flags = problem_flags; /* undo any changes made during mkplan */
+     ego->nthr = nthr;
+     ego->planner_flags = planner_flags;
+     return pln;
+}
+
+static plan *search(planner *ego, problem *p, slvdesc **descp)
+{
+     plan *best = 0;
+     int best_not_yet_timed = 1;
+     int pass;
+     /* try every solver, keep the cheapest plan; *descp gets its slvdesc */
+     if (NO_SEARCHP(ego)) {
+	  /* D("invalid search for %P %x\n", p, ego->planner_flags); */
+	  return 0;
+     }
+
+     for (pass = 0; pass < 2; ++pass) {
+	  unsigned short nflags = ego->planner_flags;
+	  
+	  if (best) break; /* pass 0 already produced a plan */
+
+	  switch (pass) {
+	      case 0: 
+		   /* skip pass 0 during exhaustive search */
+		   if (!NO_EXHAUSTIVEP(ego)) continue;
+		   nflags |= NO_UGLY;
+		   break;
+	      case 1:
+		   /* skip pass 1 if NO_UGLY */
+		   if (NO_UGLYP(ego)) continue;
+		   break;
+	  }
+
+          FORALL_SOLVERS(ego, s, sp, {
+	       plan *pln = invoke_solver(ego, p, s, nflags);
+
+	       if (pln) {
+		    if (best) {
+			 if (best_not_yet_timed) { /* first plan is costed lazily */
+			      evaluate_plan(ego, best, p);
+			      best_not_yet_timed = 0;
+			 }
+			 evaluate_plan(ego, pln, p);
+			 if (pln->pcost < best->pcost) {
+			      X(plan_destroy_internal)(best);
+			      best = pln;
+			      *descp = sp;
+			 } else {
+			      X(plan_destroy_internal)(pln);
+			 }
+		    } else {
+			 best = pln;
+			 *descp = sp;
+		    }
+	       }
+	  });
+     }
+
+     return best; /* 0 if no solver applied; *descp is then not written */
+}
+
+static plan *mkplan(planner *ego, problem *p)
+{
+     plan *pln;
+     md5 m;
+     slvdesc *sp;
+     unsigned short flags;
+     ASSERT_ALIGNED_DOUBLE;
+     /* plan p: try the memoized solver first, else search; record the result */
+     /* Canonical form. */
+     if (!NO_EXHAUSTIVEP(ego)) ego->planner_flags &= ~NO_UGLY;
+	  
+     ++ego->nprob;
+     md5hash(&m, p, ego);
+
+     pln = 0;
+
+     {
+	  solution *sol; /* new scope for sol */
+
+	  if ((sol = hlookup(ego, m.s, ego->planner_flags))) {
+	       if (SUBSUMES(sol->flags, ego->planner_flags)) {
+		    /* wisdom is acceptable */
+		    if (sol->slvndx < 0) 
+			 return 0;   /* known to be infeasible */
+
+		    /* use solver to obtain a plan */
+		    sp = ego->slvdescs + sol->slvndx;
+		    pln = 
+			 invoke_solver(ego, p, sp->slv, 
+				       (0
+					| NO_SEARCH 
+					| IMPATIENCE(sol->flags)
+					| NONIMPATIENCE(ego->planner_flags) ));
+
+		    /* if (!pln) then the entry is bogus, but
+		       we currently do nothing about it. */
+		    /* CAVEAS: Do not use ``sol'' here, because the
+		       pointer is possibly dangling after the call to
+		       invoke_solver(). */
+	       } else {
+		    A(SUBSUMES(ego->planner_flags, sol->flags));
+	       }
+	  }
+     }
+
+
+     if (!pln)
+	  pln = search(ego, p, &sp);
+
+     flags = ego->planner_flags;
+
+     if (pln) {
+	  /* Postulate de iure that NO_UGLY subsumes ~NO_UGLY if the
+	     problem is feasible. Also postulate that NO_SEARCH
+	     subsumes ~NO_SEARCH. */
+	  flags &= ~(NO_UGLY | NO_SEARCH);
+     }
+     /* sp is only read when pln != 0; -1 records the problem as infeasible */
+     hinsert(ego, m.s, flags, pln ? sp - ego->slvdescs : -1);
+
+     if (pln)
+	  invoke_hook(ego, pln, p, 1);
+     return pln;
+}
+
+/* destroy hash table entries.  If FORGET_EVERYTHING, destroy the whole
+   table.  If FORGET_ACCURSED, then destroy entries that are not blessed. */
+static void forget(planner *ego, amnesia a)
+{
+     unsigned h;
+
+     /* garbage-collect while we are at it */ 
+     if (a != FORGET_EVERYTHING)
+	  hcurse_subsumed(ego);
+
+     for (h = 0; h < ego->hashsiz; ++h) {
+	  solution *l = ego->solutions + h;
+	  if (VALIDP(l)) {
+	       if (a == FORGET_EVERYTHING ||
+		   (a == FORGET_ACCURSED && !BLESSEDP(l))) {
+		    /* drop the entry: clear its valid bit
+		       and count it out of nelem */
+		    l->flags &= ~H_VALID;
+		    --ego->nelem;
+	       }
+	  }
+     }
+     /* rebuild the table so that no dropped entry remains */
+
+     hshrink(ego);
+}
+
+static void htab_destroy(planner *ego)
+{
+     forget(ego, FORGET_EVERYTHING); /* ends by rehashing into a fresh table */
+     X(ifree)(ego->solutions); /* ...which is released here */
+     ego->nelem = 0U; /* NOTE(review): solutions/hashsiz stay stale; confirm callers */
+}
+
+/* FIXME: what sort of version information should we write? */
+#define WISDOM_PREAMBLE PACKAGE "-" VERSION " " STRINGIZE(X(wisdom))
+
+/* export wisdom: print every blessed, feasible entry of the hash table */
+static void exprt(planner *ego, printer *p)
+{
+     unsigned h;
+
+     hcurse_subsumed(ego);
+
+     p->print(p, "(" WISDOM_PREAMBLE "%(");
+     for (h = 0; h < ego->hashsiz; ++h) {
+	  solution *l = ego->solutions + h;
+	  if (VALIDP(l) && BLESSEDP(l) && l->slvndx >= 0) {
+	       slvdesc *sp = ego->slvdescs + l->slvndx;
+	       /* one line per entry: registrar name and id, flags,
+		  then the four words of the signature */
+	       p->print(p, "(%s %d #x%x #x%M #x%M #x%M #x%M)\n",
+			sp->reg_nam, sp->reg_id, (int)l->flags,
+			l->s[0], l->s[1], l->s[2], l->s[3]);
+	  }
+     }
+     p->print(p, "%))\n");
+}
+
+/* Import wisdom from scanner sc into the hash table.  Returns 1 on
+   success; on bad input restores the previous table and returns 0. */
+static int imprt(planner *ego, scanner *sc)
+{
+     char buf[MAXNAM + 1];
+     md5uint sig[4];
+     int flags, reg_id, slvndx;
+     solution *sol;
+     unsigned old_hashsiz = ego->hashsiz, old_nelem = ego->nelem;
+
+     if (!sc->scan(sc, "(" WISDOM_PREAMBLE))
+	  return 0; /* don't need to restore hashtable */
+
+     /* make a backup copy of the hash table (cache the hash) */
+     {
+	  unsigned h;
+	  sol = (solution *)MALLOC(old_hashsiz * sizeof(solution), HASHT);
+	  for (h = 0; h < old_hashsiz; ++h)
+	       sol[h] = ego->solutions[h];
+     }
+
+     while (1) {
+	  if (sc->scan(sc, ")"))
+	       break;
+	  /* parse one entry: (name reg_id flags sig0 sig1 sig2 sig3) */
+	  if (!sc->scan(sc, "(%*s %d #x%x #x%M #x%M #x%M #x%M)",
+			MAXNAM, buf, &reg_id, &flags, 
+			sig + 0, sig + 1, sig + 2, sig + 3))
+	       goto bad;
+
+	  if ((slvndx = slookup(ego, buf, reg_id)) < 0)
+	       goto bad;
+
+	  /* admit the entry; this may grow and rehash the table */
+	  hinsert(ego, sig, (unsigned short)flags, slvndx);
+     }
+
+     X(ifree0)(sol);
+     return 1;
+
+ bad:
+     /* ``The wisdom of FFTW must be above suspicion.'' */
+     X(ifree0)(ego->solutions);
+     ego->solutions = sol;
+     ego->hashsiz = old_hashsiz; /* the backup is laid out for the old size */
+     ego->nelem = old_nelem; /* entries inserted before the error are gone */
+     return 0;
+}
+
+/*
+ * create a planner with no solvers registered and an empty hash table
+ */
+planner *X(mkplanner)(void)
+{
+     static const planner_adt padt = {
+	  register_solver, mkplan, forget, exprt, imprt
+     };
+
+     planner *p = (planner *) MALLOC(sizeof(planner), PLANNERS);
+
+     p->adt = &padt;
+     p->nplan = p->nprob = p->nrehash = 0;
+     p->pcost = p->epcost = 0.0;
+     p->succ_lookup = p->lookup = p->lookup_iter = 0;
+     p->insert = p->insert_iter = p->insert_unknown = 0;
+     p->hook = 0;
+     p->cur_reg_nam = 0; /* NOTE(review): cur_reg_id is not set here; confirm */
+
+     p->slvdescs = 0;
+     p->nslvdesc = p->slvdescsiz = 0;
+
+     p->solutions = 0;
+     p->hashsiz = p->nelem = 0U;
+
+     p->problem_flags = 0;
+     p->planner_flags = 0;
+     p->nthr = 1;
+
+     hgrow(p);			/* so that hashsiz > 0 */
+
+     return p;
+}
+
+void X(planner_destroy)(planner *ego)
+{
+     /* destroy hash table */
+     htab_destroy(ego);
+
+     /* destroy solvdesc table */
+     FORALL_SOLVERS(ego, s, sp, {
+	  UNUSED(sp);
+	  X(solver_destroy)(s); /* pairs with solver_use in register_solver */
+     });
+
+     X(ifree0)(ego->slvdescs);
+     X(ifree)(ego); /* finally release the planner itself */
+}
+
+plan *X(mkplan_d)(planner *ego, problem *p)
+{
+     plan *pln = ego->adt->mkplan(ego, p);
+     X(problem_destroy)(p); /* p is destroyed whether or not a plan was made */
+     return pln;
+}
+
+/*
+ * Debugging code:
+ */
+#ifdef FFTW_DEBUG
+
+void X(planner_dump)(planner *ego, int verbose)
+{
+     unsigned valid = 0, empty = 0, infeasible = 0;
+     unsigned h;
+     UNUSED(verbose); /* historical */
+     /* tally slots: valid, empty, and valid-but-infeasible (slvndx < 0) */
+     for (h = 0; h < ego->hashsiz; ++h) {
+	  solution *l = ego->solutions + h; 
+	  if (VALIDP(l)) {
+	       ++valid; 
+	       if (l->slvndx < 0) ++infeasible;
+	  } else
+	       ++empty;
+	  
+     }
+
+     D("nplan = %d\n", ego->nplan);
+     D("nprob = %d\n", ego->nprob);
+     D("pcost = %g\n", ego->pcost);
+     D("epcost = %g\n", ego->epcost);
+     D("lookup = %d\n", ego->lookup);
+     D("succ_lookup = %d\n", ego->succ_lookup);
+     D("lookup_iter = %d\n", ego->lookup_iter);
+     D("insert = %d\n", ego->insert);
+     D("insert_iter = %d\n", ego->insert_iter);
+     D("insert_unknown = %d\n", ego->insert_unknown);
+     D("nrehash = %d\n", ego->nrehash);
+     D("hashsiz = %u\n", ego->hashsiz);
+     D("empty = %u\n", empty); /* the three tallies are unsigned: use %u */
+     D("valid = %u\n", valid);
+     D("infeasible = %u\n", infeasible);
+     A(ego->nelem == valid); /* the table's element count must match the tally */
+}
+
+#endif
diff --git a/src/fftw3/kernel/primes.c b/src/fftw3/kernel/primes.c
new file mode 100644
index 0000000..608e51c
--- /dev/null
+++ b/src/fftw3/kernel/primes.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: primes.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+/***************************************************************************/
+
+/* Rader's algorithm requires lots of modular arithmetic, and if we
+   aren't careful we can have errors due to integer overflows. */
+
+#ifdef SAFE_MULMOD
+
+#  include <limits.h>
+
+/* compute (x * y) mod p, but watch out for integer overflows; we must
+   have x, y >= 0, p > 0.  This routine is slow. */
+int X(safe_mulmod)(int x, int y, int p)
+{
+     if (y == 0 || x <= INT_MAX / y)
+	  return((x * y) % p);
+     else {
+	  int y2 = y/2, a = X(safe_mulmod)(x, y2, p); /* a, b in [0, p) */
+	  int b = X(safe_mulmod)(x, y - y2, p);
+	  return (a >= p - b) ? a + (b - p) : a + b; /* (a+b) mod p, no overflow */
+     }
+}
+#endif /* safe_mulmod ('long long' unavailable) */
+
+/***************************************************************************/
+
+/* Compute n^m mod p, where m >= 0 and p > 0.  If we really cared, we
+   could make this tail-recursive. */
+int X(power_mod)(int n, int m, int p)
+{
+     A(p > 0);
+     if (m == 0)
+	  return 1; /* n^0; note this is returned even when p == 1 */
+     else if (m % 2 == 0) { /* even exponent: square the half power */
+	  int x = X(power_mod)(n, m / 2, p);
+	  return MULMOD(x, x, p);
+     }
+     else /* odd exponent: peel off one factor of n */
+	  return MULMOD(n, X(power_mod)(n, m - 1, p), p);
+}
+
+/* the following two routines were contributed by Greg Dionne. */
+static int get_prime_factors(int n, int *primef)
+{
+     int i;
+     int size = 0;
+     /* distinct prime factors of n go to primef[]; n must be even, > 0 */
+     primef[size++] = 2;
+     do
+	  n >>= 1;
+     while ((n & 1) == 0);
+
+     if (n == 1)
+	  return size;
+     /* n is odd from here on: try odd divisors only */
+     for (i = 3; i * i <= n; i += 2)
+	  if (!(n % i)) {
+	       primef[size++] = i;
+	       do
+		    n /= i;
+	       while (!(n % i));
+	  }
+     if (n == 1)
+	  return size;
+     primef[size++] = n; /* what remains has no divisor <= its square root */
+     return size; /* number of entries written to primef[] */
+}
+
+int X(find_generator)(int p)
+{
+    int n, i, size;
+    int primef[16];     /* smallest number = 32589158477190044730 > 2^64 */
+    int pm1 = p - 1;
+
+    if (p == 2)
+	 return 1;
+    /* smallest n >= 2 with n^(pm1/q) != 1 (mod p) for each factor q of pm1 */
+    size = get_prime_factors(pm1, primef);
+    n = 2;
+    for (i = 0; i < size; i++)
+        if (X(power_mod)(n, pm1 / primef[i], p) == 1) {
+            i = -1; /* restart the factor loop (i++ makes it 0) */
+            n++; /* ...with the next candidate */
+        }
+    return n; /* presumably a generator mod p for prime p; confirm with callers */
+}
+
+/* Return first prime divisor of n  (It would be at best slightly faster to
+   search a static table of primes; there are 6542 primes < 2^16.)  */
+int X(first_divisor)(int n)
+{
+     int i;
+     if (n <= 1)
+	  return n; /* no prime divisor: n itself is returned */
+     if (n % 2 == 0)
+	  return 2;
+     for (i = 3; i <= n / i; i += 2) /* same as i*i <= n, without int overflow */
+	  if (n % i == 0)
+	       return i;
+     return n; /* n is prime */
+}
+
+int X(is_prime)(int n)
+{
+     return(n > 1 && X(first_divisor)(n) == n); /* prime iff its first divisor is itself */
+}
+
+int X(next_prime)(int n)
+{
+     while (!X(is_prime)(n)) ++n; /* n itself is returned if already prime */
+     return n;
+}
diff --git a/src/fftw3/kernel/print.c b/src/fftw3/kernel/print.c
new file mode 100644
index 0000000..314be7c
--- /dev/null
+++ b/src/fftw3/kernel/print.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: print.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+#include <stddef.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#define BSZ 64
+
+static void myputs(printer *p, const char *s)
+{
+     const char *cur;
+     for (cur = s; *cur; ++cur) /* stop at the terminating NUL */
+          p->putchr(p, *cur);
+}
+
+static void vprint(printer *p, const char *format, va_list ap)
+{
+     char buf[BSZ]; /* scratch for sprintf'd numbers */
+     const char *s = format;
+     char c;
+     int i;
+
+     for (i = 0; i < p->indent; ++i) /* every call starts with the indentation */
+          p->putchr(p, ' ');
+
+     while ((c = *s++)) {
+          switch (c) {
+	      case '%':
+		   switch ((c = *s++)) {
+		       case 'M': {
+			    /* md5 value */
+			    md5uint x = va_arg(ap, md5uint);
+			    x = 0xffffffffUL & x;
+			    sprintf(buf, "%8.8lx", (unsigned long)x);
+			    goto putbuf;
+		       }
+		       case 'c': {
+			    int x = va_arg(ap, int);
+			    p->putchr(p, x);
+			    break;
+		       }
+		       case 's': {
+			    char *x = va_arg(ap, char *);
+			    if (x)
+				 myputs(p, x);
+			    else
+				 goto putnull;
+			    break;
+		       }
+		       case 'd': {
+			    int x = va_arg(ap, int);
+			    sprintf(buf, "%d", x);
+			    goto putbuf;
+		       }
+		       case 't': {
+			    ptrdiff_t x;
+			    A(*s == 'd');
+			    s += 1;
+			    x = va_arg(ap, ptrdiff_t);
+			    /* should use C99 %td here, but
+			       this is not yet widespread enough */
+			    sprintf(buf, "%ld", (long) x);
+			    goto putbuf;
+		       }
+		       case 'f': case 'e': case 'g': {
+			    char fmt[3] = "%x";
+			    double x = va_arg(ap, double);
+			    fmt[1] = c;
+			    sprintf(buf, fmt, x); /* NOTE(review): "%f" of a huge x may exceed BSZ */
+			    goto putbuf;
+		       }
+		       case 'v': {
+			    /* print optional vector length */
+			    int x = va_arg(ap, int);
+			    if (x > 1) {
+				 sprintf(buf, "-x%d", x);
+				 goto putbuf;
+			    }
+			    break;
+		       }
+		       case 'o': {
+			    /* integer option.  Usage: %oNAME= */
+			    int x = va_arg(ap, int);
+			    if (x)
+				 p->putchr(p, '/');
+			    while ((c = *s++) != '=') /* skip (or, if x, print) NAME up to '=' */
+				 if (x)
+				      p->putchr(p, c);
+			    if (x) {
+				 sprintf(buf, "=%d", x);
+				 goto putbuf;
+			    }
+			    break;
+		       }
+		       case 'u': {
+			    unsigned x = va_arg(ap, unsigned);
+			    sprintf(buf, "%u", x);
+			    goto putbuf;
+		       }
+		       case 'x': {
+			    unsigned x = va_arg(ap, unsigned);
+			    sprintf(buf, "%x", x);
+			    goto putbuf;
+		       }
+		       case '(': {
+			    /* newline, augment indent level */
+			    p->putchr(p, '\n');
+			    p->indent += p->indent_incr;
+			    break;
+		       }
+		       case ')': {
+			    /* decrement indent level */
+			    p->indent -= p->indent_incr;
+			    break;
+		       }
+		       case 'p': {  /* note difference from C's %p */
+			    /* print plan */
+			    plan *x = va_arg(ap, plan *);
+			    if (x) 
+				 x->adt->print(x, p);
+			    else 
+				 goto putnull;
+			    break;
+		       }
+		       case 'P': {
+			    /* print problem */
+			    problem *x = va_arg(ap, problem *);
+			    if (x)
+				 x->adt->print(x, p);
+			    else
+				 goto putnull;
+			    break;
+		       }
+		       case 'T': {
+			    /* print tensor */
+			    tensor *x = va_arg(ap, tensor *);
+			    if (x)
+				 X(tensor_print)(x, p);
+			    else
+				 goto putnull;
+			    break;
+		       }
+		       default:
+			    A(0 /* unknown format */);
+			    break;
+		   /* shared tails: emit buf, or "(null)" for a null pointer */
+		   putbuf:
+			    myputs(p, buf);
+			    break;
+		   putnull:
+			    myputs(p, "(null)");
+			    break;
+		   }
+		   break;
+	      default:
+		   p->putchr(p, c); /* ordinary characters are copied through */
+		   break;
+          }
+     }
+}
+
+static void print(printer *p, const char *format, ...)
+{
+     va_list args; /* handed to vprint unchanged */
+     va_start(args, format);
+     vprint(p, format, args);
+     va_end(args);
+}
+
+printer *X(mkprinter)(size_t size, 
+		      void (*putchr)(printer *p, char c),
+		      void (*cleanup)(printer *p))
+{
+     printer *s = (printer *)MALLOC(size, OTHER); /* size is chosen by the caller */
+     s->print = print;
+     s->vprint = vprint;
+     s->putchr = putchr; /* sink for every character printed */
+     s->cleanup = cleanup; /* may be 0: printer_destroy checks before calling */
+     s->indent = 0;
+     s->indent_incr = 2; /* indent step applied by the "%(" and "%)" formats */
+     return s;
+}
+
+void X(printer_destroy)(printer *p)
+{
+     if (p->cleanup) /* optional hook supplied to mkprinter */
+	  p->cleanup(p);
+     X(ifree)(p);
+}
diff --git a/src/fftw3/kernel/scan.c b/src/fftw3/kernel/scan.c
new file mode 100644
index 0000000..f5fa6e1
--- /dev/null
+++ b/src/fftw3/kernel/scan.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: scan.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+#include <string.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#ifdef USE_CTYPE
+#include <ctype.h>
+#else
+/* Screw ctype. On linux, the is* functions call a routine that gets
+   the ctype map in the current locale.  Because this operation is
+   expensive, the map is cached on a per-thread basis.  I am not
+   willing to link this crap with FFTW.  Not over my dead body.
+
+   Sic transit gloria mundi.
+*/
+#undef isspace
+#define isspace(x) ((x) >= 0 && (x) <= ' ')
+#undef isdigit
+#define isdigit(x) ((x) >= '0' && (x) <= '9')
+#undef isupper
+#define isupper(x) ((x) >= 'A' && (x) <= 'Z')
+#undef islower
+#define islower(x) ((x) >= 'a' && (x) <= 'z')
+#endif
+
+static int mygetc(scanner *sc)
+{
+     int c = sc->ungotc;
+     /* nothing pushed back: read a fresh character */
+     if (c == EOF)
+	  return(sc->getchr(sc));
+     sc->ungotc = EOF; /* consume the pushed-back character */
+     return c;
+}
+
+#define GETCHR(sc) mygetc(sc)
+
+static void myungetc(scanner *sc, int c)
+{
+     sc->ungotc = c; /* single slot: replaces any character already pushed back */
+}
+
+#define UNGETCHR(sc, c) myungetc(sc, c)
+
+static void eat_blanks(scanner *sc)
+{
+     int ch = GETCHR(sc);
+     while (isspace(ch)) /* discard the run of whitespace */
+          ch = GETCHR(sc);
+     UNGETCHR(sc, ch);
+}
+
+static void mygets(scanner *sc, char *s, size_t maxlen)
+{
+     char *s0 = s;
+     int ch;
+     /* read a token of at most maxlen chars; s needs maxlen + 1 bytes */
+     A(maxlen > 0);
+     while ((ch = GETCHR(sc)) != EOF && !isspace(ch)
+	    && ch != ')' && ch != '(' && s < s0 + maxlen)
+	  *s++ = ch;
+     *s = 0; /* always NUL-terminated */
+     UNGETCHR(sc, ch); /* the character that ended the token is not consumed */
+}
+
+static long getlong(scanner *sc, int base, int *ret)
+{
+     int sign = 1, ch, count, digit;
+     long x = 0;     
+     /* parse an optionally signed integer; *ret = 1 iff a digit was read */
+     ch = GETCHR(sc);
+     if (ch == '-' || ch == '+') {
+	  sign = ch == '-' ? -1 : 1;
+	  ch = GETCHR(sc);
+     }
+     for (count = 0; ; ++count) {
+	  if (isdigit(ch)) 
+	       digit = ch - '0';
+	  else if (isupper(ch))
+	       digit = ch - 'A' + 10;
+	  else if (islower(ch))
+	       digit = ch - 'a' + 10;
+	  else break; /* not alphanumeric */
+	  if (digit >= base) break; /* e.g. 'g' in hex: not a digit of this base */
+	  x = x * base + digit;
+	  ch = GETCHR(sc);
+     }
+     x *= sign;
+     UNGETCHR(sc, ch); /* push back the first character that was not consumed */
+     *ret = count > 0;
+     return x;
+}
+
+/* vscan is scanf-like with our own format specifiers (%s must be
+   preceded by %*, which supplies the maximum length).  It returns 1
+   if the whole format matched and 0 otherwise.  '(' and ')' in the
+   format match those characters preceded by any whitespace.  If a
+   literal character fails to match, it is pushed back on the stream. */
+static int vscan(scanner *sc, const char *format, va_list ap)
+{
+     const char *s = format;
+     char c;
+     int ch = 0, width;
+     size_t fmt_len;
+
+     while ((c = *s++)) {
+	  fmt_len = 0;
+          switch (c) {
+	      case '%':
+	  getformat:
+		   switch ((c = *s++)) {
+		       case 's': {
+			    char *x = va_arg(ap, char *);
+			    mygets(sc, x, fmt_len);
+			    break;
+		       }
+		       case 'd': {
+			    int *x = va_arg(ap, int *);
+			    *x = (int) getlong(sc, 10, &ch);
+			    if (!ch) return 0;
+			    break;
+		       }
+		       case 'x': {
+			    int *x = va_arg(ap, int *);
+			    *x = (int) getlong(sc, 16, &ch);
+			    if (!ch) return 0;
+			    break;
+		       }
+		       case 'M': {
+			    md5uint *x = va_arg(ap, md5uint *);
+			    *x = 0xffffffffUL & getlong(sc, 16, &ch);
+			    if (!ch) return 0;
+			    break;
+		       }
+		       case '*': {
+			    if ((width = va_arg(ap, int)) <= 0) return 0;
+			    fmt_len = (size_t)width; /* tested as int: negatives rejected */
+			    goto getformat;
+		       }
+		       default:
+			    A(0 /* unknown format */);
+			    break;
+		   }
+		   break;
+	      default:
+		   if (isspace(c) || c == '(' || c == ')')
+			eat_blanks(sc);
+		   if (!isspace(c) && (ch = GETCHR(sc)) != c) {
+			UNGETCHR(sc, ch);
+			return 0;
+		   }
+		   break;
+          }
+     }
+     return 1;
+}
+
+static int scan(scanner *sc, const char *format, ...)
+{
+     va_list args;
+     int matched;
+     va_start(args, format);
+     matched = vscan(sc, format, args);
+     va_end(args);
+     return matched;
+}
+
+scanner *X(mkscanner)(size_t size, int (*getchr)(scanner *sc))
+{
+     scanner *s = (scanner *)MALLOC(size, OTHER); /* size is chosen by the caller */
+     s->scan = scan;
+     s->vscan = vscan;
+     s->getchr = getchr; /* source of characters for mygetc */
+     s->ungotc = EOF; /* EOF marks "nothing pushed back" */
+     return s;
+}
+
+void X(scanner_destroy)(scanner *sc)
+{
+     X(ifree)(sc); /* releases only the scanner struct itself */
+}
diff --git a/src/fftw3/kernel/solver.c b/src/fftw3/kernel/solver.c
new file mode 100644
index 0000000..4bfb899
--- /dev/null
+++ b/src/fftw3/kernel/solver.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: solver.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+solver *X(mksolver)(size_t size, const solver_adt *adt)
+{
+     solver *slv;	/* size bytes are allocated, so callers may pass a larger struct */
+     slv = (solver *)MALLOC(size, SOLVERS);
+     slv->refcnt = 0;	/* no references held yet */
+     slv->adt = adt;
+     return slv;
+}
+
+void X(solver_use)(solver *ego)
+{
+     ego->refcnt += 1;	/* take one more reference */
+}
+
+void X(solver_destroy)(solver *ego)
+{
+     ego->refcnt -= 1;	/* give up one reference */
+     if (ego->refcnt == 0) X(ifree)(ego);	/* none left: release the solver */
+}
+
+void X(solver_register)(planner *plnr, solver *s)	/* forwards to the planner's own hook */
+{
+     plnr->adt->register_solver(plnr, s);
+}
diff --git a/src/fftw3/kernel/solvtab.c b/src/fftw3/kernel/solvtab.c
new file mode 100644
index 0000000..496915e
--- /dev/null
+++ b/src/fftw3/kernel/solvtab.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: solvtab.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+void X(solvtab_exec)(solvtab tbl, planner *p)
+{
+     for (; tbl->reg_nam != 0; tbl++) {	/* a null name ends the table */
+	  p->cur_reg_id = 0;
+	  p->cur_reg_nam = tbl->reg_nam;
+	  (*tbl->reg)(p);	/* run this entry's registration routine */
+     }
+     p->cur_reg_nam = 0;	/* no entry is current any more */
+}
diff --git a/src/fftw3/kernel/square.c b/src/fftw3/kernel/square.c
new file mode 100644
index 0000000..4b5afab
--- /dev/null
+++ b/src/fftw3/kernel/square.c
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: square.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+int X(square)(int x)	/* x times x, in plain int arithmetic */
+{
+     return x * x;
+}
diff --git a/src/fftw3/kernel/stride.c b/src/fftw3/kernel/stride.c
new file mode 100644
index 0000000..fda049f
--- /dev/null
+++ b/src/fftw3/kernel/stride.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: stride.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+#include "ifftw.h"
+
+#ifdef PRECOMPUTE_ARRAY_INDICES
+stride X(mkstride)(int n, int s)
+{
+     int *tab = (int *) MALLOC(n * sizeof(int), STRIDES);
+     int k = 0;
+     while (k < n) {	/* entry k holds the precomputed offset s * k */
+          tab[k] = s * k;
+          ++k;
+     }
+     return tab;
+}
+
+void X(stride_destroy)(stride p)	/* hand the stride table to X(ifree0) */
+{
+     X(ifree0)(p);
+}
+
+#endif
diff --git a/src/fftw3/kernel/tensor.c b/src/fftw3/kernel/tensor.c
new file mode 100644
index 0000000..1161963
--- /dev/null
+++ b/src/fftw3/kernel/tensor.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: tensor.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+tensor *X(mktensor)(int rnk) 	/* allocate a tensor of rank rnk; dims are not filled in */
+{
+     tensor *x;
+
+     A(rnk >= 0);
+
+#if defined(STRUCT_HACK_KR)	/* dims share the tensor's allocation */
+     if (FINITE_RNK(rnk) && rnk > 1)	/* presumably tensor already holds one iodim */
+	  x = (tensor *)MALLOC(sizeof(tensor) + (rnk - 1) * sizeof(iodim),
+				    TENSORS);
+     else
+	  x = (tensor *)MALLOC(sizeof(tensor), TENSORS);
+#elif defined(STRUCT_HACK_C99)	/* dims share the tensor's allocation */
+     if (FINITE_RNK(rnk))	/* room for rnk iodims after the header */
+	  x = (tensor *)MALLOC(sizeof(tensor) + rnk * sizeof(iodim),
+				    TENSORS);
+     else
+	  x = (tensor *)MALLOC(sizeof(tensor), TENSORS);
+#else	/* dims is a separately allocated array */
+     x = (tensor *)MALLOC(sizeof(tensor), TENSORS);
+     if (FINITE_RNK(rnk) && rnk > 0)
+          x->dims = (iodim *)MALLOC(sizeof(iodim) * rnk, TENSORS);
+     else
+          x->dims = 0;	/* rank 0 or non-finite rank: no dims array */
+#endif
+
+     x->rnk = rnk;
+     return x;
+}
+
+void X(tensor_destroy)(tensor *sz)	/* release a tensor made by X(mktensor) */
+{
+#if !defined(STRUCT_HACK_C99) && !defined(STRUCT_HACK_KR)
+     X(ifree0)(sz->dims);	/* dims is a separate allocation only in this build */
+#endif
+     X(ifree)(sz);
+}
+
+int X(tensor_sz)(const tensor *sz)
+{
+     int total = 1, k = 0;
+
+     if (!FINITE_RNK(sz->rnk))
+          return 0;	/* a non-finite rank counts as size 0 */
+
+     while (k < sz->rnk)
+          total *= sz->dims[k++].n;	/* product of all the n's */
+     return total;
+}
+
+void X(tensor_md5)(md5 *p, const tensor *t)
+{
+     int k;
+
+     X(md5int)(p, t->rnk);	/* the rank is hashed in every case */
+     if (!FINITE_RNK(t->rnk))
+	  return;
+     for (k = 0; k < t->rnk; ++k) {	/* then n, is, os of each dimension */
+	  X(md5int)(p, t->dims[k].n);
+	  X(md5int)(p, t->dims[k].is);
+	  X(md5int)(p, t->dims[k].os);
+     }
+}
+
+/* View a tensor of rank <= 1 as one (n, is, os) triple: a rank-1 tensor
+   gives its only dimension, any other gives n = 1 with zero strides. */
+int X(tensor_tornk1)(const tensor *t, int *n, int *is, int *os)
+{
+     A(t->rnk <= 1);
+     if (t->rnk != 1) {
+          *n = 1;
+          *is = *os = 0;
+     } else {
+	  const iodim *only = &t->dims[0];
+          *n = only->n;
+          *is = only->is;
+          *os = only->os;
+     }
+     return 1;	/* unconditionally */
+}
+
+void X(tensor_print)(const tensor *x, printer *p)
+{
+     int k;
+
+     if (!FINITE_RNK(x->rnk)) {
+	  p->print(p, "rank-minfty");
+	  return;
+     }
+
+     /* finite rank: "(" then one "(n is os)" group per dimension,
+	separated by single spaces, then ")" */
+     p->print(p, "(");
+     for (k = 0; k < x->rnk; ++k) {
+	  const iodim *d = x->dims + k;
+	  p->print(p, "%s(%d %d %d)", k ? " " : "", d->n, d->is, d->os);
+     }
+     p->print(p, ")");
+}
diff --git a/src/fftw3/kernel/tensor1.c b/src/fftw3/kernel/tensor1.c
new file mode 100644
index 0000000..06ad4dc
--- /dev/null
+++ b/src/fftw3/kernel/tensor1.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: tensor1.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+tensor *X(mktensor_0d)(void)
+{
+     return X(mktensor)(0);	/* rank 0, hence no dimensions to fill in */
+}
+
+tensor *X(mktensor_1d)(int n, int is, int os)
+{
+     tensor *t = X(mktensor)(1);	/* rank 1: exactly one iodim to fill */
+     t->dims->os = os;
+     t->dims->is = is;
+     t->dims->n = n;
+     return t;
+}
diff --git a/src/fftw3/kernel/tensor2.c b/src/fftw3/kernel/tensor2.c
new file mode 100644
index 0000000..a45e164
--- /dev/null
+++ b/src/fftw3/kernel/tensor2.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: tensor2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+tensor *X(mktensor_2d)(int n0, int is0, int os0,
+                      int n1, int is1, int os1)
+{
+     tensor *t = X(mktensor)(2);	/* rank 2: dims[0] and dims[1] */
+     iodim *d = t->dims;
+
+     d[0].n = n0;   d[1].n = n1;
+     d[0].is = is0; d[1].is = is1;
+     d[0].os = os0; d[1].os = os1;
+
+     return t;
+}
+
diff --git a/src/fftw3/kernel/tensor4.c b/src/fftw3/kernel/tensor4.c
new file mode 100644
index 0000000..9b6cd28
--- /dev/null
+++ b/src/fftw3/kernel/tensor4.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: tensor4.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+int X(tensor_max_index)(const tensor *sz)
+{
+     int k, reach = 0;
+
+     A(FINITE_RNK(sz->rnk));
+     /* each dimension adds (n - 1) steps of the larger of |is| and |os| */
+     for (k = 0; k < sz->rnk; ++k) {
+          int step = X(imax)(X(iabs)(sz->dims[k].is), X(iabs)(sz->dims[k].os));
+          reach += (sz->dims[k].n - 1) * step;
+     }
+     return reach;
+}
+
+#define tensor_min_xstride(sz, xs) {			\
+     A(FINITE_RNK(sz->rnk));				\
+     if (sz->rnk == 0) return 0; /* no dims: 0 */	\
+     else {						\
+          int i;					\
+          int s = X(iabs)(sz->dims[0].xs);		\
+          for (i = 1; i < sz->rnk; ++i)			\
+               s = X(imin)(s, X(iabs)(sz->dims[i].xs));	\
+          return s;					\
+     }							\
+}
+/* the macro is a whole function body: smallest |xs| over all dimensions */
+int X(tensor_min_istride)(const tensor *sz) tensor_min_xstride(sz, is)
+int X(tensor_min_ostride)(const tensor *sz) tensor_min_xstride(sz, os)
+
+int X(tensor_min_stride)(const tensor *sz)	/* smaller of the two minima below */
+{
+     return X(imin)(X(tensor_min_istride)(sz), X(tensor_min_ostride)(sz));
+}
+
+int X(tensor_inplace_strides)(const tensor *sz)
+{
+     int k = 0;
+
+     A(FINITE_RNK(sz->rnk));
+     /* advance while is == os; stopping early means a mismatch */
+     while (k < sz->rnk && sz->dims[k].is == sz->dims[k].os)
+          ++k;
+
+     return k >= sz->rnk;
+}
+
+int X(tensor_inplace_strides2)(const tensor *a, const tensor *b)
+{
+     return X(tensor_inplace_strides)(a) && X(tensor_inplace_strides)(b);	/* b is skipped when a fails */
+}
diff --git a/src/fftw3/kernel/tensor5.c b/src/fftw3/kernel/tensor5.c
new file mode 100644
index 0000000..8144930
--- /dev/null
+++ b/src/fftw3/kernel/tensor5.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: tensor5.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+static void dimcpy(iodim *dst, const iodim *src, int rnk)
+{
+     int k;
+     if (!FINITE_RNK(rnk))
+          return;	/* nothing is copied for a non-finite rank */
+     for (k = 0; k < rnk; ++k) dst[k] = src[k];
+}
+
+tensor *X(tensor_copy)(const tensor *sz)
+{
+     tensor *x = X(mktensor)(sz->rnk);	/* new tensor of the same rank */
+     dimcpy(x->dims, sz->dims, sz->rnk);	/* dimcpy copies nothing for a non-finite rank */
+     return x;
+}
+
+/* Copy sz, then force the copy's strides in-place: k == INPLACE_OS
+   overwrites is with os; any other k overwrites os with is. */
+tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k)
+{
+     tensor *x = X(tensor_copy)(sz);
+     int i;
+
+     if (!FINITE_RNK(x->rnk))
+	  return x;
+     for (i = 0; i < x->rnk; ++i) {
+	  iodim *d = x->dims + i;
+	  if (k == INPLACE_OS) d->is = d->os;
+	  else d->os = d->is;
+     }
+     return x;
+}
+
+/* Like X(tensor_copy), but copy all of the dimensions *except*
+   except_dim.  The result has rank sz->rnk - 1. */
+tensor *X(tensor_copy_except)(const tensor *sz, int except_dim)
+{
+     tensor *x;
+
+     A(FINITE_RNK(sz->rnk) && sz->rnk >= 1 && except_dim < sz->rnk);
+     x = X(mktensor)(sz->rnk - 1);
+     dimcpy(x->dims, sz->dims, except_dim);	/* dimensions before except_dim */
+     dimcpy(x->dims + except_dim, sz->dims + except_dim + 1,
+            x->rnk - except_dim);	/* dimensions after it, moved down by one */
+     return x;
+}
+
+/* Like X(tensor_copy), but copy only rnk dimensions starting
+   with start_dim.  The result has rank rnk. */
+tensor *X(tensor_copy_sub)(const tensor *sz, int start_dim, int rnk)
+{
+     tensor *x;
+
+     A(FINITE_RNK(sz->rnk) && start_dim + rnk <= sz->rnk);	/* the slice must fit in sz */
+     x = X(mktensor)(rnk);
+     dimcpy(x->dims, sz->dims + start_dim, rnk);
+     return x;
+}
+
+tensor *X(tensor_append)(const tensor *a, const tensor *b)
+{
+     tensor *x;
+     if (FINITE_RNK(a->rnk) && FINITE_RNK(b->rnk)) {
+	  x = X(mktensor)(a->rnk + b->rnk);	/* a's dims first, then b's */
+	  dimcpy(x->dims, a->dims, a->rnk);
+	  dimcpy(x->dims + a->rnk, b->dims, b->rnk);
+     } else
+	  x = X(mktensor)(RNK_MINFTY);	/* either operand has non-finite rank */
+     return x;
+}
diff --git a/src/fftw3/kernel/tensor7.c b/src/fftw3/kernel/tensor7.c
new file mode 100644
index 0000000..ceae2c4
--- /dev/null
+++ b/src/fftw3/kernel/tensor7.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: tensor7.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+/* total order among iodim's; only the sign of the result is meaningful */
+int X(dimcmp)(const iodim *a, const iodim *b)
+{
+     if (b->is != a->is)	/* shorter is strides go later */
+          return (b->is > a->is) ? 1 : -1;	/* compare, don't subtract: no overflow */
+     if (b->os != a->os)	/* shorter os strides go later */
+          return (b->os > a->os) ? 1 : -1;
+     return (a->n > b->n) - (a->n < b->n);	/* larger n's go later */
+}
+
+/* Like tensor_copy, but drop every n == 1 dimension, since such a
+   dimension never affects any transform or transform vector.
+
+   The surviving dimensions are then sorted with X(dimcmp), i.e. into
+   a canonical order of decreasing is.  In general, processing a
+   loop/array in order of decreasing stride improves locality; sorting
+   also makes the analysis in X(tensor_compress_contiguous) (below)
+   easier.  The choice of is over os is mostly arbitrary, and hopefully
+   shouldn't affect things much.  Normally, either the os will be in
+   the same order as is (e.g. for multi-dimensional transforms) or in
+   the opposite order (e.g. for Cooley-Tukey recursion).  (Both forward
+   and backwards traversal of the tensor are considered e.g. by
+   vrank-geq1, so sorting in increasing vs. decreasing order is not
+   really important.) */
+tensor *X(tensor_compress)(const tensor *sz)
+{
+     int src, kept = 0;
+     tensor *x;
+
+     A(FINITE_RNK(sz->rnk));
+
+     /* pass 1: count the dimensions that survive (n != 1) */
+     for (src = 0; src < sz->rnk; ++src) {
+          A(sz->dims[src].n > 0);
+          kept += (sz->dims[src].n != 1);
+     }
+     x = X(mktensor)(kept);
+
+     /* pass 2: copy the survivors, keeping their relative order */
+     for (src = kept = 0; src < sz->rnk; ++src)
+          if (sz->dims[src].n != 1)
+               x->dims[kept++] = sz->dims[src];
+
+     /* sort only when something was kept: qsort() with n == 0 is not trusted */
+     if (kept)
+	  qsort(x->dims, (size_t)x->rnk, sizeof(iodim),
+		(int (*)(const void *, const void *))X(dimcmp));
+
+     return x;
+}
+
+/* Do a and b together act as one contiguous 1d array, i.e. is a's stride
+   b's stride times b's n, for both is and os?  Assumes a.is >= b.is. */
+static int strides_contig(iodim *a, iodim *b)
+{
+     if (a->is != b->is * (int)b->n) return 0;
+     return a->os == b->os * (int)b->n;
+}
+
+/* Like tensor_compress, but also compress into one dimension any
+   group of dimensions that form a contiguous block of indices with
+   some stride.  (This can safely be done for transform vector sizes.) */
+tensor *X(tensor_compress_contiguous)(const tensor *sz)
+{
+     int i, rnk;
+     tensor *sz2, *x;
+
+     if (X(tensor_sz)(sz) == 0) 	/* zero size: answer with a RNK_MINFTY tensor */
+	  return X(mktensor)(RNK_MINFTY);
+
+     sz2 = X(tensor_compress)(sz);	/* n == 1 dims dropped, the rest sorted */
+     A(FINITE_RNK(sz2->rnk));
+
+     if (sz2->rnk < 2)		/* nothing to compress */
+          return sz2;
+
+     for (i = rnk = 1; i < sz2->rnk; ++i)	/* count the groups after merging */
+          if (!strides_contig(sz2->dims + i - 1, sz2->dims + i))
+               ++rnk;
+
+     x = X(mktensor)(rnk);
+     x->dims[0] = sz2->dims[0];
+     for (i = rnk = 1; i < sz2->rnk; ++i) {
+          if (strides_contig(sz2->dims + i - 1, sz2->dims + i)) {
+               x->dims[rnk - 1].n *= sz2->dims[i].n;	/* fold dim i into the current group */
+               x->dims[rnk - 1].is = sz2->dims[i].is;	/* the group takes dim i's strides */
+               x->dims[rnk - 1].os = sz2->dims[i].os;
+          } else {
+               A(rnk < x->rnk);
+               x->dims[rnk++] = sz2->dims[i];	/* dim i starts a new group */
+          }
+     }
+
+     X(tensor_destroy)(sz2);	/* the intermediate tensor is not returned */
+     return x;
+}
+
+/* The inverse of X(tensor_append): splits the sz tensor into
+   tensor a followed by tensor b, where a's rank is arnk. */
+void X(tensor_split)(const tensor *sz, tensor **a, int arnk, tensor **b)
+{
+     A(FINITE_RNK(sz->rnk) && FINITE_RNK(arnk));
+
+     *a = X(tensor_copy_sub)(sz, 0, arnk);	/* the first arnk dimensions */
+     *b = X(tensor_copy_sub)(sz, arnk, sz->rnk - arnk);	/* all remaining dimensions */
+}
diff --git a/src/fftw3/kernel/tensor8.c b/src/fftw3/kernel/tensor8.c
new file mode 100644
index 0000000..05e9b47
--- /dev/null
+++ b/src/fftw3/kernel/tensor8.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: tensor8.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+void X(tensor_destroy2)(tensor *a, tensor *b)	/* destroy two tensors, a first */
+{
+     X(tensor_destroy)(a);
+     X(tensor_destroy)(b);
+}
+
+void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d)
+{
+     X(tensor_destroy)(a); X(tensor_destroy)(b);	/* in argument order */
+     X(tensor_destroy)(c); X(tensor_destroy)(d);
+}
diff --git a/src/fftw3/kernel/tensor9.c b/src/fftw3/kernel/tensor9.c
new file mode 100644
index 0000000..33ddf45
--- /dev/null
+++ b/src/fftw3/kernel/tensor9.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: tensor9.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+int X(tensor_kosherp)(const tensor *x)
+{
+     int k;
+
+     if (x->rnk < 0)		/* a negative rank is never accepted */
+	  return 0;
+     if (!FINITE_RNK(x->rnk))	/* no dimensions to inspect */
+	  return 1;
+
+     for (k = 0; k < x->rnk; ++k)
+	  if (x->dims[k].n < 0) return 0;	/* negative n: reject */
+     return 1;
+}
diff --git a/src/fftw3/kernel/timer.c b/src/fftw3/kernel/timer.c
new file mode 100644
index 0000000..72969ad
--- /dev/null
+++ b/src/fftw3/kernel/timer.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: timer.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "ifftw.h"
+
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>
+#endif
+
+#ifndef WITH_SLOW_TIMER
+#  include "cycle.h"
+#else
+#  if TIME_WITH_SYS_TIME
+#   include <sys/time.h>
+#   include <time.h>
+#  else
+#   if HAVE_SYS_TIME_H
+#    include <sys/time.h>
+#   else
+#    include <time.h>
+#   endif
+#  endif
+#endif
+
+#ifndef FFTW_TIME_LIMIT
+#define FFTW_TIME_LIMIT 2.0  /* don't run for more than two seconds */
+#endif
+
+#ifdef HAVE_BSDGETTIMEOFDAY
+#ifndef HAVE_GETTIMEOFDAY
+#define gettimeofday BSDgettimeofday
+#define HAVE_GETTIMEOFDAY 1
+#endif
+#endif
+
+#if defined(HAVE_GETTIMEOFDAY) && !defined(HAVE_SECONDS_TIMER)
+typedef struct timeval seconds;
+
+static seconds getseconds(void)
+{
+     seconds now;
+
+     gettimeofday(&now, 0);	/* the return value of gettimeofday is not checked */
+     return now;
+}
+
+static double elapsed_sec(seconds t1, seconds t0)
+{
+     double whole = (double)(t1.tv_sec - t0.tv_sec);	/* whole-second part of t1 - t0 */
+     return whole + (double)(t1.tv_usec - t0.tv_usec) * 1.0E-6;
+}
+
+#  define TIME_MIN_SEC 1.0e-2 /* from fftw2 */
+#  define HAVE_SECONDS_TIMER
+#endif
+
+#ifndef HAVE_SECONDS_TIMER
+#  include <time.h>
+
+typedef clock_t seconds;
+
+static seconds getseconds(void) { return clock(); }	/* fallback: seconds is clock_t here */
+
+static double elapsed_sec(seconds t1, seconds t0)	/* t1 - t0 in seconds */
+{
+     return ((double) (t1 - t0)) / CLOCKS_PER_SEC;	/* clock ticks scaled to seconds */
+}
+
+#  define TIME_MIN_SEC 2.0e-1 /* from fftw2 */
+#  define HAVE_SECONDS_TIMER
+#endif
+
+#ifdef WITH_SLOW_TIMER
+/* excruciatingly slow; only use this if there is no choice! */
+typedef seconds ticks;
+#  define getticks getseconds
+#  define elapsed elapsed_sec
+#  define TIME_MIN TIME_MIN_SEC
+#  define TIME_REPEAT 4 /* from fftw2 */
+#  define HAVE_TICK_COUNTER
+#endif
+
+#ifdef HAVE_TICK_COUNTER
+
+#  ifndef TIME_MIN
+#    define TIME_MIN 100.0
+#  endif
+
+#  ifndef TIME_REPEAT
+#    define TIME_REPEAT 8
+#  endif
+
+  static double measure(plan *pln, const problem *p, int iter)
+  {
+       ticks start, stop;
+       int left;
+
+       start = getticks();
+       for (left = iter; left > 0; --left)	/* iter back-to-back solves */
+	    pln->adt->solve(pln, p);
+       stop = getticks();
+       return elapsed(stop, start);	/* time for the whole batch */
+  }
+
+
+  double X(measure_execution_time)(plan *pln, const problem *p)
+  {
+       seconds begin, now;
+       double t, tmax, tmin;
+       int iter;
+       int repeat;
+       /* returns the smallest per-solve time seen, in elapsed() units */
+       AWAKE(pln, 1);
+       p->adt->zero(p);
+       /* NOTE(review): the for loop ends only when iter *= 2 reaches 0, i.e. after signed overflow -- confirm */
+  start_over:
+       for (iter = 1; iter; iter *= 2) { /* double the solves per sample */
+	    tmin = 1.0E10;
+	    tmax = -1.0E10;
+
+	    begin = getseconds();
+	    /* repeat the measurement TIME_REPEAT times */
+	    for (repeat = 0; repeat < TIME_REPEAT; ++repeat) {
+		 t = measure(pln, p, iter);
+
+		 if (t < 0) /* negative elapsed time: discard everything and restart */
+		      goto start_over;
+
+		 if (t < tmin)
+		      tmin = t;
+		 if (t > tmax)
+		      tmax = t;
+
+		 /* do not run for too long */
+		 now = getseconds();
+		 t = elapsed_sec(now, begin);
+
+		 if (t > FFTW_TIME_LIMIT)
+		      break;
+	    }
+
+	    if (tmin >= TIME_MIN) { /* accept only once the fastest sample reaches TIME_MIN */
+		 tmin /= (double) iter; /* time per single solve */
+		 tmax /= (double) iter; /* NOTE(review): tmax is never read after this */
+		 AWAKE(pln, 0);
+		 return tmin;
+	    }
+       }
+       goto start_over; /* may happen if timer is screwed up */
+  }
+
+#else /* no cycle counter */
+
+  double X(measure_execution_time)(plan *pln, const problem *p)	/* stub used when HAVE_TICK_COUNTER is undefined */
+  {
+       UNUSED(p);
+       UNUSED(pln);
+       return -1.0;	/* always -1.0: nothing is measured here */
+  }
+
+#endif
diff --git a/src/fftw3/kernel/transpose.c b/src/fftw3/kernel/transpose.c
new file mode 100644
index 0000000..fb489bd
--- /dev/null
+++ b/src/fftw3/kernel/transpose.c
@@ -0,0 +1,430 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* transposes of unit-stride arrays, including arrays of N-tuples and
+   non-square matrices, using cache-oblivious recursive algorithms */
+
+#include "ifftw.h"
+#include <string.h> /* memcpy */
+
+#define CUTOFF 8 /* size below which we do a naive transpose */
+
+/*************************************************************************/
+/* some utilities for the solvers */
+/* Nonzero iff s == 2, ri and ii are one element apart (either order), and the strides of a and b match one of the two layouts tested below: one stride equal to 2*vl and the other equal to (the other dimension's n) * 2*vl, with input/output roles swapped between a and b. */
+static int Ntuple_transposable(const iodim *a, const iodim *b,
+			       int vl, int s, R *ri, R *ii)
+{
+     return(2 == s && (ii == ri + 1 || ri == ii + 1)
+	    &&
+	    ((a->is == b->os && a->is == (vl*2)
+	      && a->os == b->n * (vl*2) && b->is == a->n * (vl*2))
+	     ||
+	     (a->os == b->is && a->os == (vl*2)
+	      && a->is == b->n * (vl*2) && b->os == a->n * (vl*2))));
+}
+
+
+/* our solvers' transpose routines work for square matrices of arbitrary
+   stride, or for non-square matrices of a given vl*vl2 corresponding
+   to the N of the Ntuple with vl2 == s.  Returns nonzero if a and b have equal n with is/os swapped, or if Ntuple_transposable() accepts them. */
+int X(transposable)(const iodim *a, const iodim *b,
+		    int vl, int s, R *ri, R *ii)
+{
+     return ((a->n == b->n && a->os == b->is && a->is == b->os)
+	     || Ntuple_transposable(a, b, vl, s, ri, ii));
+}
+/* Euclid's algorithm: greatest common divisor of positive a and b.  b must be nonzero because a % b is evaluated before any test. */
+static int gcd(int a, int b)
+{
+     int r;
+     do {
+	  r = a % b;
+	  a = b;
+	  b = r;
+     } while (r != 0);
+     
+     return a;
+}
+
+/* all of the solvers need to extract n, m, d, n/d, and m/d from the
+   two iodims, so we put it here to save code space.  Outputs: *n from the dimension with the larger input stride, *m from the other, *d = gcd(n, m), *nd = n/d, *md = m/d. */
+void X(transpose_dims)(const iodim *a, const iodim *b,
+		       int *n, int *m, int *d, int *nd, int *md)
+{
+     int n0, m0, d0;
+     /* matrix should be n x m, row-major */
+     if (a->is < b->is) {
+	  *n = n0 = b->n;
+	  *m = m0 = a->n;
+     }
+     else {
+	  *n = n0 = a->n;
+	  *m = m0 = b->n;
+     }
+     *d = d0 = gcd(n0, m0);
+     *nd = n0 / d0;
+     *md = m0 / d0;
+}
+
+/* use the simple square transpose in the solver for square matrices
+   that aren't too big or which have the wrong stride: nonzero iff a->n == b->n and either a->n * (vl*2) < CUTOFF or Ntuple_transposable() rejects the pair */
+int X(transpose_simplep)(const iodim *a, const iodim *b, int vl, int s,
+			 R *ri, R *ii)
+{
+     return (a->n == b->n &&
+	     (a->n*(vl*2) < CUTOFF 
+	      ||  !Ntuple_transposable(a, b, vl, s, ri, ii)));
+}
+
+/* use the slow general transpose if the buffer would be more than 1/8
+   the whole transpose and the transpose is fairly big: nonzero iff gcd(a->n, b->n) < 8 and a->n * b->n * N / gcd > 65536.
+   (FIXME: use the CONSERVE_MEMORY flag?)  NOTE(review): the product is formed in int and could overflow for very large dimensions. */
+int X(transpose_slowp)(const iodim *a, const iodim *b, int N)
+{
+     int d = gcd(a->n, b->n);
+     return (d < 8 && (a->n * b->n * N) / d > 65536);
+}
+
+/*************************************************************************/
+/* Out-of-place transposes: */
+
+/* Transpose A (n x m) to B (m x n), where A and B are stored
+   as n x fda and m x fdb arrays, respectively, operating on N-tuples: */
+static void rec_transpose_Ntuple(R *A, R *B, int n, int m, int fda, int fdb,
+			  int N)
+{
+     if (n == 1 || m == 1 || (n + m) * N < CUTOFF*2) { /* base case: copy tuple by tuple */
+	  int i, j, k;
+	  for (i = 0; i < n; ++i) {
+	       for (j = 0; j < m; ++j) {
+		    for (k = 0; k < N; ++k) { /* FIXME: unroll */
+			 B[(j*fdb + i) * N + k] = A[(i*fda + j) * N + k];
+		    }
+	       }
+	  }
+     }
+     else if (n > m) { /* more rows than columns: halve the rows and recurse on each half */
+	  int n2 = n / 2;
+	  rec_transpose_Ntuple(A, B, n2, m, fda, fdb, N);
+	  rec_transpose_Ntuple(A + n2*N*fda, B + n2*N, n - n2, m, fda, fdb, N);
+     }
+     else { /* otherwise halve the columns and recurse on each half */
+	  int m2 = m / 2;
+	  rec_transpose_Ntuple(A, B, n, m2, fda, fdb, N);
+	  rec_transpose_Ntuple(A + m2*N, B + m2*N*fdb, n, m - m2, fda, fdb, N);
+     }
+}
+
+/*************************************************************************/
+/* In-place transposes of square matrices of N-tuples: */
+
+/* Transpose both A and B, where A is n x m and B is m x n, storing
+   the transpose of A in B and the transpose of B in A.  A and B
+   are actually stored as n x fda and m x fda arrays (one shared leading dimension), holding N-tuples. */
+static void rec_transpose_swap_Ntuple(R *A, R *B, int n, int m, int fda, int N)
+{
+     if (n == 1 || m == 1 || (n + m) * N <= CUTOFF*2) { /* base case: swap element pairs directly */
+	  switch (N) {
+	      case 1: { /* scalar elements */
+		   int i, j;
+		   for (i = 0; i < n; ++i) {
+			for (j = 0; j < m; ++j) {
+			     R a = A[(i*fda + j)];
+			     A[(i*fda + j)] = B[(j*fda + i)];
+			     B[(j*fda + i)] = a;
+			}
+		   }
+		   break;
+	      }
+	      case 2: { /* pairs, with the inner tuple loop written out */
+		   int i, j;
+		   for (i = 0; i < n; ++i) {
+			for (j = 0; j < m; ++j) {
+			     R a0 = A[(i*fda + j) * 2 + 0];
+			     R a1 = A[(i*fda + j) * 2 + 1];
+			     A[(i*fda + j) * 2 + 0] = B[(j*fda + i) * 2 + 0];
+			     A[(i*fda + j) * 2 + 1] = B[(j*fda + i) * 2 + 1];
+			     B[(j*fda + i) * 2 + 0] = a0;
+			     B[(j*fda + i) * 2 + 1] = a1;
+			}
+		   }
+		   break;
+	      }
+	      default: { /* general tuple length N */
+		   int i, j, k;
+		   for (i = 0; i < n; ++i) {
+			for (j = 0; j < m; ++j) {
+			     for (k = 0; k < N; ++k) {
+				  R a = A[(i*fda + j) * N + k];
+				  A[(i*fda + j) * N + k] = 
+				       B[(j*fda + i) * N + k];
+				  B[(j*fda + i) * N + k] = a;
+			     }
+			}
+		   }
+	      }
+	  }
+     } else if (n > m) { /* halve the rows of A (columns of B) and recurse */
+	  int n2 = n / 2;
+	  rec_transpose_swap_Ntuple(A, B, n2, m, fda, N);
+	  rec_transpose_swap_Ntuple(A + n2*N*fda, B + n2*N, n - n2, m, fda, N);
+     }
+     else { /* halve the columns of A (rows of B) and recurse */
+	  int m2 = m / 2;
+	  rec_transpose_swap_Ntuple(A, B, n, m2, fda, N);
+	  rec_transpose_swap_Ntuple(A + m2*N, B + m2*N*fda, n, m - m2, fda, N);
+     }
+}
+
+/* Transpose A, an n x n matrix of N-tuples (stored as n x fda), in-place: small matrices are swapped across the diagonal directly; larger ones are split into two diagonal sub-blocks (recursed on) and two off-diagonal blocks (exchanged by rec_transpose_swap_Ntuple). */
+static void rec_transpose_sq_ip_Ntuple(R *A, int n, int fda, int N)
+{
+     if (n == 1)
+	  return;
+     else if (n*N <= CUTOFF) { /* base case: swap the strict upper triangle with the lower one */
+	  switch (N) {
+	      case 1: { /* scalar elements */
+		   int i, j;
+		   for (i = 0; i < n; ++i) {
+			for (j = i + 1; j < n; ++j) {
+			     R a = A[(i*fda + j)];
+			     A[(i*fda + j)] = A[(j*fda + i)];
+			     A[(j*fda + i)] = a;
+			}
+		   }
+		   break;
+	      }
+	      case 2: { /* pairs, with the inner tuple loop written out */
+		   int i, j;
+		   for (i = 0; i < n; ++i) {
+			for (j = i + 1; j < n; ++j) {
+			     R a0 = A[(i*fda + j) * 2 + 0];
+			     R a1 = A[(i*fda + j) * 2 + 1];
+			     A[(i*fda + j) * 2 + 0] = A[(j*fda + i) * 2 + 0];
+			     A[(i*fda + j) * 2 + 1] = A[(j*fda + i) * 2 + 1];
+			     A[(j*fda + i) * 2 + 0] = a0;
+			     A[(j*fda + i) * 2 + 1] = a1;
+			}
+		   }
+		   break;
+	      }
+	      default: { /* general tuple length N */
+		   int i, j, k;
+		   for (i = 0; i < n; ++i) {
+			for (j = i + 1; j < n; ++j) {
+			     for (k = 0; k < N; ++k) {
+				  R a = A[(i*fda + j) * N + k];
+				  A[(i*fda + j) * N + k] = 
+				       A[(j*fda + i) * N + k];
+				  A[(j*fda + i) * N + k] = a;
+			     }
+			}
+		   }
+	      }
+	  }
+     } else { /* recurse on the two diagonal blocks, then exchange the off-diagonal blocks */
+	  int n2 = n / 2;
+	  rec_transpose_sq_ip_Ntuple(A, n2, fda, N);
+	  rec_transpose_sq_ip_Ntuple((A + n2*N) + n2*N*fda, n - n2, fda, N);
+	  rec_transpose_swap_Ntuple(A + n2*N, A + n2*N*fda, n2, n - n2, fda,N);
+     }
+}
+
+/*************************************************************************/
+/* In-place transposes of non-square matrices: */
+
+/* Transpose the matrix A in-place, where A is an (n*d) x (m*d) matrix
+   of N-tuples and buf contains at least n*m*d*N elements.  In
+   general, to transpose a p x q matrix, you should call this routine
+   with d = gcd(p, q), n = p/d, and m = q/d.  All four integer arguments must be positive (asserted). */
+void X(transpose)(R *A, int n, int m, int d, int N, R *buf)
+{
+     A(n > 0 && m > 0 && N > 0 && d > 0);
+     if (d == 1) { /* n x m: transpose out-of-place into buf, then copy the result back over A */
+	  rec_transpose_Ntuple(A, buf, n,m, m,n, N);
+	  memcpy(A, buf, m*n*N*sizeof(R));
+     }
+     else if (n*m == 1) { /* square d x d matrix: transpose in place, buf is not used */
+	  rec_transpose_sq_ip_Ntuple(A, d, d, N);
+     }
+     else {
+	  int i, num_el = n*m*d*N; /* number of R elements handled per pass through buf */
+
+	  /* treat as (d x n) x (d' x m) matrix.  (d' = d) */
+
+	  /* First, transpose d x (n x d') x m to d x (d' x n) x m,
+	     using the buf matrix.  This consists of d transposes
+	     of contiguous n x d' matrices of m-tuples. */
+	  if (n > 1) {
+	       for (i = 0; i < d; ++i) {
+		    rec_transpose_Ntuple(A + i*num_el, buf,
+					 n,d, d,n, m*N);
+		    memcpy(A + i*num_el, buf, num_el*sizeof(R));
+	       }
+	  }
+	  
+	  /* Now, transpose (d x d') x (n x m) to (d' x d) x (n x m), which
+	     is a square in-place transpose of n*m-tuples: */
+	  rec_transpose_sq_ip_Ntuple(A, d, d, n*m*N);
+
+	  /* Finally, transpose d' x ((d x n) x m) to d' x (m x (d x n)),
+	     using the buf matrix.  This consists of d' transposes
+	     of contiguous d*n x m matrices. */
+	  if (m > 1) {
+	       for (i = 0; i < d; ++i) {
+		    rec_transpose_Ntuple(A + i*num_el, buf,
+					 d*n,m, m,d*n, N);
+		    memcpy(A + i*num_el, buf, num_el*sizeof(R));
+	       }
+	  }
+     }
+}
+
+/*************************************************************************/
+/* In-place transpose routine from TOMS.  This routine is much slower
+   than the cache-oblivious algorithm above, but it has the advantage
+   of requiring less buffer space for the case of gcd(nx,ny) small. */
+
+/*
+ * TOMS Transpose.  Revised version of algorithm 380.
+ * 
+ * These routines do in-place transposes of arrays.
+ * 
+ * [ Cate, E.G. and Twigg, D.W., ACM Transactions on Mathematical Software, 
+ *   vol. 3, no. 1, 104-110 (1977) ]
+ * 
+ * C version by Steven G. Johnson. February 1997.
+ */
+
+/*
+ * "a" is a 1D array of length ny*nx*N which contains the nx x ny
+ * matrix of N-tuples to be transposed.  "a" is stored in row-major
+ * order (last index varies fastest).  move is a 1D array of length
+ * move_size used to store information to speed up the process.  The
+ * value move_size=(ny+nx)/2 is recommended.  buf should be an array
+ * of length 2*N.
+ * A square matrix (ny == nx) is handled by swapping a(i,j) with a(j,i) directly; move is not touched in that case.
+ */
+
+void X(transpose_slow)(R *a, int nx, int ny, int N,
+		       char *move, int move_size, R *buf)
+{
+     int i, j, im, mn;
+     R *b, *c, *d;
+     int ncount;
+     int k;
+     
+     /* check arguments and initialize: */
+     A(ny > 0 && nx > 0 && N > 0 && move_size > 0);
+     
+     b = buf; /* first N-tuple of scratch space */
+     
+     if (ny == nx) {
+	  /*
+	   * if matrix is square, exchange elements a(i,j) and a(j,i):
+	   */
+	  for (i = 0; i < nx; ++i)
+	       for (j = i + 1; j < nx; ++j) {
+		    memcpy(b, &a[N * (i + j * nx)], N * sizeof(R));
+		    memcpy(&a[N * (i + j * nx)], &a[N * (j + i * nx)], N * sizeof(R));
+		    memcpy(&a[N * (j + i * nx)], b, N * sizeof(R));
+	       }
+	  return;
+     }
+     c = buf + N; /* second N-tuple of scratch space */
+     ncount = 2;		/* always at least 2 fixed points */
+     k = (mn = ny * nx) - 1;
+     
+     for (i = 0; i < move_size; ++i) /* clear the "already moved" flags */
+	  move[i] = 0;
+     
+     if (ny >= 3 && nx >= 3)
+	  ncount += gcd(ny - 1, nx - 1) - 1;	/* # fixed points */
+     
+     i = 1;
+     im = ny;
+     
+     while (1) {
+	  int i1, i2, i1c, i2c;
+	  int kmi;
+	  
+	  /** Rearrange the elements of a loop
+	      and its companion loop: **/
+	  
+	  i1 = i;
+	  kmi = k - i;
+	  memcpy(b, &a[N * i1], N * sizeof(R)); /* save the tuple that starts this loop */
+	  i1c = kmi;
+	  memcpy(c, &a[N * i1c], N * sizeof(R)); /* and the one that starts the companion loop */
+	  
+	  while (1) {
+	       i2 = ny * i1 - k * (i1 / nx);
+	       i2c = k - i2;
+	       if (i1 < move_size)
+		    move[i1] = 1;
+	       if (i1c < move_size)
+		    move[i1c] = 1;
+	       ncount += 2;
+	       if (i2 == i) /* loop closed on its own starting index */
+		    break;
+	       if (i2 == kmi) { /* loop closed on the companion's start: exchange the two saved tuples */
+		    d = b;
+		    b = c;
+		    c = d;
+		    break;
+	       }
+	       memcpy(&a[N * i1], &a[N * i2], 
+		      N * sizeof(R));
+	       memcpy(&a[N * i1c], &a[N * i2c], 
+		      N * sizeof(R));
+	       i1 = i2;
+	       i1c = i2c;
+	  }
+	  memcpy(&a[N * i1], b, N * sizeof(R)); /* drop the saved tuples into the last slots visited */
+	  memcpy(&a[N * i1c], c, N * sizeof(R));
+	  
+	  if (ncount >= mn)
+	       break;	/* we've moved all elements */
+	  
+	  /** Search for loops to rearrange: **/
+	  
+	  while (1) {
+	       int max = k - i;
+	       ++i;
+	       A(i <= max);
+	       im += ny;
+	       if (im > k)
+		    im -= k;
+	       i2 = im;
+	       if (i == i2)
+		    continue;
+	       if (i >= move_size) { /* no flag stored for this index: follow the chain from im instead */
+		    while (i2 > i && i2 < max) {
+			 i1 = i2;
+			 i2 = ny * i1 - k * (i1 / nx);
+		    }
+		    if (i2 == i)
+			 break;
+	       } else if (!move[i]) /* flag says this index has not been moved yet */
+		    break;
+	  }
+     }
+}
diff --git a/src/fftw3/kernel/trig.c b/src/fftw3/kernel/trig.c
new file mode 100644
index 0000000..0d4c3b2
--- /dev/null
+++ b/src/fftw3/kernel/trig.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: trig.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* trigonometric functions */
+#include "ifftw.h"
+#include <math.h>
+/* Cosine entry point: converts m and n to trigreal and calls X(sincos) with sinp = 0. */
+trigreal X(cos2pi)(int m, int n)
+{
+     return X(sincos)((trigreal)m, (trigreal)n, 0);
+}
+/* Sine entry point: converts m and n to trigreal and calls X(sincos) with sinp = 1. */
+trigreal X(sin2pi)(int m, int n)
+{
+     return X(sincos)((trigreal)m, (trigreal)n, 1);
+}
+/* Placeholder: the real implementation is compiled out below, so this ignores m and n and always returns 0.0. */
+trigreal X(tan2pi)(int m, int n)
+{
+#if 0      /* unimplemented, unused */
+     trigreal dm = m, dn = n;
+     return TAN(by2pi(dm, dn));
+#endif
+     UNUSED(m); UNUSED(n);
+     return 0.0;
+}
diff --git a/src/fftw3/kernel/trig1.c b/src/fftw3/kernel/trig1.c
new file mode 100644
index 0000000..b86362c
--- /dev/null
+++ b/src/fftw3/kernel/trig1.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: trig1.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* trigonometric functions */
+#include "ifftw.h"
+#include <math.h>
+
+#ifdef FFTW_LDOUBLE
+#  define COS cosl
+#  define SIN sinl
+#  define TAN tanl
+#  define KTRIG(x) (x##L) /* append the L suffix to a floating constant */
+#else
+#  define COS cos
+#  define SIN sin
+#  define TAN tan
+#  define KTRIG(x) (x) /* leave the constant unsuffixed */
+#endif
+
+static const trigreal K2PI =
+    KTRIG(6.2831853071795864769252867665590057683943388); /* 2 * pi */
+#define by2pi(m, n) ((K2PI * (m)) / (n)) /* 2 * pi * m / n */
+
+/*
+ * Improve accuracy by reducing x to range [0..1/8]
+ * before multiplication by 2 * PI.  Returns SIN(2*pi*m/n) when sinp is nonzero,
+ * otherwise COS(2*pi*m/n); sgn carries the sign flips made during the reduction. */
+
+trigreal X(sincos)(trigreal m, trigreal n, int sinp)
+{
+     /* waiting for C to get tail recursion... */
+     trigreal half_n = n * KTRIG(0.5);
+     trigreal quarter_n = half_n * KTRIG(0.5);
+     trigreal eighth_n = quarter_n * KTRIG(0.5);
+     trigreal sgn = KTRIG(1.0);
+
+     if (sinp) goto sin;
+ cos:
+     if (m < 0) { m = -m; /* goto cos; */ }
+     if (m > half_n) { m = n - m; goto cos; } /* reflect about n/2: the cosine keeps its sign */
+     if (m > eighth_n) { m = quarter_n - m; goto sin; } /* cos(x) = sin(pi/2 - x) */
+     return sgn * COS(by2pi(m, n));
+
+ msin:
+     sgn = -sgn; /* the reduction just applied negates the sine */
+ sin:
+     if (m < 0) { m = -m; goto msin; }
+     if (m > half_n) { m = n - m; goto msin; }
+     if (m > eighth_n) { m = quarter_n - m; goto cos; } /* sin(x) = cos(pi/2 - x) */
+     return sgn * SIN(by2pi(m, n));
+}
diff --git a/src/fftw3/kernel/twiddle.c b/src/fftw3/kernel/twiddle.c
new file mode 100644
index 0000000..785bdd7
--- /dev/null
+++ b/src/fftw3/kernel/twiddle.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: twiddle.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* Twiddle manipulation */
+
+#include "ifftw.h"
+#include <math.h>
+
+/* table of known twiddle factors: head of a singly linked list chained through the cdr field */
+static twid *twlist = (twid *) 0;
+/* Compare two twiddle instruction programs on op, v and i, up to and including the terminating TW_NEXT; returns 1 if they match (or are the same pointer), 0 at the first difference. */
+static int equal_instr(const tw_instr *p, const tw_instr *q)
+{
+     if (p == q)
+          return 1;
+
+     for (;; ++p, ++q) {
+          if (p->op != q->op || p->v != q->v || p->i != q->i)
+               return 0;
+          if (p->op == TW_NEXT)  /* == q->op */
+               return 1;
+     }
+     A(0 /* can't happen */);
+}
+/* Nonzero iff cached entry t can serve a request (q, n, r, m): n and r equal, m no larger than t->m, and identical instruction programs. */
+static int ok_twid(const twid *t, const tw_instr *q, int n, int r, int m)
+{
+     return (n == t->n && r == t->r && m <= t->m && equal_instr(t->instr, q));
+}
+/* Walk twlist and return the first entry accepted by ok_twid(), or a null pointer if there is none. */
+static twid *lookup(const tw_instr *q, int n, int r, int m)
+{
+     twid *p;
+
+     for (p = twlist; p && !ok_twid(p, q, n, r, m); p = p->cdr)
+          ;
+     return p;
+}
+/* Count the values one pass over the program at *pp produces (TW_FULL: 2*(r-1), TW_GENERIC: 2*r, any other op: 1); on return *pp points at the terminating TW_NEXT instruction. */
+static int twlen0(int r, const tw_instr **pp)
+{
+     int ntwiddle = 0;
+     const tw_instr *p = *pp;
+
+     /* compute length of bytecode program */
+     A(r > 0);
+     for ( ; p->op != TW_NEXT; ++p) {
+	  switch (p->op) {
+	      case TW_FULL:
+		   ntwiddle += (r - 1) * 2;
+		   break;
+	      case TW_GENERIC:
+		   ntwiddle += r * 2;
+		   break;
+	      default:
+		   ++ntwiddle;
+	  }
+     }
+
+     *pp = p;
+     return ntwiddle;
+}
+/* Public wrapper around twlen0(): returns the per-pass twiddle count for program p; the advanced program pointer is discarded. */
+int X(twiddle_length)(int r, const tw_instr *p)
+{
+     return twlen0(r, &p);
+}
+/* Allocate and fill the twiddle table for program instr: for j = 0, v, 2v, ... < m (v taken from the TW_NEXT instruction) the program is run once, appending its values to the table.  Returns the start of the table. */
+static R *compute(const tw_instr *instr, int n, int r, int m)
+{
+     int ntwiddle, j;
+     R *W, *W0;
+     const tw_instr *p;
+
+     static trigreal (*const f[])(int, int) = { 
+	  X(cos2pi), X(sin2pi), X(tan2pi) 
+     };
+
+     p = instr;
+     ntwiddle = twlen0(r, &p); /* leaves p on the TW_NEXT instruction, whose v is the step of j */
+
+     W0 = W = (R *)MALLOC(ntwiddle * (m / p->v) * sizeof(R), TWIDDLES);
+
+     for (j = 0; j < m; j += p->v) { /* the inner loop ends with p back on TW_NEXT, so p->v is the step here too */
+          for (p = instr; p->op != TW_NEXT; ++p) {
+	       switch (p->op) {
+		   case TW_FULL: /* (cos, sin) pairs for i = 1 .. r-1 */
+		   {
+			int i;
+			A((int)p->i == r); /* consistency check */
+			for (i = 1; i < r; ++i) {
+			     *W++ = f[TW_COS]((j + p->v) * i, n);
+			     *W++ = f[TW_SIN]((j + p->v) * i, n);
+			}
+			break;
+		   }
+
+		   case TW_GENERIC: /* (cos, FFT_SIGN * sin) pairs for k = j*r .. j*r + r-1 */
+		   {
+			int i;
+			A(p->v == 0); /* unused */
+			A(p->i == 0); /* unused */
+			for (i = 0; i < r; ++i) {
+			     int k = j * r + i;
+			     *W++ = f[TW_COS](k, n);
+			     *W++ = FFT_SIGN * f[TW_SIN](k, n);
+			}
+			break;
+		   }
+		   
+		   default: /* single value: the op itself indexes the function table f */
+			*W++ = f[p->op](((signed int)(j + p->v)) * p->i, n);
+			break;
+	       }
+	  }
+          A(m % p->v == 0);
+     }
+
+     return W0;
+}
+/* Ensure *pp refers to a twiddle table for (instr, n, r, m): no-op if *pp is already set; otherwise share a matching cached entry (incrementing refcnt) or build a new one and push it on twlist. */
+void X(mktwiddle)(twid **pp, const tw_instr *instr, int n, int r, int m)
+{
+     twid *p;
+
+     if (*pp) return;  /* already created */
+
+     if ((p = lookup(instr, n, r, m))) { /* cache hit: take another reference */
+          ++p->refcnt;
+	  goto done;
+     }
+
+     p = (twid *) MALLOC(sizeof(twid), TWIDDLES);
+     p->n = n;
+     p->r = r;
+     p->m = m;
+     p->instr = instr; /* the pointer is stored, not a copy of the program */
+     p->refcnt = 1;
+     p->W = compute(instr, n, r, m);
+
+     /* cons! onto twlist */
+     p->cdr = twlist;
+     twlist = p;
+
+ done:
+     *pp = p;
+     return;
+}
+/* Release the reference held in *pp: when refcnt drops to zero the entry is unlinked from twlist and both its table W and the entry itself are freed.  *pp is always reset to 0. */
+void X(twiddle_destroy)(twid **pp)
+{
+     twid *p = *pp;
+     if (p) {
+          twid **q;
+          if ((--p->refcnt) == 0) {
+               /* remove p from twiddle list */
+               for (q = &twlist; *q; q = &((*q)->cdr)) {
+                    if (*q == p) {
+                         *q = p->cdr;
+                         X(ifree)(p->W);
+                         X(ifree)(p);
+			 goto done;
+                    }
+               }
+               A(0 /* can't happen */ );
+          }
+     }
+ done:
+     *pp = 0; /* destroy pointer */
+     return;
+}
+
+/* Nonzero flg creates (or shares) the twiddle table via X(mktwiddle); zero flg releases it via X(twiddle_destroy). */
+void X(twiddle_awake)(int flg, twid **pp, 
+		      const tw_instr *instr, int n, int r, int m)
+{
+     if (flg) 
+	  X(mktwiddle)(pp, instr, n, r, m);
+     else 
+	  X(twiddle_destroy)(pp);
+}
diff --git a/src/fftw3/rdft/buffered2.c b/src/fftw3/rdft/buffered2.c
new file mode 100644
index 0000000..54fc7bc
--- /dev/null
+++ b/src/fftw3/rdft/buffered2.c
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: buffered2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+typedef struct { /* tuning parameters of the buffered solver */
+     int nbuf; /* passed to X(compute_nbuf) as the buffer-count parameter */
+     int maxbufsz; /* passed to X(compute_nbuf); also the n above which toobig() fires */
+     int skew_alignment; /* modulus used when padding bufdist */
+     int skew; /* target value of bufdist % skew_alignment */
+     const char *nam; /* solver name printed by print() */
+} bufadt;
+
+typedef struct { /* solver object: generic solver header plus its tuning table */
+     solver super;
+     const bufadt *adt; /* set by mksolver() */
+} S;
+
+typedef struct { /* plan object built by mkplan() */
+     plan_rdft2 super;
+
+     plan *cld, *cldrest; /* child plans: nbuf transforms per call, and the vl % nbuf leftovers */
+     int n, vl, nbuf, bufdist; /* transform size, vector length, buffer count, distance between buffers */
+     int os, ivs, ovs; /* stride of rio/iio, and per-step input/output vector strides as set in mkplan() */
+
+     const S *slv; /* solver that created this plan */
+} P;
+
+/***************************************************************************/
+
+/* FIXME: have alternate copy functions that push a vector loop inside
+   the n loops? */
+
+/* copy halfcomplex array r (contiguous) to complex (strided) array rio/iio: element i takes its real part from r[i] and its imaginary part from r[n - i]; the imaginary parts of element 0 and, for even n, of element n/2 are set to zero. */
+static void hc2c(int n, R *r, R *rio, R *iio, int os)
+{
+     int n2 = (n + 1) / 2;
+     int i;
+
+     rio[0] = r[0];
+     iio[0] = 0;
+     for (i = 1; i < ((n2 - 1) & 3) + 1; ++i) { /* peel (n2 - 1) mod 4 elements so the next loop can step by 4 */
+	  rio[i * os] = r[i];
+	  iio[i * os] = r[n - i];
+     }
+     for (; i < n2; i += 4) { /* main loop, four elements per iteration */
+	  R r0, r1, r2, r3;
+	  R i0, i1, i2, i3;
+	  r0 = r[i];
+	  r1 = r[i + 1];
+	  r2 = r[i + 2];
+	  r3 = r[i + 3];
+	  i3 = r[n - (i + 3)];
+	  i2 = r[n - (i + 2)];
+	  i1 = r[n - (i + 1)];
+	  i0 = r[n - i];
+	  rio[i * os] = r0;
+	  iio[i * os] = i0;
+	  rio[(i + 1) * os] = r1;
+	  iio[(i + 1) * os] = i1;
+	  rio[(i + 2) * os] = r2;
+	  iio[(i + 2) * os] = i2;
+	  rio[(i + 3) * os] = r3;
+	  iio[(i + 3) * os] = i3;
+     }
+     if ((n & 1) == 0) {	/* store the Nyquist frequency */
+	  rio[n2 * os] = r[n2];
+	  iio[n2 * os] = 0.0;
+     }
+}
+
+/* reverse of hc2c: gather the strided complex array rio/iio (stride is) into the contiguous halfcomplex array r; iio[0] and the Nyquist imaginary part are not read. */
+static void c2hc(int n, R *rio, R *iio, int is, R *r)
+{
+     int n2 = (n + 1) / 2;
+     int i;
+
+     r[0] = rio[0];
+     for (i = 1; i < ((n2 - 1) & 3) + 1; ++i) { /* peel (n2 - 1) mod 4 elements so the next loop can step by 4 */
+	  r[i] = rio[i * is];
+	  r[n - i] = iio[i * is];
+     }
+     for (; i < n2; i += 4) { /* main loop, four elements per iteration */
+	  R r0, r1, r2, r3;
+	  R i0, i1, i2, i3;
+	  r0 = rio[i * is];
+	  i0 = iio[i * is];
+	  r1 = rio[(i + 1) * is];
+	  i1 = iio[(i + 1) * is];
+	  r2 = rio[(i + 2) * is];
+	  i2 = iio[(i + 2) * is];
+	  r3 = rio[(i + 3) * is];
+	  i3 = iio[(i + 3) * is];
+	  r[i] = r0;
+	  r[i + 1] = r1;
+	  r[i + 2] = r2;
+	  r[i + 3] = r3;
+	  r[n - (i + 3)] = i3;
+	  r[n - (i + 2)] = i2;
+	  r[n - (i + 1)] = i1;
+	  r[n - i] = i0;
+     }
+     if ((n & 1) == 0)		/* store the Nyquist frequency */
+	  r[n2] = rio[n2 * is];
+}
+
+/***************************************************************************/
+/* R2HC direction: transform nbuf vectors at a time from r into temporary buffers with cld, unpack each buffer into rio/iio with hc2c(), then handle the leftover vectors with cldrest. */
+static void apply_r2hc(const plan *ego_, R *r, R *rio, R *iio)
+{
+     const P *ego = (const P *) ego_;
+     plan_rdft *cld = (plan_rdft *) ego->cld;
+     int i, j, vl = ego->vl, nbuf = ego->nbuf, bufdist = ego->bufdist;
+     int n = ego->n;
+     int ivs = ego->ivs, ovs = ego->ovs, os = ego->os;
+     R *bufs;
+
+     bufs = (R *)MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);
+
+     for (i = nbuf; i <= vl; i += nbuf) { /* one pass per complete batch of nbuf vectors */
+          /* transform to bufs: */
+          cld->apply((plan *) cld, r, bufs);
+	  r += ivs; /* ego->ivs was set to ivs * nbuf by mkplan(), i.e. one whole batch */
+
+          /* copy back */
+	  for (j = 0; j < nbuf; ++j, rio += ovs, iio += ovs)
+	       hc2c(n, bufs + j*bufdist, rio, iio, os);
+     }
+
+     /* Do the remaining transforms, if any: */
+     {
+	  plan_rdft *cldrest = (plan_rdft *) ego->cldrest;
+	  R *b = bufs;
+	  cldrest->apply((plan *) cldrest, r, bufs);
+	  for (i -= nbuf; i < vl; ++i, rio += ovs, iio += ovs, b += bufdist) /* i -= nbuf gives the count already done */
+	       hc2c(n, b, rio, iio, os);
+     }
+
+     X(ifree)(bufs);
+}
+/* HC2R direction: pack nbuf complex vectors at a time from rio/iio into temporary halfcomplex buffers with c2hc(), transform them into r with cld, then handle the leftover vectors with cldrest. */
+static void apply_hc2r(const plan *ego_, R *r, R *rio, R *iio)
+{
+     const P *ego = (const P *) ego_;
+     plan_rdft *cld = (plan_rdft *) ego->cld;
+     int i, j, vl = ego->vl, nbuf = ego->nbuf, bufdist = ego->bufdist;
+     int n = ego->n;
+     int ivs = ego->ivs, ovs = ego->ovs, is = ego->os;
+     R *bufs;
+
+     bufs = (R *)MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);
+
+     for (i = nbuf; i <= vl; i += nbuf) { /* one pass per complete batch of nbuf vectors */
+          /* copy to bufs */
+	  for (j = 0; j < nbuf; ++j, rio += ivs, iio += ivs)
+	       c2hc(n, rio, iio, is, bufs + j*bufdist);
+
+          /* transform back: */
+          cld->apply((plan *) cld, bufs, r);
+	  r += ovs; /* ego->ovs was set to ovs * nbuf by mkplan(), i.e. one whole batch */
+     }
+
+     /* Do the remaining transforms, if any: */
+     {
+	  plan_rdft *cldrest;
+	  R *b = bufs;
+	  for (i -= nbuf; i < vl; ++i, rio += ivs, iio += ivs, b += bufdist) /* i -= nbuf gives the count already done */
+	       c2hc(n, rio, iio, is, b);
+	  cldrest = (plan_rdft *) ego->cldrest;
+	  cldrest->apply((plan *) cldrest, bufs, r);
+     }
+
+     X(ifree)(bufs);
+}
+/* Forward the awake/sleep flag to both child plans. */
+static void awake(plan *ego_, int flg)
+{
+     P *ego = (P *) ego_;
+
+     AWAKE(ego->cld, flg);
+     AWAKE(ego->cldrest, flg);
+}
+/* Destroy both child plans (cldrest first, then cld); the plan object itself is not freed here. */
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cldrest);
+     X(plan_destroy_internal)(ego->cld);
+}
+/* Print the plan through p->print: solver name, direction ("r2hc" or "hc2r"), n, nbuf, vl, bufdist % n, then the two child plans, in that argument order. */
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(%s-%s-%d%v/%d-%d%(%p%)%(%p%))",
+              ego->slv->adt->nam,
+	      ego->super.apply == apply_r2hc ? "r2hc" : "hc2r",
+              ego->n, ego->nbuf,
+              ego->vl, ego->bufdist % ego->n,
+              ego->cld, ego->cldrest);
+}
+/* Smallest buffer count mkplan() may use for problem p: 1 when r is distinct from rio and iio or when X(rdft2_inplace_strides) accepts the layout, a computed bound for overlapping "contiguous" arrays, otherwise the whole vector length vl. */
+static int min_nbuf(const problem_rdft2 *p, int n, int vl)
+{
+     int is, os, ivs, ovs;
+
+     if (p->r != p->rio && p->r != p->iio) /* out-of-place */
+	  return 1;
+     if (X(rdft2_inplace_strides)(p, RNK_MINFTY))
+	  return 1;
+     A(p->vecsz->rnk == 1); /*  rank 0 and MINFTY are inplace */
+
+     X(rdft2_strides)(p->kind, p->sz->dims, &is, &os);
+     X(rdft2_strides)(p->kind, p->vecsz->dims, &ivs, &ovs);
+     
+     /* handle one potentially common case: "contiguous" real and
+	complex arrays, which overlap because of the differing sizes. */
+     if (n * X(iabs)(is) <= X(iabs)(ivs)
+	 && (n/2 + 1) * X(iabs)(os) <= X(iabs)(ovs)
+	 && X(iabs)((int) (p->rio - p->iio)) <= X(iabs)(os)
+	 && ivs > 0 && ovs > 0) {
+	  int vsmin = X(imin)(ivs, ovs);
+	  int vsmax = X(imax)(ivs, ovs);
+	  return(((vsmax - vsmin) * vl + vsmin - 1) / vsmin); /* (vsmax - vsmin) * vl / vsmin, rounded up */
+     }
+
+     return vl; /* punt: just buffer the whole vector */
+}
+/* Thin wrapper: calls the shared X(compute_nbuf) helper with this solver's nbuf and maxbufsz settings. */
+static int compute_nbuf(int n, int vl, const S *ego)
+{
+     return X(compute_nbuf)(n, vl, ego->adt->nbuf, ego->adt->maxbufsz);
+}
+/* Nonzero iff n exceeds the solver's maxbufsz setting. */
+static int toobig(int n, const S *ego)
+{
+     return (n > ego->adt->maxbufsz);
+}
+/* Basic applicability: an rdft2 problem with vector rank <= 1 and transform rank 1, rejected only when it is too big and the planner asks to conserve memory.  NOTE(review): UNUSED(ego) looks stale, since ego is used below. */
+static int applicable0(const problem *p_, const S *ego, const planner *plnr)
+{
+     UNUSED(ego);
+     if (RDFT2P(p_)) {
+          const problem_rdft2 *p = (const problem_rdft2 *) p_;
+	  return(p->vecsz->rnk <= 1 && p->sz->rnk == 1
+		 && !(toobig(p->sz->dims[0].n, ego) && CONSERVE_MEMORYP(plnr)));
+     }
+     return 0;
+}
+/* Full applicability test: refuses when the planner forbids buffering or applicable0() fails; under NO_UGLY it also refuses out-of-place problems and transforms that are too big. */
+static int applicable(const problem *p_, const S *ego, const planner *plnr)
+{
+     const problem_rdft2 *p;
+
+     if (NO_BUFFERINGP(plnr)) return 0;
+     if (!applicable0(p_, ego, plnr)) return 0;
+
+     p = (const problem_rdft2 *) p_;
+     if (NO_UGLYP(plnr)) {
+	  if (p->r != p->rio && p->r != p->iio) return 0; /* out-of-place */
+	  if (toobig(p->sz->dims[0].n, ego)) return 0;
+     }
+     return 1;
+}
+/* Build a buffered rdft2 plan for p_: choose nbuf and bufdist, plan a child rdft over nbuf vectors (cld) and another over the vl % nbuf leftovers (cldrest), and record the strides the apply routines need.  Returns 0 if the solver does not apply or a child plan cannot be made. */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     const S *ego = (const S *) ego_;
+     const bufadt *adt = ego->adt;
+     P *pln;
+     plan *cld = (plan *) 0;
+     plan *cldrest = (plan *) 0;
+     problem *cldp = 0;
+     const problem_rdft2 *p = (const problem_rdft2 *) p_;
+     R *bufs = (R *) 0;
+     int nbuf = 0, bufdist, n, vl;
+     int ivs, ovs;
+
+     static const plan_adt padt = {
+	  X(rdft2_solve), awake, print, destroy
+     };
+
+
+     if (!applicable(p_, ego, plnr))
+          goto nada;
+
+     n = p->sz->dims[0].n;
+     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
+
+     nbuf = X(imax)(compute_nbuf(n, vl, ego), min_nbuf(p, n, vl)); /* never fewer buffers than min_nbuf() requires */
+     A(nbuf > 0);
+
+     /*
+      * Determine BUFDIST, the offset between successive array bufs.
+      * bufdist = n + skew, where skew is chosen such that bufdist %
+      * skew_alignment = skew.
+      */
+     if (vl == 1) {
+          bufdist = n;
+     } else {
+          bufdist =
+               n + ((adt->skew_alignment + adt->skew - n % adt->skew_alignment)
+                    % adt->skew_alignment);
+          A(p->vecsz->rnk == 1);
+     }
+
+     /* initial allocation for the purpose of planning */
+     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);
+
+     if (p->kind == R2HC) /* child reads the user's real array and writes contiguous buffers */
+	  cldp =
+	       X(mkproblem_rdft_d)(
+		    X(mktensor_1d)(n, p->sz->dims[0].is, 1),
+		    X(mktensor_1d)(nbuf, ivs, bufdist),
+		    TAINT(p->r, ivs * nbuf), bufs, &p->kind);
+     else { /* child reads contiguous buffers and writes the user's real array */
+	  A(p->kind == HC2R);
+	  plnr->problem_flags |= DESTROY_INPUT; /* always ok to destroy buf */
+	  cldp =
+	       X(mkproblem_rdft_d)(
+		    X(mktensor_1d)(n, 1, p->sz->dims[0].os),
+		    X(mktensor_1d)(nbuf, bufdist, ovs),
+		    bufs, TAINT(p->r, ovs * nbuf), &p->kind);
+     }
+     if (!(cld = X(mkplan_d)(plnr, cldp))) goto nada;
+
+     /* plan the leftover transforms (cldrest): */
+     if (p->kind == R2HC)
+	  cldp =
+	       X(mkproblem_rdft_d)(
+		    X(mktensor_1d)(n, p->sz->dims[0].is, 1),
+		    X(mktensor_1d)(vl % nbuf, ivs, bufdist),
+		    p->r + ivs * (nbuf * (vl / nbuf)), bufs, &p->kind);
+     else /* HC2R */
+	  cldp =
+	       X(mkproblem_rdft_d)(
+		    X(mktensor_1d)(n, 1, p->sz->dims[0].os),
+		    X(mktensor_1d)(vl % nbuf, bufdist, ovs),
+			 bufs, p->r + ovs * (nbuf * (vl / nbuf)), &p->kind);
+     if (!(cldrest = X(mkplan_d)(plnr, cldp))) goto nada;
+
+     /* deallocate buffers, let apply() allocate them for real */
+     X(ifree)(bufs);
+     bufs = 0;
+
+     pln = MKPLAN_RDFT2(P, &padt, p->kind == R2HC ? apply_r2hc : apply_hc2r);
+     pln->cld = cld;
+     pln->cldrest = cldrest;
+     pln->slv = ego;
+     pln->n = n;
+     pln->vl = vl;
+     if (p->kind == R2HC) {
+	  pln->ivs = ivs * nbuf; /* real input advances one whole batch per cld call */
+	  pln->ovs = ovs;
+	  pln->os = p->sz->dims[0].os; /* stride of rio/iio  */
+     }
+     else { /* HC2R */
+	  pln->ivs = ivs;
+	  pln->ovs = ovs * nbuf; /* real output advances one whole batch per cld call */
+	  pln->os = p->sz->dims[0].is; /* stride of rio/iio  */
+     }
+
+
+     pln->nbuf = nbuf;
+     pln->bufdist = bufdist;
+
+     X(ops_madd)(vl / nbuf, &cld->ops, &cldrest->ops,
+		 &pln->super.super.ops);
+     pln->super.super.ops.other += (p->kind == R2HC ? (n + 2) : n) * vl; /* extra per-vector cost added on top of the child plans */
+
+     return &(pln->super.super);
+
+ nada: /* shared failure exit: release whatever was acquired so far */
+     X(ifree0)(bufs);
+     X(plan_destroy_internal)(cldrest);
+     X(plan_destroy_internal)(cld);
+     return (plan *) 0;
+}
+/* Create a solver of type S whose only method is mkplan and attach the given tuning table to it. */
+static solver *mksolver(const bufadt *adt)
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     slv->adt = adt;
+     return &(slv->super);
+}
+/* Register one instance of the buffered rdft2 solver with planner p, using the fixed parameter table below. */
+void X(rdft2_buffered_register)(planner *p)
+{
+     /* FIXME: what are good defaults? */
+     static const bufadt adt = {
+	  /* nbuf */           8,
+	  /* maxbufsz */       (65536 / sizeof(R)),
+	  /* skew_alignment */ 8,
+	  /* skew */           5,
+	  /* nam */            "rdft2-buffered"
+     };
+
+     REGISTER_SOLVER(p, mksolver(&adt));
+}
diff --git a/src/fftw3/rdft/codelet-rdft.h b/src/fftw3/rdft/codelet-rdft.h
new file mode 100644
index 0000000..27d76a8
--- /dev/null
+++ b/src/fftw3/rdft/codelet-rdft.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ * This header file must include every file or define every
+ * type or macro which is required to compile a codelet.
+ */
+
+#ifndef __RDFT_CODELET_H__
+#define __RDFT_CODELET_H__
+
+#include "ifftw.h"
+
+/**************************************************************
+ * types of codelets
+ **************************************************************/
+
+/* FOOab, with a,b in {0,1}, denotes the FOO transform
+   where a/b say whether the input/output are shifted by
+   half a sample/slot. */
+
+typedef enum {
+     R2HC00, R2HC01, R2HC10, R2HC11, /* range tested by R2HC_KINDP */
+     HC2R00, HC2R01, HC2R10, HC2R11, /* range tested by HC2R_KINDP */
+     DHT, /* discrete Hartley transform; DHT and every later enumerator satisfy R2R_KINDP */
+     REDFT00, REDFT01, REDFT10, REDFT11, /* real-even == DCT's */
+     RODFT00, RODFT01, RODFT10, RODFT11  /*  real-odd == DST's */
+} rdft_kind; /* enumerator order is significant: the *_KINDP macros are range comparisons */
+
+/* standard R2HC/HC2R transforms are unshifted */
+#define R2HC R2HC00
+#define HC2R HC2R00
+
+#define R2HCII R2HC01 /* type II: a=0, b=1, i.e. only the output is shifted */
+#define HC2RIII HC2R10 /* type III: a=1, b=0, i.e. only the input is shifted */
+
+/* (k) >= R2HC00 produces a warning under gcc because checking x >= 0
+   is superfluous for unsigned values...but it is needed because other
+   compilers (e.g. icc) may define the enum to be a signed int...grrr. */
+#define R2HC_KINDP(k) ((k) >= R2HC00 && (k) <= R2HC11) /* uses kr2hc_genus; k is evaluated twice */
+#define HC2R_KINDP(k) ((k) >= HC2R00 && (k) <= HC2R11) /* uses khc2r_genus; k is evaluated twice */
+
+#define R2R_KINDP(k) ((k) >= DHT) /* uses kr2r_genus; true for DHT and every enumerator after it */
+
+#define REDFT_KINDP(k) ((k) >= REDFT00 && (k) <= REDFT11) /* k is evaluated twice */
+#define RODFT_KINDP(k) ((k) >= RODFT00 && (k) <= RODFT11) /* k is evaluated twice */
+#define REODFT_KINDP(k) ((k) >= REDFT00 && (k) <= RODFT11) /* REDFT or RODFT: relies on the two groups being adjacent */
+
+/* real-input DFT codelets */
+typedef struct kr2hc_desc_s kr2hc_desc;
+
+typedef struct {
+     int (*okp)( /* applicability test; presumably nonzero means "codelet usable" -- TODO confirm against callers */
+	  const kr2hc_desc *desc,
+	  const R *I, const R *ro, const R *io,
+	  int is, int ios, int ros, int vl, int ivs, int ovs); /* NOTE(review): order is (is, ios, ros) here but (is, ros, ios) in kr2hc below */
+     rdft_kind kind;
+     int vl;
+} kr2hc_genus;
+
+struct kr2hc_desc_s {
+     int sz;    /* size of transform computed */
+     const char *nam;
+     opcnt ops;
+     const kr2hc_genus *genus;
+     int is; /* stride fields: presumably 0 means "any", as in the hc2r okp -- TODO confirm */
+     int ros, ios;
+     int ivs;
+     int ovs;
+};
+
+typedef void (*kr2hc) (const R *I, R *ro, R *io, stride is, /* I is read-only; ro and io are writable */
+		       stride ros, stride ios, int vl, int ivs, int ovs);
+void X(kr2hc_register)(planner *p, kr2hc codelet, const kr2hc_desc *desc);
+
+/* real-input DFT codelets, type II (middle case of hc2hc DIT) */
+typedef kr2hc_desc kr2hcII_desc; /* type II reuses the plain kr2hc types unchanged */
+typedef kr2hc_genus kr2hcII_genus;
+typedef kr2hc kr2hcII;
+#define kr2hcII_register kr2hc_register /* same registration entry point as plain kr2hc */
+
+/* half-complex to half-complex DIT/DIF codelets: */
+typedef struct hc2hc_desc_s hc2hc_desc;
+
+typedef struct {
+     int (*okp)( /* applicability test; presumably nonzero means "codelet usable" -- TODO confirm against callers */
+	  const struct hc2hc_desc_s *desc,
+	  const R *rio, const R *iio, int ios, int vs, int m, int dist);
+     rdft_kind kind;
+     int vl;
+} hc2hc_genus;
+
+struct hc2hc_desc_s {
+     int radix; /* e.g. 10 in the descriptor of hb_10 */
+     const char *nam; /* e.g. "hb_10" */
+     const tw_instr *tw; /* tw_instr table; the hb_* codelets pass their static twinstr[] */
+     opcnt ops; /* hb_10 fills this with its {additions, multiplications, fused multiply/add, 0} counts */
+     const hc2hc_genus *genus;
+     int s1; /* s1, s2, dist are all 0 in the generated hb_* descriptors */
+     int s2;
+     int dist;
+};
+
+typedef const R *(*khc2hc) (R *rioarray, R *iioarray, const R *W, /* hb_* codelets return W advanced past the values they read */
+                              stride ios, int m, int dist);
+void X(khc2hc_dit_register)(planner *p, khc2hc codelet, const hc2hc_desc *desc); /* DIT flavour; the DIF one is declared further down */
+
+extern const solvtab X(solvtab_rdft_r2hc);
+
+/* real-output DFT codelets */
+typedef struct khc2r_desc_s khc2r_desc;
+
+typedef struct {
+     int (*okp)( /* applicability test; see okp() in codelets/hc2r.c for the implementation used by both hc2r genera */
+	  const khc2r_desc *desc,
+	  const R *ri, const R *ii, const R *O,
+	  int ris, int iis, int os, int vl, int ivs, int ovs);
+     rdft_kind kind; /* HC2R or HC2RIII in the genera defined in codelets/hc2r.c */
+     int vl; /* 1 in the genera defined in codelets/hc2r.c */
+} khc2r_genus;
+
+struct khc2r_desc_s {
+     int sz;    /* size of transform computed */
+     const char *nam;
+     opcnt ops;
+     const khc2r_genus *genus;
+     int ris, iis; /* stride fields: a zero value places no constraint (see okp() in codelets/hc2r.c) */
+     int os;
+     int ivs;
+     int ovs;
+};
+
+typedef void (*khc2r) (const R *ri, const R *ii, R *O, stride ris, /* ri and ii are read-only; O is writable */
+		       stride iis, stride os, int vl, int ivs, int ovs);
+void X(khc2r_register)(planner *p, khc2r codelet, const khc2r_desc *desc);
+
+/* real-output DFT codelets, type III (middle case of hc2hc DIF) */
+typedef khc2r_desc khc2rIII_desc; /* type III reuses the plain khc2r types unchanged */
+typedef khc2r_genus khc2rIII_genus;
+typedef khc2r khc2rIII;
+#define khc2rIII_register khc2r_register /* same registration entry point as plain khc2r */
+
+void X(khc2hc_dif_register)(planner *p, khc2hc codelet, const hc2hc_desc *desc); /* used by the generated hb_* codelets */
+
+extern const solvtab X(solvtab_rdft_hc2r);
+
+/* real-input & output DFT-like codelets (DHT, etc.) */
+typedef struct kr2r_desc_s kr2r_desc;
+
+typedef struct {
+     int (*okp)( /* applicability test; presumably nonzero means "codelet usable" -- TODO confirm against callers */
+	  const kr2r_desc *desc,
+	  const R *I, const R *O,
+	  int is, int os, int vl, int ivs, int ovs);
+     int vl;
+} kr2r_genus; /* unlike the other genera there is no kind field here; it lives in kr2r_desc */
+
+struct kr2r_desc_s {
+     int sz;    /* size of transform computed */
+     const char *nam;
+     opcnt ops;
+     const kr2r_genus *genus;
+     rdft_kind kind; /* per-codelet kind (the genus does not carry one) */
+     int is, os; /* stride fields: presumably 0 means "any", as in the hc2r okp -- TODO confirm */
+     int ivs;
+     int ovs;
+};
+
+typedef void (*kr2r) (const R *I, R *O, stride is, stride os, /* I is read-only; O is writable */
+		      int vl, int ivs, int ovs);
+void X(kr2r_register)(planner *p, kr2r codelet, const kr2r_desc *desc);
+
+extern const solvtab X(solvtab_rdft_r2r);
+
+#endif				/* __RDFT_CODELET_H__ */
diff --git a/src/fftw3/rdft/codelets/hb.h b/src/fftw3/rdft/codelets/hb.h
new file mode 100644
index 0000000..bb8f528
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hb.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+
+#define GENUS X(rdft_hb_genus) /* name the generated hb_* codelets use in their descriptors (&GENUS) */
+extern const hc2hc_genus GENUS; /* needs hc2hc_genus from codelet-rdft.h, which this header does not include itself */
diff --git a/src/fftw3/rdft/codelets/hc2r.c b/src/fftw3/rdft/codelets/hc2r.c
new file mode 100644
index 0000000..7daf21c
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "codelet-rdft.h"
+#include "hc2r.h"
+
+static int okp(const khc2r_desc *d, /* returns 1 iff every nonzero stride recorded in d equals the requested one */
+	       const R *ri, const R *ii,
+	       const R *O,
+	       int ris, int iis, int os, int vl, int ivs, int ovs)
+{
+     UNUSED(ri); UNUSED(ii); UNUSED(O); UNUSED(vl); /* only the strides are examined */
+     return (1
+	     && (!d->ris || (d->ris == ris)) /* a zero field in d accepts any stride */
+	     && (!d->iis || (d->iis == iis)) /* fixed: used to compare d->ris with iis */
+	     && (!d->os || (d->os == os))
+	     && (!d->ivs || (d->ivs == ivs))
+	     && (!d->ovs || (d->ovs == ovs))
+	  );
+}
+
+const khc2r_genus GENUS = { okp, HC2R, 1 }; /* fields: okp, kind, vl */
+
+#undef GENUS /* drop the name from hc2r.h; hc2rIII.h presumably defines GENUS again -- that header is not shown here */
+#include "hc2rIII.h"
+
+const khc2rIII_genus GENUS = { okp, HC2RIII, 1 }; /* same okp, kind HC2RIII, vl 1 */
diff --git a/src/fftw3/rdft/codelets/hc2r.h b/src/fftw3/rdft/codelets/hc2r.h
new file mode 100644
index 0000000..3dd17be
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+
+#define GENUS X(rdft_hc2r_genus) /* hc2r.c defines the object and then #undefs this name */
+extern const khc2r_genus GENUS; /* needs khc2r_genus from codelet-rdft.h, which this header does not include itself */
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_10.c b/src/fftw3/rdft/codelets/hc2r/hb_10.c
new file mode 100644
index 0000000..3d96502
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_10.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:33 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 10 -dif -name hb_10 -include hb.h */
+
+/*
+ * This function contains 102 FP additions, 60 FP multiplications,
+ * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
+ * 39 stack variables, and 40 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_10.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_10.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_10.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_10(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{ /* generated radix-10 pass: updates rio/iio in place and returns W advanced past the values it read */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* sin(2*pi/5) */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* sin(pi/5) */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* sqrt(5)/4 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 18) { /* per pass: rio moves forward, iio backward, W[0..17] is read */
+	  E T3, Tk, Tw, T1w, TO, TP, T1E, T1D, Tr, TX, Ti, T1l, TZ, T10, T1s;
+	  E T1p, T1z, T1B, TL, TS;
+	  {
+	       E T1, T2, Tu, Tv;
+	       T1 = rio[0];
+	       T2 = iio[-WS(ios, 5)];
+	       T3 = T1 + T2;
+	       Tk = T1 - T2;
+	       Tu = iio[0];
+	       Tv = rio[WS(ios, 5)];
+	       Tw = Tu + Tv;
+	       T1w = Tu - Tv;
+	  }
+	  {
+	       E T6, Tl, Tg, Tp, T9, Tm, Td, To;
+	       {
+		    E T4, T5, Te, Tf;
+		    T4 = rio[WS(ios, 2)];
+		    T5 = iio[-WS(ios, 7)];
+		    T6 = T4 + T5;
+		    Tl = T4 - T5;
+		    Te = iio[-WS(ios, 6)];
+		    Tf = rio[WS(ios, 1)];
+		    Tg = Te + Tf;
+		    Tp = Te - Tf;
+	       }
+	       {
+		    E T7, T8, Tb, Tc;
+		    T7 = iio[-WS(ios, 8)];
+		    T8 = rio[WS(ios, 3)];
+		    T9 = T7 + T8;
+		    Tm = T7 - T8;
+		    Tb = rio[WS(ios, 4)];
+		    Tc = iio[-WS(ios, 9)];
+		    Td = Tb + Tc;
+		    To = Tb - Tc;
+	       }
+	       TO = Tl - Tm;
+	       TP = To - Tp;
+	       T1E = Td - Tg;
+	       T1D = T6 - T9;
+	       {
+		    E Tn, Tq, Ta, Th;
+		    Tn = Tl + Tm;
+		    Tq = To + Tp;
+		    Tr = Tn + Tq;
+		    TX = KP559016994 * (Tn - Tq);
+		    Ta = T6 + T9;
+		    Th = Td + Tg;
+		    Ti = Ta + Th;
+		    T1l = KP559016994 * (Ta - Th);
+	       }
+	  }
+	  {
+	       E Tz, T1n, TJ, T1r, TC, T1o, TG, T1q;
+	       {
+		    E Tx, Ty, TH, TI;
+		    Tx = iio[-WS(ios, 2)];
+		    Ty = rio[WS(ios, 7)];
+		    Tz = Tx + Ty;
+		    T1n = Tx - Ty;
+		    TH = rio[WS(ios, 6)];
+		    TI = iio[-WS(ios, 1)];
+		    TJ = TH + TI;
+		    T1r = TI - TH;
+	       }
+	       {
+		    E TA, TB, TE, TF;
+		    TA = rio[WS(ios, 8)];
+		    TB = iio[-WS(ios, 3)];
+		    TC = TA + TB;
+		    T1o = TB - TA;
+		    TE = iio[-WS(ios, 4)];
+		    TF = rio[WS(ios, 9)];
+		    TG = TE + TF;
+		    T1q = TE - TF;
+	       }
+	       TZ = Tz + TC;
+	       T10 = TG + TJ;
+	       T1s = T1q - T1r;
+	       T1p = T1n - T1o;
+	       {
+		    E T1x, T1y, TD, TK;
+		    T1x = T1n + T1o;
+		    T1y = T1q + T1r;
+		    T1z = T1x + T1y;
+		    T1B = KP559016994 * (T1x - T1y);
+		    TD = Tz - TC;
+		    TK = TG - TJ;
+		    TL = TD + TK;
+		    TS = KP559016994 * (TD - TK);
+	       }
+	  }
+	  rio[0] = T3 + Ti; /* these two outputs are stored without a W factor */
+	  iio[-WS(ios, 9)] = T1w + T1z;
+	  {
+	       E Ts, TM, Tj, Tt;
+	       Ts = Tk + Tr;
+	       TM = Tw + TL;
+	       Tj = W[8];
+	       Tt = W[9];
+	       rio[WS(ios, 5)] = FNMS(Tt, TM, Tj * Ts);
+	       iio[-WS(ios, 4)] = FMA(Tt, Ts, Tj * TM);
+	  }
+	  {
+	       E T1t, T1F, T1Q, T1N, T1C, T1R, T1m, T1M, T1A, T1k;
+	       T1t = FNMS(KP951056516, T1s, KP587785252 * T1p);
+	       T1F = FNMS(KP951056516, T1E, KP587785252 * T1D);
+	       T1Q = FMA(KP951056516, T1D, KP587785252 * T1E);
+	       T1N = FMA(KP951056516, T1p, KP587785252 * T1s);
+	       T1A = FNMS(KP250000000, T1z, T1w);
+	       T1C = T1A - T1B;
+	       T1R = T1B + T1A;
+	       T1k = FNMS(KP250000000, Ti, T3);
+	       T1m = T1k - T1l;
+	       T1M = T1l + T1k;
+	       {
+		    E T1u, T1G, T1j, T1v;
+		    T1u = T1m + T1t;
+		    T1G = T1C - T1F;
+		    T1j = W[14];
+		    T1v = W[15];
+		    rio[WS(ios, 8)] = FNMS(T1v, T1G, T1j * T1u);
+		    iio[-WS(ios, 1)] = FMA(T1v, T1u, T1j * T1G);
+	       }
+	       {
+		    E T1U, T1W, T1T, T1V;
+		    T1U = T1M + T1N;
+		    T1W = T1R - T1Q;
+		    T1T = W[6];
+		    T1V = W[7];
+		    rio[WS(ios, 4)] = FNMS(T1V, T1W, T1T * T1U);
+		    iio[-WS(ios, 5)] = FMA(T1V, T1U, T1T * T1W);
+	       }
+	       {
+		    E T1I, T1K, T1H, T1J;
+		    T1I = T1m - T1t;
+		    T1K = T1F + T1C;
+		    T1H = W[2];
+		    T1J = W[3];
+		    rio[WS(ios, 2)] = FNMS(T1J, T1K, T1H * T1I);
+		    iio[-WS(ios, 7)] = FMA(T1J, T1I, T1H * T1K);
+	       }
+	       {
+		    E T1O, T1S, T1L, T1P;
+		    T1O = T1M - T1N;
+		    T1S = T1Q + T1R;
+		    T1L = W[10];
+		    T1P = W[11];
+		    rio[WS(ios, 6)] = FNMS(T1P, T1S, T1L * T1O);
+		    iio[-WS(ios, 3)] = FMA(T1P, T1O, T1L * T1S);
+	       }
+	  }
+	  {
+	       E TQ, T11, T1c, T19, TY, T18, TT, T1d, TW, TR;
+	       TQ = FNMS(KP951056516, TP, KP587785252 * TO);
+	       T11 = FNMS(KP951056516, T10, KP587785252 * TZ);
+	       T1c = FMA(KP951056516, TO, KP587785252 * TP);
+	       T19 = FMA(KP951056516, TZ, KP587785252 * T10);
+	       TW = FNMS(KP250000000, Tr, Tk);
+	       TY = TW - TX;
+	       T18 = TX + TW;
+	       TR = FNMS(KP250000000, TL, Tw);
+	       TT = TR - TS;
+	       T1d = TS + TR;
+	       {
+		    E TU, T12, TN, TV;
+		    TU = TQ + TT;
+		    T12 = TY - T11;
+		    TN = W[12];
+		    TV = W[13];
+		    iio[-WS(ios, 2)] = FMA(TN, TU, TV * T12);
+		    rio[WS(ios, 7)] = FNMS(TV, TU, TN * T12);
+	       }
+	       {
+		    E T1g, T1i, T1f, T1h;
+		    T1g = T1d - T1c;
+		    T1i = T18 + T19;
+		    T1f = W[16];
+		    T1h = W[17];
+		    iio[0] = FMA(T1f, T1g, T1h * T1i);
+		    rio[WS(ios, 9)] = FNMS(T1h, T1g, T1f * T1i);
+	       }
+	       {
+		    E T14, T16, T13, T15;
+		    T14 = TY + T11;
+		    T16 = TT - TQ;
+		    T13 = W[4];
+		    T15 = W[5];
+		    rio[WS(ios, 3)] = FNMS(T15, T16, T13 * T14);
+		    iio[-WS(ios, 6)] = FMA(T13, T16, T15 * T14);
+	       }
+	       {
+		    E T1a, T1e, T17, T1b;
+		    T1a = T18 - T19;
+		    T1e = T1c + T1d;
+		    T17 = W[0];
+		    T1b = W[1];
+		    rio[WS(ios, 1)] = FNMS(T1b, T1e, T17 * T1a);
+		    iio[-WS(ios, 8)] = FMA(T17, T1e, T1b * T1a);
+	       }
+	  }
+     }
+     return W; /* W has advanced by 18 for every pass executed */
+}
+
+static const tw_instr twinstr[] = {
+     {TW_FULL, 0, 10},
+     {TW_NEXT, 1, 0}
+};
+
+static const hc2hc_desc desc = { 10, "hb_10", twinstr, {72, 30, 30, 0}, &GENUS, 0, 0, 0 }; /* radix, nam, tw, ops, genus, s1, s2, dist */
+
+void X(codelet_hb_10) (planner *p) { /* registers hb_10 with planner p through the DIF registration hook */
+     X(khc2hc_dif_register) (p, hb_10, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_12.c b/src/fftw3/rdft/codelets/hc2r/hb_12.c
new file mode 100644
index 0000000..55e7db0
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_12.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:36 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 12 -dif -name hb_12 -include hb.h */
+
+/*
+ * This function contains 118 FP additions, 60 FP multiplications,
+ * (or, 88 additions, 30 multiplications, 30 fused multiply/add),
+ * 39 stack variables, and 48 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_12.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_12.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_12.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_12(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{ /* generated radix-12 pass: updates rio/iio in place and returns W advanced past the values it read */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 22) { /* per pass: rio moves forward, iio backward, W[0..21] is read */
+	  E T5, Tt, T12, T1M, T1i, T1U, Tl, TM, T1c, T1Y, T1s, T1Q, Ta, Ty, T15;
+	  E T1N, T1l, T1V, Tg, TH, T19, T1X, T1p, T1P;
+	  {
+	       E T1, Tp, T4, T1g, Ts, T11, T10, T1h;
+	       T1 = rio[0];
+	       Tp = iio[0];
+	       {
+		    E T2, T3, Tq, Tr;
+		    T2 = rio[WS(ios, 4)];
+		    T3 = iio[-WS(ios, 8)];
+		    T4 = T2 + T3;
+		    T1g = KP866025403 * (T2 - T3);
+		    Tq = rio[WS(ios, 8)];
+		    Tr = iio[-WS(ios, 4)];
+		    Ts = Tq - Tr;
+		    T11 = KP866025403 * (Tq + Tr);
+	       }
+	       T5 = T1 + T4;
+	       Tt = Tp - Ts;
+	       T10 = FNMS(KP500000000, T4, T1);
+	       T12 = T10 - T11;
+	       T1M = T10 + T11;
+	       T1h = FMA(KP500000000, Ts, Tp);
+	       T1i = T1g + T1h;
+	       T1U = T1h - T1g;
+	  }
+	  {
+	       E Th, TL, Tk, T1a, TK, T1r, T1b, T1q;
+	       Th = iio[-WS(ios, 9)];
+	       TL = rio[WS(ios, 9)];
+	       {
+		    E Ti, Tj, TI, TJ;
+		    Ti = rio[WS(ios, 1)];
+		    Tj = rio[WS(ios, 5)];
+		    Tk = Ti + Tj;
+		    T1a = KP866025403 * (Ti - Tj);
+		    TI = iio[-WS(ios, 5)];
+		    TJ = iio[-WS(ios, 1)];
+		    TK = TI + TJ;
+		    T1r = KP866025403 * (TI - TJ);
+	       }
+	       Tl = Th + Tk;
+	       TM = TK - TL;
+	       T1b = FMA(KP500000000, TK, TL);
+	       T1c = T1a - T1b;
+	       T1Y = T1a + T1b;
+	       T1q = FNMS(KP500000000, Tk, Th);
+	       T1s = T1q + T1r;
+	       T1Q = T1q - T1r;
+	  }
+	  {
+	       E T6, Tx, T9, T1j, Tw, T14, T13, T1k;
+	       T6 = iio[-WS(ios, 6)];
+	       Tx = rio[WS(ios, 6)];
+	       {
+		    E T7, T8, Tu, Tv;
+		    T7 = iio[-WS(ios, 10)];
+		    T8 = rio[WS(ios, 2)];
+		    T9 = T7 + T8;
+		    T1j = KP866025403 * (T7 - T8);
+		    Tu = rio[WS(ios, 10)];
+		    Tv = iio[-WS(ios, 2)];
+		    Tw = Tu - Tv;
+		    T14 = KP866025403 * (Tu + Tv);
+	       }
+	       Ta = T6 + T9;
+	       Ty = Tw + Tx;
+	       T13 = FNMS(KP500000000, T9, T6);
+	       T15 = T13 + T14;
+	       T1N = T13 - T14;
+	       T1k = FMS(KP500000000, Tw, Tx);
+	       T1l = T1j + T1k;
+	       T1V = T1k - T1j;
+	  }
+	  {
+	       E Tc, TD, Tf, T17, TG, T1o, T18, T1n;
+	       Tc = rio[WS(ios, 3)];
+	       TD = iio[-WS(ios, 3)];
+	       {
+		    E Td, Te, TE, TF;
+		    Td = iio[-WS(ios, 7)];
+		    Te = iio[-WS(ios, 11)];
+		    Tf = Td + Te;
+		    T17 = KP866025403 * (Td - Te);
+		    TE = rio[WS(ios, 7)];
+		    TF = rio[WS(ios, 11)];
+		    TG = TE + TF;
+		    T1o = KP866025403 * (TE - TF);
+	       }
+	       Tg = Tc + Tf;
+	       TH = TD - TG;
+	       T18 = FMA(KP500000000, TG, TD);
+	       T19 = T17 + T18;
+	       T1X = T18 - T17;
+	       T1n = FNMS(KP500000000, Tf, Tc);
+	       T1p = T1n + T1o;
+	       T1P = T1n - T1o;
+	  }
+	  {
+	       E Tb, Tm, TU, TW, TX, TY, TT, TV;
+	       Tb = T5 + Ta;
+	       Tm = Tg + Tl;
+	       TU = Tb - Tm;
+	       TW = Tt - Ty;
+	       TX = TH + TM;
+	       TY = TW - TX;
+	       rio[0] = Tb + Tm; /* these two outputs are stored without a W factor */
+	       iio[-WS(ios, 11)] = TW + TX;
+	       TT = W[10];
+	       TV = W[11];
+	       rio[WS(ios, 6)] = FNMS(TV, TY, TT * TU);
+	       iio[-WS(ios, 5)] = FMA(TV, TU, TT * TY);
+	  }
+	  {
+	       E T28, T2g, T2c, T2e;
+	       {
+		    E T26, T27, T2a, T2b;
+		    T26 = T1M - T1N;
+		    T27 = T1X + T1Y;
+		    T28 = T26 - T27;
+		    T2g = T26 + T27;
+		    T2a = T1U - T1V;
+		    T2b = T1P - T1Q;
+		    T2c = T2a + T2b;
+		    T2e = T2a - T2b;
+	       }
+	       {
+		    E T25, T29, T2d, T2f;
+		    T25 = W[8];
+		    T29 = W[9];
+		    rio[WS(ios, 5)] = FNMS(T29, T2c, T25 * T28);
+		    iio[-WS(ios, 6)] = FMA(T25, T2c, T29 * T28);
+		    T2d = W[20];
+		    T2f = W[21];
+		    iio[0] = FMA(T2d, T2e, T2f * T2g);
+		    rio[WS(ios, 11)] = FNMS(T2f, T2e, T2d * T2g);
+	       }
+	  }
+	  {
+	       E TA, TS, TO, TQ;
+	       {
+		    E To, Tz, TC, TN;
+		    To = Tg - Tl;
+		    Tz = Tt + Ty;
+		    TA = To + Tz;
+		    TS = Tz - To;
+		    TC = T5 - Ta;
+		    TN = TH - TM;
+		    TO = TC - TN;
+		    TQ = TC + TN;
+	       }
+	       {
+		    E Tn, TB, TP, TR;
+		    Tn = W[16];
+		    TB = W[17];
+		    iio[-WS(ios, 2)] = FMA(Tn, TA, TB * TO);
+		    rio[WS(ios, 9)] = FNMS(TB, TA, Tn * TO);
+		    TP = W[4];
+		    TR = W[5];
+		    rio[WS(ios, 3)] = FNMS(TR, TS, TP * TQ);
+		    iio[-WS(ios, 8)] = FMA(TP, TS, TR * TQ);
+	       }
+	  }
+	  {
+	       E T1S, T22, T20, T24;
+	       {
+		    E T1O, T1R, T1W, T1Z;
+		    T1O = T1M + T1N;
+		    T1R = T1P + T1Q;
+		    T1S = T1O - T1R;
+		    T22 = T1O + T1R;
+		    T1W = T1U + T1V;
+		    T1Z = T1X - T1Y;
+		    T20 = T1W - T1Z;
+		    T24 = T1W + T1Z;
+	       }
+	       {
+		    E T1L, T1T, T21, T23;
+		    T1L = W[2];
+		    T1T = W[3];
+		    rio[WS(ios, 2)] = FNMS(T1T, T20, T1L * T1S);
+		    iio[-WS(ios, 9)] = FMA(T1T, T1S, T1L * T20);
+		    T21 = W[14];
+		    T23 = W[15];
+		    rio[WS(ios, 8)] = FNMS(T23, T24, T21 * T22);
+		    iio[-WS(ios, 3)] = FMA(T23, T22, T21 * T24);
+	       }
+	  }
+	  {
+	       E T1C, T1I, T1G, T1K;
+	       {
+		    E T1A, T1B, T1E, T1F;
+		    T1A = T12 + T15;
+		    T1B = T1p + T1s;
+		    T1C = T1A - T1B;
+		    T1I = T1A + T1B;
+		    T1E = T1i + T1l;
+		    T1F = T19 + T1c;
+		    T1G = T1E - T1F;
+		    T1K = T1E + T1F;
+	       }
+	       {
+		    E T1z, T1D, T1H, T1J;
+		    T1z = W[18];
+		    T1D = W[19];
+		    rio[WS(ios, 10)] = FNMS(T1D, T1G, T1z * T1C);
+		    iio[-WS(ios, 1)] = FMA(T1D, T1C, T1z * T1G);
+		    T1H = W[6];
+		    T1J = W[7];
+		    rio[WS(ios, 4)] = FNMS(T1J, T1K, T1H * T1I);
+		    iio[-WS(ios, 7)] = FMA(T1J, T1I, T1H * T1K);
+	       }
+	  }
+	  {
+	       E T1e, T1y, T1u, T1w;
+	       {
+		    E T16, T1d, T1m, T1t;
+		    T16 = T12 - T15;
+		    T1d = T19 - T1c;
+		    T1e = T16 - T1d;
+		    T1y = T16 + T1d;
+		    T1m = T1i - T1l;
+		    T1t = T1p - T1s;
+		    T1u = T1m + T1t;
+		    T1w = T1m - T1t;
+	       }
+	       {
+		    E TZ, T1f, T1v, T1x;
+		    TZ = W[0];
+		    T1f = W[1];
+		    rio[WS(ios, 1)] = FNMS(T1f, T1u, TZ * T1e);
+		    iio[-WS(ios, 10)] = FMA(TZ, T1u, T1f * T1e);
+		    T1v = W[12];
+		    T1x = W[13];
+		    iio[-WS(ios, 4)] = FMA(T1v, T1w, T1x * T1y);
+		    rio[WS(ios, 7)] = FNMS(T1x, T1w, T1v * T1y);
+	       }
+	  }
+     }
+     return W; /* W has advanced by 22 for every pass executed */
+}
+
+static const tw_instr twinstr[] = {
+     {TW_FULL, 0, 12},
+     {TW_NEXT, 1, 0}
+};
+
+static const hc2hc_desc desc = { 12, "hb_12", twinstr, {88, 30, 30, 0}, &GENUS, 0, 0, 0 }; /* radix, nam, tw, ops, genus, s1, s2, dist */
+
+void X(codelet_hb_12) (planner *p) { /* registers hb_12 with planner p through the DIF registration hook */
+     X(khc2hc_dif_register) (p, hb_12, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_15.c b/src/fftw3/rdft/codelets/hc2r/hb_15.c
new file mode 100644
index 0000000..8531f81
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_15.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:36 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 15 -dif -name hb_15 -include hb.h */
+
+/*
+ * This function contains 184 FP additions, 112 FP multiplications,
+ * (or, 128 additions, 56 multiplications, 56 fused multiply/add),
+ * 75 stack variables, and 60 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_15.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_15.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_15.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_15(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{ /* In-place 15-point butterfly over rio[WS(ios, 0..14)] and iio[-WS(ios, 0..14)], with twiddles taken from W; returns the advanced W. */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* sqrt(5)/4 */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* sin(2*pi/5) */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 28) { /* one pass per i = m-2, m-4, ... > 0; each pass reads W[0..27] */
+	  E T5, T2N, TV, T25, T1v, T2o, T2W, T38, T37, T2X, T2Q, T2T, T2U, Tg, Tr;
+	  E Ts, Ty, TD, TE, T2c, T2d, T2m, TJ, TO, TP, T1Y, T1Z, T20, T1e, T1j;
+	  E T1p, T13, T18, T1o, T29, T2a, T2l, T21, T22, T23;
+	  { /* 3-term combination of the six values at offsets 0, 5 and 10 */
+	       E T1, T1r, T4, T1u, TU, T1s, TR, T1t;
+	       T1 = rio[0];
+	       T1r = iio[0];
+	       {
+		    E T2, T3, TS, TT;
+		    T2 = rio[WS(ios, 5)];
+		    T3 = iio[-WS(ios, 10)];
+		    T4 = T2 + T3;
+		    T1u = KP866025403 * (T2 - T3);
+		    TS = rio[WS(ios, 10)];
+		    TT = iio[-WS(ios, 5)];
+		    TU = KP866025403 * (TS + TT);
+		    T1s = TS - TT;
+	       }
+	       T5 = T1 + T4;
+	       T2N = T1r - T1s;
+	       TR = FNMS(KP500000000, T4, T1);
+	       TV = TR + TU;
+	       T25 = TR - TU;
+	       T1t = FMA(KP500000000, T1s, T1r);
+	       T1v = T1t - T1u;
+	       T2o = T1u + T1t;
+	  }
+	  { /* the same 3-term combination for offsets {3,8,13}, {2,7,12}, {1,6,11} and {4,9,14} */
+	       E Ta, T12, Tu, Tx, T2O, T11, Tf, T14, Tz, TC, T2P, T17, Tl, T1d, TF;
+	       E TI, T2R, T1c, Tq, T1i, TK, TN, T2S, T1h;
+	       {
+		    E T6, T7, T8, T9;
+		    T6 = rio[WS(ios, 3)];
+		    T7 = iio[-WS(ios, 8)];
+		    T8 = iio[-WS(ios, 13)];
+		    T9 = T7 + T8;
+		    Ta = T6 + T9;
+		    T12 = KP866025403 * (T7 - T8);
+		    Tu = FNMS(KP500000000, T9, T6);
+	       }
+	       {
+		    E TZ, Tv, Tw, T10;
+		    TZ = iio[-WS(ios, 3)];
+		    Tv = rio[WS(ios, 8)];
+		    Tw = rio[WS(ios, 13)];
+		    T10 = Tv + Tw;
+		    Tx = KP866025403 * (Tv - Tw);
+		    T2O = TZ - T10;
+		    T11 = FMA(KP500000000, T10, TZ);
+	       }
+	       {
+		    E Tb, Tc, Td, Te;
+		    Tb = iio[-WS(ios, 12)];
+		    Tc = rio[WS(ios, 2)];
+		    Td = rio[WS(ios, 7)];
+		    Te = Tc + Td;
+		    Tf = Tb + Te;
+		    T14 = KP866025403 * (Tc - Td);
+		    Tz = FNMS(KP500000000, Te, Tb);
+	       }
+	       {
+		    E T15, TA, TB, T16;
+		    T15 = rio[WS(ios, 12)];
+		    TA = iio[-WS(ios, 7)];
+		    TB = iio[-WS(ios, 2)];
+		    T16 = TB + TA;
+		    TC = KP866025403 * (TA - TB);
+		    T2P = T16 - T15;
+		    T17 = FMA(KP500000000, T16, T15);
+	       }
+	       {
+		    E Th, Ti, Tj, Tk;
+		    Th = rio[WS(ios, 6)];
+		    Ti = iio[-WS(ios, 11)];
+		    Tj = rio[WS(ios, 1)];
+		    Tk = Ti + Tj;
+		    Tl = Th + Tk;
+		    T1d = KP866025403 * (Ti - Tj);
+		    TF = FNMS(KP500000000, Tk, Th);
+	       }
+	       {
+		    E T1a, TG, TH, T1b;
+		    T1a = iio[-WS(ios, 6)];
+		    TG = rio[WS(ios, 11)];
+		    TH = iio[-WS(ios, 1)];
+		    T1b = TG - TH;
+		    TI = KP866025403 * (TG + TH);
+		    T2R = T1a - T1b;
+		    T1c = FMA(KP500000000, T1b, T1a);
+	       }
+	       {
+		    E Tm, Tn, To, Tp;
+		    Tm = iio[-WS(ios, 9)];
+		    Tn = iio[-WS(ios, 14)];
+		    To = rio[WS(ios, 4)];
+		    Tp = Tn + To;
+		    Tq = Tm + Tp;
+		    T1i = KP866025403 * (Tn - To);
+		    TK = FNMS(KP500000000, Tp, Tm);
+	       }
+	       {
+		    E T1g, TL, TM, T1f;
+		    T1g = rio[WS(ios, 9)];
+		    TL = rio[WS(ios, 14)];
+		    TM = iio[-WS(ios, 4)];
+		    T1f = TL - TM;
+		    TN = KP866025403 * (TL + TM);
+		    T2S = T1f + T1g;
+		    T1h = FMS(KP500000000, T1f, T1g);
+	       }
+	       T2W = Ta - Tf; /* from here on: pairwise sums and differences consumed by the three output sections below */
+	       T38 = T2R + T2S;
+	       T37 = T2O - T2P;
+	       T2X = Tl - Tq;
+	       T2Q = T2O + T2P;
+	       T2T = T2R - T2S;
+	       T2U = T2Q + T2T;
+	       Tg = Ta + Tf;
+	       Tr = Tl + Tq;
+	       Ts = Tg + Tr;
+	       Ty = Tu - Tx;
+	       TD = Tz - TC;
+	       TE = Ty + TD;
+	       T2c = T1d + T1c;
+	       T2d = T1i + T1h;
+	       T2m = T2c + T2d;
+	       TJ = TF - TI;
+	       TO = TK - TN;
+	       TP = TJ + TO;
+	       T1Y = Tu + Tx;
+	       T1Z = Tz + TC;
+	       T20 = T1Y + T1Z;
+	       T1e = T1c - T1d;
+	       T1j = T1h - T1i;
+	       T1p = T1e + T1j;
+	       T13 = T11 - T12;
+	       T18 = T14 + T17;
+	       T1o = T13 - T18;
+	       T29 = T12 + T11;
+	       T2a = T14 - T17;
+	       T2l = T29 + T2a;
+	       T21 = TF + TI;
+	       T22 = TK + TN;
+	       T23 = T21 + T22;
+	  }
+	  rio[0] = T5 + Ts; /* first store of the pass; all 30 inputs were loaded above, so overwriting in place is safe */
+	  { /* outputs rio[2,5,8,11,14] with partners iio[-12,-9,-6,-3,0] */
+	       E T1l, T1J, T1B, T1M, TY, T1U, T1I, T1y, T1W, T1N, T1T, T1V;
+	       {
+		    E T19, T1k, T1z, T1A;
+		    T19 = T13 + T18;
+		    T1k = T1e - T1j;
+		    T1l = FMA(KP951056516, T19, KP587785252 * T1k);
+		    T1J = FNMS(KP951056516, T1k, KP587785252 * T19);
+		    T1z = Ty - TD;
+		    T1A = TJ - TO;
+		    T1B = FMA(KP951056516, T1z, KP587785252 * T1A);
+		    T1M = FNMS(KP951056516, T1A, KP587785252 * T1z);
+	       }
+	       {
+		    E TQ, TW, TX, T1q, T1w, T1x;
+		    TQ = KP559016994 * (TE - TP);
+		    TW = TE + TP;
+		    TX = FNMS(KP250000000, TW, TV);
+		    TY = TQ + TX;
+		    T1U = TV + TW;
+		    T1I = TX - TQ;
+		    T1q = KP559016994 * (T1o - T1p);
+		    T1w = T1o + T1p;
+		    T1x = FNMS(KP250000000, T1w, T1v);
+		    T1y = T1q + T1x;
+		    T1W = T1v + T1w;
+		    T1N = T1x - T1q;
+	       }
+	       T1T = W[8];
+	       T1V = W[9];
+	       rio[WS(ios, 5)] = FNMS(T1V, T1W, T1T * T1U); /* NOTE(review): if FMA(a,b,c)=a*b+c and FNMS(a,b,c)=c-a*b (defined elsewhere), this pair is (T1U + i*T1W)*(W[8] + i*W[9]) -- confirm */
+	       iio[-WS(ios, 9)] = FMA(T1V, T1U, T1T * T1W);
+	       {
+		    E T1Q, T1S, T1P, T1R;
+		    T1Q = T1I + T1J;
+		    T1S = T1N - T1M;
+		    T1P = W[14];
+		    T1R = W[15];
+		    rio[WS(ios, 8)] = FNMS(T1R, T1S, T1P * T1Q);
+		    iio[-WS(ios, 6)] = FMA(T1R, T1Q, T1P * T1S);
+	       }
+	       {
+		    E T1m, T1C, Tt, T1n;
+		    T1m = TY + T1l;
+		    T1C = T1y - T1B;
+		    Tt = W[26];
+		    T1n = W[27];
+		    rio[WS(ios, 14)] = FNMS(T1n, T1C, Tt * T1m);
+		    iio[0] = FMA(T1n, T1m, Tt * T1C);
+	       }
+	       {
+		    E T1E, T1G, T1D, T1F;
+		    T1E = TY - T1l;
+		    T1G = T1B + T1y;
+		    T1D = W[20];
+		    T1F = W[21];
+		    rio[WS(ios, 11)] = FNMS(T1F, T1G, T1D * T1E);
+		    iio[-WS(ios, 3)] = FMA(T1F, T1E, T1D * T1G);
+	       }
+	       {
+		    E T1K, T1O, T1H, T1L;
+		    T1K = T1I - T1J;
+		    T1O = T1M + T1N;
+		    T1H = W[2];
+		    T1L = W[3];
+		    rio[WS(ios, 2)] = FNMS(T1L, T1O, T1H * T1K);
+		    iio[-WS(ios, 12)] = FMA(T1L, T1K, T1H * T1O);
+	       }
+	  }
+	  iio[-WS(ios, 14)] = T2N + T2U; /* stored without a twiddle factor, like rio[0] */
+	  { /* outputs rio[3,6,9,12] with partners iio[-11,-8,-5,-2] */
+	       E T2Y, T39, T3k, T3h, T36, T3g, T31, T3l;
+	       T2Y = FNMS(KP951056516, T2X, KP587785252 * T2W);
+	       T39 = FNMS(KP951056516, T38, KP587785252 * T37);
+	       T3k = FMA(KP951056516, T2W, KP587785252 * T2X);
+	       T3h = FMA(KP951056516, T37, KP587785252 * T38);
+	       {
+		    E T34, T35, T2Z, T30;
+		    T34 = FNMS(KP250000000, Ts, T5);
+		    T35 = KP559016994 * (Tg - Tr);
+		    T36 = T34 - T35;
+		    T3g = T35 + T34;
+		    T2Z = FNMS(KP250000000, T2U, T2N);
+		    T30 = KP559016994 * (T2Q - T2T);
+		    T31 = T2Z - T30;
+		    T3l = T30 + T2Z;
+	       }
+	       {
+		    E T32, T3a, T2V, T33;
+		    T32 = T2Y + T31;
+		    T3a = T36 - T39;
+		    T2V = W[22];
+		    T33 = W[23];
+		    iio[-WS(ios, 2)] = FMA(T2V, T32, T33 * T3a);
+		    rio[WS(ios, 12)] = FNMS(T33, T32, T2V * T3a);
+	       }
+	       {
+		    E T3o, T3q, T3n, T3p;
+		    T3o = T3l - T3k;
+		    T3q = T3g + T3h;
+		    T3n = W[16];
+		    T3p = W[17];
+		    iio[-WS(ios, 5)] = FMA(T3n, T3o, T3p * T3q);
+		    rio[WS(ios, 9)] = FNMS(T3p, T3o, T3n * T3q);
+	       }
+	       {
+		    E T3c, T3e, T3b, T3d;
+		    T3c = T36 + T39;
+		    T3e = T31 - T2Y;
+		    T3b = W[4];
+		    T3d = W[5];
+		    rio[WS(ios, 3)] = FNMS(T3d, T3e, T3b * T3c);
+		    iio[-WS(ios, 11)] = FMA(T3b, T3e, T3d * T3c);
+	       }
+	       {
+		    E T3i, T3m, T3f, T3j;
+		    T3i = T3g - T3h;
+		    T3m = T3k + T3l;
+		    T3f = W[10];
+		    T3j = W[11];
+		    rio[WS(ios, 6)] = FNMS(T3j, T3m, T3f * T3i);
+		    iio[-WS(ios, 8)] = FMA(T3f, T3m, T3j * T3i);
+	       }
+	  }
+	  { /* outputs rio[1,4,7,10,13] with partners iio[-13,-10,-7,-4,-1] */
+	       E T2f, T2z, T2k, T2D, T28, T2K, T2y, T2r, T2M, T2C, T2J, T2L;
+	       {
+		    E T2b, T2e, T2i, T2j;
+		    T2b = T29 - T2a;
+		    T2e = T2c - T2d;
+		    T2f = FMA(KP951056516, T2b, KP587785252 * T2e);
+		    T2z = FNMS(KP951056516, T2e, KP587785252 * T2b);
+		    T2i = T1Y - T1Z;
+		    T2j = T21 - T22;
+		    T2k = FMA(KP951056516, T2i, KP587785252 * T2j);
+		    T2D = FNMS(KP951056516, T2j, KP587785252 * T2i);
+	       }
+	       {
+		    E T24, T26, T27, T2n, T2p, T2q;
+		    T24 = KP559016994 * (T20 - T23);
+		    T26 = T20 + T23;
+		    T27 = FNMS(KP250000000, T26, T25);
+		    T28 = T24 + T27;
+		    T2K = T25 + T26;
+		    T2y = T27 - T24;
+		    T2n = KP559016994 * (T2l - T2m);
+		    T2p = T2l + T2m;
+		    T2q = FNMS(KP250000000, T2p, T2o);
+		    T2r = T2n + T2q;
+		    T2M = T2o + T2p;
+		    T2C = T2q - T2n;
+	       }
+	       T2J = W[18];
+	       T2L = W[19];
+	       rio[WS(ios, 10)] = FNMS(T2L, T2M, T2J * T2K);
+	       iio[-WS(ios, 4)] = FMA(T2L, T2K, T2J * T2M);
+	       {
+		    E T2u, T2w, T2t, T2v;
+		    T2u = T28 + T2f;
+		    T2w = T2r - T2k;
+		    T2t = W[6];
+		    T2v = W[7];
+		    rio[WS(ios, 4)] = FNMS(T2v, T2w, T2t * T2u);
+		    iio[-WS(ios, 10)] = FMA(T2v, T2u, T2t * T2w);
+	       }
+	       {
+		    E T2g, T2s, T1X, T2h;
+		    T2g = T28 - T2f;
+		    T2s = T2k + T2r;
+		    T1X = W[0];
+		    T2h = W[1];
+		    rio[WS(ios, 1)] = FNMS(T2h, T2s, T1X * T2g);
+		    iio[-WS(ios, 13)] = FMA(T2h, T2g, T1X * T2s);
+	       }
+	       {
+		    E T2A, T2E, T2x, T2B;
+		    T2A = T2y + T2z;
+		    T2E = T2C - T2D;
+		    T2x = W[24];
+		    T2B = W[25];
+		    rio[WS(ios, 13)] = FNMS(T2B, T2E, T2x * T2A);
+		    iio[-WS(ios, 1)] = FMA(T2B, T2A, T2x * T2E);
+	       }
+	       {
+		    E T2G, T2I, T2F, T2H;
+		    T2G = T2y - T2z;
+		    T2I = T2D + T2C;
+		    T2F = W[12];
+		    T2H = W[13];
+		    rio[WS(ios, 7)] = FNMS(T2H, T2I, T2F * T2G);
+		    iio[-WS(ios, 7)] = FMA(T2H, T2G, T2F * T2I);
+	       }
+	  }
+     }
+     return W; /* W has been advanced by 28 for every pass executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction list referenced by desc below */
+     {TW_FULL, 0, 15}, /* NOTE(review): presumably requests the full twiddle set for radix 15 -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* NOTE(review): presumably closes the instruction list -- confirm */
+};
+
+static const hc2hc_desc desc = { 15, "hb_15", twinstr, {128, 56, 56, 0}, &GENUS, 0, 0, 0 }; /* 128/56/56 repeat the add/mul/fma counts quoted in this file's header comment */
+
+void X(codelet_hb_15) (planner *p) { /* hands hb_15 and its descriptor to the hc2hc DIF registration routine for planner p */
+     X(khc2hc_dif_register) (p, hb_15, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_16.c b/src/fftw3/rdft/codelets/hc2r/hb_16.c
new file mode 100644
index 0000000..7fffeb9
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_16.c
@@ -0,0 +1,412 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:37 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 16 -dif -name hb_16 -include hb.h */
+
+/*
+ * This function contains 174 FP additions, 84 FP multiplications,
+ * (or, 136 additions, 46 multiplications, 38 fused multiply/add),
+ * 50 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_16.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_16.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_16.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_16(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{ /* In-place 16-point butterfly over rio[WS(ios, 0..15)] and iio[-WS(ios, 0..15)], with twiddles taken from W; returns the advanced W. */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 30) { /* one pass per i = m-2, m-4, ... > 0; each pass reads W[0..29] */
+	  E T7, T2K, T30, Tw, T1a, T2e, T2k, T1B, Te, TD, T1C, T13, T2n, T2Z, T2b;
+	  E T2L, Tm, T1v, TN, T10, T1W, T2p, T2P, T2W, Tt, T1w, TW, T11, T23, T2q;
+	  E T2S, T2X;
+	  { /* loads and combines the values at offsets 0, 4, 8 and 12 */
+	       E T3, T2c, T16, T2j, T6, T2i, T19, T2d;
+	       {
+		    E T1, T2, T14, T15;
+		    T1 = rio[0];
+		    T2 = iio[-WS(ios, 8)];
+		    T3 = T1 + T2;
+		    T2c = T1 - T2;
+		    T14 = iio[0];
+		    T15 = rio[WS(ios, 8)];
+		    T16 = T14 - T15;
+		    T2j = T14 + T15;
+	       }
+	       {
+		    E T4, T5, T17, T18;
+		    T4 = rio[WS(ios, 4)];
+		    T5 = iio[-WS(ios, 12)];
+		    T6 = T4 + T5;
+		    T2i = T4 - T5;
+		    T17 = iio[-WS(ios, 4)];
+		    T18 = rio[WS(ios, 12)];
+		    T19 = T17 - T18;
+		    T2d = T17 + T18;
+	       }
+	       T7 = T3 + T6;
+	       T2K = T2j - T2i;
+	       T30 = T2c + T2d;
+	       Tw = T3 - T6;
+	       T1a = T16 - T19;
+	       T2e = T2c - T2d;
+	       T2k = T2i + T2j;
+	       T1B = T16 + T19;
+	  }
+	  { /* loads offsets 2, 6, 10 and 14; the cross terms are scaled by sqrt(2)/2 */
+	       E Ta, T29, Tz, T28, Td, T25, TC, T26;
+	       {
+		    E T8, T9, Tx, Ty;
+		    T8 = rio[WS(ios, 2)];
+		    T9 = iio[-WS(ios, 10)];
+		    Ta = T8 + T9;
+		    T29 = T8 - T9;
+		    Tx = iio[-WS(ios, 2)];
+		    Ty = rio[WS(ios, 10)];
+		    Tz = Tx - Ty;
+		    T28 = Tx + Ty;
+	       }
+	       {
+		    E Tb, Tc, TA, TB;
+		    Tb = iio[-WS(ios, 14)];
+		    Tc = rio[WS(ios, 6)];
+		    Td = Tb + Tc;
+		    T25 = Tb - Tc;
+		    TA = iio[-WS(ios, 6)];
+		    TB = rio[WS(ios, 14)];
+		    TC = TA - TB;
+		    T26 = TB + TA;
+	       }
+	       Te = Ta + Td;
+	       TD = Tz - TC;
+	       T1C = Tz + TC;
+	       T13 = Td - Ta;
+	       {
+		    E T2l, T2m, T27, T2a;
+		    T2l = T29 + T28;
+		    T2m = T25 + T26;
+		    T2n = KP707106781 * (T2l - T2m);
+		    T2Z = KP707106781 * (T2l + T2m);
+		    T27 = T25 - T26;
+		    T2a = T28 - T29;
+		    T2b = KP707106781 * (T27 - T2a);
+		    T2L = KP707106781 * (T2a + T27);
+	       }
+	  }
+	  { /* loads offsets 1, 5, 9 and 13; the cross terms are rotated with cos(pi/8) and sin(pi/8) */
+	       E Ti, T1Q, TI, T1U, Tl, T1T, TL, T1R, TF, TM;
+	       {
+		    E Tg, Th, TG, TH;
+		    Tg = rio[WS(ios, 1)];
+		    Th = iio[-WS(ios, 9)];
+		    Ti = Tg + Th;
+		    T1Q = Tg - Th;
+		    TG = iio[-WS(ios, 1)];
+		    TH = rio[WS(ios, 9)];
+		    TI = TG - TH;
+		    T1U = TG + TH;
+	       }
+	       {
+		    E Tj, Tk, TJ, TK;
+		    Tj = rio[WS(ios, 5)];
+		    Tk = iio[-WS(ios, 13)];
+		    Tl = Tj + Tk;
+		    T1T = Tj - Tk;
+		    TJ = iio[-WS(ios, 5)];
+		    TK = rio[WS(ios, 13)];
+		    TL = TJ - TK;
+		    T1R = TJ + TK;
+	       }
+	       Tm = Ti + Tl;
+	       T1v = TI + TL;
+	       TF = Ti - Tl;
+	       TM = TI - TL;
+	       TN = TF + TM;
+	       T10 = TM - TF;
+	       {
+		    E T1S, T1V, T2N, T2O;
+		    T1S = T1Q - T1R;
+		    T1V = T1T + T1U;
+		    T1W = FNMS(KP382683432, T1V, KP923879532 * T1S);
+		    T2p = FMA(KP923879532, T1V, KP382683432 * T1S);
+		    T2N = T1U - T1T;
+		    T2O = T1Q + T1R;
+		    T2P = FNMS(KP382683432, T2O, KP923879532 * T2N);
+		    T2W = FMA(KP382683432, T2N, KP923879532 * T2O);
+	       }
+	  }
+	  { /* loads offsets 3, 7, 11 and 15, treated like the previous section */
+	       E Tp, T1X, TR, T21, Ts, T20, TU, T1Y, TO, TV;
+	       {
+		    E Tn, To, TP, TQ;
+		    Tn = iio[-WS(ios, 15)];
+		    To = rio[WS(ios, 7)];
+		    Tp = Tn + To;
+		    T1X = Tn - To;
+		    TP = iio[-WS(ios, 7)];
+		    TQ = rio[WS(ios, 15)];
+		    TR = TP - TQ;
+		    T21 = TQ + TP;
+	       }
+	       {
+		    E Tq, Tr, TS, TT;
+		    Tq = rio[WS(ios, 3)];
+		    Tr = iio[-WS(ios, 11)];
+		    Ts = Tq + Tr;
+		    T20 = Tq - Tr;
+		    TS = iio[-WS(ios, 3)];
+		    TT = rio[WS(ios, 11)];
+		    TU = TS - TT;
+		    T1Y = TS + TT;
+	       }
+	       Tt = Tp + Ts;
+	       T1w = TU + TR;
+	       TO = Tp - Ts;
+	       TV = TR - TU;
+	       TW = TO - TV;
+	       T11 = TO + TV;
+	       {
+		    E T1Z, T22, T2Q, T2R;
+		    T1Z = T1X - T1Y;
+		    T22 = T20 - T21;
+		    T23 = FMA(KP923879532, T1Z, KP382683432 * T22);
+		    T2q = FNMS(KP382683432, T1Z, KP923879532 * T22);
+		    T2Q = T1X + T1Y;
+		    T2R = T20 + T21;
+		    T2S = FNMS(KP923879532, T2R, KP382683432 * T2Q);
+		    T2X = FMA(KP923879532, T2Q, KP382683432 * T2R);
+	       }
+	  }
+	  { /* outputs rio[0]/iio[-15] (stored without a twiddle factor) and rio[8]/iio[-7] */
+	       E Tf, Tu, T1K, T1M, T1N, T1O, T1J, T1L;
+	       Tf = T7 + Te;
+	       Tu = Tm + Tt;
+	       T1K = Tf - Tu;
+	       T1M = T1C + T1B;
+	       T1N = T1v + T1w;
+	       T1O = T1M - T1N;
+	       rio[0] = Tf + Tu; /* first store of the pass; all 32 inputs were loaded above, so overwriting in place is safe */
+	       iio[-WS(ios, 15)] = T1N + T1M;
+	       T1J = W[14];
+	       T1L = W[15];
+	       rio[WS(ios, 8)] = FNMS(T1L, T1O, T1J * T1K); /* NOTE(review): if FMA(a,b,c)=a*b+c and FNMS(a,b,c)=c-a*b (defined elsewhere), this pair is (T1K + i*T1O)*(W[14] + i*W[15]) -- confirm */
+	       iio[-WS(ios, 7)] = FMA(T1L, T1K, T1J * T1O);
+	  }
+	  { /* outputs rio[15]/iio[0] and rio[7]/iio[-8] */
+	       E T2U, T36, T32, T34;
+	       {
+		    E T2M, T2T, T2Y, T31;
+		    T2M = T2K + T2L;
+		    T2T = T2P + T2S;
+		    T2U = T2M + T2T;
+		    T36 = T2M - T2T;
+		    T2Y = T2W + T2X;
+		    T31 = T2Z + T30;
+		    T32 = T2Y + T31;
+		    T34 = T31 - T2Y;
+	       }
+	       {
+		    E T2J, T2V, T33, T35;
+		    T2J = W[28];
+		    T2V = W[29];
+		    iio[0] = FMA(T2J, T2U, T2V * T32);
+		    rio[WS(ios, 15)] = FNMS(T2V, T2U, T2J * T32);
+		    T33 = W[12];
+		    T35 = W[13];
+		    rio[WS(ios, 7)] = FNMS(T35, T36, T33 * T34);
+		    iio[-WS(ios, 8)] = FMA(T33, T36, T35 * T34);
+	       }
+	  }
+	  { /* outputs rio[14]/iio[-1] and rio[6]/iio[-9] */
+	       E TY, T1e, T1c, T1g;
+	       {
+		    E TE, TX, T12, T1b;
+		    TE = Tw + TD;
+		    TX = KP707106781 * (TN + TW);
+		    TY = TE + TX;
+		    T1e = TE - TX;
+		    T12 = KP707106781 * (T10 + T11);
+		    T1b = T13 + T1a;
+		    T1c = T12 + T1b;
+		    T1g = T1b - T12;
+	       }
+	       {
+		    E Tv, TZ, T1d, T1f;
+		    Tv = W[26];
+		    TZ = W[27];
+		    rio[WS(ios, 14)] = FNMS(TZ, T1c, Tv * TY);
+		    iio[-WS(ios, 1)] = FMA(TZ, TY, Tv * T1c);
+		    T1d = W[10];
+		    T1f = W[11];
+		    rio[WS(ios, 6)] = FNMS(T1f, T1g, T1d * T1e);
+		    iio[-WS(ios, 9)] = FMA(T1f, T1e, T1d * T1g);
+	       }
+	  }
+	  { /* outputs rio[1]/iio[-14] and rio[9]/iio[-6] */
+	       E T2g, T2w, T2s, T2u;
+	       {
+		    E T24, T2f, T2o, T2r;
+		    T24 = T1W + T23;
+		    T2f = T2b + T2e;
+		    T2g = T24 + T2f;
+		    T2w = T2f - T24;
+		    T2o = T2k + T2n;
+		    T2r = T2p + T2q;
+		    T2s = T2o + T2r;
+		    T2u = T2o - T2r;
+	       }
+	       {
+		    E T1P, T2h, T2t, T2v;
+		    T1P = W[0];
+		    T2h = W[1];
+		    rio[WS(ios, 1)] = FNMS(T2h, T2s, T1P * T2g);
+		    iio[-WS(ios, 14)] = FMA(T1P, T2s, T2h * T2g);
+		    T2t = W[16];
+		    T2v = W[17];
+		    iio[-WS(ios, 6)] = FMA(T2t, T2u, T2v * T2w);
+		    rio[WS(ios, 9)] = FNMS(T2v, T2u, T2t * T2w);
+	       }
+	  }
+	  { /* outputs rio[2]/iio[-13] and rio[10]/iio[-5] */
+	       E T1k, T1q, T1o, T1s;
+	       {
+		    E T1i, T1j, T1m, T1n;
+		    T1i = Tw - TD;
+		    T1j = KP707106781 * (T11 - T10);
+		    T1k = T1i + T1j;
+		    T1q = T1i - T1j;
+		    T1m = KP707106781 * (TN - TW);
+		    T1n = T1a - T13;
+		    T1o = T1m + T1n;
+		    T1s = T1n - T1m;
+	       }
+	       {
+		    E T1h, T1l, T1p, T1r;
+		    T1h = W[2];
+		    T1l = W[3];
+		    rio[WS(ios, 2)] = FNMS(T1l, T1o, T1h * T1k);
+		    iio[-WS(ios, 13)] = FMA(T1l, T1k, T1h * T1o);
+		    T1p = W[18];
+		    T1r = W[19];
+		    rio[WS(ios, 10)] = FNMS(T1r, T1s, T1p * T1q);
+		    iio[-WS(ios, 5)] = FMA(T1r, T1q, T1p * T1s);
+	       }
+	  }
+	  { /* outputs rio[13]/iio[-2] and rio[5]/iio[-10] */
+	       E T2A, T2I, T2E, T2G;
+	       {
+		    E T2y, T2z, T2C, T2D;
+		    T2y = T2k - T2n;
+		    T2z = T23 - T1W;
+		    T2A = T2y + T2z;
+		    T2I = T2y - T2z;
+		    T2C = T2p - T2q;
+		    T2D = T2e - T2b;
+		    T2E = T2C + T2D;
+		    T2G = T2D - T2C;
+	       }
+	       {
+		    E T2x, T2B, T2F, T2H;
+		    T2x = W[24];
+		    T2B = W[25];
+		    iio[-WS(ios, 2)] = FMA(T2x, T2A, T2B * T2E);
+		    rio[WS(ios, 13)] = FNMS(T2B, T2A, T2x * T2E);
+		    T2F = W[8];
+		    T2H = W[9];
+		    rio[WS(ios, 5)] = FNMS(T2H, T2I, T2F * T2G);
+		    iio[-WS(ios, 10)] = FMA(T2F, T2I, T2H * T2G);
+	       }
+	  }
+	  { /* outputs rio[12]/iio[-3] and rio[4]/iio[-11] */
+	       E T1y, T1G, T1E, T1I;
+	       {
+		    E T1u, T1x, T1A, T1D;
+		    T1u = T7 - Te;
+		    T1x = T1v - T1w;
+		    T1y = T1u + T1x;
+		    T1G = T1u - T1x;
+		    T1A = Tt - Tm;
+		    T1D = T1B - T1C;
+		    T1E = T1A + T1D;
+		    T1I = T1D - T1A;
+	       }
+	       {
+		    E T1t, T1z, T1F, T1H;
+		    T1t = W[22];
+		    T1z = W[23];
+		    rio[WS(ios, 12)] = FNMS(T1z, T1E, T1t * T1y);
+		    iio[-WS(ios, 3)] = FMA(T1z, T1y, T1t * T1E);
+		    T1F = W[6];
+		    T1H = W[7];
+		    rio[WS(ios, 4)] = FNMS(T1H, T1I, T1F * T1G);
+		    iio[-WS(ios, 11)] = FMA(T1H, T1G, T1F * T1I);
+	       }
+	  }
+	  { /* outputs rio[3]/iio[-12] and rio[11]/iio[-4] */
+	       E T3a, T3i, T3e, T3g;
+	       {
+		    E T38, T39, T3c, T3d;
+		    T38 = T2S - T2P;
+		    T39 = T30 - T2Z;
+		    T3a = T38 + T39;
+		    T3i = T39 - T38;
+		    T3c = T2K - T2L;
+		    T3d = T2W - T2X;
+		    T3e = T3c + T3d;
+		    T3g = T3c - T3d;
+	       }
+	       {
+		    E T37, T3b, T3f, T3h;
+		    T37 = W[4];
+		    T3b = W[5];
+		    rio[WS(ios, 3)] = FNMS(T3b, T3e, T37 * T3a);
+		    iio[-WS(ios, 12)] = FMA(T37, T3e, T3b * T3a);
+		    T3f = W[20];
+		    T3h = W[21];
+		    iio[-WS(ios, 4)] = FMA(T3f, T3g, T3h * T3i);
+		    rio[WS(ios, 11)] = FNMS(T3h, T3g, T3f * T3i);
+	       }
+	  }
+     }
+     return W; /* W has been advanced by 30 for every pass executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction list referenced by desc below */
+     {TW_FULL, 0, 16}, /* NOTE(review): presumably requests the full twiddle set for radix 16 -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* NOTE(review): presumably closes the instruction list -- confirm */
+};
+
+static const hc2hc_desc desc = { 16, "hb_16", twinstr, {136, 46, 38, 0}, &GENUS, 0, 0, 0 }; /* 136/46/38 repeat the add/mul/fma counts quoted in this file's header comment */
+
+void X(codelet_hb_16) (planner *p) { /* hands hb_16 and its descriptor to the hc2hc DIF registration routine for planner p */
+     X(khc2hc_dif_register) (p, hb_16, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_2.c b/src/fftw3/rdft/codelets/hc2r/hb_2.c
new file mode 100644
index 0000000..91740d2
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_2.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:22 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 2 -dif -name hb_2 -include hb.h */
+
+/*
+ * This function contains 6 FP additions, 4 FP multiplications,
+ * (or, 4 additions, 2 multiplications, 2 fused multiply/add),
+ * 9 stack variables, and 8 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_2.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_2.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_2.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_2(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{
+     /* In-place 2-point butterfly plus one twiddle pair; one pass per i = m-2, m-4, ... > 0. */
+     int left = m - 2;
+     while (left > 0) {
+	  /* Read all four inputs up front: each slot is overwritten further down. */
+	  E a_re = rio[0], b_re = iio[-WS(ios, 1)];
+	  E a_im = iio[0], b_im = rio[WS(ios, 1)];
+	  E d_re = a_re - b_re, d_im = a_im + b_im, w0, w1;
+	  rio[0] = a_re + b_re;
+	  iio[-WS(ios, 1)] = a_im - b_im;
+	  w0 = W[0];
+	  w1 = W[1];
+	  /* NOTE(review): with FMA(a,b,c)=a*b+c, FNMS(a,b,c)=c-a*b this is (d_re + i*d_im)*(w0 + i*w1) -- confirm. */
+	  rio[WS(ios, 1)] = FNMS(w1, d_im, w0 * d_re);
+	  iio[0] = FMA(w1, d_re, w0 * d_im);
+	  left -= 2, rio += dist, iio -= dist, W += 2;
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction list referenced by desc below */
+     {TW_FULL, 0, 2}, /* NOTE(review): presumably requests the full twiddle set for radix 2 -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* NOTE(review): presumably closes the instruction list -- confirm */
+};
+
+static const hc2hc_desc desc = { 2, "hb_2", twinstr, {4, 2, 2, 0}, &GENUS, 0, 0, 0 }; /* 4/2/2 repeat the add/mul/fma counts quoted in this file's header comment */
+
+void X(codelet_hb_2) (planner *p) { /* hands hb_2 and its descriptor to the hc2hc DIF registration routine for planner p */
+     X(khc2hc_dif_register) (p, hb_2, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_3.c b/src/fftw3/rdft/codelets/hc2r/hb_3.c
new file mode 100644
index 0000000..b2362cb
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_3.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:22 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 3 -dif -name hb_3 -include hb.h */
+
+/*
+ * This function contains 16 FP additions, 12 FP multiplications,
+ * (or, 10 additions, 6 multiplications, 6 fused multiply/add),
+ * 15 stack variables, and 12 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_3.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_3.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_3.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_3(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     int left = m - 2;
+     /* In-place 3-point butterfly plus two twiddle pairs (W[0..3]);
+      * one pass per i = m-2, m-4, ... > 0, returning the advanced W. */
+     while (left > 0) {
+	  E x0_re, sum_re, base_re, cross_im, x0_im, dif_im, cross_re, base_im;
+	  {
+	       /* All six inputs are read in this block, before any store. */
+	       E p_re = rio[WS(ios, 1)], q_re = iio[-WS(ios, 2)];
+	       E p_im = rio[WS(ios, 2)], q_im = iio[-WS(ios, 1)];
+	       x0_re = rio[0];
+	       x0_im = iio[0];
+	       sum_re = p_re + q_re;
+	       dif_im = p_im - q_im;
+	       base_re = FNMS(KP500000000, sum_re, x0_re);
+	       base_im = FMA(KP500000000, dif_im, x0_im);
+	       cross_im = KP866025403 * (p_re - q_re);
+	       cross_re = KP866025403 * (p_im + q_im);
+	  }
+	  /* The first output pair is stored without a twiddle factor. */
+	  rio[0] = x0_re + sum_re;
+	  iio[-WS(ios, 2)] = x0_im - dif_im;
+	  {
+	       /* Output pair at offset 2: uses W[2], W[3]. */
+	       E y_im = base_im - cross_im, y_re = base_re + cross_re;
+	       E w_a = W[2], w_b = W[3];
+	       iio[0] = FMA(w_a, y_im, w_b * y_re);
+	       rio[WS(ios, 2)] = FNMS(w_b, y_im, w_a * y_re);
+	  }
+	  {
+	       E y_re = base_re - cross_re, y_im = cross_im + base_im;
+	       E w_a = W[0], w_b = W[1];
+	       rio[WS(ios, 1)] = FNMS(w_b, y_im, w_a * y_re);
+	       iio[-WS(ios, 1)] = FMA(w_a, y_im, w_b * y_re);
+	  }
+	  left -= 2;
+	  rio += dist;
+	  iio -= dist;
+	  W += 4;
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction list referenced by desc below */
+     {TW_FULL, 0, 3}, /* NOTE(review): presumably requests the full twiddle set for radix 3 -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* NOTE(review): presumably closes the instruction list -- confirm */
+};
+
+static const hc2hc_desc desc = { 3, "hb_3", twinstr, {10, 6, 6, 0}, &GENUS, 0, 0, 0 }; /* 10/6/6 repeat the add/mul/fma counts quoted in this file's header comment */
+
+void X(codelet_hb_3) (planner *p) { /* hands hb_3 and its descriptor to the hc2hc DIF registration routine for planner p */
+     X(khc2hc_dif_register) (p, hb_3, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_32.c b/src/fftw3/rdft/codelets/hc2r/hb_32.c
new file mode 100644
index 0000000..f3358ab
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_32.c
@@ -0,0 +1,890 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:39 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 32 -dif -name hb_32 -include hb.h */
+
+/*
+ * This function contains 434 FP additions, 208 FP multiplications,
+ * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
+ * 98 stack variables, and 128 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_32(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{ /* Size-32 in-place pass: every iteration reads 32 values through rio and 32 through iio, then overwrites those same 64 locations. */
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191); /* numerically sin(3*pi/16) */
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* numerically cos(3*pi/16) */
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618); /* numerically sin(pi/16) */
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* numerically cos(pi/16) */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* numerically sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 62) { /* i steps down by 2 from m-2; per pass rio moves up and iio moves down by dist, and 62 entries of W are consumed */
+	  E T5n, T6y, T77, T5u, Tf, T3i, T5x, T76, T3G, T47, T1a, T2I, T5k, T6z, T2o;
+	  E T2Y, Tu, T3D, T6D, T73, T6G, T74, T1j, T2d, T1s, T2e, T55, T5z, T5c, T5A;
+	  E T3l, T48, TK, T3n, T6L, T7t, T6O, T7s, T1D, T2L, T1M, T2M, T4w, T62, T4D;
+	  E T61, T3q, T41, TZ, T3s, T6S, T7w, T6V, T7v, T1W, T2O, T25, T2P, T4P, T64;
+	  E T4W, T65, T3v, T42;
+	  { /* inputs whose WS offset is a multiple of 4 (0, 4, 8, ..., 28), from both rio and iio */
+	       E T3, T5l, T2j, T5t, T6, T5s, T2m, T5m, Ta, T5i, T15, T5h, Td, T5e, T18;
+	       E T5f;
+	       {
+		    E T1, T2, T2h, T2i;
+		    T1 = rio[0];
+		    T2 = iio[-WS(ios, 16)];
+		    T3 = T1 + T2;
+		    T5l = T1 - T2;
+		    T2h = iio[0];
+		    T2i = rio[WS(ios, 16)];
+		    T2j = T2h - T2i;
+		    T5t = T2h + T2i;
+	       }
+	       {
+		    E T4, T5, T2k, T2l;
+		    T4 = rio[WS(ios, 8)];
+		    T5 = iio[-WS(ios, 24)];
+		    T6 = T4 + T5;
+		    T5s = T4 - T5;
+		    T2k = iio[-WS(ios, 8)];
+		    T2l = rio[WS(ios, 24)];
+		    T2m = T2k - T2l;
+		    T5m = T2k + T2l;
+	       }
+	       {
+		    E T8, T9, T13, T14;
+		    T8 = rio[WS(ios, 4)];
+		    T9 = iio[-WS(ios, 20)];
+		    Ta = T8 + T9;
+		    T5i = T8 - T9;
+		    T13 = iio[-WS(ios, 4)];
+		    T14 = rio[WS(ios, 20)];
+		    T15 = T13 - T14;
+		    T5h = T13 + T14;
+	       }
+	       {
+		    E Tb, Tc, T16, T17;
+		    Tb = iio[-WS(ios, 28)];
+		    Tc = rio[WS(ios, 12)];
+		    Td = Tb + Tc;
+		    T5e = Tb - Tc;
+		    T16 = iio[-WS(ios, 12)];
+		    T17 = rio[WS(ios, 28)];
+		    T18 = T16 - T17;
+		    T5f = T17 + T16;
+	       }
+	       {
+		    E T7, Te, T12, T19;
+		    T5n = T5l - T5m;
+		    T6y = T5t - T5s;
+		    T77 = T5l + T5m;
+		    T5u = T5s + T5t;
+		    T7 = T3 + T6;
+		    Te = Ta + Td;
+		    Tf = T7 + Te;
+		    T3i = T7 - Te;
+		    {
+			 E T5v, T5w, T3E, T3F;
+			 T5v = T5i + T5h;
+			 T5w = T5e + T5f;
+			 T5x = KP707106781 * (T5v - T5w);
+			 T76 = KP707106781 * (T5v + T5w);
+			 T3E = T2j + T2m;
+			 T3F = T15 + T18;
+			 T3G = T3E - T3F;
+			 T47 = T3F + T3E;
+		    }
+		    T12 = T3 - T6;
+		    T19 = T15 - T18;
+		    T1a = T12 + T19;
+		    T2I = T12 - T19;
+		    {
+			 E T5g, T5j, T2g, T2n;
+			 T5g = T5e - T5f;
+			 T5j = T5h - T5i;
+			 T5k = KP707106781 * (T5g - T5j);
+			 T6z = KP707106781 * (T5j + T5g);
+			 T2g = Td - Ta;
+			 T2n = T2j - T2m;
+			 T2o = T2g + T2n;
+			 T2Y = T2n - T2g;
+		    }
+	       }
+	  }
+	  { /* inputs whose WS offset is 2 mod 4 (2, 6, 10, ..., 30) */
+	       E Ti, T4Z, T1e, T53, Tl, T52, T1h, T50, Tp, T56, T1n, T5a, Ts, T59, T1q;
+	       E T57;
+	       {
+		    E Tg, Th, T1c, T1d;
+		    Tg = rio[WS(ios, 2)];
+		    Th = iio[-WS(ios, 18)];
+		    Ti = Tg + Th;
+		    T4Z = Tg - Th;
+		    T1c = iio[-WS(ios, 2)];
+		    T1d = rio[WS(ios, 18)];
+		    T1e = T1c - T1d;
+		    T53 = T1c + T1d;
+	       }
+	       {
+		    E Tj, Tk, T1f, T1g;
+		    Tj = rio[WS(ios, 10)];
+		    Tk = iio[-WS(ios, 26)];
+		    Tl = Tj + Tk;
+		    T52 = Tj - Tk;
+		    T1f = iio[-WS(ios, 10)];
+		    T1g = rio[WS(ios, 26)];
+		    T1h = T1f - T1g;
+		    T50 = T1f + T1g;
+	       }
+	       {
+		    E Tn, To, T1l, T1m;
+		    Tn = iio[-WS(ios, 30)];
+		    To = rio[WS(ios, 14)];
+		    Tp = Tn + To;
+		    T56 = Tn - To;
+		    T1l = iio[-WS(ios, 14)];
+		    T1m = rio[WS(ios, 30)];
+		    T1n = T1l - T1m;
+		    T5a = T1m + T1l;
+	       }
+	       {
+		    E Tq, Tr, T1o, T1p;
+		    Tq = rio[WS(ios, 6)];
+		    Tr = iio[-WS(ios, 22)];
+		    Ts = Tq + Tr;
+		    T59 = Tq - Tr;
+		    T1o = iio[-WS(ios, 6)];
+		    T1p = rio[WS(ios, 22)];
+		    T1q = T1o - T1p;
+		    T57 = T1o + T1p;
+	       }
+	       {
+		    E Tm, Tt, T6B, T6C;
+		    Tm = Ti + Tl;
+		    Tt = Tp + Ts;
+		    Tu = Tm + Tt;
+		    T3D = Tt - Tm;
+		    T6B = T53 - T52;
+		    T6C = T4Z + T50;
+		    T6D = FNMS(KP382683432, T6C, KP923879532 * T6B);
+		    T73 = FMA(KP382683432, T6B, KP923879532 * T6C);
+	       }
+	       {
+		    E T6E, T6F, T1b, T1i;
+		    T6E = T56 + T57;
+		    T6F = T59 + T5a;
+		    T6G = FNMS(KP923879532, T6F, KP382683432 * T6E);
+		    T74 = FMA(KP923879532, T6E, KP382683432 * T6F);
+		    T1b = Ti - Tl;
+		    T1i = T1e - T1h;
+		    T1j = T1b + T1i;
+		    T2d = T1i - T1b;
+	       }
+	       {
+		    E T1k, T1r, T51, T54;
+		    T1k = Tp - Ts;
+		    T1r = T1n - T1q;
+		    T1s = T1k - T1r;
+		    T2e = T1k + T1r;
+		    T51 = T4Z - T50;
+		    T54 = T52 + T53;
+		    T55 = FNMS(KP382683432, T54, KP923879532 * T51);
+		    T5z = FMA(KP923879532, T54, KP382683432 * T51);
+	       }
+	       {
+		    E T58, T5b, T3j, T3k;
+		    T58 = T56 - T57;
+		    T5b = T59 - T5a;
+		    T5c = FMA(KP923879532, T58, KP382683432 * T5b);
+		    T5A = FNMS(KP382683432, T58, KP923879532 * T5b);
+		    T3j = T1e + T1h;
+		    T3k = T1q + T1n;
+		    T3l = T3j - T3k;
+		    T48 = T3j + T3k;
+	       }
+	  }
+	  { /* inputs whose WS offset is 1 mod 4 (1, 5, 9, ..., 29) */
+	       E Ty, T4t, T1H, T4y, TB, T4x, T1K, T4u, TI, T4B, T1B, T4o, TF, T4A, T1y;
+	       E T4r;
+	       {
+		    E Tw, Tx, T1I, T1J;
+		    Tw = rio[WS(ios, 1)];
+		    Tx = iio[-WS(ios, 17)];
+		    Ty = Tw + Tx;
+		    T4t = Tw - Tx;
+		    {
+			 E T1F, T1G, Tz, TA;
+			 T1F = iio[-WS(ios, 1)];
+			 T1G = rio[WS(ios, 17)];
+			 T1H = T1F - T1G;
+			 T4y = T1F + T1G;
+			 Tz = rio[WS(ios, 9)];
+			 TA = iio[-WS(ios, 25)];
+			 TB = Tz + TA;
+			 T4x = Tz - TA;
+		    }
+		    T1I = iio[-WS(ios, 9)];
+		    T1J = rio[WS(ios, 25)];
+		    T1K = T1I - T1J;
+		    T4u = T1I + T1J;
+		    {
+			 E TG, TH, T4m, T1z, T1A, T4n;
+			 TG = iio[-WS(ios, 29)];
+			 TH = rio[WS(ios, 13)];
+			 T4m = TG - TH;
+			 T1z = iio[-WS(ios, 13)];
+			 T1A = rio[WS(ios, 29)];
+			 T4n = T1A + T1z;
+			 TI = TG + TH;
+			 T4B = T4m + T4n;
+			 T1B = T1z - T1A;
+			 T4o = T4m - T4n;
+		    }
+		    {
+			 E TD, TE, T4q, T1w, T1x, T4p;
+			 TD = rio[WS(ios, 5)];
+			 TE = iio[-WS(ios, 21)];
+			 T4q = TD - TE;
+			 T1w = iio[-WS(ios, 5)];
+			 T1x = rio[WS(ios, 21)];
+			 T4p = T1w + T1x;
+			 TF = TD + TE;
+			 T4A = T4q + T4p;
+			 T1y = T1w - T1x;
+			 T4r = T4p - T4q;
+		    }
+	       }
+	       {
+		    E TC, TJ, T6J, T6K;
+		    TC = Ty + TB;
+		    TJ = TF + TI;
+		    TK = TC + TJ;
+		    T3n = TC - TJ;
+		    T6J = T4y - T4x;
+		    T6K = KP707106781 * (T4r + T4o);
+		    T6L = T6J + T6K;
+		    T7t = T6J - T6K;
+	       }
+	       {
+		    E T6M, T6N, T1v, T1C;
+		    T6M = KP707106781 * (T4A + T4B);
+		    T6N = T4t + T4u;
+		    T6O = T6M + T6N;
+		    T7s = T6N - T6M;
+		    T1v = Ty - TB;
+		    T1C = T1y - T1B;
+		    T1D = T1v + T1C;
+		    T2L = T1v - T1C;
+	       }
+	       {
+		    E T1E, T1L, T4s, T4v;
+		    T1E = TI - TF;
+		    T1L = T1H - T1K;
+		    T1M = T1E + T1L;
+		    T2M = T1L - T1E;
+		    T4s = KP707106781 * (T4o - T4r);
+		    T4v = T4t - T4u;
+		    T4w = T4s + T4v;
+		    T62 = T4v - T4s;
+	       }
+	       {
+		    E T4z, T4C, T3o, T3p;
+		    T4z = T4x + T4y;
+		    T4C = KP707106781 * (T4A - T4B);
+		    T4D = T4z + T4C;
+		    T61 = T4z - T4C;
+		    T3o = T1H + T1K;
+		    T3p = T1y + T1B;
+		    T3q = T3o - T3p;
+		    T41 = T3p + T3o;
+	       }
+	  }
+	  { /* inputs whose WS offset is 3 mod 4 (3, 7, 11, ..., 31) */
+	       E TN, T4T, T20, T4N, TQ, T4M, T23, T4U, TX, T4Q, T1U, T4K, TU, T4R, T1R;
+	       E T4H;
+	       {
+		    E TL, TM, T21, T22;
+		    TL = iio[-WS(ios, 31)];
+		    TM = rio[WS(ios, 15)];
+		    TN = TL + TM;
+		    T4T = TL - TM;
+		    {
+			 E T1Y, T1Z, TO, TP;
+			 T1Y = iio[-WS(ios, 15)];
+			 T1Z = rio[WS(ios, 31)];
+			 T20 = T1Y - T1Z;
+			 T4N = T1Z + T1Y;
+			 TO = rio[WS(ios, 7)];
+			 TP = iio[-WS(ios, 23)];
+			 TQ = TO + TP;
+			 T4M = TO - TP;
+		    }
+		    T21 = iio[-WS(ios, 7)];
+		    T22 = rio[WS(ios, 23)];
+		    T23 = T21 - T22;
+		    T4U = T21 + T22;
+		    {
+			 E TV, TW, T4I, T1S, T1T, T4J;
+			 TV = iio[-WS(ios, 27)];
+			 TW = rio[WS(ios, 11)];
+			 T4I = TV - TW;
+			 T1S = iio[-WS(ios, 11)];
+			 T1T = rio[WS(ios, 27)];
+			 T4J = T1T + T1S;
+			 TX = TV + TW;
+			 T4Q = T4I - T4J;
+			 T1U = T1S - T1T;
+			 T4K = T4I + T4J;
+		    }
+		    {
+			 E TS, TT, T4F, T1P, T1Q, T4G;
+			 TS = rio[WS(ios, 3)];
+			 TT = iio[-WS(ios, 19)];
+			 T4F = TS - TT;
+			 T1P = iio[-WS(ios, 3)];
+			 T1Q = rio[WS(ios, 19)];
+			 T4G = T1P + T1Q;
+			 TU = TS + TT;
+			 T4R = T4G - T4F;
+			 T1R = T1P - T1Q;
+			 T4H = T4F + T4G;
+		    }
+	       }
+	       {
+		    E TR, TY, T6Q, T6R;
+		    TR = TN + TQ;
+		    TY = TU + TX;
+		    TZ = TR + TY;
+		    T3s = TR - TY;
+		    T6Q = KP707106781 * (T4R + T4Q);
+		    T6R = T4M + T4N;
+		    T6S = T6Q - T6R;
+		    T7w = T6Q + T6R;
+	       }
+	       {
+		    E T6T, T6U, T1O, T1V;
+		    T6T = KP707106781 * (T4H + T4K);
+		    T6U = T4T + T4U;
+		    T6V = T6T + T6U;
+		    T7v = T6U - T6T;
+		    T1O = TN - TQ;
+		    T1V = T1R - T1U;
+		    T1W = T1O + T1V;
+		    T2O = T1O - T1V;
+	       }
+	       {
+		    E T1X, T24, T4L, T4O;
+		    T1X = TX - TU;
+		    T24 = T20 - T23;
+		    T25 = T1X + T24;
+		    T2P = T24 - T1X;
+		    T4L = KP707106781 * (T4H - T4K);
+		    T4O = T4M - T4N;
+		    T4P = T4L + T4O;
+		    T64 = T4O - T4L;
+	       }
+	       {
+		    E T4S, T4V, T3t, T3u;
+		    T4S = KP707106781 * (T4Q - T4R);
+		    T4V = T4T - T4U;
+		    T4W = T4S + T4V;
+		    T65 = T4V - T4S;
+		    T3t = T20 + T23;
+		    T3u = T1R + T1U;
+		    T3v = T3t - T3u;
+		    T42 = T3u + T3t;
+	       }
+	  }
+	  { /* From here on only stores: output k lands in rio[WS(ios, k)] and iio[-WS(ios, 31 - k)], combined with the pair W[2k-2], W[2k-1]. This block: outputs 0 and 16. */
+	       E Tv, T10, T4g, T4i, T4j, T4k, T4f, T4h;
+	       Tv = Tf + Tu;
+	       T10 = TK + TZ;
+	       T4g = Tv - T10;
+	       T4i = T48 + T47;
+	       T4j = T41 + T42;
+	       T4k = T4i - T4j;
+	       rio[0] = Tv + T10; /* output 0: this store and the next one use no entry of W */
+	       iio[-WS(ios, 31)] = T4j + T4i;
+	       T4f = W[30];
+	       T4h = W[31];
+	       rio[WS(ios, 16)] = FNMS(T4h, T4k, T4f * T4g);
+	       iio[-WS(ios, 15)] = FMA(T4h, T4g, T4f * T4k);
+	  }
+	  { /* outputs 24 and 8 */
+	       E T44, T4c, T4a, T4e;
+	       {
+		    E T40, T43, T46, T49;
+		    T40 = Tf - Tu;
+		    T43 = T41 - T42;
+		    T44 = T40 + T43;
+		    T4c = T40 - T43;
+		    T46 = TZ - TK;
+		    T49 = T47 - T48;
+		    T4a = T46 + T49;
+		    T4e = T49 - T46;
+	       }
+	       {
+		    E T3Z, T45, T4b, T4d;
+		    T3Z = W[46];
+		    T45 = W[47];
+		    rio[WS(ios, 24)] = FNMS(T45, T4a, T3Z * T44);
+		    iio[-WS(ios, 7)] = FMA(T45, T44, T3Z * T4a);
+		    T4b = W[14];
+		    T4d = W[15];
+		    rio[WS(ios, 8)] = FNMS(T4d, T4e, T4b * T4c);
+		    iio[-WS(ios, 23)] = FMA(T4d, T4c, T4b * T4e);
+	       }
+	  }
+	  { /* outputs 28, 20, 12 and 4 */
+	       E T3m, T3H, T3T, T3O, T3C, T3P, T3x, T3S;
+	       T3m = T3i + T3l;
+	       T3H = T3D + T3G;
+	       T3T = T3G - T3D;
+	       T3O = T3i - T3l;
+	       {
+		    E T3A, T3B, T3r, T3w;
+		    T3A = T3q - T3n;
+		    T3B = T3s + T3v;
+		    T3C = KP707106781 * (T3A + T3B);
+		    T3P = KP707106781 * (T3B - T3A);
+		    T3r = T3n + T3q;
+		    T3w = T3s - T3v;
+		    T3x = KP707106781 * (T3r + T3w);
+		    T3S = KP707106781 * (T3r - T3w);
+	       }
+	       {
+		    E T3y, T3I, T3h, T3z;
+		    T3y = T3m + T3x;
+		    T3I = T3C + T3H;
+		    T3h = W[54];
+		    T3z = W[55];
+		    rio[WS(ios, 28)] = FNMS(T3z, T3I, T3h * T3y);
+		    iio[-WS(ios, 3)] = FMA(T3z, T3y, T3h * T3I);
+	       }
+	       {
+		    E T3W, T3Y, T3V, T3X;
+		    T3W = T3O - T3P;
+		    T3Y = T3T - T3S;
+		    T3V = W[38];
+		    T3X = W[39];
+		    rio[WS(ios, 20)] = FNMS(T3X, T3Y, T3V * T3W);
+		    iio[-WS(ios, 11)] = FMA(T3X, T3W, T3V * T3Y);
+	       }
+	       {
+		    E T3K, T3M, T3J, T3L;
+		    T3K = T3m - T3x;
+		    T3M = T3H - T3C;
+		    T3J = W[22];
+		    T3L = W[23];
+		    rio[WS(ios, 12)] = FNMS(T3L, T3M, T3J * T3K);
+		    iio[-WS(ios, 19)] = FMA(T3L, T3K, T3J * T3M);
+	       }
+	       {
+		    E T3Q, T3U, T3N, T3R;
+		    T3Q = T3O + T3P;
+		    T3U = T3S + T3T;
+		    T3N = W[6];
+		    T3R = W[7];
+		    rio[WS(ios, 4)] = FNMS(T3R, T3U, T3N * T3Q);
+		    iio[-WS(ios, 27)] = FMA(T3R, T3Q, T3N * T3U);
+	       }
+	  }
+	  { /* outputs 2, 10, 18 and 26 */
+	       E T2K, T36, T2Z, T3b, T2R, T3a, T2W, T37, T2J, T2X;
+	       T2J = KP707106781 * (T2e - T2d);
+	       T2K = T2I + T2J;
+	       T36 = T2I - T2J;
+	       T2X = KP707106781 * (T1j - T1s);
+	       T2Z = T2X + T2Y;
+	       T3b = T2Y - T2X;
+	       {
+		    E T2N, T2Q, T2U, T2V;
+		    T2N = FNMS(KP382683432, T2M, KP923879532 * T2L);
+		    T2Q = FMA(KP923879532, T2O, KP382683432 * T2P);
+		    T2R = T2N + T2Q;
+		    T3a = T2Q - T2N;
+		    T2U = FMA(KP382683432, T2L, KP923879532 * T2M);
+		    T2V = FNMS(KP382683432, T2O, KP923879532 * T2P);
+		    T2W = T2U + T2V;
+		    T37 = T2U - T2V;
+	       }
+	       {
+		    E T2S, T30, T2H, T2T;
+		    T2S = T2K + T2R;
+		    T30 = T2W + T2Z;
+		    T2H = W[2];
+		    T2T = W[3];
+		    rio[WS(ios, 2)] = FNMS(T2T, T30, T2H * T2S);
+		    iio[-WS(ios, 29)] = FMA(T2T, T2S, T2H * T30);
+	       }
+	       {
+		    E T3e, T3g, T3d, T3f;
+		    T3e = T36 - T37;
+		    T3g = T3b - T3a;
+		    T3d = W[18];
+		    T3f = W[19];
+		    rio[WS(ios, 10)] = FNMS(T3f, T3g, T3d * T3e);
+		    iio[-WS(ios, 21)] = FMA(T3f, T3e, T3d * T3g);
+	       }
+	       {
+		    E T32, T34, T31, T33;
+		    T32 = T2K - T2R;
+		    T34 = T2Z - T2W;
+		    T31 = W[34];
+		    T33 = W[35];
+		    rio[WS(ios, 18)] = FNMS(T33, T34, T31 * T32);
+		    iio[-WS(ios, 13)] = FMA(T33, T32, T31 * T34);
+	       }
+	       {
+		    E T38, T3c, T35, T39;
+		    T38 = T36 + T37;
+		    T3c = T3a + T3b;
+		    T35 = W[50];
+		    T39 = W[51];
+		    rio[WS(ios, 26)] = FNMS(T39, T3c, T35 * T38);
+		    iio[-WS(ios, 5)] = FMA(T39, T38, T35 * T3c);
+	       }
+	  }
+	  { /* outputs 30, 22, 14 and 6 */
+	       E T1u, T2w, T2p, T2B, T27, T2A, T2c, T2x, T1t, T2f;
+	       T1t = KP707106781 * (T1j + T1s);
+	       T1u = T1a + T1t;
+	       T2w = T1a - T1t;
+	       T2f = KP707106781 * (T2d + T2e);
+	       T2p = T2f + T2o;
+	       T2B = T2o - T2f;
+	       {
+		    E T1N, T26, T2a, T2b;
+		    T1N = FMA(KP923879532, T1D, KP382683432 * T1M);
+		    T26 = FNMS(KP382683432, T25, KP923879532 * T1W);
+		    T27 = T1N + T26;
+		    T2A = T1N - T26;
+		    T2a = FNMS(KP382683432, T1D, KP923879532 * T1M);
+		    T2b = FMA(KP382683432, T1W, KP923879532 * T25);
+		    T2c = T2a + T2b;
+		    T2x = T2b - T2a;
+	       }
+	       {
+		    E T28, T2q, T11, T29;
+		    T28 = T1u + T27;
+		    T2q = T2c + T2p;
+		    T11 = W[58];
+		    T29 = W[59];
+		    rio[WS(ios, 30)] = FNMS(T29, T2q, T11 * T28);
+		    iio[-WS(ios, 1)] = FMA(T29, T28, T11 * T2q);
+	       }
+	       {
+		    E T2E, T2G, T2D, T2F;
+		    T2E = T2w - T2x;
+		    T2G = T2B - T2A;
+		    T2D = W[42];
+		    T2F = W[43];
+		    rio[WS(ios, 22)] = FNMS(T2F, T2G, T2D * T2E);
+		    iio[-WS(ios, 9)] = FMA(T2F, T2E, T2D * T2G);
+	       }
+	       {
+		    E T2s, T2u, T2r, T2t;
+		    T2s = T1u - T27;
+		    T2u = T2p - T2c;
+		    T2r = W[26];
+		    T2t = W[27];
+		    rio[WS(ios, 14)] = FNMS(T2t, T2u, T2r * T2s);
+		    iio[-WS(ios, 17)] = FMA(T2t, T2s, T2r * T2u);
+	       }
+	       {
+		    E T2y, T2C, T2v, T2z;
+		    T2y = T2w + T2x;
+		    T2C = T2A + T2B;
+		    T2v = W[10];
+		    T2z = W[11];
+		    rio[WS(ios, 6)] = FNMS(T2z, T2C, T2v * T2y);
+		    iio[-WS(ios, 25)] = FMA(T2z, T2y, T2v * T2C);
+	       }
+	  }
+	  { /* outputs 1, 9, 17 and 25 */
+	       E T4Y, T5N, T5F, T5Q, T5p, T5R, T5C, T5M;
+	       {
+		    E T4E, T4X, T5D, T5E;
+		    T4E = FNMS(KP195090322, T4D, KP980785280 * T4w);
+		    T4X = FMA(KP195090322, T4P, KP980785280 * T4W);
+		    T4Y = T4E + T4X;
+		    T5N = T4X - T4E;
+		    T5D = FMA(KP980785280, T4D, KP195090322 * T4w);
+		    T5E = FNMS(KP195090322, T4W, KP980785280 * T4P);
+		    T5F = T5D + T5E;
+		    T5Q = T5D - T5E;
+	       }
+	       {
+		    E T5d, T5o, T5y, T5B;
+		    T5d = T55 + T5c;
+		    T5o = T5k + T5n;
+		    T5p = T5d + T5o;
+		    T5R = T5o - T5d;
+		    T5y = T5u + T5x;
+		    T5B = T5z + T5A;
+		    T5C = T5y + T5B;
+		    T5M = T5y - T5B;
+	       }
+	       {
+		    E T5q, T5G, T4l, T5r;
+		    T5q = T4Y + T5p;
+		    T5G = T5C + T5F;
+		    T4l = W[0];
+		    T5r = W[1];
+		    rio[WS(ios, 1)] = FNMS(T5r, T5G, T4l * T5q);
+		    iio[-WS(ios, 30)] = FMA(T4l, T5G, T5r * T5q);
+	       }
+	       {
+		    E T5U, T5W, T5T, T5V;
+		    T5U = T5R - T5Q;
+		    T5W = T5M - T5N;
+		    T5T = W[16];
+		    T5V = W[17];
+		    rio[WS(ios, 9)] = FNMS(T5V, T5W, T5T * T5U);
+		    iio[-WS(ios, 22)] = FMA(T5T, T5W, T5V * T5U);
+	       }
+	       {
+		    E T5I, T5K, T5H, T5J;
+		    T5I = T5C - T5F;
+		    T5K = T5p - T4Y;
+		    T5H = W[32];
+		    T5J = W[33];
+		    iio[-WS(ios, 14)] = FMA(T5H, T5I, T5J * T5K);
+		    rio[WS(ios, 17)] = FNMS(T5J, T5I, T5H * T5K);
+	       }
+	       {
+		    E T5O, T5S, T5L, T5P;
+		    T5O = T5M + T5N;
+		    T5S = T5Q + T5R;
+		    T5L = W[48];
+		    T5P = W[49];
+		    iio[-WS(ios, 6)] = FMA(T5L, T5O, T5P * T5S);
+		    rio[WS(ios, 25)] = FNMS(T5P, T5O, T5L * T5S);
+	       }
+	  }
+	  { /* outputs 29, 21, 13 and 5 */
+	       E T60, T6q, T6f, T6n, T67, T6m, T6c, T6r;
+	       {
+		    E T5Y, T5Z, T6d, T6e;
+		    T5Y = T5u - T5x;
+		    T5Z = T5c - T55;
+		    T60 = T5Y + T5Z;
+		    T6q = T5Y - T5Z;
+		    T6d = T5z - T5A;
+		    T6e = T5n - T5k;
+		    T6f = T6d + T6e;
+		    T6n = T6e - T6d;
+	       }
+	       {
+		    E T63, T66, T6a, T6b;
+		    T63 = FNMS(KP555570233, T62, KP831469612 * T61);
+		    T66 = FMA(KP831469612, T64, KP555570233 * T65);
+		    T67 = T63 + T66;
+		    T6m = T66 - T63;
+		    T6a = FMA(KP555570233, T61, KP831469612 * T62);
+		    T6b = FNMS(KP555570233, T64, KP831469612 * T65);
+		    T6c = T6a + T6b;
+		    T6r = T6a - T6b;
+	       }
+	       {
+		    E T68, T6g, T5X, T69;
+		    T68 = T60 + T67;
+		    T6g = T6c + T6f;
+		    T5X = W[56];
+		    T69 = W[57];
+		    iio[-WS(ios, 2)] = FMA(T5X, T68, T69 * T6g);
+		    rio[WS(ios, 29)] = FNMS(T69, T68, T5X * T6g);
+	       }
+	       {
+		    E T6u, T6w, T6t, T6v;
+		    T6u = T6q - T6r;
+		    T6w = T6n - T6m;
+		    T6t = W[40];
+		    T6v = W[41];
+		    iio[-WS(ios, 10)] = FMA(T6t, T6u, T6v * T6w);
+		    rio[WS(ios, 21)] = FNMS(T6v, T6u, T6t * T6w);
+	       }
+	       {
+		    E T6i, T6k, T6h, T6j;
+		    T6i = T6f - T6c;
+		    T6k = T60 - T67;
+		    T6h = W[24];
+		    T6j = W[25];
+		    rio[WS(ios, 13)] = FNMS(T6j, T6k, T6h * T6i);
+		    iio[-WS(ios, 18)] = FMA(T6h, T6k, T6j * T6i);
+	       }
+	       {
+		    E T6o, T6s, T6l, T6p;
+		    T6o = T6m + T6n;
+		    T6s = T6q + T6r;
+		    T6l = W[8];
+		    T6p = W[9];
+		    rio[WS(ios, 5)] = FNMS(T6p, T6s, T6l * T6o);
+		    iio[-WS(ios, 26)] = FMA(T6l, T6s, T6p * T6o);
+	       }
+	  }
+	  { /* outputs 3, 11, 19 and 27 */
+	       E T7y, T7R, T7J, T7U, T7B, T7V, T7G, T7Q;
+	       {
+		    E T7u, T7x, T7H, T7I;
+		    T7u = FNMS(KP555570233, T7t, KP831469612 * T7s);
+		    T7x = FNMS(KP555570233, T7w, KP831469612 * T7v);
+		    T7y = T7u + T7x;
+		    T7R = T7x - T7u;
+		    T7H = FMA(KP831469612, T7t, KP555570233 * T7s);
+		    T7I = FMA(KP831469612, T7w, KP555570233 * T7v);
+		    T7J = T7H - T7I;
+		    T7U = T7H + T7I;
+	       }
+	       {
+		    E T7z, T7A, T7E, T7F;
+		    T7z = T6G - T6D;
+		    T7A = T77 - T76;
+		    T7B = T7z + T7A;
+		    T7V = T7A - T7z;
+		    T7E = T6y - T6z;
+		    T7F = T73 - T74;
+		    T7G = T7E + T7F;
+		    T7Q = T7E - T7F;
+	       }
+	       {
+		    E T7C, T7K, T7r, T7D;
+		    T7C = T7y + T7B;
+		    T7K = T7G + T7J;
+		    T7r = W[4];
+		    T7D = W[5];
+		    rio[WS(ios, 3)] = FNMS(T7D, T7K, T7r * T7C);
+		    iio[-WS(ios, 28)] = FMA(T7r, T7K, T7D * T7C);
+	       }
+	       {
+		    E T7Y, T80, T7X, T7Z;
+		    T7Y = T7V - T7U;
+		    T80 = T7Q - T7R;
+		    T7X = W[20];
+		    T7Z = W[21];
+		    rio[WS(ios, 11)] = FNMS(T7Z, T80, T7X * T7Y);
+		    iio[-WS(ios, 20)] = FMA(T7X, T80, T7Z * T7Y);
+	       }
+	       {
+		    E T7M, T7O, T7L, T7N;
+		    T7M = T7G - T7J;
+		    T7O = T7B - T7y;
+		    T7L = W[36];
+		    T7N = W[37];
+		    iio[-WS(ios, 12)] = FMA(T7L, T7M, T7N * T7O);
+		    rio[WS(ios, 19)] = FNMS(T7N, T7M, T7L * T7O);
+	       }
+	       {
+		    E T7S, T7W, T7P, T7T;
+		    T7S = T7Q + T7R;
+		    T7W = T7U + T7V;
+		    T7P = W[52];
+		    T7T = W[53];
+		    iio[-WS(ios, 4)] = FMA(T7P, T7S, T7T * T7W);
+		    rio[WS(ios, 27)] = FNMS(T7T, T7S, T7P * T7W);
+	       }
+	  }
+	  { /* outputs 31, 23, 15 and 7 */
+	       E T6I, T7k, T79, T7h, T6X, T7g, T72, T7l;
+	       {
+		    E T6A, T6H, T75, T78;
+		    T6A = T6y + T6z;
+		    T6H = T6D + T6G;
+		    T6I = T6A + T6H;
+		    T7k = T6A - T6H;
+		    T75 = T73 + T74;
+		    T78 = T76 + T77;
+		    T79 = T75 + T78;
+		    T7h = T78 - T75;
+	       }
+	       {
+		    E T6P, T6W, T70, T71;
+		    T6P = FNMS(KP195090322, T6O, KP980785280 * T6L);
+		    T6W = FMA(KP980785280, T6S, KP195090322 * T6V);
+		    T6X = T6P + T6W;
+		    T7g = T6W - T6P;
+		    T70 = FMA(KP195090322, T6L, KP980785280 * T6O);
+		    T71 = FNMS(KP195090322, T6S, KP980785280 * T6V);
+		    T72 = T70 + T71;
+		    T7l = T70 - T71;
+	       }
+	       {
+		    E T6Y, T7a, T6x, T6Z;
+		    T6Y = T6I + T6X;
+		    T7a = T72 + T79;
+		    T6x = W[60];
+		    T6Z = W[61];
+		    iio[0] = FMA(T6x, T6Y, T6Z * T7a);
+		    rio[WS(ios, 31)] = FNMS(T6Z, T6Y, T6x * T7a);
+	       }
+	       {
+		    E T7o, T7q, T7n, T7p;
+		    T7o = T7k - T7l;
+		    T7q = T7h - T7g;
+		    T7n = W[44];
+		    T7p = W[45];
+		    iio[-WS(ios, 8)] = FMA(T7n, T7o, T7p * T7q);
+		    rio[WS(ios, 23)] = FNMS(T7p, T7o, T7n * T7q);
+	       }
+	       {
+		    E T7c, T7e, T7b, T7d;
+		    T7c = T79 - T72;
+		    T7e = T6I - T6X;
+		    T7b = W[28];
+		    T7d = W[29];
+		    rio[WS(ios, 15)] = FNMS(T7d, T7e, T7b * T7c);
+		    iio[-WS(ios, 16)] = FMA(T7b, T7e, T7d * T7c);
+	       }
+	       {
+		    E T7i, T7m, T7f, T7j;
+		    T7i = T7g + T7h;
+		    T7m = T7k + T7l;
+		    T7f = W[12];
+		    T7j = W[13];
+		    rio[WS(ios, 7)] = FNMS(T7j, T7m, T7f * T7i);
+		    iio[-WS(ios, 24)] = FMA(T7f, T7m, T7j * T7i);
+	       }
+	  }
+     }
+     return W; /* W as advanced by the loop, i.e. just past the last entries read */
+}
+
+static const tw_instr twinstr[] = { /* instruction table referenced by desc below; the TW_* opcodes are defined outside this chunk */
+     {TW_FULL, 0, 32},
+     {TW_NEXT, 1, 0}
+};
+
+static const hc2hc_desc desc = { 32, "hb_32", twinstr, {340, 114, 94, 0}, &GENUS, 0, 0, 0 }; /* size 32, codelet name, twinstr table; 340/114/94 equal the add / multiply / fused multiply-add counts in this file's header comment */
+
+void X(codelet_hb_32) (planner *p) { /* hands hb_32 and its descriptor to planner p through khc2hc_dif_register */
+     X(khc2hc_dif_register) (p, hb_32, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_4.c b/src/fftw3/rdft/codelets/hc2r/hb_4.c
new file mode 100644
index 0000000..ac05b69
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_4.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:22 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 4 -dif -name hb_4 -include hb.h */
+
+/*
+ * This function contains 22 FP additions, 12 FP multiplications,
+ * (or, 16 additions, 6 multiplications, 6 fused multiply/add),
+ * 13 stack variables, and 16 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_4.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_4.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_4.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_4(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{ /* Size-4 in-place pass: every iteration reads four values through rio and four through iio, then overwrites those same eight locations. */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 6) { /* i steps down by 2 from m-2; per pass rio moves up and iio moves down by dist, and 6 entries of W are consumed */
+	  E T3, Ti, Tc, Tn, T6, Tm, Tf, Tj;
+	  { /* inputs at WS offsets 0 and 2 */
+	       E T1, T2, Ta, Tb;
+	       T1 = rio[0];
+	       T2 = iio[-WS(ios, 2)];
+	       T3 = T1 + T2;
+	       Ti = T1 - T2;
+	       Ta = iio[0];
+	       Tb = rio[WS(ios, 2)];
+	       Tc = Ta - Tb;
+	       Tn = Ta + Tb;
+	  }
+	  { /* inputs at WS offsets 1 and 3 */
+	       E T4, T5, Td, Te;
+	       T4 = rio[WS(ios, 1)];
+	       T5 = iio[-WS(ios, 3)];
+	       T6 = T4 + T5;
+	       Tm = T4 - T5;
+	       Td = iio[-WS(ios, 1)];
+	       Te = rio[WS(ios, 3)];
+	       Tf = Td - Te;
+	       Tj = Td + Te;
+	  }
+	  rio[0] = T3 + T6; /* output 0: this store and the next one use no entry of W */
+	  iio[-WS(ios, 3)] = Tc + Tf;
+	  { /* output 3: combined with the pair W[4], W[5] */
+	       E Tq, Ts, Tp, Tr;
+	       Tq = Tn - Tm;
+	       Ts = Ti + Tj;
+	       Tp = W[4];
+	       Tr = W[5];
+	       iio[0] = FMA(Tp, Tq, Tr * Ts);
+	       rio[WS(ios, 3)] = FNMS(Tr, Tq, Tp * Ts);
+	  }
+	  { /* output 2: combined with the pair W[2], W[3] */
+	       E T8, Tg, T7, T9;
+	       T8 = T3 - T6;
+	       Tg = Tc - Tf;
+	       T7 = W[2];
+	       T9 = W[3];
+	       rio[WS(ios, 2)] = FNMS(T9, Tg, T7 * T8);
+	       iio[-WS(ios, 1)] = FMA(T9, T8, T7 * Tg);
+	  }
+	  { /* output 1: combined with the pair W[0], W[1] */
+	       E Tk, To, Th, Tl;
+	       Tk = Ti - Tj;
+	       To = Tm + Tn;
+	       Th = W[0];
+	       Tl = W[1];
+	       rio[WS(ios, 1)] = FNMS(Tl, To, Th * Tk);
+	       iio[-WS(ios, 2)] = FMA(Th, To, Tl * Tk);
+	  }
+     }
+     return W; /* W as advanced by the loop, i.e. just past the last entries read */
+}
+
+static const tw_instr twinstr[] = { /* instruction table referenced by desc below; the TW_* opcodes are defined outside this chunk */
+     {TW_FULL, 0, 4},
+     {TW_NEXT, 1, 0}
+};
+
+static const hc2hc_desc desc = { 4, "hb_4", twinstr, {16, 6, 6, 0}, &GENUS, 0, 0, 0 }; /* size 4, codelet name, twinstr table; 16/6/6 equal the add / multiply / fused multiply-add counts in this file's header comment */
+
+void X(codelet_hb_4) (planner *p) { /* hands hb_4 and its descriptor to planner p through khc2hc_dif_register */
+     X(khc2hc_dif_register) (p, hb_4, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_5.c b/src/fftw3/rdft/codelets/hc2r/hb_5.c
new file mode 100644
index 0000000..e0af24d
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_5.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:23 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 5 -dif -name hb_5 -include hb.h */
+
+/*
+ * This function contains 40 FP additions, 28 FP multiplications,
+ * (or, 26 additions, 14 multiplications, 14 fused multiply/add),
+ * 27 stack variables, and 20 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_5.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_5.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_5.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_5(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{ /* Size-5 in-place pass: every iteration reads five values through rio and five through iio, then overwrites those same ten locations. */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 8) { /* i steps down by 2 from m-2; per pass rio moves up and iio moves down by dist, and 8 entries of W are consumed */
+	  E T1, Tj, TG, Ts, T8, Ti, T9, Tn, TD, Tu, Tg, Tt;
+	  { /* first half of the inputs: rio[0], rio at offsets 1 and 2, iio at offsets -3 and -4 */
+	       E T7, Tr, T4, Tq;
+	       T1 = rio[0];
+	       {
+		    E T5, T6, T2, T3;
+		    T5 = rio[WS(ios, 2)];
+		    T6 = iio[-WS(ios, 3)];
+		    T7 = T5 + T6;
+		    Tr = T5 - T6;
+		    T2 = rio[WS(ios, 1)];
+		    T3 = iio[-WS(ios, 4)];
+		    T4 = T2 + T3;
+		    Tq = T2 - T3;
+	       }
+	       Tj = KP559016994 * (T4 - T7);
+	       TG = FMA(KP951056516, Tq, KP587785252 * Tr);
+	       Ts = FNMS(KP951056516, Tr, KP587785252 * Tq);
+	       T8 = T4 + T7;
+	       Ti = FNMS(KP250000000, T8, T1);
+	  }
+	  { /* second half of the inputs: iio[0], iio at offsets -1 and -2, rio at offsets 3 and 4 */
+	       E Tf, Tm, Tc, Tl;
+	       T9 = iio[0];
+	       {
+		    E Td, Te, Ta, Tb;
+		    Td = iio[-WS(ios, 2)];
+		    Te = rio[WS(ios, 3)];
+		    Tf = Td - Te;
+		    Tm = Td + Te;
+		    Ta = iio[-WS(ios, 1)];
+		    Tb = rio[WS(ios, 4)];
+		    Tc = Ta - Tb;
+		    Tl = Ta + Tb;
+	       }
+	       Tn = FNMS(KP951056516, Tm, KP587785252 * Tl);
+	       TD = FMA(KP951056516, Tl, KP587785252 * Tm);
+	       Tu = KP559016994 * (Tc - Tf);
+	       Tg = Tc + Tf;
+	       Tt = FNMS(KP250000000, Tg, T9);
+	  }
+	  rio[0] = T1 + T8; /* output 0: this store and the next one use no entry of W */
+	  iio[-WS(ios, 4)] = T9 + Tg;
+	  { /* outputs 1 and 4: combined with the pairs W[0], W[1] and W[6], W[7] */
+	       E TE, TM, TI, TK, TC, TH;
+	       TC = Tj + Ti;
+	       TE = TC - TD;
+	       TM = TC + TD;
+	       TH = Tu + Tt;
+	       TI = TG + TH;
+	       TK = TH - TG;
+	       {
+		    E TB, TF, TJ, TL;
+		    TB = W[0];
+		    TF = W[1];
+		    rio[WS(ios, 1)] = FNMS(TF, TI, TB * TE);
+		    iio[-WS(ios, 3)] = FMA(TB, TI, TF * TE);
+		    TJ = W[6];
+		    TL = W[7];
+		    iio[0] = FMA(TJ, TK, TL * TM);
+		    rio[WS(ios, 4)] = FNMS(TL, TK, TJ * TM);
+	       }
+	  }
+	  { /* outputs 2 and 3: combined with the pairs W[2], W[3] and W[4], W[5] */
+	       E To, TA, Tw, Ty, Tk, Tv;
+	       Tk = Ti - Tj;
+	       To = Tk - Tn;
+	       TA = Tk + Tn;
+	       Tv = Tt - Tu;
+	       Tw = Ts + Tv;
+	       Ty = Tv - Ts;
+	       {
+		    E Th, Tp, Tx, Tz;
+		    Th = W[2];
+		    Tp = W[3];
+		    rio[WS(ios, 2)] = FNMS(Tp, Tw, Th * To);
+		    iio[-WS(ios, 2)] = FMA(Th, Tw, Tp * To);
+		    Tx = W[4];
+		    Tz = W[5];
+		    iio[-WS(ios, 1)] = FMA(Tx, Ty, Tz * TA);
+		    rio[WS(ios, 3)] = FNMS(Tz, Ty, Tx * TA);
+	       }
+	  }
+     }
+     return W; /* W as advanced by the loop, i.e. just past the last entries read */
+}
+
+static const tw_instr twinstr[] = { /* instruction table referenced by desc below; the TW_* opcodes are defined outside this chunk */
+     {TW_FULL, 0, 5},
+     {TW_NEXT, 1, 0}
+};
+
+static const hc2hc_desc desc = { 5, "hb_5", twinstr, {26, 14, 14, 0}, &GENUS, 0, 0, 0 }; /* size 5, codelet name, twinstr table; 26/14/14 equal the add / multiply / fused multiply-add counts in this file's header comment */
+
+void X(codelet_hb_5) (planner *p) { /* hands hb_5 and its descriptor to planner p through khc2hc_dif_register */
+     X(khc2hc_dif_register) (p, hb_5, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_6.c b/src/fftw3/rdft/codelets/hc2r/hb_6.c
new file mode 100644
index 0000000..1a24696
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_6.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:25 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 6 -dif -name hb_6 -include hb.h */
+
+/*
+ * This function contains 46 FP additions, 28 FP multiplications,
+ * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
+ * 25 stack variables, and 24 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_6.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_6.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_6.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_6(R *rio, R *iio, const R *W, stride ios, int m, int dist) /* generated size-6 kernel (genfft flags above: -n 6 -dif -sign 1): rewrites 6 rio and 6 iio entries in place per pass, returns the advanced W */
+{
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 10) { /* per pass: rio moves forward and iio backward by dist; W skips the 10 values W[0]..W[9] read below */
+	  E T3, Ty, Tp, TE, Ta, TO, Tm, TB, Tj, TL, Tq, TH; /* intermediates shared between the load stage and the store stage */
+	  { /* load rio[0], iio[-WS(ios, 3)], iio[0], rio[WS(ios, 3)]; keep their sums and differences */
+	       E T1, T2, Tn, To;
+	       T1 = rio[0];
+	       T2 = iio[-WS(ios, 3)];
+	       T3 = T1 + T2;
+	       Ty = T1 - T2;
+	       Tn = iio[0];
+	       To = rio[WS(ios, 3)];
+	       Tp = Tn - To;
+	       TE = Tn + To;
+	  }
+	  { /* load rio[WS(ios, 2)], iio[-WS(ios, 5)], iio[-WS(ios, 4)], rio[WS(ios, 1)] */
+	       E T6, Tz, T9, TA;
+	       {
+		    E T4, T5, T7, T8;
+		    T4 = rio[WS(ios, 2)];
+		    T5 = iio[-WS(ios, 5)];
+		    T6 = T4 + T5;
+		    Tz = T4 - T5;
+		    T7 = iio[-WS(ios, 4)];
+		    T8 = rio[WS(ios, 1)];
+		    T9 = T7 + T8;
+		    TA = T7 - T8;
+	       }
+	       Ta = T6 + T9;
+	       TO = KP866025403 * (Tz - TA);
+	       Tm = KP866025403 * (T6 - T9);
+	       TB = Tz + TA;
+	  }
+	  { /* load iio[-WS(ios, 1)], rio[WS(ios, 4)], iio[-WS(ios, 2)], rio[WS(ios, 5)]; these are the last reads of rio/iio in the pass */
+	       E Tf, TF, Ti, TG;
+	       {
+		    E Td, Te, Tg, Th;
+		    Td = iio[-WS(ios, 1)];
+		    Te = rio[WS(ios, 4)];
+		    Tf = Td - Te;
+		    TF = Te + Td;
+		    Tg = iio[-WS(ios, 2)];
+		    Th = rio[WS(ios, 5)];
+		    Ti = Tg - Th;
+		    TG = Tg + Th;
+	       }
+	       Tj = KP866025403 * (Tf - Ti);
+	       TL = KP866025403 * (TF + TG);
+	       Tq = Tf + Ti;
+	       TH = TF - TG;
+	  }
+	  rio[0] = T3 + Ta; /* stores start here, after every input has been read; this output and the next use no W factor */
+	  iio[-WS(ios, 5)] = Tp + Tq;
+	  { /* output pair rio[WS(ios, 3)] / iio[-WS(ios, 2)], combined with W[4] and W[5] */
+	       E TC, TI, Tx, TD;
+	       TC = Ty + TB;
+	       TI = TE - TH;
+	       Tx = W[4];
+	       TD = W[5];
+	       rio[WS(ios, 3)] = FNMS(TD, TI, Tx * TC);
+	       iio[-WS(ios, 2)] = FMA(TD, TC, Tx * TI);
+	  }
+	  { /* output pairs rio[WS(ios, 4)] / iio[-WS(ios, 1)] with W[6], W[7] and rio[WS(ios, 2)] / iio[-WS(ios, 3)] with W[2], W[3] */
+	       E Tk, Tu, Ts, Tw, Tc, Tr;
+	       Tc = FNMS(KP500000000, Ta, T3);
+	       Tk = Tc + Tj;
+	       Tu = Tc - Tj;
+	       Tr = FNMS(KP500000000, Tq, Tp);
+	       Ts = Tm + Tr;
+	       Tw = Tr - Tm;
+	       {
+		    E Tb, Tl, Tt, Tv;
+		    Tb = W[6];
+		    Tl = W[7];
+		    rio[WS(ios, 4)] = FNMS(Tl, Ts, Tb * Tk);
+		    iio[-WS(ios, 1)] = FMA(Tl, Tk, Tb * Ts);
+		    Tt = W[2];
+		    Tv = W[3];
+		    rio[WS(ios, 2)] = FNMS(Tv, Tw, Tt * Tu);
+		    iio[-WS(ios, 3)] = FMA(Tv, Tu, Tt * Tw);
+	       }
+	  }
+	  { /* output pairs rio[WS(ios, 1)] / iio[-WS(ios, 4)] with W[0], W[1] and iio[0] / rio[WS(ios, 5)] with W[8], W[9] */
+	       E TM, TU, TQ, TS, TK, TP;
+	       TK = FNMS(KP500000000, TB, Ty);
+	       TM = TK - TL;
+	       TU = TK + TL;
+	       TP = FMA(KP500000000, TH, TE);
+	       TQ = TO + TP;
+	       TS = TP - TO;
+	       {
+		    E TJ, TN, TR, TT;
+		    TJ = W[0];
+		    TN = W[1];
+		    rio[WS(ios, 1)] = FNMS(TN, TQ, TJ * TM);
+		    iio[-WS(ios, 4)] = FMA(TJ, TQ, TN * TM);
+		    TR = W[8];
+		    TT = W[9];
+		    iio[0] = FMA(TR, TS, TT * TU);
+		    rio[WS(ios, 5)] = FNMS(TT, TS, TR * TU);
+	       }
+	  }
+     }
+     return W; /* W as advanced by the loop (10 entries per completed pass) */
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction table; referenced by desc below */
+     {TW_FULL, 0, 6}, /* NOTE(review): presumably requests the full twiddle set for radix 6 (hb_6 reads W[0]..W[9] per pass) -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* NOTE(review): looks like the end-of-list entry -- confirm */
+};
+
+static const hc2hc_desc desc = { 6, "hb_6", twinstr, {32, 14, 14, 0}, &GENUS, 0, 0, 0 }; /* {32, 14, 14, ...} matches the additions / multiplications / fused multiply-adds in the header comment; other field meanings: confirm against hc2hc_desc */
+
+void X(codelet_hb_6) (planner *p) { /* registration entry point for the hb_6 codelet */
+     X(khc2hc_dif_register) (p, hb_6, &desc); /* passes the planner, the kernel and its descriptor to the register routine */
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_64.c b/src/fftw3/rdft/codelets/hc2r/hb_64.c
new file mode 100644
index 0000000..d3f9afc
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_64.c
@@ -0,0 +1,1972 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:42 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 64 -dif -name hb_64 -include hb.h */
+
+/*
+ * This function contains 1038 FP additions, 500 FP multiplications,
+ * (or, 808 additions, 270 multiplications, 230 fused multiply/add),
+ * 196 stack variables, and 256 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_64.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_64.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_64.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_64(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{
+     DK(KP634393284, +0.634393284163645498215171613225493370675687095);
+     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
+     DK(KP098017140, +0.098017140329560601994195563888641845861136673);
+     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
+     DK(KP471396736, +0.471396736825997648556387625905254377657460319);
+     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
+     DK(KP290284677, +0.290284677254462367636192375817395274691476278);
+     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 126) {
+	  E Tf, T7i, Tfa, ThM, Tgp, ThH, T2c, T5O, T4T, T6n, Tcp, Ted, TcA, TdE, T87;
+	  E T9o, TK, T93, T2P, T4F, Tfo, Thz, T5T, T6j, Tbx, TdI, Tfl, ThA, T7r, T81;
+	  E TbE, TdH, TZ, T94, T38, T4G, Tfv, ThC, T5W, T6k, TbQ, TdK, Tfs, ThD, T7w;
+	  E T82, TbX, TdL, Tu, T84, Tfh, ThG, Tgm, ThN, T2v, T6m, T4K, T5P, Tce, TdF;
+	  E TcD, Tec, T7l, T9p, T1L, T20, T9c, T9d, T9e, T9f, T40, T66, Tg1, Thu, Tg8;
+	  E Thv, Tg5, Thr, T4n, T67, T4j, T69, T4w, T6a, TaT, TdW, Tb8, TdZ, TfU, Ths;
+	  E T7O, T8y, T7T, T8z, Tbc, TdX, Tbj, Te0, T1g, T1v, T97, T98, T99, T9a, T3j;
+	  E T5Z, TfI, Thk, TfP, Thl, TfM, Tho, T3G, T60, T3C, T62, T3P, T63, Tak, TdQ;
+	  E Tav, TdT, TfB, Thn, T7D, T8v, T7I, T8w, TaD, TdP, TaG, TdS;
+	  {
+	       E T3, Tcm, T4O, Tcv, T6, Tcu, T4R, Tcn, Td, Tcy, T2a, Tch, Ta, Tcx, T27;
+	       E Tck;
+	       {
+		    E T1, T2, T4P, T4Q;
+		    T1 = rio[0];
+		    T2 = iio[-WS(ios, 32)];
+		    T3 = T1 + T2;
+		    Tcm = T1 - T2;
+		    {
+			 E T4M, T4N, T4, T5;
+			 T4M = iio[0];
+			 T4N = rio[WS(ios, 32)];
+			 T4O = T4M - T4N;
+			 Tcv = T4M + T4N;
+			 T4 = rio[WS(ios, 16)];
+			 T5 = iio[-WS(ios, 48)];
+			 T6 = T4 + T5;
+			 Tcu = T4 - T5;
+		    }
+		    T4P = iio[-WS(ios, 16)];
+		    T4Q = rio[WS(ios, 48)];
+		    T4R = T4P - T4Q;
+		    Tcn = T4P + T4Q;
+		    {
+			 E Tb, Tc, Tcf, T28, T29, Tcg;
+			 Tb = iio[-WS(ios, 56)];
+			 Tc = rio[WS(ios, 24)];
+			 Tcf = Tb - Tc;
+			 T28 = iio[-WS(ios, 24)];
+			 T29 = rio[WS(ios, 56)];
+			 Tcg = T29 + T28;
+			 Td = Tb + Tc;
+			 Tcy = Tcf + Tcg;
+			 T2a = T28 - T29;
+			 Tch = Tcf - Tcg;
+		    }
+		    {
+			 E T8, T9, Tcj, T25, T26, Tci;
+			 T8 = rio[WS(ios, 8)];
+			 T9 = iio[-WS(ios, 40)];
+			 Tcj = T8 - T9;
+			 T25 = iio[-WS(ios, 8)];
+			 T26 = rio[WS(ios, 40)];
+			 Tci = T25 + T26;
+			 Ta = T8 + T9;
+			 Tcx = Tcj + Tci;
+			 T27 = T25 - T26;
+			 Tck = Tci - Tcj;
+		    }
+	       }
+	       {
+		    E T7, Te, Tf8, Tf9;
+		    T7 = T3 + T6;
+		    Te = Ta + Td;
+		    Tf = T7 + Te;
+		    T7i = T7 - Te;
+		    Tf8 = Tcv - Tcu;
+		    Tf9 = KP707106781 * (Tck + Tch);
+		    Tfa = Tf8 + Tf9;
+		    ThM = Tf8 - Tf9;
+	       }
+	       {
+		    E Tgn, Tgo, T24, T2b;
+		    Tgn = KP707106781 * (Tcx + Tcy);
+		    Tgo = Tcm + Tcn;
+		    Tgp = Tgn + Tgo;
+		    ThH = Tgo - Tgn;
+		    T24 = T3 - T6;
+		    T2b = T27 - T2a;
+		    T2c = T24 + T2b;
+		    T5O = T24 - T2b;
+	       }
+	       {
+		    E T4L, T4S, Tcl, Tco;
+		    T4L = Td - Ta;
+		    T4S = T4O - T4R;
+		    T4T = T4L + T4S;
+		    T6n = T4S - T4L;
+		    Tcl = KP707106781 * (Tch - Tck);
+		    Tco = Tcm - Tcn;
+		    Tcp = Tcl + Tco;
+		    Ted = Tco - Tcl;
+	       }
+	       {
+		    E Tcw, Tcz, T85, T86;
+		    Tcw = Tcu + Tcv;
+		    Tcz = KP707106781 * (Tcx - Tcy);
+		    TcA = Tcw + Tcz;
+		    TdE = Tcw - Tcz;
+		    T85 = T4O + T4R;
+		    T86 = T27 + T2a;
+		    T87 = T85 - T86;
+		    T9o = T86 + T85;
+	       }
+	  }
+	  {
+	       E TC, Tby, T2x, Tbu, T2N, Tbz, T7o, Tbv, TJ, TbB, TbC, T2E, T2G, Tbp, Tbs;
+	       E T7p, Tfj, Tfk;
+	       {
+		    E Tw, Tx, Ty, Tz, TA, TB;
+		    Tw = rio[WS(ios, 2)];
+		    Tx = iio[-WS(ios, 34)];
+		    Ty = Tw + Tx;
+		    Tz = rio[WS(ios, 18)];
+		    TA = iio[-WS(ios, 50)];
+		    TB = Tz + TA;
+		    TC = Ty + TB;
+		    Tby = Tz - TA;
+		    T2x = Ty - TB;
+		    Tbu = Tw - Tx;
+	       }
+	       {
+		    E T2H, T2I, T2J, T2K, T2L, T2M;
+		    T2H = iio[-WS(ios, 2)];
+		    T2I = rio[WS(ios, 34)];
+		    T2J = T2H - T2I;
+		    T2K = iio[-WS(ios, 18)];
+		    T2L = rio[WS(ios, 50)];
+		    T2M = T2K - T2L;
+		    T2N = T2J - T2M;
+		    Tbz = T2H + T2I;
+		    T7o = T2J + T2M;
+		    Tbv = T2K + T2L;
+	       }
+	       {
+		    E TF, Tbr, T2A, Tbq, TI, Tbn, T2D, Tbo;
+		    {
+			 E TD, TE, T2y, T2z;
+			 TD = rio[WS(ios, 10)];
+			 TE = iio[-WS(ios, 42)];
+			 TF = TD + TE;
+			 Tbr = TD - TE;
+			 T2y = iio[-WS(ios, 10)];
+			 T2z = rio[WS(ios, 42)];
+			 T2A = T2y - T2z;
+			 Tbq = T2y + T2z;
+		    }
+		    {
+			 E TG, TH, T2B, T2C;
+			 TG = iio[-WS(ios, 58)];
+			 TH = rio[WS(ios, 26)];
+			 TI = TG + TH;
+			 Tbn = TG - TH;
+			 T2B = iio[-WS(ios, 26)];
+			 T2C = rio[WS(ios, 58)];
+			 T2D = T2B - T2C;
+			 Tbo = T2C + T2B;
+		    }
+		    TJ = TF + TI;
+		    TbB = Tbr + Tbq;
+		    TbC = Tbn + Tbo;
+		    T2E = T2A - T2D;
+		    T2G = TI - TF;
+		    Tbp = Tbn - Tbo;
+		    Tbs = Tbq - Tbr;
+		    T7p = T2A + T2D;
+	       }
+	       TK = TC + TJ;
+	       T93 = T7p + T7o;
+	       {
+		    E T2F, T2O, Tfm, Tfn;
+		    T2F = T2x + T2E;
+		    T2O = T2G + T2N;
+		    T2P = FMA(KP923879532, T2F, KP382683432 * T2O);
+		    T4F = FNMS(KP382683432, T2F, KP923879532 * T2O);
+		    Tfm = KP707106781 * (TbB + TbC);
+		    Tfn = Tbu + Tbv;
+		    Tfo = Tfm + Tfn;
+		    Thz = Tfn - Tfm;
+	       }
+	       {
+		    E T5R, T5S, Tbt, Tbw;
+		    T5R = T2x - T2E;
+		    T5S = T2N - T2G;
+		    T5T = FNMS(KP382683432, T5S, KP923879532 * T5R);
+		    T6j = FMA(KP382683432, T5R, KP923879532 * T5S);
+		    Tbt = KP707106781 * (Tbp - Tbs);
+		    Tbw = Tbu - Tbv;
+		    Tbx = Tbt + Tbw;
+		    TdI = Tbw - Tbt;
+	       }
+	       Tfj = Tbz - Tby;
+	       Tfk = KP707106781 * (Tbs + Tbp);
+	       Tfl = Tfj + Tfk;
+	       ThA = Tfj - Tfk;
+	       {
+		    E T7n, T7q, TbA, TbD;
+		    T7n = TC - TJ;
+		    T7q = T7o - T7p;
+		    T7r = T7n + T7q;
+		    T81 = T7q - T7n;
+		    TbA = Tby + Tbz;
+		    TbD = KP707106781 * (TbB - TbC);
+		    TbE = TbA + TbD;
+		    TdH = TbA - TbD;
+	       }
+	  }
+	  {
+	       E TR, TbU, T2Q, TbN, T36, TbV, T7t, TbO, TY, TbR, TbS, T2X, T2Z, TbI, TbL;
+	       E T7u, Tfq, Tfr;
+	       {
+		    E TL, TM, TN, TO, TP, TQ;
+		    TL = iio[-WS(ios, 62)];
+		    TM = rio[WS(ios, 30)];
+		    TN = TL + TM;
+		    TO = rio[WS(ios, 14)];
+		    TP = iio[-WS(ios, 46)];
+		    TQ = TO + TP;
+		    TR = TN + TQ;
+		    TbU = TL - TM;
+		    T2Q = TN - TQ;
+		    TbN = TO - TP;
+	       }
+	       {
+		    E T30, T31, T32, T33, T34, T35;
+		    T30 = iio[-WS(ios, 30)];
+		    T31 = rio[WS(ios, 62)];
+		    T32 = T30 - T31;
+		    T33 = iio[-WS(ios, 14)];
+		    T34 = rio[WS(ios, 46)];
+		    T35 = T33 - T34;
+		    T36 = T32 - T35;
+		    TbV = T33 + T34;
+		    T7t = T32 + T35;
+		    TbO = T31 + T30;
+	       }
+	       {
+		    E TU, TbG, T2T, TbH, TX, TbJ, T2W, TbK;
+		    {
+			 E TS, TT, T2R, T2S;
+			 TS = rio[WS(ios, 6)];
+			 TT = iio[-WS(ios, 38)];
+			 TU = TS + TT;
+			 TbG = TS - TT;
+			 T2R = iio[-WS(ios, 6)];
+			 T2S = rio[WS(ios, 38)];
+			 T2T = T2R - T2S;
+			 TbH = T2R + T2S;
+		    }
+		    {
+			 E TV, TW, T2U, T2V;
+			 TV = iio[-WS(ios, 54)];
+			 TW = rio[WS(ios, 22)];
+			 TX = TV + TW;
+			 TbJ = TV - TW;
+			 T2U = iio[-WS(ios, 22)];
+			 T2V = rio[WS(ios, 54)];
+			 T2W = T2U - T2V;
+			 TbK = T2V + T2U;
+		    }
+		    TY = TU + TX;
+		    TbR = TbJ - TbK;
+		    TbS = TbH - TbG;
+		    T2X = T2T - T2W;
+		    T2Z = TX - TU;
+		    TbI = TbG + TbH;
+		    TbL = TbJ + TbK;
+		    T7u = T2T + T2W;
+	       }
+	       TZ = TR + TY;
+	       T94 = T7u + T7t;
+	       {
+		    E T2Y, T37, Tft, Tfu;
+		    T2Y = T2Q + T2X;
+		    T37 = T2Z + T36;
+		    T38 = FNMS(KP382683432, T37, KP923879532 * T2Y);
+		    T4G = FMA(KP382683432, T2Y, KP923879532 * T37);
+		    Tft = KP707106781 * (TbI + TbL);
+		    Tfu = TbU + TbV;
+		    Tfv = Tft + Tfu;
+		    ThC = Tfu - Tft;
+	       }
+	       {
+		    E T5U, T5V, TbM, TbP;
+		    T5U = T2Q - T2X;
+		    T5V = T36 - T2Z;
+		    T5W = FMA(KP923879532, T5U, KP382683432 * T5V);
+		    T6k = FNMS(KP382683432, T5U, KP923879532 * T5V);
+		    TbM = KP707106781 * (TbI - TbL);
+		    TbP = TbN - TbO;
+		    TbQ = TbM + TbP;
+		    TdK = TbP - TbM;
+	       }
+	       Tfq = KP707106781 * (TbS + TbR);
+	       Tfr = TbN + TbO;
+	       Tfs = Tfq - Tfr;
+	       ThD = Tfq + Tfr;
+	       {
+		    E T7s, T7v, TbT, TbW;
+		    T7s = TR - TY;
+		    T7v = T7t - T7u;
+		    T7w = T7s - T7v;
+		    T82 = T7s + T7v;
+		    TbT = KP707106781 * (TbR - TbS);
+		    TbW = TbU - TbV;
+		    TbX = TbT + TbW;
+		    TdL = TbW - TbT;
+	       }
+	  }
+	  {
+	       E Ti, T2g, Tl, T2j, T2d, T2k, Tfc, Tfb, Tc5, Tc2, Tp, T2p, Ts, T2s, T2m;
+	       E T2t, Tff, Tfe, Tcc, Tc9;
+	       {
+		    E Tc0, Tc4, Tc3, Tc1;
+		    {
+			 E Tg, Th, T2e, T2f;
+			 Tg = rio[WS(ios, 4)];
+			 Th = iio[-WS(ios, 36)];
+			 Ti = Tg + Th;
+			 Tc0 = Tg - Th;
+			 T2e = iio[-WS(ios, 4)];
+			 T2f = rio[WS(ios, 36)];
+			 T2g = T2e - T2f;
+			 Tc4 = T2e + T2f;
+		    }
+		    {
+			 E Tj, Tk, T2h, T2i;
+			 Tj = rio[WS(ios, 20)];
+			 Tk = iio[-WS(ios, 52)];
+			 Tl = Tj + Tk;
+			 Tc3 = Tj - Tk;
+			 T2h = iio[-WS(ios, 20)];
+			 T2i = rio[WS(ios, 52)];
+			 T2j = T2h - T2i;
+			 Tc1 = T2h + T2i;
+		    }
+		    T2d = Ti - Tl;
+		    T2k = T2g - T2j;
+		    Tfc = Tc0 + Tc1;
+		    Tfb = Tc4 - Tc3;
+		    Tc5 = Tc3 + Tc4;
+		    Tc2 = Tc0 - Tc1;
+	       }
+	       {
+		    E Tc7, Tcb, Tca, Tc8;
+		    {
+			 E Tn, To, T2n, T2o;
+			 Tn = iio[-WS(ios, 60)];
+			 To = rio[WS(ios, 28)];
+			 Tp = Tn + To;
+			 Tc7 = Tn - To;
+			 T2n = iio[-WS(ios, 28)];
+			 T2o = rio[WS(ios, 60)];
+			 T2p = T2n - T2o;
+			 Tcb = T2o + T2n;
+		    }
+		    {
+			 E Tq, Tr, T2q, T2r;
+			 Tq = rio[WS(ios, 12)];
+			 Tr = iio[-WS(ios, 44)];
+			 Ts = Tq + Tr;
+			 Tca = Tq - Tr;
+			 T2q = iio[-WS(ios, 12)];
+			 T2r = rio[WS(ios, 44)];
+			 T2s = T2q - T2r;
+			 Tc8 = T2q + T2r;
+		    }
+		    T2m = Tp - Ts;
+		    T2t = T2p - T2s;
+		    Tff = Tca + Tcb;
+		    Tfe = Tc7 + Tc8;
+		    Tcc = Tca - Tcb;
+		    Tc9 = Tc7 - Tc8;
+	       }
+	       {
+		    E Tm, Tt, Tfd, Tfg;
+		    Tm = Ti + Tl;
+		    Tt = Tp + Ts;
+		    Tu = Tm + Tt;
+		    T84 = Tt - Tm;
+		    Tfd = FNMS(KP382683432, Tfc, KP923879532 * Tfb);
+		    Tfg = FNMS(KP923879532, Tff, KP382683432 * Tfe);
+		    Tfh = Tfd + Tfg;
+		    ThG = Tfg - Tfd;
+	       }
+	       {
+		    E Tgk, Tgl, T2l, T2u;
+		    Tgk = FMA(KP382683432, Tfb, KP923879532 * Tfc);
+		    Tgl = FMA(KP923879532, Tfe, KP382683432 * Tff);
+		    Tgm = Tgk + Tgl;
+		    ThN = Tgk - Tgl;
+		    T2l = T2d + T2k;
+		    T2u = T2m - T2t;
+		    T2v = KP707106781 * (T2l + T2u);
+		    T6m = KP707106781 * (T2l - T2u);
+	       }
+	       {
+		    E T4I, T4J, Tc6, Tcd;
+		    T4I = T2k - T2d;
+		    T4J = T2m + T2t;
+		    T4K = KP707106781 * (T4I + T4J);
+		    T5P = KP707106781 * (T4J - T4I);
+		    Tc6 = FNMS(KP382683432, Tc5, KP923879532 * Tc2);
+		    Tcd = FMA(KP923879532, Tc9, KP382683432 * Tcc);
+		    Tce = Tc6 + Tcd;
+		    TdF = Tcd - Tc6;
+	       }
+	       {
+		    E TcB, TcC, T7j, T7k;
+		    TcB = FMA(KP923879532, Tc5, KP382683432 * Tc2);
+		    TcC = FNMS(KP382683432, Tc9, KP923879532 * Tcc);
+		    TcD = TcB + TcC;
+		    Tec = TcB - TcC;
+		    T7j = T2g + T2j;
+		    T7k = T2s + T2p;
+		    T7l = T7j - T7k;
+		    T9p = T7j + T7k;
+	       }
+	  }
+	  {
+	       E T1z, T1C, T1D, Tbg, TaQ, T4r, T4u, T7Q, Tbh, TaR, T1G, T3V, T1J, T3Y, T1K;
+	       E T7R, Tbe, Tbd, TaO, TaL, T1S, TfV, TfW, T41, T48, TaW, TaZ, T7L, T1Z, TfY;
+	       E TfZ, T4a, T4h, Tb3, Tb6, T7M;
+	       {
+		    E T1x, T1y, T1A, T1B;
+		    T1x = iio[-WS(ios, 63)];
+		    T1y = rio[WS(ios, 31)];
+		    T1z = T1x + T1y;
+		    T1A = rio[WS(ios, 15)];
+		    T1B = iio[-WS(ios, 47)];
+		    T1C = T1A + T1B;
+		    T1D = T1z + T1C;
+		    Tbg = T1x - T1y;
+		    TaQ = T1A - T1B;
+	       }
+	       {
+		    E T4p, T4q, T4s, T4t;
+		    T4p = iio[-WS(ios, 31)];
+		    T4q = rio[WS(ios, 63)];
+		    T4r = T4p - T4q;
+		    T4s = iio[-WS(ios, 15)];
+		    T4t = rio[WS(ios, 47)];
+		    T4u = T4s - T4t;
+		    T7Q = T4r + T4u;
+		    Tbh = T4s + T4t;
+		    TaR = T4q + T4p;
+	       }
+	       {
+		    E TaJ, TaK, TaM, TaN;
+		    {
+			 E T1E, T1F, T3T, T3U;
+			 T1E = rio[WS(ios, 7)];
+			 T1F = iio[-WS(ios, 39)];
+			 T1G = T1E + T1F;
+			 TaJ = T1E - T1F;
+			 T3T = iio[-WS(ios, 7)];
+			 T3U = rio[WS(ios, 39)];
+			 T3V = T3T - T3U;
+			 TaK = T3T + T3U;
+		    }
+		    {
+			 E T1H, T1I, T3W, T3X;
+			 T1H = iio[-WS(ios, 55)];
+			 T1I = rio[WS(ios, 23)];
+			 T1J = T1H + T1I;
+			 TaM = T1H - T1I;
+			 T3W = iio[-WS(ios, 23)];
+			 T3X = rio[WS(ios, 55)];
+			 T3Y = T3W - T3X;
+			 TaN = T3X + T3W;
+		    }
+		    T1K = T1G + T1J;
+		    T7R = T3V + T3Y;
+		    Tbe = TaK - TaJ;
+		    Tbd = TaM - TaN;
+		    TaO = TaM + TaN;
+		    TaL = TaJ + TaK;
+	       }
+	       {
+		    E T1O, TaX, T44, TaV, T1R, TaU, T47, TaY;
+		    {
+			 E T1M, T1N, T42, T43;
+			 T1M = rio[WS(ios, 3)];
+			 T1N = iio[-WS(ios, 35)];
+			 T1O = T1M + T1N;
+			 TaX = T1M - T1N;
+			 T42 = iio[-WS(ios, 3)];
+			 T43 = rio[WS(ios, 35)];
+			 T44 = T42 - T43;
+			 TaV = T42 + T43;
+		    }
+		    {
+			 E T1P, T1Q, T45, T46;
+			 T1P = rio[WS(ios, 19)];
+			 T1Q = iio[-WS(ios, 51)];
+			 T1R = T1P + T1Q;
+			 TaU = T1P - T1Q;
+			 T45 = iio[-WS(ios, 19)];
+			 T46 = rio[WS(ios, 51)];
+			 T47 = T45 - T46;
+			 TaY = T45 + T46;
+		    }
+		    T1S = T1O + T1R;
+		    TfV = TaV - TaU;
+		    TfW = TaX + TaY;
+		    T41 = T1O - T1R;
+		    T48 = T44 - T47;
+		    TaW = TaU + TaV;
+		    TaZ = TaX - TaY;
+		    T7L = T44 + T47;
+	       }
+	       {
+		    E T1V, Tb4, T4d, Tb2, T1Y, Tb1, T4g, Tb5;
+		    {
+			 E T1T, T1U, T4b, T4c;
+			 T1T = iio[-WS(ios, 59)];
+			 T1U = rio[WS(ios, 27)];
+			 T1V = T1T + T1U;
+			 Tb4 = T1T - T1U;
+			 T4b = iio[-WS(ios, 27)];
+			 T4c = rio[WS(ios, 59)];
+			 T4d = T4b - T4c;
+			 Tb2 = T4c + T4b;
+		    }
+		    {
+			 E T1W, T1X, T4e, T4f;
+			 T1W = rio[WS(ios, 11)];
+			 T1X = iio[-WS(ios, 43)];
+			 T1Y = T1W + T1X;
+			 Tb1 = T1W - T1X;
+			 T4e = iio[-WS(ios, 11)];
+			 T4f = rio[WS(ios, 43)];
+			 T4g = T4e - T4f;
+			 Tb5 = T4e + T4f;
+		    }
+		    T1Z = T1V + T1Y;
+		    TfY = Tb4 + Tb5;
+		    TfZ = Tb1 + Tb2;
+		    T4a = T1V - T1Y;
+		    T4h = T4d - T4g;
+		    Tb3 = Tb1 - Tb2;
+		    Tb6 = Tb4 - Tb5;
+		    T7M = T4g + T4d;
+	       }
+	       T1L = T1D + T1K;
+	       T20 = T1S + T1Z;
+	       T9c = T1L - T20;
+	       T9d = T7R + T7Q;
+	       T9e = T7L + T7M;
+	       T9f = T9d - T9e;
+	       {
+		    E T3S, T3Z, TfX, Tg0;
+		    T3S = T1z - T1C;
+		    T3Z = T3V - T3Y;
+		    T40 = T3S + T3Z;
+		    T66 = T3S - T3Z;
+		    TfX = FNMS(KP382683432, TfW, KP923879532 * TfV);
+		    Tg0 = FNMS(KP923879532, TfZ, KP382683432 * TfY);
+		    Tg1 = TfX + Tg0;
+		    Thu = Tg0 - TfX;
+	       }
+	       {
+		    E Tg6, Tg7, Tg3, Tg4;
+		    Tg6 = KP707106781 * (TaL + TaO);
+		    Tg7 = Tbg + Tbh;
+		    Tg8 = Tg6 + Tg7;
+		    Thv = Tg7 - Tg6;
+		    Tg3 = FMA(KP382683432, TfV, KP923879532 * TfW);
+		    Tg4 = FMA(KP923879532, TfY, KP382683432 * TfZ);
+		    Tg5 = Tg3 + Tg4;
+		    Thr = Tg3 - Tg4;
+	       }
+	       {
+		    E T4l, T4m, T49, T4i;
+		    T4l = T48 - T41;
+		    T4m = T4a + T4h;
+		    T4n = KP707106781 * (T4l + T4m);
+		    T67 = KP707106781 * (T4m - T4l);
+		    T49 = T41 + T48;
+		    T4i = T4a - T4h;
+		    T4j = KP707106781 * (T49 + T4i);
+		    T69 = KP707106781 * (T49 - T4i);
+	       }
+	       {
+		    E T4o, T4v, TaP, TaS;
+		    T4o = T1J - T1G;
+		    T4v = T4r - T4u;
+		    T4w = T4o + T4v;
+		    T6a = T4v - T4o;
+		    TaP = KP707106781 * (TaL - TaO);
+		    TaS = TaQ - TaR;
+		    TaT = TaP + TaS;
+		    TdW = TaS - TaP;
+	       }
+	       {
+		    E Tb0, Tb7, TfS, TfT;
+		    Tb0 = FMA(KP923879532, TaW, KP382683432 * TaZ);
+		    Tb7 = FNMS(KP382683432, Tb6, KP923879532 * Tb3);
+		    Tb8 = Tb0 + Tb7;
+		    TdZ = Tb0 - Tb7;
+		    TfS = KP707106781 * (Tbe + Tbd);
+		    TfT = TaQ + TaR;
+		    TfU = TfS - TfT;
+		    Ths = TfS + TfT;
+	       }
+	       {
+		    E T7K, T7N, T7P, T7S;
+		    T7K = T1D - T1K;
+		    T7N = T7L - T7M;
+		    T7O = T7K + T7N;
+		    T8y = T7K - T7N;
+		    T7P = T1Z - T1S;
+		    T7S = T7Q - T7R;
+		    T7T = T7P + T7S;
+		    T8z = T7S - T7P;
+	       }
+	       {
+		    E Tba, Tbb, Tbf, Tbi;
+		    Tba = FNMS(KP382683432, TaW, KP923879532 * TaZ);
+		    Tbb = FMA(KP923879532, Tb6, KP382683432 * Tb3);
+		    Tbc = Tba + Tbb;
+		    TdX = Tbb - Tba;
+		    Tbf = KP707106781 * (Tbd - Tbe);
+		    Tbi = Tbg - Tbh;
+		    Tbj = Tbf + Tbi;
+		    Te0 = Tbi - Tbf;
+	       }
+	  }
+	  {
+	       E T14, T17, T18, Tax, Tas, T3K, T3N, T7F, Tay, Tat, T1b, T3e, T1e, T3h, T1f;
+	       E T7G, TaB, TaA, Taq, Tan, T1n, TfC, TfD, T3k, T3r, Ta8, Tab, T7A, T1u, TfF;
+	       E TfG, T3t, T3A, Taf, Tai, T7B;
+	       {
+		    E T12, T13, T15, T16;
+		    T12 = rio[WS(ios, 1)];
+		    T13 = iio[-WS(ios, 33)];
+		    T14 = T12 + T13;
+		    T15 = rio[WS(ios, 17)];
+		    T16 = iio[-WS(ios, 49)];
+		    T17 = T15 + T16;
+		    T18 = T14 + T17;
+		    Tax = T15 - T16;
+		    Tas = T12 - T13;
+	       }
+	       {
+		    E T3I, T3J, T3L, T3M;
+		    T3I = iio[-WS(ios, 1)];
+		    T3J = rio[WS(ios, 33)];
+		    T3K = T3I - T3J;
+		    T3L = iio[-WS(ios, 17)];
+		    T3M = rio[WS(ios, 49)];
+		    T3N = T3L - T3M;
+		    T7F = T3K + T3N;
+		    Tay = T3I + T3J;
+		    Tat = T3L + T3M;
+	       }
+	       {
+		    E Tap, Tao, Tal, Tam;
+		    {
+			 E T19, T1a, T3c, T3d;
+			 T19 = rio[WS(ios, 9)];
+			 T1a = iio[-WS(ios, 41)];
+			 T1b = T19 + T1a;
+			 Tap = T19 - T1a;
+			 T3c = iio[-WS(ios, 9)];
+			 T3d = rio[WS(ios, 41)];
+			 T3e = T3c - T3d;
+			 Tao = T3c + T3d;
+		    }
+		    {
+			 E T1c, T1d, T3f, T3g;
+			 T1c = iio[-WS(ios, 57)];
+			 T1d = rio[WS(ios, 25)];
+			 T1e = T1c + T1d;
+			 Tal = T1c - T1d;
+			 T3f = iio[-WS(ios, 25)];
+			 T3g = rio[WS(ios, 57)];
+			 T3h = T3f - T3g;
+			 Tam = T3g + T3f;
+		    }
+		    T1f = T1b + T1e;
+		    T7G = T3e + T3h;
+		    TaB = Tal + Tam;
+		    TaA = Tap + Tao;
+		    Taq = Tao - Tap;
+		    Tan = Tal - Tam;
+	       }
+	       {
+		    E T1j, Ta6, T3n, Taa, T1m, Ta9, T3q, Ta7;
+		    {
+			 E T1h, T1i, T3l, T3m;
+			 T1h = rio[WS(ios, 5)];
+			 T1i = iio[-WS(ios, 37)];
+			 T1j = T1h + T1i;
+			 Ta6 = T1h - T1i;
+			 T3l = iio[-WS(ios, 5)];
+			 T3m = rio[WS(ios, 37)];
+			 T3n = T3l - T3m;
+			 Taa = T3l + T3m;
+		    }
+		    {
+			 E T1k, T1l, T3o, T3p;
+			 T1k = rio[WS(ios, 21)];
+			 T1l = iio[-WS(ios, 53)];
+			 T1m = T1k + T1l;
+			 Ta9 = T1k - T1l;
+			 T3o = iio[-WS(ios, 21)];
+			 T3p = rio[WS(ios, 53)];
+			 T3q = T3o - T3p;
+			 Ta7 = T3o + T3p;
+		    }
+		    T1n = T1j + T1m;
+		    TfC = Taa - Ta9;
+		    TfD = Ta6 + Ta7;
+		    T3k = T1j - T1m;
+		    T3r = T3n - T3q;
+		    Ta8 = Ta6 - Ta7;
+		    Tab = Ta9 + Taa;
+		    T7A = T3n + T3q;
+	       }
+	       {
+		    E T1q, Tad, T3w, Tah, T1t, Tag, T3z, Tae;
+		    {
+			 E T1o, T1p, T3u, T3v;
+			 T1o = iio[-WS(ios, 61)];
+			 T1p = rio[WS(ios, 29)];
+			 T1q = T1o + T1p;
+			 Tad = T1o - T1p;
+			 T3u = iio[-WS(ios, 29)];
+			 T3v = rio[WS(ios, 61)];
+			 T3w = T3u - T3v;
+			 Tah = T3v + T3u;
+		    }
+		    {
+			 E T1r, T1s, T3x, T3y;
+			 T1r = rio[WS(ios, 13)];
+			 T1s = iio[-WS(ios, 45)];
+			 T1t = T1r + T1s;
+			 Tag = T1r - T1s;
+			 T3x = iio[-WS(ios, 13)];
+			 T3y = rio[WS(ios, 45)];
+			 T3z = T3x - T3y;
+			 Tae = T3x + T3y;
+		    }
+		    T1u = T1q + T1t;
+		    TfF = Tad + Tae;
+		    TfG = Tag + Tah;
+		    T3t = T1q - T1t;
+		    T3A = T3w - T3z;
+		    Taf = Tad - Tae;
+		    Tai = Tag - Tah;
+		    T7B = T3z + T3w;
+	       }
+	       T1g = T18 + T1f;
+	       T1v = T1n + T1u;
+	       T97 = T1g - T1v;
+	       T98 = T7G + T7F;
+	       T99 = T7A + T7B;
+	       T9a = T98 - T99;
+	       {
+		    E T3b, T3i, TfE, TfH;
+		    T3b = T14 - T17;
+		    T3i = T3e - T3h;
+		    T3j = T3b + T3i;
+		    T5Z = T3b - T3i;
+		    TfE = FNMS(KP382683432, TfD, KP923879532 * TfC);
+		    TfH = FNMS(KP923879532, TfG, KP382683432 * TfF);
+		    TfI = TfE + TfH;
+		    Thk = TfH - TfE;
+	       }
+	       {
+		    E TfN, TfO, TfK, TfL;
+		    TfN = KP707106781 * (TaA + TaB);
+		    TfO = Tas + Tat;
+		    TfP = TfN + TfO;
+		    Thl = TfO - TfN;
+		    TfK = FMA(KP382683432, TfC, KP923879532 * TfD);
+		    TfL = FMA(KP923879532, TfF, KP382683432 * TfG);
+		    TfM = TfK + TfL;
+		    Tho = TfK - TfL;
+	       }
+	       {
+		    E T3E, T3F, T3s, T3B;
+		    T3E = T3r - T3k;
+		    T3F = T3t + T3A;
+		    T3G = KP707106781 * (T3E + T3F);
+		    T60 = KP707106781 * (T3F - T3E);
+		    T3s = T3k + T3r;
+		    T3B = T3t - T3A;
+		    T3C = KP707106781 * (T3s + T3B);
+		    T62 = KP707106781 * (T3s - T3B);
+	       }
+	       {
+		    E T3H, T3O, Tac, Taj;
+		    T3H = T1e - T1b;
+		    T3O = T3K - T3N;
+		    T3P = T3H + T3O;
+		    T63 = T3O - T3H;
+		    Tac = FNMS(KP382683432, Tab, KP923879532 * Ta8);
+		    Taj = FMA(KP923879532, Taf, KP382683432 * Tai);
+		    Tak = Tac + Taj;
+		    TdQ = Taj - Tac;
+	       }
+	       {
+		    E Tar, Tau, Tfz, TfA;
+		    Tar = KP707106781 * (Tan - Taq);
+		    Tau = Tas - Tat;
+		    Tav = Tar + Tau;
+		    TdT = Tau - Tar;
+		    Tfz = Tay - Tax;
+		    TfA = KP707106781 * (Taq + Tan);
+		    TfB = Tfz + TfA;
+		    Thn = Tfz - TfA;
+	       }
+	       {
+		    E T7z, T7C, T7E, T7H;
+		    T7z = T18 - T1f;
+		    T7C = T7A - T7B;
+		    T7D = T7z + T7C;
+		    T8v = T7z - T7C;
+		    T7E = T1u - T1n;
+		    T7H = T7F - T7G;
+		    T7I = T7E + T7H;
+		    T8w = T7H - T7E;
+	       }
+	       {
+		    E Taz, TaC, TaE, TaF;
+		    Taz = Tax + Tay;
+		    TaC = KP707106781 * (TaA - TaB);
+		    TaD = Taz + TaC;
+		    TdP = Taz - TaC;
+		    TaE = FMA(KP923879532, Tab, KP382683432 * Ta8);
+		    TaF = FNMS(KP382683432, Taf, KP923879532 * Tai);
+		    TaG = TaE + TaF;
+		    TdS = TaE - TaF;
+	       }
+	  }
+	  {
+	       E T11, T9K, T9T, Ta2, T22, T9Q, T9N, Ta3;
+	       {
+		    E Tv, T10, T9R, T9S;
+		    Tv = Tf + Tu;
+		    T10 = TK + TZ;
+		    T11 = Tv + T10;
+		    T9K = Tv - T10;
+		    T9R = T9p + T9o;
+		    T9S = T93 + T94;
+		    T9T = T9R - T9S;
+		    Ta2 = T9S + T9R;
+	       }
+	       {
+		    E T1w, T21, T9L, T9M;
+		    T1w = T1g + T1v;
+		    T21 = T1L + T20;
+		    T22 = T1w + T21;
+		    T9Q = T21 - T1w;
+		    T9L = T99 + T98;
+		    T9M = T9e + T9d;
+		    T9N = T9L - T9M;
+		    Ta3 = T9L + T9M;
+	       }
+	       rio[0] = T11 + T22;
+	       iio[-WS(ios, 63)] = Ta3 + Ta2;
+	       {
+		    E T9O, T9U, T9J, T9P;
+		    T9O = T9K + T9N;
+		    T9U = T9Q + T9T;
+		    T9J = W[94];
+		    T9P = W[95];
+		    rio[WS(ios, 48)] = FNMS(T9P, T9U, T9J * T9O);
+		    iio[-WS(ios, 15)] = FMA(T9P, T9O, T9J * T9U);
+	       }
+	       {
+		    E T9W, T9Y, T9V, T9X;
+		    T9W = T9K - T9N;
+		    T9Y = T9T - T9Q;
+		    T9V = W[30];
+		    T9X = W[31];
+		    rio[WS(ios, 16)] = FNMS(T9X, T9Y, T9V * T9W);
+		    iio[-WS(ios, 47)] = FMA(T9X, T9W, T9V * T9Y);
+	       }
+	       {
+		    E Ta0, Ta4, T9Z, Ta1;
+		    Ta0 = T11 - T22;
+		    Ta4 = Ta2 - Ta3;
+		    T9Z = W[62];
+		    Ta1 = W[63];
+		    rio[WS(ios, 32)] = FNMS(Ta1, Ta4, T9Z * Ta0);
+		    iio[-WS(ios, 31)] = FMA(Ta1, Ta0, T9Z * Ta4);
+	       }
+	  }
+	  {
+	       E T96, T9y, T9r, T9D, T9h, T9C, T9m, T9z;
+	       {
+		    E T92, T95, T9n, T9q;
+		    T92 = Tf - Tu;
+		    T95 = T93 - T94;
+		    T96 = T92 + T95;
+		    T9y = T92 - T95;
+		    T9n = TZ - TK;
+		    T9q = T9o - T9p;
+		    T9r = T9n + T9q;
+		    T9D = T9q - T9n;
+	       }
+	       {
+		    E T9b, T9g, T9k, T9l;
+		    T9b = T97 + T9a;
+		    T9g = T9c - T9f;
+		    T9h = KP707106781 * (T9b + T9g);
+		    T9C = KP707106781 * (T9b - T9g);
+		    T9k = T9a - T97;
+		    T9l = T9c + T9f;
+		    T9m = KP707106781 * (T9k + T9l);
+		    T9z = KP707106781 * (T9l - T9k);
+	       }
+	       {
+		    E T9i, T9s, T91, T9j;
+		    T9i = T96 + T9h;
+		    T9s = T9m + T9r;
+		    T91 = W[110];
+		    T9j = W[111];
+		    rio[WS(ios, 56)] = FNMS(T9j, T9s, T91 * T9i);
+		    iio[-WS(ios, 7)] = FMA(T9j, T9i, T91 * T9s);
+	       }
+	       {
+		    E T9G, T9I, T9F, T9H;
+		    T9G = T9y - T9z;
+		    T9I = T9D - T9C;
+		    T9F = W[78];
+		    T9H = W[79];
+		    rio[WS(ios, 40)] = FNMS(T9H, T9I, T9F * T9G);
+		    iio[-WS(ios, 23)] = FMA(T9H, T9G, T9F * T9I);
+	       }
+	       {
+		    E T9u, T9w, T9t, T9v;
+		    T9u = T96 - T9h;
+		    T9w = T9r - T9m;
+		    T9t = W[46];
+		    T9v = W[47];
+		    rio[WS(ios, 24)] = FNMS(T9v, T9w, T9t * T9u);
+		    iio[-WS(ios, 39)] = FMA(T9v, T9u, T9t * T9w);
+	       }
+	       {
+		    E T9A, T9E, T9x, T9B;
+		    T9A = T9y + T9z;
+		    T9E = T9C + T9D;
+		    T9x = W[14];
+		    T9B = W[15];
+		    rio[WS(ios, 8)] = FNMS(T9B, T9E, T9x * T9A);
+		    iio[-WS(ios, 55)] = FMA(T9B, T9A, T9x * T9E);
+	       }
+	  }
+	  {
+	       E T8u, T8Q, T8J, T8V, T8B, T8U, T8G, T8R;
+	       {
+		    E T8s, T8t, T8H, T8I;
+		    T8s = T7i - T7l;
+		    T8t = KP707106781 * (T82 - T81);
+		    T8u = T8s + T8t;
+		    T8Q = T8s - T8t;
+		    T8H = KP707106781 * (T7r - T7w);
+		    T8I = T87 - T84;
+		    T8J = T8H + T8I;
+		    T8V = T8I - T8H;
+	       }
+	       {
+		    E T8x, T8A, T8E, T8F;
+		    T8x = FNMS(KP382683432, T8w, KP923879532 * T8v);
+		    T8A = FMA(KP923879532, T8y, KP382683432 * T8z);
+		    T8B = T8x + T8A;
+		    T8U = T8A - T8x;
+		    T8E = FMA(KP382683432, T8v, KP923879532 * T8w);
+		    T8F = FNMS(KP382683432, T8y, KP923879532 * T8z);
+		    T8G = T8E + T8F;
+		    T8R = T8E - T8F;
+	       }
+	       {
+		    E T8C, T8K, T8r, T8D;
+		    T8C = T8u + T8B;
+		    T8K = T8G + T8J;
+		    T8r = W[6];
+		    T8D = W[7];
+		    rio[WS(ios, 4)] = FNMS(T8D, T8K, T8r * T8C);
+		    iio[-WS(ios, 59)] = FMA(T8D, T8C, T8r * T8K);
+	       }
+	       {
+		    E T8Y, T90, T8X, T8Z;
+		    T8Y = T8Q - T8R;
+		    T90 = T8V - T8U;
+		    T8X = W[38];
+		    T8Z = W[39];
+		    rio[WS(ios, 20)] = FNMS(T8Z, T90, T8X * T8Y);
+		    iio[-WS(ios, 43)] = FMA(T8Z, T8Y, T8X * T90);
+	       }
+	       {
+		    E T8M, T8O, T8L, T8N;
+		    T8M = T8u - T8B;
+		    T8O = T8J - T8G;
+		    T8L = W[70];
+		    T8N = W[71];
+		    rio[WS(ios, 36)] = FNMS(T8N, T8O, T8L * T8M);
+		    iio[-WS(ios, 27)] = FMA(T8N, T8M, T8L * T8O);
+	       }
+	       {
+		    E T8S, T8W, T8P, T8T;
+		    T8S = T8Q + T8R;
+		    T8W = T8U + T8V;
+		    T8P = W[102];
+		    T8T = W[103];
+		    rio[WS(ios, 52)] = FNMS(T8T, T8W, T8P * T8S);
+		    iio[-WS(ios, 11)] = FMA(T8T, T8S, T8P * T8W);
+	       }
+	  }
+	  {
+	       E T7y, T8g, T89, T8l, T7V, T8k, T80, T8h;
+	       {
+		    E T7m, T7x, T83, T88;
+		    T7m = T7i + T7l;
+		    T7x = KP707106781 * (T7r + T7w);
+		    T7y = T7m + T7x;
+		    T8g = T7m - T7x;
+		    T83 = KP707106781 * (T81 + T82);
+		    T88 = T84 + T87;
+		    T89 = T83 + T88;
+		    T8l = T88 - T83;
+	       }
+	       {
+		    E T7J, T7U, T7Y, T7Z;
+		    T7J = FMA(KP923879532, T7D, KP382683432 * T7I);
+		    T7U = FNMS(KP382683432, T7T, KP923879532 * T7O);
+		    T7V = T7J + T7U;
+		    T8k = T7J - T7U;
+		    T7Y = FNMS(KP382683432, T7D, KP923879532 * T7I);
+		    T7Z = FMA(KP382683432, T7O, KP923879532 * T7T);
+		    T80 = T7Y + T7Z;
+		    T8h = T7Z - T7Y;
+	       }
+	       {
+		    E T7W, T8a, T7h, T7X;
+		    T7W = T7y + T7V;
+		    T8a = T80 + T89;
+		    T7h = W[118];
+		    T7X = W[119];
+		    rio[WS(ios, 60)] = FNMS(T7X, T8a, T7h * T7W);
+		    iio[-WS(ios, 3)] = FMA(T7X, T7W, T7h * T8a);
+	       }
+	       {
+		    E T8o, T8q, T8n, T8p;
+		    T8o = T8g - T8h;
+		    T8q = T8l - T8k;
+		    T8n = W[86];
+		    T8p = W[87];
+		    rio[WS(ios, 44)] = FNMS(T8p, T8q, T8n * T8o);
+		    iio[-WS(ios, 19)] = FMA(T8p, T8o, T8n * T8q);
+	       }
+	       {
+		    E T8c, T8e, T8b, T8d;
+		    T8c = T7y - T7V;
+		    T8e = T89 - T80;
+		    T8b = W[54];
+		    T8d = W[55];
+		    rio[WS(ios, 28)] = FNMS(T8d, T8e, T8b * T8c);
+		    iio[-WS(ios, 35)] = FMA(T8d, T8c, T8b * T8e);
+	       }
+	       {
+		    E T8i, T8m, T8f, T8j;
+		    T8i = T8g + T8h;
+		    T8m = T8k + T8l;
+		    T8f = W[22];
+		    T8j = W[23];
+		    rio[WS(ios, 12)] = FNMS(T8j, T8m, T8f * T8i);
+		    iio[-WS(ios, 51)] = FMA(T8j, T8i, T8f * T8m);
+	       }
+	  }
+	  {
+	       E T6K, T76, T6Z, T7b, T6R, T7a, T6W, T77;
+	       {
+		    E T6I, T6J, T6X, T6Y;
+		    T6I = T5O - T5P;
+		    T6J = T6j - T6k;
+		    T6K = T6I + T6J;
+		    T76 = T6I - T6J;
+		    T6X = T5W - T5T;
+		    T6Y = T6n - T6m;
+		    T6Z = T6X + T6Y;
+		    T7b = T6Y - T6X;
+		    {
+			 E T6N, T6U, T6Q, T6V;
+			 {
+			      E T6L, T6M, T6O, T6P;
+			      T6L = T5Z - T60;
+			      T6M = T63 - T62;
+			      T6N = FMA(KP831469612, T6L, KP555570233 * T6M);
+			      T6U = FNMS(KP555570233, T6L, KP831469612 * T6M);
+			      T6O = T66 - T67;
+			      T6P = T6a - T69;
+			      T6Q = FNMS(KP555570233, T6P, KP831469612 * T6O);
+			      T6V = FMA(KP555570233, T6O, KP831469612 * T6P);
+			 }
+			 T6R = T6N + T6Q;
+			 T7a = T6N - T6Q;
+			 T6W = T6U + T6V;
+			 T77 = T6V - T6U;
+		    }
+	       }
+	       {
+		    E T6S, T70, T6H, T6T;
+		    T6S = T6K + T6R;
+		    T70 = T6W + T6Z;
+		    T6H = W[114];
+		    T6T = W[115];
+		    rio[WS(ios, 58)] = FNMS(T6T, T70, T6H * T6S);
+		    iio[-WS(ios, 5)] = FMA(T6T, T6S, T6H * T70);
+	       }
+	       {
+		    E T7e, T7g, T7d, T7f;
+		    T7e = T76 - T77;
+		    T7g = T7b - T7a;
+		    T7d = W[82];
+		    T7f = W[83];
+		    rio[WS(ios, 42)] = FNMS(T7f, T7g, T7d * T7e);
+		    iio[-WS(ios, 21)] = FMA(T7f, T7e, T7d * T7g);
+	       }
+	       {
+		    E T72, T74, T71, T73;
+		    T72 = T6K - T6R;
+		    T74 = T6Z - T6W;
+		    T71 = W[50];
+		    T73 = W[51];
+		    rio[WS(ios, 26)] = FNMS(T73, T74, T71 * T72);
+		    iio[-WS(ios, 37)] = FMA(T73, T72, T71 * T74);
+	       }
+	       {
+		    E T78, T7c, T75, T79;
+		    T78 = T76 + T77;
+		    T7c = T7a + T7b;
+		    T75 = W[18];
+		    T79 = W[19];
+		    rio[WS(ios, 10)] = FNMS(T79, T7c, T75 * T78);
+		    iio[-WS(ios, 53)] = FMA(T79, T78, T75 * T7c);
+	       }
+	  }
+	  {
+	       E T3a, T52, T4V, T57, T4z, T56, T4E, T53;
+	       {
+		    E T2w, T39, T4H, T4U;
+		    T2w = T2c + T2v;
+		    T39 = T2P + T38;
+		    T3a = T2w + T39;
+		    T52 = T2w - T39;
+		    T4H = T4F + T4G;
+		    T4U = T4K + T4T;
+		    T4V = T4H + T4U;
+		    T57 = T4U - T4H;
+		    {
+			 E T3R, T4C, T4y, T4D;
+			 {
+			      E T3D, T3Q, T4k, T4x;
+			      T3D = T3j + T3C;
+			      T3Q = T3G + T3P;
+			      T3R = FMA(KP980785280, T3D, KP195090322 * T3Q);
+			      T4C = FNMS(KP195090322, T3D, KP980785280 * T3Q);
+			      T4k = T40 + T4j;
+			      T4x = T4n + T4w;
+			      T4y = FNMS(KP195090322, T4x, KP980785280 * T4k);
+			      T4D = FMA(KP195090322, T4k, KP980785280 * T4x);
+			 }
+			 T4z = T3R + T4y;
+			 T56 = T3R - T4y;
+			 T4E = T4C + T4D;
+			 T53 = T4D - T4C;
+		    }
+	       }
+	       {
+		    E T4A, T4W, T23, T4B;
+		    T4A = T3a + T4z;
+		    T4W = T4E + T4V;
+		    T23 = W[122];
+		    T4B = W[123];
+		    rio[WS(ios, 62)] = FNMS(T4B, T4W, T23 * T4A);
+		    iio[-WS(ios, 1)] = FMA(T4B, T4A, T23 * T4W);
+	       }
+	       {
+		    E T5a, T5c, T59, T5b;
+		    T5a = T52 - T53;
+		    T5c = T57 - T56;
+		    T59 = W[90];
+		    T5b = W[91];
+		    rio[WS(ios, 46)] = FNMS(T5b, T5c, T59 * T5a);
+		    iio[-WS(ios, 17)] = FMA(T5b, T5a, T59 * T5c);
+	       }
+	       {
+		    E T4Y, T50, T4X, T4Z;
+		    T4Y = T3a - T4z;
+		    T50 = T4V - T4E;
+		    T4X = W[58];
+		    T4Z = W[59];
+		    rio[WS(ios, 30)] = FNMS(T4Z, T50, T4X * T4Y);
+		    iio[-WS(ios, 33)] = FMA(T4Z, T4Y, T4X * T50);
+	       }
+	       {
+		    E T54, T58, T51, T55;
+		    T54 = T52 + T53;
+		    T58 = T56 + T57;
+		    T51 = W[26];
+		    T55 = W[27];
+		    rio[WS(ios, 14)] = FNMS(T55, T58, T51 * T54);
+		    iio[-WS(ios, 49)] = FMA(T55, T54, T51 * T58);
+	       }
+	  }
+	  {
+	       E T5g, T5C, T5v, T5H, T5n, T5G, T5s, T5D;
+	       {
+		    E T5e, T5f, T5t, T5u;
+		    T5e = T2c - T2v;
+		    T5f = T4G - T4F;
+		    T5g = T5e + T5f;
+		    T5C = T5e - T5f;
+		    T5t = T2P - T38;
+		    T5u = T4T - T4K;
+		    T5v = T5t + T5u;
+		    T5H = T5u - T5t;
+		    {
+			 E T5j, T5q, T5m, T5r;
+			 {
+			      E T5h, T5i, T5k, T5l;
+			      T5h = T3j - T3C;
+			      T5i = T3P - T3G;
+			      T5j = FNMS(KP555570233, T5i, KP831469612 * T5h);
+			      T5q = FMA(KP555570233, T5h, KP831469612 * T5i);
+			      T5k = T40 - T4j;
+			      T5l = T4w - T4n;
+			      T5m = FMA(KP831469612, T5k, KP555570233 * T5l);
+			      T5r = FNMS(KP555570233, T5k, KP831469612 * T5l);
+			 }
+			 T5n = T5j + T5m;
+			 T5G = T5m - T5j;
+			 T5s = T5q + T5r;
+			 T5D = T5q - T5r;
+		    }
+	       }
+	       {
+		    E T5o, T5w, T5d, T5p;
+		    T5o = T5g + T5n;
+		    T5w = T5s + T5v;
+		    T5d = W[10];
+		    T5p = W[11];
+		    rio[WS(ios, 6)] = FNMS(T5p, T5w, T5d * T5o);
+		    iio[-WS(ios, 57)] = FMA(T5p, T5o, T5d * T5w);
+	       }
+	       {
+		    E T5K, T5M, T5J, T5L;
+		    T5K = T5C - T5D;
+		    T5M = T5H - T5G;
+		    T5J = W[42];
+		    T5L = W[43];
+		    rio[WS(ios, 22)] = FNMS(T5L, T5M, T5J * T5K);
+		    iio[-WS(ios, 41)] = FMA(T5L, T5K, T5J * T5M);
+	       }
+	       {
+		    E T5y, T5A, T5x, T5z;
+		    T5y = T5g - T5n;
+		    T5A = T5v - T5s;
+		    T5x = W[74];
+		    T5z = W[75];
+		    rio[WS(ios, 38)] = FNMS(T5z, T5A, T5x * T5y);
+		    iio[-WS(ios, 25)] = FMA(T5z, T5y, T5x * T5A);
+	       }
+	       {
+		    E T5E, T5I, T5B, T5F;
+		    T5E = T5C + T5D;
+		    T5I = T5G + T5H;
+		    T5B = W[106];
+		    T5F = W[107];
+		    rio[WS(ios, 54)] = FNMS(T5F, T5I, T5B * T5E);
+		    iio[-WS(ios, 9)] = FMA(T5F, T5E, T5B * T5I);
+	       }
+	  }
+	  {
+	       E T5Y, T6w, T6p, T6B, T6d, T6A, T6i, T6x;
+	       {
+		    E T5Q, T5X, T6l, T6o;
+		    T5Q = T5O + T5P;
+		    T5X = T5T + T5W;
+		    T5Y = T5Q + T5X;
+		    T6w = T5Q - T5X;
+		    T6l = T6j + T6k;
+		    T6o = T6m + T6n;
+		    T6p = T6l + T6o;
+		    T6B = T6o - T6l;
+		    {
+			 E T65, T6g, T6c, T6h;
+			 {
+			      E T61, T64, T68, T6b;
+			      T61 = T5Z + T60;
+			      T64 = T62 + T63;
+			      T65 = FNMS(KP195090322, T64, KP980785280 * T61);
+			      T6g = FMA(KP195090322, T61, KP980785280 * T64);
+			      T68 = T66 + T67;
+			      T6b = T69 + T6a;
+			      T6c = FMA(KP980785280, T68, KP195090322 * T6b);
+			      T6h = FNMS(KP195090322, T68, KP980785280 * T6b);
+			 }
+			 T6d = T65 + T6c;
+			 T6A = T6c - T65;
+			 T6i = T6g + T6h;
+			 T6x = T6g - T6h;
+		    }
+	       }
+	       {
+		    E T6e, T6q, T5N, T6f;
+		    T6e = T5Y + T6d;
+		    T6q = T6i + T6p;
+		    T5N = W[2];
+		    T6f = W[3];
+		    rio[WS(ios, 2)] = FNMS(T6f, T6q, T5N * T6e);
+		    iio[-WS(ios, 61)] = FMA(T6f, T6e, T5N * T6q);
+	       }
+	       {
+		    E T6E, T6G, T6D, T6F;
+		    T6E = T6w - T6x;
+		    T6G = T6B - T6A;
+		    T6D = W[34];
+		    T6F = W[35];
+		    rio[WS(ios, 18)] = FNMS(T6F, T6G, T6D * T6E);
+		    iio[-WS(ios, 45)] = FMA(T6F, T6E, T6D * T6G);
+	       }
+	       {
+		    E T6s, T6u, T6r, T6t;
+		    T6s = T5Y - T6d;
+		    T6u = T6p - T6i;
+		    T6r = W[66];
+		    T6t = W[67];
+		    rio[WS(ios, 34)] = FNMS(T6t, T6u, T6r * T6s);
+		    iio[-WS(ios, 29)] = FMA(T6t, T6s, T6r * T6u);
+	       }
+	       {
+		    E T6y, T6C, T6v, T6z;
+		    T6y = T6w + T6x;
+		    T6C = T6A + T6B;
+		    T6v = W[98];
+		    T6z = W[99];
+		    rio[WS(ios, 50)] = FNMS(T6z, T6C, T6v * T6y);
+		    iio[-WS(ios, 13)] = FMA(T6z, T6y, T6v * T6C);
+	       }
+	  }
+	  {
+	       E TdO, Tf1, Teq, TeH, Tef, TeW, Ten, TeM, Te3, Ter, Te8, Tem, TeE, Tf0, TeP;
+	       E TeX;
+	       {
+		    E TdG, TeG, TdN, TeF, TdJ, TdM;
+		    TdG = TdE + TdF;
+		    TeG = Ted - Tec;
+		    TdJ = FNMS(KP555570233, TdI, KP831469612 * TdH);
+		    TdM = FMA(KP831469612, TdK, KP555570233 * TdL);
+		    TdN = TdJ + TdM;
+		    TeF = TdM - TdJ;
+		    TdO = TdG + TdN;
+		    Tf1 = TeG - TeF;
+		    Teq = TdG - TdN;
+		    TeH = TeF + TeG;
+	       }
+	       {
+		    E Tee, TeK, Teb, TeL, Te9, Tea;
+		    Tee = Tec + Ted;
+		    TeK = TdE - TdF;
+		    Te9 = FMA(KP555570233, TdH, KP831469612 * TdI);
+		    Tea = FNMS(KP555570233, TdK, KP831469612 * TdL);
+		    Teb = Te9 + Tea;
+		    TeL = Te9 - Tea;
+		    Tef = Teb + Tee;
+		    TeW = TeK - TeL;
+		    Ten = Tee - Teb;
+		    TeM = TeK + TeL;
+	       }
+	       {
+		    E TdV, Te6, Te2, Te7;
+		    {
+			 E TdR, TdU, TdY, Te1;
+			 TdR = TdP + TdQ;
+			 TdU = TdS + TdT;
+			 TdV = FNMS(KP290284677, TdU, KP956940335 * TdR);
+			 Te6 = FMA(KP290284677, TdR, KP956940335 * TdU);
+			 TdY = TdW + TdX;
+			 Te1 = TdZ + Te0;
+			 Te2 = FMA(KP956940335, TdY, KP290284677 * Te1);
+			 Te7 = FNMS(KP290284677, TdY, KP956940335 * Te1);
+		    }
+		    Te3 = TdV + Te2;
+		    Ter = Te6 - Te7;
+		    Te8 = Te6 + Te7;
+		    Tem = Te2 - TdV;
+	       }
+	       {
+		    E TeA, TeN, TeD, TeO;
+		    {
+			 E Tey, Tez, TeB, TeC;
+			 Tey = TdT - TdS;
+			 Tez = TdP - TdQ;
+			 TeA = FNMS(KP471396736, Tez, KP881921264 * Tey);
+			 TeN = FMA(KP881921264, Tez, KP471396736 * Tey);
+			 TeB = TdW - TdX;
+			 TeC = Te0 - TdZ;
+			 TeD = FMA(KP471396736, TeB, KP881921264 * TeC);
+			 TeO = FNMS(KP471396736, TeC, KP881921264 * TeB);
+		    }
+		    TeE = TeA + TeD;
+		    Tf0 = TeN - TeO;
+		    TeP = TeN + TeO;
+		    TeX = TeD - TeA;
+	       }
+	       {
+		    E Te4, Teg, TdD, Te5;
+		    Te4 = TdO + Te3;
+		    Teg = Te8 + Tef;
+		    TdD = W[120];
+		    Te5 = W[121];
+		    iio[-WS(ios, 2)] = FMA(TdD, Te4, Te5 * Teg);
+		    rio[WS(ios, 61)] = FNMS(Te5, Te4, TdD * Teg);
+	       }
+	       {
+		    E TeY, Tf2, TeV, TeZ;
+		    TeY = TeW + TeX;
+		    Tf2 = Tf0 + Tf1;
+		    TeV = W[104];
+		    TeZ = W[105];
+		    iio[-WS(ios, 10)] = FMA(TeV, TeY, TeZ * Tf2);
+		    rio[WS(ios, 53)] = FNMS(TeZ, TeY, TeV * Tf2);
+	       }
+	       {
+		    E Tf4, Tf6, Tf3, Tf5;
+		    Tf4 = Tf1 - Tf0;
+		    Tf6 = TeW - TeX;
+		    Tf3 = W[40];
+		    Tf5 = W[41];
+		    rio[WS(ios, 21)] = FNMS(Tf5, Tf6, Tf3 * Tf4);
+		    iio[-WS(ios, 42)] = FMA(Tf3, Tf6, Tf5 * Tf4);
+	       }
+	       {
+		    E Tei, Tek, Teh, Tej;
+		    Tei = Tef - Te8;
+		    Tek = TdO - Te3;
+		    Teh = W[56];
+		    Tej = W[57];
+		    rio[WS(ios, 29)] = FNMS(Tej, Tek, Teh * Tei);
+		    iio[-WS(ios, 34)] = FMA(Teh, Tek, Tej * Tei);
+	       }
+	       {
+		    E Teo, Tes, Tel, Tep;
+		    Teo = Tem + Ten;
+		    Tes = Teq + Ter;
+		    Tel = W[24];
+		    Tep = W[25];
+		    rio[WS(ios, 13)] = FNMS(Tep, Tes, Tel * Teo);
+		    iio[-WS(ios, 50)] = FMA(Tel, Tes, Tep * Teo);
+	       }
+	       {
+		    E TeI, TeQ, Tex, TeJ;
+		    TeI = TeE + TeH;
+		    TeQ = TeM + TeP;
+		    Tex = W[8];
+		    TeJ = W[9];
+		    rio[WS(ios, 5)] = FNMS(TeJ, TeQ, Tex * TeI);
+		    iio[-WS(ios, 58)] = FMA(Tex, TeQ, TeJ * TeI);
+	       }
+	       {
+		    E TeS, TeU, TeR, TeT;
+		    TeS = TeM - TeP;
+		    TeU = TeH - TeE;
+		    TeR = W[72];
+		    TeT = W[73];
+		    iio[-WS(ios, 26)] = FMA(TeR, TeS, TeT * TeU);
+		    rio[WS(ios, 37)] = FNMS(TeT, TeS, TeR * TeU);
+	       }
+	       {
+		    E Teu, Tew, Tet, Tev;
+		    Teu = Teq - Ter;
+		    Tew = Ten - Tem;
+		    Tet = W[88];
+		    Tev = W[89];
+		    iio[-WS(ios, 18)] = FMA(Tet, Teu, Tev * Tew);
+		    rio[WS(ios, 45)] = FNMS(Tev, Teu, Tet * Tew);
+	       }
+	  }
+	  {
+	       E Tcr, Tdw, TcX, Td6, TcI, Tdt, TcS, Tdl, Tbm, TcW, TcL, TcT, Tdd, Tdx, Tdi;
+	       E Tds;
+	       {
+		    E Tcq, Td4, TbZ, Td5, TbF, TbY;
+		    Tcq = Tce + Tcp;
+		    Td4 = TcA - TcD;
+		    TbF = FNMS(KP195090322, TbE, KP980785280 * Tbx);
+		    TbY = FMA(KP195090322, TbQ, KP980785280 * TbX);
+		    TbZ = TbF + TbY;
+		    Td5 = TbY - TbF;
+		    Tcr = TbZ + Tcq;
+		    Tdw = Td4 - Td5;
+		    TcX = Tcq - TbZ;
+		    Td6 = Td4 + Td5;
+	       }
+	       {
+		    E TcE, Tdk, TcH, Tdj, TcF, TcG;
+		    TcE = TcA + TcD;
+		    Tdk = Tcp - Tce;
+		    TcF = FMA(KP980785280, TbE, KP195090322 * Tbx);
+		    TcG = FNMS(KP195090322, TbX, KP980785280 * TbQ);
+		    TcH = TcF + TcG;
+		    Tdj = TcF - TcG;
+		    TcI = TcE + TcH;
+		    Tdt = Tdk - Tdj;
+		    TcS = TcE - TcH;
+		    Tdl = Tdj + Tdk;
+	       }
+	       {
+		    E TaI, TcJ, Tbl, TcK;
+		    {
+			 E Taw, TaH, Tb9, Tbk;
+			 Taw = Tak + Tav;
+			 TaH = TaD + TaG;
+			 TaI = FNMS(KP098017140, TaH, KP995184726 * Taw);
+			 TcJ = FMA(KP995184726, TaH, KP098017140 * Taw);
+			 Tb9 = TaT + Tb8;
+			 Tbk = Tbc + Tbj;
+			 Tbl = FMA(KP098017140, Tb9, KP995184726 * Tbk);
+			 TcK = FNMS(KP098017140, Tbk, KP995184726 * Tb9);
+		    }
+		    Tbm = TaI + Tbl;
+		    TcW = TcJ - TcK;
+		    TcL = TcJ + TcK;
+		    TcT = Tbl - TaI;
+	       }
+	       {
+		    E Td9, Tdg, Tdc, Tdh;
+		    {
+			 E Td7, Td8, Tda, Tdb;
+			 Td7 = TaD - TaG;
+			 Td8 = Tav - Tak;
+			 Td9 = FNMS(KP634393284, Td8, KP773010453 * Td7);
+			 Tdg = FMA(KP634393284, Td7, KP773010453 * Td8);
+			 Tda = TaT - Tb8;
+			 Tdb = Tbj - Tbc;
+			 Tdc = FMA(KP773010453, Tda, KP634393284 * Tdb);
+			 Tdh = FNMS(KP634393284, Tda, KP773010453 * Tdb);
+		    }
+		    Tdd = Td9 + Tdc;
+		    Tdx = Tdg - Tdh;
+		    Tdi = Tdg + Tdh;
+		    Tds = Tdc - Td9;
+	       }
+	       {
+		    E Tcs, TcM, Ta5, Tct;
+		    Tcs = Tbm + Tcr;
+		    TcM = TcI + TcL;
+		    Ta5 = W[0];
+		    Tct = W[1];
+		    rio[WS(ios, 1)] = FNMS(Tct, TcM, Ta5 * Tcs);
+		    iio[-WS(ios, 62)] = FMA(Ta5, TcM, Tct * Tcs);
+	       }
+	       {
+		    E Tdu, Tdy, Tdr, Tdv;
+		    Tdu = Tds + Tdt;
+		    Tdy = Tdw + Tdx;
+		    Tdr = W[16];
+		    Tdv = W[17];
+		    rio[WS(ios, 9)] = FNMS(Tdv, Tdy, Tdr * Tdu);
+		    iio[-WS(ios, 54)] = FMA(Tdr, Tdy, Tdv * Tdu);
+	       }
+	       {
+		    E TdA, TdC, Tdz, TdB;
+		    TdA = Tdw - Tdx;
+		    TdC = Tdt - Tds;
+		    Tdz = W[80];
+		    TdB = W[81];
+		    iio[-WS(ios, 22)] = FMA(Tdz, TdA, TdB * TdC);
+		    rio[WS(ios, 41)] = FNMS(TdB, TdA, Tdz * TdC);
+	       }
+	       {
+		    E TcO, TcQ, TcN, TcP;
+		    TcO = TcI - TcL;
+		    TcQ = Tcr - Tbm;
+		    TcN = W[64];
+		    TcP = W[65];
+		    iio[-WS(ios, 30)] = FMA(TcN, TcO, TcP * TcQ);
+		    rio[WS(ios, 33)] = FNMS(TcP, TcO, TcN * TcQ);
+	       }
+	       {
+		    E TcU, TcY, TcR, TcV;
+		    TcU = TcS + TcT;
+		    TcY = TcW + TcX;
+		    TcR = W[96];
+		    TcV = W[97];
+		    iio[-WS(ios, 14)] = FMA(TcR, TcU, TcV * TcY);
+		    rio[WS(ios, 49)] = FNMS(TcV, TcU, TcR * TcY);
+	       }
+	       {
+		    E Tde, Tdm, Td3, Tdf;
+		    Tde = Td6 + Tdd;
+		    Tdm = Tdi + Tdl;
+		    Td3 = W[112];
+		    Tdf = W[113];
+		    iio[-WS(ios, 6)] = FMA(Td3, Tde, Tdf * Tdm);
+		    rio[WS(ios, 57)] = FNMS(Tdf, Tde, Td3 * Tdm);
+	       }
+	       {
+		    E Tdo, Tdq, Tdn, Tdp;
+		    Tdo = Tdl - Tdi;
+		    Tdq = Td6 - Tdd;
+		    Tdn = W[48];
+		    Tdp = W[49];
+		    rio[WS(ios, 25)] = FNMS(Tdp, Tdq, Tdn * Tdo);
+		    iio[-WS(ios, 38)] = FMA(Tdn, Tdq, Tdp * Tdo);
+	       }
+	       {
+		    E Td0, Td2, TcZ, Td1;
+		    Td0 = TcX - TcW;
+		    Td2 = TcS - TcT;
+		    TcZ = W[32];
+		    Td1 = W[33];
+		    rio[WS(ios, 17)] = FNMS(Td1, Td2, TcZ * Td0);
+		    iio[-WS(ios, 46)] = FMA(TcZ, Td2, Td1 * Td0);
+	       }
+	  }
+	  {
+	       E Tfy, Thd, TgC, TgT, Tgr, Th8, Tgz, TgY, Tgb, TgD, Tgg, Tgy, TgQ, Thc, Th1;
+	       E Th9;
+	       {
+		    E Tfi, TgS, Tfx, TgR, Tfp, Tfw;
+		    Tfi = Tfa + Tfh;
+		    TgS = Tgp - Tgm;
+		    Tfp = FNMS(KP195090322, Tfo, KP980785280 * Tfl);
+		    Tfw = FMA(KP980785280, Tfs, KP195090322 * Tfv);
+		    Tfx = Tfp + Tfw;
+		    TgR = Tfw - Tfp;
+		    Tfy = Tfi + Tfx;
+		    Thd = TgS - TgR;
+		    TgC = Tfi - Tfx;
+		    TgT = TgR + TgS;
+	       }
+	       {
+		    E Tgq, TgW, Tgj, TgX, Tgh, Tgi;
+		    Tgq = Tgm + Tgp;
+		    TgW = Tfa - Tfh;
+		    Tgh = FMA(KP195090322, Tfl, KP980785280 * Tfo);
+		    Tgi = FNMS(KP195090322, Tfs, KP980785280 * Tfv);
+		    Tgj = Tgh + Tgi;
+		    TgX = Tgh - Tgi;
+		    Tgr = Tgj + Tgq;
+		    Th8 = TgW - TgX;
+		    Tgz = Tgq - Tgj;
+		    TgY = TgW + TgX;
+	       }
+	       {
+		    E TfR, Tge, Tga, Tgf;
+		    {
+			 E TfJ, TfQ, Tg2, Tg9;
+			 TfJ = TfB + TfI;
+			 TfQ = TfM + TfP;
+			 TfR = FNMS(KP098017140, TfQ, KP995184726 * TfJ);
+			 Tge = FMA(KP098017140, TfJ, KP995184726 * TfQ);
+			 Tg2 = TfU + Tg1;
+			 Tg9 = Tg5 + Tg8;
+			 Tga = FMA(KP995184726, Tg2, KP098017140 * Tg9);
+			 Tgf = FNMS(KP098017140, Tg2, KP995184726 * Tg9);
+		    }
+		    Tgb = TfR + Tga;
+		    TgD = Tge - Tgf;
+		    Tgg = Tge + Tgf;
+		    Tgy = Tga - TfR;
+	       }
+	       {
+		    E TgM, TgZ, TgP, Th0;
+		    {
+			 E TgK, TgL, TgN, TgO;
+			 TgK = TfP - TfM;
+			 TgL = TfB - TfI;
+			 TgM = FNMS(KP634393284, TgL, KP773010453 * TgK);
+			 TgZ = FMA(KP773010453, TgL, KP634393284 * TgK);
+			 TgN = TfU - Tg1;
+			 TgO = Tg8 - Tg5;
+			 TgP = FMA(KP634393284, TgN, KP773010453 * TgO);
+			 Th0 = FNMS(KP634393284, TgO, KP773010453 * TgN);
+		    }
+		    TgQ = TgM + TgP;
+		    Thc = TgZ - Th0;
+		    Th1 = TgZ + Th0;
+		    Th9 = TgP - TgM;
+	       }
+	       {
+		    E Tgc, Tgs, Tf7, Tgd;
+		    Tgc = Tfy + Tgb;
+		    Tgs = Tgg + Tgr;
+		    Tf7 = W[124];
+		    Tgd = W[125];
+		    iio[0] = FMA(Tf7, Tgc, Tgd * Tgs);
+		    rio[WS(ios, 63)] = FNMS(Tgd, Tgc, Tf7 * Tgs);
+	       }
+	       {
+		    E Tha, The, Th7, Thb;
+		    Tha = Th8 + Th9;
+		    The = Thc + Thd;
+		    Th7 = W[108];
+		    Thb = W[109];
+		    iio[-WS(ios, 8)] = FMA(Th7, Tha, Thb * The);
+		    rio[WS(ios, 55)] = FNMS(Thb, Tha, Th7 * The);
+	       }
+	       {
+		    E Thg, Thi, Thf, Thh;
+		    Thg = Thd - Thc;
+		    Thi = Th8 - Th9;
+		    Thf = W[44];
+		    Thh = W[45];
+		    rio[WS(ios, 23)] = FNMS(Thh, Thi, Thf * Thg);
+		    iio[-WS(ios, 40)] = FMA(Thf, Thi, Thh * Thg);
+	       }
+	       {
+		    E Tgu, Tgw, Tgt, Tgv;
+		    Tgu = Tgr - Tgg;
+		    Tgw = Tfy - Tgb;
+		    Tgt = W[60];
+		    Tgv = W[61];
+		    rio[WS(ios, 31)] = FNMS(Tgv, Tgw, Tgt * Tgu);
+		    iio[-WS(ios, 32)] = FMA(Tgt, Tgw, Tgv * Tgu);
+	       }
+	       {
+		    E TgA, TgE, Tgx, TgB;
+		    TgA = Tgy + Tgz;
+		    TgE = TgC + TgD;
+		    Tgx = W[28];
+		    TgB = W[29];
+		    rio[WS(ios, 15)] = FNMS(TgB, TgE, Tgx * TgA);
+		    iio[-WS(ios, 48)] = FMA(Tgx, TgE, TgB * TgA);
+	       }
+	       {
+		    E TgU, Th2, TgJ, TgV;
+		    TgU = TgQ + TgT;
+		    Th2 = TgY + Th1;
+		    TgJ = W[12];
+		    TgV = W[13];
+		    rio[WS(ios, 7)] = FNMS(TgV, Th2, TgJ * TgU);
+		    iio[-WS(ios, 56)] = FMA(TgJ, Th2, TgV * TgU);
+	       }
+	       {
+		    E Th4, Th6, Th3, Th5;
+		    Th4 = TgY - Th1;
+		    Th6 = TgT - TgQ;
+		    Th3 = W[76];
+		    Th5 = W[77];
+		    iio[-WS(ios, 24)] = FMA(Th3, Th4, Th5 * Th6);
+		    rio[WS(ios, 39)] = FNMS(Th5, Th4, Th3 * Th6);
+	       }
+	       {
+		    E TgG, TgI, TgF, TgH;
+		    TgG = TgC - TgD;
+		    TgI = Tgz - Tgy;
+		    TgF = W[92];
+		    TgH = W[93];
+		    iio[-WS(ios, 16)] = FMA(TgF, TgG, TgH * TgI);
+		    rio[WS(ios, 47)] = FNMS(TgH, TgG, TgF * TgI);
+	       }
+	  }
+	  {
+	       E ThJ, TiG, Ti7, Tig, ThS, TiD, Ti2, Tiv, Thy, Ti6, ThV, Ti3, Tin, TiH, Tis;
+	       E TiC;
+	       {
+		    E ThI, Tie, ThF, Tif, ThB, ThE;
+		    ThI = ThG + ThH;
+		    Tie = ThM - ThN;
+		    ThB = FNMS(KP555570233, ThA, KP831469612 * Thz);
+		    ThE = FNMS(KP555570233, ThD, KP831469612 * ThC);
+		    ThF = ThB + ThE;
+		    Tif = ThE - ThB;
+		    ThJ = ThF + ThI;
+		    TiG = Tie - Tif;
+		    Ti7 = ThI - ThF;
+		    Tig = Tie + Tif;
+	       }
+	       {
+		    E ThO, Tiu, ThR, Tit, ThP, ThQ;
+		    ThO = ThM + ThN;
+		    Tiu = ThH - ThG;
+		    ThP = FMA(KP831469612, ThA, KP555570233 * Thz);
+		    ThQ = FMA(KP831469612, ThD, KP555570233 * ThC);
+		    ThR = ThP - ThQ;
+		    Tit = ThP + ThQ;
+		    ThS = ThO + ThR;
+		    TiD = Tiu - Tit;
+		    Ti2 = ThO - ThR;
+		    Tiv = Tit + Tiu;
+	       }
+	       {
+		    E Thq, ThT, Thx, ThU;
+		    {
+			 E Thm, Thp, Tht, Thw;
+			 Thm = Thk + Thl;
+			 Thp = Thn + Tho;
+			 Thq = FNMS(KP290284677, Thp, KP956940335 * Thm);
+			 ThT = FMA(KP956940335, Thp, KP290284677 * Thm);
+			 Tht = Thr - Ths;
+			 Thw = Thu + Thv;
+			 Thx = FMA(KP290284677, Tht, KP956940335 * Thw);
+			 ThU = FNMS(KP290284677, Thw, KP956940335 * Tht);
+		    }
+		    Thy = Thq + Thx;
+		    Ti6 = ThT - ThU;
+		    ThV = ThT + ThU;
+		    Ti3 = Thx - Thq;
+	       }
+	       {
+		    E Tij, Tiq, Tim, Tir;
+		    {
+			 E Tih, Tii, Tik, Til;
+			 Tih = Thn - Tho;
+			 Tii = Thl - Thk;
+			 Tij = FNMS(KP471396736, Tii, KP881921264 * Tih);
+			 Tiq = FMA(KP471396736, Tih, KP881921264 * Tii);
+			 Tik = Thv - Thu;
+			 Til = Ths + Thr;
+			 Tim = FNMS(KP881921264, Til, KP471396736 * Tik);
+			 Tir = FMA(KP471396736, Til, KP881921264 * Tik);
+		    }
+		    Tin = Tij + Tim;
+		    TiH = Tiq - Tir;
+		    Tis = Tiq + Tir;
+		    TiC = Tim - Tij;
+	       }
+	       {
+		    E ThK, ThW, Thj, ThL;
+		    ThK = Thy + ThJ;
+		    ThW = ThS + ThV;
+		    Thj = W[4];
+		    ThL = W[5];
+		    rio[WS(ios, 3)] = FNMS(ThL, ThW, Thj * ThK);
+		    iio[-WS(ios, 60)] = FMA(Thj, ThW, ThL * ThK);
+	       }
+	       {
+		    E TiE, TiI, TiB, TiF;
+		    TiE = TiC + TiD;
+		    TiI = TiG + TiH;
+		    TiB = W[20];
+		    TiF = W[21];
+		    rio[WS(ios, 11)] = FNMS(TiF, TiI, TiB * TiE);
+		    iio[-WS(ios, 52)] = FMA(TiB, TiI, TiF * TiE);
+	       }
+	       {
+		    E TiK, TiM, TiJ, TiL;
+		    TiK = TiG - TiH;
+		    TiM = TiD - TiC;
+		    TiJ = W[84];
+		    TiL = W[85];
+		    iio[-WS(ios, 20)] = FMA(TiJ, TiK, TiL * TiM);
+		    rio[WS(ios, 43)] = FNMS(TiL, TiK, TiJ * TiM);
+	       }
+	       {
+		    E ThY, Ti0, ThX, ThZ;
+		    ThY = ThS - ThV;
+		    Ti0 = ThJ - Thy;
+		    ThX = W[68];
+		    ThZ = W[69];
+		    iio[-WS(ios, 28)] = FMA(ThX, ThY, ThZ * Ti0);
+		    rio[WS(ios, 35)] = FNMS(ThZ, ThY, ThX * Ti0);
+	       }
+	       {
+		    E Ti4, Ti8, Ti1, Ti5;
+		    Ti4 = Ti2 + Ti3;
+		    Ti8 = Ti6 + Ti7;
+		    Ti1 = W[100];
+		    Ti5 = W[101];
+		    iio[-WS(ios, 12)] = FMA(Ti1, Ti4, Ti5 * Ti8);
+		    rio[WS(ios, 51)] = FNMS(Ti5, Ti4, Ti1 * Ti8);
+	       }
+	       {
+		    E Tio, Tiw, Tid, Tip;
+		    Tio = Tig + Tin;
+		    Tiw = Tis + Tiv;
+		    Tid = W[116];
+		    Tip = W[117];
+		    iio[-WS(ios, 4)] = FMA(Tid, Tio, Tip * Tiw);
+		    rio[WS(ios, 59)] = FNMS(Tip, Tio, Tid * Tiw);
+	       }
+	       {
+		    E Tiy, TiA, Tix, Tiz;
+		    Tiy = Tiv - Tis;
+		    TiA = Tig - Tin;
+		    Tix = W[52];
+		    Tiz = W[53];
+		    rio[WS(ios, 27)] = FNMS(Tiz, TiA, Tix * Tiy);
+		    iio[-WS(ios, 36)] = FMA(Tix, TiA, Tiz * Tiy);
+	       }
+	       {
+		    E Tia, Tic, Ti9, Tib;
+		    Tia = Ti7 - Ti6;
+		    Tic = Ti2 - Ti3;
+		    Ti9 = W[36];
+		    Tib = W[37];
+		    rio[WS(ios, 19)] = FNMS(Tib, Tic, Ti9 * Tia);
+		    iio[-WS(ios, 44)] = FMA(Ti9, Tic, Tib * Tia);
+	       }
+	  }
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = {
+     {TW_FULL, 0, 64},
+     {TW_NEXT, 1, 0}
+};
+
+static const hc2hc_desc desc = { 64, "hb_64", twinstr, {808, 270, 230, 0}, &GENUS, 0, 0, 0 };
+
+void X(codelet_hb_64) (planner *p) {
+     X(khc2hc_dif_register) (p, hb_64, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_7.c b/src/fftw3/rdft/codelets/hc2r/hb_7.c
new file mode 100644
index 0000000..4993e5e
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_7.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:26 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 7 -dif -name hb_7 -include hb.h */
+
+/*
+ * This function contains 72 FP additions, 60 FP multiplications,
+ * (or, 36 additions, 24 multiplications, 36 fused multiply/add),
+ * 36 stack variables, and 28 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_7.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_7.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_7.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_7(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{
+     DK(KP222520933, +0.222520933956314404288902564496794759466355569);
+     DK(KP900968867, +0.900968867902419126236102319507445051165919162);
+     DK(KP623489801, +0.623489801858733530525004884004239810632274731);
+     DK(KP781831482, +0.781831482468029808708444526674057750232334519);
+     DK(KP974927912, +0.974927912181823607018131682993931217232785801);
+     DK(KP433883739, +0.433883739117558120475768332848358754609990728);
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 12) {
+	  E T1, Ta, T4, T7, Tq, TI, TR, TU, TE, Tt, Tb, Tk, Te, Th, Tx;
+	  E TF, TV, TQ, TJ, Tm;
+	  {
+	       E To, Tn, Tp, T8, T9;
+	       T1 = rio[0];
+	       T8 = rio[WS(ios, 3)];
+	       T9 = iio[-WS(ios, 4)];
+	       Ta = T8 + T9;
+	       To = T8 - T9;
+	       {
+		    E T2, T3, T5, T6;
+		    T2 = rio[WS(ios, 1)];
+		    T3 = iio[-WS(ios, 6)];
+		    T4 = T2 + T3;
+		    Tn = T2 - T3;
+		    T5 = rio[WS(ios, 2)];
+		    T6 = iio[-WS(ios, 5)];
+		    T7 = T5 + T6;
+		    Tp = T5 - T6;
+	       }
+	       Tq = FMA(KP433883739, Tn, KP974927912 * To) - (KP781831482 * Tp);
+	       TI = FMA(KP781831482, Tn, KP974927912 * Tp) + (KP433883739 * To);
+	       TR = FNMS(KP781831482, To, KP974927912 * Tn) - (KP433883739 * Tp);
+	       TU = FMA(KP623489801, Ta, T1) + FNMA(KP900968867, T7, KP222520933 * T4);
+	       TE = FMA(KP623489801, T4, T1) + FNMA(KP900968867, Ta, KP222520933 * T7);
+	       Tt = FMA(KP623489801, T7, T1) + FNMA(KP222520933, Ta, KP900968867 * T4);
+	  }
+	  {
+	       E Tv, Tw, Tu, Ti, Tj;
+	       Tb = iio[0];
+	       Ti = rio[WS(ios, 4)];
+	       Tj = iio[-WS(ios, 3)];
+	       Tk = Ti - Tj;
+	       Tv = Ti + Tj;
+	       {
+		    E Tc, Td, Tf, Tg;
+		    Tc = iio[-WS(ios, 1)];
+		    Td = rio[WS(ios, 6)];
+		    Te = Tc - Td;
+		    Tw = Tc + Td;
+		    Tf = rio[WS(ios, 5)];
+		    Tg = iio[-WS(ios, 2)];
+		    Th = Tf - Tg;
+		    Tu = Tf + Tg;
+	       }
+	       Tx = FNMS(KP974927912, Tv, KP781831482 * Tu) - (KP433883739 * Tw);
+	       TF = FMA(KP781831482, Tw, KP974927912 * Tu) + (KP433883739 * Tv);
+	       TV = FMA(KP433883739, Tu, KP781831482 * Tv) - (KP974927912 * Tw);
+	       TQ = FMA(KP900968867, Th, Tb) + FNMA(KP623489801, Tk, KP222520933 * Te);
+	       TJ = FMA(KP623489801, Te, Tb) + FMA(KP222520933, Th, KP900968867 * Tk);
+	       Tm = FMA(KP222520933, Tk, Tb) + FNMA(KP623489801, Th, KP900968867 * Te);
+	  }
+	  rio[0] = T1 + T4 + T7 + Ta;
+	  iio[-WS(ios, 6)] = Tb + Te - (Th + Tk);
+	  {
+	       E TM, TO, TL, TN;
+	       TM = TJ - TI;
+	       TO = TE + TF;
+	       TL = W[10];
+	       TN = W[11];
+	       iio[0] = FMA(TL, TM, TN * TO);
+	       rio[WS(ios, 6)] = FNMS(TN, TM, TL * TO);
+	  }
+	  {
+	       E TY, T10, TX, TZ;
+	       TY = TU + TV;
+	       T10 = TR + TQ;
+	       TX = W[2];
+	       TZ = W[3];
+	       rio[WS(ios, 2)] = FNMS(TZ, T10, TX * TY);
+	       iio[-WS(ios, 4)] = FMA(TX, T10, TZ * TY);
+	  }
+	  {
+	       E Tr, Ty, Tl, Ts;
+	       Tr = Tm - Tq;
+	       Ty = Tt - Tx;
+	       Tl = W[6];
+	       Ts = W[7];
+	       iio[-WS(ios, 2)] = FMA(Tl, Tr, Ts * Ty);
+	       rio[WS(ios, 4)] = FNMS(Ts, Tr, Tl * Ty);
+	  }
+	  {
+	       E TA, TC, Tz, TB;
+	       TA = Tt + Tx;
+	       TC = Tq + Tm;
+	       Tz = W[4];
+	       TB = W[5];
+	       rio[WS(ios, 3)] = FNMS(TB, TC, Tz * TA);
+	       iio[-WS(ios, 3)] = FMA(Tz, TC, TB * TA);
+	  }
+	  {
+	       E TS, TW, TP, TT;
+	       TS = TQ - TR;
+	       TW = TU - TV;
+	       TP = W[8];
+	       TT = W[9];
+	       iio[-WS(ios, 1)] = FMA(TP, TS, TT * TW);
+	       rio[WS(ios, 5)] = FNMS(TT, TS, TP * TW);
+	  }
+	  {
+	       E TG, TK, TD, TH;
+	       TG = TE - TF;
+	       TK = TI + TJ;
+	       TD = W[0];
+	       TH = W[1];
+	       rio[WS(ios, 1)] = FNMS(TH, TK, TD * TG);
+	       iio[-WS(ios, 5)] = FMA(TD, TK, TH * TG);
+	  }
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = {
+     {TW_FULL, 0, 7},
+     {TW_NEXT, 1, 0}
+};
+
+static const hc2hc_desc desc = { 7, "hb_7", twinstr, {36, 24, 36, 0}, &GENUS, 0, 0, 0 };
+
+void X(codelet_hb_7) (planner *p) {
+     X(khc2hc_dif_register) (p, hb_7, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_8.c b/src/fftw3/rdft/codelets/hc2r/hb_8.c
new file mode 100644
index 0000000..58cc25e
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_8.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:28 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 8 -dif -name hb_8 -include hb.h */
+
+/*
+ * This function contains 66 FP additions, 32 FP multiplications,
+ * (or, 52 additions, 18 multiplications, 14 fused multiply/add),
+ * 30 stack variables, and 32 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_8.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_8.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_8.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_8(R *rio, R *iio, const R *W, stride ios, int m, int dist) /* in-place 8-point pass: each loop pass reads 8 values via rio and 8 via iio, recombines them, and stores 16 results back to the same locations; returns the advanced W */
+{
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2; DK is a macro defined outside this view */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 14) { /* per pass: rio steps forward and iio backward by dist, W moves on by the 14 entries used below; body is skipped when m <= 2 */
+	  E T7, T18, T1d, Tg, Tx, TT, TY, TG, Te, TZ, T10, Tn, Tq, TM, TP; /* intermediates that outlive the nested scopes below */
+	  E TH;
+	  { /* first half of the inputs: rio[WS(ios,k)] for k=0,4,2,6 and iio[-WS(ios,k)] for k=4,0,6,2 */
+	       E T3, TR, Tt, TX, T6, TW, Tw, TS;
+	       {
+		    E T1, T2, Tr, Ts;
+		    T1 = rio[0];
+		    T2 = iio[-WS(ios, 4)];
+		    T3 = T1 + T2;
+		    TR = T1 - T2;
+		    Tr = iio[0];
+		    Ts = rio[WS(ios, 4)];
+		    Tt = Tr - Ts;
+		    TX = Tr + Ts;
+	       }
+	       {
+		    E T4, T5, Tu, Tv;
+		    T4 = rio[WS(ios, 2)];
+		    T5 = iio[-WS(ios, 6)];
+		    T6 = T4 + T5;
+		    TW = T4 - T5;
+		    Tu = iio[-WS(ios, 2)];
+		    Tv = rio[WS(ios, 6)];
+		    Tw = Tu - Tv;
+		    TS = Tu + Tv;
+	       }
+	       T7 = T3 + T6;
+	       T18 = TX - TW;
+	       T1d = TR + TS;
+	       Tg = T3 - T6;
+	       Tx = Tt - Tw;
+	       TT = TR - TS;
+	       TY = TW + TX;
+	       TG = Tt + Tw;
+	  }
+	  { /* second half of the inputs: rio[WS(ios,k)] for k=1,5,3,7 and iio[-WS(ios,k)] for k=5,1,7,3 */
+	       E Ta, TO, Tj, TN, Td, TK, Tm, TL;
+	       {
+		    E T8, T9, Th, Ti;
+		    T8 = rio[WS(ios, 1)];
+		    T9 = iio[-WS(ios, 5)];
+		    Ta = T8 + T9;
+		    TO = T8 - T9;
+		    Th = iio[-WS(ios, 1)];
+		    Ti = rio[WS(ios, 5)];
+		    Tj = Th - Ti;
+		    TN = Th + Ti;
+	       }
+	       {
+		    E Tb, Tc, Tk, Tl;
+		    Tb = iio[-WS(ios, 7)];
+		    Tc = rio[WS(ios, 3)];
+		    Td = Tb + Tc;
+		    TK = Tb - Tc;
+		    Tk = iio[-WS(ios, 3)];
+		    Tl = rio[WS(ios, 7)];
+		    Tm = Tk - Tl;
+		    TL = Tl + Tk;
+	       }
+	       Te = Ta + Td;
+	       TZ = TO + TN;
+	       T10 = TK + TL;
+	       Tn = Tj - Tm;
+	       Tq = Td - Ta;
+	       TM = TK - TL;
+	       TP = TN - TO;
+	       TH = Tj + Tm;
+	  }
+	  rio[0] = T7 + Te; /* this store and the next one involve no entry of W */
+	  iio[-WS(ios, 7)] = TH + TG;
+	  { /* output pair combined with W[10], W[11]; FNMS and FMA are macros defined outside this view */
+	       E To, Ty, Tf, Tp;
+	       To = Tg + Tn;
+	       Ty = Tq + Tx;
+	       Tf = W[10];
+	       Tp = W[11];
+	       rio[WS(ios, 6)] = FNMS(Tp, Ty, Tf * To);
+	       iio[-WS(ios, 1)] = FMA(Tp, To, Tf * Ty);
+	  }
+	  { /* output pair combined with W[6], W[7] */
+	       E TE, TI, TD, TF;
+	       TE = T7 - Te;
+	       TI = TG - TH;
+	       TD = W[6];
+	       TF = W[7];
+	       rio[WS(ios, 4)] = FNMS(TF, TI, TD * TE);
+	       iio[-WS(ios, 3)] = FMA(TF, TE, TD * TI);
+	  }
+	  { /* output pair combined with W[2], W[3] */
+	       E TA, TC, Tz, TB;
+	       TA = Tg - Tn;
+	       TC = Tx - Tq;
+	       Tz = W[2];
+	       TB = W[3];
+	       rio[WS(ios, 2)] = FNMS(TB, TC, Tz * TA);
+	       iio[-WS(ios, 5)] = FMA(TB, TA, Tz * TC);
+	  }
+	  { /* two output pairs that need the KP707106781 scaling of differences; they use W[0], W[1] and W[8], W[9] */
+	       E TU, T16, T12, T14, TQ, T11;
+	       TQ = KP707106781 * (TM - TP);
+	       TU = TQ + TT;
+	       T16 = TT - TQ;
+	       T11 = KP707106781 * (TZ - T10);
+	       T12 = TY + T11;
+	       T14 = TY - T11;
+	       {
+		    E TJ, TV, T13, T15;
+		    TJ = W[0];
+		    TV = W[1];
+		    rio[WS(ios, 1)] = FNMS(TV, T12, TJ * TU);
+		    iio[-WS(ios, 6)] = FMA(TJ, T12, TV * TU);
+		    T13 = W[8];
+		    T15 = W[9];
+		    iio[-WS(ios, 2)] = FMA(T13, T14, T15 * T16);
+		    rio[WS(ios, 5)] = FNMS(T15, T14, T13 * T16);
+	       }
+	  }
+	  { /* two output pairs that need the KP707106781 scaling of sums; they use W[12], W[13] and W[4], W[5] */
+	       E T1a, T1i, T1e, T1g, T19, T1c;
+	       T19 = KP707106781 * (TP + TM);
+	       T1a = T18 + T19;
+	       T1i = T18 - T19;
+	       T1c = KP707106781 * (TZ + T10);
+	       T1e = T1c + T1d;
+	       T1g = T1d - T1c;
+	       {
+		    E T17, T1b, T1f, T1h;
+		    T17 = W[12];
+		    T1b = W[13];
+		    iio[0] = FMA(T17, T1a, T1b * T1e);
+		    rio[WS(ios, 7)] = FNMS(T1b, T1a, T17 * T1e);
+		    T1f = W[4];
+		    T1h = W[5];
+		    rio[WS(ios, 3)] = FNMS(T1h, T1i, T1f * T1g);
+		    iio[-WS(ios, 4)] = FMA(T1f, T1i, T1h * T1g);
+	       }
+	  }
+     }
+     return W; /* W as left by the loop: advanced by 14 per pass, or unchanged if no pass ran */
+}
+
+static const tw_instr twinstr[] = { /* twiddle-instruction table; referenced only by desc below */
+     {TW_FULL, 0, 8}, /* NOTE(review): presumably asks for the full twiddle set for size 8 -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* final entry of the table */
+};
+
+static const hc2hc_desc desc = { 8, "hb_8", twinstr, {52, 18, 14, 0}, &GENUS, 0, 0, 0 }; /* descriptor: size 8, codelet name, twiddle table, then 52/18/14 which match the add/mul/fma counts in this file's header comment */
+
+void X(codelet_hb_8) (planner *p) { /* hands hb_8 and its descriptor to planner p; X() is a naming macro defined outside this view */
+     X(khc2hc_dif_register) (p, hb_8, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hb_9.c b/src/fftw3/rdft/codelets/hc2r/hb_9.c
new file mode 100644
index 0000000..ed3eb98
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hb_9.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:31 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -sign 1 -n 9 -dif -name hb_9 -include hb.h */
+
+/*
+ * This function contains 96 FP additions, 72 FP multiplications,
+ * (or, 60 additions, 36 multiplications, 36 fused multiply/add),
+ * 53 stack variables, and 36 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hb_9.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_9.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hb_9.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hb.h"
+
+static const R *hb_9(R *rio, R *iio, const R *W, stride ios, int m, int dist) /* in-place 9-point pass: each loop pass reads 9 values via rio and 9 via iio, recombines them, and stores 18 results back to the same locations; returns the advanced W */
+{
+     DK(KP642787609, +0.642787609686539326322643409907263432907559884); /* numerically sin(40 deg); DK is a macro defined outside this view */
+     DK(KP766044443, +0.766044443118978035202392650555416673935832457); /* numerically cos(40 deg) */
+     DK(KP984807753, +0.984807753012208059366743024589523013670643252); /* numerically cos(10 deg) */
+     DK(KP173648177, +0.173648177666930348851716626769314796000375677); /* numerically sin(10 deg) */
+     DK(KP342020143, +0.342020143325668733044099614682259580763083368); /* numerically sin(20 deg) */
+     DK(KP939692620, +0.939692620785908384054109277324731469936208134); /* numerically cos(20 deg) */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 16) { /* per pass: rio steps forward and iio backward by dist, W moves on by the 16 entries used below; body is skipped when m <= 2 */
+	  E T5, T1z, Tm, T18, TQ, T1i, Ta, Tf, Tg, T1A, T1B, T1C, Tx, TS, T1e; /* intermediates that outlive the nested scopes below */
+	  E T1k, T1b, T1j, TI, TR;
+	  { /* inputs at multiples of 3: rio[0], rio[WS(ios,3)], rio[WS(ios,6)] and iio[0], iio[-WS(ios,6)], iio[-WS(ios,3)] */
+	       E T1, TM, T4, TP, Tl, TN, Ti, TO;
+	       T1 = rio[0];
+	       TM = iio[0];
+	       {
+		    E T2, T3, Tj, Tk;
+		    T2 = rio[WS(ios, 3)];
+		    T3 = iio[-WS(ios, 6)];
+		    T4 = T2 + T3;
+		    TP = KP866025403 * (T2 - T3);
+		    Tj = rio[WS(ios, 6)];
+		    Tk = iio[-WS(ios, 3)];
+		    Tl = KP866025403 * (Tj + Tk);
+		    TN = Tj - Tk;
+	       }
+	       T5 = T1 + T4;
+	       T1z = TM - TN;
+	       Ti = FNMS(KP500000000, T4, T1); /* FNMS and FMA are macros defined outside this view */
+	       Tm = Ti + Tl;
+	       T18 = Ti - Tl;
+	       TO = FMA(KP500000000, TN, TM);
+	       TQ = TO - TP;
+	       T1i = TP + TO;
+	  }
+	  { /* remaining twelve inputs, loaded in two groups of six and then rotated by the constants declared above */
+	       E T6, T9, Ty, TG, TD, TE, TB, TF, Tb, Te, Tn, Tv, Tt, Ts, Tq;
+	       E Tu;
+	       { /* group 1: rio at k=1,4,7 and iio at k=7,1,4 */
+		    E T7, T8, Tz, TA;
+		    T6 = rio[WS(ios, 1)];
+		    T7 = rio[WS(ios, 4)];
+		    T8 = iio[-WS(ios, 7)];
+		    T9 = T7 + T8;
+		    Ty = FNMS(KP500000000, T9, T6);
+		    TG = KP866025403 * (T7 - T8);
+		    TD = iio[-WS(ios, 1)];
+		    Tz = rio[WS(ios, 7)];
+		    TA = iio[-WS(ios, 4)];
+		    TE = Tz - TA;
+		    TB = KP866025403 * (Tz + TA);
+		    TF = FMA(KP500000000, TE, TD);
+	       }
+	       { /* group 2: rio at k=2,5,8 and iio at k=5,8,2 */
+		    E Tc, Td, To, Tp;
+		    Tb = rio[WS(ios, 2)];
+		    Tc = iio[-WS(ios, 5)];
+		    Td = iio[-WS(ios, 8)];
+		    Te = Tc + Td;
+		    Tn = FNMS(KP500000000, Te, Tb);
+		    Tv = KP866025403 * (Tc - Td);
+		    Tt = iio[-WS(ios, 2)];
+		    To = rio[WS(ios, 5)];
+		    Tp = rio[WS(ios, 8)];
+		    Ts = To + Tp;
+		    Tq = KP866025403 * (To - Tp);
+		    Tu = FMA(KP500000000, Ts, Tt);
+	       }
+	       {
+		    E Tr, Tw, T1c, T1d;
+		    Ta = T6 + T9;
+		    Tf = Tb + Te;
+		    Tg = Ta + Tf;
+		    T1A = TD - TE;
+		    T1B = Tt - Ts;
+		    T1C = T1A + T1B;
+		    Tr = Tn - Tq;
+		    Tw = Tu - Tv;
+		    Tx = FMA(KP939692620, Tr, KP342020143 * Tw);
+		    TS = FNMS(KP939692620, Tw, KP342020143 * Tr);
+		    T1c = Tn + Tq;
+		    T1d = Tv + Tu;
+		    T1e = FNMS(KP984807753, T1d, KP173648177 * T1c);
+		    T1k = FMA(KP984807753, T1c, KP173648177 * T1d);
+		    {
+			 E T19, T1a, TC, TH;
+			 T19 = Ty - TB;
+			 T1a = TG + TF;
+			 T1b = FNMS(KP642787609, T1a, KP766044443 * T19);
+			 T1j = FMA(KP766044443, T1a, KP642787609 * T19);
+			 TC = Ty + TB;
+			 TH = TF - TG;
+			 TI = FNMS(KP984807753, TH, KP173648177 * TC);
+			 TR = FMA(KP173648177, TH, KP984807753 * TC);
+		    }
+	       }
+	  }
+	  rio[0] = T5 + Tg; /* stored without any entry of W */
+	  { /* three output pairs built from Tm, TQ, Tx, TS, TI, TR; they use W[2..3], W[8..9], W[14..15] */
+	       E TX, T11, TK, T10, TU, TW, TJ, TT, Th, TL;
+	       TX = KP866025403 * (TI + Tx);
+	       T11 = KP866025403 * (TS - TR);
+	       TJ = Tx - TI;
+	       TK = Tm - TJ;
+	       T10 = FMA(KP500000000, TJ, Tm);
+	       TT = TR + TS;
+	       TU = TQ + TT;
+	       TW = FNMS(KP500000000, TT, TQ);
+	       Th = W[2];
+	       TL = W[3];
+	       rio[WS(ios, 2)] = FNMS(TL, TU, Th * TK);
+	       iio[-WS(ios, 6)] = FMA(Th, TU, TL * TK);
+	       {
+		    E T14, T16, T13, T15;
+		    T14 = TW + TX;
+		    T16 = T11 + T10;
+		    T13 = W[8];
+		    T15 = W[9];
+		    iio[-WS(ios, 3)] = FMA(T13, T14, T15 * T16);
+		    rio[WS(ios, 5)] = FNMS(T15, T14, T13 * T16);
+	       }
+	       {
+		    E TY, T12, TV, TZ;
+		    TY = TW - TX;
+		    T12 = T10 - T11;
+		    TV = W[14];
+		    TZ = W[15];
+		    iio[0] = FMA(TV, TY, TZ * T12);
+		    rio[WS(ios, 8)] = FNMS(TZ, TY, TV * T12);
+	       }
+	  }
+	  iio[-WS(ios, 8)] = T1z + T1C; /* stored without any entry of W */
+	  { /* two output pairs built from T5, Tg, T1z, T1C, Ta, Tf, T1A, T1B; they use W[10..11] and W[4..5] */
+	       E T1G, T1O, T1K, T1M;
+	       {
+		    E T1E, T1F, T1I, T1J;
+		    T1E = FNMS(KP500000000, T1C, T1z);
+		    T1F = KP866025403 * (Ta - Tf);
+		    T1G = T1E - T1F;
+		    T1O = T1F + T1E;
+		    T1I = FNMS(KP500000000, Tg, T5);
+		    T1J = KP866025403 * (T1B - T1A);
+		    T1K = T1I - T1J;
+		    T1M = T1I + T1J;
+	       }
+	       {
+		    E T1D, T1H, T1L, T1N;
+		    T1D = W[10];
+		    T1H = W[11];
+		    iio[-WS(ios, 2)] = FMA(T1D, T1G, T1H * T1K);
+		    rio[WS(ios, 6)] = FNMS(T1H, T1G, T1D * T1K);
+		    T1L = W[4];
+		    T1N = W[5];
+		    rio[WS(ios, 3)] = FNMS(T1N, T1O, T1L * T1M);
+		    iio[-WS(ios, 5)] = FMA(T1L, T1O, T1N * T1M);
+	       }
+	  }
+	  { /* three output pairs built from T18, T1i, T1b, T1e, T1j, T1k; they use W[0..1], W[12..13], W[6..7] */
+	       E T1p, T1t, T1g, T1s, T1m, T1o, T1f, T1l, T17, T1h;
+	       T1p = KP866025403 * (T1b - T1e);
+	       T1t = KP866025403 * (T1k - T1j);
+	       T1f = T1b + T1e;
+	       T1g = T18 + T1f;
+	       T1s = FNMS(KP500000000, T1f, T18);
+	       T1l = T1j + T1k;
+	       T1m = T1i + T1l;
+	       T1o = FNMS(KP500000000, T1l, T1i);
+	       T17 = W[0];
+	       T1h = W[1];
+	       rio[WS(ios, 1)] = FNMS(T1h, T1m, T17 * T1g);
+	       iio[-WS(ios, 7)] = FMA(T1h, T1g, T17 * T1m);
+	       {
+		    E T1q, T1u, T1n, T1r;
+		    T1q = T1o - T1p;
+		    T1u = T1s - T1t;
+		    T1n = W[12];
+		    T1r = W[13];
+		    iio[-WS(ios, 1)] = FMA(T1n, T1q, T1r * T1u);
+		    rio[WS(ios, 7)] = FNMS(T1r, T1q, T1n * T1u);
+	       }
+	       {
+		    E T1w, T1y, T1v, T1x;
+		    T1w = T1s + T1t;
+		    T1y = T1p + T1o;
+		    T1v = W[6];
+		    T1x = W[7];
+		    rio[WS(ios, 4)] = FNMS(T1x, T1y, T1v * T1w);
+		    iio[-WS(ios, 4)] = FMA(T1v, T1y, T1x * T1w);
+	       }
+	  }
+     }
+     return W; /* W as left by the loop: advanced by 16 per pass, or unchanged if no pass ran */
+}
+
+static const tw_instr twinstr[] = { /* twiddle-instruction table; referenced only by desc below */
+     {TW_FULL, 0, 9}, /* NOTE(review): presumably asks for the full twiddle set for size 9 -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* final entry of the table */
+};
+
+static const hc2hc_desc desc = { 9, "hb_9", twinstr, {60, 36, 36, 0}, &GENUS, 0, 0, 0 }; /* descriptor: size 9, codelet name, twiddle table, then 60/36/36 which match the add/mul/fma counts in this file's header comment */
+
+void X(codelet_hb_9) (planner *p) { /* hands hb_9 and its descriptor to planner p; X() is a naming macro defined outside this view */
+     X(khc2hc_dif_register) (p, hb_9, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2rIII_10.c b/src/fftw3/rdft/codelets/hc2r/hc2rIII_10.c
new file mode 100644
index 0000000..df8c872
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2rIII_10.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:12:05 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 10 -name hc2rIII_10 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 32 FP additions, 16 FP multiplications,
+ * (or, 26 additions, 10 multiplications, 6 fused multiply/add),
+ * 22 stack variables, and 20 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2rIII_10.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_10.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_10.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void hc2rIII_10(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs) /* for each of v passes, reads 5 values from ri and 5 from ii and writes 10 values to O */
+{
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* DK is a macro defined outside this view */
+     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); /* numerically 2*sin(72 deg) */
+     DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); /* numerically 2*sin(36 deg) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); /* numerically sqrt(5)/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; ri and ii both step by ivs, O steps by ovs; nothing runs when v <= 0 */
+	  E T1, To, T8, Tq, Ta, Tp, Te, Ts, Th, Tn;
+	  T1 = ri[WS(ris, 2)];
+	  To = ii[WS(iis, 2)];
+	  { /* the other four ri inputs (k=4,0,3,1): pair sums and differences */
+	       E T2, T3, T4, T5, T6, T7;
+	       T2 = ri[WS(ris, 4)];
+	       T3 = ri[0];
+	       T4 = T2 + T3;
+	       T5 = ri[WS(ris, 3)];
+	       T6 = ri[WS(ris, 1)];
+	       T7 = T5 + T6;
+	       T8 = T4 + T7;
+	       Tq = T5 - T6;
+	       Ta = KP1_118033988 * (T7 - T4);
+	       Tp = T2 - T3;
+	  }
+	  { /* the other four ii inputs (k=4,0,1,3): pair sums and differences */
+	       E Tc, Td, Tm, Tf, Tg, Tl;
+	       Tc = ii[WS(iis, 4)];
+	       Td = ii[0];
+	       Tm = Tc + Td;
+	       Tf = ii[WS(iis, 1)];
+	       Tg = ii[WS(iis, 3)];
+	       Tl = Tg + Tf;
+	       Te = Tc - Td;
+	       Ts = KP1_118033988 * (Tl + Tm);
+	       Th = Tf - Tg;
+	       Tn = Tl - Tm;
+	  }
+	  O[0] = KP2_000000000 * (T1 + T8); /* twice the sum of all five ri inputs */
+	  O[WS(os, 5)] = KP2_000000000 * (Tn - To);
+	  { /* even-indexed outputs 2, 6, 8, 4: built from the ri sums (T1, T8, Ta) and the ii differences (Te, Th) */
+	       E Ti, Tj, Tb, Tk, T9;
+	       Ti = FNMS(KP1_902113032, Th, KP1_175570504 * Te); /* FNMS and FMA are macros defined outside this view */
+	       Tj = FMA(KP1_175570504, Th, KP1_902113032 * Te);
+	       T9 = FNMS(KP2_000000000, T1, KP500000000 * T8);
+	       Tb = T9 - Ta;
+	       Tk = T9 + Ta;
+	       O[WS(os, 2)] = Tb + Ti;
+	       O[WS(os, 6)] = Tk + Tj;
+	       O[WS(os, 8)] = Ti - Tb;
+	       O[WS(os, 4)] = Tj - Tk;
+	  }
+	  { /* odd-indexed outputs 1, 7, 9, 3: built from the ri differences (Tp, Tq) and the ii sums (Tn, To, Ts) */
+	       E Tr, Tv, Tu, Tw, Tt;
+	       Tr = FMA(KP1_902113032, Tp, KP1_175570504 * Tq);
+	       Tv = FNMS(KP1_175570504, Tp, KP1_902113032 * Tq);
+	       Tt = FMA(KP500000000, Tn, KP2_000000000 * To);
+	       Tu = Ts + Tt;
+	       Tw = Tt - Ts;
+	       O[WS(os, 1)] = -(Tr + Tu);
+	       O[WS(os, 7)] = Tw - Tv;
+	       O[WS(os, 9)] = Tr - Tu;
+	       O[WS(os, 3)] = Tv + Tw;
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 10, "hc2rIII_10", {26, 10, 6, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 10, codelet name, then 26/10/6 which match the add/mul/fma counts in this file's header comment */
+
+void X(codelet_hc2rIII_10) (planner *p) { /* hands hc2rIII_10 and its descriptor to planner p; X() is a naming macro defined outside this view */
+     X(khc2rIII_register) (p, hc2rIII_10, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2rIII_12.c b/src/fftw3/rdft/codelets/hc2r/hc2rIII_12.c
new file mode 100644
index 0000000..0a36bf0
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2rIII_12.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:12:07 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 12 -name hc2rIII_12 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 42 FP additions, 20 FP multiplications,
+ * (or, 38 additions, 16 multiplications, 4 fused multiply/add),
+ * 25 stack variables, and 24 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2rIII_12.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_12.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_12.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void hc2rIII_12(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs) /* for each of v passes, reads 6 values from ri and 6 from ii and writes 12 values to O */
+{
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* numerically sqrt(2); DK is a macro defined outside this view */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; ri and ii both step by ivs, O steps by ovs; nothing runs when v <= 0 */
+	  E T5, Tw, Tb, Te, Tx, Ts, Ta, TA, Tg, Tj, Tz, Tp, Tt, Tu;
+	  { /* ri inputs k=1,5,2 */
+	       E T1, T2, T3, T4;
+	       T1 = ri[WS(ris, 1)];
+	       T2 = ri[WS(ris, 5)];
+	       T3 = ri[WS(ris, 2)];
+	       T4 = T2 + T3;
+	       T5 = T1 + T4;
+	       Tw = KP866025403 * (T2 - T3);
+	       Tb = FNMS(KP500000000, T4, T1); /* FNMS and FMA are macros defined outside this view */
+	  }
+	  { /* ii inputs k=1,5,2 */
+	       E Tq, Tc, Td, Tr;
+	       Tq = ii[WS(iis, 1)];
+	       Tc = ii[WS(iis, 5)];
+	       Td = ii[WS(iis, 2)];
+	       Tr = Td - Tc;
+	       Te = KP866025403 * (Tc + Td);
+	       Tx = FMA(KP500000000, Tr, Tq);
+	       Ts = Tq - Tr;
+	  }
+	  { /* ri inputs k=4,0,3 */
+	       E T6, T7, T8, T9;
+	       T6 = ri[WS(ris, 4)];
+	       T7 = ri[0];
+	       T8 = ri[WS(ris, 3)];
+	       T9 = T7 + T8;
+	       Ta = T6 + T9;
+	       TA = KP866025403 * (T7 - T8);
+	       Tg = FNMS(KP500000000, T9, T6);
+	  }
+	  { /* ii inputs k=4,0,3 */
+	       E To, Th, Ti, Tn;
+	       To = ii[WS(iis, 4)];
+	       Th = ii[0];
+	       Ti = ii[WS(iis, 3)];
+	       Tn = Ti - Th;
+	       Tj = KP866025403 * (Th + Ti);
+	       Tz = FMA(KP500000000, Tn, To);
+	       Tp = Tn - To;
+	  }
+	  O[0] = KP2_000000000 * (T5 + Ta); /* twice the sum of all six ri inputs */
+	  O[WS(os, 6)] = KP2_000000000 * (Ts + Tp);
+	  Tt = Tp - Ts;
+	  Tu = T5 - Ta;
+	  O[WS(os, 3)] = KP1_414213562 * (Tt - Tu);
+	  O[WS(os, 9)] = KP1_414213562 * (Tu + Tt);
+	  { /* outputs 4, 10, 1, 7 */
+	       E Tf, Tk, Tv, Ty, TB, TC;
+	       Tf = Tb - Te;
+	       Tk = Tg + Tj;
+	       Tv = Tf - Tk;
+	       Ty = Tw + Tx;
+	       TB = Tz - TA;
+	       TC = Ty + TB;
+	       O[WS(os, 4)] = -(KP2_000000000 * (Tf + Tk));
+	       O[WS(os, 10)] = KP2_000000000 * (TB - Ty);
+	       O[WS(os, 1)] = KP1_414213562 * (Tv - TC);
+	       O[WS(os, 7)] = KP1_414213562 * (Tv + TC);
+	  }
+	  { /* outputs 8, 5, 2, 11 */
+	       E Tl, Tm, TF, TD, TE, TG;
+	       Tl = Tb + Te;
+	       Tm = Tg - Tj;
+	       TF = Tm - Tl;
+	       TD = TA + Tz;
+	       TE = Tx - Tw;
+	       TG = TE + TD;
+	       O[WS(os, 8)] = KP2_000000000 * (Tl + Tm);
+	       O[WS(os, 5)] = KP1_414213562 * (TF + TG);
+	       O[WS(os, 2)] = KP2_000000000 * (TD - TE);
+	       O[WS(os, 11)] = KP1_414213562 * (TF - TG);
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 12, "hc2rIII_12", {38, 16, 4, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 12, codelet name, then 38/16/4 which match the add/mul/fma counts in this file's header comment */
+
+void X(codelet_hc2rIII_12) (planner *p) { /* hands hc2rIII_12 and its descriptor to planner p; X() is a naming macro defined outside this view */
+     X(khc2rIII_register) (p, hc2rIII_12, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2rIII_15.c b/src/fftw3/rdft/codelets/hc2r/hc2rIII_15.c
new file mode 100644
index 0000000..9a94a9a
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2rIII_15.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:12:11 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 15 -name hc2rIII_15 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 64 FP additions, 26 FP multiplications,
+ * (or, 49 additions, 11 multiplications, 15 fused multiply/add),
+ * 47 stack variables, and 30 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2rIII_15.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_15.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_15.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void hc2rIII_15(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs) /* for each of v passes, reads 8 values from ri (k=0..7) and 7 from ii (k=0..6) and writes 15 values to O */
+{
+     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* numerically sqrt(3); DK is a macro defined outside this view */
+     DK(KP433012701, +0.433012701892219323381861585376468091735701313); /* numerically sqrt(3)/4 */
+     DK(KP968245836, +0.968245836551854221294816349945599902708230426); /* numerically sqrt(15)/4 */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(36 deg) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(72 deg) */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
+     DK(KP1_647278207, +1.647278207092663851754840078556380006059321028); /* numerically sqrt(3)*sin(72 deg) */
+     DK(KP1_018073920, +1.018073920910254366901961726787815297021466329); /* numerically sqrt(3)*sin(36 deg) */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); /* numerically sqrt(5)/2 */
+     DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); /* numerically 2*sin(36 deg) */
+     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); /* numerically 2*sin(72 deg) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; ri and ii both step by ivs, O steps by ovs; nothing runs when v <= 0 */
+	  E Tv, TD, T5, Ts, TC, T6, Tf, TW, TK, Td, Tg, TP, To, TN, TA;
+	  E TO, TQ, Tt, Tu, T12, Te, T11;
+	  Tt = ii[WS(iis, 4)];
+	  Tu = ii[WS(iis, 1)];
+	  Tv = FMA(KP1_902113032, Tt, KP1_175570504 * Tu); /* FMA, FNMS and FMS are macros defined outside this view */
+	  TD = FNMS(KP1_175570504, Tt, KP1_902113032 * Tu);
+	  { /* ri inputs k=7,4,1 */
+	       E T1, T4, Tq, T2, T3, Tr;
+	       T1 = ri[WS(ris, 7)];
+	       T2 = ri[WS(ris, 4)];
+	       T3 = ri[WS(ris, 1)];
+	       T4 = T2 + T3;
+	       Tq = KP1_118033988 * (T2 - T3);
+	       T5 = FMA(KP2_000000000, T4, T1);
+	       Tr = FNMS(KP500000000, T4, T1);
+	       Ts = Tq + Tr;
+	       TC = Tr - Tq;
+	  }
+	  { /* ri inputs k=2,3,6,0,5 */
+	       E Tc, TJ, T9, TI;
+	       T6 = ri[WS(ris, 2)];
+	       {
+		    E Ta, Tb, T7, T8;
+		    Ta = ri[WS(ris, 3)];
+		    Tb = ri[WS(ris, 6)];
+		    Tc = Ta + Tb;
+		    TJ = Ta - Tb;
+		    T7 = ri[0];
+		    T8 = ri[WS(ris, 5)];
+		    T9 = T7 + T8;
+		    TI = T7 - T8;
+	       }
+	       Tf = KP559016994 * (T9 - Tc);
+	       TW = FNMS(KP1_647278207, TJ, KP1_018073920 * TI);
+	       TK = FMA(KP1_647278207, TI, KP1_018073920 * TJ);
+	       Td = T9 + Tc;
+	       Tg = FNMS(KP250000000, Td, T6);
+	  }
+	  { /* ii inputs k=2,3,6,0,5 */
+	       E Tn, TM, Tk, TL;
+	       TP = ii[WS(iis, 2)];
+	       {
+		    E Tl, Tm, Ti, Tj;
+		    Tl = ii[WS(iis, 3)];
+		    Tm = ii[WS(iis, 6)];
+		    Tn = Tl - Tm;
+		    TM = Tl + Tm;
+		    Ti = ii[0];
+		    Tj = ii[WS(iis, 5)];
+		    Tk = Ti + Tj;
+		    TL = Ti - Tj;
+	       }
+	       To = FMA(KP951056516, Tk, KP587785252 * Tn);
+	       TN = KP968245836 * (TL - TM);
+	       TA = FNMS(KP587785252, Tk, KP951056516 * Tn);
+	       TO = TL + TM;
+	       TQ = FMA(KP433012701, TO, KP1_732050807 * TP);
+	  }
+	  T12 = KP1_732050807 * (TP - TO);
+	  Te = T6 + Td;
+	  T11 = Te - T5;
+	  O[0] = FMA(KP2_000000000, Te, T5); /* outputs 0, 10, 5 are written first */
+	  O[WS(os, 10)] = T12 - T11;
+	  O[WS(os, 5)] = T11 + T12;
+	  { /* outputs 12, 3, 2, 7, 8, 13 */
+	       E TE, TG, TB, TF, TY, T10, Tz, TX, TV, TZ;
+	       TE = TC - TD;
+	       TG = TC + TD;
+	       Tz = Tg - Tf;
+	       TB = Tz + TA;
+	       TF = TA - Tz;
+	       TX = TN + TQ;
+	       TY = TW - TX;
+	       T10 = TW + TX;
+	       O[WS(os, 12)] = FMA(KP2_000000000, TB, TE);
+	       O[WS(os, 3)] = FMS(KP2_000000000, TF, TG);
+	       TV = TE - TB;
+	       O[WS(os, 2)] = TV + TY;
+	       O[WS(os, 7)] = TY - TV;
+	       TZ = TF + TG;
+	       O[WS(os, 8)] = TZ - T10;
+	       O[WS(os, 13)] = -(TZ + T10);
+	  }
+	  { /* outputs 9, 6, 11, 1, 4, 14 */
+	       E Tw, Ty, Tp, Tx, TS, TU, Th, TR, TH, TT;
+	       Tw = Ts - Tv;
+	       Ty = Ts + Tv;
+	       Th = Tf + Tg;
+	       Tp = Th + To;
+	       Tx = Th - To;
+	       TR = TN - TQ;
+	       TS = TK + TR;
+	       TU = TR - TK;
+	       O[WS(os, 9)] = -(FMA(KP2_000000000, Tp, Tw));
+	       O[WS(os, 6)] = FMA(KP2_000000000, Tx, Ty);
+	       TH = Tx - Ty;
+	       O[WS(os, 11)] = TH - TS;
+	       O[WS(os, 1)] = TH + TS;
+	       TT = Tw - Tp;
+	       O[WS(os, 4)] = TT - TU;
+	       O[WS(os, 14)] = TT + TU;
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 15, "hc2rIII_15", {49, 11, 15, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 15, codelet name, then 49/11/15 which match the add/mul/fma counts in this file's header comment */
+
+void X(codelet_hc2rIII_15) (planner *p) { /* hands hc2rIII_15 and its descriptor to planner p; X() is a naming macro defined outside this view */
+     X(khc2rIII_register) (p, hc2rIII_15, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2rIII_16.c b/src/fftw3/rdft/codelets/hc2r/hc2rIII_16.c
new file mode 100644
index 0000000..a81a4a1
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2rIII_16.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:12:13 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 16 -name hc2rIII_16 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 66 FP additions, 32 FP multiplications,
+ * (or, 54 additions, 20 multiplications, 12 fused multiply/add),
+ * 40 stack variables, and 32 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2rIII_16.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_16.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_16.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void hc2rIII_16(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs) /* for each of v passes, reads 8 values from ri and 8 from ii and writes 16 values to O */
+{
+     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); /* numerically 2*cos(pi/16); DK is a macro defined outside this view */
+     DK(KP390180644, +0.390180644032256535696569736954044481855383236); /* numerically 2*sin(pi/16) */
+     DK(KP1_111140466, +1.111140466039204449485661627897065748749874382); /* numerically 2*sin(3*pi/16) */
+     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); /* numerically 2*cos(3*pi/16) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* numerically sqrt(2) */
+     DK(KP765366864, +0.765366864730179543456919968060797733522689125); /* numerically 2*sin(pi/8) */
+     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); /* numerically 2*cos(pi/8) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; ri and ii both step by ivs, O steps by ovs; nothing runs when v <= 0 */
+	  E T7, TW, T13, Tj, TD, TK, TP, TH, Te, TX, T12, To, Tt, Tx, TS;
+	  E Tw, TT, TY;
+	  { /* inputs k=0,7,4,3 of both ri and ii: pair sums and differences */
+	       E T3, Tf, TC, TV, T6, Tz, Ti, TU;
+	       {
+		    E T1, T2, TA, TB;
+		    T1 = ri[0];
+		    T2 = ri[WS(ris, 7)];
+		    T3 = T1 + T2;
+		    Tf = T1 - T2;
+		    TA = ii[0];
+		    TB = ii[WS(iis, 7)];
+		    TC = TA + TB;
+		    TV = TB - TA;
+	       }
+	       {
+		    E T4, T5, Tg, Th;
+		    T4 = ri[WS(ris, 4)];
+		    T5 = ri[WS(ris, 3)];
+		    T6 = T4 + T5;
+		    Tz = T4 - T5;
+		    Tg = ii[WS(iis, 4)];
+		    Th = ii[WS(iis, 3)];
+		    Ti = Tg + Th;
+		    TU = Tg - Th;
+	       }
+	       T7 = T3 + T6;
+	       TW = TU + TV;
+	       T13 = TV - TU;
+	       Tj = Tf - Ti;
+	       TD = Tz + TC;
+	       TK = Tz - TC;
+	       TP = T3 - T6;
+	       TH = Tf + Ti;
+	  }
+	  { /* inputs k=2,5,1,6 of both ri and ii: pair sums and differences */
+	       E Ta, Tk, Tn, TR, Td, Tp, Ts, TQ;
+	       {
+		    E T8, T9, Tl, Tm;
+		    T8 = ri[WS(ris, 2)];
+		    T9 = ri[WS(ris, 5)];
+		    Ta = T8 + T9;
+		    Tk = T8 - T9;
+		    Tl = ii[WS(iis, 2)];
+		    Tm = ii[WS(iis, 5)];
+		    Tn = Tl + Tm;
+		    TR = Tl - Tm;
+	       }
+	       {
+		    E Tb, Tc, Tq, Tr;
+		    Tb = ri[WS(ris, 1)];
+		    Tc = ri[WS(ris, 6)];
+		    Td = Tb + Tc;
+		    Tp = Tb - Tc;
+		    Tq = ii[WS(iis, 1)];
+		    Tr = ii[WS(iis, 6)];
+		    Ts = Tq + Tr;
+		    TQ = Tr - Tq;
+	       }
+	       Te = Ta + Td;
+	       TX = Ta - Td;
+	       T12 = TR + TQ;
+	       To = Tk - Tn;
+	       Tt = Tp - Ts;
+	       Tx = Tp + Ts;
+	       TS = TQ - TR;
+	       Tw = Tk + Tn;
+	  }
+	  O[0] = KP2_000000000 * (T7 + Te); /* twice the sum of all eight ri inputs */
+	  O[WS(os, 8)] = KP2_000000000 * (T13 - T12);
+	  TT = TP + TS;
+	  TY = TW - TX;
+	  O[WS(os, 2)] = FMA(KP1_847759065, TT, KP765366864 * TY); /* FMA and FNMS are macros defined outside this view */
+	  O[WS(os, 10)] = FNMS(KP765366864, TT, KP1_847759065 * TY);
+	  { /* outputs 4, 12, 6, 14 */
+	       E T11, T14, TZ, T10;
+	       T11 = T7 - Te;
+	       T14 = T12 + T13;
+	       O[WS(os, 4)] = KP1_414213562 * (T11 + T14);
+	       O[WS(os, 12)] = KP1_414213562 * (T14 - T11);
+	       TZ = TP - TS;
+	       T10 = TX + TW;
+	       O[WS(os, 6)] = FMA(KP765366864, TZ, KP1_847759065 * T10);
+	       O[WS(os, 14)] = FNMS(KP1_847759065, TZ, KP765366864 * T10);
+	  }
+	  { /* outputs 3, 15, 11, 7 */
+	       E TJ, TN, TM, TO, TI, TL;
+	       TI = KP707106781 * (Tw + Tx);
+	       TJ = TH - TI;
+	       TN = TH + TI;
+	       TL = KP707106781 * (To - Tt);
+	       TM = TK - TL;
+	       TO = TL + TK;
+	       O[WS(os, 3)] = FMA(KP1_662939224, TJ, KP1_111140466 * TM);
+	       O[WS(os, 15)] = FNMS(KP1_961570560, TN, KP390180644 * TO);
+	       O[WS(os, 11)] = FNMS(KP1_111140466, TJ, KP1_662939224 * TM);
+	       O[WS(os, 7)] = FMA(KP390180644, TN, KP1_961570560 * TO);
+	  }
+	  { /* outputs 1, 13, 9, 5 */
+	       E Tv, TF, TE, TG, Tu, Ty;
+	       Tu = KP707106781 * (To + Tt);
+	       Tv = Tj + Tu;
+	       TF = Tj - Tu;
+	       Ty = KP707106781 * (Tw - Tx);
+	       TE = Ty + TD;
+	       TG = Ty - TD;
+	       O[WS(os, 1)] = FNMS(KP390180644, TE, KP1_961570560 * Tv);
+	       O[WS(os, 13)] = FNMS(KP1_662939224, TF, KP1_111140466 * TG);
+	       O[WS(os, 9)] = -(FMA(KP390180644, Tv, KP1_961570560 * TE));
+	       O[WS(os, 5)] = FMA(KP1_111140466, TF, KP1_662939224 * TG);
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 16, "hc2rIII_16", {54, 20, 12, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 16, codelet name, then 54/20/12 which match the add/mul/fma counts in this file's header comment */
+
+void X(codelet_hc2rIII_16) (planner *p) { /* hands hc2rIII_16 and its descriptor to planner p; X() is a naming macro defined outside this view */
+     X(khc2rIII_register) (p, hc2rIII_16, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2rIII_2.c b/src/fftw3/rdft/codelets/hc2r/hc2rIII_2.c
new file mode 100644
index 0000000..c01ed70
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2rIII_2.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:54 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 2 -name hc2rIII_2 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 0 FP additions, 2 FP multiplications,
+ * (or, 0 additions, 2 multiplications, 0 fused multiply/add),
+ * 4 stack variables, and 4 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2rIII_2.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_2.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_2.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void hc2rIII_2(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Kernel emitted by gen_hc2r (-n 2 -dft-III): each pass reads ri[0] and ii[0] and writes O[0] and O[WS(os, 1)]; ris and iis are never read here. */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; ri and ii step by ivs, O steps by ovs */
+	  E T1, T2;
+	  T1 = ri[0];
+	  O[0] = KP2_000000000 * T1; /* 2 * ri[0] */
+	  T2 = ii[0];
+	  O[WS(os, 1)] = -(KP2_000000000 * T2); /* -2 * ii[0] */
+     }
+}
+
+static const khc2r_desc desc = { 2, "hc2rIII_2", {0, 2, 0, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* {0, 2, 0, ...} repeats the add/mul/fma counts quoted in this file's header comment; exact field meanings come from khc2r_desc, declared elsewhere */
+
+void X(codelet_hc2rIII_2) (planner *p) { /* registration entry point for this codelet */
+     X(khc2rIII_register) (p, hc2rIII_2, &desc); /* passes the kernel function and its descriptor for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2rIII_3.c b/src/fftw3/rdft/codelets/hc2r/hc2rIII_3.c
new file mode 100644
index 0000000..05b761d
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2rIII_3.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:54 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 3 -name hc2rIII_3 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 4 FP additions, 2 FP multiplications,
+ * (or, 3 additions, 1 multiplications, 1 fused multiply/add),
+ * 8 stack variables, and 6 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2rIII_3.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_3.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_3.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void hc2rIII_3(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Kernel emitted by gen_hc2r (-n 3 -dft-III): each pass reads ri[0], ri[WS(ris, 1)] and ii[0] and writes three outputs; iis is never read here. */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* numerically sqrt(3) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; ri and ii step by ivs, O steps by ovs */
+	  E T5, T1, T2, T3, T4;
+	  T4 = ii[0];
+	  T5 = KP1_732050807 * T4;
+	  T1 = ri[WS(ris, 1)];
+	  T2 = ri[0];
+	  T3 = T2 - T1;
+	  O[0] = FMA(KP2_000000000, T2, T1); /* FMA is a macro defined outside this file; presumably a*b + c -- TODO confirm */
+	  O[WS(os, 2)] = -(T3 + T5);
+	  O[WS(os, 1)] = T3 - T5;
+     }
+}
+
+static const khc2r_desc desc = { 3, "hc2rIII_3", {3, 1, 1, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* {3, 1, 1, ...} repeats the add/mul/fma counts quoted in this file's header comment; exact field meanings come from khc2r_desc, declared elsewhere */
+
+void X(codelet_hc2rIII_3) (planner *p) { /* registration entry point for this codelet */
+     X(khc2rIII_register) (p, hc2rIII_3, &desc); /* passes the kernel function and its descriptor for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2rIII_32.c b/src/fftw3/rdft/codelets/hc2r/hc2rIII_32.c
new file mode 100644
index 0000000..ab9409b
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2rIII_32.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:12:16 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 32 -name hc2rIII_32 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 174 FP additions, 84 FP multiplications,
+ * (or, 138 additions, 48 multiplications, 36 fused multiply/add),
+ * 66 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2rIII_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void hc2rIII_32(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Kernel emitted by gen_hc2r (-n 32 -dft-III): each pass reads ri and ii at strided indices 0..15 and writes O at strided indices 0..31. */
+     DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
+     DK(KP580569354, +0.580569354508924735272384751634790549382952557);
+     DK(KP942793473, +0.942793473651995297112775251810508755314920638);
+     DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
+     DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
+     DK(KP1_268786568, +1.268786568327290996430343226450986741351374190);
+     DK(KP196034280, +0.196034280659121203988391127777283691722273346);
+     DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
+     DK(KP765366864, +0.765366864730179543456919968060797733522689125);
+     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
+     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
+     DK(KP390180644, +0.390180644032256535696569736954044481855383236);
+     DK(KP1_111140466, +1.111140466039204449485661627897065748749874382);
+     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* numerically sqrt(2) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically 1/sqrt(2) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; ri and ii step by ivs, O steps by ovs */
+	  E T7, T2i, T2F, Tz, T1k, T1I, T1Z, T1x, Te, T22, T2E, T2j, T1f, T1y, TK; /* intermediates that outlive the four load stages below */
+	  E T1J, Tm, T2B, TW, T1a, T1C, T1L, T28, T2l, Tt, T2A, T17, T1b, T1F, T1M;
+	  E T2d, T2m;
+	  { /* load stage 1: ri and ii at indices 0, 15, 8 and 7 */
+	       E T3, Tv, T1j, T2h, T6, T1g, Ty, T2g;
+	       {
+		    E T1, T2, T1h, T1i;
+		    T1 = ri[0];
+		    T2 = ri[WS(ris, 15)];
+		    T3 = T1 + T2;
+		    Tv = T1 - T2;
+		    T1h = ii[0];
+		    T1i = ii[WS(iis, 15)];
+		    T1j = T1h + T1i;
+		    T2h = T1i - T1h;
+	       }
+	       {
+		    E T4, T5, Tw, Tx;
+		    T4 = ri[WS(ris, 8)];
+		    T5 = ri[WS(ris, 7)];
+		    T6 = T4 + T5;
+		    T1g = T4 - T5;
+		    Tw = ii[WS(iis, 8)];
+		    Tx = ii[WS(iis, 7)];
+		    Ty = Tw + Tx;
+		    T2g = Tw - Tx;
+	       }
+	       T7 = T3 + T6;
+	       T2i = T2g + T2h;
+	       T2F = T2h - T2g;
+	       Tz = Tv - Ty;
+	       T1k = T1g + T1j;
+	       T1I = T1g - T1j;
+	       T1Z = T3 - T6;
+	       T1x = Tv + Ty;
+	  }
+	  { /* load stage 2: ri and ii at indices 4, 11, 3 and 12 */
+	       E Ta, TA, TD, T21, Td, TF, TI, T20;
+	       {
+		    E T8, T9, TB, TC;
+		    T8 = ri[WS(ris, 4)];
+		    T9 = ri[WS(ris, 11)];
+		    Ta = T8 + T9;
+		    TA = T8 - T9;
+		    TB = ii[WS(iis, 4)];
+		    TC = ii[WS(iis, 11)];
+		    TD = TB + TC;
+		    T21 = TB - TC;
+	       }
+	       {
+		    E Tb, Tc, TG, TH;
+		    Tb = ri[WS(ris, 3)];
+		    Tc = ri[WS(ris, 12)];
+		    Td = Tb + Tc;
+		    TF = Tb - Tc;
+		    TG = ii[WS(iis, 3)];
+		    TH = ii[WS(iis, 12)];
+		    TI = TG + TH;
+		    T20 = TH - TG;
+	       }
+	       Te = Ta + Td;
+	       T22 = T20 - T21;
+	       T2E = T21 + T20;
+	       T2j = Ta - Td;
+	       {
+		    E T1d, T1e, TE, TJ;
+		    T1d = TA + TD;
+		    T1e = TF + TI;
+		    T1f = KP707106781 * (T1d - T1e);
+		    T1y = KP707106781 * (T1d + T1e);
+		    TE = TA - TD;
+		    TJ = TF - TI;
+		    TK = KP707106781 * (TE + TJ);
+		    T1J = KP707106781 * (TE - TJ);
+	       }
+	  }
+	  { /* load stage 3: ri and ii at indices 2, 13, 10 and 5 */
+	       E Ti, TM, TU, T25, Tl, TR, TP, T26, TQ, TV;
+	       {
+		    E Tg, Th, TS, TT;
+		    Tg = ri[WS(ris, 2)];
+		    Th = ri[WS(ris, 13)];
+		    Ti = Tg + Th;
+		    TM = Tg - Th;
+		    TS = ii[WS(iis, 2)];
+		    TT = ii[WS(iis, 13)];
+		    TU = TS + TT;
+		    T25 = TS - TT;
+	       }
+	       {
+		    E Tj, Tk, TN, TO;
+		    Tj = ri[WS(ris, 10)];
+		    Tk = ri[WS(ris, 5)];
+		    Tl = Tj + Tk;
+		    TR = Tj - Tk;
+		    TN = ii[WS(iis, 10)];
+		    TO = ii[WS(iis, 5)];
+		    TP = TN + TO;
+		    T26 = TN - TO;
+	       }
+	       Tm = Ti + Tl;
+	       T2B = T26 + T25;
+	       TQ = TM - TP;
+	       TV = TR + TU;
+	       TW = FNMS(KP382683432, TV, KP923879532 * TQ); /* FMA and FNMS are macros defined outside this file; see their definitions for operand order */
+	       T1a = FMA(KP382683432, TQ, KP923879532 * TV);
+	       {
+		    E T1A, T1B, T24, T27;
+		    T1A = TM + TP;
+		    T1B = TU - TR;
+		    T1C = FNMS(KP923879532, T1B, KP382683432 * T1A);
+		    T1L = FMA(KP923879532, T1A, KP382683432 * T1B);
+		    T24 = Ti - Tl;
+		    T27 = T25 - T26;
+		    T28 = T24 - T27;
+		    T2l = T24 + T27;
+	       }
+	  }
+	  { /* load stage 4: ri and ii at indices 1, 14, 6 and 9 */
+	       E Tp, TX, T15, T2a, Ts, T12, T10, T2b, T11, T16;
+	       {
+		    E Tn, To, T13, T14;
+		    Tn = ri[WS(ris, 1)];
+		    To = ri[WS(ris, 14)];
+		    Tp = Tn + To;
+		    TX = Tn - To;
+		    T13 = ii[WS(iis, 1)];
+		    T14 = ii[WS(iis, 14)];
+		    T15 = T13 + T14;
+		    T2a = T14 - T13;
+	       }
+	       {
+		    E Tq, Tr, TY, TZ;
+		    Tq = ri[WS(ris, 6)];
+		    Tr = ri[WS(ris, 9)];
+		    Ts = Tq + Tr;
+		    T12 = Tq - Tr;
+		    TY = ii[WS(iis, 6)];
+		    TZ = ii[WS(iis, 9)];
+		    T10 = TY + TZ;
+		    T2b = TY - TZ;
+	       }
+	       Tt = Tp + Ts;
+	       T2A = T2b + T2a;
+	       T11 = TX - T10;
+	       T16 = T12 - T15;
+	       T17 = FMA(KP923879532, T11, KP382683432 * T16);
+	       T1b = FNMS(KP382683432, T11, KP923879532 * T16);
+	       {
+		    E T1D, T1E, T29, T2c;
+		    T1D = TX + T10;
+		    T1E = T12 + T15;
+		    T1F = FNMS(KP923879532, T1E, KP382683432 * T1D);
+		    T1M = FMA(KP923879532, T1D, KP382683432 * T1E);
+		    T29 = Tp - Ts;
+		    T2c = T2a - T2b;
+		    T2d = T29 + T2c;
+		    T2m = T2c - T29;
+	       }
+	  }
+	  { /* writes O at indices 0, 16, 8 and 24 */
+	       E Tf, Tu, T2L, T2M, T2N, T2O;
+	       Tf = T7 + Te;
+	       Tu = Tm + Tt;
+	       T2L = Tf - Tu;
+	       T2M = T2B + T2A;
+	       T2N = T2F - T2E;
+	       T2O = T2M + T2N;
+	       O[0] = KP2_000000000 * (Tf + Tu);
+	       O[WS(os, 16)] = KP2_000000000 * (T2N - T2M);
+	       O[WS(os, 8)] = KP1_414213562 * (T2L + T2O);
+	       O[WS(os, 24)] = KP1_414213562 * (T2O - T2L);
+	  }
+	  { /* writes O at indices 6, 30, 22 and 14 */
+	       E T2t, T2x, T2w, T2y;
+	       {
+		    E T2r, T2s, T2u, T2v;
+		    T2r = T1Z - T22;
+		    T2s = KP707106781 * (T2m - T2l);
+		    T2t = T2r + T2s;
+		    T2x = T2r - T2s;
+		    T2u = T2j + T2i;
+		    T2v = KP707106781 * (T28 - T2d);
+		    T2w = T2u - T2v;
+		    T2y = T2v + T2u;
+	       }
+	       O[WS(os, 6)] = FMA(KP1_662939224, T2t, KP1_111140466 * T2w);
+	       O[WS(os, 30)] = FNMS(KP1_961570560, T2x, KP390180644 * T2y);
+	       O[WS(os, 22)] = FNMS(KP1_111140466, T2t, KP1_662939224 * T2w);
+	       O[WS(os, 14)] = FMA(KP390180644, T2x, KP1_961570560 * T2y);
+	  }
+	  { /* writes O at indices 4, 28, 20 and 12 */
+	       E T2D, T2J, T2I, T2K;
+	       {
+		    E T2z, T2C, T2G, T2H;
+		    T2z = T7 - Te;
+		    T2C = T2A - T2B;
+		    T2D = T2z + T2C;
+		    T2J = T2z - T2C;
+		    T2G = T2E + T2F;
+		    T2H = Tm - Tt;
+		    T2I = T2G - T2H;
+		    T2K = T2H + T2G;
+	       }
+	       O[WS(os, 4)] = FMA(KP1_847759065, T2D, KP765366864 * T2I);
+	       O[WS(os, 28)] = FNMS(KP1_847759065, T2J, KP765366864 * T2K);
+	       O[WS(os, 20)] = FNMS(KP765366864, T2D, KP1_847759065 * T2I);
+	       O[WS(os, 12)] = FMA(KP765366864, T2J, KP1_847759065 * T2K);
+	  }
+	  { /* writes O at indices 1, 25, 17 and 9 */
+	       E T19, T1n, T1m, T1o;
+	       {
+		    E TL, T18, T1c, T1l;
+		    TL = Tz + TK;
+		    T18 = TW + T17;
+		    T19 = TL + T18;
+		    T1n = TL - T18;
+		    T1c = T1a + T1b;
+		    T1l = T1f + T1k;
+		    T1m = T1c + T1l;
+		    T1o = T1c - T1l;
+	       }
+	       O[WS(os, 1)] = FNMS(KP196034280, T1m, KP1_990369453 * T19);
+	       O[WS(os, 25)] = FNMS(KP1_546020906, T1n, KP1_268786568 * T1o);
+	       O[WS(os, 17)] = -(FMA(KP196034280, T19, KP1_990369453 * T1m));
+	       O[WS(os, 9)] = FMA(KP1_268786568, T1n, KP1_546020906 * T1o);
+	  }
+	  { /* writes O at indices 5, 29, 21 and 13 */
+	       E T1r, T1v, T1u, T1w;
+	       {
+		    E T1p, T1q, T1s, T1t;
+		    T1p = Tz - TK;
+		    T1q = T1b - T1a;
+		    T1r = T1p + T1q;
+		    T1v = T1p - T1q;
+		    T1s = T1f - T1k;
+		    T1t = TW - T17;
+		    T1u = T1s - T1t;
+		    T1w = T1t + T1s;
+	       }
+	       O[WS(os, 5)] = FMA(KP1_763842528, T1r, KP942793473 * T1u);
+	       O[WS(os, 29)] = FNMS(KP1_913880671, T1v, KP580569354 * T1w);
+	       O[WS(os, 21)] = FNMS(KP942793473, T1r, KP1_763842528 * T1u);
+	       O[WS(os, 13)] = FMA(KP580569354, T1v, KP1_913880671 * T1w);
+	  }
+	  { /* writes O at indices 7, 31, 23 and 15 */
+	       E T1T, T1X, T1W, T1Y;
+	       {
+		    E T1R, T1S, T1U, T1V;
+		    T1R = T1x + T1y;
+		    T1S = T1L + T1M;
+		    T1T = T1R - T1S;
+		    T1X = T1R + T1S;
+		    T1U = T1J + T1I;
+		    T1V = T1C - T1F;
+		    T1W = T1U - T1V;
+		    T1Y = T1V + T1U;
+	       }
+	       O[WS(os, 7)] = FMA(KP1_546020906, T1T, KP1_268786568 * T1W);
+	       O[WS(os, 31)] = FNMS(KP1_990369453, T1X, KP196034280 * T1Y);
+	       O[WS(os, 23)] = FNMS(KP1_268786568, T1T, KP1_546020906 * T1W);
+	       O[WS(os, 15)] = FMA(KP196034280, T1X, KP1_990369453 * T1Y);
+	  }
+	  { /* writes O at indices 2, 26, 18 and 10 */
+	       E T2f, T2p, T2o, T2q;
+	       {
+		    E T23, T2e, T2k, T2n;
+		    T23 = T1Z + T22;
+		    T2e = KP707106781 * (T28 + T2d);
+		    T2f = T23 + T2e;
+		    T2p = T23 - T2e;
+		    T2k = T2i - T2j;
+		    T2n = KP707106781 * (T2l + T2m);
+		    T2o = T2k - T2n;
+		    T2q = T2n + T2k;
+	       }
+	       O[WS(os, 2)] = FMA(KP1_961570560, T2f, KP390180644 * T2o);
+	       O[WS(os, 26)] = FNMS(KP1_662939224, T2p, KP1_111140466 * T2q);
+	       O[WS(os, 18)] = FNMS(KP390180644, T2f, KP1_961570560 * T2o);
+	       O[WS(os, 10)] = FMA(KP1_111140466, T2p, KP1_662939224 * T2q);
+	  }
+	  { /* writes O at indices 3, 27, 19 and 11 */
+	       E T1H, T1P, T1O, T1Q;
+	       {
+		    E T1z, T1G, T1K, T1N;
+		    T1z = T1x - T1y;
+		    T1G = T1C + T1F;
+		    T1H = T1z + T1G;
+		    T1P = T1z - T1G;
+		    T1K = T1I - T1J;
+		    T1N = T1L - T1M;
+		    T1O = T1K - T1N;
+		    T1Q = T1N + T1K;
+	       }
+	       O[WS(os, 3)] = FMA(KP1_913880671, T1H, KP580569354 * T1O);
+	       O[WS(os, 27)] = FNMS(KP1_763842528, T1P, KP942793473 * T1Q);
+	       O[WS(os, 19)] = FNMS(KP580569354, T1H, KP1_913880671 * T1O);
+	       O[WS(os, 11)] = FMA(KP942793473, T1P, KP1_763842528 * T1Q);
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 32, "hc2rIII_32", {138, 48, 36, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* {138, 48, 36, ...} repeats the add/mul/fma counts quoted in this file's header comment; exact field meanings come from khc2r_desc, declared elsewhere */
+
+void X(codelet_hc2rIII_32) (planner *p) { /* registration entry point for this codelet */
+     X(khc2rIII_register) (p, hc2rIII_32, &desc); /* passes the kernel function and its descriptor for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2rIII_4.c b/src/fftw3/rdft/codelets/hc2r/hc2rIII_4.c
new file mode 100644
index 0000000..d562add
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2rIII_4.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:54 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 4 -name hc2rIII_4 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 6 FP additions, 4 FP multiplications,
+ * (or, 6 additions, 4 multiplications, 0 fused multiply/add),
+ * 9 stack variables, and 8 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2rIII_4.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_4.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_4.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void hc2rIII_4(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Kernel emitted by gen_hc2r (-n 4 -dft-III): each pass reads two ri and two ii values and writes O at strided indices 0..3. */
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* numerically sqrt(2) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; ri and ii step by ivs, O steps by ovs */
+	  E T1, T2, T3, T4, T5, T6;
+	  T1 = ri[0];
+	  T2 = ri[WS(ris, 1)];
+	  T3 = T1 - T2; /* difference of the two ri inputs */
+	  T4 = ii[0];
+	  T5 = ii[WS(iis, 1)];
+	  T6 = T4 + T5; /* sum of the two ii inputs */
+	  O[0] = KP2_000000000 * (T1 + T2);
+	  O[WS(os, 2)] = KP2_000000000 * (T5 - T4);
+	  O[WS(os, 1)] = KP1_414213562 * (T3 - T6);
+	  O[WS(os, 3)] = -(KP1_414213562 * (T3 + T6));
+     }
+}
+
+static const khc2r_desc desc = { 4, "hc2rIII_4", {6, 4, 0, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* {6, 4, 0, ...} repeats the add/mul/fma counts quoted in this file's header comment; exact field meanings come from khc2r_desc, declared elsewhere */
+
+void X(codelet_hc2rIII_4) (planner *p) { /* registration entry point for this codelet */
+     X(khc2rIII_register) (p, hc2rIII_4, &desc); /* passes the kernel function and its descriptor for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2rIII_5.c b/src/fftw3/rdft/codelets/hc2r/hc2rIII_5.c
new file mode 100644
index 0000000..4fb772d
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2rIII_5.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:55 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 5 -name hc2rIII_5 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 12 FP additions, 7 FP multiplications,
+ * (or, 8 additions, 3 multiplications, 4 fused multiply/add),
+ * 18 stack variables, and 10 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2rIII_5.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_5.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_5.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void hc2rIII_5(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Kernel emitted by gen_hc2r (-n 5 -dft-III): each pass reads three ri and two ii values and writes O at strided indices 0..4. */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180);
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     DK(KP1_175570504, +1.175570504584946258337411909278145537195304875);
+     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; ri and ii step by ivs, O steps by ovs */
+	  E Ta, Tc, T1, T4, T5, T6, Tb, T7;
+	  { /* all input loads happen in this block; FMA, FNMS and FMS are macros defined outside this file */
+	       E T8, T9, T2, T3;
+	       T8 = ii[WS(iis, 1)];
+	       T9 = ii[0];
+	       Ta = FMA(KP1_902113032, T8, KP1_175570504 * T9);
+	       Tc = FNMS(KP1_902113032, T9, KP1_175570504 * T8);
+	       T1 = ri[WS(ris, 2)];
+	       T2 = ri[WS(ris, 1)];
+	       T3 = ri[0];
+	       T4 = T2 + T3;
+	       T5 = FMS(KP500000000, T4, T1);
+	       T6 = KP1_118033988 * (T3 - T2);
+	  }
+	  O[0] = FMA(KP2_000000000, T4, T1);
+	  Tb = T6 - T5;
+	  O[WS(os, 2)] = Tb + Tc;
+	  O[WS(os, 3)] = Tc - Tb;
+	  T7 = T5 + T6;
+	  O[WS(os, 1)] = T7 - Ta;
+	  O[WS(os, 4)] = -(T7 + Ta);
+     }
+}
+
+static const khc2r_desc desc = { 5, "hc2rIII_5", {8, 3, 4, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* {8, 3, 4, ...} repeats the add/mul/fma counts quoted in this file's header comment; exact field meanings come from khc2r_desc, declared elsewhere */
+
+void X(codelet_hc2rIII_5) (planner *p) { /* registration entry point for this codelet */
+     X(khc2rIII_register) (p, hc2rIII_5, &desc); /* passes the kernel function and its descriptor for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2rIII_6.c b/src/fftw3/rdft/codelets/hc2r/hc2rIII_6.c
new file mode 100644
index 0000000..506bffd
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2rIII_6.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:58 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 6 -name hc2rIII_6 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 12 FP additions, 6 FP multiplications,
+ * (or, 10 additions, 4 multiplications, 2 fused multiply/add),
+ * 15 stack variables, and 12 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2rIII_6.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_6.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_6.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void hc2rIII_6(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Kernel emitted by gen_hc2r (-n 6 -dft-III): each pass reads three ri and three ii values and writes O at strided indices 0..5. */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* numerically sqrt(3) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; ri and ii step by ivs, O steps by ovs */
+	  E T1, T6, T4, T5, T9, Tb, Ta, Tc;
+	  T1 = ri[WS(ris, 1)];
+	  T6 = ii[WS(iis, 1)];
+	  { /* sums and sqrt(3)-scaled differences of the inputs at indices 2 and 0 */
+	       E T2, T3, T7, T8;
+	       T2 = ri[WS(ris, 2)];
+	       T3 = ri[0];
+	       T4 = T2 + T3;
+	       T5 = KP1_732050807 * (T2 - T3);
+	       T7 = ii[WS(iis, 2)];
+	       T8 = ii[0];
+	       T9 = T7 + T8;
+	       Tb = KP1_732050807 * (T7 - T8);
+	  }
+	  O[0] = KP2_000000000 * (T1 + T4);
+	  O[WS(os, 3)] = KP2_000000000 * (T6 - T9);
+	  Ta = FMA(KP2_000000000, T6, T9); /* FMA and FMS are macros defined outside this file */
+	  O[WS(os, 1)] = -(T5 + Ta);
+	  O[WS(os, 5)] = T5 - Ta;
+	  Tc = FMS(KP2_000000000, T1, T4);
+	  O[WS(os, 2)] = Tb - Tc;
+	  O[WS(os, 4)] = Tc + Tb;
+     }
+}
+
+static const khc2r_desc desc = { 6, "hc2rIII_6", {10, 4, 2, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* {10, 4, 2, ...} repeats the add/mul/fma counts quoted in this file's header comment; exact field meanings come from khc2r_desc, declared elsewhere */
+
+void X(codelet_hc2rIII_6) (planner *p) { /* registration entry point for this codelet */
+     X(khc2rIII_register) (p, hc2rIII_6, &desc); /* passes the kernel function and its descriptor for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2rIII_7.c b/src/fftw3/rdft/codelets/hc2r/hc2rIII_7.c
new file mode 100644
index 0000000..f65cbfd
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2rIII_7.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:12:00 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 7 -name hc2rIII_7 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 24 FP additions, 19 FP multiplications,
+ * (or, 9 additions, 4 multiplications, 15 fused multiply/add),
+ * 21 stack variables, and 14 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2rIII_7.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_7.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_7.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void hc2rIII_7(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Kernel emitted by gen_hc2r (-n 7 -dft-III): each pass reads four ri and three ii values and writes O at strided indices 0..6. */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_246979603, +1.246979603717467061050009768008479621264549462);
+     DK(KP1_801937735, +1.801937735804838252472204639014890102331838324);
+     DK(KP445041867, +0.445041867912628808577805128993589518932711138);
+     DK(KP867767478, +0.867767478235116240951536665696717509219981456);
+     DK(KP1_949855824, +1.949855824363647214036263365987862434465571601);
+     DK(KP1_563662964, +1.563662964936059617416889053348115500464669037);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; ri and ii step by ivs, O steps by ovs */
+	  E T9, Td, Tb, T1, T4, T2, T3, T5, Tc, Ta, T6, T8, T7;
+	  T6 = ii[WS(iis, 2)];
+	  T8 = ii[0];
+	  T7 = ii[WS(iis, 1)];
+	  T9 = FMA(KP1_563662964, T6, KP1_949855824 * T7) + (KP867767478 * T8); /* T9, Td, Tb: weighted combinations of the three ii inputs; FMA, FNMS and FNMA are macros defined outside this file */
+	  Td = FNMS(KP1_949855824, T8, KP1_563662964 * T7) - (KP867767478 * T6);
+	  Tb = FNMS(KP1_563662964, T8, KP1_949855824 * T6) - (KP867767478 * T7);
+	  T1 = ri[WS(ris, 3)];
+	  T4 = ri[0];
+	  T2 = ri[WS(ris, 2)];
+	  T3 = ri[WS(ris, 1)];
+	  T5 = FMA(KP445041867, T3, KP1_801937735 * T4) + FNMA(KP1_246979603, T2, T1); /* T5, Tc, Ta: weighted combinations of the four ri inputs */
+	  Tc = FMA(KP1_801937735, T2, KP445041867 * T4) + FNMA(KP1_246979603, T3, T1);
+	  Ta = FMA(KP1_246979603, T4, T1) + FNMA(KP1_801937735, T3, KP445041867 * T2);
+	  O[WS(os, 1)] = T5 - T9;
+	  O[WS(os, 6)] = -(T5 + T9);
+	  O[WS(os, 4)] = Td - Tc;
+	  O[WS(os, 3)] = Tc + Td;
+	  O[WS(os, 5)] = Tb - Ta;
+	  O[WS(os, 2)] = Ta + Tb;
+	  O[0] = FMA(KP2_000000000, T2 + T3 + T4, T1);
+     }
+}
+
+static const khc2r_desc desc = { 7, "hc2rIII_7", {9, 4, 15, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* {9, 4, 15, ...} repeats the add/mul/fma counts quoted in this file's header comment; exact field meanings come from khc2r_desc, declared elsewhere */
+
+void X(codelet_hc2rIII_7) (planner *p) { /* registration entry point for this codelet */
+     X(khc2rIII_register) (p, hc2rIII_7, &desc); /* passes the kernel function and its descriptor for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2rIII_8.c b/src/fftw3/rdft/codelets/hc2r/hc2rIII_8.c
new file mode 100644
index 0000000..3a7ffb5
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2rIII_8.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:12:01 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 8 -name hc2rIII_8 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 22 FP additions, 12 FP multiplications,
+ * (or, 18 additions, 8 multiplications, 4 fused multiply/add),
+ * 19 stack variables, and 16 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2rIII_8.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_8.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_8.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void hc2rIII_8(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Kernel emitted by gen_hc2r (-n 8 -dft-III): each pass reads four ri and four ii values and writes O at strided indices 0..7. */
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* numerically sqrt(2) */
+     DK(KP765366864, +0.765366864730179543456919968060797733522689125);
+     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; ri and ii step by ivs, O steps by ovs */
+	  E T3, T7, Tf, Tl, T6, Tc, Ta, Tk, Tb, Tg;
+	  { /* sums and differences of the inputs at indices 0 and 3 */
+	       E T1, T2, Td, Te;
+	       T1 = ri[0];
+	       T2 = ri[WS(ris, 3)];
+	       T3 = T1 + T2;
+	       T7 = T1 - T2;
+	       Td = ii[0];
+	       Te = ii[WS(iis, 3)];
+	       Tf = Td + Te;
+	       Tl = Te - Td;
+	  }
+	  { /* sums and differences of the inputs at indices 2 and 1 */
+	       E T4, T5, T8, T9;
+	       T4 = ri[WS(ris, 2)];
+	       T5 = ri[WS(ris, 1)];
+	       T6 = T4 + T5;
+	       Tc = T4 - T5;
+	       T8 = ii[WS(iis, 2)];
+	       T9 = ii[WS(iis, 1)];
+	       Ta = T8 + T9;
+	       Tk = T8 - T9;
+	  }
+	  O[0] = KP2_000000000 * (T3 + T6);
+	  O[WS(os, 4)] = KP2_000000000 * (Tl - Tk);
+	  Tb = T7 - Ta;
+	  Tg = Tc + Tf;
+	  O[WS(os, 1)] = FNMS(KP765366864, Tg, KP1_847759065 * Tb); /* FMA and FNMS are macros defined outside this file */
+	  O[WS(os, 5)] = -(FMA(KP765366864, Tb, KP1_847759065 * Tg));
+	  { /* writes O at indices 3, 7, 2 and 6 */
+	       E Th, Ti, Tj, Tm;
+	       Th = T7 + Ta;
+	       Ti = Tc - Tf;
+	       O[WS(os, 3)] = FMA(KP765366864, Th, KP1_847759065 * Ti);
+	       O[WS(os, 7)] = FNMS(KP1_847759065, Th, KP765366864 * Ti);
+	       Tj = T3 - T6;
+	       Tm = Tk + Tl;
+	       O[WS(os, 2)] = KP1_414213562 * (Tj + Tm);
+	       O[WS(os, 6)] = KP1_414213562 * (Tm - Tj);
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 8, "hc2rIII_8", {18, 8, 4, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* {18, 8, 4, ...} repeats the add/mul/fma counts quoted in this file's header comment; exact field meanings come from khc2r_desc, declared elsewhere */
+
+void X(codelet_hc2rIII_8) (planner *p) { /* registration entry point for this codelet */
+     X(khc2rIII_register) (p, hc2rIII_8, &desc); /* passes the kernel function and its descriptor for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2rIII_9.c b/src/fftw3/rdft/codelets/hc2r/hc2rIII_9.c
new file mode 100644
index 0000000..0bcc0ea
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2rIII_9.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:12:05 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 9 -name hc2rIII_9 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 32 FP additions, 18 FP multiplications,
+ * (or, 22 additions, 8 multiplications, 10 fused multiply/add),
+ * 35 stack variables, and 18 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2rIII_9.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_9.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2rIII_9.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void hc2rIII_9(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Emitted by gen_hc2r with -n 9 -dft-III (see the generator command line in this file's header). Each pass reads ri slots 0..4 and ii slots 0..3 and writes O slots 0..8. */
+     DK(KP642787609, +0.642787609686539326322643409907263432907559884); /* sin(2*pi/9) */
+     DK(KP766044443, +0.766044443118978035202392650555416673935832457); /* cos(2*pi/9) */
+     DK(KP1_326827896, +1.326827896337876792410842639271782594433726619); /* sqrt(3)*cos(2*pi/9) */
+     DK(KP1_113340798, +1.113340798452838732905825904094046265936583811); /* sqrt(3)*sin(2*pi/9) */
+     DK(KP984807753, +0.984807753012208059366743024589523013670643252); /* cos(pi/18) */
+     DK(KP173648177, +0.173648177666930348851716626769314796000375677); /* sin(pi/18) */
+     DK(KP1_705737063, +1.705737063904886419256501927880148143872040591); /* sqrt(3)*cos(pi/18) */
+     DK(KP300767466, +0.300767466360870593278543795225003852144476517); /* sqrt(3)*sin(pi/18) */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* sqrt(3) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; after each one ri and ii move by ivs and O moves by ovs */
+	  E T3, Ts, Ti, Td, Tc, T8, To, Tu, Tl, Tt, T9, Te;
+	  { /* combines ri slots 1 and 4 with ii slot 1 */
+	       E Th, T1, T2, Tf, Tg;
+	       Tg = ii[WS(iis, 1)];
+	       Th = KP1_732050807 * Tg;
+	       T1 = ri[WS(ris, 4)];
+	       T2 = ri[WS(ris, 1)];
+	       Tf = T2 - T1;
+	       T3 = FMA(KP2_000000000, T2, T1); /* NOTE(review): FMA/FMS/FNMS are macros defined outside this view, presumably multiply-add variants -- confirm in the included headers */
+	       Ts = Tf - Th;
+	       Ti = Tf + Th;
+	  }
+	  { /* combines ri slots 0, 2, 3 with ii slots 0, 2, 3 */
+	       E T4, T7, Tm, Tk, Tn, Tj;
+	       T4 = ri[WS(ris, 3)];
+	       Td = ii[WS(iis, 3)];
+	       {
+		    E T5, T6, Ta, Tb;
+		    T5 = ri[0];
+		    T6 = ri[WS(ris, 2)];
+		    T7 = T5 + T6;
+		    Tm = KP866025403 * (T6 - T5);
+		    Ta = ii[WS(iis, 2)];
+		    Tb = ii[0];
+		    Tc = Ta - Tb;
+		    Tk = KP866025403 * (Tb + Ta);
+	       }
+	       T8 = T4 + T7; /* sum of ri slots 0, 2 and 3 */
+	       Tn = FMA(KP500000000, Tc, Td);
+	       To = Tm - Tn;
+	       Tu = Tm + Tn;
+	       Tj = FMS(KP500000000, T7, T4);
+	       Tl = Tj + Tk;
+	       Tt = Tj - Tk;
+	  }
+	  O[0] = FMA(KP2_000000000, T8, T3);
+	  T9 = T8 - T3;
+	  Te = KP1_732050807 * (Tc - Td);
+	  O[WS(os, 3)] = T9 + Te; /* outputs 3 and 6 share T9 and Te */
+	  O[WS(os, 6)] = Te - T9;
+	  {
+	       E Tr, Tp, Tq, Tx, Tv, Tw;
+	       Tr = FNMS(KP1_705737063, Tl, KP300767466 * To);
+	       Tp = FMA(KP173648177, Tl, KP984807753 * To);
+	       Tq = Ti - Tp;
+	       O[WS(os, 2)] = -(FMA(KP2_000000000, Tp, Ti)); /* outputs 2, 5, 8 are built from Ti, Tl, To */
+	       O[WS(os, 8)] = Tr - Tq;
+	       O[WS(os, 5)] = Tq + Tr;
+	       Tx = FMA(KP1_113340798, Tt, KP1_326827896 * Tu);
+	       Tv = FNMS(KP642787609, Tu, KP766044443 * Tt);
+	       Tw = Tv - Ts;
+	       O[WS(os, 1)] = FMA(KP2_000000000, Tv, Ts); /* outputs 1, 4, 7 are built from Ts, Tt, Tu */
+	       O[WS(os, 7)] = Tx - Tw;
+	       O[WS(os, 4)] = Tw + Tx;
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 9, "hc2rIII_9", {22, 8, 10, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* 9 and the name match the kernel above; 22, 8, 10 equal the additions / multiplications / fused multiply/add counts quoted in this file's header comment */
+
+void X(codelet_hc2rIII_9) (planner *p) { /* registration entry point: passes p, the kernel function and its descriptor to X(khc2rIII_register) */
+     X(khc2rIII_register) (p, hc2rIII_9, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_10.c b/src/fftw3/rdft/codelets/hc2r/hc2r_10.c
new file mode 100644
index 0000000..9e6c5bc
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_10.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:11 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 10 -name hc2r_10 -include hc2r.h */
+
+/*
+ * This function contains 34 FP additions, 14 FP multiplications,
+ * (or, 26 additions, 6 multiplications, 8 fused multiply/add),
+ * 26 stack variables, and 20 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_10.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_10.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_10.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_10(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Emitted by gen_hc2r with -n 10 (see the generator command line in this file's header). Each pass reads ri slots 0..5 and ii slots 1..4 and writes O slots 0..9. */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); /* 2*sin(2*pi/5) */
+     DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); /* 2*sin(pi/5) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); /* sqrt(5)/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; after each one ri and ii move by ivs and O moves by ovs */
+	  E T3, Tb, Tn, Tv, Tk, Tu, Ta, Ts, Te, Tg, Ti, Tj;
+	  { /* ri slots 0 and 5, ii slots 4 and 1: keep both the difference and the sum of each pair */
+	       E T1, T2, Tl, Tm;
+	       T1 = ri[0];
+	       T2 = ri[WS(ris, 5)];
+	       T3 = T1 - T2;
+	       Tb = T1 + T2;
+	       Tl = ii[WS(iis, 4)];
+	       Tm = ii[WS(iis, 1)];
+	       Tn = Tl - Tm;
+	       Tv = Tl + Tm;
+	  }
+	  Ti = ii[WS(iis, 2)];
+	  Tj = ii[WS(iis, 3)];
+	  Tk = Ti - Tj;
+	  Tu = Ti + Tj;
+	  { /* ri slots 2/3 and 4/1: differences feed Ta and Ts, sums feed Te and Tg */
+	       E T6, Tc, T9, Td;
+	       {
+		    E T4, T5, T7, T8;
+		    T4 = ri[WS(ris, 2)];
+		    T5 = ri[WS(ris, 3)];
+		    T6 = T4 - T5;
+		    Tc = T4 + T5;
+		    T7 = ri[WS(ris, 4)];
+		    T8 = ri[WS(ris, 1)];
+		    T9 = T7 - T8;
+		    Td = T7 + T8;
+	       }
+	       Ta = T6 + T9;
+	       Ts = KP1_118033988 * (T6 - T9);
+	       Te = Tc + Td; /* sum of ri slots 1..4 */
+	       Tg = KP1_118033988 * (Tc - Td);
+	  }
+	  O[WS(os, 5)] = FMA(KP2_000000000, Ta, T3); /* NOTE(review): FMA/FNMS are macros defined outside this view, presumably multiply-add variants -- confirm in the included headers */
+	  O[0] = FMA(KP2_000000000, Te, Tb);
+	  { /* even-numbered outputs 2, 4, 6, 8: built from the pair sums (Tb, Te, Tg) and the ii differences (Tk, Tn) */
+	       E To, Tq, Th, Tp, Tf;
+	       To = FNMS(KP1_902113032, Tn, KP1_175570504 * Tk);
+	       Tq = FMA(KP1_902113032, Tk, KP1_175570504 * Tn);
+	       Tf = FNMS(KP500000000, Te, Tb);
+	       Th = Tf - Tg;
+	       Tp = Tg + Tf;
+	       O[WS(os, 2)] = Th - To;
+	       O[WS(os, 4)] = Tp + Tq;
+	       O[WS(os, 8)] = Th + To;
+	       O[WS(os, 6)] = Tp - Tq;
+	  }
+	  { /* odd-numbered outputs 1, 3, 7, 9: built from the pair differences (T3, Ta, Ts) and the ii sums (Tu, Tv) */
+	       E Tw, Ty, Tt, Tx, Tr;
+	       Tw = FNMS(KP1_902113032, Tv, KP1_175570504 * Tu);
+	       Ty = FMA(KP1_902113032, Tu, KP1_175570504 * Tv);
+	       Tr = FNMS(KP500000000, Ta, T3);
+	       Tt = Tr - Ts;
+	       Tx = Ts + Tr;
+	       O[WS(os, 7)] = Tt - Tw;
+	       O[WS(os, 9)] = Tx + Ty;
+	       O[WS(os, 3)] = Tt + Tw;
+	       O[WS(os, 1)] = Tx - Ty;
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 10, "hc2r_10", {26, 6, 8, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* 10 and the name match the kernel above; 26, 6, 8 equal the additions / multiplications / fused multiply/add counts quoted in this file's header comment */
+
+void X(codelet_hc2r_10) (planner *p) { /* registration entry point: passes p, the kernel function and its descriptor to X(khc2r_register) */
+     X(khc2r_register) (p, hc2r_10, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_11.c b/src/fftw3/rdft/codelets/hc2r/hc2r_11.c
new file mode 100644
index 0000000..2310502
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_11.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:11 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 11 -name hc2r_11 -include hc2r.h */
+
+/*
+ * This function contains 60 FP additions, 51 FP multiplications,
+ * (or, 19 additions, 10 multiplications, 41 fused multiply/add),
+ * 33 stack variables, and 22 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_11.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_11.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_11.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_11(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Emitted by gen_hc2r with -n 11 (see the generator command line in this file's header). Each pass reads ri slots 0..5 and ii slots 1..5 and writes O slots 0..10. */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_918985947, +1.918985947228994779780736114132655398124909697); /* 2*cos(pi/11) */
+     DK(KP1_309721467, +1.309721467890570128113850144932587106367582399); /* 2*cos(3*pi/11) */
+     DK(KP284629676, +0.284629676546570280887585337232739337582102722); /* 2*cos(5*pi/11) */
+     DK(KP830830026, +0.830830026003772851058548298459246407048009821); /* 2*cos(4*pi/11) */
+     DK(KP1_682507065, +1.682507065662362337723623297838735435026584997); /* 2*cos(2*pi/11) */
+     DK(KP563465113, +0.563465113682859395422835830693233798071555798); /* 2*sin(pi/11) */
+     DK(KP1_511499148, +1.511499148708516567548071687944688840359434890); /* 2*sin(3*pi/11) */
+     DK(KP1_979642883, +1.979642883761865464752184075553437574753038744); /* 2*sin(5*pi/11) */
+     DK(KP1_819263990, +1.819263990709036742823430766158056920120482102); /* 2*sin(4*pi/11) */
+     DK(KP1_081281634, +1.081281634911195164215271908637383390863541216); /* 2*sin(2*pi/11) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; after each one ri and ii move by ivs and O moves by ovs */
+	  E Td, Tl, Tf, Th, Tj, T1, T2, T6, T5, T4, T3, T7, Tk, Te, Tg;
+	  E Ti;
+	  { /* five weighted combinations of the five ii inputs, using the sine-based constants */
+	       E T8, Tc, T9, Ta, Tb;
+	       T8 = ii[WS(iis, 2)];
+	       Tc = ii[WS(iis, 1)];
+	       T9 = ii[WS(iis, 4)];
+	       Ta = ii[WS(iis, 5)];
+	       Tb = ii[WS(iis, 3)];
+	       Td = FMA(KP1_081281634, T8, KP1_819263990 * T9) + FNMA(KP1_979642883, Ta, KP1_511499148 * Tb) - (KP563465113 * Tc); /* NOTE(review): FMA/FNMA/FNMS are macros defined outside this view, presumably multiply-add variants -- confirm in the included headers */
+	       Tl = FMA(KP1_979642883, T8, KP1_819263990 * Ta) + FNMA(KP563465113, T9, KP1_081281634 * Tb) - (KP1_511499148 * Tc);
+	       Tf = FMA(KP563465113, T8, KP1_819263990 * Tb) + FNMA(KP1_511499148, Ta, KP1_081281634 * T9) - (KP1_979642883 * Tc);
+	       Th = FMA(KP1_081281634, Tc, KP1_819263990 * T8) + FMA(KP1_979642883, Tb, KP1_511499148 * T9) + (KP563465113 * Ta);
+	       Tj = FMA(KP563465113, Tb, KP1_979642883 * T9) + FNMS(KP1_511499148, T8, KP1_081281634 * Ta) - (KP1_819263990 * Tc);
+	  }
+	  T1 = ri[0];
+	  T2 = ri[WS(ris, 1)];
+	  T6 = ri[WS(ris, 5)];
+	  T5 = ri[WS(ris, 4)];
+	  T4 = ri[WS(ris, 3)];
+	  T3 = ri[WS(ris, 2)];
+	  T7 = FMA(KP1_682507065, T3, T1) + FNMS(KP284629676, T6, KP830830026 * T5) + FNMA(KP1_309721467, T4, KP1_918985947 * T2); /* T7, Tk, Te, Tg, Ti: five weighted combinations of the six ri inputs, using the cosine-based constants */
+	  Tk = FMA(KP1_682507065, T4, T1) + FNMS(KP1_918985947, T5, KP830830026 * T6) + FNMA(KP284629676, T3, KP1_309721467 * T2);
+	  Te = FMA(KP830830026, T4, T1) + FNMS(KP1_309721467, T6, KP1_682507065 * T5) + FNMA(KP1_918985947, T3, KP284629676 * T2);
+	  Tg = FMA(KP1_682507065, T2, T1) + FNMS(KP1_918985947, T6, KP830830026 * T3) + FNMA(KP1_309721467, T5, KP284629676 * T4);
+	  Ti = FMA(KP830830026, T2, T1) + FNMS(KP284629676, T5, KP1_682507065 * T6) + FNMA(KP1_918985947, T4, KP1_309721467 * T3);
+	  O[WS(os, 6)] = T7 - Td; /* each ri-derived term pairs with one ii-derived term; their difference and sum land in output slots k and 11-k */
+	  O[WS(os, 8)] = Te - Tf;
+	  O[WS(os, 4)] = Tk + Tl;
+	  O[WS(os, 5)] = T7 + Td;
+	  O[WS(os, 7)] = Tk - Tl;
+	  O[WS(os, 2)] = Ti + Tj;
+	  O[WS(os, 3)] = Te + Tf;
+	  O[WS(os, 10)] = Tg + Th;
+	  O[WS(os, 1)] = Tg - Th;
+	  O[WS(os, 9)] = Ti - Tj;
+	  O[0] = FMA(KP2_000000000, T2 + T3 + T4 + T5 + T6, T1); /* combines ri slot 0 with the sum of ri slots 1..5 */
+     }
+}
+
+static const khc2r_desc desc = { 11, "hc2r_11", {19, 10, 41, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* 11 and the name match the kernel above; 19, 10, 41 equal the additions / multiplications / fused multiply/add counts quoted in this file's header comment */
+
+void X(codelet_hc2r_11) (planner *p) { /* registration entry point: passes p, the kernel function and its descriptor to X(khc2r_register) */
+     X(khc2r_register) (p, hc2r_11, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_12.c b/src/fftw3/rdft/codelets/hc2r/hc2r_12.c
new file mode 100644
index 0000000..4fc3710
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_12.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:11 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 12 -name hc2r_12 -include hc2r.h */
+
+/*
+ * This function contains 38 FP additions, 10 FP multiplications,
+ * (or, 34 additions, 6 multiplications, 4 fused multiply/add),
+ * 25 stack variables, and 24 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_12.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_12.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_12.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_12(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Emitted by gen_hc2r with -n 12 (see the generator command line in this file's header). Each pass reads ri slots 0..6 and ii slots 1..5 and writes O slots 0..11. */
+     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* sqrt(3) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; after each one ri and ii move by ivs and O moves by ovs */
+	  E T8, Tb, Tm, TA, Tw, Tx, Tp, TB, T3, Tr, Tg, T6, Ts, Tk;
+	  { /* slots 1, 3, 5 of both ri and ii */
+	       E T9, Ta, Tn, To;
+	       T8 = ri[WS(ris, 3)];
+	       T9 = ri[WS(ris, 5)];
+	       Ta = ri[WS(ris, 1)];
+	       Tb = T9 + Ta;
+	       Tm = FMS(KP2_000000000, T8, Tb); /* NOTE(review): FMA/FMS are macros defined outside this view, presumably multiply-add variants -- confirm in the included headers */
+	       TA = KP1_732050807 * (T9 - Ta);
+	       Tw = ii[WS(iis, 3)];
+	       Tn = ii[WS(iis, 5)];
+	       To = ii[WS(iis, 1)];
+	       Tx = Tn + To;
+	       Tp = KP1_732050807 * (Tn - To);
+	       TB = FMA(KP2_000000000, Tw, Tx);
+	  }
+	  { /* ri slots 0 and 4 with ii slot 4 */
+	       E Tf, T1, T2, Td, Te;
+	       Te = ii[WS(iis, 4)];
+	       Tf = KP1_732050807 * Te;
+	       T1 = ri[0];
+	       T2 = ri[WS(ris, 4)];
+	       Td = T1 - T2;
+	       T3 = FMA(KP2_000000000, T2, T1);
+	       Tr = Td - Tf;
+	       Tg = Td + Tf;
+	  }
+	  { /* ri slots 6 and 2 with ii slot 2; same shape as the previous group */
+	       E Tj, T4, T5, Th, Ti;
+	       Ti = ii[WS(iis, 2)];
+	       Tj = KP1_732050807 * Ti;
+	       T4 = ri[WS(ris, 6)];
+	       T5 = ri[WS(ris, 2)];
+	       Th = T4 - T5;
+	       T6 = FMA(KP2_000000000, T5, T4);
+	       Ts = Th + Tj;
+	       Tk = Th - Tj;
+	  }
+	  { /* every output pair below is (difference, sum) of two intermediates, written to slots k and k+6 in some order */
+	       E T7, Tc, Tz, TC;
+	       T7 = T3 + T6;
+	       Tc = KP2_000000000 * (T8 + Tb); /* twice the sum of ri slots 1, 3 and 5 */
+	       O[WS(os, 6)] = T7 - Tc;
+	       O[0] = T7 + Tc;
+	       {
+		    E Tl, Tq, TD, TE;
+		    Tl = Tg + Tk;
+		    Tq = Tm - Tp;
+		    O[WS(os, 2)] = Tl - Tq;
+		    O[WS(os, 8)] = Tl + Tq;
+		    TD = Tg - Tk;
+		    TE = TB - TA;
+		    O[WS(os, 5)] = TD - TE;
+		    O[WS(os, 11)] = TD + TE;
+	       }
+	       Tz = Tr - Ts;
+	       TC = TA + TB;
+	       O[WS(os, 1)] = Tz - TC;
+	       O[WS(os, 7)] = Tz + TC;
+	       {
+		    E Tv, Ty, Tt, Tu;
+		    Tv = T3 - T6;
+		    Ty = KP2_000000000 * (Tw - Tx);
+		    O[WS(os, 9)] = Tv - Ty;
+		    O[WS(os, 3)] = Tv + Ty;
+		    Tt = Tr + Ts;
+		    Tu = Tm + Tp;
+		    O[WS(os, 10)] = Tt - Tu;
+		    O[WS(os, 4)] = Tt + Tu;
+	       }
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 12, "hc2r_12", {34, 6, 4, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* 12 and the name match the kernel above; 34, 6, 4 equal the additions / multiplications / fused multiply/add counts quoted in this file's header comment */
+
+void X(codelet_hc2r_12) (planner *p) { /* registration entry point: passes p, the kernel function and its descriptor to X(khc2r_register) */
+     X(khc2r_register) (p, hc2r_12, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_13.c b/src/fftw3/rdft/codelets/hc2r/hc2r_13.c
new file mode 100644
index 0000000..3e1ccbd
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_13.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:12 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 13 -name hc2r_13 -include hc2r.h */
+
+/*
+ * This function contains 76 FP additions, 35 FP multiplications,
+ * (or, 56 additions, 15 multiplications, 20 fused multiply/add),
+ * 56 stack variables, and 26 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_13.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_13.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_13.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_13(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Emitted by gen_hc2r with -n 13 (see the generator command line in this file's header). Each pass reads ri slots 0..6 and ii slots 1..6 and writes O slots 0..12. */
+     DK(KP1_007074065, +1.007074065727533254493747707736933954186697125);
+     DK(KP227708958, +0.227708958111581597949308691735310621069285120);
+     DK(KP531932498, +0.531932498429674575175042127684371897596660533);
+     DK(KP774781170, +0.774781170935234584261351932853525703557550433);
+     DK(KP265966249, +0.265966249214837287587521063842185948798330267);
+     DK(KP516520780, +0.516520780623489722840901288569017135705033622);
+     DK(KP151805972, +0.151805972074387731966205794490207080712856746);
+     DK(KP503537032, +0.503537032863766627246873853868466977093348562);
+     DK(KP166666666, +0.166666666666666666666666666666666666666666667); /* 1/6 */
+     DK(KP600925212, +0.600925212577331548853203544578415991041882762);
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP256247671, +0.256247671582936600958684654061725059144125175);
+     DK(KP156891391, +0.156891391051584611046832726756003269660212636);
+     DK(KP348277202, +0.348277202304271810011321589858529485233929352);
+     DK(KP1_150281458, +1.150281458948006242736771094910906776922003215);
+     DK(KP300238635, +0.300238635966332641462884626667381504676006424);
+     DK(KP011599105, +0.011599105605768290721655456654083252189827041);
+     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* sqrt(3) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; after each one ri and ii move by ivs and O moves by ovs */
+	  E TG, TS, TR, T15, TJ, TT, T1, Tm, Tc, Td, Tg, Tj, Tk, Tn, To;
+	  E Tp;
+	  { /* first stage on the six ii inputs; results kept in TG, TS, TR, T15, TJ, TT */
+	       E Ts, Tv, Tw, TE, TC, TB, Tz, TD, TA, TF;
+	       {
+		    E Tt, Tu, Tx, Ty;
+		    Ts = ii[WS(iis, 1)];
+		    Tt = ii[WS(iis, 3)];
+		    Tu = ii[WS(iis, 4)];
+		    Tv = Tt - Tu;
+		    Tw = FMS(KP2_000000000, Ts, Tv); /* NOTE(review): FMA/FMS/FNMS are macros defined outside this view, presumably multiply-add variants -- confirm in the included headers */
+		    TE = KP1_732050807 * (Tt + Tu);
+		    TC = ii[WS(iis, 5)];
+		    Tx = ii[WS(iis, 6)];
+		    Ty = ii[WS(iis, 2)];
+		    TB = Tx + Ty;
+		    Tz = KP1_732050807 * (Tx - Ty);
+		    TD = FNMS(KP2_000000000, TC, TB);
+	       }
+	       TA = Tw + Tz;
+	       TF = TD - TE;
+	       TG = FMA(KP011599105, TA, KP300238635 * TF);
+	       TS = FNMS(KP011599105, TF, KP300238635 * TA);
+	       {
+		    E TP, TQ, TH, TI;
+		    TP = Ts + Tv;
+		    TQ = TB + TC;
+		    TR = FNMS(KP348277202, TQ, KP1_150281458 * TP);
+		    T15 = FMA(KP348277202, TP, KP1_150281458 * TQ);
+		    TH = Tw - Tz;
+		    TI = TE + TD;
+		    TJ = FMA(KP156891391, TH, KP256247671 * TI);
+		    TT = FNMS(KP256247671, TH, KP156891391 * TI);
+	       }
+	  }
+	  { /* first stage on the seven ri inputs; results kept in T1, Tm, Tc, Td, Tg, Tj, Tk, Tn, To, Tp */
+	       E Tb, Ti, Tf, T6, Th, Te;
+	       T1 = ri[0];
+	       { /* ri slots 5, 2, 6 */
+		    E T7, T8, T9, Ta;
+		    T7 = ri[WS(ris, 5)];
+		    T8 = ri[WS(ris, 2)];
+		    T9 = ri[WS(ris, 6)];
+		    Ta = T8 + T9;
+		    Tb = T7 + Ta;
+		    Ti = FNMS(KP500000000, Ta, T7);
+		    Tf = T8 - T9;
+	       }
+	       { /* ri slots 1, 3, 4; same shape as the previous group */
+		    E T2, T3, T4, T5;
+		    T2 = ri[WS(ris, 1)];
+		    T3 = ri[WS(ris, 3)];
+		    T4 = ri[WS(ris, 4)];
+		    T5 = T3 + T4;
+		    T6 = T2 + T5;
+		    Th = FNMS(KP500000000, T5, T2);
+		    Te = T3 - T4;
+	       }
+	       Tm = KP600925212 * (T6 - Tb);
+	       Tc = T6 + Tb; /* sum of ri slots 1..6 */
+	       Td = FNMS(KP166666666, Tc, T1);
+	       Tg = Te + Tf;
+	       Tj = Th + Ti;
+	       Tk = FMA(KP503537032, Tg, KP151805972 * Tj);
+	       Tn = Th - Ti;
+	       To = Te - Tf;
+	       Tp = FNMS(KP265966249, To, KP516520780 * Tn);
+	  }
+	  O[0] = FMA(KP2_000000000, Tc, T1); /* combines ri slot 0 with the sum of ri slots 1..6 */
+	  { /* second stage: mixes the ri-derived and ii-derived intermediates and writes output slots 1..12 */
+	       E TK, T1b, TV, T12, T16, T18, TO, T1a, Tr, T17, T11, T13;
+	       {
+		    E TU, T14, TM, TN;
+		    TK = KP1_732050807 * (TG + TJ);
+		    T1b = KP1_732050807 * (TS - TT);
+		    TU = TS + TT;
+		    TV = TR - TU;
+		    T12 = FMA(KP2_000000000, TU, TR);
+		    T14 = TG - TJ;
+		    T16 = FMS(KP2_000000000, T14, T15);
+		    T18 = T14 + T15;
+		    TM = FMA(KP774781170, To, KP531932498 * Tn);
+		    TN = FNMS(KP1_007074065, Tj, KP227708958 * Tg);
+		    TO = TM - TN;
+		    T1a = TM + TN;
+		    {
+			 E Tl, Tq, TZ, T10;
+			 Tl = Td - Tk;
+			 Tq = Tm - Tp;
+			 Tr = Tl - Tq;
+			 T17 = Tq + Tl;
+			 TZ = FMA(KP2_000000000, Tk, Td);
+			 T10 = FMA(KP2_000000000, Tp, Tm);
+			 T11 = TZ - T10;
+			 T13 = T10 + TZ;
+		    }
+	       }
+	       O[WS(os, 5)] = T11 - T12; /* outputs are written in (difference, sum) pairs to slots k and 13-k */
+	       O[WS(os, 12)] = T13 - T16;
+	       O[WS(os, 1)] = T13 + T16;
+	       O[WS(os, 8)] = T11 + T12;
+	       {
+		    E TL, TW, T19, T1c;
+		    TL = Tr - TK;
+		    TW = TO - TV;
+		    O[WS(os, 7)] = TL - TW;
+		    O[WS(os, 2)] = TL + TW;
+		    T19 = T17 - T18;
+		    T1c = T1a + T1b;
+		    O[WS(os, 3)] = T19 - T1c;
+		    O[WS(os, 9)] = T1c + T19;
+	       }
+	       {
+		    E T1d, T1e, TX, TY;
+		    T1d = T1a - T1b;
+		    T1e = T17 + T18;
+		    O[WS(os, 4)] = T1d + T1e;
+		    O[WS(os, 10)] = T1e - T1d;
+		    TX = Tr + TK;
+		    TY = TO + TV;
+		    O[WS(os, 6)] = TX - TY;
+		    O[WS(os, 11)] = TX + TY;
+	       }
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 13, "hc2r_13", {56, 15, 20, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* 13 and the name match the kernel above; 56, 15, 20 equal the additions / multiplications / fused multiply/add counts quoted in this file's header comment */
+
+void X(codelet_hc2r_13) (planner *p) { /* registration entry point: passes p, the kernel function and its descriptor to X(khc2r_register) */
+     X(khc2r_register) (p, hc2r_13, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_14.c b/src/fftw3/rdft/codelets/hc2r/hc2r_14.c
new file mode 100644
index 0000000..dbbb22e
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_14.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:12 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 14 -name hc2r_14 -include hc2r.h */
+
+/*
+ * This function contains 62 FP additions, 38 FP multiplications,
+ * (or, 36 additions, 12 multiplications, 26 fused multiply/add),
+ * 28 stack variables, and 28 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_14.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_14.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_14.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_14(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Emitted by gen_hc2r with -n 14 (see the generator command line in this file's header). Each pass reads ri slots 0..7 and ii slots 1..6 and writes O slots 0..13. */
+     DK(KP1_801937735, +1.801937735804838252472204639014890102331838324); /* 2*cos(pi/7) */
+     DK(KP445041867, +0.445041867912628808577805128993589518932711138); /* 2*cos(3*pi/7) */
+     DK(KP1_246979603, +1.246979603717467061050009768008479621264549462); /* 2*cos(2*pi/7) */
+     DK(KP867767478, +0.867767478235116240951536665696717509219981456); /* 2*sin(pi/7) */
+     DK(KP1_949855824, +1.949855824363647214036263365987862434465571601); /* 2*sin(3*pi/7) */
+     DK(KP1_563662964, +1.563662964936059617416889053348115500464669037); /* 2*sin(2*pi/7) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; after each one ri and ii move by ivs and O moves by ovs */
+	  E T3, Td, T6, Te, Tq, Tz, Tn, Ty, Tc, Tg, Tk, Tx, T9, Tf, T1;
+	  E T2;
+	  T1 = ri[0];
+	  T2 = ri[WS(ris, 7)];
+	  T3 = T1 - T2; /* difference and sum of ri slots 0 and 7 */
+	  Td = T1 + T2;
+	  { /* slots 2 and 5 of ri and ii: difference and sum of each pair */
+	       E T4, T5, To, Tp;
+	       T4 = ri[WS(ris, 2)];
+	       T5 = ri[WS(ris, 5)];
+	       T6 = T4 - T5;
+	       Te = T4 + T5;
+	       To = ii[WS(iis, 2)];
+	       Tp = ii[WS(iis, 5)];
+	       Tq = To - Tp;
+	       Tz = To + Tp;
+	  }
+	  { /* slots 6 and 1 of ri and ii: difference and sum of each pair */
+	       E Tl, Tm, Ta, Tb;
+	       Tl = ii[WS(iis, 6)];
+	       Tm = ii[WS(iis, 1)];
+	       Tn = Tl - Tm;
+	       Ty = Tl + Tm;
+	       Ta = ri[WS(ris, 6)];
+	       Tb = ri[WS(ris, 1)];
+	       Tc = Ta - Tb;
+	       Tg = Ta + Tb;
+	  }
+	  { /* slots 4 and 3 of ri and ii: difference and sum of each pair */
+	       E Ti, Tj, T7, T8;
+	       Ti = ii[WS(iis, 4)];
+	       Tj = ii[WS(iis, 3)];
+	       Tk = Ti - Tj;
+	       Tx = Ti + Tj;
+	       T7 = ri[WS(ris, 4)];
+	       T8 = ri[WS(ris, 3)];
+	       T9 = T7 - T8;
+	       Tf = T7 + T8;
+	  }
+	  O[WS(os, 7)] = FMA(KP2_000000000, T6 + T9 + Tc, T3); /* NOTE(review): FMA/FNMA/FNMS are macros defined outside this view, presumably multiply-add variants -- confirm in the included headers */
+	  O[0] = FMA(KP2_000000000, Te + Tf + Tg, Td);
+	  { /* outputs 4/10 use the pair sums of ri with the pair differences of ii; outputs 5/9 use the opposite combination */
+	       E Tr, Th, TE, TD;
+	       Tr = FNMS(KP1_949855824, Tn, KP1_563662964 * Tk) - (KP867767478 * Tq);
+	       Th = FMA(KP1_246979603, Tf, Td) + FNMA(KP445041867, Tg, KP1_801937735 * Te);
+	       O[WS(os, 4)] = Th - Tr;
+	       O[WS(os, 10)] = Th + Tr;
+	       TE = FMA(KP867767478, Tx, KP1_563662964 * Ty) - (KP1_949855824 * Tz);
+	       TD = FMA(KP1_246979603, Tc, T3) + FNMA(KP1_801937735, T9, KP445041867 * T6);
+	       O[WS(os, 5)] = TD - TE;
+	       O[WS(os, 9)] = TD + TE;
+	  }
+	  { /* outputs 12/2 and 11/3, same two-term pattern */
+	       E Tt, Ts, TA, Tw;
+	       Tt = FMA(KP867767478, Tk, KP1_563662964 * Tn) - (KP1_949855824 * Tq);
+	       Ts = FMA(KP1_246979603, Tg, Td) + FNMA(KP1_801937735, Tf, KP445041867 * Te);
+	       O[WS(os, 12)] = Ts - Tt;
+	       O[WS(os, 2)] = Ts + Tt;
+	       TA = FNMS(KP1_949855824, Ty, KP1_563662964 * Tx) - (KP867767478 * Tz);
+	       Tw = FMA(KP1_246979603, T9, T3) + FNMA(KP445041867, Tc, KP1_801937735 * T6);
+	       O[WS(os, 11)] = Tw - TA;
+	       O[WS(os, 3)] = Tw + TA;
+	  }
+	  { /* outputs 1/13 and 8/6, same two-term pattern */
+	       E TC, TB, Tv, Tu;
+	       TC = FMA(KP1_563662964, Tz, KP1_949855824 * Tx) + (KP867767478 * Ty);
+	       TB = FMA(KP1_246979603, T6, T3) + FNMA(KP1_801937735, Tc, KP445041867 * T9);
+	       O[WS(os, 1)] = TB - TC;
+	       O[WS(os, 13)] = TB + TC;
+	       Tv = FMA(KP1_563662964, Tq, KP1_949855824 * Tk) + (KP867767478 * Tn);
+	       Tu = FMA(KP1_246979603, Te, Td) + FNMA(KP1_801937735, Tg, KP445041867 * Tf);
+	       O[WS(os, 8)] = Tu - Tv;
+	       O[WS(os, 6)] = Tu + Tv;
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 14, "hc2r_14", {36, 12, 26, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* 14 and the name match the kernel above; 36, 12, 26 equal the additions / multiplications / fused multiply/add counts quoted in this file's header comment */
+
+void X(codelet_hc2r_14) (planner *p) { /* registration entry point: passes p, the kernel function and its descriptor to X(khc2r_register) */
+     X(khc2r_register) (p, hc2r_14, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_15.c b/src/fftw3/rdft/codelets/hc2r/hc2r_15.c
new file mode 100644
index 0000000..d2eed35
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_15.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:12 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 15 -name hc2r_15 -include hc2r.h */
+
+/*
+ * This function contains 64 FP additions, 31 FP multiplications,
+ * (or, 47 additions, 14 multiplications, 17 fused multiply/add),
+ * 44 stack variables, and 30 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_15.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_15.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_15.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_15(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs) /* size-15 codelet emitted by gen_hc2r (-n 15 -sign 1); by its name presumably a halfcomplex-to-real transform -- confirm in hc2r.h */
+{ /* per iteration: reads ri[0], ri[WS(ris, k)] and ii[WS(iis, k)] for k = 1..7; writes O[0] and O[WS(os, k)] for k = 1..14 */
+     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); /* numerically sqrt(5)/2 */
+     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); /* numerically 2*sin(2*pi/5) */
+     DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); /* numerically 2*sin(pi/5) */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* numerically sqrt(3) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; each pass advances ri and ii by ivs and O by ovs */
+	  E T3, Tu, Ti, TB, TZ, T10, TE, TG, TJ, Tn, Tv, Ts, Tw, T8, Td;
+	  E Te;
+	  { /* loads ri[0], ri[WS(ris, 5)] and ii[WS(iis, 5)]; produces T3, Tu, Ti */
+	       E Th, T1, T2, Tf, Tg;
+	       Tg = ii[WS(iis, 5)];
+	       Th = KP1_732050807 * Tg;
+	       T1 = ri[0];
+	       T2 = ri[WS(ris, 5)];
+	       Tf = T1 - T2;
+	       T3 = FMA(KP2_000000000, T2, T1);
+	       Tu = Tf - Th;
+	       Ti = Tf + Th;
+	  }
+	  { /* loads the remaining inputs (k = 1, 2, 3, 4, 6, 7 of both ri and ii) and forms the intermediates shared by the output stages */
+	       E T4, TD, T9, TI, T5, T6, T7, Ta, Tb, Tc, Tr, TH, Tm, TC, Tj;
+	       E To;
+	       T4 = ri[WS(ris, 3)];
+	       TD = ii[WS(iis, 3)];
+	       T9 = ri[WS(ris, 6)];
+	       TI = ii[WS(iis, 6)];
+	       T5 = ri[WS(ris, 7)];
+	       T6 = ri[WS(ris, 2)];
+	       T7 = T5 + T6;
+	       Ta = ri[WS(ris, 4)];
+	       Tb = ri[WS(ris, 1)];
+	       Tc = Ta + Tb;
+	       {
+		    E Tp, Tq, Tk, Tl;
+		    Tp = ii[WS(iis, 4)];
+		    Tq = ii[WS(iis, 1)];
+		    Tr = KP866025403 * (Tp + Tq);
+		    TH = Tp - Tq;
+		    Tk = ii[WS(iis, 7)];
+		    Tl = ii[WS(iis, 2)];
+		    Tm = KP866025403 * (Tk - Tl);
+		    TC = Tk + Tl;
+	       }
+	       TB = KP866025403 * (T5 - T6);
+	       TZ = TD - TC;
+	       T10 = TI - TH;
+	       TE = FMA(KP500000000, TC, TD);
+	       TG = KP866025403 * (Ta - Tb);
+	       TJ = FMA(KP500000000, TH, TI);
+	       Tj = FNMS(KP500000000, T7, T4);
+	       Tn = Tj - Tm;
+	       Tv = Tj + Tm;
+	       To = FNMS(KP500000000, Tc, T9);
+	       Ts = To - Tr;
+	       Tw = To + Tr;
+	       T8 = T4 + T7;
+	       Td = T9 + Tc;
+	       Te = T8 + Td;
+	  }
+	  O[0] = FMA(KP2_000000000, Te, T3);
+	  { /* writes outputs 3, 6, 9, 12 */
+	       E T11, T13, TY, T12, TW, TX;
+	       T11 = FNMS(KP1_902113032, T10, KP1_175570504 * TZ);
+	       T13 = FMA(KP1_902113032, TZ, KP1_175570504 * T10);
+	       TW = FNMS(KP500000000, Te, T3);
+	       TX = KP1_118033988 * (T8 - Td);
+	       TY = TW - TX;
+	       T12 = TX + TW;
+	       O[WS(os, 12)] = TY - T11;
+	       O[WS(os, 9)] = T12 + T13;
+	       O[WS(os, 3)] = TY + T11;
+	       O[WS(os, 6)] = T12 - T13;
+	  }
+	  { /* writes outputs 2, 5, 8, 11, 14 */
+	       E TP, Tt, TO, TT, TV, TR, TS, TU, TQ;
+	       TP = KP1_118033988 * (Tn - Ts);
+	       Tt = Tn + Ts;
+	       TO = FNMS(KP500000000, Tt, Ti);
+	       TR = TE - TB;
+	       TS = TJ - TG;
+	       TT = FNMS(KP1_902113032, TS, KP1_175570504 * TR);
+	       TV = FMA(KP1_902113032, TR, KP1_175570504 * TS);
+	       O[WS(os, 5)] = FMA(KP2_000000000, Tt, Ti);
+	       TU = TP + TO;
+	       O[WS(os, 11)] = TU - TV;
+	       O[WS(os, 14)] = TU + TV;
+	       TQ = TO - TP;
+	       O[WS(os, 2)] = TQ - TT;
+	       O[WS(os, 8)] = TQ + TT;
+	  }
+	  { /* writes outputs 1, 4, 7, 10, 13 */
+	       E Tz, Tx, Ty, TL, TN, TF, TK, TM, TA;
+	       Tz = KP1_118033988 * (Tv - Tw);
+	       Tx = Tv + Tw;
+	       Ty = FNMS(KP500000000, Tx, Tu);
+	       TF = TB + TE;
+	       TK = TG + TJ;
+	       TL = FNMS(KP1_902113032, TK, KP1_175570504 * TF);
+	       TN = FMA(KP1_902113032, TF, KP1_175570504 * TK);
+	       O[WS(os, 10)] = FMA(KP2_000000000, Tx, Tu);
+	       TM = Tz + Ty;
+	       O[WS(os, 1)] = TM - TN;
+	       O[WS(os, 4)] = TM + TN;
+	       TA = Ty - Tz;
+	       O[WS(os, 7)] = TA - TL;
+	       O[WS(os, 13)] = TA + TL;
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 15, "hc2r_15", {47, 14, 17, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor for hc2r_15: first field 15 (presumably the transform size); {47, 14, 17, ...} equals the additions / multiplications / fused multiply-adds quoted in this file's header comment */
+
+void X(codelet_hc2r_15) (planner *p) { /* passes hc2r_15 and its descriptor to X(khc2r_register) for planner p */
+     X(khc2r_register) (p, hc2r_15, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_16.c b/src/fftw3/rdft/codelets/hc2r/hc2r_16.c
new file mode 100644
index 0000000..60ca018
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_16.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:13 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 16 -name hc2r_16 -include hc2r.h */
+
+/*
+ * This function contains 58 FP additions, 18 FP multiplications,
+ * (or, 54 additions, 14 multiplications, 4 fused multiply/add),
+ * 31 stack variables, and 32 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_16.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_16.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_16.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_16(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs) /* size-16 codelet emitted by gen_hc2r (-n 16 -sign 1); by its name presumably a halfcomplex-to-real transform -- confirm in hc2r.h */
+{ /* per iteration: reads ri[0], ri[WS(ris, k)] for k = 1..8 and ii[WS(iis, k)] for k = 1..7; writes O[0] and O[WS(os, k)] for k = 1..15 */
+     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); /* numerically 2*cos(pi/8) */
+     DK(KP765366864, +0.765366864730179543456919968060797733522689125); /* numerically 2*sin(pi/8) */
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* numerically sqrt(2) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; each pass advances ri and ii by ivs and O by ovs */
+	  E T9, TS, Tl, TG, T6, TR, Ti, TD, Td, Tq, Tg, Tt, Tn, Tu, TV;
+	  E TU, TN, TK;
+	  { /* loads inputs k = 2 and 6 of ri and ii; produces T9, TS, Tl, TG */
+	       E T7, T8, TE, Tj, Tk, TF;
+	       T7 = ri[WS(ris, 2)];
+	       T8 = ri[WS(ris, 6)];
+	       TE = T7 - T8;
+	       Tj = ii[WS(iis, 2)];
+	       Tk = ii[WS(iis, 6)];
+	       TF = Tj + Tk;
+	       T9 = KP2_000000000 * (T7 + T8);
+	       TS = KP1_414213562 * (TE + TF);
+	       Tl = KP2_000000000 * (Tj - Tk);
+	       TG = KP1_414213562 * (TE - TF);
+	  }
+	  { /* loads ri[0], ri at k = 4 and 8, ii at k = 4; produces T6, TR, Ti, TD */
+	       E T5, TC, T3, TA;
+	       {
+		    E T4, TB, T1, T2;
+		    T4 = ri[WS(ris, 4)];
+		    T5 = KP2_000000000 * T4;
+		    TB = ii[WS(iis, 4)];
+		    TC = KP2_000000000 * TB;
+		    T1 = ri[0];
+		    T2 = ri[WS(ris, 8)];
+		    T3 = T1 + T2;
+		    TA = T1 - T2;
+	       }
+	       T6 = T3 + T5;
+	       TR = TA + TC;
+	       Ti = T3 - T5;
+	       TD = TA - TC;
+	  }
+	  { /* loads the odd-indexed inputs (k = 1, 3, 5, 7 of ri and ii); produces Td, Tq, Tg, Tt, Tn, Tu, TV, TU, TN, TK */
+	       E TI, TM, TL, TJ;
+	       {
+		    E Tb, Tc, To, Tp;
+		    Tb = ri[WS(ris, 1)];
+		    Tc = ri[WS(ris, 7)];
+		    Td = Tb + Tc;
+		    TI = Tb - Tc;
+		    To = ii[WS(iis, 1)];
+		    Tp = ii[WS(iis, 7)];
+		    Tq = To - Tp;
+		    TM = To + Tp;
+	       }
+	       {
+		    E Te, Tf, Tr, Ts;
+		    Te = ri[WS(ris, 5)];
+		    Tf = ri[WS(ris, 3)];
+		    Tg = Te + Tf;
+		    TL = Te - Tf;
+		    Tr = ii[WS(iis, 5)];
+		    Ts = ii[WS(iis, 3)];
+		    Tt = Tr - Ts;
+		    TJ = Tr + Ts;
+	       }
+	       Tn = Td - Tg;
+	       Tu = Tq - Tt;
+	       TV = TM - TL;
+	       TU = TI + TJ;
+	       TN = TL + TM;
+	       TK = TI - TJ;
+	  }
+	  { /* writes outputs 0, 8, 3, 11 */
+	       E Ta, Th, TT, TW;
+	       Ta = T6 + T9;
+	       Th = KP2_000000000 * (Td + Tg);
+	       O[WS(os, 8)] = Ta - Th;
+	       O[0] = Ta + Th;
+	       TT = TR - TS;
+	       TW = FNMS(KP1_847759065, TV, KP765366864 * TU);
+	       O[WS(os, 11)] = TT - TW;
+	       O[WS(os, 3)] = TT + TW;
+	  }
+	  { /* writes outputs 7, 15, 2, 10 */
+	       E TX, TY, Tm, Tv;
+	       TX = TR + TS;
+	       TY = FMA(KP1_847759065, TU, KP765366864 * TV);
+	       O[WS(os, 7)] = TX - TY;
+	       O[WS(os, 15)] = TX + TY;
+	       Tm = Ti - Tl;
+	       Tv = KP1_414213562 * (Tn - Tu);
+	       O[WS(os, 10)] = Tm - Tv;
+	       O[WS(os, 2)] = Tm + Tv;
+	  }
+	  { /* writes outputs 6, 14, 1, 9 */
+	       E Tw, Tx, TH, TO;
+	       Tw = Ti + Tl;
+	       Tx = KP1_414213562 * (Tn + Tu);
+	       O[WS(os, 6)] = Tw - Tx;
+	       O[WS(os, 14)] = Tw + Tx;
+	       TH = TD + TG;
+	       TO = FNMS(KP765366864, TN, KP1_847759065 * TK);
+	       O[WS(os, 9)] = TH - TO;
+	       O[WS(os, 1)] = TH + TO;
+	  }
+	  { /* writes outputs 5, 13, 4, 12 */
+	       E TP, TQ, Ty, Tz;
+	       TP = TD - TG;
+	       TQ = FMA(KP765366864, TK, KP1_847759065 * TN);
+	       O[WS(os, 5)] = TP - TQ;
+	       O[WS(os, 13)] = TP + TQ;
+	       Ty = T6 - T9;
+	       Tz = KP2_000000000 * (Tt + Tq);
+	       O[WS(os, 4)] = Ty - Tz;
+	       O[WS(os, 12)] = Ty + Tz;
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 16, "hc2r_16", {54, 14, 4, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor for hc2r_16: first field 16 (presumably the transform size); {54, 14, 4, ...} equals the additions / multiplications / fused multiply-adds quoted in this file's header comment */
+
+void X(codelet_hc2r_16) (planner *p) { /* passes hc2r_16 and its descriptor to X(khc2r_register) for planner p */
+     X(khc2r_register) (p, hc2r_16, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_3.c b/src/fftw3/rdft/codelets/hc2r/hc2r_3.c
new file mode 100644
index 0000000..990d625
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_3.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:11 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 3 -name hc2r_3 -include hc2r.h */
+
+/*
+ * This function contains 4 FP additions, 2 FP multiplications,
+ * (or, 3 additions, 1 multiplications, 1 fused multiply/add),
+ * 8 stack variables, and 6 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_3.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_3.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_3.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_3(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs) /* size-3 codelet emitted by gen_hc2r (-n 3 -sign 1); by its name presumably a halfcomplex-to-real transform -- confirm in hc2r.h */
+{ /* per iteration: reads ri[0], ri[WS(ris, 1)] and ii[WS(iis, 1)]; writes O[0], O[WS(os, 1)] and O[WS(os, 2)] */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* numerically sqrt(3) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; each pass advances ri and ii by ivs and O by ovs */
+	  E T5, T1, T2, T3, T4;
+	  T4 = ii[WS(iis, 1)];
+	  T5 = KP1_732050807 * T4;
+	  T1 = ri[0];
+	  T2 = ri[WS(ris, 1)];
+	  T3 = T1 - T2; /* shared by outputs 1 and 2, which differ only in the sign of T5 */
+	  O[0] = FMA(KP2_000000000, T2, T1);
+	  O[WS(os, 2)] = T3 + T5;
+	  O[WS(os, 1)] = T3 - T5;
+     }
+}
+
+static const khc2r_desc desc = { 3, "hc2r_3", {3, 1, 1, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor for hc2r_3: first field 3 (presumably the transform size); {3, 1, 1, ...} equals the additions / multiplications / fused multiply-adds quoted in this file's header comment */
+
+void X(codelet_hc2r_3) (planner *p) { /* passes hc2r_3 and its descriptor to X(khc2r_register) for planner p */
+     X(khc2r_register) (p, hc2r_3, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_32.c b/src/fftw3/rdft/codelets/hc2r/hc2r_32.c
new file mode 100644
index 0000000..6611654
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_32.c
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:14 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 32 -name hc2r_32 -include hc2r.h */
+
+/*
+ * This function contains 156 FP additions, 50 FP multiplications,
+ * (or, 140 additions, 34 multiplications, 16 fused multiply/add),
+ * 54 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_32(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs) /* size-32 codelet emitted by gen_hc2r (-n 32 -sign 1); by its name presumably a halfcomplex-to-real transform -- confirm in hc2r.h */
+{ /* per iteration: reads ri[0], ri[WS(ris, k)] for k = 1..16 and ii[WS(iis, k)] for k = 1..15; writes O[0] and O[WS(os, k)] for k = 1..31 */
+     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); /* numerically 2*cos(3*pi/16) */
+     DK(KP1_111140466, +1.111140466039204449485661627897065748749874382); /* numerically 2*sin(3*pi/16) */
+     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); /* numerically 2*cos(pi/16) */
+     DK(KP390180644, +0.390180644032256535696569736954044481855383236); /* numerically 2*sin(pi/16) */
+     DK(KP765366864, +0.765366864730179543456919968060797733522689125); /* numerically 2*sin(pi/8) */
+     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); /* numerically 2*cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* numerically sqrt(2) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; each pass advances ri and ii by ivs and O by ovs */
+	  E T9, T2c, TB, T1y, T6, T2b, Ty, T1v, Th, T2e, T2f, TD, TK, T1C, T1F;
+	  E T1h, Tp, T2i, T2m, TN, T13, T1K, T1Y, T1k, Tw, TU, T1l, TW, T1V, T2j;
+	  E T1R, T2l;
+	  { /* loads inputs k = 4 and 12 of ri and ii; produces T9, T2c, TB, T1y */
+	       E T7, T8, T1w, Tz, TA, T1x;
+	       T7 = ri[WS(ris, 4)];
+	       T8 = ri[WS(ris, 12)];
+	       T1w = T7 - T8;
+	       Tz = ii[WS(iis, 4)];
+	       TA = ii[WS(iis, 12)];
+	       T1x = Tz + TA;
+	       T9 = KP2_000000000 * (T7 + T8);
+	       T2c = KP1_414213562 * (T1w + T1x);
+	       TB = KP2_000000000 * (Tz - TA);
+	       T1y = KP1_414213562 * (T1w - T1x);
+	  }
+	  { /* loads ri[0], ri at k = 8 and 16, ii at k = 8; produces T6, T2b, Ty, T1v */
+	       E T5, T1u, T3, T1s;
+	       {
+		    E T4, T1t, T1, T2;
+		    T4 = ri[WS(ris, 8)];
+		    T5 = KP2_000000000 * T4;
+		    T1t = ii[WS(iis, 8)];
+		    T1u = KP2_000000000 * T1t;
+		    T1 = ri[0];
+		    T2 = ri[WS(ris, 16)];
+		    T3 = T1 + T2;
+		    T1s = T1 - T2;
+	       }
+	       T6 = T3 + T5;
+	       T2b = T1s + T1u;
+	       Ty = T3 - T5;
+	       T1v = T1s - T1u;
+	  }
+	  { /* loads inputs k = 2, 6, 10, 14 of ri and ii; produces Th, T2e, T2f, TD, TK, T1C, T1F, T1h */
+	       E Td, T1A, TG, T1E, Tg, T1D, TJ, T1B;
+	       {
+		    E Tb, Tc, TE, TF;
+		    Tb = ri[WS(ris, 2)];
+		    Tc = ri[WS(ris, 14)];
+		    Td = Tb + Tc;
+		    T1A = Tb - Tc;
+		    TE = ii[WS(iis, 2)];
+		    TF = ii[WS(iis, 14)];
+		    TG = TE - TF;
+		    T1E = TE + TF;
+	       }
+	       {
+		    E Te, Tf, TH, TI;
+		    Te = ri[WS(ris, 10)];
+		    Tf = ri[WS(ris, 6)];
+		    Tg = Te + Tf;
+		    T1D = Te - Tf;
+		    TH = ii[WS(iis, 10)];
+		    TI = ii[WS(iis, 6)];
+		    TJ = TH - TI;
+		    T1B = TH + TI;
+	       }
+	       Th = KP2_000000000 * (Td + Tg);
+	       T2e = T1A + T1B;
+	       T2f = T1E - T1D;
+	       TD = Td - Tg;
+	       TK = TG - TJ;
+	       T1C = T1A - T1B;
+	       T1F = T1D + T1E;
+	       T1h = KP2_000000000 * (TJ + TG);
+	  }
+	  { /* loads inputs k = 1, 7, 9, 15 of ri and ii; produces Tp, T2i, T2m, TN, T13, T1K, T1Y, T1k */
+	       E Tl, T1I, TZ, T1X, To, T1W, T12, T1J;
+	       {
+		    E Tj, Tk, TX, TY;
+		    Tj = ri[WS(ris, 1)];
+		    Tk = ri[WS(ris, 15)];
+		    Tl = Tj + Tk;
+		    T1I = Tj - Tk;
+		    TX = ii[WS(iis, 1)];
+		    TY = ii[WS(iis, 15)];
+		    TZ = TX - TY;
+		    T1X = TX + TY;
+	       }
+	       {
+		    E Tm, Tn, T10, T11;
+		    Tm = ri[WS(ris, 9)];
+		    Tn = ri[WS(ris, 7)];
+		    To = Tm + Tn;
+		    T1W = Tm - Tn;
+		    T10 = ii[WS(iis, 9)];
+		    T11 = ii[WS(iis, 7)];
+		    T12 = T10 - T11;
+		    T1J = T10 + T11;
+	       }
+	       Tp = Tl + To;
+	       T2i = T1I + T1J;
+	       T2m = T1X - T1W;
+	       TN = Tl - To;
+	       T13 = TZ - T12;
+	       T1K = T1I - T1J;
+	       T1Y = T1W + T1X;
+	       T1k = T12 + TZ;
+	  }
+	  { /* loads inputs k = 3, 5, 11, 13 of ri and ii; produces Tw, TU, T1l, TW, T1V, T2j, T1R, T2l */
+	       E Ts, T1L, TT, T1M, Tv, T1O, TQ, T1P;
+	       {
+		    E Tq, Tr, TR, TS;
+		    Tq = ri[WS(ris, 5)];
+		    Tr = ri[WS(ris, 11)];
+		    Ts = Tq + Tr;
+		    T1L = Tq - Tr;
+		    TR = ii[WS(iis, 5)];
+		    TS = ii[WS(iis, 11)];
+		    TT = TR - TS;
+		    T1M = TR + TS;
+	       }
+	       {
+		    E Tt, Tu, TO, TP;
+		    Tt = ri[WS(ris, 3)];
+		    Tu = ri[WS(ris, 13)];
+		    Tv = Tt + Tu;
+		    T1O = Tt - Tu;
+		    TO = ii[WS(iis, 13)];
+		    TP = ii[WS(iis, 3)];
+		    TQ = TO - TP;
+		    T1P = TP + TO;
+	       }
+	       Tw = Ts + Tv;
+	       TU = TQ - TT;
+	       T1l = TT + TQ;
+	       TW = Ts - Tv;
+	       {
+		    E T1T, T1U, T1N, T1Q;
+		    T1T = T1L + T1M;
+		    T1U = T1O + T1P;
+		    T1V = KP707106781 * (T1T - T1U);
+		    T2j = KP707106781 * (T1T + T1U);
+		    T1N = T1L - T1M;
+		    T1Q = T1O - T1P;
+		    T1R = KP707106781 * (T1N + T1Q);
+		    T2l = KP707106781 * (T1N - T1Q);
+	       }
+	  }
+	  { /* writes outputs 0, 8, 16, 24 */
+	       E Tx, T1r, Ti, T1q, Ta;
+	       Tx = KP2_000000000 * (Tp + Tw);
+	       T1r = KP2_000000000 * (T1l + T1k);
+	       Ta = T6 + T9;
+	       Ti = Ta + Th;
+	       T1q = Ta - Th;
+	       O[WS(os, 16)] = Ti - Tx;
+	       O[WS(os, 24)] = T1q + T1r;
+	       O[0] = Ti + Tx;
+	       O[WS(os, 8)] = T1q - T1r;
+	  }
+	  { /* writes outputs 4, 12, 20, 28 */
+	       E T1i, T1o, T1n, T1p, T1g, T1j, T1m;
+	       T1g = T6 - T9;
+	       T1i = T1g - T1h;
+	       T1o = T1g + T1h;
+	       T1j = Tp - Tw;
+	       T1m = T1k - T1l;
+	       T1n = KP1_414213562 * (T1j - T1m);
+	       T1p = KP1_414213562 * (T1j + T1m);
+	       O[WS(os, 20)] = T1i - T1n;
+	       O[WS(os, 28)] = T1o + T1p;
+	       O[WS(os, 4)] = T1i + T1n;
+	       O[WS(os, 12)] = T1o - T1p;
+	  }
+	  { /* writes outputs 2, 10, 18, 26 */
+	       E TM, T16, T15, T17;
+	       {
+		    E TC, TL, TV, T14;
+		    TC = Ty - TB;
+		    TL = KP1_414213562 * (TD - TK);
+		    TM = TC + TL;
+		    T16 = TC - TL;
+		    TV = TN + TU;
+		    T14 = TW + T13;
+		    T15 = FNMS(KP765366864, T14, KP1_847759065 * TV);
+		    T17 = FMA(KP765366864, TV, KP1_847759065 * T14);
+	       }
+	       O[WS(os, 18)] = TM - T15;
+	       O[WS(os, 26)] = T16 + T17;
+	       O[WS(os, 2)] = TM + T15;
+	       O[WS(os, 10)] = T16 - T17;
+	  }
+	  { /* writes outputs 7, 15, 23, 31 */
+	       E T2t, T2x, T2w, T2y;
+	       {
+		    E T2r, T2s, T2u, T2v;
+		    T2r = T2b + T2c;
+		    T2s = FMA(KP1_847759065, T2e, KP765366864 * T2f);
+		    T2t = T2r - T2s;
+		    T2x = T2r + T2s;
+		    T2u = T2i + T2j;
+		    T2v = T2m - T2l;
+		    T2w = FNMS(KP1_961570560, T2v, KP390180644 * T2u);
+		    T2y = FMA(KP1_961570560, T2u, KP390180644 * T2v);
+	       }
+	       O[WS(os, 23)] = T2t - T2w;
+	       O[WS(os, 31)] = T2x + T2y;
+	       O[WS(os, 7)] = T2t + T2w;
+	       O[WS(os, 15)] = T2x - T2y;
+	  }
+	  { /* writes outputs 6, 14, 22, 30 */
+	       E T1a, T1e, T1d, T1f;
+	       {
+		    E T18, T19, T1b, T1c;
+		    T18 = Ty + TB;
+		    T19 = KP1_414213562 * (TD + TK);
+		    T1a = T18 - T19;
+		    T1e = T18 + T19;
+		    T1b = TN - TU;
+		    T1c = T13 - TW;
+		    T1d = FNMS(KP1_847759065, T1c, KP765366864 * T1b);
+		    T1f = FMA(KP1_847759065, T1b, KP765366864 * T1c);
+	       }
+	       O[WS(os, 22)] = T1a - T1d;
+	       O[WS(os, 30)] = T1e + T1f;
+	       O[WS(os, 6)] = T1a + T1d;
+	       O[WS(os, 14)] = T1e - T1f;
+	  }
+	  { /* writes outputs 5, 13, 21, 29 */
+	       E T25, T29, T28, T2a;
+	       {
+		    E T23, T24, T26, T27;
+		    T23 = T1v - T1y;
+		    T24 = FMA(KP765366864, T1C, KP1_847759065 * T1F);
+		    T25 = T23 - T24;
+		    T29 = T23 + T24;
+		    T26 = T1K - T1R;
+		    T27 = T1Y - T1V;
+		    T28 = FNMS(KP1_662939224, T27, KP1_111140466 * T26);
+		    T2a = FMA(KP1_662939224, T26, KP1_111140466 * T27);
+	       }
+	       O[WS(os, 21)] = T25 - T28;
+	       O[WS(os, 29)] = T29 + T2a;
+	       O[WS(os, 5)] = T25 + T28;
+	       O[WS(os, 13)] = T29 - T2a;
+	  }
+	  { /* writes outputs 3, 11, 19, 27 */
+	       E T2h, T2p, T2o, T2q;
+	       {
+		    E T2d, T2g, T2k, T2n;
+		    T2d = T2b - T2c;
+		    T2g = FNMS(KP1_847759065, T2f, KP765366864 * T2e);
+		    T2h = T2d + T2g;
+		    T2p = T2d - T2g;
+		    T2k = T2i - T2j;
+		    T2n = T2l + T2m;
+		    T2o = FNMS(KP1_111140466, T2n, KP1_662939224 * T2k);
+		    T2q = FMA(KP1_111140466, T2k, KP1_662939224 * T2n);
+	       }
+	       O[WS(os, 19)] = T2h - T2o;
+	       O[WS(os, 27)] = T2p + T2q;
+	       O[WS(os, 3)] = T2h + T2o;
+	       O[WS(os, 11)] = T2p - T2q;
+	  }
+	  { /* writes outputs 1, 9, 17, 25 */
+	       E T1H, T21, T20, T22;
+	       {
+		    E T1z, T1G, T1S, T1Z;
+		    T1z = T1v + T1y;
+		    T1G = FNMS(KP765366864, T1F, KP1_847759065 * T1C);
+		    T1H = T1z + T1G;
+		    T21 = T1z - T1G;
+		    T1S = T1K + T1R;
+		    T1Z = T1V + T1Y;
+		    T20 = FNMS(KP390180644, T1Z, KP1_961570560 * T1S);
+		    T22 = FMA(KP390180644, T1S, KP1_961570560 * T1Z);
+	       }
+	       O[WS(os, 17)] = T1H - T20;
+	       O[WS(os, 25)] = T21 + T22;
+	       O[WS(os, 1)] = T1H + T20;
+	       O[WS(os, 9)] = T21 - T22;
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 32, "hc2r_32", {140, 34, 16, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor for hc2r_32: first field 32 (presumably the transform size); {140, 34, 16, ...} equals the additions / multiplications / fused multiply-adds quoted in this file's header comment */
+
+void X(codelet_hc2r_32) (planner *p) { /* passes hc2r_32 and its descriptor to X(khc2r_register) for planner p */
+     X(khc2r_register) (p, hc2r_32, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_4.c b/src/fftw3/rdft/codelets/hc2r/hc2r_4.c
new file mode 100644
index 0000000..00db38d
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_4.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:11 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 4 -name hc2r_4 -include hc2r.h */
+
+/*
+ * This function contains 6 FP additions, 2 FP multiplications,
+ * (or, 6 additions, 2 multiplications, 0 fused multiply/add),
+ * 10 stack variables, and 8 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_4.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_4.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_4.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_4(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs) /* size-4 codelet emitted by gen_hc2r (-n 4 -sign 1); by its name presumably a halfcomplex-to-real transform -- confirm in hc2r.h */
+{ /* per iteration: reads ri[0], ri[WS(ris, 1)], ri[WS(ris, 2)] and ii[WS(iis, 1)]; writes O[0] and O[WS(os, k)] for k = 1..3 */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; each pass advances ri and ii by ivs and O by ovs */
+	  E T5, T8, T3, T6;
+	  { /* loads all four inputs; T5 and T8 are the doubled k = 1 terms, T3 and T6 the sum and difference of ri[0] and ri at k = 2 */
+	       E T4, T7, T1, T2;
+	       T4 = ri[WS(ris, 1)];
+	       T5 = KP2_000000000 * T4;
+	       T7 = ii[WS(iis, 1)];
+	       T8 = KP2_000000000 * T7;
+	       T1 = ri[0];
+	       T2 = ri[WS(ris, 2)];
+	       T3 = T1 + T2;
+	       T6 = T1 - T2;
+	  }
+	  O[WS(os, 2)] = T3 - T5;
+	  O[WS(os, 3)] = T6 + T8;
+	  O[0] = T3 + T5;
+	  O[WS(os, 1)] = T6 - T8;
+     }
+}
+
+static const khc2r_desc desc = { 4, "hc2r_4", {6, 2, 0, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor for hc2r_4: first field 4 (presumably the transform size); {6, 2, 0, ...} equals the additions / multiplications / fused multiply-adds quoted in this file's header comment */
+
+void X(codelet_hc2r_4) (planner *p) { /* passes hc2r_4 and its descriptor to X(khc2r_register) for planner p */
+     X(khc2r_register) (p, hc2r_4, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_5.c b/src/fftw3/rdft/codelets/hc2r/hc2r_5.c
new file mode 100644
index 0000000..fd1c09d
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_5.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:11 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 5 -name hc2r_5 -include hc2r.h */
+
+/*
+ * This function contains 12 FP additions, 7 FP multiplications,
+ * (or, 8 additions, 3 multiplications, 4 fused multiply/add),
+ * 18 stack variables, and 10 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_5.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_5.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_5.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_5(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs) /* size-5 codelet emitted by gen_hc2r (-n 5 -sign 1); by its name presumably a halfcomplex-to-real transform -- confirm in hc2r.h */
+{ /* per iteration: reads ri[0], ri[WS(ris, k)] and ii[WS(iis, k)] for k = 1..2; writes O[0] and O[WS(os, k)] for k = 1..4 */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_118033988, +1.118033988749894848204586834365638117720309180); /* numerically sqrt(5)/2 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     DK(KP1_902113032, +1.902113032590307144232878666758764286811397268); /* numerically 2*sin(2*pi/5) */
+     DK(KP1_175570504, +1.175570504584946258337411909278145537195304875); /* numerically 2*sin(pi/5) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; each pass advances ri and ii by ivs and O by ovs */
+	  E Ta, Tc, T1, T4, T5, T6, Tb, T7;
+	  { /* loads all five inputs; Ta and Tc are built from the ii values, T4, T5, T6 from the ri values */
+	       E T8, T9, T2, T3;
+	       T8 = ii[WS(iis, 1)];
+	       T9 = ii[WS(iis, 2)];
+	       Ta = FNMS(KP1_902113032, T9, KP1_175570504 * T8);
+	       Tc = FMA(KP1_902113032, T8, KP1_175570504 * T9);
+	       T1 = ri[0];
+	       T2 = ri[WS(ris, 1)];
+	       T3 = ri[WS(ris, 2)];
+	       T4 = T2 + T3;
+	       T5 = FNMS(KP500000000, T4, T1);
+	       T6 = KP1_118033988 * (T2 - T3);
+	  }
+	  O[0] = FMA(KP2_000000000, T4, T1);
+	  Tb = T6 + T5; /* shared by outputs 1 and 4 */
+	  O[WS(os, 1)] = Tb - Tc;
+	  O[WS(os, 4)] = Tb + Tc;
+	  T7 = T5 - T6; /* shared by outputs 2 and 3 */
+	  O[WS(os, 2)] = T7 - Ta;
+	  O[WS(os, 3)] = T7 + Ta;
+     }
+}
+
+static const khc2r_desc desc = { 5, "hc2r_5", {8, 3, 4, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor for hc2r_5: first field 5 (presumably the transform size); {8, 3, 4, ...} equals the additions / multiplications / fused multiply-adds quoted in this file's header comment */
+
+void X(codelet_hc2r_5) (planner *p) { /* passes hc2r_5 and its descriptor to X(khc2r_register) for planner p */
+     X(khc2r_register) (p, hc2r_5, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_6.c b/src/fftw3/rdft/codelets/hc2r/hc2r_6.c
new file mode 100644
index 0000000..08ce9d0
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_6.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:11 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 6 -name hc2r_6 -include hc2r.h */
+
+/*
+ * This function contains 14 FP additions, 4 FP multiplications,
+ * (or, 12 additions, 2 multiplications, 2 fused multiply/add),
+ * 17 stack variables, and 12 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_6.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_6.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_6.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_6(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs) /* size-6 codelet emitted by gen_hc2r (-n 6 -sign 1); by its name presumably a halfcomplex-to-real transform -- confirm in hc2r.h */
+{ /* per iteration: reads ri[0], ri[WS(ris, k)] for k = 1..3 and ii[WS(iis, k)] for k = 1..2; writes O[0] and O[WS(os, k)] for k = 1..5 */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* numerically sqrt(3) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes; each pass advances ri and ii by ivs and O by ovs */
+	  E T3, T7, Tc, Te, T6, T8, T1, T2, T9, Td;
+	  T1 = ri[0];
+	  T2 = ri[WS(ris, 3)];
+	  T3 = T1 - T2;
+	  T7 = T1 + T2;
+	  { /* loads ri and ii at k = 1 and 2; Tc and Te are built from the ii values, T6 and T8 from the ri values */
+	       E Ta, Tb, T4, T5;
+	       Ta = ii[WS(iis, 2)];
+	       Tb = ii[WS(iis, 1)];
+	       Tc = KP1_732050807 * (Ta - Tb);
+	       Te = KP1_732050807 * (Ta + Tb);
+	       T4 = ri[WS(ris, 2)];
+	       T5 = ri[WS(ris, 1)];
+	       T6 = T4 - T5;
+	       T8 = T4 + T5;
+	  }
+	  O[WS(os, 3)] = FMA(KP2_000000000, T6, T3);
+	  O[0] = FMA(KP2_000000000, T8, T7);
+	  T9 = T7 - T8; /* shared by outputs 2 and 4 */
+	  O[WS(os, 4)] = T9 - Tc;
+	  O[WS(os, 2)] = T9 + Tc;
+	  Td = T3 - T6; /* shared by outputs 1 and 5 */
+	  O[WS(os, 1)] = Td - Te;
+	  O[WS(os, 5)] = Td + Te;
+     }
+}
+
+static const khc2r_desc desc = { 6, "hc2r_6", {12, 2, 2, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor for hc2r_6: first field 6 (presumably the transform size); {12, 2, 2, ...} equals the additions / multiplications / fused multiply-adds quoted in this file's header comment */
+
+void X(codelet_hc2r_6) (planner *p) { /* passes hc2r_6 and its descriptor to X(khc2r_register) for planner p */
+     X(khc2r_register) (p, hc2r_6, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_7.c b/src/fftw3/rdft/codelets/hc2r/hc2r_7.c
new file mode 100644
index 0000000..6d1f4c1
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_7.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:11 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 7 -name hc2r_7 -include hc2r.h */
+
+/*
+ * This function contains 24 FP additions, 19 FP multiplications,
+ * (or, 11 additions, 6 multiplications, 13 fused multiply/add),
+ * 21 stack variables, and 14 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_7.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_7.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_7.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_7(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Size-7 hc2r kernel (generator flags: -n 7 -sign 1); presumably "halfcomplex to real" -- confirm against hc2r.h. */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_801937735, +1.801937735804838252472204639014890102331838324); /* approx. 2*cos(pi/7) */
+     DK(KP445041867, +0.445041867912628808577805128993589518932711138); /* approx. 2*cos(3*pi/7) */
+     DK(KP1_246979603, +1.246979603717467061050009768008479621264549462); /* approx. 2*cos(2*pi/7) */
+     DK(KP867767478, +0.867767478235116240951536665696717509219981456); /* approx. 2*sin(pi/7) */
+     DK(KP1_949855824, +1.949855824363647214036263365987862434465571601); /* approx. 2*sin(3*pi/7) */
+     DK(KP1_563662964, +1.563662964936059617416889053348115500464669037); /* approx. 2*sin(2*pi/7) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes (none when v <= 0); ri/ii step by ivs and O by ovs after each pass */
+	  E T9, Td, Tb, T1, T4, T2, T3, T5, Tc, Ta, T6, T8, T7;
+	  T6 = ii[WS(iis, 2)]; /* inputs read per pass: ii at 1..3 and ri at 0..3; ii[0] is never read */
+	  T8 = ii[WS(iis, 1)];
+	  T7 = ii[WS(iis, 3)];
+	  T9 = FNMS(KP1_949855824, T7, KP1_563662964 * T6) - (KP867767478 * T8); /* T9, Td, Tb: combinations of the three ii inputs only */
+	  Td = FMA(KP867767478, T6, KP1_563662964 * T7) - (KP1_949855824 * T8);
+	  Tb = FMA(KP1_563662964, T8, KP1_949855824 * T6) + (KP867767478 * T7);
+	  T1 = ri[0];
+	  T4 = ri[WS(ris, 3)];
+	  T2 = ri[WS(ris, 1)];
+	  T3 = ri[WS(ris, 2)];
+	  T5 = FMA(KP1_246979603, T3, T1) + FNMA(KP445041867, T4, KP1_801937735 * T2); /* T5, Tc, Ta: combinations of the four ri inputs only */
+	  Tc = FMA(KP1_246979603, T4, T1) + FNMA(KP1_801937735, T3, KP445041867 * T2);
+	  Ta = FMA(KP1_246979603, T2, T1) + FNMA(KP1_801937735, T4, KP445041867 * T3);
+	  O[WS(os, 4)] = T5 - T9; /* outputs are stored in pairs (4,3), (2,5), (6,1): same ri-term, opposite sign of the ii-term */
+	  O[WS(os, 3)] = T5 + T9;
+	  O[WS(os, 2)] = Tc + Td;
+	  O[WS(os, 5)] = Tc - Td;
+	  O[WS(os, 6)] = Ta + Tb;
+	  O[WS(os, 1)] = Ta - Tb;
+	  O[0] = FMA(KP2_000000000, T2 + T3 + T4, T1); /* O[0] depends on the ri inputs only */
+     }
+}
+
+static const khc2r_desc desc = { 7, "hc2r_7", {11, 6, 13, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* 7 and "hc2r_7" mirror the generator's -n/-name; {11, 6, 13, ...} matches the add/mul/fma counts in this file's header comment; remaining fields: see khc2r_desc (TODO confirm) */
+
+void X(codelet_hc2r_7) (planner *p) { /* registration entry point: hands the kernel and its descriptor to khc2r_register */
+     X(khc2r_register) (p, hc2r_7, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_8.c b/src/fftw3/rdft/codelets/hc2r/hc2r_8.c
new file mode 100644
index 0000000..dd11bd4
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_8.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:11 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 8 -name hc2r_8 -include hc2r.h */
+
+/*
+ * This function contains 20 FP additions, 6 FP multiplications,
+ * (or, 20 additions, 6 multiplications, 0 fused multiply/add),
+ * 21 stack variables, and 16 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_8.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_8.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_8.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_8(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Size-8 hc2r kernel (generator flags: -n 8 -sign 1); presumably "halfcomplex to real" -- confirm against hc2r.h. */
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* numerically sqrt(2) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes (none when v <= 0); ri/ii step by ivs and O by ovs after each pass */
+	  E T5, Tg, T3, Te, T9, Ti, Td, Tj, T6, Ta;
+	  { /* load phase: reads ri at 0..4 and ii at 1..3; ii[0] and ii[WS(iis, 4)] are never read */
+	       E T4, Tf, T1, T2;
+	       T4 = ri[WS(ris, 2)];
+	       T5 = KP2_000000000 * T4;
+	       Tf = ii[WS(iis, 2)];
+	       Tg = KP2_000000000 * Tf;
+	       T1 = ri[0];
+	       T2 = ri[WS(ris, 4)];
+	       T3 = T1 + T2;
+	       Te = T1 - T2;
+	       {
+		    E T7, T8, Tb, Tc;
+		    T7 = ri[WS(ris, 1)];
+		    T8 = ri[WS(ris, 3)];
+		    T9 = KP2_000000000 * (T7 + T8);
+		    Ti = T7 - T8;
+		    Tb = ii[WS(iis, 1)];
+		    Tc = ii[WS(iis, 3)];
+		    Td = KP2_000000000 * (Tb - Tc);
+		    Tj = Tb + Tc;
+	       }
+	  }
+	  T6 = T3 + T5; /* even-indexed outputs 0, 4, 2, 6 are built from T3, T5, T9, Td */
+	  O[WS(os, 4)] = T6 - T9;
+	  O[0] = T6 + T9;
+	  Ta = T3 - T5;
+	  O[WS(os, 2)] = Ta - Td;
+	  O[WS(os, 6)] = Ta + Td;
+	  { /* odd-indexed outputs 5, 1, 3, 7 are built from Te, Tg, Ti, Tj with the sqrt(2) factor */
+	       E Th, Tk, Tl, Tm;
+	       Th = Te - Tg;
+	       Tk = KP1_414213562 * (Ti - Tj);
+	       O[WS(os, 5)] = Th - Tk;
+	       O[WS(os, 1)] = Th + Tk;
+	       Tl = Te + Tg;
+	       Tm = KP1_414213562 * (Ti + Tj);
+	       O[WS(os, 3)] = Tl - Tm;
+	       O[WS(os, 7)] = Tl + Tm;
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 8, "hc2r_8", {20, 6, 0, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* 8 and "hc2r_8" mirror the generator's -n/-name; {20, 6, 0, ...} matches the add/mul/fma counts in this file's header comment; remaining fields: see khc2r_desc (TODO confirm) */
+
+void X(codelet_hc2r_8) (planner *p) { /* registration entry point: hands the kernel and its descriptor to khc2r_register */
+     X(khc2r_register) (p, hc2r_8, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hc2r_9.c b/src/fftw3/rdft/codelets/hc2r/hc2r_9.c
new file mode 100644
index 0000000..91b0f78
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hc2r_9.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:11 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r -compact -variables 4 -sign 1 -n 9 -name hc2r_9 -include hc2r.h */
+
+/*
+ * This function contains 32 FP additions, 18 FP multiplications,
+ * (or, 22 additions, 8 multiplications, 10 fused multiply/add),
+ * 35 stack variables, and 18 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hc2r_9.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_9.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: hc2r_9.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void hc2r_9(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{ /* Size-9 hc2r kernel (generator flags: -n 9 -sign 1); presumably "halfcomplex to real" -- confirm against hc2r.h. */
+     DK(KP984807753, +0.984807753012208059366743024589523013670643252);
+     DK(KP173648177, +0.173648177666930348851716626769314796000375677);
+     DK(KP300767466, +0.300767466360870593278543795225003852144476517);
+     DK(KP1_705737063, +1.705737063904886419256501927880148143872040591);
+     DK(KP642787609, +0.642787609686539326322643409907263432907559884);
+     DK(KP766044443, +0.766044443118978035202392650555416673935832457);
+     DK(KP1_326827896, +1.326827896337876792410842639271782594433726619);
+     DK(KP1_113340798, +1.113340798452838732905825904094046265936583811);
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* numerically sqrt(3) */
+     int i;
+     for (i = v; i > 0; i = i - 1, ri = ri + ivs, ii = ii + ivs, O = O + ovs) { /* v passes (none when v <= 0); ri/ii step by ivs and O by ovs after each pass */
+	  E T3, Tq, Tc, Tk, Tj, T8, Tm, Ts, Th, Tr, Tw, Tx;
+	  { /* first load group: ri at 0 and 3, ii at 3 */
+	       E Tb, T1, T2, T9, Ta;
+	       Ta = ii[WS(iis, 3)];
+	       Tb = KP1_732050807 * Ta;
+	       T1 = ri[0];
+	       T2 = ri[WS(ris, 3)];
+	       T9 = T1 - T2;
+	       T3 = FMA(KP2_000000000, T2, T1);
+	       Tq = T9 + Tb;
+	       Tc = T9 - Tb;
+	  }
+	  { /* second load group: ri at 1, 2, 4 and ii at 1, 2, 4; overall ri 0..4 and ii 1..4 are read, ii[0] never */
+	       E T4, T7, Ti, Tg, Tl, Td;
+	       T4 = ri[WS(ris, 1)];
+	       Tk = ii[WS(iis, 1)];
+	       {
+		    E T5, T6, Te, Tf;
+		    T5 = ri[WS(ris, 4)];
+		    T6 = ri[WS(ris, 2)];
+		    T7 = T5 + T6;
+		    Ti = KP866025403 * (T5 - T6);
+		    Te = ii[WS(iis, 4)];
+		    Tf = ii[WS(iis, 2)];
+		    Tg = KP866025403 * (Te + Tf);
+		    Tj = Tf - Te;
+	       }
+	       T8 = T4 + T7;
+	       Tl = FMA(KP500000000, Tj, Tk);
+	       Tm = Ti + Tl;
+	       Ts = Tl - Ti;
+	       Td = FNMS(KP500000000, T7, T4);
+	       Th = Td - Tg;
+	       Tr = Td + Tg;
+	  }
+	  O[0] = FMA(KP2_000000000, T8, T3); /* outputs 0, 3, 6 are built from T3, T8, Tk, Tj */
+	  Tw = T3 - T8;
+	  Tx = KP1_732050807 * (Tk - Tj);
+	  O[WS(os, 3)] = Tw - Tx;
+	  O[WS(os, 6)] = Tw + Tx;
+	  { /* outputs 1, 7, 4 are built from Tc, Th, Tm; outputs 2, 8, 5 from Tq, Tr, Ts */
+	       E Tp, Tn, To, Tv, Tt, Tu;
+	       Tp = FMA(KP1_113340798, Th, KP1_326827896 * Tm);
+	       Tn = FNMS(KP642787609, Tm, KP766044443 * Th);
+	       To = Tc - Tn;
+	       O[WS(os, 1)] = FMA(KP2_000000000, Tn, Tc);
+	       O[WS(os, 7)] = To + Tp;
+	       O[WS(os, 4)] = To - Tp;
+	       Tv = FMA(KP1_705737063, Tr, KP300767466 * Ts);
+	       Tt = FNMS(KP984807753, Ts, KP173648177 * Tr);
+	       Tu = Tq - Tt;
+	       O[WS(os, 2)] = FMA(KP2_000000000, Tt, Tq);
+	       O[WS(os, 8)] = Tu + Tv;
+	       O[WS(os, 5)] = Tu - Tv;
+	  }
+     }
+}
+
+static const khc2r_desc desc = { 9, "hc2r_9", {22, 8, 10, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* 9 and "hc2r_9" mirror the generator's -n/-name; {22, 8, 10, ...} matches the add/mul/fma counts in this file's header comment; remaining fields: see khc2r_desc (TODO confirm) */
+
+void X(codelet_hc2r_9) (planner *p) { /* registration entry point: hands the kernel and its descriptor to khc2r_register */
+     X(khc2r_register) (p, hc2r_9, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/hcodlist.c b/src/fftw3/rdft/codelets/hc2r/hcodlist.c
new file mode 100644
index 0000000..d5d2f07
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/hcodlist.c
@@ -0,0 +1,102 @@
+#include "ifftw.h"
+
+extern void X(codelet_hc2r_3)(planner *);
+extern void X(codelet_hc2r_4)(planner *);
+extern void X(codelet_hc2r_5)(planner *);
+extern void X(codelet_hc2r_6)(planner *);
+extern void X(codelet_hc2r_7)(planner *);
+extern void X(codelet_hc2r_8)(planner *);
+extern void X(codelet_hc2r_9)(planner *);
+extern void X(codelet_hc2r_10)(planner *);
+extern void X(codelet_hc2r_11)(planner *);
+extern void X(codelet_hc2r_12)(planner *);
+extern void X(codelet_hc2r_13)(planner *);
+extern void X(codelet_hc2r_14)(planner *);
+extern void X(codelet_hc2r_15)(planner *);
+extern void X(codelet_hc2r_16)(planner *);
+extern void X(codelet_hc2r_32)(planner *);
+extern void X(codelet_mhc2r_32)(planner *);
+extern void X(codelet_mhc2r_64)(planner *);
+extern void X(codelet_mhc2r_128)(planner *);
+extern void X(codelet_hb_2)(planner *);
+extern void X(codelet_hb_3)(planner *);
+extern void X(codelet_hb_4)(planner *);
+extern void X(codelet_hb_5)(planner *);
+extern void X(codelet_hb_6)(planner *);
+extern void X(codelet_hb_7)(planner *);
+extern void X(codelet_hb_8)(planner *);
+extern void X(codelet_hb_9)(planner *);
+extern void X(codelet_hb_10)(planner *);
+extern void X(codelet_hb_12)(planner *);
+extern void X(codelet_hb_15)(planner *);
+extern void X(codelet_hb_16)(planner *);
+extern void X(codelet_hb_32)(planner *);
+extern void X(codelet_hb_64)(planner *);
+extern void X(codelet_hc2rIII_2)(planner *);
+extern void X(codelet_hc2rIII_3)(planner *);
+extern void X(codelet_hc2rIII_4)(planner *);
+extern void X(codelet_hc2rIII_5)(planner *);
+extern void X(codelet_hc2rIII_6)(planner *);
+extern void X(codelet_hc2rIII_7)(planner *);
+extern void X(codelet_hc2rIII_8)(planner *);
+extern void X(codelet_hc2rIII_9)(planner *);
+extern void X(codelet_hc2rIII_10)(planner *);
+extern void X(codelet_hc2rIII_12)(planner *);
+extern void X(codelet_hc2rIII_15)(planner *);
+extern void X(codelet_hc2rIII_16)(planner *);
+extern void X(codelet_hc2rIII_32)(planner *);
+extern void X(codelet_mhc2rIII_32)(planner *);
+extern void X(codelet_mhc2rIII_64)(planner *);
+
+
+extern const solvtab X(solvtab_rdft_hc2r); /* declaration placed directly ahead of the definition below */
+const solvtab X(solvtab_rdft_hc2r) = { /* one SOLVTAB(...) entry per codelet registrar declared above, in the same order, closed by SOLVTAB_END */
+   SOLVTAB(X(codelet_hc2r_3)), /* hc2r family: sizes 3..16 and 32 */
+   SOLVTAB(X(codelet_hc2r_4)),
+   SOLVTAB(X(codelet_hc2r_5)),
+   SOLVTAB(X(codelet_hc2r_6)),
+   SOLVTAB(X(codelet_hc2r_7)),
+   SOLVTAB(X(codelet_hc2r_8)),
+   SOLVTAB(X(codelet_hc2r_9)),
+   SOLVTAB(X(codelet_hc2r_10)),
+   SOLVTAB(X(codelet_hc2r_11)),
+   SOLVTAB(X(codelet_hc2r_12)),
+   SOLVTAB(X(codelet_hc2r_13)),
+   SOLVTAB(X(codelet_hc2r_14)),
+   SOLVTAB(X(codelet_hc2r_15)),
+   SOLVTAB(X(codelet_hc2r_16)),
+   SOLVTAB(X(codelet_hc2r_32)),
+   SOLVTAB(X(codelet_mhc2r_32)), /* mhc2r family: sizes 32, 64, 128 */
+   SOLVTAB(X(codelet_mhc2r_64)),
+   SOLVTAB(X(codelet_mhc2r_128)),
+   SOLVTAB(X(codelet_hb_2)), /* hb family: sizes 2..10, 12, 15, 16, 32, 64 */
+   SOLVTAB(X(codelet_hb_3)),
+   SOLVTAB(X(codelet_hb_4)),
+   SOLVTAB(X(codelet_hb_5)),
+   SOLVTAB(X(codelet_hb_6)),
+   SOLVTAB(X(codelet_hb_7)),
+   SOLVTAB(X(codelet_hb_8)),
+   SOLVTAB(X(codelet_hb_9)),
+   SOLVTAB(X(codelet_hb_10)),
+   SOLVTAB(X(codelet_hb_12)),
+   SOLVTAB(X(codelet_hb_15)),
+   SOLVTAB(X(codelet_hb_16)),
+   SOLVTAB(X(codelet_hb_32)),
+   SOLVTAB(X(codelet_hb_64)),
+   SOLVTAB(X(codelet_hc2rIII_2)), /* hc2rIII family: sizes 2..10, 12, 15, 16, 32 */
+   SOLVTAB(X(codelet_hc2rIII_3)),
+   SOLVTAB(X(codelet_hc2rIII_4)),
+   SOLVTAB(X(codelet_hc2rIII_5)),
+   SOLVTAB(X(codelet_hc2rIII_6)),
+   SOLVTAB(X(codelet_hc2rIII_7)),
+   SOLVTAB(X(codelet_hc2rIII_8)),
+   SOLVTAB(X(codelet_hc2rIII_9)),
+   SOLVTAB(X(codelet_hc2rIII_10)),
+   SOLVTAB(X(codelet_hc2rIII_12)),
+   SOLVTAB(X(codelet_hc2rIII_15)),
+   SOLVTAB(X(codelet_hc2rIII_16)),
+   SOLVTAB(X(codelet_hc2rIII_32)),
+   SOLVTAB(X(codelet_mhc2rIII_32)), /* mhc2rIII family: sizes 32 and 64 */
+   SOLVTAB(X(codelet_mhc2rIII_64)),
+   SOLVTAB_END
+};
diff --git a/src/fftw3/rdft/codelets/hc2r/mhc2rIII_32.c b/src/fftw3/rdft/codelets/hc2r/mhc2rIII_32.c
new file mode 100644
index 0000000..5ae4ebf
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/mhc2rIII_32.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:12:16 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r_noinline -compact -variables 4 -sign 1 -n 32 -name mhc2rIII_32 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 174 FP additions, 84 FP multiplications,
+ * (or, 138 additions, 48 multiplications, 36 fused multiply/add),
+ * 65 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: mhc2rIII_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: mhc2rIII_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: mhc2rIII_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void mhc2rIII_32_0(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os)
+{ /* Single-transform size-32 kernel (generator flags: -n 32 -dft-III -sign 1); straight-line code, no loop over vectors. */
+     DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
+     DK(KP580569354, +0.580569354508924735272384751634790549382952557);
+     DK(KP942793473, +0.942793473651995297112775251810508755314920638);
+     DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
+     DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
+     DK(KP1_268786568, +1.268786568327290996430343226450986741351374190);
+     DK(KP196034280, +0.196034280659121203988391127777283691722273346);
+     DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
+     DK(KP765366864, +0.765366864730179543456919968060797733522689125);
+     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
+     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
+     DK(KP390180644, +0.390180644032256535696569736954044481855383236);
+     DK(KP1_111140466, +1.111140466039204449485661627897065748749874382);
+     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* numerically sqrt(2) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
+     {
+	  E T7, T2i, T2F, Tz, T1k, T1I, T1Z, T1x, Te, T22, T2E, T2j, T1f, T1y, TK;
+	  E T1J, Tm, T2B, TW, T1a, T1C, T1L, T28, T2l, Tt, T2A, T17, T1b, T1F, T1M;
+	  E T2d, T2m;
+	  { /* load group 1: ri and ii at indices 0, 15, 8, 7 */
+	       E T3, Tv, T1j, T2h, T6, T1g, Ty, T2g;
+	       {
+		    E T1, T2, T1h, T1i;
+		    T1 = ri[0]; /* across the four load groups, each of ri 0..15 and ii 0..15 is read exactly once */
+		    T2 = ri[WS(ris, 15)];
+		    T3 = T1 + T2;
+		    Tv = T1 - T2;
+		    T1h = ii[0];
+		    T1i = ii[WS(iis, 15)];
+		    T1j = T1h + T1i;
+		    T2h = T1i - T1h;
+	       }
+	       {
+		    E T4, T5, Tw, Tx;
+		    T4 = ri[WS(ris, 8)];
+		    T5 = ri[WS(ris, 7)];
+		    T6 = T4 + T5;
+		    T1g = T4 - T5;
+		    Tw = ii[WS(iis, 8)];
+		    Tx = ii[WS(iis, 7)];
+		    Ty = Tw + Tx;
+		    T2g = Tw - Tx;
+	       }
+	       T7 = T3 + T6;
+	       T2i = T2g + T2h;
+	       T2F = T2h - T2g;
+	       Tz = Tv - Ty;
+	       T1k = T1g + T1j;
+	       T1I = T1g - T1j;
+	       T1Z = T3 - T6;
+	       T1x = Tv + Ty;
+	  }
+	  { /* load group 2: ri and ii at indices 4, 11, 3, 12 */
+	       E Ta, TA, TD, T21, Td, TF, TI, T20;
+	       {
+		    E T8, T9, TB, TC;
+		    T8 = ri[WS(ris, 4)];
+		    T9 = ri[WS(ris, 11)];
+		    Ta = T8 + T9;
+		    TA = T8 - T9;
+		    TB = ii[WS(iis, 4)];
+		    TC = ii[WS(iis, 11)];
+		    TD = TB + TC;
+		    T21 = TB - TC;
+	       }
+	       {
+		    E Tb, Tc, TG, TH;
+		    Tb = ri[WS(ris, 3)];
+		    Tc = ri[WS(ris, 12)];
+		    Td = Tb + Tc;
+		    TF = Tb - Tc;
+		    TG = ii[WS(iis, 3)];
+		    TH = ii[WS(iis, 12)];
+		    TI = TG + TH;
+		    T20 = TH - TG;
+	       }
+	       Te = Ta + Td;
+	       T22 = T20 - T21;
+	       T2E = T21 + T20;
+	       T2j = Ta - Td;
+	       {
+		    E T1d, T1e, TE, TJ;
+		    T1d = TA + TD;
+		    T1e = TF + TI;
+		    T1f = KP707106781 * (T1d - T1e);
+		    T1y = KP707106781 * (T1d + T1e);
+		    TE = TA - TD;
+		    TJ = TF - TI;
+		    TK = KP707106781 * (TE + TJ);
+		    T1J = KP707106781 * (TE - TJ);
+	       }
+	  }
+	  { /* load group 3: ri and ii at indices 2, 13, 10, 5 */
+	       E Ti, TM, TU, T25, Tl, TR, TP, T26, TQ, TV;
+	       {
+		    E Tg, Th, TS, TT;
+		    Tg = ri[WS(ris, 2)];
+		    Th = ri[WS(ris, 13)];
+		    Ti = Tg + Th;
+		    TM = Tg - Th;
+		    TS = ii[WS(iis, 2)];
+		    TT = ii[WS(iis, 13)];
+		    TU = TS + TT;
+		    T25 = TS - TT;
+	       }
+	       {
+		    E Tj, Tk, TN, TO;
+		    Tj = ri[WS(ris, 10)];
+		    Tk = ri[WS(ris, 5)];
+		    Tl = Tj + Tk;
+		    TR = Tj - Tk;
+		    TN = ii[WS(iis, 10)];
+		    TO = ii[WS(iis, 5)];
+		    TP = TN + TO;
+		    T26 = TN - TO;
+	       }
+	       Tm = Ti + Tl;
+	       T2B = T26 + T25;
+	       TQ = TM - TP;
+	       TV = TR + TU;
+	       TW = FNMS(KP382683432, TV, KP923879532 * TQ);
+	       T1a = FMA(KP382683432, TQ, KP923879532 * TV);
+	       {
+		    E T1A, T1B, T24, T27;
+		    T1A = TM + TP;
+		    T1B = TU - TR;
+		    T1C = FNMS(KP923879532, T1B, KP382683432 * T1A);
+		    T1L = FMA(KP923879532, T1A, KP382683432 * T1B);
+		    T24 = Ti - Tl;
+		    T27 = T25 - T26;
+		    T28 = T24 - T27;
+		    T2l = T24 + T27;
+	       }
+	  }
+	  { /* load group 4: ri and ii at indices 1, 14, 6, 9 */
+	       E Tp, TX, T15, T2a, Ts, T12, T10, T2b, T11, T16;
+	       {
+		    E Tn, To, T13, T14;
+		    Tn = ri[WS(ris, 1)];
+		    To = ri[WS(ris, 14)];
+		    Tp = Tn + To;
+		    TX = Tn - To;
+		    T13 = ii[WS(iis, 1)];
+		    T14 = ii[WS(iis, 14)];
+		    T15 = T13 + T14;
+		    T2a = T14 - T13;
+	       }
+	       {
+		    E Tq, Tr, TY, TZ;
+		    Tq = ri[WS(ris, 6)];
+		    Tr = ri[WS(ris, 9)];
+		    Ts = Tq + Tr;
+		    T12 = Tq - Tr;
+		    TY = ii[WS(iis, 6)];
+		    TZ = ii[WS(iis, 9)];
+		    T10 = TY + TZ;
+		    T2b = TY - TZ;
+	       }
+	       Tt = Tp + Ts;
+	       T2A = T2b + T2a;
+	       T11 = TX - T10;
+	       T16 = T12 - T15;
+	       T17 = FMA(KP923879532, T11, KP382683432 * T16);
+	       T1b = FNMS(KP382683432, T11, KP923879532 * T16);
+	       {
+		    E T1D, T1E, T29, T2c;
+		    T1D = TX + T10;
+		    T1E = T12 + T15;
+		    T1F = FNMS(KP923879532, T1E, KP382683432 * T1D);
+		    T1M = FMA(KP923879532, T1D, KP382683432 * T1E);
+		    T29 = Tp - Ts;
+		    T2c = T2a - T2b;
+		    T2d = T29 + T2c;
+		    T2m = T2c - T29;
+	       }
+	  }
+	  { /* from here on: eight store groups of four outputs each; every O index 0..31 is written exactly once */
+	       E Tf, Tu, T2L, T2M, T2N, T2O;
+	       Tf = T7 + Te;
+	       Tu = Tm + Tt;
+	       T2L = Tf - Tu;
+	       T2M = T2B + T2A;
+	       T2N = T2F - T2E;
+	       T2O = T2M + T2N;
+	       O[0] = KP2_000000000 * (Tf + Tu); /* store group: outputs 0, 16, 8, 24 */
+	       O[WS(os, 16)] = KP2_000000000 * (T2N - T2M);
+	       O[WS(os, 8)] = KP1_414213562 * (T2L + T2O);
+	       O[WS(os, 24)] = KP1_414213562 * (T2O - T2L);
+	  }
+	  { /* store group: outputs 6, 30, 22, 14 */
+	       E T2t, T2x, T2w, T2y;
+	       {
+		    E T2r, T2s, T2u, T2v;
+		    T2r = T1Z - T22;
+		    T2s = KP707106781 * (T2m - T2l);
+		    T2t = T2r + T2s;
+		    T2x = T2r - T2s;
+		    T2u = T2j + T2i;
+		    T2v = KP707106781 * (T28 - T2d);
+		    T2w = T2u - T2v;
+		    T2y = T2v + T2u;
+	       }
+	       O[WS(os, 6)] = FMA(KP1_662939224, T2t, KP1_111140466 * T2w);
+	       O[WS(os, 30)] = FNMS(KP1_961570560, T2x, KP390180644 * T2y);
+	       O[WS(os, 22)] = FNMS(KP1_111140466, T2t, KP1_662939224 * T2w);
+	       O[WS(os, 14)] = FMA(KP390180644, T2x, KP1_961570560 * T2y);
+	  }
+	  { /* store group: outputs 4, 28, 20, 12 */
+	       E T2D, T2J, T2I, T2K;
+	       {
+		    E T2z, T2C, T2G, T2H;
+		    T2z = T7 - Te;
+		    T2C = T2A - T2B;
+		    T2D = T2z + T2C;
+		    T2J = T2z - T2C;
+		    T2G = T2E + T2F;
+		    T2H = Tm - Tt;
+		    T2I = T2G - T2H;
+		    T2K = T2H + T2G;
+	       }
+	       O[WS(os, 4)] = FMA(KP1_847759065, T2D, KP765366864 * T2I);
+	       O[WS(os, 28)] = FNMS(KP1_847759065, T2J, KP765366864 * T2K);
+	       O[WS(os, 20)] = FNMS(KP765366864, T2D, KP1_847759065 * T2I);
+	       O[WS(os, 12)] = FMA(KP765366864, T2J, KP1_847759065 * T2K);
+	  }
+	  { /* store group: outputs 1, 25, 17, 9 */
+	       E T19, T1n, T1m, T1o;
+	       {
+		    E TL, T18, T1c, T1l;
+		    TL = Tz + TK;
+		    T18 = TW + T17;
+		    T19 = TL + T18;
+		    T1n = TL - T18;
+		    T1c = T1a + T1b;
+		    T1l = T1f + T1k;
+		    T1m = T1c + T1l;
+		    T1o = T1c - T1l;
+	       }
+	       O[WS(os, 1)] = FNMS(KP196034280, T1m, KP1_990369453 * T19);
+	       O[WS(os, 25)] = FNMS(KP1_546020906, T1n, KP1_268786568 * T1o);
+	       O[WS(os, 17)] = -(FMA(KP196034280, T19, KP1_990369453 * T1m));
+	       O[WS(os, 9)] = FMA(KP1_268786568, T1n, KP1_546020906 * T1o);
+	  }
+	  { /* store group: outputs 5, 29, 21, 13 */
+	       E T1r, T1v, T1u, T1w;
+	       {
+		    E T1p, T1q, T1s, T1t;
+		    T1p = Tz - TK;
+		    T1q = T1b - T1a;
+		    T1r = T1p + T1q;
+		    T1v = T1p - T1q;
+		    T1s = T1f - T1k;
+		    T1t = TW - T17;
+		    T1u = T1s - T1t;
+		    T1w = T1t + T1s;
+	       }
+	       O[WS(os, 5)] = FMA(KP1_763842528, T1r, KP942793473 * T1u);
+	       O[WS(os, 29)] = FNMS(KP1_913880671, T1v, KP580569354 * T1w);
+	       O[WS(os, 21)] = FNMS(KP942793473, T1r, KP1_763842528 * T1u);
+	       O[WS(os, 13)] = FMA(KP580569354, T1v, KP1_913880671 * T1w);
+	  }
+	  { /* store group: outputs 7, 31, 23, 15 */
+	       E T1T, T1X, T1W, T1Y;
+	       {
+		    E T1R, T1S, T1U, T1V;
+		    T1R = T1x + T1y;
+		    T1S = T1L + T1M;
+		    T1T = T1R - T1S;
+		    T1X = T1R + T1S;
+		    T1U = T1J + T1I;
+		    T1V = T1C - T1F;
+		    T1W = T1U - T1V;
+		    T1Y = T1V + T1U;
+	       }
+	       O[WS(os, 7)] = FMA(KP1_546020906, T1T, KP1_268786568 * T1W);
+	       O[WS(os, 31)] = FNMS(KP1_990369453, T1X, KP196034280 * T1Y);
+	       O[WS(os, 23)] = FNMS(KP1_268786568, T1T, KP1_546020906 * T1W);
+	       O[WS(os, 15)] = FMA(KP196034280, T1X, KP1_990369453 * T1Y);
+	  }
+	  { /* store group: outputs 2, 26, 18, 10 */
+	       E T2f, T2p, T2o, T2q;
+	       {
+		    E T23, T2e, T2k, T2n;
+		    T23 = T1Z + T22;
+		    T2e = KP707106781 * (T28 + T2d);
+		    T2f = T23 + T2e;
+		    T2p = T23 - T2e;
+		    T2k = T2i - T2j;
+		    T2n = KP707106781 * (T2l + T2m);
+		    T2o = T2k - T2n;
+		    T2q = T2n + T2k;
+	       }
+	       O[WS(os, 2)] = FMA(KP1_961570560, T2f, KP390180644 * T2o);
+	       O[WS(os, 26)] = FNMS(KP1_662939224, T2p, KP1_111140466 * T2q);
+	       O[WS(os, 18)] = FNMS(KP390180644, T2f, KP1_961570560 * T2o);
+	       O[WS(os, 10)] = FMA(KP1_111140466, T2p, KP1_662939224 * T2q);
+	  }
+	  { /* store group: outputs 3, 27, 19, 11 */
+	       E T1H, T1P, T1O, T1Q;
+	       {
+		    E T1z, T1G, T1K, T1N;
+		    T1z = T1x - T1y;
+		    T1G = T1C + T1F;
+		    T1H = T1z + T1G;
+		    T1P = T1z - T1G;
+		    T1K = T1I - T1J;
+		    T1N = T1L - T1M;
+		    T1O = T1K - T1N;
+		    T1Q = T1N + T1K;
+	       }
+	       O[WS(os, 3)] = FMA(KP1_913880671, T1H, KP580569354 * T1O);
+	       O[WS(os, 27)] = FNMS(KP1_763842528, T1P, KP942793473 * T1Q);
+	       O[WS(os, 19)] = FNMS(KP580569354, T1H, KP1_913880671 * T1O);
+	       O[WS(os, 11)] = FMA(KP942793473, T1P, KP1_763842528 * T1Q);
+	  }
+     }
+}
+
+static void mhc2rIII_32(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{
+     /* Batched driver: applies the single-transform kernel mhc2rIII_32_0 */
+     /* v times (no calls when v <= 0), stepping the input pointers ri/ii */
+     /* by ivs and the output pointer O by ovs after every call. */
+     int remaining;
+     for (remaining = v; remaining > 0; remaining--, ri += ivs, ii += ivs, O += ovs) {
+	  mhc2rIII_32_0(ri, ii, O, ris, iis, os);
+     }
+}
+
+static const khc2r_desc desc = { 32, "mhc2rIII_32", {138, 48, 36, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* 32 and "mhc2rIII_32" mirror the generator's -n/-name; {138, 48, 36, ...} matches the add/mul/fma counts in this file's header comment; remaining fields: see khc2r_desc (TODO confirm) */
+
+void X(codelet_mhc2rIII_32) (planner *p) { /* registration entry point: note it goes through khc2rIII_register, not khc2r_register */
+     X(khc2rIII_register) (p, mhc2rIII_32, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/mhc2rIII_64.c b/src/fftw3/rdft/codelets/hc2r/mhc2rIII_64.c
new file mode 100644
index 0000000..42e7790
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/mhc2rIII_64.c
@@ -0,0 +1,819 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:12:24 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r_noinline -compact -variables 4 -sign 1 -n 64 -name mhc2rIII_64 -dft-III -include hc2rIII.h */
+
+/*
+ * This function contains 434 FP additions, 208 FP multiplications,
+ * (or, 342 additions, 116 multiplications, 92 fused multiply/add),
+ * 129 stack variables, and 128 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: mhc2rIII_64.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: mhc2rIII_64.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: mhc2rIII_64.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2rIII.h"
+
+static void mhc2rIII_64_0(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os)
+{ /* One size-64 kernel pass: reads ri[] and ii[] at indices 0..31 (via WS with ris/iis), writes O[] at indices 0..63 (via WS with os). */
+     DK(KP1_343117909, +1.343117909694036801250753700854843606457501264); /* each DK constant is named after the leading digits of its value */
+     DK(KP1_481902250, +1.481902250709918182351233794990325459457910619);
+     DK(KP1_807978586, +1.807978586246886663172400594461074097420264050);
+     DK(KP855110186, +0.855110186860564188641933713777597068609157259);
+     DK(KP1_997590912, +1.997590912410344785429543209518201388886407229);
+     DK(KP098135348, +0.098135348654836028509909953885365316629490726);
+     DK(KP673779706, +0.673779706784440101378506425238295140955533559);
+     DK(KP1_883088130, +1.883088130366041556825018805199004714371179592);
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP1_191398608, +1.191398608984866686934073057659939779023852677);
+     DK(KP1_606415062, +1.606415062961289819613353025926283847759138854);
+     DK(KP1_715457220, +1.715457220000544139804539968569540274084981599);
+     DK(KP1_028205488, +1.028205488386443453187387677937631545216098241);
+     DK(KP1_978353019, +1.978353019929561946903347476032486127967379067);
+     DK(KP293460948, +0.293460948910723503317700259293435639412430633);
+     DK(KP485960359, +0.485960359806527779896548324154942236641981567);
+     DK(KP1_940062506, +1.940062506389087985207968414572200502913731924);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP1_268786568, +1.268786568327290996430343226450986741351374190);
+     DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
+     DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
+     DK(KP942793473, +0.942793473651995297112775251810508755314920638);
+     DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
+     DK(KP196034280, +0.196034280659121203988391127777283691722273346);
+     DK(KP580569354, +0.580569354508924735272384751634790549382952557);
+     DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
+     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
+     DK(KP1_111140466, +1.111140466039204449485661627897065748749874382);
+     DK(KP390180644, +0.390180644032256535696569736954044481855383236);
+     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
+     DK(KP765366864, +0.765366864730179543456919968060797733522689125);
+     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     { /* the temporaries declared next are filled by the four load stages and consumed by the output stages */
+	  E T15, T3t, T3U, T2N, Tf, T6b, T6u, T6R, T4L, T5J, T1g, T3V, T5q, T5U, T2I;
+	  E T3u, Tu, T6v, T4V, T5s, T6e, T6Q, T1s, T2D, T1D, T2E, T3B, T3Y, T4Q, T5r;
+	  E T3y, T3X, TK, T6g, T57, T5N, T6j, T6N, T1W, T34, T25, T35, T3J, T4j, T52;
+	  E T5M, T3G, T4i, TZ, T6l, T5i, T5Q, T6o, T6M, T2n, T37, T2w, T38, T3Q, T4m;
+	  E T5d, T5P, T3N, T4l;
+	  { /* load stage 1: ri/ii at indices 0, 31, 16, 15, 8, 23, 7, 24; forms sums and differences */
+	       E T3, T11, T2M, T5n, T6, T2J, T14, T5m, Ta, T16, T19, T4J, Td, T1b, T1e;
+	       E T4I;
+	       {
+		    E T1, T2, T2K, T2L;
+		    T1 = ri[0];
+		    T2 = ri[WS(ris, 31)];
+		    T3 = T1 + T2;
+		    T11 = T1 - T2;
+		    T2K = ii[0];
+		    T2L = ii[WS(iis, 31)];
+		    T2M = T2K + T2L;
+		    T5n = T2L - T2K;
+	       }
+	       {
+		    E T4, T5, T12, T13;
+		    T4 = ri[WS(ris, 16)];
+		    T5 = ri[WS(ris, 15)];
+		    T6 = T4 + T5;
+		    T2J = T4 - T5;
+		    T12 = ii[WS(iis, 16)];
+		    T13 = ii[WS(iis, 15)];
+		    T14 = T12 + T13;
+		    T5m = T12 - T13;
+	       }
+	       {
+		    E T8, T9, T17, T18;
+		    T8 = ri[WS(ris, 8)];
+		    T9 = ri[WS(ris, 23)];
+		    Ta = T8 + T9;
+		    T16 = T8 - T9;
+		    T17 = ii[WS(iis, 8)];
+		    T18 = ii[WS(iis, 23)];
+		    T19 = T17 + T18;
+		    T4J = T17 - T18;
+	       }
+	       {
+		    E Tb, Tc, T1c, T1d;
+		    Tb = ri[WS(ris, 7)];
+		    Tc = ri[WS(ris, 24)];
+		    Td = Tb + Tc;
+		    T1b = Tb - Tc;
+		    T1c = ii[WS(iis, 7)];
+		    T1d = ii[WS(iis, 24)];
+		    T1e = T1c + T1d;
+		    T4I = T1d - T1c;
+	       }
+	       {
+		    E T7, Te, T1a, T1f;
+		    T15 = T11 - T14;
+		    T3t = T11 + T14;
+		    T3U = T2J - T2M;
+		    T2N = T2J + T2M;
+		    T7 = T3 + T6;
+		    Te = Ta + Td;
+		    Tf = T7 + Te;
+		    T6b = T7 - Te;
+		    {
+			 E T6s, T6t, T4H, T4K;
+			 T6s = T4J + T4I;
+			 T6t = T5n - T5m;
+			 T6u = T6s + T6t;
+			 T6R = T6t - T6s;
+			 T4H = T3 - T6;
+			 T4K = T4I - T4J;
+			 T4L = T4H + T4K;
+			 T5J = T4H - T4K;
+		    }
+		    T1a = T16 - T19;
+		    T1f = T1b - T1e;
+		    T1g = KP707106781 * (T1a + T1f);
+		    T3V = KP707106781 * (T1a - T1f);
+		    {
+			 E T5o, T5p, T2G, T2H;
+			 T5o = T5m + T5n;
+			 T5p = Ta - Td;
+			 T5q = T5o - T5p;
+			 T5U = T5p + T5o;
+			 T2G = T16 + T19;
+			 T2H = T1b + T1e;
+			 T2I = KP707106781 * (T2G - T2H);
+			 T3u = KP707106781 * (T2G + T2H);
+		    }
+	       }
+	  }
+	  { /* load stage 2: ri/ii at indices 4, 27, 20, 11, 3, 28, 12, 19 */
+	       E Ti, T1i, T1q, T4N, Tl, T1n, T1l, T4O, Tp, T1t, T1B, T4S, Ts, T1y, T1w;
+	       E T4T;
+	       {
+		    E Tg, Th, T1o, T1p;
+		    Tg = ri[WS(ris, 4)];
+		    Th = ri[WS(ris, 27)];
+		    Ti = Tg + Th;
+		    T1i = Tg - Th;
+		    T1o = ii[WS(iis, 4)];
+		    T1p = ii[WS(iis, 27)];
+		    T1q = T1o + T1p;
+		    T4N = T1o - T1p;
+	       }
+	       {
+		    E Tj, Tk, T1j, T1k;
+		    Tj = ri[WS(ris, 20)];
+		    Tk = ri[WS(ris, 11)];
+		    Tl = Tj + Tk;
+		    T1n = Tj - Tk;
+		    T1j = ii[WS(iis, 20)];
+		    T1k = ii[WS(iis, 11)];
+		    T1l = T1j + T1k;
+		    T4O = T1j - T1k;
+	       }
+	       {
+		    E Tn, To, T1z, T1A;
+		    Tn = ri[WS(ris, 3)];
+		    To = ri[WS(ris, 28)];
+		    Tp = Tn + To;
+		    T1t = Tn - To;
+		    T1z = ii[WS(iis, 3)];
+		    T1A = ii[WS(iis, 28)];
+		    T1B = T1z + T1A;
+		    T4S = T1A - T1z;
+	       }
+	       {
+		    E Tq, Tr, T1u, T1v;
+		    Tq = ri[WS(ris, 12)];
+		    Tr = ri[WS(ris, 19)];
+		    Ts = Tq + Tr;
+		    T1y = Tq - Tr;
+		    T1u = ii[WS(iis, 12)];
+		    T1v = ii[WS(iis, 19)];
+		    T1w = T1u + T1v;
+		    T4T = T1u - T1v;
+	       }
+	       {
+		    E Tm, Tt, T4R, T4U;
+		    Tm = Ti + Tl;
+		    Tt = Tp + Ts;
+		    Tu = Tm + Tt;
+		    T6v = Tm - Tt;
+		    T4R = Tp - Ts;
+		    T4U = T4S - T4T;
+		    T4V = T4R + T4U;
+		    T5s = T4U - T4R;
+	       }
+	       {
+		    E T6c, T6d, T1m, T1r;
+		    T6c = T4T + T4S;
+		    T6d = T4O + T4N;
+		    T6e = T6c - T6d;
+		    T6Q = T6d + T6c;
+		    T1m = T1i - T1l;
+		    T1r = T1n + T1q;
+		    T1s = FNMS(KP382683432, T1r, KP923879532 * T1m);
+		    T2D = FMA(KP382683432, T1m, KP923879532 * T1r);
+	       }
+	       {
+		    E T1x, T1C, T3z, T3A;
+		    T1x = T1t - T1w;
+		    T1C = T1y - T1B;
+		    T1D = FMA(KP923879532, T1x, KP382683432 * T1C);
+		    T2E = FNMS(KP382683432, T1x, KP923879532 * T1C);
+		    T3z = T1t + T1w;
+		    T3A = T1y + T1B;
+		    T3B = FNMS(KP923879532, T3A, KP382683432 * T3z);
+		    T3Y = FMA(KP923879532, T3z, KP382683432 * T3A);
+	       }
+	       {
+		    E T4M, T4P, T3w, T3x;
+		    T4M = Ti - Tl;
+		    T4P = T4N - T4O;
+		    T4Q = T4M - T4P;
+		    T5r = T4M + T4P;
+		    T3w = T1i + T1l;
+		    T3x = T1q - T1n;
+		    T3y = FNMS(KP923879532, T3x, KP382683432 * T3w);
+		    T3X = FMA(KP923879532, T3w, KP382683432 * T3x);
+	       }
+	  }
+	  { /* load stage 3: ri/ii at indices 2, 29, 18, 13, 5, 26, 10, 21 */
+	       E Ty, T1G, T23, T54, TB, T20, T1J, T55, TI, T4Z, T1U, T1Y, TF, T50, T1P;
+	       E T1X;
+	       {
+		    E Tw, Tx, T1H, T1I;
+		    Tw = ri[WS(ris, 2)];
+		    Tx = ri[WS(ris, 29)];
+		    Ty = Tw + Tx;
+		    T1G = Tw - Tx;
+		    {
+			 E T21, T22, Tz, TA;
+			 T21 = ii[WS(iis, 2)];
+			 T22 = ii[WS(iis, 29)];
+			 T23 = T21 + T22;
+			 T54 = T21 - T22;
+			 Tz = ri[WS(ris, 18)];
+			 TA = ri[WS(ris, 13)];
+			 TB = Tz + TA;
+			 T20 = Tz - TA;
+		    }
+		    T1H = ii[WS(iis, 18)];
+		    T1I = ii[WS(iis, 13)];
+		    T1J = T1H + T1I;
+		    T55 = T1H - T1I;
+		    {
+			 E TG, TH, T1Q, T1R, T1S, T1T;
+			 TG = ri[WS(ris, 5)];
+			 TH = ri[WS(ris, 26)];
+			 T1Q = TG - TH;
+			 T1R = ii[WS(iis, 5)];
+			 T1S = ii[WS(iis, 26)];
+			 T1T = T1R + T1S;
+			 TI = TG + TH;
+			 T4Z = T1S - T1R;
+			 T1U = T1Q - T1T;
+			 T1Y = T1Q + T1T;
+		    }
+		    {
+			 E TD, TE, T1L, T1M, T1N, T1O;
+			 TD = ri[WS(ris, 10)];
+			 TE = ri[WS(ris, 21)];
+			 T1L = TD - TE;
+			 T1M = ii[WS(iis, 10)];
+			 T1N = ii[WS(iis, 21)];
+			 T1O = T1M + T1N;
+			 TF = TD + TE;
+			 T50 = T1M - T1N;
+			 T1P = T1L - T1O;
+			 T1X = T1L + T1O;
+		    }
+	       }
+	       {
+		    E TC, TJ, T53, T56;
+		    TC = Ty + TB;
+		    TJ = TF + TI;
+		    TK = TC + TJ;
+		    T6g = TC - TJ;
+		    T53 = TF - TI;
+		    T56 = T54 - T55;
+		    T57 = T53 + T56;
+		    T5N = T56 - T53;
+	       }
+	       {
+		    E T6h, T6i, T1K, T1V;
+		    T6h = T55 + T54;
+		    T6i = T50 + T4Z;
+		    T6j = T6h - T6i;
+		    T6N = T6i + T6h;
+		    T1K = T1G - T1J;
+		    T1V = KP707106781 * (T1P + T1U);
+		    T1W = T1K + T1V;
+		    T34 = T1K - T1V;
+	       }
+	       {
+		    E T1Z, T24, T3H, T3I;
+		    T1Z = KP707106781 * (T1X - T1Y);
+		    T24 = T20 + T23;
+		    T25 = T1Z + T24;
+		    T35 = T24 - T1Z;
+		    T3H = KP707106781 * (T1P - T1U);
+		    T3I = T23 - T20;
+		    T3J = T3H + T3I;
+		    T4j = T3I - T3H;
+	       }
+	       {
+		    E T4Y, T51, T3E, T3F;
+		    T4Y = Ty - TB;
+		    T51 = T4Z - T50;
+		    T52 = T4Y + T51;
+		    T5M = T4Y - T51;
+		    T3E = T1G + T1J;
+		    T3F = KP707106781 * (T1X + T1Y);
+		    T3G = T3E - T3F;
+		    T4i = T3E + T3F;
+	       }
+	  }
+	  { /* load stage 4: ri/ii at indices 1, 30, 14, 17, 9, 22, 6, 25 */
+	       E TN, T27, T2u, T5f, TQ, T2r, T2a, T5g, TX, T5a, T2l, T2p, TU, T5b, T2g;
+	       E T2o;
+	       {
+		    E TL, TM, T28, T29;
+		    TL = ri[WS(ris, 1)];
+		    TM = ri[WS(ris, 30)];
+		    TN = TL + TM;
+		    T27 = TL - TM;
+		    {
+			 E T2s, T2t, TO, TP;
+			 T2s = ii[WS(iis, 1)];
+			 T2t = ii[WS(iis, 30)];
+			 T2u = T2s + T2t;
+			 T5f = T2t - T2s;
+			 TO = ri[WS(ris, 14)];
+			 TP = ri[WS(ris, 17)];
+			 TQ = TO + TP;
+			 T2r = TO - TP;
+		    }
+		    T28 = ii[WS(iis, 14)];
+		    T29 = ii[WS(iis, 17)];
+		    T2a = T28 + T29;
+		    T5g = T28 - T29;
+		    {
+			 E TV, TW, T2h, T2i, T2j, T2k;
+			 TV = ri[WS(ris, 9)];
+			 TW = ri[WS(ris, 22)];
+			 T2h = TV - TW;
+			 T2i = ii[WS(iis, 9)];
+			 T2j = ii[WS(iis, 22)];
+			 T2k = T2i + T2j;
+			 TX = TV + TW;
+			 T5a = T2j - T2i;
+			 T2l = T2h - T2k;
+			 T2p = T2h + T2k;
+		    }
+		    {
+			 E TS, TT, T2c, T2d, T2e, T2f;
+			 TS = ri[WS(ris, 6)];
+			 TT = ri[WS(ris, 25)];
+			 T2c = TS - TT;
+			 T2d = ii[WS(iis, 6)];
+			 T2e = ii[WS(iis, 25)];
+			 T2f = T2d + T2e;
+			 TU = TS + TT;
+			 T5b = T2d - T2e;
+			 T2g = T2c - T2f;
+			 T2o = T2c + T2f;
+		    }
+	       }
+	       {
+		    E TR, TY, T5e, T5h;
+		    TR = TN + TQ;
+		    TY = TU + TX;
+		    TZ = TR + TY;
+		    T6l = TR - TY;
+		    T5e = TU - TX;
+		    T5h = T5f - T5g;
+		    T5i = T5e + T5h;
+		    T5Q = T5h - T5e;
+	       }
+	       {
+		    E T6m, T6n, T2b, T2m;
+		    T6m = T5g + T5f;
+		    T6n = T5b + T5a;
+		    T6o = T6m - T6n;
+		    T6M = T6n + T6m;
+		    T2b = T27 - T2a;
+		    T2m = KP707106781 * (T2g + T2l);
+		    T2n = T2b + T2m;
+		    T37 = T2b - T2m;
+	       }
+	       {
+		    E T2q, T2v, T3O, T3P;
+		    T2q = KP707106781 * (T2o - T2p);
+		    T2v = T2r - T2u;
+		    T2w = T2q + T2v;
+		    T38 = T2v - T2q;
+		    T3O = KP707106781 * (T2g - T2l);
+		    T3P = T2r + T2u;
+		    T3Q = T3O - T3P;
+		    T4m = T3O + T3P;
+	       }
+	       {
+		    E T59, T5c, T3L, T3M;
+		    T59 = TN - TQ;
+		    T5c = T5a - T5b;
+		    T5d = T59 + T5c;
+		    T5P = T59 - T5c;
+		    T3L = T27 + T2a;
+		    T3M = KP707106781 * (T2o + T2p);
+		    T3N = T3L - T3M;
+		    T4l = T3L + T3M;
+	       }
+	  }
+	  { /* output stage: O at indices 0, 32, 16, 48 */
+	       E Tv, T10, T6X, T6Y, T6Z, T70;
+	       Tv = Tf + Tu;
+	       T10 = TK + TZ;
+	       T6X = Tv - T10;
+	       T6Y = T6N + T6M;
+	       T6Z = T6R - T6Q;
+	       T70 = T6Y + T6Z;
+	       O[0] = KP2_000000000 * (Tv + T10);
+	       O[WS(os, 32)] = KP2_000000000 * (T6Z - T6Y);
+	       O[WS(os, 16)] = KP1_414213562 * (T6X + T70);
+	       O[WS(os, 48)] = KP1_414213562 * (T70 - T6X);
+	  }
+	  { /* output stage: O at indices 8, 56, 40, 24 */
+	       E T6P, T6V, T6U, T6W;
+	       {
+		    E T6L, T6O, T6S, T6T;
+		    T6L = Tf - Tu;
+		    T6O = T6M - T6N;
+		    T6P = T6L + T6O;
+		    T6V = T6L - T6O;
+		    T6S = T6Q + T6R;
+		    T6T = TK - TZ;
+		    T6U = T6S - T6T;
+		    T6W = T6T + T6S;
+	       }
+	       O[WS(os, 8)] = FMA(KP1_847759065, T6P, KP765366864 * T6U);
+	       O[WS(os, 56)] = FNMS(KP1_847759065, T6V, KP765366864 * T6W);
+	       O[WS(os, 40)] = FNMS(KP765366864, T6P, KP1_847759065 * T6U);
+	       O[WS(os, 24)] = FMA(KP765366864, T6V, KP1_847759065 * T6W);
+	  }
+	  { /* output stage: O at indices 4, 36, 28, 60, 20, 52, 12, 44 */
+	       E T6f, T6w, T6G, T6D, T6z, T6E, T6q, T6H;
+	       T6f = T6b + T6e;
+	       T6w = T6u - T6v;
+	       T6G = T6v + T6u;
+	       T6D = T6b - T6e;
+	       {
+		    E T6x, T6y, T6k, T6p;
+		    T6x = T6g + T6j;
+		    T6y = T6o - T6l;
+		    T6z = KP707106781 * (T6x + T6y);
+		    T6E = KP707106781 * (T6y - T6x);
+		    T6k = T6g - T6j;
+		    T6p = T6l + T6o;
+		    T6q = KP707106781 * (T6k + T6p);
+		    T6H = KP707106781 * (T6k - T6p);
+	       }
+	       {
+		    E T6r, T6A, T6J, T6K;
+		    T6r = T6f + T6q;
+		    T6A = T6w - T6z;
+		    O[WS(os, 4)] = FMA(KP1_961570560, T6r, KP390180644 * T6A);
+		    O[WS(os, 36)] = FNMS(KP390180644, T6r, KP1_961570560 * T6A);
+		    T6J = T6D - T6E;
+		    T6K = T6H + T6G;
+		    O[WS(os, 28)] = FMA(KP390180644, T6J, KP1_961570560 * T6K);
+		    O[WS(os, 60)] = FNMS(KP1_961570560, T6J, KP390180644 * T6K);
+	       }
+	       {
+		    E T6B, T6C, T6F, T6I;
+		    T6B = T6f - T6q;
+		    T6C = T6z + T6w;
+		    O[WS(os, 20)] = FMA(KP1_111140466, T6B, KP1_662939224 * T6C);
+		    O[WS(os, 52)] = FNMS(KP1_662939224, T6B, KP1_111140466 * T6C);
+		    T6F = T6D + T6E;
+		    T6I = T6G - T6H;
+		    O[WS(os, 12)] = FMA(KP1_662939224, T6F, KP1_111140466 * T6I);
+		    O[WS(os, 44)] = FNMS(KP1_111140466, T6F, KP1_662939224 * T6I);
+	       }
+	  }
+	  { /* output stage: O at indices 6, 38, 30, 62, 22, 54, 14, 46 */
+	       E T5L, T63, T5W, T66, T5S, T67, T5Z, T64, T5K, T5V;
+	       T5K = KP707106781 * (T5s - T5r);
+	       T5L = T5J + T5K;
+	       T63 = T5J - T5K;
+	       T5V = KP707106781 * (T4Q - T4V);
+	       T5W = T5U - T5V;
+	       T66 = T5V + T5U;
+	       {
+		    E T5O, T5R, T5X, T5Y;
+		    T5O = FNMS(KP923879532, T5N, KP382683432 * T5M);
+		    T5R = FMA(KP382683432, T5P, KP923879532 * T5Q);
+		    T5S = T5O + T5R;
+		    T67 = T5O - T5R;
+		    T5X = FMA(KP923879532, T5M, KP382683432 * T5N);
+		    T5Y = FNMS(KP923879532, T5P, KP382683432 * T5Q);
+		    T5Z = T5X + T5Y;
+		    T64 = T5Y - T5X;
+	       }
+	       {
+		    E T5T, T60, T69, T6a;
+		    T5T = T5L + T5S;
+		    T60 = T5W - T5Z;
+		    O[WS(os, 6)] = FMA(KP1_913880671, T5T, KP580569354 * T60);
+		    O[WS(os, 38)] = FNMS(KP580569354, T5T, KP1_913880671 * T60);
+		    T69 = T63 - T64;
+		    T6a = T67 + T66;
+		    O[WS(os, 30)] = FMA(KP196034280, T69, KP1_990369453 * T6a);
+		    O[WS(os, 62)] = FNMS(KP1_990369453, T69, KP196034280 * T6a);
+	       }
+	       {
+		    E T61, T62, T65, T68;
+		    T61 = T5L - T5S;
+		    T62 = T5Z + T5W;
+		    O[WS(os, 22)] = FMA(KP942793473, T61, KP1_763842528 * T62);
+		    O[WS(os, 54)] = FNMS(KP1_763842528, T61, KP942793473 * T62);
+		    T65 = T63 + T64;
+		    T68 = T66 - T67;
+		    O[WS(os, 14)] = FMA(KP1_546020906, T65, KP1_268786568 * T68);
+		    O[WS(os, 46)] = FNMS(KP1_268786568, T65, KP1_546020906 * T68);
+	       }
+	  }
+	  { /* output stage: O at indices 2, 34, 26, 58, 18, 50, 10, 42 */
+	       E T4X, T5B, T5u, T5E, T5k, T5F, T5x, T5C, T4W, T5t;
+	       T4W = KP707106781 * (T4Q + T4V);
+	       T4X = T4L + T4W;
+	       T5B = T4L - T4W;
+	       T5t = KP707106781 * (T5r + T5s);
+	       T5u = T5q - T5t;
+	       T5E = T5t + T5q;
+	       {
+		    E T58, T5j, T5v, T5w;
+		    T58 = FNMS(KP382683432, T57, KP923879532 * T52);
+		    T5j = FMA(KP923879532, T5d, KP382683432 * T5i);
+		    T5k = T58 + T5j;
+		    T5F = T58 - T5j;
+		    T5v = FMA(KP382683432, T52, KP923879532 * T57);
+		    T5w = FNMS(KP382683432, T5d, KP923879532 * T5i);
+		    T5x = T5v + T5w;
+		    T5C = T5w - T5v;
+	       }
+	       {
+		    E T5l, T5y, T5H, T5I;
+		    T5l = T4X + T5k;
+		    T5y = T5u - T5x;
+		    O[WS(os, 2)] = FMA(KP1_990369453, T5l, KP196034280 * T5y);
+		    O[WS(os, 34)] = FNMS(KP196034280, T5l, KP1_990369453 * T5y);
+		    T5H = T5B - T5C;
+		    T5I = T5F + T5E;
+		    O[WS(os, 26)] = FMA(KP580569354, T5H, KP1_913880671 * T5I);
+		    O[WS(os, 58)] = FNMS(KP1_913880671, T5H, KP580569354 * T5I);
+	       }
+	       {
+		    E T5z, T5A, T5D, T5G;
+		    T5z = T4X - T5k;
+		    T5A = T5x + T5u;
+		    O[WS(os, 18)] = FMA(KP1_268786568, T5z, KP1_546020906 * T5A);
+		    O[WS(os, 50)] = FNMS(KP1_546020906, T5z, KP1_268786568 * T5A);
+		    T5D = T5B + T5C;
+		    T5G = T5E - T5F;
+		    O[WS(os, 10)] = FMA(KP1_763842528, T5D, KP942793473 * T5G);
+		    O[WS(os, 42)] = FNMS(KP942793473, T5D, KP1_763842528 * T5G);
+	       }
+	  }
+	  { /* output stage: O at indices 5, 37, 29, 61, 21, 53, 13, 45 */
+	       E T33, T3l, T3h, T3m, T3a, T3p, T3e, T3o;
+	       {
+		    E T31, T32, T3f, T3g;
+		    T31 = T15 - T1g;
+		    T32 = T2E - T2D;
+		    T33 = T31 + T32;
+		    T3l = T31 - T32;
+		    T3f = FMA(KP831469612, T34, KP555570233 * T35);
+		    T3g = FNMS(KP831469612, T37, KP555570233 * T38);
+		    T3h = T3f + T3g;
+		    T3m = T3g - T3f;
+	       }
+	       {
+		    E T36, T39, T3c, T3d;
+		    T36 = FNMS(KP831469612, T35, KP555570233 * T34);
+		    T39 = FMA(KP555570233, T37, KP831469612 * T38);
+		    T3a = T36 + T39;
+		    T3p = T36 - T39;
+		    T3c = T2I - T2N;
+		    T3d = T1s - T1D;
+		    T3e = T3c - T3d;
+		    T3o = T3d + T3c;
+	       }
+	       {
+		    E T3b, T3i, T3r, T3s;
+		    T3b = T33 + T3a;
+		    T3i = T3e - T3h;
+		    O[WS(os, 5)] = FMA(KP1_940062506, T3b, KP485960359 * T3i);
+		    O[WS(os, 37)] = FNMS(KP485960359, T3b, KP1_940062506 * T3i);
+		    T3r = T3l - T3m;
+		    T3s = T3p + T3o;
+		    O[WS(os, 29)] = FMA(KP293460948, T3r, KP1_978353019 * T3s);
+		    O[WS(os, 61)] = FNMS(KP1_978353019, T3r, KP293460948 * T3s);
+	       }
+	       {
+		    E T3j, T3k, T3n, T3q;
+		    T3j = T33 - T3a;
+		    T3k = T3h + T3e;
+		    O[WS(os, 21)] = FMA(KP1_028205488, T3j, KP1_715457220 * T3k);
+		    O[WS(os, 53)] = FNMS(KP1_715457220, T3j, KP1_028205488 * T3k);
+		    T3n = T3l + T3m;
+		    T3q = T3o - T3p;
+		    O[WS(os, 13)] = FMA(KP1_606415062, T3n, KP1_191398608 * T3q);
+		    O[WS(os, 45)] = FNMS(KP1_191398608, T3n, KP1_606415062 * T3q);
+	       }
+	  }
+	  { /* output stage: O at indices 7, 39, 31, 63, 23, 55, 15, 47 */
+	       E T4h, T4z, T4v, T4A, T4o, T4D, T4s, T4C;
+	       {
+		    E T4f, T4g, T4t, T4u;
+		    T4f = T3t + T3u;
+		    T4g = T3X + T3Y;
+		    T4h = T4f - T4g;
+		    T4z = T4f + T4g;
+		    T4t = FMA(KP980785280, T4i, KP195090322 * T4j);
+		    T4u = FMA(KP980785280, T4l, KP195090322 * T4m);
+		    T4v = T4t - T4u;
+		    T4A = T4t + T4u;
+	       }
+	       {
+		    E T4k, T4n, T4q, T4r;
+		    T4k = FNMS(KP980785280, T4j, KP195090322 * T4i);
+		    T4n = FNMS(KP980785280, T4m, KP195090322 * T4l);
+		    T4o = T4k + T4n;
+		    T4D = T4k - T4n;
+		    T4q = T3V + T3U;
+		    T4r = T3y - T3B;
+		    T4s = T4q - T4r;
+		    T4C = T4r + T4q;
+	       }
+	       {
+		    E T4p, T4w, T4F, T4G;
+		    T4p = T4h + T4o;
+		    T4w = T4s - T4v;
+		    O[WS(os, 7)] = FMA(KP1_883088130, T4p, KP673779706 * T4w);
+		    O[WS(os, 39)] = FNMS(KP673779706, T4p, KP1_883088130 * T4w);
+		    T4F = T4z + T4A;
+		    T4G = T4D + T4C;
+		    O[WS(os, 31)] = FMA(KP098135348, T4F, KP1_997590912 * T4G);
+		    O[WS(os, 63)] = FNMS(KP1_997590912, T4F, KP098135348 * T4G);
+	       }
+	       {
+		    E T4x, T4y, T4B, T4E;
+		    T4x = T4h - T4o;
+		    T4y = T4v + T4s;
+		    O[WS(os, 23)] = FMA(KP855110186, T4x, KP1_807978586 * T4y);
+		    O[WS(os, 55)] = FNMS(KP1_807978586, T4x, KP855110186 * T4y);
+		    T4B = T4z - T4A;
+		    T4E = T4C - T4D;
+		    O[WS(os, 15)] = FMA(KP1_481902250, T4B, KP1_343117909 * T4E);
+		    O[WS(os, 47)] = FNMS(KP1_343117909, T4B, KP1_481902250 * T4E);
+	       }
+	  }
+	  { /* output stage: O at indices 1, 33, 25, 57, 17, 49, 9, 41 */
+	       E T1F, T2T, T2P, T2W, T2y, T2X, T2C, T2U;
+	       {
+		    E T1h, T1E, T2F, T2O;
+		    T1h = T15 + T1g;
+		    T1E = T1s + T1D;
+		    T1F = T1h + T1E;
+		    T2T = T1h - T1E;
+		    T2F = T2D + T2E;
+		    T2O = T2I + T2N;
+		    T2P = T2F + T2O;
+		    T2W = T2F - T2O;
+	       }
+	       {
+		    E T26, T2x, T2A, T2B;
+		    T26 = FNMS(KP195090322, T25, KP980785280 * T1W);
+		    T2x = FMA(KP980785280, T2n, KP195090322 * T2w);
+		    T2y = T26 + T2x;
+		    T2X = T26 - T2x;
+		    T2A = FMA(KP195090322, T1W, KP980785280 * T25);
+		    T2B = FNMS(KP195090322, T2n, KP980785280 * T2w);
+		    T2C = T2A + T2B;
+		    T2U = T2B - T2A;
+	       }
+	       {
+		    E T2z, T2Q, T2Z, T30;
+		    T2z = T1F + T2y;
+		    T2Q = T2C + T2P;
+		    O[WS(os, 1)] = FNMS(KP098135348, T2Q, KP1_997590912 * T2z);
+		    O[WS(os, 33)] = -(FMA(KP098135348, T2z, KP1_997590912 * T2Q));
+		    T2Z = T2T - T2U;
+		    T30 = T2X + T2W;
+		    O[WS(os, 25)] = FMA(KP673779706, T2Z, KP1_883088130 * T30);
+		    O[WS(os, 57)] = FNMS(KP1_883088130, T2Z, KP673779706 * T30);
+	       }
+	       {
+		    E T2R, T2S, T2V, T2Y;
+		    T2R = T1F - T2y;
+		    T2S = T2C - T2P;
+		    O[WS(os, 17)] = FMA(KP1_343117909, T2R, KP1_481902250 * T2S);
+		    O[WS(os, 49)] = FNMS(KP1_481902250, T2R, KP1_343117909 * T2S);
+		    T2V = T2T + T2U;
+		    T2Y = T2W - T2X;
+		    O[WS(os, 9)] = FMA(KP1_807978586, T2V, KP855110186 * T2Y);
+		    O[WS(os, 41)] = FNMS(KP855110186, T2V, KP1_807978586 * T2Y);
+	       }
+	  }
+	  { /* output stage: O at indices 3, 35, 27, 59, 19, 51, 11, 43 */
+	       E T3D, T47, T43, T48, T3S, T4b, T40, T4a;
+	       {
+		    E T3v, T3C, T41, T42;
+		    T3v = T3t - T3u;
+		    T3C = T3y + T3B;
+		    T3D = T3v + T3C;
+		    T47 = T3v - T3C;
+		    T41 = FMA(KP555570233, T3G, KP831469612 * T3J);
+		    T42 = FNMS(KP555570233, T3N, KP831469612 * T3Q);
+		    T43 = T41 + T42;
+		    T48 = T42 - T41;
+	       }
+	       {
+		    E T3K, T3R, T3W, T3Z;
+		    T3K = FNMS(KP555570233, T3J, KP831469612 * T3G);
+		    T3R = FMA(KP831469612, T3N, KP555570233 * T3Q);
+		    T3S = T3K + T3R;
+		    T4b = T3K - T3R;
+		    T3W = T3U - T3V;
+		    T3Z = T3X - T3Y;
+		    T40 = T3W - T3Z;
+		    T4a = T3Z + T3W;
+	       }
+	       {
+		    E T3T, T44, T4d, T4e;
+		    T3T = T3D + T3S;
+		    T44 = T40 - T43;
+		    O[WS(os, 3)] = FMA(KP1_978353019, T3T, KP293460948 * T44);
+		    O[WS(os, 35)] = FNMS(KP293460948, T3T, KP1_978353019 * T44);
+		    T4d = T47 - T48;
+		    T4e = T4b + T4a;
+		    O[WS(os, 27)] = FMA(KP485960359, T4d, KP1_940062506 * T4e);
+		    O[WS(os, 59)] = FNMS(KP1_940062506, T4d, KP485960359 * T4e);
+	       }
+	       {
+		    E T45, T46, T49, T4c;
+		    T45 = T3D - T3S;
+		    T46 = T43 + T40;
+		    O[WS(os, 19)] = FMA(KP1_191398608, T45, KP1_606415062 * T46);
+		    O[WS(os, 51)] = FNMS(KP1_606415062, T45, KP1_191398608 * T46);
+		    T49 = T47 + T48;
+		    T4c = T4a - T4b;
+		    O[WS(os, 11)] = FMA(KP1_715457220, T49, KP1_028205488 * T4c);
+		    O[WS(os, 43)] = FNMS(KP1_028205488, T49, KP1_715457220 * T4c);
+	       }
+	  }
+     }
+}
+
+/* Batch driver: runs mhc2rIII_64_0 v times, stepping ri/ii by ivs and O by ovs. */
+static void mhc2rIII_64(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{
+     for (; v > 0; --v) {
+	  mhc2rIII_64_0(ri, ii, O, ris, iis, os);
+	  ri += ivs;
+	  ii += ivs;
+	  O += ovs;
+     }
+}
+
+static const khc2r_desc desc = { 64, "mhc2rIII_64", {342, 116, 92, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor registered with mhc2rIII_64; 342/116/92 equal the add / mul / fused multiply-add counts quoted in this file's header comment */
+
+void X(codelet_mhc2rIII_64) (planner *p) { /* registration entry point for this codelet */
+     X(khc2rIII_register) (p, mhc2rIII_64, &desc); /* passes the batch driver and its descriptor to the registrar for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/mhc2r_128.c b/src/fftw3/rdft/codelets/hc2r/mhc2r_128.c
new file mode 100644
index 0000000..27209a3
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/mhc2r_128.c
@@ -0,0 +1,1652 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:20 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r_noinline -compact -variables 4 -sign 1 -n 128 -name mhc2r_128 -include hc2r.h */
+
+/*
+ * This function contains 956 FP additions, 342 FP multiplications,
+ * (or, 812 additions, 198 multiplications, 144 fused multiply/add),
+ * 197 stack variables, and 256 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: mhc2r_128.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: mhc2r_128.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: mhc2r_128.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void mhc2r_128_0(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os)
+{
+     DK(KP1_028205488, +1.028205488386443453187387677937631545216098241);
+     DK(KP1_715457220, +1.715457220000544139804539968569540274084981599);
+     DK(KP1_606415062, +1.606415062961289819613353025926283847759138854);
+     DK(KP1_191398608, +1.191398608984866686934073057659939779023852677);
+     DK(KP1_940062506, +1.940062506389087985207968414572200502913731924);
+     DK(KP485960359, +0.485960359806527779896548324154942236641981567);
+     DK(KP293460948, +0.293460948910723503317700259293435639412430633);
+     DK(KP1_978353019, +1.978353019929561946903347476032486127967379067);
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP855110186, +0.855110186860564188641933713777597068609157259);
+     DK(KP1_807978586, +1.807978586246886663172400594461074097420264050);
+     DK(KP1_481902250, +1.481902250709918182351233794990325459457910619);
+     DK(KP1_343117909, +1.343117909694036801250753700854843606457501264);
+     DK(KP1_883088130, +1.883088130366041556825018805199004714371179592);
+     DK(KP673779706, +0.673779706784440101378506425238295140955533559);
+     DK(KP098135348, +0.098135348654836028509909953885365316629490726);
+     DK(KP1_997590912, +1.997590912410344785429543209518201388886407229);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP580569354, +0.580569354508924735272384751634790549382952557);
+     DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
+     DK(KP942793473, +0.942793473651995297112775251810508755314920638);
+     DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
+     DK(KP1_111140466, +1.111140466039204449485661627897065748749874382);
+     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
+     DK(KP1_268786568, +1.268786568327290996430343226450986741351374190);
+     DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
+     DK(KP196034280, +0.196034280659121203988391127777283691722273346);
+     DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
+     DK(KP390180644, +0.390180644032256535696569736954044481855383236);
+     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     DK(KP765366864, +0.765366864730179543456919968060797733522689125);
+     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     {
+	  E Ta, T6q, T2a, T5k, T8x, Tbx, TcF, Ten, Th, T6r, T2j, T5l, T8E, Tby, TcI;
+	  E Teo, Tx, T6t, TcM, Teq, TcP, Ter, T2t, T5n, T2C, T5o, T8Q, TbA, T8X, TbB;
+	  E T6w, T7L, T1j, T6L, Tde, TeC, TdL, TeR, T3v, T5z, T4I, T5O, T9O, TbM, TaV;
+	  E Tc1, T78, T7Z, TN, T6z, TcU, Teu, Td8, Tey, T2N, T5r, T3j, T5v, T9a, TbE;
+	  E T9A, TbI, T6H, T7O, T1O, T7V, T48, T4u, Tds, TeG, T5E, T5K, Taf, TbP, Tdp;
+	  E TeF, T6U, T72, Tam, TbQ, T23, T7U, T4r, T4v, Tdz, TeJ, T5H, T5L, Tay, TbS;
+	  E Tdw, TeI, T6Z, T73, TaF, TbT, T1y, T75, Tdl, TeQ, TdI, TeD, T3O, T5N, T4z;
+	  E T5A, Ta3, Tc0, TaO, TbN, T6O, T80, T12, T6E, Td1, Tex, Td5, Tev, T36, T5u;
+	  E T3a, T5s, T9p, TbH, T9t, TbF, T6C, T7P;
+	  {
+	       E T5, T8s, T3, T8q, T9, T8u, T29, T8v, T6, T26;
+	       {
+		    E T4, T8r, T1, T2;
+		    T4 = ri[WS(ris, 32)];
+		    T5 = KP2_000000000 * T4;
+		    T8r = ii[WS(iis, 32)];
+		    T8s = KP2_000000000 * T8r;
+		    T1 = ri[0];
+		    T2 = ri[WS(ris, 64)];
+		    T3 = T1 + T2;
+		    T8q = T1 - T2;
+		    {
+			 E T7, T8, T27, T28;
+			 T7 = ri[WS(ris, 16)];
+			 T8 = ri[WS(ris, 48)];
+			 T9 = KP2_000000000 * (T7 + T8);
+			 T8u = T7 - T8;
+			 T27 = ii[WS(iis, 16)];
+			 T28 = ii[WS(iis, 48)];
+			 T29 = KP2_000000000 * (T27 - T28);
+			 T8v = T27 + T28;
+		    }
+	       }
+	       T6 = T3 + T5;
+	       Ta = T6 + T9;
+	       T6q = T6 - T9;
+	       T26 = T3 - T5;
+	       T2a = T26 - T29;
+	       T5k = T26 + T29;
+	       {
+		    E T8t, T8w, TcD, TcE;
+		    T8t = T8q - T8s;
+		    T8w = KP1_414213562 * (T8u - T8v);
+		    T8x = T8t + T8w;
+		    Tbx = T8t - T8w;
+		    TcD = T8q + T8s;
+		    TcE = KP1_414213562 * (T8u + T8v);
+		    TcF = TcD - TcE;
+		    Ten = TcD + TcE;
+	       }
+	  }
+	  {
+	       E Td, T8y, T2e, T8C, Tg, T8B, T2h, T8z, T2b, T2i;
+	       {
+		    E Tb, Tc, T2c, T2d;
+		    Tb = ri[WS(ris, 8)];
+		    Tc = ri[WS(ris, 56)];
+		    Td = Tb + Tc;
+		    T8y = Tb - Tc;
+		    T2c = ii[WS(iis, 8)];
+		    T2d = ii[WS(iis, 56)];
+		    T2e = T2c - T2d;
+		    T8C = T2c + T2d;
+	       }
+	       {
+		    E Te, Tf, T2f, T2g;
+		    Te = ri[WS(ris, 40)];
+		    Tf = ri[WS(ris, 24)];
+		    Tg = Te + Tf;
+		    T8B = Te - Tf;
+		    T2f = ii[WS(iis, 40)];
+		    T2g = ii[WS(iis, 24)];
+		    T2h = T2f - T2g;
+		    T8z = T2f + T2g;
+	       }
+	       Th = KP2_000000000 * (Td + Tg);
+	       T6r = KP2_000000000 * (T2h + T2e);
+	       T2b = Td - Tg;
+	       T2i = T2e - T2h;
+	       T2j = KP1_414213562 * (T2b - T2i);
+	       T5l = KP1_414213562 * (T2b + T2i);
+	       {
+		    E T8A, T8D, TcG, TcH;
+		    T8A = T8y - T8z;
+		    T8D = T8B + T8C;
+		    T8E = FNMS(KP765366864, T8D, KP1_847759065 * T8A);
+		    Tby = FMA(KP765366864, T8A, KP1_847759065 * T8D);
+		    TcG = T8y + T8z;
+		    TcH = T8C - T8B;
+		    TcI = FNMS(KP1_847759065, TcH, KP765366864 * TcG);
+		    Teo = FMA(KP1_847759065, TcG, KP765366864 * TcH);
+	       }
+	  }
+	  {
+	       E Tl, T8G, T2x, T8V, To, T8U, T2A, T8H, Tv, T8S, T2o, T8O, Ts, T8R, T2r;
+	       E T8L;
+	       {
+		    E Tj, Tk, T2y, T2z;
+		    Tj = ri[WS(ris, 4)];
+		    Tk = ri[WS(ris, 60)];
+		    Tl = Tj + Tk;
+		    T8G = Tj - Tk;
+		    {
+			 E T2v, T2w, Tm, Tn;
+			 T2v = ii[WS(iis, 4)];
+			 T2w = ii[WS(iis, 60)];
+			 T2x = T2v - T2w;
+			 T8V = T2v + T2w;
+			 Tm = ri[WS(ris, 36)];
+			 Tn = ri[WS(ris, 28)];
+			 To = Tm + Tn;
+			 T8U = Tm - Tn;
+		    }
+		    T2y = ii[WS(iis, 36)];
+		    T2z = ii[WS(iis, 28)];
+		    T2A = T2y - T2z;
+		    T8H = T2y + T2z;
+		    {
+			 E Tt, Tu, T8M, T2m, T2n, T8N;
+			 Tt = ri[WS(ris, 12)];
+			 Tu = ri[WS(ris, 52)];
+			 T8M = Tt - Tu;
+			 T2m = ii[WS(iis, 52)];
+			 T2n = ii[WS(iis, 12)];
+			 T8N = T2n + T2m;
+			 Tv = Tt + Tu;
+			 T8S = T8M + T8N;
+			 T2o = T2m - T2n;
+			 T8O = T8M - T8N;
+		    }
+		    {
+			 E Tq, Tr, T8J, T2p, T2q, T8K;
+			 Tq = ri[WS(ris, 20)];
+			 Tr = ri[WS(ris, 44)];
+			 T8J = Tq - Tr;
+			 T2p = ii[WS(iis, 20)];
+			 T2q = ii[WS(iis, 44)];
+			 T8K = T2p + T2q;
+			 Ts = Tq + Tr;
+			 T8R = T8J + T8K;
+			 T2r = T2p - T2q;
+			 T8L = T8J - T8K;
+		    }
+	       }
+	       {
+		    E Tp, Tw, TcK, TcL;
+		    Tp = Tl + To;
+		    Tw = Ts + Tv;
+		    Tx = KP2_000000000 * (Tp + Tw);
+		    T6t = Tp - Tw;
+		    TcK = T8G + T8H;
+		    TcL = KP707106781 * (T8R + T8S);
+		    TcM = TcK - TcL;
+		    Teq = TcK + TcL;
+	       }
+	       {
+		    E TcN, TcO, T2l, T2s;
+		    TcN = KP707106781 * (T8L - T8O);
+		    TcO = T8V - T8U;
+		    TcP = TcN + TcO;
+		    Ter = TcO - TcN;
+		    T2l = Tl - To;
+		    T2s = T2o - T2r;
+		    T2t = T2l + T2s;
+		    T5n = T2l - T2s;
+	       }
+	       {
+		    E T2u, T2B, T8I, T8P;
+		    T2u = Ts - Tv;
+		    T2B = T2x - T2A;
+		    T2C = T2u + T2B;
+		    T5o = T2B - T2u;
+		    T8I = T8G - T8H;
+		    T8P = KP707106781 * (T8L + T8O);
+		    T8Q = T8I + T8P;
+		    TbA = T8I - T8P;
+	       }
+	       {
+		    E T8T, T8W, T6u, T6v;
+		    T8T = KP707106781 * (T8R - T8S);
+		    T8W = T8U + T8V;
+		    T8X = T8T + T8W;
+		    TbB = T8W - T8T;
+		    T6u = T2A + T2x;
+		    T6v = T2r + T2o;
+		    T6w = T6u - T6v;
+		    T7L = KP2_000000000 * (T6v + T6u);
+	       }
+	  }
+	  {
+	       E T17, T9E, T4D, TaT, T1a, TaS, T4G, T9F, T1h, TaQ, T3q, T9M, T1e, TaP, T3t;
+	       E T9J;
+	       {
+		    E T15, T16, T4E, T4F;
+		    T15 = ri[WS(ris, 1)];
+		    T16 = ri[WS(ris, 63)];
+		    T17 = T15 + T16;
+		    T9E = T15 - T16;
+		    {
+			 E T4B, T4C, T18, T19;
+			 T4B = ii[WS(iis, 1)];
+			 T4C = ii[WS(iis, 63)];
+			 T4D = T4B - T4C;
+			 TaT = T4B + T4C;
+			 T18 = ri[WS(ris, 33)];
+			 T19 = ri[WS(ris, 31)];
+			 T1a = T18 + T19;
+			 TaS = T18 - T19;
+		    }
+		    T4E = ii[WS(iis, 33)];
+		    T4F = ii[WS(iis, 31)];
+		    T4G = T4E - T4F;
+		    T9F = T4E + T4F;
+		    {
+			 E T1f, T1g, T9K, T3o, T3p, T9L;
+			 T1f = ri[WS(ris, 15)];
+			 T1g = ri[WS(ris, 49)];
+			 T9K = T1f - T1g;
+			 T3o = ii[WS(iis, 49)];
+			 T3p = ii[WS(iis, 15)];
+			 T9L = T3p + T3o;
+			 T1h = T1f + T1g;
+			 TaQ = T9K + T9L;
+			 T3q = T3o - T3p;
+			 T9M = T9K - T9L;
+		    }
+		    {
+			 E T1c, T1d, T9H, T3r, T3s, T9I;
+			 T1c = ri[WS(ris, 17)];
+			 T1d = ri[WS(ris, 47)];
+			 T9H = T1c - T1d;
+			 T3r = ii[WS(iis, 17)];
+			 T3s = ii[WS(iis, 47)];
+			 T9I = T3r + T3s;
+			 T1e = T1c + T1d;
+			 TaP = T9H + T9I;
+			 T3t = T3r - T3s;
+			 T9J = T9H - T9I;
+		    }
+	       }
+	       {
+		    E T1b, T1i, Tdc, Tdd;
+		    T1b = T17 + T1a;
+		    T1i = T1e + T1h;
+		    T1j = T1b + T1i;
+		    T6L = T1b - T1i;
+		    Tdc = T9E + T9F;
+		    Tdd = KP707106781 * (TaP + TaQ);
+		    Tde = Tdc - Tdd;
+		    TeC = Tdc + Tdd;
+	       }
+	       {
+		    E TdJ, TdK, T3n, T3u;
+		    TdJ = KP707106781 * (T9J - T9M);
+		    TdK = TaT - TaS;
+		    TdL = TdJ + TdK;
+		    TeR = TdK - TdJ;
+		    T3n = T17 - T1a;
+		    T3u = T3q - T3t;
+		    T3v = T3n + T3u;
+		    T5z = T3n - T3u;
+	       }
+	       {
+		    E T4A, T4H, T9G, T9N;
+		    T4A = T1e - T1h;
+		    T4H = T4D - T4G;
+		    T4I = T4A + T4H;
+		    T5O = T4H - T4A;
+		    T9G = T9E - T9F;
+		    T9N = KP707106781 * (T9J + T9M);
+		    T9O = T9G + T9N;
+		    TbM = T9G - T9N;
+	       }
+	       {
+		    E TaR, TaU, T76, T77;
+		    TaR = KP707106781 * (TaP - TaQ);
+		    TaU = TaS + TaT;
+		    TaV = TaR + TaU;
+		    Tc1 = TaU - TaR;
+		    T76 = T4G + T4D;
+		    T77 = T3t + T3q;
+		    T78 = T76 - T77;
+		    T7Z = T77 + T76;
+	       }
+	  }
+	  {
+	       E TB, T90, T3e, T9y, TE, T9x, T3h, T91, TL, T9v, T2I, T98, TI, T9u, T2L;
+	       E T95;
+	       {
+		    E Tz, TA, T3f, T3g;
+		    Tz = ri[WS(ris, 2)];
+		    TA = ri[WS(ris, 62)];
+		    TB = Tz + TA;
+		    T90 = Tz - TA;
+		    {
+			 E T3c, T3d, TC, TD;
+			 T3c = ii[WS(iis, 2)];
+			 T3d = ii[WS(iis, 62)];
+			 T3e = T3c - T3d;
+			 T9y = T3c + T3d;
+			 TC = ri[WS(ris, 34)];
+			 TD = ri[WS(ris, 30)];
+			 TE = TC + TD;
+			 T9x = TC - TD;
+		    }
+		    T3f = ii[WS(iis, 34)];
+		    T3g = ii[WS(iis, 30)];
+		    T3h = T3f - T3g;
+		    T91 = T3f + T3g;
+		    {
+			 E TJ, TK, T96, T2G, T2H, T97;
+			 TJ = ri[WS(ris, 14)];
+			 TK = ri[WS(ris, 50)];
+			 T96 = TJ - TK;
+			 T2G = ii[WS(iis, 50)];
+			 T2H = ii[WS(iis, 14)];
+			 T97 = T2H + T2G;
+			 TL = TJ + TK;
+			 T9v = T96 + T97;
+			 T2I = T2G - T2H;
+			 T98 = T96 - T97;
+		    }
+		    {
+			 E TG, TH, T93, T2J, T2K, T94;
+			 TG = ri[WS(ris, 18)];
+			 TH = ri[WS(ris, 46)];
+			 T93 = TG - TH;
+			 T2J = ii[WS(iis, 18)];
+			 T2K = ii[WS(iis, 46)];
+			 T94 = T2J + T2K;
+			 TI = TG + TH;
+			 T9u = T93 + T94;
+			 T2L = T2J - T2K;
+			 T95 = T93 - T94;
+		    }
+	       }
+	       {
+		    E TF, TM, TcS, TcT;
+		    TF = TB + TE;
+		    TM = TI + TL;
+		    TN = TF + TM;
+		    T6z = TF - TM;
+		    TcS = T90 + T91;
+		    TcT = KP707106781 * (T9u + T9v);
+		    TcU = TcS - TcT;
+		    Teu = TcS + TcT;
+	       }
+	       {
+		    E Td6, Td7, T2F, T2M;
+		    Td6 = KP707106781 * (T95 - T98);
+		    Td7 = T9y - T9x;
+		    Td8 = Td6 + Td7;
+		    Tey = Td7 - Td6;
+		    T2F = TB - TE;
+		    T2M = T2I - T2L;
+		    T2N = T2F + T2M;
+		    T5r = T2F - T2M;
+	       }
+	       {
+		    E T3b, T3i, T92, T99;
+		    T3b = TI - TL;
+		    T3i = T3e - T3h;
+		    T3j = T3b + T3i;
+		    T5v = T3i - T3b;
+		    T92 = T90 - T91;
+		    T99 = KP707106781 * (T95 + T98);
+		    T9a = T92 + T99;
+		    TbE = T92 - T99;
+	       }
+	       {
+		    E T9w, T9z, T6F, T6G;
+		    T9w = KP707106781 * (T9u - T9v);
+		    T9z = T9x + T9y;
+		    T9A = T9w + T9z;
+		    TbI = T9z - T9w;
+		    T6F = T3h + T3e;
+		    T6G = T2L + T2I;
+		    T6H = T6F - T6G;
+		    T7O = T6G + T6F;
+	       }
+	  }
+	  {
+	       E T1G, Taj, T3Q, Ta5, T46, Tak, T6R, Ta6, T1N, Tag, Tah, T3X, T3Z, Taa, Tad;
+	       E T6S, Tdn, Tdo;
+	       {
+		    E T1A, T1B, T1C, T1D, T1E, T1F;
+		    T1A = ri[WS(ris, 5)];
+		    T1B = ri[WS(ris, 59)];
+		    T1C = T1A + T1B;
+		    T1D = ri[WS(ris, 37)];
+		    T1E = ri[WS(ris, 27)];
+		    T1F = T1D + T1E;
+		    T1G = T1C + T1F;
+		    Taj = T1D - T1E;
+		    T3Q = T1C - T1F;
+		    Ta5 = T1A - T1B;
+	       }
+	       {
+		    E T40, T41, T42, T43, T44, T45;
+		    T40 = ii[WS(iis, 5)];
+		    T41 = ii[WS(iis, 59)];
+		    T42 = T40 - T41;
+		    T43 = ii[WS(iis, 37)];
+		    T44 = ii[WS(iis, 27)];
+		    T45 = T43 - T44;
+		    T46 = T42 - T45;
+		    Tak = T40 + T41;
+		    T6R = T45 + T42;
+		    Ta6 = T43 + T44;
+	       }
+	       {
+		    E T1J, Ta8, T3W, Ta9, T1M, Tab, T3T, Tac;
+		    {
+			 E T1H, T1I, T3U, T3V;
+			 T1H = ri[WS(ris, 21)];
+			 T1I = ri[WS(ris, 43)];
+			 T1J = T1H + T1I;
+			 Ta8 = T1H - T1I;
+			 T3U = ii[WS(iis, 21)];
+			 T3V = ii[WS(iis, 43)];
+			 T3W = T3U - T3V;
+			 Ta9 = T3U + T3V;
+		    }
+		    {
+			 E T1K, T1L, T3R, T3S;
+			 T1K = ri[WS(ris, 11)];
+			 T1L = ri[WS(ris, 53)];
+			 T1M = T1K + T1L;
+			 Tab = T1K - T1L;
+			 T3R = ii[WS(iis, 53)];
+			 T3S = ii[WS(iis, 11)];
+			 T3T = T3R - T3S;
+			 Tac = T3S + T3R;
+		    }
+		    T1N = T1J + T1M;
+		    Tag = Ta8 + Ta9;
+		    Tah = Tab + Tac;
+		    T3X = T3T - T3W;
+		    T3Z = T1J - T1M;
+		    Taa = Ta8 - Ta9;
+		    Tad = Tab - Tac;
+		    T6S = T3W + T3T;
+	       }
+	       T1O = T1G + T1N;
+	       T7V = T6S + T6R;
+	       {
+		    E T3Y, T47, Tdq, Tdr;
+		    T3Y = T3Q + T3X;
+		    T47 = T3Z + T46;
+		    T48 = FNMS(KP382683432, T47, KP923879532 * T3Y);
+		    T4u = FMA(KP382683432, T3Y, KP923879532 * T47);
+		    Tdq = KP707106781 * (Taa - Tad);
+		    Tdr = Tak - Taj;
+		    Tds = Tdq + Tdr;
+		    TeG = Tdr - Tdq;
+	       }
+	       {
+		    E T5C, T5D, Ta7, Tae;
+		    T5C = T3Q - T3X;
+		    T5D = T46 - T3Z;
+		    T5E = FNMS(KP923879532, T5D, KP382683432 * T5C);
+		    T5K = FMA(KP923879532, T5C, KP382683432 * T5D);
+		    Ta7 = Ta5 - Ta6;
+		    Tae = KP707106781 * (Taa + Tad);
+		    Taf = Ta7 + Tae;
+		    TbP = Ta7 - Tae;
+	       }
+	       Tdn = Ta5 + Ta6;
+	       Tdo = KP707106781 * (Tag + Tah);
+	       Tdp = Tdn - Tdo;
+	       TeF = Tdn + Tdo;
+	       {
+		    E T6Q, T6T, Tai, Tal;
+		    T6Q = T1G - T1N;
+		    T6T = T6R - T6S;
+		    T6U = T6Q - T6T;
+		    T72 = T6Q + T6T;
+		    Tai = KP707106781 * (Tag - Tah);
+		    Tal = Taj + Tak;
+		    Tam = Tai + Tal;
+		    TbQ = Tal - Tai;
+	       }
+	  }
+	  {
+	       E T1V, TaC, T49, Tao, T4p, TaD, T6W, Tap, T22, Taz, TaA, T4g, T4i, Tat, Taw;
+	       E T6X, Tdu, Tdv;
+	       {
+		    E T1P, T1Q, T1R, T1S, T1T, T1U;
+		    T1P = ri[WS(ris, 3)];
+		    T1Q = ri[WS(ris, 61)];
+		    T1R = T1P + T1Q;
+		    T1S = ri[WS(ris, 29)];
+		    T1T = ri[WS(ris, 35)];
+		    T1U = T1S + T1T;
+		    T1V = T1R + T1U;
+		    TaC = T1S - T1T;
+		    T49 = T1R - T1U;
+		    Tao = T1P - T1Q;
+	       }
+	       {
+		    E T4j, T4k, T4l, T4m, T4n, T4o;
+		    T4j = ii[WS(iis, 61)];
+		    T4k = ii[WS(iis, 3)];
+		    T4l = T4j - T4k;
+		    T4m = ii[WS(iis, 29)];
+		    T4n = ii[WS(iis, 35)];
+		    T4o = T4m - T4n;
+		    T4p = T4l - T4o;
+		    TaD = T4k + T4j;
+		    T6W = T4o + T4l;
+		    Tap = T4m + T4n;
+	       }
+	       {
+		    E T1Y, Tar, T4f, Tas, T21, Tau, T4c, Tav;
+		    {
+			 E T1W, T1X, T4d, T4e;
+			 T1W = ri[WS(ris, 13)];
+			 T1X = ri[WS(ris, 51)];
+			 T1Y = T1W + T1X;
+			 Tar = T1W - T1X;
+			 T4d = ii[WS(iis, 13)];
+			 T4e = ii[WS(iis, 51)];
+			 T4f = T4d - T4e;
+			 Tas = T4d + T4e;
+		    }
+		    {
+			 E T1Z, T20, T4a, T4b;
+			 T1Z = ri[WS(ris, 19)];
+			 T20 = ri[WS(ris, 45)];
+			 T21 = T1Z + T20;
+			 Tau = T1Z - T20;
+			 T4a = ii[WS(iis, 45)];
+			 T4b = ii[WS(iis, 19)];
+			 T4c = T4a - T4b;
+			 Tav = T4b + T4a;
+		    }
+		    T22 = T1Y + T21;
+		    Taz = Tar + Tas;
+		    TaA = Tau + Tav;
+		    T4g = T4c - T4f;
+		    T4i = T1Y - T21;
+		    Tat = Tar - Tas;
+		    Taw = Tau - Tav;
+		    T6X = T4f + T4c;
+	       }
+	       T23 = T1V + T22;
+	       T7U = T6X + T6W;
+	       {
+		    E T4h, T4q, Tdx, Tdy;
+		    T4h = T49 + T4g;
+		    T4q = T4i + T4p;
+		    T4r = FMA(KP923879532, T4h, KP382683432 * T4q);
+		    T4v = FNMS(KP382683432, T4h, KP923879532 * T4q);
+		    Tdx = KP707106781 * (Tat - Taw);
+		    Tdy = TaC + TaD;
+		    Tdz = Tdx - Tdy;
+		    TeJ = Tdx + Tdy;
+	       }
+	       {
+		    E T5F, T5G, Taq, Tax;
+		    T5F = T49 - T4g;
+		    T5G = T4p - T4i;
+		    T5H = FMA(KP382683432, T5F, KP923879532 * T5G);
+		    T5L = FNMS(KP923879532, T5F, KP382683432 * T5G);
+		    Taq = Tao - Tap;
+		    Tax = KP707106781 * (Tat + Taw);
+		    Tay = Taq + Tax;
+		    TbS = Taq - Tax;
+	       }
+	       Tdu = Tao + Tap;
+	       Tdv = KP707106781 * (Taz + TaA);
+	       Tdw = Tdu - Tdv;
+	       TeI = Tdu + Tdv;
+	       {
+		    E T6V, T6Y, TaB, TaE;
+		    T6V = T1V - T22;
+		    T6Y = T6W - T6X;
+		    T6Z = T6V + T6Y;
+		    T73 = T6Y - T6V;
+		    TaB = KP707106781 * (Taz - TaA);
+		    TaE = TaC - TaD;
+		    TaF = TaB + TaE;
+		    TbT = TaE - TaB;
+	       }
+	  }
+	  {
+	       E T1m, T3z, T1p, T3C, T3w, T3D, Tdg, Tdf, T9U, T9R, T1t, T3I, T1w, T3L, T3F;
+	       E T3M, Tdj, Tdi, Ta1, T9Y;
+	       {
+		    E T9P, T9T, T9S, T9Q;
+		    {
+			 E T1k, T1l, T3x, T3y;
+			 T1k = ri[WS(ris, 9)];
+			 T1l = ri[WS(ris, 55)];
+			 T1m = T1k + T1l;
+			 T9P = T1k - T1l;
+			 T3x = ii[WS(iis, 9)];
+			 T3y = ii[WS(iis, 55)];
+			 T3z = T3x - T3y;
+			 T9T = T3x + T3y;
+		    }
+		    {
+			 E T1n, T1o, T3A, T3B;
+			 T1n = ri[WS(ris, 41)];
+			 T1o = ri[WS(ris, 23)];
+			 T1p = T1n + T1o;
+			 T9S = T1n - T1o;
+			 T3A = ii[WS(iis, 41)];
+			 T3B = ii[WS(iis, 23)];
+			 T3C = T3A - T3B;
+			 T9Q = T3A + T3B;
+		    }
+		    T3w = T1m - T1p;
+		    T3D = T3z - T3C;
+		    Tdg = T9T - T9S;
+		    Tdf = T9P + T9Q;
+		    T9U = T9S + T9T;
+		    T9R = T9P - T9Q;
+	       }
+	       {
+		    E T9W, Ta0, T9Z, T9X;
+		    {
+			 E T1r, T1s, T3G, T3H;
+			 T1r = ri[WS(ris, 7)];
+			 T1s = ri[WS(ris, 57)];
+			 T1t = T1r + T1s;
+			 T9W = T1r - T1s;
+			 T3G = ii[WS(iis, 57)];
+			 T3H = ii[WS(iis, 7)];
+			 T3I = T3G - T3H;
+			 Ta0 = T3H + T3G;
+		    }
+		    {
+			 E T1u, T1v, T3J, T3K;
+			 T1u = ri[WS(ris, 25)];
+			 T1v = ri[WS(ris, 39)];
+			 T1w = T1u + T1v;
+			 T9Z = T1u - T1v;
+			 T3J = ii[WS(iis, 25)];
+			 T3K = ii[WS(iis, 39)];
+			 T3L = T3J - T3K;
+			 T9X = T3J + T3K;
+		    }
+		    T3F = T1t - T1w;
+		    T3M = T3I - T3L;
+		    Tdj = T9Z + Ta0;
+		    Tdi = T9W + T9X;
+		    Ta1 = T9Z - Ta0;
+		    T9Y = T9W - T9X;
+	       }
+	       {
+		    E T1q, T1x, Tdh, Tdk;
+		    T1q = T1m + T1p;
+		    T1x = T1t + T1w;
+		    T1y = T1q + T1x;
+		    T75 = T1q - T1x;
+		    Tdh = FNMS(KP923879532, Tdg, KP382683432 * Tdf);
+		    Tdk = FNMS(KP923879532, Tdj, KP382683432 * Tdi);
+		    Tdl = Tdh + Tdk;
+		    TeQ = Tdh - Tdk;
+	       }
+	       {
+		    E TdG, TdH, T3E, T3N;
+		    TdG = FMA(KP923879532, Tdf, KP382683432 * Tdg);
+		    TdH = FMA(KP923879532, Tdi, KP382683432 * Tdj);
+		    TdI = TdG - TdH;
+		    TeD = TdG + TdH;
+		    T3E = T3w - T3D;
+		    T3N = T3F + T3M;
+		    T3O = KP707106781 * (T3E + T3N);
+		    T5N = KP707106781 * (T3E - T3N);
+	       }
+	       {
+		    E T4x, T4y, T9V, Ta2;
+		    T4x = T3w + T3D;
+		    T4y = T3M - T3F;
+		    T4z = KP707106781 * (T4x + T4y);
+		    T5A = KP707106781 * (T4y - T4x);
+		    T9V = FNMS(KP382683432, T9U, KP923879532 * T9R);
+		    Ta2 = FMA(KP923879532, T9Y, KP382683432 * Ta1);
+		    Ta3 = T9V + Ta2;
+		    Tc0 = T9V - Ta2;
+	       }
+	       {
+		    E TaM, TaN, T6M, T6N;
+		    TaM = FMA(KP382683432, T9R, KP923879532 * T9U);
+		    TaN = FNMS(KP382683432, T9Y, KP923879532 * Ta1);
+		    TaO = TaM + TaN;
+		    TbN = TaN - TaM;
+		    T6M = T3L + T3I;
+		    T6N = T3C + T3z;
+		    T6O = T6M - T6N;
+		    T80 = T6N + T6M;
+	       }
+	  }
+	  {
+	       E TQ, T2R, TT, T2U, T2O, T2V, TcW, TcV, T9g, T9d, TX, T30, T10, T33, T2X;
+	       E T34, TcZ, TcY, T9n, T9k;
+	       {
+		    E T9b, T9f, T9e, T9c;
+		    {
+			 E TO, TP, T2P, T2Q;
+			 TO = ri[WS(ris, 10)];
+			 TP = ri[WS(ris, 54)];
+			 TQ = TO + TP;
+			 T9b = TO - TP;
+			 T2P = ii[WS(iis, 10)];
+			 T2Q = ii[WS(iis, 54)];
+			 T2R = T2P - T2Q;
+			 T9f = T2P + T2Q;
+		    }
+		    {
+			 E TR, TS, T2S, T2T;
+			 TR = ri[WS(ris, 42)];
+			 TS = ri[WS(ris, 22)];
+			 TT = TR + TS;
+			 T9e = TR - TS;
+			 T2S = ii[WS(iis, 42)];
+			 T2T = ii[WS(iis, 22)];
+			 T2U = T2S - T2T;
+			 T9c = T2S + T2T;
+		    }
+		    T2O = TQ - TT;
+		    T2V = T2R - T2U;
+		    TcW = T9f - T9e;
+		    TcV = T9b + T9c;
+		    T9g = T9e + T9f;
+		    T9d = T9b - T9c;
+	       }
+	       {
+		    E T9i, T9m, T9l, T9j;
+		    {
+			 E TV, TW, T2Y, T2Z;
+			 TV = ri[WS(ris, 6)];
+			 TW = ri[WS(ris, 58)];
+			 TX = TV + TW;
+			 T9i = TV - TW;
+			 T2Y = ii[WS(iis, 58)];
+			 T2Z = ii[WS(iis, 6)];
+			 T30 = T2Y - T2Z;
+			 T9m = T2Z + T2Y;
+		    }
+		    {
+			 E TY, TZ, T31, T32;
+			 TY = ri[WS(ris, 26)];
+			 TZ = ri[WS(ris, 38)];
+			 T10 = TY + TZ;
+			 T9l = TY - TZ;
+			 T31 = ii[WS(iis, 26)];
+			 T32 = ii[WS(iis, 38)];
+			 T33 = T31 - T32;
+			 T9j = T31 + T32;
+		    }
+		    T2X = TX - T10;
+		    T34 = T30 - T33;
+		    TcZ = T9l + T9m;
+		    TcY = T9i + T9j;
+		    T9n = T9l - T9m;
+		    T9k = T9i - T9j;
+	       }
+	       {
+		    E TU, T11, TcX, Td0;
+		    TU = TQ + TT;
+		    T11 = TX + T10;
+		    T12 = TU + T11;
+		    T6E = TU - T11;
+		    TcX = FNMS(KP923879532, TcW, KP382683432 * TcV);
+		    Td0 = FNMS(KP923879532, TcZ, KP382683432 * TcY);
+		    Td1 = TcX + Td0;
+		    Tex = TcX - Td0;
+	       }
+	       {
+		    E Td3, Td4, T2W, T35;
+		    Td3 = FMA(KP923879532, TcV, KP382683432 * TcW);
+		    Td4 = FMA(KP923879532, TcY, KP382683432 * TcZ);
+		    Td5 = Td3 - Td4;
+		    Tev = Td3 + Td4;
+		    T2W = T2O - T2V;
+		    T35 = T2X + T34;
+		    T36 = KP707106781 * (T2W + T35);
+		    T5u = KP707106781 * (T2W - T35);
+	       }
+	       {
+		    E T38, T39, T9h, T9o;
+		    T38 = T2O + T2V;
+		    T39 = T34 - T2X;
+		    T3a = KP707106781 * (T38 + T39);
+		    T5s = KP707106781 * (T39 - T38);
+		    T9h = FNMS(KP382683432, T9g, KP923879532 * T9d);
+		    T9o = FMA(KP923879532, T9k, KP382683432 * T9n);
+		    T9p = T9h + T9o;
+		    TbH = T9h - T9o;
+	       }
+	       {
+		    E T9r, T9s, T6A, T6B;
+		    T9r = FMA(KP382683432, T9d, KP923879532 * T9g);
+		    T9s = FNMS(KP382683432, T9k, KP923879532 * T9n);
+		    T9t = T9r + T9s;
+		    TbF = T9s - T9r;
+		    T6A = T33 + T30;
+		    T6B = T2U + T2R;
+		    T6C = T6A - T6B;
+		    T7P = T6B + T6A;
+	       }
+	  }
+	  {
+	       E T13, T8f, Ty, T8e, T25, T8h, T8k, T8p, Ti, T14, T8o;
+	       T13 = KP2_000000000 * (TN + T12);
+	       T8f = KP2_000000000 * (T7P + T7O);
+	       Ti = Ta + Th;
+	       Ty = Ti + Tx;
+	       T8e = Ti - Tx;
+	       {
+		    E T1z, T24, T8i, T8j;
+		    T1z = T1j + T1y;
+		    T24 = T1O + T23;
+		    T25 = KP2_000000000 * (T1z + T24);
+		    T8h = T1z - T24;
+		    T8i = T80 + T7Z;
+		    T8j = T7V + T7U;
+		    T8k = T8i - T8j;
+		    T8p = KP2_000000000 * (T8j + T8i);
+	       }
+	       T14 = Ty + T13;
+	       O[WS(os, 64)] = T14 - T25;
+	       O[0] = T14 + T25;
+	       T8o = Ty - T13;
+	       O[WS(os, 32)] = T8o - T8p;
+	       O[WS(os, 96)] = T8o + T8p;
+	       {
+		    E T8g, T8l, T8m, T8n;
+		    T8g = T8e - T8f;
+		    T8l = KP1_414213562 * (T8h - T8k);
+		    O[WS(os, 80)] = T8g - T8l;
+		    O[WS(os, 16)] = T8g + T8l;
+		    T8m = T8e + T8f;
+		    T8n = KP1_414213562 * (T8h + T8k);
+		    O[WS(os, 48)] = T8m - T8n;
+		    O[WS(os, 112)] = T8m + T8n;
+	       }
+	  }
+	  {
+	       E T7M, T86, T82, T8a, T7R, T87, T7X, T89, T7K, T7Y, T81;
+	       T7K = Ta - Th;
+	       T7M = T7K - T7L;
+	       T86 = T7K + T7L;
+	       T7Y = T1O - T23;
+	       T81 = T7Z - T80;
+	       T82 = T7Y + T81;
+	       T8a = T81 - T7Y;
+	       {
+		    E T7N, T7Q, T7T, T7W;
+		    T7N = TN - T12;
+		    T7Q = T7O - T7P;
+		    T7R = KP1_414213562 * (T7N - T7Q);
+		    T87 = KP1_414213562 * (T7N + T7Q);
+		    T7T = T1j - T1y;
+		    T7W = T7U - T7V;
+		    T7X = T7T + T7W;
+		    T89 = T7T - T7W;
+	       }
+	       {
+		    E T7S, T83, T8c, T8d;
+		    T7S = T7M + T7R;
+		    T83 = FNMS(KP765366864, T82, KP1_847759065 * T7X);
+		    O[WS(os, 72)] = T7S - T83;
+		    O[WS(os, 8)] = T7S + T83;
+		    T8c = T86 + T87;
+		    T8d = FMA(KP1_847759065, T89, KP765366864 * T8a);
+		    O[WS(os, 56)] = T8c - T8d;
+		    O[WS(os, 120)] = T8c + T8d;
+	       }
+	       {
+		    E T84, T85, T88, T8b;
+		    T84 = T7M - T7R;
+		    T85 = FMA(KP765366864, T7X, KP1_847759065 * T82);
+		    O[WS(os, 40)] = T84 - T85;
+		    O[WS(os, 104)] = T84 + T85;
+		    T88 = T86 - T87;
+		    T8b = FNMS(KP1_847759065, T8a, KP765366864 * T89);
+		    O[WS(os, 88)] = T88 - T8b;
+		    O[WS(os, 24)] = T88 + T8b;
+	       }
+	  }
+	  {
+	       E T2E, T4O, T4K, T4S, T3l, T4P, T4t, T4R;
+	       {
+		    E T2k, T2D, T4w, T4J;
+		    T2k = T2a + T2j;
+		    T2D = FNMS(KP765366864, T2C, KP1_847759065 * T2t);
+		    T2E = T2k + T2D;
+		    T4O = T2k - T2D;
+		    T4w = T4u + T4v;
+		    T4J = T4z + T4I;
+		    T4K = T4w + T4J;
+		    T4S = T4J - T4w;
+	       }
+	       {
+		    E T37, T3k, T3P, T4s;
+		    T37 = T2N + T36;
+		    T3k = T3a + T3j;
+		    T3l = FNMS(KP390180644, T3k, KP1_961570560 * T37);
+		    T4P = FMA(KP390180644, T37, KP1_961570560 * T3k);
+		    T3P = T3v + T3O;
+		    T4s = T48 + T4r;
+		    T4t = T3P + T4s;
+		    T4R = T3P - T4s;
+	       }
+	       {
+		    E T3m, T4L, T4U, T4V;
+		    T3m = T2E + T3l;
+		    T4L = FNMS(KP196034280, T4K, KP1_990369453 * T4t);
+		    O[WS(os, 66)] = T3m - T4L;
+		    O[WS(os, 2)] = T3m + T4L;
+		    T4U = T4O + T4P;
+		    T4V = FMA(KP1_546020906, T4R, KP1_268786568 * T4S);
+		    O[WS(os, 50)] = T4U - T4V;
+		    O[WS(os, 114)] = T4U + T4V;
+	       }
+	       {
+		    E T4M, T4N, T4Q, T4T;
+		    T4M = T2E - T3l;
+		    T4N = FMA(KP196034280, T4t, KP1_990369453 * T4K);
+		    O[WS(os, 34)] = T4M - T4N;
+		    O[WS(os, 98)] = T4M + T4N;
+		    T4Q = T4O - T4P;
+		    T4T = FNMS(KP1_546020906, T4S, KP1_268786568 * T4R);
+		    O[WS(os, 82)] = T4Q - T4T;
+		    O[WS(os, 18)] = T4Q + T4T;
+	       }
+	  }
+	  {
+	       E T6y, T7e, T7a, T7i, T6J, T7f, T71, T7h;
+	       {
+		    E T6s, T6x, T74, T79;
+		    T6s = T6q - T6r;
+		    T6x = KP1_414213562 * (T6t - T6w);
+		    T6y = T6s + T6x;
+		    T7e = T6s - T6x;
+		    T74 = KP707106781 * (T72 + T73);
+		    T79 = T75 + T78;
+		    T7a = T74 + T79;
+		    T7i = T79 - T74;
+	       }
+	       {
+		    E T6D, T6I, T6P, T70;
+		    T6D = T6z + T6C;
+		    T6I = T6E + T6H;
+		    T6J = FNMS(KP765366864, T6I, KP1_847759065 * T6D);
+		    T7f = FMA(KP765366864, T6D, KP1_847759065 * T6I);
+		    T6P = T6L + T6O;
+		    T70 = KP707106781 * (T6U + T6Z);
+		    T71 = T6P + T70;
+		    T7h = T6P - T70;
+	       }
+	       {
+		    E T6K, T7b, T7k, T7l;
+		    T6K = T6y + T6J;
+		    T7b = FNMS(KP390180644, T7a, KP1_961570560 * T71);
+		    O[WS(os, 68)] = T6K - T7b;
+		    O[WS(os, 4)] = T6K + T7b;
+		    T7k = T7e + T7f;
+		    T7l = FMA(KP1_662939224, T7h, KP1_111140466 * T7i);
+		    O[WS(os, 52)] = T7k - T7l;
+		    O[WS(os, 116)] = T7k + T7l;
+	       }
+	       {
+		    E T7c, T7d, T7g, T7j;
+		    T7c = T6y - T6J;
+		    T7d = FMA(KP390180644, T71, KP1_961570560 * T7a);
+		    O[WS(os, 36)] = T7c - T7d;
+		    O[WS(os, 100)] = T7c + T7d;
+		    T7g = T7e - T7f;
+		    T7j = FNMS(KP1_662939224, T7i, KP1_111140466 * T7h);
+		    O[WS(os, 84)] = T7g - T7j;
+		    O[WS(os, 20)] = T7g + T7j;
+	       }
+	  }
+	  {
+	       E T4Y, T5c, T58, T5g, T51, T5d, T55, T5f;
+	       {
+		    E T4W, T4X, T56, T57;
+		    T4W = T2a - T2j;
+		    T4X = FMA(KP765366864, T2t, KP1_847759065 * T2C);
+		    T4Y = T4W - T4X;
+		    T5c = T4W + T4X;
+		    T56 = T48 - T4r;
+		    T57 = T4I - T4z;
+		    T58 = T56 + T57;
+		    T5g = T57 - T56;
+	       }
+	       {
+		    E T4Z, T50, T53, T54;
+		    T4Z = T2N - T36;
+		    T50 = T3j - T3a;
+		    T51 = FNMS(KP1_662939224, T50, KP1_111140466 * T4Z);
+		    T5d = FMA(KP1_662939224, T4Z, KP1_111140466 * T50);
+		    T53 = T3v - T3O;
+		    T54 = T4v - T4u;
+		    T55 = T53 + T54;
+		    T5f = T53 - T54;
+	       }
+	       {
+		    E T52, T59, T5i, T5j;
+		    T52 = T4Y + T51;
+		    T59 = FNMS(KP942793473, T58, KP1_763842528 * T55);
+		    O[WS(os, 74)] = T52 - T59;
+		    O[WS(os, 10)] = T52 + T59;
+		    T5i = T5c + T5d;
+		    T5j = FMA(KP1_913880671, T5f, KP580569354 * T5g);
+		    O[WS(os, 58)] = T5i - T5j;
+		    O[WS(os, 122)] = T5i + T5j;
+	       }
+	       {
+		    E T5a, T5b, T5e, T5h;
+		    T5a = T4Y - T51;
+		    T5b = FMA(KP942793473, T55, KP1_763842528 * T58);
+		    O[WS(os, 42)] = T5a - T5b;
+		    O[WS(os, 106)] = T5a + T5b;
+		    T5e = T5c - T5d;
+		    T5h = FNMS(KP1_913880671, T5g, KP580569354 * T5f);
+		    O[WS(os, 90)] = T5e - T5h;
+		    O[WS(os, 26)] = T5e + T5h;
+	       }
+	  }
+	  {
+	       E T7o, T7C, T7y, T7G, T7r, T7D, T7v, T7F;
+	       {
+		    E T7m, T7n, T7w, T7x;
+		    T7m = T6q + T6r;
+		    T7n = KP1_414213562 * (T6t + T6w);
+		    T7o = T7m - T7n;
+		    T7C = T7m + T7n;
+		    T7w = KP707106781 * (T6U - T6Z);
+		    T7x = T78 - T75;
+		    T7y = T7w + T7x;
+		    T7G = T7x - T7w;
+	       }
+	       {
+		    E T7p, T7q, T7t, T7u;
+		    T7p = T6z - T6C;
+		    T7q = T6H - T6E;
+		    T7r = FNMS(KP1_847759065, T7q, KP765366864 * T7p);
+		    T7D = FMA(KP1_847759065, T7p, KP765366864 * T7q);
+		    T7t = T6L - T6O;
+		    T7u = KP707106781 * (T73 - T72);
+		    T7v = T7t + T7u;
+		    T7F = T7t - T7u;
+	       }
+	       {
+		    E T7s, T7z, T7I, T7J;
+		    T7s = T7o + T7r;
+		    T7z = FNMS(KP1_111140466, T7y, KP1_662939224 * T7v);
+		    O[WS(os, 76)] = T7s - T7z;
+		    O[WS(os, 12)] = T7s + T7z;
+		    T7I = T7C + T7D;
+		    T7J = FMA(KP1_961570560, T7F, KP390180644 * T7G);
+		    O[WS(os, 60)] = T7I - T7J;
+		    O[WS(os, 124)] = T7I + T7J;
+	       }
+	       {
+		    E T7A, T7B, T7E, T7H;
+		    T7A = T7o - T7r;
+		    T7B = FMA(KP1_111140466, T7v, KP1_662939224 * T7y);
+		    O[WS(os, 44)] = T7A - T7B;
+		    O[WS(os, 108)] = T7A + T7B;
+		    T7E = T7C - T7D;
+		    T7H = FNMS(KP1_961570560, T7G, KP390180644 * T7F);
+		    O[WS(os, 92)] = T7E - T7H;
+		    O[WS(os, 28)] = T7E + T7H;
+	       }
+	  }
+	  {
+	       E T5q, T5U, T5Q, T5Y, T5x, T5V, T5J, T5X;
+	       {
+		    E T5m, T5p, T5M, T5P;
+		    T5m = T5k - T5l;
+		    T5p = FNMS(KP1_847759065, T5o, KP765366864 * T5n);
+		    T5q = T5m + T5p;
+		    T5U = T5m - T5p;
+		    T5M = T5K + T5L;
+		    T5P = T5N + T5O;
+		    T5Q = T5M + T5P;
+		    T5Y = T5P - T5M;
+	       }
+	       {
+		    E T5t, T5w, T5B, T5I;
+		    T5t = T5r + T5s;
+		    T5w = T5u + T5v;
+		    T5x = FNMS(KP1_111140466, T5w, KP1_662939224 * T5t);
+		    T5V = FMA(KP1_111140466, T5t, KP1_662939224 * T5w);
+		    T5B = T5z + T5A;
+		    T5I = T5E + T5H;
+		    T5J = T5B + T5I;
+		    T5X = T5B - T5I;
+	       }
+	       {
+		    E T5y, T5R, T60, T61;
+		    T5y = T5q + T5x;
+		    T5R = FNMS(KP580569354, T5Q, KP1_913880671 * T5J);
+		    O[WS(os, 70)] = T5y - T5R;
+		    O[WS(os, 6)] = T5y + T5R;
+		    T60 = T5U + T5V;
+		    T61 = FMA(KP1_763842528, T5X, KP942793473 * T5Y);
+		    O[WS(os, 54)] = T60 - T61;
+		    O[WS(os, 118)] = T60 + T61;
+	       }
+	       {
+		    E T5S, T5T, T5W, T5Z;
+		    T5S = T5q - T5x;
+		    T5T = FMA(KP580569354, T5J, KP1_913880671 * T5Q);
+		    O[WS(os, 38)] = T5S - T5T;
+		    O[WS(os, 102)] = T5S + T5T;
+		    T5W = T5U - T5V;
+		    T5Z = FNMS(KP1_763842528, T5Y, KP942793473 * T5X);
+		    O[WS(os, 86)] = T5W - T5Z;
+		    O[WS(os, 22)] = T5W + T5Z;
+	       }
+	  }
+	  {
+	       E T64, T6i, T6e, T6m, T67, T6j, T6b, T6l;
+	       {
+		    E T62, T63, T6c, T6d;
+		    T62 = T5k + T5l;
+		    T63 = FMA(KP1_847759065, T5n, KP765366864 * T5o);
+		    T64 = T62 - T63;
+		    T6i = T62 + T63;
+		    T6c = T5E - T5H;
+		    T6d = T5O - T5N;
+		    T6e = T6c + T6d;
+		    T6m = T6d - T6c;
+	       }
+	       {
+		    E T65, T66, T69, T6a;
+		    T65 = T5r - T5s;
+		    T66 = T5v - T5u;
+		    T67 = FNMS(KP1_961570560, T66, KP390180644 * T65);
+		    T6j = FMA(KP1_961570560, T65, KP390180644 * T66);
+		    T69 = T5z - T5A;
+		    T6a = T5L - T5K;
+		    T6b = T69 + T6a;
+		    T6l = T69 - T6a;
+	       }
+	       {
+		    E T68, T6f, T6o, T6p;
+		    T68 = T64 + T67;
+		    T6f = FNMS(KP1_268786568, T6e, KP1_546020906 * T6b);
+		    O[WS(os, 78)] = T68 - T6f;
+		    O[WS(os, 14)] = T68 + T6f;
+		    T6o = T6i + T6j;
+		    T6p = FMA(KP1_990369453, T6l, KP196034280 * T6m);
+		    O[WS(os, 62)] = T6o - T6p;
+		    O[WS(os, 126)] = T6o + T6p;
+	       }
+	       {
+		    E T6g, T6h, T6k, T6n;
+		    T6g = T64 - T67;
+		    T6h = FMA(KP1_268786568, T6b, KP1_546020906 * T6e);
+		    O[WS(os, 46)] = T6g - T6h;
+		    O[WS(os, 110)] = T6g + T6h;
+		    T6k = T6i - T6j;
+		    T6n = FNMS(KP1_990369453, T6m, KP196034280 * T6l);
+		    O[WS(os, 94)] = T6k - T6n;
+		    O[WS(os, 30)] = T6k + T6n;
+	       }
+	  }
+	  {
+	       E T8Z, Tb1, T9C, Tb2, Tbe, Tbq, Tbb, Tbp, TaX, Tbs, Tb5, Tbi, TaI, Tbt, Tb4;
+	       E Tbl;
+	       {
+		    E T8F, T8Y, Tb9, Tba;
+		    T8F = T8x + T8E;
+		    T8Y = FNMS(KP390180644, T8X, KP1_961570560 * T8Q);
+		    T8Z = T8F + T8Y;
+		    Tb1 = T8F - T8Y;
+		    {
+			 E T9q, T9B, Tbc, Tbd;
+			 T9q = T9a + T9p;
+			 T9B = T9t + T9A;
+			 T9C = FNMS(KP196034280, T9B, KP1_990369453 * T9q);
+			 Tb2 = FMA(KP196034280, T9q, KP1_990369453 * T9B);
+			 Tbc = T9a - T9p;
+			 Tbd = T9A - T9t;
+			 Tbe = FNMS(KP1_546020906, Tbd, KP1_268786568 * Tbc);
+			 Tbq = FMA(KP1_546020906, Tbc, KP1_268786568 * Tbd);
+		    }
+		    Tb9 = T8x - T8E;
+		    Tba = FMA(KP390180644, T8Q, KP1_961570560 * T8X);
+		    Tbb = Tb9 - Tba;
+		    Tbp = Tb9 + Tba;
+		    {
+			 E TaW, Tbg, TaL, Tbh, TaJ, TaK;
+			 TaW = TaO + TaV;
+			 Tbg = T9O - Ta3;
+			 TaJ = FMA(KP195090322, Taf, KP980785280 * Tam);
+			 TaK = FNMS(KP195090322, Tay, KP980785280 * TaF);
+			 TaL = TaJ + TaK;
+			 Tbh = TaK - TaJ;
+			 TaX = TaL + TaW;
+			 Tbs = Tbg - Tbh;
+			 Tb5 = TaW - TaL;
+			 Tbi = Tbg + Tbh;
+		    }
+		    {
+			 E Ta4, Tbk, TaH, Tbj, Tan, TaG;
+			 Ta4 = T9O + Ta3;
+			 Tbk = TaV - TaO;
+			 Tan = FNMS(KP195090322, Tam, KP980785280 * Taf);
+			 TaG = FMA(KP980785280, Tay, KP195090322 * TaF);
+			 TaH = Tan + TaG;
+			 Tbj = Tan - TaG;
+			 TaI = Ta4 + TaH;
+			 Tbt = Tbk - Tbj;
+			 Tb4 = Ta4 - TaH;
+			 Tbl = Tbj + Tbk;
+		    }
+	       }
+	       {
+		    E T9D, TaY, Tbr, Tbu;
+		    T9D = T8Z + T9C;
+		    TaY = FNMS(KP098135348, TaX, KP1_997590912 * TaI);
+		    O[WS(os, 65)] = T9D - TaY;
+		    O[WS(os, 1)] = T9D + TaY;
+		    Tbr = Tbp - Tbq;
+		    Tbu = FNMS(KP1_883088130, Tbt, KP673779706 * Tbs);
+		    O[WS(os, 89)] = Tbr - Tbu;
+		    O[WS(os, 25)] = Tbr + Tbu;
+	       }
+	       {
+		    E Tbv, Tbw, TaZ, Tb0;
+		    Tbv = Tbp + Tbq;
+		    Tbw = FMA(KP1_883088130, Tbs, KP673779706 * Tbt);
+		    O[WS(os, 57)] = Tbv - Tbw;
+		    O[WS(os, 121)] = Tbv + Tbw;
+		    TaZ = T8Z - T9C;
+		    Tb0 = FMA(KP098135348, TaI, KP1_997590912 * TaX);
+		    O[WS(os, 33)] = TaZ - Tb0;
+		    O[WS(os, 97)] = TaZ + Tb0;
+	       }
+	       {
+		    E Tb3, Tb6, Tbf, Tbm;
+		    Tb3 = Tb1 - Tb2;
+		    Tb6 = FNMS(KP1_481902250, Tb5, KP1_343117909 * Tb4);
+		    O[WS(os, 81)] = Tb3 - Tb6;
+		    O[WS(os, 17)] = Tb3 + Tb6;
+		    Tbf = Tbb + Tbe;
+		    Tbm = FNMS(KP855110186, Tbl, KP1_807978586 * Tbi);
+		    O[WS(os, 73)] = Tbf - Tbm;
+		    O[WS(os, 9)] = Tbf + Tbm;
+	       }
+	       {
+		    E Tbn, Tbo, Tb7, Tb8;
+		    Tbn = Tbb - Tbe;
+		    Tbo = FMA(KP855110186, Tbi, KP1_807978586 * Tbl);
+		    O[WS(os, 41)] = Tbn - Tbo;
+		    O[WS(os, 105)] = Tbn + Tbo;
+		    Tb7 = Tb1 + Tb2;
+		    Tb8 = FMA(KP1_481902250, Tb4, KP1_343117909 * Tb5);
+		    O[WS(os, 49)] = Tb7 - Tb8;
+		    O[WS(os, 113)] = Tb7 + Tb8;
+	       }
+	  }
+	  {
+	       E TcR, TdR, Tda, TdS, Te4, Teg, Te1, Tef, TdN, Tei, TdV, Te8, TdC, Tej, TdU;
+	       E Teb;
+	       {
+		    E TcJ, TcQ, TdZ, Te0;
+		    TcJ = TcF + TcI;
+		    TcQ = FNMS(KP1_111140466, TcP, KP1_662939224 * TcM);
+		    TcR = TcJ + TcQ;
+		    TdR = TcJ - TcQ;
+		    {
+			 E Td2, Td9, Te2, Te3;
+			 Td2 = TcU + Td1;
+			 Td9 = Td5 + Td8;
+			 Tda = FNMS(KP580569354, Td9, KP1_913880671 * Td2);
+			 TdS = FMA(KP580569354, Td2, KP1_913880671 * Td9);
+			 Te2 = TcU - Td1;
+			 Te3 = Td8 - Td5;
+			 Te4 = FNMS(KP1_763842528, Te3, KP942793473 * Te2);
+			 Teg = FMA(KP1_763842528, Te2, KP942793473 * Te3);
+		    }
+		    TdZ = TcF - TcI;
+		    Te0 = FMA(KP1_111140466, TcM, KP1_662939224 * TcP);
+		    Te1 = TdZ - Te0;
+		    Tef = TdZ + Te0;
+		    {
+			 E TdM, Te6, TdF, Te7, TdD, TdE;
+			 TdM = TdI + TdL;
+			 Te6 = Tde - Tdl;
+			 TdD = FMA(KP555570233, Tdp, KP831469612 * Tds);
+			 TdE = FNMS(KP555570233, Tdw, KP831469612 * Tdz);
+			 TdF = TdD + TdE;
+			 Te7 = TdE - TdD;
+			 TdN = TdF + TdM;
+			 Tei = Te6 - Te7;
+			 TdV = TdM - TdF;
+			 Te8 = Te6 + Te7;
+		    }
+		    {
+			 E Tdm, Tea, TdB, Te9, Tdt, TdA;
+			 Tdm = Tde + Tdl;
+			 Tea = TdL - TdI;
+			 Tdt = FNMS(KP555570233, Tds, KP831469612 * Tdp);
+			 TdA = FMA(KP831469612, Tdw, KP555570233 * Tdz);
+			 TdB = Tdt + TdA;
+			 Te9 = Tdt - TdA;
+			 TdC = Tdm + TdB;
+			 Tej = Tea - Te9;
+			 TdU = Tdm - TdB;
+			 Teb = Te9 + Tea;
+		    }
+	       }
+	       {
+		    E Tdb, TdO, Teh, Tek;
+		    Tdb = TcR + Tda;
+		    TdO = FNMS(KP293460948, TdN, KP1_978353019 * TdC);
+		    O[WS(os, 67)] = Tdb - TdO;
+		    O[WS(os, 3)] = Tdb + TdO;
+		    Teh = Tef - Teg;
+		    Tek = FNMS(KP1_940062506, Tej, KP485960359 * Tei);
+		    O[WS(os, 91)] = Teh - Tek;
+		    O[WS(os, 27)] = Teh + Tek;
+	       }
+	       {
+		    E Tel, Tem, TdP, TdQ;
+		    Tel = Tef + Teg;
+		    Tem = FMA(KP1_940062506, Tei, KP485960359 * Tej);
+		    O[WS(os, 59)] = Tel - Tem;
+		    O[WS(os, 123)] = Tel + Tem;
+		    TdP = TcR - Tda;
+		    TdQ = FMA(KP293460948, TdC, KP1_978353019 * TdN);
+		    O[WS(os, 35)] = TdP - TdQ;
+		    O[WS(os, 99)] = TdP + TdQ;
+	       }
+	       {
+		    E TdT, TdW, Te5, Tec;
+		    TdT = TdR - TdS;
+		    TdW = FNMS(KP1_606415062, TdV, KP1_191398608 * TdU);
+		    O[WS(os, 83)] = TdT - TdW;
+		    O[WS(os, 19)] = TdT + TdW;
+		    Te5 = Te1 + Te4;
+		    Tec = FNMS(KP1_028205488, Teb, KP1_715457220 * Te8);
+		    O[WS(os, 75)] = Te5 - Tec;
+		    O[WS(os, 11)] = Te5 + Tec;
+	       }
+	       {
+		    E Ted, Tee, TdX, TdY;
+		    Ted = Te1 - Te4;
+		    Tee = FMA(KP1_028205488, Te8, KP1_715457220 * Teb);
+		    O[WS(os, 43)] = Ted - Tee;
+		    O[WS(os, 107)] = Ted + Tee;
+		    TdX = TdR + TdS;
+		    TdY = FMA(KP1_606415062, TdU, KP1_191398608 * TdV);
+		    O[WS(os, 51)] = TdX - TdY;
+		    O[WS(os, 115)] = TdX + TdY;
+	       }
+	  }
+	  {
+	       E TbD, Tc7, TbK, Tc8, Tck, Tcw, Tch, Tcv, Tc3, Tcy, Tcb, Tco, TbW, Tcz, Tca;
+	       E Tcr;
+	       {
+		    E Tbz, TbC, Tcf, Tcg;
+		    Tbz = Tbx - Tby;
+		    TbC = FNMS(KP1_662939224, TbB, KP1_111140466 * TbA);
+		    TbD = Tbz + TbC;
+		    Tc7 = Tbz - TbC;
+		    {
+			 E TbG, TbJ, Tci, Tcj;
+			 TbG = TbE + TbF;
+			 TbJ = TbH + TbI;
+			 TbK = FNMS(KP942793473, TbJ, KP1_763842528 * TbG);
+			 Tc8 = FMA(KP942793473, TbG, KP1_763842528 * TbJ);
+			 Tci = TbE - TbF;
+			 Tcj = TbI - TbH;
+			 Tck = FNMS(KP1_913880671, Tcj, KP580569354 * Tci);
+			 Tcw = FMA(KP1_913880671, Tci, KP580569354 * Tcj);
+		    }
+		    Tcf = Tbx + Tby;
+		    Tcg = FMA(KP1_662939224, TbA, KP1_111140466 * TbB);
+		    Tch = Tcf - Tcg;
+		    Tcv = Tcf + Tcg;
+		    {
+			 E Tc2, Tcm, TbZ, Tcn, TbX, TbY;
+			 Tc2 = Tc0 + Tc1;
+			 Tcm = TbM - TbN;
+			 TbX = FMA(KP831469612, TbP, KP555570233 * TbQ);
+			 TbY = FNMS(KP831469612, TbS, KP555570233 * TbT);
+			 TbZ = TbX + TbY;
+			 Tcn = TbY - TbX;
+			 Tc3 = TbZ + Tc2;
+			 Tcy = Tcm - Tcn;
+			 Tcb = Tc2 - TbZ;
+			 Tco = Tcm + Tcn;
+		    }
+		    {
+			 E TbO, Tcq, TbV, Tcp, TbR, TbU;
+			 TbO = TbM + TbN;
+			 Tcq = Tc1 - Tc0;
+			 TbR = FNMS(KP831469612, TbQ, KP555570233 * TbP);
+			 TbU = FMA(KP555570233, TbS, KP831469612 * TbT);
+			 TbV = TbR + TbU;
+			 Tcp = TbR - TbU;
+			 TbW = TbO + TbV;
+			 Tcz = Tcq - Tcp;
+			 Tca = TbO - TbV;
+			 Tcr = Tcp + Tcq;
+		    }
+	       }
+	       {
+		    E TbL, Tc4, Tcx, TcA;
+		    TbL = TbD + TbK;
+		    Tc4 = FNMS(KP485960359, Tc3, KP1_940062506 * TbW);
+		    O[WS(os, 69)] = TbL - Tc4;
+		    O[WS(os, 5)] = TbL + Tc4;
+		    Tcx = Tcv - Tcw;
+		    TcA = FNMS(KP1_978353019, Tcz, KP293460948 * Tcy);
+		    O[WS(os, 93)] = Tcx - TcA;
+		    O[WS(os, 29)] = Tcx + TcA;
+	       }
+	       {
+		    E TcB, TcC, Tc5, Tc6;
+		    TcB = Tcv + Tcw;
+		    TcC = FMA(KP1_978353019, Tcy, KP293460948 * Tcz);
+		    O[WS(os, 61)] = TcB - TcC;
+		    O[WS(os, 125)] = TcB + TcC;
+		    Tc5 = TbD - TbK;
+		    Tc6 = FMA(KP485960359, TbW, KP1_940062506 * Tc3);
+		    O[WS(os, 37)] = Tc5 - Tc6;
+		    O[WS(os, 101)] = Tc5 + Tc6;
+	       }
+	       {
+		    E Tc9, Tcc, Tcl, Tcs;
+		    Tc9 = Tc7 - Tc8;
+		    Tcc = FNMS(KP1_715457220, Tcb, KP1_028205488 * Tca);
+		    O[WS(os, 85)] = Tc9 - Tcc;
+		    O[WS(os, 21)] = Tc9 + Tcc;
+		    Tcl = Tch + Tck;
+		    Tcs = FNMS(KP1_191398608, Tcr, KP1_606415062 * Tco);
+		    O[WS(os, 77)] = Tcl - Tcs;
+		    O[WS(os, 13)] = Tcl + Tcs;
+	       }
+	       {
+		    E Tct, Tcu, Tcd, Tce;
+		    Tct = Tch - Tck;
+		    Tcu = FMA(KP1_191398608, Tco, KP1_606415062 * Tcr);
+		    O[WS(os, 45)] = Tct - Tcu;
+		    O[WS(os, 109)] = Tct + Tcu;
+		    Tcd = Tc7 + Tc8;
+		    Tce = FMA(KP1_715457220, Tca, KP1_028205488 * Tcb);
+		    O[WS(os, 53)] = Tcd - Tce;
+		    O[WS(os, 117)] = Tcd + Tce;
+	       }
+	  }
+	  {
+	       E Tet, TeX, TeA, TeY, Tfa, Tfm, Tf7, Tfl, TeT, Tfo, Tf1, Tfe, TeM, Tfp, Tf0;
+	       E Tfh;
+	       {
+		    E Tep, Tes, Tf5, Tf6;
+		    Tep = Ten - Teo;
+		    Tes = FNMS(KP1_961570560, Ter, KP390180644 * Teq);
+		    Tet = Tep + Tes;
+		    TeX = Tep - Tes;
+		    {
+			 E Tew, Tez, Tf8, Tf9;
+			 Tew = Teu - Tev;
+			 Tez = Tex + Tey;
+			 TeA = FNMS(KP1_268786568, Tez, KP1_546020906 * Tew);
+			 TeY = FMA(KP1_268786568, Tew, KP1_546020906 * Tez);
+			 Tf8 = Teu + Tev;
+			 Tf9 = Tey - Tex;
+			 Tfa = FNMS(KP1_990369453, Tf9, KP196034280 * Tf8);
+			 Tfm = FMA(KP1_990369453, Tf8, KP196034280 * Tf9);
+		    }
+		    Tf5 = Ten + Teo;
+		    Tf6 = FMA(KP1_961570560, Teq, KP390180644 * Ter);
+		    Tf7 = Tf5 - Tf6;
+		    Tfl = Tf5 + Tf6;
+		    {
+			 E TeS, Tfc, TeP, Tfd, TeN, TeO;
+			 TeS = TeQ + TeR;
+			 Tfc = TeC + TeD;
+			 TeN = FMA(KP980785280, TeF, KP195090322 * TeG);
+			 TeO = FMA(KP980785280, TeI, KP195090322 * TeJ);
+			 TeP = TeN - TeO;
+			 Tfd = TeN + TeO;
+			 TeT = TeP + TeS;
+			 Tfo = Tfc + Tfd;
+			 Tf1 = TeS - TeP;
+			 Tfe = Tfc - Tfd;
+		    }
+		    {
+			 E TeE, Tfg, TeL, Tff, TeH, TeK;
+			 TeE = TeC - TeD;
+			 Tfg = TeR - TeQ;
+			 TeH = FNMS(KP980785280, TeG, KP195090322 * TeF);
+			 TeK = FNMS(KP980785280, TeJ, KP195090322 * TeI);
+			 TeL = TeH + TeK;
+			 Tff = TeH - TeK;
+			 TeM = TeE + TeL;
+			 Tfp = Tfg - Tff;
+			 Tf0 = TeE - TeL;
+			 Tfh = Tff + Tfg;
+		    }
+	       }
+	       {
+		    E TeB, TeU, Tfn, Tfq;
+		    TeB = Tet + TeA;
+		    TeU = FNMS(KP673779706, TeT, KP1_883088130 * TeM);
+		    O[WS(os, 71)] = TeB - TeU;
+		    O[WS(os, 7)] = TeB + TeU;
+		    Tfn = Tfl - Tfm;
+		    Tfq = FNMS(KP1_997590912, Tfp, KP098135348 * Tfo);
+		    O[WS(os, 95)] = Tfn - Tfq;
+		    O[WS(os, 31)] = Tfn + Tfq;
+	       }
+	       {
+		    E Tfr, Tfs, TeV, TeW;
+		    Tfr = Tfl + Tfm;
+		    Tfs = FMA(KP1_997590912, Tfo, KP098135348 * Tfp);
+		    O[WS(os, 63)] = Tfr - Tfs;
+		    O[WS(os, 127)] = Tfr + Tfs;
+		    TeV = Tet - TeA;
+		    TeW = FMA(KP673779706, TeM, KP1_883088130 * TeT);
+		    O[WS(os, 39)] = TeV - TeW;
+		    O[WS(os, 103)] = TeV + TeW;
+	       }
+	       {
+		    E TeZ, Tf2, Tfb, Tfi;
+		    TeZ = TeX - TeY;
+		    Tf2 = FNMS(KP1_807978586, Tf1, KP855110186 * Tf0);
+		    O[WS(os, 87)] = TeZ - Tf2;
+		    O[WS(os, 23)] = TeZ + Tf2;
+		    Tfb = Tf7 + Tfa;
+		    Tfi = FNMS(KP1_343117909, Tfh, KP1_481902250 * Tfe);
+		    O[WS(os, 79)] = Tfb - Tfi;
+		    O[WS(os, 15)] = Tfb + Tfi;
+	       }
+	       {
+		    E Tfj, Tfk, Tf3, Tf4;
+		    Tfj = Tf7 - Tfa;
+		    Tfk = FMA(KP1_343117909, Tfe, KP1_481902250 * Tfh);
+		    O[WS(os, 47)] = Tfj - Tfk;
+		    O[WS(os, 111)] = Tfj + Tfk;
+		    Tf3 = TeX + TeY;
+		    Tf4 = FMA(KP1_807978586, Tf0, KP855110186 * Tf1);
+		    O[WS(os, 55)] = Tf3 - Tf4;
+		    O[WS(os, 119)] = Tf3 + Tf4;
+	       }
+	  }
+     }
+}
+
+static void mhc2r_128(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{    /* Batch driver: calls mhc2r_128_0 once per transform, v times in all (none if v <= 0). */
+     int left = v;
+     while (left > 0) {
+	  mhc2r_128_0(ri, ii, O, ris, iis, os);
+	  ri += ivs, ii += ivs;	/* both input pointers advance by ivs */
+	  O += ovs;		/* output pointer advances by ovs */
+	  left -= 1;
+     }
+}
+
+static const khc2r_desc desc = { 128, "mhc2r_128", {812, 198, 144, 0}, &GENUS, 0, 0, 0, 0, 0 };  /* presumably size, name and add/mul/fma op counts -- confirm against khc2r_desc */
+
+void X(codelet_mhc2r_128) (planner *p) {  /* hands mhc2r_128 and &desc to khc2r_register for planner p */
+     X(khc2r_register) (p, mhc2r_128, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/mhc2r_32.c b/src/fftw3/rdft/codelets/hc2r/mhc2r_32.c
new file mode 100644
index 0000000..154a6d4
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/mhc2r_32.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:15 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r_noinline -compact -variables 4 -sign 1 -n 32 -name mhc2r_32 -include hc2r.h */
+
+/*
+ * This function contains 156 FP additions, 50 FP multiplications,
+ * (or, 140 additions, 34 multiplications, 16 fused multiply/add),
+ * 53 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: mhc2r_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: mhc2r_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: mhc2r_32.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void mhc2r_32_0(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os)
+{  /* One transform: loads ri at indices 0..16 and ii at indices 1..15, stores O at indices 0..31 (nonzero indices go through WS). */
+     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);  /* 2*cos(3*pi/16) */
+     DK(KP1_111140466, +1.111140466039204449485661627897065748749874382);  /* 2*sin(3*pi/16) */
+     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);  /* 2*cos(pi/16) */
+     DK(KP390180644, +0.390180644032256535696569736954044481855383236);  /* 2*sin(pi/16) */
+     DK(KP765366864, +0.765366864730179543456919968060797733522689125);  /* 2*sin(pi/8) */
+     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);  /* 2*cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);  /* 1/sqrt(2) */
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);  /* sqrt(2) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     {
+	  E T9, T2c, TB, T1y, T6, T2b, Ty, T1v, Th, T2e, T2f, TD, TK, T1C, T1F;
+	  E T1h, Tp, T2i, T2m, TN, T13, T1K, T1Y, T1k, Tw, TU, T1l, TW, T1V, T2j;
+	  E T1R, T2l;
+	  {  /* load stage: ri and ii at indices 4 and 12 */
+	       E T7, T8, T1w, Tz, TA, T1x;
+	       T7 = ri[WS(ris, 4)];
+	       T8 = ri[WS(ris, 12)];
+	       T1w = T7 - T8;
+	       Tz = ii[WS(iis, 4)];
+	       TA = ii[WS(iis, 12)];
+	       T1x = Tz + TA;
+	       T9 = KP2_000000000 * (T7 + T8);
+	       T2c = KP1_414213562 * (T1w + T1x);
+	       TB = KP2_000000000 * (Tz - TA);
+	       T1y = KP1_414213562 * (T1w - T1x);
+	  }
+	  {  /* load stage: ri at indices 0, 8, 16 and ii at index 8 */
+	       E T5, T1u, T3, T1s;
+	       {
+		    E T4, T1t, T1, T2;
+		    T4 = ri[WS(ris, 8)];
+		    T5 = KP2_000000000 * T4;
+		    T1t = ii[WS(iis, 8)];
+		    T1u = KP2_000000000 * T1t;
+		    T1 = ri[0];
+		    T2 = ri[WS(ris, 16)];
+		    T3 = T1 + T2;
+		    T1s = T1 - T2;
+	       }
+	       T6 = T3 + T5;
+	       T2b = T1s + T1u;
+	       Ty = T3 - T5;
+	       T1v = T1s - T1u;
+	  }
+	  {  /* load stage: ri and ii at indices 2, 14, 10, 6 */
+	       E Td, T1A, TG, T1E, Tg, T1D, TJ, T1B;
+	       {
+		    E Tb, Tc, TE, TF;
+		    Tb = ri[WS(ris, 2)];
+		    Tc = ri[WS(ris, 14)];
+		    Td = Tb + Tc;
+		    T1A = Tb - Tc;
+		    TE = ii[WS(iis, 2)];
+		    TF = ii[WS(iis, 14)];
+		    TG = TE - TF;
+		    T1E = TE + TF;
+	       }
+	       {
+		    E Te, Tf, TH, TI;
+		    Te = ri[WS(ris, 10)];
+		    Tf = ri[WS(ris, 6)];
+		    Tg = Te + Tf;
+		    T1D = Te - Tf;
+		    TH = ii[WS(iis, 10)];
+		    TI = ii[WS(iis, 6)];
+		    TJ = TH - TI;
+		    T1B = TH + TI;
+	       }
+	       Th = KP2_000000000 * (Td + Tg);
+	       T2e = T1A + T1B;
+	       T2f = T1E - T1D;
+	       TD = Td - Tg;
+	       TK = TG - TJ;
+	       T1C = T1A - T1B;
+	       T1F = T1D + T1E;
+	       T1h = KP2_000000000 * (TJ + TG);
+	  }
+	  {  /* load stage: ri and ii at indices 1, 15, 9, 7 */
+	       E Tl, T1I, TZ, T1X, To, T1W, T12, T1J;
+	       {
+		    E Tj, Tk, TX, TY;
+		    Tj = ri[WS(ris, 1)];
+		    Tk = ri[WS(ris, 15)];
+		    Tl = Tj + Tk;
+		    T1I = Tj - Tk;
+		    TX = ii[WS(iis, 1)];
+		    TY = ii[WS(iis, 15)];
+		    TZ = TX - TY;
+		    T1X = TX + TY;
+	       }
+	       {
+		    E Tm, Tn, T10, T11;
+		    Tm = ri[WS(ris, 9)];
+		    Tn = ri[WS(ris, 7)];
+		    To = Tm + Tn;
+		    T1W = Tm - Tn;
+		    T10 = ii[WS(iis, 9)];
+		    T11 = ii[WS(iis, 7)];
+		    T12 = T10 - T11;
+		    T1J = T10 + T11;
+	       }
+	       Tp = Tl + To;
+	       T2i = T1I + T1J;
+	       T2m = T1X - T1W;
+	       TN = Tl - To;
+	       T13 = TZ - T12;
+	       T1K = T1I - T1J;
+	       T1Y = T1W + T1X;
+	       T1k = T12 + TZ;
+	  }
+	  {  /* load stage: ri and ii at indices 5, 11, 3, 13 */
+	       E Ts, T1L, TT, T1M, Tv, T1O, TQ, T1P;
+	       {
+		    E Tq, Tr, TR, TS;
+		    Tq = ri[WS(ris, 5)];
+		    Tr = ri[WS(ris, 11)];
+		    Ts = Tq + Tr;
+		    T1L = Tq - Tr;
+		    TR = ii[WS(iis, 5)];
+		    TS = ii[WS(iis, 11)];
+		    TT = TR - TS;
+		    T1M = TR + TS;
+	       }
+	       {
+		    E Tt, Tu, TO, TP;
+		    Tt = ri[WS(ris, 3)];
+		    Tu = ri[WS(ris, 13)];
+		    Tv = Tt + Tu;
+		    T1O = Tt - Tu;
+		    TO = ii[WS(iis, 13)];
+		    TP = ii[WS(iis, 3)];
+		    TQ = TO - TP;
+		    T1P = TP + TO;
+	       }
+	       Tw = Ts + Tv;
+	       TU = TQ - TT;
+	       T1l = TT + TQ;
+	       TW = Ts - Tv;
+	       {
+		    E T1T, T1U, T1N, T1Q;
+		    T1T = T1L + T1M;
+		    T1U = T1O + T1P;
+		    T1V = KP707106781 * (T1T - T1U);
+		    T2j = KP707106781 * (T1T + T1U);
+		    T1N = T1L - T1M;
+		    T1Q = T1O - T1P;
+		    T1R = KP707106781 * (T1N + T1Q);
+		    T2l = KP707106781 * (T1N - T1Q);
+	       }
+	  }
+	  {  /* store stage: O at indices 0, 8, 16, 24 */
+	       E Tx, T1r, Ti, T1q, Ta;
+	       Tx = KP2_000000000 * (Tp + Tw);
+	       T1r = KP2_000000000 * (T1l + T1k);
+	       Ta = T6 + T9;
+	       Ti = Ta + Th;
+	       T1q = Ta - Th;
+	       O[WS(os, 16)] = Ti - Tx;
+	       O[WS(os, 24)] = T1q + T1r;
+	       O[0] = Ti + Tx;
+	       O[WS(os, 8)] = T1q - T1r;
+	  }
+	  {  /* store stage: O at indices 4, 12, 20, 28 */
+	       E T1i, T1o, T1n, T1p, T1g, T1j, T1m;
+	       T1g = T6 - T9;
+	       T1i = T1g - T1h;
+	       T1o = T1g + T1h;
+	       T1j = Tp - Tw;
+	       T1m = T1k - T1l;
+	       T1n = KP1_414213562 * (T1j - T1m);
+	       T1p = KP1_414213562 * (T1j + T1m);
+	       O[WS(os, 20)] = T1i - T1n;
+	       O[WS(os, 28)] = T1o + T1p;
+	       O[WS(os, 4)] = T1i + T1n;
+	       O[WS(os, 12)] = T1o - T1p;
+	  }
+	  {  /* store stage: O at indices 2, 10, 18, 26 */
+	       E TM, T16, T15, T17;
+	       {
+		    E TC, TL, TV, T14;
+		    TC = Ty - TB;
+		    TL = KP1_414213562 * (TD - TK);
+		    TM = TC + TL;
+		    T16 = TC - TL;
+		    TV = TN + TU;
+		    T14 = TW + T13;
+		    T15 = FNMS(KP765366864, T14, KP1_847759065 * TV);
+		    T17 = FMA(KP765366864, TV, KP1_847759065 * T14);
+	       }
+	       O[WS(os, 18)] = TM - T15;
+	       O[WS(os, 26)] = T16 + T17;
+	       O[WS(os, 2)] = TM + T15;
+	       O[WS(os, 10)] = T16 - T17;
+	  }
+	  {  /* store stage: O at indices 7, 15, 23, 31 */
+	       E T2t, T2x, T2w, T2y;
+	       {
+		    E T2r, T2s, T2u, T2v;
+		    T2r = T2b + T2c;
+		    T2s = FMA(KP1_847759065, T2e, KP765366864 * T2f);
+		    T2t = T2r - T2s;
+		    T2x = T2r + T2s;
+		    T2u = T2i + T2j;
+		    T2v = T2m - T2l;
+		    T2w = FNMS(KP1_961570560, T2v, KP390180644 * T2u);
+		    T2y = FMA(KP1_961570560, T2u, KP390180644 * T2v);
+	       }
+	       O[WS(os, 23)] = T2t - T2w;
+	       O[WS(os, 31)] = T2x + T2y;
+	       O[WS(os, 7)] = T2t + T2w;
+	       O[WS(os, 15)] = T2x - T2y;
+	  }
+	  {  /* store stage: O at indices 6, 14, 22, 30 */
+	       E T1a, T1e, T1d, T1f;
+	       {
+		    E T18, T19, T1b, T1c;
+		    T18 = Ty + TB;
+		    T19 = KP1_414213562 * (TD + TK);
+		    T1a = T18 - T19;
+		    T1e = T18 + T19;
+		    T1b = TN - TU;
+		    T1c = T13 - TW;
+		    T1d = FNMS(KP1_847759065, T1c, KP765366864 * T1b);
+		    T1f = FMA(KP1_847759065, T1b, KP765366864 * T1c);
+	       }
+	       O[WS(os, 22)] = T1a - T1d;
+	       O[WS(os, 30)] = T1e + T1f;
+	       O[WS(os, 6)] = T1a + T1d;
+	       O[WS(os, 14)] = T1e - T1f;
+	  }
+	  {  /* store stage: O at indices 5, 13, 21, 29 */
+	       E T25, T29, T28, T2a;
+	       {
+		    E T23, T24, T26, T27;
+		    T23 = T1v - T1y;
+		    T24 = FMA(KP765366864, T1C, KP1_847759065 * T1F);
+		    T25 = T23 - T24;
+		    T29 = T23 + T24;
+		    T26 = T1K - T1R;
+		    T27 = T1Y - T1V;
+		    T28 = FNMS(KP1_662939224, T27, KP1_111140466 * T26);
+		    T2a = FMA(KP1_662939224, T26, KP1_111140466 * T27);
+	       }
+	       O[WS(os, 21)] = T25 - T28;
+	       O[WS(os, 29)] = T29 + T2a;
+	       O[WS(os, 5)] = T25 + T28;
+	       O[WS(os, 13)] = T29 - T2a;
+	  }
+	  {  /* store stage: O at indices 3, 11, 19, 27 */
+	       E T2h, T2p, T2o, T2q;
+	       {
+		    E T2d, T2g, T2k, T2n;
+		    T2d = T2b - T2c;
+		    T2g = FNMS(KP1_847759065, T2f, KP765366864 * T2e);
+		    T2h = T2d + T2g;
+		    T2p = T2d - T2g;
+		    T2k = T2i - T2j;
+		    T2n = T2l + T2m;
+		    T2o = FNMS(KP1_111140466, T2n, KP1_662939224 * T2k);
+		    T2q = FMA(KP1_111140466, T2k, KP1_662939224 * T2n);
+	       }
+	       O[WS(os, 19)] = T2h - T2o;
+	       O[WS(os, 27)] = T2p + T2q;
+	       O[WS(os, 3)] = T2h + T2o;
+	       O[WS(os, 11)] = T2p - T2q;
+	  }
+	  {  /* store stage: O at indices 1, 9, 17, 25 */
+	       E T1H, T21, T20, T22;
+	       {
+		    E T1z, T1G, T1S, T1Z;
+		    T1z = T1v + T1y;
+		    T1G = FNMS(KP765366864, T1F, KP1_847759065 * T1C);
+		    T1H = T1z + T1G;
+		    T21 = T1z - T1G;
+		    T1S = T1K + T1R;
+		    T1Z = T1V + T1Y;
+		    T20 = FNMS(KP390180644, T1Z, KP1_961570560 * T1S);
+		    T22 = FMA(KP390180644, T1S, KP1_961570560 * T1Z);
+	       }
+	       O[WS(os, 17)] = T1H - T20;
+	       O[WS(os, 25)] = T21 + T22;
+	       O[WS(os, 1)] = T1H + T20;
+	       O[WS(os, 9)] = T21 - T22;
+	  }
+     }
+}
+
+static void mhc2r_32(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{    /* Batch driver: calls mhc2r_32_0 once per transform, v times in all (none if v <= 0). */
+     int left = v;
+     while (left > 0) {
+	  mhc2r_32_0(ri, ii, O, ris, iis, os);
+	  ri += ivs, ii += ivs;	/* both input pointers advance by ivs */
+	  O += ovs;		/* output pointer advances by ovs */
+	  left -= 1;
+     }
+}
+
+static const khc2r_desc desc = { 32, "mhc2r_32", {140, 34, 16, 0}, &GENUS, 0, 0, 0, 0, 0 };  /* {140, 34, 16, ...} equals the add/mul/fma tally in this file's header comment; field meanings presumed -- confirm against khc2r_desc */
+
+void X(codelet_mhc2r_32) (planner *p) {  /* hands mhc2r_32 and &desc to khc2r_register for planner p */
+     X(khc2r_register) (p, mhc2r_32, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2r/mhc2r_64.c b/src/fftw3/rdft/codelets/hc2r/mhc2r_64.c
new file mode 100644
index 0000000..48fb66f
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2r/mhc2r_64.c
@@ -0,0 +1,733 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:11:18 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2r_noinline -compact -variables 4 -sign 1 -n 64 -name mhc2r_64 -include hc2r.h */
+
+/*
+ * This function contains 394 FP additions, 134 FP multiplications,
+ * (or, 342 additions, 82 multiplications, 52 fused multiply/add),
+ * 109 stack variables, and 128 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: mhc2r_64.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: mhc2r_64.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ * $Id: mhc2r_64.c,v 1.1 2008/10/17 06:12:08 scuri Exp $
+ */
+
+#include "hc2r.h"
+
+static void mhc2r_64_0(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os)	/* one size-64 hc2r transform: reads ri[k] for k=0..32 and ii[k] for k=1..31, writes O[k] for k=0..63 (k is the index passed to WS) */
+{
+     DK(KP1_268786568, +1.268786568327290996430343226450986741351374190);	/* each KP name spells the leading digits of its value */
+     DK(KP1_546020906, +1.546020906725473921621813219516939601942082586);
+     DK(KP196034280, +0.196034280659121203988391127777283691722273346);
+     DK(KP1_990369453, +1.990369453344393772489673906218959843150949737);
+     DK(KP942793473, +0.942793473651995297112775251810508755314920638);
+     DK(KP1_763842528, +1.763842528696710059425513727320776699016885241);
+     DK(KP580569354, +0.580569354508924735272384751634790549382952557);
+     DK(KP1_913880671, +1.913880671464417729871595773960539938965698411);
+     DK(KP1_111140466, +1.111140466039204449485661627897065748749874382);
+     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
+     DK(KP390180644, +0.390180644032256535696569736954044481855383236);
+     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);	/* sqrt(2)/2 */
+     DK(KP765366864, +0.765366864730179543456919968060797733522689125);
+     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);	/* sqrt(2) */
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     {
+	  E Ta, T2S, T18, T2u, T3F, T4V, T5l, T61, Th, T2T, T1h, T2v, T3M, T4W, T5o;	/* intermediates computed in the five load blocks and consumed by the ten output blocks */
+	  E T62, T3Q, T5q, T5u, T44, Tp, Tw, T2V, T2W, T2X, T2Y, T3X, T5t, T1r, T2x;
+	  E T41, T5r, T1A, T2y, T4a, T5y, T5N, T4H, TN, T31, T4E, T5z, T39, T3q, T1L;
+	  E T2B, T4h, T5M, T2h, T2F, T12, T36, T5D, T5J, T5G, T5K, T1U, T26, T23, T27;
+	  E T4p, T4z, T4w, T4A, T34, T3r;
+	  {	/* load block 1: ri[0], ri[8], ri[16], ri[24], ri[32] and ii[8], ii[16], ii[24] */
+	       E T5, T3A, T3, T3y, T9, T3C, T17, T3D, T6, T14;
+	       {
+		    E T4, T3z, T1, T2;
+		    T4 = ri[WS(ris, 16)];
+		    T5 = KP2_000000000 * T4;
+		    T3z = ii[WS(iis, 16)];
+		    T3A = KP2_000000000 * T3z;
+		    T1 = ri[0];
+		    T2 = ri[WS(ris, 32)];
+		    T3 = T1 + T2;
+		    T3y = T1 - T2;
+		    {
+			 E T7, T8, T15, T16;
+			 T7 = ri[WS(ris, 8)];
+			 T8 = ri[WS(ris, 24)];
+			 T9 = KP2_000000000 * (T7 + T8);
+			 T3C = T7 - T8;
+			 T15 = ii[WS(iis, 8)];
+			 T16 = ii[WS(iis, 24)];
+			 T17 = KP2_000000000 * (T15 - T16);
+			 T3D = T15 + T16;
+		    }
+	       }
+	       T6 = T3 + T5;
+	       Ta = T6 + T9;
+	       T2S = T6 - T9;
+	       T14 = T3 - T5;
+	       T18 = T14 - T17;
+	       T2u = T14 + T17;
+	       {
+		    E T3B, T3E, T5j, T5k;
+		    T3B = T3y - T3A;
+		    T3E = KP1_414213562 * (T3C - T3D);
+		    T3F = T3B + T3E;
+		    T4V = T3B - T3E;
+		    T5j = T3y + T3A;
+		    T5k = KP1_414213562 * (T3C + T3D);
+		    T5l = T5j - T5k;
+		    T61 = T5j + T5k;
+	       }
+	  }
+	  {	/* load block 2: ri and ii at indices 4, 12, 20, 28 */
+	       E Td, T3G, T1c, T3K, Tg, T3J, T1f, T3H, T19, T1g;
+	       {
+		    E Tb, Tc, T1a, T1b;
+		    Tb = ri[WS(ris, 4)];
+		    Tc = ri[WS(ris, 28)];
+		    Td = Tb + Tc;
+		    T3G = Tb - Tc;
+		    T1a = ii[WS(iis, 4)];
+		    T1b = ii[WS(iis, 28)];
+		    T1c = T1a - T1b;
+		    T3K = T1a + T1b;
+	       }
+	       {
+		    E Te, Tf, T1d, T1e;
+		    Te = ri[WS(ris, 20)];
+		    Tf = ri[WS(ris, 12)];
+		    Tg = Te + Tf;
+		    T3J = Te - Tf;
+		    T1d = ii[WS(iis, 20)];
+		    T1e = ii[WS(iis, 12)];
+		    T1f = T1d - T1e;
+		    T3H = T1d + T1e;
+	       }
+	       Th = KP2_000000000 * (Td + Tg);
+	       T2T = KP2_000000000 * (T1f + T1c);
+	       T19 = Td - Tg;
+	       T1g = T1c - T1f;
+	       T1h = KP1_414213562 * (T19 - T1g);
+	       T2v = KP1_414213562 * (T19 + T1g);
+	       {
+		    E T3I, T3L, T5m, T5n;
+		    T3I = T3G - T3H;
+		    T3L = T3J + T3K;
+		    T3M = FNMS(KP765366864, T3L, KP1_847759065 * T3I);
+		    T4W = FMA(KP765366864, T3I, KP1_847759065 * T3L);
+		    T5m = T3G + T3H;
+		    T5n = T3K - T3J;
+		    T5o = FNMS(KP1_847759065, T5n, KP765366864 * T5m);
+		    T62 = FMA(KP1_847759065, T5m, KP765366864 * T5n);
+	       }
+	  }
+	  {	/* load block 3: ri and ii at indices 2, 6, 10, 14, 18, 22, 26, 30 */
+	       E Tl, T3O, T1v, T43, To, T42, T1y, T3P, Ts, T3R, T1p, T3S, Tv, T3U, T1m;
+	       E T3V;
+	       {
+		    E Tj, Tk, T1t, T1u;
+		    Tj = ri[WS(ris, 2)];
+		    Tk = ri[WS(ris, 30)];
+		    Tl = Tj + Tk;
+		    T3O = Tj - Tk;
+		    T1t = ii[WS(iis, 2)];
+		    T1u = ii[WS(iis, 30)];
+		    T1v = T1t - T1u;
+		    T43 = T1t + T1u;
+	       }
+	       {
+		    E Tm, Tn, T1w, T1x;
+		    Tm = ri[WS(ris, 18)];
+		    Tn = ri[WS(ris, 14)];
+		    To = Tm + Tn;
+		    T42 = Tm - Tn;
+		    T1w = ii[WS(iis, 18)];
+		    T1x = ii[WS(iis, 14)];
+		    T1y = T1w - T1x;
+		    T3P = T1w + T1x;
+	       }
+	       {
+		    E Tq, Tr, T1n, T1o;
+		    Tq = ri[WS(ris, 10)];
+		    Tr = ri[WS(ris, 22)];
+		    Ts = Tq + Tr;
+		    T3R = Tq - Tr;
+		    T1n = ii[WS(iis, 10)];
+		    T1o = ii[WS(iis, 22)];
+		    T1p = T1n - T1o;
+		    T3S = T1n + T1o;
+	       }
+	       {
+		    E Tt, Tu, T1k, T1l;
+		    Tt = ri[WS(ris, 6)];
+		    Tu = ri[WS(ris, 26)];
+		    Tv = Tt + Tu;
+		    T3U = Tt - Tu;
+		    T1k = ii[WS(iis, 26)];
+		    T1l = ii[WS(iis, 6)];
+		    T1m = T1k - T1l;
+		    T3V = T1l + T1k;
+	       }
+	       T3Q = T3O - T3P;
+	       T5q = T3O + T3P;
+	       T5u = T43 - T42;
+	       T44 = T42 + T43;
+	       Tp = Tl + To;
+	       Tw = Ts + Tv;
+	       T2V = Tp - Tw;
+	       {
+		    E T3T, T3W, T1j, T1q;
+		    T2W = T1y + T1v;
+		    T2X = T1p + T1m;
+		    T2Y = T2W - T2X;
+		    T3T = T3R - T3S;
+		    T3W = T3U - T3V;
+		    T3X = KP707106781 * (T3T + T3W);
+		    T5t = KP707106781 * (T3T - T3W);
+		    T1j = Tl - To;
+		    T1q = T1m - T1p;
+		    T1r = T1j + T1q;
+		    T2x = T1j - T1q;
+		    {
+			 E T3Z, T40, T1s, T1z;
+			 T3Z = T3R + T3S;
+			 T40 = T3U + T3V;
+			 T41 = KP707106781 * (T3Z - T40);
+			 T5r = KP707106781 * (T3Z + T40);
+			 T1s = Ts - Tv;
+			 T1z = T1v - T1y;
+			 T1A = T1s + T1z;
+			 T2y = T1z - T1s;
+		    }
+	       }
+	  }
+	  {	/* load block 4: ri and ii at indices 1, 7, 9, 15, 17, 23, 25, 31 */
+	       E TB, T48, T2c, T4G, TE, T4F, T2f, T49, TI, T4b, T1J, T4c, TL, T4e, T1G;
+	       E T4f;
+	       {
+		    E Tz, TA, T2a, T2b;
+		    Tz = ri[WS(ris, 1)];
+		    TA = ri[WS(ris, 31)];
+		    TB = Tz + TA;
+		    T48 = Tz - TA;
+		    T2a = ii[WS(iis, 1)];
+		    T2b = ii[WS(iis, 31)];
+		    T2c = T2a - T2b;
+		    T4G = T2a + T2b;
+	       }
+	       {
+		    E TC, TD, T2d, T2e;
+		    TC = ri[WS(ris, 17)];
+		    TD = ri[WS(ris, 15)];
+		    TE = TC + TD;
+		    T4F = TC - TD;
+		    T2d = ii[WS(iis, 17)];
+		    T2e = ii[WS(iis, 15)];
+		    T2f = T2d - T2e;
+		    T49 = T2d + T2e;
+	       }
+	       {
+		    E TG, TH, T1H, T1I;
+		    TG = ri[WS(ris, 9)];
+		    TH = ri[WS(ris, 23)];
+		    TI = TG + TH;
+		    T4b = TG - TH;
+		    T1H = ii[WS(iis, 9)];
+		    T1I = ii[WS(iis, 23)];
+		    T1J = T1H - T1I;
+		    T4c = T1H + T1I;
+	       }
+	       {
+		    E TJ, TK, T1E, T1F;
+		    TJ = ri[WS(ris, 7)];
+		    TK = ri[WS(ris, 25)];
+		    TL = TJ + TK;
+		    T4e = TJ - TK;
+		    T1E = ii[WS(iis, 25)];
+		    T1F = ii[WS(iis, 7)];
+		    T1G = T1E - T1F;
+		    T4f = T1F + T1E;
+	       }
+	       {
+		    E TF, TM, T1D, T1K;
+		    T4a = T48 - T49;
+		    T5y = T48 + T49;
+		    T5N = T4G - T4F;
+		    T4H = T4F + T4G;
+		    TF = TB + TE;
+		    TM = TI + TL;
+		    TN = TF + TM;
+		    T31 = TF - TM;
+		    {
+			 E T4C, T4D, T37, T38;
+			 T4C = T4b + T4c;
+			 T4D = T4e + T4f;
+			 T4E = KP707106781 * (T4C - T4D);
+			 T5z = KP707106781 * (T4C + T4D);
+			 T37 = T2f + T2c;
+			 T38 = T1J + T1G;
+			 T39 = T37 - T38;
+			 T3q = T38 + T37;
+		    }
+		    T1D = TB - TE;
+		    T1K = T1G - T1J;
+		    T1L = T1D + T1K;
+		    T2B = T1D - T1K;
+		    {
+			 E T4d, T4g, T29, T2g;
+			 T4d = T4b - T4c;
+			 T4g = T4e - T4f;
+			 T4h = KP707106781 * (T4d + T4g);
+			 T5M = KP707106781 * (T4d - T4g);
+			 T29 = TI - TL;
+			 T2g = T2c - T2f;
+			 T2h = T29 + T2g;
+			 T2F = T2g - T29;
+		    }
+	       }
+	  }
+	  {	/* load block 5: ri and ii at indices 3, 5, 11, 13, 19, 21, 27, 29 */
+	       E TQ, T4j, T1P, T4n, TT, T4m, T1S, T4k, TX, T4q, T1Y, T4u, T10, T4t, T21;
+	       E T4r;
+	       {
+		    E TO, TP, T1N, T1O;
+		    TO = ri[WS(ris, 5)];
+		    TP = ri[WS(ris, 27)];
+		    TQ = TO + TP;
+		    T4j = TO - TP;
+		    T1N = ii[WS(iis, 5)];
+		    T1O = ii[WS(iis, 27)];
+		    T1P = T1N - T1O;
+		    T4n = T1N + T1O;
+	       }
+	       {
+		    E TR, TS, T1Q, T1R;
+		    TR = ri[WS(ris, 21)];
+		    TS = ri[WS(ris, 11)];
+		    TT = TR + TS;
+		    T4m = TR - TS;
+		    T1Q = ii[WS(iis, 21)];
+		    T1R = ii[WS(iis, 11)];
+		    T1S = T1Q - T1R;
+		    T4k = T1Q + T1R;
+	       }
+	       {
+		    E TV, TW, T1W, T1X;
+		    TV = ri[WS(ris, 3)];
+		    TW = ri[WS(ris, 29)];
+		    TX = TV + TW;
+		    T4q = TV - TW;
+		    T1W = ii[WS(iis, 29)];
+		    T1X = ii[WS(iis, 3)];
+		    T1Y = T1W - T1X;
+		    T4u = T1X + T1W;
+	       }
+	       {
+		    E TY, TZ, T1Z, T20;
+		    TY = ri[WS(ris, 13)];
+		    TZ = ri[WS(ris, 19)];
+		    T10 = TY + TZ;
+		    T4t = TY - TZ;
+		    T1Z = ii[WS(iis, 13)];
+		    T20 = ii[WS(iis, 19)];
+		    T21 = T1Z - T20;
+		    T4r = T1Z + T20;
+	       }
+	       {
+		    E TU, T11, T5B, T5C;
+		    TU = TQ + TT;
+		    T11 = TX + T10;
+		    T12 = TU + T11;
+		    T36 = TU - T11;
+		    T5B = T4j + T4k;
+		    T5C = T4n - T4m;
+		    T5D = FNMS(KP923879532, T5C, KP382683432 * T5B);
+		    T5J = FMA(KP923879532, T5B, KP382683432 * T5C);
+	       }
+	       {
+		    E T5E, T5F, T1M, T1T;
+		    T5E = T4q + T4r;
+		    T5F = T4t + T4u;
+		    T5G = FNMS(KP923879532, T5F, KP382683432 * T5E);
+		    T5K = FMA(KP923879532, T5E, KP382683432 * T5F);
+		    T1M = TQ - TT;
+		    T1T = T1P - T1S;
+		    T1U = T1M - T1T;
+		    T26 = T1M + T1T;
+	       }
+	       {
+		    E T1V, T22, T4l, T4o;
+		    T1V = TX - T10;
+		    T22 = T1Y - T21;
+		    T23 = T1V + T22;
+		    T27 = T22 - T1V;
+		    T4l = T4j - T4k;
+		    T4o = T4m + T4n;
+		    T4p = FNMS(KP382683432, T4o, KP923879532 * T4l);
+		    T4z = FMA(KP382683432, T4l, KP923879532 * T4o);
+	       }
+	       {
+		    E T4s, T4v, T32, T33;
+		    T4s = T4q - T4r;
+		    T4v = T4t - T4u;
+		    T4w = FMA(KP923879532, T4s, KP382683432 * T4v);
+		    T4A = FNMS(KP382683432, T4s, KP923879532 * T4v);
+		    T32 = T21 + T1Y;
+		    T33 = T1S + T1P;
+		    T34 = T32 - T33;
+		    T3r = T33 + T32;
+	       }
+	  }
+	  {	/* output block: O[0], O[16], O[32], O[48] */
+	       E T13, T3x, Ty, T3w, Ti, Tx;
+	       T13 = KP2_000000000 * (TN + T12);
+	       T3x = KP2_000000000 * (T3r + T3q);
+	       Ti = Ta + Th;
+	       Tx = KP2_000000000 * (Tp + Tw);
+	       Ty = Ti + Tx;
+	       T3w = Ti - Tx;
+	       O[WS(os, 32)] = Ty - T13;
+	       O[WS(os, 48)] = T3w + T3x;
+	       O[0] = Ty + T13;
+	       O[WS(os, 16)] = T3w - T3x;
+	  }
+	  {	/* output block: O[12], O[28], O[44], O[60] */
+	       E T3g, T3k, T3j, T3l;
+	       {
+		    E T3e, T3f, T3h, T3i;
+		    T3e = T2S + T2T;
+		    T3f = KP1_414213562 * (T2V + T2Y);
+		    T3g = T3e - T3f;
+		    T3k = T3e + T3f;
+		    T3h = T31 - T34;
+		    T3i = T39 - T36;
+		    T3j = FNMS(KP1_847759065, T3i, KP765366864 * T3h);
+		    T3l = FMA(KP1_847759065, T3h, KP765366864 * T3i);
+	       }
+	       O[WS(os, 44)] = T3g - T3j;
+	       O[WS(os, 60)] = T3k + T3l;
+	       O[WS(os, 12)] = T3g + T3j;
+	       O[WS(os, 28)] = T3k - T3l;
+	  }
+	  {	/* output block: O[8], O[24], O[40], O[56] */
+	       E T3o, T3u, T3t, T3v;
+	       {
+		    E T3m, T3n, T3p, T3s;
+		    T3m = Ta - Th;
+		    T3n = KP2_000000000 * (T2X + T2W);
+		    T3o = T3m - T3n;
+		    T3u = T3m + T3n;
+		    T3p = TN - T12;
+		    T3s = T3q - T3r;
+		    T3t = KP1_414213562 * (T3p - T3s);
+		    T3v = KP1_414213562 * (T3p + T3s);
+	       }
+	       O[WS(os, 40)] = T3o - T3t;
+	       O[WS(os, 56)] = T3u + T3v;
+	       O[WS(os, 8)] = T3o + T3t;
+	       O[WS(os, 24)] = T3u - T3v;
+	  }
+	  {	/* output block: O[4], O[20], O[36], O[52] */
+	       E T30, T3c, T3b, T3d;
+	       {
+		    E T2U, T2Z, T35, T3a;
+		    T2U = T2S - T2T;
+		    T2Z = KP1_414213562 * (T2V - T2Y);
+		    T30 = T2U + T2Z;
+		    T3c = T2U - T2Z;
+		    T35 = T31 + T34;
+		    T3a = T36 + T39;
+		    T3b = FNMS(KP765366864, T3a, KP1_847759065 * T35);
+		    T3d = FMA(KP765366864, T35, KP1_847759065 * T3a);
+	       }
+	       O[WS(os, 36)] = T30 - T3b;
+	       O[WS(os, 52)] = T3c + T3d;
+	       O[WS(os, 4)] = T30 + T3b;
+	       O[WS(os, 20)] = T3c - T3d;
+	  }
+	  {	/* output block: O[2], O[10], O[18], O[26], O[34], O[42], O[50], O[58] */
+	       E T25, T2p, T2i, T2q, T1C, T2k, T2o, T2s, T24, T28;
+	       T24 = KP707106781 * (T1U + T23);
+	       T25 = T1L + T24;
+	       T2p = T1L - T24;
+	       T28 = KP707106781 * (T26 + T27);
+	       T2i = T28 + T2h;
+	       T2q = T2h - T28;
+	       {
+		    E T1i, T1B, T2m, T2n;
+		    T1i = T18 + T1h;
+		    T1B = FNMS(KP765366864, T1A, KP1_847759065 * T1r);
+		    T1C = T1i + T1B;
+		    T2k = T1i - T1B;
+		    T2m = T18 - T1h;
+		    T2n = FMA(KP765366864, T1r, KP1_847759065 * T1A);
+		    T2o = T2m - T2n;
+		    T2s = T2m + T2n;
+	       }
+	       {
+		    E T2j, T2t, T2l, T2r;
+		    T2j = FNMS(KP390180644, T2i, KP1_961570560 * T25);
+		    O[WS(os, 34)] = T1C - T2j;
+		    O[WS(os, 2)] = T1C + T2j;
+		    T2t = FMA(KP1_662939224, T2p, KP1_111140466 * T2q);
+		    O[WS(os, 26)] = T2s - T2t;
+		    O[WS(os, 58)] = T2s + T2t;
+		    T2l = FMA(KP390180644, T25, KP1_961570560 * T2i);
+		    O[WS(os, 18)] = T2k - T2l;
+		    O[WS(os, 50)] = T2k + T2l;
+		    T2r = FNMS(KP1_662939224, T2q, KP1_111140466 * T2p);
+		    O[WS(os, 42)] = T2o - T2r;
+		    O[WS(os, 10)] = T2o + T2r;
+	       }
+	  }
+	  {	/* output block: O[6], O[14], O[22], O[30], O[38], O[46], O[54], O[62] */
+	       E T2D, T2N, T2G, T2O, T2A, T2I, T2M, T2Q, T2C, T2E;
+	       T2C = KP707106781 * (T27 - T26);
+	       T2D = T2B + T2C;
+	       T2N = T2B - T2C;
+	       T2E = KP707106781 * (T1U - T23);
+	       T2G = T2E + T2F;
+	       T2O = T2F - T2E;
+	       {
+		    E T2w, T2z, T2K, T2L;
+		    T2w = T2u - T2v;
+		    T2z = FNMS(KP1_847759065, T2y, KP765366864 * T2x);
+		    T2A = T2w + T2z;
+		    T2I = T2w - T2z;
+		    T2K = T2u + T2v;
+		    T2L = FMA(KP1_847759065, T2x, KP765366864 * T2y);
+		    T2M = T2K - T2L;
+		    T2Q = T2K + T2L;
+	       }
+	       {
+		    E T2H, T2R, T2J, T2P;
+		    T2H = FNMS(KP1_111140466, T2G, KP1_662939224 * T2D);
+		    O[WS(os, 38)] = T2A - T2H;
+		    O[WS(os, 6)] = T2A + T2H;
+		    T2R = FMA(KP1_961570560, T2N, KP390180644 * T2O);
+		    O[WS(os, 30)] = T2Q - T2R;
+		    O[WS(os, 62)] = T2Q + T2R;
+		    T2J = FMA(KP1_111140466, T2D, KP1_662939224 * T2G);
+		    O[WS(os, 22)] = T2I - T2J;
+		    O[WS(os, 54)] = T2I + T2J;
+		    T2P = FNMS(KP1_961570560, T2O, KP390180644 * T2N);
+		    O[WS(os, 46)] = T2M - T2P;
+		    O[WS(os, 14)] = T2M + T2P;
+	       }
+	  }
+	  {	/* output block: O[3], O[11], O[19], O[27], O[35], O[43], O[51], O[59] */
+	       E T5p, T5T, T5w, T5U, T5I, T5W, T5P, T5X, T5s, T5v;
+	       T5p = T5l + T5o;
+	       T5T = T5l - T5o;
+	       T5s = T5q - T5r;
+	       T5v = T5t + T5u;
+	       T5w = FNMS(KP1_111140466, T5v, KP1_662939224 * T5s);
+	       T5U = FMA(KP1_111140466, T5s, KP1_662939224 * T5v);
+	       {
+		    E T5A, T5H, T5L, T5O;
+		    T5A = T5y - T5z;
+		    T5H = T5D + T5G;
+		    T5I = T5A + T5H;
+		    T5W = T5A - T5H;
+		    T5L = T5J - T5K;
+		    T5O = T5M + T5N;
+		    T5P = T5L + T5O;
+		    T5X = T5O - T5L;
+	       }
+	       {
+		    E T5x, T5Q, T5Z, T60;
+		    T5x = T5p + T5w;
+		    T5Q = FNMS(KP580569354, T5P, KP1_913880671 * T5I);
+		    O[WS(os, 35)] = T5x - T5Q;
+		    O[WS(os, 3)] = T5x + T5Q;
+		    T5Z = T5T + T5U;
+		    T60 = FMA(KP1_763842528, T5W, KP942793473 * T5X);
+		    O[WS(os, 27)] = T5Z - T60;
+		    O[WS(os, 59)] = T5Z + T60;
+	       }
+	       {
+		    E T5R, T5S, T5V, T5Y;
+		    T5R = T5p - T5w;
+		    T5S = FMA(KP580569354, T5I, KP1_913880671 * T5P);
+		    O[WS(os, 19)] = T5R - T5S;
+		    O[WS(os, 51)] = T5R + T5S;
+		    T5V = T5T - T5U;
+		    T5Y = FNMS(KP1_763842528, T5X, KP942793473 * T5W);
+		    O[WS(os, 43)] = T5V - T5Y;
+		    O[WS(os, 11)] = T5V + T5Y;
+	       }
+	  }
+	  {	/* output block: O[1], O[9], O[17], O[25], O[33], O[41], O[49], O[57] */
+	       E T3N, T4N, T46, T4O, T4y, T4Q, T4J, T4R, T3Y, T45;
+	       T3N = T3F + T3M;
+	       T4N = T3F - T3M;
+	       T3Y = T3Q + T3X;
+	       T45 = T41 + T44;
+	       T46 = FNMS(KP390180644, T45, KP1_961570560 * T3Y);
+	       T4O = FMA(KP390180644, T3Y, KP1_961570560 * T45);
+	       {
+		    E T4i, T4x, T4B, T4I;
+		    T4i = T4a + T4h;
+		    T4x = T4p + T4w;
+		    T4y = T4i + T4x;
+		    T4Q = T4i - T4x;
+		    T4B = T4z + T4A;
+		    T4I = T4E + T4H;
+		    T4J = T4B + T4I;
+		    T4R = T4I - T4B;
+	       }
+	       {
+		    E T47, T4K, T4T, T4U;
+		    T47 = T3N + T46;
+		    T4K = FNMS(KP196034280, T4J, KP1_990369453 * T4y);
+		    O[WS(os, 33)] = T47 - T4K;
+		    O[WS(os, 1)] = T47 + T4K;
+		    T4T = T4N + T4O;
+		    T4U = FMA(KP1_546020906, T4Q, KP1_268786568 * T4R);
+		    O[WS(os, 25)] = T4T - T4U;
+		    O[WS(os, 57)] = T4T + T4U;
+	       }
+	       {
+		    E T4L, T4M, T4P, T4S;
+		    T4L = T3N - T46;
+		    T4M = FMA(KP196034280, T4y, KP1_990369453 * T4J);
+		    O[WS(os, 17)] = T4L - T4M;
+		    O[WS(os, 49)] = T4L + T4M;
+		    T4P = T4N - T4O;
+		    T4S = FNMS(KP1_546020906, T4R, KP1_268786568 * T4Q);
+		    O[WS(os, 41)] = T4P - T4S;
+		    O[WS(os, 9)] = T4P + T4S;
+	       }
+	  }
+	  {	/* output block: O[7], O[15], O[23], O[31], O[39], O[47], O[55], O[63] */
+	       E T63, T6h, T66, T6i, T6a, T6k, T6d, T6l, T64, T65;
+	       T63 = T61 - T62;
+	       T6h = T61 + T62;
+	       T64 = T5q + T5r;
+	       T65 = T5u - T5t;
+	       T66 = FNMS(KP1_961570560, T65, KP390180644 * T64);
+	       T6i = FMA(KP1_961570560, T64, KP390180644 * T65);
+	       {
+		    E T68, T69, T6b, T6c;
+		    T68 = T5y + T5z;
+		    T69 = T5J + T5K;
+		    T6a = T68 - T69;
+		    T6k = T68 + T69;
+		    T6b = T5D - T5G;
+		    T6c = T5N - T5M;
+		    T6d = T6b + T6c;
+		    T6l = T6c - T6b;
+	       }
+	       {
+		    E T67, T6e, T6n, T6o;
+		    T67 = T63 + T66;
+		    T6e = FNMS(KP1_268786568, T6d, KP1_546020906 * T6a);
+		    O[WS(os, 39)] = T67 - T6e;
+		    O[WS(os, 7)] = T67 + T6e;
+		    T6n = T6h + T6i;
+		    T6o = FMA(KP1_990369453, T6k, KP196034280 * T6l);
+		    O[WS(os, 31)] = T6n - T6o;
+		    O[WS(os, 63)] = T6n + T6o;
+	       }
+	       {
+		    E T6f, T6g, T6j, T6m;
+		    T6f = T63 - T66;
+		    T6g = FMA(KP1_268786568, T6a, KP1_546020906 * T6d);
+		    O[WS(os, 23)] = T6f - T6g;
+		    O[WS(os, 55)] = T6f + T6g;
+		    T6j = T6h - T6i;
+		    T6m = FNMS(KP1_990369453, T6l, KP196034280 * T6k);
+		    O[WS(os, 47)] = T6j - T6m;
+		    O[WS(os, 15)] = T6j + T6m;
+	       }
+	  }
+	  {	/* output block: O[5], O[13], O[21], O[29], O[37], O[45], O[53], O[61] */
+	       E T4X, T5b, T50, T5c, T54, T5e, T57, T5f, T4Y, T4Z;
+	       T4X = T4V - T4W;
+	       T5b = T4V + T4W;
+	       T4Y = T3Q - T3X;
+	       T4Z = T44 - T41;
+	       T50 = FNMS(KP1_662939224, T4Z, KP1_111140466 * T4Y);
+	       T5c = FMA(KP1_662939224, T4Y, KP1_111140466 * T4Z);
+	       {
+		    E T52, T53, T55, T56;
+		    T52 = T4a - T4h;
+		    T53 = T4A - T4z;
+		    T54 = T52 + T53;
+		    T5e = T52 - T53;
+		    T55 = T4p - T4w;
+		    T56 = T4H - T4E;
+		    T57 = T55 + T56;
+		    T5f = T56 - T55;
+	       }
+	       {
+		    E T51, T58, T5h, T5i;
+		    T51 = T4X + T50;
+		    T58 = FNMS(KP942793473, T57, KP1_763842528 * T54);
+		    O[WS(os, 37)] = T51 - T58;
+		    O[WS(os, 5)] = T51 + T58;
+		    T5h = T5b + T5c;
+		    T5i = FMA(KP1_913880671, T5e, KP580569354 * T5f);
+		    O[WS(os, 29)] = T5h - T5i;
+		    O[WS(os, 61)] = T5h + T5i;
+	       }
+	       {
+		    E T59, T5a, T5d, T5g;
+		    T59 = T4X - T50;
+		    T5a = FMA(KP942793473, T54, KP1_763842528 * T57);
+		    O[WS(os, 21)] = T59 - T5a;
+		    O[WS(os, 53)] = T59 + T5a;
+		    T5d = T5b - T5c;
+		    T5g = FNMS(KP1_913880671, T5f, KP580569354 * T5e);
+		    O[WS(os, 45)] = T5d - T5g;
+		    O[WS(os, 13)] = T5d + T5g;
+	       }
+	  }
+     }
+}
+
+/* Batch driver: runs the one-transform kernel mhc2r_64_0 v times,
+ * stepping ri and ii by ivs and O by ovs after each transform. */
+static void mhc2r_64(const R *ri, const R *ii, R *O, stride ris, stride iis, stride os, int v, int ivs, int ovs)
+{
+     int k;
+
+     for (k = 0; k < v; ++k, ri += ivs, ii += ivs, O += ovs) {
+	  mhc2r_64_0(ri, ii, O, ris, iis, os);
+     }
+}
+
+static const khc2r_desc desc = { 64, "mhc2r_64", {342, 82, 52, 0}, &GENUS, 0, 0, 0, 0, 0 };	/* {342, 82, 52, 0} equals the add / multiply / fused multiply-add counts quoted in this file's header comment */
+
+void X(codelet_mhc2r_64) (planner *p) {	/* passes the mhc2r_64 driver and its descriptor to X(khc2r_register) for planner p */
+     X(khc2r_register) (p, mhc2r_64, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/hc2rIII.h b/src/fftw3/rdft/codelets/hc2rIII.h
new file mode 100644
index 0000000..f14d248
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hc2rIII.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+
+#define GENUS X(rdft_hc2rIII_genus)	/* short alias for the name-mangled genus object */
+extern const khc2rIII_genus GENUS;	/* declaration only; this header does not define the object */
diff --git a/src/fftw3/rdft/codelets/hf.h b/src/fftw3/rdft/codelets/hf.h
new file mode 100644
index 0000000..ff80d36
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hf.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+
+#define GENUS X(rdft_hf_genus)	/* short alias for the name-mangled genus object */
+extern const hc2hc_genus GENUS;	/* declaration only; this header does not define the object */
diff --git a/src/fftw3/rdft/codelets/hfb.c b/src/fftw3/rdft/codelets/hfb.c
new file mode 100644
index 0000000..93b2bb6
--- /dev/null
+++ b/src/fftw3/rdft/codelets/hfb.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "codelet-rdft.h"
+#include "hf.h"
+
+/* Applicability test: a zero s1/s2/dist in the descriptor accepts anything;
+ * a nonzero one must equal ios/vs/dist respectively.  Returns 1 or 0. */
+static int okp(const hc2hc_desc *d, const R *rio, const R *iio,
+	       int ios, int vs, int m, int dist)
+{
+     UNUSED(rio); UNUSED(iio); UNUSED(m);
+     if (d->s1 && d->s1 != ios) return 0;
+     if (d->s2 && d->s2 != vs) return 0;
+     if (d->dist && d->dist != dist) return 0;
+     return 1;
+}
+
+const hc2hc_genus GENUS = { okp, R2HC, 1 };	/* at this point GENUS is the macro from hf.h, included above */
+
+#undef GENUS	/* drop the hf.h alias so the next header can supply its own */
+#include "hb.h"	/* NOTE(review): presumably redefines GENUS for the hb genus -- hb.h is not visible here, confirm */
+
+const hc2hc_genus GENUS = { okp, HC2R, 1 };	/* second genus object: same okp predicate, kind HC2R */
diff --git a/src/fftw3/rdft/codelets/r2hc.c b/src/fftw3/rdft/codelets/r2hc.c
new file mode 100644
index 0000000..1506868
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "codelet-rdft.h"
+#include "r2hc.h"
+
+/* Applicability test: each stride field of the descriptor is either 0
+ * (accept anything) or must equal the corresponding actual stride. */
+static int okp(const kr2hc_desc *d, const R *I, const R *ro, const R *io,
+	       int is, int ros, int ios, int vl, int ivs, int ovs)
+{
+     UNUSED(I); UNUSED(ro); UNUSED(io); UNUSED(vl);
+     return (1
+	     && (!d->is || (d->is == is))
+	     && (!d->ros || (d->ros == ros))
+	     && (!d->ios || (d->ios == ios))	/* fixed: used to compare d->ros against ios */
+	     && (!d->ivs || (d->ivs == ivs))
+	     && (!d->ovs || (d->ovs == ovs))
+	  );
+}
+
+const kr2hc_genus GENUS = { okp, R2HC, 1 };	/* at this point GENUS is the macro from r2hc.h, included above */
+
+#undef GENUS	/* drop the r2hc.h alias so the next header can supply its own */
+#include "r2hcII.h"	/* NOTE(review): presumably redefines GENUS for the r2hcII genus -- r2hcII.h is not visible here, confirm */
+
+const kr2hcII_genus GENUS = { okp, R2HCII, 1 };	/* second genus object: same okp predicate, kind R2HCII */
diff --git a/src/fftw3/rdft/codelets/r2hc.h b/src/fftw3/rdft/codelets/r2hc.h
new file mode 100644
index 0000000..6ec1e28
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+
+#define GENUS X(rdft_r2hc_genus)	/* short alias for the name-mangled genus object */
+extern const kr2hc_genus GENUS;	/* declaration only; this header does not define the object */
diff --git a/src/fftw3/rdft/codelets/r2hc/hf2_16.c b/src/fftw3/rdft/codelets/r2hc/hf2_16.c
new file mode 100644
index 0000000..47323ee
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf2_16.c
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:57:30 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -twiddle-log3 -n 16 -dit -name hf2_16 -include hf.h */
+
+/*
+ * This function contains 196 FP additions, 108 FP multiplications,
+ * (or, 156 additions, 68 multiplications, 40 fused multiply/add),
+ * 104 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf2_16.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf2_16.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf2_16.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf2_16(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{	/* In-place pass: each loop iteration loads 16 values through rio and 16 through iio, combines them with factors built from W[0..7], and stores 32 results back to the same offsets. Returns the advanced W. */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(1/2) */
+     int i; /* iteration counter only; never used as an index */
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 8) {
+	  E T1, T3d, T18, T26, T29, T2R, Tq, T1r, T1E, T2k, T2g, T1O, Te, T3c, Tz;
+	  E T1P, T1S, T1T, T1U, TG, TL, T1V, T1Y, T1Z, T20, TT, TY, T1X, T1A, T2l;
+	  E T1J, T2h, T1h, T2b, T1m, T2a;
+	  T1 = rio[0];
+	  T3d = iio[-WS(ios, 15)];
+	  {
+	       E T9, T1z, Td, T1v, T1I, Tl, Tp, T1G, Tu, T1D, TD, Ty, T1C, T1l, TX;
+	       E TK, T1g, TI, T1j, TF, T1c, TQ, TS, T1p, T1q, TV, T2, T5, Ti, Tg;
+	       E T4, Tw, Ts, Ta, Tv, T7, Tb, Tr, Tk, TW, TJ, TC, TU, To, TE;
+	       E TH, T14, T24, T17, T25, TN, TO, TP, TR;
+	       T9 = rio[WS(ios, 8)]; /* loads of the remaining inputs start here (T1 and T3d were loaded above) */
+	       T1z = iio[-WS(ios, 8)];
+	       Td = iio[-WS(ios, 7)];
+	       T1v = rio[WS(ios, 7)];
+	       T1I = iio[-WS(ios, 4)];
+	       Tl = rio[WS(ios, 4)];
+	       Tp = iio[-WS(ios, 11)];
+	       T1G = rio[WS(ios, 11)];
+	       Tu = rio[WS(ios, 12)];
+	       T1D = iio[-WS(ios, 12)];
+	       TD = rio[WS(ios, 2)];
+	       Ty = iio[-WS(ios, 3)];
+	       T1C = rio[WS(ios, 3)];
+	       T1l = iio[-WS(ios, 2)];
+	       TX = iio[-WS(ios, 9)];
+	       TK = iio[-WS(ios, 5)];
+	       T1g = iio[-WS(ios, 10)];
+	       TI = rio[WS(ios, 10)];
+	       T1j = rio[WS(ios, 13)];
+	       TF = iio[-WS(ios, 13)];
+	       T1c = rio[WS(ios, 5)];
+	       TQ = rio[WS(ios, 14)];
+	       TS = iio[-WS(ios, 1)];
+	       T1p = rio[WS(ios, 15)];
+	       T1q = iio[0];
+	       TV = rio[WS(ios, 6)];
+	       {
+		    E T12, T16, T13, T15, T3, T6, Tm, Tj, Tn, Th;
+		    T12 = rio[WS(ios, 1)];
+		    T16 = iio[-WS(ios, 6)];
+		    T13 = iio[-WS(ios, 14)];
+		    T15 = rio[WS(ios, 9)];
+		    T2 = W[4]; /* W[0..7] are the only table entries read; the other factors (Tk, To, TC, TE, ...) are products of them */
+		    T5 = W[5];
+		    T3 = W[0];
+		    T6 = W[1];
+		    Ti = W[3];
+		    Tg = W[2];
+		    T4 = T2 * T3;
+		    Tw = T5 * Tg;
+		    Ts = T5 * Ti;
+		    Ta = T2 * T6;
+		    Tv = T2 * Ti;
+		    T7 = T5 * T6;
+		    Tb = T5 * T3;
+		    Tr = T2 * Tg;
+		    Tm = Tg * T6;
+		    Tj = Ti * T6;
+		    Tn = Ti * T3;
+		    Th = Tg * T3;
+		    Tk = Th - Tj;
+		    TW = Tv - Tw;
+		    TJ = Ta + Tb;
+		    TC = Th + Tj;
+		    TU = Tr + Ts;
+		    To = Tm + Tn;
+		    TE = Tm - Tn;
+		    TH = T4 - T7;
+		    T14 = FMA(T3, T12, T6 * T13);
+		    T24 = FNMS(T6, T12, T3 * T13);
+		    T17 = FMA(T2, T15, T5 * T16);
+		    T25 = FNMS(T5, T15, T2 * T16);
+		    TN = W[6];
+		    TO = W[7];
+		    TP = FMA(TN, T3, TO * T6);
+		    TR = FNMS(TO, T3, TN * T6);
+	       }
+	       T18 = T14 + T17;
+	       T26 = T24 - T25;
+	       T29 = T14 - T17;
+	       T2R = T24 + T25;
+	       Tq = FMA(Tk, Tl, To * Tp);
+	       T1r = FMA(TN, T1p, TO * T1q);
+	       T1E = FMA(Tg, T1C, Ti * T1D);
+	       T2k = FNMS(TO, T1p, TN * T1q);
+	       T2g = FNMS(Ti, T1C, Tg * T1D);
+	       {
+		    E T8, Tc, Tt, Tx;
+		    T1O = FNMS(To, Tl, Tk * Tp);
+		    T8 = T4 + T7;
+		    Tc = Ta - Tb;
+		    Te = FNMS(Tc, Td, T8 * T9);
+		    T3c = FMA(Tc, T9, T8 * Td);
+		    Tt = Tr - Ts;
+		    Tx = Tv + Tw;
+		    Tz = FMA(Tt, Tu, Tx * Ty);
+		    T1P = FNMS(Tx, Tu, Tt * Ty);
+		    T1S = FMA(TE, TD, TC * TF);
+		    T1T = FNMS(TJ, TI, TH * TK);
+		    T1U = T1S - T1T;
+	       }
+	       TG = FNMS(TE, TF, TC * TD);
+	       TL = FMA(TH, TI, TJ * TK);
+	       T1V = TG - TL;
+	       T1Y = FMA(TR, TQ, TP * TS);
+	       T1Z = FMA(TW, TV, TU * TX);
+	       T20 = T1Y - T1Z;
+	       TT = FNMS(TR, TS, TP * TQ);
+	       TY = FNMS(TW, TX, TU * TV);
+	       T1X = TT - TY;
+	       {
+		    E T1u, T1F, T1y, T1H;
+		    {
+			 E T1s, T1t, T1w, T1x;
+			 T1s = T2 * TC;
+			 T1t = T5 * TE;
+			 T1u = T1s - T1t;
+			 T1F = T1s + T1t;
+			 T1w = T2 * TE;
+			 T1x = T5 * TC;
+			 T1y = T1w + T1x;
+			 T1H = T1w - T1x;
+		    }
+		    T1A = FMA(T1u, T1v, T1y * T1z);
+		    T2l = FNMS(T1y, T1v, T1u * T1z);
+		    T1J = FNMS(T1H, T1I, T1F * T1G);
+		    T2h = FMA(T1H, T1G, T1F * T1I);
+	       }
+	       {
+		    E T1b, T1i, T1f, T1k;
+		    {
+			 E T19, T1a, T1d, T1e;
+			 T19 = T2 * Tk;
+			 T1a = T5 * To;
+			 T1b = T19 + T1a;
+			 T1i = T19 - T1a;
+			 T1d = T2 * To;
+			 T1e = T5 * Tk;
+			 T1f = T1d - T1e;
+			 T1k = T1d + T1e;
+		    }
+		    T1h = FNMS(T1f, T1g, T1b * T1c);
+		    T2b = FNMS(T1k, T1j, T1i * T1l);
+		    T1m = FMA(T1i, T1j, T1k * T1l);
+		    T2a = FMA(T1f, T1c, T1b * T1g);
+	       }
+	  }
+	  { /* first output group: writes the even offsets (0, 2, ..., 14) of both rio and iio */
+	       E TB, T2L, T10, T3k, T3f, T3l, T2O, T3a, T1o, T36, T2U, T32, T1L, T37, T2Z;
+	       E T33;
+	       {
+		    E Tf, TA, T2M, T2N;
+		    Tf = T1 + Te;
+		    TA = Tq + Tz;
+		    TB = Tf + TA;
+		    T2L = Tf - TA;
+		    {
+			 E TM, TZ, T3b, T3e;
+			 TM = TG + TL;
+			 TZ = TT + TY;
+			 T10 = TM + TZ;
+			 T3k = TZ - TM;
+			 T3b = T1O + T1P;
+			 T3e = T3c + T3d;
+			 T3f = T3b + T3e;
+			 T3l = T3e - T3b;
+		    }
+		    T2M = T1S + T1T;
+		    T2N = T1Y + T1Z;
+		    T2O = T2M - T2N;
+		    T3a = T2M + T2N;
+		    {
+			 E T1n, T2Q, T2S, T2T;
+			 T1n = T1h + T1m;
+			 T2Q = T18 - T1n;
+			 T2S = T2a + T2b;
+			 T2T = T2R - T2S;
+			 T1o = T18 + T1n;
+			 T36 = T2R + T2S;
+			 T2U = T2Q + T2T;
+			 T32 = T2T - T2Q;
+		    }
+		    {
+			 E T1B, T1K, T2V, T2W, T2X, T2Y;
+			 T1B = T1r + T1A;
+			 T1K = T1E + T1J;
+			 T2V = T1B - T1K;
+			 T2W = T2k + T2l;
+			 T2X = T2g + T2h;
+			 T2Y = T2W - T2X;
+			 T1L = T1B + T1K;
+			 T37 = T2W + T2X;
+			 T2Z = T2V - T2Y;
+			 T33 = T2V + T2Y;
+		    }
+	       }
+	       {
+		    E T11, T1M, T39, T3g;
+		    T11 = TB + T10;
+		    T1M = T1o + T1L;
+		    iio[-WS(ios, 8)] = T11 - T1M;
+		    rio[0] = T11 + T1M;
+		    T39 = T36 + T37;
+		    T3g = T3a + T3f;
+		    rio[WS(ios, 8)] = T39 - T3g;
+		    iio[0] = T39 + T3g;
+	       }
+	       {
+		    E T2P, T30, T3j, T3m;
+		    T2P = T2L + T2O;
+		    T30 = KP707106781 * (T2U + T2Z);
+		    iio[-WS(ios, 10)] = T2P - T30;
+		    rio[WS(ios, 2)] = T2P + T30;
+		    T3j = KP707106781 * (T32 + T33);
+		    T3m = T3k + T3l;
+		    rio[WS(ios, 10)] = T3j - T3m;
+		    iio[-WS(ios, 2)] = T3j + T3m;
+	       }
+	       {
+		    E T31, T34, T3n, T3o;
+		    T31 = T2L - T2O;
+		    T34 = KP707106781 * (T32 - T33);
+		    iio[-WS(ios, 14)] = T31 - T34;
+		    rio[WS(ios, 6)] = T31 + T34;
+		    T3n = KP707106781 * (T2Z - T2U);
+		    T3o = T3l - T3k;
+		    rio[WS(ios, 14)] = T3n - T3o;
+		    iio[-WS(ios, 6)] = T3n + T3o;
+	       }
+	       {
+		    E T35, T38, T3h, T3i;
+		    T35 = TB - T10;
+		    T38 = T36 - T37;
+		    iio[-WS(ios, 12)] = T35 - T38;
+		    rio[WS(ios, 4)] = T35 + T38;
+		    T3h = T1L - T1o;
+		    T3i = T3f - T3a;
+		    rio[WS(ios, 12)] = T3h - T3i;
+		    iio[-WS(ios, 4)] = T3h + T3i;
+	       }
+	  }
+	  { /* second output group: writes the odd offsets (1, 3, ..., 15) of both rio and iio */
+	       E T1R, T2v, T22, T3q, T3t, T3z, T2y, T3y, T2e, T2I, T2s, T2C, T2p, T2J, T2t;
+	       E T2F;
+	       {
+		    E T1N, T1Q, T2w, T2x;
+		    T1N = T1 - Te;
+		    T1Q = T1O - T1P;
+		    T1R = T1N - T1Q;
+		    T2v = T1N + T1Q;
+		    {
+			 E T1W, T21, T3r, T3s;
+			 T1W = T1U - T1V;
+			 T21 = T1X + T20;
+			 T22 = KP707106781 * (T1W - T21);
+			 T3q = KP707106781 * (T1W + T21);
+			 T3r = T3d - T3c;
+			 T3s = Tq - Tz;
+			 T3t = T3r - T3s;
+			 T3z = T3s + T3r;
+		    }
+		    T2w = T1V + T1U;
+		    T2x = T1X - T20;
+		    T2y = KP707106781 * (T2w + T2x);
+		    T3y = KP707106781 * (T2x - T2w);
+		    {
+			 E T28, T2A, T2d, T2B, T27, T2c;
+			 T27 = T1h - T1m;
+			 T28 = T26 + T27;
+			 T2A = T26 - T27;
+			 T2c = T2a - T2b;
+			 T2d = T29 - T2c;
+			 T2B = T29 + T2c;
+			 T2e = FMA(KP923879532, T28, KP382683432 * T2d);
+			 T2I = FNMS(KP382683432, T2B, KP923879532 * T2A);
+			 T2s = FNMS(KP923879532, T2d, KP382683432 * T28);
+			 T2C = FMA(KP382683432, T2A, KP923879532 * T2B);
+		    }
+		    {
+			 E T2j, T2D, T2o, T2E;
+			 {
+			      E T2f, T2i, T2m, T2n;
+			      T2f = T1r - T1A;
+			      T2i = T2g - T2h;
+			      T2j = T2f - T2i;
+			      T2D = T2f + T2i;
+			      T2m = T2k - T2l;
+			      T2n = T1E - T1J;
+			      T2o = T2m + T2n;
+			      T2E = T2m - T2n;
+			 }
+			 T2p = FNMS(KP923879532, T2o, KP382683432 * T2j);
+			 T2J = FMA(KP923879532, T2E, KP382683432 * T2D);
+			 T2t = FMA(KP382683432, T2o, KP923879532 * T2j);
+			 T2F = FNMS(KP382683432, T2E, KP923879532 * T2D);
+		    }
+	       }
+	       {
+		    E T23, T2q, T3x, T3A;
+		    T23 = T1R + T22;
+		    T2q = T2e + T2p;
+		    iio[-WS(ios, 11)] = T23 - T2q;
+		    rio[WS(ios, 3)] = T23 + T2q;
+		    T3x = T2s + T2t;
+		    T3A = T3y + T3z;
+		    rio[WS(ios, 11)] = T3x - T3A;
+		    iio[-WS(ios, 3)] = T3x + T3A;
+	       }
+	       {
+		    E T2r, T2u, T3B, T3C;
+		    T2r = T1R - T22;
+		    T2u = T2s - T2t;
+		    iio[-WS(ios, 15)] = T2r - T2u;
+		    rio[WS(ios, 7)] = T2r + T2u;
+		    T3B = T2p - T2e;
+		    T3C = T3z - T3y;
+		    rio[WS(ios, 15)] = T3B - T3C;
+		    iio[-WS(ios, 7)] = T3B + T3C;
+	       }
+	       {
+		    E T2z, T2G, T3p, T3u;
+		    T2z = T2v + T2y;
+		    T2G = T2C + T2F;
+		    iio[-WS(ios, 9)] = T2z - T2G;
+		    rio[WS(ios, 1)] = T2z + T2G;
+		    T3p = T2I + T2J;
+		    T3u = T3q + T3t;
+		    rio[WS(ios, 9)] = T3p - T3u;
+		    iio[-WS(ios, 1)] = T3p + T3u;
+	       }
+	       {
+		    E T2H, T2K, T3v, T3w;
+		    T2H = T2v - T2y;
+		    T2K = T2I - T2J;
+		    iio[-WS(ios, 13)] = T2H - T2K;
+		    rio[WS(ios, 5)] = T2H + T2K;
+		    T3v = T2F - T2C;
+		    T3w = T3t - T3q;
+		    rio[WS(ios, 13)] = T3v - T3w;
+		    iio[-WS(ios, 5)] = T3v + T3w;
+	       }
+	  }
+     }
+     return W; /* W as advanced by the loop: 8 entries per iteration */
+}
+
+static const tw_instr twinstr[] = {	/* four cos/sin pairs, matching the 8 W[] entries hf2_16 reads per iteration */
+     {TW_COS, 0, 1},	/* third field is presumably the twiddle exponent (1, 3, 9, 15) -- confirm against tw_instr */
+     {TW_SIN, 0, 1},
+     {TW_COS, 0, 3},
+     {TW_SIN, 0, 3},
+     {TW_COS, 0, 9},
+     {TW_SIN, 0, 9},
+     {TW_COS, 0, 15},
+     {TW_SIN, 0, 15},
+     {TW_NEXT, 1, 0}	/* last entry; presumably marks the end of one twiddle group -- confirm against tw_instr */
+};
+
+static const hc2hc_desc desc = { 16, "hf2_16", twinstr, {156, 68, 40, 0}, &GENUS, 0, 0, 0 }; /* 16 matches the generator's -n 16; {156, 68, 40, ...} match the add/mul/fma counts in the header comment */
+
+void X(codelet_hf2_16) (planner *p) {	/* sole non-static entry point of this file */
+     X(khc2hc_dit_register) (p, hf2_16, &desc);	/* hands the kernel function and its descriptor to planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf2_32.c b/src/fftw3/rdft/codelets/r2hc/hf2_32.c
new file mode 100644
index 0000000..56c7769
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf2_32.c
@@ -0,0 +1,853 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:57:34 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -twiddle-log3 -n 32 -dit -name hf2_32 -include hf.h */
+
+/*
+ * This function contains 488 FP additions, 280 FP multiplications,
+ * (or, 376 additions, 168 multiplications, 112 fused multiply/add),
+ * 204 stack variables, and 128 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf2_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf2_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf2_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf2_32(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 8) {
+	  E T1, T3t, T4S, TQ, T3G, T49, T20, T2n, T4y, T1J, T43, T2w, T4z, T36, T4Z;
+	  E TK, T8b, T40, T6l, T3U, T6k, T1h, T3L, T1D, T3V, T1s, T3X, T3E, T7E, T3O;
+	  E T6h, T2k, T6w, T4i, T4x, T3q, T6I, T4O, T4P, T3w, T4T, T4R, T4U, Tm, To;
+	  E TX, T4I, T3a, T3H, T31, T4Y, T3f, T4J, T2G, T4s, T4r, T2B, T4q, T4t, T27;
+	  E T4a, T2M, T4m, T4n, T2P, T4l, T4o, T1U, T44, Tn, Tp, T7G;
+	  T1 = rio[0];
+	  {
+	       E Tv, T3e, Tz, T3c, T39, TE, TI, T38, TN, T3v, TU, T1l, T3u, TW, T12;
+	       E T35, T1f, T3m, T1b, T3o, T30, T2L, T16, T1k, T33, T2W, TP, T2J, T2D, T1w;
+	       E T1z, T2A, T1B, T2z, T2F, T1u, T2N, T1q, T1I, T2O, T1o, T1G, T2m, T24, T1Z;
+	       E T2r, T1X, T2v, T2l, T26, T2i, T1R, T1T, T2g, T2, Ti, T3, Tc, TF, TC;
+	       E TG, TB, Tu, T1a, T15, Ty, T1t, T1Y, T1W, T1v, TH, T1y, T11, TD, T1A;
+	       E T1e, T4g, T3k, T1n, T1p, T2e, T4M, TM, T1K, T1O, TO, T1L, T1N, Ta, Tb;
+	       E T2t, Tk, T2o, Tf, Tg, T2s, Tj, T2p;
+	       Tv = rio[WS(ios, 8)];
+	       T3e = iio[-WS(ios, 8)];
+	       Tz = iio[-WS(ios, 23)];
+	       T3c = rio[WS(ios, 23)];
+	       T39 = iio[-WS(ios, 24)];
+	       TE = rio[WS(ios, 24)];
+	       TI = iio[-WS(ios, 7)];
+	       T38 = rio[WS(ios, 7)];
+	       TN = rio[WS(ios, 4)];
+	       T3v = iio[-WS(ios, 20)];
+	       TU = rio[WS(ios, 20)];
+	       T1l = iio[-WS(ios, 29)];
+	       T3u = rio[WS(ios, 11)];
+	       TW = iio[-WS(ios, 11)];
+	       T12 = rio[WS(ios, 28)];
+	       T35 = iio[-WS(ios, 16)];
+	       T1f = iio[-WS(ios, 19)];
+	       T3m = rio[WS(ios, 19)];
+	       T1b = rio[WS(ios, 12)];
+	       T3o = iio[-WS(ios, 12)];
+	       T30 = iio[0];
+	       T2L = iio[-WS(ios, 2)];
+	       T16 = iio[-WS(ios, 3)];
+	       T1k = rio[WS(ios, 2)];
+	       T33 = rio[WS(ios, 15)];
+	       T2W = rio[WS(ios, 31)];
+	       TP = iio[-WS(ios, 27)];
+	       T2J = rio[WS(ios, 29)];
+	       T2D = rio[WS(ios, 21)];
+	       T1w = iio[-WS(ios, 21)];
+	       T1z = rio[WS(ios, 26)];
+	       T2A = iio[-WS(ios, 26)];
+	       T1B = iio[-WS(ios, 5)];
+	       T2z = rio[WS(ios, 5)];
+	       T2F = iio[-WS(ios, 10)];
+	       T1u = rio[WS(ios, 10)];
+	       T2N = rio[WS(ios, 13)];
+	       T1q = iio[-WS(ios, 13)];
+	       T1I = iio[-WS(ios, 1)];
+	       T2O = iio[-WS(ios, 18)];
+	       T1o = rio[WS(ios, 18)];
+	       T1G = rio[WS(ios, 30)];
+	       T2m = iio[-WS(ios, 22)];
+	       T24 = rio[WS(ios, 22)];
+	       T1Z = iio[-WS(ios, 25)];
+	       T2r = rio[WS(ios, 25)];
+	       T1X = rio[WS(ios, 6)];
+	       T2v = iio[-WS(ios, 6)];
+	       T2l = rio[WS(ios, 9)];
+	       T26 = iio[-WS(ios, 9)];
+	       T2i = iio[-WS(ios, 14)];
+	       T1R = rio[WS(ios, 14)];
+	       T1T = iio[-WS(ios, 17)];
+	       T2g = rio[WS(ios, 17)];
+	       {
+		    E T2c, T2d, T3s, T3r, T3j, T3i, T4, T7, T5, T8, T6, T9, T14, T1d, Ts;
+		    E T18, T19, T1c, Te, Td, Tt, Tw, T13, TZ, T10, Tx;
+		    T2c = rio[WS(ios, 1)];
+		    T2d = iio[-WS(ios, 30)];
+		    T3s = iio[-WS(ios, 4)];
+		    T3r = rio[WS(ios, 27)];
+		    T3j = iio[-WS(ios, 28)];
+		    T3i = rio[WS(ios, 3)];
+		    T2 = W[6];
+		    Ti = W[7];
+		    T3 = W[4];
+		    Tc = W[5];
+		    T4 = W[2];
+		    T7 = W[3];
+		    T5 = W[0];
+		    T8 = W[1];
+		    T6 = T4 * T5;
+		    T9 = T7 * T8;
+		    T14 = Ti * T5;
+		    T1d = Tc * T4;
+		    Ts = T3 * T5;
+		    T18 = T3 * T4;
+		    T19 = Tc * T7;
+		    T1c = T3 * T7;
+		    Te = T7 * T5;
+		    Td = T4 * T8;
+		    Tt = Tc * T8;
+		    Tw = T3 * T8;
+		    TF = T2 * T7;
+		    T13 = T2 * T8;
+		    TC = Ti * T7;
+		    TG = Ti * T4;
+		    TZ = T2 * T5;
+		    T10 = Ti * T8;
+		    TB = T2 * T4;
+		    Tx = Tc * T5;
+		    Tu = Ts + Tt;
+		    T1a = T18 - T19;
+		    T15 = T13 + T14;
+		    Ty = Tw - Tx;
+		    T1t = Ts - Tt;
+		    T1Y = T1c - T1d;
+		    T1W = T18 + T19;
+		    T1v = Tw + Tx;
+		    TH = TF - TG;
+		    T1y = TZ + T10;
+		    T11 = TZ - T10;
+		    TD = TB + TC;
+		    T1A = T13 - T14;
+		    T1e = T1c + T1d;
+		    T3t = FMA(T2, T3r, Ti * T3s);
+		    T4g = FNMS(T8, T2c, T5 * T2d);
+		    T4S = FNMS(Ti, T3r, T2 * T3s);
+		    T3k = FMA(T4, T3i, T7 * T3j);
+		    T1n = FMA(T2, T3, Ti * Tc);
+		    T1p = FNMS(Ti, T3, T2 * Tc);
+		    T2e = FMA(T5, T2c, T8 * T2d);
+		    T4M = FNMS(T7, T3i, T4 * T3j);
+		    TM = T6 - T9;
+		    T1K = T3 * TM;
+		    T1O = Tc * TM;
+		    TO = Td + Te;
+		    T1L = Tc * TO;
+		    T1N = T3 * TO;
+		    Ta = T6 + T9;
+		    Tb = T3 * Ta;
+		    T2t = Ti * Ta;
+		    Tk = Tc * Ta;
+		    T2o = T2 * Ta;
+		    Tf = Td - Te;
+		    Tg = Tc * Tf;
+		    T2s = T2 * Tf;
+		    Tj = T3 * Tf;
+		    T2p = Ti * Tf;
+	       }
+	       TQ = FMA(TM, TN, TO * TP);
+	       T3G = FNMS(TO, TN, TM * TP);
+	       T49 = FMA(T1Y, T1X, T1W * T1Z);
+	       T20 = FNMS(T1Y, T1Z, T1W * T1X);
+	       T2n = FMA(T3, T2l, Tc * T2m);
+	       T4y = FNMS(Tc, T2l, T3 * T2m);
+	       {
+		    E T1F, T1H, TA, TJ;
+		    T1F = TB - TC;
+		    T1H = TF + TG;
+		    T1J = FMA(T1F, T1G, T1H * T1I);
+		    T43 = FNMS(T1H, T1G, T1F * T1I);
+		    {
+			 E T2q, T2u, T32, T34;
+			 T2q = T2o - T2p;
+			 T2u = T2s + T2t;
+			 T2w = FMA(T2q, T2r, T2u * T2v);
+			 T4z = FNMS(T2u, T2r, T2q * T2v);
+			 T32 = FMA(T2, T1a, Ti * T1e);
+			 T34 = FNMS(Ti, T1a, T2 * T1e);
+			 T36 = FNMS(T34, T35, T32 * T33);
+			 T4Z = FMA(T34, T33, T32 * T35);
+		    }
+		    TA = FNMS(Ty, Tz, Tu * Tv);
+		    TJ = FNMS(TH, TI, TD * TE);
+		    TK = TA + TJ;
+		    T8b = TA - TJ;
+		    {
+			 E T3Y, T3Z, T3S, T3T;
+			 T3Y = FNMS(T1v, T1u, T1t * T1w);
+			 T3Z = FMA(T1A, T1z, T1y * T1B);
+			 T40 = T3Y - T3Z;
+			 T6l = T3Y + T3Z;
+			 T3S = FMA(Tf, T1k, Ta * T1l);
+			 T3T = FMA(T1p, T1o, T1n * T1q);
+			 T3U = T3S - T3T;
+			 T6k = T3S + T3T;
+		    }
+	       }
+	       {
+		    E T17, T1g, Th, Tl;
+		    T17 = FMA(T11, T12, T15 * T16);
+		    T1g = FMA(T1a, T1b, T1e * T1f);
+		    T1h = T17 + T1g;
+		    T3L = T17 - T1g;
+		    {
+			 E T1x, T1C, T1m, T1r;
+			 T1x = FMA(T1t, T1u, T1v * T1w);
+			 T1C = FNMS(T1A, T1B, T1y * T1z);
+			 T1D = T1x + T1C;
+			 T3V = T1x - T1C;
+			 T1m = FNMS(Tf, T1l, Ta * T1k);
+			 T1r = FNMS(T1p, T1q, T1n * T1o);
+			 T1s = T1m + T1r;
+			 T3X = T1m - T1r;
+		    }
+		    {
+			 E T3C, T3D, T3M, T3N;
+			 T3C = FMA(Ty, Tv, Tu * Tz);
+			 T3D = FMA(TH, TE, TD * TI);
+			 T3E = T3C - T3D;
+			 T7E = T3C + T3D;
+			 T3M = FNMS(T15, T12, T11 * T16);
+			 T3N = FNMS(T1e, T1b, T1a * T1f);
+			 T3O = T3M - T3N;
+			 T6h = T3M + T3N;
+			 {
+			      E T2j, T4h, T2f, T2h;
+			      T2f = FMA(T2, T1t, Ti * T1v);
+			      T2h = FNMS(Ti, T1t, T2 * T1v);
+			      T2j = FNMS(T2h, T2i, T2f * T2g);
+			      T4h = FMA(T2h, T2g, T2f * T2i);
+			      T2k = T2e + T2j;
+			      T6w = T4g + T4h;
+			      T4i = T4g - T4h;
+			      T4x = T2e - T2j;
+			 }
+		    }
+		    {
+			 E T3p, T4N, T3l, T3n;
+			 T3l = FNMS(Ti, Ty, T2 * Tu);
+			 T3n = FMA(T2, Ty, Ti * Tu);
+			 T3p = FMA(T3l, T3m, T3n * T3o);
+			 T4N = FNMS(T3n, T3m, T3l * T3o);
+			 T3q = T3k + T3p;
+			 T6I = T4M + T4N;
+			 T4O = T4M - T4N;
+			 T4P = T3k - T3p;
+		    }
+		    Th = Tb + Tg;
+		    Tl = Tj - Tk;
+		    T3w = FNMS(Tl, T3v, Th * T3u);
+		    T4T = FMA(Tl, T3u, Th * T3v);
+		    T4R = T3t - T3w;
+		    T4U = T4S - T4T;
+		    Tm = FNMS(Ti, Tl, T2 * Th);
+		    To = FMA(T2, Tl, Ti * Th);
+		    {
+			 E TR, TS, TT, TV;
+			 TR = Tb - Tg;
+			 TS = Tj + Tk;
+			 TT = FMA(T2, TR, Ti * TS);
+			 TV = FNMS(Ti, TR, T2 * TS);
+			 TX = FNMS(TV, TW, TT * TU);
+			 T4I = FNMS(TS, T38, TR * T39);
+			 T3a = FMA(TR, T38, TS * T39);
+			 T3H = FMA(TV, TU, TT * TW);
+		    }
+		    {
+			 E T2V, T3b, T2Z, T3d;
+			 {
+			      E T2T, T2U, T2X, T2Y;
+			      T2T = T2 * TM;
+			      T2U = Ti * TO;
+			      T2V = T2T - T2U;
+			      T3b = T2T + T2U;
+			      T2X = T2 * TO;
+			      T2Y = Ti * TM;
+			      T2Z = T2X + T2Y;
+			      T3d = T2X - T2Y;
+			 }
+			 T31 = FMA(T2V, T2W, T2Z * T30);
+			 T4Y = FNMS(T2Z, T2W, T2V * T30);
+			 T3f = FNMS(T3d, T3e, T3b * T3c);
+			 T4J = FMA(T3d, T3c, T3b * T3e);
+		    }
+		    {
+			 E T23, T25, T1Q, T1S;
+			 {
+			      E T2C, T2E, T21, T22;
+			      T2C = FNMS(Ti, T1Y, T2 * T1W);
+			      T2E = FMA(T2, T1Y, Ti * T1W);
+			      T2G = FMA(T2C, T2D, T2E * T2F);
+			      T4s = FNMS(T2E, T2D, T2C * T2F);
+			      T21 = T1K + T1L;
+			      T22 = T1N - T1O;
+			      T23 = FNMS(Ti, T22, T2 * T21);
+			      T4r = FMA(T22, T2z, T21 * T2A);
+			      T25 = FMA(T2, T22, Ti * T21);
+			      T2B = FNMS(T22, T2A, T21 * T2z);
+			 }
+			 T4q = T2B - T2G;
+			 T4t = T4r - T4s;
+			 T27 = FMA(T23, T24, T25 * T26);
+			 T4a = FNMS(T25, T24, T23 * T26);
+			 {
+			      E T2I, T2K, T1M, T1P;
+			      T2I = T2o + T2p;
+			      T2K = T2s - T2t;
+			      T2M = FNMS(T2K, T2L, T2I * T2J);
+			      T4m = FMA(T2K, T2J, T2I * T2L);
+			      T1M = T1K - T1L;
+			      T1P = T1N + T1O;
+			      T1Q = FMA(T2, T1M, Ti * T1P);
+			      T4n = FNMS(T1P, T2N, T1M * T2O);
+			      T1S = FNMS(Ti, T1M, T2 * T1P);
+			      T2P = FMA(T1M, T2N, T1P * T2O);
+			 }
+			 T4l = T2M - T2P;
+			 T4o = T4m - T4n;
+			 T1U = FNMS(T1S, T1T, T1Q * T1R);
+			 T44 = FMA(T1S, T1R, T1Q * T1T);
+		    }
+	       }
+	  }
+	  Tn = rio[WS(ios, 16)];
+	  Tp = iio[-WS(ios, 15)];
+	  T7G = iio[-WS(ios, 31)];
+	  {
+	       E T1i, T7V, T6i, T7D, T42, T5e, T5A, T60, T6o, T6Y, TL, T6f, T3F, T5t, T7I;
+	       E T8q, T7W, T8c, T3Q, T8p, T5w, T89, T4d, T61, T5f, T5D, T2a, T6t, T7O, T7C;
+	       E T7g, T6Z, T4w, T64, T65, T4F, T5i, T5I, T5L, T5j, T2S, T7l, T7y, T6A, T6F;
+	       E T73, T7i, T72, T4X, T67, T68, T56, T5l, T5P, T5S, T5m, T3z, T7q, T7z, T6L;
+	       E T6Q, T76, T7n, T75;
+	       {
+		    E TY, T6g, T3W, T41;
+		    TY = TQ + TX;
+		    T1i = TY + T1h;
+		    T7V = T1h - TY;
+		    T6g = T3G + T3H;
+		    T6i = T6g - T6h;
+		    T7D = T6g + T6h;
+		    T3W = T3U + T3V;
+		    T41 = T3X - T40;
+		    T42 = FNMS(KP923879532, T41, KP382683432 * T3W);
+		    T5e = FMA(KP923879532, T3W, KP382683432 * T41);
+	       }
+	       {
+		    E T5y, T5z, T6m, T6n;
+		    T5y = T3U - T3V;
+		    T5z = T3X + T40;
+		    T5A = FNMS(KP382683432, T5z, KP923879532 * T5y);
+		    T60 = FMA(KP382683432, T5y, KP923879532 * T5z);
+		    T6m = T6k - T6l;
+		    T6n = T1s - T1D;
+		    T6o = T6m - T6n;
+		    T6Y = T6n + T6m;
+	       }
+	       {
+		    E Tr, T3B, Tq, T7H, T8a, T7F;
+		    Tq = FMA(Tm, Tn, To * Tp);
+		    Tr = T1 + Tq;
+		    T3B = T1 - Tq;
+		    TL = Tr + TK;
+		    T6f = Tr - TK;
+		    T3F = T3B - T3E;
+		    T5t = T3B + T3E;
+		    T7F = FNMS(To, Tn, Tm * Tp);
+		    T7H = T7F + T7G;
+		    T8a = T7G - T7F;
+		    T7I = T7E + T7H;
+		    T8q = T8b + T8a;
+		    T7W = T7H - T7E;
+		    T8c = T8a - T8b;
+	       }
+	       {
+		    E T3P, T5v, T3K, T5u, T3I, T3J;
+		    T3P = T3L + T3O;
+		    T5v = T3L - T3O;
+		    T3I = T3G - T3H;
+		    T3J = TQ - TX;
+		    T3K = T3I - T3J;
+		    T5u = T3J + T3I;
+		    T3Q = KP707106781 * (T3K - T3P);
+		    T8p = KP707106781 * (T5v - T5u);
+		    T5w = KP707106781 * (T5u + T5v);
+		    T89 = KP707106781 * (T3K + T3P);
+	       }
+	       {
+		    E T47, T5B, T4c, T5C;
+		    {
+			 E T45, T46, T48, T4b;
+			 T45 = T43 - T44;
+			 T46 = T20 - T27;
+			 T47 = T45 + T46;
+			 T5B = T45 - T46;
+			 T48 = T1J - T1U;
+			 T4b = T49 - T4a;
+			 T4c = T48 - T4b;
+			 T5C = T48 + T4b;
+		    }
+		    T4d = FMA(KP382683432, T47, KP923879532 * T4c);
+		    T61 = FNMS(KP382683432, T5B, KP923879532 * T5C);
+		    T5f = FNMS(KP923879532, T47, KP382683432 * T4c);
+		    T5D = FMA(KP923879532, T5B, KP382683432 * T5C);
+	       }
+	       {
+		    E T1E, T7e, T29, T6p, T6s, T7f;
+		    T1E = T1s + T1D;
+		    T7e = T6k + T6l;
+		    {
+			 E T1V, T28, T6q, T6r;
+			 T1V = T1J + T1U;
+			 T28 = T20 + T27;
+			 T29 = T1V + T28;
+			 T6p = T1V - T28;
+			 T6q = T43 + T44;
+			 T6r = T49 + T4a;
+			 T6s = T6q - T6r;
+			 T7f = T6q + T6r;
+		    }
+		    T2a = T1E + T29;
+		    T6t = T6p + T6s;
+		    T7O = T29 - T1E;
+		    T7C = T7e + T7f;
+		    T7g = T7e - T7f;
+		    T6Z = T6p - T6s;
+	       }
+	       {
+		    E T4k, T5J, T4B, T5G, T4v, T5H, T4E, T5K, T4j, T4A;
+		    T4j = T2n - T2w;
+		    T4k = T4i + T4j;
+		    T5J = T4i - T4j;
+		    T4A = T4y - T4z;
+		    T4B = T4x - T4A;
+		    T5G = T4x + T4A;
+		    {
+			 E T4p, T4u, T4C, T4D;
+			 T4p = T4l - T4o;
+			 T4u = T4q + T4t;
+			 T4v = KP707106781 * (T4p - T4u);
+			 T5H = KP707106781 * (T4u + T4p);
+			 T4C = T4t - T4q;
+			 T4D = T4l + T4o;
+			 T4E = KP707106781 * (T4C - T4D);
+			 T5K = KP707106781 * (T4C + T4D);
+		    }
+		    T4w = T4k - T4v;
+		    T64 = T5G + T5H;
+		    T65 = T5J + T5K;
+		    T4F = T4B - T4E;
+		    T5i = T4k + T4v;
+		    T5I = T5G - T5H;
+		    T5L = T5J - T5K;
+		    T5j = T4B + T4E;
+	       }
+	       {
+		    E T2y, T6B, T6y, T7j, T2R, T6z, T6E, T7k, T2x, T6x;
+		    T2x = T2n + T2w;
+		    T2y = T2k + T2x;
+		    T6B = T2k - T2x;
+		    T6x = T4y + T4z;
+		    T6y = T6w - T6x;
+		    T7j = T6w + T6x;
+		    {
+			 E T2H, T2Q, T6C, T6D;
+			 T2H = T2B + T2G;
+			 T2Q = T2M + T2P;
+			 T2R = T2H + T2Q;
+			 T6z = T2Q - T2H;
+			 T6C = T4r + T4s;
+			 T6D = T4m + T4n;
+			 T6E = T6C - T6D;
+			 T7k = T6C + T6D;
+		    }
+		    T2S = T2y + T2R;
+		    T7l = T7j - T7k;
+		    T7y = T7j + T7k;
+		    T6A = T6y - T6z;
+		    T6F = T6B - T6E;
+		    T73 = T6B + T6E;
+		    T7i = T2y - T2R;
+		    T72 = T6y + T6z;
+	       }
+	       {
+		    E T4L, T5N, T55, T5O, T4W, T5R, T52, T5Q;
+		    {
+			 E T4H, T4K, T53, T54;
+			 T4H = T31 - T36;
+			 T4K = T4I - T4J;
+			 T4L = T4H - T4K;
+			 T5N = T4H + T4K;
+			 T53 = T4R - T4U;
+			 T54 = T4P + T4O;
+			 T55 = KP707106781 * (T53 - T54);
+			 T5O = KP707106781 * (T54 + T53);
+		    }
+		    {
+			 E T4Q, T4V, T50, T51;
+			 T4Q = T4O - T4P;
+			 T4V = T4R + T4U;
+			 T4W = KP707106781 * (T4Q - T4V);
+			 T5R = KP707106781 * (T4Q + T4V);
+			 T50 = T4Y - T4Z;
+			 T51 = T3a - T3f;
+			 T52 = T50 + T51;
+			 T5Q = T50 - T51;
+		    }
+		    T4X = T4L - T4W;
+		    T67 = T5N + T5O;
+		    T68 = T5Q + T5R;
+		    T56 = T52 - T55;
+		    T5l = T4L + T4W;
+		    T5P = T5N - T5O;
+		    T5S = T5Q - T5R;
+		    T5m = T52 + T55;
+	       }
+	       {
+		    E T3y, T6P, T6K, T7p, T3h, T6H, T6O, T7o, T3x, T6J;
+		    T3x = T3t + T3w;
+		    T3y = T3q + T3x;
+		    T6P = T3x - T3q;
+		    T6J = T4S + T4T;
+		    T6K = T6I - T6J;
+		    T7p = T6I + T6J;
+		    {
+			 E T37, T3g, T6M, T6N;
+			 T37 = T31 + T36;
+			 T3g = T3a + T3f;
+			 T3h = T37 + T3g;
+			 T6H = T37 - T3g;
+			 T6M = T4Y + T4Z;
+			 T6N = T4I + T4J;
+			 T6O = T6M - T6N;
+			 T7o = T6M + T6N;
+		    }
+		    T3z = T3h + T3y;
+		    T7q = T7o - T7p;
+		    T7z = T7o + T7p;
+		    T6L = T6H - T6K;
+		    T6Q = T6O - T6P;
+		    T76 = T6O + T6P;
+		    T7n = T3h - T3y;
+		    T75 = T6H + T6K;
+	       }
+	       {
+		    E T3A, T7A, T2b, T7x, T1j;
+		    T3A = T2S + T3z;
+		    T7A = T7y - T7z;
+		    T1j = TL + T1i;
+		    T2b = T1j + T2a;
+		    T7x = T1j - T2a;
+		    iio[-WS(ios, 16)] = T2b - T3A;
+		    rio[WS(ios, 8)] = T7x + T7A;
+		    rio[0] = T2b + T3A;
+		    iio[-WS(ios, 24)] = T7x - T7A;
+	       }
+	       {
+		    E T7B, T7L, T7K, T7M, T7J;
+		    T7B = T7y + T7z;
+		    T7L = T3z - T2S;
+		    T7J = T7D + T7I;
+		    T7K = T7C + T7J;
+		    T7M = T7J - T7C;
+		    rio[WS(ios, 16)] = T7B - T7K;
+		    iio[-WS(ios, 8)] = T7L + T7M;
+		    iio[0] = T7B + T7K;
+		    rio[WS(ios, 24)] = T7L - T7M;
+	       }
+	       {
+		    E T7h, T7t, T7Q, T7S, T7s, T7R, T7w, T7N, T7d, T7P;
+		    T7d = TL - T1i;
+		    T7h = T7d + T7g;
+		    T7t = T7d - T7g;
+		    T7P = T7I - T7D;
+		    T7Q = T7O + T7P;
+		    T7S = T7P - T7O;
+		    {
+			 E T7m, T7r, T7u, T7v;
+			 T7m = T7i + T7l;
+			 T7r = T7n - T7q;
+			 T7s = KP707106781 * (T7m + T7r);
+			 T7R = KP707106781 * (T7r - T7m);
+			 T7u = T7l - T7i;
+			 T7v = T7n + T7q;
+			 T7w = KP707106781 * (T7u - T7v);
+			 T7N = KP707106781 * (T7u + T7v);
+		    }
+		    iio[-WS(ios, 20)] = T7h - T7s;
+		    rio[WS(ios, 20)] = T7N - T7Q;
+		    rio[WS(ios, 4)] = T7h + T7s;
+		    iio[-WS(ios, 4)] = T7N + T7Q;
+		    iio[-WS(ios, 28)] = T7t - T7w;
+		    rio[WS(ios, 28)] = T7R - T7S;
+		    rio[WS(ios, 12)] = T7t + T7w;
+		    iio[-WS(ios, 12)] = T7R + T7S;
+	       }
+	       {
+		    E T71, T79, T7Y, T80, T78, T7Z, T7c, T7T;
+		    {
+			 E T6X, T70, T7U, T7X;
+			 T6X = T6f + T6i;
+			 T70 = KP707106781 * (T6Y + T6Z);
+			 T71 = T6X + T70;
+			 T79 = T6X - T70;
+			 T7U = KP707106781 * (T6o + T6t);
+			 T7X = T7V + T7W;
+			 T7Y = T7U + T7X;
+			 T80 = T7X - T7U;
+		    }
+		    {
+			 E T74, T77, T7a, T7b;
+			 T74 = FMA(KP382683432, T72, KP923879532 * T73);
+			 T77 = FNMS(KP382683432, T76, KP923879532 * T75);
+			 T78 = T74 + T77;
+			 T7Z = T77 - T74;
+			 T7a = FNMS(KP382683432, T73, KP923879532 * T72);
+			 T7b = FMA(KP923879532, T76, KP382683432 * T75);
+			 T7c = T7a - T7b;
+			 T7T = T7a + T7b;
+		    }
+		    iio[-WS(ios, 18)] = T71 - T78;
+		    rio[WS(ios, 18)] = T7T - T7Y;
+		    rio[WS(ios, 2)] = T71 + T78;
+		    iio[-WS(ios, 2)] = T7T + T7Y;
+		    iio[-WS(ios, 26)] = T79 - T7c;
+		    rio[WS(ios, 26)] = T7Z - T80;
+		    rio[WS(ios, 10)] = T79 + T7c;
+		    iio[-WS(ios, 10)] = T7Z + T80;
+	       }
+	       {
+		    E T4f, T59, T8y, T8A, T58, T8z, T5c, T8v;
+		    {
+			 E T3R, T4e, T8w, T8x;
+			 T3R = T3F - T3Q;
+			 T4e = T42 - T4d;
+			 T4f = T3R + T4e;
+			 T59 = T3R - T4e;
+			 T8w = T5f - T5e;
+			 T8x = T8q - T8p;
+			 T8y = T8w + T8x;
+			 T8A = T8x - T8w;
+		    }
+		    {
+			 E T4G, T57, T5a, T5b;
+			 T4G = FMA(KP980785280, T4w, KP195090322 * T4F);
+			 T57 = FNMS(KP980785280, T56, KP195090322 * T4X);
+			 T58 = T4G + T57;
+			 T8z = T57 - T4G;
+			 T5a = FNMS(KP980785280, T4F, KP195090322 * T4w);
+			 T5b = FMA(KP195090322, T56, KP980785280 * T4X);
+			 T5c = T5a - T5b;
+			 T8v = T5a + T5b;
+		    }
+		    iio[-WS(ios, 23)] = T4f - T58;
+		    rio[WS(ios, 23)] = T8v - T8y;
+		    rio[WS(ios, 7)] = T4f + T58;
+		    iio[-WS(ios, 7)] = T8v + T8y;
+		    iio[-WS(ios, 31)] = T59 - T5c;
+		    rio[WS(ios, 31)] = T8z - T8A;
+		    rio[WS(ios, 15)] = T59 + T5c;
+		    iio[-WS(ios, 15)] = T8z + T8A;
+	       }
+	       {
+		    E T5F, T5V, T8k, T8m, T5U, T8l, T5Y, T8h;
+		    {
+			 E T5x, T5E, T8i, T8j;
+			 T5x = T5t - T5w;
+			 T5E = T5A - T5D;
+			 T5F = T5x + T5E;
+			 T5V = T5x - T5E;
+			 T8i = T61 - T60;
+			 T8j = T8c - T89;
+			 T8k = T8i + T8j;
+			 T8m = T8j - T8i;
+		    }
+		    {
+			 E T5M, T5T, T5W, T5X;
+			 T5M = FMA(KP555570233, T5I, KP831469612 * T5L);
+			 T5T = FNMS(KP831469612, T5S, KP555570233 * T5P);
+			 T5U = T5M + T5T;
+			 T8l = T5T - T5M;
+			 T5W = FNMS(KP831469612, T5I, KP555570233 * T5L);
+			 T5X = FMA(KP831469612, T5P, KP555570233 * T5S);
+			 T5Y = T5W - T5X;
+			 T8h = T5W + T5X;
+		    }
+		    iio[-WS(ios, 21)] = T5F - T5U;
+		    rio[WS(ios, 21)] = T8h - T8k;
+		    rio[WS(ios, 5)] = T5F + T5U;
+		    iio[-WS(ios, 5)] = T8h + T8k;
+		    iio[-WS(ios, 29)] = T5V - T5Y;
+		    rio[WS(ios, 29)] = T8l - T8m;
+		    rio[WS(ios, 13)] = T5V + T5Y;
+		    iio[-WS(ios, 13)] = T8l + T8m;
+	       }
+	       {
+		    E T6v, T6T, T84, T86, T6S, T85, T6W, T81;
+		    {
+			 E T6j, T6u, T82, T83;
+			 T6j = T6f - T6i;
+			 T6u = KP707106781 * (T6o - T6t);
+			 T6v = T6j + T6u;
+			 T6T = T6j - T6u;
+			 T82 = KP707106781 * (T6Z - T6Y);
+			 T83 = T7W - T7V;
+			 T84 = T82 + T83;
+			 T86 = T83 - T82;
+		    }
+		    {
+			 E T6G, T6R, T6U, T6V;
+			 T6G = FMA(KP923879532, T6A, KP382683432 * T6F);
+			 T6R = FNMS(KP923879532, T6Q, KP382683432 * T6L);
+			 T6S = T6G + T6R;
+			 T85 = T6R - T6G;
+			 T6U = FNMS(KP923879532, T6F, KP382683432 * T6A);
+			 T6V = FMA(KP382683432, T6Q, KP923879532 * T6L);
+			 T6W = T6U - T6V;
+			 T81 = T6U + T6V;
+		    }
+		    iio[-WS(ios, 22)] = T6v - T6S;
+		    rio[WS(ios, 22)] = T81 - T84;
+		    rio[WS(ios, 6)] = T6v + T6S;
+		    iio[-WS(ios, 6)] = T81 + T84;
+		    iio[-WS(ios, 30)] = T6T - T6W;
+		    rio[WS(ios, 30)] = T85 - T86;
+		    rio[WS(ios, 14)] = T6T + T6W;
+		    iio[-WS(ios, 14)] = T85 + T86;
+	       }
+	       {
+		    E T5h, T5p, T8s, T8u, T5o, T8t, T5s, T8n;
+		    {
+			 E T5d, T5g, T8o, T8r;
+			 T5d = T3F + T3Q;
+			 T5g = T5e + T5f;
+			 T5h = T5d + T5g;
+			 T5p = T5d - T5g;
+			 T8o = T42 + T4d;
+			 T8r = T8p + T8q;
+			 T8s = T8o + T8r;
+			 T8u = T8r - T8o;
+		    }
+		    {
+			 E T5k, T5n, T5q, T5r;
+			 T5k = FMA(KP555570233, T5i, KP831469612 * T5j);
+			 T5n = FNMS(KP555570233, T5m, KP831469612 * T5l);
+			 T5o = T5k + T5n;
+			 T8t = T5n - T5k;
+			 T5q = FNMS(KP555570233, T5j, KP831469612 * T5i);
+			 T5r = FMA(KP831469612, T5m, KP555570233 * T5l);
+			 T5s = T5q - T5r;
+			 T8n = T5q + T5r;
+		    }
+		    iio[-WS(ios, 19)] = T5h - T5o;
+		    rio[WS(ios, 19)] = T8n - T8s;
+		    rio[WS(ios, 3)] = T5h + T5o;
+		    iio[-WS(ios, 3)] = T8n + T8s;
+		    iio[-WS(ios, 27)] = T5p - T5s;
+		    rio[WS(ios, 27)] = T8t - T8u;
+		    rio[WS(ios, 11)] = T5p + T5s;
+		    iio[-WS(ios, 11)] = T8t + T8u;
+	       }
+	       {
+		    E T63, T6b, T8e, T8g, T6a, T8f, T6e, T87;
+		    {
+			 E T5Z, T62, T88, T8d;
+			 T5Z = T5t + T5w;
+			 T62 = T60 + T61;
+			 T63 = T5Z + T62;
+			 T6b = T5Z - T62;
+			 T88 = T5A + T5D;
+			 T8d = T89 + T8c;
+			 T8e = T88 + T8d;
+			 T8g = T8d - T88;
+		    }
+		    {
+			 E T66, T69, T6c, T6d;
+			 T66 = FMA(KP980785280, T64, KP195090322 * T65);
+			 T69 = FNMS(KP195090322, T68, KP980785280 * T67);
+			 T6a = T66 + T69;
+			 T8f = T69 - T66;
+			 T6c = FNMS(KP195090322, T64, KP980785280 * T65);
+			 T6d = FMA(KP195090322, T67, KP980785280 * T68);
+			 T6e = T6c - T6d;
+			 T87 = T6c + T6d;
+		    }
+		    iio[-WS(ios, 17)] = T63 - T6a;
+		    rio[WS(ios, 17)] = T87 - T8e;
+		    rio[WS(ios, 1)] = T63 + T6a;
+		    iio[-WS(ios, 1)] = T87 + T8e;
+		    iio[-WS(ios, 25)] = T6b - T6e;
+		    rio[WS(ios, 25)] = T8f - T8g;
+		    rio[WS(ios, 9)] = T6b + T6e;
+		    iio[-WS(ios, 9)] = T8f + T8g;
+	       }
+	  }
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = { /* Twiddle recipe referenced by desc; field meanings come from tw_instr, which is declared outside this file. */
+     {TW_COS, 0, 1}, /* cos/sin pair tagged 1 */
+     {TW_SIN, 0, 1},
+     {TW_COS, 0, 3}, /* cos/sin pair tagged 3 */
+     {TW_SIN, 0, 3},
+     {TW_COS, 0, 9}, /* cos/sin pair tagged 9 */
+     {TW_SIN, 0, 9},
+     {TW_COS, 0, 27}, /* cos/sin pair tagged 27; the tags are successive powers of 3 (presumably angle multipliers -- confirm) */
+     {TW_SIN, 0, 27},
+     {TW_NEXT, 1, 0} /* closing entry; presumably advances to the next twiddle row -- confirm against the tw_instr definition */
+};
+
+static const hc2hc_desc desc = { 32, "hf2_32", twinstr, {376, 168, 112, 0}, &GENUS, 0, 0, 0 }; /* Descriptor registered with the hf2_32 kernel: size 32, its name, the twiddle recipe; {376, 168, 112, 0} is presumably the add/mul/fma/other op count -- confirm against hc2hc_desc. */
+
+void X(codelet_hf2_32) (planner *p) { /* Registration entry point for this codelet; X() wraps the symbol name (macro defined elsewhere). */
+     X(khc2hc_dit_register) (p, hf2_32, &desc); /* passes the hf2_32 kernel and its descriptor to planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf2_4.c b/src/fftw3/rdft/codelets/r2hc/hf2_4.c
new file mode 100644
index 0000000..a676143
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf2_4.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:57:26 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -twiddle-log3 -n 4 -dit -name hf2_4 -include hf.h */
+
+/*
+ * This function contains 24 FP additions, 16 FP multiplications,
+ * (or, 16 additions, 8 multiplications, 8 fused multiply/add),
+ * 21 stack variables, and 16 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf2_4.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf2_4.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf2_4.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf2_4(R *rio, R *iio, const R *W, stride ios, int m, int dist) /* In-place 4-point twiddle pass over the rio/iio arrays; returns W advanced past every entry consumed. */
+{
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 4) { /* no pass when m <= 2; each pass moves rio up and iio down by dist and reads W[0..3] */
+	  E T1, Tp, Ta, Te, To, Tl, Tk, Th; /* values carried from the load stage into the two store stages */
+	  T1 = rio[0]; /* T1 and Tp are the only inputs that are never multiplied by a W-derived factor */
+	  Tp = iio[-WS(ios, 3)];
+	  { /* Load stage: fetch the other six inputs and the four W entries, then form the scaled terms. */
+	       E T7, Td, T9, Tc, Tg, Tf, T2, T4, T3, T5, T6, T8;
+	       T7 = rio[WS(ios, 2)]; /* for k = 1..3, rio[WS(ios, k)] is combined below with iio[-WS(ios, 3 - k)] */
+	       Td = iio[-WS(ios, 2)];
+	       T9 = iio[-WS(ios, 1)];
+	       Tc = rio[WS(ios, 1)];
+	       Tg = iio[0];
+	       Tf = rio[WS(ios, 3)];
+	       T2 = W[2]; /* (T2, T4) = W[2], W[3] and (T3, T5) = W[0], W[1] */
+	       T4 = W[3];
+	       T3 = W[0];
+	       T5 = W[1];
+	       T6 = FMA(T2, T3, T4 * T5); /* T6, T8: a third factor pair built from products of the loaded W entries rather than read from W (presumably the purpose of -twiddle-log3) */
+	       T8 = FNMS(T4, T3, T2 * T5); /* NOTE(review): assumes FMA(a,b,c) = a*b + c and FNMS(a,b,c) = c - a*b; confirm in the included headers */
+	       Ta = FNMS(T8, T9, T6 * T7); /* Ta, To: inputs (T7, T9) scaled by the derived pair (T6, T8) */
+	       Te = FMA(T3, Tc, T5 * Td); /* Te, Tk: inputs (Tc, Td) scaled by (T3, T5) */
+	       To = FMA(T8, T7, T6 * T9);
+	       Tl = FNMS(T4, Tf, T2 * Tg); /* Th, Tl: inputs (Tf, Tg) scaled by (T2, T4) */
+	       Tk = FNMS(T5, Tc, T3 * Td);
+	       Th = FMA(T2, Tf, T4 * Tg);
+	  }
+	  { /* Store stage 1: writes rio[0], rio[WS(ios, 2)], iio[0], iio[-WS(ios, 2)]; this pass's loads are all done, so overwriting inputs is safe. */
+	       E Tb, Ti, Tn, Tq;
+	       Tb = T1 + Ta;
+	       Ti = Te + Th;
+	       iio[-WS(ios, 2)] = Tb - Ti;
+	       rio[0] = Tb + Ti;
+	       Tn = Tk + Tl;
+	       Tq = To + Tp;
+	       rio[WS(ios, 2)] = Tn - Tq;
+	       iio[0] = Tn + Tq;
+	  }
+	  { /* Store stage 2: writes rio[WS(ios, 1)], rio[WS(ios, 3)], iio[-WS(ios, 1)], iio[-WS(ios, 3)] from the difference terms. */
+	       E Tj, Tm, Tr, Ts;
+	       Tj = T1 - Ta;
+	       Tm = Tk - Tl;
+	       iio[-WS(ios, 3)] = Tj - Tm;
+	       rio[WS(ios, 1)] = Tj + Tm;
+	       Tr = Th - Te;
+	       Ts = Tp - To;
+	       rio[WS(ios, 3)] = Tr - Ts;
+	       iio[-WS(ios, 1)] = Tr + Ts;
+	  }
+     }
+     return W; /* W has moved forward by 4 for every completed pass */
+}
+
+static const tw_instr twinstr[] = { /* Twiddle recipe referenced by desc; its four COS/SIN entries match the four W values hf2_4 reads per pass. */
+     {TW_COS, 0, 1}, /* cos/sin pair tagged 1 (presumably W[0], W[1] -- confirm the table ordering) */
+     {TW_SIN, 0, 1},
+     {TW_COS, 0, 3}, /* cos/sin pair tagged 3 (presumably W[2], W[3]) */
+     {TW_SIN, 0, 3},
+     {TW_NEXT, 1, 0} /* closing entry; its semantics are defined with tw_instr, which is not part of this file */
+};
+
+static const hc2hc_desc desc = { 4, "hf2_4", twinstr, {16, 8, 8, 0}, &GENUS, 0, 0, 0 }; /* Descriptor registered with hf2_4: size 4, its name, the twiddle recipe; {16, 8, 8, ...} repeats the add/mul/fma counts from the header comment of this file. Remaining fields: see hc2hc_desc (not shown here). */
+
+void X(codelet_hf2_4) (planner *p) { /* Registration entry point and the only non-static definition in this file; X() wraps the symbol name (macro defined elsewhere). */
+     X(khc2hc_dit_register) (p, hf2_4, &desc); /* passes the hf2_4 kernel and its descriptor to planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf2_64.c b/src/fftw3/rdft/codelets/r2hc/hf2_64.c
new file mode 100644
index 0000000..fc2ec21
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf2_64.c
@@ -0,0 +1,1906 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:57:57 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -twiddle-log3 -n 64 -dit -name hf2_64 -include hf.h */
+
+/*
+ * This function contains 1154 FP additions, 660 FP multiplications,
+ * (or, 880 additions, 386 multiplications, 274 fused multiply/add),
+ * 382 stack variables, and 256 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf2_64.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf2_64.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf2_64.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf2_64(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{
+     DK(KP290284677, +0.290284677254462367636192375817395274691476278);
+     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
+     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
+     DK(KP471396736, +0.471396736825997648556387625905254377657460319);
+     DK(KP098017140, +0.098017140329560601994195563888641845861136673);
+     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
+     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
+     DK(KP634393284, +0.634393284163645498215171613225493370675687095);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 10) {
+	  E T1, T1g, T91, T7W, T7m, T2O, T4j, T7P, T4P, T8y, T2w, T8t, T2Z, T8e, T48;
+	  E T1z, T7s, T1I, T7t, T8p, Ten, T1Y, T7D, T2t, T7O, T7L, Te6, T3N, T8E, T7A;
+	  E Te0, T4C, TeA, T8S, T9v, T65, Tfi, T9J, Taq, T6K, Tf6, Ta2, Ta5, T73, Tfc;
+	  E Tad, Tag, T3z, T83, T3u, T82, T81, T84, T15, T9K, T68, T7j, T43, T9w, T4F;
+	  E T8G, T5l, TeL, T9k, T9n, T6o, Tf2, T9Q, T9R, T6z, Tf3, T9T, T9W, To, Ts;
+	  E T4o, T8u, T4U, T92, T5a, TeT, T8V, T8Y, T5G, TeG, T97, T9e, T27, T7X, T2T;
+	  E T7E, T7b, Tai, T6T, Ta3, Tf7, Ta8, T7Q, T2H, T2c, T76, Tah, T7F, T4d, T8z;
+	  E TG, TK, T69, T6b, T3b, T87, T5u, T9l, TeM, T9q, T88, T89, T3o, T86, T5P;
+	  E T9f, TeH, T9a, T34, T8f, T1r, T7n, T3S, T8F, T4G, T4I, Tp, T6c, TH, T6a;
+	  E TL, Ti1, T4H, T4J, Tt;
+	  T1 = rio[0];
+	  {
+	       E T12, T67, T14, T66, T6s, T1b, T1f, T6q, T1m, T6x, T1w, T1q, T6v, T6h, T31;
+	       E T1D, T5I, T1y, T6g, T1S, T6m, T1N, T6W, T6Y, T1M, T6k, T1H, T2Y, T5L, T2W;
+	       E T5N, T2b, T74, T2g, T29, T75, T26, T78, T1W, T22, T7a, T6R, T2u, T6P, T2v;
+	       E T6L, T6M, T2E, T2G, T6I, T5Z, T2n, T63, T6G, T2r, T5H, T33, T5E, T2Q, T5z;
+	       E T5C, T2S, T2M, T5q, T3a, T38, T5s, T2N, T5x, T5n, T3l, T5m, T3n, T5h, T5j;
+	       E T3w, T3y, T58, T4a, T3t, T5d, T3r, T5e, T54, T4c, T4Z, T46, T4T, T4X, T47;
+	       E T4l, T4N, T4i, T4g, T4O, T4n, T4R, T4E, T40, T4D, T42, T4y, T4A, T3J, T3L;
+	       E T3R, T3G, T3E, T3P, T2i, Ta, Ty, Tf, Tw, T2, Tj, T3, Tc, T1E, T1B;
+	       E T1F, T1A, T1R, T3x, T2m, T3K, T61, T1V, T60, T3I, T51, T52, T2V, T56, T5X;
+	       E T3v, T55, T2X, T2q, T5W, T4w, T6E, Ta0, T8Q, Tac, T72, Tb, Tg, Th, T3e;
+	       E T3f, T3h, T1a, T2x, T2B, TU, TV, TY, T1e, T2y, T2A, TC, TD, T1u, Tk;
+	       E Tl, Tm, T39, T3U, T3W, T37, T3T, T3X, TQ, TR, TZ, T3c, T3d, T3i, Tx;
+	       E Tz, T1t, TN, TX, T2f, T5V, Tao, T2h, T3D, T4f, T4h, T3F, T3q, T3s;
+	       T12 = rio[WS(ios, 48)];
+	       T67 = iio[-WS(ios, 48)];
+	       T14 = iio[-WS(ios, 15)];
+	       T66 = rio[WS(ios, 15)];
+	       T6s = iio[-WS(ios, 8)];
+	       T1b = rio[WS(ios, 8)];
+	       T1f = iio[-WS(ios, 55)];
+	       T6q = rio[WS(ios, 55)];
+	       T1m = rio[WS(ios, 40)];
+	       T6x = iio[-WS(ios, 40)];
+	       T1w = rio[WS(ios, 56)];
+	       T1q = iio[-WS(ios, 23)];
+	       T6v = rio[WS(ios, 23)];
+	       T6h = iio[-WS(ios, 56)];
+	       T31 = rio[WS(ios, 50)];
+	       T1D = rio[WS(ios, 24)];
+	       T5I = iio[-WS(ios, 50)];
+	       T1y = iio[-WS(ios, 7)];
+	       T6g = rio[WS(ios, 7)];
+	       T1S = rio[WS(ios, 36)];
+	       T6m = iio[-WS(ios, 24)];
+	       T1N = iio[-WS(ios, 59)];
+	       T6W = rio[WS(ios, 59)];
+	       T6Y = iio[-WS(ios, 4)];
+	       T1M = rio[WS(ios, 4)];
+	       T6k = rio[WS(ios, 39)];
+	       T1H = iio[-WS(ios, 39)];
+	       T2Y = iio[-WS(ios, 45)];
+	       T5L = rio[WS(ios, 45)];
+	       T2W = rio[WS(ios, 18)];
+	       T5N = iio[-WS(ios, 18)];
+	       T2b = iio[-WS(ios, 11)];
+	       T74 = rio[WS(ios, 11)];
+	       T2g = rio[WS(ios, 60)];
+	       T29 = rio[WS(ios, 52)];
+	       T75 = iio[-WS(ios, 52)];
+	       T26 = iio[-WS(ios, 43)];
+	       T78 = rio[WS(ios, 43)];
+	       T1W = iio[-WS(ios, 27)];
+	       T22 = rio[WS(ios, 20)];
+	       T7a = iio[-WS(ios, 20)];
+	       T6R = iio[-WS(ios, 12)];
+	       T2u = rio[WS(ios, 12)];
+	       T6P = rio[WS(ios, 51)];
+	       T2v = iio[-WS(ios, 51)];
+	       T6L = rio[WS(ios, 19)];
+	       T6M = iio[-WS(ios, 44)];
+	       T2E = rio[WS(ios, 44)];
+	       T2G = iio[-WS(ios, 19)];
+	       T6I = iio[-WS(ios, 28)];
+	       T5Z = rio[WS(ios, 31)];
+	       T2n = rio[WS(ios, 28)];
+	       T63 = iio[-WS(ios, 32)];
+	       T6G = rio[WS(ios, 35)];
+	       T2r = iio[-WS(ios, 35)];
+	       T5H = rio[WS(ios, 13)];
+	       T33 = iio[-WS(ios, 13)];
+	       T5E = iio[-WS(ios, 34)];
+	       T2Q = rio[WS(ios, 34)];
+	       T5z = iio[-WS(ios, 2)];
+	       T5C = rio[WS(ios, 29)];
+	       T2S = iio[-WS(ios, 29)];
+	       T2M = rio[WS(ios, 2)];
+	       T5q = rio[WS(ios, 53)];
+	       T3a = iio[-WS(ios, 53)];
+	       T38 = rio[WS(ios, 10)];
+	       T5s = iio[-WS(ios, 10)];
+	       T2N = iio[-WS(ios, 61)];
+	       T5x = rio[WS(ios, 61)];
+	       T5n = iio[-WS(ios, 42)];
+	       T3l = rio[WS(ios, 42)];
+	       T5m = rio[WS(ios, 21)];
+	       T3n = iio[-WS(ios, 21)];
+	       T5h = rio[WS(ios, 37)];
+	       T5j = iio[-WS(ios, 26)];
+	       T3w = rio[WS(ios, 26)];
+	       T3y = iio[-WS(ios, 37)];
+	       T58 = iio[-WS(ios, 38)];
+	       T4a = rio[WS(ios, 38)];
+	       T3t = iio[-WS(ios, 5)];
+	       T5d = rio[WS(ios, 5)];
+	       T3r = rio[WS(ios, 58)];
+	       T5e = iio[-WS(ios, 58)];
+	       T54 = rio[WS(ios, 25)];
+	       T4c = iio[-WS(ios, 25)];
+	       T4Z = iio[-WS(ios, 6)];
+	       T46 = rio[WS(ios, 6)];
+	       T4T = iio[-WS(ios, 22)];
+	       T4X = rio[WS(ios, 57)];
+	       T47 = iio[-WS(ios, 57)];
+	       T4l = rio[WS(ios, 22)];
+	       T4N = rio[WS(ios, 9)];
+	       T4i = iio[-WS(ios, 9)];
+	       T4g = rio[WS(ios, 54)];
+	       T4O = iio[-WS(ios, 54)];
+	       T4n = iio[-WS(ios, 41)];
+	       T4R = rio[WS(ios, 41)];
+	       T4E = iio[-WS(ios, 46)];
+	       T40 = rio[WS(ios, 46)];
+	       T4D = rio[WS(ios, 17)];
+	       T42 = iio[-WS(ios, 17)];
+	       T4y = rio[WS(ios, 33)];
+	       T4A = iio[-WS(ios, 30)];
+	       T3J = rio[WS(ios, 30)];
+	       T3L = iio[-WS(ios, 33)];
+	       T3R = iio[-WS(ios, 49)];
+	       T3G = iio[-WS(ios, 1)];
+	       T3E = rio[WS(ios, 62)];
+	       T3P = rio[WS(ios, 14)];
+	       T2i = iio[-WS(ios, 3)];
+	       {
+		    E T4u, T70, T71, T4v, T5T, T6C, T6D, T5U, T4, T7, T5, T8, TO, TP, T1U;
+		    E T2p, T18, T2k, T2l, T2o, TT, TS, T19, T1c, T1T, T1P, T1Q, T1d;
+		    T4u = rio[WS(ios, 1)];
+		    T70 = rio[WS(ios, 27)];
+		    T71 = iio[-WS(ios, 36)];
+		    T4v = iio[-WS(ios, 62)];
+		    T5T = rio[WS(ios, 63)];
+		    T6C = rio[WS(ios, 3)];
+		    T6D = iio[-WS(ios, 60)];
+		    T5U = iio[0];
+		    {
+			 E T6, Te, T9, Td;
+			 T4 = W[2];
+			 T7 = W[3];
+			 T5 = W[0];
+			 T8 = W[1];
+			 T6 = T4 * T5;
+			 Te = T7 * T5;
+			 T9 = T7 * T8;
+			 Td = T4 * T8;
+			 Ta = T6 - T9;
+			 Ty = Td - Te;
+			 Tf = Td + Te;
+			 Tw = T6 + T9;
+			 T2 = W[6];
+			 Tj = W[7];
+			 T3 = W[4];
+			 Tc = W[5];
+			 TO = T3 * T4;
+			 TP = Tc * T7;
+			 T1U = Tj * T3;
+			 T2p = Tj * T5;
+			 T18 = T3 * T5;
+			 T2k = T2 * T5;
+			 T2l = Tj * T8;
+			 T2o = T2 * T8;
+			 TT = Tc * T4;
+			 TS = T3 * T7;
+			 T19 = Tc * T8;
+			 T1c = T3 * T8;
+			 T1E = T2 * T7;
+			 T1T = T2 * Tc;
+			 T1B = Tj * T7;
+			 T1F = Tj * T4;
+			 T1P = T2 * T3;
+			 T1Q = Tj * Tc;
+			 T1A = T2 * T4;
+			 T1d = Tc * T5;
+		    }
+		    T1R = T1P - T1Q;
+		    T3x = T2o - T2p;
+		    T2m = T2k - T2l;
+		    T3K = T1E + T1F;
+		    T61 = Tj * Ta;
+		    T1V = T1T + T1U;
+		    T60 = T2 * Tf;
+		    T3I = T1A - T1B;
+		    T51 = T2 * Tw;
+		    T52 = Tj * Ty;
+		    T2V = T1P + T1Q;
+		    T56 = Tj * Tw;
+		    T5X = Tj * Tf;
+		    T3v = T2k + T2l;
+		    T55 = T2 * Ty;
+		    T2X = T1T - T1U;
+		    T2q = T2o + T2p;
+		    T5W = T2 * Ta;
+		    T4w = FMA(T5, T4u, T8 * T4v);
+		    T6E = FMA(T4, T6C, T7 * T6D);
+		    Ta0 = FNMS(T7, T6C, T4 * T6D);
+		    T8Q = FNMS(T8, T4u, T5 * T4v);
+		    Tac = FNMS(Tj, T70, T2 * T71);
+		    T72 = FMA(T2, T70, Tj * T71);
+		    Tb = T3 * Ta;
+		    Tg = Tc * Tf;
+		    Th = Tb + Tg;
+		    T3e = TS - TT;
+		    T3f = Tj * T3e;
+		    T3h = T2 * T3e;
+		    T1a = T18 + T19;
+		    T2x = T2 * T1a;
+		    T2B = Tj * T1a;
+		    TU = TS + TT;
+		    TV = Tj * TU;
+		    TY = T2 * TU;
+		    T1e = T1c - T1d;
+		    T2y = Tj * T1e;
+		    T2A = T2 * T1e;
+		    TC = T3 * Ty;
+		    TD = Tc * Tw;
+		    T1u = TC + TD;
+		    Tk = T3 * Tf;
+		    Tl = Tc * Ta;
+		    Tm = Tk - Tl;
+		    T39 = T1c + T1d;
+		    T3U = Tj * T39;
+		    T3W = T2 * T39;
+		    T37 = T18 - T19;
+		    T3T = T2 * T37;
+		    T3X = Tj * T37;
+		    TQ = TO - TP;
+		    TR = T2 * TQ;
+		    TZ = Tj * TQ;
+		    T3c = TO + TP;
+		    T3d = T2 * T3c;
+		    T3i = Tj * T3c;
+		    Tx = T3 * Tw;
+		    Tz = Tc * Ty;
+		    T1t = Tx - Tz;
+		    TN = W[8];
+		    TX = W[9];
+		    T2f = FMA(TN, T4, TX * T7);
+		    T5V = FMA(TN, T5T, TX * T5U);
+		    Tao = FNMS(TX, T5T, TN * T5U);
+		    T2h = FNMS(TX, T4, TN * T7);
+		    T3D = FMA(TN, T5, TX * T8);
+		    T4f = FMA(TN, T3, TX * Tc);
+		    T4h = FNMS(TX, T3, TN * Tc);
+		    T3F = FNMS(TX, T5, TN * T8);
+	       }
+	       T1g = FNMS(T1e, T1f, T1a * T1b);
+	       T91 = FNMS(Tc, T4N, T3 * T4O);
+	       T7W = FMA(Ty, T2M, Tw * T2N);
+	       T7m = FMA(T1e, T1b, T1a * T1f);
+	       T2O = FNMS(Ty, T2N, Tw * T2M);
+	       T4j = FNMS(T4h, T4i, T4f * T4g);
+	       T7P = FNMS(TU, T2u, TQ * T2v);
+	       T4P = FMA(T3, T4N, Tc * T4O);
+	       T8y = FMA(T3e, T46, T3c * T47);
+	       T2w = FMA(TQ, T2u, TU * T2v);
+	       {
+		    E T1v, T1x, T1O, T1X;
+		    T8t = FMA(T4h, T4g, T4f * T4i);
+		    T2Z = FNMS(T2X, T2Y, T2V * T2W);
+		    T8e = FMA(T2X, T2W, T2V * T2Y);
+		    T48 = FNMS(T3e, T47, T3c * T46);
+		    T1v = FMA(TN, T1t, TX * T1u);
+		    T1x = FNMS(TX, T1t, TN * T1u);
+		    T1z = FNMS(T1x, T1y, T1v * T1w);
+		    T7s = FMA(T1x, T1w, T1v * T1y);
+		    {
+			 E T1C, T1G, T8n, T8o;
+			 T1C = T1A + T1B;
+			 T1G = T1E - T1F;
+			 T1I = FNMS(T1G, T1H, T1C * T1D);
+			 T7t = FMA(T1G, T1D, T1C * T1H);
+			 T8n = FMA(T3F, T3E, T3D * T3G);
+			 T8o = FNMS(T3K, T3J, T3I * T3L);
+			 T8p = T8n - T8o;
+			 Ten = T8n + T8o;
+		    }
+		    T1O = FMA(Ta, T1M, Tf * T1N);
+		    T1X = FMA(T1R, T1S, T1V * T1W);
+		    T1Y = T1O + T1X;
+		    T7D = T1O - T1X;
+		    {
+			 E T2j, T2s, T7J, T7K;
+			 T2j = FNMS(T2h, T2i, T2f * T2g);
+			 T2s = FMA(T2m, T2n, T2q * T2r);
+			 T2t = T2j + T2s;
+			 T7O = T2j - T2s;
+			 T7J = FMA(T2h, T2g, T2f * T2i);
+			 T7K = FNMS(T2q, T2n, T2m * T2r);
+			 T7L = T7J - T7K;
+			 Te6 = T7J + T7K;
+		    }
+	       }
+	       {
+		    E T3H, T3M, T7y, T7z;
+		    T3H = FNMS(T3F, T3G, T3D * T3E);
+		    T3M = FMA(T3I, T3J, T3K * T3L);
+		    T3N = T3H + T3M;
+		    T8E = T3H - T3M;
+		    T7y = FNMS(Tf, T1M, Ta * T1N);
+		    T7z = FNMS(T1V, T1S, T1R * T1W);
+		    T7A = T7y - T7z;
+		    Te0 = T7y + T7z;
+	       }
+	       {
+		    E T4B, T8R, T4x, T4z;
+		    T4x = T3d + T3f;
+		    T4z = T3h - T3i;
+		    T4B = FNMS(T4z, T4A, T4x * T4y);
+		    T8R = FMA(T4z, T4y, T4x * T4A);
+		    T4C = T4w + T4B;
+		    TeA = T8Q + T8R;
+		    T8S = T8Q - T8R;
+		    T9v = T4w - T4B;
+	       }
+	       {
+		    E T64, Tap, T5Y, T62;
+		    T5Y = T5W - T5X;
+		    T62 = T60 + T61;
+		    T64 = FMA(T5Y, T5Z, T62 * T63);
+		    Tap = FNMS(T62, T5Z, T5Y * T63);
+		    T65 = T5V + T64;
+		    Tfi = Tao + Tap;
+		    T9J = T5V - T64;
+		    Taq = Tao - Tap;
+	       }
+	       {
+		    E T6J, Ta1, T6F, T6H;
+		    T6F = T2x + T2y;
+		    T6H = T2A - T2B;
+		    T6J = FNMS(T6H, T6I, T6F * T6G);
+		    Ta1 = FMA(T6H, T6G, T6F * T6I);
+		    T6K = T6E + T6J;
+		    Tf6 = Ta0 + Ta1;
+		    Ta2 = Ta0 - Ta1;
+		    Ta5 = T6E - T6J;
+	       }
+	       {
+		    E T6Z, Tab, T6V, T6X;
+		    T6V = FMA(TN, Ta, TX * Tf);
+		    T6X = FNMS(TX, Ta, TN * Tf);
+		    T6Z = FNMS(T6X, T6Y, T6V * T6W);
+		    Tab = FMA(T6X, T6W, T6V * T6Y);
+		    T73 = T6Z + T72;
+		    Tfc = Tab + Tac;
+		    Tad = Tab - Tac;
+		    Tag = T6Z - T72;
+	       }
+	       T3z = FNMS(T3x, T3y, T3v * T3w);
+	       T83 = FMA(T3x, T3w, T3v * T3y);
+	       T3q = FNMS(TX, Tm, TN * Th);
+	       T3s = FMA(TN, Tm, TX * Th);
+	       T3u = FMA(T3q, T3r, T3s * T3t);
+	       T82 = FNMS(T3s, T3r, T3q * T3t);
+	       T81 = T3u - T3z;
+	       T84 = T82 - T83;
+	       {
+		    E TW, T10, T11, T13;
+		    TW = TR + TV;
+		    T10 = TY - TZ;
+		    T11 = FNMS(TX, T10, TN * TW);
+		    T13 = FMA(TN, T10, TX * TW);
+		    T15 = FMA(T11, T12, T13 * T14);
+		    T9K = FMA(T10, T66, TW * T67);
+		    T68 = FNMS(T10, T67, TW * T66);
+		    T7j = FNMS(T13, T12, T11 * T14);
+	       }
+	       {
+		    E T3V, T3Y, T3Z, T41;
+		    T3V = T3T + T3U;
+		    T3Y = T3W - T3X;
+		    T3Z = FNMS(TX, T3Y, TN * T3V);
+		    T41 = FMA(TN, T3Y, TX * T3V);
+		    T43 = FMA(T3Z, T40, T41 * T42);
+		    T9w = FMA(T3Y, T4D, T3V * T4E);
+		    T4F = FNMS(T3Y, T4E, T3V * T4D);
+		    T8G = FNMS(T41, T40, T3Z * T42);
+	       }
+	       {
+		    E T5f, T9i, T5k, T9j, T5g, T5i;
+		    T5f = FNMS(Tm, T5e, Th * T5d);
+		    T9i = FMA(Tm, T5d, Th * T5e);
+		    T5g = T3T - T3U;
+		    T5i = T3W + T3X;
+		    T5k = FMA(T5g, T5h, T5i * T5j);
+		    T9j = FNMS(T5i, T5h, T5g * T5j);
+		    T5l = T5f + T5k;
+		    TeL = T9i + T9j;
+		    T9k = T9i - T9j;
+		    T9n = T5f - T5k;
+	       }
+	       {
+		    E T6i, T9O, T6n, T9P, T6j, T6l;
+		    T6i = FMA(T1t, T6g, T1u * T6h);
+		    T9O = FNMS(T1u, T6g, T1t * T6h);
+		    T6j = TR - TV;
+		    T6l = TY + TZ;
+		    T6n = FMA(T6j, T6k, T6l * T6m);
+		    T9P = FNMS(T6l, T6k, T6j * T6m);
+		    T6o = T6i + T6n;
+		    Tf2 = T9O + T9P;
+		    T9Q = T9O - T9P;
+		    T9R = T6i - T6n;
+	       }
+	       {
+		    E T6t, T9U, T6y, T9V;
+		    {
+			 E T6p, T6r, T6u, T6w;
+			 T6p = FNMS(TX, T1e, TN * T1a);
+			 T6r = FMA(TN, T1e, TX * T1a);
+			 T6t = FMA(T6p, T6q, T6r * T6s);
+			 T9U = FNMS(T6r, T6q, T6p * T6s);
+			 T6u = T5W + T5X;
+			 T6w = T60 - T61;
+			 T6y = FNMS(T6w, T6x, T6u * T6v);
+			 T9V = FMA(T6w, T6v, T6u * T6x);
+		    }
+		    T6z = T6t + T6y;
+		    Tf3 = T9U + T9V;
+		    T9T = T6t - T6y;
+		    T9W = T9U - T9V;
+	       }
+	       {
+		    E Ti, Tn, T4k, Tq, Tr, T4m, T4Q, T4S;
+		    Ti = T2 * Th;
+		    Tn = Tj * Tm;
+		    T4k = Ti - Tn;
+		    Tq = T2 * Tm;
+		    Tr = Tj * Th;
+		    T4m = Tq + Tr;
+		    To = Ti + Tn;
+		    Ts = Tq - Tr;
+		    T4o = FMA(T4k, T4l, T4m * T4n);
+		    T8u = FNMS(T4m, T4l, T4k * T4n);
+		    T4Q = FMA(TN, T4k, TX * T4m);
+		    T4S = FNMS(TX, T4k, TN * T4m);
+		    T4U = FNMS(T4S, T4T, T4Q * T4R);
+		    T92 = FMA(T4S, T4R, T4Q * T4T);
+	       }
+	       {
+		    E T50, T8W, T59, T8X;
+		    {
+			 E T4W, T4Y, T53, T57;
+			 T4W = FNMS(TX, T3e, TN * T3c);
+			 T4Y = FMA(TN, T3e, TX * T3c);
+			 T50 = FMA(T4W, T4X, T4Y * T4Z);
+			 T8W = FNMS(T4Y, T4X, T4W * T4Z);
+			 T53 = T51 - T52;
+			 T57 = T55 + T56;
+			 T59 = FMA(T53, T54, T57 * T58);
+			 T8X = FNMS(T57, T54, T53 * T58);
+		    }
+		    T5a = T50 + T59;
+		    TeT = T8W + T8X;
+		    T8V = T50 - T59;
+		    T8Y = T8W - T8X;
+	       }
+	       {
+		    E T5A, T9c, T5F, T9d;
+		    {
+			 E T5w, T5y, T5B, T5D;
+			 T5w = FNMS(TX, Ty, TN * Tw);
+			 T5y = FMA(TN, Ty, TX * Tw);
+			 T5A = FMA(T5w, T5x, T5y * T5z);
+			 T9c = FNMS(T5y, T5x, T5w * T5z);
+			 T5B = T51 + T52;
+			 T5D = T55 - T56;
+			 T5F = FNMS(T5D, T5E, T5B * T5C);
+			 T9d = FMA(T5D, T5C, T5B * T5E);
+		    }
+		    T5G = T5A + T5F;
+		    TeG = T9c + T9d;
+		    T97 = T5A - T5F;
+		    T9e = T9c - T9d;
+	       }
+	       {
+		    E T21, T2P, T25, T2R, T77, T79;
+		    {
+			 E T1Z, T20, T23, T24;
+			 T1Z = T2 * T1t;
+			 T20 = Tj * T1u;
+			 T21 = T1Z + T20;
+			 T2P = T1Z - T20;
+			 T23 = T2 * T1u;
+			 T24 = Tj * T1t;
+			 T25 = T23 - T24;
+			 T2R = T23 + T24;
+		    }
+		    T27 = FNMS(T25, T26, T21 * T22);
+		    T7X = FNMS(T2R, T2Q, T2P * T2S);
+		    T2T = FMA(T2P, T2Q, T2R * T2S);
+		    T7E = FMA(T25, T22, T21 * T26);
+		    T77 = FNMS(TX, T25, TN * T21);
+		    T79 = FMA(TN, T25, TX * T21);
+		    T7b = FMA(T77, T78, T79 * T7a);
+		    Tai = FNMS(T79, T78, T77 * T7a);
+	       }
+	       {
+		    E T6S, Ta7, T2D, Ta6, T2F, T6N;
+		    {
+			 E T6O, T6Q, T2z, T2C;
+			 T6O = FMA(TN, TQ, TX * TU);
+			 T6Q = FNMS(TX, TQ, TN * TU);
+			 T6S = FNMS(T6Q, T6R, T6O * T6P);
+			 Ta7 = FMA(T6Q, T6P, T6O * T6R);
+			 T2z = T2x - T2y;
+			 T2C = T2A + T2B;
+			 T2D = FMA(TN, T2z, TX * T2C);
+			 Ta6 = FNMS(T2C, T6L, T2z * T6M);
+			 T2F = FNMS(TX, T2z, TN * T2C);
+			 T6N = FMA(T2z, T6L, T2C * T6M);
+		    }
+		    T6T = T6N + T6S;
+		    Ta3 = T6N - T6S;
+		    Tf7 = Ta6 + Ta7;
+		    Ta8 = Ta6 - Ta7;
+		    T7Q = FMA(T2F, T2E, T2D * T2G);
+		    T2H = FNMS(T2F, T2G, T2D * T2E);
+	       }
+	       {
+		    E TA, TE, TB, TF, TJ, TI, T2a, T28, T49, T4b;
+		    TA = Tx + Tz;
+		    TE = TC - TD;
+		    TB = T2 * TA;
+		    TF = Tj * TE;
+		    TJ = Tj * TA;
+		    TI = T2 * TE;
+		    T2a = FMA(TN, TE, TX * TA);
+		    T28 = FNMS(TX, TE, TN * TA);
+		    T2c = FMA(T28, T29, T2a * T2b);
+		    T76 = FNMS(TE, T75, TA * T74);
+		    Tah = FMA(TE, T74, TA * T75);
+		    T7F = FNMS(T2a, T29, T28 * T2b);
+		    T49 = TB + TF;
+		    T4b = TI - TJ;
+		    T4d = FNMS(T4b, T4c, T49 * T4a);
+		    T8z = FMA(T4b, T4a, T49 * T4c);
+		    TG = TB - TF;
+		    TK = TI + TJ;
+		    T69 = FMA(TN, TG, TX * TK);
+		    T6b = FNMS(TX, TG, TN * TK);
+	       }
+	       {
+		    E T5t, T9p, T3k, T9o, T3m, T5o;
+		    T3b = FMA(T37, T38, T39 * T3a);
+		    T87 = FNMS(T39, T38, T37 * T3a);
+		    {
+			 E T5p, T5r, T3g, T3j;
+			 T5p = FMA(TN, T37, TX * T39);
+			 T5r = FNMS(TX, T37, TN * T39);
+			 T5t = FNMS(T5r, T5s, T5p * T5q);
+			 T9p = FMA(T5r, T5q, T5p * T5s);
+			 T3g = T3d - T3f;
+			 T3j = T3h + T3i;
+			 T3k = FMA(TN, T3g, TX * T3j);
+			 T9o = FNMS(T3j, T5m, T3g * T5n);
+			 T3m = FNMS(TX, T3g, TN * T3j);
+			 T5o = FMA(T3g, T5m, T3j * T5n);
+		    }
+		    T5u = T5o + T5t;
+		    T9l = T5o - T5t;
+		    TeM = T9o + T9p;
+		    T9q = T9o - T9p;
+		    T88 = FMA(T3m, T3l, T3k * T3n);
+		    T89 = T87 - T88;
+		    T3o = FNMS(T3m, T3n, T3k * T3l);
+		    T86 = T3b - T3o;
+	       }
+	       {
+		    E T5O, T99, T1i, T1n, T1o, T1k, T30, T5J, T98, T32;
+		    {
+			 E T5K, T5M, T1h, T1j;
+			 T5K = FNMS(TX, T2X, TN * T2V);
+			 T5M = FMA(TN, T2X, TX * T2V);
+			 T5O = FMA(T5K, T5L, T5M * T5N);
+			 T99 = FNMS(T5M, T5L, T5K * T5N);
+			 T1h = Tb - Tg;
+			 T1j = Tk + Tl;
+			 T1i = T2 * T1h;
+			 T1n = T2 * T1j;
+			 T1o = Tj * T1h;
+			 T1k = Tj * T1j;
+			 T30 = FMA(TN, T1h, TX * T1j);
+			 T5J = FMA(T1h, T5H, T1j * T5I);
+			 T98 = FNMS(T1j, T5H, T1h * T5I);
+			 T32 = FNMS(TX, T1h, TN * T1j);
+		    }
+		    T5P = T5J + T5O;
+		    T9f = T5J - T5O;
+		    TeH = T98 + T99;
+		    T9a = T98 - T99;
+		    T34 = FNMS(T32, T33, T30 * T31);
+		    T8f = FMA(T32, T31, T30 * T33);
+		    {
+			 E T1l, T1p, T3O, T3Q;
+			 T1l = T1i - T1k;
+			 T1p = T1n + T1o;
+			 T1r = FMA(T1l, T1m, T1p * T1q);
+			 T7n = FNMS(T1p, T1m, T1l * T1q);
+			 T3O = T1i + T1k;
+			 T3Q = T1n - T1o;
+			 T3S = FNMS(T3Q, T3R, T3O * T3P);
+			 T8F = FMA(T3Q, T3P, T3O * T3R);
+			 T4G = FNMS(TX, T3Q, TN * T3O);
+			 T4I = FMA(TN, T3Q, TX * T3O);
+		    }
+	       }
+	  }
+	  Tp = rio[WS(ios, 32)];
+	  T6c = iio[-WS(ios, 16)];
+	  TH = rio[WS(ios, 16)];
+	  T6a = rio[WS(ios, 47)];
+	  TL = iio[-WS(ios, 47)];
+	  Ti1 = iio[-WS(ios, 63)];
+	  T4H = rio[WS(ios, 49)];
+	  T4J = iio[-WS(ios, 14)];
+	  Tt = iio[-WS(ios, 31)];
+	  {
+	       E T5R, TgT, TgY, ThE, T9t, Tbe, T9G, Tbb, Tcl, Tdq, Tcs, Tdn, TeP, Tg4, TeY;
+	       E Tg1, T7e, Th4, ThJ, Th9, Tfp, Tg8, Tfg, Tgb, T2K, TgC, Tih, ThX, TfQ, TiL;
+	       E Tea, Tiv, Tam, Tbl, TcL, Tdu, Taz, Tbi, TcE, Tdx, T7U, Tjv, Tdc, Tjh, Tb0;
+	       E TjL, TbU, TiZ, T8D, Tb5, Tc8, Tdi, T8M, Tb6, Tc5, Tdh, T4r, Thz, Tex, Tfz;
+	       E TfX, Tgl, TgN, Thj, T8m, TaI, Tdg, TdG, Tb4, Tbu, Tc2, TcU, T3C, Thy, Tem;
+	       E Tfy, TfU, Tgk, TgI, Thi, T6B, Th1, Tfm, Tga, Th8, ThI, T9Z, Tbh, Taw, Tbk;
+	       E TcI, Tdw, Tf5, Tg7, Tcx, Tdt, T5c, TgV, TeV, Tg0, TgS, ThD, TeE, Tg3, T96;
+	       E Tbd, Tce, Tdp, Tcp, Tdm, T9D, Tba, T1L, Tgz, Ti4, Tii, Tiy, TiM, TdZ, TfN;
+	       E T7x, TaX, Tj4, Tji, Tjy, TjM, TbN, Td9;
+	       {
+		    E T5v, T5Q, TgW, TgX;
+		    T5v = T5l + T5u;
+		    T5Q = T5G + T5P;
+		    T5R = T5v + T5Q;
+		    TgT = T5Q - T5v;
+		    TgW = TeL + TeM;
+		    TgX = TeG + TeH;
+		    TgY = TgW - TgX;
+		    ThE = TgW + TgX;
+	       }
+	       {
+		    E T9h, T9F, T9s, T9E;
+		    {
+			 E T9b, T9g, T9m, T9r;
+			 T9b = T97 - T9a;
+			 T9g = T9e + T9f;
+			 T9h = FNMS(KP923879532, T9g, KP382683432 * T9b);
+			 T9F = FMA(KP382683432, T9g, KP923879532 * T9b);
+			 T9m = T9k + T9l;
+			 T9r = T9n - T9q;
+			 T9s = FMA(KP923879532, T9m, KP382683432 * T9r);
+			 T9E = FNMS(KP923879532, T9r, KP382683432 * T9m);
+		    }
+		    T9t = T9h - T9s;
+		    Tbe = T9E + T9F;
+		    T9G = T9E - T9F;
+		    Tbb = T9s + T9h;
+	       }
+	       {
+		    E Tch, Tcr, Tck, Tcq;
+		    {
+			 E Tcf, Tcg, Tci, Tcj;
+			 Tcf = T97 + T9a;
+			 Tcg = T9e - T9f;
+			 Tch = FNMS(KP382683432, Tcg, KP923879532 * Tcf);
+			 Tcr = FMA(KP923879532, Tcg, KP382683432 * Tcf);
+			 Tci = T9k - T9l;
+			 Tcj = T9n + T9q;
+			 Tck = FMA(KP382683432, Tci, KP923879532 * Tcj);
+			 Tcq = FNMS(KP382683432, Tcj, KP923879532 * Tci);
+		    }
+		    Tcl = Tch - Tck;
+		    Tdq = Tcq + Tcr;
+		    Tcs = Tcq - Tcr;
+		    Tdn = Tck + Tch;
+	       }
+	       {
+		    E TeJ, TeX, TeO, TeW;
+		    {
+			 E TeF, TeI, TeK, TeN;
+			 TeF = T5G - T5P;
+			 TeI = TeG - TeH;
+			 TeJ = TeF - TeI;
+			 TeX = TeF + TeI;
+			 TeK = T5l - T5u;
+			 TeN = TeL - TeM;
+			 TeO = TeK + TeN;
+			 TeW = TeN - TeK;
+		    }
+		    TeP = KP707106781 * (TeJ - TeO);
+		    Tg4 = KP707106781 * (TeW + TeX);
+		    TeY = KP707106781 * (TeW - TeX);
+		    Tg1 = KP707106781 * (TeO + TeJ);
+	       }
+	       {
+		    E T6U, Th2, T7d, Tfb, Tfe, Th3, Tfa, Tfo, Tfn, Tff;
+		    T6U = T6K + T6T;
+		    Th2 = Tf6 + Tf7;
+		    {
+			 E T7c, Tfd, Tf8, Tf9;
+			 T7c = T76 + T7b;
+			 T7d = T73 + T7c;
+			 Tfb = T73 - T7c;
+			 Tfd = Tah + Tai;
+			 Tfe = Tfc - Tfd;
+			 Th3 = Tfc + Tfd;
+			 Tf8 = Tf6 - Tf7;
+			 Tf9 = T6K - T6T;
+			 Tfa = Tf8 - Tf9;
+			 Tfo = Tf9 + Tf8;
+		    }
+		    T7e = T6U + T7d;
+		    Th4 = Th2 - Th3;
+		    ThJ = Th2 + Th3;
+		    Th9 = T7d - T6U;
+		    Tfn = Tfb - Tfe;
+		    Tfp = KP707106781 * (Tfn - Tfo);
+		    Tg8 = KP707106781 * (Tfo + Tfn);
+		    Tff = Tfb + Tfe;
+		    Tfg = KP707106781 * (Tfa - Tff);
+		    Tgb = KP707106781 * (Tfa + Tff);
+	       }
+	       {
+		    E T2e, Te3, Te8, TgB, T2J, Te5, Te2, TgA;
+		    {
+			 E T2d, Te7, T2I, Te1;
+			 T2d = T27 + T2c;
+			 T2e = T1Y + T2d;
+			 Te3 = T1Y - T2d;
+			 Te7 = T7P + T7Q;
+			 Te8 = Te6 - Te7;
+			 TgB = Te6 + Te7;
+			 T2I = T2w + T2H;
+			 T2J = T2t + T2I;
+			 Te5 = T2t - T2I;
+			 Te1 = T7E + T7F;
+			 Te2 = Te0 - Te1;
+			 TgA = Te0 + Te1;
+		    }
+		    T2K = T2e + T2J;
+		    TgC = TgA - TgB;
+		    Tih = T2J - T2e;
+		    ThX = TgA + TgB;
+		    {
+			 E TfO, TfP, Te4, Te9;
+			 TfO = Te3 + Te2;
+			 TfP = Te5 - Te8;
+			 TfQ = KP707106781 * (TfO + TfP);
+			 TiL = KP707106781 * (TfP - TfO);
+			 Te4 = Te2 - Te3;
+			 Te9 = Te5 + Te8;
+			 Tea = KP707106781 * (Te4 - Te9);
+			 Tiv = KP707106781 * (Te4 + Te9);
+		    }
+	       }
+	       {
+		    E Taf, TcB, Tak, TcC, Taa, Tay, TcA, TcK, Tae, Taj;
+		    Tae = T76 - T7b;
+		    Taf = Tad + Tae;
+		    TcB = Tad - Tae;
+		    Taj = Tah - Tai;
+		    Tak = Tag - Taj;
+		    TcC = Tag + Taj;
+		    {
+			 E Ta4, Ta9, Tcy, Tcz;
+			 Ta4 = Ta2 + Ta3;
+			 Ta9 = Ta5 - Ta8;
+			 Taa = FNMS(KP923879532, Ta9, KP382683432 * Ta4);
+			 Tay = FMA(KP923879532, Ta4, KP382683432 * Ta9);
+			 Tcy = Ta2 - Ta3;
+			 Tcz = Ta5 + Ta8;
+			 TcA = FNMS(KP382683432, Tcz, KP923879532 * Tcy);
+			 TcK = FMA(KP382683432, Tcy, KP923879532 * Tcz);
+		    }
+		    {
+			 E Tal, TcJ, Tax, TcD;
+			 Tal = FMA(KP382683432, Taf, KP923879532 * Tak);
+			 Tam = Taa - Tal;
+			 Tbl = Taa + Tal;
+			 TcJ = FNMS(KP382683432, TcB, KP923879532 * TcC);
+			 TcL = TcJ - TcK;
+			 Tdu = TcK + TcJ;
+			 Tax = FNMS(KP923879532, Taf, KP382683432 * Tak);
+			 Taz = Tax - Tay;
+			 Tbi = Tay + Tax;
+			 TcD = FMA(KP923879532, TcB, KP382683432 * TcC);
+			 TcE = TcA - TcD;
+			 Tdx = TcA + TcD;
+		    }
+	       }
+	       {
+		    E T7C, TbO, T7S, TbS, T7H, TbP, T7N, TbR;
+		    {
+			 E T7B, T7R, T7G, T7M;
+			 T7B = T27 - T2c;
+			 T7C = T7A + T7B;
+			 TbO = T7A - T7B;
+			 T7R = T7P - T7Q;
+			 T7S = T7O - T7R;
+			 TbS = T7O + T7R;
+			 T7G = T7E - T7F;
+			 T7H = T7D - T7G;
+			 TbP = T7D + T7G;
+			 T7M = T2w - T2H;
+			 T7N = T7L + T7M;
+			 TbR = T7L - T7M;
+		    }
+		    {
+			 E T7I, T7T, Tda, Tdb;
+			 T7I = FNMS(KP923879532, T7H, KP382683432 * T7C);
+			 T7T = FMA(KP382683432, T7N, KP923879532 * T7S);
+			 T7U = T7I - T7T;
+			 Tjv = T7I + T7T;
+			 Tda = FMA(KP382683432, TbO, KP923879532 * TbP);
+			 Tdb = FNMS(KP382683432, TbR, KP923879532 * TbS);
+			 Tdc = Tda + Tdb;
+			 Tjh = Tdb - Tda;
+		    }
+		    {
+			 E TaY, TaZ, TbQ, TbT;
+			 TaY = FMA(KP923879532, T7C, KP382683432 * T7H);
+			 TaZ = FNMS(KP923879532, T7N, KP382683432 * T7S);
+			 Tb0 = TaY + TaZ;
+			 TjL = TaZ - TaY;
+			 TbQ = FNMS(KP382683432, TbP, KP923879532 * TbO);
+			 TbT = FMA(KP923879532, TbR, KP382683432 * TbS);
+			 TbU = TbQ - TbT;
+			 TiZ = TbQ + TbT;
+		    }
+	       }
+	       {
+		    E T8r, Tc6, T8I, Tc3, T8w, T8K, T8B, T8J, T8q, T8H;
+		    T8q = T3S - T43;
+		    T8r = T8p + T8q;
+		    Tc6 = T8p - T8q;
+		    T8H = T8F - T8G;
+		    T8I = T8E - T8H;
+		    Tc3 = T8E + T8H;
+		    {
+			 E T8s, T8v, T8x, T8A;
+			 T8s = T4j - T4o;
+			 T8v = T8t - T8u;
+			 T8w = T8s - T8v;
+			 T8K = T8s + T8v;
+			 T8x = T48 - T4d;
+			 T8A = T8y - T8z;
+			 T8B = T8x + T8A;
+			 T8J = T8A - T8x;
+		    }
+		    {
+			 E T8C, Tc7, T8L, Tc4;
+			 T8C = KP707106781 * (T8w - T8B);
+			 T8D = T8r - T8C;
+			 Tb5 = T8r + T8C;
+			 Tc7 = KP707106781 * (T8J + T8K);
+			 Tc8 = Tc6 - Tc7;
+			 Tdi = Tc6 + Tc7;
+			 T8L = KP707106781 * (T8J - T8K);
+			 T8M = T8I - T8L;
+			 Tb6 = T8I + T8L;
+			 Tc4 = KP707106781 * (T8B + T8w);
+			 Tc5 = Tc3 - Tc4;
+			 Tdh = Tc3 + Tc4;
+		    }
+	       }
+	       {
+		    E T45, Tes, Tep, TgK, T4q, Teq, Tev, TgL, T44, Teo, Ter, Tew;
+		    T44 = T3S + T43;
+		    T45 = T3N + T44;
+		    Tes = T3N - T44;
+		    Teo = T8F + T8G;
+		    Tep = Ten - Teo;
+		    TgK = Ten + Teo;
+		    {
+			 E T4e, T4p, Tet, Teu;
+			 T4e = T48 + T4d;
+			 T4p = T4j + T4o;
+			 T4q = T4e + T4p;
+			 Teq = T4p - T4e;
+			 Tet = T8y + T8z;
+			 Teu = T8t + T8u;
+			 Tev = Tet - Teu;
+			 TgL = Tet + Teu;
+		    }
+		    T4r = T45 + T4q;
+		    Thz = TgK + TgL;
+		    Ter = Tep - Teq;
+		    Tew = Tes - Tev;
+		    Tex = FMA(KP382683432, Ter, KP923879532 * Tew);
+		    Tfz = FNMS(KP923879532, Ter, KP382683432 * Tew);
+		    {
+			 E TfV, TfW, TgJ, TgM;
+			 TfV = Tep + Teq;
+			 TfW = Tes + Tev;
+			 TfX = FMA(KP923879532, TfV, KP382683432 * TfW);
+			 Tgl = FNMS(KP382683432, TfV, KP923879532 * TfW);
+			 TgJ = T45 - T4q;
+			 TgM = TgK - TgL;
+			 TgN = TgJ + TgM;
+			 Thj = TgJ - TgM;
+		    }
+	       }
+	       {
+		    E T80, TbW, T8k, TbX, T8b, Tc0, T8h, TbZ;
+		    {
+			 E T7Y, T7Z, T8i, T8j;
+			 T7Y = T7W - T7X;
+			 T7Z = T2Z - T34;
+			 T80 = T7Y + T7Z;
+			 TbW = T7Y - T7Z;
+			 T8i = T89 - T86;
+			 T8j = T81 + T84;
+			 T8k = KP707106781 * (T8i - T8j);
+			 TbX = KP707106781 * (T8i + T8j);
+		    }
+		    {
+			 E T85, T8a, T8d, T8g;
+			 T85 = T81 - T84;
+			 T8a = T86 + T89;
+			 T8b = KP707106781 * (T85 - T8a);
+			 Tc0 = KP707106781 * (T8a + T85);
+			 T8d = T2O - T2T;
+			 T8g = T8e - T8f;
+			 T8h = T8d - T8g;
+			 TbZ = T8d + T8g;
+		    }
+		    {
+			 E T8c, T8l, Tde, Tdf;
+			 T8c = T80 - T8b;
+			 T8l = T8h - T8k;
+			 T8m = FNMS(KP980785280, T8l, KP195090322 * T8c);
+			 TaI = FMA(KP980785280, T8c, KP195090322 * T8l);
+			 Tde = TbW + TbX;
+			 Tdf = TbZ + Tc0;
+			 Tdg = FNMS(KP195090322, Tdf, KP980785280 * Tde);
+			 TdG = FMA(KP980785280, Tdf, KP195090322 * Tde);
+		    }
+		    {
+			 E Tb2, Tb3, TbY, Tc1;
+			 Tb2 = T80 + T8b;
+			 Tb3 = T8h + T8k;
+			 Tb4 = FNMS(KP555570233, Tb3, KP831469612 * Tb2);
+			 Tbu = FMA(KP555570233, Tb2, KP831469612 * Tb3);
+			 TbY = TbW - TbX;
+			 Tc1 = TbZ - Tc0;
+			 Tc2 = FNMS(KP831469612, Tc1, KP555570233 * TbY);
+			 TcU = FMA(KP555570233, Tc1, KP831469612 * TbY);
+		    }
+	       }
+	       {
+		    E T36, Teh, Tek, TgF, T3B, Tef, Tee, TgE, Teg, Tel;
+		    {
+			 E T2U, T35, Tei, Tej;
+			 T2U = T2O + T2T;
+			 T35 = T2Z + T34;
+			 T36 = T2U + T35;
+			 Teh = T2U - T35;
+			 Tei = T87 + T88;
+			 Tej = T82 + T83;
+			 Tek = Tei - Tej;
+			 TgF = Tei + Tej;
+		    }
+		    {
+			 E T3p, T3A, Tec, Ted;
+			 T3p = T3b + T3o;
+			 T3A = T3u + T3z;
+			 T3B = T3p + T3A;
+			 Tef = T3A - T3p;
+			 Tec = T7W + T7X;
+			 Ted = T8e + T8f;
+			 Tee = Tec - Ted;
+			 TgE = Tec + Ted;
+		    }
+		    T3C = T36 + T3B;
+		    Thy = TgE + TgF;
+		    Teg = Tee - Tef;
+		    Tel = Teh - Tek;
+		    Tem = FNMS(KP923879532, Tel, KP382683432 * Teg);
+		    Tfy = FMA(KP923879532, Teg, KP382683432 * Tel);
+		    {
+			 E TfS, TfT, TgG, TgH;
+			 TfS = Tee + Tef;
+			 TfT = Teh + Tek;
+			 TfU = FNMS(KP382683432, TfT, KP923879532 * TfS);
+			 Tgk = FMA(KP382683432, TfS, KP923879532 * TfT);
+			 TgG = TgE - TgF;
+			 TgH = T36 - T3B;
+			 TgI = TgG - TgH;
+			 Thi = TgH + TgG;
+		    }
+	       }
+	       {
+		    E T6A, Tfl, Th7, Tf4, T6e, Tar, T9Y, TcH, Tav, Tcw, T9M, Tfj;
+		    T6A = T6o + T6z;
+		    Tfl = T6z - T6o;
+		    Th7 = Tf2 + Tf3;
+		    Tf4 = Tf2 - Tf3;
+		    {
+			 E T6d, T9S, T9X, Tat, Tau, T9L;
+			 T6d = FNMS(T6b, T6c, T69 * T6a);
+			 T6e = T68 + T6d;
+			 Tar = T68 - T6d;
+			 T9S = T9Q - T9R;
+			 T9X = T9T + T9W;
+			 T9Y = KP707106781 * (T9S - T9X);
+			 TcH = KP707106781 * (T9S + T9X);
+			 Tat = T9T - T9W;
+			 Tau = T9R + T9Q;
+			 Tav = KP707106781 * (Tat - Tau);
+			 Tcw = KP707106781 * (Tau + Tat);
+			 T9L = FMA(T6b, T6a, T69 * T6c);
+			 T9M = T9K - T9L;
+			 Tfj = T9K + T9L;
+		    }
+		    {
+			 E T6f, Tfk, Th6, T9N;
+			 T6f = T65 + T6e;
+			 T6B = T6f + T6A;
+			 Th1 = T6f - T6A;
+			 Tfk = Tfi - Tfj;
+			 Tfm = Tfk - Tfl;
+			 Tga = Tfk + Tfl;
+			 Th6 = Tfi + Tfj;
+			 Th8 = Th6 - Th7;
+			 ThI = Th6 + Th7;
+			 T9N = T9J - T9M;
+			 T9Z = T9N - T9Y;
+			 Tbh = T9N + T9Y;
+		    }
+		    {
+			 E Tas, TcG, Tf1, Tcv;
+			 Tas = Taq + Tar;
+			 Taw = Tas - Tav;
+			 Tbk = Tas + Tav;
+			 TcG = Taq - Tar;
+			 TcI = TcG - TcH;
+			 Tdw = TcG + TcH;
+			 Tf1 = T65 - T6e;
+			 Tf5 = Tf1 - Tf4;
+			 Tg7 = Tf1 + Tf4;
+			 Tcv = T9J + T9M;
+			 Tcx = Tcv - Tcw;
+			 Tdt = Tcv + Tcw;
+		    }
+	       }
+	       {
+		    E T8Z, T9B, T5b, TeD, TeU, TgR, T94, T9A, T4L, T8T, T9y, TeB, T4V;
+		    T8Z = T8V - T8Y;
+		    T9B = T8V + T8Y;
+		    T4V = T4P + T4U;
+		    T5b = T4V + T5a;
+		    TeD = T5a - T4V;
+		    {
+			 E TeS, T90, T93, T4K, T9x;
+			 TeS = T91 + T92;
+			 TeU = TeS - TeT;
+			 TgR = TeS + TeT;
+			 T90 = T4P - T4U;
+			 T93 = T91 - T92;
+			 T94 = T90 + T93;
+			 T9A = T93 - T90;
+			 T4K = FMA(T4G, T4H, T4I * T4J);
+			 T4L = T4F + T4K;
+			 T8T = T4F - T4K;
+			 T9x = FNMS(T4I, T4H, T4G * T4J);
+			 T9y = T9w - T9x;
+			 TeB = T9w + T9x;
+		    }
+		    {
+			 E T4M, TeR, TgQ, TeC;
+			 T4M = T4C + T4L;
+			 T5c = T4M + T5b;
+			 TgV = T4M - T5b;
+			 TeR = T4C - T4L;
+			 TeV = TeR - TeU;
+			 Tg0 = TeR + TeU;
+			 TgQ = TeA + TeB;
+			 TgS = TgQ - TgR;
+			 ThD = TgQ + TgR;
+			 TeC = TeA - TeB;
+			 TeE = TeC - TeD;
+			 Tg3 = TeC + TeD;
+		    }
+		    {
+			 E T8U, T95, Tcc, Tcd;
+			 T8U = T8S + T8T;
+			 T95 = KP707106781 * (T8Z - T94);
+			 T96 = T8U - T95;
+			 Tbd = T8U + T95;
+			 Tcc = T8S - T8T;
+			 Tcd = KP707106781 * (T9A + T9B);
+			 Tce = Tcc - Tcd;
+			 Tdp = Tcc + Tcd;
+		    }
+		    {
+			 E Tcn, Tco, T9z, T9C;
+			 Tcn = T9v + T9y;
+			 Tco = KP707106781 * (T94 + T8Z);
+			 Tcp = Tcn - Tco;
+			 Tdm = Tcn + Tco;
+			 T9z = T9v - T9y;
+			 T9C = KP707106781 * (T9A - T9B);
+			 T9D = T9z - T9C;
+			 Tba = T9z + T9C;
+		    }
+	       }
+	       {
+		    E Tv, T7h, TdY, ThY, Ti2, Tj1, T16, Tj2, T1K, Tiw, T7q, TbK, T7v, TbL, T7k;
+		    E ThZ, T7r, T7u, T7i;
+		    {
+			 E Tu, TdW, TdX, Ti0, TM;
+			 Tu = FNMS(Ts, Tt, To * Tp);
+			 Tv = T1 + Tu;
+			 T7h = T1 - Tu;
+			 TdW = T7m + T7n;
+			 TdX = T7s + T7t;
+			 TdY = TdW - TdX;
+			 ThY = TdW + TdX;
+			 Ti0 = FMA(Ts, Tp, To * Tt);
+			 Ti2 = Ti0 + Ti1;
+			 Tj1 = Ti1 - Ti0;
+			 TM = FMA(TG, TH, TK * TL);
+			 T16 = TM + T15;
+			 Tj2 = TM - T15;
+		    }
+		    {
+			 E T1s, T1J, T7o, T7p;
+			 T1s = T1g + T1r;
+			 T1J = T1z + T1I;
+			 T1K = T1s + T1J;
+			 Tiw = T1J - T1s;
+			 T7o = T7m - T7n;
+			 T7p = T1g - T1r;
+			 T7q = T7o - T7p;
+			 TbK = T7p + T7o;
+		    }
+		    T7r = T1z - T1I;
+		    T7u = T7s - T7t;
+		    T7v = T7r + T7u;
+		    TbL = T7r - T7u;
+		    T7i = FNMS(TK, TH, TG * TL);
+		    T7k = T7i - T7j;
+		    ThZ = T7i + T7j;
+		    {
+			 E T17, Ti3, Tix, TdV;
+			 T17 = Tv + T16;
+			 T1L = T17 + T1K;
+			 Tgz = T17 - T1K;
+			 Ti3 = ThZ + Ti2;
+			 Ti4 = ThY + Ti3;
+			 Tii = Ti3 - ThY;
+			 Tix = Ti2 - ThZ;
+			 Tiy = Tiw + Tix;
+			 TiM = Tix - Tiw;
+			 TdV = Tv - T16;
+			 TdZ = TdV - TdY;
+			 TfN = TdV + TdY;
+		    }
+		    {
+			 E T7l, T7w, Tj0, Tj3;
+			 T7l = T7h - T7k;
+			 T7w = KP707106781 * (T7q - T7v);
+			 T7x = T7l - T7w;
+			 TaX = T7l + T7w;
+			 Tj0 = KP707106781 * (T7q + T7v);
+			 Tj3 = Tj1 - Tj2;
+			 Tj4 = Tj0 + Tj3;
+			 Tji = Tj3 - Tj0;
+		    }
+		    {
+			 E Tjw, Tjx, TbJ, TbM;
+			 Tjw = KP707106781 * (TbL - TbK);
+			 Tjx = Tj2 + Tj1;
+			 Tjy = Tjw + Tjx;
+			 TjM = Tjx - Tjw;
+			 TbJ = T7h + T7k;
+			 TbM = KP707106781 * (TbK + TbL);
+			 TbN = TbJ - TbM;
+			 Td9 = TbJ + TbM;
+		    }
+	       }
+	       {
+		    E T4t, ThR, Ti6, Ti8, T7g, Ti7, ThU, ThV;
+		    {
+			 E T2L, T4s, ThW, Ti5;
+			 T2L = T1L + T2K;
+			 T4s = T3C + T4r;
+			 T4t = T2L + T4s;
+			 ThR = T2L - T4s;
+			 ThW = Thy + Thz;
+			 Ti5 = ThX + Ti4;
+			 Ti6 = ThW + Ti5;
+			 Ti8 = Ti5 - ThW;
+		    }
+		    {
+			 E T5S, T7f, ThS, ThT;
+			 T5S = T5c + T5R;
+			 T7f = T6B + T7e;
+			 T7g = T5S + T7f;
+			 Ti7 = T7f - T5S;
+			 ThS = ThD + ThE;
+			 ThT = ThI + ThJ;
+			 ThU = ThS - ThT;
+			 ThV = ThS + ThT;
+		    }
+		    iio[-WS(ios, 32)] = T4t - T7g;
+		    rio[WS(ios, 32)] = ThV - Ti6;
+		    rio[0] = T4t + T7g;
+		    iio[0] = ThV + Ti6;
+		    iio[-WS(ios, 48)] = ThR - ThU;
+		    rio[WS(ios, 48)] = Ti7 - Ti8;
+		    rio[WS(ios, 16)] = ThR + ThU;
+		    iio[-WS(ios, 16)] = Ti7 + Ti8;
+	       }
+	       {
+		    E ThB, ThN, Tic, Tie, ThG, ThO, ThL, ThP;
+		    {
+			 E Thx, ThA, Tia, Tib;
+			 Thx = T1L - T2K;
+			 ThA = Thy - Thz;
+			 ThB = Thx + ThA;
+			 ThN = Thx - ThA;
+			 Tia = T4r - T3C;
+			 Tib = Ti4 - ThX;
+			 Tic = Tia + Tib;
+			 Tie = Tib - Tia;
+		    }
+		    {
+			 E ThC, ThF, ThH, ThK;
+			 ThC = T5c - T5R;
+			 ThF = ThD - ThE;
+			 ThG = ThC + ThF;
+			 ThO = ThF - ThC;
+			 ThH = T6B - T7e;
+			 ThK = ThI - ThJ;
+			 ThL = ThH - ThK;
+			 ThP = ThH + ThK;
+		    }
+		    {
+			 E ThM, Ti9, ThQ, Tid;
+			 ThM = KP707106781 * (ThG + ThL);
+			 iio[-WS(ios, 40)] = ThB - ThM;
+			 rio[WS(ios, 8)] = ThB + ThM;
+			 Ti9 = KP707106781 * (ThO + ThP);
+			 rio[WS(ios, 40)] = Ti9 - Tic;
+			 iio[-WS(ios, 8)] = Ti9 + Tic;
+			 ThQ = KP707106781 * (ThO - ThP);
+			 iio[-WS(ios, 56)] = ThN - ThQ;
+			 rio[WS(ios, 24)] = ThN + ThQ;
+			 Tid = KP707106781 * (ThL - ThG);
+			 rio[WS(ios, 56)] = Tid - Tie;
+			 iio[-WS(ios, 24)] = Tid + Tie;
+		    }
+	       }
+	       {
+		    E TgP, Thd, Tiq, Tis, Th0, The, Thb, Thf;
+		    {
+			 E TgD, TgO, Tio, Tip;
+			 TgD = Tgz - TgC;
+			 TgO = KP707106781 * (TgI - TgN);
+			 TgP = TgD + TgO;
+			 Thd = TgD - TgO;
+			 Tio = KP707106781 * (Thj - Thi);
+			 Tip = Tii - Tih;
+			 Tiq = Tio + Tip;
+			 Tis = Tip - Tio;
+		    }
+		    {
+			 E TgU, TgZ, Th5, Tha;
+			 TgU = TgS - TgT;
+			 TgZ = TgV - TgY;
+			 Th0 = FMA(KP923879532, TgU, KP382683432 * TgZ);
+			 The = FNMS(KP923879532, TgZ, KP382683432 * TgU);
+			 Th5 = Th1 - Th4;
+			 Tha = Th8 - Th9;
+			 Thb = FNMS(KP923879532, Tha, KP382683432 * Th5);
+			 Thf = FMA(KP382683432, Tha, KP923879532 * Th5);
+		    }
+		    {
+			 E Thc, Tin, Thg, Tir;
+			 Thc = Th0 + Thb;
+			 iio[-WS(ios, 44)] = TgP - Thc;
+			 rio[WS(ios, 12)] = TgP + Thc;
+			 Tin = The + Thf;
+			 rio[WS(ios, 44)] = Tin - Tiq;
+			 iio[-WS(ios, 12)] = Tin + Tiq;
+			 Thg = The - Thf;
+			 iio[-WS(ios, 60)] = Thd - Thg;
+			 rio[WS(ios, 28)] = Thd + Thg;
+			 Tir = Thb - Th0;
+			 rio[WS(ios, 60)] = Tir - Tis;
+			 iio[-WS(ios, 28)] = Tir + Tis;
+		    }
+	       }
+	       {
+		    E TfB, TfJ, TiO, TiQ, TfE, TfK, TfH, TfL;
+		    {
+			 E Tfx, TfA, TiK, TiN;
+			 Tfx = TdZ + Tea;
+			 TfA = Tfy + Tfz;
+			 TfB = Tfx + TfA;
+			 TfJ = Tfx - TfA;
+			 TiK = Tem + Tex;
+			 TiN = TiL + TiM;
+			 TiO = TiK + TiN;
+			 TiQ = TiN - TiK;
+		    }
+		    {
+			 E TfC, TfD, TfF, TfG;
+			 TfC = TeE + TeP;
+			 TfD = TeV + TeY;
+			 TfE = FMA(KP555570233, TfC, KP831469612 * TfD);
+			 TfK = FNMS(KP555570233, TfD, KP831469612 * TfC);
+			 TfF = Tf5 + Tfg;
+			 TfG = Tfm + Tfp;
+			 TfH = FNMS(KP555570233, TfG, KP831469612 * TfF);
+			 TfL = FMA(KP831469612, TfG, KP555570233 * TfF);
+		    }
+		    {
+			 E TfI, TiJ, TfM, TiP;
+			 TfI = TfE + TfH;
+			 iio[-WS(ios, 38)] = TfB - TfI;
+			 rio[WS(ios, 6)] = TfB + TfI;
+			 TiJ = TfK + TfL;
+			 rio[WS(ios, 38)] = TiJ - TiO;
+			 iio[-WS(ios, 6)] = TiJ + TiO;
+			 TfM = TfK - TfL;
+			 iio[-WS(ios, 54)] = TfJ - TfM;
+			 rio[WS(ios, 22)] = TfJ + TfM;
+			 TiP = TfH - TfE;
+			 rio[WS(ios, 54)] = TiP - TiQ;
+			 iio[-WS(ios, 22)] = TiP + TiQ;
+		    }
+	       }
+	       {
+		    E Thl, Tht, Tik, Tim, Tho, Thu, Thr, Thv;
+		    {
+			 E Thh, Thk, Tig, Tij;
+			 Thh = Tgz + TgC;
+			 Thk = KP707106781 * (Thi + Thj);
+			 Thl = Thh + Thk;
+			 Tht = Thh - Thk;
+			 Tig = KP707106781 * (TgI + TgN);
+			 Tij = Tih + Tii;
+			 Tik = Tig + Tij;
+			 Tim = Tij - Tig;
+		    }
+		    {
+			 E Thm, Thn, Thp, Thq;
+			 Thm = TgS + TgT;
+			 Thn = TgV + TgY;
+			 Tho = FMA(KP382683432, Thm, KP923879532 * Thn);
+			 Thu = FNMS(KP382683432, Thn, KP923879532 * Thm);
+			 Thp = Th1 + Th4;
+			 Thq = Th8 + Th9;
+			 Thr = FNMS(KP382683432, Thq, KP923879532 * Thp);
+			 Thv = FMA(KP923879532, Thq, KP382683432 * Thp);
+		    }
+		    {
+			 E Ths, Tif, Thw, Til;
+			 Ths = Tho + Thr;
+			 iio[-WS(ios, 36)] = Thl - Ths;
+			 rio[WS(ios, 4)] = Thl + Ths;
+			 Tif = Thu + Thv;
+			 rio[WS(ios, 36)] = Tif - Tik;
+			 iio[-WS(ios, 4)] = Tif + Tik;
+			 Thw = Thu - Thv;
+			 iio[-WS(ios, 52)] = Tht - Thw;
+			 rio[WS(ios, 20)] = Tht + Thw;
+			 Til = Thr - Tho;
+			 rio[WS(ios, 52)] = Til - Tim;
+			 iio[-WS(ios, 20)] = Til + Tim;
+		    }
+	       }
+	       {
+		    E Tez, Tft, TiU, TiW, Tf0, Tfu, Tfr, Tfv;
+		    {
+			 E Teb, Tey, TiS, TiT;
+			 Teb = TdZ - Tea;
+			 Tey = Tem - Tex;
+			 Tez = Teb + Tey;
+			 Tft = Teb - Tey;
+			 TiS = Tfz - Tfy;
+			 TiT = TiM - TiL;
+			 TiU = TiS + TiT;
+			 TiW = TiT - TiS;
+		    }
+		    {
+			 E TeQ, TeZ, Tfh, Tfq;
+			 TeQ = TeE - TeP;
+			 TeZ = TeV - TeY;
+			 Tf0 = FMA(KP980785280, TeQ, KP195090322 * TeZ);
+			 Tfu = FNMS(KP980785280, TeZ, KP195090322 * TeQ);
+			 Tfh = Tf5 - Tfg;
+			 Tfq = Tfm - Tfp;
+			 Tfr = FNMS(KP980785280, Tfq, KP195090322 * Tfh);
+			 Tfv = FMA(KP195090322, Tfq, KP980785280 * Tfh);
+		    }
+		    {
+			 E Tfs, TiR, Tfw, TiV;
+			 Tfs = Tf0 + Tfr;
+			 iio[-WS(ios, 46)] = Tez - Tfs;
+			 rio[WS(ios, 14)] = Tez + Tfs;
+			 TiR = Tfu + Tfv;
+			 rio[WS(ios, 46)] = TiR - TiU;
+			 iio[-WS(ios, 14)] = TiR + TiU;
+			 Tfw = Tfu - Tfv;
+			 iio[-WS(ios, 62)] = Tft - Tfw;
+			 rio[WS(ios, 30)] = Tft + Tfw;
+			 TiV = Tfr - Tf0;
+			 rio[WS(ios, 62)] = TiV - TiW;
+			 iio[-WS(ios, 30)] = TiV + TiW;
+		    }
+	       }
+	       {
+		    E TfZ, Tgf, TiG, TiI, Tg6, Tgg, Tgd, Tgh;
+		    {
+			 E TfR, TfY, TiE, TiF;
+			 TfR = TfN - TfQ;
+			 TfY = TfU - TfX;
+			 TfZ = TfR + TfY;
+			 Tgf = TfR - TfY;
+			 TiE = Tgl - Tgk;
+			 TiF = Tiy - Tiv;
+			 TiG = TiE + TiF;
+			 TiI = TiF - TiE;
+		    }
+		    {
+			 E Tg2, Tg5, Tg9, Tgc;
+			 Tg2 = Tg0 - Tg1;
+			 Tg5 = Tg3 - Tg4;
+			 Tg6 = FMA(KP555570233, Tg2, KP831469612 * Tg5);
+			 Tgg = FNMS(KP831469612, Tg2, KP555570233 * Tg5);
+			 Tg9 = Tg7 - Tg8;
+			 Tgc = Tga - Tgb;
+			 Tgd = FNMS(KP831469612, Tgc, KP555570233 * Tg9);
+			 Tgh = FMA(KP831469612, Tg9, KP555570233 * Tgc);
+		    }
+		    {
+			 E Tge, TiD, Tgi, TiH;
+			 Tge = Tg6 + Tgd;
+			 iio[-WS(ios, 42)] = TfZ - Tge;
+			 rio[WS(ios, 10)] = TfZ + Tge;
+			 TiD = Tgg + Tgh;
+			 rio[WS(ios, 42)] = TiD - TiG;
+			 iio[-WS(ios, 10)] = TiD + TiG;
+			 Tgi = Tgg - Tgh;
+			 iio[-WS(ios, 58)] = Tgf - Tgi;
+			 rio[WS(ios, 26)] = Tgf + Tgi;
+			 TiH = Tgd - Tg6;
+			 rio[WS(ios, 58)] = TiH - TiI;
+			 iio[-WS(ios, 26)] = TiH + TiI;
+		    }
+	       }
+	       {
+		    E Tgn, Tgv, TiA, TiC, Tgq, Tgw, Tgt, Tgx;
+		    {
+			 E Tgj, Tgm, Tiu, Tiz;
+			 Tgj = TfN + TfQ;
+			 Tgm = Tgk + Tgl;
+			 Tgn = Tgj + Tgm;
+			 Tgv = Tgj - Tgm;
+			 Tiu = TfU + TfX;
+			 Tiz = Tiv + Tiy;
+			 TiA = Tiu + Tiz;
+			 TiC = Tiz - Tiu;
+		    }
+		    {
+			 E Tgo, Tgp, Tgr, Tgs;
+			 Tgo = Tg0 + Tg1;
+			 Tgp = Tg3 + Tg4;
+			 Tgq = FMA(KP980785280, Tgo, KP195090322 * Tgp);
+			 Tgw = FNMS(KP195090322, Tgo, KP980785280 * Tgp);
+			 Tgr = Tg7 + Tg8;
+			 Tgs = Tga + Tgb;
+			 Tgt = FNMS(KP195090322, Tgs, KP980785280 * Tgr);
+			 Tgx = FMA(KP195090322, Tgr, KP980785280 * Tgs);
+		    }
+		    {
+			 E Tgu, Tit, Tgy, TiB;
+			 Tgu = Tgq + Tgt;
+			 iio[-WS(ios, 34)] = Tgn - Tgu;
+			 rio[WS(ios, 2)] = Tgn + Tgu;
+			 Tit = Tgw + Tgx;
+			 rio[WS(ios, 34)] = Tit - TiA;
+			 iio[-WS(ios, 2)] = Tit + TiA;
+			 Tgy = Tgw - Tgx;
+			 iio[-WS(ios, 50)] = Tgv - Tgy;
+			 rio[WS(ios, 18)] = Tgv + Tgy;
+			 TiB = Tgt - Tgq;
+			 rio[WS(ios, 50)] = TiB - TiC;
+			 iio[-WS(ios, 18)] = TiB + TiC;
+		    }
+	       }
+	       {
+		    E T7V, TjN, TjT, TaH, T8O, TjK, TaK, TjS, TaO, TaU, T9I, TaE, TaR, TaV, TaB;
+		    E TaF, T8N;
+		    T7V = T7x - T7U;
+		    TjN = TjL + TjM;
+		    TjT = TjM - TjL;
+		    TaH = T7x + T7U;
+		    T8N = FMA(KP195090322, T8D, KP980785280 * T8M);
+		    T8O = T8m - T8N;
+		    TjK = T8m + T8N;
+		    {
+			 E TaJ, TaM, TaN, T9u, T9H;
+			 TaJ = FNMS(KP980785280, T8D, KP195090322 * T8M);
+			 TaK = TaI + TaJ;
+			 TjS = TaJ - TaI;
+			 TaM = T96 + T9t;
+			 TaN = T9D + T9G;
+			 TaO = FMA(KP634393284, TaM, KP773010453 * TaN);
+			 TaU = FNMS(KP634393284, TaN, KP773010453 * TaM);
+			 T9u = T96 - T9t;
+			 T9H = T9D - T9G;
+			 T9I = FMA(KP995184726, T9u, KP098017140 * T9H);
+			 TaE = FNMS(KP995184726, T9H, KP098017140 * T9u);
+			 {
+			      E TaP, TaQ, Tan, TaA;
+			      TaP = T9Z + Tam;
+			      TaQ = Taw + Taz;
+			      TaR = FNMS(KP634393284, TaQ, KP773010453 * TaP);
+			      TaV = FMA(KP773010453, TaQ, KP634393284 * TaP);
+			      Tan = T9Z - Tam;
+			      TaA = Taw - Taz;
+			      TaB = FNMS(KP995184726, TaA, KP098017140 * Tan);
+			      TaF = FMA(KP098017140, TaA, KP995184726 * Tan);
+			 }
+		    }
+		    {
+			 E T8P, TaC, TjR, TjU;
+			 T8P = T7V + T8O;
+			 TaC = T9I + TaB;
+			 iio[-WS(ios, 47)] = T8P - TaC;
+			 rio[WS(ios, 15)] = T8P + TaC;
+			 TjR = TaE + TaF;
+			 TjU = TjS + TjT;
+			 rio[WS(ios, 47)] = TjR - TjU;
+			 iio[-WS(ios, 15)] = TjR + TjU;
+		    }
+		    {
+			 E TaD, TaG, TjV, TjW;
+			 TaD = T7V - T8O;
+			 TaG = TaE - TaF;
+			 iio[-WS(ios, 63)] = TaD - TaG;
+			 rio[WS(ios, 31)] = TaD + TaG;
+			 TjV = TaB - T9I;
+			 TjW = TjT - TjS;
+			 rio[WS(ios, 63)] = TjV - TjW;
+			 iio[-WS(ios, 31)] = TjV + TjW;
+		    }
+		    {
+			 E TaL, TaS, TjJ, TjO;
+			 TaL = TaH + TaK;
+			 TaS = TaO + TaR;
+			 iio[-WS(ios, 39)] = TaL - TaS;
+			 rio[WS(ios, 7)] = TaL + TaS;
+			 TjJ = TaU + TaV;
+			 TjO = TjK + TjN;
+			 rio[WS(ios, 39)] = TjJ - TjO;
+			 iio[-WS(ios, 7)] = TjJ + TjO;
+		    }
+		    {
+			 E TaT, TaW, TjP, TjQ;
+			 TaT = TaH - TaK;
+			 TaW = TaU - TaV;
+			 iio[-WS(ios, 55)] = TaT - TaW;
+			 rio[WS(ios, 23)] = TaT + TaW;
+			 TjP = TaR - TaO;
+			 TjQ = TjN - TjK;
+			 rio[WS(ios, 55)] = TjP - TjQ;
+			 iio[-WS(ios, 23)] = TjP + TjQ;
+		    }
+	       }
+	       {
+		    E TbV, Tjj, Tjp, TcT, Tca, Tjg, TcW, Tjo, Td0, Td6, Tcu, TcQ, Td3, Td7, TcN;
+		    E TcR, Tc9;
+		    TbV = TbN - TbU;
+		    Tjj = Tjh + Tji;
+		    Tjp = Tji - Tjh;
+		    TcT = TbN + TbU;
+		    Tc9 = FMA(KP831469612, Tc5, KP555570233 * Tc8);
+		    Tca = Tc2 - Tc9;
+		    Tjg = Tc2 + Tc9;
+		    {
+			 E TcV, TcY, TcZ, Tcm, Tct;
+			 TcV = FNMS(KP831469612, Tc8, KP555570233 * Tc5);
+			 TcW = TcU + TcV;
+			 Tjo = TcV - TcU;
+			 TcY = Tce + Tcl;
+			 TcZ = Tcp + Tcs;
+			 Td0 = FMA(KP471396736, TcY, KP881921264 * TcZ);
+			 Td6 = FNMS(KP471396736, TcZ, KP881921264 * TcY);
+			 Tcm = Tce - Tcl;
+			 Tct = Tcp - Tcs;
+			 Tcu = FMA(KP956940335, Tcm, KP290284677 * Tct);
+			 TcQ = FNMS(KP956940335, Tct, KP290284677 * Tcm);
+			 {
+			      E Td1, Td2, TcF, TcM;
+			      Td1 = Tcx + TcE;
+			      Td2 = TcI + TcL;
+			      Td3 = FNMS(KP471396736, Td2, KP881921264 * Td1);
+			      Td7 = FMA(KP881921264, Td2, KP471396736 * Td1);
+			      TcF = Tcx - TcE;
+			      TcM = TcI - TcL;
+			      TcN = FNMS(KP956940335, TcM, KP290284677 * TcF);
+			      TcR = FMA(KP290284677, TcM, KP956940335 * TcF);
+			 }
+		    }
+		    {
+			 E Tcb, TcO, Tjn, Tjq;
+			 Tcb = TbV + Tca;
+			 TcO = Tcu + TcN;
+			 iio[-WS(ios, 45)] = Tcb - TcO;
+			 rio[WS(ios, 13)] = Tcb + TcO;
+			 Tjn = TcQ + TcR;
+			 Tjq = Tjo + Tjp;
+			 rio[WS(ios, 45)] = Tjn - Tjq;
+			 iio[-WS(ios, 13)] = Tjn + Tjq;
+		    }
+		    {
+			 E TcP, TcS, Tjr, Tjs;
+			 TcP = TbV - Tca;
+			 TcS = TcQ - TcR;
+			 iio[-WS(ios, 61)] = TcP - TcS;
+			 rio[WS(ios, 29)] = TcP + TcS;
+			 Tjr = TcN - Tcu;
+			 Tjs = Tjp - Tjo;
+			 rio[WS(ios, 61)] = Tjr - Tjs;
+			 iio[-WS(ios, 29)] = Tjr + Tjs;
+		    }
+		    {
+			 E TcX, Td4, Tjf, Tjk;
+			 TcX = TcT + TcW;
+			 Td4 = Td0 + Td3;
+			 iio[-WS(ios, 37)] = TcX - Td4;
+			 rio[WS(ios, 5)] = TcX + Td4;
+			 Tjf = Td6 + Td7;
+			 Tjk = Tjg + Tjj;
+			 rio[WS(ios, 37)] = Tjf - Tjk;
+			 iio[-WS(ios, 5)] = Tjf + Tjk;
+		    }
+		    {
+			 E Td5, Td8, Tjl, Tjm;
+			 Td5 = TcT - TcW;
+			 Td8 = Td6 - Td7;
+			 iio[-WS(ios, 53)] = Td5 - Td8;
+			 rio[WS(ios, 21)] = Td5 + Td8;
+			 Tjl = Td3 - Td0;
+			 Tjm = Tjj - Tjg;
+			 rio[WS(ios, 53)] = Tjl - Tjm;
+			 iio[-WS(ios, 21)] = Tjl + Tjm;
+		    }
+	       }
+	       {
+		    E Tb1, Tjz, TjF, Tbt, Tb8, Tju, Tbw, TjE, TbA, TbG, Tbg, Tbq, TbD, TbH, Tbn;
+		    E Tbr, Tb7;
+		    Tb1 = TaX - Tb0;
+		    Tjz = Tjv + Tjy;
+		    TjF = Tjy - Tjv;
+		    Tbt = TaX + Tb0;
+		    Tb7 = FMA(KP831469612, Tb5, KP555570233 * Tb6);
+		    Tb8 = Tb4 - Tb7;
+		    Tju = Tb4 + Tb7;
+		    {
+			 E Tbv, Tby, Tbz, Tbc, Tbf;
+			 Tbv = FNMS(KP555570233, Tb5, KP831469612 * Tb6);
+			 Tbw = Tbu + Tbv;
+			 TjE = Tbv - Tbu;
+			 Tby = Tba + Tbb;
+			 Tbz = Tbd + Tbe;
+			 TbA = FMA(KP956940335, Tby, KP290284677 * Tbz);
+			 TbG = FNMS(KP290284677, Tby, KP956940335 * Tbz);
+			 Tbc = Tba - Tbb;
+			 Tbf = Tbd - Tbe;
+			 Tbg = FMA(KP471396736, Tbc, KP881921264 * Tbf);
+			 Tbq = FNMS(KP881921264, Tbc, KP471396736 * Tbf);
+			 {
+			      E TbB, TbC, Tbj, Tbm;
+			      TbB = Tbh + Tbi;
+			      TbC = Tbk + Tbl;
+			      TbD = FNMS(KP290284677, TbC, KP956940335 * TbB);
+			      TbH = FMA(KP290284677, TbB, KP956940335 * TbC);
+			      Tbj = Tbh - Tbi;
+			      Tbm = Tbk - Tbl;
+			      Tbn = FNMS(KP881921264, Tbm, KP471396736 * Tbj);
+			      Tbr = FMA(KP881921264, Tbj, KP471396736 * Tbm);
+			 }
+		    }
+		    {
+			 E Tb9, Tbo, TjD, TjG;
+			 Tb9 = Tb1 + Tb8;
+			 Tbo = Tbg + Tbn;
+			 iio[-WS(ios, 43)] = Tb9 - Tbo;
+			 rio[WS(ios, 11)] = Tb9 + Tbo;
+			 TjD = Tbq + Tbr;
+			 TjG = TjE + TjF;
+			 rio[WS(ios, 43)] = TjD - TjG;
+			 iio[-WS(ios, 11)] = TjD + TjG;
+		    }
+		    {
+			 E Tbp, Tbs, TjH, TjI;
+			 Tbp = Tb1 - Tb8;
+			 Tbs = Tbq - Tbr;
+			 iio[-WS(ios, 59)] = Tbp - Tbs;
+			 rio[WS(ios, 27)] = Tbp + Tbs;
+			 TjH = Tbn - Tbg;
+			 TjI = TjF - TjE;
+			 rio[WS(ios, 59)] = TjH - TjI;
+			 iio[-WS(ios, 27)] = TjH + TjI;
+		    }
+		    {
+			 E Tbx, TbE, Tjt, TjA;
+			 Tbx = Tbt + Tbw;
+			 TbE = TbA + TbD;
+			 iio[-WS(ios, 35)] = Tbx - TbE;
+			 rio[WS(ios, 3)] = Tbx + TbE;
+			 Tjt = TbG + TbH;
+			 TjA = Tju + Tjz;
+			 rio[WS(ios, 35)] = Tjt - TjA;
+			 iio[-WS(ios, 3)] = Tjt + TjA;
+		    }
+		    {
+			 E TbF, TbI, TjB, TjC;
+			 TbF = Tbt - Tbw;
+			 TbI = TbG - TbH;
+			 iio[-WS(ios, 51)] = TbF - TbI;
+			 rio[WS(ios, 19)] = TbF + TbI;
+			 TjB = TbD - TbA;
+			 TjC = Tjz - Tju;
+			 rio[WS(ios, 51)] = TjB - TjC;
+			 iio[-WS(ios, 19)] = TjB + TjC;
+		    }
+	       }
+	       {
+		    E Tdd, Tj5, Tjb, TdF, Tdk, TiY, TdI, Tja, TdM, TdS, Tds, TdC, TdP, TdT, Tdz;
+		    E TdD, Tdj;
+		    Tdd = Td9 - Tdc;
+		    Tj5 = TiZ + Tj4;
+		    Tjb = Tj4 - TiZ;
+		    TdF = Td9 + Tdc;
+		    Tdj = FMA(KP195090322, Tdh, KP980785280 * Tdi);
+		    Tdk = Tdg - Tdj;
+		    TiY = Tdg + Tdj;
+		    {
+			 E TdH, TdK, TdL, Tdo, Tdr;
+			 TdH = FNMS(KP195090322, Tdi, KP980785280 * Tdh);
+			 TdI = TdG + TdH;
+			 Tja = TdH - TdG;
+			 TdK = Tdm + Tdn;
+			 TdL = Tdp + Tdq;
+			 TdM = FMA(KP995184726, TdK, KP098017140 * TdL);
+			 TdS = FNMS(KP098017140, TdK, KP995184726 * TdL);
+			 Tdo = Tdm - Tdn;
+			 Tdr = Tdp - Tdq;
+			 Tds = FMA(KP634393284, Tdo, KP773010453 * Tdr);
+			 TdC = FNMS(KP773010453, Tdo, KP634393284 * Tdr);
+			 {
+			      E TdN, TdO, Tdv, Tdy;
+			      TdN = Tdt + Tdu;
+			      TdO = Tdw + Tdx;
+			      TdP = FNMS(KP098017140, TdO, KP995184726 * TdN);
+			      TdT = FMA(KP098017140, TdN, KP995184726 * TdO);
+			      Tdv = Tdt - Tdu;
+			      Tdy = Tdw - Tdx;
+			      Tdz = FNMS(KP773010453, Tdy, KP634393284 * Tdv);
+			      TdD = FMA(KP773010453, Tdv, KP634393284 * Tdy);
+			 }
+		    }
+		    {
+			 E Tdl, TdA, Tj9, Tjc;
+			 Tdl = Tdd + Tdk;
+			 TdA = Tds + Tdz;
+			 iio[-WS(ios, 41)] = Tdl - TdA;
+			 rio[WS(ios, 9)] = Tdl + TdA;
+			 Tj9 = TdC + TdD;
+			 Tjc = Tja + Tjb;
+			 rio[WS(ios, 41)] = Tj9 - Tjc;
+			 iio[-WS(ios, 9)] = Tj9 + Tjc;
+		    }
+		    {
+			 E TdB, TdE, Tjd, Tje;
+			 TdB = Tdd - Tdk;
+			 TdE = TdC - TdD;
+			 iio[-WS(ios, 57)] = TdB - TdE;
+			 rio[WS(ios, 25)] = TdB + TdE;
+			 Tjd = Tdz - Tds;
+			 Tje = Tjb - Tja;
+			 rio[WS(ios, 57)] = Tjd - Tje;
+			 iio[-WS(ios, 25)] = Tjd + Tje;
+		    }
+		    {
+			 E TdJ, TdQ, TiX, Tj6;
+			 TdJ = TdF + TdI;
+			 TdQ = TdM + TdP;
+			 iio[-WS(ios, 33)] = TdJ - TdQ;
+			 rio[WS(ios, 1)] = TdJ + TdQ;
+			 TiX = TdS + TdT;
+			 Tj6 = TiY + Tj5;
+			 rio[WS(ios, 33)] = TiX - Tj6;
+			 iio[-WS(ios, 1)] = TiX + Tj6;
+		    }
+		    {
+			 E TdR, TdU, Tj7, Tj8;
+			 TdR = TdF - TdI;
+			 TdU = TdS - TdT;
+			 iio[-WS(ios, 49)] = TdR - TdU;
+			 rio[WS(ios, 17)] = TdR + TdU;
+			 Tj7 = TdP - TdM;
+			 Tj8 = Tj5 - TiY;
+			 rio[WS(ios, 49)] = Tj7 - Tj8;
+			 iio[-WS(ios, 17)] = Tj7 + Tj8;
+		    }
+	       }
+	  }
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = { /* twiddle-factor recipe for hf2_64; referenced by desc below */
+     {TW_COS, 0, 1}, /* entries come in TW_COS/TW_SIN pairs sharing the same last field */
+     {TW_SIN, 0, 1},
+     {TW_COS, 0, 3}, /* last field takes the values 1, 3, 9, 27, 63 -- presumably the twiddle exponents; confirm against tw_instr */
+     {TW_SIN, 0, 3},
+     {TW_COS, 0, 9}, /* NOTE(review): this sparse index set looks like genfft's -twiddle-log3 scheme (used by sibling hf2_8) -- confirm */
+     {TW_SIN, 0, 9},
+     {TW_COS, 0, 27},
+     {TW_SIN, 0, 27},
+     {TW_COS, 0, 63},
+     {TW_SIN, 0, 63},
+     {TW_NEXT, 1, 0} /* final entry; TW_NEXT presumably terminates the recipe / advances to the next twiddle row -- confirm */
+};
+
+static const hc2hc_desc desc = { 64, "hf2_64", twinstr, {880, 386, 274, 0}, &GENUS, 0, 0, 0 }; /* descriptor for codelet "hf2_64" using the twinstr recipe; 64 is presumably the radix and {880, 386, 274, 0} the add/mul/fma/other op counts -- confirm against hc2hc_desc */
+
+void X(codelet_hf2_64) (planner *p) { /* registration hook: passes the hf2_64 codelet and its descriptor to planner p */
+     X(khc2hc_dit_register) (p, hf2_64, &desc); /* "dit" presumably means decimation in time -- confirm against the registrar's declaration */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf2_8.c b/src/fftw3/rdft/codelets/r2hc/hf2_8.c
new file mode 100644
index 0000000..663e8dd
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf2_8.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:57:27 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -twiddle-log3 -n 8 -dit -name hf2_8 -include hf.h */
+
+/*
+ * This function contains 74 FP additions, 44 FP multiplications,
+ * (or, 56 additions, 26 multiplications, 18 fused multiply/add),
+ * 50 stack variables, and 32 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf2_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf2_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf2_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf2_8(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{ /* generated size-8 kernel: rewrites 8 rio and 8 iio elements in place per loop pass and returns the advanced W */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(1/2) */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 6) { /* i counts down by 2; each pass moves rio forward and iio backward by dist and consumes 6 twiddle values */
+	  E T1, T1c, TJ, T17, TY, TV, TR, Tk, Tr, TN, TM, Tw, TB, TS, Te;
+	  E T1b;
+	  T1 = rio[0]; /* rio[0] and iio[-WS(ios, 7)] enter the butterflies without a twiddle multiply */
+	  T1c = iio[-WS(ios, 7)];
+	  {
+	       E T9, T3, T6, T4, Tb, T7, Ta, Tg, Ti, TI, TX, Tl, Tm, Tn, TW;
+	       E Tp, TF, Td, TA, Th, Ty, Tj, Tv, To, Tq, Tu;
+	       T9 = rio[WS(ios, 4)];
+	       {
+		    E TD, TH, TE, TG, T2, T5;
+		    TD = rio[WS(ios, 7)]; /* input k pairs rio[WS(ios, k)] with iio[-WS(ios, 7 - k)] */
+		    TH = iio[-WS(ios, 4)];
+		    TE = iio[0];
+		    TG = rio[WS(ios, 3)];
+		    T2 = W[2]; /* the only twiddle loads are W[0]..W[5] (T3, T6, T2, T5, Tl, Tm) */
+		    T5 = W[3];
+		    T3 = W[0];
+		    T6 = W[1];
+		    T4 = T2 * T3; /* products of loaded twiddles; Tg, Ti, T8, Tc, Tn, Tp, Tx, Tz are built from these and from Tl, Tm instead of being loaded */
+		    Tb = T5 * T3;
+		    T7 = T5 * T6;
+		    Ta = T2 * T6;
+		    Tg = T4 + T7;
+		    Ti = Ta - Tb;
+		    TI = FMA(T2, TG, T5 * TH);
+		    TX = FNMS(T5, TG, T2 * TH);
+		    Tl = W[4];
+		    Tm = W[5];
+		    Tn = FMA(Tl, T3, Tm * T6);
+		    TW = FNMS(Tm, TD, Tl * TE);
+		    Tp = FNMS(Tm, T3, Tl * T6);
+		    TF = FMA(Tl, TD, Tm * TE);
+	       }
+	       Td = iio[-WS(ios, 3)];
+	       TA = iio[-WS(ios, 2)];
+	       Th = rio[WS(ios, 2)];
+	       Ty = rio[WS(ios, 5)];
+	       Tj = iio[-WS(ios, 5)];
+	       Tv = iio[-WS(ios, 6)];
+	       To = rio[WS(ios, 6)];
+	       Tq = iio[-WS(ios, 1)];
+	       Tu = rio[WS(ios, 1)];
+	       TJ = TF + TI;
+	       T17 = TW + TX;
+	       TY = TW - TX;
+	       TV = TF - TI;
+	       TR = FNMS(T6, Tu, T3 * Tv);
+	       Tk = FNMS(Ti, Tj, Tg * Th);
+	       Tr = FNMS(Tp, Tq, Tn * To);
+	       TN = FMA(Tp, To, Tn * Tq);
+	       TM = FMA(Ti, Th, Tg * Tj);
+	       Tw = FMA(T3, Tu, T6 * Tv);
+	       {
+		    E Tx, Tz, T8, Tc;
+		    Tx = FNMS(Tm, Ti, Tl * Tg);
+		    Tz = FMA(Tl, Ti, Tm * Tg);
+		    TB = FMA(Tx, Ty, Tz * TA);
+		    TS = FNMS(Tz, Ty, Tx * TA);
+		    T8 = T4 - T7;
+		    Tc = Ta + Tb;
+		    Te = FMA(T8, T9, Tc * Td);
+		    T1b = FNMS(Tc, T9, T8 * Td);
+	       }
+	  }
+	  { /* stores the outputs at offsets 0, 2, 4 and 6 of rio and iio */
+	       E TK, T1f, T18, T19, Tt, T15, T1e, T1g, TC, T16;
+	       TC = Tw + TB;
+	       TK = TC + TJ;
+	       T1f = TJ - TC;
+	       T16 = TR + TS;
+	       T18 = T16 - T17;
+	       T19 = T16 + T17;
+	       {
+		    E Tf, Ts, T1a, T1d;
+		    Tf = T1 + Te;
+		    Ts = Tk + Tr;
+		    Tt = Tf + Ts;
+		    T15 = Tf - Ts;
+		    T1a = TM + TN;
+		    T1d = T1b + T1c;
+		    T1e = T1a + T1d;
+		    T1g = T1d - T1a;
+	       }
+	       iio[-WS(ios, 4)] = Tt - TK;
+	       rio[WS(ios, 4)] = T19 - T1e;
+	       rio[0] = Tt + TK;
+	       iio[0] = T19 + T1e;
+	       iio[-WS(ios, 6)] = T15 - T18;
+	       rio[WS(ios, 6)] = T1f - T1g;
+	       rio[WS(ios, 2)] = T15 + T18;
+	       iio[-WS(ios, 2)] = T1f + T1g;
+	  }
+	  { /* stores the outputs at offsets 1, 3, 5 and 7; these are the ones scaled by KP707106781 */
+	       E TZ, T13, TP, T11, TU, T12, T1k, T1m, TL, TO;
+	       TZ = TV - TY;
+	       T13 = TV + TY;
+	       TL = T1 - Te;
+	       TO = TM - TN;
+	       TP = TL + TO;
+	       T11 = TL - TO;
+	       {
+		    E TQ, TT, T1i, T1j;
+		    TQ = Tw - TB;
+		    TT = TR - TS;
+		    TU = TQ + TT;
+		    T12 = TT - TQ;
+		    T1i = T1c - T1b;
+		    T1j = Tk - Tr;
+		    T1k = T1i - T1j;
+		    T1m = T1j + T1i;
+	       }
+	       {
+		    E T10, T1h, T14, T1l;
+		    T10 = KP707106781 * (TU + TZ);
+		    iio[-WS(ios, 5)] = TP - T10;
+		    rio[WS(ios, 1)] = TP + T10;
+		    T1h = KP707106781 * (T12 + T13);
+		    rio[WS(ios, 5)] = T1h - T1k;
+		    iio[-WS(ios, 1)] = T1h + T1k;
+		    T14 = KP707106781 * (T12 - T13);
+		    iio[-WS(ios, 7)] = T11 - T14;
+		    rio[WS(ios, 3)] = T11 + T14;
+		    T1l = KP707106781 * (TZ - TU);
+		    rio[WS(ios, 7)] = T1l - T1m;
+		    iio[-WS(ios, 3)] = T1l + T1m;
+	       }
+	  }
+     }
+     return W; /* W has been advanced by 6 for every loop pass executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle recipe stored in desc below */
+     {TW_COS, 0, 1}, /* three COS/SIN pairs (last fields 1, 3, 7): six entries, the same count as the W + 6 step in hf2_8's loop */
+     {TW_SIN, 0, 1},
+     {TW_COS, 0, 3},
+     {TW_SIN, 0, 3},
+     {TW_COS, 0, 7},
+     {TW_SIN, 0, 7},
+     {TW_NEXT, 1, 0} /* last entry; NOTE(review): field meanings come from the tw_instr definition, not visible here */
+};
+
+static const hc2hc_desc desc = { 8, "hf2_8", twinstr, {56, 26, 18, 0}, &GENUS, 0, 0, 0 }; /* 8 is the -n value given to the generator; {56, 26, 18, ...} equals the add / mul / fused multiply-add counts in this file's header comment */
+
+void X(codelet_hf2_8) (planner *p) { /* registration entry point for the hf2_8 kernel */
+     X(khc2hc_dit_register) (p, hf2_8, &desc); /* hands the kernel and its descriptor to planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_10.c b/src/fftw3/rdft/codelets/r2hc/hf_10.c
new file mode 100644
index 0000000..bb970fa
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_10.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:57:02 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 10 -dit -name hf_10 -include hf.h */
+
+/*
+ * This function contains 102 FP additions, 60 FP multiplications,
+ * (or, 72 additions, 30 multiplications, 30 fused multiply/add),
+ * 45 stack variables, and 40 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_10.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_10.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_10.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf_10(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{ /* generated size-10 kernel: rewrites 10 rio and 10 iio elements in place per loop pass and returns the advanced W */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 18) { /* i counts down by 2; each pass moves rio forward and iio backward by dist and consumes 18 twiddle values */
+	  E T7, T1O, TT, T1C, TF, TQ, TR, T1r, T1s, T1L, TX, TY, TZ, T16, T19;
+	  E T1y, Ti, Tt, Tu, T1o, T1p, T1M, TU, TV, TW, T1d, T1g, T1x;
+	  {
+	       E T1, T1B, T6, T1A;
+	       T1 = rio[0]; /* rio[0] and iio[-WS(ios, 9)] are used without a twiddle multiply */
+	       T1B = iio[-WS(ios, 9)];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = rio[WS(ios, 5)]; /* pattern for inputs k = 1..9: rio[WS(ios, k)] and iio[-WS(ios, 9 - k)] are combined with W[2k - 2], W[2k - 1]; here k = 5 */
+		    T5 = iio[-WS(ios, 4)];
+		    T2 = W[8];
+		    T4 = W[9];
+		    T6 = FMA(T2, T3, T4 * T5);
+		    T1A = FNMS(T4, T3, T2 * T5);
+	       }
+	       T7 = T1 - T6;
+	       T1O = T1B - T1A;
+	       TT = T1 + T6;
+	       T1C = T1A + T1B;
+	  }
+	  { /* twiddled inputs 4, 1, 9, 6 and their sums/differences */
+	       E Tz, T14, TP, T18, TE, T15, TK, T17;
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = rio[WS(ios, 4)];
+		    Ty = iio[-WS(ios, 5)];
+		    Tv = W[6];
+		    Tx = W[7];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    T14 = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E TM, TO, TL, TN;
+		    TM = rio[WS(ios, 1)];
+		    TO = iio[-WS(ios, 8)];
+		    TL = W[0];
+		    TN = W[1];
+		    TP = FMA(TL, TM, TN * TO);
+		    T18 = FNMS(TN, TM, TL * TO);
+	       }
+	       {
+		    E TB, TD, TA, TC;
+		    TB = rio[WS(ios, 9)];
+		    TD = iio[0];
+		    TA = W[16];
+		    TC = W[17];
+		    TE = FMA(TA, TB, TC * TD);
+		    T15 = FNMS(TC, TB, TA * TD);
+	       }
+	       {
+		    E TH, TJ, TG, TI;
+		    TH = rio[WS(ios, 6)];
+		    TJ = iio[-WS(ios, 3)];
+		    TG = W[10];
+		    TI = W[11];
+		    TK = FMA(TG, TH, TI * TJ);
+		    T17 = FNMS(TI, TH, TG * TJ);
+	       }
+	       TF = Tz - TE;
+	       TQ = TK - TP;
+	       TR = TF + TQ;
+	       T1r = T14 - T15;
+	       T1s = T18 - T17;
+	       T1L = T1s - T1r;
+	       TX = Tz + TE;
+	       TY = TK + TP;
+	       TZ = TX + TY;
+	       T16 = T14 + T15;
+	       T19 = T17 + T18;
+	       T1y = T16 + T19;
+	  }
+	  { /* twiddled inputs 2, 7, 8, 3 and their sums/differences */
+	       E Tc, T1b, Th, T1c, Tn, T1e, Ts, T1f;
+	       {
+		    E T9, Tb, T8, Ta;
+		    T9 = rio[WS(ios, 2)];
+		    Tb = iio[-WS(ios, 7)];
+		    T8 = W[2];
+		    Ta = W[3];
+		    Tc = FMA(T8, T9, Ta * Tb);
+		    T1b = FNMS(Ta, T9, T8 * Tb);
+	       }
+	       {
+		    E Te, Tg, Td, Tf;
+		    Te = rio[WS(ios, 7)];
+		    Tg = iio[-WS(ios, 2)];
+		    Td = W[12];
+		    Tf = W[13];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    T1c = FNMS(Tf, Te, Td * Tg);
+	       }
+	       {
+		    E Tk, Tm, Tj, Tl;
+		    Tk = rio[WS(ios, 8)];
+		    Tm = iio[-WS(ios, 1)];
+		    Tj = W[14];
+		    Tl = W[15];
+		    Tn = FMA(Tj, Tk, Tl * Tm);
+		    T1e = FNMS(Tl, Tk, Tj * Tm);
+	       }
+	       {
+		    E Tp, Tr, To, Tq;
+		    Tp = rio[WS(ios, 3)];
+		    Tr = iio[-WS(ios, 6)];
+		    To = W[4];
+		    Tq = W[5];
+		    Ts = FMA(To, Tp, Tq * Tr);
+		    T1f = FNMS(Tq, Tp, To * Tr);
+	       }
+	       Ti = Tc - Th;
+	       Tt = Tn - Ts;
+	       Tu = Ti + Tt;
+	       T1o = T1b - T1c;
+	       T1p = T1e - T1f;
+	       T1M = T1o + T1p;
+	       TU = Tc + Th;
+	       TV = Tn + Ts;
+	       TW = TU + TV;
+	       T1d = T1b + T1c;
+	       T1g = T1e + T1f;
+	       T1x = T1d + T1g;
+	  }
+	  { /* stores iio[-5], iio[-7], rio[3], iio[-9], rio[1] (offsets in units of WS(ios, .)) */
+	       E T1l, TS, T1m, T1u, T1w, T1q, T1t, T1v, T1n;
+	       T1l = KP559016994 * (Tu - TR);
+	       TS = Tu + TR;
+	       T1m = FNMS(KP250000000, TS, T7);
+	       T1q = T1o - T1p;
+	       T1t = T1r + T1s;
+	       T1u = FMA(KP951056516, T1q, KP587785252 * T1t);
+	       T1w = FNMS(KP587785252, T1q, KP951056516 * T1t);
+	       iio[-WS(ios, 5)] = T7 + TS;
+	       T1v = T1m - T1l;
+	       iio[-WS(ios, 7)] = T1v - T1w;
+	       rio[WS(ios, 3)] = T1v + T1w;
+	       T1n = T1l + T1m;
+	       iio[-WS(ios, 9)] = T1n - T1u;
+	       rio[WS(ios, 1)] = T1n + T1u;
+	  }
+	  { /* stores rio[5], rio[7], iio[-3], rio[9], iio[-1] */
+	       E T1S, T1N, T1T, T1R, T1V, T1P, T1Q, T1W, T1U;
+	       T1S = KP559016994 * (T1M + T1L);
+	       T1N = T1L - T1M;
+	       T1T = FMA(KP250000000, T1N, T1O);
+	       T1P = TQ - TF;
+	       T1Q = Ti - Tt;
+	       T1R = FNMS(KP951056516, T1Q, KP587785252 * T1P);
+	       T1V = FMA(KP587785252, T1Q, KP951056516 * T1P);
+	       rio[WS(ios, 5)] = T1N - T1O;
+	       T1W = T1T - T1S;
+	       rio[WS(ios, 7)] = T1V - T1W;
+	       iio[-WS(ios, 3)] = T1V + T1W;
+	       T1U = T1S + T1T;
+	       rio[WS(ios, 9)] = T1R - T1U;
+	       iio[-WS(ios, 1)] = T1R + T1U;
+	  }
+	  { /* stores rio[0], rio[4], iio[-6], rio[2], iio[-8] */
+	       E T12, T10, T11, T1i, T1k, T1a, T1h, T1j, T13;
+	       T12 = KP559016994 * (TW - TZ);
+	       T10 = TW + TZ;
+	       T11 = FNMS(KP250000000, T10, TT);
+	       T1a = T16 - T19;
+	       T1h = T1d - T1g;
+	       T1i = FNMS(KP587785252, T1h, KP951056516 * T1a);
+	       T1k = FMA(KP951056516, T1h, KP587785252 * T1a);
+	       rio[0] = TT + T10;
+	       T1j = T12 + T11;
+	       rio[WS(ios, 4)] = T1j - T1k;
+	       iio[-WS(ios, 6)] = T1j + T1k;
+	       T13 = T11 - T12;
+	       rio[WS(ios, 2)] = T13 - T1i;
+	       iio[-WS(ios, 8)] = T13 + T1i;
+	  }
+	  { /* stores iio[0], rio[6], iio[-4], rio[8], iio[-2] */
+	       E T1H, T1z, T1G, T1F, T1J, T1D, T1E, T1K, T1I;
+	       T1H = KP559016994 * (T1x - T1y);
+	       T1z = T1x + T1y;
+	       T1G = FNMS(KP250000000, T1z, T1C);
+	       T1D = TX - TY;
+	       T1E = TU - TV;
+	       T1F = FNMS(KP587785252, T1E, KP951056516 * T1D);
+	       T1J = FMA(KP951056516, T1E, KP587785252 * T1D);
+	       iio[0] = T1z + T1C;
+	       T1K = T1H + T1G;
+	       rio[WS(ios, 6)] = T1J - T1K;
+	       iio[-WS(ios, 4)] = T1J + T1K;
+	       T1I = T1G - T1H;
+	       rio[WS(ios, 8)] = T1F - T1I;
+	       iio[-WS(ios, 2)] = T1F + T1I;
+	  }
+     }
+     return W; /* W has been advanced by 18 for every loop pass executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle recipe stored in desc below */
+     {TW_FULL, 0, 10}, /* NOTE(review): presumably stands for the 9 twiddle pairs (W[0]..W[17]) that hf_10 reads per pass - confirm against the TW_FULL definition */
+     {TW_NEXT, 1, 0} /* last entry of the list */
+};
+
+static const hc2hc_desc desc = { 10, "hf_10", twinstr, {72, 30, 30, 0}, &GENUS, 0, 0, 0 }; /* 10 is the -n value given to the generator; {72, 30, 30, ...} equals the add / mul / fused multiply-add counts in this file's header comment */
+
+void X(codelet_hf_10) (planner *p) { /* registration entry point for the hf_10 kernel */
+     X(khc2hc_dit_register) (p, hf_10, &desc); /* hands the kernel and its descriptor to planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_12.c b/src/fftw3/rdft/codelets/r2hc/hf_12.c
new file mode 100644
index 0000000..e399159
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_12.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:57:06 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 12 -dit -name hf_12 -include hf.h */
+
+/*
+ * This function contains 118 FP additions, 60 FP multiplications,
+ * (or, 88 additions, 30 multiplications, 30 fused multiply/add),
+ * 47 stack variables, and 48 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_12.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_12.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_12.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf_12(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{ /* generated size-12 kernel: rewrites 12 rio and 12 iio elements in place per loop pass and returns the advanced W */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 22) { /* i counts down by 2; each pass moves rio forward and iio backward by dist and consumes 22 twiddle values */
+	  E T1, T1W, T18, T22, Tc, T15, T1V, T23, TA, T1y, T1j, T1B, TL, T1g, T1z;
+	  E T1A, Ti, T1S, T1d, T25, Tt, T1a, T1T, T26, TR, T1E, T1o, T1D, T12, T1l;
+	  E T1F, T1G;
+	  { /* inputs 0, 4, 8 */
+	       E T6, T16, Tb, T17;
+	       T1 = rio[0]; /* rio[0] and iio[-WS(ios, 11)] are used without a twiddle multiply */
+	       T1W = iio[-WS(ios, 11)];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = rio[WS(ios, 4)]; /* pattern for inputs k = 1..11: rio[WS(ios, k)] and iio[-WS(ios, 11 - k)] are combined with W[2k - 2], W[2k - 1]; here k = 4 */
+		    T5 = iio[-WS(ios, 7)];
+		    T2 = W[6];
+		    T4 = W[7];
+		    T6 = FMA(T2, T3, T4 * T5);
+		    T16 = FNMS(T4, T3, T2 * T5);
+	       }
+	       {
+		    E T8, Ta, T7, T9;
+		    T8 = rio[WS(ios, 8)];
+		    Ta = iio[-WS(ios, 3)];
+		    T7 = W[14];
+		    T9 = W[15];
+		    Tb = FMA(T7, T8, T9 * Ta);
+		    T17 = FNMS(T9, T8, T7 * Ta);
+	       }
+	       T18 = KP866025403 * (T16 - T17);
+	       T22 = KP866025403 * (Tb - T6);
+	       Tc = T6 + Tb;
+	       T15 = FNMS(KP500000000, Tc, T1);
+	       T1V = T16 + T17;
+	       T23 = FNMS(KP500000000, T1V, T1W);
+	  }
+	  { /* inputs 3, 11, 7 */
+	       E TK, T1i, TF, T1h;
+	       {
+		    E Tx, Tz, Tw, Ty;
+		    Tx = rio[WS(ios, 3)];
+		    Tz = iio[-WS(ios, 8)];
+		    Tw = W[4];
+		    Ty = W[5];
+		    TA = FMA(Tw, Tx, Ty * Tz);
+		    T1y = FNMS(Ty, Tx, Tw * Tz);
+	       }
+	       {
+		    E TH, TJ, TG, TI;
+		    TH = rio[WS(ios, 11)];
+		    TJ = iio[0];
+		    TG = W[20];
+		    TI = W[21];
+		    TK = FMA(TG, TH, TI * TJ);
+		    T1i = FNMS(TI, TH, TG * TJ);
+	       }
+	       {
+		    E TC, TE, TB, TD;
+		    TC = rio[WS(ios, 7)];
+		    TE = iio[-WS(ios, 4)];
+		    TB = W[12];
+		    TD = W[13];
+		    TF = FMA(TB, TC, TD * TE);
+		    T1h = FNMS(TD, TC, TB * TE);
+	       }
+	       T1j = KP866025403 * (T1h - T1i);
+	       T1B = KP866025403 * (TK - TF);
+	       TL = TF + TK;
+	       T1g = FNMS(KP500000000, TL, TA);
+	       T1z = T1h + T1i;
+	       T1A = FNMS(KP500000000, T1z, T1y);
+	  }
+	  { /* inputs 6, 2, 10 */
+	       E Ts, T1c, Tn, T1b;
+	       {
+		    E Tf, Th, Te, Tg;
+		    Tf = rio[WS(ios, 6)];
+		    Th = iio[-WS(ios, 5)];
+		    Te = W[10];
+		    Tg = W[11];
+		    Ti = FMA(Te, Tf, Tg * Th);
+		    T1S = FNMS(Tg, Tf, Te * Th);
+	       }
+	       {
+		    E Tp, Tr, To, Tq;
+		    Tp = rio[WS(ios, 2)];
+		    Tr = iio[-WS(ios, 9)];
+		    To = W[2];
+		    Tq = W[3];
+		    Ts = FMA(To, Tp, Tq * Tr);
+		    T1c = FNMS(Tq, Tp, To * Tr);
+	       }
+	       {
+		    E Tk, Tm, Tj, Tl;
+		    Tk = rio[WS(ios, 10)];
+		    Tm = iio[-WS(ios, 1)];
+		    Tj = W[18];
+		    Tl = W[19];
+		    Tn = FMA(Tj, Tk, Tl * Tm);
+		    T1b = FNMS(Tl, Tk, Tj * Tm);
+	       }
+	       T1d = KP866025403 * (T1b - T1c);
+	       T25 = KP866025403 * (Ts - Tn);
+	       Tt = Tn + Ts;
+	       T1a = FNMS(KP500000000, Tt, Ti);
+	       T1T = T1b + T1c;
+	       T26 = FNMS(KP500000000, T1T, T1S);
+	  }
+	  { /* inputs 9, 5, 1 */
+	       E T11, T1n, TW, T1m;
+	       {
+		    E TO, TQ, TN, TP;
+		    TO = rio[WS(ios, 9)];
+		    TQ = iio[-WS(ios, 2)];
+		    TN = W[16];
+		    TP = W[17];
+		    TR = FMA(TN, TO, TP * TQ);
+		    T1E = FNMS(TP, TO, TN * TQ);
+	       }
+	       {
+		    E TY, T10, TX, TZ;
+		    TY = rio[WS(ios, 5)];
+		    T10 = iio[-WS(ios, 6)];
+		    TX = W[8];
+		    TZ = W[9];
+		    T11 = FMA(TX, TY, TZ * T10);
+		    T1n = FNMS(TZ, TY, TX * T10);
+	       }
+	       {
+		    E TT, TV, TS, TU;
+		    TT = rio[WS(ios, 1)];
+		    TV = iio[-WS(ios, 10)];
+		    TS = W[0];
+		    TU = W[1];
+		    TW = FMA(TS, TT, TU * TV);
+		    T1m = FNMS(TU, TT, TS * TV);
+	       }
+	       T1o = KP866025403 * (T1m - T1n);
+	       T1D = KP866025403 * (T11 - TW);
+	       T12 = TW + T11;
+	       T1l = FNMS(KP500000000, T12, TR);
+	       T1F = T1m + T1n;
+	       T1G = FNMS(KP500000000, T1F, T1E);
+	  }
+	  { /* stores the outputs at offsets 0, 3, 6 and 9 of rio and iio */
+	       E Tv, T1N, T1Y, T20, T14, T1Z, T1Q, T1R;
+	       {
+		    E Td, Tu, T1U, T1X;
+		    Td = T1 + Tc;
+		    Tu = Ti + Tt;
+		    Tv = Td + Tu;
+		    T1N = Td - Tu;
+		    T1U = T1S + T1T;
+		    T1X = T1V + T1W;
+		    T1Y = T1U + T1X;
+		    T20 = T1X - T1U;
+	       }
+	       {
+		    E TM, T13, T1O, T1P;
+		    TM = TA + TL;
+		    T13 = TR + T12;
+		    T14 = TM + T13;
+		    T1Z = TM - T13;
+		    T1O = T1y + T1z;
+		    T1P = T1E + T1F;
+		    T1Q = T1O - T1P;
+		    T1R = T1O + T1P;
+	       }
+	       iio[-WS(ios, 6)] = Tv - T14;
+	       rio[WS(ios, 6)] = T1R - T1Y;
+	       rio[0] = Tv + T14;
+	       iio[0] = T1R + T1Y;
+	       rio[WS(ios, 3)] = T1N - T1Q;
+	       iio[-WS(ios, 3)] = T1Z + T20;
+	       iio[-WS(ios, 9)] = T1N + T1Q;
+	       rio[WS(ios, 9)] = T1Z - T20;
+	  }
+	  { /* stores the outputs at offsets 1, 4, 7 and 10 of rio and iio */
+	       E T1t, T1J, T28, T2a, T1w, T21, T1M, T29;
+	       {
+		    E T1r, T1s, T24, T27;
+		    T1r = T15 + T18;
+		    T1s = T1a + T1d;
+		    T1t = T1r + T1s;
+		    T1J = T1r - T1s;
+		    T24 = T22 + T23;
+		    T27 = T25 + T26;
+		    T28 = T24 - T27;
+		    T2a = T27 + T24;
+	       }
+	       {
+		    E T1u, T1v, T1K, T1L;
+		    T1u = T1g + T1j;
+		    T1v = T1l + T1o;
+		    T1w = T1u + T1v;
+		    T21 = T1v - T1u;
+		    T1K = T1B + T1A;
+		    T1L = T1D + T1G;
+		    T1M = T1K - T1L;
+		    T29 = T1K + T1L;
+	       }
+	       iio[-WS(ios, 10)] = T1t - T1w;
+	       rio[WS(ios, 10)] = T29 - T2a;
+	       rio[WS(ios, 4)] = T1t + T1w;
+	       iio[-WS(ios, 4)] = T29 + T2a;
+	       iio[-WS(ios, 7)] = T1J - T1M;
+	       rio[WS(ios, 7)] = T21 - T28;
+	       rio[WS(ios, 1)] = T1J + T1M;
+	       iio[-WS(ios, 1)] = T21 + T28;
+	  }
+	  { /* stores the outputs at offsets 2, 5, 8 and 11 of rio and iio */
+	       E T1f, T1x, T2e, T2g, T1q, T2f, T1I, T2b;
+	       {
+		    E T19, T1e, T2c, T2d;
+		    T19 = T15 - T18;
+		    T1e = T1a - T1d;
+		    T1f = T19 + T1e;
+		    T1x = T19 - T1e;
+		    T2c = T26 - T25;
+		    T2d = T23 - T22;
+		    T2e = T2c + T2d;
+		    T2g = T2d - T2c;
+	       }
+	       {
+		    E T1k, T1p, T1C, T1H;
+		    T1k = T1g - T1j;
+		    T1p = T1l - T1o;
+		    T1q = T1k + T1p;
+		    T2f = T1p - T1k;
+		    T1C = T1A - T1B;
+		    T1H = T1D - T1G;
+		    T1I = T1C + T1H;
+		    T2b = T1H - T1C;
+	       }
+	       rio[WS(ios, 2)] = T1f - T1q;
+	       iio[-WS(ios, 2)] = T2b + T2e;
+	       iio[-WS(ios, 8)] = T1f + T1q;
+	       rio[WS(ios, 8)] = T2b - T2e;
+	       iio[-WS(ios, 11)] = T1x - T1I;
+	       rio[WS(ios, 11)] = T2f - T2g;
+	       rio[WS(ios, 5)] = T1x + T1I;
+	       iio[-WS(ios, 5)] = T2f + T2g;
+	  }
+     }
+     return W; /* W has been advanced by 22 for every loop pass executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle recipe stored in desc below */
+     {TW_FULL, 0, 12}, /* NOTE(review): presumably stands for the 11 twiddle pairs (W[0]..W[21]) that hf_12 reads per pass - confirm against the TW_FULL definition */
+     {TW_NEXT, 1, 0} /* last entry of the list */
+};
+
+static const hc2hc_desc desc = { 12, "hf_12", twinstr, {88, 30, 30, 0}, &GENUS, 0, 0, 0 }; /* 12 is the -n value given to the generator; {88, 30, 30, ...} equals the add / mul / fused multiply-add counts in this file's header comment */
+
+void X(codelet_hf_12) (planner *p) { /* registration entry point for the hf_12 kernel */
+     X(khc2hc_dit_register) (p, hf_12, &desc); /* hands the kernel and its descriptor to planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_15.c b/src/fftw3/rdft/codelets/r2hc/hf_15.c
new file mode 100644
index 0000000..71a3b40
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_15.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:57:07 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 15 -dit -name hf_15 -include hf.h */
+
+/*
+ * This function contains 184 FP additions, 112 FP multiplications,
+ * (or, 128 additions, 56 multiplications, 56 fused multiply/add),
+ * 65 stack variables, and 60 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_15.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_15.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_15.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf_15(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{ /* generated size-15 kernel: rewrites 15 rio and 15 iio elements in place per loop pass and returns the advanced W */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 28) { /* i counts down by 2; each pass moves rio forward and iio backward by dist and consumes 28 twiddle values */
+	  E T1q, T34, Td, T1n, T2S, T35, T13, T1k, T1l, T2E, T2F, T2O, T1H, T1T, T2k;
+	  E T2t, T2f, T2s, T1M, T1U, Tu, TL, TM, T2H, T2I, T2N, T1w, T1Q, T29, T2w;
+	  E T24, T2v, T1B, T1R;
+	  { /* inputs 0, 5, 10 */
+	       E T1, T2R, T6, T1o, Tb, T1p, Tc, T2Q;
+	       T1 = rio[0]; /* rio[0] and iio[-WS(ios, 14)] are used without a twiddle multiply */
+	       T2R = iio[-WS(ios, 14)];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = rio[WS(ios, 5)]; /* pattern for inputs k = 1..14: rio[WS(ios, k)] and iio[-WS(ios, 14 - k)] are combined with W[2k - 2], W[2k - 1]; here k = 5 */
+		    T5 = iio[-WS(ios, 9)];
+		    T2 = W[8];
+		    T4 = W[9];
+		    T6 = FMA(T2, T3, T4 * T5);
+		    T1o = FNMS(T4, T3, T2 * T5);
+	       }
+	       {
+		    E T8, Ta, T7, T9;
+		    T8 = rio[WS(ios, 10)];
+		    Ta = iio[-WS(ios, 4)];
+		    T7 = W[18];
+		    T9 = W[19];
+		    Tb = FMA(T7, T8, T9 * Ta);
+		    T1p = FNMS(T9, T8, T7 * Ta);
+	       }
+	       T1q = KP866025403 * (T1o - T1p);
+	       T34 = KP866025403 * (Tb - T6);
+	       Tc = T6 + Tb;
+	       Td = T1 + Tc;
+	       T1n = FNMS(KP500000000, Tc, T1);
+	       T2Q = T1o + T1p;
+	       T2S = T2Q + T2R;
+	       T35 = FNMS(KP500000000, T2Q, T2R);
+	  }
+	  { /* inputs 6, 9, 11, 1, 14, 4 */
+	       E TR, T2c, T18, T2h, TW, T1E, T11, T1F, T12, T2d, T1d, T1J, T1i, T1K, T1j;
+	       E T2i;
+	       {
+		    E TO, TQ, TN, TP;
+		    TO = rio[WS(ios, 6)];
+		    TQ = iio[-WS(ios, 8)];
+		    TN = W[10];
+		    TP = W[11];
+		    TR = FMA(TN, TO, TP * TQ);
+		    T2c = FNMS(TP, TO, TN * TQ);
+	       }
+	       {
+		    E T15, T17, T14, T16;
+		    T15 = rio[WS(ios, 9)];
+		    T17 = iio[-WS(ios, 5)];
+		    T14 = W[16];
+		    T16 = W[17];
+		    T18 = FMA(T14, T15, T16 * T17);
+		    T2h = FNMS(T16, T15, T14 * T17);
+	       }
+	       {
+		    E TT, TV, TS, TU;
+		    TT = rio[WS(ios, 11)];
+		    TV = iio[-WS(ios, 3)];
+		    TS = W[20];
+		    TU = W[21];
+		    TW = FMA(TS, TT, TU * TV);
+		    T1E = FNMS(TU, TT, TS * TV);
+	       }
+	       {
+		    E TY, T10, TX, TZ;
+		    TY = rio[WS(ios, 1)];
+		    T10 = iio[-WS(ios, 13)];
+		    TX = W[0];
+		    TZ = W[1];
+		    T11 = FMA(TX, TY, TZ * T10);
+		    T1F = FNMS(TZ, TY, TX * T10);
+	       }
+	       T12 = TW + T11;
+	       T2d = T1E + T1F;
+	       {
+		    E T1a, T1c, T19, T1b;
+		    T1a = rio[WS(ios, 14)];
+		    T1c = iio[0];
+		    T19 = W[26];
+		    T1b = W[27];
+		    T1d = FMA(T19, T1a, T1b * T1c);
+		    T1J = FNMS(T1b, T1a, T19 * T1c);
+	       }
+	       {
+		    E T1f, T1h, T1e, T1g;
+		    T1f = rio[WS(ios, 4)];
+		    T1h = iio[-WS(ios, 10)];
+		    T1e = W[6];
+		    T1g = W[7];
+		    T1i = FMA(T1e, T1f, T1g * T1h);
+		    T1K = FNMS(T1g, T1f, T1e * T1h);
+	       }
+	       T1j = T1d + T1i;
+	       T2i = T1J + T1K;
+	       {
+		    E T1D, T1G, T2g, T2j;
+		    T13 = TR + T12;
+		    T1k = T18 + T1j;
+		    T1l = T13 + T1k;
+		    T2E = T2c + T2d;
+		    T2F = T2h + T2i;
+		    T2O = T2E + T2F;
+		    T1D = FNMS(KP500000000, T12, TR);
+		    T1G = KP866025403 * (T1E - T1F);
+		    T1H = T1D - T1G;
+		    T1T = T1D + T1G;
+		    T2g = KP866025403 * (T1d - T1i);
+		    T2j = FNMS(KP500000000, T2i, T2h);
+		    T2k = T2g - T2j;
+		    T2t = T2g + T2j;
+		    {
+			 E T2b, T2e, T1I, T1L;
+			 T2b = KP866025403 * (T11 - TW);
+			 T2e = FNMS(KP500000000, T2d, T2c);
+			 T2f = T2b + T2e;
+			 T2s = T2e - T2b;
+			 T1I = FNMS(KP500000000, T1j, T18);
+			 T1L = KP866025403 * (T1J - T1K);
+			 T1M = T1I - T1L;
+			 T1U = T1I + T1L;
+		    }
+	       }
+	  }
+	  { /* inputs 3, 12, 2, 7, 8, 13 */
+	       E Ti, T21, Tz, T26, TE, T1y, TJ, T1z, TK, T27, Tn, T1t, Ts, T1u, Tt;
+	       E T22;
+	       {
+		    E Tf, Th, Te, Tg;
+		    Tf = rio[WS(ios, 3)];
+		    Th = iio[-WS(ios, 11)];
+		    Te = W[4];
+		    Tg = W[5];
+		    Ti = FMA(Te, Tf, Tg * Th);
+		    T21 = FNMS(Tg, Tf, Te * Th);
+	       }
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = rio[WS(ios, 12)];
+		    Ty = iio[-WS(ios, 2)];
+		    Tv = W[22];
+		    Tx = W[23];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    T26 = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E TB, TD, TA, TC;
+		    TB = rio[WS(ios, 2)];
+		    TD = iio[-WS(ios, 12)];
+		    TA = W[2];
+		    TC = W[3];
+		    TE = FMA(TA, TB, TC * TD);
+		    T1y = FNMS(TC, TB, TA * TD);
+	       }
+	       {
+		    E TG, TI, TF, TH;
+		    TG = rio[WS(ios, 7)];
+		    TI = iio[-WS(ios, 7)];
+		    TF = W[12];
+		    TH = W[13];
+		    TJ = FMA(TF, TG, TH * TI);
+		    T1z = FNMS(TH, TG, TF * TI);
+	       }
+	       TK = TE + TJ;
+	       T27 = T1y + T1z;
+	       {
+		    E Tk, Tm, Tj, Tl;
+		    Tk = rio[WS(ios, 8)];
+		    Tm = iio[-WS(ios, 6)];
+		    Tj = W[14];
+		    Tl = W[15];
+		    Tn = FMA(Tj, Tk, Tl * Tm);
+		    T1t = FNMS(Tl, Tk, Tj * Tm);
+	       }
+	       {
+		    E Tp, Tr, To, Tq;
+		    Tp = rio[WS(ios, 13)];
+		    Tr = iio[-WS(ios, 1)];
+		    To = W[24];
+		    Tq = W[25];
+		    Ts = FMA(To, Tp, Tq * Tr);
+		    T1u = FNMS(Tq, Tp, To * Tr);
+	       }
+	       Tt = Tn + Ts;
+	       T22 = T1t + T1u;
+	       {
+		    E T1s, T1v, T25, T28;
+		    Tu = Ti + Tt;
+		    TL = Tz + TK;
+		    TM = Tu + TL;
+		    T2H = T21 + T22;
+		    T2I = T26 + T27;
+		    T2N = T2H + T2I;
+		    T1s = FNMS(KP500000000, Tt, Ti);
+		    T1v = KP866025403 * (T1t - T1u);
+		    T1w = T1s - T1v;
+		    T1Q = T1s + T1v;
+		    T25 = KP866025403 * (TJ - TE);
+		    T28 = FNMS(KP500000000, T27, T26);
+		    T29 = T25 + T28;
+		    T2w = T28 - T25;
+		    {
+			 E T20, T23, T1x, T1A;
+			 T20 = KP866025403 * (Ts - Tn);
+			 T23 = FNMS(KP500000000, T22, T21);
+			 T24 = T20 + T23;
+			 T2v = T23 - T20;
+			 T1x = FNMS(KP500000000, TK, Tz);
+			 T1A = KP866025403 * (T1y - T1z);
+			 T1B = T1x - T1A;
+			 T1R = T1x + T1A;
+		    }
+	       }
+	  }
+	  { /* stores rio[0], iio[-9], rio[6], iio[-12], rio[3] (offsets in units of WS(ios, .)) */
+	       E T2C, T1m, T2B, T2K, T2M, T2G, T2J, T2L, T2D;
+	       T2C = KP559016994 * (TM - T1l);
+	       T1m = TM + T1l;
+	       T2B = FNMS(KP250000000, T1m, Td);
+	       T2G = T2E - T2F;
+	       T2J = T2H - T2I;
+	       T2K = FNMS(KP587785252, T2J, KP951056516 * T2G);
+	       T2M = FMA(KP951056516, T2J, KP587785252 * T2G);
+	       rio[0] = Td + T1m;
+	       T2L = T2C + T2B;
+	       iio[-WS(ios, 9)] = T2L - T2M;
+	       rio[WS(ios, 6)] = T2L + T2M;
+	       T2D = T2B - T2C;
+	       iio[-WS(ios, 12)] = T2D - T2K;
+	       rio[WS(ios, 3)] = T2D + T2K;
+	  }
+	  { /* stores iio[0], rio[9], iio[-6], rio[12], iio[-3] */
+	       E T2X, T2P, T2W, T2V, T2Z, T2T, T2U, T30, T2Y;
+	       T2X = KP559016994 * (T2N - T2O);
+	       T2P = T2N + T2O;
+	       T2W = FNMS(KP250000000, T2P, T2S);
+	       T2T = Tu - TL;
+	       T2U = T1k - T13;
+	       T2V = FMA(KP587785252, T2T, KP951056516 * T2U);
+	       T2Z = FNMS(KP951056516, T2T, KP587785252 * T2U);
+	       iio[0] = T2P + T2S;
+	       T30 = T2X + T2W;
+	       rio[WS(ios, 9)] = T2Z - T30;
+	       iio[-WS(ios, 6)] = T2Z + T30;
+	       T2Y = T2W - T2X;
+	       rio[WS(ios, 12)] = T2V - T2Y;
+	       iio[-WS(ios, 3)] = T2V + T2Y;
+	  }
+	  { /* stores rio[5], iio[-14], iio[-11], rio[2], iio[-8] */
+	       E T2y, T2A, T1r, T1O, T2p, T2q, T2z, T2r;
+	       {
+		    E T2u, T2x, T1C, T1N;
+		    T2u = T2s - T2t;
+		    T2x = T2v - T2w;
+		    T2y = FNMS(KP587785252, T2x, KP951056516 * T2u);
+		    T2A = FMA(KP951056516, T2x, KP587785252 * T2u);
+		    T1r = T1n - T1q;
+		    T1C = T1w + T1B;
+		    T1N = T1H + T1M;
+		    T1O = T1C + T1N;
+		    T2p = FNMS(KP250000000, T1O, T1r);
+		    T2q = KP559016994 * (T1C - T1N);
+	       }
+	       rio[WS(ios, 5)] = T1r + T1O;
+	       T2z = T2q + T2p;
+	       iio[-WS(ios, 14)] = T2z - T2A;
+	       iio[-WS(ios, 11)] = T2z + T2A;
+	       T2r = T2p - T2q;
+	       rio[WS(ios, 2)] = T2r - T2y;
+	       iio[-WS(ios, 8)] = T2r + T2y;
+	  }
+	  { /* stores iio[-5], rio[8], iio[-2], rio[11], rio[14] */
+	       E T3h, T3p, T3l, T3m, T3k, T3n, T3q, T3o;
+	       {
+		    E T3f, T3g, T3i, T3j;
+		    T3f = T1w - T1B;
+		    T3g = T1H - T1M;
+		    T3h = FMA(KP951056516, T3f, KP587785252 * T3g);
+		    T3p = FNMS(KP587785252, T3f, KP951056516 * T3g);
+		    T3l = T35 - T34;
+		    T3i = T2s + T2t;
+		    T3j = T2v + T2w;
+		    T3m = T3j + T3i;
+		    T3k = KP559016994 * (T3i - T3j);
+		    T3n = FNMS(KP250000000, T3m, T3l);
+	       }
+	       iio[-WS(ios, 5)] = T3m + T3l;
+	       T3q = T3k + T3n;
+	       rio[WS(ios, 8)] = T3p - T3q;
+	       iio[-WS(ios, 2)] = T3p + T3q;
+	       T3o = T3k - T3n;
+	       rio[WS(ios, 11)] = T3h + T3o;
+	       rio[WS(ios, 14)] = T3o - T3h;
+	  }
+	  { /* stores rio[10], rio[13], iio[-7], iio[-1], iio[-4] */
+	       E T3c, T3d, T36, T33, T37, T38, T3e, T39;
+	       {
+		    E T3a, T3b, T31, T32;
+		    T3a = T1Q - T1R;
+		    T3b = T1T - T1U;
+		    T3c = FMA(KP951056516, T3a, KP587785252 * T3b);
+		    T3d = FNMS(KP587785252, T3a, KP951056516 * T3b);
+		    T36 = T34 + T35;
+		    T31 = T2k - T2f;
+		    T32 = T24 + T29;
+		    T33 = T31 - T32;
+		    T37 = KP559016994 * (T32 + T31);
+		    T38 = FMA(KP250000000, T33, T36);
+	       }
+	       rio[WS(ios, 10)] = T33 - T36;
+	       T3e = T38 - T37;
+	       rio[WS(ios, 13)] = T3d - T3e;
+	       iio[-WS(ios, 7)] = T3d + T3e;
+	       T39 = T37 + T38;
+	       iio[-WS(ios, 1)] = T39 - T3c;
+	       iio[-WS(ios, 4)] = T3c + T39;
+	  }
+	  { /* stores iio[-10], rio[7], iio[-13], rio[4], rio[1] */
+	       E T2m, T2o, T1P, T1W, T1X, T1Y, T2n, T1Z;
+	       {
+		    E T2a, T2l, T1S, T1V;
+		    T2a = T24 - T29;
+		    T2l = T2f + T2k;
+		    T2m = FMA(KP951056516, T2a, KP587785252 * T2l);
+		    T2o = FNMS(KP587785252, T2a, KP951056516 * T2l);
+		    T1P = T1n + T1q;
+		    T1S = T1Q + T1R;
+		    T1V = T1T + T1U;
+		    T1W = T1S + T1V;
+		    T1X = KP559016994 * (T1S - T1V);
+		    T1Y = FNMS(KP250000000, T1W, T1P);
+	       }
+	       iio[-WS(ios, 10)] = T1P + T1W;
+	       T2n = T1Y - T1X;
+	       rio[WS(ios, 7)] = T2n - T2o;
+	       iio[-WS(ios, 13)] = T2n + T2o;
+	       T1Z = T1X + T1Y;
+	       rio[WS(ios, 4)] = T1Z - T2m;
+	       rio[WS(ios, 1)] = T1Z + T2m;
+	  }
+     }
+     return W; /* W has been advanced by 28 for every loop pass executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle recipe stored in desc below */
+     {TW_FULL, 0, 15}, /* NOTE(review): presumably stands for the 14 twiddle pairs (W[0]..W[27]) that hf_15 reads per pass - confirm against the TW_FULL definition */
+     {TW_NEXT, 1, 0} /* last entry of the list */
+};
+
+static const hc2hc_desc desc = { 15, "hf_15", twinstr, {128, 56, 56, 0}, &GENUS, 0, 0, 0 }; /* 15 is the -n value given to the generator; {128, 56, 56, ...} equals the add / mul / fused multiply-add counts in this file's header comment */
+
+void X(codelet_hf_15) (planner *p) { /* registration entry point for the hf_15 kernel */
+     X(khc2hc_dit_register) (p, hf_15, &desc); /* hands the kernel and its descriptor to planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_16.c b/src/fftw3/rdft/codelets/r2hc/hf_16.c
new file mode 100644
index 0000000..ff3145d
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_16.c
@@ -0,0 +1,406 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:57:07 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 16 -dit -name hf_16 -include hf.h */
+
+/*
+ * This function contains 174 FP additions, 84 FP multiplications,
+ * (or, 136 additions, 46 multiplications, 38 fused multiply/add),
+ * 52 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_16.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_16.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_16.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf_16(R *rio, R *iio, const R *W, stride ios, int m, int dist) /* Size-16 pass (generated with gen_hc2hc -n 16 -dit, per the file header): combines 16 (rio, iio) input pairs with 15 twiddle pairs from W, in place. */
+{ /* Returns W advanced by 30 entries per loop pass. FMA/FNMS/WS/DK are macros defined outside this file; presumably FMA(a, b, c) = a*b + c and FNMS(a, b, c) = c - a*b -- TODO confirm. */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* numerically sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 30) { /* i counts down from m - 2 in steps of 2; rio walks forward and iio backward by dist */
+	  E T7, T37, T1t, T2U, Ti, T38, T1w, T2R, Tu, T2s, T1C, T2c, TF, T2t, T1H;
+	  E T2d, TS, T13, T2w, T2x, T2y, T2z, T1O, T2g, T1T, T2h, T1f, T1q, T2B, T2C;
+	  E T2D, T2E, T1Z, T2j, T24, T2k;
+	  { /* Input 0 = (rio[0], iio[-WS(ios, 15)]), used untwiddled. Input k = (rio[WS(ios, k)], iio[-WS(ios, 15 - k)]) is combined with W[2k - 2], W[2k - 1]; here k = 8. */
+	       E T1, T2T, T6, T2S;
+	       T1 = rio[0];
+	       T2T = iio[-WS(ios, 15)];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = rio[WS(ios, 8)];
+		    T5 = iio[-WS(ios, 7)];
+		    T2 = W[14];
+		    T4 = W[15];
+		    T6 = FMA(T2, T3, T4 * T5);
+		    T2S = FNMS(T4, T3, T2 * T5);
+	       }
+	       T7 = T1 + T6;
+	       T37 = T2T - T2S;
+	       T1t = T1 - T6;
+	       T2U = T2S + T2T;
+	  }
+	  { /* inputs k = 4 and k = 12 */
+	       E Tc, T1u, Th, T1v;
+	       {
+		    E T9, Tb, T8, Ta;
+		    T9 = rio[WS(ios, 4)];
+		    Tb = iio[-WS(ios, 11)];
+		    T8 = W[6];
+		    Ta = W[7];
+		    Tc = FMA(T8, T9, Ta * Tb);
+		    T1u = FNMS(Ta, T9, T8 * Tb);
+	       }
+	       {
+		    E Te, Tg, Td, Tf;
+		    Te = rio[WS(ios, 12)];
+		    Tg = iio[-WS(ios, 3)];
+		    Td = W[22];
+		    Tf = W[23];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    T1v = FNMS(Tf, Te, Td * Tg);
+	       }
+	       Ti = Tc + Th;
+	       T38 = Tc - Th;
+	       T1w = T1u - T1v;
+	       T2R = T1u + T1v;
+	  }
+	  { /* inputs k = 2 and k = 10 */
+	       E To, T1y, Tt, T1z, T1A, T1B;
+	       {
+		    E Tl, Tn, Tk, Tm;
+		    Tl = rio[WS(ios, 2)];
+		    Tn = iio[-WS(ios, 13)];
+		    Tk = W[2];
+		    Tm = W[3];
+		    To = FMA(Tk, Tl, Tm * Tn);
+		    T1y = FNMS(Tm, Tl, Tk * Tn);
+	       }
+	       {
+		    E Tq, Ts, Tp, Tr;
+		    Tq = rio[WS(ios, 10)];
+		    Ts = iio[-WS(ios, 5)];
+		    Tp = W[18];
+		    Tr = W[19];
+		    Tt = FMA(Tp, Tq, Tr * Ts);
+		    T1z = FNMS(Tr, Tq, Tp * Ts);
+	       }
+	       Tu = To + Tt;
+	       T2s = T1y + T1z;
+	       T1A = T1y - T1z;
+	       T1B = To - Tt;
+	       T1C = T1A - T1B;
+	       T2c = T1B + T1A;
+	  }
+	  { /* inputs k = 14 and k = 6 */
+	       E Tz, T1E, TE, T1F, T1D, T1G;
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = rio[WS(ios, 14)];
+		    Ty = iio[-WS(ios, 1)];
+		    Tv = W[26];
+		    Tx = W[27];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    T1E = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E TB, TD, TA, TC;
+		    TB = rio[WS(ios, 6)];
+		    TD = iio[-WS(ios, 9)];
+		    TA = W[10];
+		    TC = W[11];
+		    TE = FMA(TA, TB, TC * TD);
+		    T1F = FNMS(TC, TB, TA * TD);
+	       }
+	       TF = Tz + TE;
+	       T2t = T1E + T1F;
+	       T1D = Tz - TE;
+	       T1G = T1E - T1F;
+	       T1H = T1D + T1G;
+	       T2d = T1D - T1G;
+	  }
+	  { /* inputs k = 1, 13, 9 and 5 */
+	       E TM, T1K, T12, T1R, TR, T1L, TX, T1Q;
+	       {
+		    E TJ, TL, TI, TK;
+		    TJ = rio[WS(ios, 1)];
+		    TL = iio[-WS(ios, 14)];
+		    TI = W[0];
+		    TK = W[1];
+		    TM = FMA(TI, TJ, TK * TL);
+		    T1K = FNMS(TK, TJ, TI * TL);
+	       }
+	       {
+		    E TZ, T11, TY, T10;
+		    TZ = rio[WS(ios, 13)];
+		    T11 = iio[-WS(ios, 2)];
+		    TY = W[24];
+		    T10 = W[25];
+		    T12 = FMA(TY, TZ, T10 * T11);
+		    T1R = FNMS(T10, TZ, TY * T11);
+	       }
+	       {
+		    E TO, TQ, TN, TP;
+		    TO = rio[WS(ios, 9)];
+		    TQ = iio[-WS(ios, 6)];
+		    TN = W[16];
+		    TP = W[17];
+		    TR = FMA(TN, TO, TP * TQ);
+		    T1L = FNMS(TP, TO, TN * TQ);
+	       }
+	       {
+		    E TU, TW, TT, TV;
+		    TU = rio[WS(ios, 5)];
+		    TW = iio[-WS(ios, 10)];
+		    TT = W[8];
+		    TV = W[9];
+		    TX = FMA(TT, TU, TV * TW);
+		    T1Q = FNMS(TV, TU, TT * TW);
+	       }
+	       TS = TM + TR;
+	       T13 = TX + T12;
+	       T2w = TS - T13;
+	       T2x = T1K + T1L;
+	       T2y = T1Q + T1R;
+	       T2z = T2x - T2y;
+	       {
+		    E T1M, T1N, T1P, T1S;
+		    T1M = T1K - T1L;
+		    T1N = TX - T12;
+		    T1O = T1M + T1N;
+		    T2g = T1M - T1N;
+		    T1P = TM - TR;
+		    T1S = T1Q - T1R;
+		    T1T = T1P - T1S;
+		    T2h = T1P + T1S;
+	       }
+	  }
+	  { /* inputs k = 15, 11, 7 and 3 */
+	       E T19, T20, T1p, T1X, T1e, T21, T1k, T1W;
+	       {
+		    E T16, T18, T15, T17;
+		    T16 = rio[WS(ios, 15)];
+		    T18 = iio[0];
+		    T15 = W[28];
+		    T17 = W[29];
+		    T19 = FMA(T15, T16, T17 * T18);
+		    T20 = FNMS(T17, T16, T15 * T18);
+	       }
+	       {
+		    E T1m, T1o, T1l, T1n;
+		    T1m = rio[WS(ios, 11)];
+		    T1o = iio[-WS(ios, 4)];
+		    T1l = W[20];
+		    T1n = W[21];
+		    T1p = FMA(T1l, T1m, T1n * T1o);
+		    T1X = FNMS(T1n, T1m, T1l * T1o);
+	       }
+	       {
+		    E T1b, T1d, T1a, T1c;
+		    T1b = rio[WS(ios, 7)];
+		    T1d = iio[-WS(ios, 8)];
+		    T1a = W[12];
+		    T1c = W[13];
+		    T1e = FMA(T1a, T1b, T1c * T1d);
+		    T21 = FNMS(T1c, T1b, T1a * T1d);
+	       }
+	       {
+		    E T1h, T1j, T1g, T1i;
+		    T1h = rio[WS(ios, 3)];
+		    T1j = iio[-WS(ios, 12)];
+		    T1g = W[4];
+		    T1i = W[5];
+		    T1k = FMA(T1g, T1h, T1i * T1j);
+		    T1W = FNMS(T1i, T1h, T1g * T1j);
+	       }
+	       T1f = T19 + T1e;
+	       T1q = T1k + T1p;
+	       T2B = T1f - T1q;
+	       T2C = T20 + T21;
+	       T2D = T1W + T1X;
+	       T2E = T2C - T2D;
+	       {
+		    E T1V, T1Y, T22, T23;
+		    T1V = T19 - T1e;
+		    T1Y = T1W - T1X;
+		    T1Z = T1V - T1Y;
+		    T2j = T1V + T1Y;
+		    T22 = T20 - T21;
+		    T23 = T1k - T1p;
+		    T24 = T22 + T23;
+		    T2k = T22 - T23;
+	       }
+	  }
+	  { /* Writes rio[WS(ios, j)] and iio[-WS(ios, j)] for j = 3, 7, 11, 15. Every rio/iio read of this pass happened above, so these in-place stores cannot clobber a pending input. */
+	       E T1J, T27, T3g, T3i, T26, T3h, T2a, T3d;
+	       {
+		    E T1x, T1I, T3e, T3f;
+		    T1x = T1t - T1w;
+		    T1I = KP707106781 * (T1C - T1H);
+		    T1J = T1x + T1I;
+		    T27 = T1x - T1I;
+		    T3e = KP707106781 * (T2d - T2c);
+		    T3f = T38 + T37;
+		    T3g = T3e + T3f;
+		    T3i = T3f - T3e;
+	       }
+	       {
+		    E T1U, T25, T28, T29;
+		    T1U = FMA(KP923879532, T1O, KP382683432 * T1T);
+		    T25 = FNMS(KP923879532, T24, KP382683432 * T1Z);
+		    T26 = T1U + T25;
+		    T3h = T25 - T1U;
+		    T28 = FNMS(KP923879532, T1T, KP382683432 * T1O);
+		    T29 = FMA(KP382683432, T24, KP923879532 * T1Z);
+		    T2a = T28 - T29;
+		    T3d = T28 + T29;
+	       }
+	       iio[-WS(ios, 11)] = T1J - T26;
+	       rio[WS(ios, 11)] = T3d - T3g;
+	       rio[WS(ios, 3)] = T1J + T26;
+	       iio[-WS(ios, 3)] = T3d + T3g;
+	       iio[-WS(ios, 15)] = T27 - T2a;
+	       rio[WS(ios, 15)] = T3h - T3i;
+	       rio[WS(ios, 7)] = T27 + T2a;
+	       iio[-WS(ios, 7)] = T3h + T3i;
+	  }
+	  { /* writes outputs j = 2, 6, 10, 14 */
+	       E T2v, T2H, T32, T34, T2G, T33, T2K, T2Z;
+	       {
+		    E T2r, T2u, T30, T31;
+		    T2r = T7 - Ti;
+		    T2u = T2s - T2t;
+		    T2v = T2r + T2u;
+		    T2H = T2r - T2u;
+		    T30 = TF - Tu;
+		    T31 = T2U - T2R;
+		    T32 = T30 + T31;
+		    T34 = T31 - T30;
+	       }
+	       {
+		    E T2A, T2F, T2I, T2J;
+		    T2A = T2w + T2z;
+		    T2F = T2B - T2E;
+		    T2G = KP707106781 * (T2A + T2F);
+		    T33 = KP707106781 * (T2F - T2A);
+		    T2I = T2z - T2w;
+		    T2J = T2B + T2E;
+		    T2K = KP707106781 * (T2I - T2J);
+		    T2Z = KP707106781 * (T2I + T2J);
+	       }
+	       iio[-WS(ios, 10)] = T2v - T2G;
+	       rio[WS(ios, 10)] = T2Z - T32;
+	       rio[WS(ios, 2)] = T2v + T2G;
+	       iio[-WS(ios, 2)] = T2Z + T32;
+	       iio[-WS(ios, 14)] = T2H - T2K;
+	       rio[WS(ios, 14)] = T33 - T34;
+	       rio[WS(ios, 6)] = T2H + T2K;
+	       iio[-WS(ios, 6)] = T33 + T34;
+	  }
+	  { /* writes outputs j = 1, 5, 9, 13 */
+	       E T2f, T2n, T3a, T3c, T2m, T3b, T2q, T35;
+	       {
+		    E T2b, T2e, T36, T39;
+		    T2b = T1t + T1w;
+		    T2e = KP707106781 * (T2c + T2d);
+		    T2f = T2b + T2e;
+		    T2n = T2b - T2e;
+		    T36 = KP707106781 * (T1C + T1H);
+		    T39 = T37 - T38;
+		    T3a = T36 + T39;
+		    T3c = T39 - T36;
+	       }
+	       {
+		    E T2i, T2l, T2o, T2p;
+		    T2i = FMA(KP382683432, T2g, KP923879532 * T2h);
+		    T2l = FNMS(KP382683432, T2k, KP923879532 * T2j);
+		    T2m = T2i + T2l;
+		    T3b = T2l - T2i;
+		    T2o = FNMS(KP382683432, T2h, KP923879532 * T2g);
+		    T2p = FMA(KP923879532, T2k, KP382683432 * T2j);
+		    T2q = T2o - T2p;
+		    T35 = T2o + T2p;
+	       }
+	       iio[-WS(ios, 9)] = T2f - T2m;
+	       rio[WS(ios, 9)] = T35 - T3a;
+	       rio[WS(ios, 1)] = T2f + T2m;
+	       iio[-WS(ios, 1)] = T35 + T3a;
+	       iio[-WS(ios, 13)] = T2n - T2q;
+	       rio[WS(ios, 13)] = T3b - T3c;
+	       rio[WS(ios, 5)] = T2n + T2q;
+	       iio[-WS(ios, 5)] = T3b + T3c;
+	  }
+	  { /* writes outputs j = 0, 4, 8, 12 */
+	       E TH, T2L, T2W, T2Y, T1s, T2X, T2O, T2P;
+	       {
+		    E Tj, TG, T2Q, T2V;
+		    Tj = T7 + Ti;
+		    TG = Tu + TF;
+		    TH = Tj + TG;
+		    T2L = Tj - TG;
+		    T2Q = T2s + T2t;
+		    T2V = T2R + T2U;
+		    T2W = T2Q + T2V;
+		    T2Y = T2V - T2Q;
+	       }
+	       {
+		    E T14, T1r, T2M, T2N;
+		    T14 = TS + T13;
+		    T1r = T1f + T1q;
+		    T1s = T14 + T1r;
+		    T2X = T1r - T14;
+		    T2M = T2x + T2y;
+		    T2N = T2C + T2D;
+		    T2O = T2M - T2N;
+		    T2P = T2M + T2N;
+	       }
+	       iio[-WS(ios, 8)] = TH - T1s;
+	       rio[WS(ios, 8)] = T2P - T2W;
+	       rio[0] = TH + T1s;
+	       iio[0] = T2P + T2W;
+	       iio[-WS(ios, 12)] = T2L - T2O;
+	       rio[WS(ios, 12)] = T2X - T2Y;
+	       rio[WS(ios, 4)] = T2L + T2O;
+	       iio[-WS(ios, 4)] = T2X + T2Y;
+	  }
+     }
+     return W; /* W as left by the loop: 30 entries further on for every pass executed */
+}
+
+static const tw_instr twinstr[] = { /* two tw_instr records handed to the planner through desc below */
+     {TW_FULL, 0, 16}, /* NOTE(review): presumably requests the full twiddle set for size 16 (hf_16 reads W[0..29] per pass) -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* NOTE(review): looks like the list terminator / advance record -- confirm */
+};
+
+static const hc2hc_desc desc = { 16, "hf_16", twinstr, {136, 46, 38, 0}, &GENUS, 0, 0, 0 }; /* descriptor for hf_16; {136, 46, 38, ...} matches the add/mul/fused-multiply-add counts quoted in this file's header comment */
+
+void X(codelet_hf_16) (planner *p) { /* registration entry point: hands hf_16 and desc to planner p */
+     X(khc2hc_dit_register) (p, hf_16, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_2.c b/src/fftw3/rdft/codelets/r2hc/hf_2.c
new file mode 100644
index 0000000..7de094c
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_2.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:51 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 2 -dit -name hf_2 -include hf.h */
+
+/*
+ * This function contains 6 FP additions, 4 FP multiplications,
+ * (or, 4 additions, 2 multiplications, 2 fused multiply/add),
+ * 9 stack variables, and 8 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_2.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_2.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_2.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+/* hf_2: each pass combines (rio[WS(ios, 1)], iio[0]) with W[0], W[1] via FMA/FNMS, then stores sums and differences against (rio[0], iio[-WS(ios, 1)]) in place. */
+static const R *hf_2(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{
+     int i = m - 2;
+     /* Each pass consumes two entries of W; the advanced pointer is returned. */
+     while (i > 0) {
+	  /* Read all four inputs first: the stores below reuse the same slots. */
+	  E a_r = rio[0];
+	  E a_i = iio[-WS(ios, 1)];
+	  E b_r = rio[WS(ios, 1)];
+	  E b_i = iio[0];
+	  E w_a = W[0];
+	  E w_b = W[1];
+	  E tb_r = FMA(w_a, b_r, w_b * b_i);
+	  E tb_i = FNMS(w_b, b_r, w_a * b_i);
+	  iio[-WS(ios, 1)] = a_r - tb_r;
+	  rio[WS(ios, 1)] = tb_i - a_i;
+	  rio[0] = a_r + tb_r;
+	  iio[0] = tb_i + a_i;
+	  i -= 2, rio += dist, iio -= dist, W += 2;
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = { /* two tw_instr records handed to the planner through desc below */
+     {TW_FULL, 0, 2}, /* NOTE(review): presumably requests the full twiddle set for size 2 (hf_2 reads W[0..1] per pass) -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* NOTE(review): looks like the list terminator / advance record -- confirm */
+};
+
+static const hc2hc_desc desc = { 2, "hf_2", twinstr, {4, 2, 2, 0}, &GENUS, 0, 0, 0 }; /* descriptor for hf_2; {4, 2, 2, ...} matches the add/mul/fused-multiply-add counts quoted in this file's header comment */
+
+void X(codelet_hf_2) (planner *p) { /* registration entry point: hands hf_2 and desc to planner p */
+     X(khc2hc_dit_register) (p, hf_2, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_3.c b/src/fftw3/rdft/codelets/r2hc/hf_3.c
new file mode 100644
index 0000000..fce963f
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_3.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:51 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 3 -dit -name hf_3 -include hf.h */
+
+/*
+ * This function contains 16 FP additions, 12 FP multiplications,
+ * (or, 10 additions, 6 multiplications, 6 fused multiply/add),
+ * 15 stack variables, and 12 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_3.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_3.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_3.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+/* hf_3: each pass combines two (rio, iio) input pairs with W[0..3] via FMA/FNMS and merges them with the untwiddled pair (rio[0], iio[-WS(ios, 2)]), in place. */
+static const R *hf_3(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627);
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     int i = m - 2;
+     /* Each pass consumes four entries of W; the advanced pointer is returned. */
+     while (i > 0) {
+	  /* Read all six inputs first: the stores below reuse the same slots. */
+	  E x0_r = rio[0];
+	  E x0_i = iio[-WS(ios, 2)];
+	  E x1_r = rio[WS(ios, 1)];
+	  E x1_i = iio[-WS(ios, 1)];
+	  E w1_a = W[0];
+	  E w1_b = W[1];
+	  E x2_r = rio[WS(ios, 2)];
+	  E x2_i = iio[0];
+	  E w2_a = W[2];
+	  E w2_b = W[3];
+	  /* Twiddled inputs, then the shared sums and scaled differences. */
+	  E t1_r = FMA(w1_a, x1_r, w1_b * x1_i);
+	  E t1_i = FNMS(w1_b, x1_r, w1_a * x1_i);
+	  E t2_r = FMA(w2_a, x2_r, w2_b * x2_i);
+	  E t2_i = FNMS(w2_b, x2_r, w2_a * x2_i);
+	  E sum_r = t1_r + t2_r;
+	  E sum_i = t1_i + t2_i;
+	  E mid_r = FNMS(KP500000000, sum_r, x0_r);
+	  E dif_i = KP866025403 * (t1_i - t2_i);
+	  E dif_r = KP866025403 * (t2_r - t1_r);
+	  E mid_i = FNMS(KP500000000, sum_i, x0_i);
+
+	  rio[0] = x0_r + sum_r;
+	  iio[0] = sum_i + x0_i;
+	  iio[-WS(ios, 2)] = mid_r - dif_i;
+	  rio[WS(ios, 1)] = mid_r + dif_i;
+	  rio[WS(ios, 2)] = dif_r - mid_i;
+	  iio[-WS(ios, 1)] = dif_r + mid_i;
+
+	  i -= 2;
+	  rio += dist;
+	  iio -= dist;
+	  W += 4;
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = { /* two tw_instr records handed to the planner through desc below */
+     {TW_FULL, 0, 3}, /* NOTE(review): presumably requests the full twiddle set for size 3 (hf_3 reads W[0..3] per pass) -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* NOTE(review): looks like the list terminator / advance record -- confirm */
+};
+
+static const hc2hc_desc desc = { 3, "hf_3", twinstr, {10, 6, 6, 0}, &GENUS, 0, 0, 0 }; /* descriptor for hf_3; {10, 6, 6, ...} matches the add/mul/fused-multiply-add counts quoted in this file's header comment */
+
+void X(codelet_hf_3) (planner *p) { /* registration entry point: hands hf_3 and desc to planner p */
+     X(khc2hc_dit_register) (p, hf_3, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_32.c b/src/fftw3/rdft/codelets/r2hc/hf_32.c
new file mode 100644
index 0000000..262ef43
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_32.c
@@ -0,0 +1,892 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:57:08 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 32 -dit -name hf_32 -include hf.h */
+
+/*
+ * This function contains 434 FP additions, 208 FP multiplications,
+ * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
+ * 96 stack variables, and 128 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf_32(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 62) {
+	  E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T2B, T67, T6e, T6O, T4b, T5d, T4s;
+	  E T5g, TG, T7l, T5I, T73, T3a, T4U, T3f, T4V, T1Q, T61, T5Y, T6J, T3K, T59;
+	  E T41, T56, T1r, T5P, T5S, T6F, T3x, T51, T3C, T52, T14, T5N, T5M, T6E, T3m;
+	  E T4Y, T3r, T4Z, T2d, T5Z, T64, T6K, T3V, T57, T44, T5a, T2Y, T6f, T6a, T6P;
+	  E T4m, T5h, T4v, T5e;
+	  {
+	       E T1, T76, T6, T75, Tc, T32, Th, T33;
+	       T1 = rio[0];
+	       T76 = iio[-WS(ios, 31)];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = rio[WS(ios, 16)];
+		    T5 = iio[-WS(ios, 15)];
+		    T2 = W[30];
+		    T4 = W[31];
+		    T6 = FMA(T2, T3, T4 * T5);
+		    T75 = FNMS(T4, T3, T2 * T5);
+	       }
+	       {
+		    E T9, Tb, T8, Ta;
+		    T9 = rio[WS(ios, 8)];
+		    Tb = iio[-WS(ios, 23)];
+		    T8 = W[14];
+		    Ta = W[15];
+		    Tc = FMA(T8, T9, Ta * Tb);
+		    T32 = FNMS(Ta, T9, T8 * Tb);
+	       }
+	       {
+		    E Te, Tg, Td, Tf;
+		    Te = rio[WS(ios, 24)];
+		    Tg = iio[-WS(ios, 7)];
+		    Td = W[46];
+		    Tf = W[47];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    T33 = FNMS(Tf, Te, Td * Tg);
+	       }
+	       {
+		    E T7, Ti, T7A, T7B;
+		    T7 = T1 + T6;
+		    Ti = Tc + Th;
+		    Tj = T7 + Ti;
+		    T5F = T7 - Ti;
+		    T7A = T76 - T75;
+		    T7B = Tc - Th;
+		    T7C = T7A - T7B;
+		    T7Q = T7B + T7A;
+	       }
+	       {
+		    E T31, T34, T74, T77;
+		    T31 = T1 - T6;
+		    T34 = T32 - T33;
+		    T35 = T31 - T34;
+		    T4T = T31 + T34;
+		    T74 = T32 + T33;
+		    T77 = T75 + T76;
+		    T78 = T74 + T77;
+		    T7m = T77 - T74;
+	       }
+	  }
+	  {
+	       E T2j, T4o, T2z, T49, T2o, T4p, T2u, T48;
+	       {
+		    E T2g, T2i, T2f, T2h;
+		    T2g = rio[WS(ios, 31)];
+		    T2i = iio[0];
+		    T2f = W[60];
+		    T2h = W[61];
+		    T2j = FMA(T2f, T2g, T2h * T2i);
+		    T4o = FNMS(T2h, T2g, T2f * T2i);
+	       }
+	       {
+		    E T2w, T2y, T2v, T2x;
+		    T2w = rio[WS(ios, 23)];
+		    T2y = iio[-WS(ios, 8)];
+		    T2v = W[44];
+		    T2x = W[45];
+		    T2z = FMA(T2v, T2w, T2x * T2y);
+		    T49 = FNMS(T2x, T2w, T2v * T2y);
+	       }
+	       {
+		    E T2l, T2n, T2k, T2m;
+		    T2l = rio[WS(ios, 15)];
+		    T2n = iio[-WS(ios, 16)];
+		    T2k = W[28];
+		    T2m = W[29];
+		    T2o = FMA(T2k, T2l, T2m * T2n);
+		    T4p = FNMS(T2m, T2l, T2k * T2n);
+	       }
+	       {
+		    E T2r, T2t, T2q, T2s;
+		    T2r = rio[WS(ios, 7)];
+		    T2t = iio[-WS(ios, 24)];
+		    T2q = W[12];
+		    T2s = W[13];
+		    T2u = FMA(T2q, T2r, T2s * T2t);
+		    T48 = FNMS(T2s, T2r, T2q * T2t);
+	       }
+	       {
+		    E T2p, T2A, T6c, T6d;
+		    T2p = T2j + T2o;
+		    T2A = T2u + T2z;
+		    T2B = T2p + T2A;
+		    T67 = T2p - T2A;
+		    T6c = T4o + T4p;
+		    T6d = T48 + T49;
+		    T6e = T6c - T6d;
+		    T6O = T6c + T6d;
+	       }
+	       {
+		    E T47, T4a, T4q, T4r;
+		    T47 = T2j - T2o;
+		    T4a = T48 - T49;
+		    T4b = T47 - T4a;
+		    T5d = T47 + T4a;
+		    T4q = T4o - T4p;
+		    T4r = T2u - T2z;
+		    T4s = T4q + T4r;
+		    T5g = T4q - T4r;
+	       }
+	  }
+	  {
+	       E To, T36, TE, T3d, Tt, T37, Tz, T3c;
+	       {
+		    E Tl, Tn, Tk, Tm;
+		    Tl = rio[WS(ios, 4)];
+		    Tn = iio[-WS(ios, 27)];
+		    Tk = W[6];
+		    Tm = W[7];
+		    To = FMA(Tk, Tl, Tm * Tn);
+		    T36 = FNMS(Tm, Tl, Tk * Tn);
+	       }
+	       {
+		    E TB, TD, TA, TC;
+		    TB = rio[WS(ios, 12)];
+		    TD = iio[-WS(ios, 19)];
+		    TA = W[22];
+		    TC = W[23];
+		    TE = FMA(TA, TB, TC * TD);
+		    T3d = FNMS(TC, TB, TA * TD);
+	       }
+	       {
+		    E Tq, Ts, Tp, Tr;
+		    Tq = rio[WS(ios, 20)];
+		    Ts = iio[-WS(ios, 11)];
+		    Tp = W[38];
+		    Tr = W[39];
+		    Tt = FMA(Tp, Tq, Tr * Ts);
+		    T37 = FNMS(Tr, Tq, Tp * Ts);
+	       }
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = rio[WS(ios, 28)];
+		    Ty = iio[-WS(ios, 3)];
+		    Tv = W[54];
+		    Tx = W[55];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    T3c = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E Tu, TF, T5G, T5H;
+		    Tu = To + Tt;
+		    TF = Tz + TE;
+		    TG = Tu + TF;
+		    T7l = TF - Tu;
+		    T5G = T36 + T37;
+		    T5H = T3c + T3d;
+		    T5I = T5G - T5H;
+		    T73 = T5G + T5H;
+	       }
+	       {
+		    E T38, T39, T3b, T3e;
+		    T38 = T36 - T37;
+		    T39 = To - Tt;
+		    T3a = T38 - T39;
+		    T4U = T39 + T38;
+		    T3b = Tz - TE;
+		    T3e = T3c - T3d;
+		    T3f = T3b + T3e;
+		    T4V = T3b - T3e;
+	       }
+	  }
+	  {
+	       E T1y, T3G, T1O, T3Z, T1D, T3H, T1J, T3Y;
+	       {
+		    E T1v, T1x, T1u, T1w;
+		    T1v = rio[WS(ios, 1)];
+		    T1x = iio[-WS(ios, 30)];
+		    T1u = W[0];
+		    T1w = W[1];
+		    T1y = FMA(T1u, T1v, T1w * T1x);
+		    T3G = FNMS(T1w, T1v, T1u * T1x);
+	       }
+	       {
+		    E T1L, T1N, T1K, T1M;
+		    T1L = rio[WS(ios, 25)];
+		    T1N = iio[-WS(ios, 6)];
+		    T1K = W[48];
+		    T1M = W[49];
+		    T1O = FMA(T1K, T1L, T1M * T1N);
+		    T3Z = FNMS(T1M, T1L, T1K * T1N);
+	       }
+	       {
+		    E T1A, T1C, T1z, T1B;
+		    T1A = rio[WS(ios, 17)];
+		    T1C = iio[-WS(ios, 14)];
+		    T1z = W[32];
+		    T1B = W[33];
+		    T1D = FMA(T1z, T1A, T1B * T1C);
+		    T3H = FNMS(T1B, T1A, T1z * T1C);
+	       }
+	       {
+		    E T1G, T1I, T1F, T1H;
+		    T1G = rio[WS(ios, 9)];
+		    T1I = iio[-WS(ios, 22)];
+		    T1F = W[16];
+		    T1H = W[17];
+		    T1J = FMA(T1F, T1G, T1H * T1I);
+		    T3Y = FNMS(T1H, T1G, T1F * T1I);
+	       }
+	       {
+		    E T1E, T1P, T5W, T5X;
+		    T1E = T1y + T1D;
+		    T1P = T1J + T1O;
+		    T1Q = T1E + T1P;
+		    T61 = T1E - T1P;
+		    T5W = T3G + T3H;
+		    T5X = T3Y + T3Z;
+		    T5Y = T5W - T5X;
+		    T6J = T5W + T5X;
+	       }
+	       {
+		    E T3I, T3J, T3X, T40;
+		    T3I = T3G - T3H;
+		    T3J = T1J - T1O;
+		    T3K = T3I + T3J;
+		    T59 = T3I - T3J;
+		    T3X = T1y - T1D;
+		    T40 = T3Y - T3Z;
+		    T41 = T3X - T40;
+		    T56 = T3X + T40;
+	       }
+	  }
+	  {
+	       E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z;
+	       {
+		    E T16, T18, T15, T17;
+		    T16 = rio[WS(ios, 30)];
+		    T18 = iio[-WS(ios, 1)];
+		    T15 = W[58];
+		    T17 = W[59];
+		    T19 = FMA(T15, T16, T17 * T18);
+		    T3t = FNMS(T17, T16, T15 * T18);
+	       }
+	       {
+		    E T1m, T1o, T1l, T1n;
+		    T1m = rio[WS(ios, 22)];
+		    T1o = iio[-WS(ios, 9)];
+		    T1l = W[42];
+		    T1n = W[43];
+		    T1p = FMA(T1l, T1m, T1n * T1o);
+		    T3A = FNMS(T1n, T1m, T1l * T1o);
+	       }
+	       {
+		    E T1b, T1d, T1a, T1c;
+		    T1b = rio[WS(ios, 14)];
+		    T1d = iio[-WS(ios, 17)];
+		    T1a = W[26];
+		    T1c = W[27];
+		    T1e = FMA(T1a, T1b, T1c * T1d);
+		    T3u = FNMS(T1c, T1b, T1a * T1d);
+	       }
+	       {
+		    E T1h, T1j, T1g, T1i;
+		    T1h = rio[WS(ios, 6)];
+		    T1j = iio[-WS(ios, 25)];
+		    T1g = W[10];
+		    T1i = W[11];
+		    T1k = FMA(T1g, T1h, T1i * T1j);
+		    T3z = FNMS(T1i, T1h, T1g * T1j);
+	       }
+	       {
+		    E T1f, T1q, T5Q, T5R;
+		    T1f = T19 + T1e;
+		    T1q = T1k + T1p;
+		    T1r = T1f + T1q;
+		    T5P = T1f - T1q;
+		    T5Q = T3t + T3u;
+		    T5R = T3z + T3A;
+		    T5S = T5Q - T5R;
+		    T6F = T5Q + T5R;
+	       }
+	       {
+		    E T3v, T3w, T3y, T3B;
+		    T3v = T3t - T3u;
+		    T3w = T1k - T1p;
+		    T3x = T3v + T3w;
+		    T51 = T3v - T3w;
+		    T3y = T19 - T1e;
+		    T3B = T3z - T3A;
+		    T3C = T3y - T3B;
+		    T52 = T3y + T3B;
+	       }
+	  }
+	  {
+	       E TM, T3i, T12, T3p, TR, T3j, TX, T3o;
+	       {
+		    E TJ, TL, TI, TK;
+		    TJ = rio[WS(ios, 2)];
+		    TL = iio[-WS(ios, 29)];
+		    TI = W[2];
+		    TK = W[3];
+		    TM = FMA(TI, TJ, TK * TL);
+		    T3i = FNMS(TK, TJ, TI * TL);
+	       }
+	       {
+		    E TZ, T11, TY, T10;
+		    TZ = rio[WS(ios, 26)];
+		    T11 = iio[-WS(ios, 5)];
+		    TY = W[50];
+		    T10 = W[51];
+		    T12 = FMA(TY, TZ, T10 * T11);
+		    T3p = FNMS(T10, TZ, TY * T11);
+	       }
+	       {
+		    E TO, TQ, TN, TP;
+		    TO = rio[WS(ios, 18)];
+		    TQ = iio[-WS(ios, 13)];
+		    TN = W[34];
+		    TP = W[35];
+		    TR = FMA(TN, TO, TP * TQ);
+		    T3j = FNMS(TP, TO, TN * TQ);
+	       }
+	       {
+		    E TU, TW, TT, TV;
+		    TU = rio[WS(ios, 10)];
+		    TW = iio[-WS(ios, 21)];
+		    TT = W[18];
+		    TV = W[19];
+		    TX = FMA(TT, TU, TV * TW);
+		    T3o = FNMS(TV, TU, TT * TW);
+	       }
+	       {
+		    E TS, T13, T5K, T5L;
+		    TS = TM + TR;
+		    T13 = TX + T12;
+		    T14 = TS + T13;
+		    T5N = TS - T13;
+		    T5K = T3i + T3j;
+		    T5L = T3o + T3p;
+		    T5M = T5K - T5L;
+		    T6E = T5K + T5L;
+	       }
+	       {
+		    E T3k, T3l, T3n, T3q;
+		    T3k = T3i - T3j;
+		    T3l = TX - T12;
+		    T3m = T3k + T3l;
+		    T4Y = T3k - T3l;
+		    T3n = TM - TR;
+		    T3q = T3o - T3p;
+		    T3r = T3n - T3q;
+		    T4Z = T3n + T3q;
+	       }
+	  }
+	  {
+	       E T1V, T3R, T20, T3S, T3Q, T3T, T26, T3M, T2b, T3N, T3L, T3O;
+	       {
+		    E T1S, T1U, T1R, T1T;
+		    T1S = rio[WS(ios, 5)];
+		    T1U = iio[-WS(ios, 26)];
+		    T1R = W[8];
+		    T1T = W[9];
+		    T1V = FMA(T1R, T1S, T1T * T1U);
+		    T3R = FNMS(T1T, T1S, T1R * T1U);
+	       }
+	       {
+		    E T1X, T1Z, T1W, T1Y;
+		    T1X = rio[WS(ios, 21)];
+		    T1Z = iio[-WS(ios, 10)];
+		    T1W = W[40];
+		    T1Y = W[41];
+		    T20 = FMA(T1W, T1X, T1Y * T1Z);
+		    T3S = FNMS(T1Y, T1X, T1W * T1Z);
+	       }
+	       T3Q = T1V - T20;
+	       T3T = T3R - T3S;
+	       {
+		    E T23, T25, T22, T24;
+		    T23 = rio[WS(ios, 29)];
+		    T25 = iio[-WS(ios, 2)];
+		    T22 = W[56];
+		    T24 = W[57];
+		    T26 = FMA(T22, T23, T24 * T25);
+		    T3M = FNMS(T24, T23, T22 * T25);
+	       }
+	       {
+		    E T28, T2a, T27, T29;
+		    T28 = rio[WS(ios, 13)];
+		    T2a = iio[-WS(ios, 18)];
+		    T27 = W[24];
+		    T29 = W[25];
+		    T2b = FMA(T27, T28, T29 * T2a);
+		    T3N = FNMS(T29, T28, T27 * T2a);
+	       }
+	       T3L = T26 - T2b;
+	       T3O = T3M - T3N;
+	       {
+		    E T21, T2c, T62, T63;
+		    T21 = T1V + T20;
+		    T2c = T26 + T2b;
+		    T2d = T21 + T2c;
+		    T5Z = T2c - T21;
+		    T62 = T3R + T3S;
+		    T63 = T3M + T3N;
+		    T64 = T62 - T63;
+		    T6K = T62 + T63;
+	       }
+	       {
+		    E T3P, T3U, T42, T43;
+		    T3P = T3L - T3O;
+		    T3U = T3Q + T3T;
+		    T3V = KP707106781 * (T3P - T3U);
+		    T57 = KP707106781 * (T3U + T3P);
+		    T42 = T3T - T3Q;
+		    T43 = T3L + T3O;
+		    T44 = KP707106781 * (T42 - T43);
+		    T5a = KP707106781 * (T42 + T43);
+	       }
+	  }
+	  {
+	       E T2G, T4c, T2L, T4d, T4e, T4f, T2R, T4i, T2W, T4j, T4h, T4k;
+	       {
+		    E T2D, T2F, T2C, T2E;
+		    T2D = rio[WS(ios, 3)];
+		    T2F = iio[-WS(ios, 28)];
+		    T2C = W[4];
+		    T2E = W[5];
+		    T2G = FMA(T2C, T2D, T2E * T2F);
+		    T4c = FNMS(T2E, T2D, T2C * T2F);
+	       }
+	       {
+		    E T2I, T2K, T2H, T2J;
+		    T2I = rio[WS(ios, 19)];
+		    T2K = iio[-WS(ios, 12)];
+		    T2H = W[36];
+		    T2J = W[37];
+		    T2L = FMA(T2H, T2I, T2J * T2K);
+		    T4d = FNMS(T2J, T2I, T2H * T2K);
+	       }
+	       T4e = T4c - T4d;
+	       T4f = T2G - T2L;
+	       {
+		    E T2O, T2Q, T2N, T2P;
+		    T2O = rio[WS(ios, 27)];
+		    T2Q = iio[-WS(ios, 4)];
+		    T2N = W[52];
+		    T2P = W[53];
+		    T2R = FMA(T2N, T2O, T2P * T2Q);
+		    T4i = FNMS(T2P, T2O, T2N * T2Q);
+	       }
+	       {
+		    E T2T, T2V, T2S, T2U;
+		    T2T = rio[WS(ios, 11)];
+		    T2V = iio[-WS(ios, 20)];
+		    T2S = W[20];
+		    T2U = W[21];
+		    T2W = FMA(T2S, T2T, T2U * T2V);
+		    T4j = FNMS(T2U, T2T, T2S * T2V);
+	       }
+	       T4h = T2R - T2W;
+	       T4k = T4i - T4j;
+	       {
+		    E T2M, T2X, T68, T69;
+		    T2M = T2G + T2L;
+		    T2X = T2R + T2W;
+		    T2Y = T2M + T2X;
+		    T6f = T2X - T2M;
+		    T68 = T4c + T4d;
+		    T69 = T4i + T4j;
+		    T6a = T68 - T69;
+		    T6P = T68 + T69;
+	       }
+	       {
+		    E T4g, T4l, T4t, T4u;
+		    T4g = T4e - T4f;
+		    T4l = T4h + T4k;
+		    T4m = KP707106781 * (T4g - T4l);
+		    T5h = KP707106781 * (T4g + T4l);
+		    T4t = T4h - T4k;
+		    T4u = T4f + T4e;
+		    T4v = KP707106781 * (T4t - T4u);
+		    T5e = KP707106781 * (T4u + T4t);
+	       }
+	  }
+	  {
+	       E T1t, T6X, T7a, T7c, T30, T7b, T70, T71;
+	       {
+		    E TH, T1s, T72, T79;
+		    TH = Tj + TG;
+		    T1s = T14 + T1r;
+		    T1t = TH + T1s;
+		    T6X = TH - T1s;
+		    T72 = T6E + T6F;
+		    T79 = T73 + T78;
+		    T7a = T72 + T79;
+		    T7c = T79 - T72;
+	       }
+	       {
+		    E T2e, T2Z, T6Y, T6Z;
+		    T2e = T1Q + T2d;
+		    T2Z = T2B + T2Y;
+		    T30 = T2e + T2Z;
+		    T7b = T2Z - T2e;
+		    T6Y = T6J + T6K;
+		    T6Z = T6O + T6P;
+		    T70 = T6Y - T6Z;
+		    T71 = T6Y + T6Z;
+	       }
+	       iio[-WS(ios, 16)] = T1t - T30;
+	       rio[WS(ios, 16)] = T71 - T7a;
+	       rio[0] = T1t + T30;
+	       iio[0] = T71 + T7a;
+	       iio[-WS(ios, 24)] = T6X - T70;
+	       rio[WS(ios, 24)] = T7b - T7c;
+	       rio[WS(ios, 8)] = T6X + T70;
+	       iio[-WS(ios, 8)] = T7b + T7c;
+	  }
+	  {
+	       E T6H, T6T, T7g, T7i, T6M, T6U, T6R, T6V;
+	       {
+		    E T6D, T6G, T7e, T7f;
+		    T6D = Tj - TG;
+		    T6G = T6E - T6F;
+		    T6H = T6D + T6G;
+		    T6T = T6D - T6G;
+		    T7e = T1r - T14;
+		    T7f = T78 - T73;
+		    T7g = T7e + T7f;
+		    T7i = T7f - T7e;
+	       }
+	       {
+		    E T6I, T6L, T6N, T6Q;
+		    T6I = T1Q - T2d;
+		    T6L = T6J - T6K;
+		    T6M = T6I + T6L;
+		    T6U = T6L - T6I;
+		    T6N = T2B - T2Y;
+		    T6Q = T6O - T6P;
+		    T6R = T6N - T6Q;
+		    T6V = T6N + T6Q;
+	       }
+	       {
+		    E T6S, T7d, T6W, T7h;
+		    T6S = KP707106781 * (T6M + T6R);
+		    iio[-WS(ios, 20)] = T6H - T6S;
+		    rio[WS(ios, 4)] = T6H + T6S;
+		    T7d = KP707106781 * (T6U + T6V);
+		    rio[WS(ios, 20)] = T7d - T7g;
+		    iio[-WS(ios, 4)] = T7d + T7g;
+		    T6W = KP707106781 * (T6U - T6V);
+		    iio[-WS(ios, 28)] = T6T - T6W;
+		    rio[WS(ios, 12)] = T6T + T6W;
+		    T7h = KP707106781 * (T6R - T6M);
+		    rio[WS(ios, 28)] = T7h - T7i;
+		    iio[-WS(ios, 12)] = T7h + T7i;
+	       }
+	  }
+	  {
+	       E T5J, T7n, T7t, T6n, T5U, T7k, T6x, T6B, T6q, T7s, T66, T6k, T6u, T6A, T6h;
+	       E T6l;
+	       {
+		    E T5O, T5T, T60, T65;
+		    T5J = T5F - T5I;
+		    T7n = T7l + T7m;
+		    T7t = T7m - T7l;
+		    T6n = T5F + T5I;
+		    T5O = T5M - T5N;
+		    T5T = T5P + T5S;
+		    T5U = KP707106781 * (T5O - T5T);
+		    T7k = KP707106781 * (T5O + T5T);
+		    {
+			 E T6v, T6w, T6o, T6p;
+			 T6v = T67 + T6a;
+			 T6w = T6e + T6f;
+			 T6x = FNMS(KP382683432, T6w, KP923879532 * T6v);
+			 T6B = FMA(KP923879532, T6w, KP382683432 * T6v);
+			 T6o = T5N + T5M;
+			 T6p = T5P - T5S;
+			 T6q = KP707106781 * (T6o + T6p);
+			 T7s = KP707106781 * (T6p - T6o);
+		    }
+		    T60 = T5Y - T5Z;
+		    T65 = T61 - T64;
+		    T66 = FMA(KP923879532, T60, KP382683432 * T65);
+		    T6k = FNMS(KP923879532, T65, KP382683432 * T60);
+		    {
+			 E T6s, T6t, T6b, T6g;
+			 T6s = T5Y + T5Z;
+			 T6t = T61 + T64;
+			 T6u = FMA(KP382683432, T6s, KP923879532 * T6t);
+			 T6A = FNMS(KP382683432, T6t, KP923879532 * T6s);
+			 T6b = T67 - T6a;
+			 T6g = T6e - T6f;
+			 T6h = FNMS(KP923879532, T6g, KP382683432 * T6b);
+			 T6l = FMA(KP382683432, T6g, KP923879532 * T6b);
+		    }
+	       }
+	       {
+		    E T5V, T6i, T7r, T7u;
+		    T5V = T5J + T5U;
+		    T6i = T66 + T6h;
+		    iio[-WS(ios, 22)] = T5V - T6i;
+		    rio[WS(ios, 6)] = T5V + T6i;
+		    T7r = T6k + T6l;
+		    T7u = T7s + T7t;
+		    rio[WS(ios, 22)] = T7r - T7u;
+		    iio[-WS(ios, 6)] = T7r + T7u;
+	       }
+	       {
+		    E T6j, T6m, T7v, T7w;
+		    T6j = T5J - T5U;
+		    T6m = T6k - T6l;
+		    iio[-WS(ios, 30)] = T6j - T6m;
+		    rio[WS(ios, 14)] = T6j + T6m;
+		    T7v = T6h - T66;
+		    T7w = T7t - T7s;
+		    rio[WS(ios, 30)] = T7v - T7w;
+		    iio[-WS(ios, 14)] = T7v + T7w;
+	       }
+	       {
+		    E T6r, T6y, T7j, T7o;
+		    T6r = T6n + T6q;
+		    T6y = T6u + T6x;
+		    iio[-WS(ios, 18)] = T6r - T6y;
+		    rio[WS(ios, 2)] = T6r + T6y;
+		    T7j = T6A + T6B;
+		    T7o = T7k + T7n;
+		    rio[WS(ios, 18)] = T7j - T7o;
+		    iio[-WS(ios, 2)] = T7j + T7o;
+	       }
+	       {
+		    E T6z, T6C, T7p, T7q;
+		    T6z = T6n - T6q;
+		    T6C = T6A - T6B;
+		    iio[-WS(ios, 26)] = T6z - T6C;
+		    rio[WS(ios, 10)] = T6z + T6C;
+		    T7p = T6x - T6u;
+		    T7q = T7n - T7k;
+		    rio[WS(ios, 26)] = T7p - T7q;
+		    iio[-WS(ios, 10)] = T7p + T7q;
+	       }
+	  }
+	  {
+	       E T3h, T4D, T7R, T7X, T3E, T7O, T4N, T4R, T46, T4A, T4G, T7W, T4K, T4Q, T4x;
+	       E T4B, T3g, T7P;
+	       T3g = KP707106781 * (T3a - T3f);
+	       T3h = T35 - T3g;
+	       T4D = T35 + T3g;
+	       T7P = KP707106781 * (T4V - T4U);
+	       T7R = T7P + T7Q;
+	       T7X = T7Q - T7P;
+	       {
+		    E T3s, T3D, T4L, T4M;
+		    T3s = FNMS(KP923879532, T3r, KP382683432 * T3m);
+		    T3D = FMA(KP382683432, T3x, KP923879532 * T3C);
+		    T3E = T3s - T3D;
+		    T7O = T3s + T3D;
+		    T4L = T4b + T4m;
+		    T4M = T4s + T4v;
+		    T4N = FNMS(KP555570233, T4M, KP831469612 * T4L);
+		    T4R = FMA(KP831469612, T4M, KP555570233 * T4L);
+	       }
+	       {
+		    E T3W, T45, T4E, T4F;
+		    T3W = T3K - T3V;
+		    T45 = T41 - T44;
+		    T46 = FMA(KP980785280, T3W, KP195090322 * T45);
+		    T4A = FNMS(KP980785280, T45, KP195090322 * T3W);
+		    T4E = FMA(KP923879532, T3m, KP382683432 * T3r);
+		    T4F = FNMS(KP923879532, T3x, KP382683432 * T3C);
+		    T4G = T4E + T4F;
+		    T7W = T4F - T4E;
+	       }
+	       {
+		    E T4I, T4J, T4n, T4w;
+		    T4I = T3K + T3V;
+		    T4J = T41 + T44;
+		    T4K = FMA(KP555570233, T4I, KP831469612 * T4J);
+		    T4Q = FNMS(KP555570233, T4J, KP831469612 * T4I);
+		    T4n = T4b - T4m;
+		    T4w = T4s - T4v;
+		    T4x = FNMS(KP980785280, T4w, KP195090322 * T4n);
+		    T4B = FMA(KP195090322, T4w, KP980785280 * T4n);
+	       }
+	       {
+		    E T3F, T4y, T7V, T7Y;
+		    T3F = T3h + T3E;
+		    T4y = T46 + T4x;
+		    iio[-WS(ios, 23)] = T3F - T4y;
+		    rio[WS(ios, 7)] = T3F + T4y;
+		    T7V = T4A + T4B;
+		    T7Y = T7W + T7X;
+		    rio[WS(ios, 23)] = T7V - T7Y;
+		    iio[-WS(ios, 7)] = T7V + T7Y;
+	       }
+	       {
+		    E T4z, T4C, T7Z, T80;
+		    T4z = T3h - T3E;
+		    T4C = T4A - T4B;
+		    iio[-WS(ios, 31)] = T4z - T4C;
+		    rio[WS(ios, 15)] = T4z + T4C;
+		    T7Z = T4x - T46;
+		    T80 = T7X - T7W;
+		    rio[WS(ios, 31)] = T7Z - T80;
+		    iio[-WS(ios, 15)] = T7Z + T80;
+	       }
+	       {
+		    E T4H, T4O, T7N, T7S;
+		    T4H = T4D + T4G;
+		    T4O = T4K + T4N;
+		    iio[-WS(ios, 19)] = T4H - T4O;
+		    rio[WS(ios, 3)] = T4H + T4O;
+		    T7N = T4Q + T4R;
+		    T7S = T7O + T7R;
+		    rio[WS(ios, 19)] = T7N - T7S;
+		    iio[-WS(ios, 3)] = T7N + T7S;
+	       }
+	       {
+		    E T4P, T4S, T7T, T7U;
+		    T4P = T4D - T4G;
+		    T4S = T4Q - T4R;
+		    iio[-WS(ios, 27)] = T4P - T4S;
+		    rio[WS(ios, 11)] = T4P + T4S;
+		    T7T = T4N - T4K;
+		    T7U = T7R - T7O;
+		    rio[WS(ios, 27)] = T7T - T7U;
+		    iio[-WS(ios, 11)] = T7T + T7U;
+	       }
+	  }
+	  {
+	       E T4X, T5p, T7D, T7J, T54, T7y, T5z, T5D, T5c, T5m, T5s, T7I, T5w, T5C, T5j;
+	       E T5n, T4W, T7z;
+	       T4W = KP707106781 * (T4U + T4V);
+	       T4X = T4T - T4W;
+	       T5p = T4T + T4W;
+	       T7z = KP707106781 * (T3a + T3f);
+	       T7D = T7z + T7C;
+	       T7J = T7C - T7z;
+	       {
+		    E T50, T53, T5x, T5y;
+		    T50 = FNMS(KP382683432, T4Z, KP923879532 * T4Y);
+		    T53 = FMA(KP923879532, T51, KP382683432 * T52);
+		    T54 = T50 - T53;
+		    T7y = T50 + T53;
+		    T5x = T5d + T5e;
+		    T5y = T5g + T5h;
+		    T5z = FNMS(KP195090322, T5y, KP980785280 * T5x);
+		    T5D = FMA(KP195090322, T5x, KP980785280 * T5y);
+	       }
+	       {
+		    E T58, T5b, T5q, T5r;
+		    T58 = T56 - T57;
+		    T5b = T59 - T5a;
+		    T5c = FMA(KP555570233, T58, KP831469612 * T5b);
+		    T5m = FNMS(KP831469612, T58, KP555570233 * T5b);
+		    T5q = FMA(KP382683432, T4Y, KP923879532 * T4Z);
+		    T5r = FNMS(KP382683432, T51, KP923879532 * T52);
+		    T5s = T5q + T5r;
+		    T7I = T5r - T5q;
+	       }
+	       {
+		    E T5u, T5v, T5f, T5i;
+		    T5u = T56 + T57;
+		    T5v = T59 + T5a;
+		    T5w = FMA(KP980785280, T5u, KP195090322 * T5v);
+		    T5C = FNMS(KP195090322, T5u, KP980785280 * T5v);
+		    T5f = T5d - T5e;
+		    T5i = T5g - T5h;
+		    T5j = FNMS(KP831469612, T5i, KP555570233 * T5f);
+		    T5n = FMA(KP831469612, T5f, KP555570233 * T5i);
+	       }
+	       {
+		    E T55, T5k, T7H, T7K;
+		    T55 = T4X + T54;
+		    T5k = T5c + T5j;
+		    iio[-WS(ios, 21)] = T55 - T5k;
+		    rio[WS(ios, 5)] = T55 + T5k;
+		    T7H = T5m + T5n;
+		    T7K = T7I + T7J;
+		    rio[WS(ios, 21)] = T7H - T7K;
+		    iio[-WS(ios, 5)] = T7H + T7K;
+	       }
+	       {
+		    E T5l, T5o, T7L, T7M;
+		    T5l = T4X - T54;
+		    T5o = T5m - T5n;
+		    iio[-WS(ios, 29)] = T5l - T5o;
+		    rio[WS(ios, 13)] = T5l + T5o;
+		    T7L = T5j - T5c;
+		    T7M = T7J - T7I;
+		    rio[WS(ios, 29)] = T7L - T7M;
+		    iio[-WS(ios, 13)] = T7L + T7M;
+	       }
+	       {
+		    E T5t, T5A, T7x, T7E;
+		    T5t = T5p + T5s;
+		    T5A = T5w + T5z;
+		    iio[-WS(ios, 17)] = T5t - T5A;
+		    rio[WS(ios, 1)] = T5t + T5A;
+		    T7x = T5C + T5D;
+		    T7E = T7y + T7D;
+		    rio[WS(ios, 17)] = T7x - T7E;
+		    iio[-WS(ios, 1)] = T7x + T7E;
+	       }
+	       {
+		    E T5B, T5E, T7F, T7G;
+		    T5B = T5p - T5s;
+		    T5E = T5C - T5D;
+		    iio[-WS(ios, 25)] = T5B - T5E;
+		    rio[WS(ios, 9)] = T5B + T5E;
+		    T7F = T5z - T5w;
+		    T7G = T7D - T7y;
+		    rio[WS(ios, 25)] = T7F - T7G;
+		    iio[-WS(ios, 9)] = T7F + T7G;
+	       }
+	  }
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction table referenced by desc below */
+     {TW_FULL, 0, 32}, /* NOTE(review): TW_FULL is defined outside this view; presumably requests the full twiddle set for radix 32 -- confirm */
+     {TW_NEXT, 1, 0} /* last entry of the table; presumably a terminator/advance marker -- confirm against tw_instr's definition */
+};
+
+static const hc2hc_desc desc = { 32, "hf_32", twinstr, {340, 114, 94, 0}, &GENUS, 0, 0, 0 }; /* descriptor: size 32, name "hf_32", the table above, four counts (presumably adds, muls, fused multiply/adds, other -- confirm), and GENUS */
+
+void X(codelet_hf_32) (planner *p) { /* registration entry point: hands hf_32 and its descriptor to planner p */
+     X(khc2hc_dit_register) (p, hf_32, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_4.c b/src/fftw3/rdft/codelets/r2hc/hf_4.c
new file mode 100644
index 0000000..f4468dd
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_4.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:51 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 4 -dit -name hf_4 -include hf.h */
+
+/*
+ * This function contains 22 FP additions, 12 FP multiplications,
+ * (or, 16 additions, 6 multiplications, 6 fused multiply/add),
+ * 13 stack variables, and 16 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_4.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_4.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_4.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf_4(R *rio, R *iio, const R *W, stride ios, int m, int dist) /* hf_4: in-place 4-point twiddle butterfly over the rio/iio arrays (generated by "gen_hc2hc -n 4 -dit"); returns W advanced past the twiddles it consumed. */
+{ /* rio is indexed upward (rio[WS(ios, k)]) and iio downward (iio[-WS(ios, k)]); W supplies 6 twiddle values per iteration; m bounds the loop; dist is the per-iteration pointer step. */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 6) { /* runs while i > 0 (no iterations when m <= 2); each pass moves rio forward and iio backward by dist and W forward by 6 */
+	  E T1, Th, Tl, Tp, T6, To, Tc, Tk;
+	  T1 = rio[0]; /* input 0, first component: used without a twiddle */
+	  { /* input 3: (rio[3], iio[0]) combined with twiddle pair (W[4], W[5]) */
+	       E Te, Tg, Td, Tf;
+	       Te = rio[WS(ios, 3)];
+	       Tg = iio[0];
+	       Td = W[4];
+	       Tf = W[5];
+	       Th = FMA(Td, Te, Tf * Tg); /* NOTE(review): FMA/FNMS are macros defined outside this view; presumably FMA(a,b,c)=a*b+c and FNMS(a,b,c)=c-a*b -- confirm. Under that reading Th = Td*Te + Tf*Tg */
+	       Tl = FNMS(Tf, Te, Td * Tg); /* under the same reading Tl = Td*Tg - Tf*Te */
+	  }
+	  Tp = iio[-WS(ios, 3)]; /* input 0, second component: used without a twiddle */
+	  { /* input 2: (rio[2], iio[-1]) combined with twiddle pair (W[2], W[3]) */
+	       E T3, T5, T2, T4;
+	       T3 = rio[WS(ios, 2)];
+	       T5 = iio[-WS(ios, 1)];
+	       T2 = W[2];
+	       T4 = W[3];
+	       T6 = FMA(T2, T3, T4 * T5);
+	       To = FNMS(T4, T3, T2 * T5);
+	  }
+	  { /* input 1: (rio[1], iio[-2]) combined with twiddle pair (W[0], W[1]) */
+	       E T9, Tb, T8, Ta;
+	       T9 = rio[WS(ios, 1)];
+	       Tb = iio[-WS(ios, 2)];
+	       T8 = W[0];
+	       Ta = W[1];
+	       Tc = FMA(T8, T9, Ta * Tb);
+	       Tk = FNMS(Ta, T9, T8 * Tb);
+	  }
+	  { /* outputs built from sums: writes rio[0], rio[2], iio[0], iio[-2] */
+	       E T7, Ti, Tn, Tq;
+	       T7 = T1 + T6; /* inputs 0 and 2, first components */
+	       Ti = Tc + Th; /* inputs 1 and 3, first components */
+	       iio[-WS(ios, 2)] = T7 - Ti;
+	       rio[0] = T7 + Ti;
+	       Tn = Tk + Tl; /* inputs 1 and 3, second components */
+	       Tq = To + Tp; /* inputs 2 and 0, second components */
+	       rio[WS(ios, 2)] = Tn - Tq;
+	       iio[0] = Tn + Tq;
+	  }
+	  { /* outputs built from differences: writes rio[1], rio[3], iio[-1], iio[-3] */
+	       E Tj, Tm, Tr, Ts;
+	       Tj = T1 - T6;
+	       Tm = Tk - Tl;
+	       iio[-WS(ios, 3)] = Tj - Tm;
+	       rio[WS(ios, 1)] = Tj + Tm;
+	       Tr = Th - Tc;
+	       Ts = Tp - To;
+	       rio[WS(ios, 3)] = Tr - Ts;
+	       iio[-WS(ios, 1)] = Tr + Ts;
+	  }
+     }
+     return W; /* W as left by the loop: 6 entries further on for every iteration executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction table referenced by desc below */
+     {TW_FULL, 0, 4}, /* NOTE(review): TW_FULL is defined outside this view; presumably requests the full twiddle set for radix 4 -- confirm */
+     {TW_NEXT, 1, 0} /* last entry of the table; presumably a terminator/advance marker -- confirm against tw_instr's definition */
+};
+
+static const hc2hc_desc desc = { 4, "hf_4", twinstr, {16, 6, 6, 0}, &GENUS, 0, 0, 0 }; /* descriptor: size 4, name "hf_4", the table above, and {16, 6, 6, 0}, which matches the 16 additions / 6 multiplications / 6 fused multiply/add quoted in this file's header comment */
+
+void X(codelet_hf_4) (planner *p) { /* registration entry point: hands hf_4 and its descriptor to planner p */
+     X(khc2hc_dit_register) (p, hf_4, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_5.c b/src/fftw3/rdft/codelets/r2hc/hf_5.c
new file mode 100644
index 0000000..2f02461
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_5.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:52 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 5 -dit -name hf_5 -include hf.h */
+
+/*
+ * This function contains 40 FP additions, 28 FP multiplications,
+ * (or, 26 additions, 14 multiplications, 14 fused multiply/add),
+ * 29 stack variables, and 20 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_5.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_5.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_5.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf_5(R *rio, R *iio, const R *W, stride ios, int m, int dist) /* hf_5: in-place 5-point twiddle butterfly over the rio/iio arrays (generated by "gen_hc2hc -n 5 -dit"); returns W advanced past the twiddles it consumed. */
+{ /* rio is indexed upward (rio[WS(ios, k)]) and iio downward (iio[-WS(ios, k)]); W supplies 8 twiddle values per iteration; m bounds the loop; dist is the per-iteration pointer step. */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 8) { /* runs while i > 0 (no iterations when m <= 2); each pass moves rio forward and iio backward by dist and W forward by 8 */
+	  E T1, TE, Tu, Tx, TG, TF, TB, TC, TD, Tc, Tn, To;
+	  T1 = rio[0]; /* input 0, first component: used without a twiddle */
+	  TE = iio[-WS(ios, 4)]; /* input 0, second component: used without a twiddle */
+	  { /* load inputs 1..4, combine each with its twiddle pair, then form the pairwise sums and differences used below */
+	       E T6, Ts, Tm, Tw, Tb, Tt, Th, Tv;
+	       { /* input 1: (rio[1], iio[-3]) with twiddle pair (W[0], W[1]) */
+		    E T3, T5, T2, T4;
+		    T3 = rio[WS(ios, 1)];
+		    T5 = iio[-WS(ios, 3)];
+		    T2 = W[0];
+		    T4 = W[1];
+		    T6 = FMA(T2, T3, T4 * T5); /* NOTE(review): FMA/FNMS are macros defined outside this view; presumably FMA(a,b,c)=a*b+c and FNMS(a,b,c)=c-a*b -- confirm. Under that reading T6 = T2*T3 + T4*T5 */
+		    Ts = FNMS(T4, T3, T2 * T5); /* under the same reading Ts = T2*T5 - T4*T3 */
+	       }
+	       { /* input 3: (rio[3], iio[-1]) with twiddle pair (W[4], W[5]) */
+		    E Tj, Tl, Ti, Tk;
+		    Tj = rio[WS(ios, 3)];
+		    Tl = iio[-WS(ios, 1)];
+		    Ti = W[4];
+		    Tk = W[5];
+		    Tm = FMA(Ti, Tj, Tk * Tl);
+		    Tw = FNMS(Tk, Tj, Ti * Tl);
+	       }
+	       { /* input 4: (rio[4], iio[0]) with twiddle pair (W[6], W[7]) */
+		    E T8, Ta, T7, T9;
+		    T8 = rio[WS(ios, 4)];
+		    Ta = iio[0];
+		    T7 = W[6];
+		    T9 = W[7];
+		    Tb = FMA(T7, T8, T9 * Ta);
+		    Tt = FNMS(T9, T8, T7 * Ta);
+	       }
+	       { /* input 2: (rio[2], iio[-2]) with twiddle pair (W[2], W[3]) */
+		    E Te, Tg, Td, Tf;
+		    Te = rio[WS(ios, 2)];
+		    Tg = iio[-WS(ios, 2)];
+		    Td = W[2];
+		    Tf = W[3];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    Tv = FNMS(Tf, Te, Td * Tg);
+	       }
+	       Tu = Ts - Tt; /* inputs 1 and 4, second components: difference */
+	       Tx = Tv - Tw; /* inputs 2 and 3, second components: difference */
+	       TG = Th - Tm; /* inputs 2 and 3, first components: difference */
+	       TF = Tb - T6; /* inputs 4 and 1, first components: difference */
+	       TB = Ts + Tt; /* inputs 1 and 4, second components: sum */
+	       TC = Tv + Tw; /* inputs 2 and 3, second components: sum */
+	       TD = TB + TC; /* sum of the second components of inputs 1..4 */
+	       Tc = T6 + Tb; /* inputs 1 and 4, first components: sum */
+	       Tn = Th + Tm; /* inputs 2 and 3, first components: sum */
+	       To = Tc + Tn; /* sum of the first components of inputs 1..4 */
+	  }
+	  rio[0] = T1 + To; /* total of all five first components */
+	  iio[0] = TD + TE; /* total of all five second components */
+	  { /* writes rio[1], rio[2], iio[-3], iio[-4] from T1, the first-component sums and the second-component differences */
+	       E Ty, TA, Tr, Tz, Tp, Tq;
+	       Ty = FMA(KP951056516, Tu, KP587785252 * Tx);
+	       TA = FNMS(KP587785252, Tu, KP951056516 * Tx);
+	       Tp = KP559016994 * (Tc - Tn);
+	       Tq = FNMS(KP250000000, To, T1);
+	       Tr = Tp + Tq;
+	       Tz = Tq - Tp;
+	       iio[-WS(ios, 4)] = Tr - Ty;
+	       iio[-WS(ios, 3)] = Tz + TA;
+	       rio[WS(ios, 1)] = Tr + Ty;
+	       rio[WS(ios, 2)] = Tz - TA;
+	  }
+	  { /* writes rio[3], rio[4], iio[-1], iio[-2] from TE, the second-component sums and the first-component differences */
+	       E TH, TL, TK, TM, TI, TJ;
+	       TH = FNMS(KP587785252, TG, KP951056516 * TF);
+	       TL = FMA(KP587785252, TF, KP951056516 * TG);
+	       TI = KP559016994 * (TB - TC);
+	       TJ = FNMS(KP250000000, TD, TE);
+	       TK = TI + TJ;
+	       TM = TJ - TI;
+	       rio[WS(ios, 4)] = TH - TK;
+	       iio[-WS(ios, 2)] = TL + TM;
+	       iio[-WS(ios, 1)] = TH + TK;
+	       rio[WS(ios, 3)] = TL - TM;
+	  }
+     }
+     return W; /* W as left by the loop: 8 entries further on for every iteration executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction table referenced by desc below */
+     {TW_FULL, 0, 5}, /* NOTE(review): TW_FULL is defined outside this view; presumably requests the full twiddle set for radix 5 -- confirm */
+     {TW_NEXT, 1, 0} /* last entry of the table; presumably a terminator/advance marker -- confirm against tw_instr's definition */
+};
+
+static const hc2hc_desc desc = { 5, "hf_5", twinstr, {26, 14, 14, 0}, &GENUS, 0, 0, 0 }; /* descriptor: size 5, name "hf_5", the table above, and {26, 14, 14, 0}, which matches the 26 additions / 14 multiplications / 14 fused multiply/add quoted in this file's header comment */
+
+void X(codelet_hf_5) (planner *p) { /* registration entry point: hands hf_5 and its descriptor to planner p */
+     X(khc2hc_dit_register) (p, hf_5, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_6.c b/src/fftw3/rdft/codelets/r2hc/hf_6.c
new file mode 100644
index 0000000..e4c9f8d
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_6.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:54 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 6 -dit -name hf_6 -include hf.h */
+
+/*
+ * This function contains 46 FP additions, 28 FP multiplications,
+ * (or, 32 additions, 14 multiplications, 14 fused multiply/add),
+ * 23 stack variables, and 24 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_6.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_6.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_6.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf_6(R *rio, R *iio, const R *W, stride ios, int m, int dist) /* hf_6: in-place 6-point twiddle butterfly over the rio/iio arrays (generated by "gen_hc2hc -n 6 -dit"); returns W advanced past the twiddles it consumed. */
+{ /* rio is indexed upward (rio[WS(ios, k)]) and iio downward (iio[-WS(ios, k)]); W supplies 10 twiddle values per iteration; m bounds the loop; dist is the per-iteration pointer step. */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 10) { /* runs while i > 0 (no iterations when m <= 2); each pass moves rio forward and iio backward by dist and W forward by 10 */
+	  E T7, TS, Tv, TO, Ti, TI, Tw, TC, Tt, TJ, Tx, TF;
+	  { /* inputs 0 and 3: input 0 is used as-is, input 3 is combined with twiddle pair (W[4], W[5]) */
+	       E T1, TN, T6, TM;
+	       T1 = rio[0];
+	       TN = iio[-WS(ios, 5)];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = rio[WS(ios, 3)];
+		    T5 = iio[-WS(ios, 2)];
+		    T2 = W[4];
+		    T4 = W[5];
+		    T6 = FMA(T2, T3, T4 * T5); /* NOTE(review): FMA/FNMS are macros defined outside this view; presumably FMA(a,b,c)=a*b+c and FNMS(a,b,c)=c-a*b -- confirm. Under that reading T6 = T2*T3 + T4*T5 */
+		    TM = FNMS(T4, T3, T2 * T5); /* under the same reading TM = T2*T5 - T4*T3 */
+	       }
+	       T7 = T1 - T6; /* first components: input 0 minus input 3 */
+	       TS = TN - TM; /* second components: input 0 minus input 3 */
+	       Tv = T1 + T6; /* first components: input 0 plus input 3 */
+	       TO = TM + TN; /* second components: input 3 plus input 0 */
+	  }
+	  { /* inputs 2 and 5 */
+	       E Tc, TA, Th, TB;
+	       { /* input 2: (rio[2], iio[-3]) with twiddle pair (W[2], W[3]) */
+		    E T9, Tb, T8, Ta;
+		    T9 = rio[WS(ios, 2)];
+		    Tb = iio[-WS(ios, 3)];
+		    T8 = W[2];
+		    Ta = W[3];
+		    Tc = FMA(T8, T9, Ta * Tb);
+		    TA = FNMS(Ta, T9, T8 * Tb);
+	       }
+	       { /* input 5: (rio[5], iio[0]) with twiddle pair (W[8], W[9]) */
+		    E Te, Tg, Td, Tf;
+		    Te = rio[WS(ios, 5)];
+		    Tg = iio[0];
+		    Td = W[8];
+		    Tf = W[9];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    TB = FNMS(Tf, Te, Td * Tg);
+	       }
+	       Ti = Tc - Th; /* first components: input 2 minus input 5 */
+	       TI = TA - TB; /* second components: input 2 minus input 5 */
+	       Tw = Tc + Th; /* first components: input 2 plus input 5 */
+	       TC = TA + TB; /* second components: input 2 plus input 5 */
+	  }
+	  { /* inputs 4 and 1 */
+	       E Tn, TD, Ts, TE;
+	       { /* input 4: (rio[4], iio[-1]) with twiddle pair (W[6], W[7]) */
+		    E Tk, Tm, Tj, Tl;
+		    Tk = rio[WS(ios, 4)];
+		    Tm = iio[-WS(ios, 1)];
+		    Tj = W[6];
+		    Tl = W[7];
+		    Tn = FMA(Tj, Tk, Tl * Tm);
+		    TD = FNMS(Tl, Tk, Tj * Tm);
+	       }
+	       { /* input 1: (rio[1], iio[-4]) with twiddle pair (W[0], W[1]) */
+		    E Tp, Tr, To, Tq;
+		    Tp = rio[WS(ios, 1)];
+		    Tr = iio[-WS(ios, 4)];
+		    To = W[0];
+		    Tq = W[1];
+		    Ts = FMA(To, Tp, Tq * Tr);
+		    TE = FNMS(Tq, Tp, To * Tr);
+	       }
+	       Tt = Tn - Ts; /* first components: input 4 minus input 1 */
+	       TJ = TE - TD; /* second components: input 1 minus input 4 (note the opposite order to Tt) */
+	       Tx = Tn + Ts; /* first components: input 4 plus input 1 */
+	       TF = TD + TE; /* second components: input 4 plus input 1 */
+	  }
+	  { /* outputs built from the difference terms (T7, TS, Ti, TI, Tt, TJ): writes rio[1], rio[3], rio[5], iio[-1], iio[-3], iio[-5] */
+	       E TK, Tu, TH, TT, TR, TU;
+	       TK = KP866025403 * (TI + TJ);
+	       Tu = Ti + Tt;
+	       TH = FNMS(KP500000000, Tu, T7);
+	       iio[-WS(ios, 3)] = T7 + Tu;
+	       rio[WS(ios, 1)] = TH + TK;
+	       iio[-WS(ios, 5)] = TH - TK;
+	       TT = KP866025403 * (Tt - Ti);
+	       TR = TJ - TI;
+	       TU = FMA(KP500000000, TR, TS);
+	       rio[WS(ios, 3)] = TR - TS;
+	       iio[-WS(ios, 1)] = TT + TU;
+	       rio[WS(ios, 5)] = TT - TU;
+	  }
+	  { /* outputs built from the sum terms (Tv, TO, Tw, TC, Tx, TF): writes rio[0], rio[2], rio[4], iio[0], iio[-2], iio[-4] */
+	       E TG, Ty, Tz, TP, TL, TQ;
+	       TG = KP866025403 * (TC - TF);
+	       Ty = Tw + Tx;
+	       Tz = FNMS(KP500000000, Ty, Tv);
+	       rio[0] = Tv + Ty; /* total of all six first components */
+	       iio[-WS(ios, 4)] = Tz + TG;
+	       rio[WS(ios, 2)] = Tz - TG;
+	       TP = KP866025403 * (Tw - Tx);
+	       TL = TC + TF;
+	       TQ = FNMS(KP500000000, TL, TO);
+	       iio[0] = TL + TO; /* total of all six second components */
+	       iio[-WS(ios, 2)] = TP + TQ;
+	       rio[WS(ios, 4)] = TP - TQ;
+	  }
+     }
+     return W; /* W as left by the loop: 10 entries further on for every iteration executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle instruction table referenced by desc below */
+     {TW_FULL, 0, 6}, /* NOTE(review): TW_FULL is defined outside this view; presumably requests the full twiddle set for radix 6 -- confirm */
+     {TW_NEXT, 1, 0} /* last entry of the table; presumably a terminator/advance marker -- confirm against tw_instr's definition */
+};
+
+static const hc2hc_desc desc = { 6, "hf_6", twinstr, {32, 14, 14, 0}, &GENUS, 0, 0, 0 }; /* descriptor: size 6, name "hf_6", the table above, and {32, 14, 14, 0}, which matches the 32 additions / 14 multiplications / 14 fused multiply/add quoted in this file's header comment */
+
+void X(codelet_hf_6) (planner *p) { /* registration entry point: hands hf_6 and its descriptor to planner p */
+     X(khc2hc_dit_register) (p, hf_6, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_64.c b/src/fftw3/rdft/codelets/r2hc/hf_64.c
new file mode 100644
index 0000000..3e99d63
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_64.c
@@ -0,0 +1,2001 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:57:11 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 64 -dit -name hf_64 -include hf.h */
+
+/*
+ * This function contains 1038 FP additions, 500 FP multiplications,
+ * (or, 808 additions, 270 multiplications, 230 fused multiply/add),
+ * 176 stack variables, and 256 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_64.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_64.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_64.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf_64(R *rio, R *iio, const R *W, stride ios, int m, int dist)
+{
+     DK(KP471396736, +0.471396736825997648556387625905254377657460319);
+     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
+     DK(KP290284677, +0.290284677254462367636192375817395274691476278);
+     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
+     DK(KP634393284, +0.634393284163645498215171613225493370675687095);
+     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
+     DK(KP098017140, +0.098017140329560601994195563888641845861136673);
+     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 126) {
+	  E Tj, TcL, ThT, Tin, T6b, Taz, TgT, Thn, TG, Thm, TcO, TgO, T6m, ThQ, TaC;
+	  E Tim, T14, Tfq, T6y, T9O, TaG, Tc0, TcU, TeE, T1r, Tfr, T6J, T9P, TaJ, Tc1;
+	  E TcZ, TeF, T1Q, T2d, Tfx, Tfu, Tfv, Tfw, T6Q, TaM, Tdb, TeJ, T71, TaQ, T7a;
+	  E TaN, Td6, TeI, T77, TaP, T2B, T2Y, Tfz, TfA, TfB, TfC, T7h, TaW, Tdm, TeM;
+	  E T7s, TaU, T7B, TaX, Tdh, TeL, T7y, TaT, T3M, TfL, TdL, TeQ, TfI, Tgt, T7K;
+	  E Tb2, T7V, Tbe, T8s, Tb3, Tdu, TeT, T8p, Tbd, T5j, TfR, Tec, Tf0, TfY, Tgy;
+	  E T8D, Tbl, T8O, Tbx, T9l, Tbm, TdV, TeX, T9i, Tbw, T64, TfZ, Te5, Ted, TfU;
+	  E Tgz, T90, T9o, T9b, T9n, Tbt, Tbz, Te0, Tee, Tbq, TbA, T4x, TfJ, TdE, TdM;
+	  E TfO, Tgu, T87, T8v, T8i, T8u, Tba, Tbg, Tdz, TdN, Tb7, Tbh;
+	  {
+	       E T1, TgR, T6, TgQ, Tc, T68, Th, T69;
+	       T1 = rio[0];
+	       TgR = iio[-WS(ios, 63)];
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = rio[WS(ios, 32)];
+		    T5 = iio[-WS(ios, 31)];
+		    T2 = W[62];
+		    T4 = W[63];
+		    T6 = FMA(T2, T3, T4 * T5);
+		    TgQ = FNMS(T4, T3, T2 * T5);
+	       }
+	       {
+		    E T9, Tb, T8, Ta;
+		    T9 = rio[WS(ios, 16)];
+		    Tb = iio[-WS(ios, 47)];
+		    T8 = W[30];
+		    Ta = W[31];
+		    Tc = FMA(T8, T9, Ta * Tb);
+		    T68 = FNMS(Ta, T9, T8 * Tb);
+	       }
+	       {
+		    E Te, Tg, Td, Tf;
+		    Te = rio[WS(ios, 48)];
+		    Tg = iio[-WS(ios, 15)];
+		    Td = W[94];
+		    Tf = W[95];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    T69 = FNMS(Tf, Te, Td * Tg);
+	       }
+	       {
+		    E T7, Ti, ThR, ThS;
+		    T7 = T1 + T6;
+		    Ti = Tc + Th;
+		    Tj = T7 + Ti;
+		    TcL = T7 - Ti;
+		    ThR = TgR - TgQ;
+		    ThS = Tc - Th;
+		    ThT = ThR - ThS;
+		    Tin = ThS + ThR;
+	       }
+	       {
+		    E T67, T6a, TgP, TgS;
+		    T67 = T1 - T6;
+		    T6a = T68 - T69;
+		    T6b = T67 - T6a;
+		    Taz = T67 + T6a;
+		    TgP = T68 + T69;
+		    TgS = TgQ + TgR;
+		    TgT = TgP + TgS;
+		    Thn = TgS - TgP;
+	       }
+	  }
+	  {
+	       E To, T6c, Tt, T6d, T6e, T6f, Tz, T6i, TE, T6j, T6h, T6k;
+	       {
+		    E Tl, Tn, Tk, Tm;
+		    Tl = rio[WS(ios, 8)];
+		    Tn = iio[-WS(ios, 55)];
+		    Tk = W[14];
+		    Tm = W[15];
+		    To = FMA(Tk, Tl, Tm * Tn);
+		    T6c = FNMS(Tm, Tl, Tk * Tn);
+	       }
+	       {
+		    E Tq, Ts, Tp, Tr;
+		    Tq = rio[WS(ios, 40)];
+		    Ts = iio[-WS(ios, 23)];
+		    Tp = W[78];
+		    Tr = W[79];
+		    Tt = FMA(Tp, Tq, Tr * Ts);
+		    T6d = FNMS(Tr, Tq, Tp * Ts);
+	       }
+	       T6e = T6c - T6d;
+	       T6f = To - Tt;
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = rio[WS(ios, 56)];
+		    Ty = iio[-WS(ios, 7)];
+		    Tv = W[110];
+		    Tx = W[111];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    T6i = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E TB, TD, TA, TC;
+		    TB = rio[WS(ios, 24)];
+		    TD = iio[-WS(ios, 39)];
+		    TA = W[46];
+		    TC = W[47];
+		    TE = FMA(TA, TB, TC * TD);
+		    T6j = FNMS(TC, TB, TA * TD);
+	       }
+	       T6h = Tz - TE;
+	       T6k = T6i - T6j;
+	       {
+		    E Tu, TF, TcM, TcN;
+		    Tu = To + Tt;
+		    TF = Tz + TE;
+		    TG = Tu + TF;
+		    Thm = TF - Tu;
+		    TcM = T6c + T6d;
+		    TcN = T6i + T6j;
+		    TcO = TcM - TcN;
+		    TgO = TcM + TcN;
+	       }
+	       {
+		    E T6g, T6l, TaA, TaB;
+		    T6g = T6e - T6f;
+		    T6l = T6h + T6k;
+		    T6m = KP707106781 * (T6g - T6l);
+		    ThQ = KP707106781 * (T6g + T6l);
+		    TaA = T6f + T6e;
+		    TaB = T6h - T6k;
+		    TaC = KP707106781 * (TaA + TaB);
+		    Tim = KP707106781 * (TaB - TaA);
+	       }
+	  }
+	  {
+	       E TS, TcQ, T6q, T6t, T13, TcR, T6r, T6w, T6s, T6x;
+	       {
+		    E TM, T6o, TR, T6p;
+		    {
+			 E TJ, TL, TI, TK;
+			 TJ = rio[WS(ios, 4)];
+			 TL = iio[-WS(ios, 59)];
+			 TI = W[6];
+			 TK = W[7];
+			 TM = FMA(TI, TJ, TK * TL);
+			 T6o = FNMS(TK, TJ, TI * TL);
+		    }
+		    {
+			 E TO, TQ, TN, TP;
+			 TO = rio[WS(ios, 36)];
+			 TQ = iio[-WS(ios, 27)];
+			 TN = W[70];
+			 TP = W[71];
+			 TR = FMA(TN, TO, TP * TQ);
+			 T6p = FNMS(TP, TO, TN * TQ);
+		    }
+		    TS = TM + TR;
+		    TcQ = T6o + T6p;
+		    T6q = T6o - T6p;
+		    T6t = TM - TR;
+	       }
+	       {
+		    E TX, T6u, T12, T6v;
+		    {
+			 E TU, TW, TT, TV;
+			 TU = rio[WS(ios, 20)];
+			 TW = iio[-WS(ios, 43)];
+			 TT = W[38];
+			 TV = W[39];
+			 TX = FMA(TT, TU, TV * TW);
+			 T6u = FNMS(TV, TU, TT * TW);
+		    }
+		    {
+			 E TZ, T11, TY, T10;
+			 TZ = rio[WS(ios, 52)];
+			 T11 = iio[-WS(ios, 11)];
+			 TY = W[102];
+			 T10 = W[103];
+			 T12 = FMA(TY, TZ, T10 * T11);
+			 T6v = FNMS(T10, TZ, TY * T11);
+		    }
+		    T13 = TX + T12;
+		    TcR = T6u + T6v;
+		    T6r = TX - T12;
+		    T6w = T6u - T6v;
+	       }
+	       T14 = TS + T13;
+	       Tfq = TcQ + TcR;
+	       T6s = T6q + T6r;
+	       T6x = T6t - T6w;
+	       T6y = FNMS(KP923879532, T6x, KP382683432 * T6s);
+	       T9O = FMA(KP923879532, T6s, KP382683432 * T6x);
+	       {
+		    E TaE, TaF, TcS, TcT;
+		    TaE = T6q - T6r;
+		    TaF = T6t + T6w;
+		    TaG = FNMS(KP382683432, TaF, KP923879532 * TaE);
+		    Tc0 = FMA(KP382683432, TaE, KP923879532 * TaF);
+		    TcS = TcQ - TcR;
+		    TcT = TS - T13;
+		    TcU = TcS - TcT;
+		    TeE = TcT + TcS;
+	       }
+	  }
+	  {
+	       E T1f, TcW, T6B, T6E, T1q, TcX, T6C, T6H, T6D, T6I;
+	       {
+		    E T19, T6z, T1e, T6A;
+		    {
+			 E T16, T18, T15, T17;
+			 T16 = rio[WS(ios, 60)];
+			 T18 = iio[-WS(ios, 3)];
+			 T15 = W[118];
+			 T17 = W[119];
+			 T19 = FMA(T15, T16, T17 * T18);
+			 T6z = FNMS(T17, T16, T15 * T18);
+		    }
+		    {
+			 E T1b, T1d, T1a, T1c;
+			 T1b = rio[WS(ios, 28)];
+			 T1d = iio[-WS(ios, 35)];
+			 T1a = W[54];
+			 T1c = W[55];
+			 T1e = FMA(T1a, T1b, T1c * T1d);
+			 T6A = FNMS(T1c, T1b, T1a * T1d);
+		    }
+		    T1f = T19 + T1e;
+		    TcW = T6z + T6A;
+		    T6B = T6z - T6A;
+		    T6E = T19 - T1e;
+	       }
+	       {
+		    E T1k, T6F, T1p, T6G;
+		    {
+			 E T1h, T1j, T1g, T1i;
+			 T1h = rio[WS(ios, 12)];
+			 T1j = iio[-WS(ios, 51)];
+			 T1g = W[22];
+			 T1i = W[23];
+			 T1k = FMA(T1g, T1h, T1i * T1j);
+			 T6F = FNMS(T1i, T1h, T1g * T1j);
+		    }
+		    {
+			 E T1m, T1o, T1l, T1n;
+			 T1m = rio[WS(ios, 44)];
+			 T1o = iio[-WS(ios, 19)];
+			 T1l = W[86];
+			 T1n = W[87];
+			 T1p = FMA(T1l, T1m, T1n * T1o);
+			 T6G = FNMS(T1n, T1m, T1l * T1o);
+		    }
+		    T1q = T1k + T1p;
+		    TcX = T6F + T6G;
+		    T6C = T1k - T1p;
+		    T6H = T6F - T6G;
+	       }
+	       T1r = T1f + T1q;
+	       Tfr = TcW + TcX;
+	       T6D = T6B + T6C;
+	       T6I = T6E - T6H;
+	       T6J = FMA(KP382683432, T6D, KP923879532 * T6I);
+	       T9P = FNMS(KP923879532, T6D, KP382683432 * T6I);
+	       {
+		    E TaH, TaI, TcV, TcY;
+		    TaH = T6B - T6C;
+		    TaI = T6E + T6H;
+		    TaJ = FMA(KP923879532, TaH, KP382683432 * TaI);
+		    Tc1 = FNMS(KP382683432, TaH, KP923879532 * TaI);
+		    TcV = T1f - T1q;
+		    TcY = TcW - TcX;
+		    TcZ = TcV + TcY;
+		    TeF = TcV - TcY;
+	       }
+	  }
+	  {
+	       E T1y, T6M, T1D, T6N, T1E, Td2, T1J, T74, T1O, T75, T1P, Td3, T21, Td8, T6W;
+	       E T6Z, T2c, Td9, T6R, T6U;
+	       {
+		    E T1v, T1x, T1u, T1w;
+		    T1v = rio[WS(ios, 2)];
+		    T1x = iio[-WS(ios, 61)];
+		    T1u = W[2];
+		    T1w = W[3];
+		    T1y = FMA(T1u, T1v, T1w * T1x);
+		    T6M = FNMS(T1w, T1v, T1u * T1x);
+	       }
+	       {
+		    E T1A, T1C, T1z, T1B;
+		    T1A = rio[WS(ios, 34)];
+		    T1C = iio[-WS(ios, 29)];
+		    T1z = W[66];
+		    T1B = W[67];
+		    T1D = FMA(T1z, T1A, T1B * T1C);
+		    T6N = FNMS(T1B, T1A, T1z * T1C);
+	       }
+	       T1E = T1y + T1D;
+	       Td2 = T6M + T6N;
+	       {
+		    E T1G, T1I, T1F, T1H;
+		    T1G = rio[WS(ios, 18)];
+		    T1I = iio[-WS(ios, 45)];
+		    T1F = W[34];
+		    T1H = W[35];
+		    T1J = FMA(T1F, T1G, T1H * T1I);
+		    T74 = FNMS(T1H, T1G, T1F * T1I);
+	       }
+	       {
+		    E T1L, T1N, T1K, T1M;
+		    T1L = rio[WS(ios, 50)];
+		    T1N = iio[-WS(ios, 13)];
+		    T1K = W[98];
+		    T1M = W[99];
+		    T1O = FMA(T1K, T1L, T1M * T1N);
+		    T75 = FNMS(T1M, T1L, T1K * T1N);
+	       }
+	       T1P = T1J + T1O;
+	       Td3 = T74 + T75;
+	       {
+		    E T1V, T6X, T20, T6Y;
+		    {
+			 E T1S, T1U, T1R, T1T;
+			 T1S = rio[WS(ios, 10)];
+			 T1U = iio[-WS(ios, 53)];
+			 T1R = W[18];
+			 T1T = W[19];
+			 T1V = FMA(T1R, T1S, T1T * T1U);
+			 T6X = FNMS(T1T, T1S, T1R * T1U);
+		    }
+		    {
+			 E T1X, T1Z, T1W, T1Y;
+			 T1X = rio[WS(ios, 42)];
+			 T1Z = iio[-WS(ios, 21)];
+			 T1W = W[82];
+			 T1Y = W[83];
+			 T20 = FMA(T1W, T1X, T1Y * T1Z);
+			 T6Y = FNMS(T1Y, T1X, T1W * T1Z);
+		    }
+		    T21 = T1V + T20;
+		    Td8 = T6X + T6Y;
+		    T6W = T1V - T20;
+		    T6Z = T6X - T6Y;
+	       }
+	       {
+		    E T26, T6S, T2b, T6T;
+		    {
+			 E T23, T25, T22, T24;
+			 T23 = rio[WS(ios, 58)];
+			 T25 = iio[-WS(ios, 5)];
+			 T22 = W[114];
+			 T24 = W[115];
+			 T26 = FMA(T22, T23, T24 * T25);
+			 T6S = FNMS(T24, T23, T22 * T25);
+		    }
+		    {
+			 E T28, T2a, T27, T29;
+			 T28 = rio[WS(ios, 26)];
+			 T2a = iio[-WS(ios, 37)];
+			 T27 = W[50];
+			 T29 = W[51];
+			 T2b = FMA(T27, T28, T29 * T2a);
+			 T6T = FNMS(T29, T28, T27 * T2a);
+		    }
+		    T2c = T26 + T2b;
+		    Td9 = T6S + T6T;
+		    T6R = T26 - T2b;
+		    T6U = T6S - T6T;
+	       }
+	       T1Q = T1E + T1P;
+	       T2d = T21 + T2c;
+	       Tfx = T1Q - T2d;
+	       Tfu = Td2 + Td3;
+	       Tfv = Td8 + Td9;
+	       Tfw = Tfu - Tfv;
+	       {
+		    E T6O, T6P, Td7, Tda;
+		    T6O = T6M - T6N;
+		    T6P = T1J - T1O;
+		    T6Q = T6O + T6P;
+		    TaM = T6O - T6P;
+		    Td7 = T1E - T1P;
+		    Tda = Td8 - Td9;
+		    Tdb = Td7 - Tda;
+		    TeJ = Td7 + Tda;
+	       }
+	       {
+		    E T6V, T70, T78, T79;
+		    T6V = T6R - T6U;
+		    T70 = T6W + T6Z;
+		    T71 = KP707106781 * (T6V - T70);
+		    TaQ = KP707106781 * (T70 + T6V);
+		    T78 = T6Z - T6W;
+		    T79 = T6R + T6U;
+		    T7a = KP707106781 * (T78 - T79);
+		    TaN = KP707106781 * (T78 + T79);
+	       }
+	       {
+		    E Td4, Td5, T73, T76;
+		    Td4 = Td2 - Td3;
+		    Td5 = T2c - T21;
+		    Td6 = Td4 - Td5;
+		    TeI = Td4 + Td5;
+		    T73 = T1y - T1D;
+		    T76 = T74 - T75;
+		    T77 = T73 - T76;
+		    TaP = T73 + T76;
+	       }
+	  }
+	  {
+	       E T2j, T7d, T2o, T7e, T2p, Tdd, T2u, T7v, T2z, T7w, T2A, Tde, T2M, Tdj, T7n;
+	       E T7q, T2X, Tdk, T7i, T7l;
+	       {
+		    E T2g, T2i, T2f, T2h;
+		    T2g = rio[WS(ios, 62)];
+		    T2i = iio[-WS(ios, 1)];
+		    T2f = W[122];
+		    T2h = W[123];
+		    T2j = FMA(T2f, T2g, T2h * T2i);
+		    T7d = FNMS(T2h, T2g, T2f * T2i);
+	       }
+	       {
+		    E T2l, T2n, T2k, T2m;
+		    T2l = rio[WS(ios, 30)];
+		    T2n = iio[-WS(ios, 33)];
+		    T2k = W[58];
+		    T2m = W[59];
+		    T2o = FMA(T2k, T2l, T2m * T2n);
+		    T7e = FNMS(T2m, T2l, T2k * T2n);
+	       }
+	       T2p = T2j + T2o;
+	       Tdd = T7d + T7e;
+	       {
+		    E T2r, T2t, T2q, T2s;
+		    T2r = rio[WS(ios, 14)];
+		    T2t = iio[-WS(ios, 49)];
+		    T2q = W[26];
+		    T2s = W[27];
+		    T2u = FMA(T2q, T2r, T2s * T2t);
+		    T7v = FNMS(T2s, T2r, T2q * T2t);
+	       }
+	       {
+		    E T2w, T2y, T2v, T2x;
+		    T2w = rio[WS(ios, 46)];
+		    T2y = iio[-WS(ios, 17)];
+		    T2v = W[90];
+		    T2x = W[91];
+		    T2z = FMA(T2v, T2w, T2x * T2y);
+		    T7w = FNMS(T2x, T2w, T2v * T2y);
+	       }
+	       T2A = T2u + T2z;
+	       Tde = T7v + T7w;
+	       {
+		    E T2G, T7o, T2L, T7p;
+		    {
+			 E T2D, T2F, T2C, T2E;
+			 T2D = rio[WS(ios, 6)];
+			 T2F = iio[-WS(ios, 57)];
+			 T2C = W[10];
+			 T2E = W[11];
+			 T2G = FMA(T2C, T2D, T2E * T2F);
+			 T7o = FNMS(T2E, T2D, T2C * T2F);
+		    }
+		    {
+			 E T2I, T2K, T2H, T2J;
+			 T2I = rio[WS(ios, 38)];
+			 T2K = iio[-WS(ios, 25)];
+			 T2H = W[74];
+			 T2J = W[75];
+			 T2L = FMA(T2H, T2I, T2J * T2K);
+			 T7p = FNMS(T2J, T2I, T2H * T2K);
+		    }
+		    T2M = T2G + T2L;
+		    Tdj = T7o + T7p;
+		    T7n = T2G - T2L;
+		    T7q = T7o - T7p;
+	       }
+	       {
+		    E T2R, T7j, T2W, T7k;
+		    {
+			 E T2O, T2Q, T2N, T2P;
+			 T2O = rio[WS(ios, 54)];
+			 T2Q = iio[-WS(ios, 9)];
+			 T2N = W[106];
+			 T2P = W[107];
+			 T2R = FMA(T2N, T2O, T2P * T2Q);
+			 T7j = FNMS(T2P, T2O, T2N * T2Q);
+		    }
+		    {
+			 E T2T, T2V, T2S, T2U;
+			 T2T = rio[WS(ios, 22)];
+			 T2V = iio[-WS(ios, 41)];
+			 T2S = W[42];
+			 T2U = W[43];
+			 T2W = FMA(T2S, T2T, T2U * T2V);
+			 T7k = FNMS(T2U, T2T, T2S * T2V);
+		    }
+		    T2X = T2R + T2W;
+		    Tdk = T7j + T7k;
+		    T7i = T2R - T2W;
+		    T7l = T7j - T7k;
+	       }
+	       T2B = T2p + T2A;
+	       T2Y = T2M + T2X;
+	       Tfz = T2B - T2Y;
+	       TfA = Tdd + Tde;
+	       TfB = Tdj + Tdk;
+	       TfC = TfA - TfB;
+	       {
+		    E T7f, T7g, Tdi, Tdl;
+		    T7f = T7d - T7e;
+		    T7g = T2u - T2z;
+		    T7h = T7f + T7g;
+		    TaW = T7f - T7g;
+		    Tdi = T2p - T2A;
+		    Tdl = Tdj - Tdk;
+		    Tdm = Tdi - Tdl;
+		    TeM = Tdi + Tdl;
+	       }
+	       {
+		    E T7m, T7r, T7z, T7A;
+		    T7m = T7i - T7l;
+		    T7r = T7n + T7q;
+		    T7s = KP707106781 * (T7m - T7r);
+		    TaU = KP707106781 * (T7r + T7m);
+		    T7z = T7q - T7n;
+		    T7A = T7i + T7l;
+		    T7B = KP707106781 * (T7z - T7A);
+		    TaX = KP707106781 * (T7z + T7A);
+	       }
+	       {
+		    E Tdf, Tdg, T7u, T7x;
+		    Tdf = Tdd - Tde;
+		    Tdg = T2X - T2M;
+		    Tdh = Tdf - Tdg;
+		    TeL = Tdf + Tdg;
+		    T7u = T2j - T2o;
+		    T7x = T7v - T7w;
+		    T7y = T7u - T7x;
+		    TaT = T7u + T7x;
+	       }
+	  }
+	  {
+	       E T36, T7G, T3b, T7H, T3c, Tdq, T3h, T8m, T3m, T8n, T3n, Tdr, T3z, TdI, T7Q;
+	       E T7T, T3K, TdJ, T7L, T7O;
+	       {
+		    E T33, T35, T32, T34;
+		    T33 = rio[WS(ios, 1)];
+		    T35 = iio[-WS(ios, 62)];
+		    T32 = W[0];
+		    T34 = W[1];
+		    T36 = FMA(T32, T33, T34 * T35);
+		    T7G = FNMS(T34, T33, T32 * T35);
+	       }
+	       {
+		    E T38, T3a, T37, T39;
+		    T38 = rio[WS(ios, 33)];
+		    T3a = iio[-WS(ios, 30)];
+		    T37 = W[64];
+		    T39 = W[65];
+		    T3b = FMA(T37, T38, T39 * T3a);
+		    T7H = FNMS(T39, T38, T37 * T3a);
+	       }
+	       T3c = T36 + T3b;
+	       Tdq = T7G + T7H;
+	       {
+		    E T3e, T3g, T3d, T3f;
+		    T3e = rio[WS(ios, 17)];
+		    T3g = iio[-WS(ios, 46)];
+		    T3d = W[32];
+		    T3f = W[33];
+		    T3h = FMA(T3d, T3e, T3f * T3g);
+		    T8m = FNMS(T3f, T3e, T3d * T3g);
+	       }
+	       {
+		    E T3j, T3l, T3i, T3k;
+		    T3j = rio[WS(ios, 49)];
+		    T3l = iio[-WS(ios, 14)];
+		    T3i = W[96];
+		    T3k = W[97];
+		    T3m = FMA(T3i, T3j, T3k * T3l);
+		    T8n = FNMS(T3k, T3j, T3i * T3l);
+	       }
+	       T3n = T3h + T3m;
+	       Tdr = T8m + T8n;
+	       {
+		    E T3t, T7R, T3y, T7S;
+		    {
+			 E T3q, T3s, T3p, T3r;
+			 T3q = rio[WS(ios, 9)];
+			 T3s = iio[-WS(ios, 54)];
+			 T3p = W[16];
+			 T3r = W[17];
+			 T3t = FMA(T3p, T3q, T3r * T3s);
+			 T7R = FNMS(T3r, T3q, T3p * T3s);
+		    }
+		    {
+			 E T3v, T3x, T3u, T3w;
+			 T3v = rio[WS(ios, 41)];
+			 T3x = iio[-WS(ios, 22)];
+			 T3u = W[80];
+			 T3w = W[81];
+			 T3y = FMA(T3u, T3v, T3w * T3x);
+			 T7S = FNMS(T3w, T3v, T3u * T3x);
+		    }
+		    T3z = T3t + T3y;
+		    TdI = T7R + T7S;
+		    T7Q = T3t - T3y;
+		    T7T = T7R - T7S;
+	       }
+	       {
+		    E T3E, T7M, T3J, T7N;
+		    {
+			 E T3B, T3D, T3A, T3C;
+			 T3B = rio[WS(ios, 57)];
+			 T3D = iio[-WS(ios, 6)];
+			 T3A = W[112];
+			 T3C = W[113];
+			 T3E = FMA(T3A, T3B, T3C * T3D);
+			 T7M = FNMS(T3C, T3B, T3A * T3D);
+		    }
+		    {
+			 E T3G, T3I, T3F, T3H;
+			 T3G = rio[WS(ios, 25)];
+			 T3I = iio[-WS(ios, 38)];
+			 T3F = W[48];
+			 T3H = W[49];
+			 T3J = FMA(T3F, T3G, T3H * T3I);
+			 T7N = FNMS(T3H, T3G, T3F * T3I);
+		    }
+		    T3K = T3E + T3J;
+		    TdJ = T7M + T7N;
+		    T7L = T3E - T3J;
+		    T7O = T7M - T7N;
+	       }
+	       {
+		    E T3o, T3L, TdH, TdK;
+		    T3o = T3c + T3n;
+		    T3L = T3z + T3K;
+		    T3M = T3o + T3L;
+		    TfL = T3o - T3L;
+		    TdH = T3c - T3n;
+		    TdK = TdI - TdJ;
+		    TdL = TdH - TdK;
+		    TeQ = TdH + TdK;
+	       }
+	       {
+		    E TfG, TfH, T7I, T7J;
+		    TfG = Tdq + Tdr;
+		    TfH = TdI + TdJ;
+		    TfI = TfG - TfH;
+		    Tgt = TfG + TfH;
+		    T7I = T7G - T7H;
+		    T7J = T3h - T3m;
+		    T7K = T7I + T7J;
+		    Tb2 = T7I - T7J;
+	       }
+	       {
+		    E T7P, T7U, T8q, T8r;
+		    T7P = T7L - T7O;
+		    T7U = T7Q + T7T;
+		    T7V = KP707106781 * (T7P - T7U);
+		    Tbe = KP707106781 * (T7U + T7P);
+		    T8q = T7T - T7Q;
+		    T8r = T7L + T7O;
+		    T8s = KP707106781 * (T8q - T8r);
+		    Tb3 = KP707106781 * (T8q + T8r);
+	       }
+	       {
+		    E Tds, Tdt, T8l, T8o;
+		    Tds = Tdq - Tdr;
+		    Tdt = T3K - T3z;
+		    Tdu = Tds - Tdt;
+		    TeT = Tds + Tdt;
+		    T8l = T36 - T3b;
+		    T8o = T8m - T8n;
+		    T8p = T8l - T8o;
+		    Tbd = T8l + T8o;
+	       }
+	  }
+	  {
+	       E T4D, T9e, T4I, T9f, T4J, Te8, T4O, T8A, T4T, T8B, T4U, Te9, T56, TdS, T8G;
+	       E T8H, T5h, TdT, T8J, T8M;
+	       {
+		    E T4A, T4C, T4z, T4B;
+		    T4A = rio[WS(ios, 63)];
+		    T4C = iio[0];
+		    T4z = W[124];
+		    T4B = W[125];
+		    T4D = FMA(T4z, T4A, T4B * T4C);
+		    T9e = FNMS(T4B, T4A, T4z * T4C);
+	       }
+	       {
+		    E T4F, T4H, T4E, T4G;
+		    T4F = rio[WS(ios, 31)];
+		    T4H = iio[-WS(ios, 32)];
+		    T4E = W[60];
+		    T4G = W[61];
+		    T4I = FMA(T4E, T4F, T4G * T4H);
+		    T9f = FNMS(T4G, T4F, T4E * T4H);
+	       }
+	       T4J = T4D + T4I;
+	       Te8 = T9e + T9f;
+	       {
+		    E T4L, T4N, T4K, T4M;
+		    T4L = rio[WS(ios, 15)];
+		    T4N = iio[-WS(ios, 48)];
+		    T4K = W[28];
+		    T4M = W[29];
+		    T4O = FMA(T4K, T4L, T4M * T4N);
+		    T8A = FNMS(T4M, T4L, T4K * T4N);
+	       }
+	       {
+		    E T4Q, T4S, T4P, T4R;
+		    T4Q = rio[WS(ios, 47)];
+		    T4S = iio[-WS(ios, 16)];
+		    T4P = W[92];
+		    T4R = W[93];
+		    T4T = FMA(T4P, T4Q, T4R * T4S);
+		    T8B = FNMS(T4R, T4Q, T4P * T4S);
+	       }
+	       T4U = T4O + T4T;
+	       Te9 = T8A + T8B;
+	       {
+		    E T50, T8E, T55, T8F;
+		    {
+			 E T4X, T4Z, T4W, T4Y;
+			 T4X = rio[WS(ios, 7)];
+			 T4Z = iio[-WS(ios, 56)];
+			 T4W = W[12];
+			 T4Y = W[13];
+			 T50 = FMA(T4W, T4X, T4Y * T4Z);
+			 T8E = FNMS(T4Y, T4X, T4W * T4Z);
+		    }
+		    {
+			 E T52, T54, T51, T53;
+			 T52 = rio[WS(ios, 39)];
+			 T54 = iio[-WS(ios, 24)];
+			 T51 = W[76];
+			 T53 = W[77];
+			 T55 = FMA(T51, T52, T53 * T54);
+			 T8F = FNMS(T53, T52, T51 * T54);
+		    }
+		    T56 = T50 + T55;
+		    TdS = T8E + T8F;
+		    T8G = T8E - T8F;
+		    T8H = T50 - T55;
+	       }
+	       {
+		    E T5b, T8K, T5g, T8L;
+		    {
+			 E T58, T5a, T57, T59;
+			 T58 = rio[WS(ios, 55)];
+			 T5a = iio[-WS(ios, 8)];
+			 T57 = W[108];
+			 T59 = W[109];
+			 T5b = FMA(T57, T58, T59 * T5a);
+			 T8K = FNMS(T59, T58, T57 * T5a);
+		    }
+		    {
+			 E T5d, T5f, T5c, T5e;
+			 T5d = rio[WS(ios, 23)];
+			 T5f = iio[-WS(ios, 40)];
+			 T5c = W[44];
+			 T5e = W[45];
+			 T5g = FMA(T5c, T5d, T5e * T5f);
+			 T8L = FNMS(T5e, T5d, T5c * T5f);
+		    }
+		    T5h = T5b + T5g;
+		    TdT = T8K + T8L;
+		    T8J = T5b - T5g;
+		    T8M = T8K - T8L;
+	       }
+	       {
+		    E T4V, T5i, Tea, Teb;
+		    T4V = T4J + T4U;
+		    T5i = T56 + T5h;
+		    T5j = T4V + T5i;
+		    TfR = T4V - T5i;
+		    Tea = Te8 - Te9;
+		    Teb = T5h - T56;
+		    Tec = Tea - Teb;
+		    Tf0 = Tea + Teb;
+	       }
+	       {
+		    E TfW, TfX, T8z, T8C;
+		    TfW = Te8 + Te9;
+		    TfX = TdS + TdT;
+		    TfY = TfW - TfX;
+		    Tgy = TfW + TfX;
+		    T8z = T4D - T4I;
+		    T8C = T8A - T8B;
+		    T8D = T8z - T8C;
+		    Tbl = T8z + T8C;
+	       }
+	       {
+		    E T8I, T8N, T9j, T9k;
+		    T8I = T8G - T8H;
+		    T8N = T8J + T8M;
+		    T8O = KP707106781 * (T8I - T8N);
+		    Tbx = KP707106781 * (T8I + T8N);
+		    T9j = T8J - T8M;
+		    T9k = T8H + T8G;
+		    T9l = KP707106781 * (T9j - T9k);
+		    Tbm = KP707106781 * (T9k + T9j);
+	       }
+	       {
+		    E TdR, TdU, T9g, T9h;
+		    TdR = T4J - T4U;
+		    TdU = TdS - TdT;
+		    TdV = TdR - TdU;
+		    TeX = TdR + TdU;
+		    T9g = T9e - T9f;
+		    T9h = T4O - T4T;
+		    T9i = T9g + T9h;
+		    Tbw = T9g - T9h;
+	       }
+	  }
+	  {
+	       E T5u, TdW, T8S, T8V, T62, Te3, T94, T99, T5F, TdX, T8T, T8Y, T5R, Te2, T93;
+	       E T96;
+	       {
+		    E T5o, T8Q, T5t, T8R;
+		    {
+			 E T5l, T5n, T5k, T5m;
+			 T5l = rio[WS(ios, 3)];
+			 T5n = iio[-WS(ios, 60)];
+			 T5k = W[4];
+			 T5m = W[5];
+			 T5o = FMA(T5k, T5l, T5m * T5n);
+			 T8Q = FNMS(T5m, T5l, T5k * T5n);
+		    }
+		    {
+			 E T5q, T5s, T5p, T5r;
+			 T5q = rio[WS(ios, 35)];
+			 T5s = iio[-WS(ios, 28)];
+			 T5p = W[68];
+			 T5r = W[69];
+			 T5t = FMA(T5p, T5q, T5r * T5s);
+			 T8R = FNMS(T5r, T5q, T5p * T5s);
+		    }
+		    T5u = T5o + T5t;
+		    TdW = T8Q + T8R;
+		    T8S = T8Q - T8R;
+		    T8V = T5o - T5t;
+	       }
+	       {
+		    E T5W, T97, T61, T98;
+		    {
+			 E T5T, T5V, T5S, T5U;
+			 T5T = rio[WS(ios, 11)];
+			 T5V = iio[-WS(ios, 52)];
+			 T5S = W[20];
+			 T5U = W[21];
+			 T5W = FMA(T5S, T5T, T5U * T5V);
+			 T97 = FNMS(T5U, T5T, T5S * T5V);
+		    }
+		    {
+			 E T5Y, T60, T5X, T5Z;
+			 T5Y = rio[WS(ios, 43)];
+			 T60 = iio[-WS(ios, 20)];
+			 T5X = W[84];
+			 T5Z = W[85];
+			 T61 = FMA(T5X, T5Y, T5Z * T60);
+			 T98 = FNMS(T5Z, T5Y, T5X * T60);
+		    }
+		    T62 = T5W + T61;
+		    Te3 = T97 + T98;
+		    T94 = T5W - T61;
+		    T99 = T97 - T98;
+	       }
+	       {
+		    E T5z, T8W, T5E, T8X;
+		    {
+			 E T5w, T5y, T5v, T5x;
+			 T5w = rio[WS(ios, 19)];
+			 T5y = iio[-WS(ios, 44)];
+			 T5v = W[36];
+			 T5x = W[37];
+			 T5z = FMA(T5v, T5w, T5x * T5y);
+			 T8W = FNMS(T5x, T5w, T5v * T5y);
+		    }
+		    {
+			 E T5B, T5D, T5A, T5C;
+			 T5B = rio[WS(ios, 51)];
+			 T5D = iio[-WS(ios, 12)];
+			 T5A = W[100];
+			 T5C = W[101];
+			 T5E = FMA(T5A, T5B, T5C * T5D);
+			 T8X = FNMS(T5C, T5B, T5A * T5D);
+		    }
+		    T5F = T5z + T5E;
+		    TdX = T8W + T8X;
+		    T8T = T5z - T5E;
+		    T8Y = T8W - T8X;
+	       }
+	       {
+		    E T5L, T91, T5Q, T92;
+		    {
+			 E T5I, T5K, T5H, T5J;
+			 T5I = rio[WS(ios, 59)];
+			 T5K = iio[-WS(ios, 4)];
+			 T5H = W[116];
+			 T5J = W[117];
+			 T5L = FMA(T5H, T5I, T5J * T5K);
+			 T91 = FNMS(T5J, T5I, T5H * T5K);
+		    }
+		    {
+			 E T5N, T5P, T5M, T5O;
+			 T5N = rio[WS(ios, 27)];
+			 T5P = iio[-WS(ios, 36)];
+			 T5M = W[52];
+			 T5O = W[53];
+			 T5Q = FMA(T5M, T5N, T5O * T5P);
+			 T92 = FNMS(T5O, T5N, T5M * T5P);
+		    }
+		    T5R = T5L + T5Q;
+		    Te2 = T91 + T92;
+		    T93 = T91 - T92;
+		    T96 = T5L - T5Q;
+	       }
+	       {
+		    E T5G, T63, Te1, Te4;
+		    T5G = T5u + T5F;
+		    T63 = T5R + T62;
+		    T64 = T5G + T63;
+		    TfZ = T63 - T5G;
+		    Te1 = T5R - T62;
+		    Te4 = Te2 - Te3;
+		    Te5 = Te1 + Te4;
+		    Ted = Te1 - Te4;
+	       }
+	       {
+		    E TfS, TfT, T8U, T8Z;
+		    TfS = TdW + TdX;
+		    TfT = Te2 + Te3;
+		    TfU = TfS - TfT;
+		    Tgz = TfS + TfT;
+		    T8U = T8S + T8T;
+		    T8Z = T8V - T8Y;
+		    T90 = FNMS(KP923879532, T8Z, KP382683432 * T8U);
+		    T9o = FMA(KP923879532, T8U, KP382683432 * T8Z);
+	       }
+	       {
+		    E T95, T9a, Tbr, Tbs;
+		    T95 = T93 + T94;
+		    T9a = T96 - T99;
+		    T9b = FMA(KP382683432, T95, KP923879532 * T9a);
+		    T9n = FNMS(KP923879532, T95, KP382683432 * T9a);
+		    Tbr = T93 - T94;
+		    Tbs = T96 + T99;
+		    Tbt = FMA(KP923879532, Tbr, KP382683432 * Tbs);
+		    Tbz = FNMS(KP382683432, Tbr, KP923879532 * Tbs);
+	       }
+	       {
+		    E TdY, TdZ, Tbo, Tbp;
+		    TdY = TdW - TdX;
+		    TdZ = T5u - T5F;
+		    Te0 = TdY - TdZ;
+		    Tee = TdZ + TdY;
+		    Tbo = T8S - T8T;
+		    Tbp = T8V + T8Y;
+		    Tbq = FNMS(KP382683432, Tbp, KP923879532 * Tbo);
+		    TbA = FMA(KP382683432, Tbo, KP923879532 * Tbp);
+	       }
+	  }
+	  {
+	       E T3X, TdB, T8a, T8d, T4v, Tdx, T80, T85, T48, TdC, T8b, T8g, T4k, Tdw, T7X;
+	       E T84;
+	       {
+		    E T3R, T88, T3W, T89;
+		    {
+			 E T3O, T3Q, T3N, T3P;
+			 T3O = rio[WS(ios, 5)];
+			 T3Q = iio[-WS(ios, 58)];
+			 T3N = W[8];
+			 T3P = W[9];
+			 T3R = FMA(T3N, T3O, T3P * T3Q);
+			 T88 = FNMS(T3P, T3O, T3N * T3Q);
+		    }
+		    {
+			 E T3T, T3V, T3S, T3U;
+			 T3T = rio[WS(ios, 37)];
+			 T3V = iio[-WS(ios, 26)];
+			 T3S = W[72];
+			 T3U = W[73];
+			 T3W = FMA(T3S, T3T, T3U * T3V);
+			 T89 = FNMS(T3U, T3T, T3S * T3V);
+		    }
+		    T3X = T3R + T3W;
+		    TdB = T88 + T89;
+		    T8a = T88 - T89;
+		    T8d = T3R - T3W;
+	       }
+	       {
+		    E T4p, T7Y, T4u, T7Z;
+		    {
+			 E T4m, T4o, T4l, T4n;
+			 T4m = rio[WS(ios, 13)];
+			 T4o = iio[-WS(ios, 50)];
+			 T4l = W[24];
+			 T4n = W[25];
+			 T4p = FMA(T4l, T4m, T4n * T4o);
+			 T7Y = FNMS(T4n, T4m, T4l * T4o);
+		    }
+		    {
+			 E T4r, T4t, T4q, T4s;
+			 T4r = rio[WS(ios, 45)];
+			 T4t = iio[-WS(ios, 18)];
+			 T4q = W[88];
+			 T4s = W[89];
+			 T4u = FMA(T4q, T4r, T4s * T4t);
+			 T7Z = FNMS(T4s, T4r, T4q * T4t);
+		    }
+		    T4v = T4p + T4u;
+		    Tdx = T7Y + T7Z;
+		    T80 = T7Y - T7Z;
+		    T85 = T4p - T4u;
+	       }
+	       {
+		    E T42, T8e, T47, T8f;
+		    {
+			 E T3Z, T41, T3Y, T40;
+			 T3Z = rio[WS(ios, 21)];
+			 T41 = iio[-WS(ios, 42)];
+			 T3Y = W[40];
+			 T40 = W[41];
+			 T42 = FMA(T3Y, T3Z, T40 * T41);
+			 T8e = FNMS(T40, T3Z, T3Y * T41);
+		    }
+		    {
+			 E T44, T46, T43, T45;
+			 T44 = rio[WS(ios, 53)];
+			 T46 = iio[-WS(ios, 10)];
+			 T43 = W[104];
+			 T45 = W[105];
+			 T47 = FMA(T43, T44, T45 * T46);
+			 T8f = FNMS(T45, T44, T43 * T46);
+		    }
+		    T48 = T42 + T47;
+		    TdC = T8e + T8f;
+		    T8b = T42 - T47;
+		    T8g = T8e - T8f;
+	       }
+	       {
+		    E T4e, T82, T4j, T83;
+		    {
+			 E T4b, T4d, T4a, T4c;
+			 T4b = rio[WS(ios, 61)];
+			 T4d = iio[-WS(ios, 2)];
+			 T4a = W[120];
+			 T4c = W[121];
+			 T4e = FMA(T4a, T4b, T4c * T4d);
+			 T82 = FNMS(T4c, T4b, T4a * T4d);
+		    }
+		    {
+			 E T4g, T4i, T4f, T4h;
+			 T4g = rio[WS(ios, 29)];
+			 T4i = iio[-WS(ios, 34)];
+			 T4f = W[56];
+			 T4h = W[57];
+			 T4j = FMA(T4f, T4g, T4h * T4i);
+			 T83 = FNMS(T4h, T4g, T4f * T4i);
+		    }
+		    T4k = T4e + T4j;
+		    Tdw = T82 + T83;
+		    T7X = T4e - T4j;
+		    T84 = T82 - T83;
+	       }
+	       {
+		    E T49, T4w, TdA, TdD;
+		    T49 = T3X + T48;
+		    T4w = T4k + T4v;
+		    T4x = T49 + T4w;
+		    TfJ = T4w - T49;
+		    TdA = T3X - T48;
+		    TdD = TdB - TdC;
+		    TdE = TdA + TdD;
+		    TdM = TdD - TdA;
+	       }
+	       {
+		    E TfM, TfN, T81, T86;
+		    TfM = TdB + TdC;
+		    TfN = Tdw + Tdx;
+		    TfO = TfM - TfN;
+		    Tgu = TfM + TfN;
+		    T81 = T7X - T80;
+		    T86 = T84 + T85;
+		    T87 = FNMS(KP923879532, T86, KP382683432 * T81);
+		    T8v = FMA(KP382683432, T86, KP923879532 * T81);
+	       }
+	       {
+		    E T8c, T8h, Tb8, Tb9;
+		    T8c = T8a + T8b;
+		    T8h = T8d - T8g;
+		    T8i = FMA(KP923879532, T8c, KP382683432 * T8h);
+		    T8u = FNMS(KP923879532, T8h, KP382683432 * T8c);
+		    Tb8 = T8a - T8b;
+		    Tb9 = T8d + T8g;
+		    Tba = FMA(KP382683432, Tb8, KP923879532 * Tb9);
+		    Tbg = FNMS(KP382683432, Tb9, KP923879532 * Tb8);
+	       }
+	       {
+		    E Tdv, Tdy, Tb5, Tb6;
+		    Tdv = T4k - T4v;
+		    Tdy = Tdw - Tdx;
+		    Tdz = Tdv - Tdy;
+		    TdN = Tdv + Tdy;
+		    Tb5 = T7X + T80;
+		    Tb6 = T84 - T85;
+		    Tb7 = FNMS(KP382683432, Tb6, KP923879532 * Tb5);
+		    Tbh = FMA(KP923879532, Tb6, KP382683432 * Tb5);
+	       }
+	  }
+	  {
+	       E T1t, Tgn, TgK, TgL, TgV, Th1, T30, Th0, T66, TgX, Tgw, TgE, TgB, TgF, Tgq;
+	       E TgM;
+	       {
+		    E TH, T1s, TgI, TgJ;
+		    TH = Tj + TG;
+		    T1s = T14 + T1r;
+		    T1t = TH + T1s;
+		    Tgn = TH - T1s;
+		    TgI = Tgt + Tgu;
+		    TgJ = Tgy + Tgz;
+		    TgK = TgI - TgJ;
+		    TgL = TgI + TgJ;
+	       }
+	       {
+		    E TgN, TgU, T2e, T2Z;
+		    TgN = Tfq + Tfr;
+		    TgU = TgO + TgT;
+		    TgV = TgN + TgU;
+		    Th1 = TgU - TgN;
+		    T2e = T1Q + T2d;
+		    T2Z = T2B + T2Y;
+		    T30 = T2e + T2Z;
+		    Th0 = T2Z - T2e;
+	       }
+	       {
+		    E T4y, T65, Tgs, Tgv;
+		    T4y = T3M + T4x;
+		    T65 = T5j + T64;
+		    T66 = T4y + T65;
+		    TgX = T65 - T4y;
+		    Tgs = T3M - T4x;
+		    Tgv = Tgt - Tgu;
+		    Tgw = Tgs + Tgv;
+		    TgE = Tgv - Tgs;
+	       }
+	       {
+		    E Tgx, TgA, Tgo, Tgp;
+		    Tgx = T5j - T64;
+		    TgA = Tgy - Tgz;
+		    TgB = Tgx - TgA;
+		    TgF = Tgx + TgA;
+		    Tgo = Tfu + Tfv;
+		    Tgp = TfA + TfB;
+		    Tgq = Tgo - Tgp;
+		    TgM = Tgo + Tgp;
+	       }
+	       {
+		    E T31, TgW, TgH, TgY;
+		    T31 = T1t + T30;
+		    iio[-WS(ios, 32)] = T31 - T66;
+		    rio[0] = T31 + T66;
+		    TgW = TgM + TgV;
+		    rio[WS(ios, 32)] = TgL - TgW;
+		    iio[0] = TgL + TgW;
+		    TgH = T1t - T30;
+		    iio[-WS(ios, 48)] = TgH - TgK;
+		    rio[WS(ios, 16)] = TgH + TgK;
+		    TgY = TgV - TgM;
+		    rio[WS(ios, 48)] = TgX - TgY;
+		    iio[-WS(ios, 16)] = TgX + TgY;
+	       }
+	       {
+		    E Tgr, TgC, TgZ, Th2;
+		    Tgr = Tgn + Tgq;
+		    TgC = KP707106781 * (Tgw + TgB);
+		    iio[-WS(ios, 40)] = Tgr - TgC;
+		    rio[WS(ios, 8)] = Tgr + TgC;
+		    TgZ = KP707106781 * (TgE + TgF);
+		    Th2 = Th0 + Th1;
+		    rio[WS(ios, 40)] = TgZ - Th2;
+		    iio[-WS(ios, 8)] = TgZ + Th2;
+	       }
+	       {
+		    E TgD, TgG, Th3, Th4;
+		    TgD = Tgn - Tgq;
+		    TgG = KP707106781 * (TgE - TgF);
+		    iio[-WS(ios, 56)] = TgD - TgG;
+		    rio[WS(ios, 24)] = TgD + TgG;
+		    Th3 = KP707106781 * (TgB - Tgw);
+		    Th4 = Th1 - Th0;
+		    rio[WS(ios, 56)] = Th3 - Th4;
+		    iio[-WS(ios, 24)] = Th3 + Th4;
+	       }
+	  }
+	  {
+	       E Tft, Tg7, Tgh, Tgl, Th9, Thf, TfE, Th6, TfQ, Tg4, Tga, The, Tge, Tgk, Tg1;
+	       E Tg5;
+	       {
+		    E Tfp, Tfs, Tgf, Tgg;
+		    Tfp = Tj - TG;
+		    Tfs = Tfq - Tfr;
+		    Tft = Tfp - Tfs;
+		    Tg7 = Tfp + Tfs;
+		    Tgf = TfR + TfU;
+		    Tgg = TfY + TfZ;
+		    Tgh = FNMS(KP382683432, Tgg, KP923879532 * Tgf);
+		    Tgl = FMA(KP923879532, Tgg, KP382683432 * Tgf);
+	       }
+	       {
+		    E Th7, Th8, Tfy, TfD;
+		    Th7 = T1r - T14;
+		    Th8 = TgT - TgO;
+		    Th9 = Th7 + Th8;
+		    Thf = Th8 - Th7;
+		    Tfy = Tfw - Tfx;
+		    TfD = Tfz + TfC;
+		    TfE = KP707106781 * (Tfy - TfD);
+		    Th6 = KP707106781 * (Tfy + TfD);
+	       }
+	       {
+		    E TfK, TfP, Tg8, Tg9;
+		    TfK = TfI - TfJ;
+		    TfP = TfL - TfO;
+		    TfQ = FMA(KP923879532, TfK, KP382683432 * TfP);
+		    Tg4 = FNMS(KP923879532, TfP, KP382683432 * TfK);
+		    Tg8 = Tfx + Tfw;
+		    Tg9 = Tfz - TfC;
+		    Tga = KP707106781 * (Tg8 + Tg9);
+		    The = KP707106781 * (Tg9 - Tg8);
+	       }
+	       {
+		    E Tgc, Tgd, TfV, Tg0;
+		    Tgc = TfI + TfJ;
+		    Tgd = TfL + TfO;
+		    Tge = FMA(KP382683432, Tgc, KP923879532 * Tgd);
+		    Tgk = FNMS(KP382683432, Tgd, KP923879532 * Tgc);
+		    TfV = TfR - TfU;
+		    Tg0 = TfY - TfZ;
+		    Tg1 = FNMS(KP923879532, Tg0, KP382683432 * TfV);
+		    Tg5 = FMA(KP382683432, Tg0, KP923879532 * TfV);
+	       }
+	       {
+		    E TfF, Tg2, Thd, Thg;
+		    TfF = Tft + TfE;
+		    Tg2 = TfQ + Tg1;
+		    iio[-WS(ios, 44)] = TfF - Tg2;
+		    rio[WS(ios, 12)] = TfF + Tg2;
+		    Thd = Tg4 + Tg5;
+		    Thg = The + Thf;
+		    rio[WS(ios, 44)] = Thd - Thg;
+		    iio[-WS(ios, 12)] = Thd + Thg;
+	       }
+	       {
+		    E Tg3, Tg6, Thh, Thi;
+		    Tg3 = Tft - TfE;
+		    Tg6 = Tg4 - Tg5;
+		    iio[-WS(ios, 60)] = Tg3 - Tg6;
+		    rio[WS(ios, 28)] = Tg3 + Tg6;
+		    Thh = Tg1 - TfQ;
+		    Thi = Thf - The;
+		    rio[WS(ios, 60)] = Thh - Thi;
+		    iio[-WS(ios, 28)] = Thh + Thi;
+	       }
+	       {
+		    E Tgb, Tgi, Th5, Tha;
+		    Tgb = Tg7 + Tga;
+		    Tgi = Tge + Tgh;
+		    iio[-WS(ios, 36)] = Tgb - Tgi;
+		    rio[WS(ios, 4)] = Tgb + Tgi;
+		    Th5 = Tgk + Tgl;
+		    Tha = Th6 + Th9;
+		    rio[WS(ios, 36)] = Th5 - Tha;
+		    iio[-WS(ios, 4)] = Th5 + Tha;
+	       }
+	       {
+		    E Tgj, Tgm, Thb, Thc;
+		    Tgj = Tg7 - Tga;
+		    Tgm = Tgk - Tgl;
+		    iio[-WS(ios, 52)] = Tgj - Tgm;
+		    rio[WS(ios, 20)] = Tgj + Tgm;
+		    Thb = Tgh - Tge;
+		    Thc = Th9 - Th6;
+		    rio[WS(ios, 52)] = Thb - Thc;
+		    iio[-WS(ios, 20)] = Thb + Thc;
+	       }
+	  }
+	  {
+	       E Td1, Ten, Tdo, ThA, ThD, ThJ, Teq, ThI, Teh, TeB, Tel, Tex, TdQ, TeA, Tek;
+	       E Teu;
+	       {
+		    E TcP, Td0, Teo, Tep;
+		    TcP = TcL - TcO;
+		    Td0 = KP707106781 * (TcU - TcZ);
+		    Td1 = TcP - Td0;
+		    Ten = TcP + Td0;
+		    {
+			 E Tdc, Tdn, ThB, ThC;
+			 Tdc = FNMS(KP923879532, Tdb, KP382683432 * Td6);
+			 Tdn = FMA(KP382683432, Tdh, KP923879532 * Tdm);
+			 Tdo = Tdc - Tdn;
+			 ThA = Tdc + Tdn;
+			 ThB = KP707106781 * (TeF - TeE);
+			 ThC = Thn - Thm;
+			 ThD = ThB + ThC;
+			 ThJ = ThC - ThB;
+		    }
+		    Teo = FMA(KP923879532, Td6, KP382683432 * Tdb);
+		    Tep = FNMS(KP923879532, Tdh, KP382683432 * Tdm);
+		    Teq = Teo + Tep;
+		    ThI = Tep - Teo;
+		    {
+			 E Te7, Tev, Teg, Tew, Te6, Tef;
+			 Te6 = KP707106781 * (Te0 - Te5);
+			 Te7 = TdV - Te6;
+			 Tev = TdV + Te6;
+			 Tef = KP707106781 * (Ted - Tee);
+			 Teg = Tec - Tef;
+			 Tew = Tec + Tef;
+			 Teh = FNMS(KP980785280, Teg, KP195090322 * Te7);
+			 TeB = FMA(KP831469612, Tew, KP555570233 * Tev);
+			 Tel = FMA(KP195090322, Teg, KP980785280 * Te7);
+			 Tex = FNMS(KP555570233, Tew, KP831469612 * Tev);
+		    }
+		    {
+			 E TdG, Tes, TdP, Tet, TdF, TdO;
+			 TdF = KP707106781 * (Tdz - TdE);
+			 TdG = Tdu - TdF;
+			 Tes = Tdu + TdF;
+			 TdO = KP707106781 * (TdM - TdN);
+			 TdP = TdL - TdO;
+			 Tet = TdL + TdO;
+			 TdQ = FMA(KP980785280, TdG, KP195090322 * TdP);
+			 TeA = FNMS(KP555570233, Tet, KP831469612 * Tes);
+			 Tek = FNMS(KP980785280, TdP, KP195090322 * TdG);
+			 Teu = FMA(KP555570233, Tes, KP831469612 * Tet);
+		    }
+	       }
+	       {
+		    E Tdp, Tei, ThH, ThK;
+		    Tdp = Td1 + Tdo;
+		    Tei = TdQ + Teh;
+		    iio[-WS(ios, 46)] = Tdp - Tei;
+		    rio[WS(ios, 14)] = Tdp + Tei;
+		    ThH = Tek + Tel;
+		    ThK = ThI + ThJ;
+		    rio[WS(ios, 46)] = ThH - ThK;
+		    iio[-WS(ios, 14)] = ThH + ThK;
+	       }
+	       {
+		    E Tej, Tem, ThL, ThM;
+		    Tej = Td1 - Tdo;
+		    Tem = Tek - Tel;
+		    iio[-WS(ios, 62)] = Tej - Tem;
+		    rio[WS(ios, 30)] = Tej + Tem;
+		    ThL = Teh - TdQ;
+		    ThM = ThJ - ThI;
+		    rio[WS(ios, 62)] = ThL - ThM;
+		    iio[-WS(ios, 30)] = ThL + ThM;
+	       }
+	       {
+		    E Ter, Tey, Thz, ThE;
+		    Ter = Ten + Teq;
+		    Tey = Teu + Tex;
+		    iio[-WS(ios, 38)] = Ter - Tey;
+		    rio[WS(ios, 6)] = Ter + Tey;
+		    Thz = TeA + TeB;
+		    ThE = ThA + ThD;
+		    rio[WS(ios, 38)] = Thz - ThE;
+		    iio[-WS(ios, 6)] = Thz + ThE;
+	       }
+	       {
+		    E Tez, TeC, ThF, ThG;
+		    Tez = Ten - Teq;
+		    TeC = TeA - TeB;
+		    iio[-WS(ios, 54)] = Tez - TeC;
+		    rio[WS(ios, 22)] = Tez + TeC;
+		    ThF = Tex - Teu;
+		    ThG = ThD - ThA;
+		    rio[WS(ios, 54)] = ThF - ThG;
+		    iio[-WS(ios, 22)] = ThF + ThG;
+	       }
+	  }
+	  {
+	       E TeH, Tf9, TeO, Thk, Thp, Thv, Tfc, Thu, Tf3, Tfn, Tf7, Tfj, TeW, Tfm, Tf6;
+	       E Tfg;
+	       {
+		    E TeD, TeG, Tfa, Tfb;
+		    TeD = TcL + TcO;
+		    TeG = KP707106781 * (TeE + TeF);
+		    TeH = TeD - TeG;
+		    Tf9 = TeD + TeG;
+		    {
+			 E TeK, TeN, Thl, Tho;
+			 TeK = FNMS(KP382683432, TeJ, KP923879532 * TeI);
+			 TeN = FMA(KP923879532, TeL, KP382683432 * TeM);
+			 TeO = TeK - TeN;
+			 Thk = TeK + TeN;
+			 Thl = KP707106781 * (TcU + TcZ);
+			 Tho = Thm + Thn;
+			 Thp = Thl + Tho;
+			 Thv = Tho - Thl;
+		    }
+		    Tfa = FMA(KP382683432, TeI, KP923879532 * TeJ);
+		    Tfb = FNMS(KP382683432, TeL, KP923879532 * TeM);
+		    Tfc = Tfa + Tfb;
+		    Thu = Tfb - Tfa;
+		    {
+			 E TeZ, Tfh, Tf2, Tfi, TeY, Tf1;
+			 TeY = KP707106781 * (Tee + Ted);
+			 TeZ = TeX - TeY;
+			 Tfh = TeX + TeY;
+			 Tf1 = KP707106781 * (Te0 + Te5);
+			 Tf2 = Tf0 - Tf1;
+			 Tfi = Tf0 + Tf1;
+			 Tf3 = FNMS(KP831469612, Tf2, KP555570233 * TeZ);
+			 Tfn = FMA(KP195090322, Tfh, KP980785280 * Tfi);
+			 Tf7 = FMA(KP831469612, TeZ, KP555570233 * Tf2);
+			 Tfj = FNMS(KP195090322, Tfi, KP980785280 * Tfh);
+		    }
+		    {
+			 E TeS, Tfe, TeV, Tff, TeR, TeU;
+			 TeR = KP707106781 * (TdE + Tdz);
+			 TeS = TeQ - TeR;
+			 Tfe = TeQ + TeR;
+			 TeU = KP707106781 * (TdM + TdN);
+			 TeV = TeT - TeU;
+			 Tff = TeT + TeU;
+			 TeW = FMA(KP555570233, TeS, KP831469612 * TeV);
+			 Tfm = FNMS(KP195090322, Tfe, KP980785280 * Tff);
+			 Tf6 = FNMS(KP831469612, TeS, KP555570233 * TeV);
+			 Tfg = FMA(KP980785280, Tfe, KP195090322 * Tff);
+		    }
+	       }
+	       {
+		    E TeP, Tf4, Tht, Thw;
+		    TeP = TeH + TeO;
+		    Tf4 = TeW + Tf3;
+		    iio[-WS(ios, 42)] = TeP - Tf4;
+		    rio[WS(ios, 10)] = TeP + Tf4;
+		    Tht = Tf6 + Tf7;
+		    Thw = Thu + Thv;
+		    rio[WS(ios, 42)] = Tht - Thw;
+		    iio[-WS(ios, 10)] = Tht + Thw;
+	       }
+	       {
+		    E Tf5, Tf8, Thx, Thy;
+		    Tf5 = TeH - TeO;
+		    Tf8 = Tf6 - Tf7;
+		    iio[-WS(ios, 58)] = Tf5 - Tf8;
+		    rio[WS(ios, 26)] = Tf5 + Tf8;
+		    Thx = Tf3 - TeW;
+		    Thy = Thv - Thu;
+		    rio[WS(ios, 58)] = Thx - Thy;
+		    iio[-WS(ios, 26)] = Thx + Thy;
+	       }
+	       {
+		    E Tfd, Tfk, Thj, Thq;
+		    Tfd = Tf9 + Tfc;
+		    Tfk = Tfg + Tfj;
+		    iio[-WS(ios, 34)] = Tfd - Tfk;
+		    rio[WS(ios, 2)] = Tfd + Tfk;
+		    Thj = Tfm + Tfn;
+		    Thq = Thk + Thp;
+		    rio[WS(ios, 34)] = Thj - Thq;
+		    iio[-WS(ios, 2)] = Thj + Thq;
+	       }
+	       {
+		    E Tfl, Tfo, Thr, Ths;
+		    Tfl = Tf9 - Tfc;
+		    Tfo = Tfm - Tfn;
+		    iio[-WS(ios, 50)] = Tfl - Tfo;
+		    rio[WS(ios, 18)] = Tfl + Tfo;
+		    Thr = Tfj - Tfg;
+		    Ths = Thp - Thk;
+		    rio[WS(ios, 50)] = Thr - Ths;
+		    iio[-WS(ios, 18)] = Thr + Ths;
+	       }
+	  }
+	  {
+	       E T6L, T9x, TiD, TiJ, T7E, TiI, T9A, TiA, T8y, T9K, T9u, T9E, T9r, T9L, T9v;
+	       E T9H;
+	       {
+		    E T6n, T6K, TiB, TiC;
+		    T6n = T6b - T6m;
+		    T6K = T6y - T6J;
+		    T6L = T6n - T6K;
+		    T9x = T6n + T6K;
+		    TiB = T9P - T9O;
+		    TiC = Tin - Tim;
+		    TiD = TiB + TiC;
+		    TiJ = TiC - TiB;
+	       }
+	       {
+		    E T7c, T9y, T7D, T9z;
+		    {
+			 E T72, T7b, T7t, T7C;
+			 T72 = T6Q - T71;
+			 T7b = T77 - T7a;
+			 T7c = FNMS(KP980785280, T7b, KP195090322 * T72);
+			 T9y = FMA(KP980785280, T72, KP195090322 * T7b);
+			 T7t = T7h - T7s;
+			 T7C = T7y - T7B;
+			 T7D = FMA(KP195090322, T7t, KP980785280 * T7C);
+			 T9z = FNMS(KP980785280, T7t, KP195090322 * T7C);
+		    }
+		    T7E = T7c - T7D;
+		    TiI = T9z - T9y;
+		    T9A = T9y + T9z;
+		    TiA = T7c + T7D;
+	       }
+	       {
+		    E T8k, T9C, T8x, T9D;
+		    {
+			 E T7W, T8j, T8t, T8w;
+			 T7W = T7K - T7V;
+			 T8j = T87 - T8i;
+			 T8k = T7W - T8j;
+			 T9C = T7W + T8j;
+			 T8t = T8p - T8s;
+			 T8w = T8u - T8v;
+			 T8x = T8t - T8w;
+			 T9D = T8t + T8w;
+		    }
+		    T8y = FMA(KP995184726, T8k, KP098017140 * T8x);
+		    T9K = FNMS(KP634393284, T9D, KP773010453 * T9C);
+		    T9u = FNMS(KP995184726, T8x, KP098017140 * T8k);
+		    T9E = FMA(KP634393284, T9C, KP773010453 * T9D);
+	       }
+	       {
+		    E T9d, T9F, T9q, T9G;
+		    {
+			 E T8P, T9c, T9m, T9p;
+			 T8P = T8D - T8O;
+			 T9c = T90 - T9b;
+			 T9d = T8P - T9c;
+			 T9F = T8P + T9c;
+			 T9m = T9i - T9l;
+			 T9p = T9n - T9o;
+			 T9q = T9m - T9p;
+			 T9G = T9m + T9p;
+		    }
+		    T9r = FNMS(KP995184726, T9q, KP098017140 * T9d);
+		    T9L = FMA(KP773010453, T9G, KP634393284 * T9F);
+		    T9v = FMA(KP098017140, T9q, KP995184726 * T9d);
+		    T9H = FNMS(KP634393284, T9G, KP773010453 * T9F);
+	       }
+	       {
+		    E T7F, T9s, TiH, TiK;
+		    T7F = T6L + T7E;
+		    T9s = T8y + T9r;
+		    iio[-WS(ios, 47)] = T7F - T9s;
+		    rio[WS(ios, 15)] = T7F + T9s;
+		    TiH = T9u + T9v;
+		    TiK = TiI + TiJ;
+		    rio[WS(ios, 47)] = TiH - TiK;
+		    iio[-WS(ios, 15)] = TiH + TiK;
+	       }
+	       {
+		    E T9t, T9w, TiL, TiM;
+		    T9t = T6L - T7E;
+		    T9w = T9u - T9v;
+		    iio[-WS(ios, 63)] = T9t - T9w;
+		    rio[WS(ios, 31)] = T9t + T9w;
+		    TiL = T9r - T8y;
+		    TiM = TiJ - TiI;
+		    rio[WS(ios, 63)] = TiL - TiM;
+		    iio[-WS(ios, 31)] = TiL + TiM;
+	       }
+	       {
+		    E T9B, T9I, Tiz, TiE;
+		    T9B = T9x + T9A;
+		    T9I = T9E + T9H;
+		    iio[-WS(ios, 39)] = T9B - T9I;
+		    rio[WS(ios, 7)] = T9B + T9I;
+		    Tiz = T9K + T9L;
+		    TiE = TiA + TiD;
+		    rio[WS(ios, 39)] = Tiz - TiE;
+		    iio[-WS(ios, 7)] = Tiz + TiE;
+	       }
+	       {
+		    E T9J, T9M, TiF, TiG;
+		    T9J = T9x - T9A;
+		    T9M = T9K - T9L;
+		    iio[-WS(ios, 55)] = T9J - T9M;
+		    rio[WS(ios, 23)] = T9J + T9M;
+		    TiF = T9H - T9E;
+		    TiG = TiD - TiA;
+		    rio[WS(ios, 55)] = TiF - TiG;
+		    iio[-WS(ios, 23)] = TiF + TiG;
+	       }
+	  }
+	  {
+	       E TaL, TbJ, Ti9, Tif, Tb0, Tie, TbM, Ti6, Tbk, TbW, TbG, TbQ, TbD, TbX, TbH;
+	       E TbT;
+	       {
+		    E TaD, TaK, Ti7, Ti8;
+		    TaD = Taz - TaC;
+		    TaK = TaG - TaJ;
+		    TaL = TaD - TaK;
+		    TbJ = TaD + TaK;
+		    Ti7 = Tc1 - Tc0;
+		    Ti8 = ThT - ThQ;
+		    Ti9 = Ti7 + Ti8;
+		    Tif = Ti8 - Ti7;
+	       }
+	       {
+		    E TaS, TbK, TaZ, TbL;
+		    {
+			 E TaO, TaR, TaV, TaY;
+			 TaO = TaM - TaN;
+			 TaR = TaP - TaQ;
+			 TaS = FNMS(KP831469612, TaR, KP555570233 * TaO);
+			 TbK = FMA(KP555570233, TaR, KP831469612 * TaO);
+			 TaV = TaT - TaU;
+			 TaY = TaW - TaX;
+			 TaZ = FMA(KP831469612, TaV, KP555570233 * TaY);
+			 TbL = FNMS(KP831469612, TaY, KP555570233 * TaV);
+		    }
+		    Tb0 = TaS - TaZ;
+		    Tie = TbL - TbK;
+		    TbM = TbK + TbL;
+		    Ti6 = TaS + TaZ;
+	       }
+	       {
+		    E Tbc, TbO, Tbj, TbP;
+		    {
+			 E Tb4, Tbb, Tbf, Tbi;
+			 Tb4 = Tb2 - Tb3;
+			 Tbb = Tb7 - Tba;
+			 Tbc = Tb4 - Tbb;
+			 TbO = Tb4 + Tbb;
+			 Tbf = Tbd - Tbe;
+			 Tbi = Tbg - Tbh;
+			 Tbj = Tbf - Tbi;
+			 TbP = Tbf + Tbi;
+		    }
+		    Tbk = FMA(KP956940335, Tbc, KP290284677 * Tbj);
+		    TbW = FNMS(KP471396736, TbP, KP881921264 * TbO);
+		    TbG = FNMS(KP956940335, Tbj, KP290284677 * Tbc);
+		    TbQ = FMA(KP471396736, TbO, KP881921264 * TbP);
+	       }
+	       {
+		    E Tbv, TbR, TbC, TbS;
+		    {
+			 E Tbn, Tbu, Tby, TbB;
+			 Tbn = Tbl - Tbm;
+			 Tbu = Tbq - Tbt;
+			 Tbv = Tbn - Tbu;
+			 TbR = Tbn + Tbu;
+			 Tby = Tbw - Tbx;
+			 TbB = Tbz - TbA;
+			 TbC = Tby - TbB;
+			 TbS = Tby + TbB;
+		    }
+		    TbD = FNMS(KP956940335, TbC, KP290284677 * Tbv);
+		    TbX = FMA(KP881921264, TbS, KP471396736 * TbR);
+		    TbH = FMA(KP290284677, TbC, KP956940335 * Tbv);
+		    TbT = FNMS(KP471396736, TbS, KP881921264 * TbR);
+	       }
+	       {
+		    E Tb1, TbE, Tid, Tig;
+		    Tb1 = TaL + Tb0;
+		    TbE = Tbk + TbD;
+		    iio[-WS(ios, 45)] = Tb1 - TbE;
+		    rio[WS(ios, 13)] = Tb1 + TbE;
+		    Tid = TbG + TbH;
+		    Tig = Tie + Tif;
+		    rio[WS(ios, 45)] = Tid - Tig;
+		    iio[-WS(ios, 13)] = Tid + Tig;
+	       }
+	       {
+		    E TbF, TbI, Tih, Tii;
+		    TbF = TaL - Tb0;
+		    TbI = TbG - TbH;
+		    iio[-WS(ios, 61)] = TbF - TbI;
+		    rio[WS(ios, 29)] = TbF + TbI;
+		    Tih = TbD - Tbk;
+		    Tii = Tif - Tie;
+		    rio[WS(ios, 61)] = Tih - Tii;
+		    iio[-WS(ios, 29)] = Tih + Tii;
+	       }
+	       {
+		    E TbN, TbU, Ti5, Tia;
+		    TbN = TbJ + TbM;
+		    TbU = TbQ + TbT;
+		    iio[-WS(ios, 37)] = TbN - TbU;
+		    rio[WS(ios, 5)] = TbN + TbU;
+		    Ti5 = TbW + TbX;
+		    Tia = Ti6 + Ti9;
+		    rio[WS(ios, 37)] = Ti5 - Tia;
+		    iio[-WS(ios, 5)] = Ti5 + Tia;
+	       }
+	       {
+		    E TbV, TbY, Tib, Tic;
+		    TbV = TbJ - TbM;
+		    TbY = TbW - TbX;
+		    iio[-WS(ios, 53)] = TbV - TbY;
+		    rio[WS(ios, 21)] = TbV + TbY;
+		    Tib = TbT - TbQ;
+		    Tic = Ti9 - Ti6;
+		    rio[WS(ios, 53)] = Tib - Tic;
+		    iio[-WS(ios, 21)] = Tib + Tic;
+	       }
+	  }
+	  {
+	       E Tc3, Tcv, ThV, Ti1, Tca, Ti0, Tcy, ThO, Tci, TcI, Tcs, TcC, Tcp, TcJ, Tct;
+	       E TcF;
+	       {
+		    E TbZ, Tc2, ThP, ThU;
+		    TbZ = Taz + TaC;
+		    Tc2 = Tc0 + Tc1;
+		    Tc3 = TbZ - Tc2;
+		    Tcv = TbZ + Tc2;
+		    ThP = TaG + TaJ;
+		    ThU = ThQ + ThT;
+		    ThV = ThP + ThU;
+		    Ti1 = ThU - ThP;
+	       }
+	       {
+		    E Tc6, Tcw, Tc9, Tcx;
+		    {
+			 E Tc4, Tc5, Tc7, Tc8;
+			 Tc4 = TaM + TaN;
+			 Tc5 = TaP + TaQ;
+			 Tc6 = FNMS(KP195090322, Tc5, KP980785280 * Tc4);
+			 Tcw = FMA(KP980785280, Tc5, KP195090322 * Tc4);
+			 Tc7 = TaT + TaU;
+			 Tc8 = TaW + TaX;
+			 Tc9 = FMA(KP195090322, Tc7, KP980785280 * Tc8);
+			 Tcx = FNMS(KP195090322, Tc8, KP980785280 * Tc7);
+		    }
+		    Tca = Tc6 - Tc9;
+		    Ti0 = Tcx - Tcw;
+		    Tcy = Tcw + Tcx;
+		    ThO = Tc6 + Tc9;
+	       }
+	       {
+		    E Tce, TcA, Tch, TcB;
+		    {
+			 E Tcc, Tcd, Tcf, Tcg;
+			 Tcc = Tbd + Tbe;
+			 Tcd = Tba + Tb7;
+			 Tce = Tcc - Tcd;
+			 TcA = Tcc + Tcd;
+			 Tcf = Tb2 + Tb3;
+			 Tcg = Tbg + Tbh;
+			 Tch = Tcf - Tcg;
+			 TcB = Tcf + Tcg;
+		    }
+		    Tci = FMA(KP634393284, Tce, KP773010453 * Tch);
+		    TcI = FNMS(KP098017140, TcA, KP995184726 * TcB);
+		    Tcs = FNMS(KP773010453, Tce, KP634393284 * Tch);
+		    TcC = FMA(KP995184726, TcA, KP098017140 * TcB);
+	       }
+	       {
+		    E Tcl, TcD, Tco, TcE;
+		    {
+			 E Tcj, Tck, Tcm, Tcn;
+			 Tcj = Tbl + Tbm;
+			 Tck = TbA + Tbz;
+			 Tcl = Tcj - Tck;
+			 TcD = Tcj + Tck;
+			 Tcm = Tbw + Tbx;
+			 Tcn = Tbq + Tbt;
+			 Tco = Tcm - Tcn;
+			 TcE = Tcm + Tcn;
+		    }
+		    Tcp = FNMS(KP773010453, Tco, KP634393284 * Tcl);
+		    TcJ = FMA(KP098017140, TcD, KP995184726 * TcE);
+		    Tct = FMA(KP773010453, Tcl, KP634393284 * Tco);
+		    TcF = FNMS(KP098017140, TcE, KP995184726 * TcD);
+	       }
+	       {
+		    E Tcb, Tcq, ThZ, Ti2;
+		    Tcb = Tc3 + Tca;
+		    Tcq = Tci + Tcp;
+		    iio[-WS(ios, 41)] = Tcb - Tcq;
+		    rio[WS(ios, 9)] = Tcb + Tcq;
+		    ThZ = Tcs + Tct;
+		    Ti2 = Ti0 + Ti1;
+		    rio[WS(ios, 41)] = ThZ - Ti2;
+		    iio[-WS(ios, 9)] = ThZ + Ti2;
+	       }
+	       {
+		    E Tcr, Tcu, Ti3, Ti4;
+		    Tcr = Tc3 - Tca;
+		    Tcu = Tcs - Tct;
+		    iio[-WS(ios, 57)] = Tcr - Tcu;
+		    rio[WS(ios, 25)] = Tcr + Tcu;
+		    Ti3 = Tcp - Tci;
+		    Ti4 = Ti1 - Ti0;
+		    rio[WS(ios, 57)] = Ti3 - Ti4;
+		    iio[-WS(ios, 25)] = Ti3 + Ti4;
+	       }
+	       {
+		    E Tcz, TcG, ThN, ThW;
+		    Tcz = Tcv + Tcy;
+		    TcG = TcC + TcF;
+		    iio[-WS(ios, 33)] = Tcz - TcG;
+		    rio[WS(ios, 1)] = Tcz + TcG;
+		    ThN = TcI + TcJ;
+		    ThW = ThO + ThV;
+		    rio[WS(ios, 33)] = ThN - ThW;
+		    iio[-WS(ios, 1)] = ThN + ThW;
+	       }
+	       {
+		    E TcH, TcK, ThX, ThY;
+		    TcH = Tcv - Tcy;
+		    TcK = TcI - TcJ;
+		    iio[-WS(ios, 49)] = TcH - TcK;
+		    rio[WS(ios, 17)] = TcH + TcK;
+		    ThX = TcF - TcC;
+		    ThY = ThV - ThO;
+		    rio[WS(ios, 49)] = ThX - ThY;
+		    iio[-WS(ios, 17)] = ThX + ThY;
+	       }
+	  }
+	  {
+	       E T9R, Taj, Tip, Tiv, T9Y, Tiu, Tam, Tik, Ta6, Taw, Tag, Taq, Tad, Tax, Tah;
+	       E Tat;
+	       {
+		    E T9N, T9Q, Til, Tio;
+		    T9N = T6b + T6m;
+		    T9Q = T9O + T9P;
+		    T9R = T9N - T9Q;
+		    Taj = T9N + T9Q;
+		    Til = T6y + T6J;
+		    Tio = Tim + Tin;
+		    Tip = Til + Tio;
+		    Tiv = Tio - Til;
+	       }
+	       {
+		    E T9U, Tak, T9X, Tal;
+		    {
+			 E T9S, T9T, T9V, T9W;
+			 T9S = T6Q + T71;
+			 T9T = T77 + T7a;
+			 T9U = FNMS(KP555570233, T9T, KP831469612 * T9S);
+			 Tak = FMA(KP555570233, T9S, KP831469612 * T9T);
+			 T9V = T7h + T7s;
+			 T9W = T7y + T7B;
+			 T9X = FMA(KP831469612, T9V, KP555570233 * T9W);
+			 Tal = FNMS(KP555570233, T9V, KP831469612 * T9W);
+		    }
+		    T9Y = T9U - T9X;
+		    Tiu = Tal - Tak;
+		    Tam = Tak + Tal;
+		    Tik = T9U + T9X;
+	       }
+	       {
+		    E Ta2, Tao, Ta5, Tap;
+		    {
+			 E Ta0, Ta1, Ta3, Ta4;
+			 Ta0 = T8p + T8s;
+			 Ta1 = T8i + T87;
+			 Ta2 = Ta0 - Ta1;
+			 Tao = Ta0 + Ta1;
+			 Ta3 = T7K + T7V;
+			 Ta4 = T8u + T8v;
+			 Ta5 = Ta3 - Ta4;
+			 Tap = Ta3 + Ta4;
+		    }
+		    Ta6 = FMA(KP471396736, Ta2, KP881921264 * Ta5);
+		    Taw = FNMS(KP290284677, Tao, KP956940335 * Tap);
+		    Tag = FNMS(KP881921264, Ta2, KP471396736 * Ta5);
+		    Taq = FMA(KP956940335, Tao, KP290284677 * Tap);
+	       }
+	       {
+		    E Ta9, Tar, Tac, Tas;
+		    {
+			 E Ta7, Ta8, Taa, Tab;
+			 Ta7 = T8D + T8O;
+			 Ta8 = T9o + T9n;
+			 Ta9 = Ta7 - Ta8;
+			 Tar = Ta7 + Ta8;
+			 Taa = T9i + T9l;
+			 Tab = T90 + T9b;
+			 Tac = Taa - Tab;
+			 Tas = Taa + Tab;
+		    }
+		    Tad = FNMS(KP881921264, Tac, KP471396736 * Ta9);
+		    Tax = FMA(KP290284677, Tar, KP956940335 * Tas);
+		    Tah = FMA(KP881921264, Ta9, KP471396736 * Tac);
+		    Tat = FNMS(KP290284677, Tas, KP956940335 * Tar);
+	       }
+	       {
+		    E T9Z, Tae, Tit, Tiw;
+		    T9Z = T9R + T9Y;
+		    Tae = Ta6 + Tad;
+		    iio[-WS(ios, 43)] = T9Z - Tae;
+		    rio[WS(ios, 11)] = T9Z + Tae;
+		    Tit = Tag + Tah;
+		    Tiw = Tiu + Tiv;
+		    rio[WS(ios, 43)] = Tit - Tiw;
+		    iio[-WS(ios, 11)] = Tit + Tiw;
+	       }
+	       {
+		    E Taf, Tai, Tix, Tiy;
+		    Taf = T9R - T9Y;
+		    Tai = Tag - Tah;
+		    iio[-WS(ios, 59)] = Taf - Tai;
+		    rio[WS(ios, 27)] = Taf + Tai;
+		    Tix = Tad - Ta6;
+		    Tiy = Tiv - Tiu;
+		    rio[WS(ios, 59)] = Tix - Tiy;
+		    iio[-WS(ios, 27)] = Tix + Tiy;
+	       }
+	       {
+		    E Tan, Tau, Tij, Tiq;
+		    Tan = Taj + Tam;
+		    Tau = Taq + Tat;
+		    iio[-WS(ios, 35)] = Tan - Tau;
+		    rio[WS(ios, 3)] = Tan + Tau;
+		    Tij = Taw + Tax;
+		    Tiq = Tik + Tip;
+		    rio[WS(ios, 35)] = Tij - Tiq;
+		    iio[-WS(ios, 3)] = Tij + Tiq;
+	       }
+	       {
+		    E Tav, Tay, Tir, Tis;
+		    Tav = Taj - Tam;
+		    Tay = Taw - Tax;
+		    iio[-WS(ios, 51)] = Tav - Tay;
+		    rio[WS(ios, 19)] = Tav + Tay;
+		    Tir = Tat - Taq;
+		    Tis = Tip - Tik;
+		    rio[WS(ios, 51)] = Tir - Tis;
+		    iio[-WS(ios, 19)] = Tir + Tis;
+	       }
+	  }
+     }
+     return W;
+}
+
+static const tw_instr twinstr[] = { /* twiddle-instruction table; referenced by the desc initializer below */
+     {TW_FULL, 0, 64}, /* presumably requests the full twiddle set for size 64 -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* presumably the end-of-list marker -- confirm */
+};
+
+static const hc2hc_desc desc = { 64, "hf_64", twinstr, {808, 270, 230, 0}, &GENUS, 0, 0, 0 }; /* descriptor passed at registration: size 64, codelet name, twiddle table; {808, 270, 230, 0} is presumably the add / mul / fused-multiply-add / other operation count -- confirm against hc2hc_desc */
+
+void X(codelet_hf_64) (planner *p) { /* registration entry point for the hf_64 codelet */
+     X(khc2hc_dit_register) (p, hf_64, &desc); /* hands the codelet function and its descriptor to planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_7.c b/src/fftw3/rdft/codelets/r2hc/hf_7.c
new file mode 100644
index 0000000..df7b4ac
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_7.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:56 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 7 -dit -name hf_7 -include hf.h */
+
+/*
+ * This function contains 72 FP additions, 60 FP multiplications,
+ * (or, 36 additions, 24 multiplications, 36 fused multiply/add),
+ * 29 stack variables, and 28 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_7.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_7.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_7.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf_7(R *rio, R *iio, const R *W, stride ios, int m, int dist) /* size-7 codelet emitted by gen_hc2hc -dit: rewrites rio/iio in place and returns W advanced past the twiddles it read */
+{
+     DK(KP222520933, +0.222520933956314404288902564496794759466355569); /* cos(3*pi/7) */
+     DK(KP900968867, +0.900968867902419126236102319507445051165919162); /* cos(pi/7) */
+     DK(KP623489801, +0.623489801858733530525004884004239810632274731); /* cos(2*pi/7) */
+     DK(KP433883739, +0.433883739117558120475768332848358754609990728); /* sin(pi/7) */
+     DK(KP781831482, +0.781831482468029808708444526674057750232334519); /* sin(2*pi/7) */
+     DK(KP974927912, +0.974927912181823607018131682993931217232785801); /* sin(3*pi/7) */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 12) { /* i steps down by 2 from m - 2; each pass moves rio forward and iio backward by dist and consumes W[0..11] */
+	  E T1, Tc, TS, TC, TO, TR, Tn, TT, TI, TP, Ty, TU, TF, TQ;
+	  T1 = rio[0]; /* used as read, without a twiddle multiply */
+	  {
+	       E T6, TA, Tb, TB;
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = rio[WS(ios, 1)];
+		    T5 = iio[-WS(ios, 5)];
+		    T2 = W[0];
+		    T4 = W[1];
+		    T6 = FMA(T2, T3, T4 * T5); /* T2*T3 + T4*T5, assuming FMA(a, b, c) = a*b + c -- confirm in the project headers */
+		    TA = FNMS(T4, T3, T2 * T5); /* T2*T5 - T4*T3, assuming FNMS(a, b, c) = c - a*b -- confirm */
+	       }
+	       {
+		    E T8, Ta, T7, T9;
+		    T8 = rio[WS(ios, 6)];
+		    Ta = iio[0];
+		    T7 = W[10];
+		    T9 = W[11];
+		    Tb = FMA(T7, T8, T9 * Ta);
+		    TB = FNMS(T9, T8, T7 * Ta);
+	       }
+	       Tc = T6 + Tb; /* sums and differences of the two twiddled values read via rio[WS(ios, 1)] and rio[WS(ios, 6)] */
+	       TS = Tb - T6;
+	       TC = TA - TB;
+	       TO = TA + TB;
+	  }
+	  TR = iio[-WS(ios, 6)]; /* used as read, without a twiddle multiply */
+	  {
+	       E Th, TG, Tm, TH;
+	       {
+		    E Te, Tg, Td, Tf;
+		    Te = rio[WS(ios, 2)];
+		    Tg = iio[-WS(ios, 4)];
+		    Td = W[2];
+		    Tf = W[3];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    TG = FNMS(Tf, Te, Td * Tg);
+	       }
+	       {
+		    E Tj, Tl, Ti, Tk;
+		    Tj = rio[WS(ios, 5)];
+		    Tl = iio[-WS(ios, 1)];
+		    Ti = W[8];
+		    Tk = W[9];
+		    Tm = FMA(Ti, Tj, Tk * Tl);
+		    TH = FNMS(Tk, Tj, Ti * Tl);
+	       }
+	       Tn = Th + Tm; /* same sum/difference pattern for the values read via rio[WS(ios, 2)] and rio[WS(ios, 5)] */
+	       TT = Tm - Th;
+	       TI = TG - TH;
+	       TP = TG + TH;
+	  }
+	  {
+	       E Ts, TD, Tx, TE;
+	       {
+		    E Tp, Tr, To, Tq;
+		    Tp = rio[WS(ios, 3)];
+		    Tr = iio[-WS(ios, 3)];
+		    To = W[4];
+		    Tq = W[5];
+		    Ts = FMA(To, Tp, Tq * Tr);
+		    TD = FNMS(Tq, Tp, To * Tr);
+	       }
+	       {
+		    E Tu, Tw, Tt, Tv;
+		    Tu = rio[WS(ios, 4)];
+		    Tw = iio[-WS(ios, 2)];
+		    Tt = W[6];
+		    Tv = W[7];
+		    Tx = FMA(Tt, Tu, Tv * Tw);
+		    TE = FNMS(Tv, Tu, Tt * Tw);
+	       }
+	       Ty = Ts + Tx; /* same sum/difference pattern for the values read via rio[WS(ios, 3)] and rio[WS(ios, 4)] */
+	       TU = Tx - Ts;
+	       TF = TD - TE;
+	       TQ = TD + TE;
+	  }
+	  rio[0] = T1 + Tc + Tn + Ty; /* plain sum of T1 and the three pair sums */
+	  iio[0] = TO + TP + TQ + TR; /* plain sum of TR and the three second-component pair sums */
+	  { /* each of the three blocks below builds two sine-weighted and two cosine-weighted combinations and stores their sum and difference into four slots */
+	       E TJ, Tz, TX, TY;
+	       TJ = FNMS(KP781831482, TF, KP974927912 * TC) - (KP433883739 * TI);
+	       Tz = FMA(KP623489801, Ty, T1) + FNMA(KP900968867, Tn, KP222520933 * Tc); /* NOTE(review): FNMA is defined outside this file; presumably FNMA(a, b, c) = -(a*b) - c -- confirm */
+	       iio[-WS(ios, 5)] = Tz - TJ;
+	       rio[WS(ios, 2)] = Tz + TJ;
+	       TX = FNMS(KP781831482, TU, KP974927912 * TS) - (KP433883739 * TT);
+	       TY = FMA(KP623489801, TQ, TR) + FNMA(KP900968867, TP, KP222520933 * TO);
+	       rio[WS(ios, 5)] = TX - TY;
+	       iio[-WS(ios, 2)] = TX + TY;
+	  }
+	  {
+	       E TL, TK, TV, TW;
+	       TL = FMA(KP781831482, TC, KP974927912 * TI) + (KP433883739 * TF);
+	       TK = FMA(KP623489801, Tc, T1) + FNMA(KP900968867, Ty, KP222520933 * Tn);
+	       iio[-WS(ios, 6)] = TK - TL;
+	       rio[WS(ios, 1)] = TK + TL;
+	       TV = FMA(KP781831482, TS, KP974927912 * TT) + (KP433883739 * TU);
+	       TW = FMA(KP623489801, TO, TR) + FNMA(KP900968867, TQ, KP222520933 * TP);
+	       rio[WS(ios, 6)] = TV - TW;
+	       iio[-WS(ios, 1)] = TV + TW;
+	  }
+	  {
+	       E TN, TM, TZ, T10;
+	       TN = FMA(KP433883739, TC, KP974927912 * TF) - (KP781831482 * TI);
+	       TM = FMA(KP623489801, Tn, T1) + FNMA(KP222520933, Ty, KP900968867 * Tc);
+	       iio[-WS(ios, 4)] = TM - TN;
+	       rio[WS(ios, 3)] = TM + TN;
+	       TZ = FMA(KP433883739, TS, KP974927912 * TU) - (KP781831482 * TT);
+	       T10 = FMA(KP623489801, TP, TR) + FNMA(KP222520933, TQ, KP900968867 * TO);
+	       rio[WS(ios, 4)] = TZ - T10;
+	       iio[-WS(ios, 3)] = TZ + T10;
+	  }
+     }
+     return W; /* W has been advanced by 12 for every loop pass executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle-instruction table; referenced by the desc initializer below */
+     {TW_FULL, 0, 7}, /* presumably requests the full twiddle set for size 7 -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* presumably the end-of-list marker -- confirm */
+};
+
+static const hc2hc_desc desc = { 7, "hf_7", twinstr, {36, 24, 36, 0}, &GENUS, 0, 0, 0 }; /* descriptor passed at registration: size 7, codelet name, twiddle table; {36, 24, 36, ...} equals the additions / multiplications / fused multiply-add counts quoted in this file's header comment */
+
+void X(codelet_hf_7) (planner *p) { /* registration entry point for the hf_7 codelet */
+     X(khc2hc_dit_register) (p, hf_7, &desc); /* hands the codelet function and its descriptor to planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_8.c b/src/fftw3/rdft/codelets/r2hc/hf_8.c
new file mode 100644
index 0000000..21dedce
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_8.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:58 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 8 -dit -name hf_8 -include hf.h */
+
+/*
+ * This function contains 66 FP additions, 32 FP multiplications,
+ * (or, 52 additions, 18 multiplications, 14 fused multiply/add),
+ * 28 stack variables, and 32 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf_8(R *rio, R *iio, const R *W, stride ios, int m, int dist) /* size-8 codelet emitted by gen_hc2hc -dit: rewrites rio/iio in place and returns W advanced past the twiddles it read */
+{
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 14) { /* i steps down by 2 from m - 2; each pass moves rio forward and iio backward by dist and consumes W[0..13] */
+	  E T7, T1e, TH, T19, TF, T13, TR, TU, Ti, T1f, TK, T16, Tu, T12, TM;
+	  E TP;
+	  {
+	       E T1, T18, T6, T17;
+	       T1 = rio[0]; /* used as read, without a twiddle multiply */
+	       T18 = iio[-WS(ios, 7)]; /* used as read, without a twiddle multiply */
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = rio[WS(ios, 4)];
+		    T5 = iio[-WS(ios, 3)];
+		    T2 = W[6];
+		    T4 = W[7];
+		    T6 = FMA(T2, T3, T4 * T5); /* T2*T3 + T4*T5, assuming FMA(a, b, c) = a*b + c -- confirm in the project headers */
+		    T17 = FNMS(T4, T3, T2 * T5); /* T2*T5 - T4*T3, assuming FNMS(a, b, c) = c - a*b -- confirm */
+	       }
+	       T7 = T1 + T6; /* sum/difference of the untwiddled value and the twiddled value read via rio[WS(ios, 4)] */
+	       T1e = T18 - T17;
+	       TH = T1 - T6;
+	       T19 = T17 + T18;
+	  }
+	  {
+	       E Tz, TS, TE, TT;
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = rio[WS(ios, 7)];
+		    Ty = iio[0];
+		    Tv = W[12];
+		    Tx = W[13];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    TS = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E TB, TD, TA, TC;
+		    TB = rio[WS(ios, 3)];
+		    TD = iio[-WS(ios, 4)];
+		    TA = W[4];
+		    TC = W[5];
+		    TE = FMA(TA, TB, TC * TD);
+		    TT = FNMS(TC, TB, TA * TD);
+	       }
+	       TF = Tz + TE; /* sum/difference of the twiddled values read via rio[WS(ios, 7)] and rio[WS(ios, 3)] */
+	       T13 = TS + TT;
+	       TR = Tz - TE;
+	       TU = TS - TT;
+	  }
+	  {
+	       E Tc, TI, Th, TJ;
+	       {
+		    E T9, Tb, T8, Ta;
+		    T9 = rio[WS(ios, 2)];
+		    Tb = iio[-WS(ios, 5)];
+		    T8 = W[2];
+		    Ta = W[3];
+		    Tc = FMA(T8, T9, Ta * Tb);
+		    TI = FNMS(Ta, T9, T8 * Tb);
+	       }
+	       {
+		    E Te, Tg, Td, Tf;
+		    Te = rio[WS(ios, 6)];
+		    Tg = iio[-WS(ios, 1)];
+		    Td = W[10];
+		    Tf = W[11];
+		    Th = FMA(Td, Te, Tf * Tg);
+		    TJ = FNMS(Tf, Te, Td * Tg);
+	       }
+	       Ti = Tc + Th; /* sum/difference of the twiddled values read via rio[WS(ios, 2)] and rio[WS(ios, 6)] */
+	       T1f = Tc - Th;
+	       TK = TI - TJ;
+	       T16 = TI + TJ;
+	  }
+	  {
+	       E To, TN, Tt, TO;
+	       {
+		    E Tl, Tn, Tk, Tm;
+		    Tl = rio[WS(ios, 1)];
+		    Tn = iio[-WS(ios, 6)];
+		    Tk = W[0];
+		    Tm = W[1];
+		    To = FMA(Tk, Tl, Tm * Tn);
+		    TN = FNMS(Tm, Tl, Tk * Tn);
+	       }
+	       {
+		    E Tq, Ts, Tp, Tr;
+		    Tq = rio[WS(ios, 5)];
+		    Ts = iio[-WS(ios, 2)];
+		    Tp = W[8];
+		    Tr = W[9];
+		    Tt = FMA(Tp, Tq, Tr * Ts);
+		    TO = FNMS(Tr, Tq, Tp * Ts);
+	       }
+	       Tu = To + Tt; /* sum/difference of the twiddled values read via rio[WS(ios, 1)] and rio[WS(ios, 5)] */
+	       T12 = TN + TO;
+	       TM = To - Tt;
+	       TP = TN - TO;
+	  }
+	  { /* output stage: the first eight stores need only additions and subtractions */
+	       E Tj, TG, T1b, T1c;
+	       Tj = T7 + Ti;
+	       TG = Tu + TF;
+	       iio[-WS(ios, 4)] = Tj - TG;
+	       rio[0] = Tj + TG;
+	       {
+		    E T15, T1a, T11, T14;
+		    T15 = T12 + T13;
+		    T1a = T16 + T19;
+		    rio[WS(ios, 4)] = T15 - T1a;
+		    iio[0] = T15 + T1a;
+		    T11 = T7 - Ti;
+		    T14 = T12 - T13;
+		    iio[-WS(ios, 6)] = T11 - T14;
+		    rio[WS(ios, 2)] = T11 + T14;
+	       }
+	       T1b = TF - Tu;
+	       T1c = T19 - T16;
+	       rio[WS(ios, 6)] = T1b - T1c;
+	       iio[-WS(ios, 2)] = T1b + T1c;
+	       { /* the remaining eight stores each involve one product with sqrt(2)/2 */
+		    E TX, T1g, T10, T1d, TY, TZ;
+		    TX = TH - TK;
+		    T1g = T1e - T1f;
+		    TY = TP - TM;
+		    TZ = TR + TU;
+		    T10 = KP707106781 * (TY - TZ);
+		    T1d = KP707106781 * (TY + TZ);
+		    iio[-WS(ios, 7)] = TX - T10;
+		    iio[-WS(ios, 1)] = T1d + T1g;
+		    rio[WS(ios, 3)] = TX + T10;
+		    rio[WS(ios, 5)] = T1d - T1g;
+	       }
+	       {
+		    E TL, T1i, TW, T1h, TQ, TV;
+		    TL = TH + TK;
+		    T1i = T1f + T1e;
+		    TQ = TM + TP;
+		    TV = TR - TU;
+		    TW = KP707106781 * (TQ + TV);
+		    T1h = KP707106781 * (TV - TQ);
+		    iio[-WS(ios, 5)] = TL - TW;
+		    iio[-WS(ios, 3)] = T1h + T1i;
+		    rio[WS(ios, 1)] = TL + TW;
+		    rio[WS(ios, 7)] = T1h - T1i;
+	       }
+	  }
+     }
+     return W; /* W has been advanced by 14 for every loop pass executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle-instruction table; referenced by the desc initializer below */
+     {TW_FULL, 0, 8}, /* presumably requests the full twiddle set for size 8 -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* presumably the end-of-list marker -- confirm */
+};
+
+static const hc2hc_desc desc = { 8, "hf_8", twinstr, {52, 18, 14, 0}, &GENUS, 0, 0, 0 }; /* descriptor passed at registration: size 8, codelet name, twiddle table; {52, 18, 14, ...} equals the additions / multiplications / fused multiply-add counts quoted in this file's header comment */
+
+void X(codelet_hf_8) (planner *p) { /* registration entry point for the hf_8 codelet */
+     X(khc2hc_dit_register) (p, hf_8, &desc); /* hands the codelet function and its descriptor to planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/hf_9.c b/src/fftw3/rdft/codelets/r2hc/hf_9.c
new file mode 100644
index 0000000..167bd52
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/hf_9.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:57:01 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_hc2hc -compact -variables 4 -n 9 -dit -name hf_9 -include hf.h */
+
+/*
+ * This function contains 96 FP additions, 72 FP multiplications,
+ * (or, 60 additions, 36 multiplications, 36 fused multiply/add),
+ * 41 stack variables, and 36 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: hf_9.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_9.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: hf_9.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "hf.h"
+
+static const R *hf_9(R *rio, R *iio, const R *W, stride ios, int m, int dist) /* size-9 codelet emitted by gen_hc2hc -dit: rewrites rio/iio in place and returns W advanced past the twiddles it read */
+{
+     DK(KP939692620, +0.939692620785908384054109277324731469936208134); /* cos(pi/9) */
+     DK(KP342020143, +0.342020143325668733044099614682259580763083368); /* sin(pi/9) */
+     DK(KP984807753, +0.984807753012208059366743024589523013670643252); /* sin(4*pi/9) */
+     DK(KP173648177, +0.173648177666930348851716626769314796000375677); /* cos(4*pi/9) */
+     DK(KP642787609, +0.642787609686539326322643409907263432907559884); /* sin(2*pi/9) */
+     DK(KP766044443, +0.766044443118978035202392650555416673935832457); /* cos(2*pi/9) */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
+     int i;
+     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 16) { /* i steps down by 2 from m - 2; each pass moves rio forward and iio backward by dist and consumes W[0..15] */
+	  E T1, T1B, TQ, T1G, Tc, TN, T1A, T1H, TL, T1x, T17, T1o, T1c, T1n, Tu;
+	  E T1w, TW, T1k, T11, T1l;
+	  {
+	       E T6, TO, Tb, TP;
+	       T1 = rio[0]; /* used as read, without a twiddle multiply */
+	       T1B = iio[-WS(ios, 8)]; /* used as read, without a twiddle multiply */
+	       {
+		    E T3, T5, T2, T4;
+		    T3 = rio[WS(ios, 3)];
+		    T5 = iio[-WS(ios, 5)];
+		    T2 = W[4];
+		    T4 = W[5];
+		    T6 = FMA(T2, T3, T4 * T5); /* T2*T3 + T4*T5, assuming FMA(a, b, c) = a*b + c -- confirm in the project headers */
+		    TO = FNMS(T4, T3, T2 * T5); /* T2*T5 - T4*T3, assuming FNMS(a, b, c) = c - a*b -- confirm */
+	       }
+	       {
+		    E T8, Ta, T7, T9;
+		    T8 = rio[WS(ios, 6)];
+		    Ta = iio[-WS(ios, 2)];
+		    T7 = W[10];
+		    T9 = W[11];
+		    Tb = FMA(T7, T8, T9 * Ta);
+		    TP = FNMS(T9, T8, T7 * Ta);
+	       }
+	       TQ = KP866025403 * (TO - TP); /* first three-way group: values from slots 0, 3 and 6 */
+	       T1G = KP866025403 * (Tb - T6);
+	       Tc = T6 + Tb;
+	       TN = FNMS(KP500000000, Tc, T1); /* T1 - Tc/2 under the FNMS assumption noted above */
+	       T1A = TO + TP;
+	       T1H = FNMS(KP500000000, T1A, T1B);
+	  }
+	  { /* second three-way group: values from slots 2, 5 and 8 */
+	       E Tz, T19, TE, T14, TJ, T15, TK, T1a;
+	       {
+		    E Tw, Ty, Tv, Tx;
+		    Tw = rio[WS(ios, 2)];
+		    Ty = iio[-WS(ios, 6)];
+		    Tv = W[2];
+		    Tx = W[3];
+		    Tz = FMA(Tv, Tw, Tx * Ty);
+		    T19 = FNMS(Tx, Tw, Tv * Ty);
+	       }
+	       {
+		    E TB, TD, TA, TC;
+		    TB = rio[WS(ios, 5)];
+		    TD = iio[-WS(ios, 3)];
+		    TA = W[8];
+		    TC = W[9];
+		    TE = FMA(TA, TB, TC * TD);
+		    T14 = FNMS(TC, TB, TA * TD);
+	       }
+	       {
+		    E TG, TI, TF, TH;
+		    TG = rio[WS(ios, 8)];
+		    TI = iio[0];
+		    TF = W[14];
+		    TH = W[15];
+		    TJ = FMA(TF, TG, TH * TI);
+		    T15 = FNMS(TH, TG, TF * TI);
+	       }
+	       TK = TE + TJ;
+	       T1a = T14 + T15;
+	       TL = Tz + TK;
+	       T1x = T19 + T1a;
+	       {
+		    E T13, T16, T18, T1b;
+		    T13 = FNMS(KP500000000, TK, Tz);
+		    T16 = KP866025403 * (T14 - T15);
+		    T17 = T13 + T16;
+		    T1o = T13 - T16;
+		    T18 = KP866025403 * (TJ - TE);
+		    T1b = FNMS(KP500000000, T1a, T19);
+		    T1c = T18 + T1b;
+		    T1n = T1b - T18;
+	       }
+	  }
+	  { /* third three-way group: values from slots 1, 4 and 7 */
+	       E Ti, TY, Tn, TT, Ts, TU, Tt, TZ;
+	       {
+		    E Tf, Th, Te, Tg;
+		    Tf = rio[WS(ios, 1)];
+		    Th = iio[-WS(ios, 7)];
+		    Te = W[0];
+		    Tg = W[1];
+		    Ti = FMA(Te, Tf, Tg * Th);
+		    TY = FNMS(Tg, Tf, Te * Th);
+	       }
+	       {
+		    E Tk, Tm, Tj, Tl;
+		    Tk = rio[WS(ios, 4)];
+		    Tm = iio[-WS(ios, 4)];
+		    Tj = W[6];
+		    Tl = W[7];
+		    Tn = FMA(Tj, Tk, Tl * Tm);
+		    TT = FNMS(Tl, Tk, Tj * Tm);
+	       }
+	       {
+		    E Tp, Tr, To, Tq;
+		    Tp = rio[WS(ios, 7)];
+		    Tr = iio[-WS(ios, 1)];
+		    To = W[12];
+		    Tq = W[13];
+		    Ts = FMA(To, Tp, Tq * Tr);
+		    TU = FNMS(Tq, Tp, To * Tr);
+	       }
+	       Tt = Tn + Ts;
+	       TZ = TT + TU;
+	       Tu = Ti + Tt;
+	       T1w = TY + TZ;
+	       {
+		    E TS, TV, TX, T10;
+		    TS = FNMS(KP500000000, Tt, Ti);
+		    TV = KP866025403 * (TT - TU);
+		    TW = TS + TV;
+		    T1k = TS - TV;
+		    TX = KP866025403 * (Ts - Tn);
+		    T10 = FNMS(KP500000000, TZ, TY);
+		    T11 = TX + T10;
+		    T1l = T10 - TX;
+	       }
+	  }
+	  { /* stores to rio[0], rio[WS(ios, 3)] and iio[-WS(ios, 6)] */
+	       E T1y, Td, TM, T1v;
+	       T1y = KP866025403 * (T1w - T1x);
+	       Td = T1 + Tc;
+	       TM = Tu + TL;
+	       T1v = FNMS(KP500000000, TM, Td);
+	       rio[0] = Td + TM;
+	       rio[WS(ios, 3)] = T1v + T1y;
+	       iio[-WS(ios, 6)] = T1v - T1y;
+	  }
+	  { /* stores to iio[0], iio[-WS(ios, 3)] and rio[WS(ios, 6)] */
+	       E T1D, T1z, T1C, T1E;
+	       T1D = KP866025403 * (TL - Tu);
+	       T1z = T1w + T1x;
+	       T1C = T1A + T1B;
+	       T1E = FNMS(KP500000000, T1z, T1C);
+	       iio[0] = T1z + T1C;
+	       iio[-WS(ios, 3)] = T1D + T1E;
+	       rio[WS(ios, 6)] = T1D - T1E;
+	  }
+	  { /* stores to the slots with indices 1, 4 and 7 */
+	       E TR, T1I, T1e, T1J, T1i, T1F, T1f, T1K;
+	       TR = TN + TQ;
+	       T1I = T1G + T1H;
+	       {
+		    E T12, T1d, T1g, T1h;
+		    T12 = FMA(KP766044443, TW, KP642787609 * T11);
+		    T1d = FMA(KP173648177, T17, KP984807753 * T1c);
+		    T1e = T12 + T1d;
+		    T1J = KP866025403 * (T1d - T12);
+		    T1g = FNMS(KP642787609, TW, KP766044443 * T11);
+		    T1h = FNMS(KP984807753, T17, KP173648177 * T1c);
+		    T1i = KP866025403 * (T1g - T1h);
+		    T1F = T1g + T1h;
+	       }
+	       rio[WS(ios, 1)] = TR + T1e;
+	       iio[-WS(ios, 1)] = T1F + T1I;
+	       T1f = FNMS(KP500000000, T1e, TR);
+	       iio[-WS(ios, 7)] = T1f - T1i;
+	       rio[WS(ios, 4)] = T1f + T1i;
+	       T1K = FNMS(KP500000000, T1F, T1I);
+	       rio[WS(ios, 7)] = T1J - T1K;
+	       iio[-WS(ios, 4)] = T1J + T1K;
+	  }
+	  { /* stores to the slots with indices 2, 5 and 8 */
+	       E T1j, T1M, T1q, T1O, T1u, T1L, T1r, T1N;
+	       T1j = TN - TQ;
+	       T1M = T1H - T1G;
+	       {
+		    E T1m, T1p, T1s, T1t;
+		    T1m = FMA(KP173648177, T1k, KP984807753 * T1l);
+		    T1p = FNMS(KP939692620, T1o, KP342020143 * T1n);
+		    T1q = T1m + T1p;
+		    T1O = KP866025403 * (T1p - T1m);
+		    T1s = FNMS(KP984807753, T1k, KP173648177 * T1l);
+		    T1t = FMA(KP342020143, T1o, KP939692620 * T1n);
+		    T1u = KP866025403 * (T1s + T1t);
+		    T1L = T1s - T1t;
+	       }
+	       rio[WS(ios, 2)] = T1j + T1q;
+	       iio[-WS(ios, 2)] = T1L + T1M;
+	       T1r = FNMS(KP500000000, T1q, T1j);
+	       iio[-WS(ios, 8)] = T1r - T1u;
+	       iio[-WS(ios, 5)] = T1r + T1u;
+	       T1N = FMS(KP500000000, T1L, T1M); /* presumably T1L/2 - T1M, assuming FMS(a, b, c) = a*b - c -- confirm in the project headers */
+	       rio[WS(ios, 5)] = T1N - T1O;
+	       rio[WS(ios, 8)] = T1O + T1N;
+	  }
+     }
+     return W; /* W has been advanced by 16 for every loop pass executed */
+}
+
+static const tw_instr twinstr[] = { /* twiddle-instruction table; referenced by the desc initializer below */
+     {TW_FULL, 0, 9}, /* presumably requests the full twiddle set for size 9 -- confirm against the tw_instr definition */
+     {TW_NEXT, 1, 0} /* presumably the end-of-list marker -- confirm */
+};
+
+static const hc2hc_desc desc = { 9, "hf_9", twinstr, {60, 36, 36, 0}, &GENUS, 0, 0, 0 }; /* descriptor passed at registration: size 9, codelet name, twiddle table; {60, 36, 36, ...} equals the additions / multiplications / fused multiply-add counts quoted in this file's header comment */
+
+void X(codelet_hf_9) (planner *p) { /* registration entry point for the hf_9 codelet */
+     X(khc2hc_dit_register) (p, hf_9, &desc); /* hands the codelet function and its descriptor to planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/mr2hcII_32.c b/src/fftw3/rdft/codelets/r2hc/mr2hcII_32.c
new file mode 100644
index 0000000..38c2613
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/mr2hcII_32.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:43 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc_noinline -compact -variables 4 -n 32 -name mr2hcII_32 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 174 FP additions, 82 FP multiplications,
+ * (or, 138 additions, 46 multiplications, 36 fused multiply/add),
+ * 61 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: mr2hcII_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: mr2hcII_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: mr2hcII_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void mr2hcII_32_0(const R *I, R *ro, R *io, stride is, stride ros, stride ios)
+{ /* Straight-line kernel (generated with -n 32 -dft-II): reads I[0] and I[WS(is, 1..31)], writes ro[0..15] (via ros) and io[0..15] (via ios). */
+     DK(KP471396736, +0.471396736825997648556387625905254377657460319); /* numerically sin(5*pi/32) */
+     DK(KP881921264, +0.881921264348355029712756863660388349508442621); /* numerically cos(5*pi/32) */
+     DK(KP634393284, +0.634393284163645498215171613225493370675687095); /* numerically sin(7*pi/32) */
+     DK(KP773010453, +0.773010453362736960810906609758469800971041293); /* numerically cos(7*pi/32) */
+     DK(KP290284677, +0.290284677254462367636192375817395274691476278); /* numerically sin(3*pi/32) */
+     DK(KP956940335, +0.956940335732208864935797886980269969482849206); /* numerically cos(3*pi/32) */
+     DK(KP995184726, +0.995184726672196886244836953109479921575474869); /* numerically cos(pi/32) */
+     DK(KP098017140, +0.098017140329560601994195563888641845861136673); /* numerically sin(pi/32) */
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191); /* numerically sin(3*pi/16) */
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* numerically cos(3*pi/16) */
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618); /* numerically sin(pi/16) */
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* numerically cos(pi/16) */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* numerically sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
+     {
+	  E T5, T2D, T1z, T2q, Tc, T2C, T1C, T2n, Tm, T1k, T1J, T26, Tv, T1l, T1G;
+	  E T27, T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d, TK, T1o, T1R, T2b, TR, T1p;
+	  E T1O, T2a; /* these 32 intermediates carry results from the load stages to the store stages below */
+	  { /* load stage: inputs 0, 16, 8, 24 */
+	       E T1, T2p, T4, T2o, T2, T3;
+	       T1 = I[0];
+	       T2p = I[WS(is, 16)];
+	       T2 = I[WS(is, 8)];
+	       T3 = I[WS(is, 24)];
+	       T4 = KP707106781 * (T2 - T3);
+	       T2o = KP707106781 * (T2 + T3);
+	       T5 = T1 + T4;
+	       T2D = T2p - T2o;
+	       T1z = T1 - T4;
+	       T2q = T2o + T2p;
+	  }
+	  { /* load stage: inputs 4, 20, 12, 28 */
+	       E T8, T1A, Tb, T1B;
+	       {
+		    E T6, T7, T9, Ta;
+		    T6 = I[WS(is, 4)];
+		    T7 = I[WS(is, 20)];
+		    T8 = FNMS(KP382683432, T7, KP923879532 * T6); /* FMA/FNMS are defined outside this file; presumably fused multiply-add variants — TODO confirm */
+		    T1A = FMA(KP382683432, T6, KP923879532 * T7);
+		    T9 = I[WS(is, 12)];
+		    Ta = I[WS(is, 28)];
+		    Tb = FNMS(KP923879532, Ta, KP382683432 * T9);
+		    T1B = FMA(KP923879532, T9, KP382683432 * Ta);
+	       }
+	       Tc = T8 + Tb;
+	       T2C = Tb - T8;
+	       T1C = T1A - T1B;
+	       T2n = T1A + T1B;
+	  }
+	  { /* load stage: inputs 2, 18, 10, 26 */
+	       E Te, Tk, Th, Tj, Tf, Tg;
+	       Te = I[WS(is, 2)];
+	       Tk = I[WS(is, 18)];
+	       Tf = I[WS(is, 10)];
+	       Tg = I[WS(is, 26)];
+	       Th = KP707106781 * (Tf - Tg);
+	       Tj = KP707106781 * (Tf + Tg);
+	       {
+		    E Ti, Tl, T1H, T1I;
+		    Ti = Te + Th;
+		    Tl = Tj + Tk;
+		    Tm = FNMS(KP195090322, Tl, KP980785280 * Ti);
+		    T1k = FMA(KP195090322, Ti, KP980785280 * Tl);
+		    T1H = Tk - Tj;
+		    T1I = Te - Th;
+		    T1J = FNMS(KP555570233, T1I, KP831469612 * T1H);
+		    T26 = FMA(KP831469612, T1I, KP555570233 * T1H);
+	       }
+	  }
+	  { /* load stage: inputs 30, 14, 6, 22 */
+	       E Tq, Tt, Tp, Ts, Tn, To;
+	       Tq = I[WS(is, 30)];
+	       Tt = I[WS(is, 14)];
+	       Tn = I[WS(is, 6)];
+	       To = I[WS(is, 22)];
+	       Tp = KP707106781 * (Tn - To);
+	       Ts = KP707106781 * (Tn + To);
+	       {
+		    E Tr, Tu, T1E, T1F;
+		    Tr = Tp - Tq;
+		    Tu = Ts + Tt;
+		    Tv = FMA(KP980785280, Tr, KP195090322 * Tu);
+		    T1l = FNMS(KP980785280, Tu, KP195090322 * Tr);
+		    T1E = Tt - Ts;
+		    T1F = Tp + Tq;
+		    T1G = FNMS(KP555570233, T1F, KP831469612 * T1E);
+		    T27 = FMA(KP831469612, T1F, KP555570233 * T1E);
+	       }
+	  }
+	  { /* load stage: inputs 31, 15, 7, 23, 3, 19, 11, 27 */
+	       E TW, T1a, TV, T19, T10, T16, T13, T17, TT, TU;
+	       TW = I[WS(is, 31)];
+	       T1a = I[WS(is, 15)];
+	       TT = I[WS(is, 7)];
+	       TU = I[WS(is, 23)];
+	       TV = KP707106781 * (TT - TU);
+	       T19 = KP707106781 * (TT + TU);
+	       {
+		    E TY, TZ, T11, T12;
+		    TY = I[WS(is, 3)];
+		    TZ = I[WS(is, 19)];
+		    T10 = FNMS(KP382683432, TZ, KP923879532 * TY);
+		    T16 = FMA(KP382683432, TY, KP923879532 * TZ);
+		    T11 = I[WS(is, 11)];
+		    T12 = I[WS(is, 27)];
+		    T13 = FNMS(KP923879532, T12, KP382683432 * T11);
+		    T17 = FMA(KP923879532, T11, KP382683432 * T12);
+	       }
+	       {
+		    E TX, T14, T1W, T1X;
+		    TX = TV - TW;
+		    T14 = T10 + T13;
+		    T15 = TX + T14;
+		    T1r = TX - T14;
+		    T1W = T13 - T10;
+		    T1X = T1a - T19;
+		    T1Y = T1W - T1X;
+		    T2e = T1W + T1X;
+	       }
+	       {
+		    E T18, T1b, T1T, T1U;
+		    T18 = T16 + T17;
+		    T1b = T19 + T1a;
+		    T1c = T18 + T1b;
+		    T1s = T1b - T18;
+		    T1T = TV + TW;
+		    T1U = T16 - T17;
+		    T1V = T1T + T1U;
+		    T2d = T1U - T1T;
+	       }
+	  }
+	  { /* load stage: inputs 1, 17, 9, 25, 5, 21, 13, 29 */
+	       E Ty, TP, TB, TO, TF, TL, TI, TM, Tz, TA;
+	       Ty = I[WS(is, 1)];
+	       TP = I[WS(is, 17)];
+	       Tz = I[WS(is, 9)];
+	       TA = I[WS(is, 25)];
+	       TB = KP707106781 * (Tz - TA);
+	       TO = KP707106781 * (Tz + TA);
+	       {
+		    E TD, TE, TG, TH;
+		    TD = I[WS(is, 5)];
+		    TE = I[WS(is, 21)];
+		    TF = FNMS(KP382683432, TE, KP923879532 * TD);
+		    TL = FMA(KP382683432, TD, KP923879532 * TE);
+		    TG = I[WS(is, 13)];
+		    TH = I[WS(is, 29)];
+		    TI = FNMS(KP923879532, TH, KP382683432 * TG);
+		    TM = FMA(KP923879532, TG, KP382683432 * TH);
+	       }
+	       {
+		    E TC, TJ, T1P, T1Q;
+		    TC = Ty + TB;
+		    TJ = TF + TI;
+		    TK = TC + TJ;
+		    T1o = TC - TJ;
+		    T1P = TI - TF;
+		    T1Q = TP - TO;
+		    T1R = T1P - T1Q;
+		    T2b = T1P + T1Q;
+	       }
+	       {
+		    E TN, TQ, T1M, T1N;
+		    TN = TL + TM;
+		    TQ = TO + TP;
+		    TR = TN + TQ;
+		    T1p = TQ - TN;
+		    T1M = Ty - TB;
+		    T1N = TL - TM;
+		    T1O = T1M - T1N;
+		    T2a = T1M + T1N;
+	       }
+	  }
+	  { /* store stage: ro/io entries 8, 7, 15, 0 */
+	       E Tx, T1f, T2s, T2u, T1e, T2l, T1i, T2t;
+	       {
+		    E Td, Tw, T2m, T2r;
+		    Td = T5 + Tc;
+		    Tw = Tm + Tv;
+		    Tx = Td - Tw;
+		    T1f = Td + Tw;
+		    T2m = T1l - T1k;
+		    T2r = T2n + T2q;
+		    T2s = T2m - T2r;
+		    T2u = T2m + T2r;
+	       }
+	       {
+		    E TS, T1d, T1g, T1h;
+		    TS = FMA(KP098017140, TK, KP995184726 * TR);
+		    T1d = FNMS(KP995184726, T1c, KP098017140 * T15);
+		    T1e = TS + T1d;
+		    T2l = T1d - TS;
+		    T1g = FNMS(KP098017140, TR, KP995184726 * TK);
+		    T1h = FMA(KP995184726, T15, KP098017140 * T1c);
+		    T1i = T1g + T1h;
+		    T2t = T1h - T1g;
+	       }
+	       ro[WS(ros, 8)] = Tx - T1e;
+	       io[WS(ios, 8)] = T2t - T2u;
+	       ro[WS(ros, 7)] = Tx + T1e;
+	       io[WS(ios, 7)] = T2t + T2u;
+	       ro[WS(ros, 15)] = T1f - T1i;
+	       io[WS(ios, 15)] = T2l - T2s;
+	       ro[0] = T1f + T1i;
+	       io[0] = T2l + T2s;
+	  }
+	  { /* store stage: ro/io entries 14, 1, 9, 6 */
+	       E T29, T2h, T2M, T2O, T2g, T2J, T2k, T2N;
+	       {
+		    E T25, T28, T2K, T2L;
+		    T25 = T1z + T1C;
+		    T28 = T26 - T27;
+		    T29 = T25 + T28;
+		    T2h = T25 - T28;
+		    T2K = T1J + T1G;
+		    T2L = T2C + T2D;
+		    T2M = T2K - T2L;
+		    T2O = T2K + T2L;
+	       }
+	       {
+		    E T2c, T2f, T2i, T2j;
+		    T2c = FMA(KP956940335, T2a, KP290284677 * T2b);
+		    T2f = FNMS(KP290284677, T2e, KP956940335 * T2d);
+		    T2g = T2c + T2f;
+		    T2J = T2f - T2c;
+		    T2i = FMA(KP290284677, T2d, KP956940335 * T2e);
+		    T2j = FNMS(KP290284677, T2a, KP956940335 * T2b);
+		    T2k = T2i - T2j;
+		    T2N = T2j + T2i;
+	       }
+	       ro[WS(ros, 14)] = T29 - T2g;
+	       io[WS(ios, 14)] = T2N - T2O;
+	       ro[WS(ros, 1)] = T29 + T2g;
+	       io[WS(ios, 1)] = T2N + T2O;
+	       ro[WS(ros, 9)] = T2h - T2k;
+	       io[WS(ios, 9)] = T2J - T2M;
+	       ro[WS(ros, 6)] = T2h + T2k;
+	       io[WS(ios, 6)] = T2J + T2M;
+	  }
+	  { /* store stage: ro/io entries 12, 3, 11, 4 */
+	       E T1n, T1v, T2y, T2A, T1u, T2v, T1y, T2z;
+	       {
+		    E T1j, T1m, T2w, T2x;
+		    T1j = T5 - Tc;
+		    T1m = T1k + T1l;
+		    T1n = T1j + T1m;
+		    T1v = T1j - T1m;
+		    T2w = Tv - Tm;
+		    T2x = T2q - T2n;
+		    T2y = T2w - T2x;
+		    T2A = T2w + T2x;
+	       }
+	       {
+		    E T1q, T1t, T1w, T1x;
+		    T1q = FMA(KP773010453, T1o, KP634393284 * T1p);
+		    T1t = FNMS(KP634393284, T1s, KP773010453 * T1r);
+		    T1u = T1q + T1t;
+		    T2v = T1t - T1q;
+		    T1w = FMA(KP634393284, T1r, KP773010453 * T1s);
+		    T1x = FNMS(KP634393284, T1o, KP773010453 * T1p);
+		    T1y = T1w - T1x;
+		    T2z = T1x + T1w;
+	       }
+	       ro[WS(ros, 12)] = T1n - T1u;
+	       io[WS(ios, 12)] = T2z - T2A;
+	       ro[WS(ros, 3)] = T1n + T1u;
+	       io[WS(ios, 3)] = T2z + T2A;
+	       ro[WS(ros, 11)] = T1v - T1y;
+	       io[WS(ios, 11)] = T2v - T2y;
+	       ro[WS(ros, 4)] = T1v + T1y;
+	       io[WS(ios, 4)] = T2v + T2y;
+	  }
+	  { /* store stage: ro/io entries 13, 2, 10, 5 */
+	       E T1L, T21, T2G, T2I, T20, T2H, T24, T2B;
+	       {
+		    E T1D, T1K, T2E, T2F;
+		    T1D = T1z - T1C;
+		    T1K = T1G - T1J;
+		    T1L = T1D + T1K;
+		    T21 = T1D - T1K;
+		    T2E = T2C - T2D;
+		    T2F = T26 + T27;
+		    T2G = T2E - T2F;
+		    T2I = T2F + T2E;
+	       }
+	       {
+		    E T1S, T1Z, T22, T23;
+		    T1S = FMA(KP881921264, T1O, KP471396736 * T1R);
+		    T1Z = FMA(KP881921264, T1V, KP471396736 * T1Y);
+		    T20 = T1S - T1Z;
+		    T2H = T1S + T1Z;
+		    T22 = FNMS(KP471396736, T1V, KP881921264 * T1Y);
+		    T23 = FNMS(KP471396736, T1O, KP881921264 * T1R);
+		    T24 = T22 - T23;
+		    T2B = T23 + T22;
+	       }
+	       ro[WS(ros, 13)] = T1L - T20;
+	       io[WS(ios, 13)] = T2B - T2G;
+	       ro[WS(ros, 2)] = T1L + T20;
+	       io[WS(ios, 2)] = T2B + T2G;
+	       ro[WS(ros, 10)] = T21 - T24;
+	       io[WS(ios, 10)] = T2I - T2H;
+	       ro[WS(ros, 5)] = T21 + T24;
+	       io[WS(ios, 5)] = -(T2H + T2I); /* the one store in this kernel written with an explicit negation */
+	  }
+     }
+}
+
+static void mr2hcII_32(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Batch driver: invokes the 32-point kernel v times (not at all when v <= 0). */
+     int done; /* how many kernel invocations have completed */
+     for (done = 0; done < v; ++done) {
+	  mr2hcII_32_0(I, ro, io, is, ros, ios);
+	  ro += ovs; /* both output pointers advance by ovs ... */
+	  io += ovs;
+	  I += ivs; /* ... while the input pointer advances by ivs */
+     }
+}
+
+static const kr2hc_desc desc = { 32, "mr2hcII_32", {138, 46, 36, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 32, codelet name; {138, 46, 36, ...} matches the add/mul/fused counts in this file's header comment; trailing zeros — TODO confirm against kr2hc_desc */
+
+void X(codelet_mr2hcII_32) (planner *p) { /* registration hook: hands the mr2hcII_32 driver and its descriptor to planner p */
+     X(kr2hcII_register) (p, mr2hcII_32, &desc); /* only action; nothing is returned to the caller */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/mr2hcII_64.c b/src/fftw3/rdft/codelets/r2hc/mr2hcII_64.c
new file mode 100644
index 0000000..4752322
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/mr2hcII_64.c
@@ -0,0 +1,799 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:59:51 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc_noinline -compact -variables 4 -n 64 -name mr2hcII_64 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 434 FP additions, 206 FP multiplications,
+ * (or, 342 additions, 114 multiplications, 92 fused multiply/add),
+ * 117 stack variables, and 128 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: mr2hcII_64.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: mr2hcII_64.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: mr2hcII_64.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void mr2hcII_64_0(const R *I, R *ro, R *io, stride is, stride ros, stride ios)
+{
+     DK(KP242980179, +0.242980179903263889948274162077471118320990783);
+     DK(KP970031253, +0.970031253194543992603984207286100251456865962);
+     DK(KP857728610, +0.857728610000272069902269984284770137042490799);
+     DK(KP514102744, +0.514102744193221726593693838968815772608049120);
+     DK(KP471396736, +0.471396736825997648556387625905254377657460319);
+     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
+     DK(KP427555093, +0.427555093430282094320966856888798534304578629);
+     DK(KP903989293, +0.903989293123443331586200297230537048710132025);
+     DK(KP336889853, +0.336889853392220050689253212619147570477766780);
+     DK(KP941544065, +0.941544065183020778412509402599502357185589796);
+     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
+     DK(KP634393284, +0.634393284163645498215171613225493370675687095);
+     DK(KP595699304, +0.595699304492433343467036528829969889511926338);
+     DK(KP803207531, +0.803207531480644909806676512963141923879569427);
+     DK(KP146730474, +0.146730474455361751658850129646717819706215317);
+     DK(KP989176509, +0.989176509964780973451673738016243063983689533);
+     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
+     DK(KP290284677, +0.290284677254462367636192375817395274691476278);
+     DK(KP049067674, +0.049067674327418014254954976942682658314745363);
+     DK(KP998795456, +0.998795456205172392714771604759100694443203615);
+     DK(KP671558954, +0.671558954847018400625376850427421803228750632);
+     DK(KP740951125, +0.740951125354959091175616897495162729728955309);
+     DK(KP098017140, +0.098017140329560601994195563888641845861136673);
+     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     {
+	  E Tm, T34, T3Z, T5g, Tv, T35, T3W, T5h, Td, T33, T6B, T6Q, T3T, T5f, T68;
+	  E T6m, T2b, T3n, T4O, T5D, T2F, T3r, T4K, T5z, TK, T3c, T47, T5n, TR, T3b;
+	  E T44, T5o, T15, T38, T4e, T5l, T1c, T39, T4b, T5k, T1s, T3g, T4v, T5w, T1W;
+	  E T3k, T4k, T5s, T2u, T3q, T4R, T5A, T2y, T3o, T4H, T5C, T1L, T3j, T4y, T5t;
+	  E T1P, T3h, T4r, T5v;
+	  {
+	       E Te, Tk, Th, Tj, Tf, Tg;
+	       Te = I[WS(is, 4)];
+	       Tk = I[WS(is, 36)];
+	       Tf = I[WS(is, 20)];
+	       Tg = I[WS(is, 52)];
+	       Th = KP707106781 * (Tf - Tg);
+	       Tj = KP707106781 * (Tf + Tg);
+	       {
+		    E Ti, Tl, T3X, T3Y;
+		    Ti = Te + Th;
+		    Tl = Tj + Tk;
+		    Tm = FNMS(KP195090322, Tl, KP980785280 * Ti);
+		    T34 = FMA(KP195090322, Ti, KP980785280 * Tl);
+		    T3X = Tk - Tj;
+		    T3Y = Te - Th;
+		    T3Z = FNMS(KP555570233, T3Y, KP831469612 * T3X);
+		    T5g = FMA(KP831469612, T3Y, KP555570233 * T3X);
+	       }
+	  }
+	  {
+	       E Tq, Tt, Tp, Ts, Tn, To;
+	       Tq = I[WS(is, 60)];
+	       Tt = I[WS(is, 28)];
+	       Tn = I[WS(is, 12)];
+	       To = I[WS(is, 44)];
+	       Tp = KP707106781 * (Tn - To);
+	       Ts = KP707106781 * (Tn + To);
+	       {
+		    E Tr, Tu, T3U, T3V;
+		    Tr = Tp - Tq;
+		    Tu = Ts + Tt;
+		    Tv = FMA(KP980785280, Tr, KP195090322 * Tu);
+		    T35 = FNMS(KP980785280, Tu, KP195090322 * Tr);
+		    T3U = Tt - Ts;
+		    T3V = Tp + Tq;
+		    T3W = FNMS(KP555570233, T3V, KP831469612 * T3U);
+		    T5h = FMA(KP831469612, T3V, KP555570233 * T3U);
+	       }
+	  }
+	  {
+	       E T1, T66, T4, T65, T8, T3Q, Tb, T3R, T2, T3;
+	       T1 = I[0];
+	       T66 = I[WS(is, 32)];
+	       T2 = I[WS(is, 16)];
+	       T3 = I[WS(is, 48)];
+	       T4 = KP707106781 * (T2 - T3);
+	       T65 = KP707106781 * (T2 + T3);
+	       {
+		    E T6, T7, T9, Ta;
+		    T6 = I[WS(is, 8)];
+		    T7 = I[WS(is, 40)];
+		    T8 = FNMS(KP382683432, T7, KP923879532 * T6);
+		    T3Q = FMA(KP382683432, T6, KP923879532 * T7);
+		    T9 = I[WS(is, 24)];
+		    Ta = I[WS(is, 56)];
+		    Tb = FNMS(KP923879532, Ta, KP382683432 * T9);
+		    T3R = FMA(KP923879532, T9, KP382683432 * Ta);
+	       }
+	       {
+		    E T5, Tc, T6z, T6A;
+		    T5 = T1 + T4;
+		    Tc = T8 + Tb;
+		    Td = T5 + Tc;
+		    T33 = T5 - Tc;
+		    T6z = Tb - T8;
+		    T6A = T66 - T65;
+		    T6B = T6z - T6A;
+		    T6Q = T6z + T6A;
+	       }
+	       {
+		    E T3P, T3S, T64, T67;
+		    T3P = T1 - T4;
+		    T3S = T3Q - T3R;
+		    T3T = T3P - T3S;
+		    T5f = T3P + T3S;
+		    T64 = T3Q + T3R;
+		    T67 = T65 + T66;
+		    T68 = T64 + T67;
+		    T6m = T67 - T64;
+	       }
+	  }
+	  {
+	       E T22, T2D, T21, T2C, T26, T2z, T29, T2A, T1Z, T20;
+	       T22 = I[WS(is, 63)];
+	       T2D = I[WS(is, 31)];
+	       T1Z = I[WS(is, 15)];
+	       T20 = I[WS(is, 47)];
+	       T21 = KP707106781 * (T1Z - T20);
+	       T2C = KP707106781 * (T1Z + T20);
+	       {
+		    E T24, T25, T27, T28;
+		    T24 = I[WS(is, 7)];
+		    T25 = I[WS(is, 39)];
+		    T26 = FNMS(KP382683432, T25, KP923879532 * T24);
+		    T2z = FMA(KP382683432, T24, KP923879532 * T25);
+		    T27 = I[WS(is, 23)];
+		    T28 = I[WS(is, 55)];
+		    T29 = FNMS(KP923879532, T28, KP382683432 * T27);
+		    T2A = FMA(KP923879532, T27, KP382683432 * T28);
+	       }
+	       {
+		    E T23, T2a, T4M, T4N;
+		    T23 = T21 - T22;
+		    T2a = T26 + T29;
+		    T2b = T23 + T2a;
+		    T3n = T23 - T2a;
+		    T4M = T29 - T26;
+		    T4N = T2D - T2C;
+		    T4O = T4M - T4N;
+		    T5D = T4M + T4N;
+	       }
+	       {
+		    E T2B, T2E, T4I, T4J;
+		    T2B = T2z + T2A;
+		    T2E = T2C + T2D;
+		    T2F = T2B + T2E;
+		    T3r = T2E - T2B;
+		    T4I = T21 + T22;
+		    T4J = T2z - T2A;
+		    T4K = T4I + T4J;
+		    T5z = T4J - T4I;
+	       }
+	  }
+	  {
+	       E Ty, TP, TB, TO, TF, TL, TI, TM, Tz, TA;
+	       Ty = I[WS(is, 2)];
+	       TP = I[WS(is, 34)];
+	       Tz = I[WS(is, 18)];
+	       TA = I[WS(is, 50)];
+	       TB = KP707106781 * (Tz - TA);
+	       TO = KP707106781 * (Tz + TA);
+	       {
+		    E TD, TE, TG, TH;
+		    TD = I[WS(is, 10)];
+		    TE = I[WS(is, 42)];
+		    TF = FNMS(KP382683432, TE, KP923879532 * TD);
+		    TL = FMA(KP382683432, TD, KP923879532 * TE);
+		    TG = I[WS(is, 26)];
+		    TH = I[WS(is, 58)];
+		    TI = FNMS(KP923879532, TH, KP382683432 * TG);
+		    TM = FMA(KP923879532, TG, KP382683432 * TH);
+	       }
+	       {
+		    E TC, TJ, T45, T46;
+		    TC = Ty + TB;
+		    TJ = TF + TI;
+		    TK = TC + TJ;
+		    T3c = TC - TJ;
+		    T45 = TI - TF;
+		    T46 = TP - TO;
+		    T47 = T45 - T46;
+		    T5n = T45 + T46;
+	       }
+	       {
+		    E TN, TQ, T42, T43;
+		    TN = TL + TM;
+		    TQ = TO + TP;
+		    TR = TN + TQ;
+		    T3b = TQ - TN;
+		    T42 = Ty - TB;
+		    T43 = TL - TM;
+		    T44 = T42 - T43;
+		    T5o = T42 + T43;
+	       }
+	  }
+	  {
+	       E TW, T1a, TV, T19, T10, T16, T13, T17, TT, TU;
+	       TW = I[WS(is, 62)];
+	       T1a = I[WS(is, 30)];
+	       TT = I[WS(is, 14)];
+	       TU = I[WS(is, 46)];
+	       TV = KP707106781 * (TT - TU);
+	       T19 = KP707106781 * (TT + TU);
+	       {
+		    E TY, TZ, T11, T12;
+		    TY = I[WS(is, 6)];
+		    TZ = I[WS(is, 38)];
+		    T10 = FNMS(KP382683432, TZ, KP923879532 * TY);
+		    T16 = FMA(KP382683432, TY, KP923879532 * TZ);
+		    T11 = I[WS(is, 22)];
+		    T12 = I[WS(is, 54)];
+		    T13 = FNMS(KP923879532, T12, KP382683432 * T11);
+		    T17 = FMA(KP923879532, T11, KP382683432 * T12);
+	       }
+	       {
+		    E TX, T14, T4c, T4d;
+		    TX = TV - TW;
+		    T14 = T10 + T13;
+		    T15 = TX + T14;
+		    T38 = TX - T14;
+		    T4c = T13 - T10;
+		    T4d = T1a - T19;
+		    T4e = T4c - T4d;
+		    T5l = T4c + T4d;
+	       }
+	       {
+		    E T18, T1b, T49, T4a;
+		    T18 = T16 + T17;
+		    T1b = T19 + T1a;
+		    T1c = T18 + T1b;
+		    T39 = T1b - T18;
+		    T49 = TV + TW;
+		    T4a = T16 - T17;
+		    T4b = T49 + T4a;
+		    T5k = T4a - T49;
+	       }
+	  }
+	  {
+	       E T1g, T1U, T1j, T1T, T1n, T1Q, T1q, T1R, T1h, T1i;
+	       T1g = I[WS(is, 1)];
+	       T1U = I[WS(is, 33)];
+	       T1h = I[WS(is, 17)];
+	       T1i = I[WS(is, 49)];
+	       T1j = KP707106781 * (T1h - T1i);
+	       T1T = KP707106781 * (T1h + T1i);
+	       {
+		    E T1l, T1m, T1o, T1p;
+		    T1l = I[WS(is, 9)];
+		    T1m = I[WS(is, 41)];
+		    T1n = FNMS(KP382683432, T1m, KP923879532 * T1l);
+		    T1Q = FMA(KP382683432, T1l, KP923879532 * T1m);
+		    T1o = I[WS(is, 25)];
+		    T1p = I[WS(is, 57)];
+		    T1q = FNMS(KP923879532, T1p, KP382683432 * T1o);
+		    T1R = FMA(KP923879532, T1o, KP382683432 * T1p);
+	       }
+	       {
+		    E T1k, T1r, T4t, T4u;
+		    T1k = T1g + T1j;
+		    T1r = T1n + T1q;
+		    T1s = T1k + T1r;
+		    T3g = T1k - T1r;
+		    T4t = T1q - T1n;
+		    T4u = T1U - T1T;
+		    T4v = T4t - T4u;
+		    T5w = T4t + T4u;
+	       }
+	       {
+		    E T1S, T1V, T4i, T4j;
+		    T1S = T1Q + T1R;
+		    T1V = T1T + T1U;
+		    T1W = T1S + T1V;
+		    T3k = T1V - T1S;
+		    T4i = T1g - T1j;
+		    T4j = T1Q - T1R;
+		    T4k = T4i - T4j;
+		    T5s = T4i + T4j;
+	       }
+	  }
+	  {
+	       E T2g, T4F, T2j, T4E, T2p, T4C, T2s, T4B;
+	       {
+		    E T2c, T2i, T2f, T2h, T2d, T2e;
+		    T2c = I[WS(is, 3)];
+		    T2i = I[WS(is, 35)];
+		    T2d = I[WS(is, 19)];
+		    T2e = I[WS(is, 51)];
+		    T2f = KP707106781 * (T2d - T2e);
+		    T2h = KP707106781 * (T2d + T2e);
+		    T2g = T2c + T2f;
+		    T4F = T2c - T2f;
+		    T2j = T2h + T2i;
+		    T4E = T2i - T2h;
+	       }
+	       {
+		    E T2o, T2r, T2n, T2q, T2l, T2m;
+		    T2o = I[WS(is, 59)];
+		    T2r = I[WS(is, 27)];
+		    T2l = I[WS(is, 11)];
+		    T2m = I[WS(is, 43)];
+		    T2n = KP707106781 * (T2l - T2m);
+		    T2q = KP707106781 * (T2l + T2m);
+		    T2p = T2n - T2o;
+		    T4C = T2n + T2o;
+		    T2s = T2q + T2r;
+		    T4B = T2r - T2q;
+	       }
+	       {
+		    E T2k, T2t, T4P, T4Q;
+		    T2k = FNMS(KP195090322, T2j, KP980785280 * T2g);
+		    T2t = FMA(KP980785280, T2p, KP195090322 * T2s);
+		    T2u = T2k + T2t;
+		    T3q = T2t - T2k;
+		    T4P = FMA(KP831469612, T4F, KP555570233 * T4E);
+		    T4Q = FMA(KP831469612, T4C, KP555570233 * T4B);
+		    T4R = T4P + T4Q;
+		    T5A = T4P - T4Q;
+	       }
+	       {
+		    E T2w, T2x, T4D, T4G;
+		    T2w = FNMS(KP980785280, T2s, KP195090322 * T2p);
+		    T2x = FMA(KP195090322, T2g, KP980785280 * T2j);
+		    T2y = T2w - T2x;
+		    T3o = T2x + T2w;
+		    T4D = FNMS(KP555570233, T4C, KP831469612 * T4B);
+		    T4G = FNMS(KP555570233, T4F, KP831469612 * T4E);
+		    T4H = T4D - T4G;
+		    T5C = T4G + T4D;
+	       }
+	  }
+	  {
+	       E T1x, T4p, T1A, T4o, T1G, T4m, T1J, T4l;
+	       {
+		    E T1t, T1z, T1w, T1y, T1u, T1v;
+		    T1t = I[WS(is, 5)];
+		    T1z = I[WS(is, 37)];
+		    T1u = I[WS(is, 21)];
+		    T1v = I[WS(is, 53)];
+		    T1w = KP707106781 * (T1u - T1v);
+		    T1y = KP707106781 * (T1u + T1v);
+		    T1x = T1t + T1w;
+		    T4p = T1t - T1w;
+		    T1A = T1y + T1z;
+		    T4o = T1z - T1y;
+	       }
+	       {
+		    E T1F, T1I, T1E, T1H, T1C, T1D;
+		    T1F = I[WS(is, 61)];
+		    T1I = I[WS(is, 29)];
+		    T1C = I[WS(is, 13)];
+		    T1D = I[WS(is, 45)];
+		    T1E = KP707106781 * (T1C - T1D);
+		    T1H = KP707106781 * (T1C + T1D);
+		    T1G = T1E - T1F;
+		    T4m = T1E + T1F;
+		    T1J = T1H + T1I;
+		    T4l = T1I - T1H;
+	       }
+	       {
+		    E T1B, T1K, T4w, T4x;
+		    T1B = FNMS(KP195090322, T1A, KP980785280 * T1x);
+		    T1K = FMA(KP980785280, T1G, KP195090322 * T1J);
+		    T1L = T1B + T1K;
+		    T3j = T1K - T1B;
+		    T4w = FMA(KP831469612, T4p, KP555570233 * T4o);
+		    T4x = FMA(KP831469612, T4m, KP555570233 * T4l);
+		    T4y = T4w + T4x;
+		    T5t = T4w - T4x;
+	       }
+	       {
+		    E T1N, T1O, T4n, T4q;
+		    T1N = FNMS(KP980785280, T1J, KP195090322 * T1G);
+		    T1O = FMA(KP195090322, T1x, KP980785280 * T1A);
+		    T1P = T1N - T1O;
+		    T3h = T1O + T1N;
+		    T4n = FNMS(KP555570233, T4m, KP831469612 * T4l);
+		    T4q = FNMS(KP555570233, T4p, KP831469612 * T4o);
+		    T4r = T4n - T4q;
+		    T5v = T4q + T4n;
+	       }
+	  }
+	  {
+	       E Tx, T2N, T69, T6f, T1e, T6e, T2X, T30, T1Y, T2L, T2Q, T62, T2U, T31, T2H;
+	       E T2K, Tw, T63;
+	       Tw = Tm + Tv;
+	       Tx = Td + Tw;
+	       T2N = Td - Tw;
+	       T63 = T35 - T34;
+	       T69 = T63 - T68;
+	       T6f = T63 + T68;
+	       {
+		    E TS, T1d, T2V, T2W;
+		    TS = FNMS(KP098017140, TR, KP995184726 * TK);
+		    T1d = FMA(KP995184726, T15, KP098017140 * T1c);
+		    T1e = TS + T1d;
+		    T6e = T1d - TS;
+		    T2V = T2b - T2u;
+		    T2W = T2y + T2F;
+		    T2X = FNMS(KP671558954, T2W, KP740951125 * T2V);
+		    T30 = FMA(KP671558954, T2V, KP740951125 * T2W);
+	       }
+	       {
+		    E T1M, T1X, T2O, T2P;
+		    T1M = T1s + T1L;
+		    T1X = T1P - T1W;
+		    T1Y = FMA(KP998795456, T1M, KP049067674 * T1X);
+		    T2L = FNMS(KP049067674, T1M, KP998795456 * T1X);
+		    T2O = FMA(KP098017140, TK, KP995184726 * TR);
+		    T2P = FNMS(KP995184726, T1c, KP098017140 * T15);
+		    T2Q = T2O + T2P;
+		    T62 = T2P - T2O;
+	       }
+	       {
+		    E T2S, T2T, T2v, T2G;
+		    T2S = T1s - T1L;
+		    T2T = T1P + T1W;
+		    T2U = FMA(KP740951125, T2S, KP671558954 * T2T);
+		    T31 = FNMS(KP671558954, T2S, KP740951125 * T2T);
+		    T2v = T2b + T2u;
+		    T2G = T2y - T2F;
+		    T2H = FNMS(KP049067674, T2G, KP998795456 * T2v);
+		    T2K = FMA(KP049067674, T2v, KP998795456 * T2G);
+	       }
+	       {
+		    E T1f, T2I, T6b, T6c;
+		    T1f = Tx + T1e;
+		    T2I = T1Y + T2H;
+		    ro[WS(ros, 31)] = T1f - T2I;
+		    ro[0] = T1f + T2I;
+		    T6b = T2L + T2K;
+		    T6c = T62 + T69;
+		    io[WS(ios, 31)] = T6b - T6c;
+		    io[0] = T6b + T6c;
+	       }
+	       {
+		    E T2J, T2M, T61, T6a;
+		    T2J = Tx - T1e;
+		    T2M = T2K - T2L;
+		    ro[WS(ros, 16)] = T2J - T2M;
+		    ro[WS(ros, 15)] = T2J + T2M;
+		    T61 = T2H - T1Y;
+		    T6a = T62 - T69;
+		    io[WS(ios, 16)] = T61 - T6a;
+		    io[WS(ios, 15)] = T61 + T6a;
+	       }
+	       {
+		    E T2R, T2Y, T6h, T6i;
+		    T2R = T2N + T2Q;
+		    T2Y = T2U + T2X;
+		    ro[WS(ros, 24)] = T2R - T2Y;
+		    ro[WS(ros, 7)] = T2R + T2Y;
+		    T6h = T31 + T30;
+		    T6i = T6e + T6f;
+		    io[WS(ios, 24)] = T6h - T6i;
+		    io[WS(ios, 7)] = T6h + T6i;
+	       }
+	       {
+		    E T2Z, T32, T6d, T6g;
+		    T2Z = T2N - T2Q;
+		    T32 = T30 - T31;
+		    ro[WS(ros, 23)] = T2Z - T32;
+		    ro[WS(ros, 8)] = T2Z + T32;
+		    T6d = T2X - T2U;
+		    T6g = T6e - T6f;
+		    io[WS(ios, 23)] = T6d - T6g;
+		    io[WS(ios, 8)] = T6d + T6g;
+	       }
+	  }
+	  {
+	       E T5j, T5L, T6R, T6X, T5q, T6W, T5V, T5Y, T5y, T5J, T5O, T6O, T5S, T5Z, T5F;
+	       E T5I, T5i, T6P;
+	       T5i = T5g - T5h;
+	       T5j = T5f - T5i;
+	       T5L = T5f + T5i;
+	       T6P = T3Z + T3W;
+	       T6R = T6P - T6Q;
+	       T6X = T6P + T6Q;
+	       {
+		    E T5m, T5p, T5T, T5U;
+		    T5m = FMA(KP290284677, T5k, KP956940335 * T5l);
+		    T5p = FNMS(KP290284677, T5o, KP956940335 * T5n);
+		    T5q = T5m - T5p;
+		    T6W = T5p + T5m;
+		    T5T = T5z + T5A;
+		    T5U = T5C + T5D;
+		    T5V = FNMS(KP146730474, T5U, KP989176509 * T5T);
+		    T5Y = FMA(KP146730474, T5T, KP989176509 * T5U);
+	       }
+	       {
+		    E T5u, T5x, T5M, T5N;
+		    T5u = T5s - T5t;
+		    T5x = T5v - T5w;
+		    T5y = FMA(KP803207531, T5u, KP595699304 * T5x);
+		    T5J = FNMS(KP595699304, T5u, KP803207531 * T5x);
+		    T5M = FMA(KP956940335, T5o, KP290284677 * T5n);
+		    T5N = FNMS(KP290284677, T5l, KP956940335 * T5k);
+		    T5O = T5M + T5N;
+		    T6O = T5N - T5M;
+	       }
+	       {
+		    E T5Q, T5R, T5B, T5E;
+		    T5Q = T5s + T5t;
+		    T5R = T5v + T5w;
+		    T5S = FMA(KP989176509, T5Q, KP146730474 * T5R);
+		    T5Z = FNMS(KP146730474, T5Q, KP989176509 * T5R);
+		    T5B = T5z - T5A;
+		    T5E = T5C - T5D;
+		    T5F = FNMS(KP595699304, T5E, KP803207531 * T5B);
+		    T5I = FMA(KP595699304, T5B, KP803207531 * T5E);
+	       }
+	       {
+		    E T5r, T5G, T6T, T6U;
+		    T5r = T5j + T5q;
+		    T5G = T5y + T5F;
+		    ro[WS(ros, 25)] = T5r - T5G;
+		    ro[WS(ros, 6)] = T5r + T5G;
+		    T6T = T5J + T5I;
+		    T6U = T6O + T6R;
+		    io[WS(ios, 25)] = T6T - T6U;
+		    io[WS(ios, 6)] = T6T + T6U;
+	       }
+	       {
+		    E T5H, T5K, T6N, T6S;
+		    T5H = T5j - T5q;
+		    T5K = T5I - T5J;
+		    ro[WS(ros, 22)] = T5H - T5K;
+		    ro[WS(ros, 9)] = T5H + T5K;
+		    T6N = T5F - T5y;
+		    T6S = T6O - T6R;
+		    io[WS(ios, 22)] = T6N - T6S;
+		    io[WS(ios, 9)] = T6N + T6S;
+	       }
+	       {
+		    E T5P, T5W, T6Z, T70;
+		    T5P = T5L + T5O;
+		    T5W = T5S + T5V;
+		    ro[WS(ros, 30)] = T5P - T5W;
+		    ro[WS(ros, 1)] = T5P + T5W;
+		    T6Z = T5Z + T5Y;
+		    T70 = T6W + T6X;
+		    io[WS(ios, 30)] = T6Z - T70;
+		    io[WS(ios, 1)] = T6Z + T70;
+	       }
+	       {
+		    E T5X, T60, T6V, T6Y;
+		    T5X = T5L - T5O;
+		    T60 = T5Y - T5Z;
+		    ro[WS(ros, 17)] = T5X - T60;
+		    ro[WS(ros, 14)] = T5X + T60;
+		    T6V = T5V - T5S;
+		    T6Y = T6W - T6X;
+		    io[WS(ios, 17)] = T6V - T6Y;
+		    io[WS(ios, 14)] = T6V + T6Y;
+	       }
+	  }
+	  {
+	       E T37, T3z, T6n, T6t, T3e, T6s, T3J, T3M, T3m, T3x, T3C, T6k, T3G, T3N, T3t;
+	       E T3w, T36, T6l;
+	       T36 = T34 + T35;
+	       T37 = T33 - T36;
+	       T3z = T33 + T36;
+	       T6l = Tv - Tm;
+	       T6n = T6l - T6m;
+	       T6t = T6l + T6m;
+	       {
+		    E T3a, T3d, T3H, T3I;
+		    T3a = FMA(KP634393284, T38, KP773010453 * T39);
+		    T3d = FNMS(KP634393284, T3c, KP773010453 * T3b);
+		    T3e = T3a - T3d;
+		    T6s = T3d + T3a;
+		    T3H = T3n + T3o;
+		    T3I = T3q + T3r;
+		    T3J = FNMS(KP336889853, T3I, KP941544065 * T3H);
+		    T3M = FMA(KP336889853, T3H, KP941544065 * T3I);
+	       }
+	       {
+		    E T3i, T3l, T3A, T3B;
+		    T3i = T3g - T3h;
+		    T3l = T3j - T3k;
+		    T3m = FMA(KP903989293, T3i, KP427555093 * T3l);
+		    T3x = FNMS(KP427555093, T3i, KP903989293 * T3l);
+		    T3A = FMA(KP773010453, T3c, KP634393284 * T3b);
+		    T3B = FNMS(KP634393284, T39, KP773010453 * T38);
+		    T3C = T3A + T3B;
+		    T6k = T3B - T3A;
+	       }
+	       {
+		    E T3E, T3F, T3p, T3s;
+		    T3E = T3g + T3h;
+		    T3F = T3j + T3k;
+		    T3G = FMA(KP941544065, T3E, KP336889853 * T3F);
+		    T3N = FNMS(KP336889853, T3E, KP941544065 * T3F);
+		    T3p = T3n - T3o;
+		    T3s = T3q - T3r;
+		    T3t = FNMS(KP427555093, T3s, KP903989293 * T3p);
+		    T3w = FMA(KP427555093, T3p, KP903989293 * T3s);
+	       }
+	       {
+		    E T3f, T3u, T6p, T6q;
+		    T3f = T37 + T3e;
+		    T3u = T3m + T3t;
+		    ro[WS(ros, 27)] = T3f - T3u;
+		    ro[WS(ros, 4)] = T3f + T3u;
+		    T6p = T3x + T3w;
+		    T6q = T6k + T6n;
+		    io[WS(ios, 27)] = T6p - T6q;
+		    io[WS(ios, 4)] = T6p + T6q;
+	       }
+	       {
+		    E T3v, T3y, T6j, T6o;
+		    T3v = T37 - T3e;
+		    T3y = T3w - T3x;
+		    ro[WS(ros, 20)] = T3v - T3y;
+		    ro[WS(ros, 11)] = T3v + T3y;
+		    T6j = T3t - T3m;
+		    T6o = T6k - T6n;
+		    io[WS(ios, 20)] = T6j - T6o;
+		    io[WS(ios, 11)] = T6j + T6o;
+	       }
+	       {
+		    E T3D, T3K, T6v, T6w;
+		    T3D = T3z + T3C;
+		    T3K = T3G + T3J;
+		    ro[WS(ros, 28)] = T3D - T3K;
+		    ro[WS(ros, 3)] = T3D + T3K;
+		    T6v = T3N + T3M;
+		    T6w = T6s + T6t;
+		    io[WS(ios, 28)] = T6v - T6w;
+		    io[WS(ios, 3)] = T6v + T6w;
+	       }
+	       {
+		    E T3L, T3O, T6r, T6u;
+		    T3L = T3z - T3C;
+		    T3O = T3M - T3N;
+		    ro[WS(ros, 19)] = T3L - T3O;
+		    ro[WS(ros, 12)] = T3L + T3O;
+		    T6r = T3J - T3G;
+		    T6u = T6s - T6t;
+		    io[WS(ios, 19)] = T6r - T6u;
+		    io[WS(ios, 12)] = T6r + T6u;
+	       }
+	  }
+	  {
+	       E T41, T4Z, T6D, T6J, T4g, T6I, T59, T5d, T4A, T4X, T52, T6y, T56, T5c, T4T;
+	       E T4W, T40, T6C;
+	       T40 = T3W - T3Z;
+	       T41 = T3T + T40;
+	       T4Z = T3T - T40;
+	       T6C = T5g + T5h;
+	       T6D = T6B - T6C;
+	       T6J = T6C + T6B;
+	       {
+		    E T48, T4f, T57, T58;
+		    T48 = FMA(KP881921264, T44, KP471396736 * T47);
+		    T4f = FMA(KP881921264, T4b, KP471396736 * T4e);
+		    T4g = T48 - T4f;
+		    T6I = T48 + T4f;
+		    T57 = T4K + T4H;
+		    T58 = T4R + T4O;
+		    T59 = FMA(KP514102744, T57, KP857728610 * T58);
+		    T5d = FNMS(KP857728610, T57, KP514102744 * T58);
+	       }
+	       {
+		    E T4s, T4z, T50, T51;
+		    T4s = T4k + T4r;
+		    T4z = T4v - T4y;
+		    T4A = FMA(KP970031253, T4s, KP242980179 * T4z);
+		    T4X = FNMS(KP242980179, T4s, KP970031253 * T4z);
+		    T50 = FNMS(KP471396736, T4b, KP881921264 * T4e);
+		    T51 = FNMS(KP471396736, T44, KP881921264 * T47);
+		    T52 = T50 - T51;
+		    T6y = T51 + T50;
+	       }
+	       {
+		    E T54, T55, T4L, T4S;
+		    T54 = T4k - T4r;
+		    T55 = T4y + T4v;
+		    T56 = FMA(KP514102744, T54, KP857728610 * T55);
+		    T5c = FNMS(KP514102744, T55, KP857728610 * T54);
+		    T4L = T4H - T4K;
+		    T4S = T4O - T4R;
+		    T4T = FNMS(KP242980179, T4S, KP970031253 * T4L);
+		    T4W = FMA(KP242980179, T4L, KP970031253 * T4S);
+	       }
+	       {
+		    E T4h, T4U, T6F, T6G;
+		    T4h = T41 + T4g;
+		    T4U = T4A + T4T;
+		    ro[WS(ros, 29)] = T4h - T4U;
+		    ro[WS(ros, 2)] = T4h + T4U;
+		    T6F = T4X + T4W;
+		    T6G = T6y + T6D;
+		    io[WS(ios, 29)] = T6F - T6G;
+		    io[WS(ios, 2)] = T6F + T6G;
+	       }
+	       {
+		    E T4V, T4Y, T6x, T6E;
+		    T4V = T41 - T4g;
+		    T4Y = T4W - T4X;
+		    ro[WS(ros, 18)] = T4V - T4Y;
+		    ro[WS(ros, 13)] = T4V + T4Y;
+		    T6x = T4T - T4A;
+		    T6E = T6y - T6D;
+		    io[WS(ios, 18)] = T6x - T6E;
+		    io[WS(ios, 13)] = T6x + T6E;
+	       }
+	       {
+		    E T53, T5a, T6L, T6M;
+		    T53 = T4Z - T52;
+		    T5a = T56 - T59;
+		    ro[WS(ros, 21)] = T53 - T5a;
+		    ro[WS(ros, 10)] = T53 + T5a;
+		    T6L = T5d - T5c;
+		    T6M = T6J - T6I;
+		    io[WS(ios, 21)] = T6L - T6M;
+		    io[WS(ios, 10)] = T6L + T6M;
+	       }
+	       {
+		    E T5b, T5e, T6H, T6K;
+		    T5b = T4Z + T52;
+		    T5e = T5c + T5d;
+		    ro[WS(ros, 26)] = T5b - T5e;
+		    ro[WS(ros, 5)] = T5b + T5e;
+		    T6H = T56 + T59;
+		    T6K = T6I + T6J;
+		    io[WS(ios, 5)] = -(T6H + T6K);
+		    io[WS(ios, 26)] = T6K - T6H;
+	       }
+	  }
+     }
+}
+
+static void mr2hcII_64(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{
+     /* Call the one-shot kernel v times; I steps by ivs, ro and io both step by ovs between calls. */
+     int left = v;
+     while (left > 0) {
+	  mr2hcII_64_0(I, ro, io, is, ros, ios);
+	  I += ivs, ro += ovs, io += ovs;
+	  --left;
+     }
+}
+
+static const kr2hc_desc desc = { 64, "mr2hcII_64", {342, 114, 92, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor passed to X(kr2hcII_register) below; NOTE(review): 64 is presumably the transform size and {342, 114, 92, 0} the genfft operation counts -- confirm against the kr2hc_desc declaration */
+
+void X(codelet_mr2hcII_64) (planner *p) { /* only externally visible symbol of this codelet file; everything else is static */
+     X(kr2hcII_register) (p, mr2hcII_64, &desc); /* hands planner p the vector-loop routine together with its descriptor */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/mr2hc_128.c b/src/fftw3/rdft/codelets/r2hc/mr2hc_128.c
new file mode 100644
index 0000000..0409559
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/mr2hc_128.c
@@ -0,0 +1,1647 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:49 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc_noinline -compact -variables 4 -n 128 -name mr2hc_128 -include r2hc.h */
+
+/*
+ * This function contains 956 FP additions, 330 FP multiplications,
+ * (or, 812 additions, 186 multiplications, 144 fused multiply/add),
+ * 185 stack variables, and 256 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: mr2hc_128.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: mr2hc_128.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: mr2hc_128.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void mr2hc_128_0(const R *I, R *ro, R *io, stride is, stride ros, stride ios)
+{
+     DK(KP803207531, +0.803207531480644909806676512963141923879569427);
+     DK(KP595699304, +0.595699304492433343467036528829969889511926338);
+     DK(KP146730474, +0.146730474455361751658850129646717819706215317);
+     DK(KP989176509, +0.989176509964780973451673738016243063983689533);
+     DK(KP740951125, +0.740951125354959091175616897495162729728955309);
+     DK(KP671558954, +0.671558954847018400625376850427421803228750632);
+     DK(KP049067674, +0.049067674327418014254954976942682658314745363);
+     DK(KP998795456, +0.998795456205172392714771604759100694443203615);
+     DK(KP242980179, +0.242980179903263889948274162077471118320990783);
+     DK(KP970031253, +0.970031253194543992603984207286100251456865962);
+     DK(KP514102744, +0.514102744193221726593693838968815772608049120);
+     DK(KP857728610, +0.857728610000272069902269984284770137042490799);
+     DK(KP336889853, +0.336889853392220050689253212619147570477766780);
+     DK(KP941544065, +0.941544065183020778412509402599502357185589796);
+     DK(KP427555093, +0.427555093430282094320966856888798534304578629);
+     DK(KP903989293, +0.903989293123443331586200297230537048710132025);
+     DK(KP098017140, +0.098017140329560601994195563888641845861136673);
+     DK(KP995184726, +0.995184726672196886244836953109479921575474869);
+     DK(KP634393284, +0.634393284163645498215171613225493370675687095);
+     DK(KP773010453, +0.773010453362736960810906609758469800971041293);
+     DK(KP881921264, +0.881921264348355029712756863660388349508442621);
+     DK(KP471396736, +0.471396736825997648556387625905254377657460319);
+     DK(KP956940335, +0.956940335732208864935797886980269969482849206);
+     DK(KP290284677, +0.290284677254462367636192375817395274691476278);
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191);
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812);
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618);
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731);
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562);
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     {
+	  E TcD, TdU, T27, T7r, T5S, T8y, Tf, Ta5, Tu, Tbq, TcG, TdV, T2e, T8z, T5V;
+	  E T7s, TK, Ta6, TcK, TdX, T2o, T5X, T7w, T8B, TZ, Ta7, TcN, TdY, T2x, T5Y;
+	  E T7z, T8C, T1g, Taa, TcU, TeA, TcX, Tez, T1v, Tab, T2M, T6z, T7E, T9e, T7H;
+	  E T9d, T2T, T6A, T4X, T6L, Tdz, TeL, TdK, TeP, T5G, T6P, T8d, T9p, TaV, Tc3;
+	  E Tbi, Tc4, T8o, T9t, T3I, T6H, Tde, TeH, Tdp, TeF, T4r, T6F, T7U, T9l, Tao;
+	  E TbW, TaL, TbX, T85, T9j, T1L, Tad, Td3, Tew, Td6, Tex, T20, Tae, T37, T6x;
+	  E T7L, T9a, T7O, T9b, T3e, T6w, TbZ, Tc0, T3Z, T4s, Tds, TeI, T4g, T4t, T80;
+	  E T87, Tdl, TeE, T7X, T86, TaD, TaM, Tc6, Tc7, T5e, T5H, TdN, TeM, T5v, T5I;
+	  E T8j, T8q, TdG, TeO, T8g, T8p, Tba, Tbj;
+	  {
+	       E T3, T23, Td, T25, T6, T5R, Ta, T24;
+	       {
+		    E T1, T2, Tb, Tc;
+		    T1 = I[0];
+		    T2 = I[WS(is, 64)];
+		    T3 = T1 + T2;
+		    T23 = T1 - T2;
+		    Tb = I[WS(is, 112)];
+		    Tc = I[WS(is, 48)];
+		    Td = Tb + Tc;
+		    T25 = Tb - Tc;
+	       }
+	       {
+		    E T4, T5, T8, T9;
+		    T4 = I[WS(is, 32)];
+		    T5 = I[WS(is, 96)];
+		    T6 = T4 + T5;
+		    T5R = T4 - T5;
+		    T8 = I[WS(is, 16)];
+		    T9 = I[WS(is, 80)];
+		    Ta = T8 + T9;
+		    T24 = T8 - T9;
+	       }
+	       TcD = T3 - T6;
+	       TdU = Td - Ta;
+	       {
+		    E T26, T5Q, T7, Te;
+		    T26 = KP707106781 * (T24 + T25);
+		    T27 = T23 + T26;
+		    T7r = T23 - T26;
+		    T5Q = KP707106781 * (T25 - T24);
+		    T5S = T5Q - T5R;
+		    T8y = T5R + T5Q;
+		    T7 = T3 + T6;
+		    Te = Ta + Td;
+		    Tf = T7 + Te;
+		    Ta5 = T7 - Te;
+	       }
+	  }
+	  {
+	       E Ti, T28, Ts, T2c, Tl, T29, Tp, T2b;
+	       {
+		    E Tg, Th, Tq, Tr;
+		    Tg = I[WS(is, 8)];
+		    Th = I[WS(is, 72)];
+		    Ti = Tg + Th;
+		    T28 = Tg - Th;
+		    Tq = I[WS(is, 24)];
+		    Tr = I[WS(is, 88)];
+		    Ts = Tq + Tr;
+		    T2c = Tq - Tr;
+	       }
+	       {
+		    E Tj, Tk, Tn, To;
+		    Tj = I[WS(is, 40)];
+		    Tk = I[WS(is, 104)];
+		    Tl = Tj + Tk;
+		    T29 = Tj - Tk;
+		    Tn = I[WS(is, 120)];
+		    To = I[WS(is, 56)];
+		    Tp = Tn + To;
+		    T2b = Tn - To;
+	       }
+	       {
+		    E Tm, Tt, TcE, TcF;
+		    Tm = Ti + Tl;
+		    Tt = Tp + Ts;
+		    Tu = Tm + Tt;
+		    Tbq = Tt - Tm;
+		    TcE = Ti - Tl;
+		    TcF = Tp - Ts;
+		    TcG = KP707106781 * (TcE + TcF);
+		    TdV = KP707106781 * (TcF - TcE);
+	       }
+	       {
+		    E T2a, T2d, T5T, T5U;
+		    T2a = FNMS(KP382683432, T29, KP923879532 * T28);
+		    T2d = FMA(KP923879532, T2b, KP382683432 * T2c);
+		    T2e = T2a + T2d;
+		    T8z = T2d - T2a;
+		    T5T = FNMS(KP923879532, T2c, KP382683432 * T2b);
+		    T5U = FMA(KP382683432, T28, KP923879532 * T29);
+		    T5V = T5T - T5U;
+		    T7s = T5U + T5T;
+	       }
+	  }
+	  {
+	       E Ty, T2g, TB, T2m, TF, T2l, TI, T2j;
+	       {
+		    E Tw, Tx, Tz, TA;
+		    Tw = I[WS(is, 4)];
+		    Tx = I[WS(is, 68)];
+		    Ty = Tw + Tx;
+		    T2g = Tw - Tx;
+		    Tz = I[WS(is, 36)];
+		    TA = I[WS(is, 100)];
+		    TB = Tz + TA;
+		    T2m = Tz - TA;
+		    {
+			 E TD, TE, T2h, TG, TH, T2i;
+			 TD = I[WS(is, 20)];
+			 TE = I[WS(is, 84)];
+			 T2h = TD - TE;
+			 TG = I[WS(is, 116)];
+			 TH = I[WS(is, 52)];
+			 T2i = TG - TH;
+			 TF = TD + TE;
+			 T2l = KP707106781 * (T2i - T2h);
+			 TI = TG + TH;
+			 T2j = KP707106781 * (T2h + T2i);
+		    }
+	       }
+	       {
+		    E TC, TJ, TcI, TcJ;
+		    TC = Ty + TB;
+		    TJ = TF + TI;
+		    TK = TC + TJ;
+		    Ta6 = TC - TJ;
+		    TcI = Ty - TB;
+		    TcJ = TI - TF;
+		    TcK = FMA(KP923879532, TcI, KP382683432 * TcJ);
+		    TdX = FNMS(KP382683432, TcI, KP923879532 * TcJ);
+	       }
+	       {
+		    E T2k, T2n, T7u, T7v;
+		    T2k = T2g + T2j;
+		    T2n = T2l - T2m;
+		    T2o = FMA(KP980785280, T2k, KP195090322 * T2n);
+		    T5X = FNMS(KP195090322, T2k, KP980785280 * T2n);
+		    T7u = T2g - T2j;
+		    T7v = T2m + T2l;
+		    T7w = FMA(KP831469612, T7u, KP555570233 * T7v);
+		    T8B = FNMS(KP555570233, T7u, KP831469612 * T7v);
+	       }
+	  }
+	  {
+	       E TN, T2p, TQ, T2v, TU, T2u, TX, T2s;
+	       {
+		    E TL, TM, TO, TP;
+		    TL = I[WS(is, 124)];
+		    TM = I[WS(is, 60)];
+		    TN = TL + TM;
+		    T2p = TL - TM;
+		    TO = I[WS(is, 28)];
+		    TP = I[WS(is, 92)];
+		    TQ = TO + TP;
+		    T2v = TO - TP;
+		    {
+			 E TS, TT, T2q, TV, TW, T2r;
+			 TS = I[WS(is, 12)];
+			 TT = I[WS(is, 76)];
+			 T2q = TS - TT;
+			 TV = I[WS(is, 108)];
+			 TW = I[WS(is, 44)];
+			 T2r = TV - TW;
+			 TU = TS + TT;
+			 T2u = KP707106781 * (T2r - T2q);
+			 TX = TV + TW;
+			 T2s = KP707106781 * (T2q + T2r);
+		    }
+	       }
+	       {
+		    E TR, TY, TcL, TcM;
+		    TR = TN + TQ;
+		    TY = TU + TX;
+		    TZ = TR + TY;
+		    Ta7 = TR - TY;
+		    TcL = TN - TQ;
+		    TcM = TX - TU;
+		    TcN = FNMS(KP382683432, TcM, KP923879532 * TcL);
+		    TdY = FMA(KP382683432, TcL, KP923879532 * TcM);
+	       }
+	       {
+		    E T2t, T2w, T7x, T7y;
+		    T2t = T2p + T2s;
+		    T2w = T2u - T2v;
+		    T2x = FNMS(KP195090322, T2w, KP980785280 * T2t);
+		    T5Y = FMA(KP195090322, T2t, KP980785280 * T2w);
+		    T7x = T2p - T2s;
+		    T7y = T2v + T2u;
+		    T7z = FNMS(KP555570233, T7y, KP831469612 * T7x);
+		    T8C = FMA(KP555570233, T7x, KP831469612 * T7y);
+	       }
+	  }
+	  {
+	       E T14, T2N, T17, T2D, T1b, T2O, T1e, T2C, T1j, T1m, T2K, TcR, T2Q, T1q, T1t;
+	       E T2H, TcS, T2R;
+	       {
+		    E T12, T13, T15, T16;
+		    T12 = I[WS(is, 2)];
+		    T13 = I[WS(is, 66)];
+		    T14 = T12 + T13;
+		    T2N = T12 - T13;
+		    T15 = I[WS(is, 34)];
+		    T16 = I[WS(is, 98)];
+		    T17 = T15 + T16;
+		    T2D = T15 - T16;
+	       }
+	       {
+		    E T19, T1a, T2B, T1c, T1d, T2A;
+		    T19 = I[WS(is, 18)];
+		    T1a = I[WS(is, 82)];
+		    T2B = T19 - T1a;
+		    T1c = I[WS(is, 114)];
+		    T1d = I[WS(is, 50)];
+		    T2A = T1c - T1d;
+		    T1b = T19 + T1a;
+		    T2O = KP707106781 * (T2B + T2A);
+		    T1e = T1c + T1d;
+		    T2C = KP707106781 * (T2A - T2B);
+	       }
+	       {
+		    E T2I, T2J, T2F, T2G;
+		    {
+			 E T1h, T1i, T1k, T1l;
+			 T1h = I[WS(is, 10)];
+			 T1i = I[WS(is, 74)];
+			 T1j = T1h + T1i;
+			 T2I = T1h - T1i;
+			 T1k = I[WS(is, 42)];
+			 T1l = I[WS(is, 106)];
+			 T1m = T1k + T1l;
+			 T2J = T1k - T1l;
+		    }
+		    T2K = FMA(KP382683432, T2I, KP923879532 * T2J);
+		    TcR = T1j - T1m;
+		    T2Q = FNMS(KP382683432, T2J, KP923879532 * T2I);
+		    {
+			 E T1o, T1p, T1r, T1s;
+			 T1o = I[WS(is, 122)];
+			 T1p = I[WS(is, 58)];
+			 T1q = T1o + T1p;
+			 T2F = T1o - T1p;
+			 T1r = I[WS(is, 26)];
+			 T1s = I[WS(is, 90)];
+			 T1t = T1r + T1s;
+			 T2G = T1r - T1s;
+		    }
+		    T2H = FNMS(KP923879532, T2G, KP382683432 * T2F);
+		    TcS = T1q - T1t;
+		    T2R = FMA(KP923879532, T2F, KP382683432 * T2G);
+	       }
+	       {
+		    E T18, T1f, TcQ, TcT;
+		    T18 = T14 + T17;
+		    T1f = T1b + T1e;
+		    T1g = T18 + T1f;
+		    Taa = T18 - T1f;
+		    TcQ = T14 - T17;
+		    TcT = KP707106781 * (TcR + TcS);
+		    TcU = TcQ + TcT;
+		    TeA = TcQ - TcT;
+	       }
+	       {
+		    E TcV, TcW, T1n, T1u;
+		    TcV = T1e - T1b;
+		    TcW = KP707106781 * (TcS - TcR);
+		    TcX = TcV + TcW;
+		    Tez = TcW - TcV;
+		    T1n = T1j + T1m;
+		    T1u = T1q + T1t;
+		    T1v = T1n + T1u;
+		    Tab = T1u - T1n;
+	       }
+	       {
+		    E T2E, T2L, T7C, T7D;
+		    T2E = T2C - T2D;
+		    T2L = T2H - T2K;
+		    T2M = T2E + T2L;
+		    T6z = T2L - T2E;
+		    T7C = T2N - T2O;
+		    T7D = T2K + T2H;
+		    T7E = T7C + T7D;
+		    T9e = T7C - T7D;
+	       }
+	       {
+		    E T7F, T7G, T2P, T2S;
+		    T7F = T2D + T2C;
+		    T7G = T2R - T2Q;
+		    T7H = T7F + T7G;
+		    T9d = T7G - T7F;
+		    T2P = T2N + T2O;
+		    T2S = T2Q + T2R;
+		    T2T = T2P + T2S;
+		    T6A = T2P - T2S;
+	       }
+	  }
+	  {
+	       E T4z, TaP, T5B, TaQ, T4G, TaT, T5y, TaS, Tbf, Tbg, T4O, Tdw, T5E, Tbc, Tbd;
+	       E T4V, Tdx, T5D;
+	       {
+		    E T4x, T4y, T5z, T5A;
+		    T4x = I[WS(is, 127)];
+		    T4y = I[WS(is, 63)];
+		    T4z = T4x - T4y;
+		    TaP = T4x + T4y;
+		    T5z = I[WS(is, 31)];
+		    T5A = I[WS(is, 95)];
+		    T5B = T5z - T5A;
+		    TaQ = T5z + T5A;
+	       }
+	       {
+		    E T4A, T4B, T4C, T4D, T4E, T4F;
+		    T4A = I[WS(is, 15)];
+		    T4B = I[WS(is, 79)];
+		    T4C = T4A - T4B;
+		    T4D = I[WS(is, 111)];
+		    T4E = I[WS(is, 47)];
+		    T4F = T4D - T4E;
+		    T4G = KP707106781 * (T4C + T4F);
+		    TaT = T4D + T4E;
+		    T5y = KP707106781 * (T4F - T4C);
+		    TaS = T4A + T4B;
+	       }
+	       {
+		    E T4K, T4N, T4R, T4U;
+		    {
+			 E T4I, T4J, T4L, T4M;
+			 T4I = I[WS(is, 7)];
+			 T4J = I[WS(is, 71)];
+			 T4K = T4I - T4J;
+			 Tbf = T4I + T4J;
+			 T4L = I[WS(is, 39)];
+			 T4M = I[WS(is, 103)];
+			 T4N = T4L - T4M;
+			 Tbg = T4L + T4M;
+		    }
+		    T4O = FNMS(KP382683432, T4N, KP923879532 * T4K);
+		    Tdw = Tbf - Tbg;
+		    T5E = FMA(KP382683432, T4K, KP923879532 * T4N);
+		    {
+			 E T4P, T4Q, T4S, T4T;
+			 T4P = I[WS(is, 119)];
+			 T4Q = I[WS(is, 55)];
+			 T4R = T4P - T4Q;
+			 Tbc = T4P + T4Q;
+			 T4S = I[WS(is, 23)];
+			 T4T = I[WS(is, 87)];
+			 T4U = T4S - T4T;
+			 Tbd = T4S + T4T;
+		    }
+		    T4V = FMA(KP923879532, T4R, KP382683432 * T4U);
+		    Tdx = Tbc - Tbd;
+		    T5D = FNMS(KP923879532, T4U, KP382683432 * T4R);
+	       }
+	       {
+		    E T4H, T4W, Tdv, Tdy;
+		    T4H = T4z + T4G;
+		    T4W = T4O + T4V;
+		    T4X = T4H + T4W;
+		    T6L = T4H - T4W;
+		    Tdv = TaP - TaQ;
+		    Tdy = KP707106781 * (Tdw + Tdx);
+		    Tdz = Tdv + Tdy;
+		    TeL = Tdv - Tdy;
+	       }
+	       {
+		    E TdI, TdJ, T5C, T5F;
+		    TdI = TaT - TaS;
+		    TdJ = KP707106781 * (Tdx - Tdw);
+		    TdK = TdI + TdJ;
+		    TeP = TdJ - TdI;
+		    T5C = T5y - T5B;
+		    T5F = T5D - T5E;
+		    T5G = T5C + T5F;
+		    T6P = T5F - T5C;
+	       }
+	       {
+		    E T8b, T8c, TaR, TaU;
+		    T8b = T4z - T4G;
+		    T8c = T5E + T5D;
+		    T8d = T8b + T8c;
+		    T9p = T8b - T8c;
+		    TaR = TaP + TaQ;
+		    TaU = TaS + TaT;
+		    TaV = TaR - TaU;
+		    Tc3 = TaR + TaU;
+	       }
+	       {
+		    E Tbe, Tbh, T8m, T8n;
+		    Tbe = Tbc + Tbd;
+		    Tbh = Tbf + Tbg;
+		    Tbi = Tbe - Tbh;
+		    Tc4 = Tbh + Tbe;
+		    T8m = T5B + T5y;
+		    T8n = T4V - T4O;
+		    T8o = T8m + T8n;
+		    T9t = T8n - T8m;
+	       }
+	  }
+	  {
+	       E T3k, Tai, T4m, Taj, T3r, Tam, T4j, Tal, TaI, TaJ, T3z, Tdb, T4p, TaF, TaG;
+	       E T3G, Tdc, T4o;
+	       {
+		    E T3i, T3j, T4k, T4l;
+		    T3i = I[WS(is, 1)];
+		    T3j = I[WS(is, 65)];
+		    T3k = T3i - T3j;
+		    Tai = T3i + T3j;
+		    T4k = I[WS(is, 33)];
+		    T4l = I[WS(is, 97)];
+		    T4m = T4k - T4l;
+		    Taj = T4k + T4l;
+	       }
+	       {
+		    E T3l, T3m, T3n, T3o, T3p, T3q;
+		    T3l = I[WS(is, 17)];
+		    T3m = I[WS(is, 81)];
+		    T3n = T3l - T3m;
+		    T3o = I[WS(is, 113)];
+		    T3p = I[WS(is, 49)];
+		    T3q = T3o - T3p;
+		    T3r = KP707106781 * (T3n + T3q);
+		    Tam = T3o + T3p;
+		    T4j = KP707106781 * (T3q - T3n);
+		    Tal = T3l + T3m;
+	       }
+	       {
+		    E T3v, T3y, T3C, T3F;
+		    {
+			 E T3t, T3u, T3w, T3x;
+			 T3t = I[WS(is, 9)];
+			 T3u = I[WS(is, 73)];
+			 T3v = T3t - T3u;
+			 TaI = T3t + T3u;
+			 T3w = I[WS(is, 41)];
+			 T3x = I[WS(is, 105)];
+			 T3y = T3w - T3x;
+			 TaJ = T3w + T3x;
+		    }
+		    T3z = FNMS(KP382683432, T3y, KP923879532 * T3v);
+		    Tdb = TaI - TaJ;
+		    T4p = FMA(KP382683432, T3v, KP923879532 * T3y);
+		    {
+			 E T3A, T3B, T3D, T3E;
+			 T3A = I[WS(is, 121)];
+			 T3B = I[WS(is, 57)];
+			 T3C = T3A - T3B;
+			 TaF = T3A + T3B;
+			 T3D = I[WS(is, 25)];
+			 T3E = I[WS(is, 89)];
+			 T3F = T3D - T3E;
+			 TaG = T3D + T3E;
+		    }
+		    T3G = FMA(KP923879532, T3C, KP382683432 * T3F);
+		    Tdc = TaF - TaG;
+		    T4o = FNMS(KP923879532, T3F, KP382683432 * T3C);
+	       }
+	       {
+		    E T3s, T3H, Tda, Tdd;
+		    T3s = T3k + T3r;
+		    T3H = T3z + T3G;
+		    T3I = T3s + T3H;
+		    T6H = T3s - T3H;
+		    Tda = Tai - Taj;
+		    Tdd = KP707106781 * (Tdb + Tdc);
+		    Tde = Tda + Tdd;
+		    TeH = Tda - Tdd;
+	       }
+	       {
+		    E Tdn, Tdo, T4n, T4q;
+		    Tdn = Tam - Tal;
+		    Tdo = KP707106781 * (Tdc - Tdb);
+		    Tdp = Tdn + Tdo;
+		    TeF = Tdo - Tdn;
+		    T4n = T4j - T4m;
+		    T4q = T4o - T4p;
+		    T4r = T4n + T4q;
+		    T6F = T4q - T4n;
+	       }
+	       {
+		    E T7S, T7T, Tak, Tan;
+		    T7S = T3k - T3r;
+		    T7T = T4p + T4o;
+		    T7U = T7S + T7T;
+		    T9l = T7S - T7T;
+		    Tak = Tai + Taj;
+		    Tan = Tal + Tam;
+		    Tao = Tak - Tan;
+		    TbW = Tak + Tan;
+	       }
+	       {
+		    E TaH, TaK, T83, T84;
+		    TaH = TaF + TaG;
+		    TaK = TaI + TaJ;
+		    TaL = TaH - TaK;
+		    TbX = TaK + TaH;
+		    T83 = T4m + T4j;
+		    T84 = T3G - T3z;
+		    T85 = T83 + T84;
+		    T9j = T84 - T83;
+	       }
+	  }
+	  {
+	       E T1z, T2V, T1C, T39, T1G, T38, T1J, T2Y, T1O, T1R, T32, Td0, T3c, T1V, T1Y;
+	       E T35, Td1, T3b;
+	       {
+		    E T1x, T1y, T1A, T1B;
+		    T1x = I[WS(is, 126)];
+		    T1y = I[WS(is, 62)];
+		    T1z = T1x + T1y;
+		    T2V = T1x - T1y;
+		    T1A = I[WS(is, 30)];
+		    T1B = I[WS(is, 94)];
+		    T1C = T1A + T1B;
+		    T39 = T1A - T1B;
+	       }
+	       {
+		    E T1E, T1F, T2W, T1H, T1I, T2X;
+		    T1E = I[WS(is, 14)];
+		    T1F = I[WS(is, 78)];
+		    T2W = T1E - T1F;
+		    T1H = I[WS(is, 110)];
+		    T1I = I[WS(is, 46)];
+		    T2X = T1H - T1I;
+		    T1G = T1E + T1F;
+		    T38 = KP707106781 * (T2X - T2W);
+		    T1J = T1H + T1I;
+		    T2Y = KP707106781 * (T2W + T2X);
+	       }
+	       {
+		    E T30, T31, T33, T34;
+		    {
+			 E T1M, T1N, T1P, T1Q;
+			 T1M = I[WS(is, 6)];
+			 T1N = I[WS(is, 70)];
+			 T1O = T1M + T1N;
+			 T30 = T1M - T1N;
+			 T1P = I[WS(is, 38)];
+			 T1Q = I[WS(is, 102)];
+			 T1R = T1P + T1Q;
+			 T31 = T1P - T1Q;
+		    }
+		    T32 = FNMS(KP382683432, T31, KP923879532 * T30);
+		    Td0 = T1O - T1R;
+		    T3c = FMA(KP382683432, T30, KP923879532 * T31);
+		    {
+			 E T1T, T1U, T1W, T1X;
+			 T1T = I[WS(is, 118)];
+			 T1U = I[WS(is, 54)];
+			 T1V = T1T + T1U;
+			 T33 = T1T - T1U;
+			 T1W = I[WS(is, 22)];
+			 T1X = I[WS(is, 86)];
+			 T1Y = T1W + T1X;
+			 T34 = T1W - T1X;
+		    }
+		    T35 = FMA(KP923879532, T33, KP382683432 * T34);
+		    Td1 = T1V - T1Y;
+		    T3b = FNMS(KP923879532, T34, KP382683432 * T33);
+	       }
+	       {
+		    E T1D, T1K, TcZ, Td2;
+		    T1D = T1z + T1C;
+		    T1K = T1G + T1J;
+		    T1L = T1D + T1K;
+		    Tad = T1D - T1K;
+		    TcZ = T1z - T1C;
+		    Td2 = KP707106781 * (Td0 + Td1);
+		    Td3 = TcZ + Td2;
+		    Tew = TcZ - Td2;
+	       }
+	       {
+		    E Td4, Td5, T1S, T1Z;
+		    Td4 = T1J - T1G;
+		    Td5 = KP707106781 * (Td1 - Td0);
+		    Td6 = Td4 + Td5;
+		    Tex = Td5 - Td4;
+		    T1S = T1O + T1R;
+		    T1Z = T1V + T1Y;
+		    T20 = T1S + T1Z;
+		    Tae = T1Z - T1S;
+	       }
+	       {
+		    E T2Z, T36, T7J, T7K;
+		    T2Z = T2V + T2Y;
+		    T36 = T32 + T35;
+		    T37 = T2Z + T36;
+		    T6x = T2Z - T36;
+		    T7J = T2V - T2Y;
+		    T7K = T3c + T3b;
+		    T7L = T7J + T7K;
+		    T9a = T7J - T7K;
+	       }
+	       {
+		    E T7M, T7N, T3a, T3d;
+		    T7M = T39 + T38;
+		    T7N = T35 - T32;
+		    T7O = T7M + T7N;
+		    T9b = T7N - T7M;
+		    T3a = T38 - T39;
+		    T3d = T3b - T3c;
+		    T3e = T3a + T3d;
+		    T6w = T3d - T3a;
+	       }
+	  }
+	  {
+	       E T3L, Tdf, T3X, Tar, T42, Tdi, T4e, Tay, T3S, Tdg, T3U, Tau, T49, Tdj, T4b;
+	       E TaB, Tdh, Tdk;
+	       {
+		    E T3J, T3K, Tap, T3V, T3W, Taq;
+		    T3J = I[WS(is, 5)];
+		    T3K = I[WS(is, 69)];
+		    Tap = T3J + T3K;
+		    T3V = I[WS(is, 37)];
+		    T3W = I[WS(is, 101)];
+		    Taq = T3V + T3W;
+		    T3L = T3J - T3K;
+		    Tdf = Tap - Taq;
+		    T3X = T3V - T3W;
+		    Tar = Tap + Taq;
+	       }
+	       {
+		    E T40, T41, Taw, T4c, T4d, Tax;
+		    T40 = I[WS(is, 125)];
+		    T41 = I[WS(is, 61)];
+		    Taw = T40 + T41;
+		    T4c = I[WS(is, 29)];
+		    T4d = I[WS(is, 93)];
+		    Tax = T4c + T4d;
+		    T42 = T40 - T41;
+		    Tdi = Taw - Tax;
+		    T4e = T4c - T4d;
+		    Tay = Taw + Tax;
+	       }
+	       {
+		    E T3O, Tas, T3R, Tat;
+		    {
+			 E T3M, T3N, T3P, T3Q;
+			 T3M = I[WS(is, 21)];
+			 T3N = I[WS(is, 85)];
+			 T3O = T3M - T3N;
+			 Tas = T3M + T3N;
+			 T3P = I[WS(is, 117)];
+			 T3Q = I[WS(is, 53)];
+			 T3R = T3P - T3Q;
+			 Tat = T3P + T3Q;
+		    }
+		    T3S = KP707106781 * (T3O + T3R);
+		    Tdg = Tat - Tas;
+		    T3U = KP707106781 * (T3R - T3O);
+		    Tau = Tas + Tat;
+	       }
+	       {
+		    E T45, Taz, T48, TaA;
+		    {
+			 E T43, T44, T46, T47;
+			 T43 = I[WS(is, 13)];
+			 T44 = I[WS(is, 77)];
+			 T45 = T43 - T44;
+			 Taz = T43 + T44;
+			 T46 = I[WS(is, 109)];
+			 T47 = I[WS(is, 45)];
+			 T48 = T46 - T47;
+			 TaA = T46 + T47;
+		    }
+		    T49 = KP707106781 * (T45 + T48);
+		    Tdj = TaA - Taz;
+		    T4b = KP707106781 * (T48 - T45);
+		    TaB = Taz + TaA;
+	       }
+	       TbZ = Tar + Tau;
+	       Tc0 = Tay + TaB;
+	       {
+		    E T3T, T3Y, Tdq, Tdr;
+		    T3T = T3L + T3S;
+		    T3Y = T3U - T3X;
+		    T3Z = FMA(KP980785280, T3T, KP195090322 * T3Y);
+		    T4s = FNMS(KP195090322, T3T, KP980785280 * T3Y);
+		    Tdq = FNMS(KP382683432, Tdf, KP923879532 * Tdg);
+		    Tdr = FMA(KP382683432, Tdi, KP923879532 * Tdj);
+		    Tds = Tdq + Tdr;
+		    TeI = Tdr - Tdq;
+	       }
+	       {
+		    E T4a, T4f, T7Y, T7Z;
+		    T4a = T42 + T49;
+		    T4f = T4b - T4e;
+		    T4g = FNMS(KP195090322, T4f, KP980785280 * T4a);
+		    T4t = FMA(KP195090322, T4a, KP980785280 * T4f);
+		    T7Y = T42 - T49;
+		    T7Z = T4e + T4b;
+		    T80 = FNMS(KP555570233, T7Z, KP831469612 * T7Y);
+		    T87 = FMA(KP555570233, T7Y, KP831469612 * T7Z);
+	       }
+	       Tdh = FMA(KP923879532, Tdf, KP382683432 * Tdg);
+	       Tdk = FNMS(KP382683432, Tdj, KP923879532 * Tdi);
+	       Tdl = Tdh + Tdk;
+	       TeE = Tdk - Tdh;
+	       {
+		    E T7V, T7W, Tav, TaC;
+		    T7V = T3L - T3S;
+		    T7W = T3X + T3U;
+		    T7X = FMA(KP831469612, T7V, KP555570233 * T7W);
+		    T86 = FNMS(KP555570233, T7V, KP831469612 * T7W);
+		    Tav = Tar - Tau;
+		    TaC = Tay - TaB;
+		    TaD = KP707106781 * (Tav + TaC);
+		    TaM = KP707106781 * (TaC - Tav);
+	       }
+	  }
+	  {
+	       E T50, TdA, T5c, TaY, T5h, TdD, T5t, Tb5, T57, TdB, T59, Tb1, T5o, TdE, T5q;
+	       E Tb8, TdC, TdF;
+	       {
+		    E T4Y, T4Z, TaW, T5a, T5b, TaX;
+		    T4Y = I[WS(is, 3)];
+		    T4Z = I[WS(is, 67)];
+		    TaW = T4Y + T4Z;
+		    T5a = I[WS(is, 35)];
+		    T5b = I[WS(is, 99)];
+		    TaX = T5a + T5b;
+		    T50 = T4Y - T4Z;
+		    TdA = TaW - TaX;
+		    T5c = T5a - T5b;
+		    TaY = TaW + TaX;
+	       }
+	       {
+		    E T5f, T5g, Tb3, T5r, T5s, Tb4;
+		    T5f = I[WS(is, 123)];
+		    T5g = I[WS(is, 59)];
+		    Tb3 = T5f + T5g;
+		    T5r = I[WS(is, 27)];
+		    T5s = I[WS(is, 91)];
+		    Tb4 = T5r + T5s;
+		    T5h = T5f - T5g;
+		    TdD = Tb3 - Tb4;
+		    T5t = T5r - T5s;
+		    Tb5 = Tb3 + Tb4;
+	       }
+	       {
+		    E T53, TaZ, T56, Tb0;
+		    {
+			 E T51, T52, T54, T55;
+			 T51 = I[WS(is, 19)];
+			 T52 = I[WS(is, 83)];
+			 T53 = T51 - T52;
+			 TaZ = T51 + T52;
+			 T54 = I[WS(is, 115)];
+			 T55 = I[WS(is, 51)];
+			 T56 = T54 - T55;
+			 Tb0 = T54 + T55;
+		    }
+		    T57 = KP707106781 * (T53 + T56);
+		    TdB = Tb0 - TaZ;
+		    T59 = KP707106781 * (T56 - T53);
+		    Tb1 = TaZ + Tb0;
+	       }
+	       {
+		    E T5k, Tb6, T5n, Tb7;
+		    {
+			 E T5i, T5j, T5l, T5m;
+			 T5i = I[WS(is, 11)];
+			 T5j = I[WS(is, 75)];
+			 T5k = T5i - T5j;
+			 Tb6 = T5i + T5j;
+			 T5l = I[WS(is, 107)];
+			 T5m = I[WS(is, 43)];
+			 T5n = T5l - T5m;
+			 Tb7 = T5l + T5m;
+		    }
+		    T5o = KP707106781 * (T5k + T5n);
+		    TdE = Tb7 - Tb6;
+		    T5q = KP707106781 * (T5n - T5k);
+		    Tb8 = Tb6 + Tb7;
+	       }
+	       Tc6 = TaY + Tb1;
+	       Tc7 = Tb5 + Tb8;
+	       {
+		    E T58, T5d, TdL, TdM;
+		    T58 = T50 + T57;
+		    T5d = T59 - T5c;
+		    T5e = FMA(KP980785280, T58, KP195090322 * T5d);
+		    T5H = FNMS(KP195090322, T58, KP980785280 * T5d);
+		    TdL = FNMS(KP382683432, TdA, KP923879532 * TdB);
+		    TdM = FMA(KP382683432, TdD, KP923879532 * TdE);
+		    TdN = TdL + TdM;
+		    TeM = TdM - TdL;
+	       }
+	       {
+		    E T5p, T5u, T8h, T8i;
+		    T5p = T5h + T5o;
+		    T5u = T5q - T5t;
+		    T5v = FNMS(KP195090322, T5u, KP980785280 * T5p);
+		    T5I = FMA(KP195090322, T5p, KP980785280 * T5u);
+		    T8h = T5h - T5o;
+		    T8i = T5t + T5q;
+		    T8j = FNMS(KP555570233, T8i, KP831469612 * T8h);
+		    T8q = FMA(KP555570233, T8h, KP831469612 * T8i);
+	       }
+	       TdC = FMA(KP923879532, TdA, KP382683432 * TdB);
+	       TdF = FNMS(KP382683432, TdE, KP923879532 * TdD);
+	       TdG = TdC + TdF;
+	       TeO = TdF - TdC;
+	       {
+		    E T8e, T8f, Tb2, Tb9;
+		    T8e = T50 - T57;
+		    T8f = T5c + T59;
+		    T8g = FMA(KP831469612, T8e, KP555570233 * T8f);
+		    T8p = FNMS(KP555570233, T8e, KP831469612 * T8f);
+		    Tb2 = TaY - Tb1;
+		    Tb9 = Tb5 - Tb8;
+		    Tba = KP707106781 * (Tb2 + Tb9);
+		    Tbj = KP707106781 * (Tb9 - Tb2);
+	       }
+	  }
+	  {
+	       E T11, TbV, Tc9, Tcf, T22, Tcb, Tc2, Tce;
+	       {
+		    E Tv, T10, Tc5, Tc8;
+		    Tv = Tf + Tu;
+		    T10 = TK + TZ;
+		    T11 = Tv + T10;
+		    TbV = Tv - T10;
+		    Tc5 = Tc3 + Tc4;
+		    Tc8 = Tc6 + Tc7;
+		    Tc9 = Tc5 - Tc8;
+		    Tcf = Tc5 + Tc8;
+	       }
+	       {
+		    E T1w, T21, TbY, Tc1;
+		    T1w = T1g + T1v;
+		    T21 = T1L + T20;
+		    T22 = T1w + T21;
+		    Tcb = T21 - T1w;
+		    TbY = TbW + TbX;
+		    Tc1 = TbZ + Tc0;
+		    Tc2 = TbY - Tc1;
+		    Tce = TbY + Tc1;
+	       }
+	       ro[WS(ros, 32)] = T11 - T22;
+	       io[WS(ios, 32)] = Tcf - Tce;
+	       {
+		    E Tca, Tcc, Tcd, Tcg;
+		    Tca = KP707106781 * (Tc2 + Tc9);
+		    ro[WS(ros, 48)] = TbV - Tca;
+		    ro[WS(ros, 16)] = TbV + Tca;
+		    Tcc = KP707106781 * (Tc9 - Tc2);
+		    io[WS(ios, 16)] = Tcb + Tcc;
+		    io[WS(ios, 48)] = Tcc - Tcb;
+		    Tcd = T11 + T22;
+		    Tcg = Tce + Tcf;
+		    ro[WS(ros, 64)] = Tcd - Tcg;
+		    ro[0] = Tcd + Tcg;
+	       }
+	  }
+	  {
+	       E Tch, Tcu, Tck, Tct, Tco, Tcy, Tcr, Tcz, Tci, Tcj;
+	       Tch = Tf - Tu;
+	       Tcu = TZ - TK;
+	       Tci = T1g - T1v;
+	       Tcj = T1L - T20;
+	       Tck = KP707106781 * (Tci + Tcj);
+	       Tct = KP707106781 * (Tcj - Tci);
+	       {
+		    E Tcm, Tcn, Tcp, Tcq;
+		    Tcm = TbW - TbX;
+		    Tcn = Tc0 - TbZ;
+		    Tco = FMA(KP923879532, Tcm, KP382683432 * Tcn);
+		    Tcy = FNMS(KP382683432, Tcm, KP923879532 * Tcn);
+		    Tcp = Tc3 - Tc4;
+		    Tcq = Tc7 - Tc6;
+		    Tcr = FNMS(KP382683432, Tcq, KP923879532 * Tcp);
+		    Tcz = FMA(KP382683432, Tcp, KP923879532 * Tcq);
+	       }
+	       {
+		    E Tcl, Tcs, Tcx, TcA;
+		    Tcl = Tch + Tck;
+		    Tcs = Tco + Tcr;
+		    ro[WS(ros, 56)] = Tcl - Tcs;
+		    ro[WS(ros, 8)] = Tcl + Tcs;
+		    Tcx = Tcu + Tct;
+		    TcA = Tcy + Tcz;
+		    io[WS(ios, 8)] = Tcx + TcA;
+		    io[WS(ios, 56)] = TcA - Tcx;
+	       }
+	       {
+		    E Tcv, Tcw, TcB, TcC;
+		    Tcv = Tct - Tcu;
+		    Tcw = Tcr - Tco;
+		    io[WS(ios, 24)] = Tcv + Tcw;
+		    io[WS(ios, 40)] = Tcw - Tcv;
+		    TcB = Tch - Tck;
+		    TcC = Tcz - Tcy;
+		    ro[WS(ros, 40)] = TcB - TcC;
+		    ro[WS(ros, 24)] = TcB + TcC;
+	       }
+	  }
+	  {
+	       E Ta9, TbB, Tbs, TbM, Tag, TbL, TbJ, TbR, TaO, Tbw, Tbp, TbC, TbG, TbQ, Tbl;
+	       E Tbx, Ta8, Tbr;
+	       Ta8 = KP707106781 * (Ta6 + Ta7);
+	       Ta9 = Ta5 + Ta8;
+	       TbB = Ta5 - Ta8;
+	       Tbr = KP707106781 * (Ta7 - Ta6);
+	       Tbs = Tbq + Tbr;
+	       TbM = Tbr - Tbq;
+	       {
+		    E Tac, Taf, TbH, TbI;
+		    Tac = FMA(KP923879532, Taa, KP382683432 * Tab);
+		    Taf = FNMS(KP382683432, Tae, KP923879532 * Tad);
+		    Tag = Tac + Taf;
+		    TbL = Taf - Tac;
+		    TbH = TaV - Tba;
+		    TbI = Tbj - Tbi;
+		    TbJ = FNMS(KP555570233, TbI, KP831469612 * TbH);
+		    TbR = FMA(KP555570233, TbH, KP831469612 * TbI);
+	       }
+	       {
+		    E TaE, TaN, Tbn, Tbo;
+		    TaE = Tao + TaD;
+		    TaN = TaL + TaM;
+		    TaO = FMA(KP980785280, TaE, KP195090322 * TaN);
+		    Tbw = FNMS(KP195090322, TaE, KP980785280 * TaN);
+		    Tbn = FNMS(KP382683432, Taa, KP923879532 * Tab);
+		    Tbo = FMA(KP382683432, Tad, KP923879532 * Tae);
+		    Tbp = Tbn + Tbo;
+		    TbC = Tbo - Tbn;
+	       }
+	       {
+		    E TbE, TbF, Tbb, Tbk;
+		    TbE = Tao - TaD;
+		    TbF = TaM - TaL;
+		    TbG = FMA(KP831469612, TbE, KP555570233 * TbF);
+		    TbQ = FNMS(KP555570233, TbE, KP831469612 * TbF);
+		    Tbb = TaV + Tba;
+		    Tbk = Tbi + Tbj;
+		    Tbl = FNMS(KP195090322, Tbk, KP980785280 * Tbb);
+		    Tbx = FMA(KP195090322, Tbb, KP980785280 * Tbk);
+	       }
+	       {
+		    E Tah, Tbm, Tbv, Tby;
+		    Tah = Ta9 + Tag;
+		    Tbm = TaO + Tbl;
+		    ro[WS(ros, 60)] = Tah - Tbm;
+		    ro[WS(ros, 4)] = Tah + Tbm;
+		    Tbv = Tbs + Tbp;
+		    Tby = Tbw + Tbx;
+		    io[WS(ios, 4)] = Tbv + Tby;
+		    io[WS(ios, 60)] = Tby - Tbv;
+	       }
+	       {
+		    E Tbt, Tbu, Tbz, TbA;
+		    Tbt = Tbp - Tbs;
+		    Tbu = Tbl - TaO;
+		    io[WS(ios, 28)] = Tbt + Tbu;
+		    io[WS(ios, 36)] = Tbu - Tbt;
+		    Tbz = Ta9 - Tag;
+		    TbA = Tbx - Tbw;
+		    ro[WS(ros, 36)] = Tbz - TbA;
+		    ro[WS(ros, 28)] = Tbz + TbA;
+	       }
+	       {
+		    E TbD, TbK, TbP, TbS;
+		    TbD = TbB + TbC;
+		    TbK = TbG + TbJ;
+		    ro[WS(ros, 52)] = TbD - TbK;
+		    ro[WS(ros, 12)] = TbD + TbK;
+		    TbP = TbM + TbL;
+		    TbS = TbQ + TbR;
+		    io[WS(ios, 12)] = TbP + TbS;
+		    io[WS(ios, 52)] = TbS - TbP;
+	       }
+	       {
+		    E TbN, TbO, TbT, TbU;
+		    TbN = TbL - TbM;
+		    TbO = TbJ - TbG;
+		    io[WS(ios, 20)] = TbN + TbO;
+		    io[WS(ios, 44)] = TbO - TbN;
+		    TbT = TbB - TbC;
+		    TbU = TbR - TbQ;
+		    ro[WS(ros, 44)] = TbT - TbU;
+		    ro[WS(ros, 20)] = TbT + TbU;
+	       }
+	  }
+	  {
+	       E Tev, Tf7, Tfc, Tfm, Tff, Tfn, TeC, Tfh, TeK, Tf2, TeV, Tf8, TeY, Tfi, TeR;
+	       E Tf3;
+	       {
+		    E Tet, Teu, Tfa, Tfb;
+		    Tet = TcD - TcG;
+		    Teu = TdY - TdX;
+		    Tev = Tet - Teu;
+		    Tf7 = Tet + Teu;
+		    Tfa = TeF + TeE;
+		    Tfb = TeH + TeI;
+		    Tfc = FMA(KP290284677, Tfa, KP956940335 * Tfb);
+		    Tfm = FNMS(KP290284677, Tfb, KP956940335 * Tfa);
+	       }
+	       {
+		    E Tfd, Tfe, Tey, TeB;
+		    Tfd = TeL + TeM;
+		    Tfe = TeP + TeO;
+		    Tff = FNMS(KP290284677, Tfe, KP956940335 * Tfd);
+		    Tfn = FMA(KP956940335, Tfe, KP290284677 * Tfd);
+		    Tey = FMA(KP555570233, Tew, KP831469612 * Tex);
+		    TeB = FNMS(KP555570233, TeA, KP831469612 * Tez);
+		    TeC = Tey - TeB;
+		    Tfh = TeB + Tey;
+	       }
+	       {
+		    E TeG, TeJ, TeT, TeU;
+		    TeG = TeE - TeF;
+		    TeJ = TeH - TeI;
+		    TeK = FMA(KP471396736, TeG, KP881921264 * TeJ);
+		    Tf2 = FNMS(KP471396736, TeJ, KP881921264 * TeG);
+		    TeT = FNMS(KP555570233, Tex, KP831469612 * Tew);
+		    TeU = FMA(KP831469612, TeA, KP555570233 * Tez);
+		    TeV = TeT - TeU;
+		    Tf8 = TeU + TeT;
+	       }
+	       {
+		    E TeW, TeX, TeN, TeQ;
+		    TeW = TcN - TcK;
+		    TeX = TdV - TdU;
+		    TeY = TeW - TeX;
+		    Tfi = TeX + TeW;
+		    TeN = TeL - TeM;
+		    TeQ = TeO - TeP;
+		    TeR = FNMS(KP471396736, TeQ, KP881921264 * TeN);
+		    Tf3 = FMA(KP881921264, TeQ, KP471396736 * TeN);
+	       }
+	       {
+		    E TeD, TeS, Tf1, Tf4;
+		    TeD = Tev + TeC;
+		    TeS = TeK + TeR;
+		    ro[WS(ros, 54)] = TeD - TeS;
+		    ro[WS(ros, 10)] = TeD + TeS;
+		    Tf1 = TeY + TeV;
+		    Tf4 = Tf2 + Tf3;
+		    io[WS(ios, 10)] = Tf1 + Tf4;
+		    io[WS(ios, 54)] = Tf4 - Tf1;
+	       }
+	       {
+		    E TeZ, Tf0, Tf5, Tf6;
+		    TeZ = TeV - TeY;
+		    Tf0 = TeR - TeK;
+		    io[WS(ios, 22)] = TeZ + Tf0;
+		    io[WS(ios, 42)] = Tf0 - TeZ;
+		    Tf5 = Tev - TeC;
+		    Tf6 = Tf3 - Tf2;
+		    ro[WS(ros, 42)] = Tf5 - Tf6;
+		    ro[WS(ros, 22)] = Tf5 + Tf6;
+	       }
+	       {
+		    E Tf9, Tfg, Tfl, Tfo;
+		    Tf9 = Tf7 + Tf8;
+		    Tfg = Tfc + Tff;
+		    ro[WS(ros, 58)] = Tf9 - Tfg;
+		    ro[WS(ros, 6)] = Tf9 + Tfg;
+		    Tfl = Tfi + Tfh;
+		    Tfo = Tfm + Tfn;
+		    io[WS(ios, 6)] = Tfl + Tfo;
+		    io[WS(ios, 58)] = Tfo - Tfl;
+	       }
+	       {
+		    E Tfj, Tfk, Tfp, Tfq;
+		    Tfj = Tfh - Tfi;
+		    Tfk = Tff - Tfc;
+		    io[WS(ios, 26)] = Tfj + Tfk;
+		    io[WS(ios, 38)] = Tfk - Tfj;
+		    Tfp = Tf7 - Tf8;
+		    Tfq = Tfn - Tfm;
+		    ro[WS(ros, 38)] = Tfp - Tfq;
+		    ro[WS(ros, 26)] = Tfp + Tfq;
+	       }
+	  }
+	  {
+	       E TcP, Te9, Tee, Teo, Teh, Tep, Td8, Tej, Tdu, Te4, TdT, Tea, Te0, Tek, TdP;
+	       E Te5;
+	       {
+		    E TcH, TcO, Tec, Ted;
+		    TcH = TcD + TcG;
+		    TcO = TcK + TcN;
+		    TcP = TcH + TcO;
+		    Te9 = TcH - TcO;
+		    Tec = Tde - Tdl;
+		    Ted = Tds - Tdp;
+		    Tee = FMA(KP773010453, Tec, KP634393284 * Ted);
+		    Teo = FNMS(KP634393284, Tec, KP773010453 * Ted);
+	       }
+	       {
+		    E Tef, Teg, TcY, Td7;
+		    Tef = Tdz - TdG;
+		    Teg = TdN - TdK;
+		    Teh = FNMS(KP634393284, Teg, KP773010453 * Tef);
+		    Tep = FMA(KP634393284, Tef, KP773010453 * Teg);
+		    TcY = FMA(KP980785280, TcU, KP195090322 * TcX);
+		    Td7 = FNMS(KP195090322, Td6, KP980785280 * Td3);
+		    Td8 = TcY + Td7;
+		    Tej = Td7 - TcY;
+	       }
+	       {
+		    E Tdm, Tdt, TdR, TdS;
+		    Tdm = Tde + Tdl;
+		    Tdt = Tdp + Tds;
+		    Tdu = FMA(KP995184726, Tdm, KP098017140 * Tdt);
+		    Te4 = FNMS(KP098017140, Tdm, KP995184726 * Tdt);
+		    TdR = FNMS(KP195090322, TcU, KP980785280 * TcX);
+		    TdS = FMA(KP195090322, Td3, KP980785280 * Td6);
+		    TdT = TdR + TdS;
+		    Tea = TdS - TdR;
+	       }
+	       {
+		    E TdW, TdZ, TdH, TdO;
+		    TdW = TdU + TdV;
+		    TdZ = TdX + TdY;
+		    Te0 = TdW + TdZ;
+		    Tek = TdZ - TdW;
+		    TdH = Tdz + TdG;
+		    TdO = TdK + TdN;
+		    TdP = FNMS(KP098017140, TdO, KP995184726 * TdH);
+		    Te5 = FMA(KP098017140, TdH, KP995184726 * TdO);
+	       }
+	       {
+		    E Td9, TdQ, Te3, Te6;
+		    Td9 = TcP + Td8;
+		    TdQ = Tdu + TdP;
+		    ro[WS(ros, 62)] = Td9 - TdQ;
+		    ro[WS(ros, 2)] = Td9 + TdQ;
+		    Te3 = Te0 + TdT;
+		    Te6 = Te4 + Te5;
+		    io[WS(ios, 2)] = Te3 + Te6;
+		    io[WS(ios, 62)] = Te6 - Te3;
+	       }
+	       {
+		    E Te1, Te2, Te7, Te8;
+		    Te1 = TdT - Te0;
+		    Te2 = TdP - Tdu;
+		    io[WS(ios, 30)] = Te1 + Te2;
+		    io[WS(ios, 34)] = Te2 - Te1;
+		    Te7 = TcP - Td8;
+		    Te8 = Te5 - Te4;
+		    ro[WS(ros, 34)] = Te7 - Te8;
+		    ro[WS(ros, 30)] = Te7 + Te8;
+	       }
+	       {
+		    E Teb, Tei, Ten, Teq;
+		    Teb = Te9 + Tea;
+		    Tei = Tee + Teh;
+		    ro[WS(ros, 50)] = Teb - Tei;
+		    ro[WS(ros, 14)] = Teb + Tei;
+		    Ten = Tek + Tej;
+		    Teq = Teo + Tep;
+		    io[WS(ios, 14)] = Ten + Teq;
+		    io[WS(ios, 50)] = Teq - Ten;
+	       }
+	       {
+		    E Tel, Tem, Ter, Tes;
+		    Tel = Tej - Tek;
+		    Tem = Teh - Tee;
+		    io[WS(ios, 18)] = Tel + Tem;
+		    io[WS(ios, 46)] = Tem - Tel;
+		    Ter = Te9 - Tea;
+		    Tes = Tep - Teo;
+		    ro[WS(ros, 46)] = Ter - Tes;
+		    ro[WS(ros, 18)] = Ter + Tes;
+	       }
+	  }
+	  {
+	       E T6v, T77, T6C, T7h, T6Y, T7i, T6V, T78, T6R, T7n, T73, T7f, T6K, T7m, T72;
+	       E T7c;
+	       {
+		    E T6t, T6u, T6T, T6U;
+		    T6t = T27 - T2e;
+		    T6u = T5Y - T5X;
+		    T6v = T6t - T6u;
+		    T77 = T6t + T6u;
+		    {
+			 E T6y, T6B, T6W, T6X;
+			 T6y = FMA(KP773010453, T6w, KP634393284 * T6x);
+			 T6B = FNMS(KP634393284, T6A, KP773010453 * T6z);
+			 T6C = T6y - T6B;
+			 T7h = T6B + T6y;
+			 T6W = T2x - T2o;
+			 T6X = T5V - T5S;
+			 T6Y = T6W - T6X;
+			 T7i = T6X + T6W;
+		    }
+		    T6T = FNMS(KP634393284, T6w, KP773010453 * T6x);
+		    T6U = FMA(KP634393284, T6z, KP773010453 * T6A);
+		    T6V = T6T - T6U;
+		    T78 = T6U + T6T;
+		    {
+			 E T6N, T7d, T6Q, T7e, T6M, T6O;
+			 T6M = T5I - T5H;
+			 T6N = T6L - T6M;
+			 T7d = T6L + T6M;
+			 T6O = T5v - T5e;
+			 T6Q = T6O - T6P;
+			 T7e = T6P + T6O;
+			 T6R = FNMS(KP427555093, T6Q, KP903989293 * T6N);
+			 T7n = FMA(KP941544065, T7e, KP336889853 * T7d);
+			 T73 = FMA(KP903989293, T6Q, KP427555093 * T6N);
+			 T7f = FNMS(KP336889853, T7e, KP941544065 * T7d);
+		    }
+		    {
+			 E T6G, T7a, T6J, T7b, T6E, T6I;
+			 T6E = T4g - T3Z;
+			 T6G = T6E - T6F;
+			 T7a = T6F + T6E;
+			 T6I = T4t - T4s;
+			 T6J = T6H - T6I;
+			 T7b = T6H + T6I;
+			 T6K = FMA(KP427555093, T6G, KP903989293 * T6J);
+			 T7m = FNMS(KP336889853, T7b, KP941544065 * T7a);
+			 T72 = FNMS(KP427555093, T6J, KP903989293 * T6G);
+			 T7c = FMA(KP336889853, T7a, KP941544065 * T7b);
+		    }
+	       }
+	       {
+		    E T6D, T6S, T71, T74;
+		    T6D = T6v + T6C;
+		    T6S = T6K + T6R;
+		    ro[WS(ros, 55)] = T6D - T6S;
+		    ro[WS(ros, 9)] = T6D + T6S;
+		    T71 = T6Y + T6V;
+		    T74 = T72 + T73;
+		    io[WS(ios, 9)] = T71 + T74;
+		    io[WS(ios, 55)] = T74 - T71;
+	       }
+	       {
+		    E T6Z, T70, T75, T76;
+		    T6Z = T6V - T6Y;
+		    T70 = T6R - T6K;
+		    io[WS(ios, 23)] = T6Z + T70;
+		    io[WS(ios, 41)] = T70 - T6Z;
+		    T75 = T6v - T6C;
+		    T76 = T73 - T72;
+		    ro[WS(ros, 41)] = T75 - T76;
+		    ro[WS(ros, 23)] = T75 + T76;
+	       }
+	       {
+		    E T79, T7g, T7l, T7o;
+		    T79 = T77 + T78;
+		    T7g = T7c + T7f;
+		    ro[WS(ros, 57)] = T79 - T7g;
+		    ro[WS(ros, 7)] = T79 + T7g;
+		    T7l = T7i + T7h;
+		    T7o = T7m + T7n;
+		    io[WS(ios, 7)] = T7l + T7o;
+		    io[WS(ios, 57)] = T7o - T7l;
+	       }
+	       {
+		    E T7j, T7k, T7p, T7q;
+		    T7j = T7h - T7i;
+		    T7k = T7f - T7c;
+		    io[WS(ios, 25)] = T7j + T7k;
+		    io[WS(ios, 39)] = T7k - T7j;
+		    T7p = T77 - T78;
+		    T7q = T7n - T7m;
+		    ro[WS(ros, 39)] = T7p - T7q;
+		    ro[WS(ros, 25)] = T7p + T7q;
+	       }
+	  }
+	  {
+	       E T99, T9L, T9g, T9V, T9C, T9W, T9z, T9M, T9v, Ta1, T9H, T9T, T9o, Ta0, T9G;
+	       E T9Q;
+	       {
+		    E T97, T98, T9x, T9y;
+		    T97 = T7r - T7s;
+		    T98 = T8C - T8B;
+		    T99 = T97 - T98;
+		    T9L = T97 + T98;
+		    {
+			 E T9c, T9f, T9A, T9B;
+			 T9c = FMA(KP471396736, T9a, KP881921264 * T9b);
+			 T9f = FNMS(KP471396736, T9e, KP881921264 * T9d);
+			 T9g = T9c - T9f;
+			 T9V = T9f + T9c;
+			 T9A = T7z - T7w;
+			 T9B = T8z - T8y;
+			 T9C = T9A - T9B;
+			 T9W = T9B + T9A;
+		    }
+		    T9x = FNMS(KP471396736, T9b, KP881921264 * T9a);
+		    T9y = FMA(KP881921264, T9e, KP471396736 * T9d);
+		    T9z = T9x - T9y;
+		    T9M = T9y + T9x;
+		    {
+			 E T9r, T9R, T9u, T9S, T9q, T9s;
+			 T9q = T8q - T8p;
+			 T9r = T9p - T9q;
+			 T9R = T9p + T9q;
+			 T9s = T8j - T8g;
+			 T9u = T9s - T9t;
+			 T9S = T9t + T9s;
+			 T9v = FNMS(KP514102744, T9u, KP857728610 * T9r);
+			 Ta1 = FMA(KP970031253, T9S, KP242980179 * T9R);
+			 T9H = FMA(KP857728610, T9u, KP514102744 * T9r);
+			 T9T = FNMS(KP242980179, T9S, KP970031253 * T9R);
+		    }
+		    {
+			 E T9k, T9O, T9n, T9P, T9i, T9m;
+			 T9i = T80 - T7X;
+			 T9k = T9i - T9j;
+			 T9O = T9j + T9i;
+			 T9m = T87 - T86;
+			 T9n = T9l - T9m;
+			 T9P = T9l + T9m;
+			 T9o = FMA(KP514102744, T9k, KP857728610 * T9n);
+			 Ta0 = FNMS(KP242980179, T9P, KP970031253 * T9O);
+			 T9G = FNMS(KP514102744, T9n, KP857728610 * T9k);
+			 T9Q = FMA(KP242980179, T9O, KP970031253 * T9P);
+		    }
+	       }
+	       {
+		    E T9h, T9w, T9F, T9I;
+		    T9h = T99 + T9g;
+		    T9w = T9o + T9v;
+		    ro[WS(ros, 53)] = T9h - T9w;
+		    ro[WS(ros, 11)] = T9h + T9w;
+		    T9F = T9C + T9z;
+		    T9I = T9G + T9H;
+		    io[WS(ios, 11)] = T9F + T9I;
+		    io[WS(ios, 53)] = T9I - T9F;
+	       }
+	       {
+		    E T9D, T9E, T9J, T9K;
+		    T9D = T9z - T9C;
+		    T9E = T9v - T9o;
+		    io[WS(ios, 21)] = T9D + T9E;
+		    io[WS(ios, 43)] = T9E - T9D;
+		    T9J = T99 - T9g;
+		    T9K = T9H - T9G;
+		    ro[WS(ros, 43)] = T9J - T9K;
+		    ro[WS(ros, 21)] = T9J + T9K;
+	       }
+	       {
+		    E T9N, T9U, T9Z, Ta2;
+		    T9N = T9L + T9M;
+		    T9U = T9Q + T9T;
+		    ro[WS(ros, 59)] = T9N - T9U;
+		    ro[WS(ros, 5)] = T9N + T9U;
+		    T9Z = T9W + T9V;
+		    Ta2 = Ta0 + Ta1;
+		    io[WS(ios, 5)] = T9Z + Ta2;
+		    io[WS(ios, 59)] = Ta2 - T9Z;
+	       }
+	       {
+		    E T9X, T9Y, Ta3, Ta4;
+		    T9X = T9V - T9W;
+		    T9Y = T9T - T9Q;
+		    io[WS(ios, 27)] = T9X + T9Y;
+		    io[WS(ios, 37)] = T9Y - T9X;
+		    Ta3 = T9L - T9M;
+		    Ta4 = Ta1 - Ta0;
+		    ro[WS(ros, 37)] = Ta3 - Ta4;
+		    ro[WS(ros, 27)] = Ta3 + Ta4;
+	       }
+	  }
+	  {
+	       E T2z, T69, T3g, T6j, T60, T6k, T5P, T6a, T5L, T6p, T65, T6h, T4w, T6o, T64;
+	       E T6e;
+	       {
+		    E T2f, T2y, T5N, T5O;
+		    T2f = T27 + T2e;
+		    T2y = T2o + T2x;
+		    T2z = T2f + T2y;
+		    T69 = T2f - T2y;
+		    {
+			 E T2U, T3f, T5W, T5Z;
+			 T2U = FMA(KP098017140, T2M, KP995184726 * T2T);
+			 T3f = FNMS(KP098017140, T3e, KP995184726 * T37);
+			 T3g = T2U + T3f;
+			 T6j = T3f - T2U;
+			 T5W = T5S + T5V;
+			 T5Z = T5X + T5Y;
+			 T60 = T5W + T5Z;
+			 T6k = T5Z - T5W;
+		    }
+		    T5N = FNMS(KP098017140, T2T, KP995184726 * T2M);
+		    T5O = FMA(KP995184726, T3e, KP098017140 * T37);
+		    T5P = T5N + T5O;
+		    T6a = T5O - T5N;
+		    {
+			 E T5x, T6f, T5K, T6g, T5w, T5J;
+			 T5w = T5e + T5v;
+			 T5x = T4X + T5w;
+			 T6f = T4X - T5w;
+			 T5J = T5H + T5I;
+			 T5K = T5G + T5J;
+			 T6g = T5J - T5G;
+			 T5L = FNMS(KP049067674, T5K, KP998795456 * T5x);
+			 T6p = FMA(KP671558954, T6f, KP740951125 * T6g);
+			 T65 = FMA(KP049067674, T5x, KP998795456 * T5K);
+			 T6h = FNMS(KP671558954, T6g, KP740951125 * T6f);
+		    }
+		    {
+			 E T4i, T6c, T4v, T6d, T4h, T4u;
+			 T4h = T3Z + T4g;
+			 T4i = T3I + T4h;
+			 T6c = T3I - T4h;
+			 T4u = T4s + T4t;
+			 T4v = T4r + T4u;
+			 T6d = T4u - T4r;
+			 T4w = FMA(KP998795456, T4i, KP049067674 * T4v);
+			 T6o = FNMS(KP671558954, T6c, KP740951125 * T6d);
+			 T64 = FNMS(KP049067674, T4i, KP998795456 * T4v);
+			 T6e = FMA(KP740951125, T6c, KP671558954 * T6d);
+		    }
+	       }
+	       {
+		    E T3h, T5M, T63, T66;
+		    T3h = T2z + T3g;
+		    T5M = T4w + T5L;
+		    ro[WS(ros, 63)] = T3h - T5M;
+		    ro[WS(ros, 1)] = T3h + T5M;
+		    T63 = T60 + T5P;
+		    T66 = T64 + T65;
+		    io[WS(ios, 1)] = T63 + T66;
+		    io[WS(ios, 63)] = T66 - T63;
+	       }
+	       {
+		    E T61, T62, T67, T68;
+		    T61 = T5P - T60;
+		    T62 = T5L - T4w;
+		    io[WS(ios, 31)] = T61 + T62;
+		    io[WS(ios, 33)] = T62 - T61;
+		    T67 = T2z - T3g;
+		    T68 = T65 - T64;
+		    ro[WS(ros, 33)] = T67 - T68;
+		    ro[WS(ros, 31)] = T67 + T68;
+	       }
+	       {
+		    E T6b, T6i, T6n, T6q;
+		    T6b = T69 + T6a;
+		    T6i = T6e + T6h;
+		    ro[WS(ros, 49)] = T6b - T6i;
+		    ro[WS(ros, 15)] = T6b + T6i;
+		    T6n = T6k + T6j;
+		    T6q = T6o + T6p;
+		    io[WS(ios, 15)] = T6n + T6q;
+		    io[WS(ios, 49)] = T6q - T6n;
+	       }
+	       {
+		    E T6l, T6m, T6r, T6s;
+		    T6l = T6j - T6k;
+		    T6m = T6h - T6e;
+		    io[WS(ios, 17)] = T6l + T6m;
+		    io[WS(ios, 47)] = T6m - T6l;
+		    T6r = T69 - T6a;
+		    T6s = T6p - T6o;
+		    ro[WS(ros, 47)] = T6r - T6s;
+		    ro[WS(ros, 17)] = T6r + T6s;
+	       }
+	  }
+	  {
+	       E T7B, T8N, T7Q, T8X, T8E, T8Y, T8x, T8O, T8t, T93, T8J, T8V, T8a, T92, T8I;
+	       E T8S;
+	       {
+		    E T7t, T7A, T8v, T8w;
+		    T7t = T7r + T7s;
+		    T7A = T7w + T7z;
+		    T7B = T7t + T7A;
+		    T8N = T7t - T7A;
+		    {
+			 E T7I, T7P, T8A, T8D;
+			 T7I = FMA(KP956940335, T7E, KP290284677 * T7H);
+			 T7P = FNMS(KP290284677, T7O, KP956940335 * T7L);
+			 T7Q = T7I + T7P;
+			 T8X = T7P - T7I;
+			 T8A = T8y + T8z;
+			 T8D = T8B + T8C;
+			 T8E = T8A + T8D;
+			 T8Y = T8D - T8A;
+		    }
+		    T8v = FNMS(KP290284677, T7E, KP956940335 * T7H);
+		    T8w = FMA(KP290284677, T7L, KP956940335 * T7O);
+		    T8x = T8v + T8w;
+		    T8O = T8w - T8v;
+		    {
+			 E T8l, T8T, T8s, T8U, T8k, T8r;
+			 T8k = T8g + T8j;
+			 T8l = T8d + T8k;
+			 T8T = T8d - T8k;
+			 T8r = T8p + T8q;
+			 T8s = T8o + T8r;
+			 T8U = T8r - T8o;
+			 T8t = FNMS(KP146730474, T8s, KP989176509 * T8l);
+			 T93 = FMA(KP595699304, T8T, KP803207531 * T8U);
+			 T8J = FMA(KP146730474, T8l, KP989176509 * T8s);
+			 T8V = FNMS(KP595699304, T8U, KP803207531 * T8T);
+		    }
+		    {
+			 E T82, T8Q, T89, T8R, T81, T88;
+			 T81 = T7X + T80;
+			 T82 = T7U + T81;
+			 T8Q = T7U - T81;
+			 T88 = T86 + T87;
+			 T89 = T85 + T88;
+			 T8R = T88 - T85;
+			 T8a = FMA(KP989176509, T82, KP146730474 * T89);
+			 T92 = FNMS(KP595699304, T8Q, KP803207531 * T8R);
+			 T8I = FNMS(KP146730474, T82, KP989176509 * T89);
+			 T8S = FMA(KP803207531, T8Q, KP595699304 * T8R);
+		    }
+	       }
+	       {
+		    E T7R, T8u, T8H, T8K;
+		    T7R = T7B + T7Q;
+		    T8u = T8a + T8t;
+		    ro[WS(ros, 61)] = T7R - T8u;
+		    ro[WS(ros, 3)] = T7R + T8u;
+		    T8H = T8E + T8x;
+		    T8K = T8I + T8J;
+		    io[WS(ios, 3)] = T8H + T8K;
+		    io[WS(ios, 61)] = T8K - T8H;
+	       }
+	       {
+		    E T8F, T8G, T8L, T8M;
+		    T8F = T8x - T8E;
+		    T8G = T8t - T8a;
+		    io[WS(ios, 29)] = T8F + T8G;
+		    io[WS(ios, 35)] = T8G - T8F;
+		    T8L = T7B - T7Q;
+		    T8M = T8J - T8I;
+		    ro[WS(ros, 35)] = T8L - T8M;
+		    ro[WS(ros, 29)] = T8L + T8M;
+	       }
+	       {
+		    E T8P, T8W, T91, T94;
+		    T8P = T8N + T8O;
+		    T8W = T8S + T8V;
+		    ro[WS(ros, 51)] = T8P - T8W;
+		    ro[WS(ros, 13)] = T8P + T8W;
+		    T91 = T8Y + T8X;
+		    T94 = T92 + T93;
+		    io[WS(ios, 13)] = T91 + T94;
+		    io[WS(ios, 51)] = T94 - T91;
+	       }
+	       {
+		    E T8Z, T90, T95, T96;
+		    T8Z = T8X - T8Y;
+		    T90 = T8V - T8S;
+		    io[WS(ios, 19)] = T8Z + T90;
+		    io[WS(ios, 45)] = T90 - T8Z;
+		    T95 = T8N - T8O;
+		    T96 = T93 - T92;
+		    ro[WS(ros, 45)] = T95 - T96;
+		    ro[WS(ros, 19)] = T95 + T96;
+	       }
+	  }
+     }
+}
+
+static void mr2hc_128(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Batch driver: calls the single-transform kernel mr2hc_128_0 once per loop pass, v passes in total. */
+     int i;
+     for (i = v; i > 0; --i) { /* counts down from v; the body never runs when v <= 0 */
+	  mr2hc_128_0(I, ro, io, is, ros, ios);
+	  I += ivs; /* advance the input pointer by ivs for the next pass */
+	  ro += ovs; /* both output pointers advance by the same ovs */
+	  io += ovs;
+     }
+}
+
+static const kr2hc_desc desc = { 128, "mr2hc_128", {812, 186, 144, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* NOTE(review): looks like size, name, operation counts, genus; kr2hc_desc is declared elsewhere -- confirm field meanings */
+
+void X(codelet_mr2hc_128) (planner *p) { /* registration hook: passes the batch driver and its descriptor to X(kr2hc_register) */
+     X(kr2hc_register) (p, mr2hc_128, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/mr2hc_32.c b/src/fftw3/rdft/codelets/r2hc/mr2hc_32.c
new file mode 100644
index 0000000..4f351a4
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/mr2hc_32.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:45 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc_noinline -compact -variables 4 -n 32 -name mr2hc_32 -include r2hc.h */
+
+/*
+ * This function contains 156 FP additions, 42 FP multiplications,
+ * (or, 140 additions, 26 multiplications, 16 fused multiply/add),
+ * 53 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: mr2hc_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: mr2hc_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: mr2hc_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void mr2hc_32_0(const R *I, R *ro, R *io, stride is, stride ros, stride ios)
+{ /* Single-transform kernel: reads I[0] and I[WS(is, k)] for k = 1..31, stores ro at indices 0..16 and io at indices 1..15. */
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191); /* sin(3*pi/16) */
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618); /* sin(pi/16) */
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* cos(pi/16) */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
+     {
+	  E T7, T2b, Tv, T1l, Te, T2o, Ty, T1k, Tt, T2d, TF, T1h, Tm, T2c, TC; /* intermediates set by the six input stages below and consumed by the five output stages */
+	  E T1i, T1Z, T22, T2k, T2j, T1e, T1C, T19, T1B, T1S, T1V, T2h, T2g, TX, T1z;
+	  E TS, T1y;
+	  { /* input stage: samples 0, 16, 8, 24 */
+	       E T1, T2, T3, T4, T5, T6;
+	       T1 = I[0];
+	       T2 = I[WS(is, 16)];
+	       T3 = T1 + T2;
+	       T4 = I[WS(is, 8)];
+	       T5 = I[WS(is, 24)];
+	       T6 = T4 + T5;
+	       T7 = T3 + T6;
+	       T2b = T3 - T6;
+	       Tv = T1 - T2;
+	       T1l = T4 - T5;
+	  }
+	  { /* input stage: samples 4, 20, 28, 12 */
+	       E Ta, Tw, Td, Tx;
+	       {
+		    E T8, T9, Tb, Tc;
+		    T8 = I[WS(is, 4)];
+		    T9 = I[WS(is, 20)];
+		    Ta = T8 + T9;
+		    Tw = T8 - T9;
+		    Tb = I[WS(is, 28)];
+		    Tc = I[WS(is, 12)];
+		    Td = Tb + Tc;
+		    Tx = Tb - Tc;
+	       }
+	       Te = Ta + Td;
+	       T2o = Td - Ta;
+	       Ty = KP707106781 * (Tw + Tx);
+	       T1k = KP707106781 * (Tx - Tw);
+	  }
+	  { /* input stage: samples 30, 14, 6, 22 */
+	       E Tp, TD, Ts, TE;
+	       {
+		    E Tn, To, Tq, Tr;
+		    Tn = I[WS(is, 30)];
+		    To = I[WS(is, 14)];
+		    Tp = Tn + To;
+		    TD = Tn - To;
+		    Tq = I[WS(is, 6)];
+		    Tr = I[WS(is, 22)];
+		    Ts = Tq + Tr;
+		    TE = Tq - Tr;
+	       }
+	       Tt = Tp + Ts;
+	       T2d = Tp - Ts;
+	       TF = FMA(KP923879532, TD, KP382683432 * TE); /* NOTE(review): FMA/FNMS are defined outside this file; presumably a*b+c and c-a*b -- confirm */
+	       T1h = FNMS(KP923879532, TE, KP382683432 * TD);
+	  }
+	  { /* input stage: samples 2, 18, 10, 26 */
+	       E Ti, TA, Tl, TB;
+	       {
+		    E Tg, Th, Tj, Tk;
+		    Tg = I[WS(is, 2)];
+		    Th = I[WS(is, 18)];
+		    Ti = Tg + Th;
+		    TA = Tg - Th;
+		    Tj = I[WS(is, 10)];
+		    Tk = I[WS(is, 26)];
+		    Tl = Tj + Tk;
+		    TB = Tj - Tk;
+	       }
+	       Tm = Ti + Tl;
+	       T2c = Ti - Tl;
+	       TC = FNMS(KP382683432, TB, KP923879532 * TA);
+	       T1i = FMA(KP382683432, TA, KP923879532 * TB);
+	  }
+	  { /* input stage: odd samples 31, 15, 7, 23, 3, 19, 27, 11 */
+	       E T11, T1X, T1d, T1Y, T14, T20, T17, T21, T1a, T18;
+	       {
+		    E TZ, T10, T1b, T1c;
+		    TZ = I[WS(is, 31)];
+		    T10 = I[WS(is, 15)];
+		    T11 = TZ - T10;
+		    T1X = TZ + T10;
+		    T1b = I[WS(is, 7)];
+		    T1c = I[WS(is, 23)];
+		    T1d = T1b - T1c;
+		    T1Y = T1b + T1c;
+	       }
+	       {
+		    E T12, T13, T15, T16;
+		    T12 = I[WS(is, 3)];
+		    T13 = I[WS(is, 19)];
+		    T14 = T12 - T13;
+		    T20 = T12 + T13;
+		    T15 = I[WS(is, 27)];
+		    T16 = I[WS(is, 11)];
+		    T17 = T15 - T16;
+		    T21 = T15 + T16;
+	       }
+	       T1Z = T1X + T1Y;
+	       T22 = T20 + T21;
+	       T2k = T21 - T20;
+	       T2j = T1X - T1Y;
+	       T1a = KP707106781 * (T17 - T14);
+	       T1e = T1a - T1d;
+	       T1C = T1d + T1a;
+	       T18 = KP707106781 * (T14 + T17);
+	       T19 = T11 + T18;
+	       T1B = T11 - T18;
+	  }
+	  { /* input stage: odd samples 1, 17, 9, 25, 5, 21, 29, 13 */
+	       E TK, T1Q, TW, T1R, TN, T1T, TQ, T1U, TT, TR;
+	       {
+		    E TI, TJ, TU, TV;
+		    TI = I[WS(is, 1)];
+		    TJ = I[WS(is, 17)];
+		    TK = TI - TJ;
+		    T1Q = TI + TJ;
+		    TU = I[WS(is, 9)];
+		    TV = I[WS(is, 25)];
+		    TW = TU - TV;
+		    T1R = TU + TV;
+	       }
+	       {
+		    E TL, TM, TO, TP;
+		    TL = I[WS(is, 5)];
+		    TM = I[WS(is, 21)];
+		    TN = TL - TM;
+		    T1T = TL + TM;
+		    TO = I[WS(is, 29)];
+		    TP = I[WS(is, 13)];
+		    TQ = TO - TP;
+		    T1U = TO + TP;
+	       }
+	       T1S = T1Q + T1R;
+	       T1V = T1T + T1U;
+	       T2h = T1U - T1T;
+	       T2g = T1Q - T1R;
+	       TT = KP707106781 * (TQ - TN);
+	       TX = TT - TW;
+	       T1z = TW + TT;
+	       TR = KP707106781 * (TN + TQ);
+	       TS = TK + TR;
+	       T1y = TK - TR;
+	  }
+	  { /* output stage: ro at 0, 8, 16 and io at 8 */
+	       E Tf, Tu, T27, T28, T29, T2a;
+	       Tf = T7 + Te;
+	       Tu = Tm + Tt;
+	       T27 = Tf + Tu; /* sum of the 16 even-indexed samples */
+	       T28 = T1S + T1V;
+	       T29 = T1Z + T22;
+	       T2a = T28 + T29; /* sum of the 16 odd-indexed samples */
+	       ro[WS(ros, 8)] = Tf - Tu;
+	       io[WS(ios, 8)] = T29 - T28;
+	       ro[WS(ros, 16)] = T27 - T2a; /* even-sample sum minus odd-sample sum */
+	       ro[0] = T27 + T2a; /* sum of all 32 samples */
+	  }
+	  { /* output stage: ro and io at 4 and 12 */
+	       E T1P, T25, T24, T26, T1W, T23;
+	       T1P = T7 - Te;
+	       T25 = Tt - Tm;
+	       T1W = T1S - T1V;
+	       T23 = T1Z - T22;
+	       T24 = KP707106781 * (T1W + T23);
+	       T26 = KP707106781 * (T23 - T1W);
+	       ro[WS(ros, 12)] = T1P - T24;
+	       io[WS(ios, 12)] = T26 - T25;
+	       ro[WS(ros, 4)] = T1P + T24;
+	       io[WS(ios, 4)] = T25 + T26;
+	  }
+	  { /* output stage: ro and io at 2, 6, 10, 14 */
+	       E T2f, T2v, T2p, T2r, T2m, T2q, T2u, T2w, T2e, T2n;
+	       T2e = KP707106781 * (T2c + T2d);
+	       T2f = T2b + T2e;
+	       T2v = T2b - T2e;
+	       T2n = KP707106781 * (T2d - T2c);
+	       T2p = T2n - T2o;
+	       T2r = T2o + T2n;
+	       {
+		    E T2i, T2l, T2s, T2t;
+		    T2i = FMA(KP923879532, T2g, KP382683432 * T2h);
+		    T2l = FNMS(KP382683432, T2k, KP923879532 * T2j);
+		    T2m = T2i + T2l;
+		    T2q = T2l - T2i;
+		    T2s = FNMS(KP382683432, T2g, KP923879532 * T2h);
+		    T2t = FMA(KP382683432, T2j, KP923879532 * T2k);
+		    T2u = T2s + T2t;
+		    T2w = T2t - T2s;
+	       }
+	       ro[WS(ros, 14)] = T2f - T2m;
+	       io[WS(ios, 14)] = T2u - T2r;
+	       ro[WS(ros, 2)] = T2f + T2m;
+	       io[WS(ios, 2)] = T2r + T2u;
+	       io[WS(ios, 6)] = T2p + T2q;
+	       ro[WS(ros, 6)] = T2v + T2w;
+	       io[WS(ios, 10)] = T2q - T2p;
+	       ro[WS(ros, 10)] = T2v - T2w;
+	  }
+	  { /* output stage: ro and io at 1, 7, 9, 15 */
+	       E TH, T1t, T1s, T1u, T1g, T1o, T1n, T1p;
+	       {
+		    E Tz, TG, T1q, T1r;
+		    Tz = Tv + Ty;
+		    TG = TC + TF;
+		    TH = Tz + TG;
+		    T1t = Tz - TG;
+		    T1q = FNMS(KP195090322, TS, KP980785280 * TX);
+		    T1r = FMA(KP195090322, T19, KP980785280 * T1e);
+		    T1s = T1q + T1r;
+		    T1u = T1r - T1q;
+	       }
+	       {
+		    E TY, T1f, T1j, T1m;
+		    TY = FMA(KP980785280, TS, KP195090322 * TX);
+		    T1f = FNMS(KP195090322, T1e, KP980785280 * T19);
+		    T1g = TY + T1f;
+		    T1o = T1f - TY;
+		    T1j = T1h - T1i;
+		    T1m = T1k - T1l;
+		    T1n = T1j - T1m;
+		    T1p = T1m + T1j;
+	       }
+	       ro[WS(ros, 15)] = TH - T1g;
+	       io[WS(ios, 15)] = T1s - T1p;
+	       ro[WS(ros, 1)] = TH + T1g;
+	       io[WS(ios, 1)] = T1p + T1s;
+	       io[WS(ios, 7)] = T1n + T1o;
+	       ro[WS(ros, 7)] = T1t + T1u;
+	       io[WS(ios, 9)] = T1o - T1n;
+	       ro[WS(ros, 9)] = T1t - T1u;
+	  }
+	  { /* output stage: ro and io at 3, 5, 11, 13 */
+	       E T1x, T1N, T1M, T1O, T1E, T1I, T1H, T1J;
+	       {
+		    E T1v, T1w, T1K, T1L;
+		    T1v = Tv - Ty;
+		    T1w = T1i + T1h;
+		    T1x = T1v + T1w;
+		    T1N = T1v - T1w;
+		    T1K = FNMS(KP555570233, T1y, KP831469612 * T1z);
+		    T1L = FMA(KP555570233, T1B, KP831469612 * T1C);
+		    T1M = T1K + T1L;
+		    T1O = T1L - T1K;
+	       }
+	       {
+		    E T1A, T1D, T1F, T1G;
+		    T1A = FMA(KP831469612, T1y, KP555570233 * T1z);
+		    T1D = FNMS(KP555570233, T1C, KP831469612 * T1B);
+		    T1E = T1A + T1D;
+		    T1I = T1D - T1A;
+		    T1F = TF - TC;
+		    T1G = T1l + T1k;
+		    T1H = T1F - T1G;
+		    T1J = T1G + T1F;
+	       }
+	       ro[WS(ros, 13)] = T1x - T1E;
+	       io[WS(ios, 13)] = T1M - T1J;
+	       ro[WS(ros, 3)] = T1x + T1E;
+	       io[WS(ios, 3)] = T1J + T1M;
+	       io[WS(ios, 5)] = T1H + T1I;
+	       ro[WS(ros, 5)] = T1N + T1O;
+	       io[WS(ios, 11)] = T1I - T1H;
+	       ro[WS(ros, 11)] = T1N - T1O;
+	  }
+     }
+}
+
+static void mr2hc_32(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Batch driver: calls the single-transform kernel mr2hc_32_0 once per loop pass, v passes in total. */
+     int i;
+     for (i = v; i > 0; --i) { /* counts down from v; the body never runs when v <= 0 */
+	  mr2hc_32_0(I, ro, io, is, ros, ios);
+	  I += ivs; /* advance the input pointer by ivs for the next pass */
+	  ro += ovs; /* both output pointers advance by the same ovs */
+	  io += ovs;
+     }
+}
+
+static const kr2hc_desc desc = { 32, "mr2hc_32", {140, 26, 16, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* 140/26/16 match the additions, multiplications and fused multiply/adds quoted in this file's header comment; NOTE(review): remaining fields are declared in kr2hc_desc elsewhere -- confirm */
+
+void X(codelet_mr2hc_32) (planner *p) { /* registration hook: passes the batch driver and its descriptor to X(kr2hc_register) */
+     X(kr2hc_register) (p, mr2hc_32, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/mr2hc_64.c b/src/fftw3/rdft/codelets/r2hc/mr2hc_64.c
new file mode 100644
index 0000000..c859fbf
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/mr2hc_64.c
@@ -0,0 +1,729 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:49 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc_noinline -compact -variables 4 -n 64 -name mr2hc_64 -include r2hc.h */
+
+/*
+ * This function contains 394 FP additions, 124 FP multiplications,
+ * (or, 342 additions, 72 multiplications, 52 fused multiply/add),
+ * 105 stack variables, and 128 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: mr2hc_64.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: mr2hc_64.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: mr2hc_64.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void mr2hc_64_0(const R *I, R *ro, R *io, stride is, stride ros, stride ios)
+{ /* One size-64 pass: reads the 64 inputs I[WS(is, 0..63)] once each, stores ro[WS(ros, 0..32)] and io[WS(ios, 1..31)]; io at 0 and 32 is never stored. "input k"/"output k" below means the k passed to WS(). */
+     DK(KP773010453, +0.773010453362736960810906609758469800971041293); /* cos(7*pi/32) */
+     DK(KP634393284, +0.634393284163645498215171613225493370675687095); /* sin(7*pi/32) */
+     DK(KP098017140, +0.098017140329560601994195563888641845861136673); /* sin(pi/32) */
+     DK(KP995184726, +0.995184726672196886244836953109479921575474869); /* cos(pi/32) */
+     DK(KP290284677, +0.290284677254462367636192375817395274691476278); /* sin(3*pi/32) */
+     DK(KP956940335, +0.956940335732208864935797886980269969482849206); /* cos(3*pi/32) */
+     DK(KP471396736, +0.471396736825997648556387625905254377657460319); /* sin(5*pi/32) */
+     DK(KP881921264, +0.881921264348355029712756863660388349508442621); /* cos(5*pi/32) */
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618); /* sin(pi/16) */
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* cos(pi/16) */
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191); /* sin(3*pi/16) */
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
+     {
+	  E T4l, T5a, T15, T3n, T2T, T3Q, T7, Te, Tf, T4A, T4L, T1X, T3B, T23, T3y;
+	  E T5I, T66, T4R, T52, T2j, T3F, T2H, T3I, T5P, T69, T1i, T3t, T1l, T3u, TZ;
+	  E T63, T4v, T58, T1r, T3r, T1u, T3q, TK, T62, T4s, T57, Tm, Tt, Tu, T4o;
+	  E T5b, T1c, T3R, T2Q, T3o, T1M, T3z, T5L, T67, T26, T3C, T4H, T4M, T2y, T3J;
+	  E T5S, T6a, T2C, T3G, T4Y, T53; /* intermediates assigned in the eight load blocks and consumed by the store blocks further down */
+	  { /* load block: inputs 0, 32, 56, 24, 16, 48, 8, 40 (indices congruent to 0 mod 8) */
+	       E T3, T11, Td, T13, T6, T2S, Ta, T12, T14, T2R;
+	       {
+		    E T1, T2, Tb, Tc;
+		    T1 = I[0];
+		    T2 = I[WS(is, 32)];
+		    T3 = T1 + T2;
+		    T11 = T1 - T2;
+		    Tb = I[WS(is, 56)];
+		    Tc = I[WS(is, 24)];
+		    Td = Tb + Tc;
+		    T13 = Tb - Tc;
+	       }
+	       {
+		    E T4, T5, T8, T9;
+		    T4 = I[WS(is, 16)];
+		    T5 = I[WS(is, 48)];
+		    T6 = T4 + T5;
+		    T2S = T4 - T5;
+		    T8 = I[WS(is, 8)];
+		    T9 = I[WS(is, 40)];
+		    Ta = T8 + T9;
+		    T12 = T8 - T9;
+	       }
+	       T4l = T3 - T6;
+	       T5a = Td - Ta;
+	       T14 = KP707106781 * (T12 + T13);
+	       T15 = T11 + T14;
+	       T3n = T11 - T14;
+	       T2R = KP707106781 * (T13 - T12);
+	       T2T = T2R - T2S;
+	       T3Q = T2S + T2R;
+	       T7 = T3 + T6;
+	       Te = Ta + Td;
+	       Tf = T7 + Te;
+	  }
+	  { /* load block: inputs 57, 25, 1, 33, 9, 41, 17, 49 (indices congruent to 1 mod 8) */
+	       E T1P, T4J, T21, T4y, T1S, T4K, T1W, T4z;
+	       {
+		    E T1N, T1O, T1Z, T20;
+		    T1N = I[WS(is, 57)];
+		    T1O = I[WS(is, 25)];
+		    T1P = T1N - T1O;
+		    T4J = T1N + T1O;
+		    T1Z = I[WS(is, 1)];
+		    T20 = I[WS(is, 33)];
+		    T21 = T1Z - T20;
+		    T4y = T1Z + T20;
+	       }
+	       {
+		    E T1Q, T1R, T1U, T1V;
+		    T1Q = I[WS(is, 9)];
+		    T1R = I[WS(is, 41)];
+		    T1S = T1Q - T1R;
+		    T4K = T1Q + T1R;
+		    T1U = I[WS(is, 17)];
+		    T1V = I[WS(is, 49)];
+		    T1W = T1U - T1V;
+		    T4z = T1U + T1V;
+	       }
+	       T4A = T4y - T4z;
+	       T4L = T4J - T4K;
+	       {
+		    E T1T, T22, T5G, T5H;
+		    T1T = KP707106781 * (T1P - T1S);
+		    T1X = T1T - T1W;
+		    T3B = T1W + T1T;
+		    T22 = KP707106781 * (T1S + T1P);
+		    T23 = T21 + T22;
+		    T3y = T21 - T22;
+		    T5G = T4y + T4z;
+		    T5H = T4K + T4J;
+		    T5I = T5G + T5H;
+		    T66 = T5G - T5H;
+	       }
+	  }
+	  { /* load block: inputs 63, 31, 15, 47, 7, 39, 55, 23 (indices congruent to 7 mod 8) */
+	       E T2b, T4P, T2G, T4Q, T2e, T51, T2h, T50;
+	       {
+		    E T29, T2a, T2E, T2F;
+		    T29 = I[WS(is, 63)];
+		    T2a = I[WS(is, 31)];
+		    T2b = T29 - T2a;
+		    T4P = T29 + T2a;
+		    T2E = I[WS(is, 15)];
+		    T2F = I[WS(is, 47)];
+		    T2G = T2E - T2F;
+		    T4Q = T2E + T2F;
+	       }
+	       {
+		    E T2c, T2d, T2f, T2g;
+		    T2c = I[WS(is, 7)];
+		    T2d = I[WS(is, 39)];
+		    T2e = T2c - T2d;
+		    T51 = T2c + T2d;
+		    T2f = I[WS(is, 55)];
+		    T2g = I[WS(is, 23)];
+		    T2h = T2f - T2g;
+		    T50 = T2f + T2g;
+	       }
+	       T4R = T4P - T4Q;
+	       T52 = T50 - T51;
+	       {
+		    E T2i, T2D, T5N, T5O;
+		    T2i = KP707106781 * (T2e + T2h);
+		    T2j = T2b + T2i;
+		    T3F = T2b - T2i;
+		    T2D = KP707106781 * (T2h - T2e);
+		    T2H = T2D - T2G;
+		    T3I = T2G + T2D;
+		    T5N = T4P + T4Q;
+		    T5O = T51 + T50;
+		    T5P = T5N + T5O;
+		    T69 = T5N - T5O;
+	       }
+	  }
+	  { /* load block: inputs 62, 30, 54, 22, 14, 46, 6, 38 (indices congruent to 6 mod 8) */
+	       E TN, T1e, TX, T1g, TQ, T1k, TU, T1f, T1h, T1j;
+	       {
+		    E TL, TM, TV, TW;
+		    TL = I[WS(is, 62)];
+		    TM = I[WS(is, 30)];
+		    TN = TL + TM;
+		    T1e = TL - TM;
+		    TV = I[WS(is, 54)];
+		    TW = I[WS(is, 22)];
+		    TX = TV + TW;
+		    T1g = TV - TW;
+	       }
+	       {
+		    E TO, TP, TS, TT;
+		    TO = I[WS(is, 14)];
+		    TP = I[WS(is, 46)];
+		    TQ = TO + TP;
+		    T1k = TO - TP;
+		    TS = I[WS(is, 6)];
+		    TT = I[WS(is, 38)];
+		    TU = TS + TT;
+		    T1f = TS - TT;
+	       }
+	       T1h = KP707106781 * (T1f + T1g);
+	       T1i = T1e + T1h;
+	       T3t = T1e - T1h;
+	       T1j = KP707106781 * (T1g - T1f);
+	       T1l = T1j - T1k;
+	       T3u = T1k + T1j;
+	       {
+		    E TR, TY, T4t, T4u;
+		    TR = TN + TQ;
+		    TY = TU + TX;
+		    TZ = TR + TY;
+		    T63 = TR - TY;
+		    T4t = TN - TQ;
+		    T4u = TX - TU;
+		    T4v = FNMS(KP382683432, T4u, KP923879532 * T4t);
+		    T58 = FMA(KP382683432, T4t, KP923879532 * T4u);
+	       }
+	  }
+	  { /* load block: inputs 2, 34, 58, 26, 18, 50, 10, 42 (indices congruent to 2 mod 8) */
+	       E Ty, T1s, TI, T1n, TB, T1q, TF, T1o, T1p, T1t;
+	       {
+		    E Tw, Tx, TG, TH;
+		    Tw = I[WS(is, 2)];
+		    Tx = I[WS(is, 34)];
+		    Ty = Tw + Tx;
+		    T1s = Tw - Tx;
+		    TG = I[WS(is, 58)];
+		    TH = I[WS(is, 26)];
+		    TI = TG + TH;
+		    T1n = TG - TH;
+	       }
+	       {
+		    E Tz, TA, TD, TE;
+		    Tz = I[WS(is, 18)];
+		    TA = I[WS(is, 50)];
+		    TB = Tz + TA;
+		    T1q = Tz - TA;
+		    TD = I[WS(is, 10)];
+		    TE = I[WS(is, 42)];
+		    TF = TD + TE;
+		    T1o = TD - TE;
+	       }
+	       T1p = KP707106781 * (T1n - T1o);
+	       T1r = T1p - T1q;
+	       T3r = T1q + T1p;
+	       T1t = KP707106781 * (T1o + T1n);
+	       T1u = T1s + T1t;
+	       T3q = T1s - T1t;
+	       {
+		    E TC, TJ, T4q, T4r;
+		    TC = Ty + TB;
+		    TJ = TF + TI;
+		    TK = TC + TJ;
+		    T62 = TC - TJ;
+		    T4q = Ty - TB;
+		    T4r = TI - TF;
+		    T4s = FMA(KP923879532, T4q, KP382683432 * T4r);
+		    T57 = FNMS(KP382683432, T4q, KP923879532 * T4r);
+	       }
+	  }
+	  { /* load block: inputs 4, 36, 12, 44, 20, 52, 60, 28 (indices congruent to 4 mod 8) */
+	       E Ti, T16, Ts, T1a, Tl, T17, Tp, T19, T4m, T4n;
+	       {
+		    E Tg, Th, Tq, Tr;
+		    Tg = I[WS(is, 4)];
+		    Th = I[WS(is, 36)];
+		    Ti = Tg + Th;
+		    T16 = Tg - Th;
+		    Tq = I[WS(is, 12)];
+		    Tr = I[WS(is, 44)];
+		    Ts = Tq + Tr;
+		    T1a = Tq - Tr;
+	       }
+	       {
+		    E Tj, Tk, Tn, To;
+		    Tj = I[WS(is, 20)];
+		    Tk = I[WS(is, 52)];
+		    Tl = Tj + Tk;
+		    T17 = Tj - Tk;
+		    Tn = I[WS(is, 60)];
+		    To = I[WS(is, 28)];
+		    Tp = Tn + To;
+		    T19 = Tn - To;
+	       }
+	       Tm = Ti + Tl;
+	       Tt = Tp + Ts;
+	       Tu = Tm + Tt;
+	       T4m = Ti - Tl;
+	       T4n = Tp - Ts;
+	       T4o = KP707106781 * (T4m + T4n);
+	       T5b = KP707106781 * (T4n - T4m);
+	       {
+		    E T18, T1b, T2O, T2P;
+		    T18 = FNMS(KP382683432, T17, KP923879532 * T16);
+		    T1b = FMA(KP923879532, T19, KP382683432 * T1a);
+		    T1c = T18 + T1b;
+		    T3R = T1b - T18;
+		    T2O = FNMS(KP923879532, T1a, KP382683432 * T19);
+		    T2P = FMA(KP382683432, T16, KP923879532 * T17);
+		    T2Q = T2O - T2P;
+		    T3o = T2P + T2O;
+	       }
+	  }
+	  { /* load block: inputs 61, 29, 21, 53, 13, 45, 5, 37 (indices congruent to 5 mod 8) */
+	       E T1A, T4E, T1K, T4C, T1D, T4F, T1H, T4B;
+	       {
+		    E T1y, T1z, T1I, T1J;
+		    T1y = I[WS(is, 61)];
+		    T1z = I[WS(is, 29)];
+		    T1A = T1y - T1z;
+		    T4E = T1y + T1z;
+		    T1I = I[WS(is, 21)];
+		    T1J = I[WS(is, 53)];
+		    T1K = T1I - T1J;
+		    T4C = T1I + T1J;
+	       }
+	       {
+		    E T1B, T1C, T1F, T1G;
+		    T1B = I[WS(is, 13)];
+		    T1C = I[WS(is, 45)];
+		    T1D = T1B - T1C;
+		    T4F = T1B + T1C;
+		    T1F = I[WS(is, 5)];
+		    T1G = I[WS(is, 37)];
+		    T1H = T1F - T1G;
+		    T4B = T1F + T1G;
+	       }
+	       {
+		    E T1E, T1L, T5J, T5K;
+		    T1E = FNMS(KP923879532, T1D, KP382683432 * T1A);
+		    T1L = FMA(KP382683432, T1H, KP923879532 * T1K);
+		    T1M = T1E - T1L;
+		    T3z = T1L + T1E;
+		    T5J = T4B + T4C;
+		    T5K = T4E + T4F;
+		    T5L = T5J + T5K;
+		    T67 = T5K - T5J;
+	       }
+	       {
+		    E T24, T25, T4D, T4G;
+		    T24 = FNMS(KP382683432, T1K, KP923879532 * T1H);
+		    T25 = FMA(KP923879532, T1A, KP382683432 * T1D);
+		    T26 = T24 + T25;
+		    T3C = T25 - T24;
+		    T4D = T4B - T4C;
+		    T4G = T4E - T4F;
+		    T4H = KP707106781 * (T4D + T4G);
+		    T4M = KP707106781 * (T4G - T4D);
+	       }
+	  }
+	  { /* load block: inputs 3, 35, 11, 43, 19, 51, 59, 27 (indices congruent to 3 mod 8) */
+	       E T2m, T4S, T2w, T4W, T2p, T4T, T2t, T4V;
+	       {
+		    E T2k, T2l, T2u, T2v;
+		    T2k = I[WS(is, 3)];
+		    T2l = I[WS(is, 35)];
+		    T2m = T2k - T2l;
+		    T4S = T2k + T2l;
+		    T2u = I[WS(is, 11)];
+		    T2v = I[WS(is, 43)];
+		    T2w = T2u - T2v;
+		    T4W = T2u + T2v;
+	       }
+	       {
+		    E T2n, T2o, T2r, T2s;
+		    T2n = I[WS(is, 19)];
+		    T2o = I[WS(is, 51)];
+		    T2p = T2n - T2o;
+		    T4T = T2n + T2o;
+		    T2r = I[WS(is, 59)];
+		    T2s = I[WS(is, 27)];
+		    T2t = T2r - T2s;
+		    T4V = T2r + T2s;
+	       }
+	       {
+		    E T2q, T2x, T5Q, T5R;
+		    T2q = FNMS(KP382683432, T2p, KP923879532 * T2m);
+		    T2x = FMA(KP923879532, T2t, KP382683432 * T2w);
+		    T2y = T2q + T2x;
+		    T3J = T2x - T2q;
+		    T5Q = T4S + T4T;
+		    T5R = T4V + T4W;
+		    T5S = T5Q + T5R;
+		    T6a = T5R - T5Q;
+	       }
+	       {
+		    E T2A, T2B, T4U, T4X;
+		    T2A = FNMS(KP923879532, T2w, KP382683432 * T2t);
+		    T2B = FMA(KP382683432, T2m, KP923879532 * T2p);
+		    T2C = T2A - T2B;
+		    T3G = T2B + T2A;
+		    T4U = T4S - T4T;
+		    T4X = T4V - T4W;
+		    T4Y = KP707106781 * (T4U + T4X);
+		    T53 = KP707106781 * (T4X - T4U);
+	       }
+	  }
+	  { /* store block: ro outputs 16, 32, 0 and io output 16 */
+	       E Tv, T10, T5X, T5Y, T5Z, T60;
+	       Tv = Tf + Tu;
+	       T10 = TK + TZ;
+	       T5X = Tv + T10;
+	       T5Y = T5I + T5L;
+	       T5Z = T5P + T5S;
+	       T60 = T5Y + T5Z;
+	       ro[WS(ros, 16)] = Tv - T10;
+	       io[WS(ios, 16)] = T5Z - T5Y;
+	       ro[WS(ros, 32)] = T5X - T60;
+	       ro[0] = T5X + T60;
+	  }
+	  { /* store block: ro and io outputs 24 and 8 */
+	       E T5F, T5V, T5U, T5W, T5M, T5T;
+	       T5F = Tf - Tu;
+	       T5V = TZ - TK;
+	       T5M = T5I - T5L;
+	       T5T = T5P - T5S;
+	       T5U = KP707106781 * (T5M + T5T);
+	       T5W = KP707106781 * (T5T - T5M);
+	       ro[WS(ros, 24)] = T5F - T5U;
+	       io[WS(ios, 24)] = T5W - T5V;
+	       ro[WS(ros, 8)] = T5F + T5U;
+	       io[WS(ios, 8)] = T5V + T5W;
+	  }
+	  { /* store block: ro and io outputs 28, 4, 12, 20 */
+	       E T65, T6l, T6k, T6m, T6c, T6g, T6f, T6h;
+	       {
+		    E T61, T64, T6i, T6j;
+		    T61 = T7 - Te;
+		    T64 = KP707106781 * (T62 + T63);
+		    T65 = T61 + T64;
+		    T6l = T61 - T64;
+		    T6i = FNMS(KP382683432, T66, KP923879532 * T67);
+		    T6j = FMA(KP382683432, T69, KP923879532 * T6a);
+		    T6k = T6i + T6j;
+		    T6m = T6j - T6i;
+	       }
+	       {
+		    E T68, T6b, T6d, T6e;
+		    T68 = FMA(KP923879532, T66, KP382683432 * T67);
+		    T6b = FNMS(KP382683432, T6a, KP923879532 * T69);
+		    T6c = T68 + T6b;
+		    T6g = T6b - T68;
+		    T6d = KP707106781 * (T63 - T62);
+		    T6e = Tt - Tm;
+		    T6f = T6d - T6e;
+		    T6h = T6e + T6d;
+	       }
+	       ro[WS(ros, 28)] = T65 - T6c;
+	       io[WS(ios, 28)] = T6k - T6h;
+	       ro[WS(ros, 4)] = T65 + T6c;
+	       io[WS(ios, 4)] = T6h + T6k;
+	       io[WS(ios, 12)] = T6f + T6g;
+	       ro[WS(ros, 12)] = T6l + T6m;
+	       io[WS(ios, 20)] = T6g - T6f;
+	       ro[WS(ros, 20)] = T6l - T6m;
+	  }
+	  { /* store block: ro and io outputs 26, 6, 10, 22 */
+	       E T5n, T5D, T5x, T5z, T5q, T5A, T5t, T5B;
+	       {
+		    E T5l, T5m, T5v, T5w;
+		    T5l = T4l - T4o;
+		    T5m = T58 - T57;
+		    T5n = T5l + T5m;
+		    T5D = T5l - T5m;
+		    T5v = T4v - T4s;
+		    T5w = T5b - T5a;
+		    T5x = T5v - T5w;
+		    T5z = T5w + T5v;
+	       }
+	       {
+		    E T5o, T5p, T5r, T5s;
+		    T5o = T4A - T4H;
+		    T5p = T4M - T4L;
+		    T5q = FMA(KP831469612, T5o, KP555570233 * T5p);
+		    T5A = FNMS(KP555570233, T5o, KP831469612 * T5p);
+		    T5r = T4R - T4Y;
+		    T5s = T53 - T52;
+		    T5t = FNMS(KP555570233, T5s, KP831469612 * T5r);
+		    T5B = FMA(KP555570233, T5r, KP831469612 * T5s);
+	       }
+	       {
+		    E T5u, T5C, T5y, T5E;
+		    T5u = T5q + T5t;
+		    ro[WS(ros, 26)] = T5n - T5u;
+		    ro[WS(ros, 6)] = T5n + T5u;
+		    T5C = T5A + T5B;
+		    io[WS(ios, 6)] = T5z + T5C;
+		    io[WS(ios, 26)] = T5C - T5z;
+		    T5y = T5t - T5q;
+		    io[WS(ios, 10)] = T5x + T5y;
+		    io[WS(ios, 22)] = T5y - T5x;
+		    T5E = T5B - T5A;
+		    ro[WS(ros, 22)] = T5D - T5E;
+		    ro[WS(ros, 10)] = T5D + T5E;
+	       }
+	  }
+	  { /* store block: ro and io outputs 30, 2, 14, 18 */
+	       E T4x, T5j, T5d, T5f, T4O, T5g, T55, T5h;
+	       {
+		    E T4p, T4w, T59, T5c;
+		    T4p = T4l + T4o;
+		    T4w = T4s + T4v;
+		    T4x = T4p + T4w;
+		    T5j = T4p - T4w;
+		    T59 = T57 + T58;
+		    T5c = T5a + T5b;
+		    T5d = T59 - T5c;
+		    T5f = T5c + T59;
+	       }
+	       {
+		    E T4I, T4N, T4Z, T54;
+		    T4I = T4A + T4H;
+		    T4N = T4L + T4M;
+		    T4O = FMA(KP980785280, T4I, KP195090322 * T4N);
+		    T5g = FNMS(KP195090322, T4I, KP980785280 * T4N);
+		    T4Z = T4R + T4Y;
+		    T54 = T52 + T53;
+		    T55 = FNMS(KP195090322, T54, KP980785280 * T4Z);
+		    T5h = FMA(KP195090322, T4Z, KP980785280 * T54);
+	       }
+	       {
+		    E T56, T5i, T5e, T5k;
+		    T56 = T4O + T55;
+		    ro[WS(ros, 30)] = T4x - T56;
+		    ro[WS(ros, 2)] = T4x + T56;
+		    T5i = T5g + T5h;
+		    io[WS(ios, 2)] = T5f + T5i;
+		    io[WS(ios, 30)] = T5i - T5f;
+		    T5e = T55 - T4O;
+		    io[WS(ios, 14)] = T5d + T5e;
+		    io[WS(ios, 18)] = T5e - T5d;
+		    T5k = T5h - T5g;
+		    ro[WS(ros, 18)] = T5j - T5k;
+		    ro[WS(ros, 14)] = T5j + T5k;
+	       }
+	  }
+	  { /* store block: ro and io outputs 29, 3, 13, 19, 27, 5, 11, 21 */
+	       E T3p, T41, T4c, T3S, T3w, T4b, T49, T4h, T3P, T42, T3E, T3W, T46, T4g, T3L;
+	       E T3X;
+	       {
+		    E T3s, T3v, T3A, T3D;
+		    T3p = T3n + T3o;
+		    T41 = T3n - T3o;
+		    T4c = T3R - T3Q;
+		    T3S = T3Q + T3R;
+		    T3s = FMA(KP831469612, T3q, KP555570233 * T3r);
+		    T3v = FNMS(KP555570233, T3u, KP831469612 * T3t);
+		    T3w = T3s + T3v;
+		    T4b = T3v - T3s;
+		    {
+			 E T47, T48, T3N, T3O;
+			 T47 = T3F - T3G;
+			 T48 = T3J - T3I;
+			 T49 = FNMS(KP471396736, T48, KP881921264 * T47);
+			 T4h = FMA(KP471396736, T47, KP881921264 * T48);
+			 T3N = FNMS(KP555570233, T3q, KP831469612 * T3r);
+			 T3O = FMA(KP555570233, T3t, KP831469612 * T3u);
+			 T3P = T3N + T3O;
+			 T42 = T3O - T3N;
+		    }
+		    T3A = T3y + T3z;
+		    T3D = T3B + T3C;
+		    T3E = FMA(KP956940335, T3A, KP290284677 * T3D);
+		    T3W = FNMS(KP290284677, T3A, KP956940335 * T3D);
+		    {
+			 E T44, T45, T3H, T3K;
+			 T44 = T3y - T3z;
+			 T45 = T3C - T3B;
+			 T46 = FMA(KP881921264, T44, KP471396736 * T45);
+			 T4g = FNMS(KP471396736, T44, KP881921264 * T45);
+			 T3H = T3F + T3G;
+			 T3K = T3I + T3J;
+			 T3L = FNMS(KP290284677, T3K, KP956940335 * T3H);
+			 T3X = FMA(KP290284677, T3H, KP956940335 * T3K);
+		    }
+	       }
+	       {
+		    E T3x, T3M, T3V, T3Y;
+		    T3x = T3p + T3w;
+		    T3M = T3E + T3L;
+		    ro[WS(ros, 29)] = T3x - T3M;
+		    ro[WS(ros, 3)] = T3x + T3M;
+		    T3V = T3S + T3P;
+		    T3Y = T3W + T3X;
+		    io[WS(ios, 3)] = T3V + T3Y;
+		    io[WS(ios, 29)] = T3Y - T3V;
+	       }
+	       {
+		    E T3T, T3U, T3Z, T40;
+		    T3T = T3P - T3S;
+		    T3U = T3L - T3E;
+		    io[WS(ios, 13)] = T3T + T3U;
+		    io[WS(ios, 19)] = T3U - T3T;
+		    T3Z = T3p - T3w;
+		    T40 = T3X - T3W;
+		    ro[WS(ros, 19)] = T3Z - T40;
+		    ro[WS(ros, 13)] = T3Z + T40;
+	       }
+	       {
+		    E T43, T4a, T4f, T4i;
+		    T43 = T41 + T42;
+		    T4a = T46 + T49;
+		    ro[WS(ros, 27)] = T43 - T4a;
+		    ro[WS(ros, 5)] = T43 + T4a;
+		    T4f = T4c + T4b;
+		    T4i = T4g + T4h;
+		    io[WS(ios, 5)] = T4f + T4i;
+		    io[WS(ios, 27)] = T4i - T4f;
+	       }
+	       {
+		    E T4d, T4e, T4j, T4k;
+		    T4d = T4b - T4c;
+		    T4e = T49 - T46;
+		    io[WS(ios, 11)] = T4d + T4e;
+		    io[WS(ios, 21)] = T4e - T4d;
+		    T4j = T41 - T42;
+		    T4k = T4h - T4g;
+		    ro[WS(ros, 21)] = T4j - T4k;
+		    ro[WS(ros, 11)] = T4j + T4k;
+	       }
+	  }
+	  { /* store block: ro and io outputs 25, 7, 9, 23, 31, 1, 15, 17 */
+	       E T1d, T33, T3e, T2U, T1w, T3d, T3b, T3j, T2N, T34, T28, T2Y, T38, T3i, T2J;
+	       E T2Z;
+	       {
+		    E T1m, T1v, T1Y, T27;
+		    T1d = T15 - T1c;
+		    T33 = T15 + T1c;
+		    T3e = T2T + T2Q;
+		    T2U = T2Q - T2T;
+		    T1m = FMA(KP195090322, T1i, KP980785280 * T1l);
+		    T1v = FNMS(KP195090322, T1u, KP980785280 * T1r);
+		    T1w = T1m - T1v;
+		    T3d = T1v + T1m;
+		    {
+			 E T39, T3a, T2L, T2M;
+			 T39 = T2j + T2y;
+			 T3a = T2H + T2C;
+			 T3b = FNMS(KP098017140, T3a, KP995184726 * T39);
+			 T3j = FMA(KP995184726, T3a, KP098017140 * T39);
+			 T2L = FNMS(KP195090322, T1l, KP980785280 * T1i);
+			 T2M = FMA(KP980785280, T1u, KP195090322 * T1r);
+			 T2N = T2L - T2M;
+			 T34 = T2M + T2L;
+		    }
+		    T1Y = T1M - T1X;
+		    T27 = T23 - T26;
+		    T28 = FMA(KP634393284, T1Y, KP773010453 * T27);
+		    T2Y = FNMS(KP634393284, T27, KP773010453 * T1Y);
+		    {
+			 E T36, T37, T2z, T2I;
+			 T36 = T1X + T1M;
+			 T37 = T23 + T26;
+			 T38 = FMA(KP098017140, T36, KP995184726 * T37);
+			 T3i = FNMS(KP098017140, T37, KP995184726 * T36);
+			 T2z = T2j - T2y;
+			 T2I = T2C - T2H;
+			 T2J = FNMS(KP634393284, T2I, KP773010453 * T2z);
+			 T2Z = FMA(KP773010453, T2I, KP634393284 * T2z);
+		    }
+	       }
+	       {
+		    E T1x, T2K, T2X, T30;
+		    T1x = T1d + T1w;
+		    T2K = T28 + T2J;
+		    ro[WS(ros, 25)] = T1x - T2K;
+		    ro[WS(ros, 7)] = T1x + T2K;
+		    T2X = T2U + T2N;
+		    T30 = T2Y + T2Z;
+		    io[WS(ios, 7)] = T2X + T30;
+		    io[WS(ios, 25)] = T30 - T2X;
+	       }
+	       {
+		    E T2V, T2W, T31, T32;
+		    T2V = T2N - T2U;
+		    T2W = T2J - T28;
+		    io[WS(ios, 9)] = T2V + T2W;
+		    io[WS(ios, 23)] = T2W - T2V;
+		    T31 = T1d - T1w;
+		    T32 = T2Z - T2Y;
+		    ro[WS(ros, 23)] = T31 - T32;
+		    ro[WS(ros, 9)] = T31 + T32;
+	       }
+	       {
+		    E T35, T3c, T3h, T3k;
+		    T35 = T33 + T34;
+		    T3c = T38 + T3b;
+		    ro[WS(ros, 31)] = T35 - T3c;
+		    ro[WS(ros, 1)] = T35 + T3c;
+		    T3h = T3e + T3d;
+		    T3k = T3i + T3j;
+		    io[WS(ios, 1)] = T3h + T3k;
+		    io[WS(ios, 31)] = T3k - T3h;
+	       }
+	       {
+		    E T3f, T3g, T3l, T3m;
+		    T3f = T3d - T3e;
+		    T3g = T3b - T38;
+		    io[WS(ios, 15)] = T3f + T3g;
+		    io[WS(ios, 17)] = T3g - T3f;
+		    T3l = T33 - T34;
+		    T3m = T3j - T3i;
+		    ro[WS(ros, 17)] = T3l - T3m;
+		    ro[WS(ros, 15)] = T3l + T3m;
+	       }
+	  }
+     }
+}
+
+static void mr2hc_64(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{
+     /* Run the one-transform kernel v times (zero times if v <= 0), moving I by ivs and ro/io by ovs after each. */
+     int remaining = v;
+     while (remaining > 0) {
+	  mr2hc_64_0(I, ro, io, is, ros, ios);
+	  I = I + ivs, ro = ro + ovs, io = io + ovs;
+	  remaining = remaining - 1;
+     }
+}
+
+static const kr2hc_desc desc = { 64, "mr2hc_64", {342, 72, 52, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor handed to kr2hc_register below; 342/72/52 equal the add/mul/fused-multiply-add counts quoted in this file's header comment; remaining field meanings come from kr2hc_desc (not shown here) */
+
+void X(codelet_mr2hc_64) (planner *p) { /* registration entry point; X(...) is a project macro applied to the identifier (definition not shown) */
+     X(kr2hc_register) (p, mr2hc_64, &desc); /* passes planner p, the mr2hc_64 wrapper and its descriptor to kr2hc_register */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hcII_10.c b/src/fftw3/rdft/codelets/r2hc/r2hcII_10.c
new file mode 100644
index 0000000..d163e75
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hcII_10.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:12 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 10 -name r2hcII_10 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 32 FP additions, 12 FP multiplications,
+ * (or, 26 additions, 6 multiplications, 6 fused multiply/add),
+ * 21 stack variables, and 20 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hcII_10.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_10.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_10.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void r2hcII_10(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* For each of v vectors: reads the 10 inputs I[WS(is, 0..9)] and stores ro[WS(ros, 0..4)] and io[WS(ios, 0..4)]. "input k"/"output k" below means the k passed to WS(). */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* sin(2*pi/5) */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* sqrt(5)/4 */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* one pass per vector; I steps by ivs, ro and io both step by ovs; no pass runs when v <= 0 */
+	  E T1, To, T8, Tq, T9, Tp, Te, Ts, Th, Tn;
+	  T1 = I[0];
+	  To = I[WS(is, 5)];
+	  { /* inputs 4, 6, 8, 2: pairwise sums and differences */
+	       E T2, T3, T4, T5, T6, T7;
+	       T2 = I[WS(is, 4)];
+	       T3 = I[WS(is, 6)];
+	       T4 = T2 - T3;
+	       T5 = I[WS(is, 8)];
+	       T6 = I[WS(is, 2)];
+	       T7 = T5 - T6;
+	       T8 = T4 + T7;
+	       Tq = T5 + T6;
+	       T9 = KP559016994 * (T4 - T7);
+	       Tp = T2 + T3;
+	  }
+	  { /* inputs 1, 9, 3, 7: pairwise sums and differences */
+	       E Tc, Td, Tm, Tf, Tg, Tl;
+	       Tc = I[WS(is, 1)];
+	       Td = I[WS(is, 9)];
+	       Tm = Tc + Td;
+	       Tf = I[WS(is, 3)];
+	       Tg = I[WS(is, 7)];
+	       Tl = Tf + Tg;
+	       Te = Tc - Td;
+	       Ts = KP559016994 * (Tm + Tl);
+	       Th = Tf - Tg;
+	       Tn = Tl - Tm;
+	  }
+	  ro[WS(ros, 2)] = T1 + T8;
+	  io[WS(ios, 2)] = Tn - To;
+	  { /* remaining ro outputs: 4, 3, 0, 1 */
+	       E Ti, Tk, Tb, Tj, Ta;
+	       Ti = FMA(KP951056516, Te, KP587785252 * Th);
+	       Tk = FNMS(KP587785252, Te, KP951056516 * Th);
+	       Ta = FNMS(KP250000000, T8, T1);
+	       Tb = T9 + Ta;
+	       Tj = Ta - T9;
+	       ro[WS(ros, 4)] = Tb - Ti;
+	       ro[WS(ros, 3)] = Tj + Tk;
+	       ro[0] = Tb + Ti;
+	       ro[WS(ros, 1)] = Tj - Tk;
+	  }
+	  { /* remaining io outputs: 0, 3, 4, 1 */
+	       E Tr, Tw, Tu, Tv, Tt;
+	       Tr = FMA(KP951056516, Tp, KP587785252 * Tq);
+	       Tw = FNMS(KP587785252, Tp, KP951056516 * Tq);
+	       Tt = FMA(KP250000000, Tn, To);
+	       Tu = Ts + Tt;
+	       Tv = Tt - Ts;
+	       io[0] = -(Tr + Tu);
+	       io[WS(ios, 3)] = Tw + Tv;
+	       io[WS(ios, 4)] = Tr - Tu;
+	       io[WS(ios, 1)] = Tv - Tw;
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 10, "r2hcII_10", {26, 6, 6, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor handed to kr2hcII_register below; 26/6/6 equal the add/mul/fused-multiply-add counts quoted in this file's header comment; remaining field meanings come from kr2hc_desc (not shown here) */
+
+void X(codelet_r2hcII_10) (planner *p) { /* registration entry point; X(...) is a project macro applied to the identifier (definition not shown) */
+     X(kr2hcII_register) (p, r2hcII_10, &desc); /* passes planner p, the r2hcII_10 kernel and its descriptor to kr2hcII_register */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hcII_12.c b/src/fftw3/rdft/codelets/r2hc/r2hcII_12.c
new file mode 100644
index 0000000..9e01c7b
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hcII_12.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:14 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 12 -name r2hcII_12 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 43 FP additions, 12 FP multiplications,
+ * (or, 39 additions, 8 multiplications, 4 fused multiply/add),
+ * 28 stack variables, and 24 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hcII_12.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_12.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_12.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void r2hcII_12(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* For each of v vectors: reads the 12 inputs I[WS(is, 0..11)] and stores ro[WS(ros, 0..5)] and io[WS(ios, 0..5)]. "input k"/"output k" below means the k passed to WS(). */
+     DK(KP353553390, +0.353553390593273762200422181052424519642417969); /* sqrt(2)/4 */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
+     DK(KP612372435, +0.612372435695794524549321018676472847991486870); /* sqrt(6)/4 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* one pass per vector; I steps by ivs, ro and io both step by ovs; no pass runs when v <= 0 */
+	  E Tx, Tg, T4, Tz, Ty, Tj, TA, T9, Tm, Tl, Te, Tp, To, Tf, TE;
+	  E TF;
+	  { /* even inputs 0, 4, 8, 6, 10, 2 */
+	       E T1, T3, T2, Th, Ti;
+	       T1 = I[0];
+	       T3 = I[WS(is, 4)];
+	       T2 = I[WS(is, 8)];
+	       Tx = KP866025403 * (T2 + T3);
+	       Tg = FMA(KP500000000, T3 - T2, T1);
+	       T4 = T1 + T2 - T3;
+	       Tz = I[WS(is, 6)];
+	       Th = I[WS(is, 10)];
+	       Ti = I[WS(is, 2)];
+	       Ty = Th + Ti;
+	       Tj = KP866025403 * (Th - Ti);
+	       TA = FMA(KP500000000, Ty, Tz);
+	  }
+	  { /* inputs 3, 11, 7 */
+	       E T5, T6, T7, T8;
+	       T5 = I[WS(is, 3)];
+	       T6 = I[WS(is, 11)];
+	       T7 = I[WS(is, 7)];
+	       T8 = T6 - T7;
+	       T9 = T5 + T8;
+	       Tm = KP612372435 * (T6 + T7);
+	       Tl = FNMS(KP353553390, T8, KP707106781 * T5);
+	  }
+	  { /* inputs 9, 5, 1 */
+	       E Td, Ta, Tb, Tc;
+	       Td = I[WS(is, 9)];
+	       Ta = I[WS(is, 5)];
+	       Tb = I[WS(is, 1)];
+	       Tc = Ta - Tb;
+	       Te = Tc - Td;
+	       Tp = FMA(KP353553390, Tc, KP707106781 * Td);
+	       To = KP612372435 * (Ta + Tb);
+	  }
+	  Tf = KP707106781 * (T9 + Te);
+	  ro[WS(ros, 1)] = T4 - Tf;
+	  ro[WS(ros, 4)] = T4 + Tf;
+	  TE = KP707106781 * (Te - T9);
+	  TF = Tz - Ty;
+	  io[WS(ios, 4)] = TE - TF;
+	  io[WS(ios, 1)] = TE + TF;
+	  { /* ro outputs 5 and 0, io outputs 2 and 3 */
+	       E Tk, TB, Tr, Tw, Tn, Tq;
+	       Tk = Tg - Tj;
+	       TB = Tx - TA;
+	       Tn = Tl - Tm;
+	       Tq = To - Tp;
+	       Tr = Tn + Tq;
+	       Tw = Tn - Tq;
+	       ro[WS(ros, 5)] = Tk - Tr;
+	       io[WS(ios, 2)] = Tw + TB;
+	       ro[0] = Tk + Tr;
+	       io[WS(ios, 3)] = Tw - TB;
+	  }
+	  { /* ro outputs 3 and 2, io outputs 5 and 0 */
+	       E Ts, TD, Tv, TC, Tt, Tu;
+	       Ts = Tg + Tj;
+	       TD = Tx + TA;
+	       Tt = To + Tp;
+	       Tu = Tm + Tl;
+	       Tv = Tt - Tu;
+	       TC = Tu + Tt;
+	       ro[WS(ros, 3)] = Ts - Tv;
+	       io[WS(ios, 5)] = TD - TC;
+	       ro[WS(ros, 2)] = Ts + Tv;
+	       io[0] = -(TC + TD);
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 12, "r2hcII_12", {39, 8, 4, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor handed to kr2hcII_register below; 39/8/4 equal the add/mul/fused-multiply-add counts quoted in this file's header comment; remaining field meanings come from kr2hc_desc (not shown here) */
+
+void X(codelet_r2hcII_12) (planner *p) { /* registration entry point; X(...) is a project macro applied to the identifier (definition not shown) */
+     X(kr2hcII_register) (p, r2hcII_12, &desc); /* passes planner p, the r2hcII_12 kernel and its descriptor to kr2hcII_register */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hcII_15.c b/src/fftw3/rdft/codelets/r2hc/r2hcII_15.c
new file mode 100644
index 0000000..72e014a
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hcII_15.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:16 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 15 -name r2hcII_15 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 72 FP additions, 33 FP multiplications,
+ * (or, 54 additions, 15 multiplications, 18 fused multiply/add),
+ * 37 stack variables, and 30 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hcII_15.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_15.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_15.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void r2hcII_15(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated size-15 kernel (gen_r2hc -n 15 -dft-II): each pass reads I[0], I[WS(is, 1..14)] and writes ro[0], ro[WS(ros, 1..7)], io[0], io[WS(ios, 1..6)]. */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
+     DK(KP809016994, +0.809016994374947424102293417182819058860154590); /* cos(pi/5) */
+     DK(KP309016994, +0.309016994374947424102293417182819058860154590); /* cos(2*pi/5) */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* sqrt(5)/4 */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* sin(2*pi/5) */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; each advances I by ivs and both ro and io by ovs */
+	  E T1, T2, Tx, TR, TE, T7, TD, Th, Tm, Tr, TQ, TA, TB, Tf, Te; /* temporaries shared between the groups below */
+	  E Tu, TS, Td, TH, TO; /* NOTE(review): FMA/FMS/FNMA/FNMS are macros from the included headers; operand order is not visible here */
+	  T1 = I[WS(is, 10)];
+	  { /* loads inputs 4, 1, 7, 13 */
+	       E T3, Tv, T6, Tw, T4, T5;
+	       T2 = I[WS(is, 4)];
+	       T3 = I[WS(is, 1)];
+	       Tv = T2 + T3;
+	       T4 = I[WS(is, 7)];
+	       T5 = I[WS(is, 13)];
+	       T6 = T4 + T5;
+	       Tw = T4 - T5;
+	       Tx = FMA(KP951056516, Tv, KP587785252 * Tw);
+	       TR = FNMS(KP587785252, Tv, KP951056516 * Tw);
+	       TE = KP559016994 * (T3 - T6);
+	       T7 = T3 + T6;
+	       TD = KP250000000 * T7;
+	  }
+	  { /* loads inputs 0, 9, 12, 3, 6 */
+	       E Ti, Tl, Tj, Tk, Tp, Tq;
+	       Th = I[0];
+	       Ti = I[WS(is, 9)];
+	       Tl = I[WS(is, 12)];
+	       Tj = I[WS(is, 3)];
+	       Tk = I[WS(is, 6)];
+	       Tp = Tk + Ti;
+	       Tq = Tl + Tj;
+	       Tm = Ti + Tj - (Tk + Tl);
+	       Tr = FMA(KP951056516, Tp, KP587785252 * Tq);
+	       TQ = FNMS(KP951056516, Tq, KP587785252 * Tp);
+	       TA = FMA(KP250000000, Tm, Th);
+	       TB = KP559016994 * (Tl + Ti - (Tk + Tj));
+	  }
+	  { /* loads inputs 5, 14, 11, 2, 8 */
+	       E T9, Tt, Tc, Ts, Ta, Tb, TG;
+	       Tf = I[WS(is, 5)];
+	       T9 = I[WS(is, 14)];
+	       Te = I[WS(is, 11)];
+	       Tt = T9 + Te;
+	       Ta = I[WS(is, 2)];
+	       Tb = I[WS(is, 8)];
+	       Tc = Ta + Tb;
+	       Ts = Ta - Tb;
+	       Tu = FNMS(KP951056516, Tt, KP587785252 * Ts);
+	       TS = FMA(KP951056516, Ts, KP587785252 * Tt);
+	       Td = T9 + Tc;
+	       TG = KP559016994 * (T9 - Tc);
+	       TH = FNMS(KP309016994, Te, TG) + FNMA(KP250000000, Td, Tf);
+	       TO = FMS(KP809016994, Te, Tf) + FNMA(KP250000000, Td, TG);
+	  }
+	  { /* writes io[2], ro[2], ro[7] */
+	       E Tn, T8, Tg, To;
+	       Tn = Th - Tm;
+	       T8 = T1 + T2 - T7;
+	       Tg = Td - Te - Tf;
+	       To = T8 + Tg;
+	       io[WS(ios, 2)] = KP866025403 * (T8 - Tg);
+	       ro[WS(ros, 2)] = FNMS(KP500000000, To, Tn);
+	       ro[WS(ros, 7)] = Tn + To;
+	  }
+	  { /* writes ro[1], io[1], io[6], io[3], ro[3], ro[6] */
+	       E TM, TX, TT, TV, TP, TU, TN, TW;
+	       TM = TB + TA;
+	       TX = KP866025403 * (TR + TS);
+	       TT = TR - TS;
+	       TV = FMS(KP500000000, TT, TQ);
+	       TN = T1 + TE + FNMS(KP809016994, T2, TD);
+	       TP = TN + TO;
+	       TU = KP866025403 * (TO - TN);
+	       ro[WS(ros, 1)] = TM + TP;
+	       io[WS(ios, 1)] = TQ + TT;
+	       io[WS(ios, 6)] = TU - TV;
+	       io[WS(ios, 3)] = TU + TV;
+	       TW = FNMS(KP500000000, TP, TM);
+	       ro[WS(ros, 3)] = TW - TX;
+	       ro[WS(ros, 6)] = TW + TX;
+	  }
+	  { /* writes io[4], ro[4], io[5], io[0], ro[0], ro[5] */
+	       E Tz, TC, Ty, TK, TI, TL, TF, TJ;
+	       Tz = KP866025403 * (Tx + Tu);
+	       TC = TA - TB;
+	       Ty = Tu - Tx;
+	       TK = FMS(KP500000000, Ty, Tr);
+	       TF = FMA(KP309016994, T2, T1) + TD - TE;
+	       TI = TF + TH;
+	       TL = KP866025403 * (TH - TF);
+	       io[WS(ios, 4)] = Tr + Ty;
+	       ro[WS(ros, 4)] = TC + TI;
+	       io[WS(ios, 5)] = TK - TL;
+	       io[0] = TK + TL;
+	       TJ = FNMS(KP500000000, TI, TC);
+	       ro[0] = Tz + TJ;
+	       ro[WS(ros, 5)] = TJ - Tz;
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 15, "r2hcII_15", {54, 15, 18, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 15, kernel name; 54/15/18 match the add/mul/fma counts quoted in this file's header comment */
+
+void X(codelet_r2hcII_15) (planner *p) { /* registration entry point for the size-15 kernel */
+     X(kr2hcII_register) (p, r2hcII_15, &desc); /* passes the kernel function and its descriptor to the registrar for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hcII_16.c b/src/fftw3/rdft/codelets/r2hc/r2hcII_16.c
new file mode 100644
index 0000000..4535bde
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hcII_16.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:21 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 16 -name r2hcII_16 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 66 FP additions, 30 FP multiplications,
+ * (or, 54 additions, 18 multiplications, 12 fused multiply/add),
+ * 32 stack variables, and 32 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hcII_16.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_16.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_16.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void r2hcII_16(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated size-16 kernel (gen_r2hc -n 16 -dft-II): each pass reads I[0], I[WS(is, 1..15)] and writes ro[0], ro[WS(ros, 1..7)], io[0], io[WS(ios, 1..7)]. */
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191); /* sin(3*pi/16) */
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* cos(pi/16) */
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618); /* sin(pi/16) */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; each advances I by ivs and both ro and io by ovs */
+	  E T5, T11, TB, TV, Tr, TK, Tu, TJ, Ti, TH, Tl, TG, Tc, T10, TE; /* temporaries shared between the groups below */
+	  E TS; /* NOTE(review): FMA/FNMS are macros from the included headers; operand order is not visible here */
+	  { /* loads inputs 0, 8, 4, 12 */
+	       E T1, TU, T4, TT, T2, T3;
+	       T1 = I[0];
+	       TU = I[WS(is, 8)];
+	       T2 = I[WS(is, 4)];
+	       T3 = I[WS(is, 12)];
+	       T4 = KP707106781 * (T2 - T3);
+	       TT = KP707106781 * (T2 + T3);
+	       T5 = T1 + T4;
+	       T11 = TU - TT;
+	       TB = T1 - T4;
+	       TV = TT + TU;
+	  }
+	  { /* loads inputs 15, 7, 3, 11 */
+	       E Tq, Tt, Tp, Ts, Tn, To;
+	       Tq = I[WS(is, 15)];
+	       Tt = I[WS(is, 7)];
+	       Tn = I[WS(is, 3)];
+	       To = I[WS(is, 11)];
+	       Tp = KP707106781 * (Tn - To);
+	       Ts = KP707106781 * (Tn + To);
+	       Tr = Tp - Tq;
+	       TK = Tt - Ts;
+	       Tu = Ts + Tt;
+	       TJ = Tp + Tq;
+	  }
+	  { /* loads inputs 1, 9, 5, 13 */
+	       E Te, Tk, Th, Tj, Tf, Tg;
+	       Te = I[WS(is, 1)];
+	       Tk = I[WS(is, 9)];
+	       Tf = I[WS(is, 5)];
+	       Tg = I[WS(is, 13)];
+	       Th = KP707106781 * (Tf - Tg);
+	       Tj = KP707106781 * (Tf + Tg);
+	       Ti = Te + Th;
+	       TH = Tk - Tj;
+	       Tl = Tj + Tk;
+	       TG = Te - Th;
+	  }
+	  { /* loads inputs 2, 10, 6, 14 */
+	       E T8, TC, Tb, TD;
+	       {
+		    E T6, T7, T9, Ta;
+		    T6 = I[WS(is, 2)];
+		    T7 = I[WS(is, 10)];
+		    T8 = FNMS(KP382683432, T7, KP923879532 * T6);
+		    TC = FMA(KP382683432, T6, KP923879532 * T7);
+		    T9 = I[WS(is, 6)];
+		    Ta = I[WS(is, 14)];
+		    Tb = FNMS(KP923879532, Ta, KP382683432 * T9);
+		    TD = FMA(KP923879532, T9, KP382683432 * Ta);
+	       }
+	       Tc = T8 + Tb;
+	       T10 = Tb - T8;
+	       TE = TC - TD;
+	       TS = TC + TD;
+	  }
+	  { /* writes ro[4], io[7], ro[3], io[0] */
+	       E Td, TW, Tw, TR, Tm, Tv;
+	       Td = T5 - Tc;
+	       TW = TS + TV;
+	       Tm = FMA(KP195090322, Ti, KP980785280 * Tl);
+	       Tv = FNMS(KP980785280, Tu, KP195090322 * Tr);
+	       Tw = Tm + Tv;
+	       TR = Tv - Tm;
+	       ro[WS(ros, 4)] = Td - Tw;
+	       io[WS(ios, 7)] = TR + TW;
+	       ro[WS(ros, 3)] = Td + Tw;
+	       io[0] = TR - TW;
+	  }
+	  { /* writes ro[7], io[3], ro[0], io[4] */
+	       E Tx, TY, TA, TX, Ty, Tz;
+	       Tx = T5 + Tc;
+	       TY = TV - TS;
+	       Ty = FNMS(KP195090322, Tl, KP980785280 * Ti);
+	       Tz = FMA(KP980785280, Tr, KP195090322 * Tu);
+	       TA = Ty + Tz;
+	       TX = Tz - Ty;
+	       ro[WS(ros, 7)] = Tx - TA;
+	       io[WS(ios, 3)] = TX + TY;
+	       ro[0] = Tx + TA;
+	       io[WS(ios, 4)] = TX - TY;
+	  }
+	  { /* writes ro[6], io[2], ro[1], io[5] */
+	       E TF, T12, TM, TZ, TI, TL;
+	       TF = TB + TE;
+	       T12 = T10 - T11;
+	       TI = FMA(KP831469612, TG, KP555570233 * TH);
+	       TL = FMA(KP831469612, TJ, KP555570233 * TK);
+	       TM = TI - TL;
+	       TZ = TI + TL;
+	       ro[WS(ros, 6)] = TF - TM;
+	       io[WS(ios, 2)] = T12 - TZ;
+	       ro[WS(ros, 1)] = TF + TM;
+	       io[WS(ios, 5)] = -(TZ + T12);
+	  }
+	  { /* writes ro[5], io[1], ro[2], io[6] */
+	       E TN, T14, TQ, T13, TO, TP;
+	       TN = TB - TE;
+	       T14 = T10 + T11;
+	       TO = FNMS(KP555570233, TJ, KP831469612 * TK);
+	       TP = FNMS(KP555570233, TG, KP831469612 * TH);
+	       TQ = TO - TP;
+	       T13 = TP + TO;
+	       ro[WS(ros, 5)] = TN - TQ;
+	       io[WS(ios, 1)] = T13 + T14;
+	       ro[WS(ros, 2)] = TN + TQ;
+	       io[WS(ios, 6)] = T13 - T14;
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 16, "r2hcII_16", {54, 18, 12, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 16, kernel name; 54/18/12 match the add/mul/fma counts quoted in this file's header comment */
+
+void X(codelet_r2hcII_16) (planner *p) { /* registration entry point for the size-16 kernel */
+     X(kr2hcII_register) (p, r2hcII_16, &desc); /* passes the kernel function and its descriptor to the registrar for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hcII_2.c b/src/fftw3/rdft/codelets/r2hc/r2hcII_2.c
new file mode 100644
index 0000000..00ca93b
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hcII_2.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:02 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 2 -name r2hcII_2 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 0 FP additions, 0 FP multiplications,
+ * (or, 0 additions, 0 multiplications, 0 fused multiply/add),
+ * 3 stack variables, and 4 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hcII_2.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_2.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_2.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void r2hcII_2(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated size-2 kernel (gen_r2hc -n 2 -dft-II): each pass copies I[0] to ro[0] and stores the negated I[WS(is, 1)] in io[0]; ros and ios are unused. */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; each advances I by ivs and both ro and io by ovs */
+	  E T1, T2;
+	  T1 = I[0];
+	  ro[0] = T1; /* first input passes through unchanged */
+	  T2 = I[WS(is, 1)];
+	  io[0] = -T2; /* second input is sign-flipped */
+     }
+}
+
+static const kr2hc_desc desc = { 2, "r2hcII_2", {0, 0, 0, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 2, kernel name; the zero counts match the 0 add/mul/fma quoted in this file's header comment */
+
+void X(codelet_r2hcII_2) (planner *p) { /* registration entry point for the size-2 kernel */
+     X(kr2hcII_register) (p, r2hcII_2, &desc); /* passes the kernel function and its descriptor to the registrar for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hcII_3.c b/src/fftw3/rdft/codelets/r2hc/r2hcII_3.c
new file mode 100644
index 0000000..afdc6d1
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hcII_3.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:02 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 3 -name r2hcII_3 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 4 FP additions, 2 FP multiplications,
+ * (or, 3 additions, 1 multiplications, 1 fused multiply/add),
+ * 7 stack variables, and 6 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hcII_3.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_3.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_3.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void r2hcII_3(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated size-3 kernel (gen_r2hc -n 3 -dft-II): each pass reads I[0], I[WS(is, 1)], I[WS(is, 2)] and writes ro[0], ro[WS(ros, 1)] and io[0]; ios is unused. */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* sqrt(3)/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; each advances I by ivs and both ro and io by ovs */
+	  E T1, T2, T3, T4;
+	  T1 = I[0];
+	  T2 = I[WS(is, 1)];
+	  T3 = I[WS(is, 2)];
+	  T4 = T2 - T3; /* difference of inputs 1 and 2, reused by both real outputs */
+	  ro[WS(ros, 1)] = T1 - T4;
+	  io[0] = -(KP866025403 * (T2 + T3));
+	  ro[0] = FMA(KP500000000, T4, T1); /* NOTE(review): FMA presumably yields KP500000000 * T4 + T1 -- macro defined outside this file */
+     }
+}
+
+static const kr2hc_desc desc = { 3, "r2hcII_3", {3, 1, 1, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 3, kernel name; 3/1/1 match the add/mul/fma counts quoted in this file's header comment */
+
+void X(codelet_r2hcII_3) (planner *p) { /* registration entry point for the size-3 kernel */
+     X(kr2hcII_register) (p, r2hcII_3, &desc); /* passes the kernel function and its descriptor to the registrar for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hcII_32.c b/src/fftw3/rdft/codelets/r2hc/r2hcII_32.c
new file mode 100644
index 0000000..728ae3d
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hcII_32.c
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:28 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 32 -name r2hcII_32 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 174 FP additions, 82 FP multiplications,
+ * (or, 138 additions, 46 multiplications, 36 fused multiply/add),
+ * 62 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hcII_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void r2hcII_32(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated size-32 kernel (gen_r2hc -n 32 -dft-II): each pass reads I[0], I[WS(is, 1..31)] and writes ro[0], ro[WS(ros, 1..15)], io[0], io[WS(ios, 1..15)]. */
+     DK(KP471396736, +0.471396736825997648556387625905254377657460319); /* sin(5*pi/32) */
+     DK(KP881921264, +0.881921264348355029712756863660388349508442621); /* cos(5*pi/32) */
+     DK(KP634393284, +0.634393284163645498215171613225493370675687095); /* sin(7*pi/32) */
+     DK(KP773010453, +0.773010453362736960810906609758469800971041293); /* cos(7*pi/32) */
+     DK(KP290284677, +0.290284677254462367636192375817395274691476278); /* sin(3*pi/32) */
+     DK(KP956940335, +0.956940335732208864935797886980269969482849206); /* cos(3*pi/32) */
+     DK(KP995184726, +0.995184726672196886244836953109479921575474869); /* cos(pi/32) */
+     DK(KP098017140, +0.098017140329560601994195563888641845861136673); /* sin(pi/32) */
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191); /* sin(3*pi/16) */
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* cos(3*pi/16) */
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618); /* sin(pi/16) */
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* cos(pi/16) */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; each advances I by ivs and both ro and io by ovs */
+	  E T5, T2D, T1z, T2q, Tc, T2C, T1C, T2n, Tm, T1k, T1J, T26, Tv, T1l, T1G; /* temporaries shared between the groups below */
+	  E T27, T15, T1r, T1Y, T2e, T1c, T1s, T1V, T2d, TK, T1o, T1R, T2b, TR, T1p;
+	  E T1O, T2a; /* NOTE(review): FMA/FNMS are macros from the included headers; operand order is not visible here */
+	  { /* loads inputs 0, 16, 8, 24 */
+	       E T1, T2p, T4, T2o, T2, T3;
+	       T1 = I[0];
+	       T2p = I[WS(is, 16)];
+	       T2 = I[WS(is, 8)];
+	       T3 = I[WS(is, 24)];
+	       T4 = KP707106781 * (T2 - T3);
+	       T2o = KP707106781 * (T2 + T3);
+	       T5 = T1 + T4;
+	       T2D = T2p - T2o;
+	       T1z = T1 - T4;
+	       T2q = T2o + T2p;
+	  }
+	  { /* loads inputs 4, 20, 12, 28 */
+	       E T8, T1A, Tb, T1B;
+	       {
+		    E T6, T7, T9, Ta;
+		    T6 = I[WS(is, 4)];
+		    T7 = I[WS(is, 20)];
+		    T8 = FNMS(KP382683432, T7, KP923879532 * T6);
+		    T1A = FMA(KP382683432, T6, KP923879532 * T7);
+		    T9 = I[WS(is, 12)];
+		    Ta = I[WS(is, 28)];
+		    Tb = FNMS(KP923879532, Ta, KP382683432 * T9);
+		    T1B = FMA(KP923879532, T9, KP382683432 * Ta);
+	       }
+	       Tc = T8 + Tb;
+	       T2C = Tb - T8;
+	       T1C = T1A - T1B;
+	       T2n = T1A + T1B;
+	  }
+	  { /* loads inputs 2, 18, 10, 26 */
+	       E Te, Tk, Th, Tj, Tf, Tg;
+	       Te = I[WS(is, 2)];
+	       Tk = I[WS(is, 18)];
+	       Tf = I[WS(is, 10)];
+	       Tg = I[WS(is, 26)];
+	       Th = KP707106781 * (Tf - Tg);
+	       Tj = KP707106781 * (Tf + Tg);
+	       {
+		    E Ti, Tl, T1H, T1I;
+		    Ti = Te + Th;
+		    Tl = Tj + Tk;
+		    Tm = FNMS(KP195090322, Tl, KP980785280 * Ti);
+		    T1k = FMA(KP195090322, Ti, KP980785280 * Tl);
+		    T1H = Tk - Tj;
+		    T1I = Te - Th;
+		    T1J = FNMS(KP555570233, T1I, KP831469612 * T1H);
+		    T26 = FMA(KP831469612, T1I, KP555570233 * T1H);
+	       }
+	  }
+	  { /* loads inputs 30, 14, 6, 22 */
+	       E Tq, Tt, Tp, Ts, Tn, To;
+	       Tq = I[WS(is, 30)];
+	       Tt = I[WS(is, 14)];
+	       Tn = I[WS(is, 6)];
+	       To = I[WS(is, 22)];
+	       Tp = KP707106781 * (Tn - To);
+	       Ts = KP707106781 * (Tn + To);
+	       {
+		    E Tr, Tu, T1E, T1F;
+		    Tr = Tp - Tq;
+		    Tu = Ts + Tt;
+		    Tv = FMA(KP980785280, Tr, KP195090322 * Tu);
+		    T1l = FNMS(KP980785280, Tu, KP195090322 * Tr);
+		    T1E = Tt - Ts;
+		    T1F = Tp + Tq;
+		    T1G = FNMS(KP555570233, T1F, KP831469612 * T1E);
+		    T27 = FMA(KP831469612, T1F, KP555570233 * T1E);
+	       }
+	  }
+	  { /* loads inputs 31, 15, 7, 23, 3, 19, 11, 27 */
+	       E TW, T1a, TV, T19, T10, T16, T13, T17, TT, TU;
+	       TW = I[WS(is, 31)];
+	       T1a = I[WS(is, 15)];
+	       TT = I[WS(is, 7)];
+	       TU = I[WS(is, 23)];
+	       TV = KP707106781 * (TT - TU);
+	       T19 = KP707106781 * (TT + TU);
+	       {
+		    E TY, TZ, T11, T12;
+		    TY = I[WS(is, 3)];
+		    TZ = I[WS(is, 19)];
+		    T10 = FNMS(KP382683432, TZ, KP923879532 * TY);
+		    T16 = FMA(KP382683432, TY, KP923879532 * TZ);
+		    T11 = I[WS(is, 11)];
+		    T12 = I[WS(is, 27)];
+		    T13 = FNMS(KP923879532, T12, KP382683432 * T11);
+		    T17 = FMA(KP923879532, T11, KP382683432 * T12);
+	       }
+	       {
+		    E TX, T14, T1W, T1X;
+		    TX = TV - TW;
+		    T14 = T10 + T13;
+		    T15 = TX + T14;
+		    T1r = TX - T14;
+		    T1W = T13 - T10;
+		    T1X = T1a - T19;
+		    T1Y = T1W - T1X;
+		    T2e = T1W + T1X;
+	       }
+	       {
+		    E T18, T1b, T1T, T1U;
+		    T18 = T16 + T17;
+		    T1b = T19 + T1a;
+		    T1c = T18 + T1b;
+		    T1s = T1b - T18;
+		    T1T = TV + TW;
+		    T1U = T16 - T17;
+		    T1V = T1T + T1U;
+		    T2d = T1U - T1T;
+	       }
+	  }
+	  { /* loads inputs 1, 17, 9, 25, 5, 21, 13, 29 */
+	       E Ty, TP, TB, TO, TF, TL, TI, TM, Tz, TA;
+	       Ty = I[WS(is, 1)];
+	       TP = I[WS(is, 17)];
+	       Tz = I[WS(is, 9)];
+	       TA = I[WS(is, 25)];
+	       TB = KP707106781 * (Tz - TA);
+	       TO = KP707106781 * (Tz + TA);
+	       {
+		    E TD, TE, TG, TH;
+		    TD = I[WS(is, 5)];
+		    TE = I[WS(is, 21)];
+		    TF = FNMS(KP382683432, TE, KP923879532 * TD);
+		    TL = FMA(KP382683432, TD, KP923879532 * TE);
+		    TG = I[WS(is, 13)];
+		    TH = I[WS(is, 29)];
+		    TI = FNMS(KP923879532, TH, KP382683432 * TG);
+		    TM = FMA(KP923879532, TG, KP382683432 * TH);
+	       }
+	       {
+		    E TC, TJ, T1P, T1Q;
+		    TC = Ty + TB;
+		    TJ = TF + TI;
+		    TK = TC + TJ;
+		    T1o = TC - TJ;
+		    T1P = TI - TF;
+		    T1Q = TP - TO;
+		    T1R = T1P - T1Q;
+		    T2b = T1P + T1Q;
+	       }
+	       {
+		    E TN, TQ, T1M, T1N;
+		    TN = TL + TM;
+		    TQ = TO + TP;
+		    TR = TN + TQ;
+		    T1p = TQ - TN;
+		    T1M = Ty - TB;
+		    T1N = TL - TM;
+		    T1O = T1M - T1N;
+		    T2a = T1M + T1N;
+	       }
+	  }
+	  { /* writes ro and io at output indices 8, 7, 15, 0 */
+	       E Tx, T1f, T2s, T2u, T1e, T2l, T1i, T2t;
+	       {
+		    E Td, Tw, T2m, T2r;
+		    Td = T5 + Tc;
+		    Tw = Tm + Tv;
+		    Tx = Td - Tw;
+		    T1f = Td + Tw;
+		    T2m = T1l - T1k;
+		    T2r = T2n + T2q;
+		    T2s = T2m - T2r;
+		    T2u = T2m + T2r;
+	       }
+	       {
+		    E TS, T1d, T1g, T1h;
+		    TS = FMA(KP098017140, TK, KP995184726 * TR);
+		    T1d = FNMS(KP995184726, T1c, KP098017140 * T15);
+		    T1e = TS + T1d;
+		    T2l = T1d - TS;
+		    T1g = FNMS(KP098017140, TR, KP995184726 * TK);
+		    T1h = FMA(KP995184726, T15, KP098017140 * T1c);
+		    T1i = T1g + T1h;
+		    T2t = T1h - T1g;
+	       }
+	       ro[WS(ros, 8)] = Tx - T1e;
+	       io[WS(ios, 8)] = T2t - T2u;
+	       ro[WS(ros, 7)] = Tx + T1e;
+	       io[WS(ios, 7)] = T2t + T2u;
+	       ro[WS(ros, 15)] = T1f - T1i;
+	       io[WS(ios, 15)] = T2l - T2s;
+	       ro[0] = T1f + T1i;
+	       io[0] = T2l + T2s;
+	  }
+	  { /* writes ro and io at output indices 14, 1, 9, 6 */
+	       E T29, T2h, T2M, T2O, T2g, T2J, T2k, T2N;
+	       {
+		    E T25, T28, T2K, T2L;
+		    T25 = T1z + T1C;
+		    T28 = T26 - T27;
+		    T29 = T25 + T28;
+		    T2h = T25 - T28;
+		    T2K = T1J + T1G;
+		    T2L = T2C + T2D;
+		    T2M = T2K - T2L;
+		    T2O = T2K + T2L;
+	       }
+	       {
+		    E T2c, T2f, T2i, T2j;
+		    T2c = FMA(KP956940335, T2a, KP290284677 * T2b);
+		    T2f = FNMS(KP290284677, T2e, KP956940335 * T2d);
+		    T2g = T2c + T2f;
+		    T2J = T2f - T2c;
+		    T2i = FMA(KP290284677, T2d, KP956940335 * T2e);
+		    T2j = FNMS(KP290284677, T2a, KP956940335 * T2b);
+		    T2k = T2i - T2j;
+		    T2N = T2j + T2i;
+	       }
+	       ro[WS(ros, 14)] = T29 - T2g;
+	       io[WS(ios, 14)] = T2N - T2O;
+	       ro[WS(ros, 1)] = T29 + T2g;
+	       io[WS(ios, 1)] = T2N + T2O;
+	       ro[WS(ros, 9)] = T2h - T2k;
+	       io[WS(ios, 9)] = T2J - T2M;
+	       ro[WS(ros, 6)] = T2h + T2k;
+	       io[WS(ios, 6)] = T2J + T2M;
+	  }
+	  { /* writes ro and io at output indices 12, 3, 11, 4 */
+	       E T1n, T1v, T2y, T2A, T1u, T2v, T1y, T2z;
+	       {
+		    E T1j, T1m, T2w, T2x;
+		    T1j = T5 - Tc;
+		    T1m = T1k + T1l;
+		    T1n = T1j + T1m;
+		    T1v = T1j - T1m;
+		    T2w = Tv - Tm;
+		    T2x = T2q - T2n;
+		    T2y = T2w - T2x;
+		    T2A = T2w + T2x;
+	       }
+	       {
+		    E T1q, T1t, T1w, T1x;
+		    T1q = FMA(KP773010453, T1o, KP634393284 * T1p);
+		    T1t = FNMS(KP634393284, T1s, KP773010453 * T1r);
+		    T1u = T1q + T1t;
+		    T2v = T1t - T1q;
+		    T1w = FMA(KP634393284, T1r, KP773010453 * T1s);
+		    T1x = FNMS(KP634393284, T1o, KP773010453 * T1p);
+		    T1y = T1w - T1x;
+		    T2z = T1x + T1w;
+	       }
+	       ro[WS(ros, 12)] = T1n - T1u;
+	       io[WS(ios, 12)] = T2z - T2A;
+	       ro[WS(ros, 3)] = T1n + T1u;
+	       io[WS(ios, 3)] = T2z + T2A;
+	       ro[WS(ros, 11)] = T1v - T1y;
+	       io[WS(ios, 11)] = T2v - T2y;
+	       ro[WS(ros, 4)] = T1v + T1y;
+	       io[WS(ios, 4)] = T2v + T2y;
+	  }
+	  { /* writes ro and io at output indices 13, 2, 10, 5 */
+	       E T1L, T21, T2G, T2I, T20, T2H, T24, T2B;
+	       {
+		    E T1D, T1K, T2E, T2F;
+		    T1D = T1z - T1C;
+		    T1K = T1G - T1J;
+		    T1L = T1D + T1K;
+		    T21 = T1D - T1K;
+		    T2E = T2C - T2D;
+		    T2F = T26 + T27;
+		    T2G = T2E - T2F;
+		    T2I = T2F + T2E;
+	       }
+	       {
+		    E T1S, T1Z, T22, T23;
+		    T1S = FMA(KP881921264, T1O, KP471396736 * T1R);
+		    T1Z = FMA(KP881921264, T1V, KP471396736 * T1Y);
+		    T20 = T1S - T1Z;
+		    T2H = T1S + T1Z;
+		    T22 = FNMS(KP471396736, T1V, KP881921264 * T1Y);
+		    T23 = FNMS(KP471396736, T1O, KP881921264 * T1R);
+		    T24 = T22 - T23;
+		    T2B = T23 + T22;
+	       }
+	       ro[WS(ros, 13)] = T1L - T20;
+	       io[WS(ios, 13)] = T2B - T2G;
+	       ro[WS(ros, 2)] = T1L + T20;
+	       io[WS(ios, 2)] = T2B + T2G;
+	       ro[WS(ros, 10)] = T21 - T24;
+	       io[WS(ios, 10)] = T2I - T2H;
+	       ro[WS(ros, 5)] = T21 + T24;
+	       io[WS(ios, 5)] = -(T2H + T2I);
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 32, "r2hcII_32", {138, 46, 36, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 32, kernel name; 138/46/36 match the add/mul/fma counts quoted in this file's header comment */
+
+void X(codelet_r2hcII_32) (planner *p) { /* registration entry point for the size-32 kernel */
+     X(kr2hcII_register) (p, r2hcII_32, &desc); /* passes the kernel function and its descriptor to the registrar for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hcII_4.c b/src/fftw3/rdft/codelets/r2hc/r2hcII_4.c
new file mode 100644
index 0000000..94aaf84
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hcII_4.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:02 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 4 -name r2hcII_4 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 6 FP additions, 2 FP multiplications,
+ * (or, 6 additions, 2 multiplications, 0 fused multiply/add),
+ * 8 stack variables, and 8 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hcII_4.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_4.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_4.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void r2hcII_4(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated size-4 kernel (gen_r2hc -n 4 -dft-II): each pass reads I[0], I[WS(is, 1..3)] and writes ro[0], ro[WS(ros, 1)], io[0], io[WS(ios, 1)]. */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* sqrt(2)/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; each advances I by ivs and both ro and io by ovs */
+	  E T1, T6, T4, T5, T2, T3;
+	  T1 = I[0];
+	  T6 = I[WS(is, 2)];
+	  T2 = I[WS(is, 1)];
+	  T3 = I[WS(is, 3)];
+	  T4 = KP707106781 * (T2 - T3); /* scaled difference of inputs 1 and 3, feeds the real outputs */
+	  T5 = KP707106781 * (T2 + T3); /* scaled sum of inputs 1 and 3, feeds the imaginary outputs */
+	  ro[WS(ros, 1)] = T1 - T4;
+	  io[WS(ios, 1)] = T6 - T5;
+	  ro[0] = T1 + T4;
+	  io[0] = -(T5 + T6);
+     }
+}
+
+static const kr2hc_desc desc = { 4, "r2hcII_4", {6, 2, 0, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 4, kernel name; 6/2/0 match the add/mul/fma counts quoted in this file's header comment */
+
+void X(codelet_r2hcII_4) (planner *p) { /* registration entry point for the size-4 kernel */
+     X(kr2hcII_register) (p, r2hcII_4, &desc); /* passes the kernel function and its descriptor to the registrar for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hcII_5.c b/src/fftw3/rdft/codelets/r2hc/r2hcII_5.c
new file mode 100644
index 0000000..bb40e64
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hcII_5.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:03 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 5 -name r2hcII_5 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 12 FP additions, 6 FP multiplications,
+ * (or, 9 additions, 3 multiplications, 3 fused multiply/add),
+ * 17 stack variables, and 10 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hcII_5.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_5.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_5.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void r2hcII_5(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated size-5 kernel (gen_r2hc -n 5 -dft-II): each pass reads I[0], I[WS(is, 1..4)] and writes ro[0], ro[WS(ros, 1..2)], io[0], io[WS(ios, 1)]. */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* sqrt(5)/4 */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* sin(2*pi/5) */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* sin(pi/5) */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; each advances I by ivs and both ro and io by ovs */
+	  E T8, T3, T6, T9, Tc, Tb, T7, Ta; /* NOTE(review): FMA/FNMS are macros from the included headers; operand order is not visible here */
+	  T8 = I[0];
+	  { /* loads inputs 2, 3, 4, 1 and forms their pairwise sums and differences */
+	       E T1, T2, T4, T5;
+	       T1 = I[WS(is, 2)];
+	       T2 = I[WS(is, 3)];
+	       T3 = T1 - T2;
+	       T4 = I[WS(is, 4)];
+	       T5 = I[WS(is, 1)];
+	       T6 = T4 - T5;
+	       T9 = T3 + T6;
+	       Tc = T4 + T5;
+	       Tb = T1 + T2;
+	  }
+	  ro[WS(ros, 2)] = T8 + T9;
+	  io[WS(ios, 1)] = FNMS(KP951056516, Tc, KP587785252 * Tb);
+	  io[0] = -(FMA(KP951056516, Tb, KP587785252 * Tc));
+	  T7 = KP559016994 * (T3 - T6);
+	  Ta = FNMS(KP250000000, T9, T8);
+	  ro[0] = T7 + Ta; /* ro[0] and ro[1] share Ta and differ only in the sign of T7 */
+	  ro[WS(ros, 1)] = Ta - T7;
+     }
+}
+
+static const kr2hc_desc desc = { 5, "r2hcII_5", {9, 3, 3, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 5, kernel name; 9/3/3 match the add/mul/fma counts quoted in this file's header comment */
+
+void X(codelet_r2hcII_5) (planner *p) { /* registration entry point for the size-5 kernel */
+     X(kr2hcII_register) (p, r2hcII_5, &desc); /* passes the kernel function and its descriptor to the registrar for planner p */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hcII_6.c b/src/fftw3/rdft/codelets/r2hc/r2hcII_6.c
new file mode 100644
index 0000000..04e76c9
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hcII_6.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:03 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 6 -name r2hcII_6 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 13 FP additions, 4 FP multiplications,
+ * (or, 11 additions, 2 multiplications, 2 fused multiply/add),
+ * 14 stack variables, and 12 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hcII_6.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_6.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_6.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void r2hcII_6(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated 6-point kernel (gen_r2hc -n 6 -dft-II): each iteration reads I[WS(is, 0..5)] and writes ro[WS(ros, 0..2)] and io[WS(ios, 0..2)]; below, x[k] means I[WS(is, k)]. */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v iterations; each step advances I by ivs and both ro and io by ovs */
+	  E Ta, T7, T9, T1, T3, T2, T8, T4, T5, T6, Tb;
+	  Ta = I[WS(is, 3)]; /* x[3] */
+	  T5 = I[WS(is, 5)];
+	  T6 = I[WS(is, 1)];
+	  T7 = KP866025403 * (T5 - T6); /* scaled x[5] - x[1] */
+	  T9 = T5 + T6; /* x[5] + x[1] */
+	  T1 = I[0]; /* x[0] */
+	  T3 = I[WS(is, 2)];
+	  T2 = I[WS(is, 4)];
+	  T8 = KP866025403 * (T2 + T3); /* scaled x[4] + x[2] */
+	  T4 = FMA(KP500000000, T3 - T2, T1); /* NOTE(review): FMA is a macro defined outside this file; presumably FMA(a, b, c) = a*b + c -- confirm */
+	  ro[0] = T4 - T7; /* ro[0] and ro[2] differ only in the sign of T7 */
+	  ro[WS(ros, 2)] = T4 + T7;
+	  io[WS(ios, 1)] = Ta - T9; /* x[3] - x[5] - x[1] */
+	  ro[WS(ros, 1)] = T1 + T2 - T3; /* x[0] + x[4] - x[2] */
+	  Tb = FMA(KP500000000, T9, Ta);
+	  io[0] = -(T8 + Tb); /* io[0] and io[2] share T8 and Tb with different signs */
+	  io[WS(ios, 2)] = T8 - Tb;
+     }
+}
+
+static const kr2hc_desc desc = { 6, "r2hcII_6", {11, 2, 2, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 6, kernel name, then {11, 2, 2, 0}, which matches the header's 11 additions / 2 multiplications / 2 fused multiply-adds; remaining fields are defined by kr2hc_desc (not visible here) */
+
+void X(codelet_r2hcII_6) (planner *p) { /* hands the r2hcII_6 kernel and its descriptor to planner p; X() is a project macro applied to identifiers (presumably a name-mangling prefix -- confirm) */
+     X(kr2hcII_register) (p, r2hcII_6, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hcII_7.c b/src/fftw3/rdft/codelets/r2hc/r2hcII_7.c
new file mode 100644
index 0000000..13fb676
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hcII_7.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:04 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 7 -name r2hcII_7 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 24 FP additions, 18 FP multiplications,
+ * (or, 12 additions, 6 multiplications, 12 fused multiply/add),
+ * 20 stack variables, and 14 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hcII_7.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_7.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_7.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void r2hcII_7(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated 7-point kernel (gen_r2hc -n 7 -dft-II): each iteration reads I[WS(is, 0..6)] and writes ro[WS(ros, 0..3)] and io[WS(ios, 0..2)]; below, x[k] means I[WS(is, k)]. */
+     DK(KP900968867, +0.900968867902419126236102319507445051165919162); /* numerically cos(pi/7) */
+     DK(KP222520933, +0.222520933956314404288902564496794759466355569); /* numerically cos(3*pi/7) */
+     DK(KP623489801, +0.623489801858733530525004884004239810632274731); /* numerically cos(2*pi/7) */
+     DK(KP433883739, +0.433883739117558120475768332848358754609990728); /* numerically sin(pi/7) */
+     DK(KP974927912, +0.974927912181823607018131682993931217232785801); /* numerically sin(3*pi/7) */
+     DK(KP781831482, +0.781831482468029808708444526674057750232334519); /* numerically sin(2*pi/7) */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v iterations; each step advances I by ivs and both ro and io by ovs */
+	  E T1, Ta, Td, T4, Tb, T7, Tc, T8, T9;
+	  T1 = I[0]; /* x[0] */
+	  T8 = I[WS(is, 1)];
+	  T9 = I[WS(is, 6)];
+	  Ta = T8 - T9; /* x[1] - x[6] */
+	  Td = T8 + T9; /* x[1] + x[6] */
+	  {
+	       E T2, T3, T5, T6;
+	       T2 = I[WS(is, 2)];
+	       T3 = I[WS(is, 5)];
+	       T4 = T2 - T3; /* x[2] - x[5] */
+	       Tb = T2 + T3; /* x[2] + x[5] */
+	       T5 = I[WS(is, 3)];
+	       T6 = I[WS(is, 4)];
+	       T7 = T5 - T6; /* x[3] - x[4] */
+	       Tc = T5 + T6; /* x[3] + x[4] */
+	  }
+	  io[0] = -(FMA(KP781831482, Tb, KP974927912 * Tc) + (KP433883739 * Td)); /* io outputs use only the pair sums Tb, Tc, Td */
+	  io[WS(ios, 1)] = FNMS(KP974927912, Td, KP781831482 * Tc) - (KP433883739 * Tb); /* NOTE(review): FMA/FNMS/FNMA are macros defined outside this file; presumably a*b + c, c - a*b and -(a*b + c) respectively -- confirm */
+	  ro[0] = FMA(KP623489801, T4, T1) + FMA(KP222520933, T7, KP900968867 * Ta); /* ro outputs use x[0] and only the pair differences T4, T7, Ta */
+	  io[WS(ios, 2)] = FNMS(KP781831482, Td, KP974927912 * Tb) - (KP433883739 * Tc);
+	  ro[WS(ros, 2)] = FMA(KP900968867, T7, T1) + FNMA(KP623489801, Ta, KP222520933 * T4);
+	  ro[WS(ros, 1)] = FMA(KP222520933, Ta, T1) + FNMA(KP623489801, T7, KP900968867 * T4);
+	  ro[WS(ros, 3)] = T1 + T4 - (T7 + Ta); /* alternating sum x[0] - x[1] + x[2] - x[3] + x[4] - x[5] + x[6] */
+     }
+}
+
+static const kr2hc_desc desc = { 7, "r2hcII_7", {12, 6, 12, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 7, kernel name, then {12, 6, 12, 0}, which matches the header's 12 additions / 6 multiplications / 12 fused multiply-adds; remaining fields are defined by kr2hc_desc (not visible here) */
+
+void X(codelet_r2hcII_7) (planner *p) { /* hands the r2hcII_7 kernel and its descriptor to planner p; X() is a project macro applied to identifiers (presumably a name-mangling prefix -- confirm) */
+     X(kr2hcII_register) (p, r2hcII_7, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hcII_8.c b/src/fftw3/rdft/codelets/r2hc/r2hcII_8.c
new file mode 100644
index 0000000..d6d2aaf
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hcII_8.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:04 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 8 -name r2hcII_8 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 22 FP additions, 10 FP multiplications,
+ * (or, 18 additions, 6 multiplications, 4 fused multiply/add),
+ * 18 stack variables, and 16 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hcII_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void r2hcII_8(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated 8-point kernel (gen_r2hc -n 8 -dft-II): each iteration reads I[WS(is, 0..7)] and writes ro[WS(ros, 0..3)] and io[WS(ios, 0..3)]; below, x[k] means I[WS(is, k)]. */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* numerically sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v iterations; each step advances I by ivs and both ro and io by ovs */
+	  E T1, Tj, T4, Ti, T8, Te, Tb, Tf, T2, T3;
+	  T1 = I[0]; /* x[0] */
+	  Tj = I[WS(is, 4)]; /* x[4] */
+	  T2 = I[WS(is, 2)];
+	  T3 = I[WS(is, 6)];
+	  T4 = KP707106781 * (T2 - T3); /* scaled x[2] - x[6] */
+	  Ti = KP707106781 * (T2 + T3); /* scaled x[2] + x[6] */
+	  { /* weighted combinations of the odd-indexed inputs; NOTE(review): FMA/FNMS are macros defined outside this file, presumably a*b + c and c - a*b -- confirm */
+	       E T6, T7, T9, Ta;
+	       T6 = I[WS(is, 1)];
+	       T7 = I[WS(is, 5)];
+	       T8 = FNMS(KP382683432, T7, KP923879532 * T6);
+	       Te = FMA(KP382683432, T6, KP923879532 * T7);
+	       T9 = I[WS(is, 3)];
+	       Ta = I[WS(is, 7)];
+	       Tb = FNMS(KP923879532, Ta, KP382683432 * T9);
+	       Tf = FMA(KP923879532, T9, KP382683432 * Ta);
+	  }
+	  { /* outputs 0 and 3: sum/difference pairs of the same two intermediates */
+	       E T5, Tc, Th, Tk;
+	       T5 = T1 + T4;
+	       Tc = T8 + Tb;
+	       ro[WS(ros, 3)] = T5 - Tc;
+	       ro[0] = T5 + Tc;
+	       Th = Te + Tf;
+	       Tk = Ti + Tj;
+	       io[0] = -(Th + Tk);
+	       io[WS(ios, 3)] = Tk - Th;
+	  }
+	  { /* outputs 1 and 2: same pattern using the opposite-sign combinations */
+	       E Td, Tg, Tl, Tm;
+	       Td = T1 - T4;
+	       Tg = Te - Tf;
+	       ro[WS(ros, 2)] = Td - Tg;
+	       ro[WS(ros, 1)] = Td + Tg;
+	       Tl = Tb - T8;
+	       Tm = Tj - Ti;
+	       io[WS(ios, 2)] = Tl - Tm;
+	       io[WS(ios, 1)] = Tl + Tm;
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 8, "r2hcII_8", {18, 6, 4, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 8, kernel name, then {18, 6, 4, 0}, which matches the header's 18 additions / 6 multiplications / 4 fused multiply-adds; remaining fields are defined by kr2hc_desc (not visible here) */
+
+void X(codelet_r2hcII_8) (planner *p) { /* hands the r2hcII_8 kernel and its descriptor to planner p; X() is a project macro applied to identifiers (presumably a name-mangling prefix -- confirm) */
+     X(kr2hcII_register) (p, r2hcII_8, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hcII_9.c b/src/fftw3/rdft/codelets/r2hc/r2hcII_9.c
new file mode 100644
index 0000000..a046b93
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hcII_9.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:58:09 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 9 -name r2hcII_9 -dft-II -include r2hcII.h */
+
+/*
+ * This function contains 42 FP additions, 30 FP multiplications,
+ * (or, 25 additions, 13 multiplications, 17 fused multiply/add),
+ * 39 stack variables, and 18 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hcII_9.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_9.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hcII_9.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hcII.h"
+
+static void r2hcII_9(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated 9-point kernel (gen_r2hc -n 9 -dft-II): each iteration reads I[WS(is, 0..8)] and writes ro[WS(ros, 0..4)] and io[WS(ios, 0..3)]; below, x[k] means I[WS(is, k)]. */
+     DK(KP663413948, +0.663413948168938396205421319635891297216863310);
+     DK(KP642787609, +0.642787609686539326322643409907263432907559884);
+     DK(KP556670399, +0.556670399226419366452912952047023132968291906);
+     DK(KP766044443, +0.766044443118978035202392650555416673935832457);
+     DK(KP852868531, +0.852868531952443209628250963940074071936020296);
+     DK(KP173648177, +0.173648177666930348851716626769314796000375677);
+     DK(KP984807753, +0.984807753012208059366743024589523013670643252);
+     DK(KP150383733, +0.150383733180435296639271897612501926072238258);
+     DK(KP813797681, +0.813797681349373692844693217248393223289101568);
+     DK(KP342020143, +0.342020143325668733044099614682259580763083368);
+     DK(KP939692620, +0.939692620785908384054109277324731469936208134);
+     DK(KP296198132, +0.296198132726023843175338011893050938967728390);
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v iterations; each step advances I by ivs and both ro and io by ovs */
+	  E T1, T4, To, Ta, Tl, Tk, Tf, Ti, Th, T2, T3, T5, Tg;
+	  T1 = I[0]; /* x[0] */
+	  T2 = I[WS(is, 3)];
+	  T3 = I[WS(is, 6)];
+	  T4 = T2 - T3; /* x[3] - x[6] */
+	  To = T2 + T3; /* x[3] + x[6] */
+	  { /* combinations of x[2], x[5], x[8] */
+	       E T6, T7, T8, T9;
+	       T6 = I[WS(is, 2)];
+	       T7 = I[WS(is, 5)];
+	       T8 = I[WS(is, 8)];
+	       T9 = T7 - T8;
+	       Ta = T6 - T9; /* x[2] - x[5] + x[8] */
+	       Tl = T7 + T8;
+	       Tk = FMA(KP500000000, T9, T6); /* NOTE(review): FMA/FNMS are macros defined outside this file; presumably a*b + c and c - a*b -- confirm */
+	  }
+	  { /* combinations of x[4], x[1], x[7] */
+	       E Tb, Tc, Td, Te;
+	       Tb = I[WS(is, 4)];
+	       Tc = I[WS(is, 1)];
+	       Td = I[WS(is, 7)];
+	       Te = Tc + Td;
+	       Tf = Tb - Te; /* x[4] - x[1] - x[7] */
+	       Ti = FMA(KP500000000, Te, Tb);
+	       Th = Tc - Td;
+	  }
+	  io[WS(ios, 1)] = KP866025403 * (Tf - Ta);
+	  T5 = T1 - T4; /* x[0] - x[3] + x[6] */
+	  Tg = Ta + Tf;
+	  ro[WS(ros, 1)] = FNMS(KP500000000, Tg, T5);
+	  ro[WS(ros, 4)] = T5 + Tg; /* alternating sum x[0] - x[1] + x[2] - ... + x[8] */
+	  { /* remaining outputs (indices 0, 2, 3) are built from Tk, Tl, Ti, Th, To and T1/T4 */
+	       E Tr, Tt, Tw, Tv, Tu, Tp, Tq, Ts, Tj, Tm, Tn;
+	       Tr = FMA(KP500000000, T4, T1);
+	       Tt = FMA(KP296198132, Th, KP939692620 * Ti);
+	       Tw = FNMS(KP813797681, Th, KP342020143 * Ti);
+	       Tv = FNMS(KP984807753, Tk, KP150383733 * Tl);
+	       Tu = FMA(KP173648177, Tk, KP852868531 * Tl);
+	       Tp = FNMS(KP556670399, Tl, KP766044443 * Tk);
+	       Tq = FMA(KP852868531, Th, KP173648177 * Ti);
+	       Ts = Tp + Tq;
+	       Tj = FNMS(KP984807753, Ti, KP150383733 * Th);
+	       Tm = FMA(KP642787609, Tk, KP663413948 * Tl);
+	       Tn = Tj - Tm;
+	       io[0] = FNMS(KP866025403, To, Tn);
+	       ro[0] = Tr + Ts;
+	       io[WS(ios, 3)] = FNMS(KP500000000, Tn, KP866025403 * ((Tp - Tq) - To));
+	       ro[WS(ros, 3)] = FMA(KP866025403, Tm + Tj, Tr) - (KP500000000 * Ts);
+	       io[WS(ios, 2)] = FMA(KP866025403, To - (Tu + Tt), KP500000000 * (Tw - Tv));
+	       ro[WS(ros, 2)] = FMA(KP500000000, Tt - Tu, Tr) + (KP866025403 * (Tv + Tw));
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 9, "r2hcII_9", {25, 13, 17, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 9, kernel name, then {25, 13, 17, 0}, which matches the header's 25 additions / 13 multiplications / 17 fused multiply-adds; remaining fields are defined by kr2hc_desc (not visible here) */
+
+void X(codelet_r2hcII_9) (planner *p) { /* hands the r2hcII_9 kernel and its descriptor to planner p; X() is a project macro applied to identifiers (presumably a name-mangling prefix -- confirm) */
+     X(kr2hcII_register) (p, r2hcII_9, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_10.c b/src/fftw3/rdft/codelets/r2hc/r2hc_10.c
new file mode 100644
index 0000000..38e1766
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_10.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:41 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 10 -name r2hc_10 -include r2hc.h */
+
+/*
+ * This function contains 34 FP additions, 12 FP multiplications,
+ * (or, 28 additions, 6 multiplications, 6 fused multiply/add),
+ * 26 stack variables, and 20 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_10.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_10.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_10.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_10(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated 10-point kernel (gen_r2hc -n 10): each iteration reads I[WS(is, 0..9)] and writes ro[WS(ros, 0..5)] and io[WS(ios, 1..4)]; below, x[k] means I[WS(is, k)]. */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4 */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v iterations; each step advances I by ivs and both ro and io by ovs */
+	  E Ti, Tt, Ta, Tn, Td, To, Te, Tv, T3, Tq, T6, Tr, T7, Tu, Tg;
+	  E Th;
+	  Tg = I[0];
+	  Th = I[WS(is, 5)];
+	  Ti = Tg - Th; /* x[0] - x[5] */
+	  Tt = Tg + Th; /* x[0] + x[5] */
+	  { /* each input is paired with the one five positions away: difference and sum */
+	       E T8, T9, Tb, Tc;
+	       T8 = I[WS(is, 4)];
+	       T9 = I[WS(is, 9)];
+	       Ta = T8 - T9;
+	       Tn = T8 + T9;
+	       Tb = I[WS(is, 6)];
+	       Tc = I[WS(is, 1)];
+	       Td = Tb - Tc;
+	       To = Tb + Tc;
+	  }
+	  Te = Ta + Td;
+	  Tv = Tn + To; /* x[1] + x[4] + x[6] + x[9] */
+	  {
+	       E T1, T2, T4, T5;
+	       T1 = I[WS(is, 2)];
+	       T2 = I[WS(is, 7)];
+	       T3 = T1 - T2;
+	       Tq = T1 + T2;
+	       T4 = I[WS(is, 8)];
+	       T5 = I[WS(is, 3)];
+	       T6 = T4 - T5;
+	       Tr = T4 + T5;
+	  }
+	  T7 = T3 + T6;
+	  Tu = Tq + Tr; /* x[2] + x[3] + x[7] + x[8] */
+	  { /* odd-indexed outputs are built from the pair differences */
+	       E Tl, Tm, Tf, Tj, Tk;
+	       Tl = Td - Ta;
+	       Tm = T3 - T6;
+	       io[WS(ios, 1)] = FNMS(KP951056516, Tm, KP587785252 * Tl); /* NOTE(review): FMA/FNMS are macros defined outside this file; presumably a*b + c and c - a*b -- confirm */
+	       io[WS(ios, 3)] = FMA(KP587785252, Tm, KP951056516 * Tl);
+	       Tf = KP559016994 * (T7 - Te);
+	       Tj = T7 + Te;
+	       Tk = FNMS(KP250000000, Tj, Ti);
+	       ro[WS(ros, 1)] = Tf + Tk;
+	       ro[WS(ros, 5)] = Ti + Tj; /* even-indexed inputs minus odd-indexed inputs */
+	       ro[WS(ros, 3)] = Tk - Tf;
+	  }
+	  { /* even-indexed outputs are built from the pair sums */
+	       E Tp, Ts, Ty, Tw, Tx;
+	       Tp = Tn - To;
+	       Ts = Tq - Tr;
+	       io[WS(ios, 2)] = FNMS(KP587785252, Ts, KP951056516 * Tp);
+	       io[WS(ios, 4)] = FMA(KP951056516, Ts, KP587785252 * Tp);
+	       Ty = KP559016994 * (Tu - Tv);
+	       Tw = Tu + Tv;
+	       Tx = FNMS(KP250000000, Tw, Tt);
+	       ro[WS(ros, 2)] = Tx - Ty;
+	       ro[0] = Tt + Tw; /* sum of all ten inputs */
+	       ro[WS(ros, 4)] = Ty + Tx;
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 10, "r2hc_10", {28, 6, 6, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 10, kernel name, then {28, 6, 6, 0}, which matches the header's 28 additions / 6 multiplications / 6 fused multiply-adds; remaining fields are defined by kr2hc_desc (not visible here) */
+
+void X(codelet_r2hc_10) (planner *p) { /* hands the r2hc_10 kernel and its descriptor to planner p; X() is a project macro applied to identifiers (presumably a name-mangling prefix -- confirm) */
+     X(kr2hc_register) (p, r2hc_10, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_11.c b/src/fftw3/rdft/codelets/r2hc/r2hc_11.c
new file mode 100644
index 0000000..cceb091
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_11.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:41 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 11 -name r2hc_11 -include r2hc.h */
+
+/*
+ * This function contains 60 FP additions, 50 FP multiplications,
+ * (or, 20 additions, 10 multiplications, 40 fused multiply/add),
+ * 28 stack variables, and 22 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_11.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_11.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_11.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_11(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated 11-point kernel (gen_r2hc -n 11): each iteration reads I[WS(is, 0..10)] and writes ro[WS(ros, 0..5)] and io[WS(ios, 1..5)]; below, x[k] means I[WS(is, k)]. */
+     DK(KP654860733, +0.654860733945285064056925072466293553183791199);
+     DK(KP142314838, +0.142314838273285140443792668616369668791051361);
+     DK(KP959492973, +0.959492973614497389890368057066327699062454848);
+     DK(KP415415013, +0.415415013001886425529274149229623203524004910);
+     DK(KP841253532, +0.841253532831181168861811648919367717513292498); /* numerically cos(2*pi/11) */
+     DK(KP989821441, +0.989821441880932732376092037776718787376519372);
+     DK(KP909631995, +0.909631995354518371411715383079028460060241051);
+     DK(KP281732556, +0.281732556841429697711417915346616899035777899);
+     DK(KP540640817, +0.540640817455597582107635954318691695431770608); /* numerically sin(2*pi/11) */
+     DK(KP755749574, +0.755749574354258283774035843972344420179717445);
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v iterations; each step advances I by ivs and both ro and io by ovs */
+	  E T1, T4, Tl, Tg, Th, Td, Ti, Ta, Tk, T7, Tj, Tb, Tc;
+	  T1 = I[0]; /* x[0] */
+	  { /* inputs are paired as x[k] and x[11 - k]: one sum and one difference per pair */
+	       E T2, T3, Te, Tf;
+	       T2 = I[WS(is, 2)];
+	       T3 = I[WS(is, 9)];
+	       T4 = T2 + T3;
+	       Tl = T3 - T2;
+	       Te = I[WS(is, 1)];
+	       Tf = I[WS(is, 10)];
+	       Tg = Te + Tf;
+	       Th = Tf - Te;
+	  }
+	  Tb = I[WS(is, 3)];
+	  Tc = I[WS(is, 8)];
+	  Td = Tb + Tc;
+	  Ti = Tc - Tb;
+	  {
+	       E T8, T9, T5, T6;
+	       T8 = I[WS(is, 5)];
+	       T9 = I[WS(is, 6)];
+	       Ta = T8 + T9;
+	       Tk = T9 - T8;
+	       T5 = I[WS(is, 4)];
+	       T6 = I[WS(is, 7)];
+	       T7 = T5 + T6;
+	       Tj = T6 - T5;
+	  }
+	  io[WS(ios, 4)] = FMA(KP755749574, Th, KP540640817 * Ti) + FNMS(KP909631995, Tk, KP281732556 * Tj) - (KP989821441 * Tl); /* every io output is a weighted combination of the five pair differences Th, Tl, Ti, Tj, Tk */
+	  ro[WS(ros, 4)] = FMA(KP841253532, Td, T1) + FNMS(KP959492973, T7, KP415415013 * Ta) + FNMA(KP142314838, T4, KP654860733 * Tg); /* every ro output is x[0] plus a weighted combination of the five pair sums Tg, T4, Td, T7, Ta */
+	  io[WS(ios, 2)] = FMA(KP909631995, Th, KP755749574 * Tl) + FNMA(KP540640817, Tk, KP989821441 * Tj) - (KP281732556 * Ti); /* NOTE(review): FMA/FNMS/FNMA are macros defined outside this file; presumably a*b + c, c - a*b and -(a*b + c) respectively -- confirm */
+	  io[WS(ios, 5)] = FMA(KP281732556, Th, KP755749574 * Ti) + FNMS(KP909631995, Tj, KP989821441 * Tk) - (KP540640817 * Tl);
+	  io[WS(ios, 1)] = FMA(KP540640817, Th, KP909631995 * Tl) + FMA(KP989821441, Ti, KP755749574 * Tj) + (KP281732556 * Tk);
+	  io[WS(ios, 3)] = FMA(KP989821441, Th, KP540640817 * Tj) + FNMS(KP909631995, Ti, KP755749574 * Tk) - (KP281732556 * Tl);
+	  ro[WS(ros, 3)] = FMA(KP415415013, Td, T1) + FNMS(KP654860733, Ta, KP841253532 * T7) + FNMA(KP959492973, T4, KP142314838 * Tg);
+	  ro[WS(ros, 1)] = FMA(KP841253532, Tg, T1) + FNMS(KP959492973, Ta, KP415415013 * T4) + FNMA(KP654860733, T7, KP142314838 * Td);
+	  ro[0] = T1 + Tg + T4 + Td + T7 + Ta; /* sum of all eleven inputs */
+	  ro[WS(ros, 2)] = FMA(KP415415013, Tg, T1) + FNMS(KP142314838, T7, KP841253532 * Ta) + FNMA(KP959492973, Td, KP654860733 * T4);
+	  ro[WS(ros, 5)] = FMA(KP841253532, T4, T1) + FNMS(KP142314838, Ta, KP415415013 * T7) + FNMA(KP654860733, Td, KP959492973 * Tg);
+     }
+}
+
+static const kr2hc_desc desc = { 11, "r2hc_11", {20, 10, 40, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 11, kernel name, then {20, 10, 40, 0}, which matches the header's 20 additions / 10 multiplications / 40 fused multiply-adds; remaining fields are defined by kr2hc_desc (not visible here) */
+
+void X(codelet_r2hc_11) (planner *p) { /* hands the r2hc_11 kernel and its descriptor to planner p; X() is a project macro applied to identifiers (presumably a name-mangling prefix -- confirm) */
+     X(kr2hc_register) (p, r2hc_11, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_12.c b/src/fftw3/rdft/codelets/r2hc/r2hc_12.c
new file mode 100644
index 0000000..81a5273
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_12.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:41 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 12 -name r2hc_12 -include r2hc.h */
+
+/*
+ * This function contains 38 FP additions, 8 FP multiplications,
+ * (or, 34 additions, 4 multiplications, 4 fused multiply/add),
+ * 21 stack variables, and 24 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_12.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_12.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_12.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_12(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated 12-point kernel (gen_r2hc -n 12): each iteration reads I[WS(is, 0..11)] and writes ro[WS(ros, 0..6)] and io[WS(ios, 1..5)]; below, x[k] means I[WS(is, k)]. */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v iterations; each step advances I by ivs and both ro and io by ovs */
+	  E T5, Tp, Tb, Tn, Ty, Tt, Ta, Tq, Tc, Ti, Tz, Tu, Td, To;
+	  { /* group x[0], x[4], x[8]; NOTE(review): FNMS is a macro defined outside this file, presumably FNMS(a, b, c) = c - a*b -- confirm */
+	       E T1, T2, T3, T4;
+	       T1 = I[0];
+	       T2 = I[WS(is, 4)];
+	       T3 = I[WS(is, 8)];
+	       T4 = T2 + T3;
+	       T5 = T1 + T4; /* x[0] + x[4] + x[8] */
+	       Tp = FNMS(KP500000000, T4, T1);
+	       Tb = T3 - T2;
+	  }
+	  { /* group x[3], x[7], x[11] */
+	       E Tj, Tk, Tl, Tm;
+	       Tj = I[WS(is, 3)];
+	       Tk = I[WS(is, 7)];
+	       Tl = I[WS(is, 11)];
+	       Tm = Tk + Tl;
+	       Tn = FNMS(KP500000000, Tm, Tj);
+	       Ty = Tl - Tk;
+	       Tt = Tj + Tm; /* x[3] + x[7] + x[11] */
+	  }
+	  { /* group x[6], x[10], x[2] */
+	       E T6, T7, T8, T9;
+	       T6 = I[WS(is, 6)];
+	       T7 = I[WS(is, 10)];
+	       T8 = I[WS(is, 2)];
+	       T9 = T7 + T8;
+	       Ta = T6 + T9; /* x[6] + x[10] + x[2] */
+	       Tq = FNMS(KP500000000, T9, T6);
+	       Tc = T8 - T7;
+	  }
+	  { /* group x[9], x[1], x[5] */
+	       E Te, Tf, Tg, Th;
+	       Te = I[WS(is, 9)];
+	       Tf = I[WS(is, 1)];
+	       Tg = I[WS(is, 5)];
+	       Th = Tf + Tg;
+	       Ti = FNMS(KP500000000, Th, Te);
+	       Tz = Tg - Tf;
+	       Tu = Te + Th; /* x[9] + x[1] + x[5] */
+	  }
+	  ro[WS(ros, 3)] = T5 - Ta; /* (x[0] + x[4] + x[8]) - (x[2] + x[6] + x[10]) */
+	  io[WS(ios, 3)] = Tt - Tu; /* (x[3] + x[7] + x[11]) - (x[1] + x[5] + x[9]) */
+	  Td = KP866025403 * (Tb - Tc);
+	  To = Ti - Tn;
+	  io[WS(ios, 1)] = Td + To;
+	  io[WS(ios, 5)] = To - Td;
+	  {
+	       E Tx, TA, Tv, Tw;
+	       Tx = Tp - Tq;
+	       TA = KP866025403 * (Ty - Tz);
+	       ro[WS(ros, 5)] = Tx - TA;
+	       ro[WS(ros, 1)] = Tx + TA;
+	       Tv = T5 + Ta; /* sum of the even-indexed inputs */
+	       Tw = Tt + Tu; /* sum of the odd-indexed inputs */
+	       ro[WS(ros, 6)] = Tv - Tw; /* even-indexed inputs minus odd-indexed inputs */
+	       ro[0] = Tv + Tw; /* sum of all twelve inputs */
+	  }
+	  {
+	       E Tr, Ts, TB, TC;
+	       Tr = Tp + Tq;
+	       Ts = Tn + Ti;
+	       ro[WS(ros, 2)] = Tr - Ts;
+	       ro[WS(ros, 4)] = Tr + Ts;
+	       TB = Ty + Tz;
+	       TC = Tb + Tc;
+	       io[WS(ios, 2)] = KP866025403 * (TB - TC);
+	       io[WS(ios, 4)] = KP866025403 * (TC + TB);
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 12, "r2hc_12", {34, 4, 4, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 12, kernel name, then {34, 4, 4, 0}, which matches the header's 34 additions / 4 multiplications / 4 fused multiply-adds; remaining fields are defined by kr2hc_desc (not visible here) */
+
+void X(codelet_r2hc_12) (planner *p) { /* hands the r2hc_12 kernel and its descriptor to planner p; X() is a project macro applied to identifiers (presumably a name-mangling prefix -- confirm) */
+     X(kr2hc_register) (p, r2hc_12, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_13.c b/src/fftw3/rdft/codelets/r2hc/r2hc_13.c
new file mode 100644
index 0000000..deb7914
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_13.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:41 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 13 -name r2hc_13 -include r2hc.h */
+
+/*
+ * This function contains 76 FP additions, 34 FP multiplications,
+ * (or, 57 additions, 15 multiplications, 19 fused multiply/add),
+ * 55 stack variables, and 26 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_13.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_13.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_13.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_13(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Generated 13-point kernel (gen_r2hc -n 13): each iteration reads I[WS(is, 0..12)] and writes ro[WS(ros, 0..6)] and io[WS(ios, 1..6)]; below, x[k] means I[WS(is, k)]. */
+     DK(KP083333333, +0.083333333333333333333333333333333333333333333); /* 1/12 */
+     DK(KP075902986, +0.075902986037193865983102897245103540356428373);
+     DK(KP251768516, +0.251768516431883313623436926934233488546674281);
+     DK(KP503537032, +0.503537032863766627246873853868466977093348562);
+     DK(KP113854479, +0.113854479055790798974654345867655310534642560);
+     DK(KP265966249, +0.265966249214837287587521063842185948798330267);
+     DK(KP387390585, +0.387390585467617292130675966426762851778775217);
+     DK(KP300462606, +0.300462606288665774426601772289207995520941381);
+     DK(KP132983124, +0.132983124607418643793760531921092974399165133);
+     DK(KP258260390, +0.258260390311744861420450644284508567852516811);
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_732050807, +1.732050807568877293527446341505872366942805254); /* numerically sqrt(3) */
+     DK(KP300238635, +0.300238635966332641462884626667381504676006424);
+     DK(KP011599105, +0.011599105605768290721655456654083252189827041);
+     DK(KP156891391, +0.156891391051584611046832726756003269660212636);
+     DK(KP256247671, +0.256247671582936600958684654061725059144125175);
+     DK(KP174138601, +0.174138601152135905005660794929264742616964676);
+     DK(KP575140729, +0.575140729474003121368385547455453388461001608);
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     int i;
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v iterations; each step advances I by ivs and both ro and io by ovs */
+	  E T13, Tb, Tm, TW, TX, T14, TU, T10, Tz, TB, Tu, TC, TR, T11;
+	  T13 = I[0]; /* x[0] */
+	  { /* stage 1: load x[1..12] and reduce them to the twelve intermediates declared above (Tb ... T11) */
+	       E Te, TO, Ta, Tv, To, T5, Tw, Tp, Th, Tr, Tk, Ts, Tl, TP, Tc;
+	       E Td;
+	       Tc = I[WS(is, 8)];
+	       Td = I[WS(is, 5)];
+	       Te = Tc - Td;
+	       TO = Tc + Td; /* x[8] + x[5] */
+	       { /* group x[1], x[3], x[9]; NOTE(review): FMA/FNMS are macros defined outside this file, presumably a*b + c and c - a*b -- confirm */
+		    E T6, T7, T8, T9;
+		    T6 = I[WS(is, 1)];
+		    T7 = I[WS(is, 3)];
+		    T8 = I[WS(is, 9)];
+		    T9 = T7 + T8;
+		    Ta = T6 + T9; /* x[1] + x[3] + x[9] */
+		    Tv = T7 - T8;
+		    To = FNMS(KP500000000, T9, T6);
+	       }
+	       { /* group x[12], x[10], x[4] */
+		    E T1, T2, T3, T4;
+		    T1 = I[WS(is, 12)];
+		    T2 = I[WS(is, 10)];
+		    T3 = I[WS(is, 4)];
+		    T4 = T2 + T3;
+		    T5 = T1 + T4; /* x[12] + x[10] + x[4] */
+		    Tw = T2 - T3;
+		    Tp = FNMS(KP500000000, T4, T1);
+	       }
+	       { /* pairs (x[11], x[6]) and (x[7], x[2]): difference and sum of each */
+		    E Tf, Tg, Ti, Tj;
+		    Tf = I[WS(is, 11)];
+		    Tg = I[WS(is, 6)];
+		    Th = Tf - Tg;
+		    Tr = Tf + Tg;
+		    Ti = I[WS(is, 7)];
+		    Tj = I[WS(is, 2)];
+		    Tk = Ti - Tj;
+		    Ts = Ti + Tj;
+	       }
+	       Tl = Th + Tk;
+	       TP = Tr + Ts; /* x[11] + x[6] + x[7] + x[2] */
+	       Tb = T5 - Ta;
+	       Tm = Te + Tl;
+	       TW = Ta + T5;
+	       TX = TO + TP;
+	       T14 = TW + TX; /* x[1] + x[2] + ... + x[12] */
+	       {
+		    E TS, TT, Tx, Ty;
+		    TS = Tv + Tw;
+		    TT = Th - Tk;
+		    TU = TS - TT;
+		    T10 = TS + TT;
+		    Tx = KP866025403 * (Tv - Tw);
+		    Ty = FNMS(KP500000000, Tl, Te);
+		    Tz = Tx + Ty;
+		    TB = Ty - Tx;
+	       }
+	       {
+		    E Tq, Tt, TN, TQ;
+		    Tq = To - Tp;
+		    Tt = KP866025403 * (Tr - Ts);
+		    Tu = Tq - Tt;
+		    TC = Tq + Tt;
+		    TN = To + Tp;
+		    TQ = FNMS(KP500000000, TP, TO);
+		    TR = TN - TQ;
+		    T11 = TN + TQ;
+	       }
+	  }
+	  ro[0] = T13 + T14; /* sum of all thirteen inputs */
+	  { /* stage 2: the six io outputs, computed from Tb, Tm, Tz, TB, Tu, TC */
+	       E Tn, TG, TE, TF, TJ, TM, TK, TL;
+	       Tn = FNMS(KP174138601, Tm, KP575140729 * Tb);
+	       TG = FMA(KP174138601, Tb, KP575140729 * Tm);
+	       {
+		    E TA, TD, TH, TI;
+		    TA = FNMS(KP156891391, Tz, KP256247671 * Tu);
+		    TD = FNMS(KP300238635, TC, KP011599105 * TB);
+		    TE = TA + TD;
+		    TF = KP1_732050807 * (TD - TA);
+		    TH = FMA(KP300238635, TB, KP011599105 * TC);
+		    TI = FMA(KP256247671, Tz, KP156891391 * Tu);
+		    TJ = TH - TI;
+		    TM = KP1_732050807 * (TI + TH);
+	       }
+	       io[WS(ios, 5)] = FMA(KP2_000000000, TE, Tn);
+	       io[WS(ios, 1)] = FMA(KP2_000000000, TJ, TG);
+	       TK = TG - TJ;
+	       io[WS(ios, 4)] = TF - TK;
+	       io[WS(ios, 3)] = TF + TK;
+	       TL = Tn - TE;
+	       io[WS(ios, 2)] = TL - TM;
+	       io[WS(ios, 6)] = TL + TM;
+	  }
+	  { /* stage 3: ro outputs 1..6, computed from TU, TR, TW, TX, T10, T11 and T13/T14 */
+	       E TZ, T1b, T19, T1e, T16, T1a, TV, TY, T1c, T1d;
+	       TV = FNMS(KP132983124, TU, KP258260390 * TR);
+	       TY = KP300462606 * (TW - TX);
+	       TZ = FMA(KP2_000000000, TV, TY);
+	       T1b = TY - TV;
+	       {
+		    E T17, T18, T12, T15;
+		    T17 = FMA(KP387390585, TU, KP265966249 * TR);
+		    T18 = FNMS(KP503537032, T11, KP113854479 * T10);
+		    T19 = T17 - T18;
+		    T1e = T17 + T18;
+		    T12 = FMA(KP251768516, T10, KP075902986 * T11);
+		    T15 = FNMS(KP083333333, T14, T13); /* combines x[0] with the sum of the other twelve inputs scaled by KP083333333 */
+		    T16 = FMA(KP2_000000000, T12, T15);
+		    T1a = T15 - T12;
+	       }
+	       ro[WS(ros, 1)] = TZ + T16;
+	       ro[WS(ros, 5)] = T16 - TZ;
+	       T1c = T1a - T1b;
+	       ro[WS(ros, 2)] = T19 + T1c;
+	       ro[WS(ros, 6)] = T1c - T19;
+	       T1d = T1b + T1a;
+	       ro[WS(ros, 3)] = T1d - T1e;
+	       ro[WS(ros, 4)] = T1e + T1d;
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 13, "r2hc_13", {57, 15, 19, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* descriptor: size 13, kernel name, then {57, 15, 19, 0}, which matches the header's 57 additions / 15 multiplications / 19 fused multiply-adds; remaining fields are defined by kr2hc_desc (not visible here) */
+
+void X(codelet_r2hc_13) (planner *p) { /* hands the r2hc_13 kernel and its descriptor to planner p; X() is a project macro applied to identifiers (presumably a name-mangling prefix -- confirm) */
+     X(kr2hc_register) (p, r2hc_13, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_14.c b/src/fftw3/rdft/codelets/r2hc/r2hc_14.c
new file mode 100644
index 0000000..497d84a
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_14.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:41 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 14 -name r2hc_14 -include r2hc.h */
+
+/*
+ * This function contains 62 FP additions, 36 FP multiplications,
+ * (or, 38 additions, 12 multiplications, 24 fused multiply/add),
+ * 29 stack variables, and 28 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_14.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_14.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_14.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_14(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs) /* size-14 kernel (generator option -n 14): reads 14 samples from I, writes ro[0..7] and io[1..6]; ro/io are presumably the real/imaginary output halves -- confirm against r2hc.h */
+{ /* DK, E, R, stride, WS, FMA, FNMS and FNMA are declared outside this chunk */
+     DK(KP900968867, +0.900968867902419126236102319507445051165919162); /* numerically cos(pi/7) */
+     DK(KP222520933, +0.222520933956314404288902564496794759466355569); /* numerically cos(3*pi/7) */
+     DK(KP623489801, +0.623489801858733530525004884004239810632274731); /* numerically cos(2*pi/7) */
+     DK(KP433883739, +0.433883739117558120475768332848358754609990728); /* numerically sin(pi/7) */
+     DK(KP974927912, +0.974927912181823607018131682993931217232785801); /* numerically sin(3*pi/7) */
+     DK(KP781831482, +0.781831482468029808708444526674057750232334519); /* numerically sin(2*pi/7) */
+     int i; /* remaining batch iterations */
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; each pass advances I by ivs and both output pointers by ovs */
+	  E T3, TB, T6, Tv, Tn, Ts, Tk, Tt, Td, Ty, T9, Tw, Tg, Tz, T1; /* temporaries that outlive the scoped load blocks below */
+	  E T2;
+	  T1 = I[0];
+	  T2 = I[WS(is, 7)];
+	  T3 = T1 - T2; /* difference of samples 0 and 7 */
+	  TB = T1 + T2; /* sum of samples 0 and 7 */
+	  { /* sample pairs (4, 11) and (12, 5): keep the difference and the sum of each pair */
+	       E T4, T5, Tl, Tm;
+	       T4 = I[WS(is, 4)];
+	       T5 = I[WS(is, 11)];
+	       T6 = T4 - T5;
+	       Tv = T4 + T5;
+	       Tl = I[WS(is, 12)];
+	       Tm = I[WS(is, 5)];
+	       Tn = Tl - Tm;
+	       Ts = Tl + Tm;
+	  }
+	  { /* sample pairs (2, 9) and (6, 13) */
+	       E Ti, Tj, Tb, Tc;
+	       Ti = I[WS(is, 2)];
+	       Tj = I[WS(is, 9)];
+	       Tk = Ti - Tj;
+	       Tt = Ti + Tj;
+	       Tb = I[WS(is, 6)];
+	       Tc = I[WS(is, 13)];
+	       Td = Tb - Tc;
+	       Ty = Tb + Tc;
+	  }
+	  { /* sample pairs (10, 3) and (8, 1) */
+	       E T7, T8, Te, Tf;
+	       T7 = I[WS(is, 10)];
+	       T8 = I[WS(is, 3)];
+	       T9 = T7 - T8;
+	       Tw = T7 + T8;
+	       Te = I[WS(is, 8)];
+	       Tf = I[WS(is, 1)];
+	       Tg = Te - Tf;
+	       Tz = Te + Tf;
+	  }
+	  { /* odd-numbered outputs, built only from the pair differences */
+	       E Tp, Tr, Tq, Ta, To, Th;
+	       Tp = Tn - Tk;
+	       Tr = Tg - Td;
+	       Tq = T9 - T6;
+	       io[WS(ios, 1)] = FMA(KP781831482, Tp, KP974927912 * Tq) + (KP433883739 * Tr); /* three-term weighted combination of Tp, Tq, Tr */
+	       io[WS(ios, 5)] = FMA(KP433883739, Tq, KP781831482 * Tr) - (KP974927912 * Tp);
+	       io[WS(ios, 3)] = FMA(KP433883739, Tp, KP974927912 * Tr) - (KP781831482 * Tq);
+	       Ta = T6 + T9;
+	       To = Tk + Tn;
+	       Th = Td + Tg;
+	       ro[WS(ros, 3)] = FMA(KP623489801, Ta, T3) + FNMA(KP222520933, Th, KP900968867 * To);
+	       ro[WS(ros, 7)] = T3 + To + Ta + Th; /* unweighted sum of all seven pair differences */
+	       ro[WS(ros, 1)] = FMA(KP623489801, To, T3) + FNMA(KP900968867, Th, KP222520933 * Ta);
+	       ro[WS(ros, 5)] = FMA(KP623489801, Th, T3) + FNMA(KP900968867, Ta, KP222520933 * To);
+	  }
+	  { /* even-numbered outputs, built only from the pair sums */
+	       E Tu, TA, Tx, TC, TE, TD;
+	       Tu = Ts - Tt;
+	       TA = Ty - Tz;
+	       Tx = Tv - Tw;
+	       io[WS(ios, 2)] = FMA(KP974927912, Tu, KP433883739 * Tx) + (KP781831482 * TA);
+	       io[WS(ios, 6)] = FMA(KP974927912, Tx, KP433883739 * TA) - (KP781831482 * Tu);
+	       io[WS(ios, 4)] = FNMS(KP781831482, Tx, KP974927912 * TA) - (KP433883739 * Tu);
+	       TC = Tt + Ts;
+	       TE = Tv + Tw;
+	       TD = Ty + Tz;
+	       ro[WS(ros, 6)] = FMA(KP623489801, TC, TB) + FNMA(KP900968867, TD, KP222520933 * TE);
+	       ro[WS(ros, 2)] = FMA(KP623489801, TD, TB) + FNMA(KP900968867, TE, KP222520933 * TC);
+	       ro[WS(ros, 4)] = FMA(KP623489801, TE, TB) + FNMA(KP222520933, TD, KP900968867 * TC);
+	       ro[0] = TB + TC + TE + TD; /* sum of all 14 input samples */
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 14, "r2hc_14", {38, 12, 24, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* size 14, kernel name, then {38, 12, 24, 0}: the first three match the add / mul / fused-multiply-add counts in this file's header comment; other fields are defined by kr2hc_desc (not shown) */
+
+void X(codelet_r2hc_14) (planner *p) { /* registration entry point; X() is a macro defined elsewhere (presumably a name-prefixing wrapper -- confirm) */
+     X(kr2hc_register) (p, r2hc_14, &desc); /* passes planner p, the r2hc_14 kernel and its descriptor to kr2hc_register */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_15.c b/src/fftw3/rdft/codelets/r2hc/r2hc_15.c
new file mode 100644
index 0000000..71c4aba
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_15.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:42 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 15 -name r2hc_15 -include r2hc.h */
+
+/*
+ * This function contains 64 FP additions, 25 FP multiplications,
+ * (or, 50 additions, 11 multiplications, 14 fused multiply/add),
+ * 47 stack variables, and 30 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_15.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_15.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_15.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_15(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs) /* size-15 kernel (generator option -n 15): reads 15 samples from I, writes ro[0..7] and io[1..7]; ro/io are presumably the real/imaginary output halves -- confirm against r2hc.h */
+{ /* DK, E, R, stride, WS, FMA and FNMS are declared outside this chunk */
+     DK(KP484122918, +0.484122918275927110647408174972799951354115213); /* numerically sqrt(15)/8 */
+     DK(KP216506350, +0.216506350946109661690930792688234045867850657); /* numerically sqrt(3)/8 */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* numerically sin(2*pi/5) */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* numerically sin(pi/5) */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000);
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* numerically sqrt(5)/4 */
+     DK(KP509036960, +0.509036960455127183450980863393907648510733164); /* numerically (sqrt(3)/2)*sin(pi/5) */
+     DK(KP823639103, +0.823639103546331925877420039278190003029660514); /* numerically (sqrt(3)/2)*sin(2*pi/5) */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     int i; /* remaining batch iterations */
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; each pass advances I by ivs and both output pointers by ovs */
+	  E Ti, TR, TL, TD, TE, T7, Te, Tf, TV, TW, TX, Tv, Ty, TH, To; /* temporaries that outlive the scoped load blocks below */
+	  E Tr, TG, TS, TT, TU;
+	  { /* samples 0, 10 and 5 */
+	       E TJ, Tg, Th, TK;
+	       TJ = I[0];
+	       Tg = I[WS(is, 10)];
+	       Th = I[WS(is, 5)];
+	       TK = Th + Tg;
+	       Ti = Tg - Th;
+	       TR = TJ + TK; /* sum of samples 0, 5 and 10 */
+	       TL = FNMS(KP500000000, TK, TJ);
+	  }
+	  { /* the remaining twelve samples, combined into the shared temporaries */
+	       E Tm, Tt, Tw, Tp, T3, Tx, Ta, Tn, Td, Tq, T6, Tu;
+	       Tm = I[WS(is, 3)];
+	       Tt = I[WS(is, 6)];
+	       Tw = I[WS(is, 9)];
+	       Tp = I[WS(is, 12)];
+	       { /* sample pairs (14, 4) and (13, 8) */
+		    E T1, T2, T8, T9;
+		    T1 = I[WS(is, 14)];
+		    T2 = I[WS(is, 4)];
+		    T3 = T1 - T2;
+		    Tx = T1 + T2;
+		    T8 = I[WS(is, 13)];
+		    T9 = I[WS(is, 8)];
+		    Ta = T8 - T9;
+		    Tn = T9 + T8;
+	       }
+	       { /* sample pairs (7, 2) and (1, 11) */
+		    E Tb, Tc, T4, T5;
+		    Tb = I[WS(is, 7)];
+		    Tc = I[WS(is, 2)];
+		    Td = Tb - Tc;
+		    Tq = Tc + Tb;
+		    T4 = I[WS(is, 1)];
+		    T5 = I[WS(is, 11)];
+		    T6 = T4 - T5;
+		    Tu = T5 + T4;
+	       }
+	       TD = Ta - Td;
+	       TE = T6 + T3;
+	       T7 = T3 - T6;
+	       Te = Ta + Td;
+	       Tf = T7 - Te;
+	       TV = Tt + Tu; /* samples 6 + 11 + 1 */
+	       TW = Tw + Tx; /* samples 9 + 14 + 4 */
+	       TX = TV + TW;
+	       Tv = FNMS(KP500000000, Tu, Tt);
+	       Ty = FNMS(KP500000000, Tx, Tw);
+	       TH = Tv + Ty;
+	       To = FNMS(KP500000000, Tn, Tm);
+	       Tr = FNMS(KP500000000, Tq, Tp);
+	       TG = To + Tr;
+	       TS = Tm + Tn; /* samples 3 + 8 + 13 */
+	       TT = Tp + Tq; /* samples 12 + 2 + 7 */
+	       TU = TS + TT;
+	  }
+	  io[WS(ios, 5)] = KP866025403 * (Tf - Ti);
+	  { /* ro[1], ro[2], ro[4], ro[5], ro[7] */
+	       E TF, TP, TI, TM, TN, TQ, TO;
+	       TF = FMA(KP823639103, TD, KP509036960 * TE);
+	       TP = FNMS(KP509036960, TD, KP823639103 * TE);
+	       TI = KP559016994 * (TG - TH);
+	       TM = TG + TH;
+	       TN = FNMS(KP250000000, TM, TL);
+	       ro[WS(ros, 5)] = TL + TM;
+	       TQ = TN - TI;
+	       ro[WS(ros, 2)] = TP + TQ;
+	       ro[WS(ros, 7)] = TQ - TP;
+	       TO = TI + TN;
+	       ro[WS(ros, 1)] = TF + TO;
+	       ro[WS(ros, 4)] = TO - TF;
+	  }
+	  { /* ro[0], ro[3], ro[6] and the remaining io outputs */
+	       E T11, T12, T10, TY, TZ;
+	       T11 = TS - TT;
+	       T12 = TW - TV;
+	       io[WS(ios, 3)] = FMA(KP587785252, T11, KP951056516 * T12);
+	       io[WS(ios, 6)] = FNMS(KP951056516, T11, KP587785252 * T12);
+	       T10 = KP559016994 * (TU - TX);
+	       TY = TU + TX;
+	       TZ = FNMS(KP250000000, TY, TR);
+	       ro[WS(ros, 3)] = TZ - T10;
+	       ro[0] = TR + TY; /* sum of all 15 input samples */
+	       ro[WS(ros, 6)] = T10 + TZ;
+	       { /* io[1], io[2], io[4], io[7] */
+		    E Tl, TB, TA, TC;
+		    {
+			 E Tj, Tk, Ts, Tz;
+			 Tj = FMA(KP866025403, Ti, KP216506350 * Tf);
+			 Tk = KP484122918 * (Te + T7);
+			 Tl = Tj + Tk;
+			 TB = Tk - Tj;
+			 Ts = To - Tr;
+			 Tz = Tv - Ty;
+			 TA = FMA(KP951056516, Ts, KP587785252 * Tz);
+			 TC = FNMS(KP587785252, Ts, KP951056516 * Tz);
+		    }
+		    io[WS(ios, 1)] = Tl - TA;
+		    io[WS(ios, 7)] = TC - TB;
+		    io[WS(ios, 4)] = Tl + TA;
+		    io[WS(ios, 2)] = TB + TC;
+	       }
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 15, "r2hc_15", {50, 11, 14, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* size 15, kernel name, then {50, 11, 14, 0}: the first three match the add / mul / fused-multiply-add counts in this file's header comment; other fields are defined by kr2hc_desc (not shown) */
+
+void X(codelet_r2hc_15) (planner *p) { /* registration entry point; X() is a macro defined elsewhere (presumably a name-prefixing wrapper -- confirm) */
+     X(kr2hc_register) (p, r2hc_15, &desc); /* passes planner p, the r2hc_15 kernel and its descriptor to kr2hc_register */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_16.c b/src/fftw3/rdft/codelets/r2hc/r2hc_16.c
new file mode 100644
index 0000000..e28d6d5
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_16.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:43 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 16 -name r2hc_16 -include r2hc.h */
+
+/*
+ * This function contains 58 FP additions, 12 FP multiplications,
+ * (or, 54 additions, 8 multiplications, 4 fused multiply/add),
+ * 34 stack variables, and 32 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_16.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_16.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_16.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_16(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs) /* size-16 kernel (generator option -n 16): reads 16 samples from I, writes ro[0..8] and io[1..7]; ro/io are presumably the real/imaginary output halves -- confirm against r2hc.h */
+{ /* DK, E, R, stride, WS, FMA and FNMS are declared outside this chunk */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* numerically sin(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
+     int i; /* remaining batch iterations */
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; each pass advances I by ivs and both output pointers by ovs */
+	  E T3, T6, T7, Tz, Ti, Ta, Td, Te, TA, Th, Tq, TV, TF, TP, Tx; /* temporaries that outlive the scoped load blocks below */
+	  E TU, TE, TM, Tg, Tf, TJ, TQ;
+	  { /* samples 0, 8, 4, 12 */
+	       E T1, T2, T4, T5;
+	       T1 = I[0];
+	       T2 = I[WS(is, 8)];
+	       T3 = T1 + T2;
+	       T4 = I[WS(is, 4)];
+	       T5 = I[WS(is, 12)];
+	       T6 = T4 + T5;
+	       T7 = T3 + T6; /* sum of samples 0, 4, 8, 12 */
+	       Tz = T1 - T2;
+	       Ti = T4 - T5;
+	  }
+	  { /* samples 2, 10, 14, 6 */
+	       E T8, T9, Tb, Tc;
+	       T8 = I[WS(is, 2)];
+	       T9 = I[WS(is, 10)];
+	       Ta = T8 + T9;
+	       Tg = T8 - T9;
+	       Tb = I[WS(is, 14)];
+	       Tc = I[WS(is, 6)];
+	       Td = Tb + Tc;
+	       Tf = Tb - Tc;
+	  }
+	  Te = Ta + Td; /* sum of samples 2, 6, 10, 14 */
+	  TA = KP707106781 * (Tg + Tf);
+	  Th = KP707106781 * (Tf - Tg);
+	  { /* samples 15, 7, 3, 11 */
+	       E Tm, TN, Tp, TO;
+	       {
+		    E Tk, Tl, Tn, To;
+		    Tk = I[WS(is, 15)];
+		    Tl = I[WS(is, 7)];
+		    Tm = Tk - Tl;
+		    TN = Tk + Tl;
+		    Tn = I[WS(is, 3)];
+		    To = I[WS(is, 11)];
+		    Tp = Tn - To;
+		    TO = Tn + To;
+	       }
+	       Tq = FNMS(KP923879532, Tp, KP382683432 * Tm);
+	       TV = TN + TO; /* sum of samples 3, 7, 11, 15 */
+	       TF = FMA(KP923879532, Tm, KP382683432 * Tp);
+	       TP = TN - TO;
+	  }
+	  { /* samples 1, 9, 5, 13 */
+	       E Tt, TK, Tw, TL;
+	       {
+		    E Tr, Ts, Tu, Tv;
+		    Tr = I[WS(is, 1)];
+		    Ts = I[WS(is, 9)];
+		    Tt = Tr - Ts;
+		    TK = Tr + Ts;
+		    Tu = I[WS(is, 5)];
+		    Tv = I[WS(is, 13)];
+		    Tw = Tu - Tv;
+		    TL = Tu + Tv;
+	       }
+	       Tx = FMA(KP382683432, Tt, KP923879532 * Tw);
+	       TU = TK + TL; /* sum of samples 1, 5, 9, 13 */
+	       TE = FNMS(KP382683432, Tw, KP923879532 * Tt);
+	       TM = TK - TL;
+	  }
+	  ro[WS(ros, 4)] = T7 - Te;
+	  io[WS(ios, 4)] = TV - TU;
+	  { /* outputs 1 and 7 */
+	       E Tj, Ty, TD, TG;
+	       Tj = Th - Ti;
+	       Ty = Tq - Tx;
+	       io[WS(ios, 1)] = Tj + Ty;
+	       io[WS(ios, 7)] = Ty - Tj;
+	       TD = Tz + TA;
+	       TG = TE + TF;
+	       ro[WS(ros, 7)] = TD - TG;
+	       ro[WS(ros, 1)] = TD + TG;
+	  }
+	  { /* outputs 3 and 5 */
+	       E TB, TC, TH, TI;
+	       TB = Tz - TA;
+	       TC = Tx + Tq;
+	       ro[WS(ros, 5)] = TB - TC;
+	       ro[WS(ros, 3)] = TB + TC;
+	       TH = Ti + Th;
+	       TI = TF - TE;
+	       io[WS(ios, 3)] = TH + TI;
+	       io[WS(ios, 5)] = TI - TH;
+	  }
+	  TJ = T3 - T6;
+	  TQ = KP707106781 * (TM + TP);
+	  ro[WS(ros, 6)] = TJ - TQ;
+	  ro[WS(ros, 2)] = TJ + TQ;
+	  { /* io[2], io[6], ro[8], ro[0] */
+	       E TR, TS, TT, TW;
+	       TR = Td - Ta;
+	       TS = KP707106781 * (TP - TM);
+	       io[WS(ios, 2)] = TR + TS;
+	       io[WS(ios, 6)] = TS - TR;
+	       TT = T7 + Te; /* sum of the even-indexed samples */
+	       TW = TU + TV; /* sum of the odd-indexed samples */
+	       ro[WS(ros, 8)] = TT - TW; /* even-indexed sum minus odd-indexed sum */
+	       ro[0] = TT + TW; /* sum of all 16 input samples */
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 16, "r2hc_16", {54, 8, 4, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* size 16, kernel name, then {54, 8, 4, 0}: the first three match the add / mul / fused-multiply-add counts in this file's header comment; other fields are defined by kr2hc_desc (not shown) */
+
+void X(codelet_r2hc_16) (planner *p) { /* registration entry point; X() is a macro defined elsewhere (presumably a name-prefixing wrapper -- confirm) */
+     X(kr2hc_register) (p, r2hc_16, &desc); /* passes planner p, the r2hc_16 kernel and its descriptor to kr2hc_register */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_2.c b/src/fftw3/rdft/codelets/r2hc/r2hc_2.c
new file mode 100644
index 0000000..b0d8450
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_2.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:40 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 2 -name r2hc_2 -include r2hc.h */
+
+/*
+ * This function contains 2 FP additions, 0 FP multiplications,
+ * (or, 2 additions, 0 multiplications, 0 fused multiply/add),
+ * 3 stack variables, and 4 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_2.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_2.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_2.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_2(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs) /* size-2 kernel (generator option -n 2): reads two samples from I and writes their sum and difference to ro */
+{ /* E, R, stride and WS are declared outside this chunk; ios is unused in this body */
+     int i; /* remaining batch iterations */
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; io is advanced here but never written by this kernel */
+	  E T1, T2;
+	  T1 = I[0];
+	  T2 = I[WS(is, 1)];
+	  ro[WS(ros, 1)] = T1 - T2; /* difference of the two samples */
+	  ro[0] = T1 + T2; /* sum of the two samples */
+     }
+}
+
+static const kr2hc_desc desc = { 2, "r2hc_2", {2, 0, 0, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* size 2, kernel name, then {2, 0, 0, 0}: the first three match the add / mul / fused-multiply-add counts in this file's header comment; other fields are defined by kr2hc_desc (not shown) */
+
+void X(codelet_r2hc_2) (planner *p) { /* registration entry point; X() is a macro defined elsewhere (presumably a name-prefixing wrapper -- confirm) */
+     X(kr2hc_register) (p, r2hc_2, &desc); /* passes planner p, the r2hc_2 kernel and its descriptor to kr2hc_register */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_3.c b/src/fftw3/rdft/codelets/r2hc/r2hc_3.c
new file mode 100644
index 0000000..84041fc
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_3.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:40 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 3 -name r2hc_3 -include r2hc.h */
+
+/*
+ * This function contains 4 FP additions, 2 FP multiplications,
+ * (or, 3 additions, 1 multiplications, 1 fused multiply/add),
+ * 7 stack variables, and 6 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_3.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_3.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_3.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_3(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs) /* size-3 kernel (generator option -n 3): reads three samples from I, writes ro[0], ro[1] and io[1]; ro/io are presumably the real/imaginary output halves -- confirm against r2hc.h */
+{ /* DK, E, R, stride, WS and FNMS are declared outside this chunk */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* numerically sqrt(3)/2 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000);
+     int i; /* remaining batch iterations */
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; each pass advances I by ivs and both output pointers by ovs */
+	  E T1, T2, T3, T4;
+	  T1 = I[0];
+	  T2 = I[WS(is, 1)];
+	  T3 = I[WS(is, 2)];
+	  T4 = T2 + T3; /* sum of samples 1 and 2 */
+	  ro[WS(ros, 1)] = FNMS(KP500000000, T4, T1);
+	  io[WS(ios, 1)] = KP866025403 * (T3 - T2); /* scaled difference of samples 2 and 1 */
+	  ro[0] = T1 + T4; /* sum of all three input samples */
+     }
+}
+
+static const kr2hc_desc desc = { 3, "r2hc_3", {3, 1, 1, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* size 3, kernel name, then {3, 1, 1, 0}: the first three match the add / mul / fused-multiply-add counts in this file's header comment; other fields are defined by kr2hc_desc (not shown) */
+
+void X(codelet_r2hc_3) (planner *p) { /* registration entry point; X() is a macro defined elsewhere (presumably a name-prefixing wrapper -- confirm) */
+     X(kr2hc_register) (p, r2hc_3, &desc); /* passes planner p, the r2hc_3 kernel and its descriptor to kr2hc_register */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_32.c b/src/fftw3/rdft/codelets/r2hc/r2hc_32.c
new file mode 100644
index 0000000..b3e0a9d
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_32.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:44 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 32 -name r2hc_32 -include r2hc.h */
+
+/*
+ * This function contains 156 FP additions, 42 FP multiplications,
+ * (or, 140 additions, 26 multiplications, 16 fused multiply/add),
+ * 54 stack variables, and 64 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_32.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_32(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs) /* size-32 kernel (generator option -n 32): reads 32 samples from I, writes ro[0..16] and io[1..15]; ro/io are presumably the real/imaginary output halves -- confirm against r2hc.h */
+{ /* DK, E, R, stride, WS, FMA and FNMS are declared outside this chunk */
+     DK(KP555570233, +0.555570233019602224742830813948532874374937191); /* numerically sin(3*pi/16) */
+     DK(KP831469612, +0.831469612302545237078788377617905756738560812); /* numerically cos(3*pi/16) */
+     DK(KP195090322, +0.195090322016128267848284868477022240927691618); /* numerically sin(pi/16) */
+     DK(KP980785280, +0.980785280403230449126182236134239036973933731); /* numerically cos(pi/16) */
+     DK(KP382683432, +0.382683432365089771728459984030398866761344562); /* numerically sin(pi/8) */
+     DK(KP923879532, +0.923879532511286756128183189396788286822416626); /* numerically cos(pi/8) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* numerically sqrt(2)/2 */
+     int i; /* remaining batch iterations */
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; each pass advances I by ivs and both output pointers by ovs */
+	  E T7, T2b, Tv, T1l, Te, T2o, Ty, T1k, Tt, T2d, TF, T1h, Tm, T2c, TC; /* temporaries that outlive the scoped load blocks below */
+	  E T1i, T1Z, T22, T2k, T2j, T1e, T1C, T19, T1B, T1S, T1V, T2h, T2g, TX, T1z;
+	  E TS, T1y;
+	  { /* samples 0, 16, 8, 24 */
+	       E T1, T2, T3, T4, T5, T6;
+	       T1 = I[0];
+	       T2 = I[WS(is, 16)];
+	       T3 = T1 + T2;
+	       T4 = I[WS(is, 8)];
+	       T5 = I[WS(is, 24)];
+	       T6 = T4 + T5;
+	       T7 = T3 + T6; /* sum of samples 0, 8, 16, 24 */
+	       T2b = T3 - T6;
+	       Tv = T1 - T2;
+	       T1l = T4 - T5;
+	  }
+	  { /* samples 4, 20, 28, 12 */
+	       E Ta, Tw, Td, Tx;
+	       {
+		    E T8, T9, Tb, Tc;
+		    T8 = I[WS(is, 4)];
+		    T9 = I[WS(is, 20)];
+		    Ta = T8 + T9;
+		    Tw = T8 - T9;
+		    Tb = I[WS(is, 28)];
+		    Tc = I[WS(is, 12)];
+		    Td = Tb + Tc;
+		    Tx = Tb - Tc;
+	       }
+	       Te = Ta + Td; /* sum of samples 4, 12, 20, 28 */
+	       T2o = Td - Ta;
+	       Ty = KP707106781 * (Tw + Tx);
+	       T1k = KP707106781 * (Tx - Tw);
+	  }
+	  { /* samples 30, 14, 6, 22 */
+	       E Tp, TD, Ts, TE;
+	       {
+		    E Tn, To, Tq, Tr;
+		    Tn = I[WS(is, 30)];
+		    To = I[WS(is, 14)];
+		    Tp = Tn + To;
+		    TD = Tn - To;
+		    Tq = I[WS(is, 6)];
+		    Tr = I[WS(is, 22)];
+		    Ts = Tq + Tr;
+		    TE = Tq - Tr;
+	       }
+	       Tt = Tp + Ts; /* sum of samples 6, 14, 22, 30 */
+	       T2d = Tp - Ts;
+	       TF = FMA(KP923879532, TD, KP382683432 * TE);
+	       T1h = FNMS(KP923879532, TE, KP382683432 * TD);
+	  }
+	  { /* samples 2, 18, 10, 26 */
+	       E Ti, TA, Tl, TB;
+	       {
+		    E Tg, Th, Tj, Tk;
+		    Tg = I[WS(is, 2)];
+		    Th = I[WS(is, 18)];
+		    Ti = Tg + Th;
+		    TA = Tg - Th;
+		    Tj = I[WS(is, 10)];
+		    Tk = I[WS(is, 26)];
+		    Tl = Tj + Tk;
+		    TB = Tj - Tk;
+	       }
+	       Tm = Ti + Tl; /* sum of samples 2, 10, 18, 26 */
+	       T2c = Ti - Tl;
+	       TC = FNMS(KP382683432, TB, KP923879532 * TA);
+	       T1i = FMA(KP382683432, TA, KP923879532 * TB);
+	  }
+	  { /* samples 31, 15, 7, 23, 3, 19, 27, 11 */
+	       E T11, T1X, T1d, T1Y, T14, T20, T17, T21, T1a, T18;
+	       {
+		    E TZ, T10, T1b, T1c;
+		    TZ = I[WS(is, 31)];
+		    T10 = I[WS(is, 15)];
+		    T11 = TZ - T10;
+		    T1X = TZ + T10;
+		    T1b = I[WS(is, 7)];
+		    T1c = I[WS(is, 23)];
+		    T1d = T1b - T1c;
+		    T1Y = T1b + T1c;
+	       }
+	       {
+		    E T12, T13, T15, T16;
+		    T12 = I[WS(is, 3)];
+		    T13 = I[WS(is, 19)];
+		    T14 = T12 - T13;
+		    T20 = T12 + T13;
+		    T15 = I[WS(is, 27)];
+		    T16 = I[WS(is, 11)];
+		    T17 = T15 - T16;
+		    T21 = T15 + T16;
+	       }
+	       T1Z = T1X + T1Y; /* sum of samples 7, 15, 23, 31 */
+	       T22 = T20 + T21; /* sum of samples 3, 11, 19, 27 */
+	       T2k = T21 - T20;
+	       T2j = T1X - T1Y;
+	       T1a = KP707106781 * (T17 - T14);
+	       T1e = T1a - T1d;
+	       T1C = T1d + T1a;
+	       T18 = KP707106781 * (T14 + T17);
+	       T19 = T11 + T18;
+	       T1B = T11 - T18;
+	  }
+	  { /* samples 1, 17, 9, 25, 5, 21, 29, 13 */
+	       E TK, T1Q, TW, T1R, TN, T1T, TQ, T1U, TT, TR;
+	       {
+		    E TI, TJ, TU, TV;
+		    TI = I[WS(is, 1)];
+		    TJ = I[WS(is, 17)];
+		    TK = TI - TJ;
+		    T1Q = TI + TJ;
+		    TU = I[WS(is, 9)];
+		    TV = I[WS(is, 25)];
+		    TW = TU - TV;
+		    T1R = TU + TV;
+	       }
+	       {
+		    E TL, TM, TO, TP;
+		    TL = I[WS(is, 5)];
+		    TM = I[WS(is, 21)];
+		    TN = TL - TM;
+		    T1T = TL + TM;
+		    TO = I[WS(is, 29)];
+		    TP = I[WS(is, 13)];
+		    TQ = TO - TP;
+		    T1U = TO + TP;
+	       }
+	       T1S = T1Q + T1R; /* sum of samples 1, 9, 17, 25 */
+	       T1V = T1T + T1U; /* sum of samples 5, 13, 21, 29 */
+	       T2h = T1U - T1T;
+	       T2g = T1Q - T1R;
+	       TT = KP707106781 * (TQ - TN);
+	       TX = TT - TW;
+	       T1z = TW + TT;
+	       TR = KP707106781 * (TN + TQ);
+	       TS = TK + TR;
+	       T1y = TK - TR;
+	  }
+	  { /* ro[0], ro[8], ro[16] and io[8], from the group sums */
+	       E Tf, Tu, T27, T28, T29, T2a;
+	       Tf = T7 + Te;
+	       Tu = Tm + Tt;
+	       T27 = Tf + Tu; /* sum of the even-indexed samples */
+	       T28 = T1S + T1V;
+	       T29 = T1Z + T22;
+	       T2a = T28 + T29; /* sum of the odd-indexed samples */
+	       ro[WS(ros, 8)] = Tf - Tu;
+	       io[WS(ios, 8)] = T29 - T28;
+	       ro[WS(ros, 16)] = T27 - T2a; /* even-indexed sum minus odd-indexed sum */
+	       ro[0] = T27 + T2a; /* sum of all 32 input samples */
+	  }
+	  { /* outputs 4 and 12 */
+	       E T1P, T25, T24, T26, T1W, T23;
+	       T1P = T7 - Te;
+	       T25 = Tt - Tm;
+	       T1W = T1S - T1V;
+	       T23 = T1Z - T22;
+	       T24 = KP707106781 * (T1W + T23);
+	       T26 = KP707106781 * (T23 - T1W);
+	       ro[WS(ros, 12)] = T1P - T24;
+	       io[WS(ios, 12)] = T26 - T25;
+	       ro[WS(ros, 4)] = T1P + T24;
+	       io[WS(ios, 4)] = T25 + T26;
+	  }
+	  { /* outputs 2, 6, 10 and 14 */
+	       E T2f, T2v, T2p, T2r, T2m, T2q, T2u, T2w, T2e, T2n;
+	       T2e = KP707106781 * (T2c + T2d);
+	       T2f = T2b + T2e;
+	       T2v = T2b - T2e;
+	       T2n = KP707106781 * (T2d - T2c);
+	       T2p = T2n - T2o;
+	       T2r = T2o + T2n;
+	       {
+		    E T2i, T2l, T2s, T2t;
+		    T2i = FMA(KP923879532, T2g, KP382683432 * T2h);
+		    T2l = FNMS(KP382683432, T2k, KP923879532 * T2j);
+		    T2m = T2i + T2l;
+		    T2q = T2l - T2i;
+		    T2s = FNMS(KP382683432, T2g, KP923879532 * T2h);
+		    T2t = FMA(KP382683432, T2j, KP923879532 * T2k);
+		    T2u = T2s + T2t;
+		    T2w = T2t - T2s;
+	       }
+	       ro[WS(ros, 14)] = T2f - T2m;
+	       io[WS(ios, 14)] = T2u - T2r;
+	       ro[WS(ros, 2)] = T2f + T2m;
+	       io[WS(ios, 2)] = T2r + T2u;
+	       io[WS(ios, 6)] = T2p + T2q;
+	       ro[WS(ros, 6)] = T2v + T2w;
+	       io[WS(ios, 10)] = T2q - T2p;
+	       ro[WS(ros, 10)] = T2v - T2w;
+	  }
+	  { /* outputs 1, 7, 9 and 15 */
+	       E TH, T1t, T1s, T1u, T1g, T1o, T1n, T1p;
+	       {
+		    E Tz, TG, T1q, T1r;
+		    Tz = Tv + Ty;
+		    TG = TC + TF;
+		    TH = Tz + TG;
+		    T1t = Tz - TG;
+		    T1q = FNMS(KP195090322, TS, KP980785280 * TX);
+		    T1r = FMA(KP195090322, T19, KP980785280 * T1e);
+		    T1s = T1q + T1r;
+		    T1u = T1r - T1q;
+	       }
+	       {
+		    E TY, T1f, T1j, T1m;
+		    TY = FMA(KP980785280, TS, KP195090322 * TX);
+		    T1f = FNMS(KP195090322, T1e, KP980785280 * T19);
+		    T1g = TY + T1f;
+		    T1o = T1f - TY;
+		    T1j = T1h - T1i;
+		    T1m = T1k - T1l;
+		    T1n = T1j - T1m;
+		    T1p = T1m + T1j;
+	       }
+	       ro[WS(ros, 15)] = TH - T1g;
+	       io[WS(ios, 15)] = T1s - T1p;
+	       ro[WS(ros, 1)] = TH + T1g;
+	       io[WS(ios, 1)] = T1p + T1s;
+	       io[WS(ios, 7)] = T1n + T1o;
+	       ro[WS(ros, 7)] = T1t + T1u;
+	       io[WS(ios, 9)] = T1o - T1n;
+	       ro[WS(ros, 9)] = T1t - T1u;
+	  }
+	  { /* outputs 3, 5, 11 and 13 */
+	       E T1x, T1N, T1M, T1O, T1E, T1I, T1H, T1J;
+	       {
+		    E T1v, T1w, T1K, T1L;
+		    T1v = Tv - Ty;
+		    T1w = T1i + T1h;
+		    T1x = T1v + T1w;
+		    T1N = T1v - T1w;
+		    T1K = FNMS(KP555570233, T1y, KP831469612 * T1z);
+		    T1L = FMA(KP555570233, T1B, KP831469612 * T1C);
+		    T1M = T1K + T1L;
+		    T1O = T1L - T1K;
+	       }
+	       {
+		    E T1A, T1D, T1F, T1G;
+		    T1A = FMA(KP831469612, T1y, KP555570233 * T1z);
+		    T1D = FNMS(KP555570233, T1C, KP831469612 * T1B);
+		    T1E = T1A + T1D;
+		    T1I = T1D - T1A;
+		    T1F = TF - TC;
+		    T1G = T1l + T1k;
+		    T1H = T1F - T1G;
+		    T1J = T1G + T1F;
+	       }
+	       ro[WS(ros, 13)] = T1x - T1E;
+	       io[WS(ios, 13)] = T1M - T1J;
+	       ro[WS(ros, 3)] = T1x + T1E;
+	       io[WS(ios, 3)] = T1J + T1M;
+	       io[WS(ios, 5)] = T1H + T1I;
+	       ro[WS(ros, 5)] = T1N + T1O;
+	       io[WS(ios, 11)] = T1I - T1H;
+	       ro[WS(ros, 11)] = T1N - T1O;
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 32, "r2hc_32", {140, 26, 16, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* size 32, kernel name, then {140, 26, 16, 0}: the first three match the add / mul / fused-multiply-add counts in this file's header comment; other fields are defined by kr2hc_desc (not shown) */
+
+void X(codelet_r2hc_32) (planner *p) { /* registration entry point; X() is a macro defined elsewhere (presumably a name-prefixing wrapper -- confirm) */
+     X(kr2hc_register) (p, r2hc_32, &desc); /* passes planner p, the r2hc_32 kernel and its descriptor to kr2hc_register */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_4.c b/src/fftw3/rdft/codelets/r2hc/r2hc_4.c
new file mode 100644
index 0000000..0ef1567
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_4.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:40 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 4 -name r2hc_4 -include r2hc.h */
+
+/*
+ * This function contains 6 FP additions, 0 FP multiplications,
+ * (or, 6 additions, 0 multiplications, 0 fused multiply/add),
+ * 7 stack variables, and 8 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_4.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_4.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_4.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_4(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs) /* size-4 kernel (generator option -n 4): reads four samples from I, writes ro[0..2] and io[1] using additions and subtractions only */
+{ /* E, R, stride and WS are declared outside this chunk */
+     int i; /* remaining batch iterations */
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v passes; each pass advances I by ivs and both output pointers by ovs */
+	  E T1, T2, T3, T4, T5, T6;
+	  T1 = I[0];
+	  T2 = I[WS(is, 2)];
+	  T3 = T1 + T2; /* sum of samples 0 and 2 */
+	  T4 = I[WS(is, 1)];
+	  T5 = I[WS(is, 3)];
+	  T6 = T4 + T5; /* sum of samples 1 and 3 */
+	  ro[WS(ros, 1)] = T1 - T2; /* sample 0 minus sample 2 */
+	  io[WS(ios, 1)] = T5 - T4; /* sample 3 minus sample 1 */
+	  ro[WS(ros, 2)] = T3 - T6; /* even-indexed sum minus odd-indexed sum */
+	  ro[0] = T3 + T6; /* sum of all four input samples */
+     }
+}
+
+static const kr2hc_desc desc = { 4, "r2hc_4", {6, 0, 0, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* size 4, kernel name, then {6, 0, 0, 0}: the first three match the add / mul / fused-multiply-add counts in this file's header comment; other fields are defined by kr2hc_desc (not shown) */
+
+void X(codelet_r2hc_4) (planner *p) { /* registration entry point; X() is a macro defined elsewhere (presumably a name-prefixing wrapper -- confirm) */
+     X(kr2hc_register) (p, r2hc_4, &desc); /* passes planner p, the r2hc_4 kernel and its descriptor to kr2hc_register */
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_5.c b/src/fftw3/rdft/codelets/r2hc/r2hc_5.c
new file mode 100644
index 0000000..14f4849
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_5.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:40 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 5 -name r2hc_5 -include r2hc.h */
+
+/*
+ * This function contains 12 FP additions, 6 FP multiplications,
+ * (or, 9 additions, 3 multiplications, 3 fused multiply/add),
+ * 17 stack variables, and 10 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_5.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_5.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_5.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_5(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Size-5 real-to-halfcomplex kernel: 5 reals read from I, real parts stored to ro, imaginary parts to io. */
+     DK(KP250000000, +0.250000000000000000000000000000000000000000000); /* 1/4; DK presumably declares a named constant -- confirm */
+     DK(KP559016994, +0.559016994374947424102293417182819058860154590); /* ~ sqrt(5)/4 */
+     DK(KP587785252, +0.587785252292473129168705954639072768597652438); /* ~ sin(pi/5) */
+     DK(KP951056516, +0.951056516295153572116439333379382143405698634); /* ~ sin(2*pi/5) */
+     int i; /* number of transforms still to perform */
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v transforms; I advances by ivs, ro and io both by ovs */
+	  E Ta, T7, T8, T3, Tb, T6, T9, Tc; /* below, xk denotes I[WS(is, k)] */
+	  Ta = I[0];
+	  {
+	       E T1, T2, T4, T5;
+	       T1 = I[WS(is, 4)];
+	       T2 = I[WS(is, 1)];
+	       T7 = T2 + T1; /* x1 + x4 */
+	       T4 = I[WS(is, 2)];
+	       T5 = I[WS(is, 3)];
+	       T8 = T4 + T5; /* x2 + x3 */
+	       T3 = T1 - T2; /* x4 - x1 */
+	       Tb = T7 + T8; /* x1 + x2 + x3 + x4 */
+	       T6 = T4 - T5; /* x2 - x3 */
+	  }
+	  io[WS(ios, 1)] = FNMS(KP587785252, T6, KP951056516 * T3); /* assuming FNMS(a, b, c) is c - a*b -- confirm against the macro */
+	  ro[0] = Ta + Tb; /* sum of all five inputs */
+	  io[WS(ios, 2)] = FMA(KP587785252, T3, KP951056516 * T6); /* assuming FMA(a, b, c) is a*b + c -- confirm against the macro */
+	  T9 = KP559016994 * (T7 - T8);
+	  Tc = FNMS(KP250000000, Tb, Ta); /* part shared by both remaining real outputs */
+	  ro[WS(ros, 1)] = T9 + Tc;
+	  ro[WS(ros, 2)] = Tc - T9;
+     }
+}
+
+static const kr2hc_desc desc = { 5, "r2hc_5", {9, 3, 3, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* size, name, op counts (9 add, 3 mul, 3 fma, as in the file header), genus; trailing zeros presumably mean "any stride" -- confirm against kr2hc_desc */
+
+void X(codelet_r2hc_5) (planner *p) { /* hands the r2hc_5 kernel and its descriptor to planner p */
+     X(kr2hc_register) (p, r2hc_5, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_6.c b/src/fftw3/rdft/codelets/r2hc/r2hc_6.c
new file mode 100644
index 0000000..5fadce3
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_6.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:40 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 6 -name r2hc_6 -include r2hc.h */
+
+/*
+ * This function contains 14 FP additions, 4 FP multiplications,
+ * (or, 12 additions, 2 multiplications, 2 fused multiply/add),
+ * 17 stack variables, and 12 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_6.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_6.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_6.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_6(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Size-6 real-to-halfcomplex kernel: 6 reals read from I, real parts stored to ro, imaginary parts to io. */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2; DK presumably declares a named constant -- confirm */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* ~ sqrt(3)/2 */
+     int i; /* number of transforms still to perform */
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v transforms; I advances by ivs, ro and io both by ovs */
+	  E T3, Td, T9, Tc, T6, Tb, T1, T2, Ta, Te; /* below, xk denotes I[WS(is, k)] */
+	  T1 = I[0];
+	  T2 = I[WS(is, 3)];
+	  T3 = T1 - T2; /* x0 - x3 */
+	  Td = T1 + T2; /* x0 + x3 */
+	  {
+	       E T7, T8, T4, T5;
+	       T7 = I[WS(is, 4)];
+	       T8 = I[WS(is, 1)];
+	       T9 = T7 - T8; /* x4 - x1 */
+	       Tc = T7 + T8; /* x4 + x1 */
+	       T4 = I[WS(is, 2)];
+	       T5 = I[WS(is, 5)];
+	       T6 = T4 - T5; /* x2 - x5 */
+	       Tb = T4 + T5; /* x2 + x5 */
+	  }
+	  io[WS(ios, 1)] = KP866025403 * (T9 - T6);
+	  Ta = T6 + T9;
+	  ro[WS(ros, 1)] = FNMS(KP500000000, Ta, T3); /* assuming FNMS(a, b, c) is c - a*b -- confirm against the macro */
+	  ro[WS(ros, 3)] = T3 + Ta; /* x0 - x1 + x2 - x3 + x4 - x5 */
+	  io[WS(ios, 2)] = KP866025403 * (Tb - Tc);
+	  Te = Tb + Tc;
+	  ro[WS(ros, 2)] = FNMS(KP500000000, Te, Td);
+	  ro[0] = Td + Te; /* sum of all six inputs */
+     }
+}
+
+static const kr2hc_desc desc = { 6, "r2hc_6", {12, 2, 2, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* size, name, op counts (12 add, 2 mul, 2 fma, as in the file header), genus; trailing zeros presumably mean "any stride" -- confirm against kr2hc_desc */
+
+void X(codelet_r2hc_6) (planner *p) { /* hands the r2hc_6 kernel and its descriptor to planner p */
+     X(kr2hc_register) (p, r2hc_6, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_7.c b/src/fftw3/rdft/codelets/r2hc/r2hc_7.c
new file mode 100644
index 0000000..8548e93
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_7.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:41 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 7 -name r2hc_7 -include r2hc.h */
+
+/*
+ * This function contains 24 FP additions, 18 FP multiplications,
+ * (or, 12 additions, 6 multiplications, 12 fused multiply/add),
+ * 20 stack variables, and 14 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_7.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_7.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_7.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_7(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Size-7 real-to-halfcomplex kernel: 7 reals read from I, real parts stored to ro, imaginary parts to io. */
+     DK(KP222520933, +0.222520933956314404288902564496794759466355569); /* ~ cos(3*pi/7); DK presumably declares a named constant -- confirm */
+     DK(KP900968867, +0.900968867902419126236102319507445051165919162); /* ~ cos(pi/7) */
+     DK(KP623489801, +0.623489801858733530525004884004239810632274731); /* ~ cos(2*pi/7) */
+     DK(KP433883739, +0.433883739117558120475768332848358754609990728); /* ~ sin(pi/7) */
+     DK(KP781831482, +0.781831482468029808708444526674057750232334519); /* ~ sin(2*pi/7) */
+     DK(KP974927912, +0.974927912181823607018131682993931217232785801); /* ~ sin(3*pi/7) */
+     int i; /* number of transforms still to perform */
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v transforms; I advances by ivs, ro and io both by ovs */
+	  E T1, Ta, Tb, T4, Td, T7, Tc, T8, T9; /* below, xk denotes I[WS(is, k)] */
+	  T1 = I[0];
+	  T8 = I[WS(is, 1)];
+	  T9 = I[WS(is, 6)];
+	  Ta = T8 + T9; /* x1 + x6 */
+	  Tb = T9 - T8; /* x6 - x1 */
+	  {
+	       E T2, T3, T5, T6;
+	       T2 = I[WS(is, 2)];
+	       T3 = I[WS(is, 5)];
+	       T4 = T2 + T3; /* x2 + x5 */
+	       Td = T3 - T2; /* x5 - x2 */
+	       T5 = I[WS(is, 3)];
+	       T6 = I[WS(is, 4)];
+	       T7 = T5 + T6; /* x3 + x4 */
+	       Tc = T6 - T5; /* x4 - x3 */
+	  }
+	  io[WS(ios, 2)] = FNMS(KP781831482, Tc, KP974927912 * Tb) - (KP433883739 * Td); /* imaginary outputs combine only the differences Tb, Td, Tc */
+	  io[WS(ios, 1)] = FMA(KP781831482, Tb, KP974927912 * Td) + (KP433883739 * Tc);
+	  ro[WS(ros, 2)] = FMA(KP623489801, T7, T1) + FNMA(KP900968867, T4, KP222520933 * Ta); /* real outputs combine x0 with the sums Ta, T4, T7 */
+	  io[WS(ios, 3)] = FMA(KP433883739, Tb, KP974927912 * Tc) - (KP781831482 * Td);
+	  ro[WS(ros, 3)] = FMA(KP623489801, T4, T1) + FNMA(KP222520933, T7, KP900968867 * Ta);
+	  ro[WS(ros, 1)] = FMA(KP623489801, Ta, T1) + FNMA(KP900968867, T7, KP222520933 * T4);
+	  ro[0] = T1 + Ta + T4 + T7; /* sum of all seven inputs */
+     }
+}
+
+static const kr2hc_desc desc = { 7, "r2hc_7", {12, 6, 12, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* size, name, op counts (12 add, 6 mul, 12 fma, as in the file header), genus; trailing zeros presumably mean "any stride" -- confirm against kr2hc_desc */
+
+void X(codelet_r2hc_7) (planner *p) { /* hands the r2hc_7 kernel and its descriptor to planner p */
+     X(kr2hc_register) (p, r2hc_7, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_8.c b/src/fftw3/rdft/codelets/r2hc/r2hc_8.c
new file mode 100644
index 0000000..b502b94
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_8.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:41 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 8 -name r2hc_8 -include r2hc.h */
+
+/*
+ * This function contains 20 FP additions, 2 FP multiplications,
+ * (or, 20 additions, 2 multiplications, 0 fused multiply/add),
+ * 14 stack variables, and 16 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_8.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_8(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Size-8 real-to-halfcomplex kernel: 8 reals read from I, real parts stored to ro, imaginary parts to io. */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* ~ sqrt(2)/2; DK presumably declares a named constant -- confirm */
+     int i; /* number of transforms still to perform */
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v transforms; I advances by ivs, ro and io both by ovs */
+	  E T3, T7, Td, Tj, T6, Tg, Ta, Ti; /* below, xk denotes I[WS(is, k)] */
+	  {
+	       E T1, T2, Tb, Tc;
+	       T1 = I[0];
+	       T2 = I[WS(is, 4)];
+	       T3 = T1 + T2; /* x0 + x4 */
+	       T7 = T1 - T2; /* x0 - x4 */
+	       Tb = I[WS(is, 7)];
+	       Tc = I[WS(is, 3)];
+	       Td = Tb - Tc; /* x7 - x3 */
+	       Tj = Tb + Tc; /* x7 + x3 */
+	  }
+	  {
+	       E T4, T5, T8, T9;
+	       T4 = I[WS(is, 2)];
+	       T5 = I[WS(is, 6)];
+	       T6 = T4 + T5; /* x2 + x6 */
+	       Tg = T4 - T5; /* x2 - x6 */
+	       T8 = I[WS(is, 1)];
+	       T9 = I[WS(is, 5)];
+	       Ta = T8 - T9; /* x1 - x5 */
+	       Ti = T8 + T9; /* x1 + x5 */
+	  }
+	  ro[WS(ros, 2)] = T3 - T6; /* (x0 + x4) - (x2 + x6) */
+	  io[WS(ios, 2)] = Tj - Ti; /* (x3 + x7) - (x1 + x5) */
+	  {
+	       E Te, Tf, Th, Tk;
+	       Te = KP707106781 * (Ta + Td);
+	       ro[WS(ros, 3)] = T7 - Te;
+	       ro[WS(ros, 1)] = T7 + Te;
+	       Tf = KP707106781 * (Td - Ta);
+	       io[WS(ios, 1)] = Tf - Tg;
+	       io[WS(ios, 3)] = Tg + Tf;
+	       Th = T3 + T6; /* sum of the even-indexed inputs */
+	       Tk = Ti + Tj; /* sum of the odd-indexed inputs */
+	       ro[WS(ros, 4)] = Th - Tk; /* even-indexed sum minus odd-indexed sum */
+	       ro[0] = Th + Tk; /* sum of all eight inputs */
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 8, "r2hc_8", {20, 2, 0, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* size, name, op counts (20 add, 2 mul, 0 fma, as in the file header), genus; trailing zeros presumably mean "any stride" -- confirm against kr2hc_desc */
+
+void X(codelet_r2hc_8) (planner *p) { /* hands the r2hc_8 kernel and its descriptor to planner p */
+     X(kr2hc_register) (p, r2hc_8, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/r2hc_9.c b/src/fftw3/rdft/codelets/r2hc/r2hc_9.c
new file mode 100644
index 0000000..df3808f
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/r2hc_9.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 21:56:40 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2hc -compact -variables 4 -n 9 -name r2hc_9 -include r2hc.h */
+
+/*
+ * This function contains 38 FP additions, 26 FP multiplications,
+ * (or, 21 additions, 9 multiplications, 17 fused multiply/add),
+ * 36 stack variables, and 18 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: r2hc_9.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_9.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ * $Id: r2hc_9.c,v 1.1 2008/10/17 06:12:34 scuri Exp $
+ */
+
+#include "r2hc.h"
+
+static void r2hc_9(const R *I, R *ro, R *io, stride is, stride ros, stride ios, int v, int ivs, int ovs)
+{ /* Size-9 real-to-halfcomplex kernel: 9 reals read from I, real parts stored to ro, imaginary parts to io. */
+     DK(KP939692620, +0.939692620785908384054109277324731469936208134); /* ~ cos(pi/9); DK presumably declares a named constant -- confirm */
+     DK(KP296198132, +0.296198132726023843175338011893050938967728390); /* ~ (sqrt(3)/2) * sin(pi/9) */
+     DK(KP342020143, +0.342020143325668733044099614682259580763083368); /* ~ sin(pi/9) */
+     DK(KP813797681, +0.813797681349373692844693217248393223289101568); /* ~ (sqrt(3)/2) * cos(pi/9) */
+     DK(KP984807753, +0.984807753012208059366743024589523013670643252); /* ~ sin(4*pi/9) */
+     DK(KP150383733, +0.150383733180435296639271897612501926072238258); /* ~ (sqrt(3)/2) * cos(4*pi/9) */
+     DK(KP642787609, +0.642787609686539326322643409907263432907559884); /* ~ sin(2*pi/9) */
+     DK(KP663413948, +0.663413948168938396205421319635891297216863310); /* ~ (sqrt(3)/2) * cos(2*pi/9) */
+     DK(KP852868531, +0.852868531952443209628250963940074071936020296); /* ~ (sqrt(3)/2) * sin(4*pi/9) */
+     DK(KP173648177, +0.173648177666930348851716626769314796000375677); /* ~ cos(4*pi/9) */
+     DK(KP556670399, +0.556670399226419366452912952047023132968291906); /* ~ (sqrt(3)/2) * sin(2*pi/9) */
+     DK(KP766044443, +0.766044443118978035202392650555416673935832457); /* ~ cos(2*pi/9) */
+     DK(KP866025403, +0.866025403784438646763723170752936183471402627); /* ~ sqrt(3)/2 */
+     DK(KP500000000, +0.500000000000000000000000000000000000000000000); /* 1/2 */
+     int i; /* number of transforms still to perform */
+     for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs) { /* v transforms; I advances by ivs, ro and io both by ovs */
+	  E T1, T4, Tr, Ta, Tl, Ti, Tf, Tk, Tj, T2, T3, T5, Tg; /* below, xk denotes I[WS(is, k)] */
+	  T1 = I[0];
+	  T2 = I[WS(is, 3)];
+	  T3 = I[WS(is, 6)];
+	  T4 = T2 + T3; /* x3 + x6 */
+	  Tr = T3 - T2; /* x6 - x3 */
+	  { /* inputs 1, 4, 7 */
+	       E T6, T7, T8, T9;
+	       T6 = I[WS(is, 1)];
+	       T7 = I[WS(is, 4)];
+	       T8 = I[WS(is, 7)];
+	       T9 = T7 + T8;
+	       Ta = T6 + T9; /* x1 + x4 + x7 */
+	       Tl = T8 - T7; /* x7 - x4 */
+	       Ti = FNMS(KP500000000, T9, T6); /* assuming FNMS(a, b, c) is c - a*b -- confirm against the macro */
+	  }
+	  { /* inputs 2, 5, 8 */
+	       E Tb, Tc, Td, Te;
+	       Tb = I[WS(is, 2)];
+	       Tc = I[WS(is, 5)];
+	       Td = I[WS(is, 8)];
+	       Te = Tc + Td;
+	       Tf = Tb + Te; /* x2 + x5 + x8 */
+	       Tk = FNMS(KP500000000, Te, Tb);
+	       Tj = Td - Tc; /* x8 - x5 */
+	  }
+	  io[WS(ios, 3)] = KP866025403 * (Tf - Ta);
+	  T5 = T1 + T4; /* x0 + x3 + x6 */
+	  Tg = Ta + Tf; /* sum of the six remaining inputs */
+	  ro[WS(ros, 3)] = FNMS(KP500000000, Tg, T5);
+	  ro[0] = T5 + Tg; /* sum of all nine inputs */
+	  { /* outputs 1, 2 and 4 */
+	       E Tt, Th, Tm, Tn, To, Tp, Tq, Ts;
+	       Tt = KP866025403 * Tr;
+	       Th = FNMS(KP500000000, T4, T1);
+	       Tm = FMA(KP766044443, Ti, KP556670399 * Tl); /* assuming FMA(a, b, c) is a*b + c -- confirm against the macro */
+	       Tn = FMA(KP173648177, Tk, KP852868531 * Tj);
+	       To = Tm + Tn;
+	       Tp = FNMS(KP642787609, Ti, KP663413948 * Tl);
+	       Tq = FNMS(KP984807753, Tk, KP150383733 * Tj);
+	       Ts = Tp + Tq;
+	       ro[WS(ros, 1)] = Th + To;
+	       io[WS(ios, 1)] = Tt + Ts;
+	       ro[WS(ros, 4)] = FMA(KP866025403, Tp - Tq, Th) - (KP500000000 * To);
+	       io[WS(ios, 4)] = FNMS(KP500000000, Ts, KP866025403 * (Tr + (Tn - Tm)));
+	       io[WS(ios, 2)] = FNMS(KP342020143, Tk, KP813797681 * Tj) + FNMA(KP150383733, Tl, KP984807753 * Ti) - Tt;
+	       ro[WS(ros, 2)] = FMA(KP173648177, Ti, Th) + FNMA(KP296198132, Tj, KP939692620 * Tk) - (KP852868531 * Tl);
+	  }
+     }
+}
+
+static const kr2hc_desc desc = { 9, "r2hc_9", {21, 9, 17, 0}, &GENUS, 0, 0, 0, 0, 0 }; /* size, name, op counts (21 add, 9 mul, 17 fma, as in the file header), genus; trailing zeros presumably mean "any stride" -- confirm against kr2hc_desc */
+
+void X(codelet_r2hc_9) (planner *p) { /* hands the r2hc_9 kernel and its descriptor to planner p */
+     X(kr2hc_register) (p, r2hc_9, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2hc/rhcodlist.c b/src/fftw3/rdft/codelets/r2hc/rhcodlist.c
new file mode 100644
index 0000000..a4ac1bc
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hc/rhcodlist.c
@@ -0,0 +1,114 @@
+#include "ifftw.h"
+
+extern void X(codelet_r2hc_2)(planner *); /* r2hc codelets: every size from 2 to 16, plus 32 */
+extern void X(codelet_r2hc_3)(planner *);
+extern void X(codelet_r2hc_4)(planner *);
+extern void X(codelet_r2hc_5)(planner *);
+extern void X(codelet_r2hc_6)(planner *);
+extern void X(codelet_r2hc_7)(planner *);
+extern void X(codelet_r2hc_8)(planner *);
+extern void X(codelet_r2hc_9)(planner *);
+extern void X(codelet_r2hc_10)(planner *);
+extern void X(codelet_r2hc_11)(planner *);
+extern void X(codelet_r2hc_12)(planner *);
+extern void X(codelet_r2hc_13)(planner *);
+extern void X(codelet_r2hc_14)(planner *);
+extern void X(codelet_r2hc_15)(planner *);
+extern void X(codelet_r2hc_16)(planner *);
+extern void X(codelet_r2hc_32)(planner *);
+extern void X(codelet_mr2hc_32)(planner *); /* mr2hc codelets: sizes 32, 64, 128 */
+extern void X(codelet_mr2hc_64)(planner *);
+extern void X(codelet_mr2hc_128)(planner *);
+extern void X(codelet_hf_2)(planner *); /* hf codelets: sizes 2-10, 12, 15, 16, 32, 64 (no 11, 13 or 14) */
+extern void X(codelet_hf_3)(planner *);
+extern void X(codelet_hf_4)(planner *);
+extern void X(codelet_hf_5)(planner *);
+extern void X(codelet_hf_6)(planner *);
+extern void X(codelet_hf_7)(planner *);
+extern void X(codelet_hf_8)(planner *);
+extern void X(codelet_hf_9)(planner *);
+extern void X(codelet_hf_10)(planner *);
+extern void X(codelet_hf_12)(planner *);
+extern void X(codelet_hf_15)(planner *);
+extern void X(codelet_hf_16)(planner *);
+extern void X(codelet_hf_32)(planner *);
+extern void X(codelet_hf_64)(planner *);
+extern void X(codelet_hf2_4)(planner *); /* hf2 codelets: power-of-two sizes 4 to 64 */
+extern void X(codelet_hf2_8)(planner *);
+extern void X(codelet_hf2_16)(planner *);
+extern void X(codelet_hf2_32)(planner *);
+extern void X(codelet_hf2_64)(planner *);
+extern void X(codelet_r2hcII_2)(planner *); /* r2hcII codelets: sizes 2-10, 12, 15, 16, 32 */
+extern void X(codelet_r2hcII_3)(planner *);
+extern void X(codelet_r2hcII_4)(planner *);
+extern void X(codelet_r2hcII_5)(planner *);
+extern void X(codelet_r2hcII_6)(planner *);
+extern void X(codelet_r2hcII_7)(planner *);
+extern void X(codelet_r2hcII_8)(planner *);
+extern void X(codelet_r2hcII_9)(planner *);
+extern void X(codelet_r2hcII_10)(planner *);
+extern void X(codelet_r2hcII_12)(planner *);
+extern void X(codelet_r2hcII_15)(planner *);
+extern void X(codelet_r2hcII_16)(planner *);
+extern void X(codelet_r2hcII_32)(planner *);
+extern void X(codelet_mr2hcII_32)(planner *); /* mr2hcII codelets: sizes 32 and 64 */
+extern void X(codelet_mr2hcII_64)(planner *);
+
+
+extern const solvtab X(solvtab_rdft_r2hc); /* declared before the definition so the table has external linkage with a visible prototype */
+const solvtab X(solvtab_rdft_r2hc) = { /* one entry per codelet registration function declared above, in the same order */
+   SOLVTAB(X(codelet_r2hc_2)), /* r2hc group */
+   SOLVTAB(X(codelet_r2hc_3)),
+   SOLVTAB(X(codelet_r2hc_4)),
+   SOLVTAB(X(codelet_r2hc_5)),
+   SOLVTAB(X(codelet_r2hc_6)),
+   SOLVTAB(X(codelet_r2hc_7)),
+   SOLVTAB(X(codelet_r2hc_8)),
+   SOLVTAB(X(codelet_r2hc_9)),
+   SOLVTAB(X(codelet_r2hc_10)),
+   SOLVTAB(X(codelet_r2hc_11)),
+   SOLVTAB(X(codelet_r2hc_12)),
+   SOLVTAB(X(codelet_r2hc_13)),
+   SOLVTAB(X(codelet_r2hc_14)),
+   SOLVTAB(X(codelet_r2hc_15)),
+   SOLVTAB(X(codelet_r2hc_16)),
+   SOLVTAB(X(codelet_r2hc_32)),
+   SOLVTAB(X(codelet_mr2hc_32)), /* mr2hc group */
+   SOLVTAB(X(codelet_mr2hc_64)),
+   SOLVTAB(X(codelet_mr2hc_128)),
+   SOLVTAB(X(codelet_hf_2)), /* hf group */
+   SOLVTAB(X(codelet_hf_3)),
+   SOLVTAB(X(codelet_hf_4)),
+   SOLVTAB(X(codelet_hf_5)),
+   SOLVTAB(X(codelet_hf_6)),
+   SOLVTAB(X(codelet_hf_7)),
+   SOLVTAB(X(codelet_hf_8)),
+   SOLVTAB(X(codelet_hf_9)),
+   SOLVTAB(X(codelet_hf_10)),
+   SOLVTAB(X(codelet_hf_12)),
+   SOLVTAB(X(codelet_hf_15)),
+   SOLVTAB(X(codelet_hf_16)),
+   SOLVTAB(X(codelet_hf_32)),
+   SOLVTAB(X(codelet_hf_64)),
+   SOLVTAB(X(codelet_hf2_4)), /* hf2 group */
+   SOLVTAB(X(codelet_hf2_8)),
+   SOLVTAB(X(codelet_hf2_16)),
+   SOLVTAB(X(codelet_hf2_32)),
+   SOLVTAB(X(codelet_hf2_64)),
+   SOLVTAB(X(codelet_r2hcII_2)), /* r2hcII group */
+   SOLVTAB(X(codelet_r2hcII_3)),
+   SOLVTAB(X(codelet_r2hcII_4)),
+   SOLVTAB(X(codelet_r2hcII_5)),
+   SOLVTAB(X(codelet_r2hcII_6)),
+   SOLVTAB(X(codelet_r2hcII_7)),
+   SOLVTAB(X(codelet_r2hcII_8)),
+   SOLVTAB(X(codelet_r2hcII_9)),
+   SOLVTAB(X(codelet_r2hcII_10)),
+   SOLVTAB(X(codelet_r2hcII_12)),
+   SOLVTAB(X(codelet_r2hcII_15)),
+   SOLVTAB(X(codelet_r2hcII_16)),
+   SOLVTAB(X(codelet_r2hcII_32)),
+   SOLVTAB(X(codelet_mr2hcII_32)), /* mr2hcII group */
+   SOLVTAB(X(codelet_mr2hcII_64)),
+   SOLVTAB_END /* terminator entry; must stay last */
+};
diff --git a/src/fftw3/rdft/codelets/r2hcII.h b/src/fftw3/rdft/codelets/r2hcII.h
new file mode 100644
index 0000000..cb90935
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2hcII.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+
+#define GENUS X(rdft_r2hcII_genus) /* short alias for the r2hcII genus object; presumably used as &GENUS in r2hcII codelet descriptors -- confirm */
+extern const kr2hcII_genus GENUS; /* declaration only; the object is defined in another translation unit */
diff --git a/src/fftw3/rdft/codelets/r2r.c b/src/fftw3/rdft/codelets/r2r.c
new file mode 100644
index 0000000..18d9528
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2r.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "codelet-rdft.h"
+#include "r2r.h"
+
+static int okp(const kr2r_desc *d, /* descriptor whose stride constraints are checked */
+	       const R *I, /* unused */
+	       const R *O, /* unused */
+	       int is, int os, int vl, int ivs, int ovs) /* vl is unused; the four strides are compared against d */
+{ /* Returns nonzero when the requested strides satisfy every constraint recorded in d. */
+     UNUSED(I); UNUSED(O); UNUSED(vl);
+     return (1
+	     && (!d->is || (d->is == is)) /* a zero field in d accepts any value; a nonzero field must match exactly */
+	     && (!d->os || (d->os == os))
+	     && (!d->ivs || (d->ivs == ivs))
+	     && (!d->ovs || (d->ovs == ovs))
+	  );
+}
+
+const kr2r_genus GENUS = { okp, 1 }; /* genus shared by the r2r codelets: applicability test okp; second field presumably the vector length -- confirm against kr2r_genus */
diff --git a/src/fftw3/rdft/codelets/r2r.h b/src/fftw3/rdft/codelets/r2r.h
new file mode 100644
index 0000000..614427c
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2r.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+
+#define GENUS X(rdft_r2r_genus) /* short alias for the r2r genus object; r2r codelet descriptors reference it as &GENUS */
+extern const kr2r_genus GENUS; /* declaration only; r2r.c provides the definition */
diff --git a/src/fftw3/rdft/codelets/r2r/e01_8.c b/src/fftw3/rdft/codelets/r2r/e01_8.c
new file mode 100644
index 0000000..b48371c
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2r/e01_8.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:22:02 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2r -compact -variables 4 -redft01 -n 8 -name e01_8 -include r2r.h */
+
+/*
+ * This function contains 26 FP additions, 15 FP multiplications,
+ * (or, 20 additions, 9 multiplications, 6 fused multiply/add),
+ * 27 stack variables, and 16 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: e01_8.c,v 1.1 2008/10/17 06:13:17 scuri Exp $
+ * $Id: e01_8.c,v 1.1 2008/10/17 06:13:17 scuri Exp $
+ * $Id: e01_8.c,v 1.1 2008/10/17 06:13:17 scuri Exp $
+ */
+
+#include "r2r.h"
+
+static void e01_8_0(const R *I, R *O, stride istride, stride ostride)
+{ /* One size-8 REDFT01 transform: reads 8 reals from I and writes 8 reals to O. */
+     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); /* ~ 2*cos(3*pi/16); DK presumably declares a named constant -- confirm */
+     DK(KP1_111140466, +1.111140466039204449485661627897065748749874382); /* ~ 2*sin(3*pi/16) */
+     DK(KP390180644, +0.390180644032256535696569736954044481855383236); /* ~ 2*sin(pi/16) */
+     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); /* ~ 2*cos(pi/16) */
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938); /* ~ sqrt(2)/2 */
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); /* ~ sqrt(2) */
+     DK(KP765366864, +0.765366864730179543456919968060797733522689125); /* ~ 2*sin(pi/8) */
+     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); /* ~ 2*cos(pi/8) */
+     {
+	  E T7, Tl, T4, Tk, Td, To, Tg, Tn; /* below, xk denotes I[WS(istride, k)] */
+	  { /* load all eight inputs and form the intermediate terms */
+	       E T5, T6, T1, T3, T2;
+	       T5 = I[WS(istride, 2)];
+	       T6 = I[WS(istride, 6)];
+	       T7 = FMA(KP1_847759065, T5, KP765366864 * T6); /* assuming FMA(a, b, c) is a*b + c -- confirm against the macro */
+	       Tl = FNMS(KP1_847759065, T6, KP765366864 * T5); /* assuming FNMS(a, b, c) is c - a*b -- confirm against the macro */
+	       T1 = I[0];
+	       T2 = I[WS(istride, 4)];
+	       T3 = KP1_414213562 * T2;
+	       T4 = T1 + T3; /* x0 + sqrt(2)*x4 */
+	       Tk = T1 - T3; /* x0 - sqrt(2)*x4 */
+	       { /* odd-indexed inputs */
+		    E T9, Tf, Tc, Te, Ta, Tb;
+		    T9 = I[WS(istride, 1)];
+		    Tf = I[WS(istride, 7)];
+		    Ta = I[WS(istride, 5)];
+		    Tb = I[WS(istride, 3)];
+		    Tc = KP707106781 * (Ta + Tb);
+		    Te = KP707106781 * (Ta - Tb);
+		    Td = T9 + Tc;
+		    To = Te + Tf;
+		    Tg = Te - Tf;
+		    Tn = T9 - Tc;
+	       }
+	  }
+	  { /* each output pair (k, 7-k) is stored as a common term plus/minus a second term */
+	       E T8, Th, Tq, Tr;
+	       T8 = T4 + T7;
+	       Th = FNMS(KP390180644, Tg, KP1_961570560 * Td);
+	       O[WS(ostride, 7)] = T8 - Th;
+	       O[0] = T8 + Th;
+	       Tq = Tk - Tl;
+	       Tr = FMA(KP1_111140466, Tn, KP1_662939224 * To);
+	       O[WS(ostride, 5)] = Tq - Tr;
+	       O[WS(ostride, 2)] = Tq + Tr;
+	  }
+	  { /* remaining pairs: outputs (3, 4) and (1, 6) */
+	       E Ti, Tj, Tm, Tp;
+	       Ti = T4 - T7;
+	       Tj = FMA(KP390180644, Td, KP1_961570560 * Tg);
+	       O[WS(ostride, 4)] = Ti - Tj;
+	       O[WS(ostride, 3)] = Ti + Tj;
+	       Tm = Tk + Tl;
+	       Tp = FNMS(KP1_111140466, To, KP1_662939224 * Tn);
+	       O[WS(ostride, 6)] = Tm - Tp;
+	       O[WS(ostride, 1)] = Tm + Tp;
+	  }
+     }
+}
+
+static void e01_8(const R *I, R *O, stride is, stride os, int v, int ivs, int ovs)
+{ /* Batch driver: applies e01_8_0 to v input/output pairs; does nothing when v <= 0. */
+     int i; /* number of transforms still to perform */
+     for (i = v; i > 0; --i) {
+	  e01_8_0(I, O, is, os); /* transform the current pair with element strides is and os */
+	  I += ivs; /* step to the next input vector */
+	  O += ovs; /* step to the next output vector */
+     }
+}
+
+static const kr2r_desc desc = { 8, "e01_8", {20, 9, 6, 0}, &GENUS, REDFT01, 0, 0, 0, 0 }; /* size, name, op counts (20 add, 9 mul, 6 fma, as in the file header), genus, transform kind; trailing zeros presumably fill the is/os/ivs/ovs fields that okp in r2r.c treats as "any" -- confirm field order */
+
+void X(codelet_e01_8) (planner *p) { /* hands the e01_8 kernel and its descriptor to planner p */
+     X(kr2r_register) (p, e01_8, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2r/e10_8.c b/src/fftw3/rdft/codelets/r2r/e10_8.c
new file mode 100644
index 0000000..6d7be49
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2r/e10_8.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* This file was automatically generated --- DO NOT EDIT */
+/* Generated on Sat Jul  5 22:22:02 EDT 2003 */
+
+#include "codelet-rdft.h"
+
+/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft/gen_r2r -compact -variables 4 -redft10 -n 8 -name e10_8 -include r2r.h */
+
+/*
+ * This function contains 26 FP additions, 16 FP multiplications,
+ * (or, 20 additions, 10 multiplications, 6 fused multiply/add),
+ * 27 stack variables, and 16 memory accesses
+ */
+/*
+ * Generator Id's : 
+ * $Id: e10_8.c,v 1.1 2008/10/17 06:13:17 scuri Exp $
+ * $Id: e10_8.c,v 1.1 2008/10/17 06:13:17 scuri Exp $
+ * $Id: e10_8.c,v 1.1 2008/10/17 06:13:17 scuri Exp $
+ */
+
+#include "r2r.h"
+/* Generated 8-point REDFT10 kernel: reads inputs I[0], I[WS(istride, 1..7)] and writes outputs O[0], O[WS(ostride, 1..7)]. */
+static void e10_8_0(const R *I, R *O, stride istride, stride ostride)
+{
+     DK(KP765366864, +0.765366864730179543456919968060797733522689125);
+     DK(KP1_847759065, +1.847759065022573512256366378793576573644833252);
+     DK(KP390180644, +0.390180644032256535696569736954044481855383236);
+     DK(KP1_961570560, +1.961570560806460898252364472268478073947867462);
+     DK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
+     DK(KP1_414213562, +1.414213562373095048801688724209698078569671875);
+     DK(KP1_111140466, +1.111140466039204449485661627897065748749874382);
+     DK(KP1_662939224, +1.662939224605090474157576755235811513477121624);
+     DK(KP707106781, +0.707106781186547524400844362104849039284835938);
+     {
+	  E T3, Tj, Tf, Tk, Ta, Tn, Tc, Tm;
+	  { /* load the eight inputs and form pairwise sums and differences */
+	       E T1, T2, Td, Te;
+	       T1 = I[0];
+	       T2 = I[WS(istride, 7)];
+	       T3 = T1 - T2; /* input 0 - input 7 */
+	       Tj = T1 + T2; /* input 0 + input 7 */
+	       Td = I[WS(istride, 4)];
+	       Te = I[WS(istride, 3)];
+	       Tf = Td - Te; /* input 4 - input 3 */
+	       Tk = Td + Te; /* input 4 + input 3 */
+	       {
+		    E T4, T5, T6, T7, T8, T9;
+		    T4 = I[WS(istride, 2)];
+		    T5 = I[WS(istride, 5)];
+		    T6 = T4 - T5; /* input 2 - input 5 */
+		    T7 = I[WS(istride, 1)];
+		    T8 = I[WS(istride, 6)];
+		    T9 = T7 - T8; /* input 1 - input 6 */
+		    Ta = KP707106781 * (T6 + T9);
+		    Tn = T7 + T8; /* input 1 + input 6 */
+		    Tc = KP707106781 * (T6 - T9);
+		    Tm = T4 + T5; /* input 2 + input 5 */
+	       }
+	  }
+	  { /* outputs 3, 5, 4 and 0 */
+	       E Tb, Tg, Tp, Tq;
+	       Tb = T3 - Ta;
+	       Tg = Tc - Tf;
+	       O[WS(ostride, 3)] = FNMS(KP1_111140466, Tg, KP1_662939224 * Tb);
+	       O[WS(ostride, 5)] = FMA(KP1_662939224, Tg, KP1_111140466 * Tb);
+	       Tp = Tj + Tk;
+	       Tq = Tm + Tn;
+	       O[WS(ostride, 4)] = KP1_414213562 * (Tp - Tq);
+	       O[0] = KP2_000000000 * (Tp + Tq); /* twice the sum of all eight inputs */
+	  }
+	  { /* outputs 1, 7, 2 and 6 */
+	       E Th, Ti, Tl, To;
+	       Th = T3 + Ta;
+	       Ti = Tf + Tc;
+	       O[WS(ostride, 1)] = FNMS(KP390180644, Ti, KP1_961570560 * Th);
+	       O[WS(ostride, 7)] = FMA(KP1_961570560, Ti, KP390180644 * Th);
+	       Tl = Tj - Tk;
+	       To = Tm - Tn;
+	       O[WS(ostride, 2)] = FNMS(KP765366864, To, KP1_847759065 * Tl);
+	       O[WS(ostride, 6)] = FMA(KP765366864, Tl, KP1_847759065 * To);
+	  }
+     }
+}
+
+static void e10_8(const R *I, R *O, stride is, stride os, int v, int ivs, int ovs)
+{
+     int left;
+
+     /* run the single-transform kernel on each of the v vector elements,
+	advancing the input by ivs and the output by ovs between calls */
+     for (left = v; left > 0; --left, I += ivs, O += ovs)
+	  e10_8_0(I, O, is, os);
+}
+/* Descriptor passed to kr2r_register for codelet "e10_8" (kind REDFT10); {20, 10, 6, ...} matches the 20 add / 10 mul / 6 fma counts in the generator header comment. */
+static const kr2r_desc desc = { 8, "e10_8", {20, 10, 6, 0}, &GENUS, REDFT10, 0, 0, 0, 0 };
+/* Registration hook: hands the e10_8 codelet and its descriptor to kr2r_register for planner p. */
+void X(codelet_e10_8) (planner *p) {
+     X(kr2r_register) (p, e10_8, &desc);
+}
diff --git a/src/fftw3/rdft/codelets/r2r/rrcodlist.c b/src/fftw3/rdft/codelets/r2r/rrcodlist.c
new file mode 100644
index 0000000..28c1ebc
--- /dev/null
+++ b/src/fftw3/rdft/codelets/r2r/rrcodlist.c
@@ -0,0 +1,12 @@
+#include "ifftw.h"
+/* registration hooks of the r2r codelets that are listed in the table below */
+extern void X(codelet_e01_8)(planner *);
+extern void X(codelet_e10_8)(planner *);
+
+/* table of r2r codelet registration functions, terminated by SOLVTAB_END */
+extern const solvtab X(solvtab_rdft_r2r);
+const solvtab X(solvtab_rdft_r2r) = {
+   SOLVTAB(X(codelet_e01_8)),
+   SOLVTAB(X(codelet_e10_8)),
+   SOLVTAB_END
+};
diff --git a/src/fftw3/rdft/dft-r2hc.c b/src/fftw3/rdft/dft-r2hc.c
new file mode 100644
index 0000000..10abf68
--- /dev/null
+++ b/src/fftw3/rdft/dft-r2hc.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: dft-r2hc.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* Compute the complex DFT by combining R2HC RDFTs on the real
+   and imaginary parts.   This could be useful for people just wanting
+   to link to the real codelets and not the complex ones.  It could
+   also even be faster than the complex algorithms for split (as opposed
+   to interleaved) real/imag complex data. */
+
+#include "rdft.h"
+#include "dft.h"
+/* solver object: holds nothing beyond the generic solver header */
+typedef struct {
+     solver super;
+} S;
+/* plan object filled in by mkplan() and consumed by apply() */
+typedef struct {
+     plan_dft super;
+     plan *cld; /* child plan: the R2HC rdft problem created in mkplan() */
+     int os; /* output stride of the transformed dimension (dims[0].os) */
+     int n; /* transform length (dims[0].n) */
+} P;
+
+static void apply(const plan *ego_, R *ri, R *ii, R *ro, R *io)
+{
+     const P *self = (const P *) ego_;
+     plan_rdft *child = (plan_rdft *) self->cld;
+     const int stride_out = self->os, len = self->n;
+     const int half = (len + 1) / 2;
+     int lo;
+
+     /* ii is never touched here; only ri/ro are handed to the child */
+     UNUSED(ii);
+
+     /* transform vector of real & imag parts with the R2HC child plan */
+     child->apply((plan *) child, ri, ro);
+
+     /* combine entries lo and len - lo of the ro and io arrays */
+     for (lo = 1; lo < half; ++lo) {
+	  const int hi = len - lo;
+	  const R re_lo = ro[stride_out * lo];
+	  const R im_lo = io[stride_out * lo];
+	  const R re_hi = ro[stride_out * hi];
+	  const R im_hi = io[stride_out * hi];
+	  ro[stride_out * lo] = re_lo - im_hi;
+	  io[stride_out * lo] = im_lo + re_hi;
+	  ro[stride_out * hi] = re_lo + im_hi;
+	  io[stride_out * hi] = im_lo - re_hi;
+     }
+}
+/* Forwards the flag flg to the child plan through the AWAKE macro. */
+static void awake(plan *ego_, int flg)
+{
+     P *ego = (P *) ego_;
+     AWAKE(ego->cld, flg);
+}
+/* Destroys the child plan held in ego->cld; nothing else is released here. */
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld);
+}
+/* Prints "(dft-r2hc-<n> ...)" via the printer callback; the %(%p%) directive presumably prints the child plan -- confirm against the printer implementation. */
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(dft-r2hc-%d%(%p%))", ego->n, ego->cld);
+}
+
+#define ALLOW_RANK0 0 /* disable for now, subject to testing */
+
+/* Structural test only: a DFT problem of rank 1 without a vector loop
+   (or, when ALLOW_RANK0 is enabled, any rank-0 problem). */
+static int applicable0(const problem *p_)
+{
+     const problem_dft *p;
+
+     if (!DFTP(p_)) return 0;
+     p = (const problem_dft *) p_;
+#if ALLOW_RANK0
+     if (p->sz->rnk == 0) return 1;
+#endif
+     return (p->sz->rnk == 1 && p->vecsz->rnk == 0);
+}
+
+static int split(R *r, R *i, int n, int s)
+{    /* nonzero iff r and i are at least n * |s| elements apart */
+     return (((int)n) * (s < 0 ? -s : s) <= (i < r ? r - i : i - r));
+}
+
+static int applicable(const problem *p_, const planner *plnr)
+{
+     const problem_dft *p = (const problem_dft *) p_;
+
+     if (!applicable0(p_))
+	  return 0;
+     /* rejected outright when both planner predicates hold */
+     if (NO_UGLYP(plnr) && DFT_R2HC_ICKYP(plnr))
+	  return 0;
+     /* input and output each have well-separated real/imag arrays */
+     if (p->sz->rnk == 1
+	 && split(p->ri, p->ii, p->sz->dims[0].n, p->sz->dims[0].is)
+	 && split(p->ro, p->io, p->sz->dims[0].n, p->sz->dims[0].os))
+	  return 1;
+     return NO_UGLYP(plnr) ? 0 : 1;
+}
+/* Builds a DFT plan made of one R2HC child plan plus the recombination loop in apply(); returns 0 when not applicable or when no child plan is found. */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     P *pln;
+     const problem_dft *p;
+     plan *cld;
+
+     static const plan_adt padt = {
+	  X(dft_solve), awake, print, destroy
+     };
+
+     UNUSED(ego_);
+     if (!applicable(p_, plnr))
+          return (plan *)0;
+
+     p = (const problem_dft *) p_;
+     /* child: R2HC over p->sz with an extra length-2 vector whose strides are ii - ri and io - ro */
+     {
+	  tensor *ri_vec = X(mktensor_1d)(2, p->ii - p->ri, p->io - p->ro);
+	  tensor *cld_vec = X(tensor_append)(ri_vec, p->vecsz);
+	  cld = X(mkplan_d)(plnr, 
+			    X(mkproblem_rdft_1)(p->sz, cld_vec, 
+						p->ri, p->ro, R2HC));
+	  X(tensor_destroy2)(ri_vec, cld_vec); /* both temporaries are released whether or not planning succeeded */
+     }
+     if (!cld) return (plan *)0;
+
+     pln = MKPLAN_DFT(P, &padt, apply);
+
+#if ALLOW_RANK0
+     if (p->sz->rnk == 0) {
+	  pln->n = 1;
+	  pln->os = 0;
+     }
+     else
+#endif
+     {
+	  pln->n = p->sz->dims[0].n;
+	  pln->os = p->sz->dims[0].os;
+     }
+
+     pln->cld = cld;
+     /* cost = child cost + (n - 1)/2 loop iterations of apply(), each charged 8 "other" ops and 4 adds */
+     pln->super.super.ops = cld->ops;
+     pln->super.super.ops.other += 8 * ((pln->n - 1)/2);
+     pln->super.super.ops.add += 4 * ((pln->n - 1)/2);
+
+     return &(pln->super.super);
+}
+
+/* constructor: creates a solver of type S whose method table contains only mkplan */
+static solver *mksolver(void)
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super);
+}
+/* Public entry point: creates one solver with mksolver() and registers it with planner p. */
+void X(dft_r2hc_register)(planner *p)
+{
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/rdft/dht-r2hc.c b/src/fftw3/rdft/dht-r2hc.c
new file mode 100644
index 0000000..e66e614
--- /dev/null
+++ b/src/fftw3/rdft/dht-r2hc.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: dht-r2hc.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* Solve a DHT problem (Discrete Hartley Transform) via post-processing
+   of an R2HC problem. */
+
+#include "rdft.h"
+/* solver object: holds nothing beyond the generic solver header */
+typedef struct {
+     solver super;
+} S;
+/* plan object filled in by mkplan() and consumed by apply() */
+typedef struct {
+     plan_rdft super;
+     plan *cld; /* child plan: the R2HC problem on the same arrays */
+     int os; /* output stride (dims[0].os) */
+     int n; /* transform length (dims[0].n) */
+} P;
+
+static void apply(const plan *ego_, R *I, R *O)
+{
+     const P *self = (const P *) ego_;
+     plan_rdft *child = (plan_rdft *) self->cld;
+     const int stride_out = self->os, len = self->n;
+     int lo, hi;
+
+     /* first run the R2HC child plan from I into O */
+     child->apply((plan *) child, I, O);
+
+     /* then replace each pair O[lo], O[len - lo] by its sum and its
+	difference; which slot gets which depends on FFT_SIGN */
+     for (lo = 1, hi = len - 1; lo < hi; ++lo, --hi) {
+	  const E lo_val = O[stride_out * lo];
+	  const E hi_val = O[stride_out * hi];
+#if FFT_SIGN == -1
+	  O[stride_out * lo] = lo_val - hi_val;
+	  O[stride_out * hi] = lo_val + hi_val;
+#else
+	  O[stride_out * lo] = lo_val + hi_val;
+	  O[stride_out * hi] = lo_val - hi_val;
+#endif
+     }
+}
+/* Forwards the flag flg to the child plan through the AWAKE macro. */
+static void awake(plan *ego_, int flg)
+{
+     P *ego = (P *) ego_;
+     AWAKE(ego->cld, flg);
+}
+/* Destroys the child plan held in ego->cld; nothing else is released here. */
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld);
+}
+/* Prints "(dht-r2hc-<n> ...)" via the printer callback; the %(%p%) directive presumably prints the child plan -- confirm against the printer implementation. */
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(dht-r2hc-%d%(%p%))", ego->n, ego->cld);
+}
+
+static int applicable0(const problem *p_, const planner *plnr)
+{
+     const problem_rdft *p;
+
+     if (!RDFTP(p_)) return 0;
+     p = (const problem_rdft *) p_;
+
+     /* refuse when the NO_DHT_R2HC predicate holds for this planner */
+     if (NO_DHT_R2HCP(plnr)) return 0;
+     /* only a single rank-1 transform without a vector loop ... */
+     if (p->sz->rnk != 1 || p->vecsz->rnk != 0) return 0;
+     return (p->kind[0] == DHT); /* ... and only of kind DHT */
+}
+
+static int applicable(const solver *ego, const problem *p, const planner *plnr)
+{
+     UNUSED(ego);
+     return NO_UGLYP(plnr) ? 0 : (applicable0(p, plnr) != 0);
+}
+/* Builds a DHT plan as an R2HC child plan plus the pass in apply(); returns 0 when not applicable or when no child plan is found. */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     P *pln;
+     const problem_rdft *p;
+     plan *cld;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+          return (plan *)0;
+
+     p = (const problem_rdft *) p_;
+
+     /* stop infinite loops with rdft-dht.c */
+     plnr->problem_flags |= NO_DHT_R2HC; /* NOTE(review): not cleared in this function; presumably the planner restores its flags -- confirm */
+
+     cld = X(mkplan_d)(plnr, 
+		       X(mkproblem_rdft_1)(p->sz, p->vecsz, p->I, p->O, R2HC));
+     if (!cld) return (plan *)0;
+
+     pln = MKPLAN_RDFT(P, &padt, apply);
+
+     pln->n = p->sz->dims[0].n;
+     pln->os = p->sz->dims[0].os;
+     pln->cld = cld;
+     /* cost = child cost + (n - 1)/2 loop iterations of apply(), each charged 4 "other" ops and 2 adds */
+     pln->super.super.ops = cld->ops;
+     pln->super.super.ops.other += 4 * ((pln->n - 1)/2);
+     pln->super.super.ops.add += 2 * ((pln->n - 1)/2);
+
+     return &(pln->super.super);
+}
+
+/* constructor: creates a solver of type S whose method table contains only mkplan */
+static solver *mksolver(void)
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super);
+}
+/* Public entry point: creates one solver with mksolver() and registers it with planner p. */
+void X(dht_r2hc_register)(planner *p)
+{
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/rdft/dht-rader.c b/src/fftw3/rdft/dht-rader.c
new file mode 100644
index 0000000..b9a2a74
--- /dev/null
+++ b/src/fftw3/rdft/dht-rader.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "rdft.h"
+
+/*
+ * Compute DHTs of prime sizes using Rader's trick: turn them
+ * into convolutions of size n - 1, which we then perform via a pair
+ * of FFTs.   (We can then do prime real FFTs via rdft-dht.c.)
+ */
+/* solver object: holds nothing beyond the generic solver header */
+typedef struct {
+     solver super;
+} S;
+/* plan object filled in by mkplan(); omega is created and released by awake() */
+typedef struct {
+     plan_rdft super;
+
+     plan *cld1, *cld2; /* child plans: buf -> O + os, and O + os -> buf */
+     R *omega; /* n - 1 precomputed multipliers from mkomega(), or 0 while asleep */
+     int n, g, ginv; /* prime size, generator from find_generator(n), and its inverse mod n */
+     int is, os; /* input and output strides (dims[0].is / dims[0].os) */
+     plan *cld_omega; /* in-place plan used only by mkomega() */
+} P;
+/* file-wide rader_tl list consulted by mkomega() (find/insert) and free_omega() (delete); presumably a cache of omega arrays -- confirm against rader_tl */
+static rader_tl *omegas = 0;
+
+/***************************************************************************/
+
+/* If R2HC_ONLY_CONV is 1, we use a trick to perform the convolution
+   purely in terms of R2HC transforms, as opposed to R2HC followed by HC2R.
+   This requires a few more operations, but allows us to share the same
+   plan/codelets for both Rader children. */
+#define R2HC_ONLY_CONV 1
+/* Computes one DHT of prime length ego->n from I into O: permute input, child transform, multiply by omega, second child transform, inverse permutation. */
+static void apply(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     int r = ego->n;
+     int is = ego->is, os;
+     int k, gpower, g;
+     R *buf, *omega;
+     R r0;
+
+     buf = (R *) MALLOC(sizeof(R) * (r - 1), BUFFERS); /* scratch for r - 1 reals, freed at the end */
+
+     /* First, permute the input, storing in buf: */
+     g = ego->g; 
+     for (gpower = 1, k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
+	  buf[k] = I[gpower * is];
+     }
+     /* gpower == g^(r-1) mod r == 1 */;
+
+     os = ego->os;
+
+     /* compute RDFT of buf, storing in output (except DC): */
+     {
+	    plan_rdft *cld = (plan_rdft *) ego->cld1;
+	    cld->apply((plan *) cld, buf, O + os);
+     }
+
+     /* set output DC component: */
+     O[0] = (r0 = I[0]) + O[os]; /* r0 keeps I[0] for the later O[os] += r0 */
+
+     /* now, multiply by omega: */
+     omega = ego->omega;
+
+     O[(0 + 1) * os] *= omega[0];
+#if R2HC_ONLY_CONV
+     for (k = 1; k < (r - 1)/2; ++k) {
+	  E rB, iB, rW, iW, a, b;
+	  rW = omega[k];
+	  iW = omega[(r-1) - k];
+	  rB = O[(k + 1) * os];
+	  iB = O[((r-1) - k + 1) * os];
+	  a = rW * rB - iW * iB; /* a and b form the product (rW, iW) * (rB, iB) */
+	  b = rW * iB + iW * rB;
+	  O[(k + 1) * os] = a + b;
+	  O[((r-1) - k + 1) * os] = a - b;
+     }
+#else
+     for (k = 1; k < (r - 1)/2; ++k) {
+	  E rB, iB, rW, iW;
+	  rW = omega[k];
+	  iW = omega[(r-1) - k];
+	  rB = O[(k + 1) * os];
+	  iB = O[((r-1) - k + 1) * os];
+	  O[(k + 1) * os] = rW * rB - iW * iB;
+	  O[((r-1) - k + 1) * os] = rW * iB + iW * rB;
+     }
+#endif
+     /* Nyquist component: */
+     O[(k + 1) * os] *= omega[k]; /* k == (r-1)/2, since r-1 is even */
+     
+     /* this will add input[0] to all of the outputs after the ifft */
+     O[os] += r0;
+
+     /* inverse FFT: */
+     {
+	    plan_rdft *cld = (plan_rdft *) ego->cld2;
+	    cld->apply((plan *) cld, O + os, buf);
+     }
+     
+     /* do inverse permutation to unshuffle the output: */
+     A(gpower == 1);
+#if R2HC_ONLY_CONV
+     O[os] = buf[0];
+     gpower = g = ego->ginv;
+     for (k = 1; k < (r - 1)/2; ++k, gpower = MULMOD(gpower, g, r)) {
+	  O[gpower * os] = buf[k] + buf[r - 1 - k];
+     }
+     O[gpower * os] = buf[k]; /* middle element k == (r-1)/2 is copied as is */
+     ++k, gpower = MULMOD(gpower, g, r);
+     for (; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
+	  O[gpower * os] = buf[r - 1 - k] - buf[k];
+     }
+#else
+     g = ego->ginv;
+     for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
+	  O[gpower * os] = buf[k];
+     }
+#endif
+     A(gpower == 1);
+
+     X(ifree)(buf);
+}
+/* Returns the n - 1 omega multipliers for (n, ginv): an entry already in the omegas list, or a new array that is transformed in place with plan p_ and then inserted. */
+static R *mkomega(plan *p_, int n, int ginv)
+{
+     plan_rdft *p = (plan_rdft *) p_;
+     R *omega;
+     int i, gpower;
+     trigreal scale;
+
+     if ((omega = X(rader_tl_find)(n, n, ginv, omegas))) 
+	  return omega;
+
+     omega = (R *)MALLOC(sizeof(R) * (n - 1), TWIDDLES);
+
+     scale = n - 1.0; /* normalization for convolution */
+
+     for (i = 0, gpower = 1; i < n-1; ++i, gpower = MULMOD(gpower, ginv, n)) {
+	  omega[i] = (X(cos2pi)(gpower, n) + X(sin2pi)(gpower, n)) / scale;
+     }
+     A(gpower == 1);
+
+     AWAKE(p_, 1); /* the plan is woken only for this one in-place transform */
+     p->apply(p_, omega, omega);
+     AWAKE(p_, 0);
+
+     X(rader_tl_insert)(n, n, ginv, omega, &omegas);
+     return omega;
+}
+/* Hands omega back to the omegas list via rader_tl_delete (presumably reference counted -- confirm against rader_tl). */
+static void free_omega(R *omega)
+{
+     X(rader_tl_delete)(omega, &omegas);
+}
+
+/***************************************************************************/
+
+static void awake(plan *ego_, int flg)
+{
+     P *self = (P *) ego_;
+
+     AWAKE(self->cld1, flg);
+     AWAKE(self->cld2, flg);
+
+     if (!flg) {
+	  /* flg == 0: give omega back and forget the pointer */
+	  free_omega(self->omega);
+	  self->omega = 0;
+     } else if (!self->omega) {
+	  self->omega = mkomega(self->cld_omega, self->n, self->ginv);
+     }
+}
+/* Destroys the three child plans; omega is not released here (awake() with flg == 0 does that). */
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld_omega);
+     X(plan_destroy_internal)(ego->cld2);
+     X(plan_destroy_internal)(ego->cld1);
+}
+/* Prints "(dht-rader-<n>...)" followed by cld1, then cld2 and cld_omega only when they are distinct plan pointers. */
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+
+     p->print(p, "(dht-rader-%d%ois=%oos=%(%p%)",
+              ego->n, ego->is, ego->os, ego->cld1);
+     if (ego->cld2 != ego->cld1)
+          p->print(p, "%(%p%)", ego->cld2);
+     if (ego->cld_omega != ego->cld1 && ego->cld_omega != ego->cld2)
+          p->print(p, "%(%p%)", ego->cld_omega);
+     p->putchr(p, ')'); /* closes the parenthesis opened by the first print call */
+}
+
+static int applicable0(const problem *p_)
+{
+     const problem_rdft *p;
+     int len;
+
+     if (!RDFTP(p_)) return 0;
+     p = (const problem_rdft *) p_;
+
+     /* a single rank-1 DHT with no vector loop ... */
+     if (p->sz->rnk != 1 || p->vecsz->rnk != 0 || p->kind[0] != DHT)
+	  return 0;
+     /* ... whose length is a prime greater than 2 */
+     len = p->sz->dims[0].n;
+     return (X(is_prime)(len) && len > 2);
+}
+
+static int applicable(const solver *ego, const problem *p, const planner *plnr)
+{
+     UNUSED(ego);
+     return NO_UGLYP(plnr) ? 0 : (applicable0(p) != 0);
+}
+/* Plans the three children (buf -> O + os, O + os -> buf, in-place omega transform) and fills in P; on any failure everything is released at nada and 0 is returned. */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     const problem_rdft *p = (const problem_rdft *) p_;
+     P *pln;
+     int n;
+     int is, os;
+     plan *cld1 = (plan *) 0;
+     plan *cld2 = (plan *) 0;
+     plan *cld_omega = (plan *) 0;
+     R *buf = (R *) 0;
+     R *O;
+     problem *cldp;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+	  return (plan *) 0;
+
+     n = p->sz->dims[0].n;
+     is = p->sz->dims[0].is;
+     os = p->sz->dims[0].os;
+     O = p->O;
+
+     /* initial allocation for the purpose of planning */
+     buf = (R *) MALLOC(sizeof(R) * (n - 1), BUFFERS);
+
+     cld1 = X(mkplan_d)(plnr, 
+			X(mkproblem_rdft_1_d)(X(mktensor_1d)(n - 1, 1, os),
+					      X(mktensor_1d)(1, 0, 0),
+					      buf, 
+					      O + os,
+					      R2HC));
+     if (!cld1) goto nada;
+
+     cldp =
+          X(mkproblem_rdft_1_d)(
+               X(mktensor_1d)(n - 1, os, 1),
+               X(mktensor_1d)(1, 0, 0),
+	       O + os,
+	       buf, 
+#if R2HC_ONLY_CONV
+	       R2HC
+#else
+	       HC2R
+#endif
+	       );
+     if (!(cld2 = X(mkplan_d)(plnr, cldp))) goto nada;
+
+
+     /* plan for omega */
+     plnr->planner_flags |= ESTIMATE; /* NOTE(review): not cleared in this function; presumably the planner restores its flags -- confirm */
+     cld_omega = X(mkplan_d)(plnr, 
+			     X(mkproblem_rdft_1_d)(X(mktensor_1d)(n - 1, 1, 1),
+						   X(mktensor_1d)(1, 0, 0),
+						   buf, buf, R2HC));
+     if (!cld_omega) goto nada;
+
+     /* deallocate buffers; let awake() or apply() allocate them for real */
+     X(ifree)(buf);
+     buf = 0;
+
+     pln = MKPLAN_RDFT(P, &padt, apply);
+     pln->cld1 = cld1;
+     pln->cld2 = cld2;
+     pln->cld_omega = cld_omega;
+     pln->omega = 0; /* created on demand by awake() */
+     pln->n = n;
+     pln->is = is;
+     pln->os = os;
+     pln->g = X(find_generator)(n);
+     pln->ginv = X(power_mod)(pln->g, n - 2, n); /* g^(n-2) mod n, asserted below to be the inverse of g */
+     A(MULMOD(pln->g, pln->ginv, n) == 1);
+
+     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops); /* start from the two children; cld_omega is not counted */
+     pln->super.super.ops.other += (n - 3) * 3 + (n - 2) * 2 + 5;
+     pln->super.super.ops.add += (n - 3) * 1;
+     pln->super.super.ops.mul += (n - 3) * 2 + 2;
+#if R2HC_ONLY_CONV
+     pln->super.super.ops.other += (n - 2) + 4;
+     pln->super.super.ops.add += (n - 3) * 1 + (n - 2) * 1;
+#endif
+
+     return &(pln->super.super);
+
+ nada: /* failure path: buf and any child plan may still be null here */
+     X(ifree0)(buf);
+     X(plan_destroy_internal)(cld_omega);
+     X(plan_destroy_internal)(cld2);
+     X(plan_destroy_internal)(cld1);
+     return 0;
+}
+
+/* constructor: creates a solver of type S whose method table contains only mkplan */
+
+static solver *mksolver(void)
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super);
+}
+/* Public entry point: creates one solver with mksolver() and registers it with planner p. */
+void X(dht_rader_register)(planner *p)
+{
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/rdft/direct2.c b/src/fftw3/rdft/direct2.c
new file mode 100644
index 0000000..29a1394
--- /dev/null
+++ b/src/fftw3/rdft/direct2.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: direct2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* direct RDFT2 R2HC/HC2R solver, if we have a codelet */
+
+#include "rdft.h"
+/* codelet function pointer; which member is valid follows the solver's kind (R2HC or HC2R) */
+typedef union {
+     kr2hc r2hc;
+     khc2r hc2r;
+} kodelet;
+/* solver object: one per registered codelet, filled in by the two mksolver_rdft2_*_direct constructors */
+typedef struct {
+     solver super;
+     union {
+	  const kr2hc_desc *r2hc;
+	  const khc2r_desc *hc2r;
+     } desc; /* codelet descriptor; member chosen like k below */
+     kodelet k; /* the codelet itself */
+     int sz; /* copied from desc->sz; compared with the problem size in applicable() */
+     rdft_kind kind; /* copied from desc->genus->kind */
+     const char *nam; /* copied from desc->nam; printed by print() */
+} S;
+/* plan object filled in by mkplan() */
+typedef struct {
+     plan_rdft2 super;
+
+     stride is, os; /* built with mkstride(); released in destroy() */
+     int vl; /* vector length from tensor_tornk1 */
+     int ivs, ovs; /* vector strides from tensor_tornk1 */
+     kodelet k; /* codelet copied from the solver */
+     const S *slv; /* owning solver, used by print() */
+     int ilast; /* offset of the second imaginary element zeroed by apply_r2hc (0 for odd n) */
+} P;
+
+static void apply_r2hc(const plan *ego_, R *r, R *rio, R *iio)
+{
+     const P *self = (const P *) ego_;
+     const int count = self->vl, step = self->ovs;
+     int v;
+     ASSERT_ALIGNED_DOUBLE;
+     self->k.r2hc(r, rio, iio, self->is, self->os, self->os, count, self->ivs, step);
+     /* zero imaginary element 0 and element ilast of every vector entry */
+     for (v = 0; v < count; ++v, iio += step)
+	  iio[0] = iio[self->ilast] = 0;
+}
+/* HC2R direction: one codelet call with (rio, iio) as input and r as output; note that the plan's os/is strides are passed in swapped roles. */
+static void apply_hc2r(const plan *ego_, R *r, R *rio, R *iio)
+{
+     const P *ego = (const P *) ego_;
+     ASSERT_ALIGNED_DOUBLE;
+     ego->k.hc2r(rio, iio, r, ego->os, ego->os, ego->is,
+		 ego->vl, ego->ivs, ego->ovs);
+}
+/* Releases the two stride objects that mkplan() created with mkstride(). */
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(stride_destroy)(ego->is);
+     X(stride_destroy)(ego->os);
+}
+/* Prints the plan as (rdft2-<kind>-direct-<size>... "<codelet name>") using the solver's kind, sz and nam and the plan's vl. */
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     const S *s = ego->slv;
+
+     p->print(p, "(rdft2-%s-direct-%d%v \"%s\")", 
+	      X(rdft_kind_str)(s->kind), s->sz, ego->vl, s->nam);
+}
+/* Nonzero iff p_ is a rank-1 RDFT2 problem of this solver's size and kind, the codelet's okp() accepts the strides, and the in-place rules at the end are met. */
+static int applicable(const solver *ego_, const problem *p_)
+{
+     if (RDFT2P(p_)) {
+          const S *ego = (const S *) ego_;
+          const problem_rdft2 *p = (const problem_rdft2 *) p_;
+	  int vl;
+	  int ivs, ovs;
+
+          return (
+	       1
+	       && p->sz->rnk == 1
+	       && p->vecsz->rnk <= 1
+	       && p->sz->dims[0].n == ego->sz
+	       && p->kind == ego->kind
+
+	       /* check strides etc */
+	       && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs)
+
+	       /* NOTE(review): both okp() calls below pass p->rio twice -- confirm that a separate imaginary pointer is not expected */
+	       && (ego->kind != R2HC ||
+		   ego->desc.r2hc->genus->okp(ego->desc.r2hc, 
+					      p->r, p->rio, p->rio,
+					      p->sz->dims[0].is,
+					      p->sz->dims[0].os,
+					      p->sz->dims[0].os,
+					      vl, ivs, ovs))
+	       && (ego->kind != HC2R ||
+		   ego->desc.hc2r->genus->okp(ego->desc.hc2r,
+					      p->rio, p->rio, p->r,
+					      p->sz->dims[0].is,
+					      p->sz->dims[0].is,
+					      p->sz->dims[0].os,
+					      vl, ivs, ovs))
+	       /* finally, at least one of the three in-place conditions must hold */
+	       && (0
+		   /* can operate out-of-place */
+		   || p->r != p->rio
+
+		   /*
+		    * can compute one transform in-place, no matter
+		    * what the strides are.
+		    */
+		   || p->vecsz->rnk == 0
+
+		   /* can operate in-place as long as strides are the same */
+		   || X(rdft2_inplace_strides)(p, RNK_MINFTY)
+		    )
+	       );
+     }
+
+     return 0;
+}
+/* Builds a direct RDFT2 plan around this solver's codelet: picks apply_r2hc or apply_hc2r, creates the strides, and derives the cost from the codelet's op counts. */
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     const S *ego = (const S *) ego_;
+     P *pln;
+     const problem_rdft2 *p;
+     iodim d;
+     int r2hc_kindp;
+
+     static const plan_adt padt = {
+	  X(rdft2_solve), X(null_awake), print, destroy
+     };
+
+     UNUSED(plnr);
+
+     if (!applicable(ego_, p_))
+          return (plan *)0;
+
+     p = (const problem_rdft2 *) p_;
+
+     r2hc_kindp = p->kind == R2HC;
+     A(r2hc_kindp || p->kind == HC2R);
+
+     pln = MKPLAN_RDFT2(P, &padt, r2hc_kindp ? apply_r2hc : apply_hc2r);
+
+     d = p->sz->dims[0];
+
+     pln->k = ego->k;
+     /* pln->is gets d.is for R2HC and d.os for HC2R; pln->os gets the other one (apply_hc2r passes them in swapped roles) */
+     pln->is = X(mkstride)(ego->sz, r2hc_kindp ? d.is : d.os);
+     pln->os = X(mkstride)(d.n/2 + 1, r2hc_kindp ? d.os : d.is);
+
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+
+     pln->ilast = (d.n % 2) ? 0 : (d.n/2) * d.os; /* Nyquist freq., if any */
+
+     pln->slv = ego;
+     X(ops_zero)(&pln->super.super.ops);
+     if (r2hc_kindp)
+	  X(ops_madd2)(pln->vl / ego->desc.r2hc->genus->vl,
+		       &ego->desc.r2hc->ops,
+		       &pln->super.super.ops);
+     else {
+	  X(ops_madd2)(pln->vl / ego->desc.hc2r->genus->vl,
+		       &ego->desc.hc2r->ops,
+		       &pln->super.super.ops);
+	  pln->super.super.ops.other += 2 * pln->vl; /* + 2 stores */ /* NOTE(review): the two extra stores per vector element are in apply_r2hc, not apply_hc2r -- confirm the intended branch */
+     }
+
+     return &(pln->super.super);
+}
+
+/* constructor: wraps R2HC codelet k and its descriptor in a solver; sz, nam and kind are copied from the descriptor */
+solver *X(mksolver_rdft2_r2hc_direct)(kr2hc k, const kr2hc_desc *desc)
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     slv->k.r2hc = k;
+     slv->desc.r2hc = desc;
+     slv->sz = desc->sz;
+     slv->nam = desc->nam;
+     slv->kind = desc->genus->kind;
+     return &(slv->super);
+}
+/* constructor: wraps HC2R codelet k and its descriptor in a solver; sz, nam and kind are copied from the descriptor */
+solver *X(mksolver_rdft2_hc2r_direct)(khc2r k, const khc2r_desc *desc)
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     slv->k.hc2r = k;
+     slv->desc.hc2r = desc;
+     slv->sz = desc->sz;
+     slv->nam = desc->nam;
+     slv->kind = desc->genus->kind;
+     return &(slv->super);
+}
diff --git a/src/fftw3/rdft/hc2hc-buf.c b/src/fftw3/rdft/hc2hc-buf.c
new file mode 100644
index 0000000..75f761c
--- /dev/null
+++ b/src/fftw3/rdft/hc2hc-buf.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: hc2hc-buf.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* buffered Cooley-Tukey: decimation in time and decimation in frequency */
+#include "rdft.h"
+#include "hc2hc.h"
+
+/*
+   Copy A -> B, where A and B are n0 x n1 complex matrices
+   such that the (i0, i1) element has index (i0 * s0 + i1 * s1).
+   The imaginary strides are of opposite signs to the real strides.
+*/
+static void cpy(int n0, int n1,
+                const R *rA, const R *iA, int sa0, int sa1,
+                R *rB, R *iB, int sb0, int sb1)
+{
+     int row, col;
+
+     /* real pointers advance by +stride, imaginary pointers by -stride */
+     for (row = 0; row < n0; ++row) {
+          const R *src_re = rA, *src_im = iA;
+          R *dst_re = rB, *dst_im = iB;
+
+          for (col = 0; col < n1; ++col) {
+               const R re = *src_re, im = *src_im; /* load the pair before storing */
+               *dst_re = re; *dst_im = im;
+               src_re += sa1; src_im -= sa1;
+               dst_re += sb1; dst_im -= sb1;
+          }
+
+          /* step both matrices to the next row */
+          rA += sa0; iA -= sa0;
+          rB += sb0; iB -= sb0;
+     }
+}
+
+static const R *doit(khc2hc k, R *rA, R *iA, const R *W, int ios, int dist,
+                     int r, int batchsz, R *buf, stride bufstride)
+{    /* gathers r x batchsz elements into buf, runs codelet k on buf, scatters back; returns what k returned */
+     cpy(r, batchsz, rA, iA, ios, dist, buf, buf + 2*batchsz*r-1, 1, r); /* array -> buf */
+     W = k(buf, buf + 2*batchsz*r-1, W, bufstride, 2*batchsz + 1, r); /* imaginary part starts at the last buf element */
+     cpy(r, batchsz, buf, buf + 2*batchsz*r-1, 1, r, rA, iA, ios, dist); /* buf -> array */
+     return W; /* callers feed this into the next doit call */
+}
+
+#define BATCHSZ 4 /* batch size handed to doit per full call; also sizes the scratch buffer. FIXME: parametrize? */
+
+static void apply_dit(const plan *ego_, R *I, R *O)
+{    /* buffered DIT executor: child transform I -> O first, then in-place passes over O */
+     const plan_hc2hc *ego = (const plan_hc2hc *) ego_;
+
+     /* two-dimensional r x vl sub-transform: */
+     {
+	  plan_rdft *cld = (plan_rdft *) ego->cld;
+	  cld->apply((plan *) cld, I, O);
+     }
+
+     {
+          plan_rdft *cld0 = (plan_rdft *) ego->cld0;
+          plan_rdft *cldm = (plan_rdft *) ego->cldm;
+          int i, j, r = ego->r, m = ego->m, vl = ego->vl;
+          int os = ego->os, ovs = ego->ovs, ios = ego->iios;
+	  R *buf;
+
+	  STACK_MALLOC(R *, buf, r * BATCHSZ * 2 * sizeof(R)); /* room for one full batch */
+
+          for (i = 0; i < vl; ++i, O += ovs) { /* one pass per vector element */
+	       R *rA, *iA;
+	       const R *W;
+
+	       cld0->apply((plan *) cld0, O, O); /* 0th butterfly, in place */
+	       /* rA starts at element 1 and moves up; iA starts at the last element and moves down */
+	       rA = O + os; iA = O + (r * m - 1) * os;
+	       W = ego->W;
+	       for (j = (m-1)/2; j >= BATCHSZ; j -= BATCHSZ) {
+		    W = doit(ego->k, rA, iA, W, ios, os, r, BATCHSZ, buf,
+			     ego->vs);
+		    rA += os * (int)BATCHSZ;
+		    iA -= os * (int)BATCHSZ;
+	       }
+	       /* do remaining j calls, if any */
+               if (j > 0)
+                    doit(ego->k, rA, iA, W, ios, os, r, j, buf, ego->vs);
+
+	       cldm->apply((plan *) cldm, O + os*(m/2), O + os*(m/2)); /* middle butterfly, in place */
+	  }
+
+	  STACK_FREE(buf);
+     }
+}
+
+static void apply_dif(const plan *ego_, R *I, R *O)
+{    /* buffered DIF executor: in-place passes over I first, then child transform I -> O */
+     const plan_hc2hc *ego = (const plan_hc2hc *) ego_;
+     R *I0 = I; /* the loop below advances I; the child transform needs the original base */
+
+     {
+          plan_rdft *cld0 = (plan_rdft *) ego->cld0;
+          plan_rdft *cldm = (plan_rdft *) ego->cldm;
+          int i, j, r = ego->r, m = ego->m, vl = ego->vl;
+          int is = ego->is, ivs = ego->ivs, ios = ego->iios;
+	  R *buf;
+
+	  STACK_MALLOC(R *, buf, r * BATCHSZ * 2 * sizeof(R)); /* room for one full batch */
+
+          for (i = 0; i < vl; ++i, I += ivs) { /* one pass per vector element */
+	       R *rA, *iA;
+	       const R *W;
+
+	       cld0->apply((plan *) cld0, I, I); /* 0th butterfly, in place (overwrites the input) */
+	       /* rA starts at element 1 and moves up; iA starts at the last element and moves down */
+	       rA = I + is; iA = I + (r * m - 1) * is;
+	       W = ego->W;
+	       for (j = (m-1)/2; j >= BATCHSZ; j -= BATCHSZ) {
+		    W = doit(ego->k, rA, iA, W, ios, is, r, BATCHSZ, buf,
+			     ego->vs);
+		    rA += is * (int)BATCHSZ;
+		    iA -= is * (int)BATCHSZ;
+	       }
+	       /* do remaining j calls, if any */
+               if (j > 0)
+                    doit(ego->k, rA, iA, W, ios, is, r, j, buf, ego->vs);
+
+	       cldm->apply((plan *) cldm, I + is*(m/2), I + is*(m/2)); /* middle butterfly, in place */
+	  }
+
+	  STACK_FREE(buf);
+     }
+
+     /* two-dimensional r x vl sub-transform: */
+     {
+	  plan_rdft *cld = (plan_rdft *) ego->cld;
+	  cld->apply((plan *) cld, I0, O);
+     }
+}
+
+static int applicable0(const solver_hc2hc *ego, const problem *p_,
+		       const planner *plnr)
+{    /* structural test shared by the buffered DIT and DIF solvers; returns nonzero when usable */
+     if (X(rdft_hc2hc_applicable)(ego, p_)) {
+          const hc2hc_desc *e = ego->desc;
+          const problem_rdft *p = (const problem_rdft *) p_;
+          iodim *d = p->sz->dims;
+	  int r = e->radix, m = d[0].n / e->radix;
+          return (1 /* non-R2HC kinds need I == O or permission to destroy the input */
+		  && (p->kind[0]==R2HC || p->I == p->O || DESTROY_INPUTP(plnr))
+                  /* check both batch size and remainder; the okp arguments mirror doit()'s codelet call with a null buffer base */
+                  && (m < BATCHSZ ||
+                      (e->genus->okp(e, 0, ((const R *)0)+2*BATCHSZ*r-1, 1,0, 
+				     2*BATCHSZ + 1, r)))
+                  && (m < BATCHSZ ||
+                      (e->genus->okp(e, 0, ((const R *)0) 
+				     + 2*(((m-1)/2) % BATCHSZ)*r-1, 1, 0, 
+				     2*(((m-1)/2) % BATCHSZ) + 1, r)))
+	       );
+     }
+     return 0; /* the shared hc2hc test rejected the problem */
+}
+
+static int applicable(const solver_hc2hc *ego, const problem *p_,
+                      const planner *plnr)
+{
+     const problem_rdft *prob = (const problem_rdft *) p_;
+     /* structural test first; prob is only dereferenced once it has passed */
+     if (!applicable0(ego, p_, plnr))
+          return 0;
+
+     /* emulate fftw2 behavior */
+     if (NO_VRECURSEP(plnr) && prob->vecsz->rnk > 0)
+          return 0;
+
+     if (NO_UGLYP(plnr)
+         && X(ct_uglyp)(512, prob->sz->dims[0].n, ego->desc->radix))
+          return 0;
+
+     return 1;
+}
+
+static void finish(plan_hc2hc *ego)
+{    /* buffered variant: sets iios and vs, then accumulates the plan's operation count */
+     const hc2hc_desc *d = ego->slv->desc;
+     opcnt t;
+     /* iios is the row stride used by cpy (m output strides for R2HC kinds, m input strides otherwise) */
+     ego->iios = ego->m * (R2HC_KINDP(d->genus->kind) ? ego->os : ego->is);
+     ego->vs = X(mkstride)(ego->r, 1); /* handed to the codelet as its buffer stride (see doit) */
+
+     X(ops_add)(&ego->cld0->ops, &ego->cldm->ops, &t);
+     X(ops_madd)(ego->vl, &t, &ego->cld->ops, &ego->super.super.ops);
+     ego->super.super.ops.other += 4 * ego->r * ((ego->m - 1)/2) * ego->vl; /* presumably the buffer copies -- TODO confirm */
+     X(ops_madd2)(ego->vl * ((ego->m - 1)/2) / d->genus->vl, &d->ops,
+		  &ego->super.super.ops);
+}
+
+static plan *mkplan_ditbuf(const solver *ego, const problem *p, planner *plnr)
+{
+     /* hooks for the generic hc2hc planner: DIT children, buffered callbacks */
+     static const hc2hcadt ditbuf_adt = {
+          sizeof(plan_hc2hc), X(rdft_mkcldrn_dit), finish, applicable, apply_dit
+     };
+     return X(mkplan_rdft_hc2hc)((const solver_hc2hc *) ego, p, plnr, &ditbuf_adt);
+}
+
+solver *X(mksolver_rdft_hc2hc_ditbuf)(khc2hc codelet, const hc2hc_desc *desc)
+{
+     static const char solver_name[] = "rdft-ditbuf";
+     static const solver_adt vtbl = { mkplan_ditbuf };
+     /* the generic constructor stores the codelet, descriptor and name */
+     return X(mksolver_rdft_hc2hc)(codelet, desc, solver_name, &vtbl);
+}
+
+static plan *mkplan_difbuf(const solver *ego, const problem *p, planner *plnr)
+{
+     /* hooks for the generic hc2hc planner: DIF children, buffered callbacks */
+     static const hc2hcadt difbuf_adt = {
+          sizeof(plan_hc2hc), X(rdft_mkcldrn_dif), finish, applicable, apply_dif
+     };
+     return X(mkplan_rdft_hc2hc)((const solver_hc2hc *) ego, p, plnr, &difbuf_adt);
+}
+
+solver *X(mksolver_rdft_hc2hc_difbuf)(khc2hc codelet, const hc2hc_desc *desc)
+{
+     static const char solver_name[] = "rdft-difbuf";
+     static const solver_adt vtbl = { mkplan_difbuf };
+     /* the generic constructor stores the codelet, descriptor and name */
+     return X(mksolver_rdft_hc2hc)(codelet, desc, solver_name, &vtbl);
+}
diff --git a/src/fftw3/rdft/hc2hc-dif.c b/src/fftw3/rdft/hc2hc-dif.c
new file mode 100644
index 0000000..2bcdca2
--- /dev/null
+++ b/src/fftw3/rdft/hc2hc-dif.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: hc2hc-dif.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* decimation in frequency Cooley-Tukey */
+#include "rdft.h"
+#include "hc2hc.h"
+
+static void apply(const plan *ego_, R *I, R *O)
+{    /* DIF executor: in-place passes over I first, then child transform I -> O */
+     const plan_hc2hc *ego = (const plan_hc2hc *) ego_;
+     R *I0 = I; /* the loop below advances I; the child transform needs the original base */
+
+     {
+          plan_rdft *cld0 = (plan_rdft *) ego->cld0;
+          plan_rdft *cldm = (plan_rdft *) ego->cldm;
+          int i, r = ego->r, m = ego->m, vl = ego->vl;
+          int is = ego->is, ivs = ego->ivs;
+	  /* per vector element: 0th butterfly, one codelet call, middle butterfly */
+          for (i = 0; i < vl; ++i, I += ivs) {
+	       cld0->apply((plan *) cld0, I, I);
+               ego->k(I + is, I + (r * m - 1) * is, ego->W, ego->ios, m, is);
+	       cldm->apply((plan *) cldm, I + is*(m/2), I + is*(m/2));
+	  }
+     }
+
+     /* two-dimensional r x vl sub-transform: */
+     {
+	  plan_rdft *cld = (plan_rdft *) ego->cld;
+	  cld->apply((plan *) cld, I0, O);
+     }
+}
+
+static int applicable0(const solver_hc2hc *ego, const problem *p_,
+		       const planner *plnr)
+{    /* structural test for the unbuffered DIF solver; returns nonzero when usable */
+     if (X(rdft_hc2hc_applicable)(ego, p_)) {
+	  int ivs, ovs;
+	  int vl;
+          const hc2hc_desc *e = ego->desc;
+          const problem_rdft *p = (const problem_rdft *) p_;
+          iodim *d = p->sz->dims;
+	  int m = d[0].n / e->radix;
+	  X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
+          return (1 /* needs I == O or permission to destroy input; okp is asked at p->I and at p->I + ivs */
+		  && (p->I == p->O || DESTROY_INPUTP(plnr))
+		  && (e->genus->okp(e, p->I + d[0].is,
+				    p->I + (e->radix * m - 1) * d[0].is, 
+				    (int)m * d[0].is, 0, m, d[0].is))
+		  && (e->genus->okp(e, p->I + ivs + d[0].is,
+				    p->I + ivs + (e->radix * m - 1) * d[0].is, 
+				    (int)m * d[0].is, 0, m, d[0].is))
+	       );
+     }
+     return 0; /* the shared hc2hc test rejected the problem */
+}
+
+static int applicable(const solver_hc2hc *ego, const problem *p_,
+                      const planner *plnr)
+{
+     const problem_rdft *prob = (const problem_rdft *) p_;
+
+     if (!applicable0(ego, p_, plnr))
+          return 0;
+
+     /* emulate fftw2 behavior */
+     if (NO_VRECURSEP(plnr) && prob->vecsz->rnk > 0)
+          return 0;
+
+     if (NO_UGLYP(plnr)
+         && (X(ct_uglyp)(16, prob->sz->dims[0].n, ego->desc->radix)
+             || NONTHREADED_ICKYP(plnr))) /* prefer threaded version */
+          return 0;
+     return 1;
+}
+
+static void finish(plan_hc2hc *ego)
+{    /* DIF variant: builds the ios stride table and accumulates the operation count */
+     const hc2hc_desc *d = ego->slv->desc;
+     opcnt t;
+     /* stride table of r entries with step m * is; apply passes it to the codelet */
+     ego->ios = X(mkstride)(ego->r, ego->m * ego->is);
+     /* cost: children's ops combined with the codelet's ops (exact ops_* semantics are defined elsewhere) */
+     X(ops_add)(&ego->cld0->ops, &ego->cldm->ops, &t);
+     X(ops_madd)(ego->vl, &t, &ego->cld->ops, &ego->super.super.ops);
+     X(ops_madd2)(ego->vl * ((ego->m - 1)/2) / d->genus->vl, &d->ops,
+		  &ego->super.super.ops);
+}
+
+static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
+{
+     /* hooks for the generic hc2hc planner: DIF children plus this file's callbacks */
+     static const hc2hcadt dif_adt = {
+          sizeof(plan_hc2hc), X(rdft_mkcldrn_dif), finish, applicable, apply
+     };
+     return X(mkplan_rdft_hc2hc)((const solver_hc2hc *) ego, p, plnr, &dif_adt);
+}
+
+
+solver *X(mksolver_rdft_hc2hc_dif)(khc2hc codelet, const hc2hc_desc *desc)
+{
+     static const char solver_name[] = "rdft-dif";
+     static const solver_adt vtbl = { mkplan };
+     /* the generic constructor stores the codelet, descriptor and name */
+     return X(mksolver_rdft_hc2hc)(codelet, desc, solver_name, &vtbl);
+}
diff --git a/src/fftw3/rdft/hc2hc-dit.c b/src/fftw3/rdft/hc2hc-dit.c
new file mode 100644
index 0000000..04aa776
--- /dev/null
+++ b/src/fftw3/rdft/hc2hc-dit.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: hc2hc-dit.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* decimation in time Cooley-Tukey */
+#include "rdft.h"
+#include "hc2hc.h"
+
+static void apply(const plan *ego_, R *I, R *O)
+{    /* DIT executor: child transform I -> O first, then in-place passes over O */
+     const plan_hc2hc *ego = (const plan_hc2hc *) ego_;
+
+     /* two-dimensional r x vl sub-transform: */
+     {
+	  plan_rdft *cld = (plan_rdft *) ego->cld;
+	  cld->apply((plan *) cld, I, O);
+     }
+
+     {
+          plan_rdft *cld0 = (plan_rdft *) ego->cld0;
+          plan_rdft *cldm = (plan_rdft *) ego->cldm;
+          int i, r = ego->r, m = ego->m, vl = ego->vl;
+          int os = ego->os, ovs = ego->ovs;
+          /* per vector element: 0th butterfly, one codelet call, middle butterfly */
+          for (i = 0; i < vl; ++i, O += ovs) {
+	       cld0->apply((plan *) cld0, O, O);
+               ego->k(O + os, O + (r * m - 1) * os, ego->W, ego->ios, m, os);
+	       cldm->apply((plan *) cldm, O + os*(m/2), O + os*(m/2));
+	  }
+     }
+}
+
+static int applicable0(const solver_hc2hc *ego, const problem *p_,
+		       const planner *plnr)
+{    /* structural test for the unbuffered DIT solver; returns nonzero when usable */
+     UNUSED(plnr);
+     if (X(rdft_hc2hc_applicable)(ego, p_)) {
+	  int ivs, ovs;
+	  int vl;
+          const hc2hc_desc *e = ego->desc;
+          const problem_rdft *p = (const problem_rdft *) p_;
+          iodim *d = p->sz->dims;
+	  int m = d[0].n / e->radix;
+	  X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
+          return (1 /* the codelet's okp is asked at p->O and again at p->O + ovs */
+		  && (e->genus->okp(e, p->O + d[0].os,
+				    p->O + (e->radix * m - 1) * d[0].os, 
+				    (int)m * d[0].os, 0, m, d[0].os))
+		  && (e->genus->okp(e, p->O + ovs + d[0].os,
+				    p->O + ovs + (e->radix * m - 1) * d[0].os, 
+				    (int)m * d[0].os, 0, m, d[0].os))
+	       );
+     }
+     return 0; /* the shared hc2hc test rejected the problem */
+}
+
+static int applicable(const solver_hc2hc *ego, const problem *p_,
+                      const planner *plnr)
+{
+     const problem_rdft *prob = (const problem_rdft *) p_;
+
+     if (!applicable0(ego, p_, plnr))
+          return 0;
+
+     /* emulate fftw2 behavior */
+     if (NO_VRECURSEP(plnr) && prob->vecsz->rnk > 0)
+          return 0;
+
+     /* both remaining rejections apply only when NO_UGLYP(plnr) holds */
+     if (NO_UGLYP(plnr)
+         && (X(ct_uglyp)(16, prob->sz->dims[0].n, ego->desc->radix)
+             || NONTHREADED_ICKYP(plnr))) /* prefer threaded version */
+          return 0;
+
+     return 1;
+}
+
+static void finish(plan_hc2hc *ego)
+{    /* DIT variant: builds the ios stride table and accumulates the operation count */
+     const hc2hc_desc *d = ego->slv->desc;
+     opcnt t;
+     /* stride table of r entries with step m * os; apply passes it to the codelet */
+     ego->ios = X(mkstride)(ego->r, ego->m * ego->os);
+     /* cost: children's ops combined with the codelet's ops (exact ops_* semantics are defined elsewhere) */
+     X(ops_add)(&ego->cld0->ops, &ego->cldm->ops, &t);
+     X(ops_madd)(ego->vl, &t, &ego->cld->ops, &ego->super.super.ops);
+     X(ops_madd2)(ego->vl * ((ego->m - 1)/2) / d->genus->vl, &d->ops,
+		  &ego->super.super.ops);
+}
+
+
+static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
+{
+     /* hooks for the generic hc2hc planner: DIT children plus this file's callbacks */
+     static const hc2hcadt dit_adt = {
+          sizeof(plan_hc2hc), X(rdft_mkcldrn_dit), finish, applicable, apply
+     };
+     return X(mkplan_rdft_hc2hc)((const solver_hc2hc *) ego, p, plnr, &dit_adt);
+}
+
+
+solver *X(mksolver_rdft_hc2hc_dit)(khc2hc codelet, const hc2hc_desc *desc)
+{
+     static const char solver_name[] = "rdft-dit";
+     static const solver_adt vtbl = { mkplan };
+     /* the generic constructor stores the codelet, descriptor and name */
+     return X(mksolver_rdft_hc2hc)(codelet, desc, solver_name, &vtbl);
+}
diff --git a/src/fftw3/rdft/hc2hc.c b/src/fftw3/rdft/hc2hc.c
new file mode 100644
index 0000000..926273c
--- /dev/null
+++ b/src/fftw3/rdft/hc2hc.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: hc2hc.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* generic Cooley-Tukey routines */
+#include "rdft.h"
+#include "hc2hc.h"
+
+static void destroy(plan *ego_)
+{
+     plan_hc2hc *pln = (plan_hc2hc *) ego_;
+     /* the three child plans go first, then the two stride tables */
+     X(plan_destroy_internal)(pln->cld);
+     X(plan_destroy_internal)(pln->cld0);
+     X(plan_destroy_internal)(pln->cldm);
+     X(stride_destroy)(pln->ios);
+     X(stride_destroy)(pln->vs);
+}
+
+static void awake(plan *ego_, int flg)
+{
+     plan_hc2hc *pln = (plan_hc2hc *) ego_;
+
+     AWAKE(pln->cld, flg);
+     AWAKE(pln->cld0, flg);
+     AWAKE(pln->cldm, flg);
+
+     if (!flg) {   /* going to sleep: drop the twiddle table and the pointer into it */
+          X(twiddle_destroy)(&pln->td);
+          pln->W = 0;
+     } else {
+          const tw_instr *tw = pln->slv->desc->tw;
+          X(mktwiddle)(&pln->td, tw, pln->n, pln->r, (pln->m + 1) / 2);
+          /* 0th twiddle is handled by cld0: */
+          pln->W = pln->td->W + X(twiddle_length)(pln->r, tw);
+     }
+}
+
+static void print(const plan *ego_, printer *p)
+{
+     const plan_hc2hc *pln = (const plan_hc2hc *) ego_;
+     const hc2hc_desc *desc = pln->slv->desc;
+
+     /* solver name, radix/twiddle length, vector length, codelet name, three children */
+     p->print(p, "(%s-%d/%d%v \"%s\"%(%p%)%(%p%)%(%p%))",
+              pln->slv->nam, pln->r, X(twiddle_length)(pln->r, desc->tw),
+              pln->vl, desc->nam, pln->cld0, pln->cldm, pln->cld);
+}
+
+#define divides(a, b) (((int)(b) % (int)(a)) == 0) /* nonzero iff a divides b exactly; a must be nonzero */
+
+int X(rdft_hc2hc_applicable)(const solver_hc2hc *ego, const problem *p_)
+{
+     const problem_rdft *p;
+     const hc2hc_desc *d;
+
+     if (!RDFTP(p_))
+          return 0;
+
+     p = (const problem_rdft *) p_;
+     d = ego->desc;
+     return (p->sz->rnk == 1 && p->vecsz->rnk <= 1
+             && p->kind[0] == d->genus->kind
+             && divides(d->radix, p->sz->dims[0].n)
+             && d->radix < p->sz->dims[0].n /* avoid inf. loops in cld0 */);
+}
+
+
+static const plan_adt padt = /* plan operations shared by every hc2hc plan built in this file */
+{
+     X(rdft_solve),
+     awake, /* wakes the children and builds or frees the twiddle table */
+     print, /* prints the plan and its three children */
+     destroy /* destroys the children and the stride tables */
+};
+
+plan *X(mkplan_rdft_hc2hc)(const solver_hc2hc *ego,
+			   const problem *p_,
+			   planner *plnr,
+			   const hc2hcadt *adt)
+{    /* generic hc2hc plan builder; adt supplies the variant-specific hooks. Returns 0 when not applicable or when a child cannot be planned. */
+     plan_hc2hc *pln;
+     plan *cld = 0, *cld0 = 0, *cldm = 0;
+     int n, r, m;
+     problem *cldp = 0, *cld0p = 0, *cldmp = 0;
+     iodim *d;
+     const problem_rdft *p;
+     const hc2hc_desc *e = ego->desc;
+     /* each solver variant decides applicability through its own hook */
+     if (!adt->applicable(ego, p_, plnr))
+          return (plan *) 0;
+
+     p = (const problem_rdft *) p_;
+     d = p->sz->dims;
+     n = d[0].n;
+     r = e->radix;
+     m = n / r;
+     /* the variant creates the three child problems */
+     adt->mkcldrn(ego, p, &cldp, &cld0p, &cldmp);
+     /* plan each child; the problem pointer is cleared right after mkplan_d (presumably it takes ownership -- TODO confirm) */
+     cld = X(mkplan_d)(plnr, cldp); cldp = 0;
+     if (!cld) goto nada;
+
+     cld0 = X(mkplan_d)(plnr, cld0p); cld0p = 0;
+     if (!cld0) goto nada;
+
+     cldm = X(mkplan_d)(plnr, cldmp); cldmp = 0;
+     if (!cldm) goto nada;
+
+     A(adt->pln_size >= sizeof(plan_hc2hc));
+     pln = (plan_hc2hc *) X(mkplan_rdft)(adt->pln_size, &padt, adt->apply);
+
+     pln->slv = ego;
+     pln->cld = cld;
+     pln->cld0 = cld0;
+     pln->cldm = cldm;
+     pln->k = ego->k;
+     pln->n = n;
+     pln->r = r;
+     pln->m = m;
+
+     pln->is = d[0].is;
+     pln->os = d[0].os;
+     /* stride tables start empty; adt->finish fills in whichever one the variant uses */
+     pln->ios = pln->vs = 0;
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+     pln->td = 0;
+     adt->finish(pln);
+
+     return &(pln->super.super);
+
+ nada: /* a child could not be planned: release the problems and plans that are still held */
+     X(problem_destroy)(cldmp);
+     X(problem_destroy)(cld0p);
+     X(problem_destroy)(cldp);
+     X(plan_destroy_internal)(cldm);
+     X(plan_destroy_internal)(cld0);
+     X(plan_destroy_internal)(cld);
+     return (plan *) 0;
+}
+
+solver *X(mksolver_rdft_hc2hc)(khc2hc k, const hc2hc_desc *desc,
+                               const char *nam, const solver_adt *adt)
+{
+     /* create the solver through MKSOLVER, then record codelet, name and descriptor */
+     solver_hc2hc *self = MKSOLVER(solver_hc2hc, adt);
+
+     self->k = k;
+     self->nam = nam;
+     self->desc = desc;
+
+     return &self->super;
+}
+
+/* routines to create children are shared by many solvers */
+
+void X(rdft_mkcldrn_dit)(const solver_hc2hc *ego, const problem_rdft *p,
+                         problem **cldp, problem **cld0p, problem **cldmp)
+{    /* builds the three child problems of a DIT (R2HC) step and stores them through the out-parameters */
+     iodim *d = p->sz->dims;
+     const hc2hc_desc *e = ego->desc;
+     int m = d[0].n / e->radix;
+     int omid = d[0].os * (m/2);
+     /* vector tensor for the main child: the radix dimension followed by the caller's vecsz */
+     tensor *null, *radix = X(mktensor_1d)(e->radix, d[0].is, m * d[0].os);
+     tensor *cld_vec = X(tensor_append)(radix, p->vecsz);
+     X(tensor_destroy)(radix);
+     A(p->kind[0] == R2HC);
+     /* main child: size-m transforms from p->I to p->O, vectorized over cld_vec */
+     *cldp = X(mkproblem_rdft_d)(X(mktensor_1d)(m, e->radix*d[0].is, d[0].os),
+				 cld_vec, p->I, p->O, p->kind);
+     /* 0th and middle children: size-radix, in place on p->O; when m is odd the middle one gets the 0-d size tensor */
+     radix = X(mktensor_1d)(e->radix, m * d[0].os, m * d[0].os);
+     null = X(mktensor_0d)();
+     *cld0p = X(mkproblem_rdft_1)(radix, null, p->O, p->O, R2HC);
+     *cldmp = X(mkproblem_rdft_1)(m%2 ? null : radix, null,
+				  p->O + omid, p->O + omid, R2HCII);
+     X(tensor_destroy2)(null, radix);
+}
+
+
+void X(rdft_mkcldrn_dif)(const solver_hc2hc *ego, const problem_rdft *p,
+                         problem **cldp, problem **cld0p, problem **cldmp)
+{    /* builds the three child problems of a DIF (HC2R) step and stores them through the out-parameters */
+     iodim *d = p->sz->dims;
+     const hc2hc_desc *e = ego->desc;
+     int m = d[0].n / e->radix;
+     int imid = d[0].is * (m/2);
+     /* vector tensor for the main child: the radix dimension followed by the caller's vecsz */
+     tensor *null, *radix = X(mktensor_1d)(e->radix, m * d[0].is, d[0].os);
+     tensor *cld_vec = X(tensor_append)(radix, p->vecsz);
+     X(tensor_destroy)(radix);
+     A(p->kind[0] == HC2R);
+     /* main child: size-m transforms from p->I to p->O, vectorized over cld_vec */
+     *cldp = X(mkproblem_rdft_d)(X(mktensor_1d)(m, d[0].is, e->radix*d[0].os),
+				 cld_vec, p->I, p->O, p->kind);
+     /* 0th and middle children: size-radix, in place on p->I; when m is odd the middle one gets the 0-d size tensor */
+     radix = X(mktensor_1d)(e->radix, m * d[0].is, m * d[0].is);
+     null = X(mktensor_0d)();
+     *cld0p = X(mkproblem_rdft_1)(radix, null, p->I, p->I, HC2R);
+     *cldmp = X(mkproblem_rdft_1)(m%2 ? null : radix, null, 
+				  p->I + imid, p->I + imid, HC2RIII);
+     X(tensor_destroy2)(null, radix);
+}
diff --git a/src/fftw3/rdft/hc2hc.h b/src/fftw3/rdft/hc2hc.h
new file mode 100644
index 0000000..6c062cd
--- /dev/null
+++ b/src/fftw3/rdft/hc2hc.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+typedef struct { /* solver wrapping one hc2hc codelet */
+     solver super;
+     const char *nam; /* solver name, printed by the plan's print routine */
+     const hc2hc_desc *desc; /* codelet descriptor: radix, genus, twiddle instructions, op counts */
+     khc2hc k; /* the codelet itself */
+} solver_hc2hc;
+
+typedef struct { /* plan produced by the hc2hc solvers */
+     plan_rdft super;
+     khc2hc k; /* codelet copied from the solver */
+     plan *cld0, *cldm; /* children for 0th and middle butterflies */
+     plan *cld; /* child for the r x vl sub-transform */
+     R *W; /* twiddles past the 0th butterfly; set in awake, 0 while asleep */
+     int n, r, m, vl; /* transform size, radix, n / r, vector length */
+     int is, os, ivs, ovs, iios; /* element and vector strides; iios is set only by the buffered solvers */
+     stride ios, vs; /* stride tables: ios for the unbuffered solvers, vs for the buffered ones */
+     const solver_hc2hc *slv; /* solver that created this plan */
+     twid *td; /* twiddle table handle, created and destroyed in awake */
+} plan_hc2hc;
+
+/* data type describing a generic Cooley-Tukey solver: the hooks mkplan_rdft_hc2hc calls */
+typedef struct
+{
+     size_t pln_size; /* bytes to allocate for the plan; must be >= sizeof(plan_hc2hc) */
+     void (*mkcldrn)(const solver_hc2hc *, const problem_rdft *p,
+		     problem **cldp, problem **cld0p, problem **cldmp); /* creates the three child problems */
+     void (*finish)(plan_hc2hc *ego); /* called last: fills in strides and op counts */
+     int (*applicable)(const solver_hc2hc *ego, const problem *p,
+		       const planner *plnr); /* nonzero when the solver can handle p */
+     rdftapply apply; /* executor installed in the plan */
+} hc2hcadt;
+
+int X(rdft_hc2hc_applicable)(const solver_hc2hc *ego, const problem *p_); /* test shared by all hc2hc solvers */
+/* generic plan builder; returns 0 when not applicable or when a child cannot be planned */
+plan *X(mkplan_rdft_hc2hc)(const solver_hc2hc *ego,
+			   const problem *p_,
+			   planner *plnr,
+			   const hc2hcadt *adt);
+/* generic solver constructor: stores the codelet, descriptor and name */
+solver *X(mksolver_rdft_hc2hc)(khc2hc k, const hc2hc_desc *desc,
+			       const char *nam, const solver_adt *adt);
+/* child-problem builders for the DIT (R2HC) and DIF (HC2R) decompositions */
+void X(rdft_mkcldrn_dit)(const solver_hc2hc *, const problem_rdft *p,
+			 problem **cldp, problem **cld0p, problem **cldmp);
+void X(rdft_mkcldrn_dif)(const solver_hc2hc *, const problem_rdft *p,
+			 problem **cldp, problem **cld0p, problem **cldmp);
diff --git a/src/fftw3/rdft/khc2hc-dif.c b/src/fftw3/rdft/khc2hc-dif.c
new file mode 100644
index 0000000..14806f1
--- /dev/null
+++ b/src/fftw3/rdft/khc2hc-dif.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: khc2hc-dif.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+void (*X(khc2hc_dif_register_hook))(planner *, khc2hc, const hc2hc_desc *)=0;
+/* registers the plain and buffered DIF solvers for a codelet, then calls the hook above if it is set */
+void X(khc2hc_dif_register)(planner *p, khc2hc codelet, const hc2hc_desc *desc)
+{
+     REGISTER_SOLVER(p, X(mksolver_rdft_hc2hc_dif)(codelet, desc));
+     REGISTER_SOLVER(p, X(mksolver_rdft_hc2hc_difbuf)(codelet, desc));
+     if (X(khc2hc_dif_register_hook)) /* null by default */
+	  X(khc2hc_dif_register_hook)(p, codelet, desc);
+}
diff --git a/src/fftw3/rdft/khc2hc-dit.c b/src/fftw3/rdft/khc2hc-dit.c
new file mode 100644
index 0000000..24ba8d3
--- /dev/null
+++ b/src/fftw3/rdft/khc2hc-dit.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: khc2hc-dit.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+void (*X(khc2hc_dit_register_hook))(planner *, khc2hc, const hc2hc_desc *)=0;
+/* registers the plain and buffered DIT solvers for a codelet, then calls the hook above if it is set */
+void X(khc2hc_dit_register)(planner *p, khc2hc codelet, const hc2hc_desc *desc)
+{
+     REGISTER_SOLVER(p, X(mksolver_rdft_hc2hc_dit)(codelet, desc));
+     REGISTER_SOLVER(p, X(mksolver_rdft_hc2hc_ditbuf)(codelet, desc));
+     if (X(khc2hc_dit_register_hook)) /* null by default */
+	  X(khc2hc_dit_register_hook)(p, codelet, desc);
+}
diff --git a/src/fftw3/rdft/khc2r.c b/src/fftw3/rdft/khc2r.c
new file mode 100644
index 0000000..d796bca
--- /dev/null
+++ b/src/fftw3/rdft/khc2r.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: khc2r.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+void X(khc2r_register)(planner *p, khc2r codelet, const khc2r_desc *desc)
+{    /* registers two direct solvers for one hc2r codelet: the rdft one and the rdft2 one */
+     REGISTER_SOLVER(p, X(mksolver_rdft_hc2r_direct)(codelet, desc));
+     REGISTER_SOLVER(p, X(mksolver_rdft2_hc2r_direct)(codelet, desc));
+}
diff --git a/src/fftw3/rdft/kr2hc.c b/src/fftw3/rdft/kr2hc.c
new file mode 100644
index 0000000..9840186
--- /dev/null
+++ b/src/fftw3/rdft/kr2hc.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: kr2hc.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+void X(kr2hc_register)(planner *p, kr2hc codelet, const kr2hc_desc *desc)
+{    /* registers two direct solvers for one r2hc codelet: the rdft one and the rdft2 one */
+     REGISTER_SOLVER(p, X(mksolver_rdft_r2hc_direct)(codelet, desc));
+     REGISTER_SOLVER(p, X(mksolver_rdft2_r2hc_direct)(codelet, desc));
+}
diff --git a/src/fftw3/rdft/kr2r.c b/src/fftw3/rdft/kr2r.c
new file mode 100644
index 0000000..31b1c10
--- /dev/null
+++ b/src/fftw3/rdft/kr2r.c
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: kr2r.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+void X(kr2r_register)(planner *p, kr2r codelet, const kr2r_desc *desc)
+{    /* registers the direct r2r solver built from this codelet with planner p */
+     REGISTER_SOLVER(p, X(mksolver_rdft_r2r_direct)(codelet, desc));
+}
diff --git a/src/fftw3/rdft/nop2.c b/src/fftw3/rdft/nop2.c
new file mode 100644
index 0000000..f647b29
--- /dev/null
+++ b/src/fftw3/rdft/nop2.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: nop2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* plans for vrank -infty RDFT2s (nothing to do), as well as in-place
+   rank-0 HC2R.  Note that in-place rank-0 R2HC is *not* a no-op, because
+   we have to set the imaginary parts of the output to zero. */
+
+#include "rdft.h"
+
+static void apply(const plan *ego_, R *r, R *rio, R *iio)
+{    /* deliberately empty: every argument is only marked as unused */
+     UNUSED(ego_);
+     UNUSED(r);
+     UNUSED(rio);
+     UNUSED(iio);
+}
+
+/* Accept rdft2 problems that need no work at all: either an empty
+   (-infty rank) vector loop, or an in-place rank-0 HC2R transform. */
+static int applicable(const solver *ego_, const problem *p_)
+{
+     const problem_rdft2 *prob;
+
+     UNUSED(ego_);
+     if (!RDFT2P(p_))
+          return 0;
+     prob = (const problem_rdft2 *) p_;
+
+     if (prob->vecsz->rnk == RNK_MINFTY)  /* case 1: empty vector loop */
+          return 1;
+
+     return (prob->kind == HC2R           /* case 2: rank-0 in-place HC2R */
+             && prob->sz->rnk == 0
+             && FINITE_RNK(prob->vecsz->rnk)
+             && (prob->r == prob->rio || prob->r == prob->iio)
+             && X(rdft2_inplace_strides)(prob, RNK_MINFTY));
+}
+
+static void print(const plan *ego, printer *p)
+{    /* emits the fixed tag below; ego itself is not consulted */
+     UNUSED(ego);
+     p->print(p, "(rdft2-nop)");
+}
+
+static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
+{    /* returns a do-nothing rdft2 plan, or a null plan if p is rejected */
+     static const plan_adt padt = {
+	  X(rdft2_solve), X(null_awake), print, X(plan_null_destroy)
+     };
+     plan_rdft2 *pln;
+
+     UNUSED(plnr);
+
+     if (!applicable(ego, p))
+          return (plan *) 0;
+     pln = MKPLAN_RDFT2(plan_rdft2, &padt, apply);
+     X(ops_zero)(&pln->super.ops);  /* the empty apply() costs nothing */
+
+     return &(pln->super);
+}
+
+static solver *mksolver(void)
+{    /* plain solver whose only method is mkplan; it carries no extra fields */
+     static const solver_adt sadt = { mkplan };
+     return MKSOLVER(solver, &sadt);
+}
+
+void X(rdft2_nop_register)(planner *p)
+{    /* entry point: adds one freshly made rdft2-nop solver to planner p */
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/rdft/plan2.c b/src/fftw3/rdft/plan2.c
new file mode 100644
index 0000000..b04c19d
--- /dev/null
+++ b/src/fftw3/rdft/plan2.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: plan2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+/* Create a plan of the requested size through X(mkplan) and install
+   its rdft2 apply routine; the embedded generic plan is returned. */
+plan *X(mkplan_rdft2)(size_t size, const plan_adt *adt, rdft2apply apply)
+{
+     plan_rdft2 *pln = (plan_rdft2 *) X(mkplan)(size, adt);
+
+     pln->apply = apply;
+     return &(pln->super);
+}
diff --git a/src/fftw3/rdft/problem2.c b/src/fftw3/rdft/problem2.c
new file mode 100644
index 0000000..57a5764
--- /dev/null
+++ b/src/fftw3/rdft/problem2.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: problem2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "dft.h"
+#include "rdft.h"
+#include <stddef.h>
+
+static void destroy(problem *ego_)
+{    /* frees the vecsz and sz tensors first, then the problem object */
+     problem_rdft2 *ego = (problem_rdft2 *) ego_;
+     X(tensor_destroy2)(ego->vecsz, ego->sz);
+     X(ifree)(ego_);
+}
+
+static void hash(const problem *p_, md5 *m)
+{    /* folds aliasing, layout, kind and both tensors into the md5 state m */
+     const problem_rdft2 *p = (const problem_rdft2 *) p_;
+     X(md5puts)(m, "rdft2");                    /* problem-type tag */
+     X(md5int)(m, p->r == p->rio);              /* does r alias rio? */
+     X(md5int)(m, p->r == p->iio);              /* does r alias iio? */
+     X(md5ptrdiff)(m, p->iio - p->rio);         /* offset between the two */
+     X(md5int)(m, X(alignment_of)(p->r));
+     X(md5int)(m, X(alignment_of)(p->rio)); 
+     X(md5int)(m, X(alignment_of)(p->iio)); 
+     X(md5int)(m, p->kind);
+     X(tensor_md5)(m, p->sz);
+     X(tensor_md5)(m, p->vecsz);
+}
+
+static void print(problem *ego_, printer *p)
+{    /* prints alignment of r, offsets rio-r and iio-r, kind, sz, vecsz */
+     problem_rdft2 *ego = (problem_rdft2 *) ego_;
+     p->print(p, "(rdft2 %d %td %td %d %T %T)", 
+	      X(alignment_of)(ego->r),
+	      ego->rio - ego->r, 
+	      ego->iio - ego->r,
+	      (int)(ego->kind),
+	      ego->sz,
+	      ego->vecsz);
+}
+
+/* Zero the real array r for R2HC, else the complex pair rio/iio. */
+static void zero(const problem *ego_)
+{
+     const problem_rdft2 *ego = (const problem_rdft2 *) ego_;
+     tensor *sz;
+     if (ego->kind == R2HC) {
+	  sz = X(tensor_append)(ego->vecsz, ego->sz);
+	  X(rdft_zerotens)(sz, UNTAINT(ego->r));
+     } else {
+	  tensor *sz2 = X(tensor_copy)(ego->sz);
+	  if (sz2->rnk > 0) /* complex data: LAST dim holds only n/2+1 */
+	       sz2->dims[sz2->rnk - 1].n = sz2->dims[sz2->rnk - 1].n / 2 + 1;
+	  sz = X(tensor_append)(ego->vecsz, sz2);
+	  X(tensor_destroy)(sz2);
+	  X(dft_zerotens)(sz, UNTAINT(ego->rio), UNTAINT(ego->iio));
+     }
+     X(tensor_destroy)(sz);
+}
+
+static const problem_adt padt =
+{    /* method table installed by X(mkproblem_rdft2); its address also identifies rdft2 problems */
+     hash,
+     zero,
+     print,
+     destroy
+};
+
+int X(problem_rdft2_p)(const problem *p)
+{    /* nonzero iff p uses this file's method table, i.e. is an rdft2 problem */
+     return (p->adt == &padt);
+}
+
+problem *X(mkproblem_rdft2)(const tensor *sz, const tensor *vecsz,
+			    R *r, R *rio, R *iio, rdft_kind kind)
+{    /* builds an rdft2 problem; kind must be R2HC or HC2R (asserted below) */
+     problem_rdft2 *ego =
+          (problem_rdft2 *)X(mkproblem)(sizeof(problem_rdft2), &padt);
+
+     A(X(tensor_kosherp)(sz));
+     A(X(tensor_kosherp)(vecsz));
+     A(FINITE_RNK(sz->rnk));
+     /* pointers equal after UNTAINT are both replaced by their JOIN_TAINT */
+     if (UNTAINT(r) == UNTAINT(rio))
+	  r = rio = JOIN_TAINT(r, rio);
+     if (UNTAINT(r) == UNTAINT(iio))
+	  r = iio = JOIN_TAINT(r, iio);
+
+     /* correctness condition: */
+     A(TAINTOF(rio) == TAINTOF(iio));
+
+     if (sz->rnk > 1) { /* have to compress rnk-1 dims separately, ugh */
+	  tensor *szc = X(tensor_copy_except)(sz, sz->rnk - 1);
+	  tensor *szr = X(tensor_copy_sub)(sz, sz->rnk - 1, 1);
+	  tensor *szcc = X(tensor_compress)(szc);
+	  if (szcc->rnk > 0)  /* last dimension is always appended last */
+	       ego->sz = X(tensor_append)(szcc, szr);
+	  else
+	       ego->sz = X(tensor_compress)(szr);
+	  X(tensor_destroy2)(szc, szr); X(tensor_destroy)(szcc);
+     }
+     else
+	  ego->sz = X(tensor_compress)(sz);
+     ego->vecsz = X(tensor_compress_contiguous)(vecsz);
+     ego->r = r;
+     ego->rio = rio;
+     ego->iio = iio;
+     ego->kind = kind;
+
+     A(kind == R2HC || kind == HC2R);
+     A(FINITE_RNK(ego->sz->rnk));
+     return &(ego->super);
+}
+
+/* Same as X(mkproblem_rdft2), but also destroys the sz and vecsz passed in. */
+problem *X(mkproblem_rdft2_d)(tensor *sz, tensor *vecsz,
+			      R *r, R *rio, R *iio, rdft_kind kind)
+{
+     problem *p;
+     p = X(mkproblem_rdft2)(sz, vecsz, r, rio, iio, kind);
+     X(tensor_destroy2)(vecsz, sz);  /* done only after the problem is built */
+     return p;
+}
diff --git a/src/fftw3/rdft/rader-hc2hc.c b/src/fftw3/rdft/rader-hc2hc.c
new file mode 100644
index 0000000..f1b6f34
--- /dev/null
+++ b/src/fftw3/rdft/rader-hc2hc.c
@@ -0,0 +1,513 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "rdft.h"
+#include "dft.h"
+
+/*
+ * Compute transforms with large prime factors using Rader's trick:
+ * turn the factors into convolutions of size n - 1, which you then
+ * perform via a pair of FFTs.  This file contains only twiddle hc2hc
+ * transforms, which are actually ordinary complex transforms in a
+ * slightly funny order.
+ */
+
+typedef struct {
+     solver super;
+     rdft_kind kind;  /* R2HC for the DIT solver, HC2R for the DIF solver */
+} S;
+
+typedef struct {
+     plan_rdft super;
+     /* cldr: complex DFT of size r-1; cldr0: size-r rdft for the 0th twiddle */
+     plan *cldr, *cldr0;
+     plan *cld;          /* the size-m child transforms */
+     R *W;               /* twiddles from mktwiddle(); 0 until awake() */
+     R *omega;           /* from X(dft_rader_mkomega); 0 until awake() */
+     int m, r, g, ginv;  /* n = r*m; g*ginv == 1 (mod r) */
+     int os, ios;        /* element stride, and stride m*os between blocks */
+     rdft_kind kind;
+} P;
+
+static rader_tl *twiddles = 0;  /* file-wide list of W tables, keyed by (m, r, g) */
+
+/***************************************************************************/
+
+/* Below, we extensively use the identity that fft(x*)* = ifft(x) in
+   order to share data between forward and backward transforms and to
+   obviate the necessity of having separate forward and backward
+   plans. */
+
+static void apply_aux(int r, plan_dft *cldr, const R *omega,
+		      R *buf, R *ro, R i0, R *io)
+{    /* shared core: DFT of buf, pointwise product with omega, DFT again */
+     R r0;
+     int k;
+
+     /* compute DFT of buf, operating in-place */
+     cldr->apply((plan *) cldr, buf, buf+1, buf, buf+1);
+
+     /* set output DC component: */
+     ro[0] = (r0 = ro[0]) + buf[0];  /* r0 keeps the old ro[0] for later */
+     io[0] = i0 + buf[1];
+
+     /* now, multiply by omega: */
+     for (k = 0; k < r - 1; ++k) {
+	  R rB, iB, rW, iW;
+	  rW = omega[2*k];
+	  iW = omega[2*k+1];
+	  rB = buf[2*k];
+	  iB = buf[2*k+1];
+	  buf[2*k] = rW * rB - iW * iB;
+	  buf[2*k+1] = -(rW * iB + iW * rB);  /* product stored conjugated */
+     }
+     
+     /* this will add input[0] to all of the outputs after the ifft */
+     buf[0] += r0;
+     buf[1] -= i0;
+
+     /* inverse FFT: (same plan, applied to the conjugated data) */
+     cldr->apply((plan *) cldr, buf, buf+1, buf, buf+1);
+}
+
+static void apply_dit(const plan *ego_, R *I, R *O)
+{    /* R2HC: child transforms I -> O, then all twiddle passes in place on O */
+     const P *ego = (const P *) ego_;
+     plan_dft *cldr;
+     int os, ios;
+     int j, k, gpower, g, ginv, r, m;
+     R *buf, *rio, *ii, *io;
+     const R *omega, *W;
+
+     /* size-m child transforms: */
+     {
+	   plan_rdft *cld = (plan_rdft *) ego->cld;
+	   cld->apply((plan *) cld, I, O);
+     }
+
+     /* 0th twiddle transform is just size-r (prime) R2HC: */
+     {
+	   plan_rdft *cldr0 = (plan_rdft *) ego->cldr0;
+	   cldr0->apply((plan *) cldr0, O, O);
+     }
+
+     cldr = (plan_dft *) ego->cldr;
+     r = ego->r;
+     m = ego->m;
+     g = ego->g; 
+     ginv = ego->ginv;
+     omega = ego->omega;
+     W = ego->W;
+     os = ego->os;
+     ios = ego->ios;
+     gpower = 1;
+     rio = O + os;
+     ii = O + (m - 1) * os;
+     io = O + (r * m - 1) * os;
+
+     buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS);  /* r-1 (re,im) pairs */
+     /* each pass: rio moves up, ii and io move down, W advances by r-1 pairs */
+     for (j = 2; j < m; j += 2, rio += os, ii -= os, io -= os, W += 2*(r-1)) {
+	  /* First, permute the input and multiply by W, storing in buf: */
+	  A(gpower == 1);
+	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
+	       R rA, iA, rW, iW;
+	       rA = rio[gpower * ios];
+	       iA = ii[gpower * ios];
+	       rW = W[2*k];
+	       iW = W[2*k+1];
+	       buf[2*k] = rW * rA - iW * iA;
+	       buf[2*k+1] = rW * iA + iW * rA;
+	  }
+	  /* gpower == g^(r-1) mod r == 1 */;
+	  
+	  apply_aux(r, cldr, omega, buf, rio, ii[0], io);
+
+	  /* finally, do inverse permutation to unshuffle the output: */
+	  A(gpower == 1);
+	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
+	       rio[gpower * ios] = buf[2*k];
+	       io[-gpower * ios] = -buf[2*k+1];
+	  }
+	  A(gpower == 1);
+
+	  /* second half of array must be fiddled to get real/imag
+             parts in correct spots: */
+	  for (k = (r+1)/2; k < r; ++k) {
+	       R t;
+	       t = rio[k * ios];
+	       rio[k * ios] = -io[-k * ios];
+	       io[-k * ios] = t;
+	  }
+     }
+
+     /* Avoid funny m/2-th iter by requiring m odd.  This always
+	happens anyway because all the factors of 2 get divided out
+	first by codelets (Rader is UGLY for small factors). */
+
+     X(ifree)(buf);
+}
+
+static void apply_dif(const plan *ego_, R *I, R *O)
+{    /* HC2R: all twiddle passes in place on I, then child transforms I -> O */
+     const P *ego = (const P *) ego_;
+     plan_dft *cldr;
+     int is, ios;
+     int j, k, gpower, g, ginv, r, m;
+     R *buf, *rio, *ii, *io;
+     const R *omega, *W;
+
+     /* 0th twiddle transform is just size-r (prime) HC2R: */
+     {
+	   plan_rdft *cldr0 = (plan_rdft *) ego->cldr0;
+	   cldr0->apply((plan *) cldr0, I, I);
+     }
+
+     cldr = (plan_dft *) ego->cldr;
+     r = ego->r;
+     m = ego->m;
+     g = ego->g; 
+     ginv = ego->ginv;
+     omega = ego->omega;
+     W = ego->W + 2*(r-1); /* simplify reverse indexing of W */
+     is = ego->os;         /* mkplan_dif stores the input stride in ego->os */
+     ios = ego->ios;
+     gpower = 1;
+     rio = I + is;
+     io = I + (m - 1) * is;
+     ii = I + (r * m - 1) * is;
+
+     buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS);  /* r-1 (re,im) pairs */
+     /* each pass: rio moves up, ii and io move down, W advances by r-1 pairs */
+     for (j = 2; j < m; j += 2, rio += is, ii -= is, io -= is, W += 2*(r-1)) {
+	  /* second half of array must be unfiddled to get real/imag
+             parts from correct spots: */
+	  for (k = (r+1)/2; k < r; ++k) {
+	       R t;
+	       t = rio[k * ios];
+	       rio[k * ios] = ii[-k * ios];
+	       ii[-k * ios] = -t;
+	  }
+
+	  /* First, permute the input, storing in buf: */
+	  A(gpower == 1);
+	  for (k = 0; k < r - 1; ++k, gpower = MULMOD(gpower, g, r)) {
+	       buf[2*k] = rio[gpower * ios];
+	       buf[2*k+1] = -ii[-gpower * ios];
+	  }
+	  /* gpower == g^(r-1) mod r == 1 */;
+	  A(gpower == 1);
+	  
+	  apply_aux(r, cldr, omega, buf, rio, -ii[0], io);
+	  io[0] = -io[0];
+
+	  /* finally, do inverse permutation to unshuffle the output,
+             also multiplying by the inverse twiddle factors W*.
+	     The twiddle factors are accessed in reverse order W[-k],
+	     because here we are exponentiating ginv and not g as in
+	     mktwiddle. */
+	  { /* W[-0] = W[0] case must be handled specially */
+               R rA, iA, rW, iW;
+               rA = buf[0]; iA = buf[1];
+               rW = W[-2*(r-1)]; iW = W[-2*(r-1) + 1];
+               rio[ios] = rA * rW + iA * iW;
+               io[ios] = iA * rW - rA * iW;
+	  }
+	  gpower = ginv;
+	  for (k = 1; k < r - 1; ++k, gpower = MULMOD(gpower, ginv, r)) {
+	       R rA, iA, rW, iW;
+	       rA = buf[2*k]; iA = buf[2*k+1];
+	       rW = W[-2*k]; iW = W[-2*k+1];
+	       rio[gpower * ios] = rA * rW + iA * iW;
+	       io[gpower * ios] = iA * rW - rA * iW;
+	  }
+	  A(gpower == 1);
+     }
+
+     /* Avoid funny m/2-th iter by requiring m odd.  This always
+	happens anyway because all the factors of 2 get divided out
+	first by codelets (Rader is UGLY for small factors). */
+
+     X(ifree)(buf);
+
+     /* size-m child transforms: */
+     {
+	   plan_rdft *cld = (plan_rdft *) ego->cld;
+	   cld->apply((plan *) cld, I, O);
+     }
+}
+
+static R *mktwiddle(int m, int r, int g)
+{    /* returns the W table for (m, r, g), reusing a listed one when present */
+     int i, j, gpower;
+     int n = r * m;
+     R *W;
+
+     if ((W = X(rader_tl_find)(m, r, g, twiddles)))
+	  return W;
+     /* (m-1)/2 rows of r-1 interleaved (cos, sin) pairs */
+     W = (R *)MALLOC(sizeof(R) * (r - 1) * ((m-1)/2) * 2, TWIDDLES);
+     for (i = 1; i < (m+1)/2; ++i) {
+	  for (gpower = 1, j = 0; j < r - 1;
+	       ++j, gpower = MULMOD(gpower, g, r)) {
+	       int k = (i - 1) * (r - 1) + j;  /* pair index: row i-1, column j */
+	       W[2*k] = X(cos2pi)(i * gpower, n);
+	       W[2*k+1] = FFT_SIGN * X(sin2pi)(i * gpower, n);
+	  }
+	  A(gpower == 1);
+     }
+
+     X(rader_tl_insert)(m, r, g, W, &twiddles);
+     return W;
+}
+
+static void free_twiddle(R *twiddle)
+{    /* hands the table back to the shared list via X(rader_tl_delete) */
+     X(rader_tl_delete)(twiddle, &twiddles);
+}
+
+/***************************************************************************/
+
+static void awake(plan *ego_, int flg)
+{    /* forwards flg to the three children, then builds or drops omega and W */
+     P *ego = (P *) ego_;
+
+     AWAKE(ego->cldr0, flg);
+     AWAKE(ego->cldr, flg);
+     AWAKE(ego->cld, flg);
+
+     if (flg) {  /* waking: create whichever table is still missing */
+	  if (!ego->omega) 
+	       ego->omega = 
+		    X(dft_rader_mkomega)(ego->cldr, ego->r, ego->ginv);
+	  if (!ego->W)
+	       ego->W = mktwiddle(ego->m, ego->r, ego->g);
+     } else {  /* sleeping: release both tables; W is reset to 0 here */
+	  X(dft_rader_free_omega)(&ego->omega);
+	  free_twiddle(ego->W);
+	  ego->W = 0;
+     }
+}
+
+static void destroy(plan *ego_)
+{    /* NOTE(review): omega and W are not freed here; presumably awake(ego, 0) has run first -- confirm */
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld);
+     X(plan_destroy_internal)(ego->cldr);
+     X(plan_destroy_internal)(ego->cldr0);
+}
+
+static void print(const plan *ego_, printer *p)
+{    /* prints the variant name, r, and the children cldr0, cldr, cld in that order */
+     const P *ego = (const P *) ego_;
+
+     p->print(p, "(rdft-rader-%s-%d%(%p%)%(%p%)%(%p%))",
+	      ego->kind == R2HC ? "r2hc-dit" : "hc2r-dif",
+              ego->r, ego->cldr0, ego->cldr, ego->cld);
+}
+
+static int applicable0(const solver *ego_, const problem *p_)
+{    /* structural test only; planner flags are checked by applicable() */
+     if (RDFTP(p_)) {
+	  const S *ego = (const S *) ego_;
+          const problem_rdft *p = (const problem_rdft *) p_;
+          return (1
+		  && p->sz->rnk == 1     /* a single transform dimension */
+		  && p->vecsz->rnk == 0  /* and no vector loop */
+		  && p->sz->dims[0].n > 1
+		  && p->sz->dims[0].n % 4 /* make sure n / r = m is odd */
+		  && p->kind[0] == ego->kind
+		  && !X(is_prime)(p->sz->dims[0].n) /* avoid inf. loops planning cldr0 */
+	       );
+     }
+
+     return 0;
+}
+
+static int applicable(const solver *ego_, const problem *p_, 
+		      const planner *plnr)
+{    /* never applicable when NO_UGLYP(plnr) holds; otherwise defer to applicable0 */
+     return (!NO_UGLYP(plnr) && applicable0(ego_, p_));
+}
+
+static int mkP(P *pln, int r, R *O, int ios, rdft_kind kind, planner *plnr)
+{    /* plans cldr0 and cldr and fills the Rader fields of pln; 1 on success, 0 on failure */
+     plan *cldr = (plan *) 0;
+     plan *cldr0 = (plan *) 0;
+     R *buf = (R *) 0;
+
+     cldr0 = X(mkplan_d)(plnr, 
+			 X(mkproblem_rdft_1_d)(X(mktensor_1d)(r, ios, ios),
+					       X(mktensor_1d)(1, 0, 0),
+					       O, O, kind));
+     if (!cldr0) goto nada;
+
+     /* initial allocation for the purpose of planning */
+     buf = (R *) MALLOC(sizeof(R) * (r - 1) * 2, BUFFERS);
+
+     cldr = X(mkplan_d)(plnr, X(mkproblem_dft_d)(X(mktensor_1d)(r - 1, 2, 2),
+						 X(mktensor_1d)(1, 0, 0),
+						 buf, buf + 1, buf, buf + 1));
+     if (!cldr) goto nada;
+
+     X(ifree)(buf);  /* only needed while planning; the apply routines allocate their own */
+
+     pln->cldr = cldr;
+     pln->cldr0 = cldr0;
+     pln->omega = 0;  /* created later, in awake() */
+     pln->r = r;
+     pln->g = X(find_generator)(r);
+     pln->ginv = X(power_mod)(pln->g, r - 2, r);
+     pln->kind = kind;
+     A(MULMOD(pln->g, pln->ginv, r) == 1);
+     /* cldr is counted twice (two applies per pass); NOTE(review): cldr0->ops is never added -- confirm */
+     X(ops_add)(&cldr->ops, &cldr->ops, &pln->super.super.ops);
+     pln->super.super.ops.other += (r - 1) * (4 * 2 + 6) + 6;
+     pln->super.super.ops.add += 2 * (r - 1) * 2 + 4;
+     pln->super.super.ops.mul += 2 * (r - 1) * 4;
+
+     return 1;
+
+ nada:
+     X(ifree0)(buf);
+     X(plan_destroy_internal)(cldr);
+     X(plan_destroy_internal)(cldr0);
+     return 0;
+}
+
+static plan *mkplan_dit(const solver *ego, const problem *p_, planner *plnr)
+{    /* builds the R2HC (DIT) Rader plan for n = r*m, or returns a null plan */
+     const problem_rdft *p = (const problem_rdft *) p_;
+     P *pln = 0;
+     int n, is, os, r, m;
+     plan *cld = (plan *) 0;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego, p_, plnr))
+          goto nada;
+
+     n = p->sz->dims[0].n;
+     is = p->sz->dims[0].is;
+     os = p->sz->dims[0].os;
+
+     r = X(first_divisor)(n);
+     m = n / r;
+
+     /* child problem: r transforms of size m, from p->I into p->O */
+     cld = X(mkplan_d)(plnr, 
+		       X(mkproblem_rdft_d)(X(mktensor_1d)(m, r * is, os),
+					   X(mktensor_1d)(r, is, m * os),
+					   p->I, p->O, p->kind));
+     if (!cld) goto nada;
+
+     pln = MKPLAN_RDFT(P, &padt, apply_dit);
+     if (!mkP(pln, r, p->O, os*m, p->kind[0], plnr))
+	  goto nada;
+
+     pln->ios = os*m;
+     pln->os = os;
+     pln->m = m;
+     pln->cld = cld;
+     pln->W = 0;  /* created later, in awake() */
+
+     X(ops_madd)((m - 1)/2, &pln->super.super.ops, &cld->ops,
+		 &pln->super.super.ops);
+
+     return &(pln->super.super);
+
+ nada:
+     X(plan_destroy_internal)(cld);
+     X(ifree0)(pln);
+     return (plan *) 0;
+}
+
+static plan *mkplan_dif(const solver *ego, const problem *p_, planner *plnr)
+{    /* builds the HC2R (DIF) Rader plan for n = r*m, or returns a null plan */
+     const problem_rdft *p = (const problem_rdft *) p_;
+     P *pln = 0;
+     int n, is, os, r, m;
+     plan *cld = (plan *) 0;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego, p_, plnr))
+          goto nada;
+
+     n = p->sz->dims[0].n;
+     is = p->sz->dims[0].is;
+     os = p->sz->dims[0].os;
+
+     r = X(first_divisor)(n);
+     m = n / r;
+     /* child problem: r transforms of size m, from p->I into p->O */
+     cld = X(mkplan_d)(plnr,
+		       X(mkproblem_rdft_d)(X(mktensor_1d)(m, is, r * os),
+					   X(mktensor_1d)(r, m * is, os),
+					   p->I, p->O, p->kind));
+     if (!cld) goto nada;
+
+     pln = MKPLAN_RDFT(P, &padt, apply_dif);
+     if (!mkP(pln, r, p->I, is*m, p->kind[0], plnr)) goto nada;
+
+     pln->ios = is*m;
+     pln->os = is;  /* the twiddle passes run on the input array, so input strides are stored */
+     pln->m = m;
+     pln->cld = cld;
+     pln->W = 0;    /* created later, in awake() */
+
+     X(ops_madd)((m - 1)/2, &pln->super.super.ops, &cld->ops,
+		 &pln->super.super.ops);
+
+     return &(pln->super.super);
+
+ nada:
+     X(plan_destroy_internal)(cld);
+     X(ifree0)(pln);
+     return (plan *) 0;
+}
+
+/* constructors */
+
+static solver *mksolver_dit(void)
+{    /* solver for the R2HC direction; plans are made by mkplan_dit */
+     static const solver_adt sadt = { mkplan_dit };
+     S *slv = MKSOLVER(S, &sadt);
+     slv->kind = R2HC;
+     return &(slv->super);
+}
+
+static solver *mksolver_dif(void)
+{    /* solver for the HC2R direction; plans are made by mkplan_dif */
+     static const solver_adt sadt = { mkplan_dif };
+     S *slv = MKSOLVER(S, &sadt);
+     slv->kind = HC2R;
+     return &(slv->super);
+}
+
+void X(rdft_rader_hc2hc_register)(planner *p)
+{    /* entry point: registers the DIT solver first, then the DIF solver */
+     REGISTER_SOLVER(p, mksolver_dit());
+     REGISTER_SOLVER(p, mksolver_dif());
+}
diff --git a/src/fftw3/rdft/rank-geq2-rdft2.c b/src/fftw3/rdft/rank-geq2-rdft2.c
new file mode 100644
index 0000000..c1809c9
--- /dev/null
+++ b/src/fftw3/rdft/rank-geq2-rdft2.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rank-geq2-rdft2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* plans for RDFT2 of rank >= 2 (multidimensional) */
+
+#include "rdft.h"
+#include "dft.h"
+
+typedef struct {
+     solver super;
+     int spltrnk;         /* this solver's split choice; one entry of buddies */
+     const int *buddies;  /* the split choices of all solvers registered together */
+     int nbuddies;        /* number of entries in buddies */
+} S;
+
+typedef struct {
+     plan_rdft2 super;   /* made by MKPLAN_RDFT2 and given an rdft2 apply routine */
+     plan *cldr, *cldc;  /* rdft2 child and complex-DFT child */
+     const S *solver;    /* solver that built this plan; read by print() */
+} P;
+
+/* R2HC direction: the rdft2 child runs first on (r, rio, iio); the
+   complex child then works in place on the rio/iio arrays. */
+static void apply_r2hc(const plan *ego_, R *r, R *rio, R *iio)
+{
+     const P *self = (const P *) ego_;
+     plan_rdft2 *real_child = (plan_rdft2 *) self->cldr;
+     plan_dft *cplx_child = (plan_dft *) self->cldc;
+
+     /* step 1: rdft2 child */
+     real_child->apply((plan *) real_child, r, rio, iio);
+
+     /* step 2: complex child, input and output arrays identical */
+     cplx_child->apply((plan *) cplx_child, rio, iio, rio, iio);
+}
+
+/* HC2R direction: the complex child runs first, with the real and
+   imaginary arrays exchanged, and the rdft2 child finishes the job. */
+static void apply_hc2r(const plan *ego_, R *r, R *rio, R *iio)
+{
+     const P *self = (const P *) ego_;
+     plan_dft *cplx_child = (plan_dft *) self->cldc;
+     plan_rdft2 *real_child = (plan_rdft2 *) self->cldr;
+
+     /* step 1: complex child in place; note the (iio, rio) argument
+        order, the reverse of the R2HC case */
+     cplx_child->apply((plan *) cplx_child, iio, rio, iio, rio);
+
+     /* step 2: rdft2 child */
+     real_child->apply((plan *) real_child, r, rio, iio);
+}
+
+static void awake(plan *ego_, int flg)
+{    /* no tables of its own: just forwards flg to both child plans */
+     P *ego = (P *) ego_;
+     AWAKE(ego->cldr, flg);
+     AWAKE(ego->cldc, flg);
+}
+
+static void destroy(plan *ego_)
+{    /* destroys both child plans; the solver pointer is not touched */
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cldr);
+     X(plan_destroy_internal)(ego->cldc);
+}
+
+static void print(const plan *ego_, printer *p)
+{    /* prints the solver's spltrnk followed by the cldr and cldc children */
+     const P *ego = (const P *) ego_;
+     const S *s = ego->solver;
+     p->print(p, "(rdft2-rank>=2/%d%(%p%)%(%p%))", 
+	      s->spltrnk, ego->cldr, ego->cldc);
+}
+ 
+/* Choose where to split sz.  Returns 1 with the split rank in *rp only
+   when X(pickdim) succeeds and the split actually lowers the rank. */
+static int picksplit(const S *ego, const tensor *sz, int *rp)
+{
+     A(sz->rnk > 1); /* a tensor of rank <= 1 cannot be split */
+     if (!X(pickdim)(ego->spltrnk, ego->buddies, ego->nbuddies, sz, 1, rp))
+          return 0;
+     *rp += 1; /* pickdim yields a dimension index; a rank is wanted */
+     return (*rp < sz->rnk);
+}
+
+static int applicable0(const solver *ego_, const problem *p_, int *rp,
+		       const planner *plnr)
+{    /* structural test; on success *rp holds the split rank from picksplit */
+     if (RDFT2P(p_)) {
+          const problem_rdft2 *p = (const problem_rdft2 *) p_;
+	  const S *ego = (const S *)ego_;
+          return (1
+                  && p->sz->rnk >= 2
+		  && picksplit(ego, p->sz, rp)
+                  && (0
+
+		      /* can work out-of-place, but HC2R destroys input */
+                      || (p->r != p->rio && p->r != p->iio && 
+			  (p->kind == R2HC || DESTROY_INPUTP(plnr)))
+
+		      /* FIXME: what are sufficient conditions for inplace? */
+                      || (!(p->r != p->rio && p->r != p->iio))
+		       )
+	       );
+     }
+
+     return 0;
+}
+
+/* TODO: revise this.  Adds planner-flag checks on top of applicable0. */
+static int applicable(const solver *ego_, const problem *p_, 
+		      const planner *plnr, int *rp)
+{
+     const S *ego = (const S *)ego_;
+
+     if (!applicable0(ego_, p_, rp, plnr)) return 0;
+
+     /* fixed spltrnk (unlike fftw2's spltrnk=1, default buddies[0] is
+        spltrnk=0, which is an asymptotic "theoretical optimum" for
+        an ideal cache; it's equivalent to spltrnk=1 for rnk < 4). */
+     if (NO_RANK_SPLITSP(plnr) && (ego->spltrnk != ego->buddies[0]))
+          return 0;
+
+     if (NO_UGLYP(plnr)) {
+	  const problem_rdft2 *p = (const problem_rdft2 *) p_;
+
+	  /* Heuristic: if the vector stride is greater than the transform
+	     sz, don't use (prefer to do the vector loop first with a
+	     vrank-geq1 plan). */
+	  if (p->vecsz->rnk > 0 &&
+	      X(tensor_min_stride)(p->vecsz) 
+	      > X(rdft2_tensor_max_index)(p->sz, p->kind))
+	       return 0;
+     }
+
+     return 1;
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{    /* splits sz at spltrnk into sz1/sz2: an rdft2 child over sz2 and a complex child over sz1 */
+     const S *ego = (const S *) ego_;
+     const problem_rdft2 *p;
+     P *pln;
+     plan *cldr = 0, *cldc = 0;
+     tensor *sz1, *sz2, *vecszi, *sz2i;
+     int spltrnk;
+     inplace_kind k;
+     problem *cldp;
+
+     static const plan_adt padt = {
+	  X(rdft2_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr, &spltrnk))
+          return (plan *) 0;
+
+     p = (const problem_rdft2 *) p_;
+     X(tensor_split)(p->sz, &sz1, spltrnk, &sz2);
+     /* the complex child is in place: on output strides for R2HC, input strides for HC2R */
+     k = p->kind == R2HC ? INPLACE_OS : INPLACE_IS;
+     vecszi = X(tensor_copy_inplace)(p->vecsz, k);
+     sz2i = X(tensor_copy_inplace)(sz2, k);
+
+     /* complex data is ~half of real */
+     sz2i->dims[sz2i->rnk - 1].n = sz2i->dims[sz2i->rnk - 1].n/2 + 1;
+     /* rdft2 child: transform over sz2, with sz1 folded into the vector loop */
+     cldr = X(mkplan_d)(plnr, 
+		       X(mkproblem_rdft2_d)(X(tensor_copy)(sz2),
+					    X(tensor_append)(p->vecsz, sz1),
+					    p->r, p->rio, p->iio, p->kind));
+     if (!cldr) goto nada;
+     /* complex child: transform over sz1, with the halved sz2 folded into the vector loop */
+     if (p->kind == R2HC)
+	  cldp = X(mkproblem_dft_d)(X(tensor_copy_inplace)(sz1, k),
+				    X(tensor_append)(vecszi, sz2i),
+				    p->rio, p->iio, p->rio, p->iio);
+     else /* HC2R must swap re/im parts to get IDFT */
+	  cldp = X(mkproblem_dft_d)(X(tensor_copy_inplace)(sz1, k),
+				    X(tensor_append)(vecszi, sz2i),
+				    p->iio, p->rio, p->iio, p->rio);
+     cldc = X(mkplan_d)(plnr, cldp);
+     if (!cldc) goto nada;
+
+     pln = MKPLAN_RDFT2(P, &padt, p->kind == R2HC ? apply_r2hc : apply_hc2r);
+
+     pln->cldr = cldr;
+     pln->cldc = cldc;
+
+     pln->solver = ego;
+     X(ops_add)(&cldr->ops, &cldc->ops, &pln->super.super.ops);
+
+     X(tensor_destroy4)(sz2i, vecszi, sz2, sz1);
+
+     return &(pln->super.super);
+
+ nada:
+     X(plan_destroy_internal)(cldr);
+     X(plan_destroy_internal)(cldc);
+     X(tensor_destroy4)(sz2i, vecszi, sz2, sz1);
+     return (plan *) 0;
+}
+
+static solver *mksolver(int spltrnk, const int *buddies, int nbuddies)
+{    /* stores the buddies pointer as given (no copy is made here) */
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     slv->spltrnk = spltrnk;
+     slv->buddies = buddies;
+     slv->nbuddies = nbuddies;
+     return &(slv->super);
+}
+
+void X(rdft2_rank_geq2_register)(planner *p)
+{    /* registers one solver per entry of buddies */
+     int i;
+     static const int buddies[] = { 0, 1, -2 };
+     /* static array: every solver keeps a pointer to it */
+     const int nbuddies = sizeof(buddies) / sizeof(buddies[0]);
+
+     for (i = 0; i < nbuddies; ++i)
+          REGISTER_SOLVER(p, mksolver(buddies[i], buddies, nbuddies));
+
+     /* FIXME: Should we try more buddies?  See also dft/rank-geq2. */
+}
diff --git a/src/fftw3/rdft/rank0-rdft2.c b/src/fftw3/rdft/rank0-rdft2.c
new file mode 100644
index 0000000..5a2a649
--- /dev/null
+++ b/src/fftw3/rdft/rank0-rdft2.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rank0-rdft2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* plans for rank-0 RDFT2 (copy operations, plus setting 0 imag. parts) */
+
+#include "rdft.h"
+
+#ifdef HAVE_STRING_H
+#include <string.h>		/* for memcpy() */
+#endif
+
+typedef struct {	/* solver record for this file: base solver only */
+     solver super;	/* first member, returned to callers as &super */
+} S;
+
+typedef struct {
+     plan_rdft2 super;	/* base; must match MKPLAN_RDFT2 used in mkplan */
+     int vl;		/* vector length (filled in for R2HC plans only) */
+     int ivs, ovs;	/* input/output vector strides (R2HC plans only) */
+     plan *cldcpy;	/* child copy plan for HC2R, null for R2HC */
+} P;
+
+static int applicable(const problem *p_)
+{
+     const problem_rdft2 *prob = (const problem_rdft2 *) p_;
+     /* only rank-0 RDFT2 problems are handled; HC2R needs nothing more */
+     if (!RDFT2P(p_) || prob->sz->rnk != 0)
+          return 0;
+     if (prob->kind == HC2R)
+          return 1;
+     /* R2HC: out-of-place, or in-place with compatible vector strides */
+     if ((prob->r == prob->rio || prob->r == prob->iio)
+         && !X(rdft2_inplace_strides)(prob, RNK_MINFTY))
+          return 0;
+     return prob->vecsz->rnk <= 1;
+}
+
+static void apply_r2hc(const plan *ego_, R *r, R *rio, R *iio)
+{
+     /* copy vl reals from r into rio, writing 0 to the matching iio slot */
+     const P *self = (const P *) ego_;
+     const int istep = self->ivs, ostep = self->ovs;
+     int quads = self->vl / 4, tail = self->vl % 4;
+
+     /* main part, four elements per pass: all loads precede the stores */
+     while (quads-- > 0) {
+          R a, b, c, d;
+          a = *r; r += istep;
+          b = *r; r += istep;
+          c = *r; r += istep;
+          d = *r; r += istep;
+          *rio = a; rio += ostep;  *iio = 0.0; iio += ostep;
+          *rio = b; rio += ostep;  *iio = 0.0; iio += ostep;
+          *rio = c; rio += ostep;  *iio = 0.0; iio += ostep;
+          *rio = d; rio += ostep;  *iio = 0.0; iio += ostep;
+     }
+
+     /* leftover vl % 4 elements, one at a time */
+     while (tail-- > 0) {
+          const R v = *r;
+          r += istep;
+          *rio = v; rio += ostep;
+          *iio = 0.0; iio += ostep;
+     }
+}
+
+/* in-place rank-0 R2HC: the reals already sit in rio, so just zero iio */
+static void apply_r2hc_inplace(const plan *ego_, R *r, R *rio, R *iio)
+{
+     const P *self = (const P *) ego_;
+     const int ostep = self->ovs;
+     int quads = self->vl / 4, tail = self->vl % 4;
+     UNUSED(rio);
+     UNUSED(r);
+     /* four stores per pass for the bulk of the vector ... */
+     for (; quads > 0; --quads) {
+          *iio = 0.0; iio += ostep;
+          *iio = 0.0; iio += ostep;
+          *iio = 0.0; iio += ostep;
+          *iio = 0.0; iio += ostep;
+     }
+     /* ... then one store for each of the remaining vl % 4 elements */
+     for (; tail > 0; --tail, iio += ostep)
+          *iio = 0.0;
+}
+
+/* rank-0 HC2R: hand the work to the child rank-0 rdft plan, which copies
+   from rio to r; iio is not used */
+static void apply_hc2r(const plan *ego_, R *r, R *rio, R *iio)
+{
+     plan_rdft *child = (plan_rdft *) ((const P *) ego_)->cldcpy;
+
+     UNUSED(iio);
+     child->apply((plan *) child, rio, r);
+}
+
+static void awake(plan *ego_, int flg)
+{    /* only HC2R plans own a child (the copy plan); pass the call on */
+     plan *child = ((P *) ego_)->cldcpy;
+     if (child)
+          AWAKE(child, flg);
+}
+
+static void destroy(plan *ego_)
+{    /* release the child copy plan, which exists only for HC2R */
+     plan *child = ((P *) ego_)->cldcpy;
+     if (child)
+          X(plan_destroy_internal)(child);
+}
+
+static void print(const plan *ego_, printer *p)
+{
+     const P *self = (const P *) ego_;
+     if (!self->cldcpy)   /* no child copy plan: this is the R2HC variant */
+          p->print(p, "(rdft2-r2hc-rank0%v)", self->vl);
+     else
+          p->print(p, "(rdft2-hc2r-rank0%(%p%))", self->cldcpy);
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     /* Plan a rank-0 RDFT2 problem.  R2HC runs the loops in this file;
+        HC2R is delegated to a child rank-0 rdft plan (rio -> r). */
+     static const plan_adt padt = {
+          X(rdft2_solve), awake, print, destroy
+     };
+     const problem_rdft2 *prob;
+     plan *child = (plan *) 0;
+     rdft2apply fn = apply_hc2r;
+     int fwd;
+     P *self;
+
+     UNUSED(ego_);
+
+     if (!applicable(p_))
+          return (plan *) 0;
+
+     prob = (const problem_rdft2 *) p_;
+     fwd = (prob->kind == R2HC);
+
+     if (prob->kind == HC2R) {
+          child = X(mkplan_d)(plnr,
+                              X(mkproblem_rdft_d)(
+                                   X(mktensor_0d)(),
+                                   X(tensor_copy)(prob->vecsz),
+                                   prob->rio, prob->r, (rdft_kind *) 0));
+          if (!child)
+               return (plan *) 0;   /* planner found no copy plan */
+     }
+
+     if (fwd)  /* in place (r == rio) only the imaginary parts need work */
+          fn = (prob->r == prob->rio) ? apply_r2hc_inplace : apply_r2hc;
+     self = MKPLAN_RDFT2(P, &padt, fn);
+     self->cldcpy = child;
+
+     if (fwd) {
+          X(tensor_tornk1)(prob->vecsz, &self->vl, &self->ivs, &self->ovs);
+          /* vl loads, 2*vl stores */
+          X(ops_other)(3 * self->vl, &self->super.super.ops);
+     } else
+          self->super.super.ops = child->ops;
+
+     return &(self->super.super);
+}
+
+static solver *mksolver(void)
+{    /* allocate the solver record and hook up this file's mkplan */
+     static const solver_adt sadt = { mkplan };
+     S *self = MKSOLVER(S, &sadt);
+     return &self->super;
+}
+
+void X(rdft2_rank0_register)(planner *p)
+{    /* register one instance of this file's solver with planner p */
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/rdft/rbuffered.c b/src/fftw3/rdft/rbuffered.c
new file mode 100644
index 0000000..19ccc7b
--- /dev/null
+++ b/src/fftw3/rdft/rbuffered.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rbuffered.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+typedef struct {	/* buffering parameters shared by a solver's plans */
+     int nbuf;		/* passed to X(compute_nbuf) together with maxbufsz */
+     int maxbufsz;	/* toobig() compares the transform size against this */
+     int skew_alignment;	/* modulus used when computing bufdist in mkplan */
+     int skew;		/* wanted value of bufdist % skew_alignment */
+     const char *nam;	/* name printed by print() */
+} bufadt;
+
+typedef struct {	/* solver record: base solver plus its parameters */
+     solver super;
+     const bufadt *adt;	/* set by mksolver; not owned by the solver */
+} S;
+
+typedef struct {
+     plan_rdft super;
+
+     plan *cld, *cldcpy, *cldrest;	/* to buffers / from buffers / leftovers */
+     int n, vl, nbuf, bufdist;	/* size, vector length, batch, buffer spacing */
+     int ivs, ovs;		/* vector strides, stored pre-multiplied by nbuf */
+
+     const S *slv;		/* creating solver; print() reads its name */
+} P;
+
+/* batched transform: nbuf vectors at a time go through a scratch buffer */
+static void apply(const plan *ego_, R *I, R *O)
+{
+     const P *self = (const P *) ego_;
+     plan_rdft *xform = (plan_rdft *) self->cld;
+     plan_rdft *copy = (plan_rdft *) self->cldcpy;
+     plan_rdft *rest = (plan_rdft *) self->cldrest;
+     const int batch = self->nbuf, total = self->vl;
+     const int istep = self->ivs, ostep = self->ovs;
+     R *scratch;
+     int done;
+
+     scratch = (R *)MALLOC(sizeof(R) * batch * self->bufdist, BUFFERS);
+     /* full batches: done = vectors consumed once the pass completes */
+     for (done = batch; done <= total; done += batch) {
+          /* input -> scratch */
+          xform->apply((plan *) xform, I, scratch);
+          I += istep;
+
+          /* scratch -> output */
+          copy->apply((plan *) copy, scratch, O);
+          O += ostep;
+     }
+
+     /* whatever is left (fewer than nbuf vectors) is done by cldrest */
+     rest->apply((plan *) rest, I, O);
+
+     X(ifree)(scratch);
+}
+
+
+static void awake(plan *ego_, int flg)
+{
+     /* pass the request on to all three child plans, in creation order */
+     P *self = (P *) ego_;
+     AWAKE(self->cld, flg);
+     AWAKE(self->cldcpy, flg);
+     AWAKE(self->cldrest, flg);
+}
+
+static void destroy(plan *ego_)
+{
+     /* children go away in the reverse of the order mkplan made them */
+     X(plan_destroy_internal)(((P *) ego_)->cldrest);
+     X(plan_destroy_internal)(((P *) ego_)->cldcpy);
+     X(plan_destroy_internal)(((P *) ego_)->cld);
+}
+
+static void print(const plan *ego_, printer *p)
+{    /* solver name, n, nbuf, vl, bufdist % n, then the three children */
+     const P *self = (const P *) ego_;
+     const char *name = self->slv->adt->nam;
+     const int skew = self->bufdist % self->n;
+     p->print(p, "(%s-%d%v/%d-%d%(%p%)%(%p%)%(%p%))",
+              name, self->n, self->nbuf, self->vl, skew,
+              self->cld, self->cldcpy, self->cldrest);
+}
+
+
+static int compute_nbuf(int n, int vl, const S *ego)
+{    /* forward to X(compute_nbuf) with this solver's nbuf and maxbufsz */
+     return X(compute_nbuf)(n, vl, ego->adt->nbuf, ego->adt->maxbufsz);
+}
+
+static int toobig(int n, const S *ego)
+{    /* nonzero when a transform of size n exceeds the solver's maxbufsz */
+     return ego->adt->maxbufsz < n;
+}
+
+static int applicable0(const problem *p_, const S *ego, const planner *plnr)
+{
+     const problem_rdft *prob;
+     const iodim *dim0;
+
+     /* must be a rank-1 rdft with at most one vector dimension */
+     if (!RDFTP(p_))
+          return 0;
+     prob = (const problem_rdft *) p_;
+     if (prob->vecsz->rnk > 1 || prob->sz->rnk != 1)
+          return 0;
+     dim0 = prob->sz->dims;
+
+     /* refuse an oversized buffer when memory is to be conserved */
+     if (toobig(dim0[0].n, ego) && CONSERVE_MEMORYP(plnr))
+          return 0;
+
+     /*
+      * In principle, the buffered transforms might be useful when
+      * working out of place.  However, in order to prevent infinite
+      * loops in the planner, we require that the output stride of the
+      * buffered transforms be greater than 1.
+      */
+     if (prob->I != prob->O)
+          return (dim0[0].os > 1);
+
+     /* We can always do a single transform in-place */
+     if (prob->vecsz->rnk == 0)
+          return 1;
+
+     /*
+      * If the problem is in place, the input/output strides must be
+      * the same or the whole thing must fit in the buffer.
+      */
+     if (X(tensor_inplace_strides2)(prob->sz, prob->vecsz))
+          return 1;
+     return (compute_nbuf(dim0[0].n, prob->vecsz->dims[0].n, ego)
+             == prob->vecsz->dims[0].n);
+}
+
+static int applicable(const problem *p_, const S *ego, const planner *plnr)
+{
+     const problem_rdft *prob = (const problem_rdft *) p_;
+
+     /* buffering disabled, or the problem is not structurally suitable */
+     if (NO_BUFFERINGP(plnr) || !applicable0(p_, ego, plnr))
+          return 0;
+
+     /* nothing more to check unless the planner avoids "ugly" plans */
+     if (!NO_UGLYP(plnr))
+          return 1;
+     /* otherwise insist on in-place problems within the buffer limit */
+     return prob->I == prob->O && !toobig(prob->sz->dims[0].n, ego);
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     const S *ego = (const S *) ego_;
+     const bufadt *adt = ego->adt;
+     P *pln;
+     plan *cld = (plan *) 0;	/* the three children stay null until planned, */
+     plan *cldcpy = (plan *) 0;	/* so the nada path can hand them to */
+     plan *cldrest = (plan *) 0;	/* plan_destroy_internal at any point */
+     const problem_rdft *p = (const problem_rdft *) p_;
+     R *bufs = (R *) 0;		/* planning-time scratch, released before return */
+     int nbuf = 0, bufdist, n, vl;
+     int ivs, ovs;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     /* Build a buffered plan out of three children: cld, cldcpy, cldrest. */
+     if (!applicable(p_, ego, plnr))
+          goto nada;
+
+     n = X(tensor_sz)(p->sz);
+     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
+
+     nbuf = compute_nbuf(n, vl, ego);	/* vectors handled per batch */
+     A(nbuf > 0);
+
+     /*
+      * Determine BUFDIST, the offset between successive array bufs.
+      * bufdist = n + skew, where skew is chosen such that bufdist %
+      * skew_alignment = skew.
+      */
+     if (vl == 1) {
+          bufdist = n;
+     } else {
+          bufdist =
+               n + ((adt->skew_alignment + adt->skew - n % adt->skew_alignment)
+                    % adt->skew_alignment);
+          A(p->vecsz->rnk == 1);
+     }
+
+     /* initial allocation for the purpose of planning */
+     bufs = (R *) MALLOC(sizeof(R) * nbuf * bufdist, BUFFERS);
+     /* cld: nbuf size-n transforms, input -> bufs (stride 1, bufdist apart) */
+     cld = X(mkplan_d)(plnr, 
+		       X(mkproblem_rdft_d)(
+			    X(mktensor_1d)(n, p->sz->dims[0].is, 1),
+			    X(mktensor_1d)(nbuf, ivs, bufdist),
+			    TAINT(p->I, ivs * nbuf), bufs, p->kind));
+     if (!cld) goto nada;
+
+     /* copying back from the buffer is a rank-0 transform: */
+     cldcpy = X(mkplan_d)(plnr, 
+			  X(mkproblem_rdft_d)(
+			       X(mktensor_0d)(),
+			       X(mktensor_2d)(nbuf, bufdist, ovs,
+					      n, 1, p->sz->dims[0].os),
+			       bufs, TAINT(p->O, ovs * nbuf), 
+			       (rdft_kind *) 0));
+     if (!cldcpy) goto nada;
+
+     /* deallocate buffers, let apply() allocate them for real */
+     X(ifree)(bufs);
+     bufs = 0;
+
+     /* plan the leftover transforms (cldrest): */
+     {
+	  int id = ivs * (nbuf * (vl / nbuf));	/* input offset past the full batches */
+	  int od = ovs * (nbuf * (vl / nbuf));	/* same offset on the output side */
+	  cldrest = X(mkplan_d)(plnr, 
+				X(mkproblem_rdft_d)(
+				     X(tensor_copy)(p->sz),
+				     X(mktensor_1d)(vl % nbuf, ivs, ovs),
+				     p->I + id, p->O + od, p->kind));
+     }
+     if (!cldrest) goto nada;
+
+     pln = MKPLAN_RDFT(P, &padt, apply);
+     pln->cld = cld;
+     pln->cldcpy = cldcpy;
+     pln->cldrest = cldrest;
+     pln->slv = ego;
+     pln->n = n;
+     pln->vl = vl;
+     pln->ivs = ivs * nbuf;	/* apply() steps a whole batch at a time */
+     pln->ovs = ovs * nbuf;
+
+     pln->nbuf = nbuf;
+     pln->bufdist = bufdist;
+     /* NOTE(review): presumably ops = (vl/nbuf) * (cld + cldcpy) + cldrest */
+     {
+	  opcnt t;
+	  X(ops_add)(&cld->ops, &cldcpy->ops, &t);
+	  X(ops_madd)(vl / nbuf, &t, &cldrest->ops, &pln->super.super.ops);
+     }
+
+     return &(pln->super.super);
+
+ nada:	/* common failure exit: release whatever has been created so far */
+     X(ifree0)(bufs);
+     X(plan_destroy_internal)(cldrest);
+     X(plan_destroy_internal)(cldcpy);
+     X(plan_destroy_internal)(cld);
+     return (plan *) 0;
+}
+
+static solver *mksolver(const bufadt *adt)
+{    /* allocate a solver and attach the buffering parameters to it */
+     static const solver_adt sadt = { mkplan };
+     S *self = MKSOLVER(S, &sadt);
+     self->adt = adt;
+     return &self->super;
+}
+
+
+void X(rdft_buffered_register)(planner *p)
+{
+     /* FIXME: what are good defaults? */
+     static const bufadt defaults = {
+          8,                     /* nbuf */
+          (65536 / sizeof(R)),   /* maxbufsz: 65536 bytes' worth of R */
+          8,                     /* skew_alignment */
+          5,                     /* skew */
+          "rdft-buffered"        /* nam */
+     };
+
+     REGISTER_SOLVER(p, mksolver(&defaults));
+}
diff --git a/src/fftw3/rdft/rconf.c b/src/fftw3/rdft/rconf.c
new file mode 100644
index 0000000..2c65d4c
--- /dev/null
+++ b/src/fftw3/rdft/rconf.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rconf.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+static const solvtab s =	/* handed to X(solvtab_exec) by rdft_conf_standard */
+{
+     SOLVTAB(X(rdft_indirect_register)),
+     SOLVTAB(X(rdft_rank0_register)),
+     SOLVTAB(X(rdft_vrank_geq1_register)),
+     /* disabled:
+	SOLVTAB(X(rdft_vrank2_transpose_register)),
+	SOLVTAB(X(rdft_vrank3_transpose_register)),
+     */
+     SOLVTAB(X(rdft_nop_register)),
+     SOLVTAB(X(rdft_buffered_register)),
+     SOLVTAB(X(rdft_rader_hc2hc_register)),
+     SOLVTAB(X(rdft_generic_register)),
+     SOLVTAB(X(rdft_rank_geq2_register)),
+
+     SOLVTAB(X(dft_r2hc_register)),
+
+     SOLVTAB(X(rdft_dht_register)),
+     SOLVTAB(X(dht_r2hc_register)),
+     SOLVTAB(X(dht_rader_register)),
+     /* registration hooks of the rdft2 solvers: */
+     SOLVTAB(X(rdft2_vrank_geq1_register)),
+     SOLVTAB(X(rdft2_nop_register)),
+     SOLVTAB(X(rdft2_rank0_register)),
+     SOLVTAB(X(rdft2_buffered_register)),
+     SOLVTAB(X(rdft2_rank_geq2_register)),
+     SOLVTAB(X(rdft2_radix2_register)),
+
+     SOLVTAB_END
+};
+
+void X(rdft_conf_standard)(planner *p)
+{    /* run the table above on p, then the r2hc, hc2r and r2r tables */
+     X(solvtab_exec)(s, p);
+     X(solvtab_exec)(X(solvtab_rdft_r2hc), p);
+     X(solvtab_exec)(X(solvtab_rdft_hc2r), p);
+     X(solvtab_exec)(X(solvtab_rdft_r2r), p);
+}
diff --git a/src/fftw3/rdft/rdft-dht.c b/src/fftw3/rdft/rdft-dht.c
new file mode 100644
index 0000000..f384fef
--- /dev/null
+++ b/src/fftw3/rdft/rdft-dht.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rdft-dht.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* Solve an R2HC/HC2R problem via post/pre processing of a DHT.  This
+   is mainly useful because we can use Rader to compute DHTs of prime
+   sizes.  It also allows us to express hc2r problems in terms of r2hc
+   (via dht-r2hc), and to do hc2r problems without destroying the input. */
+
+#include "rdft.h"
+
+typedef struct {	/* solver record for this file: base solver only */
+     solver super;	/* first member, returned to callers as &super */
+} S;
+
+typedef struct {
+     plan_rdft super;	/* super.apply also tells print() which variant this is */
+     plan *cld;		/* child plan for the DHT problem built in mkplan */
+     int is, os;	/* input/output strides of the single transform dimension */
+     int n;		/* transform size */
+} P;
+
+static void apply_r2hc(const plan *ego_, R *I, R *O)
+{
+     /* run the child DHT from I into O, then convert its output to
+        halfcomplex form by combining the pairs O[k], O[n-k] */
+     const P *self = (const P *) ego_;
+     plan_rdft *dht = (plan_rdft *) self->cld;
+     const int ostep = self->os;
+     int lo, hi;
+
+     dht->apply((plan *) dht, I, O);
+
+     /* lo and hi close in on the middle; O[0] and, for even n, O[n/2]
+        are left as the DHT produced them */
+     for (lo = 1, hi = self->n - 1; lo < hi; ++lo, --hi) {
+          const E a = K(0.5) * O[ostep * lo];
+          const E b = K(0.5) * O[ostep * hi];
+
+          O[ostep * lo] = a + b;
+#if FFT_SIGN == -1
+          O[ostep * hi] = b - a;
+#else
+          O[ostep * hi] = a - b;
+#endif
+     }
+}
+
+/* HC2R that overwrites its input: preprocess I in place, then run the
+   child DHT from I to O */
+static void apply_hc2r(const plan *ego_, R *I, R *O)
+{
+     const P *self = (const P *) ego_;
+     plan_rdft *dht = (plan_rdft *) self->cld;
+     const int istep = self->is;
+     int lo, hi;
+
+     /* combine each pair I[k], I[n-k] for 0 < k < n-k */
+     for (lo = 1, hi = self->n - 1; lo < hi; ++lo, --hi) {
+          const E a = I[istep * lo];
+          const E b = I[istep * hi];
+
+#if FFT_SIGN == -1
+          I[istep * lo] = a - b;
+          I[istep * hi] = a + b;
+#else
+          I[istep * lo] = a + b;
+          I[istep * hi] = a - b;
+#endif
+     }
+
+     dht->apply((plan *) dht, I, O);
+}
+
+/* HC2R that leaves I untouched: the preprocessed data is written to O
+   and the child DHT then runs in place on O */
+static void apply_hc2r_save(const plan *ego_, R *I, R *O)
+{
+     const P *self = (const P *) ego_;
+     plan_rdft *dht = (plan_rdft *) self->cld;
+     const int istep = self->is, ostep = self->os;
+     int lo, hi;
+
+     O[0] = I[0];
+     for (lo = 1, hi = self->n - 1; lo < hi; ++lo, --hi) {
+          const E a = I[istep * lo];
+          const E b = I[istep * hi];
+
+#if FFT_SIGN == -1
+          O[ostep * lo] = a - b;
+          O[ostep * hi] = a + b;
+#else
+          O[ostep * lo] = a + b;
+          O[ostep * hi] = a - b;
+#endif
+     }
+     /* even n: the middle element has no partner and is copied as is */
+     if (lo == hi)
+          O[ostep * lo] = I[istep * lo];
+
+     dht->apply((plan *) dht, O, O);
+}
+
+static void awake(plan *ego_, int flg)
+{    /* forward the request to the child plan */
+     plan *child = ((P *) ego_)->cld;
+     AWAKE(child, flg);
+}
+
+static void destroy(plan *ego_)
+{    /* the plan owns exactly one child; release it */
+     plan *child = ((P *) ego_)->cld;
+     X(plan_destroy_internal)(child);
+}
+
+static void print(const plan *ego_, printer *p)
+{    /* both hc2r variants print as "hc2r"; apply identifies r2hc */
+     const P *self = (const P *) ego_;
+     const char *dir = "hc2r";
+     if (self->super.apply == apply_r2hc) dir = "r2hc";
+     p->print(p, "(%s-dht-%d%(%p%))", dir, self->n, self->cld);
+}
+
+static int applicable0(const solver *ego_, const problem *p_)
+{
+     const problem_rdft *prob = (const problem_rdft *) p_;
+
+     UNUSED(ego_);
+     /* a single (no vector loop) rank-1 rdft of kind R2HC or HC2R */
+     if (!RDFTP(p_))
+          return 0;
+     if (prob->sz->rnk != 1 || prob->vecsz->rnk != 0)
+          return 0;
+     if (prob->kind[0] != R2HC && prob->kind[0] != HC2R)
+          return 0;
+
+     /* hack: size-2 DHT etc. are defined as being equivalent
+        to size-2 R2HC in problem.c, so we need this to prevent
+        infinite loops for size 2 in EXHAUSTIVE mode: */
+     return prob->sz->dims[0].n > 2;
+}
+
+static int applicable(const solver *ego, const problem *p_,
+                      const planner *plnr)
+{    /* never applicable when the planner's NO_UGLY flag is set */
+     return NO_UGLYP(plnr) ? 0 : (applicable0(ego, p_) != 0);
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     static const plan_adt padt = {
+          X(rdft_solve), awake, print, destroy
+     };
+     const problem_rdft *pb = (const problem_rdft *) p_;
+     problem *sub;
+     plan *dht;
+     P *self;
+     int fwd, half;
+
+     if (!applicable(ego_, p_, plnr))
+          return (plan *)0;
+
+     /* build the child DHT problem */
+     fwd = (pb->kind[0] == R2HC);
+     if (fwd || DESTROY_INPUTP(plnr)) {
+          sub = X(mkproblem_rdft_1)(pb->sz, pb->vecsz, pb->I, pb->O, DHT);
+     } else {
+          /* input must be preserved: the DHT works in place on the output */
+          tensor *osz = X(tensor_copy_inplace)(pb->sz, INPLACE_OS);
+          sub = X(mkproblem_rdft_1)(osz, pb->vecsz, pb->O, pb->O, DHT);
+          X(tensor_destroy)(osz);
+     }
+     dht = X(mkplan_d)(plnr, sub);
+     if (!dht) return (plan *)0;
+
+     self = MKPLAN_RDFT(P, &padt, fwd ? apply_r2hc
+                        : (DESTROY_INPUTP(plnr) ? apply_hc2r : apply_hc2r_save));
+     self->cld = dht;
+     self->n = pb->sz->dims[0].n;
+     self->is = pb->sz->dims[0].is;
+     self->os = pb->sz->dims[0].os;
+
+     /* cost = child + the pass over the (n - 1) / 2 pairs */
+     half = (self->n - 1) / 2;
+     self->super.super.ops = dht->ops;
+     self->super.super.ops.other += 4 * half;
+     self->super.super.ops.add += 2 * half;
+     if (fwd) self->super.super.ops.mul += 2 * half;
+     if (self->super.apply == apply_hc2r_save)
+          self->super.super.ops.other += 2 + (self->n % 2 ? 0 : 2);
+     return &(self->super.super);
+}
+
+/* solver constructor: the record holds nothing but the mkplan hook */
+static solver *mksolver(void)
+{
+     static const solver_adt sadt = { mkplan };
+     S *self = MKSOLVER(S, &sadt);
+     return &self->super;
+}
+
+void X(rdft_dht_register)(planner *p)
+{    /* register one instance of this file's solver with planner p */
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/rdft/rdft.h b/src/fftw3/rdft/rdft.h
new file mode 100644
index 0000000..375b278
--- /dev/null
+++ b/src/fftw3/rdft/rdft.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef __RDFT_H__
+#define __RDFT_H__
+
+#include "ifftw.h"
+#include "codelet-rdft.h"
+
+/* problem.c: */
+typedef struct {
+     problem super;
+     tensor *sz, *vecsz;	/* transform dimensions and vector-loop dimensions */
+     R *I, *O;		/* input and output arrays */
+#if defined(STRUCT_HACK_KR)
+     rdft_kind kind[1];	/* solvers in this directory read kind[0] for rank 1; */
+#elif defined(STRUCT_HACK_C99)
+     rdft_kind kind[];	/* NOTE(review): presumably one kind per sz dimension */
+#else
+     rdft_kind *kind;	/* same data, reached through a pointer */
+#endif
+} problem_rdft;
+
+int X(problem_rdft_p)(const problem *p);
+#define RDFTP X(problem_rdft_p)  /* shorthand */
+
+void X(rdft_zerotens)(tensor *sz, R *I);
+problem *X(mkproblem_rdft)(const tensor *sz, const tensor *vecsz,
+			   R *I, R *O, const rdft_kind *kind);
+problem *X(mkproblem_rdft_d)(tensor *sz, tensor *vecsz,
+			     R *I, R *O, const rdft_kind *kind);
+problem *X(mkproblem_rdft_1)(const tensor *sz, const tensor *vecsz,
+			     R *I, R *O, rdft_kind kind);
+problem *X(mkproblem_rdft_1_d)(tensor *sz, tensor *vecsz,
+			       R *I, R *O, rdft_kind kind);
+
+const char *X(rdft_kind_str)(rdft_kind kind);
+
+/* solve.c: */
+void X(rdft_solve)(const plan *ego_, const problem *p_);
+
+/* plan.c: */
+typedef void (*rdftapply) (const plan *ego, R *I, R *O);	/* executes a plan */
+
+typedef struct {
+     plan super;	/* generic plan header, first member */
+     rdftapply apply;	/* set from the apply argument of MKPLAN_RDFT */
+} plan_rdft;
+
+plan *X(mkplan_rdft)(size_t size, const plan_adt *adt, rdftapply apply);
+
+#define MKPLAN_RDFT(type, adt, apply) \
+  ((type *)X(mkplan_rdft)(sizeof(type), adt, apply)) /* typed plan_rdft alloc */
+
+/* various solvers */
+
+solver *X(mksolver_rdft_r2hc_direct)(kr2hc k, const kr2hc_desc *desc);
+solver *X(mksolver_rdft_hc2r_direct)(khc2r k, const khc2r_desc *desc);
+solver *X(mksolver_rdft_r2r_direct)(kr2r k, const kr2r_desc *desc);
+solver *X(mksolver_rdft_hc2hc_dit)(khc2hc k, const hc2hc_desc *desc);
+solver *X(mksolver_rdft_hc2hc_ditbuf)(khc2hc k, const hc2hc_desc *desc);
+solver *X(mksolver_rdft_hc2hc_dif)(khc2hc k, const hc2hc_desc *desc);
+solver *X(mksolver_rdft_hc2hc_difbuf)(khc2hc k, const hc2hc_desc *desc);
+
+extern void (*X(khc2hc_dit_register_hook))(planner *, khc2hc, const hc2hc_desc *);
+extern void (*X(khc2hc_dif_register_hook))(planner *, khc2hc, const hc2hc_desc *);
+
+void X(rdft_rank0_register)(planner *p);
+void X(rdft_rank_geq2_register)(planner *p);
+void X(rdft_indirect_register)(planner *p);
+void X(rdft_vrank_geq1_register)(planner *p);
+void X(rdft_vrank2_transpose_register)(planner *p);
+void X(rdft_vrank3_transpose_register)(planner *p);
+void X(rdft_buffered_register)(planner *p);
+void X(rdft_generic_register)(planner *p);
+void X(rdft_rader_hc2hc_register)(planner *p);
+void X(rdft_dht_register)(planner *p);
+void X(dht_r2hc_register)(planner *p);
+void X(dht_rader_register)(planner *p);
+void X(dft_r2hc_register)(planner *p);
+void X(rdft_nop_register)(planner *p);
+
+/****************************************************************************/
+/* problem2.c: */
+/* an RDFT2 problem transforms a 1d real array r[n] with stride is/os
+   to/from an "unpacked" complex array {rio,iio}[n/2 + 1] with stride
+   os/is.  Multidimensional transforms use complex DFTs for the
+   noncontiguous dimensions.  vecsz has the usual interpretation.  */
+typedef struct {
+     problem super;
+     tensor *sz;	/* transform dimensions */
+     tensor *vecsz;	/* vector-loop dimensions */
+     R *r, *rio, *iio;	/* real array; real and imaginary parts of the complex array */
+     rdft_kind kind; /* R2HC or HC2R */
+} problem_rdft2;
+
+int X(problem_rdft2_p)(const problem *p);
+#define RDFT2P X(problem_rdft2_p)  /* shorthand */
+
+problem *X(mkproblem_rdft2)(const tensor *sz, const tensor *vecsz,
+			    R *r, R *rio, R *iio, rdft_kind kind);
+problem *X(mkproblem_rdft2_d)(tensor *sz, tensor *vecsz,
+			      R *r, R *rio, R *iio, rdft_kind kind);
+int X(rdft2_inplace_strides)(const problem_rdft2 *p, int vdim);
+int X(rdft2_tensor_max_index)(const tensor *sz, rdft_kind k);
+void X(rdft2_strides)(rdft_kind kind, const iodim *d, int *is, int *os);
+
+/* verify.c: */
+void X(rdft2_verify)(plan *pln, const problem_rdft2 *p, int rounds);
+
+/* solve.c: */
+void X(rdft2_solve)(const plan *ego_, const problem *p_);
+
+/* plan.c: */
+typedef void (*rdft2apply) (const plan *ego, R *r, R *rio, R *iio);	/* executes a plan */
+
+typedef struct {
+     plan super;	/* generic plan header, first member */
+     rdft2apply apply;	/* set from the apply argument of MKPLAN_RDFT2 */
+} plan_rdft2;
+
+plan *X(mkplan_rdft2)(size_t size, const plan_adt *adt, rdft2apply apply);
+
+#define MKPLAN_RDFT2(type, adt, apply) \
+  ((type *)X(mkplan_rdft2)(sizeof(type), adt, apply)) /* typed plan_rdft2 alloc */
+
+/* various solvers */
+
+solver *X(mksolver_rdft2_r2hc_direct)(kr2hc k, const kr2hc_desc *desc);
+solver *X(mksolver_rdft2_hc2r_direct)(khc2r k, const khc2r_desc *desc);
+
+void X(rdft2_vrank_geq1_register)(planner *p);
+void X(rdft2_buffered_register)(planner *p);
+void X(rdft2_nop_register)(planner *p);
+void X(rdft2_rank0_register)(planner *p);
+void X(rdft2_rank_geq2_register)(planner *p);
+void X(rdft2_radix2_register)(planner *p);
+
+/****************************************************************************/
+
+/* configurations */
+void X(rdft_conf_standard)(planner *p);
+
+#endif /* __RDFT_H__ */
diff --git a/src/fftw3/rdft/rdft2-inplace-strides.c b/src/fftw3/rdft/rdft2-inplace-strides.c
new file mode 100644
index 0000000..2171672
--- /dev/null
+++ b/src/fftw3/rdft/rdft2-inplace-strides.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rdft2-inplace-strides.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+/* Decide whether the sz/vecsz strides allow the problem to be treated as
+   in-place along vector dimension vdim, or along every vector dimension
+   when vdim == RNK_MINFTY.  tensor_inplace_strides cannot be used
+   directly because the input and output of these transforms differ in
+   size.  The test is not exhaustive: 1 is returned only for the most
+   common case.  */
+int X(rdft2_inplace_strides)(const problem_rdft2 *p, int vdim)
+{
+     const tensor *sz = p->sz, *vec = p->vecsz;
+     const iodim *last, *vd;
+     int nreal, ncplx, is, os, k;
+
+     /* all but the last transform dimension need equal in/out strides */
+     for (k = 0; k + 1 < sz->rnk; ++k)
+          if (sz->dims[k].is != sz->dims[k].os)
+               return 0;
+     if (!FINITE_RNK(vec->rnk) || vec->rnk == 0)
+          return 1;                     /* no vector loop to worry about */
+     if (!FINITE_RNK(vdim)) {   /* recurse over every vector dimension */
+          for (k = 0; k < vec->rnk; ++k)
+               if (!X(rdft2_inplace_strides)(p, k))
+                    return 0;
+          return 1;
+     }
+
+     A(vdim < p->vecsz->rnk);
+     vd = vec->dims + vdim;
+     if (sz->rnk == 0)
+          return (vd->is == vd->os);
+     /* the vector stride must clear both the real and complex extents */
+     last = sz->dims + sz->rnk - 1;
+     nreal = X(tensor_sz)(p->sz);
+     ncplx = (nreal / last->n) * (last->n / 2 + 1);
+     X(rdft2_strides)(p->kind, last, &is, &os);
+     return (vd->is == vd->os
+             && X(iabs)(vd->os) >= X(imax)(ncplx * X(iabs)(os), nreal * X(iabs)(is)));
+}
diff --git a/src/fftw3/rdft/rdft2-radix2.c b/src/fftw3/rdft/rdft2-radix2.c
new file mode 100644
index 0000000..280d642
--- /dev/null
+++ b/src/fftw3/rdft/rdft2-radix2.c
@@ -0,0 +1,479 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rdft2-radix2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/*
+  Compute RDFT2 of even size via either a DFT or a vector RDFT of
+  size n/2.
+
+  This file is meant as a temporary hack until we do the right thing.
+
+  The right thing is: 1) get rid of reduction to DFT, and 2) implement
+  arbitrary even-radix reduction to RDFT.  We currently reduce to DFT
+  so as to exploit the SIMD code.  We currently do only radix-2 in
+  order to avoid generating yet another set of codelets.
+*/
+
+#include "rdft.h"
+#include "dft.h"
+
+typedef struct { /* per-variant table: one instance for each of the four radix-2 solvers in this file */
+     int (*applicable) (const problem *p_, const planner *plnr); /* nonzero if the variant accepts p_ */
+     void (*apply) (const plan *ego_, R *r, R *rio, R *iio); /* executor handed to MKPLAN_RDFT2 in mkplan */
+     problem *(*mkcld) (const problem_rdft2 *p); /* builds the child problem that mkplan plans */
+     opcnt ops; /* cost of one kernel iteration; mkplan scales it by vl * ((n/2 + 1) / 2) */
+     const char *nam; /* name emitted by print() */
+} madt;
+
+typedef struct { /* solver object: generic solver header plus the variant table it dispatches through */
+     solver super;
+     const madt *adt; /* set by mksolver() */
+} S;
+
+typedef struct { /* plan state shared by all four radix-2 variants */
+     plan_rdft2 super; /* allocated by MKPLAN_RDFT2 and solved by X(rdft2_solve), so the base must be an rdft2 plan */
+     plan *cld; /* child plan built from adt->mkcld(p) */
+     twid *td; /* twiddle table, managed by X(twiddle_awake) in awake() */
+     int is, os, ivs, ovs; /* strides of the transform dimension and of the vector loop */
+     int n, vl; /* transform size (p->sz->dims[0].n) and vector length */
+     const S *slv; /* owning solver; print() reads its adt->nam */
+} P;
+
+/* common applicability function of forward problems: 1-d R2HC rdft2 of even size, at most one vector dimension */
+static int applicable_f(const problem *p_, const planner *plnr)
+{
+     UNUSED(plnr);
+     if (RDFT2P(p_)) {
+          const problem_rdft2 *p = (const problem_rdft2 *) p_;
+          return (1
+                  && p->kind == R2HC
+                  && p->vecsz->rnk <= 1 /* the apply functions loop over a single vector dimension */
+                  && p->sz->rnk == 1
+		  && (p->sz->dims[0].n % 2) == 0 /* radix-2 split needs an even size */
+	       );
+     }
+
+     return 0;
+}
+
+static int applicable_f_dft(const problem *p_, const planner *plnr) /* applicable_f plus an in-place layout restriction */
+{
+     UNUSED(plnr); /* NOTE: plnr is nevertheless forwarded to applicable_f below */
+     if (applicable_f(p_, plnr)) {
+	  const problem_rdft2 *p = (const problem_rdft2 *) p_;
+	  return(p->r != p->rio /* out-of-place is always accepted */
+		 || (p->iio == p->rio + p->sz->dims[0].is /* in-place: imaginary part sits one input stride after the real part */
+		     && p->sz->dims[0].os == 2 * p->sz->dims[0].is)); /* and the output stride is twice the input stride */
+     }
+     return 0;
+}
+
+/* common applicability function of backward problems: 1-d HC2R rdft2 of even size, at most one vector dimension */
+static int applicable_b(const problem *p_, const planner *plnr)
+{
+     if (RDFT2P(p_)) {
+          const problem_rdft2 *p = (const problem_rdft2 *) p_;
+          return (1
+                  && p->kind == HC2R
+		  && (p->r == p->rio || DESTROY_INPUTP(plnr)) /* the k_b_* kernels overwrite rio/iio, so out-of-place needs permission to destroy the input */
+                  && p->vecsz->rnk <= 1
+                  && p->sz->rnk == 1
+		  && (p->sz->dims[0].n % 2) == 0 /* radix-2 split needs an even size */
+	       );
+     }
+
+     return 0;
+}
+
+static int applicable_b_dft(const problem *p_, const planner *plnr) /* applicable_b plus an in-place layout restriction */
+{
+     UNUSED(plnr); /* NOTE: plnr is nevertheless forwarded to applicable_b below */
+     if (applicable_b(p_, plnr)) {
+	  const problem_rdft2 *p = (const problem_rdft2 *) p_;
+	  return(p->r != p->rio /* out-of-place is always accepted */
+		 || (p->iio == p->rio + p->sz->dims[0].os /* in-place: imaginary part sits one dims[0].os after the real part */
+		     && p->sz->dims[0].is == 2 * p->sz->dims[0].os)); /* and dims[0].is is twice dims[0].os */
+     }
+     return 0;
+}
+
+/*
+ * forward rdft2 via dft: k_f_dft post-processes the child DFT output in place (callers pass n = ego->n / 2)
+ */
+static void k_f_dft(R *rio, R *iio, const R *W, int n, int dist)
+{
+     int i;
+     R *pp = rio, *pm = rio + n * dist; /* pp walks up from element 0, pm walks down from element n */
+     int im = iio - rio; /* offset from a real part to the matching imaginary part */
+
+     /* i = 0 and i = n */
+     {
+          E rop = pp[0], iop = pp[im];
+          pp[0] = rop + iop;
+          pm[0] = rop - iop;
+          pp[im] = K(0.0); /* both end elements get a zero imaginary part */
+          pm[im] = K(0.0);
+	  pp += dist; pm -= dist;
+     }
+
+     /* middle elements */
+     for (W += 2, i = 2; i < n; i += 2, W += 2) { /* one (wr, wi) pair per pp/pm pair; the first pair in W is skipped */
+          E rop = pp[0], iop = pp[im], rom = pm[0], iom = pm[im]; /* read all four inputs before any store */
+          E wr = W[0], wi = W[1];
+          E re = rop + rom;
+          E ie = iop - iom;
+          E rd = rom - rop;
+          E id = iop + iom;
+          E tr = rd * wr - id * wi;
+          E ti = id * wr + rd * wi;
+          pp[0] = K(0.5) * (re + ti);
+          pp[im] = K(0.5) * (ie + tr);
+          pm[0] = K(0.5) * (re - ti);
+          pm[im] = K(0.5) * (tr - ie);
+	  pp += dist; pm -= dist;
+     }
+
+     /* i = n/2 when n is even */
+     if (!(n & 1)) pp[im] = -pp[im]; /* the unpaired middle element only has its imaginary part negated */
+}
+
+static void apply_f_dft(const plan *ego_, R *r, R *rio, R *iio) /* child complex DFT, then k_f_dft on each vector element */
+{
+     const P *ego = (const P *) ego_;
+
+     {
+          /* transform input as a vector of complex numbers */
+          plan_dft *cld = (plan_dft *) ego->cld;
+          cld->apply((plan *) cld, r, r + ego->is, rio, iio); /* r supplies real parts, r + is the imaginary parts */
+     }
+
+     {
+          int i, vl = ego->vl, n2 = ego->n / 2;
+          int ovs = ego->ovs, os = ego->os;
+          const R *W = ego->td->W;
+          for (i = 0; i < vl; ++i, rio += ovs, iio += ovs) /* post-process each output vector in place */
+               k_f_dft(rio, iio, W, n2, os);
+     }
+}
+
+static problem *mkcld_f_dft(const problem_rdft2 *p) /* child: complex DFT of size n/2 from p->r to (p->rio, p->iio) */
+{
+     const iodim *d = p->sz->dims;
+     return X(mkproblem_dft_d) (
+	  X(mktensor_1d)(d[0].n / 2, d[0].is * 2, d[0].os), /* half the size, twice the input stride */
+	  X(tensor_copy)(p->vecsz),
+	  p->r, p->r + d[0].is, p->rio, p->iio); /* same pointer pairing that apply_f_dft passes to the child */
+}
+
+static const madt adt_f_dft = { /* fields in madt order: applicable, apply, mkcld, ops, nam */
+     applicable_f_dft, apply_f_dft, mkcld_f_dft, {10, 8, 0, 0}, "r2hc2-dft"
+};
+
+/*
+ * forward rdft2 via rdft: k_f_rdft combines the child RDFT outputs in place (callers pass n = ego->n / 2)
+ */
+static void k_f_rdft(R *rio, R *iio, const R *W, int n, int dist)
+{
+     int i;
+     R *pp = rio, *pm = rio + n * dist; /* pp walks up from element 0, pm walks down from element n */
+     int im = iio - rio; /* offset from a real part to the matching imaginary part */
+
+     /* i = 0 and i = n */
+     {
+          E rop = pp[0], iop = pp[im];
+          pp[0] = rop + iop;
+          pm[0] = rop - iop;
+          pp[im] = K(0.0); /* both end elements get a zero imaginary part */
+          pm[im] = K(0.0);
+	  pp += dist; pm -= dist;
+     }
+
+     /* middle elements */
+     for (W += 2, i = 2; i < n; i += 2, W += 2) { /* one (wr, wi) pair per pp/pm pair; the first pair in W is skipped */
+          E r0 = pp[0], r1 = pp[im], i0 = pm[0], i1 = pm[im]; /* read all four inputs before any store */
+          E wr = W[0], wi = W[1];
+          E tr = r1 * wr + i1 * wi;
+          E ti = i1 * wr - r1 * wi;
+          pp[0] = r0 + tr;
+          pp[im] = i0 + ti;
+          pm[0] = r0 - tr;
+          pm[im] = ti - i0;
+	  pp += dist; pm -= dist;
+     }
+
+     /* i = n/2 when n is even */
+     if (!(n & 1)) pp[im] = -pp[im]; /* the unpaired middle element only has its imaginary part negated */
+}
+
+static void apply_f_rdft(const plan *ego_, R *r, R *rio, R *iio) /* child RDFT r -> rio, then k_f_rdft on each vector element */
+{
+     const P *ego = (const P *) ego_;
+
+     {
+          plan_rdft *cld = (plan_rdft *) ego->cld;
+          cld->apply((plan *) cld, r, rio); /* iio is reached through the radix tensor built in mkcld_f_rdft */
+     }
+
+     {
+          int i, vl = ego->vl, n2 = ego->n / 2;
+          int ovs = ego->ovs, os = ego->os;
+          const R *W = ego->td->W;
+          for (i = 0; i < vl; ++i, rio += ovs, iio += ovs) /* post-process each output vector in place */
+               k_f_rdft(rio, iio, W, n2, os);
+     }
+}
+
+static problem *mkcld_f_rdft(const problem_rdft2 *p) /* child: two R2HC transforms of size n/2 from p->r to p->rio */
+{
+     const iodim *d = p->sz->dims;
+
+     tensor *radix = X(mktensor_1d)(2, d[0].is, p->iio - p->rio); /* extra vector dim of length 2: input step is, output step iio - rio */
+     tensor *cld_vec = X(tensor_append)(radix, p->vecsz);
+     X(tensor_destroy)(radix); /* radix is destroyed here; only cld_vec is passed on */
+
+     return X(mkproblem_rdft_1_d) (
+	  X(mktensor_1d)(d[0].n / 2, 2 * d[0].is, d[0].os), /* half the size, twice the input stride */
+	  cld_vec, p->r, p->rio, R2HC);
+}
+
+static const madt adt_f_rdft = { /* fields in madt order: applicable, apply, mkcld, ops, nam */
+     applicable_f, apply_f_rdft, mkcld_f_rdft, {6, 4, 0, 0}, "r2hc2-rdft"
+};
+
+
+/*
+ * backward rdft2 via dft: k_b_dft pre-processes the input in place before the child DFT (callers pass n = ego->n / 2)
+ */
+static void k_b_dft(R *rio, R *iio, const R *W, int n, int dist)
+{
+     int i;
+     R *pp = rio, *pm = rio + n * dist; /* pp walks up from element 0, pm walks down from element n */
+     int im = iio - rio; /* offset from a real part to the matching imaginary part */
+
+     /* i = 0 and i = n */
+     {
+          E rop = pp[0], iop = pm[0]; /* only the real parts of the two end elements are read */
+          pp[0] = rop + iop;
+          pp[im] = rop - iop;
+	  pp += dist; pm -= dist;
+     }
+
+     /* middle elements */
+     for (W += 2, i = 2; i < n; i += 2, W += 2) { /* one (wr, wi) pair per pp/pm pair; the first pair in W is skipped */
+          E a = pp[0], b = pp[im], c = pm[0], d = pm[im]; /* read all four inputs before any store */
+          E wr = W[0], wi = W[1];
+	  E re = a + c, ti = a - c, ie = b - d, tr = b + d;
+	  E rd = tr * wr + ti * wi;
+	  E id = ti * wr - tr * wi;
+	  pp[0] = re - rd;
+	  pp[im] = ie + id;
+	  pm[0] = re + rd;
+	  pm[im] = id - ie;
+	  pp += dist; pm -= dist;
+     }
+
+     /* i = n/2 when n is even */
+     if (!(n & 1)) { pp[0] *= K(2.0); pp[im] *= -K(2.0); } /* unpaired middle element: real doubled, imaginary doubled and negated */
+}
+
+static void apply_b_dft(const plan *ego_, R *r, R *rio, R *iio) /* k_b_dft on each vector element, then the child complex DFT */
+{
+     const P *ego = (const P *) ego_;
+     {
+          int i, vl = ego->vl, n2 = ego->n / 2;
+          int ivs = ego->ivs, is = ego->is;
+          const R *W = ego->td->W;
+	  R *rio1 = rio, *iio1 = iio; /* loop on copies so rio/iio stay valid for the child call below */
+          for (i = 0; i < vl; ++i, rio1 += ivs, iio1 += ivs)
+               k_b_dft(rio1, iio1, W, n2, is);
+     }
+
+     {
+          plan_dft *cld = (plan_dft *) ego->cld;
+	  /* swap r/i because of backward transform */
+          cld->apply((plan *) cld, iio, rio, r + ego->os, r); /* same pointer order as in mkcld_b_dft */
+     }
+}
+
+static problem *mkcld_b_dft(const problem_rdft2 *p) /* child: complex DFT of size n/2 with real/imaginary pointers swapped */
+{
+     const iodim *d = p->sz->dims;
+
+     return X(mkproblem_dft_d) (
+	  X(mktensor_1d)(d[0].n / 2, d[0].is, 2 * d[0].os), /* half the size, twice the output stride */
+	  X(tensor_copy)(p->vecsz),
+	  p->iio, p->rio, p->r + d[0].os, p->r); /* same pointer order that apply_b_dft passes to the child */
+}
+
+static const madt adt_b_dft = { /* fields in madt order: applicable, apply, mkcld, ops, nam */
+     applicable_b_dft, apply_b_dft, mkcld_b_dft, {10, 8, 0, 0}, "hc2r2-dft"
+};
+
+/*
+ * backward rdft2 via backward rdft: k_b_rdft pre-processes the input in place (callers pass n = ego->n / 2)
+ */
+static void k_b_rdft(R *rio, R *iio, const R *W, int n, int dist)
+{
+     int i;
+     R *pp = rio, *pm = rio + n * dist; /* pp walks up from element 0, pm walks down from element n */
+     int im = iio - rio; /* offset from a real part to the matching imaginary part */
+
+     /* i = 0 and i = n */
+     {
+          E rop = pp[0], iop = pm[0]; /* only the real parts of the two end elements are read */
+          pp[0] = rop + iop;
+          pp[im] = rop - iop;
+	  pp += dist; pm -= dist;
+     }
+
+     /* middle elements */
+     for (W += 2, i = 2; i < n; i += 2, W += 2) { /* one (wr, wi) pair per pp/pm pair; the first pair in W is skipped */
+          E a = pp[0], b = pp[im], c = pm[0], d = pm[im]; /* read all four inputs before any store */
+          E wr = W[0], wi = W[1];
+	  E r0 = a + c, r1 = a - c, i0 = b - d, i1 = b + d;
+	  pp[0] = r0;
+	  pm[0] = i0;
+	  pp[im] = r1 * wr - i1 * wi;
+	  pm[im] = i1 * wr + r1 * wi;
+	  pp += dist; pm -= dist;
+     }
+
+     /* i = n/2 when n is even */
+     if (!(n & 1)) { pp[0] *= K(2.0); pp[im] *= -K(2.0); } /* unpaired middle element: real doubled, imaginary doubled and negated */
+}
+
+static void apply_b_rdft(const plan *ego_, R *r, R *rio, R *iio) /* k_b_rdft on each vector element, then the child RDFT rio -> r */
+{
+     const P *ego = (const P *) ego_;
+
+     {
+          int i, vl = ego->vl, n2 = ego->n / 2;
+          int ivs = ego->ivs, is = ego->is;
+          const R *W = ego->td->W;
+	  R *rio1 = rio, *iio1 = iio; /* loop on copies so rio stays valid for the child call below */
+          for (i = 0; i < vl; ++i, rio1 += ivs, iio1 += ivs)
+               k_b_rdft(rio1, iio1, W, n2, is);
+     }
+
+     {
+          plan_rdft *cld = (plan_rdft *) ego->cld;
+          cld->apply((plan *) cld, rio, r); /* iio is reached through the radix tensor built in mkcld_b_rdft */
+     }
+}
+
+static problem *mkcld_b_rdft(const problem_rdft2 *p) /* child: two HC2R transforms of size n/2 from p->rio to p->r */
+{
+     const iodim *d = p->sz->dims;
+
+     tensor *radix = X(mktensor_1d)(2, p->iio - p->rio, d[0].os); /* extra vector dim of length 2: input step iio - rio, output step os */
+     tensor *cld_vec = X(tensor_append)(radix, p->vecsz);
+     X(tensor_destroy)(radix); /* radix is destroyed here; only cld_vec is passed on */
+
+     return X(mkproblem_rdft_1_d) (
+	  X(mktensor_1d)(d[0].n / 2, d[0].is, 2 * d[0].os), /* half the size, twice the output stride */
+	  cld_vec, p->rio, p->r, HC2R);
+}
+
+static const madt adt_b_rdft = { /* fields in madt order: applicable, apply, mkcld, ops, nam */
+     applicable_b, apply_b_rdft, mkcld_b_rdft, {6, 4, 0, 0}, "hc2r2-rdft"
+};
+
+/*
+ * common stuff: plan_adt callbacks, plan construction and solver registration shared by all variants
+ */
+static void awake(plan *ego_, int flg) /* forwards flg to the child plan and to the twiddle table */
+{
+     P *ego = (P *) ego_;
+     static const tw_instr twinstr[] = { {TW_FULL, 0, 2}, {TW_NEXT, 1, 0} };
+     AWAKE(ego->cld, flg);
+     X(twiddle_awake)(flg, &ego->td, twinstr, ego->n, 2, (ego->n / 2 + 1) / 2); /* updates ego->td, which the kernels read as td->W */
+}
+
+static void destroy(plan *ego_) /* plan_adt destroy hook: releases the child plan */
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal) (ego->cld);
+}
+
+static void print(const plan *ego_, printer * p) /* prints variant name, size, vector length and the child plan */
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(%s-%d%v%(%p%))", ego->slv->adt->nam,
+              ego->n, ego->vl, ego->cld);
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) /* returns a new plan, or 0 if not applicable or the child cannot be planned */
+{
+     const S *ego = (const S *) ego_;
+     P *pln;
+     const problem_rdft2 *p;
+     plan *cld;
+     const iodim *d;
+
+     static const plan_adt padt = {
+          X(rdft2_solve), awake, print, destroy
+     };
+
+     if (!ego->adt->applicable(p_, plnr)) /* variant-specific check; also guarantees p_ is an rdft2 problem */
+          return (plan *) 0;
+
+     p = (const problem_rdft2 *) p_;
+
+     cld = X(mkplan_d)(plnr, ego->adt->mkcld(p)); /* plan the half-size child problem */
+     if (!cld) return (plan *) 0;
+
+     pln = MKPLAN_RDFT2(P, &padt, ego->adt->apply);
+
+     d = p->sz->dims;
+     pln->n = d[0].n;
+     pln->os = d[0].os;
+     pln->is = d[0].is;
+     X(tensor_tornk1) (p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); /* fills vector length and vector strides from vecsz */
+     pln->cld = cld;
+     pln->td = 0; /* twiddles are obtained later, in awake() */
+     pln->slv = ego;
+
+     /* approximately */
+     X(ops_madd)(pln->vl * ((pln->n/2 + 1) / 2), &ego->adt->ops,
+		 &cld->ops, &pln->super.super.ops);
+
+     return &(pln->super.super);
+}
+
+static solver *mksolver(const madt *adt) /* creates a solver bound to one variant table */
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     slv->adt = adt;
+     return &(slv->super);
+}
+
+void X(rdft2_radix2_register)(planner *p) /* registers one solver per variant, in the order listed in adts[] */
+{
+     unsigned i;
+     static const madt *const adts[] = {
+	  &adt_f_dft, &adt_f_rdft,
+	  &adt_b_dft, &adt_b_rdft
+     };
+
+     for (i = 0; i < sizeof(adts) / sizeof(adts[0]); ++i)
+          REGISTER_SOLVER(p, mksolver(adts[i]));
+}
diff --git a/src/fftw3/rdft/rdft2-strides.c b/src/fftw3/rdft/rdft2-strides.c
new file mode 100644
index 0000000..bcd98d2
--- /dev/null
+++ b/src/fftw3/rdft/rdft2-strides.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "rdft.h"
+
+/* The tensor (is,os) pair applies to (r,rio/iio) for R2HC and the
+   other way round for HC2R (always binding it to (r,rio/iio) caused
+   trouble with the tensor functions).  For R2HC the pair (d->is,
+   d->os) is returned unchanged; for HC2R it is returned swapped. */
+void X(rdft2_strides)(rdft_kind kind, const iodim *d, int *is, int *os)
+{
+     if (kind != R2HC) {
+	  A(kind == HC2R);
+	  *is = d->os;
+	  *os = d->is;
+     }
+     else {
+	  *is = d->is;
+	  *os = d->os;
+     }
+}
diff --git a/src/fftw3/rdft/rdft2-tensor-max-index.c b/src/fftw3/rdft/rdft2-tensor-max-index.c
new file mode 100644
index 0000000..0d29af3
--- /dev/null
+++ b/src/fftw3/rdft/rdft2-tensor-max-index.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rdft2-tensor-max-index.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+/* Same idea as X(tensor_max_index), except that the last dimension is
+   special: its complex side (output of R2HC, input of HC2R) spans n/2+1. */
+int X(rdft2_tensor_max_index)(const tensor *sz, rdft_kind k)
+{
+     int maxidx = 0;
+     int j = 0;
+
+     A(FINITE_RNK(sz->rnk));
+     while (j + 1 < sz->rnk) {
+          const iodim *d = &sz->dims[j++];
+          maxidx += (d->n - 1) * X(imax)(X(iabs)(d->is), X(iabs)(d->os));
+     }
+     if (j < sz->rnk) {
+	  const iodim *last = &sz->dims[j];
+	  int is, os;
+	  X(rdft2_strides)(k, last, &is, &os);
+	  maxidx += X(imax)((last->n - 1) * X(iabs)(is), (last->n/2) * X(iabs)(os));
+     }
+     return maxidx;
+}
diff --git a/src/fftw3/rdft/rdirect.c b/src/fftw3/rdft/rdirect.c
new file mode 100644
index 0000000..97dd16e
--- /dev/null
+++ b/src/fftw3/rdft/rdirect.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rdirect.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* direct RDFT R2HC/HC2R solver, if we have a codelet */
+
+#include "rdft.h"
+
+typedef union { /* one codelet pointer; the member in use matches the constructor that built the solver */
+     kr2hc r2hc;
+     khc2r hc2r;
+     kr2r r2r;
+} kodelet;
+
+typedef struct { /* solver wrapping a single codelet and its descriptor */
+     solver super;
+     union { /* descriptor of the codelet; same member choice as in k */
+	  const kr2hc_desc *r2hc;
+	  const khc2r_desc *hc2r;
+	  const kr2r_desc *r2r;
+     } desc;
+     kodelet k;
+     int sz; /* transform size, copied from desc->sz */
+     rdft_kind kind; /* copied from desc->genus->kind (r2hc/hc2r) or desc->kind (r2r) */
+     const char *nam; /* copied from desc->nam; shown by print() */
+} S;
+
+typedef struct { /* plan for one direct codelet call */
+     plan_rdft super;
+
+     stride is, ros, ios; /* stride objects passed to the codelet; ios is 0 for r2r kinds */
+     int ioffset; /* offset added to O (r2hc) or I (hc2r) to form the second array pointer */
+     int vl; /* vector length, from X(tensor_tornk1) */
+     int ivs, ovs; /* vector strides, from X(tensor_tornk1) */
+     kodelet k; /* copy of the solver's codelet pointer */
+     const S *slv;
+} P;
+
+static void apply_r2hc(const plan *ego_, R *I, R *O) /* calls the r2hc codelet with outputs O and O + ioffset */
+{
+     const P *ego = (const P *) ego_;
+     ASSERT_ALIGNED_DOUBLE;
+     ego->k.r2hc(I, O, O + ego->ioffset, ego->is, ego->ros, ego->ios,
+		 ego->vl, ego->ivs, ego->ovs);
+}
+
+static void apply_hc2r(const plan *ego_, R *I, R *O) /* calls the hc2r codelet with inputs I and I + ioffset */
+{
+     const P *ego = (const P *) ego_;
+     ASSERT_ALIGNED_DOUBLE;
+     ego->k.hc2r(I, I + ego->ioffset, O, ego->ros, ego->ios, ego->is, /* note the stride order: ros, ios, then is */
+		 ego->vl, ego->ivs, ego->ovs);
+}
+
+static void apply_r2r(const plan *ego_, R *I, R *O) /* calls the r2r codelet; ros serves as the output stride */
+{
+     const P *ego = (const P *) ego_;
+     ASSERT_ALIGNED_DOUBLE;
+     ego->k.r2r(I, O, ego->is, ego->ros, ego->vl, ego->ivs, ego->ovs);
+}
+
+static void destroy(plan *ego_) /* plan_adt destroy hook: releases the stride objects made in mkplan */
+{
+     P *ego = (P *) ego_;
+     X(stride_destroy)(ego->is);
+     X(stride_destroy)(ego->ros);
+     if (!R2R_KINDP(ego->slv->kind)) /* mkplan sets ios to 0 for r2r kinds, so there is nothing to destroy */
+	  X(stride_destroy)(ego->ios);
+}
+
+static void print(const plan *ego_, printer *p) /* prints kind, size, vector length and codelet name */
+{
+     const P *ego = (const P *) ego_;
+     const S *s = ego->slv;
+
+     p->print(p, "(rdft-%s-direct-%d%v \"%s\")", 
+	      X(rdft_kind_str)(s->kind), s->sz, ego->vl, s->nam);
+}
+
+static int ioffset(rdft_kind kind, int sz, int s) /* s * sz for R2HC/HC2R, s * (sz - 1) for any other kind */
+{
+     return ((kind != R2HC && kind != HC2R) ? (sz - 1) : sz) * s;
+}
+
+static int applicable(const solver *ego_, const problem *p_) /* nonzero if this codelet can solve p_ directly */
+{
+     if (RDFTP(p_)) {
+          const S *ego = (const S *) ego_;
+          const problem_rdft *p = (const problem_rdft *) p_;
+	  int vl;
+	  int ivs, ovs;
+
+          return (
+	       1
+	       && p->sz->rnk == 1
+	       && p->vecsz->rnk <= 1
+	       && p->sz->dims[0].n == ego->sz /* size must match the codelet exactly */
+	       && p->kind[0] == ego->kind
+
+	       /* check strides etc */
+	       && X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs)
+
+	       && (!R2HC_KINDP(ego->kind) || /* r2hc kinds: ask the genus okp with the arguments apply_r2hc will use */
+		   ego->desc.r2hc->genus->okp(ego->desc.r2hc, p->I, p->O, p->O
+					      + ioffset(ego->kind, ego->sz, p->sz->dims[0].os),
+					      p->sz->dims[0].is,
+					      p->sz->dims[0].os, -p->sz->dims[0].os,
+					      vl, ivs, ovs))
+	       && (!HC2R_KINDP(ego->kind) || /* hc2r kinds: same check with the two input pointers */
+		   ego->desc.hc2r->genus->okp(ego->desc.hc2r, p->I, p->I
+					      + ioffset(ego->kind, ego->sz, p->sz->dims[0].is), p->O,
+					      p->sz->dims[0].is, -p->sz->dims[0].is,
+					      p->sz->dims[0].os, 
+					      vl, ivs, ovs))
+	       
+	       && (!R2R_KINDP(ego->kind) || /* r2r kinds: single input and output pointer */
+		   ego->desc.r2r->genus->okp(ego->desc.r2r, p->I, p->O,
+					     p->sz->dims[0].is,
+					     p->sz->dims[0].os, 
+					     vl, ivs, ovs))
+	       
+	       && (0
+		   /* can operate out-of-place */
+		   || p->I != p->O
+
+		   /*
+		    * can compute one transform in-place, no matter
+		    * what the strides are.
+		    */
+		   || p->vecsz->rnk == 0
+
+		   /* can operate in-place as long as strides are the same */
+		   || (X(tensor_inplace_strides2)(p->sz, p->vecsz))
+		    )
+	       );
+     }
+
+     return 0;
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) /* returns a new direct plan, or 0 if not applicable */
+{
+     const S *ego = (const S *) ego_;
+     P *pln;
+     const problem_rdft *p;
+     iodim *d;
+     int hc2r_kindp, r2r_kindp;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), X(null_awake), print, destroy
+     };
+
+     UNUSED(plnr);
+
+     if (!applicable(ego_, p_))
+          return (plan *)0;
+
+     p = (const problem_rdft *) p_;
+
+     hc2r_kindp = HC2R_KINDP(ego->kind);
+     r2r_kindp = R2R_KINDP(ego->kind);
+
+     pln = MKPLAN_RDFT(P, &padt, 
+		       r2r_kindp ? apply_r2r :
+		       (hc2r_kindp ? apply_hc2r : apply_r2hc)); /* pick the executor matching the codelet kind */
+
+     d = p->sz->dims;
+
+     pln->k = ego->k;
+     pln->ioffset = ioffset(ego->kind, d[0].n, hc2r_kindp ? d[0].is : d[0].os); /* uses the stride of the two-pointer side: input for hc2r, else output */
+
+     pln->is = X(mkstride)(ego->sz, hc2r_kindp ? d[0].os : d[0].is); /* for hc2r, pln->is is built from d[0].os */
+     if (r2r_kindp) {
+	  pln->ros = X(mkstride)(ego->sz, d[0].os);
+	  pln->ios = 0; /* destroy() skips ios for r2r kinds */
+     }
+     else {  
+	  int nr = (ego->kind == R2HC || ego->kind == HC2R) 
+	       ?(d[0].n + 2) / 2 : /* R2HCII */ (d[0].n + 1) / 2;
+	  pln->ros = X(mkstride)(nr, hc2r_kindp ? d[0].is : d[0].os);
+	  pln->ios = X(mkstride)(ego->sz - nr + 1, 
+				 hc2r_kindp ? -d[0].is : -d[0].os); /* ios is built from the negated stride */
+     }
+
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+
+     pln->slv = ego;
+     X(ops_zero)(&pln->super.super.ops);
+     if (r2r_kindp) /* cost: codelet ops scaled by vl / genus->vl */
+	  X(ops_madd2)(pln->vl / ego->desc.r2r->genus->vl,
+		       &ego->desc.r2r->ops,
+		       &pln->super.super.ops);
+     else if (hc2r_kindp)
+	  X(ops_madd2)(pln->vl / ego->desc.hc2r->genus->vl,
+		       &ego->desc.hc2r->ops,
+		       &pln->super.super.ops);
+     else
+	  X(ops_madd2)(pln->vl / ego->desc.r2hc->genus->vl,
+		       &ego->desc.r2hc->ops,
+		       &pln->super.super.ops);
+
+     return &(pln->super.super);
+}
+
+/* constructor: solver for an r2hc codelet k described by desc */
+solver *X(mksolver_rdft_r2hc_direct)(kr2hc k, const kr2hc_desc *desc)
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     slv->k.r2hc = k;
+     slv->desc.r2hc = desc;
+     slv->sz = desc->sz;
+     slv->nam = desc->nam;
+     slv->kind = desc->genus->kind; /* the kind comes from the genus for r2hc codelets */
+     return &(slv->super);
+}
+
+solver *X(mksolver_rdft_hc2r_direct)(khc2r k, const khc2r_desc *desc) /* solver for an hc2r codelet k described by desc */
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     slv->k.hc2r = k;
+     slv->desc.hc2r = desc;
+     slv->sz = desc->sz;
+     slv->nam = desc->nam;
+     slv->kind = desc->genus->kind; /* the kind comes from the genus for hc2r codelets */
+     return &(slv->super);
+}
+
+solver *X(mksolver_rdft_r2r_direct)(kr2r k, const kr2r_desc *desc) /* solver for an r2r codelet k described by desc */
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     slv->k.r2r = k;
+     slv->desc.r2r = desc;
+     slv->sz = desc->sz;
+     slv->nam = desc->nam;
+     slv->kind = desc->kind; /* unlike r2hc/hc2r, the kind is stored on the descriptor itself */
+     return &(slv->super);
+}
+
diff --git a/src/fftw3/rdft/rgeneric.c b/src/fftw3/rdft/rgeneric.c
new file mode 100644
index 0000000..ebc48e9
--- /dev/null
+++ b/src/fftw3/rdft/rgeneric.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "rdft.h"
+
+typedef struct { /* solver: one instance per direction (see X(rdft_generic_register)) */
+     solver super;
+     rdft_kind kind; /* R2HC or HC2R; compared against p->kind[0] in applicable0 */
+} S;
+
+typedef struct { /* plan state for the generic odd-radix step */
+     plan_rdft super;
+     plan *cld; /* child plan: r transforms of size m */
+     twid *td; /* twiddle table, managed by X(twiddle_awake) in awake() */
+     int os; /* stride of the array worked on in place: output stride for R2HC, input stride for HC2R */
+     int r, m; /* r = X(first_divisor)(n), m = n / r */
+     rdft_kind kind;
+} P;
+
+/***************************************************************************/
+
+static void apply_dit(const plan *ego_, R *I, R *O)
+{ /* R2HC path: run the child plan I -> O, then combine its r sub-results in place in O */
+     const P *ego = (const P *) ego_;
+     int n, m, r;
+     int i, j, k;
+     int os, osm;
+     E *buf;
+     const R *W;
+     R *X, *YO, *YI;
+     E rsum, isum;
+     int wp, wincr;
+
+     {
+	  plan_rdft *cld = (plan_rdft *) ego->cld;
+	  cld->apply((plan *) cld, I, O);
+     }
+
+     r = ego->r;
+
+     STACK_MALLOC(E *, buf, r * 2 * sizeof(E)); /* r (re, im) pairs of scratch space */
+     
+     osm = (m = ego->m) * (os = ego->os); /* osm: distance in O between consecutive sub-results */
+     n = m * r;
+     W = ego->td->W;
+
+     X = O;
+     YO = O + r * osm;
+     YI = O + osm;
+
+     /* compute the transform of the r 0th elements (which are real) */
+     for (i = 0; i + i < r; ++i) { /* only the first half of the outputs is computed */
+	  rsum = K(0.0);
+	  isum = K(0.0);
+	  wincr = m * i;
+	  for (j = 0, wp = 0; j < r; ++j) {
+	       E tw_r = W[2*wp];
+	       E tw_i = W[2*wp+1] ;
+	       E re = X[j * osm];
+	       rsum += re * tw_r;
+	       isum += re * tw_i;
+	       wp += wincr;
+	       if (wp >= n) /* wincr < n, so a single subtraction keeps wp in [0, n) */
+		    wp -= n;
+	  }
+	  buf[2*i] = rsum;
+	  buf[2*i+1] = isum;
+     }
+
+     /* store the transform back into the output array (real parts via X, imaginary parts via YO) */
+     X[0] = buf[0];
+     for (i = 1; i + i < r; ++i) {
+	  X[i * osm] = buf[2*i];
+	  YO[-i * osm] = buf[2*i+1];
+     }
+
+     X += os;
+     YI -= os;
+     YO -= os;
+
+     /* compute the transform of the middle elements (which are complex) */
+     for (k = 1; k + k < m; ++k, X += os, YI -= os, YO -= os) {
+	  for (i = 0; i < r; ++i) {
+	       rsum = K(0.0);
+	       isum = K(0.0);
+	       wincr = k + m * i;
+	       for (j = 0, wp = 0; j < r; ++j) {
+		    E tw_r = W[2*wp];
+		    E tw_i = W[2*wp+1] ;
+		    E re = X[j * osm];
+		    E im = YI[j * osm];
+		    rsum += re * tw_r - im * tw_i;
+		    isum += re * tw_i + im * tw_r;
+		    wp += wincr;
+		    if (wp >= n) /* wincr < n, so a single subtraction keeps wp in [0, n) */
+			 wp -= n;
+	       }
+	       buf[2*i] = rsum;
+	       buf[2*i+1] = isum;
+	  }
+
+	  /* store the transform back into the output array; all sums are in buf, so overwriting is safe */
+	  for (i = 0; i + i < r; ++i) {
+	       X[i * osm] = buf[2*i];
+	       YO[-i * osm] = buf[2*i+1];
+	  }
+	  for (; i < r; ++i) { /* second half: real/imaginary slots swapped, with a sign change */
+	       X[i * osm] = -buf[2*i+1];
+	       YO[-i * osm] = buf[2*i];
+	  }
+     }
+
+     /* no final element, since m is odd */
+
+     STACK_FREE(buf);
+}
+
+static void apply_dif(const plan *ego_, R *I, R *O)
+{ /* HC2R path: combine in place in I (the input is overwritten), then run the child plan I -> O */
+     const P *ego = (const P *) ego_;
+     int n, m, r;
+     int i, j, k;
+     int is, ism;
+     E *buf;
+     const R *W;
+     R *X, *YO, *YI;
+     E rsum, isum;
+     int wp, wincr;
+
+     r = ego->r;
+
+     STACK_MALLOC(E *, buf, r * 2 * sizeof(E)); /* r (re, im) pairs of scratch space */
+     
+     ism = (m = ego->m) * (is = ego->os); /* for HC2R plans mkplan stores the input stride in ego->os */
+     n = m * r;
+     W = ego->td->W;
+
+     X = I;
+     YI = I + r * ism;
+     YO = I + ism;
+
+     /* 
+      * compute the transform of the r 0th elements (which are halfcomplex)
+      * yielding real numbers
+      */
+     /* copy the input into the temporary array */
+     buf[0] = X[0]; /* buf[1] is left unset; the loop below starts at j = 1 and adds buf[0] separately */
+     for (i = 1; i + i < r; ++i) {
+	  buf[2*i] = X[i * ism];
+	  buf[2*i+1] = YI[-i * ism];
+     }
+
+     for (i = 0; i < r; ++i) {
+	  rsum = K(0.0);
+	  wincr = m * i;
+	  for (j = 1, wp = wincr; j + j < r; ++j) { /* sums only over the first half of the inputs */
+	       E tw_r = W[2*wp];
+	       E tw_i = W[2*wp+1];
+	       E re = buf[2*j];
+	       E im = buf[2*j+1];
+	       rsum += re * tw_r + im * tw_i;
+	       wp += wincr;
+	       if (wp >= n) /* wincr < n, so a single subtraction keeps wp in [0, n) */
+		    wp -= n;
+	  }
+	  X[i * ism] = K(2.0) * rsum + buf[0];
+     }
+
+     X += is;
+     YI -= is;
+     YO -= is;
+
+     /* compute the transform of the middle elements (which are complex) */
+     for (k = 1; k + k < m; ++k, X += is, YI -= is, YO -= is) {
+	  /* copy the input into the temporary array */
+	  for (i = 0; i + i < r; ++i) {
+	       buf[2*i] = X[i * ism];
+	       buf[2*i+1] = YI[-i * ism];
+	  }
+	  for (; i < r; ++i) { /* second half: real/imaginary slots swapped, with a sign change */
+	       buf[2*i+1] = -X[i * ism];
+	       buf[2*i] = YI[-i * ism];
+	  }
+
+	  for (i = 0; i < r; ++i) {
+	       rsum = K(0.0);
+	       isum = K(0.0);
+	       wincr = m * i;
+	       for (j = 0, wp = k * i; j < r; ++j) {
+		    E tw_r = W[2*wp];
+		    E tw_i = W[2*wp+1];
+		    E re = buf[2*j];
+		    E im = buf[2*j+1];
+		    rsum += re * tw_r + im * tw_i;
+		    isum += im * tw_r - re * tw_i;
+		    wp += wincr;
+		    if (wp >= n) /* wincr < n, so a single subtraction keeps wp in [0, n) */
+			 wp -= n;
+	       }
+	       X[i * ism] = rsum;
+	       YO[i * ism] = isum;
+	  }
+     }
+
+     /* no final element, since m is odd */
+
+     STACK_FREE(buf);
+
+     {
+	  plan_rdft *cld = (plan_rdft *) ego->cld;
+	  cld->apply((plan *) cld, I, O);
+     }
+
+}
+
+/***************************************************************************/
+
+static void awake(plan *ego_, int flg) /* forwards flg to the child plan and to the twiddle table */
+{
+     P *ego = (P *) ego_;
+     static const tw_instr generic_tw[] = {
+	  { TW_GENERIC, 0, 0 },
+	  { TW_NEXT, 1, 0 }
+     };
+
+     AWAKE(ego->cld, flg);
+     /* FIXME: can we get away with fewer twiddles? */
+     X(twiddle_awake)(flg, &ego->td, generic_tw,
+		      ego->r * ego->m, ego->r, ego->m); /* first size argument is n = r * m */
+}
+
+static void destroy(plan *ego_) /* plan_adt destroy hook: releases the child plan */
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld);
+}
+
+static void print(const plan *ego_, printer *p) /* prints direction, radix r and the child plan */
+{
+     const P *ego = (const P *) ego_;
+
+     p->print(p, "(rdft-generic-%s-%d%(%p%))", 
+	      ego->kind == R2HC ? "r2hc-dit" : "hc2r-dif",
+	      ego->r, ego->cld);
+}
+
+static int applicable0(const solver *ego_, const problem *p_) /* structural check only; planner flags are handled in applicable() */
+{
+     if (RDFTP(p_)) {
+	  const S *ego = (const S *) ego_;
+          const problem_rdft *p = (const problem_rdft *) p_;
+          return (1
+		  && p->sz->rnk == 1
+		  && p->vecsz->rnk == 0 /* single transform only, no vector loop */
+		  && p->sz->dims[0].n > 1
+                  && p->sz->dims[0].n % 2 /* ensure r and n/r odd */
+                  && p->kind[0] == ego->kind
+	       );
+     }
+
+     return 0;
+}
+
+static int applicable(const solver *ego_, const problem *p_, 
+		      const planner *plnr) /* applicable0 plus the planner-flag restrictions */
+{
+     if (NO_UGLYP(plnr)) return 0; /* always ugly */
+     if (!applicable0(ego_, p_)) return 0;
+
+     if (NO_LARGE_GENERICP(plnr)) { /* under this flag, reject sizes whose first divisor reaches GENERIC_MIN_BAD */
+          const problem_rdft *p = (const problem_rdft *) p_;
+	  if (X(first_divisor)(p->sz->dims[0].n) >= GENERIC_MIN_BAD) return 0; 
+     }
+     return 1;
+}
+
+static plan *mkplan(const solver *ego, const problem *p_, planner *plnr) /* returns a new plan, or 0 on failure */
+{
+     const problem_rdft *p = (const problem_rdft *) p_; /* only dereferenced after applicable() has accepted p_ */
+     P *pln = 0;
+     int n, r, m;
+     int is, os;
+     plan *cld = (plan *) 0;
+     problem *cldp;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego, p_, plnr))
+          goto nada;
+
+     n = p->sz->dims[0].n;
+     is = p->sz->dims[0].is;
+     os = p->sz->dims[0].os;
+
+     r = X(first_divisor)(n); /* radix handled by this plan; the child handles the remaining factor m */
+     m = n / r;
+
+     if (R2HC_KINDP(p->kind[0])) { /* child: r transforms of size m, input decimated by r */
+	  cldp = X(mkproblem_rdft_d)(X(mktensor_1d)(m, r * is, os),
+				     X(mktensor_1d)(r, is, m * os),
+				     p->I, p->O, p->kind);
+     }
+     else { /* HC2R: same split with the roles of input and output strides exchanged */
+	  cldp = X(mkproblem_rdft_d)(X(mktensor_1d)(m, is, r * os),
+				     X(mktensor_1d)(r, m * is, os),
+				     p->I, p->O, p->kind);
+     }
+     if (!(cld = X(mkplan_d)(plnr, cldp))) goto nada;
+
+     pln = MKPLAN_RDFT(P, &padt, R2HC_KINDP(p->kind[0]) ? apply_dit:apply_dif);
+
+     pln->os = R2HC_KINDP(p->kind[0]) ? os : is; /* stride of the array that the apply function modifies in place */
+     pln->r = r;
+     pln->m = m;
+     pln->cld = cld;
+     pln->td = 0; /* twiddles are obtained later, in awake() */
+     pln->kind = p->kind[0];
+
+     X(ops_zero)(&pln->super.super.ops);
+     pln->super.super.ops.add = 4 * r * r;
+     pln->super.super.ops.mul = 4 * r * r;
+     /* loads + stores, minus loads + stores for all DIT codelets */
+     pln->super.super.ops.other = 4 * r + 4 * r * r - (6*r - 2);
+     X(ops_madd)((m - 1)/2, &pln->super.super.ops, &cld->ops,
+		 &pln->super.super.ops);
+     pln->super.super.ops.add += 2 * r * r;
+     pln->super.super.ops.mul += 2 * r * r;
+     pln->super.super.ops.other += 3 * r + 3 * r * r - 2*r;
+
+     return &(pln->super.super);
+
+ nada: /* failure path: cld and pln are both still 0 whenever this label is reached */
+     X(plan_destroy_internal)(cld);
+     X(ifree0)(pln);
+     return (plan *) 0;
+}
+
+/* constructors: mksolver builds a solver for one transform direction */
+
+static solver *mksolver(rdft_kind kind)
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     slv->kind = kind;
+     return &(slv->super);
+}
+
+void X(rdft_generic_register)(planner *p) /* registers the R2HC solver, then the HC2R solver */
+{
+     REGISTER_SOLVER(p, mksolver(R2HC));
+     REGISTER_SOLVER(p, mksolver(HC2R));
+}
diff --git a/src/fftw3/rdft/rindirect.c b/src/fftw3/rdft/rindirect.c
new file mode 100644
index 0000000..a53020f
--- /dev/null
+++ b/src/fftw3/rdft/rindirect.c
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rindirect.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+
+/* solvers/plans for vectors of small RDFT's that cannot be done
+   in-place directly.  Use a rank-0 plan to rearrange the data
+   before or after the transform.  Can also change an out-of-place
+   plan into a copy + in-place (where the in-place transform
+   is e.g. unit stride). */
+
+/* FIXME: merge with rank-geq2.c(?), since this is just a special case
+   of a rank split where the first/second transform has rank 0. */
+
+#include "rdft.h"
+
+typedef problem *(*mkcld_t) (const problem_rdft *p); /* makes a problem from p */
+
+typedef struct {
+     rdftapply apply; /* apply routine installed in plans of this variant */
+     problem *(*mkcld)(const problem_rdft *p); /* makes the transform child's problem */
+     const char *nam; /* variant name written by print() */
+} ndrct_adt;
+
+typedef struct {
+     solver super;
+     const ndrct_adt *adt; /* the variant (before/after) this solver implements */
+} S;
+
+typedef struct {
+     plan_rdft super;
+     plan *cldcpy, *cld; /* rank-0 rearrangement child; transform child */
+     const S *slv; /* solver that made the plan; print() reads its adt */
+} P;
+
+/*-----------------------------------------------------------------------*/
+/* first rearrange, then transform: the cldcpy child is applied from I
+   to O, after which the cld child is applied to O in place */
+static void apply_before(const plan *ego_, R *I, R *O)
+{
+     const P *self = (const P *) ego_;
+     plan *rearrange = self->cldcpy;
+     plan *transform = self->cld;
+
+     /* stage 1: the copy child takes the data from I into O */
+     ((plan_rdft *) rearrange)->apply(rearrange, I, O);
+
+     /* stage 2: the transform child then works on O alone */
+     ((plan_rdft *) transform)->apply(transform, O, O);
+}
+
+static problem *mkcld_before(const problem_rdft *p)
+{    /* child problem runs O -> O, on INPLACE_OS copies of both tensors */
+     tensor *sz = X(tensor_copy_inplace)(p->sz, INPLACE_OS);
+     tensor *vecsz = X(tensor_copy_inplace)(p->vecsz, INPLACE_OS);
+     return X(mkproblem_rdft_d)(sz, vecsz, p->O, p->O, p->kind);
+}
+
+static const ndrct_adt adt_before =
+{    /* field order: apply, mkcld, nam */
+     apply_before, mkcld_before, "rdft-indirect-before"
+};
+
+/*-----------------------------------------------------------------------*/
+/* first transform, then rearrange: the cld child is applied to I in
+   place, after which the cldcpy child is applied from I to O */
+
+static void apply_after(const plan *ego_, R *I, R *O)
+{
+     const P *self = (const P *) ego_;
+     plan *transform = self->cld;
+     plan *rearrange = self->cldcpy;
+
+     /* stage 1: the transform child works on I alone */
+     ((plan_rdft *) transform)->apply(transform, I, I);
+
+     /* stage 2: the copy child takes the result from I into O */
+     ((plan_rdft *) rearrange)->apply(rearrange, I, O);
+}
+
+static problem *mkcld_after(const problem_rdft *p)
+{    /* child problem runs I -> I, on INPLACE_IS copies of both tensors */
+     tensor *sz = X(tensor_copy_inplace)(p->sz, INPLACE_IS);
+     tensor *vecsz = X(tensor_copy_inplace)(p->vecsz, INPLACE_IS);
+     return X(mkproblem_rdft_d)(sz, vecsz, p->I, p->I, p->kind);
+}
+
+static const ndrct_adt adt_after =
+{    /* field order: apply, mkcld, nam */
+     apply_after, mkcld_after, "rdft-indirect-after"
+};
+
+/*-----------------------------------------------------------------------*/
+static void destroy(plan *ego_)
+{    /* hand both children to plan_destroy_internal: cld, then cldcpy */
+     P *self = (P *) ego_;
+     X(plan_destroy_internal)(self->cld);
+     X(plan_destroy_internal)(self->cldcpy);
+}
+
+static void awake(plan *ego_, int flg)
+{    /* pass flg on to both children: cldcpy first, then cld */
+     P *self = (P *) ego_;
+     AWAKE(self->cldcpy, flg);
+     AWAKE(self->cld, flg);
+}
+
+static void print(const plan *ego_, printer *p)
+{    /* writes the variant name followed by the cld and cldcpy plans */
+     const P *self = (const P *) ego_;
+     const char *label = self->slv->adt->nam;
+     p->print(p, "(%s%(%p%)%(%p%))", label, self->cld, self->cldcpy);
+}
+
+static int applicable0(const solver *ego_, const problem *p_,
+		       const planner *plnr)
+{    /* nonzero iff p_ is an RDFT problem that this variant may handle */
+     if (RDFTP(p_)) {
+	  const S *ego = (const S *) ego_;
+          const problem_rdft *p = (const problem_rdft *) p_;
+          return (1
+                  && FINITE_RNK(p->vecsz->rnk) /* vector rank must be finite */
+
+                  /* problem must be a nontrivial transform, not just a copy */
+                  && p->sz->rnk > 0
+
+                  && (0 /* any one of the three cases below suffices */
+
+		      /* problem must be in-place & require some
+		         rearrangement of the data */
+		      || (p->I == p->O
+			  && !(X(tensor_inplace_strides2)(p->sz, p->vecsz)))
+
+		      /* or problem must be out of place, transforming
+			 from stride 1/2 to bigger stride, for apply_after */
+		      || (p->I != p->O && ego->adt->apply == apply_after
+			  && DESTROY_INPUTP(plnr) /* apply_after runs cld on I itself */
+			  && X(tensor_min_istride)(p->sz) <= 2
+			  && X(tensor_min_ostride)(p->sz) > 2)
+			  
+		      /* or problem must be out of place, transforming
+			 to stride 1/2 from bigger stride, for apply_before */
+		      || (p->I != p->O && ego->adt->apply == apply_before
+			  && X(tensor_min_ostride)(p->sz) <= 2
+			  && X(tensor_min_istride)(p->sz) > 2)
+			  
+		       )
+	       );
+     }
+
+     return 0; /* not an RDFT problem */
+}
+
+static int applicable(const solver *ego_, const problem *p_,
+		      const planner *plnr)
+{
+     const problem_rdft *prob = (const problem_rdft *) p_;
+
+     if (!applicable0(ego_, p_, plnr))
+	  return 0;
+
+     /* when the planner sets NO_INDIRECT_OP, out-of-place problems
+        are refused; only the in-place case remains applicable */
+     return !(NO_INDIRECT_OP_P(plnr) && prob->I != prob->O);
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{    /* plans a copy child plus a transform child for p_; 0 on failure */
+     const problem_rdft *p = (const problem_rdft *) p_;
+     const S *ego = (const S *) ego_;
+     P *pln;
+     plan *cld = 0, *cldcpy = 0; /* null until planned; nada destroys both as-is */
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+          return (plan *) 0;
+
+     plnr->planner_flags |= NO_BUFFERING; /* NOTE(review): not restored in this function */
+
+     cldcpy = X(mkplan_d)(plnr, 
+			  X(mkproblem_rdft_d)(
+			       X(mktensor_0d)(), /* rank-0 transform size */
+			       X(tensor_append)(p->vecsz, p->sz), /* vector dims plus transform dims */
+			       p->I, p->O, (rdft_kind *) 0)); /* no kind array is passed */
+     if (!cldcpy) goto nada;
+
+     cld = X(mkplan_d)(plnr, ego->adt->mkcld(p)); /* variant-specific in-place problem */
+     if (!cld) goto nada;
+
+     pln = MKPLAN_RDFT(P, &padt, ego->adt->apply);
+     pln->cld = cld;
+     pln->cldcpy = cldcpy;
+     pln->slv = ego;
+     X(ops_add)(&cld->ops, &cldcpy->ops, &pln->super.super.ops); /* ops of both children combined */
+
+     return &(pln->super.super);
+
+ nada: /* failure path: give back whatever was planned so far */
+     X(plan_destroy_internal)(cld);
+     X(plan_destroy_internal)(cldcpy);
+     return (plan *)0;
+}
+
+static solver *mksolver(const ndrct_adt *adt)
+{    /* make an S solver whose only method is mkplan; remember adt */
+     static const solver_adt indirect_sadt = { mkplan };
+     S *made = MKSOLVER(S, &indirect_sadt);
+     made->adt = adt;
+     return &made->super;
+}
+
+void X(rdft_indirect_register)(planner *p)
+{    /* one solver per variant, registered in table order */
+     static const ndrct_adt *const variants[] = {
+	  &adt_before, &adt_after
+     };
+     const unsigned count = sizeof(variants) / sizeof(variants[0]);
+     unsigned k;
+     for (k = 0; k != count; ++k)
+          REGISTER_SOLVER(p, mksolver(variants[k]));
+}
diff --git a/src/fftw3/rdft/rnop.c b/src/fftw3/rdft/rnop.c
new file mode 100644
index 0000000..e784a5a
--- /dev/null
+++ b/src/fftw3/rdft/rnop.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rnop.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* plans for vrank -infty RDFTs (nothing to do) */
+
+#include "rdft.h"
+
+static void apply(const plan *ego_, R *I, R *O)
+{    /* nothing is computed: all three arguments are marked unused */
+     UNUSED(O);
+     UNUSED(I);
+     UNUSED(ego_);
+}
+
+static int applicable(const solver *ego_, const problem *p_)
+{
+     const problem_rdft *p;
+
+     UNUSED(ego_);
+     if (!RDFTP(p_))
+          return 0;
+     p = (const problem_rdft *) p_;
+
+     /* case 1 : -infty vector rank */
+     if (p->vecsz->rnk == RNK_MINFTY)
+          return 1;
+
+     /* case 2 : rank-0 in-place rdft */
+     return (p->sz->rnk == 0
+	     && FINITE_RNK(p->vecsz->rnk)
+	     && p->O == p->I
+	     && X(tensor_inplace_strides)(p->vecsz));
+}
+
+static void print(const plan *ego, printer *p)
+{    /* the plan has no state to show; a fixed tag is written */
+     UNUSED(ego);
+     p->print(p, "(rdft-nop)");
+}
+
+static plan *mkplan(const solver *ego, const problem *p, planner *plnr)
+{
+     /* this plan uses the null awake and null destroy hooks */
+     static const plan_adt nop_padt = {
+	  X(rdft_solve), X(null_awake), print, X(plan_null_destroy)
+     };
+     plan_rdft *nop = 0;
+
+     UNUSED(plnr);
+     if (applicable(ego, p)) {
+          nop = MKPLAN_RDFT(plan_rdft, &nop_padt, apply);
+          X(ops_zero)(&nop->super.ops);	/* operation counts set to zero */
+          return &(nop->super);
+     }
+     return (plan *) 0;
+}
+
+static solver *mksolver(void)
+{    /* a bare solver (no extra fields) whose only method is mkplan */
+     static const solver_adt sadt = { mkplan };
+     return MKSOLVER(solver, &sadt);
+}
+
+void X(rdft_nop_register)(planner *p)
+{    /* registers the single nop solver with planner p */
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/rdft/rplan.c b/src/fftw3/rdft/rplan.c
new file mode 100644
index 0000000..066e925
--- /dev/null
+++ b/src/fftw3/rdft/rplan.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rplan.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+plan *X(mkplan_rdft)(size_t size, const plan_adt *adt, rdftapply apply)
+{
+     /* X(mkplan)(size, adt) makes the plan; view it as an RDFT plan */
+     plan_rdft *made = (plan_rdft *) X(mkplan)(size, adt);
+
+     /* install the RDFT-specific apply hook */
+     made->apply = apply;
+     return &made->super;
+}
diff --git a/src/fftw3/rdft/rproblem.c b/src/fftw3/rdft/rproblem.c
new file mode 100644
index 0000000..0e3441c
--- /dev/null
+++ b/src/fftw3/rdft/rproblem.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rproblem.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+#include <stddef.h>
+
+static void destroy(problem *ego_)
+{    /* releases the kind array (when separate), both tensors, then ego_ */
+     problem_rdft *ego = (problem_rdft *) ego_;
+#if !defined(STRUCT_HACK_C99) && !defined(STRUCT_HACK_KR)
+     X(ifree0)(ego->kind); /* only this build MALLOCs kind[] on its own */
+#endif
+     X(tensor_destroy2)(ego->vecsz, ego->sz);
+     X(ifree)(ego_);
+}
+
+static void kind_hash(md5 *m, const rdft_kind *kind, int rnk)
+{    /* feed kind[0] .. kind[rnk-1], in that order, into the md5 state */
+     int left;
+     for (left = rnk; left > 0; --left)
+	  X(md5int)(m, kind[rnk - left]);
+}
+
+static void hash(const problem *p_, md5 *m)
+{    /* folds the problem's identifying properties into m, in fixed order */
+     const problem_rdft *p = (const problem_rdft *) p_;
+     X(md5puts)(m, "rdft"); /* problem-type tag */
+     X(md5int)(m, p->I == p->O); /* in-place or not */
+     kind_hash(m, p->kind, p->sz->rnk); /* one kind per transform dimension */
+     X(md5int)(m, X(alignment_of)(p->I));
+     X(md5int)(m, X(alignment_of)(p->O));
+     X(tensor_md5)(m, p->sz);
+     X(tensor_md5)(m, p->vecsz);
+}
+
+static void recur(const iodim *dims, int rnk, R *I)
+{
+     int k, len, stride;
+     if (rnk == RNK_MINFTY || rnk < 0)
+          return;	/* nothing to zero */
+     if (rnk == 0) {
+          I[0] = K(0.0);	/* a single element */
+          return;
+     }
+     len = dims[0].n;
+     stride = dims[0].is;
+     if (rnk > 1) {	/* zero each slice along the first dimension */
+	  for (k = 0; k < len; ++k)
+	       recur(dims + 1, rnk - 1, I + k * stride);
+	  return;
+     }
+     /* rnk == 1: redundant with the recursion above, but faster */
+     for (k = 0; k < len; ++k)
+	  I[k * stride] = K(0.0);
+}
+
+void X(rdft_zerotens)(tensor *sz, R *I)
+{    /* zeroes the elements of I addressed by sz (its dims' n and is) */
+     recur(sz->dims, sz->rnk, I);
+}
+
+#define KSTR_LEN 8
+
+const char *X(rdft_kind_str)(rdft_kind kind)
+{    /* returns the name stored at index kind of the table below */
+     static const char kstr[][KSTR_LEN] = { /* 7-char names plus NUL fill a row */
+	  "r2hc", "r2hc01", "r2hc10", "r2hc11",
+	  "hc2r", "hc2r01", "hc2r10", "hc2r11",
+	  "dht",
+	  "redft00", "redft01", "redft10", "redft11",
+	  "rodft00", "rodft01", "rodft10", "rodft11"
+     };
+     A(kind >= 0 && kind < sizeof(kstr) / KSTR_LEN); /* kind must index a row */
+     return kstr[kind];
+}
+
+static void print(problem *ego_, printer *p)
+{    /* writes "(rdft ...)" followed by one integer per transform kind */
+     const problem_rdft *ego = (const problem_rdft *) ego_;
+     int i;
+     p->print(p, "(rdft %d %td %T %T", 
+	      X(alignment_of)(ego->I),
+	      ego->O - ego->I, /* element distance from I to O */
+	      ego->sz,
+	      ego->vecsz);
+     for (i = 0; i < ego->sz->rnk; ++i)
+	  p->print(p, " %d", (int)ego->kind[i]); /* kinds shown numerically */
+     p->print(p, ")");
+}
+
+static void zero(const problem *ego_)
+{    /* zero the input array over the vector and transform dims */
+     const problem_rdft *self = (const problem_rdft *) ego_;
+     tensor *whole = X(tensor_append)(self->vecsz, self->sz);
+     X(rdft_zerotens)(whole, UNTAINT(self->I));
+     X(tensor_destroy)(whole);
+}
+
+static const problem_adt padt = /* method table shared by every rdft problem */
+{
+     hash,
+     zero,
+     print,
+     destroy
+};
+
+int X(problem_rdft_p)(const problem *p)
+{    /* a problem is an rdft problem iff it carries this file's padt */
+     return (p->adt == &padt);
+}
+
+/* Dimensions of size 1 that are not REDFT/RODFT are no-ops and can be
+   eliminated.  REDFT/RODFT unit dimensions often have factors of 2.0
+   and suchlike from normalization and phases, although in principle
+   these constant factors from different dimensions could be combined. */
+static int nontrivial(const iodim *d, rdft_kind kind)
+{
+     if (d->n > 1 || kind == R2HC11 || kind == HC2R11) return 1;
+     return (REODFT_KINDP(kind) && !(kind == REDFT01 || kind == RODFT01));
+}
+
+problem *X(mkproblem_rdft)(const tensor *sz, const tensor *vecsz,
+			   R *I, R *O, const rdft_kind *kind)
+{    /* builds an rdft problem; sz and vecsz are only read, never freed here */
+     problem_rdft *ego;
+     int rnk = sz->rnk; /* reset below to the number of dimensions kept */
+     int i;
+
+     A(X(tensor_kosherp)(sz));
+     A(X(tensor_kosherp)(vecsz));
+     A(FINITE_RNK(sz->rnk));
+
+     if (UNTAINT(I) == UNTAINT(O)) /* same array once untainted: treat as in-place */
+	  I = O = JOIN_TAINT(I, O);
+
+     for (i = rnk = 0; i < sz->rnk; ++i) { /* pass 1: count the nontrivial dims */
+          A(sz->dims[i].n > 0);
+          if (nontrivial(sz->dims + i, kind[i]))
+               ++rnk;
+     }
+
+#if defined(STRUCT_HACK_KR) /* presumably kind[] trails the struct in the two hack builds */
+     ego = (problem_rdft *) X(mkproblem)(sizeof(problem_rdft)
+					 + sizeof(rdft_kind)
+					 * (rnk > 0 ? rnk - 1 : 0), &padt);
+#elif defined(STRUCT_HACK_C99)
+     ego = (problem_rdft *) X(mkproblem)(sizeof(problem_rdft)
+					 + sizeof(rdft_kind) * rnk, &padt);
+#else
+     ego = (problem_rdft *) X(mkproblem)(sizeof(problem_rdft), &padt);
+     ego->kind = (rdft_kind *) MALLOC(sizeof(rdft_kind) * rnk, PROBLEMS); /* freed in destroy() */
+#endif
+
+     /* do compression and sorting as in X(tensor_compress), but take
+	transform kind into account (sigh) */
+     ego->sz = X(mktensor)(rnk);
+     for (i = rnk = 0; i < sz->rnk; ++i) { /* pass 2: copy kept dims with their kinds */
+          if (nontrivial(sz->dims + i, kind[i])) {
+	       ego->kind[rnk] = kind[i];
+               ego->sz->dims[rnk++] = sz->dims[i];
+	  }
+     }
+     for (i = 0; i + 1 < rnk; ++i) { /* order dims by X(dimcmp); kinds move with them */
+	  int j;
+	  for (j = i + 1; j < rnk; ++j)
+	       if (X(dimcmp)(ego->sz->dims + i, ego->sz->dims + j) > 0) {
+		    iodim dswap;
+		    rdft_kind kswap;
+		    dswap = ego->sz->dims[i];
+		    ego->sz->dims[i] = ego->sz->dims[j];
+		    ego->sz->dims[j] = dswap;
+		    kswap = ego->kind[i];
+		    ego->kind[i] = ego->kind[j];
+		    ego->kind[j] = kswap;
+	       }
+     }
+
+     for (i = 0; i < rnk; ++i)
+	  if (ego->sz->dims[i].n == 2 && (ego->kind[i] == REDFT00
+					  || ego->kind[i] == DHT
+					  || ego->kind[i] == HC2R))
+	       ego->kind[i] = R2HC; /* size-2 transforms are equivalent */
+
+     ego->vecsz = X(tensor_compress_contiguous)(vecsz);
+     ego->I = I;
+     ego->O = O;
+
+     A(FINITE_RNK(ego->sz->rnk));
+
+     return &(ego->super);
+}
+
+/* Same as X(mkproblem_rdft), but also destroy input tensors. */
+problem *X(mkproblem_rdft_d)(tensor *sz, tensor *vecsz,
+			     R *I, R *O, const rdft_kind *kind)
+{
+     /* build first, while sz and vecsz are still alive */
+     problem *made = X(mkproblem_rdft)(sz, vecsz, I, O, kind);
+     X(tensor_destroy2)(vecsz, sz);	/* then consume both inputs */
+     return made;
+}
+
+/* As above, but for rnk <= 1 only and takes a scalar kind parameter */
+problem *X(mkproblem_rdft_1)(const tensor *sz, const tensor *vecsz,
+			     R *I, R *O, rdft_kind kind)
+{    /* the scalar kind is passed on as a one-element kind array */
+     A(sz->rnk <= 1); /* one kind can describe at most one dimension */
+     return X(mkproblem_rdft)(sz, vecsz, I, O, &kind);
+}
+
+problem *X(mkproblem_rdft_1_d)(tensor *sz, tensor *vecsz,
+			       R *I, R *O, rdft_kind kind)
+{    /* scalar-kind form of X(mkproblem_rdft_d), which destroys sz and vecsz */
+     A(sz->rnk <= 1); /* one kind can describe at most one dimension */
+     return X(mkproblem_rdft_d)(sz, vecsz, I, O, &kind);
+}
diff --git a/src/fftw3/rdft/rrank-geq2.c b/src/fftw3/rdft/rrank-geq2.c
new file mode 100644
index 0000000..78f359f
--- /dev/null
+++ b/src/fftw3/rdft/rrank-geq2.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rrank-geq2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* plans for RDFT of rank >= 2 (multidimensional) */
+
+/* FIXME: this solver cannot strictly be applied to multidimensional
+   DHTs, since the latter are not separable...up to rnk-1 additional
+   post-processing passes may be required.  See also:
+
+   R. N. Bracewell, O. Buneman, H. Hao, and J. Villasenor, "Fast
+   two-dimensional Hartley transform," Proc. IEEE 74, 1282-1283 (1986).
+
+   H. Hao and R. N. Bracewell, "A three-dimensional DFT algorithm
+   using the fast Hartley transform," Proc. IEEE 75(2), 264-266 (1987).
+*/
+
+#include "rdft.h"
+
+typedef struct {
+     solver super;
+     int spltrnk; /* split selector handed to X(pickdim) by picksplit() */
+     const int *buddies; /* selector list; the register function shares one array */
+     int nbuddies; /* number of entries in buddies */
+} S;
+
+typedef struct {
+     plan_rdft super;
+
+     plan *cld1, *cld2; /* applied in this order: I -> O, then O -> O */
+     const S *solver; /* creator; print() reads its spltrnk */
+} P;
+
+/* Compute the multi-dimensional RDFT as two lower-rank passes: cld1
+   goes from I to O, and cld2 then runs on O in place. */
+static void apply(const plan *ego_, R *I, R *O)
+{
+     const P *self = (const P *) ego_;
+     plan *first = self->cld1;
+     plan *second = self->cld2;
+
+     /* pass 1: I -> O */
+     ((plan_rdft *) first)->apply(first, I, O);
+     /* pass 2: O -> O */
+     ((plan_rdft *) second)->apply(second, O, O);
+}
+
+
+static void awake(plan *ego_, int flg)
+{    /* pass flg on to both children: cld1 first, then cld2 */
+     P *self = (P *) ego_;
+     AWAKE(self->cld1, flg);
+     AWAKE(self->cld2, flg);
+}
+
+static void destroy(plan *ego_)
+{    /* hand both children to plan_destroy_internal: cld2, then cld1 */
+     P *self = (P *) ego_;
+     X(plan_destroy_internal)(self->cld2);
+     X(plan_destroy_internal)(self->cld1);
+}
+
+static void print(const plan *ego_, printer *p)
+{    /* shows the solver's spltrnk, then the two child plans in order */
+     const P *self = (const P *) ego_;
+     const int split = self->solver->spltrnk;
+     p->print(p, "(rdft-rank>=2/%d%(%p%)%(%p%))",
+	      split, self->cld1, self->cld2);
+}
+
+static int picksplit(const S *ego, const tensor *sz, int *rp)
+{
+     int found;
+
+     A(sz->rnk > 1); /* a tensor of rnk <= 1 cannot be split */
+     found = X(pickdim)(ego->spltrnk, ego->buddies, ego->nbuddies, sz, 1, rp);
+     if (!found) return 0;
+     ++*rp; /* pickdim yields a dimension index; turn it into a rank */
+     return *rp < sz->rnk; /* the split must reduce the rank */
+}
+
+static int applicable0(const solver *ego_, const problem *p_, int *rp)
+{
+     const problem_rdft *prob;
+
+     /* only RDFT problems are candidates */
+     if (!RDFTP(p_))
+          return 0;
+     prob = (const problem_rdft *) p_;
+
+     /* need at least two dimensions, and a split point (stored in *rp)
+        that picksplit() accepts */
+     return prob->sz->rnk >= 2 && picksplit((const S *) ego_, prob->sz, rp);
+}
+
+/* TODO: revise this. */
+static int applicable(const solver *ego_, const problem *p_, 
+		      const planner *plnr, int *rp)
+{    /* applicable0() plus planner-flag restrictions; *rp gets the split rank */
+     const S *ego = (const S *)ego_;
+
+     if (!applicable0(ego_, p_, rp)) return 0;
+
+     /* fixed spltrnk (unlike fftw2's spltrnk=1, default buddies[0] is
+        spltrnk=0, which is an asymptotic "theoretical optimum" for
+        an ideal cache; it's equivalent to spltrnk=1 for rnk < 4). */
+     if (NO_RANK_SPLITSP(plnr) && (ego->spltrnk != ego->buddies[0]))
+	  return 0; /* under this flag only the buddies[0] solver is allowed */
+
+     if (NO_UGLYP(plnr)) {
+	  /* Heuristic: if the vector stride is greater than the transform
+	     sz, don't use (prefer to do the vector loop first with a
+	     vrank-geq1 plan). */
+	  const problem_rdft *p = (const problem_rdft *) p_;
+
+	  if (p->vecsz->rnk > 0 &&
+	      X(tensor_min_stride)(p->vecsz) > X(tensor_max_index)(p->sz))
+	       return 0;
+     }
+
+     return 1;
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{    /* splits p->sz at spltrnk and plans one child per part; 0 on failure */
+     const S *ego = (const S *) ego_;
+     const problem_rdft *p;
+     P *pln;
+     plan *cld1 = 0, *cld2 = 0; /* null until planned; nada destroys both as-is */
+     tensor *sz1, *sz2, *vecszi, *sz2i; /* temporaries, destroyed on every exit after the split */
+     int spltrnk; /* filled in by applicable() */
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr, &spltrnk))
+          return (plan *) 0;
+
+     p = (const problem_rdft *) p_;
+     X(tensor_split)(p->sz, &sz1, spltrnk, &sz2); /* sz1 and sz2 receive the two parts */
+     vecszi = X(tensor_copy_inplace)(p->vecsz, INPLACE_OS);
+     sz2i = X(tensor_copy_inplace)(sz2, INPLACE_OS);
+
+     cld1 = X(mkplan_d)(plnr, 
+			X(mkproblem_rdft_d)(X(tensor_copy)(sz2), /* transform over sz2 ... */
+					    X(tensor_append)(p->vecsz, sz1), /* ... looping over vecsz and sz1 */
+					    p->I, p->O, p->kind + spltrnk)); /* kinds from index spltrnk on */
+     if (!cld1) goto nada;
+
+     cld2 = X(mkplan_d)(plnr, 
+			X(mkproblem_rdft_d)(
+			     X(tensor_copy_inplace)(sz1, INPLACE_OS), /* transform over sz1 ... */
+			     X(tensor_append)(vecszi, sz2i), /* ... looping over vecszi and sz2i */
+			     p->O, p->O, p->kind)); /* O -> O, kinds from index 0 */
+     if (!cld2) goto nada;
+
+     pln = MKPLAN_RDFT(P, &padt, apply);
+
+     pln->cld1 = cld1;
+     pln->cld2 = cld2;
+
+     pln->solver = ego;
+     X(ops_add)(&cld1->ops, &cld2->ops, &pln->super.super.ops); /* ops of both children combined */
+
+     X(tensor_destroy4)(sz2, sz1, vecszi, sz2i);
+
+     return &(pln->super.super);
+
+ nada: /* failure path: give back planned children and the temporaries */
+     X(plan_destroy_internal)(cld2);
+     X(plan_destroy_internal)(cld1);
+     X(tensor_destroy4)(sz2, sz1, vecszi, sz2i);
+     return (plan *) 0;
+}
+
+static solver *mksolver(int spltrnk, const int *buddies, int nbuddies)
+{    /* make an S solver bound to mkplan and store the three arguments */
+     static const solver_adt geq2_sadt = { mkplan };
+     S *made = MKSOLVER(S, &geq2_sadt);
+     made->nbuddies = nbuddies;
+     made->buddies = buddies;
+     made->spltrnk = spltrnk;
+     return &made->super;
+}
+
+void X(rdft_rank_geq2_register)(planner *p)
+{    /* registers one solver per entry of buddies, in array order */
+     int i;
+     static const int buddies[] = { 0, 1, -2 }; /* spltrnk values; all solvers share this array */
+
+     const int nbuddies = sizeof(buddies) / sizeof(buddies[0]);
+
+     for (i = 0; i < nbuddies; ++i)
+          REGISTER_SOLVER(p, mksolver(buddies[i], buddies, nbuddies));
+
+     /* FIXME: Should we try more buddies?  See also dft/rank-geq2. */
+}
diff --git a/src/fftw3/rdft/rrank0.c b/src/fftw3/rdft/rrank0.c
new file mode 100644
index 0000000..4e212ab
--- /dev/null
+++ b/src/fftw3/rdft/rrank0.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rrank0.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+/* plans for rank-0 RDFTs (copy operations) */
+
+#include "rdft.h"
+
+#ifdef HAVE_STRING_H
+#include <string.h>		/* for memcpy() */
+#endif
+
+
+typedef struct {
+     rdftapply apply; /* copy routine installed in plans of this variant */
+     int (*applicable)(const problem_rdft *p); /* variant-specific test */
+     const char *nam; /* variant name written by print() */
+} rnk0adt;
+
+typedef struct {
+     solver super;
+     const rnk0adt *adt; /* the copy variant this solver implements */
+} S;
+
+typedef struct {
+     plan_rdft super;
+     int vl; /* vector length, set by X(tensor_tornk1) in mkplan */
+     int ivs, ovs; /* input and output vector strides, set likewise */
+     const S *slv; /* creator; print() reads its adt->nam */
+} P;
+
+/* generic applicability function */
+static int applicable(const solver *ego_, const problem *p_)
+{
+     const S *self = (const S *) ego_;
+     const problem_rdft *prob;
+
+     if (!RDFTP(p_))
+          return 0;
+     prob = (const problem_rdft *) p_;
+
+     /* out-of-place, rank-0 (copy) problems only; the per-variant test
+        is consulted last */
+     return prob->I != prob->O && prob->sz->rnk == 0
+	  && self->adt->applicable(prob);
+}
+
+/*-----------------------------------------------------------------------*/
+/* rank-0 rdft, vl == 1: just a copy */
+static void apply_1(const plan *ego_, R *I, R *O)
+{    /* copies exactly one element from I to O; the plan is not consulted */
+     UNUSED(ego_);
+     *O = *I;
+}
+
+static int applicable_1(const problem_rdft *p)
+{    /* the single-element copy needs a rank-0 vector */
+     return (p->vecsz->rnk == 0);
+}
+
+static const rnk0adt adt_cpy1 =
+{    /* field order: apply, applicable, nam */
+     apply_1, applicable_1, "rdft-rank0-cpy1"
+};
+
+/*-----------------------------------------------------------------------*/
+/* rank-0 rdft, vl > 1: just a copy loop (unroll 4) */
+static void apply_vec(const plan *ego_, R *I, R *O)
+{
+     const P *self = (const P *) ego_;
+     const int ivs = self->ivs, ovs = self->ovs;
+     int left;
+
+     /* main part: four loads followed by four stores per round */
+     for (left = self->vl; left >= 4; left -= 4) {
+          R a = *I; I += ivs;
+          R b = *I; I += ivs;
+          R c = *I; I += ivs;
+          R d = *I; I += ivs;
+          *O = a; O += ovs;
+          *O = b; O += ovs;
+          *O = c; O += ovs;
+          *O = d; O += ovs;
+     }
+
+     /* tail: whatever remains (fewer than four), one at a time */
+     for (; left > 0; --left, I += ivs, O += ovs) {
+          *O = *I;
+     }
+}
+
+static int applicable_vec(const problem_rdft *p)
+{    /* rank-1 vector, distinct arrays (applicable() also tests I != O) */
+     return (p->vecsz->rnk == 1 && p->O != p->I);
+}
+
+static const rnk0adt adt_vec =
+{    /* field order: apply, applicable, nam */
+     apply_vec, applicable_vec, "rdft-rank0-vec"
+};
+
+/*-----------------------------------------------------------------------*/
+/* rank-0 rdft, vl > 1, [io]vs == 1, using memcpy */
+static void apply_io1(const plan *ego_, R *I, R *O)
+{    /* a single memcpy moves all vl elements from I to O */
+     const P *self = (const P *) ego_;
+     const size_t nbytes = self->vl * sizeof(R);
+     memcpy(O, I, nbytes);
+}
+
+static int applicable_io1(const problem_rdft *p)
+{
+     /* must first qualify as a vector copy ... */
+     if (!applicable_vec(p))
+          return 0;
+     /* ... and both strides of its only vector dimension must be 1 */
+     return p->vecsz->dims[0].is == 1 && p->vecsz->dims[0].os == 1;
+}
+
+static const rnk0adt adt_io1 =
+{    /* field order: apply, applicable, nam */
+     apply_io1, applicable_io1, "rdft-rank0-io1-memcpy"
+};
+
+/*-----------------------------------------------------------------------*/
+/* generic stuff: */
+
+static void print(const plan *ego_, printer *p)
+{    /* writes the variant name; vl is handed to the %v directive */
+     const P *ego = (const P *) ego_;
+     p->print(p, "(%s%v)", ego->slv->adt->nam, ego->vl);
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{    /* makes a copy plan for p_ with this solver's variant, or returns 0 */
+     const S *ego = (const S *) ego_;
+     const problem_rdft *p;
+     P *pln;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), X(null_awake), print, X(plan_null_destroy)
+     };
+
+     UNUSED(plnr); /* no child plans are made, so the planner is not needed */
+
+     if (!applicable(ego_, p_))
+          return (plan *) 0;
+
+     p = (const problem_rdft *) p_;
+     pln = MKPLAN_RDFT(P, &padt, ego->adt->apply);
+
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); /* fills vl, ivs, ovs from vecsz */
+     pln->slv = ego;
+
+     /* vl loads, vl stores */
+     X(ops_other)(2 * pln->vl, &pln->super.super.ops);
+     return &(pln->super.super);
+}
+
+static solver *mksolver(const rnk0adt *adt)
+{    /* make an S solver whose only method is mkplan; remember adt */
+     static const solver_adt rank0_sadt = { mkplan };
+     S *made = MKSOLVER(S, &rank0_sadt);
+     made->adt = adt;
+     return &made->super;
+}
+
+void X(rdft_rank0_register)(planner *p)
+{    /* one solver per copy variant, registered in table order */
+     static const rnk0adt *const variants[] = {
+	  &adt_cpy1, &adt_vec, &adt_io1
+     };
+     const unsigned count = sizeof(variants) / sizeof(variants[0]);
+     unsigned k;
+     for (k = 0; k != count; ++k)
+          REGISTER_SOLVER(p, mksolver(variants[k]));
+}
diff --git a/src/fftw3/rdft/rsolve.c b/src/fftw3/rdft/rsolve.c
new file mode 100644
index 0000000..a000a56
--- /dev/null
+++ b/src/fftw3/rdft/rsolve.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rsolve.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+/* use the apply() operation for RDFT problems */
+void X(rdft_solve)(const plan *ego_, const problem *p_)
+{    /* call the plan's apply hook on the problem's untainted arrays */
+     const plan_rdft *self = (const plan_rdft *) ego_;
+     const problem_rdft *prob = (const problem_rdft *) p_;
+     self->apply(ego_, UNTAINT(prob->I), UNTAINT(prob->O));
+}
diff --git a/src/fftw3/rdft/rvrank-geq1.c b/src/fftw3/rdft/rvrank-geq1.c
new file mode 100644
index 0000000..2bac2d5
--- /dev/null
+++ b/src/fftw3/rdft/rvrank-geq1.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rvrank-geq1.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+
+/* Plans for handling vector transform loops.  These are *just* the
+   loops, and rely on child plans for the actual RDFTs.
+ 
+   They form a wrapper around solvers that don't have apply functions
+   for non-null vectors.
+ 
+   vrank-geq1 plans also recursively handle the case of multi-dimensional
+   vectors, obviating the need for most solvers to deal with this.  We
+   can also play games here, such as reordering the vector loops.
+ 
+   Each vrank-geq1 plan reduces the vector rank by 1, picking out a
+   dimension determined by the vecloop_dim field of the solver. */
+
+#include "rdft.h"
+
+typedef struct {
+     solver super;
+     int vecloop_dim; /* dimension selector handed to X(pickdim); also shown by print() */
+     const int *buddies; /* array of the vecloop_dim values registered together (shared by all of them) */
+     int nbuddies; /* number of entries in buddies */
+} S;
+
+typedef struct {
+     plan_rdft super;
+
+     plan *cld; /* child plan invoked once per loop iteration */
+     int vl; /* loop count: n of the chosen vector dimension */
+     int ivs, ovs; /* per-iteration element offsets applied to I and O */
+     const S *solver; /* solver that created this plan (read by print) */
+} P;
+
+static void apply(const plan *ego_, R *I, R *O) /* call the child plan vl times over strided I/O */
+{
+     const P *ego = (const P *) ego_;
+     int i, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     rdftapply cldapply = ((plan_rdft *) ego->cld)->apply; /* fetch the child's apply once, outside the loop */
+
+     for (i = 0; i < vl; ++i) {
+          cldapply(ego->cld, I + i * ivs, O + i * ovs); /* i-th call: I offset by i*ivs, O by i*ovs */
+     }
+}
+
+static void awake(plan *ego_, int flg) /* pass the awake flag on to the child plan */
+{
+     P *ego = (P *) ego_;
+     AWAKE(ego->cld, flg); /* this plan holds no state of its own to wake */
+}
+
+static void destroy(plan *ego_) /* release what this plan owns: the child plan */
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld); /* ego itself is not freed in this function */
+}
+
+static void print(const plan *ego_, printer *p) /* emit a description of this plan through p->print */
+{
+     const P *ego = (const P *) ego_;
+     const S *s = ego->solver;
+     p->print(p, "(rdft-vrank>=1-x%d/%d%(%p%))",
+	      ego->vl, s->vecloop_dim, ego->cld); /* args: loop count, solver's vecloop_dim, child plan */
+}
+
+static int pickdim(const S *ego, const tensor *vecsz, int oop, int *dp) /* forward this solver's settings to X(pickdim) */
+{
+     return X(pickdim)(ego->vecloop_dim, ego->buddies, ego->nbuddies,
+		       vecsz, oop, dp); /* callers use *dp as an index into vecsz->dims when this returns nonzero */
+}
+
+static int applicable0(const solver *ego_, const problem *p_, int *dp) /* structural test; fills *dp via pickdim */
+{
+     if (RDFTP(p_)) { /* only problems accepted by RDFTP qualify */
+          const S *ego = (const S *) ego_;
+          const problem_rdft *p = (const problem_rdft *) p_;
+
+          return (1
+                  && FINITE_RNK(p->vecsz->rnk)
+                  && p->vecsz->rnk > 0 /* need at least one vector dimension to loop over */
+                  && pickdim(ego, p->vecsz, p->I != p->O, dp) /* oop argument is nonzero when I and O differ */
+	       );
+     }
+
+     return 0;
+}
+
+static int applicable(const solver *ego_, const problem *p_, 
+		      const planner *plnr, int *dp) /* applicable0 plus planner-flag heuristics */
+{
+     const S *ego = (const S *)ego_;
+     const problem_rdft *p; /* assigned only inside the NO_UGLYP branch below */
+
+     if (!applicable0(ego_, p_, dp)) return 0;
+
+     /* fftw2 behavior */
+     if (NO_VRANK_SPLITSP(plnr) && (ego->vecloop_dim != ego->buddies[0])) /* with this flag, only the solver for buddies[0] is accepted */
+	  return 0;
+
+     if (NO_UGLYP(plnr)) {
+	  p = (const problem_rdft *) p_;
+
+	  /* Heuristic: if the transform is multi-dimensional, and the
+	     vector stride is less than the transform size, then we
+	     probably want to use a rank>=2 plan first in order to combine
+	     this vector with the transform-dimension vectors. */
+	  {
+	       iodim *d = p->vecsz->dims + *dp; /* the dimension picked by applicable0 */
+	       if (1
+		   && p->sz->rnk > 1 
+		   && X(imin)(X(iabs)(d->is), X(iabs)(d->os))
+		   < X(tensor_max_index)(p->sz)
+		    )
+		    return 0;
+	  }
+
+	  /* Heuristic: don't use a vrank-geq1 for rank-0 vrank-1
+	     transforms, since this case is better handled by rank-0
+	     solvers. */
+	  if (p->sz->rnk == 0 && p->vecsz->rnk == 1) return 0;
+
+	  /* prefer threaded version */
+	  if (NONTHREADED_ICKYP(plnr)) return 0;
+
+	  /* exploit built-in vecloops of (ugly) r{e,o}dft solvers */
+	  if (p->vecsz->rnk == 1 && p->sz->rnk == 1 
+	      && REODFT_KINDP(p->kind[0]))
+	       return 0;
+     }
+
+     return 1; /* every rejection test passed */
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) /* build the loop plan, or return 0 */
+{
+     const S *ego = (const S *) ego_;
+     const problem_rdft *p;
+     P *pln;
+     plan *cld;
+     int vdim; /* index into vecsz->dims, filled in by applicable() */
+     iodim *d;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr, &vdim))
+          return (plan *) 0;
+     p = (const problem_rdft *) p_;
+
+     d = p->vecsz->dims + vdim; /* the dimension this plan loops over */
+
+     A(d->n > 1); 
+
+     cld = X(mkplan_d)(plnr, /* child problem: same sz and kind, vecsz copied via tensor_copy_except(vdim) */
+		       X(mkproblem_rdft_d)(
+			    X(tensor_copy)(p->sz),
+			    X(tensor_copy_except)(p->vecsz, vdim),
+			    TAINT(p->I, d->is), TAINT(p->O, d->os),
+			    p->kind));
+     if (!cld) return (plan *) 0; /* no child plan was produced */
+
+     pln = MKPLAN_RDFT(P, &padt, apply);
+
+     pln->cld = cld;
+     pln->vl = d->n; /* loop count and strides come straight from the chosen dimension */
+     pln->ivs = d->is;
+     pln->ovs = d->os;
+
+     pln->solver = ego;
+     X(ops_zero)(&pln->super.super.ops);
+     pln->super.super.ops.other = 3.14159; /* magic to prefer codelet loops */
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
+     pln->super.super.pcost = pln->vl * cld->pcost; /* pcost is vl times the child's pcost */
+
+     return &(pln->super.super);
+}
+
+static solver *mksolver(int vecloop_dim, const int *buddies, int nbuddies) /* build a solver with the given loop settings */
+{
+     static const solver_adt sadt = { mkplan }; /* solver_adt whose only listed member is mkplan */
+     S *slv = MKSOLVER(S, &sadt);
+     slv->vecloop_dim = vecloop_dim;
+     slv->buddies = buddies; /* pointer is stored, not copied: the array must outlive the solver */
+     slv->nbuddies = nbuddies;
+     return &(slv->super);
+}
+
+void X(rdft_vrank_geq1_register)(planner *p) /* register one solver per entry of buddies */
+{
+     int i;
+
+     /* FIXME: Should we try other vecloop_dim values? */
+     static const int buddies[] = { 1, -1 };
+
+     const int nbuddies = sizeof(buddies) / sizeof(buddies[0]);
+
+     for (i = 0; i < nbuddies; ++i)
+          REGISTER_SOLVER(p, mksolver(buddies[i], buddies, nbuddies)); /* each solver gets its own vecloop_dim plus the whole array */
+}
diff --git a/src/fftw3/rdft/solve2.c b/src/fftw3/rdft/solve2.c
new file mode 100644
index 0000000..adaee27
--- /dev/null
+++ b/src/fftw3/rdft/solve2.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: solve2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+#include "rdft.h"
+
+/* use the apply() operation for RDFT2 problems */
+void X(rdft2_solve)(const plan *ego_, const problem *p_)
+{
+     const plan_rdft2 *ego = (const plan_rdft2 *) ego_; /* view the generic plan as a plan_rdft2 */
+     const problem_rdft2 *p = (const problem_rdft2 *) p_; /* view the generic problem as a problem_rdft2 */
+     ego->apply(ego_, UNTAINT(p->r), UNTAINT(p->rio), UNTAINT(p->iio)); /* run the plan on the UNTAINT'ed r, rio and iio */
+}
diff --git a/src/fftw3/rdft/vrank-geq1-rdft2.c b/src/fftw3/rdft/vrank-geq1-rdft2.c
new file mode 100644
index 0000000..0557c13
--- /dev/null
+++ b/src/fftw3/rdft/vrank-geq1-rdft2.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: vrank-geq1-rdft2.c,v 1.1 2008/10/17 06:11:29 scuri Exp $ */
+
+
+/* Plans for handling vector transform loops.  These are *just* the
+   loops, and rely on child plans for the actual RDFT2s.
+ 
+   They form a wrapper around solvers that don't have apply functions
+   for non-null vectors.
+ 
+   vrank-geq1-rdft2 plans also recursively handle the case of
+   multi-dimensional vectors, obviating the need for most solvers to
+   deal with this.  We can also play games here, such as reordering
+   the vector loops.
+ 
+   Each vrank-geq1-rdft2 plan reduces the vector rank by 1, picking out a
+   dimension determined by the vecloop_dim field of the solver. */
+
+#include "rdft.h"
+
+typedef struct {
+     solver super;
+     int vecloop_dim; /* dimension selector handed to X(pickdim); also shown by print() */
+     const int *buddies; /* array of the vecloop_dim values registered together (shared by all of them) */
+     int nbuddies; /* number of entries in buddies */
+} S;
+
+typedef struct {
+     plan_rdft2 super;
+
+     plan *cld; /* child plan invoked once per loop iteration */
+     int vl; /* loop count: n of the chosen vector dimension */
+     int ivs, ovs; /* per-iteration offsets: ivs for r, ovs for both rio and iio */
+     const S *solver; /* solver that created this plan (read by print) */
+} P;
+
+static void apply(const plan *ego_, R *r, R *rio, R *iio) /* call the child plan vl times over strided arrays */
+{
+     const P *ego = (const P *) ego_;
+     int i, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     rdft2apply cldapply = ((plan_rdft2 *) ego->cld)->apply; /* fetch the child's apply once, outside the loop */
+
+     for (i = 0; i < vl; ++i) {
+          cldapply(ego->cld, r + i * ivs, rio + i * ovs, iio + i * ovs); /* r steps by ivs; rio and iio both step by ovs */
+     }
+}
+
+static void awake(plan *ego_, int flg) /* pass the awake flag on to the child plan */
+{
+     P *ego = (P *) ego_;
+     AWAKE(ego->cld, flg); /* this plan holds no state of its own to wake */
+}
+
+static void destroy(plan *ego_) /* release what this plan owns: the child plan */
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld); /* ego itself is not freed in this function */
+}
+
+static void print(const plan *ego_, printer *p) /* emit a description of this plan through p->print */
+{
+     const P *ego = (const P *) ego_;
+     const S *s = ego->solver;
+     p->print(p, "(rdft2-vrank>=1-x%d/%d%(%p%))",
+	      ego->vl, s->vecloop_dim, ego->cld); /* args: loop count, solver's vecloop_dim, child plan */
+}
+
+static int pickdim(const S *ego, const tensor *vecsz, int oop, int *dp) /* forward this solver's settings to X(pickdim) */
+{
+     return X(pickdim)(ego->vecloop_dim, ego->buddies, ego->nbuddies,
+		       vecsz, oop, dp); /* callers use *dp as an index into vecsz->dims when this returns nonzero */
+}
+
+static int applicable0(const solver *ego_, const problem *p_, int *dp) /* structural test; fills *dp via pickdim */
+{
+     if (RDFT2P(p_)) { /* only problems accepted by RDFT2P qualify */
+          const S *ego = (const S *) ego_;
+          const problem_rdft2 *p = (const problem_rdft2 *) p_;
+	  if (FINITE_RNK(p->vecsz->rnk)
+	      && p->vecsz->rnk > 0
+	      && pickdim(ego, p->vecsz, 
+			 p->r != p->rio && p->r != p->iio, dp)) { /* oop argument: r aliases neither rio nor iio */
+	       if (p->r != p->rio && p->r != p->iio)
+		    return 1;  /* can always operate out-of-place */
+
+	       return(X(rdft2_inplace_strides)(p, *dp)); /* in-place: defer to the stride check for dimension *dp */
+	  }
+     }
+
+     return 0;
+}
+
+static int applicable(const solver *ego_, const problem *p_,
+		      const planner *plnr, int *dp) /* applicable0 plus planner-flag heuristics */
+{
+     const S *ego = (const S *)ego_;
+
+     if (!applicable0(ego_, p_, dp)) return 0;
+
+     /* fftw2 behavior */
+     if (NO_VRANK_SPLITSP(plnr) && (ego->vecloop_dim != ego->buddies[0])) /* with this flag, only the solver for buddies[0] is accepted */
+	  return 0;
+
+     if (NO_UGLYP(plnr)) {
+	  const problem_rdft2 *p = (const problem_rdft2 *) p_;
+	  iodim *d = p->vecsz->dims + *dp; /* the dimension picked by applicable0 */
+	       
+	  /* Heuristic: if the transform is multi-dimensional, and the
+	     vector stride is less than the transform size, then we
+	     probably want to use a rank>=2 plan first in order to combine
+	     this vector with the transform-dimension vectors. */
+	  if (p->sz->rnk > 1
+	      && X(imin)(X(iabs)(d->is), X(iabs)(d->os))
+	      < X(rdft2_tensor_max_index)(p->sz, p->kind)
+	       )
+	       return 0;
+
+	  /* Heuristic: don't use a vrank-geq1 for rank-0 vrank-1
+	     transforms, since this case is better handled by rank-0
+	     solvers. */
+	  if (p->sz->rnk == 0 && p->vecsz->rnk == 1) return 0;
+
+	  if (NONTHREADED_ICKYP(plnr)) 
+	       return 0; /* prefer threaded version */
+     }
+
+     return 1; /* every rejection test passed */
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) /* build the loop plan, or return 0 */
+{
+     const S *ego = (const S *) ego_;
+     const problem_rdft2 *p;
+     P *pln;
+     plan *cld;
+     int vdim; /* index into vecsz->dims, filled in by applicable() */
+     iodim *d;
+     int ivs, ovs; /* filled in by X(rdft2_strides) below */
+
+     static const plan_adt padt = {
+	  X(rdft2_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr, &vdim))
+          return (plan *) 0;
+     p = (const problem_rdft2 *) p_;
+
+     d = p->vecsz->dims + vdim; /* the dimension this plan loops over */
+
+     A(d->n > 1);  /* or else, p->ri + d->is etc. are invalid */
+
+     X(rdft2_strides)(p->kind, d, &ivs, &ovs); /* strides depend on kind, so they are not read from d directly */
+
+     cld = X(mkplan_d)(plnr, /* child problem: same sz and kind, vecsz copied via tensor_copy_except(vdim) */
+		       X(mkproblem_rdft2_d)(
+			    X(tensor_copy)(p->sz),
+			    X(tensor_copy_except)(p->vecsz, vdim),
+			    TAINT(p->r, ivs), 
+			    TAINT(p->rio, ovs), TAINT(p->iio, ovs),
+			    p->kind));
+     if (!cld) return (plan *) 0; /* no child plan was produced */
+
+     pln = MKPLAN_RDFT2(P, &padt, apply);
+
+     pln->cld = cld;
+     pln->vl = d->n;
+     pln->ivs = ivs;
+     pln->ovs = ovs;
+
+     pln->solver = ego;
+     X(ops_zero)(&pln->super.super.ops);
+     pln->super.super.ops.other = 3.14159; /* magic to prefer codelet loops */
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
+     pln->super.super.pcost = pln->vl * cld->pcost; /* pcost is vl times the child's pcost */
+
+     return &(pln->super.super);
+}
+
+static solver *mksolver(int vecloop_dim, const int *buddies, int nbuddies) /* build a solver with the given loop settings */
+{
+     static const solver_adt sadt = { mkplan }; /* solver_adt whose only listed member is mkplan */
+     S *slv = MKSOLVER(S, &sadt);
+     slv->vecloop_dim = vecloop_dim;
+     slv->buddies = buddies; /* pointer is stored, not copied: the array must outlive the solver */
+     slv->nbuddies = nbuddies;
+     return &(slv->super);
+}
+
+void X(rdft2_vrank_geq1_register)(planner *p) /* register one solver per entry of buddies */
+{
+     int i;
+
+     /* FIXME: Should we try other vecloop_dim values? */
+     static const int buddies[] = { 1, -1 };
+
+     const int nbuddies = sizeof(buddies) / sizeof(buddies[0]);
+
+     for (i = 0; i < nbuddies; ++i)
+          REGISTER_SOLVER(p, mksolver(buddies[i], buddies, nbuddies)); /* each solver gets its own vecloop_dim plus the whole array */
+}
diff --git a/src/fftw3/reodft/redft00e-r2hc-pad.c b/src/fftw3/reodft/redft00e-r2hc-pad.c
new file mode 100644
index 0000000..ec3fa35
--- /dev/null
+++ b/src/fftw3/reodft/redft00e-r2hc-pad.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: redft00e-r2hc-pad.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+/* Do a REDFT00 problem via an R2HC problem, padded symmetrically to
+   twice the size.  This is asymptotically a factor of ~2 worse than
+   redft00e-r2hc.c (the algorithm used in e.g. FFTPACK and Numerical
+   Recipes), but we abandoned the latter after we discovered that it
+   has intrinsic accuracy problems. */
+
+#include "reodft.h"
+
+typedef struct {
+     solver super; /* this solver carries no fields beyond the base solver */
+} S;
+
+typedef struct {
+     plan_rdft super;
+     plan *cld, *cldcpy; /* cld: size-2n r2hc run in place on buf; cldcpy: copies results from buf to O */
+     int is; /* input stride of the transform dimension */
+     int n; /* transform dimension's n minus 1 */
+     int vl; /* number of transforms in the vector loop */
+     int ivs, ovs; /* input/output strides between successive transforms */
+} P;
+
+static void apply(const plan *ego_, R *I, R *O) /* pad each input symmetrically to 2n, r2hc it, copy n+1 outputs */
+{
+     const P *ego = (const P *) ego_;
+     int is = ego->is;
+     int i, n = ego->n;
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS); /* scratch of 2n reals, reused for every transform */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+	  buf[0] = I[0];
+	  for (i = 1; i < n; ++i) { /* mirror I[1..n-1] into buf[i] and buf[2n-i] */
+	       R a = I[i * is];
+	       buf[i] = a;
+	       buf[2*n - i] = a;
+	  }
+	  buf[i] = I[i * is]; /* i == n, Nyquist */
+	  
+	  /* r2hc transform of size 2*n */
+	  {
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf); /* in place: buf is both input and output */
+	  }
+	  
+	  /* copy n+1 real numbers (real parts of hc array) from buf to O */
+	  {
+	       plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
+	       cldcpy->apply((plan *) cldcpy, buf, O);
+	  }
+     }
+
+     X(ifree)(buf); /* scratch is released on every call */
+}
+
+static void awake(plan *ego_, int flg) /* pass the awake flag on to both child plans */
+{
+     P *ego = (P *) ego_;
+     AWAKE(ego->cld, flg); /* the r2hc child */
+     AWAKE(ego->cldcpy, flg); /* the copy child */
+}
+
+static void destroy(plan *ego_) /* release both child plans */
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cldcpy); /* copy child first */
+     X(plan_destroy_internal)(ego->cld); /* then the r2hc child */
+}
+
+static void print(const plan *ego_, printer *p) /* emit a description of this plan through p->print */
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(redft00e-r2hc-pad-%d%v%(%p%)%(%p%))", 
+	      ego->n + 1, ego->vl, ego->cld, ego->cldcpy); /* n + 1 undoes the "- 1" applied in mkplan */
+}
+
+static int applicable0(const solver *ego_, const problem *p_) /* structural test for a REDFT00 problem */
+{
+     UNUSED(ego_);
+     if (RDFTP(p_)) { /* only problems accepted by RDFTP qualify */
+          const problem_rdft *p = (const problem_rdft *) p_;
+          return (1
+		  && p->sz->rnk == 1 /* exactly one transform dimension */
+		  && p->vecsz->rnk <= 1 /* at most one vector dimension */
+		  && p->kind[0] == REDFT00
+		  && p->sz->dims[0].n > 1  /* n == 1 is not well-defined */
+	       );
+     }
+
+     return 0;
+}
+
+static int applicable(const solver *ego, const problem *p, const planner *plnr) /* applicable0, but never when NO_UGLYP is set */
+{
+     return (!NO_UGLYP(plnr) && applicable0(ego, p)); /* the flag is tested first, so applicable0 is skipped when it is set */
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) /* build the padded-r2hc plan, or return 0 */
+{
+     P *pln;
+     const problem_rdft *p;
+     plan *cld = (plan *) 0, *cldcpy; /* cld starts null so the nada path can test it */
+     R *buf = (R *) 0; /* starts null so the nada path can call ifree0 on it */
+     int n;
+     int vl, ivs, ovs;
+     opcnt ops;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+	  goto nada;
+
+     p = (const problem_rdft *) p_;
+
+     n = p->sz->dims[0].n - 1;
+     A(n > 0);
+     buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS); /* buffer handed to the child problems; freed before this function returns */
+
+     cld = X(mkplan_d)(plnr,X(mkproblem_rdft_1_d)(X(mktensor_1d)(2*n,1,1), 
+						  X(mktensor_0d)(), 
+						  buf, buf, R2HC)); /* size-2n R2HC problem on buf */
+     if (!cld)
+	  goto nada;
+
+     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs); /* fills the vector loop count and strides from vecsz */
+     cldcpy =
+	  X(mkplan_d)(plnr,
+		      X(mkproblem_rdft_1_d)(X(mktensor_0d)(),
+					    X(mktensor_1d)(n+1,1,
+							   p->sz->dims[0].os), 
+					    buf, TAINT(p->O, ovs), R2HC)); /* n+1 elements from buf (stride 1) to O (stride os) */
+     if (!cldcpy)
+	  goto nada;
+
+     X(ifree)(buf); /* success path: release the planning buffer here */
+
+     pln = MKPLAN_RDFT(P, &padt, apply);
+
+     pln->n = n;
+     pln->is = p->sz->dims[0].is;
+     pln->cld = cld;
+     pln->cldcpy = cldcpy;
+     pln->vl = vl;
+     pln->ivs = ivs;
+     pln->ovs = ovs;
+     
+     X(ops_zero)(&ops);
+     ops.other = n + 2*n; /* loads + stores (input -> buf) */
+
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &cldcpy->ops, &pln->super.super.ops);
+
+     return &(pln->super.super);
+
+ nada: /* failure path: release whatever was acquired so far */
+     X(ifree0)(buf);
+     if (cld)
+	  X(plan_destroy_internal)(cld);  
+     return (plan *)0;
+}
+
+/* constructor */
+static solver *mksolver(void) /* build a solver whose solver_adt lists mkplan */
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super); /* callers receive the address of the super member */
+}
+
+void X(redft00e_r2hc_pad_register)(planner *p) /* register this file's single solver with the planner */
+{
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/reodft/redft00e-r2hc.c b/src/fftw3/reodft/redft00e-r2hc.c
new file mode 100644
index 0000000..0cd742f
--- /dev/null
+++ b/src/fftw3/reodft/redft00e-r2hc.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: redft00e-r2hc.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+/* Do a REDFT00 problem via an R2HC problem, with some pre/post-processing.
+
+   This code uses the trick from FFTPACK, also documented in a similar
+   form by Numerical Recipes.  Unfortunately, this algorithm seems to
+   have intrinsic numerical problems (similar to those in
+   reodft11e-r2hc.c), possibly due to the fact that it multiplies its
+   input by a cosine, causing a loss of precision near the zero.  For
+   transforms of 16k points, it has already lost three or four decimal
+   places of accuracy, which we deem unacceptable.
+
+   So, we have abandoned this algorithm in favor of the one in
+   redft00e-r2hc-pad.c, which unfortunately sacrifices 30-50% in speed.
+   The only other alternative in the literature that does not have
+   similar numerical difficulties seems to be the direct adaptation of
+   the Cooley-Tukey decomposition for symmetric data, but this would
+   require a whole new set of codelets and it's not clear that it's
+   worth it at this point. */
+
+#include "reodft.h"
+
+typedef struct {
+     solver super; /* this solver carries no fields beyond the base solver */
+} S;
+
+typedef struct {
+     plan_rdft super;
+     plan *cld; /* size-n r2hc child, run in place on the scratch buffer */
+     twid *td; /* twiddle data managed by awake(); apply() reads td->W */
+     int is, os; /* input/output strides of the transform dimension */
+     int n; /* transform dimension's n minus 1 */
+     int vl; /* number of transforms in the vector loop */
+     int ivs, ovs; /* input/output strides between successive transforms */
+} P;
+
+static void apply(const plan *ego_, R *I, R *O) /* pre-process I into buf, run the child r2hc, post-process into O */
+{
+     const P *ego = (const P *) ego_;
+     int is = ego->is, os = ego->os;
+     int i, n = ego->n;
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *W = ego->td->W; /* W array of the twid that awake() manages through X(twiddle_awake) */
+     R *buf;
+     E csum;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* scratch of n reals, reused for every transform */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+	  buf[0] = I[0] + I[is * n];
+	  csum = I[0] - I[is * n]; /* running sum that ends up in O[os] */
+	  for (i = 1; i < n - i; ++i) { /* pair element i with element n-i */
+	       E a, b, apb, amb;
+	       a = I[is * i];
+	       b = I[is * (n - i)];
+	       csum += W[2*i] * (amb = K(2.0)*(a - b)); /* amb = 2(a-b), weighted by W[2i] into csum */
+	       amb = W[2*i+1] * amb; /* then rescaled by W[2i+1] for the buffer entries */
+	       apb = (a + b);
+	       buf[i] = apb - amb;
+	       buf[n - i] = apb + amb;
+	  }
+	  if (i == n - i) { /* even n: the middle element has no partner */
+	       buf[i] = K(2.0) * I[is * i];
+	  }
+	  
+	  {
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf); /* in place: buf is both input and output */
+	  }
+	  
+	  /* FIXME: use recursive/cascade summation for better stability? */
+	  O[0] = buf[0];
+	  O[os] = csum;
+	  for (i = 1; i + i < n; ++i) {
+	       int k = i + i;
+	       O[os * k] = buf[i]; /* even-index outputs come straight from buf[i] */
+	       O[os * (k + 1)] = O[os * (k - 1)] - buf[n - i]; /* odd-index outputs build on the previous odd output */
+	  }
+	  if (i + i == n) { /* even n: last output comes from the middle entry */
+	       O[os * n] = buf[i];
+	  }
+     }
+
+     X(ifree)(buf); /* scratch is released on every call */
+}
+
+static void awake(plan *ego_, int flg) /* pass flg to the child plan and to the twiddle machinery */
+{
+     P *ego = (P *) ego_;
+     static const tw_instr redft00e_tw[] = { /* instruction table: one TW_COS, one TW_SIN, then TW_NEXT */
+          { TW_COS, 0, 1 },
+          { TW_SIN, 0, 1 },
+          { TW_NEXT, 1, 0 }
+     };
+
+     AWAKE(ego->cld, flg);
+     X(twiddle_awake)(flg, &ego->td, redft00e_tw, 2*ego->n, 1, (ego->n+1)/2); /* updates ego->td, whose W array apply() reads */
+}
+
+static void destroy(plan *ego_) /* release the child plan */
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld); /* ego->td is not touched in this function */
+}
+
+static void print(const plan *ego_, printer *p) /* emit a description of this plan through p->print */
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(redft00e-r2hc-%d%v%(%p%))", ego->n + 1, ego->vl, ego->cld); /* n + 1 undoes the "- 1" applied in mkplan */
+}
+
+static int applicable0(const solver *ego_, const problem *p_) /* structural test for a REDFT00 problem */
+{
+     UNUSED(ego_);
+     if (RDFTP(p_)) { /* only problems accepted by RDFTP qualify */
+          const problem_rdft *p = (const problem_rdft *) p_;
+          return (1
+		  && p->sz->rnk == 1 /* exactly one transform dimension */
+		  && p->vecsz->rnk <= 1 /* at most one vector dimension */
+		  && p->kind[0] == REDFT00
+		  && p->sz->dims[0].n > 1  /* n == 1 is not well-defined */
+	       );
+     }
+
+     return 0;
+}
+
+static int applicable(const solver *ego, const problem *p, const planner *plnr) /* applicable0, but never when NO_UGLYP is set */
+{
+     return (!NO_UGLYP(plnr) && applicable0(ego, p)); /* the flag is tested first, so applicable0 is skipped when it is set */
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) /* build the r2hc-based plan, or return 0 */
+{
+     P *pln;
+     const problem_rdft *p;
+     plan *cld;
+     R *buf;
+     int n;
+     opcnt ops;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+          return (plan *)0;
+
+     p = (const problem_rdft *) p_;
+
+     n = p->sz->dims[0].n - 1;
+     A(n > 0);
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* buffer handed to the child problem; freed right after planning */
+
+     cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1), 
+						   X(mktensor_0d)(), 
+						   buf, buf, R2HC)); /* size-n R2HC problem on buf */
+     X(ifree)(buf); /* freed before the null check, so both outcomes release it */
+     if (!cld)
+          return (plan *)0;
+
+     pln = MKPLAN_RDFT(P, &padt, apply);
+
+     pln->n = n;
+     pln->is = p->sz->dims[0].is;
+     pln->os = p->sz->dims[0].os;
+     pln->cld = cld;
+     pln->td = 0; /* twiddles start out null; awake() passes &pln->td to X(twiddle_awake) */
+
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs); /* fills the vector loop count and strides from vecsz */
+     
+     X(ops_zero)(&ops);
+     ops.other = 8 + (n-1)/2 * 11 + (1 - n % 2) * 5; /* NOTE(review): per-transform cost estimates; counts not re-derived here */
+     ops.add = 2 + (n-1)/2 * 5;
+     ops.mul = (n-1)/2 * 3 + (1 - n % 2) * 1;
+
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
+
+     return &(pln->super.super);
+}
+
+/* constructor */
+static solver *mksolver(void) /* build a solver whose solver_adt lists mkplan */
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super); /* callers receive the address of the super member */
+}
+
+void X(redft00e_r2hc_register)(planner *p) /* register this file's single solver with the planner */
+{
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/reodft/reoconf.c b/src/fftw3/reodft/reoconf.c
new file mode 100644
index 0000000..1cd41b6
--- /dev/null
+++ b/src/fftw3/reodft/reoconf.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: reoconf.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+#include "reodft.h"
+
+static const solvtab s = /* registration hooks run by X(reodft_conf_standard) */
+{
+     /* SOLVTAB(X(redft00e_r2hc_register)),
+	SOLVTAB(X(rodft00e_r2hc_register)), */
+     SOLVTAB(X(redft00e_r2hc_pad_register)), /* the -pad variants are listed; the plain r2hc ones above are commented out */
+     SOLVTAB(X(rodft00e_r2hc_pad_register)),
+     SOLVTAB(X(reodft010e_r2hc_register)),
+     /* SOLVTAB(X(reodft11e_r2hc_register)), */
+     SOLVTAB(X(reodft11e_radix2_r2hc_register)),
+     SOLVTAB(X(reodft11e_r2hc_odd_register)),
+
+     SOLVTAB_END
+};
+
+void X(reodft_conf_standard)(planner *p) /* apply the solver table s to planner p */
+{
+     X(solvtab_exec)(s, p);
+}
diff --git a/src/fftw3/reodft/reodft.h b/src/fftw3/reodft/reodft.h
new file mode 100644
index 0000000..8c67144
--- /dev/null
+++ b/src/fftw3/reodft/reodft.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef __REODFT_H__
+#define __REODFT_H__
+
+#include "ifftw.h"
+#include "rdft.h"
+
+#define REODFT_KINDP(k) ((k) >= REDFT00 && (k) <= RODFT11) /* nonzero when k lies in [REDFT00, RODFT11]; presumably the r{e,o}dft kinds are contiguous in the enum -- confirm */
+
+void X(redft00e_r2hc_register)(planner *p); /* solver registration hooks, one per reodft source file */
+void X(redft00e_r2hc_pad_register)(planner *p);
+void X(rodft00e_r2hc_register)(planner *p);
+void X(rodft00e_r2hc_pad_register)(planner *p);
+void X(reodft010e_r2hc_register)(planner *p);
+void X(reodft11e_r2hc_register)(planner *p);
+void X(reodft11e_radix2_r2hc_register)(planner *p);
+void X(reodft11e_r2hc_odd_register)(planner *p);
+
+/* configurations */
+void X(reodft_conf_standard)(planner *p); /* runs the standard reodft solver table on planner p */
+
+#endif /* __REODFT_H__ */
diff --git a/src/fftw3/reodft/reodft010e-r2hc.c b/src/fftw3/reodft/reodft010e-r2hc.c
new file mode 100644
index 0000000..ace14de
--- /dev/null
+++ b/src/fftw3/reodft/reodft010e-r2hc.c
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: reodft010e-r2hc.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+/* Do an R{E,O}DFT{01,10} problem via an R2HC problem, with some
+   pre/post-processing ala FFTPACK. */
+
+#include "reodft.h"
+
+typedef struct {
+     solver super; /* base solver only; no algorithm-specific solver state is stored */
+} S;
+
+typedef struct {
+     plan_rdft super;
+     plan *cld; /* child plan; mkplan requests it as an in-place, unit-stride R2HC of size n */
+     twid *td; /* twiddle table; 0 after mkplan, managed via X(twiddle_awake) in awake() */
+     int is, os; /* input/output strides of the transform dimension (sz->dims[0]) */
+     int n; /* transform length, sz->dims[0].n */
+     int vl; /* number of vectors, from X(tensor_tornk1)(vecsz, ...) */
+     int ivs, ovs; /* input/output distance between successive vectors */
+     rdft_kind kind; /* REDFT01, REDFT10, RODFT01 or RODFT10 (see applicable0) */
+} P;
+
+/* A real-even-01 DFT operates logically on a size-4N array:
+                   I 0 -r(I*) -I 0 r(I*),
+   where r denotes reversal and * denotes deletion of the 0th element.
+   To compute the transform of this, we imagine performing a radix-4
+   (real-input) DIF step, which turns the size-4N DFT into 4 size-N
+   (contiguous) DFTs, two of which are zero and two of which are
+   conjugates.  The non-redundant size-N DFT has halfcomplex input, so
+   we can do it with a size-N hc2r transform.  (In order to share
+   plans with the re10 (inverse) transform, however, we use the DHT
+   trick to re-express the hc2r problem as r2hc.  This has little cost
+   since we are already pre- and post-processing the data in {i,n-i}
+   order.)  Finally, we have to write out the data in the correct
+   order...the two size-N redundant (conjugate) hc2r DFTs correspond
+   to the even and odd outputs in O (i.e. the usual interleaved output
+   of DIF transforms); since this data has even symmetry, we only
+   write the first half of it.
+
+   The real-even-10 DFT is just the reverse of these steps, i.e. a
+   radix-4 DIT transform.  There, however, we just use the r2hc
+   transform naturally without resorting to the DHT trick.
+
+   A real-odd-01 DFT is very similar, except that the input is
+   0 I (rI)* 0 -I -(rI)*.  This format, however, can be transformed
+   into precisely the real-even-01 format above by sending I -> rI
+   and shifting the array by N.  The former swap is just another
+   transformation on the input during preprocessing; the latter
+   multiplies the even/odd outputs by i/-i, which combines with
+   the factor of -i (to take the imaginary part) to simply flip
+   the sign of the odd outputs.  Vice-versa for real-odd-10.
+
+   The FFTPACK source code was very helpful in working this out.
+   (They do unnecessary passes over the array, though.)
+
+   Note that Numerical Recipes suggests a different algorithm that
+   requires more operations and uses trig. functions for both the pre-
+   and post-processing passes.
+*/
+
+static void apply_re01(const plan *ego_, R *I, R *O) /* REDFT01: twiddle pre-pass, child R2HC, interleaving post-pass */
+{
+     const P *ego = (const P *) ego_;
+     int is = ego->is, os = ego->os;
+     int i, n = ego->n;
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *W = ego->td->W; /* td is 0 after mkplan; presumably awake() must have run first -- confirm */
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* n-element scratch array, reused for every vector */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { /* one length-n transform per vector */
+	  buf[0] = I[0];
+	  for (i = 1; i < n - i; ++i) { /* pre-pass over the pairs (i, n - i), i < n/2 */
+	       E a, b, apb, amb, wa, wb;
+	       a = I[is * i];
+	       b = I[is * (n - i)];
+	       apb = a + b;
+	       amb = a - b;
+	       wa = W[2*i]; /* W[2*i], W[2*i+1]: the TW_COS/TW_SIN pair requested in awake() */
+	       wb = W[2*i + 1];
+	       buf[i] = wa * amb + wb * apb; 
+	       buf[n - i] = wa * apb - wb * amb; 
+	  }
+	  if (i == n - i) { /* only for even n: the unpaired middle element i == n/2 */
+	       buf[i] = K(2.0) * I[is * i] * W[2*i];
+	  }
+	  
+	  {
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf); /* child transform, in place on buf */
+	  }
+	  
+	  O[0] = buf[0];
+	  for (i = 1; i < n - i; ++i) { /* post-pass: each pair (buf[i], buf[n-i]) yields O[2i-1] and O[2i] */
+	       E a, b;
+	       int k;
+	       a = buf[i];
+	       b = buf[n - i];
+	       k = i + i;
+	       O[os * (k - 1)] = a - b;
+	       O[os * k] = a + b;
+	  }
+	  if (i == n - i) { /* even n: buf[n/2] supplies the last output */
+	       O[os * (n - 1)] = buf[i];
+	  }
+     }
+
+     X(ifree)(buf);
+}
+
+/* ro01 is the same as re01, but with i <-> n - 1 - i in the input and
+   the sign of the odd output elements flipped. */
+static void apply_ro01(const plan *ego_, R *I, R *O) /* RODFT01: apply_re01 with reversed input and negated odd outputs */
+{
+     const P *ego = (const P *) ego_;
+     int is = ego->is, os = ego->os;
+     int i, n = ego->n;
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *W = ego->td->W; /* td is 0 after mkplan; presumably awake() must have run first -- confirm */
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* n-element scratch array, reused for every vector */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { /* one length-n transform per vector */
+	  buf[0] = I[is * (n - 1)]; /* input is read back to front: index j of re01 becomes n - 1 - j */
+	  for (i = 1; i < n - i; ++i) { /* pre-pass over the pairs (i, n - i), i < n/2 */
+	       E a, b, apb, amb, wa, wb;
+	       a = I[is * (n - 1 - i)];
+	       b = I[is * (i - 1)];
+	       apb = a + b;
+	       amb = a - b;
+	       wa = W[2*i]; /* W[2*i], W[2*i+1]: the TW_COS/TW_SIN pair requested in awake() */
+	       wb = W[2*i+1];
+	       buf[i] = wa * amb + wb * apb; 
+	       buf[n - i] = wa * apb - wb * amb; 
+	  }
+	  if (i == n - i) { /* only for even n: the unpaired middle element i == n/2 */
+	       buf[i] = K(2.0) * I[is * (i - 1)] * W[2*i];
+	  }
+	  
+	  {
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf); /* child transform, in place on buf */
+	  }
+	  
+	  O[0] = buf[0];
+	  for (i = 1; i < n - i; ++i) { /* post-pass: odd outputs get b - a (re01 uses a - b) */
+	       E a, b;
+	       int k;
+	       a = buf[i];
+	       b = buf[n - i];
+	       k = i + i;
+	       O[os * (k - 1)] = b - a;
+	       O[os * k] = a + b;
+	  }
+	  if (i == n - i) { /* even n: last output index n - 1 is odd, hence the negation */
+	       O[os * (n - 1)] = -buf[i];
+	  }
+     }
+
+     X(ifree)(buf);
+}
+
+static void apply_re10(const plan *ego_, R *I, R *O) /* REDFT10: de-interleaving pre-pass, child R2HC, twiddle post-pass */
+{
+     const P *ego = (const P *) ego_;
+     int is = ego->is, os = ego->os;
+     int i, n = ego->n;
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *W = ego->td->W; /* td is 0 after mkplan; presumably awake() must have run first -- confirm */
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* n-element scratch array, reused for every vector */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { /* one length-n transform per vector */
+	  buf[0] = I[0];
+	  for (i = 1; i < n - i; ++i) { /* pre-pass: I[2i-1] goes to buf[n-i], I[2i] goes to buf[i] */
+	       E u, v;
+	       int k = i + i;
+	       u = I[is * (k - 1)];
+	       v = I[is * k];
+	       buf[n - i] = u;
+	       buf[i] = v;
+	  }
+	  if (i == n - i) { /* only for even n: last input lands in the middle slot n/2 */
+	       buf[i] = I[is * (n - 1)];
+	  }
+	  
+	  {
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf); /* child transform, in place on buf */
+	  }
+	  
+	  O[0] = K(2.0) * buf[0];
+	  for (i = 1; i < n - i; ++i) { /* post-pass over the pairs (i, n - i), scaled by 2 */
+	       E a, b, wa, wb;
+	       a = K(2.0) * buf[i];
+	       b = K(2.0) * buf[n - i];
+	       wa = W[2*i]; /* W[2*i], W[2*i+1]: the TW_COS/TW_SIN pair requested in awake() */
+	       wb = W[2*i + 1];
+	       O[os * i] = wa * a + wb * b;
+	       O[os * (n - i)] = wb * a - wa * b;
+	  }
+	  if (i == n - i) { /* even n: middle output uses only W[2*i] */
+	       O[os * i] = K(2.0) * buf[i] * W[2*i];
+	  }
+     }
+
+     X(ifree)(buf);
+}
+
+/* ro10 is the same as re10, but with i <-> n - 1 - i in the output and
+   the sign of the odd input elements flipped. */
+static void apply_ro10(const plan *ego_, R *I, R *O) /* RODFT10: apply_re10 with negated odd inputs and reversed output */
+{
+     const P *ego = (const P *) ego_;
+     int is = ego->is, os = ego->os;
+     int i, n = ego->n;
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *W = ego->td->W; /* td is 0 after mkplan; presumably awake() must have run first -- confirm */
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* n-element scratch array, reused for every vector */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { /* one length-n transform per vector */
+	  buf[0] = I[0];
+	  for (i = 1; i < n - i; ++i) { /* pre-pass: -I[2i-1] goes to buf[n-i], I[2i] goes to buf[i] */
+	       E u, v;
+	       int k = i + i;
+	       u = -I[is * (k - 1)];
+	       v = I[is * k];
+	       buf[n - i] = u;
+	       buf[i] = v;
+	  }
+	  if (i == n - i) { /* only for even n: last input index n - 1 is odd, hence the negation */
+	       buf[i] = -I[is * (n - 1)];
+	  }
+	  
+	  {
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf); /* child transform, in place on buf */
+	  }
+	  
+	  O[os * (n - 1)] = K(2.0) * buf[0]; /* output is written back to front: index j of re10 becomes n - 1 - j */
+	  for (i = 1; i < n - i; ++i) { /* post-pass over the pairs (i, n - i), scaled by 2 */
+	       E a, b, wa, wb;
+	       a = K(2.0) * buf[i];
+	       b = K(2.0) * buf[n - i];
+	       wa = W[2*i]; /* W[2*i], W[2*i+1]: the TW_COS/TW_SIN pair requested in awake() */
+	       wb = W[2*i + 1];
+	       O[os * (n - 1 - i)] = wa * a + wb * b;
+	       O[os * (i - 1)] = wb * a - wa * b;
+	  }
+	  if (i == n - i) { /* even n: middle output uses only W[2*i] */
+	       O[os * (i - 1)] = K(2.0) * buf[i] * W[2*i];
+	  }
+     }
+
+     X(ifree)(buf);
+}
+
+static void awake(plan *ego_, int flg) /* forward flg to the child plan and to the twiddle table td */
+{
+     P *ego = (P *) ego_;
+     static const tw_instr reodft010e_tw[] = { /* one TW_COS and one TW_SIN entry per step: the W[2*i], W[2*i+1] pairs in apply_* */
+          { TW_COS, 0, 1 },
+          { TW_SIN, 0, 1 },
+          { TW_NEXT, 1, 0 }
+     };
+
+     AWAKE(ego->cld, flg);
+
+     X(twiddle_awake)(flg, &ego->td, reodft010e_tw, 4*ego->n, 1, ego->n/2+1); /* NOTE(review): n/2+1 matches i = 0..n/2 used by apply_*; confirm argument meanings */
+}
+
+static void destroy(plan *ego_) /* plan_adt destroy hook (see padt in mkplan) */
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld); /* only the child plan is released here; td is not touched */
+}
+
+static void print(const plan *ego_, printer *p) /* plan_adt print hook (see padt in mkplan) */
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(%se-r2hc-%d%v%(%p%))", /* arguments in order: kind name, n, vl, child plan */
+	      X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
+}
+
+static int applicable0(const solver *ego_, const problem *p_) /* 1 if this solver handles problem p_, else 0 */
+{
+     UNUSED(ego_);
+     if (RDFTP(p_)) { /* the cast below is only made when RDFTP accepts p_ */
+          const problem_rdft *p = (const problem_rdft *) p_;
+          return (1
+		  && p->sz->rnk == 1 /* exactly one transform dimension */
+		  && p->vecsz->rnk <= 1 /* at most one vector dimension */
+		  && (p->kind[0] == REDFT01 || p->kind[0] == REDFT10 /* the four kinds mkplan has apply_* routines for */
+		      || p->kind[0] == RODFT01 || p->kind[0] == RODFT10)
+	       );
+     }
+
+     return 0;
+}
+
+static int applicable(const solver *ego, const problem *p, const planner *plnr) /* applicable0 plus the planner's NO_UGLYP gate */
+{
+     return (!NO_UGLYP(plnr) && applicable0(ego, p)); /* refuses every problem when NO_UGLYP(plnr) is nonzero */
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) /* new plan, or 0 if not applicable or no child plan was found */
+{
+     P *pln;
+     const problem_rdft *p;
+     plan *cld;
+     R *buf;
+     int n;
+     opcnt ops;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+          return (plan *)0;
+
+     p = (const problem_rdft *) p_;
+
+     n = p->sz->dims[0].n;
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* stand-in array for planning the child; freed right after */
+
+     cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1), /* child: size n, strides 1, in place, R2HC */
+                                                   X(mktensor_0d)(),
+                                                   buf, buf, R2HC));
+     X(ifree)(buf);
+     if (!cld)
+          return (plan *)0;
+
+     switch (p->kind[0]) { /* pick the apply routine matching the requested kind */
+	 case REDFT01: pln = MKPLAN_RDFT(P, &padt, apply_re01); break;
+	 case REDFT10: pln = MKPLAN_RDFT(P, &padt, apply_re10); break;
+	 case RODFT01: pln = MKPLAN_RDFT(P, &padt, apply_ro01); break;
+	 case RODFT10: pln = MKPLAN_RDFT(P, &padt, apply_ro10); break;
+	 default: A(0); X(plan_destroy_internal)(cld); return (plan*)0; /* not expected after applicable(); release the child instead of leaking it */
+     }
+
+     pln->n = n;
+     pln->is = p->sz->dims[0].is;
+     pln->os = p->sz->dims[0].os;
+     pln->cld = cld; /* ownership passes to the plan; destroy() releases it */
+     pln->td = 0; /* twiddles are requested later, in awake() */
+     pln->kind = p->kind[0];
+     
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+     
+     X(ops_zero)(&ops); /* ops: operation-count estimate for the pre/post passes of one vector */
+     ops.other = 4 + (n-1)/2 * 10 + (1 - n % 2) * 5; /* (1 - n % 2) is 1 only for even n */
+     if (p->kind[0] == REDFT01 || p->kind[0] == RODFT01) {
+	  ops.add = (n-1)/2 * 6;
+	  ops.mul = (n-1)/2 * 4 + (1 - n % 2) * 2;
+     }
+     else { /* 10 transforms */
+	  ops.add = (n-1)/2 * 2;
+	  ops.mul = 1 + (n-1)/2 * 6 + (1 - n % 2) * 2;
+     }
+     
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops); /* presumably accumulates vl * ops into the plan total -- confirm */
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); /* likewise for the child's count */
+
+     return &(pln->super.super);
+}
+
+/* constructor */
+static solver *mksolver(void) /* allocate a solver whose only hook is mkplan */
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super); /* callers see it as the base solver type */
+}
+
+void X(reodft010e_r2hc_register)(planner *p) /* public entry point declared in reodft.h */
+{
+     REGISTER_SOLVER(p, mksolver()); /* register a freshly built solver with planner p */
+}
diff --git a/src/fftw3/reodft/reodft11e-r2hc-odd.c b/src/fftw3/reodft/reodft11e-r2hc-odd.c
new file mode 100644
index 0000000..471f7ca
--- /dev/null
+++ b/src/fftw3/reodft/reodft11e-r2hc-odd.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: reodft11e-r2hc-odd.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+/* Do an R{E,O}DFT11 problem via an R2HC problem of the same *odd* size,
+   with some permutations and post-processing, as described in:
+
+     S. C. Chan and K. L. Ho, "Fast algorithms for computing the
+     discrete cosine transform," IEEE Trans. Circuits Systems II:
+     Analog & Digital Sig. Proc. 39 (3), 185--190 (1992).
+
+   (For even sizes, see reodft11e-radix2.c.)  
+
+   This algorithm is related to the 8 x n prime-factor-algorithm (PFA)
+   decomposition of the size 8n "logical" DFT corresponding to the
+   R{EO}DFT11.
+
+   Aside from very confusing notation (several symbols are redefined
+   from one line to the next), be aware that this paper has some
+   errors.  In particular, the signs are wrong in Eqs. (34-35).  Also,
+   Eqs. (36-37) should be simply C(k) = C(2k + 1 mod N), and similarly
+   for S (or, equivalently, the second cases should have 2*N - 2*k - 1
+   instead of N - k - 1).  Note also that in their definition of the
+   DFT, similarly to FFTW's, the exponent's sign is -1, but they
+   forgot to correspondingly multiply S (the sine terms) by -1.
+*/
+
+#include "reodft.h"
+
+typedef struct {
+     solver super; /* base solver only; no algorithm-specific solver state is stored */
+} S;
+
+typedef struct {
+     plan_rdft super;
+     plan *cld; /* child plan; mkplan requests it as an in-place, unit-stride R2HC of size n */
+     int is, os; /* input/output strides of the transform dimension (sz->dims[0]) */
+     int n; /* transform length; applicable0 only accepts odd n */
+     int vl; /* number of vectors, from X(tensor_tornk1)(vecsz, ...) */
+     int ivs, ovs; /* input/output distance between successive vectors */
+     rdft_kind kind; /* REDFT11 or RODFT11 (see applicable0) */
+} P;
+
+static DK(SQRT2, +1.4142135623730950488016887242096980785696718753769); /* sqrt(2): scale applied to every output in apply_re11/apply_ro11 */
+
+#define SGN_SET(x, i) ((i) % 2 ? -(x) : (x)) /* -x when i is odd, x when i is even */
+
+static void apply_re11(const plan *ego_, R *I, R *O) /* REDFT11, odd n: permute input, child R2HC, signed/scaled post-pass */
+{
+     const P *ego = (const P *) ego_;
+     int is = ego->is, os = ego->os;
+     int i, n = ego->n, n2 = n/2;
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* n-element scratch array, reused for every vector */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { /* one length-n transform per vector */
+	  { /* gather: m starts at n2 and advances by 4; each range of m picks an index formula and a sign */
+	       int m;
+	       for (i = 0, m = n2; m < n; ++i, m += 4) /* m in [n2, n): +I[m] */
+		    buf[i] = I[is * m];
+	       for (; m < 2 * n; ++i, m += 4) /* m in [n, 2n): -I[2n - m - 1] */
+		    buf[i] = -I[is * (2*n - m - 1)];
+	       for (; m < 3 * n; ++i, m += 4) /* m in [2n, 3n): -I[m - 2n] */
+		    buf[i] = -I[is * (m - 2*n)];
+	       for (; m < 4 * n; ++i, m += 4) /* m in [3n, 4n): +I[4n - m - 1] */
+		    buf[i] = I[is * (4*n - m - 1)];
+	       m -= 4 * n; /* wrap m around and continue until buf is full */
+	       for (; i < n; ++i, m += 4)
+		    buf[i] = I[is * m];
+	  }
+
+	  { /* child plan: R2HC of size n */
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf);
+	  }
+	  
+	  /* FIXME: strength-reduce loop by 4 to eliminate ugly sgn_set? */
+	  for (i = 0; i + i + 1 < n2; ++i) { /* each pass reads buf[k], buf[k+1] and their mirrors, writes four outputs */
+	       int k = i + i + 1;
+	       E c1, s1;
+	       E c2, s2;
+	       c1 = buf[k];
+	       c2 = buf[k + 1];
+	       s2 = buf[n - (k + 1)];
+	       s1 = buf[n - k];
+	       
+	       O[os * i] = SQRT2 * (SGN_SET(c1, (i+1)/2) +
+				    SGN_SET(s1, i/2));
+	       O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c1, (n-i)/2) -
+					      SGN_SET(s1, (n-(i+1))/2));
+	       
+	       O[os * (n2 - (i+1))] = SQRT2 * (SGN_SET(c2, (n2-i)/2) -
+					       SGN_SET(s2, (n2-(i+1))/2));
+	       O[os * (n2 + (i+1))] = SQRT2 * (SGN_SET(c2, (n2+i+2)/2) +
+					       SGN_SET(s2, (n2+(i+1))/2));
+	  }
+	  if (i + i + 1 == n2) { /* leftover pair when the loop stops exactly at k == n2 */
+	       E c, s;
+	       c = buf[n2];
+	       s = buf[n - n2];
+	       O[os * i] = SQRT2 * (SGN_SET(c, (i+1)/2) +
+				    SGN_SET(s, i/2));
+	       O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c, (i+2)/2) +
+					      SGN_SET(s, (i+1)/2));
+	  }
+	  O[os * n2] = SQRT2 * SGN_SET(buf[0], (n2+1)/2); /* middle output comes from buf[0] alone */
+     }
+
+     X(ifree)(buf);
+}
+
+/* As with rodft01, rodft11 is obtained from redft11 by
+   reversing the input and flipping the sign of every other output. */
+static void apply_ro11(const plan *ego_, R *I, R *O) /* RODFT11, odd n: apply_re11 with reversed input and alternating output sign */
+{
+     const P *ego = (const P *) ego_;
+     int is = ego->is, os = ego->os;
+     int i, n = ego->n, n2 = n/2;
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* n-element scratch array, reused for every vector */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { /* one length-n transform per vector */
+	  { /* gather as in apply_re11, with every input index j replaced by n - 1 - j */
+	       int m;
+	       for (i = 0, m = n2; m < n; ++i, m += 4) /* m in [n2, n): +I[n - 1 - m] */
+		    buf[i] = I[is * (n - 1 - m)];
+	       for (; m < 2 * n; ++i, m += 4) /* m in [n, 2n): -I[m - n] */
+		    buf[i] = -I[is * (m - n)];
+	       for (; m < 3 * n; ++i, m += 4) /* m in [2n, 3n): -I[3n - 1 - m] */
+		    buf[i] = -I[is * (3*n - 1 - m)];
+	       for (; m < 4 * n; ++i, m += 4) /* m in [3n, 4n): +I[m - 3n] */
+		    buf[i] = I[is * (m - 3*n)];
+	       m -= 4 * n; /* wrap m around and continue until buf is full */
+	       for (; i < n; ++i, m += 4)
+		    buf[i] = I[is * (n - 1 - m)];
+	  }
+
+	  { /* child plan: R2HC of size n */
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf);
+	  }
+	  
+	  /* FIXME: strength-reduce loop by 4 to eliminate ugly sgn_set? */
+	  for (i = 0; i + i + 1 < n2; ++i) { /* same four outputs as apply_re11; the extra + i / + j adds a per-index sign term */
+	       int k = i + i + 1;
+	       int j;
+	       E c1, s1;
+	       E c2, s2;
+	       c1 = buf[k];
+	       c2 = buf[k + 1];
+	       s2 = buf[n - (k + 1)];
+	       s1 = buf[n - k];
+	       
+	       O[os * i] = SQRT2 * (SGN_SET(c1, (i+1)/2 + i) +
+				    SGN_SET(s1, i/2 + i));
+	       O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c1, (n-i)/2 + i) -
+					      SGN_SET(s1, (n-(i+1))/2 + i));
+	       
+	       j = n2 - (i+1); /* j and n2 + (i+1) differ by 2*(i+1), so they share parity */
+	       O[os * j] = SQRT2 * (SGN_SET(c2, (n2-i)/2 + j) -
+				    SGN_SET(s2, (n2-(i+1))/2 + j));
+	       O[os * (n2 + (i+1))] = SQRT2 * (SGN_SET(c2, (n2+i+2)/2 + j) +
+					       SGN_SET(s2, (n2+(i+1))/2 + j));
+	  }
+	  if (i + i + 1 == n2) { /* leftover pair when the loop stops exactly at k == n2 */
+	       E c, s;
+	       c = buf[n2];
+	       s = buf[n - n2];
+	       O[os * i] = SQRT2 * (SGN_SET(c, (i+1)/2 + i) +
+				    SGN_SET(s, i/2 + i));
+	       O[os * (n - (i+1))] = SQRT2 * (SGN_SET(c, (i+2)/2 + i) +
+					      SGN_SET(s, (i+1)/2 + i));
+	  }
+	  O[os * n2] = SQRT2 * SGN_SET(buf[0], (n2+1)/2 + n2); /* middle output comes from buf[0] alone */
+     }
+
+     X(ifree)(buf);
+}
+
+static void awake(plan *ego_, int flg) /* plan_adt awake hook (see padt in mkplan) */
+{
+     P *ego = (P *) ego_;
+     AWAKE(ego->cld, flg); /* only the child plan is forwarded flg; this plan keeps no twiddle table */
+}
+
+static void destroy(plan *ego_) /* plan_adt destroy hook (see padt in mkplan) */
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld); /* release the child plan stored by mkplan */
+}
+
+static void print(const plan *ego_, printer *p) /* plan_adt print hook (see padt in mkplan) */
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(%se-r2hc-odd-%d%v%(%p%))", /* arguments in order: kind name, n, vl, child plan */
+	      X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
+}
+
+static int applicable0(const solver *ego_, const problem *p_) /* 1 if this solver handles problem p_, else 0 */
+{
+     UNUSED(ego_);
+     if (RDFTP(p_)) { /* the cast below is only made when RDFTP accepts p_ */
+          const problem_rdft *p = (const problem_rdft *) p_;
+          return (1
+		  && p->sz->rnk == 1 /* exactly one transform dimension */
+		  && p->vecsz->rnk <= 1 /* at most one vector dimension */
+		  && p->sz->dims[0].n % 2 == 1 /* odd lengths only */
+		  && (p->kind[0] == REDFT11 || p->kind[0] == RODFT11)
+	       );
+     }
+
+     return 0;
+}
+
+static int applicable(const solver *ego, const problem *p, const planner *plnr) /* applicable0 plus the planner's NO_UGLYP gate */
+{
+     return (!NO_UGLYP(plnr) && applicable0(ego, p)); /* refuses every problem when NO_UGLYP(plnr) is nonzero */
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) /* new plan, or 0 if not applicable or no child plan was found */
+{
+     P *pln;
+     const problem_rdft *p;
+     plan *cld;
+     R *buf;
+     int n;
+     opcnt ops;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+          return (plan *)0;
+
+     p = (const problem_rdft *) p_;
+
+     n = p->sz->dims[0].n;
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* stand-in array for planning the child; freed right after */
+
+     cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1), /* child: size n, strides 1, in place, R2HC */
+                                                   X(mktensor_0d)(),
+                                                   buf, buf, R2HC));
+     X(ifree)(buf);
+     if (!cld)
+          return (plan *)0;
+
+     pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11); /* applicable0 admits only these two kinds */
+     pln->n = n;
+     pln->is = p->sz->dims[0].is;
+     pln->os = p->sz->dims[0].os;
+     pln->cld = cld; /* ownership passes to the plan; destroy() releases it */
+     pln->kind = p->kind[0];
+     
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+     
+     X(ops_zero)(&ops); /* ops: operation-count estimate for the pre/post passes of one vector */
+     ops.add = n - 1;
+     ops.mul = n;
+     ops.other = 4*n;
+
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops); /* presumably accumulates vl * ops into the plan total -- confirm */
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); /* likewise for the child's count */
+
+     return &(pln->super.super);
+}
+
+/* constructor */
+static solver *mksolver(void) /* allocate a solver whose only hook is mkplan */
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super); /* callers see it as the base solver type */
+}
+
+void X(reodft11e_r2hc_odd_register)(planner *p) /* public entry point declared in reodft.h */
+{
+     REGISTER_SOLVER(p, mksolver()); /* register a freshly built solver with planner p */
+}
diff --git a/src/fftw3/reodft/reodft11e-r2hc.c b/src/fftw3/reodft/reodft11e-r2hc.c
new file mode 100644
index 0000000..d4366e3
--- /dev/null
+++ b/src/fftw3/reodft/reodft11e-r2hc.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: reodft11e-r2hc.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+/* Do an R{E,O}DFT11 problem via an R2HC problem, with some
+   pre/post-processing ala FFTPACK.  Use a trick from: 
+
+     S. C. Chan and K. L. Ho, "Direct methods for computing discrete
+     sinusoidal transforms," IEE Proceedings F 137 (6), 433--442 (1990).
+
+   to re-express as an REDFT01 (DCT-III) problem.
+
+   NOTE: We no longer use this algorithm, because it turns out to suffer
+   a catastrophic loss of accuracy for certain inputs, apparently because
+   its post-processing multiplies the output by a cosine.  Near the zero
+   of the cosine, the REDFT01 must produce a near-singular output.
+*/
+
+#include "reodft.h"
+
+typedef struct {
+     solver super; /* base solver only; no algorithm-specific solver state is stored */
+} S;
+
+typedef struct {
+     plan_rdft super;
+     plan *cld; /* child plan; mkplan requests it as an in-place, unit-stride R2HC of size n */
+     twid *td, *td2; /* td: pre-pass cos/sin pairs; td2: post-pass output factors; both 0 after mkplan */
+     int is, os; /* input/output strides of the transform dimension (sz->dims[0]) */
+     int n; /* transform length, sz->dims[0].n */
+     int vl; /* number of vectors, from X(tensor_tornk1)(vecsz, ...) */
+     int ivs, ovs; /* input/output distance between successive vectors */
+     rdft_kind kind; /* REDFT11 or RODFT11 (see applicable0) */
+} P;
+
+static void apply_re11(const plan *ego_, R *I, R *O) /* REDFT11: recurrence pass, twiddle pre-pass, child R2HC, scaled post-pass */
+{
+     const P *ego = (const P *) ego_;
+     int is = ego->is, os = ego->os;
+     int i, n = ego->n;
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *W;
+     R *buf;
+     E cur;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* n-element scratch array, reused for every vector */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { /* one length-n transform per vector */
+	  /* I wish that this didn't require an extra pass. */
+	  /* FIXME: use recursive/cascade summation for better stability? */
+	  buf[n - 1] = cur = K(2.0) * I[is * (n - 1)]; /* recurrence from the top: buf[i-1] = 2*I[i-1] - buf[i] */
+	  for (i = n - 1; i > 0; --i) {
+	       E curnew;
+	       buf[(i - 1)] = curnew = K(2.0) * I[is * (i - 1)] - cur;
+	       cur = curnew;
+	  }
+	  
+	  W = ego->td->W; /* td is 0 after mkplan; presumably awake() must have run first -- confirm */
+	  for (i = 1; i < n - i; ++i) { /* pre-pass over the pairs (i, n - i), in place on buf */
+	       E a, b, apb, amb, wa, wb;
+	       a = buf[i];
+	       b = buf[n - i];
+	       apb = a + b;
+	       amb = a - b;
+	       wa = W[2*i]; /* W[2*i], W[2*i+1]: the TW_COS/TW_SIN pair requested in awake() */
+	       wb = W[2*i + 1];
+	       buf[i] = wa * amb + wb * apb; 
+	       buf[n - i] = wa * apb - wb * amb; 
+	  }
+	  if (i == n - i) { /* only for even n: the unpaired middle element i == n/2 */
+	       buf[i] = K(2.0) * buf[i] * W[2*i];
+	  }
+	  
+	  {
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf); /* child transform, in place on buf */
+	  }
+	  
+	  W = ego->td2->W; /* second table: one factor per output index */
+	  O[0] = W[0] * buf[0];
+	  for (i = 1; i < n - i; ++i) { /* post-pass: each pair yields O[2i-1] and O[2i], scaled by W[2i-1], W[2i] */
+	       E a, b;
+	       int k;
+	       a = buf[i];
+	       b = buf[n - i];
+	       k = i + i;
+	       O[os * (k - 1)] = W[k - 1] * (a - b);
+	       O[os * k] = W[k] * (a + b);
+	  }
+	  if (i == n - i) { /* even n: buf[n/2] supplies the last output */
+	       O[os * (n - 1)] = W[n - 1] * buf[i];
+	  }
+     }
+
+     X(ifree)(buf);
+}
+
+/* As with rodft01, rodft11 is obtained from redft11 by
+   reversing the input and flipping the sign of every other output. */
+static void apply_ro11(const plan *ego_, R *I, R *O) /* RODFT11: apply_re11 with reversed input and negated odd outputs */
+{
+     const P *ego = (const P *) ego_;
+     int is = ego->is, os = ego->os;
+     int i, n = ego->n;
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *W;
+     R *buf;
+     E cur;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* n-element scratch array, reused for every vector */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) { /* one length-n transform per vector */
+	  /* I wish that this didn't require an extra pass. */
+	  /* FIXME: use recursive/cascade summation for better stability? */
+	  buf[n - 1] = cur = K(2.0) * I[0]; /* same recurrence as apply_re11, reading the input back to front */
+	  for (i = n - 1; i > 0; --i) {
+	       E curnew;
+	       buf[(i - 1)] = curnew = K(2.0) * I[is * (n - i)] - cur;
+	       cur = curnew;
+	  }
+	  
+	  W = ego->td->W; /* td is 0 after mkplan; presumably awake() must have run first -- confirm */
+	  for (i = 1; i < n - i; ++i) { /* pre-pass over the pairs (i, n - i), in place on buf */
+	       E a, b, apb, amb, wa, wb;
+	       a = buf[i];
+	       b = buf[n - i];
+	       apb = a + b;
+	       amb = a - b;
+	       wa = W[2*i]; /* W[2*i], W[2*i+1]: the TW_COS/TW_SIN pair requested in awake() */
+	       wb = W[2*i + 1];
+	       buf[i] = wa * amb + wb * apb; 
+	       buf[n - i] = wa * apb - wb * amb; 
+	  }
+	  if (i == n - i) { /* only for even n: the unpaired middle element i == n/2 */
+	       buf[i] = K(2.0) * buf[i] * W[2*i];
+	  }
+	  
+	  {
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf); /* child transform, in place on buf */
+	  }
+	  
+	  W = ego->td2->W; /* second table: one factor per output index */
+	  O[0] = W[0] * buf[0];
+	  for (i = 1; i < n - i; ++i) { /* post-pass: odd outputs get b - a (apply_re11 uses a - b) */
+	       E a, b;
+	       int k;
+	       a = buf[i];
+	       b = buf[n - i];
+	       k = i + i;
+	       O[os * (k - 1)] = W[k - 1] * (b - a);
+	       O[os * k] = W[k] * (a + b);
+	  }
+	  if (i == n - i) { /* even n: last output index n - 1 is odd, hence the negation */
+	       O[os * (n - 1)] = -W[n - 1] * buf[i];
+	  }
+     }
+
+     X(ifree)(buf);
+}
+
+static void awake(plan *ego_, int flg) /* forward flg to the child plan and to both twiddle tables */
+{
+     P *ego = (P *) ego_;
+     static const tw_instr reodft010e_tw[] = { /* one TW_COS and one TW_SIN entry per step: the W[2*i], W[2*i+1] pairs of the pre-pass */
+          { TW_COS, 0, 1 },
+          { TW_SIN, 0, 1 },
+          { TW_NEXT, 1, 0 }
+     };
+     static const tw_instr reodft11e_tw[] = { /* a single TW_COS entry per step: the per-output factors of the post-pass */
+          { TW_COS, 1, 1 },
+          { TW_NEXT, 2, 0 }
+     };
+
+     AWAKE(ego->cld, flg);
+
+     X(twiddle_awake)(flg, &ego->td, reodft010e_tw, 4*ego->n, 1, ego->n/2+1); /* NOTE(review): n/2+1 matches i = 0..n/2 in the pre-pass; confirm argument meanings */
+     X(twiddle_awake)(flg, &ego->td2, reodft11e_tw, 8*ego->n, 1, ego->n * 2); /* NOTE(review): post-pass reads W[0..n-1]; confirm how n*2 and TW_NEXT 2 map to that */
+}
+
+static void destroy(plan *ego_) /* plan_adt destroy hook (see padt in mkplan) */
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld); /* only the child plan is released here; td and td2 are not touched */
+}
+
+static void print(const plan *ego_, printer *p) /* plan_adt print hook (see padt in mkplan) */
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(%se-r2hc-%d%v%(%p%))", /* arguments in order: kind name, n, vl, child plan */
+	      X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
+}
+
+static int applicable0(const solver *ego_, const problem *p_) /* 1 if this solver handles problem p_, else 0 */
+{
+     UNUSED(ego_);
+     if (RDFTP(p_)) { /* the cast below is only made when RDFTP accepts p_ */
+          const problem_rdft *p = (const problem_rdft *) p_;
+          return (1
+		  && p->sz->rnk == 1 /* exactly one transform dimension */
+		  && p->vecsz->rnk <= 1 /* at most one vector dimension */
+		  && (p->kind[0] == REDFT11 || p->kind[0] == RODFT11) /* any n: no parity test here */
+	       );
+     }
+
+     return 0;
+}
+
+static int applicable(const solver *ego, const problem *p, const planner *plnr) /* applicable0 plus the planner's NO_UGLYP gate */
+{
+     return (!NO_UGLYP(plnr) && applicable0(ego, p)); /* refuses every problem when NO_UGLYP(plnr) is nonzero */
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr) /* new plan, or 0 if not applicable or no child plan was found */
+{
+     P *pln;
+     const problem_rdft *p;
+     plan *cld;
+     R *buf;
+     int n;
+     opcnt ops;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+          return (plan *)0;
+
+     p = (const problem_rdft *) p_;
+
+     n = p->sz->dims[0].n;
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* stand-in array for planning the child; freed right after */
+
+     cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1), /* child: size n, strides 1, in place, R2HC */
+                                                   X(mktensor_0d)(),
+                                                   buf, buf, R2HC));
+     X(ifree)(buf);
+     if (!cld)
+          return (plan *)0;
+
+     pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11); /* applicable0 admits only these two kinds */
+     pln->n = n;
+     pln->is = p->sz->dims[0].is;
+     pln->os = p->sz->dims[0].os;
+     pln->cld = cld; /* ownership passes to the plan; destroy() releases it */
+     pln->td = pln->td2 = 0; /* both twiddle tables are requested later, in awake() */
+     pln->kind = p->kind[0];
+     
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+     
+     X(ops_zero)(&ops); /* ops: operation-count estimate for the three passes over one vector */
+     ops.other = 5 + (n-1) * 2 + (n-1)/2 * 12 + (1 - n % 2) * 6; /* (1 - n % 2) is 1 only for even n */
+     ops.add = (n - 1) * 1 + (n-1)/2 * 6;
+     ops.mul = 2 + (n-1) * 1 + (n-1)/2 * 6 + (1 - n % 2) * 3;
+
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops); /* presumably accumulates vl * ops into the plan total -- confirm */
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); /* likewise for the child's count */
+
+     return &(pln->super.super);
+}
+
+/* constructor */
+static solver *mksolver(void) /* allocate a solver whose only hook is mkplan */
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super); /* callers see it as the base solver type */
+}
+
+void X(reodft11e_r2hc_register)(planner *p) /* public entry point declared in reodft.h */
+{
+     REGISTER_SOLVER(p, mksolver()); /* register a freshly built solver with planner p */
+}
diff --git a/src/fftw3/reodft/reodft11e-radix2.c b/src/fftw3/reodft/reodft11e-radix2.c
new file mode 100644
index 0000000..674f7b4
--- /dev/null
+++ b/src/fftw3/reodft/reodft11e-radix2.c
@@ -0,0 +1,515 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: reodft11e-radix2.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+/* Do an R{E,O}DFT11 problem of *even* size by a pair of R2HC problems
+   of half the size, plus some pre/post-processing.  Use a trick from:
+
+   Zhongde Wang, "On computing the discrete Fourier and cosine transforms,"
+   IEEE Trans. Acoust. Speech Sig. Proc. ASSP-33 (4), 1341--1344 (1985).
+
+   to re-express as a pair of half-size REDFT01 (DCT-III) problems.  Our
+   implementation looks quite a bit different from the algorithm described
+   in the paper because we combined the paper's pre/post-processing with
+   the pre/post-processing used to turn REDFT01 into R2HC.  (Also, the
+   paper uses a DCT/DST pair, but we turn the DST into a DCT via the
+   usual reordering/sign-flip trick.  We additionally combined a couple
+   of the matrices/transformations of the paper into a single pass.)
+
+   NOTE: We originally used a simpler method by S. C. Chan and K. L. Ho
+   that turned out to have numerical problems; see reodft11e-r2hc.c.
+
+   (For odd sizes, see reodft11e-r2hc-odd.c.)
+*/
+
+#include "reodft.h"
+
+typedef struct {
+     solver super;
+} S;
+
+typedef struct { /* plan state for the even-size R{E,O}DFT11 solver in this file */
+     plan_rdft super;
+     plan *cld; /* child plan; apply_* runs it in place on the scratch buffer */
+     twid *td, *td2; /* twiddle tables made by awake(): td for pre-, td2 for post-processing */
+     int is, os; /* input/output strides of the transformed dimension */
+     int n; /* transform length (sz->dims[0].n) */
+     int vl; /* number of vectors apply_* loops over */
+     int ivs, ovs; /* input/output strides between successive vectors */
+     rdft_kind kind; /* p->kind[0] of the planned problem: REDFT11 or RODFT11 */
+} P;
+
+static void apply_re11(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     int is = ego->is, os = ego->os;
+     int i, n = ego->n, n2 = n/2;
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *W = ego->td->W;
+     R *W2;
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+	  buf[0] = K(2.0) * I[0];
+	  buf[n2] = K(2.0) * I[is * (n - 1)];
+	  for (i = 1; i + i < n2; ++i) {
+	       int k = i + i;
+	       E a, b, a2, b2;
+	       {
+		    E u, v;
+		    u = I[is * (k - 1)];
+		    v = I[is * k];
+		    a = u + v;
+		    b2 = u - v;
+	       }
+	       {
+		    E u, v;
+		    u = I[is * (n - k - 1)];
+		    v = I[is * (n - k)];
+		    b = u + v;
+		    a2 = u - v;
+	       }
+	       {
+		    E wa, wb;
+		    wa = W[2*i];
+		    wb = W[2*i + 1];
+		    {
+			 E apb, amb;
+			 apb = a + b;
+			 amb = a - b;
+			 buf[i] = wa * amb + wb * apb; 
+			 buf[n2 - i] = wa * apb - wb * amb; 
+		    }
+		    {
+			 E apb, amb;
+			 apb = a2 + b2;
+			 amb = a2 - b2;
+			 buf[n2 + i] = wa * amb + wb * apb; 
+			 buf[n - i] = wa * apb - wb * amb; 
+		    }
+	       }
+	  }
+	  if (i + i == n2) {
+	       E u, v;
+	       u = I[is * (n2 - 1)];
+	       v = I[is * n2];
+	       buf[i] = K(2.0) * (u + v) * W[2*i];
+	       buf[n - i] = K(2.0) * (u - v) * W[2*i];
+	  }
+
+
+	  /* child plan: two r2hc's of size n/2 */
+	  {
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf);
+	  }
+	  
+	  W2 = ego->td2->W;
+	  { /* i == 0 case */
+	       E wa, wb;
+	       E a, b;
+	       wa = W2[0]; /* cos */
+	       wb = W2[1]; /* sin */
+	       a = buf[0];
+	       b = buf[n2];
+	       O[0] = wa * a + wb * b;
+	       O[os * (n - 1)] = wb * a - wa * b;
+	  }
+	  W2 += 2;
+	  for (i = 1; i + i < n2; ++i, W2 += 2) {
+	       int k;
+	       E u, v, u2, v2;
+	       u = buf[i];
+	       v = buf[n2 - i];
+	       u2 = buf[n2 + i];
+	       v2 = buf[n - i];
+	       k = (i + i) - 1;
+	       {
+                    E wa, wb;
+                    E a, b;
+                    wa = W2[0]; /* cos */
+                    wb = W2[1]; /* sin */
+                    a = u - v;
+                    b = v2 - u2;
+                    O[os * k] = wa * a + wb * b;
+                    O[os * (n - 1 - k)] = wb * a - wa * b;
+               }
+	       ++k;
+	       W2 += 2;
+	       {
+		    E wa, wb;
+		    E a, b;
+		    wa = W2[0]; /* cos */
+		    wb = W2[1]; /* sin */
+		    a = u + v;
+		    b = u2 + v2;
+		    O[os * k] = wa * a + wb * b;
+		    O[os * (n - 1 - k)] = wb * a - wa * b;
+	       }
+	  }
+	  if (i + i == n2) {
+	       int k = (i + i) - 1;
+	       E wa, wb;
+	       E a, b;
+	       wa = W2[0]; /* cos */
+	       wb = W2[1]; /* sin */
+	       a = buf[i];
+	       b = buf[n2 + i];
+	       O[os * k] = wa * a - wb * b;
+	       O[os * (n - 1 - k)] = wb * a + wa * b;
+	  }
+     }
+
+     X(ifree)(buf);
+}
+
+#if 0
+
+/* This version of apply_re11 uses REDFT01 child plans, more similar
+   to the original paper by Z. Wang.  We keep it around for reference
+   (it is simpler) and because it may become more efficient if we
+   ever implement REDFT01 codelets. */
+
+static void apply_re11(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     int is = ego->is, os = ego->os;
+     int i, n = ego->n;
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *W;
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+	  buf[0] = K(2.0) * I[0];
+	  buf[n/2] = K(2.0) * I[is * (n - 1)];
+	  for (i = 1; i + i < n; ++i) {
+	       int k = i + i;
+	       E a, b;
+	       a = I[is * (k - 1)];
+	       b = I[is * k];
+	       buf[i] = a + b;
+	       buf[n - i] = a - b;
+	  }
+
+	  /* child plan: two redft01's (DCT-III) */
+	  {
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf);
+	  }
+	  
+	  W = ego->td2->W;
+	  for (i = 0; i + 1 < n/2; ++i, W += 2) {
+	       {
+		    E wa, wb;
+		    E a, b;
+		    wa = W[0]; /* cos */
+		    wb = W[1]; /* sin */
+		    a = buf[i];
+		    b = buf[n/2 + i];
+		    O[os * i] = wa * a + wb * b;
+		    O[os * (n - 1 - i)] = wb * a - wa * b;
+	       }
+	       ++i;
+	       W += 2;
+	       {
+                    E wa, wb;
+                    E a, b;
+                    wa = W[0]; /* cos */
+                    wb = W[1]; /* sin */
+                    a = buf[i];
+                    b = buf[n/2 + i];
+                    O[os * i] = wa * a - wb * b;
+                    O[os * (n - 1 - i)] = wb * a + wa * b;
+               }
+	  }
+	  if (i < n/2) {
+	       E wa, wb;
+	       E a, b;
+	       wa = W[0]; /* cos */
+	       wb = W[1]; /* sin */
+	       a = buf[i];
+	       b = buf[n/2 + i];
+	       O[os * i] = wa * a + wb * b;
+	       O[os * (n - 1 - i)] = wb * a - wa * b;
+	  }
+     }
+
+     X(ifree)(buf);
+}
+
+#endif /* 0 */
+
+/* like for rodft01, rodft11 is obtained from redft11 by
+   reversing the input and flipping the sign of every other output. */
+static void apply_ro11(const plan *ego_, R *I, R *O)
+{
+     const P *ego = (const P *) ego_;
+     int is = ego->is, os = ego->os;
+     int i, n = ego->n, n2 = n/2;
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *W = ego->td->W;
+     R *W2;
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+	  buf[0] = K(2.0) * I[is * (n - 1)];
+	  buf[n2] = K(2.0) * I[0];
+	  for (i = 1; i + i < n2; ++i) {
+	       int k = i + i;
+	       E a, b, a2, b2;
+	       {
+		    E u, v;
+		    u = I[is * (n - k)];
+		    v = I[is * (n - 1 - k)];
+		    a = u + v;
+		    b2 = u - v;
+	       }
+	       {
+		    E u, v;
+		    u = I[is * (k)];
+		    v = I[is * (k - 1)];
+		    b = u + v;
+		    a2 = u - v;
+	       }
+	       {
+		    E wa, wb;
+		    wa = W[2*i];
+		    wb = W[2*i + 1];
+		    {
+			 E apb, amb;
+			 apb = a + b;
+			 amb = a - b;
+			 buf[i] = wa * amb + wb * apb; 
+			 buf[n2 - i] = wa * apb - wb * amb; 
+		    }
+		    {
+			 E apb, amb;
+			 apb = a2 + b2;
+			 amb = a2 - b2;
+			 buf[n2 + i] = wa * amb + wb * apb; 
+			 buf[n - i] = wa * apb - wb * amb; 
+		    }
+	       }
+	  }
+	  if (i + i == n2) {
+	       E u, v;
+	       u = I[is * n2];
+	       v = I[is * (n2 - 1)];
+	       buf[i] = K(2.0) * (u + v) * W[2*i];
+	       buf[n - i] = K(2.0) * (u - v) * W[2*i];
+	  }
+
+
+	  /* child plan: two r2hc's of size n/2 */
+	  {
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf);
+	  }
+	  
+	  W2 = ego->td2->W;
+	  { /* i == 0 case */
+	       E wa, wb;
+	       E a, b;
+	       wa = W2[0]; /* cos */
+	       wb = W2[1]; /* sin */
+	       a = buf[0];
+	       b = buf[n2];
+	       O[0] = wa * a + wb * b;
+	       O[os * (n - 1)] = wa * b - wb * a;
+	  }
+	  W2 += 2;
+	  for (i = 1; i + i < n2; ++i, W2 += 2) {
+	       int k;
+	       E u, v, u2, v2;
+	       u = buf[i];
+	       v = buf[n2 - i];
+	       u2 = buf[n2 + i];
+	       v2 = buf[n - i];
+	       k = (i + i) - 1;
+	       {
+                    E wa, wb;
+                    E a, b;
+                    wa = W2[0]; /* cos */
+                    wb = W2[1]; /* sin */
+                    a = v - u;
+                    b = u2 - v2;
+                    O[os * k] = wa * a + wb * b;
+                    O[os * (n - 1 - k)] = wa * b - wb * a;
+               }
+	       ++k;
+	       W2 += 2;
+	       {
+		    E wa, wb;
+		    E a, b;
+		    wa = W2[0]; /* cos */
+		    wb = W2[1]; /* sin */
+		    a = u + v;
+		    b = u2 + v2;
+		    O[os * k] = wa * a + wb * b;
+		    O[os * (n - 1 - k)] = wa * b - wb * a;
+	       }
+	  }
+	  if (i + i == n2) {
+	       int k = (i + i) - 1;
+	       E wa, wb;
+	       E a, b;
+	       wa = W2[0]; /* cos */
+	       wb = W2[1]; /* sin */
+	       a = buf[i];
+	       b = buf[n2 + i];
+	       O[os * k] = wb * b - wa * a;
+	       O[os * (n - 1 - k)] = wa * b + wb * a;
+	  }
+     }
+
+     X(ifree)(buf);
+}
+
+static void awake(plan *ego_, int flg)
+{ /* passes flg on to the child plan and to both twiddle tables of this plan */
+     P *ego = (P *) ego_;
+     static const tw_instr reodft010e_tw[] = { /* recipe for ego->td */
+          { TW_COS, 0, 1 },
+          { TW_SIN, 0, 1 },
+          { TW_NEXT, 1, 0 }
+     };
+     static const tw_instr reodft11e_tw[] = { /* recipe for ego->td2 */
+          { TW_COS, 1, 1 },
+          { TW_SIN, 1, 1 },
+          { TW_NEXT, 2, 0 }
+     };
+
+     AWAKE(ego->cld, flg);
+
+     X(twiddle_awake)(flg, &ego->td, reodft010e_tw, 2*ego->n, 1, ego->n/4+1); /* apply_* reads td->W[2*i] and td->W[2*i + 1] */
+     X(twiddle_awake)(flg, &ego->td2, reodft11e_tw, 8*ego->n, 1, ego->n); /* apply_* walks td2->W as (cos, sin) pairs */
+}
+
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld);
+}
+
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(%se-radix2-r2hc-%d%v%(%p%))",
+	      X(rdft_kind_str)(ego->kind), ego->n, ego->vl, ego->cld);
+}
+
+static int applicable0(const solver *ego_, const problem *p_)
+{ /* returns nonzero iff p_ is an RDFT problem with the shape and kind handled by this file */
+     UNUSED(ego_);
+     if (RDFTP(p_)) {
+          const problem_rdft *p = (const problem_rdft *) p_;
+          return (1
+		  && p->sz->rnk == 1 /* exactly one transform dimension */
+		  && p->vecsz->rnk <= 1 /* at most one vector (loop) dimension */
+		  && p->sz->dims[0].n % 2 == 0 /* even length only; apply_* splits it into two halves */
+		  && (p->kind[0] == REDFT11 || p->kind[0] == RODFT11)
+	       );
+     }
+
+     return 0;
+}
+
+static int applicable(const solver *ego, const problem *p, const planner *plnr)
+{
+     return (!NO_UGLYP(plnr) && applicable0(ego, p));
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{ /* returns a plan for an even-size REDFT11/RODFT11 problem, or 0 if not applicable or no child plan */
+     P *pln;
+     const problem_rdft *p;
+     plan *cld;
+     R *buf;
+     int n;
+     opcnt ops;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+          return (plan *)0;
+
+     p = (const problem_rdft *) p_;
+
+     n = p->sz->dims[0].n;
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* only needed to describe the child problem; freed below */
+
+     cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n/2, 1, 1),
+                                                   X(mktensor_1d)(2, n/2, n/2),
+                                                   buf, buf, R2HC)); /* in-place R2HC: length n/2, 2 vectors n/2 apart */
+     X(ifree)(buf);
+     if (!cld)
+          return (plan *)0;
+
+     pln = MKPLAN_RDFT(P, &padt, p->kind[0]==REDFT11 ? apply_re11:apply_ro11); /* apply routine chosen by kind */
+     pln->n = n;
+     pln->is = p->sz->dims[0].is;
+     pln->os = p->sz->dims[0].os;
+     pln->cld = cld;
+     pln->td = pln->td2 = 0; /* twiddle tables are created later, in awake() */
+     pln->kind = p->kind[0];
+     /* fill the vector-loop count and strides from p->vecsz */
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+     /* NOTE(review): presumably cost estimates for one vector of pre/post-processing -- confirm */
+     X(ops_zero)(&ops);
+     ops.add = 2 + (n/2 - 1)/2 * 20;
+     ops.mul = 6 + (n/2 - 1)/2 * 16;
+     ops.other = 4*n + 2 + (n/2 - 1)/2 * 6;
+     if ((n/2) % 2 == 0) { /* n/2 even: apply_* also runs its i + i == n2 tail cases */
+	  ops.add += 4;
+	  ops.mul += 8;
+	  ops.other += 4;
+     }
+
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops); /* own work, passed together with vl */
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops); /* child plan's work, likewise */
+
+     return &(pln->super.super);
+}
+
+/* constructor */
+static solver *mksolver(void)
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super);
+}
+
+void X(reodft11e_radix2_r2hc_register)(planner *p)
+{
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/reodft/rodft00e-r2hc-pad.c b/src/fftw3/reodft/rodft00e-r2hc-pad.c
new file mode 100644
index 0000000..0b48585
--- /dev/null
+++ b/src/fftw3/reodft/rodft00e-r2hc-pad.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rodft00e-r2hc-pad.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+/* Do a RODFT00 problem via an R2HC problem, padded antisymmetrically to
+   twice the size.  This is asymptotically a factor of ~2 worse than
+   rodft00e-r2hc.c (the algorithm used in e.g. FFTPACK and Numerical
+   Recipes), but we abandoned the latter after we discovered that it
+   has intrinsic accuracy problems. */
+
+#include "reodft.h"
+
+typedef struct {
+     solver super;
+} S;
+
+typedef struct {
+     plan_rdft super;
+     plan *cld, *cldcpy;
+     int is;
+     int n;
+     int vl;
+     int ivs, ovs;
+} P;
+
+static void apply(const plan *ego_, R *I, R *O)
+{ /* for each of the vl vectors: pad I antisymmetrically into buf, run the 2*n R2HC child, copy results to O */
+     const P *ego = (const P *) ego_;
+     int is = ego->is;
+     int i, n = ego->n; /* n is the problem size plus one (set in mkplan) */
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS); /* scratch of 2*n reals, reused for every vector */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+	  buf[0] = 0.0;
+	  for (i = 1; i < n; ++i) {
+	       R a = I[(i-1) * is];
+	       buf[i] = -a; /* negated input in the first half ... */
+	       buf[2*n - i] = a; /* ... and its mirror image, with opposite sign, in the second half */
+	  }
+	  buf[i] = 0.0; /* i == n, Nyquist */
+	  
+	  /* r2hc transform of size 2*n */
+	  {
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf);
+	  }
+	  
+	  /* copy n-1 real numbers (imag. parts of hc array) from buf to O */
+	  {
+	       plan_rdft *cldcpy = (plan_rdft *) ego->cldcpy;
+	       cldcpy->apply((plan *) cldcpy, buf+2*n-1, O);
+	  }
+     }
+
+     X(ifree)(buf);
+}
+
+static void awake(plan *ego_, int flg)
+{
+     P *ego = (P *) ego_;
+     AWAKE(ego->cld, flg);
+     AWAKE(ego->cldcpy, flg);
+}
+
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cldcpy);
+     X(plan_destroy_internal)(ego->cld);
+}
+
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(rodft00e-r2hc-pad-%d%v%(%p%)%(%p%))", 
+	      ego->n - 1, ego->vl, ego->cld, ego->cldcpy);
+}
+
+static int applicable0(const solver *ego_, const problem *p_)
+{
+     UNUSED(ego_);
+     if (RDFTP(p_)) {
+          const problem_rdft *p = (const problem_rdft *) p_;
+          return (1
+		  && p->sz->rnk == 1
+		  && p->vecsz->rnk <= 1
+		  && p->kind[0] == RODFT00
+	       );
+     }
+
+     return 0;
+}
+
+static int applicable(const solver *ego, const problem *p, const planner *plnr)
+{
+     return (!NO_UGLYP(plnr) && applicable0(ego, p));
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     P *pln;
+     const problem_rdft *p;
+     plan *cld = (plan *) 0, *cldcpy;
+     R *buf = (R *) 0;
+     int n;
+     int vl, ivs, ovs;
+     opcnt ops;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+	  goto nada;
+
+     p = (const problem_rdft *) p_;
+
+     n = p->sz->dims[0].n + 1;
+     A(n > 0);
+     buf = (R *) MALLOC(sizeof(R) * (2*n), BUFFERS);
+
+     cld = X(mkplan_d)(plnr,X(mkproblem_rdft_1_d)(X(mktensor_1d)(2*n,1,1), 
+						  X(mktensor_0d)(), 
+						  buf, buf, R2HC));
+     if (!cld)
+	  goto nada;
+
+     X(tensor_tornk1)(p->vecsz, &vl, &ivs, &ovs);
+     cldcpy =
+	  X(mkplan_d)(plnr,
+		      X(mkproblem_rdft_1_d)(X(mktensor_0d)(),
+					    X(mktensor_1d)(n-1,-1,
+							   p->sz->dims[0].os), 
+					    buf+2*n-1,TAINT(p->O, ovs), R2HC));
+     if (!cldcpy)
+	  goto nada;
+
+     X(ifree)(buf);
+
+     pln = MKPLAN_RDFT(P, &padt, apply);
+
+     pln->n = n;
+     pln->is = p->sz->dims[0].is;
+     pln->cld = cld;
+     pln->cldcpy = cldcpy;
+     pln->vl = vl;
+     pln->ivs = ivs;
+     pln->ovs = ovs;
+     
+     X(ops_zero)(&ops);
+     ops.other = n-1 + 2*n; /* loads + stores (input -> buf) */
+
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &cldcpy->ops, &pln->super.super.ops);
+
+     return &(pln->super.super);
+
+ nada:
+     X(ifree0)(buf);
+     if (cld)
+	  X(plan_destroy_internal)(cld);  
+     return (plan *)0;
+}
+
+/* constructor */
+static solver *mksolver(void)
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super);
+}
+
+void X(rodft00e_r2hc_pad_register)(planner *p)
+{
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/fftw3/reodft/rodft00e-r2hc.c b/src/fftw3/reodft/rodft00e-r2hc.c
new file mode 100644
index 0000000..46bb299
--- /dev/null
+++ b/src/fftw3/reodft/rodft00e-r2hc.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2003 Matteo Frigo
+ * Copyright (c) 2003 Massachusetts Institute of Technology
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/* $Id: rodft00e-r2hc.c,v 1.1 2008/10/17 06:13:18 scuri Exp $ */
+
+/* Do a RODFT00 problem via an R2HC problem, with some pre/post-processing.
+
+   This code uses the trick from FFTPACK, also documented in a similar
+   form by Numerical Recipes.  Unfortunately, this algorithm seems to
+   have intrinsic numerical problems (similar to those in
+   reodft11e-r2hc.c), possibly due to the fact that it multiplies its
+   input by a sine, causing a loss of precision near the zero.  For
+   transforms of 16k points, it has already lost three or four decimal
+   places of accuracy, which we deem unacceptable.
+
+   So, we have abandoned this algorithm in favor of the one in
+   rodft00e-r2hc-pad.c, which unfortunately sacrifices 30-50% in speed.
+   The only other alternative in the literature that does not have
+   similar numerical difficulties seems to be the direct adaptation of
+   the Cooley-Tukey decomposition for antisymmetric data, but this
+   would require a whole new set of codelets and it's not clear that
+   it's worth it at this point. */
+
+#include "reodft.h"
+
+typedef struct {
+     solver super;
+} S;
+
+typedef struct {
+     plan_rdft super;
+     plan *cld;
+     twid *td;
+     int is, os;
+     int n;
+     int vl;
+     int ivs, ovs;
+} P;
+
+static void apply(const plan *ego_, R *I, R *O)
+{ /* for each of the vl vectors: pre-process I into buf, run the child plan in place, post-process into O */
+     const P *ego = (const P *) ego_;
+     int is = ego->is, os = ego->os;
+     int i, n = ego->n; /* n is the problem size plus one (set in mkplan) */
+     int iv, vl = ego->vl;
+     int ivs = ego->ivs, ovs = ego->ovs;
+     R *W = ego->td->W; /* table built in awake() from the TW_SIN recipe */
+     R *buf;
+
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS); /* scratch of n reals, reused for every vector */
+
+     for (iv = 0; iv < vl; ++iv, I += ivs, O += ovs) {
+	  buf[0] = 0;
+	  for (i = 1; i < n - i; ++i) { /* pair input element i-1 with its mirror element n-i-1 */
+	       E a, b, apb, amb;
+	       a = I[is * (i - 1)];
+	       b = I[is * ((n - i) - 1)];
+	       apb =  K(2.0) * W[i] * (a + b);
+	       amb = (a - b);
+	       buf[i] = apb + amb;
+	       buf[n - i] = apb - amb;
+	  }
+	  if (i == n - i) { /* n even: the middle input element has no mirror partner */
+	       buf[i] = K(4.0) * I[is * (i - 1)];
+	  }
+	  /* child plan, applied in place on buf */
+	  {
+	       plan_rdft *cld = (plan_rdft *) ego->cld;
+	       cld->apply((plan *) cld, buf, buf);
+	  }
+	  /* NOTE(review): 0.5 below is a bare double literal; the other constants here use K(...) */
+	  /* FIXME: use recursive/cascade summation for better stability? */
+	  O[0] = buf[0] * 0.5;
+	  for (i = 1; i + i < n - 1; ++i) {
+	       int k = i + i;
+	       O[os * (k - 1)] = -buf[n - i];
+	       O[os * k] = O[os * (k - 2)] + buf[i]; /* running sum: built on the output written two slots earlier */
+	  }
+	  if (i + i == n - 1) { /* n odd: one more output at index n-2 */
+	       O[os * (n - 2)] = -buf[n - i];
+	  }
+     }
+
+     X(ifree)(buf);
+}
+
+static void awake(plan *ego_, int flg)
+{
+     P *ego = (P *) ego_;
+     static const tw_instr rodft00e_tw[] = {
+          { TW_SIN, 0, 1 },
+          { TW_NEXT, 1, 0 }
+     };
+
+     AWAKE(ego->cld, flg);
+
+     X(twiddle_awake)(flg, &ego->td, rodft00e_tw, 2*ego->n, 1, (ego->n+1)/2);
+}
+
+static void destroy(plan *ego_)
+{
+     P *ego = (P *) ego_;
+     X(plan_destroy_internal)(ego->cld);
+}
+
+static void print(const plan *ego_, printer *p)
+{
+     const P *ego = (const P *) ego_;
+     p->print(p, "(rodft00e-r2hc-%d%v%(%p%))", ego->n - 1, ego->vl, ego->cld);
+}
+
+static int applicable0(const solver *ego_, const problem *p_)
+{
+     UNUSED(ego_);
+     if (RDFTP(p_)) {
+          const problem_rdft *p = (const problem_rdft *) p_;
+          return (1
+		  && p->sz->rnk == 1
+		  && p->vecsz->rnk <= 1
+		  && p->kind[0] == RODFT00
+	       );
+     }
+
+     return 0;
+}
+
+static int applicable(const solver *ego, const problem *p, const planner *plnr)
+{
+     return (!NO_UGLYP(plnr) && applicable0(ego, p));
+}
+
+static plan *mkplan(const solver *ego_, const problem *p_, planner *plnr)
+{
+     P *pln;
+     const problem_rdft *p;
+     plan *cld;
+     R *buf;
+     int n;
+     opcnt ops;
+
+     static const plan_adt padt = {
+	  X(rdft_solve), awake, print, destroy
+     };
+
+     if (!applicable(ego_, p_, plnr))
+          return (plan *)0;
+
+     p = (const problem_rdft *) p_;
+
+     n = p->sz->dims[0].n + 1;
+     buf = (R *) MALLOC(sizeof(R) * n, BUFFERS);
+
+     cld = X(mkplan_d)(plnr, X(mkproblem_rdft_1_d)(X(mktensor_1d)(n, 1, 1),
+                                                   X(mktensor_0d)(),
+                                                   buf, buf, R2HC));
+     X(ifree)(buf);
+     if (!cld)
+          return (plan *)0;
+
+     pln = MKPLAN_RDFT(P, &padt, apply);
+
+     pln->n = n;
+     pln->is = p->sz->dims[0].is;
+     pln->os = p->sz->dims[0].os;
+     pln->cld = cld;
+     pln->td = 0;
+     
+     X(tensor_tornk1)(p->vecsz, &pln->vl, &pln->ivs, &pln->ovs);
+     
+     X(ops_zero)(&ops);
+     ops.other = 4 + (n-1)/2 * 5 + (n-2)/2 * 5;
+     ops.add = (n-1)/2 * 4 + (n-2)/2 * 1;
+     ops.mul = 1 + (n-1)/2 * 2;
+     if (n % 2 == 0)
+	  ops.mul += 1;
+
+     X(ops_zero)(&pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &ops, &pln->super.super.ops);
+     X(ops_madd2)(pln->vl, &cld->ops, &pln->super.super.ops);
+
+     return &(pln->super.super);
+}
+
+/* constructor */
+static solver *mksolver(void)
+{
+     static const solver_adt sadt = { mkplan };
+     S *slv = MKSOLVER(S, &sadt);
+     return &(slv->super);
+}
+
+void X(rodft00e_r2hc_register)(planner *p)
+{
+     REGISTER_SOLVER(p, mksolver());
+}
diff --git a/src/im.def b/src/im.def
new file mode 100644
index 0000000..1a39bcf
--- /dev/null
+++ b/src/im.def
@@ -0,0 +1,196 @@
+EXPORTS        
+  imFileGetAttribute
+  imFileNew
+  imFileOpen
+  imFileFormat
+  imFileReadImageData
+  imFileReadImageInfo
+  imFileWriteImageData
+  imFileWriteImageInfo
+  imFileClose
+  imFileGetInfo
+  imFileGetPalette
+  imFileSetAttribute
+  imFileGetAttributeList
+  imFileSetInfo
+  imFileSetPalette
+  imFileOpenAs 
+  imFileHandle
+  imFileLineBufferCount
+  imFileLineSizeAligned
+  imFileLineBufferInc
+  imFileLineBufferRead
+  imFileLineBufferWrite
+  imFileImageLoad
+  imFileImageLoadBitmap
+  imFileImageSave
+  imFileLoadImageFrame
+  imFileLoadBitmapFrame
+  imFileSaveImage
+  imFileLoadBitmap
+  imFileLoadImage
+  imVersion
+  imVersionDate
+  imVersionNumber
+  imPaletteFindColor
+  imPaletteFindNearest
+  imPaletteUniformIndex
+  imPaletteUniformIndexHalftoned
+  imPaletteBlackBody
+  imPaletteBlue
+  imPaletteBlueIce
+  imPaletteCian
+  imPaletteGray
+  imPaletteGreen
+  imPaletteHotIron
+  imPaletteHues
+  imPaletteMagenta
+  imPaletteRainbow
+  imPaletteRed
+  imPaletteUniform
+  imPaletteYellow
+  imPaletteHighContrast
+  imFormatRegister
+  imFormatRegisterInternal
+  imFormatRemoveAll
+  imFormatCanWriteImage
+  imFormatCompressions
+  imFormatInfo
+  imFormatList
+  imColorModeSpaceName
+  imDataTypeName
+  imBinCPUByteOrder
+  imColorModeDepth
+  imColorModeIsBitmap
+  imColorModeToBitmap
+  imDataTypeSize
+  imStrCheck
+  imStrEqual
+  imStrNLen
+  imColorEncode
+  imDataTypeIntMin
+  imDataTypeIntMax
+  imBinSwapBytes2
+  imBinSwapBytes4
+  imBinSwapBytes8
+  imColorDecode
+  imCounterSetCallback
+  imCounterBegin
+  imCounterInc
+  imCounterIncTo
+  imCounterEnd
+  imCounterTotal
+  imAttribTableCreate
+  imAttribTableDestroy
+  imAttribTableCount
+  imAttribTableRemoveAll
+  imAttribTableGet
+  imAttribTableSet
+  imAttribTableUnSet
+  imAttribTableCopyFrom
+  imAttribTableForEach
+  imImageGetAttribute
+  imImageClone
+  imImageCreate
+  imImageDuplicate
+  imImageInit
+  imImageCheckFormat
+  imImageDataSize
+  imImageLineCount
+  imImageLineSize
+  imImageIsBitmap
+  imImageMatch
+  imImageMatchColor
+  imImageMatchColorSpace
+  imImageMatchDataType
+  imImageMatchSize
+  imImageClear
+  imImageCopyAttributes
+  imImageDestroy
+  imImageGetAttributeList
+  imImageReshape
+  imImageSetAttribute
+  imImageSetBinary
+  imImageMakeBinary
+  imImageSetPalette
+  imImageCopy
+  imImageCopyData
+  imImageCreateBased
+  imImageAddAlpha
+  imDibToHBitmap
+  imDibLogicalPalette
+  imDibCaptureScreen
+  imDibCreate
+  imDibCreateCopy
+  imDibCreateReference
+  imDibCreateSection
+  imDibFromHBitmap
+  imDibFromImage
+  imDibLoadFile
+  imDibPasteClipboard
+  imDibLineGetPixelFunc
+  imDibLineSetPixelFunc
+  imDibToImage
+  imDibIsClipboardAvailable
+  imDibSaveFile
+  imDibCopyClipboard
+  imDibDecodeToBitmap
+  imDibDecodeToMap
+  imDibDecodeToRGBA
+  imDibDestroy
+  imDibEncodeFromBitmap
+  imDibEncodeFromMap
+  imDibEncodeFromRGBA
+  imConvertColorSpace
+  imConvertDataType
+  imConvertToBitmap
+  imConvertPacking
+  imConvertMapToRGB
+  imConvertRGB2Map
+  imFileNewRaw
+  imFileOpenRaw
+  imBinFileNew
+  imBinFileOpen
+  imBinFileByteOrder
+  imBinFileEndOfFile
+  imBinFileError
+  imBinFileRegisterModule
+  imBinFileSetCurrentModule
+  imBinFilePrintf
+  imBinFileRead
+  imBinFileSize
+  imBinFileTell
+  imBinFileWrite
+  imBinFileClose
+  imBinFileSeekFrom
+  imBinFileSeekOffset
+  imBinFileSeekTo
+  imColorHSI_ImaxS
+  imColorHSI_Smax
+  imColorHSI2RGB
+  imColorHSI2RGBbyte
+  imColorRGB2HSI
+  imColorRGB2HSIbyte
+  imEncodeColor
+  imDecodeColor
+  imImageInfo
+  imLoadRGB
+  imSaveRGB
+  imLoadMap
+  imSaveMap
+  imRGB2Map
+  imMap2RGB
+  imRGB2Gray
+  imMap2Gray
+  imResize
+  imStretch
+  imRegisterCallback
+  imCompressDataZ
+  imCompressDataUnZ
+  imAttribArrayCreate
+  imAttribArrayGet
+  imAttribArraySet
+  imAttribArrayCopyFrom
+  imBinMemoryRelease
+  imFileImageLoadRegion
+  imFileLoadImageRegion
diff --git a/src/im.rc b/src/im.rc
new file mode 100644
index 0000000..e702957
--- /dev/null
+++ b/src/im.rc
@@ -0,0 +1,19 @@
+1 VERSIONINFO
+ FILEVERSION 3,3,1,0
+ PRODUCTVERSION 3,3,1,0
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904b0"
+        BEGIN
+            VALUE "Comments", "www.tecgraf.puc-rio.br/im\0"
+            VALUE "CompanyName", "Tecgraf/PUC-Rio\0"
+            VALUE "FileDescription", "IM - Image Representation, Storage, Capture and Processing\0"
+            VALUE "FileVersion", "3.3.1\0"
+            VALUE "LegalCopyright", "Copyright � 1994-2008 Tecgraf, PUC-Rio.\0"
+            VALUE "OriginalFilename", "im.dll\0"
+            VALUE "ProductName", "IM for Windows\0"
+            VALUE "ProductVersion", "3.3.1\0"
+        END
+    END
+END
diff --git a/src/im_attrib.cpp b/src/im_attrib.cpp
new file mode 100644
index 0000000..a1b95b7
--- /dev/null
+++ b/src/im_attrib.cpp
@@ -0,0 +1,316 @@
+/** \file
+ * \brief Attributes Table
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_attrib.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include <stdlib.h>
+#include <memory.h>
+#include <string.h>
+#include <assert.h>
+
+#include "im_attrib.h"
+#include "im_util.h"
+
+#define IM_DEFAULTSIZE 101
+#define IM_MULTIPLIER 31
+
+// Maps a name to a bucket index by folding its bytes with IM_MULTIPLIER. Different names can share an index.
+static int iHashIndex(const char *name, int hash_size)
+{
+  const unsigned char *cursor = (const unsigned char*)name;
+  unsigned short accum = 0;   // the running value is truncated to unsigned short at each step
+
+  while (*cursor != 0)   // stops at the string terminator
+    accum = accum*IM_MULTIPLIER + *cursor++;
+
+  return accum % hash_size;
+}
+
+
+/*******************************************************************/
+
+
+class imAttribNode   // One attribute: a named array of values, linked to the next node of the same chain
+{
+public:
+  int data_type;     // type code, given to imDataTypeSize by the constructor
+  int count;         // number of values stored in "data"
+  void* data;        // allocated by the constructor, freed by the destructor
+  char* name;        // private copy of the attribute name, freed by the destructor
+
+  imAttribNode* next;   // next node of the chain
+
+  imAttribNode(const char* name, int _data_type, int _count, const void* _data, imAttribNode* next);
+  ~imAttribNode();
+};
+
+static char* utlStrDup(const char* str)
+{
+  // Returns a malloc'ed copy of "str" (released with free), or NULL when the allocation fails.
+  assert(str);
+
+  size_t size = strlen(str) + 1;   // size_t: the length returned by strlen is not narrowed
+  char* new_str = (char*)malloc(size);
+  if (!new_str)
+    return NULL;
+
+  memcpy(new_str, str, size);
+  return new_str;
+}
+
+imAttribNode::imAttribNode(const char* name, int _data_type, int _count, const void* _data, imAttribNode* _next)
+{
+  // The node keeps private copies of the name and of the values.
+  this->next = _next;
+  this->count = _count;
+  this->data_type = _data_type;
+  this->name = utlStrDup(name);
+  int nbytes = _count * imDataTypeSize(_data_type);
+  this->data = malloc(nbytes);
+  if (_data == NULL) memset(this->data, 0, nbytes);   // no source values: start zeroed
+  else memcpy(this->data, _data, nbytes);
+}
+
+imAttribNode::~imAttribNode()
+{
+  free(this->name);   // copy made by utlStrDup in the constructor
+  free(this->data);   // block allocated with malloc in the constructor
+}
+
+
+/*******************************************************************/
+
+struct imAttribTablePrivate   // Used both by the hash table (imAttribTable*) and the array (imAttribArray*) functions
+{
+  int count,       // hash table: number of stored attributes; array: number of slots
+      hash_size;   // number of entries allocated in hash_table
+  imAttribNode* *hash_table;   // hash table: heads of the chains; array: one node (or NULL) per slot
+};
+
+imAttribTablePrivate* imAttribTableCreate(int hash_size)
+{
+  imAttribTablePrivate* ptable = (imAttribTablePrivate*)malloc(sizeof(*ptable));
+  ptable->hash_size = (hash_size != 0)? hash_size: IM_DEFAULTSIZE;   // 0 selects the default size
+  ptable->hash_table = (imAttribNode**)malloc(ptable->hash_size*sizeof(imAttribNode*));
+  memset(ptable->hash_table, 0, ptable->hash_size*sizeof(imAttribNode*));   // every chain starts empty
+  ptable->count = 0;
+  return ptable;
+}
+
+imAttribTablePrivate* imAttribArrayCreate(int count)
+{
+  imAttribTablePrivate* parray = (imAttribTablePrivate*)malloc(sizeof(*parray));
+  parray->count = parray->hash_size = count;   // an array counts its slots, used or not
+  parray->hash_table = (imAttribNode**)malloc(count*sizeof(imAttribNode*));
+  memset(parray->hash_table, 0, count*sizeof(imAttribNode*));   // every slot starts empty
+  return parray;
+}
+
+void imAttribTableDestroy(imAttribTablePrivate* ptable)
+{
+  imAttribTableRemoveAll(ptable);   // deletes the nodes still stored
+  free(ptable->hash_table);         // then the entry array
+  free(ptable);                     // and finally the table itself
+}
+
+int imAttribTableCount(imAttribTablePrivate* ptable)
+{
+  return ptable->count;   // hash table: stored attributes; array: slots (0 after imAttribTableRemoveAll)
+}
+
+void imAttribTableRemoveAll(imAttribTablePrivate* ptable)
+{
+  // Nothing counted, nothing to release.
+  if (ptable->count == 0) return;
+
+  int removed = 0;
+  int i = 0;
+  while (i < ptable->hash_size)
+  {
+    // Release the whole chain of this entry.
+    imAttribNode* node = ptable->hash_table[i];
+    for (; node; removed++)
+    {
+      imAttribNode* following = node->next;
+      delete node;
+      node = following;
+    }
+    ptable->hash_table[i++] = NULL;
+
+    // Stop early once as many nodes as ptable->count were deleted.
+    if (removed == ptable->count) break;
+  }
+  ptable->count = 0;
+}
+
+void imAttribTableSet(imAttribTablePrivate* ptable, const char* name, int data_type, int count, const void* data)
+{
+  assert(name);
+  // Stores a copy of the values under "name", replacing an attribute already stored with that name.
+  int index = iHashIndex(name, ptable->hash_size);
+  imAttribNode* first_node = ptable->hash_table[index];
+
+  // The name already exists ?
+  imAttribNode* cur_node = first_node;
+  imAttribNode* prev_node = NULL;
+  while (cur_node) 
+  {
+    if (imStrEqual(cur_node->name, name))
+    {
+      // Found, replace current node.
+      imAttribNode* new_node = new imAttribNode(name, data_type, count, data, cur_node->next);   // inherits the old node's successor
+
+      // Is first node ?
+      if (cur_node == first_node)
+        ptable->hash_table[index] = new_node;
+      else
+        prev_node->next = new_node;
+
+      delete cur_node;
+      return;   // a replacement leaves ptable->count unchanged
+    }
+
+    prev_node = cur_node;
+    cur_node = cur_node->next;
+  }
+
+  // Not found, the new item goes first.
+  cur_node = new imAttribNode(name, data_type, count, data, first_node);
+  ptable->hash_table[index] = cur_node;
+	ptable->count++;
+}
+
+void imAttribTableUnSet(imAttribTablePrivate* ptable, const char *name)
+{
+  assert(name);
+
+  // An empty table cannot contain the name.
+  if (ptable->count == 0) return;
+
+  // "link" is the pointer that leads to the node under inspection:
+  // first the head of the chain, then the "next" field of the previous node.
+  int index = iHashIndex(name, ptable->hash_size);
+  imAttribNode** link = &ptable->hash_table[index];
+
+  for (imAttribNode* node = *link; node; node = *link)
+  {
+    if (!imStrEqual(node->name, name))
+    {
+      // Not this one, advance to the next node of the chain.
+      link = &node->next;
+      continue;
+    }
+
+    // Found: bypass the node in the chain, then release it.
+    *link = node->next;
+    delete node;
+    ptable->count--;
+    return;
+  }
+  // Reaching this point means the name is not stored: nothing to remove.
+}
+
+const void* imAttribTableGet(const imAttribTablePrivate* ptable, const char *name, int *data_type, int *count)
+{
+  assert(name);
+
+  // An empty table cannot contain the name.
+  if (ptable->count == 0) return NULL;
+
+  // Walk the chain selected by the hash of the name.
+  imAttribNode* node = ptable->hash_table[iHashIndex(name, ptable->hash_size)];
+  for (; node; node = node->next)
+  {
+    if (!imStrEqual(node->name, name))
+      continue;
+
+    // Found: the optional outputs are filled only when requested.
+    if (data_type) *data_type = node->data_type;
+    if (count) *count = node->count;
+    return node->data;   // the node's own storage, not a copy
+  }
+
+  // No attribute with this name.
+  return NULL;
+}
+
+void imAttribArraySet(imAttribTablePrivate* ptable, int index, const char* name, int data_type, int count, const void* data)
+{
+  assert(name);
+  assert(index >= 0 && index < ptable->count);
+
+  // Also refused in release builds; a negative index would write before the slot array.
+  if (index < 0 || index >= ptable->count) return;
+
+  delete ptable->hash_table[index];   // replaces the previous contents; deleting NULL is a no-op
+
+  ptable->hash_table[index] = new imAttribNode(name, data_type, count, data, NULL);
+}
+
+const void* imAttribArrayGet(const imAttribTablePrivate* ptable, int index, char *name, int *data_type, int *count)
+{
+  // Empty array, or an index outside the hash_size allocated slots: nothing to read.
+  if (ptable->count == 0 || index < 0 || index >= ptable->hash_size) return NULL;
+
+  imAttribNode* node = ptable->hash_table[index];
+  if (!node) return NULL;   // slot never set
+
+  // Optional outputs. "name" must be large enough for the stored name, strcpy does not check.
+  if (name) strcpy(name, node->name);
+  if (data_type) *data_type = node->data_type;
+  if (count) *count = node->count;
+
+  return node->data;   // the node's own storage, not a copy
+}
+
+void imAttribTableForEach(const imAttribTablePrivate* ptable, void* user_data, imAttribTableCallback attrib_func)
+{
+  assert(attrib_func);
+
+  if (ptable->count == 0) return;
+
+  int visited = 0;   // running index handed to the callback
+  for (int i = 0; i < ptable->hash_size; i++)
+  {
+    for (imAttribNode* node = ptable->hash_table[i]; node; node = node->next)
+    {
+      // A zero return from the callback stops the whole enumeration.
+      if (!attrib_func(user_data, visited, node->name, node->data_type, node->count, node->data))
+        return;
+
+      visited++;
+    }
+
+    // Stop as soon as ptable->count nodes were reported.
+    if (visited == ptable->count)
+      return;
+  }
+}
+
+static int iCopyFunc(void* user_data, int index, const char* name, int data_type, int count, const void* data)
+{                  
+  imAttribTablePrivate* ptable_dst = (imAttribTablePrivate*)user_data;
+  (void)index;   // the position in the enumeration is not needed for a hash table
+  imAttribTableSet(ptable_dst, name, data_type, count, data);
+  return 1;      // non zero keeps the enumeration going
+}
+
+void imAttribTableCopyFrom(imAttribTablePrivate* ptable_dst, const imAttribTablePrivate* ptable_src)
+{
+  imAttribTableForEach(ptable_src, (void*)ptable_dst, iCopyFunc);   // sets every source attribute in the destination
+}
+
+static int iCopyArrayFunc(void* user_data, int index, const char* name, int data_type, int count, const void* data)
+{                  
+  // Enumeration callback: stores the visited attribute at the running index given by the enumeration.
+  imAttribTablePrivate* parray_dst = (imAttribTablePrivate*)user_data;
+  imAttribArraySet(parray_dst, index, name, data_type, count, data);
+  return 1;   // non zero keeps the enumeration going
+}
+
+void imAttribArrayCopyFrom(imAttribTablePrivate* ptable_dst, const imAttribTablePrivate* ptable_src)
+{
+  imAttribTableForEach(ptable_src, (void*)ptable_dst, iCopyArrayFunc);   // stores every source node by its enumeration index
+}
diff --git a/src/im_avi.def b/src/im_avi.def
new file mode 100644
index 0000000..5970f68
--- /dev/null
+++ b/src/im_avi.def
@@ -0,0 +1,2 @@
+EXPORTS
+  imFormatRegisterAVI
\ No newline at end of file
diff --git a/src/im_avi.mak b/src/im_avi.mak
new file mode 100644
index 0000000..45d39ba
--- /dev/null
+++ b/src/im_avi.mak
@@ -0,0 +1,10 @@
+PROJNAME = im
+LIBNAME = im_avi
+OPT = YES
+
+SRC = im_format_avi.cpp
+                                       
+LIBS = vfw32
+
+USE_IM=Yes
+IM = ..
diff --git a/src/im_bin.cpp b/src/im_bin.cpp
new file mode 100644
index 0000000..de279b5
--- /dev/null
+++ b/src/im_bin.cpp
@@ -0,0 +1,111 @@
+/** \file
+ * \brief Binary Data Utilities
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_bin.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include <assert.h>
+
+#include "im_util.h"
+
+
+int imBinCPUByteOrder(void)
+{
+  // Detected on the first call only, -1 marks "not detected yet".
+  static int cached_order = -1;
+  if (cached_order != -1)
+    return cached_order;
+
+  // The first byte in memory of the value 1 tells which end is stored first.
+  unsigned short probe = 0x0001;
+  const unsigned char* first_byte = (const unsigned char*)&probe;
+  cached_order = (*first_byte == 0x01)? IM_LITTLEENDIAN: IM_BIGENDIAN;
+  return cached_order;
+}
+
+void imBinSwapBytes(void *data, int count, int size)
+{
+  // "size" is the number of bytes of each value,
+  // "count" the number of values stored at "data".
+  if (size == 2)
+    imBinSwapBytes2(data, count);
+  else if (size == 4)
+    imBinSwapBytes4(data, count);
+  else if (size == 8)
+    imBinSwapBytes8(data, count);
+  else
+  {
+    // any other size leaves the data untouched
+  }
+}
+
+void imBinSwapBytes2(void *data, int count)
+{
+  // Reverses in place the 2 bytes of each of the "count" values stored at "data".
+  assert(data);
+
+  unsigned char *values = (unsigned char *)data;
+
+  // "> 0" rather than "!= 0": a negative count swaps nothing
+  // instead of swapping far beyond the buffer.
+  for (; count > 0; count--, values += 2)
+  {
+    unsigned char lTemp = values[1];
+    values[1] = values[0];
+    values[0] = lTemp;
+  }
+}
+
+void imBinSwapBytes4(void *data, int count)
+{
+  // Reverses in place the 4 bytes of each of the "count" values stored at "data".
+  assert(data);
+
+  unsigned char *values = (unsigned char *)data;
+
+  // "> 0" rather than "!= 0": a negative count swaps nothing
+  // instead of swapping far beyond the buffer.
+  for (; count > 0; count--, values += 4)
+  {
+    unsigned char lTemp = values[3];
+    values[3] = values[0];
+    values[0] = lTemp;
+
+    lTemp = values[2];
+    values[2] = values[1];
+    values[1] = lTemp;
+  }
+}
+
+void imBinSwapBytes8(void *data, int count)
+{
+  // Reverses in place the 8 bytes of each of the "count" values stored at "data".
+  assert(data);
+
+  unsigned char lTemp;
+  unsigned char *values = (unsigned char *)data;
+
+  // "> 0" rather than "!= 0": a negative count swaps nothing
+  // instead of swapping far beyond the buffer.
+  for (; count > 0; count--, values += 8)
+  {
+    // the bytes are exchanged pairwise, from the outer pair to the inner pair
+    lTemp = values[7];
+    values[7] = values[0];
+    values[0] = lTemp;
+
+    lTemp = values[6];
+    values[6] = values[1];
+    values[1] = lTemp;
+
+    lTemp = values[5];
+    values[5] = values[2];
+    values[2] = lTemp;
+
+    lTemp = values[4];
+    values[4] = values[3];
+    values[3] = lTemp;
+  }
+}
+
diff --git a/src/im_binfile.cpp b/src/im_binfile.cpp
new file mode 100644
index 0000000..670623c
--- /dev/null
+++ b/src/im_binfile.cpp
@@ -0,0 +1,644 @@
+/** \file
+ * \brief Binary File Access
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_binfile.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <assert.h>
+#include <stdarg.h>
+
+#include "im_util.h"
+#include "im_binfile.h"
+
+
+/**************************************************
+                imBinMemoryFile
+***************************************************/
+
+class imBinMemoryFile: public imBinFileBase   // Binary file kept in a memory buffer described by an imBinMemoryFileName
+{
+protected:
+  unsigned long CurrentSize, BufferSize;  // bytes in use as file contents, and bytes allocated in Buffer
+  unsigned char* Buffer, *CurPos;   // start of the storage, and current read/write position inside it
+  int Error;   // reset at the start of each read, write or seek; 1 when that operation failed or was cut short
+  float Reallocate;   // growth factor used by WriteBuf, 0 disables growing
+  imBinMemoryFileName* file_name;   // descriptor received by Open/New, updated when the buffer grows
+
+  unsigned long ReadBuf(void* pValues, unsigned long pSize);
+  unsigned long WriteBuf(void* pValues, unsigned long pSize);
+
+public:
+  void Open(const char* pFileName);
+  void New(const char* pFileName);
+  void Close() {} // Does nothing, the memory belongs to the user
+
+  unsigned long FileSize();
+  int HasError() const;
+  void SeekTo(unsigned long pOffset);
+  void SeekOffset(long pOffset);
+  void SeekFrom(long pOffset);
+  unsigned long Tell() const;
+  int EndOfFile() const;
+};
+
+static imBinFileBase* iBinMemoryFileNewFunc()
+{
+  return new imBinMemoryFile();
+}
+
+void imBinMemoryRelease(unsigned char *buffer)
+{
+  free(buffer);
+}
+
+void imBinMemoryFile::Open(const char* pFileName)
+{
+  this->file_name = (imBinMemoryFileName*)pFileName;   // for this module the "file name" is really a buffer descriptor
+
+  InitByteOrder(imBinCPUByteOrder());
+  this->IsNew = 0;
+
+  assert(this->file_name->size);
+
+  this->Buffer = this->file_name->buffer;
+  this->BufferSize = this->file_name->size;
+  this->Reallocate = this->file_name->reallocate;
+  this->CurrentSize = this->BufferSize;   // the whole buffer counts as existing file contents
+  this->CurPos = this->Buffer;            // start at the beginning
+  this->Error = 0;
+}
+
+void imBinMemoryFile::New(const char* pFileName)
+{
+  // For this module the "file name" is really a buffer descriptor.
+  imBinMemoryFileName* desc = (imBinMemoryFileName*)pFileName;
+  this->file_name = desc;
+
+  InitByteOrder(imBinCPUByteOrder());
+  this->IsNew = 1;
+
+  assert(desc->size);
+
+  this->Reallocate = desc->reallocate;
+  this->BufferSize = desc->size;
+  this->CurrentSize = 0;   // a new file starts with no contents
+  this->Error = 0;
+
+  // No storage supplied: allocate it here and publish it in the descriptor.
+  if (desc->buffer == NULL)
+    desc->buffer = (unsigned char*)malloc(this->BufferSize);
+
+  this->Buffer = desc->buffer;
+  this->CurPos = this->Buffer;
+}
+
+unsigned long imBinMemoryFile::ReadBuf(void* pValues, unsigned long pSize)
+{
+  assert(this->Buffer);
+
+  unsigned long lPosition = this->CurPos - this->Buffer;
+
+  // A request that runs past the contents is flagged as an error,
+  // but the bytes that are left are still delivered.
+  int lTruncated = (lPosition + pSize > this->CurrentSize);
+  this->Error = lTruncated? 1: 0;
+  if (lTruncated)
+    pSize = this->CurrentSize - lPosition;
+
+  if (pSize == 0)
+    return 0;
+
+  memcpy(pValues, this->CurPos, pSize);
+  this->CurPos += pSize;
+
+  return pSize;
+}
+                             
+unsigned long imBinMemoryFile::WriteBuf(void* pValues, unsigned long pSize)
+{
+  // Copies up to pSize bytes to the current position, growing the buffer when allowed; returns the bytes written.
+  assert(this->Buffer);
+  unsigned long lOffset = this->CurPos - this->Buffer;
+
+  this->Error = 0;
+  if (lOffset + pSize > this->BufferSize)
+  {
+    if (this->Reallocate != 0.0)
+    {
+      // Grow in steps of Reallocate*BufferSize until the write fits. A step that rounds
+      // down to 0 would never end the loop, so then grow by exactly the missing amount.
+      unsigned long nStep = (unsigned long)(this->Reallocate*(float)this->BufferSize);
+      if (nStep == 0)
+        nStep = lOffset + pSize - this->BufferSize;
+      unsigned long nSize = this->BufferSize;
+      while (lOffset + pSize > nSize)
+        nSize += nStep;
+      this->Buffer = (unsigned char*)realloc(this->Buffer, nSize);
+      if (this->Buffer)
+      {
+        this->BufferSize = nSize;
+        this->file_name->buffer = this->Buffer;
+        this->file_name->size = this->BufferSize;
+      }
+      else
+      {
+        // realloc failed: the old block is intact and still referenced by the descriptor
+        this->Buffer = this->file_name->buffer;
+        this->Error = 1;
+        pSize = this->BufferSize - lOffset;
+      }
+      this->CurPos = this->Buffer + lOffset;
+    }
+    else
+    {
+      this->Error = 1;
+      pSize = this->BufferSize - lOffset;
+    }
+  }
+
+  memcpy(this->CurPos, pValues, pSize);
+  if (lOffset + pSize > this->CurrentSize)
+    this->CurrentSize = lOffset + pSize;   // the write extended the contents
+  this->CurPos += pSize;
+  return pSize;
+}
+
+unsigned long imBinMemoryFile::FileSize()
+{
+  assert(this->Buffer);
+  return this->CurrentSize;
+}
+
+int imBinMemoryFile::HasError() const
+{
+  if (!this->Buffer) return 1;
+  return this->Error;
+}
+
+void imBinMemoryFile::SeekTo(unsigned long pOffset)
+{
+  assert(this->Buffer);
+
+  // Positions beyond the allocated buffer are refused
+  // and leave the current position unchanged.
+  this->Error = (pOffset > this->BufferSize)? 1: 0;
+  if (this->Error)
+    return;
+
+  // pOffset == BufferSize is accepted: it is the position just past the last byte.
+  this->CurPos = this->Buffer + pOffset;
+
+  /* seeking after EOF extends the contents up to the new position */
+  if (this->CurrentSize < pOffset)
+    this->CurrentSize = pOffset;
+}
+
+void imBinMemoryFile::SeekFrom(long pOffset)
+{
+  assert(this->Buffer);
+  // Positions the file at pOffset bytes from the end of the contents (CurrentSize).
+  /* remember that offset is usually a negative value in this case */
+
+  this->Error = 0;
+  if (this->CurrentSize + pOffset > this->BufferSize ||    // unsigned arithmetic: beyond the allocated buffer
+      (long)this->CurrentSize + pOffset < 0)                // signed arithmetic: before the start
+  {
+    this->Error = 1;
+    return;   // the position is left unchanged
+  }
+
+  this->CurPos = this->Buffer + this->CurrentSize + pOffset;
+
+  /* update size if we seek after EOF */
+  if (pOffset > 0)
+    this->CurrentSize = this->CurrentSize + pOffset;
+}
+
+void imBinMemoryFile::SeekOffset(long pOffset)
+{
+  assert(this->Buffer);
+  long lTarget = (long)(this->CurPos - this->Buffer) + pOffset;   // wanted position, from the buffer start
+
+  this->Error = 0;
+  if (lTarget < 0 || lTarget > (long)this->BufferSize)
+  {
+    this->Error = 1;
+    return;
+  }
+
+  this->CurPos += pOffset;
+
+  /* update size if we seek after EOF */
+  if (lTarget > (long)this->CurrentSize)
+    this->CurrentSize = lTarget;
+}
+
+unsigned long imBinMemoryFile::Tell() const
+{
+  assert(this->Buffer);
+  unsigned long lOffset = this->CurPos - this->Buffer;
+  return lOffset;
+}
+
+int imBinMemoryFile::EndOfFile() const
+{
+  assert(this->Buffer);
+  unsigned long lOffset = this->CurPos - this->Buffer;
+  return lOffset == this->CurrentSize? 1: 0;
+}
+
+/**************************************************
+                imBinSubFile
+**************************************************/
+
+static imBinFileBase* iBinFileBaseHandle(const char* pFileName);
+
+class imBinSubFile: public imBinFileBase
+{
+protected:
+  imBinFileBase* FileHandle;
+  unsigned long StartOffset;
+
+  unsigned long ReadBuf(void* pValues, unsigned long pSize);
+  unsigned long WriteBuf(void* pValues, unsigned long pSize);
+
+public:
+  void Open(const char* pFileName);
+  void New(const char* pFileName);
+  void Close() {} // Does nothing, the file should be close by the parent file.
+
+  unsigned long FileSize();
+  int HasError() const;
+  void SeekTo(unsigned long pOffset);
+  void SeekOffset(long pOffset);
+  void SeekFrom(long pOffset);
+  unsigned long Tell() const;
+  int EndOfFile() const;
+};
+
+static imBinFileBase* iBinSubFileNewFunc()
+{
+  return new imBinSubFile();
+}
+
+void imBinSubFile::Open(const char* pFileName)
+{
+  this->FileHandle = iBinFileBaseHandle(pFileName);   // pFileName is really the parent imBinFile*
+  this->FileByteOrder = this->FileByteOrder;   // NOTE(review): self-assignment, a no-op; presumably the parent's byte order was intended - confirm
+  this->IsNew = 0;
+  
+  StartOffset = this->FileHandle->Tell();   // SeekTo and Tell are relative to the parent's position at this moment
+}
+
+void imBinSubFile::New(const char* pFileName)
+{
+  this->FileHandle = iBinFileBaseHandle(pFileName);   // pFileName is really the parent imBinFile*
+  this->FileByteOrder = this->FileByteOrder;   // NOTE(review): self-assignment, a no-op; presumably the parent's byte order was intended - confirm
+  this->IsNew = 1;
+  
+  StartOffset = this->FileHandle->Tell();   // SeekTo and Tell are relative to the parent's position at this moment
+}
+
+unsigned long imBinSubFile::FileSize()
+{
+  assert(this->FileHandle);
+  return this->FileHandle->FileSize();
+}
+
+unsigned long imBinSubFile::ReadBuf(void* pValues, unsigned long pSize)
+{
+  assert(this->FileHandle);
+  return this->FileHandle->ReadBuf(pValues, pSize);
+}
+                             
+unsigned long imBinSubFile::WriteBuf(void* pValues, unsigned long pSize)
+{
+  assert(this->FileHandle);
+  return this->FileHandle->WriteBuf(pValues, pSize);
+}
+
+int imBinSubFile::HasError() const
+{
+  assert(this->FileHandle);
+  return this->FileHandle->HasError();
+}
+
+void imBinSubFile::SeekTo(unsigned long pOffset)
+{
+  assert(this->FileHandle);
+  this->FileHandle->SeekTo(StartOffset + pOffset);
+}
+
+void imBinSubFile::SeekOffset(long pOffset)
+{
+  assert(this->FileHandle);
+  this->FileHandle->SeekOffset(pOffset);
+}
+
+void imBinSubFile::SeekFrom(long pOffset)
+{
+  assert(this->FileHandle);
+  this->FileHandle->SeekFrom(pOffset);
+}
+
+unsigned long imBinSubFile::Tell() const
+{
+  assert(this->FileHandle);
+  return this->FileHandle->Tell() - StartOffset;
+}
+
+int imBinSubFile::EndOfFile() const
+{
+  assert(this->FileHandle);
+  return this->FileHandle->EndOfFile();
+}
+
+/**************************************************
+                imBinStreamFile
+**************************************************/
+
+class imBinStreamFile: public imBinFileBase
+{
+protected:
+  FILE* FileHandle;
+
+  unsigned long ReadBuf(void* pValues, unsigned long pSize);
+  unsigned long WriteBuf(void* pValues, unsigned long pSize);
+
+public:
+  void Open(const char* pFileName);
+  void New(const char* pFileName);
+  void Close();
+
+  unsigned long FileSize();
+  int HasError() const;
+  void SeekTo(unsigned long pOffset);
+  void SeekOffset(long pOffset);
+  void SeekFrom(long pOffset);
+  unsigned long Tell() const;
+  int EndOfFile() const;
+};
+
+static imBinFileBase* iBinStreamFileNewFunc()
+{
+  return new imBinStreamFile();
+}
+
+void imBinStreamFile::Open(const char* pFileName)
+{
+  this->FileHandle = fopen(pFileName, "rb");
+  InitByteOrder(imBinCPUByteOrder());
+  this->IsNew = 0;
+}
+
+void imBinStreamFile::New(const char* pFileName)
+{
+  this->FileHandle = fopen(pFileName, "wb");
+  InitByteOrder(imBinCPUByteOrder());
+  this->IsNew = 1;
+}
+
+void imBinStreamFile::Close()
+{
+  if (this->FileHandle) fclose(this->FileHandle);
+}
+
+unsigned long imBinStreamFile::FileSize()
+{
+  assert(this->FileHandle);
+  unsigned long lCurrentPosition = ftell(this->FileHandle);   // remembered so the position can be restored
+  fseek(this->FileHandle, 0L, SEEK_END);
+  unsigned long lSize = ftell(this->FileHandle);   // the position at the end is taken as the size
+  fseek(this->FileHandle, lCurrentPosition, SEEK_SET);   // back to where the caller was
+  return lSize;
+}
+
+unsigned long imBinStreamFile::ReadBuf(void* pValues, unsigned long pSize)
+{
+  assert(this->FileHandle);
+	return fread(pValues, 1, pSize, this->FileHandle);
+}
+                             
+unsigned long imBinStreamFile::WriteBuf(void* pValues, unsigned long pSize)
+{
+  assert(this->FileHandle);
+	return fwrite(pValues, 1, pSize, this->FileHandle);
+}
+
+int imBinStreamFile::HasError() const
+{
+  if (!this->FileHandle) return 1;
+  return ferror(this->FileHandle) == 0? 0: 1;
+}
+
+void imBinStreamFile::SeekTo(unsigned long pOffset)
+{
+  assert(this->FileHandle);
+  fseek(this->FileHandle, pOffset, SEEK_SET);
+}
+
+void imBinStreamFile::SeekOffset(long pOffset)
+{
+  assert(this->FileHandle);
+  fseek(this->FileHandle, pOffset, SEEK_CUR);
+}
+
+void imBinStreamFile::SeekFrom(long pOffset)
+{
+  assert(this->FileHandle);
+  fseek(this->FileHandle, pOffset, SEEK_END);
+}
+
+unsigned long imBinStreamFile::Tell() const
+{
+  assert(this->FileHandle);
+  return ftell(this->FileHandle);
+}
+
+int imBinStreamFile::EndOfFile() const
+{
+  assert(this->FileHandle);
+  return feof(this->FileHandle) == 0? 0: 1;
+}
+
+/**************************************************
+                 NewFuncModules
+**************************************************/
+
+/* implemented in "im_sysfile*.cpp" */
+imBinFileBase* iBinSystemFileNewFunc();
+imBinFileBase* iBinSystemFileHandleNewFunc();
+
+#define MAX_MODULES 10
+
+static imBinFileNewFunc iBinFileModule[MAX_MODULES] = 
+{
+  iBinSystemFileNewFunc,
+  iBinStreamFileNewFunc, 
+  iBinMemoryFileNewFunc,
+  iBinSubFileNewFunc,
+  iBinSystemFileHandleNewFunc
+};
+static int iBinFileModuleCount = 5;
+static int iBinFileModuleCurrent = 0; // default module is the first
+
+int imBinFileSetCurrentModule(int pModule)
+{
+  // Selects the module used by imBinFileOpen/imBinFileNew.
+  // Returns the previously selected module, or -1 when pModule is not a registered id.
+  if (pModule < 0 || pModule >= iBinFileModuleCount)   // a negative id would index before iBinFileModule
+    return -1;
+
+  int old_module = iBinFileModuleCurrent;
+  iBinFileModuleCurrent = pModule;
+  return old_module;
+}
+
+int imBinFileRegisterModule(imBinFileNewFunc pNewFunc)
+{
+  // No free entry left in the table.
+  if (iBinFileModuleCount == MAX_MODULES) return -1;
+
+  iBinFileModule[iBinFileModuleCount] = pNewFunc;
+  return iBinFileModuleCount++;   // the id of the new module is its position in the table
+}
+
+/**************************************************
+                 imBinFile
+**************************************************/
+
+struct _imBinFile
+{
+  imBinFileBase* binfile;
+};
+
+imBinFile* imBinFileOpen(const char* pFileName)
+{
+  assert(pFileName);
+
+  assert(iBinFileModuleCurrent < iBinFileModuleCount);
+  assert(iBinFileModuleCurrent < MAX_MODULES);
+
+  // The current module creates the low level file object.
+  imBinFileBase* binfile = iBinFileModule[iBinFileModuleCurrent]();
+  binfile->Open(pFileName);
+
+  // Only a file that opened without error gets a public handle.
+  if (!binfile->HasError())
+  {
+    imBinFile* bfile = new imBinFile;
+    bfile->binfile = binfile;
+    return bfile;
+  }
+
+  delete binfile;
+  return NULL;
+}
+
+imBinFile* imBinFileNew(const char* pFileName)
+{
+  assert(pFileName);
+
+  // The current module creates the low level file object.
+  imBinFileBase* binfile = iBinFileModule[iBinFileModuleCurrent]();
+  binfile->New(pFileName);
+
+  // Only a file that was created without error gets a public handle.
+  if (!binfile->HasError())
+  {
+    imBinFile* bfile = new imBinFile;
+    bfile->binfile = binfile;
+    return bfile;
+  }
+
+  delete binfile;
+  return NULL;
+}
+
+void imBinFileClose(imBinFile* bfile)
+{
+  assert(bfile);
+  bfile->binfile->Close();
+  delete bfile->binfile;
+  delete bfile;
+}
+
+int imBinFileByteOrder(imBinFile* bfile, int pByteOrder)
+{
+  assert(bfile);
+  return bfile->binfile->InitByteOrder(pByteOrder);
+}
+
+int imBinFileError(imBinFile* bfile)
+{
+  assert(bfile);
+  return bfile->binfile->HasError();
+}
+
+unsigned long imBinFileSize(imBinFile* bfile)
+{
+  assert(bfile);
+  return bfile->binfile->FileSize();
+}
+
+unsigned long imBinFileRead(imBinFile* bfile, void* pValues, unsigned long pCount, int pSizeOf)
+{
+  assert(bfile);
+  return bfile->binfile->Read(pValues, pCount, pSizeOf);
+}
+
+unsigned long imBinFileWrite(imBinFile* bfile, void* pValues, unsigned long pCount, int pSizeOf)
+{
+  assert(bfile);
+  return bfile->binfile->Write(pValues, pCount, pSizeOf);
+}
+
+void imBinFileSeekTo(imBinFile* bfile, unsigned long pOffset)
+{
+  assert(bfile);
+  bfile->binfile->SeekTo(pOffset);
+}
+
+void imBinFileSeekOffset(imBinFile* bfile, long pOffset)
+{
+  assert(bfile);
+  bfile->binfile->SeekOffset(pOffset);
+}
+
+void imBinFileSeekFrom(imBinFile* bfile, long pOffset)
+{
+  assert(bfile);
+  bfile->binfile->SeekFrom(pOffset);
+}
+
+unsigned long imBinFileTell(imBinFile* bfile)
+{
+  assert(bfile);
+  return bfile->binfile->Tell();
+}
+
+int imBinFileEndOfFile(imBinFile* bfile)
+{
+  assert(bfile);
+  return bfile->binfile->EndOfFile();
+}
+
+unsigned long imBinFilePrintf(imBinFile* bfile, char *format, ...)
+{
+  char buffer[4096];   // NOTE: vsprintf is unbounded, the formatted text (with terminator) must fit here
+  va_list arglist;
+  va_start(arglist, format);
+  int size = vsprintf(buffer, format, arglist);
+  va_end(arglist);     // every va_start needs its va_end
+  return (size > 0)? imBinFileWrite(bfile, buffer, size, 1): 0;   // a formatting error (negative size) writes nothing
+}
+static imBinFileBase* iBinFileBaseHandle(const char* pFileName)
+{
+  imBinFile* bfile = (imBinFile*)pFileName;
+  return (imBinFileBase*)bfile->binfile;
+}
diff --git a/src/im_capture.def b/src/im_capture.def
new file mode 100644
index 0000000..6b44ac3
--- /dev/null
+++ b/src/im_capture.def
@@ -0,0 +1,27 @@
+EXPORTS
+  imVideoCaptureDeviceCount
+  imVideoCaptureDeviceDesc
+  imVideoCaptureReloadDevices
+  imVideoCaptureCreate
+  imVideoCaptureDestroy
+  imVideoCaptureConnect
+  imVideoCaptureDisconnect
+  imVideoCaptureShowDialog
+  imVideoCaptureDialogCount
+  imVideoCaptureDialogDesc
+  imVideoCaptureGetImageSize
+  imVideoCaptureSetImageSize
+  imVideoCaptureFrame
+  imVideoCaptureOneFrame
+  imVideoCaptureLive
+  imVideoCaptureResetAttribute
+  imVideoCaptureGetAttribute
+  imVideoCaptureSetAttribute
+  imVideoCaptureGetAttributeList
+  imVideoCaptureFormatCount
+  imVideoCaptureGetFormat
+  imVideoCaptureSetFormat
+  imVideoCaptureSetInOut 
+  imVideoCaptureDeviceExDesc
+  imVideoCaptureDevicePath
+  imVideoCaptureDeviceVendorInfo
diff --git a/src/im_capture.mak b/src/im_capture.mak
new file mode 100644
index 0000000..b08c610
--- /dev/null
+++ b/src/im_capture.mak
@@ -0,0 +1,67 @@
+PROJNAME = im
+LIBNAME = im_capture
+OPT = YES
+             
+INCLUDES = ../include
+
+# New Direct X does not include Direct Show
+# Direct Show is included in latest Platform SDK, but depends on Direct X...
+DXSDK = d:/lng/dxsdk
+WINSDK = d:/lng/winsdk
+
+ifeq ($(TEC_UNAME), vc6)  
+  #Use old Direct X with Direct Show
+  #But do NOT use the VC6 strmiids.lib
+  PLATSDK = d:/lng/vc7/PlatformSDK
+endif
+
+ifeq ($(TEC_UNAME), dll)  
+  #Use old Direct X with Direct Show
+  PLATSDK = d:/lng/vc7/PlatformSDK
+  LDIR = ../lib/$(TEC_UNAME)
+endif
+  
+ifeq ($(TEC_UNAME), vc8)
+  INCLUDES += $(WINSDK)/include
+  LDIR = $(WINSDK)/lib
+endif
+
+ifeq ($(TEC_UNAME), dll8)  
+  INCLUDES += $(WINSDK)/include
+  LDIR = $(WINSDK)/lib
+endif
+  
+ifeq ($(TEC_UNAME), vc8_64)
+  INCLUDES += $(WINSDK)/include
+  LDIR = $(WINSDK)/lib/amd64
+endif
+
+ifeq ($(TEC_UNAME), dll8_64)  
+  INCLUDES += $(WINSDK)/include
+  LDIR = $(WINSDK)/lib/amd64
+endif
+  
+ifneq ($(findstring Win, $(TEC_SYSNAME)), )
+  INCLUDES += $(DXSDK)/include
+  SRC = im_capture_dx.cpp
+endif
+
+#ifneq ($(findstring Linux, $(TEC_UNAME)), )
+#  SRC = im_capture_v4l.cpp
+#endif
+             
+LIBS = strmiids
+
+mingw3-dll:                    
+	@echo Importing MingW stub library
+	@cd ../lib/dll
+	@dlltool -d im_capture.def -D im_capture.dll -l ../lib/mingw3/libim_capture.a
+	@cd ../src
+
+bc56-dll:                    
+	@echo Importing Bcc stub library
+	@d:/lng/cbuilderx/bin/implib -a ../lib/bc56/im_capture.lib ../lib/dll/im_capture.dll
+
+#owc1-dll:                    
+#	@wlib -b -c -n -q -fo -io ../lib/owc1/im_capture.lib @im_capture.wlib
+# TEST	@wlib -b -c -n -q -fo -io ../lib/owc1/im_capture.lib +../lib/dll/im_capture.dll
diff --git a/src/im_capture_dx.cpp b/src/im_capture_dx.cpp
new file mode 100644
index 0000000..cb4749b
--- /dev/null
+++ b/src/im_capture_dx.cpp
@@ -0,0 +1,2255 @@
+/** \file
+ * \brief Video Capture Using Direct Show 9
+ *
+ * See Copyright Notice in im.h
+ * $Id: im_capture_dx.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+/*
+  The Direct Show Graph is composed by 3 components:
+    capture source, sample grabber and null renderer.
+
+  Filters are connected:
+    capture_filter(out)->(in)grabber_filter(out)->(in)null_filter
+
+  But when the graph is rendered other transform filters 
+  can be inserted to connect the capture and the grabber.
+
+  We do not use MFC, ATL and the Direct Show Base Classes.
+  This module only needs the library "strmiids.lib".
+  If the extra error functions were used, you will need to link with "quartz.lib" or "dxerr9.lib".
+
+  We use the buffer of the ISampleGrabber. But this can not be done in a user callback,
+  so we leave the grab loop for the application, it can also be done in the idle function.
+
+  If you use the idle function for the grab loop, then WDM Source Dialog will interrupt the "live" mode.
+  Just because it is a modal dialog and it does not use the application message loop.
+  It can be solved if the grab loop is implemented using a timer.
+
+  Since there is no gray format, bpp is always 24bpp.
+*/
+
+#define _WIN32_WINNT 0x0500   // Because of TryEnterCriticalSection
+
+#include <dshow.h>
+#include <qedit.h>
+
+#include <memory.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include <im.h>
+#include <im_util.h>
+
+#include "im_capture.h"
+
+#define VC_CAMERADELAY   200  // This vary from camera to camera, so we use a reasonable value and hope it will work for all.
+#define VC_MAXVIDDEVICES 30   // Maximum number of devices to list
+
+//#define VC_REGISTER_FILTERGRAPH // Use this to allow GraphEdit to spy the graph
+//#define VC_INCLUDE_VFW_DEVICES  // Use this to allow old video for windows devices
+//#define VC_PRINT_ERROR_MESSAGES // Use this to display a system custom error message
+
+#if defined(_DEBUG) | defined(DEBUG)
+#define VC_REGISTER_FILTERGRAPH
+#define VC_PRINT_ERROR_MESSAGES
+#endif
+
+
+/**************************************************************************
+                       imTrackingGrabberCB
+***************************************************************************/
+
+// This is better than using the sample grabber internal buffer 
+// because we have a more precise control of the data flow.
+
+// Sample grabber callback that keeps a private copy of the most recent frame.
+// SampleCB() stores the incoming sample; GetImage() converts the stored frame
+// into the caller's buffer.
+class imTrackingGrabberCB: public ISampleGrabberCB
+{
+public:
+  imTrackingGrabberCB();
+  ~imTrackingGrabberCB();
+
+  // Called for each sample; copies the sample data into m_ImageData.
+  STDMETHODIMP SampleCB(double SampleTime, IMediaSample *pSample);
+  // The buffer callback is not implemented.
+  STDMETHODIMP BufferCB(double SampleTime, BYTE *pBuffer, long BufferLen) {return E_NOTIMPL;}
+  // Reference counting is not implemented: constant values are returned
+  // and the object never deletes itself.
+  STDMETHODIMP_(ULONG) AddRef() {return 2;}
+  STDMETHODIMP_(ULONG) Release() {return 1;}
+  STDMETHODIMP QueryInterface(REFIID riid, void ** ppv);
+
+  // Sets the frame dimensions and (re)allocates the internal buffer.
+  void SetImageSize(int width, int height);
+  // Copies the stored frame to data; returns 1 if a new frame was copied, 0 otherwise.
+  int GetImage(unsigned char* data, int color_mode, int timeout);
+
+protected:
+  int  m_Width, m_Height;       // frame dimensions set by SetImageSize
+  bool m_newImageFlag;          // set by SampleCB, cleared by GetImage
+  unsigned char *m_ImageData;   // internal frame buffer, 3 bytes per pixel
+  CRITICAL_SECTION m_sect;      // protects the members above
+  HANDLE m_imageReady;          // event signaled by SampleCB after a frame is stored
+};
+
+// Creates the lock and the "image ready" event.
+// All members start in a defined state; there is no frame buffer until SetImageSize() is called.
+imTrackingGrabberCB::imTrackingGrabberCB()
+  : m_Width(0), m_Height(0), m_newImageFlag(false), m_ImageData(NULL), m_imageReady(NULL)
+{
+  InitializeCriticalSection(&m_sect);
+
+  // Auto-reset event, created in the signaled state.
+  m_imageReady = CreateEvent(NULL, FALSE, TRUE, NULL);
+}
+
+// Releases the event, the lock and the frame buffer.
+imTrackingGrabberCB::~imTrackingGrabberCB() 
+{
+  CloseHandle(m_imageReady);
+  EnterCriticalSection(&m_sect);
+  DeleteCriticalSection(&m_sect);
+
+  // The buffer is allocated with calloc/realloc in SetImageSize,
+  // so it must be released with free (not delete).
+  free(m_ImageData);
+  m_ImageData = NULL;
+}
+
+// Returns this object for IID_ISampleGrabberCB and IID_IUnknown.
+// For any other interface *ppv is set to NULL and E_NOINTERFACE is returned;
+// a NULL ppv yields E_POINTER.
+STDMETHODIMP imTrackingGrabberCB::QueryInterface(REFIID riid, void ** ppv) 
+{
+  if (!ppv)
+    return E_POINTER;
+
+  if (riid == IID_ISampleGrabberCB || riid == IID_IUnknown)
+  {
+    *ppv = (void *) static_cast<ISampleGrabberCB*> (this);
+    return NOERROR;
+  }
+
+  // COM requires the output pointer to be cleared when the interface is not supported.
+  *ppv = NULL;
+  return E_NOINTERFACE;
+}
+
+// Sets the frame dimensions and makes sure the internal buffer can hold
+// width*height pixels of 3 bytes each.
+// The buffer only grows. If the allocation fails the previous buffer and
+// dimensions are kept, so later samples of the new size are ignored by SampleCB.
+void imTrackingGrabberCB::SetImageSize(int width, int height) 
+{
+  EnterCriticalSection(&m_sect);
+
+  // This can be done because the capture system always returns 
+  // images that are a multiple of 4.
+  int new_size = width * height * 3;
+
+  if (new_size > 0)
+  {
+    if (!m_ImageData)
+    {
+      m_ImageData = (BYTE*)calloc(new_size, 1);
+      if (!m_ImageData)
+      {
+        LeaveCriticalSection(&m_sect);
+        return;
+      }
+    }
+    else if (m_Width*m_Height*3 < new_size)  // compare bytes with bytes
+    {
+      // Do not overwrite m_ImageData directly: on failure the old block is still valid.
+      BYTE* grown = (BYTE*)realloc(m_ImageData, new_size);
+      if (!grown)
+      {
+        LeaveCriticalSection(&m_sect);
+        return;
+      }
+      m_ImageData = grown;
+    }
+  }
+
+  m_Width = width; 
+  m_Height = height;
+
+  LeaveCriticalSection(&m_sect);
+}
+
+// Sample grabber callback: stores a copy of the sample data and signals m_imageReady.
+// Samples are silently dropped (S_OK is still returned) when there is no buffer,
+// when the data does not fit in the buffer, or when the data pointer is unavailable.
+STDMETHODIMP imTrackingGrabberCB::SampleCB(double, IMediaSample *pSample)
+{
+  if (!pSample) return S_OK;
+
+  // The buffer and its dimensions are checked under the lock,
+  // since SetImageSize may change them from another thread.
+  EnterCriticalSection(&m_sect);
+
+  // Length of the valid data; GetSize() would report the capacity of the
+  // sample buffer, which can be larger than one frame.
+  long size = pSample->GetActualDataLength();
+  BYTE *pData = NULL;
+
+  if (!m_ImageData || 
+      size <= 0 || size > m_Width*m_Height*3 ||
+      FAILED(pSample->GetPointer(&pData)) || !pData)
+  {
+    LeaveCriticalSection(&m_sect);
+    return S_OK;
+  }
+
+  CopyMemory(m_ImageData, pData, size);
+  m_newImageFlag = true;
+
+  LeaveCriticalSection(&m_sect);
+
+  SetEvent(m_imageReady);
+  
+  return S_OK;
+}
+
+// Copies the stored frame into data, converting from the internal byte order.
+//   data       - destination buffer provided by the caller
+//   color_mode - IM color mode: RGB packed, RGB in separate planes, or anything else (one byte per pixel)
+//   timeout    - 0 to skip the wait, otherwise the value passed to WaitForSingleObject
+// Returns 1 when a new frame was copied, 0 otherwise (timeout, lock busy or no new frame).
+int imTrackingGrabberCB::GetImage(unsigned char* data, int color_mode, int timeout)
+{
+  if (timeout != 0 && WaitForSingleObject(m_imageReady, timeout) != WAIT_OBJECT_0)
+    return 0;
+
+  // Never block here: if the callback holds the lock, just report "no image".
+  if (!TryEnterCriticalSection(&m_sect))
+    return 0;
+
+  if (m_newImageFlag != 1)
+  {
+    LeaveCriticalSection(&m_sect);
+    return 0;
+  }
+
+  const int npixels = m_Width*m_Height;
+  const unsigned char* src = m_ImageData;
+
+  if (imColorModeSpace(color_mode) != IM_RGB)
+  {
+    // Single plane: take the first byte of each source triple.
+    for (int p = 0; p < npixels; p++)
+    {
+      data[p] = src[3*p];
+    }
+  }
+  else if (imColorModeIsPacked(color_mode))
+  {
+    // Packed: reverse the byte order inside each triple.
+    for (int p = 0; p < npixels; p++)
+    {
+      data[3*p + 2] = src[3*p];
+      data[3*p + 1] = src[3*p + 1];
+      data[3*p]     = src[3*p + 2];
+    }
+  }
+  else
+  {
+    // Planar: the three planes are stored one after the other in data.
+    unsigned char* r_plane = data;
+    unsigned char* g_plane = data + npixels;
+    unsigned char* b_plane = data + 2*npixels;
+    for (int p = 0; p < npixels; p++)
+    {
+      b_plane[p] = src[3*p];
+      g_plane[p] = src[3*p + 1];
+      r_plane[p] = src[3*p + 2];
+    }
+  }
+
+  m_newImageFlag = 0;
+
+  LeaveCriticalSection(&m_sect);
+  return 1;
+}
+
+
+/**************************************************************************
+                       Direct Show Only
+***************************************************************************/
+
+
+// One entry of the capture device list, filled by vc_AddDevice.
+struct vcDevice
+{
+  IBaseFilter *filter;   // capture filter bound from the device moniker
+  char vendorinfo[128];  // result of IBaseFilter::QueryVendorInfo, if any
+  char desc[128];        // "<index> - <FriendlyName>"
+  char ex_desc[256];     // "Description" property, if any
+  char path[512];        // "DevicePath" property, if any
+};
+// Global device list; only the first vc_DeviceCount entries are valid.
+static vcDevice vc_DeviceList[VC_MAXVIDDEVICES];
+static int vc_DeviceCount = 0;
+
+// Appends a device to vc_DeviceList, taking ownership of the filter reference.
+//   filter     - capture filter of the device
+//   desc       - friendly name (NULL is replaced by "device")
+//   ex_desc, path, vendorinfo - optional strings, can be NULL
+// Strings longer than the destination fields are truncated.
+// When the list is full the device is discarded and the filter is released.
+static void vc_AddDevice(IBaseFilter *filter, char* desc, char* ex_desc, char* path, char* vendorinfo)
+{
+  int i = vc_DeviceCount;
+  if (i >= VC_MAXVIDDEVICES)
+  {
+    if (filter) filter->Release();
+    return;
+  }
+
+  vcDevice* device = &vc_DeviceList[i];
+
+  memset(device, 0, sizeof(vcDevice));
+
+  device->filter = filter;
+
+  if (!desc) desc = "device";
+  // The precision limits the name so that the index prefix and the name fit in desc.
+  sprintf(device->desc, "%d - %.100s", i, desc);
+
+  // The structure was zero filled, so copying at most size-1 bytes keeps the terminator.
+  if (ex_desc) strncpy(device->ex_desc, ex_desc, sizeof(device->ex_desc)-1);
+  if (path) strncpy(device->path, path, sizeof(device->path)-1);
+  if (vendorinfo) strncpy(device->vendorinfo, vendorinfo, sizeof(device->vendorinfo)-1);
+
+  vc_DeviceCount++;
+}
+
+
+#ifdef VC_PRINT_ERROR_MESSAGES
+//#include <dxerr9.h>
+
+// Shows a message box describing a failed HRESULT.
+// Returns 1 if hr is a failure code (after showing the message), 0 otherwise.
+static int vc_ShowError(HRESULT hr)
+{
+  if (!FAILED(hr))
+    return 0;
+
+  TCHAR szErr[MAX_ERROR_TEXT_LEN];
+
+  // Must link with quartz.lib
+  if (AMGetErrorText(hr, szErr, MAX_ERROR_TEXT_LEN) == 0)
+    wsprintf(szErr, "Unknown Error: 0x%2x", hr);
+
+  MessageBox(0, szErr, "imCapture Error!", MB_OK | MB_ICONERROR);
+  return 1;
+}
+
+#define VC_HARDFAILED(_x) vc_ShowError(_x)
+#else
+#define VC_HARDFAILED FAILED
+#endif
+
+// Converts a wide string to a newly allocated string in the ANSI code page.
+// Returns NULL if wstr is NULL, if memory can not be allocated or if the
+// conversion fails. The result must be released with free.
+static char* vc_Wide2Char(WCHAR* wstr)
+{
+  if (!wstr)
+    return NULL;
+
+  // Ask for the required size in bytes (including the terminator):
+  // in multi-byte code pages one wide character can need more than one byte.
+  int n = WideCharToMultiByte(CP_ACP, 0, wstr, -1, NULL, 0, NULL, NULL);
+  if (n <= 0)
+    return NULL;
+
+  char* str = (char*)malloc(n);
+  if (!str)
+    return NULL;
+
+  if (WideCharToMultiByte(CP_ACP, 0, wstr, -1, str, n, NULL, NULL) == 0)
+  {
+    free(str);
+    return NULL;
+  }
+
+  return str;
+}
+
+#ifdef VC_INCLUDE_VFW_DEVICES
+#define VC_CATEGORY_FLAG 0
+#else
+#define VC_CATEGORY_FLAG CDEF_DEVMON_FILTER|CDEF_DEVMON_PNP_DEVICE
+#endif
+
+// Reads a string property from the property bag.
+// Returns a newly allocated string (release with free), or NULL if the property
+// can not be read or is not a string.
+static char* vc_GetDeviceProp(IPropertyBag *pPropBag, const WCHAR* PropName)
+{
+  char* str = NULL;
+  VARIANT varProp;
+  VariantInit(&varProp);
+
+  if (SUCCEEDED(pPropBag->Read(PropName, &varProp, 0)))
+  {
+    // bstrVal is only meaningful when the variant actually holds a BSTR.
+    if (varProp.vt == VT_BSTR)
+      str = vc_Wide2Char(varProp.bstrVal);
+
+    VariantClear(&varProp); 
+  }
+
+  return str;
+}
+
+// Enumerates the video input devices and adds each one to the device list.
+// Devices that can not be bound to a filter or to a property bag are skipped.
+// The enumeration stops when the device list is full (VC_MAXVIDDEVICES entries).
+static void vc_EnumerateDevices(void)
+{
+  // Selecting a Capture Device
+  ICreateDevEnum *pDevEnum = NULL;
+  IEnumMoniker *pEnum = NULL;
+
+  CoInitialize(NULL);
+
+  // Create the System Device Enumerator.
+  HRESULT hr = CoCreateInstance(CLSID_SystemDeviceEnum, NULL,
+                                CLSCTX_INPROC_SERVER, IID_ICreateDevEnum, 
+                                reinterpret_cast<void**>(&pDevEnum));
+  if (FAILED(hr)) return;
+                                       
+  // Create an enumerator for the video capture category.
+  // pEnum is also tested because success does not guarantee an enumerator.
+  hr = pDevEnum->CreateClassEnumerator(CLSID_VideoInputDeviceCategory, &pEnum, VC_CATEGORY_FLAG);
+  if (FAILED(hr) || !pEnum)
+  {
+    pDevEnum->Release();
+    return;
+  }
+
+  IMoniker *pMoniker = NULL;
+  while (vc_DeviceCount < VC_MAXVIDDEVICES &&   // never write past the end of the list
+         pEnum->Next(1, &pMoniker, NULL) == S_OK)
+  {
+    IBaseFilter *capture_filter = NULL;
+    hr = pMoniker->BindToObject(0, 0, IID_IBaseFilter, (void**)&capture_filter);
+    if (FAILED(hr))
+    {
+      pMoniker->Release();
+      continue;  // Skip this one, maybe the next one will work.
+    } 
+
+    IPropertyBag *pPropBag = NULL;
+    hr = pMoniker->BindToStorage(0, 0, IID_IPropertyBag, (void**)(&pPropBag));
+    if (FAILED(hr))
+    {
+      capture_filter->Release();
+      pMoniker->Release();
+      continue;  // Skip this one, maybe the next one will work.
+    } 
+
+    char* desc = vc_GetDeviceProp(pPropBag, L"FriendlyName");
+    char* ex_desc = vc_GetDeviceProp(pPropBag, L"Description");
+    char* path = vc_GetDeviceProp(pPropBag, L"DevicePath");
+
+    char* vendorinfo = NULL;
+    LPWSTR VendorInfo = NULL;
+    if (capture_filter->QueryVendorInfo(&VendorInfo) == S_OK)
+    {
+      vendorinfo = vc_Wide2Char(VendorInfo);
+      CoTaskMemFree(VendorInfo);
+    }
+
+    // The list keeps the filter reference; the strings are copied.
+    vc_AddDevice(capture_filter, desc, ex_desc, path, vendorinfo);
+
+    free(desc);
+    free(ex_desc);
+    free(path);
+    free(vendorinfo);
+
+    pPropBag->Release();
+    pMoniker->Release();
+  }
+
+  pEnum->Release();
+  pDevEnum->Release();
+}
+
+// Returns the first pin of the filter with the given direction, or NULL if
+// the filter has no such pin. The returned pin must be released by the caller.
+static IPin* vc_GetPin(IBaseFilter* pFilter, PIN_DIRECTION dir)
+{
+  IEnumPins*  pEnumPins = NULL;
+  IPin*       pFound = NULL;
+  IPin*       pPin = NULL;
+
+  if (!pFilter)
+    return NULL;
+
+  pFilter->EnumPins(&pEnumPins);
+  if(!pEnumPins)
+    return NULL;
+
+  // Next returns S_FALSE (not a failure code) when there are no more pins,
+  // so anything different from S_OK must end the search.
+  while (pEnumPins->Next(1, &pPin, NULL) == S_OK && pPin != 0)
+  {
+    PIN_DIRECTION pinDir = PIN_DIRECTION(-1); 
+    pPin->QueryDirection(&pinDir);
+    if(pinDir == dir) 
+    {
+      pFound = pPin;
+      break;
+    }
+
+    pPin->Release();
+    pPin = 0;
+  }
+
+  pEnumPins->Release();
+  return pFound;
+}
+
+// Recursively disconnects and removes from the graph every filter connected
+// downstream of the given filter. The filter itself is not removed.
+static void vc_NukeDownstream(IGraphBuilder* filter_builder, IBaseFilter *filter)
+{
+  IEnumPins *pEnumPins = NULL;
+
+  HRESULT hr = filter->EnumPins(&pEnumPins);
+  if (FAILED(hr) || !pEnumPins) return;
+
+  pEnumPins->Reset();
+
+  IPin *pPin = NULL;
+  while (pEnumPins->Next(1, &pPin, NULL) == S_OK && pPin)
+  {
+    // Reset for every pin, so a value from a previous iteration is never reused.
+    IPin *pPinTo = NULL;
+    pPin->ConnectedTo(&pPinTo);
+    if(pPinTo)
+    {
+      // A failure here only skips this pin, it does not end the enumeration.
+      PIN_INFO pininfo;
+      if (pPinTo->QueryPinInfo(&pininfo) == NOERROR)
+      {
+        if (pininfo.pFilter)
+        {
+          if (pininfo.dir == PINDIR_INPUT)
+          {
+            vc_NukeDownstream(filter_builder, pininfo.pFilter);
+            filter_builder->Disconnect(pPinTo);
+            filter_builder->Disconnect(pPin);
+            filter_builder->RemoveFilter(pininfo.pFilter);
+          }
+
+          pininfo.pFilter->Release();
+        }
+      }
+
+      pPinTo->Release();
+    }
+
+    pPin->Release();
+    pPin = NULL;
+  }
+
+  pEnumPins->Release();
+}
+
+// Disconnects the output pin of source and the input pin of destiny.
+// Returns 1 on success, 0 if a pin is missing or the last disconnect failed.
+static int vc_DisconnectFilters(IGraphBuilder* filter_builder, IBaseFilter* source, IBaseFilter* destiny)
+{
+  IPin *pOut = vc_GetPin(source, PINDIR_OUTPUT);
+  IPin *pIn = vc_GetPin(destiny, PINDIR_INPUT);
+
+  // vc_GetPin returns NULL when the filter has no pin in that direction.
+  if (!pOut || !pIn)
+  {
+    if (pOut) pOut->Release();
+    if (pIn) pIn->Release();
+    return 0;
+  }
+
+  filter_builder->Disconnect(pOut);
+  HRESULT hr = filter_builder->Disconnect(pIn);
+  pOut->Release(); 
+  pIn->Release();
+  if (VC_HARDFAILED(hr))  return 0;
+  return 1;
+}
+
+// Disconnects the pin of the filter with the given direction, and also the pin
+// it was connected to (if any).
+// Returns 1 on success, 0 if the pin does not exist or a disconnect failed.
+static int vc_DisconnectFilterPin(IGraphBuilder* filter_builder, IBaseFilter* filter, PIN_DIRECTION dir)
+{
+  IPin *pIn = vc_GetPin(filter, dir);
+  if (!pIn)
+    return 0;
+
+  // Must start as NULL: it is tested below even if ConnectedTo does not set it.
+  IPin *pOut = NULL;
+  pIn->ConnectedTo(&pOut);
+  
+  HRESULT hr = filter_builder->Disconnect(pIn);
+  pIn->Release();
+  if (VC_HARDFAILED(hr))
+  {
+    if (pOut) pOut->Release();
+    return 0;
+  }
+
+  if (pOut)
+  {
+    hr = filter_builder->Disconnect(pOut);
+    pOut->Release();
+
+    if (VC_HARDFAILED(hr))
+      return 0;
+  }
+
+  return 1;
+}
+
+// Connects the output pin of source to the input pin of destiny.
+//   direct - if non zero the pins are connected directly (ConnectDirect),
+//            otherwise Connect is used and intermediate filters may be inserted.
+// Returns 1 on success, 0 if a pin is missing or the connection failed.
+static int vc_ConnectFilters(IGraphBuilder* filter_builder, IBaseFilter* source, IBaseFilter* destiny, int direct)
+{
+  HRESULT hr;
+  IPin *pOut = vc_GetPin(source, PINDIR_OUTPUT);
+  IPin *pIn = vc_GetPin(destiny, PINDIR_INPUT);
+
+  // vc_GetPin returns NULL when the filter has no pin in that direction.
+  if (!pOut || !pIn)
+  {
+    if (pOut) pOut->Release();
+    if (pIn) pIn->Release();
+    return 0;
+  }
+
+  if (direct)
+    hr = filter_builder->ConnectDirect(pOut, pIn, NULL);
+  else
+    hr = filter_builder->Connect(pOut, pIn);
+  pOut->Release(); 
+  pIn->Release();
+  if (VC_HARDFAILED(hr)) return 0;
+  return 1;
+}
+
+// Registers the graph in the Running Object Table, so it can be inspected
+// by external tools (see VC_REGISTER_FILTERGRAPH).
+// Returns the registration value to be used in vc_RemoveGraphFromRot, or 0 on failure.
+static DWORD vc_AddGraphToRot(IUnknown *pUnkGraph) 
+{
+  IMoniker * pMoniker = NULL;
+  IRunningObjectTable *pROT = NULL;
+  WCHAR wsz[128];
+  DWORD dwRegister = 0;
+  HRESULT hr;
+
+  if (FAILED(GetRunningObjectTable(0, &pROT)))
+    return 0;
+
+  wsprintfW(wsz, L"FilterGraph %08x pid %08x\0", (DWORD_PTR)pUnkGraph, GetCurrentProcessId());
+
+  hr = CreateItemMoniker(L"!", wsz, &pMoniker);
+  if (SUCCEEDED(hr)) 
+  {
+    hr = pROT->Register(ROTFLAGS_REGISTRATIONKEEPSALIVE, pUnkGraph, pMoniker, &dwRegister);
+    pMoniker->Release();
+
+    if (FAILED(hr)) 
+      dwRegister = 0;
+  }
+
+  // Single release point: the table is released exactly once on every path.
+  pROT->Release();
+  return dwRegister;
+}
+
+// Revokes a registration made by vc_AddGraphToRot.
+// Nothing is done if the Running Object Table can not be obtained.
+static void vc_RemoveGraphFromRot(DWORD pdwRegister)
+{
+  IRunningObjectTable *rot = NULL;
+
+  if (FAILED(GetRunningObjectTable(0, &rot)))
+    return;
+
+  rot->Revoke(pdwRegister);
+  rot->Release();
+}
+
+/**************************************************************************
+                          imVideoCapture
+***************************************************************************/
+
+// Function that shows one configuration dialog of the connected device.
+typedef int (*vcDialogFunc)(imVideoCapture* vc, HWND parent);
+
+// State of one video capture object: the DirectShow graph objects,
+// the connected device and its dialogs and formats.
+struct _imVideoCapture
+{
+  /* registered_graph: value returned by vc_AddGraphToRot (0 if not registered). */
+  /* live: non zero while the capture is running. */
+  int registered_graph,
+      live,
+      device;                     /* current connected device. -1 if not connected. */
+
+  char* dialog_desc[6];
+  vcDialogFunc dialog_func[6];
+  int dialog_count;                /* number of available configuration dialogs for the current connection. */
+
+  IGraphBuilder* filter_builder;  /* The Filter Graph Manager */
+  ICaptureGraphBuilder2* capture_graph_builder; /* Helps the Filter Graph Manager */
+  IBaseFilter* capture_filter;    /* the capture device (can vary), it's a source filter. */
+  IBaseFilter* grabber_filter;    /* returns the capture data, it's a transform filter */
+  IBaseFilter* null_filter;       /* does nothing, act as a terminator, it's a rendering filter */
+  ISampleGrabber* sample_grabber; /* Used to access the ISampleGrabber interface, since grabber_filter is a generic IBaseFilter interface based on ISampleGrabber. */
+  IMediaControl* media_control;   /* Used to Run and Stop the graph flow. */
+  IBaseFilter* overlay_renderer;  /* Used when there is a video port without a preview */    
+  IBaseFilter *overlay_mixer;
+
+  IAMVideoProcAmp* video_prop;    /* Used to set/get video properties */
+  IAMCameraControl* camera_prop;  /* Used to set/get camera properties */
+  IAMVideoControl* videoctrl_prop; /* Used to set/get video properties */
+
+  imTrackingGrabberCB* sample_callback; /* Used to intercept the samples. */
+
+  int format_count;   /* number of supported formats */
+  int format_current; /* current format */
+  int format_map[50]; /* table to map returned formats to direct X formats */
+};
+
+// Returns the number of entries currently in the device list.
+// The list is not enumerated here, so the result is 0 until some other
+// function has filled it.
+int imVideoCaptureDeviceCount(void)
+{
+  const int count = vc_DeviceCount;
+  return count;
+}
+
+// Releases the filters of all listed devices and enumerates the devices again.
+// Returns the new number of devices.
+int imVideoCaptureReloadDevices(void)
+{
+  int i = 0;
+  while (i < vc_DeviceCount)
+  {
+    vc_DeviceList[i].filter->Release();
+    i++;
+  }
+
+  // Start from an empty list.
+  vc_DeviceCount = 0;
+  vc_EnumerateDevices();
+
+  return vc_DeviceCount;
+}
+
+// Checks if device is a valid index in the device list.
+// The devices are enumerated first if the list is empty.
+// Returns 1 if the index is valid, 0 otherwise.
+static int vc_CheckDeviceList(int device)
+{
+  // List available Devices once
+  if (vc_DeviceCount == 0)
+    vc_EnumerateDevices();
+
+  // With an empty list no index can pass this test.
+  return (device >= 0 && device < vc_DeviceCount)? 1: 0;
+}
+
+// Returns the description of the device ("<index> - <name>"),
+// or NULL if the device index is not valid.
+const char* imVideoCaptureDeviceDesc(int device)
+{
+  return vc_CheckDeviceList(device)? vc_DeviceList[device].desc: NULL;
+}
+
+// Returns the extended description of the device,
+// or NULL if the device index is not valid.
+const char* imVideoCaptureDeviceExDesc(int device)
+{
+  return vc_CheckDeviceList(device)? vc_DeviceList[device].ex_desc: NULL;
+}
+
+// Returns the device path of the device,
+// or NULL if the device index is not valid.
+const char* imVideoCaptureDevicePath(int device)
+{
+  return vc_CheckDeviceList(device)? vc_DeviceList[device].path: NULL;
+}
+
+// Returns the vendor information of the device,
+// or NULL if the device index is not valid.
+const char* imVideoCaptureDeviceVendorInfo(int device)
+{
+  return vc_CheckDeviceList(device)? vc_DeviceList[device].vendorinfo: NULL;
+}
+
+// Releases a COM pointer if it is not NULL and then sets it to NULL.
+// The argument is evaluated more than once and the expansion is a braced block.
+#define vc_SafeRelease(_p) { if( (_p) != 0 ) { (_p)->Release(); (_p)= NULL; } }
+
+// If the capture filter has a video port pin and no preview pin, connects the
+// video port pin to an Overlay Mixer followed by a hidden Video Renderer.
+// On success vc->overlay_mixer and vc->overlay_renderer are set.
+// On any failure everything created here is removed from the graph and
+// released, and both members are left NULL.
+static void vc_CheckVideoPort(imVideoCapture* vc)
+{
+/*
+  If the video capture card supports the video port pin without a video preview pin this will not work. 
+  The DirectShow architecture requires that the video port pin be connected to the Overlay Mixer Filter. 
+  If this pin is not connected, data cannot be captured in DirectShow. 
+*/
+  HRESULT hr;       
+
+  IPin *pPreviewPin = NULL;
+  hr = vc->capture_graph_builder->FindPin(
+      vc->capture_filter,      // Pointer to the capture filter.
+      PINDIR_OUTPUT,           // Look for an output pin.
+      &PIN_CATEGORY_PREVIEW,   // Look for a preview pin.
+      NULL,                    // Any media type.
+      FALSE,                   // Pin can be connected.
+      0,                       // Retrieve the first matching pin.
+      &pPreviewPin             // Receives a pointer to the pin.
+  );
+  if (hr == S_OK)
+  {
+    pPreviewPin->Release();
+    return;
+  }
+
+  IPin *pVideoPortPin = NULL;
+  hr = vc->capture_graph_builder->FindPin(
+      vc->capture_filter,      // Pointer to the capture filter.
+      PINDIR_OUTPUT,           // Look for an output pin.
+      &PIN_CATEGORY_VIDEOPORT, // Look for a video port pin.
+      NULL,                    // Any media type.
+      FALSE,                   // Pin can be connected.
+      0,                       // Retrieve the first matching pin.
+      &pVideoPortPin           // Receives a pointer to the pin.
+  );
+  if (FAILED(hr) || !pVideoPortPin) return; 
+
+  // Create the overlay mixer.
+  hr = CoCreateInstance(CLSID_OverlayMixer, NULL, CLSCTX_INPROC,
+                        IID_IBaseFilter, (void **)&vc->overlay_mixer);
+  if (FAILED(hr) || !vc->overlay_mixer)
+  {
+    vc->overlay_mixer = NULL;
+    pVideoPortPin->Release();
+    return;
+  }
+
+  // Add it to the filter graph.
+  vc->filter_builder->AddFilter(vc->overlay_mixer, L"Overlay Mixer");
+
+  IPin *pOverlayPin = NULL;
+  vc->capture_graph_builder->FindPin(vc->overlay_mixer, PINDIR_INPUT, NULL, NULL, TRUE, 0, &pOverlayPin);
+
+  // The result of the connection itself is what must be tested.
+  hr = E_FAIL;
+  if (pOverlayPin)
+    hr = vc->filter_builder->Connect(pVideoPortPin, pOverlayPin);
+
+  // The pins are not needed anymore, whatever the result was.
+  vc_SafeRelease(pVideoPortPin); 
+  vc_SafeRelease(pOverlayPin);
+
+  if (SUCCEEDED(hr))
+  {
+    hr = CoCreateInstance(CLSID_VideoRenderer, NULL, CLSCTX_INPROC_SERVER, 
+                          IID_IBaseFilter, reinterpret_cast<void**>(&vc->overlay_renderer));
+    if (SUCCEEDED(hr) && !vc->overlay_renderer)
+      hr = E_FAIL;
+  }
+
+  if (FAILED(hr))
+  {
+    // Do not leave a mixer without its renderer in the graph.
+    vc->overlay_renderer = NULL;
+    vc->filter_builder->RemoveFilter(vc->overlay_mixer);
+    vc_SafeRelease(vc->overlay_mixer);
+    return;
+  }
+
+  vc->filter_builder->AddFilter(vc->overlay_renderer, L"Overlay Renderer");
+
+  vc_ConnectFilters(vc->filter_builder, vc->overlay_mixer, vc->overlay_renderer, 1);
+
+  // Keep the renderer window hidden.
+  IVideoWindow* pVideoWindow = NULL;
+  vc->overlay_renderer->QueryInterface(IID_IVideoWindow,(void**)&pVideoWindow);
+  if (pVideoWindow)
+  {
+    pVideoWindow->put_AutoShow(OAFALSE);
+    pVideoWindow->Release();
+  }
+}
+
+// Disconnects the overlay mixer and the overlay renderer, removes them from
+// the graph and releases them. Missing objects or pins are skipped.
+static void vc_ReleaseMixer(imVideoCapture* vc)
+{
+  IPin *pOverlayPin = NULL;
+  if (vc->overlay_mixer)
+    pOverlayPin = vc_GetPin(vc->overlay_mixer, PINDIR_INPUT);
+
+  if (pOverlayPin)
+  {
+    // Disconnect both sides of the video port -> mixer connection.
+    IPin *pVideoPortPin = NULL;
+    pOverlayPin->ConnectedTo(&pVideoPortPin);
+    vc->filter_builder->Disconnect(pOverlayPin);
+    if (pVideoPortPin)
+      vc->filter_builder->Disconnect(pVideoPortPin);
+    vc_SafeRelease(pVideoPortPin); 
+    vc_SafeRelease(pOverlayPin);
+  }
+
+  if (vc->overlay_mixer && vc->overlay_renderer)
+    vc_DisconnectFilters(vc->filter_builder, vc->overlay_mixer, vc->overlay_renderer);
+
+  if (vc->overlay_renderer)
+    vc->filter_builder->RemoveFilter(vc->overlay_renderer);
+  if (vc->overlay_mixer)
+    vc->filter_builder->RemoveFilter(vc->overlay_mixer);
+
+  vc_SafeRelease(vc->overlay_renderer);
+  vc_SafeRelease(vc->overlay_mixer);
+}
+
+// Creates the graph objects that do not depend on the capture device:
+// capture graph builder, filter graph, sample grabber and null renderer.
+// The grabber is configured for 24 bpp RGB video and both filters are added to the graph.
+// Returns 1 on success, 0 on failure. On failure the four created objects
+// are left for the caller to release; the interfaces queried here are released here.
+static int vc_InitCaptureGraphBuilder(imVideoCapture* vc)
+{
+  HRESULT hr = CoCreateInstance(CLSID_CaptureGraphBuilder2, NULL, CLSCTX_INPROC_SERVER, 
+                                IID_ICaptureGraphBuilder2, reinterpret_cast<void**>(&vc->capture_graph_builder));
+  if (FAILED(hr)) return 0; 
+
+  hr = CoCreateInstance(CLSID_FilterGraph, NULL, CLSCTX_INPROC_SERVER,
+                        IID_IGraphBuilder, reinterpret_cast<void**>(&vc->filter_builder));
+  if (FAILED(hr)) return 0; 
+
+  hr = CoCreateInstance(CLSID_SampleGrabber, NULL, CLSCTX_INPROC_SERVER, 
+                        IID_IBaseFilter, reinterpret_cast<void**>(&vc->grabber_filter));
+  if (FAILED(hr)) return 0; 
+
+  hr = CoCreateInstance(CLSID_NullRenderer, NULL, CLSCTX_INPROC_SERVER, 
+                        IID_IBaseFilter, reinterpret_cast<void**>(&vc->null_filter));
+  if (FAILED(hr)) return 0; 
+
+  // Initialize the Capture Graph Builder.
+  vc->capture_graph_builder->SetFiltergraph(vc->filter_builder);
+
+  // Every step below is required; stop at the first one that fails.
+  hr = vc->filter_builder->QueryInterface(IID_IMediaControl,(void**)&vc->media_control);
+  if (SUCCEEDED(hr))
+    hr = vc->grabber_filter->QueryInterface(IID_ISampleGrabber, (void **)&vc->sample_grabber);
+  if (SUCCEEDED(hr))
+    hr = vc->filter_builder->AddFilter(vc->grabber_filter, L"imSampleGrabber");
+  if (SUCCEEDED(hr))
+    hr = vc->filter_builder->AddFilter(vc->null_filter, L"imNullRenderer");
+
+  if (FAILED(hr) || !vc->media_control || !vc->sample_grabber)
+  {
+    vc_SafeRelease(vc->sample_grabber);
+    vc_SafeRelease(vc->media_control);
+    return 0;
+  }
+
+  AM_MEDIA_TYPE mt;
+  ZeroMemory(&mt, sizeof(AM_MEDIA_TYPE));
+  mt.majortype = MEDIATYPE_Video;
+  mt.subtype = MEDIASUBTYPE_RGB24;  // Force 24 bpp
+  vc->sample_grabber->SetMediaType(&mt);
+  vc->sample_grabber->SetOneShot(FALSE);
+  vc->sample_grabber->SetBufferSamples(FALSE);
+
+  vc->sample_callback = new imTrackingGrabberCB();
+
+  // Remove clock to speed up things
+  IMediaFilter* pMediaFilter = NULL;
+  vc->filter_builder->QueryInterface(IID_IMediaFilter, (void**)&pMediaFilter);
+  if (pMediaFilter)
+  {
+    pMediaFilter->SetSyncSource(NULL);
+    pMediaFilter->Release();
+  }
+
+#ifdef VC_REGISTER_FILTERGRAPH
+  vc->registered_graph = vc_AddGraphToRot(vc->filter_builder);
+#endif
+
+  return 1;
+}
+
+// Creates a video capture object, not connected to any device.
+// Returns NULL if memory can not be allocated, if there are no capture
+// devices or if the capture graph can not be created.
+imVideoCapture* imVideoCaptureCreate(void)
+{
+  imVideoCapture* vc = (imVideoCapture*)malloc(sizeof(imVideoCapture));
+  if (!vc)
+    return NULL;
+
+  memset(vc, 0, sizeof(imVideoCapture));
+
+  // List available Devices once
+  if (vc_DeviceCount == 0)
+  {
+    vc_EnumerateDevices();
+
+    if (vc_DeviceCount == 0)
+    {
+      free(vc);
+      return NULL;
+    }
+  }
+
+  if (!vc_InitCaptureGraphBuilder(vc))
+  {
+    // The structure was zero filled, so only what was created is released.
+    vc_SafeRelease(vc->sample_grabber);
+    vc_SafeRelease(vc->media_control);
+    vc_SafeRelease(vc->grabber_filter);
+    vc_SafeRelease(vc->filter_builder);
+    vc_SafeRelease(vc->capture_graph_builder);
+    vc_SafeRelease(vc->null_filter);
+    free(vc);
+    return NULL;
+  }
+
+  vc->device = -1;
+
+  return vc;
+}
+
+// Removes the capture filter from the graph and resets the state that
+// depends on the connected device.
+static void vc_CaptureRemove(imVideoCapture* vc)
+{
+  IBaseFilter* source = vc->capture_filter;
+  vc->filter_builder->RemoveFilter(source);
+
+  /* do not release here, only forget the filter */
+  vc->capture_filter = NULL; 
+
+  // Property interfaces of the removed device.
+  vc_SafeRelease(vc->video_prop);
+  vc_SafeRelease(vc->camera_prop);
+  vc_SafeRelease(vc->videoctrl_prop);
+
+  // Back to the "not connected" state.
+  vc->device = -1;
+  vc->live = 0;
+  vc->dialog_count = 0;
+}
+
+// Destroys a video capture object: disconnects from the device (if connected),
+// releases all the graph objects and frees the structure.
+void imVideoCaptureDestroy(imVideoCapture* vc)
+{
+  assert(vc);
+
+#ifdef VC_REGISTER_FILTERGRAPH
+  if (vc->registered_graph != 0) 
+    vc_RemoveGraphFromRot(vc->registered_graph);
+#endif
+
+  // Does nothing if no device is connected.
+  imVideoCaptureDisconnect(vc);
+
+  delete vc->sample_callback;
+
+  // These can be NULL.
+  vc_SafeRelease(vc->overlay_mixer);
+  vc_SafeRelease(vc->overlay_renderer);
+  vc_SafeRelease(vc->media_control);
+  vc_SafeRelease(vc->sample_grabber);
+
+  // These are released without a NULL test.
+  vc->null_filter->Release();
+  vc->grabber_filter->Release();
+  vc->filter_builder->Release();
+  vc->capture_graph_builder->Release();
+
+  free(vc);
+}
+
+// Stops the graph if it is running, then waits VC_CAMERADELAY milliseconds.
+// The live flag is not changed here.
+static void vc_StopLive(imVideoCapture* vc)
+{
+  if (!vc->live)
+    return;
+
+  vc->media_control->Stop();
+  Sleep(VC_CAMERADELAY);
+}
+
+// Runs the graph if the live flag is set, then waits VC_CAMERADELAY milliseconds.
+// Returns 0 if the graph could not be started (the live flag is cleared),
+// 1 otherwise (including when the live flag is not set).
+static int vc_StartLive(imVideoCapture* vc)
+{
+  if (!vc->live)
+    return 1;
+
+  HRESULT hr = vc->media_control->Run();
+  if (VC_HARDFAILED(hr))
+  {
+    vc->live = 0;
+    return 0;
+  }
+
+  Sleep(VC_CAMERADELAY);
+  return 1;
+}
+
+// Captures a single frame: the graph is restarted in one shot mode, one frame
+// is retrieved without a time limit, and the graph is stopped again.
+// The capture is not live when the function returns.
+// Returns the result of imVideoCaptureFrame, or 0 if the graph could not be started.
+int imVideoCaptureOneFrame(imVideoCapture* vc, unsigned char* data, int color_mode)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  int ret = 0;
+
+  vc_StopLive(vc);
+  vc->live = 0;
+
+  vc->sample_grabber->SetOneShot(TRUE);
+
+  // vc_StartLive only runs the graph when the live flag is set,
+  // and clears the flag itself when it fails.
+  vc->live = 1;
+  if (vc_StartLive(vc))
+  {
+    ret = imVideoCaptureFrame(vc, data, color_mode, -1);
+
+    vc_StopLive(vc);
+    vc->live = 0;
+  }
+
+  vc->sample_grabber->SetOneShot(FALSE);
+
+  return ret;
+}
+
+// Retrieves the most recent frame from the sample callback.
+// Must be connected and live. Returns 1 if a new frame was copied to data, 0 otherwise.
+int imVideoCaptureFrame(imVideoCapture* vc, unsigned char* data, int color_mode, int timeout)
+{
+  assert(vc);
+  assert(vc->device != -1);
+  assert(vc->live);
+
+  imTrackingGrabberCB* callback = vc->sample_callback;
+  return callback->GetImage(data, color_mode, timeout);
+}
+
+// Takes the graph apart, keeping the grabber and the null renderer in it:
+// the callback is detached, the overlay chain (if any) is released, and all
+// the filters downstream of the capture filter are removed.
+// Returns 0 if the grabber input could not be disconnected, 1 otherwise.
+static int vc_CaptureDisconnect(imVideoCapture* vc)
+{
+  vc->sample_grabber->SetCallback(NULL, 0);
+
+  if (vc->overlay_mixer != NULL)
+    vc_ReleaseMixer(vc);
+
+  vc_DisconnectFilters(vc->filter_builder, vc->grabber_filter, vc->null_filter);
+
+  // Disconnect the grabber to preserve it
+  int detached = vc_DisconnectFilterPin(vc->filter_builder, vc->grabber_filter, PINDIR_INPUT);
+  if (detached)
+  {
+    // Remove everything downstream the capture filter, except the null renderer
+    vc_NukeDownstream(vc->filter_builder, vc->capture_filter);
+  }
+
+  return detached? 1: 0;
+}
+
+// Disconnects from the current device: stops the capture, takes the graph
+// apart and removes the capture filter. Does nothing if not connected.
+void imVideoCaptureDisconnect(imVideoCapture* vc)
+{
+  assert(vc);
+
+  if (vc->device != -1)
+  {
+    vc_StopLive(vc);
+    vc->live = 0;
+
+    vc_CaptureDisconnect(vc);
+    vc_CaptureRemove(vc);
+  }
+}
+
+// Passes the current image size of the connection to the sample callback.
+static void vc_UpdateSize(imVideoCapture* vc)
+{
+  int w, h;
+  imVideoCaptureGetImageSize(vc, &w, &h);
+
+  vc->sample_callback->SetImageSize(w, h);
+}
+
+static void vc_UpdateDialogs(imVideoCapture* vc);
+static void vc_UpdateFormatList(imVideoCapture* vc);
+
+// Builds the graph for the current capture filter:
+// capture -> (other filters) -> grabber -> null renderer.
+// Then updates dialogs, formats and image size, and installs the sample callback.
+// Returns 0 if the capture filter could not be connected to the grabber
+// (the capture filter is removed in this case), 1 otherwise.
+static int vc_CaptureConnect(imVideoCapture* vc)
+{
+  vc_CheckVideoPort(vc);
+
+  int linked = vc_ConnectFilters(vc->filter_builder, vc->capture_filter, vc->grabber_filter, 0);
+  if (linked)
+  {
+    vc_ConnectFilters(vc->filter_builder, vc->grabber_filter, vc->null_filter, 1);
+
+    vc_UpdateDialogs(vc);
+    vc_UpdateFormatList(vc);
+    vc_UpdateSize(vc);
+
+    // associate the sample_grabber with the sample_callback
+    vc->sample_grabber->SetCallback(vc->sample_callback, 0);  
+  }
+  else
+  {
+    vc_CaptureRemove(vc);
+  }
+
+  return linked? 1: 0;
+}
+
+// Connects to a capture device.
+//   device - index in the device list, or -1 to query the current device.
+// Returns the current device if device is -1. Otherwise returns 1 on success
+// (also when already connected to that device), and 0 if the index is out of
+// range or the connection fails.
+int imVideoCaptureConnect(imVideoCapture* vc, int device)
+{
+  assert(vc);
+
+  if (device == -1)
+    return vc->device;
+
+  if (device == vc->device)
+    return 1;
+
+  // Valid indices are 0 to vc_DeviceCount-1:
+  // the entry at vc_DeviceCount is not a listed device.
+  if (device < 0 || device >= vc_DeviceCount)
+    return 0;
+
+  if (vc->device != -1)
+    imVideoCaptureDisconnect(vc);
+
+  vc->capture_filter = vc_DeviceList[device].filter;
+  if (!vc->capture_filter)
+    return 0;
+
+  vc->filter_builder->AddFilter(vc->capture_filter, L"imCaptureSource");
+  vc->device = device;
+
+  if (!vc_CaptureConnect(vc))
+    return 0;
+
+  return 1;
+}
+
+// Starts or stops the capture.
+//   live - 1 to start, 0 to stop, -1 to query the current state.
+// Returns the current state if live is -1. Otherwise returns 0 if not
+// connected or if the capture could not be started, and 1 on success.
+int imVideoCaptureLive(imVideoCapture* vc, int live)
+{
+  assert(vc);
+
+  if (live == -1)
+    return vc->live;
+
+  if (vc->device == -1)
+    return 0;
+
+  // Nothing to do.
+  if (live == vc->live)
+    return 1;
+
+  if (!live)
+  {
+    vc_StopLive(vc); 
+    vc->live = 0;
+    return 1;
+  }
+
+  // The flag must be set before, vc_StartLive only runs the graph when it is set.
+  vc->live = 1;
+  return vc_StartLive(vc)? 1: 0;
+}
+
+
+/**************************************************************************
+                            Format and Size
+***************************************************************************/
+
+
+// Returns the image size of the media type connected to the sample grabber.
+// Both values are 0 if the media type can not be obtained or is not a
+// video type described by a VIDEOINFOHEADER.
+void imVideoCaptureGetImageSize(imVideoCapture* vc, int *width, int *height)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  *width = 0;
+  *height = 0;
+
+  AM_MEDIA_TYPE mt;
+  ZeroMemory(&mt, sizeof(AM_MEDIA_TYPE));
+  HRESULT hr = vc->sample_grabber->GetConnectedMediaType(&mt);
+  if (FAILED(hr))
+    return;
+
+  if ((mt.majortype == MEDIATYPE_Video) &&
+      (mt.formattype == FORMAT_VideoInfo) &&
+      (mt.cbFormat >= sizeof (VIDEOINFOHEADER)) &&
+      (mt.pbFormat != NULL))
+  {
+    VIDEOINFOHEADER *pVih = (VIDEOINFOHEADER*)mt.pbFormat;
+    *width = pVih->bmiHeader.biWidth;
+    *height = abs(pVih->bmiHeader.biHeight);
+  }
+
+  // The contents of the media type belong to the caller whenever the call
+  // succeeds, not only when the format is the expected one.
+  if (mt.pbFormat) CoTaskMemFree((PVOID)mt.pbFormat);
+  if (mt.pUnk) mt.pUnk->Release();
+}
+
+// Looks for the IIPDVDec interface downstream of the capture filter.
+// Returns NULL if it is not found.
+static IIPDVDec* vc_GetDVDecoder(imVideoCapture* vc)
+{
+  IIPDVDec *decoder = NULL;
+
+  if (FAILED(vc->capture_graph_builder->FindInterface(NULL,
+               &MEDIATYPE_Video, vc->capture_filter, IID_IIPDVDec, (void **)&decoder)))
+    return NULL;
+
+  return decoder;
+}
+
+// Looks for the IAMStreamConfig interface of the video capture pin.
+// Returns NULL if it is not found.
+static IAMStreamConfig* vc_GetStreamConfig(imVideoCapture* vc)
+{
+  IAMStreamConfig *config = NULL;                           
+
+  HRESULT hr = vc->capture_graph_builder->FindInterface(&PIN_CATEGORY_CAPTURE,
+                 &MEDIATYPE_Video, vc->capture_filter, IID_IAMStreamConfig, (void **)&config);
+
+  return FAILED(hr)? NULL: config;
+}
+
+// Frees a media type allocated by the system: the format block, the pUnk
+// reference (if any) and the structure itself. Does nothing if pmt is NULL.
+static void vc_DeleteMediaType(AM_MEDIA_TYPE *pmt)
+{
+  if (!pmt)
+    return;
+
+  if (pmt->pbFormat)
+    CoTaskMemFree((PVOID)pmt->pbFormat);
+
+  // The media type can hold a reference to an object.
+  if (pmt->pUnk)
+    pmt->pUnk->Release();
+
+  CoTaskMemFree(pmt);
+}
+
+// Changes the image size of the capture pin format, keeping its other
+// parameters. The frame rate is set to 30 frames per second.
+// Returns non zero on success. Returns 0 if the stream can not be configured,
+// if the current format is not described by a VIDEOINFOHEADER, or if the
+// new format is not accepted.
+static int vc_SetStreamSize(imVideoCapture* vc, int width, int height)
+{
+  IAMStreamConfig *pSC = vc_GetStreamConfig(vc);
+  if (!pSC) return 0;
+
+  AM_MEDIA_TYPE *pmt = NULL;
+  HRESULT hr = pSC->GetFormat(&pmt);
+  if (FAILED(hr) || !pmt) 
+  {
+    pSC->Release();
+    return 0;
+  }
+
+  // The format block is only a VIDEOINFOHEADER for this format type.
+  if (!(pmt->formattype == FORMAT_VideoInfo) ||
+      pmt->cbFormat < sizeof(VIDEOINFOHEADER) ||
+      pmt->pbFormat == NULL)
+  {
+    vc_DeleteMediaType(pmt);
+    pSC->Release();
+    return 0;
+  }
+
+  VIDEOINFOHEADER* vih = (VIDEOINFOHEADER*)pmt->pbFormat;
+  BITMAPINFOHEADER* bih = &vih->bmiHeader;
+
+  /* dibs are DWORD aligned */
+  int data_size = height * ((width * bih->biBitCount + 31) / 32) * 4;   /* 4 bytes boundary */
+
+  bih->biSize = sizeof(BITMAPINFOHEADER);
+  bih->biHeight = height;
+  bih->biWidth = width;
+  bih->biSizeImage = data_size;
+
+  int fps = 30;  // desired frame rate
+  vih->dwBitRate = fps * data_size;
+  vih->AvgTimePerFrame = 10000000 / fps;
+
+  pmt->cbFormat = sizeof(VIDEOINFOHEADER);
+  pmt->lSampleSize = data_size;
+
+  hr = pSC->SetFormat(pmt);
+  pSC->Release();
+
+  vc_DeleteMediaType(pmt);
+
+  return SUCCEEDED(hr);
+}
+
+// Changes the captured image size.
+// If a DV decoder is present only the widths 720, 360, 180 and 88 are
+// accepted and the decoder resolution is changed; otherwise the capture
+// pin format is changed.
+// On success the sample callback is informed of the new size.
+// Returns non zero on success, 0 otherwise.
+static int vc_SetImageSize(imVideoCapture* vc, int width, int height)
+{
+  int ret;
+
+  IIPDVDec* pDV = vc_GetDVDecoder(vc);
+  if (pDV)
+  {
+    int size = 0;
+
+    switch(width)
+    {
+    case 720:
+      size = DVRESOLUTION_FULL;
+      break;
+    case 360:
+      size = DVRESOLUTION_HALF;
+      break;
+    case 180:
+      size = DVRESOLUTION_QUARTER;
+      break;
+    case 88:
+      size = DVRESOLUTION_DC;
+      break;
+    }
+
+    ret = 0;
+    if (size)
+      ret = SUCCEEDED(pDV->put_IPDisplay(size));
+
+    // The interface was obtained for this call only, release it on every path.
+    pDV->Release();
+  }
+  else
+    ret = vc_SetStreamSize(vc, width, height);
+
+  if (ret)
+    vc->sample_callback->SetImageSize(width, height);
+
+  return ret;
+}
+
+// Changes the captured image size. The graph is stopped, taken apart,
+// rebuilt with the new size and started again if it was live.
+// Returns non zero if the size was changed and the graph was rebuilt, 0 otherwise.
+int imVideoCaptureSetImageSize(imVideoCapture* vc, int width, int height)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  vc_StopLive(vc);
+
+  // must be disconnected to change size or format
+  vc_CaptureDisconnect(vc);
+
+  int size_changed = vc_SetImageSize(vc, width, height);
+  int reconnected = vc_CaptureConnect(vc);
+
+  vc_StartLive(vc);
+
+  return reconnected? size_changed: 0;
+}
+
+// Rebuilds the table of formats of the capture pin.
+// Only capabilities of type FORMAT_VideoInfo are listed; format_map stores
+// the capability index of each listed format. format_current is the listed
+// format that matches the current subtype and size, or -1 if none matches.
+// The list is limited to the size of format_map.
+static void vc_UpdateFormatList(imVideoCapture* vc)
+{
+  const int max_formats = (int)(sizeof(vc->format_map)/sizeof(vc->format_map[0]));
+
+  vc->format_count = 0;
+  vc->format_current = -1;
+
+  IAMStreamConfig *pSC = vc_GetStreamConfig(vc);
+  if (!pSC) return;
+
+  // The size is checked because GetStreamCaps writes iSize bytes in the
+  // structure passed to it.
+  int iCount = 0, iSize = 0;
+  if (FAILED(pSC->GetNumberOfCapabilities(&iCount, &iSize)) ||
+      iSize != (int)sizeof(VIDEO_STREAM_CONFIG_CAPS))
+  {
+    pSC->Release();
+    return;
+  }
+
+  AM_MEDIA_TYPE *curr_pmt = NULL;
+  HRESULT hr = pSC->GetFormat(&curr_pmt);
+  if (FAILED(hr) || !curr_pmt) 
+  {
+    pSC->Release();
+    return;
+  }
+
+  // Size of the current format, read once and only if the format block
+  // really is a VIDEOINFOHEADER.
+  int has_current = 0, width = 0, height = 0;
+  if (curr_pmt->formattype == FORMAT_VideoInfo &&
+      curr_pmt->cbFormat >= sizeof(VIDEOINFOHEADER) &&
+      curr_pmt->pbFormat != NULL)
+  {
+    VIDEOINFOHEADER* vih = (VIDEOINFOHEADER*)curr_pmt->pbFormat;
+    BITMAPINFOHEADER* bih = &vih->bmiHeader;
+    width = bih->biWidth;
+    height = abs(bih->biHeight);
+    has_current = 1;
+  }
+
+  for (int iFormat = 0; iFormat < iCount && vc->format_count < max_formats; iFormat++)
+  {
+    VIDEO_STREAM_CONFIG_CAPS scc;
+    AM_MEDIA_TYPE *pmt = NULL;
+    if (SUCCEEDED(pSC->GetStreamCaps(iFormat, &pmt, (BYTE*)&scc)))
+    {
+      if (scc.guid == FORMAT_VideoInfo)
+      {
+        if (has_current && pmt &&
+            curr_pmt->subtype == pmt->subtype && 
+            width == scc.InputSize.cx && 
+            height == scc.InputSize.cy)
+        {
+          vc->format_current = vc->format_count;
+        }
+
+        vc->format_map[vc->format_count] = iFormat;
+        vc->format_count++;
+      }                                    
+
+      if (pmt) vc_DeleteMediaType(pmt);
+    }
+  }
+
+  vc_DeleteMediaType(curr_pmt);
+  pSC->Release();
+}
+
+/* Returns the number of formats currently stored in vc->format_count.
+ * A device must be connected (vc->device != -1). */
+int imVideoCaptureFormatCount(imVideoCapture* vc)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  return vc->format_count;
+}
+
+/* Writes a short zero-terminated name for a media subtype into desc.
+ * The RGB subtypes listed in the table get a fixed name ("RGB1" ... "RGB32").
+ * Any other subtype is described by the four bytes of its Data1 field, least
+ * significant byte first (presumably a FOURCC code - confirm with the devices in use).
+ * desc must have room for at least 7 chars ("RGB565" plus the terminator). */
+static void vc_GetFormatName(GUID subtype, char* desc)
+{
+#define VC_NUM_FORMATS 7
+  typedef struct _guid2name {
+    const char* name;
+    const GUID* subtype;
+  } guid2name;
+  static const guid2name map_table[VC_NUM_FORMATS] = {
+    {"RGB1",&MEDIASUBTYPE_RGB1},
+    {"RGB4",&MEDIASUBTYPE_RGB4},
+    {"RGB8",&MEDIASUBTYPE_RGB8},
+    {"RGB565",&MEDIASUBTYPE_RGB565},
+    {"RGB555",&MEDIASUBTYPE_RGB555},
+    {"RGB24",&MEDIASUBTYPE_RGB24},
+    {"RGB32",&MEDIASUBTYPE_RGB32}
+  };
+
+  for (int i = 0; i < VC_NUM_FORMATS; i++)
+  {
+    if (*(map_table[i].subtype) == subtype)
+    {
+      strcpy(desc, map_table[i].name);
+      return;
+    }
+  }
+
+  /* the fourth byte is bits 24-31; the previous shift by 32 was out of range
+     for the 32 bits of Data1 */
+  desc[0] = (char)(subtype.Data1);
+  desc[1] = (char)(subtype.Data1 >> 8);
+  desc[2] = (char)(subtype.Data1 >> 16);
+  desc[3] = (char)(subtype.Data1 >> 24);
+  desc[4] = 0;
+}
+
+/* Retrieves the size and the name of one entry of the format list.
+ * format is an index in [0, format_count). width and height receive the InputSize
+ * of the capability and desc receives the name written by vc_GetFormatName.
+ * Returns 1 on success, 0 if the index is out of range or the capability
+ * could not be read. */
+int imVideoCaptureGetFormat(imVideoCapture* vc, int format, int *width, int *height, char* desc)
+{
+  assert(vc);
+  assert(vc->device != -1);
+  assert(vc->format_count);
+
+  /* a negative index would read outside format_map */
+  if (format < 0 || format >= vc->format_count)
+    return 0;
+
+  IAMStreamConfig *pSC = vc_GetStreamConfig(vc);
+  if (!pSC) return 0;
+
+  int ret = 0;
+  VIDEO_STREAM_CONFIG_CAPS scc;
+  AM_MEDIA_TYPE *pmt;
+  if (SUCCEEDED(pSC->GetStreamCaps(vc->format_map[format], &pmt, (BYTE*)&scc)))
+  {
+    *width = scc.InputSize.cx;
+    *height = scc.InputSize.cy;
+    vc_GetFormatName(pmt->subtype, desc);
+
+    vc_DeleteMediaType(pmt);
+    ret = 1;
+  }
+
+  pSC->Release();
+  return ret;
+}
+
+/* Sets the stream format to the capability stored at vc->format_map[format].
+ * The sample callback is informed of the new size only when the format was
+ * accepted. Returns 1 on success, 0 on failure. */
+static int vc_SetStreamFormat(imVideoCapture* vc, int format)
+{
+  IAMStreamConfig *pSC = vc_GetStreamConfig(vc);
+  if (!pSC) return 0;
+
+  VIDEO_STREAM_CONFIG_CAPS scc;
+  AM_MEDIA_TYPE *pmt;
+  if (FAILED(pSC->GetStreamCaps(vc->format_map[format], &pmt, (BYTE*)&scc)))
+  {
+    pSC->Release();
+    return 0;
+  }
+
+  /* the result used to be ignored, reporting success for a rejected format */
+  HRESULT hr = pSC->SetFormat(pmt);
+  pSC->Release();
+
+  vc_DeleteMediaType(pmt);
+
+  if (FAILED(hr))
+    return 0;
+
+  vc->sample_callback->SetImageSize(scc.InputSize.cx, scc.InputSize.cy);
+  return 1;
+}
+
+/* Selects one entry of the format list as the capture format.
+ * format == -1 is a query: the current format index is returned and nothing changes.
+ * Otherwise format must be in [0, format_count); live capture is stopped and the
+ * graph disconnected while the format is changed, then both are restored.
+ * Returns non-zero on success, 0 on failure or invalid index. */
+int imVideoCaptureSetFormat(imVideoCapture* vc, int format)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  if (format == -1)
+    return vc->format_current;
+
+  /* any other negative index would read outside format_map */
+  if (format < 0 || format >= vc->format_count)
+    return 0;
+
+  vc_StopLive(vc);
+
+  // must be disconnected to change size or format
+  vc_CaptureDisconnect(vc);
+
+  int ok = vc_SetStreamFormat(vc, format);
+
+  if (!vc_CaptureConnect(vc))
+    ok = 0;
+
+  if (ok)
+    vc->format_current = format;
+
+  vc_StartLive(vc);
+
+  return ok;
+}
+
+
+/**************************************************************************
+                            Dialogs
+***************************************************************************/
+
+
+/* Returns the ISpecifyPropertyPages interface of obj when the object exposes it
+ * and GetPages succeeds, NULL otherwise.
+ * The caller must Release the returned interface. */
+static ISpecifyPropertyPages* vc_GetPropertyPages(IUnknown* obj)
+{
+  ISpecifyPropertyPages *pSpec = NULL;
+
+  HRESULT hr = obj->QueryInterface(IID_ISpecifyPropertyPages,  (void **)&pSpec);
+  if (FAILED(hr) || !pSpec) return NULL;
+
+  /* initialized so that nothing undefined is ever passed to CoTaskMemFree */
+  CAUUID cauuid;
+  cauuid.cElems = 0;
+  cauuid.pElems = NULL;
+
+  hr = pSpec->GetPages(&cauuid);
+  if (FAILED(hr))
+  {
+    /* on failure the page array is not valid and must not be freed */
+    pSpec->Release();
+    return NULL;
+  }
+
+  /* the pages were only requested as a test, they are not used here */
+  CoTaskMemFree(cauuid.pElems);
+
+  return pSpec;
+}
+
+/* Shows the property pages of obj in a property frame owned by parent.
+ * Returns 1 if the frame was created, 0 if obj has no property pages
+ * or the frame could not be created. */
+static int vc_ShowPropertyPages(HWND parent, IUnknown* obj, WCHAR* title)
+{
+  /* NULL when the object does not expose property pages */
+  ISpecifyPropertyPages *pSpec = vc_GetPropertyPages(obj);
+  if (!pSpec) return 0;
+
+  CAUUID cauuid;
+  cauuid.cElems = 0;
+  cauuid.pElems = NULL;
+
+  HRESULT hr = pSpec->GetPages(&cauuid);
+  if (SUCCEEDED(hr))
+  {
+    hr = OleCreatePropertyFrame(parent, 30, 30, title, 1,
+                          &obj, cauuid.cElems, (GUID *)cauuid.pElems, 0, 0, NULL);
+
+    CoTaskMemFree(cauuid.pElems);
+  }
+
+  pSpec->Release();
+
+  if (FAILED(hr)) return 0;
+  return 1;
+}
+
+/* Looks for the IAMVfwCaptureDialogs interface on the video capture pin category
+ * of the capture filter. Returns the interface, or NULL when FindInterface fails.
+ * The caller must Release the returned interface. */
+static IAMVfwCaptureDialogs* vc_getVfwDialogs(imVideoCapture* vc)
+{
+  IAMVfwCaptureDialogs* vfw_dialogs = NULL;
+
+  if (SUCCEEDED(vc->capture_graph_builder->FindInterface(&PIN_CATEGORY_CAPTURE,
+        &MEDIATYPE_Video, vc->capture_filter, IID_IAMVfwCaptureDialogs, (void **)&vfw_dialogs)))
+    return vfw_dialogs;
+
+  return NULL;
+}
+
+/* Shows one of the Video for Windows driver dialogs.
+ * Live capture is stopped and the graph disconnected while the dialog is shown,
+ * then both are restored. Returns 1 if the dialog was shown and the graph was
+ * reconnected, 0 otherwise (including when the driver does not have the dialog). */
+static int vc_ShowVfwDialog(imVideoCapture* vc, HWND parent, VfwCaptureDialogs dialog)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  IAMVfwCaptureDialogs *pDlg = vc_getVfwDialogs(vc);
+  if(!pDlg) return 0;
+
+  /* HasDialog reports a missing dialog with S_FALSE, which is not a failure
+     code, so the result is compared with S_OK as vc_UpdateDialogs does */
+  HRESULT hr = pDlg->HasDialog(dialog);
+  if (hr != S_OK)
+  {
+    pDlg->Release();
+    return 0;
+  }
+
+  int ret = 0;
+  vc_StopLive(vc);
+
+  // must be disconnected to change size or format
+  vc_CaptureDisconnect(vc);
+
+  hr = pDlg->ShowDialog(dialog, parent);
+  if (SUCCEEDED(hr))
+    ret = 1;
+
+  if (!vc_CaptureConnect(vc))
+    ret = 0;
+
+  vc_StartLive(vc);
+
+  pDlg->Release();
+  return ret;
+}
+
+/* Dialog callback: shows the VFW format dialog through vc_ShowVfwDialog. */
+static int vc_ShowVfwFormatDialog(imVideoCapture* vc, HWND parent)
+{
+  return vc_ShowVfwDialog(vc, parent, VfwCaptureDialog_Format);
+}
+
+/* Dialog callback: shows the VFW source dialog through vc_ShowVfwDialog. */
+static int vc_ShowVfwSourceDialog(imVideoCapture* vc, HWND parent)
+{
+  return vc_ShowVfwDialog(vc, parent, VfwCaptureDialog_Source);
+}
+
+/* Dialog callback: shows the VFW display dialog through vc_ShowVfwDialog. */
+static int vc_ShowVfwDisplayDialog(imVideoCapture* vc, HWND parent)
+{
+  return vc_ShowVfwDialog(vc, parent, VfwCaptureDialog_Display);
+}
+
+/* Dialog callback: shows the property pages of the stream configuration
+ * interface with the title "Format".
+ * Live capture is stopped and the graph disconnected while the pages are shown.
+ * Returns 0 if the interface is not available, the pages could not be shown
+ * or the graph could not be reconnected. */
+static int vc_ShowFormatDialog(imVideoCapture* vc, HWND parent)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  IAMStreamConfig *pSC = vc_GetStreamConfig(vc);
+  if (!pSC) return 0;
+
+  vc_StopLive(vc);
+
+  // must be disconnected to change size or format
+  vc_CaptureDisconnect(vc);
+
+  int ok = vc_ShowPropertyPages(parent, (IUnknown*)pSC, L"Format");
+  pSC->Release();
+
+  // a failed reconnection overrides the dialog result
+  if (!vc_CaptureConnect(vc))
+    ok = 0;
+
+  vc_StartLive(vc);
+
+  return ok;
+}
+
+/* Dialog callback: shows the property pages of the capture filter itself
+ * with the title "Source". Returns the result of vc_ShowPropertyPages. */
+static int vc_ShowSourceDialog(imVideoCapture* vc, HWND parent)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  return vc_ShowPropertyPages(parent, (IUnknown*)vc->capture_filter, L"Source");
+}
+
+/* Looks for the IAMTVTuner interface on the video capture pin category of the
+ * capture filter. Returns the interface, or NULL when FindInterface fails.
+ * The caller must Release the returned interface. */
+static IAMTVTuner* vc_GetTVTuner(imVideoCapture* vc)
+{
+  IAMTVTuner *tuner = NULL;
+
+  if (SUCCEEDED(vc->capture_graph_builder->FindInterface(&PIN_CATEGORY_CAPTURE,
+            &MEDIATYPE_Video, vc->capture_filter, IID_IAMTVTuner, (void **)&tuner)))
+    return tuner;
+
+  return NULL;
+}
+
+/* Dialog callback: shows the property pages of the TV tuner.
+ * Returns 0 if there is no TV tuner, otherwise the result of vc_ShowPropertyPages. */
+static int vc_ShowTVTunerDialog(imVideoCapture* vc, HWND parent)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  IAMTVTuner* pTVT = vc_GetTVTuner(vc);
+  if (!pTVT)
+    return 0;
+
+  /* title fixed: it was misspelled as "TV Turner" */
+  int ret = vc_ShowPropertyPages(parent, (IUnknown*)pTVT, L"TV Tuner");
+  pTVT->Release();
+  return ret;
+}
+
+/* Looks for the IAMCrossbar interface on the video capture pin category of the
+ * capture filter. Returns the interface, or NULL when FindInterface fails.
+ * The caller must Release the returned interface. */
+static IAMCrossbar* vc_GetCrossBar(imVideoCapture* vc)
+{
+  IAMCrossbar *crossbar = NULL;
+
+  if (SUCCEEDED(vc->capture_graph_builder->FindInterface(&PIN_CATEGORY_CAPTURE,
+            &MEDIATYPE_Video, vc->capture_filter, IID_IAMCrossbar, (void **)&crossbar)))
+    return crossbar;
+
+  return NULL;
+}
+
+/* Dialog callback: shows the property pages of the crossbar.
+ * Returns 0 if there is no crossbar, otherwise the result of vc_ShowPropertyPages. */
+static int vc_ShowCrossbarDialog(imVideoCapture* vc, HWND parent)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  IAMCrossbar* pX = vc_GetCrossBar(vc);
+  if (!pX)
+    return 0;
+
+  int ret = vc_ShowPropertyPages(parent, (IUnknown*)pX, L"Crossbar");
+  pX->Release();
+  return ret;
+}
+
+/* Looks for a second crossbar upstream of the filter that implements pX.
+ * Returns the interface, or NULL if pX does not expose IBaseFilter or no other
+ * crossbar is found. The caller must Release the returned interface;
+ * pX itself is not released here. */
+static IAMCrossbar* vc_GetSecondCrossBar(imVideoCapture* vc, IAMCrossbar *pX)
+{
+  IAMCrossbar *pX2 = NULL;
+  IBaseFilter *pXF;
+  HRESULT hr = pX->QueryInterface(IID_IBaseFilter, (void **)&pXF);
+  if(hr != S_OK) return NULL;
+
+  // the search starts at the first crossbar filter and goes upstream only
+  hr = vc->capture_graph_builder->FindInterface(&LOOK_UPSTREAM_ONLY,
+                 NULL, pXF, IID_IAMCrossbar, (void **)&pX2);
+  pXF->Release();
+  if(FAILED(hr)) return NULL;
+
+  return pX2;
+}
+
+/* Dialog callback: shows the property pages of the second crossbar.
+ * Returns 0 if either crossbar is missing, otherwise the result of
+ * vc_ShowPropertyPages. */
+static int vc_ShowSecondCrossbarDialog(imVideoCapture* vc, HWND parent)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  // the first crossbar is only needed to locate the second one
+  IAMCrossbar* pX = vc_GetCrossBar(vc);
+  if (!pX)
+    return 0;
+
+  IAMCrossbar* pX2 = vc_GetSecondCrossBar(vc, pX);
+  if (!pX2)
+  {
+    pX->Release();
+    return 0;
+  }
+
+  int ret = vc_ShowPropertyPages(parent, (IUnknown*)pX2, L"Second Crossbar");
+  pX->Release();
+  pX2->Release();
+  return ret;
+}
+
+/* Dialog callback: shows the property pages of the DV decoder.
+ * Live capture is stopped while the pages are shown; the graph is not disconnected.
+ * Returns 0 if there is no DV decoder, otherwise the result of vc_ShowPropertyPages. */
+static int vc_ShowDVDecDialog(imVideoCapture* vc, HWND parent)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  IIPDVDec* pDV = vc_GetDVDecoder(vc);
+  if (!pDV)
+    return 0;
+
+  vc_StopLive(vc);
+
+  int ret = vc_ShowPropertyPages(parent, (IUnknown*)pDV, L"DV Decoder");
+  pDV->Release();
+
+  vc_StartLive(vc);
+
+  return ret;
+}
+
+/* Rebuilds the list of configuration dialogs (vc->dialog_desc, vc->dialog_func,
+ * vc->dialog_count) available for the current device.
+ * When the VFW dialogs interface is found only the VFW dialogs the driver
+ * reports are listed. Otherwise one entry is added for each of the stream
+ * configuration, capture filter, DV decoder, crossbar, second crossbar and
+ * TV tuner that exposes property pages. */
+static void vc_UpdateDialogs(imVideoCapture* vc)
+{
+  vc->dialog_count = 0;
+
+  IAMVfwCaptureDialogs *pDlg = vc_getVfwDialogs(vc);
+  if(pDlg)
+  {
+    if(pDlg->HasDialog(VfwCaptureDialog_Format) == S_OK)
+    {
+      vc->dialog_desc[vc->dialog_count] = "Format... (VFW)";
+      vc->dialog_func[vc->dialog_count] = vc_ShowVfwFormatDialog;
+      vc->dialog_count++;
+    }
+
+    if(pDlg->HasDialog(VfwCaptureDialog_Source) == S_OK)
+    {
+      vc->dialog_desc[vc->dialog_count] = "Source... (VFW)";
+      vc->dialog_func[vc->dialog_count] = vc_ShowVfwSourceDialog;
+      vc->dialog_count++;
+    }
+
+    if(pDlg->HasDialog(VfwCaptureDialog_Display) == S_OK)
+    {
+      vc->dialog_desc[vc->dialog_count] = "Display... (VFW)";
+      vc->dialog_func[vc->dialog_count] = vc_ShowVfwDisplayDialog;
+      vc->dialog_count++;
+    }
+
+    /* the interface used to leak on this early return */
+    pDlg->Release();
+    return;
+  }
+
+  ISpecifyPropertyPages *pSpec;
+  IAMStreamConfig *pSC = vc_GetStreamConfig(vc);
+  if (pSC)
+  {
+    pSpec = vc_GetPropertyPages((IUnknown*)pSC);
+    if (pSpec)
+    {
+      vc->dialog_desc[vc->dialog_count] = "Format...";
+      vc->dialog_func[vc->dialog_count] = vc_ShowFormatDialog;
+      vc->dialog_count++;
+      pSpec->Release();
+    }
+
+    pSC->Release();
+  }
+
+  pSpec = vc_GetPropertyPages((IUnknown*)vc->capture_filter);
+  if (pSpec)
+  {
+    vc->dialog_desc[vc->dialog_count] = "Source...";
+    vc->dialog_func[vc->dialog_count] = vc_ShowSourceDialog;
+    vc->dialog_count++;
+    pSpec->Release();
+  }
+
+  IIPDVDec* pDV = vc_GetDVDecoder(vc);
+  if (pDV)
+  {
+    pSpec = vc_GetPropertyPages((IUnknown*)pDV);
+    if (pSpec)
+    {
+      vc->dialog_desc[vc->dialog_count] = "DV Decoder...";
+      vc->dialog_func[vc->dialog_count] = vc_ShowDVDecDialog;
+      vc->dialog_count++;
+      pSpec->Release();
+    }
+
+    pDV->Release();
+  }
+
+  IAMCrossbar* pX = vc_GetCrossBar(vc);
+  if (pX)
+  {
+    pSpec = vc_GetPropertyPages((IUnknown*)pX);
+    if (pSpec)
+    {
+      vc->dialog_desc[vc->dialog_count] = "Crossbar...";
+      vc->dialog_func[vc->dialog_count] = vc_ShowCrossbarDialog;
+      vc->dialog_count++;
+      pSpec->Release();
+    }
+
+    /* the second crossbar is searched upstream of the first one */
+    IAMCrossbar* pX2 = vc_GetSecondCrossBar(vc, pX);
+    if (pX2)
+    {
+      pSpec = vc_GetPropertyPages((IUnknown*)pX2);
+      if (pSpec)
+      {
+        vc->dialog_desc[vc->dialog_count] = "Second Crossbar...";
+        vc->dialog_func[vc->dialog_count] = vc_ShowSecondCrossbarDialog;
+        vc->dialog_count++;
+        pSpec->Release();
+      }
+
+      pX2->Release();
+    }
+
+    pX->Release();
+  }
+
+  IAMTVTuner* pTVT = vc_GetTVTuner(vc);
+  if (pTVT)
+  {
+    pSpec = vc_GetPropertyPages((IUnknown*)pTVT);
+    if (pSpec)
+    {
+      vc->dialog_desc[vc->dialog_count] = "TV Tuner...";
+      vc->dialog_func[vc->dialog_count] = vc_ShowTVTunerDialog;
+      vc->dialog_count++;
+      pSpec->Release();
+    }
+
+    pTVT->Release();
+  }
+}
+
+/* Returns the number of dialogs currently stored in vc->dialog_count.
+ * A device must be connected (vc->device != -1). */
+int imVideoCaptureDialogCount(imVideoCapture* vc)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  return vc->dialog_count;
+}
+
+/* Returns the description of a dialog, or NULL if the index is not in
+ * [0, dialog_count). */
+const char* imVideoCaptureDialogDesc(imVideoCapture* vc, int dialog)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  /* a negative index would read outside dialog_desc */
+  if (dialog < 0 || dialog >= vc->dialog_count)
+    return NULL;
+
+  return vc->dialog_desc[dialog];
+}
+
+/* Shows a dialog of the dialog list. parent is a native window handle (HWND).
+ * Returns 0 if the index is not in [0, dialog_count), otherwise the result of
+ * the dialog callback. */
+int imVideoCaptureShowDialog(imVideoCapture* vc, int dialog, void* parent)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  /* a negative index would call through a pointer read outside dialog_func */
+  if (dialog < 0 || dialog >= vc->dialog_count)
+    return 0;
+
+  return vc->dialog_func[dialog](vc, (HWND)parent);
+}
+
+/* Routes an input pin to an output pin of a crossbar.
+ * cross == 1 selects the first crossbar, any other value selects the second one.
+ * Returns 1 only when Route returns S_OK; returns 0 when the selected crossbar
+ * does not exist or the route was not made. */
+int imVideoCaptureSetInOut(imVideoCapture* vc, int input, int output, int cross)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  IAMCrossbar* pX = vc_GetCrossBar(vc);
+  if (pX)
+  {
+    // stays S_FALSE when there is no second crossbar to route
+    HRESULT hr = S_FALSE;
+
+    if (cross == 1)
+      hr = pX->Route(output, input);
+    else
+    {
+      IAMCrossbar* pX2 = vc_GetSecondCrossBar(vc, pX);
+      if (pX2)
+      {
+        hr = pX2->Route(output, input);
+        pX2->Release();
+      }
+    }
+
+    pX->Release();
+    if (hr == S_OK)
+      return 1;
+  }
+
+  return 0;
+}
+
+/**************************************************************************
+                            Attributes
+***************************************************************************/
+
+
+/* Converts Val from the range [Min, Max] to a percentage.
+ * Returns 0 for an empty range (Max == Min) instead of dividing by zero. */
+static float vc_Value2Percent(long Min, long Max, long Val)
+{
+  if (Max == Min)
+    return 0.0f;
+
+  return ((Val - Min)*100.0f)/((float)(Max - Min));
+}
+
+/* Converts a percentage to a value of the range [Min, Max].
+ * When Step is greater than 1 the value is rounded up to the next multiple of
+ * Step counted from Min, but never above Max.
+ * A Step of 1 or less (including an invalid 0) means no stepping. */
+static long vc_Percent2Value(long Min, long Max, long Step, float Per)
+{
+  long Val = (long)((Per/100.)*(Max - Min) + Min);
+
+  /* Step <= 0 would divide by zero or step backwards */
+  if (Step <= 1)
+    return Val;
+
+  long num_step = (Val - Min + Step-1) / Step;
+  long stepped = num_step*Step + Min;
+
+  /* rounding up can pass Max when the range is not a multiple of Step */
+  if (Max >= Min && stepped > Max)
+    stepped = Max;
+
+  return stepped;
+}
+
+/* Returns the IAMVideoProcAmp interface cached in *video_prop, querying the
+ * capture filter for it on first use. Returns NULL if the query fails. */
+static IAMVideoProcAmp* vc_InitVideoProcAmp(IBaseFilter* capture_filter, IAMVideoProcAmp* *video_prop)
+{
+  if (!*video_prop)
+  {
+    /* not cached yet, ask the filter */
+    HRESULT query_result = capture_filter->QueryInterface(IID_IAMVideoProcAmp, (void**)video_prop);
+    if (FAILED(query_result))
+      return NULL;
+  }
+
+  return *video_prop;
+}
+
+/* Sets a video processing property from a percentage of its range, in manual mode.
+ * Returns 1 on success, 0 if the interface is missing, the range can not be
+ * read or the value is rejected. */
+static int vc_SetVideoProcAmpProperty(IBaseFilter* capture_filter, IAMVideoProcAmp* *video_prop, long property, float percent)
+{
+  IAMVideoProcAmp *pProp = vc_InitVideoProcAmp(capture_filter, video_prop);
+  if (!pProp) return 0;
+
+  VideoProcAmpProperty prop = (VideoProcAmpProperty)property;
+  long Min, Max, Step, Default, Flags;
+  HRESULT hr = pProp->GetRange(prop, &Min, &Max, &Step, &Default, &Flags);
+  /* without a valid range Min, Max and Step are not initialized */
+  if (FAILED(hr)) return 0;
+
+  hr = pProp->Set(prop, vc_Percent2Value(Min, Max, Step, percent), VideoProcAmp_Flags_Manual);
+  if (FAILED(hr)) return 0;
+  return 1;
+}
+
+/* Reads a video processing property as a percentage of its range.
+ * Returns 1 on success, 0 if the interface is missing or the range or the
+ * value can not be read; *percent is only written on success. */
+static int vc_GetVideoProcAmpProperty(IBaseFilter* capture_filter, IAMVideoProcAmp* *video_prop, long property, float *percent)
+{
+  IAMVideoProcAmp *pProp = vc_InitVideoProcAmp(capture_filter, video_prop);
+  if (!pProp) return 0;
+
+  VideoProcAmpProperty prop = (VideoProcAmpProperty)property;
+  long Min, Max, Step, Default, Flags, Val;
+  HRESULT hr = pProp->GetRange(prop, &Min, &Max, &Step, &Default, &Flags);
+  /* without a valid range Min and Max are not initialized */
+  if (FAILED(hr)) return 0;
+
+  hr = pProp->Get(prop, &Val, &Flags);
+  if (FAILED(hr)) return 0;
+
+  *percent = vc_Value2Percent(Min, Max, Val);
+  return 1;
+}
+
+/* Restores a video processing property to the default value of its range.
+ * Automatic mode is selected when fauto is non-zero and the property supports it,
+ * manual mode otherwise. Returns 1 on success, 0 on failure. */
+static int vc_ResetVideoProcAmpProperty(IBaseFilter* capture_filter, IAMVideoProcAmp* *video_prop, long property, int fauto)
+{
+  IAMVideoProcAmp *pProp = vc_InitVideoProcAmp(capture_filter, video_prop);
+  if (!pProp) return 0;
+
+  VideoProcAmpProperty prop = (VideoProcAmpProperty)property;
+  long Min, Max, Step, Default, Flags;
+  HRESULT hr = pProp->GetRange(prop, &Min, &Max, &Step, &Default, &Flags);
+  /* without a valid range Default and Flags are not initialized */
+  if (FAILED(hr)) return 0;
+
+  if (fauto && (Flags & VideoProcAmp_Flags_Auto))
+    hr = pProp->Set(prop, Default, VideoProcAmp_Flags_Auto);
+  else
+    hr = pProp->Set(prop, Default, VideoProcAmp_Flags_Manual);
+
+  if (FAILED(hr)) return 0;
+  return 1;
+}
+
+/* Returns the IAMCameraControl interface cached in *camera_prop, querying the
+ * capture filter for it on first use. Returns NULL if the query fails. */
+static IAMCameraControl* vc_InitCameraControl(IBaseFilter* capture_filter, IAMCameraControl* *camera_prop)
+{
+  if (!*camera_prop)
+  {
+    /* not cached yet, ask the filter */
+    HRESULT query_result = capture_filter->QueryInterface(IID_IAMCameraControl, (void**)camera_prop);
+    if (FAILED(query_result))
+      return NULL;
+  }
+
+  return *camera_prop;
+}
+
+/* Sets a camera control property from a percentage of its range, in manual mode.
+ * Returns 1 on success, 0 if the interface is missing, the range can not be
+ * read or the value is rejected. */
+static int vc_SetCameraControlProperty(IBaseFilter* capture_filter, IAMCameraControl* *camera_prop, long property, float percent)
+{
+  IAMCameraControl *pProp = vc_InitCameraControl(capture_filter, camera_prop);
+  if (!pProp) return 0;
+
+  CameraControlProperty prop = (CameraControlProperty)property;
+  long Min, Max, Step, Default, Flags;
+  HRESULT hr = pProp->GetRange(prop, &Min, &Max, &Step, &Default, &Flags);
+  /* without a valid range Min, Max and Step are not initialized */
+  if (FAILED(hr)) return 0;
+
+  hr = pProp->Set(prop, vc_Percent2Value(Min, Max, Step, percent), CameraControl_Flags_Manual);
+  if (FAILED(hr)) return 0;
+  return 1;
+}
+
+/* Reads a camera control property as a percentage of its range.
+ * Returns 1 on success, 0 if the interface is missing or the range or the
+ * value can not be read; *percent is only written on success. */
+static int vc_GetCameraControlProperty(IBaseFilter* capture_filter, IAMCameraControl* *camera_prop, long property, float *percent)
+{
+  IAMCameraControl *pProp = vc_InitCameraControl(capture_filter, camera_prop);
+  if (!pProp) return 0;
+
+  CameraControlProperty prop = (CameraControlProperty)property;
+  long Min, Max, Step, Default, Flags, Val;
+  HRESULT hr = pProp->GetRange(prop, &Min, &Max, &Step, &Default, &Flags);
+  /* without a valid range Min and Max are not initialized */
+  if (FAILED(hr)) return 0;
+
+  hr = pProp->Get(prop, &Val, &Flags);
+  if (FAILED(hr)) return 0;
+
+  *percent = vc_Value2Percent(Min, Max, Val);
+  return 1;
+}
+
+/* Restores a camera control property to the default value of its range.
+ * Automatic mode is selected when fauto is non-zero and the property supports it,
+ * manual mode otherwise. Returns 1 on success, 0 on failure. */
+static int vc_ResetCameraControlProperty(IBaseFilter* capture_filter, IAMCameraControl* *camera_prop, long property, int fauto)
+{
+  IAMCameraControl *pProp = vc_InitCameraControl(capture_filter, camera_prop);
+  if (!pProp) return 0;
+
+  CameraControlProperty prop = (CameraControlProperty)property;
+  long Min, Max, Step, Default, Flags;
+  HRESULT hr = pProp->GetRange(prop, &Min, &Max, &Step, &Default, &Flags);
+  /* without a valid range Default and Flags are not initialized */
+  if (FAILED(hr)) return 0;
+
+  if (fauto && (Flags & CameraControl_Flags_Auto))
+    hr = pProp->Set(prop, Default, CameraControl_Flags_Auto);
+  else
+    hr = pProp->Set(prop, Default, CameraControl_Flags_Manual);
+
+  if (FAILED(hr)) return 0;
+  return 1;
+}
+
+/* Returns the IAMVideoControl interface cached in *video_prop, querying the
+ * capture filter for it on first use. Returns NULL if the query fails. */
+static IAMVideoControl* vc_InitVideoControl(IBaseFilter* capture_filter, IAMVideoControl* *video_prop)
+{
+  if (!*video_prop)
+  {
+    /* not cached yet, ask the filter */
+    HRESULT query_result = capture_filter->QueryInterface(IID_IAMVideoControl, (void**)video_prop);
+    if (FAILED(query_result))
+      return NULL;
+  }
+
+  return *video_prop;
+}
+
+/* Turns a video control mode flag of the output pin on (percent != 0) or off
+ * (percent == 0). Returns 1 on success, 0 if the interface or the pin is
+ * missing or the mode can not be read or written. */
+static int vc_SetVideoControlProperty(IBaseFilter* capture_filter, IAMVideoControl* *video_prop, long property, float percent)
+{
+  IAMVideoControl *pProp = vc_InitVideoControl(capture_filter, video_prop);
+  if (!pProp) return 0;
+
+  IPin *pOutPin = vc_GetPin(capture_filter, PINDIR_OUTPUT);
+  if (!pOutPin) return 0;
+
+  long Mode = 0;
+  HRESULT hr = pProp->GetMode(pOutPin, &Mode);
+  /* the mode is only rewritten when the current one could be read */
+  if (SUCCEEDED(hr))
+  {
+    if (percent)
+      Mode = Mode | property;
+    else
+      Mode = Mode & ~property;
+    hr = pProp->SetMode(pOutPin, Mode);
+  }
+  pOutPin->Release();
+
+  if (FAILED(hr)) return 0;
+  return 1;
+}
+
+/* Reads a video control mode flag of the output pin.
+ * *percent receives 100 when the flag is set and 0 when it is clear.
+ * Returns 1 on success, 0 if the interface is missing or GetMode fails. */
+static int vc_GetVideoControlProperty(IBaseFilter* capture_filter, IAMVideoControl* *video_prop, long property, float *percent)
+{
+  IAMVideoControl *pProp = vc_InitVideoControl(capture_filter, video_prop);
+  if (!pProp) return 0;
+
+  HRESULT hr;
+  long Mode;
+  // NOTE(review): assumes vc_GetPin always finds an output pin - confirm
+  IPin *pOutPin = vc_GetPin(capture_filter, PINDIR_OUTPUT);
+  hr = pProp->GetMode(pOutPin, &Mode);
+  pOutPin->Release();
+
+  if (FAILED(hr)) return 0;
+  if (Mode & property)
+    *percent = 100.;
+  else
+    *percent = 0.;
+  return 1;
+}
+
+/* "Resets" a video control mode flag of the output pin by inverting its
+ * current state; fauto is not used.
+ * Returns 1 on success, 0 if the interface or the pin is missing or the mode
+ * can not be read or written. */
+static int vc_ResetVideoControlProperty(IBaseFilter* capture_filter, IAMVideoControl* *video_prop, long property, int fauto)
+{
+  (void)fauto;
+
+  IAMVideoControl *pProp = vc_InitVideoControl(capture_filter, video_prop);
+  if (!pProp) return 0;
+
+  IPin *pOutPin = vc_GetPin(capture_filter, PINDIR_OUTPUT);
+  if (!pOutPin) return 0;
+
+  long Mode = 0;
+  HRESULT hr = pProp->GetMode(pOutPin, &Mode);
+  /* the mode is only rewritten when the current one could be read */
+  if (SUCCEEDED(hr))
+  {
+    if (Mode & property)
+      Mode = Mode & ~property;
+    else
+      Mode = Mode | property;
+    hr = pProp->SetMode(pOutPin, Mode);
+  }
+  pOutPin->Release();
+
+  if (FAILED(hr)) return 0;
+  return 1;
+}
+
+/* Analog TV standards addressed by the "AnalogFormat" attribute: the attribute
+ * value is the index (0 to 18) into this table. */
+static long vc_AnalogFormat[19] =
+{
+  AnalogVideo_NTSC_M, 
+  AnalogVideo_NTSC_M_J,  
+  AnalogVideo_NTSC_433,
+  AnalogVideo_PAL_B,
+  AnalogVideo_PAL_D,
+  AnalogVideo_PAL_H,
+  AnalogVideo_PAL_I,
+  AnalogVideo_PAL_M,
+  AnalogVideo_PAL_N,
+  AnalogVideo_PAL_60,
+  AnalogVideo_SECAM_B,
+  AnalogVideo_SECAM_D,
+  AnalogVideo_SECAM_G,
+  AnalogVideo_SECAM_H,
+  AnalogVideo_SECAM_K,
+  AnalogVideo_SECAM_K1,
+  AnalogVideo_SECAM_L,
+  AnalogVideo_SECAM_L1,
+  AnalogVideo_PAL_N_COMBO
+};
+
+/* Selects the analog TV standard of the capture filter.
+ * percent is not a percentage here: its integer part is the index of the
+ * standard in vc_AnalogFormat. Returns 1 on success, 0 if the index is out of
+ * range, the filter has no analog video decoder or the standard is rejected. */
+static int vc_SetAnalogFormat(IBaseFilter* capture_filter, float percent)
+{
+  int index = (int)percent;
+  int count = (int)(sizeof(vc_AnalogFormat)/sizeof(vc_AnalogFormat[0]));
+  if (index < 0 || index >= count)
+    return 0;
+
+  /* the address of the pointer must be passed: QueryInterface used to receive
+     the NULL pointer value itself and could never return the interface */
+  IAMAnalogVideoDecoder* video_decoder = NULL;
+  HRESULT hr = capture_filter->QueryInterface(IID_IAMAnalogVideoDecoder, (void**)&video_decoder);
+  if (FAILED(hr) || !video_decoder)
+    return 0;
+
+  hr = video_decoder->put_TVFormat(vc_AnalogFormat[index]);
+  video_decoder->Release();
+
+  if (FAILED(hr)) return 0;
+  return 1;
+}
+
+/* Reads the analog TV standard of the capture filter.
+ * *percent receives the index of the standard in vc_AnalogFormat.
+ * Returns 1 on success, 0 if the filter has no analog video decoder, the
+ * standard can not be read or it is not in the table. */
+static int vc_GetAnalogFormat(IBaseFilter* capture_filter, float *percent)
+{
+  /* the address of the pointer must be passed: QueryInterface used to receive
+     the NULL pointer value itself and could never return the interface */
+  IAMAnalogVideoDecoder* video_decoder = NULL;
+  HRESULT hr = capture_filter->QueryInterface(IID_IAMAnalogVideoDecoder, (void**)&video_decoder);
+  if (FAILED(hr) || !video_decoder)
+    return 0;
+
+  long format;
+  hr = video_decoder->get_TVFormat(&format);
+  video_decoder->Release();
+
+  if (FAILED(hr)) return 0;
+
+  int count = (int)(sizeof(vc_AnalogFormat)/sizeof(vc_AnalogFormat[0]));
+  for (int i = 0; i < count; i++)
+  {
+    if (vc_AnalogFormat[i] == format)
+    {
+      *percent = (float)i;
+      return 1;
+    }
+  }
+  return 0;
+}
+
+#define VC_HASH_SIZE 101
+#define VC_HASH_MULTIPLIER 31
+
+/** Hash index of a zero-terminated key, in the range [0, hash_size).
+ * Uses the multiplicative string hash described in "The Practice of Programming"
+ * by Kernighan & Pike. The accumulator is an unsigned short, so the running
+ * value is truncated to the width of that type after every character. */
+static int vc_HashIndex(const char *key, int hash_size)
+{
+  const unsigned char *cursor = (const unsigned char*)key;
+  unsigned short accum = 0;
+
+  while (*cursor)
+  {
+    accum = accum*VC_HASH_MULTIPLIER + *cursor;
+    cursor++;
+  }
+
+  return accum % hash_size;
+}
+
+#define VC_CAMERASHIFT 20
+#define VC_VIDEOSHIFT  40
+#define VC_ANALOGSHIFT 60
+
+/* Maps an attribute name to its internal property code.
+ * The code is the interface property value plus a shift that identifies the
+ * interface: no shift for IAMVideoProcAmp, VC_CAMERASHIFT for IAMCameraControl,
+ * VC_VIDEOSHIFT for IAMVideoControl and VC_ANALOGSHIFT for the analog format.
+ * Returns -1 for an unknown (or NULL) attribute name, which is the value the
+ * callers test for; 0 is a valid code and can not be used for that. */
+static long vc_Attrib2Property(const char* attrib)
+{
+  static const struct { const char* name; long prop; } attribs[] = {
+    {"CameraPanAngle",             (long)CameraControl_Pan + VC_CAMERASHIFT},
+    {"CameraTiltAngle",            (long)CameraControl_Tilt + VC_CAMERASHIFT},
+    {"CameraRollAngle",            (long)CameraControl_Roll + VC_CAMERASHIFT},
+    {"CameraLensZoom",             (long)CameraControl_Zoom + VC_CAMERASHIFT},
+    {"CameraExposure",             (long)CameraControl_Exposure + VC_CAMERASHIFT},
+    {"CameraIris",                 (long)CameraControl_Iris + VC_CAMERASHIFT},
+    {"CameraFocus",                (long)CameraControl_Focus + VC_CAMERASHIFT},
+    {"VideoBrightness",            (long)VideoProcAmp_Brightness},
+    {"VideoContrast",              (long)VideoProcAmp_Contrast},
+    {"VideoHue",                   (long)VideoProcAmp_Hue},
+    {"VideoSaturation",            (long)VideoProcAmp_Saturation},
+    {"VideoSharpness",             (long)VideoProcAmp_Sharpness},
+    {"VideoGamma",                 (long)VideoProcAmp_Gamma},
+    {"VideoColorEnable",           (long)VideoProcAmp_ColorEnable},
+    {"VideoWhiteBalance",          (long)VideoProcAmp_WhiteBalance},
+    {"VideoBacklightCompensation", (long)VideoProcAmp_BacklightCompensation},
+    {"VideoGain",                  (long)VideoProcAmp_Gain},
+    {"FlipHorizontal",             (long)VideoControlFlag_FlipHorizontal + VC_VIDEOSHIFT},
+    {"FlipVertical",               (long)VideoControlFlag_FlipVertical + VC_VIDEOSHIFT},
+    {"AnalogFormat",               (long)0 + VC_ANALOGSHIFT}
+  };
+
+  /* prop_table stores code+1 so that 0 marks an empty slot;
+     name_table keeps the name that owns each slot */
+  static long prop_table[VC_HASH_SIZE];
+  static const char* name_table[VC_HASH_SIZE];
+  static int first = 1;
+  if (first)
+  {
+    memset(prop_table, 0, VC_HASH_SIZE*sizeof(long));
+    memset(name_table, 0, VC_HASH_SIZE*sizeof(const char*));
+    for (int i = 0; i < (int)(sizeof(attribs)/sizeof(attribs[0])); i++)
+    {
+      int slot = vc_HashIndex(attribs[i].name, VC_HASH_SIZE);
+      prop_table[slot] = attribs[i].prop + 1;
+      name_table[slot] = attribs[i].name;
+    }
+    first = 0;
+  }
+
+  if (!attrib)
+    return -1;
+
+  int index = vc_HashIndex(attrib, VC_HASH_SIZE);
+
+  /* the name is compared too, so that an unknown name that happens to hash
+     into a used slot is not mistaken for a known attribute */
+  if (!prop_table[index] || strcmp(name_table[index], attrib) != 0)
+    return -1;
+
+  return prop_table[index]-1;
+}
+
+/* Sets a named attribute of the capture device.
+ * The property code returned by vc_Attrib2Property selects, by range, which
+ * interface handles it: video processing, camera control, video control or the
+ * analog format. Returns 0 when the code is -1, otherwise the result of the
+ * selected handler. */
+int imVideoCaptureSetAttribute(imVideoCapture* vc, const char* attrib, float percent)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  long code = vc_Attrib2Property(attrib);
+  if (code == -1)
+    return 0;
+
+  if (code < VC_CAMERASHIFT)
+    return vc_SetVideoProcAmpProperty(vc->capture_filter, &vc->video_prop, code, percent);
+
+  if (code < VC_VIDEOSHIFT)
+    return vc_SetCameraControlProperty(vc->capture_filter, &vc->camera_prop, code-VC_CAMERASHIFT, percent);
+
+  if (code < VC_ANALOGSHIFT)
+    return vc_SetVideoControlProperty(vc->capture_filter, &vc->videoctrl_prop, code-VC_VIDEOSHIFT, percent);
+
+  return vc_SetAnalogFormat(vc->capture_filter, percent);
+}
+
+/* Reads a named attribute of the capture device into *percent.
+ * The property code returned by vc_Attrib2Property selects, by range, which
+ * interface handles it: video processing, camera control, video control or the
+ * analog format. Returns 0 when the code is -1, otherwise the result of the
+ * selected handler. */
+int imVideoCaptureGetAttribute(imVideoCapture* vc, const char* attrib, float *percent)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  long code = vc_Attrib2Property(attrib);
+  if (code == -1)
+    return 0;
+
+  if (code < VC_CAMERASHIFT)
+    return vc_GetVideoProcAmpProperty(vc->capture_filter, &vc->video_prop, code, percent);
+
+  if (code < VC_VIDEOSHIFT)
+    return vc_GetCameraControlProperty(vc->capture_filter, &vc->camera_prop, code-VC_CAMERASHIFT, percent);
+
+  if (code < VC_ANALOGSHIFT)
+    return vc_GetVideoControlProperty(vc->capture_filter, &vc->videoctrl_prop, code-VC_VIDEOSHIFT, percent);
+
+  return vc_GetAnalogFormat(vc->capture_filter, percent);
+}
+
+/* Resets a named attribute of the capture device.
+ * The property code returned by vc_Attrib2Property selects, by range, the
+ * video processing, camera control or video control handler. The analog format
+ * has no reset and returns 0, as does a code of -1. */
+int imVideoCaptureResetAttribute(imVideoCapture* vc, const char* attrib, int fauto)
+{
+  assert(vc);
+  assert(vc->device != -1);
+
+  long code = vc_Attrib2Property(attrib);
+  if (code == -1)
+    return 0;
+
+  if (code < VC_CAMERASHIFT)
+    return vc_ResetVideoProcAmpProperty(vc->capture_filter, &vc->video_prop, code, fauto);
+
+  if (code < VC_VIDEOSHIFT)
+    return vc_ResetCameraControlProperty(vc->capture_filter, &vc->camera_prop, code-VC_CAMERASHIFT, fauto);
+
+  if (code < VC_ANALOGSHIFT)
+    return vc_ResetVideoControlProperty(vc->capture_filter, &vc->videoctrl_prop, code-VC_VIDEOSHIFT, fauto);
+
+  return 0;
+}
+
+/* Returns the names of the attributes supported by the capture filter and
+ * stores their count in *num_attrib.
+ * A whole group of names is included for each interface the filter exposes:
+ * IAMVideoProcAmp, IAMCameraControl, IAMVideoControl and IAMAnalogVideoDecoder.
+ * The returned array is a static buffer that is overwritten by the next call. */
+const char** imVideoCaptureGetAttributeList(imVideoCapture* vc, int *num_attrib)
+{
+#define VC_VIDEOPROC_MAX 10
+#define VC_CAMERACONTROL_MAX 7
+#define VC_VIDEOCONTROL_MAX 2
+#define VC_VIDEODECODER_MAX 1
+#define VC_NUM_ATTRIB_MAX (VC_VIDEOPROC_MAX+VC_CAMERACONTROL_MAX+VC_VIDEOCONTROL_MAX+VC_VIDEODECODER_MAX)
+  static char* attrib_list[VC_NUM_ATTRIB_MAX];
+  // all names, grouped by interface in the order the groups are tested below
+  static char* all_attrib_list[VC_NUM_ATTRIB_MAX] = 
+  {                                  //Pre-calculated Hash Index:
+    "VideoBrightness",               //  (97)
+    "VideoContrast",                 //  (80)
+    "VideoHue",                      //  (98)
+    "VideoSaturation",               //  (4)
+    "VideoSharpness",                //  (56)
+    "VideoGamma",                    //  (67)
+    "VideoColorEnable",              //  (91)
+    "VideoWhiteBalance",             //  (26)
+    "VideoBacklightCompensation",    //  (50)
+    "VideoGain",                     //  (36)
+    "CameraPanAngle",                //  (64)
+    "CameraTiltAngle",               //  (54)
+    "CameraRollAngle",               //  (85)
+    "CameraLensZoom",                //  (57)
+    "CameraExposure",                //  (84)
+    "CameraIris",                    //  (20)
+    "CameraFocus",                   //  (62)
+    "FlipHorizontal",                //  (21)
+    "FlipVertical",                  //  (28)
+    "AnalogFormat"};                 //  (89)
+
+  int i;
+  *num_attrib = 0;
+
+  // each interface is queried only as a test and released right away
+  IAMVideoProcAmp *video_prop;
+  HRESULT hr = vc->capture_filter->QueryInterface(IID_IAMVideoProcAmp, (void**)&video_prop);
+  if (SUCCEEDED(hr))
+  {
+    for (i = 0; i < VC_VIDEOPROC_MAX; i++)
+      attrib_list[i] = all_attrib_list[i];
+    *num_attrib = VC_VIDEOPROC_MAX;
+    video_prop->Release();
+  }
+
+  IAMCameraControl *camera_prop;
+  hr = vc->capture_filter->QueryInterface(IID_IAMCameraControl, (void**)&camera_prop);
+  if (SUCCEEDED(hr))
+  {
+    // appended after whatever was already listed
+    for (i = 0; i < VC_CAMERACONTROL_MAX; i++)
+      attrib_list[i+*num_attrib] = all_attrib_list[i+VC_VIDEOPROC_MAX];
+    *num_attrib += VC_CAMERACONTROL_MAX;
+    camera_prop->Release();
+  }
+
+  IAMVideoControl* video_ctrl;
+  hr = vc->capture_filter->QueryInterface(IID_IAMVideoControl, (void**)&video_ctrl);
+  if (SUCCEEDED(hr))
+  {
+    for (i = 0; i < VC_VIDEOCONTROL_MAX; i++)
+      attrib_list[i+*num_attrib] = all_attrib_list[i+VC_VIDEOPROC_MAX+VC_CAMERACONTROL_MAX];
+    *num_attrib += VC_VIDEOCONTROL_MAX;
+    video_ctrl->Release();
+  }
+
+  IAMAnalogVideoDecoder* video_decoder = NULL;
+  hr = vc->capture_filter->QueryInterface(IID_IAMAnalogVideoDecoder, (void**)&video_decoder);
+  if (SUCCEEDED(hr))
+  {
+    for (i = 0; i < VC_VIDEODECODER_MAX; i++)
+      attrib_list[i+*num_attrib] = all_attrib_list[i+VC_VIDEOPROC_MAX+VC_CAMERACONTROL_MAX+VC_VIDEOCONTROL_MAX];
+    *num_attrib += VC_VIDEODECODER_MAX;
+    video_decoder->Release();
+  }
+
+  return (const char**)attrib_list;
+}
+
+//VIDEOINFOHEADER
+// AvgTimePerFrame
diff --git a/src/im_colorhsi.cpp b/src/im_colorhsi.cpp
new file mode 100644
index 0000000..3852527
--- /dev/null
+++ b/src/im_colorhsi.cpp
@@ -0,0 +1,243 @@
+/** \file
+ * \brief HSI Color Manipulation
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_colorhsi.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+
+#include <math.h>
+
+#include "im_colorhsi.h"
+#include "im_color.h"
+
+static const float rad60 =  1.0471975f;
+static const float rad120 = 2.0943951f;
+static const float rad180 = 3.1415926f;
+static const float rad240 = 4.1887902f;
+static const float rad300 = 5.2359877f;
+static const float rad360 = 6.2831853f;
+static const float sqrt3 = 1.7320508f;
+
+/**********************************/               
+/*         HSI MAX S              */
+/**********************************/               
+               
+/* Selects, for the 60 degree sector that contains the hue h (in radians), the
+ * pair of per-channel terms (hr, hg or hb) stored in *h0 and *h1.
+ * Any h that is not below rad300 falls in the last sector. */
+static void iSmax01(float h, float hr, float hb, float hg, float *h0, float *h1)
+{
+  if (h < rad60)
+  {
+    *h0 = hb;
+    *h1 = hr;
+    return;
+  }
+
+  if (h < rad120)
+  {
+    *h0 = hb;
+    *h1 = hg;
+    return;
+  }
+
+  if (h < rad180)
+  {
+    *h0 = hr;
+    *h1 = hg;
+    return;
+  }
+
+  if (h < rad240)
+  {
+    *h0 = hr;
+    *h1 = hb;
+    return;
+  }
+
+  if (h < rad300)
+  {
+    *h0 = hg;
+    *h1 = hb;
+    return;
+  }
+
+  /* last sector */
+  *h0 = hg;
+  *h1 = hr;
+}
+
+/* Returns the saturation limit for a hue and an intensity; imColorHSI2RGB
+ * clamps the saturation to this value.
+ * h is the hue in radians (it is compared with the rad60 ... rad300 constants),
+ * cosh and sinh are the cosine and sine of that hue as computed by the caller
+ * (not the hyperbolic functions), and i is the intensity.
+ * The limit is computed from the face of the RGB cube the color would reach,
+ * as named in the comments below. */
+float imColorHSI_Smax(float h, double cosh, double sinh, float i)
+{
+  float hr, hb, hg, imax, h0, h1;
+
+  /* fixed results for the special intensities 0, 1, 1/3 and 2/3 */
+  if (i == 0 || i == 1)
+    return 0.0f;
+
+  if (i == 1.0f/3.0f || i == 2.0f/3.0f)
+    return 1.0f;
+
+  /* per-channel terms of the hue direction */
+  hr = (float)(cosh / 1.5);
+  hg = (float)((-cosh + sinh*sqrt3)/ 3.0);
+  hb = (float)((-cosh - sinh*sqrt3)/ 3.0);
+
+  /* at bottom */
+  if (i < 1.0f/3.0f)
+  {
+    /* face B=0 */
+    if (h < rad120)
+      return -i/hb;
+
+    /* face R=0 */
+    if (h < rad240)
+      return -i/hr;
+
+    /* face G=0 */
+    return -i/hg;
+  }
+
+  /* at top */
+  if (i > 2.0f/3.0f)
+  {
+    /* face R=1 */
+    if (h < rad60 || h > rad300)
+      return (1-i)/hr;
+
+    /* face G=1 */
+    if (h < rad180)
+      return (1-i)/hg;
+
+    /* face B=1 */
+    return (1-i)/hb;
+  }
+
+  /* in the middle */
+
+  iSmax01(h, hr, hb, hg, &h0, &h1);
+
+  /* imax is the intensity that separates the two candidate faces;
+     at the exact multiples of 60 degrees it is set directly */
+  if (h == 0 || h == rad120 || h == rad240)
+    imax = 1.0f/3.0f;
+  else if (h == rad60 || h == rad180 || h == rad300)
+    imax = 2.0f/3.0f;
+  else
+    imax = h0 / (h0 - h1);
+
+  if (i < imax) 
+    return -i/h0;
+  else
+    return (1-i)/h1;
+}
+
+/* Returns the intensity computed as h0/(h0 - h1) for the sector of the hue h
+ * (in radians); the name suggests the intensity of maximum saturation - confirm.
+ * cosh and sinh are the cosine and sine of h as computed by the caller.
+ * The exact multiples of 60 degrees return 1/3 or 2/3 directly. */
+float imColorHSI_ImaxS(float h, double cosh, double sinh)
+{
+  float red_term, green_term, blue_term;
+  float first_term, second_term;
+  float intensity;
+
+  if (h == 0 || h == rad120 || h == rad240)
+    return 1.0f/3.0f;
+
+  if (h == rad60 || h == rad180 || h == rad300)
+    return 2.0f/3.0f;
+
+  /* per-channel terms of the hue direction */
+  red_term = (float)(cosh / 1.5f);
+  green_term = (float)((-cosh + sinh*sqrt3)/ 3.0);
+  blue_term = (float)((-cosh - sinh*sqrt3)/ 3.0);
+
+  iSmax01(h, red_term, blue_term, green_term, &first_term, &second_term);
+
+  intensity = first_term / (first_term - second_term);
+  return intensity;
+}
+
+/**********************************/               
+/*         RGB 2 HSI              */
+/**********************************/               
+
+/* Converts RGB to HSI.
+ * The intensity is the mean of the three components, the saturation is the
+ * length of the (v, u) vector and the hue is its angle in degrees, in [0, 360).
+ * When the saturation is zero the hue is set to 360 by definition. */
+void imColorRGB2HSI(float r, float g, float b, float *h, float *s, float *i)
+{
+  float v = r - (g + b)/2;
+  float u = (g - b) * (sqrt3/2);
+
+  *i = (r + g + b)/3;
+  *s = (float)sqrt(v*v + u*u);
+
+  if (*s == 0)
+  {
+    *h = 360.0f;  /* by definition */
+    return;
+  }
+
+  double angle = atan2(u, v);
+  if (angle < 0.0f)
+    angle += rad360;  /* atan2 returns negative angles for the lower half */
+
+  /* radians to degrees */
+  *h = (float)(angle * 57.2957795131);
+}
+
+/* Converts byte RGB to HSI: each component is converted to float with
+ * imColorReconstruct using 255 as the maximum, then imColorRGB2HSI is applied. */
+void imColorRGB2HSIbyte(unsigned char r, unsigned char g, unsigned char b, float *h, float *s, float *i)
+{
+  float fr = imColorReconstruct(r, (imbyte)255);
+  float fg = imColorReconstruct(g, (imbyte)255);
+  float fb = imColorReconstruct(b, (imbyte)255);
+  
+  imColorRGB2HSI(fr, fg, fb, h, s, i);
+}
+
+/**********************************/               
+/*         HSI 2 RGB              */
+/**********************************/               
+
+/* Converts HSI to RGB.
+ * h is the hue in degrees, s the saturation and i the intensity.
+ * A gray (r = g = b = i) is returned when s is 0, i is 0 or 1, or the integer
+ * part of h is 360 (the value imColorRGB2HSI uses for an undefined hue).
+ * Otherwise i is clamped to [0, 1], the hue is wrapped into [0, 360) and s is
+ * clamped to [0, imColorHSI_Smax]. */
+void imColorHSI2RGB(float h, float s, float i, float *r, float *g, float *b)
+{
+  /* sqrt(3) in double precision; a constant needs no lazy initialization */
+  static const double _sqrt3 = 1.7320508075688772;
+  double cosh, sinh, H, v, u;
+
+  if (s == 0.0f || i == 1.0f || i == 0.0f || (int)h == 360)
+  {
+    *r = i;
+    *g = i;
+    *b = i;
+    return;
+  }
+
+  if (i < 0) i = 0;
+  if (i > 1) i = 1;
+
+  /* wrap the hue into [0, 360): imColorHSI_Smax selects the cube face by
+     comparing the angle with positive limits, so a hue below -360 that stayed
+     negative would select the wrong face */
+  if (h > 360 || h < 0.0f)
+  {
+    h = (float)fmod(h, 360);
+    if (h < 0.0f) h += 360;
+  }
+
+  /* degrees to radians */
+  H = h / 57.2957795131;
+
+  cosh = cos(H);
+  sinh = sin(H);
+
+  {
+    float smax = imColorHSI_Smax((float)H, cosh, sinh, i);
+    if (s < 0) s = 0;
+    if (s > smax) s = smax;
+  }
+
+  v = s * cosh;
+  u = s * sinh * _sqrt3;
+
+  *r = (float)(i + v/1.5);
+  *g = (float)(i - (v - u)/3.0);
+  *b = (float)(i - (v + u)/3.0);
+
+  /* negative results are mirrored, not clamped (kept as in the original;
+     presumably they are only rounding residues - confirm) */
+  if (*r < 0) *r = -*r;
+  if (*g < 0) *g = -*g;
+  if (*b < 0) *b = -*b;
+
+  if (*r > 1) *r = 1.0f;
+  if (*g > 1) *g = 1.0f;
+  if (*b > 1) *b = 1.0f;
+}
+
+/* Converts HSI to byte RGB: imColorHSI2RGB is applied and each float component
+ * is converted to a byte with imColorQuantize using 255 as the maximum. */
+void imColorHSI2RGBbyte(float h, float s, float i, unsigned char *r, unsigned char *g, unsigned char *b)
+{
+  float fr, fg, fb;
+  
+  imColorHSI2RGB(h, s, i, &fr, &fg, &fb);
+  
+  *r = imColorQuantize(fr, (imbyte)255);
+  *g = imColorQuantize(fg, (imbyte)255);
+  *b = imColorQuantize(fb, (imbyte)255);
+}
diff --git a/src/im_colormode.cpp b/src/im_colormode.cpp
new file mode 100644
index 0000000..2a99183
--- /dev/null
+++ b/src/im_colormode.cpp
@@ -0,0 +1,87 @@
+/** \file
+ * \brief Color Mode Utilities
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_colormode.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+
+#include <stdlib.h>
+#include <memory.h>
+#include <string.h>
+
+#include "im.h"
+#include "im_util.h"
+
+const char* imColorModeSpaceName(int color_mode)
+{
+  // Display name of the color space extracted from color_mode by
+  // imColorModeSpace; NULL when that space is not listed here.
+  const int space = imColorModeSpace(color_mode);
+
+  if (space == IM_RGB)    return "RGB";
+  if (space == IM_MAP)    return "Map";
+  if (space == IM_GRAY)   return "Gray";
+  if (space == IM_BINARY) return "Binary";
+  if (space == IM_CMYK)   return "CMYK";
+  if (space == IM_YCBCR)  return "Y'CbCr";
+  if (space == IM_LAB)    return "CIE L*a*b*";
+  if (space == IM_LUV)    return "CIE L*u*v*";
+  if (space == IM_XYZ)    return "CIE XYZ";
+
+  return NULL;
+}
+
+int imColorModeDepth(int color_mode)
+{
+  // Number of data planes: 1 for gray/binary/map, 4 for CMYK, 3 for every
+  // other color space, plus one when imColorModeHasAlpha(color_mode) is set.
+  const int space = imColorModeSpace(color_mode);
+  int planes;
+
+  if (space == IM_GRAY || space == IM_BINARY || space == IM_MAP)
+  {
+    planes = 1;
+  }
+  else if (space == IM_CMYK)
+  {
+    planes = 4;
+  }
+  else
+  {
+    planes = 3;
+  }
+
+  if (imColorModeHasAlpha(color_mode))
+    planes += 1;
+
+  return planes;
+}
+
+int imColorModeToBitmap(int color_mode)
+{
+  // Binary, gray and map color spaces are returned unchanged;
+  // every other color space maps to IM_RGB.
+  const int space = imColorModeSpace(color_mode);
+
+  if (space == IM_BINARY || space == IM_GRAY || space == IM_MAP)
+  {
+    return space;
+  }
+
+  return IM_RGB;
+}
+
+int imColorModeIsBitmap(int color_mode, int data_type)
+{
+  // Binary and map modes always qualify; RGB and gray only with IM_BYTE data.
+  const int space = imColorModeSpace(color_mode);
+
+  if (space == IM_BINARY || space == IM_MAP)
+    return 1;
+
+  if (space == IM_RGB || space == IM_GRAY)
+    return (data_type == IM_BYTE)? 1: 0;
+
+  return 0;
+}
diff --git a/src/im_colorutil.cpp b/src/im_colorutil.cpp
new file mode 100644
index 0000000..9e96c65
--- /dev/null
+++ b/src/im_colorutil.cpp
@@ -0,0 +1,27 @@
+/** \file
+ * \brief Color Utilities
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_colorutil.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+
+#include <stdlib.h>
+#include <memory.h>
+#include <string.h>
+
+#include "im.h"
+#include "im_util.h"
+
+long imColorEncode(unsigned char Red, unsigned char Green, unsigned char Blue)
+{
+  return ((((long)Red << 8) | (long)Green) << 8) | (long)Blue;  // packed as 0x00RRGGBB
+}
+
+void imColorDecode(unsigned char* Red, unsigned char* Green, unsigned char* Blue, long Color)
+{
+  if (Red != NULL)   *Red   = (imbyte)((Color >> 16) & 0xFF);  // bits 16-23
+  if (Green != NULL) *Green = (imbyte)((Color >> 8) & 0xFF);   // bits 8-15
+  if (Blue != NULL)  *Blue  = (imbyte)(Color & 0xFF);          // bits 0-7; NULL outputs are skipped
+}
+
diff --git a/src/im_compress.cpp b/src/im_compress.cpp
new file mode 100644
index 0000000..8a9a863
--- /dev/null
+++ b/src/im_compress.cpp
@@ -0,0 +1,44 @@
+/** \file
+ * \brief Data Compression Utilities
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_compress.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+
+#include <math.h>
+
+#include "im_util.h"
+#include "zlib.h"
+
+extern "C" {
+#include "lzf.h"
+}
+
+int imCompressDataZ(const void* src_data, int src_size, void* dst_data, int dst_size, int zip_quality)
+{
+  // Returns the compressed size, or 0 on failure. Negative sizes are rejected: cast to zlib's unsigned lengths they become huge.
+  uLongf ret_size = (uLongf)dst_size;
+  if (src_size < 0 || dst_size < 0 || compress2((Bytef*)dst_data, &ret_size, (const Bytef*)src_data, (uLong)src_size, zip_quality) != Z_OK)
+    return 0;
+  return (int)ret_size;
+}
+
+int imCompressDataUnZ(const void* src_data, int src_size, void* dst_data, int dst_size)
+{
+  // Returns the uncompressed size, or 0 on failure. Negative sizes are rejected: cast to zlib's unsigned lengths they become huge.
+  uLongf ret_size = (uLongf)dst_size;
+  if (src_size < 0 || dst_size < 0 || uncompress((Bytef*)dst_data, &ret_size, (const Bytef*)src_data, (uLong)src_size) != Z_OK)
+    return 0;
+  return (int)ret_size;
+}
+
+int imCompressDataLZF(const void* src_data, int src_size, void* dst_data, int dst_size)
+{
+  return (int)lzf_compress(src_data, src_size, dst_data, dst_size);  // forwards to lzf_compress from lzf.h
+}
+
+int imCompressDataUnLZF(const void* src_data, int src_size, void* dst_data, int dst_size)
+{
+  return (int)lzf_decompress(src_data, src_size, dst_data, dst_size);  // forwards to lzf_decompress from lzf.h
+}
diff --git a/src/im_convertbitmap.cpp b/src/im_convertbitmap.cpp
new file mode 100644
index 0000000..c9570d7
--- /dev/null
+++ b/src/im_convertbitmap.cpp
@@ -0,0 +1,121 @@
+/** \file
+ * \brief Image Conversion
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_convertbitmap.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im.h"
+#include "im_util.h"
+#include "im_complex.h"
+#include "im_image.h"
+#include "im_convert.h"
+#include "im_counter.h"
+
+#include <stdlib.h>
+#include <assert.h>
+#include <memory.h>
+
+int imConvertToBitmap(const imImage* src_image, imImage* dst_image, int cpx2real, float gamma, int abssolute, int cast_mode)
+{
+  // Converts src_image into dst_image, which must be a bitmap image of the
+  // same size (otherwise IM_ERR_DATA is returned). Depending on the source,
+  // the color space, the data type, or both are converted.
+  // cpx2real, gamma, abssolute and cast_mode are forwarded unchanged to
+  // imConvertDataType. Returns the IM_ERR_* code of the last conversion
+  // performed, or IM_ERR_MEM when the temporary image cannot be created.
+  assert(src_image);
+  assert(dst_image);
+
+  if (!imImageMatchSize(src_image, dst_image) || !imImageIsBitmap(dst_image))
+    return IM_ERR_DATA;
+
+  int counter = imCounterBegin("Building Bitmap");
+  int ret;
+
+  if (src_image->data_type == IM_BYTE)
+  {
+    // byte source: no data type conversion, only the color space
+    ret = imConvertColorSpace(src_image, dst_image);
+  }
+  else if (src_image->color_space == IM_RGB || src_image->color_space == IM_GRAY)
+  {
+    // RGB/gray source: no color space conversion, only the data type
+    ret = imConvertDataType(src_image, dst_image, cpx2real, gamma, abssolute, cast_mode);
+  }
+  else
+  {
+    // both differ: convert the color space at the source precision into a
+    // temporary image, then convert the data type into dst_image
+    imImage* temp_image = imImageCreate(src_image->width, src_image->height, dst_image->color_space, src_image->data_type);
+    if (temp_image)
+    {
+      ret = imConvertColorSpace(src_image, temp_image);
+      if (ret == IM_ERR_NONE)
+        ret = imConvertDataType(temp_image, dst_image, cpx2real, gamma, abssolute, cast_mode);
+
+      imImageDestroy(temp_image);
+    }
+    else
+      ret = IM_ERR_MEM;
+  }
+
+  imCounterEnd(counter);
+  return ret;
+}
+
+
+template <class T>
+void iDoChangePacking(const T* src_data, T* dst_data, int width, int height, int depth, 
+                             int src_is_packed)
+{
+  // Reorders width*height pixels of depth components each between the
+  // packed layout (all components of a pixel adjacent) and the planar
+  // layout (one full plane per component).
+  // src_is_packed != 0: src_data is packed and dst_data receives planes;
+  // otherwise src_data is planar and dst_data receives packed pixels.
+  const int count = width*height;
+
+  if (src_is_packed)
+  {
+    // packed -> planar
+    for (int pixel = 0; pixel < count; pixel++)
+    {
+      for (int comp = 0; comp < depth; comp++)
+        dst_data[comp*count + pixel] = src_data[pixel*depth + comp];
+    }
+  }
+  else
+  {
+    // planar -> packed
+    for (int pixel = 0; pixel < count; pixel++)
+    {
+      for (int comp = 0; comp < depth; comp++)
+        dst_data[pixel*depth + comp] = src_data[comp*count + pixel];
+    }
+  }
+}
+
+void imConvertPacking(const void* src_data, void* dst_data, int width, int height, int depth, 
+                      int data_type, int src_is_packed)
+{
+  // Converts between packed and planar sample layouts by forwarding to the
+  // typed iDoChangePacking selected by data_type.
+  // width, height and depth describe the image; src_is_packed != 0 means
+  // src_data is packed and dst_data becomes planar, otherwise the opposite.
+  // IM_CFLOAT samples are moved as whole imcfloat values.
+  // A data_type that is not listed below leaves dst_data untouched.
+  // The buffers are reinterpreted as arrays of the selected sample type.
+
+  if (data_type == IM_BYTE)
+    iDoChangePacking(static_cast<const imbyte*>(src_data), static_cast<imbyte*>(dst_data), width, height, depth, src_is_packed);
+  else if (data_type == IM_USHORT)
+    iDoChangePacking(static_cast<const imushort*>(src_data), static_cast<imushort*>(dst_data), width, height, depth, src_is_packed);
+  else if (data_type == IM_INT)
+    iDoChangePacking(static_cast<const int*>(src_data), static_cast<int*>(dst_data), width, height, depth, src_is_packed);
+  else if (data_type == IM_FLOAT)
+    iDoChangePacking(static_cast<const float*>(src_data), static_cast<float*>(dst_data), width, height, depth, src_is_packed);
+  else if (data_type == IM_CFLOAT)
+    iDoChangePacking(static_cast<const imcfloat*>(src_data), static_cast<imcfloat*>(dst_data), width, height, depth, src_is_packed);
+}
+
diff --git a/src/im_convertcolor.cpp b/src/im_convertcolor.cpp
new file mode 100644
index 0000000..4068b94
--- /dev/null
+++ b/src/im_convertcolor.cpp
@@ -0,0 +1,883 @@
+/** \file
+ * \brief Image Conversion
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_convertcolor.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im.h"
+#include "im_util.h"
+#include "im_complex.h"
+#include "im_image.h"
+#include "im_convert.h"
+#include "im_color.h"
+#include "im_counter.h"
+
+#include <stdlib.h>
+#include <assert.h>
+#include <memory.h>
+
+// Expands count 8-bit palette indices stored at the start of data into RGB, in place, from the last pixel to the first.
+// data must hold the expanded image. At most 256 palette entries are used; indices not covered by the palette become black.
+void imConvertMapToRGB(unsigned char* data, int count, int depth, int packed, long* palette, int palette_count)
+{
+  int c, i, delta;
+  unsigned char r[256] = {0}, g[256] = {0}, b[256] = {0};
+  unsigned char *r_data, *g_data, *b_data, *src_data = data + count-1;
+  if (packed)
+  {
+    r_data = data + depth*(count-1);
+    g_data = r_data + 1;
+    b_data = r_data + 2;
+    delta = depth;
+  }
+  else
+  {
+    r_data = data +   count - 1;
+    g_data = data + 2*count - 1;
+    b_data = data + 3*count - 1;
+    delta = 1;
+  }
+
+  if (palette_count > 256) palette_count = 256;  // the lookup tables hold 256 entries
+  for (c = 0; c < palette_count; c++)
+    imColorDecode(&r[c], &g[c], &b[c], palette[c]);
+
+  for (i = 0; i < count; i++)
+  {
+    int index = *src_data;  // read the index before its byte may be overwritten
+    *r_data = r[index];
+    *g_data = g[index];
+    *b_data = b[index];
+    r_data -= delta;
+    g_data -= delta;
+    b_data -= delta;
+    src_data--;
+  }
+}
+
+// Thresholds a byte plane in place (bin2gray and gray2bin).
+// value == 255: bin2gray, every non-zero sample becomes 255.
+// any other value: gray2bin, samples at or above half of the largest sample
+// in the plane become value, all others become 0.
+inline void iConvertBinary(imbyte* map, int count, imbyte value)
+{
+  imbyte thres = 1;
+
+  if (value != 255)
+  {
+    // gray2bin: find the largest sample present
+    imbyte vmax = 0;
+    for (int i = 0; i < count; i++)
+    {
+      if (map[i] > vmax)
+        vmax = map[i];
+    }
+
+    // vmax/2 is 0 when vmax is 0 or 1 (a gray plane that already is binary);
+    // a zero threshold would turn an all-black plane completely white
+    thres = vmax / 2;
+    if (thres < 1)
+      thres = 1;
+  }
+
+  for (int i = 0; i < count; i++)
+  {
+    if (map[i] >= thres)
+      map[i] = value;
+    else
+      map[i] = 0;
+  }
+}
+
+static void iConvertMap2Gray(const imbyte* src_map, imbyte* dst_map, int count, const long* palette, const int palette_count)
+{
+  // Builds an index -> luma table from the palette, then remaps every pixel.
+  // At most 256 palette entries fit the table; indices not covered map to 0.
+  imbyte remap[256] = {0};
+  const int entries = (palette_count > 256)? 256: palette_count;
+  for (int c = 0; c < entries; c++)
+  {
+    imbyte r, g, b;
+    imColorDecode(&r, &g, &b, palette[c]);
+    remap[c] = imColorRGB2Luma(r, g, b);
+  }
+
+  for (int i = 0; i < count; i++)
+    dst_map[i] = remap[src_map[i]];
+}
+
+static void iConvertMapToRGB(const imbyte* src_map, imbyte* red, imbyte* green, imbyte* blue, int count, const long* palette, const int palette_count)
+{
+  // Decodes up to 256 palette entries into tables (uncovered indices are black), then expands each index into the planes.
+  imbyte r[256] = {0}, g[256] = {0}, b[256] = {0};
+  for (int c = 0; c < palette_count && c < 256; c++)
+    imColorDecode(&r[c], &g[c], &b[c], palette[c]);
+  for (int i = 0; i < count; i++)
+  {
+    const int index = src_map[i];
+    red[i] = r[index];
+    green[i] = g[index];
+    blue[i] = b[index];
+  }
+}
+
+template <class T> 
+int iDoConvert2Gray(int count, int data_type, 
+                           const T** src_data, int src_color_space, T** dst_data, int counter)
+{
+  int i;
+  T max;
+  // Fills dst_data[0] with count gray samples computed from the planes in src_data (XYZ, CMYK, RGB, Luv or Lab).
+  const T* src_map0 = src_data[0];
+  const T* src_map1 = src_data[1];
+  const T* src_map2 = src_data[2];
+  const T* src_map3 = (src_color_space == IM_CMYK)? src_data[3]: 0;
+  T* dst_map = dst_data[0];
+  // Returns IM_ERR_NONE, IM_ERR_COUNTER when imCounterInc returns 0, or IM_ERR_DATA for any other source space.
+  imCounterTotal(counter, count, "Converting To Gray...");
+
+  switch(src_color_space)
+  {
+  case IM_XYZ: 
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // scale to 0-1
+      float c1 = imColorReconstruct(*src_map1++, max);  // use only Y component
+
+      // do gamma correction then scale back to 0-max
+      *dst_map++ = imColorQuantize(imColorTransfer2Nonlinear(c1), max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_CMYK: 
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      T r, g, b;
+      // result is still 0-max
+      imColorCMYK2RGB(*src_map0++, *src_map1++, *src_map2++, *src_map3++, r, g, b, max);
+      *dst_map++ = imColorRGB2Luma(r, g, b);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_RGB:
+    for (i = 0; i < count; i++)
+    {
+      *dst_map++ = imColorRGB2Luma(*src_map0++, *src_map1++, *src_map2++);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_LUV:
+  case IM_LAB:
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // to increase precision do intermediate conversions in float
+      // only the first (lightness) plane is used
+      float c0 = imColorReconstruct(*src_map0++, max); // scale to 0-1
+      c0 = imColorLightness2Luminance(c0);             // do the conversion
+
+      // do gamma correction then scale back to 0-max
+      *dst_map++ = imColorQuantize(imColorTransfer2Nonlinear(c0), max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  default:
+    return IM_ERR_DATA;
+  }
+
+  return IM_ERR_NONE;
+}
+
+// Converts the planes in src_data (XYZ, YCbCr, CMYK, Luv or Lab) into the three
+// RGB planes of dst_data, sample by sample, keeping the data type T.
+// Returns IM_ERR_NONE, IM_ERR_COUNTER when imCounterInc returns 0, or
+// IM_ERR_DATA when src_color_space is not handled here.
+template <class T> 
+int iDoConvert2RGB(int count, int data_type, 
+                          const T** src_data, int src_color_space, T** dst_data, int counter)
+{
+  int i;
+  T max, zero;
+  const T* src_map0 = src_data[0];
+  const T* src_map1 = src_data[1];
+  const T* src_map2 = src_data[2];
+  const T* src_map3 = (src_color_space == IM_CMYK)? src_data[3]: 0;
+  T* dst_map0 = dst_data[0];
+  T* dst_map1 = dst_data[1];
+  T* dst_map2 = dst_data[2];
+
+  imCounterTotal(counter, count, "Converting To RGB...");
+  switch(src_color_space)
+  {
+  case IM_XYZ: 
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // to increase precision do intermediate conversions in float
+      // scale to 0-1
+      float c0 = imColorReconstruct(*src_map0++, max);
+      float c1 = imColorReconstruct(*src_map1++, max);
+      float c2 = imColorReconstruct(*src_map2++, max);
+      // result is still 0-1
+      imColorXYZ2RGB(c0, c1, c2, 
+                     c0, c1, c2, 1.0f);
+
+      // do gamma correction then scale back to 0-max
+      *dst_map0++ = imColorQuantize(imColorTransfer2Nonlinear(c0), max);
+      *dst_map1++ = imColorQuantize(imColorTransfer2Nonlinear(c1), max);
+      *dst_map2++ = imColorQuantize(imColorTransfer2Nonlinear(c2), max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_YCBCR: 
+    max = (T)imColorMax(data_type);
+    zero = (T)imColorZero(data_type);
+    for (i = 0; i < count; i++)
+    {
+      imColorYCbCr2RGB(*src_map0++, *src_map1++, *src_map2++, 
+                       *dst_map0++, *dst_map1++, *dst_map2++, zero, max);
+
+      if (!imCounterInc(counter))  // report progress and allow a stop like every other case
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_CMYK: 
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // result is still 0-max
+      imColorCMYK2RGB(*src_map0++, *src_map1++, *src_map2++, *src_map3++, 
+                      *dst_map0++, *dst_map1++, *dst_map2++, max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_LUV:
+  case IM_LAB:
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // to increase precision do intermediate conversions in float
+      // scale to 0-1 and -0.5/+0.5
+      float c0 = imColorReconstruct(*src_map0++, max);
+      float c1 = imColorReconstruct(*src_map1++, max) - 0.5f;
+      float c2 = imColorReconstruct(*src_map2++, max) - 0.5f;
+      if (src_color_space == IM_LUV)
+        imColorLuv2XYZ(c0, c1, c2,  // conversion in-place
+                       c0, c1, c2);
+      else
+        imColorLab2XYZ(c0, c1, c2,  // conversion in-place
+                       c0, c1, c2);
+
+      imColorXYZ2RGB(c0, c1, c2,    // conversion in-place
+                     c0, c1, c2, 1.0f);
+
+      // do gamma correction then scale back to 0-max
+      *dst_map0++ = imColorQuantize(imColorTransfer2Nonlinear(c0), max);
+      *dst_map1++ = imColorQuantize(imColorTransfer2Nonlinear(c1), max);
+      *dst_map2++ = imColorQuantize(imColorTransfer2Nonlinear(c2), max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  default:
+    return IM_ERR_DATA;
+  }
+  return IM_ERR_NONE;
+}
+
+template <class T> 
+int iDoConvert2YCbCr(int count, int data_type, 
+                            const T** src_data, int src_color_space, T** dst_data, int counter)
+{
+  int i;
+  T zero;
+  // Converts the three RGB planes in src_data into the three YCbCr planes of dst_data; only IM_RGB sources are accepted.
+  const T* src_map0 = src_data[0];
+  const T* src_map1 = src_data[1];
+  const T* src_map2 = src_data[2];
+  T* dst_map0 = dst_data[0];
+  T* dst_map1 = dst_data[1];
+  T* dst_map2 = dst_data[2];
+  // Returns IM_ERR_NONE, IM_ERR_COUNTER when imCounterInc returns 0, or IM_ERR_DATA for any other source space.
+  imCounterTotal(counter, count, "Converting To YCbCr...");
+
+  switch(src_color_space)
+  {
+  case IM_RGB: 
+    zero = (T)imColorZero(data_type);
+    for (i = 0; i < count; i++)
+    {
+      imColorRGB2YCbCr(*src_map0++, *src_map1++, *src_map2++, 
+                       *dst_map0++, *dst_map1++, *dst_map2++, zero);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  default:
+    return IM_ERR_DATA;
+  }
+
+  return IM_ERR_NONE;
+}
+
+template <class T> 
+int iDoConvert2XYZ(int count, int data_type, 
+                          const T** src_data, int src_color_space, T** dst_data, int counter)
+{
+  int i;
+  T max;
+  // Converts the planes in src_data (gray, RGB, Luv or Lab) into the three XYZ planes of dst_data.
+  const T* src_map0 = src_data[0];
+  const T* src_map1 = (src_color_space == IM_GRAY)? 0: src_data[1];
+  const T* src_map2 = (src_color_space == IM_GRAY)? 0: src_data[2];
+  T* dst_map0 = dst_data[0];
+  T* dst_map1 = dst_data[1];
+  T* dst_map2 = dst_data[2];
+  // Returns IM_ERR_NONE, IM_ERR_COUNTER when imCounterInc returns 0, or IM_ERR_DATA for any other source space.
+  imCounterTotal(counter, count, "Converting To XYZ...");
+
+  switch(src_color_space)
+  {
+  case IM_GRAY: 
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // scale to 0-1
+      float c0 = imColorReconstruct(*src_map0++, max);
+
+      // do gamma correction
+      c0 = imColorTransfer2Linear(c0);
+
+      // then scale back to 0-max
+      *dst_map0++ = imColorQuantize(c0*0.9505f, max);    // Compensate D65 white point
+      *dst_map1++ = imColorQuantize(c0, max);
+      *dst_map2++ = imColorQuantize(c0*1.0890f, max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_RGB: 
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // to increase precision do intermediate conversions in float
+
+      // scale to 0-1
+      float c0 = imColorReconstruct(*src_map0++, max);
+      float c1 = imColorReconstruct(*src_map1++, max);
+      float c2 = imColorReconstruct(*src_map2++, max);
+
+      // do gamma correction
+      c0 = imColorTransfer2Linear(c0);
+      c1 = imColorTransfer2Linear(c1);
+      c2 = imColorTransfer2Linear(c2);
+
+      // result is still 0-1
+      imColorRGB2XYZ(c0, c1, c2, 
+                     c0, c1, c2);
+
+      // then scale back to 0-max
+      *dst_map0++ = imColorQuantize(c0, max);
+      *dst_map1++ = imColorQuantize(c1, max);
+      *dst_map2++ = imColorQuantize(c2, max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_LUV:
+  case IM_LAB:
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // to increase precision do intermediate conversions in float
+      // scale to 0-1 and -0.5/+0.5
+      float c0 = imColorReconstruct(*src_map0++, max);
+      float c1 = imColorReconstruct(*src_map1++, max) - 0.5f;
+      float c2 = imColorReconstruct(*src_map2++, max) - 0.5f;
+
+      if (src_color_space == IM_LUV)
+        imColorLuv2XYZ(c0, c1, c2,  // conversion in-place
+                       c0, c1, c2);
+      else
+        imColorLab2XYZ(c0, c1, c2,  // conversion in-place
+                       c0, c1, c2);
+
+      // scale back to 0-max
+      *dst_map0++ = imColorQuantize(c0, max);
+      *dst_map1++ = imColorQuantize(c1, max);
+      *dst_map2++ = imColorQuantize(c2, max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  default:
+    return IM_ERR_DATA;
+  }
+
+  return IM_ERR_NONE;
+}
+
+template <class T> 
+int iDoConvert2Lab(int count, int data_type, 
+                          const T** src_data, int src_color_space, T** dst_data, int counter)
+{
+  int i;
+  T max;
+  // Converts the planes in src_data (gray, RGB, XYZ or Luv) into the Lab planes of dst_data; a and b are stored with a +0.5 offset.
+  const T* src_map0 = src_data[0];
+  const T* src_map1 = (src_color_space == IM_GRAY)? 0: src_data[1];
+  const T* src_map2 = (src_color_space == IM_GRAY)? 0: src_data[2];
+  T* dst_map0 = dst_data[0];
+  T* dst_map1 = dst_data[1];
+  T* dst_map2 = dst_data[2];
+  // Returns IM_ERR_NONE, IM_ERR_COUNTER when imCounterInc returns 0, or IM_ERR_DATA for any other source space.
+  imCounterTotal(counter, count, "Converting To Lab...");
+
+  switch(src_color_space)
+  {
+  case IM_GRAY: 
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // scale to 0-1
+      float c0 = imColorReconstruct(*src_map0++, max);
+
+      // do gamma correction
+      c0 = imColorTransfer2Linear(c0);
+
+      // do conversion
+      c0 = imColorLuminance2Lightness(c0);
+
+      // then scale back to 0-max
+      *dst_map0++ = imColorQuantize(c0, max);  // update only the L component; the a and b planes are not written in this case
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_RGB: 
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // to increase precision do intermediate conversions in float
+
+      // scale to 0-1
+      float c0 = imColorReconstruct(*src_map0++, max);
+      float c1 = imColorReconstruct(*src_map1++, max);
+      float c2 = imColorReconstruct(*src_map2++, max);
+
+      // do gamma correction
+      c0 = imColorTransfer2Linear(c0);
+      c1 = imColorTransfer2Linear(c1);
+      c2 = imColorTransfer2Linear(c2);
+
+      imColorRGB2XYZ(c0, c1, c2,  // conversion in-place
+                     c0, c1, c2);
+
+      imColorXYZ2Lab(c0, c1, c2,  // conversion in-place
+                     c0, c1, c2);
+
+      // then scale back to 0-max
+      *dst_map0++ = imColorQuantize(c0, max);
+      *dst_map1++ = imColorQuantize(c1 + 0.5f, max);
+      *dst_map2++ = imColorQuantize(c2 + 0.5f, max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_XYZ:
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // to increase precision do intermediate conversions in float
+      // scale to 0-1 (XYZ planes carry no 0.5 offset)
+      float c0 = imColorReconstruct(*src_map0++, max);
+      float c1 = imColorReconstruct(*src_map1++, max);
+      float c2 = imColorReconstruct(*src_map2++, max);
+
+      imColorXYZ2Lab(c0, c1, c2,  // conversion in-place
+                     c0, c1, c2);
+
+      // scale back to 0-max
+      *dst_map0++ = imColorQuantize(c0, max);
+      *dst_map1++ = imColorQuantize(c1 + 0.5f, max);
+      *dst_map2++ = imColorQuantize(c2 + 0.5f, max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_LUV:
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // to increase precision do intermediate conversions in float
+      // scale to 0-1 and -0.5/+0.5
+      float c0 = imColorReconstruct(*src_map0++, max);
+      float c1 = imColorReconstruct(*src_map1++, max) - 0.5f;
+      float c2 = imColorReconstruct(*src_map2++, max) - 0.5f;
+
+      imColorLuv2XYZ(c0, c1, c2,  // conversion in-place
+                     c0, c1, c2);
+      imColorXYZ2Lab(c0, c1, c2,  // conversion in-place
+                     c0, c1, c2);
+
+      // scale back to 0-max
+      *dst_map0++ = imColorQuantize(c0, max);
+      *dst_map1++ = imColorQuantize(c1 + 0.5f, max);
+      *dst_map2++ = imColorQuantize(c2 + 0.5f, max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  default:
+    return IM_ERR_DATA;
+  }
+
+  return IM_ERR_NONE;
+}
+
+template <class T> 
+int iDoConvert2Luv(int count, int data_type, 
+                          const T** src_data, int src_color_space, T** dst_data, int counter)
+{
+  int i;
+  T max;
+  // Converts the planes in src_data (gray, RGB, XYZ or Lab) into the Luv planes of dst_data; u and v are stored with a +0.5 offset.
+  const T* src_map0 = src_data[0];
+  const T* src_map1 = (src_color_space == IM_GRAY)? 0: src_data[1];
+  const T* src_map2 = (src_color_space == IM_GRAY)? 0: src_data[2];
+  T* dst_map0 = dst_data[0];
+  T* dst_map1 = dst_data[1];
+  T* dst_map2 = dst_data[2];
+  // Returns IM_ERR_NONE, IM_ERR_COUNTER when imCounterInc returns 0, or IM_ERR_DATA for any other source space.
+  imCounterTotal(counter, count, "Converting To Luv...");
+
+  switch(src_color_space)
+  {
+  case IM_GRAY: 
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // scale to 0-1
+      float c0 = imColorReconstruct(*src_map0++, max);
+
+      // do gamma correction
+      c0 = imColorTransfer2Linear(c0);
+
+      // do conversion
+      c0 = imColorLuminance2Lightness(c0);
+
+      // then scale back to 0-max
+      *dst_map0++ = imColorQuantize(c0, max);  // update only the L component; the u and v planes are not written in this case
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_RGB: 
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // to increase precision do intermediate conversions in float
+
+      // scale to 0-1
+      float c0 = imColorReconstruct(*src_map0++, max);
+      float c1 = imColorReconstruct(*src_map1++, max);
+      float c2 = imColorReconstruct(*src_map2++, max);
+
+      // do gamma correction
+      c0 = imColorTransfer2Linear(c0);
+      c1 = imColorTransfer2Linear(c1);
+      c2 = imColorTransfer2Linear(c2);
+
+      imColorRGB2XYZ(c0, c1, c2,  // conversion in-place
+                     c0, c1, c2);
+
+      imColorXYZ2Luv(c0, c1, c2,  // conversion in-place
+                     c0, c1, c2);
+
+      // then scale back to 0-max
+      *dst_map0++ = imColorQuantize(c0, max);
+      *dst_map1++ = imColorQuantize(c1 + 0.5f, max);
+      *dst_map2++ = imColorQuantize(c2 + 0.5f, max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_XYZ:
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // to increase precision do intermediate conversions in float
+      // scale to 0-1 (XYZ planes carry no 0.5 offset)
+      float c0 = imColorReconstruct(*src_map0++, max);
+      float c1 = imColorReconstruct(*src_map1++, max);
+      float c2 = imColorReconstruct(*src_map2++, max);
+
+      imColorXYZ2Luv(c0, c1, c2,  // conversion in-place
+                     c0, c1, c2);
+
+      // scale back to 0-max
+      *dst_map0++ = imColorQuantize(c0, max);
+      *dst_map1++ = imColorQuantize(c1 + 0.5f, max);
+      *dst_map2++ = imColorQuantize(c2 + 0.5f, max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  case IM_LAB:
+    max = (T)imColorMax(data_type);
+    for (i = 0; i < count; i++)
+    {
+      // to increase precision do intermediate conversions in float
+      // scale to 0-1 and -0.5/+0.5
+      float c0 = imColorReconstruct(*src_map0++, max);
+      float c1 = imColorReconstruct(*src_map1++, max) - 0.5f;
+      float c2 = imColorReconstruct(*src_map2++, max) - 0.5f;
+
+      imColorLab2XYZ(c0, c1, c2,  // conversion in-place
+                     c0, c1, c2);
+      imColorXYZ2Luv(c0, c1, c2,  // conversion in-place
+                     c0, c1, c2);
+
+      // scale back to 0-max
+      *dst_map0++ = imColorQuantize(c0, max);
+      *dst_map1++ = imColorQuantize(c1 + 0.5f, max);
+      *dst_map2++ = imColorQuantize(c2 + 0.5f, max);
+
+      if (!imCounterInc(counter))
+        return IM_ERR_COUNTER;
+    }
+    break;
+  default:
+    return IM_ERR_DATA;
+  }
+
+  return IM_ERR_NONE;
+}
+
+template <class T> 
+int iDoConvertColorSpace(int count, int data_type, 
+                                 const T** src_data, int src_color_space, 
+                                       T** dst_data, int dst_color_space)
+{
+  int ret = IM_ERR_DATA, 
+      convert2rgb = 0;
+  // Dispatches to the iDoConvert2* routine for dst_color_space, going through RGB first when no direct path exists.
+  if ((dst_color_space == IM_XYZ ||
+       dst_color_space == IM_LAB ||
+       dst_color_space == IM_LUV) && 
+      (src_color_space == IM_CMYK ||
+       src_color_space == IM_YCBCR))
+  {
+    convert2rgb = 1;
+  }    
+
+  if (dst_color_space == IM_YCBCR && src_color_space != IM_RGB)
+    convert2rgb = 1;
+  // the counter opened here is closed on every return path below
+  int counter = imCounterBegin("Convert Color Space");
+
+  if (convert2rgb)
+  {
+    ret = iDoConvert2RGB(count, data_type, src_data, src_color_space, dst_data, counter);     
+
+    if (ret != IM_ERR_NONE) 
+    {
+      imCounterEnd(counter);
+      return ret;
+    }
+    // the intermediate RGB now lives in dst_data; the second step converts it in place
+    src_data = (const T**)dst_data;
+    src_color_space = IM_RGB;
+  }
+
+  switch(dst_color_space)
+  {
+  case IM_GRAY: 
+    ret = iDoConvert2Gray(count, data_type, src_data, src_color_space, dst_data, counter);
+    break;
+  case IM_RGB: 
+    ret = iDoConvert2RGB(count, data_type, src_data, src_color_space, dst_data, counter);
+    break;
+  case IM_YCBCR: 
+    ret = iDoConvert2YCbCr(count, data_type, src_data, src_color_space, dst_data, counter); 
+    break;
+  case IM_XYZ: 
+    ret = iDoConvert2XYZ(count, data_type, src_data, src_color_space, dst_data, counter);
+    break;
+  case IM_LAB: 
+    ret = iDoConvert2Lab(count, data_type, src_data, src_color_space, dst_data, counter);
+    break;
+  case IM_LUV: 
+    ret = iDoConvert2Luv(count, data_type, src_data, src_color_space, dst_data, counter);
+    break;
+  default:
+    ret = IM_ERR_DATA;
+    break;
+  }
+
+  imCounterEnd(counter);
+
+  return ret;
+}
+
+static int iConvertColorSpace(const imImage* src_image, imImage* dst_image)
+{
+  switch(src_image->data_type)  // instantiate iDoConvertColorSpace for the sample type of the source image
+  {
+  case IM_BYTE:
+    return iDoConvertColorSpace(src_image->count, src_image->data_type,
+                         (const imbyte**)src_image->data, src_image->color_space, 
+                               (imbyte**)dst_image->data, dst_image->color_space);
+  case IM_USHORT:
+    return iDoConvertColorSpace(src_image->count, src_image->data_type, 
+                         (const imushort**)src_image->data, src_image->color_space, 
+                               (imushort**)dst_image->data, dst_image->color_space);
+  case IM_INT:
+    return iDoConvertColorSpace(src_image->count, src_image->data_type,
+                         (const int**)src_image->data, src_image->color_space, 
+                               (int**)dst_image->data, dst_image->color_space);
+  case IM_FLOAT:
+    return iDoConvertColorSpace(src_image->count, src_image->data_type, 
+                         (const float**)src_image->data, src_image->color_space, 
+                               (float**)dst_image->data, dst_image->color_space);
+  case IM_CFLOAT:
+    /* treat complex as two real values */
+    return iDoConvertColorSpace(2*src_image->count, src_image->data_type,
+                         (const float**)src_image->data, src_image->color_space, 
+                               (float**)dst_image->data, dst_image->color_space);
+  }
+  // any other data type is not supported
+  return IM_ERR_DATA;
+}
+
+int imConvertColorSpace(const imImage* src_image, imImage* dst_image)
+{
+  assert(src_image);
+  assert(dst_image);
+  // Converts src_image into the color space of dst_image (same data type required); returns an IM_ERR_* code.
+  if (!imImageMatchDataType(src_image, dst_image))
+    return IM_ERR_DATA;
+  // identical color spaces are reported as an error, nothing is copied
+  if (src_image->color_space == dst_image->color_space)
+    return IM_ERR_DATA;
+
+  switch(dst_image->color_space)
+  {
+  case IM_RGB:
+    switch(src_image->color_space)
+    {
+    case IM_BINARY:  // expand plane 0 to 0/255, then replicate it into the other two planes
+        memcpy(dst_image->data[0], src_image->data[0], dst_image->plane_size);
+        iConvertBinary((imbyte*)dst_image->data[0], dst_image->count, 255);
+        memcpy(dst_image->data[1], dst_image->data[0], dst_image->plane_size);
+        memcpy(dst_image->data[2], dst_image->data[0], dst_image->plane_size);
+      return IM_ERR_NONE;
+    case IM_MAP:
+      iConvertMapToRGB((imbyte*)src_image->data[0], (imbyte*)dst_image->data[0], (imbyte*)dst_image->data[1], (imbyte*)dst_image->data[2], dst_image->count, src_image->palette, src_image->palette_count);
+      return IM_ERR_NONE;
+    case IM_GRAY:  // the gray plane is copied into all three planes
+        memcpy(dst_image->data[0], src_image->data[0], dst_image->plane_size);
+        memcpy(dst_image->data[1], src_image->data[0], dst_image->plane_size);
+        memcpy(dst_image->data[2], src_image->data[0], dst_image->plane_size);
+      return IM_ERR_NONE;
+    default: 
+      return iConvertColorSpace(src_image, dst_image);
+    }
+  case IM_GRAY:  
+    switch(src_image->color_space)
+    {
+    case IM_BINARY:
+      memcpy(dst_image->data[0], src_image->data[0], dst_image->size);  // NOTE(review): size here, plane_size in the RGB case -- confirm they are equal for these images
+      iConvertBinary((imbyte*)dst_image->data[0], dst_image->count, 255);
+      return IM_ERR_NONE;
+    case IM_MAP:
+      iConvertMap2Gray((imbyte*)src_image->data[0], (imbyte*)dst_image->data[0], dst_image->count, src_image->palette, src_image->palette_count);
+      return IM_ERR_NONE;
+    case IM_YCBCR:  // plane 0 (Y) is used as the gray plane
+      memcpy(dst_image->data[0], src_image->data[0], dst_image->plane_size);
+      return IM_ERR_NONE;
+    default:
+      return iConvertColorSpace(src_image, dst_image);
+    }
+  case IM_MAP:   
+    switch(src_image->color_space)
+    {
+    case IM_BINARY: // no break, same procedure as gray
+    case IM_GRAY:   // indices and palette are copied as they are
+      memcpy(dst_image->data[0], src_image->data[0], dst_image->size);
+      dst_image->palette_count = src_image->palette_count;
+      memcpy(dst_image->palette, src_image->palette, dst_image->palette_count*sizeof(long));
+      return IM_ERR_NONE;
+    case IM_RGB:
+      dst_image->palette_count = 256;  // passed by address to imConvertRGB2Map below
+      return imConvertRGB2Map(src_image->width, src_image->height, 
+                             (imbyte*)src_image->data[0], (imbyte*)src_image->data[1], (imbyte*)src_image->data[2], 
+                             (imbyte*)dst_image->data[0], dst_image->palette, &dst_image->palette_count);
+    default:
+      return IM_ERR_DATA;
+    }
+  case IM_BINARY:
+    switch(src_image->color_space)
+    {
+    case IM_GRAY:
+      memcpy(dst_image->data[0], src_image->data[0], dst_image->size);
+      iConvertBinary((imbyte*)dst_image->data[0], dst_image->count, 1);
+      return IM_ERR_NONE;
+    case IM_MAP:           // convert to gray, then convert to binary
+      iConvertMap2Gray((imbyte*)src_image->data[0], (imbyte*)dst_image->data[0], dst_image->count, src_image->palette, src_image->palette_count);
+      iConvertBinary((imbyte*)dst_image->data[0], dst_image->count, 1);
+      return IM_ERR_NONE;
+    case IM_YCBCR:         // convert to gray, then convert to binary
+      memcpy(dst_image->data[0], src_image->data[0], dst_image->plane_size);
+      iConvertBinary((imbyte*)dst_image->data[0], dst_image->count, 1);
+      return IM_ERR_NONE;
+    default:               // convert to gray, then convert to binary
+      {
+        dst_image->color_space = IM_GRAY;  // temporarily marked as gray so the generic conversion produces a gray plane
+        int ret = iConvertColorSpace(src_image, dst_image);
+        dst_image->color_space = IM_BINARY;  // restored before any return
+        if (ret != IM_ERR_NONE) return ret;
+        iConvertBinary((imbyte*)dst_image->data[0], dst_image->count, 1);
+        return IM_ERR_NONE;
+      }
+    }
+  case IM_YCBCR: 
+    switch(src_image->color_space)
+    {
+    case IM_GRAY:  // only plane 0 is written; the other two planes are left as they are
+      memcpy(dst_image->data[0], src_image->data[0], dst_image->plane_size);
+      return IM_ERR_NONE;
+    default:
+      return iConvertColorSpace(src_image, dst_image);
+    }
+  default: 
+    return iConvertColorSpace(src_image, dst_image);
+  }
+}
diff --git a/src/im_converttype.cpp b/src/im_converttype.cpp
new file mode 100644
index 0000000..c8816ef
--- /dev/null
+++ b/src/im_converttype.cpp
@@ -0,0 +1,551 @@
+/** \file
+ * \brief Image Data Type Conversion
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_converttype.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im.h"
+#include "im_util.h"
+#include "im_complex.h"
+#include "im_image.h"
+#include "im_convert.h"
+#include "im_color.h"
+#include "im_counter.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <memory.h>
+
+
+/* IMPORTANT: do not declare the template functions "static".
+   Doing so triggers bizarre compiler errors
+   (reported with the AIX C++ compiler).
+*/
+
+/* If gamma is applied then factor combines two conversions:
+   one for applying gamma,
+   and another for the normal conversion to the destination dst_min-dst_max range.
+   Because gamma(0) must be 0:
+     For EXP: gamma(x) = (e^(g*x))-1
+     For LOG: gamma(x) = log((g*x)+1)
+   Because gamma(1) must be 1, the result is divided by:
+     gfactor = exp(g)-1   (EXP)
+     gfactor = log(g+1)   (LOG)
+*/
+
+inline float iGammaFactor(float range, float gamma)
+{
+  // gamma == 0: linear conversion, the factor is the range itself
+  if (gamma == 0) return range;
+  // gamma < 0 selects the LOG normalization, gamma > 0 the EXP one
+  float norm = (gamma < 0)? float(log((-gamma) + 1)):
+                            float(exp(gamma) - 1);
+  return range/norm;
+}
+
+inline float iGammaFunc(float factor, float min, float gamma, float value)
+{
+  // Callers pass 0 < value < 1; gamma selects LOG (<0), EXP (>0) or linear (0).
+  float curved = value;
+
+  if (gamma < 0)
+    curved = log(value*(-gamma) + 1);
+  else if (gamma != 0)
+    curved = exp(value*gamma) - 1;
+
+  // scale by factor and offset by the destination minimum
+  return factor*curved + min;
+}
+
+inline int iIntMin()
+{
+  return static_cast<int>(-8388608.0f);  // -(2^23)
+}
+inline int iIntMax()
+{
+  return static_cast<int>(+8388607.0f);  // 2^23 - 1
+}
+
+/// Generic absolute value.
+template <class T>
+inline T iAbs(const T& v)
+{
+  // positive values pass through; the rest are negated (zero stays zero)
+  if (!(v <= 0)) return v;
+  return -1*v;
+}
+
+template <class T> 
+inline void iDataTypeIntMax(T& max)
+{
+  // the integer data type is chosen by the storage size of T: 1 byte, 2 bytes, otherwise int
+  const int type_size = sizeof(T);
+  max = (T)imColorMax((type_size == 1)? IM_BYTE: (type_size == 2)? IM_USHORT: IM_INT);
+}
+
+template <class T> 
+inline void iMinMaxAbs(int count, const T *map, T& min, T& max, int abssolute)
+{
+  // Scans count values (optionally their absolute values) and
+  // returns the smallest and the largest in min and max.
+
+  // the first sample seeds both extremes
+  T first = *map++;
+  if (abssolute)
+    first = iAbs(first);
+
+  min = first;
+  max = first;
+
+  for (int i = 1; i < count; i++)
+  {
+    T value = *map++;
+    if (abssolute)
+      value = iAbs(value);
+
+    if (value > max)
+      max = value;
+    else if (value < min)
+      min = value;
+  }
+
+  // a flat input would give an empty range: widen it by one on each side,
+  // except that a zero minimum is kept at zero
+  if (min != max) return;
+
+  max = min + 1;
+  if (min != 0) min = min - 1;
+}
+
+template <class SRCT, class DSTT> 
+int iCopy(int count, const SRCT *src_map, DSTT *dst_map)
+{
+  // Plain element-wise cast from SRCT to DSTT:
+  // no scaling and no clipping is applied.
+  for (int i = 0; i < count; i++)
+    dst_map[i] = (DSTT)src_map[i];
+
+  return IM_ERR_NONE;
+}
+  
+template <class SRCT, class DSTT> 
+int iCopyCrop(int count, const SRCT *src_map, DSTT *dst_map, int abssolute)
+{
+  // Direct cast with clipping to the 0..dst_max range of the destination type.
+  DSTT dst_max;
+  iDataTypeIntMax(dst_max);
+
+  for (int i = 0; i < count; i++)
+  {
+    SRCT value = src_map[i];
+    if (abssolute)
+      value = iAbs(value);
+
+    if (value > dst_max)
+      value = (SRCT)dst_max;
+
+    // lower clip; written as !(>= 0), which is also true for a float NaN
+    if (!(value >= 0))
+      value = 0;
+
+    dst_map[i] = (DSTT)(value);
+  }
+
+  return IM_ERR_NONE;
+}
+
+template <class SRCT> 
+int iPromote2Cpx(int count, const SRCT *src_map, imcfloat *dst_map)
+{
+  // Promotes real samples to complex: the value goes to the real part.
+  for (int i = 0; i < count; i++)
+  {
+    dst_map[i].real = (float)src_map[i];
+    dst_map[i].imag = 0.0f;  // do not rely on the destination being pre-zeroed
+  }
+  return IM_ERR_NONE;
+}
+
+template <class SRCT, class DSTT> 
+int iConvertInt2Int(int count, const SRCT *src_map, DSTT *dst_map, int abssolute, int cast_mode, int counter)
+{
+  // Rescales integer samples from the source range min..max to 0..dst_max.
+  SRCT min, max;
+  if (cast_mode != IM_CAST_MINMAX)
+  {
+    // fixed range: 0 to the maximum of the source type
+    min = 0;
+    iDataTypeIntMax(max);
+  }
+  else
+  {
+    // range measured from the data itself
+    iMinMaxAbs(count, src_map, min, max, abssolute);
+    // data that fits in 0..255 is treated as a full byte range
+    if (min >= 0 && max <= 255)
+    {
+      min = 0;
+      max = 255;
+    }
+  }
+
+  DSTT dst_max;
+  iDataTypeIntMax(dst_max);
+
+  float factor = ((float)dst_max + 1.0f) / ((float)max - (float)min + 1.0f);
+
+  for (int i = 0; i < count; i++)
+  {
+    SRCT value = src_map[i];
+    if (abssolute)
+      value = iAbs(value);
+    // saturate at both ends, resample in between
+    if (value >= max)
+      dst_map[i] = dst_max;
+    else if (value <= min)
+      dst_map[i] = 0;
+    else
+      dst_map[i] = (DSTT)imResample(value - min, factor);
+
+    if (!imCounterInc(counter))
+      return IM_ERR_COUNTER;
+  }
+
+  return IM_ERR_NONE;
+}
+
+template <class SRCT> 
+int iPromoteInt2Real(int count, const SRCT *src_map, float *dst_map, float gamma, int abssolute, int cast_mode, int counter)
+{
+  // Maps integer samples to normalized reals, optionally through a gamma curve.
+  SRCT min, max;
+
+  if (cast_mode != IM_CAST_MINMAX)
+  {
+    // fixed source range taken from the data type
+    min = 0;
+    iDataTypeIntMax(max);
+
+    if (max == 16777215 && !abssolute)  /* IM_INT: use the signed range */
+    {
+      min = (SRCT)iIntMin();
+      max = (SRCT)iIntMax();
+    }
+  }
+  else
+  {
+    // source range measured from the data
+    iMinMaxAbs(count, src_map, min, max, abssolute);
+
+    if (min >= 0 && max <= 255)
+    {
+      min = 0;
+      max = 255;
+    }
+  }
+
+  float range = float(max - min + 1);
+
+  // 4-byte sources without abssolute go to -0.5..+0.5, everything else to 0..1
+  const bool centered = (sizeof(SRCT) == 4 && !abssolute);
+  float dst_min = centered? -0.5f: 0.0f;
+  float dst_max = centered? +0.5f: 1.0f;
+
+  gamma = -gamma; // gamma is inverted here, because we are promoting int2real
+  float factor = iGammaFactor(1.0f, gamma);
+
+  for (int i = 0; i < count; i++)
+  {
+    SRCT sample = src_map[i];
+    if (abssolute)
+      sample = iAbs(sample);
+
+    // normalize into the 0..1 interval (note the half unit offset)
+    float fvalue = (sample - min + 0.5f)/range; 
+
+    // Now 0 <= fvalue <= 1 (if min-max are correct)
+    if (fvalue >= 1)
+      dst_map[i] = dst_max;
+    else if (fvalue <= 0)
+      dst_map[i] = dst_min;
+    else
+      dst_map[i] = iGammaFunc(factor, dst_min, gamma, fvalue);
+
+    if (!imCounterInc(counter))
+      return IM_ERR_COUNTER;
+  }
+
+  return IM_ERR_NONE;
+}
+
+template <class DSTT> 
+int iDemoteReal2Int(int count, const float *src_map, DSTT *dst_map, float gamma, int abssolute, int cast_mode, int counter)
+{
+  // Maps real samples to the integer range of DSTT, optionally through a gamma curve.
+  // destination range: 0..max of the type, or the signed range for IM_INT
+  DSTT dst_min = 0, dst_max;
+  iDataTypeIntMax(dst_max);
+  if (dst_max == 16777215 && !abssolute)  /* IM_INT */
+  {
+    dst_min = (DSTT)iIntMin();
+    dst_max = (DSTT)iIntMax();
+  }
+
+  // source range: 0..1 unless it must be measured from the data
+  float min = 0, max = 1;
+  if (cast_mode == IM_CAST_MINMAX)
+    iMinMaxAbs(count, src_map, min, max, abssolute);
+
+  float range = max - min;
+  int dst_range = dst_max - dst_min + 1;
+  float factor = iGammaFactor((float)dst_range, gamma);
+
+  for (int i = 0; i < count; i++)
+  {
+    float value = src_map[i];
+    if (abssolute)
+      value = (float)iAbs(value);
+
+    value = (value - min)/range; 
+
+    // Now 0 <= value <= 1 (if min-max are correct)
+    if (value >= 1)
+    {
+      dst_map[i] = dst_max;
+    }
+    else if (value <= 0)
+    {
+      dst_map[i] = dst_min;
+    }
+    else
+    {
+      // apply gamma, then clip the result to the destination range
+      value = iGammaFunc(factor, (float)dst_min, gamma, value);
+      if (value >= dst_max)
+        dst_map[i] = dst_max;
+      else if (value <= dst_min)
+        dst_map[i] = dst_min;
+      else
+        dst_map[i] = (DSTT)imRound(value - 0.5f);
+    }
+
+    if (!imCounterInc(counter))
+      return IM_ERR_COUNTER;
+  }
+
+  return IM_ERR_NONE;
+}
+
+int iDemoteCpx2Real(int count, const imcfloat* src_map, float *dst_map, int cpx2real)
+{
+  // Converts complex samples to real ones using the component selected by cpx2real.
+  float (*CpxCnv)(const imcfloat& cpx) = NULL;
+
+  switch(cpx2real)
+  {
+  case IM_CPX_REAL:  CpxCnv = cpxreal; break;
+  case IM_CPX_IMAG:  CpxCnv = cpximag; break;
+  case IM_CPX_MAG:   CpxCnv = cpxmag; break;
+  case IM_CPX_PHASE: CpxCnv = cpxphase; break;
+  default: return IM_ERR_DATA;  // unknown selector: never call through a NULL pointer
+  }
+
+  for (int i = 0; i < count; i++)
+    dst_map[i] = CpxCnv(src_map[i]);
+
+  return IM_ERR_NONE;
+}
+                                                                     
+template <class DSTT> 
+int iDemoteCpx2Int(int count, const imcfloat* src_map, DSTT *dst_map, int cpx2real, float gamma, int abssolute, int cast_mode, int counter)
+{
+  // Two step conversion: complex -> temporary real buffer -> integer.
+  float* real_map = (float*)malloc(count*sizeof(float));
+  if (!real_map) return IM_ERR_MEM;
+
+  // The first failing step decides the result, so the caller
+  // sees the actual error code of that step.
+  int ret = iDemoteCpx2Real(count, src_map, real_map, cpx2real);
+
+  if (ret == IM_ERR_NONE)
+    ret = iDemoteReal2Int(count, real_map, dst_map, gamma, abssolute, cast_mode, counter);
+
+  free(real_map);  // released on success and on failure
+  return ret;
+}
+
+template <class SRCT> 
+int iPromoteInt2Cpx(int count, const SRCT* src_map, imcfloat *dst_map, float gamma, int abssolute, int cast_mode, int counter)
+{
+  // Two step conversion: integer -> temporary real buffer -> complex.
+  float* real_map = (float*)malloc(count*sizeof(float));
+  if (!real_map) return IM_ERR_MEM;
+
+  int ret = IM_ERR_COUNTER;
+  if (iPromoteInt2Real(count, src_map, real_map, gamma, abssolute, cast_mode, counter) == IM_ERR_NONE)
+  {
+    iPromote2Cpx(count, real_map, dst_map);
+    ret = IM_ERR_NONE;
+  }
+
+  free(real_map);
+  return ret;
+}
+
+int imConvertDataType(const imImage* src_image, imImage* dst_image, int cpx2real, float gamma, int abssolute, int cast_mode)
+{
+  assert(src_image);
+  assert(dst_image);
+  // Converts the pixel data of src_image into the data type of dst_image; returns an IM_ERR_* code.
+  if (!imImageMatchColorSpace(src_image, dst_image))
+    return IM_ERR_DATA;
+  // converting to the same data type is reported as an error
+  if (src_image->data_type == dst_image->data_type)
+    return IM_ERR_DATA;
+  // NOTE(review): assumes all planes are contiguous in memory starting at data[0] - confirm
+  int total_count = src_image->depth * src_image->count;
+  int ret = IM_ERR_DATA;
+  int counter = imCounterBegin("Convert Data Type");
+  char msg[50];
+  sprintf(msg, "Converting to %s...", imDataTypeName(dst_image->data_type));
+  imCounterTotal(counter, total_count, msg);
+  // dispatch on the (source, destination) type pair; IM_CAST_DIRECT selects a plain cast or crop
+  switch(src_image->data_type)
+  {
+  case IM_BYTE:
+    switch(dst_image->data_type)
+    {
+    case IM_USHORT:
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iCopy(total_count, (const imbyte*)src_image->data[0], (imushort*)dst_image->data[0]);
+      else
+        ret = iConvertInt2Int(total_count, (const imbyte*)src_image->data[0], (imushort*)dst_image->data[0], abssolute, cast_mode, counter);
+      break;
+    case IM_INT:
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iCopy(total_count, (const imbyte*)src_image->data[0], (int*)dst_image->data[0]);
+      else
+        ret = iConvertInt2Int(total_count, (const imbyte*)src_image->data[0], (int*)dst_image->data[0], abssolute, cast_mode, counter);
+      break;
+    case IM_FLOAT:
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iCopy(total_count, (const imbyte*)src_image->data[0], (float*)dst_image->data[0]);
+      else
+        ret = iPromoteInt2Real(total_count, (const imbyte*)src_image->data[0], (float*)dst_image->data[0], gamma, abssolute, cast_mode, counter);
+      break;
+    case IM_CFLOAT:
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iPromote2Cpx(total_count, (const imbyte*)src_image->data[0], (imcfloat*)dst_image->data[0]);
+      else
+        ret = iPromoteInt2Cpx(total_count, (const imbyte*)src_image->data[0], (imcfloat*)dst_image->data[0], gamma, abssolute, cast_mode, counter);
+      break;
+    }
+    break;
+  case IM_USHORT:
+    switch(dst_image->data_type)
+    {
+    case IM_BYTE:  // narrowing: the direct cast crops to the destination range
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iCopyCrop(total_count, (const imushort*)src_image->data[0], (imbyte*)dst_image->data[0], abssolute);
+      else
+        ret = iConvertInt2Int(total_count, (const imushort*)src_image->data[0], (imbyte*)dst_image->data[0], abssolute, cast_mode, counter);
+      break;
+    case IM_INT:
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iCopy(total_count, (const imushort*)src_image->data[0], (int*)dst_image->data[0]);
+      else
+        ret = iConvertInt2Int(total_count, (const imushort*)src_image->data[0], (int*)dst_image->data[0], abssolute, cast_mode, counter);
+      break;
+    case IM_FLOAT:
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iCopy(total_count, (const imushort*)src_image->data[0], (float*)dst_image->data[0]);
+      else
+        ret = iPromoteInt2Real(total_count, (const imushort*)src_image->data[0], (float*)dst_image->data[0], gamma, abssolute, cast_mode, counter);
+      break;
+    case IM_CFLOAT:
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iPromote2Cpx(total_count, (const imushort*)src_image->data[0], (imcfloat*)dst_image->data[0]);
+      else
+        ret = iPromoteInt2Cpx(total_count, (const imushort*)src_image->data[0], (imcfloat*)dst_image->data[0], gamma, abssolute, cast_mode, counter);
+      break;
+    }
+    break;
+  case IM_INT:
+    switch(dst_image->data_type)
+    {
+    case IM_BYTE:  // narrowing: the direct cast crops to the destination range
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iCopyCrop(total_count, (const int*)src_image->data[0], (imbyte*)dst_image->data[0], abssolute);
+      else
+        ret = iConvertInt2Int(total_count, (const int*)src_image->data[0], (imbyte*)dst_image->data[0], abssolute, cast_mode, counter);
+      break;
+    case IM_USHORT:  // narrowing: the direct cast crops to the destination range
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iCopyCrop(total_count, (const int*)src_image->data[0], (imushort*)dst_image->data[0], abssolute);
+      else
+        ret = iConvertInt2Int(total_count, (const int*)src_image->data[0], (imushort*)dst_image->data[0], abssolute, cast_mode, counter);
+      break;
+    case IM_FLOAT:
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iCopy(total_count, (const int*)src_image->data[0], (float*)dst_image->data[0]);
+      else
+        ret = iPromoteInt2Real(total_count, (const int*)src_image->data[0], (float*)dst_image->data[0], gamma, abssolute, cast_mode, counter);
+      break;
+    case IM_CFLOAT:
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iPromote2Cpx(total_count, (const int*)src_image->data[0], (imcfloat*)dst_image->data[0]);
+      else
+        ret = iPromoteInt2Cpx(total_count, (const int*)src_image->data[0], (imcfloat*)dst_image->data[0], gamma, abssolute, cast_mode, counter);
+      break;
+    }
+    break;
+  case IM_FLOAT:
+    switch(dst_image->data_type)
+    {
+    case IM_BYTE:
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iCopyCrop(total_count, (const float*)src_image->data[0], (imbyte*)dst_image->data[0], abssolute);
+      else
+        ret = iDemoteReal2Int(total_count, (const float*)src_image->data[0], (imbyte*)dst_image->data[0], gamma, abssolute, cast_mode, counter);
+      break;
+    case IM_USHORT:
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iCopyCrop(total_count, (const float*)src_image->data[0], (imushort*)dst_image->data[0], abssolute);
+      else
+        ret = iDemoteReal2Int(total_count, (const float*)src_image->data[0], (imushort*)dst_image->data[0], gamma, abssolute, cast_mode, counter);
+      break;
+    case IM_INT:  // the direct cast to int is a plain copy, without cropping
+      if (cast_mode == IM_CAST_DIRECT)
+        ret = iCopy(total_count, (const float*)src_image->data[0], (int*)dst_image->data[0]);
+      else
+        ret = iDemoteReal2Int(total_count, (const float*)src_image->data[0], (int*)dst_image->data[0], gamma, abssolute, cast_mode, counter);
+      break;
+    case IM_CFLOAT:  // cast_mode is not consulted for float to complex
+      ret = iPromote2Cpx(total_count, (const float*)src_image->data[0], (imcfloat*)dst_image->data[0]);
+      break;
+    }
+    break;
+  case IM_CFLOAT:  // complex sources always go through the cpx2real component selection
+    switch(dst_image->data_type)                                                                       
+    {
+    case IM_BYTE:
+      ret = iDemoteCpx2Int(total_count, (const imcfloat*)src_image->data[0], (imbyte*)dst_image->data[0], cpx2real, gamma, abssolute, cast_mode, counter);
+      break;
+    case IM_USHORT:
+      ret = iDemoteCpx2Int(total_count, (const imcfloat*)src_image->data[0], (imushort*)dst_image->data[0], cpx2real, gamma, abssolute, cast_mode, counter);
+      break;
+    case IM_INT:
+      ret = iDemoteCpx2Int(total_count, (const imcfloat*)src_image->data[0], (int*)dst_image->data[0], cpx2real, gamma, abssolute, cast_mode, counter);
+      break;
+    case IM_FLOAT:
+      ret = iDemoteCpx2Real(total_count, (const imcfloat*)src_image->data[0], (float*)dst_image->data[0], cpx2real);
+      break;
+    }
+    break;
+  }
+  // ret is still IM_ERR_DATA here when no (source, destination) pair matched
+  imCounterEnd(counter);
+  return ret;
+}
diff --git a/src/im_counter.cpp b/src/im_counter.cpp
new file mode 100644
index 0000000..8c5cd5c
--- /dev/null
+++ b/src/im_counter.cpp
@@ -0,0 +1,151 @@
+/** \file
+ * \brief Processing Counter
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_counter.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_counter.h"
+
+#include <stdlib.h>
+#include <memory.h>
+
+
+static imCounterCallback iCounterFunc = NULL;  // current progress callback; while NULL the counter functions do nothing
+static void* iCounterUserData = NULL;          // pointer handed back to the callback on every call
+
+imCounterCallback imCounterSetCallback(void* user_data, imCounterCallback counter_func)
+{
+  imCounterCallback previous = iCounterFunc;  // handed back to the caller
+  iCounterFunc = counter_func;
+  // a NULL user_data leaves the stored pointer untouched
+  if (user_data != NULL) iCounterUserData = user_data;
+  return previous;
+}
+
+struct iCounter
+{
+  int total;            // number of steps, set by imCounterTotal
+  int current;          // steps done so far; rewound to 0 when it reaches total
+  int sequence;         // Begin nesting level; 0 marks a free slot
+  const char* message;  // text passed to the callback on the first step (pointer only, not copied)
+};
+
+#define MAX_COUNTERS 10  // size of the counter table
+static iCounter iCounterList[MAX_COUNTERS];  // a counter id is an index into this table
+
+int imCounterBegin(const char* title)
+{
+  // one-time reset of the counter table
+  static int first = 1;
+  if (first)
+  {
+    first = 0;
+    memset(iCounterList, 0, sizeof(iCounterList));
+  }
+
+  // without a callback, counter management is useless
+  if (iCounterFunc == NULL)
+    return -1;
+
+  // take the first slot that is free or that is between two steps of a sequence
+  int slot = 0;
+  while (slot < MAX_COUNTERS &&
+         iCounterList[slot].sequence != 0 &&
+         iCounterList[slot].current != 0)
+  {
+    slot++;
+  }
+
+  if (slot == MAX_COUNTERS)
+    return -1; // too many counters
+
+  iCounter *ct = &iCounterList[slot];
+  ct->sequence++;
+  // only the top level Begin notifies the callback with the title
+  if (ct->sequence == 1)
+    iCounterFunc(slot, iCounterUserData, title, -1);
+
+  return slot;
+}
+
+void imCounterEnd(int counter)
+{
+  // Ends one Begin/End level; out of range ids (including -1) and a missing callback are ignored.
+  if (counter < 0 || counter >= MAX_COUNTERS || !iCounterFunc) return;
+  iCounter *ct = &iCounterList[counter];
+
+  if (ct->sequence == 1) // top level counter
+  {
+    iCounterFunc(counter, iCounterUserData, NULL, 1001);  // 1001 is past the 0..1000 progress scale
+    memset(ct, 0, sizeof(iCounter));                      // the slot becomes free again
+  }
+  else if (ct->sequence > 1)  // an End without a Begin must not drive the level negative
+    ct->sequence--;
+}
+
+int imCounterInc(int counter)
+{
+  if (counter < 0 || counter >= MAX_COUNTERS || !iCounterFunc)  // invalid counter or no callback
+    return 1;
+  // Advances the counter by one step; returns 1 to continue, otherwise whatever the callback returns.
+  iCounter *ct = &iCounterList[counter];
+
+  if (ct->sequence == 0 || // counter with no begin or no total
+      ct->total == 0)
+    return 1;
+
+  const char* msg = NULL;
+  if (ct->current == 0)    // the message is sent only with the first step
+    msg = ct->message;
+
+  ct->current++;
+
+  int progress = (int)((ct->current * 1000.0f)/ct->total);  // progress on a 0..1000 scale
+
+  if (ct->current == ct->total)  // last step: rewind the step count
+    ct->current = 0;
+
+  return iCounterFunc(counter, iCounterUserData, msg, progress);
+}
+
+int imCounterIncTo(int counter, int count)
+{
+  if (counter < 0 || counter >= MAX_COUNTERS || !iCounterFunc)  // invalid counter or no callback
+    return 1;
+  // Moves the counter to an absolute step; returns 1 to continue, otherwise whatever the callback returns.
+  iCounter *ct = &iCounterList[counter];
+
+  if (ct->sequence == 0 || // counter with no begin or no total
+      ct->total == 0)
+    return 1;
+
+  if (count <= 0) count = 0;                  // clamp to 0..total
+  if (count >= ct->total) count = ct->total;
+
+  ct->current = count;
+
+  const char* msg = NULL;
+  if (ct->current == 0)    // the message is sent only when positioned at the start
+    msg = ct->message;
+
+  int progress = (int)((ct->current * 1000.0f)/ct->total);  // progress on a 0..1000 scale
+
+  if (ct->current == ct->total)  // last step: rewind the step count
+    ct->current = 0;
+
+  return iCounterFunc(counter, iCounterUserData, msg, progress);
+}
+
+void imCounterTotal(int counter, int total, const char* message)
+{
+  if (counter < 0 || counter >= MAX_COUNTERS || !iCounterFunc) return;  // invalid counter or no callback
+
+  iCounter *ct = &iCounterList[counter];
+
+  if (ct->sequence == 0) return; // counter with no begin
+
+  ct->message = message;   // the pointer is stored, not copied: the text must stay valid while counting
+  ct->total = total;
+  ct->current = 0;         // a new total restarts the step count
+}
diff --git a/src/im_datatype.cpp b/src/im_datatype.cpp
new file mode 100644
index 0000000..c75483e
--- /dev/null
+++ b/src/im_datatype.cpp
@@ -0,0 +1,54 @@
+/** \file
+ * \brief Data Type Utilities
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_datatype.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+
+#include "im.h"
+#include "im_util.h"
+
+#include <assert.h>
+
+typedef struct _iTypeInfo
+{
+  int size;             // storage size in bytes
+  unsigned long max;    // largest integer value (0 for the non integer types)
+  long min;             // smallest integer value (0 for the non integer types)
+  const char* name;     // points to a string literal, so it must be const
+} iTypeInfo;
+
+static iTypeInfo iTypeInfoTable[] =  // indexed by the data type id; columns: size, max, min, name
+{        
+  {1,  255,                  0,                       "byte"}, 
+  {2,  65535,                0,                       "ushort"},
+  {4,  2147483647,           -2147483647-1,           "int"},     // min written as an expression, not as one literal
+  {4,  0,                    0,                       "float"},   // max and min are left at 0 for float
+  {8,  0,                    0,                       "cfloat"}   // max and min are left at 0 for cfloat
+};
+
+const char* imDataTypeName(int data_type)
+{
+  assert(IM_BYTE <= data_type && data_type <= IM_CFLOAT);  // data_type indexes iTypeInfoTable
+  return (iTypeInfoTable + data_type)->name;
+}
+
+int imDataTypeSize(int data_type)
+{
+  assert(sizeof(int) == 4);                                // the table stores 4 as the size of int
+  assert(IM_BYTE <= data_type && data_type <= IM_CFLOAT);  // data_type indexes iTypeInfoTable
+  return (iTypeInfoTable + data_type)->size;
+}
+
+unsigned long imDataTypeIntMax(int data_type)
+{
+  assert(IM_BYTE <= data_type && data_type <= IM_CFLOAT);  // data_type indexes iTypeInfoTable
+  return (iTypeInfoTable + data_type)->max;
+}
+
+long imDataTypeIntMin(int data_type)
+{
+  assert(IM_BYTE <= data_type && data_type <= IM_CFLOAT);  // data_type indexes iTypeInfoTable
+  return (iTypeInfoTable + data_type)->min;
+}
diff --git a/src/im_dib.cpp b/src/im_dib.cpp
new file mode 100644
index 0000000..3dd6780
--- /dev/null
+++ b/src/im_dib.cpp
@@ -0,0 +1,1136 @@
+/** \file
+ * \brief Windows DIB
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_dib.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+
+#include <windows.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include "im_dib.h"
+
+/*****************
+  Private Functions
+*****************/
+
+/* Long returned in getpixel is an array of 4 bytes, not actually a DWORD */
+/* 32 bpp max */
+/* Windows use Little Endian always, this means LSB first: 0xF3F2F1F0 = "F0F1F2F3" */
+
+#define iSETDWORD(_vLong, _Line, _Nb)                    \
+        { /* copies _Nb bytes from _Line into the bytes of _vLong; _Line is advanced */ \
+          unsigned char* _pLong = (unsigned char*)&_vLong; \
+          int b = _Nb;                                     \
+          while(b--)                                       \
+            *_pLong++ = *_Line++;                          \
+        }
+
+#define iGETDWORD(_vLong, _Line, _Nb)                    \
+        { /* copies the first _Nb bytes of _vLong out to _Line; _Line is advanced */ \
+          unsigned char* _pLong = (unsigned char*)&_vLong; \
+          int b = _Nb;                                     \
+          while(b--)                                       \
+            *_Line++ = *_pLong++;                          \
+        }
+
+#define iGETDWORDMASK(_vLong, _vMask, _Line, _Nb)        \
+        { /* writes _Nb bytes of _vLong to _Line, keeping the _Line bits outside _vMask */ \
+          unsigned char* _pLong = (unsigned char*)&_vLong; \
+          unsigned char* _pMask = (unsigned char*)&_vMask; \
+          int b = _Nb;                                     \
+          for (; b--; _Line++)  /* _Line is advanced after the byte is read and written */ \
+            *_Line = *_pLong++ | (~*_pMask++ & *_Line);    \
+        }
+
+static unsigned int iMakeBitMask(int bpp)
+{
+  // Builds a mask with the bpp lowest bits set;
+  // for bpp <= 1 a single bit is set.
+  unsigned int mask = 1;
+
+  // each pass appends one more set bit on the right
+  for (; bpp > 1; bpp--)
+    mask = (mask << 1) | 1;
+
+  return mask;
+}
+
+static unsigned int iLineGetPixel1(unsigned char* line, int col)
+{
+  return 0x01 & (line[col / 8] >> (7 - col % 8));        /* bit 7 of each byte is the leftmost pixel */
+}
+
+static void iLineSetPixel1(unsigned char* line, int col, unsigned int pixel)
+{
+  if (pixel)                                             /* only test 1/0 */
+    line[col / 8] |= (0x01 << (7 - (col % 8)));          /* bit 7 of each byte is the leftmost pixel */
+  else                                                   /* clear this pixel only, its neighbors are kept */
+    line[col / 8] &= ~(0x01 << (7 - (col % 8)));
+}
+
+static unsigned int iLineGetPixel4(unsigned char* line, int col)
+{
+  return 0x0F & (line[col / 2] >> (4 * (1 - col % 2)));  /* even columns are in the high nibble */
+}
+
+static void iLineSetPixel4(unsigned char* line, int col, unsigned int pixel)
+{
+  unsigned char mask = (col % 2)? 0x0F: 0xF0;            /* even columns use the high nibble, matching iLineGetPixel4 */
+  line[col/2] = (unsigned char)((mask & (((unsigned char)pixel) << ((1 - col % 2) * 4))) | (~mask & line[col/2]));
+}
+
+static unsigned int iLineGetPixel8(unsigned char* line, int col)
+{
+  return (unsigned int)*(line + col);                    /* one byte per pixel, returned in the low byte */
+}
+
+static void iLineSetPixel8(unsigned char* line, int col, unsigned int pixel)
+{
+  *(line + col) = (unsigned char)pixel;                  /* only the low byte of pixel is stored */
+}
+
+static unsigned int iLineGetPixel16(unsigned char* line, int col)
+{
+  return (unsigned int)*((unsigned short*)line + col);   /* 0xF1F0 => "F0F10000" */
+}
+
+static void iLineSetPixel16(unsigned char* line, int col, unsigned int pixel)
+{
+  *((unsigned short*)line + col) = (unsigned short)pixel;  /* inverse of iLineGetPixel16 */
+}
+
+static unsigned int iLineGetPixel24(unsigned char* line, int col)
+{
+  unsigned int pixel = 0;                 /* the fourth byte stays zero */
+  unsigned char* src = line + col*3;      /* three bytes per pixel */
+  iSETDWORD(pixel, src, 3);
+  return pixel;
+}
+
+static void iLineSetPixel24(unsigned char* line, int col, unsigned int pixel)
+{
+  unsigned char* dst = line + col*3;      /* three bytes per pixel */
+  iGETDWORD(pixel, dst, 3);
+}
+
+static unsigned int iLineGetPixel32(unsigned char* line, int col)
+{
+  return *((unsigned int*)line + col);                  /* direct mapping */
+}
+
+static void iLineSetPixel32(unsigned char* line, int col, unsigned int pixel)
+{
+  *((unsigned int*)line + col) = pixel;                 /* direct mapping */
+}
+
+static int iGetPixelAnyBpp = 0;            /* bit depth read by iLineGetPixelAny */
+static unsigned int iGetPixelAnyMask = 0;  /* referenced only by the disabled code in iLineGetPixelAny */
+
+static unsigned int iAnyGet(unsigned char* line, int col, int bpp)
+{
+  int s_byte = (col*bpp) >> 3;                  /* first byte touched by the pixel */
+  int s_bit = (col*bpp) & 0x7;                  /* bit offset inside that byte, counted from the MSB */
+  unsigned int pixel = 0;
+  unsigned int mask = (~0u) >> (32-bpp);        /* bpp low bits set; unsigned so zeros are shifted in (needs 1 <= bpp <= 32) */
+  int n_bytes = (bpp + s_bit + 7) >> 3;         /* bytes spanned by the pixel */
+  int shift = (n_bytes << 3) - bpp - s_bit;     /* unused bits to the right of the pixel */
+  line += s_byte;
+  while (n_bytes)                               /* gather the bytes, most significant first */
+  {
+    pixel |= *line++;
+    if (--n_bytes > 0) pixel <<= 8;
+    else break;
+  }
+  pixel >>= shift;
+  return pixel & mask;                          /* drop the bits of the pixels to the left */
+}
+
+static void iAnySet(unsigned char* line, int col, int bpp, unsigned int pixel)
+{
+  int s_byte = (col*bpp) >> 3;                  /* first byte touched by the pixel */
+  int s_bit = (col*bpp) & 0x7;                  /* bit offset inside that byte, counted from the MSB */
+  unsigned int mask = (~0u) >> (32-bpp);        /* bpp low bits set; unsigned so zeros are shifted in (needs 1 <= bpp <= 32) */
+  int n_bytes = (bpp + s_bit + 7) >> 3;         /* bytes spanned by the pixel */
+  int shift = (n_bytes << 3) - bpp - s_bit;     /* unused bits to the right of the pixel */
+  unsigned char* p_pixel = (unsigned char*) &pixel, *p_mask = (unsigned char*) &mask;
+  line += s_byte + n_bytes - 1;                 /* start at the last byte of the pixel */
+  pixel <<= shift;
+  mask <<= shift;
+  while (n_bytes--) {                           /* walks pixel and mask byte by byte from their first byte, the line backwards */
+    *line = (*line & ~(*p_mask)) | (*p_pixel & *p_mask);
+    p_mask++; p_pixel++; line--;
+  }
+}
+
+static unsigned int iLineGetPixelAny(unsigned char* line, int col)
+{
+  return iAnyGet(line, col, iGetPixelAnyBpp);     /* bit depth comes from the file-level iGetPixelAnyBpp */
+#if 0 /* disabled implementation, placed after the return; never compiled */
+  unsigned int pixel = 0;
+  int rbits  = (col * iGetPixelAnyBpp) % 8;       /* calc remaining bits */
+  line      += (col * iGetPixelAnyBpp) / 8;       /* position pointer */
+
+  /* transfer from pixel line to a DWORD in little endian, so it can be shifted */
+  {
+    int nbytes = (iGetPixelAnyBpp + rbits + 7) / 8; /* bytes used */
+    iSETDWORD(pixel, line, nbytes);
+  }
+
+  /* shift down pixel remaining bits and mask extra non pixel bits */
+  return (pixel >> rbits) & iGetPixelAnyMask;
+#endif
+}
+
+static int iSetPixelAnyBpp = 0;            /* bit depth read by iLineSetPixelAny */
+static unsigned int iSetPixelAnyMask = 0;  /* referenced only by the disabled code in iLineSetPixelAny */
+
+static void iLineSetPixelAny(unsigned char* line, int col, unsigned int pixel)
+{
+  iAnySet(line, col, iSetPixelAnyBpp, pixel);     /* bit depth comes from the file-level iSetPixelAnyBpp */
+#if 0 /* disabled implementation; never compiled */
+  int rbits  = (col * iSetPixelAnyBpp) % 8;       /* calc remaining bits */
+  line      += (col * iSetPixelAnyBpp) / 8;       /* position pointer */
+
+  pixel = pixel << rbits; /* position bits */
+
+  {
+    unsigned int mask = iSetPixelAnyMask << rbits; /* position mask */
+    int nbytes = (iGetPixelAnyBpp + rbits + 7) / 8; /* bytes used; NOTE(review): uses the Get depth, not iSetPixelAnyBpp */
+    iGETDWORDMASK(pixel, mask, line, nbytes);
+  }                      
+#endif
+}
+
+static long iQuad2Long(RGBQUAD* quad_color)
+{
+  // packs the color as 0x00RRGGBB; rgbReserved is not used
+  unsigned long r = quad_color->rgbRed, g = quad_color->rgbGreen, b = quad_color->rgbBlue;
+  return (r << 16) | (g << 8) | (b << 0);
+}
+
+static RGBQUAD iLong2Quad(long long_color)
+{
+  RGBQUAD quad_color;
+  // unpacks a 0x00RRGGBB value, the inverse of iQuad2Long
+  quad_color.rgbRed = (unsigned char)(((long_color) >> 16) & 0xFF);
+  quad_color.rgbGreen = (unsigned char)(((long_color) >>  8) & 0xFF);
+  quad_color.rgbBlue = (unsigned char)(((long_color) >>  0) & 0xFF);
+  quad_color.rgbReserved = 0;  // was returned uninitialized
+  return quad_color;
+}
+
+static int iImageLineSize(int width, int bpp)
+{
+  return (7 + width * bpp) / 8;            /* bits rounded up to whole bytes, no padding */
+}
+
+static int iLineSize(int width, int bpp)
+{
+  return 4 * ((31 + width * bpp) / 32);   /* bits rounded up to a multiple of 4 bytes */
+}
+
+static void iInitHeadersReference(imDib* dib)
+{
+  dib->bmih = (BITMAPINFOHEADER*)dib->dib;                      /* the header sits at the start of the block */
+  dib->bmi = (BITMAPINFO*)dib->dib;                             /* same address, seen as a BITMAPINFO */
+  dib->bmic = (RGBQUAD*)(sizeof(BITMAPINFOHEADER) + dib->dib);  /* colors come right after the header */
+}
+
+static void iInitSizes(imDib* dib, int width, int height, int bpp)
+{
+  dib->line_size = iLineSize(width, bpp);                       /* padded to 4 bytes */
+  dib->bits_size = dib->line_size * height;
+  dib->pad_size = dib->line_size - iImageLineSize(width, bpp);  /* bytes added by the padding */
+  dib->size = dib->bits_size + sizeof(RGBQUAD) * dib->palette_count + sizeof(BITMAPINFOHEADER);
+}
+
+static void iInitInfoHeader(BITMAPINFOHEADER* bmih, int width, int height, int bpp, int palette_count)
+{
+  // Fills a header with biCompression 0 for the given geometry and palette size.
+  bmih->biSize = sizeof(BITMAPINFOHEADER);
+  bmih->biPlanes = 1;
+  bmih->biWidth = width;
+  bmih->biHeight = height;
+  bmih->biBitCount = (WORD)bpp;
+  bmih->biClrUsed = palette_count;
+
+  bmih->biCompression = 0;
+  bmih->biSizeImage = 0;
+  bmih->biClrImportant = 0;
+
+  // the resolution is taken from the screen: pixels per inch divided by meters per inch
+  HDC screen_dc = GetDC(NULL);
+  const double meters_per_inch = 0.0254;
+  bmih->biXPelsPerMeter = (unsigned int)(GetDeviceCaps(screen_dc, LOGPIXELSX) / meters_per_inch);
+  bmih->biYPelsPerMeter = (unsigned int)(GetDeviceCaps(screen_dc, LOGPIXELSY) / meters_per_inch);
+  ReleaseDC(NULL, screen_dc);
+}
+
+static void iInitBits(imDib* dib, BYTE* bits)
+{
+  // External bits are used as given; with NULL the pixels are
+  // located after the header and the palette inside dib->dib.
+  dib->bits = (bits != NULL)? bits:
+              dib->dib + sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * dib->palette_count;
+}
+
+static int iGetValidBpp(int bpp)
+{
+  // Rounds bpp up to the nearest depth in {1, 4, 8, 16, 24, 32};
+  // anything above 32 is invalid and yields 0.
+  // Every value below 4 other than 1 (0 and negatives included) maps to 4.
+  static const int valid_bpp[] = {4, 8, 16, 24, 32};
+  int i;
+
+  if (bpp == 1)
+    return 1;
+
+  for (i = 0; i < 5; i++)
+  {
+    if (bpp <= valid_bpp[i])
+      return valid_bpp[i];
+  }
+
+  return 0;
+}
+
+static int iCheckHeader(BITMAPINFOHEADER *bmih)
+{ // Returns 1 when the header passes all the checks below, 0 otherwise.
+  if (bmih->biSize != sizeof(BITMAPINFOHEADER))  // only the plain info header is accepted
+    return 0;
+
+  if (bmih->biWidth <= 0)
+    return 0;
+
+  if (bmih->biHeight == 0)   // a negative height is accepted here
+    return 0;
+  // the compression must agree with the bit depth rounded by iGetValidBpp
+  {
+    int bpp = iGetValidBpp(bmih->biBitCount);
+    if (!bpp)
+      return 0;
+
+    if (bmih->biCompression == BI_RLE8 && bpp != 8)
+      return 0;
+
+    if (bmih->biCompression == BI_RLE4 && bpp != 4)
+      return 0;
+    // bitfields need 16 or 32 bpp; the former "||" test was always true and rejected them all
+    if (bmih->biCompression == BI_BITFIELDS && (bpp != 16 && bpp != 32))
+      return 0;
+
+    if (bmih->biHeight < 0 && (bmih->biCompression == BI_RLE8 || bmih->biCompression == BI_RLE4))
+      return 0;
+
+/*    if (bmih->biCompression == BI_JPEG || bmih->biCompression == BI_PNG)
+      return 0; */
+  }
+
+  return 1;
+}
+
+/*****************
+  Creation
+*****************/
+
+static void AllocDib(imDib* dib) 
+{
+  dib->dib = NULL;  // stays NULL when the allocation or the lock fails
+  dib->handle = GlobalAlloc(GMEM_MOVEABLE, dib->size); 
+  if (dib->handle) dib->dib = (BYTE*)GlobalLock(dib->handle);  // kept locked while the dib exists
+  if (dib->handle && !dib->dib) { GlobalFree(dib->handle); dib->handle = NULL; }  // lock failed: do not leak the block
+}
+
+imDib* imDibCreate(int width, int height, int bpp)
+{
+  imDib* dib;
+  int obpp = bpp;  // a negative bpp asks for 3 extra color entries at 16 and 32 bpp
+  // Creates a dib that owns its memory block; returns NULL on invalid arguments or out of memory.
+  bpp = iGetValidBpp(abs(bpp));
+  
+  assert(width > 0 && height > 0);
+  assert(bpp);
+  if (width <= 0 || height <= 0 || !bpp) return NULL;  // same checks when asserts are compiled out
+  dib = (imDib*)malloc(sizeof(imDib));
+  if (!dib) return NULL;                               // out of memory
+  if (bpp > 8)
+  {
+    if ((bpp == 16 || bpp == 32) && obpp < 0)
+      dib->palette_count = 3;
+    else
+      dib->palette_count = 0;
+  }
+  else
+    dib->palette_count = 1 << bpp;
+  
+  iInitSizes(dib, width, height, bpp);
+                         
+  AllocDib(dib);
+  if (dib->dib == NULL)
+  {
+    free(dib);
+    return NULL;
+  }
+
+  iInitHeadersReference(dib);
+
+  iInitInfoHeader(dib->bmih, width, height, bpp, dib->palette_count);
+
+  iInitBits(dib, NULL);
+
+  dib->is_reference = 0;  // the dib owns the memory block
+  
+  return dib;
+}
+
+imDib* imDibCreateSection(HDC hDC, HBITMAP *bitmap, int width, int height, int bpp)
+{
+  BITMAPINFO* bmi;
+  BYTE* bits;
+  int palette_count;
+  int obpp = bpp;  // a negative bpp asks for 3 extra color entries at 16 and 32 bpp
+  // Creates a DIB section in *bitmap and a dib describing it; returns NULL on failure.
+  bpp = iGetValidBpp(abs(bpp));
+
+  assert(hDC);
+  assert(width > 0 && height > 0);
+  assert(bpp);
+
+  if (bpp > 8)
+  {
+    if ((bpp == 16 || bpp == 32) && obpp < 0)
+      palette_count = 3;  
+    else
+      palette_count = 0;
+  }
+  else
+    palette_count = 1 << bpp;
+
+  bmi = (BITMAPINFO*)malloc(sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * palette_count);
+  if (!bmi) return NULL;  // out of memory
+  iInitInfoHeader(&bmi->bmiHeader, width, height, bpp, palette_count);
+
+  if (bpp > 8 && palette_count == 3)
+  {
+    DWORD *masks = (DWORD*)((BYTE*)bmi + sizeof(BITMAPINFOHEADER));  // byte offset: arithmetic on bmi itself scales by sizeof(BITMAPINFO)
+    masks[0] = 0x001F;  // NOTE(review): the header keeps biCompression 0, so these masks may not be honored - confirm
+    masks[1] = 0x03E0;
+    masks[2] = 0x7C00;
+  }
+
+  *bitmap = CreateDIBSection(hDC, bmi, DIB_RGB_COLORS, (void**)&bits, NULL, 0);
+  if (*bitmap == NULL) { free(bmi); return NULL; }  // the section could not be created
+  {
+    imDib* dib = imDibCreateReference((BYTE*)bmi, bits);
+    if (!dib) { DeleteObject(*bitmap); *bitmap = NULL; free(bmi); return NULL; }
+    dib->is_reference = 0;  // NOTE(review): imDibDestroy then releases dib->handle, which this path never allocates - confirm
+    return dib;
+  }
+}
+
+/* Returns a new owned DIB holding a copy of everything src_dib stores before its bits, and of the bits. NULL on allocation failure. */
+imDib* imDibCreateCopy(const imDib* src_dib)
+{
+  imDib* dib;
+
+  assert(src_dib);
+
+  dib = (imDib*)malloc(sizeof(imDib));
+  if (!dib) return NULL;
+  memcpy(dib, src_dib, sizeof(imDib));
+
+  AllocDib(dib);
+  if (dib->dib == NULL)
+  {
+    free(dib);
+    return NULL;
+  }
+
+  iInitHeadersReference(dib);
+
+  memcpy(dib->dib, src_dib->dib, dib->size - dib->bits_size);
+
+  iInitBits(dib, NULL);
+
+  memcpy(dib->bits, src_dib->bits, dib->bits_size);
+
+  dib->is_reference = 0;
+
+  return dib;
+}
+/* Wraps an existing header (bmi) and pixel pointer (bits) without copying them. Returns NULL if the wrapper cannot be allocated. */
+imDib* imDibCreateReference(BYTE* bmi, BYTE* bits)
+{
+  imDib* dib;
+
+  assert(bmi);
+
+  dib = (imDib*)malloc(sizeof(imDib));
+  if (!dib) return NULL;
+  dib->dib = bmi;
+  dib->handle = NULL;  /* this function allocates no global block, so do not leave the field uninitialized */
+  iInitHeadersReference(dib);
+  
+  if (dib->bmih->biBitCount > 8)
+  {
+    dib->palette_count = 0;
+    
+    if (dib->bmih->biCompression == BI_BITFIELDS)
+      dib->palette_count = 3;
+  }
+  else
+  {
+    if (dib->bmih->biClrUsed != 0)
+      dib->palette_count = dib->bmih->biClrUsed;
+    else
+      dib->palette_count = 1 << dib->bmih->biBitCount;
+  }
+  
+  iInitBits(dib, bits);
+  
+  dib->is_reference = 1;
+
+  iInitSizes(dib, dib->bmih->biWidth, abs(dib->bmih->biHeight), dib->bmih->biBitCount);
+
+  return dib;
+}
+/* Frees an imDib; the global memory block is unlocked and freed only when the DIB is not a reference. */
+void imDibDestroy(imDib* dib)
+{
+  int owns_block;
+
+  assert(dib);
+  owns_block = !dib->is_reference;
+  if (owns_block) GlobalUnlock(dib->handle);
+  if (owns_block) GlobalFree(dib->handle);
+  free(dib);
+}
+
+/*****************
+  Line Access
+*****************/
+/* Returns the line reader for bpp, or NULL when bpp is outside 1..32. Depths without a dedicated reader share one generic reader. */
+imDibLineGetPixel imDibLineGetPixelFunc(int bpp)
+{
+  switch(bpp)
+  {
+  case 1:
+    return &iLineGetPixel1;
+  case 4:
+    return &iLineGetPixel4;
+  case 8:
+    return &iLineGetPixel8;
+  case 16:
+    return &iLineGetPixel16;
+  case 24:
+    return &iLineGetPixel24;
+  case 32:
+    return &iLineGetPixel32;
+  default:
+    if (bpp < 1 || bpp > 32) return NULL;  /* zero and negative depths are rejected like depths above 32 */
+    iGetPixelAnyBpp = bpp;                 /* not a local: the most recent call configures the generic reader */
+    iGetPixelAnyMask = iMakeBitMask(bpp);
+    return &iLineGetPixelAny;
+  }
+}
+/* Returns the line writer for bpp, or NULL when bpp is outside 1..32. Depths without a dedicated writer share one generic writer. */
+imDibLineSetPixel imDibLineSetPixelFunc(int bpp)
+{
+  switch(bpp)
+  {
+  case 1:
+    return &iLineSetPixel1;
+  case 4:
+    return &iLineSetPixel4;
+  case 8:
+    return &iLineSetPixel8;
+  case 16:
+    return &iLineSetPixel16;
+  case 24:
+    return &iLineSetPixel24;
+  case 32:
+    return &iLineSetPixel32;
+  default:
+    if (bpp < 1 || bpp > 32) return NULL;  /* zero and negative depths are rejected like depths above 32 */
+    iSetPixelAnyBpp = bpp;                 /* not a local: the most recent call configures the generic writer */
+    iSetPixelAnyMask = iMakeBitMask(bpp);
+    return &iLineSetPixelAny;
+  }
+}
+
+/*****************
+  DIB <-> Bitmap
+*****************/
+/* Copies a device-dependent bitmap into a new DIB, optionally realized against hPalette. Returns NULL on failure. */
+imDib* imDibFromHBitmap(const HBITMAP bitmap, const HPALETTE hPalette)
+{
+  imDib* dib;
+
+  assert(bitmap);
+
+  {
+    BITMAP bmp; 
+ 
+    if (!GetObject(bitmap, sizeof(BITMAP), (LPSTR)&bmp)) 
+      return NULL;
+ 
+    dib = imDibCreate(bmp.bmWidth, bmp.bmHeight, bmp.bmPlanes * bmp.bmBitsPixel);
+  }
+
+  if (!dib)
+    return NULL;
+  
+  {
+    HDC ScreenDC = GetDC(NULL);
+    HPALETTE hOldPalette = NULL;
+    if (hPalette) hOldPalette = SelectPalette(ScreenDC, hPalette, FALSE);
+    RealizePalette(ScreenDC);
+    /* GetDIBits returns 0 when it fails; do not hand back a DIB whose pixels were never filled in */
+    if (!GetDIBits(ScreenDC, bitmap, 0, dib->bmih->biHeight, dib->bits, dib->bmi, DIB_RGB_COLORS)) { imDibDestroy(dib); dib = NULL; }
+
+    if (hOldPalette) SelectPalette(ScreenDC, hOldPalette, FALSE);
+    ReleaseDC(NULL, ScreenDC);
+  }
+  
+  return dib;
+}
+/* Builds a device-dependent bitmap from the DIB through the screen DC and returns whatever CreateDIBitmap returned. */
+HBITMAP imDibToHBitmap(const imDib* dib)
+{
+  HDC screen;
+  HBITMAP result;
+
+  assert(dib);
+
+  /* the screen DC is borrowed only for the duration of the conversion */
+  screen = GetDC(NULL);
+  result = CreateDIBitmap(screen, dib->bmih, CBM_INIT, dib->bits, dib->bmi, DIB_RGB_COLORS);
+  ReleaseDC(NULL, screen);
+
+/* 
+  Alternative kept for reference:
+  bitmap = CreateCompatibleBitmap(ScreenDC, dib->bmih->biWidth, dib->bmih->biHeight);
+  SetDIBits(ScreenDC, bitmap, 0, dib->bmih->biHeight, dib->bits, dib->bmi, DIB_RGB_COLORS);	
+*/
+
+  return result;
+}
+
+/*******************
+  DIB <-> Clipboard
+*******************/
+/* Returns 1 when the clipboard currently offers CF_DIB or CF_BITMAP data, 0 otherwise. */
+int imDibIsClipboardAvailable(void)
+{
+  if (!IsClipboardFormatAvailable(CF_DIB) &&
+      !IsClipboardFormatAvailable(CF_BITMAP))
+    return 0;  /* neither image format is offered */
+
+  return 1;
+}
+/* Returns a new DIB copied from the clipboard (CF_DIB preferred, CF_BITMAP otherwise), or NULL when nothing usable is there. */
+imDib* imDibPasteClipboard(void)
+{
+  int clip_type = 0;
+  if (IsClipboardFormatAvailable(CF_DIB)) 
+    clip_type = CF_DIB;
+  else if (IsClipboardFormatAvailable(CF_BITMAP)) 
+    clip_type = CF_BITMAP;
+
+  if (!clip_type)
+    return NULL;
+
+  if (!OpenClipboard(NULL)) return NULL;  /* without an open clipboard GetClipboardData cannot be used */
+  HANDLE Handle = GetClipboardData(clip_type);
+  if (Handle == NULL)
+  {
+    CloseClipboard();
+    return NULL;
+  }
+  
+  imDib *dib;
+  if (clip_type == CF_DIB)
+  {
+    BYTE* bmi = (BYTE*)GlobalLock(Handle);
+    if (!bmi || !iCheckHeader((BITMAPINFOHEADER*)bmi))
+    {
+      if (bmi) GlobalUnlock(Handle);  /* a rejected header must not leave the clipboard block locked */
+      CloseClipboard();
+      return NULL;
+    }
+    {
+      imDib* clip_dib = imDibCreateReference(bmi, NULL);
+      dib = clip_dib ? imDibCreateCopy(clip_dib) : NULL;
+      if (clip_dib) imDibDestroy(clip_dib);
+      GlobalUnlock(Handle);
+    }
+  }
+  else
+  {
+    HPALETTE hpal = (HPALETTE)GetClipboardData(CF_PALETTE);
+
+    /* If there is a CF_PALETTE object in the clipboard, this is the palette to assume */
+    /* the bitmap is realized against.                                                 */
+    if (!hpal)
+      hpal = (HPALETTE)GetStockObject(DEFAULT_PALETTE);
+
+    dib = imDibFromHBitmap((HBITMAP)Handle, hpal);
+  }
+  
+  CloseClipboard();
+  
+  return dib;
+}  
+/* Hands the DIB's global block to the clipboard as CF_DIB and destroys the wrapper. If the clipboard cannot be opened, dib is left untouched. */
+void imDibCopyClipboard(imDib* dib)
+{
+  assert(dib);
+
+  if (!OpenClipboard(NULL))
+    return;
+  EmptyClipboard();
+  GlobalUnlock(dib->handle);
+  if (!SetClipboardData(CF_DIB, dib->handle)) GlobalFree(dib->handle);  /* ownership was not transferred, so free the block here */
+  CloseClipboard();
+
+  dib->dib = NULL;
+  dib->is_reference = 1;
+  imDibDestroy(dib);
+}
+
+/*******************
+  DIB -> Palette
+*******************/
+/* Creates a GDI logical palette from the color table of a DIB of 8 bpp or less. Returns NULL if the temporary buffer cannot be allocated. */
+HPALETTE imDibLogicalPalette(const imDib* dib)
+{
+  LOGPALETTE* pLogPal;      
+  PALETTEENTRY* pPalEntry;
+  HPALETTE hPal;
+  RGBQUAD* bmic;
+  int c;
+  
+  assert(dib);
+  assert(dib->bmih->biBitCount <= 8);
+  
+  pLogPal = (LOGPALETTE*)malloc(sizeof(LOGPALETTE) + dib->palette_count * sizeof(PALETTEENTRY));
+  if (!pLogPal) return NULL;
+  pLogPal->palVersion    = 0x300; 
+  pLogPal->palNumEntries = (WORD)dib->palette_count;
+  bmic = dib->bmic;
+  pPalEntry = pLogPal->palPalEntry;
+  
+  for (c = 0; c < dib->palette_count; c++) 
+  {
+    pPalEntry->peRed   = bmic->rgbRed;
+    pPalEntry->peGreen = bmic->rgbGreen;
+    pPalEntry->peBlue  = bmic->rgbBlue;
+    pPalEntry->peFlags = PC_NOCOLLAPSE;
+    
+    pPalEntry++;
+    bmic++;
+  }
+  
+  hPal = CreatePalette(pLogPal);
+  free(pLogPal);
+  
+  return hPal;
+}
+
+/*******************
+  DIB <-> RGB Image
+*******************/
+/* Packs separate red, green and blue planes (plus alpha for 32 bpp) into the DIB; a NULL alpha plane stores 0xFF. */
+void imDibEncodeFromRGBA(imDib* dib, const unsigned char* red, const unsigned char* green, const unsigned char* blue, const unsigned char* alpha)
+{
+  int col, row, rows, top_down, has_alpha;
+  BYTE* dst;
+
+  top_down = dib->bmih->biHeight < 0;
+  /* a negative height starts at the last stored line and walks backwards */
+  dst = top_down ? dib->bits + (dib->bits_size - dib->line_size) : dib->bits;
+
+  assert(dib->bmih->biBitCount > 16);
+
+  has_alpha = dib->bmih->biBitCount == 32;
+  rows = abs(dib->bmih->biHeight);
+
+  /* each pixel is stored blue first, then green, then red */
+  for (row = 0; row < rows; row++)
+  {
+    for (col = 0; col < dib->bmih->biWidth; col++)
+    {
+      dst[0] = *blue++;
+      dst[1] = *green++;
+      dst[2] = *red++;
+      dst += 3;
+
+      if (has_alpha)
+        *dst++ = alpha ? *alpha++ : 0xFF;  /* opaque when no alpha plane is given */
+    }
+
+    /* skip the line padding; when walking backwards also step over the line just written and the one before it */
+    dst += dib->pad_size;
+
+    if (top_down)
+      dst -= 2*dib->line_size;
+  }
+}
+/* Splits a 16, 24 or 32 bpp DIB into separate red, green and blue planes; alpha (32 bpp only) is written when alpha is not NULL. */
+void imDibDecodeToRGBA(const imDib* dib, unsigned char* red, unsigned char* green, unsigned char* blue, unsigned char* alpha)
+{
+  int x, y, offset;
+  unsigned short color;
+  BYTE* bits;
+  unsigned int rmask = 0, gmask = 0, bmask = 0, 
+                roff = 0, goff = 0, boff = 0; /* pixel bit mask control when reading 16 and 32 bpp images */
+  
+  assert(dib);
+  assert(dib->bmih->biBitCount > 8);
+  assert(red && green && blue);
+
+  if (dib->bmih->biHeight < 0)
+    bits = dib->bits + (dib->bits_size - dib->line_size); /* start of last line */
+  else
+    bits = dib->bits;
+  
+  if (dib->bmih->biBitCount == 16)
+    offset = dib->line_size;  /* do not increment for each pixel, jump line */
+  else
+    offset = dib->pad_size;   /* increment for each pixel, jump pad */
+  
+  if (dib->bmih->biCompression == BI_BITFIELDS)
+  {
+    unsigned int Mask;
+    unsigned int* palette = (unsigned int*)dib->bmic;
+    /* "Mask &&" ends the shift loops on an all-zero mask, which would otherwise never terminate */
+    rmask = Mask = palette[0];
+    while (Mask && !(Mask & 0x01))
+    {Mask >>= 1; roff++;}
+    
+    gmask = Mask = palette[1];
+    while (Mask && !(Mask & 0x01))
+    {Mask >>= 1; goff++;}
+    
+    bmask = Mask = palette[2];
+    while (Mask && !(Mask & 0x01))
+    {Mask >>= 1; boff++;}
+  }
+  else if (dib->bmih->biBitCount == 16)
+  {
+    bmask = 0x001F;
+    gmask = 0x03E0;
+    rmask = 0x7C00;
+    boff = 0;
+    goff = 5;
+    roff = 10;
+  }
+  /* NOTE(review): the masks are applied to 16 bpp pixels only; 24 and 32 bpp pixels are read as fixed B,G,R[,A] bytes */
+  for (y = 0; y < abs(dib->bmih->biHeight); y++)
+  {
+    for (x = 0; x < dib->bmih->biWidth; x++)
+    {
+      if (dib->bmih->biBitCount == 16)
+      {
+        color = ((unsigned short*)bits)[x];
+        *red++ = rmask ? (unsigned char)((((rmask & color) >> roff) * 255) / (rmask >> roff)) : 0;   /* empty mask: channel is 0, no division by zero */
+        *green++ = gmask ? (unsigned char)((((gmask & color) >> goff) * 255) / (gmask >> goff)) : 0;
+        *blue++ = bmask ? (unsigned char)((((bmask & color) >> boff) * 255) / (bmask >> boff)) : 0;
+      }
+      else
+      {
+        *blue++ = *bits++;
+        *green++ = *bits++;
+        *red++ = *bits++;
+        
+        if (dib->bmih->biBitCount == 32)
+        {
+          if (alpha)
+            *alpha++ = *bits++;
+          else
+            bits++;
+        }
+      }
+    }
+    
+    bits += offset;
+
+    if (dib->bmih->biHeight < 0)
+      bits -= 2*dib->line_size;
+  }
+}
+
+/*******************
+  DIB <-> Map Image
+*******************/
+/* Stores one-byte-per-pixel indices and their palette into a DIB of 8 bpp or less, packing the indices for 1 and 4 bpp. */
+void imDibEncodeFromMap(imDib* dib, const unsigned char* map, const long* palette, int palette_count)
+{
+  assert(dib);
+  assert(map && palette);
+  assert(dib->bmih->biBitCount <= 8);
+  assert(dib->bmih->biCompression != BI_RLE8);
+
+  {
+    int x, y, bpp = dib->bmih->biBitCount;
+    BYTE* bits;
+    
+    if (dib->bmih->biHeight < 0)
+      bits = dib->bits + (dib->bits_size - dib->line_size); /* start of last line */
+    else
+      bits = dib->bits;
+
+    for (y = 0; y < abs(dib->bmih->biHeight); y++)
+    {
+      /* 1 and 4 bpp put the leftmost pixel in the highest bits of a byte and start each byte from zero; other depths store one byte per pixel */
+      for (x = 0; x < dib->bmih->biWidth; x++, map++)
+        if (bpp == 1)      bits[x / 8] = (BYTE)(((x % 8) ? bits[x / 8] : 0) | ((*map & 0x01) << (7 - x % 8)));
+        else if (bpp == 4) bits[x / 2] = (BYTE)(((x % 2) ? bits[x / 2] : 0) | ((*map & 0x0F) << ((1 - x % 2) * 4)));
+        else               bits[x] = *map;
+      if (dib->bmih->biHeight < 0)
+        bits -= dib->line_size;
+      else
+        bits += dib->line_size;
+    }
+  }
+
+  {
+    int c;
+    RGBQUAD* bmic = dib->bmic;
+    for (c = 0; c < palette_count; c++)
+      *bmic++ = iLong2Quad(palette[c]);
+  }
+  dib->bmih->biClrUsed = palette_count;
+  dib->bmih->biClrImportant = 0;
+  dib->palette_count = palette_count;
+}
+/* Unpacks a 1, 4 or 8 bpp DIB into one byte per pixel in map and copies dib->palette_count colors into palette. */
+void imDibDecodeToMap(const imDib* dib, unsigned char* map, long* palette)
+{
+  int col, row, i;
+  const BYTE* line;
+  RGBQUAD* entry;
+
+  assert(dib);
+  assert(dib->bmih->biBitCount <= 8);
+  assert(map && palette);
+
+  /* a negative height is read from the last stored line backwards */
+  if (dib->bmih->biHeight < 0)
+  {
+    line = dib->bits + (dib->bits_size - dib->line_size);
+  }
+  else
+  {
+    line = dib->bits;
+  }
+
+  for (row = 0; row < abs(dib->bmih->biHeight); row++)
+  {
+    for (col = 0; col < dib->bmih->biWidth; col++)
+    {
+      const int depth = dib->bmih->biBitCount;
+
+      if (depth == 1)       /* 8 pixels per byte, leftmost pixel in the top bit */
+        *map++ = (unsigned char)((line[col / 8] >> (7 - col % 8)) & 0x01);
+      else if (depth == 4)  /* 2 pixels per byte, leftmost pixel in the high nibble */
+        *map++ = (unsigned char)((line[col / 2] >> ((1 - col % 2) * 4)) & 0x0F);
+      else if (depth == 8)
+        *map++ = line[col];
+      /* any other depth produces no output for this pixel */
+    }
+
+    if (dib->bmih->biHeight < 0)
+    {
+      line -= dib->line_size;
+    }
+    else
+    {
+      line += dib->line_size;
+    }
+  }
+
+  /* the color table is converted entry by entry, */
+  /* dib->palette_count entries in total          */
+  entry = dib->bmic;
+  for (i = 0; i < dib->palette_count; i++, entry++)
+    palette[i] = iQuad2Long(entry);
+}
+
+/*******************
+  DIB <-> File
+*******************/
+/* Writes the DIB to filename as a .BMP: file header, info header, palette entries, then the bits. Returns 1 on success, 0 on failure. */
+int imDibSaveFile(const imDib* dib, char* filename)
+{ 
+  HANDLE out;                   /* file handle */ 
+  BITMAPFILEHEADER bfh;         /* bitmap file-header */ 
+  DWORD written;                /* byte count reported by WriteFile, not inspected */
+  DWORD palette_bytes, header_bytes;
+  BOOL ok;
+
+  assert(dib);
+  assert(filename);
+
+  out = CreateFile(filename, GENERIC_WRITE, (DWORD) 0, 
+                 (LPSECURITY_ATTRIBUTES)NULL, 
+                 CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, (HANDLE) NULL); 
+
+  if (out == INVALID_HANDLE_VALUE) 
+    return 0;
+
+  /* Everything that precedes the bits array in the file. */
+  palette_bytes = (DWORD)(dib->palette_count*sizeof(RGBQUAD));
+  header_bytes = (DWORD)(sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER)) + palette_bytes;
+
+  /* 0x42 = "B" 0x4d = "M" */ 
+  bfh.bfType = 0x4d42;
+
+  /* Size of the entire file and offset to the bits array. */
+  bfh.bfSize = (DWORD)(header_bytes + dib->bits_size);
+  bfh.bfOffBits = header_bytes;
+
+  bfh.bfReserved1 = 0;
+  bfh.bfReserved2 = 0;
+
+  /* Each part is written only if the previous one succeeded. */
+
+  /* BITMAPFILEHEADER */
+  ok = WriteFile(out, (LPVOID)&bfh, sizeof(BITMAPFILEHEADER), &written, (LPOVERLAPPED)NULL);
+
+  /* BITMAPINFOHEADER */
+  if (ok)
+    ok = WriteFile(out, (LPVOID)dib->bmih, sizeof(BITMAPINFOHEADER), &written, (LPOVERLAPPED)NULL);
+
+  /* RGBQUAD array, when there is one */
+  if (ok && dib->palette_count > 0)
+    ok = WriteFile(out, (LPVOID)dib->bmic, palette_bytes, &written, (LPOVERLAPPED)NULL);
+
+  /* bits array */
+  if (ok)
+    ok = WriteFile(out, dib->bits, dib->bits_size, &written, (LPOVERLAPPED)NULL);
+
+  /* The handle is closed on success and on failure alike. */
+  CloseHandle(out);
+
+  if (!ok)
+    return 0;
+  return 1;
+} 
+/* Loads a .BMP file into a new owned DIB. Returns NULL when the file cannot be opened, is not a valid bitmap, or cannot be read. */
+imDib* imDibLoadFile(const char* filename)
+{ 
+  HANDLE hFile;                 /* file handle */ 
+  DWORD dwTmp; 
+  imDib* dib = NULL;
+  BITMAPFILEHEADER file_header; /* bitmap file-header */ 
+  BITMAPINFOHEADER bmih;
+
+  assert(filename);
+
+  hFile = CreateFile(filename, GENERIC_READ, (DWORD) 0, 
+                 (LPSECURITY_ATTRIBUTES)NULL, 
+                 OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, (HANDLE) NULL); 
+
+  if (hFile == INVALID_HANDLE_VALUE) 
+    return NULL;
+
+  /* Read the BITMAPFILEHEADER from the .BMP file; a short read means a truncated file. */ 
+  if (!ReadFile(hFile, (LPVOID)&file_header, sizeof(BITMAPFILEHEADER), (LPDWORD)&dwTmp, (LPOVERLAPPED)NULL) || dwTmp != sizeof(BITMAPFILEHEADER)) 
+    goto load_error;
+
+  if (file_header.bfType != 0x4d42)
+    goto load_error;
+
+  /* Read the BITMAPINFOHEADER from the file. */ 
+  if (!ReadFile(hFile, (LPVOID)&bmih, sizeof(BITMAPINFOHEADER), (LPDWORD)&dwTmp, (LPOVERLAPPED)NULL) || dwTmp != sizeof(BITMAPINFOHEADER)) 
+    goto load_error;
+
+  if(!iCheckHeader(&bmih))
+    goto load_error;
+
+  dib = imDibCreate(bmih.biWidth, abs(bmih.biHeight), bmih.biCompression==BI_BITFIELDS? -bmih.biBitCount: bmih.biBitCount);
+  if (!dib) goto load_error;
+  memcpy(dib->bmih, &bmih, sizeof(BITMAPINFOHEADER));  /* never copy more than the local header holds, whatever biSize claims */
+
+  if (bmih.biSize != sizeof(BITMAPINFOHEADER))
+  {
+    /* skip newer BIH definitions */
+    SetFilePointer(hFile, bmih.biSize - sizeof(BITMAPINFOHEADER), NULL, FILE_CURRENT);
+    dib->bmih->biSize = sizeof(BITMAPINFOHEADER);
+  }
+
+  /* Read the RGBQUAD array from the file. */ 
+  if (dib->palette_count > 0)
+  {
+    if (!ReadFile(hFile, (LPVOID)dib->bmic, dib->palette_count*sizeof(RGBQUAD), (LPDWORD)&dwTmp, (LPOVERLAPPED)NULL)) 
+      goto load_error;
+  }
+
+  /* Read the Bits array from the .BMP file. */ 
+  SetFilePointer(hFile, file_header.bfOffBits, NULL, FILE_BEGIN);
+
+  {
+    int bits_size = dib->bits_size;
+
+    if (bmih.biBitCount < 16 && bmih.biCompression != BI_RGB)
+      bits_size = GetFileSize(hFile, NULL) - file_header.bfOffBits;
+
+    if (bits_size < 0 || bits_size > dib->bits_size)  /* negative when bfOffBits lies beyond the end of the file */
+      goto load_error;
+
+    if (!ReadFile(hFile, dib->bits, bits_size, (LPDWORD)&dwTmp, (LPOVERLAPPED)NULL)) 
+      goto load_error;
+  }
+
+  /* Close the .BMP file. */ 
+  CloseHandle(hFile);
+
+  return dib;
+
+load_error:
+  if (dib) imDibDestroy(dib);
+  CloseHandle(hFile);
+  return NULL;
+} 
+
+/*******************
+  Screen -> DIB
+*******************/
+/* Copies a screen rectangle into a new DIB; a width or height of 0 means the full screen resolution. Returns NULL on failure. */
+imDib* imDibCaptureScreen(int x, int y, int width, int height)
+{
+  HBITMAP bitmap;
+  HDC ScreenDC = GetDC(NULL);
+  HDC hdcCompatible = CreateCompatibleDC(ScreenDC); 
+  HGDIOBJ hOldObject;
+  if (width == 0) width = GetDeviceCaps(ScreenDC, HORZRES);
+  if (height == 0) height = GetDeviceCaps(ScreenDC, VERTRES);
+
+  bitmap = CreateCompatibleBitmap(ScreenDC, width, height);
+  if (!bitmap) 
+  {
+    DeleteDC(hdcCompatible);  /* the memory DC was already created and must not leak */
+    ReleaseDC(NULL, ScreenDC);
+    return NULL;
+  }
+
+  /* Select the bitmaps into the compatible DC.  */
+  hOldObject = SelectObject(hdcCompatible, bitmap);
+
+  /* Copy color data for the entire display into a */
+  /* bitmap that is selected into a compatible DC. */
+  BitBlt(hdcCompatible, 0, 0, width, height, ScreenDC, x, y, SRCCOPY);
+  SelectObject(hdcCompatible, hOldObject);  /* deselect the bitmap before it is read and deleted */
+  ReleaseDC(NULL, ScreenDC);
+  DeleteDC(hdcCompatible);
+
+  {
+    imDib* dib = imDibFromHBitmap(bitmap, NULL);
+    DeleteObject(bitmap);
+    return dib;
+  }
+}
diff --git a/src/im_dibxbitmap.cpp b/src/im_dibxbitmap.cpp
new file mode 100644
index 0000000..8fabd4a
--- /dev/null
+++ b/src/im_dibxbitmap.cpp
@@ -0,0 +1,181 @@
+/** \file
+ * \brief Conversion between imDib and imImage
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_dibxbitmap.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+
+#include <windows.h>
+#include <assert.h>
+
+#include "im.h"
+#include "im_image.h"
+#include "im_dib.h"
+#include "im_util.h"
+
+/* Packs interleaved pixel data (3 bytes per pixel, 4 for a 32 bpp DIB) into the DIB, swapping the first and third byte of each pixel. */
+void imDibEncodeFromBitmap(imDib* dib, const unsigned char* data)
+{
+  int col, row, rows, top_down, has_alpha;
+  BYTE* dst;
+  
+  assert(dib);
+  assert(dib->bmih->biBitCount > 16);
+  assert(data);
+  
+  top_down = dib->bmih->biHeight < 0;
+  // a negative height starts at the last stored line and walks backwards
+  dst = top_down ? dib->bits + (dib->bits_size - dib->line_size) : dib->bits;
+  has_alpha = dib->bmih->biBitCount == 32;
+  rows = abs(dib->bmih->biHeight);
+
+  for (row = 0; row < rows; row++)
+  {
+    for (col = 0; col < dib->bmih->biWidth; col++)
+    {
+      // imDibDecodeToBitmap labels these bytes B, G, R in DIB order
+      dst[0] = data[2];   // B
+      dst[1] = data[1];   // G
+      dst[2] = data[0];   // R
+      dst += 3;
+      data += 3;
+
+      if (has_alpha)
+        *dst++ = *data++;
+    }
+    
+    dst += dib->pad_size;
+    if (top_down)
+      dst -= 2*dib->line_size;
+  }
+}
+/* Unpacks a 16, 24 or 32 bpp DIB into interleaved R,G,B bytes per pixel, with a fourth byte copied through for 32 bpp. */
+void imDibDecodeToBitmap(const imDib* dib, unsigned char* data)
+{
+  int x, y, offset;
+  unsigned short color;
+  BYTE* bits;
+  unsigned int rmask = 0, gmask = 0, bmask = 0, 
+                roff = 0,  goff = 0,  boff = 0; /* pixel bit mask control when reading 16 and 32 bpp images */
+  
+  assert(dib);
+  assert(dib->bmih->biBitCount > 8);
+  assert(data);
+
+  if (dib->bmih->biHeight < 0)
+    bits = dib->bits + (dib->bits_size - dib->line_size); /* start of last line */
+  else
+    bits = dib->bits;
+  
+  if (dib->bmih->biBitCount == 16)
+    offset = dib->line_size;  /* do not increment for each pixel, jump line */
+  else
+    offset = dib->pad_size;   /* increment for each pixel, jump pad */
+  
+  if (dib->bmih->biCompression == BI_BITFIELDS)
+  {
+    unsigned int Mask;
+    unsigned int* palette = (unsigned int*)dib->bmic;
+    /* "Mask &&" ends the shift loops on an all-zero mask, which would otherwise never terminate */
+    rmask = Mask = palette[0];
+    while (Mask && !(Mask & 0x01))
+    {Mask >>= 1; roff++;}
+    
+    gmask = Mask = palette[1];
+    while (Mask && !(Mask & 0x01))
+    {Mask >>= 1; goff++;}
+    
+    bmask = Mask = palette[2];
+    while (Mask && !(Mask & 0x01))
+    {Mask >>= 1; boff++;}
+  }
+  else if (dib->bmih->biBitCount == 16)
+  {
+    bmask = 0x001F;
+    gmask = 0x03E0;
+    rmask = 0x7C00;
+    boff = 0;
+    goff = 5;
+    roff = 10;
+  }
+  /* NOTE(review): the masks are applied to 16 bpp pixels only; 24 and 32 bpp pixels are read as fixed B,G,R[,A] bytes */
+  for (y = 0; y < abs(dib->bmih->biHeight); y++)
+  {
+    for (x = 0; x < dib->bmih->biWidth; x++)
+    {
+      if (dib->bmih->biBitCount == 16)
+      {
+        color = ((unsigned short*)bits)[x];
+        *data++ = rmask ? (unsigned char)((((rmask & color) >> roff) * 255) / (rmask >> roff)) : 0;   /* empty mask: channel is 0, no division by zero */
+        *data++ = gmask ? (unsigned char)((((gmask & color) >> goff) * 255) / (gmask >> goff)) : 0;
+        *data++ = bmask ? (unsigned char)((((bmask & color) >> boff) * 255) / (bmask >> boff)) : 0;
+      }
+      else
+      {
+        *(data+2) = *bits++; // B
+        *(data+1) = *bits++; // G
+        *(data+0) = *bits++; // R
+
+        data += 3;
+        
+        if (dib->bmih->biBitCount == 32)
+          *data++ = *bits++;
+      }
+    }
+    
+    bits += offset;
+
+    if (dib->bmih->biHeight < 0)
+      bits -= 2*dib->line_size;
+  }
+}
+/* Converts a DIB into a new IM_BYTE imImage: IM_MAP for 8 bpp or less, IM_RGB otherwise. Returns NULL if the image cannot be created. */
+imImage* imDibToImage(const imDib* dib)
+{
+  assert(dib);
+
+  const int is_indexed = dib->bmih->biBitCount <= 8;
+  const int img_height = abs(dib->bmih->biHeight);
+
+  imImage* result = imImageCreate(dib->bmih->biWidth, img_height, is_indexed ? IM_MAP : IM_RGB, IM_BYTE);
+  if (result == NULL) 
+    return NULL;
+
+  if (result->color_space != IM_MAP)
+  {
+    // deeper than 8 bpp: fill the three planes; no alpha plane is requested
+    imDibDecodeToRGBA(dib, (imbyte*)result->data[0], (imbyte*)result->data[1], (imbyte*)result->data[2], NULL);
+    return result;
+  }
+
+  // palette based: indices go to the first plane, colors to the image palette
+  result->palette_count = dib->palette_count;
+  imDibDecodeToMap(dib, (imbyte*)result->data[0], result->palette);
+
+  return result;
+}
+/* Converts a bitmap imImage into a new DIB: 24 bpp for IM_RGB, 8 bpp with palette otherwise. Returns NULL on failure. */
+imDib* imDibFromImage(const imImage* image)
+{
+  assert(image);
+  assert(imImageIsBitmap(image));
+
+  if (!imImageIsBitmap(image))
+    return NULL;
+
+  // every color space other than RGB is written as an 8 bpp palette image
+  const int is_rgb = (image->color_space == IM_RGB);
+
+  imDib* result = imDibCreate(image->width, image->height, is_rgb ? 24 : 8);
+  if (result == NULL)
+    return NULL;
+
+  // RGB: the three planes are packed and no alpha plane is passed
+  // otherwise: plane 0 holds the indices and the image palette is copied
+  if (is_rgb)
+    imDibEncodeFromRGBA(result, (const imbyte*)image->data[0], (const imbyte*)image->data[1], (const imbyte*)image->data[2], NULL);
+  else
+    imDibEncodeFromMap(result, (const imbyte*)image->data[0], image->palette, image->palette_count);
+
+  return result;
+}
diff --git a/src/im_ecw.def b/src/im_ecw.def
new file mode 100644
index 0000000..a0c8858
--- /dev/null
+++ b/src/im_ecw.def
@@ -0,0 +1,2 @@
+EXPORTS
+  imFormatRegisterECW
\ No newline at end of file
diff --git a/src/im_ecw.mak b/src/im_ecw.mak
new file mode 100644
index 0000000..0894425
--- /dev/null
+++ b/src/im_ecw.mak
@@ -0,0 +1,16 @@
+PROJNAME = im
+LIBNAME = im_ecw
+OPT = YES
+# The im_ecw library is built from a single source file and linked against NCSEcw.
+SRC = im_format_ecw.cpp
+                                       
+ECWSDKINC = d:/lng/ecw_sdk/include
+ECWSDKLIB = d:/lng/ecw_sdk/lib/$(TEC_UNAME)
+                                       
+INCLUDES = ../include $(ECWSDKINC)
+# NOTE(review): ECWSDKINC and ECWSDKLIB above are absolute paths on drive d: - adjust them per machine.
+LDIR = $(ECWSDKLIB)
+LIBS = NCSEcw
+
+IM = ..
+USE_IM = Yes
diff --git a/src/im_fftw.def b/src/im_fftw.def
new file mode 100644
index 0000000..6154317
--- /dev/null
+++ b/src/im_fftw.def
@@ -0,0 +1,7 @@
+EXPORTS
+  imProcessFFT
+  imProcessIFFT
+  imProcessSwapQuadrants
+  imProcessFFTraw
+  imProcessAutoCorrelation
+  imProcessCrossCorrelation
diff --git a/src/im_fftw.mak b/src/im_fftw.mak
new file mode 100644
index 0000000..eeed4f2
--- /dev/null
+++ b/src/im_fftw.mak
@@ -0,0 +1,45 @@
+PROJNAME = im
+LIBNAME = im_fftw
+OPT = YES
+# The sources under fftw/ plus process/im_fft.cpp are built into the im_fftw library.
+DEF_FILE = im_fftw.def
+
+SRC = config.c executor.c fftwnd.c fn_1.c fn_10.c fn_11.c fn_12.c fn_13.c \
+  fn_14.c fn_15.c fn_16.c fn_2.c fn_3.c fn_32.c fn_4.c fn_5.c fn_6.c fn_64.c fn_7.c \
+  fn_8.c fn_9.c fni_1.c fni_10.c fni_11.c fni_12.c fni_13.c fni_14.c fni_15.c fni_16.c \
+  fni_2.c fni_3.c fni_32.c fni_4.c fni_5.c fni_6.c fni_64.c fni_7.c fni_8.c fni_9.c \
+  ftw_10.c ftw_16.c ftw_2.c ftw_3.c ftw_32.c ftw_4.c ftw_5.c ftw_6.c ftw_64.c ftw_7.c \
+  ftw_8.c ftw_9.c ftwi_10.c ftwi_16.c ftwi_2.c ftwi_3.c ftwi_32.c ftwi_4.c ftwi_5.c \
+  ftwi_6.c ftwi_64.c ftwi_7.c ftwi_8.c ftwi_9.c generic.c malloc.c planner.c putils.c \
+  rader.c timer.c twiddle.c wisdom.c wisdomio.c
+SRC := $(addprefix fftw/, $(SRC))
+
+SRC := process/im_fft.cpp $(SRC)
+
+INCLUDES := fftw
+
+DEFINES = FFTW_ENABLE_FLOAT
+
+USE_IM = Yes
+IM = ..
+LIBS = im_process
+# When TEC_UNAME contains one of the substrings below, IM_DEFMATHFLOAT is added to DEFINES.
+ifneq ($(findstring ow, $(TEC_UNAME)), )
+  DEFINES += IM_DEFMATHFLOAT
+endif   
+
+ifneq ($(findstring bc, $(TEC_UNAME)), )
+  DEFINES += IM_DEFMATHFLOAT
+endif         
+
+ifneq ($(findstring AIX, $(TEC_UNAME)), )
+  DEFINES += IM_DEFMATHFLOAT
+endif
+
+ifneq ($(findstring SunOS, $(TEC_UNAME)), )
+  DEFINES += IM_DEFMATHFLOAT
+endif
+      
+ifneq ($(findstring HP-UX, $(TEC_UNAME)), )
+  DEFINES += IM_DEFMATHFLOAT
+endif
diff --git a/src/im_fftw3.mak b/src/im_fftw3.mak
new file mode 100644
index 0000000..7bb1a94
--- /dev/null
+++ b/src/im_fftw3.mak
@@ -0,0 +1,151 @@
+PROJNAME = im
+LIBNAME = im_fftw3
+OPT = YES
+
+DEF_FILE = im_fftw.def
+
+RDFT_CODELETS_R2R = e01_8.c e10_8.c rrcodlist.c
+RDFT_CODELETS_R2R := $(addprefix r2r/, $(RDFT_CODELETS_R2R))
+
+R2HC = r2hc_2.c r2hc_3.c r2hc_4.c r2hc_5.c r2hc_6.c r2hc_7.c r2hc_8.c	\
+  r2hc_9.c r2hc_10.c r2hc_11.c r2hc_12.c r2hc_13.c r2hc_14.c r2hc_15.c	\
+  r2hc_16.c r2hc_32.c 
+MR2HC = mr2hc_32.c mr2hc_64.c mr2hc_128.c
+HF = hf_2.c hf_3.c hf_4.c hf_5.c hf_6.c hf_7.c hf_8.c hf_9.c	\
+  hf_10.c hf_12.c hf_15.c hf_16.c hf_32.c hf_64.c
+HF2 = hf2_4.c hf2_8.c hf2_16.c hf2_32.c hf2_64.c
+R2HCII = r2hcII_2.c r2hcII_3.c r2hcII_4.c r2hcII_5.c r2hcII_6.c		\
+  r2hcII_7.c r2hcII_8.c r2hcII_9.c r2hcII_10.c r2hcII_12.c r2hcII_15.c	\
+  r2hcII_16.c r2hcII_32.c
+MR2HCII =  mr2hcII_32.c mr2hcII_64.c
+RDFT_CODELETS_R2HC = $(R2HC) $(MR2HC) $(HF) $(HF2) $(R2HCII) $(MR2HCII) rhcodlist.c 
+RDFT_CODELETS_R2HC := $(addprefix r2hc/, $(RDFT_CODELETS_R2HC))
+
+HC2R = hc2r_3.c hc2r_4.c hc2r_5.c hc2r_6.c hc2r_7.c hc2r_8.c hc2r_9.c	\
+  hc2r_10.c hc2r_11.c hc2r_12.c hc2r_13.c hc2r_14.c hc2r_15.c hc2r_16.c	\
+  hc2r_32.c
+MHC2R = mhc2r_32.c mhc2r_64.c mhc2r_128.c
+HB = hb_2.c hb_3.c hb_4.c hb_5.c hb_6.c hb_7.c hb_8.c hb_9.c	\
+  hb_10.c hb_12.c hb_15.c hb_16.c hb_32.c hb_64.c
+HB2 = hb2_4.c hb2_8.c hb2_16.c hb2_32.c hb2_64.c
+HC2RIII = hc2rIII_2.c hc2rIII_3.c hc2rIII_4.c hc2rIII_5.c hc2rIII_6.c	\
+  hc2rIII_7.c hc2rIII_8.c hc2rIII_9.c hc2rIII_10.c hc2rIII_12.c		\
+  hc2rIII_15.c hc2rIII_16.c hc2rIII_32.c
+MHC2RIII = mhc2rIII_32.c mhc2rIII_64.c
+RDFT_CODELETS_HC2R = $(HC2R) $(MHC2R) $(HB) $(HC2RIII) $(MHC2RIII) hcodlist.c
+RDFT_CODELETS_HC2R := $(addprefix hc2r/, $(RDFT_CODELETS_HC2R))
+# NOTE(review): HB2 is defined above but not listed in RDFT_CODELETS_HC2R, unlike HF2 in RDFT_CODELETS_R2HC - confirm this is intended.
+RDFT_CODELETS = hc2r.c hfb.c	\
+  r2hc.c r2r.c $(RDFT_CODELETS_R2R) $(RDFT_CODELETS_HC2R) $(RDFT_CODELETS_R2HC)
+RDFT_CODELETS := $(addprefix codelets/, $(RDFT_CODELETS))
+
+RDFT2 = buffered2.c direct2.c nop2.c rank0-rdft2.c rank-geq2-rdft2.c	\
+  plan2.c problem2.c solve2.c vrank-geq1-rdft2.c rdft2-radix2.c		\
+  rdft2-tensor-max-index.c rdft2-inplace-strides.c rdft2-strides.c
+RDFT = dft-r2hc.c dht-r2hc.c dht-rader.c rbuffered.c	\
+  rconf.c rdirect.c rgeneric.c khc2hc-dif.c khc2hc-dit.c	\
+  khc2r.c kr2hc.c kr2r.c hc2hc.c hc2hc-dif.c hc2hc-dit.c		\
+  hc2hc-buf.c rindirect.c rnop.c rplan.c rproblem.c rader-hc2hc.c rrank0.c	\
+  rrank-geq2.c rdft-dht.c rsolve.c rvrank-geq1.c $(RDFT2) $(RDFT_CODELETS)
+RDFT  := $(addprefix rdft/, $(RDFT))
+
+N1 = n1_2.c n1_3.c n1_4.c n1_5.c n1_6.c n1_7.c n1_8.c n1_9.c n1_10.c	\
+  n1_11.c n1_12.c n1_13.c n1_14.c n1_15.c n1_16.c
+M1 = m1_16.c m1_32.c m1_64.c
+T1 = t1_2.c t1_3.c t1_4.c t1_5.c t1_6.c t1_7.c t1_8.c t1_9.c	\
+  t1_10.c t1_12.c t1_15.c t1_16.c t1_32.c t1_64.c
+T2 = t2_4.c t2_8.c t2_16.c t2_32.c t2_64.c
+DFT_CODELETS_STANDARD = $(N1) $(M1) $(T1) $(T2) scodlist.c
+DFT_CODELETS_STANDARD  := $(addprefix standard/, $(DFT_CODELETS_STANDARD))
+
+DFT_CODELETS_INPLACE = q1_2.c q1_4.c q1_8.c  q1_3.c q1_5.c q1_6.c icodlist.c
+DFT_CODELETS_INPLACE := $(addprefix inplace/, $(DFT_CODELETS_INPLACE))
+
+DFT_CODELETS = n.c t.c $(DFT_CODELETS_INPLACE) $(DFT_CODELETS_STANDARD)
+DFT_CODELETS := $(addprefix codelets/, $(DFT_CODELETS))
+
+DFT = buffered.c conf.c ct-dif.c ct-dit.c ct-ditbuf.c	\
+  ct-ditf.c ct.c direct.c generic.c indirect.c kdft-dif.c kdft-difsq.c	\
+  kdft-dit.c kdft.c nop.c plan.c problem.c rader.c rader-omega.c		\
+  rank-geq2.c rank0.c solve.c vrank-geq1.c vrank2-transpose.c		\
+  vrank3-transpose.c zero.c $(DFT_CODELETS)
+DFT := $(addprefix dft/, $(DFT))
+
+REODFT = reoconf.c reodft010e-r2hc.c	\
+  reodft11e-radix2.c reodft11e-r2hc-odd.c redft00e-r2hc-pad.c	\
+  rodft00e-r2hc-pad.c
+REODFT := $(addprefix reodft/, $(REODFT))
+
+KERNEL = align.c alloc.c assert.c awake.c kbuffered.c	\
+  kct.c debug.c hash.c iabs.c md5.c md5-1.c minmax.c ops.c pickdim.c	\
+  kplan.c planner.c primes.c print.c kproblem.c krader.c scan.c solver.c	\
+  solvtab.c square.c stride.c tensor.c tensor1.c tensor2.c tensor4.c	\
+  tensor5.c tensor7.c tensor8.c tensor9.c timer.c transpose.c trig.c	\
+  trig1.c twiddle.c
+KERNEL := $(addprefix kernel/, $(KERNEL))
+
+API = apiplan.c configure.c execute.c execute-dft.c	\
+  execute-dft-c2r.c execute-dft-r2c.c execute-r2r.c			\
+  export-wisdom-to-file.c export-wisdom-to-string.c export-wisdom.c	\
+  extract-reim.c flops.c forget-wisdom.c import-system-wisdom.c		\
+  import-wisdom-from-file.c import-wisdom-from-string.c import-wisdom.c	\
+  mapflags.c mkprinter-file.c mktensor-iodims.c mktensor-rowmajor.c	\
+  plan-dft-1d.c plan-dft-2d.c plan-dft-3d.c plan-dft.c plan-guru-dft.c	\
+  plan-many-dft.c plan-many-dft-c2r.c plan-dft-c2r.c plan-dft-c2r-1d.c	\
+  plan-dft-c2r-2d.c plan-dft-c2r-3d.c plan-many-dft-r2c.c plan-dft-r2c.c	\
+  plan-dft-r2c-1d.c plan-dft-r2c-2d.c plan-dft-r2c-3d.c			\
+  plan-guru-dft-c2r.c plan-guru-dft-r2c.c map-r2r-kind.c plan-many-r2r.c	\
+  plan-r2r-1d.c plan-r2r-2d.c plan-r2r-3d.c plan-r2r.c plan-guru-r2r.c	\
+  print-plan.c the-planner.c rdft2-pad.c version.c \
+  execute-split-dft-c2r.c execute-split-dft-r2c.c execute-split-dft.c	\
+  plan-guru-split-dft-c2r.c plan-guru-split-dft-r2c.c	\
+  plan-guru-split-dft.c f77api.c
+API := $(addprefix api/, $(API))
+	
+SRC = $(API) $(REODFT) $(RDFT) $(DFT) $(KERNEL)
+SRC := $(addprefix fftw3/, $(SRC))
+
+SRC := process/im_fft.cpp $(SRC)
+
+INCLUDES := ../include fftw3/kernel fftw3/dft fftw3/rdft fftw3/api \
+  fftw3/reodft fftw3/rdft/codelets fftw3/dft/codelets
+
+DEFINES = USE_FFTW3
+
+USE_IM = Yes
+IM = ..
+LIBS = im_process
+
+# Extra DEFINES are chosen by substring matches on $(TEC_SYSNAME) and $(TEC_UNAME).
+ifneq ($(findstring Win, $(TEC_SYSNAME)), )
+  ifneq ($(findstring gcc, $(TEC_UNAME)), )
+    DEFINES += HAVE_UINTPTR_T
+  endif
+  ifneq ($(findstring ow, $(TEC_UNAME)), )
+    DEFINES += IM_DEFMATHFLOAT
+  endif         
+  ifneq ($(findstring bc, $(TEC_UNAME)), )
+    DEFINES += IM_DEFMATHFLOAT
+  endif
+else
+  ifneq ($(findstring IRIX, $(TEC_UNAME)), )
+    DEFINES += HAVE_UINTPTR_T
+  endif
+  ifneq ($(findstring Darwin, $(TEC_UNAME)), )
+    DEFINES += HAVE_UINTPTR_T
+  endif
+  ifneq ($(findstring FreeBSD, $(TEC_UNAME)), )
+    DEFINES += HAVE_UINTPTR_T
+  endif
+  ifneq ($(findstring AIX, $(TEC_UNAME)), )
+    DEFINES += IM_DEFMATHFLOAT HAVE_UINTPTR_T
+  endif
+  ifneq ($(findstring SunOS, $(TEC_UNAME)), )
+    DEFINES += IM_DEFMATHFLOAT
+  endif
+endif
+
+# Duplicated files: buffered.c conf.c direct.c indirect.c generic.c 
+#                   nop.c plan.c problem.c rader.c rank0.c rank-geq2.c
+#                   vrank-geq1.c solve.c ct.c codlist.c
+# These were renamed to "r*" when in the rdft folder, and to "k*" when in the kernel folder.
diff --git a/src/im_file.cpp b/src/im_file.cpp
new file mode 100644
index 0000000..81e6128
--- /dev/null
+++ b/src/im_file.cpp
@@ -0,0 +1,428 @@
+/** \file
+ * \brief File Access
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_file.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "im.h"
+#include "im_format.h"
+#include "im_util.h"
+#include "im_attrib.h"
+#include "im_counter.h"
+#include "im_plus.h"  // make sure that this file is compiled
+
+
+void imFileClear(imFile* ifile)
+{
+  // Returns the handle to its "nothing read yet" state.
+  // compression and image_count are deliberately left untouched.
+
+  // default palette is a 256 level gray ramp
+  ifile->palette_count = 256;
+  for (int level = 0; level < 256; level++)
+  {
+    imbyte gray = (imbyte)level;
+    ifile->palette[level] = imColorEncode(gray, gray, gray);
+  }
+
+  // no image selected, no geometry, no formats
+  ifile->image_index = -1;
+  ifile->width = 0;  ifile->height = 0;
+  ifile->file_color_mode = 0;  ifile->file_data_type = 0;
+  ifile->user_color_mode = 0;  ifile->user_data_type = 0;
+
+  // no pending line conversions, no line buffer
+  ifile->switch_type = 0;  ifile->convert_bpp = 0;
+  ifile->line_buffer_alloc = 0;  ifile->line_buffer_extra = 0;
+  ifile->line_buffer_size = 0;
+  ifile->line_buffer = 0;
+
+  ifile->attrib_table = 0;  ifile->is_new = 0;
+}
+
+imFile* imFileOpen(const char* file_name, int *error)
+{
+  // Opens an existing file through imFormatOpen, which receives error.
+  // Returns NULL when imFormatOpen does not hand back a driver.
+  assert(file_name);
+
+  imFormat* opened = imFormatOpen(file_name, error);
+  if (opened != NULL)
+  {
+    imFileClear(opened);  // the clear zeroes attrib_table, so it must come first
+    opened->attrib_table = new imAttribTable(599);
+    opened->counter = imCounterBegin(file_name);
+  }
+
+  return opened;
+}
+
+imFile* imFileOpenAs(const char* file_name, const char* format, int *error)
+{
+  // Opens an existing file through imFormatOpenAs, which receives format and error.
+  // Returns NULL when imFormatOpenAs does not hand back a driver.
+  assert(file_name);
+
+  imFormat* opened = imFormatOpenAs(file_name, format, error);
+  if (opened != NULL)
+  {
+    imFileClear(opened);  // the clear zeroes attrib_table, so it must come first
+    opened->attrib_table = new imAttribTable(599);
+    opened->counter = imCounterBegin(file_name);
+  }
+
+  return opened;
+}
+
+imFile* imFileNew(const char* file_name, const char* format, int *error)
+{
+  // Creates a file for writing through imFormatNew, which receives format and error.
+  // Returns NULL when imFormatNew does not hand back a driver.
+  assert(file_name);
+
+  imFormat* created = imFormatNew(file_name, format, error);
+  if (created == NULL)
+    return NULL;
+
+  imFileClear(created);
+
+  // a new file starts empty: no compression chosen, no images, flagged as new
+  created->compression[0] = 0;
+  created->image_count = 0;
+  created->is_new = 1;
+  created->attrib_table = new imAttribTable(101);
+  created->counter = imCounterBegin(file_name);
+  return created;
+}
+
+void imFileClose(imFile* ifile)
+{
+  // Ends the counter, closes the format driver, then releases
+  // the line buffer and the attribute table attached to the handle.
+  assert(ifile);
+  // the table pointer is fetched before Close() is called
+  imAttribTable* table = (imAttribTable*)ifile->attrib_table;
+
+  imCounterEnd(ifile->counter);
+  ((imFormat*)ifile)->Close();
+
+  free(ifile->line_buffer);  // free(NULL) does nothing, no guard needed
+  delete table;
+}
+
+void* imFileHandle(imFile* ifile, int index)
+{
+  // Forwards to the driver's Handle(); what each index yields is up to the driver.
+  assert(ifile);
+  return ((imFormat*)ifile)->Handle(index);
+}
+
+void imFileSetAttribute(imFile* ifile, const char* attrib, int data_type, int count, const void* data)
+{
+  // Stores an attribute in the file's table; data == NULL removes the attribute instead.
+  assert(ifile);
+  assert(attrib);
+  imAttribTable* table = (imAttribTable*)((imFormat*)ifile)->attrib_table;
+  if (data == NULL)
+    table->UnSet(attrib);
+  else
+    table->Set(attrib, data_type, count, data);
+}
+
+const void* imFileGetAttribute(imFile* ifile, const char* attrib, int *data_type, int *count)
+{
+  // Looks attrib up in the file's table; data_type and count are passed straight to Get().
+  assert(ifile);
+  assert(attrib);
+  imAttribTable* table = (imAttribTable*)((imFormat*)ifile)->attrib_table;
+  return table->Get(attrib, data_type, count);
+}
+
+static int iAttribCB(void* user_data, int index, const char* name, int data_type, int count, const void* data)
+{
+  // ForEach callback: stores each attribute name in slot index of the caller's array.
+  char** names = (char**)user_data;
+  names[index] = (char*)name;
+
+  (void)count; (void)data; (void)data_type;  // not needed here
+  return 1;  // the same value is returned for every entry
+}
+
+void imFileGetAttributeList(imFile* ifile, char** attrib, int *attrib_count)
+{
+  // Always reports the attribute count; when attrib is given it is also filled with the names.
+  assert(ifile);
+  assert(attrib_count);
+  imAttribTable* table = (imAttribTable*)ifile->attrib_table;
+  *attrib_count = table->Count();
+  if (attrib != NULL)
+    table->ForEach((void*)attrib, iAttribCB);
+}
+
+void imFileGetInfo(imFile* ifile, char* format, char* compression, int *image_count)
+{
+  // Copies out whichever of the three items the caller asked for; a NULL pointer skips that item.
+  assert(ifile);
+  if (image_count != NULL) *image_count = ifile->image_count;
+  if (format != NULL) strcpy(format, ((imFormat*)ifile)->format);
+  if (compression != NULL) strcpy(compression, ifile->compression);
+  // strcpy does not check sizes: the destination strings must be large enough
+}
+
+static int iFileCheckPaletteGray(imFile* ifile)
+{
+  // 1 when every palette entry i decodes to exactly (i, i, i), 0 otherwise.
+  for (int idx = 0; idx < ifile->palette_count; idx++)
+  {
+    imbyte red, green, blue;
+    imColorDecode(&red, &green, &blue, ifile->palette[idx]);
+    bool is_ramp_entry = (idx == red) && (red == green) && (green == blue);
+    if (!is_ramp_entry) return 0;
+  }
+
+  return 1;
+}
+
+static int iFileCheckPaletteBinary(imFile* ifile)
+{
+  // 1 when the palette has at most 2 entries and each of the first two
+  // colors is one of (0,0,0), (1,1,1) or (255,255,255); 0 otherwise.
+  if (ifile->palette_count > 2)
+    return 0;
+
+  for (int entry = 0; entry < 2; entry++)
+  {
+    imbyte r, g, b;
+    imColorDecode(&r, &g, &b, ifile->palette[entry]);
+
+    int is_black = (r == 0 && g == 0 && b == 0);
+    int is_one   = (r == 1 && g == 1 && b == 1);
+    int is_white = (r == 255 && g == 255 && b == 255);
+    if (!is_black && !is_one && !is_white)
+      return 0;
+  }
+
+  return 1;
+}
+
+int imFileReadImageInfo(imFile* ifile, int index, int *width, int *height, int *file_color_mode, int *file_data_type)
+{
+  assert(ifile);
+  assert(!ifile->is_new);
+  imFormat* iformat = (imFormat*)ifile;
+  // Selects image `index` of a file being read and reports its size and file format (NULL outputs are skipped).
+  if (index < 0 || index >= ifile->image_count)  // a negative index is as invalid as one past the end
+    return IM_ERR_DATA;
+
+  if (ifile->image_index != -1 &&
+      ifile->image_index == index)
+  {  // same image as the last successful call: answer from the stored values, the driver is not asked again
+    if(width) *width = ifile->width;
+    if(height) *height = ifile->height;
+    if(file_color_mode) *file_color_mode = ifile->file_color_mode;
+    if(file_data_type) *file_data_type = ifile->file_data_type;
+
+    return IM_ERR_NONE;
+  }
+  // line conversions are cleared before the driver reads the image header
+  ifile->convert_bpp = 0;
+  ifile->switch_type = 0;
+
+  int error = iformat->ReadImageInfo(index);
+  if (error) return error;
+
+  if (!imImageCheckFormat(ifile->file_color_mode, ifile->file_data_type))
+    return IM_ERR_DATA;
+
+  if (imColorModeSpace(ifile->file_color_mode) == IM_BINARY)
+  {
+    ifile->palette_count = 2;
+    ifile->palette[0] = imColorEncode(0, 0, 0);
+    ifile->palette[1] = imColorEncode(255, 255, 255);
+  }
+  // a MAP image whose palette is really a gray ramp or black/white is reported as GRAY or BINARY
+  if (imColorModeSpace(ifile->file_color_mode) == IM_MAP)
+  {    
+    if (iFileCheckPaletteGray(ifile))
+      ifile->file_color_mode = (ifile->file_color_mode & 0xFF00) | IM_GRAY;
+
+    if (iFileCheckPaletteBinary(ifile))
+      ifile->file_color_mode = (ifile->file_color_mode & 0xFF00) | IM_BINARY;
+  }
+
+  if(width) *width = ifile->width;
+  if(height) *height = ifile->height;
+  if(file_color_mode) *file_color_mode = ifile->file_color_mode;
+  if(file_data_type) *file_data_type = ifile->file_data_type;
+
+  ifile->image_index = index;  // remembered only after everything above succeeded
+
+  return IM_ERR_NONE;
+}
+
+void imFileGetPalette(imFile* ifile, long* palette, int *palette_count)
+{
+  // Copies the current palette into palette and, when asked, reports its size through palette_count.
+  assert(ifile);
+  assert(palette);
+  const int entries = ifile->palette_count;
+  if (palette != NULL && entries != 0)
+    memcpy(palette, ifile->palette, entries*sizeof(long));
+  if (palette_count != NULL) *palette_count = entries;
+}
+
+static void iFileCheckConvertGray(imFile* ifile, imbyte* data)
+{
+  // Forces the palette to the gray ramp (i, i, i). When an entry had to be corrected,
+  // the first width*height bytes of data are translated to the gray each index used to show.
+  int i, do_remap = 0;
+  imbyte remap[256], r, g, b;
+
+  for (i = 0; i < ifile->palette_count && i < 256; i++)
+  {
+    imColorDecode(&r, &g, &b, ifile->palette[i]);
+    if (r != i)
+    {
+      ifile->palette[i] = imColorEncode((imbyte)i, (imbyte)i, (imbyte)i);
+      do_remap = 1;
+    }
+    remap[i] = r;
+  }
+
+  // Indices beyond the palette have no color to translate. Map them to themselves,
+  // otherwise the loop below reads remap entries that were never written.
+  for (; i < 256; i++)
+    remap[i] = (imbyte)i;
+
+  if (!do_remap)
+    return;
+
+  int count = ifile->width*ifile->height;
+  for (i = 0; i < count; i++)
+    data[i] = remap[data[i]];
+}
+
+static void iFileCheckConvertBinary(imFile* ifile, imbyte* data)
+{
+  // Turns every non-zero value among the first width*height bytes of data into 1.
+  const int total = ifile->width*ifile->height;
+  for (int k = 0; k < total; k++)
+  {
+    if (data[k] != 0)
+      data[k] = 1;
+  }
+}
+
+int imFileReadImageData(imFile* ifile, void* data, int convert2bitmap, int color_mode_flags)
+{
+  assert(ifile);
+  assert(!ifile->is_new);
+  imFormat* iformat = (imFormat*)ifile;
+  // Reads the pixels of the image selected by imFileReadImageInfo into data; image_index is -1 until then.
+  if (ifile->image_index == -1)
+    return IM_ERR_DATA;
+  // by default the user gets the data in the same color mode and data type as the file
+  ifile->user_color_mode = ifile->file_color_mode;
+  ifile->user_data_type = ifile->file_data_type;
+
+  if (convert2bitmap)
+  {
+    ifile->user_data_type = IM_BYTE;
+    ifile->user_color_mode = imColorModeToBitmap(ifile->file_color_mode);
+  }
+  // -1 keeps the mode as is; otherwise only the color space is kept and the given flags are added to it
+  if (color_mode_flags != -1)
+  {
+    ifile->user_color_mode = imColorModeSpace(ifile->user_color_mode);
+    ifile->user_color_mode |= color_mode_flags;
+  }
+
+  if (!imImageCheckFormat(ifile->user_color_mode, ifile->user_data_type))
+    return IM_ERR_DATA;
+
+  if (!imFileCheckConversion(ifile))
+    return IM_ERR_DATA;
+
+  imFileLineBufferInit(ifile);
+
+  int ret = iformat->ReadImageData(data);
+
+  // here we can NOT change the file_color_mode we already returned to the user
+  // so just check for gray and binary consistency
+  // NOTE(review): both fix-ups run even when ret reports an error - confirm that is intended
+  if (imColorModeSpace(ifile->file_color_mode) == IM_GRAY && ifile->file_data_type == IM_BYTE)
+    iFileCheckConvertGray(ifile, (imbyte*)data);
+
+  if (imColorModeSpace(ifile->file_color_mode) == IM_BINARY)
+    iFileCheckConvertBinary(ifile, (imbyte*)data);
+
+  return ret;
+}
+
+void imFileSetInfo(imFile* ifile, const char* compression)
+{
+  // Chooses the compression name of a file being written; NULL resets it to the empty string.
+  assert(ifile);
+  assert(ifile->is_new);
+  if (compression != NULL)
+    strcpy(ifile->compression, compression);  // unchecked copy, the name must fit the field
+  else
+    ifile->compression[0] = 0;
+}
+
+void imFileSetPalette(imFile* ifile, long* palette, int palette_count)
+{
+  // Replaces the file palette with the first palette_count colors of palette (at most 256 are taken).
+  assert(ifile); assert(palette); assert(palette_count != 0);
+  if (palette_count > 256) palette_count = 256;  // the table is used with 256 entries, never copy past it
+  if (palette_count < 0) palette_count = 0;      // a negative count would turn into a huge memcpy size
+  memcpy(ifile->palette, palette, palette_count*sizeof(long));
+  ifile->palette_count = palette_count;
+}
+
+int imFileWriteImageInfo(imFile* ifile, int width, int height, int user_color_mode, int user_data_type)
+{
+  // Checks the requested image description, stores it in the handle and lets the driver write it.
+  assert(ifile);
+  assert(ifile->is_new);
+  imFormat* driver = (imFormat*)ifile;
+  if (!imImageCheckFormat(user_color_mode, user_data_type))
+    return IM_ERR_DATA;
+
+  int refused = driver->CanWrite(ifile->compression, user_color_mode, user_data_type);
+  if (refused != 0)
+    return refused;
+  ifile->user_data_type = user_data_type;
+  ifile->user_color_mode = user_color_mode;
+  ifile->height = height;
+  ifile->width = width;
+
+  // binary images always get the fixed black and white palette
+  if (imColorModeSpace(user_color_mode) == IM_BINARY)
+  {
+    ifile->palette[1] = imColorEncode(255, 255, 255);
+    ifile->palette[0] = imColorEncode(0, 0, 0);
+    ifile->palette_count = 2;
+  }
+  return driver->WriteImageInfo();
+}
+
+int imFileWriteImageData(imFile* ifile, void* data)
+{
+  // Writes the pixels in data to a file being created. Returns IM_ERR_DATA when
+  // imFileCheckConversion rejects the user/file combination, else the driver's result.
+  assert(ifile);
+  assert(ifile->is_new);
+  assert(data);
+
+  if (imFileCheckConversion(ifile) == 0)
+    return IM_ERR_DATA;
+
+  imFileLineBufferInit(ifile);
+  return ((imFormat*)ifile)->WriteImageData(data);
+}
diff --git a/src/im_filebuffer.cpp b/src/im_filebuffer.cpp
new file mode 100644
index 0000000..9ab2fda
--- /dev/null
+++ b/src/im_filebuffer.cpp
@@ -0,0 +1,695 @@
+/** \file
+ * \brief File Access - Buffer Management
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_filebuffer.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "im.h"
+#include "im_format.h"
+#include "im_util.h"
+#include "im_complex.h"
+#include "im_color.h"
+
+
+int imFileLineSizeAligned(int width, int bpp, int align)
+{
+  // Bytes for a line of width pixels of bpp bits, rounded up to a multiple of 4 or 2 bytes, else to whole bytes.
+  const int bits = width * bpp;
+  if (align == 4) return ((bits + 31) / 32) * 4;
+  if (align == 2) return ((bits + 15) / 16) * 2;
+
+  return (bits + 7) / 8;
+}
+
+template <class T> 
+static void iDoFillLineBuffer(int width, int height, int line, int plane,  
+                              int file_color_mode, T* line_buffer, 
+                              int user_color_mode, const T* data)
+{
+  // (writing) from data to file
+  // will handle packing and alpha
+  // fills line_buffer from row `line` of data; `plane` is only used when the file is NOT packed
+  int file_depth = imColorModeDepth(file_color_mode);  
+  int data_depth = imColorModeDepth(user_color_mode);
+  int data_plane_size = width*height;  // This will be used in UNpacked data
+
+  if (imColorModeIsPacked(user_color_mode))
+    data += line*width*data_depth;
+  else
+    data += line*width;
+  // data now points at the start of the row (inside the first plane when the user data is unpacked)
+  for (int x = 0; x < width; x++)
+  {
+    int x_data_offset = x*data_depth;    // This will be used in packed data
+
+    if (imColorModeIsPacked(file_color_mode))
+    {
+      int x_file_offset = x*file_depth;  // This will be used in packed data
+
+      // file is packed
+      // NO color space conversion, color_space must match
+      // ignore alpha if necessary (only the components both sides have are copied).
+      int depth = IM_MIN(file_depth, data_depth);      
+      for (int d = 0; d < depth; d++)
+      {
+        if (imColorModeIsPacked(user_color_mode))
+          line_buffer[x_file_offset + d] = data[x_data_offset + d];
+        else
+          line_buffer[x_file_offset + d] = data[d*data_plane_size + x];
+      }
+    }
+    else
+    {
+      // file NOT packed, copy just one plane
+      // NO color space conversion, color_space must match
+      // a plane the user data does not have: return and leave line_buffer as it is
+      if (plane >= imColorModeDepth(user_color_mode))
+        return;
+
+      if (imColorModeIsPacked(user_color_mode))
+        line_buffer[x] = data[x_data_offset + plane];
+      else
+        line_buffer[x] = data[plane*data_plane_size + x];
+    }
+  }
+}
+
+template <class T> 
+static void iDoFillData(int width, int height, int line, int plane,  
+                              int file_color_mode, const T* line_buffer, 
+                              int user_color_mode, T* data)
+{
+  // (reading) from file to data
+  // will handle packing and alpha
+  // stores line_buffer into row `line` of data; `plane` is only used when the file is NOT packed
+  int file_depth = imColorModeDepth(file_color_mode);
+  int data_depth = imColorModeDepth(user_color_mode);
+  int data_plane_size = width*height;  // This will be used in UNpacked data
+
+  if (imColorModeIsPacked(user_color_mode))
+    data += line*width*data_depth;
+  else
+    data += line*width;
+  // data now points at the start of the row (inside the first plane when the user data is unpacked)
+  for (int x = 0; x < width; x++)
+  {
+    int x_data_offset = x*data_depth;    // This will be used in packed data
+
+    if (imColorModeIsPacked(file_color_mode))
+    {
+      int x_file_offset = x*file_depth;  // This will be used in packed data
+
+      // file is packed
+      // NO color space conversion, color_space must match
+      // ignore alpha if necessary (only the components both sides have are copied).
+      int depth = IM_MIN(file_depth, data_depth);      
+      for (int d = 0; d < depth; d++)
+      {
+        if (imColorModeIsPacked(user_color_mode))
+          data[x_data_offset + d] = line_buffer[x_file_offset + d];
+        else
+          data[d*data_plane_size + x] = line_buffer[x_file_offset + d];
+      }
+    }
+    else
+    {
+      // file NOT packed, copy just one plane
+      // NO color space conversion, color_space must match
+      // a plane the user data does not have: return and leave data as it is
+      if (plane >= imColorModeDepth(user_color_mode))
+        return;
+
+      if (imColorModeIsPacked(user_color_mode))
+        data[x_data_offset + plane] = line_buffer[x];
+      else
+        data[plane*data_plane_size + x] = line_buffer[x];
+    }
+  }
+}
+
+template <class T> 
+static inline void iConvertColor2RGB(T* data, int color_space, int data_type)
+{
+  T zero, max = (T)imColorMax(data_type);
+  // Converts one pixel (data[0..2], plus data[3] for CMYK) to RGB in place; any other color_space leaves data untouched.
+  // Same procedures as iDoConvert2RGB. NOTE(review): the old comment located it in "im_filebuffer.cpp", which is this file - confirm the real location.
+
+  switch (color_space)
+  {
+  case IM_XYZ: 
+    {
+      // to increase precision do intermediate conversions in float
+
+      // scale to 0-1
+      float c0 = imColorReconstruct(data[0], max);
+      float c1 = imColorReconstruct(data[1], max);
+      float c2 = imColorReconstruct(data[2], max);
+
+      // result is still 0-1
+      imColorXYZ2RGB(c0, c1, c2, 
+                     c0, c1, c2, 1.0f);
+
+      // do gamma correction then scale back to 0-max
+      data[0] = imColorQuantize(imColorTransfer2Nonlinear(c0), max);
+      data[1] = imColorQuantize(imColorTransfer2Nonlinear(c1), max);
+      data[2] = imColorQuantize(imColorTransfer2Nonlinear(c2), max);
+    }
+    break;
+  case IM_YCBCR: 
+    zero = (T)imColorZero(data_type);
+    imColorYCbCr2RGB(data[0], data[1], data[2], 
+                      data[0], data[1], data[2], zero, max);
+    break;
+  case IM_CMYK: 
+    imColorCMYK2RGB(data[0], data[1], data[2], data[3], 
+                    data[0], data[1], data[2], max);
+    break;
+  case IM_LUV:
+  case IM_LAB:
+    {
+      // to increase precision do intermediate conversions in float
+      // scale to 0-1 and -0.5/+0.5
+      float c0 = imColorReconstruct(data[0], max);
+      float c1 = imColorReconstruct(data[1], max) - 0.5f;
+      float c2 = imColorReconstruct(data[2], max) - 0.5f;
+
+      if (color_space == IM_LUV)
+        imColorLuv2XYZ(c0, c1, c2,  // conversion in-place
+                       c0, c1, c2);
+      else
+        imColorLab2XYZ(c0, c1, c2,  // conversion in-place
+                       c0, c1, c2);
+
+      imColorXYZ2RGB(c0, c1, c2,    // conversion in-place
+                     c0, c1, c2, 1.0f);
+
+      // do gamma correction then scale back to 0-max
+      data[0] = imColorQuantize(imColorTransfer2Nonlinear(c0), max);
+      data[1] = imColorQuantize(imColorTransfer2Nonlinear(c1), max);
+      data[2] = imColorQuantize(imColorTransfer2Nonlinear(c2), max);
+    }
+    break;
+  }
+}
+
+// These functions will be always converting RGB -> RGB  (0-max) -> (0-255)
+// One overload per line buffer element type; each returns the sample as a byte.
+static inline imbyte iConvertType2Byte(const imbyte& data)
+  { return data; }
+// ushort: the maximum is taken as 65535
+static inline imbyte iConvertType2Byte(const imushort& data)
+  { return imColorQuantize(imColorReconstruct(data, (imushort)65535), (imbyte)255); }
+// int: the maximum is taken as 16777215
+static inline imbyte iConvertType2Byte(const int& data)
+  { return imColorQuantize(imColorReconstruct(data, 16777215), (imbyte)255); }
+// float: quantized directly, without a reconstruct step
+static inline imbyte iConvertType2Byte(const float& data)
+  { return imColorQuantize(data, (imbyte)255); }
+
+// Fake double to avoid errors in the color conversion template routines: the 8 bytes are really
+// one imcfloat and its magnitude (cpxmag) is what gets quantized. Since the color conversions
+// use the double value, they are invalid, so the automatic conversion to bitmap for complex images works only for RGB.
+static inline imbyte iConvertType2Byte(const double& data)
+{ 
+  imcfloat* fdata = (imcfloat*)&data;
+  return imColorQuantize(cpxmag(*fdata), (imbyte)255); 
+}
+
+template <class T> 
+static void iDoFillDataBitmap(int width, int height, int line, int plane, int data_type,
+                              int file_color_mode, const T* line_buffer, 
+                              int user_color_mode, imbyte* data)
+{
+  // (reading) from file to data
+  // will handle packing, alpha, color space conversion to RGB and data_type to BYTE
+  // the destination is always bytes; data_type describes the samples in line_buffer
+  int file_depth = imColorModeDepth(file_color_mode);
+  int data_depth = imColorModeDepth(user_color_mode);
+  int copy_alpha = imColorModeHasAlpha(file_color_mode) && imColorModeHasAlpha(user_color_mode);
+  int data_plane_size = width*height;  // This will be used in UNpacked data
+
+  if (imColorModeIsPacked(user_color_mode))
+    data += line*width*data_depth;
+  else
+    data += line*width;
+  // data now points at the start of the row (inside the first plane when the user data is unpacked)
+  for (int x = 0; x < width; x++)
+  {
+    int x_data_offset = x*data_depth;    // This will be used in packed data
+
+    if (imColorModeIsPacked(file_color_mode))
+    {
+      int x_file_offset = x*file_depth;  // This will be used in packed data
+
+      if (imColorModeMatch(file_color_mode, user_color_mode))
+      {
+        // file is packed
+        // same color space components   (in this case means RGB)
+        // ignore alpha if necessary.
+        int depth = IM_MIN(file_depth, data_depth);      
+        for (int d = 0; d < depth; d++)
+        {
+          if (imColorModeIsPacked(user_color_mode))
+            data[x_data_offset + d] = iConvertType2Byte(line_buffer[x_file_offset + d]);
+          else
+            data[d*data_plane_size + x] = iConvertType2Byte(line_buffer[x_file_offset + d]);
+        }
+      }
+      else
+      {
+        // file is packed
+        // but different color space components
+        // only to RGB conversions are accepted
+
+        if (imColorModeSpace(user_color_mode) != IM_RGB)
+          return;
+        // local copy of the pixel; src_data[3] is only filled (and only needed) for CMYK
+        T src_data[4];
+        src_data[0] = line_buffer[x_file_offset];
+        src_data[1] = line_buffer[x_file_offset + 1];
+        src_data[2] = line_buffer[x_file_offset + 2];
+        if (imColorModeSpace(file_color_mode) == IM_CMYK)
+          src_data[3] = line_buffer[x_file_offset + 3];
+
+        // Do conversion in place
+        iConvertColor2RGB(src_data, imColorModeSpace(file_color_mode), data_type);
+
+        if (imColorModeIsPacked(user_color_mode))
+        {
+          data[x_data_offset] = iConvertType2Byte(src_data[0]);
+          data[x_data_offset + 1] = iConvertType2Byte(src_data[1]);
+          data[x_data_offset + 2] = iConvertType2Byte(src_data[2]);
+          // alpha is taken straight from the file: component 4 after CMYK, component 3 otherwise
+          if (copy_alpha)
+          {
+            if (imColorModeSpace(file_color_mode) == IM_CMYK)
+              data[x_data_offset + 3] = iConvertType2Byte(line_buffer[x_file_offset + 4]);
+            else
+              data[x_data_offset + 3] = iConvertType2Byte(line_buffer[x_file_offset + 3]);
+          }
+        }
+        else
+        {
+          data[x] = iConvertType2Byte(src_data[0]);
+          data[data_plane_size + x] = iConvertType2Byte(src_data[1]);
+          data[2*data_plane_size + x] = iConvertType2Byte(src_data[2]);
+          // alpha is taken straight from the file: component 4 after CMYK, component 3 otherwise
+          if (copy_alpha)
+          {
+            if (imColorModeSpace(file_color_mode) == IM_CMYK)
+              data[3*data_plane_size + x] = iConvertType2Byte(line_buffer[x_file_offset + 4]);
+            else
+              data[3*data_plane_size + x] = iConvertType2Byte(line_buffer[x_file_offset + 3]);
+          }
+        }
+      }
+    }
+    else
+    {
+      // file NOT packed, copy just one plane
+      // NO color space conversion possible now
+      // a plane the user data does not have: return and leave data as it is
+      if (plane >= imColorModeDepth(user_color_mode))
+        return;
+
+      if (imColorModeIsPacked(user_color_mode))
+        data[x_data_offset + plane] = iConvertType2Byte(line_buffer[x]);
+      else
+        data[plane*data_plane_size + x] = iConvertType2Byte(line_buffer[x]);
+    }
+  }
+}
+
+static void iFileExpandBits(imFile* ifile)
+{
+  // conversion will be done in place in backward order (from end to start)
+  // unpacks width samples of the line buffer: 1, 2 or 4 bits to one byte each, or 12 bits to one ushort each
+  if (abs(ifile->convert_bpp) < 8)
+  {
+    imbyte* byte_buffer = (imbyte*)ifile->line_buffer;
+    imbyte* bit_buffer = (imbyte*)ifile->line_buffer;
+    // both pointers share the buffer; the output starts at its last byte
+    byte_buffer += ifile->width-1; 
+    int bpp = ifile->convert_bpp;
+    int expand_range = imColorModeSpace(ifile->file_color_mode) == IM_GRAY? 1: 0;
+
+    for (int i=ifile->width-1; i >= 0; i--)
+    {
+      if (bpp == 1)
+        *byte_buffer = (imbyte)((bit_buffer[i / 8] >> (7 - i % 8)) & 0x01);
+      else if (bpp == 4)
+        *byte_buffer = (imbyte)((bit_buffer[i / 2] >> ((1 - i % 2) * 4)) & 0x0F);
+      else if (bpp == 2)
+        *byte_buffer = (imbyte)((bit_buffer[i / 4] >> ((3 - i % 4) * 2)) & 0x03);
+      // 17 = 255/15 and 85 = 255/3: stretch the 4 bit and 2 bit ranges to 0-255 (gray only)
+      if (expand_range)   /* if convert_bpp<0 then only expand its range */
+      {
+        if (bpp == 4 || bpp == -4)
+          *byte_buffer *= 17;
+        else if (bpp == 2 || bpp == -2)
+          *byte_buffer *= 85;
+      }
+
+      byte_buffer--;
+    }
+  }
+  else if (ifile->convert_bpp == 12)
+  {
+    imushort* ushort_buffer = (imushort*)ifile->line_buffer;
+    imbyte* bit_buffer = (imbyte*)ifile->line_buffer;
+    // two pixels share three bytes. NOTE(review): bit layout documented as coded, confirm against the 12 bit source format
+    for (int i=ifile->width-1; i >= 0; i--)
+    {
+      int byte_index = (3*i)/2;
+      if (i%2)
+        ushort_buffer[i] = (bit_buffer[byte_index] << 4) | (bit_buffer[byte_index+1] & 0x0F);
+      else
+        ushort_buffer[i] = ((bit_buffer[byte_index] & 0x0F) << 8) | (bit_buffer[byte_index+1]);
+    }
+  }
+}
+
+static void iFileCompactBits(imFile* ifile)
+{
+  // conversion will be done in place
+  imbyte* byte_buffer = (imbyte*)ifile->line_buffer;
+  imbyte* bit_buffer = (imbyte*)ifile->line_buffer;
+  // convert_bpp == 1 packs one byte per pixel into one bit per pixel, first pixel in the highest bit
+  if (ifile->convert_bpp == 1)
+  {
+    for (int i = 0; i < ifile->width; i++)
+    {
+      if (*byte_buffer)
+        bit_buffer[i / 8] |=  (0x01 << (7 - (i % 8)));
+      else
+        bit_buffer[i / 8] &= ~(0x01 << (7 - (i % 8)));
+      // the byte just read is never needed again, so the packed bits can overwrite the start of the buffer
+      byte_buffer++;
+    }
+  }
+  else  // -1 == expand 1 to 255 (this branch runs for every non-zero convert_bpp other than 1)
+  {
+    for (int i = 0; i < ifile->width; i++)
+    {
+      if (*byte_buffer)
+        *byte_buffer = 255;
+
+      byte_buffer++;
+    }
+  }
+}
+
+template <class SRC, class DST> 
+static void iDoSwitchInt(int count, const SRC* src_data, DST* dst_data, int offset)
+{
+  // Adds offset to every sample (computed as int) while converting SRC to DST, front to back.
+  // Each element is read before its own slot is written.
+  for (int k = 0; k < count; k++)
+    dst_data[k] = (DST)((int)src_data[k] + offset);
+}
+
+template <class SRC, class DST> 
+static void iDoSwitchReal(int count, const SRC* src_data, DST* dst_data)
+{
+  // Converts count samples SRC -> DST. The callers pass the same line buffer as source and
+  // destination, so widening must walk backward or it would overwrite input not yet read.
+  if (sizeof(DST) > sizeof(SRC)) for (int i = count-1; i >= 0; i--) dst_data[i] = (DST)src_data[i];
+  else for (int i = 0; i < count; i++) dst_data[i] = (DST)src_data[i];
+}
+
+static void iFileSwitchFromType(imFile* ifile)
+{ // (reading) converts the line buffer in place from the type stored in the file to file_data_type
+  int line_count = imImageLineCount(ifile->width, ifile->file_color_mode);
+  switch(ifile->file_data_type)
+  {
+  case IM_BYTE:    // Source is char: adding 128 moves the signed range onto 0-255
+    iDoSwitchInt(line_count, (const char*)ifile->line_buffer, (imbyte*)ifile->line_buffer, 128);
+    break;
+  case IM_USHORT:  // Source is short: adding 32768 moves the signed range onto 0-65535
+    iDoSwitchInt(line_count, (const short*)ifile->line_buffer, (imushort*)ifile->line_buffer, 32768);
+    break;
+  case IM_INT:     // Source is uint: the offset applied is -8388608
+    iDoSwitchInt(line_count, (const unsigned int*)ifile->line_buffer, (int*)ifile->line_buffer, -8388608);
+    break;
+  case IM_FLOAT:   // Source is double, narrowed to float
+    iDoSwitchReal(line_count, (const double*)ifile->line_buffer, (float*)ifile->line_buffer);
+    break;
+  case IM_CFLOAT:  // Source is complex double: two reals per sample, hence 2*line_count
+    iDoSwitchReal(2*line_count, (const double*)ifile->line_buffer, (float*)ifile->line_buffer);
+    break;
+  }
+}
+
+static void iFileSwitchToType(imFile* ifile)
+{ // (writing) converts the line buffer in place from file_data_type to the type stored in the file
+  int line_count = imImageLineCount(ifile->width, ifile->file_color_mode);
+  switch(ifile->file_data_type)
+  {
+  case IM_BYTE:    // Destination is char: subtracting 128 moves 0-255 onto the signed range
+    iDoSwitchInt(line_count, (const imbyte*)ifile->line_buffer, (char*)ifile->line_buffer, -128);
+    break;
+  case IM_USHORT:  // Destination is short: subtracting 32768 moves 0-65535 onto the signed range
+    iDoSwitchInt(line_count, (const imushort*)ifile->line_buffer, (short*)ifile->line_buffer, -32768);
+    break;
+  case IM_INT:     // Destination is uint: the offset applied is 8388608
+    iDoSwitchInt(line_count, (const int*)ifile->line_buffer, (unsigned int*)ifile->line_buffer, 8388608);
+    break;
+  case IM_FLOAT:   // Destination is double: each sample grows, source and destination are the same buffer
+    iDoSwitchReal(line_count, (const float*)ifile->line_buffer, (double*)ifile->line_buffer);
+    break;
+  case IM_CFLOAT:  // Destination is complex double: two reals per sample, hence 2*line_count
+    iDoSwitchReal(2*line_count, (const float*)ifile->line_buffer, (double*)ifile->line_buffer);
+    break;
+  }
+}
+
+void imFileLineBufferWrite(imFile* ifile, const void* data, int line, int plane)
+{
+  // (writing) from data to file
+  // fills the line buffer with one line of the user image; the row is mirrored when file and user orientation differ
+  if (imColorModeIsTopDown(ifile->file_color_mode) != imColorModeIsTopDown(ifile->user_color_mode))
+    line = ifile->height-1 - line;
+
+  if ((ifile->file_color_mode & 0x3FF) == 
+      (ifile->user_color_mode & 0x3FF)) // compare only packing, alpha and color space
+  {
+    int data_offset = line*ifile->line_buffer_size;
+    if (plane != 0)
+      data_offset += plane*ifile->height*ifile->line_buffer_size;
+    // same layout on both sides: the line is a plain byte copy
+    memcpy(ifile->line_buffer, (unsigned char*)data + data_offset, ifile->line_buffer_size);
+  }
+  else
+  {
+    switch(ifile->file_data_type)
+    {
+    case IM_BYTE:
+      iDoFillLineBuffer(ifile->width, ifile->height, line, plane, 
+                        ifile->file_color_mode, (imbyte*)ifile->line_buffer, 
+                        ifile->user_color_mode, (const imbyte*)data);
+      break;
+    case IM_USHORT:
+      iDoFillLineBuffer(ifile->width, ifile->height, line, plane,  
+                        ifile->file_color_mode, (imushort*)ifile->line_buffer, 
+                        ifile->user_color_mode, (const imushort*)data);
+      break;
+    case IM_INT:
+      iDoFillLineBuffer(ifile->width, ifile->height, line, plane,  
+                        ifile->file_color_mode, (int*)ifile->line_buffer, 
+                        ifile->user_color_mode, (const int*)data);
+      break;
+    case IM_FLOAT:
+      iDoFillLineBuffer(ifile->width, ifile->height, line, plane,  
+                        ifile->file_color_mode, (float*)ifile->line_buffer, 
+                        ifile->user_color_mode, (const float*)data);
+      break;
+    case IM_CFLOAT:
+      iDoFillLineBuffer(ifile->width, ifile->height, line, plane,  
+                        ifile->file_color_mode, (imcfloat*)ifile->line_buffer, 
+                        ifile->user_color_mode, (const imcfloat*)data);
+      break;
+    }
+  }
+  // the in-place conversions run last, on the line that was just assembled
+  if (ifile->convert_bpp)
+    iFileCompactBits(ifile);
+
+  if (ifile->switch_type)
+    iFileSwitchToType(ifile);
+}
+
+void imFileLineBufferRead(imFile* ifile, void* data, int line, int plane)
+{
+  // (reading) from file to data
+  // stores the current line buffer as row `line` of the user image; the row is mirrored when orientations differ
+  if (imColorModeIsTopDown(ifile->file_color_mode) != imColorModeIsTopDown(ifile->user_color_mode))
+    line = ifile->height-1 - line;
+  // the in-place conversions run first, so the line buffer holds file_data_type samples below
+  if (ifile->convert_bpp)
+    iFileExpandBits(ifile);
+
+  if (ifile->switch_type)
+    iFileSwitchFromType(ifile);
+
+  if ((ifile->file_color_mode & 0x3FF) == (ifile->user_color_mode & 0x3FF) && // compare only packing, alpha and color space, ignore bottom up.
+      ifile->file_data_type == ifile->user_data_type) // compare data type when reading
+  {
+    int data_offset = line*ifile->line_buffer_size;
+    if (plane != 0)
+      data_offset += plane*ifile->height*ifile->line_buffer_size;
+    // same layout and type on both sides: the line is a plain byte copy
+    memcpy((unsigned char*)data + data_offset, ifile->line_buffer, ifile->line_buffer_size);
+  }
+  else
+  {
+    // now we have 2 conversions groups
+    // one to convert only packing and alpha
+    // and the other to convert packing, alpha, color space and data type
+    int convert2bitmap = 0;
+    if (imColorModeSpace(ifile->user_color_mode) != imColorModeSpace(ifile->file_color_mode) ||
+        ifile->file_data_type != ifile->user_data_type)  // the bitmap path writes bytes, so it is only for a real type change
+      convert2bitmap = 1;
+
+    switch(ifile->file_data_type)
+    {
+    case IM_BYTE:
+      if (convert2bitmap)
+        iDoFillDataBitmap(ifile->width, ifile->height, line, plane, ifile->file_data_type,
+                          ifile->file_color_mode, (const imbyte*)ifile->line_buffer, 
+                          ifile->user_color_mode, (imbyte*)data);
+      else
+        iDoFillData(ifile->width, ifile->height, line, plane, 
+                    ifile->file_color_mode, (const imbyte*)ifile->line_buffer, 
+                    ifile->user_color_mode, (imbyte*)data);
+      break;
+    case IM_USHORT:
+      if (convert2bitmap)
+        iDoFillDataBitmap(ifile->width, ifile->height, line, plane, ifile->file_data_type,
+                          ifile->file_color_mode, (const imushort*)ifile->line_buffer, 
+                          ifile->user_color_mode, (imbyte*)data);
+      else
+        iDoFillData(ifile->width, ifile->height, line, plane,  
+                    ifile->file_color_mode, (const imushort*)ifile->line_buffer, 
+                    ifile->user_color_mode, (imushort*)data);
+      break;
+    case IM_INT:
+      if (convert2bitmap)
+        iDoFillDataBitmap(ifile->width, ifile->height, line, plane, ifile->file_data_type,
+                          ifile->file_color_mode, (const int*)ifile->line_buffer, 
+                          ifile->user_color_mode, (imbyte*)data);
+      else
+        iDoFillData(ifile->width, ifile->height, line, plane,  
+                    ifile->file_color_mode, (const int*)ifile->line_buffer, 
+                    ifile->user_color_mode, (int*)data);
+      break;
+    case IM_FLOAT:
+      if (convert2bitmap)
+        iDoFillDataBitmap(ifile->width, ifile->height, line, plane, ifile->file_data_type,
+                          ifile->file_color_mode, (const float*)ifile->line_buffer, 
+                          ifile->user_color_mode, (imbyte*)data);
+      else
+        iDoFillData(ifile->width, ifile->height, line, plane,  
+                    ifile->file_color_mode, (const float*)ifile->line_buffer, 
+                    ifile->user_color_mode, (float*)data);
+      break;
+    case IM_CFLOAT:
+      if (convert2bitmap)  // each complex sample is handed over as one "double" (see iConvertType2Byte)
+        iDoFillDataBitmap(ifile->width, ifile->height, line, plane, ifile->file_data_type,
+                          ifile->file_color_mode, (const double*)ifile->line_buffer, 
+                          ifile->user_color_mode, (imbyte*)data);
+      else
+        iDoFillData(ifile->width, ifile->height, line, plane,  
+                    ifile->file_color_mode, (const imcfloat*)ifile->line_buffer, 
+                    ifile->user_color_mode, (imcfloat*)data);
+      break;
+    }
+  }
+}
+           
+void imFileLineBufferInit(imFile* ifile)
+{
+  // Computes the byte size of one file line and grows the line buffer when it is too small.
+  ifile->line_buffer_size = imImageLineSize(ifile->width, ifile->file_color_mode, ifile->file_data_type);
+  // a type switch on float or complex data gets one more line size of room (double the size at least)
+  if ((ifile->file_data_type == IM_FLOAT || ifile->file_data_type == IM_CFLOAT) && ifile->switch_type)
+    ifile->line_buffer_extra += ifile->line_buffer_size;
+
+  if (ifile->line_buffer_size + ifile->line_buffer_extra <= ifile->line_buffer_alloc)
+    return;  // the current allocation is already big enough
+  ifile->line_buffer_alloc = ifile->line_buffer_size + ifile->line_buffer_extra;
+  ifile->line_buffer = realloc(ifile->line_buffer, ifile->line_buffer_alloc);  // NOTE(review): result is not checked
+}
+
+int imFileLineBufferCount(imFile* ifile)
+{
+  // Number of line buffer transfers for one image: height for packed files,
+  // height times the number of planes for unpacked ones.
+  if (imColorModeIsPacked(ifile->file_color_mode))
+    return ifile->height;
+  // the alpha plane only counts when both the file and the user have alpha
+  const int with_alpha = imColorModeHasAlpha(ifile->file_color_mode) && imColorModeHasAlpha(ifile->user_color_mode);
+  const int planes = with_alpha? imColorModeDepth(ifile->file_color_mode):
+                                 imColorModeDepth(imColorModeSpace(ifile->file_color_mode));
+  return ifile->height * planes;
+}
+
+void imFileLineBufferInc(imFile* ifile, int *row, int *plane)
+{
+  // Advances (row, plane) to the next line: packed files only step the row,
+  // unpacked files wrap to row 0 of the next plane after the last row.
+  const bool last_row_of_plane = !imColorModeIsPacked(ifile->file_color_mode) &&
+                                 *row == ifile->height-1;
+  if (last_row_of_plane)
+  {
+    *plane += 1;
+    *row = 0;
+  }
+  else
+    *row += 1;
+}
+
+int imFileCheckConversion(imFile* ifile)
+{
+  // Returns 1 when the file and user representations are identical or can be
+  // converted by the line buffer code, 0 when the combination is refused.
+
+  const int same_type = (ifile->file_data_type == ifile->user_data_type);
+
+  // 0x3FF keeps only packing, alpha and color space
+  const int same_layout = ((ifile->file_color_mode & 0x3FF) == (ifile->user_color_mode & 0x3FF));
+
+  if (same_layout && same_type)
+    return 1;
+
+  const int file_space = imColorModeSpace(ifile->file_color_mode);
+  const int user_space = imColorModeSpace(ifile->user_color_mode);
+  const int same_space = (user_space == file_space);
+
+  // NO color space conversion for files with more than one
+  // component that are not packed
+  if (!same_space)
+  {
+    if (imColorModeDepth(file_space) > 1 && !imColorModeIsPacked(ifile->file_color_mode))
+      return 0;
+  }
+
+  if (ifile->is_new)
+  {
+    // (writing) from data to file:
+    // neither the data type nor the color space may change here,
+    // if a conversion is necessary the format driver will do it
+    return (same_type && same_space)? 1: 0;
+  }
+
+  // (reading) from file to data:
+  // the data type may only change to byte
+  if (!same_type && ifile->user_data_type != IM_BYTE)
+    return 0;
+
+  return 1;
+}
diff --git a/src/im_fileraw.cpp b/src/im_fileraw.cpp
new file mode 100644
index 0000000..da7ef81
--- /dev/null
+++ b/src/im_fileraw.cpp
@@ -0,0 +1,64 @@
+/** \file
+ * \brief RAW File Format Open/New Functions
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_fileraw.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im.h"
+#include "im_image.h"
+#include "im_util.h"
+#include "im_counter.h"
+#include "im_raw.h"
+#include "im_format.h"
+#include "im_format_raw.h"
+
+#include <stdlib.h>
+#include <assert.h>
+
+
+imFile* imFileOpenRaw(const char* file_name, int *error)
+{
+  // Opens an existing file with a fresh RAW driver. When the driver's Open
+  // fails, the driver is deleted and NULL is returned with the code in *error.
+  assert(file_name);
+
+  imFormat* raw = imFormatInitRAW();
+  *error = raw->Open(file_name);
+
+  if (*error == 0)
+  {
+    imFileClear(raw);
+    raw->attrib_table = new imAttribTable(599);
+    raw->counter = imCounterBegin(file_name);
+    return raw;
+  }
+
+  delete raw;
+  return NULL;
+}
+
+imFile* imFileNewRaw(const char* file_name, int *error)
+{
+  // Creates a new file with a fresh RAW driver. When the driver's New fails,
+  // the driver is deleted and NULL is returned with the code in *error.
+  assert(file_name);
+
+  imFormat* raw = imFormatInitRAW();
+  *error = raw->New(file_name);
+
+  if (*error == 0)
+  {
+    imFileClear(raw);
+    // a file being written: no images yet and an empty compression name
+    raw->is_new = 1;
+    raw->image_count = 0;
+    raw->compression[0] = 0;
+    raw->attrib_table = new imAttribTable(101);
+    raw->counter = imCounterBegin(file_name);
+    return raw;
+  }
+
+  delete raw;
+  return NULL;
+}
diff --git a/src/im_format.cpp b/src/im_format.cpp
new file mode 100644
index 0000000..be9892c
--- /dev/null
+++ b/src/im_format.cpp
@@ -0,0 +1,289 @@
+/** \file
+ * \brief File Format Access
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <memory.h>
+#include <string.h>
+#include <assert.h>
+
+#include "im.h"
+#include "im_format.h"
+#include "im_util.h"
+
+
+static imFormat* iFormatList[50];    // registered format drivers, in registration order
+static int iFormatCount = 0;         // number of used entries in iFormatList
+static int iFormatRegistredAll = 0;  // set to 1 once imFormatRegisterInternal() has been called
+
+void imFormatRemoveAll(void)
+{
+  // Deletes every registered driver and empties the registry. Clearing the
+  // "registered all" flag lets the next lookup register the internal formats again.
+  for (imFormat** slot = iFormatList; slot != iFormatList + iFormatCount; slot++)
+  {
+    delete *slot;
+    *slot = NULL;
+  }
+  iFormatCount = iFormatRegistredAll = 0;
+}
+
+void imFormatRegister(imFormat* iformat)
+{ // Appends iformat to the registry. A NULL driver or a full table is ignored (ownership stays with the caller) instead of overrunning iFormatList.
+  if (iformat && iFormatCount < (int)(sizeof(iFormatList)/sizeof(iFormatList[0])))
+    iFormatList[iFormatCount++] = iformat;
+}
+
+static imFormat* iFormatFind(const char* format)
+{
+  // Returns the registered driver whose name matches format (compared with
+  // imStrEqual), or NULL when there is none.
+  assert(format);
+  // the internal formats are registered lazily, on the first lookup
+  if (iFormatRegistredAll == 0)
+  {
+    imFormatRegisterInternal();
+    iFormatRegistredAll = 1;
+  }
+
+  imFormat* found = NULL;
+  for (int i = 0; i < iFormatCount && found == NULL; i++)
+    if (imStrEqual(format, iFormatList[i]->format))
+      found = iFormatList[i];
+  return found;
+}
+
+void imFormatList(char** format_list, int *format_count)
+{
+  // Stores the names of all registered formats in format_list and their
+  // number in *format_count. The strings live in a static buffer that is
+  // overwritten by the next call.
+  static char names[50][50];
+
+  assert(format_list);
+  assert(format_count);
+
+  if (iFormatRegistredAll == 0)
+  {
+    imFormatRegisterInternal();
+    iFormatRegistredAll = 1;
+  }
+
+  int n;
+  for (n = 0; n < iFormatCount; n++)
+    format_list[n] = strcpy(names[n], iFormatList[n]->format);
+  *format_count = n;
+}
+
+int imFormatInfo(const char* format, char* desc, char* ext, int *can_sequence)
+{
+  // Copies the description, the extension list and the sequence flag of the
+  // named format into each output argument that is not NULL.
+  imFormat* found = iFormatFind(format);
+  if (found == NULL) return IM_ERR_FORMAT;  // unknown format name
+  if (desc != NULL) strcpy(desc, found->desc);
+  if (ext != NULL) strcpy(ext, found->ext);
+  if (can_sequence != NULL) *can_sequence = found->can_sequence;
+  return IM_ERR_NONE;
+}
+
+int imFormatCompressions(const char* format, char** comp, int *comp_count, int color_mode, int data_type)
+{
+  // Stores the compression names of a format in comp and their number in
+  // *comp_count. When both color_mode and data_type are given (not -1) only
+  // the compressions accepted by the driver's CanWrite are listed.
+  // The strings live in a static buffer that is reused by every call.
+  static char names[50][50];
+  imFormat* found = iFormatFind(format);
+  if (found == NULL)
+    return IM_ERR_FORMAT;
+
+  int filter = (color_mode != -1 && data_type != -1);
+  int listed = 0;
+  for (int i = 0; i < found->comp_count; i++)
+  {
+    const char* name = found->comp[i];
+    if (filter && found->CanWrite(name, color_mode, data_type) != IM_ERR_NONE)
+      continue;
+    comp[listed] = strcpy(names[listed], name);
+    listed++;
+  }
+  *comp_count = listed;
+  return IM_ERR_NONE;
+}
+
+int imFormatCanWriteImage(const char* format, const char* compression, int color_mode, int data_type)
+{
+  // Forwards the question to the CanWrite of the named format driver;
+  // an unknown format name yields IM_ERR_FORMAT.
+  assert(format);
+  imFormat* found = iFormatFind(format);
+  if (found == NULL)
+    return IM_ERR_FORMAT;
+  return found->CanWrite(compression, color_mode, data_type);
+}
+
+static char* utlFileGetExt(const char *file_name)
+{
+  // Returns a malloc'ed, lower-cased copy of the text after the last '.' of
+  // file_name, or NULL when there is no extension (or no memory). The caller
+  // must free() the result.
+  int len = (int)strlen(file_name);
+  // Scan backwards from the last character; the first one is never examined.
+  // "> 0" (not "!= 0") stops an empty name, which starts at -1, from reading before the string.
+  int offset = len - 1;
+  while (offset > 0)
+  {
+    if (file_name[offset] == '\\' || file_name[offset] == '/')
+      return NULL;  // found a path separator, no extension found
+    if (file_name[offset] == '.')
+    {
+      offset++;  // first character after the dot
+      break;
+    }
+    offset--;
+  }
+
+  // if at (or before) the first character, no extension found
+  if (offset <= 0)
+    return NULL;
+
+  int ext_size = len - offset + 1;  // extension characters plus terminator
+  char* file_ext = (char*)malloc(ext_size);
+  if (!file_ext)
+    return NULL;
+  for (int i = 0; i < ext_size-1; i++)  // the cast keeps tolower defined for bytes above 127
+    file_ext[i] = (char)tolower((unsigned char)file_name[i+offset]);
+  file_ext[ext_size-1] = 0;
+  return file_ext;
+}
+
+imFormat* imFormatOpen(const char* file_name, int *error)
+{
+  // Finds the registered format driver that is able to open file_name.
+  //
+  // The drivers whose extension list contains the file extension are tried
+  // first, which usually speeds up the search; the remaining drivers are
+  // tried afterwards. A driver answering IM_ERR_FORMAT only means "not my
+  // format" and the search goes on; any other error aborts the search.
+  //
+  // Returns the driver that opened the file, with *error = IM_ERR_NONE.
+  // Returns NULL with *error set to the aborting error, or to IM_ERR_FORMAT
+  // when no driver recognized the file.
+  //
+  // NOTE: the extension test is a plain strstr on the driver's extension
+  // list, so it is only a hint used to order the search.
+
+  assert(file_name);
+  assert(error);
+
+  if (iFormatRegistredAll == 0)
+  {
+    imFormatRegisterInternal();
+    iFormatRegistredAll = 1;
+  }
+
+  // tried[i] is set once driver i has been offered the file
+  int* tried = new int [iFormatCount];
+  memset(tried, 0, sizeof(int)*iFormatCount);
+
+  // NULL when the file name has no extension
+  char* extension = utlFileGetExt(file_name);
+
+  imFormat* opened = NULL;  // driver that accepted the file
+  int done = 0;             // set on success and on an aborting error
+
+  // pass 0: drivers matching the extension (skipped without an extension)
+  // pass 1: every driver that was not offered the file in pass 0
+  for (int pass = (extension? 0: 1); pass < 2 && !done; pass++)
+  {
+    for (int i = 0; i < iFormatCount && !done; i++)
+    {
+      imFormat* candidate = iFormatList[i];
+
+      if (pass == 0)
+      {
+        if (strstr(candidate->ext, extension) == NULL)
+          continue;
+      }
+      else if (tried[i])
+        continue;
+
+      // Mark this format to avoid testing it again in the next pass
+      tried[i] = 1;
+
+      *error = candidate->Open(file_name);
+      if (*error == IM_ERR_NONE)  // Successfully opened the file
+        opened = candidate;
+
+      // Only IM_ERR_FORMAT is a valid error here, it lets the search
+      // continue; everything else (including success) ends it.
+      done = (*error != IM_ERR_FORMAT);
+    }
+  }
+
+  // every driver refused the file, or there was no driver to try
+  if (!done)
+    *error = IM_ERR_FORMAT;
+
+  // free(NULL) is harmless when the name had no extension
+  free(extension);
+  delete [] tried;
+
+  return opened;
+}
+
+imFormat* imFormatOpenAs(const char* file_name, const char* format, int *error)
+{
+  // Opens file_name with the driver registered under the given format name.
+  // Returns the driver with *error = IM_ERR_NONE on success. Returns NULL
+  // with *error = IM_ERR_FORMAT when the format name is unknown or the driver
+  // does not recognize the file, or with the driver's own error code for any
+  // other failure.
+  assert(file_name);
+  assert(format);
+  assert(error);
+
+  // iFormatFind registers the internal formats on first use.
+  imFormat* iformat = iFormatFind(format);
+
+  // The lookup result must be tested here: "format" was already asserted
+  // above, and an unknown name would otherwise dereference NULL below.
+  if (!iformat)
+  {
+    *error = IM_ERR_FORMAT;
+    return NULL;
+  }
+
+  *error = iformat->Open(file_name);
+  if (*error == IM_ERR_NONE) // Successfully opened the file
+    return iformat;
+
+  return NULL;  // *error holds IM_ERR_FORMAT or the aborting error
+}
+
+imFormat* imFormatNew(const char* file_name, const char* format, int *error)
+{
+  // Creates file_name with the driver registered under the given format name.
+  // Returns the driver, or NULL with *error set to IM_ERR_FORMAT (unknown
+  // format name) or to the code returned by the driver's New.
+  assert(file_name);
+  assert(format);
+  assert(error);
+
+  imFormat* driver = iFormatFind(format);
+
+  if (driver != NULL)
+    *error = driver->New(file_name);
+  else
+    *error = IM_ERR_FORMAT;
+
+  if (driver == NULL || *error != 0) return NULL;
+  return driver;
+}
+
diff --git a/src/im_format_all.cpp b/src/im_format_all.cpp
new file mode 100644
index 0000000..0d66bd2
--- /dev/null
+++ b/src/im_format_all.cpp
@@ -0,0 +1,37 @@
+/** \file
+ * \brief Register all the internal File Format Classes
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_all.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+
+#include <stdlib.h>
+#include <memory.h>
+#include <string.h>
+#include <assert.h>
+
+#include "im.h"
+#include "im_format.h"
+#include "im_format_all.h"
+
+void imFormatRegisterInternal(void)
+{
+  // IMPORTANT: RAW format is not registered.
+  // The internal formats registration. The order matters: presumably each call
+  // appends one driver to the list that imFormatOpen scans in order - TODO confirm.
+  imFormatRegisterTIFF();
+  imFormatRegisterJPEG();
+  imFormatRegisterPNG();
+  imFormatRegisterGIF();
+  imFormatRegisterBMP();
+  imFormatRegisterRAS();
+  imFormatRegisterICO();
+  imFormatRegisterPNM();
+  imFormatRegisterKRN();
+  imFormatRegisterLED();
+  imFormatRegisterSGI();
+  imFormatRegisterPCX();
+  imFormatRegisterTGA();
+}
+
diff --git a/src/im_format_avi.cpp b/src/im_format_avi.cpp
new file mode 100644
index 0000000..3de98d4
--- /dev/null
+++ b/src/im_format_avi.cpp
@@ -0,0 +1,668 @@
+/** \file
+ * \brief AVI - Windows Audio-Video Interleaved RIFF
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_avi.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_format_avi.h"
+#include "im_util.h"
+#include "im_counter.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <windows.h>
+#include <vfw.h> 
+
+#include "im_dib.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+
+
+static const char* iAVICompTable[15] =   // compression names handed to the imFormat constructor by imFormatAVI
+{
+  "NONE",     // handled without a compressor in WriteImageInfo
+  "RLE",      // Microsoft RLE
+  "CINEPACK", // Cinepak Codec by Radius
+  "MSVC",     // Microsoft Video 1
+  "M261",     // Microsoft H.261 Video Codec
+  "M263",     // Microsoft H.263 Video Codec
+  "I420",     // Intel 4:2:0 Video Codec (same as M263)
+  "IV32",     // Intel Indeo Video Codec 3.2
+  "IV41",     // Intel Indeo Video Codec 4.5
+  "IV50",     // Intel Indeo Video 5.1
+  "IYUV",     // Intel IYUV Codec
+  "MPG4",     // Microsoft MPEG-4 Video Codec V1
+  "MP42",     // Microsoft MPEG-4 Video Codec V2
+  "DIVX",     // DivX 5.0.4 Codec (must be installed)
+  "CUSTOM"    // (show compression dialog)
+};
+
+class imFormatAVI: public imFormat  // AVI file format driver built on the Video for Windows AVIFile API
+{
+  PAVIFILE file;      // file handle filled by AVIFileOpen in Open/New
+  PAVISTREAM stream;  // video stream: obtained in Open when reading, created in WriteImageInfo when writing
+
+  imDib* dib;         // writing: frame DIB created once in WriteImageInfo; reading: per-frame reference inside ReadImageData
+  float fps;          // frames per second: dwRate/dwScale when reading, "FPS" attribute or 15 when writing
+  unsigned int rmask, gmask, bmask, 
+                roff, goff, boff; /* pixel bit mask control when reading 16 and 32 bpp images */
+
+  PGETFRAME frame;    // used when reading
+  int current_frame;  // index of the frame fetched by the next ReadImageData
+
+  COMPVARS compvars;  // used when writing
+  int use_compressor; // non-zero when frames are passed through ICSeqCompressFrame
+
+  void ReadPalette(unsigned char* bmp_colors);   // 4-byte color entries -> this->palette
+  void WritePalette(unsigned char* bmp_colors);  // this->palette -> 4-byte color entries
+  void FixRGB(int bpp);                          // converts line_buffer in place for 16/24/32 bpp lines
+  void InitMasks(imDib* dib);                    // fills the masks/offsets above from a 16 or 32 bpp DIB
+
+public:
+  imFormatAVI()
+    :imFormat("AVI", 
+              "Windows Audio-Video Interleaved RIFF", 
+              "*.avi;", 
+              iAVICompTable, 
+              15,   // number of entries in iAVICompTable
+              1)    // presumably the can_sequence flag - TODO confirm against the imFormat constructor
+    {}
+  ~imFormatAVI() {}
+
+  int Open(const char* file_name);
+  int New(const char* file_name);
+  void Close();
+  void* Handle(int index);
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo();
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+void imFormatRegisterAVI(void)  // not called by imFormatRegisterInternal; it has to be called explicitly
+{
+  imFormatRegister(new imFormatAVI());  // hands a new AVI driver instance to the format registry
+}
+
+int imFormatAVI::Open(const char* file_name)
+{
+  // Opens an existing AVI file and its first video stream for reading; on failure whatever was acquired is released.
+  AVIFileInit();  /* initializes avi file library */
+  /* open existing file */
+  HRESULT hr = AVIFileOpen(&file, file_name, OF_READ, NULL);
+  if (hr != 0)
+  {
+    AVIFileExit();
+    if (hr == AVIERR_FILEOPEN)
+      return IM_ERR_OPEN;
+    else if (hr == AVIERR_BADFORMAT || hr == REGDB_E_CLASSNOTREG)
+      return IM_ERR_FORMAT;
+    else
+      return IM_ERR_ACCESS;
+  }
+
+  /* get the video stream */
+  hr = AVIFileGetStream(file, &stream, streamtypeVIDEO, 0);
+  if (hr != 0)
+  {
+    AVIFileRelease(this->file);
+    AVIFileExit();
+    return (hr == AVIERR_NODATA)? IM_ERR_DATA: IM_ERR_ACCESS;
+  }
+
+  /* get stream info; if this fails streaminfo must not be read */
+  AVISTREAMINFO streaminfo;
+  hr = AVIStreamInfo(stream, &streaminfo, sizeof(AVISTREAMINFO));
+  if (hr != 0)
+  {
+    AVIStreamRelease(this->stream);
+    AVIFileRelease(this->file);
+    AVIFileExit();
+    return IM_ERR_ACCESS;
+  }
+
+  this->image_count = streaminfo.dwLength;
+  // dwRate/dwScale is the frame rate; a zero scale must not be divided by
+  this->fps = streaminfo.dwScale? (float)streaminfo.dwRate / (float)streaminfo.dwScale: 0.0f;
+  // map the handler FOURCC to a compression name; other handlers are reported by their four characters
+  DWORD handler = streaminfo.fccHandler;
+  if (handler == mmioFOURCC('D','I','B',' '))
+    strcpy(this->compression, "NONE");
+  else if (handler == mmioFOURCC('M','R','L','E'))
+    strcpy(this->compression, "RLE");
+  else if (handler == mmioFOURCC('c','v','i','d'))
+    strcpy(this->compression, "CINEPACK");
+  else
+  {
+    this->compression[0] = (char)handler;
+    this->compression[1] = (char)(handler >> 8);
+    this->compression[2] = (char)(handler >> 16);
+    this->compression[3] = (char)(handler >> 24);
+    this->compression[4] = 0;
+  }
+  this->frame = 0;
+  this->use_compressor = 0;
+  this->dib = 0;
+  this->current_frame = 0;
+  return IM_ERR_NONE;
+}
+
+int imFormatAVI::New(const char* file_name)
+{
+  // Creates a new AVI file for writing. The video stream is created later,
+  // in WriteImageInfo, so only the file handle is set up here.
+  AVIFileInit();  /* initializes avi file library */
+
+  /* creates a new file */
+  HRESULT hr = AVIFileOpen(&file, file_name, OF_WRITE | OF_CREATE, NULL);
+  if (hr == 0)
+  {
+    // nothing else exists yet: no stream, no frame reader, no DIB, no codec
+    this->stream = 0;
+    this->frame = 0;
+    this->dib = 0;
+    this->use_compressor = 0;
+    return IM_ERR_NONE;
+  }
+
+  AVIFileExit();  // balances AVIFileInit on failure
+
+  if (hr == AVIERR_FILEOPEN)
+    return IM_ERR_OPEN;
+  if (hr == AVIERR_BADFORMAT || hr == REGDB_E_CLASSNOTREG)
+    return IM_ERR_FORMAT;
+  return IM_ERR_ACCESS;
+}
+
+void imFormatAVI::Close()
+{
+  // Releases, in this order: the frame DIB, the sequence compressor, the
+  // frame reader, the video stream and finally the file and the AVI library.
+  if (this->dib != NULL)
+    imDibDestroy(this->dib);
+  if (this->use_compressor != 0)
+  {
+    ICSeqCompressFrameEnd(&this->compvars);
+    ICCompressorFree(&this->compvars);
+  }
+  if (this->frame != NULL) AVIStreamGetFrameClose(this->frame);
+  if (this->stream != NULL) AVIStreamRelease(this->stream);
+  AVIFileRelease(this->file);
+  AVIFileExit();
+}
+
+void* imFormatAVI::Handle(int index)
+{
+  // Gives access to the native Video for Windows handles:
+  // index 1 is the PAVIFILE, index 2 the PAVISTREAM, anything else NULL.
+  void* native = NULL;
+  if (index == 1) native = (void*)this->file;
+  else if (index == 2) native = (void*)this->stream;
+  return native;
+}
+
+int imFormatAVI::ReadImageInfo(int index)
+{
+  // Selects the frame for the next ReadImageData. The first call also reads
+  // the stream format into the image description (size, color mode, palette)
+  // and opens the frame reader; later calls only store the index.
+  this->current_frame = index;
+
+  if (this->frame)       // frame reading already prepared
+    return IM_ERR_NONE;
+
+  /* get stream format: the first call only asks for the required size */
+  LONG formsize = 0;
+  AVIStreamReadFormat(stream, 0, NULL, &formsize);
+  if (formsize < (LONG)sizeof(BITMAPINFOHEADER))
+    return IM_ERR_ACCESS;  // no size reported, or too small to hold the header read below
+
+  BITMAPINFO *bmpinfo = (BITMAPINFO*)malloc(formsize);
+  if (!bmpinfo)
+    return IM_ERR_ACCESS;  // out of memory
+
+  HRESULT hr = AVIStreamReadFormat(stream, 0, bmpinfo, &formsize);
+  if (hr != 0)
+  {
+    free(bmpinfo);
+    return IM_ERR_ACCESS;
+  }
+
+  // a negative height marks a top-down bitmap
+  int top_down = (bmpinfo->bmiHeader.biHeight < 0);
+
+  this->width = bmpinfo->bmiHeader.biWidth;
+  this->height = top_down? -bmpinfo->bmiHeader.biHeight: bmpinfo->bmiHeader.biHeight;
+  int bpp = bmpinfo->bmiHeader.biBitCount;
+  imAttribTable* attrib_table = AttribTable();
+  attrib_table->Set("FPS", IM_FLOAT, 1, &fps);
+
+  this->file_data_type = IM_BYTE;
+
+  if (bpp > 8)
+    this->file_color_mode = (int)IM_RGB | (int)IM_PACKED;
+  else
+  {
+    this->palette_count = 1 << bpp;
+    this->file_color_mode = IM_MAP;
+  }
+
+  if (bpp < 8)
+    this->convert_bpp = bpp;
+
+  if (bpp == 32)
+    this->file_color_mode |= IM_ALPHA;
+
+  if (top_down)
+    this->file_color_mode |= IM_TOPDOWN;
+
+  if (bpp <= 8)
+  {
+    /* updates the palette_count based on the number of colors used */
+    if (bmpinfo->bmiHeader.biClrUsed != 0 && 
+        (int)bmpinfo->bmiHeader.biClrUsed < this->palette_count)
+      this->palette_count = bmpinfo->bmiHeader.biClrUsed;
+
+    ReadPalette((unsigned char*)bmpinfo->bmiColors);
+  }
+
+  free(bmpinfo);
+
+  this->line_buffer_extra = 4; // room enough for padding
+
+  /* prepares to read data from the stream */
+  frame = AVIStreamGetFrameOpen(stream, NULL);
+  return frame? IM_ERR_NONE: IM_ERR_ACCESS;
+}
+
+int imFormatAVI::WriteImageInfo()
+{ // First call: fixes the frame layout and creates the DIB, the optional compressor and the video stream. Later calls only check that nothing changed.
+  if (dib)  // the stream was already configured by an earlier call
+  {
+    if (dib->bmih->biWidth != width || dib->bmih->biHeight != height ||
+        imColorModeSpace(file_color_mode) != imColorModeSpace(user_color_mode))
+      return IM_ERR_DATA;
+
+    return IM_ERR_NONE;  // parameters can be set only once
+  }
+
+  // force bottom up orientation (only the color space of the user mode is kept)
+  this->file_data_type = IM_BYTE;
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);
+
+  int bpp;  // bits per pixel of the DIB: 8, 24 or 32
+  if (this->file_color_mode == IM_RGB)
+  {
+    this->file_color_mode |= IM_PACKED;
+    bpp = 24;
+
+    if (imColorModeHasAlpha(this->user_color_mode))
+    {
+      this->file_color_mode |= IM_ALPHA;
+      bpp = 32;
+      // masks and offsets used by FixRGB for the 32 bpp lines
+      this->rmask = 0x00FF0000;
+      this->roff = 16;
+
+      this->gmask = 0x0000FF00;
+      this->goff = 8;
+
+      this->bmask = 0x000000FF;
+      this->boff = 0;
+    }
+  }
+  else
+    bpp = 8;
+
+  this->line_buffer_extra = 4; // room enough for padding
+
+  imAttribTable* attrib_table = AttribTable();
+
+  const void* attrib_data = attrib_table->Get("FPS");
+  if (attrib_data)
+    fps = *(float*)attrib_data;
+  else
+    fps = 15;  // default when there is no "FPS" attribute
+
+  if (this->compression[0] == 0 || imStrEqual(this->compression, "NONE"))
+    this->use_compressor = 0;
+  else
+    this->use_compressor = 1;
+
+  dib = imDibCreate(width, height, bpp);  // NOTE(review): stays allocated on the error returns below, so a repeated call takes the "if (dib)" branch above
+
+  if (use_compressor)
+  {
+    memset(&compvars, 0, sizeof(COMPVARS));
+    compvars.cbSize = sizeof(COMPVARS);
+
+    if (imStrEqual(this->compression, "CUSTOM"))
+    {
+      if (ICCompressorChoose(NULL, ICMF_CHOOSE_DATARATE | ICMF_CHOOSE_KEYFRAME, dib->dib, NULL, &compvars, "Choose Compression") == FALSE)
+        return IM_ERR_COMPRESS;
+    }
+    else
+    {
+      compvars.dwFlags = ICMF_COMPVARS_VALID;
+      compvars.fccType = ICTYPE_VIDEO;
+
+      int* attrib = (int*)attrib_table->Get("KeyFrameRate");
+      if (attrib)
+        compvars.lKey = *attrib;
+      else
+        compvars.lKey = 15;        // same defaults of the dialog
+
+      attrib = (int*)attrib_table->Get("DataRate");
+      if (attrib)
+        compvars.lDataRate = *attrib / 8;  // NOTE(review): "/ 8" presumably converts bits to bytes - confirm the unit of the attribute
+      else
+        compvars.lDataRate = 300;  // same defaults of the dialog
+
+      attrib = (int*)attrib_table->Get("AVIQuality");
+      if (attrib)
+        compvars.lQ = *attrib;
+      else
+        compvars.lQ = (DWORD)ICQUALITY_DEFAULT;
+
+      if (imStrEqual(this->compression, "RLE"))
+        compvars.fccHandler = mmioFOURCC('M','R','L','E');
+      else if (imStrEqual(this->compression, "CINEPACK"))
+        compvars.fccHandler = mmioFOURCC('c','v','i','d');    
+      else  // any other name is used as the FOURCC itself (its first four characters)
+        compvars.fccHandler = mmioFOURCC(compression[0],compression[1],compression[2],compression[3]);
+
+      compvars.hic = ICOpen(ICTYPE_VIDEO, compvars.fccHandler, ICMODE_COMPRESS);
+    }
+
+    if (compvars.hic == NULL)
+      use_compressor = 0;  // no codec handle: the frames are written uncompressed instead
+  }
+
+  AVISTREAMINFO streaminfo;
+  memset(&streaminfo, 0, sizeof(AVISTREAMINFO));
+  streaminfo.fccType = streamtypeVIDEO;
+  streaminfo.dwScale = 1000;               // dwRate/dwScale = fps,
+  streaminfo.dwRate  = (DWORD)(fps*1000);  // so three decimal digits of fps are kept
+  SetRect(&streaminfo.rcFrame, 0, 0, width, height);
+
+  if (use_compressor)
+  {
+    streaminfo.fccHandler = compvars.fccHandler;
+    streaminfo.dwQuality = compvars.lQ;
+  }
+  else
+  {
+    streaminfo.fccHandler = mmioFOURCC('D','I','B',' ');
+    streaminfo.dwQuality = (DWORD)ICQUALITY_DEFAULT;
+  }
+
+  /* creates a new stream in the new file */
+  HRESULT hr = AVIFileCreateStream(file, &stream, &streaminfo);         
+  if (hr != 0)
+    return IM_ERR_ACCESS;
+
+  /* set stream format */
+  if (use_compressor)
+  {
+    if (!ICSeqCompressFrameStart(&compvars, dib->bmi))
+      return IM_ERR_COMPRESS;
+    // NOTE(review): the size passed below is the one of the input DIB header, not of lpbiOut - confirm
+    hr = AVIStreamSetFormat(stream, 0, compvars.lpbiOut, dib->size - dib->bits_size); 
+  }
+  else
+    hr = AVIStreamSetFormat(stream, 0, dib->dib, dib->size - dib->bits_size);  // everything in the DIB but the pixel bits
+
+  if (hr != 0)
+    return IM_ERR_ACCESS;
+
+  return IM_ERR_NONE;
+}
+
+void imFormatAVI::ReadPalette(unsigned char* bmp_colors)
+{
+  // Fills this->palette from bmp_colors, which holds 4 bytes per entry:
+  // imColorEncode receives bytes 2, 1 and 0 of each entry, byte 3 is ignored.
+  const unsigned char* quad = bmp_colors;
+  for (int c = 0; c < this->palette_count; c++)
+  {
+    this->palette[c] = imColorEncode(quad[2], quad[1], quad[0]);
+    quad += 4;
+  }
+}
+
+void imFormatAVI::WritePalette(unsigned char* bmp_colors)
+{
+  // Unpacks each IM palette color into 4 bytes: imColorDecode fills bytes 2, 1 and 0; byte 3 is cleared.
+  unsigned char* quad = bmp_colors;
+  for (int c = 0; c < this->palette_count; c++, quad += 4)
+  {
+    imColorDecode(quad + 2, quad + 1, quad, this->palette[c]);
+    quad[3] = 0;
+  }
+}
+
+// Counts the zero bits below the lowest set bit of mask; an empty mask
+// gives 0.
+static unsigned int iAVIMaskShift(unsigned int mask)
+{
+  unsigned int shift = 0;
+  // shift right until the lowest bit is set or nothing is left
+  for (; mask != 0 && (mask & 0x01) == 0; mask >>= 1)
+    shift++;
+  return shift;
+}
+
+void imFormatAVI::InitMasks(imDib* dib)
+{
+  // Sets the red, green and blue masks and bit offsets used by FixRGB.
+  // A BI_BITFIELDS DIB carries its own masks; otherwise the defaults are
+  // 5 bits per component for 16 bpp and 8 bits per component for any
+  // other bit count.
+
+  if (dib->bmih->biCompression == BI_BITFIELDS)
+  {
+    // the three masks are read from the start of the DIB color area
+    const unsigned int* field = (const unsigned int*)dib->bmic;
+
+    this->rmask = field[0];
+    this->gmask = field[1];
+    this->bmask = field[2];
+
+    this->roff = iAVIMaskShift(this->rmask);
+    this->goff = iAVIMaskShift(this->gmask);
+    this->boff = iAVIMaskShift(this->bmask);
+    return;
+  }
+
+  if (dib->bmih->biBitCount == 16)
+  {
+    // 5-5-5 layout
+    this->rmask = 0x7C00;  this->roff = 10;
+    this->gmask = 0x03E0;  this->goff = 5;
+    this->bmask = 0x001F;  this->boff = 0;
+  }
+  else
+  {
+    // 8-8-8 layout in the low three bytes
+    this->rmask = 0x00FF0000;  this->roff = 16;
+    this->gmask = 0x0000FF00;  this->goff = 8;
+    this->bmask = 0x000000FF;  this->boff = 0;
+  }
+}
+
+void imFormatAVI::FixRGB(int bpp)
+{
+  // Converts line_buffer in place: 16 bpp words become 3 bytes and 32 bpp dwords become 4 bytes
+  // (both through the r/g/b masks and offsets); 24 bpp pixels get bytes 0 and 2 swapped.
+  switch (bpp)
+  {
+  case 16:
+    {
+      /* inverts the WORD values if not intel */
+      if (imBinCPUByteOrder() == IM_BIGENDIAN)
+        imBinSwapBytes2(this->line_buffer, this->width);
+      imushort* word_data = (imushort*)this->line_buffer;
+      imbyte* byte_data = (imbyte*)this->line_buffer;
+      unsigned int rmax = (rmask >> roff)? (rmask >> roff): 1;  // largest value of each component;
+      unsigned int gmax = (gmask >> goff)? (gmask >> goff): 1;  // 1 keeps an empty mask
+      unsigned int bmax = (bmask >> boff)? (bmask >> boff): 1;  // from dividing by zero
+      // from end to start: each output pixel (3 bytes) is wider than its input (2 bytes)
+      for (int x = this->width-1; x >= 0; x--)
+      {
+        imushort word_value = word_data[x];
+        int c = x*3;
+        byte_data[c]   = (imbyte)((((rmask & word_value) >> roff) * 255) / rmax);
+        byte_data[c+1] = (imbyte)((((gmask & word_value) >> goff) * 255) / gmax);
+        byte_data[c+2] = (imbyte)((((bmask & word_value) >> boff) * 255) / bmax);
+      }
+    }
+    break;
+  case 32:
+    {
+      unsigned int* dword_data = (unsigned int*)this->line_buffer;
+      imbyte* byte_data = (imbyte*)this->line_buffer;
+      for (int x = 0; x < this->width; x++)
+      {
+        unsigned int dword_value = dword_data[x];
+        int c = x*4;  // 4 bytes per pixel (with x*3 the next pixel overwrote this alpha)
+        byte_data[c]   = (imbyte)((rmask & dword_value) >> roff);
+        byte_data[c+1] = (imbyte)((gmask & dword_value) >> goff);
+        byte_data[c+2] = (imbyte)((bmask & dword_value) >> boff);
+        byte_data[c+3] = (imbyte)((0xFF000000 & dword_value) >> 24);
+      }
+    }
+    break;
+  default: // 24
+    {
+      imbyte* byte_data = (imbyte*)this->line_buffer;
+      for (int x = 0; x < this->width; x++)
+      {
+        int c = x*3;
+        imbyte temp = byte_data[c];     // swap R and B
+        byte_data[c] = byte_data[c+2];
+        byte_data[c+2] = temp;
+      }
+    }
+    break;
+  }
+}
+
+int imFormatAVI::ReadImageData(void* data)
+{
+  // Fetches the frame selected by current_frame and passes it, line by
+  // line, through line_buffer into data; then advances current_frame.
+  imCounterTotal(this->counter, this->height, "Reading AVI Frame...");
+
+  void* packed_dib = AVIStreamGetFrame(this->frame, this->current_frame);
+  if (packed_dib == NULL)
+    return IM_ERR_ACCESS;
+
+  // wraps the returned buffer in an imDib to reach its header, colors and bits
+  dib = imDibCreateReference((imbyte*)packed_dib, NULL);
+  const int bpp = dib->bmih->biBitCount;
+
+  if (bpp <= 8)
+  {
+    this->palette_count = dib->palette_count;
+    ReadPalette((unsigned char*)dib->bmic);
+  }
+  else if (bpp == 16 || bpp == 32)
+    InitMasks(dib);
+
+  int status = IM_ERR_NONE;
+  imbyte* src = dib->bits;
+  for (int row = 0; row < this->height; row++, src += dib->line_size)
+  {
+    CopyMemory(this->line_buffer, src, dib->line_size);
+    if (bpp > 8)
+      FixRGB(bpp);  // DIB pixel layout -> packed bytes
+    imFileLineBufferRead(this, data, row, 0);
+    if (!imCounterInc(this->counter))
+    {
+      status = IM_ERR_COUNTER;  // imCounterInc returned 0: stop reading
+      break;
+    }
+  }
+
+  imDibDestroy(dib);
+  dib = NULL;
+  if (status == IM_ERR_NONE)
+    this->current_frame++;
+  return status;
+}
+
+int imFormatAVI::WriteImageData(void* data)
+{ // Appends the image in data as frame number image_count of the stream; uses the dib set up by WriteImageInfo.
+  imCounterTotal(this->counter, this->height, "Writing AVI Frame...");
+
+  if (dib->bmih->biBitCount <= 8)
+  {
+    WritePalette((unsigned char*)dib->bmic);
+
+    /* this must be called here to update the palette */
+    AVIStreamSetFormat(this->stream, 0, dib->dib, dib->size - dib->bits_size);  // NOTE(review): the result is not checked
+  }
+
+  imbyte* bits = dib->bits;
+  for (int row = 0; row < this->height; row++)
+  {
+    imFileLineBufferWrite(this, data, row, 0);  // presumably fills line_buffer from the user data - confirm
+
+    if (dib->bmih->biBitCount > 8)
+      FixRGB(dib->bmih->biBitCount);            // converts line_buffer in place to the DIB pixel layout
+
+    CopyMemory(bits, this->line_buffer, dib->line_size);
+    bits += dib->line_size;
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+  }
+  // by default the uncompressed pixel area of the DIB is what gets written
+  bits = dib->bits;
+  LONG bits_size = dib->bits_size; 
+  DWORD flags = 0;
+
+  if (this->use_compressor)
+  {
+    BOOL key = FALSE;
+    bits = (imbyte*)ICSeqCompressFrame(&this->compvars, 0, bits, &key, &bits_size);
+    if (key == TRUE)
+      flags = AVIIF_KEYFRAME;
+    // when the compressor returns no data the uncompressed bits are written instead
+    if (!bits)
+    {
+      bits = dib->bits;
+      bits_size = dib->bits_size; 
+    }
+  }
+  // one sample is written at position image_count, which is then advanced
+  HRESULT hr = AVIStreamWrite(this->stream, this->image_count, 1, bits, bits_size, flags, NULL, NULL);
+  if (hr != 0)
+    return IM_ERR_ACCESS;
+
+  this->image_count++;
+
+  return IM_ERR_NONE;
+}
+
+int imFormatAVI::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  // Only byte data in a color space other than YCbCr, Lab, Luv, XYZ and CMYK
+  // is accepted; the compression name is not checked here.
+  (void)compression;
+
+  int color_space = imColorModeSpace(color_mode);
+  int rejected_space = (color_space == IM_YCBCR || color_space == IM_LAB ||
+                        color_space == IM_LUV || color_space == IM_XYZ ||
+                        color_space == IM_CMYK);
+
+  if (rejected_space || data_type != IM_BYTE)
+    return IM_ERR_DATA;
+
+  return IM_ERR_NONE;
+}
diff --git a/src/im_format_bmp.cpp b/src/im_format_bmp.cpp
new file mode 100644
index 0000000..809169b
--- /dev/null
+++ b/src/im_format_bmp.cpp
@@ -0,0 +1,939 @@
+/** \file
+ * \brief BMP - Windows Device Independent Bitmap
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_bmp.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_format_all.h"
+#include "im_util.h"
+#include "im_counter.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+
+
+#define BMP_ID              0x4d42  /* BMP "magic" number: the bytes 'B','M' read as a little endian word */
+
+#define BMP_COMPRESS_RGB        0L      /* No compression               */
+#define BMP_COMPRESS_RLE8       1L      /* 8 bits per pixel compression */
+#define BMP_COMPRESS_RLE4       2L      /* 4 bits per pixel compression (rejected by Open) */
+#define BMP_BITFIELDS           3L      /* no compression, palette is mask for 16 and 32 bits images */
+
+/* State-machine definitions, used by iBMPEncodeScanLine */
+#define BMP_READING 0         /* General READING mode */
+#define BMP_ENCODING 1        /* Encoding same-color pixel runs */
+#define BMP_ABSMODE 2         /* Absolute-mode encoding */
+#define BMP_SINGLE 3          /* Encoding short absolute-mode runs */
+#define BMP_ENDOFLINE 4       /* End of scan line detected */
+
+#define BMP_LSN(value)  (unsigned char)((value) & 0x0f)      /* Least-significant nibble */
+#define BMP_MSN(value)  (unsigned char)(((value) & 0xf0) >> 4)  /* Most-significant nibble  */
+
+
+/*  File Header Structure.
+ *  2   Type;       File Type Identifier        
+ *  4   FileSize;   Size of File                
+ *  2   Reserved1;  Reserved (should be 0)      
+ *  2   Reserved2;  Reserved (should be 0)      
+ *  4   Offset;     Offset to bitmap data       
+ *  14    TOTAL */
+
+/*  Information Header Structure. 
+ *  4  Size;            Size of Remaining Header         
+ *  4  Width;           Width of Bitmap in Pixels        
+ *  4  Height;          Height of Bitmap in Pixels       
+ *  2  Planes;          Number of Planes                 
+ *  2  BitCount;        Bits Per Pixel                   
+ *  4  Compression;     Compression Scheme      
+ *  4  SizeImage;       Size of bitmap         
+ *  4  XPelsPerMeter;   Horz. Resolution in Pixels/Meter 
+ *  4  YPelsPerMeter;   Vert. Resolution in Pixels/Meter 
+ *  4  ClrUsed;         Number of Colors in Color Table  
+ *  4  ClrImportant;    Number of Important Colors       
+ *  40   TOTAL V3 
+ *  4  RedMask; 
+ *  4  GreenMask; 
+ *  4  BlueMask; 
+ *  4  AlphaMask; 
+ *  4  CSType; 
+ *  12 ciexyzRed(x, y, z);     [3*FXPT2DOT30]
+ *  12 ciexyzGreen(x, y, z);         "
+ *  12 ciexyzBlue(x, y, z);          "
+ *  4  GammaRed; 
+ *  4  GammaGreen; 
+ *  4  GammaBlue; 
+ *  108  TOTAL V4   (not supported here)
+ *  4  Intent; 
+ *  4  ProfileData; 
+ *  4  ProfileSize; 
+ *  4  Reserved; 
+ *  120  TOTAL V5 (not supported here)
+ */
+
+/*  RGB Color Quadruple Structure. */
+/*  1   rgbBlue;      Blue Intensity Value   */
+/*  1   rgbGreen;     Green Intensity Value  */
+/*  1   rgbRed;       Red Intensity Value    */
+/*  1   rgbReserved;  Reserved (should be 0) */
+/*  4  */
+
+/* Decodes one RLE8 scan line from "handle" into DecodedBuffer (Width pixels).
+ * Pixels that do not fit in the line are consumed from the file but never stored.
+ * Returns IM_ERR_ACCESS on a file error, IM_ERR_NONE otherwise. */
+static int iBMPDecodeScanLine(imBinFile* handle, unsigned char* DecodedBuffer, int Width)
+{
+  unsigned char runCount;   /* Number of pixels in the run  */
+  unsigned char runValue;   /* Value of pixels in the run   */
+  int Index = 0;            /* The index of DecodedBuffer   */
+  int cont = 1, copy, skip;
+
+  while (cont)
+  {
+    imBinFileRead(handle, &runCount, 1, 1);  /* Number of pixels in the run */
+    imBinFileRead(handle, &runValue, 1, 1);  /* Value of pixels in the run  */
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+
+    if (runCount)
+    {
+      while (runCount-- && Index < Width)
+        DecodedBuffer[Index++] = runValue;
+    }
+    else  /* Absolute Mode or Escape Code */
+    {
+      switch(runValue)
+      {
+      case 0:             /* End of Scan Line Escape Code */
+      case 1:             /* End of Bitmap Escape Code */
+        cont = 0;
+        break;
+      case 2:             /* Delta Escape Code (ignored) */
+        imBinFileRead(handle, &runCount, 1, 1);
+        imBinFileRead(handle, &runCount, 1, 1);  
+        break;
+      default:            /* Absolute Mode: runValue literal pixels follow */
+        /* never store more than what is left in the line (the count can not get negative) */
+        copy = (runValue < Width - Index)? runValue: Width - Index;
+        skip = (runValue - copy) + (runValue % 2);  /* unstored pixels + word alignment byte */
+        if (copy > 0) imBinFileRead(handle, DecodedBuffer + Index, copy, 1);
+        if (skip > 0) imBinFileSeekOffset(handle, skip);
+        Index += copy;
+      }
+    }
+    if (imBinFileError(handle) || Index > Width)
+      return IM_ERR_ACCESS;
+  }
+  return IM_ERR_NONE;
+}
+
+/* Encodes the np pixels of scan line "sl" as BMP RLE8 into EncodedBuffer,
+ * ending with the end-of-line escape (0,0). EncodedBuffer is not bounds checked.
+ * Returns the number of bytes stored in EncodedBuffer. */
+static int iBMPEncodeScanLine(unsigned char* EncodedBuffer, unsigned char* sl, int np)
+{
+  int slx = 0;             /* Scan line index */
+  int state = BMP_READING; /* State machine control variable */
+  int count = 0;           /* Used by various states */
+  unsigned char pixel;     /* Holds single pixels from sl */
+  int done = 0;            /* Ends while loop when true */
+  int oldcount, oldslx;    /* Copies of count and slx */
+  int BufSize = 0;         /* Number of bytes stored in EncodedBuffer */
+
+  while (!done) 
+  {
+    switch (state) 
+    {
+    case BMP_READING:
+      /* Input: */
+      /* np == number of pixels in scan line */
+      /* sl == scan line */
+      /* sl[slx] == next pixel to process */
+
+      if (slx >= np)                      /* No pixels left */
+        state = BMP_ENDOFLINE;
+      else if (slx == np - 1)             /* One pixel left */
+      {
+        count = 1;
+        state = BMP_SINGLE;
+      } 
+      else if (sl[slx] == sl[slx + 1])    /* Next 2 pixels equal */
+        state = BMP_ENCODING;
+      else                                /* Next 2 pixels differ */
+        state = BMP_ABSMODE;
+      break;
+    case BMP_ENCODING:
+      /* Input: */
+      /* slx <= np - 2 (at least 2 pixels in run) */
+      /* sl[slx] == first pixel of run */
+      /* sl[slx] == sl[slx + 1] */
+
+      count = 2;
+      pixel = sl[slx];
+      slx += 2;
+
+      while ((slx < np) && (pixel == sl[slx]) && (count < 255)) 
+      {
+        count++;
+        slx++;
+      }
+      /* emits the encoded run: count, value */
+      *EncodedBuffer++ = (unsigned char)count; 
+      BufSize++;
+      *EncodedBuffer++ = pixel; 
+      BufSize++;
+      state = BMP_READING;
+
+      break;
+    case BMP_ABSMODE:
+      /* Input: */
+      /* slx <= np - 2 (at least 2 pixels in run) */
+      /* sl[slx] == first pixel of run */
+      /* sl[slx] != sl[slx + 1] */
+
+      oldslx = slx;
+      count = 2;
+      slx += 2;
+
+      /* Compute number of bytes in run */
+      while ((slx < np) && (sl[slx] != sl[slx - 1]) && (count < 255)) 
+      {
+        count++;
+        slx++;
+      }
+
+      /* If same-color run found, back up one byte */
+      if ((slx < np) && (sl[slx] == sl[slx - 1]))
+        if (count > 1)
+          count--;
+
+      slx = oldslx;  /* Restore scan-line index */
+
+      /* Output short absolute runs of less than 3 pixels */
+      if (count < 3 )
+        state = BMP_SINGLE;
+      else 
+      {
+        /* Output absolute-mode run */
+        *EncodedBuffer++ = 0; 
+        BufSize++;
+        *EncodedBuffer++ = (unsigned char)count; 
+        BufSize++;
+        oldcount = count;
+
+        while (count > 0) 
+        {
+          *EncodedBuffer++ = sl[slx]; 
+          BufSize++;
+          slx++;
+          count--;
+        }
+        /* odd-length runs are followed by one zero byte */
+        if (oldcount % 2) 
+        {
+          *EncodedBuffer++ = 0; 
+          BufSize++;
+        }
+
+       state = BMP_READING;
+      }
+      break;
+
+    case BMP_SINGLE:
+      /* Input: */
+      /* count == number of pixels to output */
+      /* slx < np */
+      /* sl[slx] == first pixel of run */
+      /* sl[slx] != sl[slx + 1] */
+      /* each pixel is emitted as an encoded run of length 1 */
+      while (count > 0) 
+      {
+        *EncodedBuffer++ = (unsigned char)1; 
+        BufSize++;
+        *EncodedBuffer++ = sl[slx]; 
+        BufSize++;
+        slx++;
+        count--;
+      }
+
+      state = BMP_READING;
+      break;
+    case BMP_ENDOFLINE:
+      *EncodedBuffer++ = (unsigned char)0; 
+      BufSize++;
+      *EncodedBuffer++ = (unsigned char)0; 
+      BufSize++;
+      done = 1;
+      break;
+    default:
+      break;
+    }
+  }
+
+  return BufSize;
+}
+
+static const char* iBMPCompTable[2] = 
+{
+  "NONE",  /* uncompressed pixel data */
+  "RLE"    /* run-length encoding, mapped to BMP_COMPRESS_RLE8 */
+};
+
+/* BMP file format driver: reads and writes BMP files holding a single image. */
+class imFormatBMP: public imFormat
+{
+  imBinFile* handle;          /* the binary file handle */
+  unsigned short bpp;         /* number of bits per pixel */
+  unsigned int offset,        /* image data offset, used only when reading */
+               comp_type;     /* bmp compression information */
+  int is_os2,                 /* indicates an os2 1.x BMP */
+      line_raw_size;              // size in bytes of a file line, aligned to 4 bytes
+  unsigned int rmask, gmask, bmask, 
+                roff, goff, boff; /* pixel bit mask control when reading 16 and 32 bpp images */
+
+  int ReadPalette();          /* loads the file color table into the palette */
+  int WritePalette();         /* stores the palette as the file color table */
+  void FixRGBOrder();         /* converts the packed RGB line buffer in place */
+
+public:
+  imFormatBMP()
+    :imFormat("BMP", 
+              "Windows Device Independent Bitmap", 
+              "*.bmp;*.dib;", 
+              iBMPCompTable, 
+              2, 
+              0)
+    {}
+  ~imFormatBMP() {}
+  int Open(const char* file_name);
+  int New(const char* file_name);
+  void Close();
+  void* Handle(int index);
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo();
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+void imFormatRegisterBMP(void)  /* passes a new BMP driver instance to imFormatRegister */
+{
+  imFormatRegister(new imFormatBMP());
+}
+
+/* Opens a BMP file for reading and checks its identifier, header size and compression.
+ * Returns IM_ERR_OPEN, IM_ERR_ACCESS, IM_ERR_FORMAT, IM_ERR_COMPRESS or IM_ERR_NONE;
+ * the file is closed again before any error is returned. */
+int imFormatBMP::Open(const char* file_name)
+{
+  unsigned short id = 0;
+  unsigned int dword = 0;
+
+  /* opens the binary file for reading with intel byte order */
+  handle = imBinFileOpen(file_name);
+  if (!handle)
+    return IM_ERR_OPEN;
+
+  imBinFileByteOrder(handle, IM_LITTLEENDIAN); 
+
+  /* reads the BMP format identifier */
+  imBinFileRead(handle, &id, 1, 2);
+  if (imBinFileError(handle))
+  {
+    imBinFileClose(handle);
+    return IM_ERR_ACCESS;
+  }
+
+  if (id != BMP_ID)
+  {
+    imBinFileClose(handle);
+    return IM_ERR_FORMAT;
+  }
+
+  /* jump 8 bytes (file size,reserved) */
+  imBinFileSeekOffset(handle, 8);
+
+  /* reads the image offset */
+  imBinFileRead(handle, &this->offset, 1, 4);
+
+  /* reads the header size */
+  imBinFileRead(handle, &dword, 1, 4);
+
+  /* only 40 (Windows) and 12 (OS/2 1.x) byte headers are accepted; a truncated header is rejected too */
+  if (imBinFileError(handle) || (dword != 40 && dword != 12))
+  {
+    imBinFileClose(handle);
+    return IM_ERR_FORMAT;
+  }
+  this->is_os2 = (dword == 12);
+  this->image_count = 1;
+
+  /* reads the compression information */
+  if (this->is_os2)
+  {
+    this->comp_type = BMP_COMPRESS_RGB;
+    strcpy(this->compression, "NONE");
+  }
+  else
+  {
+    /* skips width, height, planes and bit count (4+4+2+2 bytes) */
+    imBinFileSeekOffset(handle, 12);
+    imBinFileRead(handle, &this->comp_type, 1, 4);
+
+    switch (this->comp_type)
+    {
+    case BMP_COMPRESS_RGB:
+      strcpy(this->compression, "NONE");
+      break;
+    case BMP_COMPRESS_RLE8:
+      strcpy(this->compression, "RLE");
+      break;
+    case BMP_COMPRESS_RLE4:
+    default:              /* BMP_BITFIELDS also ends here */
+      imBinFileClose(handle);
+      return IM_ERR_COMPRESS;
+    }
+
+    imBinFileSeekOffset(handle, -16);  /* back to the width field */
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Creates a new BMP file for writing and sets the image count to 1.
+ * Returns IM_ERR_OPEN if the file can not be created, IM_ERR_NONE otherwise. */
+int imFormatBMP::New(const char* file_name)
+{
+  this->handle = imBinFileNew(file_name);
+  if (this->handle != NULL)
+  {
+    imBinFileByteOrder(this->handle, IM_LITTLEENDIAN);  /* intel byte order */
+    this->image_count = 1;
+    return IM_ERR_NONE;
+  }
+  return IM_ERR_OPEN;
+}
+
+void imFormatBMP::Close()  /* closes the binary file handle */
+{
+  imBinFileClose(handle);
+}
+
+/* Returns the internal imBinFile handle for index 0, NULL for any other index. */
+void* imFormatBMP::Handle(int index)
+{
+  if (index != 0)
+    return NULL;
+  return (void*)this->handle;
+}
+
+/* Reads the image properties (size, bit count, palette or bit masks, resolution)
+ * from the info header, starting at the width field where Open left the file.
+ * Returns IM_ERR_ACCESS, IM_ERR_DATA or IM_ERR_NONE. */
+int imFormatBMP::ReadImageInfo(int index)
+{
+  (void)index;
+  unsigned int dword = 0;
+  int top_down = 0;   /* set when the stored height is negative */
+
+  this->file_data_type = IM_BYTE;
+
+  if (this->is_os2)
+  {
+    short word = 0;
+    /* reads the image width */
+    imBinFileRead(handle, &word, 1, 2);
+    this->width = (int)word;
+    /* reads the image height */
+    imBinFileRead(handle, &word, 1, 2);
+    this->height = (int)((word < 0)? -word: word);
+    top_down = (word < 0);
+  }
+  else
+  {
+    /* reads the image width */
+    imBinFileRead(handle, &dword, 1, 4);
+    this->width = (int)dword;
+
+    /* reads the image height */
+    imBinFileRead(handle, &dword, 1, 4);
+    this->height = (int)dword;
+    top_down = (this->height < 0);
+    if (top_down)
+      this->height = -this->height;
+  }
+
+  /* jump 2 bytes (planes) */
+  imBinFileSeekOffset(handle, 2);
+
+  /* reads the number of bits per pixel */
+  imBinFileRead(handle, &this->bpp, 1, 2);
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  // sanity check
+  if (this->bpp != 1 && this->bpp != 4 && this->bpp != 8 && 
+      this->bpp != 16 && this->bpp != 24 && this->bpp != 32)
+    return IM_ERR_DATA;
+
+  // another sanity check
+  if (this->comp_type == BMP_BITFIELDS && this->bpp != 16 && this->bpp != 32)
+    return IM_ERR_DATA;
+
+  if (this->bpp > 8)
+  {
+    this->file_color_mode = IM_RGB;
+    this->file_color_mode |= IM_PACKED;
+  }
+  else
+  {
+    this->palette_count = 1 << bpp;
+    this->file_color_mode = IM_MAP;
+  }
+
+  if (this->bpp < 8)
+    this->convert_bpp = this->bpp;
+  if (this->bpp == 32)
+    this->file_color_mode |= IM_ALPHA;
+
+  if (top_down)  /* the sign was tested on an unsigned value before, so it was never seen */
+    this->file_color_mode |= IM_TOPDOWN;
+
+  this->line_raw_size = imFileLineSizeAligned(this->width, this->bpp, 4);
+  this->line_buffer_extra = 4; // room enough for padding
+
+  if (this->is_os2)
+  {
+    if (this->bpp < 24)
+      return ReadPalette();
+
+    return IM_ERR_NONE;
+  }
+
+  /* the compression information was already read by Open */
+  /* jump 8 bytes (compression, image size) */
+  imBinFileSeekOffset(handle, 8);
+
+  /* read the x resolution (stored in pixels per meter, kept in pixels per centimeter) */
+  imBinFileRead(handle, &dword, 1, 4);
+  float xres = (float)dword / 100.0f;
+
+  /* read the y resolution */
+  imBinFileRead(handle, &dword, 1, 4);
+  float yres = (float)dword / 100.0f;
+
+  if (xres && yres)
+  {
+    imAttribTable* attrib_table = AttribTable();
+    attrib_table->Set("XResolution", IM_FLOAT, 1, &xres);
+    attrib_table->Set("YResolution", IM_FLOAT, 1, &yres);
+    attrib_table->Set("ResolutionUnit", IM_BYTE, 4, "DPC");
+  }
+
+  if (this->bpp <= 8)
+  {
+    /* reads the number of colors used */
+    imBinFileRead(handle, &dword, 1, 4);
+
+    /* updates the palette_count based on the number of colors used */
+    if (dword != 0 && dword < (unsigned int)this->palette_count)
+      this->palette_count = dword;
+
+    /* jump 4 bytes (important colors) */
+    imBinFileSeekOffset(handle, 4);
+  }
+  else
+  {
+    /* jump 8 bytes (used colors, important colors) */
+    imBinFileSeekOffset(handle, 8);
+  }
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  if (this->bpp <= 8)
+    return ReadPalette();
+
+  if (this->bpp == 16 || this->bpp == 32)
+  {
+    if (this->comp_type == BMP_BITFIELDS)
+    {
+      unsigned int Mask;
+      unsigned int PalMask[3];
+
+      imBinFileRead(handle, PalMask, 3, 4);
+      if (imBinFileError(handle))
+        return IM_ERR_ACCESS;
+
+      /* each offset is the position of the lowest bit set in the mask */
+      this->roff = 0;
+      this->rmask = Mask = PalMask[0];
+      while (!(Mask & 0x01) && (Mask != 0))
+        {Mask >>= 1; this->roff++;}
+
+      this->goff = 0;
+      this->gmask = Mask = PalMask[1];
+      while (!(Mask & 0x01) && (Mask != 0))
+        {Mask >>= 1; this->goff++;}
+
+      this->boff = 0;
+      this->bmask = Mask = PalMask[2];
+      while (!(Mask & 0x01) && (Mask != 0))
+        {Mask >>= 1; this->boff++;}
+    }
+    else
+    {
+      if (this->bpp == 16)
+      {                   
+        this->rmask = 0x7C00;
+        this->roff = 10;
+
+        this->gmask = 0x03E0;
+        this->goff = 5;
+
+        this->bmask = 0x001F;
+        this->boff = 0;
+      }
+      else
+      {
+        this->rmask = 0x00FF0000;
+        this->roff = 16;
+
+        this->gmask = 0x0000FF00;
+        this->goff = 8;
+
+        this->bmask = 0x000000FF;
+        this->boff = 0;
+      }
+    }
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Chooses the file layout (bpp, compression, line size) from the user color mode
+ * and writes the 14 byte file header, the 40 byte info header and the palette.
+ * Returns IM_ERR_ACCESS on a write error, IM_ERR_NONE otherwise. */
+int imFormatBMP::WriteImageInfo()
+{
+  // force bottom up orientation
+  this->file_data_type = IM_BYTE;
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);
+  if (imStrEqual(this->compression, "RLE"))
+    this->comp_type = BMP_COMPRESS_RLE8;
+  else
+    this->comp_type = BMP_COMPRESS_RGB;
+
+  if (this->file_color_mode == IM_BINARY)
+  {
+    this->bpp = 1;
+    this->convert_bpp = 1;
+  }
+  else if (this->file_color_mode == IM_RGB)
+  {
+    this->file_color_mode |= IM_PACKED;
+    this->bpp = 24;
+    if (imColorModeHasAlpha(this->user_color_mode))
+    {
+      this->file_color_mode |= IM_ALPHA;
+      this->bpp = 32;
+
+      this->rmask = 0x00FF0000;
+      this->roff = 16;
+
+      this->gmask = 0x0000FF00;
+      this->goff = 8;
+
+      this->bmask = 0x000000FF;
+      this->boff = 0;
+    }
+  }
+  else
+    this->bpp = 8;
+
+  this->line_raw_size = imFileLineSizeAligned(this->width, this->bpp, 4);
+  this->line_buffer_extra = 4; // room enough for padding
+
+  if (this->comp_type == BMP_COMPRESS_RLE8)
+  {
+    // allocates more than enough since the compression algorithm can be inefficient
+    this->line_buffer_extra += 2*this->line_raw_size;
+  }
+
+  /* writes the BMP file header */
+  int palette_size = (this->bpp > 8)? 0: palette_count*4;
+  short word_value = BMP_ID;
+  imBinFileWrite(handle, &word_value, 1, 2); /* identifier */
+  unsigned int dword_value = 14 + 40 + palette_size + line_raw_size * this->height;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* file size for uncompressed images */
+  word_value = 0;
+  imBinFileWrite(handle, &word_value, 1, 2); /* reserved 1 */
+  imBinFileWrite(handle, &word_value, 1, 2); /* reserved 2 */
+  dword_value = 14 + 40 + palette_size;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* data offset */
+
+  /* writes the BMP info header */
+  dword_value = 40;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* header size */
+  dword_value = this->width;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* width */
+  dword_value = this->height;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* height */
+  word_value = 1;
+  imBinFileWrite(handle, &word_value, 1, 2);  /* planes */
+  word_value = this->bpp;
+  imBinFileWrite(handle, &word_value, 1, 2);  /* bpp */
+  dword_value = this->comp_type;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* compression */
+  dword_value = line_raw_size * this->height;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* image size */
+  /* resolution in pixels per meter, 0 when the attributes are not set */
+  imAttribTable* attrib_table = AttribTable();
+  unsigned int xppm = 0, yppm = 0;
+
+  const void* attrib_data = attrib_table->Get("ResolutionUnit");
+  if (attrib_data)
+  {
+    char* res_unit = (char*)attrib_data;
+
+    float* xres = (float*)attrib_table->Get("XResolution");
+    float* yres = (float*)attrib_table->Get("YResolution");
+
+    if (xres && yres)
+    {
+      if (imStrEqual(res_unit, "DPI"))
+      {
+        xppm = (unsigned int)(*xres * 100. / 2.54);
+        yppm = (unsigned int)(*yres * 100. / 2.54);
+      }
+      else
+      {
+        xppm = (unsigned int)(*xres * 100.);
+        yppm = (unsigned int)(*yres * 100.);
+      }
+    }
+  }
+
+  imBinFileWrite(handle, &xppm, 1, 4); /* x dpm */
+  imBinFileWrite(handle, &yppm, 1, 4); /* y dpm */
+
+  dword_value = (this->bpp > 8)? 0: this->palette_count;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* colors used */
+  dword_value = 0;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* colors important (all) */
+
+  /* tests if everything was ok */
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  if (this->bpp < 24)
+    return WritePalette();
+
+  return IM_ERR_NONE;
+}
+
+/* Loads the color table from the current file position into this->palette.
+ * OS/2 1.x files use 3 bytes per entry, the others use 4 bytes per entry.
+ * Returns IM_ERR_ACCESS on a read error, IM_ERR_NONE otherwise. */
+int imFormatBMP::ReadPalette()
+{
+  const int entry_size = this->is_os2? 3: 4;
+  unsigned char raw_table[256 * 4];
+
+  /* reads all the entries at once */
+  imBinFileRead(handle, raw_table, this->palette_count * entry_size, 1);
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  /* the first three bytes of an entry are passed to imColorEncode */
+  /* in reverse order (third, second, first) */
+  const unsigned char* entry = raw_table;
+  for (int c = 0; c < this->palette_count; c++, entry += entry_size)
+  {
+    unsigned char first = entry[0],
+                  second = entry[1],
+                  third = entry[2];
+    this->palette[c] = imColorEncode(third, second, first);
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Stores this->palette in the file as 4 byte entries, the last byte being 0.
+ * Returns IM_ERR_ACCESS on a write error, IM_ERR_NONE otherwise. */
+int imFormatBMP::WritePalette()
+{
+  unsigned char raw_table[256 * 4];
+  unsigned char* entry = raw_table;
+
+  /* converts each IM color to a file entry */
+  for (int c = 0; c < this->palette_count; c++, entry += 4)
+  {
+    imColorDecode(entry + 2, entry + 1, entry, this->palette[c]);
+    entry[3] = 0;  /* reserved byte */
+  }
+
+  /* writes all the entries at once */
+  imBinFileWrite(handle, raw_table, this->palette_count * 4, 1);
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+  return IM_ERR_NONE;
+}
+
+/* Converts the pixels of line_buffer in place, according to bpp: 16 expands masked words
+ * to 3 bytes, 32 extracts masked bytes, anything else (24) swaps the first and third byte. */
+void imFormatBMP::FixRGBOrder()
+{
+  int x;
+  switch (this->bpp)
+  {
+  case 16:
+    {
+      /* inverts the WORD values if not intel */
+      if (imBinCPUByteOrder() == IM_BIGENDIAN)
+        imBinSwapBytes2(this->line_buffer, this->width);
+      imushort* word_data = (imushort*)this->line_buffer;
+      imbyte* byte_data = (imbyte*)this->line_buffer;
+      /* each channel is scaled to 0-255 using the maximum value of its mask */
+      // from end to start: each pixel grows from 2 to 3 bytes inside the same buffer
+      for (x = this->width-1; x >= 0; x--)
+      {
+        imushort word_value = word_data[x];
+        int c = x*3;
+        byte_data[c]   = (imbyte)((((this->rmask & word_value) >> this->roff) * 255) / (this->rmask >> this->roff));
+        byte_data[c+1] = (imbyte)((((this->gmask & word_value) >> this->goff) * 255) / (this->gmask >> this->goff));
+        byte_data[c+2] = (imbyte)((((this->bmask & word_value) >> this->boff) * 255) / (this->bmask >> this->boff));
+      }
+    }
+    break;
+  case 32:
+    {
+      /* inverts the DWORD values if not intel */
+      if (imBinCPUByteOrder() == IM_BIGENDIAN)
+        imBinSwapBytes4(this->line_buffer, this->width);
+
+      unsigned int* dword_data = (unsigned int*)this->line_buffer;
+      imbyte* byte_data = (imbyte*)this->line_buffer;
+      /* each DWORD is rewritten in place as 4 bytes: the three masked values and the top byte */
+      for (x = 0; x < this->width; x++)
+      {
+        unsigned int dword_value = dword_data[x];
+        int c = x*4;
+        byte_data[c]   = (imbyte)((this->rmask & dword_value) >> this->roff);
+        byte_data[c+1] = (imbyte)((this->gmask & dword_value) >> this->goff);
+        byte_data[c+2] = (imbyte)((this->bmask & dword_value) >> this->boff);
+        byte_data[c+3] = (imbyte)((0xFF000000 & dword_value) >> 24);
+      }
+    }
+    break;
+  default: // 24
+    {
+      imbyte* byte_data = (imbyte*)this->line_buffer;
+      for (x = 0; x < this->width; x++)
+      {
+        int c = x*3;
+        imbyte temp = byte_data[c];     // swap R and B
+        byte_data[c] = byte_data[c+2];
+        byte_data[c+2] = temp;
+      }
+    }
+    break;
+  }
+}
+
+/* Reads every image line from the file into "data" (raw lines or RLE8 decoded).
+ * Returns IM_ERR_ACCESS on a file error, IM_ERR_COUNTER when imCounterInc
+ * returns zero, IM_ERR_NONE otherwise. */
+int imFormatBMP::ReadImageData(void* data)
+{
+  imCounterTotal(this->counter, this->height, "Reading BMP...");
+
+  /* the pixel data starts at the offset read from the file header */
+  imBinFileSeekTo(handle, this->offset);
+
+  for (int line = 0; line < this->height; line++)
+  {
+    int status = IM_ERR_NONE;
+    if (this->comp_type == BMP_COMPRESS_RGB)
+    {
+      imBinFileRead(handle, this->line_buffer, this->line_raw_size, 1);
+      if (imBinFileError(handle))
+        status = IM_ERR_ACCESS;
+    }
+    else
+      status = iBMPDecodeScanLine(handle, (imbyte*)this->line_buffer, this->width);
+
+    if (status == IM_ERR_ACCESS)
+      return IM_ERR_ACCESS;
+
+    if (this->bpp > 8)  /* packed RGB lines need their channels reordered */
+      FixRGBOrder();
+    imFileLineBufferRead(this, data, line, 0);
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Writes every image line from "data" to the file, raw or RLE8 encoded.
+ * Returns IM_ERR_ACCESS on a file error, IM_ERR_COUNTER when imCounterInc
+ * returns zero, IM_ERR_NONE otherwise. */
+int imFormatBMP::WriteImageData(void* data)
+{
+  imCounterTotal(this->counter, this->height, "Writing BMP...");
+
+  /* encoded lines are built in the extra area that follows the line buffer */
+  imbyte* rle_line = NULL;
+  if (this->comp_type == BMP_COMPRESS_RLE8)
+    rle_line = (imbyte*)this->line_buffer + this->line_buffer_size+4;
+
+  for (int line = 0; line < this->height; line++)
+  {
+    imFileLineBufferWrite(this, data, line, 0);
+    if (this->bpp > 8)
+      FixRGBOrder();
+
+    if (this->comp_type != BMP_COMPRESS_RGB)
+    {
+      int rle_size = iBMPEncodeScanLine(rle_line, (imbyte*)this->line_buffer, this->width);
+      imBinFileWrite(handle, rle_line, rle_size, 1);
+    }
+    else
+      imBinFileWrite(handle, this->line_buffer, this->line_raw_size, 1);
+
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Checks if an image with the given compression, color mode and data type
+ * can be written. Returns IM_ERR_DATA, IM_ERR_COMPRESS or IM_ERR_NONE. */
+int imFormatBMP::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  int color_space = imColorModeSpace(color_mode);
+
+  switch (color_space)
+  {
+  case IM_YCBCR: case IM_LAB: case IM_LUV: case IM_XYZ: case IM_CMYK:
+    return IM_ERR_DATA;
+  }
+  if (data_type != IM_BYTE)
+    return IM_ERR_DATA;
+
+  /* no compression name, an empty one or "NONE" are always accepted */
+  if (compression == NULL || compression[0] == 0 || imStrEqual(compression, "NONE"))
+    return IM_ERR_NONE;
+
+  /* "RLE" is the only other name, and it is refused for RGB and binary images */
+  if (!imStrEqual(compression, "RLE") || color_space == IM_RGB || color_space == IM_BINARY)
+    return IM_ERR_COMPRESS;
+  return IM_ERR_NONE;
+}
diff --git a/src/im_format_ecw.cpp b/src/im_format_ecw.cpp
new file mode 100644
index 0000000..0b77b56
--- /dev/null
+++ b/src/im_format_ecw.cpp
@@ -0,0 +1,375 @@
+/** \file
+ * \brief ECW - ECW JPEG 2000
+ *
+ * See Copyright Notice in im_lib.h
+ */
+
+#include "im_format.h"
+#include "im_util.h"
+#include "im_format_ecw.h"
+#include "im_counter.h"
+
+#include <NCSECWClient.h>
+// #include <NCSEcwCompressClient.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <memory.h>
+
+static const char* iECWCompTable[2] = 
+{
+  "ECW",        /* set by Open for NCS_FILE_ECW files */
+  "JPEG-2000",  /* set by Open for NCS_FILE_JP2 files */
+};
+
+/* ECW and JPEG-2000 driver built on the NCS file view API; writing is not implemented. */
+class imFormatECW: public imFormat
+{
+  NCSFileView *pNCSFileView;  /* file view created by Open and released by Close */
+//  NCSEcwCompressClient *pClient;
+
+public:
+  imFormatECW()
+    :imFormat("ECW", 
+              "ECW JPEG-2000 File Format", 
+              "*.ecw;*.jp2;*.j2k;*.jpc;*.j2c;", 
+              iECWCompTable, 
+              2, 
+              0)
+    {}
+  int Open(const char* file_name);
+  int New(const char* file_name);
+  void Close();
+  void* Handle(int index);
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo(){return 0;} // do nothing for now;
+  int WriteImageData(void* data){(void)data; return 0;} // do nothing for now;
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+void imFormatRegisterECW(void)  /* passes a new ECW driver instance to imFormatRegister */
+{
+  imFormatRegister(new imFormatECW());
+}
+
+/* Opens an ECW or JPEG-2000 file for reading. Returns IM_ERR_OPEN, IM_ERR_FORMAT,
+ * IM_ERR_ACCESS, IM_ERR_COMPRESS (view released, unknown file type) or IM_ERR_NONE. */
+int imFormatECW::Open(const char* file_name)
+{
+  NCSError eError = NCScbmOpenFileView((char*)file_name, &this->pNCSFileView, NULL);
+  if (eError != NCS_SUCCESS) 
+  {
+    if (eError == NCS_FILE_OPEN_ERROR || 
+        eError == NCS_FILE_NOT_FOUND || 
+        eError == NCS_FILE_INVALID)
+      return IM_ERR_OPEN;
+    return (eError == NCS_FILE_OPEN_FAILED)? IM_ERR_FORMAT: IM_ERR_ACCESS;
+  }
+
+  NCSFileType fileType = NCScbmGetFileType(this->pNCSFileView);
+  if (fileType == NCS_FILE_ECW)
+    strcpy(this->compression, "ECW");
+  else if (fileType == NCS_FILE_JP2)
+    strcpy(this->compression, "JPEG-2000");
+  else
+  {
+    NCScbmCloseFileView(this->pNCSFileView);  /* the view was opened: do not leak it */
+    return IM_ERR_COMPRESS;
+  }
+  this->image_count = 1;
+  return IM_ERR_NONE;
+}
+
+/* Writing is not implemented: no file is created and IM_ERR_FORMAT is always returned. */
+int imFormatECW::New(const char* file_name)
+{
+  (void)file_name;
+  this->image_count = 1;
+  strcpy(this->compression, "JPEG-2000");
+  return IM_ERR_FORMAT;
+}
+
+/* Releases the file view of a file opened for reading.
+ * Nothing is done for new files (the compress client is disabled). */
+void imFormatECW::Close()
+{
+  if (!this->is_new)
+    NCScbmCloseFileView(this->pNCSFileView);
+}
+
+/* Returns the NCS file view whatever the index, or NULL when the file is new. */
+void* imFormatECW::Handle(int index)
+{
+  (void)index;
+  void* view = NULL;
+  if (!this->is_new)
+    view = (void*)this->pNCSFileView;
+  return view;
+}
+
+/* Reads the image properties from the opened file view and stores the georeference
+ * information (origin, rotation, cell size and units, datum, projection) as attributes.
+ * Returns IM_ERR_ACCESS, IM_ERR_DATA or IM_ERR_NONE. */
+int imFormatECW::ReadImageInfo(int index)
+{
+  NCSFileViewFileInfoEx *pNCSFileInfo;
+  imAttribTable* attrib_table = AttribTable();
+  (void)index;
+  if (NCScbmGetViewFileInfoEx(this->pNCSFileView, &pNCSFileInfo) != NCS_SUCCESS)
+    return IM_ERR_ACCESS;
+
+  this->width = pNCSFileInfo->nSizeX;
+  this->height = pNCSFileInfo->nSizeY;
+
+  switch(pNCSFileInfo->eColorSpace)
+  {
+  case NCSCS_GREYSCALE:
+    this->file_color_mode = IM_GRAY;
+    break;
+  case NCSCS_YUV:
+  case NCSCS_sRGB:
+    this->file_color_mode = IM_RGB;
+    break;
+  case NCSCS_YCbCr:
+    this->file_color_mode = IM_YCBCR;
+    break;
+  case NCSCS_MULTIBAND:
+    /* multiband data, we read only one band */
+    this->file_color_mode = IM_GRAY;
+    attrib_table->Set("MultiBandCount", IM_USHORT, 1, (void*)&pNCSFileInfo->nBands);
+    break;
+  default: 
+    return IM_ERR_DATA;
+  }
+
+  switch(pNCSFileInfo->eCellType)
+  {
+  case NCSCT_INT8:
+  case NCSCT_UINT8:
+    this->file_data_type = IM_BYTE;
+    break;
+  case NCSCT_INT16:
+  case NCSCT_UINT16:
+    this->file_data_type = IM_USHORT;
+    break;
+  case NCSCT_UINT64:
+  case NCSCT_INT64:
+  case NCSCT_UINT32:
+  case NCSCT_INT32:
+    // Should be:  this->file_data_type = IM_INT;  
+    // but 32bits ints are not supported by the NCScbmReadViewLineBILEx function
+    this->file_data_type = IM_USHORT;
+    break;
+  case NCSCT_IEEE4:
+  case NCSCT_IEEE8:
+    this->file_data_type = IM_FLOAT;
+    break;
+  default: 
+    return IM_ERR_DATA;
+  }
+
+  int prec = pNCSFileInfo->pBands->nBits;
+  if (prec < 8)
+    this->convert_bpp = -prec; // just expand to 0-255
+
+  if (prec == 1 && this->file_color_mode == IM_GRAY)
+    this->file_color_mode = IM_BINARY;
+
+  if (pNCSFileInfo->nBands > imColorModeDepth(this->file_color_mode))
+    this->file_color_mode |= IM_ALPHA;
+
+  if (this->file_color_mode != IM_GRAY)
+    this->file_color_mode |= IM_PACKED;
+
+  this->file_color_mode |= IM_TOPDOWN;
+
+  float float_value = (float)pNCSFileInfo->fOriginX;
+  attrib_table->Set("OriginX", IM_FLOAT, 1, (void*)&float_value);
+  float_value = (float)pNCSFileInfo->fOriginY;
+  attrib_table->Set("OriginY", IM_FLOAT, 1, (void*)&float_value);
+
+  float_value = (float)pNCSFileInfo->fCWRotationDegrees;
+  attrib_table->Set("Rotation", IM_FLOAT, 1, (void*)&float_value);
+
+  float_value = (float)pNCSFileInfo->fCellIncrementX;
+  attrib_table->Set("CellIncrementX", IM_FLOAT, 1, (void*)&float_value);
+  float_value = (float)pNCSFileInfo->fCellIncrementY;
+  attrib_table->Set("CellIncrementY", IM_FLOAT, 1, (void*)&float_value);
+
+  attrib_table->Set("Datum", IM_BYTE, strlen(pNCSFileInfo->szDatum)+1, pNCSFileInfo->szDatum);
+  attrib_table->Set("Projection", IM_BYTE, strlen(pNCSFileInfo->szProjection)+1, pNCSFileInfo->szProjection);
+
+  /* every count below includes the string terminator */
+  switch (pNCSFileInfo->eCellSizeUnits)
+  {
+  case ECW_CELL_UNITS_INVALID:
+    attrib_table->Set("CellUnits", IM_BYTE, 8, "INVALID");
+    break;
+  case ECW_CELL_UNITS_METERS:
+    attrib_table->Set("CellUnits", IM_BYTE, 7, "METERS");
+    break;
+  case ECW_CELL_UNITS_DEGREES:
+    attrib_table->Set("CellUnits", IM_BYTE, 8, "DEGREES");
+    break;
+  case ECW_CELL_UNITS_FEET:
+    attrib_table->Set("CellUnits", IM_BYTE, 5, "FEET");
+    break;
+  case ECW_CELL_UNITS_UNKNOWN:
+    attrib_table->Set("CellUnits", IM_BYTE, 8, "UNKNOWN");
+    break;
+  }
+
+  float_value = (float)pNCSFileInfo->nCompressionRate;
+  attrib_table->Set("CompressionRatio", IM_FLOAT, 1, (void*)&float_value);
+
+  return IM_ERR_NONE;
+}
+
+/* Copies one line of band-separated data into line_buffer.
+ * With more than one band the values are interleaved pixel by pixel;
+ * otherwise the first band is copied as is. */
+static void iCopyDataBuffer(UINT8 **ppOutputLine, imbyte* line_buffer, int nBands, int view_width, int type_size)
+{
+  if (nBands <= 1)
+  {
+    memcpy(line_buffer, ppOutputLine[0], nBands*type_size*view_width);
+    return;
+  }
+  for (int pixel = 0; pixel < view_width; pixel++)
+  {
+    int first_byte = pixel*type_size;  /* position of the pixel inside each band */
+    for (int band = 0; band < nBands; band++)
+      for (int b = 0; b < type_size; b++)
+        *line_buffer++ = ppOutputLine[band][first_byte + b];
+  }
+}
+
+/* Reads the pixels of the current ECW view into data, one row at a time.
+ * View window, output size and first band come from the "View*" and
+ * "MultiBandSelect" attributes when set; the defaults cover the whole image.
+ * Returns IM_ERR_NONE, IM_ERR_MEM (a work buffer could not be allocated),
+ * IM_ERR_DATA (an ECW library call failed) or IM_ERR_COUNTER (imCounterInc returned 0). */
+int imFormatECW::ReadImageData(void* data)
+{
+  imAttribTable* attrib_table = AttribTable();
+  int i, *attrib_data, view_width, view_height,
+    nBands = imColorModeDepth(this->file_color_mode);
+
+  // this size is free, can be anything, but we restricted to less than the image size
+  attrib_data = (int*)attrib_table->Get("ViewWidth");
+  view_width = attrib_data? *attrib_data: this->width; 
+  if (view_width > this->width) view_width = this->width;
+  attrib_data = (int*)attrib_table->Get("ViewHeight");
+  view_height = attrib_data? *attrib_data: this->height; 
+  if (view_height > this->height) view_height = this->height;
+
+  imCounterTotal(this->counter, view_height, "Reading ECW...");
+
+  {
+    int xmin, xmax, ymin, ymax, band_start;
+
+    // full image if not defined.
+    // this size must be inside the image
+    attrib_data = (int*)attrib_table->Get("ViewXmin");
+    xmin = attrib_data? *attrib_data: 0; 
+    if (xmin < 0) xmin = 0;
+    attrib_data = (int*)attrib_table->Get("ViewYmin");
+    ymin = attrib_data? *attrib_data: 0; 
+    if (ymin < 0) ymin = 0;
+    attrib_data = (int*)attrib_table->Get("ViewXmax");
+    xmax = attrib_data? *attrib_data: this->width-1; 
+    if (xmax > this->width-1) xmax = this->width-1;
+    attrib_data = (int*)attrib_table->Get("ViewYmax");
+    ymax = attrib_data? *attrib_data: this->height-1; 
+    if (ymax > this->height-1) ymax = this->height-1;
+
+    band_start = 0;
+    UINT16* start_plane = (UINT16*)attrib_table->Get("MultiBandSelect");
+    if (start_plane)
+      band_start = *start_plane;
+
+    UINT32 *pBandList = (UINT32*)malloc(sizeof(UINT32)*nBands);
+    if (!pBandList)
+      return IM_ERR_MEM;
+    for(i = 0; i < nBands; i++)
+      pBandList[i] = i+band_start;
+
+    NCSError eError = NCScbmSetFileView(this->pNCSFileView, nBands, pBandList,
+                                        xmin, ymin, xmax, ymax,
+                                        view_width, view_height);
+    free(pBandList);
+
+    if( eError != NCS_SUCCESS) 
+      return IM_ERR_DATA;
+  }
+
+  // this is necessary to fool line buffer management
+  this->width = view_width;
+  this->height = view_height;
+  this->line_buffer_size = imImageLineSize(this->width, this->file_color_mode, this->file_data_type);
+
+  NCSEcwCellType eType = NCSCT_UINT8;
+  int type_size = 1;
+  if (this->file_data_type == IM_USHORT)
+  {
+    eType = NCSCT_UINT16;
+    type_size = 2;
+  }
+  else if (this->file_data_type == IM_FLOAT)
+  {
+    eType = NCSCT_IEEE4;
+    type_size = 4;
+  }
+
+  // one contiguous buffer holds a row of every band; ppOutputLine[i] points at band i
+  UINT8 **ppOutputLine = (UINT8**)malloc(sizeof(UINT8*)*nBands);
+  UINT8 *ppOutputBuffer = (UINT8*)malloc(type_size*view_width*nBands);
+  int error = (ppOutputLine && ppOutputBuffer)? IM_ERR_NONE: IM_ERR_MEM;
+  for(i = 0; error == IM_ERR_NONE && i < nBands; i++)
+    ppOutputLine[i] = ppOutputBuffer + i*type_size*view_width;
+
+  for (int row = 0; error == IM_ERR_NONE && row < view_height; row++)
+  {
+    NCSEcwReadStatus eError = NCScbmReadViewLineBILEx(this->pNCSFileView, eType, (void**)ppOutputLine);
+    if( eError != NCS_SUCCESS)
+    {
+      error = IM_ERR_DATA;
+      break;
+    }
+
+    iCopyDataBuffer(ppOutputLine, (imbyte*)this->line_buffer, nBands, view_width, type_size);
+    imFileLineBufferRead(this, data, row, 0);
+
+    if (!imCounterInc(this->counter))
+      error = IM_ERR_COUNTER;   // ends the loop through its condition
+  }
+
+  free(ppOutputLine);   // free(NULL) is a no-op, so a partial allocation is released too
+  free(ppOutputBuffer);
+  return error;
+}
+
+int imFormatECW::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  (void)compression;   // the arguments are deliberately unused:
+  (void)color_mode;    // every request is refused by the return below
+  (void)data_type;
+  return IM_ERR_DATA;
+  // The checks below are commented out and never run:
+  //int color_space = imColorModeSpace(color_mode);
+
+  //if (color_space != IM_GRAY && color_space != IM_RGB)// && color_space != IM_LUV)
+  //  return IM_ERR_DATA;                       
+  //                                            
+  //if (data_type != IM_BYTE && data_type != IM_USHORT && data_type != IM_FLOAT)
+  //  return IM_ERR_DATA;
+
+  //if (!compression || compression[0] == 0)
+  //  return IM_ERR_NONE;
+
+  //if (!imStrEqual(compression, "JPEG-2000"))
+  //  return IM_ERR_COMPRESS;
+
+  //return IM_ERR_NONE;
+}
diff --git a/src/im_format_gif.cpp b/src/im_format_gif.cpp
new file mode 100644
index 0000000..ce371ba
--- /dev/null
+++ b/src/im_format_gif.cpp
@@ -0,0 +1,1497 @@
+/** \file
+ * \brief GIF - Graphics Interchange Format
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_gif.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_format_all.h"
+#include "im_util.h"
+#include "im_counter.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <setjmp.h>
+
+static const int InterlacedOffset[4] = { 0, 4, 2, 1 },  /* The way Interlaced image should */
+                 InterlacedJumps[4]  = { 8, 8, 4, 2 };  /* be read - offsets and jumps... */
+
+#define GIF_STAMP     "GIF"    /* First chars in file - GIF stamp. */
+#define GIF_VERSION   "89a"    /* GIF version string. */
+
+#define GIF_LZ_BITS   12       /* Widest LZ code, in bits. */
+
+#define GIF_LZ_MAX_CODE     4095    /* Biggest code possible in 12 bits. */
+#define GIF_FLUSH_OUTPUT    4096    /* Impossible code, to signal flush. */
+#define GIF_FIRST_CODE      4097    /* Impossible code, to signal first. */
+#define GIF_NO_SUCH_CODE    4098    /* Impossible code, to signal empty. */
+
+#define GIF_HT_KEY_MASK       0x1FFF  /* 13bits keys */
+#define GIF_HT_KEY_NUM_BITS   13      /* 13bits keys */
+#define GIF_HT_MAX_KEY        8191    /* 13bits - 1, maximal code possible */
+#define GIF_HT_SIZE           8192    /* 12bits = 4096 or twice as big! */
+
+/*  GIF89 extension function codes                                             */
+#define COMMENT_EXT_FUNC_CODE     0xFE  /* comment */
+#define GRAPHICS_EXT_FUNC_CODE    0xF9  /* graphics control */
+#define PLAINTEXT_EXT_FUNC_CODE   0x01  /* plaintext */
+#define APPLICATION_EXT_FUNC_CODE 0xFF  /* application block */
+
+/* The 32 bits of a hash table entry are divided into two parts, key & code:   */
+/* 1. The code is the low 12 bits, as the compression is limited to 12 bits.   */
+/* 2. The key is the upper 20 bits: 12 bits Prefix code + 8 bits new char.     */
+#define GIF_HT_GET_KEY(l)   ((l) >> 12)
+#define GIF_HT_GET_CODE(l)  ((l) & 0x0FFF)
+#define GIF_HT_PUT_KEY(l)   ((l) << 12)
+#define GIF_HT_PUT_CODE(l)  ((l) & 0x0FFF)
+
+struct iGIFData  /* state used by the GIF reader and writer: color table, layout and LZ codec */
+{
+  unsigned char global_colors[256 * 3]; /* global color table if any */
+  int global_num_colors, /* global color table number of colors */
+      offset,            /* image offset */
+      step,              /* interlaced step */
+      interlaced,        /* image is interlaced or not */
+      screen_width,
+      screen_height,
+      start_offset[512], /* offset of first block */
+      ClearCode,         /* The CLEAR LZ code. */
+      BitsPerPixel,      /* Bits per pixel (codes use at least this + 1). */
+      EOFCode,           /* The EOF LZ code. */
+      RunningCode,       /* The next code algorithm can generate. */
+      RunningBits,       /* The number of bits required to represent RunningCode. */
+      MaxCode1,          /* 1 bigger than max. possible code, in RunningBits bits. */
+      LastCode,          /* The code before the current code. */
+      CrntCode,          /* Current algorithm code. */
+      StackPtr,          /* For character stack (see below). */
+      CrntShiftState;    /* Number of bits in CrntShiftDWord. */
+  unsigned char Buf[256];                   /* Compressed data is buffered here; Buf[0] is the byte count. */
+  unsigned int CrntShiftDWord;              /* For bytes decomposition into codes. */
+  unsigned char Stack[GIF_LZ_MAX_CODE];     /* Decoded pixels are stacked here. */
+  unsigned char Suffix[GIF_LZ_MAX_CODE+1];  /* So we can trace the codes. */
+  unsigned int Prefix[GIF_LZ_MAX_CODE+1];   /* Prefix code of each code; GIF_NO_SUCH_CODE marks an empty entry. */
+  unsigned int HTable[GIF_HT_SIZE];         /* hash table for the compression only, when using LZW */
+};
+
+/******************************************************************************
+* Maps a unique key to a slot index of the hash table. The key is assumed to  *
+* be 20 bits: the low 8 bits are the new postfix character, the upper 12 bits *
+* the prefix code. Folding the upper bits onto the lower ones is enough, as   *
+* the average is only about 2 hash references per entry.                      *
+******************************************************************************/
+static int iGIFHashKeyItem(unsigned int Item)
+{
+  const unsigned int folded = Item ^ (Item >> 12);
+  return folded & GIF_HT_KEY_MASK;
+}
+
+/******************************************************************************
+* Stores Code under Key in the first free slot found at or after the hash     *
+* slot of Key, wrapping around at the end of the table. A slot is free when   *
+* its key field is 0xFFFFF. The Key is assumed not to be in the table yet.    *
+******************************************************************************/
+static void iGIFInsertHashTable(unsigned int *HTable, unsigned int Key, int Code)
+{
+  int slot;
+  for (slot = iGIFHashKeyItem(Key);
+       GIF_HT_GET_KEY(HTable[slot]) != 0xFFFFFL;
+       slot = (slot + 1) & GIF_HT_KEY_MASK)
+    ;  /* occupied slot: try the next one */
+
+  HTable[slot] = GIF_HT_PUT_KEY(Key) | GIF_HT_PUT_CODE(Code);
+}
+
+/******************************************************************************
+* Looks Key up in HTable, probing slot by slot from its hash slot until a     *
+* free slot (key field 0xFFFFF) is met.                                       *
+* Returns the code stored with Key, or -1 when Key is not in the table.       *
+******************************************************************************/
+static int iGIFExistsHashTable(unsigned int *HTable, unsigned int Key)
+{
+  int slot = iGIFHashKeyItem(Key);
+  for (;;)
+  {
+    const unsigned int stored_key = GIF_HT_GET_KEY(HTable[slot]);
+    if (stored_key == 0xFFFFFL)
+      return -1;                              /* free slot: Key is absent */
+    if (stored_key == Key)
+      return GIF_HT_GET_CODE(HTable[slot]);
+
+    slot = (slot + 1) & GIF_HT_KEY_MASK;
+  }
+}
+
+/******************************************************************************
+* Buffers byte c in Buf (Buf[0] = count); a block holding 255 bytes is written*
+* before the next byte is stored. c == GIF_FLUSH_OUTPUT writes what is pending*
+* plus an empty block. Returns IM_ERR_ACCESS on a file error, else IM_ERR_NONE*
+******************************************************************************/
+static int iGIFBufferedOutput(imBinFile* handle, unsigned char *Buf, int c)
+{
+  if (c == GIF_FLUSH_OUTPUT) 
+  {
+    /* Flush everything out. */
+    if (Buf[0] != 0)
+      imBinFileWrite(handle, Buf, Buf[0] + 1, 1);
+
+    /* Mark end of compressed data, by an empty block (see GIF doc): */
+    Buf[0] = 0;
+    imBinFileWrite(handle, Buf, 1, 1);
+  }
+  else 
+  {
+    if (Buf[0] == 255) 
+    {
+      /* Dump out this buffer - it is full: */
+      imBinFileWrite(handle, Buf, Buf[0] + 1, 1);
+      Buf[0] = 0;
+    }
+    /* Count the new byte in Buf[0] and store it at that position: */
+    Buf[++Buf[0]] = (unsigned char)c;
+  }
+  /* The writes above are not checked one by one, only the file state here: */
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  return IM_ERR_NONE;
+}
+
+static int iGIFWriteNetscapeApplication(imBinFile* handle, short iterations)  /* writes the "NETSCAPE2.0" application block with the iteration count */
+{
+  /* record type byte */
+  imBinFileWrite(handle, (void*)"!", 1, 1);
+
+  /* block label */
+  imBinFileWrite(handle, (void*)"\xFF", 1, 1);
+
+  /* block size */
+  imBinFileWrite(handle, (void*)"\x0B", 1, 1);
+
+  /* application identifier + authentication code */
+  imBinFileWrite(handle, (void*)"NETSCAPE2.0", 11, 1);
+
+  /* sub block size */
+  imBinFileWrite(handle, (void*)"\x3", 1, 1);
+
+  /* first byte of the sub block - presumably the sub block id (1 = looping), TODO confirm */
+  imBinFileWrite(handle, (void*)"\x1", 1, 1);
+
+  /* iterations */
+  imBinFileWrite(handle, &iterations, 1, 2);
+
+  /* block terminator */
+  imBinFileWrite(handle, (void*)"\0", 1, 1);
+  /* The writes are not checked one by one, only the file state at the end: */
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  return IM_ERR_NONE;
+}
+
+/******************************************************************************
+* LZ compression output: packs Code (RunningBits wide) into the bit stream    *
+* and passes every completed byte to iGIFBufferedOutput; GIF_FLUSH_OUTPUT     *
+* drains the pending bits as well. Returns the first error, or IM_ERR_NONE.   *
+******************************************************************************/
+static int iGIFCompressOutput(iGIFData* igif, imBinFile* handle, int Code)
+{
+  int error = IM_ERR_NONE;
+
+  if (Code == GIF_FLUSH_OUTPUT) 
+  {
+    while (igif->CrntShiftState > 0) 
+    {
+      /* Get Rid of what is left in DWord, and flush it. */
+      error = iGIFBufferedOutput(handle, igif->Buf, igif->CrntShiftDWord & 0xff);
+      if (error != IM_ERR_NONE) return error;   /* do not let a later call hide it */
+      igif->CrntShiftDWord >>= 8;
+      igif->CrntShiftState -= 8;
+    }
+    igif->CrntShiftState = 0;			   /* For next time. */
+    error = iGIFBufferedOutput(handle, igif->Buf, GIF_FLUSH_OUTPUT);
+  }
+  else 
+  {
+    igif->CrntShiftDWord |= ((int) Code) << igif->CrntShiftState;
+    igif->CrntShiftState += igif->RunningBits;
+    while (igif->CrntShiftState >= 8) 
+    {
+      /* Dump out full bytes: */
+      error = iGIFBufferedOutput(handle, igif->Buf, igif->CrntShiftDWord & 0xff);
+      if (error != IM_ERR_NONE) return error;   /* do not let a later call hide it */
+      igif->CrntShiftDWord >>= 8;
+      igif->CrntShiftState -= 8;
+    }
+  }
+
+  /* If code cannot fit into RunningBits bits, must raise its size. Note */
+  /* however that codes above 4095 are used for special signaling.       */
+  if (igif->RunningCode >= igif->MaxCode1 && Code <= GIF_LZ_MAX_CODE) 
+    igif->MaxCode1 = 1 << ++igif->RunningBits;
+
+  return error;
+}
+
+/******************************************************************************
+*   The LZ compression routine:						      *
+*   Compresses the buffer Line of length LineLen. It can be called several    *
+* times (one per scan line, for example) to complete the whole image; the     *
+* pending code is kept in igif->CrntCode. Returns IM_ERR_ACCESS on failure.   *
+******************************************************************************/
+static int iGIFCompressLine(iGIFData* igif, imBinFile* handle, unsigned char *Line, int LineLen)
+{
+  int i = 0, CrntCode, NewCode;
+  unsigned int NewKey;
+  unsigned char Pixel;
+
+  if (igif->CrntCode == GIF_FIRST_CODE)		  /* Its first time! */
+    CrntCode = Line[i++];          /* NOTE(review): reads Line[0] even when LineLen is 0 */
+  else
+    CrntCode = igif->CrntCode;     /* Get last code in compression. */
+
+  while (i < LineLen) 
+  {			    /* Encode the remaining items of Line. */
+    Pixel = Line[i++];		      /* Get next pixel from stream. */
+    /* Form a new unique key to search hash table for the code combines  */
+    /* CrntCode as Prefix string with Pixel as postfix char.	     */
+    NewKey = (((unsigned int) CrntCode) << 8) + Pixel;
+
+    if ((NewCode = iGIFExistsHashTable(igif->HTable, NewKey)) >= 0) 
+    {
+      /* This Key is already there, or the string is old one, so	     */
+      /* simple take new code as our CrntCode:			     */
+      CrntCode = NewCode;
+    }
+    else 
+    {
+      /* Put it in hash table, output the prefix code, and make our    */
+      /* CrntCode equal to Pixel.					     */
+      if (iGIFCompressOutput(igif, handle, CrntCode) != IM_ERR_NONE) 
+        return IM_ERR_ACCESS;
+
+      CrntCode = Pixel;
+
+      /* If however the HashTable is full, we send a clear first and   */
+      /* Clear the hash table.					     */
+      if (igif->RunningCode >= GIF_LZ_MAX_CODE) 
+      {
+        /* Time to do some clearance: */
+        if (iGIFCompressOutput(igif, handle, igif->ClearCode) != IM_ERR_NONE) 
+          return IM_ERR_ACCESS;
+
+        igif->RunningCode = igif->EOFCode + 1;
+        igif->RunningBits = igif->BitsPerPixel + 1;
+        igif->MaxCode1 = 1 << igif->RunningBits;
+        memset(igif->HTable, 0xFF, GIF_HT_SIZE * sizeof(int));  /* all bits set marks every slot as free */
+      }
+      else 
+      {
+        /* Put this unique key with its relative Code in hash table: */
+        iGIFInsertHashTable(igif->HTable, NewKey, igif->RunningCode++);
+      }
+    }
+  }
+
+  /* Preserve the current state of the compression algorithm: */
+  igif->CrntCode = CrntCode;
+
+  return IM_ERR_NONE;
+}
+
+/******************************************************************************
+*   Reads one GIF data sub-block at a time into Buf and returns its bytes one *
+* by one in NextByte. Buf[0] counts the bytes left; Buf[1] is reused as the   *
+* read index once its own byte was delivered. A read error, or an empty       *
+* (terminator) sub-block where data is expected, returns IM_ERR_ACCESS.       *
+******************************************************************************/
+static int iGIFBufferedInput(imBinFile* handle, unsigned char *Buf, unsigned char *NextByte)
+{
+  if (Buf[0] == 0) 
+  {
+    /* Needs to read the next buffer - this one is empty: */
+    imBinFileRead(handle, Buf, 1, 1);
+    /* A size of 0 has no byte to deliver, and Buf[0]-- below would wrap to 255: */
+    if (imBinFileError(handle) || Buf[0] == 0)
+      return IM_ERR_ACCESS;
+    imBinFileRead(handle, &Buf[1], Buf[0], 1);
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+    *NextByte = Buf[1];
+    Buf[1] = 2;	   /* We use now the second place as last char read! */
+    Buf[0]--;
+  }
+  else 
+  {
+    *NextByte = Buf[Buf[1]++];
+    Buf[0]--;
+  }
+  return IM_ERR_NONE;
+}
+
+/******************************************************************************
+*   The LZ decompression input routine:					      *
+*   Extracts the next code, RunningBits wide, from the bytes delivered by     *
+* iGIFBufferedInput. Returns IM_ERR_ACCESS on a read error or when the code   *
+* size is outside 0..GIF_LZ_BITS (defective file), IM_ERR_NONE otherwise.     *
+******************************************************************************/
+static int iGIFDecompressInput(iGIFData* igif, imBinFile* handle, int *Code)
+{
+  unsigned char NextByte;
+  static const unsigned int CodeMasks[] = 
+  {
+    0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f,
+    0x007f, 0x00ff, 0x01ff, 0x03ff, 0x07ff, 0x0fff
+  };
+
+  /* CodeMasks only covers 0..GIF_LZ_BITS bits; anything else is a defective file. */
+  if (igif->RunningBits < 0 || igif->RunningBits > GIF_LZ_BITS)
+    return IM_ERR_ACCESS;
+
+  while (igif->CrntShiftState < igif->RunningBits) 
+  {
+    /* Needs to get more bytes from input stream for next code: */
+    if (iGIFBufferedInput(handle, igif->Buf, &NextByte) != IM_ERR_NONE) 
+      return IM_ERR_ACCESS;
+    igif->CrntShiftDWord |= ((unsigned int) NextByte) << igif->CrntShiftState;
+    igif->CrntShiftState += 8;
+  }
+
+  *Code = igif->CrntShiftDWord & CodeMasks[igif->RunningBits];
+  igif->CrntShiftDWord >>= igif->RunningBits;
+  igif->CrntShiftState -= igif->RunningBits;
+
+  /* If code cannot fit into RunningBits bits, must raise its size. RunningCode */
+  /* stops at GIF_LZ_MAX_CODE + 2, so RunningCode - 2 stays a valid table index. */
+  if (igif->RunningCode < GIF_LZ_MAX_CODE + 2 && ++(igif->RunningCode) > igif->MaxCode1 && igif->RunningBits < GIF_LZ_BITS) 
+  {
+    igif->MaxCode1 <<= 1;
+    igif->RunningBits++;
+  }
+  return IM_ERR_NONE;
+}
+
+/******************************************************************************
+* Follows the Prefix links from Code until a pixel value (not above ClearCode)*
+* is found and returns it. At most GIF_LZ_MAX_CODE + 1 links are followed, so *
+* a defective image cannot loop forever; GIF_NO_SUCH_CODE = link off the table*
+******************************************************************************/
+static int iGIFGetPrefixChar(unsigned int *Prefix, int Code, int ClearCode)
+{
+  int i = 0;
+  while (Code > ClearCode && i++ <= GIF_LZ_MAX_CODE) 
+  {
+    if (Code > GIF_LZ_MAX_CODE) return GIF_NO_SUCH_CODE;  /* not a valid index of Prefix */
+    Code = Prefix[Code];
+  }
+  return Code;
+}
+
+/******************************************************************************
+*   The LZ decompression routine:                                             *
+*   Decodes LineLen pixels into Line. The decoder state is kept in igif, so   *
+* it can be called several times (one per scan line, for example); pixels     *
+* decoded beyond LineLen stay on igif->Stack for the next call. Returns       *
+* IM_ERR_ACCESS on a read error or a defective code stream, else IM_ERR_NONE. *
+******************************************************************************/
+static int iGIFDecompressLine(iGIFData* igif, imBinFile* handle, unsigned char *Line,	int LineLen)
+{
+  int i = 0, j, CrntCode, EOFCode, ClearCode, CrntPrefix, LastCode, StackPtr;
+  unsigned char *Stack, *Suffix;
+  unsigned int *Prefix;
+
+  StackPtr = igif->StackPtr;
+  Prefix = igif->Prefix;
+  Suffix = igif->Suffix;
+  Stack = igif->Stack;
+  EOFCode = igif->EOFCode;
+  ClearCode = igif->ClearCode;
+  LastCode = igif->LastCode;
+
+  /* Let pop the stack off before continuing to read the gif file: */
+  while (StackPtr != 0 && i < LineLen) 
+    Line[i++] = Stack[--StackPtr];
+
+  while (i < LineLen) 
+  {			    
+    /* Decode LineLen items. */
+    if (iGIFDecompressInput(igif, handle, &CrntCode))
+      return IM_ERR_ACCESS;
+
+    if (CrntCode == EOFCode) 
+    {
+      /* Note however that usually we will not be here as we will stop */
+      /* decoding as soon as we got all the pixel, or EOF code will    */
+      /* not be read at all, and DGifGetLine/Pixel clean everything.   */
+      if (i != LineLen - 1) 
+        return IM_ERR_ACCESS;
+
+      i++;
+    }
+    else if (CrntCode == ClearCode) 
+    {
+      /* We need to start over again: */
+      for (j = 0; j <= GIF_LZ_MAX_CODE; j++) 
+        Prefix[j] = GIF_NO_SUCH_CODE;
+
+      igif->RunningCode = igif->EOFCode + 1;
+      igif->RunningBits = igif->BitsPerPixel + 1;
+      igif->MaxCode1 = 1 << igif->RunningBits;
+      LastCode = igif->LastCode = GIF_NO_SUCH_CODE;
+    }
+    else 
+    {
+      /* Its regular code - if in pixel range simply add it to output  */
+      /* stream, otherwise trace to codes linked list until the prefix */
+      /* is in pixel range:					     */
+      if (CrntCode < ClearCode) 
+      {
+        /* This is simple - its pixel scalar, so add it to output:   */
+        Line[i++] = (unsigned char)CrntCode;
+      }
+      else 
+      {
+        /* Its a code to needed to be traced: trace the linked list  */
+        /* until the prefix is a pixel, while pushing the suffix     */
+        /* pixels on our stack. If we done, pop the stack in reverse */
+        /* (thats what stack is good for!) order to output.	     */
+        if (Prefix[CrntCode] == GIF_NO_SUCH_CODE) 
+        {
+          /* Only allowed if CrntCode is exactly the running code and  */
+          /* there is a last code: then the prefix code is the last    */
+          /* code and the suffix char is exactly the prefix of last    */
+          /* code! Anything else is a defective stream.                */
+          if (CrntCode == igif->RunningCode - 2 && LastCode != GIF_NO_SUCH_CODE) 
+          {
+            CrntPrefix = LastCode;
+            Suffix[igif->RunningCode - 2] =
+            Stack[StackPtr++] = (unsigned char)iGIFGetPrefixChar(Prefix, LastCode, ClearCode);
+          }
+          else 
+            return IM_ERR_ACCESS;
+        }
+        else
+          CrntPrefix = CrntCode;
+
+        /* Now (if image is O.K.) we should not get an NO_SUCH_CODE  */
+        /* during the trace. A defective image could make the trace  */
+        /* loop forever, so it stops as soon as the stack has room   */
+        /* for only one more pixel: the push below then always fits  */
+        /* inside Stack, and a longer chain is reported as an error. */
+        while (StackPtr < GIF_LZ_MAX_CODE && CrntPrefix > ClearCode && CrntPrefix <= GIF_LZ_MAX_CODE) 
+        {
+          Stack[StackPtr++] = Suffix[CrntPrefix];
+          CrntPrefix = Prefix[CrntPrefix];
+        }
+
+        if (StackPtr >= GIF_LZ_MAX_CODE || CrntPrefix > GIF_LZ_MAX_CODE) 
+            return IM_ERR_ACCESS;
+
+        /* Push the last character on stack: */
+        Stack[StackPtr++] = (unsigned char)CrntPrefix;
+
+        /* Now lets pop all the stack into output: */
+        while (StackPtr != 0 && i < LineLen)
+          Line[i++] = Stack[--StackPtr];
+      }
+
+      /* Add the new table entry, but only while its index is inside */
+      /* Prefix/Suffix (GIF_LZ_MAX_CODE + 1 entries each).           */
+      if (LastCode != GIF_NO_SUCH_CODE && igif->RunningCode - 2 <= GIF_LZ_MAX_CODE) 
+      {
+        Prefix[igif->RunningCode - 2] = LastCode;
+
+        /* If CrntCode is exactly the running code, the suffix char  */
+        /* is the prefix char of the last code; otherwise it is the  */
+        /* prefix char of the current code.                          */
+        if (CrntCode == igif->RunningCode - 2) 
+          Suffix[igif->RunningCode - 2] = (unsigned char)iGIFGetPrefixChar(Prefix, LastCode, ClearCode);
+        else 
+          Suffix[igif->RunningCode - 2] = (unsigned char)iGIFGetPrefixChar(Prefix, CrntCode, ClearCode);
+      }
+
+      LastCode = CrntCode;
+    }
+  }
+
+  igif->LastCode = LastCode;
+  igif->StackPtr = StackPtr;
+
+  return IM_ERR_NONE;
+}
+
+/*******************************************
+*   Steps over a chain of sub-blocks, up   *
+*   to and including its terminator        *
+********************************************/
+static int iGIFSkipSubBlocks(imBinFile* handle)
+{
+  for (;;)
+  {
+    unsigned char block_size;
+    /* each sub-block starts with its size; a size of 0 is the terminator */
+    imBinFileRead(handle, &block_size, 1, 1);
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+
+    if (block_size == 0)
+      return IM_ERR_NONE;
+
+    imBinFileSeekOffset(handle, block_size);  /* the contents are ignored */
+  }
+}
+
+static int iGIFSkipImage(imBinFile* handle, int *image_count, int *terminate)
+{
+  int found_image = 0;
+  unsigned char byte_value;
+  /* Steps over records until one image was skipped or *terminate gets set. */
+  *terminate = 0;
+  do
+  {
+    /* reads the record type byte */
+    byte_value = 0;   /* a failed read leaves 0, which takes the default case */
+    imBinFileRead(handle, &byte_value, 1, 1);
+
+    switch (byte_value) 
+    {
+	  case ',': /* image description */
+      /* jump 8 bytes */
+      imBinFileSeekOffset(handle, 8);
+
+      /* reads the image information byte */
+      imBinFileRead(handle, &byte_value, 1, 1);
+      /* bit 0x80 set: a color table follows, its size is given by the low 3 bits */
+      if (byte_value & 0x80)
+      {
+        int bpp = (byte_value & 0x07) + 1;
+        int num_colors = 1 << bpp;
+
+        /* skip the color table */
+        imBinFileSeekOffset(handle, 3*num_colors);
+      }
+
+      /* jump 1 byte (LZW Min Code) */
+      imBinFileSeekOffset(handle, 1);
+
+      if (imBinFileError(handle))
+        return IM_ERR_ACCESS;
+
+      /* skip sub blocks */
+      if (iGIFSkipSubBlocks(handle) != IM_ERR_NONE)
+        return IM_ERR_ACCESS;
+
+      /* one more image */
+      found_image = 1;
+      (*image_count)++;
+	    break;
+	  case '!': /* extension */
+      /* jump 1 byte (label) */
+      imBinFileSeekOffset(handle, 1);
+
+      /* skip sub blocks */
+      if (iGIFSkipSubBlocks(handle) != IM_ERR_NONE)
+        return IM_ERR_ACCESS;
+	    break;
+	  case ';': /* terminate */
+	  default:  /* probably EOF */
+      *terminate = 1;
+      break;
+	  }
+
+  } while (!(*terminate) && (!found_image));
+  /* No image here and none counted before: reported as a format error. */
+  if (!found_image && *image_count == 0)
+    return IM_ERR_FORMAT;
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  return IM_ERR_NONE;
+}
+
+/* Reads a Graphics Control Extension and stores its fields as the attributes
+ * "UserInput", "Disposal", "Delay" and "TransparencyIndex". Nothing is
+ * returned; a failed read of the packed byte just ends the function early. */
+static void iGIFReadGraphicsControl(imBinFile* handle, imAttribTable* attrib_table)
+{
+  unsigned char byte_value;
+  unsigned short word_value = 0;  /* stays 0 (no "Delay") if the read below fails */
+  /* jump 1 bytes (size) */
+  imBinFileSeekOffset(handle, 1);
+  /* reads the packed description */
+  imBinFileRead(handle, &byte_value, 1, 1);
+  if (imBinFileError(handle))
+    return;
+
+  /* user input */
+  if (byte_value & 0x02)
+    attrib_table->Set("UserInput", IM_BYTE, 1, "\x1");
+
+  /* disposal */
+  if (byte_value & 0x1C)
+  {
+    const char* disposal;
+    int disp = (byte_value & 0x1C) >> 2;
+    switch (disp)
+    {
+    default:
+      disposal = "UNDEF";
+      break;
+    case 0x01:
+      disposal = "LEAVE";
+      break;
+    case 0x02:
+      disposal = "RBACK";
+      break;
+    case 0x03:  /* "restore to previous" in the GIF89a specification */
+    case 0x04:  /* the value iGIFWriteGraphicsControl writes for "RPREV" */
+      disposal = "RPREV";
+      break;
+    }
+    attrib_table->Set("Disposal", IM_BYTE, 6, disposal);
+  }
+
+  /* delay time */
+  imBinFileRead(handle, &word_value, 1, 2);
+  if (word_value)
+    attrib_table->Set("Delay", IM_USHORT, 1, &word_value);
+
+  /* transparency color */
+  if (byte_value & 0x01)
+  {
+    imBinFileRead(handle, &byte_value, 1, 1);
+    attrib_table->Set("TransparencyIndex", IM_BYTE, 1, &byte_value);
+  }
+  else
+    imBinFileSeekOffset(handle, 1);
+
+  /* jump 1 bytes (terminator) */
+  imBinFileSeekOffset(handle, 1);
+}
+
+/* Reads an Application Extension. For the "NETSCAPE" "2.0" application the
+ * number of iterations is stored in the "Iterations" attribute; the data of
+ * any other application is skipped.
+ * Returns IM_ERR_ACCESS on a file error, IM_ERR_NONE otherwise. */
+static int iGIFReadApplication(imBinFile* handle, imAttribTable* attrib_table)
+{
+  char identifier[9] = "";  /* zero filled: terminated whatever the read delivers */
+
+  /* jump 1 byte (size) */
+  imBinFileSeekOffset(handle, 1);
+
+  /* reads the application identifier */
+  imBinFileRead(handle, identifier, 8, 1);
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  if (imStrEqual(identifier, "NETSCAPE"))
+  {
+    unsigned char authentication[4] = "";
+    /* reads the application authentication code */
+    imBinFileRead(handle, authentication, 3, 1);
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+
+    if (strcmp((char*)authentication, "2.0") == 0)
+    {
+      unsigned short word_value = 0;
+
+      /* jump 2 bytes (size + 1) */
+      imBinFileSeekOffset(handle, 2);
+
+      /* reads the number of iterations */
+      imBinFileRead(handle, &word_value, 1, 2);
+
+      /* jump 1 byte (terminator) */
+      imBinFileSeekOffset(handle, 1);
+
+      /* the value is only stored once the file reports no error */
+      if (imBinFileError(handle))
+        return IM_ERR_ACCESS;
+
+      attrib_table->Set("Iterations", IM_USHORT, 1, &word_value);
+    }
+    else
+    {
+      /* Skip remaining blocks */
+      if (iGIFSkipSubBlocks(handle) != IM_ERR_NONE)
+        return IM_ERR_ACCESS;
+    }
+  }
+  else
+  {
+    /* jump 3 bytes (authentication code) */
+    imBinFileSeekOffset(handle, 3);
+
+    /* Skip remaining blocks */
+    if (iGIFSkipSubBlocks(handle) != IM_ERR_NONE)
+      return IM_ERR_ACCESS;
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Reads a Comment Extension: the data of its sub-blocks is concatenated (a
+ * trailing 0 of a sub-block is dropped) and stored in the "Description"
+ * attribute. Once a sub-block no longer fits in the local buffer, it and all
+ * following ones are skipped. Returns IM_ERR_ACCESS on a file error. */
+static int iGIFReadComment(imBinFile* handle, imAttribTable* attrib_table)
+{
+  unsigned char byte_value, buffer[255*100] = "";
+  int size = 0, truncated = 0;
+
+  for (;;)
+  {
+    /* reads the number of bytes of the block or the terminator */
+    imBinFileRead(handle, &byte_value, 1, 1);
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+    if (byte_value == 0)
+      break;
+
+    if (truncated || byte_value > (int)sizeof(buffer) - size)
+    {
+      /* no room left: step over the data instead of writing past buffer */
+      truncated = 1;
+      imBinFileSeekOffset(handle, byte_value);
+      continue;
+    }
+
+    /* reads data */
+    imBinFileRead(handle, buffer + size, byte_value, 1);
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+    /* a terminating 0 is not counted, so the next block overwrites it */
+    size += (buffer[size + byte_value - 1] == 0)? byte_value - 1: byte_value;
+  }
+
+  if (buffer[0] != 0)
+    attrib_table->Set("Description", IM_BYTE, size, buffer);
+
+  return IM_ERR_NONE;
+}
+
+/* Reads the label of an extension block and hands the block to the matching
+ * reader; extensions without a reader are skipped. Returns IM_ERR_ACCESS when
+ * a reader fails or the file reports an error, IM_ERR_NONE otherwise. */
+static int iGIFReadExtension(imBinFile* handle, imAttribTable* attrib_table)
+{
+  unsigned char label;
+  int status = IM_ERR_NONE;
+
+  /* read block label */
+  imBinFileRead(handle, &label, 1, 1);
+
+  switch (label)
+  {
+  case 0xF9:
+    /* Graphics Control Extension (its reader returns nothing) */
+    iGIFReadGraphicsControl(handle, attrib_table);
+    break;
+  case 0xFE:
+    /* Comment Extension */
+    status = iGIFReadComment(handle, attrib_table);
+    break;
+  case 0xFF:
+    /* Application Extension */
+    status = iGIFReadApplication(handle, attrib_table);
+    break;
+  default:
+    /* skip sub blocks */
+    status = iGIFSkipSubBlocks(handle);
+    break;
+  }
+
+  if (status != IM_ERR_NONE || imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  return IM_ERR_NONE;
+}
+
+/* Writes buffer (size bytes) as a Comment Extension: record type, label, the
+ * data cut into sub-blocks of at most 255 bytes, and the block terminator.
+ * Returns IM_ERR_ACCESS if the file reports an error after a sub-block. */
+static int iGIFWriteComment(imBinFile* handle, unsigned char *buffer, int size)
+{
+  unsigned char* chunk = buffer;
+  int remaining = size;
+
+  /* record type byte */
+  imBinFileWrite(handle, (void*)"!", 1, 1);
+
+  /* block label */
+  imBinFileWrite(handle, (void*)"\xFE", 1, 1);
+
+  while (remaining > 0)
+  {
+    /* sub block size */
+    unsigned char chunk_size = (unsigned char)(remaining > 255? 255: remaining);
+    imBinFileWrite(handle, &chunk_size, 1, 1);
+
+    /* sub block data */
+    imBinFileWrite(handle, chunk, chunk_size, 1);
+
+    chunk += chunk_size;
+    remaining -= chunk_size;
+
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+  }
+
+  /* block terminator */
+  imBinFileWrite(handle, (void*)"\0", 1, 1);
+
+  return IM_ERR_NONE;
+}
+
+static int iGIFWriteGraphicsControl(imBinFile* handle, imAttribTable* attrib_table)
+{
+  const void *attrib_user_input, *attrib_disposal, *attrib_delay, *attrib_transparency;
+  unsigned char byte_value;
+  /* The block is built from these four attributes; with none set nothing is written. */
+  attrib_user_input = attrib_table->Get("UserInput");
+  attrib_disposal = attrib_table->Get("Disposal");
+  attrib_delay = attrib_table->Get("Delay");
+  attrib_transparency = attrib_table->Get("TransparencyIndex");
+
+  /* Writes the Graphics Control Extension */
+  if (attrib_user_input || attrib_disposal || attrib_delay || attrib_transparency)
+  {
+    unsigned short word_value;
+
+    /* record type byte */
+    imBinFileWrite(handle, (void*)"!", 1, 1);
+
+    /* block label */
+    imBinFileWrite(handle, (void*)"\xF9", 1, 1);
+
+    /* block size */
+    imBinFileWrite(handle, (void*)"\x04", 1, 1);
+
+    byte_value = 0;
+
+    /* user input flag */
+    if (attrib_user_input && *(unsigned char*)attrib_user_input == 1)
+      byte_value |= 0x02;
+
+    /* transparency flag */
+    if (attrib_transparency)
+      byte_value |= 0x01;
+
+    /* disposal flag (any other text, "UNDEF" included, leaves it 0) */
+    if (attrib_disposal)
+    {
+      int disp = 0;
+      if (imStrEqual((char*)attrib_disposal, "LEAVE"))
+        disp = 0x01;
+      else if (imStrEqual((char*)attrib_disposal, "RBACK"))
+        disp = 0x02;
+      else if (imStrEqual((char*)attrib_disposal, "RPREV"))
+        disp = 0x04;  /* NOTE(review): GIF89a lists 3 as "restore to previous" - confirm 4 is intended */
+      /* the disposal value sits in bits 2-4 of the packed byte */
+      disp = disp << 2;
+      byte_value |= disp;
+    }
+
+    /* packed */
+    imBinFileWrite(handle, &byte_value, 1, 1);
+
+    /* delay time */
+    if (attrib_delay)
+      word_value = *(unsigned short*)attrib_delay;
+    else
+      word_value = 0;
+
+    imBinFileWrite(handle, &word_value, 1, 2);
+
+    /* transparency color */
+    if (attrib_transparency)
+    {
+      byte_value = *(unsigned char*)attrib_transparency;
+      imBinFileWrite(handle, &byte_value, 1, 1);
+    }
+    else
+      imBinFileWrite(handle, (void*)"\0", 1, 1);
+
+    /* terminator */
+    imBinFileWrite(handle, (void*)"\0", 1, 1);
+    /* the writes are not checked one by one, only the file state here */
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+  }
+
+  return IM_ERR_NONE;
+}
+
+static const char* iGIFCompTable[1] = /* compression names handed to the imFormat constructor by imFormatGIF */
+{
+  "LZW"
+};
+
+class imFormatGIF: public imFormat
+{
+  imBinFile* handle;   /* the binary file, created by Open or New and released by Close */
+  iGIFData gif_data;   /* screen size, global palette, image offsets and the LZW coder state */
+  /* helpers that read or write the image descriptor and its color table */
+  int GIFReadImageInfo();
+  int GIFWriteImageInfo();
+
+public:
+  imFormatGIF()
+    :imFormat("GIF", 
+              "Graphics Interchange Format", 
+              "*.gif;", 
+              iGIFCompTable, 
+              1,   /* presumably the number of entries of iGIFCompTable - confirm in imFormat */
+              1)   /* NOTE(review): meaning not visible here; the driver does handle several images per file */
+    {}
+  ~imFormatGIF() {}
+  /* driver entry points, all defined below in this file */
+  int Open(const char* file_name);
+  int New(const char* file_name);
+  void Close();
+  void* Handle(int index);
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo();
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+void imFormatRegisterGIF(void)
+{ /* hands a newly allocated GIF driver to imFormatRegister; it is not deleted here */
+  imFormatRegister(new imFormatGIF());
+}
+
+int imFormatGIF::Open(const char* file_name)
+{ /* Opens a GIF for reading: checks the signature, loads the screen data and global palette, indexes every image. */
+  this->handle = imBinFileOpen(file_name);
+  if (this->handle == NULL)
+    return IM_ERR_OPEN;
+
+  imBinFileByteOrder(handle, IM_LITTLEENDIAN); 
+
+  unsigned char sig[4];
+  if (!imBinFileRead(this->handle, sig, 3, 1))
+  {
+    imBinFileClose(handle);
+    return IM_ERR_ACCESS;
+  }
+
+  sig[3] = 0;
+  if (!imStrEqual((char*)sig, GIF_STAMP))
+  {
+    imBinFileClose(handle);
+    return IM_ERR_FORMAT;
+  }
+
+  /* ignore version */
+  imBinFileSeekOffset(handle, 3);
+
+  strcpy(this->compression, "LZW");
+
+  /* reads screen width and screen height */
+  imushort word_value = 0;
+  imBinFileRead(handle, &word_value, 1, 2);
+  gif_data.screen_width = word_value;
+
+  imBinFileRead(handle, &word_value, 1, 2);
+  gif_data.screen_height = word_value;
+
+  /* reads color table information byte (zeroed first so a failed read cannot leave garbage flags) */
+  imbyte byte_value = 0;
+  imBinFileRead(handle, &byte_value, 1, 1);
+
+  /* jump 2 bytes (bgcolor + aspect ratio) */
+  imBinFileSeekOffset(handle, 2);
+
+  gif_data.global_num_colors = 0;  /* no global color table unless the flag tested below says so */
+  if (byte_value & 0x80)
+  {
+    int bpp = (byte_value & 0x07) + 1;
+    gif_data.global_num_colors = 1 << bpp;
+
+    /* reads the color palette */
+    imBinFileRead(handle, gif_data.global_colors, gif_data.global_num_colors * 3, 1);
+  }
+
+  if (imBinFileError(handle))
+  {
+    imBinFileClose(handle);
+    return IM_ERR_ACCESS;
+  }
+
+  /* count number of images; start_offset is assumed to be an array member, its capacity bounds the scan */
+  int error, terminate = 0, max_images = (int)(sizeof(gif_data.start_offset) / sizeof(gif_data.start_offset[0]));
+  this->image_count = 0;
+  do
+  {
+    // store each offset before counting images
+    gif_data.start_offset[this->image_count] = imBinFileTell(handle);
+    error = iGIFSkipImage(handle, &this->image_count, &terminate);
+  } while (error == IM_ERR_NONE && !terminate && this->image_count < max_images);
+
+  if (this->image_count == 0 || error != IM_ERR_NONE)
+  {
+    imBinFileClose(handle);
+    return (error != IM_ERR_NONE)? error: IM_ERR_FORMAT;  /* no image at all: the handle is closed, so never report success */
+  }
+
+  return IM_ERR_NONE;
+}
+
+int imFormatGIF::New(const char* file_name)
+{ /* Creates a GIF for writing and stores the 6 byte signature; returns IM_ERR_OPEN, IM_ERR_ACCESS or IM_ERR_NONE. */
+  this->handle = imBinFileNew(file_name);
+  if (this->handle == NULL)
+    return IM_ERR_OPEN;
+
+  imBinFileByteOrder(handle, IM_LITTLEENDIAN); 
+
+  /* writes the GIF STAMP and version - header */
+  imBinFileWrite(handle, (void*)GIF_STAMP, 3, 1);   /* identifier */
+  imBinFileWrite(handle, (void*)GIF_VERSION, 3, 1); /* format version */
+
+  // File header will be written at the first image
+  // (WriteImageInfo emits the logical screen descriptor while image_count is still 0)
+  /* tests if everything was ok */
+  if (imBinFileError(handle))
+  {
+    imBinFileClose(this->handle); /* the file is closed before reporting the failure */
+    return IM_ERR_ACCESS;
+  }
+
+  strcpy(this->compression, "LZW");
+  
+  return IM_ERR_NONE;
+}
+
+void imFormatGIF::Close()
+{
+  /* a file being written receives the ';' trailer, but only while no file error is pending */
+  int write_trailer = this->is_new? !imBinFileError(handle): 0;
+  if (write_trailer) imBinFileWrite(handle, (void*)";", 1, 1);
+  imBinFileClose(handle);
+}
+
+void* imFormatGIF::Handle(int index)
+{
+  /* index 0 exposes the imBinFile pointer; every other index yields NULL */
+  if (index != 0)
+    return NULL;
+  return (void*)this->handle;
+}
+
+int imFormatGIF::GIFReadImageInfo()
+{ /* Reads the image descriptor at the current file position and installs the palette that applies to the image. */
+  imbyte byte_value;
+  imushort word_value;
+  int int_value;
+
+  imAttribTable* attrib_table = AttribTable();
+
+  /* reads the image left position */
+  imBinFileRead(handle, &word_value, 1, 2);
+  if (word_value)
+    attrib_table->Set("XScreen", IM_USHORT, 1, &word_value);
+
+  /* reads the image top position */
+  imBinFileRead(handle, &word_value, 1, 2);
+  if (word_value)
+    attrib_table->Set("YScreen", IM_USHORT, 1, &word_value);
+
+  /* reads the image width */
+  imBinFileRead(handle, &word_value, 1, 2);
+  this->width = word_value;
+
+  /* reads the image height */
+  imBinFileRead(handle, &word_value, 1, 2);
+  this->height = word_value;
+
+  /* reads the image information byte */
+  imBinFileRead(handle, &byte_value, 1, 1);
+  /* bit 0x40 marks interlaced rows; the attribute is only created when the bit is set */
+  gif_data.interlaced = (byte_value & 0x40)? 1: 0;
+  if (gif_data.interlaced)
+  {
+    int_value = 1;
+    attrib_table->Set("Interlaced", IM_INT, 1, &int_value);
+  }
+
+  this->file_color_mode = IM_MAP;
+  this->file_data_type = IM_BYTE;
+
+  /* local color table */
+  int num_colors;
+  unsigned char *colors;
+  unsigned char local_colors[256 * 3];
+
+  if (byte_value & 0x80) /* the image brings its own table, whose size comes from the low 3 bits */
+  {
+    int bpp = (byte_value & 0x07) + 1;
+    num_colors = 1 << bpp;
+    colors = local_colors;
+
+    /* reads the color table */
+    imBinFileRead(handle, local_colors, num_colors * 3, 1);
+  }
+  else if (gif_data.global_num_colors) /* otherwise the global table kept in gif_data is used */
+  {
+    colors = gif_data.global_colors;
+    num_colors = gif_data.global_num_colors;
+  }
+  else
+    return IM_ERR_FORMAT; /* neither a local nor a global table */
+
+  long palette[256];
+  for (int c = 0; c < num_colors; c++) /* each table entry is 3 consecutive bytes */
+  {
+    palette[c] = imColorEncode(colors[c*3],
+                               colors[c*3+1],
+                               colors[c*3+2]);
+  }
+
+  imFileSetPalette(this, palette, num_colors);
+
+  if (imBinFileError(handle)) /* a single check covers every read above */
+    return IM_ERR_ACCESS;
+
+  this->file_color_mode |= IM_TOPDOWN;
+
+  return IM_ERR_NONE;
+}
+
+int imFormatGIF::GIFWriteImageInfo()
+{ /* Writes the image descriptor (separator, position, size, flags) followed by a local color table. */
+  this->file_data_type = IM_BYTE;
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);
+  this->file_color_mode |= IM_TOPDOWN;
+
+  imAttribTable* attrib_table = AttribTable();
+  const void* attrib = attrib_table->Get("Interlaced");
+  /* always assigned: without the attribute the flag must not keep an uninitialized or previous-image value */
+  gif_data.interlaced = attrib? *(int*)attrib: 0;
+
+  imBinFileWrite(handle, (void*)",", 1, 1);  /* Image separator character. */
+
+  imushort word_value;
+
+  attrib = attrib_table->Get("XScreen");
+  if (attrib)
+    word_value = *(unsigned short*)attrib;
+  else
+    word_value = 0;
+  imBinFileWrite(handle, &word_value, 1, 2); /* image left */
+
+  attrib = attrib_table->Get("YScreen");
+  if (attrib)
+    word_value = *(unsigned short*)attrib;
+  else
+    word_value = 0;
+  imBinFileWrite(handle, &word_value, 1, 2); /* image top */
+
+  word_value = (unsigned short)this->width;
+  imBinFileWrite(handle, &word_value, 1, 2); /* image width */
+  word_value = (unsigned short)this->height;
+  imBinFileWrite(handle, &word_value, 1, 2); /* image height */
+
+  /* local color table */
+  imbyte byte_value = 0x80;
+  if (gif_data.interlaced)
+    byte_value |= 0x40;
+
+  int num_colors = 256;
+  if (imColorModeSpace(this->user_color_mode) == IM_MAP)
+  {
+    int bpp = 1, c = (this->palette_count-1) >> 1;  /* never below 1 bit, so (bpp-1) cannot become -1 for 0 or 1 colors */
+    while (c > 0 && bpp < 8) {c = c >> 1;bpp++;}      /* never above 8 bits: local_colors holds 256 entries */
+    byte_value |= (bpp-1); 
+    num_colors = 1 << bpp;
+  }
+  else
+    byte_value |= 0x07; /* 8 bits = 256 grays */
+
+  imBinFileWrite(handle, &byte_value, 1, 1); /* image information */
+
+  /* write color table */
+  unsigned char local_colors[256*3];
+  for (int c = 0; c < num_colors; c++) // write all data, even not used colors
+  {
+    unsigned char r, g, b;
+    imColorDecode(&r, &g, &b, this->palette[c]);
+    local_colors[c*3] = r;
+    local_colors[c*3+1] = g;
+    local_colors[c*3+2] = b;
+  }
+  imBinFileWrite(handle, local_colors, num_colors*3, 1);
+
+  if (imBinFileError(handle)) /* a single check covers every write above */
+    return IM_ERR_ACCESS;
+
+  return IM_ERR_NONE;
+}
+
+int imFormatGIF::ReadImageInfo(int index)
+{ /* Positions the file at image "index", reads its extensions and descriptor, then resets the LZW decoder state. */
+  imAttribTable* attrib_table = AttribTable();
+  if (index < 0 || index >= this->image_count) return IM_ERR_DATA;  /* start_offset is only filled for the images counted by Open */
+  attrib_table->RemoveAll();
+  if (gif_data.screen_width) 
+  {
+    imushort word_value = (imushort)gif_data.screen_width;
+    attrib_table->Set("ScreenWidth", IM_USHORT, 1, &word_value);
+  }
+
+  if (gif_data.screen_height) 
+  {
+    imushort word_value = (imushort)gif_data.screen_height;
+    attrib_table->Set("ScreenHeight", IM_USHORT, 1, &word_value);
+  }
+
+  /* jump to start offset of the image */
+  imBinFileSeekTo(handle, gif_data.start_offset[index]);
+
+  int found_image = 0;
+  imbyte byte_value;
+
+  int terminate = 0;
+  do
+  {
+    /* reads the record type byte (zeroed first, so a failed read falls in the default case) */
+    byte_value = 0;
+    imBinFileRead(handle, &byte_value, 1, 1);
+
+    switch (byte_value) 
+    {
+    case '!': /* 0x21 extension (appears before the image) */
+      if (iGIFReadExtension(handle, attrib_table) != IM_ERR_NONE)
+        return IM_ERR_ACCESS;
+      break;
+    case ',': /* 0x2C image description and color table */
+      if (GIFReadImageInfo() != IM_ERR_NONE)
+        return IM_ERR_ACCESS;
+
+      /* we will read only this image for now, so break the loop */
+      found_image = 1;
+      break;
+    case ';': /* if terminate before find image return error */
+    default:
+      terminate = 1;
+      break;
+    }
+  } while (!terminate && !found_image);
+
+  if (!found_image)
+    return IM_ERR_ACCESS;
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  /* reads the LZW Min code byte */
+  imBinFileRead(handle, &byte_value, 1, 1);
+  if (imBinFileError(handle)) return IM_ERR_ACCESS;
+  if (byte_value > 8) return IM_ERR_FORMAT;  /* indices are bytes; a larger size would also overflow the shifts below */
+  /* now initialize the compression control data */
+  gif_data.BitsPerPixel = byte_value;
+  gif_data.ClearCode = (1 << byte_value);
+  gif_data.EOFCode = gif_data.ClearCode + 1;
+  gif_data.RunningCode = gif_data.EOFCode + 1;
+  gif_data.RunningBits = byte_value + 1;             /* Number of bits per code. */
+  gif_data.MaxCode1 = 1 << gif_data.RunningBits;     /* Max. code + 1. */
+  gif_data.StackPtr = 0;                             /* No pixels on the pixel stack. */
+  gif_data.LastCode = GIF_NO_SUCH_CODE;
+  gif_data.CrntShiftState = 0;                       /* No information in CrntShiftDWord. */
+  gif_data.CrntShiftDWord = 0;
+  gif_data.Buf[0] = 0;                               /* Input Buffer empty. */
+
+  for (int i = 0; i <= GIF_LZ_MAX_CODE; i++) 
+    gif_data.Prefix[i] = GIF_NO_SUCH_CODE;
+
+  gif_data.step = 0;  /* interlaced pass used by ReadImageData */
+
+  return IM_ERR_NONE;
+}
+
+int imFormatGIF::WriteImageInfo()
+{ /* Writes everything that precedes the pixel data of one image and resets the LZW encoder state. */
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);
+  this->file_color_mode |= IM_TOPDOWN;
+  this->file_data_type = this->user_data_type;
+
+  imAttribTable* attrib_table = AttribTable();
+  const void* attrib_data;
+  int attrib_size;
+
+  if (this->image_count == 0) /* only the first image carries the file header */
+  {
+    imushort word_value;
+
+    // write file header
+
+    /* logical screen descriptor (the image size is used when the screen attributes are absent) */
+    attrib_data = attrib_table->Get("ScreenWidth");
+    if (attrib_data) word_value = *(imushort*)attrib_data;
+    else             word_value = (imushort)this->width;
+    imBinFileWrite(handle, &word_value, 1, 2);
+
+    attrib_data = attrib_table->Get("ScreenHeight");
+    if (attrib_data) word_value = *(imushort*)attrib_data;
+    else             word_value = (imushort)this->height;
+    imBinFileWrite(handle, &word_value, 1, 2);
+
+    imbyte byte_value = 0;  /* no global color table, 0 colors */
+    imBinFileWrite(handle, &byte_value, 1, 1); /* screen information */
+    imBinFileWrite(handle, (void*)"\0\0", 2, 1);  /* (bgcolor + aspect ratio) */
+  }
+
+  attrib_data = attrib_table->Get("Description", NULL, &attrib_size);
+  if (attrib_data) /* "Description" is stored as a comment extension */
+  {
+    if (iGIFWriteComment(handle, (imbyte*)attrib_data, attrib_size) != IM_ERR_NONE)
+      return IM_ERR_ACCESS;
+  }
+
+  attrib_data = attrib_table->Get("Iterations");
+  if (attrib_data) /* the attribute value is read as a short */
+  {
+    if (iGIFWriteNetscapeApplication(handle, *(short*)attrib_data) != IM_ERR_NONE)
+      return IM_ERR_ACCESS;
+  }
+
+  if (iGIFWriteGraphicsControl(handle, attrib_table) != IM_ERR_NONE)
+    return IM_ERR_ACCESS;
+
+  if (GIFWriteImageInfo() != IM_ERR_NONE)
+    return IM_ERR_ACCESS;
+
+  /* initializes the hash table */
+  memset(gif_data.HTable, 0xFF, GIF_HT_SIZE * sizeof(int));
+
+  /* initializes compression data */
+
+  imbyte byte_value = 8;
+  imBinFileWrite(handle, &byte_value, 1, 1); /* Write the Code size to file. */
+
+  gif_data.Buf[0] = 0;        /* Nothing was output yet. */
+  gif_data.BitsPerPixel = 8;
+  gif_data.ClearCode = (1 << 8);
+  gif_data.EOFCode = gif_data.ClearCode + 1;
+  gif_data.RunningBits = 8 + 1;  /* Number of bits per code. */
+  gif_data.MaxCode1 = 1 << gif_data.RunningBits;     /* Max. code + 1. */
+  gif_data.CrntCode = GIF_FIRST_CODE;    /* Signal that this is first one! */
+  gif_data.CrntShiftState = 0;      /* No information in CrntShiftDWord. */
+  gif_data.CrntShiftDWord = 0;
+
+  gif_data.RunningCode = gif_data.EOFCode + 1;
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  gif_data.step = 0;          /* interlaced step */
+  /* the compressed stream is opened with a clear code; that call's result is the result of this function */
+  return iGIFCompressOutput(&gif_data, handle, gif_data.ClearCode);
+}
+
+int imFormatGIF::ReadImageData(void* data)
+{ /* Decodes this->height lines in file order and stores each one at the row it belongs to. */
+  imCounterTotal(this->counter, this->height, "Reading GIF...");
+
+  int row = 0, error;
+  for (int i = 0; i < this->height; i++)
+  {
+    error = iGIFDecompressLine(&gif_data, handle, (imbyte*)this->line_buffer, this->width);
+    if (error != IM_ERR_NONE)
+      return IM_ERR_ACCESS;
+
+    imFileLineBufferRead(this, data, row, 0);
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+
+    if (gif_data.interlaced)
+    {
+      row += InterlacedJumps[gif_data.step];
+      /* short images have empty passes: keep advancing until the row is valid, never beyond the 4th (last) GIF pass */
+      while (row > this->height-1 && gif_data.step < 3)
+      {
+        gif_data.step++;
+        row = InterlacedOffset[gif_data.step];
+      }
+    }
+    else
+      row++;
+  }
+
+  /* Skip remaining empty blocks of the image data */
+  if (iGIFSkipSubBlocks(handle) != IM_ERR_NONE)
+    return IM_ERR_ACCESS;
+
+  return IM_ERR_NONE;
+}
+
+int imFormatGIF::WriteImageData(void* data)
+{ /* Compresses this->height lines, taken in interlaced order when requested, and closes the LZW stream. */
+  imCounterTotal(this->counter, this->height, "Writing GIF...");
+
+  int row = 0, error;
+  for (int i = 0; i < this->height; i++)
+  {
+    imFileLineBufferWrite(this, data, row, 0);
+
+    error = iGIFCompressLine(&gif_data, handle, (imbyte*)this->line_buffer, this->width);
+
+    if (error != IM_ERR_NONE)
+      return IM_ERR_ACCESS;
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+
+    if (gif_data.interlaced)
+    {
+      row += InterlacedJumps[gif_data.step];
+      /* short images have empty passes: keep advancing until the row is valid, never beyond the 4th (last) GIF pass */
+      while (row > this->height-1 && gif_data.step < 3)
+      {
+        gif_data.step++;
+        row = InterlacedOffset[gif_data.step];
+      }
+    }
+    else
+      row++;
+  }
+
+  /* writes the end picture code */
+  iGIFCompressOutput(&gif_data, handle, gif_data.CrntCode);
+  iGIFCompressOutput(&gif_data, handle, gif_data.EOFCode);
+  iGIFCompressOutput(&gif_data, handle, GIF_FLUSH_OUTPUT);
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  this->image_count++;  /* only a completely written image is counted */
+  return IM_ERR_NONE;
+}
+
+int imFormatGIF::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  /* only MAP, GRAY and BINARY color spaces are accepted */
+  switch (imColorModeSpace(color_mode))
+  {
+  case IM_MAP:
+  case IM_GRAY:
+  case IM_BINARY:
+    break;
+  default:
+    return IM_ERR_DATA;
+  }
+
+  if (data_type != IM_BYTE) return IM_ERR_DATA;  /* byte data only */
+  /* a missing or empty compression name is accepted as is; otherwise it must be "LZW" */
+  int no_name = (compression == NULL) || (compression[0] == 0);
+  return (no_name || imStrEqual(compression, "LZW"))? IM_ERR_NONE: IM_ERR_COMPRESS;
+}
+
diff --git a/src/im_format_ico.cpp b/src/im_format_ico.cpp
new file mode 100644
index 0000000..d4d1884
--- /dev/null
+++ b/src/im_format_ico.cpp
@@ -0,0 +1,650 @@
+/** \file
+ * \brief ICO - Windows Icon
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_ico.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_format_all.h"
+#include "im_util.h"
+#include "im_counter.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+
+/*
+typedef struct
+{
+    WORD           idReserved;   // Reserved (must be 0)
+    WORD           idType;       // Resource Type (1 for icons)
+    WORD           idCount;      // How many images?
+    ICONDIRENTRY   idEntries[1]; // An entry for each image (idCount of 'em)
+} ICONDIR, *LPICONDIR; // 6
+typedef struct
+{
+    BYTE        bWidth;          // Width, in pixels, of the image
+    BYTE        bHeight;         // Height, in pixels, of the image
+    BYTE        bColorCount;     // Number of colors in image (0 if >=8bpp)
+    BYTE        bReserved;       // Reserved ( must be 0)
+    WORD        wPlanes;         // Color Planes
+    WORD        wBitCount;       // Bits per pixel
+    DWORD       dwBytesInRes;    // How many bytes in this resource?
+    DWORD       dwImageOffset;   // Where in the file is this image?
+} ICONDIRENTRY, *LPICONDIRENTRY;  // 16
+typedef struct
+{
+   BITMAPINFOHEADER   icHeader;      // DIB header
+   RGBQUAD         icColors[1];   // Color table
+   BYTE            icXOR[1];      // DIB bits for XOR mask
+   BYTE            icAND[1];      // DIB bits for AND mask (1 bpp)
+} ICONIMAGE, *LPICONIMAGE;
+*/
+
+static const char* iICOCompTable[1] = /* compression names handed to the imFormat constructor by imFormatICO */
+{
+  "NONE"
+};
+
+class imFormatICO: public imFormat
+{
+  imBinFile* handle;          /* the binary file handle */
+  unsigned short bpp;         /* number of bits per pixel */
+  unsigned int offset[10],    /* file offset of each image (Open keeps at most 10, WriteImageInfo refuses a 6th) */
+                next_offset;  /* where the next written image will start */
+  int line_raw_size;              // raw line size
+
+  int ReadPalette();
+  int WritePalette();
+  void FixRGBOrder();         /* in-place conversion of line_buffer between file and IM component order */
+
+public:
+  imFormatICO()
+    :imFormat("ICO", 
+              "Windows Icon", 
+              "*.ico;", 
+              iICOCompTable, 
+              1,   /* presumably the number of entries of iICOCompTable - confirm in imFormat */
+              1)   /* NOTE(review): meaning not visible here; the driver does handle several images per file */
+    {}
+  ~imFormatICO() {}
+  /* driver entry points, all defined below in this file */
+  int Open(const char* file_name);
+  int New(const char* file_name);
+  void Close();
+  void* Handle(int index);
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo();
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+void imFormatRegisterICO(void)
+{ /* hands a newly allocated ICO driver to imFormatRegister; it is not deleted here */
+  imFormatRegister(new imFormatICO());
+}
+
+int imFormatICO::Open(const char* file_name)
+{ /* Opens an icon file, validates the 6 byte ICONDIR and records the data offset of up to 10 images. */
+  unsigned short word;
+
+  /* opens the binary file for reading with intel byte order */
+  handle = imBinFileOpen(file_name);
+  if (!handle)
+    return IM_ERR_OPEN;
+
+  imBinFileByteOrder(handle, IM_LITTLEENDIAN); 
+
+  /* reads the reserved value */
+  imBinFileRead(handle, &word, 1, 2);
+  if (imBinFileError(handle))
+  {
+    imBinFileClose(handle);
+    return IM_ERR_ACCESS;
+  }
+
+  if (word != 0) /* the reserved field must be 0 */
+  {
+    imBinFileClose(handle);
+    return IM_ERR_FORMAT;
+  }
+
+  /* reads the resource type (only type 1 is accepted) */
+  imBinFileRead(handle, &word, 1, 2);
+  if (word != 1)
+  {
+    imBinFileClose(handle);
+    return IM_ERR_FORMAT;
+  }
+
+  /* reads the number of images */
+  imBinFileRead(handle, &word, 1, 2);
+  /* the count is clamped to the 10 entries of the offset array */
+  this->image_count = word > 10? 10: word;
+  strcpy(this->compression, "NONE");
+
+  for (int i = 0; i < this->image_count; i++)
+  {
+    /* skip ICONDIRENTRY data except image offset */
+    imBinFileSeekOffset(handle, 12);
+
+    /* reads the image offset */
+    imBinFileRead(handle, &this->offset[i], 1, 4);
+   
+    if (imBinFileError(handle))
+    {
+      imBinFileClose(handle);
+      return IM_ERR_ACCESS;
+    }
+  }
+
+  return IM_ERR_NONE;
+}
+
+int imFormatICO::New(const char* file_name)
+{ /* Creates an icon file and writes its 6 byte ICONDIR; the directory entries are written by WriteImageInfo. */
+  /* opens the binary file for writing with intel byte order */
+  handle = imBinFileNew(file_name);
+  if (!handle)
+    return IM_ERR_OPEN;
+
+  imBinFileByteOrder(handle, IM_LITTLEENDIAN); 
+
+  imushort word_value = 0;
+  imBinFileWrite(handle, &word_value, 1, 2); /* reserved */
+  word_value = 1;
+  imBinFileWrite(handle, &word_value, 1, 2); /* resource type */
+  imBinFileWrite(handle, &word_value, 1, 2); /* number of images, at least one, must update at close */
+  /* NOTE(review): the three writes above are not followed by an imBinFileError check here */
+  this->next_offset = 6 + 5 * 16;  // offset to the first image, room for 5 ICONDIRENTRY
+
+  return IM_ERR_NONE;
+}
+
+void imFormatICO::Close()
+{
+  /* New() stored a count of 1 in the header, so only a file written
+     with more than one image needs that field patched before closing. */
+  const int patch_count = this->is_new && this->image_count > 1;
+  if (patch_count)
+  {
+    imushort total_images = (imushort)this->image_count;
+    imBinFileSeekTo(handle, 4);  /* the count follows the two 16 bit fields written first by New() */
+    imBinFileWrite(handle, &total_images, 1, 2);
+  }
+
+  imBinFileClose(handle);
+}
+
+void* imFormatICO::Handle(int index)
+{
+  /* index 0 exposes the imBinFile pointer; every other index yields NULL */
+  void* file_handle = NULL;
+  if (index == 0) file_handle = (void*)this->handle;
+  return file_handle;
+}
+
+int imFormatICO::ReadImageInfo(int index)
+{ /* Reads the bitmap header of image "index" (size, bits per pixel, colors used) and, up to 8 bpp, its palette. */
+  this->file_data_type = IM_BYTE;
+  unsigned int dword_value;
+
+  if (index < 0 || index >= image_count)  /* offset[] only holds entries 0..image_count-1 */
+    return IM_ERR_DATA;
+
+  // offset + size (the 4 byte header size field is skipped)
+  imBinFileSeekTo(handle, this->offset[index] + 4);
+
+  /* reads the image width */
+  imBinFileRead(handle, &dword_value, 1, 4);
+  this->width = (int)dword_value;
+
+  /* reads the image height (the stored value is halved; WriteImageInfo stores height*2) */
+  imBinFileRead(handle, &dword_value, 1, 4);
+  this->height = (int)(dword_value / 2);
+
+  /* jump 2 bytes (planes) */
+  imBinFileSeekOffset(handle, 2);
+
+  // bpp
+  imBinFileRead(handle, &this->bpp, 1, 2);
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  // sanity check
+  if (this->bpp != 1 && this->bpp != 4 && this->bpp != 8 &&
+      this->bpp != 24 && this->bpp != 32)
+    return IM_ERR_DATA;
+
+  if (this->bpp > 8)
+  {
+    this->file_color_mode = IM_RGB;
+    this->file_color_mode |= IM_PACKED;
+    if (this->bpp == 32)
+      this->file_color_mode |= IM_ALPHA;
+  }
+  else
+  {
+    this->file_color_mode = IM_MAP;
+    this->palette_count = 1 << bpp;
+  }
+
+  if (this->bpp < 8)  /* NOTE(review): convert_bpp is not reset for 8 bpp and above - confirm the caller clears it */
+    this->convert_bpp = this->bpp;
+
+  this->line_raw_size = imFileLineSizeAligned(this->width, this->bpp, 4);
+  this->line_buffer_extra = 4; // room enough for padding
+
+  /* jump 16 bytes (compression, image size, resolution), the fields WriteImageInfo emits at this position */
+  imBinFileSeekOffset(handle, 16);
+
+  if (this->bpp <= 8)
+  {
+    /* reads the number of colors used */
+    imBinFileRead(handle, &dword_value, 1, 4);
+
+    /* updates the palette_count based on the number of colors used */
+    if (dword_value != 0 && (int)dword_value < this->palette_count)
+      this->palette_count = dword_value;
+
+    /* jump 4 bytes (important colors) */
+    imBinFileSeekOffset(handle, 4);
+  }
+  else
+  {
+    /* jump 8 bytes (used colors, important colors) */
+    imBinFileSeekOffset(handle, 8);
+  }
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  if (this->bpp <= 8)
+    return ReadPalette();
+
+  return IM_ERR_NONE;
+}
+
+int imFormatICO::WriteImageInfo()
+{ /* Writes the directory entry and the 40 byte bitmap header of the next image, then its palette below 24 bpp. */
+  this->file_data_type = IM_BYTE;
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);
+
+  if (this->image_count == 5) /* New() reserved room for 5 directory entries only */
+    return IM_ERR_DATA;
+
+  if (this->width > 255 || this->height > 255) /* both sizes are stored in one byte of the directory entry */
+    return IM_ERR_DATA;
+
+  if (this->file_color_mode == IM_BINARY)
+  {
+    this->bpp = 1;
+    this->convert_bpp = 1;
+  }
+  else if (this->file_color_mode == IM_RGB)
+  {
+    this->file_color_mode |= IM_PACKED;
+    if (imColorModeHasAlpha(this->user_color_mode))
+    {
+      this->file_color_mode |= IM_ALPHA;
+      this->bpp = 32;
+    }
+    else
+      this->bpp = 24;
+  }
+  else
+    this->bpp = 8; /* NOTE(review): convert_bpp is not reset in the non binary branches - confirm the caller clears it */
+
+  this->line_raw_size = imFileLineSizeAligned(this->width, this->bpp, 4);
+  this->line_buffer_extra = 4; // room enough for padding
+  int palette_size = (this->bpp > 8)? 0: this->palette_count*4;
+
+  imbyte byte_value;
+  imushort word_value;
+
+  /* updates the ICON directory entry */
+
+  imBinFileSeekTo(handle, 6 + this->image_count * 16);  // ICONDIR + i * ICONDIRENTRY
+
+  byte_value = (imbyte)this->width;
+  imBinFileWrite(handle, &byte_value, 1, 1); /* width */
+  byte_value = (imbyte)this->height;
+  imBinFileWrite(handle, &byte_value, 1, 1); /* height */
+  byte_value = (imbyte)((this->bpp > 8)? 0: this->palette_count);
+  imBinFileWrite(handle, &byte_value, 1, 1); /* color count */
+  imBinFileWrite(handle, (void*)"\0", 1, 1);        /* reserved */
+  word_value = 1;
+  imBinFileWrite(handle, &word_value, 1, 2); /* planes */
+  word_value = this->bpp;
+  imBinFileWrite(handle, &word_value, 1, 2); /* bit count */
+  int and_line_size = imFileLineSizeAligned(this->width, 1, 4); /* the AND mask uses 1 bit per pixel */
+  int resource_size = 40 + palette_size + (line_raw_size + and_line_size) * this->height; /* header + palette + both bitmaps */
+  unsigned int dword_value = resource_size;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* resource size */
+  dword_value = this->next_offset;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* data offset */
+
+  this->offset[this->image_count] = this->next_offset;
+  this->next_offset += resource_size;
+
+  /* writes the image */
+
+  imBinFileSeekTo(handle, this->offset[this->image_count]);
+
+  dword_value = 40;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* header size */
+  dword_value = this->width;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* width */
+  dword_value = this->height*2;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* height (doubled; ReadImageInfo halves it) */
+  word_value = 1;
+  imBinFileWrite(handle, &word_value, 1, 2);  /* planes */
+  word_value = this->bpp;
+  imBinFileWrite(handle, &word_value, 1, 2);  /* bpp */
+  dword_value = 0;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* compression */
+  dword_value = line_raw_size * this->height;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* data size */
+
+  imBinFileWrite(handle, (void*)"\0\0\0\0\0\0\0\0", 8, 1); /* resolution */
+
+  dword_value = (this->bpp > 8)? 0: this->palette_count;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* colors used */
+  dword_value = 0;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* colors important (all) */
+
+  /* tests if everything was ok */
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  if (this->bpp < 24) /* 1 and 8 bpp images are followed by their palette */
+    return WritePalette();
+
+  return IM_ERR_NONE;
+}
+
+int imFormatICO::ReadPalette()
+{
+  /* the file stores palette_count entries of 4 bytes each; the 3rd, 2nd and 1st bytes feed imColorEncode */
+  unsigned char quads[256 * 4];
+
+  imBinFileRead(handle, quads, this->palette_count * 4, 1);
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  /* repack every entry as an IM color, walking the buffer one entry at a time */
+  const unsigned char* quad = quads;
+  for (int entry = 0; entry < this->palette_count; entry++, quad += 4)
+  {
+    const unsigned char first = quad[0], second = quad[1], third = quad[2];
+    this->palette[entry] = imColorEncode(third, second, first);
+  }
+
+  /* the fourth byte of each entry is ignored */
+  return IM_ERR_NONE;
+}
+
+int imFormatICO::WritePalette()
+{
+  unsigned char quads[256 * 4];
+  unsigned char* quad = quads;
+
+  /* expand every IM color into a 4 byte file entry: the decoded components
+     land in bytes 2, 1 and 0 (in imColorDecode argument order) and byte 3 is zeroed */
+  for (int entry = 0; entry < this->palette_count; entry++)
+  {
+    imColorDecode(quad + 2, quad + 1, quad, this->palette[entry]);
+    quad[3] = 0;
+    quad += 4;
+  }
+
+  /* the whole table goes out in a single write */
+  imBinFileWrite(handle, quads, this->palette_count * 4, 1);
+
+  /* IM_ERR_ACCESS when the file reports an error, IM_ERR_NONE otherwise */
+  return imBinFileError(handle)? IM_ERR_ACCESS: IM_ERR_NONE;
+}
+
+void imFormatICO::FixRGBOrder()
+{ /* Reorders in place the components of every pixel of line_buffer; called for lines read and for lines to write. */
+  if (this->bpp == 24)
+  {
+    imbyte* byte_data = (imbyte*)this->line_buffer;
+    for (int x = 0; x < this->width; x++)
+    {
+      int c = x*3;
+      imbyte temp = byte_data[c];     // swap R and B
+      byte_data[c] = byte_data[c+2];
+      byte_data[c+2] = temp;
+    }
+  }
+  else /* bpp == 32 */
+  {
+    /* inverts the DWORD values if not intel */
+    if (imBinCPUByteOrder() == IM_BIGENDIAN)
+      imBinSwapBytes4(this->line_buffer, this->width);
+    /* the same buffer is viewed as dwords (source) and as bytes (destination) */
+    unsigned int* dword_data = (unsigned int*)this->line_buffer;
+    imbyte* byte_data = (imbyte*)this->line_buffer;
+
+    for (int x = 0; x < this->width; x++)
+    {
+      unsigned int dword_value = dword_data[x]; /* copied first, because the 4 bytes it occupies are overwritten below */
+      int c = x*4;
+      byte_data[c]   = (imbyte)((0x00FF0000 & dword_value) >> 16);
+      byte_data[c+1] = (imbyte)((0x0000FF00 & dword_value) >> 8);
+      byte_data[c+2] = (imbyte)((0x000000FF & dword_value) >> 0);
+      byte_data[c+3] = (imbyte)((0xFF000000 & dword_value) >> 24);
+    }
+  }
+}
+
+static inline int PixelOffset(int is_top_down, int is_packed, int width, int height, int depth, int col, int row, int plane)
+{
+  /* when is_top_down is set the row is mirrored vertically before indexing */
+  const int line = is_top_down? (height-1 - row): row;
+
+  /* planar layout: every plane is a full width*height block */
+  if (!is_packed)
+    return plane*width*height + line*width + col;
+  return line*width*depth + col*depth + plane;  /* packed layout: the components of a pixel are adjacent */
+}
+
+int imFormatICO::ReadImageData(void* data)
+{ /* Reads the pixel lines, then uses the 1 bpp AND mask to fill the user alpha plane or to choose "TransparencyIndex". */
+  imCounterTotal(this->counter, this->height, "Reading ICO...");
+
+  for (int row = 0; row < this->height; row++)
+  {
+    imBinFileRead(handle, this->line_buffer, this->line_raw_size, 1);
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;     
+
+    if (this->bpp > 8)
+      FixRGBOrder();
+
+    imFileLineBufferRead(this, data, row, 0);
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+  }
+
+  if ((imColorModeHasAlpha(this->user_color_mode) && this->bpp!=32) ||  /* user has alpha and file does not have alpha -> alpha came from AND data */
+       imColorModeSpace(this->user_color_mode) == IM_MAP)   /* or MAP */
+  {
+    int line_size = imFileLineSizeAligned(this->width, 1, 4);
+    int image_size = this->height*line_size;
+    imbyte* and_data = new imbyte[image_size];
+    imBinFileRead(handle, and_data, image_size, 1);
+    if (imBinFileError(handle))
+    {
+      delete [] and_data;  /* the mask must be released on the error path too */
+      return IM_ERR_ACCESS;
+    }
+    imbyte* and_data_line = and_data;
+    imbyte* user_data = (imbyte*)data;
+    unsigned long histo[256];
+    int depth = imColorModeDepth(this->user_color_mode);
+    int alpha_plane = 0;
+    memset(histo, 0, sizeof(histo));  /* cleared unconditionally: it is read below whenever the image is MAP */
+    if (imColorModeHasAlpha(this->user_color_mode))
+      alpha_plane = depth - 1;
+
+    for (int j = 0; j < this->height; j++)
+    {
+      for (int i = 0; i < this->width; i++)
+      {
+        int offset = PixelOffset(imColorModeIsTopDown(this->user_color_mode), 
+                                imColorModeIsPacked(this->user_color_mode), 
+                                this->width, this->height, depth, i, j, alpha_plane);
+
+        if (imColorModeHasAlpha(this->user_color_mode))
+        {
+          if (((and_data_line[i / 8] >> (7 - i % 8)) & 0x01))  /* mask bits are stored most significant first */
+            user_data[offset] = 0;
+          else
+            user_data[offset] = 255;
+        }
+        else
+        {
+          /* the most repeated index with transparency will be the transparent index. */
+          if (((and_data_line[i / 8] >> (7 - i % 8)) & 0x01))
+            histo[user_data[offset]]++;
+        }
+      }
+      and_data_line += line_size;
+    }
+
+    if (imColorModeSpace(this->user_color_mode) == IM_MAP)
+    {
+      imbyte transp_index = 0;
+      unsigned long histo_max = histo[0];
+
+      for (int i = 1; i < 256; i++)
+      {
+        if (histo_max < histo[i])
+        {
+          histo_max = histo[i];
+          transp_index = (imbyte)i;
+        }
+      }
+      if (histo_max > 0) AttribTable()->Set("TransparencyIndex", IM_BYTE, 1, &transp_index);  /* no masked pixel: no transparent index */
+    }
+
+    delete [] and_data;
+  }
+
+  return IM_ERR_NONE;
+}
+
+int imFormatICO::WriteImageData(void* data)
+{ /* Writes the pixel lines followed by a 1 bpp AND mask built from the alpha plane or from "TransparencyIndex". */
+  imCounterTotal(this->counter, this->height, "Writing ICO...");
+
+  /* Image Data */
+
+  for (int row = 0; row < this->height; row++)
+  {
+    imFileLineBufferWrite(this, data, row, 0);
+
+    if (this->bpp > 8)
+      FixRGBOrder();
+
+    imBinFileWrite(handle, this->line_buffer, this->line_raw_size, 1);
+
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;     
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+  }
+
+  /* AND Data */
+
+  int and_line_size = imFileLineSizeAligned(this->width, 1, 4);
+  int and_size = this->height*and_line_size;
+  imbyte* and_data = new imbyte[and_size];
+  memset(and_data, 0, and_size);  /* zero = opaque */
+
+  if (imColorModeHasAlpha(this->user_color_mode))
+  {
+    imbyte* and_data_line = and_data;
+    imbyte* user_data = (imbyte*)data;
+    int depth = imColorModeDepth(this->user_color_mode);
+    int alpha_plane = depth - 1; /* the last plane is taken as alpha */
+
+    for (int j = 0; j < this->height; j++)
+    {
+      for (int i = 0; i < this->width; i++)
+      {
+        int offset = PixelOffset(imColorModeIsTopDown(this->user_color_mode), 
+                                 imColorModeIsPacked(this->user_color_mode), 
+                                 this->width, this->height, depth, i, j, alpha_plane);
+
+        if (user_data[offset] == 0) /* mark only full transparent pixels */
+          and_data_line[i / 8] |=  (0x01 << (7 - (i % 8)));
+      }
+      and_data_line += and_line_size;
+    }
+  }
+  else
+  {
+    const imbyte* transp_index = (const imbyte*)AttribTable()->Get("TransparencyIndex");
+    if (imColorModeSpace(this->user_color_mode) == IM_MAP && transp_index)
+    {
+      imbyte* and_data_line = and_data;
+      imbyte* user_data = (imbyte*)data;
+      int depth = imColorModeDepth(this->user_color_mode);
+
+      for (int j = 0; j < this->height; j++)
+      {
+        for (int i = 0; i < this->width; i++)
+        {
+          int offset = PixelOffset(imColorModeIsTopDown(this->user_color_mode), 
+                                  imColorModeIsPacked(this->user_color_mode), 
+                                  this->width, this->height, depth, i, j, 0);
+
+          if (user_data[offset] == *transp_index) /* pixels using the transparent index are masked */
+            and_data_line[i / 8] |=  (0x01 << (7 - (i % 8)));
+        }
+        and_data_line += and_line_size;
+      }
+    }
+  }
+
+  imBinFileWrite(handle, and_data, and_size, 1);
+  delete [] and_data;
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;     
+
+  this->image_count++; /* only a completely written image is counted */
+
+  return IM_ERR_NONE;
+}
+
+int imFormatICO::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  /* the color spaces listed here are refused; every other space is accepted */
+  switch (imColorModeSpace(color_mode))
+  {
+  case IM_YCBCR:
+  case IM_LAB:
+  case IM_LUV:
+  case IM_XYZ:
+  case IM_CMYK:
+    return IM_ERR_DATA;
+  default:
+    break;
+  }
+
+  if (data_type != IM_BYTE) return IM_ERR_DATA;  /* byte data only */
+
+  int no_name = (compression == NULL) || (compression[0] == 0);  /* a missing or empty name is accepted as is */
+  return (no_name || imStrEqual(compression, "NONE"))? IM_ERR_NONE: IM_ERR_COMPRESS;
+}
diff --git a/src/im_format_jp2.cpp b/src/im_format_jp2.cpp
new file mode 100644
index 0000000..16e2c60
--- /dev/null
+++ b/src/im_format_jp2.cpp
@@ -0,0 +1,493 @@
+/** \file
+ * \brief JP2 File Format
+ *
+ * See Copyright Notice in im_lib.h
+ * See libJasPer Copyright Notice in jasper.h
+ * $Id: im_format_jp2.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_format_jp2.h"
+#include "im_util.h"
+#include "im_counter.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "jasper/jasper.h"
+#include "jpc/jpc_enc.h"
+#include "jp2/jp2_cod.h"
+
+extern "C" 
+{
+  /* implemented in jas_binfile.c */
+  jas_stream_t *jas_binfile_open(const char *file_name, int is_new);
+}
+
+/* Interprets the low `prec` bits of v as a sample value.
+ * When sgnd is non-zero and the top bit of those `prec` bits is set,
+ * the result is the corresponding negative value; otherwise it is the
+ * masked bits unchanged. */
+jas_seqent_t iJP2Bits2Int(jas_seqent_t v, int prec, int sgnd)
+{
+  jas_seqent_t bits = v & JAS_ONES(prec);
+
+  if (!sgnd)
+    return bits;
+
+  if (bits & (1 << (prec - 1)))
+    return bits - (1 << prec);
+
+  return bits;
+}
+
+/* Based on jas_image_readcmpt.
+ * Reads one row of component `cmpno` from the component stream into `data`,
+ * one T per sample. Signed components (precision > 1) are shifted by half
+ * the range so the stored values are unsigned.
+ * Returns 1 on success, 0 when the seek or a byte read fails. */
+template <class T> 
+int iJP2ReadLine(jas_image_t *image, int row, int cmpno, T *data)
+{
+  jas_image_cmpt_t *comp = image->cmpts_[cmpno];
+
+  // move to the first byte of the requested row
+  if (jas_stream_seek(comp->stream_, (comp->width_ * row) * comp->cps_, SEEK_SET) < 0) 
+    return 0;
+
+  // bias that converts from signed to unsigned
+  int bias = (comp->sgnd_ && comp->prec_ > 1) ? (1 << (comp->prec_-1)) : 0;
+
+  for (int x = 0; x < comp->width_; x++) 
+  {
+    // assemble the sample from cps_ bytes, most significant byte first
+    jas_seqent_t sample = 0;
+    int byte_no = 0;
+    while (byte_no < comp->cps_) 
+    {
+      int c = jas_stream_getc(comp->stream_);
+      if (c == EOF) 
+        return 0;
+
+      sample = (sample << 8) | (c & 0xff);
+      byte_no++;
+    }
+
+    sample = iJP2Bits2Int(sample, comp->prec_, comp->sgnd_);
+
+    data[x] = (T)(sample + bias);
+  }
+
+  return 1;
+}
+
+/* Packs a sample value into its `prec`-bit stored form.
+ * Negative values of a signed component are first offset by 2^prec;
+ * the result is always masked to `prec` bits. */
+uint_fast32_t iJP2Int2Bits(jas_seqent_t v, int prec, int sgnd)
+{
+  if (sgnd && v < 0)
+    return ((1 << prec) + v) & JAS_ONES(prec);
+
+  return v & JAS_ONES(prec);
+}
+
+/* Based on jas_image_writecmpt.
+ * Writes one row of samples from `data` into the stream of component
+ * `cmpno`, cps_ bytes per sample, most significant byte first.
+ * Returns 1 on success, 0 when the seek or a byte write fails. */
+template <class T> 
+int iJP2WriteLine(jas_image_t *image, int row, int cmpno, T *data)
+{
+  jas_image_cmpt_t *comp = image->cmpts_[cmpno];
+
+  // move to the first byte of the requested row
+  if (jas_stream_seek(comp->stream_, (comp->width_ * row) * comp->cps_, SEEK_SET) < 0) 
+    return 0;
+
+  for (int x = 0; x < comp->width_; x++) 
+  {
+    jas_seqent_t bits = iJP2Int2Bits(data[x], comp->prec_, comp->sgnd_);
+
+    int byte_no = 0;
+    while (byte_no < comp->cps_) 
+    {
+      // emit the current top byte, then shift the next one into its place
+      int c = (bits >> (8 * (comp->cps_ - 1))) & 0xff;
+      if (jas_stream_putc(comp->stream_, (imbyte)c) == EOF) 
+        return 0;
+
+      bits <<= 8;
+      byte_no++;
+    }
+  }
+
+  return 1;
+}
+
+/* Compression names passed to the imFormat constructor (single entry). */
+static const char* iJP2CompTable[1] = { "JPEG-2000" };
+
+/* JP2 file format driver implemented on top of the JasPer library. */
+class imFormatJP2: public imFormat
+{
+  int fmtid;              // value returned by jas_image_getfmt in Open
+  jas_stream_t *stream;   // stream returned by jas_binfile_open in Open/New
+  jas_image_t *image;     // image decoded (read) or created (write); 0 until then
+
+public:
+  imFormatJP2()
+    :imFormat("JP2", 
+              "JPEG-2000 JP2 File Format", 
+              "*.jp2;*.jpc;*.j2c;*.j2k;", 
+              iJP2CompTable, 
+              1, 
+              0),
+     image(0)
+    {}
+  ~imFormatJP2() {}
+
+  int Open(const char* file_name);
+  int New(const char* file_name);
+  void Close();
+  void* Handle(int index);
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo();
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+/* State shared with the JasPer progress/abort callbacks below.
+ * It is set around jas_image_decode / jas_image_encode calls. */
+static char* ijp2_message = NULL;  /* message passed to imCounterTotal on the first progress call, then cleared */
+static int ijp2_abort = 0;         /* set to 1 when imCounterIncTo reports failure; returned by iJP2AbortProc */
+static int ijp2_counter = -1;      /* counter used by the callbacks; reset to -1 after each decode/encode */
+
+/* Progress callback registered with jas_set_progress_proc.
+ * Forwards progress to the counter in ijp2_counter and raises ijp2_abort
+ * when imCounterIncTo returns zero. `descr` is ignored. */
+static void iJP2ProgressProc(int done, int total, char *descr)
+{
+  (void)descr;
+
+  /* first notification: publish the total and consume the pending message */
+  if (done == 0)
+  {
+    imCounterTotal(ijp2_counter, total, ijp2_message);
+    ijp2_message = NULL;
+  }
+
+  const int keep_going = imCounterIncTo(ijp2_counter, done);
+  if (!keep_going)
+    ijp2_abort = 1;
+}
+
+/* Abort-test callback registered with jas_set_test_abort_proc.
+ * Returns the flag raised by iJP2ProgressProc (non-zero once the counter
+ * reported failure). */
+static int iJP2AbortProc(void)
+{
+  return ijp2_abort;
+}
+
+/* Initializes JasPer, installs the progress and abort callbacks and
+ * registers a new imFormatJP2 instance. */
+void imFormatRegisterJP2(void)
+{
+  // Jasper library initialization
+  jas_init();
+
+  // route JasPer progress/abort notifications through the IM counter
+  jas_set_progress_proc((jas_progress_proc_t)iJP2ProgressProc);
+  jas_set_test_abort_proc((jas_test_abort_proc_t)iJP2AbortProc);
+
+  imFormatJP2* jp2_format = new imFormatJP2();
+  imFormatRegister(jp2_format);
+}
+
+/* Opens an existing file for reading.
+ * Returns IM_ERR_OPEN when the stream cannot be opened, IM_ERR_FORMAT when
+ * JasPer does not recognize the contents (the stream is closed in that
+ * case), IM_ERR_NONE otherwise. */
+int imFormatJP2::Open(const char* file_name)
+{
+  jas_stream_t *in = jas_binfile_open(file_name, 0);
+  this->stream = in;
+  if (in == NULL)
+    return IM_ERR_OPEN;
+
+  // ask JasPer which of its formats matches the stream contents
+  int fmt = jas_image_getfmt(in);
+  this->fmtid = fmt;
+  if (fmt < 0)
+  {
+    jas_stream_close(in);
+    return IM_ERR_FORMAT;
+  }
+
+  this->image_count = 1;
+  strcpy(this->compression, "JPEG-2000");
+
+  return IM_ERR_NONE;
+}
+
+/* Creates a new file for writing.
+ * Returns IM_ERR_OPEN when the stream cannot be created, IM_ERR_NONE otherwise. */
+int imFormatJP2::New(const char* file_name)
+{
+  jas_stream_t *out = jas_binfile_open(file_name, 1);
+  this->stream = out;
+  if (out == NULL)
+    return IM_ERR_OPEN;
+
+  this->image_count = 1;
+  strcpy(this->compression, "JPEG-2000");
+
+  return IM_ERR_NONE;
+}
+
+/* Destroys the JasPer image, if one exists, and closes the stream. */
+void imFormatJP2::Close()
+{
+  if (this->image != 0)
+    jas_image_destroy(this->image);
+
+  jas_stream_close(this->stream);
+}
+
+/* Returns an internal handle by index:
+ * 0 = the stream's underlying object (stream->obj_), 1 = the jas_image_t,
+ * 2 = the jas_stream_t, anything else = NULL. */
+void* imFormatJP2::Handle(int index)
+{
+  switch (index)
+  {
+  case 0:
+    return (void*)this->stream->obj_;
+  case 1:
+    return (void*)this->image;
+  case 2:
+    return (void*)this->stream;
+  default:
+    return NULL;
+  }
+}
+
+/* Decodes the image with JasPer and fills in width, height, file color
+ * mode and file data type; copies the GeoTIFF and XMP metadata boxes into
+ * the attribute table when present.
+ * Returns IM_ERR_ACCESS when decoding fails, IM_ERR_DATA for an unsupported
+ * color space family, IM_ERR_NONE otherwise. `index` is ignored. */
+int imFormatJP2::ReadImageInfo(int index)
+{
+  (void)index;
+
+  // JasPer decodes the whole image in this call, so the counter callbacks
+  // are armed only for its duration.
+  ijp2_abort = 0;
+  ijp2_message = "Reading JP2...";
+  ijp2_counter = this->counter;
+  this->image = jas_image_decode(this->stream, this->fmtid, 0);
+  ijp2_counter = -1;
+  if (this->image == NULL)
+    return IM_ERR_ACCESS;
+
+  this->width = jas_image_width(this->image);
+  this->height = jas_image_height(this->image);
+
+  // map the JasPer color space family to the IM color space
+  int family = jas_clrspc_fam(jas_image_clrspc(this->image));
+  if (family == JAS_CLRSPC_FAM_GRAY)
+    this->file_color_mode = IM_GRAY;
+  else if (family == JAS_CLRSPC_FAM_XYZ)
+    this->file_color_mode = IM_XYZ;
+  else if (family == JAS_CLRSPC_FAM_RGB)
+    this->file_color_mode = IM_RGB;
+  else if (family == JAS_CLRSPC_FAM_YCBCR)
+    this->file_color_mode = IM_YCBCR;
+  else if (family == JAS_CLRSPC_FAM_LAB)
+    this->file_color_mode = IM_LAB;
+  else
+    return IM_ERR_DATA;
+
+  // the precision of component 0 selects the data type
+  int prec = jas_image_cmptprec(this->image, 0);
+  this->file_data_type = (prec > 8) ? IM_USHORT : IM_BYTE;
+
+  if (prec < 8)
+    this->convert_bpp = -prec; // just expand to 0-255
+
+  if (prec == 1 && this->file_color_mode == IM_GRAY)
+    this->file_color_mode = IM_BINARY;
+
+  // an opacity component means the file carries alpha
+  if (jas_image_getcmptbytype(this->image, JAS_IMAGE_CT_OPACITY) != -1)
+    this->file_color_mode |= IM_ALPHA;
+
+  this->file_color_mode |= IM_TOPDOWN;
+
+  if (this->image->metadata.count > 0) 
+  {
+    imAttribTable* attrib_table = AttribTable();
+
+    // GeoTIFF box first
+    jas_metadata_box_t *geo_box = &this->image->metadata.boxes[JAS_IMAGE_BOX_GEO]; 
+    if (geo_box->size>0 && geo_box->buf) 
+      attrib_table->Set("GeoTIFFBox", IM_BYTE, geo_box->size, geo_box->buf);
+
+    // then the XMP box
+    jas_metadata_box_t *xmp_box = &this->image->metadata.boxes[JAS_IMAGE_BOX_XMP]; 
+    if (xmp_box->size>0 && xmp_box->buf) 
+      attrib_table->Set("XMLPacket", IM_BYTE, xmp_box->size, xmp_box->buf);
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Creates the JasPer image that will receive the pixel data and copies the
+ * "GeoTIFFBox" and "XMLPacket" attributes into its metadata boxes.
+ * Returns IM_ERR_DATA for an unsupported color space or when the image
+ * cannot be created, IM_ERR_NONE otherwise. */
+int imFormatJP2::WriteImageInfo()
+{
+  this->file_data_type = this->user_data_type;
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);
+  this->file_color_mode |= IM_TOPDOWN;
+
+  int prec = 8;
+  if (this->file_data_type == IM_USHORT)
+    prec = 16;
+
+  jas_clrspc_t clrspc;
+  switch (imColorModeSpace(this->user_color_mode))
+  {
+  case IM_BINARY:
+    prec = 1;
+    /* fall through: binary is stored as 1-bit gray */
+  case IM_GRAY:
+    clrspc = JAS_CLRSPC_SGRAY;
+    break;
+  case IM_RGB:   
+    clrspc = JAS_CLRSPC_SRGB;
+    break;
+  case IM_XYZ:
+    clrspc = JAS_CLRSPC_CIEXYZ;
+    break;
+  case IM_LAB:
+    clrspc = JAS_CLRSPC_CIELAB;
+    break;
+  case IM_YCBCR:
+    clrspc = JAS_CLRSPC_SYCBCR;
+    break;
+  default:
+    return IM_ERR_DATA;
+  }
+
+  if (imColorModeHasAlpha(this->user_color_mode))
+    this->file_color_mode |= IM_ALPHA;
+
+  int numcmpts = imColorModeDepth(this->file_color_mode);
+  
+  // every component has the same geometry and precision
+  jas_image_cmptparm_t cmptparms[4];
+  for (int i = 0; i < numcmpts; i++) 
+  {
+    jas_image_cmptparm_t* cmptparm = &cmptparms[i];
+
+    cmptparm->tlx = 0;
+    cmptparm->tly = 0;
+    cmptparm->hstep = 1;
+    cmptparm->vstep = 1;
+    cmptparm->width = this->width;
+    cmptparm->height = this->height;
+    cmptparm->prec = prec;
+    cmptparm->sgnd = 0;
+  }
+
+  this->image = jas_image_create(numcmpts, cmptparms, clrspc);
+  if (!this->image)
+    return IM_ERR_DATA;
+
+  if (this->image->metadata.count > 0) 
+  {
+    const void* data;
+    int size;
+    imAttribTable* attrib_table = AttribTable();
+
+    // GeoTIFF first
+    data = attrib_table->Get("GeoTIFFBox", NULL, &size);
+    if (data)
+    {
+      jas_metadata_box_t *metabox = &image->metadata.boxes[JAS_IMAGE_BOX_GEO]; 
+      jas_box_alloc(metabox, size);
+      if (metabox->buf)   // skip the copy if the box buffer is missing
+      {
+        memcpy(metabox->buf, data, size);
+        memcpy(metabox->id, msi_uuid, sizeof(msi_uuid));
+      }
+    }
+   
+    // Adobe XMP
+    // The attribute is optional: without this test a missing "XMLPacket"
+    // led to memcpy from a NULL pointer with an unset size.
+    data = attrib_table->Get("XMLPacket", NULL, &size);
+    if (data)
+    {
+      jas_metadata_box_t *metabox = &image->metadata.boxes[JAS_IMAGE_BOX_XMP]; 
+      jas_box_alloc(metabox, size);
+      if (metabox->buf)   // skip the copy if the box buffer is missing
+      {
+        memcpy(metabox->buf, data, size);
+        memcpy(metabox->id, xmp_uuid, sizeof(xmp_uuid));
+      }
+    }
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Copies the decoded image, one line buffer at a time, into `data`.
+ * Returns IM_ERR_DATA when a required component is missing, IM_ERR_ACCESS
+ * when a row cannot be read, IM_ERR_COUNTER when the counter reports
+ * failure, IM_ERR_NONE otherwise. */
+int imFormatJP2::ReadImageData(void* data)
+{
+  const int count = imFileLineBufferCount(this);
+
+  imCounterTotal(this->counter, count, NULL);
+
+  // plane that receives the opacity component, or -1 when alpha is not transferred
+  const int alpha_plane =
+    (imColorModeHasAlpha(this->user_color_mode) && imColorModeHasAlpha(this->file_color_mode)) ?
+      imColorModeDepth(this->file_color_mode) - 1 : -1;
+
+  int row = 0, plane = 0;
+  int done = 0;
+  while (done < count)
+  {
+    // locate the JasPer component that feeds the current plane
+    int cmpno = (plane == alpha_plane) ?
+      jas_image_getcmptbytype(image, JAS_IMAGE_CT_OPACITY) :
+      jas_image_getcmptbytype(image, JAS_IMAGE_CT_COLOR(plane));
+
+    if (cmpno == -1)
+      return IM_ERR_DATA;
+
+    int ok;
+    if (this->file_data_type == IM_BYTE)
+      ok = iJP2ReadLine(image, row, cmpno, (imbyte*)this->line_buffer);
+    else
+      ok = iJP2ReadLine(image, row, cmpno, (imushort*)this->line_buffer);
+
+    if (!ok)
+      return IM_ERR_ACCESS;
+
+    imFileLineBufferRead(this, data, row, plane);
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+
+    imFileLineBufferInc(this, &row, &plane);
+    done++;
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Transfers the user data into the JasPer image line by line and then
+ * encodes the image to the stream.
+ * Returns IM_ERR_ACCESS when a row cannot be stored or the encoder fails,
+ * IM_ERR_COUNTER when the counter reports failure, IM_ERR_NONE otherwise. */
+int imFormatJP2::WriteImageData(void* data)
+{
+  int count = imFileLineBufferCount(this);
+  imCounterTotal(this->counter, count, "Writing JP2...");  /* first time count */
+
+  // tag the last component as opacity when alpha is written, the rest as color
+  int depth = imColorModeDepth(this->file_color_mode);
+  if (imColorModeHasAlpha(this->user_color_mode) && imColorModeHasAlpha(this->file_color_mode))
+  {
+    jas_image_setcmpttype(image, depth-1, JAS_IMAGE_CT_OPACITY);
+    depth--;
+  }
+
+  for (int d = 0; d < depth; d++)
+    jas_image_setcmpttype(image, d, JAS_IMAGE_CT_COLOR(d));
+
+  int row = 0, plane = 0;
+  for (int i = 0; i < count; i++)
+  {
+    imFileLineBufferWrite(this, data, row, plane);
+
+    int ret = 1;
+    if (this->file_data_type == IM_BYTE)
+      ret = iJP2WriteLine(image, row, plane, (imbyte*)this->line_buffer);
+    else
+      ret = iJP2WriteLine(image, row, plane, (imushort*)this->line_buffer);
+
+    // a failed row write was previously ignored and produced a corrupt image
+    if (!ret)
+      return IM_ERR_ACCESS;
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+
+    imFileLineBufferInc(this, &row, &plane);
+  }
+
+  char outopts[512] = "";
+  imAttribTable* attrib_table = AttribTable();
+
+  // only a positive ratio yields a meaningful encoder rate (avoids 1/0)
+  float* ratio = (float*)attrib_table->Get("CompressionRatio");
+  if (ratio && *ratio > 0)
+    sprintf(outopts, "rate=%g", (double)(1.0 / *ratio));
+
+  // The counter continues because in Jasper all image writing is done here. BAD!
+  ijp2_counter = this->counter;
+  ijp2_abort = 0;
+  ijp2_message = NULL;  /* other counts */
+  int err = jas_image_encode(image, stream, 0 /*JP2 format always */, outopts);
+  ijp2_counter = -1;
+  if (err)
+    return IM_ERR_ACCESS;
+
+  jas_stream_flush(stream);
+
+  return IM_ERR_NONE;
+}
+
+/* Tells whether an image described by compression / color_mode / data_type
+ * is accepted by this writer.
+ * Returns IM_ERR_DATA for a rejected color space or data type,
+ * IM_ERR_COMPRESS for a compression name other than "JPEG-2000",
+ * and IM_ERR_NONE otherwise. */
+int imFormatJP2::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  /* color spaces this writer refuses */
+  switch (imColorModeSpace(color_mode))
+  {
+  case IM_MAP:
+  case IM_CMYK:
+  case IM_LUV:
+    return IM_ERR_DATA;
+  default:
+    break;
+  }
+
+  /* byte and unsigned short samples only */
+  if (!(data_type == IM_BYTE || data_type == IM_USHORT))
+    return IM_ERR_DATA;
+
+  /* a missing or empty name means "use the default"; anything else must match */
+  if (compression && compression[0] != 0 && !imStrEqual(compression, "JPEG-2000"))
+    return IM_ERR_COMPRESS;
+
+  return IM_ERR_NONE;
+}
diff --git a/src/im_format_jpeg.cpp b/src/im_format_jpeg.cpp
new file mode 100644
index 0000000..91d0ae9
--- /dev/null
+++ b/src/im_format_jpeg.cpp
@@ -0,0 +1,820 @@
+/** \file
+ * \brief JPEG File Interchange Format
+ *
+ * See Copyright Notice in im_lib.h
+ * See libJPEG Copyright Notice in jpeglib.h
+ * $Id: im_format_jpeg.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_format_all.h"
+#include "im_util.h"
+#include "im_counter.h"
+#include "im_math.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <setjmp.h>
+#include <math.h>
+
+extern "C" {
+#include "jpeglib.h"
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+}
+             
+#ifdef USE_EXIF
+#include "exif-data.h"
+#include "exif-entry.h"
+#include "exif-utils.h"
+extern "C" const char *exif_tag_get_name_index (unsigned int i, ExifTag *tag);
+#endif
+
+/* libjpeg error handlers */
+
+/* Error manager given to libjpeg: the standard manager plus the jump
+ * buffer that JPEGerror_exit uses to return to the caller.
+ * JPEGerror_exit casts cinfo->err back to JPEGerror_mgr*, so `pub` must
+ * remain the first member. */
+struct JPEGerror_mgr 
+{
+  jpeg_error_mgr pub;  /* "public" fields */
+  jmp_buf setjmp_buffer;      /* for return to caller */
+};
+
+/* libjpeg error_exit replacement: instead of terminating the program,
+ * jumps back to the setjmp point stored in the JPEGerror_mgr. */
+METHODDEF(void)
+JPEGerror_exit (j_common_ptr cinfo)
+{
+  /* cinfo->err really points to a JPEGerror_mgr struct, so coerce the
+   * pointer and return control to its setjmp point */
+  longjmp(((JPEGerror_mgr*)cinfo->err)->setjmp_buffer, 1);
+}
+
+/* libjpeg output_message replacement: intentionally does nothing, so no
+ * message is produced by this handler. */
+METHODDEF(void)
+JPEGoutput_message (j_common_ptr cinfo)
+{
+  (void)cinfo;
+}
+
+/* libjpeg emit_message replacement: intentionally does nothing, whatever
+ * the message level. */
+METHODDEF(void)
+JPEGemit_message (j_common_ptr cinfo, int msg_level)
+{
+  (void)cinfo; (void)msg_level;
+}
+
+/* Compression names passed to the imFormat constructor (single entry). */
+static const char* iJPEGCompTable[1] = { "JPEG" };
+
+/* JPEG file format driver implemented on top of libjpeg
+ * (and libexif when USE_EXIF is defined). */
+class imFormatJPEG: public imFormat
+{
+  jpeg_decompress_struct dinfo;  // libjpeg decompressor, created in Open
+  jpeg_compress_struct cinfo;    // libjpeg compressor, created in New
+  JPEGerror_mgr jerr;            // error manager shared by both, carries the setjmp buffer
+
+  imBinFile* handle;             // file handle from imBinFileOpen / imBinFileNew
+  int fix_adobe;                 // set to 1 in ReadImageInfo for JCS_YCCK files; ReadImageData then inverts each line
+
+#ifdef USE_EXIF
+  void iReadExifAttrib(unsigned char* data, int data_length, imAttribTable* attrib_table);
+  void iWriteExifAttrib(imAttribTable* attrib_table);
+#endif
+
+public:
+  imFormatJPEG()
+    :imFormat("JPEG", 
+              "JPEG File Interchange Format", 
+              "*.jpg;*.jpeg;*.jpe;*.jfif;*.jif;*.jfi;", 
+              iJPEGCompTable, 
+              1, 
+              0)
+    {}
+  ~imFormatJPEG() {}
+
+  int Open(const char* file_name);
+  int New(const char* file_name);
+  void Close();
+  void* Handle(int index);
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo();
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+/* Registers a new imFormatJPEG instance. */
+void imFormatRegisterJPEG(void)
+{
+  imFormatJPEG* jpeg_format = new imFormatJPEG();
+  imFormatRegister(jpeg_format);
+}
+
+/* Opens an existing file for reading.
+ * Checks the two signature bytes (0xFF 0xD8), then creates the libjpeg
+ * decompressor and attaches the file to it.
+ * Returns IM_ERR_OPEN when the file cannot be opened, IM_ERR_ACCESS when
+ * the signature cannot be read, IM_ERR_FORMAT when the signature does not
+ * match or libjpeg signals an error, IM_ERR_NONE otherwise. */
+int imFormatJPEG::Open(const char* file_name)
+{
+  this->handle = imBinFileOpen(file_name);
+  if (this->handle == NULL)
+    return IM_ERR_OPEN;
+
+  unsigned char sig[2];
+  if (!imBinFileRead(this->handle, sig, 2, 1))
+  {
+    imBinFileClose(this->handle);
+    return IM_ERR_ACCESS;
+  }
+
+  if (sig[0] != 0xFF || sig[1] != 0xD8)
+  {
+    imBinFileClose(this->handle);
+    return IM_ERR_FORMAT;
+  }
+
+  /* rewind so libjpeg sees the file from its first byte */
+  imBinFileSeekTo(this->handle, 0);
+
+  strcpy(this->compression, "JPEG");
+  this->image_count = 1;
+
+  /* install the error manager with the non-terminating, silent handlers */
+  this->dinfo.err = jpeg_std_error(&this->jerr.pub);
+  this->jerr.pub.error_exit = JPEGerror_exit;
+  this->jerr.pub.output_message = JPEGoutput_message;
+  this->jerr.pub.emit_message = JPEGemit_message;
+
+  /* Establish the setjmp return context for error_exit to use. */
+  if (setjmp(this->jerr.setjmp_buffer)) 
+  {
+    /* If we get here, the JPEG code has signaled an error.
+     * We need to clean up the JPEG object, close the input file, and return. */
+    jpeg_destroy_decompress(&this->dinfo);
+    imBinFileClose(this->handle);
+    return IM_ERR_FORMAT;
+  }
+
+  /* Now we can initialize the JPEG decompression object. */
+  jpeg_create_decompress(&this->dinfo);
+
+  /* Step 2: specify data source (eg, a file) */
+  /* NOTE(review): the imBinFile handle is cast to FILE*; presumably the
+   * libjpeg source manager used by this project expects that - confirm. */
+  jpeg_stdio_src(&this->dinfo, (FILE*)this->handle);
+
+  return IM_ERR_NONE;
+}
+
+/* Creates a new file for writing, creates the libjpeg compressor and
+ * attaches the file to it.
+ * Returns IM_ERR_OPEN when the file cannot be created, IM_ERR_ACCESS when
+ * libjpeg signals an error, IM_ERR_NONE otherwise. */
+int imFormatJPEG::New(const char* file_name)
+{
+  this->handle = imBinFileNew(file_name);
+  if (this->handle == NULL)
+    return IM_ERR_OPEN;
+
+  /* install the error manager with the non-terminating, silent handlers */
+  this->cinfo.err = jpeg_std_error(&this->jerr.pub);
+  this->jerr.pub.error_exit = JPEGerror_exit;
+  this->jerr.pub.output_message = JPEGoutput_message;
+  this->jerr.pub.emit_message = JPEGemit_message;
+  
+  /* Establish the setjmp return context for error_exit to use. */
+  if (setjmp(this->jerr.setjmp_buffer)) 
+  {
+    /* If we get here, the JPEG code has signaled an error.
+     * We need to clean up the JPEG object, close the output file, and return. */
+    jpeg_destroy_compress(&this->cinfo);
+    imBinFileClose(this->handle);
+    return IM_ERR_ACCESS;
+  }
+  
+  /* Now we can initialize the JPEG compression object. */
+  jpeg_create_compress(&this->cinfo);
+
+  /* Step 2: specify data destination (eg, a file) */
+  /* NOTE(review): the imBinFile handle is cast to FILE*; presumably the
+   * libjpeg destination manager used by this project expects that - confirm. */
+  jpeg_stdio_dest(&this->cinfo, (FILE*)this->handle);
+
+  strcpy(this->compression, "JPEG");
+  this->image_count = 1;
+
+  return IM_ERR_NONE;
+}
+
+/* Destroys the libjpeg object in use (compressor for a new file,
+ * decompressor otherwise) and closes the file. */
+void imFormatJPEG::Close()
+{
+  if (!this->is_new)
+    jpeg_destroy_decompress(&this->dinfo);
+  else
+    jpeg_destroy_compress(&this->cinfo);
+
+  imBinFileClose(this->handle);
+}
+
+/* Returns an internal handle by index:
+ * 0 = the imBinFile handle, 1 = the libjpeg object in use (compressor for
+ * a new file, decompressor otherwise), anything else = NULL. */
+void* imFormatJPEG::Handle(int index)
+{
+  switch (index)
+  {
+  case 0:
+    return this->handle;
+  case 1:
+    return this->is_new ? (void*)&this->cinfo : (void*)&this->dinfo;
+  default:
+    return NULL;
+  }
+}
+
+#ifdef USE_EXIF
+/* Parses an EXIF block (the payload of an APP1 marker) and stores every
+ * named entry of IFD_0 and IFD_EXIF in attrib_table.
+ * Byte-like formats are stored as IM_BYTE, shorts as IM_USHORT, longs as
+ * IM_INT and rationals as IM_FLOAT (numerator / denominator).
+ * The resolution unit is stored as the string "DPI" or "DPC".
+ * Entries without payload, with an inconsistent size, or in a format that
+ * is not converted here (float, double, unknown) are skipped. */
+void imFormatJPEG::iReadExifAttrib(unsigned char* data, int data_length, imAttribTable* attrib_table)
+{
+  ExifData* exif = exif_data_new_from_data(data, data_length);
+  if (!exif)
+    return;
+
+  void* value = NULL;       // scratch buffer reused by all entries, grown on demand
+  int c, value_size = 0;
+
+  ExifByteOrder byte_order = exif_data_get_byte_order(exif);
+
+  for (int i = 0; i < 3; i += 2)  // Only scan for IFD_0 (0) and IFD_EXIF (2)
+  {
+    ExifContent *content = exif->ifd[i];
+    if (!content || !content->count)
+      continue;
+
+    for (int j = 0; j < (int)content->count; j++) 
+    {
+      ExifEntry *entry = content->entries[j];
+      int type = 0;
+
+      const char* name = exif_tag_get_name(entry->tag);
+      if (!name)
+        continue;
+
+      // nothing to convert when the entry carries no payload
+      if (!entry->data || entry->size == 0 || entry->components == 0)
+        continue;
+
+      // the data comes from the file: make sure the payload really holds
+      // `components` elements before reading them
+      int format_size = exif_format_get_size(entry->format);
+      if ((unsigned long)format_size * entry->components > entry->size)
+        continue;
+
+      if (value_size < (int)entry->size)
+      {
+        // keep the old buffer if the reallocation fails, and skip this entry
+        void* grown = realloc(value, entry->size);
+        if (!grown)
+          continue;
+        value = grown;
+        value_size = entry->size;
+      }
+
+      if (entry->tag == EXIF_TAG_RESOLUTION_UNIT)
+      {
+        int res_unit = (int)exif_get_short (entry->data, byte_order);
+
+        if (res_unit == 2)
+          attrib_table->Set("ResolutionUnit", IM_BYTE, 4, "DPI");
+        else if (res_unit == 3)
+          attrib_table->Set("ResolutionUnit", IM_BYTE, 4, "DPC");
+
+        continue;
+      }
+
+      switch (entry->format) 
+      {
+      case EXIF_FORMAT_UNDEFINED:
+      case EXIF_FORMAT_ASCII:
+      case EXIF_FORMAT_SBYTE:
+      case EXIF_FORMAT_BYTE:
+        {
+          type = IM_BYTE;
+          imbyte *bvalue = (imbyte*)value;
+          for (c = 0; c < (int)entry->components; c++) 
+            bvalue[c] = entry->data[c];
+        }
+        break;
+      case EXIF_FORMAT_SSHORT:
+      case EXIF_FORMAT_SHORT:
+        {
+          type = IM_USHORT;
+          imushort *usvalue = (imushort*)value;
+          for (c = 0; c < (int)entry->components; c++) 
+            usvalue[c] = exif_get_short(entry->data + format_size * c, byte_order);
+        }
+        break;
+      case EXIF_FORMAT_LONG:
+        {
+          type = IM_INT;
+          int *ivalue = (int*)value;
+          for (c = 0; c < (int)entry->components; c++) 
+            ivalue[c] = (int)exif_get_long(entry->data + format_size * c, byte_order);
+        }
+        break;
+      case EXIF_FORMAT_SLONG:
+        {
+          type = IM_INT;
+          int *ivalue = (int*)value;
+          for (c = 0; c < (int)entry->components; c++) 
+            ivalue[c] = (int)exif_get_slong(entry->data + format_size * c, byte_order);
+        }
+        break;
+      case EXIF_FORMAT_RATIONAL:
+        {
+          ExifRational v_rat;
+          type = IM_FLOAT;
+          float *fvalue = (float*)value;
+          for (c = 0; c < (int)entry->components; c++) 
+          {
+            v_rat = exif_get_rational(entry->data + format_size * c, byte_order);
+            // a zero denominator is stored as 0 instead of dividing by zero
+            fvalue[c] = v_rat.denominator ? (float)v_rat.numerator / (float)v_rat.denominator : 0.0f;
+          }
+        }
+        break;
+      case EXIF_FORMAT_SRATIONAL:
+        {
+          ExifSRational v_srat;
+          type = IM_FLOAT;
+          float *fvalue = (float*)value;
+          for (c = 0; c < (int)entry->components; c++) 
+          {
+            v_srat = exif_get_srational(entry->data + format_size * c, byte_order);
+            // a zero denominator is stored as 0 instead of dividing by zero
+            fvalue[c] = v_srat.denominator ? (float)v_srat.numerator / (float)v_srat.denominator : 0.0f;
+          }
+        }
+        break;
+      case EXIF_FORMAT_FLOAT:  // missing from libEXIF
+      case EXIF_FORMAT_DOUBLE:
+      default:
+        // not converted: skip instead of storing stale scratch data
+        continue;
+      }
+
+      attrib_table->Set(name, type, entry->components, value);
+    }
+  }
+
+  free(value);
+
+  exif_data_free(exif);
+}
+
+/* Approximates fvalue by the fraction *num / *den.
+ * Exact integers become value/1 and exact reciprocals of integers become
+ * 1/value. Otherwise a negative value is replaced by 0 when sign is 1 and
+ * by its magnitude for any other sign; a positive value and the
+ * denominator are then scaled by 8 until one of them reaches 2^28, and
+ * the numerator is sign * imRound(scaled value). */
+static void iGetRational(float fvalue, int *num, int *den, int sign)
+{
+  /* exact integer */
+  const float whole = floorf(fvalue);
+  if (whole == fvalue)
+  {
+    *num = (int)whole;
+    *den = 1;
+    return;
+  }
+
+  /* exact reciprocal of an integer */
+  const float inverse = 1.0f/fvalue;
+  const float inverse_whole = floorf(inverse);
+  if (inverse_whole == inverse)
+  {
+    *den = (int)inverse_whole;
+    *num = 1;
+    return;
+  }
+
+  if (fvalue < 0)
+    fvalue = (sign == 1) ? 0 : -fvalue;
+
+  *den = 1;
+  if (fvalue > 0)
+  {
+    /* scale both terms while they stay below 2^28 */
+    const long limit = 1L<<(31-3);
+    while (fvalue < limit && *den < limit)
+    {
+      fvalue *= 8;
+      *den *= 8;
+    }
+  }
+
+  *num = sign * imRound(fvalue);
+}
+
+/* Builds an EXIF block from the attributes whose names match libexif tag
+ * names and writes it as an APP1 marker.
+ * Tags above EXIF_TAG_COPYRIGHT go to IFD_EXIF, the others to IFD_0.
+ * "ResolutionUnit" is translated from the "DPI"/other string to 2/3.
+ * The attribute data is assumed to be stored in the IM type matching the
+ * tag's EXIF format (byte, ushort, int, float) - the attribute's own data
+ * type is not checked here.
+ * NOTE(review): entries created with exif_entry_new are never
+ * exif_entry_unref'ed after exif_content_add_entry; this looks like a
+ * leak - confirm against the libexif ownership rules. */
+void imFormatJPEG::iWriteExifAttrib(imAttribTable* attrib_table)
+{
+  ExifData* exif = exif_data_new();
+  if (!exif)
+    return;
+
+  ExifByteOrder byte_order;
+  if (imBinCPUByteOrder() == IM_LITTLEENDIAN)
+    byte_order = EXIF_BYTE_ORDER_INTEL;
+  else
+    byte_order = EXIF_BYTE_ORDER_MOTOROLA;
+    
+  exif_data_set_byte_order(exif, byte_order);
+
+  // walk the libexif tag table until it runs out of names
+  for (int i = 0; ; i++)
+  {
+    ExifTag tag;
+    const char * name = exif_tag_get_name_index(i, &tag);
+    if (!name)
+      break;
+
+    int attrib_count;
+    const void* attrib_data = attrib_table->Get(name, NULL, &attrib_count); 
+    if (!attrib_data)
+      continue;
+
+    ExifEntry *entry = exif_entry_new();
+
+    ExifContent *content;
+    if (tag > EXIF_TAG_COPYRIGHT)
+      content = exif->ifd[2];     // IFD_EXIF (2) contains EXIF tags
+    else
+      content = exif->ifd[0];     // IFD_0    (0) contains TIFF tags 
+
+    exif_content_add_entry(content, entry);
+
+    exif_entry_initialize(entry, tag);
+
+    if (!entry->format)  // unsupported tag
+      continue;
+
+    int format_size = exif_format_get_size(entry->format);
+
+    if (tag == EXIF_TAG_RESOLUTION_UNIT)
+    {
+      int res_unit;
+      if (imStrEqual((char*)attrib_data, "DPI"))
+        res_unit = 2;
+      else
+        res_unit = 3;
+
+      if (entry->data)
+        exif_set_short (entry->data, byte_order, (imushort)res_unit);
+
+      continue;
+    }
+
+    if (entry->components == 0)
+    {
+      // libexif left the size open: allocate room for the whole attribute
+      entry->components = attrib_count;
+      if (entry->data) free(entry->data);
+      entry->size = format_size * entry->components;
+      entry->data = (imbyte*)malloc(entry->size);
+      if (!entry->data)
+      {
+        // out of memory: leave an empty entry rather than writing through NULL
+        entry->size = 0;
+        entry->components = 0;
+        continue;
+      }
+    }
+
+    if (!entry->data)
+      continue;
+
+    // never read more elements than the attribute holds; when libexif
+    // preset a larger component count the remaining elements keep the
+    // values set by exif_entry_initialize
+    int count = (int)entry->components;
+    if (count > attrib_count)
+      count = attrib_count;
+
+    int c;
+    switch (entry->format) 
+    {
+    case EXIF_FORMAT_UNDEFINED:
+    case EXIF_FORMAT_ASCII:
+    case EXIF_FORMAT_BYTE:
+      {
+        imbyte *bvalue = (imbyte*)attrib_data;
+        for (c = 0; c < count; c++) 
+          entry->data[c] = bvalue[c];
+      }
+      break;
+    case EXIF_FORMAT_SHORT:
+      {
+        imushort *usvalue = (imushort*)attrib_data;
+        for (c = 0; c < count; c++) 
+          exif_set_short(entry->data + format_size * c, byte_order, usvalue[c]);
+      }
+      break;
+    case EXIF_FORMAT_LONG:
+      {
+        int *ivalue = (int*)attrib_data;
+        for (c = 0; c < count; c++) 
+          exif_set_long(entry->data + format_size * c, byte_order, (unsigned int)ivalue[c]);
+      }
+      break;
+    case EXIF_FORMAT_SLONG:
+      {
+        int *ivalue = (int*)attrib_data;
+        for (c = 0; c < count; c++) 
+          exif_set_slong(entry->data + format_size * c, byte_order, (int)ivalue[c]);
+      }
+      break;
+    case EXIF_FORMAT_RATIONAL:
+      {
+        ExifRational v_rat;
+        int num, den;
+        float *fvalue = (float*)attrib_data;
+        for (c = 0; c < count; c++) 
+        {
+          iGetRational(fvalue[c], &num, &den, 1);
+          v_rat.numerator = num;
+          v_rat.denominator = den;
+          exif_set_rational(entry->data + format_size * c, byte_order, v_rat);
+        }
+      }
+      break;
+    case EXIF_FORMAT_SRATIONAL:
+      {
+        ExifSRational v_srat;
+        int num, den;
+        float *fvalue = (float*)attrib_data;
+        for (c = 0; c < count; c++) 
+        {
+          // NOTE(review): sign is passed as 1 here too, so negative
+          // non-integer values are written as 0 - confirm intent
+          iGetRational(fvalue[c], &num, &den, 1);
+          v_srat.numerator = num;
+          v_srat.denominator = den;
+          exif_set_srational(entry->data + format_size * c, byte_order, v_srat);
+        }
+      }
+      break;
+    default:
+      // other formats are left as initialized by libexif
+      break;
+    }
+  }
+
+  imbyte* data = NULL;
+  unsigned int data_size = 0;
+
+  exif_data_save_data(exif, &data, &data_size);
+
+  if (data)
+  {
+    jpeg_write_marker(&this->cinfo, JPEG_APP0+1, data, data_size);
+    free(data);
+  }
+
+  exif_data_free(exif);
+}
+#endif
+
+/* Reads the JPEG header, fills in width, height, file color mode and file
+ * data type, stores the attributes found in the header
+ * ("Interlaced", "ResolutionUnit", "XResolution", "YResolution",
+ * "Description" and, with USE_EXIF, the EXIF entries) and starts the
+ * decompressor.
+ * Reads the optional "AutoYCbCr" attribute: when it is 0 a YCbCr file is
+ * delivered as IM_YCBCR instead of being converted to RGB.
+ * Returns IM_ERR_ACCESS when libjpeg signals an error, the header is not
+ * OK or the decompressor cannot be started, IM_ERR_DATA for an unknown
+ * color space, IM_ERR_NONE otherwise. `index` is ignored. */
+int imFormatJPEG::ReadImageInfo(int index)
+{
+  (void)index;
+  this->fix_adobe = 0;
+
+  /* libjpeg errors raised below land here through JPEGerror_exit */
+  if (setjmp(this->jerr.setjmp_buffer)) 
+    return IM_ERR_ACCESS;
+
+  // notify libjpeg to save the COM and APP1 markers
+  jpeg_save_markers(&this->dinfo, JPEG_COM, 0xFFFF);
+  jpeg_save_markers(&this->dinfo, JPEG_APP0+1, 0xFFFF);
+
+  /* Step 3: read file parameters with jpeg_read_header() */
+  if (jpeg_read_header(&this->dinfo, TRUE) != JPEG_HEADER_OK)
+    return IM_ERR_ACCESS;
+
+  this->width = this->dinfo.image_width;
+  this->height = this->dinfo.image_height;
+  this->file_data_type = IM_BYTE;
+
+  switch(this->dinfo.jpeg_color_space)
+  {
+  case JCS_GRAYSCALE:
+    this->file_color_mode = IM_GRAY;
+    break;
+  case JCS_RGB:
+    this->file_color_mode = IM_RGB;
+    break;
+  case JCS_YCbCr:
+    /* reported as RGB unless "AutoYCbCr" is 0, see below */
+    this->file_color_mode = IM_RGB;
+    break;
+  case JCS_CMYK:
+    this->file_color_mode = IM_CMYK;
+    break;
+  case JCS_YCCK:
+    this->file_color_mode = IM_CMYK; // this is the only supported conversion in libjpeg
+    this->dinfo.out_color_space = JCS_CMYK;
+    this->fix_adobe = 1;  // makes ReadImageData invert every sample of each line
+    break;
+  default: /* JCS_UNKNOWN */
+    return IM_ERR_DATA;
+  }
+
+  imAttribTable* attrib_table = AttribTable();
+
+  /* "AutoYCbCr" == 0 asks for the YCbCr samples without conversion */
+  int* auto_ycbcr = (int*)attrib_table->Get("AutoYCbCr");
+  if (auto_ycbcr && *auto_ycbcr == 0 &&
+      this->dinfo.jpeg_color_space == JCS_YCbCr)
+  {
+    this->file_color_mode = IM_YCBCR;
+    this->dinfo.out_color_space = JCS_YCbCr;
+  }
+
+  this->file_color_mode |= IM_TOPDOWN;
+
+  /* multi-component lines are flagged as packed */
+  if (imColorModeDepth(this->file_color_mode) > 1)
+    this->file_color_mode |= IM_PACKED;
+
+  if (this->dinfo.progressive_mode != 0)
+  {
+    int progressive = 1;
+    attrib_table->Set("Interlaced", IM_INT, 1, &progressive);
+  }
+
+  /* density_unit: 0 means no unit, 1 is stored as "DPI", anything else as "DPC" */
+  if (this->dinfo.density_unit != 0)
+  {
+    float xres = (float)this->dinfo.X_density, 
+          yres = (float)this->dinfo.Y_density;
+
+    if (this->dinfo.density_unit == 1)
+      attrib_table->Set("ResolutionUnit", IM_BYTE, 4, "DPI");
+    else
+      attrib_table->Set("ResolutionUnit", IM_BYTE, 4, "DPC");
+
+    attrib_table->Set("XResolution", IM_FLOAT, 1, (void*)&xres);
+    attrib_table->Set("YResolution", IM_FLOAT, 1, (void*)&yres);
+  }
+
+  if (this->dinfo.marker_list)
+  {
+    jpeg_saved_marker_ptr cur_marker = this->dinfo.marker_list;
+
+    // walk the saved markers looking for COM (and APP1 when EXIF is enabled)
+    while (cur_marker)
+    {
+      if (cur_marker->marker == JPEG_COM)
+      {
+        /* copy the comment and add a terminating zero before storing it */
+        char* desc = new char [cur_marker->data_length+1];
+        memcpy(desc, cur_marker->data, cur_marker->data_length);
+        desc[cur_marker->data_length] = 0;
+        attrib_table->Set("Description", IM_BYTE, cur_marker->data_length+1, desc);
+        delete [] desc;
+      }
+      
+#ifdef USE_EXIF
+      if (cur_marker->marker == JPEG_APP0+1)
+        iReadExifAttrib(cur_marker->data, cur_marker->data_length, attrib_table);
+#endif
+
+      cur_marker = cur_marker->next;
+    }
+  }
+
+  /* Step 5: Start decompressor */
+  if (jpeg_start_decompress(&this->dinfo) == FALSE)
+    return IM_ERR_ACCESS;
+
+  return IM_ERR_NONE;
+}
+
+/* Configures the libjpeg compressor from the user image description and
+ * the attribute table, starts the compressor and writes the comment
+ * marker (and, with USE_EXIF, the EXIF marker).
+ * Attributes read: "AutoYCbCr", "Interlaced", "JPEGQuality",
+ * "ResolutionUnit", "XResolution", "YResolution", "Description".
+ * Returns IM_ERR_ACCESS when libjpeg signals an error, IM_ERR_NONE otherwise. */
+int imFormatJPEG::WriteImageInfo()
+{
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);
+  this->file_color_mode |= IM_TOPDOWN;
+
+  /* multi-component lines are flagged as packed */
+  if (imColorModeDepth(this->file_color_mode) > 1)
+    this->file_color_mode |= IM_PACKED;
+
+  this->file_data_type = IM_BYTE;
+
+  /* Step 3: set parameters for compression */
+  this->cinfo.image_width = this->width;   /* image width and height, in pixels */
+  this->cinfo.image_height = this->height;
+
+  this->cinfo.input_components = imColorModeDepth(this->file_color_mode);
+
+  switch (imColorModeSpace(this->user_color_mode))
+  {
+  case IM_BINARY:
+    this->convert_bpp = -1; // expand 1 to 255
+    /* no break: binary is written as grayscale */
+  case IM_GRAY:
+    this->cinfo.in_color_space = JCS_GRAYSCALE;
+    break;
+  case IM_RGB:   
+    this->cinfo.in_color_space = JCS_RGB;
+    break;
+  case IM_CMYK:
+    this->cinfo.in_color_space = JCS_CMYK;
+    break;
+  case IM_YCBCR:
+    this->cinfo.in_color_space = JCS_YCbCr;
+    break;
+  default:
+    this->cinfo.in_color_space = JCS_UNKNOWN;
+    break;
+  }
+
+  /* libjpeg errors raised below land here through JPEGerror_exit */
+  if (setjmp(this->jerr.setjmp_buffer)) 
+    return IM_ERR_ACCESS;
+
+  jpeg_set_defaults(&this->cinfo);
+
+  imAttribTable* attrib_table = AttribTable();
+
+  /* "AutoYCbCr" == 0 with RGB input: set the file color space to RGB as well */
+  int* auto_ycbcr = (int*)attrib_table->Get("AutoYCbCr");
+  if (auto_ycbcr && *auto_ycbcr == 0 &&
+      this->cinfo.in_color_space == JCS_RGB)
+  {
+    jpeg_set_colorspace(&this->cinfo, JCS_RGB);
+  }
+
+  int* interlaced = (int*)attrib_table->Get("Interlaced");
+  if (interlaced && *interlaced)
+    jpeg_simple_progression(&this->cinfo);
+
+  int* quality = (int*)attrib_table->Get("JPEGQuality");
+  if (quality)
+    jpeg_set_quality(&this->cinfo, *quality, TRUE);
+
+  /* the density is written only when the unit and both resolutions exist */
+  char* res_unit = (char*)attrib_table->Get("ResolutionUnit");
+  if (res_unit)
+  {
+    float* xres = (float*)attrib_table->Get("XResolution");
+    float* yres = (float*)attrib_table->Get("YResolution");
+
+    if (xres && yres)
+    {
+      /* "DPI" is written as unit 1, any other string as unit 2 */
+      if (imStrEqual(res_unit, "DPI"))
+        this->cinfo.density_unit = 1;
+      else
+        this->cinfo.density_unit = 2;
+
+      this->cinfo.X_density = (UINT16)*xres;
+      this->cinfo.Y_density = (UINT16)*yres;
+    }
+  }
+
+  /* Step 4: Start compressor */
+  jpeg_start_compress(&this->cinfo, TRUE);
+
+  /* the comment is written without its last byte (desc_size-1);
+   * presumably that byte is the terminating zero added when reading - confirm */
+  int desc_size;
+  char* desc = (char*)attrib_table->Get("Description", NULL, &desc_size);
+  if (desc)
+    jpeg_write_marker(&this->cinfo, JPEG_COM, (JOCTET*)desc, desc_size-1);
+
+#ifdef USE_EXIF
+  iWriteExifAttrib(attrib_table);
+#endif
+
+  return IM_ERR_NONE;
+}
+
+/* Inverts (255 - v) every sample of a line holding `width` pixels of
+ * four samples each. */
+static void iFixAdobe(unsigned char* line_buffer, int width)
+{
+  const int count = 4 * width;
+  int i = 0;
+  while (i < count)
+  {
+    line_buffer[i] = 255 - line_buffer[i];
+    i++;
+  }
+}
+
+/* Reads all scanlines through the line buffer into `data`.
+ * Returns IM_ERR_ACCESS when libjpeg signals an error or delivers no
+ * scanline, IM_ERR_COUNTER when the counter reports failure (the
+ * decompression is then aborted), IM_ERR_NONE otherwise. */
+int imFormatJPEG::ReadImageData(void* data)
+{
+  /* libjpeg errors raised below land here through JPEGerror_exit */
+  if (setjmp(this->jerr.setjmp_buffer)) 
+    return IM_ERR_ACCESS;
+
+  imCounterTotal(this->counter, this->dinfo.output_height, "Reading JPEG...");
+
+  int row = 0, plane = 0;
+  while (this->dinfo.output_scanline < this->dinfo.output_height) 
+  {
+    if (jpeg_read_scanlines(&this->dinfo, (JSAMPARRAY)&this->line_buffer, 1) == 0)
+      return IM_ERR_ACCESS;
+
+    /* files flagged in ReadImageInfo (JCS_YCCK) need every sample inverted */
+    if (this->fix_adobe)
+      iFixAdobe((unsigned char*)this->line_buffer, this->width);
+
+    imFileLineBufferRead(this, data, row, plane);
+
+    if (!imCounterInc(this->counter))
+    {
+      /* Interrupted with scanlines still pending: jpeg_finish_decompress
+       * would raise an error here (turning the result into IM_ERR_ACCESS),
+       * so the operation is aborted instead. */
+      jpeg_abort_decompress(&this->dinfo);
+      return IM_ERR_COUNTER;
+    }
+
+    imFileLineBufferInc(this, &row, &plane);
+  }
+
+  jpeg_finish_decompress(&this->dinfo);
+
+  return IM_ERR_NONE;
+}
+
+/* Writes all scanlines from `data` through the line buffer.
+ * Returns IM_ERR_ACCESS when libjpeg signals an error or accepts no
+ * scanline, IM_ERR_COUNTER when the counter reports failure (the
+ * compression is then aborted), IM_ERR_NONE otherwise. */
+int imFormatJPEG::WriteImageData(void* data)
+{
+  /* libjpeg errors raised below land here through JPEGerror_exit */
+  if (setjmp(this->jerr.setjmp_buffer)) 
+    return IM_ERR_ACCESS;
+
+  /* the total comes from the compressor; the decompressor (dinfo) is not
+   * set up when writing */
+  imCounterTotal(this->counter, this->cinfo.image_height, "Writing JPEG...");
+
+  int row = 0, plane = 0;
+  while (this->cinfo.next_scanline < this->cinfo.image_height) 
+  {
+    imFileLineBufferWrite(this, data, row, plane);
+
+    if (jpeg_write_scanlines(&this->cinfo, (JSAMPARRAY)&this->line_buffer, 1) == 0)
+      return IM_ERR_ACCESS;
+
+    if (!imCounterInc(this->counter))
+    {
+      /* Interrupted with scanlines still missing: jpeg_finish_compress
+       * would raise an error here (turning the result into IM_ERR_ACCESS),
+       * so the operation is aborted instead. */
+      jpeg_abort_compress(&this->cinfo);
+      return IM_ERR_COUNTER;
+    }
+
+    imFileLineBufferInc(this, &row, &plane);
+  }
+
+  jpeg_finish_compress(&this->cinfo);
+
+  return IM_ERR_NONE;
+}
+
+/* Tells whether an image described by compression / color_mode / data_type
+ * is accepted by this writer.
+ * Returns IM_ERR_DATA for a rejected color space or data type,
+ * IM_ERR_COMPRESS for a compression name other than "JPEG",
+ * and IM_ERR_NONE otherwise. */
+int imFormatJPEG::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  /* color spaces this writer refuses */
+  switch (imColorModeSpace(color_mode))
+  {
+  case IM_MAP:
+  case IM_LAB:
+  case IM_LUV:
+  case IM_XYZ:
+    return IM_ERR_DATA;
+  default:
+    break;
+  }
+
+  /* only byte samples are accepted */
+  if (data_type != IM_BYTE)
+    return IM_ERR_DATA;
+
+  /* a missing or empty name means "use the default"; anything else must be "JPEG" */
+  if (compression && compression[0] != 0 && !imStrEqual(compression, "JPEG"))
+    return IM_ERR_COMPRESS;
+
+  return IM_ERR_NONE;
+}
+
diff --git a/src/im_format_krn.cpp b/src/im_format_krn.cpp
new file mode 100644
index 0000000..41c9be7
--- /dev/null
+++ b/src/im_format_krn.cpp
@@ -0,0 +1,377 @@
+/** \file
+ * \brief KRN - IM Kernel File Format
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_krn.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_format_all.h"
+#include "im_util.h"
+#include "im_counter.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+#include <math.h>
+
+/* Reads the next integer token: bytes are skipped until a digit or '-' shows up,
+   then accumulated until the first other byte, which is consumed as well.
+   Returns 1 with *value set, or 0 on a read error or a token over 10 characters. */
+static int iKRNReadNextInteger(imBinFile* handle, int *value)
+{
+  int c = 0;
+  char buffer[11];  /* 10 characters plus the terminator */
+
+  for (;;)
+  {
+    imBinFileRead(handle, &buffer[c], 1, 1);
+    if (imBinFileError(handle))  /* buffer[c] may not have been filled */
+      return 0;
+
+    if ((buffer[c] >= '0' && buffer[c] <= '9') || buffer[c] == '-')
+    {
+      /* stop before the next read could write past the buffer */
+      if (++c > 10)
+        return 0;
+    }
+    else if (c > 0)
+    {
+      /* a non-numeric byte after some characters ends the token */
+      buffer[c] = 0;
+      *value = atoi(buffer);
+      return 1;
+    }
+  }
+}
+
+/* Reads the next real token: bytes are skipped until a digit, '-' or '.' shows up,
+   then accumulated until the first other byte, which is consumed as well.
+   Returns 1 with *value set, or 0 on a read error or a token over 16 characters. */
+static int iKRNReadNextReal(imBinFile* handle, float *value)
+{
+  int c = 0;
+  char buffer[17];  /* 16 characters plus the terminator */
+
+  for (;;)
+  {
+    imBinFileRead(handle, &buffer[c], 1, 1);
+    if (imBinFileError(handle))  /* buffer[c] may not have been filled */
+      return 0;
+
+    if ((buffer[c] >= '0' && buffer[c] <= '9') || buffer[c] == '-' || buffer[c] == '.')
+    {
+      /* stop before the next read could write past the buffer */
+      if (++c > 16)
+        return 0;
+    }
+    else if (c > 0)
+    {
+      /* a non-numeric byte after some characters ends the token */
+      buffer[c] = 0;
+      *value = (float)atof(buffer);
+      return 1;
+    }
+  }
+}
+
+/* Skips the rest of the current line, then reads the next line into "comment",
+   dropping carriage returns. "*size" receives the number of bytes stored
+   including the terminator, or 0 for an empty line. At most "max_size" bytes are
+   stored (the default is the size of the buffer passed by ReadImageInfo); the
+   remainder of a longer line is read and discarded. Returns 0 on a read error. */
+static int iKRNReadDescription(imBinFile* handle, char* comment, int *size, int max_size = 512)
+{
+  imbyte byte_value = 0;
+
+  // find the first \n
+  while(byte_value != '\n')
+  {
+    imBinFileRead(handle, &byte_value, 1, 1);
+    if (imBinFileError(handle))
+      return 0;
+  }
+
+  *size = 0;
+
+  // Read up to the next \n
+  for (;;)
+  {
+    imBinFileRead(handle, &byte_value, 1, 1);
+    if (imBinFileError(handle))
+      return 0;
+    if (byte_value == '\n')
+      break;
+
+    // store only while one byte remains free for the terminator
+    if (byte_value != '\r' && *size < max_size-1)
+      comment[(*size)++] = byte_value;
+  }
+
+  if (*size != 0)
+  {
+    comment[*size] = 0;
+    (*size)++;
+  }
+
+  return 1;
+}
+
+static const char* iKRNCompTable[1] =  /* compression names handed to the imFormat constructor */
+{
+  "NONE"
+};
+/* KRN driver: the kernel values are stored as text and accessed through an imBinFile. */
+class imFormatKRN: public imFormat
+{
+  imBinFile* handle;          /* the binary file handle */
+
+public:
+  imFormatKRN()
+    :imFormat("KRN", 
+              "IM Kernel File Format", 
+              "*.krn;", 
+              iKRNCompTable, 
+              1,   /* number of entries in iKRNCompTable */
+              0)
+    {}
+  ~imFormatKRN() {}
+  /* the members below are defined after the class */
+  int Open(const char* file_name);      /* opens a file and checks the "IMKERNEL" signature */
+  int New(const char* file_name);       /* creates a file and writes the signature line */
+  void Close();                         /* closes the file handle */
+  void* Handle(int index);              /* the imBinFile for index 0, NULL otherwise */
+  int ReadImageInfo(int index);         /* reads description, width, height and data type */
+  int ReadImageData(void* data);        /* parses the values row by row */
+  int WriteImageInfo();                 /* writes description, width, height and data type */
+  int WriteImageData(void* data);       /* prints the values, one text line per row */
+  int CanWrite(const char* compression, int color_mode, int data_type) const;  /* gray, int or float, "NONE" */
+};
+/* Registers one heap-allocated imFormatKRN through imFormatRegister. */
+void imFormatRegisterKRN(void)
+{
+  imFormatRegister(new imFormatKRN());
+}
+
+int imFormatKRN::Open(const char* file_name)
+{
+  /* a KRN file starts with the 8 bytes "IMKERNEL" */
+  char signature[9];
+  int error = IM_ERR_NONE;
+  this->handle = imBinFileOpen(file_name);
+  if (this->handle == NULL)
+    return IM_ERR_OPEN;
+
+  imBinFileRead(this->handle, signature, 8, 1);
+  if (imBinFileError(this->handle))
+    error = IM_ERR_ACCESS;
+  else
+  {
+    signature[8] = 0;  /* terminate before the string comparison */
+    if (!imStrEqual(signature, "IMKERNEL"))
+      error = IM_ERR_FORMAT;
+  }
+
+  if (error != IM_ERR_NONE)
+  {
+    /* unreadable or not a KRN file: release the handle before reporting */
+    imBinFileClose(this->handle);
+    return error;
+  }
+
+  this->image_count = 1;
+  strcpy(this->compression, "NONE");
+  return IM_ERR_NONE;
+}
+
+int imFormatKRN::New(const char* file_name)
+{
+  /* creates the file and immediately emits the signature line */
+  this->handle = imBinFileNew(file_name);
+  if (this->handle == NULL)
+    return IM_ERR_OPEN;
+
+  this->image_count = 1;
+
+  if (imBinFileWrite(this->handle, (void*)"IMKERNEL\n", 9, 1))
+    return IM_ERR_NONE;
+
+  /* the signature could not be written */
+  imBinFileClose(this->handle);
+  return IM_ERR_ACCESS;
+}
+
+void imFormatKRN::Close()
+{
+  imBinFileClose(this->handle);  /* the handle set by Open() or New() */
+}
+
+void* imFormatKRN::Handle(int index)
+{
+  /* only index 0 is defined: the underlying imBinFile */
+  if (index != 0)
+    return NULL;
+  return (void*)this->handle;
+}
+
+int imFormatKRN::ReadImageInfo(int index)
+{
+  (void)index;  /* Open() reports a single image, so the index is not used */
+  this->file_color_mode = IM_GRAY|IM_TOPDOWN;
+
+  /* the header continues with: description line, width, height, type flag */
+  char description[512];
+  int description_size;
+  if (!iKRNReadDescription(this->handle, description, &description_size))
+    return IM_ERR_ACCESS;
+
+  /* an empty description line is not stored as an attribute */
+  imAttribTable* attributes = AttribTable();
+  if (description_size != 0)
+    attributes->Set("Description", IM_BYTE, description_size, description);
+
+  /* the three integers are parsed in file order; || stops at the first failure */
+  int type_flag;
+  if (!iKRNReadNextInteger(this->handle, &this->width) ||
+      !iKRNReadNextInteger(this->handle, &this->height) ||
+      !iKRNReadNextInteger(this->handle, &type_flag))
+    return IM_ERR_ACCESS;
+
+  /* a flag of 0 selects integer samples, any other value selects float */
+  if (type_flag != 0)
+    this->file_data_type = IM_FLOAT;
+  else
+    this->file_data_type = IM_INT;
+
+  return IM_ERR_NONE;
+}
+
+/* Writes the header that follows the signature line: the description line
+   (possibly empty), the width, the height and the data type flag. */
+int imFormatKRN::WriteImageInfo()
+{
+  this->file_data_type = this->user_data_type;
+  this->file_color_mode = IM_GRAY|IM_TOPDOWN;
+
+  imAttribTable* attrib_table = AttribTable();
+  int attrib_size;
+  const void* attrib_data = attrib_table->Get("Description", NULL, &attrib_size);
+  if (attrib_data)
+  {
+    /* only the first line of the description is written */
+    char* desc = (char*)attrib_data;
+    int size = 0;
+    while(size < (attrib_size-1) && (desc[size] != '\r' && desc[size] != '\n'))
+      size++;
+
+    imBinFileWrite(handle, desc, size, 1);
+  }
+  imBinFileWrite(handle, (void*)"\n", 1, 1);
+
+  imBinFilePrintf(handle, "%d\n", this->width);
+  imBinFilePrintf(handle, "%d\n", this->height);
+
+  /* 0 = IM_INT, 1 = anything else; both bytes are written, because without the
+     line feed the flag digit would run into the first sample of the data */
+  const char* type_line = (this->file_data_type == IM_INT)? "0\n": "1\n";
+  imBinFileWrite(handle, (void*)type_line, 2, 1);
+  /* tests if everything was ok */
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+  return IM_ERR_NONE;
+}
+
+int imFormatKRN::ReadImageData(void* data)
+{
+  imCounterTotal(this->counter, this->height, "Reading KRN...");
+
+  /* the samples are text tokens, parsed row by row into the line buffer */
+  for (int row = 0; row < this->height; row++)
+  {
+    int col = 0;
+    while (col < this->width)
+    {
+      if (this->file_data_type != IM_INT)
+      {
+        /* every type other than IM_INT is read as float */
+        float real_value;
+        if (!iKRNReadNextReal(this->handle, &real_value))
+          return IM_ERR_ACCESS;
+        ((float*)this->line_buffer)[col] = real_value;
+      }
+      else
+      {
+        int int_value;
+        if (!iKRNReadNextInteger(this->handle, &int_value))
+          return IM_ERR_ACCESS;
+        ((int*)this->line_buffer)[col] = int_value;
+      }
+      col++;
+    }
+
+    imFileLineBufferRead(this, data, row, 0);
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+  }
+  return IM_ERR_NONE;
+}
+
+int imFormatKRN::WriteImageData(void* data)
+{
+  imCounterTotal(this->counter, this->height, "Writing KRN...");
+
+  /* each image row becomes one text line of space separated values */
+  const int last_col = this->width - 1;
+  int row = 0;
+  while (row < this->height)
+  {
+    imFileLineBufferWrite(this, data, row, 0);
+
+    for (int col = 0; col <= last_col; col++)
+    {
+      if (this->file_data_type != IM_INT)
+      {
+        const float real_value = ((float*)this->line_buffer)[col];
+        if (!imBinFilePrintf(this->handle, "%f ", (double)real_value))
+          return IM_ERR_ACCESS;
+      }
+      else
+      {
+        const int int_value = ((int*)this->line_buffer)[col];
+        if (!imBinFilePrintf(this->handle, "%d ", int_value))
+          return IM_ERR_ACCESS;
+      }
+
+      /* the line feed goes right after the last value of the row */
+      if (col == last_col)
+        imBinFileWrite(this->handle, (void*)"\n", 1, 1);
+    }
+    if (imBinFileError(this->handle))
+      return IM_ERR_ACCESS;
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+    row++;
+  }
+  return IM_ERR_NONE;
+}
+
+int imFormatKRN::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  /* only gray images of int or float samples are accepted */
+  if (imColorModeSpace(color_mode) != IM_GRAY)
+    return IM_ERR_DATA;
+  switch (data_type)
+  {
+  case IM_INT: case IM_FLOAT:
+    break;
+  default:
+    return IM_ERR_DATA;
+  }
+
+  /* a NULL or empty name is accepted as is; any other name must be "NONE" */
+  if (compression && compression[0] != 0 && !imStrEqual(compression, "NONE"))
+    return IM_ERR_COMPRESS;
+  return IM_ERR_NONE;
+}
diff --git a/src/im_format_led.cpp b/src/im_format_led.cpp
new file mode 100644
index 0000000..d67ee8c
--- /dev/null
+++ b/src/im_format_led.cpp
@@ -0,0 +1,360 @@
+/** \file
+ * \brief LED - IUP image in LED
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_led.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_format_all.h"
+#include "im_util.h"
+#include "im_counter.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <memory.h>
+
+/* Sample LED Image
+LEDImage = IMAGE[
+0 = "0 0 0",
+1 = "192 192 192",
+2 = "0 0 128",
+3 = "255 255 255"]
+(20, 19
+,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+,1,1,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1
+,1,1,0,3,3,3,3,0,0,1,1,1,1,1,1,1,1,1,1,1
+,1,1,0,3,3,3,3,0,3,0,1,1,1,1,1,1,1,1,1,1
+,1,1,0,3,0,0,3,0,2,2,2,2,2,2,1,1,1,1,1,1
+,1,1,0,3,3,3,3,3,2,3,3,3,3,2,2,1,1,1,1,1
+,1,1,0,3,0,0,0,0,2,3,3,3,3,2,3,2,1,1,1,1
+,1,1,0,3,3,3,3,3,2,3,0,0,3,2,2,2,2,1,1,1
+,1,1,0,3,0,0,0,0,2,3,3,3,3,3,3,3,2,1,1,1
+,1,1,0,3,3,3,3,3,2,3,0,0,0,0,0,3,2,1,1,1
+,1,1,0,0,0,0,0,0,2,3,3,3,3,3,3,3,2,1,1,1
+,1,1,1,1,1,1,1,1,2,3,0,0,0,0,0,3,2,1,1,1
+,1,1,1,1,1,1,1,1,2,3,3,3,3,3,3,3,2,1,1,1
+,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,1,1,1
+,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+)
+*/
+
+/* Reads the next unsigned integer token: bytes are skipped until a digit shows
+   up, then accumulated until the first other byte, which is consumed as well.
+   Returns 1 with *value set, or 0 on a read error or a token over 10 characters. */
+static int iLEDReadNextInteger(imBinFile* handle, int *value)
+{
+  int c = 0;
+  char buffer[11];  /* 10 characters plus the terminator */
+
+  for (;;)
+  {
+    imBinFileRead(handle, &buffer[c], 1, 1);
+    if (imBinFileError(handle))  /* buffer[c] may not have been filled */
+      return 0;
+
+    if (buffer[c] >= '0' && buffer[c] <= '9')
+    {
+      /* stop before the next read could write past the buffer */
+      if (++c > 10)
+        return 0;
+    }
+    else if (c > 0)
+    {
+      /* a non-digit byte after some digits ends the token */
+      buffer[c] = 0;
+      *value = atoi(buffer);
+      return 1;
+    }
+  }
+}
+
+static const char* iLEDCompTable[1] =  /* compression names handed to the imFormat constructor */
+{
+  "NONE"
+};
+/* LED driver: a palette image stored as LED text and accessed through an imBinFile. */
+class imFormatLED: public imFormat
+{
+  imBinFile* handle;          /* the binary file handle */
+  int pal_count;              /* number of colors counted by Open() */
+
+  int ReadPalette();          /* parses the  index = "r g b"  entries */
+  int WritePalette();         /* writes the bracketed color list */
+
+public:
+  imFormatLED()
+    :imFormat("LED", 
+              "IUP image in LED special format", 
+              "*.led;", 
+              iLEDCompTable, 
+              1,   /* number of entries in iLEDCompTable */
+              0)
+    {}
+  ~imFormatLED() {}
+  /* the members below are defined after the class */
+  int Open(const char* file_name);      /* checks the "LED" prefix and counts the colors */
+  int New(const char* file_name);       /* creates a file and writes "LEDImage = IMAGE" */
+  void Close();                         /* closes the file handle */
+  void* Handle(int index);              /* the imBinFile for index 0, NULL otherwise */
+  int ReadImageInfo(int index);         /* reads the palette, the width and the height */
+  int ReadImageData(void* data);        /* parses the pixel values row by row */
+  int WriteImageInfo();                 /* writes the palette and the "(width, height" line */
+  int WriteImageData(void* data);       /* prints the pixel values and the closing ")" */
+  int CanWrite(const char* compression, int color_mode, int data_type) const;  /* IM_BYTE data, "NONE" */
+};
+/* Registers one heap-allocated imFormatLED through imFormatRegister. */
+void imFormatRegisterLED(void)
+{
+  imFormatRegister(new imFormatLED());
+}
+
+/* Opens an LED file for reading: checks that it starts with "LED" and counts
+   the palette colors by scanning for '=' up to the first '('. The file position
+   is then restored to just after the signature. On failure the file is closed. */
+int imFormatLED::Open(const char* file_name)
+{
+  char sig[4];
+  unsigned char byte_value = 0;
+
+  /* opens the binary file for reading */
+  handle = imBinFileOpen(file_name);
+  if (!handle)
+    return IM_ERR_OPEN;
+
+  this->image_count = 1;
+  strcpy(this->compression, "NONE");
+
+  imBinFileRead(handle, sig, 3, 1);
+  sig[3] = 0;
+
+  if (imBinFileError(handle))
+  {
+    imBinFileClose(handle);
+    return IM_ERR_ACCESS;
+  }
+
+  if (!imStrEqual(sig, "LED"))
+  {
+    imBinFileClose(handle);
+    return IM_ERR_FORMAT;
+  }
+
+  unsigned long offset = imBinFileTell(handle);
+
+  /* count the number of colors */
+  this->pal_count = -1; // will count the first '=' that is not a color
+  while (byte_value != '(')
+  {
+    imBinFileRead(handle, &byte_value, 1, 1);
+
+    /* the byte is examined only after the read is known to have succeeded */
+    if (imBinFileError(handle))
+    {
+      imBinFileClose(handle);
+      return IM_ERR_ACCESS;
+    }
+
+    if (byte_value == '=')
+      this->pal_count++;
+  } 
+
+  imBinFileSeekTo(handle, offset);
+
+  return IM_ERR_NONE;
+}
+
+int imFormatLED::New(const char* file_name)
+{
+  /* fixed text written at the start of the new file */
+  static const char led_header[] = "LEDImage = IMAGE";
+
+  this->handle = imBinFileNew(file_name);
+  if (this->handle == NULL)
+    return IM_ERR_OPEN;
+
+  /* 16 bytes: the text without its terminator */
+  imBinFileWrite(this->handle, (void*)led_header, 16, 1);
+  if (!imBinFileError(this->handle))
+    return IM_ERR_NONE;
+
+  /* the header could not be written */
+  imBinFileClose(this->handle);
+  return IM_ERR_ACCESS;
+}
+
+void imFormatLED::Close()
+{
+  imBinFileClose(this->handle);  /* the handle set by Open() or New() */
+}
+
+void* imFormatLED::Handle(int index)
+{
+  /* only index 0 is defined: the underlying imBinFile */
+  if (index != 0)
+    return NULL;
+  return (void*)this->handle;
+}
+
+/* Reads the palette and the dimensions. Returns IM_ERR_DATA for a color count
+   that cannot be used and IM_ERR_ACCESS when a value cannot be read. */
+int imFormatLED::ReadImageInfo(int index)
+{
+  (void)index;
+  /* pal_count was counted from the file contents, so it is checked first
+     (assumes a palette of 256 entries, as the PCX code does - TODO confirm) */
+  if (this->pal_count < 0 || this->pal_count > 256)
+    return IM_ERR_DATA;
+  this->palette_count = this->pal_count;
+  if (ReadPalette() != IM_ERR_NONE)
+    return IM_ERR_ACCESS;
+
+  /* a failed parse would leave width or height unset, so it is reported */
+  if (!iLEDReadNextInteger(handle, &this->width) ||
+      !iLEDReadNextInteger(handle, &this->height))
+    return IM_ERR_ACCESS;
+  this->file_data_type = IM_BYTE;
+  this->file_color_mode = IM_MAP | IM_TOPDOWN;
+  return IM_ERR_NONE;
+}
+
+int imFormatLED::WriteImageInfo()
+{
+  /* bytes, in the color space of the user image, stored top-down */
+  this->file_data_type = IM_BYTE;
+  this->file_color_mode = imColorModeSpace(this->user_color_mode) | IM_TOPDOWN;
+
+  /* the palette comes first, then the opening of the "(width, height" list */
+  const int palette_error = WritePalette();
+  if (palette_error != IM_ERR_NONE)
+    return IM_ERR_ACCESS;
+  imBinFilePrintf(this->handle, "(%d, %d\n", this->width, this->height);
+  return IM_ERR_NONE;
+}
+
+/* Reads palette_count entries of the form  index = "r g b"  into the palette.
+   Returns IM_ERR_ACCESS on a read failure or an index outside the palette. */
+int imFormatLED::ReadPalette()
+{
+  for (int c = 0; c < this->palette_count; c++)
+  {
+    int i, r, g, b;
+    if (!iLEDReadNextInteger(handle, &i) || !iLEDReadNextInteger(handle, &r) ||
+        !iLEDReadNextInteger(handle, &g) || !iLEDReadNextInteger(handle, &b))
+      return IM_ERR_ACCESS;
+
+    /* the index comes from the file and must not address outside the palette
+       (assumes 256 entries, as the PCX code does - TODO confirm) */
+    if (i < 0 || i > 255)
+      return IM_ERR_ACCESS;
+    /* convert the color to the IM format */
+    this->palette[i] = imColorEncode((unsigned char)r, (unsigned char)g, (unsigned char)b);
+  }
+  return IM_ERR_NONE;
+}
+
+int imFormatLED::WritePalette()
+{
+  /* the palette is a bracketed list of  index = "r g b"  entries */
+  imBinFileWrite(this->handle, (void*)"[\n", 2, 1);
+
+  for (int entry = 0; entry < this->palette_count; entry++)
+  {
+    /* entries are separated, not terminated, by a comma and a line feed */
+    if (entry > 0)
+      imBinFileWrite(this->handle, (void*)",\n", 2, 1);
+
+    /* convert the color from the IM format */
+    unsigned char red, green, blue;
+    imColorDecode(&red, &green, &blue, this->palette[entry]);
+    imBinFilePrintf(this->handle, "%d = \"%d %d %d\"", entry, (int)red, (int)green, (int)blue);
+  }
+
+  imBinFileWrite(this->handle, (void*)"]\n", 2, 1);
+
+  /* the error state is tested once, after all the writes */
+  if (imBinFileError(this->handle))
+    return IM_ERR_ACCESS;
+  return IM_ERR_NONE;
+}
+
+int imFormatLED::ReadImageData(void* data)
+{
+  imCounterTotal(this->counter, this->height, "Reading LED...");
+
+  /* every pixel is a decimal number, narrowed to one byte */
+  int row = 0;
+  while (row < this->height)
+  {
+    int col = 0;
+    while (col < this->width)
+    {
+      int pixel;
+      if (!iLEDReadNextInteger(this->handle, &pixel))
+        return IM_ERR_ACCESS;
+      ((imbyte*)this->line_buffer)[col++] = (unsigned char)pixel;
+    }
+
+    imFileLineBufferRead(this, data, row, 0);
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+    row++;
+  }
+  return IM_ERR_NONE;
+}
+
+int imFormatLED::WriteImageData(void* data)
+{
+  imCounterTotal(this->counter, this->height, "Writing LED...");
+
+  int row = 0;
+  while (row < this->height)
+  {
+    imFileLineBufferWrite(this, data, row, 0);
+    /* each value is preceded by a comma, continuing the "(width, height" list */
+    const imbyte* pixels = (const imbyte*)this->line_buffer;
+    for (int col = 0; col < this->width; col++)
+      imBinFilePrintf(this->handle, ",%d", (int)pixels[col]);
+
+    /* one text line per image row */
+    imBinFileWrite(this->handle, (void*)"\n", 1, 1);
+    if (imBinFileError(this->handle))
+      return IM_ERR_ACCESS;
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+    row++;
+  }
+  /* closes the list opened by WriteImageInfo */
+  imBinFileWrite(this->handle, (void*)")", 1, 1);
+  if (imBinFileError(this->handle))
+    return IM_ERR_ACCESS;
+  return IM_ERR_NONE;
+}
+
+int imFormatLED::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  /* YCbCr, Lab, Luv, XYZ and CMYK images are refused */
+  switch (imColorModeSpace(color_mode))
+  {
+  case IM_YCBCR: case IM_LAB: case IM_LUV: case IM_XYZ: case IM_CMYK:
+    return IM_ERR_DATA;
+  default:
+    break;
+  }
+
+  /* only IM_BYTE samples are accepted */
+  if (data_type != IM_BYTE)
+    return IM_ERR_DATA;
+
+  /* a NULL or empty name is accepted as is; any other name must be "NONE" */
+  if (compression && compression[0] != 0 && !imStrEqual(compression, "NONE"))
+    return IM_ERR_COMPRESS;
+  return IM_ERR_NONE;
+}
diff --git a/src/im_format_pcx.cpp b/src/im_format_pcx.cpp
new file mode 100644
index 0000000..32487bc
--- /dev/null
+++ b/src/im_format_pcx.cpp
@@ -0,0 +1,701 @@
+/** \file
+ * \brief PCX - ZSoft Picture
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_pcx.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_format_all.h"
+#include "im_util.h"
+#include "im_counter.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+
+#define PCX_ID 0x0A
+
+
+/* PCX file header */
+/*  1   Id;             Manufacturer ID        */
+/*  1   Version;        Version                */
+/*  1   Encoding;       Encoding Scheme        */
+/*  1   BitsPerPixel;   Bits/Pixel/Plane       */
+/*  2   Xmin;           X Start (upper left)   */
+/*  2   Ymin;           Y Start (top)          */
+/*  2   Xmax;           X End (lower right)    */
+/*  2   Ymax;           Y End (bottom)         */
+/*  2   Hdpi;           Horizontal Resolution  */
+/*  2   Vdpi;           Vertical Resolution    */
+/*  3*16 Colormap;      16-Color EGA Palette   */
+/*  1   Reserved;       Reserved               */
+/*  1   NPlanes;        Number of Color Planes */
+/*  2   BytesPerLine;   Bytes/Line/Plane       */
+/*  2   PaletteInfo;    Palette Interpretation */
+/*  2   HScreenSize;    Horizontal Screen Size */
+/*  2   VScreenSize;    Vertical Screen Size   */
+/*  54  Filler;         Reserved               */
+/*  128    */
+
+/* Default 16 color VGA palette: each row below is one 3-byte entry, handed to imColorEncode in this order */
+static unsigned char iPCXDefaultPalette[3*16] =  /* written to the header by WriteImageInfo; copied by ReadPalette for version 3 */
+{
+    0,   0,   0,
+    0,   0, 255,
+    0, 255,   0,
+    0, 255, 255,
+  255,   0,   0,
+  255,   0, 255,
+  255, 255,   0,
+  255, 255, 255,
+   85,  85, 255,
+   85,  85,  85,
+    0, 170,   0,
+  170,   0,   0,
+   85, 255, 255,
+  255,  85, 255,
+  255, 255,  85,
+  255, 255, 255  
+};
+
+/* Run-length encodes BufferSize bytes of DecodedBuffer into EncodedBuffer and
+   returns the number of bytes produced.
+   A run ends at a different value, at the end of the buffer or after 63 bytes.
+   A run of two or more bytes, and a single byte whose value is 64 or more, is
+   written as the pair (0xC0 | length, value); a single byte below 64 is
+   written as it is. EncodedBuffer therefore needs up to 2*BufferSize bytes. */
+static int iPCXEncodeScanLine(unsigned char* EncodedBuffer, const unsigned char* DecodedBuffer, int BufferSize)
+{
+  int written = 0;   /* bytes stored in EncodedBuffer */
+  int position = 0;  /* next byte of DecodedBuffer to encode */
+
+  while (position < BufferSize)
+  {
+    const unsigned char pixel = DecodedBuffer[position];
+
+    /* measure the run that starts at "position" */
+    int length = 1;
+    while (length < 63 &&
+           position + length < BufferSize &&
+           DecodedBuffer[position + length] == pixel)
+    {
+      length++;
+    }
+
+    /* choose the code from the run length and the value */
+    if (length == 1 && pixel < 64)
+    {
+      /* one-byte code: the value itself */
+      EncodedBuffer[written++] = pixel;
+    }
+    else
+    {
+      /* two-byte code: the length with the two top bits set, then the value */
+      EncodedBuffer[written++] = (unsigned char)(length | 0xC0);
+      EncodedBuffer[written++] = pixel;
+    }
+
+    /* continue with the byte that follows the run */
+    position += length;
+  }
+
+  return written;
+}
+
+/* Reads run-length codes from the file until BufferSize bytes are decoded.
+   A run that would pass the end of the buffer is cut short. */
+static int iPCXDecodeScanLine(imBinFile* handle, unsigned char* DecodedBuffer, int BufferSize)
+{
+  int filled = 0;  /* bytes already stored in DecodedBuffer */
+  while (filled < BufferSize)
+  {
+    unsigned char code;
+    unsigned char value = 0;
+    int count = 1;
+    imBinFileRead(handle, &code, 1, 1);
+    if ((code & 0xC0) != 0xC0)
+      value = code;  /* one-byte code: a single pixel */
+    else
+    {
+      /* two-byte code: the low six bits hold the run length */
+      count = code & 0x3F;
+      imBinFileRead(handle, &value, 1, 1);
+    }
+
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+
+    /* never write past the end of the buffer */
+    while (count > 0 && filled < BufferSize)
+    {
+      DecodedBuffer[filled++] = value;
+      count--;
+    }
+  }
+  return IM_ERR_NONE;
+}
+
+static const char* iPCXCompTable[2] =  /* compression names handed to the imFormat constructor */
+{
+  "NONE",
+  "RLE"
+};
+/* PCX driver: binary ZSoft Picture files accessed through a little endian imBinFile. */
+class imFormatPCX: public imFormat
+{
+  imBinFile* handle;           /* the binary file handle */
+  int bpp;                     /* number of bits per pixel */
+  unsigned char version;       /* format version */
+  unsigned char comp_type;      /* PCX compression information */
+  int line_raw_size;         /* bytes per line of all planes together (bytes per plane * planes) */
+
+  int ReadPalette();           /* loads the palette of images with up to 8 bpp */
+  int WritePalette();          /* writes the identifier byte and 768 color bytes */
+  void Expand4bpp();           /* four 1-bit planes to one byte per pixel */
+  void Pack24bpp();            /* three planes to interleaved pixels */
+  void Unpack24bpp();
+
+public:
+  imFormatPCX()
+    :imFormat("PCX", 
+              "ZSoft Picture", 
+              "*.pcx;", 
+              iPCXCompTable, 
+              2,   /* number of entries in iPCXCompTable */
+              0)
+    {}
+  ~imFormatPCX() {}
+  /* the members below are defined after the class */
+  int Open(const char* file_name);      /* checks the identifier, reads version and encoding */
+  int New(const char* file_name);       /* creates a file; nothing is written yet */
+  void Close();                         /* closes the file handle */
+  void* Handle(int index);              /* the imBinFile for index 0, NULL otherwise */
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo();                 /* writes the 128-byte header */
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+/* Registers one heap-allocated imFormatPCX through imFormatRegister. */
+void imFormatRegisterPCX(void)
+{
+  imFormatRegister(new imFormatPCX());
+}
+
+/* Opens a PCX file for reading and loads the first three header bytes:
+   identifier, version and encoding. On any failure the file is closed. */
+int imFormatPCX::Open(const char* file_name)
+{
+  unsigned char id;
+  /* opens the binary file for reading with intel byte order */
+  handle = imBinFileOpen(file_name);
+  if (!handle)
+    return IM_ERR_OPEN;
+
+  imBinFileByteOrder(handle, IM_LITTLEENDIAN); 
+
+  /* reads the PCX format identifier */
+  imBinFileRead(handle, &id, 1, 1);
+  if (imBinFileError(handle))
+  {
+    imBinFileClose(handle);
+    return IM_ERR_ACCESS;
+  }
+
+  if (id != PCX_ID)
+  {
+    imBinFileClose(handle);
+    return IM_ERR_FORMAT;
+  }
+
+  /* reads the format version and the compression type; a truncated file
+     must not leave them unset */
+  imBinFileRead(handle, &this->version, 1, 1);
+  imBinFileRead(handle, &this->comp_type, 1, 1);
+  if (imBinFileError(handle))
+  {
+    imBinFileClose(handle);
+    return IM_ERR_ACCESS;
+  }
+  strcpy(this->compression, this->comp_type? "RLE": "NONE");
+  this->image_count = 1;
+  return IM_ERR_NONE;
+}
+
+int imFormatPCX::New(const char* file_name)
+{
+  /* the file is only created here; no data is written by this function */
+  imBinFile* new_file = imBinFileNew(file_name);
+  this->handle = new_file;
+  if (new_file == NULL)
+    return IM_ERR_OPEN;
+  /* multi-byte values are stored in little endian order */
+  imBinFileByteOrder(new_file, IM_LITTLEENDIAN);
+  return IM_ERR_NONE;
+}
+
+void imFormatPCX::Close()
+{
+  imBinFileClose(this->handle);  /* the handle set by Open() or New() */
+}
+
+void* imFormatPCX::Handle(int index)
+{
+  /* only index 0 is defined: the underlying imBinFile */
+  if (index != 0)
+    return NULL;
+  return (void*)this->handle;
+}
+
+/* Reads the header fields that follow the three bytes consumed by Open(),
+   derives size and color mode, and loads the palette when bpp <= 8.
+   Returns IM_ERR_ACCESS on a read error, IM_ERR_DATA on unsupported values. */
+int imFormatPCX::ReadImageInfo(int index)
+{
+  unsigned char bppp = 0, planes = 0;
+  unsigned short xmin = 0, xmax = 0, ymax = 0, ymin = 0, word = 0, bplp = 0;
+  (void)index;
+
+  this->file_data_type = IM_BYTE;
+  /* reads the Number of bits/pixel per plane */
+  imBinFileRead(handle, &bppp, 1, 1);
+  /* reads the image width and height */
+  imBinFileRead(handle, &xmin, 1, 2);
+  imBinFileRead(handle, &ymin, 1, 2);
+  imBinFileRead(handle, &xmax, 1, 2);
+  imBinFileRead(handle, &ymax, 1, 2);
+  this->width = xmax - xmin + 1;
+  this->height = ymax - ymin + 1;
+
+  imAttribTable* attrib_table = AttribTable();
+
+  /* an origin that is non-zero on either axis is kept as the screen position */
+  if (xmin || ymin)
+  {
+    attrib_table->Set("XScreen", IM_USHORT, 1, &xmin);
+    attrib_table->Set("YScreen", IM_USHORT, 1, &ymin);
+  }
+
+  /* read the x resolution */
+  imBinFileRead(handle, &word, 1, 2);
+  float xres = word;
+  /* read the y resolution */
+  imBinFileRead(handle, &word, 1, 2);
+  float yres = word;
+
+  if (xres && yres)
+  {
+    attrib_table->Set("XResolution", IM_FLOAT, 1, &xres);
+    attrib_table->Set("YResolution", IM_FLOAT, 1, &yres);
+    attrib_table->Set("ResolutionUnit", IM_BYTE, 4, "DPI");
+  }
+
+  /* jump 3*16+1 bytes (colormap + reserved) */
+  imBinFileSeekOffset(handle, 3*16+1);
+
+  /* reads the Number of color planes */
+  imBinFileRead(handle, &planes, 1, 1);
+  this->bpp = bppp * planes;
+
+  /* reads the Number of bytes per scan line per color planes */
+  imBinFileRead(handle, &bplp, 1, 2);
+  this->line_raw_size = bplp * planes;
+  this->line_buffer_extra = 2; // room enough for padding
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  // sanity check: supported depth, and xmax/ymax not below xmin/ymin
+  if (this->bpp != 1 && this->bpp != 4 && 
+      this->bpp != 8 && this->bpp != 24)
+    return IM_ERR_DATA;
+  if (this->width < 1 || this->height < 1)
+    return IM_ERR_DATA;
+
+  if (this->bpp > 8)
+  {
+    this->file_color_mode = IM_RGB;
+    this->file_color_mode |= IM_PACKED;
+    this->line_buffer_extra += 3*this->width; // room for 24 bpp packing
+  }
+  else
+  {
+    this->file_color_mode = IM_MAP;
+    this->palette_count = 1 << this->bpp;
+    if (this->bpp == 1)  // only 1 bpp, 4 bpp will be expanded here
+      this->convert_bpp = 1;
+
+    if (this->bpp == 4)
+      this->line_buffer_extra += this->width; // room for 4 bpp expansion
+  }
+
+  this->file_color_mode |= IM_TOPDOWN;
+  if (this->bpp <= 8)
+    return ReadPalette();
+  return IM_ERR_NONE;
+}
+
+/* Chooses the file layout from the user color mode and writes the 128-byte
+   PCX header. The resolution is stored in DPI: "DPC" attributes are converted
+   and "DPI" attributes are written unchanged. Returns IM_ERR_ACCESS on error. */
+int imFormatPCX::WriteImageInfo()
+{
+  unsigned short word_value, bplp;
+  unsigned char byte_value, filler[54+3*2];
+  this->file_data_type = IM_BYTE;
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);
+
+  if (imStrEqual(this->compression, "NONE"))
+    this->comp_type = (unsigned char)0;
+  else
+    this->comp_type = (unsigned char)1;
+
+  if (this->file_color_mode == IM_BINARY)
+  {
+    this->bpp = 1;
+    this->convert_bpp = 1;
+  }
+  else if (this->file_color_mode == IM_RGB)
+  {
+    this->bpp = 24;
+    this->file_color_mode |= IM_PACKED;
+  }
+  else
+    this->bpp = 8;
+  this->file_color_mode |= IM_TOPDOWN;
+
+  int planes = imColorModeDepth(this->file_color_mode);
+  bplp = (unsigned short)imFileLineSizeAligned(this->width, this->bpp/planes, 2);
+  this->line_raw_size = bplp * planes;
+  this->line_buffer_extra = 2; // room enough for padding
+
+  if (this->comp_type || this->bpp == 24)
+  {
+    // allocates room for 24 bpp packing/unpacking and/or compression
+    // allocates more than enough since compression algorithm can be inefficient
+    this->line_buffer_extra += 2*this->line_raw_size;
+  }
+  this->version = 5;
+  imAttribTable* attrib_table = AttribTable();
+  /* writes the PCX file header */
+  unsigned short xmin = 0, ymin = 0;
+  const void* attrib_data = attrib_table->Get("XScreen");
+  if (attrib_data) xmin = *(unsigned short*)attrib_data;
+  attrib_data = attrib_table->Get("YScreen");
+  if (attrib_data) ymin = *(unsigned short*)attrib_data;
+  byte_value = PCX_ID;
+  imBinFileWrite(handle, &byte_value, 1, 1); /* identifier */
+  imBinFileWrite(handle, &this->version, 1, 1); /* format version */
+  imBinFileWrite(handle, &this->comp_type, 1, 1); /* compression comp_type */
+  byte_value = (imbyte)(this->bpp/planes);
+  imBinFileWrite(handle, &byte_value, 1, 1); /* bits/pixel/plane */
+  word_value = xmin;
+  imBinFileWrite(handle, &word_value, 1, 2); /* xmin */
+  word_value = ymin;
+  imBinFileWrite(handle, &word_value, 1, 2); /* ymin */
+  word_value = (unsigned short)(this->width - 1) + xmin;
+  imBinFileWrite(handle, &word_value, 1, 2); /* xmax */
+  word_value = (unsigned short)(this->height - 1) + ymin;
+  imBinFileWrite(handle, &word_value, 1, 2); /* ymax */
+
+  unsigned short hdpi = 0, vdpi = 0;
+  attrib_data = attrib_table->Get("ResolutionUnit");
+  if (attrib_data)
+  {
+    char* res_unit = (char*)attrib_data;
+    float* xres = (float*)attrib_table->Get("XResolution");
+    float* yres = (float*)attrib_table->Get("YResolution");
+    /* factor that turns the stored unit into DPI; 0 for an unknown unit */
+    double to_dpi = 0;
+    if (imStrEqual(res_unit, "DPC"))
+      to_dpi = 2.54;
+    else if (imStrEqual(res_unit, "DPI"))
+      to_dpi = 1;
+
+    /* a unit without both values leaves the fields at 0 */
+    if (xres && yres)
+    {
+      hdpi = (unsigned short)(*xres * to_dpi);
+      vdpi = (unsigned short)(*yres * to_dpi);
+    }
+  }
+
+  /* write the x resolution */
+  word_value = hdpi;
+  imBinFileWrite(handle, &word_value, 1, 2); /* hdpi */
+  /* write the y resolution */
+  word_value = vdpi;
+  imBinFileWrite(handle, &word_value, 1, 2); /* vdpi */
+
+  imBinFileWrite(handle, iPCXDefaultPalette, 3*16, 1); /* 16 colors palette */
+  byte_value = 0;
+  imBinFileWrite(handle, &byte_value, 1, 1); /* reserved */
+  byte_value = (imbyte)planes;
+  imBinFileWrite(handle, &byte_value, 1, 1); /* planes */
+  word_value = bplp;
+  imBinFileWrite(handle, &word_value, 1, 2); /* bytes per line per plane */
+  memset(filler, 0, 54+3*2);
+  imBinFileWrite(handle, filler, 54+3*2, 1); /* palette info, hscreen size, vscreen size, filler */
+
+  /* tests if everything was ok */
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+  return IM_ERR_NONE;
+}
+
+/* Loads the palette of an image with up to 8 bits per pixel from the place the
+   format version dictates and converts it to the IM format. Entries that the
+   chosen source does not provide are left as zero bytes. */
+int imFormatPCX::ReadPalette()
+{
+  /* zero filled because no source below fills all 256 entries in every case */
+  unsigned char pcx_colors[256 * 3];
+  memset(pcx_colors, 0, sizeof(pcx_colors));
+  if (this->version == 5 && this->bpp == 1)
+  {
+    pcx_colors[0] = 0; pcx_colors[1] = 0; pcx_colors[2] = 0;
+    pcx_colors[3] = 255; pcx_colors[4] = 255; pcx_colors[5] = 255;
+  }
+  else if (this->version == 5 && this->bpp == 8)
+  {
+    unsigned char ExtPal = 0;
+    /* jump to the end of file minus the palette data */
+    imBinFileSeekFrom(handle, -769);
+    /* reads palette identifier */
+    imBinFileRead(handle, &ExtPal, 1, 1);
+    if (ExtPal != 12)
+      return IM_ERR_ACCESS;
+    /* reads palette colors */
+    imBinFileRead(handle, pcx_colors, 768, 1);
+  }
+  else if (this->version == 3)
+  {
+    /* the default table has only 16 colors: never copy more than it holds */
+    size_t copy_size = (size_t)this->palette_count * 3;
+    if (copy_size > sizeof(iPCXDefaultPalette))
+      copy_size = sizeof(iPCXDefaultPalette);
+    memcpy(pcx_colors, iPCXDefaultPalette, copy_size);
+  }
+  else
+  {
+    /* jump to the beginning of the file at the start of the palette data */
+    imBinFileSeekTo(handle, 4+6*2);
+    /* reads palette colors */
+    imBinFileRead(handle, pcx_colors, 3 * 16, 1);
+  }
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+  /* convert the color map to the IM format */
+  for (int c = 0; c < this->palette_count; c++)
+  {
+    int i = c * 3;
+    this->palette[c] = imColorEncode(pcx_colors[i], pcx_colors[i+1], pcx_colors[i+2]);
+  }
+  return IM_ERR_NONE;
+}
+
+/* Writes the 256-color palette block: the identifier byte 12 followed by
+   768 color bytes. Entries beyond palette_count are written as zeros. */
+int imFormatPCX::WritePalette()
+{
+  unsigned char ExtPal = (unsigned char)12;
+  unsigned char pcx_colors[256 * 3];
+  /* all 768 bytes go to the file, so the unused tail must not be stack garbage */
+  memset(pcx_colors, 0, sizeof(pcx_colors));
+  /* convert the color map from the IM format */
+  for (int c = 0; c < this->palette_count; c++)
+  {
+    int i = c * 3;
+    imColorDecode(&pcx_colors[i], &pcx_colors[i+1], &pcx_colors[i+2], this->palette[c]);
+  }
+  /* writes the palette identifier */
+  imBinFileWrite(handle, &ExtPal, 1, 1);
+  /* writes the color palette */
+  imBinFileWrite(handle, pcx_colors, 256 * 3, 1);
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+  return IM_ERR_NONE;
+}
+
+void imFormatPCX::Expand4bpp()
+{
+  /* Converts one scan line stored as 4 bit planes (1 bit per pixel each) into
+     one byte per pixel holding the 4-bit pixel value. The result is built at
+     offset line_buffer_size+2 past the line buffer and then copied back. */
+  int num_bits = 8, WidthDiv8 = (this->width + 7) / 8;
+  int last_bits = this->width % 8;  /* pixels in a partial last byte, 0 if none */
+
+  int line_plane_size = this->line_raw_size / 4;
+  imbyte *in_data = (unsigned char*)this->line_buffer;
+  imbyte *out_data = in_data + this->line_buffer_size+2;
+
+  for (int x = 0; x < WidthDiv8; x++)
+  {
+    imbyte b1 = in_data[x];
+    imbyte b2 = (in_data + line_plane_size)[x];
+    imbyte b3 = (in_data + 2 * line_plane_size)[x];
+    imbyte b4 = (in_data + 3 * line_plane_size)[x];
+
+    /* only shorten the last byte when the width is not a multiple of 8;
+       otherwise width % 8 is 0 and the final 8 pixels would be dropped */
+    if (x == WidthDiv8-1 && last_bits != 0)
+      num_bits = last_bits;
+
+    for (int b = 0; b < num_bits; b++)
+    {
+      /* the top bit of plane k becomes bit k of the pixel value */
+      imbyte byte_value = (imbyte)(((b1 & 0x80) >> 7) | ((b2 & 0x80) >> 6) |
+                                   ((b3 & 0x80) >> 5) | ((b4 & 0x80) >> 4));
+      b1<<=1; b2<<=1; b3<<=1; b4<<=1;
+      *out_data++ = byte_value;
+    }
+  }
+
+  memcpy(this->line_buffer, in_data + this->line_buffer_size+2, this->width);
+}
+
+void imFormatPCX::Pack24bpp()
+{
+  /* Interleaves the three color planes of the scan line (all red, all green,
+     all blue) into R,G,B pixels, using the area past the line buffer as scratch. */
+  imbyte *line = (unsigned char*)this->line_buffer;
+  imbyte *scratch = line + this->line_buffer_size+2;
+
+  const int plane_size = this->line_raw_size / 3;
+  const imbyte *plane_r = line;
+  const imbyte *plane_g = line + plane_size;
+  const imbyte *plane_b = line + 2*plane_size;
+
+  imbyte *pixel = scratch;
+  for (int x = 0; x < this->width; x++, pixel += 3)
+  {
+    pixel[0] = plane_r[x]; pixel[1] = plane_g[x]; pixel[2] = plane_b[x];
+  }
+
+  memcpy(line, scratch, this->line_raw_size);  /* packed line replaces the planar one */
+}
+
+void imFormatPCX::Unpack24bpp()
+{
+  /* Splits the R,G,B pixels of the scan line into three consecutive planes
+     (red, green, blue), using the area past the line buffer as scratch. */
+  imbyte *line = (unsigned char*)this->line_buffer;
+  imbyte *scratch = line + this->line_buffer_size+2;
+
+  const int plane_size = this->line_raw_size / 3;
+  imbyte *plane_r = scratch;
+  imbyte *plane_g = scratch + plane_size;
+  imbyte *plane_b = scratch + 2*plane_size;
+
+  const imbyte *pixel = line;
+  for (int x = 0; x < this->width; x++, pixel += 3)
+  {
+    plane_r[x] = pixel[0]; plane_g[x] = pixel[1]; plane_b[x] = pixel[2];
+  }
+
+  memcpy(line, scratch, this->line_raw_size);  /* planar line replaces the packed one */
+}
+
+int imFormatPCX::ReadImageData(void* data)
+{
+  /* Reads every scan line starting at file offset 128, expands 4 bpp lines
+     and packs 24 bpp lines, and stores each line in data.
+     Returns IM_ERR_ACCESS on a read failure, IM_ERR_COUNTER if the counter
+     interrupts the loop, IM_ERR_NONE otherwise. */
+  imCounterTotal(this->counter, this->height, "Reading PCX...");
+  imBinFileSeekTo(handle, 128);
+
+  for (int row = 0; row < this->height; row++)
+  {
+    if (!this->comp_type)
+    {
+      /* the scan line is stored as is */
+      imBinFileRead(handle, this->line_buffer, this->line_raw_size, 1);
+      if (imBinFileError(handle))
+        return IM_ERR_ACCESS;
+    }
+    else if (iPCXDecodeScanLine(handle, (imbyte*)this->line_buffer, this->line_raw_size) == IM_ERR_ACCESS)
+      return IM_ERR_ACCESS;
+
+    switch (this->bpp)
+    {
+    case 4:  Expand4bpp(); break;
+    case 24: Pack24bpp();  break;
+    }
+
+    imFileLineBufferRead(this, data, row, 0);
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+  }
+
+  return IM_ERR_NONE;
+}
+
+int imFormatPCX::WriteImageData(void* data)
+{
+  /* Writes every scan line starting at file offset 128, passing each one
+     through iPCXEncodeScanLine when comp_type is set, then appends the
+     palette for 8 bpp images. Returns IM_ERR_ACCESS on a write failure,
+     IM_ERR_COUNTER if the counter interrupts the loop. */
+  imCounterTotal(this->counter, this->height, "Writing PCX...");
+
+  imBinFileSeekTo(handle, 128);
+
+  /* encoded lines are built in the extra area past the line buffer */
+  imbyte* encoded_line = this->comp_type?
+                         (imbyte*)this->line_buffer + this->line_buffer_size+2: NULL;
+
+  for (int row = 0; row < this->height; row++)
+  {
+    imFileLineBufferWrite(this, data, row, 0);
+
+    /* 24 bpp lines are stored as three separate planes */
+    if (this->bpp == 24)
+      Unpack24bpp();
+
+    if (!this->comp_type)
+      imBinFileWrite(handle, this->line_buffer, this->line_raw_size, 1);
+    else
+    {
+      /* the encoded size will probably differ from the raw line size */
+      int encoded_size = iPCXEncodeScanLine(encoded_line, (imbyte*)this->line_buffer, this->line_raw_size);
+      imBinFileWrite(handle, encoded_line, encoded_size, 1);
+    }
+
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+  }
+
+  /* only 8 bpp images carry the palette block after the pixel data */
+  return (this->bpp == 8)? WritePalette(): IM_ERR_NONE;
+}
+
+int imFormatPCX::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  /* Validates, in this order, the color space, the data type and the
+     compression name; the first check that fails decides the error code. */
+  const int space = imColorModeSpace(color_mode);
+  const int space_rejected = space == IM_YCBCR || space == IM_LAB ||
+                             space == IM_LUV || space == IM_XYZ ||
+                             space == IM_CMYK;
+
+  /* only byte data in the remaining color spaces is accepted */
+  if (space_rejected || data_type != IM_BYTE)
+    return IM_ERR_DATA;
+
+  /* a NULL or empty compression name is accepted as is */
+  const int has_name = compression != NULL && compression[0] != 0;
+  if (has_name && !imStrEqual(compression, "NONE") && !imStrEqual(compression, "RLE"))
+    return IM_ERR_COMPRESS;
+
+  return IM_ERR_NONE;
+}
diff --git a/src/im_format_png.cpp b/src/im_format_png.cpp
new file mode 100644
index 0000000..1e7f8ba
--- /dev/null
+++ b/src/im_format_png.cpp
@@ -0,0 +1,910 @@
+/** \file
+ * \brief PNG - Portable Network Graphic Format
+ *
+ * See Copyright Notice in im_lib.h
+ * See libPNG Copyright Notice in png.h
+ * $Id: im_format_png.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_format_all.h"
+#include "im_util.h"
+#include "im_counter.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "png.h"
+/* libpng read callback: reads size bytes into buffer from the imBinFile installed by png_set_read_fn. */
+static void png_user_read_fn(png_structp png_ptr, png_bytep buffer, png_size_t size)
+{
+  imBinFile* file = (imBinFile*)png_get_io_ptr(png_ptr);  /* public accessor, not png_ptr->io_ptr */
+  imBinFileRead(file, buffer, size, 1);
+  if (imBinFileError(file)) png_error(png_ptr, "Read Error");
+}
+/* libpng write callback: writes size bytes of buffer to the imBinFile installed by png_set_write_fn. */
+static void png_user_write_fn(png_structp png_ptr, png_bytep buffer, png_size_t size)
+{
+  imBinFile* file = (imBinFile*)png_get_io_ptr(png_ptr);  /* public accessor, not png_ptr->io_ptr */
+  imBinFileWrite(file, buffer, size, 1);
+  if (imBinFileError(file)) png_error(png_ptr, "Write Error");
+}
+/* libpng flush callback, installed together with the write callback; it does nothing. */
+static void png_user_flush_fn(png_structp)
+{
+  /* the argument is left unnamed because it is not used */
+}
+
+static const char* iPNGCompTable[1] = 
+{
+  "DEFLATE"  /* single entry; the table is passed with count 1 to the imFormat constructor */
+};
+
+class imFormatPNG: public imFormat  /* PNG file access built on libpng, with I/O routed through imBinFile */
+{
+  png_structp png_ptr;  /* libpng read or write struct, created in Open() or New() */
+  png_infop info_ptr;   /* libpng info struct, created in ReadImageInfo() or WriteImageInfo() */
+
+  imBinFile* handle;    /* file used by the libpng read/write callbacks */
+  int interlace_steps, fixbits;  /* passes over the rows (1 = not interlaced); bit depth below 8 to scale up on read (0 = none) */
+
+  void iReadAttrib(imAttribTable* attrib_table);   /* PNG chunks to attributes */
+  void iWriteAttrib(imAttribTable* attrib_table);  /* attributes to PNG chunks */
+
+public:
+  imFormatPNG()
+    :imFormat("PNG", 
+              "Portable Network Graphic Format", 
+              "*.png;", 
+              iPNGCompTable, 
+              1,   /* matches the single entry of iPNGCompTable */
+              0)   /* NOTE(review): meaning of this last argument is defined by imFormat -- confirm */
+    {}
+  ~imFormatPNG() {}
+
+  int Open(const char* file_name);   /* existing file, for reading */
+  int New(const char* file_name);    /* new file, for writing */
+  void Close();
+  void* Handle(int index);           /* 0: imBinFile, 1: png_structp */
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo();
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+void imFormatRegisterPNG(void)  /* registers the PNG format */
+{
+  imFormatRegister(new imFormatPNG());  /* the new object is handed over to imFormatRegister */
+}
+
+int imFormatPNG::Open(const char* file_name)
+{
+  /* Opens file_name, verifies the PNG signature and creates the libpng read struct.
+     info_ptr starts as NULL: Close() hands it to libpng even if ReadImageInfo() never created it. */
+  this->info_ptr = NULL;
+  this->handle = imBinFileOpen(file_name);
+  if (this->handle == NULL)
+    return IM_ERR_OPEN;
+
+  unsigned char sig[8];
+  if (!imBinFileRead(this->handle, sig, 8, 1))
+  {
+    imBinFileClose(this->handle);
+    return IM_ERR_ACCESS;
+  }
+
+  if (png_sig_cmp(sig, 0, 8) != 0)
+  {
+    imBinFileClose(this->handle);
+    return IM_ERR_FORMAT;
+  }
+
+  imBinFileSeekTo(this->handle, 0);  /* back to the start, before the signature */
+  strcpy(this->compression, "DEFLATE");
+  this->image_count = 1;
+  /* Create and initialize the png_struct with the default error handler functions. */
+  this->png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, (png_error_ptr)NULL, (png_error_ptr)NULL);
+  if (!this->png_ptr)
+  {
+    imBinFileClose(this->handle);
+    return IM_ERR_FORMAT;
+  }
+  return IM_ERR_NONE;
+}
+
+int imFormatPNG::New(const char* file_name)
+{
+  /* Creates file_name for writing together with the libpng write struct.
+     info_ptr starts as NULL: Close() hands it to libpng even if WriteImageInfo() never created it. */
+  this->info_ptr = NULL;
+  this->handle = imBinFileNew(file_name);
+  if (this->handle == NULL)
+    return IM_ERR_OPEN;
+  this->png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+  if (this->png_ptr == NULL)
+  {
+    imBinFileClose(this->handle);
+    return IM_ERR_ACCESS;
+  }
+  strcpy(this->compression, "DEFLATE");
+  this->image_count = 1;
+  return IM_ERR_NONE;
+}
+
+void imFormatPNG::Close()
+{
+  /* release the libpng structs with the destroy call of the matching kind, then the file */
+  if (!this->is_new)
+    png_destroy_read_struct(&this->png_ptr, &this->info_ptr, (png_infopp)NULL);
+  else
+    png_destroy_write_struct(&this->png_ptr,  &this->info_ptr);
+  imBinFileClose(this->handle);
+}
+
+void* imFormatPNG::Handle(int index)
+{
+  /* Gives access to the low level objects behind this file:
+     index 0 is the imBinFile, index 1 is the libpng struct,
+     any other index yields a null pointer. */
+  void* low_level[2] = { (void*)this->handle, (void*)this->png_ptr };
+
+  return (index == 0 || index == 1)? low_level[index]: 0;
+}
+
+void imFormatPNG::iReadAttrib(imAttribTable* attrib_table)
+{
+  /* Copies the optional PNG chunks available in info_ptr to attrib_table:
+     gamma, resolution, position, sRGB intent, chromaticities, calibration,
+     transparency, text, modification time, ICC profile and scale.
+     A chunk for which the libpng getter reports nothing adds no attribute. */
+  double gamma;
+  if (png_get_gAMA(png_ptr, info_ptr, &gamma))
+  {
+    float fvalue = (float)gamma;
+    attrib_table->Set("Gamma", IM_FLOAT, 1, &fvalue);
+  }
+
+  png_uint_32 xr, yr;
+  int res_unit_type = PNG_RESOLUTION_UNKNOWN;
+  if (png_get_pHYs(png_ptr, info_ptr, &xr, &yr, &res_unit_type))
+  {
+    if (res_unit_type == PNG_RESOLUTION_METER)
+    {
+      float xres = xr / 100.0f;  /* pixels per meter to pixels per centimeter */
+      float yres = yr / 100.0f;
+      attrib_table->Set("XResolution", IM_FLOAT, 1, &xres);
+      attrib_table->Set("YResolution", IM_FLOAT, 1, &yres);
+      attrib_table->Set("ResolutionUnit", IM_BYTE, 4, "DPC");
+    }
+  }
+
+  png_int_32 x, y;
+  int unit_type;
+  if (png_get_oFFs(png_ptr, info_ptr, &x, &y, &unit_type))
+  {
+    float xpos, ypos;
+
+    if (res_unit_type == PNG_RESOLUTION_UNKNOWN)
+    {
+      if (unit_type == PNG_OFFSET_PIXEL)
+      {
+        xpos = (float)x;
+        ypos = (float)y;
+      }
+      else
+      {
+        xpos = 0;  // can not calculate position
+        ypos = 0;
+      }
+    }
+    else 
+    {
+      if (unit_type == PNG_OFFSET_PIXEL)
+      {
+        // pixels to centimeters
+        xpos = ((float)x / (float)xr) * 100.0f;
+        ypos = ((float)y / (float)yr) * 100.0f;
+      }
+      else
+      {
+        // micrometers to centimeters; NOTE(review): 1 cm is 10000 um, but iWriteAttrib uses the same factor of 100 -- confirm
+        xpos = (float)x / 100.0f;
+        ypos = (float)y / 100.0f;
+      }
+    }
+
+    if (xpos && ypos)  /* NOTE(review): a position with only one coordinate equal to 0 is skipped too -- confirm */
+    {
+      // Position is in ResolutionUnits
+      attrib_table->Set("YPosition", IM_FLOAT, 1, &ypos);
+      attrib_table->Set("XPosition", IM_FLOAT, 1, &xpos);
+    }
+  }
+
+  int intent;
+  if (png_get_sRGB(png_ptr, info_ptr, &intent) && intent)  /* an intent of 0 is not stored */
+    attrib_table->Set("sRGBIntent", IM_INT, 1, &intent);
+
+  double chroma[8];
+  if (png_get_cHRM(png_ptr,info_ptr, &chroma[0], &chroma[1], &chroma[2], &chroma[3], &chroma[4], &chroma[5], &chroma[6], &chroma[7]))
+  {
+    float white[2] = {(float)chroma[0], (float)chroma[1]};
+    float primchroma[6] = {(float)chroma[2], (float)chroma[3], 
+                           (float)chroma[4], (float)chroma[5], 
+                           (float)chroma[6], (float)chroma[7]};
+    attrib_table->Set("WhitePoint", IM_FLOAT, 2, white);
+    attrib_table->Set("PrimaryChromaticities", IM_FLOAT, 6, primchroma);
+  }
+
+  png_charp pcal_purpose, pcal_units;
+  png_charpp pcal_params;
+  int pcal_type, pcal_nparams;
+  png_int_32 pcal_limits[2];
+  if (png_get_pCAL(png_ptr, info_ptr, &pcal_purpose, &pcal_limits[0], &pcal_limits[1], &pcal_type, &pcal_nparams, &pcal_units, &pcal_params))
+  {
+    char param_buf[255*100];
+    size_t total_size = 0;
+    imbyte equation = (imbyte)pcal_type;  /* one byte, independent of the CPU byte order */
+
+    attrib_table->Set("CalibrationName", IM_BYTE, strlen(pcal_purpose)+1, pcal_purpose);
+    attrib_table->Set("CalibrationLimits", IM_INT, 2, pcal_limits);
+    attrib_table->Set("CalibrationUnits", IM_BYTE, strlen(pcal_units)+1, pcal_units);
+    attrib_table->Set("CalibrationEquation", IM_BYTE, 1, &equation);
+
+    /* the parameters are joined in one string, each one followed by '\n' */
+    for (int p = 0; p < pcal_nparams; p++)
+    {
+      size_t size = strlen(pcal_params[p]);
+      /* stop before overflowing: room is kept for this '\n' and the final 0 */
+      if (size + 2 > sizeof(param_buf) - total_size)
+        break;
+      memcpy(param_buf + total_size, pcal_params[p], size);
+      param_buf[total_size + size] = '\n';
+      total_size += size+1;
+    }
+    param_buf[total_size] = 0;  /* string terminator: a 0 byte, not the character '0' */
+
+    attrib_table->Set("CalibrationParam", IM_BYTE, (int)total_size+1, param_buf);
+  }
+
+  int num_trans;
+  png_bytep trans;
+  png_color_16p trans_values;
+  if (png_get_tRNS(png_ptr, info_ptr, &trans, &num_trans, &trans_values))
+  {
+    if (imColorModeSpace(file_color_mode) == IM_MAP)
+    {
+      attrib_table->Set("TransparencyIndex", IM_BYTE, num_trans, trans);
+    }
+    else if (imColorModeSpace(file_color_mode) == IM_RGB)
+    {                              
+      imbyte transp_color[3];
+      transp_color[0] = (imbyte)(trans_values->red >> 8);
+      transp_color[1] = (imbyte)(trans_values->green >> 8);
+      transp_color[2] = (imbyte)(trans_values->blue >> 8);
+      attrib_table->Set("TransparentColor", IM_BYTE, 3, transp_color);
+    }
+    else
+    {
+      imbyte bvalue = (imbyte)(trans_values->gray >> 8);
+      attrib_table->Set("TransparencyIndex", IM_BYTE, 1, &bvalue);
+    }
+  }
+
+  int num_text;
+  png_textp text_ptr;
+  if (png_get_text(png_ptr, info_ptr, &text_ptr, &num_text))
+  {
+    for (int t = 0; t < num_text; t++)
+    {
+      png_textp png_text = &text_ptr[t];
+      if (png_text->text_length)
+      {
+        if (imStrEqual(png_text->key, "Creation Time"))
+          attrib_table->Set("DateTime", IM_BYTE, png_text->text_length+1, png_text->text);
+        else
+          attrib_table->Set(png_text->key, IM_BYTE, png_text->text_length+1, png_text->text);
+      }
+    }
+  }
+
+  png_timep time;
+  if (png_get_tIME(png_ptr, info_ptr, &time))
+  {
+    char* stime = png_convert_to_rfc1123(png_ptr, time);
+    attrib_table->Set("DateTimeModified", IM_BYTE, strlen(stime)+1, stime);
+  }
+
+  png_charp name;
+  int compression_type;
+  png_charp profile;
+  png_uint_32 proflen;
+  if (png_get_iCCP(png_ptr, info_ptr, &name, &compression_type, &profile, &proflen))
+    attrib_table->Set("ICCProfile", IM_BYTE, proflen, profile);
+
+  int scale_unit;
+  double scale_width, scale_height;
+  if (png_get_sCAL(png_ptr, info_ptr, &scale_unit, &scale_width, &scale_height))
+  {
+    if (scale_unit == PNG_SCALE_METER || scale_unit == PNG_SCALE_RADIAN)
+    {
+      float xscale = (float)scale_width;
+      float yscale = (float)scale_height;
+      attrib_table->Set("XScale", IM_FLOAT, 1, &xscale);
+      attrib_table->Set("YScale", IM_FLOAT, 1, &yscale);
+      if (scale_unit == PNG_SCALE_METER)
+        attrib_table->Set("ScaleUnit", IM_BYTE, 7, "meters");
+      else
+        attrib_table->Set("ScaleUnit", IM_BYTE, 8, "radians");
+    }
+  }
+}
+
+static int iAttribStringCount = 0;  /* entries filled so far by iFindAttribString */
+enum { iAttribStringMax = 512 };    /* must match the size of text_ptr[] in iWriteAttrib */
+
+/* ForEach callback: appends every zero-terminated byte attribute longer than
+   3 bytes, except a fixed list of names, to the png_text array received in
+   user_data, up to iAttribStringMax entries. Always returns 1. */
+static int iFindAttribString(void* user_data, int index, const char* name, int data_type, int count, const void* data)
+{
+  png_textp text_ptr = (png_textp)user_data;
+  (void)index;
+
+  if (data_type == IM_BYTE && count > 3 && ((imbyte*)data)[count-1] == 0)
+  {
+    if (imStrEqual(name, "ResolutionUnit") ||
+        imStrEqual(name, "InkNames") ||
+        imStrEqual(name, "CalibrationUnits") ||
+        imStrEqual(name, "CalibrationName") ||
+        imStrEqual(name, "CalibrationParam") ||
+        imStrEqual(name, "ICCProfile") ||
+        imStrEqual(name, "ScaleUnit"))
+      return 1;
+    /* the destination array has a fixed size: ignore what does not fit */
+    if (iAttribStringCount >= iAttribStringMax)
+      return 1;
+    png_textp png_text = &text_ptr[iAttribStringCount++];
+    png_text->key = (char*)name;
+    png_text->text = (char*)data;
+    png_text->text_length = count-1;
+    /* texts of 1000 bytes or more are marked for compressed storage */
+    png_text->compression = (count < 1000)? PNG_TEXT_COMPRESSION_NONE: PNG_TEXT_COMPRESSION_zTXt;
+  }
+
+  return 1;
+}
+
+void imFormatPNG::iWriteAttrib(imAttribTable* attrib_table)
+{
+  /* Translates the attributes found in attrib_table into the optional PNG
+     chunks of info_ptr; attributes that are absent produce no chunk.
+     WriteImageInfo() calls this right before png_write_info(). */
+  const void* attrib_data = attrib_table->Get("Gamma");
+  if (attrib_data)
+    png_set_gAMA(png_ptr, info_ptr, *(float*)attrib_data);
+
+  int offset_res = PNG_OFFSET_PIXEL;
+  attrib_data = attrib_table->Get("ResolutionUnit");
+  if (attrib_data)
+  {
+    char* res_unit = (char*)attrib_data;
+
+    float* xres = (float*)attrib_table->Get("XResolution");
+    float* yres = (float*)attrib_table->Get("YResolution");
+
+    if (xres && yres)
+    {
+      png_uint_32 ixres, iyres;
+
+      if (imStrEqual(res_unit, "DPI"))
+      {
+        ixres = (png_uint_32)(*xres * 100. / 2.54);  /* per inch to per meter */
+        iyres = (png_uint_32)(*yres * 100. / 2.54);
+        offset_res = -1;  /* marks positions given in inches, converted below */
+      }
+      else
+      {
+        ixres = (png_uint_32)(*xres * 100.);  /* per centimeter to per meter */
+        iyres = (png_uint_32)(*yres * 100.);
+        offset_res = PNG_OFFSET_MICROMETER;
+      }
+
+      png_set_pHYs(png_ptr, info_ptr, ixres, iyres, PNG_RESOLUTION_METER);
+    }
+  }
+
+  attrib_data = attrib_table->Get("XPosition");
+  if (attrib_data)
+  {
+    float xpos = *(float*)attrib_data;
+
+    attrib_data = attrib_table->Get("YPosition");
+    if (attrib_data)
+    {
+      float ypos = *(float*)attrib_data;
+
+      if (offset_res == -1)
+      {
+        // inches to micrometer
+        offset_res = PNG_OFFSET_MICROMETER;
+        xpos *= 25400.0f;
+        ypos *= 25400.0f;
+      }
+      else if (offset_res == PNG_OFFSET_MICROMETER)
+      {
+        // centimeter to micrometer; NOTE(review): 1 cm is 10000 um, but iReadAttrib uses the same factor of 100 -- confirm
+        xpos *= 100.0f;
+        ypos *= 100.0f;
+      }
+
+      png_set_oFFs(png_ptr, info_ptr, (png_int_32)xpos, (png_int_32)ypos, offset_res);
+    }
+  }
+
+  attrib_data = attrib_table->Get("sRGBIntent");
+  if (attrib_data)
+    png_set_sRGB(png_ptr, info_ptr, *(int*)attrib_data);
+
+  attrib_data = attrib_table->Get("PrimaryChromaticities");
+  if (attrib_data)
+  {
+    float *primchroma = (float*)attrib_data;
+
+    attrib_data = attrib_table->Get("WhitePoint");
+    if (attrib_data)
+    {
+      float* white = (float*)attrib_data;
+
+      png_set_cHRM(png_ptr,info_ptr, white[0], white[1], 
+                                     primchroma[0], primchroma[1], primchroma[2], 
+                                     primchroma[3], primchroma[4], primchroma[5]);
+    }
+  }
+
+  attrib_data = attrib_table->Get("CalibrationName");
+  if (attrib_data)
+  {
+    char params[255][100], *pparams[255];
+    int nparams = 0;
+    char* name = (char*)attrib_data;
+    int* limits = (int*)attrib_table->Get("CalibrationLimits");
+    char* units = (char*)attrib_table->Get("CalibrationUnits");
+    char* equation = (char*)attrib_table->Get("CalibrationEquation");
+    char* param_ptr = (char*)attrib_table->Get("CalibrationParam");
+    /* the chunk needs all of its attributes; skip it when one is missing */
+    if (limits && units && equation && param_ptr)
+    {
+      /* split at every '\n', within the capacity of params[][] */
+      char* new_param_ptr;
+      while (nparams < 255 && *param_ptr != 0 && (new_param_ptr = strchr(param_ptr, '\n')) != NULL)
+      {
+        size_t size = (size_t)(new_param_ptr - param_ptr);
+        if (size > 99) size = 99;  /* truncate instead of overflowing the row */
+        memcpy(params[nparams], param_ptr, size);
+        params[nparams][size] = 0;
+        pparams[nparams] = params[nparams];
+        nparams++;
+        param_ptr = new_param_ptr+1;
+      }
+      png_set_pCAL(png_ptr, info_ptr, name, limits[0], limits[1], *equation, nparams, units, pparams);
+    }
+  }
+
+  int transp_count;
+  attrib_data = attrib_table->Get("TransparencyIndex", NULL, &transp_count);
+  if (attrib_data)
+  {
+    png_color_16 trans_values;
+    if (imColorModeSpace(file_color_mode) == IM_MAP)
+    {
+      png_set_tRNS(png_ptr, info_ptr, (imbyte*)attrib_data, transp_count, NULL);
+    }
+    else if (imColorModeSpace(file_color_mode) == IM_GRAY)
+    {
+      imbyte *transp_color = (imbyte*)attrib_data;
+      trans_values.gray = (png_uint_16)(transp_color[0] << 8);
+      png_set_tRNS(png_ptr, info_ptr, NULL, 1, &trans_values);
+    }
+  }
+
+  attrib_data = attrib_table->Get("TransparentColor");
+  if (attrib_data && imColorModeSpace(file_color_mode) == IM_RGB)
+  {
+    png_color_16 trans_values;
+    imbyte *transp_color = (imbyte*)attrib_data;
+    trans_values.red = (png_uint_16)(transp_color[0] << 8);
+    trans_values.green = (png_uint_16)(transp_color[1] << 8);
+    trans_values.blue = (png_uint_16)(transp_color[2] << 8);
+    png_set_tRNS(png_ptr, info_ptr, NULL, 1, &trans_values);
+  }
+
+  iAttribStringCount = 0;
+  png_text text_ptr[512];
+  attrib_table->ForEach(text_ptr, iFindAttribString);
+  if (iAttribStringCount)
+    png_set_text(png_ptr, info_ptr, text_ptr, iAttribStringCount);
+
+  attrib_data = attrib_table->Get("DateTimeModified");
+  if (attrib_data)
+  {
+    /* the attribute only enables the chunk: the time stored is the current time */
+    png_time ptime;
+    time_t cur_time;
+    time(&cur_time);
+    png_convert_from_time_t(&ptime, cur_time);
+    png_set_tIME(png_ptr, info_ptr, &ptime);
+  }
+
+  int proflen;
+  attrib_data = attrib_table->Get("ICCProfile", NULL, &proflen);
+  if (attrib_data)
+  {
+    png_charp profile = (png_charp)attrib_data;
+    png_set_iCCP(png_ptr, info_ptr, "ICC Profile", 0, profile, proflen);
+  }
+
+  attrib_data = attrib_table->Get("ScaleUnit");
+  if (attrib_data)
+  {
+    char* scale_unit = (char*)attrib_data;
+
+    float* xscale = (float*)attrib_table->Get("XScale");
+    float* yscale = (float*)attrib_table->Get("YScale");
+
+    if (xscale && yscale)
+    {
+      if (imStrEqual(scale_unit, "meters"))
+        png_set_sCAL(png_ptr, info_ptr, PNG_SCALE_METER, *xscale, *yscale);
+      else if (imStrEqual(scale_unit, "radians"))
+        png_set_sCAL(png_ptr, info_ptr, PNG_SCALE_RADIAN, *xscale, *yscale);
+    }
+  }
+}
+
+int imFormatPNG::ReadImageInfo(int index)
+{
+  (void)index;  /* not used: Open() always sets image_count to 1 */
+  /* Reads the PNG header and fills size, color mode, data type, palette and attributes of the file. */
+  /* Allocate/initialize the memory for image information.  REQUIRED. */
+  info_ptr = png_create_info_struct(png_ptr);
+  if (info_ptr == NULL)
+    return IM_ERR_ACCESS;
+
+  /* Set error handling */
+  if (setjmp(png_ptr->jmpbuf))
+    return IM_ERR_ACCESS;  /* libpng errors raised by the calls below resume here */
+
+  png_set_read_fn(png_ptr, (void*)this->handle, (png_rw_ptr)png_user_read_fn);  /* read through imBinFile */
+
+  png_read_info(png_ptr, info_ptr);
+
+  png_uint_32 Width, Height;
+  int bit_depth, color_type, interlace_type;
+  png_get_IHDR(png_ptr, info_ptr, &Width, &Height, &bit_depth, &color_type, &interlace_type, NULL, NULL);
+
+  this->width = Width;
+  this->height = Height;
+
+  switch(color_type)  /* PNG color type to IM color mode */
+  {
+  case PNG_COLOR_TYPE_GRAY:
+    this->file_color_mode = IM_GRAY;
+    break;
+  case PNG_COLOR_TYPE_GRAY_ALPHA:
+    this->file_color_mode = IM_GRAY | IM_ALPHA;
+    break;
+  case PNG_COLOR_TYPE_RGB:
+    this->file_color_mode = IM_RGB;
+    break;
+  case PNG_COLOR_TYPE_RGB_ALPHA:
+    this->file_color_mode = IM_RGB | IM_ALPHA;
+    break;
+  case PNG_COLOR_TYPE_PALETTE:
+    this->file_color_mode = IM_MAP;
+    break;
+  default: 
+    return IM_ERR_DATA;
+  }
+
+  if (bit_depth == 16)
+  {
+    this->file_data_type = IM_USHORT;
+
+    if (imBinCPUByteOrder() == IM_LITTLEENDIAN) // Intel
+      png_set_swap(png_ptr);  /* ask libpng to swap the bytes of each 16 bit sample */
+  }
+  else if (bit_depth == 1)
+  {
+    if (this->file_color_mode == IM_RGB)
+      return IM_ERR_DATA;
+
+    this->file_color_mode = IM_BINARY;  /* replaces the mode set above, so a 1 bit palette image skips the palette code below */
+    this->file_data_type = IM_BYTE;
+  }
+  else
+    this->file_data_type = IM_BYTE;
+
+  this->file_color_mode |= IM_TOPDOWN;
+
+  if (imColorModeDepth(this->file_color_mode) > 1)
+    this->file_color_mode |= IM_PACKED;
+
+  this->fixbits = 0;
+  if (bit_depth < 8)
+  {
+    png_set_packing(png_ptr);  /* libpng transformation for pixels smaller than one byte */
+    if (bit_depth > 1 && 
+        (imColorModeSpace(this->file_color_mode) == IM_GRAY || imColorModeSpace(this->file_color_mode) == IM_RGB))
+      this->fixbits = bit_depth;  /* ReadImageData() scales these samples up to the 8 bit range */
+  }
+
+  if (imColorModeSpace(this->file_color_mode) == IM_MAP)
+  {
+    png_colorp pal;
+    int count;
+    if (png_get_PLTE(png_ptr, info_ptr, &pal, &count))
+    {
+      long palette[256];
+
+      for (int c = 0; c < count; c++)
+      {
+        palette[c] = imColorEncode(pal[c].red,
+                                   pal[c].green,
+                                   pal[c].blue);
+      }
+
+      imFileSetPalette(this, palette, count);
+    }
+    else
+      return IM_ERR_FORMAT;  /* a palette image without a palette */
+  }
+
+  imAttribTable* attrib_table = AttribTable();
+
+  this->interlace_steps = 1; // Not interlaced.
+  if (interlace_type)
+  {
+    attrib_table->Set("Interlaced", IM_INT, 1, &interlace_type);
+    /* Turn on interlace handling. */
+    this->interlace_steps = png_set_interlace_handling(png_ptr);  /* number of passes over the rows */
+  }
+
+  png_read_update_info(png_ptr, info_ptr);  /* after all the transformations were requested */
+
+  iReadAttrib(attrib_table);
+
+  return IM_ERR_NONE;
+}
+
+int imFormatPNG::WriteImageInfo()
+{
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);  /* the file keeps the color space of the user data */
+  this->file_color_mode |= IM_TOPDOWN;
+
+  this->file_data_type = this->user_data_type;
+
+  int bit_depth = 8;
+  if (this->file_data_type == IM_USHORT)
+    bit_depth = 16;
+
+  int color_type;
+  switch (imColorModeSpace(this->user_color_mode))  /* IM color space to PNG color type */
+  {
+  case IM_BINARY:
+    bit_depth = 1;
+    this->convert_bpp = 1;  /* no break: continues into IM_GRAY, binary is written as 1 bit gray */
+  case IM_GRAY:
+    color_type = PNG_COLOR_TYPE_GRAY;
+    break;
+  case IM_RGB:   
+    color_type = PNG_COLOR_TYPE_RGB;
+    break;
+  case IM_MAP:
+    color_type = PNG_COLOR_TYPE_PALETTE;
+    break;
+  default:
+    return IM_ERR_DATA;
+  }
+
+  if (imColorModeHasAlpha(this->user_color_mode))
+  {
+    color_type |= PNG_COLOR_MASK_ALPHA;
+    this->file_color_mode |= IM_ALPHA;
+  }
+
+  if (imColorModeDepth(this->file_color_mode) > 1)
+    this->file_color_mode |= IM_PACKED;
+
+  /* Allocate/initialize the image information data.  REQUIRED */
+  info_ptr = png_create_info_struct(png_ptr);
+  if (info_ptr == NULL)
+    return IM_ERR_ACCESS;
+
+  /* Set error handling.  REQUIRED if you aren't supplying your own
+  * error handling functions in the png_create_write_struct() call. */
+  if (setjmp(png_ptr->jmpbuf))
+    return IM_ERR_ACCESS;  /* libpng errors raised by the calls below resume here */
+
+  png_set_write_fn(png_ptr, this->handle, (png_rw_ptr)png_user_write_fn, (png_flush_ptr)png_user_flush_fn);  /* write through imBinFile */
+
+  imAttribTable* attrib_table = AttribTable();
+
+  int interlace = 0;
+  int* interlaced = (int*)attrib_table->Get("Interlaced");
+  if (interlaced && *interlaced)
+    interlace = 1;  /* any non zero attribute value enables interlacing */
+
+  /* write image header */
+  png_set_IHDR(png_ptr, info_ptr, this->width, this->height, bit_depth, color_type, interlace, PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
+
+  if (imColorModeSpace(this->user_color_mode) == IM_MAP)
+  {
+    png_color pal[256];
+    unsigned char r, g, b;
+    for (int c = 0; c < this->palette_count; c++)  /* assumes palette_count <= 256 -- TODO confirm */
+    {
+      imColorDecode(&r, &g, &b, this->palette[c]);
+      pal[c].red = r;
+      pal[c].green = g;
+      pal[c].blue = b;
+    }
+
+    png_set_PLTE(png_ptr, info_ptr, pal, this->palette_count);
+  }
+
+  int* quality = (int*)attrib_table->Get("ZIPQuality");
+  if (quality)
+    png_set_compression_level(png_ptr, *quality);  /* the attribute value is passed unchanged */
+
+  iWriteAttrib(attrib_table);  /* must come before png_write_info, which writes what was set */
+
+  /* write image attribs */
+  png_write_info(png_ptr, info_ptr);
+
+  if (this->file_data_type == IM_USHORT)
+  {
+    if (imBinCPUByteOrder() == IM_LITTLEENDIAN) // Intel
+      png_set_swap(png_ptr);  /* ask libpng to swap the bytes of each 16 bit sample */
+  }
+
+  this->interlace_steps = 1;
+  if (interlace)
+    this->interlace_steps = png_set_interlace_handling(png_ptr);  /* number of passes over the rows */
+
+  return IM_ERR_NONE;
+}
+
+static int iInterlaceRowCheck(int row_step, int pass)
+{
+  /* Tells whether the given pass (1 to 7) is one of the passes listed for the
+     row position inside its group of 8 rows (row_step, 0 to 7).
+     Returns 1 if it is, 0 if it is not or if an argument is out of range. */
+
+  /* bit p of an entry is set when pass p is listed for that row position */
+  static const unsigned char listed_passes[8] =
+  {
+    (1<<1) | (1<<2) | (1<<4) | (1<<6),  /* row 0 */
+    (1<<7),                             /* row 1 */
+    (1<<5) | (1<<6),                    /* row 2 */
+    (1<<7),                             /* row 3 */
+    (1<<3) | (1<<4) | (1<<6),           /* row 4 */
+    (1<<7),                             /* row 5 */
+    (1<<5) | (1<<6),                    /* row 6 */
+    (1<<7)                              /* row 7 */
+  };
+
+  /* anything outside the table never matches */
+  if (row_step < 0 || row_step > 7)
+    return 0;
+
+  if (pass < 1 || pass > 7)
+    return 0;
+
+  return (listed_passes[row_step] >> pass) & 1;
+}
+
+int imFormatPNG::ReadImageData(void* data)
+{
+  if (setjmp(this->png_ptr->jmpbuf))
+    return IM_ERR_ACCESS;  /* libpng errors raised by the calls below resume here */
+  /* Reads the rows once per interlace pass; a row is stored in data when the pass just read is listed for it. */
+  int count = this->height*this->interlace_steps;
+  imCounterTotal(this->counter, count, "Reading PNG...");
+
+  int row = 0;
+  for (int i = 0; i < count; i++)
+  {
+    if (this->interlace_steps > 1 && ((row % 8) % 2 == 0)) // only when interlaced and on the even row steps (0, 2, 4, 6)
+      imFileLineBufferWrite(this, data, row, 0);  /* loads the row stored so far back into line_buffer -- presumably so the new pass is merged with it, confirm */
+
+    png_read_row(this->png_ptr, (imbyte*)this->line_buffer, NULL);
+
+    if (this->interlace_steps == 1 || iInterlaceRowCheck(row % 8, png_ptr->pass+1))  /* png_ptr->pass counts from 0 */
+    {
+      if (this->fixbits)
+      {
+        unsigned char* buf = (unsigned char*)this->line_buffer;
+        for (int b = 0; b < this->line_buffer_size; b++)
+        {
+          if (this->fixbits == 4)
+            *buf *= 17;  /* 4 bit sample: 15 becomes 255 */
+          else
+            *buf *= 85;  /* otherwise a 2 bit sample: 3 becomes 255 */
+
+          buf++;
+        }
+      }
+
+      imFileLineBufferRead(this, data, row, 0);  /* stores line_buffer as this row of data */
+    }
+
+    if (!imCounterInc(this->counter))
+    {
+      png_read_end(this->png_ptr, NULL);
+      return IM_ERR_COUNTER;
+    }
+
+   row++;
+   if (row == this->height)
+     row = 0;  /* start again from the top for the next pass */
+  }
+
+  png_read_end(this->png_ptr, NULL);
+
+  return IM_ERR_NONE;
+}
+
+int imFormatPNG::WriteImageData(void* data)
+{
+  /* Writes the image rows through libpng, once per interlace pass.
+     Returns IM_ERR_ACCESS when libpng reports an error, IM_ERR_COUNTER when
+     the counter interrupts the loop, IM_ERR_NONE otherwise. */
+  if (setjmp(this->png_ptr->jmpbuf))
+    return IM_ERR_ACCESS;
+
+  const int total_rows = this->height*this->interlace_steps;
+  imCounterTotal(this->counter, total_rows, "Writing PNG...");
+
+  for (int done = 0, row = 0; done < total_rows; done++)
+  {
+    /* bring the row from data into line_buffer and hand it to libpng */
+    imFileLineBufferWrite(this, data, row, 0);
+    png_write_row(this->png_ptr, (imbyte*)this->line_buffer);
+
+    if (!imCounterInc(this->counter))
+    {
+      png_write_end(this->png_ptr, this->info_ptr);
+      return IM_ERR_COUNTER;
+    }
+
+    /* after the last row, start again from the top for the next pass */
+    if (++row == this->height)
+      row = 0;
+  }
+  png_write_end(this->png_ptr, this->info_ptr);
+  return IM_ERR_NONE;
+}
+
+int imFormatPNG::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  /* Validates, in this order, the color space, the data type and the
+     compression name; the first check that fails decides the error code. */
+  const int space = imColorModeSpace(color_mode);
+  const int space_rejected = space == IM_YCBCR || space == IM_LAB ||
+                             space == IM_LUV || space == IM_XYZ ||
+                             space == IM_CMYK;
+
+  /* besides bytes, unsigned 16 bit data is accepted as well */
+  const int type_accepted = data_type == IM_BYTE || data_type == IM_USHORT;
+  if (space_rejected || !type_accepted)
+    return IM_ERR_DATA;
+
+  /* a NULL or empty compression name is accepted as is */
+  const int has_name = compression != NULL && compression[0] != 0;
+  if (has_name && !imStrEqual(compression, "DEFLATE"))
+    return IM_ERR_COMPRESS;
+  return IM_ERR_NONE;
+}
+
diff --git a/src/im_format_pnm.cpp b/src/im_format_pnm.cpp
new file mode 100644
index 0000000..1c7832a
--- /dev/null
+++ b/src/im_format_pnm.cpp
@@ -0,0 +1,502 @@
+/** \file
+ * \brief PNM - Netpbm Portable Image Map
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_pnm.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_format_all.h"
+#include "im_util.h"
+#include "im_counter.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+
+/* Reads the next unsigned decimal number from the file.
+ * Bytes that are not digits are skipped until the first digit shows up; the
+ * number ends at the first non-digit byte after it, which is consumed.
+ * Returns 1 and stores the number in *value on success. Returns 0 on a read
+ * error, when the number has more than 10 digits, or when it is larger than
+ * 2147483647; *value is not written in those cases. */
+static int iPNMReadNextInteger(imBinFile* handle, int *value)
+{
+  /* up to 10 digits; the byte that ends the number is read into the
+     position that then receives the terminator */
+  char buffer[11];
+  int c = 0, found = 0;
+
+  while (!found)
+  {
+    imBinFileRead(handle, &buffer[c], 1, 1);
+
+    /* test the read first, the byte is not valid after a failure */
+    if (imBinFileError(handle))
+      return 0;
+
+    if (buffer[c] >= '0' && buffer[c] <= '9')
+    {
+      /* one more digit; stop before the next read would leave the buffer */
+      c++;
+      if (c > 10)
+        return 0;
+    }
+    else if (c > 0)
+    {
+      /* a non-digit after at least one digit ends the number */
+      buffer[c] = 0;
+
+      unsigned long parsed = strtoul(buffer, NULL, 10);
+      if (parsed > 0x7FFFFFFFul)
+        return 0;
+
+      *value = (int)parsed;
+      found = 1;
+    }
+  }
+
+  return 1;
+}
+
+/* Skips the rest of the current line and, when the next line starts with
+ * '#', copies it into comment.
+ * The copy drops '\r' bytes, keeps the '\n' that ends the line and is zero
+ * terminated; *size receives the number of bytes stored including the
+ * terminator, or 0 when there is no comment. When the next line is not a
+ * comment its first byte is given back to the file.
+ * max_size is the capacity of comment in bytes (defaults to 4096); a longer
+ * comment is read to its end but only the bytes that fit are kept.
+ * Returns 1 on success and 0 on a read error. */
+static int iPNMReadComment(imBinFile* handle, char* comment, int *size, int max_size = 4096)
+{
+  imbyte byte_value = 0;
+
+  // find the first \n
+  while(byte_value != '\n')
+  {
+    imBinFileRead(handle, &byte_value, 1, 1);
+    if (imBinFileError(handle))
+      return 0;
+  }
+
+  *size = 0;
+
+  imBinFileRead(handle, &byte_value, 1, 1);
+  if (imBinFileError(handle))
+    return 0;
+
+  if (byte_value == '#')
+  {
+    while(byte_value != '\n')
+    {
+      imBinFileRead(handle, &byte_value, 1, 1);
+      if (imBinFileError(handle))
+        return 0;
+
+      // one position is always kept free for the terminator
+      if (byte_value != '\r' && *size < max_size - 1)
+      {
+        comment[*size] = byte_value;
+        (*size)++;
+      }
+    }
+  }
+  else
+    imBinFileSeekOffset(handle, -1);  // not a comment, give the byte back
+
+  if (*size != 0)
+  {
+    comment[*size] = 0;
+    (*size)++;
+  }
+
+  return 1;
+}
+
+/* Names of the storage modes of this driver, handed to the imFormat
+ * constructor. Open() reports "ASCII" for the types P1, P2 and P3 and
+ * "NONE" for P4, P5 and P6. */
+static const char* iPNMCompTable[2] = 
+{
+  "NONE",
+  "ASCII"
+};
+
+/* Format driver for the Netpbm files (signatures "P1" to "P6"). */
+class imFormatPNM: public imFormat
+{
+  imBinFile* handle;          /* the binary file handle */
+  unsigned char image_type;   /* second byte of the signature, '1' to '6' */
+
+  /* inverts every byte of the line buffer, used for the P4 lines */
+  void FixBinary();
+
+public:
+  /* registers the name, description, extensions and the 2 entries of
+     iPNMCompTable; NOTE(review): the meaning of the last argument (1) is
+     defined by the imFormat constructor, not visible here */
+  imFormatPNM()
+    :imFormat("PNM", 
+              "Netpbm Portable Image Map", 
+              "*.pnm;*.pbm;*.ppm;*.pgm;", 
+              iPNMCompTable, 
+              2, 
+              1)
+    {}
+  ~imFormatPNM() {}
+
+  int Open(const char* file_name);
+  int New(const char* file_name);
+  void Close();
+  void* Handle(int index);
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo();
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+/* Creates the PNM driver and hands it to imFormatRegister. */
+void imFormatRegisterPNM(void)
+{
+  imFormat* pnm_format = new imFormatPNM();
+  imFormatRegister(pnm_format);
+}
+
+/* Opens an existing file and checks its two byte signature.
+ * Returns IM_ERR_OPEN when the file can not be opened, IM_ERR_ACCESS when
+ * the signature can not be read and IM_ERR_FORMAT when it is not "P1" to
+ * "P6"; the file is closed again in the last two cases.
+ * On success stores the image type, sets the image count to 1 and reports
+ * the compression as "ASCII" (P1-P3) or "NONE" (P4-P6). */
+int imFormatPNM::Open(const char* file_name)
+{
+  this->handle = imBinFileOpen(file_name);
+  if (this->handle == NULL)
+    return IM_ERR_OPEN;
+
+  unsigned char magic[2];
+  imBinFileRead(this->handle, magic, 2, 1);
+  if (imBinFileError(this->handle))
+  {
+    imBinFileClose(this->handle);
+    return IM_ERR_ACCESS;
+  }
+
+  const int is_pnm = (magic[0] == 'P' && magic[1] >= '1' && magic[1] <= '6');
+  if (!is_pnm)
+  {
+    imBinFileClose(this->handle);
+    return IM_ERR_FORMAT;
+  }
+
+  this->image_type = magic[1];
+  this->image_count = 1;     // incremented when another image follows the data
+
+  /* the types '1', '2' and '3' store the samples as text */
+  const int plain = (this->image_type <= '3');
+  strcpy(this->compression, plain ? "ASCII" : "NONE");
+
+  return IM_ERR_NONE;
+}
+
+/* Creates a new file for writing.
+ * Returns IM_ERR_OPEN when the file can not be created, IM_ERR_NONE
+ * otherwise. The image count starts at 1. */
+int imFormatPNM::New(const char* file_name)
+{
+  this->handle = imBinFileNew(file_name);
+  if (this->handle == NULL)
+    return IM_ERR_OPEN;
+
+  this->image_count = 1;
+  return IM_ERR_NONE;
+}
+
+/* Closes the binary file held by this driver. */
+void imFormatPNM::Close()
+{
+  imBinFileClose(this->handle);
+}
+
+/* Returns the binary file handle for index 0 and NULL for any other index. */
+void* imFormatPNM::Handle(int index)
+{
+  return (index == 0) ? (void*)this->handle : NULL;
+}
+
+/* Reads the header that follows the signature: the optional comment line,
+ * the width, the height and, except for the types '1' and '4', the maximum
+ * sample value.
+ * The color mode comes from the image type (binary, gray or packed RGB,
+ * always top-down); the data type is IM_USHORT when the maximum value is
+ * above 255 and IM_BYTE otherwise. A comment is stored in the "Description"
+ * attribute.
+ * The index parameter is not used.
+ * Returns IM_ERR_ACCESS when the header can not be read and IM_ERR_DATA
+ * when the dimensions are not positive or the maximum value is outside
+ * 1..65535. */
+int imFormatPNM::ReadImageInfo(int index)
+{
+  (void)index;
+
+  switch (this->image_type)
+  {
+  case '4':
+    this->convert_bpp = 1;
+    /* fall through, '4' is a binary image too */
+  case '1':
+    this->file_color_mode = IM_BINARY;
+    break;
+  case '2':
+  case '5':
+    this->file_color_mode = IM_GRAY;
+    break;
+  case '3':
+  case '6':
+    this->file_color_mode = IM_RGB | IM_PACKED;
+    break;
+  }
+
+  this->file_color_mode |= IM_TOPDOWN;
+
+  imAttribTable* attrib_table = AttribTable();
+
+  char comment[4096];
+  int size;
+  if (!iPNMReadComment(handle, comment, &size))
+    return IM_ERR_ACCESS;
+
+  if (size)
+    attrib_table->Set("Description", IM_BYTE, size, comment);
+
+  if (!iPNMReadNextInteger(handle, &this->width))
+    return IM_ERR_ACCESS;
+
+  if (!iPNMReadNextInteger(handle, &this->height))
+    return IM_ERR_ACCESS;
+
+  if (this->height <= 0 || this->width <= 0)
+    return IM_ERR_DATA;
+
+  /* the binary types have no maximum value in the header */
+  int max_val = 255;
+  if (this->image_type != '4' && this->image_type != '1')
+  {
+    if (!iPNMReadNextInteger(handle, &max_val))
+      return IM_ERR_ACCESS;
+
+    /* the samples must fit in 16 bits and 0 is not a valid maximum */
+    if (max_val <= 0 || max_val > 65535)
+      return IM_ERR_DATA;
+  }
+
+  this->file_data_type = IM_BYTE;
+  if (max_val > 255)
+    this->file_data_type = IM_USHORT;
+
+  return IM_ERR_NONE;
+}
+
+/* Writes the file header: the signature, an optional comment line taken
+ * from the "Description" attribute, the width, the height and, except for
+ * the binary types, the maximum sample value (255 or 65535).
+ * The image type is chosen from the color space and from the compression
+ * ("ASCII" selects the types '1', '2' and '3').
+ * Returns IM_ERR_ACCESS on a write error and IM_ERR_NONE otherwise. */
+int imFormatPNM::WriteImageInfo()
+{
+  this->file_data_type = this->user_data_type;
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);
+
+  const int plain = imStrEqual(this->compression, "ASCII") ? 1 : 0;
+
+  if (this->file_color_mode == IM_BINARY)
+  {
+    if (plain)
+      this->image_type = '1';
+    else
+    {
+      this->image_type = '4';
+      this->convert_bpp = 1;
+    }
+  }
+  else if (this->file_color_mode == IM_GRAY)
+  {
+    this->image_type = plain ? '2' : '5';
+  }
+  else if (this->file_color_mode == IM_RGB)
+  {
+    this->image_type = plain ? '3' : '6';
+    this->file_color_mode |= IM_PACKED;
+  }
+
+  this->file_color_mode |= IM_TOPDOWN;
+
+  imBinFilePrintf(handle, "P%c\n", (int)this->image_type);
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  int attrib_size;
+  const void* attrib_data = AttribTable()->Get("Description", NULL, &attrib_size);
+  if (attrib_data != NULL)
+  {
+    /* only the first line of the description goes to the file */
+    char* desc = (char*)attrib_data;
+    int size;
+    for (size = 0; size < (attrib_size-1); size++)
+    {
+      if (desc[size] == '\r' || desc[size] == '\n')
+        break;
+    }
+
+    imBinFileWrite(handle, (void*)"#", 1, 1);
+    imBinFileWrite(handle, desc, size, 1);
+    imBinFileWrite(handle, (void*)"\n", 1, 1);
+  }
+
+  imBinFilePrintf(handle, "%d\n", this->width);
+  imBinFilePrintf(handle, "%d\n", this->height);
+
+  const int is_bitmap = (this->image_type == '4' || this->image_type == '1');
+  if (!is_bitmap)
+  {
+    const int max_val = (this->file_data_type == IM_USHORT) ? 65535 : 255;
+    imBinFilePrintf(handle, "%d\n", max_val);
+  }
+
+  /* a single test covers all the writes after the signature */
+  return imBinFileError(handle) ? IM_ERR_ACCESS : IM_ERR_NONE;
+}
+
+/* Replaces every byte of the line buffer by its bitwise complement. */
+void imFormatPNM::FixBinary()
+{
+  unsigned char* bytes = (unsigned char*)this->line_buffer;
+  const int total = this->line_buffer_size;
+
+  for (int i = 0; i < total; i++)
+    bytes[i] = (unsigned char)~bytes[i];
+}
+
+/* Reads all the lines of the current image into data.
+ * The text types ('1', '2', '3') are read one number at a time; the other
+ * types are read one line at a time. Raw 16 bit samples are stored in the
+ * file with the most significant byte first, so they are read as 2 byte
+ * values with the file byte order set to big endian. The lines of type '4'
+ * are inverted with FixBinary and the values 0 and 1 of type '1' are
+ * swapped.
+ * After the last line the next two bytes are tested for another signature;
+ * when found the image count is incremented and the image type and
+ * compression are updated, errors at this point are ignored.
+ * Returns IM_ERR_ACCESS on a read error, IM_ERR_COUNTER when the counter
+ * asks to stop, IM_ERR_NONE otherwise. */
+int imFormatPNM::ReadImageData(void* data)
+{
+  imCounterTotal(this->counter, this->height, "Reading PNM...");
+
+  int line_count = imImageLineCount(this->width, this->file_color_mode);
+
+  int line_raw_size;
+  if (this->image_type == '4')
+    line_raw_size = imFileLineSizeAligned(this->width, 1, 1);
+  else
+    line_raw_size = imImageLineSize(this->width, this->file_color_mode, this->file_data_type);
+
+  int plain = 0;
+  if (this->image_type == '1' || this->image_type == '2' || this->image_type == '3')
+    plain = 1;
+
+  /* raw 16 bit samples need the byte order of the file */
+  int raw_ushort = (!plain && this->file_data_type == IM_USHORT);
+  if (raw_ushort)
+    imBinFileByteOrder(handle, IM_BIGENDIAN);
+
+  for (int row = 0; row < this->height; row++)
+  {
+    if (plain)
+    {
+      int value;
+      for (int col = 0; col < line_count; col++)
+      {
+        if (!iPNMReadNextInteger(handle, &value))
+          return IM_ERR_ACCESS;
+
+        /* in the file 1 is black, the opposite of the values in memory */
+        if (this->image_type == '1' && value < 2)
+          value = 1 - value;
+
+        if (this->file_data_type == IM_USHORT)
+          ((imushort*)this->line_buffer)[col] = (imushort)value;
+        else
+          ((imbyte*)this->line_buffer)[col] = (unsigned char)value;
+      }
+    }
+    else
+    {
+      if (raw_ushort)
+        imBinFileRead(handle, this->line_buffer, line_count, 2);
+      else
+        imBinFileRead(handle, this->line_buffer, line_raw_size, 1);
+
+      if (imBinFileError(handle))
+        return IM_ERR_ACCESS;     
+
+      if (this->image_type == '4')
+        FixBinary();
+    }
+
+    imFileLineBufferRead(this, data, row, 0);
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+  }
+
+  // try to find another image, ignore errors from here
+
+  /* reads the PNM format identifier */
+  unsigned char sig[2];
+  imBinFileRead(handle, sig, 2, 1);
+  if (imBinFileError(handle))
+    return IM_ERR_NONE;
+
+  if (sig[0] != 'P' || (sig[1] != '1' && sig[1] != '2' &&
+                        sig[1] != '3' && sig[1] != '4' &&
+                        sig[1] != '5' && sig[1] != '6'))
+    return IM_ERR_NONE;
+
+  this->image_type = sig[1];
+  this->image_count++;
+
+  if (this->image_type == '1' || this->image_type == '2' || this->image_type == '3')
+    strcpy(this->compression, "ASCII");
+  else
+    strcpy(this->compression, "NONE");
+
+  return IM_ERR_NONE;
+}
+
+/* Writes all the lines of the image in data to the file.
+ * The text types ('1', '2', '3') are written as decimal numbers separated
+ * by spaces, with a line break after about 60 characters and at the end of
+ * each image line. The other types are written one line at a time; raw 16
+ * bit samples are written as 2 byte values with the file byte order set to
+ * big endian, since the format stores the most significant byte first.
+ * The lines of type '4' are inverted with FixBinary and the values 0 and 1
+ * of type '1' are swapped.
+ * Returns IM_ERR_ACCESS on a write error, IM_ERR_COUNTER when the counter
+ * asks to stop, IM_ERR_NONE otherwise. */
+int imFormatPNM::WriteImageData(void* data)
+{
+  imCounterTotal(this->counter, this->height, "Writing PNM...");
+
+  int line_count = imImageLineCount(this->width, this->file_color_mode);
+
+  int line_raw_size;
+  if (this->image_type == '4')
+    line_raw_size = imFileLineSizeAligned(this->width, 1, 1);
+  else
+    line_raw_size = imImageLineSize(this->width, this->file_color_mode, this->file_data_type);
+
+  int plain = 0;
+  if (this->image_type == '1' || this->image_type == '2' || this->image_type == '3')
+    plain = 1;
+
+  /* raw 16 bit samples need the byte order of the file */
+  int raw_ushort = (!plain && this->file_data_type == IM_USHORT);
+  if (raw_ushort)
+    imBinFileByteOrder(handle, IM_BIGENDIAN);
+
+  for (int row = 0; row < this->height; row++)
+  {
+    imFileLineBufferWrite(this, data, row, 0);
+
+    if (plain)
+    {
+      int line_size = 0;
+      for (int col = 0; col < line_count; col++)
+      {
+        int value;
+        if (this->file_data_type == IM_USHORT)
+          value = ((imushort*)this->line_buffer)[col];
+        else
+          value = ((imbyte*)this->line_buffer)[col];
+
+        /* in the file 1 is black, the opposite of the values in memory */
+        if (this->image_type == '1' && value < 2)
+          value = 1 - value;
+
+        int write_size = imBinFilePrintf(handle, "%d ", value);
+        if (!write_size)
+          return IM_ERR_ACCESS;
+
+        line_size += write_size;
+
+        // No line should be longer than 70 characters. 
+        if (line_size > 60 || col == line_count-1)
+        {
+          line_size = 0;
+          imBinFileWrite(handle, (void*)"\n", 1, 1);
+        }
+      }
+    }
+    else
+    {
+      if (this->image_type == '4')
+        FixBinary();
+
+      if (raw_ushort)
+        imBinFileWrite(handle, this->line_buffer, line_count, 2);
+      else
+        imBinFileWrite(handle, this->line_buffer, line_raw_size, 1);
+    }
+
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;     
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Tells whether an image with the given compression, color mode and data
+ * type can be written by this driver.
+ * Returns IM_ERR_DATA for the YCbCr, Lab, Luv, XYZ, CMYK and MAP color
+ * spaces and for data types other than IM_BYTE and IM_USHORT,
+ * IM_ERR_COMPRESS for a compression name other than "NONE" or "ASCII", and
+ * IM_ERR_NONE otherwise (a NULL or empty compression is accepted). */
+int imFormatPNM::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  switch (imColorModeSpace(color_mode))
+  {
+  case IM_YCBCR:
+  case IM_LAB:
+  case IM_LUV:
+  case IM_XYZ:
+  case IM_CMYK:
+  case IM_MAP:
+    return IM_ERR_DATA;
+  default:
+    break;
+  }
+
+  const int type_ok = (data_type == IM_BYTE || data_type == IM_USHORT);
+  if (!type_ok)
+    return IM_ERR_DATA;
+
+  const int no_name = (compression == NULL || compression[0] == 0);
+  if (no_name)
+    return IM_ERR_NONE;
+
+  if (imStrEqual(compression, "NONE") || imStrEqual(compression, "ASCII"))
+    return IM_ERR_NONE;
+
+  return IM_ERR_COMPRESS;
+}
diff --git a/src/im_format_ras.cpp b/src/im_format_ras.cpp
new file mode 100644
index 0000000..bab074a
--- /dev/null
+++ b/src/im_format_ras.cpp
@@ -0,0 +1,598 @@
+/** \file
+ * \brief RAS - Sun Raster File
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_ras.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_format_all.h"
+#include "im_util.h"
+#include "im_counter.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+
+/*  File Header Structure. */
+/*  4  Magic;      magic number */
+/*  4  Width;      width (pixels) of image */
+/*  4  Height;     height (pixels) of image */
+/*  4  Depth;      depth (1, 8, 24, or 32) of pixel */
+/*  4  Length;     length (bytes) of image */
+/*  4  Type;       type of file; see RAS_OLD below */
+/*  4  MapType;    type of colormap; see RAS_NONE below */
+/*  4  MapLength;  length (bytes) of following map */
+/*  32  */
+
+/* value expected in the first 4 bytes of the file, read as big endian */
+#define  RAS_ID  0x59A66A95
+
+/* Sun supported ras_type's */
+#define RAS_OLD    0  /* Raw pixrect image in 68000 byte order */
+#define RAS_STANDARD  1  /* Raw pixrect image in 68000 byte order */
+#define RAS_BYTE_ENCODED  2  /* Run-length compression of bytes */
+#define RAS_EXPERIMENTAL 0xffff  /* Reserved for testing */
+
+/* byte that introduces a run, or a literal 0x80, in the run-length data */
+#define RAS_ESCAPE  0x80
+
+/* Sun supported ras_maptype's */
+#define RAS_NONE  0      /* ras_maplength is expected to be 0 */
+#define RAS_EQUAL_RGB  1  /* red[ras_maplength/3],green[],blue[] */
+#define RAS_RAW    2     /* Sun registered ras_maptype's; read here as consecutive R,G,B triples */
+
+
+/* NOTES:
+ *   Each line of the image is rounded out to a multiple of 16 bits.
+ *   This corresponds to the rounding convention used by the memory pixrect
+ *   package (/usr/include/pixrect/memvar.h) of the SunWindows system.
+ *  The ras_encoding field (always set to 0 by Sun's supported software)
+ *   was renamed to ras_length in release 2.0.  As a result, rasterfiles
+ *   of type 0 generated by the old software claim to have 0 length; for
+ *   compatibility, code reading rasterfiles must be prepared to compute the
+ *   true length from the width, height, and depth fields. */
+
+/* Reads run-length encoded bytes from the file until BufferSize bytes are
+ * decoded into DecodedBuffer.
+ * A byte different from RAS_ESCAPE stands for itself. RAS_ESCAPE followed
+ * by 0 stands for a single RAS_ESCAPE byte. RAS_ESCAPE followed by a count
+ * n > 0 and a value stands for n+1 copies of the value, so a run holds up
+ * to 256 bytes. A run that would pass the end of the buffer is cut.
+ * Returns IM_ERR_ACCESS on a read error and IM_ERR_NONE otherwise. */
+static int iRASDecodeScanLine(imBinFile* handle, unsigned char* DecodedBuffer, int BufferSize)
+{
+  int index = 0;
+  unsigned char count = 0;
+  unsigned char value = 0;
+
+  while (index < BufferSize)
+  {
+    imBinFileRead(handle, &value, 1, 1);
+
+    if (value == RAS_ESCAPE)
+    {
+      imBinFileRead(handle, &count, 1, 1);
+
+      if (count != 0)
+      {
+        imBinFileRead(handle, &value, 1, 1);
+
+        /* the length is kept in an int: a count of 255 means 256 copies,
+           which does not fit in the byte read from the file */
+        int run = (int)count + 1;
+        while (run > 0 && index < BufferSize)
+        {
+          *DecodedBuffer++ = value;
+          index++;
+          run--;
+        }
+      }
+      else
+      {
+        *DecodedBuffer++ = RAS_ESCAPE;
+        index++;
+      }
+    }
+    else
+    {
+      *DecodedBuffer++ = value;
+      index++;                         
+    }
+
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Run-length encodes BufferSize bytes of DecodedBuffer into EncodedBuffer
+ * and returns the number of bytes produced.
+ * Runs of 3 to 256 equal bytes become RAS_ESCAPE, length-1, value. Shorter
+ * runs are copied as they are, except for the RAS_ESCAPE value, which is
+ * written as RAS_ESCAPE,0 (one byte) or RAS_ESCAPE,1,RAS_ESCAPE (two
+ * bytes). The output can be up to twice the size of the input. */
+static int iRASEncodeScanLine(imbyte* EncodedBuffer, const imbyte* DecodedBuffer, int BufferSize)
+{
+  int src = 0;   /* position in the decoded data */
+  int dst = 0;   /* position in the encoded data */
+
+  while (src < BufferSize)
+  {
+    const unsigned char run_value = DecodedBuffer[src];
+
+    /* measure the run that starts at src */
+    int run_len = 1;
+    while (run_len < 256 && 
+           src + run_len < BufferSize && 
+           DecodedBuffer[src + run_len] == run_value)
+      run_len++;
+
+    if (run_len >= 3)
+    {
+      EncodedBuffer[dst++] = RAS_ESCAPE;
+      EncodedBuffer[dst++] = (imbyte)(run_len-1);
+      EncodedBuffer[dst++] = run_value;
+    }
+    else if (run_value == RAS_ESCAPE)
+    {
+      /* one or two escapes: the count is 0 or 1, and only the count 1
+         is followed by the value */
+      EncodedBuffer[dst++] = RAS_ESCAPE;
+      EncodedBuffer[dst++] = (imbyte)(run_len-1);
+      if (run_len == 2)
+        EncodedBuffer[dst++] = RAS_ESCAPE;
+    }
+    else
+    {
+      /* one or two ordinary bytes are copied */
+      for (int k = 0; k < run_len; k++)
+        EncodedBuffer[dst++] = run_value;
+    }
+
+    src += run_len;
+  }
+
+  return dst;
+}
+
+/* Names of the compressions of this driver, handed to the imFormat
+ * constructor. Open() reports "RLE" for the type RAS_BYTE_ENCODED and
+ * "NONE" for RAS_OLD and RAS_STANDARD. */
+static const char* iRASCompTable[2] = 
+{
+  "NONE",
+  "RLE"
+};
+
+/* Format driver for the Sun Raster files. */
+class imFormatRAS: public imFormat
+{
+  imBinFile* handle;          /* the binary file handle */
+  unsigned int bpp,          /* number of bits per pixel */
+               comp_type,    /* ras compression information */
+               map_type,     /* palette information */
+               line_raw_size;    /* size in bytes of one line in the file, aligned to 2 bytes */
+
+  int ReadPalette();    /* reads the color map that follows the header */
+  int WritePalette();   /* writes the palette after the header */
+  void FixRGB();        /* reverses the byte order inside each pixel of the line buffer */
+
+public:
+  /* registers the name, description, extension and the 2 entries of
+     iRASCompTable; NOTE(review): the meaning of the last argument (0) is
+     defined by the imFormat constructor, not visible here */
+  imFormatRAS()
+    :imFormat("RAS", 
+              "Sun Raster File", 
+              "*.ras;", 
+              iRASCompTable, 
+              2, 
+              0)
+    {}
+  ~imFormatRAS() {}
+
+  int Open(const char* file_name);
+  int New(const char* file_name);
+  void Close();
+  void* Handle(int index);
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo();
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+/* Creates the RAS driver and hands it to imFormatRegister. */
+void imFormatRegisterRAS(void)
+{
+  imFormat* ras_format = new imFormatRAS();
+  imFormatRegister(ras_format);
+}
+
+/* Opens an existing file, checks the identifier and reads the compression
+ * type from the header. The file is left positioned right after the
+ * identifier, at the width field.
+ * Returns IM_ERR_OPEN when the file can not be opened, IM_ERR_ACCESS on a
+ * read error, IM_ERR_FORMAT when the identifier is not RAS_ID and
+ * IM_ERR_COMPRESS for a type other than RAS_OLD, RAS_STANDARD or
+ * RAS_BYTE_ENCODED; the file is closed again on every error. */
+int imFormatRAS::Open(const char* file_name)
+{
+  unsigned int dword_value;
+
+  /* opens the binary file for reading with motorola byte order */
+  handle = imBinFileOpen(file_name);
+  if (!handle)
+    return IM_ERR_OPEN;
+
+  imBinFileByteOrder(handle, IM_BIGENDIAN); 
+
+  /* reads the RAS format identifier */
+  imBinFileRead(handle, &dword_value, 1, 4);
+  if (imBinFileError(handle))
+  {
+    imBinFileClose(handle);
+    return IM_ERR_ACCESS;
+  }
+
+  if (dword_value != RAS_ID)
+  {
+    imBinFileClose(handle);
+    return IM_ERR_FORMAT;
+  }
+
+  /* reads the compression information, 16 bytes after the identifier */
+  imBinFileSeekOffset(handle, 16);
+
+  imBinFileRead(handle, &this->comp_type, 1, 4);
+
+  /* a truncated header must not be judged by an unread value */
+  if (imBinFileError(handle))
+  {
+    imBinFileClose(handle);
+    return IM_ERR_ACCESS;
+  }
+
+  if (this->comp_type == RAS_BYTE_ENCODED)
+    strcpy(this->compression, "RLE");
+  else if (this->comp_type == RAS_OLD || this->comp_type == RAS_STANDARD)
+    strcpy(this->compression, "NONE");
+  else
+  {
+    imBinFileClose(handle);
+    return IM_ERR_COMPRESS;
+  }
+
+  /* back to the field that follows the identifier */
+  imBinFileSeekOffset(handle, -20);
+
+  this->image_count = 1;
+
+  return IM_ERR_NONE;
+}
+
+/* Creates a new file for writing with big endian byte order.
+ * Returns IM_ERR_OPEN when the file can not be created, IM_ERR_NONE
+ * otherwise. The image count starts at 1. */
+int imFormatRAS::New(const char* file_name)
+{
+  this->handle = imBinFileNew(file_name);
+  if (this->handle == NULL)
+    return IM_ERR_OPEN;
+
+  imBinFileByteOrder(this->handle, IM_BIGENDIAN);
+  this->image_count = 1;
+
+  return IM_ERR_NONE;
+}
+
+/* Closes the binary file held by this driver. */
+void imFormatRAS::Close()
+{
+  imBinFileClose(this->handle);
+}
+
+/* Returns the binary file handle for index 0 and NULL for any other index. */
+void* imFormatRAS::Handle(int index)
+{
+  return (index == 0) ? (void*)this->handle : NULL;
+}
+
+/* Reads the header fields that follow the identifier and sets the image
+ * parameters.
+ * 24 and 32 bits per pixel give packed RGB (with alpha for 32). 1 and 8
+ * bits per pixel give a MAP image when the file has a color map, read with
+ * ReadPalette, and a BINARY or GRAY image when it has none. All images
+ * are top-down and the data type is always IM_BYTE.
+ * A color map that is not used is skipped, so that the file is left at the
+ * start of the image data.
+ * The index parameter is not used.
+ * Returns IM_ERR_ACCESS on a read error and IM_ERR_DATA when the
+ * dimensions are not positive, the bits per pixel are not 1, 8, 24 or 32,
+ * or the color map has more than 256 entries. */
+int imFormatRAS::ReadImageInfo(int index)
+{
+  (void)index;
+  unsigned int dword_value;
+
+  this->file_data_type = IM_BYTE;
+
+  /* reads the image width */
+  imBinFileRead(handle, &dword_value, 1, 4);
+  this->width = (int)dword_value;
+
+  /* reads the image height */
+  imBinFileRead(handle, &dword_value, 1, 4);
+  this->height = (int)dword_value;
+
+  /* reads the number of bits per pixel */
+  imBinFileRead(handle, &this->bpp, 1, 4);
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  // sanity check
+  if (this->width <= 0 || this->height <= 0)
+    return IM_ERR_DATA;
+
+  if (this->bpp != 1 && this->bpp != 8 && 
+      this->bpp != 24 && this->bpp != 32)
+    return IM_ERR_DATA;
+
+  if (this->bpp > 8)
+  {
+    this->file_color_mode = IM_RGB;
+    this->file_color_mode |= IM_PACKED;
+
+    if (this->bpp == 32)
+      this->file_color_mode |= IM_ALPHA;
+  }
+  else
+  {
+    this->file_color_mode = IM_MAP;
+
+    if (this->bpp == 1)
+    {
+      this->convert_bpp = 1;
+      this->palette_count = 2;
+    }
+  }
+
+  this->file_color_mode |= IM_TOPDOWN;
+
+  this->line_raw_size = imFileLineSizeAligned(this->width, this->bpp, 2);
+  this->line_buffer_extra = 2; // room enough for padding
+
+  /* jump 8 bytes (Length+Compression) */
+  imBinFileSeekOffset(handle, 8);
+
+  /* reads the palette information */
+  imBinFileRead(handle, &this->map_type, 1, 4);
+
+  /* reads the palette size in bytes */
+  imBinFileRead(handle, &dword_value, 1, 4);
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  /* updates the palette count based on the palette size */
+  if (this->bpp <= 8 && this->map_type != RAS_NONE)
+  {
+    /* ReadPalette stores the map in a buffer of 256 colors */
+    if (dword_value / 3 > 256)
+      return IM_ERR_DATA;
+
+    this->palette_count = dword_value / 3;
+    return ReadPalette();
+  }
+
+  /* the map is not used, but the image data starts only after it */
+  if (dword_value != 0)
+  {
+    imBinFileSeekOffset(handle, (int)dword_value);
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+  }
+
+  if (this->bpp <= 8 && this->map_type == RAS_NONE)
+  {
+    if (this->bpp == 1)
+      this->file_color_mode = IM_BINARY;
+    else
+      this->file_color_mode = IM_GRAY;
+
+    this->file_color_mode |= IM_TOPDOWN;
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Writes one 4 byte field of the RAS header. */
+static void iRASWriteHeaderField(imBinFile* handle, unsigned int field)
+{
+  imBinFileWrite(handle, &field, 1, 4);
+}
+
+/* Chooses the file parameters from the user image and writes the 8 fields
+ * of the header, followed by the palette when there is one.
+ * RGB images are written packed with 24 bits per pixel, or 32 when the
+ * user image has alpha, and without a palette. All the other images get a
+ * palette; BINARY uses 1 bit per pixel and the rest 8.
+ * For the "RLE" compression the line buffer gets extra room for the
+ * encoded line.
+ * Returns IM_ERR_ACCESS on a write error, otherwise IM_ERR_NONE or the
+ * result of WritePalette. */
+int imFormatRAS::WriteImageInfo()
+{
+  this->file_data_type = IM_BYTE;
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);
+
+  this->comp_type = imStrEqual(this->compression, "RLE") ? RAS_BYTE_ENCODED : RAS_STANDARD;
+
+  if (this->file_color_mode == IM_RGB)
+  {
+    this->map_type = RAS_NONE;
+    this->file_color_mode |= IM_PACKED;
+
+    if (imColorModeHasAlpha(this->user_color_mode))
+    {
+      this->file_color_mode |= IM_ALPHA;
+      this->bpp = 32;
+    }
+    else
+      this->bpp = 24;
+  }
+  else
+  {
+    // the palette is forced, even for Binary and Gray
+    this->map_type = RAS_EQUAL_RGB;
+
+    if (this->file_color_mode == IM_BINARY)
+    {
+      this->bpp = 1;
+      this->convert_bpp = 1;
+    }
+    else
+      this->bpp = 8;
+  }
+
+  this->file_color_mode |= IM_TOPDOWN;
+
+  this->line_raw_size = imFileLineSizeAligned(this->width, this->bpp, 2);
+
+  // 2 bytes are room enough for the padding
+  this->line_buffer_extra = 2;
+
+  // the encoded line can be larger than the original, so more than enough is reserved
+  if (this->comp_type == RAS_BYTE_ENCODED)
+    this->line_buffer_extra += 2*this->line_raw_size;
+
+  /* the RAS file header, in file order */
+  iRASWriteHeaderField(handle, RAS_ID);                                 /* identifier */
+  iRASWriteHeaderField(handle, this->width);                            /* image width */
+  iRASWriteHeaderField(handle, this->height);                           /* image height */
+  iRASWriteHeaderField(handle, this->bpp);                              /* bits per pixel */
+  iRASWriteHeaderField(handle, this->height * this->line_raw_size);     /* image length */
+  iRASWriteHeaderField(handle, this->comp_type);                        /* compression information */
+  iRASWriteHeaderField(handle, this->map_type);                         /* palette information */
+  iRASWriteHeaderField(handle, (this->map_type == RAS_NONE)? 0: this->palette_count * 3); /* palette length */
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  if (this->map_type == RAS_NONE)
+    return IM_ERR_NONE;
+
+  return WritePalette();
+}
+
+/* Reads palette_count colors from the file into the palette.
+ * For the map type RAS_RAW the bytes are taken as consecutive R,G,B
+ * triples; for the other types as three blocks of palette_count bytes, all
+ * the reds, then all the greens, then all the blues.
+ * Returns IM_ERR_DATA when palette_count does not fit the 256 color buffer,
+ * IM_ERR_ACCESS on a read error and IM_ERR_NONE otherwise. */
+int imFormatRAS::ReadPalette()
+{
+  unsigned char ras_colors[256 * 3];
+
+  /* the count comes from the file header, it must fit in ras_colors */
+  if (this->palette_count < 0 || this->palette_count > 256)
+    return IM_ERR_DATA;
+
+  /* reads the color palette */
+  imBinFileRead(handle, ras_colors, this->palette_count * 3, 1);
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  /* convert the color map to the IM format */
+  for (int c = 0; c < this->palette_count; c++)
+  {
+    if (this->map_type == RAS_RAW)
+    {
+      int i = c * 3;
+      this->palette[c] = imColorEncode(ras_colors[i], 
+                                       ras_colors[i+1], 
+                                       ras_colors[i+2]);
+    }
+    else
+    {
+      this->palette[c] = imColorEncode(ras_colors[c], 
+                                       ras_colors[c+this->palette_count], 
+                                       ras_colors[c+2*this->palette_count]);
+    }
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Writes the palette to the file as three blocks of palette_count bytes:
+ * all the reds, then all the greens, then all the blues.
+ * Returns IM_ERR_ACCESS on a write error and IM_ERR_NONE otherwise. */
+int imFormatRAS::WritePalette()
+{
+  unsigned char ras_colors[256 * 3];
+  const int count = this->palette_count;
+
+  /* start of each component block inside the buffer */
+  unsigned char* red = ras_colors;
+  unsigned char* green = ras_colors + count;
+  unsigned char* blue = ras_colors + 2*count;
+
+  for (int c = 0; c < count; c++)
+    imColorDecode(&red[c], &green[c], &blue[c], this->palette[c]);
+
+  imBinFileWrite(handle, ras_colors, count * 3, 1);
+
+  return imBinFileError(handle) ? IM_ERR_ACCESS : IM_ERR_NONE;
+}
+
+/* Reverses the order of the bytes inside every pixel of the line buffer.
+ * With 32 bits per pixel this turns ABGR into RGBA and back; with any other
+ * value the pixels are taken as 3 bytes and BGR is turned into RGB and
+ * back. */
+void imFormatRAS::FixRGB()
+{
+  imbyte* pixel = (imbyte*)this->line_buffer;
+  imbyte saved;
+
+  if (this->bpp == 32)
+  {
+    for (int x = 0; x < this->width; x++, pixel += 4)
+    {
+      saved = pixel[0];         // first <-> last
+      pixel[0] = pixel[3];
+      pixel[3] = saved;
+
+      saved = pixel[1];         // the two in the middle
+      pixel[1] = pixel[2];
+      pixel[2] = saved;
+    }
+  }
+  else
+  {
+    for (int x = 0; x < this->width; x++, pixel += 3)
+    {
+      saved = pixel[0];         // first <-> last, the middle stays
+      pixel[0] = pixel[2];
+      pixel[2] = saved;
+    }
+  }
+}
+
+/* Reads all the lines of the image into data.
+ * Each line is read as it is or decoded with iRASDecodeScanLine, according
+ * to the compression type, and for more than 8 bits per pixel the byte
+ * order of the pixels is fixed with FixRGB.
+ * Returns IM_ERR_ACCESS on a read error, IM_ERR_COUNTER when the counter
+ * asks to stop, IM_ERR_NONE otherwise. */
+int imFormatRAS::ReadImageData(void* data)
+{
+  imCounterTotal(this->counter, this->height, "Reading RAS...");
+
+  const int encoded = (this->comp_type == RAS_BYTE_ENCODED);
+  const int fix_rgb = (this->bpp > 8);
+
+  for (int row = 0; row < this->height; row++)
+  {
+    if (encoded)
+    {
+      int error = iRASDecodeScanLine(handle, (imbyte*)this->line_buffer, this->line_raw_size);
+      if (error == IM_ERR_ACCESS)
+        return IM_ERR_ACCESS;
+    }
+    else
+    {
+      imBinFileRead(handle, this->line_buffer, this->line_raw_size, 1);
+      if (imBinFileError(handle))
+        return IM_ERR_ACCESS;
+    }
+
+    if (fix_rgb)
+      FixRGB();
+
+    imFileLineBufferRead(this, data, row, 0);
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Writes all the lines of the image in data to the file.
+ * For more than 8 bits per pixel the byte order of the pixels is fixed
+ * with FixRGB; with the run-length compression each line is encoded into
+ * the extra area placed after the line buffer before it is written.
+ * Returns IM_ERR_ACCESS on a write error, IM_ERR_COUNTER when the counter
+ * asks to stop, IM_ERR_NONE otherwise. */
+int imFormatRAS::WriteImageData(void* data)
+{
+  imCounterTotal(this->counter, this->height, "Writing RAS...");
+
+  const int encoded = (this->comp_type == RAS_BYTE_ENCODED);
+
+  // the encoded line lives in the extra buffer
+  imbyte* encoded_line = NULL;
+  if (encoded)
+    encoded_line = (imbyte*)this->line_buffer + this->line_buffer_size+2;
+
+  for (int row = 0; row < this->height; row++)
+  {
+    imFileLineBufferWrite(this, data, row, 0);
+
+    if (this->bpp > 8)
+      FixRGB();
+
+    if (!encoded)
+      imBinFileWrite(handle, this->line_buffer, this->line_raw_size, 1);
+    else
+    {
+      int encoded_size = iRASEncodeScanLine(encoded_line, (imbyte*)this->line_buffer, this->line_raw_size);
+      imBinFileWrite(handle, encoded_line, encoded_size, 1);
+    }
+
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Tells whether an image with the given compression, color mode and data
+ * type can be written by this driver.
+ * Returns IM_ERR_DATA for the YCbCr, Lab, Luv, XYZ and CMYK color spaces and
+ * for data types other than IM_BYTE, IM_ERR_COMPRESS for a compression
+ * name other than "NONE" or "RLE", and IM_ERR_NONE otherwise (a NULL or
+ * empty compression is accepted). */
+int imFormatRAS::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  switch (imColorModeSpace(color_mode))
+  {
+  case IM_YCBCR:
+  case IM_LAB:
+  case IM_LUV:
+  case IM_XYZ:
+  case IM_CMYK:
+    return IM_ERR_DATA;
+  default:
+    break;
+  }
+
+  if (data_type != IM_BYTE)
+    return IM_ERR_DATA;
+
+  const int no_name = (compression == NULL || compression[0] == 0);
+  if (no_name)
+    return IM_ERR_NONE;
+
+  if (imStrEqual(compression, "NONE") || imStrEqual(compression, "RLE"))
+    return IM_ERR_NONE;
+
+  return IM_ERR_COMPRESS;
+}
diff --git a/src/im_format_raw.cpp b/src/im_format_raw.cpp
new file mode 100644
index 0000000..83ffadb
--- /dev/null
+++ b/src/im_format_raw.cpp
@@ -0,0 +1,270 @@
+/** \file
+ * \brief RAW File Format
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_raw.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_util.h"
+#include "im_format_raw.h"
+#include "im_counter.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Names of the compressions of this driver, handed to the imFormat
+ * constructor; "NONE" is the only one. */
+static const char* iRAWCompTable[1] = 
+{
+  "NONE"
+};
+
+/* Format driver for files without a header; the image parameters are
+ * taken from the attribute table (see iRawUpdateParam). */
+class imFormatRAW: public imFormat
+{
+  imBinFile* handle;   /* the binary file handle */
+  int padding;         /* bytes skipped in the file after each line */
+
+  /* loads the image parameters from the attributes and positions the file */
+  int iRawUpdateParam(int index);
+
+public:
+  /* registers the name, description, extension mask and the single entry
+     of iRAWCompTable; NOTE(review): the meaning of the last argument (1)
+     is defined by the imFormat constructor, not visible here */
+  imFormatRAW()
+    :imFormat("RAW", 
+              "RAW File Format", 
+              "*.*;", 
+              iRAWCompTable, 
+              1, 
+              1)
+    {}
+  ~imFormatRAW() {}
+
+  int Open(const char* file_name);
+  int New(const char* file_name);
+  void Close();
+  void* Handle(int index);
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo();
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+/* Creates a new RAW driver; the object is returned to the caller and is
+ * not registered here. */
+imFormat* imFormatInitRAW(void)
+{
+  imFormat* raw_format = new imFormatRAW();
+  return raw_format;
+}
+
+/* Opens an existing file for reading.
+ * Returns IM_ERR_OPEN when the file can not be opened, IM_ERR_NONE
+ * otherwise. The compression is reported as "NONE"; the image count and
+ * the padding start at 0. */
+int imFormatRAW::Open(const char* file_name)
+{
+  imBinFile* file = imBinFileOpen(file_name);
+  this->handle = file;
+  if (!file)
+    return IM_ERR_OPEN;
+
+  this->padding = 0;
+  this->image_count = 0;
+  strcpy(this->compression, "NONE");
+
+  return IM_ERR_NONE;
+}
+
+/* Creates a new file for writing.
+ * Returns IM_ERR_OPEN when the file can not be created, IM_ERR_NONE
+ * otherwise. The padding starts at 0. */
+int imFormatRAW::New(const char* file_name)
+{
+  imBinFile* file = imBinFileNew(file_name);
+  this->handle = file;
+  if (!file)
+    return IM_ERR_OPEN;
+
+  this->padding = 0;
+  return IM_ERR_NONE;
+}
+
+/* Closes the binary file held by this driver. */
+void imFormatRAW::Close()
+{
+  imBinFile* file = this->handle;
+  imBinFileClose(file);
+}
+
+/* Returns the binary file handle for index 0 and NULL for any other index. */
+void* imFormatRAW::Handle(int index)
+{
+  return (index == 0) ? (void*)this->handle : NULL;
+}
+
+/* Returns how many bytes must follow a line of line_size bytes so that the
+ * next line starts at a multiple of padding.
+ * A padding of 1 or less means no alignment, the result is 0; this also
+ * keeps 0 and negative values away from the remainder operation. */
+static int iCalcPad(int padding, int line_size)
+{
+  if (padding <= 1)
+    return 0;
+
+  int rest = line_size % padding;
+  if (rest == 0)
+    return 0;
+
+  return padding - rest;
+}
+
+/* Loads the image parameters from the attribute table and positions the
+ * file.
+ * Optional attributes: "ImageCount" (default 1), "ByteOrder",
+ * "StartOffset" (passed to imBinFileSeekOffset, default 0), "SwitchType"
+ * and "Padding" (line alignment in bytes; values of 1 or less mean no
+ * padding).
+ * Mandatory attributes: "Width", "Height", "ColorMode" and "DataType".
+ * The index parameter is not used.
+ * Returns IM_ERR_ACCESS when the file can not be positioned, IM_ERR_DATA
+ * when a mandatory attribute is missing, IM_ERR_NONE otherwise. */
+int imFormatRAW::iRawUpdateParam(int index)
+{
+  (void)index;
+
+  imAttribTable* attrib_table = AttribTable();
+
+  // update image count
+  int* icount = (int*)attrib_table->Get("ImageCount");
+  if (icount)
+    this->image_count = *icount;
+  else
+    this->image_count = 1;
+
+  // update file byte order
+  int* byte_order = (int*)attrib_table->Get("ByteOrder");
+  if (byte_order)
+    imBinFileByteOrder(this->handle, *byte_order);
+
+  // position at start offset, the default is at 0
+  // NOTE(review): other callers in this library pass relative offsets to
+  // imBinFileSeekOffset - confirm that a relative seek is intended here
+  int* start_offset = (int*)attrib_table->Get("StartOffset");
+  if (!start_offset)
+    imBinFileSeekOffset(this->handle, 0);
+  else
+    imBinFileSeekOffset(this->handle, *start_offset);
+
+  if (imBinFileError(this->handle))
+    return IM_ERR_ACCESS;
+
+  int* stype = (int*)attrib_table->Get("SwitchType");
+  if (stype)
+    this->switch_type = *stype;
+
+  // The following attributes MUST exist, without them the image can not
+  // be described; report it instead of reading through a NULL pointer
+  int* width_attrib = (int*)attrib_table->Get("Width");
+  int* height_attrib = (int*)attrib_table->Get("Height");
+  int* color_mode_attrib = (int*)attrib_table->Get("ColorMode");
+  int* data_type_attrib = (int*)attrib_table->Get("DataType");
+  if (!width_attrib || !height_attrib || !color_mode_attrib || !data_type_attrib)
+    return IM_ERR_DATA;
+
+  this->width = *width_attrib;
+  this->height = *height_attrib;
+  this->file_color_mode = *color_mode_attrib;
+  this->file_data_type = *data_type_attrib;
+
+  int* pad = (int*)attrib_table->Get("Padding");
+  if (pad)
+  {
+    int line_size = imImageLineSize(this->width, this->file_color_mode, this->file_data_type);
+
+    // with the switch the float types take twice the size in the file
+    if (this->switch_type && (this->file_data_type == IM_FLOAT || this->file_data_type == IM_CFLOAT))
+      line_size *= 2;
+
+    // an alignment of 1 or less can not produce padding
+    if (*pad > 1)
+      this->padding = iCalcPad(*pad, line_size);
+    else
+      this->padding = 0;
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Loads the image parameters with iRawUpdateParam and returns its result. */
+int imFormatRAW::ReadImageInfo(int index)
+{
+  const int status = iRawUpdateParam(index);
+  return status;
+}
+
+/* Copies the user data type and color mode to the file parameters, then
+ * loads the parameters with iRawUpdateParam and returns its result. */
+int imFormatRAW::WriteImageInfo()
+{
+  this->file_data_type = this->user_data_type;
+  this->file_color_mode = this->user_color_mode;
+
+  const int current_image = this->image_count;
+  return iRawUpdateParam(current_image);
+}
+
+/* Returns the size in bytes of one value of the given data type in the
+ * file: the size given by imDataTypeSize, doubled for IM_FLOAT and
+ * IM_CFLOAT when switch_type is set. */
+static int iFileDataTypeSize(int file_data_type, int switch_type)
+{
+  const int is_float = (file_data_type == IM_FLOAT || file_data_type == IM_CFLOAT);
+  const int base_size = imDataTypeSize(file_data_type);
+
+  return (is_float && switch_type) ? base_size * 2 : base_size;
+}
+
+/* Reads all the lines of the image into data.
+ * Each line is read as an array of values, so the file byte order applies
+ * to every value; complex values are read as two real values. After each
+ * line the padding bytes, if any, are skipped.
+ * Returns IM_ERR_ACCESS on a read error, IM_ERR_COUNTER when the counter
+ * asks to stop, IM_ERR_NONE otherwise. */
+int imFormatRAW::ReadImageData(void* data)
+{
+  const int total_lines = imFileLineBufferCount(this);
+  int values_per_line = imImageLineCount(this->width, this->file_color_mode);
+  int value_size = iFileDataTypeSize(this->file_data_type, this->switch_type);
+
+  // a complex value is two real values with half the size each
+  if (this->file_data_type == IM_CFLOAT)
+  {
+    values_per_line *= 2;
+    value_size /= 2;
+  }
+
+  imCounterTotal(this->counter, total_lines, "Reading RAW...");
+
+  int row = 0;
+  int plane = 0;
+  for (int line = 0; line < total_lines; line++)
+  {
+    imBinFileRead(this->handle, (imbyte*)this->line_buffer, values_per_line, value_size);
+    if (imBinFileError(this->handle))
+      return IM_ERR_ACCESS;
+
+    imFileLineBufferRead(this, data, row, plane);
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+
+    imFileLineBufferInc(this, &row, &plane);
+
+    if (this->padding != 0)
+      imBinFileSeekOffset(this->handle, this->padding);
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Writes all the lines of the image in data to the file.
+ * Each line is written as an array of values, so the file byte order
+ * applies to every value; complex values are written as two real values.
+ * After each line the file position is moved forward by the padding, if
+ * any. The image count is incremented when all the lines are written.
+ * Returns IM_ERR_ACCESS on a write error, IM_ERR_COUNTER when the counter
+ * asks to stop, IM_ERR_NONE otherwise. */
+int imFormatRAW::WriteImageData(void* data)
+{
+  const int total_lines = imFileLineBufferCount(this);
+  int values_per_line = imImageLineCount(this->width, this->file_color_mode);
+  int value_size = iFileDataTypeSize(this->file_data_type, this->switch_type);
+
+  // a complex value is two real values with half the size each
+  if (this->file_data_type == IM_CFLOAT)
+  {
+    values_per_line *= 2;
+    value_size /= 2;
+  }
+
+  imCounterTotal(this->counter, total_lines, "Writing RAW...");
+
+  int row = 0;
+  int plane = 0;
+  for (int line = 0; line < total_lines; line++)
+  {
+    imFileLineBufferWrite(this, data, row, plane);
+
+    imBinFileWrite(this->handle, (imbyte*)this->line_buffer, values_per_line, value_size);
+    if (imBinFileError(this->handle))
+      return IM_ERR_ACCESS;
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+
+    imFileLineBufferInc(this, &row, &plane);
+
+    if (this->padding != 0)
+      imBinFileSeekOffset(this->handle, this->padding);
+  }
+
+  this->image_count++;
+  return IM_ERR_NONE;
+}
+
+/* Tells whether an image with the given compression, color mode and data
+ * type can be written by this driver.
+ * The color space is extracted from color_mode with imColorModeSpace, as
+ * the other format drivers do. The data type is not checked.
+ * Returns IM_ERR_DATA for the MAP color space, IM_ERR_COMPRESS for a
+ * compression name other than "NONE", and IM_ERR_NONE otherwise (a NULL or
+ * empty compression is accepted). */
+int imFormatRAW::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  (void)data_type;
+
+  if (imColorModeSpace(color_mode) == IM_MAP)
+    return IM_ERR_DATA;
+
+  if (!compression || compression[0] == 0)
+    return IM_ERR_NONE;
+
+  if (!imStrEqual(compression, "NONE"))
+    return IM_ERR_COMPRESS;
+
+  return IM_ERR_NONE;
+}
+
diff --git a/src/im_format_sgi.cpp b/src/im_format_sgi.cpp
new file mode 100644
index 0000000..1b0ac0a
--- /dev/null
+++ b/src/im_format_sgi.cpp
@@ -0,0 +1,607 @@
+/** \file
+ * \brief SGI - Silicon Graphics Image File Format
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_sgi.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_util.h"
+#include "im_format_all.h"
+#include "im_counter.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+
+/*  File Header Structure. */
+/*  2   Magic;          474 */
+/*  1   Storage;        0 or 1 Compression */
+/*  1   BPC;            1 or 2 Bytes Per Pixel Component */
+/*  2   Dimension;      1 or 2 or 3 */
+/*  2   XSize;          Width */
+/*  2   YSize;          Height */
+/*  2   ZSize;          Number of Channels. B/W=1, RGB=3, RGBA=4 */
+/*  4   PixMin;         Minimum Pixel Value */
+/*  4   PixMax;         Maximum Pixel Value */
+/*  4   Dummy1; */
+/*  80  ImageName;*/
+/*  4   ColorMap;       0 or 1 or 2 or 3 */
+/*  404 Dummy2;*/
+/*  512  */
+
+#define SGI_ID  474   /* magic number stored in the first 2 header bytes */
+
+/* Compression (values of the header Storage byte) */
+#define SGI_VERBATIM 0
+#define SGI_RLE      1
+
+/*  ColorMap Ids (values of the header ColorMap field) */
+#define SGI_NORMAL    0
+#define SGI_DITHERED  1
+#define SGI_SCREEN    2
+#define SGI_COLORMAP  3
+
+template <class T> 
+static int iSGIDecodeScanLine(T *optr, const T *iptr, int width)
+{
+  // Expands one RLE row from iptr into exactly width values at optr.
+  // Each packet starts with a control value: the low 7 bits are a count,
+  // bit 7 selects a literal copy (set) or a repeated value (clear), and a
+  // zero count ends the row. Returns IM_ERR_ACCESS when the packets would
+  // produce more or fewer than width values, IM_ERR_NONE otherwise.
+  // The amount of input consumed is not bounded here: the caller must
+  // supply a complete row.
+  int decoded = 0;
+  while (decoded < width)
+  {
+    const T control = *iptr++;
+    int run = control & 0x7f;
+    if (run == 0)
+      break;
+
+    decoded += run;
+    if (decoded > width)
+      return IM_ERR_ACCESS;
+
+    if (control & 0x80)
+    {
+      for (; run > 0; run--)
+        *optr++ = *iptr++;
+    }
+    else
+    {
+      const T value = *iptr++;
+      for (; run > 0; run--)
+        *optr++ = value;
+    }
+  }
+  return (decoded < width) ? IM_ERR_ACCESS : IM_ERR_NONE;
+}
+
+template <class T> 
+static int iSGIEncodeScanLine(T *optr, const T *iptr, int width)
+{
+  const T *ibufend = iptr + width,
+          *sptr;
+  T *start_optr = optr;
+  int todo, cc, count;
+  // RLE-encodes width values from iptr into optr; returns how many T values were written
+  while(iptr < ibufend) 
+  {
+    sptr = iptr;   // first value not yet encoded
+    iptr += 2;
+    while ((iptr < ibufend) && 
+           ((iptr[-2] != iptr[-1]) || (iptr[-1] != iptr[0])))
+      iptr++;      // advance until three equal values in a row are seen or the row ends
+    iptr -= 2;     // undo the 2 values of look-ahead
+    count = iptr-sptr;
+    // literal packets: (0x80 | n) followed by n values, at most 126 values per packet
+    while (count) 
+    {
+      todo = (count > 126) ? 126: count;
+      count -= todo;
+      *optr++ = (T)(0x80 | todo);
+      while(todo--)
+        *optr++ = *sptr++;
+    }
+    sptr = iptr;
+    cc = *iptr++;  // value that starts the run
+    // extend the run while the following values are equal to cc
+    while((iptr < ibufend) && (*iptr == cc))
+      iptr++;
+    count = iptr-sptr;
+    // repeat packets: n followed by the repeated value, at most 126 values per packet
+    while(count) 
+    {
+      todo = (count > 126)? 126: count;
+      count -= todo;
+      *optr++ = (T)todo;
+      *optr++ = (T)cc;
+    }
+  }
+  *optr++ = 0;     // a zero count marks the end of the row (the decoder stops on it)
+
+  return optr-start_optr;
+}
+
+static const char* iSGICompTable[2] =   /* compression names handed to the imFormat constructor */
+{
+  "NONE",
+  "RLE"
+};
+
+class imFormatSGI: public imFormat   /* reader/writer for SGI image files */
+{
+  imBinFile* handle;          /* the binary file handle */
+  unsigned char comp_type,    /* storage byte: SGI_VERBATIM or SGI_RLE */
+                bpc;          /* bytes per channel value (1 or 2) */
+  unsigned int *starttab,    /* RLE only: file offset of each packed row */
+               *lengthtab;   /* RLE only: size in bytes of each packed row */
+
+public:
+  imFormatSGI()
+    :imFormat("SGI", 
+              "Silicon Graphics Image File Format", 
+              "*.rgb;*.rgba;*.bw;*.sgi;", 
+              iSGICompTable, 
+              2, 
+              0)
+    {}
+  ~imFormatSGI() {}
+
+  int Open(const char* file_name);      /* opens for reading, checks magic and storage */
+  int New(const char* file_name);       /* creates a file for writing */
+  void Close();                         /* frees the RLE tables and closes the file */
+  void* Handle(int index);              /* index 0 gives the imBinFile handle, else NULL */
+  int ReadImageInfo(int index);         /* reads the remaining header fields */
+  int ReadImageData(void* data);        /* reads all lines into data */
+  int WriteImageInfo();                 /* writes the header */
+  int WriteImageData(void* data);       /* writes all lines from data */
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+void imFormatRegisterSGI(void)   /* registers a new imFormatSGI instance through imFormatRegister */
+{
+  imFormatRegister(new imFormatSGI());
+}
+
+int imFormatSGI::Open(const char* file_name)
+{
+  /* Opens an existing SGI file and checks its magic number and storage byte.
+     Returns IM_ERR_OPEN, IM_ERR_ACCESS (read error), IM_ERR_FORMAT (magic is
+     not SGI_ID) or IM_ERR_COMPRESS; after a failed check the file is closed. */
+  unsigned short word_value = 0;
+  int error = IM_ERR_NONE;
+
+  /* opens the binary file for reading with motorola byte order */
+  handle = imBinFileOpen(file_name);
+  if (!handle)
+    return IM_ERR_OPEN;
+  imBinFileByteOrder(handle, IM_BIGENDIAN);
+
+  /* reads the SGI format identifier */
+  imBinFileRead(handle, &word_value, 1, 2);
+  if (imBinFileError(handle))
+    error = IM_ERR_ACCESS;
+  else if (word_value != SGI_ID)
+    error = IM_ERR_FORMAT;
+  else
+  {
+    /* reads the compression information; the byte is interpreted only
+       when the read succeeded (a file truncated here leaves it unset) */
+    imBinFileRead(handle, &this->comp_type, 1, 1);
+    if (imBinFileError(handle))
+      error = IM_ERR_ACCESS;
+    else if (this->comp_type == SGI_RLE)
+      strcpy(this->compression, "RLE");
+    else if (this->comp_type == SGI_VERBATIM)
+      strcpy(this->compression, "NONE");
+    else
+      error = IM_ERR_COMPRESS;
+  }
+  if (error != IM_ERR_NONE)
+  {
+    imBinFileClose(handle);
+    return error;
+  }
+  this->starttab = NULL;
+  this->lengthtab = NULL;
+  this->image_count = 1;
+  return IM_ERR_NONE;
+}
+
+int imFormatSGI::New(const char* file_name)
+{
+  /* Creates file_name for writing; SGI data is stored in motorola
+     (big endian) byte order. Returns IM_ERR_OPEN if creation fails. */
+  imBinFile* new_file = imBinFileNew(file_name);
+  this->handle = new_file;
+  if (new_file == NULL)
+    return IM_ERR_OPEN;
+
+  imBinFileByteOrder(new_file, IM_BIGENDIAN);
+
+  /* no RLE tables yet; the image count starts at 1 */
+  this->image_count = 1;
+  this->starttab = this->lengthtab = NULL;
+  return IM_ERR_NONE;
+}
+
+void imFormatSGI::Close()
+{
+  free(this->lengthtab);   /* RLE tables: free(NULL) is a no-op, */
+  free(this->starttab);    /* so no guard is needed              */
+  imBinFileClose(this->handle);
+}
+
+void* imFormatSGI::Handle(int index)
+{
+  /* only index 0 is served: it exposes the imBinFile handle */
+  if (index != 0)
+    return NULL;
+  return (void*)this->handle;
+}
+
+int imFormatSGI::ReadImageInfo(int index)
+{
+  /* Reads the header fields that follow the storage byte consumed by Open and,
+     for RLE files, the row offset and length tables. Returns IM_ERR_ACCESS on
+     a read error, IM_ERR_DATA for unsupported header values and IM_ERR_MEM
+     when the tables can not be allocated. index is ignored. */
+  (void)index;
+  unsigned short word_value = 0, dimension = 0, depth = 0;
+
+  /* reads the number of bytes per channel */
+  imBinFileRead(handle, &this->bpc, 1, 1);
+  /* reads the number of dimensions */
+  imBinFileRead(handle, &dimension, 1, 2);
+
+  /* reads the image width */
+  imBinFileRead(handle, &word_value, 1, 2);
+  this->width = word_value;
+
+  /* reads the image height */
+  imBinFileRead(handle, &word_value, 1, 2);
+  this->height = word_value;
+
+  /* reads the number of channels */
+  imBinFileRead(handle, &depth, 1, 2);
+
+  /* jump 12 bytes (min, max, dummy) */
+  imBinFileSeekOffset(handle, 12);
+
+  /* reads the image name; the extra zeroed byte keeps the attribute
+     inside the array even when the name fills all 80 bytes */
+  char image_name[81];
+  memset(image_name, 0, sizeof(image_name));
+  imBinFileRead(handle, image_name, 80, 1);
+
+  /* reads the color map information */
+  unsigned int color_map_id = 0;
+  imBinFileRead(handle, &color_map_id, 1, 4);
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  if (image_name[0] != 0)
+    AttribTable()->Set("Description", IM_BYTE, imStrNLen(image_name, 80)+1, image_name);
+
+  /* only 1 or 2 bytes per channel are handled; other values are rejected
+     (ReadImageData divides by bpc, so 0 would crash) */
+  if (this->bpc != 1 && this->bpc != 2)
+    return IM_ERR_DATA;
+  this->file_data_type = (this->bpc == 2)? IM_USHORT: IM_BYTE;
+
+  switch (dimension)
+  {
+  case 1:
+    this->height = 1;
+    depth = 1;
+    break;
+  case 2:
+    depth = 1;
+    break;
+  case 3:
+    break;
+  default:
+    return IM_ERR_DATA;
+  }
+
+  switch (color_map_id)
+  {
+  case SGI_NORMAL:
+    switch(depth)
+    {
+    case 1:
+      this->file_color_mode = IM_GRAY;
+      break;
+    case 3:
+      this->file_color_mode = IM_RGB;
+      break;
+    case 4:
+      this->file_color_mode = IM_RGB | IM_ALPHA;
+      break;
+    default:
+      return IM_ERR_DATA;
+    }
+    break;
+  case SGI_DITHERED:
+    this->file_color_mode = IM_MAP;
+    break;
+  case SGI_COLORMAP:
+    this->file_color_mode = IM_RGB;
+    break;
+  case SGI_SCREEN:
+    this->file_color_mode = IM_GRAY;
+    break;
+  default:
+    return IM_ERR_DATA;
+  }
+
+  /* jump 404 bytes (dummy) */
+  imBinFileSeekOffset(handle, 404);
+
+  if (this->comp_type == SGI_RLE)
+  {
+    int tablen = this->height * depth;
+    this->starttab = (unsigned int *)malloc(tablen * sizeof(int));
+    this->lengthtab = (unsigned int *)malloc(tablen * sizeof(int));
+    if (tablen > 0 && (!this->starttab || !this->lengthtab))
+      return IM_ERR_MEM;  /* Close frees whichever table was allocated */
+
+    /* reads the compression control information */
+    imBinFileRead(handle, this->starttab, tablen, 4);
+    imBinFileRead(handle, this->lengthtab, tablen, 4);
+
+    // allocates more than enough since the compression algorithm can be inefficient
+    this->line_buffer_extra = 2*imImageLineSize(this->width, this->file_color_mode, this->file_data_type);
+  }
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  if (color_map_id == SGI_DITHERED)
+  {
+    /* fixed palette of 8 red x 8 green x 4 blue levels, red varying fastest */
+    static int red[8] = {0, 36, 73, 109, 146, 182, 218, 255};
+    static int green[8] = {0, 36, 73, 109, 146, 182, 218, 255};
+    static int blue[4] = {0, 85, 170, 255};
+    int c = 0;
+    for (int b = 0; b < 4; b++)
+      for (int g = 0; g < 8; g++)
+        for (int r = 0; r < 8; r++, c++)
+          this->palette[c] = imColorEncode((imbyte)red[r], (imbyte)green[g], (imbyte)blue[b]);
+  }
+  return IM_ERR_NONE;
+}
+
+int imFormatSGI::WriteImageInfo()
+{
+  /* Writes the 512 byte SGI header and, for RLE, reserves the room of the row
+     offset/length tables that WriteImageData rewrites after the last row.
+     Returns IM_ERR_ACCESS on a write error and IM_ERR_MEM when the tables
+     can not be allocated. */
+  unsigned int dword_value;
+  unsigned short word_value;
+  unsigned char dummy[404];
+  memset(dummy, 0, 404);
+
+  this->comp_type = SGI_VERBATIM;
+  if (imStrEqual(this->compression, "RLE"))
+    this->comp_type = SGI_RLE;
+  unsigned int color_map_id = SGI_NORMAL;
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);
+
+  int dimension = 2;
+  if (this->file_color_mode == IM_BINARY)
+    this->convert_bpp = -1; // expand 1 to 255
+  else if (this->file_color_mode == IM_RGB)
+  {
+    dimension = 3;
+    if (imColorModeHasAlpha(this->user_color_mode))
+      this->file_color_mode |= IM_ALPHA;
+  }
+
+  this->file_data_type = this->user_data_type;
+  this->bpc = 1;
+  int max = 255;
+  if (this->file_data_type == IM_USHORT)
+  {
+    max = 65535;
+    this->bpc = 2;
+  }
+  this->starttab = NULL;
+  this->lengthtab = NULL;
+
+  /* writes the SGI file header */
+  word_value = SGI_ID;
+  imBinFileWrite(handle, &word_value, 1, 2); /* identifier */
+  imBinFileWrite(handle, &this->comp_type, 1, 1); /* storage */
+  imBinFileWrite(handle, &this->bpc, 1, 1); /* bpc */
+  word_value = (imushort)dimension;
+  imBinFileWrite(handle, &word_value, 1, 2); /* dimension */
+  word_value = (unsigned short)this->width;
+  imBinFileWrite(handle, &word_value, 1, 2); /* image width */
+  word_value = (unsigned short)this->height;
+  imBinFileWrite(handle, &word_value, 1, 2); /* image height */
+  word_value = (imushort)imColorModeDepth(this->file_color_mode);
+  imBinFileWrite(handle, &word_value, 1, 2); /* depth */
+  dword_value = 0;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* min */
+  dword_value = max;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* max */
+  imBinFileWrite(handle, dummy, 4, 1); /* dummy */
+
+  /* tests if everything was ok */
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  int size = 0;
+  char* image_name = (char*)AttribTable()->Get("Description", NULL, &size);
+  if (image_name)
+  {
+    if (size < 80)
+    {
+      imBinFileWrite(handle, image_name, size, 1); 
+      imBinFileWrite(handle, dummy, 80-size, 1); 
+    }
+    else
+    {
+      imBinFileWrite(handle, image_name, 79, 1); 
+      imBinFileWrite(handle, (void*)"\0", 1, 1); 
+    }
+  }
+  else
+    imBinFileWrite(handle, dummy, 80, 1); /* empty image name */
+
+  dword_value = color_map_id;
+  imBinFileWrite(handle, &dword_value, 1, 4); /* color_map_id */
+  imBinFileWrite(handle, dummy, 404, 1); /* dummy */
+
+  /* tests if everything was ok */
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  if (this->comp_type == SGI_RLE)
+  {
+    int tablen = this->height * imColorModeDepth(this->file_color_mode);
+    /* zero filled, so that no uninitialized heap bytes reach the file */
+    this->starttab = (unsigned int *)calloc(tablen, 4);
+    this->lengthtab = (unsigned int *)calloc(tablen, 4);
+    if (tablen > 0 && (!this->starttab || !this->lengthtab))
+      return IM_ERR_MEM;  /* Close frees whichever table was allocated */
+    /* writes the empty compression control information, rewritten at the end */
+    imBinFileWrite(handle, this->starttab, tablen*4, 1);
+    imBinFileWrite(handle, this->lengthtab, tablen*4, 1);
+    /* iSGIEncodeScanLine emits up to 2*width+1 values (rows narrower than
+       4 pixels), so one value is added to twice the line size */
+    this->line_buffer_extra = 2*imImageLineSize(this->width, this->file_color_mode, this->file_data_type) + this->bpc;
+  }
+
+  /* tests if everything was ok */
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+  return IM_ERR_NONE;
+}
+
+int imFormatSGI::ReadImageData(void* data)
+{
+  /* Reads every image line into data. RLE rows are found through starttab and
+     lengthtab, loaded after the line buffer and decoded. Returns IM_ERR_ACCESS on
+     a read error or corrupt RLE row, IM_ERR_COUNTER when imCounterInc returns 0. */
+  int count = imFileLineBufferCount(this);
+  imCounterTotal(this->counter, count, "Reading SGI...");
+  imbyte* compressed_buffer = NULL;
+  if (this->comp_type == SGI_RLE)  // point to the extra buffer
+    compressed_buffer = (imbyte*)this->line_buffer + this->line_buffer_size;
+
+  int row = 0, plane = 0;
+  for (int i = 0; i < count; i++)
+  {
+    if (this->comp_type == SGI_VERBATIM)
+    {
+      imBinFileRead(handle, this->line_buffer, this->line_buffer_size/this->bpc, this->bpc);
+      if (imBinFileError(handle))
+        return IM_ERR_ACCESS;
+    }
+    else
+    {
+      int row_index = row + plane*this->height;
+      /* the length comes from the file: refuse rows that do not fit the extra
+         area (assumed to hold line_buffer_extra bytes - TODO confirm) */
+      if (this->lengthtab[row_index] > (unsigned int)this->line_buffer_extra)
+        return IM_ERR_ACCESS;
+      imBinFileSeekTo(handle, this->starttab[row_index]);
+      imBinFileRead(handle, compressed_buffer, this->lengthtab[row_index] / this->bpc, this->bpc);
+      if (imBinFileError(handle))
+        return IM_ERR_ACCESS;
+      int error = (this->bpc == 1)?
+        iSGIDecodeScanLine((imbyte*)this->line_buffer, compressed_buffer, this->width):
+        iSGIDecodeScanLine((imushort*)this->line_buffer, (imushort*)compressed_buffer, this->width);
+      if (error != IM_ERR_NONE)  /* a malformed row is no longer ignored */
+        return error;
+    }
+
+    imFileLineBufferRead(this, data, row, plane);
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+    imFileLineBufferInc(this, &row, &plane);
+  }
+  return IM_ERR_NONE;
+}
+
+int imFormatSGI::WriteImageData(void* data)
+{
+  /* Writes every image line, RLE packed when comp_type is SGI_RLE, and then
+     stores the final row offset/length tables at file offset 512. Returns
+     IM_ERR_ACCESS on a write error, IM_ERR_COUNTER when imCounterInc returns 0. */
+  int count = imFileLineBufferCount(this);
+  imCounterTotal(this->counter, count, "Writing SGI...");
+
+  imbyte* compressed_buffer = NULL;
+  if (this->comp_type == SGI_RLE)  // point to the extra buffer
+    compressed_buffer = (imbyte*)this->line_buffer + this->line_buffer_size;
+
+  int row = 0, plane = 0;
+  for (int i = 0; i < count; i++)
+  {
+    imFileLineBufferWrite(this, data, row, plane);
+    if (this->comp_type == SGI_VERBATIM)
+      imBinFileWrite(handle, this->line_buffer, this->line_buffer_size/this->bpc, this->bpc);
+    else
+    {
+      int length;
+      if (this->bpc == 1)
+        length = iSGIEncodeScanLine(compressed_buffer, (imbyte*)this->line_buffer, this->width);
+      else
+        length = iSGIEncodeScanLine((imushort*)compressed_buffer, (imushort*)this->line_buffer, this->width);
+      /* remember where the packed row starts and how many bytes it takes */
+      int row_index = row + plane*this->height;
+      this->starttab[row_index] = imBinFileTell(handle);
+      this->lengthtab[row_index] = length*this->bpc;
+      imBinFileWrite(handle, compressed_buffer, length, this->bpc);
+    }
+
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+    imFileLineBufferInc(this, &row, &plane);
+  }
+
+  if (this->comp_type == SGI_RLE)
+  {
+    /* the tables written together with the header were only placeholders */
+    imBinFileSeekTo(this->handle, 512);
+    int tablen = this->height * imColorModeDepth(this->file_color_mode);
+    imBinFileWrite(handle, this->starttab, tablen, 4);
+    imBinFileWrite(handle, this->lengthtab, tablen, 4);
+    if (imBinFileError(handle))  /* without valid tables the rows can not be found */
+      return IM_ERR_ACCESS;
+  }
+  return IM_ERR_NONE;
+}
+
+int imFormatSGI::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  /* Accepts byte or ushort data in any color space except the six listed
+     below; the compression may be unnamed, "NONE" or "RLE". */
+  const int color_space = imColorModeSpace(color_mode);
+  const bool space_ok = color_space != IM_YCBCR && color_space != IM_LAB &&
+                        color_space != IM_LUV && color_space != IM_XYZ &&
+                        color_space != IM_CMYK && color_space != IM_MAP;
+  const bool type_ok = (data_type == IM_BYTE) || (data_type == IM_USHORT);
+
+  if (!space_ok || !type_ok)
+    return IM_ERR_DATA;
+
+  /* a NULL or empty name is accepted without looking further */
+  if (!compression || compression[0] == 0)
+    return IM_ERR_NONE;
+
+  const bool known = imStrEqual(compression, "NONE") || imStrEqual(compression, "RLE");
+  return known ? IM_ERR_NONE : IM_ERR_COMPRESS;
+}
diff --git a/src/im_format_tga.cpp b/src/im_format_tga.cpp
new file mode 100644
index 0000000..8e8e176
--- /dev/null
+++ b/src/im_format_tga.cpp
@@ -0,0 +1,1104 @@
+/** \file
+ * \brief TGA - Truevision Graphics Adapter File
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_tga.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_util.h"
+#include "im_format_all.h"
+#include "im_counter.h"
+#include "im_math.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <memory.h>
+#include <time.h>
+#include <math.h>
+
+
+/*
+|--------|--------|------------------------------------------------------------|
+|    0   |     1  |  Number of Characters in Identification Field.             |
+|        |        |  This field is a one-byte unsigned integer, specifying     |
+|        |        |  the length of the Image Identification Field.  Its range  |
+|        |        |  is 0 to 255.  A value of 0 means that no Image            |
+|        |        |  Identification Field is included.                         |
+|--------|--------|------------------------------------------------------------|
+|    1   |     1  |  Color Map Type.                                           |
+|--------|--------|------------------------------------------------------------|
+|    2   |     1  |  Image Type Code.                                          |
+|--------|--------|------------------------------------------------------------|
+|    3   |     5  |  Color Map Specification.                                  |
+|    3   |     2  |  Color Map Origin.                                         |
+|        |        |  Integer ( lo-hi ) index of first color map entry.         |
+|    5   |     2  |  Color Map Length.                                         |
+|        |        |  Integer ( lo-hi ) count of color map entries.             |
+|    7   |     1  |  Color Map Entry Size.                                     |
+|        |        |  Number of bits in each color map entry.  16 for           |
+|        |        |  the Targa 16, 24 for the Targa 24, 32 for the Targa 32.   |
+|--------|--------|------------------------------------------------------------|
+|    8   |    10  |  Image Specification.                                      |
+|    8   |     2  |  X Origin of Image.                                        |
+|        |        |  Integer ( lo-hi ) X coordinate of the lower left corner   |
+|        |        |  of the image.                                             |
+|   10   |     2  |  Y Origin of Image.                                        |
+|        |        |  Integer ( lo-hi ) Y coordinate of the lower left corner   |
+|        |        |  of the image.                                             |
+|   12   |     2  |  Width of Image.                                           |
+|        |        |  Integer ( lo-hi ) width of the image in pixels.           |
+|   14   |     2  |  Height of Image.                                          |
+|        |        |  Integer ( lo-hi ) height of the image in pixels.          |
+|   16   |     1  |  Image Pixel Size.                                         |
+|        |        |  Number of bits in a stored pixel index.                   |
+|   17   |     1  |  Image Descriptor Byte.                                    |
+|        |        |  Bits 3-0 - number of attribute bits associated with each  |
+|        |        |             pixel.                                         |
+|        |        |  Bit 4    - reserved.  Must be set to 0.                   |
+|        |        |  Bit 5    - screen origin bit.                             |
+|        |        |             0 = Origin in lower left-hand corner.          |
+|        |        |             1 = Origin in upper left-hand corner.          |
+|        |        |             Must be 0 for Truevision images.               |
+|        |        |  Bits 7-6 - Data storage interleaving flag.                |
+|        |        |             00 = non-interleaved.                          |
+|        |        |             01 = two-way (even/odd) interleaving.          |
+|        |        |             10 = four way interleaving.                    |
+|        |        |             11 = reserved.                                 |
+|        |        |  This entire byte should be set to 0.  Don't ask me.       |
+|--------|--------|------------------------------------------------------------|
+|   18   | varies |  Image Identification Field.                               |
+|        |        |  Contains a free-form identification field of the length   |
+|        |        |  specified in byte 1 of the image record.  It's usually    |
+|        |        |  omitted ( length in byte 1 = 0 ), but can be up to 255    |
+|        |        |  characters.  If more identification information is        |
+|        |        |  required, it can be stored after the image data.          |
+|--------|--------|------------------------------------------------------------|
+
+Extension Area:
+
+* The inclusion of a scaled-down "postage stamp" copy of the image
+* Date and Time of image file creation
+* Author Name
+* Author Comments
+* Job Name
+* Job Accumulated Time
+* Gamma Value
+* Correct Color LUT
+* Pixel Aspect Ratio
+* Scan Line Offset Table
+* Key Color
+* Software Package Name and Version Number
+* Developer Definable Areas
+* Attribute (Alpha) channel Type
+* The ability for simple expansion
+*/
+
+static int iTGADecodeScanLine(imBinFile* handle, imbyte *DecodedBuffer, int width, int pixel_size)
+{
+  /* Decodes RLE packets from the file until width pixels of pixel_size bytes
+     (at most 4) are in DecodedBuffer. Bit 7 of a packet header selects a repeated
+     pixel, the low 7 bits hold the count minus 1. Returns IM_ERR_ACCESS on a read error. */
+  int i=0;
+  unsigned char runcount = 0; /* repetition count field */
+  imbyte pixel_buffer[4];
+
+  while (i < width)
+  {
+    imBinFileRead(handle, &runcount, 1, 1);
+    if (imBinFileError(handle))  /* never interpret a header that was not read */
+      return IM_ERR_ACCESS;
+
+    if (runcount & 0x80)
+    {
+      imBinFileRead(handle, pixel_buffer, pixel_size, 1);
+      if (imBinFileError(handle))
+        return IM_ERR_ACCESS;
+      runcount = (unsigned char)((runcount & 0x7F) + 1);
+      while (runcount-- && i < width)
+      {
+        memcpy(DecodedBuffer, pixel_buffer, pixel_size);
+        i++;
+        DecodedBuffer += pixel_size;
+      }
+    }
+    else
+    {
+      runcount++;
+      while (runcount-- && i < width)
+      {
+        imBinFileRead(handle, pixel_buffer, pixel_size, 1);
+        memcpy(DecodedBuffer, pixel_buffer, pixel_size);
+        i++;
+        DecodedBuffer += pixel_size;
+      }
+      if (imBinFileError(handle))
+        return IM_ERR_ACCESS;
+    }
+  }
+  return IM_ERR_NONE;
+}
+
+static inline int iTGAEqualPixel(const imbyte* Buffer1, const imbyte* Buffer2, int pixel_size)
+{
+  /* returns 1 when the first pixel_size bytes of both buffers are equal,
+     0 at the first difference (a pixel_size of 0 yields 1) */
+  for (; pixel_size != 0; pixel_size--, Buffer1++, Buffer2++)
+    if (*Buffer1 != *Buffer2)
+      return 0;
+  return 1;
+}
+
+static int iTGAEncodeScanLine(imbyte* EncodedBuffer, const imbyte* DecodedBuffer, int width, int pixel_size)
+{
+  imbyte pixel_buffer[4];
+  unsigned char runcount; /* Length of encoded pixel run          */
+  int x = 0;              /* Index into uncompressed data buffer  */
+  imbyte* StartBuffer = EncodedBuffer;
+  // RLE-packs width pixels of pixel_size bytes (at most 4, the size of pixel_buffer)
+  while (x < width)
+  {
+    runcount = 1;
+    memcpy(pixel_buffer, &DecodedBuffer[x*pixel_size], pixel_size);
+
+    // count equal pixels
+    while (x+runcount < width && runcount < 128 && 
+           iTGAEqualPixel(pixel_buffer, &DecodedBuffer[(x+runcount)*pixel_size], pixel_size))
+      runcount++; 
+
+    if (runcount == 1)
+    {
+      // count different pixels
+      while (x+runcount+1 < width && runcount < 128)
+      {
+        memcpy(pixel_buffer, &DecodedBuffer[(x+runcount)*pixel_size], pixel_size);
+        // stop before two equal neighbours, so that they can start a run packet
+        if (!iTGAEqualPixel(pixel_buffer, &DecodedBuffer[(x+runcount+1)*pixel_size], pixel_size))
+          runcount++; 
+        else
+          break;
+      }
+      // raw packet: the header holds count-1 and is followed by the pixels themselves
+      *EncodedBuffer++ = (imbyte)(runcount-1);
+
+      memcpy(EncodedBuffer, &DecodedBuffer[x*pixel_size], runcount*pixel_size);
+      EncodedBuffer += runcount*pixel_size;
+    }
+    else
+    {
+      *EncodedBuffer++ = (imbyte)(0x80 | (runcount-1));
+      // run packet: bit 7 set, count-1 in the low bits, followed by a single pixel
+      memcpy(EncodedBuffer, pixel_buffer, pixel_size);
+      EncodedBuffer += pixel_size;
+    }
+
+    x += runcount;
+  }
+
+  return EncodedBuffer-StartBuffer;      /* Return the number of unsigned chars written to buffer */
+}
+
+static const char* iTGACompTable[2] =   /* compression names handed to the imFormat constructor */
+{
+  "NONE",
+  "RLE"
+};
+
+class imFormatTGA: public imFormat   /* reader/writer for TGA image files */
+{
+  imBinFile* handle;          /* the binary file handle */
+  unsigned char id_lenght;    /* length of the image identification field (header byte 0) */
+  unsigned char map_type, image_type, map_bpp, bpp;  /* color map type, image type code, bits per map entry, bits per pixel */
+
+  int ReadPalette();          /* callers treat a zero return as failure */
+  int WritePalette();         /* callers treat a zero return as failure */
+  void FixRGB();
+  int LoadExtensionArea();    /* callers treat a zero return as failure */
+  int SaveExtensionArea();
+
+public:
+  imFormatTGA()
+    :imFormat("TGA", 
+              "Truevision Graphics Adapter File", 
+              "*.tga;*.icb;*.vst;*.tpic;", 
+              iTGACompTable, 
+              2, 
+              0)
+    {}
+  ~imFormatTGA() {}
+
+  int Open(const char* file_name);      /* opens for reading, checks image and map type */
+  int New(const char* file_name);       /* creates a file for writing */
+  void Close();                         /* closes the file */
+  void* Handle(int index);              /* index 0 gives the imBinFile handle, else NULL */
+  int ReadImageInfo(int index);         /* reads the remaining header fields */
+  int ReadImageData(void* data);
+  int WriteImageInfo();                 /* writes the header */
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+void imFormatRegisterTGA(void)   /* registers a new imFormatTGA instance through imFormatRegister */
+{
+  imFormatRegister(new imFormatTGA());
+}
+
+int imFormatTGA::Open(const char* file_name)
+{
+  /* Opens a TGA file and validates the first three header bytes. Returns
+     IM_ERR_OPEN when the file can not be opened, IM_ERR_ACCESS on a read error
+     and IM_ERR_FORMAT for an unsupported image type / color map type pair;
+     after a failed check the file is closed again. */
+  handle = imBinFileOpen(file_name);
+  if (!handle)
+    return IM_ERR_OPEN;
+
+  /* TGA data is read with intel (little endian) byte order */
+  imBinFileByteOrder(handle, IM_LITTLEENDIAN);
+
+  imBinFileRead(handle, &this->id_lenght, 1, 1);
+  imBinFileRead(handle, &this->map_type, 1, 1);
+  imBinFileRead(handle, &this->image_type, 1, 1);
+
+  int error = IM_ERR_NONE;
+
+  if (imBinFileError(handle))
+    error = IM_ERR_ACCESS;
+  else
+  {
+    /* accepted image types: 1, 2, 3 and their RLE variants 9, 10, 11 */
+    const int type = this->image_type;
+    const bool is_mapped = (type == 1 || type == 9);
+    const bool is_rle = (type == 9 || type == 10 || type == 11);
+    const bool type_ok = is_mapped || is_rle || type == 2 || type == 3;
+    const bool map_ok = (this->map_type == 0 || this->map_type == 1);
+
+    /* a color mapped image without a color map is rejected too */
+    if (!type_ok || !map_ok || (is_mapped && this->map_type == 0))
+      error = IM_ERR_FORMAT;
+    else
+      strcpy(this->compression, is_rle? "RLE": "NONE");
+  }
+
+  if (error != IM_ERR_NONE)
+  {
+    imBinFileClose(handle);
+    return error;
+  }
+
+  /* the image count is fixed at 1 */
+  this->image_count = 1;
+  return IM_ERR_NONE;
+}
+
+int imFormatTGA::New(const char* file_name)
+{
+  /* creates the file for writing with intel (little endian) byte order;
+     returns IM_ERR_OPEN when the file can not be created */
+  imBinFile* new_file = imBinFileNew(file_name);
+  this->handle = new_file;
+  if (new_file == NULL)
+    return IM_ERR_OPEN;
+  imBinFileByteOrder(new_file, IM_LITTLEENDIAN);
+  return IM_ERR_NONE;
+}
+
+void imFormatTGA::Close()   /* closes the binary file; this class owns no other resource */
+{
+  imBinFileClose(handle);
+}
+
+void* imFormatTGA::Handle(int index)
+{
+  /* only index 0 is served: it exposes the imBinFile handle */
+  if (index != 0)
+    return NULL;
+  return (void*)this->handle;
+}
+
+int imFormatTGA::ReadImageInfo(int index)
+{
+  /* Reads the header fields that follow the three bytes consumed by Open, the
+     optional image ID, the palette and the extension area. Returns IM_ERR_DATA
+     for unsupported values and IM_ERR_ACCESS on a read error. index is ignored. */
+  (void)index;
+  unsigned char byte_value = 0;
+  unsigned short word_value = 0;
+
+  this->file_data_type = IM_BYTE;
+  if (this->image_type == 1 || this->image_type == 9)
+    this->file_color_mode = IM_MAP;
+  else if (this->image_type == 2 || this->image_type == 10)
+  {
+    this->file_color_mode = IM_RGB;
+    this->file_color_mode |= IM_PACKED;
+  }
+  else if (this->image_type == 3 || this->image_type == 11)
+    this->file_color_mode = IM_GRAY;
+  else
+    return IM_ERR_DATA;
+
+  if (this->map_type == 0)
+    imBinFileSeekOffset(handle, 5);  // jump color map information
+  else
+  {
+    /* jump 2 bytes (first entry index) */
+    imBinFileSeekOffset(handle, 2);
+    imBinFileRead(handle, &word_value, 1, 2);
+    this->palette_count = word_value;
+
+    imBinFileRead(handle, &this->map_bpp, 1, 1);
+    if (this->map_bpp == 15) this->map_bpp = 16;
+
+    if (this->map_bpp != 16 && this->map_bpp != 24 && this->map_bpp != 32)
+      return IM_ERR_DATA;
+  }
+
+  /* reads the image origin (X-Origin, Y-Origin) */
+  unsigned short xmin = 0, ymin = 0;
+  imBinFileRead(handle, &xmin, 1, 2);
+  imBinFileRead(handle, &ymin, 1, 2);
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  imAttribTable* attrib_table = AttribTable();
+  /* the origin is kept when either coordinate is set (it was lost if one was 0) */
+  if (xmin || ymin)
+  {
+    attrib_table->Set("XScreen", IM_USHORT, 1, &xmin);
+    attrib_table->Set("YScreen", IM_USHORT, 1, &ymin);
+  }
+
+  /* reads the image width */
+  imBinFileRead(handle, &word_value, 1, 2);
+  this->width = word_value;
+
+  /* reads the image height */
+  imBinFileRead(handle, &word_value, 1, 2);
+  this->height = word_value;
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  imBinFileRead(handle, &this->bpp, 1, 1);
+
+  if (this->bpp > 8 && imColorModeSpace(this->file_color_mode) != IM_RGB)
+    return IM_ERR_DATA;
+
+  if (this->bpp == 15) this->bpp = 16;
+
+  if (this->bpp != 8 && this->bpp != 16 &&
+      this->bpp != 24 && this->bpp != 32)
+    return IM_ERR_DATA;
+
+  if (this->bpp == 32)
+    this->file_color_mode |= IM_ALPHA;
+
+  // image descriptor
+  imBinFileRead(handle, &byte_value, 1, 1);
+
+  if (byte_value & 0x20)
+    this->file_color_mode |= IM_TOPDOWN;
+
+  // image ID
+  if (this->id_lenght)
+  {
+    char desc[256];
+    imBinFileRead(handle, desc, this->id_lenght, 1);
+    desc[this->id_lenght] = 0;
+    attrib_table->Set("Title", IM_BYTE, this->id_lenght+1, desc);
+  }
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  if (this->map_type)
+  {
+    if (!ReadPalette())
+      return IM_ERR_ACCESS;
+  }
+  /* looks for the extension signature in the 18 bytes read at offset -18 */
+  long cur_offset = imBinFileTell(handle);
+  imBinFileSeekFrom(handle, -18);
+  char ext_sig[18] = {0};  /* zeroed: a failed read then never matches */
+  imBinFileRead(handle, ext_sig, 18, 1);
+  if (ext_sig[17] == 0 && imStrEqual(ext_sig, "TRUEVISION-XFILE."))
+  {
+    if (!LoadExtensionArea())
+      return IM_ERR_ACCESS;
+  }
+  imBinFileSeekTo(handle, cur_offset);
+
+  return IM_ERR_NONE;
+}
+
+int imFormatTGA::WriteImageInfo()
+{
+  /* Writes the TGA header, the image ID taken from the "Title" attribute and the palette.
+     Returns IM_ERR_DATA for an unsupported color space, IM_ERR_ACCESS on a write error. */
+  unsigned char byte_value;
+  unsigned short word_value;
+  this->map_bpp = 0;
+  this->map_type = 0;
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);
+  switch (this->file_color_mode)
+  {
+  case IM_BINARY:
+    this->convert_bpp = -1; // expand 1 to 255, then continue as gray (intended fallthrough)
+  case IM_GRAY:
+    this->bpp = 8;
+    if (imStrEqual(this->compression, "RLE"))
+      this->image_type = 11;
+    else
+      this->image_type = 3;
+    break;
+  case IM_MAP:
+    this->bpp = 8;
+    this->map_bpp = 24;
+    this->map_type = 1;
+    if (imStrEqual(this->compression, "RLE"))
+      this->image_type = 9;
+    else
+      this->image_type = 1;
+    break;
+  case IM_RGB:
+    this->bpp = 24;
+    this->file_color_mode |= IM_PACKED;
+    if (imStrEqual(this->compression, "RLE"))
+      this->image_type = 10;
+    else
+      this->image_type = 2;
+    break;
+  default:  /* image_type and bpp would otherwise keep stale values */
+    return IM_ERR_DATA;
+  }
+
+  if (this->image_type > 3)
+  {
+    // allocates more than enough since the compression algorithm can be inefficient
+    this->line_buffer_extra += 2*this->width*imColorModeDepth(this->file_color_mode);
+  }
+
+  imAttribTable* attrib_table = AttribTable();
+  /* writes the TGA file header */
+  int length = 0;
+  const char* desc_attrib = (const char*)attrib_table->Get("Title", NULL, &length);
+  if (desc_attrib)
+  {
+    if (length > 255)
+      this->id_lenght = 255;
+    else
+      this->id_lenght = (imbyte)length;
+  }
+  else
+    this->id_lenght = 0;
+
+  /* IDLength */
+  imBinFileWrite(handle, &this->id_lenght, 1, 1);
+
+  /* Color Map Type */
+  imBinFileWrite(handle, &this->map_type, 1, 1);
+
+  /* Image Type */
+  imBinFileWrite(handle, &this->image_type, 1, 1);
+
+  /* Color Map Specification - 1st entry index */
+  word_value = 0;
+  imBinFileWrite(handle, &word_value, 1, 2);
+
+  /* Color map length */
+  word_value = (unsigned short) this->palette_count;
+  imBinFileWrite(handle, &word_value, 1, 2);
+
+  /* Color Map Entry size */
+  byte_value = this->map_type? this->map_bpp: (imbyte)0;
+  imBinFileWrite(handle, &byte_value, 1, 1);
+
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  unsigned short xmin = 0, ymin = 0;
+  const void* attrib_data = attrib_table->Get("XScreen");
+  if (attrib_data) xmin = *(unsigned short*)attrib_data;
+  attrib_data = attrib_table->Get("YScreen");
+  if (attrib_data) ymin = *(unsigned short*)attrib_data;
+
+  /* X-origin of image */
+  word_value = xmin;
+  imBinFileWrite(handle, &word_value, 1, 2);
+
+  /* Y-origin of image */
+  word_value = ymin;
+  imBinFileWrite(handle, &word_value, 1, 2);
+
+  /* Image Width */
+  word_value = (imushort)this->width;
+  imBinFileWrite(handle, &word_value, 1, 2);
+
+  /* Image Height */
+  word_value = (imushort)this->height;
+  imBinFileWrite(handle, &word_value, 1, 2);
+
+  /* Pixel Depth */
+  imBinFileWrite(handle, &this->bpp, 1, 1);
+
+  /* Image Descriptor */
+  byte_value = 0x00;
+  imBinFileWrite(handle, &byte_value, 1, 1);
+
+  /* image ID; a title longer than 255 bytes is cut to 254 bytes plus a terminator */
+  if (this->id_lenght)
+  {
+    if (length > 255)
+    {
+      imBinFileWrite(handle, (void*)desc_attrib, 254, 1);
+      byte_value = 0x00;
+      imBinFileWrite(handle, &byte_value, 1, 1);
+    }
+    else
+      imBinFileWrite(handle, (void*)desc_attrib, this->id_lenght, 1);
+  }
+
+  /* tests if everything was ok */
+  if (imBinFileError(handle))
+    return IM_ERR_ACCESS;
+
+  if (this->map_type)
+  {
+    if (!WritePalette())
+      return IM_ERR_ACCESS;
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Decodes palette entry `c` of a raw TGA color map into an IM packed color.
+   16-bit entries hold three 5-bit fields; 24/32-bit entries are read as B,G,R. */
+static long iTGARGB2Color(int c, unsigned char *colors, int map_bpp)
+{
+  if (map_bpp != 16)
+  {
+    // 24 or 32 bits per entry: the third byte is red, a 4th byte is not used
+    const unsigned char* entry = colors + c * (map_bpp / 8);
+    return imColorEncode(entry[2], entry[1], entry[0]);
+  }
+
+  // 16 bits per entry: each 5-bit field is scaled by 8
+  const unsigned short* packed_entries = (const unsigned short*)colors;
+  unsigned int packed = packed_entries[c];
+
+  unsigned int red5   = (packed >> 10) & 0x1F;
+  unsigned int green5 = (packed >>  5) & 0x1F;
+  unsigned int blue5  =  packed        & 0x1F;
+
+  return imColorEncode((imbyte)(red5 * 8),
+                       (imbyte)(green5 * 8),
+                       (imbyte)(blue5 * 8));
+}
+
+/* Reads the file color map into this->palette; returns 1 on success, 0 on allocation or read failure. */
+int imFormatTGA::ReadPalette()
+{
+  int map_size = imFileLineSizeAligned(this->palette_count, this->map_bpp, 1);
+  unsigned char* tga_colors = (unsigned char*) malloc(map_size);
+  if (!tga_colors) return 0;
+
+  /* reads the color palette; the temporary buffer is released on failure too */
+  imBinFileRead(handle, tga_colors, map_size, 1);
+  if (imBinFileError(handle)) { free(tga_colors); return 0; }
+
+  /* 16-bit entries are byte-swapped when the CPU is big-endian */
+  if (imBinCPUByteOrder() == IM_BIGENDIAN && this->map_bpp == 16)
+    imBinSwapBytes2(tga_colors, map_size/2);
+
+  /* convert the color map to the IM format */
+  for (int c = 0; c < this->palette_count; c++)
+    this->palette[c] = iTGARGB2Color(c, tga_colors, this->map_bpp);
+
+  free(tga_colors);
+  return 1;
+}
+
+int imFormatTGA::WritePalette()
+{
+  unsigned char tga_color[256*3];
+  
+  /* convert the color map from the IM format */
+  for (int c = 0; c < this->palette_count; c++)
+  {
+    int i = 3*c;
+    imColorDecode(&tga_color[i+2], &tga_color[i+1], &tga_color[i], this->palette[c]);
+  }
+  
+  /* writes the color palette */
+  imBinFileWrite(handle, tga_color, this->palette_count * 3, 1);
+
+  if (imBinFileError(handle))
+    return 0;
+  
+  return 1;
+}
+
+/* Reads the TGA extension area and publishes its fields as attributes
+   (Author, Description, DateTimeModified, JobName, Software, SoftwareVersion, Gamma).
+   The area offset is the 4-byte value read after imBinFileSeekFrom(handle, -26).
+   Returns 1 on success, 0 when a file operation reports an error. */
+int imFormatTGA::LoadExtensionArea()
+{
+  unsigned int dword_value;
+  imBinFileSeekFrom(handle, -26);  
+
+  // extension offset
+  imBinFileRead(handle, &dword_value, 1, 4);
+  if (imBinFileError(handle)) return 0;
+
+  imBinFileSeekTo(handle, dword_value);  
+  if (imBinFileError(handle)) return 0;
+
+  unsigned short word_value;
+  imbyte buffer[512];
+  imAttribTable* attrib_table = AttribTable();
+
+  // extension size
+  imBinFileSeekOffset(handle, 2);  
+
+  // author name
+  imBinFileRead(handle, buffer, 41, 1);
+  if (buffer[0] != 0)
+    attrib_table->Set("Author", IM_BYTE, imStrNLen((char*)buffer, 41)+1, buffer);
+
+  // author comments: 4 lines of 81 bytes, joined into one string
+  imBinFileRead(handle, buffer, 324, 1);
+  if (buffer[0] != 0)
+  {
+    int size1 = imStrNLen((char*)buffer, 81);
+    for (int i = 1; i < 4; i++)
+    {
+      int sizei = imStrNLen((char*)buffer + i*81, 81);
+      if (sizei) 
+      {
+        memmove(buffer + size1, buffer + i*81, sizei);  // both ranges are in buffer and may overlap
+        size1 += sizei;
+      }
+    }
+    buffer[size1] = 0;
+
+    attrib_table->Set("Description", IM_BYTE, size1+1, buffer);
+  }
+
+  if (imBinFileError(handle)) return 0;
+
+  {
+    tm ttm;
+    ttm.tm_wday = 0;
+    ttm.tm_yday = 0;
+    ttm.tm_isdst = -1;
+
+    int valid = 0;  // the stamp is used only when month, day or year is non-zero
+    imBinFileRead(handle, &word_value, 1, 2); // month
+    ttm.tm_mon = word_value-1;
+    if (word_value) valid = 1;
+    imBinFileRead(handle, &word_value, 1, 2); // day
+    ttm.tm_mday = word_value;
+    if (word_value) valid = 1;
+    imBinFileRead(handle, &word_value, 1, 2); // year
+    ttm.tm_year = word_value-1900;
+    if (word_value) valid = 1;
+    imBinFileRead(handle, &word_value, 1, 2); // hour
+    ttm.tm_hour = word_value;
+    imBinFileRead(handle, &word_value, 1, 2); // minute
+    ttm.tm_min = word_value;
+    imBinFileRead(handle, &word_value, 1, 2); // seconds
+    ttm.tm_sec = word_value;
+
+    if (imBinFileError(handle)) return 0;
+
+    if (valid)
+    {
+      time_t tt = mktime(&ttm);
+      char* str = ctime(&tt);
+      if (str) 
+      {
+        int size = strlen(str);
+        str[size-1] = 0;   // remove "\n"
+        attrib_table->Set("DateTimeModified", IM_BYTE, size, str);
+      }
+    }
+  }
+
+  // job name
+  imBinFileRead(handle, buffer, 41, 1);
+  if (buffer[0] != 0)
+    attrib_table->Set("JobName", IM_BYTE, imStrNLen((char*)buffer, 41)+1, buffer);
+
+  // job time
+  imBinFileSeekOffset(handle, 6);  
+
+  // Software
+  imBinFileRead(handle, buffer, 41, 1);
+  if (buffer[0] != 0)
+    attrib_table->Set("Software", IM_BYTE, imStrNLen((char*)buffer, 41)+1, buffer);
+
+  if (imBinFileError(handle)) return 0;
+
+  // Software Version: a 2-byte number (version*100) followed by a 1-byte letter
+  imbyte version_letter = 0;
+  imBinFileRead(handle, &word_value, 1, 2);
+  imBinFileRead(handle, &version_letter, 1, 1);  // always consumed so the following fields stay aligned
+  if (word_value)
+  {
+    int size = sprintf((char*)buffer, "%f", (double)word_value / 100.0);
+    buffer[size] = version_letter;
+    buffer[size+1] = 0;
+    attrib_table->Set("SoftwareVersion", IM_BYTE, size+2, buffer);  // digits + letter + terminator
+  }
+
+  // key color, aspect ratio
+  imBinFileSeekOffset(handle, 8); 
+
+  // gamma
+  imBinFileRead(handle, &word_value, 1, 2); // num
+  if (word_value)
+  {
+    float gamma = (float)word_value;
+    imBinFileRead(handle, &word_value, 1, 2); // den
+    if (word_value)
+    {
+      gamma /= (float)word_value;
+      attrib_table->Set("Gamma", IM_FLOAT, 1, &gamma);
+    }
+  }
+
+  if (imBinFileError(handle)) return 0;
+
+  return 1;
+}
+
+/* Approximates fvalue as the fraction num/den.
+   Whole numbers yield den=1 and exact reciprocals of whole numbers yield num=1;
+   otherwise a positive value and den are multiplied by 8 until one reaches 2^28. */
+static void iGetRational(float fvalue, int *num, int *den)
+{
+  const long scale_limit = 1L << (31-3);
+  const int scale_step = 1 << 3;
+  float whole = floorf(fvalue);
+  if (whole == fvalue)
+  {
+    *num = (int)whole;
+    *den = 1;
+    return;
+  }
+
+  float inverse = 1.0f/fvalue;
+  float inverse_whole = floorf(inverse);
+  if (inverse_whole == inverse)
+  {
+    *den = (int)inverse_whole;
+    *num = 1;
+    return;
+  }
+
+  // values that are not positive are only rounded (den stays 1)
+  for (*den = 1; fvalue > 0 && fvalue < scale_limit && *den < scale_limit; *den *= scale_step)
+    fvalue *= scale_step;
+  *num = imRound(fvalue);
+}
+
+/* Appends the TGA extension area (declared size 495 bytes) followed by the file footer.
+   Author, Description, JobName, Software and Gamma come from the attribute table;
+   when DateTimeModified is present the current local time is what gets stored.
+   Returns 1 on success, 0 when a file operation reports an error. */
+int imFormatTGA::SaveExtensionArea()
+{
+  unsigned int dword_value;
+  unsigned short word_value;
+
+  // get offset before write
+  long ext_offset = imBinFileTell(handle);
+
+  imbyte buffer[512];
+  memset(buffer, 0, 512);
+
+  imAttribTable* attrib_table = AttribTable();
+
+  // extension size
+  word_value = 495;
+  imBinFileWrite(handle, &word_value, 1, 2);
+
+  // author name
+  int attrib_size;
+  const void* attrib_data = attrib_table->Get("Author", NULL, &attrib_size);
+  if (attrib_data)
+  {
+    int size = attrib_size > 41? 40: attrib_size;
+    imBinFileWrite(handle, (void*)attrib_data, size, 1);
+    if (size < 41) imBinFileWrite(handle, buffer, 41-size, 1);  // zero padding up to 41 bytes
+  }
+  else
+    imBinFileWrite(handle, buffer, 41, 1);
+
+  // author comments: split into at most 4 lines of 81 bytes
+  attrib_data = attrib_table->Get("Description", NULL, &attrib_size);
+  if (attrib_data)
+  {
+    int size = 0, size2 = 0, i = 0;
+    while(attrib_size && i < 4)
+    {
+      int line_size;
+      if (attrib_size > 81)
+        line_size = 80;
+      else
+        line_size = attrib_size;
+
+      memcpy(buffer + size, (imbyte*)attrib_data + size2, line_size);
+
+      attrib_size -= line_size;
+      size2 += line_size;
+      size += line_size;
+      i++;
+
+      int remain = 81-line_size;
+      if (remain)
+      {
+        memset(buffer + size, 0, remain);
+        size += remain;
+      }
+    }
+
+    imBinFileWrite(handle, buffer, 324, 1);
+    memset(buffer, 0, 512);
+  }
+  else
+    imBinFileWrite(handle, buffer, 324, 1);
+
+  if (imBinFileError(handle)) return 0;
+
+  attrib_data = attrib_table->Get("DateTimeModified");
+  if (attrib_data)
+  {
+    time_t cur_time;
+    time(&cur_time);
+    tm* ttm = localtime(&cur_time);
+
+    word_value = (imushort)ttm->tm_mon+1;
+    imBinFileWrite(handle, &word_value, 1, 2); // month
+    word_value = (imushort)ttm->tm_mday;
+    imBinFileWrite(handle, &word_value, 1, 2); // day
+    word_value = (imushort)ttm->tm_year+1900;
+    imBinFileWrite(handle, &word_value, 1, 2); // year
+    word_value = (imushort)ttm->tm_hour;
+    imBinFileWrite(handle, &word_value, 1, 2); // hour
+    word_value = (imushort)ttm->tm_min;
+    imBinFileWrite(handle, &word_value, 1, 2); // minute
+    word_value = (imushort)ttm->tm_sec;
+    imBinFileWrite(handle, &word_value, 1, 2); // seconds
+
+    if (imBinFileError(handle)) return 0;
+  }
+  else
+    imBinFileWrite(handle, buffer, 12, 1);
+
+  // job name
+  attrib_data = attrib_table->Get("JobName", NULL, &attrib_size);
+  if (attrib_data)
+  {
+    int size = attrib_size > 41? 40: attrib_size;
+    imBinFileWrite(handle, (void*)attrib_data, size, 1);
+    if (size < 41) imBinFileWrite(handle, buffer, 41-size, 1);  // zero padding up to 41 bytes
+  }
+  else
+    imBinFileWrite(handle, buffer, 41, 1);
+
+  // job time
+  imBinFileWrite(handle, buffer, 6, 1);
+
+  // Software
+  attrib_data = attrib_table->Get("Software", NULL, &attrib_size);
+  if (attrib_data)
+  {
+    int size = attrib_size > 41? 40: attrib_size;
+    imBinFileWrite(handle, (void*)attrib_data, size, 1);
+    if (size < 41) imBinFileWrite(handle, buffer, 41-size, 1);  // zero padding up to 41 bytes
+  }
+  else
+    imBinFileWrite(handle, buffer, 41, 1);
+
+  if (imBinFileError(handle)) return 0;
+
+  // Software Version, key color, aspect ratio
+  imBinFileWrite(handle, buffer, 11, 1);
+
+  // gamma, written as two 16-bit words: numerator then denominator
+  attrib_data = attrib_table->Get("Gamma");
+  if (attrib_data)
+  {
+    float gamma = *(float*)attrib_data;
+
+    int num, den;
+    iGetRational(gamma, &num, &den);
+    // iGetRational may return terms wider than 16 bits: halve both until they fit
+    while (num > 0xFFFF || den > 0xFFFF) { num >>= 1; den >>= 1; }
+    if (num <= 0 || den <= 0) num = den = 0;  // not representable: written as zeros (LoadExtensionArea ignores it)
+
+    word_value = (imushort)num;
+    imBinFileWrite(handle, &word_value, 1, 2); // num
+    word_value = (imushort)den;
+    imBinFileWrite(handle, &word_value, 1, 2); // den
+  }
+  else
+    imBinFileWrite(handle, buffer, 4, 1);
+
+  // Color Correction, Postage Stamp, Scanline Offset, Attributes Type
+  imBinFileWrite(handle, buffer, 13, 1);
+
+  // FOOTER
+
+  // extension offset
+  dword_value = ext_offset;
+  imBinFileWrite(handle, &dword_value, 1, 4);
+
+  // Developer Directory Offset
+  imBinFileWrite(handle, buffer, 4, 1);
+
+  // signature, reserved, zero string terminator
+  imBinFileWrite(handle, (void*)"TRUEVISION-XFILE.\0", 18, 1);
+
+  if (imBinFileError(handle)) return 0;
+
+  return 1;
+}
+
+/* Converts the current line buffer in place.
+   16 bpp: every word is expanded to three bytes (5-bit fields scaled by 8).
+   24/32 bpp: the first and third byte of every pixel are exchanged. */
+void imFormatTGA::FixRGB()
+{
+  imbyte* pixels = (imbyte*)this->line_buffer;
+
+  if (this->bpp != 16)
+  {
+    // 24 and 32: BGR <-> RGB, BGRA <-> RGBA
+    const int step = this->bpp/8;
+    imbyte* first = pixels;
+    for (int col = 0; col < this->width; col++, first += step)
+    {
+      imbyte* third = first + 2;
+      imbyte saved = *first;     // swap R and B
+      *first = *third;
+      *third = saved;
+    }
+    return;
+  }
+
+  /* inverts the WORD values when the CPU is big-endian */
+  if (imBinCPUByteOrder() == IM_BIGENDIAN)
+    imBinSwapBytes2(this->line_buffer, this->width);
+
+  const imushort* words = (const imushort*)this->line_buffer;
+
+  // walk backwards so the 3 bytes written for a pixel never cover words still to be read
+  for (int col = this->width; col-- > 0; )
+  {
+    const imushort packed = words[col];
+    imbyte* out = pixels + col*3;
+    out[0] = (imbyte)(((packed & 0x7C00) >> 10)*8);
+    out[1] = (imbyte)(((packed & 0x03E0) >>  5)*8);
+    out[2] = (imbyte)( (packed & 0x001F)       *8);
+  }
+}
+
+/* Reads every row of the image, going through iTGADecodeScanLine when the image
+   type is above 3, and hands each line to imFileLineBufferRead.
+   Returns IM_ERR_NONE, IM_ERR_ACCESS on a read failure or IM_ERR_COUNTER when the counter stops. */
+int imFormatTGA::ReadImageData(void* data)
+{
+  imCounterTotal(this->counter, this->height, "Reading TGA...");
+
+  // 16 bpp lines take 2 bytes per pixel in the file
+  const int raw_size = (this->bpp == 16)? this->width*2: this->line_buffer_size;
+
+  int row = 0;
+  while (row < this->height)
+  {
+    if (this->image_type <= 3)
+    {
+      imBinFileRead(handle, this->line_buffer, raw_size, 1);
+      if (imBinFileError(handle)) return IM_ERR_ACCESS;
+    }
+    else if (iTGADecodeScanLine(handle, (imbyte*)this->line_buffer, this->width, this->bpp/8) == IM_ERR_ACCESS)
+      return IM_ERR_ACCESS;
+
+    if (this->bpp > 8)
+      FixRGB();
+
+    imFileLineBufferRead(this, data, row, 0);
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+
+    row++;
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Writes every row of the image (through iTGAEncodeScanLine when the image type is
+   above 3) and then appends the extension area with SaveExtensionArea.
+   Returns IM_ERR_NONE, IM_ERR_ACCESS on a write failure or IM_ERR_COUNTER when the counter stops. */
+int imFormatTGA::WriteImageData(void* data)
+{
+  imCounterTotal(this->counter, this->height, "Writing TGA...");
+
+  // encoded output is staged in the extra space after the line buffer
+  imbyte* packed_line = (this->image_type > 3)?
+                        (imbyte*)this->line_buffer + this->line_buffer_size: NULL;
+
+  int row = 0;
+  while (row < this->height)
+  {
+    imFileLineBufferWrite(this, data, row, 0);
+    if (this->bpp > 8)
+      FixRGB();
+
+    if (this->image_type <= 3)
+      imBinFileWrite(handle, this->line_buffer, this->line_buffer_size, 1);
+    else
+    {
+      int packed_size = iTGAEncodeScanLine(packed_line, (imbyte*)this->line_buffer, this->width, this->bpp/8);
+      imBinFileWrite(handle, packed_line, packed_size, 1);
+    }
+
+    if (imBinFileError(handle))
+      return IM_ERR_ACCESS;
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+
+    row++;
+  }
+
+  // the extension area is written after the last row
+  return SaveExtensionArea()? IM_ERR_NONE: IM_ERR_ACCESS;
+}
+
+/* Reports whether an image can be stored as TGA: IM_ERR_DATA for the rejected color spaces or
+   non-byte data, IM_ERR_COMPRESS for a compression other than "NONE"/"RLE", else IM_ERR_NONE. */
+int imFormatTGA::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  const int space = imColorModeSpace(color_mode);
+  const int rejected_space = space == IM_YCBCR || space == IM_LAB || space == IM_LUV ||
+                             space == IM_XYZ || space == IM_CMYK;
+
+  if (rejected_space || data_type != IM_BYTE)
+    return IM_ERR_DATA;
+
+  // a missing or empty compression name is accepted as is
+  if (!compression || compression[0] == 0)
+    return IM_ERR_NONE;
+
+  if (imStrEqual(compression, "NONE") || imStrEqual(compression, "RLE"))
+    return IM_ERR_NONE;
+
+  return IM_ERR_COMPRESS;
+}
diff --git a/src/im_format_tiff.cpp b/src/im_format_tiff.cpp
new file mode 100644
index 0000000..98467f9
--- /dev/null
+++ b/src/im_format_tiff.cpp
@@ -0,0 +1,1421 @@
+/** \file
+ * \brief TIFF - Tagged Image File Format
+ *
+ * See Copyright Notice in im_lib.h
+ * See libTIFF Copyright Notice in tiff.h
+ * $Id: im_format_tiff.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_util.h"
+#include "im_format_all.h"
+#include "im_counter.h"
+
+#include "tiffiop.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <memory.h>
+
+
+#define TIFFTAG_GEOPIXELSCALE        33550  /* tag numbers listed under "GeoTIFF Tags" in iTiffFieldInfo */
+#define TIFFTAG_INTERGRAPH_MATRIX    33920
+#define TIFFTAG_GEOTIEPOINTS         33922
+#define TIFFTAG_GEOTRANSMATRIX       34264
+#define TIFFTAG_GEOKEYDIRECTORY      34735
+#define TIFFTAG_GEODOUBLEPARAMS      34736
+#define TIFFTAG_GEOASCIIPARAMS       34737
+
+#define TIFFTAG_CFAREPEATPATTERNDIM	33421	/* dimensions of CFA pattern */
+#define TIFFTAG_CFAPATTERN		33422	/* color filter array pattern */
+#define	    PHOTOMETRIC_CFA		32803	/* color filter array */
+#define	    PHOTOMETRIC_LINEARRAW		34892	/* NOTE(review): presumably a linear raw photometric value - confirm */
+
+static const TIFFFieldInfo iTiffFieldInfo[] =  /* extra field definitions; NOTE(review): presumably registered with libTIFF elsewhere in this file - not visible here */
+{ /* NOTE(review): columns assumed to follow libTIFF's TIFFFieldInfo: tag, read count, write count, type, field bit, ok-to-change, pass-count, name */
+  /* missing in libTIFF (fixed in libtiff 4.0) */
+  { EXIFTAG_COLORSPACE, 1, 1, TIFF_SHORT, FIELD_CUSTOM, TRUE,	FALSE, "ColorSpace" },
+
+  /* Patch from Dave Coffin (Used for DNG) */
+  { TIFFTAG_WHITELEVEL,	-2, -1,	TIFF_LONG,	FIELD_CUSTOM,  0,	1,	"WhiteLevel" },
+  { TIFFTAG_WHITELEVEL,	-2, -1,	TIFF_SHORT,	FIELD_CUSTOM,  0,	1,	"WhiteLevel" },
+  { TIFFTAG_CFAREPEATPATTERNDIM, 2, 2, TIFF_SHORT,	FIELD_CUSTOM, 0,	0,	"CFARepeatPatternDim" },
+  { TIFFTAG_CFAPATTERN,	-1, -1,	TIFF_BYTE,	FIELD_CUSTOM, 0,	1,	"CFAPattern" },
+
+  /* GeoTIFF Tags (tag numbers are defined locally above this table) */
+  { TIFFTAG_GEOPIXELSCALE,	-1,-1, TIFF_DOUBLE,	FIELD_CUSTOM, TRUE,	TRUE,	"GeoPixelScale" },
+  { TIFFTAG_INTERGRAPH_MATRIX,-1,-1, TIFF_DOUBLE,	FIELD_CUSTOM, TRUE,	TRUE,	"Intergraph TransformationMatrix" },
+  { TIFFTAG_GEOTIEPOINTS,	-1,-1, TIFF_DOUBLE,	FIELD_CUSTOM, TRUE,	TRUE,	"GeoTiePoints" },
+  { TIFFTAG_GEOTRANSMATRIX,	-1,-1, TIFF_DOUBLE,	FIELD_CUSTOM, TRUE,	TRUE,	"GeoTransformationMatrix" },
+  { TIFFTAG_GEOKEYDIRECTORY,-1,-1, TIFF_SHORT,	FIELD_CUSTOM, TRUE,	TRUE,	"GeoKeyDirectory" },
+  { TIFFTAG_GEODOUBLEPARAMS,	-1,-1, TIFF_DOUBLE,	FIELD_CUSTOM, TRUE,	TRUE,	"GeoDoubleParams" },
+  { TIFFTAG_GEOASCIIPARAMS,	-1,-1, TIFF_ASCII,	FIELD_CUSTOM, TRUE,	FALSE, "GeoASCIIParams" }
+};
+
+#define IMTIFF_NUMCOMP 15  /* number of entries in iTIFFCompIdTable and in iTIFFCompTable */
+
+/* this list must be sorted because of bsearch (see iTIFFGetCompIndex); entry i pairs with iTIFFCompTable[i] */
+static uint16 iTIFFCompIdTable [IMTIFF_NUMCOMP] = 
+{
+  COMPRESSION_NONE,                       
+  COMPRESSION_CCITTRLE,                   
+  COMPRESSION_CCITTFAX3,                  
+  COMPRESSION_CCITTFAX4,                  
+  COMPRESSION_LZW,                        
+  COMPRESSION_JPEG,                       /* COMPRESSION_OJPEG is mapped to this entry by iTIFFGetCompIndex */
+  COMPRESSION_ADOBE_DEFLATE,              
+  COMPRESSION_NEXT,                       
+  COMPRESSION_CCITTRLEW,                  
+  COMPRESSION_PACKBITS,                   /* named "RLE" in iTIFFCompTable */
+  COMPRESSION_THUNDERSCAN,                
+  COMPRESSION_PIXARLOG,                   
+  COMPRESSION_DEFLATE,                    
+  COMPRESSION_SGILOG,
+  COMPRESSION_SGILOG24
+};
+
+/* bsearch comparator: orders two uint16 compression ids ascending (-1, 0 or 1). */
+static int iTIFFCompareCompID(const void *elem1, const void *elem2)
+{
+  const uint16 left  = *(const uint16 *)elem1;
+  const uint16 right = *(const uint16 *)elem2;
+
+  if (left == right)
+    return 0;
+
+  if (left < right)
+    return -1;
+  return 1;
+}
+
+/* Returns the position of a compression id in iTIFFCompIdTable, or -1 when it is not listed.
+   COMPRESSION_OJPEG is looked up as COMPRESSION_JPEG. */
+static int iTIFFGetCompIndex(uint16 Compression)
+{
+  uint16 key = Compression;
+  if (key == COMPRESSION_OJPEG)
+    key = COMPRESSION_JPEG;
+
+  const size_t entry_count = sizeof(iTIFFCompIdTable)/sizeof(uint16);
+  void* found = bsearch(&key, iTIFFCompIdTable, entry_count, sizeof(uint16), iTIFFCompareCompID);
+  if (!found)
+    return -1;
+  return ((uint16 *)found - iTIFFCompIdTable);
+}
+
+/* this list must follow iTIFFCompIdTable order: iTIFFCompFind returns iTIFFCompIdTable[i] for the name at index i */
+static const char* iTIFFCompTable[IMTIFF_NUMCOMP] =  /* also passed to the imFormat constructor by imFormatTIFF */
+{
+  "NONE",
+  "CCITTRLE",
+  "CCITTFAX3",
+  "CCITTFAX4",
+  "LZW",
+  "JPEG",
+  "ADOBEDEFLATE",
+  "NEXT",
+  "CCITTRLEW",
+  "RLE",  /* same index as COMPRESSION_PACKBITS */
+  "THUNDERSCAN",
+  "PIXARLOG",    
+  "DEFLATE",
+  "SGILOG",
+  "SGILOG24"
+};
+
+/* Maps a compression name to its id in iTIFFCompIdTable; (uint16)-1 when the name is unknown. */
+static uint16 iTIFFCompFind(const char* compression)
+{
+  int index = 0;
+  while (index < IMTIFF_NUMCOMP && !imStrEqual(compression, iTIFFCompTable[index]))
+    index++;
+
+  if (index == IMTIFF_NUMCOMP) return (uint16)-1;
+  return iTIFFCompIdTable[index];
+}
+
+/* Chooses the compression used when none is requested, from color space and data type.
+   The tests are ordered: the first matching rule wins, LZW is the fallback. */
+static uint16 iTIFFCompDefault(int color_space, int data_type)
+{
+  uint16 choice = COMPRESSION_LZW;
+
+  if (color_space == IM_BINARY)
+    choice = COMPRESSION_CCITTRLE;
+  else if (color_space == IM_MAP)
+    choice = COMPRESSION_PACKBITS;
+  else if (color_space == IM_YCBCR && data_type == IM_BYTE)
+    choice = COMPRESSION_JPEG;
+  else if (color_space == IM_XYZ)
+    choice = COMPRESSION_SGILOG;
+  else if (data_type >= IM_FLOAT)
+    choice = COMPRESSION_NONE;   // IM_FLOAT and any higher data type id stay uncompressed
+
+  return choice;
+}
+
+/* Resolves the compression id: an empty name picks the default for the image. */
+static uint16 iTIFFCompCalc(const char* compression, int color_mode, int data_type)
+{
+  if (compression[0] != 0)
+    return iTIFFCompFind(compression);
+
+  // no explicit name: decide from color space and data type
+  int color_space = imColorModeSpace(color_mode);
+  return iTIFFCompDefault(color_space, data_type);
+}
+
+/* Attribute table callback (see iTIFFWriteCustomTags): writes one attribute as a TIFF
+   tag when libTIFF knows a field with the same name. Tags marked below as offsets or
+   handled elsewhere (resolution, colormap, extra samples, ...) are skipped.
+   Float attributes feed TIFF_DOUBLE fields through a temporary double array.
+   Always returns 1. */
+static int iTIFFWriteTag(TIFF* tiff, int index, const char* name, int data_type, int count, const void* data)
+{
+  const TIFFFieldInfo *fld = TIFFFieldWithName(tiff, name);
+  // the field definition decides how the value is passed; data_type and index are unused
+  (void)data_type;
+  (void)index;
+  if (fld)
+  {
+    if (fld->field_tag == TIFFTAG_EXIFIFD ||         /* offset */
+        fld->field_tag == TIFFTAG_GPSIFD ||          
+        fld->field_tag == TIFFTAG_INTEROPERABILITYIFD ||   
+        fld->field_tag == TIFFTAG_SUBIFD ||          
+        fld->field_tag == TIFFTAG_COLORMAP ||        /* handled elsewhere */
+        fld->field_tag == TIFFTAG_EXTRASAMPLES ||
+        fld->field_tag == TIFFTAG_TRANSFERFUNCTION ||
+        fld->field_tag == TIFFTAG_RESOLUTIONUNIT ||
+        fld->field_tag == TIFFTAG_XRESOLUTION ||
+        fld->field_tag == TIFFTAG_YRESOLUTION ||
+        fld->field_tag == TIFFTAG_INKNAMES)
+      return 1;
+
+    if (fld->field_passcount)
+    {
+      double* double_data = NULL;
+
+      if (fld->field_type==TIFF_DOUBLE)
+      {
+        float* float_data = (float*)data;
+        double_data = new double [count];
+        for (int p = 0; p < count; p++) 
+          double_data[p] = float_data[p];
+        data = double_data;
+      }
+
+      // the count argument is 32 bits wide for TIFF_VARIABLE2 fields, 16 bits otherwise
+      if (fld->field_writecount == TIFF_VARIABLE2)
+        TIFFSetField(tiff, fld->field_tag, (uint32)count, data);
+      else
+        TIFFSetField(tiff, fld->field_tag, (uint16)count, data);
+
+      // released whether or not TIFFSetField succeeded; still NULL when no copy was made
+      delete [] double_data;
+    } 
+    else
+    {
+      if (fld->field_tag == TIFFTAG_PAGENUMBER ||
+          fld->field_tag == TIFFTAG_HALFTONEHINTS ||
+          fld->field_tag == TIFFTAG_YCBCRSUBSAMPLING ||
+          fld->field_tag == TIFFTAG_DOTRANGE)
+      {
+        // there are 2 separated ushort values
+        uint16* ushort_value = (uint16*)data;
+        TIFFSetField(tiff, fld->field_tag, ushort_value[0], ushort_value[1]);
+        return 1;
+      }
+
+      // arrays and strings are passed by pointer, single values by value
+      if (count > 1 || fld->field_type == TIFF_ASCII)
+        TIFFSetField(tiff, fld->field_tag, data);
+      else
+      {
+        switch(fld->field_type)
+        {
+        case TIFF_UNDEFINED:
+        case TIFF_ASCII:
+        case TIFF_BYTE:
+        case TIFF_SBYTE:
+          {
+            imbyte* byte_data = (imbyte*)data;
+            TIFFSetField(tiff, fld->field_tag, *byte_data);
+          }
+          break;
+        case TIFF_SHORT:
+        case TIFF_SSHORT:
+          {
+            imushort* short_data = (imushort*)data;
+            TIFFSetField(tiff, fld->field_tag, *short_data);
+          }
+          break;
+        case TIFF_LONG:
+        case TIFF_SLONG:
+          {
+            int* long_data = (int*)data;
+            TIFFSetField(tiff, fld->field_tag, *long_data);
+          }
+          break;
+        case TIFF_RATIONAL:
+        case TIFF_SRATIONAL:
+        case TIFF_FLOAT:
+          {
+            float* float_data = (float*)data;
+            TIFFSetField(tiff, fld->field_tag, *float_data);
+          }
+          break;
+        case TIFF_DOUBLE:
+          {
+            float* float_data = (float*)data;
+            TIFFSetField(tiff, fld->field_tag, (double)*float_data);
+          }
+          break;
+        default:
+          break;
+        }
+      }
+    } 
+  }
+
+  return 1;
+}
+
+static void iTIFFWriteCustomTags(TIFF* tiff, imAttribTable* attrib_table)
+{ /* Runs iTIFFWriteTag over the attribute table through ForEach, handing it the TIFF handle. */
+  attrib_table->ForEach(tiff, (imAttribTableCallback)iTIFFWriteTag);
+}
+
+/* Copies the tags returned by TIFFGetTagListEntry for the current directory into the
+   attribute table, under the libTIFF field name. Tags marked below as offsets or
+   handled elsewhere are skipped. Values keep the closest IM data type; TIFF_DOUBLE
+   values are narrowed and stored as IM_FLOAT. */
+static void iTIFFReadCustomTags(TIFF* tiff, imAttribTable* attrib_table)
+{
+  int tag_count = TIFFGetTagListCount(tiff);
+  for (int i = 0; i < tag_count; i++)
+  {
+    ttag_t tag = TIFFGetTagListEntry(tiff, i);
+    const TIFFFieldInfo *fld = TIFFFieldWithTag(tiff, tag);
+    if (fld == NULL)
+      continue;
+
+    if (fld->field_tag == TIFFTAG_EXIFIFD ||         /* offset */
+        fld->field_tag == TIFFTAG_GPSIFD ||          
+        fld->field_tag == TIFFTAG_INTEROPERABILITYIFD ||   
+        fld->field_tag == TIFFTAG_SUBIFD ||          
+        fld->field_tag == TIFFTAG_COLORMAP ||        /* handled elsewhere */
+        fld->field_tag == TIFFTAG_EXTRASAMPLES ||
+        fld->field_tag == TIFFTAG_TRANSFERFUNCTION ||
+        fld->field_tag == TIFFTAG_RESOLUTIONUNIT ||
+        fld->field_tag == TIFFTAG_XRESOLUTION ||
+        fld->field_tag == TIFFTAG_YRESOLUTION ||
+        fld->field_tag == TIFFTAG_INKNAMES)
+      continue;
+
+    /* libTIFF bug. When reading custom tags there is an incorrect interpretation of the tag
+       that leads to return always type=RATIONAL for these tags. */
+    if (fld->field_tag == TIFFTAG_BLACKLEVEL ||
+        fld->field_tag == TIFFTAG_DEFAULTCROPSIZE ||
+        fld->field_tag == TIFFTAG_DEFAULTCROPORIGIN)
+      continue;
+
+    int data_type = -1, data_count = -1;
+    void* data = NULL;
+
+    if (fld->field_passcount)
+    {
+      if (fld->field_readcount == TIFF_VARIABLE2)
+      {
+        uint32 value_count;
+        if (TIFFGetField(tiff, tag, &value_count, &data) != 1)
+          continue;
+        data_count = value_count;
+      }
+      else
+      {
+        uint16 value_count;
+        if (TIFFGetField(tiff, tag, &value_count, &data) != 1)
+          continue;
+        data_count = value_count;
+      }
+
+      switch(fld->field_type)
+      {
+      case TIFF_UNDEFINED:
+      case TIFF_ASCII: 
+      case TIFF_BYTE:
+      case TIFF_SBYTE:
+        data_type = IM_BYTE;
+        break;
+      case TIFF_SHORT:
+      case TIFF_SSHORT:
+        data_type = IM_USHORT;
+        break;
+      case TIFF_LONG:
+      case TIFF_SLONG:
+        data_type = IM_INT;
+        break;
+      case TIFF_RATIONAL:
+      case TIFF_SRATIONAL:
+      case TIFF_FLOAT:
+        data_type = IM_FLOAT;
+        break;
+      case TIFF_DOUBLE:
+        {
+          if (!data || data_count <= 0) continue;  // nothing to convert
+          double* double_data = (double*)data;
+          float* float_data = new float [data_count];
+          for (int p = 0; p < data_count; p++) 
+            float_data[p] = (float)double_data[p];
+          attrib_table->Set(fld->field_name, IM_FLOAT, data_count, float_data);
+          delete [] float_data;
+        }
+        continue;
+      default:
+        continue;
+      }
+
+      if (data && data_count > 0)
+        attrib_table->Set(fld->field_name, data_type, data_count, data);
+    } 
+    else
+    {
+      data_count = fld->field_readcount;
+
+      if (fld->field_tag == TIFFTAG_PAGENUMBER ||
+          fld->field_tag == TIFFTAG_HALFTONEHINTS ||
+          fld->field_tag == TIFFTAG_YCBCRSUBSAMPLING ||
+          fld->field_tag == TIFFTAG_DOTRANGE)
+      {
+        // there are 2 separated ushort values
+        uint16 ushort_value[2];
+        if (TIFFGetField(tiff, fld->field_tag, &ushort_value[0], &ushort_value[1]))
+          attrib_table->Set(fld->field_name, IM_USHORT, 2, ushort_value);
+        continue;
+      }
+
+      switch(fld->field_type)
+      {
+      case TIFF_UNDEFINED:
+      case TIFF_BYTE:
+      case TIFF_SBYTE:
+      case TIFF_ASCII:
+        data_type = IM_BYTE;
+        break;
+      case TIFF_SHORT:
+      case TIFF_SSHORT:
+        data_type = IM_USHORT;
+        break;
+      case TIFF_LONG:
+      case TIFF_SLONG:
+        data_type = IM_INT;
+        break;
+      case TIFF_RATIONAL:
+      case TIFF_SRATIONAL:
+      case TIFF_FLOAT:
+      case TIFF_DOUBLE:
+        data_type = IM_FLOAT;
+        break;
+      default:
+        continue;
+      }
+
+      if (fld->field_type == TIFF_ASCII ||
+          fld->field_readcount == TIFF_VARIABLE ||
+          fld->field_readcount == TIFF_VARIABLE2 ||
+          fld->field_readcount == TIFF_SPP ||
+          data_count > 1) 
+      {
+        if (TIFFGetField(tiff, tag, &data) != 1 || !data)
+          continue;
+
+        if (fld->field_type == TIFF_ASCII && data_count == -1)
+          data_count = strlen((char*)data)+1;
+
+        if (data_count > 0)
+        {
+          if (fld->field_type == TIFF_DOUBLE)
+          {
+            double* double_data = (double*)data;
+            float* float_data = new float [data_count];
+            for (int p = 0; p < data_count; p++) 
+              float_data[p] = (float)double_data[p];
+            attrib_table->Set(fld->field_name, IM_FLOAT, data_count, float_data);
+            delete [] float_data;
+          }
+          else
+            attrib_table->Set(fld->field_name, data_type, data_count, data);
+        }
+      }
+      else if (data_count == 1)
+      {
+        // a TIFF_DOUBLE value comes back as 8 bytes: read into a double-sized buffer, then narrow it
+        double value = 0;
+        if (TIFFGetField(tiff, tag, &value) == 1)
+        {
+          float narrowed = 0;
+          if (fld->field_type == TIFF_DOUBLE)
+            narrowed = (float)value;
+          void* stored = (fld->field_type == TIFF_DOUBLE)? (void*)&narrowed: (void*)&value;
+          attrib_table->Set(fld->field_name, data_type, data_count, stored);
+        }
+      }
+    } 
+  }
+}
+
+/* Copies TIFF tags into the attribute table: resolution (ResolutionUnit, XResolution,
+   YResolution), transfer function curves, ink names, the custom tags of the current
+   directory and, when an EXIF IFD offset is present, the custom tags of that directory.
+   The current directory is selected again after the EXIF one was visited. */
+static void iTIFFReadAttributes(TIFF* tiff, imAttribTable* attrib_table)
+{
+  uint16 ResolutionUnit = RESUNIT_NONE;
+  TIFFGetField(tiff, TIFFTAG_RESOLUTIONUNIT, &ResolutionUnit);
+  if (ResolutionUnit != RESUNIT_NONE)
+  {
+    float xres = 0, yres = 0;
+
+    TIFFGetField(tiff, TIFFTAG_XRESOLUTION, &xres);
+    TIFFGetField(tiff, TIFFTAG_YRESOLUTION, &yres);
+
+    if (xres != 0 && yres != 0)
+    {
+      if (ResolutionUnit == RESUNIT_INCH)
+        attrib_table->Set("ResolutionUnit", IM_BYTE, 4, "DPI");
+      else
+        attrib_table->Set("ResolutionUnit", IM_BYTE, 4, "DPC");
+
+      attrib_table->Set("XResolution", IM_FLOAT, 1, (void*)&xres);
+      attrib_table->Set("YResolution", IM_FLOAT, 1, (void*)&yres);
+    }
+  }
+
+  uint16 *transferfunction[3]; 
+  if (TIFFGetField(tiff, TIFFTAG_TRANSFERFUNCTION, &transferfunction[0], &transferfunction[1], &transferfunction[2]))
+  {
+    uint16 SamplesPerPixel = 1, BitsPerSample = 1, ExtraSamples = 0, *SampleInfo;
+    TIFFGetFieldDefaulted(tiff, TIFFTAG_BITSPERSAMPLE, &BitsPerSample);
+    TIFFGetFieldDefaulted(tiff, TIFFTAG_EXTRASAMPLES, &ExtraSamples, &SampleInfo);
+    TIFFGetFieldDefaulted(tiff, TIFFTAG_SAMPLESPERPIXEL, &SamplesPerPixel);
+
+    int num = (SamplesPerPixel - ExtraSamples) > 1 ? 3 : 1;
+    int count = 1L<<BitsPerSample;
+    // the first curve is always stored; the other two only for more than one color sample
+    attrib_table->Set("TransferFunction0", IM_USHORT, count, transferfunction[0]);
+    if (num != 1)
+    {
+      attrib_table->Set("TransferFunction1", IM_USHORT, count, transferfunction[1]);
+      attrib_table->Set("TransferFunction2", IM_USHORT, count, transferfunction[2]);
+    }
+  }
+
+  char *inknames;
+  if (TIFFGetField(tiff, TIFFTAG_INKNAMES, &inknames))
+  {
+    // Ink names are separated by '0', so strlen will measure only the first string
+    uint16 numinks = 0;  // stays 0 when NumberOfInks is not set
+    TIFFGetField(tiff, TIFFTAG_NUMBEROFINKS, &numinks);
+    int inknameslen = 0;
+    for (int k = 0; k < (int)numinks; k++)
+      inknameslen += strlen(inknames+inknameslen)+1;
+    if (inknameslen == 0) inknameslen = strlen(inknames)+1;  // unknown ink count: keep the first name only
+    attrib_table->Set("InkNames", IM_BYTE, inknameslen, inknames);
+  }
+
+  iTIFFReadCustomTags(tiff, attrib_table);
+
+  uint32 offset;
+  if (TIFFGetField(tiff, TIFFTAG_EXIFIFD, &offset))
+  {
+    tdir_t cur_dir = TIFFCurrentDirectory(tiff);
+
+    // a failed EXIF directory read only skips its tags
+    if (TIFFReadEXIFDirectory(tiff, offset))
+      iTIFFReadCustomTags(tiff, attrib_table);
+    TIFFSetDirectory(tiff, cur_dir);
+  }
+}
+
+/* Stores the attributes that need dedicated libTIFF calls (resolution, transfer function,
+   ink names, ICC profile) and then forwards the table to iTIFFWriteCustomTags. */
+static void iTIFFWriteAttributes(TIFF* tiff, imAttribTable* attrib_table)
+{
+  // resolution is written only when unit, X and Y are all present
+  const char* unit_name = (const char*)attrib_table->Get("ResolutionUnit");
+  if (unit_name)
+  {
+    const float* res_x = (const float*)attrib_table->Get("XResolution");
+    const float* res_y = (const float*)attrib_table->Get("YResolution");
+    if (res_x && res_y)
+    {
+      // "DPI" selects inches, any other unit name is written as centimeters
+      uint16 unit_code = imStrEqual(unit_name, "DPI")? RESUNIT_INCH: RESUNIT_CENTIMETER;
+      TIFFSetField(tiff, TIFFTAG_RESOLUTIONUNIT, unit_code);
+      TIFFSetField(tiff, TIFFTAG_XRESOLUTION, *res_x);
+      TIFFSetField(tiff, TIFFTAG_YRESOLUTION, *res_y);
+    }
+  }
+
+  uint16* curve0 = (uint16*)attrib_table->Get("TransferFunction0");
+  if (curve0)
+  {
+    uint16 samples = 1, extra = 0, *extra_info;
+    TIFFGetFieldDefaulted(tiff, TIFFTAG_EXTRASAMPLES, &extra, &extra_info);
+    TIFFGetFieldDefaulted(tiff, TIFFTAG_SAMPLESPERPIXEL, &samples);
+
+    // more than one color sample requires the three curves together
+    if ((samples - extra) > 1)
+    {
+      uint16* curve1 = (uint16*)attrib_table->Get("TransferFunction1");
+      uint16* curve2 = (uint16*)attrib_table->Get("TransferFunction2");
+      if (curve1 && curve2)
+        TIFFSetField(tiff, TIFFTAG_TRANSFERFUNCTION, curve0, curve1, curve2);
+    }
+    else
+      TIFFSetField(tiff, TIFFTAG_TRANSFERFUNCTION, curve0);
+  }
+
+  char* ink_names = (char*)attrib_table->Get("InkNames");
+  if (ink_names)
+    TIFFSetField(tiff, TIFFTAG_INKNAMES, ink_names);
+
+  // the ICC profile is passed together with the count reported by Get
+  int profile_size;
+  const void* profile = attrib_table->Get("ICCProfile", (int*)NULL, &profile_size);
+  if (profile)
+    TIFFSetField(tiff, TIFFTAG_ICCPROFILE, profile_size, profile);
+
+  iTIFFWriteCustomTags(tiff, attrib_table);
+}
+
+/* TIFF file format driver, implemented on top of libtiff. */
+class imFormatTIFF: public imFormat
+{
+  TIFF* tiff;             // libtiff handle, assigned in Open and New
+
+  int invert;             // must invert black and white reference
+  int cpx_int;            // original data is a complex integer (1 or 2, see ReadImageInfo)
+  int lab_fix;            // convert CIE Lab to unsigned
+  int extra_sample_size;  // eliminate extra samples if more than one
+  int sample_size;
+  int start_plane;        // first band to read in a multiband image
+
+  void** tile_buf;        // one buffer per tile of the current row of tiles
+  int tile_buf_count;
+  int tile_width;
+  int tile_height;
+  int start_row;          // first image row covered by the tiles in tile_buf
+  int tile_line_size;
+  int tile_line_raw_size;
+
+  int ReadTileline(void* line_buffer, int row, int plane);
+
+public:
+  imFormatTIFF()
+    : imFormat("TIFF", "Tagged Image File Format", "*.tif;*.tiff;", iTIFFCompTable, IMTIFF_NUMCOMP, 1)
+  {
+  }
+
+  ~imFormatTIFF()
+  {
+  }
+
+  int Open(const char* file_name);
+  int New(const char* file_name);
+  void Close();
+  void* Handle(int index);
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo();
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+/* Tag extender callback (installed by imFormatRegisterTIFF): merges the IM
+   tag field info into the given TIFF handle. */
+static void iTIFFDefaultDirectory(TIFF *tiff)
+{
+  const size_t im_field_count = TIFFArrayCount(iTiffFieldInfo);
+  TIFFMergeFieldInfo(tiff, iTiffFieldInfo, im_field_count);
+}
+
+/* Installs the IM tag extender in libtiff and registers a new TIFF format driver. */
+void imFormatRegisterTIFF(void)
+{
+  /* the extender must be in place before any file is opened by the driver */
+  TIFFSetTagExtender(iTIFFDefaultDirectory);
+
+  imFormatTIFF* tiff_format = new imFormatTIFF();
+  imFormatRegister(tiff_format);
+}
+
+/* Opens an existing TIFF file for reading.
+   Returns IM_ERR_FORMAT when libtiff can not open the file, IM_ERR_COMPRESS
+   when the compression of the first image is not in the IM table, and
+   IM_ERR_NONE otherwise. No handle is left open when an error is returned. */
+int imFormatTIFF::Open(const char* file_name)
+{
+  /* reader state is defined before any early return */
+  this->tile_buf = 0;
+  this->start_plane = 0;
+
+  this->tiff = TIFFOpen(file_name, "r");
+  if (this->tiff == NULL)
+    return IM_ERR_FORMAT;
+
+  // Return the compression of the first image in the file.
+  uint16 Compression = COMPRESSION_NONE;
+  TIFFGetField(this->tiff, TIFFTAG_COMPRESSION, &Compression);
+  int comp_index = iTIFFGetCompIndex(Compression);
+  if (comp_index == -1) 
+  {
+    /* the file was already opened, release it before reporting the error */
+    TIFFClose(this->tiff);
+    this->tiff = NULL;
+    return IM_ERR_COMPRESS;
+  }
+  strcpy(this->compression, iTIFFCompTable[comp_index]);
+
+  this->image_count = TIFFNumberOfDirectories(this->tiff);
+
+  return IM_ERR_NONE;
+}
+
+/* Creates a new TIFF file for writing.
+   Returns IM_ERR_OPEN when libtiff can not create it, IM_ERR_NONE otherwise. */
+int imFormatTIFF::New(const char* file_name)
+{
+  TIFF* new_tiff = TIFFOpen(file_name, "w");
+  this->tiff = new_tiff;
+
+  if (!new_tiff)
+    return IM_ERR_OPEN;
+
+  /* no tile buffers are allocated yet */
+  this->tile_buf = 0;
+  return IM_ERR_NONE;
+}
+
+/* Releases the tile buffers, if any were allocated, and closes the TIFF handle. */
+void imFormatTIFF::Close()
+{
+  void** buffers = this->tile_buf;
+  if (buffers != 0)
+  {
+    const int buffer_count = this->tile_buf_count;
+    int t = 0;
+    while (t < buffer_count)
+    {
+      free(buffers[t]);
+      t++;
+    }
+
+    free(buffers);
+  }
+
+  TIFFClose(this->tiff);
+}
+
+/* Returns an internal handle: index 0 is the file descriptor stored in the
+   TIFF structure, index 1 is the TIFF pointer itself, any other index is NULL. */
+void* imFormatTIFF::Handle(int index)
+{
+  switch (index)
+  {
+  case 0:
+    return (void*)this->tiff->tif_fd;
+  case 1:
+    return (void*)this->tiff;
+  default:
+    return NULL;
+  }
+}
+                
+/* Positions the file at directory "index" and fills the image description
+   (size, color mode, data type, palette) and the attribute table from its tags.
+   The attributes "MultiBandSelect" and "SubIFDSelect", when present in the
+   table on entry, select the band and the DNG SubIFD to be read.
+   Returns IM_ERR_NONE on success, or IM_ERR_ACCESS, IM_ERR_FORMAT,
+   IM_ERR_COMPRESS, IM_ERR_DATA, IM_ERR_MEM when the directory can not be used. */
+int imFormatTIFF::ReadImageInfo(int index)
+{
+  this->cpx_int = 0;
+  this->invert = 0;
+  this->lab_fix = 0;
+  this->extra_sample_size = 0;
+
+  if (!TIFFSetDirectory(this->tiff, (tdir_t)index))
+    return IM_ERR_ACCESS;
+
+  imAttribTable* attrib_table = AttribTable();
+
+  /* The selection values are copied now, because the table is emptied below
+     and the pointers returned by Get must not be used after that. */
+  uint16* attrib_start_plane = (uint16*)attrib_table->Get("MultiBandSelect");
+  if (attrib_start_plane)
+    this->start_plane = *attrib_start_plane;
+  else
+    this->start_plane = 0;
+
+  uint16* sub_ifd = (uint16*)attrib_table->Get("SubIFDSelect");
+  int sub_ifd_index = sub_ifd? *sub_ifd: 0;
+
+  /* must clear the attribute list, because TIFF can have many different images */
+  attrib_table->RemoveAll();
+
+  void* data = NULL;
+  if (TIFFGetField(this->tiff, TIFFTAG_DNGVERSION, &data) == 1 && data)
+  {
+    uint32 SubFileType = 0;
+    TIFFGetField(this->tiff, TIFFTAG_SUBFILETYPE, &SubFileType);
+
+    uint16 SubIFDsCount = 0;
+    uint32* SubIFDs = NULL;
+    TIFFGetField(this->tiff, TIFFTAG_SUBIFD, &SubIFDsCount, &SubIFDs);
+    attrib_table->Set("SubIFDCount", IM_USHORT, 1, (void*)&SubIFDsCount);
+
+    /* If is a DNG file, and has SubIFDs, 
+       then ignore the thumbnail and position at the desired SubIFD. */
+
+    if (SubFileType == FILETYPE_REDUCEDIMAGE && SubIFDsCount != 0 && SubIFDs)
+    {
+      /* clamp the selection to the last available SubIFD */
+      if (sub_ifd_index >= SubIFDsCount) sub_ifd_index = SubIFDsCount-1;
+      uint32 SubIFDOffset = SubIFDs[sub_ifd_index];
+
+      /* Load the main image attributes, the SubIFD contains only a few attributes. */
+      iTIFFReadAttributes(this->tiff, attrib_table);
+
+      TIFFSetSubDirectory(this->tiff, SubIFDOffset);
+    }
+  }
+
+  uint16 Compression = COMPRESSION_NONE;
+  TIFFGetField(this->tiff, TIFFTAG_COMPRESSION, &Compression);
+  int comp_index = iTIFFGetCompIndex(Compression);
+  if (comp_index == -1) return IM_ERR_COMPRESS;
+  strcpy(this->compression, iTIFFCompTable[comp_index]);
+
+  /* ask libtiff to deliver JPEG data already converted to RGB */
+  if (Compression == COMPRESSION_JPEG || Compression == COMPRESSION_OJPEG)
+    TIFFSetField(this->tiff, TIFFTAG_JPEGCOLORMODE, JPEGCOLORMODE_RGB);
+
+  uint32 Width;
+  if (!TIFFGetField(this->tiff, TIFFTAG_IMAGEWIDTH, &Width))       
+    return IM_ERR_FORMAT;
+  this->width = Width;
+
+  uint32 Height;
+  if (!TIFFGetField(this->tiff, TIFFTAG_IMAGELENGTH, &Height))
+    return IM_ERR_FORMAT;
+  this->height = Height;
+
+  uint16 Photometric;
+  if (!TIFFGetField(this->tiff, TIFFTAG_PHOTOMETRIC, &Photometric))
+    return IM_ERR_FORMAT;
+  attrib_table->Set("Photometric", IM_USHORT, 1, (void*)&Photometric);
+
+  switch(Photometric)
+  {
+  case PHOTOMETRIC_MINISWHITE:
+    this->invert = 1;
+    /* fall through, it is a gray image */
+  case PHOTOMETRIC_LINEARRAW:
+  case PHOTOMETRIC_CFA:
+  case PHOTOMETRIC_LOGL:
+  case PHOTOMETRIC_MASK:
+  case PHOTOMETRIC_MINISBLACK:
+    this->file_color_mode = IM_GRAY;
+    break;
+  case PHOTOMETRIC_PALETTE:
+    this->file_color_mode = IM_MAP;
+    break;
+  case PHOTOMETRIC_RGB:
+    this->file_color_mode = IM_RGB;
+    break;
+  case PHOTOMETRIC_SEPARATED:
+    this->file_color_mode = IM_CMYK;
+    break;
+  case PHOTOMETRIC_YCBCR:
+    /* for JPEG the conversion to RGB was requested above */
+    if (Compression == COMPRESSION_JPEG || Compression == COMPRESSION_OJPEG)
+      this->file_color_mode = IM_RGB;
+    else
+      this->file_color_mode = IM_YCBCR;
+    break;
+  case PHOTOMETRIC_CIELAB:
+    this->lab_fix = 1;
+    /* fall through, it is a Lab image */
+  case PHOTOMETRIC_ITULAB:
+  case PHOTOMETRIC_ICCLAB:
+    this->file_color_mode = IM_LAB;
+    break;
+  case PHOTOMETRIC_LOGLUV:
+    this->file_color_mode = IM_XYZ;
+    break;
+  default: 
+    return IM_ERR_DATA;
+  }
+
+  if (Photometric == PHOTOMETRIC_LOGLUV || Photometric == PHOTOMETRIC_LOGL)
+    TIFFSetField(this->tiff, TIFFTAG_SGILOGDATAFMT, SGILOGDATAFMT_FLOAT);
+
+  uint16 SamplesPerPixel = 1, BitsPerSample = 1;
+  TIFFGetFieldDefaulted(this->tiff, TIFFTAG_BITSPERSAMPLE, &BitsPerSample);
+  TIFFGetFieldDefaulted(this->tiff, TIFFTAG_SAMPLESPERPIXEL, &SamplesPerPixel);
+
+  if (BitsPerSample == 1 && this->file_color_mode == IM_GRAY)
+    this->file_color_mode = IM_BINARY;
+
+  /* consistency checks */
+  if (Photometric == PHOTOMETRIC_PALETTE && (SamplesPerPixel != 1 || BitsPerSample > 8))
+    return IM_ERR_DATA;
+
+  if (Photometric == PHOTOMETRIC_MASK && (SamplesPerPixel != 1 || BitsPerSample != 1))
+    return IM_ERR_DATA;
+
+  if ((Photometric == PHOTOMETRIC_CFA || Photometric == PHOTOMETRIC_LINEARRAW) && SamplesPerPixel == 3)  /* when there are 3 sensors */
+    this->file_color_mode = IM_RGB;
+
+  if ((Photometric == PHOTOMETRIC_CFA || Photometric == PHOTOMETRIC_LINEARRAW) && BitsPerSample == 12)
+    this->convert_bpp = 12;
+
+  uint16 PlanarConfig = PLANARCONFIG_CONTIG;
+  TIFFGetFieldDefaulted(this->tiff, TIFFTAG_PLANARCONFIG, &PlanarConfig);
+
+  if (PlanarConfig == PLANARCONFIG_CONTIG && SamplesPerPixel > 1)
+    this->file_color_mode |= IM_PACKED;
+
+  uint16 ExtraSamples = 0, *SampleInfo;
+  TIFFGetFieldDefaulted(this->tiff, TIFFTAG_EXTRASAMPLES, &ExtraSamples, &SampleInfo);
+  if (ExtraSamples == 1)
+  {
+    switch (SampleInfo[0]) 
+    {
+    case EXTRASAMPLE_UNSPECIFIED: /* !unspecified data */
+    case EXTRASAMPLE_ASSOCALPHA:  /* data is pre-multiplied */
+    case EXTRASAMPLE_UNASSALPHA:  /* data is not pre-multiplied */
+      this->file_color_mode |= IM_ALPHA;
+      break;
+    }
+    attrib_table->Set("ExtraSampleInfo", IM_USHORT, 1, (void*)&SampleInfo[0]);
+  }
+  else if ((ExtraSamples > 1) && (PlanarConfig == PLANARCONFIG_CONTIG))
+  {
+    /* usually a multi band image, we read only one band */
+    this->sample_size = (BitsPerSample*(SamplesPerPixel-ExtraSamples) + 7)/8; 
+    this->extra_sample_size = (BitsPerSample*SamplesPerPixel + 7)/8;
+
+    /* add space for the line buffer (this is more than necessary) */
+    this->line_buffer_extra = TIFFScanlineSize(this->tiff);
+  }
+
+  uint16 SampleFormat = SAMPLEFORMAT_UINT;
+  TIFFGetField(this->tiff, TIFFTAG_SAMPLEFORMAT, &SampleFormat);
+  switch(SampleFormat)
+  {
+  case SAMPLEFORMAT_VOID:
+  case SAMPLEFORMAT_UINT:
+    if (BitsPerSample < 8)
+    {
+      if (BitsPerSample != 1 && BitsPerSample != 2 && BitsPerSample != 4)
+        return IM_ERR_DATA;
+
+      this->file_data_type = IM_BYTE;
+      this->convert_bpp = BitsPerSample;
+    }
+    else if (BitsPerSample == 8)
+      this->file_data_type = IM_BYTE;
+    else if (BitsPerSample <= 16)
+      this->file_data_type = IM_USHORT;
+    else if (BitsPerSample <= 32)
+    {
+      this->switch_type = 1;
+      this->file_data_type = IM_INT;
+    }
+    else
+      return IM_ERR_DATA;
+    break;
+  case SAMPLEFORMAT_INT:
+    if (BitsPerSample <= 8)
+    {
+      this->switch_type = 1;
+      this->file_data_type = IM_BYTE;
+    }
+    else if (BitsPerSample <= 16)
+    {
+      this->switch_type = 1;
+      this->file_data_type = IM_USHORT;
+    }
+    else if (BitsPerSample <= 32)
+      this->file_data_type = IM_INT;
+    else
+      return IM_ERR_DATA;
+    break;
+  case SAMPLEFORMAT_IEEEFP:
+    if (BitsPerSample == 32)
+      this->file_data_type = IM_FLOAT;      
+    else if (BitsPerSample == 64)
+    {
+      this->switch_type = 1;
+      this->file_data_type = IM_FLOAT;
+    }
+    else
+      return IM_ERR_DATA;
+    break;
+  case SAMPLEFORMAT_COMPLEXINT:
+    if (BitsPerSample == 32)
+    {
+      this->cpx_int = 1;
+      this->file_data_type = IM_CFLOAT;  // convert short to float
+    }
+    else if (BitsPerSample == 64)
+    {
+      this->cpx_int = 2;
+      this->file_data_type = IM_CFLOAT; // convert int to float     
+    }
+    else
+      return IM_ERR_DATA;
+    break;
+  case SAMPLEFORMAT_COMPLEXIEEEFP:
+    if (BitsPerSample == 64)
+      this->file_data_type = IM_CFLOAT;      
+    else if (BitsPerSample == 128)
+    {
+      this->switch_type = 1;
+      this->file_data_type = IM_CFLOAT;
+    }
+    else
+      return IM_ERR_DATA;
+    break;
+  default:
+    return IM_ERR_DATA;
+  }
+
+  /* palette[] holds 256 entries, so the color map is decoded only for up to
+     8 bits per sample. A COLORMAP tag combined with deeper samples is ignored,
+     otherwise the loop below would write past the end of the array. */
+  uint16 *rmap, *gmap, *bmap; 
+  if (BitsPerSample <= 8 && TIFFGetField(this->tiff, TIFFTAG_COLORMAP, &rmap, &gmap, &bmap))
+  {
+    long palette[256];
+    int palette_count = 1 << BitsPerSample;
+
+    for (int c = 0; c < palette_count; c++)
+    {
+      /* TIFF stores 16 bits per component, only the high byte is used */
+      palette[c] = imColorEncode((unsigned char)(rmap[c] >> 8),
+                                 (unsigned char)(gmap[c] >> 8),
+                                 (unsigned char)(bmap[c] >> 8));
+    }
+
+    imFileSetPalette(this, palette, palette_count);
+  }
+
+  if (TIFFIsTiled(this->tiff))
+  {
+    /* release the buffers of a previously read image, and reset the members
+       so Close never sees freed pointers if this function fails below */
+    if (this->tile_buf)
+    {
+      for (int i = 0; i < this->tile_buf_count; i++)
+        free(this->tile_buf[i]);
+      free(this->tile_buf);
+      this->tile_buf = 0;
+    }
+    this->tile_buf_count = 0;
+
+    uint32 tileWidth = 0, tileLength = 0;
+    TIFFGetField(this->tiff, TIFFTAG_TILEWIDTH, &tileWidth);
+    TIFFGetField(this->tiff, TIFFTAG_TILELENGTH, &tileLength);
+    if (tileWidth == 0 || tileLength == 0)  /* also avoids the division by zero below */
+      return IM_ERR_FORMAT;
+    this->tile_width = (int)tileWidth;
+    this->tile_height = (int)tileLength;
+
+    /* number of tiles needed to cover one image row */
+    int tile_count = (Width + tileWidth-1) / tileWidth;
+    /* NOTE(review): ReadTileline advances x by tile_width once per buffer, so
+       with this multiplication x looks like it would run past the image width
+       for separate planes - confirm against a tiled planar file. */
+    if (PlanarConfig == PLANARCONFIG_SEPARATE)
+      tile_count *= SamplesPerPixel;
+    this->tile_line_size = TIFFTileRowSize(this->tiff);
+    this->tile_line_raw_size = TIFFScanlineSize(this->tiff);
+    this->start_row = 0;
+
+    this->tile_buf = (void**)malloc(sizeof(void*)*tile_count);
+    if (!this->tile_buf)
+      return IM_ERR_MEM;
+
+    int tile_size = TIFFTileSize(this->tiff);
+    for (int t = 0; t < tile_count; t++)
+    {
+      this->tile_buf[t] = malloc(tile_size);
+      if (!this->tile_buf[t])
+        return IM_ERR_MEM;
+
+      /* counts only the buffers that were really allocated */
+      this->tile_buf_count = t+1;
+    }
+  }
+
+  if (SamplesPerPixel < imColorModeDepth(this->file_color_mode))
+    return IM_ERR_DATA;
+
+  if (SamplesPerPixel > 1 && imColorModeSpace(this->file_color_mode) == IM_GRAY)
+  {
+    /* multiband data, we read only one band */
+    attrib_table->Set("MultiBandCount", IM_USHORT, 1, (void*)&SamplesPerPixel);
+  }
+
+  uint16 Orientation = ORIENTATION_TOPLEFT;
+  TIFFGetFieldDefaulted(this->tiff, TIFFTAG_ORIENTATION, &Orientation);
+  switch (Orientation) 
+  {
+  case ORIENTATION_TOPRIGHT:
+  case ORIENTATION_RIGHTTOP:  
+  case ORIENTATION_LEFTTOP:  
+  case ORIENTATION_TOPLEFT:
+    this->file_color_mode |= IM_TOPDOWN;
+    break;
+  }
+  attrib_table->Set("Orientation", IM_USHORT, 1, (void*)&Orientation);
+
+  iTIFFReadAttributes(this->tiff, attrib_table);
+
+  return IM_ERR_NONE;
+}
+
+/* Configures the current TIFF directory for the image that is going to be
+   written: compression, size, photometric interpretation, planar
+   configuration, orientation, sample format, bits and samples per pixel,
+   extra samples, color map, strip size and the attributes in the table.
+   The file color mode and data type start as copies of the user ones.
+   Returns IM_ERR_COMPRESS when iTIFFCompCalc rejects the combination of
+   compression, color mode and data type, IM_ERR_NONE otherwise. */
+int imFormatTIFF::WriteImageInfo()
+{
+  this->file_color_mode = this->user_color_mode;
+  this->file_data_type = this->user_data_type;
+  this->lab_fix = 0;
+
+  uint16 Compression = iTIFFCompCalc(this->compression, this->file_color_mode, this->file_data_type);
+  if (Compression == (uint16)-1)
+    return IM_ERR_COMPRESS;
+
+  /* store back the table name of the compression that will be really used */
+  int comp_index = iTIFFGetCompIndex(Compression);
+  strcpy(this->compression, iTIFFCompTable[comp_index]);
+
+  TIFFSetField(this->tiff, TIFFTAG_COMPRESSION, Compression);
+
+  uint32 Width = this->width;
+  TIFFSetField(this->tiff, TIFFTAG_IMAGEWIDTH, Width);
+
+  uint32 Height = this->height;
+  TIFFSetField(this->tiff, TIFFTAG_IMAGELENGTH, Height);
+
+  /* indexed by the value returned by imColorModeSpace */
+  static uint16 colorspace2photometric [] =
+  {
+    PHOTOMETRIC_RGB,    
+    PHOTOMETRIC_PALETTE,    
+    PHOTOMETRIC_MINISBLACK,   
+    PHOTOMETRIC_MINISBLACK, 
+    PHOTOMETRIC_SEPARATED,   
+    PHOTOMETRIC_YCBCR,  
+    PHOTOMETRIC_CIELAB,
+    (uint16)-1,          // Pure Luv not supported
+    PHOTOMETRIC_LOGLUV   // LogLuv Saved as XYZ
+  };
+
+  uint16 Photometric = colorspace2photometric[imColorModeSpace(this->file_color_mode)];
+
+  // Correction for sgi LogL
+  if (Compression == COMPRESSION_SGILOG && Photometric == PHOTOMETRIC_MINISBLACK)
+    Photometric = PHOTOMETRIC_LOGL;
+
+  // Corrections for JPEG, automatic convert from RGB to YCbCr when writing
+  if (Compression == COMPRESSION_JPEG && Photometric == PHOTOMETRIC_RGB)
+    Photometric = PHOTOMETRIC_YCBCR;
+
+  imAttribTable* attrib_table = AttribTable();
+
+  /* The "Photometric" attribute can only refine the value computed above
+     to a variant of the same kind (gray or Lab), it never changes the kind. */
+  uint16* photometric = (uint16*)attrib_table->Get("Photometric");
+  if (photometric)
+  {
+    if (*photometric == PHOTOMETRIC_MASK && Photometric == PHOTOMETRIC_MINISBLACK)
+      Photometric = PHOTOMETRIC_MASK;
+    else if (*photometric == PHOTOMETRIC_MINISWHITE && Photometric == PHOTOMETRIC_MINISBLACK)
+      Photometric = PHOTOMETRIC_MINISWHITE;
+    else if (*photometric == PHOTOMETRIC_ICCLAB && Photometric == PHOTOMETRIC_CIELAB)
+      Photometric = PHOTOMETRIC_ICCLAB;
+    else if (*photometric == PHOTOMETRIC_ITULAB && Photometric == PHOTOMETRIC_CIELAB)
+      Photometric = PHOTOMETRIC_ITULAB;
+  }
+
+  /* WriteImageData applies iTIFFLabFix to every line when this flag is set */
+  if (Photometric == PHOTOMETRIC_CIELAB)
+    this->lab_fix = 1;
+
+  TIFFSetField(this->tiff, TIFFTAG_PHOTOMETRIC, Photometric);
+
+  // This is the default, and many software assume/handle only this, so we force it.
+  uint16 PlanarConfig = PLANARCONFIG_CONTIG;
+  TIFFSetField(this->tiff, TIFFTAG_PLANARCONFIG, PlanarConfig);
+  if (imColorModeDepth(this->file_color_mode) > 1)
+    this->file_color_mode |= IM_PACKED;
+
+  // Corrections for JPEG, must be set after Photometric and PlanarConfig
+  if (Compression == COMPRESSION_JPEG && imColorModeSpace(this->file_color_mode) == IM_RGB)
+    TIFFSetField(this->tiff, TIFFTAG_JPEGCOLORMODE, JPEGCOLORMODE_RGB);
+
+  // Compression options
+  int* zip_quality = (int*)attrib_table->Get("ZIPQuality");
+  if (zip_quality && (Compression == COMPRESSION_DEFLATE || Compression == COMPRESSION_ADOBE_DEFLATE))
+    TIFFSetField(this->tiff, TIFFTAG_ZIPQUALITY, *zip_quality);
+
+  if (Compression == COMPRESSION_JPEG)
+  {
+    int* jpeg_quality = (int*)attrib_table->Get("JPEGQuality");
+    if (jpeg_quality)
+      TIFFSetField(this->tiff, TIFFTAG_JPEGQUALITY, *jpeg_quality);
+  }
+
+  // This is the default, and many software assume/handle only this, so we force it.
+  uint16 Orientation = ORIENTATION_TOPLEFT; 
+  TIFFSetField(this->tiff, TIFFTAG_ORIENTATION, Orientation);
+  this->file_color_mode |= IM_TOPDOWN;
+
+  /* indexed by the IM data type */
+  static uint16 datatype2format[] =
+  {
+    SAMPLEFORMAT_UINT,    
+    SAMPLEFORMAT_UINT,  
+    SAMPLEFORMAT_INT,     
+    SAMPLEFORMAT_IEEEFP,  
+    SAMPLEFORMAT_COMPLEXIEEEFP
+  };
+  uint16 SampleFormat = datatype2format[this->file_data_type];
+  TIFFSetField(this->tiff, TIFFTAG_SAMPLEFORMAT, SampleFormat);
+
+  /* binary images are written with 1 bit per sample, convert_bpp is set accordingly */
+  uint16 BitsPerSample = (uint16)(imDataTypeSize(this->file_data_type)*8);
+  if (imColorModeSpace(this->file_color_mode) == IM_BINARY) 
+  {
+    BitsPerSample = 1;
+    this->convert_bpp = 1;
+  }
+  TIFFSetField(this->tiff, TIFFTAG_BITSPERSAMPLE, BitsPerSample);
+
+  // Correction for Luv, this will change BitsperSample and SampleFormat
+  if (Photometric == PHOTOMETRIC_LOGLUV || Photometric == PHOTOMETRIC_LOGL)
+    TIFFSetField(this->tiff, TIFFTAG_SGILOGDATAFMT, SGILOGDATAFMT_FLOAT);
+
+  uint16 SamplesPerPixel = (uint16)imColorModeDepth(this->file_color_mode);
+  TIFFSetField(this->tiff, TIFFTAG_SAMPLESPERPIXEL, SamplesPerPixel);
+
+  if (imColorModeHasAlpha(this->file_color_mode))
+  {
+    /* unassociated alpha unless the "ExtraSampleInfo" attribute says otherwise */
+    uint16 ExtraSamples = 1, SampleInfo[1] = {EXTRASAMPLE_UNASSALPHA};
+    uint16* sample_info = (uint16*)attrib_table->Get("ExtraSampleInfo");
+    if (sample_info) SampleInfo[0] = *sample_info;
+    TIFFSetField(this->tiff, TIFFTAG_EXTRASAMPLES, ExtraSamples, SampleInfo);
+  }
+
+  if (imColorModeSpace(this->file_color_mode) == IM_MAP)
+  {
+    /* entries beyond palette_count stay zero */
+    uint16 rmap[256], gmap[256], bmap[256];
+    memset(rmap, 0, 256 * 2);
+    memset(gmap, 0, 256 * 2);
+    memset(bmap, 0, 256 * 2);
+
+    unsigned char r, g, b;
+    for (int c = 0; c < this->palette_count; c++)
+    {
+      /* each 8 bit component goes to the high byte of the 16 bit map entry */
+      imColorDecode(&r, &g, &b, this->palette[c]);
+      rmap[c] = (uint16)(((uint16)r) << 8);
+      gmap[c] = (uint16)(((uint16)g) << 8);
+      bmap[c] = (uint16)(((uint16)b) << 8);
+    }
+
+    TIFFSetField(this->tiff, TIFFTAG_COLORMAP, rmap, gmap, bmap);
+  }
+
+  // Force libTIFF to calculate best RowsPerStrip
+  uint32 RowsPerStrip = (uint32)-1; 
+  RowsPerStrip = TIFFDefaultStripSize(this->tiff, RowsPerStrip);
+  TIFFSetField(this->tiff, TIFFTAG_ROWSPERSTRIP, RowsPerStrip);
+
+  iTIFFWriteAttributes(this->tiff, attrib_table);
+
+  return IM_ERR_NONE;
+}
+
+/* Converts, in place, a line of complex integers to complex floats.
+   count is the number of complex values, each one has two components.
+   cpx_int == 1 means 16 bit integer components, any other value means 32 bit
+   integer components. For 16 bits the buffer must already have room for the
+   expanded float data. */
+static void iTIFFExpandComplexInt(void* line_buffer, int count, int cpx_int)
+{
+  const int total = count * 2;  // real and imaginary parts
+  float* dst = (float*)line_buffer;
+
+  if (cpx_int != 1)
+  {
+    // int and float have the same size here, a forward pass is enough
+    int* src = (int*)line_buffer;
+    for (int k = 0; k < total; k++)
+      dst[k] = (float)src[k];
+    return;
+  }
+
+  // short is expanded from 16 to 32 bits, so walk from the end to the start
+  // to avoid overwriting values that were not converted yet
+  short* src = (short*)line_buffer;
+  for (int k = total-1; k >= 0; k--)
+    dst[k] = (float)src[k];
+}
+
+/* Compacts, in place, a packed line that has more than one extra sample:
+   from every pixel of extra_sample_size bytes only sample_size bytes are
+   kept, taken "plane" bytes after the start of the pixel.
+   NOTE(review): plane is used as a byte offset; for samples wider than one
+   byte a band index would need scaling - confirm with the caller. */
+static void iTIFFExtraSamplesFix(unsigned char* line_buffer, int width, int sample_size, int extra_sample_size, int plane)
+{
+  /* ignore all the other extra samples, here the samples are packed */
+  /* The first pixel is included so it is also taken from the selected offset
+     (it is a no-op when plane is 0). Source and destination can overlap in
+     the first pixels, so memmove is required instead of memcpy. */
+  for (int i = 0; i < width; i++)
+  {
+    memmove(line_buffer + i*sample_size, line_buffer + i*extra_sample_size + plane, sample_size);
+  }
+}
+
+/*
+For CIELab (PhotometricInterpretation = 8), the L* component is encoded in 8 bits as an unsigned integer
+range [0,255], and encoded in 16 bits as an unsigned integer range [0,65535]. The a* and b* components
+are encoded in 8 bits as signed integers range [-128,127], and encoded in 16 bits as signed integers range [-
+32768,32767]. The 8 bit chrominance values are exactly equal to the 1976 CIE a* and b* values, while the
+16 bit values are equal to 256 times the 1976 CIE a* and b* values.
+
+For ICCLab (PhotometricInterpretation = 9), the L* component is encoded in 8 bits as an unsigned integer
+range [0,255], and encoded in 16 bits as an unsigned integer range [0,65280]. The a* and b* components
+are encoded in 8 bits as unsigned integers range [0,255], and encoded in 16 bits as unsigned integers range
+[0,65535]. The 8 bit chrominance values are exactly equal to the 1976 CIE a* and b* values plus 128,
+while the 16 bit values are equal to 256 times the 1976 CIE a* and b* values plus 32768 (this is also 256
+times the 8 bit encoding). PhotometricInterpretation 9 is designed to match the encoding used by the ICC
+profile specification.
+*/
+
+/* Shifts, in place, the a* and b* components of a line of packed 3 component
+   Lab pixels, leaving L* untouched. The components are read as signed values,
+   the offset is added and the result is stored as unsigned.
+   is_new == 0 adds 128 (or 32768), any other value subtracts it.
+   Only IM_BYTE and IM_USHORT are handled, other data types are left as they are. */
+static void iTIFFLabFix(void* line_buffer, int width, int data_type, int is_new)
+{
+  if (data_type == IM_BYTE)
+  {
+    const int shift = is_new? -128: 128;
+    imbyte* pixel = (imbyte*)line_buffer;
+
+    for (int x = 0; x < width; x++, pixel += 3)
+    {
+      const char* as_signed = (const char*)pixel;
+      pixel[1] = (imbyte)(as_signed[1] + shift);
+      pixel[2] = (imbyte)(as_signed[2] + shift);
+    }
+  }
+  else if (data_type == IM_USHORT)
+  {
+    const int shift = is_new? -32768: 32768;
+    imushort* pixel = (imushort*)line_buffer;
+
+    for (int x = 0; x < width; x++, pixel += 3)
+    {
+      const short* as_signed = (const short*)pixel;
+      pixel[1] = (imushort)(as_signed[1] + shift);
+      pixel[2] = (imushort)(as_signed[2] + shift);
+    }
+  }
+  // Do NOT know how it is encoded for other data types.
+}
+
+/* Reads one image line of a tiled file into line_buffer.
+   A complete row of tiles is loaded into tile_buf whenever "row" is the first
+   line of that row of tiles, then the requested line is assembled by copying
+   the matching line of every tile side by side.
+   Returns 1 on success and -1 if a tile can not be read. */
+int imFormatTIFF::ReadTileline(void* line_buffer, int row, int plane)
+{
+  int t;
+
+  if (row == 0)
+    this->start_row = 0;
+
+  // a row of tiles covers tile_height image lines (not tile_width),
+  // so the next row of tiles starts that many lines below the current one
+  if (row == this->start_row + this->tile_height)
+    this->start_row = row;
+
+  // load a line of tiles
+  if (row == this->start_row)
+  {
+    int x = 0;
+    for (t = 0; t < this->tile_buf_count; t++)
+    {
+      if (TIFFReadTile(this->tiff, this->tile_buf[t], x, start_row, 0, (tsample_t)plane) <= 0)
+        return -1;
+
+      x += this->tile_width;
+    }
+  }
+
+  int line_size = this->tile_line_size;
+  int tile_line = row - this->start_row;
+
+  for (t = 0; t < this->tile_buf_count; t++)
+  {
+    // the last tile can extend past the image, copy only the bytes
+    // that are still missing to complete a raw scan line
+    if (t == this->tile_buf_count-1)
+    {
+      int extra = this->tile_line_size*this->tile_buf_count - this->tile_line_raw_size;
+      line_size -= extra;
+    }
+
+    memcpy(line_buffer, (imbyte*)(this->tile_buf[t]) + tile_line*tile_line_size, line_size);
+    line_buffer = (imbyte*)(line_buffer) + line_size;
+  }
+
+  return 1;
+}
+
+/* Reads all the lines of the current image into data, one line buffer at a
+   time, applying the fixes selected in ReadImageInfo (inversion, complex
+   integer expansion, Lab offset, extra samples removal).
+   Returns IM_ERR_ACCESS if a line can not be read, IM_ERR_COUNTER if the
+   counter requests to stop, IM_ERR_NONE otherwise. */
+int imFormatTIFF::ReadImageData(void* data)
+{
+  const int line_total = imFileLineBufferCount(this);
+
+  imCounterTotal(this->counter, line_total, "Reading TIFF...");
+
+  int row = 0;
+  int plane = this->start_plane;
+
+  for (int line = 0; line < line_total; line++)
+  {
+    int status;
+    if (TIFFIsTiled(this->tiff))
+      status = ReadTileline(this->line_buffer, row, (tsample_t)plane);
+    else
+      status = TIFFReadScanline(this->tiff, this->line_buffer, row, (tsample_t)plane);
+
+    if (status <= 0)
+      return IM_ERR_ACCESS;
+
+    /* white is zero in the file, complement every byte of the line */
+    if (this->invert && this->file_data_type == IM_BYTE)
+    {
+      unsigned char* bytes = (unsigned char*)this->line_buffer;
+      for (int b = 0; b < this->line_buffer_size; b++)
+        bytes[b] = ~bytes[b];
+    }
+
+    if (this->cpx_int)
+    {
+      int line_count = imImageLineCount(this->width, this->user_color_mode);
+      iTIFFExpandComplexInt(this->line_buffer, line_count, this->cpx_int);
+    }
+
+    if (this->lab_fix)
+      iTIFFLabFix(this->line_buffer, this->width, this->file_data_type, 0);
+
+    if (this->extra_sample_size)
+      iTIFFExtraSamplesFix((imbyte*)this->line_buffer, this->width, this->sample_size, this->extra_sample_size, plane);
+
+    imFileLineBufferRead(this, data, row, plane);
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+
+    imFileLineBufferInc(this, &row, &plane);
+  }
+
+  return IM_ERR_NONE;
+}
+
+/* Writes all the lines of data as scan lines of the current directory, then
+   increments the image count and writes the directory to the file.
+   Returns IM_ERR_ACCESS if a line or the directory can not be written,
+   IM_ERR_COUNTER if the counter requests to stop, IM_ERR_NONE otherwise. */
+int imFormatTIFF::WriteImageData(void* data)
+{
+  const int line_total = imFileLineBufferCount(this);
+
+  imCounterTotal(this->counter, line_total, "Writing TIFF...");
+
+  int row = 0;
+  int plane = 0;
+  int written = 0;
+
+  while (written < line_total)
+  {
+    imFileLineBufferWrite(this, data, row, plane);
+
+    /* CIE Lab is stored with the offset removed from a* and b* */
+    if (this->lab_fix)
+      iTIFFLabFix(this->line_buffer, this->width, this->file_data_type, 1);
+
+    if (TIFFWriteScanline(this->tiff, this->line_buffer, row, (tsample_t)plane) <= 0)
+      return IM_ERR_ACCESS;
+
+    if (!imCounterInc(this->counter))
+      return IM_ERR_COUNTER;
+
+    imFileLineBufferInc(this, &row, &plane);
+    written++;
+  }
+
+  this->image_count++;
+
+  return TIFFWriteDirectory(this->tiff)? IM_ERR_NONE: IM_ERR_ACCESS;
+}
+
+/* Checks if an image with the given compression, color mode and data type
+   can be written as TIFF. A NULL compression skips only the compression
+   checks. Returns IM_ERR_DATA for the Luv color space, IM_ERR_COMPRESS when
+   the compression can not be used with the image, IM_ERR_NONE otherwise. */
+int imFormatTIFF::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  /* Luv has no photometric interpretation in WriteImageInfo, so it is
+     rejected before the shortcut for a NULL compression. */
+  if (imColorModeSpace(color_mode) == IM_LUV)
+    return IM_ERR_DATA;
+
+  if (!compression)
+    return IM_ERR_NONE;
+
+  uint16 Compression = iTIFFCompCalc(compression, color_mode, data_type);
+  if (Compression == (uint16)-1)
+    return IM_ERR_COMPRESS;
+
+  /* no support for 2 bpp or 4 bpp */
+  if (Compression == COMPRESSION_THUNDERSCAN || Compression == COMPRESSION_NEXT)
+    return IM_ERR_COMPRESS;
+
+  /* Binary compression restrictions */
+  if ((Compression == COMPRESSION_CCITTRLE || Compression == COMPRESSION_CCITTRLEW ||
+       Compression == COMPRESSION_CCITTFAX3  || Compression == COMPRESSION_CCITTFAX4) &&
+      imColorModeSpace(color_mode) != IM_BINARY)
+    return IM_ERR_COMPRESS;
+
+  /* JPEG compression restrictions */
+  if (Compression == COMPRESSION_JPEG && 
+      (data_type != IM_BYTE || 
+       imColorModeSpace(color_mode) == IM_MAP || imColorModeSpace(color_mode) == IM_BINARY))
+    return IM_ERR_COMPRESS;
+
+  /* Pixar log accepts only 3 types */
+  if (Compression == COMPRESSION_PIXARLOG && 
+      data_type != IM_BYTE && data_type != IM_USHORT  && data_type != IM_FLOAT)
+    return IM_ERR_COMPRESS;
+
+  /* SGI Luv compression restrictions */
+  if ((Compression == COMPRESSION_SGILOG || Compression == COMPRESSION_SGILOG24) &&
+      (imColorModeSpace(color_mode) != IM_XYZ || data_type != IM_FLOAT))
+    return IM_ERR_COMPRESS;
+
+  return IM_ERR_NONE;
+}
diff --git a/src/im_format_wmv.cpp b/src/im_format_wmv.cpp
new file mode 100644
index 0000000..3b411c3
--- /dev/null
+++ b/src/im_format_wmv.cpp
@@ -0,0 +1,1619 @@
+/** \file
+ * \brief WMV - Windows Media Video Format
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_format_wmv.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im_format.h"
+#include "im_util.h"
+#include "im_format_wmv.h"
+#include "im_counter.h"
+
+#include <wmsdk.h>
+
+//#include <Dvdmedia.h>
+#define AMINTERLACE_1FieldPerSample  0x00000002
+#define AMINTERLACE_Field1First      0x00000004
+
+#include "im_dib.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <memory.h>
+
+
+// Releases a COM interface pointer if it is not NULL and resets it to NULL.
+// NOTE(review): expands to a bare `if` statement (no do/while(0) wrapper) and
+// evaluates `x` several times - use it only as a stand-alone statement with a
+// simple pointer variable, never as the body of an if/else.
+#define SAFE_RELEASE( x )   \
+    if ( x )                \
+    {                       \
+        x->Release();       \
+        x = NULL;           \
+    }
+
+// Frees an array allocated with new[] if the pointer is not NULL and resets
+// it to NULL. Same usage restrictions as SAFE_RELEASE.
+#define SAFE_ARRAYDELETE( x )   \
+    if ( x )                    \
+    {                           \
+        delete[] x;             \
+        x = NULL;               \
+    }
+
+// Configures a codec-provided stream configuration so it can be added to
+// the given profile: assigns a stream number, names, bitrate, buffer window,
+// quality / key frame spacing, media type and the VBR properties.
+// Returns the first failing HRESULT, or S_OK. Failures while setting the
+// VBR properties are not reported.
+static HRESULT iConfigCompressedStream( IWMStreamConfig * pStreamConfig,
+                                        IWMProfile * pIWMProfile,
+                                        BOOL fIsVBR, DWORD dwBitrate, DWORD dwQuality, DWORD dwSecPerKey,
+                                        WM_MEDIA_TYPE * pmt )
+{
+  // A scratch stream is created only to obtain a stream number;
+  // it is released right away and NOT added to the profile.
+  IWMStreamConfig* scratch_config = NULL;
+  HRESULT hr = pIWMProfile->CreateNewStream( WMMEDIATYPE_Video, &scratch_config );
+  if (FAILED(hr))
+    return hr;
+
+  WORD stream_num = 0;
+  hr = scratch_config->GetStreamNumber( &stream_num );
+  scratch_config->Release();
+  scratch_config = NULL;
+  if (FAILED(hr))
+    return hr;
+
+  // Stream identity
+  hr = pStreamConfig->SetStreamNumber( stream_num );
+  if (FAILED(hr))
+    return hr;
+
+  hr = pStreamConfig->SetStreamName( L"Video Stream" );
+  if (FAILED(hr))
+    return hr;
+
+  // Connection names must be unique inside a profile,
+  // so the stream number is made part of the name.
+  WCHAR connection_name[10];
+  swprintf( connection_name, L"Video%d", (DWORD)stream_num );
+
+  hr = pStreamConfig->SetConnectionName( connection_name );
+  if (FAILED(hr))
+    return hr;
+
+  hr = pStreamConfig->SetBitrate( dwBitrate );
+  if (FAILED(hr))
+    return hr;
+
+  hr = pStreamConfig->SetBufferWindow( (DWORD)-1 );
+  if (FAILED(hr))
+    return hr;
+
+  // Video specific properties
+  IWMVideoMediaProps* video_props = NULL;
+  hr = pStreamConfig->QueryInterface( IID_IWMVideoMediaProps, (void **) &video_props );
+  if (FAILED(hr))
+    return hr;
+
+  // Only the result of SetMediaType decides success here;
+  // the results of the two calls before it are not checked.
+  video_props->SetQuality( dwQuality );
+  video_props->SetMaxKeyFrameSpacing( 10000 * (QWORD)dwSecPerKey );
+  hr = video_props->SetMediaType( pmt );
+
+  video_props->Release();
+  video_props = NULL;
+  if (FAILED(hr))
+    return hr;
+
+  // VBR settings live in the property vault of the stream
+  IWMPropertyVault* vault = NULL;
+  hr = pStreamConfig->QueryInterface( IID_IWMPropertyVault, (void**)&vault );
+  if (FAILED(hr))
+    return hr;
+
+  hr = vault->SetProperty( g_wszVBREnabled, WMT_TYPE_BOOL, (BYTE*)&fIsVBR, sizeof( BOOL ) );
+  if ( SUCCEEDED( hr ) && fIsVBR)
+    vault->SetProperty( g_wszVBRQuality, WMT_TYPE_DWORD, (BYTE*)&dwQuality, sizeof( DWORD ) );
+
+  vault->Release();
+  vault = NULL;
+
+  return S_OK;
+}
+
+// Searches the installed video codecs for a format whose media subtype is
+// `subtype`, whose format type is WMFORMAT_VideoInfo and whose bit count is
+// at least `biBitCount`.
+//
+// Codecs are scanned from the last index to the first; the formats of each
+// codec are scanned in ascending order and the first match wins.
+//
+// On return *pNewStreamConfig and *pNewMediaType receive whatever the search
+// ended with, INCLUDING on failure: the stream config may be NULL and the
+// media type may be a left-over buffer. The media type buffer is allocated
+// with new BYTE[]. The caller visible in this file releases both
+// (SAFE_RELEASE / SAFE_ARRAYDELETE).
+//
+// Returns S_OK on a match, NS_E_VIDEO_CODEC_NOT_INSTALLED when nothing
+// matched, or the failing HRESULT of the codec query calls.
+static HRESULT iCreateCompressedStream(IWMProfileManager * pManager,
+                                       IWMStreamConfig* *pNewStreamConfig,
+                                       WM_MEDIA_TYPE* *pNewMediaType,
+                                       WORD biBitCount, GUID subtype)
+{
+  IWMCodecInfo  * pCodecInfo = NULL;
+  IWMMediaProps  * pMediaProps = NULL;
+
+  IWMStreamConfig* pStreamConfig = NULL;
+  WM_MEDIA_TYPE* pMediaType = NULL;
+
+  HRESULT hr = S_OK;
+  WORD wFALSE = 0;
+
+  // do { } while(0) is used only as a "break to cleanup" construct
+  do
+  {
+    hr = pManager->QueryInterface(IID_IWMCodecInfo, (void **) &pCodecInfo);
+    if (FAILED(hr))
+      break;
+
+    DWORD cCodecs;
+    hr = pCodecInfo->GetCodecInfoCount( WMMEDIATYPE_Video, &cCodecs );
+    if (FAILED(hr))
+      break;
+
+    // codecs are visited from the highest index down to 0
+    for( int i = cCodecs-1; i >= 0; i-- )
+    {
+      DWORD cFormats;
+      hr = pCodecInfo->GetCodecFormatCount( WMMEDIATYPE_Video, i, &cFormats );
+      if (FAILED(hr))
+        break;
+
+      for(DWORD j = 0; j < cFormats; j++ )
+      {
+        // drop the candidate of the previous iteration, if any
+        SAFE_RELEASE( pStreamConfig );
+
+        hr = pCodecInfo->GetCodecFormat( WMMEDIATYPE_Video, i, j, &pStreamConfig );
+        if (FAILED(hr))
+          break;
+
+        SAFE_RELEASE( pMediaProps );
+
+        hr = pStreamConfig->QueryInterface( IID_IWMMediaProps, (void **) &pMediaProps );
+        if (FAILED(hr))
+          break;
+
+        // first call only queries the required buffer size
+        DWORD cbMT;
+        hr = pMediaProps->GetMediaType( NULL, &cbMT );
+        if (FAILED(hr))
+          break;
+
+        SAFE_ARRAYDELETE( pMediaType );
+
+        pMediaType = (WM_MEDIA_TYPE *) new BYTE[ cbMT ];
+        if( !pMediaType )
+        {
+          hr = E_OUTOFMEMORY;
+          break;
+        }
+
+        hr = pMediaProps->GetMediaType( pMediaType, &cbMT );
+        if (FAILED(hr))
+          break;
+
+        if( pMediaType->formattype != WMFORMAT_VideoInfo ||
+            pMediaType->subtype != subtype)  // This is our main target
+        {
+          SAFE_RELEASE( pStreamConfig );
+          continue;
+        }
+
+        WMVIDEOINFOHEADER* pVIH = (WMVIDEOINFOHEADER*) pMediaType->pbFormat;
+
+        // leaving the loop with pStreamConfig != NULL signals the match
+        if( pVIH->bmiHeader.biBitCount >= biBitCount )
+          break; // SUCCESS !!!!!
+
+        SAFE_RELEASE( pStreamConfig );
+      }
+
+      // stop scanning codecs on error or as soon as a format was accepted
+      if( FAILED( hr ) || NULL != pStreamConfig )
+        break;
+    }
+
+    if (FAILED(hr))
+      break;
+
+    if( NULL == pStreamConfig )
+    {
+      hr = NS_E_VIDEO_CODEC_NOT_INSTALLED;
+      break;
+    }
+
+  } while( wFALSE );
+
+  SAFE_RELEASE( pCodecInfo );
+  SAFE_RELEASE( pMediaProps );
+
+  // ownership of both objects passes to the caller, even when hr failed
+  *pNewStreamConfig = pStreamConfig;
+  *pNewMediaType = pMediaType;
+
+  return( hr );
+}
+
+// Finds a codec format for `subtype`, adapts its video header to the image
+// described by `bmiHeader` (size rounded up to a multiple of 4, bitrate,
+// frame duration), configures the stream and adds it to the profile.
+// Returns the HRESULT of the first step that failed, or of AddStream.
+static HRESULT iAddCompressedVideoStream( IWMProfileManager * pManager, IWMProfile * pIWMProfile,
+                                          GUID subtype, BITMAPINFOHEADER * bmiHeader, float fps,
+                                          BOOL fIsVBR, DWORD dwBitRate, DWORD dwQuality, DWORD dwSecPerKey)
+{
+  IWMStreamConfig* stream_config = NULL;
+  WM_MEDIA_TYPE* media_type = NULL;
+
+  HRESULT hr = iCreateCompressedStream(pManager, &stream_config, &media_type,
+                                       bmiHeader->biBitCount, subtype);
+  if (SUCCEEDED(hr))
+  {
+    WMVIDEOINFOHEADER* video_header = (WMVIDEOINFOHEADER*) media_type->pbFormat;
+
+    video_header->dwBitRate = dwBitRate;
+
+    // Video content does not play correctly unless it is encoded
+    // to a size that is a multiple of four for both width and height.
+    video_header->bmiHeader.biWidth = ((bmiHeader->biWidth + 3) / 4) * 4;
+    video_header->bmiHeader.biHeight = ((bmiHeader->biHeight + 3) / 4) * 4;
+
+    // source and target rectangles both cover the whole (rounded) frame
+    video_header->rcSource.left = 0;
+    video_header->rcSource.top = 0;
+    video_header->rcSource.bottom = video_header->bmiHeader.biHeight;
+    video_header->rcSource.right = video_header->bmiHeader.biWidth;
+    video_header->rcTarget = video_header->rcSource;
+
+    video_header->dwBitErrorRate = 0;
+    video_header->AvgTimePerFrame = (LONGLONG)(10000000.0f / fps);
+
+    hr = iConfigCompressedStream( stream_config, pIWMProfile,
+                                  fIsVBR, dwBitRate, dwQuality,
+                                  dwSecPerKey, media_type );
+    if (SUCCEEDED(hr))
+      hr = pIWMProfile->AddStream( stream_config );
+  }
+
+  // both objects are owned here, whether or not the steps above succeeded
+  SAFE_RELEASE( stream_config );
+  SAFE_ARRAYDELETE( media_type );
+
+  return hr;
+}
+
+// Configures a stream created by the profile for uncompressed video:
+// names, bitrate, zero buffer window, media type and VBR disabled.
+// Returns the first failing HRESULT, or S_OK. A failure while disabling
+// VBR is not reported.
+static HRESULT iConfigUncompressedStream( IWMStreamConfig * pStreamConfig,
+                                          DWORD dwBitrate,
+                                          WM_MEDIA_TYPE * pmt )
+{
+  HRESULT hr = pStreamConfig->SetStreamName( L"Video Stream" );
+  if (FAILED(hr))
+    return hr;
+
+  // Connection names must be unique inside a profile,
+  // so the stream number is made part of the name.
+  WORD stream_num = 0;
+  hr = pStreamConfig->GetStreamNumber( &stream_num );
+  if (FAILED(hr))
+    return hr;
+
+  WCHAR connection_name[10];
+  swprintf( connection_name, L"Video%d", (DWORD)stream_num );
+
+  hr = pStreamConfig->SetConnectionName( connection_name );
+  if (FAILED(hr))
+    return hr;
+
+  hr = pStreamConfig->SetBitrate( dwBitrate );
+  if (FAILED(hr))
+    return hr;
+
+  hr = pStreamConfig->SetBufferWindow( 0 );
+  if (FAILED(hr))
+    return hr;
+
+  // media type
+  IWMMediaProps* media_props = NULL;
+  hr = pStreamConfig->QueryInterface( IID_IWMMediaProps, (void **) &media_props );
+  if (FAILED(hr))
+    return hr;
+
+  hr = media_props->SetMediaType( pmt );
+  media_props->Release();
+  media_props = NULL;
+  if (FAILED(hr))
+    return hr;
+
+  // explicitly turn VBR off; the result of SetProperty is not checked
+  IWMPropertyVault* vault = NULL;
+  hr = pStreamConfig->QueryInterface( IID_IWMPropertyVault, (void**)&vault );
+  if (FAILED(hr))
+    return hr;
+
+  BOOL vbr_disabled = FALSE;
+  vault->SetProperty( g_wszVBREnabled, WMT_TYPE_BOOL, (BYTE*)&vbr_disabled, sizeof( BOOL ) );
+
+  vault->Release();
+  vault = NULL;
+
+  return S_OK;
+}
+
+// Creates a new uncompressed RGB video stream in the profile.
+//
+//   pProfile        profile that receives the stream
+//   bmiHeader       bitmap header (followed by the palette, if any) of the frames
+//   BitmapInfoSize  size in bytes of the data at bmiHeader that is copied
+//   BitmapDataSize  size in bytes of one frame
+//   fps             frames per second
+//
+// Only 8, 24 and 32 bits per pixel are supported; any other bit count
+// returns E_INVALIDARG (previously the media subtype was left
+// uninitialized in that case).
+// Returns the first failing HRESULT, or the result of AddStream.
+static HRESULT iAddUncompressedVideoStream( IWMProfile * pProfile, 
+                                            BITMAPINFOHEADER * bmiHeader, 
+                                            int BitmapInfoSize, int BitmapDataSize,
+                                            float fps)
+{
+  HRESULT hr = S_OK;
+  WORD wFALSE = 0;
+
+  IWMStreamConfig* pStreamConfig = NULL;
+  WM_MEDIA_TYPE* pMediaType = NULL;
+
+  // Resolve the subtype first so unsupported depths fail before
+  // anything is created.
+  GUID subtype;
+  switch (bmiHeader->biBitCount)
+  {
+  case 32:
+    subtype = WMMEDIASUBTYPE_RGB32;
+    break;
+  case 24:
+    subtype = WMMEDIASUBTYPE_RGB24;
+    break;
+  case 8:
+    subtype = WMMEDIASUBTYPE_RGB8;
+    break;
+  default:
+    return E_INVALIDARG;
+  }
+
+  // do { } while(0) is used only as a "break to cleanup" construct
+  do
+  {
+    hr = pProfile->CreateNewStream( WMMEDIATYPE_Video, &pStreamConfig );
+    if ( FAILED( hr ) )
+      break;
+
+    DWORD cbVideoInfo = sizeof(WMVIDEOINFOHEADER) - sizeof(BITMAPINFOHEADER) + BitmapInfoSize;
+
+    // Create a new Media Type (header and format data in one buffer)
+    pMediaType = (WM_MEDIA_TYPE*) new BYTE[ sizeof( WM_MEDIA_TYPE ) + cbVideoInfo ];
+    if ( !pMediaType)
+    {
+      hr = E_OUTOFMEMORY;
+      break;
+    }
+
+    pMediaType->majortype = WMMEDIATYPE_Video;
+    pMediaType->subtype = subtype;
+    pMediaType->bFixedSizeSamples = TRUE;
+    pMediaType->bTemporalCompression = FALSE;
+    pMediaType->lSampleSize = BitmapDataSize;
+    pMediaType->formattype = WMFORMAT_VideoInfo;
+    pMediaType->pUnk = NULL;
+    pMediaType->cbFormat = cbVideoInfo;
+    pMediaType->pbFormat = ( ((BYTE*) pMediaType) + sizeof( WM_MEDIA_TYPE ) ); // Format data is immediately after media type
+
+    WMVIDEOINFOHEADER * pVIH = (WMVIDEOINFOHEADER *) pMediaType->pbFormat;
+
+    pVIH->rcSource.left = 0;
+    pVIH->rcSource.top = 0;
+    pVIH->rcSource.bottom = bmiHeader->biHeight;
+    pVIH->rcSource.right = bmiHeader->biWidth;
+    pVIH->rcTarget = pVIH->rcSource;
+    pVIH->dwBitRate = (DWORD)(BitmapDataSize * fps);
+    pVIH->dwBitErrorRate = 0;
+    pVIH->AvgTimePerFrame = (LONGLONG)(10000000.0f / fps);
+
+    // copies the bitmap header plus whatever follows it within BitmapInfoSize
+    CopyMemory(&pVIH->bmiHeader, bmiHeader, BitmapInfoSize);
+
+    hr = iConfigUncompressedStream( pStreamConfig, pVIH->dwBitRate, pMediaType );
+    if (FAILED(hr))
+      break;
+
+    hr = pProfile->AddStream( pStreamConfig );
+    if (FAILED(hr))
+      break;
+  }
+  while( wFALSE );
+
+  SAFE_RELEASE( pStreamConfig );
+  SAFE_ARRAYDELETE( pMediaType );
+
+  return( hr );
+}
+
+// Number of compressed and uncompressed media subtypes known to this driver.
+#define WMV_COMPRESS_COUNT 7
+#define WMV_UNCOMPRESS_COUNT 9
+
+// Media subtypes: the first WMV_COMPRESS_COUNT entries are the compressed
+// ones, followed by WMV_UNCOMPRESS_COUNT uncompressed ones.
+// Filled at run time by iInitGuid.
+static GUID iWMVCompSubtypeTable[WMV_COMPRESS_COUNT+WMV_UNCOMPRESS_COUNT];
+
+// Fills iWMVCompSubtypeTable. The order of the compressed entries matches
+// the names in iWMVCompTable starting at index 1.
+static void iInitGuid()
+{
+  GUID* compressed = iWMVCompSubtypeTable;
+  GUID* uncompressed = iWMVCompSubtypeTable + WMV_COMPRESS_COUNT;
+
+  *compressed++ = WMMEDIASUBTYPE_MP43;
+  *compressed++ = WMMEDIASUBTYPE_MP4S;
+  *compressed++ = WMMEDIASUBTYPE_WMV1;
+  *compressed++ = WMMEDIASUBTYPE_MSS1;
+  *compressed++ = WMMEDIASUBTYPE_WMV2;
+  *compressed++ = WMMEDIASUBTYPE_MSS2;
+  *compressed++ = WMMEDIASUBTYPE_WMV3;
+
+  *uncompressed++ = WMMEDIASUBTYPE_RGB555;
+  *uncompressed++ = WMMEDIASUBTYPE_RGB24;
+  *uncompressed++ = WMMEDIASUBTYPE_RGB32;
+  *uncompressed++ = WMMEDIASUBTYPE_I420;
+  *uncompressed++ = WMMEDIASUBTYPE_IYUV;
+  *uncompressed++ = WMMEDIASUBTYPE_YV12;
+  *uncompressed++ = WMMEDIASUBTYPE_YUY2;
+  *uncompressed++ = WMMEDIASUBTYPE_UYVY;
+  *uncompressed++ = WMMEDIASUBTYPE_YVYU;
+}
+
+// Compression names exposed by the format. Index 0 is "NONE"; name i+1
+// corresponds to the compressed subtype stored in iWMVCompSubtypeTable[i].
+static const char* iWMVCompTable[WMV_COMPRESS_COUNT+1] = 
+{
+  "NONE",
+  "MPEG-4v3",   
+  "MPEG-4v1",   
+  "WMV7",       
+  "WMV7Screen",  
+  "WMV8",       
+  "WMV9Screen", 
+  "WMV9"
+};
+
+// Maps a media subtype to its compression name: the matching codec name for
+// a compressed subtype, "NONE" for a known uncompressed subtype and
+// "Unknown" for anything else.
+static const char* iWMFCompFindName(GUID SubType)
+{
+  const int total = WMV_COMPRESS_COUNT+WMV_UNCOMPRESS_COUNT;
+
+  for (int k = 0; k < total; k++)
+  {
+    if (SubType == iWMVCompSubtypeTable[k])
+    {
+      // compressed entries come first; their names start at index 1
+      if (k < WMV_COMPRESS_COUNT)
+        return iWMVCompTable[k+1];
+
+      return iWMVCompTable[0];
+    }
+  }
+
+  return "Unknown";
+}
+
+// Maps a compression name to the compressed media subtype.
+// An empty name selects WMMEDIASUBTYPE_WMV3; a name that is not one of the
+// compressed entries of iWMVCompTable yields WMMEDIASUBTYPE_Base.
+static GUID iWMFCompFindSubType(const char* compression)
+{
+  if (*compression == 0)
+    return WMMEDIASUBTYPE_WMV3;
+
+  // names 1..WMV_COMPRESS_COUNT pair with subtypes 0..WMV_COMPRESS_COUNT-1
+  for (int name_index = 1; name_index <= WMV_COMPRESS_COUNT; name_index++)
+  {
+    if (imStrEqual(compression, iWMVCompTable[name_index]))
+      return iWMVCompSubtypeTable[name_index-1];
+  }
+
+  return WMMEDIASUBTYPE_Base;
+}
+
+// WMV/ASF file format driver built on the Windows Media Format SDK.
+// The same object is used either for reading (Reader, set up by Open) or
+// for writing (Writer, set up by New).
+// NOTE(review): the constructor does not initialize the data members; they
+// are assigned by Open/New and the methods they call.
+class imFormatWMV: public imFormat
+{
+  IWMSyncReader* Reader;        // When reading
+  WM_MEDIA_TYPE* MediaType;     // media type of the selected stream (malloc'ed in Open)
+  WORD stream_number;           // number of the selected video/image stream
+  DWORD seekable;               // value of the "seekable" header attribute
+  int current_frame;
+
+  IWMWriter* Writer;            // When writing
+  DWORD input_number;           // writer input that accepts video
+  DWORD BitmapDataSize;         // size in bytes of one frame
+  DWORD BitmapInfoSize;         // size in bytes of the bitmap header data
+
+  float fps;                    // frames per second
+  WCHAR wfile_name[4096];       // file name converted to wide chars
+  IWMHeaderInfo* HeaderInfo;    // attribute access, for both reader and writer
+  BITMAPINFOHEADER* bmiHeader;  // points inside MediaType when reading; freed in Close when writing
+  unsigned int rmask, gmask, bmask, 
+                roff, goff, boff; /* pixel bit mask control when reading 16 and 32 bpp images */
+
+  void ReadPalette(unsigned char* bmp_colors);
+  void WritePalette(unsigned char* bmp_colors);
+  void FixRGB(int bpp);
+  void InitMasks(imDib* dib);
+  void iReadAttrib(imAttribTable* attrib_table);
+  void iWriteAttrib(imAttribTable* attrib_table);
+  void CalcFPS();
+  void SetOutputProps();
+  int SetInputProps();
+  int SetProfile();
+
+public:
+  // Registers the format name, description, file extensions and the list
+  // of compression names.
+  imFormatWMV()
+    :imFormat("WMV", 
+              "Windows Media Video Format", 
+              "*.wmv;*.asf;", 
+              iWMVCompTable, 
+              WMV_COMPRESS_COUNT+1, 
+              1)
+    {}
+  ~imFormatWMV() {}
+
+  int Open(const char* file_name);
+  int New(const char* file_name);
+  void Close();
+  void* Handle(int index);
+  int ReadImageInfo(int index);
+  int ReadImageData(void* data);
+  int WriteImageInfo();
+  int WriteImageData(void* data);
+  int CanWrite(const char* compression, int color_mode, int data_type) const;
+};
+
+/* Creates the WMV format driver and hands it to the format registry. */
+void imFormatRegisterWMV(void)
+{
+  imFormatWMV* wmv_format = new imFormatWMV();
+  imFormatRegister(wmv_format);
+}
+
+/* Opens an existing file for reading.
+ *
+ * Initializes COM, creates the synchronous reader, selects the first video
+ * or image stream, reads its media type, frame rate, "seekable" flag and
+ * frame count, and configures the reader output.
+ *
+ * Returns IM_ERR_NONE on success. On failure everything acquired here is
+ * released and one of IM_ERR_MEM (reader creation), IM_ERR_OPEN,
+ * IM_ERR_FORMAT, IM_ERR_ACCESS or IM_ERR_DATA (no video stream) is returned.
+ */
+int imFormatWMV::Open(const char* file_name)
+{
+  /* initializes COM */
+  CoInitialize(NULL);
+  iInitGuid();
+
+  HRESULT hr = WMCreateSyncReader(NULL, 0, &Reader);
+  if (hr != 0)
+  {
+    CoUninitialize();
+    return IM_ERR_MEM;
+  }
+
+  /* open existing file */
+  MultiByteToWideChar(CP_ACP, 0, file_name, -1, wfile_name, 4096);
+  hr = Reader->Open(wfile_name);
+  if (hr != 0)
+  {
+    Reader->Release();
+    CoUninitialize();
+            
+    if (hr == NS_E_FILE_OPEN_FAILED || 
+        hr == NS_E_FILE_NOT_FOUND || 
+        hr == NS_E_INVALID_DATA)
+      return IM_ERR_OPEN;
+    else if (hr == NS_E_UNRECOGNIZED_STREAM_TYPE)
+      return IM_ERR_FORMAT;
+    else
+      return IM_ERR_ACCESS;
+  }
+
+  IWMProfile* pProfile = NULL;
+  hr = Reader->QueryInterface(IID_IWMProfile, (VOID**)&pProfile);
+  if (FAILED(hr) || pProfile == NULL)
+  {
+    Reader->Close();
+    Reader->Release();
+    CoUninitialize();
+    return IM_ERR_ACCESS;
+  }
+
+  DWORD stream_count = 0;
+  pProfile->GetStreamCount(&stream_count);
+
+  /* look for the first video or image stream */
+  this->MediaType = NULL;
+  this->stream_number = (WORD)-1;
+  for (int i = 0; i < (int)stream_count; i++)
+  {
+    IWMStreamConfig* StreamConfig = NULL;
+    pProfile->GetStream(i, &StreamConfig);
+    if (StreamConfig == NULL)
+      continue;
+
+    GUID StreamType;
+    StreamConfig->GetStreamType(&StreamType);
+
+    int found = (StreamType == WMMEDIATYPE_Video ||
+                 StreamType == WMMEDIATYPE_Image);
+    if (found)
+    {
+      hr = StreamConfig->GetStreamNumber(&this->stream_number);
+
+      IWMMediaProps* Props;
+      StreamConfig->QueryInterface(IID_IWMMediaProps, (VOID**)&Props);
+
+      /* first call only queries the size of the media type */
+      DWORD pcbType;
+      Props->GetMediaType(NULL, &pcbType);
+      MediaType = (WM_MEDIA_TYPE*)malloc(pcbType);
+      Props->GetMediaType(MediaType, &pcbType);
+
+      Props->Release();
+
+      const char* comp_name = iWMFCompFindName(MediaType->subtype);
+      strcpy(this->compression, comp_name);
+    }
+
+    /* released on every path, including when the stream was selected */
+    StreamConfig->Release();
+
+    if (found)
+      break;
+  }
+
+  /* the profile is only needed to enumerate the streams */
+  pProfile->Release();
+
+  if (this->stream_number == (WORD)-1)
+  {
+    free(MediaType);
+    MediaType = NULL;
+    Reader->Close();
+    Reader->Release();
+    CoUninitialize();
+    return IM_ERR_DATA;
+  }
+
+  hr = Reader->QueryInterface(IID_IWMHeaderInfo, (VOID**)&HeaderInfo);
+  if (FAILED(hr))
+  {
+    /* CalcFPS and the attribute queries below need a valid HeaderInfo */
+    free(MediaType);
+    MediaType = NULL;
+    Reader->Close();
+    Reader->Release();
+    CoUninitialize();
+    return IM_ERR_ACCESS;
+  }
+
+  CalcFPS();
+
+  WMT_ATTR_DATATYPE attrib_type;
+  WORD attrib_length;
+  WORD StreamNumber = 0;   /* stream 0 is used for the file level attributes */
+
+  seekable = 0;
+  attrib_length = 4;
+  attrib_type = WMT_TYPE_BOOL;
+  hr = HeaderInfo->GetAttributeByName(&StreamNumber, g_wszWMSeekable, 
+                                      &attrib_type, (BYTE*)&seekable, &attrib_length);
+
+  QWORD num_frame = 0;
+  attrib_length = 8;
+  attrib_type = WMT_TYPE_QWORD;
+  hr = HeaderInfo->GetAttributeByName(&stream_number, g_wszWMNumberOfFrames, 
+                                      &attrib_type, (BYTE*)&num_frame, &attrib_length);
+
+  if (num_frame == 0)
+  {
+    /* no frame count stored: estimate it from the duration and the fps */
+    QWORD duration = 0;
+    attrib_length = 8;
+    attrib_type = WMT_TYPE_QWORD;
+    hr = HeaderInfo->GetAttributeByName(&StreamNumber, g_wszWMDuration, 
+                                        &attrib_type, (BYTE*)&duration, &attrib_length);
+
+    /* Convert through the signed 64 bit type to keep the whole value;
+       a 32 bit cast would truncate the duration and corrupt the count. */
+    num_frame = (int)(((double)(LONGLONG)duration * (double)fps) / 10000000.0);
+  }
+
+  this->image_count = (int)num_frame;
+
+  SetOutputProps();
+
+  WMT_STREAM_SELECTION wmtSS = WMT_ON;
+  hr = Reader->SetStreamsSelected(1, &stream_number, &wmtSS);
+  hr = Reader->SetReadStreamSamples(stream_number, FALSE);
+
+  /* CalcFPS has set bmiHeader; reset it so ReadImageInfo does the full setup */
+  this->bmiHeader = NULL;
+  this->current_frame = 0;
+
+  return IM_ERR_NONE;
+}
+
+/* Prepares a new file for writing.
+ *
+ * Initializes COM, creates the writer, stores the file name as wide chars
+ * and obtains the header-info interface used to write attributes.
+ *
+ * Returns IM_ERR_NONE on success, IM_ERR_MEM when the writer cannot be
+ * created, or IM_ERR_ACCESS when the header-info interface is unavailable.
+ * On failure everything acquired here is released.
+ */
+int imFormatWMV::New(const char* file_name)
+{
+  /* initializes COM */
+  CoInitialize(NULL);
+  iInitGuid();
+
+  HRESULT hr = WMCreateWriter(NULL, &Writer); 
+  if (hr != 0)
+  {
+    CoUninitialize();
+    return IM_ERR_MEM;
+  }
+
+  MultiByteToWideChar(CP_ACP, 0, file_name, -1, wfile_name, 4096);
+
+  /* Close() and iWriteAttrib() dereference HeaderInfo, so it must be valid */
+  HeaderInfo = NULL;
+  hr = Writer->QueryInterface(IID_IWMHeaderInfo, (VOID**)&HeaderInfo);
+  if (FAILED(hr) || HeaderInfo == NULL)
+  {
+    Writer->Release();
+    CoUninitialize();
+    return IM_ERR_ACCESS;
+  }
+
+  this->bmiHeader = NULL;
+  this->current_frame = 0;
+
+  return IM_ERR_NONE;
+}
+
+/* Releases everything acquired by Open or New and uninitializes COM. */
+void imFormatWMV::Close()
+{
+  HeaderInfo->Release();
+
+  if (!this->is_new)
+  {
+    /* reading: media type buffer and reader */
+    free(MediaType);
+
+    Reader->Close();
+    Reader->Release();
+  }
+  else
+  {
+    /* writing: bitmap header buffer and writer */
+    free(this->bmiHeader);
+
+    Writer->EndWriting();
+    Writer->Release();
+  }
+
+  CoUninitialize();
+}
+
+/* Returns the native handle for index 1: the writer when the file is being
+ * created, otherwise the reader. Any other index returns NULL. */
+void* imFormatWMV::Handle(int index)
+{
+  if (index != 1)
+    return NULL;
+
+  return this->is_new ? (void*)this->Writer : (void*)this->Reader;
+}
+
+// Copies the header attributes of stream 0 into the attribute table.
+//
+// Each attribute is queried twice: once to get the name and data sizes and
+// once to get the contents. The name and data buffers only grow and are
+// reused between attributes.
+// Type mapping: BOOL (only when non-zero) and DWORD -> IM_INT,
+// STRING -> IM_BYTE (wide chars converted in place), BINARY -> IM_BYTE,
+// QWORD -> IM_INT (truncated to 32 bits in place), WORD -> IM_USHORT.
+// Other types are skipped.
+// NOTE(review): the realloc results and the HRESULT of the second
+// GetAttributeByIndex call are not checked.
+void imFormatWMV::iReadAttrib(imAttribTable* attrib_table)
+{
+  WORD StreamNumber = 0;
+  WORD attrib_list_count = 0;
+  HeaderInfo->GetAttributeCount(StreamNumber, &attrib_list_count);
+
+  WCHAR* attrib_name = NULL;
+  int name_max_size = 0;
+  char* name = NULL;
+  WORD attrib_name_count;
+  WMT_ATTR_DATATYPE attrib_type;
+  BYTE* attrib_data = NULL;
+  WORD attrib_length;
+  int data_max_size = 0;
+  HRESULT hr;
+  int data_type, data_count;
+
+  for (WORD i = 0; i < attrib_list_count; i++)
+  {
+    attrib_name_count = 0;
+    attrib_length = 0;
+
+    // first pass: NULL buffers, only the sizes and the type are returned
+    hr = HeaderInfo->GetAttributeByIndex(i, &StreamNumber, NULL, &attrib_name_count, 
+                                            &attrib_type, NULL, &attrib_length);
+
+    if (FAILED(hr))
+      continue;
+
+    if (attrib_length == 0)
+      continue;
+
+    // grow the name buffers (wide and multi-byte) when needed
+    if (name_max_size < attrib_name_count)
+    {
+      attrib_name = (WCHAR*)realloc(attrib_name, attrib_name_count*2);
+      name = (char*)realloc(name, attrib_name_count);
+      name_max_size = attrib_name_count;
+    }
+
+    // grow the data buffer when needed
+    if (data_max_size < attrib_length)
+    {
+      attrib_data = (BYTE*)realloc(attrib_data, attrib_length);
+      data_max_size = attrib_length;
+    }
+
+    // second pass: actual name and data
+    HeaderInfo->GetAttributeByIndex(i, &StreamNumber, attrib_name, &attrib_name_count, 
+                                       &attrib_type, attrib_data, &attrib_length);
+
+    WideCharToMultiByte(CP_ACP, 0, attrib_name, attrib_name_count, name, attrib_name_count, NULL, NULL);
+
+    // `continue` inside this switch skips to the next attribute
+    switch (attrib_type)
+    {
+    case WMT_TYPE_BOOL:
+      {
+        // FALSE booleans are not stored
+        DWORD* ddata = (DWORD*)attrib_data;
+        if (*ddata == 0)
+          continue;
+      }
+      // no break: a TRUE boolean is stored like a DWORD
+    case WMT_TYPE_DWORD:
+      data_type = IM_INT;
+      data_count = attrib_length/4;
+      break;
+    case WMT_TYPE_STRING:
+      data_type = IM_BYTE;
+      data_count = attrib_length/2;
+      {
+        // convert to multi-byte in-place, one character at a time
+        WCHAR* wdata = (WCHAR*)attrib_data;
+        CHAR* sdata = (CHAR*)attrib_data;
+        for (int j = 0; j < data_count; j++)
+        {
+          CHAR cvalue;
+          WideCharToMultiByte(CP_ACP, 0, &wdata[j], 1, &cvalue, 1, NULL, NULL);
+          sdata[j] = cvalue;
+        }
+      }
+      break;
+    case WMT_TYPE_BINARY:
+      data_type = IM_BYTE;
+      data_count = attrib_length;
+      break;
+    case WMT_TYPE_QWORD:
+      {
+        data_type = IM_INT;
+        data_count = attrib_length/8;
+        // convert to int in-place
+        QWORD* qdata = (QWORD*)attrib_data;
+        DWORD* ddata = (DWORD*)attrib_data;
+        for (int j = 0; j < data_count; j++)
+        {
+          ddata[j] = (DWORD)qdata[j];
+        }
+      }
+      break;
+    case WMT_TYPE_WORD:
+      data_type = IM_USHORT;
+      data_count = attrib_length/2;
+      break;
+    default:
+      continue;
+    }
+
+    attrib_table->Set(name, data_type, data_count, attrib_data);
+  }
+
+  if (name) free(name);
+  if (attrib_name) free(attrib_name);
+  if (attrib_data) free(attrib_data);
+}
+
+// Attribute table callback: writes one attribute to stream 0 of the
+// IWMHeaderInfo passed in user_data.
+// IM_BYTE data is written as a wide string when imStrCheck accepts it,
+// otherwise as binary; IM_USHORT is written as WORD and IM_INT as DWORD.
+// Other data types are ignored. Always returns 1.
+static int iAttribSet(void* user_data, int index, const char* name, int data_type, int data_count, const void* data)
+{
+  (void)index;
+  IWMHeaderInfo* header_info = (IWMHeaderInfo*)user_data;
+  WORD stream_num = 0;
+
+  WCHAR wide_name[50];
+  MultiByteToWideChar(CP_ACP, 0, name, -1, wide_name, 50);
+
+  WMT_ATTR_DATATYPE attrib_type;
+  WORD attrib_size = 0;
+  BYTE* converted = NULL;   // only allocated when the data must be converted
+
+  switch(data_type)
+  {
+  case IM_BYTE:
+    if (imStrCheck(data, data_count))
+    {
+      // strings are stored as wide chars
+      attrib_type = WMT_TYPE_STRING;
+      attrib_size = (WORD)(data_count*2);
+      converted = (BYTE*)malloc(attrib_size);
+      MultiByteToWideChar(CP_ACP, 0, (char*)data, data_count, (WCHAR*)converted, data_count);
+    }
+    else
+    {
+      attrib_type = WMT_TYPE_BINARY;
+      attrib_size = (WORD)data_count;
+    }
+    break;
+  case IM_USHORT:
+    attrib_type = WMT_TYPE_WORD;
+    attrib_size = (WORD)(data_count*2);
+    break;
+  case IM_INT:
+    attrib_type = WMT_TYPE_DWORD;
+    attrib_size = (WORD)(data_count*4);
+    break;
+  default:
+    return 1;
+  }
+
+  // the converted copy is used when there is one, the original data otherwise
+  BYTE* payload = converted? converted: (BYTE*)data;
+  header_info->SetAttribute(stream_num, wide_name, attrib_type,
+                            payload, attrib_size);
+
+  if (converted)
+    free(converted);
+
+  return 1;
+}
+
+// Writes every entry of the attribute table to the file header,
+// using iAttribSet as the per-attribute callback.
+void imFormatWMV::iWriteAttrib(imAttribTable* attrib_table)
+{
+  void* callback_data = (void*)HeaderInfo;
+  attrib_table->ForEach(callback_data, iAttribSet);
+}
+
+// Computes `fps` for the selected stream and, as a side effect, points
+// bmiHeader at the bitmap header inside MediaType.
+// The frame rate attribute of the header is preferred. When it is zero
+// the rate is derived from the average time per frame of the media type,
+// and when that is also zero the default of 15 is used.
+void imFormatWMV::CalcFPS()
+{
+  LONGLONG frame_time = 0;
+
+  if (MediaType->formattype == WMFORMAT_VideoInfo)
+  {
+    WMVIDEOINFOHEADER* video_info = (WMVIDEOINFOHEADER*)MediaType->pbFormat;
+    bmiHeader = &video_info->bmiHeader;
+    frame_time = video_info->AvgTimePerFrame;
+  }
+  else if (MediaType->formattype == WMFORMAT_MPEG2Video)
+  {
+    WMVIDEOINFOHEADER2* video_info2 = (WMVIDEOINFOHEADER2*)MediaType->pbFormat;
+    bmiHeader = &video_info2->bmiHeader;
+    frame_time = video_info2->AvgTimePerFrame;
+  }
+
+  WMT_ATTR_DATATYPE attrib_type;
+  WORD attrib_length = 4;
+  DWORD frame_rate = 0;
+  HeaderInfo->GetAttributeByName(&stream_number, g_wszWMVideoFrameRate,   // V9 Only
+                                 &attrib_type, (BYTE*)&frame_rate, &attrib_length);
+
+  if (frame_rate != 0)
+  {
+    fps = (float)frame_rate;
+    return;
+  }
+
+  if (frame_time == 0)
+  {
+    fps = 15;   // default value
+    return;
+  }
+
+  fps = 10000000.0f / (float)frame_time;
+
+  // snap values close to the usual rates to their exact value
+  switch ((int)(fps * 100))
+  {
+  case 2996:
+  case 2997:
+  case 2998:
+    fps = (30.0f * 1000.0f) / 1001.0f;
+    break;
+  case 2396:
+  case 2397:
+  case 2398:
+    fps = (24.0f * 1000.0f) / 1001.0f;
+    break;
+  case 2400:
+    fps = 24.0f;
+    break;
+  case 3000:
+    fps = 30.0f;
+    break;
+  default:
+    break;
+  }
+}
+
+// Selects the first output format of the stream whose subtype is RGB24 or
+// RGB8 as the reader output. Nothing is changed when no such format exists.
+void imFormatWMV::SetOutputProps()
+{
+  DWORD output_number;
+  Reader->GetOutputNumberForStream(stream_number, &output_number);
+
+  DWORD format_count;
+  Reader->GetOutputFormatCount(output_number, &format_count);
+
+  DWORD format_index = 0;
+  while (format_index < format_count)
+  {
+    IWMOutputMediaProps* output_props;
+    Reader->GetOutputFormat(output_number, format_index, &output_props);
+
+    // query the size, then fetch the media type itself
+    DWORD type_size;
+    output_props->GetMediaType(NULL, &type_size);
+    WM_MEDIA_TYPE* media_type = (WM_MEDIA_TYPE*)malloc(type_size);
+    output_props->GetMediaType(media_type, &type_size);
+
+    int accepted = (media_type->subtype == WMMEDIASUBTYPE_RGB24 ||
+                    media_type->subtype == WMMEDIASUBTYPE_RGB8);
+    if (accepted)
+      Reader->SetOutputProps(output_number, output_props);
+
+    output_props->Release();
+    free(media_type);
+
+    if (accepted)
+      return;
+
+    format_index++;
+  }
+}
+
+// Finds the writer input that accepts video and sets its media type to the
+// uncompressed RGB format described by bmiHeader / BitmapInfoSize /
+// BitmapDataSize / fps. Stores the input index in input_number.
+//
+// Returns 1 on success; 0 when there is no video input, when the bit count
+// is not 8, 24 or 32, or when the writer rejects the properties.
+int imFormatWMV::SetInputProps()
+{
+  DWORD input_count = 0;
+  Writer->GetInputCount(&input_count);
+
+  GUID guidInputType;
+  IWMInputMediaProps* Props = NULL;
+
+  input_number = (DWORD)-1;
+  for(DWORD i = 0; i < input_count; i++)
+  {
+    // reset so a failed call cannot leave the pointer released below in use
+    Props = NULL;
+    if (FAILED(Writer->GetInputProps(i, &Props)) || Props == NULL)
+      continue;
+
+    Props->GetType(&guidInputType);
+
+    if(guidInputType == WMMEDIATYPE_Video)
+    {
+      // Props is kept (not released) for the selected input
+      input_number = i;
+      break;
+    }
+
+    Props->Release();
+  }
+
+  if (input_number == (DWORD)-1)
+    return 0;
+
+  // Resolve the subtype before allocating; other depths were previously
+  // passed on with an uninitialized subtype.
+  GUID subtype;
+  switch (this->bmiHeader->biBitCount)
+  {
+  case 32:
+    subtype = WMMEDIASUBTYPE_RGB32;
+    break;
+  case 24:
+    subtype = WMMEDIASUBTYPE_RGB24;
+    break;
+  case 8:
+    subtype = WMMEDIASUBTYPE_RGB8;
+    break;
+  default:
+    Props->Release();
+    return 0;
+  }
+
+  DWORD cbVideoInfo = sizeof(WMVIDEOINFOHEADER) - sizeof(BITMAPINFOHEADER) + this->BitmapInfoSize;
+  BYTE* video_info_buffer = new BYTE[cbVideoInfo];
+  WMVIDEOINFOHEADER* pVideoInfo = (WMVIDEOINFOHEADER*)video_info_buffer;
+
+  pVideoInfo->rcSource.left = 0;
+  pVideoInfo->rcSource.top = 0;
+  pVideoInfo->rcSource.bottom = this->bmiHeader->biHeight;
+  pVideoInfo->rcSource.right = this->bmiHeader->biWidth;
+  pVideoInfo->rcTarget = pVideoInfo->rcSource;
+  pVideoInfo->dwBitRate = (DWORD)(this->BitmapDataSize * fps);
+  pVideoInfo->dwBitErrorRate = 0;
+  pVideoInfo->AvgTimePerFrame = (LONGLONG)(10000000.0f / fps);
+
+  CopyMemory(&(pVideoInfo->bmiHeader), this->bmiHeader, BitmapInfoSize);
+
+  WM_MEDIA_TYPE mt;
+  mt.majortype = WMMEDIATYPE_Video;
+  mt.subtype = subtype;
+  mt.bFixedSizeSamples = TRUE;
+  mt.bTemporalCompression = FALSE;
+  mt.lSampleSize = BitmapDataSize;
+  mt.formattype = WMFORMAT_VideoInfo;
+  mt.pUnk = NULL;
+  mt.cbFormat = cbVideoInfo;
+  mt.pbFormat = (BYTE*)pVideoInfo;
+
+  Props->SetMediaType(&mt);
+
+  HRESULT hr = Writer->SetInputProps(input_number, Props);
+  Props->Release();
+
+  // allocated with new[], so it must be released with delete[] (not free)
+  delete[] video_info_buffer;
+
+  if (FAILED(hr))
+    return 0;
+
+  return 1;
+}
+
+// Builds a profile with a single video stream and assigns it to the writer.
+//
+// With compression "NONE" an uncompressed stream is created. Otherwise a
+// compressed stream is created using these optional attributes:
+// "DataRate" (multiplied by 1000; default 2400*1000), "WMFQuality"
+// (default 50), "MaxKeyFrameTime" (default 5000) and "VBR" (default off).
+//
+// Returns 1 on success and 0 on any failure, including a failure to create
+// the stream (which was previously overwritten by the SetProfile result).
+int imFormatWMV::SetProfile()
+{
+  HRESULT hr;
+
+  IWMProfileManager* ProfileManager = NULL;
+  hr = WMCreateProfileManager(&ProfileManager);
+  if (FAILED(hr) || ProfileManager == NULL)
+    return 0;
+
+  IWMProfile* Profile = NULL;
+  hr = ProfileManager->CreateEmptyProfile(WMT_VER_9_0, &Profile);
+  if (FAILED(hr))
+  {
+    ProfileManager->Release();
+    return 0;
+  }
+
+  if (imStrEqual(this->compression, "NONE"))
+  {
+    hr = iAddUncompressedVideoStream(Profile, 
+                                    this->bmiHeader, 
+                                    this->BitmapInfoSize, this->BitmapDataSize, this->fps);
+  }
+  else
+  {
+    DWORD dwBitRate = 2400*1000;
+    const void* attrib_data = AttribTable()->Get("DataRate");
+    if (attrib_data)
+      dwBitRate = (*(int*)attrib_data) * 1000;
+
+    DWORD dwQuality = 50;
+    attrib_data = AttribTable()->Get("WMFQuality");
+    if (attrib_data)
+      dwQuality = *(int*)attrib_data;
+
+    DWORD dwSecPerKey = 5000;
+    attrib_data = AttribTable()->Get("MaxKeyFrameTime");
+    if (attrib_data)
+      dwSecPerKey = *(int*)attrib_data;
+
+    BOOL fIsVBR = FALSE; // CBR is the default
+    attrib_data = AttribTable()->Get("VBR");
+    if (attrib_data)
+      fIsVBR = *(int*)attrib_data;
+
+    GUID subtype = iWMFCompFindSubType(this->compression);
+    if (subtype == WMMEDIASUBTYPE_Base)
+    {
+      // unknown compression name
+      Profile->Release();
+      ProfileManager->Release();
+      return 0;
+    }
+
+    hr = iAddCompressedVideoStream(ProfileManager, Profile, subtype,
+                                  this->bmiHeader, this->fps,
+                                  fIsVBR, dwBitRate, dwQuality, dwSecPerKey);
+  }
+
+  // only hand the profile to the writer when the stream was really added
+  if (SUCCEEDED(hr))
+    hr = Writer->SetProfile(Profile);
+
+  Profile->Release();
+  ProfileManager->Release();
+
+  if (FAILED(hr))
+    return 0;
+
+  return 1;
+}
+
+// Positions the reader at frame `index` (when the file is seekable) and, on
+// the first call only, fills the image description from the media type:
+// size, color mode, palette and attributes.
+//
+// Returns IM_ERR_NONE on success, IM_ERR_ACCESS when seeking fails and
+// IM_ERR_DATA when the media format type is neither VideoInfo nor
+// MPEG2Video.
+int imFormatWMV::ReadImageInfo(int index)
+{
+  // seek only when the requested frame is not the current one
+  if (this->seekable && this->current_frame != index)
+  {
+    HRESULT hr = Reader->SetRangeByFrame(stream_number, index, 0);
+    this->current_frame = index;
+
+    // frame based seek refused: fall back to a time based seek,
+    // with the start time computed from the frame rate
+    if (hr == NS_E_INVALID_REQUEST)
+    {
+      QWORD start_time = (QWORD)(index * (10000000.0f / fps));
+      hr = Reader->SetRange(start_time, 0);
+    }
+
+    if (hr != S_OK)
+      return IM_ERR_ACCESS;
+  }
+  
+  // the image description is the same for every frame: build it only once
+  if (this->bmiHeader != NULL)
+    return IM_ERR_NONE;
+
+  imAttribTable* attrib_table = AttribTable();
+
+  if (MediaType->formattype == WMFORMAT_VideoInfo)
+  {
+    WMVIDEOINFOHEADER* info_header = (WMVIDEOINFOHEADER*)MediaType->pbFormat;
+    bmiHeader = &info_header->bmiHeader;
+
+    if (info_header->dwBitRate)
+    {
+      // stored divided by 1000
+      int data_rate = info_header->dwBitRate/1000;
+      attrib_table->Set("DataRate", IM_INT, 1, &data_rate);
+    }
+  }
+  else if (MediaType->formattype == WMFORMAT_MPEG2Video)
+  {
+    WMVIDEOINFOHEADER2* info_header = (WMVIDEOINFOHEADER2*)MediaType->pbFormat;
+    bmiHeader = &info_header->bmiHeader;
+
+    if (info_header->dwBitRate)
+    {
+      int data_rate = info_header->dwBitRate/1000;
+      attrib_table->Set("DataRate", IM_INT, 1, &data_rate);
+    }
+
+    // interlacing information, stored only when any interlace flag is set
+    if (info_header->dwInterlaceFlags)
+    {
+      int int_value = 1;
+      attrib_table->Set("Interlaced", IM_INT, 1, &int_value);
+
+      if (info_header->dwInterlaceFlags & AMINTERLACE_1FieldPerSample)
+        int_value = 1;
+      else
+        int_value = 2;
+
+      attrib_table->Set("FieldsPerSample", IM_INT, 1, &int_value);
+       
+      if (info_header->dwInterlaceFlags & AMINTERLACE_Field1First)
+        int_value = 1;
+      else
+        int_value = 2;
+
+      attrib_table->Set("FirstField", IM_INT, 1, &int_value);
+
+      // OBS: The top field in PAL is field 1, and the top field in NTSC is field 2
+    }
+
+    if (info_header->dwPictAspectRatioX)
+      attrib_table->Set("XAspectRatio", IM_INT, 1, &info_header->dwPictAspectRatioX);
+    if (info_header->dwPictAspectRatioY)
+      attrib_table->Set("YAspectRatio", IM_INT, 1, &info_header->dwPictAspectRatioY);
+  }
+  else
+    return IM_ERR_DATA;
+
+  attrib_table->Set("FPS", IM_FLOAT, 1, &fps);
+
+  // a negative height in the bitmap header means a top-down image
+  int top_down = 0;
+  if (bmiHeader->biHeight < 0)
+    top_down = 1;
+
+  this->width = bmiHeader->biWidth;
+  this->height = top_down? -bmiHeader->biHeight: bmiHeader->biHeight;
+
+  int bpp = bmiHeader->biBitCount;
+
+  this->file_data_type = IM_BYTE;
+
+  // more than 8 bpp is packed RGB, otherwise the image uses a palette
+  if (bpp > 8)
+  {
+    this->file_color_mode = IM_RGB;
+    this->file_color_mode |= IM_PACKED;
+  }
+  else
+  {
+    this->palette_count = 1 << bpp;
+    this->file_color_mode = IM_MAP;
+  }
+
+  if (bpp < 8)
+    this->convert_bpp = bpp;
+
+  if (bpp == 32)
+    this->file_color_mode |= IM_ALPHA;
+
+  if (top_down)
+    this->file_color_mode |= IM_TOPDOWN;
+
+  if (bpp <= 8)
+  {
+    /* updates the palette_count based on the number of colors used */
+    if (bmiHeader->biClrUsed != 0 && 
+        (int)bmiHeader->biClrUsed < this->palette_count)
+      this->palette_count = bmiHeader->biClrUsed;
+
+    // the palette is read from the bytes right after the bitmap header
+    ReadPalette((unsigned char*)(bmiHeader + 1));
+  }
+
+  this->line_buffer_extra = 4; // room enough for padding
+
+  iReadAttrib(attrib_table);
+
+  return IM_ERR_NONE;
+}
+
+int imFormatWMV::WriteImageInfo()
+{ // First call: builds the BITMAPINFOHEADER, configures the writer and starts writing. Later calls only verify that size and color space did not change.
+  if (this->bmiHeader)
+  {
+    if (this->bmiHeader->biWidth != width || this->bmiHeader->biHeight != height ||
+        imColorModeSpace(file_color_mode) != imColorModeSpace(user_color_mode))
+      return IM_ERR_DATA;
+
+    return IM_ERR_NONE;  // parameters can be set only once
+  }
+
+  // force bottom up orientation
+  this->file_data_type = IM_BYTE;
+  this->file_color_mode = imColorModeSpace(this->user_color_mode);
+
+  int bpp;
+  if (this->file_color_mode == IM_RGB)
+  {
+    this->file_color_mode |= IM_PACKED;
+    bpp = 24;
+
+    if (imColorModeHasAlpha(this->user_color_mode))
+    {
+      this->file_color_mode |= IM_ALPHA;
+      bpp = 32;
+
+      this->rmask = 0x00FF0000;  // masks/offsets are consumed by FixRGB(32)
+      this->roff = 16;
+
+      this->gmask = 0x0000FF00;
+      this->goff = 8;
+
+      this->bmask = 0x000000FF;
+      this->boff = 0;
+    }
+  }
+  else
+    bpp = 8;
+
+  this->line_buffer_extra = 4; // room enough for padding
+
+  imAttribTable* attrib_table = AttribTable();
+
+  const void* attrib_data = attrib_table->Get("FPS");
+  if (attrib_data)
+    fps = *(float*)attrib_data;
+  else
+    fps = 15;  // default frame rate when no "FPS" attribute was set
+
+  this->BitmapDataSize = this->height * imFileLineSizeAligned(this->width, bpp, 4);
+  DWORD biClrUsed = bpp > 8? 0: this->palette_count;
+  this->BitmapInfoSize = sizeof(BITMAPINFOHEADER) + biClrUsed * sizeof(RGBQUAD);
+  // Header and color table (if any) share one allocation; the table starts right after the header.
+  this->bmiHeader = (BITMAPINFOHEADER*)malloc(this->BitmapInfoSize);
+  if (!this->bmiHeader) return IM_ERR_MEM;  // bmiHeader stays NULL, so a later call starts over
+  this->bmiHeader->biSize = sizeof(BITMAPINFOHEADER);
+  this->bmiHeader->biWidth = this->width;
+  this->bmiHeader->biHeight = this->height;
+  this->bmiHeader->biPlanes = 1;
+  this->bmiHeader->biBitCount = (WORD)bpp;
+  this->bmiHeader->biCompression = BI_RGB;
+  this->bmiHeader->biSizeImage = this->BitmapDataSize;
+  this->bmiHeader->biXPelsPerMeter = 0;
+  this->bmiHeader->biYPelsPerMeter = 0;
+  this->bmiHeader->biClrUsed = biClrUsed;
+  this->bmiHeader->biClrImportant = 0;
+
+  if (this->bmiHeader->biBitCount <= 8)
+    WritePalette((unsigned char*)(this->bmiHeader + 1));
+
+  if (!SetProfile())
+    return IM_ERR_COMPRESS;
+
+  if (!SetInputProps())
+    return IM_ERR_ACCESS;
+
+  HRESULT hr = Writer->SetOutputFilename(wfile_name);
+  if(FAILED(hr))
+    return IM_ERR_ACCESS;
+
+  iWriteAttrib(attrib_table);
+
+  hr = Writer->BeginWriting();
+  if(FAILED(hr))
+    return IM_ERR_ACCESS;
+
+  return IM_ERR_NONE;
+}
+
+void imFormatWMV::ReadPalette(unsigned char* bmp_colors)
+{
+  // Source entries are 4 bytes each, stored blue, green, red; the fourth
+  // byte is ignored. Each entry is re-encoded as an IM color.
+  const unsigned char* entry = bmp_colors;
+  for (int idx = 0; idx < this->palette_count; idx++, entry += 4)
+  {
+    const unsigned char red = entry[2], green = entry[1], blue = entry[0];
+    this->palette[idx] = imColorEncode(red, green, blue);
+  }
+}
+
+void imFormatWMV::WritePalette(unsigned char* bmp_colors)
+{
+  /* convert the IM palette to 4-byte blue, green, red, 0 entries */
+  unsigned char* entry = bmp_colors;
+  for (int idx = 0; idx < this->palette_count; idx++, entry += 4)
+  {
+    imColorDecode(&entry[2], &entry[1], &entry[0], this->palette[idx]);
+    entry[3] = 0;  // fourth byte of every entry is written as zero
+  }
+}
+
+void imFormatWMV::InitMasks(imDib* dib)
+{
+  // Selects the red/green/blue bit masks used by FixRGB, together with the
+  // right-shift (offset) that moves each masked channel down to bit 0.
+
+  if (dib->bmih->biCompression != BI_BITFIELDS)
+  {
+    // No masks stored in the DIB: use fixed layouts, 5 bits per channel for
+    // 16 bpp and 8 bits per channel for everything else.
+    const int is_16bpp = (dib->bmih->biBitCount == 16);
+
+    this->rmask = is_16bpp? 0x7C00: 0x00FF0000;
+    this->roff  = is_16bpp? 10: 16;
+
+    this->gmask = is_16bpp? 0x03E0: 0x0000FF00;
+    this->goff  = is_16bpp? 5: 8;
+
+    this->bmask = is_16bpp? 0x001F: 0x000000FF;
+    this->boff  = 0;
+
+    return;
+  }
+
+  // BI_BITFIELDS: the three masks are read from the start of the DIB color
+  // area, in red, green, blue order.
+  const unsigned int* stored = (const unsigned int*)dib->bmic;
+  unsigned int bits;
+
+  // For each channel, the offset is the number of zero bits below the lowest
+  // set bit of its mask (0 when the mask itself is 0).
+  // red
+  this->rmask = bits = stored[0];
+  this->roff = 0;
+  for (; bits != 0 && !(bits & 0x01); bits >>= 1)
+    this->roff++;
+
+  // green
+  this->gmask = bits = stored[1];
+  this->goff = 0;
+  for (; bits != 0 && !(bits & 0x01); bits >>= 1)
+    this->goff++;
+
+  // blue
+  this->bmask = bits = stored[2];
+  this->boff = 0;
+  for (; bits != 0 && !(bits & 0x01); bits >>= 1)
+    this->boff++;
+}
+
+void imFormatWMV::FixRGB(int bpp)
+{ // Converts this->line_buffer in place, for this->width pixels, between the DIB pixel layout and packed R,G,B[,A] bytes.
+  int x;
+
+  switch (bpp)
+  {
+  case 16:
+    {
+      /* inverts the WORD values if not intel */
+      if (imBinCPUByteOrder() == IM_BIGENDIAN)
+        imBinSwapBytes2(this->line_buffer, this->width);
+
+      imushort* word_data = (imushort*)this->line_buffer;
+      imbyte* byte_data = (imbyte*)this->line_buffer;
+
+      // from end to start: each pixel grows from 2 to 3 bytes, a forward pass would overwrite unread input
+      for (x = this->width-1; x >= 0; x--)
+      {
+        imushort word_value = word_data[x];
+        int c = x*3;
+        byte_data[c]   = (imbyte)((((rmask & word_value) >> roff) * 255) / (rmask >> roff));  // rescale channel to 0..255
+        byte_data[c+1] = (imbyte)((((gmask & word_value) >> goff) * 255) / (gmask >> goff));
+        byte_data[c+2] = (imbyte)((((bmask & word_value) >> boff) * 255) / (bmask >> boff));
+      }
+    }
+    break;
+  case 32:
+    {
+      unsigned int* dword_data = (unsigned int*)this->line_buffer;
+      imbyte* byte_data = (imbyte*)this->line_buffer;
+      // Input and output both use 4 bytes per pixel, so a forward in-place pass is safe.
+      for (x = 0; x < this->width; x++)
+      {
+        unsigned int dword_value = dword_data[x];
+        int c = x*4;  // was x*3: the alpha byte was overwritten by the next pixel and the line came out 3 bytes per pixel
+        byte_data[c]   = (imbyte)((rmask & dword_value) >> roff);
+        byte_data[c+1] = (imbyte)((gmask & dword_value) >> goff);
+        byte_data[c+2] = (imbyte)((bmask & dword_value) >> boff);
+        byte_data[c+3] = (imbyte)((0xFF000000 & dword_value) >> 24);
+      }
+    }
+    break;
+  default: // 24
+    {
+      imbyte* byte_data = (imbyte*)this->line_buffer;
+      for (x = 0; x < this->width; x++)
+      {
+        int c = x*3;
+        imbyte temp = byte_data[c];     // swap R and B
+        byte_data[c] = byte_data[c+2];
+        byte_data[c+2] = temp;
+      }
+    }
+    break;
+  }
+}
+
+int imFormatWMV::ReadImageData(void* data)
+{ // Fetches the next sample from the reader and transfers it, one DIB line at a time, into `data`.
+  imCounterTotal(this->counter, this->height, "Reading WMV Frame...");
+
+  INSSBuffer* pSample = NULL;
+
+  {
+    QWORD cnsSampleTime = 0;  // All will be ignored
+    QWORD cnsDuration = 0;
+    DWORD dwFlags = 0;
+    WORD wStreamNum = 0;
+    DWORD dwOutputNum = 0;
+    HRESULT hr;
+
+    hr = Reader->GetNextSample(stream_number, &pSample, &cnsSampleTime,
+                                              &cnsDuration, &dwFlags,
+                                              &dwOutputNum, &wStreamNum);
+
+    if (FAILED(hr))
+      return IM_ERR_ACCESS;
+  }
+
+  imbyte* dib_bits = NULL;
+  pSample->GetBuffer(&dib_bits);
+  if (!dib_bits)
+  {
+    pSample->Release();
+    return IM_ERR_MEM;
+  }
+  // Wrap the stored header and the sample bytes in an imDib to get palette and line size information.
+  imDib* dib = imDibCreateReference((imbyte*)this->bmiHeader, dib_bits);
+  // NOTE(review): dib is used without a NULL check - confirm imDibCreateReference cannot fail.
+  if (dib->bmih->biBitCount == 16 || dib->bmih->biBitCount == 32)
+    InitMasks(dib);
+  else if (dib->bmih->biBitCount <= 8)
+  {
+    this->palette_count = dib->palette_count;
+    ReadPalette((unsigned char*)dib->bmic);
+  }
+  // Each row is staged in line_buffer, converted by FixRGB when deeper than 8 bpp, then handed to imFileLineBufferRead.
+  for (int row = 0; row < this->height; row++)
+  {
+    CopyMemory(this->line_buffer, dib_bits, dib->line_size);
+    dib_bits += dib->line_size;
+
+    if (dib->bmih->biBitCount > 8)
+      FixRGB(dib->bmih->biBitCount);
+
+    imFileLineBufferRead(this, data, row, 0);
+
+    if (!imCounterInc(this->counter))
+    {
+      imDibDestroy(dib);
+      dib = NULL;
+      pSample->Release();
+      return IM_ERR_COUNTER;
+    }
+  }
+
+  imDibDestroy(dib);
+  pSample->Release();
+  this->current_frame++;  // keeps current_frame in step with the sample just consumed (ReadImageInfo compares it with the requested index)
+
+  return IM_ERR_NONE;
+}
+
+int imFormatWMV::WriteImageData(void* data)
+{
+  imCounterTotal(this->counter, this->height, "Writing WMV Frame...");
+  // Fills a writer-allocated sample with the frame, one DIB line at a time, and submits it with a time stamp derived from image_count and fps.
+  INSSBuffer* pSample = NULL;
+  Writer->AllocateSample(BitmapDataSize, &pSample);
+
+  imbyte* dib_bits = NULL;
+  if (pSample) pSample->GetBuffer(&dib_bits);
+  if (!dib_bits || !pSample)
+  {
+    if (pSample) pSample->Release();
+    return IM_ERR_MEM;
+  }
+
+  imDib* dib = imDibCreateReference((imbyte*)this->bmiHeader, dib_bits);
+  if (dib->bmih->biBitCount <= 8)
+    WritePalette((unsigned char*)dib->bmic);
+
+  for (int row = 0; row < this->height; row++)
+  {
+    imFileLineBufferWrite(this, data, row, 0);
+
+    if (dib->bmih->biBitCount > 8)
+      FixRGB(dib->bmih->biBitCount);
+
+    CopyMemory(dib_bits, this->line_buffer, dib->line_size);
+    dib_bits += dib->line_size;
+
+    if (!imCounterInc(this->counter))
+      { imDibDestroy(dib); pSample->Release(); return IM_ERR_COUNTER; }  // release on abort, as ReadImageData does
+  }
+
+  QWORD VideoTime = (QWORD)(this->image_count * (10000000.0f / fps));
+
+  HRESULT hr = Writer->WriteSample(input_number,
+                                   VideoTime,
+                                   0,
+                                   pSample);
+  // The DIB wrapper and our sample reference are released whether or not the write succeeded (previously leaked on failure).
+  imDibDestroy(dib);
+  pSample->Release();
+  if (hr != 0)
+    return IM_ERR_ACCESS;
+  this->image_count++;
+
+  return IM_ERR_NONE;
+}
+
+int imFormatWMV::CanWrite(const char* compression, int color_mode, int data_type) const
+{
+  (void)compression;  // not examined
+
+  // Only byte data can be written.
+  if (data_type != IM_BYTE)
+    return IM_ERR_DATA;
+
+  // These color spaces are rejected; every other one is accepted.
+  switch (imColorModeSpace(color_mode))
+  {
+  case IM_YCBCR: case IM_LAB: case IM_LUV: case IM_XYZ: case IM_CMYK:
+    return IM_ERR_DATA;
+  }
+  return IM_ERR_NONE;
+}
diff --git a/src/im_image.cpp b/src/im_image.cpp
new file mode 100644
index 0000000..7acae43
--- /dev/null
+++ b/src/im_image.cpp
@@ -0,0 +1,746 @@
+/** \file
+ * \brief Image Manipulation
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_image.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include <stdlib.h>
+#include <memory.h>
+#include <string.h>
+#include <assert.h>
+
+#include "im.h"
+#include "im_image.h"
+#include "im_util.h"
+#include "im_attrib.h"
+#include "im_file.h"
+
+
+int imImageCheckFormat(int color_mode, int data_type)
+{
+  // MAP and BINARY images are only valid with byte data; every other combination is accepted.
+  const int color_space = imColorModeSpace(color_mode);
+  const int indexed = (color_space == IM_MAP || color_space == IM_BINARY);
+
+  return (indexed && data_type != IM_BYTE)? 0: 1;
+}
+
+int imImagePixelOffset(int is_packed, int width, int height, int depth, int col, int row, int plane)
+{
+  // Packed: the components of a pixel are adjacent. Unpacked: every plane is a separate width*height block.
+  if (!is_packed)
+    return plane*width*height + row*width + col;
+  return row*width*depth + col*depth + plane;
+}
+
+int imImageDataSize(int width, int height, int color_mode, int data_type)
+{ // Data size as the product of width, height, imColorModeDepth(color_mode) and imDataTypeSize(data_type).
+  return width * height * imColorModeDepth(color_mode) * imDataTypeSize(data_type);
+}
+                           
+int imImageLineCount(int width, int color_mode)
+{
+  // A packed line interleaves all components, so it holds width*depth values;
+  // an unpacked line holds one value per pixel.
+  const int per_pixel = imColorModeIsPacked(color_mode)? imColorModeDepth(color_mode): 1;
+  return width*per_pixel;
+}
+
+int imImageLineSize(int width, int color_mode, int data_type)
+{ // imImageLineCount() values per line multiplied by imDataTypeSize(data_type).
+  return imImageLineCount(width, color_mode) * imDataTypeSize(data_type);
+}
+
+static void iImageInit(imImage* image, int width, int height, int color_space, int data_type)
+{
+  // (Re)computes every derived field of the image and (re)allocates the array
+  // of plane pointers; the pixel buffer itself is never allocated or resized here.
+  assert(width>0);
+  assert(height>0);
+  assert(color_space >= IM_RGB && color_space <= IM_XYZ);
+  assert(data_type >= IM_BYTE && data_type <= IM_CFLOAT);
+
+  image->width = width;
+  image->height = height;
+  image->color_space = color_space;
+  image->data_type = data_type;
+  image->has_alpha = 0;
+
+  image->depth = imColorModeDepth(color_space);
+  image->count = image->width * image->height;
+  image->line_size = image->width * imDataTypeSize(data_type);
+  image->plane_size = image->line_size * image->height;
+  image->size = image->plane_size * image->depth;
+
+  // When a pointer array already exists, its first entry (the pixel buffer)
+  // is carried over into the new array; the other entries are left unset.
+  void* kept_buffer = image->data? image->data[0]: NULL;
+  const int had_planes = (image->data != NULL);
+  free(image->data);
+  image->data = (void**)malloc((image->depth+1) * sizeof(void*));  // add room for alpha
+  if (had_planes) image->data[0] = kept_buffer;
+}
+
+imImage* imImageInit(int width, int height, int color_space, int data_type, void* data_buffer, long* palette, int palette_count)
+{
+  // Builds an imImage header around an optional caller-supplied pixel buffer and
+  // palette. Returns NULL for an invalid color space/data type pair or when out of memory.
+  if (!imImageCheckFormat(color_space, data_type))
+    return NULL;
+  imImage* image = (imImage*)malloc(sizeof(imImage));
+  if (!image) return NULL;
+  image->data = 0;  // tells iImageInit there is no previous plane array to preserve
+  iImageInit(image, width, height, color_space, data_type);
+  if (!image->data) { free(image); return NULL; }  // plane-pointer array could not be allocated
+
+  // Planes are consecutive slices of data_buffer. Unused slots (all of them when
+  // no buffer is given) are set to NULL so imImageDestroy never frees a garbage pointer.
+  for (int d = 0; d <= image->depth; d++)
+    image->data[d] = (data_buffer && d < image->depth)? (imbyte*)data_buffer + d*image->plane_size: NULL;
+
+  if (imColorModeDepth(color_space) == 1)
+  {
+    image->palette = palette;  // only single-component images keep a palette
+    image->palette_count = palette_count;
+  }
+  else
+  {
+    image->palette = NULL;
+    image->palette_count = 0;
+  }
+
+  image->attrib_table = new imAttribTable(599);
+  return image;
+}
+
+imImage* imImageCreate(int width, int height, int color_space, int data_type)
+{ // Allocates a new image with its own pixel buffer and, for single-component color spaces, its own palette.
+  imImage* image = imImageInit(width, height, color_space, data_type, NULL, NULL, 0);
+  if (!image) return NULL;
+  // Single-component images get a 256-entry palette: black/white for BINARY, a gray ramp otherwise.
+  if (imColorModeDepth(color_space) == 1)
+  {
+    image->palette = (long*)malloc(256*sizeof(long));  // NOTE(review): result is not checked before use
+
+    if (image->color_space == IM_BINARY)
+    {
+      image->palette_count = 2;
+      image->palette[0] = imColorEncode(0, 0, 0);
+      image->palette[1] = imColorEncode(255, 255, 255);
+    }
+    else
+    {
+      image->palette_count = 256;
+      for (int i = 0; i < 256; i++)
+        image->palette[i] = imColorEncode((imbyte)i, (imbyte)i, (imbyte)i);
+    }
+  }
+
+  image->data[0] = malloc(image->size);
+  if (!image->data[0])
+  {
+    imImageDestroy(image);
+    return NULL;
+  }
+  // The remaining planes are consecutive plane_size slices of the same buffer.
+  for (int d = 1; d < image->depth; d++)
+    image->data[d] = (imbyte*)(image->data[0]) + d*image->plane_size;
+  // YCbCr/Lab/Luv with BYTE or USHORT data: plane 0 is zeroed, the 2*count values from plane 1 on are set to 128 or 32768.
+  if ((image->color_space == IM_YCBCR || image->color_space == IM_LAB || image->color_space == IM_LUV) && 
+      (image->data_type == IM_BYTE || image->data_type == IM_USHORT))
+  {
+    memset(image->data[0], 0, image->plane_size);
+
+    if (image->data_type == IM_BYTE)
+    {
+      imbyte* usdata = (imbyte*)image->data[1];
+      for (int i = 0; i < 2*image->count; i++)
+        *usdata++ = 128;
+    }
+    else
+    {
+      imushort* usdata = (imushort*)image->data[1];
+      for (int i = 0; i < 2*image->count; i++)
+        *usdata++ = 32768;
+    }
+  }
+  else
+    memset(image->data[0], 0, image->size);  // every other combination starts fully zeroed
+
+  return image;
+}
+
+imImage* imImageCreateBased(const imImage* image, int width, int height, int color_space, int data_type)
+{
+  assert(image);
+  // Non-positive sizes and negative color_space/data_type mean "same as the reference image". Returns NULL on failure.
+  if (width <= 0) width = image->width;
+  if (height <= 0) height = image->height;
+  if (color_space < 0) color_space = image->color_space;
+  if (data_type < 0) data_type = image->data_type;
+
+  imImage* new_image = imImageCreate(width, height, color_space, data_type);
+  if (new_image) imImageCopyAttributes(image, new_image);  // imImageCreate may return NULL; do not pass that on
+  return new_image;
+}
+
+void imImageAddAlpha(imImage* image)
+{
+  assert(image);
+
+  // Nothing to do when the image already carries an alpha plane.
+  if (!image->has_alpha)
+  {
+    // Grow the single data buffer by one plane; on failure the image is left untouched.
+    void* grown = realloc(image->data[0], image->size+image->plane_size);
+    if (grown)
+    {
+      image->data[0] = grown;
+      for (int plane = 1; plane <= image->depth; plane++)
+        image->data[plane] = (imbyte*)(image->data[0]) + plane*image->plane_size;
+      memset(image->data[image->depth], 0, image->plane_size);  // the new alpha plane starts zeroed
+      image->has_alpha = IM_ALPHA;
+    }
+  }
+}
+
+void imImageReshape(imImage* image, int width, int height)
+{
+  assert(image);
+  // Save the CURRENT geometry and alpha flag: iImageInit overwrites all of them
+  // (it resets has_alpha to 0), and they are needed to size the buffer and to roll back.
+  int old_size = image->size, 
+      old_width = image->width, 
+      old_height = image->height,
+      has_alpha = image->has_alpha;
+
+  iImageInit(image, width, height, image->color_space, image->data_type);
+  image->has_alpha = has_alpha;
+  if (old_size < image->size)
+  {
+    void* data0 = realloc(image->data[0], image->has_alpha? image->size+image->plane_size: image->size);
+    if (!data0) // if failed restore the previous size
+      { iImageInit(image, old_width, old_height, image->color_space, image->data_type); image->has_alpha = has_alpha; }
+    else
+      image->data[0] = data0;
+  }
+
+  memset(image->data[0], 0, image->has_alpha? image->size+image->plane_size: image->size);  // contents are always cleared
+  int depth = image->has_alpha? image->depth+1: image->depth;
+  for (int d = 1; d < depth; d++)
+    image->data[d] = (imbyte*)image->data[0] + d*image->plane_size;
+}
+
+void imImageDestroy(imImage* image)
+{
+  // Releases the attribute table, the pixel buffer, the palette, the plane
+  // pointer array and finally the image structure itself.
+  assert(image);
+
+  // The attribute table is stored as an untyped pointer, hence the cast.
+  delete (imAttribTable*)image->attrib_table;
+
+  // Only data[0] is released (the other plane pointers point into the same
+  // buffer); free(NULL) is a no-op, so no guards are needed.
+  free(image->data[0]);
+  free(image->palette);
+  free(image->data);
+
+  // This will help detect invalid image usage after destroy.
+  memset(image, 0, sizeof(imImage));
+
+  free(image);
+}
+
+void imImageClear(imImage* image)
+{ // Zero-fills the pixel buffer, including the extra alpha plane when has_alpha is set.
+  assert(image);
+  memset(image->data[0], 0, image->has_alpha? image->size+image->plane_size: image->size);
+}
+
+int imImageIsBitmap(const imImage* image)
+{ // Forwards the image's color space and data type to imColorModeIsBitmap and returns its result.
+  assert(image);
+  return imColorModeIsBitmap(image->color_space, image->data_type);
+}
+
+void imImageCopy(const imImage* src_image, imImage* dst_image)
+{
+  assert(src_image);
+  assert(dst_image);
+  // Pixel data first, then palette and attributes (skipped when copying an image onto itself).
+  imImageCopyData(src_image, dst_image);
+  if (dst_image == src_image)
+    return;
+  imImageCopyAttributes(src_image, dst_image);
+}
+
+void imImageCopyData(const imImage* src_image, imImage* dst_image)
+{
+  assert(src_image);
+  assert(dst_image);
+  assert(imImageMatch(src_image, dst_image));
+  if (dst_image == src_image)
+    return;
+  // The alpha plane is copied only when both images have one.
+  const int both_alpha = (src_image->has_alpha && dst_image->has_alpha);
+  memcpy(dst_image->data[0], src_image->data[0], both_alpha? src_image->size+src_image->plane_size: src_image->size);
+}
+
+imImage* imImageDuplicate(const imImage* image)
+{
+  assert(image);
+
+  // Same geometry, color space and data type as the source; NULL when creation fails.
+  imImage* copy = imImageCreate(image->width, image->height, image->color_space, image->data_type);
+  if (copy)
+  {
+    if (image->has_alpha)
+      imImageAddAlpha(copy);
+
+    imImageCopy(image, copy);  // pixel data, palette and attributes
+  }
+  return copy;
+}
+
+imImage* imImageClone(const imImage* image)
+{
+  assert(image);
+
+  // Same geometry, color space and data type as the source; pixel data is NOT copied.
+  imImage* twin = imImageCreate(image->width, image->height, image->color_space, image->data_type);
+  if (twin)
+  {
+    if (image->has_alpha)
+      imImageAddAlpha(twin);
+
+    imImageCopyAttributes(image, twin);  // palette and attribute table only
+  }
+  return twin;
+}
+
+void imImageSetAttribute(imImage* image, const char* attrib, int data_type, int count, const void* data)
+{
+  assert(image);
+  assert(attrib);
+  imAttribTable* table = (imAttribTable*)image->attrib_table;
+  if (!data)
+  {
+    table->UnSet(attrib);  // NULL data removes the attribute
+    return;
+  }
+
+  if (count == -1 && data_type == IM_BYTE) // Data is zero terminated like a string
+    count = strlen((char*)data)+1;
+  table->Set(attrib, data_type, count, data);
+}
+
+const void* imImageGetAttribute(const imImage* image, const char* attrib, int *data_type, int *count)
+{ // Looks `attrib` up in the image's attribute table and returns whatever imAttribTable::Get returns; data_type and count are passed through to it.
+  assert(image);
+  assert(attrib);
+  imAttribTable* attrib_table = (imAttribTable*)image->attrib_table;
+  return attrib_table->Get(attrib, data_type, count);
+}
+
+static void iAttributeTableCopy(const void* src_attrib_table, void* dst_attrib_table)
+{ // Casts both untyped pointers to imAttribTable and calls CopyFrom on the destination.
+  const imAttribTable* src_table = (const imAttribTable*)src_attrib_table;
+  imAttribTable* dst_table = (imAttribTable*)dst_attrib_table;
+  dst_table->CopyFrom(*src_table);
+}
+
+void imImageCopyAttributes(const imImage* src_image, imImage* dst_image)
+{
+  assert(src_image);
+  assert(dst_image);
+  // The palette is copied only between images that both have one and share the same color space.
+  const int copy_palette = src_image->palette && dst_image->palette &&
+                           src_image->color_space == dst_image->color_space;
+  if (copy_palette)
+  {
+    dst_image->palette_count = src_image->palette_count;
+    memcpy(dst_image->palette, src_image->palette, 256*sizeof(long));  // always the full 256 entries
+  }
+  iAttributeTableCopy(src_image->attrib_table, dst_image->attrib_table);
+}
+
+static int iAttribCB(void* user_data, int index, const char* name, int data_type, int count, const void* data)
+{ // ForEach callback: stores the attribute name pointer (no copy is made) at position `index` of the char* array passed as user_data.
+  (void)data_type;
+  (void)data;
+  (void)count;
+  char** attrib = (char**)user_data;
+  attrib[index] = (char*)name;
+  return 1;  // NOTE(review): presumably tells ForEach to continue - confirm against imAttribTable
+}
+
+void imImageGetAttributeList(const imImage* image, char** attrib, int *attrib_count)
+{ // Stores the attribute count in *attrib_count and, when attrib is not NULL, fills attrib[] with the attribute names.
+  assert(image);
+  assert(attrib_count);
+
+  imAttribTable* attrib_table = (imAttribTable*)image->attrib_table;
+  *attrib_count = attrib_table->Count();
+  // The name pointers handed to iAttribCB are stored as-is; no string is copied.
+  if (attrib) attrib_table->ForEach((void*)attrib, iAttribCB);
+}
+
+void imImageSetPalette(imImage* image, long* palette, int palette_count)
+{
+  assert(image);
+  // The pointer itself is stored (no copy) and imImageDestroy later frees it. Images without a palette are left unchanged.
+  if (image->palette)
+  {
+    if (palette != image->palette) free(image->palette);  // re-installing the current palette must not free it
+    image->palette = palette;
+    image->palette_count = palette_count;
+  }
+}
+
+int imImageMatchSize(const imImage* image1, const imImage* image2)
+{
+  assert(image1);
+  assert(image2);
+  // 1 when width and height are both equal, 0 otherwise.
+  if (image1->width != image2->width) return 0;
+  return image1->height == image2->height;
+}
+
+int imImageMatchColor(const imImage* image1, const imImage* image2)
+{
+  assert(image1);
+  assert(image2);
+  // 1 when data type and color space are both equal, 0 otherwise.
+  if (image1->data_type != image2->data_type) return 0;
+  return image1->color_space == image2->color_space;
+}
+
+int imImageMatchDataType(const imImage* image1, const imImage* image2)
+{
+  assert(image1);
+  assert(image2);
+
+  // 1 when data type, width and height are all equal (color space may differ), 0 otherwise.
+  const int same_size = (image1->width == image2->width && image1->height == image2->height);
+  return image1->data_type == image2->data_type && same_size;
+}
+
+int imImageMatchColorSpace(const imImage* image1, const imImage* image2)
+{
+  assert(image1);
+  assert(image2);
+
+  // 1 when width, height and color space are all equal (data type may differ), 0 otherwise.
+  const int same_size = (image1->width == image2->width && image1->height == image2->height);
+  return same_size && image1->color_space == image2->color_space;
+}
+
+int imImageMatch(const imImage* image1, const imImage* image2)
+{
+  assert(image1);
+  assert(image2);
+
+  // 1 when data type, width, height and color space are all equal, 0 otherwise.
+  const int same_size = (image1->width == image2->width && image1->height == image2->height);
+  const int same_kind = (image1->data_type == image2->data_type && image1->color_space == image2->color_space);
+  return same_kind && same_size;
+}
+
+void imImageSetBinary(imImage* image)
+{
+  assert(image);
+
+  // Images without a palette are left untouched.
+  if (!image->palette)
+    return;
+  image->color_space = IM_BINARY;
+  image->palette[0] = imColorEncode(0, 0, 0);
+  image->palette[1] = imColorEncode(255, 255, 255);
+  image->palette_count = 2;
+}
+
+void imImageMakeBinary(imImage *image)
+{
+  assert(image);
+
+  // Every non-zero byte of the first plane becomes 1; zeros stay zero.
+  imbyte* const first = (imbyte*)image->data[0];
+  imbyte* const last = first + image->count;
+  for (imbyte* px = first; px < last; px++)
+  {
+    if (*px != 0) *px = 1;
+  }
+}
+
+static void iImageGrayCheckChange(imImage *image)
+{
+  int i, do_remap = 0;
+  imbyte remap[256];
+  imbyte r, g, b;
+  for (i = 0; i < 256; i++) remap[i] = (imbyte)i;  // identity for indices >= palette_count (they were read uninitialised before)
+  for (i = 0; i < image->palette_count; i++)
+  {
+    imColorDecode(&r, &g, &b, image->palette[i]);
+    // A single non-gray entry means the image stays a MAP image, untouched.
+    if (r != g || g != b)
+      return;
+
+    remap[i] = r;
+    // Remapping is only needed when some entry is not already at its own gray level.
+    if (r != i)
+      do_remap = 1;
+  }
+
+  if (do_remap)
+  {
+    imbyte *map = (imbyte*)image->data[0];
+    for(i = 0; i < image->count; i++)
+    {
+      *map = remap[*map];  // palette index -> gray level
+      map++;
+    }
+  }
+
+  image->color_space = IM_GRAY;
+  image->palette_count = 256;
+  // All entries were gray: the image becomes IM_GRAY with the standard 256-level ramp as palette.
+  for (i = 0; i < 256; i++)
+    image->palette[i] = imColorEncode((imbyte)i, (imbyte)i, (imbyte)i);
+}
+
+static int iImageCheckBinary(const imImage* image)
+{ // Returns 1 when the image can be treated as binary, 0 otherwise.
+  if (image->color_space == IM_MAP && image->palette_count == 2)
+  {
+    long black = imColorEncode(0, 0, 0);
+    long white = imColorEncode(255, 255, 255);
+    if ((image->palette[0] == black || image->palette[0] == white) &&
+        (image->palette[1] == black || image->palette[1] == white))
+    {
+      return 1;  // two-entry palette made only of black and/or white
+    }
+  }
+  // Gray byte images qualify when every pixel of the first plane is 0, 1 or 255.
+  if (image->color_space == IM_GRAY && image->data_type == IM_BYTE)
+  {
+    imbyte* map = (imbyte*)image->data[0];
+    for (int i = 0; i < image->count; i++)
+    {
+      if (*map != 0 && *map != 255 && *map != 1) // allow 255 and 1
+        return 0;
+
+      map++;
+    }
+
+    return 1;
+  }
+  else
+    return 0;
+}
+
+static void iLoadImageData(imFile* ifile, imImage* image, int *error, int bitmap)
+{ // Shared tail of the imFileLoad* functions: copies the file attributes, reads the pixels and normalizes MAP/GRAY images.
+  iAttributeTableCopy(ifile->attrib_table, image->attrib_table);
+
+  *error = imFileReadImageData(ifile, image->data[0], bitmap, image->has_alpha);
+  // NOTE(review): the steps below run even when *error is non-zero.
+  if (image->color_space == IM_MAP)
+  {
+    imFileGetPalette(ifile, image->palette, &image->palette_count);
+
+    // convert to gray if all colors are grays
+    iImageGrayCheckChange(image);
+  }
+
+  // since Binary is a special case of Gray, check this
+  if (iImageCheckBinary(image))
+  {
+    imImageSetBinary(image);
+    imImageMakeBinary(image);
+  }
+}
+
+imImage* imFileLoadImage(imFile* ifile, int index, int *error)
+{ // Creates an image with the file's own color space and data type for image `index` and loads it.
+  assert(ifile);
+
+  int width, height, color_mode, data_type;
+  *error = imFileReadImageInfo(ifile, index, &width, &height, &color_mode, &data_type);
+  if (*error) return NULL; 
+
+  imImage* image = imImageCreate(width, height, imColorModeSpace(color_mode), data_type);
+  if (!image) 
+  {
+    *error = IM_ERR_MEM;
+    return NULL;
+  }
+
+  if (imColorModeHasAlpha(color_mode))
+    imImageAddAlpha(image);
+  // The image is returned even if reading the pixels failed; callers must check *error.
+  iLoadImageData(ifile, image, error, 0);
+
+  return image;
+}
+
+void imFileLoadImageFrame(imFile* ifile, int index, imImage* image, int *error)
+{ // Loads image `index` into an existing image; fails with IM_ERR_DATA unless size, depth, alpha and data type all match.
+  assert(ifile);
+
+  int width, height, color_mode, data_type;
+  *error = imFileReadImageInfo(ifile, index, &width, &height, &color_mode, &data_type);
+  if (*error) return; 
+
+  // check if we can reuse the data
+  if (image->width != width || 
+      image->height != height ||
+      image->depth != imColorModeDepth(imColorModeSpace(color_mode)) ||
+      image->has_alpha != imColorModeHasAlpha(color_mode) ||
+      image->data_type != data_type) 
+  {
+    *error = IM_ERR_DATA;
+    return;
+  }
+  // Only the depth has to match, so the color space is replaced by the one found in the file.
+  image->color_space = imColorModeSpace(color_mode);
+  iLoadImageData(ifile, image, error, 0);
+}
+
+imImage* imFileLoadBitmap(imFile* ifile, int index, int *error)
+{ // Like imFileLoadImage, but the image is created as IM_BYTE with the color space given by imColorModeToBitmap.
+  assert(ifile);
+
+  int width, height, color_mode, data_type;
+  *error = imFileReadImageInfo(ifile, index, &width, &height, &color_mode, &data_type);
+  if (*error) return NULL; 
+
+  imImage* image = imImageCreate(width, height, imColorModeToBitmap(color_mode), IM_BYTE);
+  if (!image) 
+  {
+    *error = IM_ERR_MEM;
+    return NULL;
+  }
+
+  if (imColorModeHasAlpha(color_mode))
+    imImageAddAlpha(image);
+  // The image is returned even if reading the pixels failed; callers must check *error.
+  iLoadImageData(ifile, image, error, 1);
+
+  return image;
+}
+
+void imFileLoadBitmapFrame(imFile* ifile, int index, imImage* image, int *error)
+{ // Bitmap variant of imFileLoadImageFrame: the existing image must be IM_BYTE and match size, bitmap depth and alpha.
+  assert(ifile);
+
+  int width, height, color_mode, data_type;
+  *error = imFileReadImageInfo(ifile, index, &width, &height, &color_mode, &data_type);
+  if (*error) return; 
+
+  // check if we can reuse the data
+  if (image->width != width || 
+      image->height != height ||
+      image->depth != imColorModeDepth(imColorModeToBitmap(color_mode)) ||
+      image->has_alpha != imColorModeHasAlpha(color_mode) ||
+      image->data_type != IM_BYTE) 
+  {
+    *error = IM_ERR_DATA;
+    return;
+  }
+  // Only the depth has to match, so the color space is replaced by the bitmap color space of the file image.
+  image->color_space = imColorModeToBitmap(color_mode);
+  iLoadImageData(ifile, image, error, 1);
+}
+
+imImage* imFileLoadImageRegion(imFile* ifile, int index, int bitmap, int *error, 
+                          int xmin, int xmax, int ymin, int ymax, int width, int height)
+{ // Creates a width x height image and loads image `index` after publishing the requested view as "View*" file attributes.
+  assert(ifile);
+
+  int color_mode, data_type;
+  *error = imFileReadImageInfo(ifile, index, NULL, NULL, &color_mode, &data_type);
+  if (*error) return NULL; 
+
+  imImage* image = imImageCreate(width, height, 
+                                 bitmap? imColorModeToBitmap(color_mode): imColorModeSpace(color_mode), 
+                                 bitmap? IM_BYTE: data_type);
+  if (!image) 
+  {
+    *error = IM_ERR_MEM;
+    return NULL;
+  }
+
+  if (imColorModeHasAlpha(color_mode))
+    imImageAddAlpha(image);
+  // NOTE(review): presumably the format driver reads these attributes to crop/scale while decoding - confirm per format.
+  imFileSetAttribute(ifile, "ViewXmin", IM_INT, 1, &xmin);
+  imFileSetAttribute(ifile, "ViewXmax", IM_INT, 1, &xmax);
+  imFileSetAttribute(ifile, "ViewYmin", IM_INT, 1, &ymin);
+  imFileSetAttribute(ifile, "ViewYmax", IM_INT, 1, &ymax);
+  imFileSetAttribute(ifile, "ViewWidth", IM_INT, 1, &width);
+  imFileSetAttribute(ifile, "ViewHeight", IM_INT, 1, &height);
+
+  iLoadImageData(ifile, image, error, bitmap);
+
+  return image;
+}
+
+int imFileSaveImage(imFile* ifile, const imImage* image)
+{ // Writes one image: palette (MAP only) and attributes are handed to the file first, then the image info, then the pixel data.
+  assert(ifile);
+  assert(image);
+
+  if (image->color_space == IM_MAP)
+    imFileSetPalette(ifile, image->palette, image->palette_count);
+
+  iAttributeTableCopy(image->attrib_table, ifile->attrib_table);
+  // The file color mode is the image color space plus IM_ALPHA when the image has an alpha plane.
+  int color_mode = image->color_space;
+  if (image->has_alpha)
+    color_mode |= IM_ALPHA;
+
+  int error = imFileWriteImageInfo(ifile, image->width, image->height, color_mode, image->data_type);
+  if (error) return error;
+
+  return imFileWriteImageData(ifile, image->data[0]);
+}
+
+imImage* imFileImageLoad(const char* file_name, int index, int *error)
+{ // Opens file_name, loads image `index` with imFileLoadImage and closes the file; NULL when the file cannot be opened.
+  imFile* ifile = imFileOpen(file_name, error);
+  if (!ifile) return NULL;
+  imImage* image = imFileLoadImage(ifile, index, error);
+  imFileClose(ifile);
+  return image;
+}
+
+imImage* imFileImageLoadBitmap(const char* file_name, int index, int *error)
+{ // Opens file_name, loads image `index` with imFileLoadBitmap and closes the file; NULL when the file cannot be opened.
+  imFile* ifile = imFileOpen(file_name, error);
+  if (!ifile) return NULL;
+  imImage* image = imFileLoadBitmap(ifile, index, error);
+  imFileClose(ifile);
+  return image;
+}
+
+imImage* imFileImageLoadRegion(const char* file_name, int index, int bitmap, int *error, 
+                               int xmin, int xmax, int ymin, int ymax, int width, int height)
+{ // Opens file_name, forwards every argument to imFileLoadImageRegion and closes the file; NULL when the file cannot be opened.
+  imFile* ifile = imFileOpen(file_name, error);
+  if (!ifile) return NULL;
+  imImage* image = imFileLoadImageRegion(ifile, index, bitmap, error, xmin, xmax, ymin, ymax, width, height);
+  imFileClose(ifile);
+  return image;
+}
+
+int imFileImageSave(const char* file_name, const char* format, const imImage* image)
+{ // Creates file_name in the given format, saves the image and closes the file; returns the error code of the first failing step.
+  int error;
+  imFile* ifile = imFileNew(file_name, format, &error);
+  if (!ifile) return error;
+  error = imFileSaveImage(ifile, image);
+  imFileClose(ifile);
+  return error;
+}
diff --git a/src/im_jp2.def b/src/im_jp2.def
new file mode 100644
index 0000000..c81c208
--- /dev/null
+++ b/src/im_jp2.def
@@ -0,0 +1,2 @@
+EXPORTS
+  imFormatRegisterJP2
\ No newline at end of file
diff --git a/src/im_jp2.mak b/src/im_jp2.mak
new file mode 100644
index 0000000..d88c984
--- /dev/null
+++ b/src/im_jp2.mak
@@ -0,0 +1,48 @@
+PROJNAME = im
+LIBNAME = im_jp2
+OPT = YES
+
+SRCJP2 =  \
+    base/jas_cm.c      base/jas_icc.c      base/jas_init.c    base/jas_stream.c  base/jas_version.c \
+    base/jas_debug.c   base/jas_iccdata.c  base/jas_malloc.c  base/jas_string.c  base/jas_tmr.c \
+    base/jas_getopt.c  base/jas_image.c    base/jas_seq.c     base/jas_tvp.c            \
+    jp2/jp2_cod.c  jp2/jp2_dec.c  jp2/jp2_enc.c                                         \
+    jpc/jpc_bs.c   jpc/jpc_math.c   jpc/jpc_mqenc.c  jpc/jpc_t1enc.c  jpc/jpc_tagtree.c \
+    jpc/jpc_cs.c   jpc/jpc_mct.c    jpc/jpc_qmfb.c   jpc/jpc_t2cod.c  jpc/jpc_tsfb.c    \
+    jpc/jpc_dec.c  jpc/jpc_mqcod.c  jpc/jpc_t1cod.c  jpc/jpc_t2dec.c  jpc/jpc_util.c    \
+    jpc/jpc_enc.c  jpc/jpc_mqdec.c  jpc/jpc_t1dec.c  jpc/jpc_t2enc.c
+SRCJP2  := $(addprefix libjasper/, $(SRCJP2))
+
+SRC = jas_binfile.c im_format_jp2.cpp $(SRCJP2)
+                                       
+INCLUDES = libjasper
+
+DEFINES  = EXCLUDE_JPG_SUPPORT EXCLUDE_MIF_SUPPORT EXCLUDE_PNM_SUPPORT \
+           EXCLUDE_BMP_SUPPORT EXCLUDE_PGX_SUPPORT EXCLUDE_RAS_SUPPORT \
+           EXCLUDE_TIFF_SUPPORT JAS_GEO_OMIT_PRINTING_CODE
+
+ifneq ($(findstring Win, $(TEC_SYSNAME)), )
+  ifneq ($(findstring owc1, $(TEC_UNAME)), )
+    DEFINES += JAS_TYPES
+  endif         
+  ifneq ($(findstring dll, $(TEC_UNAME)), )
+    DEFINES += JAS_WIN_MSVC_BUILD JAS_TYPES
+  endif         
+  ifneq ($(findstring vc, $(TEC_UNAME)), )
+    DEFINES += JAS_WIN_MSVC_BUILD JAS_TYPES
+  endif         
+  ifneq ($(findstring bc, $(TEC_UNAME)), )
+    DEFINES += JAS_TYPES
+  endif         
+  ifneq ($(findstring gcc, $(TEC_UNAME)), )
+    DEFINES += HAVE_UNISTD_H JAS_TYPES
+  endif         
+  ifneq ($(findstring mingw, $(TEC_UNAME)), )
+    DEFINES += HAVE_UNISTD_H HAVE_STDINT_H JAS_TYPES
+  endif         
+else
+  DEFINES += HAVE_UNISTD_H JAS_TYPES
+endif
+
+USE_IM=Yes
+IM = ..
diff --git a/src/im_lib.cpp b/src/im_lib.cpp
new file mode 100644
index 0000000..66ded68
--- /dev/null
+++ b/src/im_lib.cpp
@@ -0,0 +1,34 @@
+/** \file
+ * \brief Library Management
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_lib.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "im_lib.h"
+
+static const char *iVersion = "TECVERID.str:IM:LIB:" IM_VERSION;  /* space + const keep this valid C++11 */
+
+const char iIdent[] =
+  "$IM: " IM_VERSION " " IM_COPYRIGHT " $\n"
+  "$URL: www.tecgraf.puc-rio.br/im $\n";
+
+const char* imVersion(void)
+{                  
+	 (void)iVersion;
+	 (void)iIdent;
+   return IM_VERSION;
+}
+
+const char* imVersionDate(void)
+{
+  return IM_VERSION_DATE;
+}
+
+int imVersionNumber(void)
+{
+  return IM_VERSION_NUMBER;
+}
diff --git a/src/im_lua3.c b/src/im_lua3.c
new file mode 100644
index 0000000..1c85e59
--- /dev/null
+++ b/src/im_lua3.c
@@ -0,0 +1,1297 @@
+/** \file
+ * \brief LuaBinding for Lua 3
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_lua3.c,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <lua.h>
+
+#include "im.h"
+#include "im_lib.h"
+
+#include <cd.h>
+#include <cdlua3_private.h>
+
+#include "imlua.h"
+
+
+/***************************************************************************\
+* Globals.                                                                  *
+\***************************************************************************/
+static int color_tag;
+static int imagergb_tag;
+static int imagergba_tag;
+static int palette_tag;
+static int imagemap_tag;
+static int channel_tag;
+
+static channel_t channel_info;
+
+#define IMLUA_VERSION "IMLua 1.2"
+/***************************************************************************\
+* Creation and destruction functions.                                       *
+\***************************************************************************/
+
+/***************************************************************************\
+* Creates a buffer for an RGB image.                                        *
+\***************************************************************************/
+static void imlua_createimagergb(void)
+{
+  lua_Object width, height;
+  long int width_i, height_i;
+  imagergb_t *imagergb_p;
+
+  width = lua_getparam(1);
+  height = lua_getparam(2);
+  if (!(lua_isnumber(width) && lua_isnumber(height)))
+    lua_error("imCreateImageRGB: invalid dimensions parameter!");
+  width_i = (long int) lua_getnumber(width);
+  height_i = (long int) lua_getnumber(height);
+  if (width_i < 1 || height_i < 1)
+    lua_error("imCreateImageRGB: image dimensions should be positive integers!");
+
+  if (lua_getparam(3) != LUA_NOOBJECT)
+    lua_error("imCreateImageRGB: too many parameters!");
+  
+  imagergb_p = (imagergb_t *) malloc(sizeof(imagergb_t));
+  if (!imagergb_p) {
+    lua_pushnil();
+    return;
+  }
+
+  imagergb_p->width = width_i;
+  imagergb_p->height = height_i;
+  imagergb_p->size = width_i*height_i;
+  imagergb_p->red = (unsigned char *) malloc(imagergb_p->size);
+  imagergb_p->green = (unsigned char *) malloc(imagergb_p->size);
+  imagergb_p->blue = (unsigned char *) malloc(imagergb_p->size);
+  
+  if (!(imagergb_p->red && imagergb_p->green && imagergb_p->blue)) { 
+    if (imagergb_p->red) free(imagergb_p->red);
+    if (imagergb_p->green) free(imagergb_p->green);
+    if (imagergb_p->blue) free(imagergb_p->blue);
+    free(imagergb_p);
+    lua_pushnil();
+    return;
+  }
+
+  memset(imagergb_p->red, 255, imagergb_p->size);
+  memset(imagergb_p->green, 255, imagergb_p->size);
+  memset(imagergb_p->blue, 255, imagergb_p->size);
+  
+  lua_pushusertag((void *) imagergb_p, imagergb_tag);
+}
+
+/***************************************************************************\
+* Frees a previously allocated imagergb. We don't free imagergb_p to avoid  *
+* problems if the user called killimagergb twice with the same object. The  *
+* structure will be freed by a userdata "gc" fallback in LUA 3.0.           *
+\***************************************************************************/
+static void imlua_killimagergb(void)
+{
+  lua_Object imagergb;
+  imagergb_t *imagergb_p;
+
+  imagergb = lua_getparam(1);
+  if (imagergb == LUA_NOOBJECT)
+    lua_error("imKillImageRGB: imagergb parameter missing!");
+  if (lua_isnil(imagergb))
+    lua_error("imKillImageRGB: attempt to kill a NIL imagergb!");
+  if (lua_tag(imagergb) != imagergb_tag)
+    lua_error("imKillImageRGB: invalid imagergb parameter!");
+  imagergb_p = (imagergb_t *) lua_getuserdata(imagergb);
+  if (!(imagergb_p->red && imagergb_p->green && imagergb_p->blue)) 
+    lua_error("imKillImageRGB: attempt to kill a killed imagergb!");
+
+  if (lua_getparam(2) != LUA_NOOBJECT)
+    lua_error("imKillImageRGB: too many parameters!");
+
+  free(imagergb_p->red);
+  free(imagergb_p->green);
+  free(imagergb_p->blue);
+  imagergb_p->red = NULL;
+  imagergb_p->green = NULL;
+  imagergb_p->blue = NULL;
+}
+
+/***************************************************************************\
+* Creates a palette as a palette_tag usertag lua_Object. A palette can be   *
+* considered and treated as a color table.                                  *
+\***************************************************************************/
+static void imlua_createpalette(void)
+{
+  lua_Object size;
+  long int size_i;
+  palette_t *palette_p;
+
+  size = lua_getparam(1);
+  if (!(lua_isnumber(size)))
+    lua_error("imCreatePalette: invalid palette parameter!");
+  size_i = (long int) lua_getnumber(size);
+  if (size_i < 1)
+    lua_error("imCreatePalette: palette size should be a positive integer!");
+
+  if (lua_getparam(2) != LUA_NOOBJECT)
+    lua_error("imCreatePalette: too many parameters!");
+
+  palette_p = (palette_t *) malloc(sizeof(palette_t));
+  if (!palette_p) {
+    lua_pushnil();
+    return;
+  }
+
+  palette_p->size = size_i;
+  palette_p->color = (long int *) malloc(palette_p->size * sizeof(long int));
+  if (!palette_p->color) {
+    free(palette_p);
+    lua_pushnil();
+    return;
+  }
+
+  memset(palette_p->color, 255, palette_p->size * sizeof(long int));
+  lua_pushusertag((void *) palette_p, palette_tag);
+}
+
+/***************************************************************************\
+* Frees a previously allocated palette. We don't free palette_p to prevent  *
+* a problem if the user called killpalette twice with the same object. The  *
+* structure will be freed by a userdata "gc" fallback in LUA 3.0.           *
+\***************************************************************************/
+static void imlua_killpalette(void)
+{
+  lua_Object palette;
+  palette_t *palette_p;
+
+  palette = lua_getparam(1);
+  if (palette == LUA_NOOBJECT)
+    lua_error("imKillPalette: palette parameter missing!");
+  if (lua_isnil(palette))
+    lua_error("imKillPalette: attempt to kill a NIL palette!");
+  if (lua_tag(palette) != palette_tag)
+    lua_error("imKillPalette: invalid palette parameter!");
+  palette_p = (palette_t *) lua_getuserdata(palette);
+  if (!palette_p->color) 
+    lua_error("imKillPalette: attempt to kill a killed palette!");
+
+  if (lua_getparam(2) != LUA_NOOBJECT)
+    lua_error("imKillPalette: too many parameters!");
+
+  free(palette_p->color);
+  palette_p->color = NULL;
+}
+
+/***************************************************************************\
+* Creates an imagemap as an imagemap_tag usertag lua_Object.                *
+\***************************************************************************/
+static void imlua_createimagemap(void)
+{
+  lua_Object width;
+  lua_Object height;
+
+  long int width_i;
+  long int height_i;
+  imagemap_t *imagemap_p;
+
+  width = lua_getparam(1);
+  height = lua_getparam(2);
+  if (!(lua_isnumber(width) && lua_isnumber(height)))
+    lua_error("imCreateImageMap: invalid dimensions parameter!");
+  width_i = (long int) lua_getnumber(width);
+  height_i = (long int) lua_getnumber(height);
+  if (width_i < 1 || height_i < 1)
+    lua_error("imCreateImageMap: imagemap dimensions should be positive integers!");
+
+  if (lua_getparam(3) != LUA_NOOBJECT)
+    lua_error("imCreateImageMap: too many parameters!");
+
+  imagemap_p = (imagemap_t *) malloc(sizeof(imagemap_t));
+  if (!imagemap_p) {
+    lua_pushnil();
+    return;
+  }
+
+  imagemap_p->size = width_i*height_i;
+  imagemap_p->width = width_i;
+  imagemap_p->height = height_i;
+  imagemap_p->index = (unsigned char *) malloc(imagemap_p->size);
+  if (!imagemap_p->index) {
+    free(imagemap_p);
+    lua_pushnil();
+    return;
+  }
+
+  memset(imagemap_p->index, 0, imagemap_p->size);
+  lua_pushusertag((void *) imagemap_p, imagemap_tag);
+}
+
+/***************************************************************************\
+* Frees a previously allocated imagemap. We don't free imagemap_p to avoid  *
+* problems if the user called killimagemap twice with the same object. The  *
+* structure will be freed by a userdata "gc" fallback in LUA 3.0.           *
+\***************************************************************************/
+static void imlua_killimagemap(void)
+{
+  lua_Object imagemap;
+  imagemap_t *imagemap_p;
+
+  imagemap = lua_getparam(1);
+  if (imagemap == LUA_NOOBJECT)
+    lua_error("imKillImageMap: imagemap parameter missing!");
+  if (lua_isnil(imagemap))
+    lua_error("imKillImageMap: attempt to kill a NIL imagemap!");
+  if (lua_tag(imagemap) != imagemap_tag)
+    lua_error("imKillImageMap: invalid imagemap parameter!");
+  imagemap_p = (imagemap_t *) lua_getuserdata(imagemap);
+  if (!imagemap_p->index) 
+    lua_error("imKillImageMap: attempt to kill a killed imagemap!");
+    
+  if (lua_getparam(2) != LUA_NOOBJECT)
+    lua_error("imKillImageMap: too many parameters!");
+
+  free(imagemap_p->index);
+  imagemap_p->index = NULL;
+}
+
+/***************************************************************************\
+* IM API Functions.                                                         *
+\***************************************************************************/
+
+/***************************************************************************\
+* imFileFormat: pushes format, compression flag (or nils) and error code.   *
+\***************************************************************************/
+static void imlua_fileformat(void)
+{
+  lua_Object file;
+
+  char *file_s;
+  int format_i = 0;  /* defined even if imFileFormat fails before storing it */
+  int compress_i;
+
+  int err;
+
+  file = lua_getparam(1);
+  if (!lua_isstring(file))
+    lua_error("imFileFormat: invalid filename parameter!");
+  file_s = (char *) lua_getstring(file);
+
+  if (lua_getparam(2) != LUA_NOOBJECT)
+    lua_error("imFileFormat: too many parameters!");
+
+  err = imFileFormat(file_s, &format_i);
+  /* split the packed value: IM_DEFAULT mask -> flag, low byte -> format */
+  compress_i = (format_i & IM_DEFAULT) ? 1 : 0;
+  format_i = format_i & 0xFF;
+  
+  /* if success, return the format and the compression flag */
+  if (err == IM_ERR_NONE) {
+    lua_pushnumber( format_i);
+    lua_pushnumber( compress_i);
+  }
+  /* if failure, return nil in place of both values */
+  else {
+    lua_pushnil();
+    lua_pushnil();
+  }
+
+  lua_pushnumber( err);
+}
+
+/***************************************************************************\
+* imImageInfo: pushes width, height, type, palette size (or nils), error.   *
+\***************************************************************************/
+static void imlua_imageinfo(void)
+{
+  lua_Object file;
+
+  char *file_s;
+  
+  int width, height, image_type, pal_size;
+  int err;
+  
+  file = lua_getparam(1);
+  if (!lua_isstring(file))
+    lua_error("imImageInfo: invalid filename parameter!");
+  file_s = (char *) lua_getstring(file);
+
+  if (lua_getparam(2) != LUA_NOOBJECT)
+    lua_error("imImageInfo: too many parameters!");
+
+  err = imImageInfo(file_s, &width, &height, &image_type, &pal_size);
+  
+  /* if success, return the image information */
+  if (err == IM_ERR_NONE) {
+    lua_pushnumber( width);
+    lua_pushnumber( height);
+    lua_pushnumber( image_type);
+    lua_pushnumber( pal_size);
+  }
+  /* if failure, return nil in place of each of the four values */
+  else {
+    lua_pushnil();
+    lua_pushnil();
+    lua_pushnil();
+    lua_pushnil();
+  }
+
+  lua_pushnumber( err);
+}
+
+/***************************************************************************\
+* imEncodeColor                                                             *
+\***************************************************************************/
+static void imlua_encodecolor(void)
+{
+  lua_Object red, green, blue;
+  float red_f, green_f, blue_f;
+  unsigned char red_i, green_i, blue_i;
+  long int color_i;
+
+  red = lua_getparam(1);
+  green = lua_getparam(2);
+  blue = lua_getparam(3);
+
+  if (lua_getparam(4) != LUA_NOOBJECT)
+    lua_error("imEncodeColor: too many parameters!");
+
+  if (!(lua_isnumber(red) && lua_isnumber(green) && lua_isnumber(blue)))
+    lua_error("imEncodeColor: invalid color component parameter!");
+
+  red_f = (float)lua_getnumber(red);
+  green_f = (float)lua_getnumber(green);
+  blue_f = (float)lua_getnumber(blue);
+
+  if (red_f < 0 || red_f > 255 || green_f < 0 || 
+      green_f > 255 || blue_f < 0 || blue_f > 255)
+    lua_error("imEncodeColor: color components values should be in range [0, 255]!");
+
+  red_i = (unsigned char) (red_f);
+  green_i = (unsigned char) (green_f);
+  blue_i = (unsigned char) (blue_f);
+
+  color_i = imEncodeColor(red_i, green_i, blue_i);
+  lua_pushusertag((void *) color_i, color_tag);
+}
+
+/***************************************************************************\
+* imDecodeColor                                                             *
+\***************************************************************************/
+static void imlua_decodecolor(void)
+{
+  lua_Object color;
+  long int color_i;
+  unsigned char red_i, green_i, blue_i;
+
+  color = lua_getparam(1);
+  if (lua_tag(color) != color_tag)
+    lua_error("imDecodeColor: invalid color parameter!");
+  color_i = (long int) lua_getuserdata(color);
+
+  if (lua_getparam(2) != LUA_NOOBJECT)
+    lua_error("imDecodeColor: too many parameters!");
+
+  imDecodeColor(&red_i, &green_i, &blue_i, color_i);
+
+  lua_pushnumber( red_i);
+  lua_pushnumber( green_i);
+  lua_pushnumber( blue_i);
+}
+
+/***************************************************************************\
+* imLoadRGB                                                                 *
+\***************************************************************************/
+static void imlua_loadrgb(void)
+{
+  lua_Object file, imagergb;
+
+  imagergb_t *imagergb_p;
+  char *file_s;
+
+  int err;
+
+  file = lua_getparam(1);
+  if (!lua_isstring(file))
+    lua_error("imLoadRGB: invalid filename parameter!");
+  file_s = (char *) lua_getstring(file);
+
+  imagergb = lua_getparam(2);
+  if (lua_tag(imagergb) != imagergb_tag)
+    lua_error("imLoadRGB: invalid imagergb parameter!");
+  imagergb_p = (imagergb_t *) lua_getuserdata(imagergb);
+
+  if (lua_getparam(3) != LUA_NOOBJECT)
+    lua_error("imLoadRGB: too many parameters!");
+
+  err = imLoadRGB(file_s, imagergb_p->red, imagergb_p->green, imagergb_p->blue);
+  lua_pushnumber( err);
+}
+
+/***************************************************************************\
+* imLoadMap                                                                 *
+\***************************************************************************/
+static void imlua_loadmap(void)
+{
+  lua_Object file, imagemap, palette;
+
+  imagemap_t *imagemap_p;
+  palette_t *palette_p;
+  char *file_s;
+
+  int err;
+
+  file = lua_getparam(1);
+  if (!lua_isstring(file))
+    lua_error("imLoadMap: invalid filename parameter!");
+  file_s = (char *) lua_getstring(file);
+
+  imagemap = lua_getparam(2);
+  if (lua_tag(imagemap) != imagemap_tag)
+    lua_error("imLoadMap: invalid imagemap parameter!");
+  imagemap_p = (imagemap_t *) lua_getuserdata(imagemap);
+
+  palette = lua_getparam(3);
+  if (lua_tag(palette) != palette_tag)
+    lua_error("imLoadMap: invalid palette parameter!");
+  palette_p = (palette_t *) lua_getuserdata(palette);
+
+  if (lua_getparam(4) != LUA_NOOBJECT)
+    lua_error("imLoadMap: too many parameters!");
+
+  err = imLoadMap(file_s, imagemap_p->index, palette_p->color);
+  lua_pushnumber( err);
+}
+
+/***************************************************************************\
+* imSaveRGB                                                                 *
+\***************************************************************************/
+static void imlua_savergb(void)
+{
+  lua_Object file, imagergb, format, compress;
+
+  imagergb_t *imagergb_p;
+  char *file_s;
+  int format_i;
+  int compress_i;
+
+  int err;
+
+  imagergb = lua_getparam(1);
+  if (lua_tag(imagergb) != imagergb_tag)
+    lua_error("imSaveRGB: invalid imagergb parameter!");
+  imagergb_p = (imagergb_t *) lua_getuserdata(imagergb);
+
+  format = lua_getparam(2);
+  if (!lua_isnumber(format))
+    lua_error("imSaveRGB: invalid format parameter!");
+  format_i = (int) lua_getnumber(format);
+  
+  compress = lua_getparam(3);
+  if (!lua_isnumber(compress))
+    lua_error("imSaveRGB: invalid compression parameter!");
+  compress_i = (int) lua_getnumber(compress);
+
+  file = lua_getparam(4);
+  if (!lua_isstring(file))
+    lua_error("imSaveRGB: invalid filename parameter!");
+  file_s = (char *) lua_getstring(file);
+
+  if (lua_getparam(5) != LUA_NOOBJECT)
+    lua_error("imSaveRGB: too many parameters!");
+
+  err = imSaveRGB(imagergb_p->width, imagergb_p->height, format_i | (compress_i << 8), 
+    imagergb_p->red, imagergb_p->green, imagergb_p->blue, file_s);
+  lua_pushnumber( err);
+}
+
+/***************************************************************************\
+* imSaveMap(imagemap, palette, format, compress, file): pushes error code.  *
+\***************************************************************************/
+static void imlua_savemap(void)
+{
+  lua_Object file, imagemap, palette, format, compress;
+
+  imagemap_t *imagemap_p;
+  palette_t *palette_p;
+  char *file_s;
+  int format_i;
+  int compress_i;
+
+  int err;
+
+  imagemap = lua_getparam(1);
+  if (lua_tag(imagemap) != imagemap_tag)
+    lua_error("imSaveMap: invalid imagemap  parameter!");
+  imagemap_p = (imagemap_t *) lua_getuserdata(imagemap);
+
+  palette = lua_getparam(2);
+  if (lua_tag(palette) != palette_tag)
+    lua_error("imSaveMap: invalid palette parameter!");
+  palette_p = (palette_t *) lua_getuserdata(palette);
+
+  format = lua_getparam(3);
+  if (!lua_isnumber(format))
+    lua_error("imSaveMap: invalid format parameter!");
+  format_i = (int) lua_getnumber(format);
+  
+  compress = lua_getparam(4);
+  if (!lua_isnumber(compress))
+    lua_error("imSaveMap: invalid compression parameter!");
+  compress_i = (int) lua_getnumber(compress);
+
+  file = lua_getparam(5);
+  if (!lua_isstring(file))
+    lua_error("imSaveMap: invalid filename parameter!");
+  file_s = (char *) lua_getstring(file);
+
+  if (lua_getparam(6) != LUA_NOOBJECT)
+    lua_error("imSaveMap: too many parameters!");
+  /* the compression value is packed into the bits above the format's low byte */
+  err = imSaveMap(imagemap_p->width, imagemap_p->height, format_i | (compress_i << 8), 
+    imagemap_p->index, palette_p->size, palette_p->color, file_s);
+  lua_pushnumber( err);
+}
+
+/***************************************************************************\
+* imRGB2Map                                                                 *
+\***************************************************************************/
+static void imlua_rgb2map(void)
+{
+  lua_Object imagergb, imagemap, palette;
+
+  imagemap_t *imagemap_p;
+  palette_t *palette_p;
+  imagergb_t *imagergb_p;
+
+  imagergb = lua_getparam(1);
+  if (lua_tag(imagergb) != imagergb_tag)
+    lua_error("imRGB2Map: invalid imagergb parameter!");
+  imagergb_p = (imagergb_t *) lua_getuserdata(imagergb);
+
+  imagemap = lua_getparam(2);
+  if (lua_tag(imagemap) != imagemap_tag)
+    lua_error("imRGB2Map: invalid imagemap parameter!");
+  imagemap_p = (imagemap_t *) lua_getuserdata(imagemap);
+
+  palette = lua_getparam(3);
+  if (lua_tag(palette) != palette_tag)
+    lua_error("imRGB2Map: invalid palette parameter!");
+  palette_p = (palette_t *) lua_getuserdata(palette);
+
+  if (lua_getparam(4) != LUA_NOOBJECT)
+    lua_error("imRGB2Map: too many parameters!");
+
+  if (imagergb_p->size != imagemap_p->size)
+    lua_error("imRGB2Map: images have incompatible dimensions!");
+  
+  imRGB2Map(imagergb_p->width, imagergb_p->height, imagergb_p->red, 
+    imagergb_p->green, imagergb_p->blue, imagemap_p->index, palette_p->size, 
+    palette_p->color);
+}
+
+/***************************************************************************\
+* imMap2RGB                                                                 *
+\***************************************************************************/
+static void imlua_map2rgb(void)
+{
+  lua_Object imagergb, imagemap, palette;
+
+  imagemap_t *imagemap_p;
+  palette_t *palette_p;
+  imagergb_t *imagergb_p;
+
+  imagemap = lua_getparam(1);
+  if (lua_tag(imagemap) != imagemap_tag)
+    lua_error("imMap2RGB: invalid imagemap parameter!");
+  imagemap_p = (imagemap_t *) lua_getuserdata(imagemap);
+
+  palette = lua_getparam(2);
+  if (lua_tag(palette) != palette_tag)
+    lua_error("imMap2RGB: invalid palette parameter!");
+  palette_p = (palette_t *) lua_getuserdata(palette);
+
+  imagergb = lua_getparam(3);
+  if (lua_tag(imagergb) != imagergb_tag)
+    lua_error("imMap2RGB: invalid imagergb parameter!");
+  imagergb_p = (imagergb_t *) lua_getuserdata(imagergb);
+
+  if (lua_getparam(4) != LUA_NOOBJECT)
+    lua_error("imMap2RGB: too many parameters!");
+
+  if (imagergb_p->size != imagemap_p->size)
+    lua_error("imMap2RGB: images have incompatible dimensions!");
+  
+  imMap2RGB(imagemap_p->width, imagemap_p->height, imagemap_p->index, palette_p->size, 
+    palette_p->color, imagergb_p->red, imagergb_p->green, imagergb_p->blue);
+}
+
+/***************************************************************************\
+* imMap2Gray                                                                *
+\***************************************************************************/
+static void imlua_map2gray(void)
+{
+  lua_Object imagemap, palette, graymap, grays;
+
+  imagemap_t *imagemap_p;
+  palette_t *palette_p;
+  imagemap_t *graymap_p;
+  palette_t *grays_p;
+
+  imagemap = lua_getparam(1);
+  if (lua_tag(imagemap) != imagemap_tag)
+    lua_error("imMap2Gray: invalid imagemap parameter!");
+  imagemap_p = (imagemap_t *) lua_getuserdata(imagemap);
+
+  palette = lua_getparam(2);
+  if (lua_tag(palette) != palette_tag)
+    lua_error("imMap2Gray: invalid palette parameter!");
+  palette_p = (palette_t *) lua_getuserdata(palette);
+
+  graymap = lua_getparam(3);
+  if (lua_tag(graymap) != imagemap_tag)
+    lua_error("imMap2Gray: invalid graymap parameter!");
+  graymap_p = (imagemap_t *) lua_getuserdata(graymap);
+
+  grays = lua_getparam(4);
+  if (lua_tag(grays) != palette_tag)
+    lua_error("imMap2Gray: invalid grays parameter!");
+  grays_p = (palette_t *) lua_getuserdata(grays);
+
+  if (lua_getparam(5) != LUA_NOOBJECT)
+    lua_error("imMap2Gray: too many parameters!");
+
+  if (imagemap_p->size != graymap_p->size)
+    lua_error("imMap2Gray: images have incompatible dimensions!");
+  
+  if (grays_p->size < 256)
+    lua_error("imMap2Gray: grays palette should be of size 256!");
+
+  imMap2Gray(imagemap_p->width, imagemap_p->height, imagemap_p->index, 
+    palette_p->size, palette_p->color, graymap_p->index, grays_p->color);
+}
+
+/***************************************************************************\
+* imRGB2Gray                                                                *
+\***************************************************************************/
+static void imlua_rgb2gray(void)
+{
+  lua_Object imagergb, graymap, grays;
+
+  imagergb_t *imagergb_p;
+  imagemap_t *graymap_p;
+  palette_t *grays_p;
+
+  imagergb = lua_getparam(1);
+  if (lua_tag(imagergb) != imagergb_tag)
+    lua_error("imRGB2Gray: invalid imagergb parameter!");
+  imagergb_p = (imagergb_t *) lua_getuserdata(imagergb);
+
+  graymap = lua_getparam(2);
+  if (lua_tag(graymap) != imagemap_tag)
+    lua_error("imRGB2Gray: invalid graymap parameter!");
+  graymap_p = (imagemap_t *) lua_getuserdata(graymap);
+
+  grays = lua_getparam(3);
+  if (lua_tag(grays) != palette_tag)
+    lua_error("imRGB2Gray: invalid grays parameter!");
+  grays_p = (palette_t *) lua_getuserdata(grays);
+
+  if (lua_getparam(4) != LUA_NOOBJECT)
+    lua_error("imRGB2Gray: too many parameters!");
+
+  if (imagergb_p->size != graymap_p->size)
+    lua_error("imRGB2Gray: images have incompatible dimensions!");
+  
+  if (grays_p->size < 256)
+    lua_error("imRGB2Gray: grays palette should be of size 256!");
+
+  imRGB2Gray(imagergb_p->width, imagergb_p->height, imagergb_p->red,
+    imagergb_p->green, imagergb_p->blue, graymap_p->index, grays_p->color);
+}
+
+/***************************************************************************\
+* imStretch.                                                                *
+\***************************************************************************/
+static void imlua_stretch(void)
+{
+  lua_Object src, dst;
+
+  imagemap_t *srcmap_p;
+  imagemap_t *dstmap_p;
+  imagergb_t *srcrgb_p;
+  imagergb_t *dstrgb_p;
+
+  src = lua_getparam(1);
+  dst = lua_getparam(2);
+  if (lua_getparam(3) != LUA_NOOBJECT)
+    lua_error("imStretch: too many parameters!");
+
+  if ((lua_tag(src) == imagergb_tag) && (lua_tag(dst) == imagergb_tag)) {
+    srcrgb_p = (imagergb_t *) lua_getuserdata(src);
+    dstrgb_p = (imagergb_t *) lua_getuserdata(dst);
+    imStretch(srcrgb_p->width, srcrgb_p->height, srcrgb_p->red,
+      dstrgb_p->width, dstrgb_p->height, dstrgb_p->red);
+    imStretch(srcrgb_p->width, srcrgb_p->height, srcrgb_p->green,
+      dstrgb_p->width, dstrgb_p->height, dstrgb_p->green);
+    imStretch(srcrgb_p->width, srcrgb_p->height, srcrgb_p->blue,
+      dstrgb_p->width, dstrgb_p->height, dstrgb_p->blue);
+  } 
+  else if ((lua_tag(src) == imagemap_tag) && (lua_tag(dst) == imagemap_tag)) {
+    srcmap_p = (imagemap_t *) lua_getuserdata(src);
+    dstmap_p = (imagemap_t *) lua_getuserdata(dst);
+    imStretch(srcmap_p->width, srcmap_p->height, srcmap_p->index,
+      dstmap_p->width, dstmap_p->height, dstmap_p->index);
+  }
+  else {
+    lua_error("imStretch: inconsistent parameters!");
+  }
+}
+
+/***************************************************************************\
+* imResize. NOTE(review): body calls imStretch, not imResize - confirm.     *
+\***************************************************************************/
+static void imlua_resize(void)
+{
+  lua_Object src, dst;
+
+  imagergb_t *srcrgb_p;
+  imagergb_t *dstrgb_p;
+
+  src = lua_getparam(1);
+  dst = lua_getparam(2);
+  if (lua_getparam(3) != LUA_NOOBJECT)
+    lua_error("imResize: too many parameters!");
+
+  if ((lua_tag(src) == imagergb_tag) && (lua_tag(dst) == imagergb_tag)) {
+    srcrgb_p = (imagergb_t *) lua_getuserdata(src);
+    dstrgb_p = (imagergb_t *) lua_getuserdata(dst);
+    imStretch(srcrgb_p->width, srcrgb_p->height, srcrgb_p->red,
+      dstrgb_p->width, dstrgb_p->height, dstrgb_p->red);
+    imStretch(srcrgb_p->width, srcrgb_p->height, srcrgb_p->green,
+      dstrgb_p->width, dstrgb_p->height, dstrgb_p->green);
+    imStretch(srcrgb_p->width, srcrgb_p->height, srcrgb_p->blue,
+      dstrgb_p->width, dstrgb_p->height, dstrgb_p->blue);
+  } 
+  else {
+    lua_error("imResize: parameters must be of type imagergb_tag!");
+  }
+}
+
+/***************************************************************************\
+* imVersion.                                                                *
+\***************************************************************************/
+static void imlua_version(void)
+{
+  if (lua_getparam(1) != LUA_NOOBJECT)
+    lua_error("imVersion: too many parameters!");
+ 
+  lua_pushstring(imVersion());
+}
+
+/***************************************************************************\
+* Fallback implementation.                                                  *
+\***************************************************************************/
+
+/***************************************************************************\
+* imagemap "settable" fallback: checked write of imagemap[index] = value.   *
+\***************************************************************************/
+static void imagemapsettable_fb(void)
+{
+  lua_Object imagemap, index, value;
+
+  imagemap_t *imagemap_p;
+  long int index_i;
+  long int value_i;
+
+  imagemap = lua_getparam(1);
+  index = lua_getparam(2);
+  value = lua_getparam(3);
+
+  imagemap_p = (imagemap_t *) lua_getuserdata(imagemap);
+  if (!imagemap_p || !imagemap_p->index) {  /* NULL index: imagemap was killed */
+    lua_error("imagemap_tag \"settable\": invalid imagemap_tag object!");
+  }
+
+  if (!lua_isnumber(index)) {
+    lua_error("imagemap_tag \"settable\": index should be a number!");
+  }
+
+  if (!lua_isnumber(value)) {
+    lua_error("imagemap_tag \"settable\": value should be a number!");
+  }
+  
+  value_i = (long int) lua_getnumber(value);
+  if ((value_i < 0 || value_i > 255)) 
+    lua_error("imagemap_tag \"settable\": value should be in range [0, 255]!");
+
+  index_i = (long int) lua_getnumber(index);
+  if (index_i < 0 || index_i >= imagemap_p->size)
+    lua_error("imagemap_tag \"settable\": index is out of bounds!");
+
+  imagemap_p->index[index_i] = (unsigned char) value_i;
+}
+
+/***************************************************************************\
+* palette "settable" fallback: checked write of palette[index] = color.     *
+\***************************************************************************/
+static void palettesettable_fb(void)
+{
+  lua_Object palette, index, color;
+  
+  palette_t *palette_p;
+  long int index_i;
+  long int color_i;
+
+  palette = lua_getparam(1);
+  index = lua_getparam(2);
+  color = lua_getparam(3);
+
+  palette_p = (palette_t *) lua_getuserdata(palette);
+  if (!palette_p || !palette_p->color) {  /* NULL color: palette was killed */
+    lua_error("palette_tag \"settable\": invalid palette_tag object!");
+  }
+
+  if (!lua_isnumber(index)) {
+    lua_error("palette_tag \"settable\": index should be a number!");
+  }
+
+  if (lua_tag(color) != color_tag) 
+    lua_error("palette_tag \"settable\": value should be of type color_tag!");
+  
+  color_i = (long int) lua_getuserdata(color);
+  
+  index_i = (long int) lua_getnumber(index);
+  if (index_i < 0 || index_i >= palette_p->size)
+    lua_error("palette_tag \"settable\": index is out of bounds!");
+
+  palette_p->color[index_i] = color_i;
+}
+
+/***************************************************************************\
+* channel "settable" fallback. Runs when Lua executes a statement such as   *
+* "imagergb.r[y*w + x] = c". The imagergb "gettable" fallback has already  *
+* filled a channel structure describing the buffer; this function checks    *
+* the index against its size, the value against [0, 255], and stores it.    *
+\***************************************************************************/
+static void channelsettable_fb(void)
+{
+  lua_Object chan_obj, pos_obj, val_obj;
+
+  channel_t *chan;
+  long int pos;
+  long int val;
+
+  chan_obj = lua_getparam(1);
+  pos_obj = lua_getparam(2);
+  val_obj = lua_getparam(3);
+
+  chan = (channel_t *) lua_getuserdata(chan_obj);
+  if (!chan)
+    lua_error("channel_tag \"settable\": invalid channel_tag object!");
+
+  /* the index is validated completely before the value is looked at */
+  if (!lua_isnumber(pos_obj))
+    lua_error("channel_tag \"settable\": index should be a number!");
+
+  pos = (long int) lua_getnumber(pos_obj);
+  if (pos < 0 || pos >= chan->size)
+    lua_error("channel_tag \"settable\": index is out of bounds!");
+
+  /* the value must be a number in [0, 255] */
+  if (!lua_isnumber(val_obj))
+    lua_error("channel_tag \"settable\": value should be a number!");
+
+  val = (long int) lua_getnumber(val_obj);
+  if (val < 0 || val > 255)
+    lua_error("channel_tag \"settable\": value should be in range [0, 255]!");
+
+  /* both operands are valid: store the sample */
+  chan->value[pos] = (unsigned char) val;
+}
+
+/***************************************************************************\
+* imagemap "gettable" fallback. Runs for "value = imagemap[index]" in Lua.  *
+* Pushes the stored map entry as a number, or raises a Lua error when the   *
+* object is invalid, the index is not a number, or it is out of bounds.     *
+\***************************************************************************/
+static void imagemapgettable_fb(void)
+{
+  lua_Object map_obj, pos_obj;
+  imagemap_t *map;
+  long int pos;
+
+  map_obj = lua_getparam(1);
+  pos_obj = lua_getparam(2);
+
+  map = (imagemap_t *) lua_getuserdata(map_obj);
+  if (!map)
+    lua_error("imagemap_tag \"gettable\": invalid imagemap_tag object!");
+
+  if (!lua_isnumber(pos_obj))
+    lua_error("imagemap_tag \"gettable\": index should be a number!");
+
+  pos = (long int) lua_getnumber(pos_obj);
+  if (pos < 0 || pos >= map->size)
+    lua_error("imagemap_tag \"gettable\": index is out of bounds!");
+
+  lua_pushnumber(map->index[pos]);
+}
+
+/***************************************************************************\
+* palette "gettable" fallback. Runs for "color = palette[index]" in Lua.    *
+* Pushes the stored color as a color_tag userdata, or raises a Lua error    *
+* when the object is invalid or the index is not a number within bounds.    *
+\***************************************************************************/
+static void palettegettable_fb(void)
+{
+  lua_Object pal_obj, pos_obj;
+  palette_t *pal;
+  long int pos;
+
+  pal_obj = lua_getparam(1);
+  pos_obj = lua_getparam(2);
+
+  pal = (palette_t *) lua_getuserdata(pal_obj);
+  if (!pal)
+    lua_error("palette_tag \"gettable\": invalid palette_tag object!");
+
+  if (!lua_isnumber(pos_obj))
+    lua_error("palette_tag \"gettable\": index should be a number!");
+
+  pos = (long int) lua_getnumber(pos_obj);
+  if (pos < 0 || pos >= pal->size)
+    lua_error("palette_tag \"gettable\": index is out of bounds!");
+
+  lua_pushusertag((void *) pal->color[pos], color_tag);
+}
+
+/***************************************************************************\
+* channel "gettable" fallback. Runs when Lua executes a statement such as   *
+* "c = imagergb.r[y*w + x]". The imagergb "gettable" fallback has already   *
+* filled a channel structure describing the buffer; this function checks    *
+* the index against its size and pushes the addressed value as a number.    *
+\***************************************************************************/
+static void channelgettable_fb(void)
+{
+  lua_Object chan_obj, pos_obj;
+
+  channel_t *chan;
+  long int pos;
+
+  chan_obj = lua_getparam(1);
+  pos_obj = lua_getparam(2);
+
+  chan = (channel_t *) lua_getuserdata(chan_obj);
+  if (!chan)
+    lua_error("channel_tag \"gettable\": invalid channel_tag object!");
+
+  /* the index must be a number inside [0, size) */
+  if (!lua_isnumber(pos_obj))
+    lua_error("channel_tag \"gettable\": index should be a number!");
+
+  pos = (long int) lua_getnumber(pos_obj);
+  if (pos < 0 || pos >= chan->size)
+    lua_error("channel_tag \"gettable\": index is out of bounds!");
+
+  /* hand the sample back to Lua */
+  lua_pushnumber(chan->value[pos]);
+}
+
+/***************************************************************************\
+* imagergb "gettable" fallback. Runs when Lua indexes an imagergb object    *
+* with a channel name, as in "c = imagergb.r[y*w + x]" or                   *
+* "imagergb.r[y*w + x] = c". The global channel_info is filled with the    *
+* chosen buffer and the image size, and its address is pushed as a          *
+* channel_tag userdata for the channel "gettable"/"settable" that follows.  *
+\***************************************************************************/
+static void imagergbgettable_fb(void)
+{
+  lua_Object img_obj, key_obj;
+  imagergb_t *img;
+  char *key;
+
+  img_obj = lua_getparam(1);
+  key_obj = lua_getparam(2);
+
+  img = (imagergb_t *) lua_getuserdata(img_obj);
+  if (!img)
+    lua_error("imagergb_tag \"gettable\": invalid imagergb_tag object!");
+
+  if (!lua_isstring(key_obj))
+    lua_error("imagergb_tag \"gettable\": index should be a channel name!");
+  key = (char *) lua_getstring(key_obj);
+
+  channel_info.size = img->size;
+
+  /* only the first character of the name selects the channel */
+  switch (*key) {
+    case 'r': case 'R':
+      channel_info.value = img->red;
+      break;
+    case 'g': case 'G':
+      channel_info.value = img->green;
+      break;
+    case 'b': case 'B':
+      channel_info.value = img->blue;
+      break;
+    default:
+      lua_error("imagergb_tag \"gettable\": index is an invalid channel name!");
+  }
+
+  lua_pushusertag((void *) &channel_info, channel_tag);
+}
+
+/***************************************************************************\
+* imagergba "gettable" fallback. Runs when Lua indexes an imagergba object  *
+* with a channel name, as in "c = imagergba.r[y*w + x]" or                  *
+* "imagergba.r[y*w + x] = c". The global channel_info is filled with the   *
+* chosen buffer and the image size, and its address is pushed as a          *
+* channel_tag userdata for the channel "gettable"/"settable" that follows.  *
+\***************************************************************************/
+static void imagergbagettable_fb(void)
+{
+  lua_Object img_obj, key_obj;
+  imagergba_t *img;
+  char *key;
+
+  img_obj = lua_getparam(1);
+  key_obj = lua_getparam(2);
+
+  img = (imagergba_t *) lua_getuserdata(img_obj);
+  if (!img)
+    lua_error("imagergba_tag \"gettable\": invalid imagergba_tag object!");
+
+  if (!lua_isstring(key_obj))
+    lua_error("imagergba_tag \"gettable\": index should be a channel name!");
+  key = (char *) lua_getstring(key_obj);
+
+  channel_info.size = img->size;
+
+  /* only the first character of the name selects the channel */
+  switch (*key) {
+    case 'r': case 'R':
+      channel_info.value = img->red;
+      break;
+    case 'g': case 'G':
+      channel_info.value = img->green;
+      break;
+    case 'b': case 'B':
+      channel_info.value = img->blue;
+      break;
+    case 'a': case 'A':
+      channel_info.value = img->alpha;
+      break;
+    default:
+      lua_error("imagergba_tag \"gettable\": index is an invalid channel name!");
+  }
+
+  lua_pushusertag((void *) &channel_info, channel_tag);
+}
+
+/***************************************************************************\
+* palette "gc" fallback. Releases the color array, if it is still           *
+* allocated, and then the palette_t structure itself.                       *
+\***************************************************************************/
+static void palettegc_fb(void)
+{
+  lua_Object pal_obj = lua_getparam(1);
+  palette_t *pal = (palette_t *) lua_getuserdata(pal_obj);
+
+  if (!pal) {
+    lua_error("palette_tag \"gc\": invalid palette_tag object!");
+  }
+
+  /* the color array is released only while its pointer is still set */
+  if (pal->color) {
+    free(pal->color);
+  }
+
+  free(pal);
+}
+
+/***************************************************************************\
+* imagergb "gc" fallback. Releases whichever of the red, green and blue     *
+* buffers are still allocated, and then the imagergb_t structure itself.    *
+\***************************************************************************/
+static void imagergbgc_fb(void)
+{
+  lua_Object img_obj;
+  imagergb_t *img;
+
+  img_obj = lua_getparam(1);
+  img = (imagergb_t *) lua_getuserdata(img_obj);
+  if (!img) {
+    lua_error("imagergb_tag \"gc\": invalid imagergb_tag object!");
+  }
+
+  /* a channel is released only while its pointer is still set */
+  if (img->red) free(img->red);
+  if (img->green) free(img->green);
+  if (img->blue) free(img->blue);
+
+  free(img);
+}
+
+/***************************************************************************\
+* imagergba "gc" fallback. Releases whichever of the red, green, blue and   *
+* alpha buffers are still allocated, and then the imagergba_t structure.    *
+\***************************************************************************/
+static void imagergbagc_fb(void)
+{
+  lua_Object img_obj;
+  imagergba_t *img;
+
+  img_obj = lua_getparam(1);
+  img = (imagergba_t *) lua_getuserdata(img_obj);
+  if (!img) {
+    lua_error("imagergba_tag \"gc\": invalid imagergba_tag object!");
+  }
+
+  /* a channel is released only while its pointer is still set */
+  if (img->red) free(img->red);
+  if (img->green) free(img->green);
+  if (img->blue) free(img->blue);
+  if (img->alpha) free(img->alpha);
+
+  free(img);
+}
+
+/***************************************************************************\
+* imagemap "gc" fallback. Releases the index array, if it is still          *
+* allocated, and then the imagemap_t structure itself.                      *
+\***************************************************************************/
+static void imagemapgc_fb(void)
+{
+  lua_Object map_obj = lua_getparam(1);
+  imagemap_t *map = (imagemap_t *) lua_getuserdata(map_obj);
+
+  if (!map) {
+    lua_error("imagemap_tag \"gc\": invalid imagemap_tag object!");
+  }
+
+  /* the index array is released only while its pointer is still set */
+  if (map->index) {
+    free(map->index);
+  }
+
+  free(map);
+}
+
+/***************************************************************************\
+* Initialization code.                                                      *
+\***************************************************************************/
+
+/***************************************************************************\
+* Initializes IMLua: registers tags, fallbacks, functions and constants.    *
+\***************************************************************************/
+void imlua_open(void)
+{
+  lua_Object cdlua_tag;
+  
+  /* check if CD has been initialized: CDLUA_INSTALLED is then non-nil */
+  cdlua_tag = lua_getglobal("CDLUA_INSTALLED");
+
+  /* get the tags CD defined and let CD deal with the user tag objects: no fallback is installed here */
+  if ((cdlua_tag != LUA_NOOBJECT) && (!lua_isnil(cdlua_tag))) {
+    cdlua_tag = lua_getglobal("CDLUA_COLOR_TAG");
+    color_tag = (int) lua_getnumber(cdlua_tag);
+    cdlua_tag = lua_getglobal("CDLUA_IMAGERGB_TAG");
+    imagergb_tag = (int) lua_getnumber(cdlua_tag);
+    cdlua_tag = lua_getglobal("CDLUA_IMAGERGBA_TAG");
+    imagergba_tag = (int) lua_getnumber(cdlua_tag);
+    cdlua_tag = lua_getglobal("CDLUA_PALETTE_TAG");
+    palette_tag = (int) lua_getnumber(cdlua_tag);
+    cdlua_tag = lua_getglobal("CDLUA_IMAGEMAP_TAG");
+    imagemap_tag = (int) lua_getnumber(cdlua_tag);
+    cdlua_tag = lua_getglobal("CDLUA_CHANNEL_TAG");
+    channel_tag = (int) lua_getnumber(cdlua_tag);
+  }
+  /* otherwise define IM's own tags and fallbacks */
+  else {
+    color_tag     = lua_newtag();
+    imagergb_tag  = lua_newtag();
+    imagergba_tag = lua_newtag();
+    imagemap_tag  = lua_newtag();
+    palette_tag   = lua_newtag();
+    channel_tag   = lua_newtag();
+
+    /* associate the fallbacks: each pushed C function is bound to the tag and event named next */
+    lua_pushcfunction(palettesettable_fb); lua_settagmethod(palette_tag, "settable");
+    lua_pushcfunction(channelsettable_fb); lua_settagmethod(channel_tag, "settable");
+    lua_pushcfunction(imagemapsettable_fb); lua_settagmethod(imagemap_tag, "settable");
+  
+    lua_pushcfunction(imagergbgettable_fb); lua_settagmethod(imagergb_tag, "gettable");
+    lua_pushcfunction(imagergbagettable_fb); lua_settagmethod(imagergba_tag, "gettable");
+    lua_pushcfunction(palettegettable_fb); lua_settagmethod(palette_tag, "gettable");
+    lua_pushcfunction(imagemapgettable_fb); lua_settagmethod(imagemap_tag, "gettable");
+    lua_pushcfunction(channelgettable_fb); lua_settagmethod(channel_tag, "gettable");
+
+    lua_pushcfunction(imagergbgc_fb); lua_settagmethod(imagergb_tag, "gc");
+    lua_pushcfunction(imagergbagc_fb); lua_settagmethod(imagergba_tag, "gc");
+    lua_pushcfunction(palettegc_fb); lua_settagmethod(palette_tag, "gc");
+    lua_pushcfunction(imagemapgc_fb); lua_settagmethod(imagemap_tag, "gc");
+  }
+
+  /* publish the tags in use as globals, for other libraries to reuse */
+  lua_pushnumber(1.0f); lua_setglobal("IMLUA_INSTALLED");
+  lua_pushnumber( color_tag); lua_setglobal("IMLUA_COLOR_TAG");
+  lua_pushnumber( imagergb_tag); lua_setglobal("IMLUA_IMAGERGB_TAG");
+  lua_pushnumber( imagergba_tag); lua_setglobal("IMLUA_IMAGERGBA_TAG");
+  lua_pushnumber( imagemap_tag); lua_setglobal("IMLUA_IMAGEMAP_TAG");
+  lua_pushnumber( palette_tag); lua_setglobal("IMLUA_PALETTE_TAG");
+  lua_pushnumber( channel_tag); lua_setglobal("IMLUA_CHANNEL_TAG");
+
+  /* IM functions registered in Lua under their C API names */
+  lua_register("imDecodeColor",         imlua_decodecolor);
+  lua_register("imEncodeColor",         imlua_encodecolor);
+  lua_register("imLoadRGB",             imlua_loadrgb);
+  lua_register("imLoadMap",             imlua_loadmap);
+  lua_register("imSaveRGB",             imlua_savergb);
+  lua_register("imSaveMap",             imlua_savemap);
+  lua_register("imFileFormat",          imlua_fileformat);
+  lua_register("imImageInfo",           imlua_imageinfo);
+  lua_register("imRGB2Map",             imlua_rgb2map);
+  lua_register("imMap2RGB",             imlua_map2rgb);
+  lua_register("imRGB2Gray",            imlua_rgb2gray);
+  lua_register("imMap2Gray",            imlua_map2gray);
+  lua_register("imVersion",             imlua_version);
+  lua_register("imResize",              imlua_resize);
+  lua_register("imStretch",             imlua_stretch);
+  
+  /* creation and destruction functions */
+  lua_register("imCreateImageRGB",      imlua_createimagergb);
+  lua_register("imCreateImageMap",      imlua_createimagemap);
+  lua_register("imCreatePalette",       imlua_createpalette);
+  lua_register("imKillImageRGB",        imlua_killimagergb);
+  lua_register("imKillImageMap",        imlua_killimagemap);
+  lua_register("imKillPalette",         imlua_killpalette);
+
+  /* IM constants, exported as global numbers */
+  lua_pushnumber( IM_BMP); lua_setglobal("IM_BMP");
+  lua_pushnumber( IM_PCX); lua_setglobal("IM_PCX");
+  lua_pushnumber( IM_GIF); lua_setglobal("IM_GIF");
+  lua_pushnumber( IM_TIF); lua_setglobal("IM_TIF");
+  lua_pushnumber( IM_RAS); lua_setglobal("IM_RAS");
+  lua_pushnumber( IM_SGI); lua_setglobal("IM_SGI");
+  lua_pushnumber( IM_JPG); lua_setglobal("IM_JPG");
+  lua_pushnumber( IM_LED); lua_setglobal("IM_LED");
+  lua_pushnumber( IM_TGA); lua_setglobal("IM_TGA");
+
+  lua_pushnumber( 0); lua_setglobal("IM_NONE");
+  lua_pushnumber( 1); lua_setglobal("IM_DEFAULT");
+  lua_pushnumber( 2); lua_setglobal("IM_COMPRESSED");
+
+  lua_pushnumber( IM_RGB); lua_setglobal("IM_RGB");
+  lua_pushnumber( IM_MAP); lua_setglobal("IM_MAP");
+
+  lua_pushnumber( IM_ERR_NONE); lua_setglobal("IM_ERR_NONE");
+  lua_pushnumber( IM_ERR_OPEN); lua_setglobal("IM_ERR_OPEN");
+  lua_pushnumber( IM_ERR_READ); lua_setglobal("IM_ERR_READ");
+  lua_pushnumber( IM_ERR_WRITE); lua_setglobal("IM_ERR_WRITE");
+  lua_pushnumber( IM_ERR_FORMAT); lua_setglobal("IM_ERR_FORMAT");
+  lua_pushnumber( IM_ERR_TYPE); lua_setglobal("IM_ERR_TYPE");
+  lua_pushnumber( IM_ERR_COMP); lua_setglobal("IM_ERR_COMP");
+}
diff --git a/src/im_palette.cpp b/src/im_palette.cpp
new file mode 100644
index 0000000..0e6f967
--- /dev/null
+++ b/src/im_palette.cpp
@@ -0,0 +1,551 @@
+/** \file
+ * \brief Palette Generators
+ * Creates several standard palettes
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_palette.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "im.h"
+#include "im_util.h"
+#include "im_palette.h"
+#include "im_colorhsi.h"
+
+#include <stdlib.h>
+#include <memory.h>
+#include <assert.h>
+#include <math.h>
+
+/* Returns x squared (no overflow check). */
+static inline int iSqr(int x) {
+  return x * x;
+}
+
+/* Returns the absolute value of x. */
+static inline int iAbs(int x) {
+  return (x >= 0) ? x : -x;
+}
+
+/* Returns the index of the palette entry nearest to color (smallest squared
+ * RGB distance; an exact match returns at once), or -1 for an empty palette.
+ * The best distance must start above the maximum 3*255*255, not at -1. */
+int imPaletteFindNearest(const long* palette, int palette_count, long color)
+{
+  assert(palette);
+  assert(palette_count);
+
+  int lBestDiff = 3*255*255 + 1;
+  int pIndex = -1;
+
+  imbyte red1, green1, blue1;
+  imColorDecode(&red1, &green1, &blue1, color);
+
+  for (int lIndex = 0; lIndex < palette_count; lIndex++, palette++)
+  {
+    if (color == *palette)
+      return lIndex;
+
+    imbyte red2, green2, blue2;
+    imColorDecode(&red2, &green2, &blue2, *palette);
+
+    int lSqrDiff = iSqr(red1 - red2) + iSqr(green1 - green2) + iSqr(blue1 - blue2);
+    if (lSqrDiff < lBestDiff)
+    {
+      lBestDiff = lSqrDiff;
+      pIndex = lIndex;
+    }
+  }
+
+  return pIndex;
+}
+
+/* Returns the index of the first palette entry that matches color, or -1.
+ * With tol == 0 the encoded values must be equal; otherwise every RGB
+ * component must differ from the target by strictly less than tol. */
+int imPaletteFindColor(const long* palette, int palette_count, long color, unsigned char tol)
+{
+  assert(palette);
+  assert(palette_count);
+
+  /* The exact search is kept apart so that nothing has to be decoded. */
+  if (tol == 0)
+  {
+    for (int lExact = 0; lExact < palette_count; lExact++)
+    {
+      if (palette[lExact] == color)
+        return lExact;
+    }
+    return -1;
+  }
+
+  imbyte r, g, b;
+  imColorDecode(&r, &g, &b, color);
+
+  for (int lNear = 0; lNear < palette_count; lNear++)
+  {
+    imbyte pr, pg, pb;
+    imColorDecode(&pr, &pg, &pb, palette[lNear]);
+
+    if (iAbs(r - pr) >= tol || iAbs(g - pg) >= tol || iAbs(b - pb) >= tol)
+      continue;
+
+    return lNear;
+  }
+
+  return -1;
+}
+
+/* Builds a 256-entry gray ramp from black (0, 0, 0) to white (255, 255, 255).
+ * The array is allocated with malloc and returned to the caller.
+ * Returns NULL when the allocation fails. */
+long* imPaletteGray(void)
+{
+  long* palette = (long*)malloc(sizeof(long)*256);
+  if (!palette)
+    return NULL;
+
+  for (int lIndex = 0; lIndex < 256; lIndex++)
+    palette[lIndex] = imColorEncode((imbyte)lIndex, (imbyte)lIndex, (imbyte)lIndex);
+
+  return palette;
+}
+
+/* Builds a 256-entry ramp from black (0, 0, 0) to red (255, 0, 0).
+ * The array is allocated with malloc and returned to the caller.
+ * Returns NULL when the allocation fails. */
+long* imPaletteRed(void)
+{
+  long* palette = (long*)malloc(sizeof(long)*256);
+  if (!palette)
+    return NULL;
+
+  for (int lIndex = 0; lIndex < 256; lIndex++)
+    palette[lIndex] = imColorEncode((imbyte)lIndex, 0, 0);
+
+  return palette;
+}
+
+/* Builds a 256-entry ramp from black (0, 0, 0) to green (0, 255, 0).
+ * The array is allocated with malloc and returned to the caller.
+ * Returns NULL when the allocation fails. */
+long* imPaletteGreen(void)
+{
+  long* palette = (long*)malloc(sizeof(long)*256);
+  if (!palette)
+    return NULL;
+
+  for (int lIndex = 0; lIndex < 256; lIndex++)
+    palette[lIndex] = imColorEncode(0, (imbyte)lIndex, 0);
+
+  return palette;
+}
+
+/* Builds a 256-entry ramp from black (0, 0, 0) to blue (0, 0, 255).
+ * The array is allocated with malloc and returned to the caller.
+ * Returns NULL when the allocation fails. */
+long* imPaletteBlue(void)
+{
+  long* palette = (long*)malloc(sizeof(long)*256);
+  if (!palette)
+    return NULL;
+
+  for (int lIndex = 0; lIndex < 256; lIndex++)
+    palette[lIndex] = imColorEncode(0, 0, (imbyte)lIndex);
+
+  return palette;
+}
+
+/* Builds a 256-entry ramp from black (0, 0, 0) to yellow (255, 255, 0).
+ * The array is allocated with malloc and returned to the caller.
+ * Returns NULL when the allocation fails. */
+long* imPaletteYellow(void)
+{
+  long* palette = (long*)malloc(sizeof(long)*256);
+  if (!palette)
+    return NULL;
+
+  for (int lIndex = 0; lIndex < 256; lIndex++)
+    palette[lIndex] = imColorEncode((imbyte)lIndex, (imbyte)lIndex, 0);
+
+  return palette;
+}
+
+/* Builds a 256-entry ramp from black (0, 0, 0) to magenta (255, 0, 255).
+ * The array is allocated with malloc and returned to the caller.
+ * Returns NULL when the allocation fails. */
+long* imPaletteMagenta(void)
+{
+  long* palette = (long*)malloc(sizeof(long)*256);
+  if (!palette)
+    return NULL;
+
+  for (int lIndex = 0; lIndex < 256; lIndex++)
+    palette[lIndex] = imColorEncode((imbyte)lIndex, 0, (imbyte)lIndex);
+
+  return palette;
+}
+
+/* Builds a 256-entry ramp from black (0, 0, 0) to cyan (0, 255, 255).
+ * The array is allocated with malloc and returned to the caller.
+ * Returns NULL when the allocation fails. */
+long* imPaletteCian(void)
+{
+  long* palette = (long*)malloc(sizeof(long)*256);
+  if (!palette)
+    return NULL;
+
+  for (int lIndex = 0; lIndex < 256; lIndex++)
+    palette[lIndex] = imColorEncode(0, (imbyte)lIndex, (imbyte)lIndex);
+
+  return palette;
+}
+
+/* Builds a 256-entry hue palette: red, yellow, green, cyan, blue and magenta
+ * joined by linear ramps, the last ramp leading back to red; entry 255 is black.
+ * The malloc'ed array is returned to the caller; NULL if the allocation fails. */
+long* imPaletteHues(void)
+{
+  long* palette = (long*)malloc(sizeof(long)*256);
+  long* ct = palette;
+  int i;
+  float tone, step1 = 255.0f/41.0f, step2 = 255.0f/42.0f;
+
+  if (!palette)
+    return NULL;
+
+  /* 1+42+1+41+1+42+1+41+1+42+1+41+1 = 256 */
+
+  /* red */
+  *(ct++) = imColorEncode((imbyte)255, 0, 0);
+
+  /* red (255, 0, 0) to yellow (255, 255, 0): green rises */
+  for (tone = step2, i = 0; i < 42; i++, tone += step2)
+  {
+    *(ct++) = imColorEncode((imbyte)255, (imbyte)tone, 0);
+  }
+
+  /* yellow */
+  *(ct++) = imColorEncode((imbyte)255, (imbyte)255, 0);
+
+  /* yellow (255, 255, 0) to green (0, 255, 0): red falls */
+  for (tone = step1, i = 0; i < 41; i++, tone += step1)
+  {
+    *(ct++) = imColorEncode((imbyte)(255.0f-tone), (imbyte)255, 0);
+  }
+
+  /* green */
+  *(ct++) = imColorEncode(0, (imbyte)255, 0);
+
+  /* green (0, 255, 0) to cyan (0, 255, 255): blue rises */
+  for (tone = step2, i = 0; i < 42; i++, tone += step2)
+  {
+    *(ct++) = imColorEncode(0, (imbyte)255, (imbyte)tone);
+  }
+
+  /* cyan */
+  *(ct++) = imColorEncode(0, (imbyte)255, (imbyte)255);
+
+  /* cyan (0, 255, 255) to blue (0, 0, 255): green falls */
+  for (tone = step1, i = 0; i < 41; i++, tone += step1)
+  {
+    *(ct++) = imColorEncode(0, (imbyte)(255.0f-tone), (imbyte)255);
+  }
+
+  /* blue */
+  *(ct++) = imColorEncode(0, 0, (imbyte)255);
+
+  /* blue (0, 0, 255) to magenta (255, 0, 255): red rises */
+  for (tone = step2, i = 0; i < 42; i++, tone += step2)
+  {
+    *(ct++) = imColorEncode((imbyte)tone, 0, (imbyte)255);
+  }
+
+  /* magenta */
+  *(ct++) = imColorEncode((imbyte)255, 0, (imbyte)255);
+
+  /* magenta (255, 0, 255) to red (255, 0, 0): blue falls */
+  for (tone = step1, i = 0; i < 41; i++, tone += step1)
+  {
+    *(ct++) = imColorEncode((imbyte)255, 0, (imbyte)(255.0f-tone));
+  }
+
+  /* black */
+  *(ct++) = imColorEncode(0, 0, 0);
+
+  return palette;
+}
+
+/* Builds a 256-entry rainbow: the hue sweeps 360 degrees, starting at 300 and
+ * decreasing, at saturation 1 and the intensity returned by imColorHSI_ImaxS.
+ * The malloc'ed array is returned to the caller; NULL if the allocation fails. */
+long* imPaletteRainbow(void)
+{
+  long* palette = (long*)malloc(sizeof(long)*256);
+  const float factor = 360.0f / 256.0f;
+  const float s = 1.0f;
+
+  if (!palette)
+    return NULL;
+
+  for (int hue = 0; hue < 256; hue++)
+  {
+    unsigned char r, g, b;
+    float h = hue * factor;
+    h = 300-h;                      /* hue in degrees ...            */
+    if (h < 0) h += 360;            /* ... wrapped into [0, 360)     */
+    float H = h/57.2957795131f;     /* the same angle in radians     */
+    float i = imColorHSI_ImaxS(H, cos(H), sin(H));
+
+    imColorHSI2RGBbyte(h, s, i, &r, &g, &b);
+    palette[hue] = imColorEncode(r, g, b);
+  }
+
+  return palette;
+}
+
+/* Builds a 256-entry ramp from blue (0, 0, 255) to white (255, 255, 255).
+ * The array is allocated with malloc and returned to the caller.
+ * Returns NULL when the allocation fails. */
+long* imPaletteBlueIce(void)
+{
+  long* palette = (long*)malloc(sizeof(long)*256);
+  if (!palette)
+    return NULL;
+
+  for (int lIndex = 0; lIndex < 256; lIndex++)
+    palette[lIndex] = imColorEncode((imbyte)lIndex, (imbyte)lIndex, 255);
+
+  return palette;
+}
+
+/* Builds the 256-entry "hot iron" palette: 128 steps black to ~red, 64 steps
+ * red to ~orange, 63 steps orange to ~white, and a final pure white entry.
+ * The malloc'ed array is returned to the caller; NULL if the allocation fails. */
+long* imPaletteHotIron(void)
+{
+  long* palette = (long*)malloc(sizeof(long)*256);
+  long* ct = palette;
+  int lIndex;
+
+  if (!palette)
+    return NULL;
+
+  /* (0, 0, 0) to (254, 0, 0): red rises in steps of 2 */
+  for (lIndex = 0; lIndex < 256; lIndex += 2)
+    *(ct++) = imColorEncode((imbyte)lIndex, 0, 0);
+
+  /* (255, 0, 0) to (255, 126, 0): green rises in steps of 2 */
+  for (lIndex = 0; lIndex < 128; lIndex += 2)
+    *(ct++) = imColorEncode(255, (imbyte)lIndex, 0);
+
+  /* (255, 128, 4) to (255, 252, 252): green +2 and blue +4 per step */
+  for (lIndex = 0; lIndex < 126; lIndex += 2)
+  {
+    imbyte red = 255;
+    imbyte green = (imbyte)(128 + lIndex);
+    imbyte blue = (imbyte)(lIndex * 2 + 4);
+
+    *(ct++) = imColorEncode(red, green, blue);
+  }
+
+  /* the last entry is pure white */
+  *(ct++) = imColorEncode(255, 255, 255);
+
+  return palette;
+}
+
+/* Builds the 256-entry "black body" palette from three ramps that rise by 3:
+ * 85 steps black to ~red, 85 steps red to ~yellow, 86 steps yellow to white.
+ * The malloc'ed array is returned to the caller; NULL if the allocation fails. */
+long* imPaletteBlackBody(void)
+{
+  long* palette = (long*)malloc(sizeof(long)*256);
+  long* ct = palette;
+  int lIndex;
+
+  if (!palette)
+    return NULL;
+
+  /* From (0, 0, 0) to (252, 0, 0) */
+  /* black to almost red: red rises */
+  for (lIndex = 0; lIndex < 255; lIndex += 3)
+    *(ct++) = imColorEncode((imbyte)lIndex, 0, 0);
+
+  /* From (255, 0, 0) to (255, 252, 0) */
+  /* red to almost yellow: green rises */
+  for (lIndex = 0; lIndex < 255; lIndex += 3)
+    *(ct++) = imColorEncode(255, (imbyte)lIndex, 0);
+
+  /* From (255, 255, 0) to (255, 255, 255) */
+  /* yellow to white: blue rises, and the 86th step lands exactly on 255 */
+  for (lIndex = 0; lIndex < 258; lIndex += 3)
+    *(ct++) = imColorEncode(255, 255, (imbyte)lIndex);
+
+  return palette;
+}
+
+/* Builds the 256-entry "high contrast" palette: 65 hand-picked colors followed
+ * by grays for entries 65..255 (gray level equal to the entry index).
+ * The malloc'ed array is returned to the caller; NULL if the allocation fails. */
+long* imPaletteHighContrast(void)
+{
+  static const struct{unsigned char r, g, b;} HighContrastColors[65] = {
+    { 0,0,0 },
+
+    { 255,0,0 },      { 128,0,0 },      { 64,0,0 },       { 192,0,0 },
+    { 0,255,0 },      { 0,128,0 },      { 0,64,0 },       { 0,192,0 },
+    { 0,0,255 },      { 0,0,128 },      { 0,0,64 },       { 0,0,192 },
+    { 255,255,0 },    { 128,128,0 },    { 64,64,0 },      { 192,192,0 },
+    { 255,0,255 },    { 128,0,128 },    { 64,0,64 },      { 192,0,192 },
+    { 0,255,255 },    { 0,128,128 },    { 0,64,64 },      { 0,192,192 },
+    { 255,255,255 },  { 128,128,128 },  { 64,64,64 },     { 192,192,192 },
+
+    { 255,128,128 },  { 64,255,255 },   { 192,255,255 },
+    { 128,255,128 },  { 255,64,255 },   { 255,192,255 },
+    { 128,128,255 },  { 255,255,64 },   { 255,255,192 },
+    { 255,255,128 },  { 64,64,255 },    { 192,192,255 },
+    { 255,128,255 },  { 64,255,64 },    { 192,255,192 },
+    { 128,255,255 },  { 255,64,64 },    { 255,192,192 },
+
+    { 128,64,64 },    { 128,192,192 },
+    { 64,128,64 },    { 192,128,192 },
+    { 64,64,128 },    { 192,192,128 },
+    { 128,128,64 },   { 128,128,192 },
+    { 128,64,128 },   { 128,192,128 },
+    { 64,128,128 },   { 192,128,128 },
+
+    { 192,64,64 },
+    { 64,192,64 },
+    { 64,64,192 },
+    { 192,192,64 },
+    { 192,64,192 },
+    { 64,192,192 },
+  };
+
+  long* palette = (long*)malloc(sizeof(long)*256);
+  if (!palette)
+    return NULL;
+
+  for (int lIndex = 0; lIndex < 65; lIndex++)
+    palette[lIndex] = imColorEncode(HighContrastColors[lIndex].r,
+                                    HighContrastColors[lIndex].g,
+                                    HighContrastColors[lIndex].b);
+
+  /* the remaining entries are grays whose level equals the entry index */
+  for (int lIndex = 65; lIndex < 256; lIndex++)
+    palette[lIndex] = imColorEncode((imbyte)lIndex, (imbyte)lIndex, (imbyte)lIndex);
+
+  return palette;
+}
+
+/* The six evenly spaced component levels (multiples of 51) of the 6x6x6 cube. */
+static const int iSixStepsTable[6] = {0, 51, 102, 153, 204, 255};
+
+/* Builds the 256-entry uniform palette: a 6x6x6 RGB cube (216 entries, index
+ * 36*r + 6*g + b) followed by 40 grays with levels 6, 12, ..., 240.
+ * The malloc'ed array is returned to the caller; NULL if the allocation fails. */
+long* imPaletteUniform(void)
+{
+  long* palette = (long*)malloc(sizeof(long)*256);
+  long* ct = palette;
+
+  if (!palette)
+    return NULL;
+
+  /* blue varies fastest, then green, then red */
+  for (int lRedIndex = 0; lRedIndex < 6; lRedIndex++)
+    for (int lGreenIndex = 0; lGreenIndex < 6; lGreenIndex++)
+      for (int lBlueIndex = 0; lBlueIndex < 6; lBlueIndex++)
+        *(ct++) = imColorEncode((imbyte)iSixStepsTable[lRedIndex],
+                                (imbyte)iSixStepsTable[lGreenIndex],
+                                (imbyte)iSixStepsTable[lBlueIndex]);
+
+  /* 216 cube colors are set; the other 40 entries become a gray ramp */
+  for (int lIndex = 6; lIndex < 246; lIndex += 6)
+    *(ct++) = imColorEncode((imbyte)lIndex, (imbyte)lIndex, (imbyte)lIndex);
+
+  return palette;
+}
+
+/* X divided by 51 (integer division): maps a component 0..255 to a level 0..5 of the 216 color space. */
+static int iDividedBy51Table[256] =
+{
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5
+};
+
+/* X multiplied by 36: weight of the red level in the uniform palette index. */
+static int iTimes36Table[6] = {0, 36, 72, 108, 144, 180};
+
+/* X multiplied by 6: weight of the green level in the uniform palette index. */
+static int iTimes6Table[6] = {0, 6, 12, 18, 24, 30};
+
+int imPaletteUniformIndex(long color)
+{ /* result is 36*R + 6*G + B, where R, G and B are the components divided by 51 */
+  imbyte r, g, b;
+  imColorDecode(&r, &g, &b, color);
+  return iTimes36Table[iDividedBy51Table[r]] + iTimes6Table[iDividedBy51Table[g]] + iDividedBy51Table[b];
+}
+
+/* Remainder of X divided by 51; compared with the dither threshold when halftoning. */
+static int iModulo51Table[256] =
+{
+  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
+  13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
+  29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+  45, 46, 47, 48, 49, 50,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+  42, 43, 44, 45, 46, 47, 48, 49, 50,  0,  1,  2,  3,  4,  5,  6,
+  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+  23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
+  39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,  0,  1,  2,  3,
+  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+  20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+  36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,  0
+};
+
+/* 8x8 ordered-dither threshold matrix for 8 bit to 2.6 bit halftones; the thresholds lie in 0..50. */
+static int iHalftone8x8Table[64] =
+{
+  0, 38,  9, 47,  2, 40, 11, 50,
+  25, 12, 35, 22, 27, 15, 37, 24,
+  6, 44,  3, 41,  8, 47,  5, 43,
+  31, 19, 28, 15, 34, 21, 31, 18,
+  1, 39, 11, 49,  0, 39, 10, 48,
+  27, 14, 36, 23, 26, 13, 35, 23,
+  7, 46,  4, 43,  7, 45,  3, 42,
+  33, 20, 30, 17, 32, 19, 29, 16
+};
+
+/* Same as imPaletteUniformIndex, but with an 8x8 ordered dither at (x, y): a
+ * component moves up one level when its remainder exceeds the matrix threshold.
+ * x and y are masked with 7 so negative coordinates stay inside the matrix. */
+int imPaletteUniformIndexHalftoned(long color, int x, int y)
+{
+  int lHalf = iHalftone8x8Table[(x & 7) * 8 + (y & 7)];
+  imbyte red, green, blue;
+  imColorDecode(&red, &green, &blue, color);
+
+  int lRed = iDividedBy51Table[red] + (iModulo51Table[red] > lHalf? 1: 0);
+  int lGreen = iDividedBy51Table[green] + (iModulo51Table[green] > lHalf? 1: 0);
+  int lBlue = iDividedBy51Table[blue] + (iModulo51Table[blue] > lHalf? 1: 0);
+  return iTimes36Table[lRed] + iTimes6Table[lGreen] + lBlue;
+}
diff --git a/src/im_process.def b/src/im_process.def
new file mode 100644
index 0000000..b3e1a2b
--- /dev/null
+++ b/src/im_process.def
@@ -0,0 +1,162 @@
+EXPORTS
+  imCalcRMSError
+  imCalcSNR
+  imProcessAutoCovariance
+  imProcessBinMorphClose
+  imProcessBinMorphConvolve
+  imProcessBinMorphDilate
+  imProcessBinMorphErode
+  imProcessBinMorphOpen
+  imProcessBinMorphOutline
+  imProcessCompassConvolve
+  imProcessConvolve
+  imProcessConvolveRep
+  imProcessConvolveSep
+  imProcessDiffOfGaussianConvolve
+  imProcessGaussianConvolve
+  imProcessGrayMorphClose
+  imProcessGrayMorphConvolve
+  imProcessGrayMorphDilate
+  imProcessGrayMorphErode
+  imProcessGrayMorphGradient
+  imProcessGrayMorphOpen
+  imProcessGrayMorphTopHat
+  imProcessGrayMorphWell
+  imProcessHoughLines
+  imProcessHoughLinesDraw
+  imProcessLapOfGaussianConvolve
+  imProcessLocalMaxThreshold
+  imProcessMeanConvolve
+  imProcessMedianConvolve
+  imProcessMinMaxThreshold
+  imProcessOtsuThreshold
+  imProcessPercentThreshold
+  imProcessRadial
+  imProcessRangeContrastThreshold
+  imProcessRangeConvolve
+  imProcessRankClosestConvolve
+  imProcessRankMaxConvolve
+  imProcessRankMinConvolve
+  imProcessReduce
+  imProcessRenderAddGaussianNoise
+  imProcessRenderAddSpeckleNoise
+  imProcessRenderAddUniformNoise
+  imProcessRenderBox
+  imProcessRenderCondOp
+  imProcessRenderCone
+  imProcessRenderConstant
+  imProcessRenderCosine
+  imProcessRenderGaussian
+  imProcessRenderLapOfGaussian
+  imProcessRenderOp
+  imProcessRenderRamp
+  imProcessRenderRandomNoise
+  imProcessRenderSinc
+  imProcessRenderTent
+  imProcessRenderWheel
+  imProcessResize
+  imProcessRotate
+  imProcessSobelConvolve
+  imProcessUniformErrThreshold
+  imCalcCountColors
+  imCalcGrayHistogram
+  imCalcHistogram
+  imCalcHistogramStatistics
+  imCalcHistoImageStatistics
+  imCalcImageStatistics
+  imProcessPixelate
+  imProcessArithmeticConstOp
+  imProcessArithmeticOp
+  imProcessBinMorphThin
+  imProcessBitMask
+  imProcessBitPlane
+  imProcessBitwiseOp
+  imProcessBlendConst
+  imProcessBlend
+  imProcessCalcRotateSize
+  imProcessDifusionErrThreshold
+  imProcessDirectConv
+  imProcessEqualizeHistogram
+  imProcessExpandHistogram
+  imProcessFlip
+  imProcessHysteresisThresEstimate
+  imProcessHysteresisThreshold
+  imProcessLocalMaxThresEstimate
+  imProcessMergeComplex
+  imProcessMergeComponents
+  imProcessMergeHSI
+  imProcessMirror
+  imProcessMultipleMean
+  imProcessMultipleStdDev
+  imProcessQuantizeGrayUniform
+  imProcessQuantizeRGBUniform
+  imProcessReduceBy4
+  imProcessRotate180
+  imProcessRotate90
+  imProcessSplitComplex
+  imProcessSplitComponents
+  imProcessSplitHSI
+  imProcessSplitYChroma
+  imProcessThreshold
+  imProcessThresholdByDiff
+  imProcessToneGamut
+  imProcessUnArithmeticOp
+  imProcessUnNormalize
+  imProcessZeroCrossing
+  imProcessRotateKernel
+  imProcessAddMargins
+  imProcessReplaceColor
+  imProcessPosterize
+  imProcessNegative
+  imProcessCanny
+  imProcessMultiplyConj
+  imProcessNormalizeComponents
+  imGaussianStdDev2KernelSize
+  imProcessBitwiseNot
+  imProcessDistanceTransform
+  imAnalyzeFindRegions
+  imAnalyzeMeasureArea
+  imAnalyzeMeasureCentroid
+  imAnalyzeMeasurePrincipalAxis
+  imAnalyzeMeasureHoles
+  imProcessPerimeterLine
+  imAnalyzeMeasurePerimeter
+  imProcessPrune
+  imProcessFillHoles
+  imAnalyzeMeasurePerimArea
+  imProcessSliceThreshold
+  imProcessRenderGrid
+  imProcessRenderChessboard
+  imProcessInsert
+  imProcessCrop
+  imProcessRegionalMaximum
+  imCalcUShortHistogram
+  imProcessSwirl
+  imProcessPrewittConvolve
+  imProcessSplineEdgeConvolve
+  imProcessConvolveDual
+  imKernelSobel
+  imKernelPrewitt
+  imKernelKirsh
+  imKernelLaplacian4
+  imKernelLaplacian8
+  imKernelLaplacian5x5
+  imKernelLaplacian7x7
+  imKernelGradian3x3
+  imKernelGradian7x7
+  imKernelSculpt
+  imKernelMean3x3
+  imKernelMean5x5
+  imKernelCircularMean5x5
+  imKernelMean7x7
+  imKernelCircularMean7x7
+  imKernelGaussian3x3
+  imKernelGaussian5x5
+  imKernelBarlett5x5
+  imKernelTopHat5x5
+  imKernelTopHat7x7
+  imKernelEnhance
+  imGaussianKernelSize2StdDev
+  imProcessRotateRef 
+  imProcessInterlaceSplit
+  imProcessBarlettConvolve
diff --git a/src/im_process.mak b/src/im_process.mak
new file mode 100644
index 0000000..064be76
--- /dev/null
+++ b/src/im_process.mak
@@ -0,0 +1,36 @@
+# Build description for the im_process library.
+# NOTE(review): the variable names look like Tecmake conventions - confirm
+# against the build system that includes this file.
+PROJNAME = im
+LIBNAME = im_process
+OPT = YES
+
+# Source files, written relative to the process/ directory; the directory
+# prefix is added to every entry by the addprefix line that follows the list.
+SRC = \
+    im_arithmetic_bin.cpp  im_morphology_gray.cpp  im_quantize.cpp   \
+    im_arithmetic_un.cpp   im_geometric.cpp        im_render.cpp     \
+    im_color.cpp           im_histogram.cpp        im_resize.cpp     \
+    im_convolve.cpp        im_houghline.cpp        im_statistics.cpp \
+    im_convolve_rank.cpp   im_logic.cpp            im_threshold.cpp  \
+    im_effects.cpp         im_morphology_bin.cpp   im_tonegamut.cpp  \
+    im_canny.cpp           im_distance.cpp         im_analyze.cpp    \
+    im_kernel.cpp
+SRC  := $(addprefix process/, $(SRC))
+                                       
+# This library is built against the IM library located one directory up.
+USE_IM = Yes
+IM = ..
+
+# Add IM_DEFMATHFLOAT to DEFINES on selected platforms:
+#  - when TEC_SYSNAME contains "Win": only if TEC_UNAME contains "ow" or "bc"
+#    (presumably the Open Watcom and Borland compilers - confirm);
+#  - otherwise: if TEC_UNAME contains "AIX", "SunOS" or "HP-UX".
+# The effect of IM_DEFMATHFLOAT is decided by the sources, not by this file.
+ifneq ($(findstring Win, $(TEC_SYSNAME)), )
+    ifneq ($(findstring ow, $(TEC_UNAME)), )
+      DEFINES += IM_DEFMATHFLOAT
+    endif  
+    ifneq ($(findstring bc, $(TEC_UNAME)), )
+      DEFINES += IM_DEFMATHFLOAT
+    endif  
+else
+  ifneq ($(findstring AIX, $(TEC_UNAME)), )
+    DEFINES += IM_DEFMATHFLOAT 
+  endif
+  ifneq ($(findstring SunOS, $(TEC_UNAME)), )
+    DEFINES += IM_DEFMATHFLOAT
+  endif
+  ifneq ($(findstring HP-UX, $(TEC_UNAME)), )
+    DEFINES += IM_DEFMATHFLOAT
+  endif
+endif
diff --git a/src/im_rgb2map.cpp b/src/im_rgb2map.cpp
new file mode 100644
index 0000000..465743a
--- /dev/null
+++ b/src/im_rgb2map.cpp
@@ -0,0 +1,964 @@
+/** \file
+ * \brief RGB to Map Conversion
+ *
+ * Most part of this code is based on jquant2.c from version 5         
+ * of the IJG JPEG software,                          
+ * copyright (C) 1991-1994, Thomas G. Lane.                   
+ * Some other parts are from XV software 
+ * copyright John Bradley.
+ * This file does not follow the IM library nomenclature convention.
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_rgb2map.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+
+#include "im.h"
+#include "im_util.h"
+#include "im_convert.h"
+#include "im_counter.h"
+
+
+/* RANGE forces a to be in the range b..c (inclusive) */
+#define RANGE(a,b,c) { if (a < b) a = b;  if (a > c) a = c; }
+
+/* Copies 'len' bytes from src to dst, giving the correct result even when
+ * the two regions overlap. Does nothing when len <= 0 or src == dst. */
+static void xvbcopy(const imbyte* src, imbyte* dst, int len)
+{
+  int k;
+
+  /* empty request, or both regions are the very same bytes */
+  if (len <= 0 || src == dst)
+    return;
+
+  if (dst > src && dst < src + len)
+  {
+    /* dst starts inside the source region: walk from the last byte down
+       so that no source byte is overwritten before it has been read */
+    for (k = len - 1; k >= 0; k--)
+      dst[k] = src[k];
+    return;
+  }
+
+  /* dst lies before src, or the regions are disjoint: ascending order is safe */
+  for (k = 0; k < len; k++)
+    dst[k] = src[k];
+}
+
+/****************************/
+static int quick_map(imbyte *red, imbyte *green, imbyte *blue, int w, int h, imbyte *map, 
+                     imbyte *rmap, imbyte *gmap, imbyte *bmap, int maxcol)
+{
+  /* Exact RGB-to-map conversion for images that use few distinct colors.
+   *
+   * Pass 1 collects the distinct colors of the w*h pixels in a sorted table.
+   * As soon as a new color would exceed 'maxcol' entries (capped at 256) the
+   * function gives up and returns 0 without having written anything.
+   * Pass 2 writes, for every pixel, the table position of its color to 'map'.
+   * Finally the table is unpacked into rmap/gmap/bmap.
+   *
+   * Returns the number of colors placed in the colormap, or 0 on failure. */
+
+  unsigned long table[256];   /* distinct colors, ascending, packed as R<<16 | G<<8 | B */
+  unsigned long rgb;
+  int used = 0;               /* number of valid entries in table */
+  int probe = 0;              /* last position examined by the binary search */
+  int total = w*h;
+  int left, idx, lo, hi, k;
+
+  if (maxcol > 256)
+    maxcol = 256;
+
+  /* pass 1: build the sorted table of distinct colors */
+  for (left = total, idx = 0; left; left--, idx++)
+  {
+    rgb = ((unsigned long) red[idx] << 16)
+        + ((unsigned long) green[idx] << 8)
+        + blue[idx];
+
+    lo = 0;
+    hi = used - 1;
+    while (lo <= hi)
+    {
+      probe = (lo + hi) / 2;
+      if (rgb == table[probe])
+        break;
+      if (rgb < table[probe])
+        hi = probe - 1;
+      else
+        lo = probe + 1;
+    }
+
+    if (lo > hi)
+    {
+      /* not in the table yet: 'lo' is the position that keeps it sorted */
+      if (used >= maxcol)
+        return 0;
+
+      /* open a gap at 'lo' by shifting the tail one entry up */
+      xvbcopy((const imbyte*)&table[lo], (imbyte*)&table[lo+1], (used - lo) * sizeof(unsigned long));
+      table[lo] = rgb;
+      used++;
+    }
+  }
+
+  /* pass 2: replace every pixel by the table position of its color */
+  for (left = total, idx = 0; left; left--, idx++)
+  {
+    rgb = ((unsigned long) red[idx] << 16)
+        + ((unsigned long) green[idx] << 8)
+        + blue[idx];
+
+    lo = 0;
+    hi = used - 1;
+    while (lo <= hi)
+    {
+      probe = (lo + hi) / 2;
+      if (rgb == table[probe])
+        break;
+      if (rgb < table[probe])
+        hi = probe - 1;
+      else
+        lo = probe + 1;
+    }
+
+    /* every color was inserted in pass 1, so a miss is not expected */
+    if (lo > hi)
+      return 0;
+
+    map[idx] = (imbyte)probe;
+  }
+
+  /* unpack the table into the three colormap channels */
+  for (k = 0; k < used; k++)
+  {
+    rmap[k] = (unsigned char)( table[k]>>16);
+    gmap[k] = (unsigned char)((table[k]>>8) & 0xff);
+    bmap[k] = (unsigned char)( table[k]     & 0xff);
+  }
+
+  return used;
+}
+
+/* Compile-time configuration of the slow quantizer.
+ * Component 0 is R, component 1 is G and component 2 is B. */
+#define MAXNUMCOLORS  256	/* maximum size of colormap */
+
+/* Per-component weights applied to distances before they are squared. */
+#define C0_SCALE 2		/* scale R distances by this much */
+#define C1_SCALE 3		/* scale G distances by this much */
+#define C2_SCALE 1		/* and B by this much */
+
+#define HIST_C0_BITS  5		/* bits of precision in R histogram */
+#define HIST_C1_BITS  6		/* bits of precision in G histogram */
+#define HIST_C2_BITS  5		/* bits of precision in B histogram */
+
+/* Number of elements along histogram axes. */
+/* With the bit counts above: 32 (R), 64 (G) and 32 (B). */
+#define HIST_C0_ELEMS  (1<<HIST_C0_BITS)
+#define HIST_C1_ELEMS  (1<<HIST_C1_BITS)
+#define HIST_C2_ELEMS  (1<<HIST_C2_BITS)
+
+/* These are the amounts to shift an input value to get a histogram index. */
+/* With the bit counts above: 3 (R), 2 (G) and 3 (B). */
+#define C0_SHIFT  (8-HIST_C0_BITS)
+#define C1_SHIFT  (8-HIST_C1_BITS)
+#define C2_SHIFT  (8-HIST_C2_BITS)
+
+
+typedef imushort histcell;	/* histogram cell; prefer an unsigned type */
+
+typedef histcell * histptr;	/* for pointers to histogram cells */
+
+/* hist3d is indexed as [c0][c1][c2], so cells along c2 are contiguous and
+ * stepping along c1 advances a histptr by HIST_C2_ELEMS cells. */
+typedef histcell hist1d[HIST_C2_ELEMS]; /* typedefs for the histogram array */
+typedef hist1d hist2d[HIST_C1_ELEMS];
+typedef hist2d hist3d[HIST_C0_ELEMS];
+
+typedef short FSERROR;		/* 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+
+typedef FSERROR *FSERRPTR;	/* pointer to error array */
+
+/* One box of the median-cut subdivision of the histogram. */
+typedef struct {
+  /* The bounds of the box (inclusive); expressed as histogram indexes */
+  int c0min, c0max;
+  int c1min, c1max;
+  int c2min, c2max;
+  /* The volume (actually 2-norm) of the box */
+  /* update_box stores the sum of the squared, scaled extents here */
+  int volume;
+  /* The number of nonzero histogram cells within this box */
+  long colorcount;
+} box;
+typedef box * boxptr;
+
+/* Local state for the IJG quantizer */
+/* These file-scope variables are shared by all the helpers below and are
+ * (re)assigned by slow_quant on every call, so the quantizer is not
+ * reentrant. */
+
+static hist2d * sl_histogram;	/* pointer to the 3D histogram array */
+static FSERRPTR sl_fserrors;	/* accumulated-errors array */
+/* slow_quant releases this table with free(sl_error_limiter-255), i.e. the
+ * pointer is kept 255 entries past the start of its allocation. */
+static int * sl_error_limiter;	/* table for clamping the applied error */
+static int sl_on_odd_row;	/* flag to remember which row we are on */
+/* The three entries point at the caller's rm/gm/bm arrays given to slow_quant. */
+static imbyte* sl_colormap[3];	/* selected colormap */
+static int sl_num_colors;	/* number of selected colors */
+
+
+/* Forward declarations of the quantizer helpers defined below. */
+static void   slow_fill_histogram (imbyte*, imbyte*, imbyte*, int);
+static boxptr find_biggest_color_pop (boxptr, int);
+static boxptr find_biggest_volume (boxptr, int);
+static void   update_box (boxptr);
+static int    median_cut (boxptr, int, int);
+static void   compute_color (boxptr, int);
+static void   slow_select_colors (int);
+static int    find_nearby_colors (int, int, int, imbyte []);
+static void   find_best_colors (int,int,int,int, imbyte [], imbyte []);
+static void   fill_inverse_cmap (int, int, int);
+static void   slow_map_pixels (imbyte*, imbyte*, imbyte*, int, int, imbyte*);
+static void   init_error_limit (void);
+
+
+/* Master control for slow quantizer.
+ *
+ * red, green, blue: input planes of w*h samples each.
+ * map:              output plane receiving one colormap index per pixel.
+ * rm, gm, bm:       output colormap channels, filled by the color selection.
+ * descols:          number of colormap entries requested.
+ *
+ * Returns 0 on success, 1 when any working buffer could not be allocated
+ * (in which case nothing is written to the outputs). */
+static int slow_quant(imbyte *red, imbyte *green, imbyte *blue, int w, int h, imbyte *map, 
+                      imbyte *rm, imbyte *gm, imbyte *bm, int descols)
+{
+  /* one FSERROR per component, for every column plus one guard column on each side */
+  size_t fs_arraysize = (w + 2) * (3 * sizeof(FSERROR));
+  int failed;
+  
+  /* Allocate all the temporary storage needed.
+     The limiter pointer is cleared first so that a value left over from an
+     earlier call can never be mistaken for a successful allocation. */
+  sl_error_limiter = NULL;
+  init_error_limit();
+
+  sl_histogram = (hist2d *) malloc(sizeof(hist3d));
+  sl_fserrors = (FSERRPTR) malloc(fs_arraysize);
+  
+  failed = (! sl_error_limiter || ! sl_histogram || ! sl_fserrors) ? 1 : 0;
+
+  if (! failed)
+  {
+    sl_colormap[0] = (imbyte*) rm;
+    sl_colormap[1] = (imbyte*) gm;
+    sl_colormap[2] = (imbyte*) bm;
+    
+    /* Compute the color histogram */
+    slow_fill_histogram(red, green, blue, w*h);
+    
+    /* Select the colormap */
+    slow_select_colors(descols);
+    
+    /* Zero the histogram: now to be used as inverse color map */
+    memset(sl_histogram, 0, sizeof(hist3d));
+    
+    /* Initialize the propagated errors to zero. */
+    memset(sl_fserrors, 0, fs_arraysize);
+    sl_on_odd_row = 0;
+    
+    /* Map the image. */
+    slow_map_pixels(red, green, blue, w, h, map);
+  }
+  
+  /* Release working memory (shared by the success and failure paths).
+     The limiter pointer is offset by 255 entries from its allocation, so it
+     must be tested before the offset is undone. */
+  if (sl_error_limiter) free(sl_error_limiter-255);
+  free(sl_fserrors);
+  free(sl_histogram);
+
+  /* Do not leave dangling pointers behind in the file-scope state. */
+  sl_error_limiter = NULL;
+  sl_fserrors = NULL;
+  sl_histogram = NULL;
+
+  return failed;
+}
+
+
+/* Clears the shared histogram and then counts every one of the 'numpixels'
+ * input samples in the cell selected by the high bits of its components. */
+static void slow_fill_histogram (register imbyte *red, register imbyte *green, register imbyte *blue, int numpixels)
+{
+  hist2d * const cells = sl_histogram;
+  int k;
+
+  memset(cells, 0, sizeof(hist3d));
+
+  for (k = 0; k < numpixels; k++)
+  {
+    /* locate the cell this pixel belongs to */
+    histcell *bin = & cells[red[k] >> C0_SHIFT] [green[k] >> C1_SHIFT] [blue[k] >> C2_SHIFT];
+
+    /* count it; if the counter wrapped around, step back so it saturates */
+    if (++(*bin) <= 0)
+      (*bin)--;
+  }
+}
+
+
+/* Returns the box with the largest colorcount among the first 'numboxes'
+ * entries of boxlist, considering only boxes with volume > 0.
+ * Returns NULL when no box qualifies; on ties the earliest box wins. */
+static boxptr find_biggest_color_pop (boxptr boxlist, int numboxes)
+{
+  boxptr best = NULL;
+  long best_count = 0;
+  int k;
+
+  for (k = 0; k < numboxes; k++)
+  {
+    boxptr cand = &boxlist[k];
+
+    /* skip boxes that cannot be split or do not beat the current best */
+    if (cand->colorcount <= best_count || cand->volume <= 0)
+      continue;
+
+    best = cand;
+    best_count = cand->colorcount;
+  }
+
+  return best;
+}
+
+/* Returns the box with the largest (strictly positive) volume among the
+ * first 'numboxes' entries of boxlist, or NULL when every volume is <= 0.
+ * On ties the earliest box wins. */
+static boxptr find_biggest_volume (boxptr boxlist, int numboxes)
+{
+  boxptr best = NULL;
+  int best_volume = 0;
+  int k;
+
+  for (k = 0; k < numboxes; k++)
+  {
+    boxptr cand = &boxlist[k];
+
+    if (cand->volume <= best_volume)
+      continue;
+
+    best = cand;
+    best_volume = cand->volume;
+  }
+
+  return best;
+}
+
+
+/* Shrinks a box to the smallest bounds that still contain all of its
+ * nonzero histogram cells, then recomputes its statistics.
+ *
+ * The six bounds are tightened one after the other (c0min, c0max, c1min,
+ * c1max, c2min, c2max); each pass already uses the bounds tightened by the
+ * passes before it, and is skipped when that axis has a single cell.
+ * Afterwards boxp->volume is set to the sum of the squared, scaled extents
+ * and boxp->colorcount to the number of nonzero cells inside the box. */
+static void update_box (boxptr boxp)
+{
+  hist2d * histogram = sl_histogram;
+  histptr histp;
+  int c0,c1,c2;
+  int c0min,c0max,c1min,c1max,c2min,c2max;
+  int dist0,dist1,dist2;
+  long ccount;
+  
+  c0min = boxp->c0min;  c0max = boxp->c0max;
+  c1min = boxp->c1min;  c1max = boxp->c1max;
+  c2min = boxp->c2min;  c2max = boxp->c2max;
+  
+  /* raise c0min to the first c0 plane that holds a nonzero cell */
+  if (c0max > c0min)
+    for (c0 = c0min; c0 <= c0max; c0++)
+      for (c1 = c1min; c1 <= c1max; c1++) {
+        histp = & histogram[c0][c1][c2min];
+        for (c2 = c2min; c2 <= c2max; c2++)
+          if (*histp++ != 0) {
+            boxp->c0min = c0min = c0;
+            goto have_c0min;
+          }
+      }
+have_c0min:
+      /* lower c0max to the last c0 plane that holds a nonzero cell */
+      if (c0max > c0min)
+        for (c0 = c0max; c0 >= c0min; c0--)
+          for (c1 = c1min; c1 <= c1max; c1++) {
+            histp = & histogram[c0][c1][c2min];
+            for (c2 = c2min; c2 <= c2max; c2++)
+              if (*histp++ != 0) {
+                boxp->c0max = c0max = c0;
+                goto have_c0max;
+              }
+          }
+have_c0max:
+          /* raise c1min to the first c1 plane that holds a nonzero cell */
+          if (c1max > c1min)
+            for (c1 = c1min; c1 <= c1max; c1++)
+              for (c0 = c0min; c0 <= c0max; c0++) {
+                histp = & histogram[c0][c1][c2min];
+                for (c2 = c2min; c2 <= c2max; c2++)
+                  if (*histp++ != 0) {
+                    boxp->c1min = c1min = c1;
+                    goto have_c1min;
+                  }
+              }
+have_c1min:
+              /* lower c1max to the last c1 plane that holds a nonzero cell */
+              if (c1max > c1min)
+                for (c1 = c1max; c1 >= c1min; c1--)
+                  for (c0 = c0min; c0 <= c0max; c0++) {
+                    histp = & histogram[c0][c1][c2min];
+                    for (c2 = c2min; c2 <= c2max; c2++)
+                      if (*histp++ != 0) {
+                        boxp->c1max = c1max = c1;
+                        goto have_c1max;
+                      }
+                  }
+have_c1max:
+                  /* raise c2min; here the inner loop walks along c1, so the
+                     cell pointer advances by one whole c2 row per step */
+                  if (c2max > c2min)
+                    for (c2 = c2min; c2 <= c2max; c2++)
+                      for (c0 = c0min; c0 <= c0max; c0++) {
+                        histp = & histogram[c0][c1min][c2];
+                        for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+                          if (*histp != 0) {
+                            boxp->c2min = c2min = c2;
+                            goto have_c2min;
+                          }
+                      }
+have_c2min:
+                      /* lower c2max to the last c2 plane that holds a nonzero cell */
+                      if (c2max > c2min)
+                        for (c2 = c2max; c2 >= c2min; c2--)
+                          for (c0 = c0min; c0 <= c0max; c0++) {
+                            histp = & histogram[c0][c1min][c2];
+                            for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+                              if (*histp != 0) {
+                                boxp->c2max = c2max = c2;
+                                goto have_c2max;
+                              }
+                          }
+have_c2max:
+                          
+                          /* extents converted back to 8-bit units and weighted per
+                             component; the "volume" is their squared 2-norm */
+                          dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
+                          dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
+                          dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
+                          boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2;
+                          
+                          /* count the nonzero cells inside the final bounds */
+                          ccount = 0;
+                          for (c0 = c0min; c0 <= c0max; c0++)
+                            for (c1 = c1min; c1 <= c1max; c1++) {
+                              histp = & histogram[c0][c1][c2min];
+                              for (c2 = c2min; c2 <= c2max; c2++, histp++)
+                                if (*histp != 0) {
+                                  ccount++;
+                                }
+                            }
+                            boxp->colorcount = ccount;
+}
+
+
+/* Repeatedly splits boxes until 'desired_colors' boxes exist or no box can
+ * be split any more. boxlist must have room for every box that is created.
+ * Returns the resulting number of boxes. */
+static int median_cut (boxptr boxlist, int numboxes, int desired_colors)
+{
+  for ( ; numboxes < desired_colors; numboxes++)
+  {
+    boxptr victim;            /* box chosen for splitting; keeps the lower half */
+    boxptr fresh;             /* new box; receives the upper half */
+    int span0, span1, span2;  /* scaled extent of the victim along each axis */
+    int widest, axis, cut;
+
+    /* Choose by population while at most half of the boxes exist,
+       by volume afterwards. */
+    victim = (numboxes*2 <= desired_colors)
+               ? find_biggest_color_pop(boxlist, numboxes)
+               : find_biggest_volume(boxlist, numboxes);
+
+    /* nothing left that can be split */
+    if (victim == NULL)
+      break;
+
+    /* the new box goes at the end of the list and starts with the same bounds */
+    fresh = &boxlist[numboxes];
+    fresh->c0min = victim->c0min;  fresh->c0max = victim->c0max;
+    fresh->c1min = victim->c1min;  fresh->c1max = victim->c1max;
+    fresh->c2min = victim->c2min;  fresh->c2max = victim->c2max;
+
+    /* Pick the axis with the largest scaled extent. Axis 1 wins ties with
+       axis 0, and axis 2 must be strictly larger than the best of both. */
+    span0 = ((victim->c0max - victim->c0min) << C0_SHIFT) * C0_SCALE;
+    span1 = ((victim->c1max - victim->c1min) << C1_SHIFT) * C1_SCALE;
+    span2 = ((victim->c2max - victim->c2min) << C2_SHIFT) * C2_SCALE;
+
+    axis = 1;
+    widest = span1;
+    if (span0 > widest) { axis = 0; widest = span0; }
+    if (span2 > widest) { axis = 2; }
+
+    /* cut at the midpoint of the chosen axis */
+    if (axis == 0)
+    {
+      cut = (victim->c0max + victim->c0min) / 2;
+      victim->c0max = cut;
+      fresh->c0min = cut+1;
+    }
+    else if (axis == 1)
+    {
+      cut = (victim->c1max + victim->c1min) / 2;
+      victim->c1max = cut;
+      fresh->c1min = cut+1;
+    }
+    else
+    {
+      cut = (victim->c2max + victim->c2min) / 2;
+      victim->c2max = cut;
+      fresh->c2min = cut+1;
+    }
+
+    /* shrink both halves and refresh their statistics */
+    update_box(victim);
+    update_box(fresh);
+  }
+
+  return numboxes;
+}
+
+/* Computes the representative color of a box and stores it as entry
+ * 'icolor' of the selected colormap (sl_colormap).
+ *
+ * The color is the mean of the centres of the box's histogram cells,
+ * weighted by the pixel count of each cell and rounded to nearest.
+ * A box that holds no pixels at all gets the centre of the box instead. */
+static void compute_color (boxptr boxp, int icolor)
+{
+  /* Current algorithm: mean weighted by pixels (not colors) */
+  /* Note it is important to get the rounding correct! */
+  hist2d * histogram = sl_histogram;
+  histptr histp;
+  int c0,c1,c2;
+  int c0min,c0max,c1min,c1max,c2min,c2max;
+  long count;
+  long total = 0;
+  long c0total = 0;
+  long c1total = 0;
+  long c2total = 0;
+  
+  c0min = boxp->c0min;  c0max = boxp->c0max;
+  c1min = boxp->c1min;  c1max = boxp->c1max;
+  c2min = boxp->c2min;  c2max = boxp->c2max;
+  
+  for (c0 = c0min; c0 <= c0max; c0++)
+    for (c1 = c1min; c1 <= c1max; c1++) {
+      histp = & histogram[c0][c1][c2min];
+      for (c2 = c2min; c2 <= c2max; c2++) {
+        if ((count = *histp++) != 0) {
+          total += count;
+          /* cell index -> 8-bit value at the centre of the cell */
+          c0total += ((c0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1)) * count;
+          c1total += ((c1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1)) * count;
+          c2total += ((c2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1)) * count;
+        }
+      }
+    }
+
+  if (total == 0) {
+    /* No pixel fell into this box (e.g. the histogram is entirely empty).
+       The mean is undefined and dividing by 'total' would be a division
+       by zero, so use the geometric centre of the box. */
+    sl_colormap[0][icolor] = (imbyte) (((c0min + c0max + 1) << C0_SHIFT) >> 1);
+    sl_colormap[1][icolor] = (imbyte) (((c1min + c1max + 1) << C1_SHIFT) >> 1);
+    sl_colormap[2][icolor] = (imbyte) (((c2min + c2max + 1) << C2_SHIFT) >> 1);
+    return;
+  }
+
+  /* adding total/2 before dividing rounds to the nearest integer */
+  sl_colormap[0][icolor] = (imbyte) ((c0total + (total>>1)) / total);
+  sl_colormap[1][icolor] = (imbyte) ((c1total + (total>>1)) / total);
+  sl_colormap[2][icolor] = (imbyte) ((c2total + (total>>1)) / total);
+}
+
+
+/* Master routine for color selection.
+ *
+ * Builds the colormap from the histogram in sl_histogram: starts with one
+ * box covering the whole histogram, splits it with median_cut and stores
+ * one representative color per box in sl_colormap.
+ *
+ * descolors: number of colors requested; values above MAXNUMCOLORS are
+ *            reduced to MAXNUMCOLORS.
+ * On return sl_num_colors holds the number of colors actually produced. */
+static void slow_select_colors (int descolors)
+{
+  box boxlist[MAXNUMCOLORS];
+  int numboxes;
+  int i;
+
+  /* median_cut creates up to 'descolors' boxes inside boxlist[]; never let
+     it run past the end of the array. */
+  if (descolors > MAXNUMCOLORS)
+    descolors = MAXNUMCOLORS;
+  
+  /* Initialize one box containing whole space */
+  numboxes = 1;
+  boxlist[0].c0min = 0;
+  boxlist[0].c0max = 255 >> C0_SHIFT;
+  boxlist[0].c1min = 0;
+  boxlist[0].c1max = 255 >> C1_SHIFT;
+  boxlist[0].c2min = 0;
+  boxlist[0].c2max = 255 >> C2_SHIFT;
+  /* Shrink it to actually-used volume and set its statistics */
+  update_box(& boxlist[0]);
+  /* Perform median-cut to produce final box list */
+  numboxes = median_cut(boxlist, numboxes, descolors);
+  /* Compute the representative color for each box, fill colormap */
+  for (i = 0; i < numboxes; i++)
+    compute_color(& boxlist[i], i);
+  sl_num_colors = numboxes;
+}
+
+
+/* Geometry of an "update box": the group of neighbouring histogram cells
+ * whose inverse-colormap entries are filled in together by
+ * fill_inverse_cmap. */
+
+/* log2(histogram cells in update box) for each axis; this can be adjusted */
+/* With HIST_C0/C1/C2_BITS of 5/6/5 these are 2, 3 and 2. */
+#define BOX_C0_LOG  (HIST_C0_BITS-3)
+#define BOX_C1_LOG  (HIST_C1_BITS-3)
+#define BOX_C2_LOG  (HIST_C2_BITS-3)
+
+#define BOX_C0_ELEMS  (1<<BOX_C0_LOG) /* # of hist cells in update box */
+#define BOX_C1_ELEMS  (1<<BOX_C1_LOG)
+#define BOX_C2_ELEMS  (1<<BOX_C2_LOG)
+
+/* Shift that converts an update-box number into an 8-bit component value
+ * (histogram shift plus box log; 5 on every axis with the current bits). */
+#define BOX_C0_SHIFT  (C0_SHIFT + BOX_C0_LOG)
+#define BOX_C1_SHIFT  (C1_SHIFT + BOX_C1_LOG)
+#define BOX_C2_SHIFT  (C2_SHIFT + BOX_C2_LOG)
+
+
+/* Selects the colormap entries that could be the nearest color for some
+ * cell of one update box.
+ *
+ * minc0, minc1, minc2: component values of the centre of the box's lowest
+ *                      cell (as computed by fill_inverse_cmap).
+ * colorlist:           output; receives the indexes of the candidate
+ *                      colormap entries. Must have room for sl_num_colors
+ *                      entries.
+ * Returns the number of indexes written to colorlist.
+ *
+ * For every colormap entry the smallest and the largest scaled squared
+ * distance to the box are computed. An entry is kept when its smallest
+ * distance does not exceed the smallest of all the largest distances. */
+static int find_nearby_colors (int minc0, int minc1, int minc2, imbyte* colorlist)
+{
+  int numcolors = sl_num_colors;
+  int maxc0, maxc1, maxc2;
+  int centerc0, centerc1, centerc2;
+  int i, x, ncolors;
+  int minmaxdist, min_dist, max_dist, tdist;
+  int mindist[MAXNUMCOLORS];	/* min distance to colormap entry i */
+  
+  /* upper corner (centre of the highest cell) and midpoint of the box */
+  maxc0 = minc0 + ((1 << BOX_C0_SHIFT) - (1 << C0_SHIFT));
+  centerc0 = (minc0 + maxc0) >> 1;
+  maxc1 = minc1 + ((1 << BOX_C1_SHIFT) - (1 << C1_SHIFT));
+  centerc1 = (minc1 + maxc1) >> 1;
+  maxc2 = minc2 + ((1 << BOX_C2_SHIFT) - (1 << C2_SHIFT));
+  centerc2 = (minc2 + maxc2) >> 1;
+  
+  /* start with the largest positive value so that any real distance is smaller */
+  minmaxdist = 0x7FFFFFFFL;
+  
+  for (i = 0; i < numcolors; i++) {
+    /* We compute the squared-c0-distance term, then add in the other two. */
+    x = sl_colormap[0][i];
+    if (x < minc0) {
+      /* below the box: nearest corner is min, farthest is max */
+      tdist = (x - minc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - maxc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else if (x > maxc0) {
+      /* above the box: nearest corner is max, farthest is min */
+      tdist = (x - maxc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - minc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      min_dist = 0;
+      /* the farthest corner is the one on the other side of the midpoint */
+      if (x <= centerc0) {
+        tdist = (x - maxc0) * C0_SCALE;
+        max_dist = tdist*tdist;
+      } else {
+        tdist = (x - minc0) * C0_SCALE;
+        max_dist = tdist*tdist;
+      }
+    }
+    
+    /* same three cases for component 1, accumulated into the totals */
+    x = sl_colormap[1][i];
+    if (x < minc1) {
+      tdist = (x - minc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc1) {
+      tdist = (x - maxc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc1) {
+        tdist = (x - maxc1) * C1_SCALE;
+        max_dist += tdist*tdist;
+      } else {
+        tdist = (x - minc1) * C1_SCALE;
+        max_dist += tdist*tdist;
+      }
+    }
+    
+    /* and for component 2 */
+    x = sl_colormap[2][i];
+    if (x < minc2) {
+      tdist = (x - minc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc2) {
+      tdist = (x - maxc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc2) {
+        tdist = (x - maxc2) * C2_SCALE;
+        max_dist += tdist*tdist;
+      } else {
+        tdist = (x - minc2) * C2_SCALE;
+        max_dist += tdist*tdist;
+      }
+    }
+    
+    mindist[i] = min_dist;	/* save away the results */
+    if (max_dist < minmaxdist)
+      minmaxdist = max_dist;
+  }
+  
+  /* keep every entry whose nearest point of the box is at least as close
+     as the best guaranteed distance found above */
+  ncolors = 0;
+  for (i = 0; i < numcolors; i++) {
+    if (mindist[i] <= minmaxdist)
+      colorlist[ncolors++] = (imbyte) i;
+  }
+  return ncolors;
+}
+
+
+/* Finds, for every cell of one update box, the nearest of the candidate
+ * colormap entries.
+ *
+ * minc0, minc1, minc2: component values of the centre of the box's lowest cell.
+ * numcolors:           number of entries in colorlist.
+ * colorlist:           candidate colormap indexes.
+ * bestcolor:           output, BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS entries
+ *                      stored with c0 as the slowest and c2 as the fastest
+ *                      varying index; receives the chosen colormap index
+ *                      of every cell.
+ *
+ * Distances are scaled squared distances. They are not recomputed per cell:
+ * moving one cell along an axis changes the squared distance by an increment
+ * that itself grows by the constant 2*STEP*STEP, so only additions are
+ * needed inside the loops. Because the comparison is strict, the earlier
+ * candidate in colorlist wins when two candidates are equally close.
+ * If numcolors is 0, bestcolor is left untouched. */
+static void find_best_colors (int minc0, int minc1, int minc2, int numcolors,
+                              imbyte* colorlist, imbyte* bestcolor)
+{
+  int ic0, ic1, ic2;
+  int i, icolor;
+  register int * bptr;	/* pointer into bestdist[] array */
+  imbyte * cptr;		/* pointer into bestcolor[] array */
+  int dist0, dist1;		/* initial distance values */
+  register int dist2;		/* current distance in inner loop */
+  int xx0, xx1;		/* distance increments */
+  register int xx2;
+  int inc0, inc1, inc2;	/* initial values for increments */
+  /* This array holds the distance to the nearest-so-far color for each cell */
+  int bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+  
+  /* Initialize best-distance for each cell of the update box */
+  bptr = bestdist;
+  for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--)
+    *bptr++ = 0x7FFFFFFFL;
+  
+  /* Nominal steps between cell centers ("x" in Thomas article) */
+  /* i.e. the scaled distance between two neighbouring cell centres */
+#define STEP_C0  ((1 << C0_SHIFT) * C0_SCALE)
+#define STEP_C1  ((1 << C1_SHIFT) * C1_SCALE)
+#define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
+  
+  for (i = 0; i < numcolors; i++) {
+    icolor = colorlist[i];
+    /* Compute (square of) distance from minc0/c1/c2 to this color */
+    inc0 = (minc0 - (int) sl_colormap[0][icolor]) * C0_SCALE;
+    dist0 = inc0*inc0;
+    inc1 = (minc1 - (int) sl_colormap[1][icolor]) * C1_SCALE;
+    dist0 += inc1*inc1;
+    inc2 = (minc2 - (int) sl_colormap[2][icolor]) * C2_SCALE;
+    dist0 += inc2*inc2;
+    /* Form the initial difference increments */
+    /* (d + STEP)^2 - d^2 = 2*d*STEP + STEP^2 */
+    inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
+    inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1;
+    inc2 = inc2 * (2 * STEP_C2) + STEP_C2 * STEP_C2;
+    /* Now loop over all cells in box, updating distance per Thomas method */
+    bptr = bestdist;
+    cptr = bestcolor;
+    xx0 = inc0;
+    for (ic0 = BOX_C0_ELEMS-1; ic0 >= 0; ic0--) {
+      dist1 = dist0;
+      xx1 = inc1;
+      for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) {
+        dist2 = dist1;
+        xx2 = inc2;
+        for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
+          /* this candidate is strictly closer than the best seen so far */
+          if (dist2 < *bptr) {
+            *bptr = dist2;
+            *cptr = (imbyte) icolor;
+          }
+          dist2 += xx2;
+          xx2 += 2 * STEP_C2 * STEP_C2;
+          bptr++;
+          cptr++;
+        }
+        dist1 += xx1;
+        xx1 += 2 * STEP_C1 * STEP_C1;
+      }
+      dist0 += xx0;
+      xx0 += 2 * STEP_C0 * STEP_C0;
+    }
+  }
+}
+
+
+/* Fills the inverse-colormap cache for the update box that contains the
+ * histogram cell (c0, c1, c2).
+ *
+ * Every cell of that box in sl_histogram receives the index of its nearest
+ * colormap entry plus one, so that a stored 0 keeps meaning "not computed". */
+static void fill_inverse_cmap (int c0, int c1, int c2)
+{
+  /* candidate colormap indexes for this box */
+  imbyte candidates[MAXNUMCOLORS];
+  /* nearest colormap index for every cell of the box */
+  imbyte nearest[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+  int box0, box1, box2;           /* update box number along each axis */
+  int corner0, corner1, corner2;  /* centre of the box's lowest cell, as component values */
+  int first0, first1, first2;     /* histogram indexes of the box's lowest cell */
+  int ncandidates;
+  int i0, i1, i2, k;
+
+  /* which update box does the cell belong to? */
+  box0 = c0 >> BOX_C0_LOG;
+  box1 = c1 >> BOX_C1_LOG;
+  box2 = c2 >> BOX_C2_LOG;
+
+  corner0 = (box0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1);
+  corner1 = (box1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1);
+  corner2 = (box2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1);
+
+  /* narrow the colormap down to plausible candidates, then pick the
+     nearest candidate for every cell of the box */
+  ncandidates = find_nearby_colors(corner0, corner1, corner2, candidates);
+  find_best_colors(corner0, corner1, corner2, ncandidates, candidates, nearest);
+
+  /* box number -> histogram index of its first cell */
+  first0 = box0 << BOX_C0_LOG;
+  first1 = box1 << BOX_C1_LOG;
+  first2 = box2 << BOX_C2_LOG;
+
+  /* store the results (plus 1) in the cache, in the order in which
+     find_best_colors produced them */
+  k = 0;
+  for (i0 = 0; i0 < BOX_C0_ELEMS; i0++)
+    for (i1 = 0; i1 < BOX_C1_ELEMS; i1++)
+      for (i2 = 0; i2 < BOX_C2_ELEMS; i2++)
+        sl_histogram[first0+i0][first1+i1][first2+i2] = (histcell) (nearest[k++] + 1);
+}
+
+
+static void slow_map_pixels (imbyte *red, imbyte *green, imbyte *blue, int width, int height, imbyte *map)
+{
+  register LOCFSERROR cur0, cur1, cur2;	/* current error or pixel value */
+  LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
+  LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
+  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  imbyte *inRptr, *inGptr, *inBptr;		/* => current input pixel */
+  imbyte* outptr;		/* => current output pixel */
+  histptr cachep;		/* => histogram (cache) cell of the current color */
+  int dir;			/* +1 or -1 depending on direction */
+  int dir3;			/* 3*dir, for advancing errorptr */
+  int row, col, offset;
+  int *error_limit = sl_error_limiter;
+  imbyte* colormap0 = sl_colormap[0];
+  imbyte* colormap1 = sl_colormap[1];
+  imbyte* colormap2 = sl_colormap[2];
+  hist2d * histogram = sl_histogram;
+  /* Map each pixel to a colormap index, diffusing the error with weights 7, 3, 5 and 1 (in 1/16 units) */
+  for (row = 0; row < height; row++) 
+  {
+    offset = row * width;
+
+    inRptr = & red[offset];
+    inGptr = & green[offset];
+    inBptr = & blue[offset];
+    outptr = & map[offset];
+
+    if (sl_on_odd_row) 
+    {
+      /* work right to left in this row */
+      offset = width-1;
+
+      inRptr += offset;	/* so point to rightmost pixel */
+      inGptr += offset;	/* so point to rightmost pixel */
+      inBptr += offset;	/* so point to rightmost pixel */
+
+      outptr += offset;
+
+      dir = -1;
+      dir3 = -3;
+      errorptr = sl_fserrors + (width+1)*3; /* => entry after last column */
+      sl_on_odd_row = 0;	/* flip for next time */
+    } 
+    else 
+    {
+      /* work left to right in this row */
+      dir = 1;
+      dir3 = 3;
+      errorptr = sl_fserrors;	/* => entry before first real column */
+      sl_on_odd_row = 1;	/* flip for next time */
+    }
+
+    /* Preset error values: no error propagated to first pixel from left */
+    cur0 = cur1 = cur2 = 0;
+    /* and no error propagated to row below yet */
+    belowerr0 = belowerr1 = belowerr2 = 0;
+    bpreverr0 = bpreverr1 = bpreverr2 = 0;
+    
+    for (col = width; col > 0; col--) 
+    {
+      cur0 = (cur0 + errorptr[dir3+0] + 8) >> 4;	/* add the stored error, round, drop the 1/16 scaling */
+      cur1 = (cur1 + errorptr[dir3+1] + 8) >> 4;
+      cur2 = (cur2 + errorptr[dir3+2] + 8) >> 4;
+
+      cur0 = error_limit[cur0];	/* pass the error through the limiter table */
+      cur1 = error_limit[cur1];
+      cur2 = error_limit[cur2];
+
+      cur0 += inRptr[0];	/* error plus input sample = adjusted pixel value */
+      cur1 += inGptr[0];
+      cur2 += inBptr[0];
+
+      RANGE(cur0, 0, 255);
+      RANGE(cur1, 0, 255);
+      RANGE(cur2, 0, 255);
+
+      /* Index into the cache with adjusted pixel value */
+      cachep = & histogram[cur0>>C0_SHIFT][cur1>>C1_SHIFT][cur2>>C2_SHIFT];
+
+      /* If we have not seen this color before, find nearest colormap */
+      /* entry and update the cache */
+      if (*cachep == 0)
+        fill_inverse_cmap(cur0>>C0_SHIFT, cur1>>C1_SHIFT, cur2>>C2_SHIFT);
+
+      /* Now emit the colormap index for this cell */
+      {
+        register int pixcode = *cachep - 1;	/* cells hold index+1, 0 being "not filled yet" */
+        *outptr = (imbyte) pixcode;
+        /* Compute representation error for this pixel */
+        cur0 -= (int) colormap0[pixcode];
+        cur1 -= (int) colormap1[pixcode];
+        cur2 -= (int) colormap2[pixcode];
+      }
+
+      /* Compute error fractions to be propagated to adjacent pixels.
+       * Add these into the running sums, and simultaneously shift the
+       * next-line error sums left by 1 column. */
+      { 
+        register LOCFSERROR bnexterr, delta;
+        bnexterr = cur0;	/* Process component 0 */
+        delta = cur0 * 2;
+        cur0 += delta;		/* form error * 3 */
+        errorptr[0] = (FSERROR) (bpreverr0 + cur0);
+        cur0 += delta;		/* form error * 5 */
+        bpreverr0 = belowerr0 + cur0;
+        belowerr0 = bnexterr;
+        cur0 += delta;		/* form error * 7 */
+        bnexterr = cur1;	/* Process component 1 */
+        delta = cur1 * 2;
+        cur1 += delta;		/* form error * 3 */
+        errorptr[1] = (FSERROR) (bpreverr1 + cur1);
+        cur1 += delta;		/* form error * 5 */
+        bpreverr1 = belowerr1 + cur1;
+        belowerr1 = bnexterr;
+        cur1 += delta;		/* form error * 7 */
+        bnexterr = cur2;	/* Process component 2 */
+        delta = cur2 * 2;
+        cur2 += delta;		/* form error * 3 */
+        errorptr[2] = (FSERROR) (bpreverr2 + cur2);
+        cur2 += delta;		/* form error * 5 */
+        bpreverr2 = belowerr2 + cur2;
+        belowerr2 = bnexterr;
+        cur2 += delta;		/* form error * 7 */
+      }
+
+      /* At this point curN contains the 7/16 error value to be propagated
+       * to the next pixel on the current line, and all the errors for the
+       * next line have been shifted over.  We are therefore ready to move on.
+       */
+      inRptr += dir;		/* Advance pixel pointers to next column */
+      inGptr += dir;		/* Advance pixel pointers to next column */
+      inBptr += dir;		/* Advance pixel pointers to next column */
+      outptr += dir;
+      errorptr += dir3;		/* advance errorptr to current column */
+    }
+
+    /* Post-loop cleanup: we must unload the final error values into the
+     * final fserrors[] entry.  Note we need not unload belowerrN because
+     * it is for the dummy column before or after the actual array.
+     */
+    errorptr[0] = (FSERROR) bpreverr0; /* unload prev errs into array */
+    errorptr[1] = (FSERROR) bpreverr1;
+    errorptr[2] = (FSERROR) bpreverr2;
+  }
+}
+
+
+/* Allocate and fill in the error_limiter table */
+static void init_error_limit (void)
+{
+  int * table;
+  int in, out, STEPSIZE;
+  
+  table = (int *) malloc((size_t) ((255*2+1) * sizeof(int)));
+  if (! table) return;
+  
+  table += 255;		/* so can index -255 .. +255 */
+  sl_error_limiter = table;
+  
+  STEPSIZE = ((255+1)/16);
+
+  /* Map errors 1:1 up to +- 255/16 */
+  out = 0;
+  for (in = 0; in < STEPSIZE; in++, out++) 
+  {
+    table[in]  =  out; 
+    table[-in] = -out;
+  }
+
+  /* Map errors 1:2 up to +- 3*255/16 */
+  for (; in < STEPSIZE*3; in++, out += (in&1) ? 0 : 1) 
+  {
+    table[in]  =  out;
+    table[-in] = -out;
+  }
+
+  /* Clamp the rest to final out value (which is (255+1)/8) */
+  for (; in <= 255; in++) 
+  {
+    table[in]  =  out; 
+    table[-in] = -out;
+  }
+}
+
+int imConvertRGB2Map(int width, int height, unsigned char *red, unsigned char *green, unsigned char *blue, unsigned char *map, long *palette, int *palette_count)
+{
+  /* Converts planar RGB into an indexed map plus an encoded palette.  A request
+   * outside 1..256 colors is treated as 256.  Returns IM_ERR_NONE or IM_ERR_MEM. */
+  imbyte rm[256], gm[256], bm[256];
+  int c, used;
+
+  if (!(*palette_count > 0 && *palette_count <= 256))
+    *palette_count = 256;
+
+  /* quick_map yields the palette size it produced; zero sends us to slow_quant */
+  used = quick_map(red, green, blue, width, height, map, rm, gm, bm, *palette_count);
+  if (used == 0)
+  {
+    /* any slow_quant failure is reported as a memory error */
+    if (slow_quant(red, green, blue, width, height, map, rm, gm, bm, *palette_count))
+      return IM_ERR_MEM;
+    used = *palette_count;
+  }
+  else
+    *palette_count = used;
+
+  for (c = 0; c < used; c++)
+    palette[c] = imColorEncode(rm[c], gm[c], bm[c]);
+
+  return IM_ERR_NONE;
+}
+
diff --git a/src/im_str.cpp b/src/im_str.cpp
new file mode 100644
index 0000000..f50dcf1
--- /dev/null
+++ b/src/im_str.cpp
@@ -0,0 +1,67 @@
+/** \file
+ * \brief String Utilities
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_str.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+
+#include <stdlib.h>
+#include <memory.h>
+#include <assert.h>
+
+#include "im_util.h"
+
+int imStrEqual(const char* str1, const char* str2)
+{
+  /* Returns 1 when both NUL-terminated strings hold the same characters, else 0 */
+  assert(str1);
+  assert(str2);
+
+  for (;;)
+  {
+    if (*str1 != *str2)
+      return 0;   /* first difference, including one string ending early */
+
+    if (*str1 == 0)
+      return 1;   /* both terminators reached together */
+
+    str1++;
+    str2++;
+  }
+}
+
+int imStrNLen(const char* str, int max_len)
+{
+  /* Length of str, counting at most max_len characters */
+  assert(str);
+
+  int len = 0;
+
+  /* stop at the terminator or once the budget is used up;
+     the budget is only compared against zero */
+  for (; max_len && str[len]; max_len--)
+    len++;
+
+  return len;
+}
+
+int imStrCheck(const void* data, int count)
+{
+  /* Returns 1 if the count bytes at data contain a NUL terminator, i.e. they
+   * can be read as a C string, and 0 otherwise.  A missing or empty buffer
+   * has no terminator; without this guard str[count-1] would read str[-1]. */
+  const char* str = (const char*)data;
+
+  if (!str || count <= 0)
+    return 0;
+
+  if (str[count-1] == 0)   /* fast path: terminator in the last byte */
+    return 1;
+
+  for (; count && *str; count--)
+    str++;
+
+  return (count > 0)? 1: 0;   /* bytes left over: a NUL stopped the scan */
+}
+
diff --git a/src/im_sysfile_unix.cpp b/src/im_sysfile_unix.cpp
new file mode 100644
index 0000000..2065bb6
--- /dev/null
+++ b/src/im_sysfile_unix.cpp
@@ -0,0 +1,211 @@
+/** \file
+ * \brief System Dependent Binary File Access (UNIX)
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_sysfile_unix.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "im_util.h"
+#include "im_binfile.h"
+
+
+class imBinSystemFile: public imBinFileBase
+{
+protected:
+  int FileHandle,   // descriptor returned by open(); negative when the open failed
+      Error;        // errno recorded by the latest open/close/read/write/seek, 0 if it succeeded
+  // Raw transfers on FileHandle; each returns the byte count, 0 on failure
+  unsigned long ReadBuf(void* pValues, unsigned long pSize);
+  unsigned long WriteBuf(void* pValues, unsigned long pSize);
+
+public:
+  virtual void Open(const char* pFileName);   // opens an existing file read-only
+  virtual void New(const char* pFileName);    // creates or truncates a file for writing
+  virtual void Close();
+
+  unsigned long FileSize();
+  int HasError() const;
+  void SeekTo(unsigned long pOffset);         // from the start of the file (SEEK_SET)
+  void SeekOffset(long pOffset);              // from the current position (SEEK_CUR)
+  void SeekFrom(long pOffset);                // from the end of the file (SEEK_END)
+  unsigned long Tell() const;
+  int EndOfFile() const;
+};
+
+imBinFileBase* iBinSystemFileNewFunc()
+{
+  return new imBinSystemFile();   // heap-allocated, handed back through the base-class pointer
+}
+
+void imBinSystemFile::Open(const char* pFileName)
+{
+  // Opens an existing file read-only.  When open() fails FileHandle is
+  // negative and Error keeps errno; otherwise Error is cleared.
+  int flags = O_RDONLY;
+#ifdef O_BINARY
+  flags |= O_BINARY;
+#endif
+
+  this->FileHandle = open(pFileName, flags, 0);
+  this->Error = (this->FileHandle < 0)? errno: 0;
+  InitByteOrder(imBinCPUByteOrder());
+  this->IsNew = 0;
+}
+
+void imBinSystemFile::New(const char* pFileName)
+{
+  // Creates the file (or truncates an existing one) for writing.  When
+  // open() fails FileHandle is negative and Error keeps errno.
+  int flags = O_WRONLY | O_CREAT | O_TRUNC;
+#ifdef O_BINARY
+  flags |= O_BINARY;
+#endif
+
+  this->FileHandle = open(pFileName, flags, 0666); // User/Group/Other can read and write
+  this->Error = (this->FileHandle < 0)? errno: 0;
+  InitByteOrder(imBinCPUByteOrder());
+  this->IsNew = 1;
+}
+
+void imBinSystemFile::Close()
+{
+  // Closes the descriptor; Error receives errno on failure, 0 otherwise.
+  // FileHandle itself is left unchanged.
+  assert(this->FileHandle > -1);
+
+  const int rc = close(this->FileHandle);
+  this->Error = (rc < 0)? errno: 0;
+}
+
+int imBinSystemFile::HasError() const
+{
+  // 1 when the descriptor is invalid or the last operation recorded an errno
+  return (this->FileHandle < 0 || this->Error)? 1: 0;
+}
+
+unsigned long imBinSystemFile::ReadBuf(void* pValues, unsigned long pSize)
+{
+  // Reads up to pSize bytes into pValues and returns the number of bytes
+  // actually read (0 on failure, with errno saved in Error).
+  assert(this->FileHandle > -1);
+  // read() returns ssize_t; an int could truncate a large byte count
+  ssize_t ret = read(this->FileHandle, pValues, (size_t)pSize);
+  this->Error = (ret < 0)? errno: 0;
+  return ret < 0? 0: (unsigned long)ret;
+}
+                             
+unsigned long imBinSystemFile::WriteBuf(void* pValues, unsigned long pSize)
+{
+  // Writes up to pSize bytes from pValues and returns the number of bytes
+  // actually written (0 on failure, with errno saved in Error).
+  assert(this->FileHandle > -1);
+  // write() returns ssize_t; an int could truncate a large byte count
+  ssize_t ret = write(this->FileHandle, pValues, (size_t)pSize);
+  this->Error = (ret < 0)? errno: 0;
+  return ret < 0? 0: (unsigned long)ret;
+}
+
+void imBinSystemFile::SeekTo(unsigned long pOffset)
+{
+  // Moves to pOffset bytes from the start of the file; Error gets errno on
+  // failure.  lseek() returns off_t: kept in an int, an offset past 2 GiB
+  // could turn negative and be mistaken for a failure.
+  assert(this->FileHandle > -1);
+  off_t ret = lseek(this->FileHandle, pOffset, SEEK_SET);
+  this->Error = (ret < 0)? errno: 0;
+}
+
+void imBinSystemFile::SeekOffset(long pOffset)
+{
+  // Moves pOffset bytes relative to the current position; Error gets errno
+  // on failure.  lseek() returns off_t: kept in an int, a position past
+  // 2 GiB could turn negative and be mistaken for a failure.
+  assert(this->FileHandle > -1);
+  off_t ret = lseek(this->FileHandle, pOffset, SEEK_CUR);
+  this->Error = (ret < 0)? errno: 0;
+}
+
+void imBinSystemFile::SeekFrom(long pOffset)
+{
+  // Moves to pOffset bytes relative to the end of the file; Error gets
+  // errno on failure.  lseek() returns off_t: kept in an int, a position
+  // past 2 GiB could turn negative and be mistaken for a failure.
+  assert(this->FileHandle > -1);
+  off_t ret = lseek(this->FileHandle, pOffset, SEEK_END);
+  this->Error = (ret < 0)? errno: 0;
+}
+
+unsigned long imBinSystemFile::Tell() const
+{ // Byte offset of the current position; a failed lseek is reported as 0.
+  assert(this->FileHandle > -1);
+  const long lPosition = lseek(this->FileHandle, 0L, SEEK_CUR);
+  return (lPosition >= 0)? lPosition: 0;
+}
+
+unsigned long imBinSystemFile::FileSize()
+{ // Size in bytes, found by seeking to the end; 0 if that seek fails.
+  assert(this->FileHandle > -1);
+  const long lSaved = lseek(this->FileHandle, 0L, SEEK_CUR);
+  const long lEnd = lseek(this->FileHandle, 0L, SEEK_END);
+  lseek(this->FileHandle, lSaved, SEEK_SET);   // put the position back
+  return (lEnd >= 0)? lEnd: 0;
+}
+
+int imBinSystemFile::EndOfFile() const
+{ // 1 when the current position equals the end-of-file offset, else 0.
+  assert(this->FileHandle > -1);
+  const long lHere = lseek(this->FileHandle, 0L, SEEK_CUR);
+  const long lEnd = lseek(this->FileHandle, 0L, SEEK_END);
+  lseek(this->FileHandle, lHere, SEEK_SET);   // put the position back
+  return (lHere != lEnd)? 0: 1;
+}
+
+
+
+class imBinSystemFileHandle: public imBinSystemFile
+{
+public:
+  virtual void Open(const char* pFileName);   // adopts a descriptor the client opened (IsNew = 0)
+  virtual void New(const char* pFileName);    // adopts a descriptor the client opened (IsNew = 1)
+  virtual void Close();                       // empty: the client closes the descriptor
+};
+
+imBinFileBase* iBinSystemFileHandleNewFunc()
+{
+  return new imBinSystemFileHandle();   // heap-allocated, handed back through the base-class pointer
+}
+
+void imBinSystemFileHandle::Open(const char* pFileName)
+{
+  // The client has already opened the file: pFileName is not a path here,
+  // it is read as a pointer to an int holding the descriptor.
+  const int* pHandle = (const int*)pFileName;
+  this->FileHandle = *pHandle;
+  InitByteOrder(imBinCPUByteOrder());
+  this->IsNew = 0;
+  this->Error = 0;
+}
+
+void imBinSystemFileHandle::New(const char* pFileName)
+{
+  // The client has already opened the file: pFileName is not a path here,
+  // it is read as a pointer to an int holding the descriptor.
+  const int* pHandle = (const int*)pFileName;
+  this->FileHandle = *pHandle;
+  InitByteOrder(imBinCPUByteOrder());
+  this->IsNew = 1;
+  this->Error = 0;
+}
+
+void imBinSystemFileHandle::Close()
+{
+  // Intentionally empty: the descriptor belongs to the client, who must close it
+}
diff --git a/src/im_sysfile_win32.cpp b/src/im_sysfile_win32.cpp
new file mode 100644
index 0000000..14e7c40
--- /dev/null
+++ b/src/im_sysfile_win32.cpp
@@ -0,0 +1,202 @@
+/** \file
+ * \brief System Dependent Binary File Access.
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_sysfile_win32.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include <windows.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <assert.h>
+
+#include "im_util.h"
+#include "im_binfile.h"
+
+class imBinSystemFile: public imBinFileBase
+{
+protected:
+  HANDLE FileHandle;   // handle from CreateFile; INVALID_HANDLE_VALUE when not open
+  int Error;           // 0 or 1; also set to 1 by Close()
+  // Raw transfers on FileHandle; each returns the number of bytes moved
+  unsigned long ReadBuf(void* pValues, unsigned long pSize);
+  unsigned long WriteBuf(void* pValues, unsigned long pSize);
+
+public:
+  virtual void Open(const char* pFileName);   // opens an existing file for reading
+  virtual void New(const char* pFileName);    // creates or overwrites a file
+  virtual void Close();
+
+  unsigned long FileSize();
+  int HasError() const;
+  void SeekTo(unsigned long pOffset);         // from the start of the file (FILE_BEGIN)
+  void SeekOffset(long pOffset);              // from the current position (FILE_CURRENT)
+  void SeekFrom(long pOffset);                // from the end of the file (FILE_END)
+  unsigned long Tell() const;
+  int EndOfFile() const;
+};
+
+imBinFileBase* iBinSystemFileNewFunc()
+{
+  return new imBinSystemFile();   // heap-allocated, handed back through the base-class pointer
+}
+
+void imBinSystemFile::Open(const char* pFileName)
+{
+  // Opens an existing file for reading, sharing read access with others;
+  // Error is 1 when CreateFile fails.
+  HANDLE hFile = CreateFile(pFileName, 
+                            GENERIC_READ, FILE_SHARE_READ, NULL, 
+                            OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+  this->FileHandle = hFile;
+  this->Error = (hFile != INVALID_HANDLE_VALUE)? 0: 1;
+  SetLastError(NO_ERROR);
+  InitByteOrder(imBinCPUByteOrder());
+  this->IsNew = 0;
+}
+
+void imBinSystemFile::New(const char* pFileName)
+{
+  // Creates the file (overwriting an existing one) for reading and writing,
+  // with no sharing; Error is 1 when CreateFile fails.
+  HANDLE hFile = CreateFile(pFileName, 
+                            GENERIC_READ | GENERIC_WRITE, 0, NULL, 
+                            CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+  this->FileHandle = hFile;
+  this->Error = (hFile != INVALID_HANDLE_VALUE)? 0: 1;
+  SetLastError(NO_ERROR);
+  InitByteOrder(imBinCPUByteOrder());
+  this->IsNew = 1;
+}
+
+void imBinSystemFile::Close()
+{
+  // Releases the handle if one is open, then marks the object unusable (Error = 1).
+  HANDLE hOld = this->FileHandle;
+  this->FileHandle = INVALID_HANDLE_VALUE;
+  this->Error = 1;
+  if (hOld != INVALID_HANDLE_VALUE) CloseHandle(hOld);
+}
+
+unsigned long imBinSystemFile::FileSize()
+{
+  // Returns what GetFileSize reports; Error is set when it signals failure.
+  assert(this->FileHandle != INVALID_HANDLE_VALUE);
+
+  const DWORD dwBytes = GetFileSize(this->FileHandle, NULL);
+  this->Error = (dwBytes == INVALID_FILE_SIZE)? 1: 0;
+  return dwBytes;
+}
+
+unsigned long imBinSystemFile::ReadBuf(void* pValues, unsigned long pSize)
+{
+  // Reads pSize bytes into pValues and returns the count actually read;
+  // anything short of pSize sets Error.
+  assert(this->FileHandle != INVALID_HANDLE_VALUE);
+  DWORD dwRead = 0;
+  ReadFile(this->FileHandle, pValues, pSize, &dwRead, NULL);
+  this->Error = (dwRead == pSize)? 0: 1;
+  return dwRead;
+}
+                             
+unsigned long imBinSystemFile::WriteBuf(void* pValues, unsigned long pSize)
+{
+  // Writes pSize bytes from pValues and returns the count actually written;
+  // anything short of pSize sets Error.
+  assert(this->FileHandle != INVALID_HANDLE_VALUE);
+  DWORD dwWritten = 0;
+  WriteFile(this->FileHandle, pValues, pSize, &dwWritten, NULL);
+  this->Error = (dwWritten == pSize)? 0: 1;
+  return dwWritten;
+}
+
+int imBinSystemFile::HasError() const
+{
+  return this->Error;   // the 0/1 flag left by the most recent operation (1 after Close)
+}
+        
+void imBinSystemFile::SeekTo(unsigned long pOffset)
+{
+  // Moves to pOffset bytes from the start of the file; Error flags a failure.
+  assert(this->FileHandle != INVALID_HANDLE_VALUE);
+
+  const DWORD dwPos = SetFilePointer(this->FileHandle, pOffset, NULL, FILE_BEGIN);
+  this->Error = (dwPos == INVALID_SET_FILE_POINTER)? 1: 0;
+}
+
+void imBinSystemFile::SeekOffset(long pOffset)
+{
+  // Moves pOffset bytes relative to the current position; Error flags a failure.
+  assert(this->FileHandle != INVALID_HANDLE_VALUE);
+
+  const DWORD dwPos = SetFilePointer(this->FileHandle, pOffset, NULL, FILE_CURRENT);
+  this->Error = (dwPos == INVALID_SET_FILE_POINTER)? 1: 0;
+}
+
+void imBinSystemFile::SeekFrom(long pOffset)
+{
+  // Moves to pOffset bytes relative to the end of the file; Error flags a failure.
+  assert(this->FileHandle != INVALID_HANDLE_VALUE);
+
+  const DWORD dwPos = SetFilePointer(this->FileHandle, pOffset, NULL, FILE_END);
+  this->Error = (dwPos == INVALID_SET_FILE_POINTER)? 1: 0;
+}
+
+unsigned long imBinSystemFile::Tell() const
+{ // Reports the current position by seeking zero bytes from FILE_CURRENT.
+  assert(this->FileHandle != INVALID_HANDLE_VALUE);
+  return SetFilePointer(this->FileHandle, 0, NULL, FILE_CURRENT);
+}
+
+int imBinSystemFile::EndOfFile() const
+{ // Returns 1 when the file position is at the end of the file, else 0; the position is preserved.
+  assert(this->FileHandle != INVALID_HANDLE_VALUE);
+  DWORD cur_pos = SetFilePointer(this->FileHandle, 0, NULL, FILE_CURRENT);
+  DWORD end_pos = SetFilePointer(this->FileHandle, 0, NULL, FILE_END);
+  SetFilePointer(this->FileHandle, cur_pos, NULL, FILE_BEGIN);   // cur_pos is absolute: FILE_CURRENT would land at end_pos + cur_pos
+  return (cur_pos == end_pos)? 1: 0;
+}
+
+
+
+class imBinSystemFileHandle: public imBinSystemFile
+{
+public:
+  virtual void Open(const char* pFileName);   // adopts a HANDLE the client opened (IsNew = 0)
+  virtual void New(const char* pFileName);    // adopts a HANDLE the client opened (IsNew = 1)
+  virtual void Close();                       // empty: the client closes the handle
+};
+
+imBinFileBase* iBinSystemFileHandleNewFunc()
+{
+  return new imBinSystemFileHandle();   // heap-allocated, handed back through the base-class pointer
+}
+
+void imBinSystemFileHandle::Open(const char* pFileName)
+{
+  // The client has already opened the file: pFileName is not a path here,
+  // the pointer value itself is the HANDLE to use.
+
+  this->FileHandle = (HANDLE)pFileName;
+  InitByteOrder(imBinCPUByteOrder());
+  this->IsNew = 0;
+  this->Error = 0;
+}
+
+void imBinSystemFileHandle::New(const char* pFileName)
+{
+  // The client has already opened the file: pFileName is not a path here,
+  // the pointer value itself is the HANDLE to use.
+
+  this->FileHandle = (HANDLE)pFileName;
+  InitByteOrder(imBinCPUByteOrder());
+  this->IsNew = 1;
+  this->Error = 0;
+}
+
+void imBinSystemFileHandle::Close()
+{
+  // Intentionally empty: the handle belongs to the client, who must close it
+}
diff --git a/src/im_wmv.def b/src/im_wmv.def
new file mode 100644
index 0000000..1e017f3
--- /dev/null
+++ b/src/im_wmv.def
@@ -0,0 +1,2 @@
+EXPORTS
+  imFormatRegisterWMV
\ No newline at end of file
diff --git a/src/im_wmv.mak b/src/im_wmv.mak
new file mode 100644
index 0000000..fef3c40
--- /dev/null
+++ b/src/im_wmv.mak
@@ -0,0 +1,23 @@
+PROJNAME = im
+LIBNAME = im_wmv
+OPT = YES
+
+SRC = im_format_wmv.cpp
+                                       
+ifneq ($(findstring _64, $(TEC_UNAME)), )
+  WMFSDK = d:/lng/wmfsdk95
+  INCLUDES = $(WMFSDK)/include
+else
+#  WMFSDK = d:/lng/wmfsdk11
+#  EXTRAINCS = $(WMFSDK)/include
+  WMFSDK = d:/lng/wmfsdk9
+  INCLUDES = $(WMFSDK)/include
+endif
+  
+DEFINES = _CRT_NON_CONFORMING_SWPRINTFS                                     
+
+LDIR = $(WMFSDK)/lib
+LIBS = wmvcore
+
+USE_IM = Yes
+IM = ..
diff --git a/src/imlua3.def b/src/imlua3.def
new file mode 100644
index 0000000..d27384d
--- /dev/null
+++ b/src/imlua3.def
@@ -0,0 +1,2 @@
+EXPORTS
+  imlua_open
\ No newline at end of file
diff --git a/src/imlua3.mak b/src/imlua3.mak
new file mode 100644
index 0000000..aac7b00
--- /dev/null
+++ b/src/imlua3.mak
@@ -0,0 +1,12 @@
+PROJNAME = im
+LIBNAME = imlua3
+OPT = YES
+
+SRC = im_lua3.c
+
+USE_LUA = Yes
+#Do NOT use USE_CD because we use no CD functions, only headers are used.
+INCLUDES = $(CD)/include
+
+USE_IM = Yes
+IM = ..
diff --git a/src/imlua5.mak b/src/imlua5.mak
new file mode 100644
index 0000000..0776335
--- /dev/null
+++ b/src/imlua5.mak
@@ -0,0 +1,15 @@
+PROJNAME = im
+LIBNAME = imlua51
+DEF_FILE = imlua.def
+
+OPT = YES
+
+SRCDIR = lua5
+
+SRC = imlua.c imlua_aux.c imlua_convert.c imlua_file.c imlua_image.c imlua_palette.c imlua_util.c
+
+INCLUDES = lua5
+
+USE_IM = YES
+USE_LUA51 = YES
+IM = ..
diff --git a/src/imlua_avi.mak b/src/imlua_avi.mak
new file mode 100644
index 0000000..238a4d4
--- /dev/null
+++ b/src/imlua_avi.mak
@@ -0,0 +1,17 @@
+PROJNAME = im
+LIBNAME = imlua_avi51
+DEF_FILE = imlua_avi.def
+
+OPT = YES
+
+SRCDIR = lua5
+
+SRC = imlua_avi.c
+
+LIBS = im_avi
+
+INCLUDES = lua5
+
+USE_IMLUA = Yes
+USE_LUA51 = Yes
+IM = ..
diff --git a/src/imlua_capture5.mak b/src/imlua_capture5.mak
new file mode 100644
index 0000000..bee750e
--- /dev/null
+++ b/src/imlua_capture5.mak
@@ -0,0 +1,17 @@
+PROJNAME = im
+LIBNAME = imlua_capture51
+DEF_FILE = imlua_capture.def
+
+OPT = YES
+
+SRCDIR = lua5
+
+SRC = imlua_capture.c
+
+LIBS = im_capture
+
+INCLUDES = lua5
+
+USE_IMLUA = Yes
+USE_LUA51 = Yes
+IM = ..
diff --git a/src/imlua_fftw5.mak b/src/imlua_fftw5.mak
new file mode 100644
index 0000000..dedabcf
--- /dev/null
+++ b/src/imlua_fftw5.mak
@@ -0,0 +1,18 @@
+PROJNAME = im
+LIBNAME = imlua_fftw51
+DEF_FILE = imlua_fftw.def
+
+OPT = YES
+
+SRC = lua5/imlua_fftw.c
+DEF_FILE = lua5/imlua_fftw.def
+
+SRCLUA = lua5/im_fftw.lua
+SRCLUADIR = lua5
+
+LIBS = im_fftw
+INCLUDES = lua5
+
+USE_IMLUA = YES
+USE_LUA51 = YES
+IM = ..
diff --git a/src/imlua_jp2.mak b/src/imlua_jp2.mak
new file mode 100644
index 0000000..35711b0
--- /dev/null
+++ b/src/imlua_jp2.mak
@@ -0,0 +1,17 @@
+PROJNAME = im
+LIBNAME = imlua_jp251
+DEF_FILE = imlua_jp2.def
+
+OPT = YES
+
+SRCDIR = lua5
+
+SRC = imlua_jp2.c
+
+LIBS = im_jp2
+
+INCLUDES = lua5
+
+USE_IMLUA = Yes
+USE_LUA51 = Yes
+IM = ..
diff --git a/src/imlua_process5.mak b/src/imlua_process5.mak
new file mode 100644
index 0000000..6dfa5a5
--- /dev/null
+++ b/src/imlua_process5.mak
@@ -0,0 +1,18 @@
+PROJNAME = im
+LIBNAME = imlua_process51
+DEF_FILE = imlua_process.def
+
+OPT = YES
+
+SRC = lua5/imlua_process.c lua5/imlua_kernel.c
+DEF_FILE = lua5/imlua_process.def
+
+SRCLUA = lua5/im_process.lua
+SRCLUADIR = lua5
+
+LIBS = im_process
+INCLUDES = lua5
+
+USE_IMLUA = YES
+USE_LUA51 = YES
+IM = ..
diff --git a/src/imlua_wmv.mak b/src/imlua_wmv.mak
new file mode 100644
index 0000000..f487075
--- /dev/null
+++ b/src/imlua_wmv.mak
@@ -0,0 +1,17 @@
+PROJNAME = im
+LIBNAME = imlua_wmv51
+DEF_FILE = imlua_wmv.def
+
+OPT = YES
+
+SRCDIR = lua5
+
+SRC = imlua_wmv.c
+
+LIBS = im_wmv
+
+INCLUDES = lua5
+
+USE_IMLUA = Yes
+USE_LUA51 = Yes
+IM = ..
diff --git a/src/jas_binfile.c b/src/jas_binfile.c
new file mode 100644
index 0000000..487a2d3
--- /dev/null
+++ b/src/jas_binfile.c
@@ -0,0 +1,97 @@
+/** \file
+ * \brief libJasper I/O
+ * I/O uses imBinFile instead of libJasper original handlers.
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: jas_binfile.c,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include <stdlib.h>
+
+#include "jasper/jas_types.h"
+#include "jasper/jas_stream.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_math.h"
+
+#include "im_binfile.h"
+
+/* These were static in jas_stream.c */
+jas_stream_t *jas_stream_create(void);
+void jas_stream_initbuf(jas_stream_t *stream, int bufmode, char *buf, int bufsize);
+
+static int file_read(jas_stream_obj_t *obj, char *buf, int cnt)
+{
+  /* jas_stream read callback: obj is the imBinFile; reads cnt items of 1 byte */
+  return imBinFileRead((imBinFile*)obj, buf, cnt, 1);
+}
+
+static int file_write(jas_stream_obj_t *obj, char *buf, int cnt)
+{
+  /* jas_stream write callback: obj is the imBinFile; writes cnt items of 1 byte */
+  return imBinFileWrite((imBinFile*)obj, buf, cnt, 1);
+}
+
+static long file_seek(jas_stream_obj_t *obj, long offset, int origin)
+{
+  /* jas_stream seek callback: forwards to the imBinFile seek that matches
+   * origin and returns the imBinFile error state; an unknown origin does
+   * not seek at all.
+   * NOTE(review): the result is an error code, not the new offset - confirm
+   * this is what the jas_stream seek_ callback is expected to return. */
+  imBinFile* file_bin = (imBinFile*)obj;
+
+  if (origin == SEEK_SET)
+    imBinFileSeekTo(file_bin, offset);
+  else if (origin == SEEK_CUR)
+    imBinFileSeekOffset(file_bin, offset);
+  else if (origin == SEEK_END)
+    imBinFileSeekFrom(file_bin, offset);
+
+  return imBinFileError(file_bin);
+}
+
+static int file_close(jas_stream_obj_t *obj)
+{
+  /* jas_stream close callback: closes the imBinFile and always returns 0 */
+  imBinFileClose((imBinFile*)obj);
+  return 0;
+}
+
+static jas_stream_ops_t jas_stream_fileops = {   /* stream callbacks backed by imBinFile */
+  file_read,    /* read */
+  file_write,   /* write */
+  file_seek,    /* seek */
+  file_close    /* close */
+};
+
+jas_stream_t *jas_binfile_open(const char *file_name, int is_new)
+{
+  /* Opens file_name through imBinFile (created when is_new is non-zero) and wraps
+   * it in a fully buffered jas_stream.  Returns 0 if either step fails. */
+  void* handle;
+  jas_stream_t *stream;
+
+  handle = is_new? (void*)imBinFileNew(file_name): (void*)imBinFileOpen(file_name);
+  if (!handle)
+    return 0;
+
+  /* Allocate a stream object; on failure close the file instead of using a null stream. */
+  stream = jas_stream_create();
+  if (!stream)
+  {
+    imBinFileClose((imBinFile*)handle);
+    return 0;
+  }
+
+  if (is_new)
+    stream->openmode_ = JAS_STREAM_WRITE | JAS_STREAM_CREATE | JAS_STREAM_BINARY;
+  else
+    stream->openmode_ = JAS_STREAM_READ | JAS_STREAM_BINARY;
+
+  /* Select the operations for a file stream object. */
+  stream->ops_ = &jas_stream_fileops;
+  stream->obj_ = handle;
+  /* By default, use full buffering for this type of stream. */
+  jas_stream_initbuf(stream, JAS_STREAM_FULLBUF, 0, 0);
+  return stream;
+}
diff --git a/src/libexif/_stdint.h b/src/libexif/_stdint.h
new file mode 100644
index 0000000..401189b
--- /dev/null
+++ b/src/libexif/_stdint.h
@@ -0,0 +1,19 @@
+
+#ifndef __STDINT_H
+#define __STDINT_H
+/* Local fixed-width integer typedefs */
+#ifndef __int8_t_defined
+#define __int8_t_defined
+typedef signed char int8_t;
+typedef short int16_t;
+typedef int int32_t;
+#endif
+/* NOTE(review): uint8_t and uint16_t are not guarded the way the other types are - confirm they cannot clash */
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+#ifndef __uint32_t_defined
+#define __uint32_t_defined
+typedef unsigned int uint32_t;
+#endif
+
+#endif 
diff --git a/src/libexif/canon/exif-mnote-data-canon.c b/src/libexif/canon/exif-mnote-data-canon.c
new file mode 100644
index 0000000..b1c5dab
--- /dev/null
+++ b/src/libexif/canon/exif-mnote-data-canon.c
@@ -0,0 +1,281 @@
+/* exif-mnote-data-canon.c
+ *
+ * Copyright © 2002, 2003 Lutz Müller <lutz@users.sourceforge.net>
+ * Copyright © 2003 Matthieu Castet <mat-c@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+#include "exif-mnote-data-canon.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <libexif/exif-byte-order.h>
+#include <libexif/exif-utils.h>
+#include <libexif/exif-data.h>
+
+#define DEBUG
+
+static void
+exif_mnote_data_canon_clear (ExifMnoteDataCanon *n)
+{
+	/* Frees the data of every entry and the entry array itself, leaving n empty. */
+	ExifMnoteData *d = (ExifMnoteData *) n;
+	unsigned int i;
+
+	if (!n || !n->entries) return;
+
+	for (i = 0; i < n->count; i++) {
+		if (!n->entries[i].data) continue;
+		exif_mem_free (d->mem, n->entries[i].data);
+		n->entries[i].data = NULL;
+	}
+
+	exif_mem_free (d->mem, n->entries);
+	n->entries = NULL;
+	n->count = 0;
+}
+
+static void
+exif_mnote_data_canon_free (ExifMnoteData *n)
+{
+	/* "free" method: releases the entries only; n itself is not freed here. */
+	if (n)
+		exif_mnote_data_canon_clear ((ExifMnoteDataCanon *) n);
+}
+
+static char *
+exif_mnote_data_canon_get_value (ExifMnoteData *note, unsigned int n, char *val, unsigned int maxlen)
+{
+	/* Delegates to mnote_canon_entry_get_value for entry n; NULL if note is NULL or n is out of range. */
+	ExifMnoteDataCanon *cnote = (ExifMnoteDataCanon *) note;
+
+	if (!note || n >= cnote->count) return NULL;
+	return mnote_canon_entry_get_value (&cnote->entries[n], val, maxlen);
+}
+
+static void
+exif_mnote_data_canon_set_byte_order (ExifMnoteData *d, ExifByteOrder o)
+{
+	/* Records o as the note's byte order and passes every entry's data to exif_array_set_byte_order. */
+	ExifMnoteDataCanon *n = (ExifMnoteDataCanon *) d;
+	ExifByteOrder previous;
+	unsigned int i;
+
+	if (!n) return;
+	previous = n->order;
+	n->order = o;
+	for (i = 0; i < n->count; i++) {
+		MnoteCanonEntry *e = &n->entries[i];
+		e->order = o;
+		exif_array_set_byte_order (e->format, e->data, e->components, previous, o);
+	}
+}
+
+static void
+exif_mnote_data_canon_set_offset (ExifMnoteData *n, unsigned int o)
+{
+	if (n) ((ExifMnoteDataCanon *) n)->offset = o;	/* stores the base offset; a NULL note is ignored */
+}
+
+static void
+exif_mnote_data_canon_save (ExifMnoteData *ne, 
+	unsigned char **buf, unsigned int *buf_size)
+{
+	ExifMnoteDataCanon *n = (ExifMnoteDataCanon *) ne;
+	unsigned int i, o, s, doff;
+	/* Serializes the note into a newly allocated *buf of *buf_size bytes; *buf is NULL if an allocation fails. */
+	if (!n || !buf || !buf_size) return;
+
+	/*
+	 * Allocate enough memory for all entries and the number
+	 * of entries.
+	 */
+	*buf_size = 2 + n->count * 12 + 4;
+	*buf = exif_mem_alloc (ne->mem, sizeof (char) * *buf_size);
+	if (!*buf) return;
+
+	/* Save the number of entries */
+	exif_set_short (*buf, n->order, (ExifShort) n->count);
+	
+	/* Save each entry */
+	for (i = 0; i < n->count; i++) {
+		o = 2 + i * 12;	/* 12-byte records follow the 2-byte count */
+		exif_set_short (*buf + o + 0, n->order, (ExifShort) n->entries[i].tag);
+		exif_set_short (*buf + o + 2, n->order, (ExifShort) n->entries[i].format);
+		exif_set_long  (*buf + o + 4, n->order,
+				n->entries[i].components);
+		o += 8;	/* o now addresses the record's 4-byte value slot */
+		s = exif_format_get_size (n->entries[i].format) *
+						n->entries[i].components;
+		if (s > 4) {	/* too big for the slot: append the data, store its offset */
+			*buf_size += s;
+
+			/* Ensure even offsets. Set padding bytes to 0. */
+			if (s & 1) *buf_size += 1;
+			*buf = exif_mem_realloc (ne->mem, *buf,
+						 sizeof (char) * *buf_size);
+			if (!*buf) return;	/* NOTE(review): the previous buffer pointer is overwritten here - confirm it is not leaked */
+			doff = *buf_size - s;
+			if (s & 1) { doff--; *(*buf + *buf_size - 1) = '\0'; }
+			exif_set_long (*buf + o, n->order, n->offset + doff);
+		} else
+			doff = o;	/* small values are stored inline in the slot */
+
+		/*
+		 * Write the data. Fill unneeded bytes with 0. Do not
+		 * crash if data is NULL.
+		 */
+		if (!n->entries[i].data) memset (*buf + doff, 0, s);
+		else memcpy (*buf + doff, n->entries[i].data, s);
+		if (s < 4) memset (*buf + doff + s, 0, (4 - s));
+	}
+}
+
+/* XXX
+ * FIXME: exif_mnote_data_canon_load() may fail and there is no
+ *        semantics to express that.
+ *        See bug #1054323 for details, especially the comment by liblit
+ *        after it has supposedly been fixed:
+ *
+ *        https://sourceforge.net/tracker/?func=detail&aid=1054323&group_id=12272&atid=112272
+ *        Unfortunately, the "return" statements aren't commented at
+ *        all, so it isn't trivial to find out what is a normal
+ *        return, and what is a reaction to an error condition.
+ */
+
+static void
+exif_mnote_data_canon_load (ExifMnoteData *ne,
+	const unsigned char *buf, unsigned int buf_size)
+{
+	ExifMnoteDataCanon *n = (ExifMnoteDataCanon *) ne;
+	MnoteCanonEntry *entries;
+	ExifShort c;
+	unsigned int i, o, s, fsize;
+
+	/* Parses the entries found in buf; stops silently at the first malformed entry or failed allocation. */
+	if (!n || !buf || !buf_size) return;
+	if (n->offset > buf_size || buf_size - n->offset < 6 + 2) return;	/* compared this way so the sum cannot wrap */
+	/* Read the number of entries and remove old ones. */
+	c = exif_get_short (buf + 6 + n->offset, n->order);
+	exif_mnote_data_canon_clear (n);
+	for (i = 0; i < c; i++) {
+	    o = 6 + 2 + n->offset + 12 * i;
+	    /* All 12 bytes of the entry (tag, format, components, value/offset) must lie inside buf. */
+	    if (o > buf_size || buf_size - o < 12) return;
+	    entries = exif_mem_realloc (ne->mem, n->entries,
+					sizeof (MnoteCanonEntry) * (i+1));
+	    if (!entries) return;	/* old array and count stay valid */
+	    n->entries = entries;
+	    n->count = i + 1;
+	    memset (&n->entries[i], 0, sizeof (MnoteCanonEntry));
+	    n->entries[i].tag        = exif_get_short (buf + o, n->order);
+	    n->entries[i].format     = exif_get_short (buf + o + 2, n->order);
+	    n->entries[i].components = exif_get_long (buf + o + 4, n->order);
+	    n->entries[i].order      = n->order;
+	    /* Size: reject empty data and any size that overflows or cannot fit in buf. */
+	    fsize = exif_format_get_size (n->entries[i].format);
+	    if (!fsize || !n->entries[i].components || n->entries[i].components > buf_size / fsize) return;
+	    s = fsize * (unsigned int) n->entries[i].components;
+	    o += 8;
+	    if (s > 4) {	/* data is not in the entry but at the stored offset */
+		o = exif_get_long (buf + o, n->order);
+		if (o > buf_size - 6) return;
+		o += 6;
+	    }
+	    if (o > buf_size || s > buf_size - o) return;
+	    n->entries[i].data = exif_mem_alloc (ne->mem, sizeof (char) * s);
+	    if (!n->entries[i].data) return;
+	    n->entries[i].size = s;
+	    memcpy (n->entries[i].data, buf + o, s);
+	}
+}
+
+static unsigned int
+exif_mnote_data_canon_count (ExifMnoteData *n)
+{
+	return n ? ((ExifMnoteDataCanon *) n)->count : 0;	/* number of loaded entries; 0 for a NULL note */
+}
+
+static unsigned int
+exif_mnote_data_canon_get_id (ExifMnoteData *d, unsigned int n)
+{
+	/* Tag of entry n; 0 when d is NULL or n is out of range. */
+	ExifMnoteDataCanon *note = (ExifMnoteDataCanon *) d;
+
+	if (!note || n >= note->count) return 0;
+	return note->entries[n].tag;
+}
+
+static const char *
+exif_mnote_data_canon_get_name (ExifMnoteData *note, unsigned int i)
+{
+	/* Name looked up from the tag of entry i; NULL when note is NULL or i is out of range. */
+	ExifMnoteDataCanon *cnote = (ExifMnoteDataCanon *) note;
+
+	if (!note || i >= cnote->count) return NULL;
+	return mnote_canon_tag_get_name (cnote->entries[i].tag);
+}
+
+static const char *
+exif_mnote_data_canon_get_title (ExifMnoteData *note, unsigned int i)
+{
+	/* Title looked up from the tag of entry i; NULL when note is NULL or i is out of range. */
+	ExifMnoteDataCanon *cnote = (ExifMnoteDataCanon *) note;
+
+	if (!note || i >= cnote->count) return NULL;
+	return mnote_canon_tag_get_title (cnote->entries[i].tag);
+}
+
+static const char *
+exif_mnote_data_canon_get_description (ExifMnoteData *note, unsigned int i)
+{
+	/* Description looked up from the tag of entry i; NULL when note is NULL or i is out of range. */
+	ExifMnoteDataCanon *cnote = (ExifMnoteDataCanon *) note;
+	if (!note || i >= cnote->count) return NULL;
+	return mnote_canon_tag_get_description (cnote->entries[i].tag);
+}
+
+ExifMnoteData *
+exif_mnote_data_canon_new (ExifMem *mem)
+{
+	ExifMnoteData *d;
+	/* Allocates a Canon note from mem and installs the Canon method table; NULL if mem is NULL or allocation fails. */
+	if (!mem) return NULL;
+
+	d = exif_mem_alloc (mem, sizeof (ExifMnoteDataCanon));
+	if (!d) return NULL;
+
+	exif_mnote_data_construct (d, mem);
+
+	/* Set up function pointers */
+	d->methods.free            = exif_mnote_data_canon_free;
+	d->methods.set_byte_order  = exif_mnote_data_canon_set_byte_order;
+	d->methods.set_offset      = exif_mnote_data_canon_set_offset;
+	d->methods.load            = exif_mnote_data_canon_load;
+	d->methods.save            = exif_mnote_data_canon_save;
+	d->methods.count           = exif_mnote_data_canon_count;
+	d->methods.get_id          = exif_mnote_data_canon_get_id;
+	d->methods.get_name        = exif_mnote_data_canon_get_name;
+	d->methods.get_title       = exif_mnote_data_canon_get_title;
+	d->methods.get_description = exif_mnote_data_canon_get_description;
+	d->methods.get_value       = exif_mnote_data_canon_get_value;
+
+	return d;
+}
diff --git a/src/libexif/canon/exif-mnote-data-canon.h b/src/libexif/canon/exif-mnote-data-canon.h
new file mode 100644
index 0000000..a476ca0
--- /dev/null
+++ b/src/libexif/canon/exif-mnote-data-canon.h
@@ -0,0 +1,45 @@
+/* exif-mnote-data-canon.h
+ *
+ * Copyright © 2002, 2003 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_MNOTE_DATA_CANON_H__
+#define __EXIF_MNOTE_DATA_CANON_H__
+
+#include <libexif/exif-byte-order.h>
+#include <libexif/exif-mnote-data.h>
+#include <libexif/exif-mnote-data-priv.h>
+#include <libexif/exif-mem.h>
+
+typedef struct _ExifMnoteDataCanon ExifMnoteDataCanon;
+
+#include <libexif/canon/mnote-canon-entry.h>
+
+/* Canon-specific maker-note data. */
+struct _ExifMnoteDataCanon {
+	/* Generic maker-note part. It is the first member, and the Canon
+	 * accessors cast an ExifMnoteData pointer to this struct type. */
+	ExifMnoteData parent;
+
+	/* Array of entries and the number of elements in it; accessors
+	 * reject any index that is not below count. */
+	MnoteCanonEntry *entries;
+	unsigned int count;
+
+	/* NOTE(review): presumably the byte order and offset configured via
+	 * the set_byte_order / set_offset methods -- confirm in the .c file. */
+	ExifByteOrder order;
+	unsigned int offset;
+};
+
+ExifMnoteData *exif_mnote_data_canon_new (ExifMem *mem);
+
+#endif /* __EXIF_MNOTE_DATA_CANON_H__ */
diff --git a/src/libexif/canon/mnote-canon-entry.c b/src/libexif/canon/mnote-canon-entry.c
new file mode 100644
index 0000000..5fa4991
--- /dev/null
+++ b/src/libexif/canon/mnote-canon-entry.c
@@ -0,0 +1,590 @@
+/* mnote-canon-entry.c
+ *
+ * Copyright © 2002 Lutz Müller <lutz@users.sourceforge.net>
+ * Copyright © 2003 Matthieu Castet <mat-c@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "config.h"
+#include "mnote-canon-entry.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libexif/exif-format.h>
+#include <libexif/exif-utils.h>
+#include <libexif/i18n.h>
+
+/* #define DEBUG */
+
+/* Smaller of a and b. Both arguments may be evaluated twice, so do not
+ * pass expressions with side effects. */
+#undef  MIN
+#define MIN(a, b)  (((a) < (b)) ? (a) : (b))
+
+/*
+ * CF: check that an entry has the expected format.
+ * On mismatch, writes an "Invalid format" message into v (at most maxlen
+ * bytes, via snprintf) and executes `break`. Because of that `break`, the
+ * macro is only meaningful directly inside the switch or loop that should
+ * be left, and it cannot be wrapped in do { } while (0).
+ */
+#define CF(format,target,v,maxlen)                              \
+{                                                               \
+        if (format != target) {                                 \
+                snprintf (v, maxlen,                            \
+                        _("Invalid format '%s', "               \
+                        "expected '%s'."),                      \
+                        exif_format_get_name (format),          \
+                        exif_format_get_name (target));         \
+                break;                                          \
+        }                                                       \
+}
+
+/*
+ * CC: check that an entry has exactly `target` components.
+ * On mismatch, writes an "Invalid number of components" message into v
+ * and executes `break` (same usage restriction as CF).
+ */
+#define CC(number,target,v,maxlen)                                      \
+{                                                                       \
+        if (number != target) {                                         \
+                snprintf (v, maxlen,                                    \
+                        _("Invalid number of components (%i, "          \
+                        "expected %i)."), (int) number, (int) target);  \
+                break;                                                  \
+        }                                                               \
+}
+/*
+ * CC2: like CC, but accepts either of two component counts (t1 or t2).
+ */
+#define CC2(number,t1,t2,v,maxlen)                                      \
+{                                                                       \
+	if ((number != t1) && (number != t2)) {                         \
+		snprintf (v, maxlen,                                    \
+			_("Invalid number of components (%i, "          \
+			"expected %i or %i)."), (int) number,		\
+			(int) t1, (int) t2);  				\
+		break;                                                  \
+	}                                                               \
+}
+
+/*
+ * Render the value of a Canon maker-note entry as human-readable text.
+ *
+ * entry   entry to describe; its tag selects the decoding rules.
+ * val     output buffer of at least maxlen bytes.
+ * maxlen  size of val in bytes, including room for the terminating NUL.
+ *
+ * Returns val, or NULL when entry or val is NULL. When the entry has an
+ * unexpected format or component count, val receives an error message
+ * instead of a value (see the CF/CC/CC2 macros). Tags that are not
+ * handled leave val empty.
+ *
+ * For the array-valued tags (settings 1/2, custom functions) the first
+ * short of the data is a byte length; it is halved to obtain the number
+ * of shorts, which must equal entry->components. Every read from the
+ * entry data is bounded by entry->size so that a truncated or malformed
+ * entry cannot cause reads past the end of the data.
+ */
+char *
+mnote_canon_entry_get_value (const MnoteCanonEntry *entry, char *val, unsigned int maxlen)
+{
+	char buf[128];
+	ExifLong vl;
+	ExifShort vs, n;
+	int i;
+	unsigned char *data;
+
+	if (!entry || !val) return NULL;
+
+	/* A zero-sized buffer cannot even hold the terminator; without this
+	 * guard the decrement below would wrap maxlen around to UINT_MAX. */
+	if (!maxlen) return val;
+
+	/* Only touch the entry once it is known to be non-NULL. */
+	data = entry->data;
+
+	/* Clear the whole buffer, then keep the last byte in reserve so that
+	 * every strncpy/strncat/snprintf below leaves val NUL-terminated. */
+	memset (val, 0, maxlen);
+	maxlen--;
+
+	switch (entry->tag) {
+	case MNOTE_CANON_TAG_SETTINGS_1:
+		CF (entry->format, EXIF_FORMAT_SHORT, val, maxlen);
+		if (!data || entry->size < 2) break;
+		n = exif_get_short (data, entry->order) / 2;
+		data += 2;
+		CC (entry->components, n, val, maxlen);
+		/* Field i lives at byte offset 2*i; stop before leaving the data. */
+		for (i = 1; i < n && 2u * (unsigned int) (i + 1) <= entry->size; i++) {
+			vs = exif_get_short (data, entry->order);
+			data += 2;
+			switch (i) {
+			case 1:
+				strncpy (val, _("Macro mode : "), maxlen);
+				switch (vs) {
+				case 1:
+					strncat (val, _("Macro"), maxlen - strlen(val));
+					break;
+				case 2:
+					strncat (val, _("Normal"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("%i???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 2:
+				if (vs) {
+					snprintf (buf, sizeof (buf),
+						_(" / Self Timer : %i (ms)"), vs*100);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 4:
+				strncat (val, _(" / Flash mode : "), maxlen - strlen(val));
+				switch (vs) {
+				case 0:
+					strncat (val, _("Flash not fired"), maxlen - strlen(val));
+					break;
+				case 1:
+					strncat (val, _("auto"), maxlen - strlen(val));
+					break;
+				case 2:
+					strncat (val, _("on"), maxlen - strlen(val));
+					break;
+				case 3:
+					strncat (val, _("red eyes reduction"), maxlen - strlen(val));
+					break;
+				case 4:
+					strncat (val, _("slow synchro"), maxlen - strlen(val));
+					break;
+				case 5:
+					strncat (val, _("auto + red eyes reduction"), maxlen - strlen(val));
+					break;
+				case 6:
+					strncat (val, _("on + red eyes reduction"), maxlen - strlen(val));
+					break;
+				case 16:
+					strncat (val, _("external"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("%i???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 5:
+				strncat (val, _(" / Continuous drive mode : "), maxlen - strlen(val));
+				switch (vs) {
+				case 0:
+					strncat (val, _("single or timer"), maxlen - strlen(val));
+					break;
+				case 1:
+					strncat (val, _("continuous"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("%i???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 7:
+				strncat (val, _(" / Focus mode : "), maxlen - strlen(val));
+				switch (vs) {
+				case 0:
+					strncat (val, _("One-Shot"), maxlen - strlen(val));
+					break;
+				case 1:
+					strncat (val, _("AI Servo"), maxlen - strlen(val));
+					break;
+				case 2:
+					strncat (val, _("AI Focus"), maxlen - strlen(val));
+					break;
+				case 3:
+					strncat (val, _("MF"), maxlen - strlen(val));
+					break;
+				case 4:
+					strncat (val, _("Single"), maxlen - strlen(val));
+					break;
+				case 5:
+					strncat (val, _("Continuous"), maxlen - strlen(val));
+					break;
+				case 6:
+					strncat (val, _("MF"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("%i???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 10:
+				strncat (val, _(" / Image size : "), maxlen - strlen(val));
+				switch (vs) {
+				case 0:
+					strncat (val, _("Large"), maxlen - strlen(val));
+					break;
+				case 1:
+					strncat (val, _("Medium"), maxlen - strlen(val));
+					break;
+				case 2:
+					strncat (val, _("Small"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("%i???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 11:
+				strncat (val, _(" / Easy shooting mode : "), maxlen - strlen(val));
+				switch (vs) {
+				case 0:
+					strncat (val, _("Full Auto"), maxlen - strlen(val));
+					break;
+				case 1:
+					strncat (val, _("Manual"), maxlen - strlen(val));
+					break;
+				case 2:
+					strncat (val, _("Landscape"), maxlen - strlen(val));
+					break;
+				case 3:
+					strncat (val, _("Fast Shutter"), maxlen - strlen(val));
+					break;
+				case 4:
+					strncat (val, _("Slow Shutter"), maxlen - strlen(val));
+					break;
+				case 5:
+					strncat (val, _("Night"), maxlen - strlen(val));
+					break;
+				case 6:
+					strncat (val, _("Black & White"), maxlen - strlen(val));
+					break;
+				case 7:
+					strncat (val, _("Sepia"), maxlen - strlen(val));
+					break;
+				case 8:
+					strncat (val, _("Portrait"), maxlen - strlen(val));
+					break;
+				case 9:
+					strncat (val, _("Sports"), maxlen - strlen(val));
+					break;
+				case 10:
+					strncat (val, _("Macro / Close-Up"), maxlen - strlen(val));
+					break;
+				case 11:
+					strncat (val, _("Pan Focus"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("%i???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 13:
+				strncat (val, _(" / Contrast : "), maxlen - strlen(val));
+				switch (vs) {
+				case 0xffff:
+					strncat (val, _("Low"), maxlen - strlen(val));
+					break;
+				case 0x0000:
+					strncat (val, _("Normal"), maxlen - strlen(val));
+					break;
+				case 0x0001:
+					strncat (val, _("High"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("%i???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 14:
+				strncat (val, _(" / Saturation : "), maxlen - strlen(val));
+				switch (vs) {
+				case 0xffff:
+					strncat (val, _("Low"), maxlen - strlen(val));
+					break;
+				case 0x0000:
+					strncat (val, _("Normal"), maxlen - strlen(val));
+					break;
+				case 0x0001:
+					strncat (val, _("High"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("%i???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 15:
+				strncat (val, _(" / Sharpness : "), maxlen - strlen(val));
+				switch (vs) {
+				case 0xffff:
+					strncat (val, _("Low"), maxlen - strlen(val));
+					break;
+				case 0x0000:
+					strncat (val, _("Normal"), maxlen - strlen(val));
+					break;
+				case 0x0001:
+					strncat (val, _("High"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("%i???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 16:
+				if (vs) {
+					strncat (val, _(" / ISO : "), maxlen - strlen(val));
+					switch (vs) {
+					case 15:
+						strncat (val, _("auto"), maxlen - strlen(val));
+						break;
+					case 16:
+						strncat (val, _("50"), maxlen - strlen(val));
+						break;
+					case 17:
+						strncat (val, _("100"), maxlen - strlen(val));
+						break;
+					case 18:
+						strncat (val, _("200"), maxlen - strlen(val));
+						break;
+					case 19:
+						strncat (val, _("400"), maxlen - strlen(val));
+						break;
+					default:
+						snprintf (buf, sizeof (buf), _("%i???"), vs);
+						strncat (val, buf, maxlen - strlen(val));
+					}
+				}
+				/* Leave the field unconditionally: a zero ISO value must
+				 * not fall through and be reported as a metering mode. */
+				break;
+			case 17:
+				strncat (val, _(" / Metering mode : "), maxlen - strlen(val));
+				switch (vs) {
+				case 3:
+					strncat (val, _("Evaluative"), maxlen - strlen(val));
+					break;
+				case 4:
+					strncat (val, _("Partial"), maxlen - strlen(val));
+					break;
+				case 5:
+					strncat (val, _("Center-weighted"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("%i???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 19:
+				strncat (val, _(" / AF point selected : "), maxlen - strlen(val));
+				switch (vs) {
+				case 0x3000:
+					strncat (val, _("none (MF)"), maxlen - strlen(val));
+					break;
+				case 0x3001:
+					strncat (val, _("auto-selected"), maxlen - strlen(val));
+					break;
+				case 0x3002:
+					strncat (val, _("right"), maxlen - strlen(val));
+					break;
+				case 0x3003:
+					strncat (val, _("center"), maxlen - strlen(val));
+					break;
+				case 0x3004:
+					strncat (val, _("left"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("0x%x???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 20:
+				strncat (val, _(" / Exposure mode : "), maxlen - strlen(val));
+				switch (vs) {
+				case 0:
+					strncat (val, _("Easy shooting"), maxlen - strlen(val));
+					break;
+				case 1:
+					strncat (val, _("Program"), maxlen - strlen(val));
+					break;
+				case 2:
+					strncat (val, _("Tv-priority"), maxlen - strlen(val));
+					break;
+				case 3:
+					strncat (val, _("Av-priority"), maxlen - strlen(val));
+					break;
+				case 4:
+					strncat (val, _("Manual"), maxlen - strlen(val));
+					break;
+				case 5:
+					strncat (val, _("A-DEP"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("%i???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 23:
+				snprintf (buf, sizeof (buf), _(" / long focal length of lens (in focal units) : %u"), vs);
+				strncat (val, buf, maxlen - strlen(val));
+				break;
+			case 24:
+				snprintf (buf, sizeof (buf), _(" / short focal length of lens (in focal units) : %u"), vs);
+				strncat (val, buf, maxlen - strlen(val));
+				break;
+			case 25:
+				snprintf (buf, sizeof (buf), _(" / focal units per mm : %u"), vs);
+				strncat (val, buf, maxlen - strlen(val));
+				break;
+			case 29:
+				/* Bit field: each set bit appends its own label. */
+				strncat (val, _(" / Flash details : "), maxlen - strlen(val));
+				if ((vs>>14)&1)
+					strncat (val, _("External E-TTL"), maxlen - strlen(val));
+				if ((vs>>13)&1)
+					strncat (val, _("Internal flash"), maxlen - strlen(val));
+				if ((vs>>11)&1)
+					strncat (val, _("FP sync used"), maxlen - strlen(val));
+				if ((vs>>4)&1)
+					strncat (val, _("FP sync enabled"), maxlen - strlen(val));
+#ifdef DEBUG
+				printf ("Value29=0x%08x\n", vs);
+#endif
+				break;
+			case 32:
+				strncat (val, _(" / Focus mode2 : "), maxlen - strlen(val));
+				switch (vs) {
+				case 0:
+					strncat (val, _("Single"), maxlen - strlen(val));
+					break;
+				case 1:
+					strncat (val, _("Continuous"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("%i???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+#ifdef DEBUG
+			default:
+				printf ("Value%d=%d\n", i, vs);
+#endif
+			}
+		}
+
+		break;
+
+	case MNOTE_CANON_TAG_SETTINGS_2:
+		CF (entry->format, EXIF_FORMAT_SHORT, val, maxlen);
+		if (!data || entry->size < 2) break;
+		n = exif_get_short (data, entry->order)/2;
+		data += 2;
+		CC (entry->components, n, val, maxlen);
+#ifdef DEBUG
+		printf ("Setting2 size %d %d\n",n,entry->size);
+#endif
+		for (i = 1; i < n && 2u * (unsigned int) (i + 1) <= entry->size; i++) {
+			vs = exif_get_short (data, entry->order);
+			data += 2;
+			switch (i) {
+			case 7:
+				strncpy (val, _("White balance : "), maxlen - strlen(val));
+				switch (vs) {
+				case 0:
+					strncat (val, _("Auto"), maxlen - strlen(val));
+					break;
+				case 1:
+					strncat (val, _("Sunny"), maxlen - strlen(val));
+					break;
+				case 2:
+					strncat (val, _("Cloudy"), maxlen - strlen(val));
+					break;
+				case 3:
+					strncat (val, _("Tungsten"), maxlen - strlen(val));
+					break;
+				case 4:
+					strncat (val, _("Flourescent"), maxlen - strlen(val));
+					break;
+				case 5:
+					strncat (val, _("Flash"), maxlen - strlen(val));
+					break;
+				case 6:
+					strncat (val, _("Custom"), maxlen - strlen(val));
+					break;
+				default:
+					snprintf (buf, sizeof (buf), _("%i???"), vs);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+				break;
+			case 9:
+				snprintf (buf, sizeof (buf), _(" / Sequence number : %u"), vs);
+				strncat (val, buf, maxlen - strlen(val));
+				break;
+			case 14:
+				/* The top four bits hold the number of focus points; the
+				 * low three bits flag which of them were used. */
+				if (vs>>12)
+				{
+					strncat (val, _(" / AF point used : "), maxlen - strlen(val));
+					if (vs&1)
+						strncat (val, _("Right"), maxlen - strlen(val));
+					if ((vs>>1)&1)
+						strncat (val, _("Center"), maxlen - strlen(val));
+					if ((vs>>2)&1)
+						strncat (val, _("Left"), maxlen - strlen(val));
+					snprintf (buf, sizeof (buf), _(" (%u available focus point)"), vs>>12);
+					strncat (val, buf, maxlen - strlen(val));
+				}
+#ifdef DEBUG
+				printf ("0x%08x\n", vs);
+#endif
+				break;
+			case 15:
+				snprintf (buf, sizeof (buf), _(" / Flash bias : %.2f EV"), vs/32.0);
+				strncat (val, buf, maxlen - strlen(val));
+
+				break;
+			case 19:
+				snprintf (buf, sizeof (buf), _(" / Subject Distance (mm) : %u"), vs);
+				strncat (val, buf, maxlen - strlen(val));
+				break;
+#ifdef DEBUG
+			default:
+				printf ("Value%d=%d\n", i, vs);
+#endif
+			}
+		}
+
+		break;
+
+	case MNOTE_CANON_TAG_IMAGE_TYPE:
+	case MNOTE_CANON_TAG_OWNER:
+		CF (entry->format, EXIF_FORMAT_ASCII, val, maxlen);
+		CC (entry->components, 32, val, maxlen);
+		if (!data) break;
+		strncpy (val, (const char *) data, MIN (entry->size, maxlen));
+		break;
+
+	case MNOTE_CANON_TAG_FIRMWARE:
+		CF (entry->format, EXIF_FORMAT_ASCII, val, maxlen);
+		CC2 (entry->components, 24, 32, val, maxlen);
+		if (!data) break;
+		strncpy (val, (const char *) data, MIN (entry->size, maxlen));
+		break;
+
+	case MNOTE_CANON_TAG_IMAGE_NUMBER:
+		CF (entry->format, EXIF_FORMAT_LONG, val, maxlen);
+		CC (entry->components, 1, val, maxlen);
+		if (!data || entry->size < 4) break;
+		vl = exif_get_long (data, entry->order);
+		snprintf (val, maxlen, "%03lu-%04lu",
+				(unsigned long) vl/10000,
+				(unsigned long) vl%10000);
+		break;
+
+	case MNOTE_CANON_TAG_SERIAL_NUMBER:
+		CF (entry->format, EXIF_FORMAT_LONG, val, maxlen);
+		CC (entry->components, 1, val, maxlen);
+		if (!data || entry->size < 4) break;
+		vl = exif_get_long (data, entry->order);
+		/* Shift and mask while still unsigned, then convert: casting to
+		 * int first would sign-extend values with the top bit set. */
+		snprintf (val, maxlen, "%04X-%05d",
+				(unsigned int) (vl >> 16),
+				(int) (vl & 0xffff));
+		break;
+
+	case MNOTE_CANON_TAG_CUSTOM_FUNCS:
+		CF (entry->format, EXIF_FORMAT_SHORT, val, maxlen);
+		if (!data || entry->size < 2) break;
+		n = exif_get_short (data, entry->order)/2;
+		data += 2;
+		CC (entry->components, n, val, maxlen);
+#ifdef DEBUG
+		printf ("Custom Function size %d %d\n",n,entry->size);
+#endif
+		for (i = 1; i < n && 2u * (unsigned int) (i + 1) <= entry->size; i++) {
+			vs = exif_get_short (data, entry->order);
+			data += 2;
+			snprintf (buf, sizeof(buf), _("C.F%d : %u"), i, vs);
+			strncat (val, buf, maxlen - strlen(val));
+		}
+		break;
+
+	default:
+#ifdef DEBUG
+		if (entry->format == EXIF_FORMAT_SHORT)
+		for(i=0;i<entry->components;i++) {
+			vs = exif_get_short (data, entry->order);
+			data+=2;
+			printf ("Value%d=%d\n", i, vs);
+		}
+		else if (entry->format == EXIF_FORMAT_LONG)
+		for(i=0;i<entry->components;i++) {
+			vl = exif_get_long (data, entry->order);
+			data+=4;
+			printf ("Value%d=%lu\n", i, (unsigned long) vl);
+		}
+		else if (entry->format == EXIF_FORMAT_ASCII)
+		    strncpy (val, (const char *) data, MIN (entry->size, maxlen));
+#endif
+		break;
+	}
+
+	return val;
+}
diff --git a/src/libexif/canon/mnote-canon-entry.h b/src/libexif/canon/mnote-canon-entry.h
new file mode 100644
index 0000000..62345d8
--- /dev/null
+++ b/src/libexif/canon/mnote-canon-entry.h
@@ -0,0 +1,43 @@
+/* mnote-canon-entry.h
+ *
+ * Copyright © 2002 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __MNOTE_CANON_ENTRY_H__
+#define __MNOTE_CANON_ENTRY_H__
+
+#include <libexif/exif-format.h>
+#include <libexif/exif-byte-order.h>
+#include <libexif/canon/mnote-canon-tag.h>
+
+typedef struct _MnoteCanonEntry        MnoteCanonEntry;
+
+/* One entry of a Canon maker note. */
+struct _MnoteCanonEntry {
+	/* Identifies the entry and selects how its value is decoded. */
+	MnoteCanonTag tag;
+	/* Format of the data; the value decoder checks it against the
+	 * format expected for the tag. */
+	ExifFormat format;
+	/* Number of components; also validated by the value decoder. */
+	unsigned long components;
+
+	/* Raw entry bytes and their length in bytes. */
+	unsigned char *data;
+	unsigned int size;
+
+	/* Byte order used when reading shorts and longs out of data. */
+	ExifByteOrder order;
+};
+
+char *mnote_canon_entry_get_value (const MnoteCanonEntry *entry, char *val, unsigned int maxlen);
+
+#endif /* __MNOTE_CANON_ENTRY_H__ */
diff --git a/src/libexif/canon/mnote-canon-tag.c b/src/libexif/canon/mnote-canon-tag.c
new file mode 100644
index 0000000..890a5fc
--- /dev/null
+++ b/src/libexif/canon/mnote-canon-tag.c
@@ -0,0 +1,75 @@
+/* mnote-canon-tag.c
+ *
+ * Copyright © 2002 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+#include "mnote-canon-tag.h"
+
+#include <stdlib.h>
+
+#include <libexif/i18n.h>
+
+/*
+ * Lookup table mapping each known Canon maker-note tag to its name,
+ * title and description. Titles are marked for translation with N_().
+ * The list ends with an all-zero/NULL terminator row. The table is never
+ * written to in this file, so it is declared const.
+ */
+static const struct {
+	MnoteCanonTag tag;
+	const char *name;
+	const char *title;
+	const char *description;
+} table[] = {
+	{MNOTE_CANON_TAG_SETTINGS_1, "Settings1", N_("Settings (first part)"), ""},
+	{MNOTE_CANON_TAG_SETTINGS_2, "Settings2", N_("Settings (second part)"), ""},
+	{MNOTE_CANON_TAG_IMAGE_TYPE, "ImageType", N_("Image type"), ""},
+	{MNOTE_CANON_TAG_FIRMWARE, "FirmwareVersion", N_("Firmware version"), ""},
+	{MNOTE_CANON_TAG_IMAGE_NUMBER, "ImageNumber", N_("Image number"), ""},
+	{MNOTE_CANON_TAG_OWNER, "OwnerName", N_("Owner name"), ""},
+	{MNOTE_CANON_TAG_SERIAL_NUMBER, "SerialNumber", N_("Serial number"), ""},
+	{MNOTE_CANON_TAG_CUSTOM_FUNCS, "CustomFunctions", N_("Custom functions"), ""},
+	{0, NULL, NULL, NULL}
+};
+
+/*
+ * Return the name of Canon maker-note tag t, or NULL if the tag is not
+ * listed in the lookup table.
+ */
+const char *
+mnote_canon_tag_get_name (MnoteCanonTag t)
+{
+	unsigned int i;
+
+	/* Stop at the terminator row (name == NULL) instead of scanning it:
+	 * its tag value is 0, so a lookup of tag 0 would otherwise match it
+	 * and hand a NULL string to the translation macro. */
+	for (i = 0; table[i].name; i++)
+		if (table[i].tag == t) return (_(table[i].name));
+	return NULL;
+}
+
+/*
+ * Return the (translated) title of Canon maker-note tag t, or NULL if
+ * the tag is not listed in the lookup table.
+ */
+const char *
+mnote_canon_tag_get_title (MnoteCanonTag t)
+{
+	unsigned int i;
+
+	bindtextdomain (GETTEXT_PACKAGE, LOCALEDIR); 
+	/* Stop at the terminator row (name == NULL) instead of scanning it:
+	 * its tag value is 0, so a lookup of tag 0 would otherwise match it
+	 * and hand a NULL string to the translation macro. */
+	for (i = 0; table[i].name; i++)
+		if (table[i].tag == t) return (_(table[i].title));
+	return NULL;
+}
+
+/*
+ * Return the (translated) description of Canon maker-note tag t, or
+ * NULL if the tag is not listed in the lookup table. Tags without a
+ * description yield the empty string.
+ */
+const char *
+mnote_canon_tag_get_description (MnoteCanonTag t)
+{
+	unsigned int i;
+
+	bindtextdomain (GETTEXT_PACKAGE, LOCALEDIR);
+	/* Stop at the terminator row (name == NULL) instead of scanning it:
+	 * its tag value is 0, so a lookup of tag 0 would otherwise match it
+	 * and hand a NULL string to the translation macro. */
+	for (i = 0; table[i].name; i++) {
+		if (table[i].tag != t) continue;
+		/* Never translate "": gettext reserves the empty msgid for the
+		 * catalog header and would return that header text instead. */
+		if (!*table[i].description) return table[i].description;
+		return (_(table[i].description));
+	}
+	return NULL;
+}
diff --git a/src/libexif/canon/mnote-canon-tag.h b/src/libexif/canon/mnote-canon-tag.h
new file mode 100644
index 0000000..ce1a72e
--- /dev/null
+++ b/src/libexif/canon/mnote-canon-tag.h
@@ -0,0 +1,52 @@
+/* mnote-canon-tag.h
+ *
+ * Copyright © 2002 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __MNOTE_CANON_TAG_H__
+#define __MNOTE_CANON_TAG_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*
+ * Tag identifiers of Canon maker-note entries. The numbering is sparse;
+ * the UNKNOWN_* members name tag values that have an identifier here but
+ * no row in the name/title/description table of mnote-canon-tag.c.
+ */
+enum _MnoteCanonTag {
+	MNOTE_CANON_TAG_UNKNOWN_0	= 0x0,
+	MNOTE_CANON_TAG_SETTINGS_1	= 0x1,
+	MNOTE_CANON_TAG_UNKNOWN_3	= 0x3,
+	MNOTE_CANON_TAG_SETTINGS_2	= 0x4,
+	MNOTE_CANON_TAG_IMAGE_TYPE	= 0x6,
+	MNOTE_CANON_TAG_FIRMWARE	= 0x7,
+	MNOTE_CANON_TAG_IMAGE_NUMBER	= 0x8,
+	MNOTE_CANON_TAG_OWNER		= 0x9,
+	MNOTE_CANON_TAG_UNKNOWN_10	= 0xa,
+	MNOTE_CANON_TAG_SERIAL_NUMBER	= 0xc,
+	MNOTE_CANON_TAG_UNKNOWN_13	= 0xd,
+	MNOTE_CANON_TAG_CUSTOM_FUNCS	= 0xf
+};
+typedef enum _MnoteCanonTag MnoteCanonTag;
+
+const char *mnote_canon_tag_get_name        (MnoteCanonTag tag);
+const char *mnote_canon_tag_get_title       (MnoteCanonTag tag);
+const char *mnote_canon_tag_get_description (MnoteCanonTag tag);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __MNOTE_CANON_TAG_H__ */
diff --git a/src/libexif/config.h b/src/libexif/config.h
new file mode 100644
index 0000000..54325e3
--- /dev/null
+++ b/src/libexif/config.h
@@ -0,0 +1,12 @@
+
+/* Define to 1 if translation of program messages to the user's native
+   language is requested. */
+/* #undef ENABLE_NLS */
+
+/* The gettext domain we're using */
+#define GETTEXT_PACKAGE "libexif-9"
+
+/* On WIN32 builds, route snprintf to the runtime's _snprintf.
+ * NOTE(review): _snprintf is documented as not writing a terminating NUL
+ * when the output is truncated -- confirm callers tolerate that. */
+#ifdef WIN32
+#define snprintf _snprintf
+#endif
+
diff --git a/src/libexif/exif-byte-order.c b/src/libexif/exif-byte-order.c
new file mode 100644
index 0000000..1a4279f
--- /dev/null
+++ b/src/libexif/exif-byte-order.c
@@ -0,0 +1,39 @@
+/* exif-byte-order.c
+ *
+ * Copyright © 2002 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+
+#include <libexif/exif-byte-order.h>
+#include <libexif/i18n.h>
+
+#include <stdlib.h>
+
+/*
+ * Return the (translated) display name of a byte order, or NULL for a
+ * value that is not one of the known byte orders.
+ */
+const char *
+exif_byte_order_get_name (ExifByteOrder order)
+{
+	if (order == EXIF_BYTE_ORDER_MOTOROLA)
+		return (_("Motorola"));
+
+	if (order == EXIF_BYTE_ORDER_INTEL)
+		return (_("Intel"));
+
+	return NULL;
+}
diff --git a/src/libexif/exif-byte-order.h b/src/libexif/exif-byte-order.h
new file mode 100644
index 0000000..bd51d3c
--- /dev/null
+++ b/src/libexif/exif-byte-order.h
@@ -0,0 +1,39 @@
+/* exif-byte-order.h
+ *
+ * Copyright © 2002 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_BYTE_ORDER_H__
+#define __EXIF_BYTE_ORDER_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Byte order used when reading and writing multi-byte values.
+ * NOTE(review): presumably MOTOROLA is big-endian and INTEL is
+ * little-endian -- confirm against the exif_get_short/long helpers. */
+typedef enum {
+	EXIF_BYTE_ORDER_MOTOROLA,
+	EXIF_BYTE_ORDER_INTEL
+} ExifByteOrder;
+
+const char *exif_byte_order_get_name (ExifByteOrder order);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __EXIF_BYTE_ORDER_H__ */
diff --git a/src/libexif/exif-content.c b/src/libexif/exif-content.c
new file mode 100644
index 0000000..39d73ad
--- /dev/null
+++ b/src/libexif/exif-content.c
@@ -0,0 +1,209 @@
+/* exif-content.c
+ *
+ * Copyright © 2001 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+
+#include <libexif/exif-content.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* The ASCII bytes "Exif" followed by two zero bytes. */
+static const unsigned char ExifHeader[] = {0x45, 0x78, 0x69, 0x66, 0x00, 0x00};
+
+/* Private state of an ExifContent. */
+struct _ExifContentPrivate
+{
+	/* Reference count; starts at 1 and the content is freed when it
+	 * drops to 0. */
+	unsigned int ref_count;
+
+	/* Allocator used for the content, its private part and its entry
+	 * array; a reference is held for the lifetime of the content. */
+	ExifMem *mem;
+	/* Log that receives diagnostics; set through exif_content_log(). */
+	ExifLog *log;
+};
+
+/*
+ * Create a new ExifContent backed by the default allocator.
+ * Returns whatever exif_content_new_mem() returns for that allocator.
+ */
+ExifContent *
+exif_content_new (void)
+{
+	ExifMem *default_mem;
+	ExifContent *result;
+
+	default_mem = exif_mem_new_default ();
+	result = exif_content_new_mem (default_mem);
+
+	/* The content took its own reference; drop the local one. */
+	exif_mem_unref (default_mem);
+
+	return result;
+}
+
+/*
+ * Create a new ExifContent whose memory is managed by mem.
+ *
+ * The content starts with a reference count of 1 and holds a reference
+ * on mem. Returns NULL when mem is NULL or an allocation fails.
+ */
+ExifContent *
+exif_content_new_mem (ExifMem *mem)
+{
+	ExifContent *c;
+	ExifContentPrivate *p;
+
+	if (!mem) return NULL;
+
+	c = exif_mem_alloc (mem, (ExifLong) sizeof (ExifContent));
+	if (!c) return NULL;
+
+	p = exif_mem_alloc (mem, (ExifLong) sizeof (ExifContentPrivate));
+	c->priv = p;
+	if (!p) {
+		/* Undo the first allocation so nothing leaks. */
+		exif_mem_free (mem, c);
+		return NULL;
+	}
+
+	p->ref_count = 1;
+
+	exif_mem_ref (mem);
+	p->mem = mem;
+
+	return c;
+}
+
+/*
+ * Take an additional reference on content.
+ * A NULL content (or one without private data) is ignored, matching the
+ * NULL tolerance of the other exif_content_* functions.
+ */
+void
+exif_content_ref (ExifContent *content)
+{
+	if (!content || !content->priv) return;
+
+	content->priv->ref_count++;
+}
+
+/*
+ * Drop one reference on content and free it when the count reaches 0.
+ * A NULL content (or one without private data) is ignored, matching the
+ * NULL tolerance of the other exif_content_* functions.
+ */
+void
+exif_content_unref (ExifContent *content)
+{
+	if (!content || !content->priv) return;
+
+	content->priv->ref_count--;
+	if (!content->priv->ref_count)
+		exif_content_free (content);
+}
+
+/*
+ * Destroy content regardless of its reference count: unreference every
+ * entry, release the entry array, the log reference and the private
+ * part, free the content itself and finally drop the allocator
+ * reference. Does nothing when content is NULL.
+ */
+void
+exif_content_free (ExifContent *content)
+{
+	ExifMem *mem = NULL;
+	unsigned int idx = 0;
+
+	if (!content) return;
+
+	/* Fetch the allocator first; priv is released further down. */
+	if (content->priv)
+		mem = content->priv->mem;
+
+	while (idx < content->count) {
+		exif_entry_unref (content->entries[idx]);
+		idx++;
+	}
+	exif_mem_free (mem, content->entries);
+
+	if (content->priv)
+		exif_log_unref (content->priv->log);
+
+	exif_mem_free (mem, content->priv);
+	exif_mem_free (mem, content);
+	exif_mem_unref (mem);
+}
+
+/*
+ * Print a summary line for content to stdout, indented by two spaces per
+ * indent level, then dump every entry one level deeper.
+ * Does nothing when content is NULL.
+ */
+void
+exif_content_dump (ExifContent *content, unsigned int indent)
+{
+	unsigned int i;
+
+	if (!content)
+		return;
+
+	/* Let printf produce the padding: a fixed-size scratch buffer would
+	 * be overrun for large indent values. */
+	printf ("%*sDumping exif content (%u entries)...\n",
+		(int) (2 * indent), "", content->count);
+	for (i = 0; i < content->count; i++)
+		exif_entry_dump (content->entries[i], indent + 1);
+}
+
+/*
+ * Append entry to content c and take a reference on it.
+ *
+ * Nothing happens when c or entry is NULL, when entry already belongs to
+ * a content, when c already holds an entry with the same tag (this is
+ * logged), or when the entry array cannot be grown. In all of those
+ * cases both c and entry are left untouched.
+ */
+void
+exif_content_add_entry (ExifContent *c, ExifEntry *entry)
+{
+	ExifEntry **entries;
+
+	if (!c || !c->priv || !entry || entry->parent) return;
+
+	/* One tag can only be added once to an IFD. */
+	if (exif_content_get_entry (c, entry->tag)) {
+		exif_log (c->priv->log, EXIF_LOG_CODE_DEBUG, "ExifContent",
+			"An attempt has been made to add "
+			"the tag '%s' twice to an IFD. This is against "
+			"specification.", exif_tag_get_name (entry->tag));
+		return;
+	}
+
+	/* The array stores pointers, so size it in pointers. Keep the old
+	 * array until the reallocation is known to have succeeded. */
+	entries = exif_mem_realloc (c->priv->mem,
+		c->entries, sizeof (ExifEntry *) * (c->count + 1));
+	if (!entries) return;
+	c->entries = entries;
+
+	/* Only claim the entry once it is actually stored. */
+	entry->parent = c;
+	c->entries[c->count] = entry;
+	exif_entry_ref (entry);
+	c->count++;
+}
+
+/*
+ * Remove entry e from content c and drop the reference c held on it.
+ *
+ * Nothing happens when c or e is NULL or when e does not belong to c.
+ * The entry array is shrunk afterwards; if shrinking fails the existing
+ * (larger) array simply stays in use.
+ */
+void
+exif_content_remove_entry (ExifContent *c, ExifEntry *e)
+{
+	unsigned int i;
+	ExifEntry **entries;
+
+	if (!c || !c->priv || !e || (e->parent != c)) return;
+
+	/* Search the entry */
+	for (i = 0; i < c->count; i++) if (c->entries[i] == e) break;
+	if (i == c->count) return;
+
+	/* Remove the entry: close the gap by moving the following pointers
+	 * down by one slot. */
+	memmove (&c->entries[i], &c->entries[i + 1],
+		 sizeof (ExifEntry *) * (c->count - i - 1));
+	c->count--;
+	e->parent = NULL;
+	exif_entry_unref (e);
+
+	if (!c->count) {
+		/* Avoid a zero-sized reallocation: release the array. */
+		exif_mem_free (c->priv->mem, c->entries);
+		c->entries = NULL;
+		return;
+	}
+
+	entries = exif_mem_realloc (c->priv->mem, c->entries,
+					sizeof (ExifEntry *) * c->count);
+	if (entries) c->entries = entries;
+}
+
+/*
+ * Return the first entry of content whose tag equals tag, or NULL when
+ * content is NULL or holds no such entry.
+ */
+ExifEntry *
+exif_content_get_entry (ExifContent *content, ExifTag tag)
+{
+	unsigned int idx;
+	ExifEntry *candidate;
+
+	if (content == NULL)
+		return NULL;
+
+	for (idx = 0; idx < content->count; idx++) {
+		candidate = content->entries[idx];
+		if (candidate->tag == tag)
+			return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * Call func once for every entry of content, in array order, passing
+ * data through unchanged. Does nothing when content or func is NULL.
+ */
+void
+exif_content_foreach_entry (ExifContent *content,
+			    ExifContentForeachEntryFunc func, void *data)
+{
+	unsigned int idx = 0;
+
+	if (content == NULL || func == NULL)
+		return;
+
+	while (idx < content->count) {
+		func (content->entries[idx], data);
+		idx++;
+	}
+}
+
+/*
+ * Attach log to content: the previously attached log (if any) is
+ * unreferenced and a reference is taken on the new one. Does nothing
+ * when content, its private data or log is NULL, or when log is already
+ * the attached log.
+ */
+void
+exif_content_log (ExifContent *content, ExifLog *log)
+{
+	ExifContentPrivate *priv;
+
+	if (!content || !log)
+		return;
+
+	priv = content->priv;
+	if (!priv || priv->log == log)
+		return;
+
+	if (priv->log)
+		exif_log_unref (priv->log);
+
+	priv->log = log;
+	exif_log_ref (log);
+}
diff --git a/src/libexif/exif-content.h b/src/libexif/exif-content.h
new file mode 100644
index 0000000..115f8ed
--- /dev/null
+++ b/src/libexif/exif-content.h
@@ -0,0 +1,76 @@
+/* exif-content.h
+ *
+ * Copyright © 2001 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_CONTENT_H__
+#define __EXIF_CONTENT_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+typedef struct _ExifContent        ExifContent;
+typedef struct _ExifContentPrivate ExifContentPrivate;
+
+#include <libexif/exif-tag.h>
+#include <libexif/exif-entry.h>
+#include <libexif/exif-data.h>
+#include <libexif/exif-log.h>
+#include <libexif/exif-mem.h>
+
+struct _ExifContent
+{
+        ExifEntry **entries;	/* array of 'count' entry pointers */
+        unsigned int count;	/* number of valid slots in 'entries' */
+
+	/* Data containing this content (assigned by exif_data_new_mem) */
+	ExifData *parent;
+
+	ExifContentPrivate *priv;	/* opaque private state */
+};
+
+/* Lifecycle */
+ExifContent *exif_content_new     (void);
+ExifContent *exif_content_new_mem (ExifMem *);
+void         exif_content_ref     (ExifContent *content);
+void         exif_content_unref   (ExifContent *content);
+void         exif_content_free    (ExifContent *content);
+
+void         exif_content_add_entry     (ExifContent *content, ExifEntry *e);
+void         exif_content_remove_entry  (ExifContent *content, ExifEntry *e);
+ExifEntry   *exif_content_get_entry     (ExifContent *content, ExifTag tag);
+
+typedef void (* ExifContentForeachEntryFunc) (ExifEntry *, void *user_data);
+void         exif_content_foreach_entry (ExifContent *content,
+					 ExifContentForeachEntryFunc func,
+					 void *user_data);
+
+/* Convenience: value of the entry tagged t in c, or NULL; c and t are evaluated twice */
+#define exif_content_get_value(c,t,v,m)					\
+	(exif_content_get_entry (c,t) ?					\
+	 exif_entry_get_value (exif_content_get_entry (c,t),v,m) : NULL)
+
+void exif_content_dump  (ExifContent *content, unsigned int indent);
+void exif_content_log   (ExifContent *content, ExifLog *log);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __EXIF_CONTENT_H__ */
diff --git a/src/libexif/exif-data.c b/src/libexif/exif-data.c
new file mode 100644
index 0000000..92e5857
--- /dev/null
+++ b/src/libexif/exif-data.c
@@ -0,0 +1,1039 @@
+/* exif-data.c
+ *
+ * Copyright © 2001 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+
+#include <libexif/exif-mnote-data.h>
+#include <libexif/exif-data.h>
+#include <libexif/exif-ifd.h>
+#include <libexif/exif-mnote-data-priv.h>
+#include <libexif/exif-utils.h>
+#include <libexif/exif-loader.h>
+#include <libexif/exif-log.h>
+#include <libexif/i18n.h>
+
+#include <libexif/olympus/exif-mnote-data-olympus.h>
+#include <libexif/canon/exif-mnote-data-canon.h>
+#include <libexif/pentax/exif-mnote-data-pentax.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#undef MAX
+#define MAX(a, b)  (((a) > (b)) ? (a) : (b))
+
+#if defined(__WATCOMC__) || defined(_MSC_VER)
+#      define strncasecmp strnicmp
+#endif
+
+#undef JPEG_MARKER_SOI
+#define JPEG_MARKER_SOI  0xd8
+#undef JPEG_MARKER_APP0
+#define JPEG_MARKER_APP0 0xe0
+#undef JPEG_MARKER_APP1
+#define JPEG_MARKER_APP1 0xe1
+
+static const unsigned char ExifHeader[] = {0x45, 0x78, 0x69, 0x66, 0x00, 0x00};
+
+struct _ExifDataPrivate
+{
+	ExifByteOrder order;	/* byte order used to read and write values */
+
+	ExifMnoteData *md;	/* MakerNote parser, set by exif_data_load_data */
+
+	ExifLog *log;	/* logger, replaced through exif_data_log */
+	ExifMem *mem;	/* allocator; a reference is taken in exif_data_new_mem */
+
+	unsigned int ref_count;
+
+	/* Offset of the MakerNote value, recorded while loading entries */
+	unsigned int offset_mnote;
+};
+
+/* Allocate 'i' bytes from data's allocator; log and return NULL on failure. */
+static void *
+exif_data_alloc (ExifData *data, unsigned int i)
+{
+	void *buf = NULL;
+	if (data && i) {
+		buf = exif_mem_alloc (data->priv->mem, i);
+		if (!buf) {
+			EXIF_LOG_NO_MEMORY (data->priv->log, "ExifData", i);
+		}
+	}
+	return buf;
+}
+
+ExifMnoteData *	/* MakerNote data of 'd', or NULL when there is none */
+exif_data_get_mnote_data (ExifData *d)
+{
+	return (!d || !d->priv) ? NULL : d->priv->md;
+}
+
+/* Create an empty ExifData that uses the default allocator. */
+ExifData *
+exif_data_new (void)
+{
+	ExifMem *default_mem = exif_mem_new_default ();
+	ExifData *result = exif_data_new_mem (default_mem);
+
+	exif_mem_unref (default_mem);	/* exif_data_new_mem holds its own reference */
+	return result;
+}
+
+/* Create an empty ExifData on allocator 'mem', with one content per IFD. */
+ExifData *
+exif_data_new_mem (ExifMem *mem)
+{
+	ExifData *data;
+	ExifDataPrivate *priv;
+	unsigned int i;
+
+	if (!mem) return NULL;
+	data = exif_mem_alloc (mem, sizeof (ExifData));
+	if (!data) return NULL;
+	data->priv = priv = exif_mem_alloc (mem, sizeof (ExifDataPrivate));
+	if (!priv) { exif_mem_free (mem, data); return NULL; }
+	priv->ref_count = 1;
+	priv->mem = mem;
+	exif_mem_ref (mem);
+
+	for (i = 0; i < EXIF_IFD_COUNT; i++) {
+		ExifContent *ifd = exif_content_new_mem (priv->mem);
+		data->ifd[i] = ifd;
+		if (!ifd) {
+			exif_data_free (data);	/* releases what was built so far */
+			return NULL;
+		}
+		ifd->parent = data;
+	}
+	return data;
+}
+
+/* Create an ExifData and load it from the 'size' bytes at 'data'. */
+ExifData *
+exif_data_new_from_data (const unsigned char *data, unsigned int size)
+{
+	ExifData *loaded = exif_data_new ();
+
+	exif_data_load_data (loaded, data, size);
+	return loaded;
+}
+
+/* Parse the 12-byte IFD entry at d + offset into 'entry' and copy its value
+ * out of the 'size'-byte buffer 'd'; out-of-range values are not loaded. */
+static void
+exif_data_load_data_entry (ExifData *data, ExifEntry *entry,
+			   const unsigned char *d,
+			   unsigned int size, unsigned int offset)
+{
+	unsigned int s, doff, fs;
+
+	entry->tag        = exif_get_short (d + offset + 0, data->priv->order);
+	entry->format     = exif_get_short (d + offset + 2, data->priv->order);
+	entry->components = exif_get_long  (d + offset + 4, data->priv->order);
+
+	exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, "ExifData",
+		  "Loading entry 0x%x ('%s')...", entry->tag,
+		  exif_tag_get_name (entry->tag));
+
+	/* Size? If bigger than 4 bytes, the actual data is not in the entry
+	 * but somewhere else (offset). A value larger than the whole buffer
+	 * is bogus; rejecting it also keeps fs * components from wrapping. */
+	fs = exif_format_get_size (entry->format);
+	if (!fs || !entry->components || entry->components > size / fs) return;
+	s = fs * entry->components;
+	if (s > 4)
+		doff = exif_get_long (d + offset + 8, data->priv->order);
+	else
+		doff = offset + 8;
+
+	/* Sanity check, written so that doff + s is never computed. */
+	if (doff > size || s > size - doff) return;
+
+	entry->data = exif_data_alloc (data, s);
+	if (entry->data) {
+		entry->size = s;
+		memcpy (entry->data, d + doff, s);
+	}
+
+	/* If this is the MakerNote, remember the offset */
+	if (entry->tag == EXIF_TAG_MAKER_NOTE) {
+		if (entry->size > 6) exif_log (data->priv->log,
+			  EXIF_LOG_CODE_DEBUG, "ExifData",
+		          "MakerNote found (%02x %02x %02x %02x "
+			  "%02x %02x %02x...).",
+			  entry->data[0], entry->data[1], entry->data[2],
+			  entry->data[3], entry->data[4], entry->data[5],
+			  entry->data[6]);
+		data->priv->offset_mnote = doff;
+	}
+
+	exif_entry_fix (entry);
+}
+
+/* Write entry 'e' into the 12-byte slot at *d + 6 + offset; values longer
+ * than 4 bytes are appended to the buffer, growing *d and *ds. */
+static void
+exif_data_save_data_entry (ExifData *data, ExifEntry *e,
+			   unsigned char **d, unsigned int *ds,
+			   unsigned int offset)
+{
+	unsigned int doff, s, ts, len;
+	unsigned char *t;
+
+	if (!data || !data->priv || !e || !d || !*d || !ds) return;
+
+	/* The memory for the entry itself has already been allocated. */
+	exif_set_short (*d + 6 + offset + 0,
+			data->priv->order, (ExifShort) e->tag);
+	exif_set_short (*d + 6 + offset + 2,
+			data->priv->order, (ExifShort) e->format);
+
+#ifndef EXIF_DONT_CHANGE_MAKER_NOTE
+	/* If this is the maker note tag, update it. */
+	if ((e->tag == EXIF_TAG_MAKER_NOTE) && data->priv->md) {
+		exif_mem_free (data->priv->mem, e->data);
+		e->data = NULL;
+		e->size = 0;
+		exif_mnote_data_set_offset (data->priv->md, *ds - 6);
+		exif_mnote_data_save (data->priv->md, &e->data, &e->size);
+		e->components = e->size;
+	}
+#endif
+
+	exif_set_long  (*d + 6 + offset + 4,
+			data->priv->order, e->components);
+
+	/* Size? If bigger than 4 bytes, the actual data is not in
+	 * the entry but somewhere else. */
+	s = exif_format_get_size (e->format) * e->components;
+	if (s > 4) {
+		doff = *ds - 6;
+
+		/* According to the TIFF specification, the offset must be an
+		 * even number, so odd sizes get one padding byte (set to 0). */
+		ts = *ds + s + (s & 1);
+		/* Grow through a temporary so that the caller's buffer and
+		 * size stay valid if the reallocation fails. */
+		t = exif_mem_realloc (data->priv->mem, *d, ts);
+		if (!t) {
+			EXIF_LOG_NO_MEMORY (data->priv->log, "ExifData", ts);
+			return;
+		}
+		*d = t;
+		*ds = ts;
+		exif_set_long (*d + 6 + offset + 8, data->priv->order, doff);
+		if (s & 1) *(*d + *ds - 1) = '\0';
+	} else
+		doff = offset + 8;
+
+	/* Write the data, never reading past e->size or through a NULL
+	 * e->data; missing and unneeded bytes are filled with 0. */
+	len = e->data ? ((e->size < s) ? e->size : s) : 0;
+	if (len) memcpy (*d + 6 + doff, e->data, len);
+	if (len < s) memset (*d + 6 + doff + len, 0, s - len);
+	if (s < 4) memset (*d + 6 + doff + s, 0, (4 - s));
+}
+
+/* Copy the size-byte thumbnail found at d + offset into data->data/size. */
+static void
+exif_data_load_data_thumbnail (ExifData *data, const unsigned char *d,
+			       unsigned int ds, ExifLong offset, ExifLong size)
+{
+	if (offset > ds || size > ds - offset) {	/* offset + size may wrap */
+		exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, "ExifData",
+			  "Bogus thumbnail offset and size: %i < %i + %i.",
+			  (int) ds, (int) offset, (int) size);
+		return;
+	}
+	if (data->data) exif_mem_free (data->priv->mem, data->data);
+	data->data = exif_data_alloc (data, size);
+	data->size = data->data ? size : 0;	/* no stale size without data */
+	if (data->data) memcpy (data->data, d + offset, size);
+}
+
+/* Inside a switch only: log and 'break' if IFD i is being loaded or already has entries. */
+#undef CHECK_REC
+#define CHECK_REC(i) 					\
+if (data->ifd[(i)] == ifd) {				\
+	exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, \
+		"ExifData", "Recursive entry in IFD "	\
+		"'%s' detected. Skipping...",		\
+		exif_ifd_get_name (i));			\
+	break;						\
+}							\
+if (data->ifd[(i)]->count) {				\
+	exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG,	\
+		"ExifData", "Attempt to load IFD "	\
+		"'%s' multiple times detected. Skipping...",	\
+		exif_ifd_get_name (i));			\
+	break;						\
+}
+
+/* Load the IFD at 'offset' of the ds-byte TIFF block 'd' into 'ifd',
+ * following sub-IFD pointers and picking up the thumbnail on the way. */
+static void
+exif_data_load_data_content (ExifData *data, ExifContent *ifd,
+			     const unsigned char *d,
+			     unsigned int ds, unsigned int offset)
+{
+	ExifLong o, thumbnail_offset = 0, thumbnail_length = 0;
+	ExifShort n;
+	ExifEntry *entry;
+	unsigned int i;
+	ExifTag tag;
+
+	if (!data || !data->priv) return;
+
+	/* Read the number of entries (ds < 2 would make ds - 2 wrap). */
+	if (ds < 2 || offset > ds - 2) return;
+	n = exif_get_short (d + offset, data->priv->order);
+	exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, "ExifData",
+	          "Loading %i entries...", n);
+	offset += 2;
+
+	/* Check if we have enough data (offset <= ds holds at this point). */
+	if (n > (ds - offset) / 12) n = (ds - offset) / 12;
+
+	for (i = 0; i < n; i++) {
+		tag = exif_get_short (d + offset + 12 * i, data->priv->order);
+		switch (tag) {
+		case EXIF_TAG_EXIF_IFD_POINTER:
+		case EXIF_TAG_GPS_INFO_IFD_POINTER:
+		case EXIF_TAG_INTEROPERABILITY_IFD_POINTER:
+		case EXIF_TAG_JPEG_INTERCHANGE_FORMAT_LENGTH:
+		case EXIF_TAG_JPEG_INTERCHANGE_FORMAT:
+			o = exif_get_long (d + offset + 12 * i + 8,
+					   data->priv->order);
+			switch (tag) {
+			case EXIF_TAG_EXIF_IFD_POINTER:
+				CHECK_REC (EXIF_IFD_EXIF);
+				exif_data_load_data_content (data,
+					data->ifd[EXIF_IFD_EXIF], d, ds, o);
+				break;
+			case EXIF_TAG_GPS_INFO_IFD_POINTER:
+				CHECK_REC (EXIF_IFD_GPS);
+				exif_data_load_data_content (data,
+					data->ifd[EXIF_IFD_GPS], d, ds, o);
+				break;
+			case EXIF_TAG_INTEROPERABILITY_IFD_POINTER:
+				CHECK_REC (EXIF_IFD_INTEROPERABILITY);
+				exif_data_load_data_content (data,
+					data->ifd[EXIF_IFD_INTEROPERABILITY], d, ds, o);
+				break;
+			case EXIF_TAG_JPEG_INTERCHANGE_FORMAT:
+				thumbnail_offset = o;
+				if (thumbnail_offset && thumbnail_length)
+					exif_data_load_data_thumbnail (data, d,
+						ds, thumbnail_offset,
+						thumbnail_length);
+				break;
+			case EXIF_TAG_JPEG_INTERCHANGE_FORMAT_LENGTH:
+				thumbnail_length = o;
+				if (thumbnail_offset && thumbnail_length)
+					exif_data_load_data_thumbnail (data, d,
+						ds, thumbnail_offset,
+						thumbnail_length);
+				break;
+			default:
+				return;
+			}
+			break;
+		default:
+			/*
+			 * If we don't know the tag, chances are high
+			 * that the EXIF data does not follow the standard. */
+			if (!exif_tag_get_name (tag)) {
+				exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, "ExifData",
+				  "Unknown tag %x (entry %i)", tag, i);
+				return;
+			}
+			entry = exif_entry_new_mem (data->priv->mem);
+			if (!entry) return;	/* out of memory: stop loading */
+			exif_data_load_data_entry (data, entry, d, ds,
+						   offset + 12 * i);
+			exif_content_add_entry (ifd, entry);
+			exif_entry_unref (entry);
+			break;
+		}
+	}
+}
+
+/* Order two directory entries by their leading tag, read in byte order o. */
+static int
+cmp_func (const unsigned char *p1, const unsigned char *p2, ExifByteOrder o)
+{
+	const ExifShort lhs = exif_get_short (p1, o), rhs = exif_get_short (p2, o);
+
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+static int	/* qsort() adapter: cmp_func with Intel byte order */
+cmp_func_intel (const void *elem1, const void *elem2)
+{
+	const unsigned char *a = elem1, *b = elem2;
+	return cmp_func (a, b, EXIF_BYTE_ORDER_INTEL);
+}
+
+static int	/* qsort() adapter: cmp_func with Motorola byte order */
+cmp_func_motorola (const void *elem1, const void *elem2)
+{
+	const unsigned char *a = elem1, *b = elem2;
+	return cmp_func (a, b, EXIF_BYTE_ORDER_MOTOROLA);
+}
+
+static void	/* writes 'ifd' and every IFD it links to into *d; grows *d and *ds */
+exif_data_save_data_content (ExifData *data, ExifContent *ifd,
+			     unsigned char **d, unsigned int *ds,
+			     unsigned int offset)
+{
+	unsigned int j, n_ptr = 0, n_thumb = 0;
+	ExifIfd i;
+
+	if (!data || !data->priv || !ifd || !d || !ds) return;
+
+	for (i = 0; i < EXIF_IFD_COUNT; i++)
+		if (ifd == data->ifd[i])
+			break;
+	if (i == EXIF_IFD_COUNT)
+		return;
+
+	/* Count the extra directory entries this IFD needs on top of
+	 * ifd->count: n_ptr sub-IFD pointers and n_thumb thumbnail tags
+	 * (offset and length). */
+	switch (i) {
+	case EXIF_IFD_0:
+
+		/*
+		 * The pointer to IFD_EXIF is in IFD_0. The pointer to
+		 * IFD_INTEROPERABILITY is in IFD_EXIF.
+		 */
+		if (data->ifd[EXIF_IFD_EXIF]->count ||
+		    data->ifd[EXIF_IFD_INTEROPERABILITY]->count)
+			n_ptr++;
+
+		/* The pointer to IFD_GPS is in IFD_0. */
+		if (data->ifd[EXIF_IFD_GPS]->count)
+			n_ptr++;
+
+		break;
+	case EXIF_IFD_1:
+		if (data->size)
+			n_thumb = 2;
+		break;
+	case EXIF_IFD_EXIF:
+		if (data->ifd[EXIF_IFD_INTEROPERABILITY]->count)
+			n_ptr++;	/* falls through to default, which only breaks */
+	default:
+		break;
+	}
+
+	/*
+	 * Grow the buffer by this directory: 2 bytes for the number of
+	 * entries, 12 bytes per entry and 4 bytes for the next-IFD offset.
+	 */
+	*ds += (2 + (ifd->count + n_ptr + n_thumb) * 12 + 4);
+	*d = exif_mem_realloc (data->priv->mem, *d, *ds);	/* NOTE(review): previous *d is overwritten even on failure */
+	if (!*d) {
+		EXIF_LOG_NO_MEMORY (data->priv->log, "ExifData", *ds);
+	  	return;
+	}
+
+	/* Save the number of entries */
+	exif_set_short (*d + 6 + offset, data->priv->order,
+			(ExifShort) (ifd->count + n_ptr + n_thumb));
+	offset += 2;
+
+	/* Save each entry */
+	exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, "ExifData",
+		  "Saving %i entries (IFD '%s', offset: %i)...",
+		  ifd->count, exif_ifd_get_name (i), offset);
+	for (j = 0; j < ifd->count; j++)
+		exif_data_save_data_entry (data, ifd->entries[j], d, ds, offset + 12 * j);
+
+	offset += 12 * ifd->count;
+
+	/* Now save special entries. */
+	switch (i) {
+	case EXIF_IFD_0:
+
+		/*
+		 * The pointer to IFD_EXIF is in IFD_0.
+		 * However, the pointer to IFD_INTEROPERABILITY is in IFD_EXIF,
+		 * therefore, if IFD_INTEROPERABILITY is not empty, we need
+		 * IFD_EXIF even if latter is empty.
+		 */
+		if (data->ifd[EXIF_IFD_EXIF]->count ||
+		    data->ifd[EXIF_IFD_INTEROPERABILITY]->count) {
+			exif_set_short (*d + 6 + offset + 0, data->priv->order,
+					EXIF_TAG_EXIF_IFD_POINTER);
+			exif_set_short (*d + 6 + offset + 2, data->priv->order,
+					EXIF_FORMAT_LONG);
+			exif_set_long  (*d + 6 + offset + 4, data->priv->order,
+					1);
+			exif_set_long  (*d + 6 + offset + 8, data->priv->order,
+					*ds - 6);
+			exif_data_save_data_content (data,
+				data->ifd[EXIF_IFD_EXIF], d, ds, *ds - 6);
+			offset += 12;
+		}
+
+		/* The pointer to IFD_GPS is in IFD_0, too. */
+		if (data->ifd[EXIF_IFD_GPS]->count) {
+			exif_set_short (*d + 6 + offset + 0, data->priv->order,
+					EXIF_TAG_GPS_INFO_IFD_POINTER);
+			exif_set_short (*d + 6 + offset + 2, data->priv->order,
+					EXIF_FORMAT_LONG);
+			exif_set_long  (*d + 6 + offset + 4, data->priv->order,
+					1);
+			exif_set_long  (*d + 6 + offset + 8, data->priv->order,
+					*ds - 6);
+			exif_data_save_data_content (data,
+				data->ifd[EXIF_IFD_GPS], d, ds, *ds - 6);
+			offset += 12;
+		}
+
+		break;
+	case EXIF_IFD_EXIF:
+
+		/*
+		 * The pointer to IFD_INTEROPERABILITY is in IFD_EXIF.
+		 * See note above.
+		 */
+		if (data->ifd[EXIF_IFD_INTEROPERABILITY]->count) {
+			exif_set_short (*d + 6 + offset + 0, data->priv->order,
+					EXIF_TAG_INTEROPERABILITY_IFD_POINTER);
+			exif_set_short (*d + 6 + offset + 2, data->priv->order,
+					EXIF_FORMAT_LONG);
+			exif_set_long  (*d + 6 + offset + 4, data->priv->order,
+					1);
+			exif_set_long  (*d + 6 + offset + 8, data->priv->order,
+					*ds - 6);
+			exif_data_save_data_content (data,
+				data->ifd[EXIF_IFD_INTEROPERABILITY], d, ds,
+				*ds - 6);
+			offset += 12;
+		}
+
+		break;
+	case EXIF_IFD_1:
+
+		/*
+		 * Information about the thumbnail (if any) is saved in
+		 * IFD_1.
+		 */
+		if (data->size) {
+
+			/* EXIF_TAG_JPEG_INTERCHANGE_FORMAT */
+			exif_set_short (*d + 6 + offset + 0, data->priv->order,
+					EXIF_TAG_JPEG_INTERCHANGE_FORMAT);
+			exif_set_short (*d + 6 + offset + 2, data->priv->order,
+					EXIF_FORMAT_LONG);
+			exif_set_long  (*d + 6 + offset + 4, data->priv->order,
+					1);
+			exif_set_long  (*d + 6 + offset + 8, data->priv->order,
+					*ds - 6);
+			*ds += data->size;
+			*d = exif_mem_realloc (data->priv->mem, *d, *ds);
+			if (!*d) {
+				EXIF_LOG_NO_MEMORY (data->priv->log, "ExifData",
+						    *ds);
+			  	return;
+			}
+			memcpy (*d + *ds - data->size, data->data, data->size);
+			offset += 12;
+
+			/* EXIF_TAG_JPEG_INTERCHANGE_FORMAT_LENGTH */
+			exif_set_short (*d + 6 + offset + 0, data->priv->order,
+				EXIF_TAG_JPEG_INTERCHANGE_FORMAT_LENGTH);
+			exif_set_short (*d + 6 + offset + 2, data->priv->order,
+					EXIF_FORMAT_LONG);
+			exif_set_long  (*d + 6 + offset + 4, data->priv->order,
+					1);
+			exif_set_long  (*d + 6 + offset + 8, data->priv->order,
+					data->size);
+			offset += 12;
+		}
+
+		break;
+	default:
+		break;
+	}
+
+	/* Sort the 12-byte entries just written by tag, according to TIFF specification */
+	qsort (*d + 6 + offset - (ifd->count + n_ptr + n_thumb) * 12,
+			(ifd->count + n_ptr + n_thumb), 12,
+			data->priv->order == EXIF_BYTE_ORDER_INTEL ? cmp_func_intel : cmp_func_motorola);
+
+	/* Correctly terminate the directory */
+	if (i == EXIF_IFD_0 && (data->ifd[EXIF_IFD_1]->count ||
+					     data->size)) {
+
+		/*
+		 * We are saving IFD 0. Tell where IFD 1 starts and save
+		 * IFD 1.
+		 */
+		exif_set_long (*d + 6 + offset, data->priv->order, *ds - 6);
+		exif_data_save_data_content (data, data->ifd[EXIF_IFD_1], d, ds,
+					     *ds - 6);
+	} else
+		exif_set_long (*d + 6 + offset, data->priv->order, 0);
+}
+
+typedef enum {	/* MakerNote flavour reported by exif_data_get_type_maker_note */
+	EXIF_DATA_TYPE_MAKER_NOTE_NONE		= 0,
+	EXIF_DATA_TYPE_MAKER_NOTE_CANON		= 1,
+	EXIF_DATA_TYPE_MAKER_NOTE_OLYMPUS	= 2,	/* also returned for Nikon notes */
+	EXIF_DATA_TYPE_MAKER_NOTE_PENTAX	= 3
+} ExifDataTypeMakerNote;
+
+/* Guess which MakerNote parser fits 'd' from the note's first bytes and the
+ * Make tag; EXIF_DATA_TYPE_MAKER_NOTE_NONE means "do not interpret it". */
+static ExifDataTypeMakerNote
+exif_data_get_type_maker_note (ExifData *d)
+{
+	ExifEntry *note, *make;
+	const char *maker;
+	char value[1024];
+
+	if (!d) return EXIF_DATA_TYPE_MAKER_NOTE_NONE;
+	note = exif_data_get_entry (d, EXIF_TAG_MAKER_NOTE);
+	if (!note) return EXIF_DATA_TYPE_MAKER_NOTE_NONE;
+
+	/* Olympus & Nikon: recognized by the signature opening the note */
+	if (note->size >= 5) {
+		if (!memcmp (note->data, "OLYMP", 5))
+			return EXIF_DATA_TYPE_MAKER_NOTE_OLYMPUS;
+		if (!memcmp (note->data, "Nikon", 5))
+			return EXIF_DATA_TYPE_MAKER_NOTE_OLYMPUS;
+	}
+
+	make = exif_data_get_entry (d, EXIF_TAG_MAKE);
+	if (!make) return EXIF_DATA_TYPE_MAKER_NOTE_NONE;
+
+	/* Canon */
+	maker = exif_entry_get_value (make, value, sizeof (value));
+	if (!strcmp (maker, "Canon")) return EXIF_DATA_TYPE_MAKER_NOTE_CANON;
+
+	/* Pentax & some variant of Nikon: both start with 0x00 0x1b */
+	if ((note->size < 2) || (note->data[0] != 0x00) || (note->data[1] != 0x1b))
+		return EXIF_DATA_TYPE_MAKER_NOTE_NONE;
+	maker = exif_entry_get_value (make, value, sizeof (value));
+	return strncasecmp (maker, "Nikon", 5) ? EXIF_DATA_TYPE_MAKER_NOTE_PENTAX
+					       : EXIF_DATA_TYPE_MAKER_NOTE_OLYMPUS;
+}
+
+#define LOG_TOO_SMALL \
+exif_log (data->priv->log, EXIF_LOG_CODE_CORRUPT_DATA, "ExifData", \
+		_("Size of data too small to allow for EXIF data."));
+
+/*
+ * Parse the ds_orig bytes at d_orig into 'data'. The buffer may start
+ * directly with the EXIF header or with JPEG markers, in which case the
+ * APP1 marker is searched first. Malformed input is logged and dropped.
+ */
+void
+exif_data_load_data (ExifData *data, const unsigned char *d_orig,
+		     unsigned int ds_orig)
+{
+	unsigned int l;
+	ExifLong offset;
+	ExifShort n;
+	const unsigned char *d = d_orig;
+	unsigned int ds = ds_orig, len;
+
+	if (!data || !data->priv || !d || !ds) return;
+
+	exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, "ExifData",
+		  "Parsing %i byte(s) EXIF data...\n", ds);
+
+	/*
+	 * It can be that the data starts with the EXIF header. If it does
+	 * not, search the EXIF marker.
+	 */
+	if (ds < 6) {
+		LOG_TOO_SMALL;
+		return;
+	}
+	if (!memcmp (d, ExifHeader, 6)) {
+		exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, "ExifData",
+			  "Found EXIF header.");
+	} else {
+		while (1) {
+			/* Skip fill bytes, but never look past the buffer. */
+			while (ds && (d[0] == 0xff)) {
+				d++;
+				ds--;
+			}
+			if (!ds) {
+				LOG_TOO_SMALL;
+				return;
+			}
+
+			/* JPEG_MARKER_SOI */
+			if (d[0] == JPEG_MARKER_SOI) {
+				d++;
+				ds--;
+				continue;
+			}
+
+			/* JPEG_MARKER_APP0: skip it (needs its 2 length bytes) */
+			if (d[0] == JPEG_MARKER_APP0) {
+				d++;
+				ds--;
+				if (ds < 2) return;
+				l = (d[0] << 8) | d[1];
+				if (l > ds)
+					return;
+				d += l;
+				ds -= l;
+				continue;
+			}
+
+			/* JPEG_MARKER_APP1 */
+			if (d[0] == JPEG_MARKER_APP1)
+				break;
+
+			/* Unknown marker or data. Give up. */
+			exif_log (data->priv->log, EXIF_LOG_CODE_CORRUPT_DATA,
+				  "ExifData", _("EXIF marker not found."));
+			return;
+		}
+		d++;
+		ds--;
+		if (ds < 2) {
+			LOG_TOO_SMALL;
+			return;
+		}
+		len = (d[0] << 8) | d[1];
+		exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, "ExifData",
+			  "We have to deal with %i byte(s) of EXIF data.",
+			  len);
+		d += 2;
+		ds -= 2;
+	}
+
+	/* Verify the exif header (offset 2, length 6). */
+	if (ds < 6) {
+		LOG_TOO_SMALL;
+		return;
+	}
+	if (memcmp (d, ExifHeader, 6)) {
+		exif_log (data->priv->log, EXIF_LOG_CODE_CORRUPT_DATA,
+				"ExifData", _("EXIF header not found."));
+		return;
+	}
+
+	exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, "ExifData",
+		  "Found EXIF header.");
+
+	/* Byte order (offset 6, length 2) */
+	if (ds < 14)
+		return;
+	if (!memcmp (d + 6, "II", 2))
+		data->priv->order = EXIF_BYTE_ORDER_INTEL;
+	else if (!memcmp (d + 6, "MM", 2))
+		data->priv->order = EXIF_BYTE_ORDER_MOTOROLA;
+	else {
+		exif_log (data->priv->log, EXIF_LOG_CODE_CORRUPT_DATA,
+				"ExifData", _("Unknown encoding."));
+		return;
+	}
+
+	/* Fixed value */
+	if (exif_get_short (d + 8, data->priv->order) != 0x002a)
+		return;
+
+	/* IFD 0 offset */
+	offset = exif_get_long (d + 10, data->priv->order);
+	exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, "ExifData", 
+		  "IFD 0 at %i.", (int) offset);
+
+	/* Parse the actual exif data (usually offset 14 from start) */
+	exif_data_load_data_content (data, data->ifd[EXIF_IFD_0], d + 6,
+				     ds - 6, offset);
+
+	/* IFD 1 offset (ds >= 14 here; compared so that no sum can wrap) */
+	if (offset > ds - 8) return;
+	n = exif_get_short (d + 6 + offset, data->priv->order);
+	if (12u * n + 4 > ds - 8 - offset) return;
+	offset = exif_get_long (d + 6 + offset + 2 + 12 * n, data->priv->order);
+	if (offset) {
+		exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, "ExifData",
+			  "IFD 1 at %i.", (int) offset);
+
+		/* Sanity check. */
+		if (offset > ds - 6) {
+			exif_log (data->priv->log, EXIF_LOG_CODE_CORRUPT_DATA,
+				  "ExifData", "Bogus offset.");
+			return;
+		}
+
+		exif_data_load_data_content (data, data->ifd[EXIF_IFD_1], d + 6,
+					     ds - 6, offset);
+	}
+
+	/* If we got an EXIF_TAG_MAKER_NOTE, try to interpret it. Some
+	 * cameras use pointers in the maker note tag that point to the
+	 * space between IFDs. Here is the only place where we have access
+	 * to that data. */
+	switch (exif_data_get_type_maker_note (data)) {
+	case EXIF_DATA_TYPE_MAKER_NOTE_OLYMPUS:
+		data->priv->md = exif_mnote_data_olympus_new (data->priv->mem);
+		break;
+	case EXIF_DATA_TYPE_MAKER_NOTE_PENTAX:
+		data->priv->md = exif_mnote_data_pentax_new (data->priv->mem);
+		break;
+	case EXIF_DATA_TYPE_MAKER_NOTE_CANON:
+		data->priv->md = exif_mnote_data_canon_new (data->priv->mem);
+		break;
+	default:
+		break;
+	}
+
+	/* If we are able to interpret the maker note, do so. */
+	if (data->priv->md) {
+		exif_mnote_data_log (data->priv->md, data->priv->log);
+		exif_mnote_data_set_byte_order (data->priv->md,
+						data->priv->order);
+		exif_mnote_data_set_offset (data->priv->md,
+					    data->priv->offset_mnote);
+		exif_mnote_data_load (data->priv->md, d, ds);
+	}
+}
+
+/* Serialize 'data' into a newly allocated buffer: the 6-byte EXIF header,
+ * byte order, fixed value and IFD 0 offset, then the IFDs (*d / *ds). */
+void
+exif_data_save_data (ExifData *data, unsigned char **d, unsigned int *ds)
+{
+	const char *order_mark;
+
+	if (!(data && d && ds))
+		return;
+
+	/* Header */
+	*ds = 14;
+	*d = exif_data_alloc (data, *ds);
+	if (!*d) return;
+	memcpy (*d, ExifHeader, 6);
+
+	/* Order (offset 6) */
+	order_mark = (data->priv->order == EXIF_BYTE_ORDER_INTEL) ? "II" : "MM";
+	memcpy (*d + 6, order_mark, 2);
+
+	/* Fixed value (2 bytes, offset 8) */
+	exif_set_short (*d + 8, data->priv->order, 0x002a);
+
+	/*
+	 * IFD 0 offset (4 bytes, offset 10). IFD 0 starts 8 bytes after the
+	 * EXIF header: 2 bytes for the order, 2 for the fixed value and 4 for
+	 * this offset field itself.
+	 */
+	exif_set_long (*d + 10, data->priv->order, 8);
+
+	/* Now save IFD 0. IFD 1 will be saved automatically. */
+	exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, "ExifData",
+		  "Saving IFDs...");
+	exif_data_save_data_content (data, data->ifd[EXIF_IFD_0], d, ds,
+				     *ds - 6);
+	exif_log (data->priv->log, EXIF_LOG_CODE_DEBUG, "ExifData",
+		  "Saved %i byte(s) EXIF data.", *ds);
+}
+
+/* Run the file at 'path' through an ExifLoader and return its result. */
+ExifData *
+exif_data_new_from_file (const char *path)
+{
+	ExifLoader *ldr = exif_loader_new ();
+	ExifData *parsed;
+
+	exif_loader_write_file (ldr, path);
+	parsed = exif_loader_get_data (ldr);
+	exif_loader_unref (ldr);	/* the loader is only needed while parsing */
+
+	return parsed;
+}
+
+/* Take one more reference on 'data' (NULL is ignored). */
+void
+exif_data_ref (ExifData *data)
+{
+	if (data) {
+		data->priv->ref_count += 1;
+	}
+}
+
+/* Drop one reference on 'data'; the last one frees it (NULL is ignored). */
+void
+exif_data_unref (ExifData *data)
+{
+	if (!data) return;
+	if (--data->priv->ref_count == 0)
+		exif_data_free (data);
+}
+
+/* Destroy 'data': release the IFDs, thumbnail, logger and MakerNote, then
+ * the object itself and finally its reference on the allocator. */
+void
+exif_data_free (ExifData *data)
+{
+	unsigned int i;
+	ExifDataPrivate *priv;
+	ExifMem *mem;
+
+	if (!data) return;
+
+	/* Fetch the allocator first: it is still used after 'priv' is freed. */
+	priv = data->priv;
+	mem = priv ? priv->mem : NULL;
+
+	for (i = 0; i < EXIF_IFD_COUNT; i++) {
+		if (!data->ifd[i]) continue;
+		exif_content_unref (data->ifd[i]);
+		data->ifd[i] = NULL;
+	}
+
+	if (data->data) {
+		exif_mem_free (mem, data->data);
+		data->data = NULL;
+	}
+
+	if (priv) {
+		if (priv->log) { exif_log_unref (priv->log); priv->log = NULL; }
+		if (priv->md) { exif_mnote_data_unref (priv->md); priv->md = NULL; }
+		exif_mem_free (mem, priv);
+		exif_mem_free (mem, data);
+	}
+
+	exif_mem_unref (mem);
+}
+
+/* Print every non-empty IFD and a short thumbnail summary to stdout. */
+void
+exif_data_dump (ExifData *data)
+{
+	unsigned int i;
+	const unsigned char *thumb;
+
+	if (!data) return;
+
+	for (i = 0; i < EXIF_IFD_COUNT; i++) {
+		ExifContent *ifd = data->ifd[i];
+
+		if (!ifd || !ifd->count) continue;
+		printf ("Dumping IFD '%s'...\n", exif_ifd_get_name (i));
+		exif_content_dump (ifd, 0);
+	}
+
+	thumb = data->data;
+	if (!thumb) return;
+	printf ("%i byte(s) thumbnail data available.", data->size);
+	if (data->size < 4) return;
+
+	/* Show only the first two and the last two bytes. */
+	printf ("0x%02x 0x%02x ... 0x%02x 0x%02x\n", thumb[0], thumb[1],
+		thumb[data->size - 2], thumb[data->size - 1]);
+}
+
+/* Byte order 'data' currently uses for its values.
+ * A NULL 'data' yields 0. */
+ExifByteOrder
+exif_data_get_byte_order (ExifData *data)
+{
+	if (data) return data->priv->order;
+	return 0;
+}
+
+/* Call 'func' (content, user_data) once per IFD of 'data', in IFD order. */
+void
+exif_data_foreach_content (ExifData *data, ExifDataForeachContentFunc func,
+			   void *user_data)
+{
+	unsigned int ifd_no = 0;
+
+	if (!(data && func)) return;
+
+	for (; ifd_no < EXIF_IFD_COUNT; ifd_no++)
+		func (data->ifd[ifd_no], user_data);
+}
+
+typedef struct _ByteOrderChangeData ByteOrderChangeData;
+struct _ByteOrderChangeData {
+	ExifByteOrder old, new;	/* byte order to convert from / to */
+};
+
+/* foreach_entry callback: convert e's value between the two byte orders
+ * given by the ByteOrderChangeData passed as 'data'. */
+static void
+entry_set_byte_order (ExifEntry *e, void *data)
+{
+	const ByteOrderChangeData *chg = data;
+
+	if (e) exif_array_set_byte_order (e->format, e->data, e->components,
+					  chg->old, chg->new);
+}
+
+static void	/* foreach_content callback: applies entry_set_byte_order to each entry */
+content_set_byte_order (ExifContent *content, void *data)
+{
+	exif_content_foreach_entry (content, entry_set_byte_order, data);
+}
+
+/* Switch 'data' to byte order 'order', converting every entry's value and
+ * informing the MakerNote, if any. No-op when the order is unchanged. */
+void
+exif_data_set_byte_order (ExifData *data, ExifByteOrder order)
+{
+	ByteOrderChangeData change;
+
+	if (!data || (data->priv->order == order)) return;
+
+	change.old = data->priv->order;
+	change.new = order;
+	exif_data_foreach_content (data, content_set_byte_order, &change);
+	data->priv->order = order;
+	if (data->priv->md) exif_mnote_data_set_byte_order (data->priv->md, order);
+}
+
+/* Make 'log' the logger of 'data' and of all its IFDs. */
+void
+exif_data_log (ExifData *data, ExifLog *log)
+{
+	unsigned int i;
+	if (!data || !data->priv) return;
+	/* Ref before unref, so re-setting the current logger cannot free it. */
+	exif_log_ref (log);
+	exif_log_unref (data->priv->log);
+	data->priv->log = log;
+	for (i = 0; i < EXIF_IFD_COUNT; i++)
+		exif_content_log (data->ifd[i], log);
+}
+
+/* Used internally within libexif: logger of 'data', or NULL. */
+ExifLog *exif_data_get_log (ExifData *);
+ExifLog *
+exif_data_get_log (ExifData *data)
+{
+	ExifDataPrivate *p = data ? data->priv : NULL;
+	return p ? p->log : NULL;
+}
diff --git a/src/libexif/exif-data.h b/src/libexif/exif-data.h
new file mode 100644
index 0000000..9ecad1d
--- /dev/null
+++ b/src/libexif/exif-data.h
@@ -0,0 +1,104 @@
+/*! \file exif-data.h
+ * \brief Declares ExifData and the functions that load, save and query it
+ *
+ * \author Lutz Müller <lutz@users.sourceforge.net>
+ * \date 2001-2005
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_DATA_H__
+#define __EXIF_DATA_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include <libexif/exif-byte-order.h>
+#include <libexif/exif-ifd.h>
+#include <libexif/exif-log.h>
+#include <libexif/exif-tag.h>
+
+typedef struct _ExifData        ExifData;
+typedef struct _ExifDataPrivate ExifDataPrivate;
+
+#include <libexif/exif-content.h>
+#include <libexif/exif-mnote-data.h>
+#include <libexif/exif-mem.h>
+
+struct _ExifData
+{
+	ExifContent *ifd[EXIF_IFD_COUNT];	/* one content per IFD */
+
+	unsigned char *data;	/* thumbnail bytes, NULL if none was loaded */
+	unsigned int size;	/* number of bytes in 'data' */
+
+	ExifDataPrivate *priv;
+};
+
+ExifData *exif_data_new           (void);
+ExifData *exif_data_new_mem       (ExifMem *);
+
+/*! \brief load exif data from file
+ *  \param[in] path filename including path
+ *  
+ *  \return the ExifData produced by an ExifLoader that was fed the file
+ */
+ExifData *exif_data_new_from_file (const char *path);
+ExifData *exif_data_new_from_data (const unsigned char *data,
+				   unsigned int size);
+
+void      exif_data_load_data (ExifData *data, const unsigned char *d, 
+			       unsigned int size);
+void      exif_data_save_data (ExifData *data, unsigned char **d,
+			       unsigned int *size);
+
+void      exif_data_ref   (ExifData *data);
+void      exif_data_unref (ExifData *data);
+void      exif_data_free  (ExifData *data);
+
+ExifByteOrder exif_data_get_byte_order  (ExifData *data);
+void          exif_data_set_byte_order  (ExifData *data, ExifByteOrder order);
+
+ExifMnoteData *exif_data_get_mnote_data (ExifData *);
+
+typedef void (* ExifDataForeachContentFunc) (ExifContent *, void *user_data);
+void          exif_data_foreach_content (ExifData *data,
+					 ExifDataForeachContentFunc func,
+					 void *user_data);
+
+/* For debugging purposes and error reporting */
+void exif_data_dump (ExifData *data);
+void exif_data_log  (ExifData *data, ExifLog *log);
+
+/* First entry tagged t in IFD 0, 1, EXIF, GPS, Interoperability (in this order), or NULL; d and t are evaluated repeatedly */
+#define exif_data_get_entry(d,t)					\
+	(exif_content_get_entry((d)->ifd[EXIF_IFD_0],(t)) ?		\
+	 exif_content_get_entry((d)->ifd[EXIF_IFD_0],(t)) :		\
+	 exif_content_get_entry((d)->ifd[EXIF_IFD_1],(t)) ?		\
+	 exif_content_get_entry((d)->ifd[EXIF_IFD_1],(t)) :		\
+	 exif_content_get_entry((d)->ifd[EXIF_IFD_EXIF],(t)) ?		\
+	 exif_content_get_entry((d)->ifd[EXIF_IFD_EXIF],(t)) :		\
+	 exif_content_get_entry((d)->ifd[EXIF_IFD_GPS],(t)) ?		\
+	 exif_content_get_entry((d)->ifd[EXIF_IFD_GPS],(t)) :		\
+	 exif_content_get_entry((d)->ifd[EXIF_IFD_INTEROPERABILITY],(t)) ?	\
+	 exif_content_get_entry((d)->ifd[EXIF_IFD_INTEROPERABILITY],(t)) : NULL)
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __EXIF_DATA_H__ */
diff --git a/src/libexif/exif-entry.c b/src/libexif/exif-entry.c
new file mode 100644
index 0000000..09d37e2
--- /dev/null
+++ b/src/libexif/exif-entry.c
@@ -0,0 +1,1383 @@
+/* exif-entry.c
+ *
+ * Copyright (c) 2001 Lutz Mueller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+
+#include <libexif/exif-entry.h>
+#include <libexif/exif-ifd.h>
+#include <libexif/exif-utils.h>
+#include <libexif/i18n.h>
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+
+struct _ExifEntryPrivate
+{
+	unsigned int ref_count;	/* references held; exif_entry_unref frees the entry at 0 */
+
+	ExifMem *mem;	/* allocator used to allocate and free the entry, this struct and e->data */
+};
+
+/* This function is hidden in exif-data.c */
+ExifLog *exif_data_get_log (ExifData *);
+
+static void
+exif_entry_log (ExifEntry *e, ExifLogCode code, const char *format, ...)
+{
+	/* Use the log of the ExifData owning e; a detached entry gets a NULL log. */
+	ExifLog *log = (e && e->parent && e->parent->parent) ?
+		exif_data_get_log (e->parent->parent) : NULL;
+	va_list ap;
+
+	va_start (ap, format);
+	exif_logv (log, code, "ExifEntry", format, ap);
+	va_end (ap);
+}
+
+static void *
+exif_entry_alloc (ExifEntry *e, unsigned int i)
+{
+	ExifLog *log = NULL;
+	void *buf;
+
+	/* Get i bytes from the entry's allocator; no entry or i == 0 gives NULL. */
+	if (!(e && e->priv && i)) return NULL;
+	buf = exif_mem_alloc (e->priv->mem, i);
+	if (!buf) {	/* failed: hand the size to EXIF_LOG_NO_MEMORY */
+		if (e->parent && e->parent->parent)
+			log = exif_data_get_log (e->parent->parent);
+		EXIF_LOG_NO_MEMORY (log, "ExifEntry", i);
+	}
+	return buf;
+}
+
+static void *
+exif_entry_realloc (ExifEntry *e, void *d_orig, unsigned int i)
+{
+	ExifLog *log = NULL;
+	void *resized;
+
+	if (!(e && e->priv)) return NULL;	/* no allocator to work with */
+	if (i == 0) {	/* a zero size releases the buffer */
+		exif_mem_free (e->priv->mem, d_orig);
+		return NULL;
+	}
+	resized = exif_mem_realloc (e->priv->mem, d_orig, i);
+	if (resized) return resized;
+	if (e->parent && e->parent->parent)	/* failed: look up the owner's log */
+		log = exif_data_get_log (e->parent->parent);
+	EXIF_LOG_NO_MEMORY (log, "ExifEntry", i);
+	return NULL;
+}
+
+ExifEntry *
+exif_entry_new (void)
+{
+	ExifEntry *entry;
+	ExifMem *default_mem = exif_mem_new_default ();
+
+	entry = exif_entry_new_mem (default_mem);
+	exif_mem_unref (default_mem);	/* drop the local reference; a created entry took its own */
+	return entry;
+}
+
+ExifEntry *
+exif_entry_new_mem (ExifMem *mem)
+{
+	ExifEntry *entry = exif_mem_alloc (mem, sizeof (ExifEntry));
+
+	if (!entry) return NULL;
+	entry->priv = exif_mem_alloc (mem, sizeof (ExifEntryPrivate));
+	if (!entry->priv) {
+		exif_mem_free (mem, entry);	/* do not leak the half-built entry */
+		return NULL;
+	}
+	exif_mem_ref (mem);		/* the entry holds a reference on its allocator */
+	entry->priv->mem = mem;
+	entry->priv->ref_count = 1;	/* the caller owns the first reference */
+	return entry;
+}
+
+void
+exif_entry_ref (ExifEntry *e)
+{
+	/* Take one more reference on e; a NULL entry is ignored. */
+	if (e)
+		e->priv->ref_count += 1;
+}
+
+void
+exif_entry_unref (ExifEntry *e)
+{
+	/* Drop one reference; the entry is freed once the count reaches zero. */
+	if (e == NULL)
+		return;
+	if (--e->priv->ref_count == 0)
+		exif_entry_free (e);
+}
+
+void
+exif_entry_free (ExifEntry *e)
+{
+	ExifMem *allocator;
+
+	/* An entry without a private part is left untouched. */
+	if (e == NULL || e->priv == NULL)
+		return;
+	allocator = e->priv->mem;	/* fetched before priv itself is released */
+	if (e->data != NULL) exif_mem_free (allocator, e->data);
+	exif_mem_free (allocator, e->priv);
+	exif_mem_free (allocator, e);
+	exif_mem_unref (allocator);
+}
+
+/* Put the 8-byte "ASCII\0\0\0" character code in front of the entry's data.
+ * Returns 1 on success; if memory runs out the entry is emptied (its old
+ * buffer is released, not leaked) and 0 is returned. */
+static int
+exif_entry_prepend_ascii_code (ExifEntry *e)
+{
+	unsigned char *d = exif_entry_realloc (e, e->data, 8 + e->size);
+
+	if (!d) {
+		if (e->data) exif_mem_free (e->priv->mem, e->data);
+		e->data = NULL;
+		e->size = 0;
+		e->components = 0;
+		return 0;
+	}
+	e->data = d;
+	memmove (e->data + 8, e->data, e->size);
+	memcpy (e->data, "ASCII\0\0\0", 8);
+	e->size += 8;
+	e->components += 8;
+	return 1;
+}
+
+/* Bring a non-conforming entry in line with the specification: LONG data of
+ * SHORT-only tags is narrowed, SRATIONAL data of RATIONAL-only tags is
+ * relabelled, and UserComment gets format UNDEFINED plus a character code. */
+void
+exif_entry_fix (ExifEntry *e)
+{
+	unsigned int i;
+	unsigned char *d;
+	ExifByteOrder o;
+	ExifRational r;
+	ExifSRational sr;
+
+	if (!e || !e->priv) return;
+
+	switch (e->tag) {
+	
+	/* These tags all need to be of format SHORT. */
+	case EXIF_TAG_YCBCR_SUB_SAMPLING:
+	case EXIF_TAG_SUBJECT_AREA:
+	case EXIF_TAG_COLOR_SPACE:
+	case EXIF_TAG_PLANAR_CONFIGURATION:
+	case EXIF_TAG_SENSING_METHOD:
+	case EXIF_TAG_ORIENTATION:
+	case EXIF_TAG_YCBCR_POSITIONING:
+	case EXIF_TAG_PHOTOMETRIC_INTERPRETATION:
+	case EXIF_TAG_CUSTOM_RENDERED:
+	case EXIF_TAG_EXPOSURE_MODE:
+	case EXIF_TAG_WHITE_BALANCE:
+	case EXIF_TAG_SCENE_CAPTURE_TYPE:
+	case EXIF_TAG_GAIN_CONTROL:
+	case EXIF_TAG_SATURATION:
+	case EXIF_TAG_CONTRAST:
+	case EXIF_TAG_SHARPNESS:
+		switch (e->format) {
+		case EXIF_FORMAT_LONG:
+			if (!e->parent || !e->parent->parent) break;
+			o = exif_data_get_byte_order (e->parent->parent);
+			for (i = 0; i < e->components; i++)
+				exif_set_short (e->data + i *
+					exif_format_get_size (EXIF_FORMAT_SHORT), o,
+					(ExifShort) exif_get_long (e->data + i *
+					exif_format_get_size (EXIF_FORMAT_LONG), o));
+			e->format = EXIF_FORMAT_SHORT;
+			e->size = e->components * exif_format_get_size (e->format);
+			/* Shrinking: keep the old buffer if this fails (assumes a realloc-like exif_mem_realloc). */
+			d = exif_entry_realloc (e, e->data, e->size);
+			if (d || !e->size) e->data = d;
+			exif_entry_log (e, EXIF_LOG_CODE_DEBUG,
+				"Tag '%s' was of format '%s' (which is "
+				"against specification) and has been "
+				"changed to format '%s'.",
+				exif_tag_get_name (e->tag), 
+				exif_format_get_name (EXIF_FORMAT_LONG),
+				exif_format_get_name (EXIF_FORMAT_SHORT));
+			break;
+		case EXIF_FORMAT_SHORT:
+		default:
+			break;
+		}
+		break;
+
+	/* All these tags need to be of format 'Rational'. */
+	case EXIF_TAG_FNUMBER:
+	case EXIF_TAG_APERTURE_VALUE:
+	case EXIF_TAG_EXPOSURE_TIME:
+	case EXIF_TAG_FOCAL_LENGTH:
+		switch (e->format) {
+		case EXIF_FORMAT_SRATIONAL:
+			if (!e->parent || !e->parent->parent) break;
+			o = exif_data_get_byte_order (e->parent->parent);
+			for (i = 0; i < e->components; i++) {
+				sr = exif_get_srational (e->data + i *
+					exif_format_get_size (EXIF_FORMAT_SRATIONAL), o);
+				r.numerator = (ExifLong) sr.numerator;
+				r.denominator = (ExifLong) sr.denominator;
+				exif_set_rational (e->data + i *
+					exif_format_get_size (EXIF_FORMAT_RATIONAL), o, r);
+			}
+			e->format = EXIF_FORMAT_RATIONAL;
+			exif_entry_log (e, EXIF_LOG_CODE_DEBUG,
+				"Tag '%s' was of format '%s' (which is "
+				"against specification) and has been "
+				"changed to format '%s'.",
+				exif_tag_get_name (e->tag),
+				exif_format_get_name (EXIF_FORMAT_SRATIONAL),
+				exif_format_get_name (EXIF_FORMAT_RATIONAL));
+			break;
+		default:
+			break;
+		}
+		break;
+
+	case EXIF_TAG_USER_COMMENT:
+
+		/* Format needs to be UNDEFINED. */
+		if (e->format != EXIF_FORMAT_UNDEFINED) {
+			exif_entry_log (e, EXIF_LOG_CODE_DEBUG,
+				"Tag 'UserComment' had invalid format '%s'. "
+				"Format has been set to 'undefined'.",
+				exif_format_get_name (e->format));
+			e->format = EXIF_FORMAT_UNDEFINED;
+		}
+		/* Some packages like Canon ZoomBrowser EX 4.5 store
+		   only one zero byte followed by 7 bytes of rubbish */
+		if ((e->size >= 8) && (e->data[0] == 0)) {
+			memcpy(e->data, "\0\0\0\0\0\0\0\0", 8);
+		}
+
+		/* Some cameras fill the tag with '\0' or ' '. */
+		for (i = 0; i < e->size &&
+			    (!e->data[i] || (e->data[i] == ' ')); i++);
+		if (i && (i == e->size)) {
+			exif_entry_log (e, EXIF_LOG_CODE_DEBUG,
+				"Tag 'UserComment' contained unnecessary "
+				"data which has been removed.");
+			exif_mem_free (e->priv->mem, e->data);
+			e->data = NULL;
+			e->size = 0;
+			e->components = 0;
+		}
+
+		/* There need to be at least 8 bytes. */
+		if (e->size < 8) {
+			if (!exif_entry_prepend_ascii_code (e)) return;	/* assume ASCII */
+			exif_entry_log (e, EXIF_LOG_CODE_DEBUG,
+				"Tag 'UserComment' has been expanded to at "
+				"least 8 bytes in order to follow the "
+				"specification.");
+			break;
+		}
+
+		/*
+		 * If the first 8 bytes are empty and real data starts
+		 * afterwards, let's assume ASCII and claim the 8 first
+		 * bytes for the format specifier.
+		 */
+		if (i >= 8) {
+			exif_entry_log (e, EXIF_LOG_CODE_DEBUG,
+				"Tag 'UserComment' did not start with "
+				"format identifyer. This has been fixed.");
+			memcpy (e->data, "ASCII\0\0\0", 8);
+		}
+
+		/* First 8 bytes need to follow the specification. */
+		if (memcmp (e->data, "ASCII\0\0\0"     , 8) &&
+		    memcmp (e->data, "UNICODE\0"       , 8) &&
+		    memcmp (e->data, "JIS\0\0\0\0\0"   , 8) &&
+		    memcmp (e->data, "\0\0\0\0\0\0\0\0", 8)) {
+			if (!exif_entry_prepend_ascii_code (e)) break;	/* assume ASCII */
+			exif_entry_log (e, EXIF_LOG_CODE_DEBUG,
+				"Tag 'UserComment' did not start with "
+				"format identifyer. This has been fixed.");
+			break;
+		}
+
+		break;
+	default:
+		break;
+	}
+}
+
+void
+exif_entry_dump (ExifEntry *e, unsigned int indent)
+{
+	char buf[1024];
+	char value[1024];
+	size_t pad = sizeof (buf) - 1;
+
+	/* Print tag, format, component count, size and value of e on stdout. */
+	if (!e) return;
+
+	/* Two blanks per indent level, clamped so that buf cannot overflow. */
+	if (indent <= pad / 2) pad = 2 * (size_t) indent;
+	memset (buf, ' ', pad);
+	buf[pad] = '\0';
+	printf ("%sTag: 0x%x ('%s')\n", buf, e->tag,
+		exif_tag_get_name (e->tag));
+	printf ("%s  Format: %i ('%s')\n", buf, e->format,
+		exif_format_get_name (e->format));
+	printf ("%s  Components: %i\n", buf, (int) e->components);
+	printf ("%s  Size: %i\n", buf, e->size);
+	printf ("%s  Value: %s\n", buf, exif_entry_get_value (e, value, sizeof(value)));
+}
+
+#define CF(entry,target,v,maxlen)					\
+{	/* CF: check the format of the entry; v and maxlen are not used */ \
+	if (entry->format != target) {					\
+		exif_entry_log (entry, EXIF_LOG_CODE_CORRUPT_DATA,	\
+			_("The tag '%s' contains data of an invalid "	\
+			"format ('%s', expected '%s')."),		\
+			exif_tag_get_name (entry->tag),			\
+			exif_format_get_name (entry->format),		\
+			exif_format_get_name (target));			\
+		break;	/* leaves the switch or loop around the macro call */ \
+	}								\
+}
+
+#define CC(entry,target,v,maxlen)					\
+{	/* CC: check the component count; v and maxlen are not used */	\
+	if (entry->components != target) {				\
+		exif_entry_log (entry, EXIF_LOG_CODE_CORRUPT_DATA,	\
+			_("The tag '%s' contains an invalid number of "	\
+			  "components (%i, expected %i)."),		\
+			exif_tag_get_name (entry->tag),		\
+			(int) entry->components, (int) target);		\
+		break;	/* leaves the switch or loop around the macro call */ \
+	}								\
+}
+
+static struct {	/* Value names for tags whose SHORT value is used directly as index into strings[] */
+	ExifTag tag;
+	const char *strings[10];	/* NULL-terminated; "" fills values that have no name here */
+} list[] = {
+  { EXIF_TAG_PLANAR_CONFIGURATION,
+    { N_("chunky format"), N_("planar format"), NULL}},
+  { EXIF_TAG_SENSING_METHOD,
+    { "", N_("Not defined"), N_("One-chip color area sensor"),
+      N_("Two-chip color area sensor"), N_("Three-chip color area sensor"),
+      N_("Color sequential area sensor"), "", N_("Trilinear sensor"),
+      N_("Color sequential linear sensor"), NULL}},
+  { EXIF_TAG_ORIENTATION,
+    { "", N_("top - left"), N_("top - right"), N_("bottom - right"),
+      N_("bottom - left"), N_("left - top"), N_("right - top"),
+      N_("right - bottom"), N_("left - bottom"), NULL}},
+  { EXIF_TAG_YCBCR_POSITIONING,
+    { "", N_("centered"), N_("co-sited"), NULL}},
+  { EXIF_TAG_PHOTOMETRIC_INTERPRETATION, {"", N_("RGB"), N_("YCbCr"), NULL}},
+  { EXIF_TAG_CUSTOM_RENDERED,
+    { N_("Normal process"), N_("Custom process"), NULL}},
+  { EXIF_TAG_EXPOSURE_MODE,
+    { N_("Auto exposure"), N_("Manual exposure"), N_("Auto bracket"), NULL}},
+  { EXIF_TAG_WHITE_BALANCE,
+    { N_("Auto white balance"), N_("Manual white balance"), NULL}},
+  { EXIF_TAG_SCENE_CAPTURE_TYPE,
+    { N_("Standard"), N_("Landscape"), N_("Portrait"),
+      N_("Night scene"), NULL}},
+  { EXIF_TAG_GAIN_CONTROL,
+    { N_("Normal"), N_("Low gain up"), N_("High gain up"),
+      N_("Low gain down"), N_("High gain down"), NULL}},
+  { EXIF_TAG_SATURATION,
+    { N_("Normal"), N_("Low saturation"), N_("High saturation"), NULL}},
+  { EXIF_TAG_CONTRAST , {N_("Normal"), N_("Soft"), N_("Hard"), NULL}},
+  { EXIF_TAG_SHARPNESS, {N_("Normal"), N_("Soft"), N_("Hard"), NULL}},
+  { 0, {NULL}}	/* end marker: the tag search in exif_entry_get_value stops at tag 0 */
+};
+
+static struct {	/* Value names for tags whose SHORT values are looked up by number rather than used as index */
+  ExifTag tag;
+  struct {
+    int index;	/* tag value; the lookup in exif_entry_get_value walks these in ascending order */
+    const char *values[4];	/* NULL-terminated alternative texts of different length */
+  } elem[25];	/* the used part ends with an element that has no strings */
+} list2[] = {
+  { EXIF_TAG_METERING_MODE,
+    { {  0, {N_("Unknown"), NULL}},
+      {  1, {N_("Average"), N_("avg"), NULL}},
+      {  2, {N_("Center-Weighted Average"), N_("Center-Weight"), NULL}},
+      {  3, {N_("Spot"), NULL}},
+      {  4, {N_("Multi Spot"), NULL}},
+      {  5, {N_("Pattern"), NULL}},
+      {  6, {N_("Partial"), NULL}},
+      {255, {N_("Other"), NULL}},
+      {  0, {NULL}}}},
+  { EXIF_TAG_COMPRESSION,
+    { {1, {N_("Uncompressed"), NULL}},
+      {5, {N_("LZW compression"), NULL}},
+      {6, {N_("JPEG compression"), NULL}},
+      {0, {NULL}}}},
+  { EXIF_TAG_LIGHT_SOURCE,
+    { {  0, {N_("Unknown"), NULL}},
+      {  1, {N_("Daylight"), NULL}},
+      {  2, {N_("Fluorescent"), NULL}},
+      {  3, {N_("Tungsten incandescent light"), N_("Tungsten"), NULL}},
+      {  4, {N_("Flash"), NULL}},
+      {  9, {N_("Fine weather"), NULL}},
+      { 10, {N_("Cloudy weather"), N_("cloudy"), NULL}},
+      { 11, {N_("Shade"), NULL}},
+      { 12, {N_("Daylight fluorescent"), NULL}},
+      { 13, {N_("Day white fluorescent"), NULL}},
+      { 14, {N_("Cool white fluorescent"), NULL}},
+      { 15, {N_("White fluorescent"), NULL}},
+      { 17, {N_("Standard light A"), NULL}},
+      { 18, {N_("Standard light B"), NULL}},
+      { 19, {N_("Standard light C"), NULL}},
+      { 20, {N_("D55"), NULL}},
+      { 21, {N_("D65"), NULL}},
+      { 22, {N_("D75"), NULL}},
+      { 24, {N_("ISO studio tungsten"),NULL}},
+      {255, {N_("Other"), NULL}},
+      {  0, {NULL}}}},
+  { EXIF_TAG_FOCAL_PLANE_RESOLUTION_UNIT,
+    { {2, {N_("Inch"), N_("in"), NULL}},
+      {3, {N_("Centimeter"), N_("cm"), NULL}},
+      {0, {NULL}}}},
+  { EXIF_TAG_RESOLUTION_UNIT,
+    { {2, {N_("Inch"), N_("in"), NULL}},
+      {3, {N_("Centimeter"), N_("cm"), NULL}}, 
+      {0, {NULL}}}},
+  { EXIF_TAG_EXPOSURE_PROGRAM,
+    { {0, {N_("Not defined"), NULL}},
+      {1, {N_("Manual"), NULL}},
+      {2, {N_("Normal program"), N_("Normal"), NULL}},
+      {3, {N_("Aperture priority"), N_("Aperture"), NULL}},
+      {4, {N_("Shutter priority"),N_("Shutter"), NULL}},
+      {5, {N_("Creative program (biased toward depth of field)"),
+	   N_("Creative"), NULL}},
+      {6, {N_("Creative program (biased toward fast shutter speed"),
+	   N_("Action"), NULL}},
+      {7, {N_("Portrait mode (for closeup photos with the background out "
+	      "of focus)"), N_("Portrait"), NULL}},
+      {8, {N_("Landscape mode (for landscape photos with the background "
+	      "in focus)"), N_("Landscape"), NULL}},
+      {0, {NULL}}}},
+  { EXIF_TAG_FLASH,
+    { {0x0000, {N_("Flash did not fire."), N_("no flash"), NULL}},
+      {0x0001, {N_("Flash fired."), N_("flash"), N_("Yes"), NULL}},
+      {0x0005, {N_("Strobe return light not detected."), N_("W/o strobe"),
+		NULL}},
+      {0x0007, {N_("Strobe return light detected."), N_("W. strobe"), NULL}},
+      {0x0009, {N_("Flash fired, compulsatory flash mode"), NULL}},
+      {0x000d, {N_("Flash fired, compulsatory flash mode, return light "
+		   "not detected."), NULL}},
+      {0x000f, {N_("Flash fired, compulsatory flash mode, return light "
+		   "detected."), NULL}},
+      {0x0010, {N_("Flash did not fire, compulsatory flash mode."), NULL}},
+      {0x0018, {N_("Flash did not fire, auto mode."), NULL}},
+      {0x0019, {N_("Flash fired, auto mode."), NULL}},
+      {0x001d, {N_("Flash fired, auto mode, return light not detected."),
+		NULL}},
+      {0x001f, {N_("Flash fired, auto mode, return light detected."), NULL}},
+      {0x0020, {N_("No flash function."),NULL}},
+      {0x0041, {N_("Flash fired, red-eye reduction mode."), NULL}},
+      {0x0045, {N_("Flash fired, red-eye reduction mode, return light "
+		   "not detected."), NULL}},
+      {0x0047, {N_("Flash fired, red-eye reduction mode, return light "
+		   "detected."), NULL}},
+      {0x0049, {N_("Flash fired, compulsory flash mode, red-eye reduction "
+		   "mode."), NULL}},
+      {0x004d, {N_("Flash fired, compulsory flash mode, red-eye reduction "
+		  "mode, return light not detected"), NULL}},
+      {0x004f, {N_("Flash fired, compulsory flash mode, red-eye reduction, "
+		   "return light detected"), NULL}},
+      {0x0058, {N_("Flash did not fire, auto mode, red-eye reduction mode"), NULL}},
+      {0x0059, {N_("Flash fired, auto mode, red-eye reduction mode"), NULL}},
+      {0x005d, {N_("Flash fired, auto mode, return light not detected, "
+		   "red-eye reduction mode."), NULL}},
+      {0x005f, {N_("Flash fired, auto mode, return light detected, "
+		   "red-eye reduction mode."), NULL}},
+      {0x0000, {NULL}}}},
+  {EXIF_TAG_SUBJECT_DISTANCE_RANGE, 
+    { {0, {N_("Unknown"), N_("?"), NULL}},
+      {1, {N_("Macro"), NULL}},
+      {2, {N_("Close view"), N_("close"), NULL}},
+      {3, {N_("Distant view"), N_("distant"), NULL}},
+      {0, {NULL}}}},
+  { EXIF_TAG_COLOR_SPACE,
+    { {1, {N_("sRGB"), NULL}},
+      {0xffff, {N_("Uncalibrated"), NULL}}}},	/* no explicit end element: the rest of elem[] is zero-initialized */
+  {0, }	/* end marker: the tag search in exif_entry_get_value stops at tag 0 */
+};
+
+/*
+ * Render the value of entry e as human-readable text in val and return val.
+ * At most maxlen bytes are written, including the terminating NUL. val is
+ * returned untouched when e is NULL, is not attached to an ExifData (needed
+ * for the byte order) or when maxlen is 0.
+ */
+const char *
+exif_entry_get_value (ExifEntry *e, char *val, unsigned int maxlen)
+{
+	unsigned int i, j, k, l, ts;
+	const unsigned char *t;
+	ExifByte v_byte;
+	ExifShort v_short, v_short2, v_short3, v_short4;
+	ExifLong v_long;
+	ExifSLong v_slong;
+	ExifRational v_rat;
+	ExifSRational v_srat;
+	char b[64];
+	const char *c;
+	ExifByteOrder o;
+	double d;
+	ExifEntry *entry;
+	static struct {
+		char *label;
+		char major, minor;
+	} versions[] = {
+		{"0110", 1,  1},
+		{"0120", 1,  2},
+		{"0200", 2,  0},
+		{"0210", 2,  1},
+		{"0220", 2,  2},
+		{"0221", 2, 21},
+		{NULL  , 0,  0}
+	};
+
+	/* FIXME: This belongs to somewhere else. */
+	bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8");
+	bindtextdomain (GETTEXT_PACKAGE, LOCALEDIR);
+
+	/* We need the byte order and room for at least the terminator */
+	if (!e || !e->parent || !e->parent->parent || !maxlen)
+		return val;
+	o = exif_data_get_byte_order (e->parent->parent);
+
+	memset (val, 0, maxlen);
+	memset (b, 0, sizeof (b));
+	if (!--maxlen) return val;	/* only the terminator fits; keeps maxlen - 1 below from wrapping */
+
+	/* Sanity check */
+	if (e->size != e->components * exif_format_get_size (e->format)) {
+		snprintf (val, maxlen, _("Invalid size of entry (%i, "
+			"expected %li x %i)."), e->size, e->components,
+				exif_format_get_size (e->format));
+		return val;
+	}
+
+	switch (e->tag) {
+	case EXIF_TAG_USER_COMMENT:
+
+		/*
+		 * The specification says UNDEFINED, but some
+		 * manufacturers don't care and use ASCII. If this is the
+		 * case here, only refuse to read it if there is no chance
+		 * of finding readable data.
+		 */
+		if ((e->format != EXIF_FORMAT_ASCII) || 
+		    (e->size <= 8) ||
+		    ( memcmp (e->data, "ASCII\0\0\0"  , 8) &&
+		      memcmp (e->data, "UNICODE\0"    , 8) &&
+		      memcmp (e->data, "JIS\0\0\0\0\0", 8) &&
+		      memcmp (e->data, "\0\0\0\0\0\0\0\0", 8)))
+			CF (e, EXIF_FORMAT_UNDEFINED, val, maxlen);
+
+		/*
+		 * Note that, according to the specification (V2.1, p 40),
+		 * the user comment field does not have to be 
+		 * NULL terminated.
+		 */
+		if ((e->size >= 8) && !memcmp (e->data, "ASCII\0\0\0", 8)) {
+			strncpy (val, (char *) e->data + 8, MIN (e->size - 8, maxlen));
+			break;
+		}
+		if ((e->size >= 8) && !memcmp (e->data, "UNICODE\0", 8)) {
+			strncpy (val, _("Unsupported UNICODE string"), maxlen);
+			break;
+		}
+		if ((e->size >= 8) && !memcmp (e->data, "JIS\0\0\0\0\0", 8)) {
+			strncpy (val, _("Unsupported JIS string"), maxlen);
+			break;
+		}
+
+		/* Check if there is really some information in the tag. */
+		for (i = 0; (i < e->size) &&
+			    (!e->data[i] || (e->data[i] == ' ')); i++);
+		if (i == e->size) break;
+
+		/*
+		 * If we reach this point, the tag does not
+		 * comply with the standard and seems to contain data.
+		 * Print as much as possible.
+		 */
+		exif_entry_log (e, EXIF_LOG_CODE_DEBUG,
+			"Tag UserComment does not comply "
+			"with standard but contains data.");
+		for (; (i < e->size)  && (strlen (val) < maxlen - 1); i++) {
+			exif_entry_log (e, EXIF_LOG_CODE_DEBUG,
+				"Byte at position %i: 0x%02x", i, e->data[i]);
+			val[strlen (val)] =
+				isprint (e->data[i]) ? e->data[i] : '.';
+		}
+		break;
+
+	case EXIF_TAG_EXIF_VERSION:
+		CF (e, EXIF_FORMAT_UNDEFINED, val, maxlen);
+		CC (e, 4, val, maxlen);
+		strncpy (val, _("Unknown Exif Version"), maxlen);
+		for (i = 0; versions[i].label; i++) {
+			if (!memcmp (e->data, versions[i].label, 4)) {
+				snprintf (val, maxlen,
+					_("Exif Version %d.%d"),
+					versions[i].major,
+					versions[i].minor);
+				break;
+			}
+		}
+		break;
+	case EXIF_TAG_FLASH_PIX_VERSION:
+		CF (e, EXIF_FORMAT_UNDEFINED, val, maxlen);
+		CC (e, 4, val, maxlen);
+		if (!memcmp (e->data, "0100", 4))
+			strncpy (val, _("FlashPix Version 1.0"), maxlen);
+		else if (!memcmp (e->data, "0101", 4))
+			strncpy (val, _("FlashPix Version 1.01"), maxlen);
+		else
+			strncpy (val, _("Unknown FlashPix Version"), maxlen);
+		break;
+	case EXIF_TAG_COPYRIGHT:
+		CF (e, EXIF_FORMAT_ASCII, val, maxlen);
+
+		/*
+		 * First part: Photographer (ignored if all blanks, like "   ").
+		 * The data may lack its NUL, so scans are bounded by e->size.
+		 */
+		for (l = 0; e->data && (l < e->size) && e->data[l]; l++);
+		for (i = 0; (i < l) && (e->data[i] == ' '); i++);
+		if (i < l)
+			strncpy (val, (char *) e->data, MIN (maxlen, l));
+		else
+			strncpy (val, _("[None]"), maxlen);
+		strncat (val, " ", maxlen - strlen (val));
+		strncat (val, _("(Photographer)"), maxlen - strlen (val));
+
+		/* Second part: Editor, the bytes after the first NUL (if any). */
+		ts = (l < e->size) ? e->size - l - 1 : 0;
+		t = (e->data && ts) ? e->data + l + 1 : NULL;
+		strncat (val, " - ", maxlen - strlen (val));
+		for (i = 0; t && (i < ts) && (t[i] == ' '); i++);
+		if (t && (i < ts) && t[i])
+			strncat (val, (const char *) t, MIN (maxlen - strlen (val), ts));
+		else
+			strncat (val, _("[None]"), maxlen - strlen (val));
+		strncat (val, " ", maxlen - strlen (val));
+		strncat (val, _("(Editor)"), maxlen - strlen (val));
+		break;
+	case EXIF_TAG_FNUMBER:
+		CF (e, EXIF_FORMAT_RATIONAL, val, maxlen);
+		CC (e, 1, val, maxlen);
+		v_rat = exif_get_rational (e->data, o);
+		if (!v_rat.denominator) return val;
+		snprintf (val, maxlen, "f/%.01f", (float) v_rat.numerator /
+						    (float) v_rat.denominator);
+		break;
+	case EXIF_TAG_APERTURE_VALUE:
+		CF (e, EXIF_FORMAT_RATIONAL, val, maxlen);
+		CC (e, 1, val, maxlen);
+		v_rat = exif_get_rational (e->data, o);
+		if (!v_rat.denominator) return val;
+		snprintf (val, maxlen, "f/%.01f",
+			  pow (2 , ((float) v_rat.numerator /
+				    (float) v_rat.denominator) / 2.));
+		break;
+	case EXIF_TAG_FOCAL_LENGTH:
+		CF (e, EXIF_FORMAT_RATIONAL, val, maxlen);
+		CC (e, 1, val, maxlen);
+		v_rat = exif_get_rational (e->data, o);
+		if (!v_rat.denominator) return val;
+
+		/*
+		 * For calculation of the 35mm equivalent,
+		 * Minolta cameras need a multiplier that depends on the
+		 * camera model.
+		 */
+		d = 0.;
+		entry = exif_content_get_entry (
+			e->parent->parent->ifd[EXIF_IFD_0], EXIF_TAG_MAKE);
+		if (entry && entry->data &&
+		    !strncmp (entry->data, "Minolta", 7)) {
+			entry = exif_content_get_entry (
+					e->parent->parent->ifd[EXIF_IFD_0],
+					EXIF_TAG_MODEL);
+			if (entry && entry->data) {
+				if (!strncmp (entry->data, "DiMAGE 7", 8))
+					d = 3.9;
+				else if (!strncmp (entry->data, "DiMAGE 5", 8))
+					d = 4.9;
+			}
+		}
+		if (d)
+			snprintf (b, sizeof (b), _(" (35 equivalent: %d mm)"),
+				  (int) (d * (double) v_rat.numerator /
+					     (double) v_rat.denominator));
+
+		snprintf (val, maxlen, "%.1f mm",
+			(float) v_rat.numerator / (float) v_rat.denominator);
+		if (maxlen > strlen (val) + strlen (b))
+			strncat (val, b, maxlen - strlen (val) - 1);
+		break;
+	case EXIF_TAG_SUBJECT_DISTANCE:
+		CF (e, EXIF_FORMAT_RATIONAL, val, maxlen);
+		CC (e, 1, val, maxlen);
+		v_rat = exif_get_rational (e->data, o);
+		if (!v_rat.denominator) return val;
+		snprintf (val, maxlen, "%.1f m", (float) v_rat.numerator /
+						   (float) v_rat.denominator);
+		break;
+	case EXIF_TAG_EXPOSURE_TIME:
+		CF (e, EXIF_FORMAT_RATIONAL, val, maxlen);
+		CC (e, 1, val, maxlen);
+		v_rat = exif_get_rational (e->data, o);
+		if (!v_rat.denominator) return val;
+		d = (double) v_rat.numerator / (double) v_rat.denominator;
+		if (d && (d < 1))	/* d == 0 would make 1/d infinite */
+			snprintf (val, maxlen, _("1/%d"),
+				  (int) (1. / d));
+		else
+			snprintf (val, maxlen, _("%d"), (int) d);
+		if (maxlen > strlen (val) + strlen (_(" sec.")))
+			strncat (val, _(" sec."), maxlen - strlen (val) - 1);
+		break;
+	case EXIF_TAG_SHUTTER_SPEED_VALUE:
+		CF (e, EXIF_FORMAT_SRATIONAL, val, maxlen);
+		CC (e, 1, val, maxlen);
+		v_srat = exif_get_srational (e->data, o);
+		if (!v_srat.denominator) return val;
+		snprintf (val, maxlen, "%.0f/%.0f", (float) v_srat.numerator,
+			  (float) v_srat.denominator);
+		if (maxlen > strlen (val) + strlen (_(" sec.")))
+			strncat (val, _(" sec."), maxlen - strlen (val) - 1);
+		snprintf (b, sizeof (b), " (APEX: %i)",
+			(int) pow (sqrt(2), (float) v_srat.numerator /
+				            (float) v_srat.denominator));
+		if (maxlen > strlen (val) + strlen (b))
+			strncat (val, b, maxlen - strlen (val) - 1);
+		break;
+	case EXIF_TAG_BRIGHTNESS_VALUE:
+		CF (e, EXIF_FORMAT_SRATIONAL, val, maxlen);
+		CC (e, 1, val, maxlen);
+		v_srat = exif_get_srational (e->data, o);
+		snprintf (val, maxlen, "%i/%i", (int) v_srat.numerator,
+						  (int) v_srat.denominator);
+		/* FIXME: How do I calculate the APEX value? */
+		break;
+	case EXIF_TAG_FILE_SOURCE:
+		CF (e, EXIF_FORMAT_UNDEFINED, val, maxlen);
+		CC (e, 1, val, maxlen);
+		switch (e->data[0]) {
+		case 0x03: strncpy (val, _("DSC"), maxlen); break;
+		default: snprintf (val, maxlen, "0x%02x", e->data[0]); break;
+		}
+		break;
+	case EXIF_TAG_COMPONENTS_CONFIGURATION:
+		CF (e, EXIF_FORMAT_UNDEFINED, val, maxlen);
+		CC (e, 4, val, maxlen);
+		for (i = 0; i < 4; i++) {
+			switch (e->data[i]) {
+			case 0: c = _("-"); break;
+			case 1: c = _("Y"); break;
+			case 2: c = _("Cb"); break;
+			case 3: c = _("Cr"); break;
+			case 4: c = _("R"); break;
+			case 5: c = _("G"); break;
+			case 6: c = _("B"); break;
+			default: c = _("reserved"); break;
+			}
+			strncat (val, c, maxlen - strlen (val));
+			if (i < 3) strncat (val, " ", maxlen - strlen (val));
+		}
+		break;
+	case EXIF_TAG_EXPOSURE_BIAS_VALUE:
+		CF (e, EXIF_FORMAT_SRATIONAL, val, maxlen);
+		CC (e, 1, val, maxlen);
+		v_srat = exif_get_srational (e->data, o);
+		if (!v_srat.denominator) return val;
+		snprintf (val, maxlen, "%s%.01f",
+			  (v_srat.numerator && ((v_srat.numerator > 0) == (v_srat.denominator > 0))) ? "+" : "",
+			  (double) v_srat.numerator /
+			  (double) v_srat.denominator);
+		break;
+	case EXIF_TAG_YCBCR_SUB_SAMPLING:
+		CF (e, EXIF_FORMAT_SHORT, val, maxlen);
+		CC (e, 2, val, maxlen);
+		v_short  = exif_get_short (e->data, o);
+		v_short2 = exif_get_short (
+			e->data + exif_format_get_size (e->format), o);
+		if ((v_short == 2) && (v_short2 == 1))
+			strncpy (val, _("YCbCr4:2:2"), maxlen);
+		else if ((v_short == 2) && (v_short2 == 2))
+			strncpy (val, _("YCbCr4:2:0"), maxlen);
+		else
+			snprintf (val, maxlen, "%i, %i", v_short, v_short2);
+		break;
+	case EXIF_TAG_MAKER_NOTE:
+		CF (e, EXIF_FORMAT_UNDEFINED, val, maxlen);
+		snprintf (val, maxlen, _("%i bytes unknown data"),
+			  (int) e->components);
+		break;
+	case EXIF_TAG_SUBJECT_AREA:
+		CF (e, EXIF_FORMAT_SHORT, val, maxlen);
+		switch (e->components) {
+		case 2:
+			v_short  = exif_get_short (e->data, o);
+			v_short2 = exif_get_short (e->data + 2, o);
+			snprintf (val, maxlen, "(x,y) = (%i,%i)",
+				  v_short, v_short2);
+			break;
+		case 3:
+			v_short  = exif_get_short (e->data, o);
+			v_short2 = exif_get_short (e->data + 2, o);
+			v_short3 = exif_get_short (e->data + 4, o);
+			snprintf (val, maxlen, _("Within distance %i of "
+				"(x,y) = (%i,%i)"), v_short3, v_short,
+				v_short2);
+			break;
+		case 4:
+			v_short  = exif_get_short (e->data, o);
+			v_short2 = exif_get_short (e->data + 2, o);
+			v_short3 = exif_get_short (e->data + 4, o);
+			v_short4 = exif_get_short (e->data + 6, o);
+			snprintf (val, maxlen, _("Within rectangle "
+				"(width %i, height %i) around "
+				"(x,y) = (%i,%i)"), v_short3, v_short4,
+				v_short, v_short2);
+			break;
+		default:
+			snprintf (val, maxlen, _("Unexpected number "
+				"of components (%li, expected 2, 3, or 4)."),
+				e->components);	
+		}
+		break;
+
+	case EXIF_TAG_METERING_MODE:
+	case EXIF_TAG_COMPRESSION:
+	case EXIF_TAG_FOCAL_PLANE_RESOLUTION_UNIT:
+	case EXIF_TAG_RESOLUTION_UNIT:
+	case EXIF_TAG_EXPOSURE_PROGRAM:
+	case EXIF_TAG_FLASH:
+	case EXIF_TAG_SUBJECT_DISTANCE_RANGE:
+	case EXIF_TAG_COLOR_SPACE:
+		CF (e,EXIF_FORMAT_SHORT, val, maxlen);
+		CC (e, 1, val, maxlen);
+		v_short = exif_get_short (e->data, o);
+
+		/* Search the tag */
+		for (i = 0; list2[i].tag && (list2[i].tag != e->tag); i++);
+		if (!list2[i].tag) {
+			strncpy (val, "Internal error.", maxlen - 1);
+			break;
+		}
+
+		/* Find the value; an element without strings ends the table */
+		for (j = 0; list2[i].elem[j].values[0] &&
+			    (list2[i].elem[j].index < v_short); j++);
+		if (list2[i].elem[j].index != v_short) {
+			snprintf (val, maxlen, "Internal error (unknown "
+				  "value %i).", v_short);
+			break;
+		}
+
+		/* Pick the longest alternative that still fits into val */
+		memset (val, 0, maxlen);
+		for (k = 0; (k < 4) && list2[i].elem[j].values[k]; k++) {
+		  l = strlen (_(list2[i].elem[j].values[k]));
+		  if ((maxlen > l) && (strlen (val) < l))
+		    strncpy (val, _(list2[i].elem[j].values[k]), maxlen - 1);
+		}
+		if (!strlen (val)) snprintf (val, maxlen, "%i", v_short);
+
+		break;
+	case EXIF_TAG_PLANAR_CONFIGURATION:
+	case EXIF_TAG_SENSING_METHOD:
+	case EXIF_TAG_ORIENTATION:
+	case EXIF_TAG_YCBCR_POSITIONING:
+	case EXIF_TAG_PHOTOMETRIC_INTERPRETATION:
+	case EXIF_TAG_CUSTOM_RENDERED:
+	case EXIF_TAG_EXPOSURE_MODE:
+	case EXIF_TAG_WHITE_BALANCE:
+	case EXIF_TAG_SCENE_CAPTURE_TYPE:
+	case EXIF_TAG_GAIN_CONTROL:
+	case EXIF_TAG_SATURATION:
+	case EXIF_TAG_CONTRAST:
+	case EXIF_TAG_SHARPNESS:
+		CF (e, EXIF_FORMAT_SHORT, val, maxlen);
+		CC (e, 1, val, maxlen);
+		v_short = exif_get_short (e->data, o);
+
+		/* Search the tag */
+		for (i = 0; list[i].tag && (list[i].tag != e->tag); i++);
+		if (!list[i].tag) {
+			strncpy (val, "Internal error.", maxlen - 1);
+			break;
+		}
+
+		/* Find the value */
+		for (j = 0; list[i].strings[j] && (j < v_short); j++);
+		if (!list[i].strings[j])
+			snprintf (val, maxlen, "%i", v_short);
+		else
+			strncpy (val, _(list[i].strings[j]), maxlen - 1);
+		break; 	
+	default:
+		if (!e->components) break;
+		switch (e->format) {
+		case EXIF_FORMAT_UNDEFINED:
+			break;
+		case EXIF_FORMAT_BYTE:
+		case EXIF_FORMAT_SBYTE:
+			v_byte = e->data[0];
+			snprintf (val, maxlen, "0x%02x", v_byte);
+			maxlen -= strlen (val);
+			for (i = 1; i < e->components; i++) {
+				v_byte = e->data[i];
+				snprintf (b, sizeof (b), ", 0x%02x", v_byte);
+				strncat (val, b, maxlen);
+				maxlen -= strlen (b);
+				if ((signed)maxlen <= 0) break;
+			}
+			break;
+		case EXIF_FORMAT_SHORT:
+		case EXIF_FORMAT_SSHORT:
+			v_short = exif_get_short (e->data, o);
+			snprintf (val, maxlen, "%i", v_short);
+			maxlen -= strlen (val);
+			for (i = 1; i < e->components; i++) {
+				v_short = exif_get_short (e->data +
+					exif_format_get_size (e->format) * i, o);
+				snprintf (b, sizeof (b), ", %i", v_short);
+				strncat (val, b, maxlen);
+				maxlen -= strlen (b);
+				if ((signed)maxlen <= 0) break;
+			}
+			break;
+		case EXIF_FORMAT_LONG:
+			v_long = exif_get_long (e->data, o);
+			snprintf (val, maxlen, "%li", (long int) v_long);
+			maxlen -= strlen (val);
+			for (i = 1; i < e->components; i++) {
+				v_long = exif_get_long (e->data +
+					exif_format_get_size (e->format) * i, o);
+				snprintf (b, sizeof (b), ", %li",
+					(long int) v_long);
+				strncat (val, b, maxlen);
+				maxlen -= strlen (b);
+				if ((signed)maxlen <= 0) break;
+			}
+			break;
+		case EXIF_FORMAT_SLONG:
+			v_slong = exif_get_slong (e->data, o);
+			snprintf (val, maxlen, "%li", (long int) v_slong);
+			maxlen -= strlen (val);
+			for (i = 1; i < e->components; i++) {
+				v_slong = exif_get_slong (e->data +
+					exif_format_get_size (e->format) * i, o);
+				snprintf (b, sizeof (b), ", %li",
+						(long int) v_slong);
+				strncat (val, b, maxlen);
+				maxlen -= strlen (b);
+				if ((signed)maxlen <= 0) break;
+			}
+			break;
+		case EXIF_FORMAT_ASCII:
+			strncpy (val, (char *) e->data, MIN (maxlen, e->size));
+			break;
+		case EXIF_FORMAT_RATIONAL:
+			v_rat = exif_get_rational (e->data, o);
+			if (v_rat.denominator) {
+				snprintf (val, maxlen, "%2.2f", (double)v_rat.numerator / v_rat.denominator);
+			} else {
+				snprintf (val, maxlen, "%lu/%lu", (unsigned long) v_rat.numerator, (unsigned long) v_rat.denominator);
+			}
+			maxlen -= strlen (val);
+			for (i = 1; i < e->components; i++) {
+				v_rat = exif_get_rational (e->data + 8 * i, o);
+				snprintf (b, sizeof (b), ", %2.2f",
+					   (double)v_rat.numerator / v_rat.denominator);
+				strncat (val, b, maxlen);
+				maxlen -= strlen (b);
+				if ((signed)maxlen <= 0) break;
+			}
+			break;
+		case EXIF_FORMAT_SRATIONAL:
+			v_srat = exif_get_srational (e->data, o);
+			if (v_srat.denominator) {
+				snprintf (val, maxlen, "%2.2f", (double)v_srat.numerator / v_srat.denominator);
+			} else {
+				snprintf (val, maxlen, "%i/%i", v_srat.numerator,  v_srat.denominator);
+			}
+			maxlen -= strlen (val);
+			for (i = 1; i < e->components; i++) {
+				v_srat = exif_get_srational (
+					e->data + 8 * i, o);
+				snprintf (b, sizeof (b), ", %2.2f",
+					  (double)v_srat.numerator / v_srat.denominator);
+				strncat (val, b, maxlen);
+				maxlen -= strlen (b);
+				if ((signed) maxlen <= 0) break;
+			}
+			break;
+		case EXIF_FORMAT_DOUBLE:
+		case EXIF_FORMAT_FLOAT:
+		default:
+			/* What to do here? */
+			break;
+		}
+	}
+
+	return val;
+}
+
+/*
+ * Prepare an empty entry for TAG.
+ *
+ * Stores TAG in the entry and, for every tag handled below, fills in the
+ * format, the component count and the size, obtains the data buffer from
+ * exif_entry_alloc() and writes the default value (where the tag has one)
+ * in the byte order of the ExifData that owns the entry.
+ *
+ * Nothing happens when E is NULL, when E is not attached to a content that
+ * is itself attached to an ExifData, or when E already carries data.  For
+ * a tag that is not listed only e->tag is updated.  When the buffer cannot
+ * be allocated, format/components/size are still set but no default value
+ * is written and e->data keeps whatever exif_entry_alloc() returned.
+ */
+void
+exif_entry_initialize (ExifEntry *e, ExifTag tag)
+{
+	time_t t;
+	struct tm *tm;
+	ExifRational r;
+	ExifByteOrder o;
+	unsigned int i;
+
+	/* We need the byte order */
+	if (!e || !e->parent || e->data || !e->parent->parent)
+		return;
+	o = exif_data_get_byte_order (e->parent->parent);
+
+	e->tag = tag;
+	switch (tag) {
+
+	/* LONG, 1 component, no default */
+	case EXIF_TAG_PIXEL_X_DIMENSION:
+	case EXIF_TAG_PIXEL_Y_DIMENSION:
+	case EXIF_TAG_EXIF_IFD_POINTER:
+	case EXIF_TAG_GPS_INFO_IFD_POINTER:
+	case EXIF_TAG_INTEROPERABILITY_IFD_POINTER:
+	case EXIF_TAG_JPEG_INTERCHANGE_FORMAT_LENGTH:
+	case EXIF_TAG_JPEG_INTERCHANGE_FORMAT:
+		e->components = 1;
+		e->format = EXIF_FORMAT_LONG;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		break;
+
+	/*
+	 * SHORT, 1 component.  These tags have no default of their own but
+	 * share the code of the "default 0" group below, so they start at 0.
+	 */
+	case EXIF_TAG_SUBJECT_LOCATION:
+	case EXIF_TAG_SENSING_METHOD:
+	case EXIF_TAG_PHOTOMETRIC_INTERPRETATION:
+	case EXIF_TAG_COMPRESSION:
+	case EXIF_TAG_EXPOSURE_MODE:
+	case EXIF_TAG_WHITE_BALANCE:
+	case EXIF_TAG_FOCAL_LENGTH_IN_35MM_FILM:
+	case EXIF_TAG_GAIN_CONTROL:
+	case EXIF_TAG_SUBJECT_DISTANCE_RANGE:
+	case EXIF_TAG_FLASH:
+	case EXIF_TAG_COLOR_SPACE:
+
+	/* SHORT, 1 component, default 0 */
+	case EXIF_TAG_IMAGE_WIDTH:
+	case EXIF_TAG_IMAGE_LENGTH:
+	case EXIF_TAG_EXPOSURE_PROGRAM:
+	case EXIF_TAG_LIGHT_SOURCE:
+	case EXIF_TAG_METERING_MODE:
+	case EXIF_TAG_CUSTOM_RENDERED:
+	case EXIF_TAG_SCENE_CAPTURE_TYPE:
+	case EXIF_TAG_CONTRAST:
+	case EXIF_TAG_SATURATION:
+	case EXIF_TAG_SHARPNESS:
+		e->components = 1;
+		e->format = EXIF_FORMAT_SHORT;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		if (!e->data) break;
+		exif_set_short (e->data, o, 0);
+		break;
+
+	/* SHORT, 1 component, default 1 */
+	case EXIF_TAG_ORIENTATION:
+	case EXIF_TAG_PLANAR_CONFIGURATION:
+	case EXIF_TAG_YCBCR_POSITIONING:
+		e->components = 1;
+		e->format = EXIF_FORMAT_SHORT;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		if (!e->data) break;
+		exif_set_short (e->data, o, 1);
+		break;
+
+	/* SHORT, 1 component, default 2 */
+	case EXIF_TAG_RESOLUTION_UNIT:
+	case EXIF_TAG_FOCAL_PLANE_RESOLUTION_UNIT:
+		e->components = 1;
+		e->format = EXIF_FORMAT_SHORT;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		if (!e->data) break;
+		exif_set_short (e->data, o, 2);
+		break;
+
+	/* SHORT, 1 component, default 3 */
+	case EXIF_TAG_SAMPLES_PER_PIXEL:
+		e->components = 1;
+		e->format = EXIF_FORMAT_SHORT;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		if (!e->data) break;
+		exif_set_short (e->data, o, 3);
+		break;
+
+	/* SHORT, 3 components, default 8 8 8 */
+	case EXIF_TAG_BITS_PER_SAMPLE:
+		e->components = 3;
+		e->format = EXIF_FORMAT_SHORT;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		if (!e->data) break;
+		exif_set_short (e->data, o, 8);
+		exif_set_short (
+			e->data + exif_format_get_size (e->format),
+			o, 8);
+		exif_set_short (
+			e->data + 2 * exif_format_get_size (e->format),
+			o, 8);
+		break;
+
+	/* SHORT, 2 components, default 2 1 */
+	case EXIF_TAG_YCBCR_SUB_SAMPLING:
+		e->components = 2;
+		e->format = EXIF_FORMAT_SHORT;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		if (!e->data) break;
+		exif_set_short (e->data, o, 2);
+		exif_set_short (
+			e->data + exif_format_get_size (e->format),
+			o, 1);
+		break;
+
+	/* SHORT, any component, no default */
+	case EXIF_TAG_SUBJECT_AREA:
+	case EXIF_TAG_ISO_SPEED_RATINGS:
+		e->components = 0;
+		e->format = EXIF_FORMAT_SHORT;
+		e->size = 0;
+		e->data = NULL;
+		break;
+
+	/* SRATIONAL, 1 component, no default */
+	case EXIF_TAG_EXPOSURE_BIAS_VALUE:
+	case EXIF_TAG_BRIGHTNESS_VALUE:
+	case EXIF_TAG_SHUTTER_SPEED_VALUE:
+		e->components = 1;
+		e->format = EXIF_FORMAT_SRATIONAL;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		break;
+
+	/* RATIONAL, 1 component, no default */
+	case EXIF_TAG_EXPOSURE_TIME:
+	case EXIF_TAG_FOCAL_PLANE_X_RESOLUTION:
+	case EXIF_TAG_FOCAL_PLANE_Y_RESOLUTION:
+	case EXIF_TAG_EXPOSURE_INDEX:
+	case EXIF_TAG_FLASH_ENERGY:
+	case EXIF_TAG_FNUMBER:
+	case EXIF_TAG_FOCAL_LENGTH:
+	case EXIF_TAG_SUBJECT_DISTANCE:
+	case EXIF_TAG_MAX_APERTURE_VALUE:
+	case EXIF_TAG_APERTURE_VALUE:
+	case EXIF_TAG_COMPRESSED_BITS_PER_PIXEL:
+	case EXIF_TAG_PRIMARY_CHROMATICITIES:
+	case EXIF_TAG_DIGITAL_ZOOM_RATIO:
+		e->components = 1;
+		e->format = EXIF_FORMAT_RATIONAL;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		break;
+
+	/* RATIONAL, 1 component, default 72/1 */
+	case EXIF_TAG_X_RESOLUTION:
+	case EXIF_TAG_Y_RESOLUTION:
+		e->components = 1;
+		e->format = EXIF_FORMAT_RATIONAL;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		if (!e->data) break;
+		r.numerator = 72;
+		r.denominator = 1;
+		exif_set_rational (e->data, o, r);
+		break;
+
+	/* RATIONAL, 2 components, no default */
+	case EXIF_TAG_WHITE_POINT:
+		e->components = 2;
+		e->format = EXIF_FORMAT_RATIONAL;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		break;
+
+	/* RATIONAL, 6 components, default 0/1 255/1 0/1 255/1 0/1 255/1 */
+	case EXIF_TAG_REFERENCE_BLACK_WHITE:
+		e->components = 6;
+		e->format = EXIF_FORMAT_RATIONAL;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		if (!e->data) break;
+		r.denominator = 1;
+		for (i = 0; i < e->components; i++) {
+			/* Even slots hold 0, odd slots hold 255. */
+			r.numerator = (i & 1) ? 255 : 0;
+			exif_set_rational (
+				e->data + i * exif_format_get_size (e->format),
+				o, r);
+		}
+		break;
+
+	/* ASCII, 20 components, default current time */
+	case EXIF_TAG_DATE_TIME:
+	case EXIF_TAG_DATE_TIME_ORIGINAL:
+	case EXIF_TAG_DATE_TIME_DIGITIZED:
+		t = time (NULL);
+		tm = localtime (&t);
+		e->components = 20;
+		e->format = EXIF_FORMAT_ASCII;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		if (!e->data) break;
+		if (!tm) {
+			/* localtime() failed: store an empty string. */
+			memset (e->data, 0, e->size);
+			break;
+		}
+		snprintf ((char *) e->data, e->size,
+			  "%04i:%02i:%02i %02i:%02i:%02i",
+			  tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
+			  tm->tm_hour, tm->tm_min, tm->tm_sec);
+		break;
+
+	/* ASCII, 13 components, no default */
+	case EXIF_TAG_RELATED_SOUND_FILE:
+		e->components = 13;
+		e->format = EXIF_FORMAT_ASCII;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		break;
+
+	/* ASCII, 33 components, no default */
+	case EXIF_TAG_IMAGE_UNIQUE_ID:
+		e->components = 33;
+		e->format = EXIF_FORMAT_ASCII;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		break;
+
+	/* ASCII, any components, no default */
+	case EXIF_TAG_SPECTRAL_SENSITIVITY:
+	case EXIF_TAG_SUB_SEC_TIME:
+	case EXIF_TAG_SUB_SEC_TIME_ORIGINAL:
+	case EXIF_TAG_SUB_SEC_TIME_DIGITIZED:
+	case EXIF_TAG_IMAGE_DESCRIPTION:
+	case EXIF_TAG_MAKE:
+	case EXIF_TAG_MODEL:
+	case EXIF_TAG_SOFTWARE:
+	case EXIF_TAG_ARTIST:
+	case EXIF_TAG_COPYRIGHT:
+		e->components = 0;
+		e->format = EXIF_FORMAT_ASCII;
+		e->size = 0;
+		e->data = NULL;
+		break;
+
+	/* UNDEFINED, no components, no default */
+	case EXIF_TAG_OECF:
+	case EXIF_TAG_SPATIAL_FREQUENCY_RESPONSE:
+	case EXIF_TAG_NEW_CFA_PATTERN:
+	case EXIF_TAG_DEVICE_SETTING_DESCRIPTION:
+	case EXIF_TAG_MAKER_NOTE:
+	case EXIF_TAG_USER_COMMENT:
+		e->components = 0;
+		e->format = EXIF_FORMAT_UNDEFINED;
+		e->size = 0;
+		e->data = NULL;
+		break;
+
+	/* UNDEFINED, 1 component, default 1 */
+	case EXIF_TAG_SCENE_TYPE:
+		e->components = 1;
+		e->format = EXIF_FORMAT_UNDEFINED;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		if (!e->data) break;
+		e->data[0] = 0x01;
+		break;
+
+	/* UNDEFINED, 1 component, default 3 */
+	case EXIF_TAG_FILE_SOURCE:
+		e->components = 1;
+		e->format = EXIF_FORMAT_UNDEFINED;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		if (!e->data) break;
+		e->data[0] = 0x03;
+		break;
+
+	/* UNDEFINED, 4 components, default 0 1 0 0 */
+	case EXIF_TAG_FLASH_PIX_VERSION:
+		e->components = 4;
+		e->format = EXIF_FORMAT_UNDEFINED;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		if (!e->data) break;
+		memcpy (e->data, "0100", 4);
+		break;
+
+	/* UNDEFINED, 4 components, default 0 2 1 0 */
+	case EXIF_TAG_EXIF_VERSION:
+		e->components = 4;
+		e->format = EXIF_FORMAT_UNDEFINED;
+		e->size = exif_format_get_size (e->format) * e->components;
+		e->data = exif_entry_alloc (e, e->size);
+		if (!e->data) break;
+		memcpy (e->data, "0210", 4);
+		break;
+
+	/* UNDEFINED, 4 components, no default */
+	case EXIF_TAG_COMPONENTS_CONFIGURATION:
+		e->components = 4;
+		e->format = EXIF_FORMAT_UNDEFINED;
+		e->size = exif_format_get_size (e->format) * e->components;
+		/*
+		 * Obtain the buffer the same way as for every other tag
+		 * instead of calling malloc() directly.
+		 */
+		e->data = exif_entry_alloc (e, e->size);
+		break;
+
+	default:
+		break;
+	}
+}
diff --git a/src/libexif/exif-entry.h b/src/libexif/exif-entry.h
new file mode 100644
index 0000000..072ee29
--- /dev/null
+++ b/src/libexif/exif-entry.h
@@ -0,0 +1,69 @@
+/* exif-entry.h
+ *
+ * Copyright © 2001 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_ENTRY_H__
+#define __EXIF_ENTRY_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+typedef struct _ExifEntry        ExifEntry;
+typedef struct _ExifEntryPrivate ExifEntryPrivate;
+
+#include <libexif/exif-content.h>
+#include <libexif/exif-format.h>
+#include <libexif/exif-mem.h>
+
+/* One tag/value pair of an EXIF content. */
+struct _ExifEntry {
+	/* Tag identifying the entry and the format of its value */
+        ExifTag tag;
+        ExifFormat format;
+	/* Number of values of the given format stored in data */
+        unsigned long components;
+
+	/*
+	 * Raw value bytes and their length.  exif_entry_initialize() sets
+	 * size to exif_format_get_size (format) * components.
+	 */
+        unsigned char *data;
+        unsigned int size;
+
+	/* Content containing this entry */
+	ExifContent *parent;
+
+	/* Implementation-private state (opaque in this header) */
+	ExifEntryPrivate *priv;
+};
+
+/* Lifecycle */
+ExifEntry  *exif_entry_new     (void);
+ExifEntry  *exif_entry_new_mem (ExifMem *);
+void        exif_entry_ref     (ExifEntry *entry);
+void        exif_entry_unref   (ExifEntry *entry);
+void        exif_entry_free  (ExifEntry *entry);
+
+/*
+ * exif_entry_initialize() sets entry->tag and, for the tags it knows,
+ * the format, component count, size and default value.  It does nothing
+ * for an entry that already has data or that is not attached (through
+ * its parent content) to an ExifData.
+ */
+void        exif_entry_initialize (ExifEntry *entry, ExifTag tag);
+void        exif_entry_fix        (ExifEntry *entry);
+
+/*
+ * For your convenience: format the value of the entry as text into val,
+ * a caller-supplied buffer of maxlen bytes.
+ * NOTE(review): the end of the implementation returns val; confirm that
+ * every early exit does the same.
+ */
+const char *exif_entry_get_value (ExifEntry *entry, char *val,
+				  unsigned int maxlen);
+
+void        exif_entry_dump      (ExifEntry *entry, unsigned int indent);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __EXIF_ENTRY_H__ */
diff --git a/src/libexif/exif-format.c b/src/libexif/exif-format.c
new file mode 100644
index 0000000..e0c3650
--- /dev/null
+++ b/src/libexif/exif-format.c
@@ -0,0 +1,72 @@
+/* exif-format.c
+ *
+ * Copyright © 2001 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+
+#include <libexif/exif-format.h>
+#include <libexif/i18n.h>
+
+#include <stdlib.h>
+
+/*
+ * Name and per-component size in bytes of every known format.
+ * The table is read-only and ends with an all-zero row: the lookups
+ * below stop at the first row whose name is NULL (name lookup) or whose
+ * size is 0 (size lookup).
+ */
+static const struct {
+	ExifFormat format;
+	const char *name;
+	unsigned char size;
+} ExifFormatTable[] = {
+	{EXIF_FORMAT_BYTE,      N_("Byte"),      1},
+	{EXIF_FORMAT_ASCII,     N_("Ascii"),     1},
+	{EXIF_FORMAT_SHORT,     N_("Short"),     2},
+	{EXIF_FORMAT_LONG,      N_("Long"),      4},
+	{EXIF_FORMAT_RATIONAL,  N_("Rational"),  8},
+	{EXIF_FORMAT_SBYTE,     N_("SByte"),     1},
+	{EXIF_FORMAT_SSHORT,    N_("SShort"),    2},
+	{EXIF_FORMAT_SLONG,     N_("SLong"),     4},
+	{EXIF_FORMAT_SRATIONAL, N_("SRational"), 8},
+	{EXIF_FORMAT_FLOAT,     N_("Float"),     4},
+	{EXIF_FORMAT_DOUBLE,    N_("Double"),    8},
+	{EXIF_FORMAT_UNDEFINED, N_("Undefined"), 1},
+	{0, NULL, 0}
+};
+
+/*
+ * Return the name of FORMAT, passed through the _() translation macro,
+ * or NULL when FORMAT has no row in ExifFormatTable.
+ */
+const char *
+exif_format_get_name (ExifFormat format)
+{
+	unsigned int row = 0;
+
+	/* FIXME: This belongs to somewhere else. */
+	bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8");
+	bindtextdomain (GETTEXT_PACKAGE, LOCALEDIR);
+
+	/* The row with a NULL name terminates the table. */
+	while (ExifFormatTable[row].name != NULL) {
+		if (ExifFormatTable[row].format == format)
+			return _(ExifFormatTable[row].name);
+		row++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Return the size in bytes of one component of FORMAT, or 0 when FORMAT
+ * has no row in ExifFormatTable.
+ */
+unsigned char
+exif_format_get_size (ExifFormat format)
+{
+	unsigned int row = 0;
+
+	/* The row with size 0 terminates the table. */
+	while (ExifFormatTable[row].size != 0) {
+		if (ExifFormatTable[row].format == format)
+			return ExifFormatTable[row].size;
+		row++;
+	}
+
+	return 0;
+}
diff --git a/src/libexif/exif-format.h b/src/libexif/exif-format.h
new file mode 100644
index 0000000..0eae425
--- /dev/null
+++ b/src/libexif/exif-format.h
@@ -0,0 +1,50 @@
+/* exif-format.h
+ *
+ * Copyright © 2001 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_FORMAT_H__
+#define __EXIF_FORMAT_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*
+ * Formats the value of an entry can have.
+ * Every enumerator has an explicit numeric value; presumably these are
+ * the type codes stored in EXIF/TIFF directory entries -- TODO confirm
+ * against the specification.  The per-component size of each format is
+ * reported by exif_format_get_size().
+ */
+typedef enum {
+        EXIF_FORMAT_BYTE       =  1,
+        EXIF_FORMAT_ASCII      =  2,
+        EXIF_FORMAT_SHORT      =  3,
+        EXIF_FORMAT_LONG       =  4,
+        EXIF_FORMAT_RATIONAL   =  5,
+	EXIF_FORMAT_SBYTE      =  6,
+        EXIF_FORMAT_UNDEFINED  =  7,
+	EXIF_FORMAT_SSHORT     =  8,
+        EXIF_FORMAT_SLONG      =  9,
+        EXIF_FORMAT_SRATIONAL  = 10,
+	EXIF_FORMAT_FLOAT      = 11,
+	EXIF_FORMAT_DOUBLE     = 12
+} ExifFormat;
+
+/* Name of the format (run through the translation macro), or NULL if unknown. */
+const char   *exif_format_get_name (ExifFormat format);
+/* Size in bytes of one component of the format, or 0 if unknown. */
+unsigned char exif_format_get_size (ExifFormat format);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __EXIF_FORMAT_H__ */
diff --git a/src/libexif/exif-ifd.c b/src/libexif/exif-ifd.c
new file mode 100644
index 0000000..96de261
--- /dev/null
+++ b/src/libexif/exif-ifd.c
@@ -0,0 +1,49 @@
+/* exif-ifd.c
+ *
+ * Copyright © 2002 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+
+#include <libexif/exif-ifd.h>
+
+#include <stdlib.h>
+
+/*
+ * Printable name of every IFD.  Read-only; the row whose name is NULL
+ * terminates the table.
+ */
+static const struct {
+	ExifIfd ifd;
+	const char *name;
+} ExifIfdTable[] = {
+	{EXIF_IFD_0, "0"},
+	{EXIF_IFD_1, "1"},
+	{EXIF_IFD_EXIF, "EXIF"},
+	{EXIF_IFD_GPS, "GPS"},
+	{EXIF_IFD_INTEROPERABILITY, "Interoperability"},
+	{0, NULL}
+};
+
+/*
+ * Return the printable name of IFD, or NULL when IFD has no row in
+ * ExifIfdTable.
+ */
+const char *
+exif_ifd_get_name (ExifIfd ifd)
+{
+	unsigned int row;
+
+	for (row = 0; ExifIfdTable[row].name != NULL; row++) {
+		if (ExifIfdTable[row].ifd == ifd)
+			return ExifIfdTable[row].name;
+	}
+
+	/* Only the terminating row is left, and its name is NULL. */
+	return NULL;
+}
diff --git a/src/libexif/exif-ifd.h b/src/libexif/exif-ifd.h
new file mode 100644
index 0000000..01f0019
--- /dev/null
+++ b/src/libexif/exif-ifd.h
@@ -0,0 +1,35 @@
+/* exif-ifd.h
+ *
+ * Copyright © 2002 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_IFD_H__
+#define __EXIF_IFD_H__
+
+/*
+ * Identifiers of the image file directories (IFDs).  Numbering starts
+ * at 0 and is consecutive, so EXIF_IFD_COUNT, the last enumerator, equals
+ * the number of real IFDs listed before it.
+ */
+typedef enum {
+	EXIF_IFD_0 = 0,
+	EXIF_IFD_1,
+	EXIF_IFD_EXIF,
+	EXIF_IFD_GPS,
+	EXIF_IFD_INTEROPERABILITY,
+	EXIF_IFD_COUNT
+} ExifIfd;
+
+/* Printable name of the IFD ("0", "1", "EXIF", ...), or NULL if unknown. */
+const char *exif_ifd_get_name (ExifIfd ifd);
+
+#endif /* __EXIF_IFD_H__ */
diff --git a/src/libexif/exif-loader.c b/src/libexif/exif-loader.c
new file mode 100644
index 0000000..d6eba7d
--- /dev/null
+++ b/src/libexif/exif-loader.c
@@ -0,0 +1,349 @@
+#include <config.h>
+
+#include <libexif/exif-loader.h>
+#include <libexif/i18n.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#undef JPEG_MARKER_SOI
+#define JPEG_MARKER_SOI  0xd8
+#undef JPEG_MARKER_APP0
+#define JPEG_MARKER_APP0 0xe0
+#undef JPEG_MARKER_APP1
+#define JPEG_MARKER_APP1 0xe1
+#undef JPEG_MARKER_APP13
+#define JPEG_MARKER_APP13 0xed
+#undef JPEG_MARKER_COM
+#define JPEG_MARKER_COM 0xfe
+
+/* Steps of the scanner implemented by exif_loader_write(). */
+typedef enum {
+	/* Looking at each byte for a known JPEG marker (initial state, 0) */
+	EL_READ = 0,
+	/* The next byte is OR-ed into the size at bit 24, 16, 8 or 0 */
+	EL_READ_SIZE_BYTE_24,
+	EL_READ_SIZE_BYTE_16,
+	EL_READ_SIZE_BYTE_08,
+	EL_READ_SIZE_BYTE_00,
+	/* Ignoring the next `size' bytes of input */
+	EL_SKIP_BYTES,
+	/* Incoming bytes are copied into the loader's buffer */
+	EL_EXIF_FOUND,
+} ExifLoaderState;
+
+/* What exif_loader_write() has recognised in the input so far. */
+typedef enum {
+	/* Nothing recognised yet */
+	EL_DATA_FORMAT_UNKNOWN,
+	/* An APP1 marker or a bare "Exif" header: the size read next is the EXIF size */
+	EL_DATA_FORMAT_EXIF,
+	/* An APP0, APP13 or COM marker: the size read next is skipped over */
+	EL_DATA_FORMAT_JPEG,
+	/* Input starts with "FUJIFILM" */
+	EL_DATA_FORMAT_FUJI_RAW
+} ExifLoaderDataFormat;
+
+/* Incremental reader that extracts the EXIF block from a byte stream. */
+struct _ExifLoader {
+	/* Current scanner step and the kind of input recognised so far */
+	ExifLoaderState state;
+	ExifLoaderDataFormat data_format;
+
+	/* Small buffer used for detection of format */
+	unsigned char b[12];
+	/* Number of bytes currently stored in b */
+	unsigned char b_len;
+
+	/*
+	 * Depending on state: number of bytes still to skip, size value
+	 * being assembled, or total size of the EXIF data to collect.
+	 */
+	unsigned int size;
+	/* Collected EXIF data (allocated on first copy) and its fill level */
+	unsigned char *buf;
+	unsigned int bytes_read;
+
+	unsigned int ref_count;
+
+	/* Log used for messages and allocator used for buf and the loader */
+	ExifLog *log;
+	ExifMem *mem;
+};
+
+/* The ASCII characters "Exif" followed by two zero bytes. */
+static const unsigned char ExifHeader[] = {0x45, 0x78, 0x69, 0x66, 0x00, 0x00};
+
+/*
+ * Allocate I bytes with the loader's allocator.
+ * Returns NULL when L is NULL, when I is 0, or when the allocation fails;
+ * a failed allocation is reported through the loader's log.
+ */
+static void *
+exif_loader_alloc (ExifLoader *l, unsigned int i)
+{
+	void *d;
+
+	if (!l || !i) return NULL;
+
+	d = exif_mem_alloc (l->mem, i);
+	if (d) return d;
+
+	/* Same log domain as every other message of this file. */
+	EXIF_LOG_NO_MEMORY (l->log, "ExifLoader", i);
+	return NULL;
+}
+
+#undef  MIN
+#define MIN(a, b)  (((a) < (b)) ? (a) : (b))
+
+/*
+ * Read the file PATH and feed its contents to the loader.
+ *
+ * The file is read in chunks of at most 1024 bytes; reading stops at the
+ * end of the file, on a read error, or as soon as exif_loader_write()
+ * returns 0.  A file that cannot be opened is reported through the
+ * loader's log.  Nothing happens when L or PATH is NULL.
+ */
+void
+exif_loader_write_file (ExifLoader *l, const char *path)
+{
+	FILE *f;
+	size_t size;
+	unsigned char data[1024];
+
+	if (!l || !path) return;
+
+	f = fopen (path, "rb");
+	if (!f) {
+		exif_log (l->log, EXIF_LOG_CODE_NONE, "ExifLoader",
+			  _("The file '%s' could not be opened."), path);
+		return;
+	}
+	for (;;) {
+		/* fread() returns 0 both at end of file and on error. */
+		size = fread (data, 1, sizeof (data), f);
+		if (!size) break;
+		if (!exif_loader_write (l, data, (unsigned int) size)) break;
+	}
+	fclose (f);
+}
+
+/*
+ * Append up to LEN bytes from BUF to the loader's EXIF buffer, which is
+ * allocated with eld->size bytes on first use.
+ * Returns 1 while the buffer is not yet full.  Returns 0 once it is full,
+ * on invalid arguments, or when the buffer cannot be allocated.
+ */
+static unsigned int
+exif_loader_copy (ExifLoader *eld, unsigned char *buf, unsigned int len)
+{
+	unsigned int room;
+
+	if (!eld)
+		return 0;
+	if (len && !buf)
+		return 0;
+	if (eld->bytes_read >= eld->size)
+		return 0;
+
+	/* If needed, allocate the buffer. */
+	if (!eld->buf) {
+		eld->buf = exif_loader_alloc (eld, eld->size);
+		if (!eld->buf)
+			return 0;
+	}
+
+	/* Never copy more than what is still missing. */
+	room = eld->size - eld->bytes_read;
+	if (len > room)
+		len = room;
+	memcpy (eld->buf + eld->bytes_read, buf, len);
+	eld->bytes_read += len;
+
+	return (eld->bytes_read < eld->size) ? 1 : 0;
+}
+
+/*
+ * Feed LEN bytes from BUF to the loader.
+ *
+ * The input is examined 12 bytes at a time (the size of eld->b): the
+ * loader looks for a "FUJIFILM" signature, a bare "Exif" header or JPEG
+ * markers, skips the segments it is not interested in and, once the EXIF
+ * size is known, copies the data that follows into the loader's buffer.
+ *
+ * Returns 1 while more input is wanted.  Returns 0 when ELD is NULL, when
+ * BUF is NULL although LEN is not 0, when all EXIF data has been copied
+ * (or the copy failed), or when the input does not look like EXIF data;
+ * in that last case the loader is reset.
+ */
+unsigned char
+exif_loader_write (ExifLoader *eld, unsigned char *buf, unsigned int len)
+{
+	unsigned int i;
+
+	if (!eld || (len && !buf)) return 0;
+
+	/*
+	 * Each pass handles at most sizeof (eld->b) bytes.  A loop is used
+	 * so that the stack depth does not grow with the amount of input.
+	 */
+	for (;;) {
+		switch (eld->state) {
+		case EL_EXIF_FOUND:
+			return exif_loader_copy (eld, buf, len);
+		case EL_SKIP_BYTES:
+			if (eld->size > len) { eld->size -= len; return 1; }
+			len -= eld->size;
+			buf += eld->size;
+			eld->size = 0;
+			eld->b_len = 0;
+			switch (eld->data_format) {
+			case EL_DATA_FORMAT_FUJI_RAW:
+				eld->state = EL_READ_SIZE_BYTE_24;
+				break;
+			default:
+				eld->state = EL_READ;
+				break;
+			}
+			break;
+		default:
+			break;
+		}
+
+		/*
+		 * No input left: wait for the next call.  Going on would
+		 * scan the old contents of the small buffer a second time.
+		 */
+		if (!len) return 1;
+
+		exif_log (eld->log, EXIF_LOG_CODE_DEBUG, "ExifLoader",
+			  "Scanning %u byte(s) of data...", len);
+
+		/*
+		 * First fill the small buffer. Only continue if the buffer
+		 * is filled. Note that EXIF data contains at least 12 bytes.
+		 */
+		i = MIN (len, sizeof (eld->b) - eld->b_len);
+		if (i) {
+			memcpy (&eld->b[eld->b_len], buf, i);
+			eld->b_len += i;
+			if (eld->b_len < sizeof (eld->b)) return 1;
+			buf += i;
+			len -= i;
+		}
+
+		if (eld->data_format == EL_DATA_FORMAT_UNKNOWN) {
+
+			/* Check the small buffer against known formats. */
+			if (!memcmp (eld->b, "FUJIFILM", 8)) {
+
+				/* Skip to byte 84. There is another offset there. */
+				eld->data_format = EL_DATA_FORMAT_FUJI_RAW;
+				eld->size = 84;
+				eld->state = EL_SKIP_BYTES;
+
+			} else if (!memcmp (eld->b + 2, ExifHeader,
+					    sizeof (ExifHeader))) {
+
+				/* Read the size (2 bytes). */
+				eld->data_format = EL_DATA_FORMAT_EXIF;
+				eld->state = EL_READ_SIZE_BYTE_08;
+			}
+		}
+
+		for (i = 0; i < sizeof (eld->b); i++)
+			switch (eld->state) {
+			case EL_EXIF_FOUND:
+				if (!exif_loader_copy (eld, eld->b + i,
+						sizeof (eld->b) - i)) return 0;
+				return exif_loader_copy (eld, buf, len);
+			case EL_SKIP_BYTES:
+				/* Never decrement below 0. */
+				if (eld->size) eld->size--;
+				if (!eld->size) eld->state = EL_READ;
+				break;
+
+			/*
+			 * The byte is converted to unsigned int before it
+			 * is shifted: shifting the promoted int by 24 bits
+			 * could overflow.
+			 */
+			case EL_READ_SIZE_BYTE_24:
+				eld->size |= (unsigned int) eld->b[i] << 24;
+				eld->state = EL_READ_SIZE_BYTE_16;
+				break;
+			case EL_READ_SIZE_BYTE_16:
+				eld->size |= (unsigned int) eld->b[i] << 16;
+				eld->state = EL_READ_SIZE_BYTE_08;
+				break;
+			case EL_READ_SIZE_BYTE_08:
+				eld->size |= (unsigned int) eld->b[i] << 8;
+				eld->state = EL_READ_SIZE_BYTE_00;
+				break;
+			case EL_READ_SIZE_BYTE_00:
+				eld->size |= (unsigned int) eld->b[i];
+				switch (eld->data_format) {
+				case EL_DATA_FORMAT_JPEG:
+					/*
+					 * The two size bytes just read are
+					 * taken off the amount to skip.  If
+					 * nothing remains, go straight back
+					 * to reading markers instead of
+					 * letting the counter wrap around.
+					 */
+					if (eld->size > 2) {
+						eld->size -= 2;
+						eld->state = EL_SKIP_BYTES;
+					} else {
+						eld->size = 0;
+						eld->state = EL_READ;
+					}
+					break;
+				case EL_DATA_FORMAT_FUJI_RAW:
+					/* An offset below 86 cannot be reached. */
+					if (eld->size < 86) {
+						exif_log (eld->log,
+							EXIF_LOG_CODE_CORRUPT_DATA,
+							"ExifLoader", _("The data supplied "
+								"does not seem to contain "
+								"EXIF data."));
+						exif_loader_reset (eld);
+						return 0;
+					}
+					eld->data_format = EL_DATA_FORMAT_EXIF;
+					eld->size -= 86;
+					eld->state = eld->size ?
+						EL_SKIP_BYTES : EL_READ;
+					break;
+				case EL_DATA_FORMAT_EXIF:
+					eld->state = EL_EXIF_FOUND;
+					break;
+				default:
+					break;
+				}
+				break;
+
+			default:
+				switch (eld->b[i]) {
+				case JPEG_MARKER_APP1:
+					eld->data_format = EL_DATA_FORMAT_EXIF;
+					eld->size = 0;
+					eld->state = EL_READ_SIZE_BYTE_08;
+					break;
+				case JPEG_MARKER_APP0:
+				case JPEG_MARKER_APP13:
+				case JPEG_MARKER_COM:
+					eld->data_format = EL_DATA_FORMAT_JPEG;
+					eld->size = 0;
+					eld->state = EL_READ_SIZE_BYTE_08;
+					break;
+				case 0xff:
+				case JPEG_MARKER_SOI:
+					break;
+				default:
+					exif_log (eld->log,
+						EXIF_LOG_CODE_CORRUPT_DATA,
+						"ExifLoader", _("The data supplied "
+							"does not seem to contain "
+							"EXIF data."));
+					exif_loader_reset (eld);
+					return 0;
+				}
+			}
+
+		/*
+		 * If we reach this point, the buffer has not been big enough
+		 * to read all data we need. Fill it with new data.
+		 */
+		eld->b_len = 0;
+	}
+}
+
+/*
+ * Create a loader that uses the default allocator.
+ * Returns whatever exif_loader_new_mem() returns for that allocator.
+ */
+ExifLoader *
+exif_loader_new (void)
+{
+	ExifMem *default_mem;
+	ExifLoader *loader;
+
+	default_mem = exif_mem_new_default ();
+	loader = exif_loader_new_mem (default_mem);
+
+	/* The loader took its own reference; release the local one. */
+	exif_mem_unref (default_mem);
+
+	return loader;
+}
+
+/*
+ * Create a loader that allocates through MEM.
+ * The new loader starts with one reference and takes a reference on MEM.
+ * Returns NULL when MEM is NULL or the loader cannot be allocated.
+ */
+ExifLoader *
+exif_loader_new_mem (ExifMem *mem)
+{
+	ExifLoader *loader;
+
+	if (!mem) return NULL;
+
+	loader = exif_mem_alloc (mem, sizeof (ExifLoader));
+	if (!loader) return NULL;
+
+	/*
+	 * Start from an all-zero object.  exif_loader_reset() frees buf and
+	 * exif_loader_log() unreferences log, so neither may hold garbage
+	 * when a caller-supplied allocator does not clear its memory.
+	 */
+	memset (loader, 0, sizeof (ExifLoader));
+	loader->ref_count = 1;
+
+	loader->mem = mem;
+	exif_mem_ref (mem);
+
+	return loader;
+}
+
+/* Add one reference to the loader; a NULL loader is ignored. */
+void
+exif_loader_ref (ExifLoader *loader)
+{
+	if (loader == NULL)
+		return;
+
+	loader->ref_count++;
+}
+
+/*
+ * Destroy the loader: release the collected data, drop the reference on
+ * the log, free the loader itself and finally drop the reference on its
+ * allocator.  A NULL loader is ignored.
+ */
+static void
+exif_loader_free (ExifLoader *loader)
+{
+	ExifMem *mem;
+
+	if (!loader) return;
+
+	/* Keep the allocator: it is still needed after the loader is gone. */
+	mem = loader->mem;
+	exif_loader_reset (loader);
+	/* Give back the reference taken in exif_loader_log(). */
+	exif_log_unref (loader->log);
+	exif_mem_free (mem, loader);
+	exif_mem_unref (mem);
+}
+	
+/*
+ * Drop one reference to the loader and destroy it when that was the
+ * last one.  A NULL loader is ignored.
+ */
+void
+exif_loader_unref (ExifLoader *loader)
+{
+	if (loader == NULL)
+		return;
+
+	loader->ref_count--;
+	if (loader->ref_count == 0)
+		exif_loader_free (loader);
+}
+
+/*
+ * Forget everything read so far: free the collected data and put the
+ * scanner back into its initial state.  The log, the allocator and the
+ * reference count are left alone.  A NULL loader is ignored.
+ */
+void
+exif_loader_reset (ExifLoader *loader)
+{
+	if (loader == NULL)
+		return;
+
+	/* Release the collected EXIF data. */
+	exif_mem_free (loader->mem, loader->buf);
+	loader->buf = NULL;
+	loader->bytes_read = 0;
+	loader->size = 0;
+
+	/* Restart the scanner (EL_READ is the enumerator with value 0). */
+	loader->state = EL_READ;
+	loader->data_format = EL_DATA_FORMAT_UNKNOWN;
+	loader->b_len = 0;
+}
+
+/*
+ * Build an ExifData from the bytes collected so far.
+ * The new object uses the loader's allocator and log and is loaded from
+ * the loader's buffer.  Returns NULL when LOADER is NULL.
+ */
+ExifData *
+exif_loader_get_data (ExifLoader *loader)
+{
+	ExifData *data;
+
+	if (loader == NULL)
+		return NULL;
+
+	data = exif_data_new_mem (loader->mem);
+	exif_data_log (data, loader->log);
+	exif_data_load_data (data, loader->buf, loader->bytes_read);
+
+	return data;
+}
+
+/*
+ * Make the loader report through LOG.
+ * The loader takes a reference on LOG and drops the one it held on the
+ * previous log.  A NULL loader is ignored.
+ */
+void
+exif_loader_log (ExifLoader *loader, ExifLog *log)
+{
+	if (!loader) return;
+
+	/*
+	 * Take the new reference before dropping the old one: when LOG is
+	 * the log already installed, unreferencing first could release it
+	 * while it is still about to be used.
+	 */
+	exif_log_ref (log);
+	exif_log_unref (loader->log);
+	loader->log = log;
+}
diff --git a/src/libexif/exif-loader.h b/src/libexif/exif-loader.h
new file mode 100644
index 0000000..cf900c8
--- /dev/null
+++ b/src/libexif/exif-loader.h
@@ -0,0 +1,57 @@
+/* exif-loader.h
+ *
+ * Copyright © 2003 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_LOADER_H__
+#define __EXIF_LOADER_H__
+
+#include <libexif/exif-data.h>
+#include <libexif/exif-loader.h>
+#include <libexif/exif-log.h>
+#include <libexif/exif-mem.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Opaque incremental reader that extracts EXIF data from a byte stream. */
+typedef struct _ExifLoader ExifLoader;
+
+/*
+ * Creation and reference counting.  exif_loader_new() uses the default
+ * allocator, exif_loader_new_mem() the one passed in (NULL is returned
+ * for a NULL allocator).  A new loader holds one reference; it is
+ * destroyed when exif_loader_unref() drops the last one.
+ */
+ExifLoader *exif_loader_new     (void);
+ExifLoader *exif_loader_new_mem (ExifMem *);
+void        exif_loader_ref     (ExifLoader *);
+void        exif_loader_unref   (ExifLoader *);
+
+/*
+ * Reads the file fname in chunks and passes them to exif_loader_write()
+ * until the end of the file or until that function returns 0.  A file
+ * that cannot be opened is reported through the loader's log.
+ */
+void        exif_loader_write_file (ExifLoader *, const char *fname);
+
+/*
+ * Returns 1 while EXIF data is read (or while there is still
+ * hope that there will be EXIF data later on), 0 otherwise.
+ */
+unsigned char exif_loader_write (ExifLoader *, unsigned char *, unsigned int);
+
+/*
+ * exif_loader_reset() discards the data collected so far and restarts
+ * the scan.  exif_loader_get_data() creates a new ExifData and loads it
+ * from the data collected so far.
+ */
+void          exif_loader_reset (ExifLoader *);
+ExifData     *exif_loader_get_data (ExifLoader *);
+
+/* Sets the log the loader reports to; the loader keeps a reference on it. */
+void exif_loader_log (ExifLoader *, ExifLog *);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __EXIF_LOADER_H__ */
diff --git a/src/libexif/exif-log.c b/src/libexif/exif-log.c
new file mode 100644
index 0000000..5dde1ce
--- /dev/null
+++ b/src/libexif/exif-log.c
@@ -0,0 +1,148 @@
+/* exif-log.c
+ *
+ * Copyright � 2004 Lutz M�ller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+
+#include <libexif/exif-log.h>
+#include <libexif/i18n.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+struct _ExifLog { /* Private state behind the opaque ExifLog handle. */
+	unsigned int ref_count; /* set to 1 on creation; the log is freed when it reaches 0 */
+
+	ExifLogFunc func; /* callback invoked by exif_logv; messages are dropped while NULL */
+	void *data; /* handed back to func as its last argument */
+
+	ExifMem *mem; /* allocator this struct came from; referenced while the log lives */
+};
+
+static const struct { /* Title/message pair for each log code; never modified. */
+	ExifLogCode code;
+	const char *title; /* short heading, marked for translation with N_() */
+	const char *message; /* longer explanation, marked for translation with N_() */
+} codes[] = {
+	{ EXIF_LOG_CODE_DEBUG, N_("Debugging information"),
+	  N_("Debugging information is available.") },
+	{ EXIF_LOG_CODE_NO_MEMORY, N_("Not enough memory"),
+	  N_("The system cannot provide enough memory.") },
+	{ EXIF_LOG_CODE_CORRUPT_DATA, N_("Corrupt data"),
+	  N_("The data provided does not follow the specification.") },
+	{ 0, NULL, NULL } /* sentinel: lookups stop at the NULL title/message */
+};
+
+const char * /* Translated title for `code`, or NULL when the code has no entry. */
+exif_log_code_get_title (ExifLogCode code)
+{
+	unsigned int i;
+	/* Stop on the matching entry or on the sentinel, whose title is NULL. */
+	for (i = 0; codes[i].title; i++) if (codes[i].code == code) break;
+	return codes[i].title ? _(codes[i].title) : NULL; /* never pass NULL to _() */
+}
+
+const char * /* Translated message for `code`, or NULL when the code has no entry. */
+exif_log_code_get_message (ExifLogCode code)
+{
+	unsigned int i;
+	/* Stop on the matching entry or on the sentinel, whose message is NULL. */
+	for (i = 0; codes[i].message; i++) if (codes[i].code == code) break;
+	return codes[i].message ? _(codes[i].message) : NULL; /* never pass NULL to _() */
+}
+
+ExifLog * /* New log holding one reference, allocated from `mem`; NULL on failure. */
+exif_log_new_mem (ExifMem *mem)
+{
+	/* exif_mem_alloc yields NULL for a NULL mem or a failed allocation. */
+	ExifLog *log = exif_mem_alloc (mem, sizeof (ExifLog));
+
+	if (!log) return NULL;
+	log->ref_count = 1;
+	log->func = NULL; /* explicit: the allocator is not guaranteed to zero-fill */
+	log->data = NULL;
+	log->mem = mem;
+	exif_mem_ref (mem); /* the log keeps its own reference on the allocator */
+	return log;
+}
+
+ExifLog * /* New log backed by the default allocator; NULL on failure. */
+exif_log_new (void)
+{
+	ExifMem *default_mem;
+	ExifLog *result;
+	default_mem = exif_mem_new_default ();
+	result = exif_log_new_mem (default_mem);
+	exif_mem_unref (default_mem); /* drop ours; a created log holds its own */
+	return result;
+}
+
+void
+exif_log_ref (ExifLog *log)
+{
+	/* Take one more reference; a NULL log is ignored. */
+	if (log != NULL) log->ref_count += 1;
+}
+
+void
+exif_log_unref (ExifLog *log)
+{
+	/* A count that is already zero is not decremented but still frees. */
+	if (log && (log->ref_count == 0 || --log->ref_count == 0))
+		exif_log_free (log);
+}
+
+void
+exif_log_free (ExifLog *log)
+{
+	ExifMem *owner;
+
+	if (log == NULL) return;
+	owner = log->mem; /* read before the log's storage is released */
+	exif_mem_free (owner, log);
+	exif_mem_unref (owner); /* give back the reference the log held */
+}
+
+void
+exif_log_set_func (ExifLog *log, ExifLogFunc func, void *data)
+{
+	if (log == NULL) return; /* nothing to configure */
+	log->data = data; /* handed back to func on every message */
+	log->func = func; /* NULL disables delivery in exif_logv */
+}
+
+void
+exif_log (ExifLog *log, ExifLogCode code, const char *domain,
+	  const char *format, ...)
+{
+	va_list ap; /* the variadic values that go with `format` */
+
+	va_start (ap, format);
+	exif_logv (log, code, domain, format, ap); /* performs the actual dispatch */
+	va_end (ap);
+}
+
+void
+exif_logv (ExifLog *log, ExifLogCode code, const char *domain,
+	   const char *format, va_list args)
+{
+	/* Messages are dropped unless a log with a registered callback exists. */
+	if (log && log->func)
+		log->func (log, code, domain, format, args, log->data);
+}
diff --git a/src/libexif/exif-log.h b/src/libexif/exif-log.h
new file mode 100644
index 0000000..01eed3f
--- /dev/null
+++ b/src/libexif/exif-log.h
@@ -0,0 +1,75 @@
+/*! \file exif-log.h
+ *  \brief log message infrastructure
+ *
+ * Copyright � 2004 Lutz M�ller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_LOG_H__
+#define __EXIF_LOG_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include <libexif/exif-mem.h>
+#include <stdarg.h>
+
+typedef struct _ExifLog        ExifLog;
+
+ExifLog *exif_log_new     (void);
+ExifLog *exif_log_new_mem (ExifMem *);
+void     exif_log_ref     (ExifLog *log);
+void     exif_log_unref   (ExifLog *log);
+void     exif_log_free    (ExifLog *log);
+
+typedef enum { /* Category attached to every log message. */
+	EXIF_LOG_CODE_NONE, /* has no title/message entry in exif-log.c */
+	EXIF_LOG_CODE_DEBUG, /* titled "Debugging information" */
+	EXIF_LOG_CODE_NO_MEMORY, /* titled "Not enough memory" */
+	EXIF_LOG_CODE_CORRUPT_DATA /* titled "Corrupt data" */
+} ExifLogCode;
+const char *exif_log_code_get_title   (ExifLogCode); /* Title for dialog   */
+const char *exif_log_code_get_message (ExifLogCode); /* Message for dialog */
+
+/** Log callback function prototype: \a args are the values for \a format,
+ *  \a data is the pointer registered through exif_log_set_func. */
+typedef void (* ExifLogFunc) (ExifLog *log, ExifLogCode, const char *domain,
+			      const char *format, va_list args, void *data);
+
+/** Register log callback function.
+ */
+void     exif_log_set_func (ExifLog *log, ExifLogFunc func, void *data);
+
+void     exif_log  (ExifLog *log, ExifLogCode, const char *domain,
+		    const char *format, ...)
+#ifdef __GNUC__
+			__attribute__((__format__(printf,4,5)))
+#endif
+;
+
+void     exif_logv (ExifLog *log, ExifLogCode, const char *domain,
+		    const char *format, va_list args);
+
+/* For your convenience */
+#define EXIF_LOG_NO_MEMORY(l,d,s) exif_log (l, EXIF_LOG_CODE_NO_MEMORY, d, "Could not allocate %i byte(s).", s)
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __EXIF_LOG_H__ */
diff --git a/src/libexif/exif-mem.c b/src/libexif/exif-mem.c
new file mode 100644
index 0000000..12027d1
--- /dev/null
+++ b/src/libexif/exif-mem.c
@@ -0,0 +1,95 @@
+#include <libexif/exif-mem.h>
+
+#include <stdlib.h>
+
+struct _ExifMem { /* Reference-counted bundle of allocation callbacks. */
+	unsigned int ref_count; /* starts at 1; the struct is released when it reaches 0 */
+	ExifMemAllocFunc alloc_func; /* tried first by exif_mem_alloc */
+	ExifMemReallocFunc realloc_func; /* used by exif_mem_realloc and as allocation fallback */
+	ExifMemFreeFunc free_func; /* when NULL, exif_mem_free does nothing */
+};
+
+static void * /* Default allocator: ds zero-filled bytes obtained from calloc. */
+exif_mem_alloc_func (ExifLong ds)
+{
+	return calloc ((size_t) ds, 1);
+}
+
+static void * /* Default reallocator: forwards to realloc. */
+exif_mem_realloc_func (void *d, ExifLong ds)
+{
+	return realloc (d, (size_t) ds);
+}
+
+static void /* Default deallocator: forwards to free. */
+exif_mem_free_func (void *d)
+{
+	free (d);
+}
+
+ExifMem * /* New allocator holding one reference, or NULL on failure. */
+exif_mem_new (ExifMemAllocFunc alloc_func, ExifMemReallocFunc realloc_func,
+	      ExifMemFreeFunc free_func)
+{
+	ExifMem *mem;
+
+	/* One callback able to allocate is enough; free_func may be NULL. */
+	if (!alloc_func && !realloc_func) return NULL;
+	mem = alloc_func ? alloc_func (sizeof (ExifMem)) :
+		           realloc_func (NULL, sizeof (ExifMem));
+	if (!mem) return NULL;
+	mem->ref_count = 1;
+	/* Every field is assigned, so the struct need not arrive zero-filled. */
+	mem->alloc_func   = alloc_func;
+	mem->realloc_func = realloc_func;
+	mem->free_func    = free_func;
+	return mem;
+}
+
+void
+exif_mem_ref (ExifMem *mem)
+{
+	/* Take one more reference; a NULL allocator is ignored. */
+	if (mem != NULL) mem->ref_count += 1;
+}
+
+void
+exif_mem_unref (ExifMem *mem)
+{
+	if (mem == NULL) return;
+	mem->ref_count -= 1;
+	if (mem->ref_count == 0) exif_mem_free (mem, mem); /* releases itself */
+}
+
+void
+exif_mem_free (ExifMem *mem, void *d)
+{
+	/*
+	 * Hand d to the registered free callback. Without an allocator or
+	 * without a callback nothing can release d, so it is left alone.
+	 */
+	if (mem && mem->free_func) mem->free_func (d);
+}
+
+void *
+exif_mem_alloc (ExifMem *mem, ExifLong ds)
+{
+	if (mem == NULL) return NULL;
+	/* Prefer the dedicated allocator, else realloc starting from NULL. */
+	if (mem->alloc_func) return mem->alloc_func (ds);
+	if (mem->realloc_func) return mem->realloc_func (NULL, ds);
+	return NULL; /* no callback able to allocate */
+}
+
+void * /* Resize d through realloc_func; NULL without an allocator or callback. */
+exif_mem_realloc (ExifMem *mem, void *d, ExifLong ds)
+{
+	return (!mem || !mem->realloc_func) ? NULL : mem->realloc_func (d, ds);
+}
+
+ExifMem * /* Allocator built from this file's calloc/realloc/free wrappers. */
+exif_mem_new_default (void)
+{
+	return exif_mem_new (exif_mem_alloc_func, exif_mem_realloc_func,
+			     exif_mem_free_func);
+}
diff --git a/src/libexif/exif-mem.h b/src/libexif/exif-mem.h
new file mode 100644
index 0000000..aa774c6
--- /dev/null
+++ b/src/libexif/exif-mem.h
@@ -0,0 +1,54 @@
+/* exif-mem.h
+ *
+ * Copyright � 2003 Lutz M�ller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_MEM_H__
+#define __EXIF_MEM_H__
+
+#include <libexif/exif-utils.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Should work like calloc: must return zero-initialized memory. */
+typedef void * (* ExifMemAllocFunc)   (ExifLong);
+
+typedef void * (* ExifMemReallocFunc) (void *, ExifLong);
+typedef void   (* ExifMemFreeFunc)    (void *);
+
+typedef struct _ExifMem ExifMem;
+
+ExifMem *exif_mem_new   (ExifMemAllocFunc, ExifMemReallocFunc,
+			 ExifMemFreeFunc);
+void     exif_mem_ref   (ExifMem *);
+void     exif_mem_unref (ExifMem *);
+
+void *exif_mem_alloc   (ExifMem *, ExifLong);
+void *exif_mem_realloc (ExifMem *, void *, ExifLong);
+void  exif_mem_free    (ExifMem *, void *);
+
+/* For your convenience */
+ExifMem *exif_mem_new_default (void);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __EXIF_MEM_H__ */
diff --git a/src/libexif/exif-mnote-data-priv.h b/src/libexif/exif-mnote-data-priv.h
new file mode 100644
index 0000000..3a13883
--- /dev/null
+++ b/src/libexif/exif-mnote-data-priv.h
@@ -0,0 +1,76 @@
+/* exif-mnote-data-priv.h
+ *
+ * Copyright � 2003 Lutz M�ller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_MNOTE_DATA_PRIV_H__
+#define __EXIF_MNOTE_DATA_PRIV_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include <libexif/exif-mnote-data.h>
+#include <libexif/exif-byte-order.h>
+#include <libexif/exif-log.h>
+
+typedef struct _ExifMnoteDataMethods ExifMnoteDataMethods;
+struct _ExifMnoteDataMethods { /* Callback table; the exif_mnote_data_* wrappers skip entries that are NULL. */
+
+	/* Life cycle */
+	void (* free) (ExifMnoteData *); /* run on the final unref, before priv is released */
+
+	/* Modification */
+	void (* save) (ExifMnoteData *, unsigned char **, unsigned int *);
+	void (* load) (ExifMnoteData *, const unsigned char *, unsigned int);
+	void (* set_offset)     (ExifMnoteData *, unsigned int);
+	void (* set_byte_order) (ExifMnoteData *, ExifByteOrder);
+
+	/* Query */
+	unsigned int (* count)           (ExifMnoteData *);
+        unsigned int (* get_id)          (ExifMnoteData *, unsigned int);
+	const char * (* get_name)        (ExifMnoteData *, unsigned int);
+	const char * (* get_title)       (ExifMnoteData *, unsigned int);
+	const char * (* get_description) (ExifMnoteData *, unsigned int);
+	char * (* get_value)             (ExifMnoteData *, unsigned int, char *val, unsigned int maxlen);
+};
+
+typedef struct _ExifMnoteDataPriv ExifMnoteDataPriv;
+
+struct _ExifMnoteData 
+{
+	ExifMnoteDataPriv *priv; /* allocated by exif_mnote_data_construct; holds the reference count */
+
+	ExifMnoteDataMethods methods; /* callbacks the exif_mnote_data_* functions dispatch to */
+
+	/* Logging */
+	ExifLog *log; /* set through exif_mnote_data_log; may be NULL */
+
+	/* Memory management */
+	ExifMem *mem; /* allocator stored and referenced by exif_mnote_data_construct */
+};
+
+void exif_mnote_data_construct      (ExifMnoteData *, ExifMem *mem);
+void exif_mnote_data_set_byte_order (ExifMnoteData *, ExifByteOrder);
+void exif_mnote_data_set_offset     (ExifMnoteData *, unsigned int);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __EXIF_MNOTE_DATA_PRIV_H__ */
diff --git a/src/libexif/exif-mnote-data.c b/src/libexif/exif-mnote-data.c
new file mode 100644
index 0000000..f55cdd4
--- /dev/null
+++ b/src/libexif/exif-mnote-data.c
@@ -0,0 +1,158 @@
+/* exif-mnote-data.c
+ *
+ * Copyright (C) 2003 Lutz M�ller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+
+#include <libexif/exif-mnote-data.h>
+#include <libexif/exif-mnote-data-priv.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+struct _ExifMnoteDataPriv /* Private bookkeeping shared by all maker-note objects. */
+{
+	unsigned int ref_count; /* 1 after construction; the object is freed when it reaches 0 */
+};
+
+void
+exif_mnote_data_construct (ExifMnoteData *d, ExifMem *mem)
+{
+	ExifMnoteDataPriv *fresh;
+	/* Needs both arguments; an already constructed object is left alone. */
+	if (d == NULL || mem == NULL || d->priv != NULL) return;
+	fresh = exif_mem_alloc (mem, sizeof (ExifMnoteDataPriv));
+	d->priv = fresh;
+	if (fresh == NULL) return; /* allocation failed: d->priv stays NULL */
+	fresh->ref_count = 1;
+	d->mem = mem;
+	exif_mem_ref (mem); /* the object keeps its own reference on the allocator */
+}
+
+void /* Take one more reference; ignored for NULL or unconstructed objects. */
+exif_mnote_data_ref (ExifMnoteData *d)
+{
+	if (d != NULL && d->priv != NULL) d->priv->ref_count += 1;
+}
+
+static void
+exif_mnote_data_free (ExifMnoteData *d)
+{
+	ExifMem *owner;
+	if (d == NULL) return;
+	owner = d->mem; /* fetched up front: d itself is released below */
+	if (d->priv != NULL) {
+		if (d->methods.free != NULL) d->methods.free (d); /* hook runs while priv exists */
+		exif_mem_free (owner, d->priv);
+		d->priv = NULL;
+	}
+	exif_log_unref (d->log);
+	exif_mem_free (owner, d);
+	exif_mem_unref (owner); /* give back the reference the object held */
+}
+
+void
+exif_mnote_data_unref (ExifMnoteData *d)
+{
+	ExifMnoteDataPriv *p = d ? d->priv : NULL; /* NULL when there is nothing to release */
+
+	if (p == NULL) return;
+	if (p->ref_count == 0 || --p->ref_count == 0) exif_mnote_data_free (d);
+}
+
+void
+exif_mnote_data_load (ExifMnoteData *d, const unsigned char *buf,
+		      unsigned int buf_size)
+{
+	/* Forward to the load method; no-op without an object or a method. */
+	if (d && d->methods.load) d->methods.load (d, buf, buf_size);
+}
+
+void
+exif_mnote_data_save (ExifMnoteData *d, unsigned char **buf,
+		      unsigned int *buf_size)
+{
+	/* Forward to the save method; no-op without an object or a method. */
+	if (d && d->methods.save) d->methods.save (d, buf, buf_size);
+}
+
+void
+exif_mnote_data_set_byte_order (ExifMnoteData *d, ExifByteOrder o)
+{
+	/* Forward to the set_byte_order method when both it and d exist. */
+	if (d && d->methods.set_byte_order) d->methods.set_byte_order (d, o);
+}
+
+void
+exif_mnote_data_set_offset (ExifMnoteData *d, unsigned int o)
+{
+	/* Forward to the set_offset method when both it and d exist. */
+	if (d && d->methods.set_offset) d->methods.set_offset (d, o);
+}
+
+unsigned int
+exif_mnote_data_count (ExifMnoteData *d)
+{
+	/* Zero when there is no object or no count method. */
+	return (d && d->methods.count) ? d->methods.count (d) : 0;
+}
+
+unsigned int
+exif_mnote_data_get_id (ExifMnoteData *d, unsigned int n)
+{
+	/* Zero when there is no object or no get_id method. */
+	return (d && d->methods.get_id) ? d->methods.get_id (d, n) : 0;
+}
+
+const char *
+exif_mnote_data_get_name (ExifMnoteData *d, unsigned int n)
+{
+	/* NULL when there is no object or no get_name method. */
+	return (d && d->methods.get_name) ? d->methods.get_name (d, n) : NULL;
+}
+
+const char *
+exif_mnote_data_get_title (ExifMnoteData *d, unsigned int n)
+{
+	/* NULL when there is no object or no get_title method. */
+	return (d && d->methods.get_title) ? d->methods.get_title (d, n) : NULL;
+}
+	
+const char *
+exif_mnote_data_get_description (ExifMnoteData *d, unsigned int n)
+{
+	/* NULL when there is no object or no get_description method. */
+	return (d && d->methods.get_description) ? d->methods.get_description (d, n) : NULL;
+}
+	
+char *
+exif_mnote_data_get_value (ExifMnoteData *d, unsigned int n, char *val, unsigned int maxlen)
+{
+	/* NULL when there is no object or no get_value method. */
+	return (d && d->methods.get_value) ? d->methods.get_value (d, n, val, maxlen) : NULL;
+}
+
+void /* Attach `log` (may be NULL) to d, replacing any log set before; d holds one reference. */
+exif_mnote_data_log (ExifMnoteData *d, ExifLog *log)
+{
+	if (!d) return;
+	exif_log_ref (log); /* reference the new log first: it may be d->log itself */
+	exif_log_unref (d->log); /* cannot free `log` now, even if d held its last reference */
+	d->log = log;
+}
diff --git a/src/libexif/exif-mnote-data.h b/src/libexif/exif-mnote-data.h
new file mode 100644
index 0000000..015f2ae
--- /dev/null
+++ b/src/libexif/exif-mnote-data.h
@@ -0,0 +1,54 @@
+/* exif-mnote-data.h
+ *
+ * Copyright � 2003 Lutz M�ller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_MNOTE_DATA_H__
+#define __EXIF_MNOTE_DATA_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include <libexif/exif-log.h>
+
+typedef struct _ExifMnoteData ExifMnoteData;
+
+void exif_mnote_data_ref   (ExifMnoteData *);
+void exif_mnote_data_unref (ExifMnoteData *);
+
+void exif_mnote_data_load (ExifMnoteData *, const unsigned char *,
+			   unsigned int);
+void exif_mnote_data_save (ExifMnoteData *, unsigned char **, unsigned int *);
+
+unsigned int exif_mnote_data_count           (ExifMnoteData *);
+unsigned int exif_mnote_data_get_id          (ExifMnoteData *, unsigned int);
+const char  *exif_mnote_data_get_name        (ExifMnoteData *, unsigned int);
+const char  *exif_mnote_data_get_title       (ExifMnoteData *, unsigned int);
+const char  *exif_mnote_data_get_description (ExifMnoteData *, unsigned int);
+
+/* Returns NULL or val */
+char  *exif_mnote_data_get_value (ExifMnoteData *, unsigned int, char *val, unsigned int maxlen);
+
+void exif_mnote_data_log (ExifMnoteData *, ExifLog *);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __EXIF_MNOTE_DATA_H__ */
diff --git a/src/libexif/exif-tag.c b/src/libexif/exif-tag.c
new file mode 100644
index 0000000..7781611
--- /dev/null
+++ b/src/libexif/exif-tag.c
@@ -0,0 +1,686 @@
+/* exif-tag.c
+ *
+ * Copyright � 2001 Lutz M�ller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+
+#include <libexif/exif-tag.h>
+#include <libexif/i18n.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+typedef enum { /* values start at 1, so a table entry with no initializer (0) matches none of them */
+	ESL_MANDATORY    = 1, /* Mandatory               */
+	ESL_CMANDATORY   = 2, /* Conditionally mandatory */
+	ESL_OPTIONAL     = 3, /* Optional                */
+	ESL_NOT_RECORDED = 4  /* Not recorded            */
+} ExifSL;                     /* Exif Support Level; element type of the esl_* arrays in ExifTagTable */
+
+static struct {
+	ExifTag tag;
+	const char *name;
+	const char *title;
+	const char *description;
+	ExifSL esl_0[4], esl_1[4], esl_exif[4], esl_gps[4];
+} ExifTagTable[] = {
+	{EXIF_TAG_NEW_SUBFILE_TYPE, "NewSubfileType",
+	 "New Subfile Type", N_("A general indication of the kind of data "
+	    "contained in this subfile.")},
+	{EXIF_TAG_INTEROPERABILITY_INDEX, "InteroperabilityIndex",
+	 "InteroperabilityIndex",
+	 N_("Indicates the identification of the Interoperability rule. "
+	    "Use \"R98\" for stating ExifR98 Rules. Four bytes used "
+	    "including the termination code (NULL). see the separate "
+	    "volume of Recommended Exif Interoperability Rules (ExifR98) "
+	    "for other tags used for ExifR98.")},
+	{EXIF_TAG_INTEROPERABILITY_VERSION, "InteroperabilityVersion",
+	 "InteroperabilityVersion", ""},
+	{EXIF_TAG_IMAGE_WIDTH, "ImageWidth", N_("Image Width"),
+	 N_("The number of columns of image data, equal to the number of "
+	    "pixels per row. In JPEG compressed data a JPEG marker is "
+	    "used instead of this tag."),
+	 {ESL_MANDATORY, ESL_MANDATORY, ESL_MANDATORY, ESL_NOT_RECORDED}},
+	{EXIF_TAG_IMAGE_LENGTH, "ImageLength", N_("Image Length"),
+	 N_("The number of rows of image data. In JPEG compressed data a "
+	    "JPEG marker is used instead of this tag."), 
+	 {ESL_MANDATORY, ESL_MANDATORY, ESL_MANDATORY, ESL_NOT_RECORDED}},
+	{EXIF_TAG_BITS_PER_SAMPLE, "BitsPerSample", N_("Bits per Sample"),
+	 N_("The number of bits per image component. In this standard each "
+	    "component of the image is 8 bits, so the value for this "
+	    "tag is 9. See also <SamplesPerPixel>. In JPEG compressed data "
+	    "a JPEG marker is used instead of this tag."),
+	 {ESL_MANDATORY, ESL_MANDATORY, ESL_MANDATORY, ESL_NOT_RECORDED}},
+	{EXIF_TAG_COMPRESSION, "Compression", N_("Compression"),
+	 N_("The compression scheme used for the image data. When a "
+	    "primary image is JPEG compressed, this designation is "
+	    "not necessary and is omitted. When thumbnails use JPEG "
+	    "compression, this tag value is set to 6."),
+	 {ESL_MANDATORY, ESL_MANDATORY, ESL_MANDATORY, ESL_NOT_RECORDED}},
+	{EXIF_TAG_PHOTOMETRIC_INTERPRETATION, "PhotometricInterpretation",
+	 N_("Photometric Interpretation"),
+	 N_("The pixel composition. In JPEG compressed data a JPEG "
+	    "marker is used instead of this tag."),
+	 {ESL_MANDATORY, ESL_MANDATORY, ESL_MANDATORY, ESL_NOT_RECORDED}},
+	{EXIF_TAG_FILL_ORDER, "FillOrder", N_("Fill Order"), ""},
+	{EXIF_TAG_DOCUMENT_NAME, "DocumentName", N_("Document Name"), ""},
+	{EXIF_TAG_IMAGE_DESCRIPTION, "ImageDescription",
+	 N_("Image Description"),
+	 N_("A character string giving the title of the image. It may be "
+	    "a comment such as \"1988 company picnic\" or "
+	    "the like. Two-bytes character codes cannot be used. "
+	    "When a 2-bytes code is necessary, the Exif Private tag "
+	    "<UserComment> is to be used."),
+	 {ESL_CMANDATORY, ESL_CMANDATORY, ESL_CMANDATORY, ESL_CMANDATORY}},
+	{EXIF_TAG_MAKE, "Make", N_("Manufacturer"),
+	 N_("The manufacturer of the recording "
+	    "equipment. This is the manufacturer of the DSC, scanner, "
+	    "video digitizer or other equipment that generated the "
+	    "image. When the field is left blank, it is treated as "
+	    "unknown."),
+	 {ESL_CMANDATORY, ESL_CMANDATORY, ESL_CMANDATORY, ESL_CMANDATORY}},
+	{EXIF_TAG_MODEL, "Model", N_("Model"),
+	 N_("The model name or model number of the equipment. This is the "
+	    "model name or number of the DSC, scanner, video digitizer "
+	    "or other equipment that generated the image. When the field "
+	    "is left blank, it is treated as unknown."),
+	 {ESL_CMANDATORY, ESL_CMANDATORY, ESL_CMANDATORY, ESL_CMANDATORY}},
+	{EXIF_TAG_STRIP_OFFSETS, "StripOffsets", N_("Strip Offsets"),
+	 N_("For each strip, the byte offset of that strip. It is "
+	    "recommended that this be selected so the number of strip "
+	    "bytes does not exceed 64 Kbytes. With JPEG compressed "
+	    "data this designation is not needed and is omitted. See also "
+	    "<RowsPerStrip> and <StripByteCounts>."),
+	 {ESL_CMANDATORY, ESL_CMANDATORY, ESL_CMANDATORY, ESL_CMANDATORY}},
+	{EXIF_TAG_ORIENTATION, "Orientation", N_("Orientation"),
+	 N_("The image orientation viewed in terms of rows and columns."),
+	 {ESL_CMANDATORY, ESL_CMANDATORY, ESL_CMANDATORY, ESL_CMANDATORY}},
+	{EXIF_TAG_SAMPLES_PER_PIXEL, "SamplesPerPixel",
+	 N_("Samples per Pixel"),
+	 N_("The number of components per pixel. Since this standard applies "
+	    "to RGB and YCbCr images, the value set for this tag is 3. "
+	    "In JPEG compressed data a JPEG marker is used instead of this "
+	    "tag.")},
+	{EXIF_TAG_ROWS_PER_STRIP, "RowsPerStrip", N_("Rows per Strip"),
+	 N_("The number of rows per strip. This is the number of rows "
+	    "in the image of one strip when an image is divided into "
+	    "strips. With JPEG compressed data this designation is not "
+	    "needed and is omitted. See also <RowsPerStrip> and "
+	    "<StripByteCounts>.")},
+	{EXIF_TAG_STRIP_BYTE_COUNTS, "StripByteCounts", N_("Strip Byte Count"),
+	 N_("The total number of bytes in each strip. With JPEG compressed "
+	    "data this designation is not needed and is omitted.")},
+	{EXIF_TAG_X_RESOLUTION, "XResolution", N_("x-Resolution"),
+	 N_("The number of pixels per <ResolutionUnit> in the <ImageWidth> "
+	    "direction. When the image resolution is unknown, 72 [dpi] "
+	    "is designated.")},
+	{EXIF_TAG_Y_RESOLUTION, "YResolution", N_("y-Resolution"),
+	 N_("The number of pixels per <ResolutionUnit> in the <ImageLength> "
+	    "direction. The same value as <XResolution> is designated.")},
+	{EXIF_TAG_PLANAR_CONFIGURATION, "PlanarConfiguration",
+	 N_("Planar Configuration"),
+	 N_("Indicates whether pixel components are recorded in a chunky "
+	    "or planar format. In JPEG compressed files a JPEG marker "
+	    "is used instead of this tag. If this field does not exist, "
+	    "the TIFF default of 1 (chunky) is assumed.")},
+	{EXIF_TAG_RESOLUTION_UNIT, "ResolutionUnit", N_("Resolution Unit"),
+	 N_("The unit for measuring <XResolution> and <YResolution>. The same "
+	    "unit is used for both <XResolution> and <YResolution>. If "
+	    "the image resolution is unknown, 2 (inches) is designated.")},
+	{EXIF_TAG_TRANSFER_FUNCTION, "TransferFunction",
+	 N_("Transfer Function"),
+	 N_("A transfer function for the image, described in tabular style. "
+	    "Normally this tag is not necessary, since color space is "
+	    "specified in the color space information tag (<ColorSpace>).")},
+	{EXIF_TAG_SOFTWARE, "Software", N_("Software"),
+	 N_("This tag records the name and version of the software or "
+	    "firmware of the camera or image input device used to "
+	    "generate the image. The detailed format is not specified, but "
+	    "it is recommended that the example shown below be "
+	    "followed. When the field is left blank, it is treated as "
+	    "unknown.")},
+	{EXIF_TAG_DATE_TIME, "DateTime", N_("Date and Time"),
+	 N_("The date and time of image creation. In this standard "
+	    "(EXIF-2.1) it is the date and time the file was changed.")},
+	{EXIF_TAG_ARTIST, "Artist", N_("Artist"),
+	 N_("This tag records the name of the camera owner, photographer or "
+	    "image creator. The detailed format is not specified, but it is "
+	    "recommended that the information be written as in the example "
+	    "below for ease of Interoperability. When the field is "
+	    "left blank, it is treated as unknown.")},
+	{EXIF_TAG_WHITE_POINT, "WhitePoint", N_("White Point"),
+	 N_("The chromaticity of the white point of the image. Normally "
+	    "this tag is not necessary, since color space is specified "
+	    "in the colorspace information tag (<ColorSpace>).")},
+	{EXIF_TAG_PRIMARY_CHROMATICITIES, "PrimaryChromaticities",
+	 N_("Primary Chromaticities"),
+	 N_("The chromaticity of the three primary colors of the image. "
+	    "Normally this tag is not necessary, since colorspace is "
+	    "specified in the colorspace information tag (<ColorSpace>).")},
+	{EXIF_TAG_TRANSFER_RANGE, "TransferRange", N_("Transfer Range"), ""},
+	{EXIF_TAG_SUB_IFDS, "SubIFDs", "SubIFD Offsets", N_("Defined by Adobe Corporation "
+	    "to enable TIFF Trees within a TIFF file.")},
+	{EXIF_TAG_JPEG_PROC, "JPEGProc", "JPEGProc", ""},
+	{EXIF_TAG_JPEG_INTERCHANGE_FORMAT, "JPEGInterchangeFormat",
+	 N_("JPEG Interchange Format"),
+	 N_("The offset to the start byte (SOI) of JPEG compressed "
+	    "thumbnail data. This is not used for primary image "
+	    "JPEG data.")},
+	{EXIF_TAG_JPEG_INTERCHANGE_FORMAT_LENGTH,
+	 "JPEGInterchangeFormatLength", N_("JPEG Interchange Format Length"),
+	 N_("The number of bytes of JPEG compressed thumbnail data. This "
+	    "is not used for primary image JPEG data. JPEG thumbnails "
+	    "are not divided but are recorded as a continuous JPEG "
+	    "bitstream from SOI to EOI. Appn and COM markers should "
+	    "not be recorded. Compressed thumbnails must be recorded in no "
+	    "more than 64 Kbytes, including all other data to be "
+	    "recorded in APP1.")},
+	{EXIF_TAG_YCBCR_COEFFICIENTS, "YCbCrCoefficients",
+	 N_("YCbCr Coefficients"),
+	 N_("The matrix coefficients for transformation from RGB to YCbCr "
+	    "image data. No default is given in TIFF; but here the "
+	    "value given in Appendix E, \"Color Space Guidelines\", is used "
+	    "as the default. The color space is declared in a "
+	    "color space information tag, with the default being the value "
+	    "that gives the optimal image characteristics "
+	    "Interoperability this condition.")},
+	{EXIF_TAG_YCBCR_SUB_SAMPLING, "YCbCrSubSampling",
+	 N_("YCbCr Sub-Sampling"),
+	 N_("The sampling ratio of chrominance components in relation to the "
+	    "luminance component. In JPEG compressed data a JPEG marker "
+	    "is used instead of this tag.")},
+	{EXIF_TAG_YCBCR_POSITIONING, "YCbCrPositioning",
+	 N_("YCbCr Positioning"),
+	 N_("The position of chrominance components in relation to the "
+	    "luminance component. This field is designated only for "
+	    "JPEG compressed data or uncompressed YCbCr data. The TIFF "
+	    "default is 1 (centered); but when Y:Cb:Cr = 4:2:2 it is "
+	    "recommended in this standard that 2 (co-sited) be used to "
+	    "record data, in order to improve the image quality when viewed "
+	    "on TV systems. When this field does not exist, the reader shall "
+	    "assume the TIFF default. In the case of Y:Cb:Cr = 4:2:0, the "
+	    "TIFF default (centered) is recommended. If the reader "
+	    "does not have the capability of supporting both kinds of "
+	    "<YCbCrPositioning>, it shall follow the TIFF default regardless "
+	    "of the value in this field. It is preferable that readers "
+	    "be able to support both centered and co-sited positioning.")},
+	{EXIF_TAG_REFERENCE_BLACK_WHITE, "ReferenceBlackWhite",
+	 N_("Reference Black/White"),
+	 N_("The reference black point value and reference white point "
+	    "value. No defaults are given in TIFF, but the values "
+	    "below are given as defaults here. The color space is declared "
+	    "in a color space information tag, with the default "
+	    "being the value that gives the optimal image characteristics "
+	    "Interoperability these conditions.")},
+	{EXIF_TAG_XML_PACKET, "XMLPacket", N_("XML Packet"), N_("XMP Metadata")},
+	{EXIF_TAG_RELATED_IMAGE_FILE_FORMAT, "RelatedImageFileFormat",
+	 "RelatedImageFileFormat", ""},
+	{EXIF_TAG_RELATED_IMAGE_WIDTH, "RelatedImageWidth",
+	 "RelatedImageWidth", ""},
+	{EXIF_TAG_RELATED_IMAGE_LENGTH, "RelatedImageLength",
+	 "RelatedImageLength", ""},
+	{EXIF_TAG_CFA_REPEAT_PATTERN_DIM, "CFARepeatPatternDim",
+	 "CFARepeatPatternDim", ""},
+	{EXIF_TAG_CFA_PATTERN, "CFAPattern",
+	 N_("CFA Pattern"),
+	 N_("Indicates the color filter array (CFA) geometric pattern of the "
+	    "image sensor when a one-chip color area sensor is used. "
+	    "It does not apply to all sensing methods.")},
+	{EXIF_TAG_BATTERY_LEVEL, "BatteryLevel", N_("Battery Level"), ""},
+	{EXIF_TAG_COPYRIGHT, "Copyright", N_("Copyright"),
+	 N_("Copyright information. In this standard the tag is used to "
+	    "indicate both the photographer and editor copyrights. It is "
+	    "the copyright notice of the person or organization claiming "
+	    "rights to the image. The Interoperability copyright "
+	    "statement including date and rights should be written in this "
+	    "field; e.g., \"Copyright, John Smith, 19xx. All rights "
+	    "reserved.\". In this standard the field records both the "
+	    "photographer and editor copyrights, with each recorded in a "
+	    "separate part of the statement. When there is a clear "
+	    "distinction between the photographer and editor copyrights, "
+	    "these are to be written in the order of photographer followed "
+	    "by editor copyright, separated by NULL (in this case, "
+	    "since the statement also ends with a NULL, there are two NULL "
+	    "codes) (see example 1). When only the photographer is given, "
+	    "it is terminated by one NULL code (see example 2). When only "
+	    "the editor copyright is given, "
+	    "the photographer copyright part consists of one space followed "
+	    "by a terminating NULL code, then the editor copyright is given "
+	    "(see example 3). When the field is left blank, it is treated "
+	    "as unknown.")},
+	{EXIF_TAG_EXPOSURE_TIME, "ExposureTime", N_("Exposure Time"),
+	 N_("Exposure time, given in seconds (sec).")},
+	{EXIF_TAG_FNUMBER, "FNumber", N_("FNumber"),
+	 N_("The F number.")},
+	{EXIF_TAG_IPTC_NAA, "IPTC/NAA", "IPTC/NAA", ""},
+	{EXIF_TAG_IMAGE_RESOURCES, "ImageResources", N_("Image Resources Block"), ""},
+	{EXIF_TAG_EXIF_IFD_POINTER, "ExifIFDPointer", "ExifIFDPointer",
+	 N_("A pointer to the Exif IFD. Interoperability, Exif IFD has the "
+	    "same structure as that of the IFD specified in TIFF. "
+	    "ordinarily, however, it does not contain image data as in "
+	    "the case of TIFF.")},
+	{EXIF_TAG_INTER_COLOR_PROFILE, "InterColorProfile",
+	 "InterColorProfile", ""},
+	{EXIF_TAG_EXPOSURE_PROGRAM, "ExposureProgram", "ExposureProgram",
+	 N_("The class of the program used by the camera to set exposure "
+	    "when the picture is taken.")},
+	{EXIF_TAG_SPECTRAL_SENSITIVITY, "SpectralSensitivity",
+	 N_("Spectral Sensitivity"),
+	 N_("Indicates the spectral sensitivity of each channel of the "
+	    "camera used. The tag value is an ASCII string compatible "
+	    "with the standard developed by the ASTM Technical committee.")},
+	{EXIF_TAG_GPS_INFO_IFD_POINTER, "GPSInfoIFDPointer", 
+	 "GPSInfoIFDPointer",
+	 N_("A pointer to the GPS Info IFD. The "
+	    "Interoperability structure of the GPS Info IFD, like that of "
+	    "Exif IFD, has no image data.")},
+#if 0
+	{EXIF_TAG_GPS_VERSION_ID, "GPSVersionID", "",
+	 N_("Indicates the version of <GPSInfoIFD>. The version is given "
+	    "as 2.0.0.0. This tag is mandatory when <GPSInfo> tag is "
+	    "present. (Note: The <GPSVersionID tag is given in bytes, "
+	    "unlike the <ExifVersion> tag. When the version is "
+	    "2.0.0.0, the tag value is 02000000.H).")},
+	{EXIF_TAG_GPS_LATITUDE_REF, "GPSLatitudeRef", ""
+	 N_("Indicates whether the latitude is north or south latitude. The "
+	    "ASCII value 'N' indicates north latitude, and 'S' is south "
+	    "latitude.")},
+	{EXIF_TAG_GPS_LATITUDE, "GPSLatitude", ""
+	 N_("Indicates the latitude. The latitude is expressed as three "
+	    "RATIONAL values giving the degrees, minutes, and seconds, "
+	    "respectively. When degrees, minutes and seconds are expressed, "
+	    "the format is dd/1,mm/1,ss/1. When degrees and minutes are used "
+	    "and, for example, fractions of minutes are given up to two "
+	    "two decimal places, the format is dd/1,mmmm/100,0/1.")},
+	{EXIF_TAG_GPS_LONGITUDE_REF, "GPSLongitudeRef", ""
+	 N_("Indicates whether the longitude is east or west longitude. "
+	    "ASCII 'E' indicates east longitude, and 'W' is west "
+	    "longitude.")},
+	{EXIF_TAG_GPS_LONGITUDE, "GPSLongitude", ""
+	 N_("Indicates the longitude. The longitude is expressed as three "
+	    "RATIONAL values giving the degrees, minutes, and seconds, "
+	    "respectively. When degrees, minutes and seconds are expressed, "
+	    "the format is ddd/1,mm/1,ss/1. When degrees and minutes are "
+	    "used and, for example, fractions of minutes are given up to "
+	    "two decimal places, the format is ddd/1,mmmm/100,0/1.")}, 
+#endif
+	{EXIF_TAG_ISO_SPEED_RATINGS, "ISOSpeedRatings",
+	 N_("ISO Speed Ratings"),
+	 N_("Indicates the ISO Speed and ISO Latitude of the camera or "
+	    "input device as specified in ISO 12232.")},
+	{EXIF_TAG_OECF, "OECF", "OECF",
+	 N_("Indicates the Opto-Electoric Conversion Function (OECF) "
+	    "specified in ISO 14524. <OECF> is the relationship between "
+	    "the camera optical input and the image values.")},
+	{EXIF_TAG_EXIF_VERSION, "ExifVersion", N_("Exif Version"),
+	 N_("The version of this standard supported. Nonexistence of this "
+	    "field is taken to mean nonconformance to the standard.")},
+	{EXIF_TAG_DATE_TIME_ORIGINAL, "DateTimeOriginal",
+	 N_("Date and Time (original)"),
+	 N_("The date and time when the original image data was generated. "
+	    "For a digital still camera "
+	    "the date and time the picture was taken are recorded.")},
+	{EXIF_TAG_DATE_TIME_DIGITIZED, "DateTimeDigitized",
+	 N_("Date and Time (digitized)"),
+	 N_("The date and time when the image was stored as digital data. ")},
+	{EXIF_TAG_COMPONENTS_CONFIGURATION, "ComponentsConfiguration",
+	 "ComponentsConfiguration",
+	 N_("Information specific to compressed data. The channels of "
+	    "each component are arranged in order from the 1st "
+	    "component to the 4th. For uncompressed data the data "
+	    "arrangement is given in the <PhotometricInterpretation> tag. "
+	    "However, since <PhotometricInterpretation> can only "
+	    "express the order of Y, Cb and Cr, this tag is provided "
+	    "for cases when compressed data uses components other than "
+	    "Y, Cb, and Cr and to enable support of other sequences.")},
+	{EXIF_TAG_COMPRESSED_BITS_PER_PIXEL, "CompressedBitsPerPixel",
+	 N_("Compressed Bits per Pixel"),
+	 N_("Information specific to compressed data. The compression mode "
+	    "used for a compressed image is indicated in unit bits "
+	    "per pixel.")},
+	{EXIF_TAG_SHUTTER_SPEED_VALUE, "ShutterSpeedValue", N_("Shutter speed"),
+	 N_("Shutter speed. The unit is the APEX (Additive System of "
+	    "Photographic Exposure) setting (see Appendix C).")},
+	{EXIF_TAG_APERTURE_VALUE, "ApertureValue", N_("Aperture"),
+	 N_("The lens aperture. The unit is the APEX value.")},
+	{EXIF_TAG_BRIGHTNESS_VALUE, "BrightnessValue", N_("Brightness"),
+	 N_("The value of brightness. The unit is the APEX value. "
+	    "Ordinarily it is given in the range of -99.99 to 99.99.")},
+	{EXIF_TAG_EXPOSURE_BIAS_VALUE, "ExposureBiasValue",
+	 N_("Exposure Bias"),
+	 N_("The exposure bias. The units is the APEX value. Ordinarily "
+	    "it is given in the range of -99.99 to 99.99.")},
+	{EXIF_TAG_MAX_APERTURE_VALUE, "MaxApertureValue", "MaxApertureValue",
+	 N_("The smallest F number of the lens. The unit is the APEX value. "
+	    "Ordinarily it is given in the range of 00.00 to 99.99, "
+	    "but it is not limited to this range.")},
+	{EXIF_TAG_SUBJECT_DISTANCE, "SubjectDistance",
+	 N_("Subject Distance"),
+	 N_("The distance to the subject, given in meters.")},
+	{EXIF_TAG_METERING_MODE, "MeteringMode", N_("Metering Mode"),
+	 N_("The metering mode.")},
+	{EXIF_TAG_LIGHT_SOURCE, "LightSource", N_("Light Source"),
+	 N_("The kind of light source.")},
+	{EXIF_TAG_FLASH, "Flash", N_("Flash"),
+	 N_("This tag is recorded when an image is taken using a strobe "
+	    "light (flash).")},
+	{EXIF_TAG_FOCAL_LENGTH, "FocalLength", N_("Focal Length"),
+	 N_("The actual focal length of the lens, in mm. Conversion is not "
+	    "made to the focal length of a 35 mm film camera.")},
+	{EXIF_TAG_MAKER_NOTE, "MakerNote", N_("Maker Note"),
+	 N_("A tag for manufacturers of Exif writers to record any desired "
+	    "information. The contents are up to the manufacturer.")},
+	{EXIF_TAG_USER_COMMENT, "UserComment", N_("User Comment"),
+	 N_("A tag for Exif users to write keywords or comments on the image "
+	    "besides those in <ImageDescription>, and without the "
+	    "character code limitations of the <ImageDescription> tag. The "
+	    "character code used in the <UserComment> tag is identified "
+	    "based on an ID code in a fixed 8-byte area at the start of "
+	    "the tag data area. The unused portion of the area is padded "
+	    "with NULL (\"00.h\"). ID codes are assigned by means of "
+	    "registration. The designation method and references for each "
+	    "character code are given in Table 6. The value of CountN "
+	    "is determinated based on the 8 bytes in the character code "
+	    "area and the number of bytes in the user comment part. Since "
+	    "the TYPE is not ASCII, NULL termination is not necessary "
+	    "(see Fig. 9). "
+	    "The ID code for the <UserComment> area may be a Defined code "
+	    "such as JIS or ASCII, or may be Undefined. The Undefined name "
+	    "is UndefinedText, and the ID code is filled with 8 bytes of all "
+	    "\"NULL\" (\"00.H\"). An Exif reader that reads the "
+	    "<UserComment> tag must have a function for determining the "
+	    "ID code. This function is not required in Exif readers that "
+	    "do not use the <UserComment> tag (see Table 7). "
+	    "When a <UserComment> area is set aside, it is recommended that "
+	    "the ID code be ASCII and that the following user comment "
+	    "part be filled with blank characters [20.H].")},
+	{EXIF_TAG_SUB_SEC_TIME, "SubsecTime", "SubsecTime",
+	 N_("A tag used to record fractions of seconds for the "
+	    "<DateTime> tag.")},
+	{EXIF_TAG_SUB_SEC_TIME_ORIGINAL, "SubSecTimeOriginal",
+	 "SubSecTimeOriginal",
+	 N_("A tag used to record fractions of seconds for the "
+	    "<DateTimeOriginal> tag.")},
+	{EXIF_TAG_SUB_SEC_TIME_DIGITIZED, "SubSecTimeDigitized",
+	 "SubSecTimeDigitized",
+	 N_("A tag used to record fractions of seconds for the "
+	    "<DateTimeDigitized> tag.")},
+	{EXIF_TAG_FLASH_PIX_VERSION, "FlashPixVersion", "FlashPixVersion",
+	 N_("The FlashPix format version supported by a FPXR file.")},
+	{EXIF_TAG_COLOR_SPACE, "ColorSpace", N_("Color Space"),
+	 N_("The color space information tag is always "
+	    "recorded as the color space specifier. Normally sRGB (=1) "
+	    "is used to define the color space based on the PC monitor "
+	    "conditions and environment. If a color space other than "
+	    "sRGB is used, Uncalibrated (=FFFF.H) is set. Image data "
+	    "recorded as Uncalibrated can be treated as sRGB when it is "
+	    "converted to FlashPix. On sRGB see Appendix E.")},
+	{EXIF_TAG_PIXEL_X_DIMENSION, "PixelXDimension", "PixelXDimension",
+	 N_("Information specific to compressed data. When a "
+	    "compressed file is recorded, the valid width of the "
+	    "meaningful image must be recorded in this tag, whether or "
+	    "not there is padding data or a restart marker. This tag "
+	    "should not exist in an uncompressed file. For details see "
+	    "section 2.8.1 and Appendix F.")},
+	{EXIF_TAG_PIXEL_Y_DIMENSION, "PixelYDimension", "PixelYDimension",
+	 N_("Information specific to compressed data. When a compressed "
+	    "file is recorded, the valid height of the meaningful image "
+	    "must be recorded in this tag, whether or not there is padding "
+	    "data or a restart marker. This tag should not exist in an "
+	    "uncompressed file. For details see section 2.8.1 and Appendix "
+	    "F. Since data padding is unnecessary in the vertical direction, "
+	    "the number of lines recorded in this valid image height tag "
+	    "will in fact be the same as that recorded in the SOF.")},
+	{EXIF_TAG_RELATED_SOUND_FILE, "RelatedSoundFile",
+	 "RelatedSoundFile",
+	 N_("This tag is used to record the name of an audio file related "
+	    "to the image data. The only relational information "
+	    "recorded here is the Exif audio file name and extension (an "
+	    "ASCII string consisting of 8 characters + '.' + 3 "
+	    "characters). The path is not recorded. Stipulations on audio "
+	    "are given in  section 3.6.3. File naming conventions are "
+	    "given in section 3.7.1. "
+	    "When using this tag, audio files must be recorded in "
+	    "conformance to the Exif audio format. Writers are also allowed "
+	    "to store the data such as Audio within APP2 as FlashPix "
+	    "extension stream data. "
+	    "Audio files must be recorded in conformance to the Exif audio "
+	    "format. The mapping of Exif image files and audio files is done "
+	    "in any of the three ways shown in Table 8. If multiple files "
+	    "are mapped to one file as in [2] or [3] of this table, the above "
+	    "format is used to record just one audio file name. If "
+	    "there are multiple audio files, the first recorded file is "
+	    "given. In the case of [3] in Table 8, for example, for the "
+	    "Exif image file \"DSC00001.JPG\" only  \"SND00001.WAV\" is "
+	    "given as the related Exif audio file. When there are three "
+	    "Exif audio files \"SND00001.WAV\", \"SND00002.WAV\" and "
+	    "\"SND00003.WAV\", the Exif image file name for each of them, "
+	    "\"DSC00001.JPG\", is indicated. By combining multiple "
+	    "relational information, a variety of playback possibilities "
+	    "can be supported. The method of using relational information "
+	    "is left to the implementation on the playback side. Since this "
+	    "information is an ASCII character string, it is terminated by "
+	    "NULL. When this tag is used to map audio files, the relation "
+	    "of the audio file to image data must also be indicated on the "
+	    "audio file end.")},
+	{EXIF_TAG_INTEROPERABILITY_IFD_POINTER, "InteroperabilityIFDPointer",
+	 "InteroperabilityIFDPointer",
+	 N_("Interoperability IFD is composed of tags which stores the "
+	    "information to ensure the Interoperability and pointed "
+	    "by the following tag located in Exif IFD. "
+	    "The Interoperability structure of Interoperability IFD is "
+	    "the same as TIFF defined IFD structure "
+	    "but does not contain the "
+	    "image data characteristically compared with normal TIFF "
+	    "IFD.")},
+	{EXIF_TAG_FLASH_ENERGY, "FlashEnergy", N_("Flash Energy"),
+	 N_("Indicates the strobe energy at the time the image is "
+	    "captured, as measured in Beam Candle Power Seconds (BCPS).")},
+	{EXIF_TAG_SPATIAL_FREQUENCY_RESPONSE, "SpatialFrequencyResponse",
+	 N_("Spatial Frequency Response"),
+	 N_("This tag records the camera or input device spatial frequency "
+	    "table and SFR values in the direction of image width, "
+	    "image height, and diagonal direction, as specified in ISO "
+	    "12233.")},
+	{EXIF_TAG_FOCAL_PLANE_X_RESOLUTION, "FocalPlaneXResolution",
+	 N_("Focal Plane x-Resolution"),
+	 N_("Indicates the number of pixels in the image width (X) direction "
+	    "per <FocalPlaneResolutionUnit> on the camera focal plane.")},
+	{EXIF_TAG_FOCAL_PLANE_Y_RESOLUTION, "FocalPlaneYResolution",
+	 N_("Focal Plane y-Resolution"),
+	 N_("Indicates the number of pixels in the image height (V) direction "
+	    "per <FocalPlaneResolutionUnit> on the camera focal plane.")},
+	{EXIF_TAG_FOCAL_PLANE_RESOLUTION_UNIT, "FocalPlaneResolutionUnit",
+	 N_("Focal Plane Resolution Unit"),
+	 N_("Indicates the unit for measuring <FocalPlaneXResolution> and "
+	    "<FocalPlaneYResolution>. This value is the same as the "
+	    "<ResolutionUnit>.")},
+	{EXIF_TAG_SUBJECT_LOCATION, "SubjectLocation",
+	 N_("Subject Location"),
+	 N_("Indicates the location of the main subject in the scene. The "
+	    "value of this tag represents the pixel at the center of the "
+	    "main subject relative to the left edge, prior to rotation "
+	    "processing as per the <Rotation> tag. The first value "
+	    "indicates the X column number and second indicates "
+	    "the Y row number.")},
+	{EXIF_TAG_EXPOSURE_INDEX, "ExposureIndex", N_("Exposure index"),
+	 N_("Indicates the exposure index selected on the camera or "
+	    "input device at the time the image is captured.")},
+	{EXIF_TAG_SENSING_METHOD, "SensingMethod", N_("Sensing Method"),
+	 N_("Indicates the image sensor type on the camera or input "
+	    "device.")},
+	{EXIF_TAG_FILE_SOURCE, "FileSource", N_("File Source"),
+	 N_("Indicates the image source. If a DSC recorded the image, "
+	    "this tag value of this tag always be set to 3, indicating "
+	    "that the image was recorded on a DSC.")},
+	{EXIF_TAG_SCENE_TYPE, "SceneType", N_("Scene Type"),
+	 N_("Indicates the type of scene. If a DSC recorded the image, "
+	    "this tag value must always be set to 1, indicating that the "
+	    "image was directly photographed.")},
+	{EXIF_TAG_NEW_CFA_PATTERN, "CFAPattern",
+	 N_("CFA Pattern"),
+	 N_("Indicates the color filter array (CFA) geometric pattern of the "
+	    "image sensor when a one-chip color area sensor is used. "
+	    "It does not apply to all sensing methods.")},
+	{EXIF_TAG_SUBJECT_AREA, "SubjectArea", N_("Subject Area"),
+	 N_("This tag indicates the location and area of the main subject "
+	    "in the overall scene.")},
+	{EXIF_TAG_TIFF_EP_STANDARD_ID, "TIFF/EPStandardID", N_("TIFF/EP Standard ID"), ""},
+	{EXIF_TAG_CUSTOM_RENDERED, "CustomRendered", N_("Custom Rendered"),
+	 N_("This tag indicates the use of special processing on image "
+	    "data, such as rendering geared to output. When special "
+	    "processing is performed, the reader is expected to disable "
+	    "or minimize any further processing.")},
+	{EXIF_TAG_EXPOSURE_MODE, "ExposureMode", N_("Exposure Mode"),
+	 N_("This tag indicates the exposure mode set when the image was "
+	    "shot. In auto-bracketing mode, the camera shoots a series of "
+	    "frames of the same scene at different exposure settings.")},
+	{EXIF_TAG_WHITE_BALANCE, "WhiteBalance", N_("White Balance"),
+	 N_("This tag indicates the white balance mode set when the image "
+	    "was shot.")},
+	{EXIF_TAG_DIGITAL_ZOOM_RATIO, "DigitalZoomRatio",
+	 N_("Digital Zoom Ratio"),
+	 N_("This tag indicates the digital zoom ratio when the image was "
+	    "shot. If the numerator of the recorded value is 0, this "
+	    "indicates that digital zoom was not used.")},
+	{EXIF_TAG_FOCAL_LENGTH_IN_35MM_FILM, "FocalLengthIn35mmFilm",
+	 N_("Focal Length In 35mm Film"),
+	 N_("This tag indicates the equivalent focal length assuming a "
+	    "35mm film camera, in mm. A value of 0 means the focal "
+	    "length is unknown. Note that this tag differs from the "
+	    "FocalLength tag.")},
+	{EXIF_TAG_SCENE_CAPTURE_TYPE, "SceneCaptureType",
+	 N_("Scene Capture Type"),
+	 N_("This tag indicates the type of scene that was shot. It can "
+	    "also be used to record the mode in which the image was "
+	    "shot. Note that this differs from the scene type "
+	    "(SceneType) tag.")},
+	{EXIF_TAG_GAIN_CONTROL, "GainControl", N_("Gain Control"),
+	 N_("This tag indicates the degree of overall image gain "
+	    "adjustment.")},
+	{EXIF_TAG_CONTRAST, "Contrast", N_("Contrast"),
+	 N_("This tag indicates the direction of contrast processing "
+	    "applied by the camera when the image was shot.")},
+	{EXIF_TAG_SATURATION, "Saturation", N_("Saturation"),
+	 N_("This tag indicates the direction of saturation processing "
+	    "applied by the camera when the image was shot.")},
+	{EXIF_TAG_SHARPNESS, "Sharpness", N_("Sharpness"),
+	 N_("This tag indicates the direction of sharpness processing "
+	    "applied by the camera when the image was shot.")},
+	{EXIF_TAG_DEVICE_SETTING_DESCRIPTION, "DeviceSettingDescription",
+	 N_("Device Setting Description"),
+	 N_("This tag indicates information on the picture-taking "
+	    "conditions of a particular camera model. The tag is used "
+	    "only to indicate the picture-taking conditions in the "
+	    "reader.")},
+	{EXIF_TAG_SUBJECT_DISTANCE_RANGE, "SubjectDistanceRange",
+	 N_("Subject Distance Range"),
+	 N_("This tag indicates the distance to the subject.")},
+	{EXIF_TAG_IMAGE_UNIQUE_ID, "ImageUniqueID", N_("Image Unique ID"),
+	 N_("This tag indicates an identifier assigned uniquely to "
+	    "each image. It is recorded as an ASCII string equivalent "
+	    "to hexadecimal notation and 128-bit fixed length.")},
+	{0, NULL, NULL, NULL}
+};
+
+/*
+ * Index-based accessors for ExifTagTable, defined further down in this
+ * file. For now, do not use these functions.
+ */
+ExifTag      exif_tag_table_get_tag  (unsigned int n);
+const char  *exif_tag_table_get_name (unsigned int n);
+unsigned int exif_tag_table_count    (void);
+
+/*
+ * Return the tag stored in entry n of ExifTagTable, or 0 when n is not
+ * smaller than exif_tag_table_count ().
+ */
+ExifTag
+exif_tag_table_get_tag (unsigned int n)
+{
+	if (n >= exif_tag_table_count ())
+		return 0;
+
+	return ExifTagTable[n].tag;
+}
+
+/*
+ * Return the name stored in entry n of ExifTagTable, or NULL when n is
+ * not smaller than exif_tag_table_count ().
+ */
+const char *
+exif_tag_table_get_name (unsigned int n)
+{
+	if (n >= exif_tag_table_count ())
+		return NULL;
+
+	return ExifTagTable[n].name;
+}
+
+/*
+ * Number of elements in ExifTagTable. The all-zero terminator entry at
+ * the end of the table is part of the array and therefore counted too.
+ */
+unsigned int
+exif_tag_table_count (void)
+{
+	return sizeof (ExifTagTable) / sizeof (*ExifTagTable);
+}
+
+/*
+ * Look up the name of a tag. Returns the name of the first table entry
+ * whose tag equals the argument, or NULL when no entry matches.
+ */
+const char *
+exif_tag_get_name (ExifTag tag)
+{
+	unsigned int idx = 0;
+
+	/* Advance until a match or the terminator (name == NULL) is hit. */
+	while (ExifTagTable[idx].name && ExifTagTable[idx].tag != tag)
+		idx++;
+
+	return ExifTagTable[idx].name;
+}
+
+/*
+ * Return the translated title of a tag.
+ *
+ * Returns NULL when the tag is not in ExifTagTable. An empty title is
+ * returned as is, without translation.
+ */
+const char *
+exif_tag_get_title (ExifTag tag)
+{
+	unsigned int i;
+	const char *title;
+
+	/* FIXME: This belongs to somewhere else. */
+	bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8");
+	bindtextdomain (GETTEXT_PACKAGE, LOCALEDIR);
+
+	for (i = 0; ExifTagTable[i].title; i++)
+		if (ExifTagTable[i].tag == tag)
+			break;
+
+	title = ExifTagTable[i].title;
+
+	/*
+	 * Never hand NULL (tag not found) or "" to the translation macro:
+	 * a NULL msgid is not a valid argument and gettext maps the empty
+	 * msgid to the catalog header instead of an empty string.
+	 */
+	if (!title || !*title)
+		return title;
+
+	return _(title);
+}
+
+/*
+ * Return the translated description of a tag.
+ *
+ * Returns NULL when the tag is not in ExifTagTable. Entries whose
+ * description is the empty string yield "" (untranslated).
+ */
+const char *
+exif_tag_get_description (ExifTag tag)
+{
+	unsigned int i;
+	const char *description;
+
+	bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8");
+	bindtextdomain (GETTEXT_PACKAGE, LOCALEDIR);
+
+	for (i = 0; ExifTagTable[i].description; i++)
+		if (ExifTagTable[i].tag == tag)
+			break;
+
+	description = ExifTagTable[i].description;
+
+	/*
+	 * Several table entries carry "" as description. gettext treats the
+	 * empty msgid as the key of the catalog header, so translating it
+	 * would return the .po header text. NULL (tag not found) must not
+	 * be passed to the translation macro either.
+	 */
+	if (!description || !*description)
+		return description;
+
+	return _(description);
+}
+
+/*
+ * Find the tag that belongs to a name. The comparison is an exact
+ * strcmp () match against the names in ExifTagTable; the first matching
+ * entry wins. Returns 0 for a NULL name, and the tag of the terminator
+ * entry (0) when no name matches.
+ */
+ExifTag
+exif_tag_from_name (const char *name)
+{
+	unsigned int idx;
+
+	if (name == NULL)
+		return 0;
+
+	idx = 0;
+	while (ExifTagTable[idx].name != NULL &&
+	       strcmp (ExifTagTable[idx].name, name) != 0)
+		idx++;
+
+	return ExifTagTable[idx].tag;
+}
+
+/*
+ * Return the name of table entry i and store its tag in *tag.
+ *
+ * i:   index into ExifTagTable.
+ * tag: receives the tag of the entry; may be NULL if the caller is only
+ *      interested in the name. Left untouched when NULL is returned.
+ *
+ * Returns NULL when i addresses the terminator entry or lies beyond the
+ * end of the table.
+ */
+const char *
+exif_tag_get_name_index (unsigned int i, ExifTag *tag)
+{
+	/* Never index past the end of the table. */
+	if (i >= sizeof (ExifTagTable) / sizeof (ExifTagTable[0]))
+		return NULL;
+
+	if (!ExifTagTable[i].name)
+		return NULL;
+
+	if (tag)
+		*tag = ExifTagTable[i].tag;
+
+	return (ExifTagTable[i].name);
+}
+
diff --git a/src/libexif/exif-tag.h b/src/libexif/exif-tag.h
new file mode 100644
index 0000000..f928374
--- /dev/null
+++ b/src/libexif/exif-tag.h
@@ -0,0 +1,148 @@
+/* exif-tag.h
+ *
+ * Copyright © 2001 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_TAG_H__
+#define __EXIF_TAG_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*
+ * Numeric identifiers of the tags this library knows about.
+ * NOTE(review): the values look like the tag numbers assigned by the
+ * TIFF/Exif specifications - confirm against the specification before
+ * adding or changing entries.
+ */
+typedef enum {
+	EXIF_TAG_INTEROPERABILITY_INDEX		= 0x0001,
+	EXIF_TAG_INTEROPERABILITY_VERSION	= 0x0002,
+	EXIF_TAG_NEW_SUBFILE_TYPE		= 0x00fe,
+	EXIF_TAG_IMAGE_WIDTH 			= 0x0100,
+	EXIF_TAG_IMAGE_LENGTH 			= 0x0101,
+	EXIF_TAG_BITS_PER_SAMPLE 		= 0x0102,
+	EXIF_TAG_COMPRESSION 			= 0x0103,
+	EXIF_TAG_PHOTOMETRIC_INTERPRETATION 	= 0x0106,
+	EXIF_TAG_FILL_ORDER 			= 0x010a,
+	EXIF_TAG_DOCUMENT_NAME 			= 0x010d,
+	EXIF_TAG_IMAGE_DESCRIPTION 		= 0x010e,
+	EXIF_TAG_MAKE 				= 0x010f,
+	EXIF_TAG_MODEL 				= 0x0110,
+	EXIF_TAG_STRIP_OFFSETS 			= 0x0111,
+	EXIF_TAG_ORIENTATION 			= 0x0112,
+	EXIF_TAG_SAMPLES_PER_PIXEL 		= 0x0115,
+	EXIF_TAG_ROWS_PER_STRIP 		= 0x0116,
+	EXIF_TAG_STRIP_BYTE_COUNTS		= 0x0117,
+	EXIF_TAG_X_RESOLUTION 			= 0x011a,
+	EXIF_TAG_Y_RESOLUTION 			= 0x011b,
+	EXIF_TAG_PLANAR_CONFIGURATION 		= 0x011c,
+	EXIF_TAG_RESOLUTION_UNIT 		= 0x0128,
+	EXIF_TAG_TRANSFER_FUNCTION 		= 0x012d,
+	EXIF_TAG_SOFTWARE 			= 0x0131,
+	EXIF_TAG_DATE_TIME			= 0x0132,
+	EXIF_TAG_ARTIST				= 0x013b,
+	EXIF_TAG_WHITE_POINT			= 0x013e,
+	EXIF_TAG_PRIMARY_CHROMATICITIES		= 0x013f,
+	/* The next two entries are listed out of numeric order. */
+	EXIF_TAG_TRANSFER_RANGE			= 0x0156,
+	EXIF_TAG_SUB_IFDS			= 0x014a,
+	EXIF_TAG_JPEG_PROC			= 0x0200,
+	EXIF_TAG_JPEG_INTERCHANGE_FORMAT	= 0x0201,
+	EXIF_TAG_JPEG_INTERCHANGE_FORMAT_LENGTH	= 0x0202,
+	EXIF_TAG_YCBCR_COEFFICIENTS		= 0x0211,
+	EXIF_TAG_YCBCR_SUB_SAMPLING		= 0x0212,
+	EXIF_TAG_YCBCR_POSITIONING		= 0x0213,
+	EXIF_TAG_REFERENCE_BLACK_WHITE		= 0x0214,
+	EXIF_TAG_XML_PACKET			= 0x02bc,
+	EXIF_TAG_RELATED_IMAGE_FILE_FORMAT	= 0x1000,
+	EXIF_TAG_RELATED_IMAGE_WIDTH		= 0x1001,
+	EXIF_TAG_RELATED_IMAGE_LENGTH		= 0x1002,
+	EXIF_TAG_CFA_REPEAT_PATTERN_DIM		= 0x828d,
+	EXIF_TAG_CFA_PATTERN			= 0x828e,
+	EXIF_TAG_BATTERY_LEVEL			= 0x828f,
+	EXIF_TAG_COPYRIGHT			= 0x8298,
+	EXIF_TAG_EXPOSURE_TIME			= 0x829a,
+	EXIF_TAG_FNUMBER			= 0x829d,
+	EXIF_TAG_IPTC_NAA			= 0x83bb,
+	EXIF_TAG_IMAGE_RESOURCES		= 0x8649,
+	EXIF_TAG_EXIF_IFD_POINTER		= 0x8769,
+	EXIF_TAG_INTER_COLOR_PROFILE		= 0x8773,
+	EXIF_TAG_EXPOSURE_PROGRAM		= 0x8822,
+	EXIF_TAG_SPECTRAL_SENSITIVITY		= 0x8824,
+	EXIF_TAG_GPS_INFO_IFD_POINTER		= 0x8825,
+	EXIF_TAG_ISO_SPEED_RATINGS		= 0x8827,
+	EXIF_TAG_OECF				= 0x8828,
+	EXIF_TAG_EXIF_VERSION			= 0x9000,
+	EXIF_TAG_DATE_TIME_ORIGINAL		= 0x9003,
+	EXIF_TAG_DATE_TIME_DIGITIZED		= 0x9004,
+	EXIF_TAG_COMPONENTS_CONFIGURATION	= 0x9101,
+	EXIF_TAG_COMPRESSED_BITS_PER_PIXEL	= 0x9102,
+	EXIF_TAG_SHUTTER_SPEED_VALUE		= 0x9201,
+	EXIF_TAG_APERTURE_VALUE			= 0x9202,
+	EXIF_TAG_BRIGHTNESS_VALUE		= 0x9203,
+	EXIF_TAG_EXPOSURE_BIAS_VALUE		= 0x9204,
+	EXIF_TAG_MAX_APERTURE_VALUE		= 0x9205,
+	EXIF_TAG_SUBJECT_DISTANCE		= 0x9206,
+	EXIF_TAG_METERING_MODE			= 0x9207,
+	EXIF_TAG_LIGHT_SOURCE			= 0x9208,
+	EXIF_TAG_FLASH				= 0x9209,
+	EXIF_TAG_FOCAL_LENGTH			= 0x920a,
+	EXIF_TAG_SUBJECT_AREA			= 0x9214,
+	EXIF_TAG_TIFF_EP_STANDARD_ID		= 0x9216,
+	EXIF_TAG_MAKER_NOTE			= 0x927c,
+	EXIF_TAG_USER_COMMENT			= 0x9286,
+	EXIF_TAG_SUB_SEC_TIME			= 0x9290,
+	EXIF_TAG_SUB_SEC_TIME_ORIGINAL		= 0x9291,
+	EXIF_TAG_SUB_SEC_TIME_DIGITIZED		= 0x9292,
+	EXIF_TAG_FLASH_PIX_VERSION		= 0xa000,
+	EXIF_TAG_COLOR_SPACE			= 0xa001,
+	EXIF_TAG_PIXEL_X_DIMENSION		= 0xa002,
+	EXIF_TAG_PIXEL_Y_DIMENSION		= 0xa003,
+	EXIF_TAG_RELATED_SOUND_FILE		= 0xa004,
+	EXIF_TAG_INTEROPERABILITY_IFD_POINTER	= 0xa005,
+	EXIF_TAG_FLASH_ENERGY			= 0xa20b,
+	EXIF_TAG_SPATIAL_FREQUENCY_RESPONSE	= 0xa20c,
+	EXIF_TAG_FOCAL_PLANE_X_RESOLUTION	= 0xa20e,
+	EXIF_TAG_FOCAL_PLANE_Y_RESOLUTION	= 0xa20f,
+	EXIF_TAG_FOCAL_PLANE_RESOLUTION_UNIT	= 0xa210,
+	EXIF_TAG_SUBJECT_LOCATION		= 0xa214,
+	EXIF_TAG_EXPOSURE_INDEX			= 0xa215,
+	EXIF_TAG_SENSING_METHOD			= 0xa217,
+	EXIF_TAG_FILE_SOURCE			= 0xa300,
+	EXIF_TAG_SCENE_TYPE			= 0xa301,
+	/* Distinct value from EXIF_TAG_CFA_PATTERN (0x828e) above. */
+	EXIF_TAG_NEW_CFA_PATTERN		= 0xa302,
+	EXIF_TAG_CUSTOM_RENDERED		= 0xa401,
+	EXIF_TAG_EXPOSURE_MODE			= 0xa402,
+	EXIF_TAG_WHITE_BALANCE			= 0xa403,
+	EXIF_TAG_DIGITAL_ZOOM_RATIO		= 0xa404,
+	EXIF_TAG_FOCAL_LENGTH_IN_35MM_FILM	= 0xa405,
+	EXIF_TAG_SCENE_CAPTURE_TYPE		= 0xa406,
+	EXIF_TAG_GAIN_CONTROL			= 0xa407,
+	EXIF_TAG_CONTRAST			= 0xa408,
+	EXIF_TAG_SATURATION			= 0xa409,
+	EXIF_TAG_SHARPNESS			= 0xa40a,
+	EXIF_TAG_DEVICE_SETTING_DESCRIPTION	= 0xa40b,
+	EXIF_TAG_SUBJECT_DISTANCE_RANGE		= 0xa40c,
+	EXIF_TAG_IMAGE_UNIQUE_ID		= 0xa420
+} ExifTag;
+
+/* Map a tag name to its tag; yields 0 for NULL or unknown names. */
+ExifTag         exif_tag_from_name       (const char *);
+/* Name of a tag, or NULL when the tag is unknown. */
+const char     *exif_tag_get_name        (ExifTag tag);
+/* Localized title of a tag. */
+const char     *exif_tag_get_title       (ExifTag tag);
+/* Localized description of a tag. */
+const char     *exif_tag_get_description (ExifTag tag);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __EXIF_TAG_H__ */
diff --git a/src/libexif/exif-utils.c b/src/libexif/exif-utils.c
new file mode 100644
index 0000000..9386012
--- /dev/null
+++ b/src/libexif/exif-utils.c
@@ -0,0 +1,214 @@
+/* exif-utils.c
+ *
+ * Copyright © 2001 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+
+#include <libexif/exif-utils.h>
+
+/*
+ * Convert, in place, an array of n values of format f stored at b from
+ * byte order o_orig to byte order o_new.
+ *
+ * Only SHORT, SSHORT, LONG, SLONG, RATIONAL and SRATIONAL values are
+ * rewritten; every other format is left untouched. Nothing happens when
+ * b is NULL, n is 0 or the format has size 0.
+ */
+void
+exif_array_set_byte_order (ExifFormat f, unsigned char *b, unsigned int n,
+		ExifByteOrder o_orig, ExifByteOrder o_new)
+{
+	unsigned int elem_size = exif_format_get_size (f);
+	unsigned int k;
+
+	if (!b || !n || !elem_size)
+		return;
+
+	for (k = 0; k < n; k++) {
+		unsigned char *p = b + k * elem_size;
+
+		switch (f) {
+		case EXIF_FORMAT_SHORT:
+			exif_set_short (p, o_new, exif_get_short (p, o_orig));
+			break;
+		case EXIF_FORMAT_SSHORT:
+			exif_set_sshort (p, o_new, exif_get_sshort (p, o_orig));
+			break;
+		case EXIF_FORMAT_LONG:
+			exif_set_long (p, o_new, exif_get_long (p, o_orig));
+			break;
+		case EXIF_FORMAT_SLONG:
+			exif_set_slong (p, o_new, exif_get_slong (p, o_orig));
+			break;
+		case EXIF_FORMAT_RATIONAL:
+			exif_set_rational (p, o_new,
+					   exif_get_rational (p, o_orig));
+			break;
+		case EXIF_FORMAT_SRATIONAL:
+			exif_set_srational (p, o_new,
+					    exif_get_srational (p, o_orig));
+			break;
+		default:
+			/* UNDEFINED, BYTE, ASCII and others: nothing to do. */
+			return;
+		}
+	}
+}
+
+/*
+ * Read a signed 16-bit value from the two bytes at buf, interpreting them
+ * in the given byte order. Returns 0 when buf is NULL or the byte order
+ * is neither Motorola nor Intel.
+ */
+ExifSShort
+exif_get_sshort (const unsigned char *buf, ExifByteOrder order)
+{
+	int msb, lsb;
+
+	if (!buf)
+		return 0;
+
+	if (order == EXIF_BYTE_ORDER_MOTOROLA) {
+		/* most significant byte first */
+		msb = buf[0];
+		lsb = buf[1];
+	} else if (order == EXIF_BYTE_ORDER_INTEL) {
+		/* least significant byte first */
+		msb = buf[1];
+		lsb = buf[0];
+	} else {
+		return (0);
+	}
+
+	return ((msb << 8) | lsb);
+}
+
+/*
+ * Read an unsigned 16-bit value from buf in the given byte order. The
+ * bytes are fetched with exif_get_sshort () and masked to 16 bits.
+ */
+ExifShort
+exif_get_short (const unsigned char *buf, ExifByteOrder order)
+{
+	const ExifSShort raw = exif_get_sshort (buf, order);
+
+	return (raw & 0xffff);
+}
+
+/*
+ * Store a signed 16-bit value into the two bytes at b using the given
+ * byte order. Does nothing when b is NULL or the byte order is neither
+ * Motorola nor Intel.
+ */
+void
+exif_set_sshort (unsigned char *b, ExifByteOrder order, ExifSShort value)
+{
+	/*
+	 * Work on the unsigned bit pattern: right-shifting a negative
+	 * signed value is implementation-defined.
+	 */
+	const ExifShort bits = (ExifShort) value;
+
+	if (!b) return;
+	switch (order) {
+	case EXIF_BYTE_ORDER_MOTOROLA:
+		b[0] = (unsigned char) (bits >> 8);
+		b[1] = (unsigned char) bits;
+		break;
+	case EXIF_BYTE_ORDER_INTEL:
+		b[0] = (unsigned char) bits;
+		b[1] = (unsigned char) (bits >> 8);
+		break;
+	}
+}
+
+/*
+ * Store an unsigned 16-bit value at b in the given byte order. The work
+ * is delegated to exif_set_sshort ().
+ */
+void
+exif_set_short (unsigned char *b, ExifByteOrder order, ExifShort value)
+{
+	exif_set_sshort (b, order, (ExifSShort) value);
+}
+
+/*
+ * Read a signed 32-bit value from the four bytes at b, interpreting them
+ * in the given byte order. Returns 0 when b is NULL or the byte order is
+ * neither Motorola nor Intel.
+ */
+ExifSLong
+exif_get_slong (const unsigned char *b, ExifByteOrder order)
+{
+	ExifLong bits;
+
+	if (!b) return 0;
+
+	/*
+	 * Assemble the value in unsigned arithmetic: an unsigned char is
+	 * promoted to int, and shifting a byte >= 0x80 left by 24 bits
+	 * would overflow that int, which is undefined behaviour.
+	 */
+	switch (order) {
+	case EXIF_BYTE_ORDER_MOTOROLA:
+		bits = ((ExifLong) b[0] << 24) | ((ExifLong) b[1] << 16) |
+		       ((ExifLong) b[2] << 8)  |  (ExifLong) b[3];
+		return ((ExifSLong) bits);
+	case EXIF_BYTE_ORDER_INTEL:
+		bits = ((ExifLong) b[3] << 24) | ((ExifLong) b[2] << 16) |
+		       ((ExifLong) b[1] << 8)  |  (ExifLong) b[0];
+		return ((ExifSLong) bits);
+	}
+
+	/* Won't be reached */
+	return (0);
+}
+
+/*
+ * Store a signed 32-bit value into the four bytes at b using the given
+ * byte order. Does nothing when b is NULL or the byte order is neither
+ * Motorola nor Intel.
+ */
+void
+exif_set_slong (unsigned char *b, ExifByteOrder order, ExifSLong value)
+{
+	/*
+	 * Work on the unsigned bit pattern: right-shifting a negative
+	 * signed value is implementation-defined.
+	 */
+	const ExifLong bits = (ExifLong) value;
+
+	if (!b) return;
+	switch (order) {
+	case EXIF_BYTE_ORDER_MOTOROLA:
+		b[0] = (unsigned char) (bits >> 24);
+		b[1] = (unsigned char) (bits >> 16);
+		b[2] = (unsigned char) (bits >> 8);
+		b[3] = (unsigned char) bits;
+		break;
+	case EXIF_BYTE_ORDER_INTEL:
+		b[3] = (unsigned char) (bits >> 24);
+		b[2] = (unsigned char) (bits >> 16);
+		b[1] = (unsigned char) (bits >> 8);
+		b[0] = (unsigned char) bits;
+		break;
+	}
+}
+
+/*
+ * Read an unsigned 32-bit value from buf in the given byte order. The
+ * bytes are fetched with exif_get_slong () and masked to 32 bits.
+ */
+ExifLong
+exif_get_long (const unsigned char *buf, ExifByteOrder order)
+{
+	const ExifSLong raw = exif_get_slong (buf, order);
+
+	return (raw & 0xffffffff);
+}
+
+/*
+ * Store an unsigned 32-bit value at b in the given byte order. The work
+ * is delegated to exif_set_slong ().
+ */
+void
+exif_set_long (unsigned char *b, ExifByteOrder order, ExifLong value)
+{
+	exif_set_slong (b, order, (ExifSLong) value);
+}
+
+/*
+ * Read a signed rational from buf: the numerator comes from the first
+ * four bytes, the denominator from the following four. A NULL buf yields
+ * 0/0.
+ */
+ExifSRational
+exif_get_srational (const unsigned char *buf, ExifByteOrder order)
+{
+	ExifSRational result;
+
+	if (buf) {
+		result.numerator   = exif_get_slong (buf, order);
+		result.denominator = exif_get_slong (buf + 4, order);
+	} else {
+		result.numerator   = 0;
+		result.denominator = 0;
+	}
+
+	return (result);
+}
+
+/*
+ * Read an unsigned rational from buf: the numerator comes from the first
+ * four bytes, the denominator from the following four. A NULL buf yields
+ * 0/0.
+ */
+ExifRational
+exif_get_rational (const unsigned char *buf, ExifByteOrder order)
+{
+	ExifRational result;
+
+	if (buf) {
+		result.numerator   = exif_get_long (buf, order);
+		result.denominator = exif_get_long (buf + 4, order);
+	} else {
+		result.numerator   = 0;
+		result.denominator = 0;
+	}
+
+	return (result);
+}
+
+/*
+ * Store an unsigned rational at buf: numerator in the first four bytes,
+ * denominator in the next four. Does nothing when buf is NULL.
+ */
+void
+exif_set_rational (unsigned char *buf, ExifByteOrder order,
+		   ExifRational value)
+{
+	if (buf != NULL) {
+		exif_set_long (buf, order, value.numerator);
+		exif_set_long (buf + 4, order, value.denominator);
+	}
+}
+
+/*
+ * Store a signed rational at buf: numerator in the first four bytes,
+ * denominator in the next four. Does nothing when buf is NULL.
+ */
+void
+exif_set_srational (unsigned char *buf, ExifByteOrder order,
+		    ExifSRational value)
+{
+	if (buf != NULL) {
+		exif_set_slong (buf, order, value.numerator);
+		exif_set_slong (buf + 4, order, value.denominator);
+	}
+}
diff --git a/src/libexif/exif-utils.h b/src/libexif/exif-utils.h
new file mode 100644
index 0000000..a75db0b
--- /dev/null
+++ b/src/libexif/exif-utils.h
@@ -0,0 +1,81 @@
+/* exif-utils.h
+ *
+ * Copyright © 2001 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_UTILS_H__
+#define __EXIF_UTILS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#include <libexif/exif-byte-order.h>
+#include <libexif/exif-format.h>
+#include <libexif/_stdint.h>
+
+
+/* If these definitions don't work for you, please let us fix the 
+ * macro generating _stdint.h */
+	
+typedef char		ExifByte;          /* 1 byte; signedness follows plain char */
+typedef char *		ExifAscii;         /* pointer to char data */
+typedef uint16_t	ExifShort;         /* 2 bytes */
+typedef int16_t         ExifSShort;        /* 2 bytes */
+typedef uint32_t	ExifLong;          /* 4 bytes */
+typedef struct {ExifLong numerator; ExifLong denominator;} ExifRational;   /* two ExifLong values */
+typedef char		ExifUndefined;     /* 1 byte  */
+typedef int32_t		ExifSLong;         /* 4 bytes */
+typedef struct {ExifSLong numerator; ExifSLong denominator;} ExifSRational; /* two ExifSLong values */
+
+
+ExifShort     exif_get_short     (const unsigned char *b, ExifByteOrder order);
+ExifSShort    exif_get_sshort    (const unsigned char *b, ExifByteOrder order);
+ExifLong      exif_get_long      (const unsigned char *b, ExifByteOrder order);
+ExifSLong     exif_get_slong     (const unsigned char *b, ExifByteOrder order);
+ExifRational  exif_get_rational  (const unsigned char *b, ExifByteOrder order);
+ExifSRational exif_get_srational (const unsigned char *b, ExifByteOrder order);
+
+void exif_set_short     (unsigned char *b, ExifByteOrder order,
+			 ExifShort value);
+void exif_set_sshort    (unsigned char *b, ExifByteOrder order,
+			 ExifSShort value);
+void exif_set_long      (unsigned char *b, ExifByteOrder order,
+			 ExifLong value);
+void exif_set_slong     (unsigned char *b, ExifByteOrder order,
+			 ExifSLong value);
+void exif_set_rational  (unsigned char *b, ExifByteOrder order,
+			 ExifRational value);
+void exif_set_srational (unsigned char *b, ExifByteOrder order,
+			 ExifSRational value);
+
+/* Please do not use this function outside of the library. */
+void exif_array_set_byte_order (ExifFormat, unsigned char *, unsigned int,
+		ExifByteOrder o_orig, ExifByteOrder o_new);
+
+#undef  MIN
+#define MIN(a, b)  (((a) < (b)) ? (a) : (b)) /* evaluates one argument twice: pass no side effects */
+
+/* For compatibility with older versions */
+#define EXIF_TAG_SUBSEC_TIME EXIF_TAG_SUB_SEC_TIME
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __EXIF_UTILS_H__ */
diff --git a/src/libexif/exif.c b/src/libexif/exif.c
new file mode 100644
index 0000000..3be72a3
--- /dev/null
+++ b/src/libexif/exif.c
@@ -0,0 +1,1274 @@
+/*
+
+Copyright © 2000 Matthias Wandel, The PHP Group, Curtis Galloway
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+
+#include <sys/time.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+#include <sys/stat.h>
+#include <stdarg.h>
+#include <fcntl.h>
+
+#include "exif.h"
+
+typedef unsigned char uchar;
+
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
+#endif
+
+/* 
+   This structure stores global state for an EXIF image file.
+*/
+typedef struct {
+	exif_data_t *d;		/* records collected so far; exif_append_data may reallocate it */
+	int MotorolaOrder;	/* 1 when the TIFF header says "MM", 0 for "II" (set by process_EXIF) */
+	const char *filename;	/* printed by ReadJpegFile when the file cannot be opened */
+
+	char *Thumbnail;	/* NOTE(review): not referenced in the part of the file reviewed here */
+	int ThumbnailSize;
+} ImageInfoType;
+
+void *(*exif_malloc_fn)(int);		/* allocation hooks called throughout this file; they are */
+void *(*exif_realloc_fn)(void *, int);	/* not assigned in the code reviewed here, so presumably */
+void (*exif_free_fn)(void *);		/* the client installs them before any other call - confirm */
+
+/* Return a new NUL-terminated copy of at most len bytes of str, or NULL if len < 0 or allocation fails. */
+static char *
+exif_strndup(char *str, int len)
+{
+	char *rval = (len < 0) ? NULL : (*exif_malloc_fn)(len+1);
+	if (rval != NULL) { strncpy(rval, str, len); rval[len] = '\0'; }
+	return rval;
+}
+/* Allocate a zero-filled exif_data_t; returns NULL when the allocator fails. */
+struct exif_data *
+exif_alloc(void)
+{
+	exif_data_t *d;
+
+	d = (*exif_malloc_fn)(sizeof(exif_data_t));
+	if (d != NULL) memset(d, 0, sizeof(*d)); /* never clear through a NULL pointer */
+	return d;
+}
+/* printf-style error report: formats the message to standard output (no newline is added). */
+static void
+exif_error(char *fmt, ...)
+{
+	va_list ap;
+	va_start(ap, fmt);
+	vprintf(fmt, ap);
+	va_end(ap);
+}
+
+
+/* This structure is used to store a section of a Jpeg file. */
+typedef struct {
+    uchar *Data;        /* section bytes; marker sections start with their 2 length bytes */
+    int      Type;      /* marker code, or PSEUDO_IMAGE_MARKER for the trailing image data */
+    unsigned Size;      /* number of bytes stored in Data */
+} Section_t;
+
+#define EXIT_FAILURE  1
+#define EXIT_SUCCESS  0
+
+
+/* 
+   JPEG markers consist of one or more 0xFF bytes, followed by a marker
+   code byte (which is not an FF).  Here are the marker codes of interest
+   in this program.  (See jdmarker.c for a more complete list.)
+*/
+
+#define M_SOF0  0xC0            /* Start Of Frame N                        */
+#define M_SOF1  0xC1            /* N indicates which compression process   */
+#define M_SOF2  0xC2            /* Only SOF0-SOF2 are now in common use    */
+#define M_SOF3  0xC3
+#define M_SOF5  0xC5            /* NB: codes C4 and CC are NOT SOF markers */
+#define M_SOF6  0xC6
+#define M_SOF7  0xC7
+#define M_SOF9  0xC9
+#define M_SOF10 0xCA
+#define M_SOF11 0xCB
+#define M_SOF13 0xCD
+#define M_SOF14 0xCE
+#define M_SOF15 0xCF
+#define M_SOI   0xD8            /* Start Of Image (beginning of datastream) */
+#define M_EOI   0xD9            /* End Of Image (end of datastream)         */
+#define M_SOS   0xDA            /* Start Of Scan (begins compressed data)   */
+#define M_EXIF  0xE1            
+#define M_COM   0xFE            /* COMment                                  */
+
+
+#define PSEUDO_IMAGE_MARKER 0x123 /* Extra value; no ';' here so the macro also works inside expressions. */
+
+#define EXIF_ALLOC_SIZE 16
+
+/*
+ * Append a record to *d_p (updated if the block moves).  The name gets copied;
+ * a string in *data is not, so you must allocate it yourself.  Returns
+ * EXIT_FAILURE, storing nothing, if rec_type is '\0' or the array cannot grow.
+ */
+static int
+exif_append_data(exif_data_t **d_p,
+				 char *name,
+				 char rec_type,
+				 int exif_format,
+				 exif_rec_data_t *data)
+{
+	exif_data_t *d = *d_p;
+
+	if (rec_type == '\0')
+		return EXIT_FAILURE;
+	if (d->n_alloc <= d->n_recs) {
+		exif_data_t *grown = (*exif_realloc_fn)(d, sizeof(exif_data_t) +
+			sizeof(exif_record_t) * (d->n_alloc + EXIF_ALLOC_SIZE));
+		if (grown == NULL) return EXIT_FAILURE; /* *d_p still points at the old, valid block */
+		d = *d_p = grown;
+		d->n_alloc += EXIF_ALLOC_SIZE;
+	}
+	d->recs[d->n_recs].rec_type = rec_type;
+	memcpy(&d->recs[d->n_recs].rec_data, data, sizeof(exif_rec_data_t));
+	d->recs[d->n_recs].rec_name = (char*)strdup(name);
+	d->n_recs++;
+	return EXIT_SUCCESS;
+}
+
+/*
+   Read 16 bits, high byte first: motorola order (always) for jpeg header stuff.
+*/
+static int
+Get16m(void *Short)
+{
+    return (((uchar *)Short)[0] << 8) | ((uchar *)Short)[1];
+}
+
+
+/*
+   Process a COM marker: store its text as the string record "Comment".
+   Data starts with the 2 length bytes, which are skipped.  To guard against
+   random junk, unprintable bytes become '?' and the cr of a cr-lf pair is dropped.
+*/
+static void
+process_COM (ImageInfoType *ImageInfo, uchar *Data, int length)
+{
+    int ch;
+    char *Comment;
+    int nch;
+    int a;
+    exif_rec_data_t rd;
+
+    nch = 0;
+    Comment = (*exif_malloc_fn)(length+1);
+    if (Comment == NULL) return; /* no memory: the comment is dropped */
+    for (a=2;a<length;a++) {
+        ch = Data[a];
+
+        if (ch == '\r' && a+1 < length && Data[a+1] == '\n') continue; /* Remove cr followed by lf; never look past the section. */
+
+        if (isprint(ch) || ch == '\n' || ch == '\t') {
+            Comment[nch++] = (char)ch;
+        } else {
+            Comment[nch++] = '?';
+        }
+    }
+
+    Comment[nch] = '\0'; /* Null terminate */
+
+    rd.s = Comment;
+    exif_append_data(&ImageInfo->d, "Comment", 's', EXIF_FMT_COMPUTED, &rd);
+}
+ 
+/* Process a SOFn marker.  This is useful for the image dimensions: stores "Height", "Width" and "IsColor". */
+static void
+process_SOFn (ImageInfoType *ImageInfo, uchar *Data, int marker)
+{
+    int data_precision, num_components;
+    const char *process;
+    exif_rec_data_t rd;
+
+    data_precision = Data[2]; /* read but not used afterwards */
+    rd.l = Get16m(Data+3); /* 16-bit height, high byte first */
+    exif_append_data(&ImageInfo->d,
+					 "Height",
+					 'l',
+					 EXIF_FMT_COMPUTED,
+					 &rd);
+    rd.l = Get16m(Data+5); /* 16-bit width, high byte first */
+    exif_append_data(&ImageInfo->d,
+					 "Width",
+					 'l',
+					 EXIF_FMT_COMPUTED,
+					 &rd);
+    num_components = Data[7];
+
+    if (num_components == 3) { /* exactly three components are reported as colour */
+		rd.l = 1;
+    } else {
+		rd.l = 0;
+    }
+    exif_append_data(&ImageInfo->d, "IsColor", 'l', EXIF_FMT_COMPUTED, &rd);
+
+    switch (marker) { /* names the coding process; the name is neither stored nor returned */
+	case M_SOF0:  process = "Baseline";  break;
+	case M_SOF1:  process = "Extended sequential";  break;
+	case M_SOF2:  process = "Progressive";  break;
+	case M_SOF3:  process = "Lossless";  break;
+	case M_SOF5:  process = "Differential sequential";  break;
+	case M_SOF6:  process = "Differential progressive";  break;
+	case M_SOF7:  process = "Differential lossless";  break;
+	case M_SOF9:  process = "Extended sequential, arithmetic coding";  break;
+	case M_SOF10: process = "Progressive, arithmetic coding";  break;
+	case M_SOF11: process = "Lossless, arithmetic coding";  break;
+	case M_SOF13: process = "Differential sequential, arithmetic coding";  break;
+	case M_SOF14: process = "Differential progressive, arithmetic coding"; break;
+	case M_SOF15: process = "Differential lossless, arithmetic coding";  break;
+	default:      process = "Unknown";  break;
+    }
+}
+
+/*
+   Bytes per component for each EXIF format code; index 0 is not a legal format.
+*/
+static int ExifBytesPerFormat[] = {0,1,1,2,4,8,1,1,2,4,8,4,8};
+#define NUM_FORMATS EXIF_FMT_DOUBLE /* upper bound used by ProcessExifDir's format check */
+
+/*
+   Describes tag values
+*/
+
+#define TAG_EXIF_OFFSET       0x8769
+#define TAG_INTEROP_OFFSET    0xa005
+
+#define TAG_COMPRESSION       0x0103
+
+#define TAG_MAKE              0x010F
+#define TAG_MODEL             0x0110
+#define TAG_ORIENTATION       0x0112
+
+#define TAG_SOFTWARE          0x0131
+
+/* Olympus specific tags */
+#define TAG_SPECIALMODE       0x0200
+#define TAG_JPEGQUAL          0x0201
+#define TAG_MACRO             0x0202
+#define TAG_DIGIZOOM          0x0204
+#define TAG_SOFTWARERELEASE   0x0207
+#define TAG_PICTINFO          0x0208
+#define TAG_CAMERAID          0x0209
+/* end Olympus specific tags */
+
+#define TAG_COPYRIGHT         0x8298
+
+#define TAG_EXPOSURETIME      0x829A
+#define TAG_FNUMBER           0x829D
+
+#define TAG_GPSINFO           0x8825
+#define TAG_ISOSPEED          0x8827
+#define TAG_EXIFVERSION       0x9000
+
+#define TAG_SHUTTERSPEED      0x9201
+#define TAG_APERTURE          0x9202
+#define TAG_MAXAPERTURE       0x9205
+#define TAG_FOCALLENGTH       0x920A
+
+#define TAG_DATETIME_ORIGINAL 0x9003
+#define TAG_USERCOMMENT       0x9286
+
+#define TAG_SUBJECT_DISTANCE  0x9206
+#define TAG_LIGHT_SOURCE      0x9208
+#define TAG_FLASH             0x9209
+
+#define TAG_FOCALPLANEXRES    0xa20E
+#define TAG_FOCALPLANEUNITS   0xa210
+#define TAG_IMAGEWIDTH        0xA002
+
+struct ExifTag {
+    unsigned short Tag;	/* tag id, compared with the id read from a directory entry */
+    char *Desc;		/* record name; NULL marks the end of TagTable */
+	void (*Func)();		/* optional handler; NULL selects the generic conversion */
+};
+
+
+
+/* Convert a 16 bit unsigned value from file's native byte order */
+static int
+Get16u(void *Short, int MotorolaOrder)
+{
+    if (MotorolaOrder) { /* high byte first */
+        return (((uchar *)Short)[0] << 8) | ((uchar *)Short)[1];
+    } else { /* low byte first */
+        return (((uchar *)Short)[1] << 8) | ((uchar *)Short)[0];
+    }
+}
+
+/* Convert a 32 bit signed value from file's native byte order.  The bytes are
+   combined as unsigned so that no shift can overflow a signed int. */
+static int
+Get32s(void *Long, int MotorolaOrder)
+{
+    const unsigned char *b = Long;
+    if (MotorolaOrder) {
+        return (int)(((unsigned)b[0] << 24) | ((unsigned)b[1] << 16) | ((unsigned)b[2] << 8) | b[3]);
+    } else {
+        return (int)(((unsigned)b[3] << 24) | ((unsigned)b[2] << 16) | ((unsigned)b[1] << 8) | b[0]);
+    }
+}
+
+/* Convert a 32 bit unsigned value from file's native byte order */
+static unsigned
+Get32u(void *Long, int MotorolaOrder)
+{
+    return (unsigned)Get32s(Long, MotorolaOrder) & 0xffffffff; /* same bytes as Get32s, taken as unsigned */
+}
+
+
+/* Evaluate number, be it int, rational, or float from directory; other formats yield 0. */
+static double
+ConvertAnyFormat(void *ValuePtr, int Format, int MotorolaOrder)
+{
+    double Value;
+    Value = 0;
+
+    switch(Format) {
+	case EXIF_FMT_SBYTE:     Value = *(signed char *)ValuePtr;  break;
+	case EXIF_FMT_BYTE:      Value = *(uchar *)ValuePtr;        break;
+
+	case EXIF_FMT_USHORT:    Value = Get16u(ValuePtr,MotorolaOrder);          break;
+	case EXIF_FMT_ULONG:     Value = Get32u(ValuePtr,MotorolaOrder);          break;
+
+	case EXIF_FMT_URATIONAL:
+	case EXIF_FMT_SRATIONAL: 
+		{
+			/* URATIONAL components are unsigned, so they are read with
+			   Get32u; Get32s would turn values >= 2^31 negative. */
+			int IsUnsigned = (Format == EXIF_FMT_URATIONAL);
+			double Num = IsUnsigned ? (double)Get32u(ValuePtr,MotorolaOrder)
+									: (double)Get32s(ValuePtr,MotorolaOrder);
+			double Den = IsUnsigned ? (double)Get32u(4+(char *)ValuePtr,MotorolaOrder)
+									: (double)Get32s(4+(char *)ValuePtr,MotorolaOrder);
+			Value = (Den == 0) ? 0 : Num/Den; /* a zero denominator yields 0 */
+			break;
+		}
+
+	case EXIF_FMT_SSHORT:    Value = (signed short)Get16u(ValuePtr,MotorolaOrder);  break;
+	case EXIF_FMT_SLONG:     Value = Get32s(ValuePtr,MotorolaOrder);                break;
+
+        /* Not sure if this is correct (never seen float used in Exif format); the bytes are read as stored, ignoring MotorolaOrder */
+	case EXIF_FMT_SINGLE:    Value = (double)*(float *)ValuePtr;      break;
+	case EXIF_FMT_DOUBLE:    Value = *(double *)ValuePtr;             break;
+    }
+    return Value;
+}
+
+/* Convert one directory value into *data_p; returns the record type stored: 's', 'l', 'r' or 'f'. */
+static char
+ConvertAnyFormat2(void *ValuePtr, int ByteCount, int Format, int MotorolaOrder, exif_rec_data_t *data_p)
+{
+	char *str, *p;
+	char r_type;
+	const unsigned char *src = ValuePtr;	/* byte cursor for the hex dump; void * cannot be stepped portably */
+	static char hexdigits[] = "0123456789ABCDEF";
+
+    switch(Format) {
+	case EXIF_FMT_STRING:
+		data_p->s = exif_strndup(ValuePtr, ByteCount);
+		r_type = 's';
+		break;
+
+	case EXIF_FMT_SBYTE:
+		data_p->l = (long)*(signed char *)ValuePtr;
+		r_type = 'l';
+		break;
+
+	case EXIF_FMT_BYTE:
+		data_p->l = (long)*(uchar *)ValuePtr;
+		r_type = 'l';
+        break;
+
+	case EXIF_FMT_USHORT:
+		data_p->l = (long)Get16u(ValuePtr,MotorolaOrder);
+		r_type = 'l';
+		break;
+	case EXIF_FMT_ULONG:
+		data_p->l = (long)Get32u(ValuePtr,MotorolaOrder);
+		r_type = 'l';
+		break;
+
+	case EXIF_FMT_URATIONAL:
+	case EXIF_FMT_SRATIONAL: 
+		{
+			/* numerator first, denominator 4 bytes later; both are read as signed */
+			data_p->r.num = Get32s(ValuePtr,MotorolaOrder);
+			data_p->r.denom = Get32s(4+(char *)ValuePtr,MotorolaOrder);
+			r_type = 'r';
+			break;
+		}
+
+	case EXIF_FMT_SSHORT:
+		data_p->l = (signed short)Get16u(ValuePtr,MotorolaOrder);
+		r_type = 'l';
+		break;
+	case EXIF_FMT_SLONG:
+		data_p->l = (long)Get32s(ValuePtr,MotorolaOrder);
+		r_type = 'l';
+		break;
+
+        /* Not sure if this is correct (never seen float used in Exif format) */
+	case EXIF_FMT_SINGLE:
+		data_p->f = *(float *)ValuePtr;
+		r_type = 'f';
+		break;
+
+	case EXIF_FMT_DOUBLE:
+		data_p->g = *(double *)ValuePtr;
+		r_type = 'f'; /* NOTE(review): the value was stored in g, not f - confirm readers of 'f' expect that */
+		break;
+
+	default:
+		/* unknown type: dump the raw bytes as upper-case hex digits */
+		p = str = (*exif_malloc_fn)((ByteCount > 0 ? ByteCount : 0)*2 + 1);
+		while (p != NULL && ByteCount-- > 0) {
+			unsigned char c = *src++;
+			*p++ = hexdigits[c / 16];
+			*p++ = hexdigits[c % 16];
+		}
+		if (p != NULL) *p = '\0'; /* str stays NULL when the allocation failed */
+		data_p->s = str;
+		r_type = 's';
+		break;
+    }
+    return r_type;
+}
+
+/* Tag handler for FocalPlaneResolutionUnit: stores the unit's length in millimeters as the float record "FocalPlaneUnits". */
+static void
+ProcessFocalPlaneUnits(ImageInfoType *ImageInfo,
+					   void *ValuePtr,
+					   int ByteCount,
+					   int Format,
+					   struct ExifTag *tag_p)
+{
+    exif_rec_data_t rd;
+	float FocalPlaneUnits = 0; /* stays 0 for unit codes the switch does not know */
+
+	switch((int)ConvertAnyFormat(ValuePtr, Format, ImageInfo->MotorolaOrder)) {
+	case 1:
+		FocalPlaneUnits = 25.4;
+		break; /* inch */
+	case 2: 
+		/* According to the information I was using, 2 means meters.
+		   But looking at the Canon PowerShot's files, inches is the only
+		   sensible value. */
+		FocalPlaneUnits = 25.4;
+		break;
+
+	case 3:
+		FocalPlaneUnits = 10;
+		break;  /* centimeter */
+	case 4:
+		FocalPlaneUnits = 1;
+		break;  /* millimeter */
+	case 5:
+		FocalPlaneUnits = .001;
+		break;  /* micrometer */
+	}
+
+	rd.f = FocalPlaneUnits;
+	exif_append_data(&ImageInfo->d,
+					 "FocalPlaneUnits",
+					 'f',
+					 Format,
+					 &rd);
+}
+/* Tag handler for version tags: stores the ByteCount raw bytes as a string record named tag_p->Desc. */
+static void
+ProcessVersion(ImageInfoType *ImageInfo,
+				   void *ValuePtr,
+				   int ByteCount,
+				   int Format,
+				   struct ExifTag *tag_p)
+{
+    exif_rec_data_t rd;
+	rd.s = exif_strndup(ValuePtr, ByteCount);
+	exif_append_data(&ImageInfo->d,
+					 tag_p->Desc,
+					 's',
+					 Format,
+					 &rd);
+}
+/* Tag handler for UserComment: trims trailing-space padding, skips an "ASCII"
+   character-code prefix if present, and stores the text as a string record. */
+static void
+ProcessUserComment(ImageInfoType *ImageInfo,
+				   void *_ValuePtr,
+				   int ByteCount,
+				   int Format,
+				   struct ExifTag *tag_p)
+{
+	char *ValuePtr = (char *)_ValuePtr;
+    exif_rec_data_t rd;
+	int a;
+
+	/* A negative count describes no buffer at all; a zero count still
+	   stores an empty comment below. */
+	if (ByteCount < 0) return;
+
+	/* Olympus has this padded with trailing spaces.  Remove these first. */
+	for (a = ByteCount; a > 0 && ValuePtr[a-1] == ' '; a--) {
+		ValuePtr[a-1] = '\0';
+	}
+
+	/* Copy the comment */
+	if (ByteCount >= 5 && memcmp(ValuePtr, "ASCII",5) == 0) {
+		/* Skip the padding after the prefix without reading past the value;
+		   nothing is stored when only padding follows. */
+		for (a=5;a<10 && a<ByteCount;a++) {
+			int c;
+			c = (ValuePtr)[a];
+			if (c != '\0' && c != ' ') {
+				rd.s = exif_strndup(a+ValuePtr, ByteCount - a);
+				exif_append_data(&ImageInfo->d,
+								 "UserComment",
+								 's',
+								 Format,
+								 &rd);
+				break;
+			}
+		}
+	} else {
+		rd.s = exif_strndup(ValuePtr, ByteCount);
+		exif_append_data(&ImageInfo->d,
+						 "UserComment",
+						 's',
+						 Format,
+						 &rd);
+	}
+}
+/* Tag handler for ShutterSpeedValue: stores the raw value, then a computed 1/N "ShutterSpeed" rational. */
+static void
+ProcessShutterSpeed(ImageInfoType *ImageInfo,
+					void *ValuePtr,
+					int ByteCount,
+					int Format,
+					struct ExifTag *tag_p)
+{
+    exif_rec_data_t rd;
+	char rec_type;
+
+	rec_type = ConvertAnyFormat2(ValuePtr, ByteCount, Format,
+								 ImageInfo->MotorolaOrder,
+								 &rd);
+	exif_append_data(&ImageInfo->d,
+					 tag_p->Desc,
+					 rec_type,
+					 Format,
+					 &rd);
+
+	/* Convert shutter speed value to shutter speed;
+	 * shutter speed is 1/(2**ShutterSpeedValue).  Only a rational with a
+	 * non-zero denominator can be converted, so anything else stops here. */
+	if (rec_type != 'r' || rd.r.denom == 0) return;
+	rd.r.denom = (int)pow(2.0, ((double)rd.r.num)/((double)rd.r.denom));
+	rd.r.num = 1;
+	exif_append_data(&ImageInfo->d,
+					 "ShutterSpeed",
+					 'r',
+					 EXIF_FMT_COMPUTED,
+					 &rd);
+}
+/* Tag handler for aperture tags: stores the raw value and, if no "FNumber" record exists yet, a computed one such as "f2.8". */
+static void
+ProcessAperture(ImageInfoType *ImageInfo,
+				void *ValuePtr,
+				int ByteCount,
+				int Format,
+				struct ExifTag *tag_p)
+{
+    exif_rec_data_t rd;
+	char rec_type;
+	double fstop;
+	char label[32];
+
+	rec_type = ConvertAnyFormat2(ValuePtr, ByteCount, Format,
+								 ImageInfo->MotorolaOrder,
+								 &rd);
+	exif_append_data(&ImageInfo->d,
+					 tag_p->Desc,
+					 rec_type,
+					 Format,
+					 &rd);
+
+	if (rec_type == 'r' && rd.r.denom != 0
+		&& exif_find_record(ImageInfo->d, "FNumber") == NULL) { /* Convert aperture to F-stop. */
+		fstop = pow(sqrt(2), ((double)rd.r.num)/((double)rd.r.denom));
+		snprintf(label, sizeof(label), "f%.1f", fstop); /* "%.1g" printed f/11 as "f1e+01" */
+		rd.s = (char*)strdup(label);
+		exif_append_data(&ImageInfo->d,
+						 "FNumber",
+						 's',
+						 EXIF_FMT_COMPUTED,
+						 &rd);
+	}
+}
+/* MakerNote handler for Canon: stores each entry of the note's directory as a record named "MakerNote" plus the tag in hex. */
+static void
+ProcessCanonMakerNote(ImageInfoType *ImageInfo,
+				 void *ValuePtr,
+				 int ByteCount,
+				 int Format,
+				 struct ExifTag *tag_p,
+				 char *OffsetBase)
+{
+	/* This is for the Canon MakerNote. */
+	/* XXX - go by value of Maker tag. */
+    exif_rec_data_t rd;
+	char rec_type;
+	unsigned long n_dir, tag, format, components, offset;
+	char label[32];
+	char *Entry = ValuePtr;	/* byte cursor: arithmetic on void * is not standard C */
+	void *OffsetPtr;
+
+	/* The note must hold the 2-byte entry count plus 12 bytes per entry. */
+	if (ByteCount < 2) return;
+	n_dir = Get16u(Entry, ImageInfo->MotorolaOrder);
+	if (n_dir > (unsigned long)(ByteCount - 2) / 12) return;
+	Entry += 2;
+	while (n_dir--) {
+		tag = Get16u(Entry, ImageInfo->MotorolaOrder);
+		format = Get16u(Entry + 2, ImageInfo->MotorolaOrder);
+		components = Get32u(Entry + 4, ImageInfo->MotorolaOrder);
+		offset = Get32u(Entry + 8, ImageInfo->MotorolaOrder);
+		/* format indexes ExifBytesPerFormat and the byte count must fit in an int. */
+		if (format >= sizeof(ExifBytesPerFormat) / sizeof(ExifBytesPerFormat[0])
+			|| components > 0x7fffffffUL / 8) return;
+		ByteCount = components * ExifBytesPerFormat[format];
+		/* NOTE(review): offset cannot be range-checked here because the
+		   length of the data behind OffsetBase is not passed in. */
+		OffsetPtr = (ByteCount > 4) ? OffsetBase + offset : Entry + 8;
+		Entry += 12;
+		rec_type = ConvertAnyFormat2(OffsetPtr, ByteCount, format,
+									 ImageInfo->MotorolaOrder,
+									 &rd);
+		sprintf(label, "MakerNote%04lx", tag);
+		exif_append_data(&ImageInfo->d,
+						 label,
+						 rec_type,
+						 format,
+						 &rd);
+	}
+}
+
+
+struct MakerNote {
+	char *Make;	/* compared with the "Make" record by ProcessMakerNote; NULL ends the table */
+	void (*Func)();	/* called with (ImageInfo, ValuePtr, ByteCount, Format, tag_p, OffsetBase) */
+};
+
+static struct MakerNote
+MakerProcessors[] = {
+	{"Canon", ProcessCanonMakerNote},
+    {NULL, NULL}
+};
+/* Tag handler for MakerNote: dispatches to the MakerProcessors entry whose Make equals the "Make" record. */
+static void
+ProcessMakerNote(ImageInfoType *ImageInfo,
+				 void *ValuePtr,
+				 int ByteCount,
+				 int Format,
+				 struct ExifTag *tag_p,
+				 char *OffsetBase)
+{
+	struct MakerNote *mn_p;
+	exif_record_t *rec_p;
+
+	rec_p = exif_find_record(ImageInfo->d, "Make");
+	if (rec_p == NULL) { /* no Make record stored so far: the note is skipped */
+		return;
+	}
+
+	for(mn_p = &MakerProcessors[0]; mn_p->Make != NULL; mn_p++) {
+		if (strcmp(mn_p->Make, rec_p->rec_data.s) == 0) { /* assumes the Make record holds a string - TODO confirm */
+			(*mn_p->Func)(ImageInfo, ValuePtr, ByteCount, Format, tag_p, OffsetBase);
+			break;
+		}
+	}
+}
+
+static struct ExifTag
+TagTable[] = { /* tag id, record name, optional handler; searched linearly by ProcessExifDir */
+	{   0x0001, "InteroperabilityIndex"},
+	{   0x0002, "InteroperabilityVersion", ProcessVersion},
+	{	0x0100,	"ImageWidth"},
+	{	0x0101,	"ImageLength"},
+	{	0x0102,	"BitsPerSample"},
+	{	0x0103,	"Compression"},
+	{	0x0106,	"PhotometricInterpretation"},
+	{	0x010A,	"FillOrder"},
+	{	0x010D,	"DocumentName"},
+	{	0x010E,	"ImageDescription"},
+	{	0x010F,	"Make"},
+	{	0x0110,	"Model"},
+	{	0x0111,	"StripOffsets"},
+	{	0x0112,	"Orientation"},
+	{	0x0115,	"SamplesPerPixel"},
+	{	0x0116,	"RowsPerStrip"},
+	{	0x0117,	"StripByteCounts"},
+	{	0x011A,	"XResolution"},
+	{	0x011B,	"YResolution"},
+	{	0x011C,	"PlanarConfiguration"},
+	{	0x0128,	"ResolutionUnit"},
+	{	0x012D,	"TransferFunction"},
+	{	0x0131,	"Software"},
+	{	0x0132,	"DateTime"},
+	{	0x013B,	"Artist"},
+	{	0x013E,	"WhitePoint"},
+	{	0x013F,	"PrimaryChromaticities"},
+	{	0x0156,	"TransferRange"},
+	{	0x0200,	"JPEGProc"},
+	{	0x0201,	"JPEGInterchangeFormat"},
+	{	0x0202,	"JPEGInterchangeFormatLength"},
+	{	0x0211,	"YCbCrCoefficients"},
+	{	0x0212,	"YCbCrSubSampling"},
+	{	0x0213,	"YCbCrPositioning"},
+	{	0x0214,	"ReferenceBlackWhite"},
+	{   0x1000, "RelatedImageFileFormat"},
+	{   0x1001, "RelatedImageWidth"},
+	{   0x1002, "RelatedImageLength"},
+	{	0x828D,	"CFARepeatPatternDim"},
+	{	0x828E,	"CFAPattern"},
+	{	0x828F,	"BatteryLevel"},
+	{	0x8298,	"Copyright"},
+	{	0x829A,	"ExposureTime"},
+	{	0x829D,	"FNumber"},
+	{	0x83BB,	"IPTC/NAA"},
+	{	0x8769,	"ExifOffset"},
+	{	0x8773,	"InterColorProfile"},
+	{	0x8822,	"ExposureProgram"},
+	{	0x8824,	"SpectralSensitivity"},
+	{	0x8825,	"GPSInfo"},
+	{	0x8827,	"ISOSpeedRatings"},
+	{	0x8828,	"OECF"},
+	{	0x9000,	"ExifVersion", ProcessVersion},
+	{	0x9003,	"DateTimeOriginal"},
+	{	0x9004,	"DateTimeDigitized"},
+	{	0x9101,	"ComponentsConfiguration"},
+	{	0x9102,	"CompressedBitsPerPixel"},
+	{	0x9201,	"ShutterSpeedValue", ProcessShutterSpeed},
+	{	0x9202,	"ApertureValue", ProcessAperture},
+	{	0x9203,	"BrightnessValue"},
+	{	0x9204,	"ExposureBiasValue"},
+	{	0x9205,	"MaxApertureValue", ProcessAperture},
+	{	0x9206,	"SubjectDistance"},
+	{	0x9207,	"MeteringMode"},
+	{	0x9208,	"LightSource"},
+	{	0x9209,	"Flash"},
+	{	0x920A,	"FocalLength"},
+	{	0x927C,	"MakerNote", ProcessMakerNote},
+	{	0x9286,	"UserComment", ProcessUserComment},
+	{	0x9290,	"SubSecTime"},
+	{	0x9291,	"SubSecTimeOriginal"},
+	{	0x9292,	"SubSecTimeDigitized"},
+	{	0xA000,	"FlashPixVersion", ProcessVersion},
+	{	0xA001,	"ColorSpace"},
+	{	0xA002,	"ExifImageWidth"},
+	{	0xA003,	"ExifImageLength"},
+	{	0xA005,	"InteroperabilityOffset"},
+	{	0xA20B,	"FlashEnergy"},			        /* 0x920B in TIFF/EP */
+	{	0xA20C,	"SpatialFrequencyResponse"},	/* 0x920C    -  -    */
+	{	0xA20E,	"FocalPlaneXResolution"},    	/* 0x920E    -  -    */
+	{	0xA20F,	"FocalPlaneYResolution"},	    /* 0x920F    -  -    */
+	{	0xA210,	"FocalPlaneResolutionUnit", ProcessFocalPlaneUnits},
+	                                            /* 0x9210    -  -    */
+	{	0xA214,	"SubjectLocation"},		        /* 0x9214    -  -    */
+	{	0xA215,	"ExposureIndex"},		        /* 0x9215    -  -    */
+	{	0xA217,	"SensingMethod"},		        /* 0x9217    -  -    */
+	{	0xA300,	"FileSource"},
+	{	0xA301,	"SceneType"},
+	{        0, NULL} /* sentinel: a NULL Desc ends the search */
+} ;
+
+
+
+/* Process one of the nested EXIF directories; offset tags recurse into their
+   subdirectory.  Offsets are relative to OffsetBase (ExifLength readable bytes).
+   Returns FALSE on malformed data.  NOTE(review): recursion depth is unbounded. */
+static int
+ProcessExifDir(ImageInfoType *ImageInfo, char *DirStart, char *OffsetBase, unsigned ExifLength, char *LastExifRefd)
+{
+    int de;
+    int NumDirEntries;
+    exif_rec_data_t rd;
+	char rec_type;
+	char label[32];
+
+    NumDirEntries = (DirStart >= OffsetBase && (DirStart+2) <= (OffsetBase+ExifLength))
+        ? Get16u(DirStart, ImageInfo->MotorolaOrder) : -1; /* -1: the count itself is out of range */
+    if (NumDirEntries < 0 || (DirStart+2+NumDirEntries*12) > (OffsetBase+ExifLength)) {
+		exif_error("Illegally sized directory");
+		return FALSE;
+    }
+
+    for (de=0;de<NumDirEntries;de++) {
+        int Tag, Format, Components;
+        char *ValuePtr;
+        int ByteCount;
+        char *DirEntry;
+		struct ExifTag *tag_p;
+
+        DirEntry = DirStart+2+12*de;
+
+        Tag = Get16u(DirEntry, ImageInfo->MotorolaOrder);
+        Format = Get16u(DirEntry+2, ImageInfo->MotorolaOrder);
+        Components = Get32u(DirEntry+4, ImageInfo->MotorolaOrder);
+
+        if ((unsigned)(Format-1) >= NUM_FORMATS) {
+            /* The unsigned cast makes the illegal zero case wrap to a large value. */
+			exif_error("Illegal format code in EXIF dir");
+			return FALSE;
+        }
+
+        /* No value has more components than the section has bytes; this also keeps ByteCount from overflowing. */
+        if (Components < 0 || (unsigned)Components > ExifLength) {
+			exif_error("Illegal component count in EXIF dir");
+			return FALSE;
+        }
+        ByteCount = Components * ExifBytesPerFormat[Format];
+
+        if (ByteCount > 4) {
+            unsigned OffsetVal;
+            OffsetVal = Get32u(DirEntry+8, ImageInfo->MotorolaOrder);
+            /* If it's bigger than 4 bytes, the dir entry contains an offset. */
+            if (OffsetVal > ExifLength || (unsigned)ByteCount > ExifLength-OffsetVal) {
+                /* Bogus pointer offset and / or bytecount value */
+				exif_error("Illegal pointer offset value in EXIF");
+				return FALSE;
+            }
+            ValuePtr = OffsetBase+OffsetVal;
+        } else {
+            /* 4 bytes or less and value is in the dir entry itself */
+            ValuePtr = DirEntry+8;
+        }
+
+        if (LastExifRefd < ValuePtr+ByteCount) {
+            /* Keep track of last byte in the exif header that was actually referenced.
+               That way, we know where the discardable thumbnail data begins. */
+            LastExifRefd = ValuePtr+ByteCount;
+        }
+
+        if (Tag == TAG_EXIF_OFFSET || Tag == TAG_INTEROP_OFFSET) {
+            /* Validate the offset before forming a pointer from it. */
+            unsigned SubdirOffset = Get32u(ValuePtr, ImageInfo->MotorolaOrder);
+            if (SubdirOffset > ExifLength) {
+				exif_error("Illegal subdirectory link");
+				return FALSE;
+            }
+            ProcessExifDir(ImageInfo, OffsetBase+SubdirOffset, OffsetBase, ExifLength, LastExifRefd);
+            continue;
+        }
+
+		/* Search through tag table */
+		for (tag_p = &TagTable[0]; tag_p->Desc != NULL; tag_p++) {
+			if (tag_p->Tag == Tag) {
+				if (tag_p->Func != NULL) {
+					(*tag_p->Func)(ImageInfo, ValuePtr, ByteCount, Format, tag_p, OffsetBase);
+				} else {
+					rec_type = ConvertAnyFormat2(ValuePtr, ByteCount, Format,
+												 ImageInfo->MotorolaOrder,
+												 &rd);
+					exif_append_data(&ImageInfo->d,
+									 tag_p->Desc,
+									 rec_type,
+									 Format,
+									 &rd);
+				}
+				break;
+			}
+		}
+		if (tag_p->Desc == NULL) {
+			rec_type = ConvertAnyFormat2(ValuePtr, ByteCount, Format,
+										 ImageInfo->MotorolaOrder,
+										 &rd);
+			sprintf(label, "0x%04x", Tag);
+			exif_append_data(&ImageInfo->d,
+							 label,
+							 rec_type,
+							 Format,
+							 &rd);
+		}
+	}
+    return TRUE;
+}
+
+/* 
+   Process an EXIF marker
+   Describes all the drivel that most digital cameras include...
+   CharBuf is the whole section: length bytes, beginning with the 2 length bytes.
+*/
+static int
+process_EXIF (ImageInfoType *ImageInfo, char *CharBuf, unsigned int length, char *LastExifRefd)
+{
+	int cc;
+	LastExifRefd = CharBuf;
+
+	{   /* Check the EXIF header component */
+		static const uchar ExifHeader[] = {0x45, 0x78, 0x69, 0x66, 0x00, 0x00};
+		if (length < 16 || memcmp(CharBuf+2, ExifHeader,6)) { /* 16: every byte examined below */
+			exif_error("Incorrect Exif header");
+			return FALSE;
+		}
+	}
+
+	if (memcmp(CharBuf+8,"II",2) == 0) {
+		ImageInfo->MotorolaOrder = 0;
+	} else {
+		if (memcmp(CharBuf+8,"MM",2) == 0) {
+			ImageInfo->MotorolaOrder = 1;
+		} else {
+			exif_error("Invalid Exif alignment marker.");
+			return FALSE;
+		}
+	}
+
+	/* Check the next two values for correctness. */
+	if (Get16u(CharBuf+10,ImageInfo->MotorolaOrder) != 0x2a
+		|| Get32u(CharBuf+12,ImageInfo->MotorolaOrder) != 0x08) {
+		exif_error("Invalid Exif start (1, NULL)");
+		return FALSE;
+	}
+
+	/* First directory starts 16 bytes in.  Offsets start at 8 bytes in, which leaves length-8 bytes. */
+	cc = ProcessExifDir(ImageInfo, CharBuf+16, CharBuf+8, length-8, LastExifRefd);
+	if (cc != TRUE) {
+		return cc;
+	}
+	return TRUE;
+}
+ 
+/* Parse the marker stream until SOS or EOI is seen; each section read is stored in Sections and counted in *SectionsRead. */
+static int 
+scan_JPEG_header (ImageInfoType *ImageInfo, FILE *infile, Section_t *Sections, int *SectionsRead, int ReadAll, char *LastExifRefd)
+{
+    int a;
+    int HaveCom = FALSE;
+
+    a = fgetc(infile);
+    if (a != 0xff || fgetc(infile) != M_SOI) {
+        return FALSE;
+    }
+
+    for(*SectionsRead=0;*SectionsRead < 19;) {
+        int itemlen;
+        int marker = 0;
+        int ll,lh, got;
+        uchar *Data;
+
+        for (a=0;a<7;a++) {
+            marker = fgetc(infile);
+            if (marker != 0xff) break;
+        }
+        if (marker == 0xff) {
+            /* 0xff is legal padding, but if we get that many, something's wrong. */
+			exif_error("too many padding bytes!");
+			return FALSE;
+        }
+
+        Sections[*SectionsRead].Type = marker;
+  
+        /* Read the length of the section. */
+        lh = fgetc(infile);
+        ll = fgetc(infile);
+
+        itemlen = (lh == EOF || ll == EOF) ? 0 : ((lh << 8) | ll); /* EOF is rejected just below */
+
+        if (itemlen < 2) {
+			exif_error("invalid marker");
+			return FALSE;
+        }
+
+        Sections[*SectionsRead].Size = itemlen;
+
+        Data = (uchar *)(*exif_malloc_fn)(itemlen+1); /* Add 1 to allow sticking a 0 at the end. */
+        if (Data == NULL) { exif_error("could not allocate data for section"); return FALSE; }
+        Sections[*SectionsRead].Data = Data;
+        /* Store first two pre-read bytes. */
+        Data[0] = (uchar)lh;
+        Data[1] = (uchar)ll;
+
+        got = fread(Data+2, 1, itemlen-2, infile); /* Read the whole section. */
+        if (got != itemlen-2) {
+			exif_error("reading from file");
+			return FALSE;
+        }
+        *SectionsRead += 1;
+
+        switch(marker) {
+		case M_SOS:   /* stop before hitting compressed data  */
+			/* If reading entire image is requested, read the rest of the data. */
+			if (ReadAll) {
+				int cp, ep, size;
+				/* Determine how much file is left. */
+				cp = ftell(infile);
+				fseek(infile, 0, SEEK_END);
+				ep = ftell(infile);
+				fseek(infile, cp, SEEK_SET);
+
+				size = ep-cp;
+				Data = (uchar *)(*exif_malloc_fn)(size);
+				if (Data == NULL) {
+					exif_error("could not allocate data for entire image");
+					return FALSE;
+				}
+
+				got = fread(Data, 1, size, infile);
+				if (got != size) {
+					exif_error("could not read the rest of the image");
+					return FALSE;
+				}
+
+				Sections[*SectionsRead].Data = Data; /* NOTE(review): index can reach 19 - confirm Sections has 20 slots */
+				Sections[*SectionsRead].Size = size;
+				Sections[*SectionsRead].Type = PSEUDO_IMAGE_MARKER;
+				(*SectionsRead)++;
+				/*
+				 *HaveAll = 1;
+				 */
+			}
+			return TRUE;
+
+		case M_EOI:   /* in case it's a tables-only JPEG stream */
+			exif_error("No image in jpeg!");
+			return FALSE;
+
+		case M_COM: /* Comment section */
+			if (HaveCom) {
+				(*SectionsRead) -= 1;
+				(*exif_free_fn)(Sections[*SectionsRead].Data);
+			} else {
+				process_COM(ImageInfo, Data, itemlen);
+				HaveCom = TRUE;
+			}
+			break;
+
+		case M_EXIF:
+			if (*SectionsRead <= 2) {
+				/* Seen files from some 'U-lead' software with Vivitar scanner
+				   that uses marker 31 later in the file (no clue what for!) */
+				process_EXIF(ImageInfo, (char *)Data, itemlen, LastExifRefd);
+			}
+			break;
+
+		case M_SOF0: 
+		case M_SOF1: 
+		case M_SOF2: 
+		case M_SOF3: 
+		case M_SOF5: 
+		case M_SOF6: 
+		case M_SOF7: 
+		case M_SOF9: 
+		case M_SOF10:
+		case M_SOF11:
+		case M_SOF13:
+		case M_SOF14:
+		case M_SOF15:
+			process_SOFn(ImageInfo, Data, marker);
+			break;
+		default:
+			/* skip any other marker silently. */
+			break;
+        }
+    }
+    return TRUE;
+}
+
+/* Release the Data buffer of every section recorded in Sections[] and reset
+   *SectionsRead to 0.  The last section (the SOS header, or the trailing image
+   data when the whole file was read) is heap-allocated too, so it is freed as well. */
+static void
+DiscardData(Section_t *Sections, int *SectionsRead)
+{
+    int a;
+    for (a = 0; a < *SectionsRead; a++) {
+        (*exif_free_fn)(Sections[a].Data);
+    }
+    *SectionsRead = 0;
+}
+
+/*
+   Wrap fd in a binary stdio stream, reset *ImageInfo, allocate its record set
+   and scan the JPEG headers into Sections[].  Returns the scanner's result
+   (FALSE on failure).  Once fdopen() has succeeded the stream, and with it fd,
+   is closed on every path.
+*/
+static int
+ReadJpegFile(ImageInfoType *ImageInfo, Section_t *Sections, 
+			 int *SectionsRead, int fd, 
+			 int ReadAll, char *LastExifRefd)
+{
+    FILE *infile;
+    int ret;
+    /* The memset below wipes ImageInfo->filename; keep it for diagnostics. */
+    const char *filename = ImageInfo->filename;
+
+    infile = fdopen(fd, "rb"); /* Unix ignores 'b', windows needs it. */
+    if (infile == NULL) {
+		exif_error("Unable to open '%s'", filename);
+		return FALSE;
+    }
+
+    /* Start with an empty image information structure. */
+    memset(ImageInfo, 0, sizeof(*ImageInfo));
+    memset(Sections, 0, sizeof(*Sections)); /* clears the first slot only */
+
+    ImageInfo->d = exif_alloc();
+
+    /* Scan the JPEG headers. */
+    ret = scan_JPEG_header(ImageInfo, infile, Sections, SectionsRead, ReadAll, LastExifRefd);
+    /* Close before reporting so a rejected file does not leak the stream. */
+    fclose(infile);
+    if (!ret) {
+		exif_error("Invalid Jpeg file: '%s'", filename);
+		return FALSE;
+    }
+    return ret;
+}
+
+/* Parse the JPEG open on fd into ImageInfo and free the scratch section
+   buffers afterwards, whether or not parsing succeeded.
+   Returns ReadJpegFile()'s result (FALSE on failure). */
+static int
+read_jpeg_exif(ImageInfoType *ImageInfo, int fd, int ReadAll)
+{
+	Section_t Sections[20];
+	int SectionsRead = 0;	/* stays defined even if no section is ever scanned */
+	char *LastExifRefd=NULL;
+	int ret;
+	int thumbsize=0;
+
+	ret = ReadJpegFile(ImageInfo, Sections, &SectionsRead, fd, ReadAll, LastExifRefd); 
+#if 0
+	/* Thought this might pick out the embedded thumbnail, but it doesn't work.   -RL */
+	for (i=0;i<SectionsRead-1;i++) {
+		if (Sections[i].Type == M_EXIF) {
+			thumbsize = Sections[i].Size;
+			if(thumbsize>0) {
+				ImageInfo->Thumbnail = (*exif_malloc_fn)(thumbsize+5);
+				ImageInfo->ThumbnailSize = thumbsize;
+				ImageInfo->Thumbnail[0] = 0xff;
+				ImageInfo->Thumbnail[1] = 0xd8;
+				ImageInfo->Thumbnail[2] = 0xff;
+				memcpy(ImageInfo->Thumbnail+4, Sections[i].Data, thumbsize+4);
+			}
+		}
+	}
+#endif
+	/* Sections read before a failure are heap-allocated too; free them either way. */
+	DiscardData(Sections, &SectionsRead);
+	return(ret);
+}
+
+/* Parse EXIF records from the JPEG open on fd; NULL unless read_jpeg_exif() returns TRUE. */
+exif_data_t *
+exif_parse_fd(int fd)
+{
+	ImageInfoType info;
+	int ok;
+
+	info.filename = "<file stream>";	/* placeholder name shown in diagnostics */
+	ok = (read_jpeg_exif(&info, fd, 1) == TRUE);	/* 1: ReadAll */
+	return ok ? info.d : NULL;
+}
+
+/*
+ * Open filename read-only and parse its EXIF records.
+ * Returns NULL if the file cannot be opened or parsing does not return TRUE.
+ */
+exif_data_t *
+exif_parse_file(const char *filename)
+{
+	ImageInfoType info;
+	int fd = open(filename, O_RDONLY);
+
+	if (fd < 0)
+		return NULL;
+	info.filename = filename;	/* name shown in diagnostics */
+	if (read_jpeg_exif(&info, fd, 1) == TRUE)
+		return info.d;
+	return NULL;
+}
+/* Free d, its record names, and the strings held by 's' records; a NULL d is ignored. */
+void
+exif_free_data(struct exif_data *d)
+{
+	int i;
+	if (d == NULL) return;	/* the parse functions return NULL on failure */
+	for (i=0; i<d->n_recs; i++) {
+		(*exif_free_fn)(d->recs[i].rec_name);
+		if (d->recs[i].rec_type == 's')	/* 's' records carry a heap string */
+			(*exif_free_fn)(d->recs[i].rec_data.s);
+	}
+	(*exif_free_fn)(d);
+}
+
+/* Default allocators with the int-sized signature the hooks use.  Calling
+   malloc/realloc through a cast pointer of a different type is undefined. */
+static void *exif_default_malloc(int size) { return malloc((size_t)size); }
+static void *exif_default_realloc(void *p, int size) { return realloc(p, (size_t)size); }
+
+/*
+ * Install the allocator hooks (exif_malloc_fn, exif_free_fn, exif_realloc_fn).
+ * Any argument left NULL selects the matching C library routine.
+ */
+void
+exif_init(void *(*malloc_fn)(int),
+		  void (*free_fn)(void *),
+		  void *(*realloc_fn)(void *, int))
+{
+	exif_malloc_fn  = malloc_fn  ? malloc_fn  : exif_default_malloc;
+	exif_free_fn    = free_fn    ? free_fn    : free;
+	exif_realloc_fn = realloc_fn ? realloc_fn : exif_default_realloc;
+}
+/* Return the first record of d named rec_name; NULL if none matches or an argument is NULL. */
+extern exif_record_t *
+exif_find_record(exif_data_t *d, const char *rec_name)
+{
+	int i;
+	if (d == NULL || rec_name == NULL) return NULL;	/* nothing to search */
+	for (i=0; i<d->n_recs; i++) {
+		if (strcmp(d->recs[i].rec_name, rec_name) == 0)
+			return &d->recs[i];
+	}
+	return NULL;
+}
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ */
+
+
diff --git a/src/libexif/exif.h b/src/libexif/exif.h
new file mode 100644
index 0000000..32f0a6d
--- /dev/null
+++ b/src/libexif/exif.h
@@ -0,0 +1,86 @@
+/*
+
+Copyright (c) 2000 Curtis Galloway
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+*/
+
+typedef struct exif_rational {	/* numerator/denominator pair; note num is signed, denom unsigned */
+  int      num;
+  unsigned denom;
+} exif_rational_t;
+
+typedef union {	/* value of one record; presumably the live member is selected by exif_record.rec_type -- confirm */
+  long            l;
+  float           f;
+  double          g;
+  char *          s;	/* released by exif_free_data() when the record's rec_type is 's' */
+  exif_rational_t r;
+} exif_rec_data_t;
+
+typedef struct exif_record {	/* one named value in an exif_data_t */
+  char *           rec_name;	/* key compared by exif_find_record(); released by exif_free_data() */
+  char             rec_type;	/* 's' means rec_data.s holds a string that exif_free_data() releases */
+  int              exif_format;	/* presumably one of the EXIF_FMT_* codes below -- confirm */
+  exif_rec_data_t  rec_data;
+} exif_record_t;
+
+typedef struct exif_data {	/* record list handed back by exif_parse_fd() / exif_parse_file() */
+  int           n_recs;	/* number of entries in recs[] that the library iterates over */
+  int           n_alloc;	/* presumably the allocated capacity of recs[] -- confirm */
+  exif_record_t recs[0];	/* zero-length trailing array (compiler extension); the records follow the header */
+} exif_data_t;
+
+
+/* EXIF data formats: numeric codes, presumably the values stored in exif_record.exif_format (confirm) */
+
+#define EXIF_FMT_COMPUTED  -1 /* Not in raw data; NOTE(review): negative value is not parenthesised */
+#define EXIF_FMT_BYTE       1 
+#define EXIF_FMT_STRING     2
+#define EXIF_FMT_USHORT     3
+#define EXIF_FMT_ULONG      4
+#define EXIF_FMT_URATIONAL  5
+#define EXIF_FMT_SBYTE      6
+#define EXIF_FMT_UNDEFINED  7
+#define EXIF_FMT_SSHORT     8
+#define EXIF_FMT_SLONG      9
+#define EXIF_FMT_SRATIONAL 10
+#define EXIF_FMT_SINGLE    11
+#define EXIF_FMT_DOUBLE    12
+
+extern void
+exif_init(void *(*malloc_fn)(int),
+	  void (*free_fn)(void *),
+	  void *(*realloc_fn)(void *, int));	/* a NULL hook selects malloc / free / realloc */
+
+extern exif_data_t *
+exif_parse_fd(int fd);	/* parse the JPEG open on fd; NULL on failure */
+
+extern exif_data_t *
+exif_parse_file(const char *filename);	/* open filename read-only and parse it; NULL on failure */
+
+extern void
+exif_free_data(exif_data_t *d);	/* frees d, each rec_name and each 's' record's string */
+
+extern exif_record_t *
+exif_find_record(exif_data_t *d, const char *rec_name);	/* first record whose name strcmp-equals rec_name, else NULL */
+
+
diff --git a/src/libexif/i18n.h b/src/libexif/i18n.h
new file mode 100644
index 0000000..6d08e3c
--- /dev/null
+++ b/src/libexif/i18n.h
@@ -0,0 +1,51 @@
+/* i18n.h
+ *
+ * Copyright (c) 2001 Lutz Mueller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __I18N_H__
+#define __I18N_H__
+/* Translation helpers: _() wraps a message for lookup, N_() expands to the string itself (or gettext_noop). */
+#include "config.h"
+
+#ifdef ENABLE_NLS	/* NLS build: _() looks the message up via dgettext in GETTEXT_PACKAGE */
+#  include <libintl.h>
+#  undef _
+#  define _(String) dgettext (GETTEXT_PACKAGE, String)
+#  ifdef gettext_noop
+#    define N_(String) gettext_noop (String)
+#  else
+#    define N_(String) (String)
+#  endif
+#else	/* no NLS: each gettext-family name collapses to one of its own arguments */
+#  define textdomain(String) (String)
+#  define gettext(String) (String)
+#  define dgettext(Domain,Message) (Message)
+#  define dcgettext(Domain,Message,Type) (Message)
+#ifdef __WATCOMC__	/* Watcom: the bind* calls expand to nothing at all */
+#    define bind_textdomain_codeset(Domain,Codeset)
+#    define bindtextdomain(Domain,Directory)
+#else
+#    define bind_textdomain_codeset(Domain,Codeset) (Codeset)
+#    define bindtextdomain(Domain,Directory) (Domain)
+#endif	/* __WATCOMC__ */
+#  define _(String) (String)
+#  define N_(String) (String)
+#endif	/* ENABLE_NLS */
+
+#endif /* __I18N_H__ */
diff --git a/src/libexif/olympus/exif-mnote-data-olympus.c b/src/libexif/olympus/exif-mnote-data-olympus.c
new file mode 100644
index 0000000..02794c5
--- /dev/null
+++ b/src/libexif/olympus/exif-mnote-data-olympus.c
@@ -0,0 +1,403 @@
+/* exif-mnote-data-olympus.c
+ *
+ * Copyright (c) 2002, 2003 Lutz Mueller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+#include "exif-mnote-data-olympus.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <libexif/exif-utils.h>
+#include <libexif/exif-data.h>
+
+#define DEBUG
+
+/* Free every entry's data buffer and the entry array itself, leaving the
+ * note empty (entries == NULL, count == 0).  A NULL note, or one without an
+ * entry array, is left untouched. */
+static void
+exif_mnote_data_olympus_clear (ExifMnoteDataOlympus *n)
+{
+	ExifMnoteData *base = (ExifMnoteData *) n;
+	unsigned int idx;
+
+	if (!n || !n->entries) return;
+	for (idx = 0; idx < n->count; idx++) {
+		if (!n->entries[idx].data) continue;
+		exif_mem_free (base->mem, n->entries[idx].data);
+		n->entries[idx].data = NULL;
+	}
+	exif_mem_free (base->mem, n->entries);
+	n->entries = NULL;
+	n->count = 0;
+}
+
+/* "free" method: releases the entries; the note object itself is not freed here. */
+static void
+exif_mnote_data_olympus_free (ExifMnoteData *n)
+{
+	if (n)
+		exif_mnote_data_olympus_clear ((ExifMnoteDataOlympus *) n);
+}
+
+/* "get_value" method: format entry i into val (maxlen bytes); NULL for a NULL argument or a bad index. */
+static char *
+exif_mnote_data_olympus_get_value (ExifMnoteData *d, unsigned int i, char *val, unsigned int maxlen)
+{
+	ExifMnoteDataOlympus *n = (ExifMnoteDataOlympus *) d;
+	if (!d || !val) return NULL;
+	if (i >= n->count) return NULL;	/* "i > count - 1" wrapped to UINT_MAX when count was 0 */
+	exif_log (d->log, EXIF_LOG_CODE_DEBUG, "ExifMnoteDataOlympus",
+		  "Querying value for tag '%s'...",
+		  mnote_olympus_tag_get_name (n->entries[i].tag));
+	return mnote_olympus_entry_get_value (&n->entries[i], val, maxlen);
+}
+
+/* "save" method: serialise the note into a freshly allocated *buf and report its
+ * length in *buf_size.  Bails out early for a NULL argument, an unknown
+ * n->version or a failed allocation. */
+static void
+exif_mnote_data_olympus_save (ExifMnoteData *ne,
+		unsigned char **buf, unsigned int *buf_size)
+{
+	ExifMnoteDataOlympus *n = (ExifMnoteDataOlympus *) ne;
+	unsigned int i, o, s, doff, base = 0, o2 = 6;
+	int datao = 0;
+	unsigned char *grown;
+
+	if (!n || !buf || !buf_size) return;
+
+	/* Room for the header, the entry count and all 12-byte entries. */
+	*buf_size = 6 + 2 + 2 + n->count * 12;
+	switch (n->version) {
+	case 0: /* Olympus */
+		*buf = exif_mem_alloc (ne->mem, *buf_size);
+		if (!*buf) return;
+		/* Write the header. */
+		strcpy (*buf, "OLYMP");
+		o2 += 2;
+		datao = n->offset;
+		break;
+	case 1: /* Nikon v1 */
+		base = MNOTE_NIKON1_TAG_BASE;
+		*buf_size -= 8;
+		/* Fall through */
+	case 2: /* Nikon v2 */
+		*buf_size += 8 + 2;	/* final size BEFORE allocating, so the buffer really has it */
+		*buf = exif_mem_alloc (ne->mem, *buf_size);
+		if (!*buf) return;
+		/* Write the header. */
+		strcpy (*buf, "Nikon");
+		(*buf)[6] = n->version;
+		o2 += 2;
+		if (n->version == 2) {
+			exif_set_short (*buf + 10, n->order, (ExifShort) (
+				(n->order == EXIF_BYTE_ORDER_INTEL) ?
+				('I' << 8) | 'I' :
+				('M' << 8) | 'M'));
+			exif_set_short (*buf + 12, n->order, (ExifShort) 0x2A);
+			exif_set_long (*buf + 14, n->order, (ExifShort) 8);
+			o2 += 2 + 8;
+		}
+		datao = -10;
+		break;
+	default: return;	/* unknown version: nothing was allocated, so nothing may be written */
+	}
+
+	exif_set_short (*buf + o2, n->order, (ExifShort) n->count);
+	o2 += 2;
+
+	/* Save each entry */
+	for (i = 0; i < n->count; i++) {
+		o = o2 + i * 12;
+		exif_set_short (*buf + o + 0, n->order,
+				(ExifShort) (n->entries[i].tag - base));
+		exif_set_short (*buf + o + 2, n->order,
+				(ExifShort) n->entries[i].format);
+		exif_set_long  (*buf + o + 4, n->order,
+				n->entries[i].components);
+		o += 8;
+		s = exif_format_get_size (n->entries[i].format) * n->entries[i].components;
+		if (s > 4) {	/* too big for the 4-byte slot: append it and store its offset */
+			doff = *buf_size;
+			grown = exif_mem_realloc (ne->mem, *buf, doff + s);
+			if (!grown) return;	/* *buf and *buf_size still describe the old buffer */
+			*buf = grown;
+			*buf_size = doff + s;
+			exif_set_long (*buf + o, n->order, datao + doff);
+		} else
+			doff = o;
+
+		/* Write the data. */
+		if (n->entries[i].data) {
+			memcpy (*buf + doff, n->entries[i].data, s);
+		} else {
+			/* Most certainly damaged input file */
+			memset (*buf + doff, 0, s);
+		}
+	}
+}
+/* "load" method: parse the Olympus or Nikon maker note in buf into n->entries and n->count. */
+static void
+exif_mnote_data_olympus_load (ExifMnoteData *en,
+			      const unsigned char *buf, unsigned int buf_size)
+{
+	ExifMnoteDataOlympus *n = (ExifMnoteDataOlympus *) en;
+	ExifShort c;
+	unsigned int i, s, o, o2 = 0, datao = 6, base = 0;	/* datao: added to stored value offsets; base: added to raw tag ids */
+
+	if (!n || !buf) return;
+
+	/* Start of interesting data */
+	o2 = 6 + n->offset;
+
+	/*
+	 * Olympus headers start with "OLYMP" and need to have at least
+	 * a size of 22 bytes (6 for 'OLYMP', 2 other bytes, 2 for the
+	 * number of entries, and 12 for one entry).
+	 *
+	 * Nikon headers start with "Nikon" (6 bytes including '\0'), 
+	 * version number (1 or 2).
+	 * 
+	 * Version 1 continues with 0, 1, 0, number_of_tags,
+	 * or just with number_of_tags (models D1H, D1X...).
+	 * 
+	 * Version 2 continues with an unknown byte (0 or 10),
+	 * two unknown bytes (0), "MM" or "II", another byte 0 and 
+	 * lastly 0x2A.
+	 */
+	if (buf_size - n->offset < 22) return;	/* NOTE(review): unsigned subtraction; wraps if offset > buf_size */
+	if (!memcmp (buf + o2, "OLYMP", 5)) {
+		exif_log (en->log, EXIF_LOG_CODE_DEBUG, "ExifMnoteDataOlympus",
+			"Parsing Olympus maker note...");
+
+		/* The number of entries is at position 8. */
+		n->version = 0;
+		o2 += 8;
+
+	} else if (!memcmp (buf + o2, "Nikon", 6)) {
+		o2 += 6;
+		exif_log (en->log, EXIF_LOG_CODE_DEBUG, "ExifMnoteDataOlympus",
+			"Parsing Nikon maker note (0x%02x, %02x, %02x, "
+			"%02x, %02x, %02x, %02x, %02x)...",
+			buf[o2 + 0], buf[o2 + 1], buf[o2 + 2], buf[o2 + 3], 
+			buf[o2 + 4], buf[o2 + 5], buf[o2 + 6], buf[o2 + 7]); 
+
+		/* The first byte is the version. */
+		if (o2 >= buf_size) return;
+		n->version = buf[o2];
+		o2 += 1;
+
+		/* Skip an unknown byte (00 or 0A). */
+		o2 += 1;
+
+		switch (n->version) {
+		case 1:
+
+			base = MNOTE_NIKON1_TAG_BASE;
+			break;
+
+		case 2:
+
+			/* Skip 2 unknown bytes (00 00). */
+			o2 += 2;
+
+			/*
+			 * Byte order. From here the data offset
+			 * gets calculated.
+			 */
+			datao = o2;
+			if (o2 >= buf_size) return;
+			if (!strncmp (&buf[o2], "II", 2))	/* NOTE(review): buf is unsigned char; strncmp takes char * */
+				n->order = EXIF_BYTE_ORDER_INTEL;
+			else if (!strncmp (&buf[o2], "MM", 2))
+				n->order = EXIF_BYTE_ORDER_MOTOROLA;
+			else {
+				exif_log (en->log, EXIF_LOG_CODE_DEBUG,
+					"ExifMnoteDatalympus", "Unknown "
+					"byte order '%c%c'", buf[o2],
+					buf[o2 + 1]);
+				return;
+			}
+			o2 += 2;
+
+			/* Skip 2 unknown bytes (00 2A). */
+			o2 += 2;
+
+			/* Go to where the number of entries is. */
+			if (o2 >= buf_size) return;	/* NOTE(review): guards one byte; exif_get_long presumably reads four */
+			o2 = datao + exif_get_long (buf + o2, n->order);
+			break;
+
+		default:
+			exif_log (en->log, EXIF_LOG_CODE_DEBUG,
+				"ExifMnoteDataOlympus", "Unknown version "
+				"number %i.", n->version);
+			return;
+		}
+	} else if (!memcmp (buf + o2, "\0\x1b", 2)) {	/* no text header: handled as version 2 with o2 left as is */
+		n->version = 2;
+	} else {
+		return;
+	}
+
+	/* Number of entries */
+	if (o2 >= buf_size) return;	/* NOTE(review): guards one byte; exif_get_short presumably reads two */
+	c = exif_get_short (buf + o2, n->order);
+	o2 += 2;
+
+	/* Read the number of entries and remove old ones. */
+	exif_mnote_data_olympus_clear (n);
+
+	n->entries = exif_mem_alloc (en->mem, sizeof (MnoteOlympusEntry) * c);
+	if (!n->entries) return;
+
+	/* Parse the entries */
+	for (i = 0; i < c; i++) {
+	    o = o2 + 12 * i;
+	    if (o + 12 > buf_size) return;	/* truncated note: keep the entries parsed so far */
+
+	    n->count = i + 1;
+	    n->entries[i].tag        = exif_get_short (buf + o, n->order) + base;
+	    n->entries[i].format     = exif_get_short (buf + o + 2, n->order);
+	    n->entries[i].components = exif_get_long (buf + o + 4, n->order);
+	    n->entries[i].order      = n->order;
+
+	    exif_log (en->log, EXIF_LOG_CODE_DEBUG, "ExifMnoteOlympus",
+		      "Loading entry 0x%x ('%s')...", n->entries[i].tag,
+		      mnote_olympus_tag_get_name (n->entries[i].tag));
+
+	    /*
+	     * Size? If bigger than 4 bytes, the actual data is not
+	     * in the entry but somewhere else (offset).
+	     */
+	    s = exif_format_get_size (n->entries[i].format) *
+		   			 n->entries[i].components;
+	    if (!s) continue;
+	    o += 8;
+	    if (s > 4) o = exif_get_long (buf + o, n->order) + datao;
+	    if (o + s > buf_size) continue;	/* NOTE(review): o + s (and s itself) can wrap for huge values */
+
+	    /* Copy the value bytes; the entry keeps data == NULL if allocation fails. */
+	    n->entries[i].data = exif_mem_alloc (en->mem, s);
+	    if (!n->entries[i].data) continue;
+	    n->entries[i].size = s;
+	    memcpy (n->entries[i].data, buf + o, s);
+	}
+}
+/* "count" method: number of entries currently held; 0 for a NULL note. */
+static unsigned int
+exif_mnote_data_olympus_count (ExifMnoteData *n)
+{
+	return !n ? 0 : ((ExifMnoteDataOlympus *) n)->count;
+}
+
+/* "get_id" method: tag of entry n, or 0 when d is NULL or n is out of range. */
+static unsigned int
+exif_mnote_data_olympus_get_id (ExifMnoteData *d, unsigned int n)
+{
+	ExifMnoteDataOlympus *self = (ExifMnoteDataOlympus *) d;
+
+	if (self && n < self->count) return self->entries[n].tag;
+	return 0;
+}
+
+/* "get_name" method: name of entry i's tag; NULL when d is NULL or i is out of range. */
+static const char *
+exif_mnote_data_olympus_get_name (ExifMnoteData *d, unsigned int i)
+{
+	ExifMnoteDataOlympus *self = (ExifMnoteDataOlympus *) d;
+
+	if (!self || i >= self->count) return NULL;
+	return mnote_olympus_tag_get_name (self->entries[i].tag);
+}
+
+/* "get_title" method: title of entry i's tag; NULL when d is NULL or i is out of range. */
+static const char *
+exif_mnote_data_olympus_get_title (ExifMnoteData *d, unsigned int i)
+{
+	ExifMnoteDataOlympus *self = (ExifMnoteDataOlympus *) d;
+
+	if (!self || i >= self->count) return NULL;
+	return mnote_olympus_tag_get_title (self->entries[i].tag);
+}
+
+/* "get_description" method: description (not the title) of entry i's tag; NULL if d is NULL or i is out of range. */
+static const char *
+exif_mnote_data_olympus_get_description (ExifMnoteData *d, unsigned int i)
+{
+	ExifMnoteDataOlympus *n = (ExifMnoteDataOlympus *) d;
+	
+	if (!n || i >= n->count) return NULL;
+        return mnote_olympus_tag_get_description (n->entries[i].tag);
+}
+
+/* "set_byte_order" method: adopt o and pass each entry's data to exif_array_set_byte_order(old, o). */
+static void
+exif_mnote_data_olympus_set_byte_order (ExifMnoteData *d, ExifByteOrder o)
+{
+	ExifMnoteDataOlympus *note = (ExifMnoteDataOlympus *) d;
+	ExifByteOrder previous;
+	unsigned int k;
+
+	if (!note) return;
+	previous = note->order;
+	note->order = o;
+	for (k = 0; k < note->count; k++) {
+		MnoteOlympusEntry *e = &note->entries[k];
+		e->order = o;
+		exif_array_set_byte_order (e->format, e->data, e->components, previous, o);
+	}
+}
+/* "set_offset" method: store o in the note's offset field; a NULL note is ignored. */
+static void
+exif_mnote_data_olympus_set_offset (ExifMnoteData *n, unsigned int o)
+{
+	if (n != NULL) ((ExifMnoteDataOlympus *) n)->offset = o;
+}
+
+/* Allocate an Olympus/Nikon maker-note object from mem and install this
+ * file's handlers in its method table.  Returns NULL if mem is NULL or the
+ * allocation fails. */
+ExifMnoteData *
+exif_mnote_data_olympus_new (ExifMem *mem)
+{
+	ExifMnoteData *note = NULL;
+
+	if (mem) note = exif_mem_alloc (mem, sizeof (ExifMnoteDataOlympus));
+	if (!note) return NULL;
+	exif_mnote_data_construct (note, mem);
+
+	/* Wire up the method table. */
+	note->methods.free = exif_mnote_data_olympus_free;
+	note->methods.set_byte_order = exif_mnote_data_olympus_set_byte_order;
+	note->methods.set_offset = exif_mnote_data_olympus_set_offset;
+	note->methods.load = exif_mnote_data_olympus_load;
+	note->methods.save = exif_mnote_data_olympus_save;
+	note->methods.count = exif_mnote_data_olympus_count;
+	note->methods.get_id = exif_mnote_data_olympus_get_id;
+	note->methods.get_name = exif_mnote_data_olympus_get_name;
+	note->methods.get_title = exif_mnote_data_olympus_get_title;
+	note->methods.get_description = exif_mnote_data_olympus_get_description;
+	note->methods.get_value = exif_mnote_data_olympus_get_value;
+
+	return note;
+}
diff --git a/src/libexif/olympus/exif-mnote-data-olympus.h b/src/libexif/olympus/exif-mnote-data-olympus.h
new file mode 100644
index 0000000..b9b4209
--- /dev/null
+++ b/src/libexif/olympus/exif-mnote-data-olympus.h
@@ -0,0 +1,45 @@
+/* exif-mnote-data-olympus.h
+ *
+ * Copyright (c) 2002 Lutz Mueller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __MNOTE_OLYMPUS_CONTENT_H__
+#define __MNOTE_OLYMPUS_CONTENT_H__
+
+#include <libexif/exif-mnote-data-priv.h>
+#include <libexif/olympus/mnote-olympus-entry.h>
+#include <libexif/exif-byte-order.h>
+#include <libexif/exif-mem.h>
+
+typedef struct _ExifMnoteDataOlympus ExifMnoteDataOlympus;
+
+struct _ExifMnoteDataOlympus {
+	ExifMnoteData parent;	/* first member: the implementation casts between ExifMnoteData * and this type */
+
+	MnoteOlympusEntry *entries;	/* parsed entries; NULL after a clear */
+	unsigned int count;	/* number of entries in use */
+
+	ExifByteOrder order;	/* byte order used when reading and writing the note */
+	unsigned int offset;	/* set through the set_offset method; load adds it to its start position */
+	/* 0: Olympus; 1: Nikon v1; 2: Nikon v2 */
+	int version;
+};
+
+ExifMnoteData *exif_mnote_data_olympus_new (ExifMem *);	/* NULL if the ExifMem is NULL or allocation fails */
+
+#endif /* __MNOTE_OLYMPUS_CONTENT_H__ */
diff --git a/src/libexif/olympus/mnote-olympus-entry.c b/src/libexif/olympus/mnote-olympus-entry.c
new file mode 100644
index 0000000..1eff6fe
--- /dev/null
+++ b/src/libexif/olympus/mnote-olympus-entry.c
@@ -0,0 +1,540 @@
+/* mnote-olympus-entry.c
+ *
+ * Copyright (c) 2002 Lutz Mueller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+#include "mnote-olympus-entry.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libexif/exif-format.h>
+#include <libexif/exif-utils.h>
+#include <libexif/exif-entry.h>
+#include <libexif/i18n.h>
+/* CF: unless format == target, write an "Invalid format" message into v and `break` out of the enclosing switch/loop. */
+#define CF(format,target,v,maxlen)                              \
+{                                                               \
+        if (format != target) {                                 \
+                snprintf (v, maxlen,	                        \
+                        _("Invalid format '%s', "               \
+                        "expected '%s'."),                      \
+                        exif_format_get_name (format),          \
+                        exif_format_get_name (target));         \
+                break;                                          \
+        }                                                       \
+}
+/* CC: unless number == target, write an "Invalid number of components" message into v and `break` out of the enclosing switch/loop. */
+#define CC(number,target,v,maxlen)                                      \
+{                                                                       \
+        if (number != target) {                                         \
+                snprintf (v, maxlen,                                    \
+                        _("Invalid number of components (%i, "          \
+                        "expected %i)."), (int) number, (int) target);  \
+                break;                                                  \
+        }                                                               \
+}
+/* CC2: like CC, but accepts either of two component counts (t1 or t2) before reporting and breaking. */
+#define CC2(number,t1,t2,v,maxlen)                                      \
+{                                                                       \
+	if ((number != t1) && (number != t2)) {                         \
+		snprintf (v, maxlen,                                    \
+			_("Invalid number of components (%i, "          \
+			"expected %i or %i)."), (int) number,		\
+			(int) t1, (int) t2);  				\
+		break;                                                  \
+	}                                                               \
+}
+/* Per-tag table of (value, label) pairs; presumably consulted by mnote_olympus_entry_get_value() -- confirm. */
+static struct {
+	ExifTag tag;
+    ExifFormat fmt;	/* format listed for the tag; how it is checked is not visible here */
+	struct {
+		int index;	/* raw value */
+		const char *string;	/* label for that value; NULL ends the list */
+	} elem[10];	/* ten slots, terminator included (the longest list below uses all ten) */
+} items[] = {
+  { MNOTE_NIKON_TAG_LENSTYPE, EXIF_FORMAT_BYTE,
+    { {0, N_("AF non D Lens")},
+      {1, N_("Manual")},
+      {2, N_("AF-D or AF-S Lens")},
+      {6, N_("AF-D G Lens")},
+      {10, N_("AF-D VR Lens")},
+      {0, NULL}}},
+  { MNOTE_NIKON_TAG_FLASHUSED, EXIF_FORMAT_BYTE,
+    { {0, N_("Flash did not fire")},
+      {4, N_("Flash unit unknown")},
+      {7, N_("Flash is external")},
+      {9, N_("Flash is on Camera")},
+      {0, NULL}}},
+  { MNOTE_NIKON1_TAG_QUALITY, EXIF_FORMAT_SHORT,
+    { {1, N_("VGA Basic")},
+      {2, N_("VGA Normal")},
+      {3, N_("VGA Fine")},
+      {4, N_("SXGA Basic")},
+      {5, N_("SXGA Normal")},
+      {6, N_("SXGA Fine")},
+      {10, N_("2 MPixel Basic")},
+      {11, N_("2 MPixel Normal")},
+      {12, N_("2 MPixel Fine")},
+      {0, NULL}}},
+  { MNOTE_NIKON1_TAG_COLORMODE, EXIF_FORMAT_SHORT,
+    { {1, N_("Color")},
+      {2, N_("Monochrome")},
+      {0, NULL}}},
+  { MNOTE_NIKON1_TAG_IMAGEADJUSTMENT, EXIF_FORMAT_SHORT,
+    { {0, N_("Normal")},
+      {1, N_("Bright+")},
+      {2, N_("Bright-")},
+      {3, N_("Contrast+")},
+      {4, N_("Contrast-")},
+      {0, NULL}}},
+  { MNOTE_NIKON1_TAG_CCDSENSITIVITY, EXIF_FORMAT_SHORT,
+    { {0, N_("ISO80")},
+      {2, N_("ISO160")},
+      {4, N_("ISO320")},
+      {5, N_("ISO100")},
+      {0, NULL}}},
+  { MNOTE_NIKON1_TAG_WHITEBALANCE, EXIF_FORMAT_SHORT,
+    { {0, N_("Auto")},
+      {1, N_("Preset")},
+      {2, N_("Daylight")},
+      {3, N_("Incandescense")},
+      {4, N_("Fluorescence")},
+      {5, N_("Cloudy")},
+      {6, N_("SpeedLight")},
+      {0, NULL}}},
+  { MNOTE_NIKON1_TAG_CONVERTER, EXIF_FORMAT_SHORT,
+    { {0, N_("No Fisheye")},
+      {1, N_("Fisheye On")},
+      {0, NULL}}},
+  { MNOTE_OLYMPUS_TAG_QUALITY, EXIF_FORMAT_SHORT,
+    { {1, N_("SQ")},
+      {2, N_("HQ")},
+      {3, N_("SHQ")},
+      {0, NULL}}},
+  { MNOTE_OLYMPUS_TAG_MACRO, EXIF_FORMAT_SHORT,
+    { {0, N_("No")},
+      {1, N_("Yes")},
+      {0, NULL}}},
+  { MNOTE_OLYMPUS_TAG_DIGIZOOM, EXIF_FORMAT_SHORT,
+    { {0, N_("1x")},
+      {2, N_("2x")},
+      {0, NULL}}},
+  { MNOTE_OLYMPUS_TAG_FLASHMODE, EXIF_FORMAT_SHORT,
+    { {0, N_("Auto")},
+      {1, N_("Red-eye reduction")},
+      {2, N_("Fill")},
+      {3, N_("Off")},
+      {0, NULL}}},
+  { MNOTE_OLYMPUS_TAG_SHARPNESS, EXIF_FORMAT_SHORT,
+    { {0, N_("Normal")},
+      {1, N_("Hard")},
+      {2, N_("Soft")},
+      {0, NULL}}},
+  { MNOTE_OLYMPUS_TAG_CONTRAST, EXIF_FORMAT_SHORT,
+    { {0, N_("Hard")},
+      {1, N_("Normal")},
+      {2, N_("Soft")},
+      {0, NULL}}},
+  { MNOTE_OLYMPUS_TAG_MANFOCUS, EXIF_FORMAT_SHORT,
+    { {0, N_("No")},
+      {1, N_("Yes")},
+      {0, NULL}}},
+  { 0, }	/* zero tag: presumably the end-of-table sentinel -- confirm */
+};
+
+char *
+mnote_olympus_entry_get_value (MnoteOlympusEntry *entry, char *v, unsigned int maxlen)
+{
+	char         buf[30];
+	ExifLong     vl;
+	ExifShort    vs = 0;
+	ExifRational vr;
+	int          i, j;
+	double       r, b;
+
+	if (!entry)
+		return (NULL);
+
+	memset (v, 0, maxlen);
+	maxlen--;
+
+	if ((!entry->data) && (entry->components > 0)) return (v);
+
+	switch (entry->tag) {
+	
+	/* Nikon */
+	case MNOTE_NIKON_TAG_FIRMWARE:
+		CF (entry->format,  EXIF_FORMAT_UNDEFINED, v, maxlen);
+		CC (entry->components, 4, v, maxlen);
+		vl = exif_get_long (entry->data, entry->order);
+		if ((vl & 0xF0F0F0F0) == 0x30303030) {
+			memcpy (v, entry->data, MIN (maxlen, 4));
+		} else {
+			snprintf (v, maxlen, "%04lx", (long unsigned int) vl);
+		}
+		break;
+	case MNOTE_NIKON_TAG_ISO:
+                CF (entry->format, EXIF_FORMAT_SHORT, v, maxlen);
+                CC (entry->components, 2, v, maxlen);
+                //vs = exif_get_short (entry->data, entry->order);
+                vs = exif_get_short (entry->data + 2, entry->order);
+                snprintf (v, maxlen, "ISO %hd", vs);
+                break;
+	case MNOTE_NIKON_TAG_ISO2:
+                CF (entry->format, EXIF_FORMAT_SHORT, v, maxlen);
+                CC (entry->components, 2, v, maxlen);
+                //vs = exif_get_short (entry->data, entry->order);
+                vs = exif_get_short (entry->data + 2, entry->order);
+                snprintf (v, maxlen, "ISO2 %hd", vs);
+                break;
+	case MNOTE_NIKON_TAG_QUALITY:
+                CF (entry->format, EXIF_FORMAT_ASCII, v, maxlen);
+                //CC (entry->components, 8, v, maxlen);
+                //vl =  exif_get_long (entry->data  , entry->order);
+                //printf("-> 0x%04x\n",entry->data);
+                //printf("-> 0x%s<\n",entry->data - 0);
+                memcpy(v, entry->data ,entry->components);
+                //snprintf (v, maxlen, "%s<",  ( entry->data - 9  );
+                break;
+	case MNOTE_NIKON_TAG_COLORMODE:
+	case MNOTE_NIKON_TAG_COLORMODE1:
+	case MNOTE_NIKON_TAG_WHITEBALANCE:
+	case MNOTE_NIKON_TAG_SHARPENING:
+	case MNOTE_NIKON_TAG_FOCUSMODE:
+	case MNOTE_NIKON_TAG_FLASHSETTING:
+	case MNOTE_NIKON_TAG_ISOSELECTION:
+	case MNOTE_NIKON_TAG_FLASHMODE:
+	case MNOTE_NIKON_TAG_IMAGEADJUSTMENT:
+	case MNOTE_NIKON_TAG_ADAPTER:
+                CF (entry->format, EXIF_FORMAT_ASCII, v, maxlen);
+                memcpy(v, entry->data, MIN (maxlen, entry->components));
+                break;
+	case MNOTE_NIKON_TAG_TOTALPICTURES:
+                CF (entry->format, EXIF_FORMAT_LONG, v, maxlen);
+                CC (entry->components, 1, v, maxlen);
+                vl =  exif_get_long (entry->data, entry->order);
+                snprintf (v, maxlen, "%lu",  (long unsigned int) vl );
+                break;
+	case MNOTE_NIKON_TAG_WHITEBALANCEFINE:
+                CF (entry->format, EXIF_FORMAT_SSHORT, v, maxlen);
+                CC (entry->components, 1, v, maxlen);
+                vs = exif_get_short (entry->data, entry->order);
+                snprintf (v, maxlen, "%hd", vs);
+                break;
+	case MNOTE_NIKON_TAG_WHITEBALANCERB:
+		CF (entry->format, EXIF_FORMAT_RATIONAL, v, maxlen);
+		CC (entry->components, 4, v, maxlen);
+		vr = exif_get_rational (entry->data, entry->order);
+		r = (double)vr.numerator / vr.denominator;
+		vr = exif_get_rational (entry->data+8, entry->order);
+		b = (double)vr.numerator / vr.denominator;
+		//printf("numerator %li, denominator %li\n", vr.numerator, vr.denominator);
+		snprintf (v, maxlen, "Red Correction %f, Blue Correction %f", r,b);
+		break;
+	case MNOTE_NIKON_TAG_MANUALFOCUSDISTANCE:
+		CF (entry->format, EXIF_FORMAT_RATIONAL, v, maxlen);
+		CC (entry->components, 1, v, maxlen);
+		vr = exif_get_rational (entry->data, entry->order);
+		if (vr.numerator) {
+			r = (double)vr.numerator / vr.denominator;
+			snprintf (v, maxlen, "%2.2f meters", r);
+		} else {
+			strncpy (v, _("No manual focus selection"), maxlen);
+		}
+		break;
+	case MNOTE_NIKON_TAG_DIGITALZOOM:
+	case MNOTE_NIKON1_TAG_DIGITALZOOM:
+		CF (entry->format, EXIF_FORMAT_RATIONAL, v, maxlen);
+		CC (entry->components, 1, v, maxlen);
+		vr = exif_get_rational (entry->data, entry->order);
+		r = (double)vr.numerator / vr.denominator;
+		snprintf (v, maxlen, "%2.2f", r);
+		break;
+	case MNOTE_NIKON_TAG_AFFOCUSPOSITION:
+		CF (entry->format, EXIF_FORMAT_UNDEFINED, v, maxlen);
+		CC (entry->components, 4, v, maxlen);
+		switch (  *( entry->data+1)  ) {
+		  	case  0: strncpy (v, "AF Position: Center", maxlen); break;
+		  	case  1: strncpy (v, "AF Position: Top", maxlen); break;
+		  	case  2: strncpy (v, "AF Position: Bottom", maxlen); break;
+		  	case  3: strncpy (v, "AF Position: Left", maxlen); break;
+		  	case  4: strncpy (v, "AF Position: Right", maxlen); break;
+		  	default: strncpy (v, "Unknown AF Position", maxlen);
+		}     
+		break;
+	case MNOTE_OLYMPUS_TAG_DIGIZOOM:
+		if (entry->format == EXIF_FORMAT_RATIONAL) {
+			CC (entry->components, 1, v, maxlen);
+			vr = exif_get_rational (entry->data, entry->order);
+			r = (double)vr.numerator / vr.denominator;
+			if (!vr.numerator) {
+				strncpy (v, _("None"), maxlen);
+			} else {
+				snprintf (v, maxlen, "%2.2f", r);
+			}
+			break;
+		}
+		/* fall through to handle SHORT version of this tag */
+	case MNOTE_NIKON_TAG_LENSTYPE:
+	case MNOTE_NIKON_TAG_FLASHUSED:
+	case MNOTE_NIKON1_TAG_QUALITY:
+	case MNOTE_NIKON1_TAG_COLORMODE:
+	case MNOTE_NIKON1_TAG_IMAGEADJUSTMENT:
+	case MNOTE_NIKON1_TAG_CCDSENSITIVITY:
+	case MNOTE_NIKON1_TAG_WHITEBALANCE:
+	case MNOTE_NIKON1_TAG_CONVERTER:
+	case MNOTE_OLYMPUS_TAG_QUALITY:
+	case MNOTE_OLYMPUS_TAG_MACRO:
+	case MNOTE_OLYMPUS_TAG_FLASHMODE:
+	case MNOTE_OLYMPUS_TAG_SHARPNESS:
+	case MNOTE_OLYMPUS_TAG_CONTRAST:
+	case MNOTE_OLYMPUS_TAG_MANFOCUS:
+		/* search the tag */
+		for (i = 0; (items[i].tag && items[i].tag != entry->tag); i++);
+		if (!items[i].tag) {
+		  	strncpy (v, "Internal error", maxlen);
+		  	break;
+		}
+		CF (entry->format, items[i].fmt, v, maxlen);
+		CC (entry->components, 1, v, maxlen);
+		switch (entry->format) {
+		case EXIF_FORMAT_BYTE:
+		case EXIF_FORMAT_UNDEFINED:
+			vs = entry->data[0];
+			break;
+		case EXIF_FORMAT_SHORT:
+			vs = exif_get_short(entry->data, entry->order);
+			break;
+		default:
+			vs = 0;
+			break;
+		}
+		/* find the value */
+		for (j = 0; items[i].elem[j].string &&
+			    (items[i].elem[j].index < vs); j++);
+		if (items[i].elem[j].index != vs) {
+			snprintf (v, maxlen, "Unknown value %hi", vs);
+			break;
+		}
+		strncpy (v, items[i].elem[j].string, maxlen);
+		break;
+
+	case MNOTE_NIKON_TAG_LENS:
+		CF (entry->format, EXIF_FORMAT_RATIONAL, v, maxlen);
+		CC (entry->components, 4, v, maxlen);
+		{
+			double c,d;
+			unsigned long a,b;
+			vr = exif_get_rational (entry->data, entry->order);
+			a = vr.numerator / vr.denominator;
+			vr = exif_get_rational (entry->data+8, entry->order);
+			b = vr.numerator / vr.denominator;
+			vr = exif_get_rational (entry->data+16, entry->order);
+			c = (double)vr.numerator / vr.denominator;
+			vr = exif_get_rational (entry->data+24, entry->order);
+			d = (double)vr.numerator / vr.denominator;
+                        //printf("numerator %li, denominator %li\n", vr.numerator, vr.denominator);
+			snprintf (v, maxlen, "%ld-%ldmm 1:%3.1f - %3.1f",a,b,c,d);
+		}
+		break;
+	case MNOTE_NIKON1_TAG_FOCUS:
+		CF (entry->format, EXIF_FORMAT_RATIONAL, v, maxlen);
+		CC (entry->components, 1, v, maxlen);
+		vr = exif_get_rational (entry->data, entry->order);
+		if (!vr.denominator) {
+			strncpy (v, _("Infinite"), maxlen);
+		} else {
+			r = (double)vr.numerator / vr.denominator;
+			snprintf (v, maxlen, "%2.2f", r);
+		}
+		break;
+
+	/* Olympus */
+	case MNOTE_OLYMPUS_TAG_MODE:
+		CF (entry->format, EXIF_FORMAT_LONG, v, maxlen);
+		CC (entry->components, 3, v, maxlen);
+		vl = exif_get_long (entry->data, entry->order);
+		switch (vl) {
+		case 0:
+			strncpy (v, _("normal"), maxlen);
+			break;
+		case 1:
+			strncpy (v, _("unknown"), maxlen);
+			break;
+		case 2:
+			strncpy (v, _("fast"), maxlen);
+			break;
+		case 3:
+			strncpy (v, _("panorama"), maxlen);
+			break;
+		default:
+			snprintf (v, maxlen, _("%li"), (long int) vl);
+		}
+		vl = exif_get_long (entry->data + 4, entry->order);
+		snprintf (buf, sizeof (buf), "/%li/", (long int) vl);
+		strncat (v, buf, maxlen - strlen (v));
+		vl = exif_get_long (entry->data + 4, entry->order);
+		switch (vl) {
+		case 1:
+			strncat (v, _("left to right"), maxlen - strlen (v));
+			break;
+		case 2:
+			strncat (v, _("right to left"), maxlen - strlen (v));
+			break;
+		case 3:
+			strncat (v, _("bottom to top"), maxlen - strlen (v));
+			break;
+		case 4:
+			strncat (v, _("top to bottom"), maxlen - strlen (v));
+			break;
+		default:
+			snprintf (buf, sizeof (buf), _("%li"),
+				  (long int) vl);
+			strncat (v, buf, maxlen - strlen (v));
+		}
+		break;
+	case MNOTE_OLYMPUS_TAG_UNKNOWN_1:
+		CF (entry->format, EXIF_FORMAT_SHORT, v, maxlen);
+		CC (entry->components, 1, v, maxlen);
+		strncpy (v, _("Unknown tag."), maxlen);
+		break;
+	case MNOTE_OLYMPUS_TAG_UNKNOWN_2:
+		CF (entry->format, EXIF_FORMAT_RATIONAL, v, maxlen);
+		CC (entry->components, 1, v, maxlen);
+		break;
+	case MNOTE_OLYMPUS_TAG_UNKNOWN_3:
+		CF (entry->format, EXIF_FORMAT_SSHORT, v, maxlen);
+		CC (entry->components, 1, v, maxlen);
+		break;
+	case MNOTE_OLYMPUS_TAG_VERSION:
+		CF (entry->format, EXIF_FORMAT_ASCII, v, maxlen);
+		CC2 (entry->components, 5, 8, v, maxlen);
+		strncpy (v, entry->data, MIN (maxlen, entry->size));
+		break;
+	case MNOTE_OLYMPUS_TAG_INFO:
+		CF (entry->format, EXIF_FORMAT_ASCII, v, maxlen);
+		CC2 (entry->components, 52, 53, v, maxlen);
+		strncpy (v, entry->data, MIN (maxlen, entry->size));
+		break;
+	case MNOTE_OLYMPUS_TAG_ID:
+		CF (entry->format, EXIF_FORMAT_UNDEFINED, v, maxlen);
+		CC (entry->components, 32, v, maxlen);
+		strncpy (v, entry->data, MIN (maxlen, entry->size));
+		break;
+	case MNOTE_OLYMPUS_TAG_UNKNOWN_4:
+		CF (entry->format, EXIF_FORMAT_LONG, v, maxlen);
+		CC (entry->components, 30, v, maxlen);
+		break;
+	case MNOTE_OLYMPUS_TAG_FOCUSDIST:
+		CF (entry->format, EXIF_FORMAT_RATIONAL, v, maxlen);
+		CC (entry->components, 1, v, maxlen);
+		vr = exif_get_rational (entry->data, entry->order);
+		if (vr.numerator == 0) {
+			strncpy (v, _("Unknown"), maxlen);
+		}
+		else {
+			unsigned long tmp = vr.numerator / vr.denominator;
+			/* printf("numerator %li, denominator %li\n", vr.numerator, vr.denominator); */
+			snprintf (v, maxlen, "%li mm", tmp);
+		}
+		break;
+	case MNOTE_OLYMPUS_TAG_WBALANCE:
+		CF (entry->format, EXIF_FORMAT_SHORT, v, maxlen);
+		CC (entry->components, 2, v, maxlen);
+		vs = exif_get_short (entry->data, entry->order);
+		switch (vs) {
+		case 1:
+			strncpy (v, _("Automatic"), maxlen);
+			break;
+		case 2:
+			{
+				ExifShort v2 = exif_get_short (entry->data + 2, entry->order);
+				unsigned long colorTemp = 0;
+				switch (v2) {
+				case 2:
+					colorTemp = 3000;
+					break;
+				case 3:
+					colorTemp = 3700;
+					break;
+				case 4:
+					colorTemp = 4000;
+					break;
+				case 5:
+					colorTemp = 4500;
+					break;
+				case 6:
+					colorTemp = 5500;
+					break;
+				case 7:
+					colorTemp = 6500;
+					break;
+				case 9:
+					colorTemp = 7500;
+					break;
+				}
+				if (colorTemp) {
+					snprintf (v, maxlen, "Manual: %liK", colorTemp);
+				}
+				else {
+					strncpy (v, _("Manual: Unknown"), maxlen);
+				}
+
+			}
+			break;
+		case 3:
+			strncpy (v, _("One-touch"), maxlen);
+			break;
+		default:
+			strncpy (v, _("Unknown"), maxlen);
+			break;
+		}
+		break;
+	default:
+		switch (entry->format) {
+		case EXIF_FORMAT_ASCII:
+			strncpy (v, entry->data,
+				 MIN (maxlen, entry->components));
+			break;
+		case EXIF_FORMAT_SHORT:
+			vs = exif_get_short (entry->data, entry->order);
+			snprintf (v, maxlen, "%hi", vs);
+			break;
+		case EXIF_FORMAT_LONG:
+			vl = exif_get_long (entry->data, entry->order);
+			snprintf (v, maxlen, "%li", (long int) vl);
+			break;
+		case EXIF_FORMAT_UNDEFINED:
+		default:
+			snprintf (v, maxlen, _("%li bytes unknown data: "),
+				  (long int) entry->size);
+			for (i = 0; i < (int)entry->size; i++) {
+				sprintf (buf, "%02x", entry->data[i]);
+				strncat (v, buf, maxlen - strlen (v));
+			}
+			break;
+		}
+		break;
+	}
+
+	return (v);
+}
diff --git a/src/libexif/olympus/mnote-olympus-entry.h b/src/libexif/olympus/mnote-olympus-entry.h
new file mode 100644
index 0000000..a725228
--- /dev/null
+++ b/src/libexif/olympus/mnote-olympus-entry.h
@@ -0,0 +1,43 @@
+/* mnote-olympus-entry.h
+ *
+ * Copyright (C) 2002 Lutz Mueller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __MNOTE_OLYMPUS_ENTRY_H__
+#define __MNOTE_OLYMPUS_ENTRY_H__
+
+#include <libexif/exif-format.h>
+#include <libexif/exif-byte-order.h>
+#include <libexif/olympus/mnote-olympus-tag.h>
+
+typedef struct _MnoteOlympusEntry        MnoteOlympusEntry;
+
+struct _MnoteOlympusEntry {	/* one MakerNote entry as handed to mnote_olympus_entry_get_value */
+	MnoteOlympusTag tag;		/* tag id of the entry */
+	ExifFormat format;		/* EXIF data format of the payload */
+	unsigned long components;	/* presumably the number of values of that format - confirm */
+
+	unsigned char *data;		/* raw payload bytes */
+	unsigned int size;		/* length of data in bytes */
+
+	ExifByteOrder order;		/* byte order used when decoding data */
+};
+
+char *mnote_olympus_entry_get_value (MnoteOlympusEntry *entry, char *val, unsigned int maxlen);
+
+#endif /* __MNOTE_OLYMPUS_ENTRY_H__ */
diff --git a/src/libexif/olympus/mnote-olympus-tag.c b/src/libexif/olympus/mnote-olympus-tag.c
new file mode 100644
index 0000000..b7beacd
--- /dev/null
+++ b/src/libexif/olympus/mnote-olympus-tag.c
@@ -0,0 +1,155 @@
+/* mnote-olympus-tag.c:
+ *
+ * Copyright (C) 2002 Lutz Mueller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+#include "mnote-olympus-tag.h"
+
+#include <libexif/i18n.h>
+#include <libexif/exif-utils.h>
+
+#include <stdlib.h>
+
+static struct {
+	MnoteOlympusTag tag;	/* tag id matched by the lookup functions */
+	const char *name;	/* short name; NULL for unidentified tags */
+	const char *title;	/* N_()-marked title, translated on lookup; may be NULL */
+	const char *description;	/* long description; NULL in every row */
+} table[] = {
+	/* One row per known tag; the lookups return the first row whose tag matches. */
+	/* Nikon v2 */
+	{MNOTE_NIKON_TAG_FIRMWARE,     "Firmware", N_("Firmware Version"), NULL},
+	{MNOTE_NIKON_TAG_ISO,          "ISO", N_("ISO Setting"), NULL},
+	{MNOTE_NIKON_TAG_COLORMODE1,   "COLORMODE1", N_("Colormode (?)"), NULL},
+	{MNOTE_NIKON_TAG_QUALITY,      "QUALITY", N_("Quality"), NULL},
+	{MNOTE_NIKON_TAG_WHITEBALANCE, "WHITEBALANCE", N_("Whitebalance"), NULL},
+	{MNOTE_NIKON_TAG_SHARPENING,   "SHARPENING",   N_("Image Sharpening"), NULL},
+	{MNOTE_NIKON_TAG_FOCUSMODE,    "FOCUSMODE",   N_("Focus Mode"), NULL},
+	{MNOTE_NIKON_TAG_FLASHSETTING, "FLASHSETTING",   N_("Flash Setting"), NULL},
+	{MNOTE_NIKON_TAG_FLASHMODE,    "FLASHMODE",    N_("Flash Mode"), NULL},
+	{MNOTE_NIKON_TAG_WHITEBALANCEFINE,"WHITEBALANCEFINE",N_("Whitebalance fine ajustment"), NULL},
+	{MNOTE_NIKON_TAG_WHITEBALANCERB,  "WHITEBALANCERB", N_("Whitebalance RB"), NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X000D,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_ISOSELECTION,    "ISOSELECTION", N_("Isoselection"), NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X0011,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_EXPOSUREDIFF,    "EXPOSUREDIFF", N_("Exposurediff ?"), NULL},
+	{MNOTE_NIKON_TAG_FLASHCOMPENSATION, "FLASHCOMPENSATION", N_("Flashcompensation ?"), NULL},
+	{MNOTE_NIKON_TAG_ISO2,            "ISO", N_("ISO Setting"), NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X0016,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X0017,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X0018,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X0019,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_IMAGEADJUSTMENT, "ImageAdjustment", N_("Image Adjustment"), NULL},
+	{MNOTE_NIKON_TAG_TONECOMPENSATION, "TONECOMPENSATION", N_("Tonecompensation"), NULL},
+	{MNOTE_NIKON_TAG_ADAPTER,         "Adapter", N_("Adapter"), NULL},
+	{MNOTE_NIKON_TAG_LENSTYPE,        "LENSTYPE", N_("Lenstype"), NULL},
+	{MNOTE_NIKON_TAG_LENS,            "LENS", N_("Lens"), NULL},
+	{MNOTE_NIKON_TAG_MANUALFOCUSDISTANCE, "MANUALFOCUSDISTANCE", N_("Manual Focus Distance"), NULL},
+	{MNOTE_NIKON_TAG_DIGITALZOOM,     "DigitalZoom", N_("Digital Zoom"), NULL},
+	{MNOTE_NIKON_TAG_FLASHUSED,       "FLASHUSED", N_("Flash used"), NULL},
+	{MNOTE_NIKON_TAG_AFFOCUSPOSITION, "AFFOCUSPOSITION", N_("AF Focus position"), NULL},
+	{MNOTE_NIKON_TAG_BRACKETING,      "BRACKETING", N_("Bracketing"), NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X008A,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X008B,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_CURVE,           "CURVE,", N_("Contrast curve"), NULL}, /* NOTE(review): several names from here on end in ',' - looks unintended, confirm */
+	{MNOTE_NIKON_TAG_COLORMODE,       "COLORMODE,", N_("Colormode"), NULL},
+	{MNOTE_NIKON_TAG_LIGHTYPE,        "LIGHTYPE,", N_("Lightype"), NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X0091,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_HUE,             "Hue,", N_("Hue Adjustment"), NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X0094,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_NOISEREDUCTION,  "NOISEREDUCTION,", N_("Noisereduction"), NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X0097,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X0098,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X009A,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X009B,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X00A0,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X00A2,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X00A3,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_TOTALPICTURES,   "TOTALPICTURES,", N_("Total number of pictures taken"), NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X00A8,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_OPTIMIZATION,    "OPTIMIZATION,", N_("Optimize Image"), NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X00AA,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X00AB,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_CAPTUREEDITORDATA, "CAPTUREEDITORDATA", N_("Capture Editor Data"), NULL},
+	{MNOTE_NIKON_TAG_CAPTUREEDITORVER, "CAPTUREEDITORVER", N_("Capture Editor Version"), NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X0E0E,  NULL, NULL, NULL},
+	{MNOTE_NIKON_TAG_UNKNOWN_0X0E10,  NULL, NULL, NULL},
+	{MNOTE_NIKON1_TAG_UNKNOWN_0X0002, NULL, NULL, NULL}, /* MNOTE_NIKON1_* rows start here */
+	{MNOTE_NIKON1_TAG_QUALITY,        "QUALITY", N_("Quality"), NULL},
+	{MNOTE_NIKON1_TAG_COLORMODE,      "COLORMODE,", N_("Colormode"), NULL},
+	{MNOTE_NIKON1_TAG_IMAGEADJUSTMENT, "ImageAdjustment", N_("Image Adjustment"), NULL},
+	{MNOTE_NIKON1_TAG_CCDSENSITIVITY, "CCDSensitivity", N_("CCD Sensitivity"), NULL},
+	{MNOTE_NIKON1_TAG_WHITEBALANCE,   "WhiteBalance", N_("Whitebalance"), NULL},
+	{MNOTE_NIKON1_TAG_FOCUS,          "Focus", N_("Focus"), NULL},
+	{MNOTE_NIKON1_TAG_UNKNOWN_0X0009, NULL, NULL, NULL},
+	{MNOTE_NIKON1_TAG_DIGITALZOOM,    "DigitalZoom", N_("Digital Zoom"), NULL},
+	{MNOTE_NIKON1_TAG_CONVERTER,      "Converter", N_("Converter"), NULL},
+
+	/* Olympus */
+	{MNOTE_OLYMPUS_TAG_MODE, "Mode", N_("Speed/Sequence/Panorama direction"), NULL},
+	{MNOTE_OLYMPUS_TAG_QUALITY, "Quality", N_("Quality"), NULL},
+	{MNOTE_OLYMPUS_TAG_MACRO, "Macro", N_("Macro"), NULL},
+	{MNOTE_OLYMPUS_TAG_UNKNOWN_1, NULL, NULL, NULL},
+	{MNOTE_OLYMPUS_TAG_DIGIZOOM, "DigiZoom", N_("Digital Zoom"), NULL},
+	{MNOTE_OLYMPUS_TAG_UNKNOWN_2, NULL, NULL, NULL},
+	{MNOTE_OLYMPUS_TAG_UNKNOWN_3, NULL, NULL, NULL},
+	{MNOTE_OLYMPUS_TAG_VERSION, "FirmwareVersion", N_("Firmware version"), NULL},
+	{MNOTE_OLYMPUS_TAG_INFO, "Info", N_("Info"), NULL},
+	{MNOTE_OLYMPUS_TAG_ID, "CameraID", N_("Camera ID"), NULL},
+	{MNOTE_OLYMPUS_TAG_UNKNOWN_4, NULL, NULL, NULL},
+	{MNOTE_OLYMPUS_TAG_FLASHMODE, "FlashMode", N_("Flash Mode"), NULL},
+	{MNOTE_OLYMPUS_TAG_FOCUSDIST, "ManualFocusDistance", N_("Manual Focus Distance"), NULL},
+	{MNOTE_OLYMPUS_TAG_SHARPNESS, "Sharpness", N_("Sharpness Setting"), NULL},
+	{MNOTE_OLYMPUS_TAG_WBALANCE, "WhiteBalance", N_("White Balance Setting"), NULL},
+	{MNOTE_OLYMPUS_TAG_CONTRAST, "Contrast", N_("Contrast Setting"), NULL},
+	{MNOTE_OLYMPUS_TAG_MANFOCUS, "ManualFocus", N_("Manual Focus"), NULL},
+	{0, NULL, NULL, NULL}	/* all-zero last row; the lookups iterate by array size and include it */
+};
+
+/* Short name of tag t from the table; NULL if t is not listed or unnamed. */
+const char *
+mnote_olympus_tag_get_name (MnoteOlympusTag t)
+{
+	const unsigned int rows = sizeof (table) / sizeof (table[0]);
+	unsigned int idx = 0;
+	while (idx < rows && table[idx].tag != t) idx++;	/* first matching row wins */
+	return (idx < rows) ? table[idx].name : NULL;
+}
+
+/* Localized title of tag t, or NULL; a NULL title is never handed to gettext. */
+const char *
+mnote_olympus_tag_get_title (MnoteOlympusTag t)
+{
+	unsigned int i;
+	bindtextdomain (GETTEXT_PACKAGE, LOCALEDIR);
+	for (i = 0; i < sizeof (table) / sizeof (table[0]); i++)
+		if (table[i].tag == t) return table[i].title ? _(table[i].title) : NULL;
+	return NULL;
+}
+
+/* Localized description of tag t, or NULL; NULL text never reaches gettext. */
+const char *
+mnote_olympus_tag_get_description (MnoteOlympusTag t)
+{
+	unsigned int i;
+	bindtextdomain (GETTEXT_PACKAGE, LOCALEDIR);
+	for (i = 0; i < sizeof (table) / sizeof (table[0]); i++)
+		if (table[i].tag == t) return table[i].description ? _(table[i].description) : NULL;
+	return NULL;
+}
diff --git a/src/libexif/olympus/mnote-olympus-tag.h b/src/libexif/olympus/mnote-olympus-tag.h
new file mode 100644
index 0000000..22278ac
--- /dev/null
+++ b/src/libexif/olympus/mnote-olympus-tag.h
@@ -0,0 +1,130 @@
+/* mnote-olympus-tag.h
+ *
+ * Copyright (C) 2002 Lutz Mueller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __MNOTE_OLYMPUS_TAG_H__
+#define __MNOTE_OLYMPUS_TAG_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+enum _MnoteOlympusTag {
+	/* MakerNote tag ids; the Nikon v2, Nikon v1 and Olympus sections share this one enum. */
+	/* Nikon v.2 */
+	MNOTE_NIKON_TAG_FIRMWARE                = 0x0001,
+	MNOTE_NIKON_TAG_ISO                     = 0x0002,
+	MNOTE_NIKON_TAG_COLORMODE1              = 0x0003,
+	MNOTE_NIKON_TAG_QUALITY                 = 0x0004,
+	MNOTE_NIKON_TAG_WHITEBALANCE            = 0x0005,
+	MNOTE_NIKON_TAG_SHARPENING              = 0x0006,
+	MNOTE_NIKON_TAG_FOCUSMODE               = 0x0007,
+	MNOTE_NIKON_TAG_FLASHSETTING            = 0x0008,
+	MNOTE_NIKON_TAG_FLASHMODE               = 0x0009,
+	MNOTE_NIKON_TAG_WHITEBALANCEFINE        = 0x000b,
+	MNOTE_NIKON_TAG_WHITEBALANCERB          = 0x000c,
+	MNOTE_NIKON_TAG_UNKNOWN_0X000D          = 0x000d,
+	MNOTE_NIKON_TAG_EXPOSUREDIFF            = 0x000e,
+	MNOTE_NIKON_TAG_ISOSELECTION            = 0x000f,
+	MNOTE_NIKON_TAG_UNKNOWN_0X0011          = 0x0011,
+	MNOTE_NIKON_TAG_FLASHCOMPENSATION       = 0x0012,
+	MNOTE_NIKON_TAG_ISO2                    = 0x0013,
+	MNOTE_NIKON_TAG_UNKNOWN_0X0016          = 0x0016,
+	MNOTE_NIKON_TAG_UNKNOWN_0X0017          = 0x0017,
+	MNOTE_NIKON_TAG_UNKNOWN_0X0018          = 0x0018,
+	MNOTE_NIKON_TAG_UNKNOWN_0X0019          = 0x0019,
+	MNOTE_NIKON_TAG_IMAGEADJUSTMENT         = 0x0080,
+	MNOTE_NIKON_TAG_TONECOMPENSATION        = 0x0081,
+	MNOTE_NIKON_TAG_ADAPTER                 = 0x0082,
+	MNOTE_NIKON_TAG_LENSTYPE                = 0x0083,
+	MNOTE_NIKON_TAG_LENS                    = 0x0084,
+	MNOTE_NIKON_TAG_MANUALFOCUSDISTANCE     = 0x0085,
+	MNOTE_NIKON_TAG_DIGITALZOOM             = 0x0086,
+	MNOTE_NIKON_TAG_FLASHUSED               = 0x0087,
+	MNOTE_NIKON_TAG_AFFOCUSPOSITION         = 0x0088,
+	MNOTE_NIKON_TAG_BRACKETING              = 0x0089,
+	MNOTE_NIKON_TAG_UNKNOWN_0X008A          = 0x008a,
+	MNOTE_NIKON_TAG_UNKNOWN_0X008B          = 0x008b,
+	MNOTE_NIKON_TAG_CURVE                   = 0x008c,
+	MNOTE_NIKON_TAG_COLORMODE               = 0x008d,
+	MNOTE_NIKON_TAG_LIGHTYPE                = 0x0090,
+	MNOTE_NIKON_TAG_UNKNOWN_0X0091          = 0x0091,
+	MNOTE_NIKON_TAG_HUE                     = 0x0092,
+	MNOTE_NIKON_TAG_UNKNOWN_0X0094          = 0x0094,
+	MNOTE_NIKON_TAG_NOISEREDUCTION          = 0x0095,
+	MNOTE_NIKON_TAG_UNKNOWN_0X0097          = 0x0097,
+	MNOTE_NIKON_TAG_UNKNOWN_0X0098          = 0x0098,
+	MNOTE_NIKON_TAG_UNKNOWN_0X009A          = 0x009a,
+	MNOTE_NIKON_TAG_UNKNOWN_0X009B          = 0x009b,
+	MNOTE_NIKON_TAG_UNKNOWN_0X00A0          = 0x00a0,
+	MNOTE_NIKON_TAG_UNKNOWN_0X00A2          = 0x00a2,
+	MNOTE_NIKON_TAG_UNKNOWN_0X00A3          = 0x00a3,
+	MNOTE_NIKON_TAG_TOTALPICTURES           = 0x00a7,
+	MNOTE_NIKON_TAG_UNKNOWN_0X00A8          = 0x00a8,
+	MNOTE_NIKON_TAG_OPTIMIZATION            = 0x00a9,
+	MNOTE_NIKON_TAG_UNKNOWN_0X00AA          = 0x00aa,
+	MNOTE_NIKON_TAG_UNKNOWN_0X00AB          = 0x00ab,
+	MNOTE_NIKON_TAG_CAPTUREEDITORDATA       = 0x0e01,
+	MNOTE_NIKON_TAG_CAPTUREEDITORVER	= 0x0e09,
+	MNOTE_NIKON_TAG_UNKNOWN_0X0E0E		= 0x0e0e,
+	MNOTE_NIKON_TAG_UNKNOWN_0X0E10		= 0x0e10,
+
+	/* Nikon v1: real values + our proprietary base to distinguish from v2 */
+	MNOTE_NIKON1_TAG_BASE                   = 0x8000,
+	MNOTE_NIKON1_TAG_UNKNOWN_0X0002         = 0x0002 + MNOTE_NIKON1_TAG_BASE,
+	MNOTE_NIKON1_TAG_QUALITY                = 0x0003 + MNOTE_NIKON1_TAG_BASE,
+	MNOTE_NIKON1_TAG_COLORMODE              = 0x0004 + MNOTE_NIKON1_TAG_BASE,
+	MNOTE_NIKON1_TAG_IMAGEADJUSTMENT        = 0x0005 + MNOTE_NIKON1_TAG_BASE,
+	MNOTE_NIKON1_TAG_CCDSENSITIVITY         = 0x0006 + MNOTE_NIKON1_TAG_BASE,
+	MNOTE_NIKON1_TAG_WHITEBALANCE           = 0x0007 + MNOTE_NIKON1_TAG_BASE,
+	MNOTE_NIKON1_TAG_FOCUS                  = 0x0008 + MNOTE_NIKON1_TAG_BASE,
+	MNOTE_NIKON1_TAG_UNKNOWN_0X0009         = 0x0009 + MNOTE_NIKON1_TAG_BASE,
+	MNOTE_NIKON1_TAG_DIGITALZOOM            = 0x000a + MNOTE_NIKON1_TAG_BASE,
+	MNOTE_NIKON1_TAG_CONVERTER              = 0x000b + MNOTE_NIKON1_TAG_BASE,
+
+	/* Olympus */
+	MNOTE_OLYMPUS_TAG_MODE		= 0x0200,
+	MNOTE_OLYMPUS_TAG_QUALITY	= 0x0201,
+	MNOTE_OLYMPUS_TAG_MACRO		= 0x0202,
+	MNOTE_OLYMPUS_TAG_UNKNOWN_1	= 0x0203,
+	MNOTE_OLYMPUS_TAG_DIGIZOOM	= 0x0204,
+	MNOTE_OLYMPUS_TAG_UNKNOWN_2	= 0x0205,
+	MNOTE_OLYMPUS_TAG_UNKNOWN_3	= 0x0206,
+	MNOTE_OLYMPUS_TAG_VERSION	= 0x0207,
+	MNOTE_OLYMPUS_TAG_INFO		= 0x0208,
+	MNOTE_OLYMPUS_TAG_ID		= 0x0209,
+	MNOTE_OLYMPUS_TAG_UNKNOWN_4	= 0x0f04,
+	MNOTE_OLYMPUS_TAG_FLASHMODE	= 0x1004,
+	MNOTE_OLYMPUS_TAG_MANFOCUS	= 0x100b,
+	MNOTE_OLYMPUS_TAG_FOCUSDIST	= 0x100c,
+	MNOTE_OLYMPUS_TAG_SHARPNESS	= 0x100f,
+	MNOTE_OLYMPUS_TAG_WBALANCE	= 0x1015,
+	MNOTE_OLYMPUS_TAG_CONTRAST	= 0x1029
+};
+typedef enum _MnoteOlympusTag MnoteOlympusTag;	/* type taken by the mnote_olympus_tag_get_* lookups */
+
+const char *mnote_olympus_tag_get_name        (MnoteOlympusTag tag);
+const char *mnote_olympus_tag_get_title       (MnoteOlympusTag tag);
+const char *mnote_olympus_tag_get_description (MnoteOlympusTag tag);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __MNOTE_OLYMPUS_TAG_H__ */
diff --git a/src/libexif/pentax/exif-mnote-data-pentax.c b/src/libexif/pentax/exif-mnote-data-pentax.c
new file mode 100644
index 0000000..cc2cc12
--- /dev/null
+++ b/src/libexif/pentax/exif-mnote-data-pentax.c
@@ -0,0 +1,209 @@
+/* exif-mnote-data-pentax.c
+ *
+ * Copyright (C) 2002, 2003 Lutz Mueller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "config.h"
+#include "exif-mnote-data-pentax.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <libexif/exif-byte-order.h>
+#include <libexif/exif-utils.h>
+
+/* #define DEBUG */
+
+/* Release every entry payload and the entry array itself, then reset the
+ * note to "no entries". Does nothing for a NULL note or a NULL array. */
+static void
+exif_mnote_data_pentax_clear (ExifMnoteDataPentax *n)
+{
+	ExifMnoteData *base = (ExifMnoteData *) n;
+	unsigned int idx;
+
+	if (n == NULL || n->entries == NULL) return;
+	for (idx = 0; idx < n->count; idx++) {
+		if (n->entries[idx].data == NULL)
+			continue;
+		exif_mem_free (base->mem, n->entries[idx].data);
+		n->entries[idx].data = NULL;
+	}
+	exif_mem_free (base->mem, n->entries);
+	n->entries = NULL;
+	n->count = 0;
+}
+
+/* Drop all entries of the note via the clear helper; NULL is ignored. */
+static void
+exif_mnote_data_pentax_free (ExifMnoteData *n)
+{
+	if (n != NULL)
+		exif_mnote_data_pentax_clear ((ExifMnoteDataPentax *) n);
+}
+
+/* Pass entry i plus val/maxlen on to mnote_pentax_entry_get_value and return
+ * its result; NULL when d is NULL or i is not a valid entry index. */
+static char *
+exif_mnote_data_pentax_get_value (ExifMnoteData *d, unsigned int i, char *val, unsigned int maxlen)
+{
+	ExifMnoteDataPentax *pentax = (ExifMnoteDataPentax *) d;
+	if (pentax == NULL || i >= pentax->count) return NULL;
+	return mnote_pentax_entry_get_value (pentax->entries + i, val, maxlen);
+}
+
+/* Parse the Pentax MakerNote directory in buf into n->entries / n->count.
+ * Stops silently at the first truncated or inconsistent entry; entries
+ * parsed before that point are kept. Every read of buf is bounds-checked. */
+static void
+exif_mnote_data_pentax_load (ExifMnoteData *en,
+		const unsigned char *buf, unsigned int buf_size)
+{
+	ExifMnoteDataPentax *n = (ExifMnoteDataPentax *) en;
+	unsigned int i, o, s, fs;
+	ExifShort c;
+
+	/* The 2-byte entry count sits at 6 + n->offset and must lie inside buf. */
+	if (!n || !buf || buf_size < 8 || n->offset > buf_size - 8) return;
+	c = exif_get_short (buf + 6 + n->offset, n->order);
+	n->entries = exif_mem_alloc (en->mem, sizeof (MnotePentaxEntry) * c);
+	if (!n->entries) return;
+	for (i = 0; i < c; i++) {
+		/* 12-byte entry: tag (2), format (2), components (4), value (4). */
+		o = 6 + 2 + n->offset + 12 * i;
+		if (o > buf_size || buf_size - o < 12) return;
+		n->count = i + 1;
+		n->entries[i].tag        = exif_get_short (buf + o + 0, n->order);
+		n->entries[i].format     = exif_get_short (buf + o + 2, n->order);
+		n->entries[i].components = exif_get_long  (buf + o + 4, n->order);
+		n->entries[i].order      = n->order;
+		n->entries[i].data       = NULL; /* defined even if we bail out below */
+		n->entries[i].size       = 0;
+		/* Payload size; reject unknown formats and sizes that would overflow. */
+		fs = exif_format_get_size (n->entries[i].format);
+		if (!fs || n->entries[i].components > buf_size / fs) return;
+		s = fs * n->entries[i].components;
+		if (!s) return;
+		/* Up to 4 bytes are stored in the entry, larger payloads by offset. */
+		o += 8;
+		if (s > 4) o = exif_get_long (buf + o, n->order) + 6;
+		if (o > buf_size || s > buf_size - o) return;
+		n->entries[i].data = exif_mem_alloc (en->mem, sizeof (char) * s);
+		if (!n->entries[i].data) return;
+		n->entries[i].size = s;
+		memcpy (n->entries[i].data, buf + o, s);
+	}
+}
+
+static unsigned int
+exif_mnote_data_pentax_count (ExifMnoteData *n)
+{	/* Entry count stored in the note; a NULL note reports zero entries. */
+	return (n == NULL) ? 0 : ((ExifMnoteDataPentax *) n)->count;
+}
+
+/* Tag id of entry n; 0 when d is NULL or n is not a valid entry index. */
+static unsigned int
+exif_mnote_data_pentax_get_id (ExifMnoteData *d, unsigned int n)
+{
+	ExifMnoteDataPentax *pentax = (ExifMnoteDataPentax *) d;
+	if (pentax == NULL || n >= pentax->count)
+		return 0;
+	return pentax->entries[n].tag;
+}
+
+/* Result of mnote_pentax_tag_get_name for entry n's tag;
+ * NULL when d is NULL or n is not a valid entry index. */
+static const char *
+exif_mnote_data_pentax_get_name (ExifMnoteData *d, unsigned int n)
+{
+	ExifMnoteDataPentax *pentax = (ExifMnoteDataPentax *) d;
+	if (pentax == NULL || n >= pentax->count) return NULL;
+	return mnote_pentax_tag_get_name (pentax->entries[n].tag);
+}
+
+/* Result of mnote_pentax_tag_get_title for entry n's tag;
+ * NULL when d is NULL or n is not a valid entry index. */
+static const char *
+exif_mnote_data_pentax_get_title (ExifMnoteData *d, unsigned int n)
+{
+	ExifMnoteDataPentax *pentax = (ExifMnoteDataPentax *) d;
+	if (pentax == NULL || n >= pentax->count) return NULL;
+	return mnote_pentax_tag_get_title (pentax->entries[n].tag);
+}
+
+/* Result of mnote_pentax_tag_get_description for entry n's tag;
+ * NULL when d is NULL or n is not a valid entry index. */
+static const char *
+exif_mnote_data_pentax_get_description (ExifMnoteData *d, unsigned int n)
+{
+	ExifMnoteDataPentax *pentax = (ExifMnoteDataPentax *) d;
+	return (pentax != NULL && n < pentax->count)
+		? mnote_pentax_tag_get_description (pentax->entries[n].tag) : NULL;
+}
+
+static void
+exif_mnote_data_pentax_set_offset (ExifMnoteData *d, unsigned int o)
+{	/* Store o as the note's offset field; a NULL note is ignored. */
+	if (d != NULL) ((ExifMnoteDataPentax *) d)->offset = o;
+}
+
+/* Record o as the byte order of the note and of each entry, handing every
+ * entry's payload to exif_array_set_byte_order with the old and new order. */
+static void
+exif_mnote_data_pentax_set_byte_order (ExifMnoteData *d, ExifByteOrder o)
+{
+	ExifMnoteDataPentax *pentax = (ExifMnoteDataPentax *) d;
+	ExifByteOrder previous;
+	unsigned int idx;
+	if (pentax == NULL) return;
+	previous = pentax->order;
+	pentax->order = o;
+	for (idx = 0; idx < pentax->count; idx++) {
+		MnotePentaxEntry *e = &pentax->entries[idx];
+		e->order = o;
+		exif_array_set_byte_order (e->format, e->data, e->components, previous, o);
+	}
+}
+
+/* Allocate a Pentax MakerNote object from mem, run the generic constructor
+ * on it and install the Pentax implementations of the note methods.
+ * Returns NULL when mem is NULL or the allocation fails. */
+ExifMnoteData *
+exif_mnote_data_pentax_new (ExifMem *mem)
+{
+	ExifMnoteData *note;
+
+	note = mem ? exif_mem_alloc (mem, sizeof (ExifMnoteDataPentax)) : NULL;
+	if (note == NULL)
+		return NULL;
+	exif_mnote_data_construct (note, mem);
+
+	/* Method table: one Pentax-specific function per slot. */
+	note->methods.free            = exif_mnote_data_pentax_free;
+	note->methods.set_byte_order  = exif_mnote_data_pentax_set_byte_order;
+	note->methods.set_offset      = exif_mnote_data_pentax_set_offset;
+	note->methods.load            = exif_mnote_data_pentax_load;
+	note->methods.count           = exif_mnote_data_pentax_count;
+	note->methods.get_id          = exif_mnote_data_pentax_get_id;
+	note->methods.get_name        = exif_mnote_data_pentax_get_name;
+	note->methods.get_title       = exif_mnote_data_pentax_get_title;
+	note->methods.get_description = exif_mnote_data_pentax_get_description;
+	note->methods.get_value       = exif_mnote_data_pentax_get_value;
+	return note;
+}
diff --git a/src/libexif/pentax/exif-mnote-data-pentax.h b/src/libexif/pentax/exif-mnote-data-pentax.h
new file mode 100644
index 0000000..c04bc41
--- /dev/null
+++ b/src/libexif/pentax/exif-mnote-data-pentax.h
@@ -0,0 +1,44 @@
+/* exif-mnote-data-pentax.h
+ *
+ * Copyright © 2002 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __EXIF_MNOTE_DATA_PENTAX_H__
+#define __EXIF_MNOTE_DATA_PENTAX_H__
+
+#include <libexif/exif-byte-order.h>
+#include <libexif/exif-mnote-data.h>
+#include <libexif/exif-mnote-data-priv.h>
+#include <libexif/pentax/mnote-pentax-entry.h>
+#include <libexif/exif-mem.h>
+
+typedef struct _ExifMnoteDataPentax ExifMnoteDataPentax;
+
+struct _ExifMnoteDataPentax {	/* Pentax-specific MakerNote object */
+	ExifMnoteData parent;	/* base object; kept first because code casts ExifMnoteData * to this type */
+
+	MnotePentaxEntry *entries;	/* array of entries */
+	unsigned int count;	/* number of elements in entries */
+
+	ExifByteOrder order;	/* byte order currently recorded for the note */
+	unsigned int offset;	/* value stored through the set_offset method */
+};
+
+ExifMnoteData *exif_mnote_data_pentax_new (ExifMem *);
+
+#endif /* __EXIF_MNOTE_DATA_PENTAX_H__ */
diff --git a/src/libexif/pentax/mnote-pentax-entry.c b/src/libexif/pentax/mnote-pentax-entry.c
new file mode 100644
index 0000000..3469f22
--- /dev/null
+++ b/src/libexif/pentax/mnote-pentax-entry.c
@@ -0,0 +1,210 @@
+/* mnote-pentax-entry.c
+ *
+ * Copyright © 2002 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+#include "mnote-pentax-entry.h"
+
+#include <libexif/i18n.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <libexif/exif-format.h>
+#include <libexif/exif-utils.h>
+#include <libexif/exif-entry.h>
+
+
+#define CF(format,target,v,maxlen) /* format != target: write a message into v, then break */ \
+{                                                               \
+        if (format != target) {                                 \
+                snprintf (v, maxlen,	                        \
+                        _("Invalid format '%s', "               \
+                        "expected '%s'."),                      \
+                        exif_format_get_name (format),          \
+                        exif_format_get_name (target));         \
+                break;                                          \
+        }                                                       \
+}
+
+#define CC(number,target,v,maxlen) /* number != target: write a message into v, then break */ \
+{                                                                       \
+        if (number != target) {                                         \
+                snprintf (v, maxlen,                                    \
+                        _("Invalid number of components (%i, "          \
+                        "expected %i)."), (int) number, (int) target);  \
+                break;                                                  \
+        }                                                               \
+}
+
+static struct {	/* per-tag lists mapping a stored value to its display string */
+	ExifTag tag;	/* compared against entry->tag by mnote_pentax_entry_get_value */
+	struct {
+		int index;	/* stored value */
+		const char *string;	/* label; NULL marks the end of the list */
+	} elem[7];	/* every list below is in ascending index order */
+} items[] = {	/* NOTE(review): no all-zero terminator row; lookups must not scan past the last element */
+  { MNOTE_PENTAX_TAG_MODE,
+    { {0, N_("Auto")},
+      {1, N_("Night-scene")},
+      {2, N_("Manual")},
+      {0, NULL}}},
+  { MNOTE_PENTAX_TAG_QUALITY,
+    { {0, N_("Good")},
+      {1, N_("Better")},
+      {2, N_("Best")},{0,NULL}}},
+  { MNOTE_PENTAX_TAG_FOCUS,
+    { {2, N_("Custom")},
+      {3, N_("Auto")},
+      {0, NULL}}},
+  { MNOTE_PENTAX_TAG_FLASH,
+    { {1, N_("Auto")},
+      {2, N_("Flash on")},
+      {4, N_("Flash off")},
+      {6, N_("Red-eye Reduction")},
+      {0, NULL}}},
+  { MNOTE_PENTAX_TAG_WHITE_BALANCE,
+    { {0, N_("Auto")},
+      {1, N_("Daylight")},
+      {2, N_("Shade")},
+      {3, N_("Tungsten")},
+      {4, N_("Fluorescent")},
+      {5, N_("Manual")},
+      {0, NULL}}},
+  { MNOTE_PENTAX_TAG_SHARPNESS,
+    { {0, N_("Normal")},
+      {1, N_("Soft")},
+      {2, N_("Hard")},
+      {0, NULL}}},
+  { MNOTE_PENTAX_TAG_CONTRAST,
+    { {0, N_("Normal")},
+      {1, N_("Low")},
+      {2, N_("High")},
+      {0, NULL}}},
+  { MNOTE_PENTAX_TAG_SATURATION,
+    { {0, N_("Normal")},
+      {1, N_("Low")},
+      {2, N_("High")},
+      {0, NULL}}},
+  { MNOTE_PENTAX_TAG_ISO_SPEED,
+    { {10,  N_("100")},
+      {16,  N_("200")},
+      {100, N_("100")},
+      {200, N_("200")},
+      { 0,  NULL}}},
+  { MNOTE_PENTAX_TAG_COLOR,
+    { {1, N_("Full")},
+      {2, N_("Black & White")},
+      {3, N_("Sepia")},
+      {0, NULL}}},
+};
+
+char *
+mnote_pentax_entry_get_value (MnotePentaxEntry *entry,
+			      char *val, unsigned int maxlen)
+{
+	/* Render entry as text in val (maxlen bytes) and return val; NULL if !entry. */
+	const unsigned int nitems = sizeof (items) / sizeof (items[0]);
+	ExifLong vl;
+	ExifShort vs;
+	unsigned int i = 0, j = 0;
+
+	if (!entry) return (NULL);
+	if (!val || !maxlen) return (val);	/* nothing writable; maxlen-- would wrap */
+
+	memset (val, 0, maxlen);
+	maxlen--;
+
+	switch (entry->tag) {
+	  case MNOTE_PENTAX_TAG_MODE:
+	  case MNOTE_PENTAX_TAG_QUALITY:
+	  case MNOTE_PENTAX_TAG_FOCUS:
+	  case MNOTE_PENTAX_TAG_FLASH:
+	  case MNOTE_PENTAX_TAG_WHITE_BALANCE:
+	  case MNOTE_PENTAX_TAG_SHARPNESS:
+	  case MNOTE_PENTAX_TAG_CONTRAST:
+	  case MNOTE_PENTAX_TAG_SATURATION:
+	  case MNOTE_PENTAX_TAG_ISO_SPEED:
+	  case MNOTE_PENTAX_TAG_COLOR:
+		CF (entry->format, EXIF_FORMAT_SHORT, val, maxlen);
+		CC (entry->components, 1, val, maxlen);
+		vs = exif_get_short (entry->data, entry->order);
+
+		/* search the tag (items[] has no terminator row, so bound the scan) */
+		for (i = 0; i < nitems && items[i].tag != entry->tag; i++);
+		if (i == nitems) {
+		  	strncpy (val, "Internal error", maxlen);
+		  	break;
+		}
+
+		/* find the value */
+		for (j = 0; items[i].elem[j].string &&
+			    (items[i].elem[j].index < vs); j++);
+		if (items[i].elem[j].index != vs) {
+			snprintf (val, maxlen,
+				  "Internal error (unknown value %i)", vs);
+			break;
+		}
+		snprintf (val, maxlen, "%s", items[i].elem[j].string);
+		break;
+
+	case MNOTE_PENTAX_TAG_ZOOM:
+		CF (entry->format, EXIF_FORMAT_LONG, val, maxlen);
+		CC (entry->components, 1, val, maxlen);
+		vl = exif_get_long (entry->data, entry->order);
+		snprintf (val, maxlen, "%li", (long int) vl);
+		break;
+	case MNOTE_PENTAX_TAG_PRINTIM:
+		CF (entry->format, EXIF_FORMAT_UNDEFINED, val, maxlen);
+		CC (entry->components, 124, val, maxlen);
+		snprintf (val, maxlen, "%lu bytes unknown data",
+			  entry->components);
+		break;
+	case MNOTE_PENTAX_TAG_TZ_CITY:
+	case MNOTE_PENTAX_TAG_TZ_DST:
+		CF (entry->format, EXIF_FORMAT_UNDEFINED, val, maxlen);
+		CC (entry->components, 4, val, maxlen);
+		/* raw bytes, possibly unterminated: bound by both buffer and data */
+		strncpy (val, (const char *) entry->data, MIN (maxlen, entry->components));
+		break;
+	default:
+		switch (entry->format) {
+		case EXIF_FORMAT_ASCII:
+		  strncpy (val, (const char *) entry->data, MIN (maxlen, entry->components));
+		  break;
+		case EXIF_FORMAT_SHORT:
+		  vs = exif_get_short (entry->data, entry->order);
+		  snprintf (val, maxlen, "%i", vs);
+		  break;
+		case EXIF_FORMAT_LONG:
+		  vl = exif_get_long (entry->data, entry->order);
+		  snprintf (val, maxlen, "%li", (long int) vl);
+		  break;
+		case EXIF_FORMAT_UNDEFINED:
+		default:
+		  snprintf (val, maxlen, "%lu bytes unknown data",
+			    entry->components);
+		  break;
+		}
+		break;
+	}
+
+	return (val);
+}
diff --git a/src/libexif/pentax/mnote-pentax-entry.h b/src/libexif/pentax/mnote-pentax-entry.h
new file mode 100644
index 0000000..628d5dd
--- /dev/null
+++ b/src/libexif/pentax/mnote-pentax-entry.h
@@ -0,0 +1,43 @@
+/* mnote-pentax-entry.h
+ *
+ * Copyright © 2002 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __MNOTE_PENTAX_ENTRY_H__
+#define __MNOTE_PENTAX_ENTRY_H__
+
+#include <libexif/exif-format.h>
+#include <libexif/exif-byte-order.h>
+#include <libexif/pentax/mnote-pentax-tag.h>
+
+typedef struct _MnotePentaxEntry        MnotePentaxEntry;
+
+struct _MnotePentaxEntry {	/* one MakerNote entry */
+	MnotePentaxTag tag;	/* which tag this entry holds */
+	ExifFormat format;	/* data format, checked before the value is decoded */
+	unsigned long components;	/* component count, checked against the expected count per tag */
+
+	unsigned char *data;	/* raw value bytes, decoded using order */
+	unsigned int size;	/* presumably the byte length of data -- TODO confirm against the loader */
+
+	ExifByteOrder order;	/* byte order passed to exif_get_short / exif_get_long */
+};
+
+char *mnote_pentax_entry_get_value (MnotePentaxEntry *entry, char *val, unsigned int maxlen);
+
+#endif /* __MNOTE_PENTAX_ENTRY_H__ */
diff --git a/src/libexif/pentax/mnote-pentax-tag.c b/src/libexif/pentax/mnote-pentax-tag.c
new file mode 100644
index 0000000..76823a4
--- /dev/null
+++ b/src/libexif/pentax/mnote-pentax-tag.c
@@ -0,0 +1,94 @@
+/* mnote-pentax-tag.c:
+ *
+ * Copyright © 2002 Lutz Müller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+#include "mnote-pentax-tag.h"
+
+#include <stdlib.h>
+
+#include <libexif/i18n.h>
+
+static struct {	/* one row per known tag */
+	MnotePentaxTag tag;
+	const char *name;	/* untranslated identifier; NULL for the UNKNOWN_* tags */
+	const char *title;	/* marked with N_() for translation; NULL for the UNKNOWN_* tags */
+	const char *description;	/* NULL in every row of this table */
+} table[] = {
+	{MNOTE_PENTAX_TAG_MODE, "Mode", N_("Capture Mode"), NULL},
+	{MNOTE_PENTAX_TAG_QUALITY, "Quality", N_("Quality Level"), NULL},
+	{MNOTE_PENTAX_TAG_FOCUS, "Focus", N_("Focus Mode"), NULL},
+	{MNOTE_PENTAX_TAG_FLASH, "Flash", N_("Flash Mode"), NULL},
+	{MNOTE_PENTAX_TAG_UNKNOWN_05, NULL, NULL, NULL},
+	{MNOTE_PENTAX_TAG_UNKNOWN_06, NULL, NULL, NULL},
+	{MNOTE_PENTAX_TAG_WHITE_BALANCE, "WhiteBalance", N_("White Balance"), NULL},
+	{MNOTE_PENTAX_TAG_UNKNOWN_08, NULL, NULL, NULL},
+	{MNOTE_PENTAX_TAG_UNKNOWN_09, NULL, NULL, NULL},
+	{MNOTE_PENTAX_TAG_ZOOM, "Zoom", N_("Zoom"), NULL},
+	{MNOTE_PENTAX_TAG_SHARPNESS, "Sharpness", N_("Sharpness"), NULL},
+	{MNOTE_PENTAX_TAG_CONTRAST, "Contrast", N_("Contrast"), NULL},
+	{MNOTE_PENTAX_TAG_SATURATION, "Saturation", N_("Saturation"), NULL},
+	{MNOTE_PENTAX_TAG_UNKNOWN_14, NULL, NULL, NULL},
+	{MNOTE_PENTAX_TAG_UNKNOWN_15, NULL, NULL, NULL},
+	{MNOTE_PENTAX_TAG_UNKNOWN_16, NULL, NULL, NULL},
+	{MNOTE_PENTAX_TAG_UNKNOWN_17, NULL, NULL, NULL},
+	{MNOTE_PENTAX_TAG_UNKNOWN_18, NULL, NULL, NULL},
+	{MNOTE_PENTAX_TAG_UNKNOWN_19, NULL, NULL, NULL},
+	{MNOTE_PENTAX_TAG_ISO_SPEED, "ISOSpeed", N_("ISOSpeed"), NULL},
+	{MNOTE_PENTAX_TAG_UNKNOWN_21, NULL, NULL, NULL},
+	{MNOTE_PENTAX_TAG_COLOR, "Color", N_("Color"), NULL},
+	{MNOTE_PENTAX_TAG_UNKNOWN_24, NULL, NULL, NULL},
+	{MNOTE_PENTAX_TAG_UNKNOWN_25, NULL, NULL, NULL},
+	{MNOTE_PENTAX_TAG_PRINTIM, "PrintIM", N_("PrintIM Settings"), NULL},
+	{MNOTE_PENTAX_TAG_TZ_CITY, "TimeZone", N_("TimeZone"), NULL},
+	{MNOTE_PENTAX_TAG_TZ_DST, "DaylightSavings", N_("DaylightSavings"), NULL},
+	{0, NULL, NULL, NULL}	/* terminator row; the sizeof-bounded lookups scan it too */
+};
+
+const char *
+mnote_pentax_tag_get_name (MnotePentaxTag t)
+{
+	/* First table row whose tag equals t supplies the name (may be NULL). */
+	const unsigned int rows = sizeof (table) / sizeof (table[0]);
+	unsigned int row = 0;
+	while (row < rows && table[row].tag != t) row++;
+	return (row < rows) ? table[row].name : NULL;
+}
+
+const char *
+mnote_pentax_tag_get_title (MnotePentaxTag t)
+{
+	unsigned int i;
+	/* Translated title of t, or NULL; a NULL title must not reach gettext. */
+	bindtextdomain (GETTEXT_PACKAGE, LOCALEDIR);
+	for (i = 0; i < sizeof (table) / sizeof (table[0]); i++)
+		if (table[i].tag == t) return (table[i].title ? _(table[i].title) : NULL);
+	return NULL;
+}
+
+const char *
+mnote_pentax_tag_get_description (MnotePentaxTag t)
+{
+	unsigned int i;
+	/* Translated description of t, or NULL; a NULL field must not reach gettext. */
+	bindtextdomain (GETTEXT_PACKAGE, LOCALEDIR);
+	for (i = 0; i < sizeof (table) / sizeof (table[0]); i++)
+		if (table[i].tag == t) return (table[i].description ? _(table[i].description) : NULL);
+	return NULL;
+}
diff --git a/src/libexif/pentax/mnote-pentax-tag.h b/src/libexif/pentax/mnote-pentax-tag.h
new file mode 100644
index 0000000..a261102
--- /dev/null
+++ b/src/libexif/pentax/mnote-pentax-tag.h
@@ -0,0 +1,74 @@
+/* mnote-pentax-tag.h
+ *
+ * Copyright © 2002, 2003 Lutz Mueller <lutz@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, 
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __MNOTE_PENTAX_TAG_H__
+#define __MNOTE_PENTAX_TAG_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*
+ * Missing features which are probably in the unknowns somewhere ...
+ * 1/ AF Area (Wide, Spot, Free)
+ * 2/ AE Metering (Multi segment, Centre-weighted, Spot)
+ * 3/ 
+ */
+
+enum _MnotePentaxTag {	/* Pentax MakerNote tag numbers; UNKNOWN_nn names carry the value in decimal */
+	MNOTE_PENTAX_TAG_MODE		= 0x0001,
+	MNOTE_PENTAX_TAG_QUALITY	= 0x0002,
+	MNOTE_PENTAX_TAG_FOCUS		= 0x0003,
+	MNOTE_PENTAX_TAG_FLASH		= 0x0004,
+	MNOTE_PENTAX_TAG_UNKNOWN_05	= 0x0005,
+	MNOTE_PENTAX_TAG_UNKNOWN_06	= 0x0006,
+	MNOTE_PENTAX_TAG_WHITE_BALANCE	= 0x0007,
+	MNOTE_PENTAX_TAG_UNKNOWN_08	= 0x0008,
+	MNOTE_PENTAX_TAG_UNKNOWN_09	= 0x0009,
+	MNOTE_PENTAX_TAG_ZOOM		= 0x000a,
+	MNOTE_PENTAX_TAG_SHARPNESS	= 0x000b,
+	MNOTE_PENTAX_TAG_CONTRAST	= 0x000c,
+	MNOTE_PENTAX_TAG_SATURATION	= 0x000d,
+	MNOTE_PENTAX_TAG_UNKNOWN_14	= 0x000e,
+	MNOTE_PENTAX_TAG_UNKNOWN_15	= 0x000f,
+	MNOTE_PENTAX_TAG_UNKNOWN_16	= 0x0010,
+	MNOTE_PENTAX_TAG_UNKNOWN_17	= 0x0011,
+	MNOTE_PENTAX_TAG_UNKNOWN_18	= 0x0012,
+	MNOTE_PENTAX_TAG_UNKNOWN_19	= 0x0013,
+	MNOTE_PENTAX_TAG_ISO_SPEED	= 0x0014,
+	MNOTE_PENTAX_TAG_UNKNOWN_21	= 0x0015,
+	MNOTE_PENTAX_TAG_COLOR		= 0x0017,	/* 0x0016 has no enumerator */
+	MNOTE_PENTAX_TAG_UNKNOWN_24	= 0x0018,
+	MNOTE_PENTAX_TAG_UNKNOWN_25	= 0x0019,
+	MNOTE_PENTAX_TAG_PRINTIM	= 0x0e00,
+	MNOTE_PENTAX_TAG_TZ_CITY	= 0x1000,
+	MNOTE_PENTAX_TAG_TZ_DST		= 0x1001
+};
+typedef enum _MnotePentaxTag MnotePentaxTag;
+
+const char *mnote_pentax_tag_get_name        (MnotePentaxTag tag);
+const char *mnote_pentax_tag_get_title       (MnotePentaxTag tag);
+const char *mnote_pentax_tag_get_description (MnotePentaxTag tag);
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* __MNOTE_PENTAX_TAG_H__ */
diff --git a/src/libjasper/base/jas_cm.c b/src/libjasper/base/jas_cm.c
new file mode 100644
index 0000000..efeec99
--- /dev/null
+++ b/src/libjasper/base/jas_cm.c
@@ -0,0 +1,1282 @@
+/*
+ * Copyright (c) 2002-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Color Management
+ *
+ * $Id: jas_cm.c,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#include <jasper/jas_config.h>
+#include <math.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <jasper/jas_cm.h>
+#include <jasper/jas_icc.h>
+#include <jasper/jas_init.h>
+#include <jasper/jas_stream.h>
+#include <jasper/jas_malloc.h>
+#include <jasper/jas_math.h>
+
+static jas_cmprof_t *jas_cmprof_create(void);
+static void jas_cmshapmatlut_cleanup(jas_cmshapmatlut_t *);
+static jas_cmreal_t jas_cmshapmatlut_lookup(jas_cmshapmatlut_t *lut, jas_cmreal_t x);
+
+static void jas_cmpxform_destroy(jas_cmpxform_t *pxform);
+static jas_cmpxform_t *jas_cmpxform_copy(jas_cmpxform_t *pxform);
+
+static void jas_cmshapmat_destroy(jas_cmpxform_t *pxform);
+static int jas_cmshapmat_apply(jas_cmpxform_t *pxform, jas_cmreal_t *in,
+  jas_cmreal_t *out, int cnt);
+
+static int jas_cmputint(long **bufptr, int sgnd, int prec, long val);
+static int jas_cmgetint(long **bufptr, int sgnd, int prec, long *val);
+static int jas_cmpxformseq_append(jas_cmpxformseq_t *pxformseq,
+  jas_cmpxformseq_t *othpxformseq);
+static int jas_cmpxformseq_appendcnvt(jas_cmpxformseq_t *pxformseq,
+  int, int);
+static int jas_cmpxformseq_resize(jas_cmpxformseq_t *pxformseq, int n);
+
+static int mono(jas_iccprof_t *prof, int op, jas_cmpxformseq_t **pxformseq);
+static int triclr(jas_iccprof_t *prof, int op, jas_cmpxformseq_t **retpxformseq);
+
+static void jas_cmpxformseq_destroy(jas_cmpxformseq_t *pxformseq);
+static int jas_cmpxformseq_delete(jas_cmpxformseq_t *pxformseq, int i);
+static jas_cmpxformseq_t *jas_cmpxformseq_create(void);
+static jas_cmpxformseq_t *jas_cmpxformseq_copy(jas_cmpxformseq_t *pxformseq);
+static int jas_cmshapmat_invmat(jas_cmreal_t out[3][4], jas_cmreal_t in[3][4]);
+static int jas_cmpxformseq_insertpxform(jas_cmpxformseq_t *pxformseq,
+  int i, jas_cmpxform_t *pxform);
+
+#define	SEQFWD(intent)	(intent)	/* pxformseqs[] slot read by fwdpxformseq() */
+#define	SEQREV(intent)	(4 + (intent))	/* pxformseqs[] slot read by revpxformseq() */
+#define	SEQSIM(intent)	(8 + (intent))	/* pxformseqs[] slot read by simpxformseq() */
+#define	SEQGAM		12	/* pxformseqs[] slot read by gampxformseq() */
+
+#define fwdpxformseq(prof, intent) /* slot for intent, else the SEQFWD(0) slot */ \
+  (((prof)->pxformseqs[SEQFWD(intent)]) ? \
+  ((prof)->pxformseqs[SEQFWD(intent)]) : \
+  ((prof)->pxformseqs[SEQFWD(0)]))
+
+#define revpxformseq(prof, intent) /* slot for intent, else the SEQREV(0) slot */ \
+  (((prof)->pxformseqs[SEQREV(intent)]) ? \
+  ((prof)->pxformseqs[SEQREV(intent)]) : \
+  ((prof)->pxformseqs[SEQREV(0)]))
+
+#define simpxformseq(prof, intent) /* slot for intent, else the SEQSIM(0) slot */ \
+  (((prof)->pxformseqs[SEQSIM(intent)]) ? \
+  ((prof)->pxformseqs[SEQSIM(intent)]) : \
+  ((prof)->pxformseqs[SEQSIM(0)]))
+
+#define gampxformseq(prof)	((prof)->pxformseqs[SEQGAM])
+
+static int icctoclrspc(int iccclrspc, int refflag);
+static jas_cmpxform_t *jas_cmpxform_create0(void);
+static jas_cmpxform_t *jas_cmpxform_createshapmat(void);
+static void jas_cmshapmatlut_init(jas_cmshapmatlut_t *lut);
+static int jas_cmshapmatlut_set(jas_cmshapmatlut_t *lut, jas_icccurv_t *curv);
+
+static jas_cmpxformops_t shapmat_ops = {jas_cmshapmat_destroy, jas_cmshapmat_apply, 0};	/* destroy and apply callbacks; third slot left 0 */
+static jas_cmprof_t *jas_cmprof_createsycc(void);
+
+/******************************************************************************\
+* Color profile class.
+\******************************************************************************/
+
+jas_cmprof_t *jas_cmprof_createfromclrspc(int clrspc)
+{
+	/*
+	 * Create a color-management profile for the color space clrspc.
+	 * Returns the new profile, or 0 if any step fails.
+	 */
+	jas_iccprof_t *icc;
+	jas_cmprof_t *result;
+
+	/* sYCC is built by a dedicated constructor; 0 from it means failure. */
+	if (clrspc == JAS_CLRSPC_SYCBCR)
+		return jas_cmprof_createsycc();
+
+	/* Everything else goes through an ICC profile for the color space. */
+	icc = jas_iccprof_createfromclrspc(clrspc);
+	if (!icc)
+		return 0;
+	result = jas_cmprof_createfromiccprof(icc);
+
+	/* The ICC profile is released whether or not the conversion worked. */
+	jas_iccprof_destroy(icc);
+	if (!result)
+		return 0;
+
+	/* A non-generic color space replaces the one derived from the ICC header. */
+	if (!jas_clrspc_isgeneric(clrspc))
+		result->clrspc = clrspc;
+	return result;
+}
+
+static jas_cmprof_t *jas_cmprof_createsycc()
+{
+	/* Builds the sYCC profile: starts from sRGB and inserts a matrix stage into
+	 * every existing forward (index 0) and reverse (index -1) sequence. */
+	jas_cmprof_t *prof = 0;
+	jas_cmpxform_t *fwdpxform = 0;
+	jas_cmpxform_t *revpxform = 0;
+	jas_cmshapmat_t *fwdshapmat;
+	jas_cmshapmat_t *revshapmat;
+	int i, j;
+
+	if (!(prof = jas_cmprof_createfromclrspc(JAS_CLRSPC_SRGB)))
+		goto error;
+	prof->clrspc = JAS_CLRSPC_SYCBCR;
+	assert(prof->numchans == 3 && prof->numrefchans == 3);
+	assert(prof->refclrspc == JAS_CLRSPC_CIEXYZ);
+	if (!(fwdpxform = jas_cmpxform_createshapmat()))
+		goto error;
+	fwdpxform->numinchans = 3;
+	fwdpxform->numoutchans = 3;
+	fwdshapmat = &fwdpxform->data.shapmat;
+	fwdshapmat->mono = 0;
+	fwdshapmat->order = 0;
+	fwdshapmat->useluts = 0;
+	fwdshapmat->usemat = 1;
+	fwdshapmat->mat[0][0] = 1.0;
+	fwdshapmat->mat[0][1] = 0.0;
+	fwdshapmat->mat[0][2] = 1.402;
+	fwdshapmat->mat[1][0] = 1.0;
+	fwdshapmat->mat[1][1] = -0.34413;
+	fwdshapmat->mat[1][2] = -0.71414;
+	fwdshapmat->mat[2][0] = 1.0;
+	fwdshapmat->mat[2][1] = 1.772;
+	fwdshapmat->mat[2][2] = 0.0;
+	fwdshapmat->mat[0][3] = -0.5 * (1.402);
+	fwdshapmat->mat[1][3] = -0.5 * (-0.34413 - 0.71414);
+	fwdshapmat->mat[2][3] = -0.5 * (1.772);
+	if (!(revpxform = jas_cmpxform_createshapmat()))
+		goto error;
+	revpxform->numinchans = 3;
+	revpxform->numoutchans = 3;
+	revshapmat = &revpxform->data.shapmat;
+	revshapmat->mono = 0;
+	revshapmat->order = 1;
+	revshapmat->useluts = 0;
+	revshapmat->usemat = 1;
+	jas_cmshapmat_invmat(revshapmat->mat, fwdshapmat->mat);
+
+	for (i = 0; i < JAS_CMXFORM_NUMINTENTS; ++i) {
+		j = SEQFWD(i);
+		if (prof->pxformseqs[j] &&
+		  jas_cmpxformseq_insertpxform(prof->pxformseqs[j], 0, fwdpxform))
+			goto error;
+		j = SEQREV(i);
+		if (prof->pxformseqs[j] &&
+		  jas_cmpxformseq_insertpxform(prof->pxformseqs[j], -1, revpxform))
+			goto error;
+	}
+	jas_cmpxform_destroy(fwdpxform);
+	jas_cmpxform_destroy(revpxform);
+	return prof;
+error:
+	/* Release whatever was built so far (same calls as the success path) instead of leaking it. */
+	if (fwdpxform) jas_cmpxform_destroy(fwdpxform);
+	if (revpxform) jas_cmpxform_destroy(revpxform);
+	if (prof) jas_cmprof_destroy(prof);
+	return 0;
+}
+
+jas_cmprof_t *jas_cmprof_createfromiccprof(jas_iccprof_t *iccprof)
+{
+	/*
+	 * Create a profile from iccprof (which is copied, not adopted) and
+	 * build its default forward/reverse transform sequences.
+	 * Returns 0 on failure.
+	 */
+	jas_cmprof_t *prof = 0;
+	jas_cmpxformseq_t *fwdseq = 0;
+	jas_cmpxformseq_t *revseq = 0;
+	jas_icchdr_t hdr;
+
+	if (!(prof = jas_cmprof_create()))
+		goto error;
+	jas_iccprof_gethdr(iccprof, &hdr);
+	if (!(prof->iccprof = jas_iccprof_copy(iccprof)))
+		goto error;
+	/* Color spaces come from the ICC header; channel counts follow from them. */
+	prof->clrspc = icctoclrspc(hdr.colorspc, 0);
+	prof->refclrspc = icctoclrspc(hdr.refcolorspc, 1);
+	prof->numchans = jas_clrspc_numchans(prof->clrspc);
+	prof->numrefchans = jas_clrspc_numchans(prof->refclrspc);
+
+	/* Only 1- and 3-channel spaces get sequences; others keep 0 in both slots. */
+	if (prof->numchans == 1) {
+		if (mono(prof->iccprof, 0, &fwdseq) ||
+		  mono(prof->iccprof, 1, &revseq))
+			goto error;
+	} else if (prof->numchans == 3) {
+		if (triclr(prof->iccprof, 0, &fwdseq) ||
+		  triclr(prof->iccprof, 1, &revseq))
+			goto error;
+	}
+	prof->pxformseqs[SEQFWD(0)] = fwdseq;
+	prof->pxformseqs[SEQREV(0)] = revseq;
+
+	/* The block below is disabled and never compiled. */
+#if 0
+	if (prof->numchans > 1) {
+		lut(prof->iccprof, 0, PER, &pxformseq);
+		pxformseqs_set(prof, SEQFWD(PER), pxformseq);
+		lut(prof->iccprof, 1, PER, &pxformseq);
+		pxformseqs_set(prof, SEQREV(PER), pxformseq);
+		lut(prof->iccprof, 0, CLR, &pxformseq);
+		pxformseqs_set(prof, SEQREV(CLR), pxformseq);
+		lut(prof->iccprof, 1, CLR, &pxformseq);
+		pxformseqs_set(prof, SEQREV(CLR), pxformseq);
+		lut(prof->iccprof, 0, SAT, &pxformseq);
+		pxformseqs_set(prof, SEQREV(SAT), pxformseq);
+		lut(prof->iccprof, 1, SAT, &pxformseq);
+		pxformseqs_set(prof, SEQREV(SAT), pxformseq);
+	}
+#endif
+
+	return prof;
+
+error:
+	/* The sequences are not attached to prof yet, so free them separately. */
+	if (fwdseq)
+		jas_cmpxformseq_destroy(fwdseq);
+	if (revseq)
+		jas_cmpxformseq_destroy(revseq);
+	if (prof)
+		jas_cmprof_destroy(prof);
+
+	return 0;
+}
+
+static jas_cmprof_t *jas_cmprof_create()
+{
+	/* Allocate a zeroed profile with no ICC data and no sequences. */
+	jas_cmprof_t *fresh = jas_malloc(sizeof(jas_cmprof_t));
+	int slot;
+	if (!fresh) return 0;
+	memset(fresh, 0, sizeof(jas_cmprof_t));
+	fresh->iccprof = 0;
+	for (slot = 0; slot < JAS_CMPROF_NUMPXFORMSEQS; ++slot)
+		fresh->pxformseqs[slot] = 0;
+	return fresh;
+}
+
+void jas_cmprof_destroy(jas_cmprof_t *prof)
+{
+	/* Free every attached sequence, the ICC profile (if any), then prof. */
+	int slot;
+	for (slot = 0; slot < JAS_CMPROF_NUMPXFORMSEQS; ++slot) {
+		if (!prof->pxformseqs[slot])
+			continue;
+		jas_cmpxformseq_destroy(prof->pxformseqs[slot]);
+		prof->pxformseqs[slot] = 0;
+	}
+	if (prof->iccprof) jas_iccprof_destroy(prof->iccprof);
+	jas_free(prof);
+}
+
+jas_cmprof_t *jas_cmprof_copy(jas_cmprof_t *prof)
+{
+	/* Duplicate prof, its ICC profile and all sequences; 0 on failure. */
+	jas_cmprof_t *newprof;
+	int i;
+	if (!(newprof = jas_cmprof_create()))
+		goto error;
+	newprof->clrspc = prof->clrspc;
+	newprof->numchans = prof->numchans;
+	newprof->refclrspc = prof->refclrspc;
+	newprof->numrefchans = prof->numrefchans;
+	if (!(newprof->iccprof = jas_iccprof_copy(prof->iccprof)))
+		goto error;
+	for (i = 0; i < JAS_CMPROF_NUMPXFORMSEQS; ++i)
+		if (prof->pxformseqs[i] && !(newprof->pxformseqs[i] =
+		  jas_cmpxformseq_copy(prof->pxformseqs[i])))
+			goto error;
+	return newprof;
+error:
+	if (newprof) jas_cmprof_destroy(newprof);	/* don't leak the partial copy */
+	return 0;
+}
+
+/******************************************************************************\
+* Transform class.
+\******************************************************************************/
+
+jas_cmxform_t *jas_cmxform_create(jas_cmprof_t *inprof, jas_cmprof_t *outprof,
+  jas_cmprof_t *prfprof, int op, int intent, int optimize)
+{
+	/* Builds the transform for operation op between the given profiles by
+	 * chaining their per-profile sequences; returns 0 on any failure. */
+	jas_cmxform_t *xform;
+	jas_cmpxformseq_t *inpxformseq;
+	jas_cmpxformseq_t *outpxformseq;
+	jas_cmpxformseq_t *altoutpxformseq;
+	jas_cmpxformseq_t *prfpxformseq;
+	int prfintent = intent;
+
+	/* Avoid compiler warnings about unused parameters. */
+	optimize = 0;
+	if (!(xform = jas_malloc(sizeof(jas_cmxform_t))))
+		goto error;
+	if (!(xform->pxformseq = jas_cmpxformseq_create()))
+		goto error;
+
+	switch (op) {
+	case JAS_CMXFORM_OP_FWD:
+		inpxformseq = fwdpxformseq(inprof, intent);
+		outpxformseq = revpxformseq(outprof, intent);
+		if (!inpxformseq || !outpxformseq)
+			goto error;
+		if (jas_cmpxformseq_append(xform->pxformseq, inpxformseq) ||
+		  jas_cmpxformseq_appendcnvt(xform->pxformseq,
+		  inprof->refclrspc, outprof->refclrspc) ||
+		  jas_cmpxformseq_append(xform->pxformseq, outpxformseq))
+			goto error;
+		xform->numinchans = jas_clrspc_numchans(inprof->clrspc);
+		xform->numoutchans = jas_clrspc_numchans(outprof->clrspc);
+		break;
+	case JAS_CMXFORM_OP_REV:
+		outpxformseq = fwdpxformseq(outprof, intent);
+		inpxformseq = revpxformseq(inprof, intent);
+		if (!outpxformseq || !inpxformseq)
+			goto error;
+		if (jas_cmpxformseq_append(xform->pxformseq, outpxformseq) ||
+		  jas_cmpxformseq_appendcnvt(xform->pxformseq,
+		  outprof->refclrspc, inprof->refclrspc) ||
+		  jas_cmpxformseq_append(xform->pxformseq, inpxformseq))
+			goto error;
+		xform->numinchans = jas_clrspc_numchans(outprof->clrspc);
+		xform->numoutchans = jas_clrspc_numchans(inprof->clrspc);
+		break;
+	case JAS_CMXFORM_OP_PROOF:
+		assert(prfprof);
+		inpxformseq = fwdpxformseq(inprof, intent);
+		prfpxformseq = fwdpxformseq(prfprof, prfintent);
+		if (!inpxformseq || !prfpxformseq)
+			goto error;
+		outpxformseq = simpxformseq(outprof, intent);
+		altoutpxformseq = 0;
+		if (!outpxformseq) {
+			outpxformseq = revpxformseq(outprof, intent);
+			altoutpxformseq = fwdpxformseq(outprof, intent);
+			if (!outpxformseq || !altoutpxformseq)
+				goto error;
+		}
+		if (jas_cmpxformseq_append(xform->pxformseq, inpxformseq) ||
+		  jas_cmpxformseq_appendcnvt(xform->pxformseq,
+		  inprof->refclrspc, outprof->refclrspc))
+			goto error;
+		if (jas_cmpxformseq_append(xform->pxformseq, outpxformseq) ||
+		  (altoutpxformseq &&
+		  jas_cmpxformseq_append(xform->pxformseq, altoutpxformseq)))
+			goto error;
+		if (jas_cmpxformseq_appendcnvt(xform->pxformseq,
+		  outprof->refclrspc, inprof->refclrspc) ||
+		  jas_cmpxformseq_append(xform->pxformseq, prfpxformseq))
+			goto error;
+		xform->numinchans = jas_clrspc_numchans(inprof->clrspc);
+		xform->numoutchans = jas_clrspc_numchans(prfprof->clrspc);
+		break;
+	case JAS_CMXFORM_OP_GAMUT:
+		inpxformseq = fwdpxformseq(inprof, intent);
+		outpxformseq = gampxformseq(outprof);
+		if (!inpxformseq || !outpxformseq)
+			goto error;
+		if (jas_cmpxformseq_append(xform->pxformseq, inpxformseq) ||
+		  jas_cmpxformseq_appendcnvt(xform->pxformseq,
+		  inprof->refclrspc, outprof->refclrspc) ||
+		  jas_cmpxformseq_append(xform->pxformseq, outpxformseq))
+			goto error;
+		xform->numinchans = jas_clrspc_numchans(inprof->clrspc);
+		xform->numoutchans = 1;
+		break;
+	default:
+		goto error;	/* unknown op: channel counts would stay uninitialised */
+	}
+	return xform;
+error:
+	/* Free the partially built transform instead of leaking it. */
+	if (xform && xform->pxformseq) jas_cmpxformseq_destroy(xform->pxformseq);
+	if (xform) jas_free(xform);
+	return 0;
+}
+
+#define	APPLYBUFSIZ	2048
+/* Run the transform over the first numinchans components of `in`, storing the
+ * result in the first numoutchans components of `out`.  Samples are converted
+ * to reals, pushed through each primitive transform in batches that fit the
+ * APPLYBUFSIZ scratch buffers, and converted back.  Returns 0, or -1 on error. */
+int jas_cmxform_apply(jas_cmxform_t *xform, jas_cmpixmap_t *in, jas_cmpixmap_t *out)
+{
+	jas_cmcmptfmt_t *fmt;
+	jas_cmreal_t buf[2][APPLYBUFSIZ];
+	jas_cmpxformseq_t *pxformseq;
+	int i, j;
+	int width, height;
+	int total, n;
+	jas_cmreal_t *inbuf, *outbuf;
+	jas_cmpxform_t *pxform;
+	long *dataptr;
+	int maxchans;
+	int bufmax;
+	int m;
+	int bias;
+	jas_cmreal_t scale;
+	long v;
+	jas_cmreal_t *bufptr;
+
+	if (xform->numinchans > in->numcmpts || xform->numoutchans > out->numcmpts)
+		goto error;
+
+	fmt = &in->cmptfmts[0];
+	width = fmt->width;
+	height = fmt->height;
+	for (i = 1; i < xform->numinchans; ++i) {
+		fmt = &in->cmptfmts[i];
+		if (fmt->width != width || fmt->height != height) {
+			goto error;
+		}
+	}
+	for (i = 0; i < xform->numoutchans; ++i) {
+		fmt = &out->cmptfmts[i];
+		if (fmt->width != width || fmt->height != height) {
+			goto error;
+		}
+	}
+	/* Size batches for the widest stage; the pixmap (de)interleave widths count too. */
+	maxchans = (xform->numinchans > xform->numoutchans) ? xform->numinchans : xform->numoutchans;
+	pxformseq = xform->pxformseq;
+	for (i = 0; i < pxformseq->numpxforms; ++i) {
+		pxform = pxformseq->pxforms[i];
+		if (pxform->numinchans > maxchans) {
+			maxchans = pxform->numinchans;
+		}
+		if (pxform->numoutchans > maxchans) {
+			maxchans = pxform->numoutchans;
+		}
+	}
+	if (maxchans <= 0 || (bufmax = APPLYBUFSIZ / maxchans) <= 0)
+		goto error;	/* no channels at all, or a stage wider than the buffers */
+
+	total = width * height;
+	n = 0;
+	while (n < total) {
+		/* Gather the next batch of at most bufmax samples, channel-interleaved, as reals. */
+		inbuf = &buf[0][0];
+		m = JAS_MIN(total - n, bufmax);
+		/* NOTE(review): for signed data (v - bias) is always negative -- confirm the intended sign. */
+		for (i = 0; i < xform->numinchans; ++i) {
+			fmt = &in->cmptfmts[i];
+			scale = (double)((1 << fmt->prec) - 1);
+			bias = fmt->sgnd ? (1 << (fmt->prec - 1)) : 0;
+			dataptr = &fmt->buf[n];
+			bufptr = &inbuf[i];
+			for (j = 0; j < m; ++j) {
+				if (jas_cmgetint(&dataptr, fmt->sgnd, fmt->prec, &v))
+					goto error;
+				*bufptr = (v - bias) / scale;
+				bufptr += xform->numinchans;
+			}
+		}
+		/* Run every stage; a stage that widens the data writes to the other buffer. */
+		inbuf = &buf[0][0];
+		outbuf = inbuf;
+		for (i = 0; i < pxformseq->numpxforms; ++i) {
+			pxform = pxformseq->pxforms[i];
+			if (pxform->numoutchans > pxform->numinchans) {
+				outbuf = (inbuf == &buf[0][0]) ? &buf[1][0] : &buf[0][0];
+			} else {
+				outbuf = inbuf;
+			}
+			if ((*pxform->ops->apply)(pxform, inbuf, outbuf, m))
+				goto error;
+			inbuf = outbuf;
+		}
+		/* Scale the results back to integers and store them in the output components. */
+		for (i = 0; i < xform->numoutchans; ++i) {
+			fmt = &out->cmptfmts[i];
+			scale = (double)((1 << fmt->prec) - 1);
+			bias = fmt->sgnd ? (1 << (fmt->prec - 1)) : 0;
+			bufptr = &outbuf[i];
+			dataptr = &fmt->buf[n];
+			for (j = 0; j < m; ++j) {
+				v = (*bufptr) * scale + bias;
+				bufptr += xform->numoutchans;
+				if (jas_cmputint(&dataptr, fmt->sgnd, fmt->prec, v))
+					goto error;
+			}
+		}
+	
+		n += m;
+	}
+	
+	return 0;
+error:
+	return -1;
+}
+
+void jas_cmxform_destroy(jas_cmxform_t *xform)
+{
+	if (xform->pxformseq != 0)	/* the stage sequence is optional */
+		jas_cmpxformseq_destroy(xform->pxformseq);
+	jas_free(xform);	/* finally the transform object itself */
+}
+
+/******************************************************************************\
+* Primitive transform sequence class.
+\******************************************************************************/
+
+/* Allocate an empty primitive-transform sequence with room for 16 entries.
+ * Returns the new sequence, or 0 if either allocation fails. */
+static jas_cmpxformseq_t *jas_cmpxformseq_create()
+{
+	jas_cmpxformseq_t *seq = jas_malloc(sizeof(jas_cmpxformseq_t));
+	if (!seq)
+		return 0;
+	seq->numpxforms = 0;
+	seq->maxpxforms = 0;
+	seq->pxforms = 0;
+	if (jas_cmpxformseq_resize(seq, 16) != 0) {
+		/* Nothing has been inserted yet, so this only frees the header. */
+		jas_cmpxformseq_destroy(seq);
+		return 0;
+	}
+	return seq;
+}
+
+/* Return a new sequence that references the same transforms as pxformseq, or 0 on failure. */
+static jas_cmpxformseq_t *jas_cmpxformseq_copy(jas_cmpxformseq_t *pxformseq)
+{
+	jas_cmpxformseq_t *newpxformseq;
+	if (!(newpxformseq = jas_cmpxformseq_create()))
+		return 0;
+	if (jas_cmpxformseq_append(newpxformseq, pxformseq)) {
+		jas_cmpxformseq_destroy(newpxformseq);	/* do not leak the partial copy */
+		return 0;
+	}
+	return newpxformseq;
+}
+
+static void jas_cmpxformseq_destroy(jas_cmpxformseq_t *pxformseq)
+{
+	while (pxformseq->numpxforms >= 1)	/* release the entries from the back */
+		jas_cmpxformseq_delete(pxformseq, pxformseq->numpxforms - 1);
+	if (pxformseq->pxforms != 0)
+		jas_free(pxformseq->pxforms);	/* the pointer array */
+	jas_free(pxformseq);	/* the sequence header */
+}
+
+/* Remove entry i, which must be the last one (any other index aborts).  Returns 0. */
+static int jas_cmpxformseq_delete(jas_cmpxformseq_t *pxformseq, int i)
+{
+	assert(i >= 0 && i < pxformseq->numpxforms);
+	if (pxformseq->numpxforms - 1 != i)
+		abort();
+	jas_cmpxform_destroy(pxformseq->pxforms[i]);
+	pxformseq->pxforms[--pxformseq->numpxforms] = 0;	/* i is the last slot here */
+	return 0;
+}
+
+/* Append a conversion from srcclrspc to dstclrspc.  Only the identity case is
+ * handled (nothing is appended, 0 is returned); differing spaces abort. */
+static int jas_cmpxformseq_appendcnvt(jas_cmpxformseq_t *pxformseq,
+  int dstclrspc, int srcclrspc)
+{
+	if (srcclrspc != dstclrspc)
+		abort();
+	pxformseq = 0;	/* only quiets unused-parameter warnings */
+	return 0;
+}
+
+/* Insert a new reference to pxform at index i (i < 0 appends), growing the
+ * array by 16 slots when it is full.  Returns 0 on success, -1 on failure. */
+static int jas_cmpxformseq_insertpxform(jas_cmpxformseq_t *pxformseq,
+  int i, jas_cmpxform_t *pxform)
+{
+	jas_cmpxform_t *ref;
+	int tail;
+	if (i < 0)
+		i = pxformseq->numpxforms;
+	assert(i >= 0 && i <= pxformseq->numpxforms);
+	if (pxformseq->numpxforms >= pxformseq->maxpxforms &&
+	  jas_cmpxformseq_resize(pxformseq, pxformseq->numpxforms + 16)) {
+		return -1;
+	}
+	assert(pxformseq->numpxforms < pxformseq->maxpxforms);
+	ref = jas_cmpxform_copy(pxform);
+	if (!ref)
+		return -1;
+	tail = pxformseq->numpxforms - i;	/* entries that must move up one slot */
+	if (tail > 0) {
+		memmove(&pxformseq->pxforms[i + 1], &pxformseq->pxforms[i],
+		  tail * sizeof(jas_cmpxform_t *));
+	}
+	pxformseq->pxforms[i] = ref;
+	pxformseq->numpxforms += 1;
+	return 0;
+}
+
+/* Append a reference to every transform of othpxformseq, in order, to
+ * pxformseq, growing its array first when needed.  Returns 0, or -1 on failure. */
+static int jas_cmpxformseq_append(jas_cmpxformseq_t *pxformseq,
+  jas_cmpxformseq_t *othpxformseq)
+{
+	jas_cmpxform_t *ref;
+	int needed;
+	int k;
+
+	needed = pxformseq->numpxforms + othpxformseq->numpxforms;
+	if (needed > pxformseq->maxpxforms &&
+	  jas_cmpxformseq_resize(pxformseq, needed)) {
+		return -1;
+	}
+	for (k = 0; k < othpxformseq->numpxforms; ++k) {
+		ref = jas_cmpxform_copy(othpxformseq->pxforms[k]);
+		if (!ref) {
+			return -1;
+		}
+		pxformseq->pxforms[pxformseq->numpxforms++] = ref;
+	}
+	return 0;
+}
+
+/* Set the array capacity to n slots.  Returns 0, or -1 (sequence left as it was) if allocation fails. */
+static int jas_cmpxformseq_resize(jas_cmpxformseq_t *pxformseq, int n)
+{
+	size_t nbytes = n * sizeof(jas_cmpxform_t *);
+	jas_cmpxform_t **grown;
+	assert(n >= pxformseq->numpxforms);
+	grown = pxformseq->pxforms ? jas_realloc(pxformseq->pxforms, nbytes) : jas_malloc(nbytes);
+	if (!grown)
+		return -1;
+	pxformseq->pxforms = grown;
+	pxformseq->maxpxforms = n;
+	return 0;
+}
+
+/******************************************************************************\
+* Primitive transform class.
+\******************************************************************************/
+
+static jas_cmpxform_t *jas_cmpxform_create0()	/* returns 0 if allocation fails */
+{
+	jas_cmpxform_t *fresh = jas_malloc(sizeof(jas_cmpxform_t));
+	if (!fresh)
+		return 0;
+	memset(fresh, 0, sizeof(jas_cmpxform_t));
+	fresh->ops = 0;	/* no transform type attached yet */
+	fresh->refcnt = 0;	/* nobody holds a reference yet */
+	return fresh;
+}
+
+static void jas_cmpxform_destroy(jas_cmpxform_t *pxform)	/* drops one reference */
+{
+	if (--pxform->refcnt > 0)
+		return;	/* other holders remain */
+	pxform->ops->destroy(pxform);	/* last reference: type-specific cleanup */
+	jas_free(pxform);
+}
+
+static jas_cmpxform_t *jas_cmpxform_copy(jas_cmpxform_t *pxform)
+{
+	pxform->refcnt += 1;	/* a "copy" is one more reference to the same object */
+	return pxform;
+}
+
+/******************************************************************************\
+* Shaper matrix class.
+\******************************************************************************/
+
+/* Create a shaper/matrix transform holding one reference, with every flag
+ * cleared, empty LUTs and an all-zero 3x4 matrix.  Returns 0 on failure. */
+static jas_cmpxform_t *jas_cmpxform_createshapmat()
+{
+	jas_cmpxform_t *pxform;
+	jas_cmshapmat_t *sm;
+	int row;
+	int col;
+
+	pxform = jas_cmpxform_create0();
+	if (!pxform)
+		return 0;
+	pxform->ops = &shapmat_ops;
+	sm = &pxform->data.shapmat;
+	sm->mono = sm->order = sm->useluts = sm->usemat = 0;
+	for (row = 0; row < 3; ++row) {
+		jas_cmshapmatlut_init(&sm->luts[row]);
+		for (col = 0; col < 4; ++col)
+			sm->mat[row][col] = 0.0;
+	}
+	pxform->refcnt += 1;
+	return pxform;
+}
+
+/* Release the three LUTs of a shaper/matrix transform; pxform itself is not freed here. */
+static void jas_cmshapmat_destroy(jas_cmpxform_t *pxform)
+{
+	int k;
+	for (k = 0; k != 3; ++k)
+		jas_cmshapmatlut_cleanup(&pxform->data.shapmat.luts[k]);
+}
+
+static int jas_cmshapmat_apply(jas_cmpxform_t *pxform, jas_cmreal_t *in,
+  jas_cmreal_t *out, int cnt)	/* transforms cnt samples from in to out; always returns 0 */
+{
+	jas_cmshapmat_t *shapmat = &pxform->data.shapmat;
+	jas_cmreal_t *src;
+	jas_cmreal_t *dst;
+	jas_cmreal_t a0;
+	jas_cmreal_t a1;
+	jas_cmreal_t a2;
+	jas_cmreal_t b0;
+	jas_cmreal_t b1;
+	jas_cmreal_t b2;
+	src = in;
+	dst = out;
+	if (!shapmat->mono) {	/* three values in, three values out per sample */
+		while (--cnt >= 0) {
+			a0 = *src++;
+			a1 = *src++;
+			a2 = *src++;
+			if (!shapmat->order && shapmat->useluts) {	/* order == 0: LUTs run before the matrix */
+				a0 = jas_cmshapmatlut_lookup(&shapmat->luts[0], a0);
+				a1 = jas_cmshapmatlut_lookup(&shapmat->luts[1], a1);
+				a2 = jas_cmshapmatlut_lookup(&shapmat->luts[2], a2);
+			}
+			if (shapmat->usemat) {	/* 3x3 product plus the offset column mat[i][3] */
+				b0 = shapmat->mat[0][0] * a0
+				  + shapmat->mat[0][1] * a1
+				  + shapmat->mat[0][2] * a2
+				  + shapmat->mat[0][3];
+				b1 = shapmat->mat[1][0] * a0
+				  + shapmat->mat[1][1] * a1
+				  + shapmat->mat[1][2] * a2
+				  + shapmat->mat[1][3];
+				b2 = shapmat->mat[2][0] * a0
+				  + shapmat->mat[2][1] * a1
+				  + shapmat->mat[2][2] * a2
+				  + shapmat->mat[2][3];
+				a0 = b0;
+				a1 = b1;
+				a2 = b2;
+			}
+			if (shapmat->order && shapmat->useluts) {	/* order != 0: LUTs run after the matrix */
+				a0 = jas_cmshapmatlut_lookup(&shapmat->luts[0], a0);
+				a1 = jas_cmshapmatlut_lookup(&shapmat->luts[1], a1);
+				a2 = jas_cmshapmatlut_lookup(&shapmat->luts[2], a2);
+			}
+			*dst++ = a0;
+			*dst++ = a1;
+			*dst++ = a2;
+		}
+	} else {
+		if (!shapmat->order) {	/* one value in, three out: LUT 0, then scale by matrix column 0 */
+			while (--cnt >= 0) {
+				a0 = *src++;
+				if (shapmat->useluts)
+					a0 = jas_cmshapmatlut_lookup(&shapmat->luts[0], a0);
+				a2 = a0 * shapmat->mat[2][0];
+				a1 = a0 * shapmat->mat[1][0];
+				a0 = a0 * shapmat->mat[0][0];
+				*dst++ = a0;
+				*dst++ = a1;
+				*dst++ = a2;
+			}
+		} else {
+assert(0);	/* fires unless NDEBUG is defined, so the loop below is then never reached */
+			while (--cnt >= 0) {	/* three values in (only the first is used), one out */
+				a0 = *src++;
+				src++;
+				src++;
+				a0 = a0 * shapmat->mat[0][0];
+				if (shapmat->useluts)
+					a0 = jas_cmshapmatlut_lookup(&shapmat->luts[0], a0);
+				*dst++ = a0;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void jas_cmshapmatlut_init(jas_cmshapmatlut_t *lut)
+{
+	lut->size = 0;	/* an empty table ... */
+	lut->data = 0;	/* ... that owns no storage */
+}
+
+/* Free the table storage, if any, and put the LUT back in the empty state. */
+static void jas_cmshapmatlut_cleanup(jas_cmshapmatlut_t *lut)
+{
+	if (lut->data != 0)
+		jas_free(lut->data);
+	lut->data = 0;
+	lut->size = 0;
+}
+
+/* x raised to the power gamma, except that an input of exactly zero always
+ * yields zero (whatever pow would return for that exponent). */
+static double gammafn(double x, double gamma)
+{
+	return (x == 0.0) ? 0.0 : pow(x, gamma);
+}
+
+/* Load the LUT from an ICC curve.  No entries: the two-point ramp {0, 1}.  One
+ * entry: a 256-point power curve with exponent entry / 256.  Otherwise: every
+ * entry divided by 65535.  Returns 0, or -1 if the table cannot be allocated. */
+static int jas_cmshapmatlut_set(jas_cmshapmatlut_t *lut, jas_icccurv_t *curv)
+{
+	jas_cmreal_t gamma = 0;
+	int i;
+
+	jas_cmshapmatlut_cleanup(lut);
+	if (curv->numents == 0)
+		lut->size = 2;
+	else if (curv->numents == 1)
+		lut->size = 256;
+	else
+		lut->size = curv->numents;
+	if (!(lut->data = jas_malloc(lut->size * sizeof(jas_cmreal_t))))
+		return -1;
+	if (curv->numents == 0) {
+		lut->data[0] = 0.0;
+		lut->data[1] = 1.0;
+	} else if (curv->numents == 1) {
+		gamma = curv->ents[0] / 256.0;
+		for (i = 0; i < lut->size; ++i) {
+			lut->data[i] = gammafn(i / (double) (lut->size - 1), gamma);
+		}
+	} else {
+		for (i = 0; i < lut->size; ++i) {
+			lut->data[i] = curv->ents[i] / 65535.0;
+		}
+	}
+	return 0;
+}
+
+/* Linearly interpolate the table at x (1.0 maps to the last entry); out-of-range inputs clamp, NaN gives entry 0. */
+static jas_cmreal_t jas_cmshapmatlut_lookup(jas_cmshapmatlut_t *lut, jas_cmreal_t x)
+{
+	jas_cmreal_t t = x * (lut->size - 1);
+	int lo, hi;
+	/* Clamp before converting: an out-of-range or NaN real-to-int conversion is undefined. */
+	if (!(t >= 0))
+		return lut->data[0];
+	if (t > lut->size - 1)
+		return lut->data[lut->size - 1];
+	lo = floor(t);
+	hi = ceil(t);
+	return lut->data[lo] + (t - lo) * (lut->data[hi] - lut->data[lo]);
+}
+
+static int jas_cmshapmatlut_invert(jas_cmshapmatlut_t *invlut,
+  jas_cmshapmatlut_t *lut, int n)	/* fills invlut with n samples inverting lut; 0 on success, -1 on failure */
+{
+	int i;
+	int j;
+	int k;
+	jas_cmreal_t ax;
+	jas_cmreal_t ay;
+	jas_cmreal_t bx;
+	jas_cmreal_t by;
+	jas_cmreal_t sx;
+	jas_cmreal_t sy;
+	assert(n >= 2);
+	if (invlut->data) {
+		jas_free(invlut->data);
+		invlut->data = 0;
+	}
+	/* The sample values should be nondecreasing. */
+	for (i = 1; i < lut->size; ++i) {
+		if (lut->data[i - 1] > lut->data[i]) {
+			assert(0);
+			return -1;
+		}
+	}
+	if (!(invlut->data = jas_malloc(n * sizeof(jas_cmreal_t))))
+		return -1;
+	invlut->size = n;
+	for (i = 0; i < invlut->size; ++i) {
+		sy = ((double) i) / (invlut->size - 1);	/* output level whose input position is sought */
+		sx = 1.0;	/* value kept when nothing below assigns sx */
+		for (j = 0; j < lut->size; ++j) {
+			ay = lut->data[j];
+			if (sy == ay) {	/* exact hit: find where the run of equal samples ends */
+				for (k = j + 1; k < lut->size; ++k) {
+					by = lut->data[k];
+					if (by != sy)
+						break;
+#if 0
+assert(0);
+#endif
+				}
+				if (k < lut->size) {	/* the run ends inside the table: use its midpoint */
+					--k;
+					ax = ((double) j) / (lut->size - 1);
+					bx = ((double) k) / (lut->size - 1);
+					sx = (ax + bx) / 2.0;
+				}
+				break;
+			}
+			if (j < lut->size - 1) {
+				by = lut->data[j + 1];
+				if (sy > ay && sy < by) {	/* sy lies strictly inside segment j: interpolate */
+					ax = ((double) j) / (lut->size - 1);
+					bx = ((double) j + 1) / (lut->size - 1);
+					sx = ax +
+					  (sy - ay) / (by - ay) * (bx - ax);
+					break;
+				}
+			}
+		}
+		invlut->data[i] = sx;
+	}
+#if 0
+for (i=0;i<lut->size;++i)
+	jas_eprintf("lut[%d]=%f ", i, lut->data[i]);
+for (i=0;i<invlut->size;++i)
+	jas_eprintf("invlut[%d]=%f ", i, invlut->data[i]);
+#endif
+	return 0;
+}
+
+/* Invert the affine map y = A x + b stored as [A | b] (in -> out); returns -1 if A is nearly singular, else 0. */
+static int jas_cmshapmat_invmat(jas_cmreal_t out[3][4], jas_cmreal_t in[3][4])
+{
+	jas_cmreal_t d = in[0][0] * (in[1][1] * in[2][2] - in[1][2] * in[2][1])
+	  - in[0][1] * (in[1][0] * in[2][2] - in[1][2] * in[2][0])
+	  + in[0][2] * (in[1][0] * in[2][1] - in[1][1] * in[2][0]);
+#if 0
+jas_eprintf("delta=%f\n", d);
+#endif
+	if (JAS_ABS(d) < 1e-6)
+		return -1;
+	out[0][0] = (in[1][1] * in[2][2] - in[1][2] * in[2][1]) / d;
+	out[1][0] = -(in[1][0] * in[2][2] - in[1][2] * in[2][0]) / d;
+	out[2][0] = (in[1][0] * in[2][1] - in[1][1] * in[2][0]) / d;
+	out[0][1] = -(in[0][1] * in[2][2] - in[0][2] * in[2][1]) / d;
+	out[1][1] = (in[0][0] * in[2][2] - in[0][2] * in[2][0]) / d;
+	out[2][1] = -(in[0][0] * in[2][1] - in[0][1] * in[2][0]) / d;
+	out[0][2] = (in[0][1] * in[1][2] - in[0][2] * in[1][1]) / d;
+	out[1][2] = -(in[0][0] * in[1][2] - in[1][0] * in[0][2]) / d;
+	out[2][2] = (in[0][0] * in[1][1] - in[0][1] * in[1][0]) / d;
+	out[0][3] = -(out[0][0] * in[0][3] + out[0][1] * in[1][3] + out[0][2] * in[2][3]);	/* offset is -inv(A) b */
+	out[1][3] = -(out[1][0] * in[0][3] + out[1][1] * in[1][3] + out[1][2] * in[2][3]);
+	out[2][3] = -(out[2][0] * in[0][3] + out[2][1] * in[1][3] + out[2][2] * in[2][3]);
+#if 0
+jas_eprintf("[ %f %f %f %f ]\n[ %f %f %f %f ]\n[ %f %f %f %f ]\n",
+in[0][0], in[0][1], in[0][2], in[0][3],
+in[1][0], in[1][1], in[1][2], in[1][3],
+in[2][0], in[2][1], in[2][2], in[2][3]);
+jas_eprintf("[ %f %f %f %f ]\n[ %f %f %f %f ]\n[ %f %f %f %f ]\n",
+out[0][0], out[0][1], out[0][2], out[0][3],
+out[1][0], out[1][1], out[1][2], out[1][3],
+out[2][0], out[2][1], out[2][2], out[2][3]);
+#endif
+	return 0;
+}
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+/*
+ * Map an ICC colour space signature to the corresponding JasPer colour space.
+ * With refflag set only the XYZ and Lab signatures are accepted; otherwise
+ * only YCbCr, RGB and gray.  Every other signature aborts the program.
+ */
+static int icctoclrspc(int iccclrspc, int refflag)
+{
+	if (refflag) {
+		/* Signatures accepted when refflag is set. */
+		if (iccclrspc == JAS_ICC_COLORSPC_XYZ)
+			return JAS_CLRSPC_CIEXYZ;
+		if (iccclrspc == JAS_ICC_COLORSPC_LAB)
+			return JAS_CLRSPC_CIELAB;
+	} else {
+		/* Signatures accepted when refflag is clear. */
+		if (iccclrspc == JAS_ICC_COLORSPC_YCBCR)
+			return JAS_CLRSPC_GENYCBCR;
+		if (iccclrspc == JAS_ICC_COLORSPC_RGB)
+			return JAS_CLRSPC_GENRGB;
+		if (iccclrspc == JAS_ICC_COLORSPC_GRAY)
+			return JAS_CLRSPC_GENGRAY;
+	}
+
+	/* No mapping exists for this signature. */
+	abort();
+}
+
+/* Build the one-stage sequence for a profile with a gray TRC curve (op == 0 uses the curve,
+ * otherwise its inverse).  Returns 0 with *retpxformseq set, or -1 with everything released. */
+static int mono(jas_iccprof_t *iccprof, int op, jas_cmpxformseq_t **retpxformseq)
+{
+	jas_iccattrval_t *graytrc = 0;
+	jas_cmshapmat_t *shapmat;
+	jas_cmpxform_t *pxform = 0;
+	jas_cmpxformseq_t *pxformseq = 0;
+	jas_cmshapmatlut_t lut;
+	int ret = -1;
+	jas_cmshapmatlut_init(&lut);
+	if (!(graytrc = jas_iccprof_getattr(iccprof, JAS_ICC_TAG_GRYTRC)) ||
+	  graytrc->type != JAS_ICC_TYPE_CURV)
+		goto done;
+	if (!(pxform = jas_cmpxform_createshapmat()) ||
+	  !(pxformseq = jas_cmpxformseq_create()) ||
+	  jas_cmpxformseq_insertpxform(pxformseq, -1, pxform))
+		goto done;
+	shapmat = &pxform->data.shapmat;
+	pxform->numinchans = 1;
+	pxform->numoutchans = 3;
+	shapmat->mono = shapmat->useluts = shapmat->usemat = 1;
+	if (!op) {
+		shapmat->order = 0;
+		shapmat->mat[0][0] = 0.9642;
+		shapmat->mat[1][0] = 1.0;
+		shapmat->mat[2][0] = 0.8249;
+		if (jas_cmshapmatlut_set(&shapmat->luts[0], &graytrc->data.curv))
+			goto done;
+	} else {
+		shapmat->order = 1;
+		shapmat->mat[0][0] = 1.0 / 0.9642;
+		shapmat->mat[1][0] = 1.0;
+		shapmat->mat[2][0] = 1.0 / 0.8249;
+		if (jas_cmshapmatlut_set(&lut, &graytrc->data.curv) ||
+		  jas_cmshapmatlut_invert(&shapmat->luts[0], &lut, lut.size))
+			goto done;
+	}
+	*retpxformseq = pxformseq;
+	pxformseq = 0;	/* ownership has passed to the caller */
+	ret = 0;
+done:
+	/* Shared exit: release whatever this call still holds. */
+	jas_cmshapmatlut_cleanup(&lut);
+	if (graytrc)
+		jas_iccattrval_destroy(graytrc);
+	if (pxformseq)
+		jas_cmpxformseq_destroy(pxformseq);
+	if (pxform)
+		jas_cmpxform_destroy(pxform);
+	return ret;
+}
+
+/* Build the one-stage sequence for a profile with three TRC curves and three matrix
+ * columns (op == 0: curves then matrix; otherwise inverted matrix then inverted curves).
+ * Returns 0 with *retpxformseq set, or -1 on failure. */
+static int triclr(jas_iccprof_t *iccprof, int op, jas_cmpxformseq_t **retpxformseq)
+{
+	int i;
+	jas_iccattrval_t *trcs[3];
+	jas_iccattrval_t *cols[3];
+	jas_cmshapmat_t *shapmat;
+	jas_cmpxform_t *pxform;
+	jas_cmpxformseq_t *pxformseq;
+	jas_cmreal_t mat[3][4];
+	jas_cmshapmatlut_t lut;
+
+	pxform = 0;
+	pxformseq = 0;
+	for (i = 0; i < 3; ++i) {
+		trcs[i] = 0;
+		cols[i] = 0;
+	}
+	jas_cmshapmatlut_init(&lut);
+	if (!(trcs[0] = jas_iccprof_getattr(iccprof, JAS_ICC_TAG_REDTRC)) ||
+	  !(trcs[1] = jas_iccprof_getattr(iccprof, JAS_ICC_TAG_GRNTRC)) ||
+	  !(trcs[2] = jas_iccprof_getattr(iccprof, JAS_ICC_TAG_BLUTRC)) ||
+	  !(cols[0] = jas_iccprof_getattr(iccprof, JAS_ICC_TAG_REDMATCOL)) ||
+	  !(cols[1] = jas_iccprof_getattr(iccprof, JAS_ICC_TAG_GRNMATCOL)) ||
+	  !(cols[2] = jas_iccprof_getattr(iccprof, JAS_ICC_TAG_BLUMATCOL)))
+		goto error;
+	for (i = 0; i < 3; ++i) {
+		if (trcs[i]->type != JAS_ICC_TYPE_CURV ||
+		  cols[i]->type != JAS_ICC_TYPE_XYZ)
+			goto error;
+	}
+	if (!(pxform = jas_cmpxform_createshapmat()))
+		goto error;
+	pxform->numinchans = 3;
+	pxform->numoutchans = 3;
+	shapmat = &pxform->data.shapmat;
+	if (!(pxformseq = jas_cmpxformseq_create()))
+		goto error;
+	if (jas_cmpxformseq_insertpxform(pxformseq, -1, pxform))
+		goto error;
+	shapmat->mono = 0;
+	shapmat->useluts = 1;
+	shapmat->usemat = 1;
+	if (!op) {
+		shapmat->order = 0;
+		for (i = 0; i < 3; ++i) {
+			shapmat->mat[0][i] = cols[i]->data.xyz.x / 65536.0;
+			shapmat->mat[1][i] = cols[i]->data.xyz.y / 65536.0;
+			shapmat->mat[2][i] = cols[i]->data.xyz.z / 65536.0;
+		}
+		for (i = 0; i < 3; ++i)
+			shapmat->mat[i][3] = 0.0;
+		for (i = 0; i < 3; ++i) {
+			if (jas_cmshapmatlut_set(&shapmat->luts[i], &trcs[i]->data.curv))
+				goto error;
+		}
+	} else {
+		shapmat->order = 1;
+		for (i = 0; i < 3; ++i) {
+			mat[0][i] = cols[i]->data.xyz.x / 65536.0;
+			mat[1][i] = cols[i]->data.xyz.y / 65536.0;
+			mat[2][i] = cols[i]->data.xyz.z / 65536.0;
+		}
+		for (i = 0; i < 3; ++i)
+			mat[i][3] = 0.0;
+		if (jas_cmshapmat_invmat(shapmat->mat, mat))
+			goto error;
+		for (i = 0; i < 3; ++i) {
+			if (jas_cmshapmatlut_set(&lut, &trcs[i]->data.curv))
+				goto error;
+			if (jas_cmshapmatlut_invert(&shapmat->luts[i], &lut, lut.size))
+				goto error;
+			jas_cmshapmatlut_cleanup(&lut);
+		}
+	}
+	for (i = 0; i < 3; ++i) {
+		jas_iccattrval_destroy(trcs[i]);
+		jas_iccattrval_destroy(cols[i]);
+	}
+	jas_cmpxform_destroy(pxform);
+	*retpxformseq = pxformseq;
+	return 0;
+
+error:
+	jas_cmshapmatlut_cleanup(&lut);	/* the scratch LUT may still hold a table */
+	for (i = 0; i < 3; ++i) {
+		if (trcs[i]) {
+			jas_iccattrval_destroy(trcs[i]);
+		}
+		if (cols[i]) {
+			jas_iccattrval_destroy(cols[i]);
+		}
+	}
+	if (pxformseq) {
+		jas_cmpxformseq_destroy(pxformseq);
+	}
+	if (pxform) {
+		jas_cmpxform_destroy(pxform);
+	}
+	return -1;
+}
+
+/* Read the sample at *bufptr into *val and advance *bufptr, provided it fits
+ * an integer of prec bits (signed if sgnd is nonzero).  Returns 0; on a range
+ * error returns -1 and leaves both *bufptr and *val untouched. */
+static int jas_cmgetint(long **bufptr, int sgnd, int prec, long *val)
+{
+	long v = **bufptr;
+	long lo;
+	long hi;
+
+	hi = sgnd ? (1 << (prec - 1)) : (1 << prec);
+	lo = sgnd ? -hi : 0;
+	if (v < lo || v >= hi)
+		return -1;
+	++*bufptr;
+	*val = v;
+	return 0;
+}
+
+/* Store val at *bufptr and advance *bufptr, provided val fits an integer of prec
+ * bits (signed if sgnd is nonzero).  Returns 0; on a range error returns -1 and writes nothing. */
+static int jas_cmputint(long **bufptr, int sgnd, int prec, long val)
+{
+	long lo;
+	long hi;
+
+	hi = sgnd ? (1 << (prec - 1)) : (1 << prec);
+	lo = sgnd ? -hi : 0;
+	if (val < lo || val >= hi)
+		return -1;
+	**bufptr = val;
+	++*bufptr;
+	return 0;
+}
+
+/* Number of channels of a colour space: 1 for the gray family, 3 for the XYZ,
+ * Lab, RGB and YCbCr families.  Any other family aborts the program. */
+int jas_clrspc_numchans(int clrspc)
+{
+	switch (jas_clrspc_fam(clrspc)) {
+	case JAS_CLRSPC_FAM_GRAY:
+		return 1;
+	case JAS_CLRSPC_FAM_RGB:
+	case JAS_CLRSPC_FAM_YCBCR:
+	case JAS_CLRSPC_FAM_XYZ:
+	case JAS_CLRSPC_FAM_LAB:
+		return 3;
+	default:
+		break;
+	}
+	abort();
+}
+
+jas_iccprof_t *jas_iccprof_createfromcmprof(jas_cmprof_t *prof)
+{
+	return jas_iccprof_copy(prof->iccprof);	/* whatever jas_iccprof_copy yields for the profile's ICC data */
+}
diff --git a/src/libjasper/base/jas_debug.c b/src/libjasper/base/jas_debug.c
new file mode 100644
index 0000000..8a762d5
--- /dev/null
+++ b/src/libjasper/base/jas_debug.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdarg.h>
+#include <stdio.h>
+
+#include "jasper/jas_types.h"
+#include "jasper/jas_debug.h"
+
+/******************************************************************************\
+* Local data.
+\******************************************************************************/
+
+static int jas_dbglevel = 0;
+/* The current debug level; 0 until changed through jas_setdbglevel. */
+
+/******************************************************************************\
+* Code for getting/setting the debug level.
+\******************************************************************************/
+
+/*
+ * Install dbglevel as the library debug level.
+ *
+ * Returns the level that was in effect before the call, so that a caller
+ * can put it back later.
+ */
+int jas_setdbglevel(int dbglevel)
+{
+	const int previous = jas_dbglevel;
+
+	jas_dbglevel = dbglevel;
+
+	return previous;
+}
+
+/* Return the current library debug level: the value last given to jas_setdbglevel, or 0 if it was never called. */
+int jas_getdbglevel()
+{
+	return jas_dbglevel;
+}
+
+/******************************************************************************\
+* Code.
+\******************************************************************************/
+
+/* printf-style output to standard error; returns what vfprintf returns. */
+int jas_eprintf(const char *fmt, ...)
+{
+	va_list args;
+	int nwritten;
+
+	va_start(args, fmt);
+	nwritten = vfprintf(stderr, fmt, args);
+	va_end(args);
+	return nwritten;
+}
+
+/* Hex-dump len bytes starting at data to out, 16 bytes per line, each line
+ * prefixed by its starting offset in hexadecimal.  Always returns 0. */
+int jas_memdump(FILE *out, void *data, size_t len)
+{
+	size_t i;
+	size_t j;
+	const unsigned char *dp = data;
+
+	for (i = 0; i < len; i += 16) {
+		/* %x expects unsigned int; print the size_t offset as unsigned long instead. */
+		fprintf(out, "%04lx:", (unsigned long) i);
+		for (j = 0; j < 16 && i + j < len; ++j) {
+			fprintf(out, " %02x", (unsigned) dp[i + j]);
+		}
+		fprintf(out, "\n");
+	}
+	return 0;
+}
diff --git a/src/libjasper/base/jas_getopt.c b/src/libjasper/base/jas_getopt.c
new file mode 100644
index 0000000..efb472e
--- /dev/null
+++ b/src/libjasper/base/jas_getopt.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 1999-2000, Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved. 
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Command Line Option Parsing Library
+ *
+ * $Id: jas_getopt.c,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+#include <string.h>
+
+#include "jasper/jas_getopt.h"
+#include "jasper/jas_math.h"
+
+/******************************************************************************\
+* Global data.
+\******************************************************************************/
+
+int jas_optind = 0;	/* index of the next argv entry jas_getopt examines; 0 means not started yet */
+int jas_opterr = 1;	/* nonzero: jas_getopt reports errors through jas_eprintf */
+char *jas_optarg = 0;	/* argument of the option last returned, or 0 if that option takes none */
+
+/******************************************************************************\
+* Code.
+\******************************************************************************/
+
+/* Return the entry called name from a table ended by a negative id or a null name, or 0 if absent. */
+static jas_opt_t *jas_optlookup(jas_opt_t *opts, char *name)
+{
+	jas_opt_t *cur = opts;
+	while (cur->id >= 0 && cur->name) {
+		if (strcmp(cur->name, name) == 0)
+			return cur;
+		++cur;
+	}
+	return 0;
+}
+
+/*
+ * Parse the next command-line word.  Options are whole words looked up by
+ * name in opts: "--name" for long options, "-c" for one-character ones.  An
+ * option flagged JAS_OPT_HASARG takes the following word as its argument
+ * (stored in jas_optarg); for other options jas_optarg is cleared.
+ *
+ * Returns the option's id; JAS_GETOPT_EOF at "--", at a word that does not
+ * start with '-', or when argv is exhausted; JAS_GETOPT_ERR for an unknown
+ * option or a missing argument (reported via jas_eprintf if jas_opterr is set).
+ */
+int jas_getopt(int argc, char **argv, jas_opt_t *opts)
+{
+	jas_opt_t *opt;
+	char *word;
+	char *name;
+	int islong;
+
+	if (!jas_optind) {
+		jas_optind = JAS_MIN(1, argc);
+	}
+	if (jas_optind >= argc) {
+		return JAS_GETOPT_EOF;
+	}
+	word = argv[jas_optind];
+	if (*word != '-') {
+		/* We are not processing an option. */
+		return JAS_GETOPT_EOF;
+	}
+	++jas_optind;
+	name = word + 1;
+	islong = (*name == '-');
+	if (islong) {
+		++name;
+		if (*name == '\0') {
+			/* This is the end of the options. */
+			return JAS_GETOPT_EOF;
+		}
+	}
+	/* A short option must be exactly one character long. */
+	opt = (islong || strlen(name) == 1) ? jas_optlookup(opts, name) : 0;
+	if (!opt) {
+		if (jas_opterr && islong) {
+			jas_eprintf("unknown long option %s\n", word);
+		} else if (jas_opterr) {
+			jas_eprintf("unknown short option %s\n", word);
+		}
+		return JAS_GETOPT_ERR;
+	}
+
+	if (!(opt->flags & JAS_OPT_HASARG)) {
+		/* The option does not have an argument. */
+		jas_optarg = 0;
+		return opt->id;
+	}
+	/* The option has an argument: it is the next word. */
+	if (jas_optind >= argc) {
+		if (jas_opterr) {
+			jas_eprintf("missing argument for option %s\n", word);
+		}
+		return JAS_GETOPT_ERR;
+	}
+	jas_optarg = argv[jas_optind];
+	++jas_optind;
+	return opt->id;
+}
diff --git a/src/libjasper/base/jas_icc.c b/src/libjasper/base/jas_icc.c
new file mode 100644
index 0000000..e5a0a2e
--- /dev/null
+++ b/src/libjasper/base/jas_icc.c
@@ -0,0 +1,1722 @@
+/*
+ * Copyright (c) 2002-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+#include <assert.h>
+#include <jasper/jas_config.h>
+#include <jasper/jas_types.h>
+#include <jasper/jas_malloc.h>
+#include <jasper/jas_debug.h>
+#include <jasper/jas_icc.h>
+#include <jasper/jas_cm.h>
+#include <jasper/jas_stream.h>
+#include <jasper/jas_string.h>
+
+#include <stdlib.h>
+#include <ctype.h>
+
+#define	jas_iccputuint8(out, val)	jas_iccputuint(out, 1, val)	/* Fixed-width writers: each forwards to the generic routine with a byte count. */
+#define	jas_iccputuint16(out, val)	jas_iccputuint(out, 2, val)
+#define	jas_iccputsint32(out, val)	jas_iccputsint(out, 4, val)	/* The only signed variant. */
+#define	jas_iccputuint32(out, val)	jas_iccputuint(out, 4, val)
+#define	jas_iccputuint64(out, val)	jas_iccputuint(out, 8, val)
+
+static jas_iccattrval_t *jas_iccattrval_create0(void);
+
+static int jas_iccgetuint(jas_stream_t *in, int n, ulonglong *val);
+static int jas_iccgetuint8(jas_stream_t *in, jas_iccuint8_t *val);
+static int jas_iccgetuint16(jas_stream_t *in, jas_iccuint16_t *val);
+static int jas_iccgetsint32(jas_stream_t *in, jas_iccsint32_t *val);
+static int jas_iccgetuint32(jas_stream_t *in, jas_iccuint32_t *val);
+static int jas_iccgetuint64(jas_stream_t *in, jas_iccuint64_t *val);
+static int jas_iccputuint(jas_stream_t *out, int n, ulonglong val);
+static int jas_iccputsint(jas_stream_t *out, int n, longlong val);
+static jas_iccprof_t *jas_iccprof_create(void);
+static int jas_iccprof_readhdr(jas_stream_t *in, jas_icchdr_t *hdr);
+static int jas_iccprof_writehdr(jas_stream_t *out, jas_icchdr_t *hdr);
+static int jas_iccprof_gettagtab(jas_stream_t *in, jas_icctagtab_t *tagtab);
+static void jas_iccprof_sorttagtab(jas_icctagtab_t *tagtab);
+static int jas_iccattrtab_lookup(jas_iccattrtab_t *attrtab, jas_iccuint32_t name);
+static jas_iccattrtab_t *jas_iccattrtab_copy(jas_iccattrtab_t *attrtab);
+static jas_iccattrvalinfo_t *jas_iccattrvalinfo_lookup(jas_iccsig_t name);
+static int jas_iccgettime(jas_stream_t *in, jas_icctime_t *time);
+static int jas_iccgetxyz(jas_stream_t *in, jas_iccxyz_t *xyz);
+static int jas_icctagtabent_cmp(const void *src, const void *dst);
+
+static void jas_icccurv_destroy(jas_iccattrval_t *attrval);
+static int jas_icccurv_copy(jas_iccattrval_t *attrval,
+  jas_iccattrval_t *othattrval);
+static int jas_icccurv_input(jas_iccattrval_t *attrval, jas_stream_t *in,
+  int cnt);
+static int jas_icccurv_getsize(jas_iccattrval_t *attrval);
+static int jas_icccurv_output(jas_iccattrval_t *attrval, jas_stream_t *out);
+static void jas_icccurv_dump(jas_iccattrval_t *attrval, FILE *out);
+
+static void jas_icctxtdesc_destroy(jas_iccattrval_t *attrval);
+static int jas_icctxtdesc_copy(jas_iccattrval_t *attrval,
+  jas_iccattrval_t *othattrval);
+static int jas_icctxtdesc_input(jas_iccattrval_t *attrval, jas_stream_t *in,
+  int cnt);
+static int jas_icctxtdesc_getsize(jas_iccattrval_t *attrval);
+static int jas_icctxtdesc_output(jas_iccattrval_t *attrval, jas_stream_t *out);
+static void jas_icctxtdesc_dump(jas_iccattrval_t *attrval, FILE *out);
+
+static void jas_icctxt_destroy(jas_iccattrval_t *attrval);
+static int jas_icctxt_copy(jas_iccattrval_t *attrval,
+  jas_iccattrval_t *othattrval);
+static int jas_icctxt_input(jas_iccattrval_t *attrval, jas_stream_t *in,
+  int cnt);
+static int jas_icctxt_getsize(jas_iccattrval_t *attrval);
+static int jas_icctxt_output(jas_iccattrval_t *attrval, jas_stream_t *out);
+static void jas_icctxt_dump(jas_iccattrval_t *attrval, FILE *out);
+
+static int jas_iccxyz_input(jas_iccattrval_t *attrval, jas_stream_t *in,
+  int cnt);
+static int jas_iccxyz_getsize(jas_iccattrval_t *attrval);
+static int jas_iccxyz_output(jas_iccattrval_t *attrval, jas_stream_t *out);
+static void jas_iccxyz_dump(jas_iccattrval_t *attrval, FILE *out);
+
+static jas_iccattrtab_t *jas_iccattrtab_create(void);
+static void jas_iccattrtab_destroy(jas_iccattrtab_t *tab);
+static int jas_iccattrtab_resize(jas_iccattrtab_t *tab, int maxents);
+static int jas_iccattrtab_add(jas_iccattrtab_t *attrtab, int i,
+  jas_iccuint32_t name, jas_iccattrval_t *val);
+static int jas_iccattrtab_replace(jas_iccattrtab_t *attrtab, int i,
+  jas_iccuint32_t name, jas_iccattrval_t *val);
+static void jas_iccattrtab_delete(jas_iccattrtab_t *attrtab, int i);
+static long jas_iccpadtomult(long x, long y);
+static int jas_iccattrtab_get(jas_iccattrtab_t *attrtab, int i,
+  jas_iccattrname_t *name, jas_iccattrval_t **val);
+static int jas_iccprof_puttagtab(jas_stream_t *out, jas_icctagtab_t *tagtab);
+
+static void jas_icclut16_destroy(jas_iccattrval_t *attrval);
+static int jas_icclut16_copy(jas_iccattrval_t *attrval,
+  jas_iccattrval_t *othattrval);
+static int jas_icclut16_input(jas_iccattrval_t *attrval, jas_stream_t *in,
+  int cnt);
+static int jas_icclut16_getsize(jas_iccattrval_t *attrval);
+static int jas_icclut16_output(jas_iccattrval_t *attrval, jas_stream_t *out);
+static void jas_icclut16_dump(jas_iccattrval_t *attrval, FILE *out);
+
+static void jas_icclut8_destroy(jas_iccattrval_t *attrval);
+static int jas_icclut8_copy(jas_iccattrval_t *attrval,
+  jas_iccattrval_t *othattrval);
+static int jas_icclut8_input(jas_iccattrval_t *attrval, jas_stream_t *in,
+  int cnt);
+static int jas_icclut8_getsize(jas_iccattrval_t *attrval);
+static int jas_icclut8_output(jas_iccattrval_t *attrval, jas_stream_t *out);
+static void jas_icclut8_dump(jas_iccattrval_t *attrval, FILE *out);
+
+static int jas_iccputtime(jas_stream_t *out, jas_icctime_t *ctime);
+static int jas_iccputxyz(jas_stream_t *out, jas_iccxyz_t *xyz);
+
+static long jas_iccpowi(int x, int n);
+
+static char *jas_iccsigtostr(int sig, char *buf);
+
+
+jas_iccattrvalinfo_t jas_iccattrvalinfos[] = {	/* One row per supported value type; searched by jas_iccattrvalinfo_lookup(). */
+	{JAS_ICC_TYPE_CURV, {jas_icccurv_destroy, jas_icccurv_copy,
+	  jas_icccurv_input, jas_icccurv_output, jas_icccurv_getsize,
+	  jas_icccurv_dump}},
+	{JAS_ICC_TYPE_XYZ, {0, 0, jas_iccxyz_input, jas_iccxyz_output,	/* The two leading slots (destroy and copy in the other rows) are null. */
+	  jas_iccxyz_getsize, jas_iccxyz_dump}},
+	{JAS_ICC_TYPE_TXTDESC, {jas_icctxtdesc_destroy,
+	  jas_icctxtdesc_copy, jas_icctxtdesc_input, jas_icctxtdesc_output,
+	  jas_icctxtdesc_getsize, jas_icctxtdesc_dump}},
+	{JAS_ICC_TYPE_TXT, {jas_icctxt_destroy, jas_icctxt_copy,
+	  jas_icctxt_input, jas_icctxt_output, jas_icctxt_getsize,
+	  jas_icctxt_dump}},
+	{JAS_ICC_TYPE_LUT8, {jas_icclut8_destroy, jas_icclut8_copy,
+	  jas_icclut8_input, jas_icclut8_output, jas_icclut8_getsize,
+	  jas_icclut8_dump}},
+	{JAS_ICC_TYPE_LUT16, {jas_icclut16_destroy, jas_icclut16_copy,
+	  jas_icclut16_input, jas_icclut16_output, jas_icclut16_getsize,
+	  jas_icclut16_dump}},
+	{0, {0, 0, 0, 0, 0, 0}}	/* Terminator: a zero type ends the lookup loop. */
+};
+
+typedef struct {	/* Pairs a 32-bit tag value with a name string; no use of it is visible in this part of the file. */
+	jas_iccuint32_t tag;
+	char *name;
+} jas_icctaginfo_t;
+
+/******************************************************************************\
+* profile class
+\******************************************************************************/
+
+/* Allocate an empty profile: zeroed header, no tag table, empty attribute
+   table.  Returns 0 if any allocation fails. */
+static jas_iccprof_t *jas_iccprof_create()
+{
+	jas_iccprof_t *prof;
+	if (!(prof = jas_malloc(sizeof(jas_iccprof_t))))
+		return 0;
+	/* Initialize every field before anything can fail: the destructor
+	   inspects tagtab.ents and attrtab, which the allocator leaves undefined. */
+	memset(&prof->hdr, 0, sizeof(jas_icchdr_t));
+	prof->tagtab.numents = 0;
+	prof->tagtab.ents = 0;
+	if (!(prof->attrtab = jas_iccattrtab_create())) {
+		jas_iccprof_destroy(prof);
+		return 0;
+	}
+	return prof;
+}
+
+/* Duplicate a profile: the header is copied by value, the attribute table is
+   duplicated with jas_iccattrtab_copy(), and the copy's tag table is empty. */
+jas_iccprof_t *jas_iccprof_copy(jas_iccprof_t *prof)
+{
+	jas_iccprof_t *dup = jas_iccprof_create();
+	if (!dup)
+		return 0;
+	dup->hdr = prof->hdr;
+	dup->tagtab.numents = 0;
+	dup->tagtab.ents = 0;
+	/* Swap the empty table made by create for a copy of the source table. */
+	assert(dup->attrtab);
+	jas_iccattrtab_destroy(dup->attrtab);
+	dup->attrtab = jas_iccattrtab_copy(prof->attrtab);
+	if (dup->attrtab)
+		return dup;
+	jas_iccprof_destroy(dup);
+	return 0;
+}
+
+void jas_iccprof_destroy(jas_iccprof_t *prof)
+{	/* Free a profile along with its attribute table and tag table entries. */
+	if (prof->attrtab)
+		jas_iccattrtab_destroy(prof->attrtab);
+	if (prof->tagtab.ents)
+		jas_free(prof->tagtab.ents);
+	jas_free(prof);	/* prof itself must be non-null: it was dereferenced above. */
+}
+
+void jas_iccprof_dump(jas_iccprof_t *prof, FILE *out)
+{	/* Print the profile's attribute table to out; the header is not printed. */
+	jas_iccattrtab_dump(prof->attrtab, out);
+}
+
+/* Read an ICC profile from a stream: the header, the tag table, then each
+   tagged element in offset order.  Returns the profile, or 0 on any failure. */
+jas_iccprof_t *jas_iccprof_load(jas_stream_t *in)
+{
+	jas_iccprof_t *prof;
+	int i, len, numtags;
+	long curoff, reloff, prevoff;
+	jas_iccsig_t type;
+	jas_iccattrval_t *attrval, *prevattrval;
+	jas_icctagtabent_t *tagtabent;
+	jas_iccattrvalinfo_t *attrvalinfo;
+
+	prof = 0;
+	attrval = 0;
+
+	if (!(prof = jas_iccprof_create())) {
+		goto error;
+	}
+
+	if (jas_iccprof_readhdr(in, &prof->hdr)) {
+		jas_eprintf("cannot get header\n");
+		goto error;
+	}
+	if (jas_iccprof_gettagtab(in, &prof->tagtab)) {
+		jas_eprintf("cannot get tag table\n");
+		goto error;
+	}
+	/* Sort by offset so the element data can be consumed in one forward pass. */
+	jas_iccprof_sorttagtab(&prof->tagtab);
+
+	numtags = prof->tagtab.numents;
+	curoff = JAS_ICC_HDRLEN + 4 + 12 * numtags;
+	prevoff = 0;
+	prevattrval = 0;
+	for (i = 0; i < numtags; ++i) {
+		tagtabent = &prof->tagtab.ents[i];
+		if (tagtabent->off == JAS_CAST(jas_iccuint32_t, prevoff)) {
+			/* Same offset as the previous element: share its value. */
+			if (prevattrval) {
+				if (!(attrval = jas_iccattrval_clone(prevattrval)))
+					goto error;
+				if (jas_iccprof_setattr(prof, tagtabent->tag, attrval))
+					goto error;
+				jas_iccattrval_destroy(attrval);
+				/* Forget the pointer so that a later failure does not
+				   release this reference a second time. */
+				attrval = 0;
+			}
+			continue;
+		}
+		reloff = tagtabent->off - curoff;
+		if (reloff > 0) {
+			if (jas_stream_gobble(in, reloff) != reloff)
+				goto error;
+			curoff += reloff;
+		} else if (reloff < 0) {
+			/* The element starts before the current position (for
+			   example, overlapping elements): reject the profile. */
+			goto error;
+		}
+		prevoff = curoff;
+		if (jas_iccgetuint32(in, &type)) {
+			goto error;
+		}
+		if (jas_stream_gobble(in, 4) != 4) {
+			goto error;
+		}
+		curoff += 8;
+		if (!(attrvalinfo = jas_iccattrvalinfo_lookup(type))) {
+			/* Unknown element type: leave its data unread and move on. */
+			prevattrval = 0;
+			continue;
+		}
+		/* The length includes the 8 bytes consumed above. */
+		if (tagtabent->len < 8)
+			goto error;
+		if (!(attrval = jas_iccattrval_create(type))) {
+			goto error;
+		}
+		len = tagtabent->len - 8;
+		if ((*attrval->ops->input)(attrval, in, len)) {
+			goto error;
+		}
+		curoff += len;
+		if (jas_iccprof_setattr(prof, tagtabent->tag, attrval)) {
+			goto error;
+		}
+		/* The table holds its own reference, so prevattrval stays valid. */
+		prevattrval = attrval;
+		jas_iccattrval_destroy(attrval);
+		attrval = 0;
+	}
+
+	return prof;
+
+error:
+	if (prof)
+		jas_iccprof_destroy(prof);
+	if (attrval)
+		jas_iccattrval_destroy(attrval);
+	return 0;
+}
+
+/* Write a profile to a stream.  The tag table is rebuilt from the attribute table;
+   tags sharing one value object share one data element.  Returns 0 or -1. */
+int jas_iccprof_save(jas_iccprof_t *prof, jas_stream_t *out)
+{
+	long curoff, reloff, newoff;
+	int i, j;
+	jas_icctagtabent_t *tagtabent, *sharedtagtabent;
+	jas_iccuint32_t attrname;
+	jas_iccattrval_t *attrval = 0;
+	jas_icctagtab_t *tagtab = &prof->tagtab;
+
+	/* Discard any table left by a load or an earlier save before rebuilding. */
+	if (tagtab->ents)
+		jas_free(tagtab->ents);
+	tagtab->numents = 0;
+	if (!(tagtab->ents = jas_malloc(prof->attrtab->numattrs *
+	  sizeof(jas_icctagtabent_t))))
+		goto error;
+	tagtab->numents = prof->attrtab->numattrs;
+	curoff = JAS_ICC_HDRLEN + 4 + 12 * tagtab->numents;
+	/* Pass 1: assign an offset and a length to every tag. */
+	for (i = 0; i < JAS_CAST(int, tagtab->numents); ++i) {
+		tagtabent = &tagtab->ents[i];
+		if (jas_iccattrtab_get(prof->attrtab, i, &attrname, &attrval))
+			goto error;
+		assert(attrval->ops->output);
+		tagtabent->tag = attrname;
+		tagtabent->data = &attrval->data;
+		sharedtagtabent = 0;
+		for (j = 0; j < i && !sharedtagtabent; ++j) {
+			if (tagtab->ents[j].data == tagtabent->data)
+				sharedtagtabent = &tagtab->ents[j];
+		}
+		if (sharedtagtabent) {
+			tagtabent->off = sharedtagtabent->off;
+			tagtabent->len = sharedtagtabent->len;
+			tagtabent->first = sharedtagtabent;
+		} else {
+			tagtabent->off = curoff;
+			tagtabent->len = (*attrval->ops->getsize)(attrval) + 8;
+			tagtabent->first = 0;
+			curoff += tagtabent->len;
+			if (i < JAS_CAST(int, tagtab->numents - 1))
+				curoff = jas_iccpadtomult(curoff, 4);
+		}
+		jas_iccattrval_destroy(attrval);
+		attrval = 0;
+	}
+	prof->hdr.size = curoff;
+	if (jas_iccprof_writehdr(out, &prof->hdr))
+		goto error;
+	if (jas_iccprof_puttagtab(out, &prof->tagtab))
+		goto error;
+	curoff = JAS_ICC_HDRLEN + 4 + 12 * tagtab->numents;
+	/* Pass 2: write each element once, then pad up to the next offset. */
+	for (i = 0; i < JAS_CAST(int, tagtab->numents);) {
+		tagtabent = &tagtab->ents[i];
+		assert(curoff == JAS_CAST(long, tagtabent->off));
+		if (jas_iccattrtab_get(prof->attrtab, i, &attrname, &attrval))
+			goto error;
+		if (jas_iccputuint32(out, attrval->type) || jas_stream_pad(out, 4, 0) != 4)
+			goto error;
+		if ((*attrval->ops->output)(attrval, out))
+			goto error;
+		jas_iccattrval_destroy(attrval);
+		attrval = 0;
+		curoff += tagtabent->len;
+		++i;
+		/* Skip tags that share an element assigned earlier. */
+		while (i < JAS_CAST(int, tagtab->numents) && tagtab->ents[i].first)
+			++i;
+		newoff = (i < JAS_CAST(int, tagtab->numents)) ?
+		  tagtab->ents[i].off : prof->hdr.size;
+		reloff = newoff - curoff;
+		assert(reloff >= 0);
+		if (reloff > 0) {
+			if (jas_stream_pad(out, reloff, 0) != reloff)
+				goto error;
+			curoff += reloff;
+		}
+	}
+	return 0;
+error:
+	/* Release the attribute reference held when the failure occurred. */
+	if (attrval)
+		jas_iccattrval_destroy(attrval);
+	return -1;
+}
+
+/* Write the header fields in file order, then pad with jas_stream_pad(out, 44, 0); 0 or -1. */
+static int jas_iccprof_writehdr(jas_stream_t *out, jas_icchdr_t *hdr)
+{
+	int failed = jas_iccputuint32(out, hdr->size) ||
+	  jas_iccputuint32(out, hdr->cmmtype) ||
+	  jas_iccputuint32(out, hdr->version) ||
+	  jas_iccputuint32(out, hdr->clas) ||
+	  jas_iccputuint32(out, hdr->colorspc) ||
+	  jas_iccputuint32(out, hdr->refcolorspc) ||
+	  jas_iccputtime(out, &hdr->ctime) ||
+	  jas_iccputuint32(out, hdr->magic) ||
+	  jas_iccputuint32(out, hdr->platform) ||
+	  jas_iccputuint32(out, hdr->flags) ||
+	  jas_iccputuint32(out, hdr->maker) ||
+	  jas_iccputuint32(out, hdr->model) ||
+	  jas_iccputuint64(out, hdr->attr) ||
+	  jas_iccputuint32(out, hdr->intent) ||
+	  jas_iccputxyz(out, &hdr->illum) ||
+	  jas_iccputuint32(out, hdr->creator) ||
+	  jas_stream_pad(out, 44, 0) != 44;
+	return failed ? -1 : 0;
+}
+
+/* Write the tag table: the entry count followed by the tag, offset and length
+   of every entry, each through jas_iccputuint32().  Returns 0, or -1 on error. */
+static int jas_iccprof_puttagtab(jas_stream_t *out, jas_icctagtab_t *tagtab)
+{
+	int idx;
+	jas_icctagtabent_t *ent;
+	if (jas_iccputuint32(out, tagtab->numents))
+		return -1;
+	for (idx = 0; idx < JAS_CAST(int, tagtab->numents); ++idx) {
+		ent = &tagtab->ents[idx];
+		if (jas_iccputuint32(out, ent->tag))
+			return -1;
+		if (jas_iccputuint32(out, ent->off) || jas_iccputuint32(out, ent->len))
+			return -1;
+	}
+	return 0;
+}
+
+static int jas_iccprof_readhdr(jas_stream_t *in, jas_icchdr_t *hdr)
+{
+	if (jas_iccgetuint32(in, &hdr->size) ||
+	  jas_iccgetuint32(in, &hdr->cmmtype) ||
+	  jas_iccgetuint32(in, &hdr->version) ||
+	  jas_iccgetuint32(in, &hdr->clas) ||
+	  jas_iccgetuint32(in, &hdr->colorspc) ||
+	  jas_iccgetuint32(in, &hdr->refcolorspc) ||
+	  jas_iccgettime(in, &hdr->ctime) ||
+	  jas_iccgetuint32(in, &hdr->magic) ||
+	  jas_iccgetuint32(in, &hdr->platform) ||
+	  jas_iccgetuint32(in, &hdr->flags) ||
+	  jas_iccgetuint32(in, &hdr->maker) ||
+	  jas_iccgetuint32(in, &hdr->model) ||
+	  jas_iccgetuint64(in, &hdr->attr) ||
+	  jas_iccgetuint32(in, &hdr->intent) ||
+	  jas_iccgetxyz(in, &hdr->illum) ||
+	  jas_iccgetuint32(in, &hdr->creator) ||
+	  jas_stream_gobble(in, 44) != 44)
+		return -1;
+	return 0;
+}
+
+/* Read the tag table: a 32-bit entry count, then tag, offset and length for
+   each entry.  On failure the table is left empty and -1 is returned. */
+static int jas_iccprof_gettagtab(jas_stream_t *in, jas_icctagtab_t *tagtab)
+{
+	jas_iccuint32_t i;
+	jas_icctagtabent_t *tagtabent;
+	if (tagtab->ents)
+		jas_free(tagtab->ents);
+	tagtab->ents = 0;
+	if (jas_iccgetuint32(in, &tagtab->numents))
+		goto error;
+	/* The count comes from the file: refuse one whose byte size would wrap. */
+	if (tagtab->numents > JAS_CAST(size_t, -1) / sizeof(jas_icctagtabent_t) ||
+	  !(tagtab->ents = jas_malloc(tagtab->numents * sizeof(jas_icctagtabent_t))))
+		goto error;
+	tagtabent = tagtab->ents;
+	for (i = 0; i < tagtab->numents; ++i, ++tagtabent) {
+		if (jas_iccgetuint32(in, &tagtabent->tag) ||
+		  jas_iccgetuint32(in, &tagtabent->off) ||
+		  jas_iccgetuint32(in, &tagtabent->len))
+			goto error;
+	}
+	return 0;
+error:
+	if (tagtab->ents)
+		jas_free(tagtab->ents);
+	tagtab->ents = 0;
+	tagtab->numents = 0;
+	return -1;
+}
+
+/* Look up an attribute by name.  Returns a new reference to its value, which
+   the caller releases with jas_iccattrval_destroy(), or 0 if the name is not
+   in the table. */
+jas_iccattrval_t *jas_iccprof_getattr(jas_iccprof_t *prof,
+  jas_iccattrname_t name)
+{
+	int idx = jas_iccattrtab_lookup(prof->attrtab, name);
+
+	if (idx < 0)
+		return 0;
+	/* "Clone" hands back the same object with its count raised by one. */
+	return jas_iccattrval_clone(prof->attrtab->attrs[idx].val);
+}
+
+/* Set, replace or remove an attribute.  A non-null val is stored under name
+   (replacing an existing entry, else appended); a null val removes the entry
+   if there is one.  Returns 0 on success and -1 if storing fails. */
+int jas_iccprof_setattr(jas_iccprof_t *prof, jas_iccattrname_t name,
+  jas_iccattrval_t *val)
+{
+	jas_iccattrtab_t *tab = prof->attrtab;
+	int pos = jas_iccattrtab_lookup(tab, name);
+
+	if (!val) {
+		/* Removal: nothing to do when the name is absent. */
+		if (pos >= 0)
+			jas_iccattrtab_delete(tab, pos);
+		return 0;
+	}
+	if (pos >= 0) {
+		if (jas_iccattrtab_replace(tab, pos, name, val))
+			return -1;
+	} else if (jas_iccattrtab_add(tab, -1, name, val)) {
+		return -1;
+	}
+	return 0;
+}
+
+int jas_iccprof_gethdr(jas_iccprof_t *prof, jas_icchdr_t *hdr)
+{	/* Copy the profile's header into *hdr by structure assignment. */
+	*hdr = prof->hdr;
+	return 0;	/* Always 0. */
+}
+
+int jas_iccprof_sethdr(jas_iccprof_t *prof, jas_icchdr_t *hdr)
+{	/* Overwrite the profile's header with a copy of *hdr. */
+	prof->hdr = *hdr;
+	return 0;	/* Always 0. */
+}
+
+static void jas_iccprof_sorttagtab(jas_icctagtab_t *tagtab)
+{	/* Sort the entries in place with jas_icctagtabent_cmp (ascending offset). */
+	qsort(tagtab->ents, tagtab->numents, sizeof(jas_icctagtabent_t),
+	  jas_icctagtabent_cmp);
+}
+
+/* Comparison function for qsort(): orders tag table entries by ascending
+   offset. */
+static int jas_icctagtabent_cmp(const void *src, const void *dst)
+{
+	jas_icctagtabent_t *lhs = JAS_CAST(jas_icctagtabent_t *, src);
+	jas_icctagtabent_t *rhs = JAS_CAST(jas_icctagtabent_t *, dst);
+
+	if (lhs->off < rhs->off)
+		return -1;
+	return (lhs->off > rhs->off) ? 1 : 0;
+}
+
+/* Find the row for type in the zero-terminated global table; 0 if it has none. */
+static jas_iccattrvalinfo_t *jas_iccattrvalinfo_lookup(jas_iccsig_t type)
+{
+	jas_iccattrvalinfo_t *cur = jas_iccattrvalinfos;
+	while (cur->type) {
+		if (cur->type == type)
+			return cur;
+		++cur;
+	}
+	return 0;
+}
+
+/* Read a date/time value as six consecutive 16-bit fields: year, month, day,
+   hour, min, sec.  Returns 0 on success and -1 if any read fails. */
+static int jas_iccgettime(jas_stream_t *in, jas_icctime_t *time)
+{
+	int failed = jas_iccgetuint16(in, &time->year) ||
+	  jas_iccgetuint16(in, &time->month) ||
+	  jas_iccgetuint16(in, &time->day) ||
+	  jas_iccgetuint16(in, &time->hour) ||
+	  jas_iccgetuint16(in, &time->min) ||
+	  jas_iccgetuint16(in, &time->sec);
+	return failed ? -1 : 0;
+}
+
+/* Read three signed 32-bit values into x, y and z, in that order.
+   Returns 0 on success and -1 if any read fails. */
+static int jas_iccgetxyz(jas_stream_t *in, jas_iccxyz_t *xyz)
+{
+	int failed = jas_iccgetsint32(in, &xyz->x) ||
+	  jas_iccgetsint32(in, &xyz->y) ||
+	  jas_iccgetsint32(in, &xyz->z);
+	return failed ? -1 : 0;
+}
+
+/* Write a date/time value as six 16-bit fields (year, month, day, hour, min,
+   sec).  Returns 0 on success and -1 as soon as a write fails. */
+static int jas_iccputtime(jas_stream_t *out, jas_icctime_t *time)
+{
+	if (jas_iccputuint16(out, time->year) || jas_iccputuint16(out, time->month) ||
+	  jas_iccputuint16(out, time->day) || jas_iccputuint16(out, time->hour) ||
+	  jas_iccputuint16(out, time->min) || jas_iccputuint16(out, time->sec))
+		return -1;
+	return 0;
+}
+
+/* Write x, y and z through jas_iccputuint32().  Returns 0, or -1 once a write
+   fails (the results of the writes were previously discarded). */
+static int jas_iccputxyz(jas_stream_t *out, jas_iccxyz_t *xyz)
+{
+	return (jas_iccputuint32(out, xyz->x) || jas_iccputuint32(out, xyz->y) ||
+	  jas_iccputuint32(out, xyz->z)) ? -1 : 0;
+}
+
+/******************************************************************************\
+* attribute table class
+\******************************************************************************/
+
+/* Create an empty attribute table with room for 32 entries.
+   Returns 0 if memory cannot be obtained. */
+static jas_iccattrtab_t *jas_iccattrtab_create()
+{
+	jas_iccattrtab_t *tab = jas_malloc(sizeof(jas_iccattrtab_t));
+	if (!tab)
+		return 0;
+
+	tab->attrs = 0;
+	tab->numattrs = 0;
+	tab->maxattrs = 0;
+	if (jas_iccattrtab_resize(tab, 32) == 0)
+		return tab;
+	/* Growing failed: release the (still empty) table. */
+	jas_iccattrtab_destroy(tab);
+	return 0;
+}
+
+/* Build a new table with the same names and (reference-counted) values; 0 on failure. */
+static jas_iccattrtab_t *jas_iccattrtab_copy(jas_iccattrtab_t *attrtab)
+{
+	jas_iccattrtab_t *newattrtab = jas_iccattrtab_create();
+	int i;
+	for (i = 0; newattrtab && i < attrtab->numattrs; ++i) {
+		if (jas_iccattrtab_add(newattrtab, i, attrtab->attrs[i].name,
+		  attrtab->attrs[i].val)) {
+			/* Do not leak the partly built copy; this also ends the loop. */
+			jas_iccattrtab_destroy(newattrtab);
+			newattrtab = 0;
+		}
+	}
+	return newattrtab;
+}
+
+/* Release every value held by the table, then the entry array and the table. */
+static void jas_iccattrtab_destroy(jas_iccattrtab_t *tab)
+{
+	if (tab->attrs != 0) {
+		for (; tab->numattrs > 0; )
+			jas_iccattrtab_delete(tab, 0);
+		jas_free(tab->attrs);
+	}
+	jas_free(tab);
+}
+
+void jas_iccattrtab_dump(jas_iccattrtab_t *attrtab, FILE *out)
+{	/* Print the attribute count, then every attribute with its name, type and value. */
+	int i;
+	jas_iccattr_t *attr;
+	jas_iccattrval_t *attrval;
+	jas_iccattrvalinfo_t *info;
+	char buf[16];	/* Two 8-byte halves: the name string and the type string. */
+	fprintf(out, "numattrs=%d\n", attrtab->numattrs);
+	fprintf(out, "---\n");
+	for (i = 0; i < attrtab->numattrs; ++i) {
+		attr = &attrtab->attrs[i];
+		attrval = attr->val;
+		info = jas_iccattrvalinfo_lookup(attrval->type);
+		if (!info) abort();	/* A stored value whose type has no table row ends the process. */
+		fprintf(out, "attrno=%d; attrname=\"%s\"(0x%08x); attrtype=\"%s\"(0x%08x)\n",
+		  i,
+		  jas_iccsigtostr(attr->name, &buf[0]),
+		  attr->name,
+		  jas_iccsigtostr(attrval->type, &buf[8]),
+		  attrval->type
+		  );
+		jas_iccattrval_dump(attrval, out);
+		fprintf(out, "---\n");
+	}
+}
+
+static int jas_iccattrtab_resize(jas_iccattrtab_t *tab, int maxents)
+{	/* Set the capacity to maxents entries; returns 0, or -1 leaving the table untouched. */
+	jas_iccattr_t *newattrs;
+	assert(maxents >= tab->numattrs);	/* Never shrink below the entries in use. */
+	newattrs = tab->attrs ? jas_realloc(tab->attrs, maxents *
+	  sizeof(jas_iccattr_t)) : jas_malloc(maxents * sizeof(jas_iccattr_t));
+	if (!newattrs)
+		return -1;	/* tab->attrs is not overwritten on failure. */
+	tab->attrs = newattrs;
+	tab->maxattrs = maxents;
+	return 0;
+}
+
+/* Insert a new reference to val under name at index i, shifting later entries
+   up; a negative i appends.  The table grows by 32 slots when full.
+   Returns 0 on success and -1 on failure. */
+static int jas_iccattrtab_add(jas_iccattrtab_t *attrtab, int i,
+  jas_iccuint32_t name, jas_iccattrval_t *val)
+{
+	int ntail;
+	jas_iccattr_t *slot;
+	jas_iccattrval_t *ref;
+
+	if (i < 0)
+		i = attrtab->numattrs;
+	assert(i >= 0 && i <= attrtab->numattrs);
+	if (attrtab->numattrs >= attrtab->maxattrs &&
+	  jas_iccattrtab_resize(attrtab, attrtab->numattrs + 32))
+		return -1;
+	ref = jas_iccattrval_clone(val);
+	if (!ref)
+		return -1;
+
+	/* Open a gap at i by moving the tail up one slot. */
+	ntail = attrtab->numattrs - i;
+	if (ntail > 0)
+		memmove(&attrtab->attrs[i + 1], &attrtab->attrs[i],
+		  ntail * sizeof(jas_iccattr_t));
+
+	slot = &attrtab->attrs[i];
+	slot->name = name;
+	slot->val = ref;
+	++attrtab->numattrs;
+	return 0;
+}
+
+/* Replace entry i with name and a new reference to val; the reference to the
+   old value is released.  Returns 0, or -1 if no reference could be taken. */
+static int jas_iccattrtab_replace(jas_iccattrtab_t *attrtab, int i,
+  jas_iccuint32_t name, jas_iccattrval_t *val)
+{
+	jas_iccattr_t *slot = &attrtab->attrs[i];
+	jas_iccattrval_t *ref = jas_iccattrval_clone(val);
+	if (!ref)
+		return -1;
+	/* Take the new reference before dropping the old one. */
+	jas_iccattrval_destroy(slot->val);
+	slot->name = name;
+	slot->val = ref;
+	return 0;
+}
+
+/* Remove entry i: release its value, then close the gap by moving the later
+   entries down one slot. */
+static void jas_iccattrtab_delete(jas_iccattrtab_t *attrtab, int i)
+{
+	jas_iccattr_t *slot = &attrtab->attrs[i];
+	jas_iccattrval_destroy(slot->val);
+	if (--attrtab->numattrs > i)
+		memmove(slot, slot + 1, (attrtab->numattrs - i) * sizeof(jas_iccattr_t));
+}
+
+/* Fetch entry i: store its name in *name and a new reference to its value in
+   *val.  Returns -1 (storing nothing) when i is out of range, else 0 unless
+   the reference could not be taken. */
+static int jas_iccattrtab_get(jas_iccattrtab_t *attrtab, int i,
+  jas_iccattrname_t *name, jas_iccattrval_t **val)
+{
+	jas_iccattr_t *slot;
+	if (i < 0 || i >= attrtab->numattrs)
+		return -1;
+	slot = &attrtab->attrs[i];
+	*name = slot->name;
+	*val = jas_iccattrval_clone(slot->val);
+	return *val ? 0 : -1;
+}
+
+/* Return the index of the first entry whose name equals name, or -1. */
+static int jas_iccattrtab_lookup(jas_iccattrtab_t *attrtab,
+  jas_iccuint32_t name)
+{
+	int idx = 0;
+	while (idx < attrtab->numattrs) {
+		if (attrtab->attrs[idx].name == name)
+			return idx;
+		++idx;
+	}
+	return -1;
+}
+
+/******************************************************************************\
+* attribute value class
+\******************************************************************************/
+
+/* Create a value of the given type with zeroed data and one reference.
+   Returns 0 for an unknown type or when allocation fails. */
+jas_iccattrval_t *jas_iccattrval_create(jas_iccuint32_t type)
+{
+	jas_iccattrval_t *val;
+	jas_iccattrvalinfo_t *info = jas_iccattrvalinfo_lookup(type);
+	if (!info)
+		return 0;
+	val = jas_iccattrval_create0();
+	if (!val)
+		return 0;
+	val->type = type;
+	val->ops = &info->ops;
+	++val->refcnt;
+	memset(&val->data, 0, sizeof(val->data));
+	return val;
+}
+
+jas_iccattrval_t *jas_iccattrval_clone(jas_iccattrval_t *attrval)
+{	/* "Cloning" only adds a reference: the same object is returned, not a copy. */
+	++attrval->refcnt;
+	return attrval;
+}
+
+/* Drop one reference; when none remain, run the type's destroy operation
+   (if it has one) and free the value. */
+void jas_iccattrval_destroy(jas_iccattrval_t *attrval)
+{
+	if (--attrval->refcnt > 0)
+		return;
+	/* That was the last reference. */
+	if (attrval->ops->destroy)
+		(*attrval->ops->destroy)(attrval);
+	jas_free(attrval);
+}
+
+void jas_iccattrval_dump(jas_iccattrval_t *attrval, FILE *out)
+{	/* Print the reference count and type, then the type-specific dump if there is one. */
+	char buf[8];
+	jas_iccsigtostr(attrval->type, buf);	/* Same conversion as in the fprintf below; this result is unused. */
+	fprintf(out, "refcnt = %d; type = 0x%08x %s\n", attrval->refcnt,
+	  attrval->type, jas_iccsigtostr(attrval->type, &buf[0]));
+	if (attrval->ops->dump) {
+		(*attrval->ops->dump)(attrval, out);
+	}
+}
+
+/* Make *attrvalx safe to modify.  A value with a single reference is left
+   alone; a shared one is replaced in *attrvalx by a private copy holding one
+   reference.  Returns 0 on success and -1 if the copy cannot be made. */
+int jas_iccattrval_allowmodify(jas_iccattrval_t **attrvalx)
+{
+	jas_iccattrval_t *shared = *attrvalx;
+	jas_iccattrval_t *priv;
+
+	if (shared->refcnt <= 1)
+		return 0;
+	if (!(priv = jas_iccattrval_create0()))
+		return -1;
+	priv->ops = shared->ops;
+	priv->type = shared->type;
+	++priv->refcnt;
+	if (!priv->ops->copy) {
+		/* No type-specific copy: duplicate the data bytes. */
+		memcpy(&priv->data, &shared->data, sizeof(priv->data));
+	} else if ((*priv->ops->copy)(priv, shared)) {
+		jas_free(priv);
+		return -1;
+	}
+	/* NOTE(review): the count of the shared value is not decremented here;
+	   confirm against the callers whether that is intended. */
+	*attrvalx = priv;
+	return 0;
+}
+
+static jas_iccattrval_t *jas_iccattrval_create0()
+{	/* Allocate a blank value: zero-filled, reference count 0; returns 0 on failure. */
+	jas_iccattrval_t *attrval;
+	if (!(attrval = jas_malloc(sizeof(jas_iccattrval_t))))
+		return 0;
+	memset(attrval, 0, sizeof(jas_iccattrval_t));
+	attrval->refcnt = 0;	/* The callers in this file increment it after setting ops and type. */
+	attrval->ops = 0;
+	attrval->type = 0;
+	return attrval;
+}
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+/* Read an XYZ value; a length other than 12 bytes yields -1 instead of ending the process. */
+static int jas_iccxyz_input(jas_iccattrval_t *attrval, jas_stream_t *in,
+  int len)
+{
+	return (len == 4 * 3) ? jas_iccgetxyz(in, &attrval->data.xyz) : -1;
+}
+
+static int jas_iccxyz_output(jas_iccattrval_t *attrval, jas_stream_t *out)
+{
+	jas_iccxyz_t *xyz = &attrval->data.xyz;
+	if (jas_iccputuint32(out, xyz->x) ||
+	  jas_iccputuint32(out, xyz->y) ||
+	  jas_iccputuint32(out, xyz->z))
+		return -1;
+	return 0;
+}
+
+/* Size in bytes of an encoded XYZ value: always 12. */
+static int jas_iccxyz_getsize(jas_iccattrval_t *attrval)
+{
+	/* The argument is not needed; the cast only silences the compiler. */
+	(void)attrval;
+	return 12;
+}
+
+static void jas_iccxyz_dump(jas_iccattrval_t *attrval, FILE *out)
+{	/* Print the three components, each divided by 65536.0. */
+	jas_iccxyz_t *xyz = &attrval->data.xyz;
+	fprintf(out, "(%f, %f, %f)\n", xyz->x / 65536.0, xyz->y / 65536.0, xyz->z / 65536.0);
+}
+
+/******************************************************************************\
+* curve attribute value type
+\******************************************************************************/
+
+/* Free the curve table and clear the pointer, so that a repeated call is harmless. */
+static void jas_icccurv_destroy(jas_iccattrval_t *attrval)
+{
+	jas_icccurv_t *curv = &attrval->data.curv;
+	if (curv->ents) { jas_free(curv->ents); curv->ents = 0; }
+}
+
+static int jas_icccurv_copy(jas_iccattrval_t *attrval,
+  jas_iccattrval_t *othattrval)
+{	/* Placeholder for copying a curve value; it never returns normally. */
+	/* Avoid compiler warnings about unused parameters. */
+	attrval = 0;
+	othattrval = 0;
+
+	/* Not yet implemented: calling this ends the process via abort(). */
+	abort();
+	return -1;
+}
+
+/* Read a curve: a 32-bit entry count followed by that many 16-bit entries.
+   cnt is the payload length.  Returns 0 on success and -1 on error. */
+static int jas_icccurv_input(jas_iccattrval_t *attrval, jas_stream_t *in,
+  int cnt)
+{
+	jas_icccurv_t *curv = &attrval->data.curv;
+	unsigned int i;
+	curv->numents = 0;
+	curv->ents = 0;
+	if (jas_iccgetuint32(in, &curv->numents))
+		goto error;
+	/* Check the count before allocating; the division form cannot wrap. */
+	if (cnt < 4 || (cnt - 4) % 2 != 0 ||
+	  curv->numents != JAS_CAST(jas_iccuint32_t, (cnt - 4) / 2))
+		goto error;
+	if (!(curv->ents = jas_malloc(curv->numents * sizeof(jas_iccuint16_t))))
+		goto error;
+	for (i = 0; i < curv->numents; ++i)
+		if (jas_iccgetuint16(in, &curv->ents[i]))
+			goto error;
+	return 0;
+error:
+	jas_icccurv_destroy(attrval);
+	curv->ents = 0;	/* The caller's destructor must not free the table again. */
+	return -1;
+}
+
+static int jas_icccurv_getsize(jas_iccattrval_t *attrval)
+{	/* Encoded size: 4 bytes for the count plus 2 bytes per entry. */
+	jas_icccurv_t *curv = &attrval->data.curv;
+	return 4 + 2 * curv->numents;
+}
+
+/* Write a curve as its 32-bit entry count followed by the 16-bit entries.
+   Returns 0 on success and -1 as soon as a write fails. */
+static int jas_icccurv_output(jas_iccattrval_t *attrval, jas_stream_t *out)
+{
+	jas_icccurv_t *curv = &attrval->data.curv;
+	unsigned int idx = 0;
+	if (jas_iccputuint32(out, curv->numents))
+		return -1;
+	while (idx < curv->numents) {
+		if (jas_iccputuint16(out, curv->ents[idx]))
+			return -1;
+		++idx;
+	}
+	return 0;
+}
+
+/* Print a curve: one entry is shown as a gamma; else the first and last three entries. */
+static void jas_icccurv_dump(jas_iccattrval_t *attrval, FILE *out)
+{
+	int i;
+	jas_icccurv_t *curv = &attrval->data.curv;
+	int numents = JAS_CAST(int, curv->numents);	/* %d expects an int. */
+	fprintf(out, "number of entries = %d\n", numents);
+	if (numents == 1) {
+		fprintf(out, "gamma = %f\n", curv->ents[0] / 256.0);
+		return;
+	}
+	for (i = 0; i < numents; ++i)
+		if (i < 3 || i >= numents - 3)
+			fprintf(out, "entry[%d] = %f\n", i, curv->ents[i] / 65535.0);
+}
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+/* Free both text buffers.  The pointers are cleared afterwards, so that a
+   repeated call is harmless. */
+static void jas_icctxtdesc_destroy(jas_iccattrval_t *attrval)
+{
+	jas_icctxtdesc_t *txtdesc = &attrval->data.txtdesc;
+	if (txtdesc->ascdata) { jas_free(txtdesc->ascdata); txtdesc->ascdata = 0; }
+	if (txtdesc->ucdata) { jas_free(txtdesc->ucdata); txtdesc->ucdata = 0; }
+}
+
+static int jas_icctxtdesc_copy(jas_iccattrval_t *attrval,
+  jas_iccattrval_t *othattrval)
+{	/* Placeholder for copying a text description; it never returns normally. */
+	jas_icctxtdesc_t *txtdesc = &attrval->data.txtdesc;
+
+	/* Avoid compiler warnings about unused parameters. */
+	attrval = 0;
+	othattrval = 0;
+	txtdesc = 0;
+
+	/* Not yet implemented: calling this ends the process via abort(). */
+	abort();
+	return -1;
+}
+
+/*
+ * Read a text description attribute value of cnt bytes from the stream.
+ *
+ * Fields are read in this order: 32-bit ASCII length, ASCII data, 32-bit
+ * Unicode language code, 32-bit Unicode length, Unicode data (two bytes per
+ * unit of uclen), 16-bit script code, one length byte (maclen) and a fixed
+ * block of 67 bytes stored in macdata.  After reading, asclen is replaced by
+ * strlen(ascdata) + 1.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int jas_icctxtdesc_input(jas_iccattrval_t *attrval, jas_stream_t *in,
+  int cnt)
+{
+	int n;
+	int c;
+	jas_icctxtdesc_t *txtdesc = &attrval->data.txtdesc;
+	txtdesc->ascdata = 0;
+	txtdesc->ucdata = 0;
+	if (jas_iccgetuint32(in, &txtdesc->asclen))
+		goto error;
+	/* A zero length would make the terminator store below address
+	 * ascdata[asclen - 1] outside the allocated buffer, so reject it. */
+	if (txtdesc->asclen < 1)
+		goto error;
+	if (!(txtdesc->ascdata = jas_malloc(txtdesc->asclen)))
+		goto error;
+	if (jas_stream_read(in, txtdesc->ascdata, txtdesc->asclen) !=
+	  JAS_CAST(int, txtdesc->asclen))
+		goto error;
+	/* Force termination so that the strlen call further down is bounded. */
+	txtdesc->ascdata[txtdesc->asclen - 1] = '\0';
+	if (jas_iccgetuint32(in, &txtdesc->uclangcode) ||
+	  jas_iccgetuint32(in, &txtdesc->uclen))
+		goto error;
+	if (!(txtdesc->ucdata = jas_malloc(txtdesc->uclen * 2)))
+		goto error;
+	if (jas_stream_read(in, txtdesc->ucdata, txtdesc->uclen * 2) !=
+	  JAS_CAST(int, txtdesc->uclen * 2))
+		goto error;
+	if (jas_iccgetuint16(in, &txtdesc->sccode))
+		goto error;
+	if ((c = jas_stream_getc(in)) == EOF)
+		goto error;
+	txtdesc->maclen = c;
+	if (jas_stream_read(in, txtdesc->macdata, 67) != 67)
+		goto error;
+	txtdesc->asclen = strlen(txtdesc->ascdata) + 1;
+#define WORKAROUND_BAD_PROFILES
+#ifdef WORKAROUND_BAD_PROFILES
+	/* Accept values that are longer than the fields account for and skip
+	 * the excess bytes; only a value that is too short is an error. */
+	n = txtdesc->asclen + txtdesc->uclen * 2 + 15 + 67;
+	if (n > cnt) {
+		/* NOTE(review): unlike the other failure paths this one returns
+		 * without releasing the buffers -- confirm who owns the cleanup. */
+		return -1;
+	}
+	if (n < cnt) {
+		if (jas_stream_gobble(in, cnt - n) != cnt - n)
+			goto error;
+	}
+#else
+	if (txtdesc->asclen + txtdesc->uclen * 2 + 15 + 67 != cnt)
+		return -1;
+#endif
+	return 0;
+error:
+	jas_icctxtdesc_destroy(attrval);
+	return -1;
+}
+
+/*
+ * Return the encoded size in bytes of a text description attribute value:
+ * the ASCII string including its terminator, two bytes per unit of uclen,
+ * and 15 + 67 bytes for the remaining fields.
+ */
+static int jas_icctxtdesc_getsize(jas_iccattrval_t *attrval)
+{
+	jas_icctxtdesc_t *desc = &attrval->data.txtdesc;
+
+	return strlen(desc->ascdata) + 1 + desc->uclen * 2 + 15 + 67;
+}
+
+/*
+ * Write a text description attribute value to the stream, field by field in
+ * the same order in which jas_icctxtdesc_input reads them.
+ * Returns 0 on success and -1 as soon as any write fails.
+ */
+static int jas_icctxtdesc_output(jas_iccattrval_t *attrval, jas_stream_t *out)
+{
+	jas_icctxtdesc_t *desc = &attrval->data.txtdesc;
+
+	/* ASCII part: length, characters, then an explicit terminator byte. */
+	if (jas_iccputuint32(out, desc->asclen))
+		return -1;
+	if (jas_stream_puts(out, desc->ascdata))
+		return -1;
+	if (jas_stream_putc(out, 0) == EOF)
+		return -1;
+
+	/* Unicode part: language code, length, then two bytes per unit. */
+	if (jas_iccputuint32(out, desc->uclangcode))
+		return -1;
+	if (jas_iccputuint32(out, desc->uclen))
+		return -1;
+	if (jas_stream_write(out, desc->ucdata, desc->uclen * 2) != JAS_CAST(int, desc->uclen * 2))
+		return -1;
+
+	/* Script code and the length byte of the final fixed-size field. */
+	if (jas_iccputuint16(out, desc->sccode))
+		return -1;
+	if (jas_stream_putc(out, desc->maclen) == EOF)
+		return -1;
+
+	/* The last field always occupies 67 bytes: zero padding when maclen
+	 * is not positive, the stored macdata otherwise. */
+	if (!(desc->maclen > 0))
+		return (jas_stream_pad(out, 67, 0) != 67) ? -1 : 0;
+	return (jas_stream_write(out, desc->macdata, 67) != 67) ? -1 : 0;
+}
+
+/*
+ * Print the ASCII text, the Unicode language code and length, the script
+ * code and the maclen field of a text description attribute value to out.
+ */
+static void jas_icctxtdesc_dump(jas_iccattrval_t *attrval, FILE *out)
+{
+	jas_icctxtdesc_t *desc = &attrval->data.txtdesc;
+
+	fprintf(out, "ascii = \"%s\"\n", desc->ascdata);
+	fprintf(out, "uclangcode = %d; uclen = %d\n", desc->uclangcode, desc->uclen);
+	fprintf(out, "sccode = %d\n", desc->sccode);
+	fprintf(out, "maclen = %d\n", desc->maclen);
+}
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+/*
+ * Release the string owned by a text attribute value.  The pointer is reset
+ * to null afterwards so that a repeated destroy cannot free it twice.
+ */
+static void jas_icctxt_destroy(jas_iccattrval_t *attrval)
+{
+	jas_icctxt_t *txt = &attrval->data.txt;
+	if (txt->string) {
+		jas_free(txt->string);
+		txt->string = 0;
+	}
+}
+
+/*
+ * Give attrval its own duplicate (made with jas_strdup) of the string held
+ * by othattrval.  Returns 0 on success and -1 if the duplication fails.
+ */
+static int jas_icctxt_copy(jas_iccattrval_t *attrval,
+  jas_iccattrval_t *othattrval)
+{
+	attrval->data.txt.string = jas_strdup(othattrval->data.txt.string);
+	return attrval->data.txt.string ? 0 : -1;
+}
+
+/*
+ * Read a text attribute value of exactly cnt bytes from the stream.
+ * The data must be a string whose terminator is the last of the cnt bytes:
+ * the final byte is forced to zero and the value is rejected if the string
+ * ends earlier.
+ *
+ * Returns 0 on success.  On failure the buffer is released, the string
+ * pointer is left null and -1 is returned.
+ */
+static int jas_icctxt_input(jas_iccattrval_t *attrval, jas_stream_t *in,
+  int cnt)
+{
+	jas_icctxt_t *txt = &attrval->data.txt;
+	txt->string = 0;
+	/* At least the terminator must be present; this also keeps the
+	 * string[cnt - 1] store below inside the buffer. */
+	if (cnt < 1)
+		goto error;
+	if (!(txt->string = jas_malloc(cnt)))
+		goto error;
+	if (jas_stream_read(in, txt->string, cnt) != cnt)
+		goto error;
+	txt->string[cnt - 1] = '\0';
+	if (JAS_CAST(int, strlen(txt->string)) + 1 != cnt)
+		goto error;
+	return 0;
+error:
+	if (txt->string) {
+		jas_free(txt->string);
+		/* Do not leave a dangling pointer behind for a later destroy. */
+		txt->string = 0;
+	}
+	return -1;
+}
+
+/*
+ * Return the encoded size in bytes of a text attribute value: the length of
+ * the string plus one byte for its terminator.
+ */
+static int jas_icctxt_getsize(jas_iccattrval_t *attrval)
+{
+	return strlen(attrval->data.txt.string) + 1;
+}
+
+/*
+ * Write a text attribute value to the stream: the characters of the string
+ * followed by a single zero byte.  Returns 0 on success, -1 on failure.
+ */
+static int jas_icctxt_output(jas_iccattrval_t *attrval, jas_stream_t *out)
+{
+	jas_icctxt_t *text = &attrval->data.txt;
+
+	if (jas_stream_puts(out, text->string))
+		return -1;
+	if (jas_stream_putc(out, 0) == EOF)
+		return -1;
+	return 0;
+}
+
+/* Print the string of a text attribute value, in double quotes, to out. */
+static void jas_icctxt_dump(jas_iccattrval_t *attrval, FILE *out)
+{
+	fprintf(out, "string = \"%s\"\n", attrval->data.txt.string);
+}
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+/*
+ * Release every buffer owned by an 8-bit lookup table attribute value: the
+ * CLUT, the input/output table row pointers and the buffers behind them.
+ * Each pointer is reset to null once it has been freed, so a repeated
+ * destroy (for instance after jas_icclut8_input has already cleaned up on
+ * its error path) does not free the memory twice.
+ */
+static void jas_icclut8_destroy(jas_iccattrval_t *attrval)
+{
+	jas_icclut8_t *lut8 = &attrval->data.lut8;
+	if (lut8->clut) {
+		jas_free(lut8->clut);
+		lut8->clut = 0;
+	}
+	if (lut8->intabs) {
+		jas_free(lut8->intabs);
+		lut8->intabs = 0;
+	}
+	if (lut8->intabsbuf) {
+		jas_free(lut8->intabsbuf);
+		lut8->intabsbuf = 0;
+	}
+	if (lut8->outtabs) {
+		jas_free(lut8->outtabs);
+		lut8->outtabs = 0;
+	}
+	if (lut8->outtabsbuf) {
+		jas_free(lut8->outtabsbuf);
+		lut8->outtabsbuf = 0;
+	}
+}
+
+/*
+ * Copying an 8-bit lookup table attribute value is not implemented: this
+ * function always calls abort().  The trailing return statement only exists
+ * to match the int return type.
+ */
+static int jas_icclut8_copy(jas_iccattrval_t *attrval,
+  jas_iccattrval_t *othattrval)
+{
+	/* Mark the parameters as deliberately unused. */
+	(void)attrval;
+	(void)othattrval;
+	abort();
+	return -1;
+}
+
+/*
+ * Read an 8-bit lookup table attribute value of cnt bytes from the stream.
+ *
+ * The fixed part (44 bytes) holds the input and output channel counts, the
+ * CLUT grid length, one skipped byte, the 3x3 matrix e and the number of
+ * entries per input and per output table.  It is followed by the input
+ * tables, the output tables and the CLUT, all stored as single bytes.
+ * intabs[i] and outtabs[i] are set to point at row i inside intabsbuf and
+ * outtabsbuf respectively.
+ *
+ * Returns 0 on success.  On failure everything allocated so far is released
+ * through jas_icclut8_destroy and -1 is returned.
+ */
+static int jas_icclut8_input(jas_iccattrval_t *attrval, jas_stream_t *in,
+  int cnt)
+{
+	int i;
+	int j;
+	int clutsize;
+	jas_icclut8_t *lut8 = &attrval->data.lut8;
+	lut8->clut = 0;
+	lut8->intabs = 0;
+	lut8->intabsbuf = 0;
+	lut8->outtabs = 0;
+	lut8->outtabsbuf = 0;
+	if (jas_iccgetuint8(in, &lut8->numinchans) ||
+	  jas_iccgetuint8(in, &lut8->numoutchans) ||
+	  jas_iccgetuint8(in, &lut8->clutlen) ||
+	  jas_stream_getc(in) == EOF)
+		goto error;
+	for (i = 0; i < 3; ++i) {
+		for (j = 0; j < 3; ++j) {
+			if (jas_iccgetsint32(in, &lut8->e[i][j]))
+				goto error;
+		}
+	}
+	if (jas_iccgetuint16(in, &lut8->numintabents) ||
+	  jas_iccgetuint16(in, &lut8->numouttabents))
+		goto error;
+	/* The CLUT holds clutlen^numinchans * numoutchans entries.  Build the
+	 * product one factor at a time and give up as soon as it would exceed
+	 * cnt: such a value can never pass the size check at the end, and
+	 * stopping early keeps the multiplication from overflowing and avoids
+	 * an enormous allocation driven by untrusted header bytes. */
+	clutsize = lut8->numoutchans;
+	for (i = 0; i < lut8->numinchans; ++i) {
+		if (lut8->clutlen != 0 &&
+		  clutsize > cnt / JAS_CAST(int, lut8->clutlen))
+			goto error;
+		clutsize *= lut8->clutlen;
+	}
+	if (!(lut8->clut = jas_malloc(clutsize * sizeof(jas_iccuint8_t))) ||
+	  !(lut8->intabsbuf = jas_malloc(lut8->numinchans *
+	  lut8->numintabents * sizeof(jas_iccuint8_t))) ||
+	  !(lut8->intabs = jas_malloc(lut8->numinchans *
+	  sizeof(jas_iccuint8_t *))))
+		goto error;
+	for (i = 0; i < lut8->numinchans; ++i)
+		lut8->intabs[i] = &lut8->intabsbuf[i * lut8->numintabents];
+	if (!(lut8->outtabsbuf = jas_malloc(lut8->numoutchans *
+	  lut8->numouttabents * sizeof(jas_iccuint8_t))) ||
+	  !(lut8->outtabs = jas_malloc(lut8->numoutchans *
+	  sizeof(jas_iccuint8_t *))))
+		goto error;
+	for (i = 0; i < lut8->numoutchans; ++i)
+		lut8->outtabs[i] = &lut8->outtabsbuf[i * lut8->numouttabents];
+	for (i = 0; i < lut8->numinchans; ++i) {
+		for (j = 0; j < JAS_CAST(int, lut8->numintabents); ++j) {
+			if (jas_iccgetuint8(in, &lut8->intabs[i][j]))
+				goto error;
+		}
+	}
+	for (i = 0; i < lut8->numoutchans; ++i) {
+		for (j = 0; j < JAS_CAST(int, lut8->numouttabents); ++j) {
+			if (jas_iccgetuint8(in, &lut8->outtabs[i][j]))
+				goto error;
+		}
+	}
+	for (i = 0; i < clutsize; ++i) {
+		if (jas_iccgetuint8(in, &lut8->clut[i]))
+			goto error;
+	}
+	/* The bytes consumed must add up to exactly the announced length. */
+	if (JAS_CAST(int, 44 + lut8->numinchans * lut8->numintabents +
+	  lut8->numoutchans * lut8->numouttabents + clutsize) != cnt)
+		goto error;
+	return 0;
+error:
+	jas_icclut8_destroy(attrval);
+	return -1;
+}
+
+/*
+ * Return the encoded size in bytes of an 8-bit lookup table attribute value:
+ * 44 bytes of fixed fields plus one byte for every input table entry, every
+ * output table entry and every CLUT entry
+ * (clutlen^numinchans * numoutchans of them).
+ */
+static int jas_icclut8_getsize(jas_iccattrval_t *attrval)
+{
+	jas_icclut8_t *lut = &attrval->data.lut8;
+
+	return 44 + lut->numinchans * lut->numintabents +
+	  lut->numoutchans * lut->numouttabents +
+	  jas_iccpowi(lut->clutlen, lut->numinchans) * lut->numoutchans;
+}
+
+/*
+ * Write an 8-bit lookup table attribute value to the stream: the channel
+ * counts, the CLUT grid length, a zero byte, the 3x3 matrix e, the table
+ * entry counts, and then the input tables, the output tables and the CLUT
+ * as single bytes.
+ *
+ * Returns 0 on success and -1 as soon as any write fails.
+ */
+static int jas_icclut8_output(jas_iccattrval_t *attrval, jas_stream_t *out)
+{
+	jas_icclut8_t *lut8 = &attrval->data.lut8;
+	int i;
+	int j;
+	int n;
+	/* The table pointers must be left untouched here: they hold the data
+	 * that is about to be written and remain owned by attrval.  Clearing
+	 * them would make the loops below dereference null pointers and would
+	 * leak the buffers. */
+	if (jas_stream_putc(out, lut8->numinchans) == EOF ||
+	  jas_stream_putc(out, lut8->numoutchans) == EOF ||
+	  jas_stream_putc(out, lut8->clutlen) == EOF ||
+	  jas_stream_putc(out, 0) == EOF)
+		goto error;
+	for (i = 0; i < 3; ++i) {
+		for (j = 0; j < 3; ++j) {
+			if (jas_iccputsint32(out, lut8->e[i][j]))
+				goto error;
+		}
+	}
+	if (jas_iccputuint16(out, lut8->numintabents) ||
+	  jas_iccputuint16(out, lut8->numouttabents))
+		goto error;
+	/* The tables are written straight from their backing buffers. */
+	n = lut8->numinchans * lut8->numintabents;
+	for (i = 0; i < n; ++i) {
+		if (jas_iccputuint8(out, lut8->intabsbuf[i]))
+			goto error;
+	}
+	n = lut8->numoutchans * lut8->numouttabents;
+	for (i = 0; i < n; ++i) {
+		if (jas_iccputuint8(out, lut8->outtabsbuf[i]))
+			goto error;
+	}
+	n = jas_iccpowi(lut8->clutlen, lut8->numinchans) * lut8->numoutchans;
+	for (i = 0; i < n; ++i) {
+		if (jas_iccputuint8(out, lut8->clut[i]))
+			goto error;
+	}
+	return 0;
+error:
+	return -1;
+}
+
+/*
+ * Print the header fields of an 8-bit lookup table attribute value to out:
+ * the channel counts and CLUT grid length, the 3x3 matrix e (each element
+ * divided by 65536, one matrix row per line) and the table entry counts.
+ * The table contents themselves are not printed.
+ */
+static void jas_icclut8_dump(jas_iccattrval_t *attrval, FILE *out)
+{
+	jas_icclut8_t *lut = &attrval->data.lut8;
+	int k;
+
+	fprintf(out, "numinchans=%d, numoutchans=%d, clutlen=%d\n",
+	  lut->numinchans, lut->numoutchans, lut->clutlen);
+	/* Walk the nine matrix elements in row-major order. */
+	for (k = 0; k < 9; ++k) {
+		fprintf(out, "e[%d][%d]=%f ", k / 3, k % 3,
+		  lut->e[k / 3][k % 3] / 65536.0);
+		if (k % 3 == 2)
+			fprintf(out, "\n");
+	}
+	fprintf(out, "numintabents=%d, numouttabents=%d\n",
+	  lut->numintabents, lut->numouttabents);
+}
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+/*
+ * Release every buffer owned by a 16-bit lookup table attribute value: the
+ * CLUT, the input/output table row pointers and the buffers behind them.
+ * Each pointer is reset to null once it has been freed, so a repeated
+ * destroy (for instance after jas_icclut16_input has already cleaned up on
+ * its error path) does not free the memory twice.
+ */
+static void jas_icclut16_destroy(jas_iccattrval_t *attrval)
+{
+	jas_icclut16_t *lut16 = &attrval->data.lut16;
+	if (lut16->clut) {
+		jas_free(lut16->clut);
+		lut16->clut = 0;
+	}
+	if (lut16->intabs) {
+		jas_free(lut16->intabs);
+		lut16->intabs = 0;
+	}
+	if (lut16->intabsbuf) {
+		jas_free(lut16->intabsbuf);
+		lut16->intabsbuf = 0;
+	}
+	if (lut16->outtabs) {
+		jas_free(lut16->outtabs);
+		lut16->outtabs = 0;
+	}
+	if (lut16->outtabsbuf) {
+		jas_free(lut16->outtabsbuf);
+		lut16->outtabsbuf = 0;
+	}
+}
+
+/*
+ * Copying a 16-bit lookup table attribute value is not implemented: this
+ * function always calls abort().  The trailing return statement only exists
+ * to match the int return type.
+ */
+static int jas_icclut16_copy(jas_iccattrval_t *attrval,
+  jas_iccattrval_t *othattrval)
+{
+	/* Mark the parameters as deliberately unused. */
+	(void)attrval;
+	(void)othattrval;
+	/* Not yet implemented. */
+	abort();
+	return -1;
+}
+
+/*
+ * Read a 16-bit lookup table attribute value of cnt bytes from the stream.
+ *
+ * The fixed part (44 bytes) holds the input and output channel counts, the
+ * CLUT grid length, one skipped byte, the 3x3 matrix e and the number of
+ * entries per input and per output table.  It is followed by the input
+ * tables, the output tables and the CLUT, all stored as 16-bit values.
+ * intabs[i] and outtabs[i] are set to point at row i inside intabsbuf and
+ * outtabsbuf respectively.
+ *
+ * Returns 0 on success.  On failure everything allocated so far is released
+ * through jas_icclut16_destroy and -1 is returned.
+ */
+static int jas_icclut16_input(jas_iccattrval_t *attrval, jas_stream_t *in,
+  int cnt)
+{
+	int i;
+	int j;
+	int clutsize;
+	jas_icclut16_t *lut16 = &attrval->data.lut16;
+	lut16->clut = 0;
+	lut16->intabs = 0;
+	lut16->intabsbuf = 0;
+	lut16->outtabs = 0;
+	lut16->outtabsbuf = 0;
+	if (jas_iccgetuint8(in, &lut16->numinchans) ||
+	  jas_iccgetuint8(in, &lut16->numoutchans) ||
+	  jas_iccgetuint8(in, &lut16->clutlen) ||
+	  jas_stream_getc(in) == EOF)
+		goto error;
+	for (i = 0; i < 3; ++i) {
+		for (j = 0; j < 3; ++j) {
+			if (jas_iccgetsint32(in, &lut16->e[i][j]))
+				goto error;
+		}
+	}
+	if (jas_iccgetuint16(in, &lut16->numintabents) ||
+	  jas_iccgetuint16(in, &lut16->numouttabents))
+		goto error;
+	/* The CLUT holds clutlen^numinchans * numoutchans entries.  Build the
+	 * product one factor at a time and give up as soon as it would exceed
+	 * cnt: such a value can never pass the size check at the end, and
+	 * stopping early keeps the multiplication from overflowing and avoids
+	 * an enormous allocation driven by untrusted header bytes. */
+	clutsize = lut16->numoutchans;
+	for (i = 0; i < lut16->numinchans; ++i) {
+		if (lut16->clutlen != 0 &&
+		  clutsize > cnt / JAS_CAST(int, lut16->clutlen))
+			goto error;
+		clutsize *= lut16->clutlen;
+	}
+	if (!(lut16->clut = jas_malloc(clutsize * sizeof(jas_iccuint16_t))) ||
+	  !(lut16->intabsbuf = jas_malloc(lut16->numinchans *
+	  lut16->numintabents * sizeof(jas_iccuint16_t))) ||
+	  !(lut16->intabs = jas_malloc(lut16->numinchans *
+	  sizeof(jas_iccuint16_t *))))
+		goto error;
+	for (i = 0; i < lut16->numinchans; ++i)
+		lut16->intabs[i] = &lut16->intabsbuf[i * lut16->numintabents];
+	if (!(lut16->outtabsbuf = jas_malloc(lut16->numoutchans *
+	  lut16->numouttabents * sizeof(jas_iccuint16_t))) ||
+	  !(lut16->outtabs = jas_malloc(lut16->numoutchans *
+	  sizeof(jas_iccuint16_t *))))
+		goto error;
+	for (i = 0; i < lut16->numoutchans; ++i)
+		lut16->outtabs[i] = &lut16->outtabsbuf[i * lut16->numouttabents];
+	for (i = 0; i < lut16->numinchans; ++i) {
+		for (j = 0; j < JAS_CAST(int, lut16->numintabents); ++j) {
+			if (jas_iccgetuint16(in, &lut16->intabs[i][j]))
+				goto error;
+		}
+	}
+	for (i = 0; i < lut16->numoutchans; ++i) {
+		for (j = 0; j < JAS_CAST(int, lut16->numouttabents); ++j) {
+			if (jas_iccgetuint16(in, &lut16->outtabs[i][j]))
+				goto error;
+		}
+	}
+	for (i = 0; i < clutsize; ++i) {
+		if (jas_iccgetuint16(in, &lut16->clut[i]))
+			goto error;
+	}
+	/* The bytes consumed must add up to exactly the announced length;
+	 * every table and CLUT entry occupies two bytes. */
+	if (JAS_CAST(int, 44 + 2 * (lut16->numinchans * lut16->numintabents +
+	  lut16->numoutchans * lut16->numouttabents + clutsize)) != cnt)
+		goto error;
+	return 0;
+error:
+	jas_icclut16_destroy(attrval);
+	return -1;
+}
+
+/*
+ * Return the encoded size in bytes of a 16-bit lookup table attribute
+ * value: 44 bytes of fixed fields plus two bytes for every input table
+ * entry, every output table entry and every CLUT entry
+ * (clutlen^numinchans * numoutchans of them).
+ */
+static int jas_icclut16_getsize(jas_iccattrval_t *attrval)
+{
+	jas_icclut16_t *lut = &attrval->data.lut16;
+
+	return 44 + 2 * (lut->numinchans * lut->numintabents +
+	  lut->numoutchans * lut->numouttabents +
+	  jas_iccpowi(lut->clutlen, lut->numinchans) * lut->numoutchans);
+}
+
+/*
+ * Write a 16-bit lookup table attribute value to the stream: the channel
+ * counts, the CLUT grid length, a zero byte, the 3x3 matrix e, the table
+ * entry counts, and then the input tables, the output tables and the CLUT
+ * as 16-bit values.
+ *
+ * Returns 0 on success and -1 as soon as any write fails.
+ */
+static int jas_icclut16_output(jas_iccattrval_t *attrval, jas_stream_t *out)
+{
+	jas_icclut16_t *lut = &attrval->data.lut16;
+	int row;
+	int col;
+	int k;
+	int total;
+
+	/* Fixed header bytes. */
+	if (jas_stream_putc(out, lut->numinchans) == EOF)
+		return -1;
+	if (jas_stream_putc(out, lut->numoutchans) == EOF)
+		return -1;
+	if (jas_stream_putc(out, lut->clutlen) == EOF)
+		return -1;
+	if (jas_stream_putc(out, 0) == EOF)
+		return -1;
+
+	/* 3x3 matrix, row by row. */
+	for (row = 0; row < 3; ++row) {
+		for (col = 0; col < 3; ++col) {
+			if (jas_iccputsint32(out, lut->e[row][col]))
+				return -1;
+		}
+	}
+
+	if (jas_iccputuint16(out, lut->numintabents))
+		return -1;
+	if (jas_iccputuint16(out, lut->numouttabents))
+		return -1;
+
+	/* Input tables, written straight from their backing buffer. */
+	total = lut->numinchans * lut->numintabents;
+	for (k = 0; k < total; ++k) {
+		if (jas_iccputuint16(out, lut->intabsbuf[k]))
+			return -1;
+	}
+
+	/* Output tables. */
+	total = lut->numoutchans * lut->numouttabents;
+	for (k = 0; k < total; ++k) {
+		if (jas_iccputuint16(out, lut->outtabsbuf[k]))
+			return -1;
+	}
+
+	/* CLUT. */
+	total = jas_iccpowi(lut->clutlen, lut->numinchans) * lut->numoutchans;
+	for (k = 0; k < total; ++k) {
+		if (jas_iccputuint16(out, lut->clut[k]))
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * Print the header fields of a 16-bit lookup table attribute value to out:
+ * the channel counts and CLUT grid length, the 3x3 matrix e (each element
+ * divided by 65536, one matrix row per line) and the table entry counts.
+ * The table contents themselves are not printed.
+ */
+static void jas_icclut16_dump(jas_iccattrval_t *attrval, FILE *out)
+{
+	jas_icclut16_t *lut = &attrval->data.lut16;
+	int k;
+
+	fprintf(out, "numinchans=%d, numoutchans=%d, clutlen=%d\n",
+	  lut->numinchans, lut->numoutchans, lut->clutlen);
+	/* Walk the nine matrix elements in row-major order. */
+	for (k = 0; k < 9; ++k) {
+		fprintf(out, "e[%d][%d]=%f ", k / 3, k % 3,
+		  lut->e[k / 3][k % 3] / 65536.0);
+		if (k % 3 == 2)
+			fprintf(out, "\n");
+	}
+	fprintf(out, "numintabents=%d, numouttabents=%d\n",
+	  lut->numintabents, lut->numouttabents);
+}
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+/*
+ * Read an n-byte unsigned integer from the stream, most significant byte
+ * first, and store it in *val.  Returns 0 on success; returns -1 without
+ * touching *val if the stream ends before n bytes have been read.
+ */
+static int jas_iccgetuint(jas_stream_t *in, int n, ulonglong *val)
+{
+	ulonglong acc = 0;
+	int remaining;
+	int byte;
+
+	for (remaining = n; remaining > 0; --remaining) {
+		byte = jas_stream_getc(in);
+		if (byte == EOF)
+			return -1;
+		/* Make room for the next, less significant byte. */
+		acc = (acc << 8) | byte;
+	}
+	*val = acc;
+	return 0;
+}
+
+/*
+ * Read a single byte from the stream into *val.
+ * Returns 0 on success and -1 at end of stream.
+ */
+static int jas_iccgetuint8(jas_stream_t *in, jas_iccuint8_t *val)
+{
+	int byte = jas_stream_getc(in);
+
+	if (byte == EOF)
+		return -1;
+	*val = byte;
+	return 0;
+}
+
+/*
+ * Read a 2-byte unsigned integer from the stream into *val using
+ * jas_iccgetuint.  Returns 0 on success and -1 on failure.
+ */
+static int jas_iccgetuint16(jas_stream_t *in, jas_iccuint16_t *val)
+{
+	ulonglong raw;
+	int failed = jas_iccgetuint(in, 2, &raw);
+
+	if (failed)
+		return -1;
+	*val = raw;
+	return 0;
+}
+
+/*
+ * Read a 4-byte signed integer from the stream into *val.  The four bytes
+ * are fetched as an unsigned value with jas_iccgetuint and then interpreted
+ * as two's complement: when bit 31 is set the result is the negated
+ * magnitude, otherwise the value is used as is.
+ * Returns 0 on success and -1 on failure.
+ */
+static int jas_iccgetsint32(jas_stream_t *in, jas_iccsint32_t *val)
+{
+	ulonglong raw;
+
+	if (jas_iccgetuint(in, 4, &raw))
+		return -1;
+	if (raw & 0x80000000) {
+		/* Negative: invert the low 31 bits, add one, then negate. */
+		*val = -JAS_CAST(longlong, (((~raw) & 0x7fffffff) + 1));
+	} else {
+		*val = JAS_CAST(longlong, raw);
+	}
+	return 0;
+}
+
+/*
+ * Read a 4-byte unsigned integer from the stream into *val using
+ * jas_iccgetuint.  Returns 0 on success and -1 on failure.
+ */
+static int jas_iccgetuint32(jas_stream_t *in, jas_iccuint32_t *val)
+{
+	ulonglong raw;
+	int failed = jas_iccgetuint(in, 4, &raw);
+
+	if (failed)
+		return -1;
+	*val = raw;
+	return 0;
+}
+
+/*
+ * Read an 8-byte unsigned integer from the stream into *val using
+ * jas_iccgetuint.  Returns 0 on success and -1 on failure.
+ */
+static int jas_iccgetuint64(jas_stream_t *in, jas_iccuint64_t *val)
+{
+	ulonglong raw;
+	int failed = jas_iccgetuint(in, 8, &raw);
+
+	if (failed)
+		return -1;
+	*val = raw;
+	return 0;
+}
+
+/*
+ * Write the low n bytes of val to the stream, most significant byte first.
+ * Returns 0 on success and -1 as soon as a byte cannot be written.
+ */
+static int jas_iccputuint(jas_stream_t *out, int n, ulonglong val)
+{
+	int shift;
+	int byte;
+
+	/* Start at the top byte and step down eight bits at a time. */
+	for (shift = 8 * (n - 1); shift >= 0; shift -= 8) {
+		byte = (val >> shift) & 0xff;
+		if (jas_stream_putc(out, byte) == EOF)
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * Write the signed integer val to the stream as n bytes, most significant
+ * byte first, in two's-complement form.  Negative values are encoded rather
+ * than aborting the process, which matches what jas_iccgetsint32 decodes.
+ * Returns the result of jas_iccputuint: 0 on success, -1 on failure.
+ */
+static int jas_iccputsint(jas_stream_t *out, int n, longlong val)
+{
+	/* Converting a negative value to the unsigned type yields its
+	 * two's-complement bit pattern; jas_iccputuint then emits the low
+	 * n bytes of that pattern.  Non-negative values are unchanged. */
+	return jas_iccputuint(out, n, JAS_CAST(ulonglong, val));
+}
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+/*
+ * Convert a four-character signature to a string.  The four bytes of sig
+ * are examined from bits 31..24 down to bits 7..0; each byte that is a
+ * letter or a digit is appended to buf and every other byte is dropped.
+ * buf must have room for up to four characters plus the terminator.
+ * Returns buf.
+ */
+static char *jas_iccsigtostr(int sig, char *buf)
+{
+	/* Work on an unsigned copy: shifting the signed value left, as was
+	 * done before, overflows for ordinary signatures and is undefined
+	 * behavior. */
+	unsigned long bits = (unsigned long)sig;
+	char *bufptr = buf;
+	int shift;
+	int c;
+
+	for (shift = 24; shift >= 0; shift -= 8) {
+		c = (int)((bits >> shift) & 0xff);
+		if (isalpha(c) || isdigit(c)) {
+			*bufptr++ = c;
+		}
+	}
+	*bufptr = '\0';
+	return buf;
+}
+
+/*
+ * Compute ((x + y - 1) / y) * y; for a non-negative x and a positive y this
+ * is the smallest multiple of y that is not less than x.
+ */
+static long jas_iccpadtomult(long x, long y)
+{
+	long units = (x + y - 1) / y;
+
+	return units * y;
+}
+
+/*
+ * Return x raised to the power n, computed by repeated multiplication.
+ * The result is 1 when n is zero or negative.
+ */
+static long jas_iccpowi(int x, int n)
+{
+	long result = 1;
+	int k;
+
+	for (k = 0; k < n; ++k)
+		result *= x;
+	return result;
+}
+
+
+/*
+ * Create an ICC profile from len bytes of encoded profile data in buf.
+ * A memory stream is opened over the buffer, the profile is loaded from it
+ * with jas_iccprof_load, and the stream is closed again.
+ * Returns the profile, or 0 if the stream cannot be opened or the profile
+ * cannot be loaded.
+ */
+jas_iccprof_t *jas_iccprof_createfrombuf(uchar *buf, int len)
+{
+	jas_stream_t *in;
+	jas_iccprof_t *prof;
+	if (!(in = jas_stream_memopen(JAS_CAST(char *, buf), len)))
+		return 0;
+	prof = jas_iccprof_load(in);
+	/* The stream is only needed while loading: close it whether or not
+	 * the load succeeded, so a failed load does not leak it. */
+	jas_stream_close(in);
+	return prof;
+}
+
+/*
+ * Create an ICC profile for a color space from the profile data built into
+ * the library.  JAS_CLRSPC_SRGB and JAS_CLRSPC_SGRAY are supported; for any
+ * other value, or if the built-in data cannot be loaded, 0 is returned.
+ */
+jas_iccprof_t *jas_iccprof_createfromclrspc(int clrspc)
+{
+	switch (clrspc) {
+	case JAS_CLRSPC_SRGB:
+		return jas_iccprof_createfrombuf(jas_iccprofdata_srgb,
+		  jas_iccprofdata_srgblen);
+	case JAS_CLRSPC_SGRAY:
+		return jas_iccprof_createfrombuf(jas_iccprofdata_sgray,
+		  jas_iccprofdata_sgraylen);
+	default:
+		break;
+	}
+	return 0;
+}
diff --git a/src/libjasper/base/jas_iccdata.c b/src/libjasper/base/jas_iccdata.c
new file mode 100644
index 0000000..bf68bf0
--- /dev/null
+++ b/src/libjasper/base/jas_iccdata.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright (c) 2002-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+#include <jasper/jas_config.h>
+#include <jasper/jas_types.h>
+
+uchar jas_iccprofdata_srgb[] =
+{
+	0x00, 0x00, 0x0c, 0x48, 0x4c, 0x69, 0x6e, 0x6f,
+	0x02, 0x10, 0x00, 0x00, 0x6d, 0x6e, 0x74, 0x72,
+	0x52, 0x47, 0x42, 0x20, 0x58, 0x59, 0x5a, 0x20,
+	0x07, 0xce, 0x00, 0x02, 0x00, 0x09, 0x00, 0x06,
+	0x00, 0x31, 0x00, 0x00, 0x61, 0x63, 0x73, 0x70,
+	0x4d, 0x53, 0x46, 0x54, 0x00, 0x00, 0x00, 0x00,
+	0x49, 0x45, 0x43, 0x20, 0x73, 0x52, 0x47, 0x42,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6,
+	0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d,
+	0x48, 0x50, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x11, 0x63, 0x70, 0x72, 0x74,
+	0x00, 0x00, 0x01, 0x50, 0x00, 0x00, 0x00, 0x33,
+	0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x01, 0x84,
+	0x00, 0x00, 0x00, 0x6c, 0x77, 0x74, 0x70, 0x74,
+	0x00, 0x00, 0x01, 0xf0, 0x00, 0x00, 0x00, 0x14,
+	0x62, 0x6b, 0x70, 0x74, 0x00, 0x00, 0x02, 0x04,
+	0x00, 0x00, 0x00, 0x14, 0x72, 0x58, 0x59, 0x5a,
+	0x00, 0x00, 0x02, 0x18, 0x00, 0x00, 0x00, 0x14,
+	0x67, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x02, 0x2c,
+	0x00, 0x00, 0x00, 0x14, 0x62, 0x58, 0x59, 0x5a,
+	0x00, 0x00, 0x02, 0x40, 0x00, 0x00, 0x00, 0x14,
+	0x64, 0x6d, 0x6e, 0x64, 0x00, 0x00, 0x02, 0x54,
+	0x00, 0x00, 0x00, 0x70, 0x64, 0x6d, 0x64, 0x64,
+	0x00, 0x00, 0x02, 0xc4, 0x00, 0x00, 0x00, 0x88,
+	0x76, 0x75, 0x65, 0x64, 0x00, 0x00, 0x03, 0x4c,
+	0x00, 0x00, 0x00, 0x86, 0x76, 0x69, 0x65, 0x77,
+	0x00, 0x00, 0x03, 0xd4, 0x00, 0x00, 0x00, 0x24,
+	0x6c, 0x75, 0x6d, 0x69, 0x00, 0x00, 0x03, 0xf8,
+	0x00, 0x00, 0x00, 0x14, 0x6d, 0x65, 0x61, 0x73,
+	0x00, 0x00, 0x04, 0x0c, 0x00, 0x00, 0x00, 0x24,
+	0x74, 0x65, 0x63, 0x68, 0x00, 0x00, 0x04, 0x30,
+	0x00, 0x00, 0x00, 0x0c, 0x72, 0x54, 0x52, 0x43,
+	0x00, 0x00, 0x04, 0x3c, 0x00, 0x00, 0x08, 0x0c,
+	0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x04, 0x3c,
+	0x00, 0x00, 0x08, 0x0c, 0x62, 0x54, 0x52, 0x43,
+	0x00, 0x00, 0x04, 0x3c, 0x00, 0x00, 0x08, 0x0c,
+	0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00,
+	0x43, 0x6f, 0x70, 0x79, 0x72, 0x69, 0x67, 0x68,
+	0x74, 0x20, 0x28, 0x63, 0x29, 0x20, 0x31, 0x39,
+	0x39, 0x38, 0x20, 0x48, 0x65, 0x77, 0x6c, 0x65,
+	0x74, 0x74, 0x2d, 0x50, 0x61, 0x63, 0x6b, 0x61,
+	0x72, 0x64, 0x20, 0x43, 0x6f, 0x6d, 0x70, 0x61,
+	0x6e, 0x79, 0x00, 0x00, 0x64, 0x65, 0x73, 0x63,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12,
+	0x73, 0x52, 0x47, 0x42, 0x20, 0x49, 0x45, 0x43,
+	0x36, 0x31, 0x39, 0x36, 0x36, 0x2d, 0x32, 0x2e,
+	0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x12, 0x73, 0x52, 0x47,
+	0x42, 0x20, 0x49, 0x45, 0x43, 0x36, 0x31, 0x39,
+	0x36, 0x36, 0x2d, 0x32, 0x2e, 0x31, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0xf3, 0x51, 0x00, 0x01, 0x00, 0x00,
+	0x00, 0x01, 0x16, 0xcc, 0x58, 0x59, 0x5a, 0x20,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x6f, 0xa2, 0x00, 0x00, 0x38, 0xf5,
+	0x00, 0x00, 0x03, 0x90, 0x58, 0x59, 0x5a, 0x20,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x99,
+	0x00, 0x00, 0xb7, 0x85, 0x00, 0x00, 0x18, 0xda,
+	0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x24, 0xa0, 0x00, 0x00, 0x0f, 0x84,
+	0x00, 0x00, 0xb6, 0xcf, 0x64, 0x65, 0x73, 0x63,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16,
+	0x49, 0x45, 0x43, 0x20, 0x68, 0x74, 0x74, 0x70,
+	0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e, 0x69,
+	0x65, 0x63, 0x2e, 0x63, 0x68, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x16, 0x49, 0x45, 0x43, 0x20, 0x68, 0x74, 0x74,
+	0x70, 0x3a, 0x2f, 0x2f, 0x77, 0x77, 0x77, 0x2e,
+	0x69, 0x65, 0x63, 0x2e, 0x63, 0x68, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x64, 0x65, 0x73, 0x63,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e,
+	0x49, 0x45, 0x43, 0x20, 0x36, 0x31, 0x39, 0x36,
+	0x36, 0x2d, 0x32, 0x2e, 0x31, 0x20, 0x44, 0x65,
+	0x66, 0x61, 0x75, 0x6c, 0x74, 0x20, 0x52, 0x47,
+	0x42, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x75, 0x72,
+	0x20, 0x73, 0x70, 0x61, 0x63, 0x65, 0x20, 0x2d,
+	0x20, 0x73, 0x52, 0x47, 0x42, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x2e, 0x49, 0x45, 0x43, 0x20, 0x36, 0x31, 0x39,
+	0x36, 0x36, 0x2d, 0x32, 0x2e, 0x31, 0x20, 0x44,
+	0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x20, 0x52,
+	0x47, 0x42, 0x20, 0x63, 0x6f, 0x6c, 0x6f, 0x75,
+	0x72, 0x20, 0x73, 0x70, 0x61, 0x63, 0x65, 0x20,
+	0x2d, 0x20, 0x73, 0x52, 0x47, 0x42, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x64, 0x65, 0x73, 0x63,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c,
+	0x52, 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63,
+	0x65, 0x20, 0x56, 0x69, 0x65, 0x77, 0x69, 0x6e,
+	0x67, 0x20, 0x43, 0x6f, 0x6e, 0x64, 0x69, 0x74,
+	0x69, 0x6f, 0x6e, 0x20, 0x69, 0x6e, 0x20, 0x49,
+	0x45, 0x43, 0x36, 0x31, 0x39, 0x36, 0x36, 0x2d,
+	0x32, 0x2e, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x52,
+	0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65,
+	0x20, 0x56, 0x69, 0x65, 0x77, 0x69, 0x6e, 0x67,
+	0x20, 0x43, 0x6f, 0x6e, 0x64, 0x69, 0x74, 0x69,
+	0x6f, 0x6e, 0x20, 0x69, 0x6e, 0x20, 0x49, 0x45,
+	0x43, 0x36, 0x31, 0x39, 0x36, 0x36, 0x2d, 0x32,
+	0x2e, 0x31, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x76, 0x69, 0x65, 0x77,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0xa4, 0xfe,
+	0x00, 0x14, 0x5f, 0x2e, 0x00, 0x10, 0xcf, 0x14,
+	0x00, 0x03, 0xed, 0xcc, 0x00, 0x04, 0x13, 0x0b,
+	0x00, 0x03, 0x5c, 0x9e, 0x00, 0x00, 0x00, 0x01,
+	0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x4c, 0x09, 0x56, 0x00, 0x50, 0x00, 0x00,
+	0x00, 0x57, 0x1f, 0xe7, 0x6d, 0x65, 0x61, 0x73,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x02, 0x8f, 0x00, 0x00, 0x00, 0x02,
+	0x73, 0x69, 0x67, 0x20, 0x00, 0x00, 0x00, 0x00,
+	0x43, 0x52, 0x54, 0x20, 0x63, 0x75, 0x72, 0x76,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+	0x00, 0x00, 0x00, 0x05, 0x00, 0x0a, 0x00, 0x0f,
+	0x00, 0x14, 0x00, 0x19, 0x00, 0x1e, 0x00, 0x23,
+	0x00, 0x28, 0x00, 0x2d, 0x00, 0x32, 0x00, 0x37,
+	0x00, 0x3b, 0x00, 0x40, 0x00, 0x45, 0x00, 0x4a,
+	0x00, 0x4f, 0x00, 0x54, 0x00, 0x59, 0x00, 0x5e,
+	0x00, 0x63, 0x00, 0x68, 0x00, 0x6d, 0x00, 0x72,
+	0x00, 0x77, 0x00, 0x7c, 0x00, 0x81, 0x00, 0x86,
+	0x00, 0x8b, 0x00, 0x90, 0x00, 0x95, 0x00, 0x9a,
+	0x00, 0x9f, 0x00, 0xa4, 0x00, 0xa9, 0x00, 0xae,
+	0x00, 0xb2, 0x00, 0xb7, 0x00, 0xbc, 0x00, 0xc1,
+	0x00, 0xc6, 0x00, 0xcb, 0x00, 0xd0, 0x00, 0xd5,
+	0x00, 0xdb, 0x00, 0xe0, 0x00, 0xe5, 0x00, 0xeb,
+	0x00, 0xf0, 0x00, 0xf6, 0x00, 0xfb, 0x01, 0x01,
+	0x01, 0x07, 0x01, 0x0d, 0x01, 0x13, 0x01, 0x19,
+	0x01, 0x1f, 0x01, 0x25, 0x01, 0x2b, 0x01, 0x32,
+	0x01, 0x38, 0x01, 0x3e, 0x01, 0x45, 0x01, 0x4c,
+	0x01, 0x52, 0x01, 0x59, 0x01, 0x60, 0x01, 0x67,
+	0x01, 0x6e, 0x01, 0x75, 0x01, 0x7c, 0x01, 0x83,
+	0x01, 0x8b, 0x01, 0x92, 0x01, 0x9a, 0x01, 0xa1,
+	0x01, 0xa9, 0x01, 0xb1, 0x01, 0xb9, 0x01, 0xc1,
+	0x01, 0xc9, 0x01, 0xd1, 0x01, 0xd9, 0x01, 0xe1,
+	0x01, 0xe9, 0x01, 0xf2, 0x01, 0xfa, 0x02, 0x03,
+	0x02, 0x0c, 0x02, 0x14, 0x02, 0x1d, 0x02, 0x26,
+	0x02, 0x2f, 0x02, 0x38, 0x02, 0x41, 0x02, 0x4b,
+	0x02, 0x54, 0x02, 0x5d, 0x02, 0x67, 0x02, 0x71,
+	0x02, 0x7a, 0x02, 0x84, 0x02, 0x8e, 0x02, 0x98,
+	0x02, 0xa2, 0x02, 0xac, 0x02, 0xb6, 0x02, 0xc1,
+	0x02, 0xcb, 0x02, 0xd5, 0x02, 0xe0, 0x02, 0xeb,
+	0x02, 0xf5, 0x03, 0x00, 0x03, 0x0b, 0x03, 0x16,
+	0x03, 0x21, 0x03, 0x2d, 0x03, 0x38, 0x03, 0x43,
+	0x03, 0x4f, 0x03, 0x5a, 0x03, 0x66, 0x03, 0x72,
+	0x03, 0x7e, 0x03, 0x8a, 0x03, 0x96, 0x03, 0xa2,
+	0x03, 0xae, 0x03, 0xba, 0x03, 0xc7, 0x03, 0xd3,
+	0x03, 0xe0, 0x03, 0xec, 0x03, 0xf9, 0x04, 0x06,
+	0x04, 0x13, 0x04, 0x20, 0x04, 0x2d, 0x04, 0x3b,
+	0x04, 0x48, 0x04, 0x55, 0x04, 0x63, 0x04, 0x71,
+	0x04, 0x7e, 0x04, 0x8c, 0x04, 0x9a, 0x04, 0xa8,
+	0x04, 0xb6, 0x04, 0xc4, 0x04, 0xd3, 0x04, 0xe1,
+	0x04, 0xf0, 0x04, 0xfe, 0x05, 0x0d, 0x05, 0x1c,
+	0x05, 0x2b, 0x05, 0x3a, 0x05, 0x49, 0x05, 0x58,
+	0x05, 0x67, 0x05, 0x77, 0x05, 0x86, 0x05, 0x96,
+	0x05, 0xa6, 0x05, 0xb5, 0x05, 0xc5, 0x05, 0xd5,
+	0x05, 0xe5, 0x05, 0xf6, 0x06, 0x06, 0x06, 0x16,
+	0x06, 0x27, 0x06, 0x37, 0x06, 0x48, 0x06, 0x59,
+	0x06, 0x6a, 0x06, 0x7b, 0x06, 0x8c, 0x06, 0x9d,
+	0x06, 0xaf, 0x06, 0xc0, 0x06, 0xd1, 0x06, 0xe3,
+	0x06, 0xf5, 0x07, 0x07, 0x07, 0x19, 0x07, 0x2b,
+	0x07, 0x3d, 0x07, 0x4f, 0x07, 0x61, 0x07, 0x74,
+	0x07, 0x86, 0x07, 0x99, 0x07, 0xac, 0x07, 0xbf,
+	0x07, 0xd2, 0x07, 0xe5, 0x07, 0xf8, 0x08, 0x0b,
+	0x08, 0x1f, 0x08, 0x32, 0x08, 0x46, 0x08, 0x5a,
+	0x08, 0x6e, 0x08, 0x82, 0x08, 0x96, 0x08, 0xaa,
+	0x08, 0xbe, 0x08, 0xd2, 0x08, 0xe7, 0x08, 0xfb,
+	0x09, 0x10, 0x09, 0x25, 0x09, 0x3a, 0x09, 0x4f,
+	0x09, 0x64, 0x09, 0x79, 0x09, 0x8f, 0x09, 0xa4,
+	0x09, 0xba, 0x09, 0xcf, 0x09, 0xe5, 0x09, 0xfb,
+	0x0a, 0x11, 0x0a, 0x27, 0x0a, 0x3d, 0x0a, 0x54,
+	0x0a, 0x6a, 0x0a, 0x81, 0x0a, 0x98, 0x0a, 0xae,
+	0x0a, 0xc5, 0x0a, 0xdc, 0x0a, 0xf3, 0x0b, 0x0b,
+	0x0b, 0x22, 0x0b, 0x39, 0x0b, 0x51, 0x0b, 0x69,
+	0x0b, 0x80, 0x0b, 0x98, 0x0b, 0xb0, 0x0b, 0xc8,
+	0x0b, 0xe1, 0x0b, 0xf9, 0x0c, 0x12, 0x0c, 0x2a,
+	0x0c, 0x43, 0x0c, 0x5c, 0x0c, 0x75, 0x0c, 0x8e,
+	0x0c, 0xa7, 0x0c, 0xc0, 0x0c, 0xd9, 0x0c, 0xf3,
+	0x0d, 0x0d, 0x0d, 0x26, 0x0d, 0x40, 0x0d, 0x5a,
+	0x0d, 0x74, 0x0d, 0x8e, 0x0d, 0xa9, 0x0d, 0xc3,
+	0x0d, 0xde, 0x0d, 0xf8, 0x0e, 0x13, 0x0e, 0x2e,
+	0x0e, 0x49, 0x0e, 0x64, 0x0e, 0x7f, 0x0e, 0x9b,
+	0x0e, 0xb6, 0x0e, 0xd2, 0x0e, 0xee, 0x0f, 0x09,
+	0x0f, 0x25, 0x0f, 0x41, 0x0f, 0x5e, 0x0f, 0x7a,
+	0x0f, 0x96, 0x0f, 0xb3, 0x0f, 0xcf, 0x0f, 0xec,
+	0x10, 0x09, 0x10, 0x26, 0x10, 0x43, 0x10, 0x61,
+	0x10, 0x7e, 0x10, 0x9b, 0x10, 0xb9, 0x10, 0xd7,
+	0x10, 0xf5, 0x11, 0x13, 0x11, 0x31, 0x11, 0x4f,
+	0x11, 0x6d, 0x11, 0x8c, 0x11, 0xaa, 0x11, 0xc9,
+	0x11, 0xe8, 0x12, 0x07, 0x12, 0x26, 0x12, 0x45,
+	0x12, 0x64, 0x12, 0x84, 0x12, 0xa3, 0x12, 0xc3,
+	0x12, 0xe3, 0x13, 0x03, 0x13, 0x23, 0x13, 0x43,
+	0x13, 0x63, 0x13, 0x83, 0x13, 0xa4, 0x13, 0xc5,
+	0x13, 0xe5, 0x14, 0x06, 0x14, 0x27, 0x14, 0x49,
+	0x14, 0x6a, 0x14, 0x8b, 0x14, 0xad, 0x14, 0xce,
+	0x14, 0xf0, 0x15, 0x12, 0x15, 0x34, 0x15, 0x56,
+	0x15, 0x78, 0x15, 0x9b, 0x15, 0xbd, 0x15, 0xe0,
+	0x16, 0x03, 0x16, 0x26, 0x16, 0x49, 0x16, 0x6c,
+	0x16, 0x8f, 0x16, 0xb2, 0x16, 0xd6, 0x16, 0xfa,
+	0x17, 0x1d, 0x17, 0x41, 0x17, 0x65, 0x17, 0x89,
+	0x17, 0xae, 0x17, 0xd2, 0x17, 0xf7, 0x18, 0x1b,
+	0x18, 0x40, 0x18, 0x65, 0x18, 0x8a, 0x18, 0xaf,
+	0x18, 0xd5, 0x18, 0xfa, 0x19, 0x20, 0x19, 0x45,
+	0x19, 0x6b, 0x19, 0x91, 0x19, 0xb7, 0x19, 0xdd,
+	0x1a, 0x04, 0x1a, 0x2a, 0x1a, 0x51, 0x1a, 0x77,
+	0x1a, 0x9e, 0x1a, 0xc5, 0x1a, 0xec, 0x1b, 0x14,
+	0x1b, 0x3b, 0x1b, 0x63, 0x1b, 0x8a, 0x1b, 0xb2,
+	0x1b, 0xda, 0x1c, 0x02, 0x1c, 0x2a, 0x1c, 0x52,
+	0x1c, 0x7b, 0x1c, 0xa3, 0x1c, 0xcc, 0x1c, 0xf5,
+	0x1d, 0x1e, 0x1d, 0x47, 0x1d, 0x70, 0x1d, 0x99,
+	0x1d, 0xc3, 0x1d, 0xec, 0x1e, 0x16, 0x1e, 0x40,
+	0x1e, 0x6a, 0x1e, 0x94, 0x1e, 0xbe, 0x1e, 0xe9,
+	0x1f, 0x13, 0x1f, 0x3e, 0x1f, 0x69, 0x1f, 0x94,
+	0x1f, 0xbf, 0x1f, 0xea, 0x20, 0x15, 0x20, 0x41,
+	0x20, 0x6c, 0x20, 0x98, 0x20, 0xc4, 0x20, 0xf0,
+	0x21, 0x1c, 0x21, 0x48, 0x21, 0x75, 0x21, 0xa1,
+	0x21, 0xce, 0x21, 0xfb, 0x22, 0x27, 0x22, 0x55,
+	0x22, 0x82, 0x22, 0xaf, 0x22, 0xdd, 0x23, 0x0a,
+	0x23, 0x38, 0x23, 0x66, 0x23, 0x94, 0x23, 0xc2,
+	0x23, 0xf0, 0x24, 0x1f, 0x24, 0x4d, 0x24, 0x7c,
+	0x24, 0xab, 0x24, 0xda, 0x25, 0x09, 0x25, 0x38,
+	0x25, 0x68, 0x25, 0x97, 0x25, 0xc7, 0x25, 0xf7,
+	0x26, 0x27, 0x26, 0x57, 0x26, 0x87, 0x26, 0xb7,
+	0x26, 0xe8, 0x27, 0x18, 0x27, 0x49, 0x27, 0x7a,
+	0x27, 0xab, 0x27, 0xdc, 0x28, 0x0d, 0x28, 0x3f,
+	0x28, 0x71, 0x28, 0xa2, 0x28, 0xd4, 0x29, 0x06,
+	0x29, 0x38, 0x29, 0x6b, 0x29, 0x9d, 0x29, 0xd0,
+	0x2a, 0x02, 0x2a, 0x35, 0x2a, 0x68, 0x2a, 0x9b,
+	0x2a, 0xcf, 0x2b, 0x02, 0x2b, 0x36, 0x2b, 0x69,
+	0x2b, 0x9d, 0x2b, 0xd1, 0x2c, 0x05, 0x2c, 0x39,
+	0x2c, 0x6e, 0x2c, 0xa2, 0x2c, 0xd7, 0x2d, 0x0c,
+	0x2d, 0x41, 0x2d, 0x76, 0x2d, 0xab, 0x2d, 0xe1,
+	0x2e, 0x16, 0x2e, 0x4c, 0x2e, 0x82, 0x2e, 0xb7,
+	0x2e, 0xee, 0x2f, 0x24, 0x2f, 0x5a, 0x2f, 0x91,
+	0x2f, 0xc7, 0x2f, 0xfe, 0x30, 0x35, 0x30, 0x6c,
+	0x30, 0xa4, 0x30, 0xdb, 0x31, 0x12, 0x31, 0x4a,
+	0x31, 0x82, 0x31, 0xba, 0x31, 0xf2, 0x32, 0x2a,
+	0x32, 0x63, 0x32, 0x9b, 0x32, 0xd4, 0x33, 0x0d,
+	0x33, 0x46, 0x33, 0x7f, 0x33, 0xb8, 0x33, 0xf1,
+	0x34, 0x2b, 0x34, 0x65, 0x34, 0x9e, 0x34, 0xd8,
+	0x35, 0x13, 0x35, 0x4d, 0x35, 0x87, 0x35, 0xc2,
+	0x35, 0xfd, 0x36, 0x37, 0x36, 0x72, 0x36, 0xae,
+	0x36, 0xe9, 0x37, 0x24, 0x37, 0x60, 0x37, 0x9c,
+	0x37, 0xd7, 0x38, 0x14, 0x38, 0x50, 0x38, 0x8c,
+	0x38, 0xc8, 0x39, 0x05, 0x39, 0x42, 0x39, 0x7f,
+	0x39, 0xbc, 0x39, 0xf9, 0x3a, 0x36, 0x3a, 0x74,
+	0x3a, 0xb2, 0x3a, 0xef, 0x3b, 0x2d, 0x3b, 0x6b,
+	0x3b, 0xaa, 0x3b, 0xe8, 0x3c, 0x27, 0x3c, 0x65,
+	0x3c, 0xa4, 0x3c, 0xe3, 0x3d, 0x22, 0x3d, 0x61,
+	0x3d, 0xa1, 0x3d, 0xe0, 0x3e, 0x20, 0x3e, 0x60,
+	0x3e, 0xa0, 0x3e, 0xe0, 0x3f, 0x21, 0x3f, 0x61,
+	0x3f, 0xa2, 0x3f, 0xe2, 0x40, 0x23, 0x40, 0x64,
+	0x40, 0xa6, 0x40, 0xe7, 0x41, 0x29, 0x41, 0x6a,
+	0x41, 0xac, 0x41, 0xee, 0x42, 0x30, 0x42, 0x72,
+	0x42, 0xb5, 0x42, 0xf7, 0x43, 0x3a, 0x43, 0x7d,
+	0x43, 0xc0, 0x44, 0x03, 0x44, 0x47, 0x44, 0x8a,
+	0x44, 0xce, 0x45, 0x12, 0x45, 0x55, 0x45, 0x9a,
+	0x45, 0xde, 0x46, 0x22, 0x46, 0x67, 0x46, 0xab,
+	0x46, 0xf0, 0x47, 0x35, 0x47, 0x7b, 0x47, 0xc0,
+	0x48, 0x05, 0x48, 0x4b, 0x48, 0x91, 0x48, 0xd7,
+	0x49, 0x1d, 0x49, 0x63, 0x49, 0xa9, 0x49, 0xf0,
+	0x4a, 0x37, 0x4a, 0x7d, 0x4a, 0xc4, 0x4b, 0x0c,
+	0x4b, 0x53, 0x4b, 0x9a, 0x4b, 0xe2, 0x4c, 0x2a,
+	0x4c, 0x72, 0x4c, 0xba, 0x4d, 0x02, 0x4d, 0x4a,
+	0x4d, 0x93, 0x4d, 0xdc, 0x4e, 0x25, 0x4e, 0x6e,
+	0x4e, 0xb7, 0x4f, 0x00, 0x4f, 0x49, 0x4f, 0x93,
+	0x4f, 0xdd, 0x50, 0x27, 0x50, 0x71, 0x50, 0xbb,
+	0x51, 0x06, 0x51, 0x50, 0x51, 0x9b, 0x51, 0xe6,
+	0x52, 0x31, 0x52, 0x7c, 0x52, 0xc7, 0x53, 0x13,
+	0x53, 0x5f, 0x53, 0xaa, 0x53, 0xf6, 0x54, 0x42,
+	0x54, 0x8f, 0x54, 0xdb, 0x55, 0x28, 0x55, 0x75,
+	0x55, 0xc2, 0x56, 0x0f, 0x56, 0x5c, 0x56, 0xa9,
+	0x56, 0xf7, 0x57, 0x44, 0x57, 0x92, 0x57, 0xe0,
+	0x58, 0x2f, 0x58, 0x7d, 0x58, 0xcb, 0x59, 0x1a,
+	0x59, 0x69, 0x59, 0xb8, 0x5a, 0x07, 0x5a, 0x56,
+	0x5a, 0xa6, 0x5a, 0xf5, 0x5b, 0x45, 0x5b, 0x95,
+	0x5b, 0xe5, 0x5c, 0x35, 0x5c, 0x86, 0x5c, 0xd6,
+	0x5d, 0x27, 0x5d, 0x78, 0x5d, 0xc9, 0x5e, 0x1a,
+	0x5e, 0x6c, 0x5e, 0xbd, 0x5f, 0x0f, 0x5f, 0x61,
+	0x5f, 0xb3, 0x60, 0x05, 0x60, 0x57, 0x60, 0xaa,
+	0x60, 0xfc, 0x61, 0x4f, 0x61, 0xa2, 0x61, 0xf5,
+	0x62, 0x49, 0x62, 0x9c, 0x62, 0xf0, 0x63, 0x43,
+	0x63, 0x97, 0x63, 0xeb, 0x64, 0x40, 0x64, 0x94,
+	0x64, 0xe9, 0x65, 0x3d, 0x65, 0x92, 0x65, 0xe7,
+	0x66, 0x3d, 0x66, 0x92, 0x66, 0xe8, 0x67, 0x3d,
+	0x67, 0x93, 0x67, 0xe9, 0x68, 0x3f, 0x68, 0x96,
+	0x68, 0xec, 0x69, 0x43, 0x69, 0x9a, 0x69, 0xf1,
+	0x6a, 0x48, 0x6a, 0x9f, 0x6a, 0xf7, 0x6b, 0x4f,
+	0x6b, 0xa7, 0x6b, 0xff, 0x6c, 0x57, 0x6c, 0xaf,
+	0x6d, 0x08, 0x6d, 0x60, 0x6d, 0xb9, 0x6e, 0x12,
+	0x6e, 0x6b, 0x6e, 0xc4, 0x6f, 0x1e, 0x6f, 0x78,
+	0x6f, 0xd1, 0x70, 0x2b, 0x70, 0x86, 0x70, 0xe0,
+	0x71, 0x3a, 0x71, 0x95, 0x71, 0xf0, 0x72, 0x4b,
+	0x72, 0xa6, 0x73, 0x01, 0x73, 0x5d, 0x73, 0xb8,
+	0x74, 0x14, 0x74, 0x70, 0x74, 0xcc, 0x75, 0x28,
+	0x75, 0x85, 0x75, 0xe1, 0x76, 0x3e, 0x76, 0x9b,
+	0x76, 0xf8, 0x77, 0x56, 0x77, 0xb3, 0x78, 0x11,
+	0x78, 0x6e, 0x78, 0xcc, 0x79, 0x2a, 0x79, 0x89,
+	0x79, 0xe7, 0x7a, 0x46, 0x7a, 0xa5, 0x7b, 0x04,
+	0x7b, 0x63, 0x7b, 0xc2, 0x7c, 0x21, 0x7c, 0x81,
+	0x7c, 0xe1, 0x7d, 0x41, 0x7d, 0xa1, 0x7e, 0x01,
+	0x7e, 0x62, 0x7e, 0xc2, 0x7f, 0x23, 0x7f, 0x84,
+	0x7f, 0xe5, 0x80, 0x47, 0x80, 0xa8, 0x81, 0x0a,
+	0x81, 0x6b, 0x81, 0xcd, 0x82, 0x30, 0x82, 0x92,
+	0x82, 0xf4, 0x83, 0x57, 0x83, 0xba, 0x84, 0x1d,
+	0x84, 0x80, 0x84, 0xe3, 0x85, 0x47, 0x85, 0xab,
+	0x86, 0x0e, 0x86, 0x72, 0x86, 0xd7, 0x87, 0x3b,
+	0x87, 0x9f, 0x88, 0x04, 0x88, 0x69, 0x88, 0xce,
+	0x89, 0x33, 0x89, 0x99, 0x89, 0xfe, 0x8a, 0x64,
+	0x8a, 0xca, 0x8b, 0x30, 0x8b, 0x96, 0x8b, 0xfc,
+	0x8c, 0x63, 0x8c, 0xca, 0x8d, 0x31, 0x8d, 0x98,
+	0x8d, 0xff, 0x8e, 0x66, 0x8e, 0xce, 0x8f, 0x36,
+	0x8f, 0x9e, 0x90, 0x06, 0x90, 0x6e, 0x90, 0xd6,
+	0x91, 0x3f, 0x91, 0xa8, 0x92, 0x11, 0x92, 0x7a,
+	0x92, 0xe3, 0x93, 0x4d, 0x93, 0xb6, 0x94, 0x20,
+	0x94, 0x8a, 0x94, 0xf4, 0x95, 0x5f, 0x95, 0xc9,
+	0x96, 0x34, 0x96, 0x9f, 0x97, 0x0a, 0x97, 0x75,
+	0x97, 0xe0, 0x98, 0x4c, 0x98, 0xb8, 0x99, 0x24,
+	0x99, 0x90, 0x99, 0xfc, 0x9a, 0x68, 0x9a, 0xd5,
+	0x9b, 0x42, 0x9b, 0xaf, 0x9c, 0x1c, 0x9c, 0x89,
+	0x9c, 0xf7, 0x9d, 0x64, 0x9d, 0xd2, 0x9e, 0x40,
+	0x9e, 0xae, 0x9f, 0x1d, 0x9f, 0x8b, 0x9f, 0xfa,
+	0xa0, 0x69, 0xa0, 0xd8, 0xa1, 0x47, 0xa1, 0xb6,
+	0xa2, 0x26, 0xa2, 0x96, 0xa3, 0x06, 0xa3, 0x76,
+	0xa3, 0xe6, 0xa4, 0x56, 0xa4, 0xc7, 0xa5, 0x38,
+	0xa5, 0xa9, 0xa6, 0x1a, 0xa6, 0x8b, 0xa6, 0xfd,
+	0xa7, 0x6e, 0xa7, 0xe0, 0xa8, 0x52, 0xa8, 0xc4,
+	0xa9, 0x37, 0xa9, 0xa9, 0xaa, 0x1c, 0xaa, 0x8f,
+	0xab, 0x02, 0xab, 0x75, 0xab, 0xe9, 0xac, 0x5c,
+	0xac, 0xd0, 0xad, 0x44, 0xad, 0xb8, 0xae, 0x2d,
+	0xae, 0xa1, 0xaf, 0x16, 0xaf, 0x8b, 0xb0, 0x00,
+	0xb0, 0x75, 0xb0, 0xea, 0xb1, 0x60, 0xb1, 0xd6,
+	0xb2, 0x4b, 0xb2, 0xc2, 0xb3, 0x38, 0xb3, 0xae,
+	0xb4, 0x25, 0xb4, 0x9c, 0xb5, 0x13, 0xb5, 0x8a,
+	0xb6, 0x01, 0xb6, 0x79, 0xb6, 0xf0, 0xb7, 0x68,
+	0xb7, 0xe0, 0xb8, 0x59, 0xb8, 0xd1, 0xb9, 0x4a,
+	0xb9, 0xc2, 0xba, 0x3b, 0xba, 0xb5, 0xbb, 0x2e,
+	0xbb, 0xa7, 0xbc, 0x21, 0xbc, 0x9b, 0xbd, 0x15,
+	0xbd, 0x8f, 0xbe, 0x0a, 0xbe, 0x84, 0xbe, 0xff,
+	0xbf, 0x7a, 0xbf, 0xf5, 0xc0, 0x70, 0xc0, 0xec,
+	0xc1, 0x67, 0xc1, 0xe3, 0xc2, 0x5f, 0xc2, 0xdb,
+	0xc3, 0x58, 0xc3, 0xd4, 0xc4, 0x51, 0xc4, 0xce,
+	0xc5, 0x4b, 0xc5, 0xc8, 0xc6, 0x46, 0xc6, 0xc3,
+	0xc7, 0x41, 0xc7, 0xbf, 0xc8, 0x3d, 0xc8, 0xbc,
+	0xc9, 0x3a, 0xc9, 0xb9, 0xca, 0x38, 0xca, 0xb7,
+	0xcb, 0x36, 0xcb, 0xb6, 0xcc, 0x35, 0xcc, 0xb5,
+	0xcd, 0x35, 0xcd, 0xb5, 0xce, 0x36, 0xce, 0xb6,
+	0xcf, 0x37, 0xcf, 0xb8, 0xd0, 0x39, 0xd0, 0xba,
+	0xd1, 0x3c, 0xd1, 0xbe, 0xd2, 0x3f, 0xd2, 0xc1,
+	0xd3, 0x44, 0xd3, 0xc6, 0xd4, 0x49, 0xd4, 0xcb,
+	0xd5, 0x4e, 0xd5, 0xd1, 0xd6, 0x55, 0xd6, 0xd8,
+	0xd7, 0x5c, 0xd7, 0xe0, 0xd8, 0x64, 0xd8, 0xe8,
+	0xd9, 0x6c, 0xd9, 0xf1, 0xda, 0x76, 0xda, 0xfb,
+	0xdb, 0x80, 0xdc, 0x05, 0xdc, 0x8a, 0xdd, 0x10,
+	0xdd, 0x96, 0xde, 0x1c, 0xde, 0xa2, 0xdf, 0x29,
+	0xdf, 0xaf, 0xe0, 0x36, 0xe0, 0xbd, 0xe1, 0x44,
+	0xe1, 0xcc, 0xe2, 0x53, 0xe2, 0xdb, 0xe3, 0x63,
+	0xe3, 0xeb, 0xe4, 0x73, 0xe4, 0xfc, 0xe5, 0x84,
+	0xe6, 0x0d, 0xe6, 0x96, 0xe7, 0x1f, 0xe7, 0xa9,
+	0xe8, 0x32, 0xe8, 0xbc, 0xe9, 0x46, 0xe9, 0xd0,
+	0xea, 0x5b, 0xea, 0xe5, 0xeb, 0x70, 0xeb, 0xfb,
+	0xec, 0x86, 0xed, 0x11, 0xed, 0x9c, 0xee, 0x28,
+	0xee, 0xb4, 0xef, 0x40, 0xef, 0xcc, 0xf0, 0x58,
+	0xf0, 0xe5, 0xf1, 0x72, 0xf1, 0xff, 0xf2, 0x8c,
+	0xf3, 0x19, 0xf3, 0xa7, 0xf4, 0x34, 0xf4, 0xc2,
+	0xf5, 0x50, 0xf5, 0xde, 0xf6, 0x6d, 0xf6, 0xfb,
+	0xf7, 0x8a, 0xf8, 0x19, 0xf8, 0xa8, 0xf9, 0x38,
+	0xf9, 0xc7, 0xfa, 0x57, 0xfa, 0xe7, 0xfb, 0x77,
+	0xfc, 0x07, 0xfc, 0x98, 0xfd, 0x29, 0xfd, 0xba,
+	0xfe, 0x4b, 0xfe, 0xdc, 0xff, 0x6d, 0xff, 0xff
+};
+
+int jas_iccprofdata_srgblen = sizeof(jas_iccprofdata_srgb);	/* size in bytes of the jas_iccprofdata_srgb array */
+
+uchar jas_iccprofdata_sgray[] = {	/* embedded profile data; its description text reads "Restricted ICC profile describing sRGB-grey" */
+	0x00, 0x00, 0x01, 0x8a, 0x00, 0x00, 0x00, 0x00,	/* first word 0x0000018a = 394, the length of this array in bytes */
+	0x02, 0x20, 0x00, 0x00, 0x73, 0x63, 0x6e, 0x72,	/* ... "scnr" */
+	0x47, 0x52, 0x41, 0x59, 0x58, 0x59, 0x5a, 0x20,	/* "GRAY" "XYZ " */
+	0x07, 0xd3, 0x00, 0x01, 0x00, 0x1f, 0x00, 0x0d,
+	0x00, 0x35, 0x00, 0x21, 0x61, 0x63, 0x73, 0x70,	/* ... "acsp" */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+	0x4b, 0x4f, 0x44, 0x41, 0x73, 0x47, 0x72, 0x79,	/* "KODA" "sGry" */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6,
+	0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d,
+	0x4a, 0x50, 0x45, 0x47, 0x00, 0x00, 0x00, 0x00,	/* "JPEG" */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x04, 0x64, 0x65, 0x73, 0x63,	/* the value 4, then "desc" */
+	0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x86,
+	0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x3c,	/* "cprt" */
+	0x00, 0x00, 0x00, 0x2b, 0x77, 0x74, 0x70, 0x74,	/* ... "wtpt" */
+	0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x14,
+	0x6b, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x7c,	/* "kTRC" */
+	0x00, 0x00, 0x00, 0x0e, 0x64, 0x65, 0x73, 0x63,	/* ... "desc" */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c,
+	0x52, 0x65, 0x73, 0x74, 0x72, 0x69, 0x63, 0x74,	/* "Restrict" */
+	0x65, 0x64, 0x20, 0x49, 0x43, 0x43, 0x20, 0x70,	/* "ed ICC p" */
+	0x72, 0x6f, 0x66, 0x69, 0x6c, 0x65, 0x20, 0x64,	/* "rofile d" */
+	0x65, 0x73, 0x63, 0x72, 0x69, 0x62, 0x69, 0x6e,	/* "escribin" */
+	0x67, 0x20, 0x73, 0x52, 0x47, 0x42, 0x2d, 0x67,	/* "g sRGB-g" */
+	0x72, 0x65, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00,	/* "rey" */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x74,	/* ... "text" */
+	0x00, 0x00, 0x00, 0x00, 0x43, 0x6f, 0x70, 0x79,	/* ... "Copy" */
+	0x72, 0x69, 0x67, 0x68, 0x74, 0x20, 0x32, 0x30,	/* "right 20" */
+	0x30, 0x33, 0x20, 0x73, 0x52, 0x47, 0x42, 0x2d,	/* "03 sRGB-" */
+	0x67, 0x72, 0x65, 0x79, 0x20, 0x52, 0x65, 0x66,	/* "grey Ref" */
+	0x65, 0x72, 0x65, 0x6e, 0x63, 0x65, 0x00, 0x00,	/* "erence" */
+	0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,	/* "XYZ " */
+	0x00, 0x00, 0xf3, 0x54, 0x00, 0x01, 0x00, 0x00,
+	0x00, 0x01, 0x16, 0xcf, 0x63, 0x75, 0x72, 0x76,	/* ... "curv" */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+	0x01, 0xcd
+};
+
+int jas_iccprofdata_sgraylen = sizeof(jas_iccprofdata_sgray);	/* size in bytes of the array above */
diff --git a/src/libjasper/base/jas_image.c b/src/libjasper/base/jas_image.c
new file mode 100644
index 0000000..3f5af8c
--- /dev/null
+++ b/src/libjasper/base/jas_image.c
@@ -0,0 +1,1516 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+
+ GeoJasper revision: dima <dima@dimin.net>
+   11/07/2003 15:00 - dima - aux_buf added for j_image_t
+   22/09/2003 14:40 - dima - small correction in jas_image_writecmpt
+   2007-04-23 12:23 - dima - updated for a vector of metadata boxes
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Image Library
+ *
+ * $Id: jas_image.c,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include "jasper/jas_math.h"
+#include "jasper/jas_image.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_string.h"
+
+/******************************************************************************\
+* GeoJasper: dima - progress functions
+\******************************************************************************/
+
+#if !defined( JAS_PROGRESS_PROC_DEFINED )	/* define the callback slots only if this guard is not set yet */
+  #define JAS_PROGRESS_PROC_DEFINED
+  jas_progress_proc_t   progress_proc   = NULL;	/* progress callback; NULL means jas_do_progress() does nothing */
+  jas_test_abort_proc_t test_abort_proc = NULL;	/* abort-query callback; NULL means jas_test_abort() returns 0 */
+#endif
+
+void jas_set_progress_proc( jas_progress_proc_t new_proc ) {
+  progress_proc = new_proc;	/* stored as-is; while it is NULL, jas_do_progress() skips the call */
+}
+
+void jas_do_progress( int done, int total, char *descr ) {
+  if (progress_proc) (*progress_proc)( done, total, descr );	/* forwards the arguments unchanged; no-op while no callback is installed */
+}
+
+void jas_set_test_abort_proc( jas_test_abort_proc_t new_proc ) {
+  test_abort_proc = new_proc;	/* stored as-is; while it is NULL, jas_test_abort() always returns 0 */
+}
+
+int jas_test_abort( ) {
+  /* Without an installed hook the answer is always 0; otherwise the hook's return value is passed through. */
+  return (test_abort_proc == NULL) ? 0 : (*test_abort_proc)();
+}
+
+/******************************************************************************\
+* Types.
+\******************************************************************************/
+
+#define	FLOORDIV(x, y) ((x) / (y))	/* plain C division: for integer operands this truncates toward zero, so it is a true floor only when the quotient is non-negative */
+
+/******************************************************************************\
+* Local prototypes.
+\******************************************************************************/
+
+/* Component lifecycle and image bookkeeping helpers. */
+static jas_image_cmpt_t *jas_image_cmpt_create0(void);
+static jas_image_cmpt_t *jas_image_cmpt_create(uint_fast32_t tlx, uint_fast32_t tly,
+  uint_fast32_t hstep, uint_fast32_t vstep, uint_fast32_t width, uint_fast32_t
+  height, uint_fast16_t depth, bool sgnd, uint_fast32_t inmem);
+static jas_image_cmpt_t *jas_image_cmpt_copy(jas_image_cmpt_t *cmpt);
+static void jas_image_cmpt_destroy(jas_image_cmpt_t *cmpt);
+static int jas_image_growcmpts(jas_image_t *image, int maxcmpts);
+static void jas_image_setbbox(jas_image_t *image);
+static void jas_image_calcbbox2(jas_image_t *image, jas_image_coord_t *tlx,
+  jas_image_coord_t *tly, jas_image_coord_t *brx, jas_image_coord_t *bry);
+/* Numeric helpers. */
+static uint_fast32_t inttobits(jas_seqent_t v, int prec, bool sgnd);
+static jas_seqent_t bitstoint(uint_fast32_t v, int prec, bool sgnd);
+static int putint(jas_stream_t *out, int sgnd, int prec, long val);
+static int getint(jas_stream_t *in, int sgnd, int prec, long *val);
+static long uptomult(long x, long y);
+static long downtomult(long x, long y);
+static long convert(long val, int oldsgnd, int oldprec, int newsgnd,
+  int newprec);
+
+/******************************************************************************\
+* Global data.
+\******************************************************************************/
+
+static int jas_image_numfmts = 0;	/* number of occupied leading slots in jas_image_fmtinfos */
+static jas_image_fmtinfo_t jas_image_fmtinfos[JAS_IMAGE_MAXFMTS];	/* format registry; slots are filled in order by jas_image_addfmt() */
+
+/******************************************************************************\
+* Create and destroy operations.
+\******************************************************************************/
+
+/* Build an image with numcmpts components described by cmptparms[] in the
+   colour space clrspc.  Returns the new image, or 0 if anything fails. */
+jas_image_t *jas_image_create(int numcmpts, jas_image_cmptparm_t *cmptparms,
+  int clrspc)
+{
+	jas_image_t *image;
+	jas_image_cmptparm_t *parm;
+	uint_fast32_t rawsize;
+	uint_fast32_t inmem;
+	int cmptno;
+
+	image = jas_image_create0();
+	if (!image) {
+		return 0;
+	}
+	image->clrspc_ = clrspc;
+	image->maxcmpts_ = numcmpts;
+	image->inmem_ = true;
+
+	/* Component pointer table; every slot is cleared up front in case a
+	  later step fails. */
+	image->cmpts_ = jas_malloc(image->maxcmpts_ * sizeof(jas_image_cmpt_t *));
+	if (!image->cmpts_) {
+		goto error;
+	}
+	for (cmptno = 0; cmptno < image->maxcmpts_; ++cmptno) {
+		image->cmpts_[cmptno] = 0;
+	}
+
+	/* Approximate raw size summed over all components; it is used only to
+	  choose where the sample data is kept. */
+	rawsize = 0;
+	for (cmptno = 0; cmptno < numcmpts; ++cmptno) {
+		parm = &cmptparms[cmptno];
+		rawsize += parm->width * parm->height * (parm->prec + 7) / 8;
+	}
+	/* Memory streams below the threshold, temporary files otherwise. */
+	inmem = (rawsize < JAS_IMAGE_INMEMTHRESH);
+
+	/* Create the components; numcmpts_ counts only those that exist. */
+	for (cmptno = 0; cmptno < numcmpts; ++cmptno) {
+		parm = &cmptparms[cmptno];
+		image->cmpts_[cmptno] = jas_image_cmpt_create(parm->tlx, parm->tly,
+		  parm->hstep, parm->vstep, parm->width, parm->height, parm->prec,
+		  parm->sgnd, inmem);
+		if (!image->cmpts_[cmptno]) {
+			goto error;
+		}
+		++image->numcmpts_;
+	}
+
+	/* Bounding box of all components on the reference grid (the image area). */
+	jas_image_setbbox(image);
+	return image;
+error:
+	jas_image_destroy(image);
+	return 0;
+}
+
+// GeoJasper: begin - dima - metadata box buffer utils
+jas_metadata_box_t jas_box_init( void ) {
+  jas_metadata_box_t empty;	/* returned by value: no buffer, zero size */
+  empty.buf  = NULL;
+  empty.size = 0;
+  memset( (void *)empty.id, 0, 16 );	/* zeroes 16 bytes at id */
+  return empty;
+}
+
+/* Reset *box and give it a fresh buffer of size bytes; returns true on success, false if box is NULL or allocation fails. */
+bool jas_box_alloc ( jas_metadata_box_t *box, unsigned long size ) {
+  if (box == NULL) return false;
+  *box = jas_box_init();	/* previous contents are overwritten, not freed */
+  if ( (box->buf = (uint_fast8_t *) jas_malloc( size )) != NULL ) box->size = size;
+  return box->buf != NULL;	/* explicit comparison: casting the pointer to a "bool" that is not _Bool could truncate it */
+}
+
+/* Release the buffer held by *box (when buf and size are both non-zero) and reset the box; a NULL box is ignored. */
+void jas_box_free( jas_metadata_box_t *box ) {
+  if (box == NULL) return;
+  if ( (box->buf != NULL) && (box->size != 0) ) jas_free(box->buf);	/* NOTE(review): a box with size 0 but a non-NULL buf is not freed */
+  *box = jas_box_init();
+}
+// GeoJasper: end   - dima - metadata box buffer utils
+
+/* Allocate an image with no components, unknown colour space, no colour profile and empty metadata boxes; 0 if out of memory. */
+jas_image_t *jas_image_create0()
+{
+	jas_image_t *image = jas_malloc(sizeof(jas_image_t));
+	int boxno;
+
+	if (!image) {
+		return 0;
+	}
+
+	/* No component table yet. */
+	image->numcmpts_ = 0;
+	image->maxcmpts_ = 0;
+	image->cmpts_ = 0;
+	image->inmem_ = true;
+	/* Zeroed bounding box, colour space unknown, no profile attached. */
+	image->tlx_ = 0;
+	image->tly_ = 0;
+	image->brx_ = 0;
+	image->bry_ = 0;
+	image->clrspc_ = JAS_CLRSPC_UNKNOWN;
+	image->cmprof_ = 0;
+
+  // GeoJasper: begin - dima - buffer defines
+  image->metadata.count = JAS_IMAGE_NUM_BOXES;
+  for (boxno = 0; boxno < image->metadata.count; ++boxno)
+    image->metadata.boxes[boxno] = jas_box_init();
+  // GeoJasper: end - dima - buffer defines
+	return image;
+}
+
+/* Copy every component and the colour profile (if any) of image into a new image; returns 0 if any step fails. */
+jas_image_t *jas_image_copy(jas_image_t *image)
+{
+	jas_image_t *newimage;
+	int cmptno;
+
+	if (!(newimage = jas_image_create0())) {	/* must be checked: the calls below dereference newimage */
+		goto error;
+	}
+	if (jas_image_growcmpts(newimage, image->numcmpts_)) {
+		goto error;
+	}
+	for (cmptno = 0; cmptno < image->numcmpts_; ++cmptno) {
+		if (!(newimage->cmpts_[cmptno] = jas_image_cmpt_copy(image->cmpts_[cmptno]))) {
+			goto error;
+		}
+		++newimage->numcmpts_;
+	}
+	jas_image_setbbox(newimage);
+	if (image->cmprof_) {
+		if (!(newimage->cmprof_ = jas_cmprof_copy(image->cmprof_)))
+			goto error;
+	}
+	return newimage;	/* NOTE(review): clrspc_ and the metadata boxes are not assigned in this function */
+error:
+	if (newimage) {
+		jas_image_destroy(newimage);
+	}
+	return 0;
+}
+
+/* Allocate a component with every field zeroed and its type set to unknown; 0 if out of memory. */
+static jas_image_cmpt_t *jas_image_cmpt_create0()
+{
+	jas_image_cmpt_t *cmpt = jas_malloc(sizeof(jas_image_cmpt_t));
+	if (cmpt) {
+		memset(cmpt, 0, sizeof(jas_image_cmpt_t));
+		cmpt->type_ = JAS_IMAGE_CT_UNKNOWN;
+	}
+	return cmpt;
+}
+
+/* Duplicate a component: geometry and sample format are copied field by
+   field and the sample data is copied into a new memory stream.  Returns 0
+   on failure, after releasing the partially built copy. */
+static jas_image_cmpt_t *jas_image_cmpt_copy(jas_image_cmpt_t *cmpt)
+{
+	jas_image_cmpt_t *newcmpt;
+
+	if (!(newcmpt = jas_image_cmpt_create0())) {
+		return 0;
+	}
+	newcmpt->tlx_ = cmpt->tlx_;
+	newcmpt->tly_ = cmpt->tly_;
+	newcmpt->hstep_ = cmpt->hstep_;
+	newcmpt->vstep_ = cmpt->vstep_;
+	newcmpt->width_ = cmpt->width_;
+	newcmpt->height_ = cmpt->height_;
+	newcmpt->prec_ = cmpt->prec_;
+	newcmpt->sgnd_ = cmpt->sgnd_;
+	newcmpt->cps_ = cmpt->cps_;
+	newcmpt->type_ = cmpt->type_;
+	/* Rewind the source, copy it (count -1, presumably "until end of stream"),
+	  then rewind the copy.  Any failure destroys newcmpt, closing its stream. */
+	if (!(newcmpt->stream_ = jas_stream_memopen(0, 0)) ||
+	  jas_stream_seek(cmpt->stream_, 0, SEEK_SET) ||
+	  jas_stream_copy(newcmpt->stream_, cmpt->stream_, -1) ||
+	  jas_stream_seek(newcmpt->stream_, 0, SEEK_SET)) {
+		jas_image_cmpt_destroy(newcmpt);
+		return 0;
+	}
+	return newcmpt;
+}
+
+/* Free an image together with its metadata boxes, components and colour profile. */
+void jas_image_destroy(jas_image_t *image)
+{
+	int n;
+
+  // GeoJasper: begin - dima - free buffers
+  for (n = 0; n < image->metadata.count; ++n)
+    jas_box_free( &image->metadata.boxes[n] );
+  // GeoJasper: end - dima - free buffers
+
+	/* Only the first numcmpts_ slots of the table are visited. */
+	for (n = 0; image->cmpts_ && n < image->numcmpts_; ++n) {
+		jas_image_cmpt_destroy(image->cmpts_[n]);
+		image->cmpts_[n] = 0;
+	}
+	if (image->cmpts_) jas_free(image->cmpts_);
+	if (image->cmprof_)
+		jas_cmprof_destroy(image->cmprof_);
+	jas_free(image);
+}
+
+/* Create one component with the given placement, sampling steps, dimensions and sample format, backed by a
+   memory stream when inmem is non-zero and by a temporary file otherwise.  Returns 0 on failure. */
+static jas_image_cmpt_t *jas_image_cmpt_create(uint_fast32_t tlx, uint_fast32_t tly,
+  uint_fast32_t hstep, uint_fast32_t vstep, uint_fast32_t width, uint_fast32_t
+  height, uint_fast16_t depth, bool sgnd, uint_fast32_t inmem)
+{
+	jas_image_cmpt_t *cmpt = jas_malloc(sizeof(jas_image_cmpt_t));
+	long size;
+
+	if (!cmpt) {
+		return 0;
+	}
+	cmpt->stream_ = 0;	/* lets the error path run before a stream exists */
+	cmpt->type_ = JAS_IMAGE_CT_UNKNOWN;
+	cmpt->tlx_ = tlx;
+	cmpt->tly_ = tly;
+	cmpt->hstep_ = hstep;
+	cmpt->vstep_ = vstep;
+	cmpt->width_ = width;
+	cmpt->height_ = height;
+	cmpt->prec_ = depth;
+	cmpt->sgnd_ = sgnd;
+	cmpt->cps_ = (depth + 7) / 8;	/* whole bytes needed for depth bits */
+
+	size = cmpt->width_ * cmpt->height_ * cmpt->cps_;
+	cmpt->stream_ = (inmem) ? jas_stream_memopen(0, size) : jas_stream_tmpfile();
+	if (!cmpt->stream_) {
+		goto error;
+	}
+
+	/* Write a zero at the last byte and rewind (a debugging convenience). NOTE(review): size == 0 seeks to offset -1. */
+	if (jas_stream_seek(cmpt->stream_, size - 1, SEEK_SET) < 0 ||
+	  jas_stream_putc(cmpt->stream_, 0) == EOF ||
+	  jas_stream_seek(cmpt->stream_, 0, SEEK_SET) < 0) {
+		goto error;
+	}
+	return cmpt;
+error:
+	jas_image_cmpt_destroy(cmpt);
+	return 0;
+}
+
+/* Close the component's stream, if it has one, and free the component itself. */
+static void jas_image_cmpt_destroy(jas_image_cmpt_t *cmpt)
+{
+	if (cmpt->stream_)
+		jas_stream_close(cmpt->stream_);
+	jas_free(cmpt);
+}
+
+/******************************************************************************\
+* Load and save operations.
+\******************************************************************************/
+
+/* Decode an image from stream in.  A negative fmt asks for the format to be
+   detected from the data; optstr is handed to the format's decoder as-is.
+   Returns the image, or 0 if the format is unknown, has no decoder, or decoding fails. */
+jas_image_t *jas_image_decode(jas_stream_t *in, int fmt, char *optstr)
+{
+	jas_image_fmtinfo_t *fmtinfo;
+	jas_image_t *image = 0;
+
+	/* Auto-detect the format when the caller did not name one. */
+	if (fmt < 0 && (fmt = jas_image_getfmt(in)) < 0)
+		goto error;
+
+	/* The format must be registered and must provide a decoder. */
+	fmtinfo = jas_image_lookupfmtbyid(fmt);
+	if (!fmtinfo || !fmtinfo->ops.decode)
+		goto error;
+
+	/* Run the format's decoder. */
+	image = (*fmtinfo->ops.decode)(in, optstr);
+	if (!image)
+		goto error;
+
+	/* Attach a colour profile derived from the colour space, unless the
+	  space is unknown or generic or the decoder already supplied one. */
+	if (!jas_clrspc_isunknown(image->clrspc_) &&
+	  !jas_clrspc_isgeneric(image->clrspc_) && !image->cmprof_) {
+		image->cmprof_ = jas_cmprof_createfromclrspc(jas_image_clrspc(image));
+		if (!image->cmprof_)
+			goto error;
+	}
+	return image;
+
+error:
+	if (image)
+		jas_image_destroy(image);
+	return 0;
+}
+
+/* Encode image to out with the encoder registered for fmt; returns the
+   encoder's result, or -1 if fmt is not registered or has no encoder. */
+int jas_image_encode(jas_image_t *image, jas_stream_t *out, int fmt, char *optstr)
+{
+	jas_image_fmtinfo_t *fmtinfo = jas_image_lookupfmtbyid(fmt);
+	if (!fmtinfo || !fmtinfo->ops.encode)
+		return -1;
+	return (*fmtinfo->ops.encode)(image, out, optstr);
+}
+
+/******************************************************************************\
+* Component read and write operations.
+\******************************************************************************/
+
+/* Read the width x height window whose top-left sample is (x, y) from
+   component cmptno into data, resizing data if its shape differs.
+   Returns 0 on success, -1 on a bad component number, a window that does
+   not fit inside the component, or a resize/stream failure. */
+int jas_image_readcmpt(jas_image_t *image, int cmptno, jas_image_coord_t x,
+  jas_image_coord_t y, jas_image_coord_t width, jas_image_coord_t height,
+  jas_matrix_t *data)
+{
+	jas_image_cmpt_t *cmpt;
+	jas_image_coord_t row;
+	jas_image_coord_t remaining;
+	jas_seqent_t *rowptr;
+	jas_seqent_t *dst;
+	jas_seqent_t bits;
+	int rowstep;
+	int nbytes;
+	int ch;
+
+	if (cmptno < 0 || cmptno >= image->numcmpts_) {
+		return -1;
+	}
+	cmpt = image->cmpts_[cmptno];
+	if (x >= cmpt->width_ || y >= cmpt->height_ ||
+	  x + width > cmpt->width_ || y + height > cmpt->height_) {
+		return -1;
+	}
+	if ((jas_matrix_numrows(data) != height || jas_matrix_numcols(data) != width) &&
+	  jas_matrix_resize(data, height, width)) {
+		return -1;
+	}
+
+	rowptr = jas_matrix_getref(data, 0, 0);
+	rowstep = jas_matrix_rowstep(data);
+	for (row = 0; row < height; ++row, rowptr += rowstep) {
+		if (jas_stream_seek(cmpt->stream_, (cmpt->width_ * (y + row) + x)
+		  * cmpt->cps_, SEEK_SET) < 0) {
+			return -1;
+		}
+		dst = rowptr;
+		for (remaining = width; remaining > 0; --remaining, ++dst) {
+			/* Assemble one sample from cps_ bytes, most significant first. */
+			bits = 0;
+			for (nbytes = cmpt->cps_; nbytes > 0; --nbytes) {
+				if ((ch = jas_stream_getc(cmpt->stream_)) == EOF) {
+					return -1;
+				}
+				bits = (bits << 8) | (ch & 0xff);
+			}
+			*dst = bitstoint(bits, cmpt->prec_, cmpt->sgnd_);
+		}
+	}
+	return 0;
+}
+
+/* Write the top-left width x height entries of data into component cmptno at
+   (x, y).  data may be larger than the window (GeoJasper change), not smaller.
+   Returns 0 on success, -1 on bad arguments or a stream error. */
+int jas_image_writecmpt(jas_image_t *image, int cmptno, jas_image_coord_t x, jas_image_coord_t y, jas_image_coord_t width,
+  jas_image_coord_t height, jas_matrix_t *data)
+{
+	jas_image_cmpt_t *cmpt;
+	jas_image_coord_t row;
+	jas_image_coord_t remaining;
+	jas_seqent_t *rowptr;
+	jas_seqent_t *src;
+	jas_seqent_t bits;
+	int rowstep;
+	int nbytes;
+	int byte;
+
+	if (cmptno < 0 || cmptno >= image->numcmpts_) {
+		return -1;
+	}
+	cmpt = image->cmpts_[cmptno];
+	if (x >= cmpt->width_ || y >= cmpt->height_ ||
+	  x + width > cmpt->width_ || y + height > cmpt->height_) {
+		return -1;
+	}
+	if (jas_matrix_numrows(data) < height || jas_matrix_numcols(data) < width) {
+		return -1;
+	}	// GeoJasper: dima, change != to <
+
+	rowptr = jas_matrix_getref(data, 0, 0);
+	rowstep = jas_matrix_rowstep(data);
+	for (row = 0; row < height; ++row, rowptr += rowstep) {
+		if (jas_stream_seek(cmpt->stream_, (cmpt->width_ * (y + row) + x)
+		  * cmpt->cps_, SEEK_SET) < 0) {
+			return -1;
+		}
+		src = rowptr;
+		for (remaining = width; remaining > 0; --remaining, ++src) {
+			/* Emit the sample's cps_ bytes, most significant first. */
+			bits = inttobits(*src, cmpt->prec_, cmpt->sgnd_);
+			for (nbytes = cmpt->cps_; nbytes > 0; --nbytes) {
+				byte = (bits >> (8 * (cmpt->cps_ - 1))) & 0xff;
+				if (jas_stream_putc(cmpt->stream_,
+				  (unsigned char) byte) == EOF) {
+					return -1;
+				}
+				bits <<= 8;
+			}
+		}
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* File format operations.
+\******************************************************************************/
+
+/* Empty the format registry: release the name, extension and description
+   strings of every registered format and reset the count to zero. */
+void jas_image_clearfmts()
+{
+	jas_image_fmtinfo_t *fmtinfo = jas_image_fmtinfos;
+	jas_image_fmtinfo_t *end = jas_image_fmtinfos + jas_image_numfmts;
+
+	for (; fmtinfo < end; ++fmtinfo) {
+		if (fmtinfo->name)
+			jas_free(fmtinfo->name);
+		if (fmtinfo->ext)
+			jas_free(fmtinfo->ext);
+		if (fmtinfo->desc)
+			jas_free(fmtinfo->desc);
+		/* Leave no freed pointers behind in the slot. */
+		fmtinfo->name = 0;
+		fmtinfo->ext = 0;
+		fmtinfo->desc = 0;
+	}
+	jas_image_numfmts = 0;
+}
+
+/* Register a format: id, duplicated name/ext/desc strings and a copy of *ops go into the next free
+   registry slot.  Returns 0 on success, -1 if the registry is full or a string cannot be duplicated. */
+int jas_image_addfmt(int id, char *name, char *ext, char *desc,
+  jas_image_fmtops_t *ops)
+{
+	jas_image_fmtinfo_t *slot;
+	assert(id >= 0 && name && ext && ops);
+	if (jas_image_numfmts >= JAS_IMAGE_MAXFMTS)
+		return -1;
+	slot = &jas_image_fmtinfos[jas_image_numfmts];	/* becomes visible to lookups only once the count is bumped */
+	slot->id = id;
+	if (!(slot->name = jas_strdup(name)))
+		return -1;
+	if (!(slot->ext = jas_strdup(ext)))
+		goto free_name;
+	if (!(slot->desc = jas_strdup(desc)))	/* NOTE(review): desc is not covered by the assert above */
+		goto free_ext;
+	slot->ops = *ops;
+	++jas_image_numfmts;
+	return 0;
+free_ext:
+	jas_free(slot->ext);
+free_name:
+	jas_free(slot->name);
+	return -1;
+}
+
+/* Map a registered format name to its numeric id; -1 if the name is not registered. */
+int jas_image_strtofmt(char *name)
+{
+	jas_image_fmtinfo_t *fmtinfo = jas_image_lookupfmtbyname(name);
+	if (!fmtinfo)
+		return -1;
+	return fmtinfo->id;
+}
+
+/* Map a format id to the name string stored in the registry (not a copy); 0 if the id is not registered. */
+char *jas_image_fmttostr(int fmt)
+{
+	jas_image_fmtinfo_t *fmtinfo = jas_image_lookupfmtbyid(fmt);
+	if (!fmtinfo)
+		return 0;
+	return fmtinfo->name;
+}
+
+/* Identify the format of the data on stream in by trying each registered
+   format's validate hook in registration order.  Returns the id of the
+   first format whose hook returns 0, or -1 if none does. */
+int jas_image_getfmt(jas_stream_t *in)
+{
+	jas_image_fmtinfo_t *fmtinfo;
+	int i;
+
+	for (i = 0; i < jas_image_numfmts; ++i) {
+		fmtinfo = &jas_image_fmtinfos[i];
+		/* Formats without a validate hook are never selected here. */
+		if (!fmtinfo->ops.validate) {
+			continue;
+		}
+		if ((*fmtinfo->ops.validate)(in) == 0) {
+			return fmtinfo->id;
+		}
+	}
+	return -1;
+}
+
+/* Guess a format id from a file name: the text after the last '.' is
+   compared (exact, case-sensitive strcmp) with each registered extension.
+   Returns -1 if name has no '.' or no format uses that extension. */
+int jas_image_fmtfromname(char *name)
+{
+	jas_image_fmtinfo_t *fmtinfo;
+	char *dot;
+	int i;
+
+	dot = strrchr(name, '.');
+	if (!dot) {
+		return -1;
+	}
+	for (i = 0; i < jas_image_numfmts; ++i) {
+		fmtinfo = &jas_image_fmtinfos[i];
+		if (strcmp(dot + 1, fmtinfo->ext) == 0)
+			return fmtinfo->id;
+	}
+	return -1;
+}
+
+/******************************************************************************\
+* Miscellaneous operations.
+\******************************************************************************/
+
+/* Sum, over all components, of width * height * precision bits rounded up
+   to whole bytes.  The arithmetic is not checked for overflow. */
+uint_fast32_t jas_image_rawsize(jas_image_t *image)
+{
+	uint_fast32_t total = 0;
+	jas_image_cmpt_t *cmpt;
+	int cmptno;
+
+	for (cmptno = 0; cmptno < image->numcmpts_; ++cmptno) {
+		cmpt = image->cmpts_[cmptno];
+		total += (cmpt->width_ * cmpt->height_ * cmpt->prec_ + 7) / 8;
+	}
+	return total;
+}
+
+/* Destroy component cmptno and close the gap; bad indices are ignored. */
+void jas_image_delcmpt(jas_image_t *image, int cmptno)
+{
+	/* A negative index would read and shift memory before the array. */
+	if (cmptno < 0 || cmptno >= image->numcmpts_) {
+		return;
+	}
+	jas_image_cmpt_destroy(image->cmpts_[cmptno]);
+	/* Slide the later pointers down one slot (a no-op for the last one). */
+	memmove(&image->cmpts_[cmptno], &image->cmpts_[cmptno + 1],
+	  (image->numcmpts_ - 1 - cmptno) * sizeof(jas_image_cmpt_t *));
+	--image->numcmpts_;
+	jas_image_setbbox(image);
+}
+
+/* Insert a new component built from cmptparm at cmptno (negative = append). */
+int jas_image_addcmpt(jas_image_t *image, int cmptno,
+  jas_image_cmptparm_t *cmptparm)
+{
+	jas_image_cmpt_t *created;
+	if (cmptno < 0)
+		cmptno = image->numcmpts_;
+	assert(cmptno >= 0 && cmptno <= image->numcmpts_);
+	/* Grow the pointer array (by 128 slots) when full; -1 reports failure. */
+	if (image->numcmpts_ >= image->maxcmpts_ &&
+	  jas_image_growcmpts(image, image->maxcmpts_ + 128)) {
+		return -1;
+	}
+	created = jas_image_cmpt_create(cmptparm->tlx, cmptparm->tly,
+	  cmptparm->hstep, cmptparm->vstep, cmptparm->width, cmptparm->height,
+	  cmptparm->prec, cmptparm->sgnd, 1);
+	if (!created) {
+		return -1;
+	}
+	/* Shift the components at and after the insertion point up one slot. */
+	if (image->numcmpts_ > cmptno) {
+		memmove(&image->cmpts_[cmptno + 1], &image->cmpts_[cmptno],
+		  (image->numcmpts_ - cmptno) * sizeof(jas_image_cmpt_t *));
+	}
+	image->cmpts_[cmptno] = created;
+	++image->numcmpts_;
+	jas_image_setbbox(image);
+	return 0;
+}
+
+/* Return the first format table entry whose ID equals id, or null if none. */
+jas_image_fmtinfo_t *jas_image_lookupfmtbyid(int id)
+{
+	jas_image_fmtinfo_t *entry = jas_image_fmtinfos;
+	int left;
+	for (left = jas_image_numfmts; left > 0; --left, ++entry) {
+		if (entry->id == id) {
+			return entry;
+		}
+	}
+	return 0;
+}
+
+/* Return the first format table entry named exactly name, or null if none. */
+jas_image_fmtinfo_t *jas_image_lookupfmtbyname(const char *name)
+{
+	jas_image_fmtinfo_t *entry = jas_image_fmtinfos;
+	int left;
+	for (left = jas_image_numfmts; left > 0; --left, ++entry) {
+		if (strcmp(entry->name, name) == 0) {
+			return entry;
+		}
+	}
+	return 0;
+}
+
+
+
+
+
+/* Encode v as a prec-bit field (two's complement when sgnd and negative). */
+static uint_fast32_t inttobits(jas_seqent_t v, int prec, bool sgnd)
+{
+	jas_seqent_t t = (sgnd && v < 0) ? ((1 << prec) + v) : v;
+	return t & JAS_ONES(prec);
+}
+
+/* Decode a prec-bit field into an integer, sign-extending when sgnd is set. */
+static jas_seqent_t bitstoint(uint_fast32_t v, int prec, bool sgnd)
+{
+	v &= JAS_ONES(prec);
+	/* For signed data a set top bit means the value is v - 2^prec. */
+	return (sgnd && (v & (1 << (prec - 1)))) ? (v - (1 << prec)) : v;
+}
+
+/* Recompute the image bounding box as the union of the reference-grid areas
+  of all components (all zeros if there are none); brx_/bry_ are exclusive. */
+static void jas_image_setbbox(jas_image_t *image)
+{
+	jas_image_cmpt_t *c;
+	int_fast32_t right;
+	int_fast32_t bottom;
+	int n;
+	if (image->numcmpts_ <= 0) {
+		image->tlx_ = 0;
+		image->tly_ = 0;
+		image->brx_ = 0;
+		image->bry_ = 0;
+		return;
+	}
+	/* Seed the box with the first component, then widen it as needed. */
+	c = image->cmpts_[0];
+	image->tlx_ = c->tlx_;
+	image->tly_ = c->tly_;
+	image->brx_ = c->tlx_ + c->hstep_ * (c->width_ - 1) + 1;
+	image->bry_ = c->tly_ + c->vstep_ * (c->height_ - 1) + 1;
+	for (n = 1; n < image->numcmpts_; ++n) {
+		c = image->cmpts_[n];
+		right = c->tlx_ + c->hstep_ * (c->width_ - 1) + 1;
+		bottom = c->tly_ + c->vstep_ * (c->height_ - 1) + 1;
+		if (c->tlx_ < image->tlx_) {
+			image->tlx_ = c->tlx_;
+		}
+		if (c->tly_ < image->tly_) {
+			image->tly_ = c->tly_;
+		}
+		if (right > image->brx_) {
+			image->brx_ = right;
+		}
+		if (bottom > image->bry_) {
+			image->bry_ = bottom;
+		}
+	}
+}
+
+/* Resize the pointer array to maxcmpts slots (0 on success, -1 if not). */
+static int jas_image_growcmpts(jas_image_t *image, int maxcmpts)
+{
+	size_t nbytes = maxcmpts * sizeof(jas_image_cmpt_t *);
+	jas_image_cmpt_t **grown;
+	int slot;
+	grown = image->cmpts_ ? jas_realloc(image->cmpts_, nbytes) : jas_malloc(nbytes);
+	if (!grown) {
+		return -1;
+	}
+	image->cmpts_ = grown;
+	image->maxcmpts_ = maxcmpts;
+	for (slot = image->numcmpts_; slot < maxcmpts; ++slot) {
+		grown[slot] = 0;	/* slots beyond the live components stay null */
+	}
+	return 0;
+}
+
+/* Insert a copy of component srccmptno of srcimage into dstimage at position
+  dstcmptno (negative = append).  Returns 0 on success, or -1 on failure. */
+int jas_image_copycmpt(jas_image_t *dstimage, int dstcmptno, jas_image_t *srcimage,
+  int srccmptno)
+{
+	jas_image_cmpt_t *newcmpt;
+	if (dstcmptno < 0)
+		dstcmptno = dstimage->numcmpts_;
+	if (dstimage->numcmpts_ >= dstimage->maxcmpts_ &&
+	  jas_image_growcmpts(dstimage, dstimage->maxcmpts_ + 128))
+		return -1;
+	if (!(newcmpt = jas_image_cmpt_copy(srcimage->cmpts_[srccmptno])))
+		return -1;
+	if (dstcmptno < dstimage->numcmpts_) {
+		memmove(&dstimage->cmpts_[dstcmptno + 1], &dstimage->cmpts_[dstcmptno],
+		  (dstimage->numcmpts_ - dstcmptno) * sizeof(jas_image_cmpt_t *));
+	}
+	dstimage->cmpts_[dstcmptno] = newcmpt;
+	++dstimage->numcmpts_;
+	jas_image_setbbox(dstimage);
+	return 0;
+}
+
+/* Debugging aid: for each component print its precision, signedness and
+  type, then the first (up to 16) samples of the top row and the last (up
+  to 16) samples of the bottom row.  Aborts if the samples cannot be read. */
+void jas_image_dump(jas_image_t *image, FILE *out)
+{
+	long samples[1024];
+	jas_image_cmpt_t *c;
+	int idx, k, count, w, h, x0, y0;
+
+	for (idx = 0; idx < image->numcmpts_; ++idx) {
+		c = image->cmpts_[idx];
+		fprintf(out, "prec=%d, sgnd=%d, cmpttype=%d\n", c->prec_, c->sgnd_,
+		  c->type_);
+		w = jas_image_cmptwidth(image, idx);
+		h = jas_image_cmptheight(image, idx);
+		count = JAS_MIN(16, w);
+		/* Start of the top row. */
+		if (jas_image_readcmpt2(image, idx, 0, 0, count, 1, samples))
+			abort();
+		for (k = 0; k < count; ++k)
+			fprintf(out, " f(%d,%d)=%ld", k, 0, samples[k]);
+		fprintf(out, "\n");
+		/* End of the bottom row. */
+		x0 = w - count;
+		y0 = h - 1;
+		if (jas_image_readcmpt2(image, idx, x0, y0, count, 1, samples))
+			abort();
+		for (k = 0; k < count; ++k)
+			fprintf(out, " f(%d,%d)=%ld", x0 + k, y0, samples[k]);
+		fprintf(out, "\n");
+	}
+}
+
+/* Add a component at newcmptno whose samples are lutents[s] for each sample s
+  of component cmptno, with s clamped to [0, numlutents - 1]; dtype encodes the
+  new precision and signedness.  Returns 0, or -1 if it cannot be added. */
+int jas_image_depalettize(jas_image_t *image, int cmptno, int numlutents,
+  int_fast32_t *lutents, int dtype, int newcmptno)
+{
+	jas_image_cmpt_t *src = image->cmpts_[cmptno];
+	jas_image_cmptparm_t parm;
+	int_fast32_t idx;
+	int row, col;
+	/* The new component shares the source component's geometry. */
+	parm.tlx = src->tlx_;
+	parm.tly = src->tly_;
+	parm.hstep = src->hstep_;
+	parm.vstep = src->vstep_;
+	parm.width = src->width_;
+	parm.height = src->height_;
+	parm.prec = JAS_IMAGE_CDT_GETPREC(dtype);
+	parm.sgnd = JAS_IMAGE_CDT_GETSGND(dtype);
+	if (jas_image_addcmpt(image, newcmptno, &parm)) {
+		return -1;
+	}
+	/* Inserting at or before the source moves the source up by one index. */
+	if (newcmptno <= cmptno) {
+		++cmptno;
+		src = image->cmpts_[cmptno];
+	}
+	for (row = 0; row < src->height_; ++row) {
+		for (col = 0; col < src->width_; ++col) {
+			idx = jas_image_readcmptsample(image, cmptno, col, row);
+			if (idx < 0) {
+				idx = 0;
+			} else if (idx >= numlutents) {
+				idx = numlutents - 1;
+			}
+			jas_image_writecmptsample(image, newcmptno, col, row,
+			  lutents[idx]);
+		}
+	}
+	return 0;
+}
+
+/* Read the sample at (x, y) of component cmptno and return it as an integer.
+  Returns -1 if seeking or reading the component stream fails. */
+int jas_image_readcmptsample(jas_image_t *image, int cmptno, int x, int y)
+{
+	jas_image_cmpt_t *c = image->cmpts_[cmptno];
+	uint_fast32_t bits = 0;
+	int left, byte;
+	/* Samples are stored row by row, cps_ bytes each. */
+	if (jas_stream_seek(c->stream_, (c->width_ * y + x) * c->cps_,
+	  SEEK_SET) < 0) {
+		return -1;
+	}
+	/* Assemble the bytes most significant first. */
+	for (left = c->cps_; left > 0; --left) {
+		byte = jas_stream_getc(c->stream_);
+		if (byte == EOF) {
+			return -1;
+		}
+		bits = (bits << 8) | (byte & 0xff);
+	}
+	return bitstoint(bits, c->prec_, c->sgnd_);
+}
+
+/* Store v as the sample at (x, y) of component cmptno.  Failures to seek or
+  write are not reported; the function simply stops early. */
+void jas_image_writecmptsample(jas_image_t *image, int cmptno, int x, int y,
+  int_fast32_t v)
+{
+	jas_image_cmpt_t *c = image->cmpts_[cmptno];
+	uint_fast32_t bits;
+	int left;
+	int topshift = 8 * (c->cps_ - 1);
+	if (jas_stream_seek(c->stream_, (c->width_ * y + x) * c->cps_,
+	  SEEK_SET) < 0) {
+		return;
+	}
+	bits = inttobits(v, c->prec_, c->sgnd_);
+	/* Emit the most significant byte first, shifting the next one into place. */
+	for (left = c->cps_; left > 0; --left) {
+		if (jas_stream_putc(c->stream_,
+		  (unsigned char) ((bits >> topshift) & 0xff)) == EOF) {
+			return;
+		}
+		bits <<= 8;
+	}
+}
+
+/* Return the index of the first component whose type equals ctype, or -1 if
+  the image has no such component. */
+int jas_image_getcmptbytype(jas_image_t *image, int ctype)
+{
+	int idx = 0;
+	while (idx < image->numcmpts_ && image->cmpts_[idx]->type_ != ctype) {
+		++idx;
+	}
+	/* Running off the end means nothing matched. */
+	return (idx < image->numcmpts_) ? idx : -1;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/***********************************************/
+/***********************************************/
+/***********************************************/
+/***********************************************/
+
+/* Read the width x height rectangle whose top-left sample is (x, y) of
+  component cmptno into buf, one long per sample in row-major order.  Returns
+  0, or -1 for a bad component, an out-of-range rectangle or a stream error. */
+int jas_image_readcmpt2(jas_image_t *image, int cmptno, jas_image_coord_t x,
+  jas_image_coord_t y, jas_image_coord_t width, jas_image_coord_t height,
+  long *buf)
+{
+	jas_image_cmpt_t *c;
+	jas_image_coord_t row;
+	jas_image_coord_t col;
+	long sample;
+	long *dst = buf;
+	if (cmptno < 0 || cmptno >= image->numcmpts_) {
+		return -1;
+	}
+	c = image->cmpts_[cmptno];
+	if (x < 0 || x >= c->width_ || y < 0 || y >= c->height_ || width < 0 ||
+	  height < 0 || x + width > c->width_ || y + height > c->height_) {
+		return -1;
+	}
+	for (row = 0; row < height; ++row) {
+		if (jas_stream_seek(c->stream_, (c->width_ * (y + row) + x) *
+		  c->cps_, SEEK_SET) < 0) {
+			return -1;
+		}
+		for (col = 0; col < width; ++col) {
+			if (getint(c->stream_, c->sgnd_, c->prec_, &sample)) {
+				return -1;
+			}
+			*dst++ = sample;
+		}
+	}
+	return 0;
+}
+
+/* Write buf (one long per sample, row-major) into the width x height rectangle
+  whose top-left sample is (x, y) of component cmptno.  Returns 0, or -1 for
+  a bad component, an out-of-range rectangle or a stream error. */
+int jas_image_writecmpt2(jas_image_t *image, int cmptno, jas_image_coord_t x,
+  jas_image_coord_t y, jas_image_coord_t width, jas_image_coord_t height,
+  long *buf)
+{
+	jas_image_cmpt_t *c;
+	jas_image_coord_t row;
+	jas_image_coord_t col;
+	long sample;
+	long *src = buf;
+	if (cmptno < 0 || cmptno >= image->numcmpts_) {
+		return -1;
+	}
+	c = image->cmpts_[cmptno];
+	if (x < 0 || x >= c->width_ || y < 0 || y >= c->height_ || width < 0 ||
+	  height < 0 || x + width > c->width_ || y + height > c->height_) {
+		return -1;
+	}
+	for (row = 0; row < height; ++row) {
+		if (jas_stream_seek(c->stream_, (c->width_ * (y + row) + x) *
+		  c->cps_, SEEK_SET) < 0) {
+			return -1;
+		}
+		for (col = 0; col < width; ++col) {
+			sample = *src++;
+			if (putint(c->stream_, c->sgnd_, c->prec_, sample)) {
+				return -1;
+			}
+		}
+	}
+	return 0;
+}
+
+/* Add to the image, at index newcmptno, a resampled version of component
+  cmptno: its grid has origin (ho, vo) and steps (hs, vs), its samples have
+  the given signedness and precision, and each one is taken from the nearest
+  sample of the old component.  Returns 0 on success and -1 on failure. */
+int jas_image_sampcmpt(jas_image_t *image, int cmptno, int newcmptno,
+  jas_image_coord_t ho, jas_image_coord_t vo, jas_image_coord_t hs,
+  jas_image_coord_t vs, int sgnd, int prec)
+{
+	jas_image_cmpt_t *oldcmpt;
+	jas_image_cmpt_t *newcmpt;
+	int width;
+	int height;
+	jas_image_coord_t tlx;
+	jas_image_coord_t tly;
+	jas_image_coord_t brx;
+	jas_image_coord_t bry;
+	int i;
+	int j;
+	jas_image_cmptparm_t cmptparm;
+	/* (ax, ay) / (bx, by): old-grid points at or before / at or after (x, y). */
+	jas_image_coord_t ax, ay, bx, by;
+	/* Squared distances from (x, y) to the four surrounding grid points. */
+	jas_image_coord_t d0, d1, d2, d3;
+	jas_image_coord_t oldx, oldy;
+	jas_image_coord_t x, y;
+	long v;
+	jas_image_coord_t cmptbrx, cmptbry;
+
+	assert(cmptno >= 0 && cmptno < image->numcmpts_);
+	oldcmpt = image->cmpts_[cmptno];
+	assert(oldcmpt->tlx_ == 0 && oldcmpt->tly_ == 0);
+	jas_image_calcbbox2(image, &tlx, &tly, &brx, &bry);
+	width = FLOORDIV(brx - ho + hs, hs);
+	height = FLOORDIV(bry - vo + vs, vs);
+	cmptparm.tlx = ho;
+	cmptparm.tly = vo;
+	cmptparm.hstep = hs;
+	cmptparm.vstep = vs;
+	cmptparm.width = width;
+	cmptparm.height = height;
+	cmptparm.prec = prec;
+	cmptparm.sgnd = sgnd;
+	if (jas_image_addcmpt(image, newcmptno, &cmptparm))
+		goto error;
+	/* Position of the old component's last sample; candidates are clamped to it. */
+	cmptbrx = oldcmpt->tlx_ + (oldcmpt->width_ - 1) * oldcmpt->hstep_;
+	cmptbry = oldcmpt->tly_ + (oldcmpt->height_ - 1) * oldcmpt->vstep_;
+	newcmpt = image->cmpts_[newcmptno];
+	jas_stream_rewind(newcmpt->stream_);
+	for (i = 0; i < height; ++i) {
+		y = newcmpt->tly_ + newcmpt->vstep_ * i;
+		for (j = 0; j < width; ++j) {
+			x = newcmpt->tlx_ + newcmpt->hstep_ * j;
+			ax = downtomult(x - oldcmpt->tlx_, oldcmpt->hstep_) + oldcmpt->tlx_;
+			ay = downtomult(y - oldcmpt->tly_, oldcmpt->vstep_) + oldcmpt->tly_;
+			bx = uptomult(x - oldcmpt->tlx_, oldcmpt->hstep_) + oldcmpt->tlx_;
+			if (bx > cmptbrx)
+				bx = cmptbrx;
+			by = uptomult(y - oldcmpt->tly_, oldcmpt->vstep_) + oldcmpt->tly_;
+			if (by > cmptbry)
+				by = cmptbry;
+			/* Choose whichever of the four candidates lies closest to (x, y). */
+			d0 = (ax - x) * (ax - x) + (ay - y) * (ay - y);
+			d1 = (bx - x) * (bx - x) + (ay - y) * (ay - y);
+			d2 = (bx - x) * (bx - x) + (by - y) * (by - y);
+			d3 = (ax - x) * (ax - x) + (by - y) * (by - y);
+			if (d0 <= d1 && d0 <= d2 && d0 <= d3) {
+				oldx = (ax - oldcmpt->tlx_) / oldcmpt->hstep_;
+				oldy = (ay - oldcmpt->tly_) / oldcmpt->vstep_;
+			} else if (d1 <= d0 && d1 <= d2 && d1 <= d3) {
+				oldx = (bx - oldcmpt->tlx_) / oldcmpt->hstep_;
+				oldy = (ay - oldcmpt->tly_) / oldcmpt->vstep_;
+			} else if (d2 <= d0 && d2 <= d1 && d2 <= d3) {
+				oldx = (bx - oldcmpt->tlx_) / oldcmpt->hstep_;
+				oldy = (by - oldcmpt->tly_) / oldcmpt->vstep_;
+			} else {
+				oldx = (ax - oldcmpt->tlx_) / oldcmpt->hstep_;
+				oldy = (by - oldcmpt->tly_) / oldcmpt->vstep_;
+			}
+			assert(oldx >= 0 && oldx < oldcmpt->width_ &&
+			  oldy >= 0 && oldy < oldcmpt->height_);
+			/* Copy the chosen sample, converting its format if that differs. */
+			if (jas_stream_seek(oldcmpt->stream_, oldcmpt->cps_ *
+			  (oldy * oldcmpt->width_ + oldx), SEEK_SET) < 0)
+				goto error;
+			if (getint(oldcmpt->stream_, oldcmpt->sgnd_,
+			  oldcmpt->prec_, &v))
+				goto error;
+			if (newcmpt->prec_ != oldcmpt->prec_ ||
+			  newcmpt->sgnd_ != oldcmpt->sgnd_) {
+				v = convert(v, oldcmpt->sgnd_, oldcmpt->prec_,
+				  newcmpt->sgnd_, newcmpt->prec_);
+			}
+			if (putint(newcmpt->stream_, newcmpt->sgnd_,
+			  newcmpt->prec_, v))
+				goto error;
+		}
+	}
+	return 0;
+error:
+	return -1;
+}
+
+/* Report whether every component uses the same horizontal and vertical
+  sampling step as component 0 (1 if so, 0 otherwise).  Component 0 is read
+  unconditionally, so the image is expected to have at least one. */
+int jas_image_ishomosamp(jas_image_t *image)
+{
+	jas_image_coord_t refh = jas_image_cmpthstep(image, 0);
+	jas_image_coord_t refv = jas_image_cmptvstep(image, 0);
+	int idx;
+	for (idx = 0; idx < image->numcmpts_; ++idx) {
+		if (jas_image_cmpthstep(image, idx) != refh) {
+			return 0;
+		}
+		if (jas_image_cmptvstep(image, idx) != refv) {
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/* Note: This function defines a bounding box differently: its bottom-right
+  corner is inclusive, and an empty image yields (0, 0) to (-1, -1). */
+static void jas_image_calcbbox2(jas_image_t *image, jas_image_coord_t *tlx,
+  jas_image_coord_t *tly, jas_image_coord_t *brx, jas_image_coord_t *bry)
+{
+	jas_image_cmpt_t *c;
+	jas_image_coord_t left = 0;
+	jas_image_coord_t top = 0;
+	jas_image_coord_t right = -1;
+	jas_image_coord_t bottom = -1;
+	jas_image_coord_t edge;
+	int n;
+	if (image->numcmpts_ > 0) {
+		c = image->cmpts_[0];
+		left = c->tlx_;
+		top = c->tly_;
+		right = c->tlx_ + c->hstep_ * (c->width_ - 1);
+		bottom = c->tly_ + c->vstep_ * (c->height_ - 1);
+	}
+	for (n = 0; n < image->numcmpts_; ++n) {
+		c = image->cmpts_[n];
+		if (c->tlx_ < left) {
+			left = c->tlx_;
+		}
+		if (c->tly_ < top) {
+			top = c->tly_;
+		}
+		edge = c->tlx_ + c->hstep_ * (c->width_ - 1);
+		if (edge > right) {
+			right = edge;
+		}
+		edge = c->tly_ + c->vstep_ * (c->height_ - 1);
+		if (edge > bottom) {
+			bottom = edge;
+		}
+	}
+	*tlx = left;
+	*tly = top;
+	*brx = right;
+	*bry = bottom;
+}
+
+
+
+/* Read one prec-bit sample, stored big-endian in (prec + 7) / 8 bytes, into
+  *val.  Returns 0, or -1 on a short read; aborts if sgnd is set. */
+static int getint(jas_stream_t *in, int sgnd, int prec, long *val)
+{
+	long acc = 0;
+	int left, byte;
+	for (left = (prec + 7) / 8; left > 0; --left) {
+		byte = jas_stream_getc(in);
+		if (byte == EOF) {
+			return -1;
+		}
+		acc = (acc << 8) | byte;
+	}
+	acc &= ((1 << prec) - 1);
+	if (sgnd) {
+		/* XXX - Do something here. */
+		abort();
+	}
+	*val = acc;
+	return 0;
+}
+
+/* Write the low prec bits of val as (prec + 7) / 8 big-endian bytes.  Returns
+  0 on success, -1 if a byte cannot be written; aborts if sgnd is set. */
+static int putint(jas_stream_t *out, int sgnd, int prec, long val)
+{
+	int shift, byte;
+	if (sgnd) {
+		/* XXX - Do something here. */
+		abort();
+	}
+	val &= (1 << prec) - 1;
+	for (shift = 8 * ((prec + 7) / 8 - 1); shift >= 0; shift -= 8) {
+		byte = (val >> shift) & 0xff;
+		if (jas_stream_putc(out, byte) != byte)
+			return -1;
+	}
+	return 0;
+}
+
+/* Rescale a sample from oldprec to newprec bits by shifting it left (more
+  bits) or right (fewer bits).  A change of signedness is not handled: the
+  sgnd arguments currently have no effect on the result. */
+static long convert(long val, int oldsgnd, int oldprec, int newsgnd,
+  int newprec)
+{
+	int delta = newprec - oldprec;
+	(void) oldsgnd;
+	(void) newsgnd;
+	if (delta > 0) {
+		return val << delta;
+	}
+	return (delta < 0) ? (val >> -delta) : val;
+}
+
+static long downtomult(long x, long y)
+{
+	assert(x >= 0);
+	return x - (x % y);	/* x with its remainder modulo y removed */
+}
+
+static long uptomult(long x, long y)
+{
+	assert(x >= 0);
+	return y * ((x + (y - 1)) / y);	/* for y > 0: smallest multiple >= x */
+}
+
+/* Convert an image to the color space described by outprof and return the
+  result as a new image, or a null pointer on failure.  The work is done on
+  a private copy of the input, whose components are first resampled onto a
+  common grid if their sampling steps differ.  Only the color channels are
+  produced, each as an unsigned 8-bit component. */
+jas_image_t *jas_image_chclrspc(jas_image_t *image, jas_cmprof_t *outprof,
+  int intent)
+{
+	jas_image_t *inimage = 0;
+	jas_image_t *outimage = 0;
+	jas_image_t *result = 0;
+	jas_cmxform_t *xform = 0;
+	jas_cmcmptfmt_t *incmptfmts = 0;
+	jas_cmcmptfmt_t *outcmptfmts = 0;
+	jas_cmprof_t *inprof;
+	jas_cmprof_t *tmpprof;
+	jas_image_cmptparm_t cmptparm;
+	jas_cmpixmap_t inpixmap;
+	jas_cmpixmap_t outpixmap;
+	int numinclrchans;
+	int numoutclrchans;
+	/* Number of row buffers allocated so far, so cleanup frees only those. */
+	int numinbufs = 0;
+	int numoutbufs = 0;
+	int minhstep, minvstep;
+	int hstep, vstep;
+	int width, height;
+	int cmpttype;
+	int i, j, k, n;
+
+	if (!(inimage = jas_image_copy(image)))
+		goto done;
+	image = 0;
+
+	if (!jas_image_ishomosamp(inimage)) {
+		/* Find the finest sampling step used by any component. */
+		minhstep = jas_image_cmpthstep(inimage, 0);
+		minvstep = jas_image_cmptvstep(inimage, 0);
+		for (i = 1; i < jas_image_numcmpts(inimage); ++i) {
+			hstep = jas_image_cmpthstep(inimage, i);
+			vstep = jas_image_cmptvstep(inimage, i);
+			if (hstep < minhstep)
+				minhstep = hstep;
+			if (vstep < minvstep)
+				minvstep = vstep;
+		}
+		/* Replace each component by a resampled copy: the copy is inserted
+		  right after the original, which is then deleted. */
+		n = jas_image_numcmpts(inimage);
+		for (i = 0; i < n; ++i) {
+			cmpttype = jas_image_cmpttype(inimage, i);
+			if (jas_image_sampcmpt(inimage, i, i + 1, 0, 0, minhstep, minvstep, jas_image_cmptsgnd(inimage, i), jas_image_cmptprec(inimage, i)))
+				goto done;
+			jas_image_setcmpttype(inimage, i + 1, cmpttype);
+			jas_image_delcmpt(inimage, i);
+		}
+	}
+
+	width = jas_image_cmptwidth(inimage, 0);
+	height = jas_image_cmptheight(inimage, 0);
+	hstep = jas_image_cmpthstep(inimage, 0);
+	vstep = jas_image_cmptvstep(inimage, 0);
+
+	inprof = jas_image_cmprof(inimage);
+	assert(inprof);
+	numinclrchans = jas_clrspc_numchans(jas_cmprof_clrspc(inprof));
+	numoutclrchans = jas_clrspc_numchans(jas_cmprof_clrspc(outprof));
+
+	if (!(outimage = jas_image_create0()))
+		goto done;
+
+	/* Create a component for each of the colorants. */
+	for (i = 0; i < numoutclrchans; ++i) {
+		cmptparm.tlx = 0;
+		cmptparm.tly = 0;
+		cmptparm.hstep = hstep;
+		cmptparm.vstep = vstep;
+		cmptparm.width = width;
+		cmptparm.height = height;
+		cmptparm.prec = 8;
+		cmptparm.sgnd = 0;
+		if (jas_image_addcmpt(outimage, -1, &cmptparm))
+			goto done;
+		jas_image_setcmpttype(outimage, i, JAS_IMAGE_CT_COLOR(i));
+	}
+#if 0
+	/* Copy the auxiliary components without modification. */
+	for (i = 0; i < jas_image_numcmpts(inimage); ++i) {
+		if (!ISCOLOR(jas_image_cmpttype(inimage, i))) {
+			jas_image_copycmpt(outimage, -1, inimage, i);
+/* XXX - need to specify laydown of component on ref. grid */
+		}
+	}
+#endif
+
+	if (!(tmpprof = jas_cmprof_copy(outprof)))
+		goto done;
+	assert(!jas_image_cmprof(outimage));
+	jas_image_setcmprof(outimage, tmpprof);
+	tmpprof = 0;
+	jas_image_setclrspc(outimage, jas_cmprof_clrspc(outprof));
+
+	if (!(xform = jas_cmxform_create(inprof, outprof, 0, JAS_CMXFORM_OP_FWD, intent, 0)))
+		goto done;
+
+	/* One row buffer per input color channel. */
+	inpixmap.numcmpts = numinclrchans;
+	if (!(incmptfmts = jas_malloc(numinclrchans * sizeof(jas_cmcmptfmt_t))))
+		goto done;
+	inpixmap.cmptfmts = incmptfmts;
+	for (i = 0; i < numinclrchans; ++i) {
+		j = jas_image_getcmptbytype(inimage, JAS_IMAGE_CT_COLOR(i));
+		assert(j >= 0);
+		if (!(incmptfmts[i].buf = jas_malloc(width * sizeof(long))))
+			goto done;
+		++numinbufs;
+		incmptfmts[i].prec = jas_image_cmptprec(inimage, j);
+		incmptfmts[i].sgnd = jas_image_cmptsgnd(inimage, j);
+		incmptfmts[i].width = width;
+		incmptfmts[i].height = 1;
+	}
+
+	/* One row buffer per output color channel. */
+	outpixmap.numcmpts = numoutclrchans;
+	if (!(outcmptfmts = jas_malloc(numoutclrchans * sizeof(jas_cmcmptfmt_t))))
+		goto done;
+	outpixmap.cmptfmts = outcmptfmts;
+	for (i = 0; i < numoutclrchans; ++i) {
+		j = jas_image_getcmptbytype(outimage, JAS_IMAGE_CT_COLOR(i));
+		assert(j >= 0);
+		if (!(outcmptfmts[i].buf = jas_malloc(width * sizeof(long))))
+			goto done;
+		++numoutbufs;
+		outcmptfmts[i].prec = jas_image_cmptprec(outimage, j);
+		outcmptfmts[i].sgnd = jas_image_cmptsgnd(outimage, j);
+		outcmptfmts[i].width = width;
+		outcmptfmts[i].height = 1;
+	}
+
+	/* Transform the image one row at a time. */
+	for (i = 0; i < height; ++i) {
+		for (j = 0; j < numinclrchans; ++j) {
+			k = jas_image_getcmptbytype(inimage, JAS_IMAGE_CT_COLOR(j));
+			if (jas_image_readcmpt2(inimage, k, 0, i, width, 1, incmptfmts[j].buf))
+				goto done;
+		}
+		jas_cmxform_apply(xform, &inpixmap, &outpixmap);
+		for (j = 0; j < numoutclrchans; ++j) {
+			k = jas_image_getcmptbytype(outimage, JAS_IMAGE_CT_COLOR(j));
+			if (jas_image_writecmpt2(outimage, k, 0, i, width, 1, outcmptfmts[j].buf))
+				goto done;
+		}
+	}
+
+	/* Success: hand the output image to the caller instead of destroying it. */
+	result = outimage;
+	outimage = 0;
+
+done:
+	/* Shared cleanup; on failure result is still null and outimage is freed. */
+	for (i = 0; i < numoutbufs; ++i)
+		jas_free(outcmptfmts[i].buf);
+	if (outcmptfmts)
+		jas_free(outcmptfmts);
+	for (i = 0; i < numinbufs; ++i)
+		jas_free(incmptfmts[i].buf);
+	if (incmptfmts)
+		jas_free(incmptfmts);
+	if (xform)
+		jas_cmxform_destroy(xform);
+	if (outimage)
+		jas_image_destroy(outimage);
+	if (inimage)
+		jas_image_destroy(inimage);
+	return result;
+}
diff --git a/src/libjasper/base/jas_init.c b/src/libjasper/base/jas_init.c
new file mode 100644
index 0000000..960d1e9
--- /dev/null
+++ b/src/libjasper/base/jas_init.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_types.h"
+#include "jasper/jas_image.h"
+#include "jasper/jas_init.h"
+
+/******************************************************************************\
+* Code.
+\******************************************************************************/
+
+/* Initialize the image format table: register every compiled-in format. */
+int jas_init()
+{
+	jas_image_fmtops_t fmtops;
+	int fmtid;
+	/* fmtid is the ID given to the next format registered below. */
+	fmtid = 0;
+
+#if !defined(EXCLUDE_MIF_SUPPORT)
+	fmtops.decode = mif_decode;
+	fmtops.encode = mif_encode;
+	fmtops.validate = mif_validate;
+	jas_image_addfmt(fmtid, "mif", "mif", "My Image Format (MIF)", &fmtops);
+	++fmtid;
+#endif
+	/* PNM is registered once per file extension, all under the same ID. */
+#if !defined(EXCLUDE_PNM_SUPPORT)
+	fmtops.decode = pnm_decode;
+	fmtops.encode = pnm_encode;
+	fmtops.validate = pnm_validate;
+	jas_image_addfmt(fmtid, "pnm", "pnm", "Portable Graymap/Pixmap (PNM)",
+	  &fmtops);
+	jas_image_addfmt(fmtid, "pnm", "pgm", "Portable Graymap/Pixmap (PNM)",
+	  &fmtops);
+	jas_image_addfmt(fmtid, "pnm", "ppm", "Portable Graymap/Pixmap (PNM)",
+	  &fmtops);
+	++fmtid;
+#endif
+
+#if !defined(EXCLUDE_BMP_SUPPORT)
+	fmtops.decode = bmp_decode;
+	fmtops.encode = bmp_encode;
+	fmtops.validate = bmp_validate;
+	jas_image_addfmt(fmtid, "bmp", "bmp", "Microsoft Bitmap (BMP)", &fmtops);
+	++fmtid;
+#endif
+
+#if !defined(EXCLUDE_RAS_SUPPORT)
+	fmtops.decode = ras_decode;
+	fmtops.encode = ras_encode;
+	fmtops.validate = ras_validate;
+	jas_image_addfmt(fmtid, "ras", "ras", "Sun Rasterfile (RAS)", &fmtops);
+	++fmtid;
+#endif
+	/* JP2 support registers both the JP2 file format and the JPC code stream. */
+#if !defined(EXCLUDE_JP2_SUPPORT)
+	fmtops.decode = jp2_decode;
+	fmtops.encode = jp2_encode;
+	fmtops.validate = jp2_validate;
+	jas_image_addfmt(fmtid, "jp2", "jp2",
+	  "JPEG-2000 JP2 File Format Syntax (ISO/IEC 15444-1)", &fmtops);
+	++fmtid;
+	fmtops.decode = jpc_decode;
+	fmtops.encode = jpc_encode;
+	fmtops.validate = jpc_validate;
+	jas_image_addfmt(fmtid, "jpc", "jpc",
+	  "JPEG-2000 Code Stream Syntax (ISO/IEC 15444-1)", &fmtops);
+	++fmtid;
+#endif
+
+#if !defined(EXCLUDE_JPG_SUPPORT)
+	fmtops.decode = jpg_decode;
+	fmtops.encode = jpg_encode;
+	fmtops.validate = jpg_validate;
+	jas_image_addfmt(fmtid, "jpg", "jpg", "JPEG (ISO/IEC 10918-1)", &fmtops);
+	++fmtid;
+#endif
+
+#if !defined(EXCLUDE_PGX_SUPPORT)
+	fmtops.decode = pgx_decode;
+	fmtops.encode = pgx_encode;
+	fmtops.validate = pgx_validate;
+	jas_image_addfmt(fmtid, "pgx", "pgx", "JPEG-2000 VM Format (PGX)", &fmtops);
+	++fmtid;
+#endif
+
+#if !defined(EXCLUDE_TIFF_SUPPORT)
+	fmtops.decode = tiff_decode;
+	fmtops.encode = tiff_encode;
+	fmtops.validate = tiff_validate;
+	jas_image_addfmt(fmtid, "tif", "tif", "Tagged Image File (TIFF)", &fmtops);
+	++fmtid;
+#endif
+
+	/* We must not register the JasPer library exit handler until after
+	at least one memory allocation is performed.  This is desirable
+	as it ensures that the JasPer exit handler is called before the
+	debug memory allocator exit handler. */
+	atexit(jas_cleanup);
+	/* Results of jas_image_addfmt() are not checked: 0 is always returned. */
+	return 0;
+}
+
+void jas_cleanup()	/* installed as an exit handler by jas_init() via atexit() */
+{
+	jas_image_clearfmts();	/* defined elsewhere; presumably empties the format table */
+}
diff --git a/src/libjasper/base/jas_malloc.c b/src/libjasper/base/jas_malloc.c
new file mode 100644
index 0000000..77f2bc6
--- /dev/null
+++ b/src/libjasper/base/jas_malloc.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Memory Allocator
+ *
+ * $Id: jas_malloc.c,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* We need the prototype for memset. */
+#include <string.h>
+
+#include "jasper/jas_malloc.h"
+
+/******************************************************************************\
+* Code.
+\******************************************************************************/
+
+#if defined(DEBUG_MEMALLOC)
+#include "../../../local/src/memalloc.c"
+#endif
+
+#if !defined(DEBUG_MEMALLOC)
+
+#define MEMALLOC_ALIGNMENT	32
+#define MEMALLOC_ALIGN2
+#undef MEMALLOC_ALIGN2
+
+void *jas_malloc(size_t size)	/* returns malloc(size) in this configuration */
+{
+#if defined(MEMALLOC_ALIGN2)	/* never true here: the macro is #undef'd above */
+	void *ptr;
+abort();
+	if (posix_memalign(&ptr, MEMALLOC_ALIGNMENT, size)) {
+		return 0;
+	}
+	return ptr;
+#endif
+	return malloc(size);
+}
+
+void jas_free(void *ptr)	/* releases memory by passing ptr straight to free() */
+{
+	free(ptr);
+}
+
+void *jas_realloc(void *ptr, size_t size)	/* thin wrapper: same contract as realloc() */
+{
+	return realloc(ptr, size);
+}
+
+/* Zero-filled allocation of nmemb * size bytes; null on failure or overflow. */
+void *jas_calloc(size_t nmemb, size_t size)
+{
+	void *ptr;
+	size_t n = nmemb * size;
+	/* A wrapped product would under-allocate; n / size != nmemb detects it. */
+	if ((size && n / size != nmemb) || !(ptr = jas_malloc(n))) {
+		return 0;
+	}
+	memset(ptr, 0, n);
+	return ptr;
+}
+
+#endif
diff --git a/src/libjasper/base/jas_seq.c b/src/libjasper/base/jas_seq.c
new file mode 100644
index 0000000..fcf14f0
--- /dev/null
+++ b/src/libjasper/base/jas_seq.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Sequence/Matrix Library
+ *
+ * $Id: jas_seq.c,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+
+#include "jasper/jas_seq.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_math.h"
+
+/******************************************************************************\
+* Constructors and destructors.
+\******************************************************************************/
+
+/* Create a zero-filled sequence covering [xstart,xend) x [ystart,yend); 0 on failure. */
+jas_matrix_t *jas_seq2d_create(int xstart, int ystart, int xend, int yend)
+{
+	jas_matrix_t *seq;
+	assert(xstart <= xend && ystart <= yend);
+	if ((seq = jas_matrix_create(yend - ystart, xend - xstart))) {
+		seq->xstart_ = xstart;
+		seq->ystart_ = ystart;
+		seq->xend_ = xend;
+		seq->yend_ = yend;
+	}
+	return seq;
+}
+
+jas_matrix_t *jas_matrix_create(int numrows, int numcols)
+{
+	jas_matrix_t *matrix;
+	size_t numelems;
+	int i;
+	/* Refuse negative dimensions and sizes that overflow the int datasize_
+	  field ((unsigned) -1 >> 1 stands in for INT_MAX) or a jas_malloc size. */
+	numelems = (size_t) numrows * (size_t) numcols;
+	if (numrows < 0 || numcols < 0 ||
+	  (size_t) numrows > ((size_t) -1) / sizeof(jas_seqent_t *) ||
+	  (numcols > 0 && numelems / (size_t) numcols != (size_t) numrows) ||
+	  numelems > ((size_t) -1) / sizeof(jas_seqent_t) ||
+	  numelems > ((unsigned) -1 >> 1)) {
+		return 0;
+	}
+	if (!(matrix = jas_malloc(sizeof(jas_matrix_t)))) {
+		return 0;
+	}
+	matrix->flags_ = 0;
+	matrix->numrows_ = numrows;
+	matrix->numcols_ = numcols;
+	matrix->rows_ = 0;
+	matrix->maxrows_ = numrows;
+	matrix->data_ = 0;
+	matrix->datasize_ = (int) numelems;
+	matrix->xstart_ = 0;
+	matrix->ystart_ = 0;
+	matrix->xend_ = numcols;
+	matrix->yend_ = numrows;
+	if (numrows > 0 && !(matrix->rows_ = jas_malloc(numrows *
+	  sizeof(jas_seqent_t *)))) {
+		jas_matrix_destroy(matrix);
+		return 0;
+	}
+	if (numelems > 0 && !(matrix->data_ = jas_malloc(numelems *
+	  sizeof(jas_seqent_t)))) {
+		jas_matrix_destroy(matrix);
+		return 0;
+	}
+	for (i = 0; i < numrows; ++i) {
+		matrix->rows_[i] = &matrix->data_[i * numcols];
+	}
+	for (i = 0; i < matrix->datasize_; ++i) {
+		matrix->data_[i] = 0;
+	}
+	return matrix;
+}
+
+/* Release a matrix: element storage (if any), row table, then the header. */
+void jas_matrix_destroy(jas_matrix_t *matrix)
+{
+	if (matrix->data_) {
+		/* jas_matrix_bindsub views carry data_ == 0, so REF is not expected here. */
+		assert(!(matrix->flags_ & JAS_MATRIX_REF));
+		jas_free(matrix->data_);
+	}
+	if (matrix->rows_) {
+		jas_free(matrix->rows_);
+	}
+	jas_free(matrix);
+}
+
+/* Duplicate a 2-D sequence, keeping its bounds; asserts that allocation succeeded. */
+jas_seq2d_t *jas_seq2d_copy(jas_seq2d_t *x)
+{
+	jas_matrix_t *dup;
+	int row, col;
+	dup = jas_seq2d_create(jas_seq2d_xstart(x), jas_seq2d_ystart(x),
+	  jas_seq2d_xend(x), jas_seq2d_yend(x));
+	assert(dup);
+	for (row = 0; row < x->numrows_; ++row) {
+		for (col = 0; col < x->numcols_; ++col) {
+			*jas_matrix_getref(dup, row, col) = jas_matrix_get(x, row, col);
+		}
+	}
+	return dup;
+}
+
+/* Return a newly allocated copy of x, or 0 if the copy cannot be allocated. */
+jas_matrix_t *jas_matrix_copy(jas_matrix_t *x)
+{
+	jas_matrix_t *y = jas_matrix_create(x->numrows_, x->numcols_);
+	int i, j;
+	/* Testing y in the loop guard avoids dereferencing a failed allocation. */
+	for (i = 0; y && i < x->numrows_; ++i) {
+		for (j = 0; j < x->numcols_; ++j) {
+			*jas_matrix_getref(y, i, j) = jas_matrix_get(x, i, j);
+		}
+	}
+	return y;
+}
+
+/******************************************************************************\
+* Bind operations.
+\******************************************************************************/
+
+void jas_seq2d_bindsub(jas_matrix_t *s, jas_matrix_t *s1, int xstart, int ystart,
+  int xend, int yend)
+{	/* Converts the box to row/column indices relative to s1's origin, with inclusive ends (hence the -1). */
+	jas_matrix_bindsub(s, s1, ystart - s1->ystart_, xstart - s1->xstart_,
+	  yend - s1->ystart_ - 1, xend - s1->xstart_ - 1);
+}
+
+void jas_matrix_bindsub(jas_matrix_t *mat0, jas_matrix_t *mat1, int r0, int c0,
+  int r1, int c1)
+{	/* Rebinds mat0 as a view of rows r0..r1 and columns c0..c1 (both inclusive) of mat1. */
+	int i;
+
+	if (mat0->data_) {
+		if (!(mat0->flags_ & JAS_MATRIX_REF)) {
+			jas_free(mat0->data_); /* skipped when the REF flag is already set */
+		}
+		mat0->data_ = 0;
+		mat0->datasize_ = 0;
+	}
+	if (mat0->rows_) {
+		jas_free(mat0->rows_);
+		mat0->rows_ = 0;
+	}
+	mat0->flags_ |= JAS_MATRIX_REF;
+	mat0->numrows_ = r1 - r0 + 1;
+	mat0->numcols_ = c1 - c0 + 1;
+	mat0->maxrows_ = mat0->numrows_;
+	mat0->rows_ = jas_malloc(mat0->maxrows_ * sizeof(jas_seqent_t *)); /* NOTE(review): result is not checked before the loop below writes through it */
+	for (i = 0; i < mat0->numrows_; ++i) {
+		mat0->rows_[i] = mat1->rows_[r0 + i] + c0; /* row pointers point into mat1's rows */
+	}
+
+	mat0->xstart_ = mat1->xstart_ + c0;
+	mat0->ystart_ = mat1->ystart_ + r0;
+	mat0->xend_ = mat0->xstart_ + mat0->numcols_;
+	mat0->yend_ = mat0->ystart_ + mat0->numrows_;
+}
+
+/******************************************************************************\
+* Arithmetic operations.
+\******************************************************************************/
+
+/* Compare two matrices; returns 0 if shapes and all elements match, else 1. */
+int jas_matrix_cmp(jas_matrix_t *mat0, jas_matrix_t *mat1)
+{
+	int row;
+	int col;
+	int differ;
+
+	/* Matrices of different shape are unequal without looking at elements. */
+	differ = (mat0->numrows_ != mat1->numrows_ ||
+	  mat0->numcols_ != mat1->numcols_);
+	for (row = 0; !differ && row < mat0->numrows_; ++row) {
+		for (col = 0; !differ && col < mat0->numcols_; ++col) {
+			differ = (jas_matrix_get(mat0, row, col) !=
+			  jas_matrix_get(mat1, row, col));
+		}
+	}
+	return differ;
+}
+
+/* Divide every element by 2^n, rounding toward zero (no-op on an empty matrix). */
+void jas_matrix_divpow2(jas_matrix_t *matrix, int n)
+{
+	int i, j, rowstep;
+	jas_seqent_t *rowstart, *data;
+	/* An empty matrix may have no row table; do not touch rows_[0]. */
+	if (matrix->numrows_ <= 0) {
+		return;
+	}
+	rowstep = jas_matrix_rowstep(matrix);
+	rowstart = matrix->rows_[0];
+	for (i = matrix->numrows_; i > 0; --i, rowstart += rowstep) {
+		for (j = matrix->numcols_, data = rowstart; j > 0; --j, ++data) {
+			*data = (*data >= 0) ? ((*data) >> n) :
+			  (-((-(*data)) >> n));
+		}
+	}
+}
+
+/* Clamp every element into [minval, maxval] (no-op on an empty matrix). */
+void jas_matrix_clip(jas_matrix_t *matrix, jas_seqent_t minval, jas_seqent_t maxval)
+{
+	int i, j, rowstep;
+	jas_seqent_t v;
+	jas_seqent_t *rowstart, *data;
+
+	/* An empty matrix may have no row table; do not touch rows_[0]. */
+	if (matrix->numrows_ <= 0) {
+		return;
+	}
+	rowstep = jas_matrix_rowstep(matrix);
+	rowstart = matrix->rows_[0];
+	for (i = matrix->numrows_; i > 0; --i, rowstart += rowstep) {
+		for (j = matrix->numcols_, data = rowstart; j > 0; --j, ++data) {
+			v = *data;
+			if (v < minval) {
+				*data = minval;
+			} else if (v > maxval) {
+				*data = maxval;
+			}
+		}
+	}
+}
+
+/* Shift every element right by n bits (no-op on an empty matrix). */
+void jas_matrix_asr(jas_matrix_t *matrix, int n)
+{
+	int i, j, rowstep;
+	jas_seqent_t *rowstart, *data;
+	assert(n >= 0);
+	/* An empty matrix may have no row table; do not touch rows_[0]. */
+	if (matrix->numrows_ <= 0) {
+		return;
+	}
+	rowstep = jas_matrix_rowstep(matrix);
+	rowstart = matrix->rows_[0];
+	for (i = matrix->numrows_; i > 0; --i, rowstart += rowstep) {
+		for (j = matrix->numcols_, data = rowstart; j > 0; --j, ++data) {
+			*data >>= n;
+		}
+	}
+}
+
+/* Shift every element left by n bits (no-op on an empty matrix). */
+void jas_matrix_asl(jas_matrix_t *matrix, int n)
+{
+	int i, j, rowstep;
+	jas_seqent_t *rowstart, *data;
+	/* An empty matrix may have no row table; do not touch rows_[0]. */
+	if (matrix->numrows_ <= 0) {
+		return;
+	}
+	rowstep = jas_matrix_rowstep(matrix);
+	rowstart = matrix->rows_[0];
+	for (i = matrix->numrows_; i > 0; --i, rowstart += rowstep) {
+		for (j = matrix->numcols_, data = rowstart; j > 0; --j, ++data) {
+			*data <<= n;
+		}
+	}
+}
+
+/******************************************************************************\
+* Code.
+\******************************************************************************/
+
+/* Reshape in place within existing capacity; returns 0 on success, -1 if it does not fit. */
+int jas_matrix_resize(jas_matrix_t *matrix, int numrows, int numcols)
+{
+	int row;
+
+	/* Neither the element buffer nor the row table is reallocated here. */
+	if (numrows * numcols > matrix->datasize_ || numrows > matrix->maxrows_) {
+		return -1;
+	}
+
+	matrix->numrows_ = numrows;
+	matrix->numcols_ = numcols;
+	/* Re-point each row at its new offset in the element buffer. */
+	for (row = 0; row < numrows; ++row) {
+		matrix->rows_[row] = matrix->data_ + numcols * row;
+	}
+
+	return 0;
+}
+
+/* Set every element of the matrix to val (no-op on an empty matrix). */
+void jas_matrix_setall(jas_matrix_t *matrix, jas_seqent_t val)
+{
+	int i, j, rowstep;
+	jas_seqent_t *rowstart, *data;
+	/* An empty matrix may have no row table; do not touch rows_[0]. */
+	if (matrix->numrows_ <= 0) {
+		return;
+	}
+	rowstep = jas_matrix_rowstep(matrix);
+	rowstart = matrix->rows_[0];
+	for (i = matrix->numrows_; i > 0; --i, rowstart += rowstep) {
+		for (j = matrix->numcols_, data = rowstart; j > 0; --j, ++data) {
+			*data = val;
+		}
+	}
+}
+
+/* Read a sequence in the text form written by jas_seq2d_output; 0 on malformed input or allocation failure. */
+jas_matrix_t *jas_seq2d_input(FILE *in)
+{
+	jas_matrix_t *matrix;
+	int i, j;
+	long x;
+	int numrows, numcols;
+	int xoff;
+	int yoff;
+
+	if (fscanf(in, "%d %d", &xoff, &yoff) != 2)
+		return 0;
+	/* Negative sizes cannot describe a matrix; reject them as malformed. */
+	if (fscanf(in, "%d %d", &numcols, &numrows) != 2 || numcols < 0 || numrows < 0)
+		return 0;
+	if (!(matrix = jas_seq2d_create(xoff, yoff, xoff + numcols, yoff + numrows)))
+		return 0;
+
+	if (jas_matrix_numrows(matrix) != numrows || jas_matrix_numcols(matrix) != numcols) {
+		abort();
+	}
+
+	/* Get matrix data. */
+	for (i = 0; i < jas_matrix_numrows(matrix); i++) {
+		for (j = 0; j < jas_matrix_numcols(matrix); j++) {
+			if (fscanf(in, "%ld", &x) != 1) {
+				jas_matrix_destroy(matrix);
+				return 0;
+			}
+			jas_matrix_set(matrix, i, j, JAS_CAST(jas_seqent_t, x));
+		}
+	}
+
+	return matrix;
+}
+
+/* Write a sequence as text: origin, then size, then elements in lines of at most MAXLINELEN chars. */
+int jas_seq2d_output(jas_matrix_t *matrix, FILE *out)
+{
+#define MAXLINELEN	80
+	int row;
+	int col;
+	jas_seqent_t x;
+	char line[MAXLINELEN + 1];
+	char field[MAXLINELEN + 1];
+
+	fprintf(out, "%d %d\n", jas_seq2d_xstart(matrix),
+	  jas_seq2d_ystart(matrix));
+	fprintf(out, "%d %d\n", jas_matrix_numcols(matrix),
+	  jas_matrix_numrows(matrix));
+
+	line[0] = '\0';
+	for (row = 0; row < jas_matrix_numrows(matrix); ++row) {
+		for (col = 0; col < jas_matrix_numcols(matrix); ++col) {
+			x = jas_matrix_get(matrix, row, col);
+			/* Every field but the first on a line gets a leading space. */
+			sprintf(field, "%s%4ld", (line[0] != '\0') ? " " : "",
+			  JAS_CAST(long, x));
+			/* Flush first if appending would exceed the line limit. */
+			if (strlen(line) + strlen(field) > MAXLINELEN) {
+				fputs(line, out);
+				fputs("\n", out);
+				line[0] = '\0';
+			}
+			strcat(line, field);
+			if (col + 1 == jas_matrix_numcols(matrix)) {
+				fputs(line, out);
+				fputs("\n", out);
+				line[0] = '\0';
+			}
+		}
+	}
+	fputs(line, out);
+	return 0;
+}
diff --git a/src/libjasper/base/jas_stream.c b/src/libjasper/base/jas_stream.c
new file mode 100644
index 0000000..f2b6d3a
--- /dev/null
+++ b/src/libjasper/base/jas_stream.c
@@ -0,0 +1,1151 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * I/O Stream Library
+ *
+ * $Id: jas_stream.c,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <assert.h>
+#if defined(HAVE_FCNTL_H)
+#include <fcntl.h>
+#endif
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <ctype.h>
+#if defined(HAVE_UNISTD_H)
+#include <unistd.h>
+#endif
+#if defined(WIN32) || defined(HAVE_IO_H)
+#include <io.h>
+#endif
+
+#include "jasper/jas_types.h"
+#include "jasper/jas_stream.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_math.h"
+
+/******************************************************************************\
+* Local function prototypes.
+\******************************************************************************/
+
+static int jas_strtoopenmode(const char *s);
+static void jas_stream_destroy(jas_stream_t *stream);
+jas_stream_t *jas_stream_create(void);  /* IMLIB - removed static, so it can be used in jas_binfile.c */
+void jas_stream_initbuf(jas_stream_t *stream, int bufmode, char *buf,  /* IMLIB - removed static, so it can be used in jas_binfile.c */
+  int bufsize);
+
+static int mem_read(jas_stream_obj_t *obj, char *buf, int cnt);
+static int mem_write(jas_stream_obj_t *obj, char *buf, int cnt);
+static long mem_seek(jas_stream_obj_t *obj, long offset, int origin);
+static int mem_close(jas_stream_obj_t *obj);
+
+static int sfile_read(jas_stream_obj_t *obj, char *buf, int cnt);
+static int sfile_write(jas_stream_obj_t *obj, char *buf, int cnt);
+static long sfile_seek(jas_stream_obj_t *obj, long offset, int origin);
+static int sfile_close(jas_stream_obj_t *obj);
+
+static int file_read(jas_stream_obj_t *obj, char *buf, int cnt);
+static int file_write(jas_stream_obj_t *obj, char *buf, int cnt);
+static long file_seek(jas_stream_obj_t *obj, long offset, int origin);
+static int file_close(jas_stream_obj_t *obj);
+
+/******************************************************************************\
+* Local data.
+\******************************************************************************/
+
+static jas_stream_ops_t jas_stream_fileops = { /* operations for streams backed by a file descriptor object */
+	file_read,
+	file_write,
+	file_seek,
+	file_close
+};
+
+static jas_stream_ops_t jas_stream_sfileops = { /* operations for streams wrapping a stdio FILE */
+	sfile_read,
+	sfile_write,
+	sfile_seek,
+	sfile_close
+};
+
+static jas_stream_ops_t jas_stream_memops = { /* operations for streams held in a memory buffer */
+	mem_read,
+	mem_write,
+	mem_seek,
+	mem_close
+};
+
+/******************************************************************************\
+* Code for opening and closing streams.
+\******************************************************************************/
+
+/* Allocate a stream object with all fields reset; returns 0 if allocation fails. */
+jas_stream_t *jas_stream_create()  /* IMLIB - removed static, so it can be used in jas_binfile.c */
+{
+	jas_stream_t *stream = jas_malloc(sizeof(jas_stream_t));
+
+	if (stream) {
+		/* No mode, buffer, operations or object are attached yet. */
+		stream->openmode_ = 0;
+		stream->bufmode_ = 0;
+		stream->flags_ = 0;
+		stream->bufbase_ = 0;
+		stream->bufstart_ = 0;
+		stream->bufsize_ = 0;
+		stream->ptr_ = 0;
+		stream->cnt_ = 0;
+		stream->ops_ = 0;
+		stream->obj_ = 0;
+		stream->rwcnt_ = 0;
+		stream->rwlimit_ = -1;
+	}
+	return stream;
+}
+
+jas_stream_t *jas_stream_memopen(char *buf, int bufsize)
+{	/* Opens a read/write binary stream over buf, or over an internally allocated buffer when buf is null; returns 0 on allocation failure. */
+	jas_stream_t *stream;
+	jas_stream_memobj_t *obj;
+
+	if (!(stream = jas_stream_create())) {
+		return 0;
+	}
+
+	/* A stream associated with a memory buffer is always opened
+	for both reading and writing in binary mode. */
+	stream->openmode_ = JAS_STREAM_READ | JAS_STREAM_WRITE | JAS_STREAM_BINARY;
+
+	/* Since the stream data is already resident in memory, buffering
+	is not necessary. */
+	/* But... It still may be faster to use buffering anyways. */
+	jas_stream_initbuf(stream, JAS_STREAM_FULLBUF, 0, 0);
+
+	/* Select the operations for a memory stream. */
+	stream->ops_ = &jas_stream_memops;
+
+	/* Allocate memory for the underlying memory stream object. */
+	if (!(obj = jas_malloc(sizeof(jas_stream_memobj_t)))) {
+		jas_stream_destroy(stream); /* no memory object exists yet, so only the stream is torn down */
+		return 0;
+	}
+	stream->obj_ = (void *) obj;
+
+	/* Initialize a few important members of the memory stream object. */
+	obj->myalloc_ = 0;
+	obj->buf_ = 0;
+
+	/* If the buffer size specified is nonpositive, then the buffer
+	is allocated internally and automatically grown as needed. */
+	/* NOTE(review): a caller-supplied buf with bufsize <= 0 is also marked
+	growable while myalloc_ stays 0 -- confirm how the resize path treats it. */
+	if (bufsize <= 0) {
+		obj->bufsize_ = 1024;
+		obj->growable_ = 1;
+	} else {
+		obj->bufsize_ = bufsize;
+		obj->growable_ = 0;
+	}
+	if (buf) {
+		obj->buf_ = (unsigned char *) buf;
+	} else {
+		obj->buf_ = jas_malloc(obj->bufsize_ * sizeof(char));
+		obj->myalloc_ = 1;
+	}
+	if (!obj->buf_) {
+		jas_stream_close(stream); /* goes through the memory close operation; presumably that frees obj (not shown here) */
+		return 0;
+	}
+	if (bufsize > 0 && buf) {
+		/* If a buffer was supplied by the caller and its length is positive,
+		  make the associated buffer data appear in the stream initially. */
+		obj->len_ = bufsize;
+	} else {
+		/* The stream is initially empty. */
+		obj->len_ = 0;
+	}
+	obj->pos_ = 0;
+	return stream;
+}
+
+/* Open the named file as a stream using a mode string parsed by jas_strtoopenmode; returns 0 on failure. */
+jas_stream_t *jas_stream_fopen(const char *filename, const char *mode)
+{
+	jas_stream_t *stream;
+	jas_stream_fileobj_t *obj;
+	int openflags;
+	/* Allocate a stream object. */
+	if (!(stream = jas_stream_create())) {
+		return 0;
+	}
+
+	/* Parse the mode string. */
+	stream->openmode_ = jas_strtoopenmode(mode);
+
+	/* Determine the correct flags to use for opening the file. */
+	if ((stream->openmode_ & JAS_STREAM_READ) &&
+	  (stream->openmode_ & JAS_STREAM_WRITE)) {
+		openflags = O_RDWR;
+	} else if (stream->openmode_ & JAS_STREAM_READ) {
+		openflags = O_RDONLY;
+	} else if (stream->openmode_ & JAS_STREAM_WRITE) {
+		openflags = O_WRONLY;
+	} else {
+		openflags = 0;
+	}
+	if (stream->openmode_ & JAS_STREAM_APPEND) {
+		openflags |= O_APPEND;
+	}
+	if (stream->openmode_ & JAS_STREAM_BINARY) {
+		openflags |= O_BINARY;
+	}
+	if (stream->openmode_ & JAS_STREAM_CREATE) {
+		openflags |= O_CREAT | O_TRUNC;
+	}
+
+	/* Allocate space for the underlying file stream object. */
+	if (!(obj = jas_malloc(sizeof(jas_stream_fileobj_t)))) {
+		jas_stream_destroy(stream);
+		return 0;
+	}
+	obj->fd = -1;
+	obj->flags = 0;
+	strncpy(obj->pathname, filename, DIM_MAX_FILE_NAME); // GeoJasper: dima
+	obj->pathname[DIM_MAX_FILE_NAME - 1] = '\0'; /* strncpy adds no NUL when it truncates; assumes pathname holds DIM_MAX_FILE_NAME bytes, as the bound above already does */
+	stream->obj_ = (void *) obj;
+
+	/* Select the operations for a file stream object. */
+	stream->ops_ = &jas_stream_fileops;
+
+	/* Open the underlying file. */
+	if ((obj->fd = open(filename, openflags, JAS_STREAM_PERMS)) < 0) {
+		jas_free(obj); /* jas_stream_destroy does not release obj_ */
+		jas_stream_destroy(stream);
+		return 0;
+	}
+
+	/* By default, use full buffering for this type of stream. */
+	jas_stream_initbuf(stream, JAS_STREAM_FULLBUF, 0, 0);
+	return stream;
+}
+
+jas_stream_t *jas_stream_freopen(const char *path, const char *mode, FILE *fp)
+{	/* Wraps the already-open stdio stream fp in a JasPer stream; path is ignored. Returns 0 if the stream object cannot be allocated. */
+	jas_stream_t *stream;
+	int openflags; /* NOTE(review): computed below but never read in this function */
+
+	/* Eliminate compiler warning about unused variable. */
+	path = 0;
+
+	/* Allocate a stream object. */
+	if (!(stream = jas_stream_create())) {
+		return 0;
+	}
+
+	/* Parse the mode string. */
+	stream->openmode_ = jas_strtoopenmode(mode);
+
+	/* Determine the correct flags to use for opening the file. */
+	if ((stream->openmode_ & JAS_STREAM_READ) &&
+	  (stream->openmode_ & JAS_STREAM_WRITE)) {
+		openflags = O_RDWR;
+	} else if (stream->openmode_ & JAS_STREAM_READ) {
+		openflags = O_RDONLY;
+	} else if (stream->openmode_ & JAS_STREAM_WRITE) {
+		openflags = O_WRONLY;
+	} else {
+		openflags = 0;
+	}
+	if (stream->openmode_ & JAS_STREAM_APPEND) {
+		openflags |= O_APPEND;
+	}
+	if (stream->openmode_ & JAS_STREAM_BINARY) {
+		openflags |= O_BINARY;
+	}
+	if (stream->openmode_ & JAS_STREAM_CREATE) {
+		openflags |= O_CREAT | O_TRUNC;
+	}
+
+	stream->obj_ = JAS_CAST(void *, fp); /* the FILE pointer itself is the stream object */
+
+	/* Select the operations for a file stream object. */
+	stream->ops_ = &jas_stream_sfileops;
+
+	/* By default, use full buffering for this type of stream. */
+	jas_stream_initbuf(stream, JAS_STREAM_FULLBUF, 0, 0);
+
+	return stream;
+}
+
+/* Create a stream on a fresh temporary file opened read/write in binary mode; returns 0 on failure. */
+jas_stream_t *jas_stream_tmpfile()
+{
+	jas_stream_t *stream;
+	jas_stream_fileobj_t *obj;
+	if (!(stream = jas_stream_create())) {
+		return 0;
+	}
+
+	/* A temporary file stream is always opened for both reading and
+	writing in binary mode. */
+	stream->openmode_ = JAS_STREAM_READ | JAS_STREAM_WRITE | JAS_STREAM_BINARY;
+
+	/* Allocate memory for the underlying temporary file object. */
+	if (!(obj = jas_malloc(sizeof(jas_stream_fileobj_t)))) {
+		jas_stream_destroy(stream);
+		return 0;
+	}
+	obj->fd = -1;
+	obj->flags = 0;
+	obj->pathname[0] = '\0';
+	stream->obj_ = obj;
+
+	/* Choose a file name. */
+	tmpnam(obj->pathname);
+
+	/* Open the underlying file. */
+	if ((obj->fd = open(obj->pathname, O_CREAT | O_EXCL | O_RDWR | O_TRUNC | O_BINARY,
+	  JAS_STREAM_PERMS)) < 0) {
+		jas_free(obj); /* jas_stream_destroy does not release obj_ */
+		jas_stream_destroy(stream);
+		return 0;
+	}
+
+	/* Unlink the file so that it will disappear if the program
+	terminates abnormally. */
+	/* Under UNIX, one can unlink an open file and continue to do I/O
+	on it.  Not all operating systems support this functionality, however.
+	For example, under Microsoft Windows the unlink operation will fail,
+	since the file is open. */
+	if (unlink(obj->pathname)) {
+		/* We will try unlinking the file again after it is closed. */
+		obj->flags |= JAS_STREAM_FILEOBJ_DELONCLOSE;
+	}
+
+	/* Use full buffering. */
+	jas_stream_initbuf(stream, JAS_STREAM_FULLBUF, 0, 0);
+	stream->ops_ = &jas_stream_fileops;
+
+	return stream;
+}
+
+jas_stream_t *jas_stream_fdopen(int fd, const char *mode)
+{	/* Wraps an existing file descriptor in a stream flagged JAS_STREAM_FILEOBJ_NOCLOSE; returns 0 on allocation failure. */
+	jas_stream_t *stream;
+	jas_stream_fileobj_t *obj;
+
+	/* Allocate a stream object. */
+	if (!(stream = jas_stream_create())) {
+		return 0;
+	}
+
+	/* Parse the mode string. */
+	stream->openmode_ = jas_strtoopenmode(mode);
+
+#if defined(WIN32)
+	/* Argh!!!  Someone ought to banish text mode (i.e., O_TEXT) to the
+	  greatest depths of purgatory! */
+	/* Ensure that the file descriptor is in binary mode, if the caller
+	  has specified the binary mode flag.  Arguably, the caller ought to
+	  take care of this, but text mode is a ugly wart anyways, so we save
+	  the caller some grief by handling this within the stream library. */
+	/* This ugliness is mainly for the benefit of those who run the
+	  JasPer software under Windows from shells that insist on opening
+	  files in text mode.  For example, in the Cygwin environment,
+	  shells often open files in text mode when I/O redirection is
+	  used.  Grr... */
+	if (stream->openmode_ & JAS_STREAM_BINARY) {
+		setmode(fd, O_BINARY);
+	}
+#endif
+
+	/* Allocate space for the underlying file stream object. */
+	if (!(obj = jas_malloc(sizeof(jas_stream_fileobj_t)))) {
+		jas_stream_destroy(stream); /* fd itself is left open for the caller */
+		return 0;
+	}
+	obj->fd = fd;
+	obj->flags = 0;
+	obj->pathname[0] = '\0'; /* no path is known for a caller-supplied descriptor */
+	stream->obj_ = (void *) obj;
+
+	/* Do not close the underlying file descriptor when the stream is
+	closed. */
+	obj->flags |= JAS_STREAM_FILEOBJ_NOCLOSE;
+
+	/* By default, use full buffering for this type of stream. */
+	jas_stream_initbuf(stream, JAS_STREAM_FULLBUF, 0, 0);
+
+	/* Select the operations for a file stream object. */
+	stream->ops_ = &jas_stream_fileops;
+
+	return stream;
+}
+
+/* Free a stream object, plus its buffer when the stream allocated that buffer. */
+static void jas_stream_destroy(jas_stream_t *stream)
+{
+	/* JAS_STREAM_FREEBUF marks a buffer obtained from jas_malloc. */
+	if (stream->bufbase_ && (stream->bufmode_ & JAS_STREAM_FREEBUF)) {
+		jas_free(stream->bufbase_);
+		stream->bufbase_ = 0;
+	}
+	jas_free(stream);
+}
+
+/* Flush, close the underlying object, free the stream; always returns 0. */
+int jas_stream_close(jas_stream_t *stream)
+{
+	/* Push out any buffered data first; a flush failure is not reported. */
+	(void) jas_stream_flush(stream);
+
+	/* Let the stream type release its own object, then free the stream. */
+	stream->ops_->close_(stream->obj_);
+	jas_stream_destroy(stream);
+
+	return 0;
+}
+
+/******************************************************************************\
+* Code for reading and writing streams.
+\******************************************************************************/
+
+int jas_stream_getc_func(jas_stream_t *stream)
+{	/* Function form of jas_stream_getc_macro; the assert checks ptr_ stays within the buffer plus putback area. */
+	assert(stream->ptr_ - stream->bufbase_ <= stream->bufsize_ +
+	  JAS_STREAM_MAXPUTBACK);
+	return jas_stream_getc_macro(stream);
+}
+
+int jas_stream_putc_func(jas_stream_t *stream, int c)
+{	/* Function form of jas_stream_putc_macro; the assert checks ptr_ has not run past the buffer. */
+	assert(stream->ptr_ - stream->bufstart_ <= stream->bufsize_);
+	return jas_stream_putc_macro(stream, c);
+}
+
+/* Push one character back onto the stream; returns 0, or -1 if there is no room. */
+int jas_stream_ungetc(jas_stream_t *stream, int c)
+{
+	/* Nothing can be pushed back before the very start of the buffer. */
+	if (!stream->ptr_ || stream->ptr_ == stream->bufbase_) {
+		return -1;
+	}
+
+	/* A pushed-back character is readable again, so EOF no longer holds. */
+	stream->flags_ &= ~JAS_STREAM_EOF;
+
+	--stream->rwcnt_;
+	++stream->cnt_;
+	*--stream->ptr_ = c;
+	return 0;
+}
+
+/* Read up to cnt bytes into buf; returns the number of bytes actually stored. */
+int jas_stream_read(jas_stream_t *stream, void *buf, int cnt)
+{
+	int got;
+	int c;
+	char *dst;
+
+	dst = buf;
+	/* Bytes are fetched one at a time through jas_stream_getc. */
+	for (got = 0; got < cnt; ++got) {
+		/* Stop early, keeping what was read, when EOF is reported. */
+		if ((c = jas_stream_getc(stream)) == EOF) {
+			break;
+		}
+		dst[got] = c;
+	}
+
+	return got;
+}
+
+/* Write cnt bytes from buf to the stream; returns the number of bytes
+  accepted, which is less than cnt if jas_stream_putc reports EOF. */
+int jas_stream_write(jas_stream_t *stream, const void *buf, int cnt)
+{
+	const char *src;
+	int put;
+
+	src = buf;
+
+	for (put = 0; put < cnt; ++put) {
+		/* Bytes are written one at a time through jas_stream_putc. */
+		if (jas_stream_putc(stream, src[put]) == EOF) {
+			break;
+		}
+	}
+
+	return put;
+}
+
+/* Note: formats into a fixed-size buffer; longer output is truncated (never
+  overrun), and the return value is vsnprintf's untruncated length. */
+int jas_stream_printf(jas_stream_t *stream, const char *fmt, ...)
+{
+	va_list ap;
+	char buf[4096];
+	int ret;
+	va_start(ap, fmt);
+	ret = vsnprintf(buf, sizeof(buf), fmt, ap);
+	va_end(ap);
+	if (ret >= 0) {
+		jas_stream_puts(stream, buf);
+	}
+	return ret;
+}
+
+/* Write a NUL-terminated string (terminator excluded); 0 on success, -1 on error. */
+int jas_stream_puts(jas_stream_t *stream, const char *s)
+{
+	for (; *s != '\0'; ++s) {
+		if (jas_stream_putc_macro(stream, *s) == EOF) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* Read up to bufsize - 1 characters, stopping after a newline or at EOF; always terminates and returns buf. */
+char *jas_stream_gets(jas_stream_t *stream, char *buf, int bufsize)
+{
+	int c;
+	int room;
+	char *dst = buf;
+	assert(bufsize > 0);
+	/* Keep one byte in reserve for the terminating NUL. */
+	for (room = bufsize - 1; room > 0; --room) {
+		if ((c = jas_stream_getc(stream)) == EOF) {
+			break;
+		}
+		*dst++ = c;
+		if (c == '\n') {
+			break;
+		}
+	}
+	*dst = '\0';
+	return buf;
+}
+
+/* Discard up to n bytes of input; returns how many were actually consumed. */
+int jas_stream_gobble(jas_stream_t *stream, int n)
+{
+	int skipped;
+	for (skipped = 0; skipped < n; ++skipped) {
+		if (jas_stream_getc(stream) == EOF) {
+			return skipped;
+		}
+	}
+	return n;
+}
+
+/* Write n copies of byte c; returns the count written (n when all succeed). */
+int jas_stream_pad(jas_stream_t *stream, int n, int c)
+{
+	int written;
+	for (written = 0; written < n; ++written) {
+		if (jas_stream_putc(stream, c) == EOF)
+			return written;
+	}
+	return n;
+}
+
+/******************************************************************************\
+* Code for getting and setting the stream position.
+\******************************************************************************/
+
+/* Report whether the stream supports seeking (1) or not (0). */
+int jas_stream_isseekable(jas_stream_t *stream)
+{
+	/* Memory streams can always seek. */
+	if (stream->ops_ == &jas_stream_memops) {
+		return 1;
+	}
+	/* File-descriptor streams are probed with a no-op seek; others cannot seek. */
+	if (stream->ops_ != &jas_stream_fileops) {
+		return 0;
+	}
+	return (*stream->ops_->seek_)(stream->obj_, 0, SEEK_CUR) >= 0;
+}
+
+int jas_stream_rewind(jas_stream_t *stream)
+{	/* Seeks to offset 0 from SEEK_SET; the long result of jas_stream_seek is returned as int. */
+	return jas_stream_seek(stream, 0, SEEK_SET);
+}
+
+long jas_stream_seek(jas_stream_t *stream, long offset, int origin)
+{	/* Repositions the stream, discarding or flushing buffered data first; returns the new position or -1 on failure. */
+	long newpos;
+
+	/* The buffer cannot be in use for both reading and writing. */
+	assert(!((stream->bufmode_ & JAS_STREAM_RDBUF) && (stream->bufmode_ &
+	  JAS_STREAM_WRBUF)));
+
+	/* Reset the EOF indicator (since we may not be at the EOF anymore). */
+	stream->flags_ &= ~JAS_STREAM_EOF;
+
+	if (stream->bufmode_ & JAS_STREAM_RDBUF) {
+		if (origin == SEEK_CUR) {
+			offset -= stream->cnt_; /* cnt_ bytes are buffered but unread, so the underlying position is that far ahead */
+		}
+	} else if (stream->bufmode_ & JAS_STREAM_WRBUF) {
+		if (jas_stream_flush(stream)) {
+			return -1;
+		}
+	}
+	stream->cnt_ = 0; /* the buffer is now treated as empty */
+	stream->ptr_ = stream->bufstart_;
+	stream->bufmode_ &= ~(JAS_STREAM_RDBUF | JAS_STREAM_WRBUF); /* buffer is in neither read nor write use */
+
+	if ((newpos = (*stream->ops_->seek_)(stream->obj_, offset, origin))
+	  < 0) {
+		return -1;
+	}
+
+	return newpos;
+}
+
+/* Return the current position in the stream, or -1 if the underlying seek fails. */
+long jas_stream_tell(jas_stream_t *stream)
+{
+	/* Both are long so that offsets beyond INT_MAX are not truncated. */
+	long adjust;
+	long offset;
+
+	if (stream->bufmode_ & JAS_STREAM_RDBUF) {
+		adjust = -stream->cnt_;
+	} else if (stream->bufmode_ & JAS_STREAM_WRBUF) {
+		adjust = stream->ptr_ - stream->bufstart_;
+	} else {
+		adjust = 0;
+	}
+	if ((offset = (*stream->ops_->seek_)(stream->obj_, 0, SEEK_CUR)) < 0) {
+		return -1;
+	}
+	return offset + adjust;
+}
+
+/******************************************************************************\
+* Buffer initialization code.
+\******************************************************************************/
+
+void jas_stream_initbuf(jas_stream_t *stream, int bufmode, char *buf, /* IMLIB - removed static, so it can be used in jas_binfile.c */
+  int bufsize)
+{	/* Attach a buffer to a stream that has none and reset its buffer state. */
+	/* If this function is being called, the buffer should not have been
+	  initialized yet. */
+	assert(!stream->bufbase_);
+
+	if (bufmode != JAS_STREAM_UNBUF) {
+		/* The full- or line-buffered mode is being employed. */
+		if (!buf) {
+			/* The caller has not specified a buffer to employ, so allocate
+			  one. */
+			if ((stream->bufbase_ = jas_malloc(JAS_STREAM_BUFSIZE +
+			  JAS_STREAM_MAXPUTBACK))) {
+				stream->bufmode_ |= JAS_STREAM_FREEBUF;	/* record that the buffer was allocated here */
+				stream->bufsize_ = JAS_STREAM_BUFSIZE;
+			} else {
+				/* The buffer allocation has failed.  Resort to unbuffered
+				  operation. */
+				stream->bufbase_ = stream->tinybuf_;
+				stream->bufsize_ = 1;
+			}
+		} else {
+			/* The caller has specified a buffer to employ. */
+			/* The buffer must be large enough to accommodate maximum
+			  putback. */
+			assert(bufsize > JAS_STREAM_MAXPUTBACK);
+			stream->bufbase_ = JAS_CAST(uchar *, buf);
+			stream->bufsize_ = bufsize - JAS_STREAM_MAXPUTBACK;	/* usable size excludes the putback area */
+		}
+	} else {
+		/* The unbuffered mode is being employed. */
+		/* A buffer should not have been supplied by the caller. */
+		assert(!buf);
+		/* Use a trivial one-character buffer. */
+		stream->bufbase_ = stream->tinybuf_;
+		stream->bufsize_ = 1;
+	}
+	stream->bufstart_ = &stream->bufbase_[JAS_STREAM_MAXPUTBACK];	/* data area begins after the putback area */
+	stream->ptr_ = stream->bufstart_;
+	stream->cnt_ = 0;
+	stream->bufmode_ |= bufmode & JAS_STREAM_BUFMODEMASK;	/* the requested mode is recorded even after the fallback */
+}
+
+/******************************************************************************\
+* Buffer filling and flushing code.
+\******************************************************************************/
+
+int jas_stream_flush(jas_stream_t *stream)
+{
+	/* A buffer that is in read mode is not flushed; report success. */
+	const int reading = (stream->bufmode_ & JAS_STREAM_RDBUF) != 0;
+
+	return reading ? 0 : jas_stream_flushbuf(stream, EOF);
+}
+
+int jas_stream_fillbuf(jas_stream_t *stream, int getflag)
+{	/* Refill the read buffer; returns its first character, or EOF. */
+	int c;
+
+	/* The stream must not be in an error or EOF state. */
+	if ((stream->flags_ & (JAS_STREAM_ERRMASK)) != 0) {
+		return EOF;
+	}
+
+	/* The stream must be open for reading. */
+	if ((stream->openmode_ & JAS_STREAM_READ) == 0) {
+		return EOF;
+	}
+
+	/* Make a half-hearted attempt to confirm that the buffer is not
+	currently being used for writing.  This check is not intended
+	to be foolproof! */
+	assert((stream->bufmode_ & JAS_STREAM_WRBUF) == 0);
+
+	assert(stream->ptr_ - stream->bufstart_ <= stream->bufsize_);
+
+	/* Mark the buffer as being used for reading. */
+	stream->bufmode_ |= JAS_STREAM_RDBUF;
+
+	/* Read new data into the buffer. */
+	stream->ptr_ = stream->bufstart_;
+	if ((stream->cnt_ = (*stream->ops_->read_)(stream->obj_,
+	  (char *) stream->bufstart_, stream->bufsize_)) <= 0) {
+		if (stream->cnt_ < 0) {	/* a negative result is a read error */
+			stream->flags_ |= JAS_STREAM_ERR;
+		} else {	/* a zero result is the end of the data */
+			stream->flags_ |= JAS_STREAM_EOF;
+		}
+		stream->cnt_ = 0;	/* never leave a negative count behind */
+		return EOF;
+	}
+
+	assert(stream->cnt_ > 0);
+	/* Get or peek at the first character in the buffer. */
+	c = (getflag) ? jas_stream_getc2(stream) : (*stream->ptr_);	/* peeking leaves ptr_ untouched */
+
+	return c;
+}
+
+int jas_stream_flushbuf(jas_stream_t *stream, int c)
+{	/* Write out buffered output, then store c unless it is EOF. */
+	int len;	/* number of characters waiting in the buffer */
+	int n;	/* count reported by the underlying write */
+
+	/* The stream should not be in an error or EOF state. */
+	if ((stream->flags_ & (JAS_STREAM_ERRMASK)) != 0) {
+		return EOF;
+	}
+
+	/* The stream must be open for writing. */
+	if ((stream->openmode_ & (JAS_STREAM_WRITE | JAS_STREAM_APPEND)) == 0) {
+		return EOF;
+	}
+
+	/* The buffer should not currently be in use for reading. */
+	assert(!(stream->bufmode_ & JAS_STREAM_RDBUF));
+
+	/* Note: Do not use the quantity stream->cnt to determine the number
+	of characters in the buffer!  Depending on how this function was
+	called, the stream->cnt value may be "off-by-one". */
+	len = stream->ptr_ - stream->bufstart_;
+	if (len > 0) {
+		n = (*stream->ops_->write_)(stream->obj_, (char *)
+		  stream->bufstart_, len);
+		if (n != len) {	/* anything but a complete write is an error */
+			stream->flags_ |= JAS_STREAM_ERR;
+			return EOF;
+		}
+	}
+	stream->cnt_ = stream->bufsize_;	/* the whole buffer is free again */
+	stream->ptr_ = stream->bufstart_;
+
+	stream->bufmode_ |= JAS_STREAM_WRBUF;
+
+	if (c != EOF) {	/* callers pass EOF when they only want a flush */
+		assert(stream->cnt_ > 0);
+		return jas_stream_putc2(stream, c);
+	}
+
+	return 0;
+}
+
+/******************************************************************************\
+* Miscellaneous code.
+\******************************************************************************/
+
+static int jas_strtoopenmode(const char *s)
+{
+	/* Translate an fopen-style mode string into JAS_STREAM_* flag bits. */
+	int flags = 0;
+	for (; *s != '\0'; ++s) {
+		switch (*s) {
+		case 'r':
+			flags |= JAS_STREAM_READ;
+			break;
+		case 'w':
+			flags |= JAS_STREAM_WRITE | JAS_STREAM_CREATE;
+			break;
+		case 'b':
+			flags |= JAS_STREAM_BINARY;
+			break;
+		case 'a':
+			flags |= JAS_STREAM_APPEND;
+			break;
+		case '+':
+			flags |= JAS_STREAM_READ | JAS_STREAM_WRITE;
+			break;
+		default:	/* any other character is ignored */
+			break;
+		}
+	}
+	return flags;
+}
+
+int jas_stream_copy(jas_stream_t *out, jas_stream_t *in, int n)
+{
+	/* Copy n bytes from in to out, or everything up to EOF when n < 0. */
+	const int all = (n < 0);
+	int remaining = n;
+	int c;
+
+	while (all || remaining > 0) {
+		if ((c = jas_stream_getc_macro(in)) == EOF) {
+			/* The next character of input could not be read.  This is
+			  an error if an I/O error occurred (as opposed to plain EOF)
+			  or if an explicit copy count was specified. */
+			return (!all || jas_stream_error(in)) ? (-1) : 0;
+		}
+		if (jas_stream_putc_macro(out, c) == EOF) {
+			return -1;
+		}
+		/* Count down only bounded copies so the counter cannot overflow. */
+		if (!all) {
+			--remaining;
+		}
+	}
+	return 0;
+}
+
+long jas_stream_setrwcount(jas_stream_t *stream, long rwcnt)
+{
+	/* Install a new read/write count and hand back the previous one.  The
+	  saved value is a long so that a large count is not truncated. */
+	long old = stream->rwcnt_;
+	stream->rwcnt_ = rwcnt;
+	return old;
+}
+
+int jas_stream_display(jas_stream_t *stream, FILE *fp, int n)
+{	/* Read n bytes from stream and hex-dump them to fp, 16 per row. */
+	unsigned char buf[16];	/* bytes of the row being processed */
+	int i;
+	int j;
+	int m;	/* number of bytes in the current row */
+	int c;
+	int display;	/* nonzero if the current row is printed */
+	int cnt;	/* n rounded down to a multiple of 16 */
+
+	cnt = n - (n % 16);
+	display = 1;
+
+	for (i = 0; i < n; i += 16) {
+		if (n > 16 && i > 0) {
+			display = (i >= cnt) ? 1 : 0;	/* after the first row, print only a trailing partial row */
+		}
+		if (display) {
+			fprintf(fp, "%08x:", i);
+		}
+		m = JAS_MIN(n - i, 16);
+		for (j = 0; j < m; ++j) {	/* rows are read even when they are not printed */
+			if ((c = jas_stream_getc(stream)) == EOF) {
+				abort();	/* NOTE(review): running out of data terminates the program */
+				return -1;	/* not reached after abort() */
+			}
+			buf[j] = c;
+		}
+		if (display) {
+			for (j = 0; j < m; ++j) {
+				fprintf(fp, " %02x", buf[j]);
+			}
+			fputc(' ', fp);
+			for (; j < 16; ++j) {	/* pad a short row so the text column lines up */
+				fprintf(fp, "   ");
+			}
+			for (j = 0; j < m; ++j) {
+				if (isprint(buf[j])) {
+					fputc(buf[j], fp);
+				} else {
+					fputc(' ', fp);	/* non-printable bytes are shown as blanks */
+				}
+			}
+			fprintf(fp, "\n");
+		}
+
+
+	}
+	return 0;
+}
+
+long jas_stream_length(jas_stream_t *stream)
+{
+	/* Measure the stream by seeking to its end, then put the position
+	  back where it was.  Any failing step yields -1. */
+	long saved;
+	long length;
+
+	saved = jas_stream_tell(stream);
+	if (saved < 0 || jas_stream_seek(stream, 0, SEEK_END) < 0) {
+		return -1;
+	}
+	length = jas_stream_tell(stream);
+	if (length < 0 || jas_stream_seek(stream, saved, SEEK_SET) < 0) {
+		return -1;
+	}
+
+	return length;
+}
+
+/******************************************************************************\
+* Memory stream object.
+\******************************************************************************/
+
+static int mem_read(jas_stream_obj_t *obj, char *buf, int cnt)
+{	/* Copy up to cnt bytes out of the memory object; returns the count. */
+	jas_stream_memobj_t *m = (jas_stream_memobj_t *)obj;
+	int n = m->len_ - m->pos_;	/* bytes left before the end of the data */
+	if (n <= 0) return 0;	/* a seek can leave pos_ past len_: report EOF */
+	cnt = JAS_MIN(n, cnt);
+	memcpy(buf, &m->buf_[m->pos_], cnt);
+	m->pos_ += cnt;
+	return cnt;
+}
+
+static int mem_resize(jas_stream_memobj_t *m, int bufsize)
+{
+	/* Resize the data buffer; m is only updated when that succeeds. */
+	unsigned char *grown;
+	assert(m->buf_);
+	grown = jas_realloc(m->buf_, bufsize * sizeof(unsigned char));
+	if (grown) {
+		m->buf_ = grown;
+		m->bufsize_ = bufsize;
+	}
+	return grown ? 0 : -1;
+}
+
+static int mem_write(jas_stream_obj_t *obj, char *buf, int cnt)
+{	/* Store cnt bytes at the current position, growing the buffer if allowed. */
+	int n;
+	int ret;	/* number of bytes actually stored */
+	jas_stream_memobj_t *m = (jas_stream_memobj_t *)obj;
+	long newbufsize;
+	long newpos;	/* position the write would end at */
+
+	newpos = m->pos_ + cnt;
+	if (newpos > m->bufsize_ && m->growable_) {
+		newbufsize = m->bufsize_;
+		while (newbufsize < newpos) {	/* double the size until the write fits */
+			newbufsize <<= 1;	/* NOTE(review): never progresses from a size of 0 - confirm bufsize_ > 0 */
+			assert(newbufsize >= 0);
+		}
+		if (mem_resize(m, newbufsize)) {
+			return -1;
+		}
+	}
+	if (m->pos_ > m->len_) {
+		/* The current position is beyond the end of the file, so
+		  pad the file to the current position with zeros. */
+		n = JAS_MIN(m->pos_, m->bufsize_) - m->len_;
+		if (n > 0) {
+			memset(&m->buf_[m->len_], 0, n);
+			m->len_ += n;
+		}
+		if (m->pos_ != m->len_) {
+			/* The buffer is not big enough. */
+			return 0;
+		}
+	}
+	n = m->bufsize_ - m->pos_;	/* room left in the buffer */
+	ret = JAS_MIN(n, cnt);
+	if (ret > 0) {
+		memcpy(&m->buf_[m->pos_], buf, ret);
+		m->pos_ += ret;
+	}
+	if (m->pos_ > m->len_) {	/* the write extended the data */
+		m->len_ = m->pos_;
+	}
+assert(ret == cnt);	/* a partial write trips this assertion when assertions are enabled */
+	return ret;
+}
+
+static long mem_seek(jas_stream_obj_t *obj, long offset, int origin)
+{	/* Move the position of a memory object; returns it, or -1 if negative. */
+	jas_stream_memobj_t *m = (jas_stream_memobj_t *)obj;
+	long newpos;
+
+	switch (origin) {
+	case SEEK_SET:
+		newpos = offset;
+		break;
+	case SEEK_END:
+		newpos = m->len_ - offset;	/* NOTE(review): offset is subtracted here, whereas lseek()/fseek() add it */
+		break;
+	case SEEK_CUR:
+		newpos = m->pos_ + offset;
+		break;
+	default:
+		abort();	/* an unknown origin terminates the program */
+		break;
+	}
+	if (newpos < 0) {	/* only negative positions are rejected; past-the-end ones are accepted */
+		return -1;
+	}
+	m->pos_ = newpos;
+
+	return m->pos_;
+}
+
+static int mem_close(jas_stream_obj_t *obj)
+{	/* Release a memory stream object; always reports success. */
+	jas_stream_memobj_t *memobj = (jas_stream_memobj_t *)obj;
+	if (memobj->buf_ && memobj->myalloc_) {	/* buffer freed only if myalloc_ is set */
+		jas_free(memobj->buf_);
+		memobj->buf_ = 0;
+	}
+	jas_free(obj);
+	return 0;
+}
+
+/******************************************************************************\
+* File stream object.
+\******************************************************************************/
+
+static int file_read(jas_stream_obj_t *obj, char *buf, int cnt)
+{	/* Read up to cnt bytes from the object's file descriptor into buf. */
+	jas_stream_fileobj_t *fileobj = JAS_CAST(jas_stream_fileobj_t *, obj);
+	return read(fileobj->fd, buf, cnt);	/* read()'s result is passed through unchanged */
+}
+
+static int file_write(jas_stream_obj_t *obj, char *buf, int cnt)
+{	/* Write cnt bytes from buf to the object's file descriptor. */
+	jas_stream_fileobj_t *fileobj = JAS_CAST(jas_stream_fileobj_t *, obj);
+	return write(fileobj->fd, buf, cnt);	/* write()'s result is passed through unchanged */
+}
+
+static long file_seek(jas_stream_obj_t *obj, long offset, int origin)
+{	/* Reposition the object's file descriptor. */
+	jas_stream_fileobj_t *fileobj = JAS_CAST(jas_stream_fileobj_t *, obj);
+	return lseek(fileobj->fd, offset, origin);	/* lseek()'s result is returned converted to long */
+}
+
+static int file_close(jas_stream_obj_t *obj)
+{	/* Close the descriptor, delete the file if flagged, free the object. */
+	jas_stream_fileobj_t *fileobj = JAS_CAST(jas_stream_fileobj_t *, obj);
+	int ret;
+	ret = close(fileobj->fd);
+	if (fileobj->flags & JAS_STREAM_FILEOBJ_DELONCLOSE) {
+		unlink(fileobj->pathname);	/* the result of unlink() is not checked */
+	}
+	jas_free(fileobj);
+	return ret;	/* only the status of close() is reported */
+}
+
+/******************************************************************************\
+* Stdio file stream object.
+\******************************************************************************/
+
+static int sfile_read(jas_stream_obj_t *obj, char *buf, int cnt)
+{	/* Read up to cnt bytes from the FILE; -1 on error, 0 at end of file. */
+	FILE *fp = JAS_CAST(FILE *, obj);
+	size_t n = fread(buf, 1, cnt, fp);
+	return (n == 0 && ferror(fp)) ? -1 : (int) n;	/* fread() alone cannot tell an error from EOF */
+}
+
+static int sfile_write(jas_stream_obj_t *obj, char *buf, int cnt)
+{	/* Write cnt bytes from buf to the FILE that obj stands for. */
+	FILE *fp;
+	fp = JAS_CAST(FILE *, obj);	/* for this stream type the object is the FILE itself */
+	return fwrite(buf, 1, cnt, fp);	/* number of bytes written, converted to int */
+}
+
+static long sfile_seek(jas_stream_obj_t *obj, long offset, int origin)
+{	/* Reposition the FILE; returns the new position, or -1 on failure. */
+	FILE *fp = JAS_CAST(FILE *, obj);
+	/* fseek() only reports success (0) or failure, so ask ftell() where we are. */
+	return fseek(fp, offset, origin) ? -1 : ftell(fp);
+}
+
+static int sfile_close(jas_stream_obj_t *obj)
+{	/* Close the FILE that obj stands for. */
+	FILE *fp;
+	fp = JAS_CAST(FILE *, obj);	/* for this stream type the object is the FILE itself */
+	return fclose(fp);	/* fclose()'s status is passed through */
+}
diff --git a/src/libjasper/base/jas_string.c b/src/libjasper/base/jas_string.c
new file mode 100644
index 0000000..787e703
--- /dev/null
+++ b/src/libjasper/base/jas_string.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * String Library
+ *
+ * $Id: jas_string.c,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes
+\******************************************************************************/
+
+#include <string.h>
+
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_string.h"
+
+/******************************************************************************\
+* Miscellaneous Functions
+\******************************************************************************/
+
+/* This function is equivalent to the popular but non-standard (and
+  not-always-available) strdup function. */
+
+char *jas_strdup(const char *s)
+{
+	/* Keep the size in a size_t: an int could not hold a very long length. */
+	size_t n = strlen(s) + 1;
+	char *p = jas_malloc(n * sizeof(char));
+	if (!p) {
+		return 0;	/* allocation failed */
+	}
+	memcpy(p, s, n);	/* n includes the terminating null character */
+	return p;
+}
diff --git a/src/libjasper/base/jas_tmr.c b/src/libjasper/base/jas_tmr.c
new file mode 100644
index 0000000..942a083
--- /dev/null
+++ b/src/libjasper/base/jas_tmr.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2004 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Timing Routines
+ *
+ * $Id: jas_tmr.c,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "jasper/jas_tmr.h"
+
+/******************************************************************************\
+* Code.
+\******************************************************************************/
+
+#if defined(HAVE_GETTIMEOFDAY)
+
+void jas_tmr_start(jas_tmr_t *tmr)
+{	/* Store the current time of day in tmr->start. */
+	if (gettimeofday(&tmr->start, 0)) {
+		abort();	/* a failed clock query terminates the program */
+	}
+}
+
+void jas_tmr_stop(jas_tmr_t *tmr)
+{	/* Store the current time of day in tmr->stop. */
+	if (gettimeofday(&tmr->stop, 0)) {
+		abort();	/* a failed clock query terminates the program */
+	}
+}
+
+double jas_tmr_get(jas_tmr_t *tmr)
+{
+	/* Seconds between the start and stop marks, including microseconds. */
+	double began = ((double) tmr->start.tv_sec) + ((double) tmr->start.tv_usec) / 1e6;
+	double ended = ((double) tmr->stop.tv_sec) + ((double) tmr->stop.tv_usec) / 1e6;
+
+	return ended - began;
+}
+
+#elif defined(HAVE_GETRUSAGE)
+
+void jas_tmr_start(jas_tmr_t *tmr)
+{	/* Store the RUSAGE_SELF resource usage figures in tmr->start. */
+	if (getrusage(RUSAGE_SELF, &tmr->start) < 0) {
+		abort();	/* a failed query terminates the program */
+	}
+}
+
+void jas_tmr_stop(jas_tmr_t *tmr)
+{	/* Store the RUSAGE_SELF resource usage figures in tmr->stop. */
+	if (getrusage(RUSAGE_SELF, &tmr->stop) < 0) {
+		abort();	/* a failed query terminates the program */
+	}
+}
+
+double jas_tmr_get(jas_tmr_t *tmr)
+{
+	/* Seconds of user time plus system time between the two marks. */
+	double user = ((tmr->stop.ru_utime.tv_sec * 1e6 + tmr->stop.ru_utime.tv_usec) -
+	  (tmr->start.ru_utime.tv_sec * 1e6 + tmr->start.ru_utime.tv_usec)) / 1e6;
+	double sys = ((tmr->stop.ru_stime.tv_sec * 1e6 + tmr->stop.ru_stime.tv_usec) -
+	  (tmr->start.ru_stime.tv_sec * 1e6 + tmr->start.ru_stime.tv_usec)) / 1e6;
+	return user + sys;
+}
+
+#else
+
+void jas_tmr_start(jas_tmr_t *tmr)
+{	/* Fallback when neither timing macro is defined: records nothing. */
+}
+
+void jas_tmr_stop(jas_tmr_t *tmr)
+{	/* Fallback when neither timing macro is defined: records nothing. */
+}
+
+double jas_tmr_get(jas_tmr_t *tmr)
+{	/* Fallback when neither timing macro is defined. */
+	return 0.0;	/* nothing was measured, so the elapsed time is reported as zero */
+}
+
+#endif
+
diff --git a/src/libjasper/base/jas_tvp.c b/src/libjasper/base/jas_tvp.c
new file mode 100644
index 0000000..4c23476
--- /dev/null
+++ b/src/libjasper/base/jas_tvp.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tag-Value Parser Library
+ *
+ * $Id: jas_tvp.c,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <assert.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_string.h"
+#include "jasper/jas_tvp.h"
+
+/******************************************************************************\
+* Macros.
+\******************************************************************************/
+
+/* Is the specified character valid for a tag name?  (Cast for <ctype.h>.) */
+#define	JAS_TVP_ISTAG(x) \
+	(isalpha((unsigned char) (x)) || (x) == '_' || isdigit((unsigned char) (x)))
+
+/******************************************************************************\
+* Code for creating and destroying a tag-value parser.
+\******************************************************************************/
+
+jas_tvparser_t *jas_tvparser_create(const char *s)
+{	/* Build a parser over a private copy of s; yields 0 if allocation fails. */
+	jas_tvparser_t *parser = jas_malloc(sizeof(jas_tvparser_t));
+	if (!parser) {
+		return 0;
+	}
+	if (!(parser->buf = jas_strdup(s))) {
+		jas_tvparser_destroy(parser);
+		return 0;
+	}
+	parser->tag = 0;
+	parser->val = 0;
+	parser->pos = parser->buf;
+	return parser;
+}
+
+void jas_tvparser_destroy(jas_tvparser_t *tvp)
+{	/* Free the parser together with the buffer it holds. */
+	if (tvp->buf) {	/* buf is null when creation failed while copying the input */
+		jas_free(tvp->buf);
+	}
+	jas_free(tvp);
+}
+
+/******************************************************************************\
+* Main parsing code.
+\******************************************************************************/
+
+/* Get the next tag-value pair. */
+int jas_tvparser_next(jas_tvparser_t *tvp)
+{
+	char *p;
+	char *tag;
+	char *val;
+
+	/* Skip any leading whitespace. */
+	p = tvp->pos;
+	while (*p != '\0' && isspace(*p)) {
+		++p;
+	}
+
+	/* Has the end of the input data been reached? */
+	if (*p == '\0') {
+		/* No more tags are present. */
+		tvp->pos = p;
+		return 1;
+	}
+
+	/* Does the tag name begin with a valid character? */
+	if (!JAS_TVP_ISTAG(*p)) {
+		return -1;
+	}
+
+	/* Remember where the tag name begins. */
+	tag = p;
+
+	/* Find the end of the tag name. */
+	while (*p != '\0' && JAS_TVP_ISTAG(*p)) {
+		++p;
+	}
+
+	/* Has the end of the input data been reached? */
+	if (*p == '\0') {
+		/* The value field is empty. */
+		tvp->tag = tag;
+		tvp->val = "";
+		tvp->pos = p;
+		return 0;
+	}
+
+	/* Is a value field not present? */
+	if (*p != '=') {
+		if (*p != '\0' && !isspace(*p)) {
+			return -1;
+		}
+		*p++ = '\0';
+		tvp->tag = tag;
+		tvp->val = "";
+		tvp->pos = p;
+		return 0;
+	}
+
+	*p++ = '\0';
+
+	val = p;
+	while (*p != '\0' && !isspace(*p)) {
+		++p;
+	}
+
+	if (*p != '\0') {
+		*p++ = '\0';
+	}
+
+	tvp->pos = p;
+	tvp->tag = tag;
+	tvp->val = val;
+
+	return 0;
+}
+
+/******************************************************************************\
+* Code for querying the current tag/value.
+\******************************************************************************/
+
+/* Get the tag stored by the last successful jas_tvparser_next() call. */
+char *jas_tvparser_gettag(jas_tvparser_t *tvp)
+{
+	return tvp->tag;	/* still null if no pair has been parsed yet */
+}
+
+/* Get the value stored by the last successful jas_tvparser_next() call. */
+char *jas_tvparser_getval(jas_tvparser_t *tvp)
+{
+	return tvp->val;	/* an empty string when the tag had no value */
+}
+
+/******************************************************************************\
+* Miscellaneous code.
+\******************************************************************************/
+
+/* Find the entry called name in a table that ends with a negative id. */
+jas_taginfo_t *jas_taginfos_lookup(jas_taginfo_t *taginfos, const char *name)
+{
+	jas_taginfo_t *entry;
+
+	for (entry = taginfos; entry->id >= 0; ++entry) {
+		if (strcmp(entry->name, name) == 0) {
+			return entry;
+		}
+	}
+	/* The end of the table was reached without a match. */
+	return 0;
+}
+
+/* Convenience wrapper that never returns a null pointer. */
+/* A null argument is replaced by a static placeholder entry initialized
+  to { -1, 0 }, so callers need not test for a null pointer themselves. */
+jas_taginfo_t *jas_taginfo_nonull(jas_taginfo_t *taginfo)
+{
+	static jas_taginfo_t placeholder = { -1, 0 };
+	if (!taginfo) {
+		return &placeholder;
+	}
+	return taginfo;
+}
diff --git a/src/libjasper/base/jas_version.c b/src/libjasper/base/jas_version.c
new file mode 100644
index 0000000..f56f253
--- /dev/null
+++ b/src/libjasper/base/jas_version.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+#include "jasper/jas_version.h"
+
+const char *jas_getversion(void)
+{	/* Return the library version string, i.e. the JAS_VERSION macro. */
+	return JAS_VERSION;
+}
diff --git a/src/libjasper/jasper/jas_cm.h b/src/libjasper/jasper/jas_cm.h
new file mode 100644
index 0000000..f9ad8cc
--- /dev/null
+++ b/src/libjasper/jasper/jas_cm.h
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2002-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Color Management
+ *
+ * $Id: jas_cm.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef JAS_CM_H
+#define JAS_CM_H
+
+#include <jasper/jas_config.h>
+#include <jasper/jas_icc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int jas_clrspc_t;
+
+/* transform operations */
+#define	JAS_CMXFORM_OP_FWD	0
+#define	JAS_CMXFORM_OP_REV	1
+#define	JAS_CMXFORM_OP_PROOF	2
+#define	JAS_CMXFORM_OP_GAMUT	3
+
+/* rendering intents */
+#define	JAS_CMXFORM_INTENT_PER		0
+#define	JAS_CMXFORM_INTENT_RELCLR	1
+#define	JAS_CMXFORM_INTENT_ABSCLR	2
+#define	JAS_CMXFORM_INTENT_SAT		3
+#define	JAS_CMXFORM_NUMINTENTS		4
+
+#define	JAS_CMXFORM_OPTM_SPEED	0
+#define JAS_CMXFORM_OPTM_SIZE	1
+#define	JAS_CMXFORM_OPTM_ACC	2
+
+/* A color space value packs a family (bits 8 and up) with a member (low 8 bits). */
+#define	jas_clrspc_create(fam, mbr)	(((fam) << 8) | (mbr))
+#define	jas_clrspc_fam(clrspc)	((clrspc) >> 8)
+#define	jas_clrspc_mbr(clrspc)	((clrspc) & 0xff)
+#define	jas_clrspc_isgeneric(clrspc)	(!jas_clrspc_mbr(clrspc))	/* true when member == 0 */
+#define	jas_clrspc_isunknown(clrspc)	((clrspc) & JAS_CLRSPC_UNKNOWNMASK)	/* nonzero (not 1) when set */
+
+#define	JAS_CLRSPC_UNKNOWNMASK	0x4000	/* flag bit tested by jas_clrspc_isunknown() */
+
+/* color space families */
+#define	JAS_CLRSPC_FAM_UNKNOWN	0
+#define	JAS_CLRSPC_FAM_XYZ	1
+#define	JAS_CLRSPC_FAM_LAB	2
+#define	JAS_CLRSPC_FAM_GRAY	3
+#define	JAS_CLRSPC_FAM_RGB	4
+#define	JAS_CLRSPC_FAM_YCBCR	5
+
+/* specific color spaces */
+#define	JAS_CLRSPC_UNKNOWN	JAS_CLRSPC_UNKNOWNMASK
+#define	JAS_CLRSPC_CIEXYZ	jas_clrspc_create(JAS_CLRSPC_FAM_XYZ, 1)
+#define	JAS_CLRSPC_CIELAB	jas_clrspc_create(JAS_CLRSPC_FAM_LAB, 1)
+#define	JAS_CLRSPC_SGRAY	jas_clrspc_create(JAS_CLRSPC_FAM_GRAY, 1)
+#define	JAS_CLRSPC_SRGB		jas_clrspc_create(JAS_CLRSPC_FAM_RGB, 1)
+#define	JAS_CLRSPC_SYCBCR	jas_clrspc_create(JAS_CLRSPC_FAM_YCBCR, 1)
+
+/* generic color spaces */
+#define	JAS_CLRSPC_GENRGB	jas_clrspc_create(JAS_CLRSPC_FAM_RGB, 0)
+#define	JAS_CLRSPC_GENGRAY	jas_clrspc_create(JAS_CLRSPC_FAM_GRAY, 0)
+#define	JAS_CLRSPC_GENYCBCR	jas_clrspc_create(JAS_CLRSPC_FAM_YCBCR, 0)
+
+#define	JAS_CLRSPC_CHANIND_YCBCR_Y	0
+#define	JAS_CLRSPC_CHANIND_YCBCR_CB	1
+#define	JAS_CLRSPC_CHANIND_YCBCR_CR	2
+
+#define	JAS_CLRSPC_CHANIND_RGB_R	0
+#define	JAS_CLRSPC_CHANIND_RGB_G	1
+#define	JAS_CLRSPC_CHANIND_RGB_B	2
+
+#define	JAS_CLRSPC_CHANIND_GRAY_Y	0
+
+typedef double jas_cmreal_t;
+
+struct jas_cmpxform_s;
+
+typedef struct {
+	long *buf;
+	int prec;
+	int sgnd;
+	int width;
+	int height;
+} jas_cmcmptfmt_t;
+
+typedef struct {
+	int numcmpts;
+	jas_cmcmptfmt_t *cmptfmts;
+} jas_cmpixmap_t;
+
+typedef struct {
+	void (*destroy)(struct jas_cmpxform_s *pxform);
+	int (*apply)(struct jas_cmpxform_s *pxform, jas_cmreal_t *in, jas_cmreal_t *out, int cnt);
+	void (*dump)(struct jas_cmpxform_s *pxform);
+} jas_cmpxformops_t;
+
+typedef struct {
+	jas_cmreal_t *data;
+	int size;
+} jas_cmshapmatlut_t;
+
+typedef struct {
+	int mono;
+	int order;
+	int useluts;
+	int usemat;
+	jas_cmshapmatlut_t luts[3];
+	jas_cmreal_t mat[3][4];
+} jas_cmshapmat_t;
+
+typedef struct {
+	int order;
+} jas_cmshaplut_t;
+
+typedef struct {
+	int inclrspc;
+	int outclrspc;
+} jas_cmclrspcconv_t;
+
+#define	jas_align_t	double	/* type of the union's dummy member below */
+
+typedef struct jas_cmpxform_s {	/* element type of jas_cmpxformseq_t.pxforms */
+	int refcnt;
+	jas_cmpxformops_t *ops;	/* destroy/apply/dump callbacks */
+	int numinchans;
+	int numoutchans;
+	union {
+		jas_align_t dummy;	/* presumably present only to force alignment -- confirm */
+		jas_cmshapmat_t shapmat;
+		jas_cmshaplut_t shaplut;
+		jas_cmclrspcconv_t clrspcconv;
+	} data;	/* NOTE(review): no tag records the live member; presumably implied by ops -- confirm */
+} jas_cmpxform_t;
+
+typedef struct {
+	int numpxforms;
+	int maxpxforms;
+	jas_cmpxform_t **pxforms;
+} jas_cmpxformseq_t;
+
+typedef struct {
+	int numinchans;
+	int numoutchans;
+	jas_cmpxformseq_t *pxformseq;
+} jas_cmxform_t;
+
+#define	JAS_CMPROF_TYPE_DEV	1
+#define	JAS_CMPROF_TYPE_CLRSPC	2
+
+#define	JAS_CMPROF_NUMPXFORMSEQS	13
+
+typedef struct {
+	int clrspc;
+	int numchans;
+	int refclrspc;
+	int numrefchans;
+	jas_iccprof_t *iccprof;
+	jas_cmpxformseq_t *pxformseqs[JAS_CMPROF_NUMPXFORMSEQS];
+} jas_cmprof_t;
+
+/* Create a profile: see jas_cmprof_createfromiccprof() and jas_cmprof_createfromclrspc() below. */
+
+/* Destroy a profile. */
+void jas_cmprof_destroy(jas_cmprof_t *prof);
+
+#if 0	/* Disabled draft API; nothing in this group is compiled. */
+typedef int_fast32_t jas_cmattrname_t;
+typedef int_fast32_t jas_cmattrval_t;
+typedef int_fast32_t jas_cmattrtype_t;
+/* Load a profile. */
+int jas_cmprof_load(jas_cmprof_t *prof, jas_stream_t *in, int fmt);
+/* Save a profile. */
+int jas_cmprof_save(jas_cmprof_t *prof, jas_stream_t *out, int fmt);
+/* Set an attribute of a profile.  NOTE(review): jas_cm_prof_t and jas_cm_attrname_t are not the jas_cmprof_t / jas_cmattrname_t spellings used above. */
+int jas_cm_prof_setattr(jas_cm_prof_t *prof, jas_cm_attrname_t name, void *val);
+/* Get an attribute of a profile.  NOTE(review): same spelling mismatch as the setter. */
+void *jas_cm_prof_getattr(jas_cm_prof_t *prof, jas_cm_attrname_t name);
+#endif
+
+jas_cmxform_t *jas_cmxform_create(jas_cmprof_t *inprof, jas_cmprof_t *outprof,
+  jas_cmprof_t *proofprof, int op, int intent, int optimize);
+
+void jas_cmxform_destroy(jas_cmxform_t *xform);
+
+/* Apply a transform to data. */
+int jas_cmxform_apply(jas_cmxform_t *xform, jas_cmpixmap_t *in,
+  jas_cmpixmap_t *out);
+
+int jas_cxform_optimize(jas_cmxform_t *xform, int optimize);	/* NOTE(review): prefix is "jas_cxform_", unlike the jas_cmxform_* functions above -- possible typo; confirm against the definition. */
+
+int jas_clrspc_numchans(int clrspc);
+jas_cmprof_t *jas_cmprof_createfromiccprof(jas_iccprof_t *iccprof);
+jas_cmprof_t *jas_cmprof_createfromclrspc(int clrspc);
+jas_iccprof_t *jas_iccprof_createfromcmprof(jas_cmprof_t *prof);
+
+#define	jas_cmprof_clrspc(prof) ((prof)->clrspc)
+jas_cmprof_t *jas_cmprof_copy(jas_cmprof_t *prof);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_config.h b/src/libjasper/jasper/jas_config.h
new file mode 100644
index 0000000..e58ed6d
--- /dev/null
+++ b/src/libjasper/jasper/jas_config.h
@@ -0,0 +1,32 @@
+/* IMLIB - jas_config.h for IM */
+
+/* Avoid problems due to multiple inclusion. */
+#ifndef JAS_CONFIG_H
+#define JAS_CONFIG_H
+
+/* This preprocessor symbol identifies the version of JasPer. */
+#define	JAS_VERSION "1.900.1"
+
+#define HAVE_FCNTL_H     1
+#define HAVE_LIMITS_H    1
+#define HAVE_STDLIB_H    1
+#define HAVE_STDDEF_H    1
+#define HAVE_STRING_H    1
+#define HAVE_MEMORY_H    1
+#define HAVE_SYS_TYPES_H 1
+
+/* #define HAVE_UNISTD_H 1 (must control this in the makefile) */
+
+/* #define JAS_CONFIGURE 0  to include some definitions in "jas_types.h" ifdef WIN32 */
+
+#ifdef JAS_TYPES
+typedef unsigned long ulong;
+typedef unsigned char uchar;
+#endif
+
+#if !defined(WIN32)     /* These will be defined in "jas_types.h" ifdef WIN32 */
+#define longlong long long
+#define ulonglong unsigned long long
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_debug.h b/src/libjasper/jasper/jas_debug.h
new file mode 100644
index 0000000..32a7258
--- /dev/null
+++ b/src/libjasper/jasper/jas_debug.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Debugging-Related Code
+ *
+ * $Id: jas_debug.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef JAS_DEBUG_H
+#define JAS_DEBUG_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+
+#include <jasper/jas_config.h>
+#include "jasper/jas_types.h"
+#include "jasper/jas_debug.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************\
+* Macros and functions.
+\******************************************************************************/
+
+/* Log through jas_eprintf when jas_getdbglevel() >= n.  x is a parenthesized
+  argument list, e.g. JAS_DBGLOG(1, ("n=%d\n", n)); without DEBUG this expands to nothing. */
+#if defined(DEBUG)
+#define	JAS_DBGLOG(n, x) \
+	((jas_getdbglevel() >= (n)) ? (jas_eprintf x) : 0)
+#else
+#define	JAS_DBGLOG(n, x)
+#endif
+
+/* Get the library debug level. */
+int jas_getdbglevel(void);
+
+/* Set the library debug level. */
+int jas_setdbglevel(int dbglevel);
+
+/* Perform formatted output to standard error. */
+int jas_eprintf(const char *fmt, ...);
+
+/* Dump memory to a stream. */
+int jas_memdump(FILE *out, void *data, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_fix.h b/src/libjasper/jasper/jas_fix.h
new file mode 100644
index 0000000..b700940
--- /dev/null
+++ b/src/libjasper/jasper/jas_fix.h
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Fixed-Point Number Class
+ *
+ * $Id: jas_fix.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef JAS_FIX_H
+#define JAS_FIX_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include <jasper/jas_config.h>
+#include <jasper/jas_types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************\
+* Constants.
+\******************************************************************************/
+
+/* The representation of the value zero. */
+#define	JAS_FIX_ZERO(fix_t, fracbits) \
+	JAS_CAST(fix_t, 0)
+
+/* The representation of the value one. */
+#define	JAS_FIX_ONE(fix_t, fracbits) \
+	(JAS_CAST(fix_t, 1) << (fracbits))
+
+/* The representation of the value one half. */
+#define	JAS_FIX_HALF(fix_t, fracbits) \
+	(JAS_CAST(fix_t, 1) << ((fracbits) - 1))
+
+/******************************************************************************\
+* Conversion operations.
+\******************************************************************************/
+
+/* Convert an int to a fixed-point number (x is widened to fix_t before the shift, so the shift is not done in plain int). */
+#define JAS_INTTOFIX(fix_t, fracbits, x) \
+	JAS_CAST(fix_t, JAS_CAST(fix_t, x) << (fracbits))
+
+/* Convert a fixed-point number to an int. */
+#define JAS_FIXTOINT(fix_t, fracbits, x) \
+	JAS_CAST(int, (x) >> (fracbits))
+
+/* Convert a fixed-point number to a double. */
+#define JAS_FIXTODBL(fix_t, fracbits, x) \
+	(JAS_CAST(double, x) / (JAS_CAST(fix_t, 1) << (fracbits)))
+
+/* Convert a double to a fixed-point number. */
+#define JAS_DBLTOFIX(fix_t, fracbits, x) \
+	JAS_CAST(fix_t, ((x) * JAS_CAST(double, JAS_CAST(fix_t, 1) << (fracbits))))
+
+/******************************************************************************\
+* Basic arithmetic operations.
+* All other arithmetic operations are synthesized from these basic operations.
+* There are three macros for each type of arithmetic operation.
+* One macro always performs overflow/underflow checking, one never performs
+* overflow/underflow checking, and one is generic with its behavior
+* depending on compile-time flags.
+* Only the generic macros should be invoked directly by application code.
+\******************************************************************************/
+
+/* Calculate the sum of two fixed-point numbers. */
+#if !defined(DEBUG_OVERFLOW)
+#define JAS_FIX_ADD			JAS_FIX_ADD_FAST
+#else
+#define JAS_FIX_ADD			JAS_FIX_ADD_OFLOW
+#endif
+
+/* Calculate the sum of two fixed-point numbers without overflow checking. */
+#define	JAS_FIX_ADD_FAST(fix_t, fracbits, x, y)	((x) + (y))
+
+/* Calculate the sum of two fixed-point numbers with overflow checking.
+  A same-sign sum whose sign flips is reported through JAS_FIX_OFLOW(); the
+  value of (x) + (y) is still the result.  The expansion is fully parenthesized. */
+#define	JAS_FIX_ADD_OFLOW(fix_t, fracbits, x, y) \
+	(((x) >= 0) ? \
+	  (((y) >= 0) ? ((x) + (y) >= 0 || JAS_FIX_OFLOW(), (x) + (y)) : ((x) + (y))) : \
+	  (((y) >= 0) ? ((x) + (y)) : ((x) + (y) < 0 || JAS_FIX_OFLOW(), (x) + (y))))
+
+/* Calculate the product of two fixed-point numbers. */
+#if !defined(DEBUG_OVERFLOW)
+#define JAS_FIX_MUL			JAS_FIX_MUL_FAST
+#else
+#define JAS_FIX_MUL			JAS_FIX_MUL_OFLOW
+#endif
+
+/* Calculate the product of two fixed-point numbers without overflow
+  checking. */
+#define	JAS_FIX_MUL_FAST(fix_t, fracbits, bigfix_t, x, y) \
+	JAS_CAST(fix_t, (JAS_CAST(bigfix_t, x) * JAS_CAST(bigfix_t, y)) >> \
+	  (fracbits))
+
+/* Calculate the product of two fixed-point numbers with overflow
+  checking. */
+#define JAS_FIX_MUL_OFLOW(fix_t, fracbits, bigfix_t, x, y) \
+	((JAS_CAST(bigfix_t, x) * JAS_CAST(bigfix_t, y) >> (fracbits)) == \
+	  JAS_CAST(fix_t, (JAS_CAST(bigfix_t, x) * JAS_CAST(bigfix_t, y) >> \
+	  (fracbits))) ? \
+	  JAS_CAST(fix_t, (JAS_CAST(bigfix_t, x) * JAS_CAST(bigfix_t, y) >> \
+	  (fracbits))) : JAS_FIX_OFLOW())
+
+/* Calculate the product of a fixed-point number and an int. */
+#if !defined(DEBUG_OVERFLOW)
+#define	JAS_FIX_MULBYINT	JAS_FIX_MULBYINT_FAST
+#else
+#define	JAS_FIX_MULBYINT	JAS_FIX_MULBYINT_OFLOW
+#endif
+
+/* Calculate the product of a fixed-point number and an int without overflow
+  checking. */
+#define	JAS_FIX_MULBYINT_FAST(fix_t, fracbits, x, y) \
+	JAS_CAST(fix_t, ((x) * (y)))
+
+/* Calculate the product of a fixed-point number and an int with overflow
+  checking. */
+#define	JAS_FIX_MULBYINT_OFLOW(fix_t, fracbits, x, y) \
+	JAS_FIX_MULBYINT_FAST(fix_t, fracbits, x, y)
+
+/* Calculate the quotient of two fixed-point numbers. */
+#if !defined(DEBUG_OVERFLOW)
+#define JAS_FIX_DIV			JAS_FIX_DIV_FAST
+#else
+#define JAS_FIX_DIV			JAS_FIX_DIV_UFLOW
+#endif
+
+/* Calculate the quotient of two fixed-point numbers without underflow
+  checking. */
+#define	JAS_FIX_DIV_FAST(fix_t, fracbits, bigfix_t, x, y) \
+	JAS_CAST(fix_t, (JAS_CAST(bigfix_t, x) << (fracbits)) / (y))
+
+/* Calculate the quotient of two fixed-point numbers with underflow
+  checking. */
+#define JAS_FIX_DIV_UFLOW(fix_t, fracbits, bigfix_t, x, y) \
+	JAS_FIX_DIV_FAST(fix_t, fracbits, bigfix_t, x, y)
+
+/* Negate a fixed-point number. */
+#if !defined(DEBUG_OVERFLOW)
+#define	JAS_FIX_NEG			JAS_FIX_NEG_FAST
+#else
+#define	JAS_FIX_NEG			JAS_FIX_NEG_OFLOW
+#endif
+
+/* Negate a fixed-point number without overflow checking. */
+#define	JAS_FIX_NEG_FAST(fix_t, fracbits, x) \
+	(-(x))
+
+/* Negate a fixed-point number with overflow checking. */
+/* Yes, overflow is actually possible for two's complement representations,
+  although highly unlikely to occur. */
+#define	JAS_FIX_NEG_OFLOW(fix_t, fracbits, x) \
+	(((x) < 0) ? (-(x) > 0 || JAS_FIX_OFLOW(), -(x)) : (-(x)))
+
+/* Perform an arithmetic shift left of a fixed-point number. */
+#if !defined(DEBUG_OVERFLOW)
+#define	JAS_FIX_ASL			JAS_FIX_ASL_FAST
+#else
+#define	JAS_FIX_ASL			JAS_FIX_ASL_OFLOW
+#endif
+
+/* Perform an arithmetic shift left of a fixed-point number without overflow
+  checking. */
+#define	JAS_FIX_ASL_FAST(fix_t, fracbits, x, n) \
+	((x) << (n))
+
+/* Perform an arithmetic shift left of a fixed-point number with overflow
+  checking. */
+#define	JAS_FIX_ASL_OFLOW(fix_t, fracbits, x, n) \
+	((((x) << (n)) >> (n)) == (x) || JAS_FIX_OFLOW(), (x) << (n))
+
+/* Perform an arithmetic shift right of a fixed-point number. */
+#if !defined(DEBUG_OVERFLOW)
+#define	JAS_FIX_ASR			JAS_FIX_ASR_FAST
+#else
+#define	JAS_FIX_ASR			JAS_FIX_ASR_UFLOW
+#endif
+
+/* Perform an arithmetic shift right of a fixed-point number without underflow
+  checking. */
+#define	JAS_FIX_ASR_FAST(fix_t, fracbits, x, n) \
+	((x) >> (n))
+
+/* Perform an arithmetic shift right of a fixed-point number with underflow
+  checking. */
+#define	JAS_FIX_ASR_UFLOW(fix_t, fracbits, x, n) \
+	JAS_FIX_ASR_FAST(fix_t, fracbits, x, n)
+
+/******************************************************************************\
+* Other basic arithmetic operations.
+\******************************************************************************/
+
+/* Calculate the difference between two fixed-point numbers. */
+#define JAS_FIX_SUB(fix_t, fracbits, x, y) \
+	JAS_FIX_ADD(fix_t, fracbits, x, JAS_FIX_NEG(fix_t, fracbits, y))
+
+/* Add one fixed-point number to another. */
+#define JAS_FIX_PLUSEQ(fix_t, fracbits, x, y) \
+	((x) = JAS_FIX_ADD(fix_t, fracbits, x, y))
+
+/* Subtract one fixed-point number from another. */
+#define JAS_FIX_MINUSEQ(fix_t, fracbits, x, y) \
+	((x) = JAS_FIX_SUB(fix_t, fracbits, x, y))
+
+/* Multiply one fixed-point number by another. */
+#define	JAS_FIX_MULEQ(fix_t, fracbits, bigfix_t, x, y) \
+	((x) = JAS_FIX_MUL(fix_t, fracbits, bigfix_t, x, y))
+
+/******************************************************************************\
+* Miscellaneous operations.
+\******************************************************************************/
+
+/* Calculate the absolute value of a fixed-point number. */
+#define	JAS_FIX_ABS(fix_t, fracbits, x) \
+	(((x) >= 0) ? (x) : (JAS_FIX_NEG(fix_t, fracbits, x)))
+
+/* Is a fixed-point number an integer? */
+#define	JAS_FIX_ISINT(fix_t, fracbits, x) \
+	(JAS_FIX_FLOOR(fix_t, fracbits, x) == (x))
+
+/* Get the sign of a fixed-point number: 1 for x >= 0 (zero included), otherwise -1. */
+#define JAS_FIX_SGN(fix_t, fracbits, x) \
+	((x) >= 0 ? 1 : (-1))
+
+/******************************************************************************\
+* Relational operations.
+\******************************************************************************/
+
+/* Compare two fixed-point numbers. */
+#define JAS_FIX_CMP(fix_t, fracbits, x, y) \
+	((x) > (y) ? 1 : (((x) == (y)) ? 0 : (-1)))
+
+/* Less than. */
+#define	JAS_FIX_LT(fix_t, fracbits, x, y) \
+	((x) < (y))
+
+/* Less than or equal. */
+#define	JAS_FIX_LTE(fix_t, fracbits, x, y) \
+	((x) <= (y))
+
+/* Greater than. */
+#define	JAS_FIX_GT(fix_t, fracbits, x, y) \
+	((x) > (y))
+
+/* Greater than or equal. */
+#define	JAS_FIX_GTE(fix_t, fracbits, x, y) \
+	((x) >= (y))
+
+/******************************************************************************\
+* Rounding functions.
+\******************************************************************************/
+
+/* Round a fixed-point number to the nearest integer; exact halves round toward zero (floor(x + 1/2) for x < 0, -floor(-x + 1/2) otherwise). */
+#define	JAS_FIX_ROUND(fix_t, fracbits, x) \
+	(((x) < 0) ? JAS_FIX_FLOOR(fix_t, fracbits, JAS_FIX_ADD(fix_t, fracbits, \
+	  (x), JAS_FIX_HALF(fix_t, fracbits))) : \
+	  JAS_FIX_NEG(fix_t, fracbits, JAS_FIX_FLOOR(fix_t, fracbits, \
+	  JAS_FIX_ADD(fix_t, fracbits, (-(x)), JAS_FIX_HALF(fix_t, fracbits)))))
+
+/* Round a fixed-point number to the nearest integer in the direction of
+  negative infinity (i.e., the floor function). */
+#define	JAS_FIX_FLOOR(fix_t, fracbits, x) \
+	((x) & (~((JAS_CAST(fix_t, 1) << (fracbits)) - 1)))
+
+/* Round a fixed-point number to the nearest integer in the direction
+  of zero.  Negative values use -floor(-x), so no JAS_FIX_CEIL is required. */
+#define JAS_FIX_TRUNC(fix_t, fracbits, x) \
+	(((x) >= 0) ? JAS_FIX_FLOOR(fix_t, fracbits, x) : \
+	  JAS_FIX_NEG(fix_t, fracbits, JAS_FIX_FLOOR(fix_t, fracbits, -(x))))
+
+/******************************************************************************\
+* The below macros are for internal library use only.  Do not invoke them
+* directly in application code.
+\******************************************************************************/
+
+/* Handle overflow: print the file and line through jas_eprintf().  NOTE(review): jas_eprintf is declared in jas_debug.h, which this header does not include directly. */
+#define	JAS_FIX_OFLOW() \
+	jas_eprintf("overflow error: file %s, line %d\n", __FILE__, __LINE__)
+
+/* Handle underflow: same reporting as JAS_FIX_OFLOW(), with an "underflow" message. */
+#define	JAS_FIX_UFLOW() \
+	jas_eprintf("underflow error: file %s, line %d\n", __FILE__, __LINE__)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_getopt.h b/src/libjasper/jasper/jas_getopt.h
new file mode 100644
index 0000000..dac4a55
--- /dev/null
+++ b/src/libjasper/jasper/jas_getopt.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Command Line Option Parsing Code
+ *
+ * $Id: jas_getopt.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef JAS_GETOPT_H
+#define JAS_GETOPT_H
+
+#include <jasper/jas_config.h>	/* kept outside the extern "C" block, as in the other jasper headers */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************\
+* Constants.
+\******************************************************************************/
+
+#define	JAS_GETOPT_EOF	(-1)
+#define	JAS_GETOPT_ERR	'?'
+
+/* option flags. */
+#define	JAS_OPT_HASARG	0x01	/* option has argument */
+
+/******************************************************************************\
+* Types.
+\******************************************************************************/
+
+/* Command line option type: one entry of the table passed to jas_getopt(). */
+typedef struct {
+
+	/* The unique identifier for this option. */
+	int id;
+
+	/* The name of this option. */
+	char *name;
+
+	/* Option flags (JAS_OPT_* bits such as JAS_OPT_HASARG). */
+	int flags;
+
+} jas_opt_t;
+
+/******************************************************************************\
+* External data.
+\******************************************************************************/
+
+/* The current option index. */
+extern int jas_optind;
+
+/* The current option argument. */
+extern char *jas_optarg;
+
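+/* Error-reporting flag.  NOTE(review): the name mirrors getopt's opterr; the earlier comment called this "the debug level" -- confirm in jas_getopt.c. */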
+extern int jas_opterr;
+
+/******************************************************************************\
+* Prototypes.
+\******************************************************************************/
+
+/* Get the next option. */
+int jas_getopt(int argc, char **argv, jas_opt_t *opts);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_icc.h b/src/libjasper/jasper/jas_icc.h
new file mode 100644
index 0000000..616a572
--- /dev/null
+++ b/src/libjasper/jasper/jas_icc.h
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 2002-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+#ifndef JAS_ICC_H
+#define	JAS_ICC_H
+
+#include <jasper/jas_config.h>
+#include <jasper/jas_types.h>
+#include <jasper/jas_stream.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Profile file signature. */
+#define	JAS_ICC_MAGIC		0x61637370
+
+#define	JAS_ICC_HDRLEN	128
+
+/* Profile/device class signatures. */
+#define	JAS_ICC_CLAS_IN	0x73636e72 /* input device */
+#define	JAS_ICC_CLAS_DPY	0x6d6e7472 /* display device */
+#define	JAS_ICC_CLAS_OUT	0x70727472 /* output device */
+#define	JAS_ICC_CLAS_LNK	0x6c696e6b /* device link */
+#define	JAS_ICC_CLAS_CNV	0x73706163 /* color space conversion */
+#define	JAS_ICC_CLAS_ABS	0x61627374 /* abstract */
+#define	JAS_ICC_CLAS_NAM	0x6e6d636c /* named color */
+
+/* Color space signatures. */
+#define	JAS_ICC_COLORSPC_XYZ	0x58595a20 /* XYZ */
+#define	JAS_ICC_COLORSPC_LAB	0x4c616220 /* LAB */
+#define	JAS_ICC_COLORSPC_LUV	0x4c757620 /* LUV */
+#define	JAS_ICC_COLORSPC_YCBCR	0x59436272 /* YCbCr */
+#define	JAS_ICC_COLORSPC_YXY	0x59787920 /* Yxy */
+#define	JAS_ICC_COLORSPC_RGB	0x52474220 /* RGB */
+#define	JAS_ICC_COLORSPC_GRAY	0x47524159 /* Gray */
+#define	JAS_ICC_COLORSPC_HSV	0x48535620 /* HSV */
+#define	JAS_ICC_COLORSPC_HLS	0x484c5320 /* HLS */
+#define	JAS_ICC_COLORSPC_CMYK	0x434d594b /* CMYK */
+#define	JAS_ICC_COLORSPC_CMY	0x434d5920 /* CMY */
+#define	JAS_ICC_COLORSPC_2	0x32434c52 /* 2 channel color */
+#define	JAS_ICC_COLORSPC_3	0x33434c52 /* 3 channel color */
+#define	JAS_ICC_COLORSPC_4	0x34434c52 /* 4 channel color */
+#define	JAS_ICC_COLORSPC_5	0x35434c52 /* 5 channel color */
+#define	JAS_ICC_COLORSPC_6	0x36434c52 /* 6 channel color */
+#define	JAS_ICC_COLORSPC_7	0x37434c52 /* 7 channel color */
+#define	JAS_ICC_COLORSPC_8	0x38434c52 /* 8 channel color */
+#define	JAS_ICC_COLORSPC_9	0x39434c52 /* 9 channel color */
+#define	JAS_ICC_COLORSPC_10	0x41434c52 /* 10 channel color */
+#define	JAS_ICC_COLORSPC_11	0x42434c52 /* 11 channel color */
+#define	JAS_ICC_COLORSPC_12	0x43434c52 /* 12 channel color */
+#define	JAS_ICC_COLORSPC_13	0x44434c52 /* 13 channel color */
+#define	JAS_ICC_COLORSPC_14	0x45434c52 /* 14 channel color */
+#define	JAS_ICC_COLORSPC_15	0x46434c52 /* 15 channel color */
+
+/* Profile connection color space (PCS) signatures. */
+#define	JAS_ICC_REFCOLORSPC_XYZ		0x58595a20 /* CIE XYZ */
+#define	JAS_ICC_REFCOLORSPC_LAB		0x4c616220 /* CIE Lab */
+
+/* Primary platform signatures. */
+#define	JAS_ICC_PLATFORM_APPL	0x4150504c /* Apple Computer */
+#define	JAS_ICC_PLATFORM_MSFT	0x4d534654 /* Microsoft */
+#define	JAS_ICC_PLATFORM_SGI	0x53474920 /* Silicon Graphics */
+#define	JAS_ICC_PLATFORM_SUNW	0x53554e57 /* Sun Microsystems */
+#define	JAS_ICC_PLATFORM_TGNT	0x54474e54 /* Taligent */
+
+/* Profile flags. */
+#define	JAS_ICC_FLAGS_EMBED	0x01 /* embedded */
+#define	JAS_ICC_FLAGS_NOSEP	0x02 /* no separate use */
+
+/* Attributes. */
+#define	JAS_ICC_ATTR_TRANS	0x01 /* transparent */
+#define	JAS_ICC_ATTR_MATTE	0x02 /* matte */
+
+/* Rendering intents. */
+#define	JAS_ICC_INTENT_PER	0 /* perceptual */
+#define	JAS_ICC_INTENT_REL	1 /* relative colorimetric */
+#define	JAS_ICC_INTENT_SAT	2 /* saturation */
+#define	JAS_ICC_INTENT_ABS	3 /* absolute colorimetric */
+
+/* Tag signatures (each value is a four-character ASCII code, shown in the comment). */
+#define	JAS_ICC_TAG_ATOB0		0x41324230 /* 'A2B0' */
+#define	JAS_ICC_TAG_ATOB1		0x41324231 /* 'A2B1' */
+#define	JAS_ICC_TAG_ATOB2		0x41324232 /* 'A2B2' */
+#define	JAS_ICC_TAG_BLUMATCOL		0x6258595a /* 'bXYZ' */
+#define	JAS_ICC_TAG_BLUTRC		0x62545243 /* 'bTRC' */
+#define	JAS_ICC_TAG_BTOA0		0x42324130 /* 'B2A0' */
+#define	JAS_ICC_TAG_BTOA1		0x42324131 /* 'B2A1' */
+#define	JAS_ICC_TAG_BTOA2		0x42324132 /* 'B2A2' */
+#define	JAS_ICC_TAG_CALTIME		0x63616c74 /* 'calt' */
+#define	JAS_ICC_TAG_CHARTARGET		0x74617267 /* 'targ' */
+#define	JAS_ICC_TAG_CPYRT		0x63707274 /* 'cprt' */
+#define	JAS_ICC_TAG_CRDINFO		0x63726469 /* 'crdi' */
+#define	JAS_ICC_TAG_DEVMAKERDESC	0x646d6e64 /* 'dmnd' */
+#define	JAS_ICC_TAG_DEVMODELDESC	0x646d6464 /* 'dmdd' */
+#define	JAS_ICC_TAG_DEVSET		0x64657673 /* 'devs' */
+#define	JAS_ICC_TAG_GAMUT		0x67616d74 /* 'gamt' */
+#define	JAS_ICC_TAG_GRYTRC		0x6b545243 /* 'kTRC' */
+#define	JAS_ICC_TAG_GRNMATCOL		0x6758595a /* 'gXYZ' */
+#define	JAS_ICC_TAG_GRNTRC		0x67545243 /* 'gTRC' */
+#define	JAS_ICC_TAG_LUM			0x6c756d69 /* 'lumi' */
+#define	JAS_ICC_TAG_MEASURE		0x6d656173 /* 'meas' */
+#define	JAS_ICC_TAG_MEDIABLKPT		0x626b7074 /* 'bkpt' */
+#define	JAS_ICC_TAG_MEDIAWHIPT		0x77747074 /* 'wtpt' */
+#define	JAS_ICC_TAG_NAMCOLR		0x6e636f6c /* 'ncol' */
+#define	JAS_ICC_TAG_NAMCOLR2		0x6e636c32 /* 'ncl2' */
+#define	JAS_ICC_TAG_OUTRESP		0x72657370 /* 'resp' */
+#define	JAS_ICC_TAG_PREVIEW0		0x70726530 /* 'pre0' */
+#define	JAS_ICC_TAG_PREVIEW1		0x70726531 /* 'pre1' */
+#define	JAS_ICC_TAG_PREVIEW2		0x70726532 /* 'pre2' */
+#define	JAS_ICC_TAG_PROFDESC		0x64657363 /* 'desc' */
+#define	JAS_ICC_TAG_PROFSEQDESC		0x70736571 /* 'pseq' */
+#define	JAS_ICC_TAG_PSDCRD0		0x70736430 /* 'psd0' */
+#define	JAS_ICC_TAG_PSCRDD1		0x70736431 /* 'psd1' */
+#define	JAS_ICC_TAG_PSCRDD2		0x70736432 /* 'psd2' */
+#define	JAS_ICC_TAG_PSCRDD3		0x70736433 /* 'psd3' */
+#define	JAS_ICC_TAG_PS2CSA		0x70733273 /* 'ps2s' */
+#define	JAS_ICC_TAG_PS2RENINTENT	0x70733269 /* 'ps2i' */
+#define	JAS_ICC_TAG_REDMATCOL		0x7258595a /* 'rXYZ' */
+#define	JAS_ICC_TAG_REDTRC		0x72545243 /* 'rTRC' */
+#define	JAS_ICC_TAG_SCRNGDES		0x73637264 /* 'scrd' */
+#define	JAS_ICC_TAG_SCRNG		0x7363726e /* 'scrn' */
+#define	JAS_ICC_TAG_TECH		0x74656368 /* 'tech' */
+#define	JAS_ICC_TAG_UCRBG		0x62666420 /* 'bfd ' */
+#define	JAS_ICC_TAG_VIEWCONDDESC	0x76756564 /* 'vued' */
+#define	JAS_ICC_TAG_VIEWCOND		0x76696577 /* 'view' */
+
+/* Type signatures (four-character ASCII codes packed into 32 bits). */
+#define	JAS_ICC_TYPE_CRDINFO		0x63726469 /* CRD information */
+#define	JAS_ICC_TYPE_CURV		0x63757276 /* curve */
+#define	JAS_ICC_TYPE_DATA		0x64617461 /* data */
+#define	JAS_ICC_TYPE_TIME		0x6474696d /* date/time */
+#define	JAS_ICC_TYPE_DEVSET		0x64657673 /* device settings */
+#define	JAS_ICC_TYPE_LUT16		0x6d667432 /* 'mft2' */
+#define	JAS_ICC_TYPE_LUT8		0x6d667431 /* 'mft1' */
+#define	JAS_ICC_TYPE_MEASURE		0x6d656173 /* 'meas' */
+#define	JAS_ICC_TYPE_NAMCOLR		0x6e636f6c /* 'ncol' */
+#define	JAS_ICC_TYPE_NAMCOLR2		0x6e636c32 /* 'ncl2' */
+#define	JAS_ICC_TYPE_PROFSEQDESC	0x70736571 /* profile sequence description */
+#define	JAS_ICC_TYPE_RESPCURVSET16	0x72637332 /* response curve set 16 */
+#define	JAS_ICC_TYPE_SF32		0x73663332 /* signed 32-bit fixed-point */
+#define	JAS_ICC_TYPE_SCRNG		0x7363726e /* screening */
+#define	JAS_ICC_TYPE_SIG		0x73696720 /* signature */
+#define	JAS_ICC_TYPE_TXTDESC		0x64657363 /* text description */
+#define	JAS_ICC_TYPE_TXT		0x74657874 /* text */
+#define	JAS_ICC_TYPE_UF32		0x75663332 /* unsigned 32-bit fixed-point */
+#define	JAS_ICC_TYPE_UCRBG		0x62666420 /* 'bfd ' */
+#define	JAS_ICC_TYPE_UI16		0x75693136 /* 'ui16' */
+#define	JAS_ICC_TYPE_UI32		0x75693332 /* 'ui32' */
+#define	JAS_ICC_TYPE_UI8		0x75693038 /* 'ui08' */
+#define	JAS_ICC_TYPE_UI64		0x75693634 /* 'ui64' */
+#define	JAS_ICC_TYPE_VIEWCOND		0x76696577 /* 'view' */
+#define	JAS_ICC_TYPE_XYZ		0x58595a20 /* XYZ */
+
+typedef uint_fast8_t jas_iccuint8_t; /* "fast" types are at least N bits wide, not exactly N; do not rely on sizeof */
+typedef uint_fast16_t jas_iccuint16_t; /* unsigned, at least 16 bits */
+typedef uint_fast32_t jas_iccuint32_t; /* unsigned, at least 32 bits */
+typedef int_fast32_t jas_iccsint32_t; /* signed, at least 32 bits */
+typedef int_fast32_t jas_iccs15fixed16_t; /* presumably signed fixed-point with 16 fractional bits, per the name -- TODO confirm */
+typedef uint_fast32_t jas_iccu16fixed16_t; /* presumably unsigned fixed-point with 16 fractional bits, per the name -- TODO confirm */
+typedef uint_fast64_t jas_iccuint64_t; /* unsigned, at least 64 bits */
+typedef uint_fast32_t jas_iccsig_t; /* holds a signature value such as the JAS_ICC_* constants above */
+
+typedef jas_iccsig_t jas_icctagsig_t;
+typedef jas_iccsig_t jas_icctagtype_t;
+typedef jas_iccsig_t jas_iccattrname_t;
+
+/* Date/time type. */
+typedef struct {
+	jas_iccuint16_t year;
+	jas_iccuint16_t month;
+	jas_iccuint16_t day;
+	jas_iccuint16_t hour;
+	jas_iccuint16_t min;
+	jas_iccuint16_t sec;
+} jas_icctime_t;
+
+/* XYZ type. */
+typedef struct {
+	jas_iccs15fixed16_t x;
+	jas_iccs15fixed16_t y;
+	jas_iccs15fixed16_t z;
+} jas_iccxyz_t;
+
+/* Curve type. */
+typedef struct {
+	jas_iccuint32_t numents;
+	jas_iccuint16_t *ents;
+} jas_icccurv_t;
+
+/* Text description type. */
+typedef struct {
+	jas_iccuint32_t asclen;
+	char *ascdata; /* ASCII invariant description */
+	jas_iccuint32_t uclangcode; /* Unicode language code */
+	jas_iccuint32_t uclen; /* Unicode localizable description count */
+	uchar *ucdata; /* Unicode localizable description */
+	jas_iccuint16_t sccode; /* ScriptCode code */
+	jas_iccuint8_t maclen; /* Localizable Macintosh description count */
+	uchar macdata[69]; /* Localizable Macintosh description */
+} jas_icctxtdesc_t;
+
+/* Text type. */
+typedef struct {
+	char *string;	/* ASCII character string */
+} jas_icctxt_t;
+
+typedef struct {
+	jas_iccuint8_t numinchans;
+	jas_iccuint8_t numoutchans;
+	jas_iccsint32_t e[3][3];
+	jas_iccuint8_t clutlen;
+	jas_iccuint8_t *clut;
+	jas_iccuint16_t numintabents;
+	jas_iccuint8_t **intabs;
+	jas_iccuint8_t *intabsbuf;
+	jas_iccuint16_t numouttabents;
+	jas_iccuint8_t **outtabs;
+	jas_iccuint8_t *outtabsbuf;
+} jas_icclut8_t;
+
+typedef struct {
+	jas_iccuint8_t numinchans;
+	jas_iccuint8_t numoutchans;
+	jas_iccsint32_t e[3][3];
+	jas_iccuint8_t clutlen;
+	jas_iccuint16_t *clut;
+	jas_iccuint16_t numintabents;
+	jas_iccuint16_t **intabs;
+	jas_iccuint16_t *intabsbuf;
+	jas_iccuint16_t numouttabents;
+	jas_iccuint16_t **outtabs;
+	jas_iccuint16_t *outtabsbuf;
+} jas_icclut16_t;
+
+struct jas_iccattrval_s;
+
+typedef struct { /* Table of type-dependent operations on an attribute value (see the ops member of jas_iccattrval_s). */
+	void (*destroy)(struct jas_iccattrval_s *);
+	int (*copy)(struct jas_iccattrval_s *, struct jas_iccattrval_s *); /* NOTE(review): which argument is the destination is not visible here */
+	int (*input)(struct jas_iccattrval_s *, jas_stream_t *, int); /* NOTE(review): meaning of the int argument is not visible here -- confirm */
+	int (*output)(struct jas_iccattrval_s *, jas_stream_t *);
+	int (*getsize)(struct jas_iccattrval_s *);
+	void (*dump)(struct jas_iccattrval_s *, FILE *);
+} jas_iccattrvalops_t;
+
+/* Attribute value type (type and value information). */
+typedef struct jas_iccattrval_s {
+	int refcnt; /* reference count */
+	jas_iccsig_t type; /* type */
+	jas_iccattrvalops_t *ops; /* type-dependent operations */
+	union {
+		jas_iccxyz_t xyz;
+		jas_icccurv_t curv;
+		jas_icctxtdesc_t txtdesc;
+		jas_icctxt_t txt;
+		jas_icclut8_t lut8;
+		jas_icclut16_t lut16;
+	} data; /* value */
+} jas_iccattrval_t;
+
+/* Header type. */
+typedef struct {
+	jas_iccuint32_t size; /* profile size */
+	jas_iccsig_t cmmtype; /* CMM type signature */
+	jas_iccuint32_t version; /* profile version */
+	jas_iccsig_t clas; /* profile/device class signature */
+	jas_iccsig_t colorspc; /* color space of data */
+	jas_iccsig_t refcolorspc; /* profile connection space */
+	jas_icctime_t ctime; /* creation time */
+	jas_iccsig_t magic; /* profile file signature */
+	jas_iccsig_t platform; /* primary platform */
+	jas_iccuint32_t flags; /* profile flags */
+	jas_iccsig_t maker; /* device manufacturer signature */
+	jas_iccsig_t model; /* device model signature */
+	jas_iccuint64_t attr; /* device setup attributes */
+	jas_iccsig_t intent; /* rendering intent */
+	jas_iccxyz_t illum; /* illuminant */
+	jas_iccsig_t creator; /* profile creator signature */
+} jas_icchdr_t;
+
+typedef struct {
+	jas_iccsig_t name;
+	jas_iccattrval_t *val;
+} jas_iccattr_t;
+
+typedef struct {
+	int numattrs;
+	int maxattrs;
+	jas_iccattr_t *attrs;
+} jas_iccattrtab_t;
+
+typedef struct jas_icctagtabent_s {
+	jas_iccuint32_t tag;
+	jas_iccuint32_t off;
+	jas_iccuint32_t len;
+	void *data;
+	struct jas_icctagtabent_s *first;
+} jas_icctagtabent_t;
+
+typedef struct {
+	jas_iccuint32_t numents;
+	jas_icctagtabent_t *ents;
+} jas_icctagtab_t;
+
+/* ICC profile type. */
+typedef struct {
+	jas_icchdr_t hdr;
+	jas_icctagtab_t tagtab;
+	jas_iccattrtab_t *attrtab;
+} jas_iccprof_t;
+
+typedef struct {
+	jas_iccuint32_t type;
+	jas_iccattrvalops_t ops;
+} jas_iccattrvalinfo_t;
+
+jas_iccprof_t *jas_iccprof_load(jas_stream_t *in);
+int jas_iccprof_save(jas_iccprof_t *prof, jas_stream_t *out);
+void jas_iccprof_destroy(jas_iccprof_t *prof);
+jas_iccattrval_t *jas_iccprof_getattr(jas_iccprof_t *prof,
+  jas_iccattrname_t name);
+int jas_iccprof_setattr(jas_iccprof_t *prof, jas_iccattrname_t name,
+  jas_iccattrval_t *val);
+void jas_iccprof_dump(jas_iccprof_t *prof, FILE *out);
+jas_iccprof_t *jas_iccprof_copy(jas_iccprof_t *prof);
+int jas_iccprof_gethdr(jas_iccprof_t *prof, jas_icchdr_t *hdr);
+int jas_iccprof_sethdr(jas_iccprof_t *prof, jas_icchdr_t *hdr);
+
+void jas_iccattrval_destroy(jas_iccattrval_t *attrval);
+void jas_iccattrval_dump(jas_iccattrval_t *attrval, FILE *out);
+int jas_iccattrval_allowmodify(jas_iccattrval_t **attrval);
+jas_iccattrval_t *jas_iccattrval_clone(jas_iccattrval_t *attrval);
+jas_iccattrval_t *jas_iccattrval_create(jas_iccuint32_t type);
+
+void jas_iccattrtab_dump(jas_iccattrtab_t *attrtab, FILE *out);
+
+extern uchar jas_iccprofdata_srgb[];
+extern int jas_iccprofdata_srgblen;
+extern uchar jas_iccprofdata_sgray[];
+extern int jas_iccprofdata_sgraylen;
+jas_iccprof_t *jas_iccprof_createfrombuf(uchar *buf, int len);
+jas_iccprof_t *jas_iccprof_createfromclrspc(int clrspc);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_image.h b/src/libjasper/jasper/jas_image.h
new file mode 100644
index 0000000..ad6d80d
--- /dev/null
+++ b/src/libjasper/jasper/jas_image.h
@@ -0,0 +1,617 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+
+ GeoJasper Revision:
+   11/07/2003 15:00 - dima - auxBuffer added for j_image_t : dima <dima@dimin.net>
+   2007-04-23 12:23 - dima - updated for a vector of metadata boxes
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Image Class
+ *
+ * $Id: jas_image.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef JAS_IMAGE_H
+#define JAS_IMAGE_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <jasper/jas_config.h>
+#include <jasper/jas_stream.h>
+#include <jasper/jas_seq.h>
+#include <jasper/jas_cm.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************\
+* Constants.
+\******************************************************************************/
+
+/*
+ * Miscellaneous constants.
+ */
+
+/* The threshold at which image data is no longer stored in memory. */
+#define JAS_IMAGE_INMEMTHRESH	(16 * 1024 * 1024)
+
+/*
+ * Component types
+ */
+
+#define	JAS_IMAGE_CT_UNKNOWN	0x10000
+#define	JAS_IMAGE_CT_COLOR(n)	((n) & 0x7fff)
+#define	JAS_IMAGE_CT_OPACITY	0x08000
+
+#define	JAS_IMAGE_CT_RGB_R	0
+#define	JAS_IMAGE_CT_RGB_G	1
+#define	JAS_IMAGE_CT_RGB_B	2
+
+#define	JAS_IMAGE_CT_YCBCR_Y	0
+#define	JAS_IMAGE_CT_YCBCR_CB	1
+#define	JAS_IMAGE_CT_YCBCR_CR	2
+
+#define	JAS_IMAGE_CT_GRAY_Y	0
+
+/******************************************************************************\
+* Simple types.
+\******************************************************************************/
+
+/* Image coordinate. */
+typedef int_fast32_t jas_image_coord_t;
+
+/* Color space (e.g., RGB, YCbCr). */
+typedef int_fast16_t jas_image_colorspc_t;
+
+/* Component type (e.g., color, opacity). */
+typedef int_fast32_t jas_image_cmpttype_t;
+
+/* Component sample data format (e.g., real/integer, signedness, precision). */
+typedef int_fast16_t jas_image_smpltype_t;
+
+/******************************************************************************\
+* Image class and supporting classes.
+\******************************************************************************/
+
+/* Image component class. */
+
+typedef struct {
+
+	jas_image_coord_t tlx_;
+	/* The x-coordinate of the top-left corner of the component. */
+
+	jas_image_coord_t tly_;
+	/* The y-coordinate of the top-left corner of the component. */
+
+	jas_image_coord_t hstep_;
+	/* The horizontal sampling period in units of the reference grid. */
+
+	jas_image_coord_t vstep_;
+	/* The vertical sampling period in units of the reference grid. */
+
+	jas_image_coord_t width_;
+	/* The component width in samples. */
+
+	jas_image_coord_t height_;
+	/* The component height in samples. */
+
+#ifdef FIX_ME
+	int smpltype_;
+#else
+	int prec_;
+	/* The precision of the sample data (i.e., the number of bits per
+	sample).  If the samples are signed values, this quantity
+	includes the sign bit. */
+
+	int sgnd_;
+	/* The signedness of the sample data. */
+#endif
+
+	jas_stream_t *stream_;
+	/* The stream containing the component data. */
+
+	int cps_;
+	/* The number of characters per sample in the stream. */
+
+	jas_image_cmpttype_t type_;
+	/* The type of component (e.g., opacity, red, green, blue, luma). */
+
+} jas_image_cmpt_t;
+
+
+// GeoJasper: dima - begin - metadata boxes defs
+// this field was added for additional information about the image, e.g. GeoTiff information
+#define JAS_IMAGE_NUM_BOXES 2
+#define JAS_IMAGE_BOX_GEO 0
+#define JAS_IMAGE_BOX_XMP 1
+
+typedef struct {
+  unsigned char   id[16]; // magic number (16 bytes) of the stored UUID box
+  unsigned int    size;   // size of the buffer; NOTE(review): jas_box_alloc takes an unsigned long size -- confirm no truncation
+  unsigned char  *buf;    // the buffer with the box contents
+} jas_metadata_box_t;
+
+// A static array of metadata boxes is used here for robustness; only two boxes are used at present anyway.
+// In the future this can become dynamic, hence the count variable.
+typedef struct {
+  unsigned int       count; // number of boxes, currently always 2
+  jas_metadata_box_t boxes[JAS_IMAGE_NUM_BOXES]; // the box buffers; presumably indexed by JAS_IMAGE_BOX_GEO / JAS_IMAGE_BOX_XMP
+} jas_metadata_boxes_t;
+
+jas_metadata_box_t jas_box_init( void );
+bool jas_box_alloc ( jas_metadata_box_t *box, unsigned long size );
+void jas_box_free  ( jas_metadata_box_t *box );
+// GeoJasper: dima - end - metadata boxes defs
+
+// GeoJasper: dima - begin - progress functions
+typedef void (*jas_progress_proc_t) (int done, int total, char *descr); // progress callback type: done/total counters plus a description string
+typedef int  (*jas_test_abort_proc_t)( void ); // abort-test callback type: takes no arguments, yields an int
+
+void jas_set_progress_proc( jas_progress_proc_t new_proc ); // presumably installs new_proc as the progress callback -- confirm in the implementation
+void jas_set_test_abort_proc( jas_test_abort_proc_t new_proc ); // presumably installs new_proc as the abort-test callback -- confirm
+
+void jas_do_progress( int done, int total, char *descr );
+int  jas_test_abort( void ); // explicit (void): an empty parameter list declares no prototype in C, so stray arguments would go unchecked
+// GeoJasper: dima - end - progress functions
+
+
+/* Image class. */
+
+typedef struct {
+
+	jas_image_coord_t tlx_;
+	/* The x-coordinate of the top-left corner of the image bounding box. */
+
+	jas_image_coord_t tly_;
+	/* The y-coordinate of the top-left corner of the image bounding box. */
+
+	jas_image_coord_t brx_;
+	/* The x-coordinate of the bottom-right corner of the image bounding
+	  box (plus one). */
+
+	jas_image_coord_t bry_;
+	/* The y-coordinate of the bottom-right corner of the image bounding
+	  box (plus one). */
+
+	int numcmpts_;
+	/* The number of components. */
+
+	int maxcmpts_;
+	/* The maximum number of components that this image can have (i.e., the
+	  allocated size of the components array). */
+
+	jas_image_cmpt_t **cmpts_;
+	/* Per-component information. */
+
+	jas_clrspc_t clrspc_; /* The color model used by the image (read via jas_image_clrspc). */
+
+	jas_cmprof_t *cmprof_; /* Read via jas_image_cmprof; presumably the color management profile -- TODO confirm. */
+
+	bool inmem_; /* Presumably whether the image data is held in memory (cf. JAS_IMAGE_INMEMTHRESH) -- TODO confirm. */
+
+  // GeoJasper: dima
+  // this field was added for additional information about the image, e.g. GeoTiff information
+  jas_metadata_boxes_t metadata;
+  // GeoJasper
+
+} jas_image_t;
+
+/* Component parameters class. */
+/* This data type exists solely/mainly for the purposes of the
+  jas_image_create function. */
+
+typedef struct {
+
+	jas_image_coord_t tlx;
+	/* The x-coordinate of the top-left corner of the component. */
+
+	jas_image_coord_t tly;
+	/* The y-coordinate of the top-left corner of the component. */
+
+	jas_image_coord_t hstep;
+	/* The horizontal sampling period in units of the reference grid. */
+
+	jas_image_coord_t vstep;
+	/* The vertical sampling period in units of the reference grid. */
+
+	jas_image_coord_t width;
+	/* The width of the component in samples. */
+
+	jas_image_coord_t height;
+	/* The height of the component in samples. */
+
+#ifdef FIX_ME
+	int smpltype;
+#else
+	int prec;
+	/* The precision of the component sample data. */
+
+	int sgnd;
+	/* The signedness of the component sample data. */
+#endif
+
+} jas_image_cmptparm_t;
+
+/******************************************************************************\
+* File format related classes.
+\******************************************************************************/
+
+#define	JAS_IMAGE_MAXFMTS	32
+/* The maximum number of image data formats supported. */
+
+/* Image format-dependent operations. */
+
+typedef struct {
+
+	jas_image_t *(*decode)(jas_stream_t *in, char *opts);
+	/* Decode image data from a stream. */
+
+	int (*encode)(jas_image_t *image, jas_stream_t *out, char *opts);
+	/* Encode image data to a stream. */
+
+	int (*validate)(jas_stream_t *in);
+	/* Determine if stream data is in a particular format. */
+
+} jas_image_fmtops_t;
+
+/* Image format information. */
+
+typedef struct {
+
+	int id;
+	/* The ID for this format. */
+
+	char *name;
+	/* The name by which this format is identified. */
+
+	char *ext;
+	/* The file name extension associated with this format. */
+
+	char *desc;
+	/* A brief description of the format. */
+
+	jas_image_fmtops_t ops;
+	/* The operations for this format. */
+
+} jas_image_fmtinfo_t;
+
+/******************************************************************************\
+* Image operations.
+\******************************************************************************/
+
+/* Create an image. */
+jas_image_t *jas_image_create(int numcmpts,
+  jas_image_cmptparm_t *cmptparms, jas_clrspc_t clrspc);
+
+/* Create an "empty" image. */
+jas_image_t *jas_image_create0(void);
+
+/* Clone an image. */
+jas_image_t *jas_image_copy(jas_image_t *image);
+
+/* Deallocate any resources associated with an image. */
+void jas_image_destroy(jas_image_t *image);
+
+/* Get the width of the image in units of the image reference grid. */
+#define jas_image_width(image) \
+	((image)->brx_ - (image)->tlx_)
+
+/* Get the height of the image in units of the image reference grid. */
+#define	jas_image_height(image) \
+	((image)->bry_ - (image)->tly_)
+
+/* Get the x-coordinate of the top-left corner of the image bounding box
+  on the reference grid. */
+#define jas_image_tlx(image) \
+	((image)->tlx_)
+
+/* Get the y-coordinate of the top-left corner of the image bounding box
+  on the reference grid. */
+#define jas_image_tly(image) \
+	((image)->tly_)
+
+/* Get the x-coordinate of the bottom-right corner of the image bounding box
+  on the reference grid (plus one). */
+#define jas_image_brx(image) \
+	((image)->brx_)
+
+/* Get the y-coordinate of the bottom-right corner of the image bounding box
+  on the reference grid (plus one). */
+#define jas_image_bry(image) \
+	((image)->bry_)
+
+/* Get the number of image components. */
+#define	jas_image_numcmpts(image) \
+	((image)->numcmpts_)
+
+/* Get the color model used by the image. */
+#define	jas_image_clrspc(image) \
+	((image)->clrspc_)
+
+/* Set the color model for an image. */
+#define jas_image_setclrspc(image, clrspc) \
+	((image)->clrspc_ = (clrspc))
+
+#define jas_image_cmpttype(image, cmptno) \
+	((image)->cmpts_[(cmptno)]->type_)
+#define jas_image_setcmpttype(image, cmptno, type) \
+	((image)->cmpts_[(cmptno)]->type_ = (type))
+
+/* Get the width of a component. */
+#define	jas_image_cmptwidth(image, cmptno) \
+	((image)->cmpts_[cmptno]->width_)
+
+/* Get the height of a component. */
+#define	jas_image_cmptheight(image, cmptno) \
+	((image)->cmpts_[cmptno]->height_)
+
+/* Get the signedness of the sample data for a component. */
+#define	jas_image_cmptsgnd(image, cmptno) \
+	((image)->cmpts_[cmptno]->sgnd_)
+
+/* Get the precision of the sample data for a component. */
+#define	jas_image_cmptprec(image, cmptno) \
+	((image)->cmpts_[cmptno]->prec_)
+
+/* Get the horizontal subsampling factor for a component. */
+#define	jas_image_cmpthstep(image, cmptno) \
+	((image)->cmpts_[cmptno]->hstep_)
+
+/* Get the vertical subsampling factor for a component. */
+#define	jas_image_cmptvstep(image, cmptno) \
+	((image)->cmpts_[cmptno]->vstep_)
+
+/* Get the x-coordinate of the top-left corner of a component. */
+#define	jas_image_cmpttlx(image, cmptno) \
+	((image)->cmpts_[cmptno]->tlx_)
+
+/* Get the y-coordinate of the top-left corner of a component. */
+#define	jas_image_cmpttly(image, cmptno) \
+	((image)->cmpts_[cmptno]->tly_)
+
+/* Get the x-coordinate of the bottom-right corner of a component
+  (plus "one"). */
+#define	jas_image_cmptbrx(image, cmptno) \
+	((image)->cmpts_[cmptno]->tlx_ + (image)->cmpts_[cmptno]->width_ * \
+	  (image)->cmpts_[cmptno]->hstep_)
+
+/* Get the y-coordinate of the bottom-right corner of a component
+  (plus "one"). */
+#define	jas_image_cmptbry(image, cmptno) \
+	((image)->cmpts_[cmptno]->tly_ + (image)->cmpts_[cmptno]->height_ * \
+	  (image)->cmpts_[cmptno]->vstep_)
+
+/* Get the raw size of an image (i.e., the nominal size of the image without
+  any compression). */
+uint_fast32_t jas_image_rawsize(jas_image_t *image);
+
+/* Create an image from a stream in some specified format. */
+jas_image_t *jas_image_decode(jas_stream_t *in, int fmt, char *optstr);
+
+/* Write an image to a stream in a specified format. */
+int jas_image_encode(jas_image_t *image, jas_stream_t *out, int fmt,
+  char *optstr);
+
+/* Read a rectangular region of an image component. */
+/* The position and size of the rectangular region to be read is specified
+relative to the component's coordinate system. */
+int jas_image_readcmpt(jas_image_t *image, int cmptno,
+  jas_image_coord_t x, jas_image_coord_t y, jas_image_coord_t width, jas_image_coord_t height,
+  jas_matrix_t *data);
+
+/* Write a rectangular region of an image component. */
+int jas_image_writecmpt(jas_image_t *image, int cmptno,
+  jas_image_coord_t x, jas_image_coord_t y, jas_image_coord_t width, jas_image_coord_t height,
+  jas_matrix_t *data);
+
+/* Delete a component from an image. */
+void jas_image_delcmpt(jas_image_t *image, int cmptno);
+
+/* Add a component to an image. */
+int jas_image_addcmpt(jas_image_t *image, int cmptno,
+  jas_image_cmptparm_t *cmptparm);
+
+/* Copy a component from one image to another. */
+int jas_image_copycmpt(jas_image_t *dstimage, int dstcmptno,
+  jas_image_t *srcimage, int srccmptno);
+
+#define	JAS_IMAGE_CDT_GETSGND(dtype) (((dtype) >> 7) & 1) /* extract the signedness flag (bit 7) from a packed dtype */
+#define	JAS_IMAGE_CDT_SETSGND(dtype) (((dtype) & 1) << 7) /* move a signedness flag into bit 7; the argument is the flag itself, not a dtype */
+#define	JAS_IMAGE_CDT_GETPREC(dtype) ((dtype) & 0x7f) /* extract the precision (bits 0-6) from a packed dtype */
+#define	JAS_IMAGE_CDT_SETPREC(dtype) ((dtype) & 0x7f) /* mask a precision into bits 0-6; the argument is the precision itself */
+
+#define	jas_image_cmptdtype(image, cmptno) \
+	(JAS_IMAGE_CDT_SETSGND((image)->cmpts_[cmptno]->sgnd_) | JAS_IMAGE_CDT_SETPREC((image)->cmpts_[cmptno]->prec_))
+
+int jas_image_depalettize(jas_image_t *image, int cmptno, int numlutents,
+  int_fast32_t *lutents, int dtype, int newcmptno);
+
+int jas_image_readcmptsample(jas_image_t *image, int cmptno, int x, int y);
+void jas_image_writecmptsample(jas_image_t *image, int cmptno, int x, int y,
+  int_fast32_t v);
+
+int jas_image_getcmptbytype(jas_image_t *image, int ctype);
+
+/******************************************************************************\
+* Image format-related operations.
+\******************************************************************************/
+
+/* Clear the table of image formats. */
+void jas_image_clearfmts(void);
+
+/* Add entry to table of image formats. */
+int jas_image_addfmt(int id, char *name, char *ext, char *desc,
+  jas_image_fmtops_t *ops);
+
+/* Get the ID for the image format with the specified name. */
+int jas_image_strtofmt(char *s);
+
+/* Get the name of the image format with the specified ID. */
+char *jas_image_fmttostr(int fmt);
+
+/* Lookup image format information by the format ID. */
+jas_image_fmtinfo_t *jas_image_lookupfmtbyid(int id);
+
+/* Lookup image format information by the format name. */
+jas_image_fmtinfo_t *jas_image_lookupfmtbyname(const char *name);
+
+/* Guess the format of an image file based on its name. */
+int jas_image_fmtfromname(char *filename);
+
+/* Get the format of image data in a stream. */
+int jas_image_getfmt(jas_stream_t *in);
+
+
+#define	jas_image_cmprof(image)	((image)->cmprof_)
+int jas_image_ishomosamp(jas_image_t *image);
+int jas_image_sampcmpt(jas_image_t *image, int cmptno, int newcmptno,
+  jas_image_coord_t ho, jas_image_coord_t vo, jas_image_coord_t hs,
+  jas_image_coord_t vs, int sgnd, int prec);
+int jas_image_writecmpt2(jas_image_t *image, int cmptno, jas_image_coord_t x,
+  jas_image_coord_t y, jas_image_coord_t width, jas_image_coord_t height,
+  long *buf);
+int jas_image_readcmpt2(jas_image_t *image, int cmptno, jas_image_coord_t x,
+  jas_image_coord_t y, jas_image_coord_t width, jas_image_coord_t height,
+  long *buf);
+
+#define	jas_image_setcmprof(image, cmprof) ((image)->cmprof_ = (cmprof)) /* Store cmprof in the image's cmprof_ field. */
+jas_image_t *jas_image_chclrspc(jas_image_t *image, jas_cmprof_t *outprof,
+  int intent);
+void jas_image_dump(jas_image_t *image, FILE *out);
+
+/******************************************************************************\
+* Image format-dependent operations.
+\******************************************************************************/
+
+#if !defined(EXCLUDE_JPG_SUPPORT)
+/* Format-dependent operations for JPG support. */
+jas_image_t *jpg_decode(jas_stream_t *in, char *optstr);
+int jpg_encode(jas_image_t *image, jas_stream_t *out, char *optstr);
+int jpg_validate(jas_stream_t *in);
+#endif
+
+#if !defined(EXCLUDE_MIF_SUPPORT)
+/* Format-dependent operations for MIF support. */
+jas_image_t *mif_decode(jas_stream_t *in, char *optstr);
+int mif_encode(jas_image_t *image, jas_stream_t *out, char *optstr);
+int mif_validate(jas_stream_t *in);
+#endif
+
+#if !defined(EXCLUDE_PNM_SUPPORT)
+/* Format-dependent operations for PNM support. */
+jas_image_t *pnm_decode(jas_stream_t *in, char *optstr);
+int pnm_encode(jas_image_t *image, jas_stream_t *out, char *optstr);
+int pnm_validate(jas_stream_t *in);
+#endif
+
+#if !defined(EXCLUDE_RAS_SUPPORT)
+/* Format-dependent operations for Sun Rasterfile support. */
+jas_image_t *ras_decode(jas_stream_t *in, char *optstr);
+int ras_encode(jas_image_t *image, jas_stream_t *out, char *optstr);
+int ras_validate(jas_stream_t *in);
+#endif
+
+#if !defined(EXCLUDE_BMP_SUPPORT)
+/* Format-dependent operations for BMP support. */
+jas_image_t *bmp_decode(jas_stream_t *in, char *optstr);
+int bmp_encode(jas_image_t *image, jas_stream_t *out, char *optstr);
+int bmp_validate(jas_stream_t *in);
+#endif
+
+#if !defined(EXCLUDE_JP2_SUPPORT)
+/* Format-dependent operations for JP2 support. */
+jas_image_t *jp2_decode(jas_stream_t *in, char *optstr);
+int jp2_encode(jas_image_t *image, jas_stream_t *out, char *optstr);
+int jp2_validate(jas_stream_t *in);
+#endif
+
+#if !defined(EXCLUDE_JPC_SUPPORT)
+/* Format-dependent operations for JPEG-2000 code stream support. */
+jas_image_t *jpc_decode(jas_stream_t *in, char *optstr);
+int jpc_encode(jas_image_t *image, jas_stream_t *out, char *optstr);
+int jpc_validate(jas_stream_t *in);
+#endif
+
+#if !defined(EXCLUDE_PGX_SUPPORT)
+/* Format-dependent operations for PGX support. */
+jas_image_t *pgx_decode(jas_stream_t *in, char *optstr);
+int pgx_encode(jas_image_t *image, jas_stream_t *out, char *optstr);
+int pgx_validate(jas_stream_t *in);
+#endif
+
+#if !defined(EXCLUDE_TIFF_SUPPORT)
+/* Format-dependent operations for TIFF support. */
+jas_image_t *tiff_decode(jas_stream_t *in, char *optstr);
+int tiff_encode(jas_image_t *image, jas_stream_t *out, char *optstr);
+int tiff_validate(jas_stream_t *in);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_init.h b/src/libjasper/jasper/jas_init.h
new file mode 100644
index 0000000..10301e9
--- /dev/null
+++ b/src/libjasper/jasper/jas_init.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+#ifndef JAS_INIT_H
+#define JAS_INIT_H
+
+#include <jasper/jas_config.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+int jas_init(void);
+
+void jas_cleanup(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_malloc.h b/src/libjasper/jasper/jas_malloc.h
new file mode 100644
index 0000000..e1449e4
--- /dev/null
+++ b/src/libjasper/jasper/jas_malloc.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Memory Allocator
+ *
+ * $Id: jas_malloc.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef JAS_MALLOC_H
+#define JAS_MALLOC_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <jasper/jas_config.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************\
+* Hack follows...
+\******************************************************************************/
+
+#if defined(DEBUG_MEMALLOC)
+/* This is somewhat of a hack, but it's a useful hack. :-) */
+/* Use my own custom memory allocator for debugging. */
+#include "../../../../local/src/memalloc.h"
+#define jas_malloc	MEMALLOC
+#define	jas_free	MEMFREE
+#define	jas_realloc	MEMREALLOC
+#define	jas_calloc	MEMCALLOC
+#endif
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+#if !defined(DEBUG_MEMALLOC)
+
+/* Allocate memory. */
+void *jas_malloc(size_t size);
+
+/* Free memory. */
+void jas_free(void *ptr);
+
+/* Resize a block of allocated memory. */
+void *jas_realloc(void *ptr, size_t size);
+
+/* Allocate a block of memory and initialize the contents to zero. */
+void *jas_calloc(size_t nmemb, size_t size);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_math.h b/src/libjasper/jasper/jas_math.h
new file mode 100644
index 0000000..862d396
--- /dev/null
+++ b/src/libjasper/jasper/jas_math.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Math-Related Code
+ *
+ * $Id: jas_math.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef	JAS_MATH_H
+#define	JAS_MATH_H
+
+/******************************************************************************\
+* Includes
+\******************************************************************************/
+
+#include <jasper/jas_config.h>
+
+#include	<assert.h>
+#include	<stdio.h>
+#include	<string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************\
+* Macros
+\******************************************************************************/
+
+/* Compute the absolute value.  The argument is expanded more than once. */
+#define	JAS_ABS(x) \
+	(((x) >= 0) ? (x) : (-(x)))
+
+/* Compute the minimum of two values.  Each argument is expanded more than once. */
+#define	JAS_MIN(x, y) \
+	(((x) < (y)) ? (x) : (y))
+
+/* Compute the maximum of two values.  Each argument is expanded more than once. */
+#define	JAS_MAX(x, y) \
+	(((x) > (y)) ? (x) : (y))
+
+/* Compute the remainder from division (where division is defined such
+  that the remainder is always nonnegative, given y > 0).  Arguments are expanded more than once. */
+#define	JAS_MOD(x, y) \
+	(((x) < 0) ? (((-(x)) % (y)) ? ((y) - ((-(x)) % (y))) : (0)) : ((x) % (y)))
+
+/* Compute the integer with the specified number of least significant bits
+  set to one.  The shift is done on an int: n must be >= 0 and less than the number of value bits in an int. */
+#define	JAS_ONES(n) \
+  ((1 << (n)) - 1)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_seq.h b/src/libjasper/jasper/jas_seq.h
new file mode 100644
index 0000000..fa075e3
--- /dev/null
+++ b/src/libjasper/jasper/jas_seq.h
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Sequence/Matrix Library
+ *
+ * $Id: jas_seq.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef JAS_SEQ_H
+#define JAS_SEQ_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <jasper/jas_config.h>
+
+#include <jasper/jas_stream.h>
+#include <jasper/jas_types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************\
+* Constants.
+\******************************************************************************/
+
+/* This matrix is a reference to another matrix. */
+#define JAS_MATRIX_REF	0x0001
+
+/******************************************************************************\
+* Types.
+\******************************************************************************/
+
+/* An element in a sequence. */
+typedef int_fast32_t jas_seqent_t;
+
+/* An element in a matrix. */
+typedef int_fast32_t jas_matent_t;
+
+/* Matrix (also used for the 1-D and 2-D sequence types via the typedefs that follow). */
+
+typedef struct {
+
+	/* Additional state information (flag bits; presumably JAS_MATRIX_REF). */
+	int flags_;
+
+	/* The starting horizontal index (subtracted from x by the jas_seq2d/jas_seq accessors). */
+	int_fast32_t xstart_;
+
+	/* The starting vertical index (subtracted from y by the jas_seq2d accessors). */
+	int_fast32_t ystart_;
+
+	/* The ending horizontal index (xstart_ + numcols_ when set by jas_seq2d_setshift). */
+	int_fast32_t xend_;
+
+	/* The ending vertical index (ystart_ + numrows_ when set by jas_seq2d_setshift). */
+	int_fast32_t yend_;
+
+	/* The number of rows in the matrix. */
+	int_fast32_t numrows_;
+
+	/* The number of columns in the matrix. */
+	int_fast32_t numcols_;
+
+	/* Pointers to the start of each row (elements are accessed as rows_[i][j]). */
+	jas_seqent_t **rows_;
+
+	/* The allocated size of the rows array. */
+	int_fast32_t maxrows_;
+
+	/* The matrix data buffer. */
+	jas_seqent_t *data_;
+
+	/* The allocated size of the data array. */
+	int_fast32_t datasize_;
+
+} jas_matrix_t;
+
+typedef jas_matrix_t jas_seq2d_t;
+typedef jas_matrix_t jas_seq_t;
+
+/******************************************************************************\
+* Functions/macros for matrix class.
+\******************************************************************************/
+
+/* Get the number of rows. */
+#define jas_matrix_numrows(matrix) \
+	((matrix)->numrows_)
+
+/* Get the number of columns. */
+#define jas_matrix_numcols(matrix) \
+	((matrix)->numcols_)
+
+/* Get a matrix element. */
+#define jas_matrix_get(matrix, i, j) \
+	((matrix)->rows_[i][j])
+
+/* Set a matrix element. */
+#define jas_matrix_set(matrix, i, j, v) \
+	((matrix)->rows_[i][j] = (v))
+
+/* Get an element from a matrix that is known to be a row or column vector. */
+#define jas_matrix_getv(matrix, i) \
+	(((matrix)->numrows_ == 1) ? ((matrix)->rows_[0][i]) : \
+	  ((matrix)->rows_[i][0]))
+
+/* Set an element in a matrix that is known to be a row or column vector. */
+#define jas_matrix_setv(matrix, i, v) \
+	(((matrix)->numrows_ == 1) ? ((matrix)->rows_[0][i] = (v)) : \
+	  ((matrix)->rows_[i][0] = (v)))
+
+/* Get the address of an element in a matrix. */
+#define	jas_matrix_getref(matrix, i, j) \
+	(&(matrix)->rows_[i][j])
+
+#define	jas_matrix_getvref(matrix, i) \
+	(((matrix)->numrows_ > 1) ? jas_matrix_getref(matrix, i, 0) : jas_matrix_getref(matrix, 0, i))
+
+#define jas_matrix_length(matrix) /* Larger of the row count and the column count. */ \
+	(((matrix)->numrows_ > (matrix)->numcols_) ? (matrix)->numrows_ : (matrix)->numcols_)
+
+/* Create a matrix with the specified dimensions. */
+jas_matrix_t *jas_matrix_create(int numrows, int numcols);
+
+/* Destroy a matrix. */
+void jas_matrix_destroy(jas_matrix_t *matrix);
+
+/* Resize a matrix.  The previous contents of the matrix are lost. */
+int jas_matrix_resize(jas_matrix_t *matrix, int numrows, int numcols);
+
+int jas_matrix_output(jas_matrix_t *matrix, FILE *out);
+
+/* Create a matrix that references part of another matrix. */
+void jas_matrix_bindsub(jas_matrix_t *mat0, jas_matrix_t *mat1, int r0, int c0,
+  int r1, int c1);
+
+/* Create a matrix that is a reference to a row of another matrix. */
+#define jas_matrix_bindrow(mat0, mat1, r) \
+  (jas_matrix_bindsub((mat0), (mat1), (r), 0, (r), (mat1)->numcols_ - 1))
+
+/* Create a matrix that is a reference to a column of another matrix. */
+#define jas_matrix_bindcol(mat0, mat1, c) \
+  (jas_matrix_bindsub((mat0), (mat1), 0, (c), (mat1)->numrows_ - 1, (c)))
+
+/* Clip the values of matrix elements to the specified range. */
+void jas_matrix_clip(jas_matrix_t *matrix, jas_seqent_t minval,
+  jas_seqent_t maxval);
+
+/* Arithmetic shift left of all elements in a matrix. */
+void jas_matrix_asl(jas_matrix_t *matrix, int n);
+
+/* Arithmetic shift right of all elements in a matrix. */
+void jas_matrix_asr(jas_matrix_t *matrix, int n);
+
+/* Almost-but-not-quite arithmetic shift right of all elements in a matrix. */
+void jas_matrix_divpow2(jas_matrix_t *matrix, int n);
+
+/* Set all elements of a matrix to the specified value. */
+void jas_matrix_setall(jas_matrix_t *matrix, jas_seqent_t val);
+
+/* The spacing between rows of a matrix. */
+#define	jas_matrix_rowstep(matrix) \
+	(((matrix)->numrows_ > 1) ? ((matrix)->rows_[1] - (matrix)->rows_[0]) : (0))
+
+/* The spacing between elements of a row or column vector (the row step if numrows_ > 1, else 1). */
+#define	jas_matrix_step(matrix) \
+	(((matrix)->numrows_ > 1) ? (jas_matrix_rowstep(matrix)) : (1))
+
+/* Compare two matrices for equality. */
+int jas_matrix_cmp(jas_matrix_t *mat0, jas_matrix_t *mat1);
+
+jas_matrix_t *jas_matrix_copy(jas_matrix_t *x);
+
+jas_matrix_t *jas_matrix_input(FILE *);
+
+/******************************************************************************\
+* Functions/macros for 2-D sequence class.
+\******************************************************************************/
+
+jas_seq2d_t *jas_seq2d_copy(jas_seq2d_t *x);
+
+jas_matrix_t *jas_seq2d_create(int xstart, int ystart, int xend, int yend);
+
+#define	jas_seq2d_destroy(s) \
+	jas_matrix_destroy(s)
+
+#define	jas_seq2d_xstart(s) \
+	((s)->xstart_)
+#define	jas_seq2d_ystart(s) \
+	((s)->ystart_)
+#define	jas_seq2d_xend(s) \
+	((s)->xend_)
+#define	jas_seq2d_yend(s) \
+	((s)->yend_)
+#define	jas_seq2d_getref(s, x, y) \
+	(jas_matrix_getref(s, (y) - (s)->ystart_, (x) - (s)->xstart_))
+#define	jas_seq2d_get(s, x, y) \
+	(jas_matrix_get(s, (y) - (s)->ystart_, (x) - (s)->xstart_))
+#define	jas_seq2d_rowstep(s) \
+	jas_matrix_rowstep(s)
+#define	jas_seq2d_width(s) \
+	((s)->xend_ - (s)->xstart_)
+#define	jas_seq2d_height(s) \
+	((s)->yend_ - (s)->ystart_)
+#define	jas_seq2d_setshift(s, x, y) \
+	((s)->xstart_ = (x), (s)->ystart_ = (y), \
+	  (s)->xend_ = (s)->xstart_ + (s)->numcols_, \
+	  (s)->yend_ = (s)->ystart_ + (s)->numrows_)
+
+void jas_seq2d_bindsub(jas_matrix_t *s, jas_matrix_t *s1, int xstart,
+  int ystart, int xend, int yend);
+
+/******************************************************************************\
+* Functions/macros for 1-D sequence class.
+\******************************************************************************/
+
+#define	jas_seq_create(start, end) \
+	(jas_seq2d_create(start, 0, end, 1))
+
+#define	jas_seq_destroy(seq) \
+	(jas_seq2d_destroy(seq))
+
+#define jas_seq_set(seq, i, v) \
+	((seq)->rows_[0][(i) - (seq)->xstart_] = (v))
+#define	jas_seq_getref(seq, i) \
+	(&(seq)->rows_[0][(i) - (seq)->xstart_])
+#define	jas_seq_get(seq, i) \
+	((seq)->rows_[0][(i) - (seq)->xstart_])
+#define	jas_seq_start(seq) \
+	((seq)->xstart_)
+#define	jas_seq_end(seq) \
+	((seq)->xend_)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_stream.h b/src/libjasper/jasper/jas_stream.h
new file mode 100644
index 0000000..6207fde
--- /dev/null
+++ b/src/libjasper/jasper/jas_stream.h
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * I/O Stream Class
+ *
+ * $Id: jas_stream.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef JAS_STREAM_H
+#define JAS_STREAM_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <jasper/jas_config.h>
+
+#include <stdio.h>
+#if defined(HAVE_FCNTL_H)
+#include <fcntl.h>
+#endif
+#include <string.h>
+#if defined(HAVE_UNISTD_H)
+#include <unistd.h>
+#endif
+#include <jasper/jas_types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************\
+* Constants.
+\******************************************************************************/
+
+/* On most UNIX systems, we probably need to define O_BINARY ourselves. */
+#ifndef O_BINARY
+#define O_BINARY	0
+#endif
+
+/*
+ * Stream open flags.
+ */
+
+/* The stream was opened for reading. */
+#define JAS_STREAM_READ	0x0001
+/* The stream was opened for writing. */
+#define JAS_STREAM_WRITE	0x0002
+/* The stream was opened for appending. */
+#define JAS_STREAM_APPEND	0x0004
+/* The stream was opened in binary mode. */
+#define JAS_STREAM_BINARY	0x0008
+/* The stream should be created/truncated. */
+#define JAS_STREAM_CREATE	0x0010
+
+
+/*
+ * Stream buffering flags.
+ */
+
+/* The stream is unbuffered. */
+#define JAS_STREAM_UNBUF	0x0000
+/* The stream is line buffered. */
+#define JAS_STREAM_LINEBUF	0x0001
+/* The stream is fully buffered. */
+#define JAS_STREAM_FULLBUF	0x0002
+/* The buffering mode mask.  NOTE(review): 0x000f also covers JAS_STREAM_FREEBUF (0x0008) -- confirm intended. */
+#define	JAS_STREAM_BUFMODEMASK	0x000f
+
+/* The memory associated with the buffer needs to be deallocated when the
+  stream is destroyed. */
+#define JAS_STREAM_FREEBUF	0x0008
+/* The buffer is currently being used for reading. */
+#define JAS_STREAM_RDBUF	0x0010
+/* The buffer is currently being used for writing. */
+#define JAS_STREAM_WRBUF	0x0020
+
+/*
+ * Stream error flags.
+ */
+
+/* The end-of-file has been encountered (on reading). */
+#define JAS_STREAM_EOF	0x0001
+/* An I/O error has been encountered on the stream. */
+#define JAS_STREAM_ERR	0x0002
+/* The read/write limit has been exceeded. */
+#define	JAS_STREAM_RWLIMIT	0x0004
+/* The error mask. */
+#define JAS_STREAM_ERRMASK \
+	(JAS_STREAM_EOF | JAS_STREAM_ERR | JAS_STREAM_RWLIMIT)
+
+/*
+ * Other miscellaneous constants.
+ */
+
+/* The default buffer size (for fully-buffered operation). */
+#define JAS_STREAM_BUFSIZE	8192
+/* The default permission mask for file creation. */
+#define JAS_STREAM_PERMS	0666
+
+/* The maximum number of characters that can always be put back on a stream. */
+#define	JAS_STREAM_MAXPUTBACK	16
+
+/******************************************************************************\
+* Types.
+\******************************************************************************/
+
+/*
+ * Generic file object.
+ */
+
+typedef void jas_stream_obj_t;
+
+/*
+ * Generic file object operations.
+ */
+
+typedef struct {
+
+	/* Read characters from a file object. */
+	int (*read_)(jas_stream_obj_t *obj, char *buf, int cnt);
+
+	/* Write characters to a file object. */
+	int (*write_)(jas_stream_obj_t *obj, char *buf, int cnt);
+
+	/* Set the position for a file object. */
+	long (*seek_)(jas_stream_obj_t *obj, long offset, int origin);
+
+	/* Close a file object. */
+	int (*close_)(jas_stream_obj_t *obj);
+
+} jas_stream_ops_t;
+
+/*
+ * Stream object.
+ */
+
+typedef struct {
+
+	/* The mode in which the stream was opened. */
+	int openmode_;
+
+	/* The buffering mode. */
+	int bufmode_;
+
+	/* The stream status. */
+	int flags_;
+
+	/* The start of the buffer area to use for reading/writing. */
+	uchar *bufbase_;
+
+	/* The start of the buffer area excluding the extra initial space for
+	  character putback. */
+	uchar *bufstart_;
+
+	/* The buffer size. */
+	int bufsize_;
+
+	/* The current position in the buffer. */
+	uchar *ptr_;
+
+	/* The number of characters that must be read/written before
+	the buffer needs to be filled/flushed. */
+	int cnt_;
+
+	/* A trivial buffer to be used for unbuffered operation. */
+	uchar tinybuf_[JAS_STREAM_MAXPUTBACK + 1];
+
+	/* The operations for the underlying stream file object. */
+	jas_stream_ops_t *ops_;
+
+	/* The underlying stream file object. */
+	jas_stream_obj_t *obj_;
+
+	/* The number of characters read/written. */
+	long rwcnt_;
+
+	/* The maximum number of characters that may be read/written. */
+	long rwlimit_;
+
+} jas_stream_t;
+
+/*
+ * Regular file object.
+ */
+
+/*
+ * File descriptor file object.
+ */
+#define DIM_MAX_FILE_NAME 2048
+typedef struct {
+	int fd; /* presumably the underlying file descriptor -- confirm in jas_stream.c */
+	int flags; /* presumably JAS_STREAM_FILEOBJ_* bits -- confirm in jas_stream.c */
+	/* was: char pathname[L_tmpnam + 1]; */
+	char pathname[DIM_MAX_FILE_NAME]; /* dima: GeoJasper uses this field to store full file name */
+} jas_stream_fileobj_t;
+
+#define	JAS_STREAM_FILEOBJ_DELONCLOSE	0x01
+#define JAS_STREAM_FILEOBJ_NOCLOSE	0x02
+
+/*
+ * Memory file object.
+ */
+
+typedef struct {
+
+	/* The data associated with this file. */
+	uchar *buf_;
+
+	/* The allocated size of the buffer for holding file data. */
+	int bufsize_;
+
+	/* The length of the file. */
+	int_fast32_t len_;
+
+	/* The seek position. */
+	int_fast32_t pos_;
+
+	/* Is the buffer growable? */
+	int growable_;
+
+	/* Was the buffer allocated internally? */
+	int myalloc_;
+
+} jas_stream_memobj_t;
+
+/******************************************************************************\
+* Macros/functions for opening and closing streams.
+\******************************************************************************/
+
+/* Open a file as a stream. */
+jas_stream_t *jas_stream_fopen(const char *filename, const char *mode);
+
+/* Open a memory buffer as a stream. */
+jas_stream_t *jas_stream_memopen(char *buf, int bufsize);
+
+/* Open a file descriptor as a stream. */
+jas_stream_t *jas_stream_fdopen(int fd, const char *mode);
+
+/* Open a stdio stream as a stream. */
+jas_stream_t *jas_stream_freopen(const char *path, const char *mode, FILE *fp);
+
+/* Open a temporary file as a stream. */
+jas_stream_t *jas_stream_tmpfile(void);
+
+/* Close a stream. */
+int jas_stream_close(jas_stream_t *stream);
+
+/******************************************************************************\
+* Macros/functions for getting/setting the stream state.
+\******************************************************************************/
+
+/* Get the EOF indicator for a stream. */
+#define jas_stream_eof(stream) \
+	(((stream)->flags_ & JAS_STREAM_EOF) != 0)
+
+/* Get the error indicator for a stream. */
+#define jas_stream_error(stream) \
+	(((stream)->flags_ & JAS_STREAM_ERR) != 0)
+
+/* Clear the error indicator for a stream. */
+#define jas_stream_clearerr(stream) \
+	((stream)->flags_ &= ~(JAS_STREAM_ERR | JAS_STREAM_EOF))
+
+/* Get the read/write limit for a stream. */
+#define	jas_stream_getrwlimit(stream) \
+	(((const jas_stream_t *)(stream))->rwlimit_)
+
+/* Set the read/write limit for a stream. */
+int jas_stream_setrwlimit(jas_stream_t *stream, long rwlimit);
+
+/* Get the read/write count for a stream. */
+#define	jas_stream_getrwcount(stream) \
+	(((const jas_stream_t *)(stream))->rwcnt_)
+
+/* Set the read/write count for a stream. */
+long jas_stream_setrwcount(jas_stream_t *stream, long rwcnt);
+
+/******************************************************************************\
+* Macros/functions for I/O.
+\******************************************************************************/
+
+/* Read a character from a stream. */
+#if defined(DEBUG)
+#define	jas_stream_getc(stream)	jas_stream_getc_func(stream)
+#else
+#define jas_stream_getc(stream)	jas_stream_getc_macro(stream)
+#endif
+
+/* Write a character to a stream. */
+#if defined(DEBUG)
+#define jas_stream_putc(stream, c)	jas_stream_putc_func(stream, c)
+#else
+#define jas_stream_putc(stream, c)	jas_stream_putc_macro(stream, c)
+#endif
+
+/* Read characters from a stream into a buffer. */
+int jas_stream_read(jas_stream_t *stream, void *buf, int cnt);
+
+/* Write characters from a buffer to a stream. */
+int jas_stream_write(jas_stream_t *stream, const void *buf, int cnt);
+
+/* Write formatted output to a stream. */
+int jas_stream_printf(jas_stream_t *stream, const char *fmt, ...);
+
+/* Write a string to a stream. */
+int jas_stream_puts(jas_stream_t *stream, const char *s);
+
+/* Read a line of input from a stream. */
+char *jas_stream_gets(jas_stream_t *stream, char *buf, int bufsize);
+
+/* Look at the next character to be read from a stream without actually
+  removing it from the stream. */
+#define	jas_stream_peekc(stream) \
+	(((stream)->cnt_ <= 0) ? jas_stream_fillbuf(stream, 0) : \
+	  ((int)(*(stream)->ptr_)))
+
+/* Put a character back on a stream. */
+int jas_stream_ungetc(jas_stream_t *stream, int c);
+
+/******************************************************************************\
+* Macros/functions for getting/setting the stream position.
+\******************************************************************************/
+
+/* Is it possible to seek on this stream? */
+int jas_stream_isseekable(jas_stream_t *stream);
+
+/* Set the current position within the stream. */
+long jas_stream_seek(jas_stream_t *stream, long offset, int origin);
+
+/* Get the current position within the stream. */
+long jas_stream_tell(jas_stream_t *stream);
+
+/* Seek to the beginning of a stream. */
+int jas_stream_rewind(jas_stream_t *stream);
+
+/******************************************************************************\
+* Macros/functions for flushing.
+\******************************************************************************/
+
+/* Flush any pending output to a stream. */
+int jas_stream_flush(jas_stream_t *stream);
+
+/******************************************************************************\
+* Miscellaneous macros/functions.
+\******************************************************************************/
+
+/* Copy data from one stream to another. */
+int jas_stream_copy(jas_stream_t *dst, jas_stream_t *src, int n);
+
+/* Display stream contents (for debugging purposes). */
+int jas_stream_display(jas_stream_t *stream, FILE *fp, int n);
+
+/* Consume (i.e., discard) characters from stream. */
+int jas_stream_gobble(jas_stream_t *stream, int n);
+
+/* Write a character multiple times to a stream. */
+int jas_stream_pad(jas_stream_t *stream, int n, int c);
+
+/* Get the size of the file associated with the specified stream.
+  The specified stream must be seekable. */
+long jas_stream_length(jas_stream_t *stream);
+
+/******************************************************************************\
+* Internal functions.
+\******************************************************************************/
+
+/* The following functions are for internal use only!  If you call them
+directly, you will die a horrible, miserable, and painful death! */
+
+/* Read a character from a stream. */
+#define jas_stream_getc_macro(stream) \
+	((!((stream)->flags_ & (JAS_STREAM_ERR | JAS_STREAM_EOF | \
+	  JAS_STREAM_RWLIMIT))) ? \
+	  (((stream)->rwlimit_ >= 0 && (stream)->rwcnt_ >= (stream)->rwlimit_) ? \
+	  (stream->flags_ |= JAS_STREAM_RWLIMIT, EOF) : \
+	  jas_stream_getc2(stream)) : EOF)
+#define jas_stream_getc2(stream) \
+	((--(stream)->cnt_ < 0) ? jas_stream_fillbuf(stream, 1) : \
+	  (++(stream)->rwcnt_, (int)(*(stream)->ptr_++)))
+
+/* Write a character to a stream. */
+#define jas_stream_putc_macro(stream, c) \
+	((!((stream)->flags_ & (JAS_STREAM_ERR | JAS_STREAM_EOF | \
+	  JAS_STREAM_RWLIMIT))) ? \
+	  (((stream)->rwlimit_ >= 0 && (stream)->rwcnt_ >= (stream)->rwlimit_) ? \
+	  (stream->flags_ |= JAS_STREAM_RWLIMIT, EOF) : \
+	  jas_stream_putc2(stream, c)) : EOF)
+#define jas_stream_putc2(stream, c) \
+	(((stream)->bufmode_ |= JAS_STREAM_WRBUF, --(stream)->cnt_ < 0) ? \
+	  jas_stream_flushbuf((stream), (uchar)(c)) : \
+	  (++(stream)->rwcnt_, (int)(*(stream)->ptr_++ = (c))))
+
+/* These prototypes need to be here for the sake of the stream_getc and
+stream_putc macros. */
+int jas_stream_fillbuf(jas_stream_t *stream, int getflag);
+int jas_stream_flushbuf(jas_stream_t *stream, int c);
+int jas_stream_getc_func(jas_stream_t *stream);
+int jas_stream_putc_func(jas_stream_t *stream, int c);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_string.h b/src/libjasper/jasper/jas_string.h
new file mode 100644
index 0000000..366f8e0
--- /dev/null
+++ b/src/libjasper/jasper/jas_string.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * String Library
+ *
+ * $Id: jas_string.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef	JAS_STRING_H
+#define	JAS_STRING_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <jasper/jas_config.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+/* Copy a string (a la strdup). */
+char *jas_strdup(const char *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_tmr.h b/src/libjasper/jasper/jas_tmr.h
new file mode 100644
index 0000000..837f2bf
--- /dev/null
+++ b/src/libjasper/jasper/jas_tmr.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2004 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+#ifndef JAS_TMR_H
+#define JAS_TMR_H
+
+#include<time.h>
+#include <jasper/jas_config.h>
+#if defined(HAVE_SYS_TIME_H)
+#include <sys/time.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(HAVE_GETTIMEOFDAY)
+
+typedef struct {
+	struct timeval start;
+	struct timeval stop;
+} jas_tmr_t;
+
+#elif defined(HAVE_GETRUSAGE)
+
+typedef struct {
+	struct rusage start;
+	struct rusage stop;
+} jas_tmr_t;
+
+#else
+
+typedef int jas_tmr_t;
+
+#endif
+
+void jas_tmr_start(jas_tmr_t *tmr);
+void jas_tmr_stop(jas_tmr_t *tmr);
+double jas_tmr_get(jas_tmr_t *tmr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_tvp.h b/src/libjasper/jasper/jas_tvp.h
new file mode 100644
index 0000000..b70c4bd
--- /dev/null
+++ b/src/libjasper/jasper/jas_tvp.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tag/Value Parser
+ *
+ * $Id: jas_tvp.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef JAS_TVP_H
+#define JAS_TVP_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <jasper/jas_config.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************\
+* Types.
+\******************************************************************************/
+
+/* Tag information type. */
+
+typedef struct {
+
+	int id;
+	/* The ID for the tag. */
+
+	char *name;
+	/* The name of the tag. */
+
+} jas_taginfo_t;
+
+/* Tag-value parser type. */
+
+typedef struct {
+
+	char *buf;
+	/* The parsing buffer. */
+
+	char *tag;
+	/* The current tag name. */
+
+	char *val;
+	/* The current value. */
+
+	char *pos;
+	/* The current position in the parsing buffer. */
+
+} jas_tvparser_t;
+
+/******************************************************************************\
+* Tag information functions.
+\******************************************************************************/
+
+/* Lookup a tag by name. */
+jas_taginfo_t *jas_taginfos_lookup(jas_taginfo_t *taginfos, const char *name);
+
+/* This function returns a pointer to the specified taginfo object if it
+  exists (i.e., the pointer is nonnull); otherwise, a pointer to a dummy
+  object is returned.  This is useful in some situations to avoid checking
+  for a null pointer. */
+jas_taginfo_t *jas_taginfo_nonull(jas_taginfo_t *taginfo);
+
+/******************************************************************************\
+* Tag-value parser functions.
+\******************************************************************************/
+
+/* Create a tag-value parser for the specified string. */
+jas_tvparser_t *jas_tvparser_create(const char *s);
+
+/* Destroy a tag-value parser. */
+void jas_tvparser_destroy(jas_tvparser_t *tvparser);
+
+/* Get the next tag-value pair. */
+int jas_tvparser_next(jas_tvparser_t *tvparser);
+
+/* Get the tag name for the current tag-value pair. */
+char *jas_tvparser_gettag(jas_tvparser_t *tvparser);
+
+/* Get the value for the current tag-value pair. */
+char *jas_tvparser_getval(jas_tvparser_t *tvparser);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_types.h b/src/libjasper/jasper/jas_types.h
new file mode 100644
index 0000000..ccd6a26
--- /dev/null
+++ b/src/libjasper/jasper/jas_types.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Primitive Types
+ *
+ * $Id: jas_types.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef JAS_TYPES_H
+#define JAS_TYPES_H
+
+#include <jasper/jas_config.h>
+
+#if !defined(JAS_CONFIGURE)
+
+#if defined(WIN32) || defined(HAVE_WINDOWS_H)
+/*
+   We are dealing with Microsoft Windows and most likely Microsoft
+   Visual C (MSVC).  (Heaven help us.)  Sadly, MSVC does not correctly
+   define some of the standard types specified in ISO/IEC 9899:1999.
+   In particular, it does not define the "long long" and "unsigned long
+   long" types.  So, we work around this problem by using the "INT64"
+   and "UINT64" types that are defined in the header file "windows.h".
+ */
+#include <windows.h>
+#undef longlong
+#define	longlong	INT64
+#undef ulonglong
+#define	ulonglong	UINT64
+#endif
+
+#endif
+
+#if defined(HAVE_STDLIB_H)
+#undef false
+#undef true
+#include <stdlib.h>
+#endif
+#if defined(HAVE_STDDEF_H)
+#include <stddef.h>
+#endif
+#if defined(HAVE_SYS_TYPES_H)
+#include <sys/types.h>
+#endif
+
+#ifndef __cplusplus
+#if defined(HAVE_STDBOOL_H)
+/*
+ * The C language implementation does correctly provide the standard header
+ * file "stdbool.h".
+ */
+#include <stdbool.h>
+#else
+
+/*
+ * The C language implementation does not provide the standard header file
+ * "stdbool.h" as required by ISO/IEC 9899:1999.  Try to compensate for this
+ * braindamage below.
+ */
+#if !defined(bool)
+#define	bool	int
+#endif
+#if !defined(true)
+#define true	1
+#endif
+#if !defined(false)
+#define	false	0
+#endif
+#endif
+
+#endif
+
+/* The stdint.h fallback below uses "longlong"/"ulonglong", so define them first
+  (unless the Windows section above already did). */
+#if !defined(longlong)
+#define longlong long long
+#endif
+#if !defined(ulonglong)
+#define ulonglong unsigned long long
+#endif
+
+#if defined(HAVE_STDINT_H)
+/* The C language implementation correctly provides <stdint.h>. */
+#include <stdint.h>
+#else
+/*
+ * The C language implementation does not provide the standard header file
+ * "stdint.h" as required by ISO/IEC 9899:1999.  Try to compensate for this
+ * braindamage below.
+ */
+#include <limits.h>
+/**********/
+#if !defined(INT_FAST8_MIN)
+typedef signed char int_fast8_t;
+#define INT_FAST8_MIN	SCHAR_MIN
+#define INT_FAST8_MAX	SCHAR_MAX
+#endif
+/**********/
+#if !defined(UINT_FAST8_MAX)
+typedef unsigned char uint_fast8_t;
+#define UINT_FAST8_MAX	255
+#endif
+/**********/
+#if !defined(INT_FAST16_MIN)
+typedef short int_fast16_t;
+#define INT_FAST16_MIN	SHRT_MIN
+#define INT_FAST16_MAX	SHRT_MAX
+#endif
+/**********/
+#if !defined(UINT_FAST16_MAX)
+typedef unsigned short uint_fast16_t;
+#define UINT_FAST16_MAX	USHRT_MAX
+#endif
+/**********/
+#if !defined(INT_FAST32_MIN)
+typedef int int_fast32_t;
+#define INT_FAST32_MIN	INT_MIN
+#define INT_FAST32_MAX	INT_MAX
+#endif
+/**********/
+#if !defined(UINT_FAST32_MAX)
+typedef unsigned int uint_fast32_t;
+#define UINT_FAST32_MAX	UINT_MAX
+#endif
+/**********/
+#if !defined(INT_FAST64_MIN)
+typedef longlong int_fast64_t;
+#define INT_FAST64_MIN	LLONG_MIN
+#define INT_FAST64_MAX	LLONG_MAX
+#endif
+/**********/
+#if !defined(UINT_FAST64_MAX)
+typedef ulonglong uint_fast64_t;
+#define UINT_FAST64_MAX	ULLONG_MAX
+#endif
+/**********/
+#endif
+
+/* Hopefully, these macro definitions will fix more problems than they cause. */
+#if !defined(uchar)
+#define uchar unsigned char
+#endif
+#if !defined(ushort)
+#define ushort unsigned short
+#endif
+#if !defined(uint)
+#define uint unsigned int
+#endif
+#if !defined(ulong)
+#define ulong unsigned long
+#endif
+
+/* The below macro is intended to be used for type casts.  By using this
+  macro, type casts can be easily located in the source code with
+  tools like "grep". */
+#define	JAS_CAST(t, e) \
+	((t) (e))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jas_version.h b/src/libjasper/jasper/jas_version.h
new file mode 100644
index 0000000..2c7d5cf
--- /dev/null
+++ b/src/libjasper/jasper/jas_version.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * $Id: jas_version.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef JAS_VERSION_H
+#define JAS_VERSION_H
+
+#include <jasper/jas_config.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************************************************\
+* Constants and types.
+\******************************************************************************/
+
+#if !defined(JAS_VERSION)
+/* The version information below should match that specified in
+  the "configure.in" file! */
+#define	JAS_VERSION		"unknown"
+#endif
+
+#define GJAS_VERSION "1.4.0"
+
+#define	JAS_COPYRIGHT \
+  "GeoJasPer Copyright (c) 2003-2007 Dmitry Fedorov.\n" \
+	"Copyright (c) 2001-2006 Michael David Adams.\n" \
+	"Copyright (c) 1999-2000 Image Power, Inc. and the University of\n" \
+	"  British Columbia.\n" \
+	"All rights reserved.\n"
+
+#define	JAS_NOTES \
+	"For more information about this software, please visit the following\n" \
+	"web sites/pages:\n" \
+  "\nGeoJasper - Geographical enhancements:\n" \
+	"    dimin@dimin.net <http://www.dimin.net>\n" \
+	"\nJPEG-2000 implementation:\n" \
+	"    http://www.ece.uvic.ca/~mdadams/jasper\n" \
+	"    http://www.jpeg.org/software\n"
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+const char *jas_getversion(void);
+/* Get the version information for the JasPer library. */
+/* Note:  Since libjasper can be built as a shared library, the version
+  returned by this function may not necessarily correspond to JAS_VERSION. */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jasper/jasper.h b/src/libjasper/jasper/jasper.h
new file mode 100644
index 0000000..eb74ba0
--- /dev/null
+++ b/src/libjasper/jasper/jasper.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+#ifndef JAS_JASPER_H
+#define JAS_JASPER_H
+
+#include <jasper/jas_config.h>
+#include <jasper/jas_types.h>
+#include <jasper/jas_version.h>
+
+#include <jasper/jas_init.h>
+#include <jasper/jas_cm.h>
+#include <jasper/jas_icc.h>
+#include <jasper/jas_fix.h>
+#include <jasper/jas_debug.h>
+#include <jasper/jas_getopt.h>
+#include <jasper/jas_image.h>
+#include <jasper/jas_icc.h>
+#include <jasper/jas_math.h>
+#include <jasper/jas_malloc.h>
+#include <jasper/jas_seq.h>
+#include <jasper/jas_stream.h>
+#include <jasper/jas_string.h>
+#include <jasper/jas_tmr.h>
+#include <jasper/jas_tvp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/libjasper/jp2/jp2_cod.c b/src/libjasper/jp2/jp2_cod.c
new file mode 100644
index 0000000..81b01ce
--- /dev/null
+++ b/src/libjasper/jp2/jp2_cod.c
@@ -0,0 +1,980 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+
+ GeoJasper revision: Dima (11/07/2003 17:29 - UUID from j_image_t)
+ Modifications by Andrey Kiselev <dron@remotesensing.org> to handle UUID box properly.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * JP2 Library
+ *
+ * $Id: jp2_cod.c,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "jasper/jas_stream.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_debug.h"
+
+#include "jp2_cod.h"
+
+/******************************************************************************\
+* Function prototypes.
+\******************************************************************************/
+
+#define	ONES(n)	((1 << (n)) - 1)
+
+jp2_boxinfo_t *jp2_boxinfolookup(int type);
+
+static int jp2_getuint8(jas_stream_t *in, uint_fast8_t *val);
+static int jp2_getuint16(jas_stream_t *in, uint_fast16_t *val);
+static int jp2_getuint32(jas_stream_t *in, uint_fast32_t *val);
+static int jp2_getuint64(jas_stream_t *in, uint_fast64_t *val);
+static int jp2_putuint8(jas_stream_t *out, uint_fast8_t val);
+static int jp2_putuint16(jas_stream_t *out, uint_fast16_t val);
+static int jp2_putuint32(jas_stream_t *out, uint_fast32_t val);
+static int jp2_putuint64(jas_stream_t *out, uint_fast64_t val);
+
+static int jp2_getint(jas_stream_t *in, int s, int n, int_fast32_t *val);
+
+jp2_box_t *jp2_box_get(jas_stream_t *in);
+void jp2_box_dump(jp2_box_t *box, FILE *out);
+
+static int jp2_jp_getdata(jp2_box_t *box, jas_stream_t *in);
+static int jp2_jp_putdata(jp2_box_t *box, jas_stream_t *out);
+static int jp2_ftyp_getdata(jp2_box_t *box, jas_stream_t *in);
+static int jp2_ftyp_putdata(jp2_box_t *box, jas_stream_t *out);
+static int jp2_ihdr_getdata(jp2_box_t *box, jas_stream_t *in);
+static int jp2_ihdr_putdata(jp2_box_t *box, jas_stream_t *out);
+static void jp2_bpcc_destroy(jp2_box_t *box);
+static int jp2_bpcc_getdata(jp2_box_t *box, jas_stream_t *in);
+static int jp2_bpcc_putdata(jp2_box_t *box, jas_stream_t *out);
+static int jp2_colr_getdata(jp2_box_t *box, jas_stream_t *in);
+static int jp2_colr_putdata(jp2_box_t *box, jas_stream_t *out);
+static void jp2_colr_dumpdata(jp2_box_t *box, FILE *out);
+static void jp2_colr_destroy(jp2_box_t *box);
+static void jp2_cdef_destroy(jp2_box_t *box);
+static int jp2_cdef_getdata(jp2_box_t *box, jas_stream_t *in);
+static int jp2_cdef_putdata(jp2_box_t *box, jas_stream_t *out);
+static void jp2_cdef_dumpdata(jp2_box_t *box, FILE *out);
+static void jp2_cmap_destroy(jp2_box_t *box);
+static int jp2_cmap_getdata(jp2_box_t *box, jas_stream_t *in);
+static int jp2_cmap_putdata(jp2_box_t *box, jas_stream_t *out);
+static void jp2_cmap_dumpdata(jp2_box_t *box, FILE *out);
+static void jp2_pclr_destroy(jp2_box_t *box);
+static int jp2_pclr_getdata(jp2_box_t *box, jas_stream_t *in);
+static int jp2_pclr_putdata(jp2_box_t *box, jas_stream_t *out);
+static void jp2_pclr_dumpdata(jp2_box_t *box, FILE *out);
+// GeoJasper: dima - uuid
+static void jp2_uuid_destroy(jp2_box_t *box);
+static int jp2_uuid_getdata(jp2_box_t *box, jas_stream_t *in);
+static int jp2_uuid_putdata(jp2_box_t *box, jas_stream_t *out);
+
+
+/******************************************************************************\
+* Local data.
+\******************************************************************************/
+
+jp2_boxinfo_t jp2_boxinfos[] = { /* {type, name, flags, {0, destroy, getdata, putdata, dumpdata}} */
+	{JP2_BOX_JP, "JP", 0,
+	  {0, 0, jp2_jp_getdata, jp2_jp_putdata, 0}},
+	{JP2_BOX_FTYP, "FTYP", 0,
+	  {0, 0, jp2_ftyp_getdata, jp2_ftyp_putdata, 0}},
+	{JP2_BOX_JP2H, "JP2H", JP2_BOX_SUPER,
+	  {0, 0, 0, 0, 0}},
+	{JP2_BOX_IHDR, "IHDR", 0,
+	  {0, 0, jp2_ihdr_getdata, jp2_ihdr_putdata, 0}},
+	{JP2_BOX_BPCC, "BPCC", 0,
+	  {0, jp2_bpcc_destroy, jp2_bpcc_getdata, jp2_bpcc_putdata, 0}},
+	{JP2_BOX_COLR, "COLR", 0,
+	  {0, jp2_colr_destroy, jp2_colr_getdata, jp2_colr_putdata, jp2_colr_dumpdata}},
+	{JP2_BOX_PCLR, "PCLR", 0,
+	  {0, jp2_pclr_destroy, jp2_pclr_getdata, jp2_pclr_putdata, jp2_pclr_dumpdata}},
+	{JP2_BOX_CMAP, "CMAP", 0,
+	  {0, jp2_cmap_destroy, jp2_cmap_getdata, jp2_cmap_putdata, jp2_cmap_dumpdata}},
+	{JP2_BOX_CDEF, "CDEF", 0,
+	  {0, jp2_cdef_destroy, jp2_cdef_getdata, jp2_cdef_putdata, jp2_cdef_dumpdata}},
+	{JP2_BOX_RES, "RES", JP2_BOX_SUPER,
+	  {0, 0, 0, 0, 0}},
+	{JP2_BOX_RESC, "RESC", 0,
+	  {0, 0, 0, 0, 0}},
+	{JP2_BOX_RESD, "RESD", 0,
+	  {0, 0, 0, 0, 0}},
+	{JP2_BOX_JP2C, "JP2C", JP2_BOX_NODATA,
+	  {0, 0, 0, 0, 0}},
+	{JP2_BOX_JP2I, "JP2I", 0,
+	  {0, 0, 0, 0, 0}},
+	{JP2_BOX_XML, "XML", 0,
+	  {0, 0, 0, 0, 0}},
+	{JP2_BOX_UUID, "UUID", 0,
+	  /* GeoJasper: UUID boxes get real handlers; this replaces an all-zero ops entry. */
+	  {0, jp2_uuid_destroy, jp2_uuid_getdata, jp2_uuid_putdata, 0}}, /* GeoJasper: dima - uuid */
+	{JP2_BOX_UINF, "UINF", JP2_BOX_SUPER,
+	  {0, 0, 0, 0, 0}},
+	{JP2_BOX_ULST, "ULST", 0,
+	  {0, 0, 0, 0, 0}},
+	{JP2_BOX_URL, "URL", 0,
+	  {0, 0, 0, 0, 0}},
+	{0, 0, 0, {0, 0, 0, 0, 0}}, /* all-zero entry; presumably the end-of-table marker -- confirm in jp2_boxinfolookup */
+};
+/* Handler-less fallback info; jp2_box_get() points a new box's ops here until its type is read. */
+jp2_boxinfo_t jp2_boxinfo_unk = {
+	0, "Unknown", 0, {0, 0, 0, 0, 0}
+};
+
+/******************************************************************************\
+* Box constructor.
+\******************************************************************************/
+
+/* Create a zeroed box of the given type (free with jp2_box_destroy()).
+  Returns 0 if the box-info lookup or the allocation fails. */
+jp2_box_t *jp2_box_create(int type)
+{
+	jp2_box_t *box;
+	jp2_boxinfo_t *boxinfo;
+
+	/* Resolve the type before allocating so a failed lookup cannot leak. */
+	if (!(boxinfo = jp2_boxinfolookup(type)) ||
+	  !(box = jas_malloc(sizeof(jp2_box_t)))) {
+		return 0;
+	}
+	memset(box, 0, sizeof(jp2_box_t));
+	box->type = type;
+	box->info = boxinfo;
+	box->ops = &boxinfo->ops;
+	return box;
+}
+
+/******************************************************************************\
+* Box destructor.
+\******************************************************************************/
+
+/* Destroy a box: run its type-specific destroy hook (if set), then free it. */
+void jp2_box_destroy(jp2_box_t *box)
+{
+	/* The hook releases whatever the box type allocated inside box->data. */
+	if (box->ops->destroy != 0) { box->ops->destroy(box); }
+	jas_free(box);
+}
+
+/* Free the bpcs array of a BPCC box (if allocated) and clear the pointer. */
+static void jp2_bpcc_destroy(jp2_box_t *box)
+{
+	if (box->data.bpcc.bpcs != 0) {
+		jas_free(box->data.bpcc.bpcs);
+		box->data.bpcc.bpcs = 0;
+	}
+}
+
+/* Free the entry array of a CDEF box (if allocated) and clear the pointer. */
+static void jp2_cdef_destroy(jp2_box_t *box)
+{
+	if (box->data.cdef.ents != 0) {
+		jas_free(box->data.cdef.ents);
+		box->data.cdef.ents = 0;
+	}
+}
+
+/******************************************************************************\
+* Box input.
+\******************************************************************************/
+
+/* Read the next box from a stream: the 32-bit length and type, an optional
+  64-bit extended length, and (for data-carrying boxes) the payload, which is
+  parsed by the type's getdata hook.  Returns the box, or 0 on failure. */
+jp2_box_t *jp2_box_get(jas_stream_t *in)
+{
+	jp2_box_t *box;
+	jp2_boxinfo_t *boxinfo;
+	jas_stream_t *tmpstream;
+	uint_fast32_t len;
+	uint_fast64_t extlen;
+	bool dataflag;
+
+	tmpstream = 0;
+	if (!(box = jas_malloc(sizeof(jp2_box_t)))) {
+		goto error;
+	}
+	/* Zero the box: the error path runs the type-specific destroy hook, which
+	  must find null pointers, not heap garbage, if parsing never got going. */
+	memset(box, 0, sizeof(jp2_box_t));
+	box->ops = &jp2_boxinfo_unk.ops;
+	if (jp2_getuint32(in, &len) || jp2_getuint32(in, &box->type)) {
+		goto error;
+	}
+	boxinfo = jp2_boxinfolookup(box->type);
+	box->info = boxinfo;
+	box->ops = &boxinfo->ops;
+	box->len = len;
+	if (box->len == 1) {
+		/* A length of 1 means the real length follows as a 64-bit field. */
+		if (jp2_getuint64(in, &extlen)) {
+			goto error;
+		}
+		if (extlen > 0xffffffffUL) {
+			jas_eprintf("warning: cannot handle large 64-bit box length\n");
+			extlen = 0xffffffffUL;
+		}
+		box->len = extlen;
+		box->datalen = extlen - JP2_BOX_HDRLEN(true);
+	} else {
+		box->datalen = box->len - JP2_BOX_HDRLEN(false);
+	}
+	if (box->len != 0 && box->len < 8) {
+		goto error;
+	}
+	/* Boxes flagged JP2_BOX_SUPER or JP2_BOX_NODATA have no payload to parse. */
+	dataflag = !(box->info->flags & (JP2_BOX_SUPER | JP2_BOX_NODATA));
+	if (dataflag) {
+		if (!(tmpstream = jas_stream_memopen(0, 0))) {
+			goto error;
+		}
+		if (jas_stream_copy(tmpstream, in, box->datalen)) {
+			jas_eprintf("cannot copy box data\n");
+			goto error;
+		}
+		jas_stream_rewind(tmpstream);
+		if (box->ops->getdata) {
+			if ((*box->ops->getdata)(box, tmpstream)) {
+				jas_eprintf("cannot parse box data\n");
+				goto error;
+			}
+		}
+		jas_stream_close(tmpstream);
+	}
+	if (jas_getdbglevel() >= 1) {
+		jp2_box_dump(box, stderr);
+	}
+	return box;
+
+error:
+	if (box) {
+		jp2_box_destroy(box);
+	}
+	if (tmpstream) {
+		jas_stream_close(tmpstream);
+	}
+	return 0;
+}
+
+/* Write a one-line summary of a box (type name, type code, length) to out,
+  followed by the type-specific dump if the box type provides one. */
+void jp2_box_dump(jp2_box_t *box, FILE *out)
+{
+	jp2_boxinfo_t *boxinfo = jp2_boxinfolookup(box->type);
+	assert(boxinfo);
+	/* Cast explicitly: the fast-int fields need not match the conversions. */
+	fprintf(out, "JP2 box: type=\"%s\" (0x%08x); length=%lu\n", boxinfo->name,
+	  (unsigned int)box->type, (unsigned long)box->len);
+	if (box->ops->dumpdata) {
+		(*box->ops->dumpdata)(box, out);
+	}
+}
+
+/* Parse the payload of a JP box: a single 32-bit magic value.
+  Returns 0 on success, -1 if the read fails. */
+static int jp2_jp_getdata(jp2_box_t *box, jas_stream_t *in)
+{
+	jp2_jp_t *sig = &box->data.jp;
+
+	return jp2_getuint32(in, &sig->magic) ? -1 : 0;
+}
+
+static int jp2_ftyp_getdata(jp2_box_t *box, jas_stream_t *in)
+{
+	jp2_ftyp_t *ftyp = &box->data.ftyp;
+	unsigned int i;
+	if (jp2_getuint32(in, &ftyp->majver) || jp2_getuint32(in, &ftyp->minver)) {
+		return -1;
+	}
+	ftyp->numcompatcodes = (box->datalen - 8) / 4;
+	if (ftyp->numcompatcodes > JP2_FTYP_MAXCOMPATCODES) {
+		return -1;
+	}
+	for (i = 0; i < ftyp->numcompatcodes; ++i) {
+		if (jp2_getuint32(in, &ftyp->compatcodes[i])) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+static int jp2_ihdr_getdata(jp2_box_t *box, jas_stream_t *in)
+{
+	jp2_ihdr_t *ihdr = &box->data.ihdr;
+	if (jp2_getuint32(in, &ihdr->height) || jp2_getuint32(in, &ihdr->width) ||
+	  jp2_getuint16(in, &ihdr->numcmpts) || jp2_getuint8(in, &ihdr->bpc) ||
+	  jp2_getuint8(in, &ihdr->comptype) || jp2_getuint8(in, &ihdr->csunk) ||
+	  jp2_getuint8(in, &ihdr->ipr)) {
+		return -1;
+	}
+	return 0;
+}
+
+/* Parse a BPCC box: the payload holds one byte per component.
+  Returns 0 on success, -1 on allocation or read failure. */
+static int jp2_bpcc_getdata(jp2_box_t *box, jas_stream_t *in)
+{
+	jp2_bpcc_t *bp = &box->data.bpcc;
+	unsigned int c;
+	bp->numcmpts = box->datalen;
+	if (!(bp->bpcs = jas_malloc(bp->numcmpts * sizeof(uint_fast8_t)))) {
+		return -1;
+	}
+	/* Stop at the first failed read; c then falls short of numcmpts. */
+	for (c = 0; c < bp->numcmpts && !jp2_getuint8(in, &bp->bpcs[c]); ++c) {
+	}
+	return (c < bp->numcmpts) ? -1 : 0;
+}
+
+/* Dump a COLR box: the common fields, then the csid or the raw ICC profile. */
+static void jp2_colr_dumpdata(jp2_box_t *box, FILE *out)
+{
+	jp2_colr_t *c = &box->data.colr;
+
+	fprintf(out, "method=%d; pri=%d; approx=%d\n", (int)c->method,
+	  (int)c->pri, (int)c->approx);
+	if (c->method == JP2_COLR_ENUM) {
+		fprintf(out, "csid=%d\n", (int)c->csid);
+	} else if (c->method == JP2_COLR_ICC) {
+		jas_memdump(out, c->iccp, c->iccplen);
+	}
+}
+
+/* Parse a COLR box: the method, pri and approx bytes, followed by either a
+  32-bit csid (JP2_COLR_ENUM) or an ICC profile filling the rest of the
+  payload (JP2_COLR_ICC).  Returns 0 on success, -1 on read/alloc failure. */
+static int jp2_colr_getdata(jp2_box_t *box, jas_stream_t *in)
+{
+	jp2_colr_t *c = &box->data.colr;
+
+	c->csid = 0;
+	c->iccp = 0;
+	c->iccplen = 0;
+	if (jp2_getuint8(in, &c->method) || jp2_getuint8(in, &c->pri) ||
+	  jp2_getuint8(in, &c->approx)) {
+		return -1;
+	}
+	if (c->method == JP2_COLR_ENUM) {
+		return jp2_getuint32(in, &c->csid) ? -1 : 0;
+	}
+	if (c->method == JP2_COLR_ICC) {
+		/* Everything after the three header bytes is profile data. */
+		c->iccplen = box->datalen - 3;
+		if (!(c->iccp = jas_malloc(c->iccplen * sizeof(uint_fast8_t)))) {
+			return -1;
+		}
+		if (jas_stream_read(in, c->iccp, c->iccplen) != c->iccplen) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* Dump every CDEF entry.  The uint_fast16_t fields are cast to int for "%d". */
+static void jp2_cdef_dumpdata(jp2_box_t *box, FILE *out)
+{
+	unsigned int i;
+	for (i = 0; i < box->data.cdef.numchans; ++i) {
+		jp2_cdefchan_t *ent = &box->data.cdef.ents[i];
+		fprintf(out, "channo=%d; type=%d; assoc=%d\n", (int)ent->channo, (int)ent->type, (int)ent->assoc);
+	}
+}
+
+/* Free the ICC profile buffer of a COLR box, if one was allocated. */
+static void jp2_colr_destroy(jp2_box_t *box)
+{
+	if (box->data.colr.iccp != 0) {
+		jas_free(box->data.colr.iccp);
+	}
+}
+
+/* Parse a CDEF box: a 16-bit entry count, then (channo, type, assoc) triples
+  of 16-bit values.  Returns 0 on success, -1 on read/allocation failure. */
+static int jp2_cdef_getdata(jp2_box_t *box, jas_stream_t *in)
+{
+	jp2_cdef_t *cd = &box->data.cdef;
+	unsigned int n;
+
+	if (jp2_getuint16(in, &cd->numchans) ||
+	  !(cd->ents = jas_malloc(cd->numchans * sizeof(jp2_cdefchan_t)))) {
+		return -1;
+	}
+	for (n = 0; n < cd->numchans; ++n) {
+		jp2_cdefchan_t *ent = &cd->ents[n];
+		if (jp2_getuint16(in, &ent->channo) ||
+		  jp2_getuint16(in, &ent->type) || jp2_getuint16(in, &ent->assoc)) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* Box output.
+\******************************************************************************/
+
+/* Write a box: buffer the payload (if any) in a memory stream to learn its
+  length, then emit header and payload.  Returns 0 on success, -1 on failure. */
+int jp2_box_put(jp2_box_t *box, jas_stream_t *out)
+{
+	jas_stream_t *tmpstream;
+	bool extlen;
+	bool dataflag;
+
+	tmpstream = 0;
+	dataflag = !(box->info->flags & (JP2_BOX_SUPER | JP2_BOX_NODATA));
+	if (dataflag) {
+		/* Fail cleanly instead of handing a null stream to putdata/tell. */
+		if (!(tmpstream = jas_stream_memopen(0, 0))) {
+			goto error;
+		}
+		if (box->ops->putdata) {
+			if ((*box->ops->putdata)(box, tmpstream)) {
+				goto error;
+			}
+		}
+		box->len = jas_stream_tell(tmpstream) + JP2_BOX_HDRLEN(false);
+		jas_stream_rewind(tmpstream);
+	}
+	/* Lengths that do not fit in 32 bits use the 64-bit extended form. */
+	extlen = (box->len >= (((uint_fast64_t)1) << 32)) != 0;
+	if (jp2_putuint32(out, extlen ? 1 : box->len)) {
+		goto error;
+	}
+	if (jp2_putuint32(out, box->type)) {
+		goto error;
+	}
+	if (extlen) {
+		if (jp2_putuint64(out, box->len)) {
+			goto error;
+		}
+	}
+	if (dataflag) {
+		if (jas_stream_copy(out, tmpstream, box->len - JP2_BOX_HDRLEN(false))) {
+			goto error;
+		}
+		jas_stream_close(tmpstream);
+	}
+	return 0;
+
+error:
+	if (tmpstream) {
+		jas_stream_close(tmpstream);
+	}
+	return -1;
+}
+
+/* Emit the payload of the JP (signature) box: the 32-bit magic number.
+  Returns 0 on success and -1 if the write fails. */
+static int jp2_jp_putdata(jp2_box_t *box, jas_stream_t *out)
+{
+	uint_fast32_t magic = box->data.jp.magic;
+
+	return jp2_putuint32(out, magic) ? -1 : 0;
+}
+
+/* Emit the FTYP payload: major version, minor version, then each of the
+  numcompatcodes compatibility codes, all as 32-bit big-endian values.
+  Returns 0 on success and -1 on the first failed write. */
+static int jp2_ftyp_putdata(jp2_box_t *box, jas_stream_t *out)
+{
+	jp2_ftyp_t *ftyp = &box->data.ftyp;
+	unsigned int k = 0;
+	int failed = jp2_putuint32(out, ftyp->majver) ||
+	  jp2_putuint32(out, ftyp->minver);
+	while (!failed && k < ftyp->numcompatcodes) {
+		failed = jp2_putuint32(out, ftyp->compatcodes[k++]);
+	}
+	return failed ? -1 : 0;
+}
+
+/* Emit the IHDR payload: height, width (32 bits each), numcmpts (16 bits),
+  then bpc, comptype, csunk and ipr (8 bits each).  Returns 0 or -1. */
+static int jp2_ihdr_putdata(jp2_box_t *box, jas_stream_t *out)
+{
+	jp2_ihdr_t *hdr = &box->data.ihdr;
+	int failed = jp2_putuint32(out, hdr->height) ||
+	  jp2_putuint32(out, hdr->width) || jp2_putuint16(out, hdr->numcmpts) ||
+	  jp2_putuint8(out, hdr->bpc) || jp2_putuint8(out, hdr->comptype) ||
+	  jp2_putuint8(out, hdr->csunk) || jp2_putuint8(out, hdr->ipr);
+	return failed ? -1 : 0;
+}
+
+/* Emit the BPCC payload, one byte per component.
+  Returns 0 on success and -1 on the first failed write. */
+static int jp2_bpcc_putdata(jp2_box_t *box, jas_stream_t *out)
+{
+	jp2_bpcc_t *bpcc = &box->data.bpcc;
+	int failed = 0;
+	unsigned int k;
+	for (k = 0; !failed && k < bpcc->numcmpts; ++k)
+		failed = jp2_putuint8(out, bpcc->bpcs[k]);
+	return failed ? -1 : 0;
+}
+
+/* Emit the COLR payload: the method, pri and approx bytes, then the 32-bit
+  csid for JP2_COLR_ENUM or the raw ICC profile bytes for JP2_COLR_ICC
+  (nothing more for any other method).  Returns 0 on success, -1 on error. */
+static int jp2_colr_putdata(jp2_box_t *box, jas_stream_t *out)
+{
+	jp2_colr_t *colr = &box->data.colr;
+	int proflen;
+	if (jp2_putuint8(out, colr->method) || jp2_putuint8(out, colr->pri) ||
+	  jp2_putuint8(out, colr->approx)) {
+		return -1;
+	}
+	if (colr->method == JP2_COLR_ENUM) {
+		return jp2_putuint32(out, colr->csid) ? -1 : 0;
+	}
+	if (colr->method == JP2_COLR_ICC) {
+		proflen = JAS_CAST(int, colr->iccplen);
+		if (jas_stream_write(out, colr->iccp, proflen) != proflen)
+			return -1;
+	}
+	return 0;
+}
+
+/* Emit the CDEF payload: the 16-bit entry count followed by the channo,
+  type and assoc fields (16 bits each) of every entry.
+  Returns 0 on success and -1 on the first failed write. */
+static int jp2_cdef_putdata(jp2_box_t *box, jas_stream_t *out)
+{
+	jp2_cdef_t *cdef = &box->data.cdef;
+	jp2_cdefchan_t *cur;
+	unsigned int k;
+	if (jp2_putuint16(out, cdef->numchans)) {
+		return -1;
+	}
+	for (k = 0; k < cdef->numchans; ++k) {
+		cur = &cdef->ents[k];
+		if (jp2_putuint16(out, cur->channo) || jp2_putuint16(out, cur->type) ||
+		  jp2_putuint16(out, cur->assoc)) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* Input operations for primitive types.
+\******************************************************************************/
+
+/* Read one byte from in and store it through val unless val is null.
+  Returns 0 on success and -1 if jas_stream_getc reports EOF. */
+static int jp2_getuint8(jas_stream_t *in, uint_fast8_t *val)
+{
+	int c = jas_stream_getc(in);
+	if (c == EOF)
+		return -1;
+	if (val != 0)
+		*val = c;
+	return 0;
+}
+
+/* Read a big-endian 16-bit value into *val (val may be null).
+  Returns 0 on success and -1 if jas_stream_getc reports EOF. */
+static int jp2_getuint16(jas_stream_t *in, uint_fast16_t *val)
+{
+	uint_fast16_t acc = 0;
+	int remaining;
+	int c;
+	for (remaining = 2; remaining > 0; --remaining) {
+		if ((c = jas_stream_getc(in)) == EOF)
+			return -1;
+		acc = (acc << 8) | c;
+	}
+	if (val) {
+		*val = acc;
+	}
+	return 0;
+}
+
+/* Read a 32-bit unsigned value, most significant byte first.
+
+  in   stream to read the four bytes from
+  val  where to store the result; may be null, in which case the bytes
+       are consumed and discarded
+
+  Returns 0 on success and -1 if jas_stream_getc reports EOF before all
+  four bytes have been read. */
+static int jp2_getuint32(jas_stream_t *in, uint_fast32_t *val)
+{
+	uint_fast32_t acc = 0;
+	int remaining;
+	int c;
+
+	for (remaining = 4; remaining > 0; --remaining) {
+		if ((c = jas_stream_getc(in)) == EOF) {
+			return -1;
+		}
+		acc = (acc << 8) | c;
+	}
+	if (val) {
+		*val = acc;
+	}
+	return 0;
+}
+
+/* Read a big-endian 64-bit value into *val; val may be null, as in the
+  other jp2_getuint* readers.  Returns 0, or -1 if the stream hits EOF. */
+static int jp2_getuint64(jas_stream_t *in, uint_fast64_t *val)
+{
+	uint_fast64_t tmpval = 0;
+	int i;
+	int c;
+	for (i = 0; i < 8; ++i) {
+		if ((c = jas_stream_getc(in)) == EOF) {
+			return -1;
+		}
+		tmpval = (tmpval << 8) | (c & 0xff);
+	}
+	if (val) {
+		*val = tmpval;
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* Output operations for primitive types.
+\******************************************************************************/
+
+/* Write the low 8 bits of val to out.  Returns 0 on success, -1 on EOF. */
+static int jp2_putuint8(jas_stream_t *out, uint_fast8_t val)
+{
+	int rc = jas_stream_putc(out, val & 0xff);
+
+	return (rc == EOF) ? -1 : 0;
+}
+
+/* Write the low 16 bits of val to out, most significant byte first.
+  Returns 0 on success and -1 if either byte cannot be written. */
+static int jp2_putuint16(jas_stream_t *out, uint_fast16_t val)
+{
+	if (jas_stream_putc(out, (val >> 8) & 0xff) == EOF)
+		return -1;
+	return (jas_stream_putc(out, val & 0xff) == EOF) ? -1 : 0;
+}
+
+/* Write the low 32 bits of val to out, most significant byte first.
+  Returns 0 on success and -1 as soon as a byte cannot be written. */
+static int jp2_putuint32(jas_stream_t *out, uint_fast32_t val)
+{
+	int shift;
+	for (shift = 24; shift >= 0; shift -= 8)
+		if (jas_stream_putc(out, (val >> shift) & 0xff) == EOF)
+			return -1;
+	return 0;
+}
+
+/* Write val to out as two 32-bit halves, high half first.
+  Returns 0 on success and -1 if either half cannot be written. */
+static int jp2_putuint64(jas_stream_t *out, uint_fast64_t val)
+{
+	uint_fast32_t hi = (val >> 32) & 0xffffffffUL;
+	uint_fast32_t lo = val & 0xffffffffUL;
+	return (jp2_putuint32(out, hi) || jp2_putuint32(out, lo)) ? -1 : 0;
+}
+
+/******************************************************************************\
+* Miscellaneous code.
+\******************************************************************************/
+
+/* Find the descriptor whose type equals type in the name-terminated
+  jp2_boxinfos table; unknown types map to &jp2_boxinfo_unk. */
+jp2_boxinfo_t *jp2_boxinfolookup(int type)
+{
+	jp2_boxinfo_t *cur = jp2_boxinfos;
+	while (cur->name && cur->type != type) {
+		++cur;
+	}
+	return cur->name ? cur : &jp2_boxinfo_unk;
+}
+
+
+
+
+
+/* Release the entry array of a CMAP box, if one was allocated. */
+static void jp2_cmap_destroy(jp2_box_t *box)
+{
+	jp2_cmapent_t *ents = box->data.cmap.ents;
+	if (ents != 0)
+		jas_free(ents);
+}
+
+/* Read a CMAP box.  The entry count is the box data length divided by 4;
+  each entry is a 16-bit cmptno followed by the 8-bit map and pcol fields.
+  Returns 0 on success and -1 on a read or allocation failure. */
+static int jp2_cmap_getdata(jp2_box_t *box, jas_stream_t *in)
+{
+	jp2_cmap_t *cmap = &box->data.cmap;
+	unsigned int k;
+	cmap->numchans = (box->datalen) / 4;
+	cmap->ents = jas_malloc(cmap->numchans * sizeof(jp2_cmapent_t));
+	if (!cmap->ents) {
+		return -1;
+	}
+	for (k = 0; k < cmap->numchans; ++k) {
+		jp2_cmapent_t *cur = &cmap->ents[k];
+		if (jp2_getuint16(in, &cur->cmptno) ||
+		  jp2_getuint8(in, &cur->map) || jp2_getuint8(in, &cur->pcol)) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* Writing CMAP boxes is not implemented: this always fails with -1. */
+static int jp2_cmap_putdata(jp2_box_t *box, jas_stream_t *out)
+{
+	/* The parameters are deliberately unused. */
+	(void) box;
+	(void) out;
+	return -1;
+}
+
+/* Print the CMAP channel count, then one "cmptno/map/pcol" line for each
+  entry, to out. */
+static void jp2_cmap_dumpdata(jp2_box_t *box, FILE *out)
+{
+	jp2_cmap_t *cmap = &box->data.cmap;
+	unsigned int k = 0;
+	fprintf(out, "numchans = %d\n", (int) cmap->numchans);
+	for (; k < cmap->numchans; ++k) {
+		fprintf(out, "cmptno=%d; map=%d; pcol=%d\n", (int) cmap->ents[k].cmptno,
+		  (int) cmap->ents[k].map, (int) cmap->ents[k].pcol);
+	}
+}
+
+/* Release the LUT and the per-channel bpc array; either may be null. */
+static void jp2_pclr_destroy(jp2_box_t *box)
+{
+	jp2_pclr_t *pclr = &box->data.pclr;
+	if (pclr->lutdata != 0)
+		jas_free(pclr->lutdata);
+	if (pclr->bpc != 0)
+		jas_free(pclr->bpc);
+}
+
+/* Read a PCLR (palette) box: a 16-bit entry count, an 8-bit channel count,
+  one bit-depth byte per channel, then the table, entry by entry.  Both
+  array pointers are cleared up front so that jp2_pclr_destroy never sees an
+  uninitialized pointer.  Returns 0 on success and -1 on failure. */
+static int jp2_pclr_getdata(jp2_box_t *box, jas_stream_t *in)
+{
+	jp2_pclr_t *pclr = &box->data.pclr;
+	int lutsize;
+	unsigned int i, j;
+	int_fast32_t x;
+	pclr->lutdata = 0;
+	pclr->bpc = 0;
+	if (jp2_getuint16(in, &pclr->numlutents) ||
+	  jp2_getuint8(in, &pclr->numchans)) {
+		return -1;
+	}
+	lutsize = pclr->numlutents * pclr->numchans;
+	if (!(pclr->lutdata = jas_malloc(lutsize * sizeof(int_fast32_t))) ||
+	  !(pclr->bpc = jas_malloc(pclr->numchans * sizeof(uint_fast8_t)))) {
+		return -1;
+	}
+	for (i = 0; i < pclr->numchans; ++i) {
+		if (jp2_getuint8(in, &pclr->bpc[i])) {
+			return -1;
+		}
+	}
+	for (i = 0; i < pclr->numlutents; ++i) {
+		for (j = 0; j < pclr->numchans; ++j) {
+			if (jp2_getint(in, (pclr->bpc[j] & 0x80) != 0,
+			  (pclr->bpc[j] & 0x7f) + 1, &x)) {
+				return -1;
+			}
+			pclr->lutdata[i * pclr->numchans + j] = x;
+		}
+	}
+	return 0;
+}
+
+/* Writing PCLR boxes is not implemented: this always fails with -1.
+  Neither parameter is used. */
+static int jp2_pclr_putdata(jp2_box_t *box, jas_stream_t *out)
+{
+	/* Reference the parameters only to keep the compiler from warning
+	  that they are unused. */
+	(void) box;
+	(void) out;
+	return -1;
+}
+
+/* Print the palette dimensions, then one "LUT[entry][channel]=value" line
+  per sample; arguments are cast to int to match the "%d" conversions. */
+static void jp2_pclr_dumpdata(jp2_box_t *box, FILE *out)
+{
+	jp2_pclr_t *pclr = &box->data.pclr;
+	unsigned int i, j;
+	fprintf(out, "numents=%d; numchans=%d\n", (int) pclr->numlutents,
+	  (int) pclr->numchans);
+	for (i = 0; i < pclr->numlutents; ++i)
+		for (j = 0; j < pclr->numchans; ++j)
+			fprintf(out, "LUT[%d][%d]=%d\n", (int) i, (int) j,
+			  (int) pclr->lutdata[i * pclr->numchans + j]);
+}
+
+/* Free a UUID box's payload buffer and reset the pointer to NULL. */
+static void jp2_uuid_destroy(jp2_box_t *box)
+{
+  jp2_uuid_t *uuid = &box->data.uuid;
+  if (!uuid->data)
+    return;
+  jas_free(uuid->data);
+  uuid->data = NULL;
+}
+
+/* Read a UUID box (16-byte id + payload); returns 0, or -1 on failure. */
+static int jp2_uuid_getdata(jp2_box_t *box, jas_stream_t *in)
+{
+  jp2_uuid_t *uuid = &box->data.uuid;
+  uint_fast32_t i;
+  uuid->data = NULL; /* keeps jp2_uuid_destroy safe after an early return */
+  if (box->datalen < 16) /* data_len below would otherwise wrap around */
+    return -1;
+  for (i = 0; i < 16; i++)
+    if (jp2_getuint8(in, &uuid->uuid[i]))
+      return -1;
+  uuid->data_len = box->datalen - 16;
+  uuid->data = jas_malloc(uuid->data_len * sizeof(uint_fast8_t));
+  if (!uuid->data && uuid->data_len) /* allocation failed for a real payload */
+    return -1;
+  for (i = 0; i < uuid->data_len; i++)
+    if (jp2_getuint8(in, &uuid->data[i]))
+      return -1;
+  return 0;
+}
+
+/* Emit the UUID payload: the 16 identifier bytes followed by data_len
+  bytes of box data.
+  Returns 0 on success and -1 on the first failed write. */
+static int jp2_uuid_putdata(jp2_box_t *box, jas_stream_t *out)
+{
+  jp2_uuid_t *uuid = &box->data.uuid;
+  int k;
+
+  for (k = 0; k < 16; ++k) {
+    if (jp2_putuint8(out, uuid->uuid[k]) != 0)
+      return -1;
+  }
+  for (k = 0; k < uuid->data_len; ++k) {
+    if (jp2_putuint8(out, uuid->data[k]) != 0)
+      return -1;
+  }
+  return 0;
+}
+
+static int jp2_getint(jas_stream_t *in, int s, int n, int_fast32_t *val)
+{
+	int c;
+	int i;
+	uint_fast32_t v;
+	int m; /* number of bytes read from the stream */
+	/* Read an n-bit value (signed decoding when s is nonzero) into *val; 0 or -1 on EOF. */
+	m = (n + 7) / 8;
+	/* Accumulate the m bytes, most significant byte first. */
+	v = 0;
+	for (i = 0; i < m; ++i) {
+		if ((c = jas_stream_getc(in)) == EOF) {
+			return -1;
+		}
+		v = (v << 8) | c;
+	}
+	v &= ONES(n); /* presumably keeps the low n bits -- ONES is defined elsewhere */
+	if (s) {
+		int sb;
+		sb = v & (1 << (8 * m - 1)); /* tests the top bit of the m bytes, not bit n - 1 */
+		*val = ((~v) + 1) & ONES(8 * m); /* NOTE(review): negated form is stored even when sb is clear -- confirm */
+		if (sb) {
+			*val = -*val;
+		}
+	} else {
+		*val = v;
+	}
+
+	return 0;
+}
+
+/* Return the first CDEF entry whose channel number equals channo, or a
+  null pointer when no entry matches. */
+jp2_cdefchan_t *jp2_cdef_lookup(jp2_cdef_t *cdef, int channo)
+{
+	unsigned int wanted = JAS_CAST(unsigned int, channo);
+	unsigned int k;
+	for (k = 0; k < cdef->numchans; ++k) {
+		if (cdef->ents[k].channo == wanted)
+			return &cdef->ents[k];
+	}
+	return 0;
+}
diff --git a/src/libjasper/jp2/jp2_cod.h b/src/libjasper/jp2/jp2_cod.h
new file mode 100644
index 0000000..5bad06f
--- /dev/null
+++ b/src/libjasper/jp2/jp2_cod.h
@@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+
+ GeoJasper revision: Dima (11/07/2003 17:29 - UUID from j_image_t)
+ Modifications by Andrey Kiselev <dron@remotesensing.org> to handle UUID box properly.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * JP2 Library
+ *
+ * $Id: jp2_cod.h,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+#ifndef JP2_COD_H
+#define JP2_COD_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_types.h"
+
+/******************************************************************************\
+* Macros.
+\******************************************************************************/
+
+#define	JP2_SPTOBPC(s, p) \
+	((((p) - 1) & 0x7f) | (((s) & 1) << 7))	/* bit 7 = (s & 1), bits 0-6 = (p - 1) */
+
+/******************************************************************************\
+* Box class.
+\******************************************************************************/
+
+#define	JP2_BOX_HDRLEN(ext) ((ext) ? 16 : 8)	/* 16 when ext is nonzero, otherwise 8 */
+
+/* Box types: four ASCII characters packed into a 32-bit integer. */
+#define	JP2_BOX_JP		0x6a502020	/* Signature */
+#define JP2_BOX_FTYP	0x66747970	/* File Type */
+#define	JP2_BOX_JP2H	0x6a703268	/* JP2 Header */
+#define	JP2_BOX_IHDR	0x69686472	/* Image Header */
+#define	JP2_BOX_BPCC	0x62706363	/* Bits Per Component */
+#define	JP2_BOX_COLR	0x636f6c72	/* Color Specification */
+#define	JP2_BOX_PCLR	0x70636c72	/* Palette */
+#define	JP2_BOX_CMAP	0x636d6170	/* Component Mapping */
+#define	JP2_BOX_CDEF	0x63646566	/* Channel Definition */
+#define	JP2_BOX_RES		0x72657320	/* Resolution */
+#define	JP2_BOX_RESC	0x72657363	/* Capture Resolution */
+#define	JP2_BOX_RESD	0x72657364	/* Default Display Resolution */
+#define	JP2_BOX_JP2C	0x6a703263	/* Contiguous Code Stream */
+#define	JP2_BOX_JP2I	0x6a703269	/* Intellectual Property */
+#define	JP2_BOX_XML		0x786d6c20	/* XML */
+#define	JP2_BOX_UUID	0x75756964	/* UUID */
+#define	JP2_BOX_UINF	0x75696e66	/* UUID Info */
+#define	JP2_BOX_ULST	0x75637374	/* UUID List; NOTE(review): these bytes spell "ucst" -- confirm */
+#define	JP2_BOX_URL		0x75726c20	/* URL */
+
+#define	JP2_BOX_SUPER	0x01	/* info->flags bit: jp2_box_put writes no payload for such a box */
+#define	JP2_BOX_NODATA	0x02	/* info->flags bit: jp2_box_put writes no payload for such a box */
+
+/* JP (signature) box data. */
+
+#define	JP2_JP_MAGIC	0x0d0a870a	/* value jp2_decode requires in jp.magic */
+#define	JP2_JP_LEN		12	/* presumably the size of the whole JP box in bytes -- confirm */
+
+/* UUID that jp2_decode matches to recognise a GeoJP2 box (static: one copy per including file). */
+static unsigned char msi_uuid[16] =
+        { 0xb1,0x4b,0xf8,0xbd,
+          0x08,0x3d,0x4b,0x43, 
+          0xa5,0xae,0x8c,0xd7,
+          0xd5,0xa6,0xce,0x03};
+
+/* UUID that jp2_decode matches to recognise an XMP box (static: one copy per including file). */
+static unsigned char xmp_uuid[16] =
+        { 0xBE, 0x7A, 0xCF, 0xCB, 
+          0x97, 0xA9, 0x42, 0xE8, 
+          0x9C, 0x71, 0x99, 0x94,
+          0x91, 0xE3, 0xAF, 0xAC };
+
+typedef struct {
+	uint_fast32_t magic;	/* written as a 32-bit value by jp2_jp_putdata */
+} jp2_jp_t;
+
+/* FTYP (file type) box data. */
+
+#define	JP2_FTYP_MAXCOMPATCODES	32	/* capacity of jp2_ftyp_t.compatcodes */
+#define	JP2_FTYP_MAJVER		0x6a703220	/* the ASCII bytes "jp2 " */
+#define	JP2_FTYP_MINVER		0
+#define	JP2_FTYP_COMPATCODE		JP2_FTYP_MAJVER
+
+typedef struct {
+	uint_fast32_t majver;	/* written first by jp2_ftyp_putdata */
+	uint_fast32_t minver;	/* written second */
+	uint_fast32_t numcompatcodes;	/* number of compatcodes[] entries in use */
+	uint_fast32_t compatcodes[JP2_FTYP_MAXCOMPATCODES];
+} jp2_ftyp_t;
+
+/* IHDR (image header) box data. */
+
+#define	JP2_IHDR_COMPTYPE	7	/* the only comptype jp2_decode accepts */
+#define	JP2_IHDR_BPCNULL	255	/* bpc jp2_decode expects when component data types differ */
+
+typedef struct {
+	uint_fast32_t width;	/* written after height by jp2_ihdr_putdata */
+	uint_fast32_t height;
+	uint_fast16_t numcmpts;	/* compared with the code stream's component count */
+	uint_fast8_t bpc;	/* compared with JP2_DTYPETOBPC(dtype) in jp2_decode */
+	uint_fast8_t comptype;
+	uint_fast8_t csunk;
+	uint_fast8_t ipr;
+} jp2_ihdr_t;
+
+/* BPCC (bits per component) box data. */
+
+typedef struct {
+	uint_fast16_t numcmpts;	/* number of entries in bpcs */
+	uint_fast8_t *bpcs;	/* one value per component, written as one byte each */
+} jp2_bpcc_t;
+
+/* COLR (colour specification) box data. */
+
+#define	JP2_COLR_ENUM	1	/* method: csid holds an enumerated colour space */
+#define	JP2_COLR_ICC	2	/* method: iccp/iccplen hold an ICC profile */
+#define	JP2_COLR_PRI	0
+
+#define	JP2_COLR_SRGB	16
+#define	JP2_COLR_SGRAY	17
+#define	JP2_COLR_SYCC	18
+
+typedef struct {
+	uint_fast8_t method;	/* JP2_COLR_ENUM or JP2_COLR_ICC */
+	uint_fast8_t pri;
+	uint_fast8_t approx;
+	uint_fast32_t csid;	/* written only when method is JP2_COLR_ENUM */
+	uint_fast8_t *iccp;	/* raw profile bytes; released by jp2_colr_destroy */
+	int iccplen;	/* number of bytes in iccp */
+	/* XXX - Someday we ought to add ICC profile data here. */
+} jp2_colr_t;
+
+/* PCLR (palette) box data. */
+
+typedef struct {
+	uint_fast16_t numlutents;	/* number of palette entries */
+	uint_fast8_t numchans;	/* number of channels per entry */
+	int_fast32_t *lutdata;	/* numlutents * numchans samples, indexed [entry * numchans + chan] */
+	uint_fast8_t *bpc;	/* per channel: bit 7 = signed flag, bits 0-6 = precision - 1 */
+} jp2_pclr_t;
+
+/* CDEF (channel definition) box per-channel data. */
+
+#define JP2_CDEF_RGB_R	1
+#define JP2_CDEF_RGB_G	2
+#define JP2_CDEF_RGB_B	3
+
+#define JP2_CDEF_YCBCR_Y	1
+#define JP2_CDEF_YCBCR_CB	2
+#define JP2_CDEF_YCBCR_CR	3
+
+#define	JP2_CDEF_GRAY_Y	1
+
+#define	JP2_CDEF_TYPE_COLOR	0
+#define	JP2_CDEF_TYPE_OPACITY	1
+#define	JP2_CDEF_TYPE_UNSPEC	65535
+#define	JP2_CDEF_ASOC_ALL	0
+#define	JP2_CDEF_ASOC_NONE	65535
+
+typedef struct {
+	uint_fast16_t channo;	/* key matched by jp2_cdef_lookup */
+	uint_fast16_t type;	/* presumably one of JP2_CDEF_TYPE_* -- confirm */
+	uint_fast16_t assoc;	/* presumably a colour index or JP2_CDEF_ASOC_* -- confirm */
+} jp2_cdefchan_t;
+
+/* CDEF box data: numchans entries, each stored as three 16-bit values. */
+
+typedef struct {
+	uint_fast16_t numchans;	/* number of entries in ents */
+	jp2_cdefchan_t *ents;	/* array allocated by jp2_cdef_getdata */
+} jp2_cdef_t;
+
+typedef struct {
+	uint_fast16_t cmptno;	/* read as 16 bits by jp2_cmap_getdata */
+	uint_fast8_t map;	/* read as 8 bits; presumably JP2_CMAP_DIRECT or JP2_CMAP_PALETTE */
+	uint_fast8_t pcol;	/* read as 8 bits */
+} jp2_cmapent_t;
+
+typedef struct {
+	uint_fast16_t numchans;	/* box data length / 4 (see jp2_cmap_getdata) */
+	jp2_cmapent_t *ents;	/* numchans entries; released by jp2_cmap_destroy */
+} jp2_cmap_t;
+
+typedef struct {
+	uint_fast32_t data_len;	/* number of bytes in data (box data length - 16) */
+	uint_fast8_t uuid[16];	/* the 16 identifier bytes */
+	uint_fast8_t *data;	/* payload after the identifier; released by jp2_uuid_destroy */
+} jp2_uuid_t;
+
+#define	JP2_CMAP_DIRECT		0
+#define	JP2_CMAP_PALETTE	1
+
+/* Generic box: common header fields plus a union of per-type payloads. */
+
+struct jp2_boxops_s;
+typedef struct {
+	/* Per-type operations and descriptor (jp2_box_put reads info->flags and ops->putdata). */
+	struct jp2_boxops_s *ops;
+	struct jp2_boxinfo_s *info;
+
+	uint_fast32_t type;	/* one of the JP2_BOX_* type codes */
+
+	/* The length of the box including the (variable-length) header. */
+	uint_fast32_t len;	/* NOTE(review): jp2_box_put tests len >= 2^32, which needs a type wider than 32 bits */
+
+	/* The length of the box data. */
+	uint_fast32_t datalen;
+
+	union {
+		jp2_jp_t jp;
+		jp2_ftyp_t ftyp;
+		jp2_ihdr_t ihdr;
+		jp2_bpcc_t bpcc;
+		jp2_colr_t colr;
+		jp2_pclr_t pclr;
+		jp2_cdef_t cdef;
+		jp2_cmap_t cmap;
+		jp2_uuid_t uuid;
+	} data;	/* which member is valid depends on the box type */
+
+} jp2_box_t;
+
+typedef struct jp2_boxops_s {
+	void (*init)(jp2_box_t *box);
+	void (*destroy)(jp2_box_t *box);	/* releases memory owned by the payload */
+	int (*getdata)(jp2_box_t *box, jas_stream_t *in);	/* parses the payload; 0 on success, -1 on error */
+	int (*putdata)(jp2_box_t *box, jas_stream_t *out);	/* writes the payload; may be null (see jp2_box_put) */
+	void (*dumpdata)(jp2_box_t *box, FILE *out);	/* prints the payload as text */
+} jp2_boxops_t;
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+typedef struct jp2_boxinfo_s {
+	int type;	/* key compared by jp2_boxinfolookup */
+	char *name;	/* a null name ends the jp2_boxinfos table */
+	int flags;	/* JP2_BOX_SUPER and/or JP2_BOX_NODATA */
+	jp2_boxops_t ops;
+} jp2_boxinfo_t;
+
+/******************************************************************************\
+* Box class.
+\******************************************************************************/
+
+jp2_box_t *jp2_box_create(int type);
+void jp2_box_destroy(jp2_box_t *box);
+jp2_box_t *jp2_box_get(jas_stream_t *in);
+int jp2_box_put(jp2_box_t *box, jas_stream_t *out);
+
+#define JP2_DTYPETOBPC(dtype) /* jas_image data type -> BPC byte */ \
+  ((JAS_IMAGE_CDT_GETSGND(dtype) << 7) | (JAS_IMAGE_CDT_GETPREC(dtype) - 1))
+#define	JP2_BPCTODTYPE(bpc) /* BPC byte -> data type; bpc is parenthesized */ \
+  (JAS_IMAGE_CDT_SETSGND((bpc) >> 7) | JAS_IMAGE_CDT_SETPREC(((bpc) & 0x7f) + 1))
+
+#define ICC_CS_RGB	0x52474220
+#define ICC_CS_YCBCR	0x59436272
+#define ICC_CS_GRAY	0x47524159
+
+jp2_cdefchan_t *jp2_cdef_lookup(jp2_cdef_t *cdef, int channo);
+
+
+#endif
diff --git a/src/libjasper/jp2/jp2_dec.c b/src/libjasper/jp2/jp2_dec.c
new file mode 100644
index 0000000..45a8993
--- /dev/null
+++ b/src/libjasper/jp2/jp2_dec.c
@@ -0,0 +1,702 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+
+ GeoJasper revision: Dima (11/07/2003 17:29 - UUID and additional args)
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * JP2 Library
+ *
+ * $Id: jp2_dec.c,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_image.h"
+#include "jasper/jas_stream.h"
+#include "jasper/jas_math.h"
+#include "jasper/jas_debug.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_version.h"
+
+// GeoJasper: dima
+#if !defined( JAS_GEO_OMIT_PRINTING_CODE )
+#include "../tiff/geotiff_buffer.h"
+#endif
+// end: dima
+
+#include "jp2_cod.h"
+#include "jp2_dec.h"
+
+#define	JP2_VALIDATELEN	(JAS_MIN(JP2_JP_LEN + 16, JAS_STREAM_MAXPUTBACK))
+
+static jp2_dec_t *jp2_dec_create(void);
+static void jp2_dec_destroy(jp2_dec_t *dec);
+static int jp2_getcs(jp2_colr_t *colr);
+static int fromiccpcs(int cs);
+static int jp2_getct(int colorspace, int type, int assoc);
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+jas_image_t *jp2_decode(jas_stream_t *in, char *optstr)
+{
+	jp2_box_t *box;
+	int found;
+	jas_image_t *image;
+	jp2_dec_t *dec;
+	bool samedtype;
+	int dtype;
+	unsigned int i;
+	jp2_cmap_t *cmapd;
+	jp2_pclr_t *pclrd;
+	jp2_cdef_t *cdefd;
+	unsigned int channo;
+	int newcmptno;
+	int_fast32_t *lutents;
+#if 0
+	jp2_cdefchan_t *cdefent;
+	int cmptno;
+#endif
+	jp2_cmapent_t *cmapent;
+	jas_icchdr_t icchdr;
+	jas_iccprof_t *iccprof;
+
+  // GeoJasper: dima - begin - temporary metadata buffers
+  //jas_aux_buffer_t aux_buf;
+  //aux_buf.id = 0;
+  jas_metadata_boxes_t tmp_metadata;
+  tmp_metadata.count = JAS_IMAGE_NUM_BOXES;
+  for (i=0; i<tmp_metadata.count; ++i)
+    tmp_metadata.boxes[i] = jas_box_init();
+  // GeoJasper: dima - end - temporary metadata buffers
+
+	dec = 0;
+	box = 0;
+	image = 0;
+
+	if (!(dec = jp2_dec_create())) {
+		goto error;
+	}
+
+	/* Get the first box.  This should be a JP box. */
+	if (!(box = jp2_box_get(in))) {
+		jas_eprintf("error: cannot get box\n");
+		goto error;
+	}
+	if (box->type != JP2_BOX_JP) {
+		jas_eprintf("error: expecting signature box\n");
+		goto error;
+	}
+	if (box->data.jp.magic != JP2_JP_MAGIC) {
+		jas_eprintf("incorrect magic number\n");
+		goto error;
+	}
+	jp2_box_destroy(box);
+	box = 0;
+
+	/* Get the second box.  This should be a FTYP box. */
+	if (!(box = jp2_box_get(in))) {
+		goto error;
+	}
+	if (box->type != JP2_BOX_FTYP) {
+		jas_eprintf("expecting file type box\n");
+		goto error;
+	}
+	jp2_box_destroy(box);
+	box = 0;
+
+	/* Get more boxes... */
+	found = 0;
+	while ((box = jp2_box_get(in))) {
+		if (jas_getdbglevel() >= 1) {
+			jas_eprintf("box type %s\n", box->info->name);
+		}
+		switch (box->type) {
+		case JP2_BOX_JP2C:
+			found = 1;
+			break;
+		case JP2_BOX_IHDR:
+			if (!dec->ihdr) {
+				dec->ihdr = box;
+				box = 0;
+			}
+			break;
+		case JP2_BOX_BPCC:
+			if (!dec->bpcc) {
+				dec->bpcc = box;
+				box = 0;
+			}
+			break;
+		case JP2_BOX_CDEF:
+			if (!dec->cdef) {
+				dec->cdef = box;
+				box = 0;
+			}
+			break;
+		case JP2_BOX_PCLR:
+			if (!dec->pclr) {
+				dec->pclr = box;
+				box = 0;
+			}
+			break;
+		case JP2_BOX_CMAP:
+			if (!dec->cmap) {
+				dec->cmap = box;
+				box = 0;
+			}
+			break;
+		case JP2_BOX_COLR:
+			if (!dec->colr) {
+				dec->colr = box;
+				box = 0;
+			}
+			break;
+    //-------------------------------------------------------
+    case JP2_BOX_UUID: // GeoJasper: dima - begin extract uuid
+      {
+        int box_id = -1;
+        if ( memcmp( box->data.uuid.uuid, msi_uuid, sizeof(msi_uuid) ) == 0 ) box_id = JAS_IMAGE_BOX_GEO;
+        if ( memcmp( box->data.uuid.uuid, xmp_uuid, sizeof(xmp_uuid) ) == 0 ) box_id = JAS_IMAGE_BOX_XMP;
+
+        if( box_id >= 0 ) {
+          jas_metadata_box_t *metabox = &tmp_metadata.boxes[box_id]; 
+          if ( jas_box_alloc( metabox, box->data.uuid.data_len ) ) {
+            memcpy( metabox->id, box->data.uuid.uuid, sizeof(msi_uuid) );
+            memcpy( metabox->buf, box->data.uuid.data, metabox->size );
+          } // if box allocated
+        } // if uuid box contains info to copy
+      }
+
+			break; // GeoJasper: end - begin extract uuid
+    //-------------------------------------------------------
+		}
+		if (box) {
+			jp2_box_destroy(box);
+			box = 0;
+		}
+		if (found) {
+			break;
+		}
+	}
+
+  //-------------------------------------------------------
+  // GeoJasper: dima - begin - print geojpeg2000 if needed
+#if !defined( JAS_GEO_OMIT_PRINTING_CODE )
+  if ( (optstr) && ( strstr(optstr, "listgeo") != NULL ) ) {
+    jas_metadata_box_t *metabox = &tmp_metadata.boxes[JAS_IMAGE_BOX_GEO]; 
+    long w=1, h=1;
+
+    if (dec->ihdr) {
+      w = dec->ihdr->data.ihdr.width;
+      h = dec->ihdr->data.ihdr.height;
+    }
+    if ( (memcmp(metabox->id, msi_uuid, sizeof(msi_uuid))==0) && metabox->buf != NULL )
+      printGTIFFromMemBufA( metabox->buf, metabox->size, w, h );
+    else
+      jas_eprintf("geojasper info: GeoJp2 info not found!\n");
+    exit(0);
+  }
+
+  if ( (optstr) && ( strstr(optstr, "listxmp") != NULL ) ) {
+    jas_metadata_box_t *metabox = &tmp_metadata.boxes[JAS_IMAGE_BOX_XMP]; 
+
+    if ( (memcmp(metabox->id, xmp_uuid, sizeof(xmp_uuid))==0) && metabox->buf != NULL ) {
+      // xmp buffer in is not guaranteed to be null terminated...
+      char *strbuf = (char *) jas_malloc(metabox->size+1);
+      strncpy(strbuf, metabox->buf, metabox->size);
+      strbuf[metabox->size] = 0; 
+      fprintf( stdout, "%s\n", strbuf );
+      jas_free(strbuf);
+    } else
+      jas_eprintf("geojasper info: Adobe XMP info not found!\n");
+    exit(0);
+  }
+
+  // simple check if we have boxes
+  if ( (memcmp(tmp_metadata.boxes[JAS_IMAGE_BOX_GEO].id, msi_uuid, sizeof(msi_uuid))==0) && 
+       tmp_metadata.boxes[JAS_IMAGE_BOX_GEO].buf != NULL )
+    jas_eprintf("geojasper info: GeoJp2 found, size: %d\n", tmp_metadata.boxes[JAS_IMAGE_BOX_GEO].size);
+
+  if ( (memcmp(tmp_metadata.boxes[JAS_IMAGE_BOX_XMP].id, xmp_uuid, sizeof(xmp_uuid))==0) && 
+       tmp_metadata.boxes[JAS_IMAGE_BOX_XMP].buf != NULL )
+    jas_eprintf("geojasper info: Adobe XMP found, size: %d\n", tmp_metadata.boxes[JAS_IMAGE_BOX_XMP].size);
+
+#endif // #if !defined( JAS_GEO_OMIT_PRINTING_CODE )
+  // GeoJasper: dima - end - print geojpeg2000 if needed
+  //-------------------------------------------------------
+
+	if (!found) {
+		jas_eprintf("error: no code stream found\n");
+		goto error;
+	}
+
+	if (!(dec->image = jpc_decode(in, optstr))) {
+		jas_eprintf("error: cannot decode code stream\n");
+		goto error;
+	}
+
+	/* An IHDR box must be present. */
+	if (!dec->ihdr) {
+		jas_eprintf("error: missing IHDR box\n");
+		goto error;
+	}
+
+	/* Does the number of components indicated in the IHDR box match
+	  the value specified in the code stream? */
+	if (dec->ihdr->data.ihdr.numcmpts != JAS_CAST(unsigned int, jas_image_numcmpts(dec->image))) {  /* IMLIB - changed unit to unsigned int */
+		jas_eprintf("warning: number of components mismatch\n");
+	}
+
+	/* At least one component must be present. */
+	if (!jas_image_numcmpts(dec->image)) {
+		jas_eprintf("error: no components\n");
+		goto error;
+	}
+
+	/* Determine if all components have the same data type. */
+	samedtype = true;
+	dtype = jas_image_cmptdtype(dec->image, 0);
+	for (i = 1; i < JAS_CAST(unsigned int, jas_image_numcmpts(dec->image)); ++i) { /* IMLIB - changed unit to unsigned int */
+		if (jas_image_cmptdtype(dec->image, i) != dtype) {
+			samedtype = false;
+			break;
+		}
+	}
+
+	/* Is the component data type indicated in the IHDR box consistent
+	  with the data in the code stream? */
+	if ((samedtype && dec->ihdr->data.ihdr.bpc != JP2_DTYPETOBPC(dtype)) ||
+	  (!samedtype && dec->ihdr->data.ihdr.bpc != JP2_IHDR_BPCNULL)) {
+		jas_eprintf("warning: component data type mismatch\n");
+	}
+
+	/* Is the compression type supported? */
+	if (dec->ihdr->data.ihdr.comptype != JP2_IHDR_COMPTYPE) {
+		jas_eprintf("error: unsupported compression type\n");
+		goto error;
+	}
+
+	if (dec->bpcc) {
+		/* Is the number of components indicated in the BPCC box
+		  consistent with the code stream data? */
+		if (dec->bpcc->data.bpcc.numcmpts != JAS_CAST(unsigned int, jas_image_numcmpts( /* IMLIB - changed unit to unsigned int */
+		  dec->image))) {
+			jas_eprintf("warning: number of components mismatch\n");
+		}
+		/* Is the component data type information indicated in the BPCC
+		  box consistent with the code stream data? */
+		if (!samedtype) {
+			for (i = 0; i < JAS_CAST(unsigned int, jas_image_numcmpts(dec->image)); ++i) { /* IMLIB - changed unit to unsigned int */
+				if (jas_image_cmptdtype(dec->image, i) != JP2_BPCTODTYPE(dec->bpcc->data.bpcc.bpcs[i])) {
+					jas_eprintf("warning: component data type mismatch\n");
+				}
+			}
+		} else {
+			jas_eprintf("warning: superfluous BPCC box\n");
+		}
+	}
+
+	/* A COLR box must be present. */
+	if (!dec->colr) {
+		jas_eprintf("error: no COLR box\n");
+		goto error;
+	}
+
+	switch (dec->colr->data.colr.method) {
+	case JP2_COLR_ENUM:
+		jas_image_setclrspc(dec->image, jp2_getcs(&dec->colr->data.colr));
+		break;
+	case JP2_COLR_ICC:
+		iccprof = jas_iccprof_createfrombuf(dec->colr->data.colr.iccp,
+		  dec->colr->data.colr.iccplen);
+		assert(iccprof);
+		jas_iccprof_gethdr(iccprof, &icchdr);
+		jas_eprintf("ICC Profile CS %08x\n", icchdr.colorspc);
+		jas_image_setclrspc(dec->image, fromiccpcs(icchdr.colorspc));
+		dec->image->cmprof_ = jas_cmprof_createfromiccprof(iccprof);
+		assert(dec->image->cmprof_);
+		jas_iccprof_destroy(iccprof);
+		break;
+	}
+
+	/* If a CMAP box is present, a PCLR box must also be present. */
+	if (dec->cmap && !dec->pclr) {
+		jas_eprintf("warning: missing PCLR box or superfluous CMAP box\n");
+		jp2_box_destroy(dec->cmap);
+		dec->cmap = 0;
+	}
+
+	/* If a CMAP box is not present, a PCLR box must not be present. */
+	if (!dec->cmap && dec->pclr) {
+		jas_eprintf("warning: missing CMAP box or superfluous PCLR box\n");
+		jp2_box_destroy(dec->pclr);
+		dec->pclr = 0;
+	}
+
+	/* Determine the number of channels (which is essentially the number
+	  of components after any palette mappings have been applied). */
+	dec->numchans = dec->cmap ? dec->cmap->data.cmap.numchans : JAS_CAST(unsigned int, jas_image_numcmpts(dec->image)); /* IMLIB - changed unit to unsigned int */
+
+	/* Perform a basic sanity check on the CMAP box if present. */
+	if (dec->cmap) {
+		for (i = 0; i < dec->numchans; ++i) {
+			/* Is the component number reasonable? */
+			if (dec->cmap->data.cmap.ents[i].cmptno >= JAS_CAST(unsigned int, jas_image_numcmpts(dec->image))) { /* IMLIB - changed unit to unsigned int */
+				jas_eprintf("error: invalid component number in CMAP box\n");
+				goto error;
+			}
+			/* Is the LUT index reasonable? */
+			if (dec->cmap->data.cmap.ents[i].pcol >= dec->pclr->data.pclr.numchans) {
+				jas_eprintf("error: invalid CMAP LUT index\n");
+				goto error;
+			}
+		}
+	}
+
+	/* Allocate space for the channel-number to component-number LUT. */
+	if (!(dec->chantocmptlut = jas_malloc(dec->numchans * sizeof(uint_fast16_t)))) {
+		jas_eprintf("error: no memory\n");
+		goto error;
+	}
+
+	if (!dec->cmap) {
+		for (i = 0; i < dec->numchans; ++i) {
+			dec->chantocmptlut[i] = i;
+		}
+	} else {
+		cmapd = &dec->cmap->data.cmap;
+		pclrd = &dec->pclr->data.pclr;
+		cdefd = &dec->cdef->data.cdef;
+		for (channo = 0; channo < cmapd->numchans; ++channo) {
+			cmapent = &cmapd->ents[channo];
+			if (cmapent->map == JP2_CMAP_DIRECT) {
+				dec->chantocmptlut[channo] = channo;
+			} else if (cmapent->map == JP2_CMAP_PALETTE) {
+				lutents = jas_malloc(pclrd->numlutents * sizeof(int_fast32_t));
+				for (i = 0; i < pclrd->numlutents; ++i) {
+					lutents[i] = pclrd->lutdata[cmapent->pcol + i * pclrd->numchans];
+				}
+				newcmptno = jas_image_numcmpts(dec->image);
+				jas_image_depalettize(dec->image, cmapent->cmptno, pclrd->numlutents, lutents, JP2_BPCTODTYPE(pclrd->bpc[cmapent->pcol]), newcmptno);
+				dec->chantocmptlut[channo] = newcmptno;
+				jas_free(lutents);
+#if 0
+				if (dec->cdef) {
+					cdefent = jp2_cdef_lookup(cdefd, channo);
+					if (!cdefent) {
+						abort();
+					}
+				jas_image_setcmpttype(dec->image, newcmptno, jp2_getct(jas_image_clrspc(dec->image), cdefent->type, cdefent->assoc));
+				} else {
+				jas_image_setcmpttype(dec->image, newcmptno, jp2_getct(jas_image_clrspc(dec->image), 0, channo + 1));
+				}
+#endif
+			}
+		}
+	}
+
+	/* Mark all components as being of unknown type. */
+
+	for (i = 0; i < JAS_CAST(unsigned int, jas_image_numcmpts(dec->image)); ++i) { /* IMLIB - changed unit to unsigned int */
+		jas_image_setcmpttype(dec->image, i, JAS_IMAGE_CT_UNKNOWN);
+	}
+
+	/* Determine the type of each component. */
+	if (dec->cdef) {
+		for (i = 0; i < dec->numchans; ++i) {
+			jas_image_setcmpttype(dec->image,
+			  dec->chantocmptlut[dec->cdef->data.cdef.ents[i].channo],
+			  jp2_getct(jas_image_clrspc(dec->image),
+			  dec->cdef->data.cdef.ents[i].type, dec->cdef->data.cdef.ents[i].assoc));
+		}
+	} else {
+		for (i = 0; i < dec->numchans; ++i) {
+			jas_image_setcmpttype(dec->image, dec->chantocmptlut[i],
+			  jp2_getct(jas_image_clrspc(dec->image), 0, i + 1));
+		}
+	}
+
+  // GeoJasper: dima - let's preserve the actual image data by marking unknown components as gray
+	/* Delete any components that are not of interest. */
+  /*
+	for (i = jas_image_numcmpts(dec->image); i > 0; --i) {
+		if (jas_image_cmpttype(dec->image, i - 1) == JAS_IMAGE_CT_UNKNOWN) {
+			jas_image_delcmpt(dec->image, i - 1);
+		}
+	}
+  */
+  // GeoJasper: dima - let's preserve the actual image data by marking unknown components as gray
+	for (i = jas_image_numcmpts(dec->image); i > 0; --i) {
+		if (jas_image_cmpttype(dec->image, i-1) == JAS_IMAGE_CT_UNKNOWN) {
+			jas_image_setcmpttype(dec->image, i-1, JAS_IMAGE_CT_GRAY_Y);
+		}
+	}
+
+
+	/* Ensure that some components survived. */
+	if (!jas_image_numcmpts(dec->image)) {
+		jas_eprintf("error: no components\n");
+		goto error;
+	}
+#if 0
+jas_eprintf("no of components is %d\n", jas_image_numcmpts(dec->image));
+#endif
+
+	/* Prevent the image from being destroyed later. */
+
+  // GeoJasper: begin - dima - copy buffers to the output image
+  for (i=0; i<tmp_metadata.count; ++i) {
+    dec->image->metadata.boxes[i] = tmp_metadata.boxes[i];
+    tmp_metadata.boxes[i] = jas_box_init();
+  }
+  // GeoJasper: end - dima - copy buffers to the output image
+
+	image = dec->image;
+	dec->image = 0;
+
+	jp2_dec_destroy(dec);
+
+	return image;
+
+error:
+	if (box) {
+		jp2_box_destroy(box);
+	}
+	if (dec) {
+		jp2_dec_destroy(dec);
+	}
+	return 0;
+}
+
+/* Test whether the stream begins with a JP2 signature box.  The bytes
+  examined are pushed back onto the stream afterwards.  Returns 0 if the
+  box type field equals JP2_BOX_JP, and -1 otherwise (or on I/O failure). */
+int jp2_validate(jas_stream_t *in)
+{
+	unsigned char buf[JP2_VALIDATELEN];
+	int i;
+	int n;
+
+	assert(JAS_STREAM_MAXPUTBACK >= JP2_VALIDATELEN);
+
+	/* Read the validation data (i.e., the data used for detecting
+	  the format). */
+	if ((n = jas_stream_read(in, buf, JP2_VALIDATELEN)) < 0) {
+		return -1;
+	}
+
+	/* Put the validation data back onto the stream, so that the
+	  stream position will not be changed. */
+	for (i = n - 1; i >= 0; --i) {
+		if (jas_stream_ungetc(in, buf[i]) == EOF) {
+			return -1;
+		}
+	}
+
+	/* Did we read enough data? */
+	if (n < JP2_VALIDATELEN) {
+		return -1;
+	}
+
+	/* Is the box type correct?  The bytes are widened to an unsigned type
+	  before shifting so that values >= 0x80 cannot sign-extend or overflow. */
+	if ((((uint_fast32_t) buf[4] << 24) | ((uint_fast32_t) buf[5] << 16) |
+	  ((uint_fast32_t) buf[6] << 8) | (uint_fast32_t) buf[7]) != JP2_BOX_JP)
+	{
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Allocate a decoder object with all pointers cleared; returns 0 on failure. */
+static jp2_dec_t *jp2_dec_create(void)
+{
+	jp2_dec_t *dec = jas_malloc(sizeof(jp2_dec_t));
+
+	if (dec) {
+		dec->pclr = 0;
+		dec->cdef = 0;
+		dec->ihdr = 0;
+		dec->bpcc = 0;
+		dec->cmap = 0;
+		dec->colr = 0;
+		dec->image = 0;
+		dec->chantocmptlut = 0;
+	}
+	return dec;
+}
+
+/* Destroy a decoder object: every box still attached to it, the image
+  (if the caller has not detached it), the channel-to-component table,
+  and finally the object itself. */
+static void jp2_dec_destroy(jp2_dec_t *dec)
+{
+	jp2_box_t *boxes[6];
+	int k;
+
+	/* Gather the box pointers so they can be released uniformly. */
+	boxes[0] = dec->ihdr;
+	boxes[1] = dec->bpcc;
+	boxes[2] = dec->cdef;
+	boxes[3] = dec->pclr;
+	boxes[4] = dec->cmap;
+	boxes[5] = dec->colr;
+	for (k = 0; k < 6; ++k) {
+		if (boxes[k]) {
+			jp2_box_destroy(boxes[k]);
+		}
+	}
+	if (dec->image) {
+		jas_image_destroy(dec->image);
+	}
+	if (dec->chantocmptlut) {
+		jas_free(dec->chantocmptlut);
+	}
+	jas_free(dec);
+}
+
+/* Translate a channel definition (type, assoc) into a JasPer component
+  type; JAS_IMAGE_CT_UNKNOWN is returned when no case below matches. */
+/* NOTE(review): jp2_decode passes jas_image_clrspc() as colorspace, yet the
+  cases here are JAS_CLRSPC_FAM_* constants, and the encoder-side
+  jp2_gettypeasoc applies jas_clrspc_fam() before switching.  Confirm
+  which form is intended. */
+static int jp2_getct(int colorspace, int type, int assoc)
+{
+	if (type == 1 && assoc == 0) {
+		return JAS_IMAGE_CT_OPACITY;
+	}
+	if (type != 0 || assoc < 1 || assoc > 65534) {
+		return JAS_IMAGE_CT_UNKNOWN;
+	}
+	switch (colorspace) {
+	case JAS_CLRSPC_FAM_RGB:
+		if (assoc == JP2_CDEF_RGB_R) {
+			return JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_RGB_R);
+		}
+		if (assoc == JP2_CDEF_RGB_G) {
+			return JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_RGB_G);
+		}
+		if (assoc == JP2_CDEF_RGB_B) {
+			return JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_RGB_B);
+		}
+		break;
+	case JAS_CLRSPC_FAM_YCBCR:
+		if (assoc == JP2_CDEF_YCBCR_Y) {
+			return JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_YCBCR_Y);
+		}
+		if (assoc == JP2_CDEF_YCBCR_CB) {
+			return JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_YCBCR_CB);
+		}
+		if (assoc == JP2_CDEF_YCBCR_CR) {
+			return JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_YCBCR_CR);
+		}
+		break;
+	case JAS_CLRSPC_FAM_GRAY:
+		if (assoc == JP2_CDEF_GRAY_Y) {
+			return JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_GRAY_Y);
+		}
+		break;
+	default:
+		return JAS_IMAGE_CT_COLOR(assoc - 1);
+	}
+	return JAS_IMAGE_CT_UNKNOWN;
+}
+
+/* Map a COLR box onto a JasPer color space.  Only enumerated color spaces
+  are recognized; anything else yields JAS_CLRSPC_UNKNOWN. */
+static int jp2_getcs(jp2_colr_t *colr)
+{
+	if (colr->method != JP2_COLR_ENUM) {
+		return JAS_CLRSPC_UNKNOWN;
+	}
+	switch (colr->csid) {
+	case JP2_COLR_SRGB:
+		return JAS_CLRSPC_SRGB;
+	case JP2_COLR_SYCC:
+		return JAS_CLRSPC_SYCBCR;
+	case JP2_COLR_SGRAY:
+		return JAS_CLRSPC_SGRAY;
+	}
+	return JAS_CLRSPC_UNKNOWN;
+}
+
+/* Map an ICC profile color space signature onto a generic JasPer color
+  space; signatures other than RGB, YCbCr and gray are reported as unknown. */
+static int fromiccpcs(int cs)
+{
+	if (cs == ICC_CS_RGB) {
+		return JAS_CLRSPC_GENRGB;
+	}
+	if (cs == ICC_CS_YCBCR) {
+		return JAS_CLRSPC_GENYCBCR;
+	}
+	if (cs == ICC_CS_GRAY) {
+		return JAS_CLRSPC_GENGRAY;
+	}
+	return JAS_CLRSPC_UNKNOWN;
+}
diff --git a/src/libjasper/jp2/jp2_dec.h b/src/libjasper/jp2/jp2_dec.h
new file mode 100644
index 0000000..1cb95a2
--- /dev/null
+++ b/src/libjasper/jp2/jp2_dec.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+#ifndef JP2_DEC_H
+#define JP2_DEC_H
+
+#include "jasper/jas_image.h"
+#include "jasper/jas_stream.h"
+#include "jp2_cod.h"
+
+typedef struct {
+	/* Working state of the JP2 decoder; a box pointer is null if absent. */
+	jp2_box_t *pclr;	/* PCLR box */
+	jp2_box_t *cdef;	/* CDEF box */
+	jp2_box_t *ihdr;	/* IHDR box */
+	jp2_box_t *bpcc;	/* BPCC box */
+	jp2_box_t *cmap;	/* CMAP box */
+	jp2_box_t *colr;	/* COLR box */
+	jas_image_t *image;	/* image obtained from jpc_decode */
+	uint_fast16_t numchans;	/* number of channels */
+	uint_fast16_t *chantocmptlut;	/* channel number -> component number */
+	/* chantocmptlut is allocated with numchans entries. */
+} jp2_dec_t;
+
+#endif
diff --git a/src/libjasper/jp2/jp2_enc.c b/src/libjasper/jp2/jp2_enc.c
new file mode 100644
index 0000000..cb6299f
--- /dev/null
+++ b/src/libjasper/jp2/jp2_enc.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+
+  GeoJasper revision: Dima (11/07/2003 17:29 - UUID)
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * JP2 Library
+ *
+ * $Id: jp2_enc.c,v 1.1 2008/10/17 06:14:59 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <assert.h>
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_image.h"
+#include "jasper/jas_stream.h"
+#include "jasper/jas_cm.h"
+#include "jasper/jas_icc.h"
+#include "jp2_cod.h"
+
+static uint_fast32_t jp2_gettypeasoc(int colorspace, int ctype);
+static int clrspctojp2(jas_clrspc_t clrspc);
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+/* Write the image to the stream as a JP2 file: signature, file type and
+  JP2 header boxes, one UUID box per non-empty metadata entry, and the
+  code stream produced by jpc_encode.  optstr (may be null) is forwarded
+  to jpc_encode.  Returns 0 on success, -1 on failure. */
+int jp2_encode(jas_image_t *image, jas_stream_t *out, char *optstr)
+{
+	jp2_box_t *box;
+	jp2_ftyp_t *ftyp;
+	jp2_ihdr_t *ihdr;
+	jas_stream_t *tmpstream;
+	int allcmptssame;
+	jp2_bpcc_t *bpcc;
+	long len;
+	uint_fast16_t cmptno;
+	jp2_colr_t *colr;
+	char buf[4096];
+	uint_fast32_t overhead;
+	jp2_cdefchan_t *cdefchanent;
+	jp2_cdef_t *cdef;
+	int i;
+	uint_fast32_t typeasoc;
+jas_iccprof_t *iccprof;
+jas_stream_t *iccstream;
+int pos;
+int needcdef;
+int prec;
+int sgnd;
+
+	box = 0;
+	tmpstream = 0;
+
+	/* buf is later filled by an unbounded sprintf; leave 64 bytes for the
+	  appended "_jp2overhead" setting and reject longer option strings. */
+	for (i = 0; optstr && optstr[i] != '\0'; ++i) {
+		if (i >= (int) sizeof(buf) - 64) {
+			goto error;
+		}
+	}
+
+	allcmptssame = 1;
+	sgnd = jas_image_cmptsgnd(image, 0);
+	prec = jas_image_cmptprec(image, 0);
+	for (i = 1; i < jas_image_numcmpts(image); ++i) {
+		if (jas_image_cmptsgnd(image, i) != sgnd ||
+		  jas_image_cmptprec(image, i) != prec) {
+			allcmptssame = 0;
+			break;
+		}
+	}
+
+	/* Output the signature box. */
+	if (!(box = jp2_box_create(JP2_BOX_JP))) {
+		goto error;
+	}
+	box->data.jp.magic = JP2_JP_MAGIC;
+	if (jp2_box_put(box, out)) {
+		goto error;
+	}
+	jp2_box_destroy(box);
+	box = 0;
+
+	/* Output the file type box. */
+	if (!(box = jp2_box_create(JP2_BOX_FTYP))) {
+		goto error;
+	}
+	ftyp = &box->data.ftyp;
+	ftyp->majver = JP2_FTYP_MAJVER;
+	ftyp->minver = JP2_FTYP_MINVER;
+	ftyp->numcompatcodes = 1;
+	ftyp->compatcodes[0] = JP2_FTYP_COMPATCODE;
+	if (jp2_box_put(box, out)) {
+		goto error;
+	}
+	jp2_box_destroy(box);
+	box = 0;
+
+	/*
+	 * Generate the data portion of the JP2 header box.
+	 * We cannot simply output the header for this box
+	 * since we do not yet know the correct value for the length
+	 * field.
+	 */
+	if (!(tmpstream = jas_stream_memopen(0, 0))) {
+		goto error;
+	}
+
+	/* Generate image header box. */
+	if (!(box = jp2_box_create(JP2_BOX_IHDR))) {
+		goto error;
+	}
+	ihdr = &box->data.ihdr;
+	ihdr->width = jas_image_width(image);
+	ihdr->height = jas_image_height(image);
+	ihdr->numcmpts = jas_image_numcmpts(image);
+	ihdr->bpc = allcmptssame ? JP2_SPTOBPC(jas_image_cmptsgnd(image, 0),
+	  jas_image_cmptprec(image, 0)) : JP2_IHDR_BPCNULL;
+	ihdr->comptype = JP2_IHDR_COMPTYPE;
+	ihdr->csunk = 0;
+	ihdr->ipr = 0;
+	if (jp2_box_put(box, tmpstream)) {
+		goto error;
+	}
+	jp2_box_destroy(box);
+	box = 0;
+
+	/* Generate bits per component box. */
+	if (!allcmptssame) {
+		if (!(box = jp2_box_create(JP2_BOX_BPCC))) {
+			goto error;
+		}
+		bpcc = &box->data.bpcc;
+		bpcc->numcmpts = jas_image_numcmpts(image);
+		if (!(bpcc->bpcs = jas_malloc(bpcc->numcmpts *
+		  sizeof(uint_fast8_t)))) {
+			goto error;
+		}
+		for (cmptno = 0; cmptno < bpcc->numcmpts; ++cmptno) {
+			bpcc->bpcs[cmptno] = JP2_SPTOBPC(jas_image_cmptsgnd(image,
+			  cmptno), jas_image_cmptprec(image, cmptno));
+		}
+		if (jp2_box_put(box, tmpstream)) {
+			goto error;
+		}
+		jp2_box_destroy(box);
+		box = 0;
+	}
+
+	/* Generate color specification box. */
+	if (!(box = jp2_box_create(JP2_BOX_COLR))) {
+		goto error;
+	}
+	colr = &box->data.colr;
+	switch (jas_image_clrspc(image)) {
+	case JAS_CLRSPC_SRGB:
+	case JAS_CLRSPC_SYCBCR:
+	case JAS_CLRSPC_SGRAY:
+		colr->method = JP2_COLR_ENUM;
+		colr->csid = clrspctojp2(jas_image_clrspc(image));
+		colr->pri = JP2_COLR_PRI;
+		colr->approx = 0;
+		break;
+	default:
+		colr->method = JP2_COLR_ICC;
+		colr->pri = JP2_COLR_PRI;
+		colr->approx = 0;
+		iccprof = jas_iccprof_createfromcmprof(jas_image_cmprof(image));
+		assert(iccprof);
+		iccstream = jas_stream_memopen(0, 0);
+		assert(iccstream);
+		if (jas_iccprof_save(iccprof, iccstream))
+			abort();
+		if ((pos = jas_stream_tell(iccstream)) < 0)
+			abort();
+		colr->iccplen = pos;
+		colr->iccp = jas_malloc(pos);
+		assert(colr->iccp);
+		jas_stream_rewind(iccstream);
+		if (jas_stream_read(iccstream, colr->iccp, colr->iccplen) != colr->iccplen)
+			abort();
+		jas_stream_close(iccstream);
+		jas_iccprof_destroy(iccprof);
+		break;
+	}
+	if (jp2_box_put(box, tmpstream)) {
+		goto error;
+	}
+	jp2_box_destroy(box);
+	box = 0;
+
+	needcdef = 1;
+	switch (jas_clrspc_fam(jas_image_clrspc(image))) {
+	case JAS_CLRSPC_FAM_RGB:
+		if (jas_image_cmpttype(image, 0) ==
+		  JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_RGB_R) &&
+		  jas_image_cmpttype(image, 1) ==
+		  JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_RGB_G) &&
+		  jas_image_cmpttype(image, 2) ==
+		  JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_RGB_B))
+			needcdef = 0;
+		break;
+	case JAS_CLRSPC_FAM_YCBCR:
+		if (jas_image_cmpttype(image, 0) ==
+		  JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_YCBCR_Y) &&
+		  jas_image_cmpttype(image, 1) ==
+		  JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_YCBCR_CB) &&
+		  jas_image_cmpttype(image, 2) ==
+		  JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_YCBCR_CR))
+			needcdef = 0;
+		break;
+	case JAS_CLRSPC_FAM_GRAY:
+		if (jas_image_cmpttype(image, 0) ==
+		  JAS_IMAGE_CT_COLOR(JAS_IMAGE_CT_GRAY_Y))
+			needcdef = 0;
+		break;
+	default:
+		abort();
+		break;
+	}
+
+	if (needcdef) {
+		if (!(box = jp2_box_create(JP2_BOX_CDEF))) {
+			goto error;
+		}
+		cdef = &box->data.cdef;
+		cdef->numchans = jas_image_numcmpts(image);
+		if (!(cdef->ents = jas_malloc(cdef->numchans * sizeof(jp2_cdefchan_t)))) {
+			goto error;
+		}
+		for (i = 0; i < jas_image_numcmpts(image); ++i) {
+			cdefchanent = &cdef->ents[i];
+			cdefchanent->channo = i;
+			typeasoc = jp2_gettypeasoc(jas_image_clrspc(image), jas_image_cmpttype(image, i));
+			cdefchanent->type = typeasoc >> 16;
+			cdefchanent->assoc = typeasoc & 0xffff; /* asoc is packed in the low 16 bits */
+		}
+		if (jp2_box_put(box, tmpstream)) {
+			goto error;
+		}
+		jp2_box_destroy(box);
+		box = 0;
+	}
+
+	/* Determine the total length of the JP2 header box. */
+	len = jas_stream_tell(tmpstream);
+	jas_stream_rewind(tmpstream);
+
+	/*
+	 * Output the JP2 header box and all of the boxes which it contains.
+	 */
+	if (!(box = jp2_box_create(JP2_BOX_JP2H))) {
+		goto error;
+	}
+	box->len = len + JP2_BOX_HDRLEN(false);
+	if (jp2_box_put(box, out)) {
+		goto error;
+	}
+	jp2_box_destroy(box);
+	box = 0;
+
+	if (jas_stream_copy(out, tmpstream, len)) {
+		goto error;
+	}
+
+	jas_stream_close(tmpstream);
+	tmpstream = 0;
+
+	/*
+	 * Output the UUID box
+	*/
+  // GeoJasper: dima - write UUID if received any metadata
+  if ( image->metadata.count > 0 ) {
+    int b;
+    for (b=0; b<image->metadata.count; ++b) {
+      jas_metadata_box_t *metabox = &image->metadata.boxes[b];
+
+      if ( metabox->size>0 && metabox->buf ) {
+        if (!(box = jp2_box_create( JP2_BOX_UUID )))
+          goto error;
+
+        memcpy( box->data.uuid.uuid, metabox->id, sizeof(msi_uuid) );
+        box->data.uuid.data_len = metabox->size;
+        if ( !(box->data.uuid.data = (uint_fast8_t *)jas_malloc(metabox->size)) )
+          goto error;
+        memcpy( box->data.uuid.data, metabox->buf, metabox->size );
+
+        if (jp2_box_put(box, out))
+          goto error;
+
+        jp2_box_destroy(box);
+        box = 0;
+      } // if box contains data
+    } // for boxes
+  } // if there are boxes
+  // GeoJasper: dima - write UUID if received
+
+	/*
+	 * Output the contiguous code stream box.
+	 */
+	if (!(box = jp2_box_create(JP2_BOX_JP2C))) {
+		goto error;
+	}
+	box->len = 0;
+	if (jp2_box_put(box, out)) {
+		goto error;
+	}
+	jp2_box_destroy(box);
+	box = 0;
+
+	/* Output the JPEG-2000 code stream. */
+	overhead = jas_stream_getrwcount(out);
+	sprintf(buf, "%s\n_jp2overhead=%lu\n", (optstr ? optstr : ""),
+	  (unsigned long) overhead);
+
+	if (jpc_encode(image, out, buf)) {
+		goto error;
+	}
+
+	return 0;
+
+error:
+	if (box) {
+		jp2_box_destroy(box);
+	}
+	if (tmpstream) {
+		jas_stream_close(tmpstream);
+	}
+	return -1;
+}
+
+/* Pack the CDEF type (upper 16 bits) and association (lower 16 bits) for
+  component type ctype; the shift is unsigned so a large type cannot overflow. */
+static uint_fast32_t jp2_gettypeasoc(int colorspace, int ctype)
+{
+	int type;
+	int asoc;
+
+	if (ctype & JAS_IMAGE_CT_OPACITY) {
+		type = JP2_CDEF_TYPE_OPACITY;
+		asoc = JP2_CDEF_ASOC_ALL;
+		goto done;
+	}
+	type = JP2_CDEF_TYPE_UNSPEC;
+	asoc = JP2_CDEF_ASOC_NONE;
+	switch (jas_clrspc_fam(colorspace)) {
+	case JAS_CLRSPC_FAM_RGB:
+		switch (JAS_IMAGE_CT_COLOR(ctype)) {
+		case JAS_IMAGE_CT_RGB_R:
+			type = JP2_CDEF_TYPE_COLOR;
+			asoc = JP2_CDEF_RGB_R;
+			break;
+		case JAS_IMAGE_CT_RGB_G:
+			type = JP2_CDEF_TYPE_COLOR;
+			asoc = JP2_CDEF_RGB_G;
+			break;
+		case JAS_IMAGE_CT_RGB_B:
+			type = JP2_CDEF_TYPE_COLOR;
+			asoc = JP2_CDEF_RGB_B;
+			break;
+		}
+		break;
+	case JAS_CLRSPC_FAM_YCBCR:
+		switch (JAS_IMAGE_CT_COLOR(ctype)) {
+		case JAS_IMAGE_CT_YCBCR_Y:
+			type = JP2_CDEF_TYPE_COLOR;
+			asoc = JP2_CDEF_YCBCR_Y;
+			break;
+		case JAS_IMAGE_CT_YCBCR_CB:
+			type = JP2_CDEF_TYPE_COLOR;
+			asoc = JP2_CDEF_YCBCR_CB;
+			break;
+		case JAS_IMAGE_CT_YCBCR_CR:
+			type = JP2_CDEF_TYPE_COLOR;
+			asoc = JP2_CDEF_YCBCR_CR;
+			break;
+		}
+		break;
+	case JAS_CLRSPC_FAM_GRAY:
+		type = JP2_CDEF_TYPE_COLOR;
+		asoc = JP2_CDEF_GRAY_Y;
+		break;
+	}
+done:
+	return ((uint_fast32_t) type << 16) | (uint_fast32_t) asoc;
+}
+
+/* Map a JasPer color space to its JP2 enumerated ID; aborts if none exists. */
+static int clrspctojp2(jas_clrspc_t clrspc)
+{
+	if (clrspc == JAS_CLRSPC_SRGB) {
+		return JP2_COLR_SRGB;
+	}
+	if (clrspc == JAS_CLRSPC_SYCBCR) {
+		return JP2_COLR_SYCC;
+	}
+	if (clrspc == JAS_CLRSPC_SGRAY) {
+		return JP2_COLR_SGRAY;
+	}
+	abort();
+}
diff --git a/src/libjasper/jpc/jpc_bs.c b/src/libjasper/jpc/jpc_bs.c
new file mode 100644
index 0000000..0471665
--- /dev/null
+++ b/src/libjasper/jpc/jpc_bs.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 1999-2000, Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Bit Stream Class
+ *
+ * $Id: jpc_bs.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_math.h"
+#include "jasper/jas_debug.h"
+
+#include "jpc_bs.h"
+
+/******************************************************************************\
+* Local function prototypes.
+\******************************************************************************/
+
+static jpc_bitstream_t *jpc_bitstream_alloc(void);
+
+/******************************************************************************\
+* Code for opening and closing bit streams.
+\******************************************************************************/
+
+/* Open a bit stream on top of a stream; returns 0 if allocation fails. */
+jpc_bitstream_t *jpc_bitstream_sopen(jas_stream_t *stream, char *mode)
+{
+	jpc_bitstream_t *bitstream;
+
+	/* Ensure that the open mode is one of "r", "w", "r+", or "w+". */
+#if 1
+/* This causes a string literal too long error (with c99 pedantic mode). */
+	assert(!strcmp(mode, "r") || !strcmp(mode, "w") || !strcmp(mode, "r+")
+	  || !strcmp(mode, "w+"));
+#endif
+
+	if (!(bitstream = jpc_bitstream_alloc())) {
+		return 0;
+	}
+
+	/* By default, the underlying (character) stream is left open when
+	  the bit stream is closed. */
+	bitstream->flags_ = JPC_BITSTREAM_NOCLOSE;
+	/* Only the first character of the mode selects reading or writing. */
+	bitstream->stream_ = stream;
+	bitstream->openmode_ = (mode[0] == 'w') ? JPC_BITSTREAM_WRITE :
+	  JPC_BITSTREAM_READ;
+
+	/* Mark the buffer as empty: 0 bits left to read, or 8 free to write. */
+	bitstream->cnt_ = (bitstream->openmode_ == JPC_BITSTREAM_READ) ? 0 : 8;
+	bitstream->buf_ = 0;
+
+	return bitstream;
+}
+
+/* Close a bit stream and free the bit stream object. */
+int jpc_bitstream_close(jpc_bitstream_t *bitstream)
+{
+	int status;
+
+	/* Byte-align first (bit stuffing included); any nonzero result from
+	  the alignment is reported to the caller as -1. */
+	status = jpc_bitstream_align(bitstream) ? -1 : 0;
+
+	/* The underlying (character) stream is closed only when the NOCLOSE
+	  flag is clear and a stream is attached. */
+	if (bitstream->stream_ && !(bitstream->flags_ & JPC_BITSTREAM_NOCLOSE)) {
+		if (jas_stream_close(bitstream->stream_) != 0) {
+			status = -1;
+		}
+		bitstream->stream_ = 0;
+	}
+
+	/* The object is released even if one of the steps above failed. */
+	jas_free(bitstream);
+	return status;
+}
+
+/* Allocate a new bit stream with every member zeroed; 0 on failure. */
+static jpc_bitstream_t *jpc_bitstream_alloc(void)
+{
+	jpc_bitstream_t *bitstream;
+
+	/* Allocate memory for the new bit stream object. */
+	if (!(bitstream = jas_malloc(sizeof(jpc_bitstream_t)))) {
+		return 0;
+	}
+	/* Initialize all of the data members (buf_ included). */
+	bitstream->stream_ = 0;
+	bitstream->buf_ = 0;
+	bitstream->cnt_ = 0;
+	bitstream->flags_ = 0;
+	bitstream->openmode_ = 0;
+	return bitstream;
+}
+
+/******************************************************************************\
+* Code for reading/writing from/to bit streams.
+\******************************************************************************/
+
+/* Function form of jpc_bitstream_getbit_macro, with debug logging. */
+int jpc_bitstream_getbit_func(jpc_bitstream_t *bitstream)
+{
+	int bit;
+	JAS_DBGLOG(1000, ("jpc_bitstream_getbit_func(%p)\n", bitstream));
+	bit = jpc_bitstream_getbit_macro(bitstream);
+	JAS_DBGLOG(1000, ("jpc_bitstream_getbit_func -> %d\n", bit));
+	return bit;
+}
+
+/* Function form of jpc_bitstream_putbit_macro, with debug logging. */
+int jpc_bitstream_putbit_func(jpc_bitstream_t *bitstream, int b)
+{
+	int written;
+	JAS_DBGLOG(1000, ("jpc_bitstream_putbit_func(%p, %d)\n", bitstream, b));
+	written = jpc_bitstream_putbit_macro(bitstream, b);
+	JAS_DBGLOG(1000, ("jpc_bitstream_putbit_func() -> %d\n", written));
+	return written;
+}
+
+/* Get n bits (0 <= n < 32), first bit read is the MSB; -1 on failure. */
+long jpc_bitstream_getbits(jpc_bitstream_t *bitstream, int n)
+{
+	long v = 0;
+	int u;
+
+	/* At most 31 bits fit reliably: ISO/IEC 9899 only guarantees that a
+	  long holds values up to 2^31-1.  Checked even when NDEBUG is set. */
+	if (n < 0 || n >= 32) {
+		return -1;
+	}
+
+	while (--n >= 0) {
+		if ((u = jpc_bitstream_getbit(bitstream)) < 0) {
+			return -1;
+		}
+		v = (v << 1) | u;
+	}
+	return v;
+}
+
+/* Put the low n bits of v (0 <= n < 32), MSB first; EOF on failure. */
+int jpc_bitstream_putbits(jpc_bitstream_t *bitstream, int n, long v)
+{
+	/* We can reliably put at most 31 bits since ISO/IEC 9899 only
+	  guarantees that a long can represent values up to 2^31-1. */
+	if (n < 0 || n >= 32) {
+		return EOF;
+	}
+	/* Ensure that only the bits to be output are nonzero. */
+	assert(!(v & (~JAS_ONES(n))));
+
+	/* Emit bit n-1 down to bit 0.  Each bit is picked out of v in place;
+	  shifting v left instead could overflow a signed long, which is
+	  undefined behavior. */
+	while (--n >= 0) {
+		if (jpc_bitstream_putbit(bitstream, (v >> n) & 1) == EOF) {
+			return EOF;
+		}
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* Code for buffer filling and flushing.
+\******************************************************************************/
+
+/* Refill the buffer of a read-mode bit stream and return the next bit. */
+int jpc_bitstream_fillbuf(jpc_bitstream_t *bitstream)
+{
+	int c;
+	/* Note: The count has already been decremented by the caller. */
+	assert(bitstream->openmode_ & JPC_BITSTREAM_READ);
+	assert(bitstream->cnt_ <= 0);
+	/* A stream already flagged with an error keeps failing with -1. */
+	if (bitstream->flags_ & JPC_BITSTREAM_ERR) {
+		bitstream->cnt_ = 0;
+		return -1;
+	}
+	/* Once EOF has been flagged, every further bit read yields 1. */
+	if (bitstream->flags_ & JPC_BITSTREAM_EOF) {
+		bitstream->buf_ = 0x7f;
+		bitstream->cnt_ = 7;
+		return 1;
+	}
+	/* Keep the previous byte in bits 8-15 so that 0xff can be detected. */
+	bitstream->buf_ = (bitstream->buf_ << 8) & 0xffff;
+	if ((c = jas_stream_getc((bitstream)->stream_)) == EOF) {
+		bitstream->flags_ |= JPC_BITSTREAM_EOF;
+		return 1;
+	}
+	bitstream->cnt_ = (bitstream->buf_ == 0xff00) ? 6 : 7; /* 7 bits follow 0xff, else 8 */
+	bitstream->buf_ |= c & ((1 << (bitstream->cnt_ + 1)) - 1);
+	return (bitstream->buf_ >> bitstream->cnt_) & 1;
+}
+
+
+/******************************************************************************\
+* Code related to flushing.
+\******************************************************************************/
+
+/* Report whether aligning the bit stream to a byte boundary would have
+  any effect (considering bit stuffing): 1 if so, 0 if not. */
+int jpc_bitstream_needalign(jpc_bitstream_t *bitstream)
+{
+	int lowest;
+
+	/* The smallest count that denotes a partially processed byte differs
+	  between the two modes: 1 when reading, 0 when writing. */
+	if (bitstream->openmode_ & JPC_BITSTREAM_READ) {
+		lowest = 1;
+	} else if (bitstream->openmode_ & JPC_BITSTREAM_WRITE) {
+		lowest = 0;
+	} else {
+		/* Neither mode flag is set; this should never occur. */
+		assert(0);
+		return -1;
+	}
+
+	/* Bits are still buffered in the current byte. */
+	if (bitstream->cnt_ >= lowest && bitstream->cnt_ < 8) {
+		return 1;
+	}
+	/* The previous byte was 0xff, which forces a stuffed bit. */
+	if (((bitstream->buf_ >> 8) & 0xff) == 0xff) {
+		return 1;
+	}
+
+	return 0;
+}
+
+/* How many additional bytes would be output if we align the bit stream?
+  Returns -1 if the bit stream is not open for writing. */
+int jpc_bitstream_pending(jpc_bitstream_t *bitstream)
+{
+	/* This operation should not be invoked on a bit stream that is
+	  being used for reading. */
+	if (!(bitstream->openmode_ & JPC_BITSTREAM_WRITE)) {
+		return -1;
+	}
+
+	/* A partially filled byte (cnt_ < 8) is counted as one more byte of
+	  output. */
+#if 1
+	/* XXX - Is this really correct?  Check someday... */
+	return (bitstream->cnt_ < 8) ? 1 : 0;
+#else
+	if (bitstream->cnt_ < 8) {
+		if (((bitstream->buf_ >> 8) & 0xff) == 0xff) {
+			return 2;
+		}
+		return 1;
+	}
+	return 0;
+#endif
+}
+
+/* Align the bit stream to a byte boundary via the mode-specific routine. */
+int jpc_bitstream_align(jpc_bitstream_t *bitstream)
+{
+	/* Input alignment is requested with a zero fill mask and fill data. */
+	if (bitstream->openmode_ & JPC_BITSTREAM_READ) {
+		return jpc_bitstream_inalign(bitstream, 0, 0);
+	}
+	/* Output alignment is requested with zero fill data. */
+	if (bitstream->openmode_ & JPC_BITSTREAM_WRITE) {
+		return jpc_bitstream_outalign(bitstream, 0);
+	}
+	abort(); /* Neither the read nor the write mode flag is set. */
+}
+
+/* Align an input bit stream: 0 = aligned, 1 = fill mismatch, -1 = error. */
+int jpc_bitstream_inalign(jpc_bitstream_t *bitstream, int fillmask,
+  int filldata)
+{
+	int n; /* number of bits to consume from the current byte */
+	int v; /* the bits consumed so far */
+	int u;
+	int numfill;
+	int m; /* how many bits v holds */
+
+	numfill = 7; /* at most 7 bits take part in the comparison */
+	m = 0;
+	v = 0;
+	if (bitstream->cnt_ > 0) {
+		n = bitstream->cnt_;
+	} else if (!bitstream->cnt_) {
+		n = ((bitstream->buf_ & 0xff) == 0xff) ? 7 : 0;
+	} else {
+		n = 0;
+	}
+	if (n > 0) {
+		if ((u = jpc_bitstream_getbits(bitstream, n)) < 0) {
+			return -1;
+		}
+		m += n;
+		v = (v << n) | u;
+	}
+	if ((bitstream->buf_ & 0xff) == 0xff) {
+		if ((u = jpc_bitstream_getbits(bitstream, 7)) < 0) {
+			return -1;
+		}
+		v = (v << 7) | u;
+		m += 7;
+	}
+	if (m > numfill) {
+		v >>= m - numfill; /* keep only the first numfill bits read */
+	} else {
+		filldata >>= numfill - m; /* compare the leading m fill bits */
+		fillmask >>= numfill - m;
+	}
+	if (((~(v ^ filldata)) & fillmask) != fillmask) {
+		/* The actual fill pattern does not match the expected one. */
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Align an output bit stream; returns 0 on success and -1 on failure. */
+int jpc_bitstream_outalign(jpc_bitstream_t *bitstream, int filldata)
+{
+	int n;
+	int v;
+
+	/* Ensure that this bit stream is open for writing. */
+	assert(bitstream->openmode_ & JPC_BITSTREAM_WRITE);
+
+	/* Ensure that filldata fits in six bits (first of seven fill bits zero). */
+	/* Note: The first bit of fill data must be zero.  If this were not
+	  the case, the fill data itself could cause further bit stuffing to
+	  be required (which would cause numerous complications). */
+	assert(!(filldata & (~0x3f)));
+
+	if (!bitstream->cnt_) {
+		if ((bitstream->buf_ & 0xff) == 0xff) {
+			n = 7;
+			v = filldata;
+		} else {
+			n = 0;
+			v = 0;
+		}
+	} else if (bitstream->cnt_ > 0 && bitstream->cnt_ < 8) {
+		n = bitstream->cnt_;
+		v = filldata >> (7 - n); /* use the leading n fill bits */
+	} else {
+		n = 0;
+		v = 0;
+		return 0; /* cnt_ is outside 0..7: nothing is written */
+	}
+
+	/* Write the appropriate fill data to the bit stream. */
+	if (n > 0) {
+		if (jpc_bitstream_putbits(bitstream, n, v)) {
+			return -1;
+		}
+	}
+	if (bitstream->cnt_ < 8) {
+		assert(bitstream->cnt_ >= 0 && bitstream->cnt_ < 8);
+		assert((bitstream->buf_ & 0xff) != 0xff);
+		/* Force the pending byte of output to be written to the
+		  underlying (character) stream. */
+		if (jas_stream_putc(bitstream->stream_, bitstream->buf_ & 0xff) == EOF) {
+			return -1;
+		}
+		bitstream->cnt_ = 8;
+		bitstream->buf_ = (bitstream->buf_ << 8) & 0xffff;
+	}
+
+	return 0;
+}
diff --git a/src/libjasper/jpc/jpc_bs.h b/src/libjasper/jpc/jpc_bs.h
new file mode 100644
index 0000000..78f2974
--- /dev/null
+++ b/src/libjasper/jpc/jpc_bs.h
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Bit Stream Class
+ *
+ * $Id: jpc_bs.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_BS_H
+#define JPC_BS_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+
+#include "jasper/jas_types.h"
+#include "jasper/jas_stream.h"
+
+/******************************************************************************\
+* Constants.
+\******************************************************************************/
+
+/*
+ * Bit stream open mode flags.
+ */
+
+/* Bit stream open for reading. */
+#define	JPC_BITSTREAM_READ	0x01
+/* Bit stream open for writing. */
+#define	JPC_BITSTREAM_WRITE	0x02
+
+/*
+ * Bit stream flags.
+ */
+
+/* Do not close underlying character stream. */
+#define	JPC_BITSTREAM_NOCLOSE	0x01
+/* End of file has been reached while reading. */
+#define	JPC_BITSTREAM_EOF	0x02
+/* An I/O error has occurred. */
+#define	JPC_BITSTREAM_ERR	0x04
+
+/******************************************************************************\
+* Types.
+\******************************************************************************/
+
+/* Bit stream class. */
+
+typedef struct {
+
+	/* Status flags (JPC_BITSTREAM_NOCLOSE, _EOF and _ERR bits). */
+	int flags_;
+
+	/* The input/output buffer: current byte in bits 0-7, previous in 8-15. */
+	uint_fast16_t buf_;
+
+	/* The number of bits remaining in the byte being read/written. */
+	int cnt_;
+
+	/* The underlying stream associated with this bit stream. */
+	jas_stream_t *stream_;
+
+	/* The open mode: JPC_BITSTREAM_READ or JPC_BITSTREAM_WRITE. */
+	int openmode_;
+
+} jpc_bitstream_t;
+
+/******************************************************************************\
+* Functions/macros for opening and closing bit streams.
+\******************************************************************************/
+
+/* Open a stream as a bit stream. */
+jpc_bitstream_t *jpc_bitstream_sopen(jas_stream_t *stream, char *mode);
+
+/* Close a bit stream. */
+int jpc_bitstream_close(jpc_bitstream_t *bitstream);
+
+/******************************************************************************\
+* Functions/macros for reading from and writing to bit streams.
+\******************************************************************************/
+
+/* Read a bit from a bit stream (a real function when DEBUG is defined). */
+#if defined(DEBUG)
+#define	jpc_bitstream_getbit(bitstream) \
+	jpc_bitstream_getbit_func(bitstream)
+#else
+#define jpc_bitstream_getbit(bitstream) \
+	jpc_bitstream_getbit_macro(bitstream)
+#endif
+
+/* Write a bit to a bit stream (a real function when DEBUG is defined). */
+#if defined(DEBUG)
+#define	jpc_bitstream_putbit(bitstream, v) \
+	jpc_bitstream_putbit_func(bitstream, v)
+#else
+#define	jpc_bitstream_putbit(bitstream, v) \
+	jpc_bitstream_putbit_macro(bitstream, v)
+#endif
+
+/* Read one or more bits from a bit stream. */
+long jpc_bitstream_getbits(jpc_bitstream_t *bitstream, int n);
+
+/* Write one or more bits to a bit stream. */
+int jpc_bitstream_putbits(jpc_bitstream_t *bitstream, int n, long v);
+
+/******************************************************************************\
+* Functions/macros for flushing and aligning bit streams.
+\******************************************************************************/
+
+/* Align the current position within the bit stream to the next byte
+  boundary. */
+int jpc_bitstream_align(jpc_bitstream_t *bitstream);
+
+/* Align the current position in the bit stream with the next byte boundary,
+  ensuring that certain bits consumed in the process match a particular
+  pattern. */
+int jpc_bitstream_inalign(jpc_bitstream_t *bitstream, int fillmask,
+  int filldata);
+
+/* Align the current position in the bit stream with the next byte boundary,
+  writing bits from the specified pattern (if necessary) in the process. */
+int jpc_bitstream_outalign(jpc_bitstream_t *bitstream, int filldata);
+
+/* Check if a bit stream needs alignment. */
+int jpc_bitstream_needalign(jpc_bitstream_t *bitstream);
+
+/* How many additional bytes would be output if the bit stream was aligned? */
+int jpc_bitstream_pending(jpc_bitstream_t *bitstream);
+
+/******************************************************************************\
+* Functions/macros for querying state information for bit streams.
+\******************************************************************************/
+
+/* Has EOF been encountered on a bit stream?  Nonzero if so. */
+#define jpc_bitstream_eof(bitstream) \
+	((bitstream)->flags_ & JPC_BITSTREAM_EOF)
+
+/******************************************************************************\
+* Internals.
+\******************************************************************************/
+
+/* DO NOT DIRECTLY INVOKE ANY OF THE MACROS OR FUNCTIONS BELOW.  THEY ARE
+  FOR INTERNAL USE ONLY. */
+
+int jpc_bitstream_getbit_func(jpc_bitstream_t *bitstream);
+
+int jpc_bitstream_putbit_func(jpc_bitstream_t *bitstream, int v);
+
+int jpc_bitstream_fillbuf(jpc_bitstream_t *bitstream);
+/* Take the next buffered bit, or call jpc_bitstream_fillbuf() when none is left. */
+#define	jpc_bitstream_getbit_macro(bitstream) \
+	(assert((bitstream)->openmode_ & JPC_BITSTREAM_READ), \
+	  (--(bitstream)->cnt_ >= 0) ? \
+	  ((int)(((bitstream)->buf_ >> (bitstream)->cnt_) & 1)) : \
+	  jpc_bitstream_fillbuf(bitstream))
+/* Buffer one bit, writing out the completed byte; NOTE: bit is evaluated more than once. */
+#define jpc_bitstream_putbit_macro(bitstream, bit) \
+	(assert((bitstream)->openmode_ & JPC_BITSTREAM_WRITE), \
+	  (--(bitstream)->cnt_ < 0) ? \
+	  ((bitstream)->buf_ = ((bitstream)->buf_ << 8) & 0xffff, \
+	  (bitstream)->cnt_ = ((bitstream)->buf_ == 0xff00) ? 6 : 7, \
+	  (bitstream)->buf_ |= ((bit) & 1) << (bitstream)->cnt_, \
+	  (jas_stream_putc((bitstream)->stream_, (bitstream)->buf_ >> 8) == EOF) \
+	  ? (EOF) : ((bit) & 1)) : \
+	  ((bitstream)->buf_ |= ((bit) & 1) << (bitstream)->cnt_, \
+	  (bit) & 1))
+
+#endif
diff --git a/src/libjasper/jpc/jpc_cod.h b/src/libjasper/jpc/jpc_cod.h
new file mode 100644
index 0000000..31325e2
--- /dev/null
+++ b/src/libjasper/jpc/jpc_cod.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * $Id: jpc_cod.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_COD_H
+#define JPC_COD_H
+
+/******************************************************************************\
+* Constants.
+\******************************************************************************/
+
+/* The nominal word size used by this implementation. */
+#define	JPC_PREC	32
+
+#endif
diff --git a/src/libjasper/jpc/jpc_cs.c b/src/libjasper/jpc/jpc_cs.c
new file mode 100644
index 0000000..0bd8400
--- /dev/null
+++ b/src/libjasper/jpc/jpc_cs.c
@@ -0,0 +1,1644 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * JPEG-2000 Code Stream Library
+ *
+ * $Id: jpc_cs.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdlib.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_debug.h"
+
+#include "jpc_cs.h"
+
+/******************************************************************************\
+* Types.
+\******************************************************************************/
+
+/* Marker segment table entry: ties a marker code to its name and handlers. */
+typedef struct {
+	int id;			/* Marker code; the table's final "UNKNOWN" entry uses -1. */
+	const char *name;	/* Name printed by jpc_ms_dump(); only ever a string literal, hence const. */
+	jpc_msops_t ops;	/* destroyparms/getparms/putparms/dumpparms handlers; 0 means none. */
+} jpc_mstabent_t;
+
+/******************************************************************************\
+* Local prototypes.
+\******************************************************************************/
+
+static jpc_mstabent_t *jpc_mstab_lookup(int id);
+/* POC marker segment operations. */
+static int jpc_poc_dumpparms(jpc_ms_t *ms, FILE *out);
+static int jpc_poc_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+static int jpc_poc_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+static void jpc_poc_destroyparms(jpc_ms_t *ms);
+/* getparms handlers: read a marker segment's parameters from a stream. */
+static int jpc_unk_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+static int jpc_sot_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+static int jpc_siz_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+static int jpc_cod_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+static int jpc_coc_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+static int jpc_qcd_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+static int jpc_qcc_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+static int jpc_rgn_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+static int jpc_sop_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+static int jpc_ppm_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+static int jpc_ppt_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+static int jpc_crg_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+static int jpc_com_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+/* putparms handlers: write a marker segment's parameters to a stream. */
+static int jpc_sot_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+static int jpc_siz_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+static int jpc_cod_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+static int jpc_coc_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+static int jpc_qcd_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+static int jpc_qcc_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+static int jpc_rgn_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+static int jpc_unk_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+static int jpc_sop_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+static int jpc_ppm_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+static int jpc_ppt_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+static int jpc_crg_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+static int jpc_com_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+/* dumpparms handlers: print a marker segment's parameters to a FILE. */
+static int jpc_sot_dumpparms(jpc_ms_t *ms, FILE *out);
+static int jpc_siz_dumpparms(jpc_ms_t *ms, FILE *out);
+static int jpc_cod_dumpparms(jpc_ms_t *ms, FILE *out);
+static int jpc_coc_dumpparms(jpc_ms_t *ms, FILE *out);
+static int jpc_qcd_dumpparms(jpc_ms_t *ms, FILE *out);
+static int jpc_qcc_dumpparms(jpc_ms_t *ms, FILE *out);
+static int jpc_rgn_dumpparms(jpc_ms_t *ms, FILE *out);
+static int jpc_unk_dumpparms(jpc_ms_t *ms, FILE *out);
+static int jpc_sop_dumpparms(jpc_ms_t *ms, FILE *out);
+static int jpc_ppm_dumpparms(jpc_ms_t *ms, FILE *out);
+static int jpc_ppt_dumpparms(jpc_ms_t *ms, FILE *out);
+static int jpc_crg_dumpparms(jpc_ms_t *ms, FILE *out);
+static int jpc_com_dumpparms(jpc_ms_t *ms, FILE *out);
+/* destroyparms handlers: clean up a marker segment's parameters. */
+static void jpc_siz_destroyparms(jpc_ms_t *ms);
+static void jpc_qcd_destroyparms(jpc_ms_t *ms);
+static void jpc_qcc_destroyparms(jpc_ms_t *ms);
+static void jpc_cod_destroyparms(jpc_ms_t *ms);
+static void jpc_coc_destroyparms(jpc_ms_t *ms);
+static void jpc_unk_destroyparms(jpc_ms_t *ms);
+static void jpc_ppm_destroyparms(jpc_ms_t *ms);
+static void jpc_ppt_destroyparms(jpc_ms_t *ms);
+static void jpc_crg_destroyparms(jpc_ms_t *ms);
+static void jpc_com_destroyparms(jpc_ms_t *ms);
+/* Helpers shared by QCD/QCC (jpc_qcx_*) and by COD/COC (jpc_cox_*). */
+static void jpc_qcx_destroycompparms(jpc_qcxcp_t *compparms);
+static int jpc_qcx_getcompparms(jpc_qcxcp_t *compparms, jpc_cstate_t *cstate,
+  jas_stream_t *in, uint_fast16_t len);
+static int jpc_qcx_putcompparms(jpc_qcxcp_t *compparms, jpc_cstate_t *cstate,
+  jas_stream_t *out);
+static void jpc_cox_destroycompparms(jpc_coxcp_t *compparms);
+static int jpc_cox_getcompparms(jpc_ms_t *ms, jpc_cstate_t *cstate,
+  jas_stream_t *in, int prtflag, jpc_coxcp_t *compparms);
+static int jpc_cox_putcompparms(jpc_ms_t *ms, jpc_cstate_t *cstate,
+  jas_stream_t *out, int prtflag, jpc_coxcp_t *compparms);
+
+/******************************************************************************\
+* Global data.
+\******************************************************************************/
+/* Marker segment table.  Each entry: marker code, name, then {destroyparms, getparms, putparms, dumpparms}; 0 = no handler. */
+static jpc_mstabent_t jpc_mstab[] = {
+	{JPC_MS_SOC, "SOC", {0, 0, 0, 0}},
+	{JPC_MS_SOT, "SOT", {0, jpc_sot_getparms, jpc_sot_putparms,
+	  jpc_sot_dumpparms}},
+	{JPC_MS_SOD, "SOD", {0, 0, 0, 0}},
+	{JPC_MS_EOC, "EOC", {0, 0, 0, 0}},
+	{JPC_MS_SIZ, "SIZ", {jpc_siz_destroyparms, jpc_siz_getparms,
+	  jpc_siz_putparms, jpc_siz_dumpparms}},
+	{JPC_MS_COD, "COD", {jpc_cod_destroyparms, jpc_cod_getparms,
+	  jpc_cod_putparms, jpc_cod_dumpparms}},
+	{JPC_MS_COC, "COC", {jpc_coc_destroyparms, jpc_coc_getparms,
+	  jpc_coc_putparms, jpc_coc_dumpparms}},
+	{JPC_MS_RGN, "RGN", {0, jpc_rgn_getparms, jpc_rgn_putparms,
+	  jpc_rgn_dumpparms}},
+	{JPC_MS_QCD, "QCD", {jpc_qcd_destroyparms, jpc_qcd_getparms,
+	  jpc_qcd_putparms, jpc_qcd_dumpparms}},
+	{JPC_MS_QCC, "QCC", {jpc_qcc_destroyparms, jpc_qcc_getparms,
+	  jpc_qcc_putparms, jpc_qcc_dumpparms}},
+	{JPC_MS_POC, "POC", {jpc_poc_destroyparms, jpc_poc_getparms,
+	  jpc_poc_putparms, jpc_poc_dumpparms}},
+	{JPC_MS_TLM, "TLM", {0, jpc_unk_getparms, jpc_unk_putparms, 0}},
+	{JPC_MS_PLM, "PLM", {0, jpc_unk_getparms, jpc_unk_putparms, 0}},	/* NOTE(review): TLM/PLM reuse jpc_unk_getparms but, unlike UNKNOWN, install no destroy handler -- confirm nothing leaks. */
+	{JPC_MS_PPM, "PPM", {jpc_ppm_destroyparms, jpc_ppm_getparms,
+	  jpc_ppm_putparms, jpc_ppm_dumpparms}},
+	{JPC_MS_PPT, "PPT", {jpc_ppt_destroyparms, jpc_ppt_getparms,
+	  jpc_ppt_putparms, jpc_ppt_dumpparms}},
+	{JPC_MS_SOP, "SOP", {0, jpc_sop_getparms, jpc_sop_putparms,
+	  jpc_sop_dumpparms}},
+	{JPC_MS_EPH, "EPH", {0, 0, 0, 0}},
+	{JPC_MS_CRG, "CRG", {0, jpc_crg_getparms, jpc_crg_putparms,
+	  jpc_crg_dumpparms}},	/* NOTE(review): jpc_crg_destroyparms is declared in this file but not installed here -- confirm this is intended. */
+	{JPC_MS_COM, "COM", {jpc_com_destroyparms, jpc_com_getparms,
+	  jpc_com_putparms, jpc_com_dumpparms}},
+	{-1, "UNKNOWN",  {jpc_unk_destroyparms, jpc_unk_getparms,
+	  jpc_unk_putparms, jpc_unk_dumpparms}}
+};
+
+/******************************************************************************\
+* Code stream manipulation functions.
+\******************************************************************************/
+
+/* Allocate a code stream state object with numcomps set to 0; returns 0 if the allocation fails. */
+jpc_cstate_t *jpc_cstate_create()
+{
+	jpc_cstate_t *state = jas_malloc(sizeof(jpc_cstate_t));
+
+	if (state) {
+		state->numcomps = 0;
+	}
+	return state;
+}
+
+/* Destroy a code stream state object by handing it to jas_free(). */
+void jpc_cstate_destroy(jpc_cstate_t *cstate)
+{
+	jas_free(cstate);
+}
+
+/* Read a marker segment from a stream.  Returns the new marker segment, or 0
+  on failure.  When the segment is a SIZ marker, cstate->numcomps is updated
+  from it, since other segments are parsed according to the component count. */
+jpc_ms_t *jpc_getms(jas_stream_t *in, jpc_cstate_t *cstate)
+{
+	jpc_ms_t *ms;
+	jas_stream_t *tmpstream;
+
+	if (!(ms = jpc_ms_create(0))) {
+		return 0;
+	}
+
+	/* Get the marker type. */
+	if (jpc_getuint16(in, &ms->id) || ms->id < JPC_MS_MIN ||
+	  ms->id > JPC_MS_MAX) {
+		jpc_ms_destroy(ms);
+		return 0;
+	}
+
+	ms->ops = &jpc_mstab_lookup(ms->id)->ops;
+
+	/* Get the marker segment length and parameters if present. */
+	/* Note: It is tacitly assumed that a marker segment cannot have
+	  parameters unless it has a length field.  That is, there cannot
+	  be a parameters field without a length field and vice versa. */
+	if (JPC_MS_HASPARMS(ms->id)) {
+		/* Get the length of the marker segment. */
+		if (jpc_getuint16(in, &ms->len) || ms->len < 3) {
+			jpc_ms_destroy(ms);
+			return 0;
+		}
+		/* Calculate the length of the marker segment parameters. */
+		ms->len -= 2;
+		/* Create and prepare a temporary memory stream from which to
+		  read the marker segment parameters. */
+		/* Note: This approach provides a simple way of ensuring that
+		  we never read beyond the end of the marker segment (even if
+		  the marker segment length is errantly set too small). */
+		if (!(tmpstream = jas_stream_memopen(0, 0))) {
+			jpc_ms_destroy(ms);
+			return 0;
+		}
+		if (jas_stream_copy(tmpstream, in, ms->len) ||
+		  jas_stream_seek(tmpstream, 0, SEEK_SET) < 0) {
+			jas_stream_close(tmpstream);
+			jpc_ms_destroy(ms);
+			return 0;
+		}
+		/* Get the marker segment parameters. */
+		if ((*ms->ops->getparms)(ms, cstate, tmpstream)) {
+			ms->ops = 0;	/* Keeps jpc_ms_destroy() from running destroyparms on partly read parameters. */
+			jpc_ms_destroy(ms);
+			jas_stream_close(tmpstream);
+			return 0;
+		}
+
+		if (jas_getdbglevel() > 0) {
+			jpc_ms_dump(ms, stderr);
+		}
+
+		if (JAS_CAST(unsigned long, jas_stream_tell(tmpstream)) != ms->len) { /* IMLIB - changed ulong to unsigned long */
+			jas_eprintf("warning: trailing garbage in marker segment (%ld bytes)\n",
+			  JAS_CAST(long, ms->len - jas_stream_tell(tmpstream)));	/* Cast so the argument really is the long that %ld expects. */
+		}
+
+		/* Close the temporary stream. */
+		jas_stream_close(tmpstream);
+
+	} else {
+		/* There are no marker segment parameters. */
+		ms->len = 0;
+
+		if (jas_getdbglevel() > 0) {
+			jpc_ms_dump(ms, stderr);
+		}
+	}
+
+	/* Update the code stream state information based on the type of
+	  marker segment read. */
+	/* Note: This is a bit of a hack, but I'm not going to define another
+	  type of virtual function for this one special case. */
+	if (ms->id == JPC_MS_SIZ) {
+		cstate->numcomps = ms->parms.siz.numcomps;
+	}
+
+	return ms;
+}
+
+/* Write a marker segment to a stream.  Returns 0 on success and -1 on failure. */
+int jpc_putms(jas_stream_t *out, jpc_cstate_t *cstate, jpc_ms_t *ms)
+{
+	jas_stream_t *tmpstream;
+	int len;
+
+	/* Output the marker segment type. */
+	if (jpc_putuint16(out, ms->id)) {
+		return -1;
+	}
+
+	/* Output the marker segment length and parameters if necessary. */
+	if (ms->ops->putparms) {
+		/* Buffer the parameter data in a temporary stream. */
+		if (!(tmpstream = jas_stream_memopen(0, 0))) {
+			return -1;
+		}
+		if ((*ms->ops->putparms)(ms, cstate, tmpstream)) {
+			jas_stream_close(tmpstream);
+			return -1;
+		}
+		/* Get the number of bytes of parameter data written.  The length
+		  field is 16 bits wide and counts its own two bytes, so data that
+		  cannot be represented is rejected instead of being truncated. */
+		if ((len = jas_stream_tell(tmpstream)) < 0 || len > 0xffff - 2) {
+			jas_stream_close(tmpstream);
+			return -1;
+		}
+		ms->len = len;
+		/* Write the marker segment length and parameter data to
+		  the output stream. */
+		if (jas_stream_seek(tmpstream, 0, SEEK_SET) < 0 ||
+		  jpc_putuint16(out, ms->len + 2) ||
+		  jas_stream_copy(out, tmpstream, ms->len) < 0) {
+			jas_stream_close(tmpstream);
+			return -1;
+		}
+		/* Close the temporary stream. */
+		jas_stream_close(tmpstream);
+	}
+
+	/* This is a bit of a hack, but I'm not going to define another
+	  type of virtual function for this one special case. */
+	if (ms->id == JPC_MS_SIZ) {
+		cstate->numcomps = ms->parms.siz.numcomps;
+	}
+
+	if (jas_getdbglevel() > 0) {
+		jpc_ms_dump(ms, stderr);
+	}
+
+	return 0;
+}
+
+/******************************************************************************\
+* Marker segment operations.
+\******************************************************************************/
+
+/* Allocate a marker segment of the given type with zeroed length and
+  parameters, bound to the handlers for that type; returns 0 on failure. */
+jpc_ms_t *jpc_ms_create(int type)
+{
+	jpc_ms_t *seg = jas_malloc(sizeof(jpc_ms_t));
+
+	if (!seg) {
+		return 0;
+	}
+	seg->id = type;
+	seg->len = 0;
+	seg->ops = &jpc_mstab_lookup(seg->id)->ops;
+	/* All parameter fields, pointers included, start out as zero. */
+	memset(&seg->parms, 0, sizeof(jpc_msparms_t));
+	return seg;
+}
+
+/* Destroy a marker segment, first running its destroyparms handler when one is set. */
+void jpc_ms_destroy(jpc_ms_t *ms)
+{
+	if (ms->ops != 0 && ms->ops->destroyparms != 0) {
+		ms->ops->destroyparms(ms);
+	}
+	jas_free(ms);
+}
+
+/* Dump a marker segment to a stream for debugging.  ms->id and ms->len are
+  filled by jpc_getuint16() and need not be int-sized, so both are cast. */
+void jpc_ms_dump(jpc_ms_t *ms, FILE *out)
+{
+	jpc_mstabent_t *mstabent = jpc_mstab_lookup(ms->id);
+	fprintf(out, "type = 0x%04x (%s);", (unsigned) ms->id, mstabent->name);
+	if (JPC_MS_HASPARMS(ms->id)) {
+		fprintf(out, " len = %d;", (int) (ms->len + 2));
+		if (ms->ops->dumpparms) {
+			(*ms->ops->dumpparms)(ms, out);
+		} else {
+			fprintf(out, "\n");
+		}
+	} else {
+		fprintf(out, "\n");
+	}
+}
+
+/******************************************************************************\
+* SOT marker segment operations.
+\******************************************************************************/
+
+static int jpc_sot_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in)
+{
+	/* Read tileno, len, partno and numparts, in that order; fail if any
+	  read fails or the stream reports end of file afterwards. */
+	jpc_sot_t *parms = &ms->parms.sot;
+	int failed;
+
+	(void) cstate;	/* Unused. */
+
+	failed = jpc_getuint16(in, &parms->tileno) ||
+	  jpc_getuint32(in, &parms->len) ||
+	  jpc_getuint8(in, &parms->partno) ||
+	  jpc_getuint8(in, &parms->numparts);
+	if (!failed) {
+		failed = jas_stream_eof(in) != 0;
+	}
+	return failed ? -1 : 0;
+}
+
+static int jpc_sot_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	jpc_sot_t *parms = &ms->parms.sot;
+
+	(void) cstate;	/* Unused. */
+
+	/* Write the fields in stream order; the first failing write aborts. */
+	if (jpc_putuint16(out, parms->tileno)) {
+		return -1;
+	}
+	if (jpc_putuint32(out, parms->len) || jpc_putuint8(out, parms->partno)) {
+		return -1;
+	}
+	return jpc_putuint8(out, parms->numparts) ? -1 : 0;
+}
+
+static int jpc_sot_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_sot_t *sot = &ms->parms.sot;	/* Fields are filled via jpc_getuint8/16/32(); cast each to match its conversion. */
+	fprintf(out, "tileno = %d; len = %lu; partno = %d; numparts = %d\n",
+	  (int) sot->tileno, (unsigned long) sot->len, (int) sot->partno, (int) sot->numparts);
+	return 0;
+}
+
+/******************************************************************************\
+* SIZ marker segment operations.
+\******************************************************************************/
+
+static void jpc_siz_destroyparms(jpc_ms_t *ms)
+{
+	/* Release the per-component array, if one was allocated. */
+	if (ms->parms.siz.comps != 0) {
+		jas_free(ms->parms.siz.comps);
+	}
+}
+
+/* Read the SIZ parameters.  Fails (-1) on a short read, on zero image/tile
+  dimensions or component count, or on a zero component sampling factor,
+  which the SIZ syntax does not allow (valid factors are 1 to 255). */
+static int jpc_siz_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate,
+  jas_stream_t *in)
+{
+	jpc_siz_t *siz = &ms->parms.siz;
+	unsigned int i;
+	uint_fast8_t tmp;
+
+	(void) cstate;	/* Unused. */
+
+	if (jpc_getuint16(in, &siz->caps) ||
+	  jpc_getuint32(in, &siz->width) || jpc_getuint32(in, &siz->height) ||
+	  jpc_getuint32(in, &siz->xoff) || jpc_getuint32(in, &siz->yoff) ||
+	  jpc_getuint32(in, &siz->tilewidth) || jpc_getuint32(in, &siz->tileheight) ||
+	  jpc_getuint32(in, &siz->tilexoff) || jpc_getuint32(in, &siz->tileyoff) ||
+	  jpc_getuint16(in, &siz->numcomps)) {
+		return -1;
+	}
+	if (!siz->width || !siz->height || !siz->tilewidth ||
+	  !siz->tileheight || !siz->numcomps) {
+		return -1;
+	}
+	if (!(siz->comps = jas_malloc(siz->numcomps * sizeof(jpc_sizcomp_t)))) {
+		return -1;
+	}
+	for (i = 0; i < siz->numcomps; ++i) {
+		/* First byte: sign flag in bit 7, precision - 1 in the low 7 bits. */
+		if (jpc_getuint8(in, &tmp) ||
+		  jpc_getuint8(in, &siz->comps[i].hsamp) ||
+		  jpc_getuint8(in, &siz->comps[i].vsamp) ||
+		  !siz->comps[i].hsamp || !siz->comps[i].vsamp) {
+			jas_free(siz->comps);
+			return -1;
+		}
+		siz->comps[i].sgnd = (tmp >> 7) & 1;
+		siz->comps[i].prec = (tmp & 0x7f) + 1;
+	}
+	if (jas_stream_eof(in)) {
+		jas_free(siz->comps);
+		return -1;
+	}
+	return 0;
+}
+
+static int jpc_siz_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	jpc_siz_t *siz = &ms->parms.siz;
+	jpc_sizcomp_t *comp;
+	unsigned int compno;
+
+	(void) cstate;	/* Unused. */
+
+	assert(siz->width && siz->height && siz->tilewidth &&
+	  siz->tileheight && siz->numcomps);
+	if (jpc_putuint16(out, siz->caps) ||
+	  jpc_putuint32(out, siz->width) || jpc_putuint32(out, siz->height) ||
+	  jpc_putuint32(out, siz->xoff) || jpc_putuint32(out, siz->yoff) ||
+	  jpc_putuint32(out, siz->tilewidth) || jpc_putuint32(out, siz->tileheight) ||
+	  jpc_putuint32(out, siz->tilexoff) || jpc_putuint32(out, siz->tileyoff) ||
+	  jpc_putuint16(out, siz->numcomps)) {
+		return -1;
+	}
+	/* Per component: sgnd (bit 7) | prec - 1 (bits 0-6), then hsamp and vsamp. */
+	for (compno = 0; compno < siz->numcomps; ++compno) {
+		comp = &siz->comps[compno];
+		if (jpc_putuint8(out, ((comp->sgnd & 1) << 7) |
+		  ((comp->prec - 1) & 0x7f))) {
+			return -1;
+		}
+		if (jpc_putuint8(out, comp->hsamp) || jpc_putuint8(out, comp->vsamp)) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+static int jpc_siz_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_siz_t *siz = &ms->parms.siz;	/* Fields are cast below so each argument matches its conversion. */
+	unsigned int i;
+	fprintf(out, "caps = 0x%02x;\n", (unsigned) siz->caps);
+	fprintf(out, "width = %lu; height = %lu; xoff = %lu; yoff = %lu;\n", (unsigned long) siz->width,
+	  (unsigned long) siz->height, (unsigned long) siz->xoff, (unsigned long) siz->yoff);
+	fprintf(out, "tilewidth = %lu; tileheight = %lu; tilexoff = %lu; "
+	  "tileyoff = %lu;\n", (unsigned long) siz->tilewidth, (unsigned long) siz->tileheight,
+	  (unsigned long) siz->tilexoff, (unsigned long) siz->tileyoff);
+	for (i = 0; i < siz->numcomps; ++i) {
+		fprintf(out, "prec[%u] = %d; sgnd[%u] = %d; hsamp[%u] = %d; "
+		  "vsamp[%u] = %d\n", i, (int) siz->comps[i].prec, i,
+		  (int) siz->comps[i].sgnd, i, (int) siz->comps[i].hsamp, i,
+		  (int) siz->comps[i].vsamp);
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* COD marker segment operations.
+\******************************************************************************/
+
+static void jpc_cod_destroyparms(jpc_ms_t *ms)
+{
+	/* Delegate to the helper shared by COD and COC. */
+	jpc_cox_destroycompparms(&ms->parms.cod.compparms);
+}
+
+static int jpc_cod_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in)
+{
+	jpc_cod_t *parms = &ms->parms.cod;
+	int prtflag;
+
+	/* Fixed fields first: csty, prg, numlyrs, mctrans. */
+	if (jpc_getuint8(in, &parms->csty) || jpc_getuint8(in, &parms->prg) ||
+	  jpc_getuint16(in, &parms->numlyrs) || jpc_getuint8(in, &parms->mctrans)) {
+		return -1;
+	}
+	/* The JPC_COX_PRT bit of csty selects whether per-level size bytes are read. */
+	prtflag = (parms->csty & JPC_COX_PRT) != 0;
+	if (jpc_cox_getcompparms(ms, cstate, in, prtflag, &parms->compparms)) {
+		return -1;
+	}
+	if (!jas_stream_eof(in)) {
+		return 0;
+	}
+	jpc_cod_destroyparms(ms);
+	return -1;
+}
+
+static int jpc_cod_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	jpc_cod_t *cod = &ms->parms.cod;
+	int prtflag = (cod->csty & JPC_COX_PRT) != 0;
+	assert(cod->numlyrs > 0 && cod->compparms.numdlvls <= 32);
+	assert(cod->compparms.numdlvls == cod->compparms.numrlvls - 1);
+	/* NOTE(review): the byte written is compparms.csty, while the flag above
+	  is taken from cod->csty -- confirm callers keep the two in sync. */
+	if (jpc_putuint8(out, cod->compparms.csty)) {
+		return -1;
+	}
+	if (jpc_putuint8(out, cod->prg) || jpc_putuint16(out, cod->numlyrs) ||
+	  jpc_putuint8(out, cod->mctrans)) {
+		return -1;
+	}
+	return jpc_cox_putcompparms(ms, cstate, out, prtflag, &cod->compparms) ? -1 : 0;
+}
+
+static int jpc_cod_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_cod_t *cod = &ms->parms.cod;
+	int i;
+	fprintf(out, "csty = 0x%02x;\n", (unsigned) cod->compparms.csty);
+	fprintf(out, "numdlvls = %d; qmfbid = %d; mctrans = %d\n", (int) cod->compparms.numdlvls,
+	  (int) cod->compparms.qmfbid, (int) cod->mctrans);
+	fprintf(out, "prg = %d; numlyrs = %d;\n",
+	  (int) cod->prg, (int) cod->numlyrs);
+	fprintf(out, "cblkwidthval = %d; cblkheightval = %d; "
+	  "cblksty = 0x%02x;\n", (int) cod->compparms.cblkwidthval, (int) cod->compparms.cblkheightval,
+	  (unsigned) cod->compparms.cblksty);
+	if (cod->csty & JPC_COX_PRT) {
+		for (i = 0; i < cod->compparms.numrlvls; ++i) {
+			/* Written to out like the rest of the dump, not through jas_eprintf(). */
+			fprintf(out, "prcwidth[%d] = %d, prcheight[%d] = %d\n", i,
+			  (int) cod->compparms.rlvls[i].parwidthval, i, (int) cod->compparms.rlvls[i].parheightval);
+		}
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* COC marker segment operations.
+\******************************************************************************/
+
+static void jpc_coc_destroyparms(jpc_ms_t *ms)
+{
+	/* Delegate to the helper shared by COD and COC. */
+	jpc_cox_destroycompparms(&ms->parms.coc.compparms);
+}
+
+static int jpc_coc_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in)
+{
+	jpc_coc_t *parms = &ms->parms.coc;
+	uint_fast8_t byte;
+
+	/* The component number takes one byte when there are at most 256
+	  components and two bytes otherwise. */
+	if (cstate->numcomps > 256) {
+		if (jpc_getuint16(in, &parms->compno)) {
+			return -1;
+		}
+	} else {
+		if (jpc_getuint8(in, &byte)) {
+			return -1;
+		}
+		parms->compno = byte;
+	}
+	if (jpc_getuint8(in, &parms->compparms.csty)) {
+		return -1;
+	}
+	if (jpc_cox_getcompparms(ms, cstate, in,
+	  (parms->compparms.csty & JPC_COX_PRT) != 0, &parms->compparms)) {
+		return -1;
+	}
+	return jas_stream_eof(in) ? -1 : 0;
+}
+
+static int jpc_coc_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	jpc_coc_t *coc = &ms->parms.coc;
+	int failed;
+	assert(coc->compparms.numdlvls <= 32);
+
+	/* One byte of component number for up to 256 components, else two. */
+	if (cstate->numcomps > 256) {
+		failed = jpc_putuint16(out, coc->compno) != 0;
+	} else {
+		failed = jpc_putuint8(out, coc->compno) != 0;
+	}
+	if (failed) {
+		return -1;
+	}
+	if (jpc_putuint8(out, coc->compparms.csty)) {
+		return -1;
+	}
+	/* The JPC_COX_PRT bit of csty selects whether per-level size bytes are written. */
+	return jpc_cox_putcompparms(ms, cstate, out,
+	  (coc->compparms.csty & JPC_COX_PRT) != 0, &coc->compparms) ? -1 : 0;
+}
+
+static int jpc_coc_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_coc_t *coc = &ms->parms.coc;	/* Fields are cast below so each argument matches its conversion. */
+	fprintf(out, "compno = %d; csty = 0x%02x; numdlvls = %d;\n", (int) coc->compno,
+	  (unsigned) coc->compparms.csty, (int) coc->compparms.numdlvls);
+	fprintf(out, "cblkwidthval = %d; cblkheightval = %d; "
+	  "cblksty = 0x%02x; qmfbid = %d;\n", (int) coc->compparms.cblkwidthval,
+	  (int) coc->compparms.cblkheightval, (unsigned) coc->compparms.cblksty, (int) coc->compparms.qmfbid);
+	return 0;
+}
+/******************************************************************************\
+* COD/COC marker segment operation helper functions.
+\******************************************************************************/
+
+static void jpc_cox_destroycompparms(jpc_coxcp_t *compparms)
+{
+	/* Nothing is released here; the parameter is deliberately unused. */
+	(void) compparms;
+}
+
+/* Read the component parameters shared by COD and COC; with prtflag set, one
+  size byte per resolution level follows.  Returns 0, or -1 on a short read
+  or if numdlvls exceeds 32 (the put-side limit), which bounds the rlvls[] loop. */
+static int jpc_cox_getcompparms(jpc_ms_t *ms, jpc_cstate_t *cstate,
+  jas_stream_t *in, int prtflag, jpc_coxcp_t *compparms)
+{
+	uint_fast8_t tmp;
+	int i;
+
+	(void) ms; (void) cstate;	/* Unused. */
+
+	if (jpc_getuint8(in, &compparms->numdlvls) ||
+	  jpc_getuint8(in, &compparms->cblkwidthval) ||
+	  jpc_getuint8(in, &compparms->cblkheightval) ||
+	  jpc_getuint8(in, &compparms->cblksty) ||
+	  jpc_getuint8(in, &compparms->qmfbid) || compparms->numdlvls > 32) {
+		return -1;
+	}
+	compparms->numrlvls = compparms->numdlvls + 1;
+	if (prtflag) {
+		for (i = 0; i < compparms->numrlvls; ++i) {
+			if (jpc_getuint8(in, &tmp)) {
+				jpc_cox_destroycompparms(compparms);
+				return -1;
+			}
+			compparms->rlvls[i].parwidthval = tmp & 0xf;
+			compparms->rlvls[i].parheightval = (tmp >> 4) & 0xf;
+		}
+		/* Sigh.  This bit should be in the same field in both COC and COD mrk segs. */
+		compparms->csty |= JPC_COX_PRT;
+	}
+	if (jas_stream_eof(in)) {
+		jpc_cox_destroycompparms(compparms);
+		return -1;
+	}
+	return 0;
+}
+
+static int jpc_cox_putcompparms(jpc_ms_t *ms, jpc_cstate_t *cstate,
+  jas_stream_t *out, int prtflag, jpc_coxcp_t *compparms)
+{
+	int lvl;
+	assert(compparms->numdlvls <= 32);
+
+	(void) ms; (void) cstate;	/* Unused. */
+
+	if (jpc_putuint8(out, compparms->numdlvls) ||
+	  jpc_putuint8(out, compparms->cblkwidthval) ||
+	  jpc_putuint8(out, compparms->cblkheightval) ||
+	  jpc_putuint8(out, compparms->cblksty) ||
+	  jpc_putuint8(out, compparms->qmfbid)) {
+		return -1;
+	}
+	if (!prtflag) {
+		return 0;
+	}
+	/* One byte per resolution level: parheightval in the high nibble, parwidthval in the low. */
+	for (lvl = 0; lvl < compparms->numrlvls; ++lvl) {
+		if (jpc_putuint8(out,
+		  ((compparms->rlvls[lvl].parheightval & 0xf) << 4) |
+		  (compparms->rlvls[lvl].parwidthval & 0xf))) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* RGN marker segment operations.
+\******************************************************************************/
+
+static int jpc_rgn_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in)
+{
+	jpc_rgn_t *parms = &ms->parms.rgn;
+	uint_fast8_t byte;
+	/* compno is one byte for up to 256 components, two bytes otherwise. */
+	if (cstate->numcomps > 256) {
+		if (jpc_getuint16(in, &parms->compno)) {
+			return -1;
+		}
+	} else {
+		if (jpc_getuint8(in, &byte)) {
+			return -1;
+		}
+		parms->compno = byte;
+	}
+	if (jpc_getuint8(in, &parms->roisty)) {
+		return -1;
+	}
+	return jpc_getuint8(in, &parms->roishift) ? -1 : 0;
+}
+
+static int jpc_rgn_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	jpc_rgn_t *parms = &ms->parms.rgn;
+	int failed;
+	/* compno is one byte for up to 256 components, two bytes otherwise. */
+	if (cstate->numcomps > 256) {
+		failed = jpc_putuint16(out, parms->compno) != 0;
+	} else {
+		failed = jpc_putuint8(out, parms->compno) != 0;
+	}
+	if (failed) {
+		return -1;
+	}
+	if (jpc_putuint8(out, parms->roisty) || jpc_putuint8(out, parms->roishift)) {
+		return -1;
+	}
+	return 0;
+}
+
+static int jpc_rgn_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_rgn_t *rgn = &ms->parms.rgn;	/* Fields are cast to int below to match %d. */
+	fprintf(out, "compno = %d; roisty = %d; roishift = %d\n",
+	  (int) rgn->compno, (int) rgn->roisty, (int) rgn->roishift);
+	return 0;
+}
+
+/******************************************************************************\
+* QCD marker segment operations.
+\******************************************************************************/
+
+static void jpc_qcd_destroyparms(jpc_ms_t *ms)
+{
+	/* Delegate to the helper shared by QCD and QCC. */
+	jpc_qcx_destroycompparms(&ms->parms.qcd.compparms);
+}
+
+static int jpc_qcd_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in)
+{
+	/* The whole of ms->len is handed to the shared QCD/QCC reader. */
+	return jpc_qcx_getcompparms(&ms->parms.qcd.compparms, cstate, in, ms->len);
+}
+
+static int jpc_qcd_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	/* QCD writes nothing beyond the shared QCD/QCC parameters. */
+	return jpc_qcx_putcompparms(&ms->parms.qcd.compparms, cstate, out);
+}
+
+static int jpc_qcd_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_qcxcp_t *cp = &ms->parms.qcd.compparms;	/* Summary line first, then one line per step size. */
+	int n;
+	fprintf(out, "qntsty = %d; numguard = %d; numstepsizes = %d\n",
+	  (int) cp->qntsty, cp->numguard, cp->numstepsizes);
+	for (n = 0; n < cp->numstepsizes; ++n) {
+		fprintf(out, "expn[%d] = 0x%04x; mant[%d] = 0x%04x;\n", n,
+		  (unsigned) JPC_QCX_GETEXPN(cp->stepsizes[n]), n,
+		  (unsigned) JPC_QCX_GETMANT(cp->stepsizes[n]));
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* QCC marker segment operations.
+\******************************************************************************/
+
+static void jpc_qcc_destroyparms(jpc_ms_t *ms)
+{
+	/* Delegate to the helper shared by QCD and QCC. */
+	jpc_qcx_destroycompparms(&ms->parms.qcc.compparms);
+}
+
+static int jpc_qcc_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in)
+{
+	jpc_qcc_t *qcc = &ms->parms.qcc;
+	uint_fast8_t tmp = 0;
+	int len = ms->len, failed;
+	/* compno takes one byte for up to 256 components, else two.  A failed read,
+	  or a segment with no bytes left for the shared parameters, is an error. */
+	if (cstate->numcomps <= 256) {
+		failed = jpc_getuint8(in, &tmp);
+		qcc->compno = tmp;
+	} else {
+		failed = jpc_getuint16(in, &qcc->compno);
+	}
+	len -= (cstate->numcomps <= 256) ? 1 : 2;
+	if (failed || len < 1 || jpc_qcx_getcompparms(&qcc->compparms, cstate, in, len)) {
+		return -1;
+	}
+	if (jas_stream_eof(in)) {
+		jpc_qcc_destroyparms(ms);
+		return -1;
+	}
+	return 0;
+}
+
+static int jpc_qcc_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	jpc_qcc_t *qcc = &ms->parms.qcc;
+	int failed;
+	/* compno takes one byte for up to 256 components, else two.  A failed
+	  write of it is reported instead of being ignored. */
+	if (cstate->numcomps <= 256) {
+		failed = jpc_putuint8(out, qcc->compno);
+	} else {
+		failed = jpc_putuint16(out, qcc->compno);
+	}
+	return (failed || jpc_qcx_putcompparms(&qcc->compparms, cstate, out)) ? -1 : 0;
+}
+
+static int jpc_qcc_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_qcc_t *qcc = &ms->parms.qcc;	/* Fields are cast to int below to match %d. */
+	int i;
+	fprintf(out, "compno = %d; qntsty = %d; numguard = %d; "
+	  "numstepsizes = %d\n", (int) qcc->compno, (int) qcc->compparms.qntsty, (int) qcc->compparms.numguard,
+	  (int) qcc->compparms.numstepsizes);
+	for (i = 0; i < qcc->compparms.numstepsizes; ++i) {
+		fprintf(out, "expn[%d] = 0x%04x; mant[%d] = 0x%04x;\n",
+		  i, (unsigned) JPC_QCX_GETEXPN(qcc->compparms.stepsizes[i]),
+		  i, (unsigned) JPC_QCX_GETMANT(qcc->compparms.stepsizes[i]));
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* QCD/QCC marker segment helper functions.
+\******************************************************************************/
+
+static void jpc_qcx_destroycompparms(jpc_qcxcp_t *compparms)
+{
+	if (compparms->stepsizes != 0) {	/* Only free a step size table that exists. */
+		jas_free(compparms->stepsizes);
+	}
+}
+
+/* Read the parameters shared by QCD and QCC from the len bytes left in the segment.  Returns 0 on success;
+  returns -1 if the style byte cannot be read (tmp is then never used), on allocation failure, or on stream error/EOF. */
+static int jpc_qcx_getcompparms(jpc_qcxcp_t *compparms, jpc_cstate_t *cstate,
+  jas_stream_t *in, uint_fast16_t len)
+{
+	uint_fast8_t tmp;
+	int n = 1;	/* Bytes taken by the style byte read below. */
+	int i;
+	(void) cstate;	/* Unused. */
+	if (jpc_getuint8(in, &tmp)) {
+		return -1;
+	}
+	compparms->qntsty = tmp & 0x1f;
+	compparms->numguard = (tmp >> 5) & 7;
+	switch (compparms->qntsty) {
+	case JPC_QCX_SIQNT:
+		compparms->numstepsizes = 1;
+		break;
+	case JPC_QCX_NOQNT:
+		compparms->numstepsizes = (len - n);
+		break;
+	case JPC_QCX_SEQNT:
+		/* XXX - this is a hack */
+		compparms->numstepsizes = (len - n) / 2;
+		break;
+	}
+	compparms->stepsizes = 0;
+	if (compparms->numstepsizes > 0) {
+		compparms->stepsizes = jas_malloc(compparms->numstepsizes *
+		  sizeof(uint_fast16_t));
+		if (!compparms->stepsizes) {
+			return -1;
+		}
+		for (i = 0; i < compparms->numstepsizes; ++i) {
+			if (compparms->qntsty == JPC_QCX_NOQNT) {
+				jpc_getuint8(in, &tmp);
+				compparms->stepsizes[i] = JPC_QCX_EXPN(tmp >> 3);
+			} else {
+				jpc_getuint16(in, &compparms->stepsizes[i]);
+			}
+		}
+	}
+	if (jas_stream_error(in) || jas_stream_eof(in)) {
+		jpc_qcx_destroycompparms(compparms);
+		return -1;
+	}
+	return 0;
+}
+
+/* Write the quantization parameters shared by the QCD and QCC marker
+   segments: one byte combining the guard bits and the quantization style,
+   followed by the step sizes.  Without quantization each step size is written
+   as a single byte holding its exponent; otherwise it is written as a 16-bit
+   value.  Returns 0 on success and -1 if any write fails. */
+static int jpc_qcx_putcompparms(jpc_qcxcp_t *compparms, jpc_cstate_t *cstate,
+  jas_stream_t *out)
+{
+	int i;
+
+	/* Eliminate compiler warning about unused variables. */
+	cstate = 0;
+
+	if (jpc_putuint8(out, ((compparms->numguard & 7) << 5) |
+	  compparms->qntsty)) {
+		return -1;
+	}
+	for (i = 0; i < compparms->numstepsizes; ++i) {
+		if (compparms->qntsty == JPC_QCX_NOQNT) {
+			if (jpc_putuint8(out, JPC_QCX_GETEXPN(
+			  compparms->stepsizes[i]) << 3)) {
+				return -1;
+			}
+		} else {
+			if (jpc_putuint16(out, compparms->stepsizes[i])) {
+				return -1;
+			}
+		}
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* SOP marker segment operations.
+\******************************************************************************/
+
+/* Read the parameters of an SOP marker segment, i.e. the 16-bit sequence
+   number.  Returns 0 on success and -1 on failure. */
+static int jpc_sop_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in)
+{
+	jpc_sop_t *sop;
+
+	/* The code stream state is not used here; the assignment only
+	  silences the unused-parameter warning. */
+	cstate = 0;
+
+	sop = &ms->parms.sop;
+	return jpc_getuint16(in, &sop->seqno) ? -1 : 0;
+}
+
+/* Write the parameters of an SOP marker segment, i.e. the 16-bit sequence
+   number.  Returns 0 on success and -1 on failure. */
+static int jpc_sop_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	jpc_sop_t *sop;
+
+	/* The code stream state is not used here; the assignment only
+	  silences the unused-parameter warning. */
+	cstate = 0;
+
+	sop = &ms->parms.sop;
+	return jpc_putuint16(out, sop->seqno) ? -1 : 0;
+}
+
+/* Print the sequence number of an SOP marker segment to out.
+   Always returns 0. */
+static int jpc_sop_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_sop_t *sop = &ms->parms.sop;
+	/* seqno is a uint_fast16_t, which need not have the width of int;
+	  convert it to match the %d conversion. */
+	fprintf(out, "seqno = %d;\n", (int) sop->seqno);
+	return 0;
+}
+
+/******************************************************************************\
+* PPM marker segment operations.
+\******************************************************************************/
+
+/* Release the data buffer of a PPM marker segment, if there is one. */
+static void jpc_ppm_destroyparms(jpc_ms_t *ms)
+{
+	jpc_ppm_t *ppm = &ms->parms.ppm;
+	if (!ppm->data) {
+		return;
+	}
+	jas_free(ppm->data);
+}
+
+/* Read the parameters of a PPM marker segment: a one-byte index followed by
+   the remaining bytes of the segment, which are stored in a newly allocated
+   buffer.  Returns 0 on success and -1 on failure. */
+static int jpc_ppm_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in)
+{
+	jpc_ppm_t *ppm = &ms->parms.ppm;
+
+	/* The code stream state is not used here; the assignment only
+	  silences the unused-parameter warning. */
+	cstate = 0;
+
+	/* Make the error path safe before anything has been allocated. */
+	ppm->data = 0;
+
+	/* The segment must hold at least the index byte. */
+	if (ms->len < 1 || jpc_getuint8(in, &ppm->ind)) {
+		goto error;
+	}
+
+	ppm->len = ms->len - 1;
+	if (ppm->len == 0) {
+		/* Nothing follows the index. */
+		ppm->data = 0;
+		return 0;
+	}
+
+	ppm->data = jas_malloc(ppm->len * sizeof(unsigned char));
+	if (!ppm->data) {
+		goto error;
+	}
+	/* IMLIB - changed uint to unsigned int */
+	if (JAS_CAST(unsigned int, jas_stream_read(in, ppm->data, ppm->len)) != ppm->len) {
+		goto error;
+	}
+	return 0;
+
+error:
+	jpc_ppm_destroyparms(ms);
+	return -1;
+}
+
+/* Write the parameters of a PPM marker segment: the one-byte index followed
+   by the data bytes.  Returns 0 on success and -1 on failure. */
+static int jpc_ppm_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	jpc_ppm_t *ppm = &ms->parms.ppm;
+
+	/* Eliminate compiler warning about unused variables. */
+	cstate = 0;
+
+	/* The reader consumes an index byte ahead of the data, so it has to be
+	  written here as well (as the PPT writer does); otherwise the segment
+	  cannot be read back. */
+	if (jpc_putuint8(out, ppm->ind)) {
+		return -1;
+	}
+	if (JAS_CAST(unsigned int, jas_stream_write(out, (char *) ppm->data, ppm->len)) != ppm->len) { /* IMLIB - changed uint to unsigned int */
+		return -1;
+	}
+	return 0;
+}
+
+/* Print the index and length of a PPM marker segment to out, followed by a
+   dump of its data when there is any.  Always returns 0. */
+static int jpc_ppm_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_ppm_t *ppm = &ms->parms.ppm;
+	/* The uint_fast*_t fields need not have the width of int, so convert
+	  them explicitly to match the %d conversions. */
+	fprintf(out, "ind=%d; len = %d;\n", (int) ppm->ind, (int) ppm->len);
+	if (ppm->len > 0) {
+		fprintf(out, "data =\n");
+		jas_memdump(out, ppm->data, ppm->len);
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* PPT marker segment operations.
+\******************************************************************************/
+
+/* Release the data buffer of a PPT marker segment, if there is one. */
+static void jpc_ppt_destroyparms(jpc_ms_t *ms)
+{
+	jpc_ppt_t *ppt = &ms->parms.ppt;
+	if (!ppt->data) {
+		return;
+	}
+	jas_free(ppt->data);
+}
+
+/* Read the parameters of a PPT marker segment: a one-byte index followed by
+   the remaining bytes of the segment, which are stored in a newly allocated
+   buffer.  Returns 0 on success and -1 on failure. */
+static int jpc_ppt_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in)
+{
+	jpc_ppt_t *ppt = &ms->parms.ppt;
+
+	/* The code stream state is not used here; the assignment only
+	  silences the unused-parameter warning. */
+	cstate = 0;
+
+	/* Make the error path safe before anything has been allocated. */
+	ppt->data = 0;
+
+	/* The segment must hold at least the index byte. */
+	if (ms->len < 1 || jpc_getuint8(in, &ppt->ind)) {
+		goto error;
+	}
+	ppt->len = ms->len - 1;
+	if (ppt->len == 0) {
+		/* Nothing follows the index. */
+		ppt->data = 0;
+		return 0;
+	}
+
+	ppt->data = jas_malloc(ppt->len * sizeof(unsigned char));
+	if (!ppt->data) {
+		goto error;
+	}
+	if (jas_stream_read(in, (char *) ppt->data, ppt->len) != JAS_CAST(int, ppt->len)) {
+		goto error;
+	}
+	return 0;
+
+error:
+	jpc_ppt_destroyparms(ms);
+	return -1;
+}
+
+/* Write the parameters of a PPT marker segment: the one-byte index followed
+   by the data bytes.  Returns 0 on success and -1 on failure. */
+static int jpc_ppt_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	jpc_ppt_t *ppt = &ms->parms.ppt;
+
+	/* The code stream state is not used here; the assignment only
+	  silences the unused-parameter warning. */
+	cstate = 0;
+
+	/* The data is written only if the index went out successfully. */
+	if (jpc_putuint8(out, ppt->ind) ||
+	  jas_stream_write(out, (char *) ppt->data, ppt->len) != JAS_CAST(int, ppt->len)) {
+		return -1;
+	}
+	return 0;
+}
+
+/* Print the index and length of a PPT marker segment to out, followed by a
+   dump of its data when there is any.  Always returns 0. */
+static int jpc_ppt_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_ppt_t *ppt = &ms->parms.ppt;
+	/* ind is a uint_fast8_t and len a uint_fast32_t; neither is guaranteed
+	  to have the width of int, so convert them to match the %d
+	  conversions. */
+	fprintf(out, "ind=%d; len = %d;\n", (int) ppt->ind, (int) ppt->len);
+	if (ppt->len > 0) {
+		fprintf(out, "data =\n");
+		jas_memdump(out, ppt->data, ppt->len);
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* POC marker segment operations.
+\******************************************************************************/
+
+/* Release the array of progression order changes of a POC marker segment,
+   if there is one. */
+static void jpc_poc_destroyparms(jpc_ms_t *ms)
+{
+	jpc_poc_t *poc = &ms->parms.poc;
+	if (!poc->pchgs) {
+		return;
+	}
+	jas_free(poc->pchgs);
+}
+
+/* Read the parameters of a POC marker segment, i.e. a sequence of
+   progression order changes.  The number of changes is derived from the
+   segment length and the size of one record.  A change whose start exceeds
+   its end (for resolution levels or components) is rejected.
+   Returns 0 on success and -1 on failure. */
+static int jpc_poc_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in)
+{
+	jpc_poc_t *poc = &ms->parms.poc;
+	jpc_pocpchg_t *pchg;
+	int pchgno;
+	uint_fast8_t tmp;
+	/* Component numbers take two bytes when there are more than 256
+	  components, which makes a record nine bytes long instead of seven. */
+	const int widecomps = (cstate->numcomps > 256);
+
+	poc->numpchgs = ms->len / (widecomps ? 9 : 7);
+	poc->pchgs = jas_malloc(poc->numpchgs * sizeof(jpc_pocpchg_t));
+	if (!poc->pchgs) {
+		goto error;
+	}
+	for (pchgno = 0; pchgno < poc->numpchgs; ++pchgno) {
+		pchg = &poc->pchgs[pchgno];
+
+		/* Start of the resolution level range. */
+		if (jpc_getuint8(in, &pchg->rlvlnostart)) {
+			goto error;
+		}
+
+		/* Start of the component range. */
+		if (widecomps) {
+			if (jpc_getuint16(in, &pchg->compnostart)) {
+				goto error;
+			}
+		} else {
+			if (jpc_getuint8(in, &tmp)) {
+				goto error;
+			}
+			pchg->compnostart = tmp;
+		}
+
+		/* End of the layer range, then end of the resolution level
+		  range. */
+		if (jpc_getuint16(in, &pchg->lyrnoend)) {
+			goto error;
+		}
+		if (jpc_getuint8(in, &pchg->rlvlnoend)) {
+			goto error;
+		}
+
+		/* End of the component range. */
+		if (widecomps) {
+			if (jpc_getuint16(in, &pchg->compnoend)) {
+				goto error;
+			}
+		} else {
+			if (jpc_getuint8(in, &tmp)) {
+				goto error;
+			}
+			pchg->compnoend = tmp;
+		}
+
+		/* Progression order. */
+		if (jpc_getuint8(in, &pchg->prgord)) {
+			goto error;
+		}
+
+		/* Reject inverted ranges. */
+		if (pchg->rlvlnostart > pchg->rlvlnoend) {
+			goto error;
+		}
+		if (pchg->compnostart > pchg->compnoend) {
+			goto error;
+		}
+	}
+	return 0;
+
+error:
+	jpc_poc_destroyparms(ms);
+	return -1;
+}
+
+/* Write the parameters of a POC marker segment, one record per progression
+   order change.  Component numbers are written as two bytes when the code
+   stream has more than 256 components and as one byte otherwise.
+   Returns 0 on success and -1 as soon as a write fails. */
+static int jpc_poc_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	jpc_poc_t *poc = &ms->parms.poc;
+	jpc_pocpchg_t *pchg;
+	int pchgno;
+	const int widecomps = (cstate->numcomps > 256);
+
+	for (pchgno = 0; pchgno < poc->numpchgs; ++pchgno) {
+		pchg = &poc->pchgs[pchgno];
+
+		if (jpc_putuint8(out, pchg->rlvlnostart)) {
+			return -1;
+		}
+		if (widecomps ? jpc_putuint16(out, pchg->compnostart) :
+		  jpc_putuint8(out, pchg->compnostart)) {
+			return -1;
+		}
+		if (jpc_putuint16(out, pchg->lyrnoend)) {
+			return -1;
+		}
+		if (jpc_putuint8(out, pchg->rlvlnoend)) {
+			return -1;
+		}
+		if (widecomps ? jpc_putuint16(out, pchg->compnoend) :
+		  jpc_putuint8(out, pchg->compnoend)) {
+			return -1;
+		}
+		if (jpc_putuint8(out, pchg->prgord)) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* Print every progression order change of a POC marker segment to out:
+   progression order, component range, resolution level range and layer end.
+   Always returns 0. */
+static int jpc_poc_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_poc_t *poc = &ms->parms.poc;
+	jpc_pocpchg_t *pchg;
+	int pchgno;
+	for (pchgno = 0, pchg = poc->pchgs; pchgno < poc->numpchgs;
+	  ++pchgno, ++pchg) {
+		/* The record fields are uint_fast*_t values, which need not
+		  have the width of int; convert them to match %d. */
+		fprintf(out, "po[%d] = %d; ", pchgno, (int) pchg->prgord);
+		fprintf(out, "cs[%d] = %d; ce[%d] = %d; ",
+		  pchgno, (int) pchg->compnostart, pchgno, (int) pchg->compnoend);
+		fprintf(out, "rs[%d] = %d; re[%d] = %d; ",
+		  pchgno, (int) pchg->rlvlnostart, pchgno, (int) pchg->rlvlnoend);
+		fprintf(out, "le[%d] = %d\n", pchgno, (int) pchg->lyrnoend);
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* CRG marker segment operations.
+\******************************************************************************/
+
+/* Release the per-component array of a CRG marker segment, if there is
+   one. */
+static void jpc_crg_destroyparms(jpc_ms_t *ms)
+{
+	jpc_crg_t *crg = &ms->parms.crg;
+	if (!crg->comps) {
+		return;
+	}
+	jas_free(crg->comps);
+}
+
+/* Read the parameters of a CRG marker segment: one horizontal and one
+   vertical offset for each component of the code stream.  The number of
+   components is taken from the code stream state.  Returns 0 on success and
+   -1 on failure. */
+static int jpc_crg_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in)
+{
+	jpc_crg_t *crg = &ms->parms.crg;
+	jpc_crgcomp_t *comp;
+	uint_fast16_t compno;
+	crg->numcomps = cstate->numcomps;
+	/* Each element is a jpc_crgcomp_t holding two offsets, so the array
+	  must be sized by that type.  Sizing it by uint_fast16_t allocates
+	  only half of what the loop below writes. */
+	if (!(crg->comps = jas_malloc(cstate->numcomps * sizeof(jpc_crgcomp_t)))) {
+		return -1;
+	}
+	for (compno = 0, comp = crg->comps; compno < cstate->numcomps;
+	  ++compno, ++comp) {
+		if (jpc_getuint16(in, &comp->hoff) ||
+		  jpc_getuint16(in, &comp->voff)) {
+			jpc_crg_destroyparms(ms);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* Write the parameters of a CRG marker segment: the horizontal and vertical
+   offset of every component, in that order.  Returns 0 on success and -1 as
+   soon as a write fails. */
+static int jpc_crg_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	jpc_crg_t *crg = &ms->parms.crg;
+	int compno;
+	jpc_crgcomp_t *comp;
+
+	/* The code stream state is not used here; the assignment only
+	  silences the unused-parameter warning. */
+	cstate = 0;
+
+	for (compno = 0; compno < crg->numcomps; ++compno) {
+		comp = &crg->comps[compno];
+		if (jpc_putuint16(out, comp->hoff)) {
+			return -1;
+		}
+		if (jpc_putuint16(out, comp->voff)) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* Print the horizontal and vertical offset of every component of a CRG
+   marker segment to out.  Always returns 0. */
+static int jpc_crg_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_crg_t *crg = &ms->parms.crg;
+	int compno;
+	jpc_crgcomp_t *comp;
+	for (compno = 0, comp = crg->comps; compno < crg->numcomps; ++compno,
+	  ++comp) {
+		/* The offsets are uint_fast16_t values, which need not have
+		  the width of int; convert them to match %d. */
+		fprintf(out, "hoff[%d] = %d; voff[%d] = %d\n", compno,
+		  (int) comp->hoff, compno, (int) comp->voff);
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* Operations for COM marker segment.
+\******************************************************************************/
+
+/* Release the data buffer of a COM marker segment, if there is one. */
+static void jpc_com_destroyparms(jpc_ms_t *ms)
+{
+	jpc_com_t *com = &ms->parms.com;
+	if (!com->data) {
+		return;
+	}
+	jas_free(com->data);
+}
+
+/* Read the parameters of a COM marker segment: a 16-bit registration ID
+   followed by the remaining bytes of the segment, which are stored in a newly
+   allocated buffer.  Returns 0 on success and -1 on failure; on failure no
+   buffer is left allocated. */
+static int jpc_com_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in)
+{
+	jpc_com_t *com = &ms->parms.com;
+
+	/* Eliminate compiler warning about unused variables. */
+	cstate = 0;
+
+	/* Make the error path safe before anything has been allocated. */
+	com->data = 0;
+
+	/* The registration ID alone occupies two bytes; a shorter segment
+	  would make the length computation below wrap around. */
+	if (ms->len < 2) {
+		goto error;
+	}
+	if (jpc_getuint16(in, &com->regid)) {
+		goto error;
+	}
+	com->len = ms->len - 2;
+	if (com->len > 0) {
+		if (!(com->data = jas_malloc(com->len))) {
+			goto error;
+		}
+		if (jas_stream_read(in, com->data, com->len) != JAS_CAST(int, com->len)) {
+			goto error;
+		}
+	}
+	return 0;
+
+error:
+	/* Free the buffer on failure instead of leaking it, and clear the
+	  pointer so that a later destroy call cannot free it twice. */
+	if (com->data) {
+		jas_free(com->data);
+		com->data = 0;
+	}
+	return -1;
+}
+
+/* Write the parameters of a COM marker segment: the 16-bit registration ID
+   followed by the data bytes.  Returns 0 on success and -1 on failure. */
+static int jpc_com_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	jpc_com_t *com = &ms->parms.com;
+
+	/* The code stream state is not used here; the assignment only
+	  silences the unused-parameter warning. */
+	cstate = 0;
+
+	/* The data is written only if the registration ID went out
+	  successfully. */
+	if (jpc_putuint16(out, com->regid) ||
+	  jas_stream_write(out, com->data, com->len) != JAS_CAST(int, com->len)) {
+		return -1;
+	}
+	return 0;
+}
+
+/* Print the registration ID of a COM marker segment to out and, when every
+   data byte is a printable character, the data itself.  Always returns 0. */
+static int jpc_com_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_com_t *com = &ms->parms.com;
+	unsigned int i;
+	int printable;
+	/* regid is a uint_fast16_t, which need not have the width of int;
+	  convert it to match the %d conversion. */
+	fprintf(out, "regid = %d;\n", (int) com->regid);
+	printable = 1;
+	for (i = 0; i < com->len; ++i) {
+		if (!isprint(com->data[i])) {
+			printable = 0;
+			break;
+		}
+	}
+	if (printable) {
+		fprintf(out, "data = ");
+		fwrite(com->data, sizeof(char), com->len, out);
+		fprintf(out, "\n");
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* Operations for unknown types of marker segments.
+\******************************************************************************/
+
+/* Release the data buffer of a marker segment of unknown type, if there is
+   one. */
+static void jpc_unk_destroyparms(jpc_ms_t *ms)
+{
+	jpc_unk_t *unk = &ms->parms.unk;
+	if (!unk->data) {
+		return;
+	}
+	jas_free(unk->data);
+}
+
+/* Read the body of a marker segment of unknown type as raw bytes into a
+   newly allocated buffer.  An empty segment yields a null buffer of length
+   zero.  Returns 0 on success and -1 on failure. */
+static int jpc_unk_getparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in)
+{
+	jpc_unk_t *unk = &ms->parms.unk;
+
+	/* The code stream state is not used here; the assignment only
+	  silences the unused-parameter warning. */
+	cstate = 0;
+
+	if (ms->len == 0) {
+		unk->data = 0;
+		unk->len = 0;
+		return 0;
+	}
+
+	unk->data = jas_malloc(ms->len * sizeof(unsigned char));
+	if (!unk->data) {
+		return -1;
+	}
+	if (jas_stream_read(in, (char *) unk->data, ms->len) != JAS_CAST(int, ms->len)) {
+		/* A short read discards the partially filled buffer. */
+		jas_free(unk->data);
+		return -1;
+	}
+	unk->len = ms->len;
+	return 0;
+}
+
+/* Writing a marker segment of unknown type is not supported: this function
+   writes nothing and always returns -1. */
+static int jpc_unk_putparms(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out)
+{
+	/* None of the parameters is used; the assignments only silence the
+	  unused-parameter warnings. */
+	ms = 0;
+	cstate = 0;
+	out = 0;
+
+	/* Reaching this point means that an attempt was made to write an
+	  unsupported type of marker segment. */
+	return -1;
+}
+
+/* Print the bytes of a marker segment of unknown type to out as two-digit
+   hexadecimal values separated by spaces.  Always returns 0. */
+static int jpc_unk_dumpparms(jpc_ms_t *ms, FILE *out)
+{
+	jpc_unk_t *unk = &ms->parms.unk;
+	unsigned int pos = 0;
+	while (pos < unk->len) {
+		fprintf(out, "%02x ", unk->data[pos]);
+		++pos;
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* Primitive I/O operations.
+\******************************************************************************/
+
+/* Read one byte from a stream.  The value is stored through val unless val
+   is null.  Returns 0 on success and -1 if no byte could be read. */
+int jpc_getuint8(jas_stream_t *in, uint_fast8_t *val)
+{
+	int c;
+	c = jas_stream_getc(in);
+	if (c == EOF) {
+		return -1;
+	}
+	if (!val) {
+		return 0;
+	}
+	*val = c;
+	return 0;
+}
+
+/* Write the low eight bits of val to a stream as one byte.
+   Returns 0 on success and -1 on failure. */
+int jpc_putuint8(jas_stream_t *out, uint_fast8_t val)
+{
+	return (jas_stream_putc(out, val & 0xff) == EOF) ? -1 : 0;
+}
+
+/* Read a 16-bit value from a stream, most significant byte first.  The value
+   is stored through val unless val is null.  Returns 0 on success and -1 if
+   the stream ends before both bytes have been read. */
+int jpc_getuint16(jas_stream_t *in, uint_fast16_t *val)
+{
+	uint_fast16_t v = 0;
+	int c;
+	int i;
+	for (i = 0; i < 2; ++i) {
+		c = jas_stream_getc(in);
+		if (c == EOF) {
+			return -1;
+		}
+		/* Make room for the next byte and append it. */
+		v = (v << 8) | c;
+	}
+	if (val) {
+		*val = v;
+	}
+	return 0;
+}
+
+/* Write the low 16 bits of val to a stream, most significant byte first.
+   Returns 0 on success and -1 as soon as a byte cannot be written. */
+int jpc_putuint16(jas_stream_t *out, uint_fast16_t val)
+{
+	if (jas_stream_putc(out, (val >> 8) & 0xff) == EOF) {
+		return -1;
+	}
+	if (jas_stream_putc(out, val & 0xff) == EOF) {
+		return -1;
+	}
+	return 0;
+}
+
+/* Read a 32-bit value from a stream, most significant byte first.  The value
+   is stored through val unless val is null.  Returns 0 on success and -1 if
+   the stream ends before all four bytes have been read. */
+int jpc_getuint32(jas_stream_t *in, uint_fast32_t *val)
+{
+	uint_fast32_t v = 0;
+	int c;
+	int i;
+	for (i = 0; i < 4; ++i) {
+		c = jas_stream_getc(in);
+		if (c == EOF) {
+			return -1;
+		}
+		/* Make room for the next byte and append it. */
+		v = (v << 8) | c;
+	}
+	if (val) {
+		*val = v;
+	}
+	return 0;
+}
+
+/* Write the low 32 bits of val to a stream, most significant byte first.
+   Returns 0 on success and -1 as soon as a byte cannot be written. */
+int jpc_putuint32(jas_stream_t *out, uint_fast32_t val)
+{
+	int shift;
+	for (shift = 24; shift >= 0; shift -= 8) {
+		if (jas_stream_putc(out, (val >> shift) & 0xff) == EOF) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* Miscellany
+\******************************************************************************/
+
+/* Find the entry of the marker segment table for the given marker ID.
+   The scan stops at the first entry whose ID matches or is negative, so an
+   unmatched ID yields the entry with the negative ID. */
+static jpc_mstabent_t *jpc_mstab_lookup(int id)
+{
+	jpc_mstabent_t *mstabent = jpc_mstab;
+	while (mstabent->id != id && mstabent->id >= 0) {
+		++mstabent;
+	}
+	return mstabent;
+}
+
+/* Check whether a stream starts with the SOC marker.  The bytes examined
+   are pushed back onto the stream, so the read position is left unchanged.
+   Returns 0 if the marker is present and -1 otherwise (including on a read
+   or push-back failure). */
+int jpc_validate(jas_stream_t *in)
+{
+	unsigned char buf[2];
+	int n;
+	int i;
+
+	/* Both bytes read below must fit into the push-back buffer. */
+	assert(JAS_STREAM_MAXPUTBACK >= 2);
+
+	n = jas_stream_read(in, (char *) buf, 2);
+	if (n < 0) {
+		return -1;
+	}
+
+	/* Return the bytes in reverse order so that the stream yields them
+	  again in their original order. */
+	i = n;
+	while (i-- > 0) {
+		if (jas_stream_ungetc(in, buf[i]) == EOF) {
+			return -1;
+		}
+	}
+
+	if (n < 2) {
+		return -1;
+	}
+	if (buf[0] != (JPC_MS_SOC >> 8) || buf[1] != (JPC_MS_SOC & 0xff)) {
+		return -1;
+	}
+	return 0;
+}
+
+/* Copy len units of data from in to out by means of jas_stream_copy and
+   return that function's result. */
+int jpc_getdata(jas_stream_t *in, jas_stream_t *out, long len)
+{
+	int ret;
+	ret = jas_stream_copy(out, in, len);
+	return ret;
+}
+
+/* Copy len units of data from in to out by means of jas_stream_copy and
+   return that function's result. */
+int jpc_putdata(jas_stream_t *out, jas_stream_t *in, long len)
+{
+	int ret;
+	ret = jas_stream_copy(out, in, len);
+	return ret;
+}
diff --git a/src/libjasper/jpc/jpc_cs.h b/src/libjasper/jpc/jpc_cs.h
new file mode 100644
index 0000000..49140d4
--- /dev/null
+++ b/src/libjasper/jpc/jpc_cs.h
@@ -0,0 +1,763 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * JPEG-2000 Code Stream Library
+ *
+ * $Id: jpc_cs.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_CS_H
+#define JPC_CS_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_image.h"
+#include "jasper/jas_stream.h"
+
+#include "jpc_cod.h"
+
+/******************************************************************************\
+* Constants and Types.
+\******************************************************************************/
+
+/* The maximum number of resolution levels. */
+#define	JPC_MAXRLVLS	33
+
+/* The maximum number of bands. */
+#define	JPC_MAXBANDS	(3 * JPC_MAXRLVLS + 1)
+
+/* The maximum number of layers. */
+#define JPC_MAXLYRS	16384
+
+/**************************************\
+* Code stream.
+\**************************************/
+
+/*
+ * Code stream states.
+ */
+
+/* Initial. */
+#define	JPC_CS_INIT	0
+/* Main header. */
+#define	JPC_CS_MHDR	1
+/* Tile-part header. */
+#define	JPC_CS_THDR	2
+/* Main trailer. */
+#define	JPC_CS_MTLR	3
+/* Tile-part data. */
+#define	JPC_CS_TDATA	4
+
+/*
+ * Unfortunately, the code stream syntax was not designed in such a way that
+ * any given marker segment can be correctly decoded without additional state
+ * derived from previously decoded marker segments.
+ * For example, a RGN/COC/QCC marker segment cannot be decoded unless the
+ * number of components is known.
+ */
+
+/*
+ * Code stream state information.
+ */
+
+typedef struct {
+
+	/* The number of components.  The marker segment code compares this
+	  value against 256 to decide whether a component number is coded in
+	  one byte or in two. */
+	uint_fast16_t numcomps;
+
+} jpc_cstate_t;
+
+/**************************************\
+* SOT marker segment parameters.
+\**************************************/
+
+typedef struct {
+
+	/* The tile number. */
+	uint_fast16_t tileno;
+
+	/* The combined length of the marker segment and its auxiliary data
+	  (i.e., packet data). */
+	uint_fast32_t len;
+
+	/* The tile-part instance. */
+	uint_fast8_t partno;
+
+	/* The number of tile-parts. */
+	uint_fast8_t numparts;
+
+} jpc_sot_t;
+
+/**************************************\
+* SIZ marker segment parameters.
+\**************************************/
+
+/* Per component information. */
+
+typedef struct {
+
+	/* The precision of the samples. */
+	uint_fast8_t prec;
+
+	/* The signedness of the samples. */
+	uint_fast8_t sgnd;
+
+	/* The horizontal separation of samples with respect to the reference
+	  grid. */
+	uint_fast8_t hsamp;
+
+	/* The vertical separation of samples with respect to the reference
+	  grid. */
+	uint_fast8_t vsamp;
+
+} jpc_sizcomp_t;
+
+/* SIZ marker segment parameters. */
+
+typedef struct {
+
+	/* The code stream capabilities. */
+	uint_fast16_t caps;
+
+	/* The width of the image in units of the reference grid. */
+	uint_fast32_t width;
+
+	/* The height of the image in units of the reference grid. */
+	uint_fast32_t height;
+
+	/* The horizontal offset from the origin of the reference grid to the
+	  left side of the image area. */
+	uint_fast32_t xoff;
+
+	/* The vertical offset from the origin of the reference grid to the
+	  top side of the image area. */
+	uint_fast32_t yoff;
+
+	/* The nominal width of a tile in units of the reference grid. */
+	uint_fast32_t tilewidth;
+
+	/* The nominal height of a tile in units of the reference grid. */
+	uint_fast32_t tileheight;
+
+	/* The horizontal offset from the origin of the reference grid to the
+	  left side of the first tile. */
+	uint_fast32_t tilexoff;
+
+	/* The vertical offset from the origin of the reference grid to the
+	  top side of the first tile. */
+	uint_fast32_t tileyoff;
+
+	/* The number of components. */
+	uint_fast16_t numcomps;
+
+	/* The per-component information. */
+	jpc_sizcomp_t *comps;
+
+} jpc_siz_t;
+
+/**************************************\
+* COD marker segment parameters.
+\**************************************/
+
+/*
+ * Coding style constants.
+ */
+
+/* Precincts may be used. */
+#define	JPC_COX_PRT	0x01
+/* SOP marker segments may be used. */
+#define	JPC_COD_SOP	0x02
+/* EPH marker segments may be used. */
+#define	JPC_COD_EPH	0x04
+
+/*
+ * Progression order constants.
+ */
+
+/* Layer-resolution-component-precinct progressive
+  (i.e., progressive by fidelity). */
+#define	JPC_COD_LRCPPRG	0
+/* Resolution-layer-component-precinct progressive
+  (i.e., progressive by resolution). */
+#define	JPC_COD_RLCPPRG	1
+/* Resolution-precinct-component-layer progressive. */
+#define	JPC_COD_RPCLPRG	2
+/* Precinct-component-resolution-layer progressive. */
+#define	JPC_COD_PCRLPRG	3
+/* Component-position-resolution-layer progressive. */
+#define	JPC_COD_CPRLPRG	4
+
+/*
+ * Code block style constants.
+ */
+
+#define	JPC_COX_LAZY	0x01 /* Selective arithmetic coding bypass. */
+#define	JPC_COX_RESET	0x02 /* Reset context probabilities. */
+#define	JPC_COX_TERMALL	0x04 /* Terminate all coding passes. */
+#define	JPC_COX_VSC		0x08 /* Vertical stripe causal context formation. */
+#define	JPC_COX_PTERM	0x10 /* Predictable termination. */
+#define	JPC_COX_SEGSYM	0x20 /* Use segmentation symbols. */
+
+/* Transform constants. */
+#define	JPC_COX_INS	0x00 /* Irreversible 9/7. */
+#define	JPC_COX_RFT	0x01 /* Reversible 5/3. */
+
+/* Multicomponent transform constants. */
+#define	JPC_COD_NOMCT	0x00 /* No multicomponent transform. */
+#define	JPC_COD_MCT		0x01 /* Multicomponent transform. */
+
+/* Get the code block size value from the code block size exponent. */
+#define	JPC_COX_CBLKSIZEEXPN(x)		((x) - 2)
+/* Get the code block size exponent from the code block size value. */
+#define	JPC_COX_GETCBLKSIZEEXPN(x)	((x) + 2)
+
+/* Per resolution-level information. */
+
+typedef struct {
+
+	/* The packet partition width. */
+	uint_fast8_t parwidthval;
+
+	/* The packet partition height. */
+	uint_fast8_t parheightval;
+
+} jpc_coxrlvl_t;
+
+/* Per component information. */
+
+typedef struct {
+
+	/* The coding style. */
+	uint_fast8_t csty;
+
+	/* The number of decomposition levels. */
+	uint_fast8_t numdlvls;
+
+	/* The nominal code block width specifier. */
+	uint_fast8_t cblkwidthval;
+
+	/* The nominal code block height specifier. */
+	uint_fast8_t cblkheightval;
+
+	/* The style of coding passes. */
+	uint_fast8_t cblksty;
+
+	/* The QMFB employed. */
+	uint_fast8_t qmfbid;
+
+	/* The number of resolution levels. */
+	int numrlvls;
+
+	/* The per-resolution-level information. */
+	jpc_coxrlvl_t rlvls[JPC_MAXRLVLS];
+
+} jpc_coxcp_t;
+
+/* COD marker segment parameters. */
+
+typedef struct {
+
+	/* The general coding style. */
+	uint_fast8_t csty;
+
+	/* The progression order. */
+	uint_fast8_t prg;
+
+	/* The number of layers. */
+	uint_fast16_t numlyrs;
+
+	/* The multicomponent transform. */
+	uint_fast8_t mctrans;
+
+	/* Component-related parameters. */
+	jpc_coxcp_t compparms;
+
+} jpc_cod_t;
+
+/* COC marker segment parameters. */
+
+typedef struct {
+
+	/* The component number. */
+	uint_fast16_t compno;
+
+	/* Component-related parameters. */
+	jpc_coxcp_t compparms;
+
+} jpc_coc_t;
+
+/**************************************\
+* RGN marker segment parameters.
+\**************************************/
+
+/* The maxshift ROI style. */
+#define	JPC_RGN_MAXSHIFT	0x00
+
+typedef struct {
+
+	/* The component to which the marker applies. */
+	uint_fast16_t compno;
+
+	/* The ROI style. */
+	uint_fast8_t roisty;
+
+	/* The ROI shift value. */
+	uint_fast8_t roishift;
+
+} jpc_rgn_t;
+
+/**************************************\
+* QCD/QCC marker segment parameters.
+\**************************************/
+
+/*
+ * Quantization style constants.
+ */
+
+#define	JPC_QCX_NOQNT	0 /* No quantization. */
+#define	JPC_QCX_SIQNT	1 /* Scalar quantization, implicit. */
+#define	JPC_QCX_SEQNT	2 /* Scalar quantization, explicit. */
+
+/*
+ * Stepsize manipulation macros.
+ */
+
+/* Extract the exponent of a step size (the bits above the low 11 bits). */
+#define	JPC_QCX_GETEXPN(x)	((x) >> 11)
+/* Extract the mantissa of a step size (the low 11 bits). */
+#define	JPC_QCX_GETMANT(x)	((x) & 0x07ff)
+/* Place an exponent at its position within a step size.  Asserts that the
+  exponent fits in five bits. */
+#define	JPC_QCX_EXPN(x)		(assert(!((x) & (~0x1f))), (((x) & 0x1f) << 11))
+/* Yield the mantissa part of a step size.  Asserts that the mantissa fits in
+  11 bits. */
+#define	JPC_QCX_MANT(x)		(assert(!((x) & (~0x7ff))), ((x) & 0x7ff))
+
+/* Per component information. */
+
+typedef struct {
+
+	/* The quantization style (one of the JPC_QCX_*QNT constants). */
+	uint_fast8_t qntsty;
+
+	/* The number of step sizes. */
+	int numstepsizes;
+
+	/* The step sizes: an array of numstepsizes entries, or null when
+	  there are none.  Each entry combines an exponent and a mantissa as
+	  laid out by the stepsize manipulation macros. */
+	uint_fast16_t *stepsizes;
+
+	/* The number of guard bits. */
+	uint_fast8_t numguard;
+
+} jpc_qcxcp_t;
+
+/* QCC marker segment parameters. */
+
+typedef struct {
+
+	/* The component associated with this marker segment. */
+	uint_fast16_t compno;
+
+	/* The parameters. */
+	jpc_qcxcp_t compparms;
+
+} jpc_qcc_t;
+
+/* QCD marker segment parameters. */
+
+typedef struct {
+
+	/* The parameters. */
+	jpc_qcxcp_t compparms;
+
+} jpc_qcd_t;
+
+/**************************************\
+* POC marker segment parameters.
+\**************************************/
+
+/* A single progression order change.  When such a record is read from a POC
+  marker segment, it is rejected if rlvlnostart exceeds rlvlnoend or if
+  compnostart exceeds compnoend. */
+typedef struct {
+
+	/* The progression order. */
+	uint_fast8_t prgord;
+
+	/* The lower bound (inclusive) on the resolution level for the
+	  progression order volume. */
+	uint_fast8_t rlvlnostart;
+
+	/* The upper bound (exclusive) on the resolution level for the
+	  progression order volume. */
+	uint_fast8_t rlvlnoend;
+
+	/* The lower bound (inclusive) on the component for the progression
+	  order volume. */
+	uint_fast16_t compnostart;
+
+	/* The upper bound (exclusive) on the component for the progression
+	  order volume. */
+	uint_fast16_t compnoend;
+
+	/* The upper bound (exclusive) on the layer for the progression
+	  order volume. */
+	uint_fast16_t lyrnoend;
+
+} jpc_pocpchg_t;
+
+/* An alias for the above type. */
+typedef jpc_pocpchg_t jpc_pchg_t;
+
+/* POC marker segment parameters. */
+
+typedef struct {
+
+	/* The number of progression order changes. */
+	int numpchgs;
+
+	/* The per-progression-order-change information: an array of numpchgs
+	  entries. */
+	jpc_pocpchg_t *pchgs;
+
+} jpc_poc_t;
+
+/**************************************\
+* PPM/PPT marker segment parameters.
+\**************************************/
+
+/* PPM marker segment parameters. */
+
+typedef struct {
+
+	/* The index (the first byte of the marker segment). */
+	uint_fast8_t ind;
+
+	/* The length of the data in bytes, i.e., the length of the marker
+	  segment less the index byte. */
+	uint_fast16_t len;
+
+	/* The data (null when the length is zero). */
+	uchar *data;
+
+} jpc_ppm_t;
+
+/* PPT marker segment parameters. */
+
+typedef struct {
+
+	/* The index (the first byte of the marker segment). */
+	uint_fast8_t ind;
+
+	/* The length of the data in bytes, i.e., the length of the marker
+	  segment less the index byte. */
+	uint_fast32_t len;
+
+	/* The data (null when the length is zero). */
+	unsigned char *data;
+
+} jpc_ppt_t;
+
+/**************************************\
+* COM marker segment parameters.
+\**************************************/
+
+/*
+ * Registration IDs.
+ */
+
+#define	JPC_COM_BIN		0x00
+#define	JPC_COM_LATIN	0x01
+
+typedef struct {
+
+	/* The registration ID (see the JPC_COM_* constants). */
+	uint_fast16_t regid;
+
+	/* The length of the data in bytes, i.e., the length of the marker
+	  segment less the two bytes of the registration ID. */
+	uint_fast16_t len;
+
+	/* The data (null when the length is zero). */
+	uchar *data;
+
+} jpc_com_t;
+
+/**************************************\
+* SOP marker segment parameters.
+\**************************************/
+
+typedef struct {
+
+	/* The sequence number. */
+	uint_fast16_t seqno;
+
+} jpc_sop_t;
+
+/**************************************\
+* CRG marker segment parameters.
+\**************************************/
+
+/* Per component information. */
+
+typedef struct {
+
+	/* The horizontal offset (coded as a 16-bit value, ahead of the
+	  vertical offset). */
+	uint_fast16_t hoff;
+
+	/* The vertical offset (coded as a 16-bit value). */
+	uint_fast16_t voff;
+
+} jpc_crgcomp_t;
+
+/* CRG marker segment parameters. */
+typedef struct {
+
+	/* The number of components (taken from the code stream state when
+	  the marker segment is read). */
+	int numcomps;
+
+	/* Per component information: an array of numcomps entries. */
+	jpc_crgcomp_t *comps;
+
+} jpc_crg_t;
+
+/**************************************\
+* Marker segment parameters for unknown marker type.
+\**************************************/
+
+typedef struct {
+
+	/* The data: the raw bytes of the marker segment (null when the
+	  length is zero). */
+	uchar *data;
+
+	/* The length of the data in bytes. */
+	uint_fast16_t len;
+
+} jpc_unk_t;
+
+/**************************************\
+* Generic marker segment parameters.
+\**************************************/
+
+typedef union {
+	int soc;	/* unused */
+	jpc_sot_t sot;
+	int sod;	/* unused */
+	int eoc;	/* unused */
+	jpc_siz_t siz;
+	jpc_cod_t cod;
+	jpc_coc_t coc;
+	jpc_rgn_t rgn;
+	jpc_qcd_t qcd;
+	jpc_qcc_t qcc;
+	jpc_poc_t poc;
+	/* jpc_plm_t plm; */
+	/* jpc_plt_t plt; */
+	jpc_ppm_t ppm;
+	jpc_ppt_t ppt;
+	jpc_sop_t sop;
+	int eph;	/* unused */
+	jpc_com_t com;
+	jpc_crg_t crg;
+	jpc_unk_t unk;
+} jpc_msparms_t;
+
+/**************************************\
+* Marker segment.
+\**************************************/
+
+/* Marker segment IDs. */
+
+/* The smallest valid marker value. */
+#define	JPC_MS_MIN	0xff00
+
+/* The largest valid marker value. */
+#define	JPC_MS_MAX	0xffff
+
+/* The minimum marker value that cannot occur within packet data. */
+#define	JPC_MS_INMIN	0xff80
+/* The maximum marker value that cannot occur within packet data. */
+#define	JPC_MS_INMAX	0xffff
+
+/* Delimiting marker segments. */
+#define	JPC_MS_SOC	0xff4f /* Start of code stream (SOC). */
+#define	JPC_MS_SOT	0xff90 /* Start of tile-part (SOT). */
+#define	JPC_MS_SOD	0xff93 /* Start of data (SOD). */
+#define	JPC_MS_EOC	0xffd9 /* End of code stream (EOC). */
+
+/* Fixed information marker segments. */
+#define	JPC_MS_SIZ	0xff51 /* Image and tile size (SIZ). */
+
+/* Functional marker segments. */
+#define	JPC_MS_COD	0xff52 /* Coding style default (COD). */
+#define JPC_MS_COC	0xff53 /* Coding style component (COC). */
+#define	JPC_MS_RGN	0xff5e /* Region of interest (RGN). */
+#define JPC_MS_QCD	0xff5c /* Quantization default (QCD). */
+#define JPC_MS_QCC	0xff5d /* Quantization component (QCC). */
+#define JPC_MS_POC	0xff5f /* Progression order change (POC). */
+
+/* Pointer marker segments. */
+#define	JPC_MS_TLM	0xff55 /* Tile-part lengths, main header (TLM). */
+#define	JPC_MS_PLM	0xff57 /* Packet length, main header (PLM). */
+#define	JPC_MS_PLT	0xff58 /* Packet length, tile-part header (PLT). */
+#define	JPC_MS_PPM	0xff60 /* Packed packet headers, main header (PPM). */
+#define	JPC_MS_PPT	0xff61 /* Packed packet headers, tile-part header (PPT). */
+
+/* In bit stream marker segments. */
+#define	JPC_MS_SOP	0xff91	/* Start of packet (SOP). */
+#define	JPC_MS_EPH	0xff92	/* End of packet header (EPH). */
+
+/* Informational marker segments. */
+#define	JPC_MS_CRG	0xff63 /* Component registration (CRG). */
+#define JPC_MS_COM	0xff64 /* Comment (COM). */
+
+/* Forward declaration. */
+struct jpc_msops_s;
+
+/* Generic marker segment class. */
+
+typedef struct {
+	/* A marker segment as handled by jpc_getms, jpc_putms and jpc_ms_dump. */
+	/* The type of marker segment (compared against the JPC_MS_* values). */
+	uint_fast16_t id;
+
+	/* The length of the marker segment. */
+	uint_fast16_t len;
+
+	/* The starting offset within the stream. */
+	uint_fast32_t off;
+
+	/* The parameters of the marker segment. */
+	jpc_msparms_t parms;
+
+	/* The marker segment operations. */
+	struct jpc_msops_s *ops;
+
+} jpc_ms_t;
+
+/* Marker segment operations (which depend on the marker segment type). */
+
+typedef struct jpc_msops_s {
+	/* Table of operations that the ops field of jpc_ms_t points to. */
+	/* Destroy the marker segment parameters. */
+	void (*destroyparms)(jpc_ms_t *ms);
+
+	/* Get the marker segment parameters from a stream. */
+	int (*getparms)(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *in);
+
+	/* Put the marker segment parameters to a stream. */
+	int (*putparms)(jpc_ms_t *ms, jpc_cstate_t *cstate, jas_stream_t *out);
+
+	/* Dump the marker segment parameters (for debugging). */
+	int (*dumpparms)(jpc_ms_t *ms, FILE *out);
+
+} jpc_msops_t;
+
+/******************************************************************************\
+* Macros/Functions.
+\******************************************************************************/
+
+/* Create a code-stream state object. */
+jpc_cstate_t *jpc_cstate_create(void);
+
+/* Destroy a code-stream state object. */
+void jpc_cstate_destroy(jpc_cstate_t *cstate);
+
+/* Create a marker segment. */
+jpc_ms_t *jpc_ms_create(int type);
+
+/* Destroy a marker segment. */
+void jpc_ms_destroy(jpc_ms_t *ms);
+
+/* Does marker type x have parameters?  False for SOC, SOD, EOC, EPH and 0xff30-0xff3f. */
+#define	JPC_MS_HASPARMS(x) \
+	(!((x) == JPC_MS_SOC || (x) == JPC_MS_SOD || (x) == JPC_MS_EOC || \
+	  (x) == JPC_MS_EPH || ((x) >= 0xff30 && (x) <= 0xff3f)))
+
+/* Get the marker segment type (the id field). */
+#define	jpc_ms_gettype(ms) \
+	((ms)->id)
+
+/* Read a marker segment from a stream. */
+jpc_ms_t *jpc_getms(jas_stream_t *in, jpc_cstate_t *cstate);
+
+/* Write a marker segment to a stream. */
+int jpc_putms(jas_stream_t *out, jpc_cstate_t *cstate, jpc_ms_t *ms);
+
+/* Copy code stream data from one stream to another. */
+int jpc_getdata(jas_stream_t *in, jas_stream_t *out, long n);
+
+/* Copy code stream data from one stream to another. */
+int jpc_putdata(jas_stream_t *out, jas_stream_t *in, long n);
+
+/* Dump a marker segment (for debugging). */
+void jpc_ms_dump(jpc_ms_t *ms, FILE *out);
+
+/* Read an 8-bit unsigned integer from a stream. */
+int jpc_getuint8(jas_stream_t *in, uint_fast8_t *val);
+
+/* Read a 16-bit unsigned integer from a stream. */
+int jpc_getuint16(jas_stream_t *in, uint_fast16_t *val);
+
+/* Read a 32-bit unsigned integer from a stream. */
+int jpc_getuint32(jas_stream_t *in, uint_fast32_t *val);
+
+/* Write an 8-bit unsigned integer to a stream. */
+int jpc_putuint8(jas_stream_t *out, uint_fast8_t val);
+
+/* Write a 16-bit unsigned integer to a stream. */
+int jpc_putuint16(jas_stream_t *out, uint_fast16_t val);
+
+/* Write a 32-bit unsigned integer to a stream. */
+int jpc_putuint32(jas_stream_t *out, uint_fast32_t val);
+
+#endif
diff --git a/src/libjasper/jpc/jpc_dec.c b/src/libjasper/jpc/jpc_dec.c
new file mode 100644
index 0000000..f61c7e6
--- /dev/null
+++ b/src/libjasper/jpc/jpc_dec.c
@@ -0,0 +1,2348 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+
+ GeoJasper revision: Dima - multiple channels and photoshop cs2 support
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * $Id: jpc_dec.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "jasper/jas_types.h"
+#include "jasper/jas_math.h"
+#include "jasper/jas_tvp.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_debug.h"
+
+#include "jpc_fix.h"
+#include "jpc_dec.h"
+#include "jpc_cs.h"
+#include "jpc_mct.h"
+#include "jpc_t2dec.h"
+#include "jpc_t1dec.h"
+#include "jpc_math.h"
+
+/******************************************************************************\
+* Decoder states (bit flags, so that several can be OR-ed into one mask).
+\******************************************************************************/
+
+#define	JPC_MHSOC	0x0001
+  /* In the main header, expecting a SOC marker segment. */
+#define	JPC_MHSIZ	0x0002
+  /* In the main header, expecting a SIZ marker segment. */
+#define	JPC_MH		0x0004
+  /* In the main header, expecting "other" marker segments. */
+#define	JPC_TPHSOT	0x0008
+  /* In a tile-part header, expecting a SOT marker segment. */
+#define	JPC_TPH		0x0010
+  /* In a tile-part header, expecting "other" marker segments. */
+#define	JPC_MT		0x0020
+  /* In the main trailer. */
+
+typedef struct {
+	/* One row of the marker segment dispatch table (jpc_dec_mstab). */
+	uint_fast16_t id;
+	/* The marker segment type. */
+
+	int validstates;
+	/* The states in which this type of marker segment can be
+	  validly encountered (a mask of the JPC_MHSOC ... JPC_MT flags). */
+
+	int (*action)(jpc_dec_t *dec, jpc_ms_t *ms);
+	/* The action to take upon encountering this type of marker segment. */
+	/* May be 0 (nothing to do).  Returns < 0 on error and > 0 to stop. */
+} jpc_dec_mstabent_t;
+
+/******************************************************************************\
+* Flags recording which coding parameters have been set, and from where.
+\******************************************************************************/
+
+/* COD/COC parameters have been specified. */
+#define	JPC_CSET	0x0001
+/* QCD/QCC parameters have been specified. */
+#define	JPC_QSET	0x0002
+/* COD/COC parameters set from a COC marker segment. */
+#define	JPC_COC	0x0004
+/* QCD/QCC parameters set from a QCC marker segment. */
+#define	JPC_QCC	0x0008
+
+/******************************************************************************\
+* Local function prototypes.
+\******************************************************************************/
+
+static int jpc_dec_dump(jpc_dec_t *dec, FILE *out);
+/* Tables holding PPM/PPT marker segment data (non-static prototypes). */
+jpc_ppxstab_t *jpc_ppxstab_create(void);
+void jpc_ppxstab_destroy(jpc_ppxstab_t *tab);
+int jpc_ppxstab_grow(jpc_ppxstab_t *tab, int maxents);
+int jpc_ppxstab_insert(jpc_ppxstab_t *tab, jpc_ppxstabent_t *ent);
+jpc_streamlist_t *jpc_ppmstabtostreams(jpc_ppxstab_t *tab);
+int jpc_pptstabwrite(jas_stream_t *out, jpc_ppxstab_t *tab);
+jpc_ppxstabent_t *jpc_ppxstabent_create(void);
+void jpc_ppxstabent_destroy(jpc_ppxstabent_t *ent);
+/* Lists of streams (used for the packet header streams). */
+int jpc_streamlist_numstreams(jpc_streamlist_t *streamlist);
+jpc_streamlist_t *jpc_streamlist_create(void);
+int jpc_streamlist_insert(jpc_streamlist_t *streamlist, int streamno,
+  jas_stream_t *stream);
+jas_stream_t *jpc_streamlist_remove(jpc_streamlist_t *streamlist, int streamno);
+void jpc_streamlist_destroy(jpc_streamlist_t *streamlist);
+jas_stream_t *jpc_streamlist_get(jpc_streamlist_t *streamlist, int streamno);
+/* Coding parameter (jpc_dec_cp_t) helpers. */
+static void jpc_dec_cp_resetflags(jpc_dec_cp_t *cp);
+static jpc_dec_cp_t *jpc_dec_cp_create(uint_fast16_t numcomps);
+static int jpc_dec_cp_isvalid(jpc_dec_cp_t *cp);
+static jpc_dec_cp_t *jpc_dec_cp_copy(jpc_dec_cp_t *cp);
+static int jpc_dec_cp_setfromcod(jpc_dec_cp_t *cp, jpc_cod_t *cod);
+static int jpc_dec_cp_setfromcoc(jpc_dec_cp_t *cp, jpc_coc_t *coc);
+static int jpc_dec_cp_setfromcox(jpc_dec_cp_t *cp, jpc_dec_ccp_t *ccp,
+  jpc_coxcp_t *compparms, int flags);
+static int jpc_dec_cp_setfromqcd(jpc_dec_cp_t *cp, jpc_qcd_t *qcd);
+static int jpc_dec_cp_setfromqcc(jpc_dec_cp_t *cp, jpc_qcc_t *qcc);
+static int jpc_dec_cp_setfromqcx(jpc_dec_cp_t *cp, jpc_dec_ccp_t *ccp,
+  jpc_qcxcp_t *compparms, int flags);
+static int jpc_dec_cp_setfromrgn(jpc_dec_cp_t *cp, jpc_rgn_t *rgn);
+static int jpc_dec_cp_prepare(jpc_dec_cp_t *cp);
+static void jpc_dec_cp_destroy(jpc_dec_cp_t *cp);
+static int jpc_dec_cp_setfrompoc(jpc_dec_cp_t *cp, jpc_poc_t *poc, int reset);
+static int jpc_pi_addpchgfrompoc(jpc_pi_t *pi, jpc_poc_t *poc);
+/* Decoder object, tile processing and the per-marker-segment actions. */
+static int jpc_dec_decode(jpc_dec_t *dec);
+static jpc_dec_t *jpc_dec_create(jpc_dec_importopts_t *impopts, jas_stream_t *in);
+static void jpc_dec_destroy(jpc_dec_t *dec);
+static void jpc_dequantize(jas_matrix_t *x, jpc_fix_t absstepsize);
+static void jpc_undo_roi(jas_matrix_t *x, int roishift, int bgshift, int numbps);
+static jpc_fix_t jpc_calcabsstepsize(int stepsize, int numbits);
+static int jpc_dec_tiledecode(jpc_dec_t *dec, jpc_dec_tile_t *tile);
+static int jpc_dec_tileinit(jpc_dec_t *dec, jpc_dec_tile_t *tile);
+static int jpc_dec_tilefini(jpc_dec_t *dec, jpc_dec_tile_t *tile);
+static int jpc_dec_process_soc(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_sot(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_sod(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_eoc(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_siz(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_cod(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_coc(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_rgn(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_qcd(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_qcc(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_poc(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_ppm(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_ppt(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_com(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_unk(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_process_crg(jpc_dec_t *dec, jpc_ms_t *ms);
+static int jpc_dec_parseopts(char *optstr, jpc_dec_importopts_t *opts);
+/* Lookup in the marker segment table (jpc_dec_mstab). */
+static jpc_dec_mstabent_t *jpc_dec_mstab_lookup(uint_fast16_t id);
+
+/******************************************************************************\
+* Global data.
+\******************************************************************************/
+
+jpc_dec_mstabent_t jpc_dec_mstab[] = {	/* rows are {id, validstates, action} */
+	{JPC_MS_SOC, JPC_MHSOC, jpc_dec_process_soc},
+	{JPC_MS_SOT, JPC_MH | JPC_TPHSOT, jpc_dec_process_sot},
+	{JPC_MS_SOD, JPC_TPH, jpc_dec_process_sod},
+	{JPC_MS_EOC, JPC_TPHSOT, jpc_dec_process_eoc},
+	{JPC_MS_SIZ, JPC_MHSIZ, jpc_dec_process_siz},
+	{JPC_MS_COD, JPC_MH | JPC_TPH, jpc_dec_process_cod},
+	{JPC_MS_COC, JPC_MH | JPC_TPH, jpc_dec_process_coc},
+	{JPC_MS_RGN, JPC_MH | JPC_TPH, jpc_dec_process_rgn},
+	{JPC_MS_QCD, JPC_MH | JPC_TPH, jpc_dec_process_qcd},
+	{JPC_MS_QCC, JPC_MH | JPC_TPH, jpc_dec_process_qcc},
+	{JPC_MS_POC, JPC_MH | JPC_TPH, jpc_dec_process_poc},
+	{JPC_MS_TLM, JPC_MH, 0},	/* no action: accepted, nothing is done */
+	{JPC_MS_PLM, JPC_MH, 0},	/* no action: accepted, nothing is done */
+	{JPC_MS_PLT, JPC_TPH, 0},	/* no action: accepted, nothing is done */
+	{JPC_MS_PPM, JPC_MH, jpc_dec_process_ppm},
+	{JPC_MS_PPT, JPC_TPH, jpc_dec_process_ppt},
+	{JPC_MS_SOP, 0, 0},	/* no valid state: rejected by jpc_dec_decode */
+	{JPC_MS_CRG, JPC_MH, jpc_dec_process_crg},
+	{JPC_MS_COM, JPC_MH | JPC_TPH, jpc_dec_process_com},
+	{0, JPC_MH | JPC_TPH, jpc_dec_process_unk}	/* id 0 ends the table; also used for unlisted ids */
+};
+
+/******************************************************************************\
+* The main entry point for the JPEG-2000 decoder.
+\******************************************************************************/
+
+/*
+ * Decode a JPEG-2000 code stream read from 'in' into a new image.
+ *
+ * optstr is the option string given to jpc_dec_parseopts() and may be null.
+ * Returns the decoded image, detached from the decoder before the decoder
+ * is destroyed, or 0 if option parsing, decoder creation or decoding fails.
+ *
+ * GeoJasper (dima): the image is first declared grayscale with every
+ * component typed GRAY_Y; with three or more components the color space
+ * then becomes sRGB and components 0, 1 and 2 are typed R, G and B.
+ */
+jas_image_t *jpc_decode(jas_stream_t *in, char *optstr)
+{
+	jpc_dec_importopts_t importopts;
+	jpc_dec_t *decoder = 0;
+	jas_image_t *result;
+	unsigned int cmptno;
+
+	if (jpc_dec_parseopts(optstr, &importopts)) {
+		goto error;
+	}
+
+	jpc_initluts();
+
+	decoder = jpc_dec_create(&importopts, in);
+	if (!decoder) {
+		goto error;
+	}
+
+	/* The bulk of the work happens here. */
+	if (jpc_dec_decode(decoder)) {
+		goto error;
+	}
+
+	/* GeoJasper: dima - begin - default: grayscale, all components GRAY_Y. */
+	jas_image_setclrspc(decoder->image, JAS_CLRSPC_SGRAY);
+	cmptno = 0;
+	while (cmptno < jas_image_numcmpts(decoder->image)) {
+		jas_image_setcmpttype(decoder->image, cmptno,
+		  JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_GRAY_Y));
+		++cmptno;
+	}
+
+	/* Three or more components: the first three become R, G and B. */
+	if (jas_image_numcmpts(decoder->image) >= 3) {
+		jas_image_setclrspc(decoder->image, JAS_CLRSPC_SRGB);
+		jas_image_setcmpttype(decoder->image, 0,
+		  JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_RGB_R));
+		jas_image_setcmpttype(decoder->image, 1,
+		  JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_RGB_G));
+		jas_image_setcmpttype(decoder->image, 2,
+		  JAS_IMAGE_CT_COLOR(JAS_CLRSPC_CHANIND_RGB_B));
+	}
+	/* GeoJasper: dima - end. */
+
+	/* Keep the image: take it out of the decoder before destroying it. */
+	result = decoder->image;
+	decoder->image = 0;
+	jpc_dec_destroy(decoder);
+
+	return result;
+
+error:
+	if (decoder) {
+		jpc_dec_destroy(decoder);
+	}
+	return 0;
+}
+
+typedef enum {	/* Identifiers of the decoder options listed in decopts. */
+	OPT_MAXLYRS,
+	OPT_MAXPKTS,
+	OPT_DEBUG
+} optid_t;
+
+jas_taginfo_t decopts[] = {	/* Option tags looked up by jpc_dec_parseopts. */
+	{OPT_MAXLYRS, "maxlyrs"},	/* stored in opts->maxlyrs */
+	{OPT_MAXPKTS, "maxpkts"},	/* stored in opts->maxpkts */
+	{OPT_DEBUG, "debug"},	/* stored in opts->debug */
+	{-1, 0}	/* presumably the list terminator -- confirm in jas_taginfos_lookup */
+};
+
+/* Parse optstr (null is treated as "") into *opts; unknown tags only warn.
+  Returns 0 on success, or -1 if the tag-value parser cannot be created. */
+static int jpc_dec_parseopts(char *optstr, jpc_dec_importopts_t *opts)
+{
+	jas_tvparser_t *parser;
+	int optid;
+
+	/* Values used for options that are not given. */
+	opts->maxpkts = -1;
+	opts->maxlyrs = JPC_MAXLYRS;
+	opts->debug = 0;
+
+	parser = jas_tvparser_create(optstr ? optstr : "");
+	if (!parser) {
+		return -1;
+	}
+
+	while (!jas_tvparser_next(parser)) {
+		optid = jas_taginfo_nonull(jas_taginfos_lookup(decopts,
+		  jas_tvparser_gettag(parser)))->id;
+		if (optid == OPT_MAXLYRS) {
+			opts->maxlyrs = atoi(jas_tvparser_getval(parser));
+		} else if (optid == OPT_DEBUG) {
+			opts->debug = atoi(jas_tvparser_getval(parser));
+		} else if (optid == OPT_MAXPKTS) {
+			opts->maxpkts = atoi(jas_tvparser_getval(parser));
+		} else {
+			jas_eprintf("warning: ignoring invalid option %s\n",
+			  jas_tvparser_gettag(parser));
+		}
+	}
+
+	jas_tvparser_destroy(parser);
+	return 0;
+}
+
+/******************************************************************************\
+* Code for table-driven code stream decoder.
+\******************************************************************************/
+
+/* Find the table entry for marker type id.  The scan stops at the entry
+  whose id is 0, so an id that is not listed yields that final entry. */
+static jpc_dec_mstabent_t *jpc_dec_mstab_lookup(uint_fast16_t id)
+{
+	jpc_dec_mstabent_t *ent = jpc_dec_mstab;
+	while (ent->id != 0 && ent->id != id) {
+		++ent;
+	}
+	return ent;
+}
+
+/* Table-driven decode loop: read marker segments from dec->in and hand each
+  to the action registered for its type, until an action returns a positive
+  value.  Returns 0 on success and -1 on failure. */
+static int jpc_dec_decode(jpc_dec_t *dec)
+{
+	jpc_dec_mstabent_t *ent;
+	jpc_cstate_t *cstate;
+	jpc_ms_t *ms;
+	int status;
+
+	cstate = jpc_cstate_create();
+	if (!cstate) {
+		return -1;
+	}
+	dec->cstate = cstate;
+
+	/* The first marker segment has to be a SOC. */
+	dec->state = JPC_MHSOC;
+
+	/* Loop until an action reports a nonzero status. */
+	status = 0;
+	while (!status) {
+
+		/* Fetch the next marker segment from the code stream. */
+		ms = jpc_getms(dec->in, cstate);
+		if (!ms) {
+
+			/* GeoJasper: dima - adobe photoshop cs2 files seem not to end
+			  with the EOC marker, although they carry an additional pair
+			  of SOT/SOD markers.  Catch this by checking the tile-part
+			  number and leave. */
+			if (dec->tiles &&
+			  dec->tiles->partno >= dec->tiles->numparts &&
+			  dec->state == JPC_TPHSOT) {
+				return 0;
+			}
+
+			jas_eprintf("cannot get marker segment\n");
+			return -1;
+		}
+
+		ent = jpc_dec_mstab_lookup(ms->id);
+		assert(ent);
+
+		/* A marker segment type that is not permitted in the current
+		  decoder state ends decoding with an error. */
+		if (!(dec->state & ent->validstates)) {
+			jas_eprintf("unexpected marker segment type\n");
+			jpc_ms_destroy(ms);
+			return -1;
+		}
+
+		/* Run the action; a table entry without one needs no work. */
+		status = ent->action ? (*ent->action)(dec, ms) : 0;
+
+		/* The marker segment is not needed any more. */
+		jpc_ms_destroy(ms);
+
+	}
+
+	/* Negative status: an action failed.  Positive: decoding is done. */
+	return (status < 0) ? -1 : 0;
+}
+
+/*
+ * Handle a CRG (component registration) marker segment.
+ *
+ * The registration offsets are deliberately ignored: they serve no useful
+ * purpose for decoding.  Applying them would mean copying, for each
+ * component cmptno, ms->parms.crg.comps[cmptno].hoff and .voff into the
+ * component's hsubstep and vsubstep, and some other parts of the code
+ * would need to be changed first.
+ *
+ * Always returns 0.
+ */
+static int jpc_dec_process_crg(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	/* Both parameters are intentionally unused. */
+	(void) dec;
+	(void) ms;
+
+	return 0;
+}
+
+/* Handle SOC (start of code stream); always returns 0. */
+static int jpc_dec_process_soc(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	/* A SIZ marker segment must come next. */
+	dec->state = JPC_MHSIZ;
+
+	/* The marker segment itself is not used here. */
+	(void) ms;
+	return 0;
+}
+
+/* Handle SOT (start of tile-part).  The first SOT, seen while still in the
+  main header, creates the output image and converts PPM data into packet
+  header streams; the named tile then becomes current and the state moves
+  to JPC_TPH.  Returns 0 on success and -1 on failure. */
+static int jpc_dec_process_sot(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	jpc_dec_tile_t *tile;
+	jpc_sot_t *sot = &ms->parms.sot;
+	jas_image_cmptparm_t *compinfos;
+	jas_image_cmptparm_t *compinfo;
+	jpc_dec_cmpt_t *cmpt;
+	int cmptno;
+
+	if (dec->state == JPC_MH) {
+		/* No assert() here: it vanishes under NDEBUG, so fail cleanly. */
+		compinfos = jas_malloc(dec->numcomps * sizeof(jas_image_cmptparm_t));
+		if (!compinfos) {
+			return -1;
+		}
+		for (cmptno = 0, cmpt = dec->cmpts, compinfo = compinfos;
+		  cmptno < dec->numcomps; ++cmptno, ++cmpt, ++compinfo) {
+			compinfo->tlx = 0;
+			compinfo->tly = 0;
+			compinfo->prec = cmpt->prec;
+			compinfo->sgnd = cmpt->sgnd;
+			compinfo->width = cmpt->width;
+			compinfo->height = cmpt->height;
+			compinfo->hstep = cmpt->hstep;
+			compinfo->vstep = cmpt->vstep;
+		}
+		/* Release compinfos on both outcomes; it leaked on failure before. */
+		dec->image = jas_image_create(dec->numcomps, compinfos,
+		  JAS_CLRSPC_UNKNOWN);
+		jas_free(compinfos);
+		if (!dec->image) {
+			return -1;
+		}
+
+		/* Is the packet header information stored in PPM marker segments in
+		  the main header? */
+		if (dec->ppmstab) {
+			/* Convert it to one stream per tile-part; fail, not abort(). */
+			if (!(dec->pkthdrstreams = jpc_ppmstabtostreams(dec->ppmstab))) {
+				return -1;
+			}
+			jpc_ppxstab_destroy(dec->ppmstab);
+			dec->ppmstab = 0;
+		}
+	}
+
+	if (sot->len > 0) {
+		dec->curtileendoff = jas_stream_getrwcount(dec->in) - ms->len -
+		  4 + sot->len;
+	} else {
+		dec->curtileendoff = 0;
+	}
+
+	/* Tile numbers start at 0 (dec->tiles presumably has numtiles entries),
+	  so numtiles itself is out of range; the former '>' test let it pass. */
+	if (JAS_CAST(int, sot->tileno) >= dec->numtiles) {
+		jas_eprintf("invalid tile number in SOT marker segment\n");
+		return -1;
+	}
+	tile = dec->curtile = &dec->tiles[sot->tileno];
+	/* Ensure that this is the expected part number. */
+	if (sot->partno != tile->partno) {
+		return -1;
+	}
+	if (tile->numparts > 0 && sot->partno >= tile->numparts) {
+		/* GeoJasper: dima - photoshop cs2 saves jpeg2000 with an additional
+		  group of SOT/SOD; for an already finished tile simply ignore it. */
+		if (tile->state == JPC_TILE_DONE) {
+			dec->state = JPC_TPH;
+			return 0;
+		}
+		return -1;
+	}
+	if (!tile->numparts && sot->numparts > 0) {
+		tile->numparts = sot->numparts;
+	}
+	tile->pptstab = 0;
+
+	switch (tile->state) {
+	case JPC_TILE_INIT:
+		/* This is the first tile-part for this tile. */
+		tile->state = JPC_TILE_ACTIVE;
+		assert(!tile->cp);
+		if (!(tile->cp = jpc_dec_cp_copy(dec->cp))) {
+			return -1;
+		}
+		jpc_dec_cp_resetflags(dec->cp);
+		break;
+	default:
+		if (sot->numparts == sot->partno - 1) {
+			tile->state = JPC_TILE_ACTIVELAST;
+		}
+		break;
+	}
+
+	/* Note: the expected tile-part number is not incremented until all
+	  processing for this tile-part is complete.  Other tile-part header
+	  marker segments are expected next. */
+	dec->state = JPC_TPH;
+	return 0;
+}
+
+static int jpc_dec_process_sod(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	jpc_dec_tile_t *tile;
+	int pos;
+	/* Handle SOD (start of data): decode the packets of the current tile-part.  Returns 0 or -1. */
+	/* Eliminate compiler warnings about unused variables. */
+	ms = 0;
+	/* A current tile (dec->curtile) is required; without one this is an error. */
+	if (!(tile = dec->curtile)) {
+		return -1;
+	}
+	/* Part number 0: check and prepare the coding parameters, then set up the tile. */
+	if (!tile->partno) {
+		if (!jpc_dec_cp_isvalid(tile->cp)) {
+			return -1;
+		}
+		jpc_dec_cp_prepare(tile->cp); /* NOTE(review): return value is ignored */
+		if (jpc_dec_tileinit(dec, tile)) {
+			return -1;
+		}
+	}
+
+  // GeoJasper: dima - photoshop cs2 saves jpeg2000 with additional group of SOT/SOD
+  // here we simply ignore these markers
+  if (tile->numparts > 0 && tile->partno >= tile->numparts) {
+    dec->curtile = 0;
+    // Increment the expected tile-part number.
+    ++tile->partno;
+    // We should expect to encounter a SOT marker segment next.
+    dec->state = JPC_TPHSOT;
+    return 0;
+  }
+
+	/* Are packet headers stored in the main header or tile-part header? */
+	if (dec->pkthdrstreams) {
+		/* Get the stream containing the packet header data for this
+		  tile-part. */
+		if (!(tile->pkthdrstream = jpc_streamlist_remove(dec->pkthdrstreams, 0))) {
+			return -1;
+		}
+	}
+	/* Append the tile's PPT table to the packet header stream, keeping the current position. */
+	if (tile->pptstab) {
+		if (!tile->pkthdrstream) {
+			if (!(tile->pkthdrstream = jas_stream_memopen(0, 0))) {
+				return -1;
+			}
+		}
+		pos = jas_stream_tell(tile->pkthdrstream);
+		jas_stream_seek(tile->pkthdrstream, 0, SEEK_END);
+		if (jpc_pptstabwrite(tile->pkthdrstream, tile->pptstab)) {
+			return -1;
+		}
+		jas_stream_seek(tile->pkthdrstream, pos, SEEK_SET);
+		jpc_ppxstab_destroy(tile->pptstab);
+		tile->pptstab = 0;
+	}
+
+	if (jas_getdbglevel() >= 10) {
+		jpc_dec_dump(dec, stderr);
+	}
+	/* Packet headers are read from the packet header stream if there is one, else from dec->in. */
+	if (jpc_dec_decodepkts(dec, (tile->pkthdrstream) ? tile->pkthdrstream :
+	  dec->in, dec->in)) {
+		jas_eprintf("jpc_dec_decodepkts failed\n");
+		return -1;
+	}
+
+	/* Gobble any unconsumed tile data. */
+	if (dec->curtileendoff > 0) {
+		long curoff;
+		uint_fast32_t n;
+		curoff = jas_stream_getrwcount(dec->in);
+		if (curoff < dec->curtileendoff) {
+			n = dec->curtileendoff - curoff;
+			jas_eprintf("warning: ignoring trailing garbage (%lu bytes)\n",
+			  (unsigned long) n);
+
+			while (n-- > 0) {
+				if (jas_stream_getc(dec->in) == EOF) {
+					jas_eprintf("read error\n");
+					return -1;
+				}
+			}
+		} else if (curoff > dec->curtileendoff) {
+			jas_eprintf("warning: not enough tile data (%lu bytes)\n",
+			  (unsigned long) curoff - dec->curtileendoff); /* NOTE(review): the cast binds to curoff only */
+		}
+
+	}
+	/* This was the last expected tile-part: decode the tile and finish it. */
+	if (tile->numparts > 0 && tile->partno == tile->numparts - 1) {
+		if (jpc_dec_tiledecode(dec, tile)) {
+			return -1;
+		}
+		jpc_dec_tilefini(dec, tile); /* NOTE(review): return value is ignored */
+	}
+
+	dec->curtile = 0;
+
+	/* Increment the expected tile-part number. */
+	++tile->partno;
+
+	/* We should expect to encounter a SOT marker segment next. */
+	dec->state = JPC_TPHSOT;
+
+	return 0;
+}
+
+static int jpc_dec_tileinit(jpc_dec_t *dec, jpc_dec_tile_t *tile)
+{
+	jpc_dec_tcomp_t *tcomp;
+	int compno;
+	int rlvlno;
+	jpc_dec_rlvl_t *rlvl;
+	jpc_dec_band_t *band;
+	jpc_dec_prc_t *prc;
+	int bndno;
+	jpc_tsfb_band_t *bnd;
+	int bandno;
+	jpc_dec_ccp_t *ccp;
+	int prccnt;
+	jpc_dec_cblk_t *cblk;
+	int cblkcnt;
+	uint_fast32_t tlprcxstart;
+	uint_fast32_t tlprcystart;
+	uint_fast32_t brprcxend;
+	uint_fast32_t brprcyend;
+	uint_fast32_t tlcbgxstart;
+	uint_fast32_t tlcbgystart;
+	uint_fast32_t brcbgxend;
+	uint_fast32_t brcbgyend;
+	uint_fast32_t cbgxstart;
+	uint_fast32_t cbgystart;
+	uint_fast32_t cbgxend;
+	uint_fast32_t cbgyend;
+	uint_fast32_t tlcblkxstart;
+	uint_fast32_t tlcblkystart;
+	uint_fast32_t brcblkxend;
+	uint_fast32_t brcblkyend;
+	uint_fast32_t cblkxstart;
+	uint_fast32_t cblkystart;
+	uint_fast32_t cblkxend;
+	uint_fast32_t cblkyend;
+	uint_fast32_t tmpxstart;
+	uint_fast32_t tmpystart;
+	uint_fast32_t tmpxend;
+	uint_fast32_t tmpyend;
+	jpc_dec_cp_t *cp;
+	jpc_tsfb_band_t bnds[64];
+	jpc_pchg_t *pchg;
+	int pchgno;
+	jpc_dec_cmpt_t *cmpt;
+
+	cp = tile->cp;
+	tile->realmode = 0;
+	if (cp->mctid == JPC_MCT_ICT) {
+		tile->realmode = 1;
+	}
+
+	for (compno = 0, tcomp = tile->tcomps, cmpt = dec->cmpts; compno <
+	  dec->numcomps; ++compno, ++tcomp, ++cmpt) {
+		ccp = &tile->cp->ccps[compno];
+		if (ccp->qmfbid == JPC_COX_INS) {
+			tile->realmode = 1;
+		}
+		tcomp->numrlvls = ccp->numrlvls;
+		if (!(tcomp->rlvls = jas_malloc(tcomp->numrlvls *
+		  sizeof(jpc_dec_rlvl_t)))) {
+			return -1;
+		}
+		if (!(tcomp->data = jas_seq2d_create(JPC_CEILDIV(tile->xstart,
+		  cmpt->hstep), JPC_CEILDIV(tile->ystart, cmpt->vstep),
+		  JPC_CEILDIV(tile->xend, cmpt->hstep), JPC_CEILDIV(tile->yend,
+		  cmpt->vstep)))) {
+			return -1;
+		}
+		if (!(tcomp->tsfb = jpc_cod_gettsfb(ccp->qmfbid,
+		  tcomp->numrlvls - 1))) {
+			return -1;
+		}
+{
+	jpc_tsfb_getbands(tcomp->tsfb, jas_seq2d_xstart(tcomp->data), jas_seq2d_ystart(tcomp->data), jas_seq2d_xend(tcomp->data), jas_seq2d_yend(tcomp->data), bnds);
+}
+		for (rlvlno = 0, rlvl = tcomp->rlvls; rlvlno < tcomp->numrlvls;
+		  ++rlvlno, ++rlvl) {
+rlvl->bands = 0;
+			rlvl->xstart = JPC_CEILDIVPOW2(tcomp->xstart,
+			  tcomp->numrlvls - 1 - rlvlno);
+			rlvl->ystart = JPC_CEILDIVPOW2(tcomp->ystart,
+			  tcomp->numrlvls - 1 - rlvlno);
+			rlvl->xend = JPC_CEILDIVPOW2(tcomp->xend,
+			  tcomp->numrlvls - 1 - rlvlno);
+			rlvl->yend = JPC_CEILDIVPOW2(tcomp->yend,
+			  tcomp->numrlvls - 1 - rlvlno);
+			rlvl->prcwidthexpn = ccp->prcwidthexpns[rlvlno];
+			rlvl->prcheightexpn = ccp->prcheightexpns[rlvlno];
+			tlprcxstart = JPC_FLOORDIVPOW2(rlvl->xstart,
+			  rlvl->prcwidthexpn) << rlvl->prcwidthexpn;
+			tlprcystart = JPC_FLOORDIVPOW2(rlvl->ystart,
+			  rlvl->prcheightexpn) << rlvl->prcheightexpn;
+			brprcxend = JPC_CEILDIVPOW2(rlvl->xend,
+			  rlvl->prcwidthexpn) << rlvl->prcwidthexpn;
+			brprcyend = JPC_CEILDIVPOW2(rlvl->yend,
+			  rlvl->prcheightexpn) << rlvl->prcheightexpn;
+			rlvl->numhprcs = (brprcxend - tlprcxstart) >>
+			  rlvl->prcwidthexpn;
+			rlvl->numvprcs = (brprcyend - tlprcystart) >>
+			  rlvl->prcheightexpn;
+			rlvl->numprcs = rlvl->numhprcs * rlvl->numvprcs;
+
+			if (rlvl->xstart >= rlvl->xend || rlvl->ystart >= rlvl->yend) {
+				rlvl->bands = 0;
+				rlvl->numprcs = 0;
+				rlvl->numhprcs = 0;
+				rlvl->numvprcs = 0;
+				continue;
+			}	
+			if (!rlvlno) {
+				tlcbgxstart = tlprcxstart;
+				tlcbgystart = tlprcystart;
+				brcbgxend = brprcxend;
+				brcbgyend = brprcyend;
+				rlvl->cbgwidthexpn = rlvl->prcwidthexpn;
+				rlvl->cbgheightexpn = rlvl->prcheightexpn;
+			} else {
+				tlcbgxstart = JPC_CEILDIVPOW2(tlprcxstart, 1);
+				tlcbgystart = JPC_CEILDIVPOW2(tlprcystart, 1);
+				brcbgxend = JPC_CEILDIVPOW2(brprcxend, 1);
+				brcbgyend = JPC_CEILDIVPOW2(brprcyend, 1);
+				rlvl->cbgwidthexpn = rlvl->prcwidthexpn - 1;
+				rlvl->cbgheightexpn = rlvl->prcheightexpn - 1;
+			}
+			rlvl->cblkwidthexpn = JAS_MIN(ccp->cblkwidthexpn,
+			  rlvl->cbgwidthexpn);
+			rlvl->cblkheightexpn = JAS_MIN(ccp->cblkheightexpn,
+			  rlvl->cbgheightexpn);
+
+			rlvl->numbands = (!rlvlno) ? 1 : 3;
+			if (!(rlvl->bands = jas_malloc(rlvl->numbands *
+			  sizeof(jpc_dec_band_t)))) {
+				return -1;
+			}
+			for (bandno = 0, band = rlvl->bands;
+			  bandno < rlvl->numbands; ++bandno, ++band) {
+				bndno = (!rlvlno) ? 0 : (3 * (rlvlno - 1) +
+				  bandno + 1);
+				bnd = &bnds[bndno];
+
+				band->orient = bnd->orient;
+				band->stepsize = ccp->stepsizes[bndno];
+				band->analgain = JPC_NOMINALGAIN(ccp->qmfbid,
+				  tcomp->numrlvls - 1, rlvlno, band->orient);
+				band->absstepsize = jpc_calcabsstepsize(band->stepsize,
+				  cmpt->prec + band->analgain);
+				band->numbps = ccp->numguardbits +
+				  JPC_QCX_GETEXPN(band->stepsize) - 1;
+				band->roishift = (ccp->roishift + band->numbps >= JPC_PREC) ?
+				  (JPC_PREC - 1 - band->numbps) : ccp->roishift;
+				band->data = 0;
+				band->prcs = 0;
+				if (bnd->xstart == bnd->xend || bnd->ystart == bnd->yend) {
+					continue;
+				}
+				if (!(band->data = jas_seq2d_create(0, 0, 0, 0))) {
+					return -1;
+				}
+				jas_seq2d_bindsub(band->data, tcomp->data, bnd->locxstart, bnd->locystart, bnd->locxend, bnd->locyend);
+				jas_seq2d_setshift(band->data, bnd->xstart, bnd->ystart);
+
+				assert(rlvl->numprcs);
+
+				if (!(band->prcs = jas_malloc(rlvl->numprcs * sizeof(jpc_dec_prc_t)))) {
+					return -1;
+				}
+
+/************************************************/
+	cbgxstart = tlcbgxstart;
+	cbgystart = tlcbgystart;
+	for (prccnt = rlvl->numprcs, prc = band->prcs;
+	  prccnt > 0; --prccnt, ++prc) {
+		cbgxend = cbgxstart + (1 << rlvl->cbgwidthexpn);
+		cbgyend = cbgystart + (1 << rlvl->cbgheightexpn);
+		prc->xstart = JAS_MAX(cbgxstart, JAS_CAST(uint_fast32_t, jas_seq2d_xstart(band->data)));
+		prc->ystart = JAS_MAX(cbgystart, JAS_CAST(uint_fast32_t, jas_seq2d_ystart(band->data)));
+		prc->xend = JAS_MIN(cbgxend, JAS_CAST(uint_fast32_t, jas_seq2d_xend(band->data)));
+		prc->yend = JAS_MIN(cbgyend, JAS_CAST(uint_fast32_t, jas_seq2d_yend(band->data)));
+		if (prc->xend > prc->xstart && prc->yend > prc->ystart) {
+			tlcblkxstart = JPC_FLOORDIVPOW2(prc->xstart,
+			  rlvl->cblkwidthexpn) << rlvl->cblkwidthexpn;
+			tlcblkystart = JPC_FLOORDIVPOW2(prc->ystart,
+			  rlvl->cblkheightexpn) << rlvl->cblkheightexpn;
+			brcblkxend = JPC_CEILDIVPOW2(prc->xend,
+			  rlvl->cblkwidthexpn) << rlvl->cblkwidthexpn;
+			brcblkyend = JPC_CEILDIVPOW2(prc->yend,
+			  rlvl->cblkheightexpn) << rlvl->cblkheightexpn;
+			prc->numhcblks = (brcblkxend - tlcblkxstart) >>
+			  rlvl->cblkwidthexpn;
+			prc->numvcblks = (brcblkyend - tlcblkystart) >>
+			  rlvl->cblkheightexpn;
+			prc->numcblks = prc->numhcblks * prc->numvcblks;
+			assert(prc->numcblks > 0);
+
+			if (!(prc->incltagtree = jpc_tagtree_create(prc->numhcblks, prc->numvcblks))) {
+				return -1;
+			}
+			if (!(prc->numimsbstagtree = jpc_tagtree_create(prc->numhcblks, prc->numvcblks))) {
+				return -1;
+			}
+			if (!(prc->cblks = jas_malloc(prc->numcblks * sizeof(jpc_dec_cblk_t)))) {
+				return -1;
+			}
+
+			cblkxstart = cbgxstart;
+			cblkystart = cbgystart;
+			for (cblkcnt = prc->numcblks, cblk = prc->cblks; cblkcnt > 0;) {
+				cblkxend = cblkxstart + (1 << rlvl->cblkwidthexpn);
+				cblkyend = cblkystart + (1 << rlvl->cblkheightexpn);
+				tmpxstart = JAS_MAX(cblkxstart, prc->xstart);
+				tmpystart = JAS_MAX(cblkystart, prc->ystart);
+				tmpxend = JAS_MIN(cblkxend, prc->xend);
+				tmpyend = JAS_MIN(cblkyend, prc->yend);
+				if (tmpxend > tmpxstart && tmpyend > tmpystart) {
+					cblk->firstpassno = -1;
+					cblk->mqdec = 0;
+					cblk->nulldec = 0;
+					cblk->flags = 0;
+					cblk->numpasses = 0;
+					cblk->segs.head = 0;
+					cblk->segs.tail = 0;
+					cblk->curseg = 0;
+					cblk->numimsbs = 0;
+					cblk->numlenbits = 3;
+					cblk->flags = 0;
+					if (!(cblk->data = jas_seq2d_create(0, 0, 0, 0))) {
+						return -1;
+					}
+					jas_seq2d_bindsub(cblk->data, band->data, tmpxstart, tmpystart, tmpxend, tmpyend);
+					++cblk;
+					--cblkcnt;
+				}
+				cblkxstart += 1 << rlvl->cblkwidthexpn;
+				if (cblkxstart >= cbgxend) {
+					cblkxstart = cbgxstart;
+					cblkystart += 1 << rlvl->cblkheightexpn;
+				}
+			}
+
+		} else {
+			prc->cblks = 0;
+			prc->incltagtree = 0;
+			prc->numimsbstagtree = 0;
+		}
+		cbgxstart += 1 << rlvl->cbgwidthexpn;
+		if (cbgxstart >= brcbgxend) {
+			cbgxstart = tlcbgxstart;
+			cbgystart += 1 << rlvl->cbgheightexpn;
+		}
+
+	}
+/********************************************/
+			}
+		}
+	}
+
+if (!(tile->pi = jpc_dec_pi_create(dec, tile)))
+{
+	return -1;
+}
+
+	for (pchgno = 0; pchgno < jpc_pchglist_numpchgs(tile->cp->pchglist);
+	  ++pchgno) {
+		pchg = jpc_pchg_copy(jpc_pchglist_get(tile->cp->pchglist, pchgno));
+		assert(pchg);
+		jpc_pi_addpchg(tile->pi, pchg);
+	}
+	jpc_pi_init(tile->pi);
+
+	return 0;
+}
+
+/*
+ * Release everything a tile owns and mark the tile as done.
+ *
+ * When the tile still has its component array, the function walks
+ * components -> resolution levels -> bands -> precincts -> code blocks and
+ * tears each level down from the inside out.  Afterwards the tile-level
+ * objects (coding parameters, component array, packet iterator, packet
+ * header stream and PPT table) are destroyed and their pointers cleared.
+ *
+ * Always returns 0.
+ */
+static int jpc_dec_tilefini(jpc_dec_t *dec, jpc_dec_tile_t *tile)
+{
+	jpc_dec_tcomp_t *tcomp;
+	int compno;
+	int bandno;
+	int rlvlno;
+	jpc_dec_band_t *band;
+	jpc_dec_rlvl_t *rlvl;
+	int prcno;
+	jpc_dec_prc_t *prc;
+	jpc_dec_seg_t *seg;
+	jpc_dec_cblk_t *cblk;
+	int cblkno;
+
+/* Nothing below the tile level exists if the component array is gone. */
+if (tile->tcomps) {
+
+	for (compno = 0, tcomp = tile->tcomps; compno < dec->numcomps;
+	  ++compno, ++tcomp) {
+		for (rlvlno = 0, rlvl = tcomp->rlvls; rlvlno < tcomp->numrlvls;
+		  ++rlvlno, ++rlvl) {
+/* A resolution level without a band array has nothing to release. */
+if (!rlvl->bands) {
+	continue;
+}
+			for (bandno = 0, band = rlvl->bands; bandno < rlvl->numbands; ++bandno, ++band) {
+/* Bands without a precinct array are skipped, but their data is still checked below. */
+if (band->prcs) {
+				for (prcno = 0, prc = band->prcs; prcno <
+				  rlvl->numprcs; ++prcno, ++prc) {
+/* Precincts without a code block array own nothing. */
+if (!prc->cblks) {
+	continue;
+}
+					for (cblkno = 0, cblk = prc->cblks; cblkno < prc->numcblks; ++cblkno, ++cblk) {
+
+	/* Unlink and destroy every segment still queued on this code block. */
+	while (cblk->segs.head) {
+		seg = cblk->segs.head;
+		jpc_seglist_remove(&cblk->segs, seg);
+		jpc_seg_destroy(seg);
+	}
+	jas_matrix_destroy(cblk->data);
+	/* The decoders and the flags matrix are only destroyed when present. */
+	if (cblk->mqdec) {
+		jpc_mqdec_destroy(cblk->mqdec);
+	}
+	if (cblk->nulldec) {
+		jpc_bitstream_close(cblk->nulldec);
+	}
+	if (cblk->flags) {
+		jas_matrix_destroy(cblk->flags);
+	}
+					}
+					/* Per-precinct tag trees and the code block array. */
+					if (prc->incltagtree) {
+						jpc_tagtree_destroy(prc->incltagtree);
+					}
+					if (prc->numimsbstagtree) {
+						jpc_tagtree_destroy(prc->numimsbstagtree);
+					}
+					if (prc->cblks) {
+						jas_free(prc->cblks);
+					}
+				}
+}
+				if (band->data) {
+					jas_matrix_destroy(band->data);
+				}
+				if (band->prcs) {
+					jas_free(band->prcs);
+				}
+			}
+			if (rlvl->bands) {
+				jas_free(rlvl->bands);
+			}
+		}
+		/* Per-component storage: resolution levels, sample data, filter bank. */
+		if (tcomp->rlvls) {
+			jas_free(tcomp->rlvls);
+		}
+		if (tcomp->data) {
+			jas_matrix_destroy(tcomp->data);
+		}
+		if (tcomp->tsfb) {
+			jpc_tsfb_destroy(tcomp->tsfb);
+		}
+	}
+}
+	/* Tile-level objects; each pointer is cleared after release so that a
+	  second call finds nothing left to free. */
+	if (tile->cp) {
+		jpc_dec_cp_destroy(tile->cp);
+		tile->cp = 0;
+	}
+	if (tile->tcomps) {
+		jas_free(tile->tcomps);
+		tile->tcomps = 0;
+	}
+	if (tile->pi) {
+		jpc_pi_destroy(tile->pi);
+		tile->pi = 0;
+	}
+	if (tile->pkthdrstream) {
+		jas_stream_close(tile->pkthdrstream);
+		tile->pkthdrstream = 0;
+	}
+	if (tile->pptstab) {
+		jpc_ppxstab_destroy(tile->pptstab);
+		tile->pptstab = 0;
+	}
+
+	tile->state = JPC_TILE_DONE;
+
+	return 0;
+}
+
+/*
+ * Decode one tile and write its samples into the decoder's image.
+ *
+ * The steps run in this order: code block decoding, ROI undo and (in real
+ * mode) dequantization per band, wavelet synthesis per component, optional
+ * inverse multicomponent transform on the first three components, rounding
+ * to integers (real mode only), level shift, clipping to the component's
+ * range, and finally writing each component into dec->image.
+ *
+ * Returns 0 on success, -1 if code block decoding fails and -4 if writing a
+ * component to the image fails.
+ */
+static int jpc_dec_tiledecode(jpc_dec_t *dec, jpc_dec_tile_t *tile)
+{
+	int i;
+	int j;
+	jpc_dec_tcomp_t *tcomp;
+	jpc_dec_rlvl_t *rlvl;
+	jpc_dec_band_t *band;
+	int compno;
+	int rlvlno;
+	int bandno;
+	int adjust;
+	int v;
+	jpc_dec_ccp_t *ccp;
+	jpc_dec_cmpt_t *cmpt;
+
+	if (jpc_dec_decodecblks(dec, tile)) {
+		jas_eprintf("jpc_dec_decodecblks failed\n");
+		return -1;
+	}
+
+	/* Perform dequantization. */
+	for (compno = 0, tcomp = tile->tcomps; compno < dec->numcomps;
+	  ++compno, ++tcomp) {
+		ccp = &tile->cp->ccps[compno];
+		for (rlvlno = 0, rlvl = tcomp->rlvls; rlvlno < tcomp->numrlvls;
+		  ++rlvlno, ++rlvl) {
+			/* Resolution levels without bands carry no data. */
+			if (!rlvl->bands) {
+				continue;
+			}
+			for (bandno = 0, band = rlvl->bands;
+			  bandno < rlvl->numbands; ++bandno, ++band) {
+				/* Bands without a data matrix are skipped. */
+				if (!band->data) {
+					continue;
+				}
+				/* The background shift is the part of the component's ROI
+				  shift that was not applied to this band. */
+				jpc_undo_roi(band->data, band->roishift, ccp->roishift -
+				  band->roishift, band->numbps);
+				if (tile->realmode) {
+					/* Shift the samples left by JPC_FIX_FRACBITS, then
+					  scale them by the band's absolute step size. */
+					jas_matrix_asl(band->data, JPC_FIX_FRACBITS);
+					jpc_dequantize(band->data, band->absstepsize);
+				}
+
+			}
+		}
+	}
+
+	/* Apply an inverse wavelet transform if necessary. */
+	for (compno = 0, tcomp = tile->tcomps; compno < dec->numcomps;
+	  ++compno, ++tcomp) {
+		/* NOTE(review): ccp is assigned here but not used in this loop. */
+		ccp = &tile->cp->ccps[compno];
+		jpc_tsfb_synthesize(tcomp->tsfb, tcomp->data);
+	}
+
+
+	/* Apply an inverse intercomponent transform if necessary. */
+	/* GeoJasper (dima): with three or more components the transform is
+	  applied to the first three; with fewer it is skipped entirely. */
+	if (dec->numcomps >= 3)
+	switch (tile->cp->mctid) {
+	case JPC_MCT_RCT:
+		//assert(dec->numcomps == 3); // GeoJasper: dima - still apply if there are more component
+		jpc_irct(tile->tcomps[0].data, tile->tcomps[1].data, tile->tcomps[2].data);
+		break;
+	case JPC_MCT_ICT:
+		//assert(dec->numcomps == 3); // GeoJasper: dima - still apply if there are more component
+		jpc_iict(tile->tcomps[0].data, tile->tcomps[1].data, tile->tcomps[2].data);
+		break;
+	}
+
+	/* Perform rounding and convert to integer values. */
+	if (tile->realmode) {
+		for (compno = 0, tcomp = tile->tcomps; compno < dec->numcomps;
+		  ++compno, ++tcomp) {
+			for (i = 0; i < jas_matrix_numrows(tcomp->data); ++i) {
+				for (j = 0; j < jas_matrix_numcols(tcomp->data); ++j) {
+					v = jas_matrix_get(tcomp->data, i, j);
+					v = jpc_fix_round(v);
+					jas_matrix_set(tcomp->data, i, j, jpc_fixtoint(v));
+				}
+			}
+		}
+	}
+
+	/* Perform level shift. */
+	for (compno = 0, tcomp = tile->tcomps, cmpt = dec->cmpts; compno <
+	  dec->numcomps; ++compno, ++tcomp, ++cmpt) {
+		/* Unsigned components get 2^(prec - 1) added; signed ones get 0. */
+		adjust = cmpt->sgnd ? 0 : (1 << (cmpt->prec - 1));
+		for (i = 0; i < jas_matrix_numrows(tcomp->data); ++i) {
+			for (j = 0; j < jas_matrix_numcols(tcomp->data); ++j) {
+				*jas_matrix_getref(tcomp->data, i, j) += adjust;
+			}
+		}
+	}
+
+	/* Perform clipping. */
+	for (compno = 0, tcomp = tile->tcomps, cmpt = dec->cmpts; compno <
+	  dec->numcomps; ++compno, ++tcomp, ++cmpt) {
+		jpc_fix_t mn;
+		jpc_fix_t mx;
+		/* Signed: [-2^(prec-1), 2^(prec-1) - 1].  Unsigned: [0, 2^prec - 1]. */
+		mn = cmpt->sgnd ? (-(1 << (cmpt->prec - 1))) : (0);
+		mx = cmpt->sgnd ? ((1 << (cmpt->prec - 1)) - 1) : ((1 <<
+		  cmpt->prec) - 1);
+		jas_matrix_clip(tcomp->data, mn, mx);
+	}
+
+	/* XXX need to free tsfb struct */
+
+	/* Write the data for each component of the image. */
+	for (compno = 0, tcomp = tile->tcomps, cmpt = dec->cmpts; compno <
+	  dec->numcomps; ++compno, ++tcomp, ++cmpt) {
+		/* The destination offset is the tile-component origin relative to
+		  the image origin on this component's sampling grid. */
+		if (jas_image_writecmpt(dec->image, compno, tcomp->xstart -
+		  JPC_CEILDIV(dec->xstart, cmpt->hstep), tcomp->ystart -
+		  JPC_CEILDIV(dec->ystart, cmpt->vstep), jas_matrix_numcols(
+		  tcomp->data), jas_matrix_numrows(tcomp->data), tcomp->data)) {
+			jas_eprintf("write component failed\n");
+			return -4;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Handle the EOC marker segment.
+ *
+ * Every tile still in the JPC_TILE_ACTIVE state is decoded, and every tile
+ * (active or not) is then finalized.  Returns -1 as soon as decoding a tile
+ * fails; otherwise sets the decoder state to JPC_MT and returns 1.
+ */
+static int jpc_dec_process_eoc(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	jpc_dec_tile_t *tile;
+	int tileno;
+
+	/* Eliminate compiler warnings about unused variables. */
+	ms = 0;
+
+	tile = dec->tiles;
+	for (tileno = 0; tileno < dec->numtiles; ++tileno) {
+		if (tile->state == JPC_TILE_ACTIVE &&
+		  jpc_dec_tiledecode(dec, tile)) {
+			return -1;
+		}
+		jpc_dec_tilefini(dec, tile);
+		++tile;
+	}
+
+	/* We are done processing the code stream. */
+	dec->state = JPC_MT;
+
+	return 1;
+}
+
+/*
+ * Handle the SIZ marker segment: record the image and tile geometry in the
+ * decoder, create the main coding parameters, and allocate and initialize
+ * the per-component and per-tile tables.
+ *
+ * Returns 0 on success (decoder state becomes JPC_MH) and -1 when an
+ * allocation fails.
+ *
+ * NOTE(review): nothing here rejects a zero tile width/height or a zero
+ * sampling step, all of which are used as divisors below; presumably the
+ * marker segment parser validates them -- confirm.
+ */
+static int jpc_dec_process_siz(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	jpc_siz_t *siz = &ms->parms.siz;
+	int compno;
+	int tileno;
+	jpc_dec_tile_t *tile;
+	jpc_dec_tcomp_t *tcomp;
+	int htileno;
+	int vtileno;
+	jpc_dec_cmpt_t *cmpt;
+
+	/* Copy the geometry out of the marker segment. */
+	dec->xstart = siz->xoff;
+	dec->ystart = siz->yoff;
+	dec->xend = siz->width;
+	dec->yend = siz->height;
+	dec->tilewidth = siz->tilewidth;
+	dec->tileheight = siz->tileheight;
+	dec->tilexoff = siz->tilexoff;
+	dec->tileyoff = siz->tileyoff;
+	dec->numcomps = siz->numcomps;
+	if (!(dec->cp = jpc_dec_cp_create(dec->numcomps))) {
+		return -1;
+	}
+
+	if (!(dec->cmpts = jas_malloc(dec->numcomps * sizeof(jpc_dec_cmpt_t)))) {
+		return -1;
+	}
+
+	/* Per-component precision, signedness, sampling steps and dimensions. */
+	for (compno = 0, cmpt = dec->cmpts; compno < dec->numcomps; ++compno,
+	  ++cmpt) {
+		cmpt->prec = siz->comps[compno].prec;
+		cmpt->sgnd = siz->comps[compno].sgnd;
+		cmpt->hstep = siz->comps[compno].hsamp;
+		cmpt->vstep = siz->comps[compno].vsamp;
+		/* Size in samples is the difference of the rounded-up end and
+		  start coordinates on this component's grid. */
+		cmpt->width = JPC_CEILDIV(dec->xend, cmpt->hstep) -
+		  JPC_CEILDIV(dec->xstart, cmpt->hstep);
+		cmpt->height = JPC_CEILDIV(dec->yend, cmpt->vstep) -
+		  JPC_CEILDIV(dec->ystart, cmpt->vstep);
+		cmpt->hsubstep = 0;
+		cmpt->vsubstep = 0;
+	}
+
+	dec->image = 0;
+
+	/* Number of tiles needed to cover the area from the tile origin to the
+	  image end in each direction. */
+	dec->numhtiles = JPC_CEILDIV(dec->xend - dec->tilexoff, dec->tilewidth);
+	dec->numvtiles = JPC_CEILDIV(dec->yend - dec->tileyoff, dec->tileheight);
+	dec->numtiles = dec->numhtiles * dec->numvtiles;
+	if (!(dec->tiles = jas_malloc(dec->numtiles * sizeof(jpc_dec_tile_t)))) {
+		return -1;
+	}
+
+	for (tileno = 0, tile = dec->tiles; tileno < dec->numtiles; ++tileno,
+	  ++tile) {
+		/* Tiles are numbered row by row. */
+		htileno = tileno % dec->numhtiles;
+		vtileno = tileno / dec->numhtiles;
+		tile->realmode = 0;
+		tile->state = JPC_TILE_INIT;
+		/* Clamp the nominal tile rectangle to the image area. */
+		tile->xstart = JAS_MAX(dec->tilexoff + htileno * dec->tilewidth,
+		  dec->xstart);
+		tile->ystart = JAS_MAX(dec->tileyoff + vtileno * dec->tileheight,
+		  dec->ystart);
+		tile->xend = JAS_MIN(dec->tilexoff + (htileno + 1) *
+		  dec->tilewidth, dec->xend);
+		tile->yend = JAS_MIN(dec->tileyoff + (vtileno + 1) *
+		  dec->tileheight, dec->yend);
+		tile->numparts = 0;
+		tile->partno = 0;
+		tile->pkthdrstream = 0;
+		tile->pkthdrstreampos = 0;
+		tile->pptstab = 0;
+		tile->cp = 0;
+		if (!(tile->tcomps = jas_malloc(dec->numcomps *
+		  sizeof(jpc_dec_tcomp_t)))) {
+			return -1;
+		}
+		/* Tile-component bounds are the tile bounds mapped onto each
+		  component's sampling grid; owned pointers start out null. */
+		for (compno = 0, cmpt = dec->cmpts, tcomp = tile->tcomps;
+		  compno < dec->numcomps; ++compno, ++cmpt, ++tcomp) {
+			tcomp->rlvls = 0;
+			tcomp->data = 0;
+			tcomp->xstart = JPC_CEILDIV(tile->xstart, cmpt->hstep);
+			tcomp->ystart = JPC_CEILDIV(tile->ystart, cmpt->vstep);
+			tcomp->xend = JPC_CEILDIV(tile->xend, cmpt->hstep);
+			tcomp->yend = JPC_CEILDIV(tile->yend, cmpt->vstep);
+			tcomp->tsfb = 0;
+		}
+	}
+
+	dec->pkthdrstreams = 0;
+
+	/* We should expect to encounter other main header marker segments
+	  or an SOT marker segment next. */
+	dec->state = JPC_MH;
+
+	return 0;
+}
+
+/*
+ * Handle a COD marker segment.
+ *
+ * In the JPC_MH state the parameters go to the decoder's coding parameters.
+ * In the JPC_TPH state they go to the current tile's, which requires a
+ * current tile whose part number is zero; otherwise -1 is returned.
+ * Any other state is ignored.  Returns 0 unless noted above.
+ */
+static int jpc_dec_process_cod(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	jpc_cod_t *cod = &ms->parms.cod;
+	jpc_dec_tile_t *tile;
+
+	if (dec->state == JPC_MH) {
+		jpc_dec_cp_setfromcod(dec->cp, cod);
+	} else if (dec->state == JPC_TPH) {
+		tile = dec->curtile;
+		if (!tile || tile->partno != 0) {
+			return -1;
+		}
+		jpc_dec_cp_setfromcod(tile->cp, cod);
+	}
+	return 0;
+}
+
+/*
+ * Handle a COC marker segment.
+ *
+ * The component number is validated first: it is used as an index into an
+ * array of numcomps entries, so only 0 .. numcomps - 1 is acceptable.
+ * In the JPC_MH state the parameters go to the decoder's coding parameters;
+ * in the JPC_TPH state they go to the current tile's, which requires a
+ * current tile that is still on its first part.
+ *
+ * Returns 0 on success and -1 on an invalid component number or an invalid
+ * tile state.
+ */
+static int jpc_dec_process_coc(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	jpc_coc_t *coc = &ms->parms.coc;
+	jpc_dec_tile_t *tile;
+
+	/* compno == numcomps would index one element past the ccps array. */
+	if (JAS_CAST(int, coc->compno) >= dec->numcomps) {
+		jas_eprintf("invalid component number in COC marker segment\n");
+		return -1;
+	}
+	switch (dec->state) {
+	case JPC_MH:
+		jpc_dec_cp_setfromcoc(dec->cp, coc);
+		break;
+	case JPC_TPH:
+		if (!(tile = dec->curtile)) {
+			return -1;
+		}
+		if (tile->partno > 0) {
+			return -1;
+		}
+		jpc_dec_cp_setfromcoc(tile->cp, coc);
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Handle an RGN marker segment.
+ *
+ * The component number is validated first: it is used as an index into an
+ * array of numcomps entries, so only 0 .. numcomps - 1 is acceptable.
+ * In the JPC_MH state the ROI shift goes to the decoder's coding
+ * parameters; in the JPC_TPH state it goes to the current tile's, which
+ * requires a current tile that is still on its first part.
+ *
+ * Returns 0 on success and -1 on an invalid component number or an invalid
+ * tile state.
+ */
+static int jpc_dec_process_rgn(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	jpc_rgn_t *rgn = &ms->parms.rgn;
+	jpc_dec_tile_t *tile;
+
+	/* compno == numcomps would index one element past the ccps array. */
+	if (JAS_CAST(int, rgn->compno) >= dec->numcomps) {
+		jas_eprintf("invalid component number in RGN marker segment\n");
+		return -1;
+	}
+	switch (dec->state) {
+	case JPC_MH:
+		jpc_dec_cp_setfromrgn(dec->cp, rgn);
+		break;
+	case JPC_TPH:
+		if (!(tile = dec->curtile)) {
+			return -1;
+		}
+		if (tile->partno > 0) {
+			return -1;
+		}
+		jpc_dec_cp_setfromrgn(tile->cp, rgn);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Handle a QCD marker segment.
+ *
+ * In the JPC_MH state the parameters go to the decoder's coding parameters.
+ * In the JPC_TPH state they go to the current tile's, which requires a
+ * current tile whose part number is not greater than zero; otherwise -1 is
+ * returned.  Any other state is ignored.  Returns 0 unless noted above.
+ */
+static int jpc_dec_process_qcd(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	jpc_qcd_t *qcd = &ms->parms.qcd;
+	jpc_dec_tile_t *tile;
+
+	if (dec->state == JPC_MH) {
+		jpc_dec_cp_setfromqcd(dec->cp, qcd);
+	} else if (dec->state == JPC_TPH) {
+		tile = dec->curtile;
+		if (!tile || tile->partno > 0) {
+			return -1;
+		}
+		jpc_dec_cp_setfromqcd(tile->cp, qcd);
+	}
+	return 0;
+}
+
+/*
+ * Handle a QCC marker segment.
+ *
+ * The component number is validated first: it is used as an index into an
+ * array of numcomps entries, so only 0 .. numcomps - 1 is acceptable.
+ * In the JPC_MH state the parameters go to the decoder's coding parameters;
+ * in the JPC_TPH state they go to the current tile's, which requires a
+ * current tile that is still on its first part.
+ *
+ * Returns 0 on success and -1 on an invalid component number or an invalid
+ * tile state.
+ */
+static int jpc_dec_process_qcc(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	jpc_qcc_t *qcc = &ms->parms.qcc;
+	jpc_dec_tile_t *tile;
+
+	/* compno == numcomps would index one element past the ccps array. */
+	if (JAS_CAST(int, qcc->compno) >= dec->numcomps) {
+		jas_eprintf("invalid component number in QCC marker segment\n");
+		return -1;
+	}
+	switch (dec->state) {
+	case JPC_MH:
+		jpc_dec_cp_setfromqcc(dec->cp, qcc);
+		break;
+	case JPC_TPH:
+		if (!(tile = dec->curtile)) {
+			return -1;
+		}
+		if (tile->partno > 0) {
+			return -1;
+		}
+		jpc_dec_cp_setfromqcc(tile->cp, qcc);
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Handle a POC marker segment.
+ *
+ * In the JPC_MH state the decoder's progression change list is replaced by
+ * the changes in the marker segment.  In the JPC_TPH state the same is done
+ * for the current tile when it is on its first part; on later parts the
+ * changes are appended to the tile's packet iterator instead.
+ *
+ * Returns 0 on success and -1 if there is no current tile or if any of the
+ * list updates fails.
+ */
+static int jpc_dec_process_poc(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	jpc_poc_t *poc = &ms->parms.poc;
+	jpc_dec_tile_t *tile;
+	switch (dec->state) {
+	case JPC_MH:
+		if (jpc_dec_cp_setfrompoc(dec->cp, poc, 1)) {
+			return -1;
+		}
+		break;
+	case JPC_TPH:
+		if (!(tile = dec->curtile)) {
+			return -1;
+		}
+		if (!tile->partno) {
+			if (jpc_dec_cp_setfrompoc(tile->cp, poc, 1)) {
+				return -1;
+			}
+		} else {
+			/* A failure here used to be dropped silently. */
+			if (jpc_pi_addpchgfrompoc(tile->pi, poc)) {
+				return -1;
+			}
+		}
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Handle a PPM marker segment by adding its payload to the decoder's PPM
+ * table, creating the table on first use.
+ *
+ * The data pointer is moved out of the marker segment into the new table
+ * entry (the marker segment's pointer is cleared).  Returns 0 on success
+ * and -1 if the table or entry cannot be created or the insert fails.
+ */
+static int jpc_dec_process_ppm(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	jpc_ppm_t *ppm = &ms->parms.ppm;
+	jpc_ppxstabent_t *ent;
+
+	if (!dec->ppmstab && !(dec->ppmstab = jpc_ppxstab_create())) {
+		return -1;
+	}
+
+	ent = jpc_ppxstabent_create();
+	if (!ent) {
+		return -1;
+	}
+
+	ent->ind = ppm->ind;
+	ent->len = ppm->len;
+	ent->data = ppm->data;
+	ppm->data = 0;
+
+	return jpc_ppxstab_insert(dec->ppmstab, ent) ? -1 : 0;
+}
+
+/*
+ * Handle a PPT marker segment by adding its payload to the current tile's
+ * PPT table, creating the table on first use.
+ *
+ * The data pointer is moved out of the marker segment into the new table
+ * entry (the marker segment's pointer is cleared).  Returns 0 on success
+ * and -1 if there is no current tile, the table or entry cannot be created,
+ * or the insert fails.
+ */
+static int jpc_dec_process_ppt(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	jpc_ppt_t *ppt = &ms->parms.ppt;
+	jpc_dec_tile_t *tile;
+	jpc_ppxstabent_t *pptstabent;
+
+	/* Reject the marker when no tile is current, as the other tile-part
+	  header handlers do, instead of dereferencing a null pointer. */
+	if (!(tile = dec->curtile)) {
+		return -1;
+	}
+	if (!tile->pptstab) {
+		if (!(tile->pptstab = jpc_ppxstab_create())) {
+			return -1;
+		}
+	}
+	if (!(pptstabent = jpc_ppxstabent_create())) {
+		return -1;
+	}
+	pptstabent->ind = ppt->ind;
+	pptstabent->data = ppt->data;
+	ppt->data = 0;
+	pptstabent->len = ppt->len;
+	if (jpc_ppxstab_insert(tile->pptstab, pptstabent)) {
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Handle a COM marker segment.  Nothing is done with it; always returns 0.
+ */
+static int jpc_dec_process_com(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	/* Neither argument is needed; the assignments only keep compilers
+	  from warning about unused parameters. */
+	ms = 0;
+	dec = 0;
+
+	return 0;
+}
+
+/*
+ * Handle a marker segment of unknown type: print a warning, dump the marker
+ * segment to stderr, and carry on.  Always returns 0.
+ */
+static int jpc_dec_process_unk(jpc_dec_t *dec, jpc_ms_t *ms)
+{
+	/* The decoder is not needed; the assignment only keeps compilers from
+	  warning about the unused parameter. */
+	dec = 0;
+
+	jas_eprintf("warning: ignoring unknown marker segment\n");
+	jpc_ms_dump(ms, stderr);
+
+	return 0;
+}
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+/*
+ * Allocate a coding parameter set for the given number of components.
+ *
+ * All scalar fields start at zero, the progression change list starts
+ * empty, and the listed per-component fields are zeroed.  Returns the new
+ * object, or 0 if any allocation fails; in that case everything allocated
+ * so far is released.
+ */
+static jpc_dec_cp_t *jpc_dec_cp_create(uint_fast16_t numcomps)
+{
+	jpc_dec_cp_t *cp;
+	jpc_dec_ccp_t *ccp;
+	int compno;
+
+	if (!(cp = jas_malloc(sizeof(jpc_dec_cp_t)))) {
+		return 0;
+	}
+	cp->flags = 0;
+	cp->numcomps = numcomps;
+	cp->prgord = 0;
+	cp->numlyrs = 0;
+	cp->mctid = 0;
+	cp->csty = 0;
+	if (!(cp->ccps = jas_malloc(cp->numcomps * sizeof(jpc_dec_ccp_t)))) {
+		/* Do not leak the partially built object. */
+		jas_free(cp);
+		return 0;
+	}
+	if (!(cp->pchglist = jpc_pchglist_create())) {
+		jas_free(cp->ccps);
+		jas_free(cp);
+		return 0;
+	}
+	for (compno = 0, ccp = cp->ccps; compno < cp->numcomps;
+	  ++compno, ++ccp) {
+		ccp->flags = 0;
+		ccp->numrlvls = 0;
+		ccp->cblkwidthexpn = 0;
+		ccp->cblkheightexpn = 0;
+		ccp->qmfbid = 0;
+		ccp->numstepsizes = 0;
+		ccp->numguardbits = 0;
+		ccp->roishift = 0;
+		ccp->cblkctx = 0;
+	}
+	return cp;
+}
+
+/*
+ * Create a deep copy of a coding parameter set.
+ *
+ * The scalar fields and every per-component entry are copied by value, and
+ * the progression change list is duplicated.  Returns the copy, or 0 if an
+ * allocation fails; in that case the partially built copy is destroyed.
+ */
+static jpc_dec_cp_t *jpc_dec_cp_copy(jpc_dec_cp_t *cp)
+{
+	jpc_dec_cp_t *newcp;
+	jpc_dec_ccp_t *newccp;
+	jpc_dec_ccp_t *ccp;
+	int compno;
+
+	if (!(newcp = jpc_dec_cp_create(cp->numcomps))) {
+		return 0;
+	}
+	newcp->flags = cp->flags;
+	newcp->prgord = cp->prgord;
+	newcp->numlyrs = cp->numlyrs;
+	newcp->mctid = cp->mctid;
+	newcp->csty = cp->csty;
+	/* Swap the empty list made by the constructor for a copy of the
+	  source list. */
+	jpc_pchglist_destroy(newcp->pchglist);
+	newcp->pchglist = 0;
+	if (!(newcp->pchglist = jpc_pchglist_copy(cp->pchglist))) {
+		/* Releases the component array as well as the object itself;
+		  the destructor tolerates the null list pointer. */
+		jpc_dec_cp_destroy(newcp);
+		return 0;
+	}
+	for (compno = 0, newccp = newcp->ccps, ccp = cp->ccps;
+	  compno < cp->numcomps;
+	  ++compno, ++newccp, ++ccp) {
+		*newccp = *ccp;
+	}
+	return newcp;
+}
+
+/*
+ * Reset the flags of a coding parameter set: only the JPC_CSET and JPC_QSET
+ * bits of the top-level flags are kept, and every per-component flag word
+ * is cleared.
+ */
+static void jpc_dec_cp_resetflags(jpc_dec_cp_t *cp)
+{
+	jpc_dec_ccp_t *ccp = cp->ccps;
+	int compno = 0;
+
+	cp->flags &= (JPC_CSET | JPC_QSET);
+	while (compno < cp->numcomps) {
+		ccp->flags = 0;
+		++ccp;
+		++compno;
+	}
+}
+
+/*
+ * Destroy a coding parameter set: the progression change list and the
+ * per-component array are released when present, then the object itself.
+ */
+static void jpc_dec_cp_destroy(jpc_dec_cp_t *cp)
+{
+	if (cp->ccps)
+		jas_free(cp->ccps);
+
+	if (cp->pchglist)
+		jpc_pchglist_destroy(cp->pchglist);
+
+	jas_free(cp);
+}
+
+/*
+ * Check whether a coding parameter set is complete enough to use.
+ *
+ * Both the JPC_CSET and JPC_QSET flags must be set, and every component
+ * must carry a suitable number of step sizes: exactly one for the
+ * JPC_QCX_SIQNT quantization style, otherwise at least 3 * numrlvls - 2.
+ * Returns 1 if valid and 0 if not.
+ */
+static int jpc_dec_cp_isvalid(jpc_dec_cp_t *cp)
+{
+	uint_fast16_t remaining;
+	jpc_dec_ccp_t *ccp;
+
+	if (!(cp->flags & JPC_CSET) || !(cp->flags & JPC_QSET)) {
+		return 0;
+	}
+
+	ccp = cp->ccps;
+	for (remaining = cp->numcomps; remaining > 0; --remaining) {
+		/* Is there enough step sizes for the number of bands? */
+		if (ccp->qsty == JPC_QCX_SIQNT) {
+			if (ccp->numstepsizes != 1) {
+				return 0;
+			}
+		} else if (JAS_CAST(int, ccp->numstepsizes) < 3 *
+		  ccp->numrlvls - 2) {
+			return 0;
+		}
+		++ccp;
+	}
+	return 1;
+}
+
+/*
+ * Fill stepsizes[0 .. 3 * numrlvls - 3] with step sizes derived from a
+ * single reference step size.
+ *
+ * Every entry keeps the reference mantissa.  The exponent is the reference
+ * exponent plus 0 for band 0 and (bandno + 2) / 3 for the other bands.
+ */
+static void calcstepsizes(uint_fast16_t refstepsize, int numrlvls,
+  uint_fast16_t *stepsizes)
+{
+	uint_fast16_t expn = JPC_QCX_GETEXPN(refstepsize);
+	uint_fast16_t mant = JPC_QCX_GETMANT(refstepsize);
+	int numbands = 3 * numrlvls - 2;
+	int bandno;
+	int lvl;
+
+	for (bandno = 0; bandno < numbands; ++bandno) {
+		lvl = (bandno > 0) ? ((bandno + 2) / 3) : (0);
+		stepsizes[bandno] = JPC_QCX_MANT(mant) | JPC_QCX_EXPN(expn +
+		  (numrlvls - 1) - (numrlvls - 1 - lvl));
+	}
+}
+
+/*
+ * Fill in the derived per-component parameters.
+ *
+ * A component without the JPC_COX_PRT style bit gets precinct width and
+ * height exponents of 15 at all JPC_MAXRLVLS levels.  A component using the
+ * JPC_QCX_SIQNT quantization style has its step size table expanded from
+ * its first entry.  Always returns 0.
+ */
+static int jpc_dec_cp_prepare(jpc_dec_cp_t *cp)
+{
+	jpc_dec_ccp_t *ccp = cp->ccps;
+	int compno;
+	int lvl;
+
+	for (compno = 0; compno < cp->numcomps; ++compno, ++ccp) {
+		if (!(ccp->csty & JPC_COX_PRT)) {
+			lvl = 0;
+			while (lvl < JPC_MAXRLVLS) {
+				ccp->prcwidthexpns[lvl] = 15;
+				ccp->prcheightexpns[lvl] = 15;
+				++lvl;
+			}
+		}
+		if (ccp->qsty == JPC_QCX_SIQNT) {
+			calcstepsizes(ccp->stepsizes[0], ccp->numrlvls, ccp->stepsizes);
+		}
+	}
+	return 0;
+}
+
+/*
+ * Apply a COD marker segment to a coding parameter set.
+ *
+ * Sets the JPC_CSET flag, the progression order, the number of layers, the
+ * SOP/EPH style bits and the multicomponent transform (none when mctrans is
+ * zero; otherwise ICT if the filter is JPC_COX_INS and RCT if not), then
+ * pushes the component parameters to every component.  Always returns 0.
+ */
+static int jpc_dec_cp_setfromcod(jpc_dec_cp_t *cp, jpc_cod_t *cod)
+{
+	jpc_dec_ccp_t *ccp;
+	int compno;
+
+	cp->flags |= JPC_CSET;
+	cp->prgord = cod->prg;
+
+	if (!cod->mctrans) {
+		cp->mctid = JPC_MCT_NONE;
+	} else if (cod->compparms.qmfbid == JPC_COX_INS) {
+		cp->mctid = JPC_MCT_ICT;
+	} else {
+		cp->mctid = JPC_MCT_RCT;
+	}
+
+	cp->numlyrs = cod->numlyrs;
+	cp->csty = cod->csty & (JPC_COD_SOP | JPC_COD_EPH);
+
+	ccp = cp->ccps;
+	for (compno = 0; compno < cp->numcomps; ++compno) {
+		jpc_dec_cp_setfromcox(cp, ccp, &cod->compparms, 0);
+		++ccp;
+	}
+
+	return 0;
+}
+
+/*
+ * Apply a COC marker segment to the component it names, tagging the update
+ * with JPC_COC.  Always returns 0.
+ */
+static int jpc_dec_cp_setfromcoc(jpc_dec_cp_t *cp, jpc_coc_t *coc)
+{
+	jpc_dec_ccp_t *target = &cp->ccps[coc->compno];
+
+	jpc_dec_cp_setfromcox(cp, target, &coc->compparms, JPC_COC);
+	return 0;
+}
+
+/*
+ * Copy COD/COC component parameters into one component's settings.
+ *
+ * The update is skipped when the component already has the JPC_COC flag and
+ * this call does not carry it.  Otherwise the resolution level count, code
+ * block size exponents, filter id, code block style, precinct style bit and
+ * per-level precinct exponents are copied, and the component's flags gain
+ * the given flags plus JPC_CSET.  Always returns 0.
+ */
+static int jpc_dec_cp_setfromcox(jpc_dec_cp_t *cp, jpc_dec_ccp_t *ccp,
+  jpc_coxcp_t *compparms, int flags)
+{
+	int lvl;
+
+	/* Eliminate compiler warnings about unused variables. */
+	cp = 0;
+
+	if (!(flags & JPC_COC) && (ccp->flags & JPC_COC)) {
+		return 0;
+	}
+
+	ccp->numrlvls = compparms->numdlvls + 1;
+	ccp->cblkwidthexpn =
+	  JPC_COX_GETCBLKSIZEEXPN(compparms->cblkwidthval);
+	ccp->cblkheightexpn =
+	  JPC_COX_GETCBLKSIZEEXPN(compparms->cblkheightval);
+	ccp->qmfbid = compparms->qmfbid;
+	ccp->cblkctx = compparms->cblksty;
+	ccp->csty = compparms->csty & JPC_COX_PRT;
+	for (lvl = 0; lvl < compparms->numrlvls; ++lvl) {
+		ccp->prcwidthexpns[lvl] = compparms->rlvls[lvl].parwidthval;
+		ccp->prcheightexpns[lvl] = compparms->rlvls[lvl].parheightval;
+	}
+	ccp->flags |= flags | JPC_CSET;
+
+	return 0;
+}
+
+/*
+ * Apply a QCD marker segment: push its quantization parameters to every
+ * component and set the JPC_QSET flag.  Always returns 0.
+ */
+static int jpc_dec_cp_setfromqcd(jpc_dec_cp_t *cp, jpc_qcd_t *qcd)
+{
+	jpc_dec_ccp_t *ccp = cp->ccps;
+	int compno = 0;
+
+	while (compno < cp->numcomps) {
+		jpc_dec_cp_setfromqcx(cp, ccp, &qcd->compparms, 0);
+		++ccp;
+		++compno;
+	}
+	cp->flags |= JPC_QSET;
+	return 0;
+}
+
+/*
+ * Apply a QCC marker segment to the component it names, tagging the update
+ * with JPC_QCC.  Returns whatever the component update returns.
+ */
+static int jpc_dec_cp_setfromqcc(jpc_dec_cp_t *cp, jpc_qcc_t *qcc)
+{
+	jpc_dec_ccp_t *target = &cp->ccps[qcc->compno];
+
+	return jpc_dec_cp_setfromqcx(cp, target, &qcc->compparms, JPC_QCC);
+}
+
+/*
+ * Copy QCD/QCC quantization parameters into one component's settings.
+ *
+ * The update is skipped when the component already has the JPC_QCC flag and
+ * this call does not carry it.  Otherwise the step sizes, their count, the
+ * guard bit count and the quantization style are copied, and the
+ * component's flags gain the given flags plus JPC_QSET.  Always returns 0.
+ */
+static int jpc_dec_cp_setfromqcx(jpc_dec_cp_t *cp, jpc_dec_ccp_t *ccp,
+  jpc_qcxcp_t *compparms, int flags)
+{
+	int idx;
+
+	/* Eliminate compiler warnings about unused variables. */
+	cp = 0;
+
+	if (!(flags & JPC_QCC) && (ccp->flags & JPC_QCC)) {
+		return 0;
+	}
+
+	ccp->flags |= flags | JPC_QSET;
+	idx = 0;
+	while (idx < compparms->numstepsizes) {
+		ccp->stepsizes[idx] = compparms->stepsizes[idx];
+		++idx;
+	}
+	ccp->numstepsizes = compparms->numstepsizes;
+	ccp->numguardbits = compparms->numguard;
+	ccp->qsty = compparms->qntsty;
+
+	return 0;
+}
+
+/*
+ * Apply an RGN marker segment: store its ROI shift in the component it
+ * names.  Always returns 0.
+ */
+static int jpc_dec_cp_setfromrgn(jpc_dec_cp_t *cp, jpc_rgn_t *rgn)
+{
+	cp->ccps[rgn->compno].roishift = rgn->roishift;
+	return 0;
+}
+
+/*
+ * Append a copy of every progression change in a POC marker segment to the
+ * packet iterator's change list.
+ *
+ * Returns 0 on success and -1 if a change cannot be copied or inserted.
+ * Changes appended before the failure stay in the list.
+ */
+static int jpc_pi_addpchgfrompoc(jpc_pi_t *pi, jpc_poc_t *poc)
+{
+	int pchgno;
+	jpc_pchg_t *pchg;
+	for (pchgno = 0; pchgno < poc->numpchgs; ++pchgno) {
+		if (!(pchg = jpc_pchg_copy(&poc->pchgs[pchgno]))) {
+			return -1;
+		}
+		if (jpc_pchglist_insert(pi->pchglist, -1, pchg)) {
+			/* The list did not take the copy, so it is still ours
+			  to release. */
+			jpc_pchg_destroy(pchg);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Add the progression changes of a POC marker segment to a coding parameter
+ * set's change list.
+ *
+ * When reset is nonzero the existing list is emptied first (each removed
+ * change is destroyed).  Returns 0 on success and -1 if a change cannot be
+ * copied or inserted; changes appended before the failure stay in the list.
+ */
+static int jpc_dec_cp_setfrompoc(jpc_dec_cp_t *cp, jpc_poc_t *poc, int reset)
+{
+	int pchgno;
+	jpc_pchg_t *pchg;
+	if (reset) {
+		while (jpc_pchglist_numpchgs(cp->pchglist) > 0) {
+			pchg = jpc_pchglist_remove(cp->pchglist, 0);
+			jpc_pchg_destroy(pchg);
+		}
+	}
+	for (pchgno = 0; pchgno < poc->numpchgs; ++pchgno) {
+		if (!(pchg = jpc_pchg_copy(&poc->pchgs[pchgno]))) {
+			return -1;
+		}
+		if (jpc_pchglist_insert(cp->pchglist, -1, pchg)) {
+			/* The list did not take the copy, so it is still ours
+			  to release. */
+			jpc_pchg_destroy(pchg);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Convert a packed step size into an absolute step size in fixed point.
+ *
+ * The result starts as fixed-point 1 with the mantissa of stepsize OR-ed in
+ * after aligning it by JPC_FIX_FRACBITS - 11 bits, and is then shifted by
+ * (numbits - exponent of stepsize): left when that is non-negative, right
+ * otherwise.
+ */
+static jpc_fix_t jpc_calcabsstepsize(int stepsize, int numbits)
+{
+	jpc_fix_t result;
+	int shift;
+
+	result = jpc_inttofix(1);
+
+	shift = JPC_FIX_FRACBITS - 11;
+	if (shift >= 0) {
+		result |= (JPC_QCX_GETMANT(stepsize) << shift);
+	} else {
+		result |= (JPC_QCX_GETMANT(stepsize) >> (-shift));
+	}
+
+	shift = numbits - JPC_QCX_GETEXPN(stepsize);
+	if (shift >= 0) {
+		result = result << shift;
+	} else {
+		result = result >> (-shift);
+	}
+
+	return result;
+}
+
+/*
+ * Multiply every nonzero element of a matrix by an absolute step size using
+ * fixed-point multiplication.  Zero elements are written back as zero.
+ * Nothing is done when the step size equals fixed-point 1.
+ */
+static void jpc_dequantize(jas_matrix_t *x, jpc_fix_t absstepsize)
+{
+	int row;
+	int col;
+	int t;
+
+	assert(absstepsize >= 0);
+	if (absstepsize == jpc_inttofix(1)) {
+		return;
+	}
+
+	for (row = 0; row < jas_matrix_numrows(x); ++row) {
+		for (col = 0; col < jas_matrix_numcols(x); ++col) {
+			t = jas_matrix_get(x, row, col);
+			if (t) {
+				t = jpc_fix_mul(t, absstepsize);
+			}
+			jas_matrix_set(x, row, col, t);
+		}
+	}
+}
+
+/*
+ * Undo region-of-interest scaling on a matrix of samples.
+ *
+ * A sample whose magnitude is at least 2^roishift is treated as ROI data
+ * and its magnitude is shifted right by roishift.  Any other sample is
+ * treated as background: its magnitude is shifted left by bgshift and any
+ * bits above the low numbps bits are masked off, with a single warning
+ * printed the first time that happens.  The sign is preserved in both
+ * cases.  Nothing is done when both shifts are zero.
+ */
+static void jpc_undo_roi(jas_matrix_t *x, int roishift, int bgshift, int numbps)
+{
+	int row;
+	int col;
+	int thresh;
+	jpc_fix_t val;
+	jpc_fix_t mag;
+	bool warn;
+	uint_fast32_t mask;
+
+	if (roishift == 0 && bgshift == 0) {
+		return;
+	}
+	thresh = 1 << roishift;
+
+	warn = false;
+	for (row = 0; row < jas_matrix_numrows(x); ++row) {
+		for (col = 0; col < jas_matrix_numcols(x); ++col) {
+			val = jas_matrix_get(x, row, col);
+			mag = JAS_ABS(val);
+			if (mag >= thresh) {
+				/* We are dealing with ROI data. */
+				mag >>= roishift;
+			} else {
+				/* We are dealing with non-ROI (i.e., background) data. */
+				mag <<= bgshift;
+				mask = (1 << numbps) - 1;
+				/* Some implementations write garbage in the unused
+				  most-significant bit planes introduced by ROI shifting.
+				  Here we ensure that any such bits are masked off. */
+				if (mag & (~mask)) {
+					if (!warn) {
+						jas_eprintf("warning: possibly corrupt code stream\n");
+						warn = true;
+					}
+					mag &= mask;
+				}
+			}
+			/* Reapply the original sign to the adjusted magnitude. */
+			val = (val < 0) ? (-mag) : mag;
+			jas_matrix_set(x, row, col, val);
+		}
+	}
+}
+
+/*
+ * Allocate a decoder bound to the given input stream.
+ *
+ * The layer and packet limits are taken from the import options; every
+ * other field starts at zero/null.  Returns the decoder, or 0 if the
+ * allocation fails.
+ */
+static jpc_dec_t *jpc_dec_create(jpc_dec_importopts_t *impopts, jas_stream_t *in)
+{
+	jpc_dec_t *dec;
+
+	if (!(dec = jas_malloc(sizeof(jpc_dec_t)))) {
+		return 0;
+	}
+
+	dec->image = 0;
+	dec->xstart = 0;
+	dec->ystart = 0;
+	dec->xend = 0;
+	dec->yend = 0;
+	dec->tilewidth = 0;
+	dec->tileheight = 0;
+	dec->tilexoff = 0;
+	dec->tileyoff = 0;
+	dec->numhtiles = 0;
+	dec->numvtiles = 0;
+	dec->numtiles = 0;
+	dec->tiles = 0;
+	dec->curtile = 0;
+	dec->numcomps = 0;
+	dec->in = in;
+	dec->cp = 0;
+	dec->maxlyrs = impopts->maxlyrs;
+	dec->maxpkts = impopts->maxpkts;
+	dec->numpkts = 0;
+	dec->ppmseqno = 0;
+	dec->state = 0;
+	dec->cmpts = 0;
+	dec->pkthdrstreams = 0;
+	dec->ppmstab = 0;
+	dec->curtileendoff = 0;
+	/* jpc_dec_destroy() tests this pointer, so it must not be left
+	  uninitialized when the decoder is torn down early. */
+	dec->cstate = 0;
+
+	return dec;
+}
+
+/*
+ * Destroy a decoder together with the objects it still owns: the code
+ * stream state, the packet header stream list, the PPM table, the image,
+ * the coding parameters, the component table and the tile array.
+ *
+ * NOTE(review): only the tile array itself is freed here; per-tile
+ * resources are presumably released through jpc_dec_tilefini() before this
+ * point -- confirm for the error paths.
+ */
+static void jpc_dec_destroy(jpc_dec_t *dec)
+{
+	if (dec->cstate) {
+		jpc_cstate_destroy(dec->cstate);
+	}
+	if (dec->pkthdrstreams) {
+		jpc_streamlist_destroy(dec->pkthdrstreams);
+	}
+	/* A PPM table that is still attached to the decoder at teardown
+	  would otherwise be leaked. */
+	if (dec->ppmstab) {
+		jpc_ppxstab_destroy(dec->ppmstab);
+		dec->ppmstab = 0;
+	}
+	if (dec->image) {
+		jas_image_destroy(dec->image);
+	}
+
+	if (dec->cp) {
+		jpc_dec_cp_destroy(dec->cp);
+	}
+
+	if (dec->cmpts) {
+		jas_free(dec->cmpts);
+	}
+
+	if (dec->tiles) {
+		jas_free(dec->tiles);
+	}
+
+	jas_free(dec);
+}
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+/*
+ * Link node into a doubly linked segment list directly after ins.
+ * When ins is null, node becomes the new head and its next pointer is
+ * cleared.  The list's head and tail are updated as needed.
+ */
+void jpc_seglist_insert(jpc_dec_seglist_t *list, jpc_dec_seg_t *ins, jpc_dec_seg_t *node)
+{
+	jpc_dec_seg_t *after;
+
+	after = ins ? (ins->next) : 0;
+
+	node->prev = ins;
+	node->next = after;
+
+	if (ins) {
+		ins->next = node;
+	} else {
+		list->head = node;
+	}
+
+	if (after) {
+		after->prev = node;
+	} else {
+		list->tail = node;
+	}
+}
+
+/*
+ * Unlink seg from a doubly linked segment list, fixing up the neighbours
+ * (or the list's head/tail) and clearing seg's own links.  The segment
+ * itself is not freed.
+ */
+void jpc_seglist_remove(jpc_dec_seglist_t *list, jpc_dec_seg_t *seg)
+{
+	jpc_dec_seg_t *before = seg->prev;
+	jpc_dec_seg_t *after = seg->next;
+
+	if (before) {
+		before->next = after;
+	} else {
+		list->head = after;
+	}
+
+	if (after) {
+		after->prev = before;
+	} else {
+		list->tail = before;
+	}
+
+	seg->next = 0;
+	seg->prev = 0;
+}
+
+/*
+ * Allocate an unlinked segment: no stream, zero counters, type
+ * JPC_SEG_INVALID, and pass and layer numbers of -1.
+ * Returns the segment, or 0 if the allocation fails.
+ */
+jpc_dec_seg_t *jpc_seg_alloc()
+{
+	jpc_dec_seg_t *seg = jas_malloc(sizeof(jpc_dec_seg_t));
+
+	if (!seg) {
+		return 0;
+	}
+
+	/* List linkage. */
+	seg->prev = 0;
+	seg->next = 0;
+
+	/* Pass bookkeeping. */
+	seg->passno = -1;
+	seg->numpasses = 0;
+	seg->maxpasses = 0;
+
+	/* Payload. */
+	seg->type = JPC_SEG_INVALID;
+	seg->stream = 0;
+	seg->cnt = 0;
+	seg->complete = 0;
+	seg->lyrno = -1;
+
+	return seg;
+}
+
+/*
+ * Destroy a segment, closing its stream first when it has one.
+ */
+void jpc_seg_destroy(jpc_dec_seg_t *seg)
+{
+	if (seg->stream)
+		jas_stream_close(seg->stream);
+
+	jas_free(seg);
+}
+
+/*
+ * Print one rectangle (start, end, width and height) in the dump format.
+ */
+static void jpc_dec_dumprect(FILE *out, int xs, int ys, int xe, int ye)
+{
+	fprintf(out, "xs =%d, ys = %d, xe = %d, ye = %d, w = %d, h = %d\n",
+	  xs, ys, xe, ye, xe - xs, ye - ys);
+}
+
+/*
+ * Write a textual description of the decoder's tile structure to out:
+ * every resolution level, band, precinct ("code block group") and code
+ * block, each with its bounding rectangle.
+ *
+ * Parts of the hierarchy that are not populated (a tile without
+ * components, a component without resolution levels, a level without
+ * bands, a band without data or precincts, a precinct without code blocks)
+ * are skipped rather than dereferenced.  Coordinates are converted to int
+ * to match the %d conversions.  Always returns 0.
+ */
+static int jpc_dec_dump(jpc_dec_t *dec, FILE *out)
+{
+	jpc_dec_tile_t *tile;
+	int tileno;
+	jpc_dec_tcomp_t *tcomp;
+	int compno;
+	jpc_dec_rlvl_t *rlvl;
+	int rlvlno;
+	jpc_dec_band_t *band;
+	int bandno;
+	jpc_dec_prc_t *prc;
+	int prcno;
+	jpc_dec_cblk_t *cblk;
+	int cblkno;
+
+	for (tileno = 0, tile = dec->tiles; tileno < dec->numtiles;
+	  ++tileno, ++tile) {
+		/* The component array is cleared when a tile is finalized. */
+		if (!tile->tcomps) {
+			continue;
+		}
+		for (compno = 0, tcomp = tile->tcomps; compno < dec->numcomps;
+		  ++compno, ++tcomp) {
+			if (!tcomp->rlvls) {
+				continue;
+			}
+			for (rlvlno = 0, rlvl = tcomp->rlvls; rlvlno <
+			  tcomp->numrlvls; ++rlvlno, ++rlvl) {
+				fprintf(out, "RESOLUTION LEVEL %d\n", rlvlno);
+				jpc_dec_dumprect(out, JAS_CAST(int, rlvl->xstart),
+				  JAS_CAST(int, rlvl->ystart),
+				  JAS_CAST(int, rlvl->xend),
+				  JAS_CAST(int, rlvl->yend));
+				/* Empty resolution levels have no band array. */
+				if (!rlvl->bands) {
+					continue;
+				}
+				for (bandno = 0, band = rlvl->bands;
+				  bandno < rlvl->numbands; ++bandno, ++band) {
+					fprintf(out, "BAND %d\n", bandno);
+					/* Empty bands have neither data nor precincts. */
+					if (!band->data || !band->prcs) {
+						continue;
+					}
+					jpc_dec_dumprect(out,
+					  JAS_CAST(int, jas_seq2d_xstart(band->data)),
+					  JAS_CAST(int, jas_seq2d_ystart(band->data)),
+					  JAS_CAST(int, jas_seq2d_xend(band->data)),
+					  JAS_CAST(int, jas_seq2d_yend(band->data)));
+					for (prcno = 0, prc = band->prcs;
+					  prcno < rlvl->numprcs; ++prcno,
+					  ++prc) {
+						fprintf(out, "CODE BLOCK GROUP %d\n", prcno);
+						jpc_dec_dumprect(out,
+						  JAS_CAST(int, prc->xstart),
+						  JAS_CAST(int, prc->ystart),
+						  JAS_CAST(int, prc->xend),
+						  JAS_CAST(int, prc->yend));
+						/* Empty precincts have no code blocks. */
+						if (!prc->cblks) {
+							continue;
+						}
+						for (cblkno = 0, cblk =
+						  prc->cblks; cblkno <
+						  prc->numcblks; ++cblkno,
+						  ++cblk) {
+							fprintf(out, "CODE BLOCK %d\n", cblkno);
+							jpc_dec_dumprect(out,
+							  JAS_CAST(int, jas_seq2d_xstart(cblk->data)),
+							  JAS_CAST(int, jas_seq2d_ystart(cblk->data)),
+							  JAS_CAST(int, jas_seq2d_xend(cblk->data)),
+							  JAS_CAST(int, jas_seq2d_yend(cblk->data)));
+						}
+					}
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Create an empty stream list with room for 100 streams, every slot
+ * cleared.  Returns the list, or 0 if an allocation fails.
+ */
+jpc_streamlist_t *jpc_streamlist_create()
+{
+	jpc_streamlist_t *list;
+	int slot;
+
+	list = jas_malloc(sizeof(jpc_streamlist_t));
+	if (!list) {
+		return 0;
+	}
+
+	list->numstreams = 0;
+	list->maxstreams = 100;
+	list->streams = jas_malloc(list->maxstreams * sizeof(jas_stream_t *));
+	if (!list->streams) {
+		jas_free(list);
+		return 0;
+	}
+
+	slot = 0;
+	while (slot < list->maxstreams) {
+		list->streams[slot] = 0;
+		++slot;
+	}
+
+	return list;
+}
+
+int jpc_streamlist_insert(jpc_streamlist_t *streamlist, int streamno,
+  jas_stream_t *stream)
+{
+	/* Appends stream at index streamno; returns 0 on success, -1 on error. */
+	jas_stream_t **newstreams;
+	int newmaxstreams, i;
+	if (streamlist->numstreams >= streamlist->maxstreams) {
+		newmaxstreams = streamlist->maxstreams + 1024;
+		if (!(newstreams = jas_realloc(streamlist->streams,
+		  newmaxstreams * sizeof(jas_stream_t *)))) {
+			return -1;
+		}
+		/* Null the added slots via the new array; the old one may be freed. */
+		for (i = streamlist->numstreams; i < newmaxstreams; ++i) {
+			newstreams[i] = 0;
+		}
+		streamlist->maxstreams = newmaxstreams;
+		streamlist->streams = newstreams;
+	}
+	if (streamno != streamlist->numstreams) {
+		/* Only insertion at the end of the list (append) is supported. */
+		return -1;
+	}
+	streamlist->streams[streamno] = stream;
+	++streamlist->numstreams;
+	return 0;
+}
+
+jas_stream_t *jpc_streamlist_remove(jpc_streamlist_t *streamlist, int streamno)
+{
+	/* Detach and return stream streamno, shifting later streams down a slot. */
+	jas_stream_t *stream;
+	if (streamno < 0 || streamno >= streamlist->numstreams) {
+		abort();	/* Out-of-range index, negative included. */
+	}
+	stream = streamlist->streams[streamno];
+	for (; streamno + 1 < streamlist->numstreams; ++streamno) {
+		streamlist->streams[streamno] = streamlist->streams[streamno + 1];
+	}
+	--streamlist->numstreams;
+	return stream;
+}
+
+void jpc_streamlist_destroy(jpc_streamlist_t *streamlist)
+{
+	/* Close every stream still held, then free the array and the list. */
+	int n = 0;
+	if (streamlist->streams) {
+		while (n < streamlist->numstreams) {
+			jas_stream_close(streamlist->streams[n++]);
+		}
+		jas_free(streamlist->streams);
+	}
+	jas_free(streamlist);
+}
+
+jas_stream_t *jpc_streamlist_get(jpc_streamlist_t *streamlist, int streamno)
+{
+	assert(streamno < streamlist->numstreams);	/* Only the upper bound is checked. */
+	return streamlist->streams[streamno];	/* The stream stays in the list. */
+}
+
+int jpc_streamlist_numstreams(jpc_streamlist_t *streamlist)
+{
+	return streamlist->numstreams;	/* Streams currently held, not the capacity. */
+}
+
+jpc_ppxstab_t *jpc_ppxstab_create()
+{
+	/* Allocate an empty table (no entries, no storage); 0 if out of memory. */
+	jpc_ppxstab_t *table = jas_malloc(sizeof(jpc_ppxstab_t));
+	if (!table) {
+		return 0;
+	}
+	table->ents = 0;
+	table->maxents = 0;
+	table->numents = 0;
+	return table;
+}
+
+void jpc_ppxstab_destroy(jpc_ppxstab_t *tab)
+{
+	int entno = 0;	/* Next entry to destroy. */
+	while (entno < tab->numents) {
+		jpc_ppxstabent_destroy(tab->ents[entno++]);
+	}
+	if (tab->ents) {
+		jas_free(tab->ents);	/* The array exists only once the table has grown. */
+	}
+	jas_free(tab);
+}
+
+int jpc_ppxstab_grow(jpc_ppxstab_t *tab, int maxents)
+{
+	/* Ensure room for maxents entries; returns 0 on success, -1 on failure. */
+	if (tab->maxents < maxents) {
+		jpc_ppxstabent_t **grown = (tab->ents) ? jas_realloc(tab->ents,
+		  maxents * sizeof(*grown)) : jas_malloc(maxents * sizeof(*grown));
+		if (!grown) {
+			return -1;
+		}
+		tab->ents = grown;
+		tab->maxents = maxents;
+	}
+	return 0;
+}
+
+int jpc_ppxstab_insert(jpc_ppxstab_t *tab, jpc_ppxstabent_t *ent)
+{
+	int pos;
+	int k;
+
+	/* Find the first entry whose index exceeds the new one; the new entry
+	  goes just before it, so an ordered table stays ordered by index. */
+	pos = 0;
+	while (pos < tab->numents && tab->ents[pos]->ind <= ent->ind) {
+		++pos;
+	}
+
+	/* Make room for one more entry (128 at a time); fail with -1 if we can't. */
+	if (tab->numents >= tab->maxents &&
+	  jpc_ppxstab_grow(tab, tab->maxents + 128)) {
+		return -1;
+	}
+
+	/* Shift the tail up by one slot and drop the new entry into the gap. */
+	for (k = tab->numents; k > pos; --k) {
+		tab->ents[k] = tab->ents[k - 1];
+	}
+	tab->ents[pos] = ent;
+	++tab->numents;
+	return 0;
+}
+
+jpc_streamlist_t *jpc_ppmstabtostreams(jpc_ppxstab_t *tab)
+{
+	jpc_streamlist_t *streams;
+	uchar *dataptr;
+	uint_fast32_t datacnt;
+	uint_fast32_t tpcnt;
+	jpc_ppxstabent_t *ent;
+	int entno;
+	jas_stream_t *stream;
+	int n;
+	/* Build one packet-header stream per tile-part; returns 0 on error. */
+	if (!(streams = jpc_streamlist_create())) {
+		return 0;	/* No list exists yet, so there is nothing to destroy. */
+	}
+
+	if (!tab->numents) {
+		return streams;
+	}
+
+	entno = 0;
+	ent = tab->ents[entno];
+	dataptr = ent->data;
+	datacnt = ent->len;
+	for (;;) {
+		/* Get the length of the packet header data for the current tile-part
+		  (big-endian; widen the high bytes so the shifts cannot sign-extend). */
+		if (datacnt < 4) {
+			goto error;
+		}
+		if (!(stream = jas_stream_memopen(0, 0))) {
+			goto error;
+		}
+		if (jpc_streamlist_insert(streams, jpc_streamlist_numstreams(streams),
+		  stream)) {
+			jas_stream_close(stream);	/* Not owned by the list. */
+			goto error;
+		}
+		tpcnt = ((uint_fast32_t)dataptr[0] << 24) |
+		  ((uint_fast32_t)dataptr[1] << 16) | (dataptr[2] << 8) | dataptr[3];
+		datacnt -= 4;
+		dataptr += 4;
+
+		/* Get the packet header data for the current tile-part. */
+		while (tpcnt) {
+			if (!datacnt) {
+				if (++entno >= tab->numents) {
+					goto error;
+				}
+				ent = tab->ents[entno];
+				dataptr = ent->data;
+				datacnt = ent->len;
+			}
+			n = JAS_MIN(tpcnt, datacnt);
+			if (jas_stream_write(stream, dataptr, n) != n) {
+				goto error;
+			}
+			tpcnt -= n;
+			dataptr += n;
+			datacnt -= n;
+		}
+		jas_stream_rewind(stream);
+		if (!datacnt) {
+			if (++entno >= tab->numents) {
+				break;
+			}
+			ent = tab->ents[entno];
+			dataptr = ent->data;
+			datacnt = ent->len;
+		}
+	}
+
+	return streams;
+
+error:
+	jpc_streamlist_destroy(streams);
+	return 0;
+}
+
+int jpc_pptstabwrite(jas_stream_t *out, jpc_ppxstab_t *tab)
+{
+	/* Write every entry's data to out in table order; -1 on a short write. */
+	int entno;
+	for (entno = 0; entno < tab->numents; ++entno) {
+		jpc_ppxstabent_t *cur = tab->ents[entno];
+		if (jas_stream_write(out, cur->data, cur->len) != JAS_CAST(int, cur->len)) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+jpc_ppxstabent_t *jpc_ppxstabent_create()
+{
+	/* Allocate a blank entry (no data, length 0, index 0); 0 on failure. */
+	jpc_ppxstabent_t *entry = jas_malloc(sizeof(jpc_ppxstabent_t));
+	if (entry) {
+		entry->ind = 0;
+		entry->len = 0;
+		entry->data = 0;
+	}
+	return entry;
+}
+
+void jpc_ppxstabent_destroy(jpc_ppxstabent_t *ent)
+{
+	if (ent->data) {	/* jpc_ppxstabent_create() leaves data null. */
+		jas_free(ent->data);
+	}
+	jas_free(ent);	/* Release the entry itself. */
+}
diff --git a/src/libjasper/jpc/jpc_dec.h b/src/libjasper/jpc/jpc_dec.h
new file mode 100644
index 0000000..20e114a
--- /dev/null
+++ b/src/libjasper/jpc/jpc_dec.h
@@ -0,0 +1,696 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * JPEG-2000 Decoder
+ *
+ * $Id: jpc_dec.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_DEC_H
+#define JPC_DEC_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_stream.h"
+
+#include "jpc_tsfb.h"
+#include "jpc_bs.h"
+#include "jpc_tagtree.h"
+#include "jpc_cs.h"
+#include "jpc_cod.h"
+#include "jpc_mqdec.h"
+#include "jpc_t2cod.h"
+
+/******************************************************************************\
+* Below are some ugly warts necessary to support packed packet headers.
+\******************************************************************************/
+
+/* PPM/PPT marker segment table entry. */
+
+typedef struct {
+
+	/* The index for this entry (the sort key used by jpc_ppxstab_insert). */
+	uint_fast16_t ind;
+
+	/* The data length (the number of uchar elements in data). */
+	uint_fast32_t len;
+
+	/* The data (released by jpc_ppxstabent_destroy). */
+	uchar *data;
+
+} jpc_ppxstabent_t;
+
+/* PPM/PPT marker segment table. */
+
+typedef struct {
+
+	/* The number of entries currently stored in the array below. */
+	int numents;
+
+	/* The maximum number of entries (i.e., the allocated size of the array
+	  below). */
+	int maxents;
+
+	/* The table entries (null until the table first grows). */
+	jpc_ppxstabent_t **ents;
+
+} jpc_ppxstab_t;
+
+/* Stream list class. */
+
+typedef struct {
+
+	/* The number of streams in this list. */
+	int numstreams;
+
+	/* The maximum number of streams that can be accommodated without
+	  growing the streams array. */
+	int maxstreams;
+
+	/* The streams. */
+	jas_stream_t **streams;
+
+} jpc_streamlist_t;
+
+/******************************************************************************\
+* Coding parameters class.
+\******************************************************************************/
+
+/* Per-component coding parameters. */
+
+typedef struct {
+
+	/* How were various coding parameters set? */
+	int flags;
+
+	/* Per-component coding style parameters (e.g., explicit precinct sizes) */
+	uint_fast8_t csty;
+
+	/* The number of resolution levels. */
+	uint_fast8_t numrlvls;
+
+	/* The code block width exponent. */
+	uint_fast8_t cblkwidthexpn;
+
+	/* The code block height exponent. */
+	uint_fast8_t cblkheightexpn;
+
+	/* The QMFB ID. */
+	uint_fast8_t qmfbid;
+
+	/* The quantization style. */
+	uint_fast8_t qsty;
+
+	/* The number of quantizer step sizes. */
+	uint_fast16_t numstepsizes;
+
+	/* The step sizes. */
+	uint_fast16_t stepsizes[3 * JPC_MAXRLVLS + 1];
+
+	/* The number of guard bits. */
+	uint_fast8_t numguardbits;
+
+	/* The ROI shift value. */
+	uint_fast8_t roishift;
+
+	/* The code block parameters. */
+	uint_fast8_t cblkctx;
+
+	/* The precinct width exponents. */
+	uint_fast8_t prcwidthexpns[JPC_MAXRLVLS];
+
+	/* The precinct height exponents. */
+	uint_fast8_t prcheightexpns[JPC_MAXRLVLS];
+
+} jpc_dec_ccp_t;
+
+/* Coding parameters. */
+
+typedef struct {
+
+	/* How were these coding parameters set? */
+	int flags;
+
+	/* Progression change list. */
+	jpc_pchglist_t *pchglist;
+
+	/* Progression order. */
+	uint_fast8_t prgord;
+
+	/* The number of layers. */
+	uint_fast16_t numlyrs;
+
+	/* The MCT ID. */
+	uint_fast8_t mctid;
+
+	/* The coding style parameters (e.g., SOP, EPH). */
+	uint_fast8_t csty;
+
+	/* The number of components. */
+	int numcomps;
+
+	/* The per-component coding parameters. */
+	jpc_dec_ccp_t *ccps;
+
+} jpc_dec_cp_t;
+
+/******************************************************************************\
+* Decoder class.
+\******************************************************************************/
+
+/* Decoder per-segment state information. */
+
+typedef struct jpc_dec_seg_s {
+
+	/* The next segment in the list. */
+	struct jpc_dec_seg_s *next;
+
+	/* The previous segment in the list. */
+	struct jpc_dec_seg_s *prev;
+
+	/* The starting pass number for this segment. */
+	int passno;
+
+	/* The number of passes in this segment. */
+	int numpasses;
+
+	/* The maximum number of passes in this segment. */
+	int maxpasses;
+
+	/* The type of data in this segment (i.e., MQ or raw). */
+	int type;
+
+	/* A stream containing the data for this segment. */
+	jas_stream_t *stream;
+
+	/* The number of bytes destined for this segment from the packet
+	  currently being decoded. */
+	int cnt;
+
+	/* A flag indicating if this segment has been terminated. */
+	int complete;
+
+	/* The layer number to which this segment belongs. */
+	/* If the segment spans multiple layers, then the largest layer number
+	  spanned by the segment is used. */
+	int lyrno;
+
+} jpc_dec_seg_t;
+
+/* Decoder segment list. */
+
+typedef struct {
+
+	/* The first entry in the list. */
+	jpc_dec_seg_t *head;
+
+	/* The last entry in the list. */
+	jpc_dec_seg_t *tail;
+
+} jpc_dec_seglist_t;
+
+/* Decoder per-code-block state information. */
+
+typedef struct {
+
+	/* The number of passes. */
+	int numpasses;
+
+	/* A list of segments that still need to be decoded. */
+	jpc_dec_seglist_t segs;
+
+	/* The first incomplete/partial segment. */
+	jpc_dec_seg_t *curseg;
+
+	/* The number of leading insignificant bit planes for this code block. */
+	int numimsbs;
+
+	/* The number of bits used to encode pass data lengths. */
+	int numlenbits;
+
+	/* The first pass number containing data for this code block. */
+	int firstpassno;
+
+	/* The MQ decoder. */
+	jpc_mqdec_t *mqdec;
+
+	/* The raw bit stream decoder. */
+	jpc_bitstream_t *nulldec;
+
+	/* The per-sample state information for this code block. */
+	jas_matrix_t *flags;
+
+	/* The sample data associated with this code block. */
+	jas_matrix_t *data;
+
+} jpc_dec_cblk_t;
+
+/* Decoder per-code-block-group state information. */
+
+typedef struct {
+
+	/* The x-coordinate of the top-left corner of the precinct. */
+	uint_fast32_t xstart;
+
+	/* The y-coordinate of the top-left corner of the precinct. */
+	uint_fast32_t ystart;
+
+	/* The x-coordinate of the bottom-right corner of the precinct
+	  (plus one). */
+	uint_fast32_t xend;
+
+	/* The y-coordinate of the bottom-right corner of the precinct
+	  (plus one). */
+	uint_fast32_t yend;
+
+	/* The number of code blocks spanning this precinct in the horizontal
+	  direction. */
+	int numhcblks;
+
+	/* The number of code blocks spanning this precinct in the vertical
+	  direction. */
+	int numvcblks;
+
+	/* The total number of code blocks in this precinct. */
+	int numcblks;
+
+	/* The per code block information. */
+	jpc_dec_cblk_t *cblks;
+
+	/* The inclusion tag tree. */
+	jpc_tagtree_t *incltagtree;
+
+	/* The insignificant MSBs tag tree. */
+	jpc_tagtree_t *numimsbstagtree;
+
+} jpc_dec_prc_t;
+
+/* Decoder per-band state information. */
+
+typedef struct {
+
+	/* The per-code-block-group state information. */
+	jpc_dec_prc_t *prcs;
+
+	/* The sample data associated with this band. */
+	jas_matrix_t *data;
+
+	/* The orientation of this band (i.e., LL, LH, HL, or HH). */
+	int orient;
+
+	/* The encoded quantizer step size. */
+	int stepsize;
+
+	/* The absolute quantizer step size. */
+	jpc_fix_t absstepsize;
+
+	/* The number of bit planes for this band. */
+	int numbps;
+
+	/* The analysis gain associated with this band. */
+	int analgain;
+
+	/* The ROI shift value for this band. */
+	int roishift;
+
+} jpc_dec_band_t;
+
+/* Decoder per-resolution-level state information. */
+
+typedef struct {
+
+	/* The number of bands associated with this resolution level. */
+	int numbands;
+
+	/* The per-band information. */
+	jpc_dec_band_t *bands;
+
+	/* The x-coordinate of the top-left corner of the tile-component
+	  at this resolution. */
+	uint_fast32_t xstart;
+
+	/* The y-coordinate of the top-left corner of the tile-component
+	  at this resolution. */
+	uint_fast32_t ystart;
+
+	/* The x-coordinate of the bottom-right corner of the tile-component
+	  at this resolution (plus one). */
+	uint_fast32_t xend;
+
+	/* The y-coordinate of the bottom-right corner of the tile-component
+	  at this resolution (plus one). */
+	uint_fast32_t yend;
+
+	/* The exponent value for the nominal precinct width measured
+	  relative to the associated LL band. */
+	int prcwidthexpn;
+
+	/* The exponent value for the nominal precinct height measured
+	  relative to the associated LL band. */
+	int prcheightexpn;
+
+	/* The number of precincts in the horizontal direction. */
+	int numhprcs;
+
+	/* The number of precincts in the vertical direction. */
+	int numvprcs;
+
+	/* The total number of precincts. */
+	int numprcs;
+
+	/* The exponent value for the nominal code block group width.
+	  This quantity is associated with the next lower resolution level
+	  (assuming that there is one). */
+	int cbgwidthexpn;
+
+	/* The exponent value for the nominal code block group height.
+	  This quantity is associated with the next lower resolution level
+	  (assuming that there is one). */
+	int cbgheightexpn;
+
+	/* The exponent value for the code block width. */
+	uint_fast16_t cblkwidthexpn;
+
+	/* The exponent value for the code block height. */
+	uint_fast16_t cblkheightexpn;
+
+} jpc_dec_rlvl_t;
+
+/* Decoder per-tile-component state information. */
+
+typedef struct {
+
+	/* The x-coordinate of the top-left corner of the tile-component
+	  in the coordinate system of the tile-component. */
+	uint_fast32_t xstart;
+
+	/* The y-coordinate of the top-left corner of the tile-component
+	  in the coordinate system of the tile-component. */
+	uint_fast32_t ystart;
+
+	/* The x-coordinate of the bottom-right corner of the tile-component
+	  in the coordinate system of the tile-component (plus one). */
+	uint_fast32_t xend;
+
+	/* The y-coordinate of the bottom-right corner of the tile-component
+	  in the coordinate system of the tile-component (plus one). */
+	uint_fast32_t yend;
+
+	/* The component data for the current tile. */
+	jas_matrix_t *data;
+
+	/* The number of resolution levels. */
+	int numrlvls;
+
+	/* The per resolution level information. */
+	jpc_dec_rlvl_t *rlvls;
+
+	/* The TSFB. */
+	jpc_tsfb_t *tsfb;
+
+} jpc_dec_tcomp_t;
+
+/*
+ * Tile states.
+ */
+
+#define	JPC_TILE_INIT	0
+#define	JPC_TILE_ACTIVE	1
+#define	JPC_TILE_ACTIVELAST	2
+#define	JPC_TILE_DONE	3
+
+/* Decoder per-tile state information. */
+
+typedef struct {
+
+	/* The processing state for this tile. */
+	int state;
+
+	/* The x-coordinate of the top-left corner of the tile on the reference
+	  grid. */
+	uint_fast32_t xstart;
+
+	/* The y-coordinate of the top-left corner of the tile on the reference
+	  grid. */
+	uint_fast32_t ystart;
+
+	/* The x-coordinate of the bottom-right corner of the tile on the
+	  reference grid (plus one). */
+	uint_fast32_t xend;
+
+	/* The y-coordinate of the bottom-right corner of the tile on the
+	  reference grid (plus one). */
+	uint_fast32_t yend;
+
+	/* The packed packet header data for this tile. */
+	jpc_ppxstab_t *pptstab;
+
+	/* A stream containing the packed packet header data for this tile. */
+	jas_stream_t *pkthdrstream;
+
+	/* The current position within the packed packet header stream. */
+	long pkthdrstreampos;
+
+	/* The coding parameters for this tile. */
+	jpc_dec_cp_t *cp;
+
+	/* The per tile-component information. */
+	jpc_dec_tcomp_t *tcomps;
+
+	/* The next expected tile-part number. */
+	int partno;
+
+	/* The number of tile-parts. */
+	int numparts;
+
+	/* The coding mode. */
+	int realmode;
+
+	/* The packet iterator for this tile. */
+	jpc_pi_t *pi;
+
+} jpc_dec_tile_t;
+
+/* Decoder per-component state information. */
+
+typedef struct {
+
+	/* The horizontal sampling period. */
+	uint_fast32_t hstep;
+
+	/* The vertical sampling period. */
+	uint_fast32_t vstep;
+
+	/* The number of samples in the horizontal direction. */
+	uint_fast32_t width;
+
+	/* The number of samples in the vertical direction. */
+	uint_fast32_t height;
+
+	/* The precision of the sample data. */
+	uint_fast16_t prec;
+
+	/* The signedness of the sample data. */
+	bool sgnd;
+
+	/* The sample alignment horizontal offset. */
+	uint_fast32_t hsubstep;
+	
+	/* The sample alignment vertical offset. */
+	uint_fast32_t vsubstep;
+
+} jpc_dec_cmpt_t;
+
+/* Decoder state information. */
+
+typedef struct {
+
+	/* The decoded image. */
+	jas_image_t *image;
+
+	/* The x-coordinate of the top-left corner of the image area on
+	  the reference grid. */
+	uint_fast32_t xstart;
+
+	/* The y-coordinate of the top-left corner of the image area on
+	  the reference grid. */
+	uint_fast32_t ystart;
+
+	/* The x-coordinate of the bottom-right corner of the image area on
+	  the reference grid (plus one). */
+	uint_fast32_t xend;
+
+	/* The y-coordinate of the bottom-right corner of the image area on
+	  the reference grid (plus one). */
+	uint_fast32_t yend;
+
+	/* The nominal tile width in units of the image reference grid. */
+	uint_fast32_t tilewidth;
+
+	/* The nominal tile height in units of the image reference grid. */
+	uint_fast32_t tileheight;
+
+	/* The horizontal offset from the origin of the reference grid to the
+	  left side of the first tile. */
+	uint_fast32_t tilexoff;
+
+	/* The vertical offset from the origin of the reference grid to the
+	  top side of the first tile. */
+	uint_fast32_t tileyoff;
+
+	/* The number of tiles spanning the image area in the horizontal
+	  direction. */
+	int numhtiles;
+
+	/* The number of tiles spanning the image area in the vertical
+	  direction. */
+	int numvtiles;
+
+	/* The total number of tiles. */
+	int numtiles;
+
+	/* The per-tile information. */
+	jpc_dec_tile_t *tiles;
+
+	/* The tile currently being processed. */
+	jpc_dec_tile_t *curtile;
+
+	/* The number of components. */
+	int numcomps;
+
+	/* The stream containing the input JPEG-2000 code stream data. */
+	jas_stream_t *in;
+
+	/* The default coding parameters for all tiles. */
+	jpc_dec_cp_t *cp;
+
+	/* The maximum number of layers that may be decoded. */
+	int maxlyrs;
+
+	/* The maximum number of packets that may be decoded. */
+	int maxpkts;
+
+	/* The number of packets decoded so far in the processing of the entire
+	  code stream. */
+	int numpkts;
+
+	/* The next expected PPM marker segment sequence number. */
+	int ppmseqno;
+
+	/* The current state for code stream processing. */
+	int state;
+
+	/* The per-component information. */
+	jpc_dec_cmpt_t *cmpts;
+
+	/* The information from PPM marker segments. */
+	jpc_ppxstab_t *ppmstab;
+
+	/* A list of streams containing packet header data from PPM marker
+	  segments. */
+	jpc_streamlist_t *pkthdrstreams;
+
+	/* The expected ending offset for a tile-part. */
+	long curtileendoff;
+
+	/* This is required by the tier-2 decoder. */
+	jpc_cstate_t *cstate;
+
+} jpc_dec_t;
+
+/* Decoder options. */
+
+typedef struct {
+
+	/* The debug level for the decoder. */
+	int debug;
+
+	/* The maximum number of layers to decode. */
+	int maxlyrs;
+
+	/* The maximum number of packets to decode. */
+	int maxpkts;
+
+} jpc_dec_importopts_t;
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+/* Create a decoder segment object. */
+jpc_dec_seg_t *jpc_seg_alloc(void);
+
+/* Destroy a decoder segment object. */
+void jpc_seg_destroy(jpc_dec_seg_t *seg);
+
+/* Remove a segment from a segment list. */
+void jpc_seglist_remove(jpc_dec_seglist_t *list, jpc_dec_seg_t *node);
+
+/* Insert a segment into a segment list. */
+void jpc_seglist_insert(jpc_dec_seglist_t *list, jpc_dec_seg_t *ins,
+  jpc_dec_seg_t *node);
+
+#endif
diff --git a/src/libjasper/jpc/jpc_enc.c b/src/libjasper/jpc/jpc_enc.c
new file mode 100644
index 0000000..95afad2
--- /dev/null
+++ b/src/libjasper/jpc/jpc_enc.c
@@ -0,0 +1,2626 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * $Id: jpc_enc.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+#include <float.h>
+
+#include "jasper/jas_types.h"
+#include "jasper/jas_string.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_image.h"
+#include "jasper/jas_fix.h"
+#include "jasper/jas_tvp.h"
+#include "jasper/jas_version.h"
+#include "jasper/jas_math.h"
+#include "jasper/jas_debug.h"
+
+#include "jpc_flt.h"
+#include "jpc_fix.h"
+#include "jpc_tagtree.h"
+#include "jpc_enc.h"
+#include "jpc_cs.h"
+#include "jpc_mct.h"
+#include "jpc_tsfb.h"
+#include "jpc_qmfb.h"
+#include "jpc_t1enc.h"
+#include "jpc_t2enc.h"
+#include "jpc_cod.h"
+#include "jpc_math.h"
+#include "jpc_util.h"
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+/* Two raised to the power n. */
+#define JPC_POW2(n) \
+	(1 << (n))
+
+/* Round x to a multiple of 2^n in the direction of negative infinity;
+  x is returned unchanged when n <= 0.  The shift count is parenthesized
+  so that an expression argument for n expands correctly. */
+#define JPC_FLOORTOMULTPOW2(x, n) \
+  (((n) > 0) ? ((x) & (~((1 << (n)) - 1))) : (x))
+
+/* Round x to a multiple of 2^n in the direction of positive infinity;
+  x is returned unchanged when n <= 0.  Arguments are evaluated more than
+  once, so they must be free of side effects. */
+#define	JPC_CEILTOMULTPOW2(x, n) \
+  (((n) > 0) ? JPC_FLOORTOMULTPOW2(((x) + (1 << (n)) - 1), n) : (x))
+
+jpc_enc_tile_t *jpc_enc_tile_create(jpc_enc_cp_t *cp, jas_image_t *image, int tileno);
+void jpc_enc_tile_destroy(jpc_enc_tile_t *tile);
+/* Static create/destroy pairs for tcmpt, rlvl, band, prc and cblk objects, then other helpers. */
+static jpc_enc_tcmpt_t *tcmpt_create(jpc_enc_tcmpt_t *tcmpt, jpc_enc_cp_t *cp,
+  jas_image_t *image, jpc_enc_tile_t *tile);
+static void tcmpt_destroy(jpc_enc_tcmpt_t *tcmpt);
+static jpc_enc_rlvl_t *rlvl_create(jpc_enc_rlvl_t *rlvl, jpc_enc_cp_t *cp,
+  jpc_enc_tcmpt_t *tcmpt, jpc_tsfb_band_t *bandinfos);
+static void rlvl_destroy(jpc_enc_rlvl_t *rlvl);
+static jpc_enc_band_t *band_create(jpc_enc_band_t *band, jpc_enc_cp_t *cp,
+  jpc_enc_rlvl_t *rlvl, jpc_tsfb_band_t *bandinfos);
+static void band_destroy(jpc_enc_band_t *bands);
+static jpc_enc_prc_t *prc_create(jpc_enc_prc_t *prc, jpc_enc_cp_t *cp,
+  jpc_enc_band_t *band);
+static void prc_destroy(jpc_enc_prc_t *prcs);
+static jpc_enc_cblk_t *cblk_create(jpc_enc_cblk_t *cblk, jpc_enc_cp_t *cp,
+  jpc_enc_prc_t *prc);
+static void cblk_destroy(jpc_enc_cblk_t *cblks);
+int ratestrtosize(char *s, uint_fast32_t rawsize, uint_fast32_t *size);
+static void pass_destroy(jpc_enc_pass_t *pass);
+void jpc_enc_dump(jpc_enc_t *enc);
+
+/******************************************************************************\
+* Local prototypes.
+\******************************************************************************/
+
+int dump_passes(jpc_enc_pass_t *passes, int numpasses, jpc_enc_cblk_t *cblk);
+void calcrdslopes(jpc_enc_cblk_t *cblk);
+void dump_layeringinfo(jpc_enc_t *enc);
+static int jpc_calcssexp(jpc_fix_t stepsize);
+static int jpc_calcssmant(jpc_fix_t stepsize);
+void jpc_quantize(jas_matrix_t *data, jpc_fix_t stepsize);
+static int jpc_enc_encodemainhdr(jpc_enc_t *enc);
+static int jpc_enc_encodemainbody(jpc_enc_t *enc);
+int jpc_enc_encodetiledata(jpc_enc_t *enc);
+jpc_enc_t *jpc_enc_create(jpc_enc_cp_t *cp, jas_stream_t *out, jas_image_t *image);
+void jpc_enc_destroy(jpc_enc_t *enc);
+int rateallocate(jpc_enc_t *enc, int numlyrs, uint_fast32_t *cumlens);
+int setins(int numvalues, jpc_flt_t *values, jpc_flt_t value);
+static jpc_enc_cp_t *cp_create(char *optstr, jas_image_t *image);
+void jpc_enc_cp_destroy(jpc_enc_cp_t *cp);
+static uint_fast32_t jpc_abstorelstepsize(jpc_fix_t absdelta, int scaleexpn);
+
+/* Pack a step-size exponent (the low 5 bits of x, shifted left by 11) or a
+  step-size mantissa (the low 11 bits of x).  Bits outside those ranges are
+  discarded without any check. */
+#define	GJPC_QCX_EXPN(x)		(((x) & 0x1f) << 11)
+#define	GJPC_QCX_MANT(x)		((x) & 0x7ff)
+
+/* Convert an absolute fixed-point step size into packed exponent/mantissa
+  form relative to scaleexpn; aborts if absdelta < 0 or scaleexpn is too small. */
+static uint_fast32_t jpc_abstorelstepsize(jpc_fix_t absdelta, int scaleexpn)
+{
+	int msb;
+	int relexpn;
+	uint_fast32_t mant;
+
+	if (absdelta < 0) {
+		abort();
+	}
+	msb = jpc_firstone(absdelta);
+	relexpn = scaleexpn - (msb - JPC_FIX_FRACBITS);
+	if (relexpn < 0) {
+		abort();
+	}
+	/* Align the value so that the bits below bit msb fill the 11-bit field. */
+	mant = ((msb <= 11) ? (absdelta << (11 - msb)) : (absdelta >> (msb - 11))) & 0x7ff;
+	return JPC_QCX_EXPN((uint_fast32_t) relexpn) | JPC_QCX_MANT(mant);
+}
+
+typedef enum {	/* Identifiers of the encoder options listed in encopts. */
+	OPT_DEBUG,
+	OPT_IMGAREAOFFX,	/* tag "imgareatlx" */
+	OPT_IMGAREAOFFY,	/* tag "imgareatly" */
+	OPT_TILEGRDOFFX,	/* tag "tilegrdtlx" */
+	OPT_TILEGRDOFFY,	/* tag "tilegrdtly" */
+	OPT_TILEWIDTH,
+	OPT_TILEHEIGHT,
+	OPT_PRCWIDTH,
+	OPT_PRCHEIGHT,
+	OPT_CBLKWIDTH,
+	OPT_CBLKHEIGHT,
+	OPT_MODE,
+	OPT_PRG,
+	OPT_NOMCT,
+	OPT_MAXRLVLS,	/* tag "numrlvls" */
+	OPT_SOP,
+	OPT_EPH,
+	OPT_LAZY,
+	OPT_TERMALL,
+	OPT_SEGSYM,
+	OPT_VCAUSAL,
+	OPT_RESET,	/* tag "resetprob" */
+	OPT_PTERM,
+	OPT_NUMGBITS,
+	OPT_RATE,
+	OPT_ILYRRATES,
+	OPT_JP2OVERHEAD	/* tag "_jp2overhead" */
+} optid_t;
+
+jas_taginfo_t encopts[] = {	/* Option tag strings looked up by cp_create. */
+	{OPT_DEBUG, "debug"},
+	{OPT_IMGAREAOFFX, "imgareatlx"},
+	{OPT_IMGAREAOFFY, "imgareatly"},
+	{OPT_TILEGRDOFFX, "tilegrdtlx"},
+	{OPT_TILEGRDOFFY, "tilegrdtly"},
+	{OPT_TILEWIDTH, "tilewidth"},
+	{OPT_TILEHEIGHT, "tileheight"},
+	{OPT_PRCWIDTH, "prcwidth"},
+	{OPT_PRCHEIGHT, "prcheight"},
+	{OPT_CBLKWIDTH, "cblkwidth"},
+	{OPT_CBLKHEIGHT, "cblkheight"},
+	{OPT_MODE, "mode"},
+	{OPT_PRG, "prg"},
+	{OPT_NOMCT, "nomct"},
+	{OPT_MAXRLVLS, "numrlvls"},
+	{OPT_SOP, "sop"},
+	{OPT_EPH, "eph"},
+	{OPT_LAZY, "lazy"},
+	{OPT_TERMALL, "termall"},
+	{OPT_SEGSYM, "segsym"},
+	{OPT_VCAUSAL, "vcausal"},
+	{OPT_PTERM, "pterm"},
+	{OPT_RESET, "resetprob"},
+	{OPT_NUMGBITS, "numgbits"},
+	{OPT_RATE, "rate"},
+	{OPT_ILYRRATES, "ilyrrates"},
+	{OPT_JP2OVERHEAD, "_jp2overhead"},
+	{-1, 0}	/* presumably the end-of-table marker; confirm in jas_tvp */
+};
+
+typedef enum {	/* NOTE(review): not referenced in the visible part of this file. */
+	PO_L = 0,
+	PO_R
+} poid_t;
+
+
+jas_taginfo_t prgordtab[] = {	/* Values accepted by the "prg" option. */
+	{JPC_COD_LRCPPRG, "lrcp"},
+	{JPC_COD_RLCPPRG, "rlcp"},
+	{JPC_COD_RPCLPRG, "rpcl"},
+	{JPC_COD_PCRLPRG, "pcrl"},
+	{JPC_COD_CPRLPRG, "cprl"},
+	{-1, 0}	/* presumably the end-of-table marker; confirm in jas_tvp */
+};
+
+typedef enum {	/* Identifiers for the values of the "mode" option. */
+	MODE_INT,	/* selects tcp->intmode = true in cp_create */
+	MODE_REAL	/* selects tcp->intmode = false in cp_create */
+} modeid_t;
+
+jas_taginfo_t modetab[] = {	/* Values accepted by the "mode" option. */
+	{MODE_INT, "int"},
+	{MODE_REAL, "real"},
+	{-1, 0}	/* presumably the end-of-table marker; confirm in jas_tvp */
+};
+
+/******************************************************************************\
+* The main encoder entry point.
+\******************************************************************************/
+
+/* Encode image as a JPEG-2000 code stream written to out, using the
+  option string optstr (which may be null).  Returns 0 on success and -1
+  on failure.  Neither image nor out is destroyed here. */
+int jpc_encode(jas_image_t *image, jas_stream_t *out, char *optstr)
+{
+	jpc_enc_t *enc = 0;
+	jpc_enc_cp_t *cp = 0;
+
+	jpc_initluts();
+
+	if (!(cp = cp_create(optstr, image))) {
+		jas_eprintf("invalid JP encoder options\n");
+		goto error;
+	}
+
+	if (!(enc = jpc_enc_create(cp, out, image))) {
+		goto error;
+	}
+	/* The encoder now holds cp and releases it in jpc_enc_destroy. */
+	cp = 0;
+
+	/* Encode the main header. */
+	if (jpc_enc_encodemainhdr(enc)) {
+		goto error;
+	}
+	/* Encode the main body.  This constitutes most of the encoding work. */
+	if (jpc_enc_encodemainbody(enc)) {
+		goto error;
+	}
+	/* Write EOC marker segment. */
+	if (!(enc->mrk = jpc_ms_create(JPC_MS_EOC))) {
+		goto error;
+	}
+	if (jpc_putms(enc->out, enc->cstate, enc->mrk)) {
+		jas_eprintf("cannot write EOC marker\n");
+		/* jpc_enc_destroy does not release enc->mrk, so do it here. */
+		jpc_ms_destroy(enc->mrk);
+		enc->mrk = 0;
+		goto error;
+	}
+	jpc_ms_destroy(enc->mrk);
+	enc->mrk = 0;
+
+	if (jas_stream_flush(enc->out)) {
+		goto error;
+	}
+	jpc_enc_destroy(enc);
+	return 0;
+
+error:
+	if (cp) {
+		jpc_enc_cp_destroy(cp);
+	}
+	if (enc) {
+		jpc_enc_destroy(enc);
+	}
+	return -1;
+}
+
+/******************************************************************************\
+* Option parsing code.
+\******************************************************************************/
+
+/* Build the coding parameters for image from the option string optstr (a
+  null optstr is treated as empty); unrecognized options are reported and
+  skipped.  Returns the parameters (freed with jpc_enc_cp_destroy) or 0. */
+static jpc_enc_cp_t *cp_create(char *optstr, jas_image_t *image)
+{
+	jpc_enc_cp_t *cp;
+	jas_tvparser_t *tvp;
+	int ret;
+	int numilyrrates;
+	double *ilyrrates;
+	int i;
+	int tagid;
+	jpc_enc_tcp_t *tcp;
+	jpc_enc_tccp_t *tccp;
+	jpc_enc_ccp_t *ccp;
+	int cmptno;
+	uint_fast16_t rlvlno;
+	uint_fast16_t prcwidthexpn;
+	uint_fast16_t prcheightexpn;
+	bool enablemct;
+	uint_fast32_t jp2overhead;
+	uint_fast16_t lyrno;
+	uint_fast32_t hsteplcm;
+	uint_fast32_t vsteplcm;
+	bool mctvalid;
+
+	tvp = 0;
+	cp = 0;
+	ilyrrates = 0;
+	numilyrrates = 0;
+	if (!(cp = jas_malloc(sizeof(jpc_enc_cp_t)))) {
+		goto error;
+	}
+
+	prcwidthexpn = 15;
+	prcheightexpn = 15;
+	enablemct = true;
+	jp2overhead = 0;
+
+	cp->ccps = 0;
+	cp->debug = 0;
+	cp->imgareatlx = UINT_FAST32_MAX;
+	cp->imgareatly = UINT_FAST32_MAX;
+	cp->refgrdwidth = 0;
+	cp->refgrdheight = 0;
+	cp->tilegrdoffx = UINT_FAST32_MAX;
+	cp->tilegrdoffy = UINT_FAST32_MAX;
+	cp->tilewidth = 0;
+	cp->tileheight = 0;
+	cp->numcmpts = jas_image_numcmpts(image);
+
+	hsteplcm = 1;
+	vsteplcm = 1;
+	for (cmptno = 0; cmptno < jas_image_numcmpts(image); ++cmptno) {
+		if (jas_image_cmptbrx(image, cmptno) + jas_image_cmpthstep(image, cmptno) <=
+		  jas_image_brx(image) || jas_image_cmptbry(image, cmptno) +
+		  jas_image_cmptvstep(image, cmptno) <= jas_image_bry(image)) {
+			jas_eprintf("unsupported image type\n");
+			goto error;
+		}
+		/* Note: We ought to be calculating the LCMs here.  Fix some day. */
+		hsteplcm *= jas_image_cmpthstep(image, cmptno);
+		vsteplcm *= jas_image_cmptvstep(image, cmptno);
+	}
+
+	if (!(cp->ccps = jas_malloc(cp->numcmpts * sizeof(jpc_enc_ccp_t)))) {
+		goto error;
+	}
+	for (cmptno = 0, ccp = cp->ccps; cmptno < JAS_CAST(int, cp->numcmpts); ++cmptno,
+	  ++ccp) {
+		ccp->sampgrdstepx = jas_image_cmpthstep(image, cmptno);
+		ccp->sampgrdstepy = jas_image_cmptvstep(image, cmptno);
+		/* XXX - this isn't quite correct for more general image */
+		ccp->sampgrdsubstepx = 0;
+		ccp->sampgrdsubstepy = 0;
+		ccp->prec = jas_image_cmptprec(image, cmptno);
+		ccp->sgnd = jas_image_cmptsgnd(image, cmptno);
+		ccp->numstepsizes = 0;
+		memset(ccp->stepsizes, 0, sizeof(ccp->stepsizes));
+	}
+
+	cp->rawsize = jas_image_rawsize(image);
+	cp->totalsize = UINT_FAST32_MAX;
+
+	tcp = &cp->tcp;
+	tcp->csty = 0;
+	tcp->intmode = true;
+	tcp->prg = JPC_COD_LRCPPRG;
+	tcp->numlyrs = 1;
+	tcp->ilyrrates = 0;
+
+	tccp = &cp->tccp;
+	tccp->csty = 0;
+	tccp->maxrlvls = 6;
+	tccp->cblkwidthexpn = 6;
+	tccp->cblkheightexpn = 6;
+	tccp->cblksty = 0;
+	tccp->numgbits = 2;
+
+	if (!(tvp = jas_tvparser_create(optstr ? optstr : ""))) {
+		goto error;
+	}
+
+	while (!(ret = jas_tvparser_next(tvp))) {
+		switch (jas_taginfo_nonull(jas_taginfos_lookup(encopts,
+		  jas_tvparser_gettag(tvp)))->id) {
+		case OPT_DEBUG:
+			cp->debug = atoi(jas_tvparser_getval(tvp));
+			break;
+		case OPT_IMGAREAOFFX:
+			cp->imgareatlx = atoi(jas_tvparser_getval(tvp));
+			break;
+		case OPT_IMGAREAOFFY:
+			cp->imgareatly = atoi(jas_tvparser_getval(tvp));
+			break;
+		case OPT_TILEGRDOFFX:
+			cp->tilegrdoffx = atoi(jas_tvparser_getval(tvp));
+			break;
+		case OPT_TILEGRDOFFY:
+			cp->tilegrdoffy = atoi(jas_tvparser_getval(tvp));
+			break;
+		case OPT_TILEWIDTH:
+			cp->tilewidth = atoi(jas_tvparser_getval(tvp));
+			break;
+		case OPT_TILEHEIGHT:
+			cp->tileheight = atoi(jas_tvparser_getval(tvp));
+			break;
+		case OPT_PRCWIDTH:
+			prcwidthexpn = jpc_floorlog2(atoi(jas_tvparser_getval(tvp)));
+			break;
+		case OPT_PRCHEIGHT:
+			prcheightexpn = jpc_floorlog2(atoi(jas_tvparser_getval(tvp)));
+			break;
+		case OPT_CBLKWIDTH:
+			tccp->cblkwidthexpn =
+			  jpc_floorlog2(atoi(jas_tvparser_getval(tvp)));
+			break;
+		case OPT_CBLKHEIGHT:
+			tccp->cblkheightexpn =
+			  jpc_floorlog2(atoi(jas_tvparser_getval(tvp)));
+			break;
+		case OPT_MODE:
+			if ((tagid = jas_taginfo_nonull(jas_taginfos_lookup(modetab,
+			  jas_tvparser_getval(tvp)))->id) < 0) {
+				jas_eprintf("ignoring invalid mode %s\n",
+				  jas_tvparser_getval(tvp));
+			} else {
+				tcp->intmode = (tagid == MODE_INT);
+			}
+			break;
+		case OPT_PRG:
+			if ((tagid = jas_taginfo_nonull(jas_taginfos_lookup(prgordtab,
+			  jas_tvparser_getval(tvp)))->id) < 0) {
+				jas_eprintf("ignoring invalid progression order %s\n",
+				  jas_tvparser_getval(tvp));
+			} else {
+				tcp->prg = tagid;
+			}
+			break;
+		case OPT_NOMCT:
+			enablemct = false;
+			break;
+		case OPT_MAXRLVLS:
+			tccp->maxrlvls = atoi(jas_tvparser_getval(tvp));
+			break;
+		case OPT_SOP:
+			cp->tcp.csty |= JPC_COD_SOP;
+			break;
+		case OPT_EPH:
+			cp->tcp.csty |= JPC_COD_EPH;
+			break;
+		case OPT_LAZY:
+			tccp->cblksty |= JPC_COX_LAZY;
+			break;
+		case OPT_TERMALL:
+			tccp->cblksty |= JPC_COX_TERMALL;
+			break;
+		case OPT_SEGSYM:
+			tccp->cblksty |= JPC_COX_SEGSYM;
+			break;
+		case OPT_VCAUSAL:
+			tccp->cblksty |= JPC_COX_VSC;
+			break;
+		case OPT_RESET:
+			tccp->cblksty |= JPC_COX_RESET;
+			break;
+		case OPT_PTERM:
+			tccp->cblksty |= JPC_COX_PTERM;
+			break;
+		case OPT_NUMGBITS:
+			cp->tccp.numgbits = atoi(jas_tvparser_getval(tvp));
+			break;
+		case OPT_RATE:
+			if (ratestrtosize(jas_tvparser_getval(tvp), cp->rawsize,
+			  &cp->totalsize)) {
+				jas_eprintf("ignoring bad rate specifier %s\n",
+				  jas_tvparser_getval(tvp));
+			}
+			break;
+		case OPT_ILYRRATES:
+			if (jpc_atoaf(jas_tvparser_getval(tvp), &numilyrrates,
+			  &ilyrrates)) {
+				jas_eprintf("warning: invalid intermediate layer rates specifier ignored (%s)\n",
+				  jas_tvparser_getval(tvp));
+			}
+			break;
+		case OPT_JP2OVERHEAD:
+			jp2overhead = atoi(jas_tvparser_getval(tvp));
+			break;
+		default:
+			jas_eprintf("warning: ignoring invalid option %s\n",
+			 jas_tvparser_gettag(tvp));
+			break;
+		}
+	}
+
+	jas_tvparser_destroy(tvp);
+	tvp = 0;
+
+	if (cp->totalsize != UINT_FAST32_MAX) {
+		cp->totalsize = (cp->totalsize > jp2overhead) ?
+		  (cp->totalsize - jp2overhead) : 0;
+	}
+
+	if (cp->imgareatlx == UINT_FAST32_MAX) {
+		cp->imgareatlx = 0;
+	} else {
+		if (hsteplcm != 1) {
+			jas_eprintf("warning: overriding imgareatlx value\n");
+		}
+		cp->imgareatlx *= hsteplcm;
+	}
+	if (cp->imgareatly == UINT_FAST32_MAX) {
+		cp->imgareatly = 0;
+	} else {
+		if (vsteplcm != 1) {
+			jas_eprintf("warning: overriding imgareatly value\n");
+		}
+		cp->imgareatly *= vsteplcm;
+	}
+	cp->refgrdwidth = cp->imgareatlx + jas_image_width(image);
+	cp->refgrdheight = cp->imgareatly + jas_image_height(image);
+	if (cp->tilegrdoffx == UINT_FAST32_MAX) {
+		cp->tilegrdoffx = cp->imgareatlx;
+	}
+	if (cp->tilegrdoffy == UINT_FAST32_MAX) {
+		cp->tilegrdoffy = cp->imgareatly;
+	}
+	if (!cp->tilewidth) {
+		cp->tilewidth = cp->refgrdwidth - cp->tilegrdoffx;
+	}
+	if (!cp->tileheight) {
+		cp->tileheight = cp->refgrdheight - cp->tilegrdoffy;
+	}
+
+	if (cp->numcmpts == 3) {
+		mctvalid = true;
+		for (cmptno = 0; cmptno < jas_image_numcmpts(image); ++cmptno) {
+			if (jas_image_cmptprec(image, cmptno) != jas_image_cmptprec(image, 0) ||
+			  jas_image_cmptsgnd(image, cmptno) != jas_image_cmptsgnd(image, 0) ||
+			  jas_image_cmptwidth(image, cmptno) != jas_image_cmptwidth(image, 0) ||
+			  jas_image_cmptheight(image, cmptno) != jas_image_cmptheight(image, 0)) {
+				mctvalid = false;
+			}
+		}
+	} else {
+		mctvalid = false;
+	}
+	if (mctvalid && enablemct && jas_clrspc_fam(jas_image_clrspc(image)) != JAS_CLRSPC_FAM_RGB) {
+		jas_eprintf("warning: color space apparently not RGB\n");
+	}
+	if (mctvalid && enablemct && jas_clrspc_fam(jas_image_clrspc(image)) == JAS_CLRSPC_FAM_RGB) {
+		tcp->mctid = (tcp->intmode) ? (JPC_MCT_RCT) : (JPC_MCT_ICT);
+	} else {
+		tcp->mctid = JPC_MCT_NONE;
+	}
+	tccp->qmfbid = (tcp->intmode) ? (JPC_COX_RFT) : (JPC_COX_INS);
+	/* NOTE(review): maxrlvls is not range-checked before indexing here. */
+	for (rlvlno = 0; rlvlno < tccp->maxrlvls; ++rlvlno) {
+		tccp->prcwidthexpns[rlvlno] = prcwidthexpn;
+		tccp->prcheightexpns[rlvlno] = prcheightexpn;
+	}
+	if (prcwidthexpn != 15 || prcheightexpn != 15) {
+		tccp->csty |= JPC_COX_PRT;
+	}
+
+	/* Ensure that the tile width and height is valid. */
+	if (!cp->tilewidth) {
+		jas_eprintf("invalid tile width %lu\n", (unsigned long)
+		  cp->tilewidth);
+		goto error;
+	}
+	if (!cp->tileheight) {
+		jas_eprintf("invalid tile height %lu\n", (unsigned long)
+		  cp->tileheight);
+		goto error;
+	}
+
+	/* Ensure that the tile grid offset is valid. */
+	if (cp->tilegrdoffx > cp->imgareatlx ||
+	  cp->tilegrdoffy > cp->imgareatly ||
+	  cp->tilegrdoffx + cp->tilewidth < cp->imgareatlx ||
+	  cp->tilegrdoffy + cp->tileheight < cp->imgareatly) {
+		jas_eprintf("invalid tile grid offset (%lu, %lu)\n",
+		  (unsigned long) cp->tilegrdoffx, (unsigned long)
+		  cp->tilegrdoffy);
+		goto error;
+	}
+
+	cp->numhtiles = JPC_CEILDIV(cp->refgrdwidth - cp->tilegrdoffx,
+	  cp->tilewidth);
+	cp->numvtiles = JPC_CEILDIV(cp->refgrdheight - cp->tilegrdoffy,
+	  cp->tileheight);
+	cp->numtiles = cp->numhtiles * cp->numvtiles;
+
+	if (ilyrrates && numilyrrates > 0) {
+		tcp->numlyrs = numilyrrates + 1;
+		if (!(tcp->ilyrrates = jas_malloc((tcp->numlyrs - 1) *
+		  sizeof(jpc_fix_t)))) {
+			goto error;
+		}
+		for (i = 0; i < JAS_CAST(int, tcp->numlyrs - 1); ++i) {
+			tcp->ilyrrates[i] = jpc_dbltofix(ilyrrates[i]);
+		}
+	}
+
+	/* Ensure that the integer mode is used in the case of lossless
+	  coding. */
+	if (cp->totalsize == UINT_FAST32_MAX && (!cp->tcp.intmode)) {
+		jas_eprintf("cannot use real mode for lossless coding\n");
+		goto error;
+	}
+
+	/* Ensure that the precinct width is valid. */
+	if (prcwidthexpn > 15) {
+		jas_eprintf("invalid precinct width\n");
+		goto error;
+	}
+
+	/* Ensure that the precinct height is valid. */
+	if (prcheightexpn > 15) {
+		jas_eprintf("invalid precinct height\n");
+		goto error;
+	}
+
+	/* Ensure that the code block width is valid. */
+	if (cp->tccp.cblkwidthexpn < 2 || cp->tccp.cblkwidthexpn > 12) {
+		jas_eprintf("invalid code block width %d\n",
+		  JPC_POW2(cp->tccp.cblkwidthexpn));
+		goto error;
+	}
+
+	/* Ensure that the code block height is valid. */
+	if (cp->tccp.cblkheightexpn < 2 || cp->tccp.cblkheightexpn > 12) {
+		jas_eprintf("invalid code block height %d\n",
+		  JPC_POW2(cp->tccp.cblkheightexpn));
+		goto error;
+	}
+
+	/* Ensure that the code block size is not too large. */
+	if (cp->tccp.cblkwidthexpn + cp->tccp.cblkheightexpn > 12) {
+		jas_eprintf("code block size too large\n");
+		goto error;
+	}
+
+	/* Ensure that the number of layers is valid. */
+	if (cp->tcp.numlyrs > 16384) {
+		jas_eprintf("too many layers\n");
+		goto error;
+	}
+
+	/* There must be at least one resolution level. */
+	if (cp->tccp.maxrlvls < 1) {
+		jas_eprintf("must be at least one resolution level\n");
+		goto error;
+	}
+
+	/* Ensure that the number of guard bits is valid. */
+	if (cp->tccp.numgbits > 8) {
+		jas_eprintf("invalid number of guard bits\n");
+		goto error;
+	}
+
+	/* Ensure that the rate is within the legal range. */
+	if (cp->totalsize != UINT_FAST32_MAX && cp->totalsize > cp->rawsize) {
+		jas_eprintf("warning: specified rate is unreasonably large (%lu > %lu)\n", (unsigned long) cp->totalsize, (unsigned long) cp->rawsize);
+	}
+
+	/* Ensure that the intermediate layer rates are valid. */
+	if (tcp->numlyrs > 1) {
+		/* The intermediate layers rates must increase monotonically. */
+		for (lyrno = 0; lyrno + 2 < tcp->numlyrs; ++lyrno) {
+			if (tcp->ilyrrates[lyrno] >= tcp->ilyrrates[lyrno + 1]) {
+				jas_eprintf("intermediate layer rates must increase monotonically\n");
+				goto error;
+			}
+		}
+		/* The intermediate layer rates must be less than the overall rate. */
+		if (cp->totalsize != UINT_FAST32_MAX) {
+			for (lyrno = 0; lyrno < tcp->numlyrs - 1; ++lyrno) {
+				if (jpc_fixtodbl(tcp->ilyrrates[lyrno]) > ((double) cp->totalsize)
+				  / cp->rawsize) {
+					jas_eprintf("warning: intermediate layer rates must be less than overall rate\n");
+					goto error;
+				}
+			}
+		}
+	}
+
+	if (ilyrrates) {
+		jas_free(ilyrrates);
+	}
+
+	return cp;
+
+error:
+	if (ilyrrates) {
+		jas_free(ilyrrates);
+	}
+	if (tvp) {
+		jas_tvparser_destroy(tvp);
+	}
+	if (cp) {
+		jpc_enc_cp_destroy(cp);
+	}
+	return 0;
+}
+
+void jpc_enc_cp_destroy(jpc_enc_cp_t *cp)	/* Frees cp and the arrays it holds. */
+{
+	if (cp->ccps) {	/* cp_create initializes tcp.ilyrrates only after ccps is set */
+		if (cp->tcp.ilyrrates) {
+			jas_free(cp->tcp.ilyrrates);
+		}
+		jas_free(cp->ccps);
+	}
+	jas_free(cp);
+}
+
+/* Convert rate string s to a byte count ("<n>B" = bytes, else a fraction of
+  rawsize).  Returns 0, or -1 with *size untouched if s has no leading number. */
+int ratestrtosize(char *s, uint_fast32_t rawsize, uint_fast32_t *size)
+{
+	char *end;
+	jpc_flt_t f = strtod(s, &end);
+	if (end == s) {
+		return -1;
+	}
+	if (strchr(s, 'B')) {
+		*size = atoi(s);
+	} else if (f < 0) {
+		*size = 0;
+	} else if (f > 1.0) {
+		*size = rawsize + 1;	/* anything above 1 exceeds the raw size */
+	} else {
+		*size = f * rawsize;
+	}
+	return 0;
+}
+
+/******************************************************************************\
+* Encoder constructor and destructor.
+\******************************************************************************/
+
+/* Create an encoder for image that writes to out with coding parameters cp.
+  On success the encoder holds cp and jpc_enc_destroy frees it.  On failure
+  0 is returned and cp is left for the caller to destroy, which is what
+  jpc_encode does. */
+jpc_enc_t *jpc_enc_create(jpc_enc_cp_t *cp, jas_stream_t *out, jas_image_t *image)
+{
+	jpc_enc_t *enc;
+
+	if (!(enc = jas_malloc(sizeof(jpc_enc_t)))) {
+		return 0;
+	}
+
+	/* Nothing besides the encoder object itself has been allocated yet. */
+	enc->image = image;
+	enc->out = out;
+	enc->cp = cp;
+	enc->cstate = 0;
+	enc->tmpstream = 0;
+	enc->mrk = 0;
+	enc->curtile = 0;
+	enc->len = 0;
+	enc->mainbodysize = 0;
+
+	if (!(enc->cstate = jpc_cstate_create())) {
+		/* Detach cp first: jpc_enc_destroy would free it, and the caller
+		  frees it as well when creation fails. */
+		enc->cp = 0;
+		jpc_enc_destroy(enc);
+		return 0;
+	}
+
+	return enc;
+}
+
+void jpc_enc_destroy(jpc_enc_t *enc)
+{
+	/* Release the current tile, the coding parameters, the code stream
+	state and the temporary stream, if present, and then the encoder.
+	enc->image and enc->out are created outside of the encoder, so they
+	are not destroyed here.  NOTE(review): neither is enc->mrk. */
+	if (enc->curtile) {
+		jpc_enc_tile_destroy(enc->curtile);
+	}
+	if (enc->cp) {
+		jpc_enc_cp_destroy(enc->cp);
+	}
+	if (enc->cstate) {
+		jpc_cstate_destroy(enc->cstate);
+	}
+	if (enc->tmpstream) {
+		jas_stream_close(enc->tmpstream);
+	}
+
+	jas_free(enc);
+}
+
+/******************************************************************************\
+* Code.
+\******************************************************************************/
+
+/* Compute the 11-bit mantissa for a fixed-point step size.  With n =
+  jpc_firstone(stepsize) (presumably the index of the leading one bit),
+  the result is the 11 bits just below bit n, zero-padded when n < 11. */
+static int jpc_calcssmant(jpc_fix_t stepsize)
+{
+	int lead;
+
+	lead = jpc_firstone(stepsize);
+	if (lead < 11) {
+		/* Strip bit lead, then left-justify what remains. */
+		return (stepsize & ((1 << lead) - 1)) << (11 - lead);
+	}
+	/* Keep only the 11 bits that follow bit lead. */
+	return (stepsize >> (lead - 11)) & 0x7ff;
+}
+
+static int jpc_calcssexp(jpc_fix_t stepsize)	/* Exponent of a fixed-point step size. */
+{
+	return jpc_firstone(stepsize) - JPC_FIX_FRACBITS;	/* jpc_firstone result less the fraction bits */
+}
+
+/* Write the main header (SOC, SIZ, COM, COD, QCD, then a QCC for each later
+  component) to enc->out, fill in the step sizes of enc->cp->ccps[] and set
+  enc->mainbodysize.  Returns 0, or -1 after releasing any marker in enc->mrk. */
+static int jpc_enc_encodemainhdr(jpc_enc_t *enc)
+{
+	jpc_siz_t *siz;
+	jpc_cod_t *cod;
+	jpc_qcd_t *qcd;
+	int i;
+	long startoff;
+	long mainhdrlen;
+	jpc_enc_cp_t *cp;
+	jpc_qcc_t *qcc;
+	jpc_enc_tccp_t *tccp;
+	uint_fast16_t cmptno;
+	jpc_tsfb_band_t bandinfos[JPC_MAXBANDS];
+	jpc_fix_t mctsynweight;
+	jpc_enc_tcp_t *tcp;
+	jpc_tsfb_t *tsfb;
+	jpc_tsfb_band_t *bandinfo;
+	uint_fast16_t numbands;
+	uint_fast16_t bandno;
+	uint_fast16_t rlvlno;
+	uint_fast16_t analgain;
+	jpc_fix_t absstepsize;
+	char buf[1024];
+	jpc_com_t *com;
+
+	cp = enc->cp;
+	startoff = jas_stream_getrwcount(enc->out);
+
+	/* Write SOC marker segment. */
+	if (!(enc->mrk = jpc_ms_create(JPC_MS_SOC))) {
+		goto error;
+	}
+	if (jpc_putms(enc->out, enc->cstate, enc->mrk)) {
+		jas_eprintf("cannot write SOC marker\n");
+		goto error;
+	}
+	jpc_ms_destroy(enc->mrk);
+	enc->mrk = 0;
+
+	/* Write SIZ marker segment. */
+	if (!(enc->mrk = jpc_ms_create(JPC_MS_SIZ))) {
+		goto error;
+	}
+	siz = &enc->mrk->parms.siz;
+	siz->caps = 0;
+	siz->xoff = cp->imgareatlx;
+	siz->yoff = cp->imgareatly;
+	siz->width = cp->refgrdwidth;
+	siz->height = cp->refgrdheight;
+	siz->tilexoff = cp->tilegrdoffx;
+	siz->tileyoff = cp->tilegrdoffy;
+	siz->tilewidth = cp->tilewidth;
+	siz->tileheight = cp->tileheight;
+	siz->numcomps = cp->numcmpts;
+	siz->comps = jas_malloc(siz->numcomps * sizeof(jpc_sizcomp_t));
+	/* Checked at run time; an assert() would vanish under NDEBUG. */
+	if (!siz->comps) {
+		goto error;
+	}
+	for (i = 0; i < JAS_CAST(int, cp->numcmpts); ++i) {
+		siz->comps[i].prec = cp->ccps[i].prec;
+		siz->comps[i].sgnd = cp->ccps[i].sgnd;
+		siz->comps[i].hsamp = cp->ccps[i].sampgrdstepx;
+		siz->comps[i].vsamp = cp->ccps[i].sampgrdstepy;
+	}
+	if (jpc_putms(enc->out, enc->cstate, enc->mrk)) {
+		jas_eprintf("cannot write SIZ marker\n");
+		goto error;
+	}
+	jpc_ms_destroy(enc->mrk);
+	enc->mrk = 0;
+
+	/* Write COM marker segment (GeoJasper: dima - add GeoJasper info). */
+	if (!(enc->mrk = jpc_ms_create(JPC_MS_COM))) {
+		goto error;
+	}
+	sprintf(buf, "Creator: GeoJasPer %s JasPer %s", GJAS_VERSION, jas_getversion());
+	com = &enc->mrk->parms.com;
+	com->len = strlen(buf);
+	com->regid = JPC_COM_LATIN;
+	if (!(com->data = JAS_CAST(uchar *, jas_strdup(buf)))) {
+		abort();
+	}
+	if (jpc_putms(enc->out, enc->cstate, enc->mrk)) {
+		jas_eprintf("cannot write COM marker\n");
+		goto error;
+	}
+	jpc_ms_destroy(enc->mrk);
+	enc->mrk = 0;
+
+	/* Compute the step sizes of every component. */
+	tcp = &cp->tcp;
+	tccp = &cp->tccp;
+	for (cmptno = 0; cmptno < cp->numcmpts; ++cmptno) {
+		tsfb = jpc_cod_gettsfb(tccp->qmfbid, tccp->maxrlvls - 1);
+		jpc_tsfb_getbands(tsfb, 0, 0, 1 << tccp->maxrlvls, 1 << tccp->maxrlvls,
+		  bandinfos);
+		jpc_tsfb_destroy(tsfb);
+		mctsynweight = jpc_mct_getsynweight(tcp->mctid, cmptno);
+		numbands = 3 * tccp->maxrlvls - 2;
+		for (bandno = 0, bandinfo = bandinfos; bandno < numbands;
+		  ++bandno, ++bandinfo) {
+			rlvlno = (bandno) ? ((bandno - 1) / 3 + 1) : 0;
+			analgain = JPC_NOMINALGAIN(tccp->qmfbid, tccp->maxrlvls,
+			  rlvlno, bandinfo->orient);
+			if (!tcp->intmode) {
+				absstepsize = jpc_fix_div(jpc_inttofix(1 <<
+				  (analgain + 1)), bandinfo->synenergywt);
+			} else {
+				absstepsize = jpc_inttofix(1);
+			}
+			cp->ccps[cmptno].stepsizes[bandno] =
+			  jpc_abstorelstepsize(absstepsize,
+			  cp->ccps[cmptno].prec + analgain);
+		}
+		cp->ccps[cmptno].numstepsizes = numbands;
+	}
+
+	/* Write COD marker segment. */
+	if (!(enc->mrk = jpc_ms_create(JPC_MS_COD))) {
+		goto error;
+	}
+	cod = &enc->mrk->parms.cod;
+	cod->csty = cp->tccp.csty | cp->tcp.csty;
+	cod->compparms.csty = cp->tccp.csty | cp->tcp.csty;
+	cod->compparms.numdlvls = cp->tccp.maxrlvls - 1;
+	cod->compparms.numrlvls = cp->tccp.maxrlvls;
+	cod->prg = cp->tcp.prg;
+	cod->numlyrs = cp->tcp.numlyrs;
+	cod->compparms.cblkwidthval = JPC_COX_CBLKSIZEEXPN(cp->tccp.cblkwidthexpn);
+	cod->compparms.cblkheightval = JPC_COX_CBLKSIZEEXPN(cp->tccp.cblkheightexpn);
+	cod->compparms.cblksty = cp->tccp.cblksty;
+	cod->compparms.qmfbid = cp->tccp.qmfbid;
+	cod->mctrans = (cp->tcp.mctid != JPC_MCT_NONE);
+	if (tccp->csty & JPC_COX_PRT) {
+		for (rlvlno = 0; rlvlno < tccp->maxrlvls; ++rlvlno) {
+			cod->compparms.rlvls[rlvlno].parwidthval = tccp->prcwidthexpns[rlvlno];
+			cod->compparms.rlvls[rlvlno].parheightval = tccp->prcheightexpns[rlvlno];
+		}
+	}
+	if (jpc_putms(enc->out, enc->cstate, enc->mrk)) {
+		jas_eprintf("cannot write COD marker\n");
+		goto error;
+	}
+	jpc_ms_destroy(enc->mrk);
+	enc->mrk = 0;
+
+	/* Write QCD marker segment using the step sizes of component 0. */
+	if (!(enc->mrk = jpc_ms_create(JPC_MS_QCD))) {
+		goto error;
+	}
+	qcd = &enc->mrk->parms.qcd;
+	qcd->compparms.qntsty = (tccp->qmfbid == JPC_COX_INS) ?
+	  JPC_QCX_SEQNT : JPC_QCX_NOQNT;
+	qcd->compparms.numstepsizes = cp->ccps[0].numstepsizes;
+	qcd->compparms.numguard = cp->tccp.numgbits;
+	qcd->compparms.stepsizes = cp->ccps[0].stepsizes;
+	if (jpc_putms(enc->out, enc->cstate, enc->mrk)) {
+		/* The array belongs to cp: detach it so that cleanup cannot free it. */
+		qcd->compparms.stepsizes = 0;
+		goto error;
+	}
+	/* We do not want the step size array to be freed! */
+	qcd->compparms.stepsizes = 0;
+	jpc_ms_destroy(enc->mrk);
+	enc->mrk = 0;
+
+	/* Write a QCC marker segment for each remaining component. */
+	for (cmptno = 1; cmptno < cp->numcmpts; ++cmptno) {
+		if (!(enc->mrk = jpc_ms_create(JPC_MS_QCC))) {
+			goto error;
+		}
+		qcc = &enc->mrk->parms.qcc;
+		qcc->compno = cmptno;
+		qcc->compparms.qntsty = (tccp->qmfbid == JPC_COX_INS) ?
+		  JPC_QCX_SEQNT : JPC_QCX_NOQNT;
+		qcc->compparms.numstepsizes = cp->ccps[cmptno].numstepsizes;
+		qcc->compparms.numguard = cp->tccp.numgbits;
+		qcc->compparms.stepsizes = cp->ccps[cmptno].stepsizes;
+		if (jpc_putms(enc->out, enc->cstate, enc->mrk)) {
+			qcc->compparms.stepsizes = 0;
+			goto error;
+		}
+		/* We do not want the step size array to be freed! */
+		qcc->compparms.stepsizes = 0;
+		jpc_ms_destroy(enc->mrk);
+		enc->mrk = 0;
+	}
+
+#define MAINTLRLEN	2
+	mainhdrlen = jas_stream_getrwcount(enc->out) - startoff;
+	enc->len += mainhdrlen;
+	if (enc->cp->totalsize != UINT_FAST32_MAX) {
+		uint_fast32_t overhead = mainhdrlen + MAINTLRLEN;
+		enc->mainbodysize = (enc->cp->totalsize >= overhead) ?
+		  (enc->cp->totalsize - overhead) : 0;
+	} else {
+		enc->mainbodysize = UINT_FAST32_MAX;
+	}
+	return 0;
+
+error:
+	if (enc->mrk) {
+		jpc_ms_destroy(enc->mrk);
+		enc->mrk = 0;
+	}
+	return -1;
+}
+
+/* Encode every tile of the image and append the result to enc->out.
+  For each tile: unsigned components are level shifted, samples are shifted
+  up by JPC_FIX_FRACBITS in non-integer mode, the multicomponent transform
+  and jpc_tsfb_analyze are applied, band step sizes are chosen (and the data
+  quantized in non-integer mode), the tile header is written to a temporary
+  memory stream, and then the code blocks are coded, rate allocated and
+  emitted as packets.  The finished tile is copied to enc->out and its
+  length is added to enc->len.
+  Returns 0 on success and -1 on failure. */
+static int jpc_enc_encodemainbody(jpc_enc_t *enc)
+{
+	int tileno;
+	int i;
+	jpc_sot_t *sot;
+	jpc_enc_tcmpt_t *comp;
+	jpc_enc_tcmpt_t *endcomps;
+	jpc_enc_band_t *band;
+	jpc_enc_band_t *endbands;
+	jpc_enc_rlvl_t *lvl;
+	int rlvlno;
+	jpc_qcc_t *qcc;
+	jpc_cod_t *cod;
+	int adjust;
+	int j;
+	int absbandno;
+	long tilehdrlen;
+	long tilelen;
+	jpc_enc_tile_t *tile;
+	jpc_enc_cp_t *cp;
+	double rho;
+	int lyrno;
+	int cmptno;
+	int samestepsizes;
+	jpc_enc_ccp_t *ccps;
+	jpc_enc_tccp_t *tccp;
+	int bandno;
+	uint_fast32_t x;
+	uint_fast32_t y;
+	int mingbits;
+	int actualnumbps;
+	jpc_fix_t mxmag;
+	jpc_fix_t mag;
+	int numgbits;
+
+	cp = enc->cp;
+
+	for (tileno = 0; tileno < JAS_CAST(int, cp->numtiles); ++tileno) {
+		if (!(enc->curtile = jpc_enc_tile_create(enc->cp, enc->image, tileno))) {
+			return -1;
+		}
+
+		tile = enc->curtile;
+
+		if (jas_getdbglevel() >= 10) {
+			jpc_enc_dump(enc);
+		}
+		/* Level shift the samples of components that are not signed. */
+		endcomps = &tile->tcmpts[tile->numtcmpts];
+		for (cmptno = 0, comp = tile->tcmpts; cmptno < tile->numtcmpts; ++cmptno, ++comp) {
+			if (!cp->ccps[cmptno].sgnd) {
+				adjust = 1 << (cp->ccps[cmptno].prec - 1);
+				for (i = 0; i < jas_matrix_numrows(comp->data); ++i) {
+					for (j = 0; j < jas_matrix_numcols(comp->data); ++j) {
+						*jas_matrix_getref(comp->data, i, j) -= adjust;
+					}
+				}
+			}
+		}
+
+		if (!tile->intmode) {
+			endcomps = &tile->tcmpts[tile->numtcmpts];
+			for (comp = tile->tcmpts; comp != endcomps; ++comp) {
+				jas_matrix_asl(comp->data, JPC_FIX_FRACBITS);
+			}
+		}
+
+		switch (tile->mctid) {
+		case JPC_MCT_RCT:
+			assert(jas_image_numcmpts(enc->image) == 3);
+			jpc_rct(tile->tcmpts[0].data, tile->tcmpts[1].data,
+			  tile->tcmpts[2].data);
+			break;
+		case JPC_MCT_ICT:
+			assert(jas_image_numcmpts(enc->image) == 3);
+			jpc_ict(tile->tcmpts[0].data, tile->tcmpts[1].data,
+			  tile->tcmpts[2].data);
+			break;
+		default:
+			break;
+		}
+
+		for (i = 0; i < jas_image_numcmpts(enc->image); ++i) {
+			comp = &tile->tcmpts[i];
+			jpc_tsfb_analyze(comp->tsfb, comp->data);
+		}
+		/* Pick a step size for every band and check the guard bits. */
+		endcomps = &tile->tcmpts[tile->numtcmpts];
+		for (cmptno = 0, comp = tile->tcmpts; comp != endcomps; ++cmptno, ++comp) {
+			mingbits = 0;
+			absbandno = 0;
+			/* All bands must have a corresponding quantizer step size,
+			  even if they contain no samples and are never coded. */
+			/* Some bands may not be hit by the loop below, so we must
+			  initialize all of the step sizes to a sane value. */
+			memset(comp->stepsizes, 0, sizeof(comp->stepsizes));
+			for (rlvlno = 0, lvl = comp->rlvls; rlvlno < comp->numrlvls; ++rlvlno, ++lvl) {
+				if (!lvl->bands) {
+					absbandno += rlvlno ? 3 : 1;
+					continue;
+				}
+				endbands = &lvl->bands[lvl->numbands];
+				for (band = lvl->bands; band != endbands; ++band) {
+					if (!band->data) {
+						++absbandno;
+						continue;
+					}
+					actualnumbps = 0;
+					mxmag = 0;
+					for (y = 0; y < JAS_CAST(uint_fast32_t, jas_matrix_numrows(band->data)); ++y) {
+						for (x = 0; x < JAS_CAST(uint_fast32_t, jas_matrix_numcols(band->data)); ++x) {
+							/* NOTE(review): abs() takes an int; confirm jpc_fix_t is no wider. */
+							mag = abs(jas_matrix_get(band->data, y, x));
+							if (mag > mxmag) {
+								mxmag = mag;
+							}
+						}
+					}
+					if (tile->intmode) {
+						actualnumbps = jpc_firstone(mxmag) + 1;
+					} else {
+						actualnumbps = jpc_firstone(mxmag) + 1 - JPC_FIX_FRACBITS;
+					}
+					numgbits = actualnumbps - (cp->ccps[cmptno].prec - 1 +
+					  band->analgain);
+#if 0
+jas_eprintf("%d %d mag=%d actual=%d numgbits=%d\n", cp->ccps[cmptno].prec, band->analgain, mxmag, actualnumbps, numgbits);
+#endif
+					if (numgbits > mingbits) {
+						mingbits = numgbits;
+					}
+					if (!tile->intmode) {
+						band->absstepsize = jpc_fix_div(jpc_inttofix(1
+						  << (band->analgain + 1)),
+						  band->synweight);
+					} else {
+						band->absstepsize = jpc_inttofix(1);
+					}
+					band->stepsize = jpc_abstorelstepsize(
+					  band->absstepsize, cp->ccps[cmptno].prec +
+					  band->analgain);
+					band->numbps = cp->tccp.numgbits +
+					  JPC_QCX_GETEXPN(band->stepsize) - 1;
+
+					if ((!tile->intmode) && band->data) {
+						jpc_quantize(band->data, band->absstepsize);
+					}
+
+					comp->stepsizes[absbandno] = band->stepsize;
+					++absbandno;
+				}
+			}
+
+			assert(JPC_FIX_FRACBITS >= JPC_NUMEXTRABITS);
+			if (!tile->intmode) {
+				jas_matrix_divpow2(comp->data, JPC_FIX_FRACBITS - JPC_NUMEXTRABITS);
+			} else {
+				jas_matrix_asl(comp->data, JPC_NUMEXTRABITS);
+			}
+
+#if 0
+jas_eprintf("mingbits %d\n", mingbits);
+#endif
+			if (mingbits > cp->tccp.numgbits) {
+				jas_eprintf("error: too few guard bits (need at least %d)\n",
+				  mingbits);
+				return -1;
+			}
+		}
+
+		if (!(enc->tmpstream = jas_stream_memopen(0, 0))) {
+			jas_eprintf("cannot open tmp file\n");
+			return -1;
+		}
+
+		/* Write the tile header. */
+		if (!(enc->mrk = jpc_ms_create(JPC_MS_SOT))) {
+			return -1;
+		}
+		sot = &enc->mrk->parms.sot;
+		sot->len = 0;
+		sot->tileno = tileno;
+		sot->partno = 0;
+		sot->numparts = 1;
+		if (jpc_putms(enc->tmpstream, enc->cstate, enc->mrk)) {
+			jas_eprintf("cannot write SOT marker\n");
+			return -1;
+		}
+		jpc_ms_destroy(enc->mrk);
+		enc->mrk = 0;
+
+/************************************************************************/
+/************************************************************************/
+/************************************************************************/
+
+		tccp = &cp->tccp;
+		for (cmptno = 0; cmptno < JAS_CAST(int, cp->numcmpts); ++cmptno) {
+			comp = &tile->tcmpts[cmptno];
+			if (comp->numrlvls != tccp->maxrlvls) {
+				if (!(enc->mrk = jpc_ms_create(JPC_MS_COD))) {
+					return -1;
+				}
+/* XXX = this is not really correct. we are using comp #0's precint sizes
+and other characteristics */
+				comp = &tile->tcmpts[0];
+				cod = &enc->mrk->parms.cod;
+				cod->compparms.csty = 0;
+				cod->compparms.numdlvls = comp->numrlvls - 1;
+				cod->prg = tile->prg;
+				cod->numlyrs = tile->numlyrs;
+				cod->compparms.cblkwidthval = JPC_COX_CBLKSIZEEXPN(comp->cblkwidthexpn);
+				cod->compparms.cblkheightval = JPC_COX_CBLKSIZEEXPN(comp->cblkheightexpn);
+				cod->compparms.cblksty = comp->cblksty;
+				cod->compparms.qmfbid = comp->qmfbid;
+				cod->mctrans = (tile->mctid != JPC_MCT_NONE);
+				for (i = 0; i < comp->numrlvls; ++i) {
+					cod->compparms.rlvls[i].parwidthval = comp->rlvls[i].prcwidthexpn;
+					cod->compparms.rlvls[i].parheightval = comp->rlvls[i].prcheightexpn;
+				}
+				if (jpc_putms(enc->tmpstream, enc->cstate, enc->mrk)) {
+					return -1;
+				}
+				jpc_ms_destroy(enc->mrk);
+				enc->mrk = 0;
+			}
+		}
+
+		for (cmptno = 0, comp = tile->tcmpts; cmptno < JAS_CAST(int, cp->numcmpts); ++cmptno, ++comp) {
+			ccps = &cp->ccps[cmptno];
+			if (JAS_CAST(int, ccps->numstepsizes) == comp->numstepsizes) {
+				samestepsizes = 1;
+				for (bandno = 0; bandno < JAS_CAST(int, ccps->numstepsizes); ++bandno) {
+					if (ccps->stepsizes[bandno] != comp->stepsizes[bandno]) {
+						samestepsizes = 0;
+						break;
+					}
+				}
+			} else {
+				samestepsizes = 0;
+			}
+			if (!samestepsizes) {
+				if (!(enc->mrk = jpc_ms_create(JPC_MS_QCC))) {
+					return -1;
+				}
+				qcc = &enc->mrk->parms.qcc;
+				qcc->compno = cmptno;
+				qcc->compparms.numguard = cp->tccp.numgbits;
+				qcc->compparms.qntsty = (comp->qmfbid == JPC_COX_INS) ?
+				  JPC_QCX_SEQNT : JPC_QCX_NOQNT;
+				qcc->compparms.numstepsizes = comp->numstepsizes;
+				qcc->compparms.stepsizes = comp->stepsizes;
+				if (jpc_putms(enc->tmpstream, enc->cstate, enc->mrk)) {
+					return -1;
+				}
+				qcc->compparms.stepsizes = 0;
+				jpc_ms_destroy(enc->mrk);
+				enc->mrk = 0;
+			}
+		}
+
+		/* Write a SOD marker to indicate the end of the tile header. */
+		if (!(enc->mrk = jpc_ms_create(JPC_MS_SOD))) {
+			return -1;
+		}
+		if (jpc_putms(enc->tmpstream, enc->cstate, enc->mrk)) {
+			jas_eprintf("cannot write SOD marker\n");
+			return -1;
+		}
+		jpc_ms_destroy(enc->mrk);
+		enc->mrk = 0;
+		tilehdrlen = jas_stream_getrwcount(enc->tmpstream);
+
+/************************************************************************/
+/************************************************************************/
+/************************************************************************/
+
+		/* Report a coding failure to the caller like every other error here. */
+		if (jpc_enc_enccblks(enc)) {
+			return -1;
+		}
+
+		cp = enc->cp;
+		/* rho is this tile's area divided by the area of the whole image. */
+		rho = (double) (tile->brx - tile->tlx) * (tile->bry - tile->tly) /
+		  ((cp->refgrdwidth - cp->imgareatlx) * (cp->refgrdheight -
+		  cp->imgareatly));
+		tile->rawsize = cp->rawsize * rho;
+
+		for (lyrno = 0; lyrno < tile->numlyrs - 1; ++lyrno) {
+			tile->lyrsizes[lyrno] = tile->rawsize * jpc_fixtodbl(
+			  cp->tcp.ilyrrates[lyrno]);
+		}
+		tile->lyrsizes[tile->numlyrs - 1] = (cp->totalsize != UINT_FAST32_MAX) ?
+		  (rho * enc->mainbodysize) : UINT_FAST32_MAX;
+		for (lyrno = 0; lyrno < tile->numlyrs; ++lyrno) {
+			if (tile->lyrsizes[lyrno] != UINT_FAST32_MAX) {
+				if (tilehdrlen <= JAS_CAST(long, tile->lyrsizes[lyrno])) {
+					tile->lyrsizes[lyrno] -= tilehdrlen;
+				} else {
+					tile->lyrsizes[lyrno] = 0;
+				}
+			}
+		}
+
+		if (rateallocate(enc, tile->numlyrs, tile->lyrsizes)) {
+			return -1;
+		}
+
+#if 0
+jas_eprintf("ENCODE TILE DATA\n");
+#endif
+		if (jpc_enc_encodetiledata(enc)) {
+			jas_eprintf("dotile failed\n");
+			return -1;
+		}
+
+/************************************************************************/
+/************************************************************************/
+/************************************************************************/
+
+		tilelen = jas_stream_tell(enc->tmpstream);
+
+		/* The SOT marker above was written with a length of 0; go back to
+		  byte offset 6 of the tile stream and store the real length. */
+		if (jas_stream_seek(enc->tmpstream, 6, SEEK_SET) < 0) {
+			return -1;
+		}
+		if (jpc_putuint32(enc->tmpstream, tilelen)) {
+			return -1;
+		}
+
+		if (jas_stream_seek(enc->tmpstream, 0, SEEK_SET) < 0) {
+			return -1;
+		}
+		if (jpc_putdata(enc->out, enc->tmpstream, -1)) {
+			return -1;
+		}
+		enc->len += tilelen;
+
+		jas_stream_close(enc->tmpstream);
+		enc->tmpstream = 0;
+
+		jpc_enc_tile_destroy(enc->curtile);
+		enc->curtile = 0;
+
+	}
+
+	return 0;
+}
+
+/* Run jpc_enc_encpkts() on enc->tmpstream; returns 0 on success, else -1. */
+int jpc_enc_encodetiledata(jpc_enc_t *enc)
+{
+	/* The temporary tile stream must already exist. */
+	assert(enc->tmpstream);
+	/* Any nonzero status from the packet encoder is reported as -1. */
+	return jpc_enc_encpkts(enc, enc->tmpstream) ? -1 : 0;
+}
+
+/* Debugging aid: log one line describing each of the given coding passes. */
+int dump_passes(jpc_enc_pass_t *passes, int numpasses, jpc_enc_cblk_t *cblk)
+{
+	/* Object behind the code block's stream (read for buf_, len_, pos_). */
+	jas_stream_memobj_t *memobj = cblk->stream->obj_;
+	jpc_enc_pass_t *cur;
+	int idx;
+
+	for (idx = 0; idx < numpasses; ++idx) {
+		cur = &passes[idx];
+		jas_eprintf("start=%d end=%d type=%d term=%d lyrno=%d firstchar=%02x size=%ld pos=%ld\n",
+		  (int)cur->start, (int)cur->end, (int)cur->type, (int)cur->term, (int)cur->lyrno,
+		  memobj->buf_[cur->start], (long)memobj->len_, (long)memobj->pos_);
+#if 0
+		jas_memdump(stderr, &memobj->buf_[cur->start], cur->end - cur->start);
+#endif
+	}
+	/* There is no failure path; the status is always 0. */
+	return 0;
+}
+
+/* Quantize a matrix in place: every element is divided by stepsize using
+  jpc_fix_div().  A negative element is negated, divided and negated again,
+  so the division itself is always given a non-negative dividend. */
+void jpc_quantize(jas_matrix_t *data, jpc_fix_t stepsize)
+{
+	int row;
+	int col;
+	jpc_fix_t val;
+
+	/* A step size of exactly one leaves the data untouched. */
+	if (stepsize == jpc_inttofix(1)) {
+		return;
+	}
+
+	for (row = 0; row < jas_matrix_numrows(data); ++row) {
+		for (col = 0; col < jas_matrix_numcols(data); ++col) {
+			val = jas_matrix_get(data, row, col);
+			if (val >= 0) {
+				val = jpc_fix_div(val, stepsize);
+			} else {
+				val = jpc_fix_neg(jpc_fix_div(jpc_fix_neg(val), stepsize));
+			}
+			jas_matrix_set(data, row, col, val);
+		}
+	}
+}
+
+void calcrdslopes(jpc_enc_cblk_t *cblk)
+{
+	jpc_enc_pass_t *endpasses;	/* one past the last pass of the code block */
+	jpc_enc_pass_t *pass0;	/* last pass accepted in the current scan, or 0 */
+	jpc_enc_pass_t *pass1;	/* pass being examined */
+	jpc_enc_pass_t *pass2;	/* one past the furthest pass given a slope so far */
+	jpc_flt_t slope0;	/* slope assigned to pass0 */
+	jpc_flt_t slope;
+	jpc_flt_t dd;	/* change in cumwmsedec relative to pass0 */
+	long dr;	/* change in the end offset relative to pass0 */
+	/* Assign pass->rdslope for every pass by rescanning until pass2 reaches the end. */
+	endpasses = &cblk->passes[cblk->numpasses];
+	pass2 = cblk->passes;
+	slope0 = 0;
+	while (pass2 != endpasses) {
+		pass0 = 0;	/* each scan starts again from the first pass */
+		for (pass1 = cblk->passes; pass1 != endpasses; ++pass1) {
+			dd = pass1->cumwmsedec;
+			dr = pass1->end;
+			if (pass0) {
+				dd -= pass0->cumwmsedec;
+				dr -= pass0->end;
+			}
+			if (dd <= 0) {	/* no gain over pass0: mark the pass as unusable */
+				pass1->rdslope = JPC_BADRDSLOPE;
+				if (pass1 >= pass2) {
+					pass2 = &pass1[1];
+				}
+				continue;
+			}
+			if (pass1 < pass2 && pass1->rdslope <= 0) {	/* visited earlier and rejected */
+				continue;
+			}
+			if (!dr) {	/* same end offset as pass0: drop pass0 and rescan */
+				assert(pass0);
+				pass0->rdslope = 0;
+				break;
+			}
+			slope = dd / dr;
+			if (pass0 && slope >= slope0) {	/* slope did not decrease: drop pass0 and rescan */
+				pass0->rdslope = 0;
+				break;
+			}
+			pass1->rdslope = slope;
+			if (pass1 >= pass2) {
+				pass2 = &pass1[1];
+			}
+			pass0 = pass1;
+			slope0 = slope;
+		}
+	}
+
+#if 0
+	for (pass0 = cblk->passes; pass0 != endpasses; ++pass0) {
+if (pass0->rdslope > 0.0) {
+		jas_eprintf("pass %02d nmsedec=%lf dec=%lf end=%d %lf\n", pass0 - cblk->passes,
+		  fixtodbl(pass0->nmsedec), pass0->wmsedec, pass0->end, pass0->rdslope);
+}
+	}
+#endif
+}
+
+/* Debugging aid: for every layer of the current tile, log the coordinates
+  of each code-block pass assigned to it.  For a given code block the scan
+  starts at its first pass and stops at the first pass of another layer. */
+void dump_layeringinfo(jpc_enc_t *enc)
+{
+	jpc_enc_tile_t *tile = enc->curtile;
+	jpc_enc_tcmpt_t *tcmpt;
+	jpc_enc_rlvl_t *rlvl;
+	jpc_enc_band_t *band;
+	jpc_enc_prc_t *prc;
+	jpc_enc_cblk_t *cblk;
+	int lyrno;
+	int tcmptno;
+	int rlvlno;
+	int bandno;
+	int prcno;
+	int cblkno;
+	int passno;
+
+	for (lyrno = 0; lyrno < tile->numlyrs; ++lyrno) {
+		jas_eprintf("lyrno = %02d\n", lyrno);
+		for (tcmptno = 0; tcmptno < tile->numtcmpts; ++tcmptno) {
+			tcmpt = &tile->tcmpts[tcmptno];
+			for (rlvlno = 0; rlvlno < tcmpt->numrlvls; ++rlvlno) {
+				rlvl = &tcmpt->rlvls[rlvlno];
+				/* A level without bands has nothing to report. */
+				if (!rlvl->bands) {
+					continue;
+				}
+				for (bandno = 0; bandno < rlvl->numbands; ++bandno) {
+					band = &rlvl->bands[bandno];
+					/* Likewise for a band without data. */
+					if (!band->data) {
+						continue;
+					}
+					for (prcno = 0; prcno < rlvl->numprcs; ++prcno) {
+						prc = &band->prcs[prcno];
+						if (!prc->cblks) {
+							continue;
+						}
+						for (cblkno = 0; cblkno < prc->numcblks; ++cblkno) {
+							cblk = &prc->cblks[cblkno];
+							for (passno = 0; passno < cblk->numpasses &&
+							  cblk->passes[passno].lyrno == lyrno; ++passno) {
+								jas_eprintf("lyrno=%02d cmptno=%02d rlvlno=%02d bandno=%02d prcno=%02d cblkno=%03d passno=%03d\n", lyrno, tcmptno, rlvlno, bandno, prcno, cblkno, passno);
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
+/* Assign the coding passes of the current tile to quality layers.
+  cumlens[lyrno] is the byte budget for layers 0..lyrno together;
+  UINT_FAST32_MAX (allowed for the last layer only) means no limit.  For a
+  constrained layer an R-D slope threshold is found by bisection: trial
+  packets are written to a scratch memory stream and the tier 2 state is
+  rolled back after each trial.  Returns 0 on success and -1 on failure. */
+int rateallocate(jpc_enc_t *enc, int numlyrs, uint_fast32_t *cumlens)
+{
+	jpc_flt_t lo;
+	jpc_flt_t hi;
+	jas_stream_t *out;
+	long cumlen;
+	int lyrno;
+	jpc_flt_t thresh;
+	jpc_flt_t goodthresh;
+	int success;
+	long pos;
+	long oldpos;
+	int numiters;
+	jpc_enc_tcmpt_t *comp;
+	jpc_enc_tcmpt_t *endcomps;
+	jpc_enc_rlvl_t *lvl;
+	jpc_enc_rlvl_t *endlvls;
+	jpc_enc_band_t *band;
+	jpc_enc_band_t *endbands;
+	jpc_enc_cblk_t *cblk;
+	jpc_enc_cblk_t *endcblks;
+	jpc_enc_pass_t *pass;
+	jpc_enc_pass_t *endpasses;
+	jpc_enc_pass_t *pass1;
+	jpc_flt_t mxrdslope;
+	jpc_flt_t mnrdslope;
+	jpc_enc_tile_t *tile;
+	jpc_enc_prc_t *prc;
+	int prcno;
+
+	tile = enc->curtile;
+	/* The cumulative budgets checked here must not decrease. */
+	for (lyrno = 1; lyrno < numlyrs - 1; ++lyrno) {
+		if (cumlens[lyrno - 1] > cumlens[lyrno]) {
+			return -1;
+		}
+	}
+
+	if (!(out = jas_stream_memopen(0, 0))) {
+		return -1;
+	}
+
+	/* Find minimum and maximum R-D slope values. */
+	mnrdslope = DBL_MAX;
+	mxrdslope = 0;
+	endcomps = &tile->tcmpts[tile->numtcmpts];
+	for (comp = tile->tcmpts; comp != endcomps; ++comp) {
+		endlvls = &comp->rlvls[comp->numrlvls];
+		for (lvl = comp->rlvls; lvl != endlvls; ++lvl) {
+			if (!lvl->bands) {
+				continue;
+			}
+			endbands = &lvl->bands[lvl->numbands];
+			for (band = lvl->bands; band != endbands; ++band) {
+				if (!band->data) {
+					continue;
+				}
+				for (prcno = 0, prc = band->prcs; prcno < lvl->numprcs; ++prcno, ++prc) {
+					if (!prc->cblks) {
+						continue;
+					}
+					endcblks = &prc->cblks[prc->numcblks];
+					for (cblk = prc->cblks; cblk != endcblks; ++cblk) {
+						calcrdslopes(cblk);
+						endpasses = &cblk->passes[cblk->numpasses];
+						for (pass = cblk->passes; pass != endpasses; ++pass) {
+							if (pass->rdslope > 0) {
+								if (pass->rdslope < mnrdslope) {
+									mnrdslope = pass->rdslope;
+								}
+								if (pass->rdslope > mxrdslope) {
+									mxrdslope = pass->rdslope;
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+	if (jas_getdbglevel()) {
+		jas_eprintf("min rdslope = %f max rdslope = %f\n", mnrdslope, mxrdslope);
+	}
+
+	jpc_init_t2state(enc, 1);
+
+	for (lyrno = 0; lyrno < numlyrs; ++lyrno) {
+		lo = mnrdslope;
+		hi = mxrdslope;
+		success = 0;
+		goodthresh = 0;
+		numiters = 0;
+		do {
+			cumlen = cumlens[lyrno];
+			if (cumlen == UINT_FAST32_MAX) {
+				/* Only the last layer can be free of a rate
+				  constraint (e.g., for lossless coding). */
+				assert(lyrno == numlyrs - 1);
+				goodthresh = -1;
+				success = 1;
+				break;
+			}
+
+			thresh = (lo + hi) / 2;
+
+			/* Save the tier 2 coding state. */
+			jpc_save_t2state(enc);
+			oldpos = jas_stream_tell(out);
+			assert(oldpos >= 0);
+
+			/* Assign all passes with R-D slopes greater than or
+			  equal to the current threshold to this layer. */
+			endcomps = &tile->tcmpts[tile->numtcmpts];
+			for (comp = tile->tcmpts; comp != endcomps; ++comp) {
+				endlvls = &comp->rlvls[comp->numrlvls];
+				for (lvl = comp->rlvls; lvl != endlvls; ++lvl) {
+					if (!lvl->bands) {
+						continue;
+					}
+					endbands = &lvl->bands[lvl->numbands];
+					for (band = lvl->bands; band != endbands; ++band) {
+						if (!band->data) {
+							continue;
+						}
+						for (prcno = 0, prc = band->prcs; prcno < lvl->numprcs; ++prcno, ++prc) {
+							if (!prc->cblks) {
+								continue;
+							}
+							endcblks = &prc->cblks[prc->numcblks];
+							for (cblk = prc->cblks; cblk != endcblks; ++cblk) {
+								if (cblk->curpass) {
+									endpasses = &cblk->passes[cblk->numpasses];
+									pass1 = cblk->curpass;
+									for (pass = cblk->curpass; pass != endpasses; ++pass) {
+										if (pass->rdslope >= thresh) {
+											pass1 = &pass[1];
+										}
+									}
+									for (pass = cblk->curpass; pass != pass1; ++pass) {
+										pass->lyrno = lyrno;
+									}
+									for (; pass != endpasses; ++pass) {
+										pass->lyrno = -1;
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+
+			/* Perform tier 2 coding. */
+			endcomps = &tile->tcmpts[tile->numtcmpts];
+			for (comp = tile->tcmpts; comp != endcomps; ++comp) {
+				endlvls = &comp->rlvls[comp->numrlvls];
+				for (lvl = comp->rlvls; lvl != endlvls; ++lvl) {
+					if (!lvl->bands) {
+						continue;
+					}
+					for (prcno = 0; prcno < lvl->numprcs; ++prcno) {
+						if (jpc_enc_encpkt(enc, out, comp - tile->tcmpts, lvl - comp->rlvls, prcno, lyrno)) {
+							goto error;
+						}
+					}
+				}
+			}
+
+			pos = jas_stream_tell(out);
+
+			/* Check the rate constraint. */
+			assert(pos >= 0);
+			if (pos > cumlen) {
+				/* The rate is too high. */
+				lo = thresh;
+			} else {
+				/* The rate fits, so try a lower threshold (more passes). */
+				hi = thresh;
+				if (!success || thresh < goodthresh) {
+					goodthresh = thresh;
+					success = 1;
+				}
+			}
+
+			/* Restore the tier 2 coding state. */
+			jpc_restore_t2state(enc);
+			if (jas_stream_seek(out, oldpos, SEEK_SET) < 0) {
+				goto error;
+			}
+
+			if (jas_getdbglevel()) {
+				jas_eprintf("maxlen=%08ld actuallen=%08ld thresh=%f\n", cumlen, pos, thresh);
+			}
+
+			++numiters;
+		} while (lo < hi - 1e-3 && numiters < 32);
+
+		if (!success) {
+			jas_eprintf("warning: empty layer generated\n");
+		}
+
+		if (jas_getdbglevel()) {
+			jas_eprintf("success %d goodthresh %f\n", success, goodthresh);
+		}
+
+		/* Assign all passes with R-D slopes greater than or
+		  equal to the selected threshold to this layer. */
+		endcomps = &tile->tcmpts[tile->numtcmpts];
+		for (comp = tile->tcmpts; comp != endcomps; ++comp) {
+			endlvls = &comp->rlvls[comp->numrlvls];
+			for (lvl = comp->rlvls; lvl != endlvls; ++lvl) {
+				if (!lvl->bands) {
+					continue;
+				}
+				endbands = &lvl->bands[lvl->numbands];
+				for (band = lvl->bands; band != endbands; ++band) {
+					if (!band->data) {
+						continue;
+					}
+					for (prcno = 0, prc = band->prcs; prcno < lvl->numprcs; ++prcno, ++prc) {
+						if (!prc->cblks) {
+							continue;
+						}
+						endcblks = &prc->cblks[prc->numcblks];
+						for (cblk = prc->cblks; cblk != endcblks; ++cblk) {
+							if (cblk->curpass) {
+								endpasses = &cblk->passes[cblk->numpasses];
+								pass1 = cblk->curpass;
+								if (success) {
+									for (pass = cblk->curpass; pass != endpasses; ++pass) {
+										if (pass->rdslope >= goodthresh) {
+											pass1 = &pass[1];
+										}
+									}
+								}
+								for (pass = cblk->curpass; pass != pass1; ++pass) {
+									pass->lyrno = lyrno;
+								}
+								for (; pass != endpasses; ++pass) {
+									pass->lyrno = -1;
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+
+		/* Perform tier 2 coding. */
+		endcomps = &tile->tcmpts[tile->numtcmpts];
+		for (comp = tile->tcmpts; comp != endcomps; ++comp) {
+			endlvls = &comp->rlvls[comp->numrlvls];
+			for (lvl = comp->rlvls; lvl != endlvls; ++lvl) {
+				if (!lvl->bands) {
+					continue;
+				}
+				for (prcno = 0; prcno < lvl->numprcs; ++prcno) {
+					if (jpc_enc_encpkt(enc, out, comp - tile->tcmpts, lvl - comp->rlvls, prcno, lyrno)) {
+						goto error;
+					}
+				}
+			}
+		}
+	}
+
+	if (jas_getdbglevel() >= 5) {
+		dump_layeringinfo(enc);
+	}
+
+	jas_stream_close(out);
+	JAS_DBGLOG(10, ("done doing rateallocation\n"));
+
+	return 0;
+
+error:
+	jas_stream_close(out);
+	return -1;
+}
+
+/******************************************************************************\
+* Tile constructors and destructors.
+\******************************************************************************/
+
+/* Create the encoder state for tile number tileno.
+  Fills in the tile's bounds, the coding parameters taken from cp->tcp, a
+  zeroed lyrsizes array, one tile-component per component of cp, the MCT
+  synthesis weights and the packet iterator.
+  Returns the tile, or 0 on failure (anything already built is destroyed). */
+jpc_enc_tile_t *jpc_enc_tile_create(jpc_enc_cp_t *cp, jas_image_t *image, int tileno)
+{
+	jpc_enc_tile_t *tile;
+	jpc_enc_tcmpt_t *tcmpt;
+	uint_fast32_t tilecol;
+	uint_fast32_t tilerow;
+	uint_fast16_t lyrno;
+	uint_fast16_t cmptno;
+
+	tile = jas_malloc(sizeof(jpc_enc_tile_t));
+	if (!tile) {
+		/* Nothing exists yet, so there is nothing to clean up. */
+		return 0;
+	}
+
+	/* Members that jpc_enc_tile_destroy() inspects must be valid first. */
+	tile->tcmpts = 0;
+	tile->lyrsizes = 0;
+	tile->numtcmpts = cp->numcmpts;
+	tile->pi = 0;
+
+	tile->tileno = tileno;
+	tilecol = tileno % cp->numhtiles;
+	tilerow = tileno / cp->numhtiles;
+
+	/* Top-left corner: no smaller than the image area origin.
+	  Bottom-right corner: no larger than the reference grid size. */
+	tile->tlx = JAS_MAX(cp->tilegrdoffx + tilecol * cp->tilewidth,
+	  cp->imgareatlx);
+	tile->tly = JAS_MAX(cp->tilegrdoffy + tilerow * cp->tileheight,
+	  cp->imgareatly);
+	tile->brx = JAS_MIN(cp->tilegrdoffx + (tilecol + 1) * cp->tilewidth,
+	  cp->refgrdwidth);
+	tile->bry = JAS_MIN(cp->tilegrdoffy + (tilerow + 1) * cp->tileheight,
+	  cp->refgrdheight);
+
+	/* Tile-level coding parameters come straight from cp->tcp. */
+	tile->intmode = cp->tcp.intmode;
+	tile->csty = cp->tcp.csty;
+	tile->prg = cp->tcp.prg;
+	tile->mctid = cp->tcp.mctid;
+	tile->numlyrs = cp->tcp.numlyrs;
+
+	/* One size entry per layer, all starting at zero. */
+	tile->lyrsizes = jas_malloc(tile->numlyrs * sizeof(uint_fast32_t));
+	if (!tile->lyrsizes) {
+		goto error;
+	}
+	for (lyrno = 0; lyrno < tile->numlyrs; ++lyrno) {
+		tile->lyrsizes[lyrno] = 0;
+	}
+
+	/* Storage for the per-tile-component records. */
+	tile->tcmpts = jas_malloc(cp->numcmpts * sizeof(jpc_enc_tcmpt_t));
+	if (!tile->tcmpts) {
+		goto error;
+	}
+	/* Clear what tcmpt_destroy() looks at before any record is built. */
+	for (cmptno = 0; cmptno < cp->numcmpts; ++cmptno) {
+		tcmpt = &tile->tcmpts[cmptno];
+		tcmpt->rlvls = 0;
+		tcmpt->tsfb = 0;
+		tcmpt->data = 0;
+	}
+	/* Now build each tile-component in turn. */
+	for (cmptno = 0; cmptno < cp->numcmpts; ++cmptno) {
+		if (!tcmpt_create(&tile->tcmpts[cmptno], cp, image, tile)) {
+			goto error;
+		}
+	}
+
+	/* Synthesis weights depend on the multicomponent transform in use. */
+	if (tile->mctid == JPC_MCT_RCT) {
+		tile->tcmpts[0].synweight = jpc_dbltofix(sqrt(3.0));
+		tile->tcmpts[1].synweight = jpc_dbltofix(sqrt(0.6875));
+		tile->tcmpts[2].synweight = jpc_dbltofix(sqrt(0.6875));
+	} else if (tile->mctid == JPC_MCT_ICT) {
+		tile->tcmpts[0].synweight = jpc_dbltofix(sqrt(3.0000));
+		tile->tcmpts[1].synweight = jpc_dbltofix(sqrt(3.2584));
+		tile->tcmpts[2].synweight = jpc_dbltofix(sqrt(2.4755));
+	} else {
+		/* JPC_MCT_NONE and any other value: unit weight everywhere. */
+		for (cmptno = 0; cmptno < cp->numcmpts; ++cmptno) {
+			tile->tcmpts[cmptno].synweight = JPC_FIX_ONE;
+		}
+	}
+
+	tile->pi = jpc_enc_pi_create(cp, tile);
+	if (!tile->pi) {
+		goto error;
+	}
+	return tile;
+
+error:
+	jpc_enc_tile_destroy(tile);
+	return 0;
+}
+
+/* Free a tile together with its tile-components, layer sizes and packet
+  iterator.  The tile pointer must not be used afterwards. */
+void jpc_enc_tile_destroy(jpc_enc_tile_t *tile)
+{
+	uint_fast16_t idx;
+
+	if (tile->tcmpts) {
+		for (idx = 0; idx < tile->numtcmpts; ++idx) {
+			tcmpt_destroy(&tile->tcmpts[idx]);
+		}
+		jas_free(tile->tcmpts);
+	}
+	if (tile->lyrsizes) {
+		jas_free(tile->lyrsizes);
+	}
+	if (tile->pi) {
+		jpc_pi_destroy(tile->pi);
+	}
+	jas_free(tile);
+}
+
+/* Initialize one tile-component: read its samples from the image, copy the
+  coding parameters from cp->tccp and build its resolution levels.  Returns
+  tcmpt, or 0 on failure (tcmpt_destroy has then been applied to tcmpt). */
+static jpc_enc_tcmpt_t *tcmpt_create(jpc_enc_tcmpt_t *tcmpt, jpc_enc_cp_t *cp,
+  jas_image_t *image, jpc_enc_tile_t *tile)
+{
+	uint_fast16_t cmptno;
+	uint_fast16_t rlvlno;
+	jpc_enc_rlvl_t *rlvl;
+	uint_fast32_t tlx;
+	uint_fast32_t tly;
+	uint_fast32_t brx;
+	uint_fast32_t bry;
+	uint_fast32_t cmpttlx;
+	uint_fast32_t cmpttly;
+	jpc_enc_ccp_t *ccp;
+	jpc_tsfb_band_t bandinfos[JPC_MAXBANDS];
+
+	tcmpt->tile = tile;
+	tcmpt->tsfb = 0;
+	tcmpt->data = 0;
+	tcmpt->rlvls = 0;
+
+	/* Deduce the component number. */
+	cmptno = tcmpt - tile->tcmpts;
+	ccp = &cp->ccps[cmptno];
+
+	/* Compute the coordinates of the top-left and bottom-right
+	  corners of this tile-component. */
+	tlx = JPC_CEILDIV(tile->tlx, ccp->sampgrdstepx);
+	tly = JPC_CEILDIV(tile->tly, ccp->sampgrdstepy);
+	brx = JPC_CEILDIV(tile->brx, ccp->sampgrdstepx);
+	bry = JPC_CEILDIV(tile->bry, ccp->sampgrdstepy);
+
+	/* Create a sequence to hold the tile-component sample data. */
+	if (!(tcmpt->data = jas_seq2d_create(tlx, tly, brx, bry))) {
+		goto error;
+	}
+
+	/* Get the image data associated with this tile-component. */
+	cmpttlx = JPC_CEILDIV(cp->imgareatlx, ccp->sampgrdstepx);
+	cmpttly = JPC_CEILDIV(cp->imgareatly, ccp->sampgrdstepy);
+	if (jas_image_readcmpt(image, cmptno, tlx - cmpttlx, tly - cmpttly,
+	  brx - tlx, bry - tly, tcmpt->data)) {
+		goto error;
+	}
+
+	tcmpt->synweight = 0;
+	tcmpt->qmfbid = cp->tccp.qmfbid;
+	tcmpt->numrlvls = cp->tccp.maxrlvls;
+	tcmpt->numbands = 3 * tcmpt->numrlvls - 2;
+	if (!(tcmpt->tsfb = jpc_cod_gettsfb(tcmpt->qmfbid, tcmpt->numrlvls - 1))) {
+		goto error;
+	}
+
+	for (rlvlno = 0; rlvlno < tcmpt->numrlvls; ++rlvlno) {
+		tcmpt->prcwidthexpns[rlvlno] = cp->tccp.prcwidthexpns[rlvlno];
+		tcmpt->prcheightexpns[rlvlno] = cp->tccp.prcheightexpns[rlvlno];
+	}
+	tcmpt->cblkwidthexpn = cp->tccp.cblkwidthexpn;
+	tcmpt->cblkheightexpn = cp->tccp.cblkheightexpn;
+	tcmpt->cblksty = cp->tccp.cblksty;
+	tcmpt->csty = cp->tccp.csty;
+
+	tcmpt->numstepsizes = tcmpt->numbands;
+	assert(tcmpt->numstepsizes <= JPC_MAXBANDS);
+	/* Clear numstepsizes entries (not just sizeof of the size expression). */
+	memset(tcmpt->stepsizes, 0, tcmpt->numstepsizes * sizeof(tcmpt->stepsizes[0]));
+
+	/* Retrieve information about the various bands. */
+	jpc_tsfb_getbands(tcmpt->tsfb, jas_seq2d_xstart(tcmpt->data),
+	  jas_seq2d_ystart(tcmpt->data), jas_seq2d_xend(tcmpt->data),
+	  jas_seq2d_yend(tcmpt->data), bandinfos);
+
+	if (!(tcmpt->rlvls = jas_malloc(tcmpt->numrlvls * sizeof(jpc_enc_rlvl_t)))) {
+		goto error;
+	}
+	for (rlvlno = 0, rlvl = tcmpt->rlvls; rlvlno < tcmpt->numrlvls;
+	  ++rlvlno, ++rlvl) {
+		rlvl->bands = 0;
+		rlvl->tcmpt = tcmpt;
+	}
+	for (rlvlno = 0, rlvl = tcmpt->rlvls; rlvlno < tcmpt->numrlvls;
+	  ++rlvlno, ++rlvl) {
+		if (!rlvl_create(rlvl, cp, tcmpt, bandinfos)) {
+			goto error;
+		}
+	}
+
+	return tcmpt;
+
+error:
+	tcmpt_destroy(tcmpt);
+	return 0;
+}
+
+/* Free what a tile-component owns and clear the pointers, so a repeated call frees nothing twice. */
+static void tcmpt_destroy(jpc_enc_tcmpt_t *tcmpt)
+{
+	uint_fast16_t rlvlno;
+	if (tcmpt->rlvls) {
+		for (rlvlno = 0; rlvlno < tcmpt->numrlvls; ++rlvlno) {
+			rlvl_destroy(&tcmpt->rlvls[rlvlno]);
+		}
+		jas_free(tcmpt->rlvls);
+		tcmpt->rlvls = 0;
+	}
+	if (tcmpt->data) {
+		jas_seq2d_destroy(tcmpt->data);
+		tcmpt->data = 0;
+	}
+	if (tcmpt->tsfb) {
+		jpc_tsfb_destroy(tcmpt->tsfb);
+		tcmpt->tsfb = 0;
+	}
+}
+
+/* Set up one resolution level of a tile-component: its bounds at this
+  resolution, the precinct and code-block exponents, and its bands.
+  Returns rlvl, or 0 on failure after rlvl_destroy() has been applied. */
+static jpc_enc_rlvl_t *rlvl_create(jpc_enc_rlvl_t *rlvl, jpc_enc_cp_t *cp,
+  jpc_enc_tcmpt_t *tcmpt, jpc_tsfb_band_t *bandinfos)
+{
+	uint_fast16_t lvlidx;
+	uint_fast16_t bandidx;
+	uint_fast32_t gridtlx;
+	uint_fast32_t gridtly;
+	uint_fast32_t gridbrx;
+	uint_fast32_t gridbry;
+
+	lvlidx = rlvl - tcmpt->rlvls;
+
+	/* Both members are consulted on the error path, so set them first. */
+	rlvl->tcmpt = tcmpt;
+	rlvl->bands = 0;
+
+	/* Corners of the tile-component at this resolution. */
+	rlvl->tlx = JPC_CEILDIVPOW2(jas_seq2d_xstart(tcmpt->data), tcmpt->numrlvls -
+	  1 - lvlidx);
+	rlvl->tly = JPC_CEILDIVPOW2(jas_seq2d_ystart(tcmpt->data), tcmpt->numrlvls -
+	  1 - lvlidx);
+	rlvl->brx = JPC_CEILDIVPOW2(jas_seq2d_xend(tcmpt->data), tcmpt->numrlvls -
+	  1 - lvlidx);
+	rlvl->bry = JPC_CEILDIVPOW2(jas_seq2d_yend(tcmpt->data), tcmpt->numrlvls -
+	  1 - lvlidx);
+
+	/* A level without any area gets no precincts and no bands. */
+	if (!(rlvl->tlx < rlvl->brx && rlvl->tly < rlvl->bry)) {
+		rlvl->numhprcs = 0;
+		rlvl->numvprcs = 0;
+		rlvl->numprcs = 0;
+		return rlvl;
+	}
+
+	rlvl->prcwidthexpn = cp->tccp.prcwidthexpns[lvlidx];
+	rlvl->prcheightexpn = cp->tccp.prcheightexpns[lvlidx];
+	if (lvlidx == 0) {
+		rlvl->numbands = 1;
+		rlvl->cbgwidthexpn = rlvl->prcwidthexpn;
+		rlvl->cbgheightexpn = rlvl->prcheightexpn;
+	} else {
+		rlvl->numbands = 3;
+		rlvl->cbgwidthexpn = rlvl->prcwidthexpn - 1;
+		rlvl->cbgheightexpn = rlvl->prcheightexpn - 1;
+	}
+	rlvl->cblkwidthexpn = JAS_MIN(cp->tccp.cblkwidthexpn, rlvl->cbgwidthexpn);
+	rlvl->cblkheightexpn = JAS_MIN(cp->tccp.cblkheightexpn, rlvl->cbgheightexpn);
+
+	/* Count the precincts spanned after snapping to the precinct grid. */
+	gridtlx = JPC_FLOORTOMULTPOW2(rlvl->tlx, rlvl->prcwidthexpn);
+	gridtly = JPC_FLOORTOMULTPOW2(rlvl->tly, rlvl->prcheightexpn);
+	gridbrx = JPC_CEILTOMULTPOW2(rlvl->brx, rlvl->prcwidthexpn);
+	gridbry = JPC_CEILTOMULTPOW2(rlvl->bry, rlvl->prcheightexpn);
+	rlvl->numhprcs = JPC_FLOORDIVPOW2(gridbrx - gridtlx, rlvl->prcwidthexpn);
+	rlvl->numvprcs = JPC_FLOORDIVPOW2(gridbry - gridtly, rlvl->prcheightexpn);
+	rlvl->numprcs = rlvl->numhprcs * rlvl->numvprcs;
+
+	rlvl->bands = jas_malloc(rlvl->numbands * sizeof(jpc_enc_band_t));
+	if (!rlvl->bands) {
+		goto error;
+	}
+	for (bandidx = 0; bandidx < rlvl->numbands; ++bandidx) {
+		rlvl->bands[bandidx].prcs = 0;
+		rlvl->bands[bandidx].data = 0;
+		rlvl->bands[bandidx].rlvl = rlvl;
+	}
+	for (bandidx = 0; bandidx < rlvl->numbands; ++bandidx) {
+		if (!band_create(&rlvl->bands[bandidx], cp, rlvl, bandinfos)) {
+			goto error;
+		}
+	}
+	return rlvl;
+
+error:
+	rlvl_destroy(rlvl);
+	return 0;
+}
+
+/* Free the bands of a resolution level and clear the pointer, so that a
+  repeated call on the same level does not free them a second time. */
+static void rlvl_destroy(jpc_enc_rlvl_t *rlvl)
+{
+	uint_fast16_t bandno;
+	if (rlvl->bands) {
+		for (bandno = 0; bandno < rlvl->numbands; ++bandno) {
+			band_destroy(&rlvl->bands[bandno]);
+		}
+		jas_free(rlvl->bands);
+		rlvl->bands = 0;
+	}
+}
+
+/* Set up one band of a resolution level from its bandinfos entry and create
+  its precincts.  Returns band, or 0 on failure after band_destroy(). */
+static jpc_enc_band_t *band_create(jpc_enc_band_t *band, jpc_enc_cp_t *cp,
+  jpc_enc_rlvl_t *rlvl, jpc_tsfb_band_t *bandinfos)
+{
+	jpc_enc_tcmpt_t *tcmpt = rlvl->tcmpt;
+	jpc_tsfb_band_t *info;
+	uint_fast16_t lvlidx;
+	uint_fast16_t bandidx;
+	uint_fast16_t infoidx;
+	uint_fast32_t prcidx;
+
+	band->data = 0;
+	band->prcs = 0;
+	band->rlvl = rlvl;
+
+	/* Entry 0 is the band of level 0; each later level has three entries. */
+	lvlidx = rlvl - tcmpt->rlvls;
+	bandidx = band - rlvl->bands;
+	infoidx = lvlidx ? (3 * (lvlidx - 1) + bandidx + 1) : 0;
+	info = &bandinfos[infoidx];
+
+	/* Only a band with nonzero extent in both directions gets data. */
+	if (info->xstart != info->xend && info->ystart != info->yend) {
+		band->data = jas_seq2d_create(0, 0, 0, 0);
+		if (!band->data) {
+			goto error;
+		}
+		jas_seq2d_bindsub(band->data, tcmpt->data, info->locxstart,
+		  info->locystart, info->locxend, info->locyend);
+		jas_seq2d_setshift(band->data, info->xstart, info->ystart);
+	}
+	band->orient = info->orient;
+	band->analgain = JPC_NOMINALGAIN(cp->tccp.qmfbid, tcmpt->numrlvls, lvlidx,
+	  band->orient);
+	band->numbps = 0;
+	band->absstepsize = 0;
+	band->stepsize = 0;
+	band->synweight = info->synenergywt;
+
+	if (band->data) {
+		band->prcs = jas_malloc(rlvl->numprcs * sizeof(jpc_enc_prc_t));
+		if (!band->prcs) {
+			goto error;
+		}
+		/* Reset every precinct before the first one is built. */
+		for (prcidx = 0; prcidx < rlvl->numprcs; ++prcidx) {
+			band->prcs[prcidx].cblks = 0;
+			band->prcs[prcidx].incltree = 0;
+			band->prcs[prcidx].nlibtree = 0;
+			band->prcs[prcidx].savincltree = 0;
+			band->prcs[prcidx].savnlibtree = 0;
+			band->prcs[prcidx].band = band;
+		}
+		for (prcidx = 0; prcidx < rlvl->numprcs; ++prcidx) {
+			if (!prc_create(&band->prcs[prcidx], cp, band)) {
+				goto error;
+			}
+		}
+	}
+	return band;
+
+error:
+	band_destroy(band);
+	return 0;
+}
+
+static void band_destroy(jpc_enc_band_t *band)
+{
+	uint_fast32_t prcno;
+
+	/* Release the precincts and the data view of a band (not the band itself).
+	  Pointers are cleared: band_create's error path and rlvl_destroy can both get here. */
+	if (band->prcs) {
+		for (prcno = 0; prcno < band->rlvl->numprcs; ++prcno) {
+			prc_destroy(&band->prcs[prcno]);
+		}
+		jas_free(band->prcs);
+		band->prcs = 0;
+	}
+	if (band->data) {
+		jas_seq2d_destroy(band->data);
+		band->data = 0;
+	}
+}
+
+static jpc_enc_prc_t *prc_create(jpc_enc_prc_t *prc, jpc_enc_cp_t *cp, jpc_enc_band_t *band)
+{
+	uint_fast32_t prcno;
+	uint_fast32_t prcxind;
+	uint_fast32_t prcyind;
+	uint_fast32_t cbgtlx;
+	uint_fast32_t cbgtly;
+	uint_fast32_t tlprctlx;
+	uint_fast32_t tlprctly;
+	uint_fast32_t tlcbgtlx;
+	uint_fast32_t tlcbgtly;
+	uint_fast16_t rlvlno;
+	jpc_enc_rlvl_t *rlvl;
+	uint_fast32_t tlcblktlx;
+	uint_fast32_t tlcblktly;
+	uint_fast32_t brcblkbrx;
+	uint_fast32_t brcblkbry;
+	uint_fast32_t cblkno;
+	jpc_enc_cblk_t *cblk;
+	jpc_enc_tcmpt_t *tcmpt;
+	/* Initialize prc in place; returns prc, or 0 after prc_destroy() on failure. */
+	prc->cblks = 0;
+	prc->incltree = 0;
+	prc->savincltree = 0;
+	prc->nlibtree = 0;
+	prc->savnlibtree = 0;
+	/* Recover the level number and the precinct's grid position from pointer offsets. */
+	rlvl = band->rlvl;
+	tcmpt = rlvl->tcmpt;
+rlvlno = rlvl - tcmpt->rlvls;
+	prcno = prc - band->prcs;
+	prcxind = prcno % rlvl->numhprcs;
+	prcyind = prcno / rlvl->numhprcs;
+	prc->band = band;
+	/* Origin of the precinct grid; above level 0 it goes through JPC_CEILDIVPOW2(., 1) (presumably halved, rounded up). */
+tlprctlx = JPC_FLOORTOMULTPOW2(rlvl->tlx, rlvl->prcwidthexpn);
+tlprctly = JPC_FLOORTOMULTPOW2(rlvl->tly, rlvl->prcheightexpn);
+if (!rlvlno) {
+	tlcbgtlx = tlprctlx;
+	tlcbgtly = tlprctly;
+} else {
+	tlcbgtlx = JPC_CEILDIVPOW2(tlprctlx, 1);
+	tlcbgtly = JPC_CEILDIVPOW2(tlprctly, 1);
+}
+
+	/* Compute the coordinates of the top-left and bottom-right
+	  corners of the precinct, clipped to the extent of the band data. */
+	cbgtlx = tlcbgtlx + (prcxind << rlvl->cbgwidthexpn);
+	cbgtly = tlcbgtly + (prcyind << rlvl->cbgheightexpn);
+	prc->tlx = JAS_MAX(jas_seq2d_xstart(band->data), cbgtlx);
+	prc->tly = JAS_MAX(jas_seq2d_ystart(band->data), cbgtly);
+	prc->brx = JAS_MIN(jas_seq2d_xend(band->data), cbgtlx +
+	  (1 << rlvl->cbgwidthexpn));
+	prc->bry = JAS_MIN(jas_seq2d_yend(band->data), cbgtly +
+	  (1 << rlvl->cbgheightexpn));
+
+	if (prc->tlx < prc->brx && prc->tly < prc->bry) {
+		/* The precinct contains at least one code block. */
+
+		tlcblktlx = JPC_FLOORTOMULTPOW2(prc->tlx, rlvl->cblkwidthexpn);
+		tlcblktly = JPC_FLOORTOMULTPOW2(prc->tly, rlvl->cblkheightexpn);
+		brcblkbrx = JPC_CEILTOMULTPOW2(prc->brx, rlvl->cblkwidthexpn);
+		brcblkbry = JPC_CEILTOMULTPOW2(prc->bry, rlvl->cblkheightexpn);
+		prc->numhcblks = JPC_FLOORDIVPOW2(brcblkbrx - tlcblktlx,
+		  rlvl->cblkwidthexpn);
+		prc->numvcblks = JPC_FLOORDIVPOW2(brcblkbry - tlcblktly,
+		  rlvl->cblkheightexpn);
+		prc->numcblks = prc->numhcblks * prc->numvcblks;
+		/* Four tag trees of the same dimensions: incl, nlib and a saved counterpart of each. */
+		if (!(prc->incltree = jpc_tagtree_create(prc->numhcblks,
+		  prc->numvcblks))) {
+			goto error;
+		}
+		if (!(prc->nlibtree = jpc_tagtree_create(prc->numhcblks,
+		  prc->numvcblks))) {
+			goto error;
+		}
+		if (!(prc->savincltree = jpc_tagtree_create(prc->numhcblks,
+		  prc->numvcblks))) {
+			goto error;
+		}
+		if (!(prc->savnlibtree = jpc_tagtree_create(prc->numhcblks,
+		  prc->numvcblks))) {
+			goto error;
+		}
+		/* Zero every code block before building any, so the error path can destroy all of them. */
+		if (!(prc->cblks = jas_malloc(prc->numcblks * sizeof(jpc_enc_cblk_t)))) {
+			goto error;
+		}
+		for (cblkno = 0, cblk = prc->cblks; cblkno < prc->numcblks;
+		  ++cblkno, ++cblk) {
+			cblk->passes = 0;
+			cblk->stream = 0;
+			cblk->mqenc = 0;
+			cblk->data = 0;
+			cblk->flags = 0;
+			cblk->prc = prc;
+		}
+		for (cblkno = 0, cblk = prc->cblks; cblkno < prc->numcblks;
+		  ++cblkno, ++cblk) {
+			if (!cblk_create(cblk, cp, prc)) {
+				goto error;
+			}
+		}
+	} else {
+		/* The precinct does not contain any code blocks: collapse it to an empty rectangle. */
+		prc->tlx = prc->brx;
+		prc->tly = prc->bry;
+		prc->numcblks = 0;
+		prc->numhcblks = 0;
+		prc->numvcblks = 0;
+		prc->cblks = 0;
+		prc->incltree = 0;
+		prc->nlibtree = 0;
+		prc->savincltree = 0;
+		prc->savnlibtree = 0;
+	}
+
+	return prc;
+
+error:
+	prc_destroy(prc);
+	return 0;
+}
+
+static void prc_destroy(jpc_enc_prc_t *prc)
+{
+	uint_fast32_t cblkno;
+	/* Free all that prc owns and clear the pointers, so a second call (e.g. from band_destroy after a failed prc_create) does nothing. */
+	if (prc->cblks) {
+		for (cblkno = 0; cblkno < prc->numcblks; ++cblkno) {
+			cblk_destroy(&prc->cblks[cblkno]);
+		}
+		jas_free(prc->cblks);
+		prc->cblks = 0;
+	}
+	if (prc->incltree) {
+		jpc_tagtree_destroy(prc->incltree);
+	}
+	if (prc->nlibtree) {
+		jpc_tagtree_destroy(prc->nlibtree);
+	}
+	if (prc->savincltree) {
+		jpc_tagtree_destroy(prc->savincltree);
+	}
+	if (prc->savnlibtree) {
+		jpc_tagtree_destroy(prc->savnlibtree);
+	}
+	prc->incltree = prc->nlibtree = prc->savincltree = prc->savnlibtree = 0;
+}
+
+static jpc_enc_cblk_t *cblk_create(jpc_enc_cblk_t *cblk, jpc_enc_cp_t *cp, jpc_enc_prc_t *prc)
+{
+	jpc_enc_band_t *band;
+	uint_fast32_t cblktlx;
+	uint_fast32_t cblktly;
+	uint_fast32_t cblkbrx;
+	uint_fast32_t cblkbry;
+	jpc_enc_rlvl_t *rlvl;
+	uint_fast32_t cblkxind;
+	uint_fast32_t cblkyind;
+	uint_fast32_t cblkno;
+	uint_fast32_t tlcblktlx;
+	uint_fast32_t tlcblktly;
+	/* Initialize cblk in place; returns cblk, or 0 if its data view cannot be created.  cp is not referenced here. */
+	cblkno = cblk - prc->cblks;
+	cblkxind = cblkno % prc->numhcblks;
+	cblkyind = cblkno / prc->numhcblks;
+	rlvl = prc->band->rlvl;
+	cblk->prc = prc;
+	/* Reset every per-code-block field. */
+	cblk->numpasses = 0;
+	cblk->passes = 0;
+	cblk->numencpasses = 0;
+	cblk->numimsbs = 0;
+	cblk->numlenbits = 0;
+	cblk->stream = 0;
+	cblk->mqenc = 0;
+	cblk->flags = 0;
+	cblk->numbps = 0;
+	cblk->curpass = 0;
+	cblk->data = 0;
+	cblk->savedcurpass = 0;
+	cblk->savednumlenbits = 0;
+	cblk->savednumencpasses = 0;
+	/* Clip the nominal code block rectangle to the precinct bounds. */
+	band = prc->band;
+	tlcblktlx = JPC_FLOORTOMULTPOW2(prc->tlx, rlvl->cblkwidthexpn);
+	tlcblktly = JPC_FLOORTOMULTPOW2(prc->tly, rlvl->cblkheightexpn);
+	cblktlx = JAS_MAX(tlcblktlx + (cblkxind << rlvl->cblkwidthexpn), prc->tlx);
+	cblktly = JAS_MAX(tlcblktly + (cblkyind << rlvl->cblkheightexpn), prc->tly);
+	cblkbrx = JAS_MIN(tlcblktlx + ((cblkxind + 1) << rlvl->cblkwidthexpn),
+	  prc->brx);
+	cblkbry = JAS_MIN(tlcblktly + ((cblkyind + 1) << rlvl->cblkheightexpn),
+	  prc->bry);
+	/* The clipped rectangle must be nonempty; the data is a view bound onto band->data. */
+	assert(cblktlx < cblkbrx && cblktly < cblkbry);
+	if (!(cblk->data = jas_seq2d_create(0, 0, 0, 0))) {
+		goto error;
+	}
+	jas_seq2d_bindsub(cblk->data, band->data, cblktlx, cblktly, cblkbrx, cblkbry);
+
+	return cblk;
+
+error:
+	cblk_destroy(cblk);
+	return 0;
+}
+
+static void cblk_destroy(jpc_enc_cblk_t *cblk)
+{
+	uint_fast16_t passno;
+	/* Release whatever the code block currently holds (passes, stream, MQ
+	  encoder, data, flags); null members are skipped.  cblk is not freed. */
+	if (cblk->passes) {
+		for (passno = 0; passno < cblk->numpasses; ++passno) {
+			pass_destroy(&cblk->passes[passno]);
+		}
+		jas_free(cblk->passes);
+	}
+	if (cblk->stream) {
+		jas_stream_close(cblk->stream);
+	}
+	if (cblk->mqenc) {
+		jpc_mqenc_destroy(cblk->mqenc);
+	}
+	if (cblk->data) {
+		jas_seq2d_destroy(cblk->data);
+	}
+	if (cblk->flags) {
+		jas_seq2d_destroy(cblk->flags);
+	}
+}
+
+static void pass_destroy(jpc_enc_pass_t *pass)
+{
+	/* XXX - need to free resources here; for now this is a no-op and pass is unused. */
+}
+
+void jpc_enc_dump(jpc_enc_t *enc)
+{
+	jpc_enc_tile_t *tile;
+	jpc_enc_tcmpt_t *tcmpt;
+	jpc_enc_rlvl_t *rlvl;
+	jpc_enc_band_t *band;
+	jpc_enc_prc_t *prc;
+	jpc_enc_cblk_t *cblk;
+	uint_fast16_t cmptno;
+	uint_fast16_t rlvlno;
+	uint_fast16_t bandno;
+	uint_fast32_t prcno;
+	uint_fast32_t cblkno;
+	/* Dump the geometry of enc->curtile through jas_eprintf(): tile-components, resolution levels, nonempty bands, precincts and code blocks. */
+	tile = enc->curtile;
+	/* Each argument is cast to the exact type its conversion specifier expects (the jas_seq2d_* result types are not visible here). */
+	for (cmptno = 0, tcmpt = tile->tcmpts; cmptno < tile->numtcmpts; ++cmptno,
+	  ++tcmpt) {
+		jas_eprintf("  tcmpt %5ld %5ld %5ld %5ld\n", (long)jas_seq2d_xstart(tcmpt->data), (long)jas_seq2d_ystart(tcmpt->data), (long)jas_seq2d_xend(tcmpt->data), (long)jas_seq2d_yend(tcmpt->data));
+		for (rlvlno = 0, rlvl = tcmpt->rlvls; rlvlno < tcmpt->numrlvls;
+		  ++rlvlno, ++rlvl) {
+			jas_eprintf("    rlvl %5lu %5lu %5lu %5lu\n", (unsigned long)rlvl->tlx, (unsigned long)rlvl->tly, (unsigned long)rlvl->brx, (unsigned long)rlvl->bry);
+			for (bandno = 0, band = rlvl->bands; bandno < rlvl->numbands;
+			  ++bandno, ++band) {
+				if (!band->data) {
+					continue;
+				}
+				jas_eprintf("      band %5ld %5ld %5ld %5ld\n", (long)jas_seq2d_xstart(band->data), (long)jas_seq2d_ystart(band->data), (long)jas_seq2d_xend(band->data), (long)jas_seq2d_yend(band->data));
+				for (prcno = 0, prc = band->prcs; prcno < rlvl->numprcs;
+				  ++prcno, ++prc) {
+					jas_eprintf("        prc %5lu %5lu %5lu %5lu (%5lu %5lu)\n", (unsigned long)prc->tlx, (unsigned long)prc->tly, (unsigned long)prc->brx, (unsigned long)prc->bry, (unsigned long)(prc->brx - prc->tlx), (unsigned long)(prc->bry - prc->tly));
+					if (!prc->cblks) {
+						continue;
+					}
+					for (cblkno = 0, cblk = prc->cblks; cblkno < prc->numcblks;
+					  ++cblkno, ++cblk) {
+						jas_eprintf("         cblk %5ld %5ld %5ld %5ld\n", (long)jas_seq2d_xstart(cblk->data), (long)jas_seq2d_ystart(cblk->data), (long)jas_seq2d_xend(cblk->data), (long)jas_seq2d_yend(cblk->data));
+					}
+				}
+			}
+		}
+	}
+}
diff --git a/src/libjasper/jpc/jpc_enc.h b/src/libjasper/jpc/jpc_enc.h
new file mode 100644
index 0000000..a29720b
--- /dev/null
+++ b/src/libjasper/jpc/jpc_enc.h
@@ -0,0 +1,646 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * $Id: jpc_enc.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_ENC_H
+#define JPC_ENC_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_seq.h"
+
+#include "jpc_t2cod.h"
+#include "jpc_mqenc.h"
+#include "jpc_cod.h"
+#include "jpc_tagtree.h"
+#include "jpc_cs.h"
+#include "jpc_flt.h"
+#include "jpc_tsfb.h"
+
+/******************************************************************************\
+* Constants.
+\******************************************************************************/
+
+/* The number of bits used in various lookup tables. */
+#define	JPC_NUMEXTRABITS	JPC_NMSEDEC_FRACBITS
+
+/* An invalid R-D slope value. */
+#define	JPC_BADRDSLOPE	(-1)
+
+/******************************************************************************\
+* Coding parameters types.
+\******************************************************************************/
+
+/* Per-component coding parameters. */
+
+typedef struct {
+
+	/* The horizontal sampling period. */
+	uint_fast8_t sampgrdstepx;
+
+	/* The vertical sampling period. */
+	uint_fast8_t sampgrdstepy;
+
+	/* The sample alignment horizontal offset. */
+	uint_fast8_t sampgrdsubstepx;
+
+	/* The sample alignment vertical offset. */
+	uint_fast8_t sampgrdsubstepy;
+
+	/* The precision of the samples. */
+	uint_fast8_t prec;
+
+	/* The signedness of the samples. */
+	bool sgnd;
+
+	/* The number of step sizes. */
+	uint_fast16_t numstepsizes;
+
+	/* The quantizer step sizes (room for JPC_MAXBANDS entries). */
+	uint_fast16_t stepsizes[JPC_MAXBANDS];
+
+} jpc_enc_ccp_t;
+
+/* Per-tile coding parameters. */
+
+typedef struct {
+
+	/* The coding mode (i.e., integer or real). */
+	bool intmode;
+
+	/* The coding style (i.e., SOP, EPH). */
+	uint_fast8_t csty;
+
+	/* The progression order. */
+	uint_fast8_t prg;
+
+	/* The multicomponent transform. */
+	uint_fast8_t mctid;
+
+	/* The number of layers. */
+	uint_fast16_t numlyrs;
+
+	/* The normalized bit rates associated with the various
+	  intermediate layers. */
+	jpc_fix_t *ilyrrates;
+
+} jpc_enc_tcp_t;
+
+/* Per-tile-component coding parameters. */
+
+typedef struct {
+
+	/* The coding style (i.e., explicit precinct sizes). */
+	uint_fast8_t csty;
+
+	/* The maximum number of resolution levels allowed. */
+	uint_fast8_t maxrlvls;
+
+	/* The exponent for the nominal code block width. */
+	uint_fast16_t cblkwidthexpn;
+
+	/* The exponent for the nominal code block height. */
+	uint_fast16_t cblkheightexpn;
+
+	/* The code block style parameters (e.g., lazy, terminate all,
+	  segmentation symbols, causal, reset probability models). */
+	uint_fast8_t cblksty;
+
+	/* The QMFB. */
+	uint_fast8_t qmfbid;
+
+	/* The precinct width exponents (room for JPC_MAXRLVLS entries). */
+	uint_fast16_t prcwidthexpns[JPC_MAXRLVLS];
+
+	/* The precinct height exponents (room for JPC_MAXRLVLS entries). */
+	uint_fast16_t prcheightexpns[JPC_MAXRLVLS];
+
+	/* The number of guard bits. */
+	uint_fast8_t numgbits;
+
+} jpc_enc_tccp_t;
+
+/* Coding parameters. */
+
+typedef struct {
+
+	/* The debug level. */
+	int debug;
+
+	/* The horizontal offset from the origin of the reference grid to the
+	  left edge of the image area. */
+	uint_fast32_t imgareatlx;
+
+	/* The vertical offset from the origin of the reference grid to the
+	  top edge of the image area. */
+	uint_fast32_t imgareatly;
+
+	/* The horizontal offset from the origin of the reference grid to the
+	  right edge of the image area (plus one). */
+	uint_fast32_t refgrdwidth;
+
+	/* The vertical offset from the origin of the reference grid to the
+	  bottom edge of the image area (plus one). */
+	uint_fast32_t refgrdheight;
+
+	/* The horizontal offset from the origin of the tile grid to the
+	  origin of the reference grid. */
+	uint_fast32_t tilegrdoffx;
+
+	/* The vertical offset from the origin of the tile grid to the
+	  origin of the reference grid. */
+	uint_fast32_t tilegrdoffy;
+
+	/* The nominal tile width in units of the image reference grid. */
+	uint_fast32_t tilewidth;
+
+	/* The nominal tile height in units of the image reference grid. */
+	uint_fast32_t tileheight;
+
+	/* The number of tiles spanning the image area in the horizontal
+	  direction. */
+	uint_fast32_t numhtiles;
+
+	/* The number of tiles spanning the image area in the vertical
+	  direction. */
+	uint_fast32_t numvtiles;
+
+	/* The number of tiles. */
+	uint_fast32_t numtiles;
+
+	/* The number of components. */
+	uint_fast16_t numcmpts;
+
+	/* The per-component coding parameters. */
+	jpc_enc_ccp_t *ccps;
+
+	/* The per-tile coding parameters (a single set is stored here). */
+	jpc_enc_tcp_t tcp;
+
+	/* The per-tile-component coding parameters (a single set is stored here). */
+	jpc_enc_tccp_t tccp;
+
+	/* The target code stream length in bytes. */
+	uint_fast32_t totalsize;
+
+	/* The raw (i.e., uncompressed) size of the image in bytes. */
+	uint_fast32_t rawsize;
+
+} jpc_enc_cp_t;
+
+/******************************************************************************\
+* Encoder class.
+\******************************************************************************/
+
+/* Encoder per-coding-pass state information. */
+
+typedef struct {
+
+	/* The starting offset for this pass. */
+	int start;
+
+	/* The ending offset for this pass. */
+	int end;
+
+	/* The type of data in this pass (i.e., MQ or raw). */
+	int type;
+
+	/* Flag indicating whether this pass is terminated. */
+	int term;
+
+	/* The entropy coder state after coding this pass. */
+	jpc_mqencstate_t mqencstate;
+
+	/* The layer to which this pass has been assigned. */
+	int lyrno;
+
+	/* The R-D slope for this pass (JPC_BADRDSLOPE presumably marks an invalid one; confirm). */
+	jpc_flt_t rdslope;
+
+	/* The weighted MSE reduction associated with this pass. */
+	jpc_flt_t wmsedec;
+
+	/* The cumulative weighted MSE reduction. */
+	jpc_flt_t cumwmsedec;
+
+	/* The normalized MSE reduction. */
+	long nmsedec;
+
+} jpc_enc_pass_t;
+
+/* Encoder per-code-block state information. */
+
+typedef struct {
+
+	/* The number of passes. */
+	int numpasses;
+
+	/* The per-pass information. */
+	jpc_enc_pass_t *passes;
+
+	/* The number of passes encoded so far. */
+	int numencpasses;
+
+	/* The number of insignificant MSBs. */
+	int numimsbs;
+
+	/* The number of bits used to encode pass data lengths. */
+	int numlenbits;
+
+	/* The byte stream for this code block. */
+	jas_stream_t *stream;
+
+	/* The entropy encoder. */
+	jpc_mqenc_t *mqenc;
+
+	/* The data for this code block (a view bound onto the band data). */
+	jas_matrix_t *data;
+
+	/* The state for this code block. */
+	jas_matrix_t *flags;
+
+	/* The number of bit planes required for this code block. */
+	int numbps;
+
+	/* The next pass to be encoded. */
+	jpc_enc_pass_t *curpass;
+
+	/* The precinct (code block group) that contains this code block. */
+	struct jpc_enc_prc_s *prc;
+
+	/* The saved current pass. */
+	/* This is used by the rate control code. */
+	jpc_enc_pass_t *savedcurpass;
+
+	/* The saved length indicator size. */
+	/* This is used by the rate control code. */
+	int savednumlenbits;
+
+	/* The saved number of encoded passes. */
+	/* This is used by the rate control code. */
+	int savednumencpasses;
+
+} jpc_enc_cblk_t;
+
+/* Encoder per-precinct (code block group) state information. */
+
+typedef struct jpc_enc_prc_s {
+
+	/* The x-coordinate of the top-left corner of the precinct. */
+	uint_fast32_t tlx;
+
+	/* The y-coordinate of the top-left corner of the precinct. */
+	uint_fast32_t tly;
+
+	/* The x-coordinate of the bottom-right corner of the precinct
+	  (plus one). */
+	uint_fast32_t brx;
+
+	/* The y-coordinate of the bottom-right corner of the precinct
+	  (plus one). */
+	uint_fast32_t bry;
+
+	/* The number of code blocks spanning the precinct in the horizontal
+	direction. */
+	int numhcblks;
+
+	/* The number of code blocks spanning the precinct in the vertical
+	direction. */
+	int numvcblks;
+
+	/* The total number of code blocks (numhcblks * numvcblks). */
+	int numcblks;
+
+	/* The per-code-block information (null when the precinct is empty). */
+	jpc_enc_cblk_t *cblks;
+
+	/* The inclusion tag tree. */
+	jpc_tagtree_t *incltree;
+
+	/* The insignificant MSBs tag tree. */
+	jpc_tagtree_t *nlibtree;
+
+	/* The band that contains this precinct. */
+	struct jpc_enc_band_s *band;
+
+	/* The saved inclusion tag tree. */
+	/* This is used by rate control. */
+	jpc_tagtree_t *savincltree;
+
+	/* The saved leading-insignificant-bit-planes tag tree. */
+	/* This is used by rate control. */
+	jpc_tagtree_t *savnlibtree;
+
+} jpc_enc_prc_t;
+
+/* Encoder per-band state information. */
+
+typedef struct jpc_enc_band_s {
+
+	/* The per-precinct information (allocated only when data is non-null). */
+	jpc_enc_prc_t *prcs;
+
+	/* The coefficient data for this band (null when the band is empty). */
+	jas_matrix_t *data;
+
+	/* The orientation of this band (i.e., LL, LH, HL, or HH). */
+	int orient;
+
+	/* The number of bit planes associated with this band. */
+	int numbps;
+
+	/* The quantizer step size. */
+	jpc_fix_t absstepsize;
+
+	/* The encoded quantizer step size. */
+	int stepsize;
+
+	/* The L2 norm of the synthesis basis functions associated with
+	  this band.  (The MCT is not considered in this value.) */
+	jpc_fix_t synweight;
+
+	/* The analysis gain for this band. */
+	int analgain;
+
+	/* The resolution level that contains this band. */
+	struct jpc_enc_rlvl_s *rlvl;
+
+} jpc_enc_band_t;
+
+/* Encoder per-resolution-level state information. */
+
+typedef struct jpc_enc_rlvl_s {
+
+	/* The x-coordinate of the top-left corner of the tile-component
+	  at this resolution. */
+	uint_fast32_t tlx;
+
+	/* The y-coordinate of the top-left corner of the tile-component
+	  at this resolution. */
+	uint_fast32_t tly;
+
+	/* The x-coordinate of the bottom-right corner of the tile-component
+	  at this resolution (plus one). */
+	uint_fast32_t brx;
+
+	/* The y-coordinate of the bottom-right corner of the tile-component
+	  at this resolution (plus one). */
+	uint_fast32_t bry;
+
+	/* The exponent value for the nominal precinct width measured
+	  relative to the associated LL band. */
+	int prcwidthexpn;
+
+	/* The exponent value for the nominal precinct height measured
+	  relative to the associated LL band. */
+	int prcheightexpn;
+
+	/* The number of precincts spanning the resolution level in the
+	  horizontal direction. */
+	int numhprcs;
+
+	/* The number of precincts spanning the resolution level in the
+	  vertical direction. */
+	int numvprcs;
+
+	/* The total number of precincts (numhprcs * numvprcs). */
+	int numprcs;
+
+	/* The exponent value for the nominal code block group width.
+	  This quantity is associated with the next lower resolution level
+	  (assuming that there is one). */
+	int cbgwidthexpn;
+
+	/* The exponent value for the nominal code block group height.
+	  This quantity is associated with the next lower resolution level
+	  (assuming that there is one). */
+	int cbgheightexpn;
+
+	/* The exponent value for the code block width. */
+	uint_fast16_t cblkwidthexpn;
+
+	/* The exponent value for the code block height. */
+	uint_fast16_t cblkheightexpn;
+
+	/* The number of bands associated with this resolution level. */
+	int numbands;
+
+	/* The per-band information (an array of numbands entries). */
+	jpc_enc_band_t *bands;
+
+	/* The parent tile-component. */
+	struct jpc_enc_tcmpt_s *tcmpt;
+
+} jpc_enc_rlvl_t;
+
+/* Encoder per-tile-component state information. */
+
+typedef struct jpc_enc_tcmpt_s {
+
+	/* The number of resolution levels. */
+	int numrlvls;
+
+	/* The per-resolution-level information (indexed 0 .. numrlvls - 1). */
+	jpc_enc_rlvl_t *rlvls;
+
+	/* The tile-component data. */
+	jas_matrix_t *data;
+
+	/* The QMFB. */
+	int qmfbid;
+
+	/* The number of bands. */
+	int numbands;
+
+	/* The TSFB. */
+	jpc_tsfb_t *tsfb;
+
+	/* The synthesis energy weight (for the MCT). */
+	jpc_fix_t synweight;
+
+	/* The precinct width exponents. */
+	int prcwidthexpns[JPC_MAXRLVLS];
+
+	/* The precinct height exponents. */
+	int prcheightexpns[JPC_MAXRLVLS];
+
+	/* The code block width exponent. */
+	int cblkwidthexpn;
+
+	/* The code block height exponent. */
+	int cblkheightexpn;
+
+	/* Coding style (i.e., explicit precinct sizes). */
+	int csty;
+
+	/* Code block style. */
+	int cblksty;
+
+	/* The number of quantizer step sizes. */
+	int numstepsizes;
+
+	/* The encoded quantizer step sizes. */
+	uint_fast16_t stepsizes[JPC_MAXBANDS];
+
+	/* The parent tile. */
+	struct jpc_enc_tile_s *tile;
+
+} jpc_enc_tcmpt_t;
+
+/* Encoder per-tile state information. */
+
+typedef struct jpc_enc_tile_s {
+
+	/* The tile number. */
+	uint_fast32_t tileno;
+
+	/* The x-coordinate of the top-left corner of the tile measured with
+	  respect to the reference grid. */
+	uint_fast32_t tlx;
+
+	/* The y-coordinate of the top-left corner of the tile measured with
+	  respect to the reference grid. */
+	uint_fast32_t tly;
+
+	/* The x-coordinate of the bottom-right corner of the tile measured
+	  with respect to the reference grid (plus one). */
+	uint_fast32_t brx;
+
+	/* The y-coordinate of the bottom-right corner of the tile measured
+	  with respect to the reference grid (plus one). */
+	uint_fast32_t bry;
+
+	/* The coding style. */
+	uint_fast8_t csty;
+
+	/* The progression order. */
+	uint_fast8_t prg;
+
+	/* The number of layers. */
+	int numlyrs;
+
+	/* The MCT to employ (if any). */
+	uint_fast8_t mctid;
+
+	/* The packet iterator (used to determine the order of packet
+	  generation). */
+	jpc_pi_t *pi;
+
+	/* The coding mode (i.e., integer or real). */
+	bool intmode;
+
+	/* The number of bytes to allocate to the various layers. */
+	uint_fast32_t *lyrsizes;
+
+	/* The number of tile-components. */
+	int numtcmpts;
+
+	/* The per-tile-component information (indexed 0 .. numtcmpts - 1). */
+	jpc_enc_tcmpt_t *tcmpts;
+
+	/* The raw (i.e., uncompressed) size of this tile. */
+	uint_fast32_t rawsize;
+
+} jpc_enc_tile_t;
+
+/* Encoder class (the top-level encoder state). */
+
+typedef struct jpc_enc_s {
+
+	/* The image being encoded. */
+	jas_image_t *image;
+
+	/* The output stream. */
+	jas_stream_t *out;
+
+	/* The coding parameters. */
+	jpc_enc_cp_t *cp;
+
+	/* The tile currently being processed (the one jpc_enc_dump reports). */
+	jpc_enc_tile_t *curtile;
+
+	/* The code stream state. */
+	jpc_cstate_t *cstate;
+
+	/* The number of bytes output so far. */
+	uint_fast32_t len;
+
+	/* The number of bytes available for the main body of the code stream. */
+	/* This is used for rate allocation purposes. */
+	uint_fast32_t mainbodysize;
+
+	/* The marker segment currently being processed. */
+	/* This member is a convenience for making cleanup easier. */
+	jpc_ms_t *mrk;
+
+	/* The stream used to temporarily hold tile-part data. */
+	jas_stream_t *tmpstream;
+
+} jpc_enc_t;
+
+#endif
diff --git a/src/libjasper/jpc/jpc_fix.h b/src/libjasper/jpc/jpc_fix.h
new file mode 100644
index 0000000..d2b88ed
--- /dev/null
+++ b/src/libjasper/jpc/jpc_fix.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Fixed-Point Number Class
+ *
+ * $Id: jpc_fix.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_FIX_H
+#define JPC_FIX_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_types.h"
+#include "jasper/jas_fix.h"
+
+/******************************************************************************\
+* Basic parameters of the fixed-point type.
+\******************************************************************************/
+
+/* The integral type used to represent a fixed-point number.  This
+  type must be capable of representing values from -(2^31) to 2^31-1
+  (inclusive). */
+typedef int_fast32_t jpc_fix_t;
+
+/* The integral type used to represent higher-precision intermediate results.
+  This type should be capable of representing values from -(2^63) to 2^63-1
+  (inclusive). */
+typedef int_fast64_t jpc_fix_big_t;
+
+/* The number of bits used for the fractional part of a fixed-point number. */
+#define JPC_FIX_FRACBITS	13
+
+/******************************************************************************\
+* Instantiations of the generic fixed-point number macros for the
+* parameters given above.  (Too bad C does not support templates, eh?)
+* The purpose of these macros is self-evident if one examines the
+* corresponding macros in the jasper/jas_fix.h header file.
+\******************************************************************************/
+
+#define	JPC_FIX_ZERO	JAS_FIX_ZERO(jpc_fix_t, JPC_FIX_FRACBITS)
+#define	JPC_FIX_ONE		JAS_FIX_ONE(jpc_fix_t, JPC_FIX_FRACBITS)
+#define	JPC_FIX_HALF	JAS_FIX_HALF(jpc_fix_t, JPC_FIX_FRACBITS)
+/* Conversions between fixed-point values and integers or doubles. */
+#define jpc_inttofix(x)	JAS_INTTOFIX(jpc_fix_t, JPC_FIX_FRACBITS, x)
+#define jpc_fixtoint(x)	JAS_FIXTOINT(jpc_fix_t, JPC_FIX_FRACBITS, x)
+#define jpc_fixtodbl(x)	JAS_FIXTODBL(jpc_fix_t, JPC_FIX_FRACBITS, x)
+#define jpc_dbltofix(x)	JAS_DBLTOFIX(jpc_fix_t, JPC_FIX_FRACBITS, x)
+/* Arithmetic and shift operations (mul and div use jpc_fix_big_t intermediates). */
+#define	jpc_fix_add(x, y)	JAS_FIX_ADD(jpc_fix_t, JPC_FIX_FRACBITS, x, y)
+#define	jpc_fix_sub(x, y)	JAS_FIX_SUB(jpc_fix_t, JPC_FIX_FRACBITS, x, y)
+#define	jpc_fix_mul(x, y) \
+	JAS_FIX_MUL(jpc_fix_t, JPC_FIX_FRACBITS, jpc_fix_big_t, x, y)
+#define	jpc_fix_mulbyint(x, y) \
+	JAS_FIX_MULBYINT(jpc_fix_t, JPC_FIX_FRACBITS, x, y)
+#define	jpc_fix_div(x, y) \
+	JAS_FIX_DIV(jpc_fix_t, JPC_FIX_FRACBITS, jpc_fix_big_t, x, y)
+#define	jpc_fix_neg(x)		JAS_FIX_NEG(jpc_fix_t, JPC_FIX_FRACBITS, x)
+#define	jpc_fix_asl(x, n)	JAS_FIX_ASL(jpc_fix_t, JPC_FIX_FRACBITS, x, n)
+#define	jpc_fix_asr(x, n)	JAS_FIX_ASR(jpc_fix_t, JPC_FIX_FRACBITS, x, n)
+/* Compound-assignment forms. */
+#define jpc_fix_pluseq(x, y)	JAS_FIX_PLUSEQ(jpc_fix_t, JPC_FIX_FRACBITS, x, y)
+#define jpc_fix_minuseq(x, y)	JAS_FIX_MINUSEQ(jpc_fix_t, JPC_FIX_FRACBITS, x, y)
+#define	jpc_fix_muleq(x, y)	\
+	JAS_FIX_MULEQ(jpc_fix_t, JPC_FIX_FRACBITS, jpc_fix_big_t, x, y)
+/* Sign, integrality and rounding helpers. */
+#define	jpc_fix_abs(x)		JAS_FIX_ABS(jpc_fix_t, JPC_FIX_FRACBITS, x)
+#define	jpc_fix_isint(x)	JAS_FIX_ISINT(jpc_fix_t, JPC_FIX_FRACBITS, x)
+#define jpc_fix_sgn(x)		JAS_FIX_SGN(jpc_fix_t, JPC_FIX_FRACBITS, x)
+#define	jpc_fix_round(x)	JAS_FIX_ROUND(jpc_fix_t, JPC_FIX_FRACBITS, x)
+#define	jpc_fix_floor(x)	JAS_FIX_FLOOR(jpc_fix_t, JPC_FIX_FRACBITS, x)
+#define jpc_fix_trunc(x)	JAS_FIX_TRUNC(jpc_fix_t, JPC_FIX_FRACBITS, x)
+
+/******************************************************************************\
+* Extra macros for convenience.
+\******************************************************************************/
+
+/* Compute the sum of three fixed-point numbers. */
+#define jpc_fix_add3(x, y, z)	jpc_fix_add(jpc_fix_add(x, y), z)
+
+#endif
diff --git a/src/libjasper/jpc/jpc_flt.h b/src/libjasper/jpc/jpc_flt.h
new file mode 100644
index 0000000..f942f3d
--- /dev/null
+++ b/src/libjasper/jpc/jpc_flt.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Floating-Point Class
+ *
+ * $Id: jpc_flt.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_FLT_H
+#define JPC_FLT_H
+
+#include <float.h>
+
+/* Floating-point type for the JPC code; currently an alias for double.  The
+  code ought to be modified so this type is not used at all, since very few
+  places rely on floating-point arithmetic, aside from conversions in printf's. */
+typedef double jpc_flt_t;
+
+#endif
diff --git a/src/libjasper/jpc/jpc_math.c b/src/libjasper/jpc/jpc_math.c
new file mode 100644
index 0000000..f268554
--- /dev/null
+++ b/src/libjasper/jpc/jpc_math.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Math Library
+ *
+ * $Id: jpc_math.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes
+\******************************************************************************/
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include "jpc_math.h"
+
+/******************************************************************************\
+* Miscellaneous Functions
+\******************************************************************************/
+
+/* Return floor(log2(x)) for a positive integer x, i.e., the zero-based
+  position of the most significant set bit. */
+int jpc_floorlog2(int x)
+{
+	int log2val;
+
+	/* Zero and negative values have no logarithm. */
+	assert(x > 0);
+
+	/* Count the halvings needed to reduce x to one. */
+	for (log2val = 0; x > 1; x >>= 1) {
+		++log2val;
+	}
+
+	return log2val;
+}
+
+/* Return the zero-based bit position of the most significant one bit in a
+  nonnegative integer, or -1 when no bit is set (x == 0). */
+/* For x > 0 the result equals jpc_floorlog2(x); unlike that function, zero
+  is also accepted here. */
+int jpc_firstone(int x)
+{
+	int pos;
+
+	/* Negative values are not supported. */
+	assert(x >= 0);
+
+	/* Shift x down to zero; one less than the shift count is the answer. */
+	for (pos = -1; x > 0; x >>= 1) {
+		++pos;
+	}
+
+	return pos;
+}
diff --git a/src/libjasper/jpc/jpc_math.h b/src/libjasper/jpc/jpc_math.h
new file mode 100644
index 0000000..e8e0978
--- /dev/null
+++ b/src/libjasper/jpc/jpc_math.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+#ifndef	JPC_MATH_H
+#define	JPC_MATH_H
+
+/******************************************************************************\
+* Includes
+\******************************************************************************/
+
+#include	<assert.h>
+
+/******************************************************************************\
+* Macros
+\******************************************************************************/
+
+/* Compute x / y, which is the floor of the quotient only if it is nonnegative. */
+#define	JPC_FLOORDIV(x, y)	((x) / (y))
+
+/* Compute the ceiling of the quotient x / y, assuming x >= 0 and y > 0. */
+#define	JPC_CEILDIV(x, y)	(((x) + (y) - 1) / (y))
+
+/* Compute the floor of (x / 2^y); x < 0 relies on an arithmetic right shift. */
+#define	JPC_FLOORDIVPOW2(x, y)	((x) >> (y))
+
+/* Compute the ceiling of (x / 2^y); y must be small enough for the int 1 << y. */
+#define	JPC_CEILDIVPOW2(x, y)	(((x) + (1 << (y)) - 1) >> (y))
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+/* Calculate the zero-based bit position of the leading one in a nonnegative
+  integer; the result is -1 if the integer is zero. */
+int jpc_firstone(int x);
+
+/* Calculate the integer quantity floor(log2(x)), where x is a positive
+  integer (checked with assert). */
+int jpc_floorlog2(int x);
+
+#endif
diff --git a/src/libjasper/jpc/jpc_mct.c b/src/libjasper/jpc/jpc_mct.c
new file mode 100644
index 0000000..c5b33e4
--- /dev/null
+++ b/src/libjasper/jpc/jpc_mct.c
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Multicomponent Transform Code
+ *
+ * $Id: jpc_mct.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <assert.h>
+
+#include "jasper/jas_seq.h"
+
+#include "jpc_fix.h"
+#include "jpc_mct.h"
+
+/******************************************************************************\
+* Code.
+\******************************************************************************/
+
+/* Compute the forward RCT in place.  The matrices c0, c1 and c2 supply the
+  values r, g and b, and are overwritten with y = (r + 2g + b) >> 2, u = b - g
+  and v = r - g, respectively. */
+void jpc_rct(jas_matrix_t *c0, jas_matrix_t *c1, jas_matrix_t *c2)
+{
+	int numrows;
+	int numcols;
+	int i;
+	int j;
+	jpc_fix_t *c0p;
+	jpc_fix_t *c1p;
+	jpc_fix_t *c2p;
+
+	numrows = jas_matrix_numrows(c0);
+	numcols = jas_matrix_numcols(c0);
+
+	/* All three matrices must have the same dimensions. */
+	assert(jas_matrix_numrows(c1) == numrows && jas_matrix_numcols(c1) == numcols
+	  && jas_matrix_numrows(c2) == numrows && jas_matrix_numcols(c2) == numcols);
+
+	for (i = 0; i < numrows; i++) {
+		c0p = jas_matrix_getref(c0, i, 0);
+		c1p = jas_matrix_getref(c1, i, 0);
+		c2p = jas_matrix_getref(c2, i, 0);
+		for (j = numcols; j > 0; --j) {
+			/* Use the element type (as jpc_ict does) so that no value
+			  is narrowed to int on the way through. */
+			jpc_fix_t r = *c0p;
+			jpc_fix_t g = *c1p;
+			jpc_fix_t b = *c2p;
+			/* 2 * g, not g << 1: left-shifting a negative value is undefined. */
+			jpc_fix_t y = (r + 2 * g + b) >> 2;
+			jpc_fix_t u = b - g;
+			jpc_fix_t v = r - g;
+
+			/* Store the results over the inputs. */
+			*c0p++ = y;
+			*c1p++ = u;
+			*c2p++ = v;
+		}
+	}
+}
+
+/* Compute the inverse RCT in place.  The matrices c0, c1 and c2 supply the
+  values y, u and v, and are overwritten with r = v + g, g = y - ((u + v) >> 2)
+  and b = u + g, respectively. */
+void jpc_irct(jas_matrix_t *c0, jas_matrix_t *c1, jas_matrix_t *c2)
+{
+	int numrows;
+	int numcols;
+	int i;
+	int j;
+	jpc_fix_t *c0p;
+	jpc_fix_t *c1p;
+	jpc_fix_t *c2p;
+
+	numrows = jas_matrix_numrows(c0);
+	numcols = jas_matrix_numcols(c0);
+
+	/* All three matrices must have the same dimensions. */
+	assert(jas_matrix_numrows(c1) == numrows && jas_matrix_numcols(c1) == numcols
+	  && jas_matrix_numrows(c2) == numrows && jas_matrix_numcols(c2) == numcols);
+
+	for (i = 0; i < numrows; i++) {
+		c0p = jas_matrix_getref(c0, i, 0);
+		c1p = jas_matrix_getref(c1, i, 0);
+		c2p = jas_matrix_getref(c2, i, 0);
+		for (j = numcols; j > 0; --j) {
+			/* Use the element type (as jpc_iict does) so that no value
+			  is narrowed to int on the way through. */
+			jpc_fix_t y = *c0p;
+			jpc_fix_t u = *c1p;
+			jpc_fix_t v = *c2p;
+			/* g is recovered first; r and b are then derived from it. */
+			jpc_fix_t g = y - ((u + v) >> 2);
+			jpc_fix_t r = v + g;
+			jpc_fix_t b = u + g;
+
+			/* Store the results over the inputs. */
+			*c0p++ = r;
+			*c1p++ = g;
+			*c2p++ = b;
+		}
+	}
+}
+
+/* Compute the forward ICT in place.  The matrices c0, c1 and c2 supply the
+  values r, g and b, and are overwritten with y, u and v, respectively. */
+void jpc_ict(jas_matrix_t *c0, jas_matrix_t *c1, jas_matrix_t *c2)
+{
+	int rows;
+	int cols;
+	int row;
+	int left;
+	jpc_fix_t *p0;
+	jpc_fix_t *p1;
+	jpc_fix_t *p2;
+
+	/* All three matrices must have the same dimensions. */
+	rows = jas_matrix_numrows(c0);
+	assert(jas_matrix_numrows(c1) == rows && jas_matrix_numrows(c2) == rows);
+	cols = jas_matrix_numcols(c0);
+	assert(jas_matrix_numcols(c1) == cols && jas_matrix_numcols(c2) == cols);
+
+	for (row = 0; row < rows; ++row) {
+		p0 = jas_matrix_getref(c0, row, 0);
+		p1 = jas_matrix_getref(c1, row, 0);
+		p2 = jas_matrix_getref(c2, row, 0);
+		for (left = cols; left > 0; --left, ++p0, ++p1, ++p2) {
+			const jpc_fix_t r = *p0;
+			const jpc_fix_t g = *p1;
+			const jpc_fix_t b = *p2;
+
+			/* Each output is a fixed-point weighted sum of the three inputs. */
+			*p0 = jpc_fix_add3(jpc_fix_mul(jpc_dbltofix(0.299), r),
+			  jpc_fix_mul(jpc_dbltofix(0.587), g),
+			  jpc_fix_mul(jpc_dbltofix(0.114), b));
+			*p1 = jpc_fix_add3(jpc_fix_mul(jpc_dbltofix(-0.16875), r),
+			  jpc_fix_mul(jpc_dbltofix(-0.33126), g),
+			  jpc_fix_mul(jpc_dbltofix(0.5), b));
+			*p2 = jpc_fix_add3(jpc_fix_mul(jpc_dbltofix(0.5), r),
+			  jpc_fix_mul(jpc_dbltofix(-0.41869), g),
+			  jpc_fix_mul(jpc_dbltofix(-0.08131), b));
+		}
+	}
+}
+
+/* Compute the inverse ICT in place.  The matrices c0, c1 and c2 supply the
+  values y, u and v, and are overwritten with r, g and b, respectively, each
+  computed with fixed-point arithmetic. */
+void jpc_iict(jas_matrix_t *c0, jas_matrix_t *c1, jas_matrix_t *c2)
+{
+	int rows;
+	int cols;
+	int row;
+	int left;
+	jpc_fix_t *p0;
+	jpc_fix_t *p1;
+	jpc_fix_t *p2;
+
+	/* All three matrices must have the same dimensions. */
+	rows = jas_matrix_numrows(c0);
+	assert(jas_matrix_numrows(c1) == rows && jas_matrix_numrows(c2) == rows);
+	cols = jas_matrix_numcols(c0);
+	assert(jas_matrix_numcols(c1) == cols && jas_matrix_numcols(c2) == cols);
+
+	for (row = 0; row < rows; ++row) {
+		p0 = jas_matrix_getref(c0, row, 0);
+		p1 = jas_matrix_getref(c1, row, 0);
+		p2 = jas_matrix_getref(c2, row, 0);
+		for (left = cols; left > 0; --left, ++p0, ++p1, ++p2) {
+			const jpc_fix_t y = *p0;
+			const jpc_fix_t u = *p1;
+			const jpc_fix_t v = *p2;
+
+			/* r depends on y and v only. */
+			*p0 = jpc_fix_add(y, jpc_fix_mul(jpc_dbltofix(1.402), v));
+			/* g depends on all three inputs. */
+			*p1 = jpc_fix_add3(y, jpc_fix_mul(jpc_dbltofix(-0.34413), u),
+			  jpc_fix_mul(jpc_dbltofix(-0.71414), v));
+			/* b depends on y and u only. */
+			*p2 = jpc_fix_add(y, jpc_fix_mul(jpc_dbltofix(1.772), u));
+		}
+	}
+}
+
+/* Return the synthesis weight of component cmptno for the multicomponent
+  transform mctid, converted to fixed point.  Any transform ID or component
+  number that is not listed below (JPC_MCT_NONE, for instance) yields
+  JPC_FIX_ONE. */
+jpc_fix_t jpc_mct_getsynweight(int mctid, int cmptno)
+{
+	/* Unit weight unless an entry below applies. */
+	jpc_fix_t weight = JPC_FIX_ONE;
+
+	/* Select the transform first, then the entry for the component. */
+	if (mctid == JPC_MCT_RCT) {
+		switch (cmptno) {
+		case 0:
+			weight = jpc_dbltofix(sqrt(3.0));
+			break;
+		case 1:
+		case 2:
+			/* Components 1 and 2 share the same weight. */
+			weight = jpc_dbltofix(sqrt(0.6875));
+			break;
+		default:
+			break;
+		}
+	} else if (mctid == JPC_MCT_ICT) {
+		switch (cmptno) {
+		case 0:
+			weight = jpc_dbltofix(sqrt(3.0000));
+			break;
+		case 1:
+			weight = jpc_dbltofix(sqrt(3.2584));
+			break;
+		case 2:
+			weight = jpc_dbltofix(sqrt(2.4755));
+			break;
+		default:
+			break;
+		}
+	}
+
+	return weight;
+}
diff --git a/src/libjasper/jpc/jpc_mct.h b/src/libjasper/jpc/jpc_mct.h
new file mode 100644
index 0000000..7d0176b
--- /dev/null
+++ b/src/libjasper/jpc/jpc_mct.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Multicomponent Transform Code
+ *
+ * $Id: jpc_mct.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_MCT_H
+#define JPC_MCT_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_seq.h"
+#include "jasper/jas_fix.h"
+
+/******************************************************************************\
+* Constants.
+\******************************************************************************/
+
+/*
+ * Multicomponent transform IDs (the mctid values of jpc_mct_getsynweight).
+ */
+
+#define JPC_MCT_NONE	0
+#define JPC_MCT_ICT		1
+#define JPC_MCT_RCT		2
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+/* Calculate the forward RCT in place on three equally sized matrices. */
+void jpc_rct(jas_matrix_t *c0, jas_matrix_t *c1, jas_matrix_t *c2);
+
+/* Calculate the inverse RCT in place on three equally sized matrices. */
+void jpc_irct(jas_matrix_t *c0, jas_matrix_t *c1, jas_matrix_t *c2);
+
+/* Calculate the forward ICT in place on three equally sized matrices. */
+void jpc_ict(jas_matrix_t *c0, jas_matrix_t *c1, jas_matrix_t *c2);
+
+/* Calculate the inverse ICT in place on three equally sized matrices. */
+void jpc_iict(jas_matrix_t *c0, jas_matrix_t *c1, jas_matrix_t *c2);
+
+/* Get the synthesis weight of a component (JPC_FIX_ONE if none is listed). */
+jpc_fix_t jpc_mct_getsynweight(int mctid, int cmptno);
+
+#endif
diff --git a/src/libjasper/jpc/jpc_mqcod.c b/src/libjasper/jpc/jpc_mqcod.c
new file mode 100644
index 0000000..f6149b3
--- /dev/null
+++ b/src/libjasper/jpc/jpc_mqcod.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * MQ Arithmetic Coder
+ *
+ * $Id: jpc_mqcod.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_malloc.h"
+
+#include "jpc_mqcod.h"
+
+/******************************************************************************\
+* Data.
+\******************************************************************************/
+
+/* MQ coder per-state information: 47 pairs of entries whose second field is 0,
+  then 1; the two pointers are presumably the next states (see jpc_mqstate_t). */
+jpc_mqstate_t jpc_mqstates[47 * 2] = {
+	{0x5601, 0, &jpc_mqstates[ 2], &jpc_mqstates[ 3]},
+	{0x5601, 1, &jpc_mqstates[ 3], &jpc_mqstates[ 2]},
+	{0x3401, 0, &jpc_mqstates[ 4], &jpc_mqstates[12]},
+	{0x3401, 1, &jpc_mqstates[ 5], &jpc_mqstates[13]},
+	{0x1801, 0, &jpc_mqstates[ 6], &jpc_mqstates[18]},
+	{0x1801, 1, &jpc_mqstates[ 7], &jpc_mqstates[19]},
+	{0x0ac1, 0, &jpc_mqstates[ 8], &jpc_mqstates[24]},
+	{0x0ac1, 1, &jpc_mqstates[ 9], &jpc_mqstates[25]},
+	{0x0521, 0, &jpc_mqstates[10], &jpc_mqstates[58]},
+	{0x0521, 1, &jpc_mqstates[11], &jpc_mqstates[59]},
+	{0x0221, 0, &jpc_mqstates[76], &jpc_mqstates[66]},
+	{0x0221, 1, &jpc_mqstates[77], &jpc_mqstates[67]},
+	{0x5601, 0, &jpc_mqstates[14], &jpc_mqstates[13]},
+	{0x5601, 1, &jpc_mqstates[15], &jpc_mqstates[12]},
+	{0x5401, 0, &jpc_mqstates[16], &jpc_mqstates[28]},
+	{0x5401, 1, &jpc_mqstates[17], &jpc_mqstates[29]},
+	{0x4801, 0, &jpc_mqstates[18], &jpc_mqstates[28]},
+	{0x4801, 1, &jpc_mqstates[19], &jpc_mqstates[29]},
+	{0x3801, 0, &jpc_mqstates[20], &jpc_mqstates[28]},
+	{0x3801, 1, &jpc_mqstates[21], &jpc_mqstates[29]},
+	{0x3001, 0, &jpc_mqstates[22], &jpc_mqstates[34]},
+	{0x3001, 1, &jpc_mqstates[23], &jpc_mqstates[35]},
+	{0x2401, 0, &jpc_mqstates[24], &jpc_mqstates[36]},
+	{0x2401, 1, &jpc_mqstates[25], &jpc_mqstates[37]},
+	{0x1c01, 0, &jpc_mqstates[26], &jpc_mqstates[40]},
+	{0x1c01, 1, &jpc_mqstates[27], &jpc_mqstates[41]},
+	{0x1601, 0, &jpc_mqstates[58], &jpc_mqstates[42]},
+	{0x1601, 1, &jpc_mqstates[59], &jpc_mqstates[43]},
+	{0x5601, 0, &jpc_mqstates[30], &jpc_mqstates[29]},
+	{0x5601, 1, &jpc_mqstates[31], &jpc_mqstates[28]},
+	{0x5401, 0, &jpc_mqstates[32], &jpc_mqstates[28]},
+	{0x5401, 1, &jpc_mqstates[33], &jpc_mqstates[29]},
+	{0x5101, 0, &jpc_mqstates[34], &jpc_mqstates[30]},
+	{0x5101, 1, &jpc_mqstates[35], &jpc_mqstates[31]},
+	{0x4801, 0, &jpc_mqstates[36], &jpc_mqstates[32]},
+	{0x4801, 1, &jpc_mqstates[37], &jpc_mqstates[33]},
+	{0x3801, 0, &jpc_mqstates[38], &jpc_mqstates[34]},
+	{0x3801, 1, &jpc_mqstates[39], &jpc_mqstates[35]},
+	{0x3401, 0, &jpc_mqstates[40], &jpc_mqstates[36]},
+	{0x3401, 1, &jpc_mqstates[41], &jpc_mqstates[37]},
+	{0x3001, 0, &jpc_mqstates[42], &jpc_mqstates[38]},
+	{0x3001, 1, &jpc_mqstates[43], &jpc_mqstates[39]},
+	{0x2801, 0, &jpc_mqstates[44], &jpc_mqstates[38]},
+	{0x2801, 1, &jpc_mqstates[45], &jpc_mqstates[39]},
+	{0x2401, 0, &jpc_mqstates[46], &jpc_mqstates[40]},
+	{0x2401, 1, &jpc_mqstates[47], &jpc_mqstates[41]},
+	{0x2201, 0, &jpc_mqstates[48], &jpc_mqstates[42]},
+	{0x2201, 1, &jpc_mqstates[49], &jpc_mqstates[43]},
+	{0x1c01, 0, &jpc_mqstates[50], &jpc_mqstates[44]},
+	{0x1c01, 1, &jpc_mqstates[51], &jpc_mqstates[45]},
+	{0x1801, 0, &jpc_mqstates[52], &jpc_mqstates[46]},
+	{0x1801, 1, &jpc_mqstates[53], &jpc_mqstates[47]},
+	{0x1601, 0, &jpc_mqstates[54], &jpc_mqstates[48]},
+	{0x1601, 1, &jpc_mqstates[55], &jpc_mqstates[49]},
+	{0x1401, 0, &jpc_mqstates[56], &jpc_mqstates[50]},
+	{0x1401, 1, &jpc_mqstates[57], &jpc_mqstates[51]},
+	{0x1201, 0, &jpc_mqstates[58], &jpc_mqstates[52]},
+	{0x1201, 1, &jpc_mqstates[59], &jpc_mqstates[53]},
+	{0x1101, 0, &jpc_mqstates[60], &jpc_mqstates[54]},
+	{0x1101, 1, &jpc_mqstates[61], &jpc_mqstates[55]},
+	{0x0ac1, 0, &jpc_mqstates[62], &jpc_mqstates[56]},
+	{0x0ac1, 1, &jpc_mqstates[63], &jpc_mqstates[57]},
+	{0x09c1, 0, &jpc_mqstates[64], &jpc_mqstates[58]},
+	{0x09c1, 1, &jpc_mqstates[65], &jpc_mqstates[59]},
+	{0x08a1, 0, &jpc_mqstates[66], &jpc_mqstates[60]},
+	{0x08a1, 1, &jpc_mqstates[67], &jpc_mqstates[61]},
+	{0x0521, 0, &jpc_mqstates[68], &jpc_mqstates[62]},
+	{0x0521, 1, &jpc_mqstates[69], &jpc_mqstates[63]},
+	{0x0441, 0, &jpc_mqstates[70], &jpc_mqstates[64]},
+	{0x0441, 1, &jpc_mqstates[71], &jpc_mqstates[65]},
+	{0x02a1, 0, &jpc_mqstates[72], &jpc_mqstates[66]},
+	{0x02a1, 1, &jpc_mqstates[73], &jpc_mqstates[67]},
+	{0x0221, 0, &jpc_mqstates[74], &jpc_mqstates[68]},
+	{0x0221, 1, &jpc_mqstates[75], &jpc_mqstates[69]},
+	{0x0141, 0, &jpc_mqstates[76], &jpc_mqstates[70]},
+	{0x0141, 1, &jpc_mqstates[77], &jpc_mqstates[71]},
+	{0x0111, 0, &jpc_mqstates[78], &jpc_mqstates[72]},
+	{0x0111, 1, &jpc_mqstates[79], &jpc_mqstates[73]},
+	{0x0085, 0, &jpc_mqstates[80], &jpc_mqstates[74]},
+	{0x0085, 1, &jpc_mqstates[81], &jpc_mqstates[75]},
+	{0x0049, 0, &jpc_mqstates[82], &jpc_mqstates[76]},
+	{0x0049, 1, &jpc_mqstates[83], &jpc_mqstates[77]},
+	{0x0025, 0, &jpc_mqstates[84], &jpc_mqstates[78]},
+	{0x0025, 1, &jpc_mqstates[85], &jpc_mqstates[79]},
+	{0x0015, 0, &jpc_mqstates[86], &jpc_mqstates[80]},
+	{0x0015, 1, &jpc_mqstates[87], &jpc_mqstates[81]},
+	{0x0009, 0, &jpc_mqstates[88], &jpc_mqstates[82]},
+	{0x0009, 1, &jpc_mqstates[89], &jpc_mqstates[83]},
+	{0x0005, 0, &jpc_mqstates[90], &jpc_mqstates[84]},
+	{0x0005, 1, &jpc_mqstates[91], &jpc_mqstates[85]},
+	{0x0001, 0, &jpc_mqstates[90], &jpc_mqstates[86]},
+	{0x0001, 1, &jpc_mqstates[91], &jpc_mqstates[87]},
+	{0x5601, 0, &jpc_mqstates[92], &jpc_mqstates[92]},
+	{0x5601, 1, &jpc_mqstates[93], &jpc_mqstates[93]},
+};
diff --git a/src/libjasper/jpc/jpc_mqcod.h b/src/libjasper/jpc/jpc_mqcod.h
new file mode 100644
index 0000000..ac32b8a
--- /dev/null
+++ b/src/libjasper/jpc/jpc_mqcod.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * MQ Arithmetic Coder
+ *
+ * $Id: jpc_mqcod.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_MQCOD_H
+#define JPC_MQCOD_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_types.h"
+
+/******************************************************************************\
+* Types.
+\******************************************************************************/
+
+/*
+ * MQ coder context information.
+ */
+
+typedef struct {
+
+	/* The most probable symbol (MPS). */
+	int mps;
+
+	/* The state index. */
+	int_fast16_t ind;
+
+} jpc_mqctx_t;
+
+/*
+ * MQ coder state table entry (the element type of the jpc_mqstates table).
+ */
+
+typedef struct jpc_mqstate_s {
+
+	/* The Qe value (subtracted from the A register when a symbol is coded). */
+	uint_fast16_t qeval;
+
+	/* The most probable symbol (MPS) for this state. */
+	int mps;
+
+	/* The state to switch to when an MPS causes a state change (NMPS). */
+	struct jpc_mqstate_s *nmps;
+
+	/* The state to switch to when an LPS causes a state change (NLPS). */
+	struct jpc_mqstate_s *nlps;
+
+} jpc_mqstate_t;
+
+/******************************************************************************\
+* Data.
+\******************************************************************************/
+
+/* The state table for the MQ coder. */
+extern jpc_mqstate_t jpc_mqstates[];
+
+#endif
diff --git a/src/libjasper/jpc/jpc_mqdec.c b/src/libjasper/jpc/jpc_mqdec.c
new file mode 100644
index 0000000..74e6b33
--- /dev/null
+++ b/src/libjasper/jpc/jpc_mqdec.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * MQ Arithmetic Decoder
+ *
+ * $Id: jpc_mqdec.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include "jasper/jas_types.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_math.h"
+#include "jasper/jas_debug.h"
+
+#include "jpc_mqdec.h"
+
+/******************************************************************************\
+* Local macros.
+\******************************************************************************/
+
+#if defined(DEBUG)
+#define	MQDEC_CALL(n, x) \
+	((jas_getdbglevel() >= (n)) ? ((void)(x)) : ((void)0))
+#else
+#define	MQDEC_CALL(n, x)
+#endif
+
+/******************************************************************************\
+* Local function prototypes.
+\******************************************************************************/
+
+static void jpc_mqdec_bytein(jpc_mqdec_t *mqdec);
+
+/******************************************************************************\
+* Code for creation and destruction of a MQ decoder.
+\******************************************************************************/
+
+/* Create a MQ decoder with maxctxs contexts; in may be null.  Returns 0 if allocation fails. */
+jpc_mqdec_t *jpc_mqdec_create(int maxctxs, jas_stream_t *in)
+{
+	jpc_mqdec_t *mqdec;
+
+	/* There must be at least one context. */
+	assert(maxctxs > 0);
+
+	/* Allocate memory for the MQ decoder. */
+	if (!(mqdec = jas_malloc(sizeof(jpc_mqdec_t)))) {
+		goto error;
+	}
+	mqdec->in = in;
+	mqdec->maxctxs = maxctxs;
+	/* Allocate one state pointer per context. */
+	if (!(mqdec->ctxs = jas_malloc(mqdec->maxctxs * sizeof(jpc_mqstate_t *)))) {
+		goto error;
+	}
+	/* Set the current context to the first context. */
+	mqdec->curctx = mqdec->ctxs;
+
+	/* If an input stream has been associated with the MQ decoder,
+	  initialize the decoder state from the stream. */
+	if (mqdec->in) {
+		jpc_mqdec_init(mqdec);
+	}
+	/* With numctxs == 0, every context is reset to the first table state. */
+	jpc_mqdec_setctxs(mqdec, 0, 0);
+
+	return mqdec;
+
+error:
+	/* Release whatever was allocated before the failure. */
+	if (mqdec) {
+		jpc_mqdec_destroy(mqdec);
+	}
+	return 0;
+}
+
+/* Destroy a MQ decoder. */
+void jpc_mqdec_destroy(jpc_mqdec_t *mqdec)
+{
+	if (mqdec->ctxs) {
+		jas_free(mqdec->ctxs);
+	}
+	jas_free(mqdec);
+}
+
+/******************************************************************************\
+* Code for initialization of a MQ decoder.
+\******************************************************************************/
+
+/* Initialize the registers of a MQ decoder by reading from mqdec->in. */
+
+void jpc_mqdec_init(jpc_mqdec_t *mqdec)
+{
+	int c;
+
+	mqdec->eof = 0;
+	mqdec->creg = 0;
+	/* Get the first byte from the input stream. */
+	if ((c = jas_stream_getc(mqdec->in)) == EOF) {
+		/* I/O error or EOF: substitute 0xff and remember the stream is exhausted. */
+		c = 0xff;
+		mqdec->eof = 1;
+	}
+	mqdec->inbuffer = c;
+	mqdec->creg += mqdec->inbuffer << 16;
+	jpc_mqdec_bytein(mqdec);
+	mqdec->creg <<= 7;
+	mqdec->ctreg -= 7;
+	mqdec->areg = 0x8000;
+}
+
+/* Set the input stream for a MQ decoder. */
+
+void jpc_mqdec_setinput(jpc_mqdec_t *mqdec, jas_stream_t *in)
+{
+	mqdec->in = in;
+}
+
+/* Set the first min(maxctxs, numctxs) contexts from ctxs; reset the remaining ones to state 0. */
+
+void jpc_mqdec_setctxs(jpc_mqdec_t *mqdec, int numctxs, jpc_mqctx_t *ctxs)
+{
+	jpc_mqstate_t **ctx;
+	int n;
+
+	ctx = mqdec->ctxs;
+	n = JAS_MIN(mqdec->maxctxs, numctxs);
+	while (--n >= 0) {
+		*ctx = &jpc_mqstates[2 * ctxs->ind + ctxs->mps];
+		++ctx;
+		++ctxs;
+	}
+	n = mqdec->maxctxs - numctxs;
+	while (--n >= 0) {
+		*ctx = &jpc_mqstates[0];
+		++ctx;
+	}
+}
+
+/* Initialize a context. */
+
+void jpc_mqdec_setctx(jpc_mqdec_t *mqdec, int ctxno, jpc_mqctx_t *ctx)
+{
+	jpc_mqstate_t **ctxi;
+	ctxi = &mqdec->ctxs[ctxno];
+	*ctxi = &jpc_mqstates[2 * ctx->ind + ctx->mps];
+}
+
+/******************************************************************************\
+* Code for decoding a bit.
+\******************************************************************************/
+
+/* Decode a bit with debug tracing; log arguments are cast to match %p and %d. */
+
+int jpc_mqdec_getbit_func(register jpc_mqdec_t *mqdec)
+{
+	int bit;
+	JAS_DBGLOG(100, ("jpc_mqdec_getbit_func(%p)\n", (void *) mqdec));
+	MQDEC_CALL(100, jpc_mqdec_dump(mqdec, stderr));
+	bit = jpc_mqdec_getbit_macro(mqdec);
+	MQDEC_CALL(100, jpc_mqdec_dump(mqdec, stderr));
+	JAS_DBGLOG(100, ("ctx = %d, decoded %d\n", (int)(mqdec->curctx -
+	  mqdec->ctxs), bit));
+	return bit;
+}
+
+/* Apply MPS_EXCHANGE algorithm (with RENORMD). */
+int jpc_mqdec_mpsexchrenormd(register jpc_mqdec_t *mqdec)
+{
+	int ret;
+	register jpc_mqstate_t *state = *mqdec->curctx;
+	jpc_mqdec_mpsexchange(mqdec->areg, state->qeval, mqdec->curctx, ret);
+	jpc_mqdec_renormd(mqdec->areg, mqdec->creg, mqdec->ctreg, mqdec->in,
+	  mqdec->eof, mqdec->inbuffer);
+	return ret;
+}
+
+/* Apply LPS_EXCHANGE algorithm (with RENORMD). */
+int jpc_mqdec_lpsexchrenormd(register jpc_mqdec_t *mqdec)
+{
+	int ret;
+	register jpc_mqstate_t *state = *mqdec->curctx;
+	jpc_mqdec_lpsexchange(mqdec->areg, state->qeval, mqdec->curctx, ret);
+	jpc_mqdec_renormd(mqdec->areg, mqdec->creg, mqdec->ctreg, mqdec->in,
+	  mqdec->eof, mqdec->inbuffer);
+	return ret;
+}
+
+/******************************************************************************\
+* Support code.
+\******************************************************************************/
+
+/* Apply the BYTEIN algorithm. */
+static void jpc_mqdec_bytein(jpc_mqdec_t *mqdec)
+{
+	int c;
+	unsigned char prevbuf;
+
+	if (!mqdec->eof) {
+		if ((c = jas_stream_getc(mqdec->in)) == EOF) {
+			mqdec->eof = 1;
+			c = 0xff;
+		}
+		prevbuf = mqdec->inbuffer;
+		mqdec->inbuffer = c;
+		if (prevbuf == 0xff) {
+			if (c > 0x8f) {
+				mqdec->creg += 0xff00;
+				mqdec->ctreg = 8;
+			} else {
+				mqdec->creg += c << 9;
+				mqdec->ctreg = 7;
+			}
+		} else {
+			mqdec->creg += c << 8;
+			mqdec->ctreg = 8;
+		}
+	} else {
+		mqdec->creg += 0xff00;
+		mqdec->ctreg = 8;
+	}
+}
+
+/******************************************************************************\
+* Code for debugging.
+\******************************************************************************/
+
+/* Dump a MQ decoder to a stream for debugging (arguments are cast to match the format specifiers). */
+
+void jpc_mqdec_dump(jpc_mqdec_t *mqdec, FILE *out)
+{
+	fprintf(out, "MQDEC A = %08lx, C = %08lx, CT=%08lx, ",
+	  (unsigned long) mqdec->areg, (unsigned long) mqdec->creg,
+	  (unsigned long) mqdec->ctreg);
+	fprintf(out, "CTX = %d, ", (int)(mqdec->curctx - mqdec->ctxs));
+	fprintf(out, "IND %d, MPS %d, QEVAL %x\n", (int)(*mqdec->curctx -
+	  jpc_mqstates), (*mqdec->curctx)->mps, (unsigned)(*mqdec->curctx)->qeval);
+}
diff --git a/src/libjasper/jpc/jpc_mqdec.h b/src/libjasper/jpc/jpc_mqdec.h
new file mode 100644
index 0000000..bac7b51
--- /dev/null
+++ b/src/libjasper/jpc/jpc_mqdec.h
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * MQ Arithmetic Decoder
+ *
+ * $Id: jpc_mqdec.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_MQDEC_H
+#define JPC_MQDEC_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_types.h"
+#include "jasper/jas_stream.h"
+
+#include "jpc_mqcod.h"
+
+/******************************************************************************\
+* Types.
+\******************************************************************************/
+
+/* MQ arithmetic decoder. */
+
+typedef struct {
+
+	/* The C register. */
+	uint_fast32_t creg;
+
+	/* The A register. */
+	uint_fast32_t areg;
+
+	/* The CT register. */
+	uint_fast32_t ctreg;
+
+	/* The current context. */
+	jpc_mqstate_t **curctx;
+
+	/* The per-context information. */
+	jpc_mqstate_t **ctxs;
+
+	/* The maximum number of contexts. */
+	int maxctxs;
+
+	/* The stream from which to read data. */
+	jas_stream_t *in;
+
+	/* The last character read. */
+	uchar inbuffer;
+
+	/* The EOF indicator. */
+	int eof;
+
+} jpc_mqdec_t;
+
+/******************************************************************************\
+* Functions/macros for construction and destruction.
+\******************************************************************************/
+
+/* Create a MQ decoder. */
+jpc_mqdec_t *jpc_mqdec_create(int maxctxs, jas_stream_t *in);
+
+/* Destroy a MQ decoder. */
+void jpc_mqdec_destroy(jpc_mqdec_t *dec);
+
+/******************************************************************************\
+* Functions/macros for initialization.
+\******************************************************************************/
+
+/* Set the input stream associated with a MQ decoder. */
+void jpc_mqdec_setinput(jpc_mqdec_t *dec, jas_stream_t *in);
+
+/* Initialize a MQ decoder. */
+void jpc_mqdec_init(jpc_mqdec_t *dec);
+
+/******************************************************************************\
+* Functions/macros for manipulating contexts.
+\******************************************************************************/
+
+/* Set the current context for a MQ decoder (the expansion ends with its own ';'). */
+#define	jpc_mqdec_setcurctx(dec, ctxno) \
+	((dec)->curctx = &(dec)->ctxs[(ctxno)]);
+
+/* Set the state information for a particular context of a MQ decoder. */
+void jpc_mqdec_setctx(jpc_mqdec_t *dec, int ctxno, jpc_mqctx_t *ctx);
+
+/* Set the state information for all contexts of a MQ decoder. */
+void jpc_mqdec_setctxs(jpc_mqdec_t *dec, int numctxs, jpc_mqctx_t *ctxs);
+
+/******************************************************************************\
+* Functions/macros for decoding bits.
+\******************************************************************************/
+
+/* Decode a symbol. */
+#if !defined(DEBUG)
+#define	jpc_mqdec_getbit(dec) \
+	jpc_mqdec_getbit_macro(dec)
+#else
+#define	jpc_mqdec_getbit(dec) \
+	jpc_mqdec_getbit_func(dec)
+#endif
+
+/* Decode a symbol (assuming an unskewed probability distribution). */
+#if !defined(DEBUG)
+#define	jpc_mqdec_getbitnoskew(dec) \
+	jpc_mqdec_getbit_macro(dec)
+#else
+#define	jpc_mqdec_getbitnoskew(dec) \
+	jpc_mqdec_getbit_func(dec)
+#endif
+
+/******************************************************************************\
+* Functions/macros for debugging.
+\******************************************************************************/
+
+/* Dump the MQ decoder state for debugging. */
+void jpc_mqdec_dump(jpc_mqdec_t *dec, FILE *out);
+
+/******************************************************************************\
+* EVERYTHING BELOW THIS POINT IS IMPLEMENTATION SPECIFIC AND NOT PART OF THE
+* APPLICATION INTERFACE.  DO NOT RELY ON ANY OF THE INTERNAL FUNCTIONS/MACROS
+* GIVEN BELOW.
+\******************************************************************************/
+
+#define	jpc_mqdec_getbit_macro(dec) \
+	((((dec)->areg -= (*(dec)->curctx)->qeval), \
+	  (dec)->creg >> 16 >= (*(dec)->curctx)->qeval) ? \
+	  ((((dec)->creg -= (*(dec)->curctx)->qeval << 16), \
+	  (dec)->areg & 0x8000) ?  (*(dec)->curctx)->mps : \
+	  jpc_mqdec_mpsexchrenormd(dec)) : \
+	  jpc_mqdec_lpsexchrenormd(dec))
+
+#define	jpc_mqdec_mpsexchange(areg, delta, curctx, bit) \
+{ \
+	if ((areg) < (delta)) { \
+		register jpc_mqstate_t *state = *(curctx); \
+		/* LPS decoded. */ \
+		(bit) = state->mps ^ 1; \
+		*(curctx) = state->nlps; \
+	} else { \
+		register jpc_mqstate_t *state = *(curctx); \
+		/* MPS decoded. */ \
+		(bit) = state->mps; \
+		*(curctx) = state->nmps; \
+	} \
+}
+
+#define	jpc_mqdec_lpsexchange(areg, delta, curctx, bit) \
+{ \
+	if ((areg) >= (delta)) { \
+		register jpc_mqstate_t *state = *(curctx); \
+		(areg) = (delta); \
+		(bit) = state->mps ^ 1; \
+		*(curctx) = state->nlps; \
+	} else { \
+		register jpc_mqstate_t *state = *(curctx); \
+		(areg) = (delta); \
+		(bit) = state->mps; \
+		*(curctx) = state->nmps; \
+	} \
+}
+
+#define	jpc_mqdec_renormd(areg, creg, ctreg, in, eof, inbuf) \
+{ \
+	do { \
+		if (!(ctreg)) { \
+			jpc_mqdec_bytein2(creg, ctreg, in, eof, inbuf); \
+		} \
+		(areg) <<= 1; \
+		(creg) <<= 1; \
+		--(ctreg); \
+	} while (!((areg) & 0x8000)); \
+}
+
+#define	jpc_mqdec_bytein2(creg, ctreg, in, eof, inbuf) \
+{ \
+	int c; \
+	unsigned char prevbuf; \
+	if (!(eof)) { \
+		if ((c = jas_stream_getc(in)) == EOF) { \
+			(eof) = 1; \
+			c = 0xff; \
+		} \
+		prevbuf = (inbuf); \
+		(inbuf) = c; \
+		if (prevbuf == 0xff) { \
+			if (c > 0x8f) { \
+				(creg) += 0xff00; \
+				(ctreg) = 8; \
+			} else { \
+				(creg) += c << 9; \
+				(ctreg) = 7; \
+			} \
+		} else { \
+			(creg) += c << 8; \
+			(ctreg) = 8; \
+		} \
+	} else { \
+		(creg) += 0xff00; \
+		(ctreg) = 8; \
+	} \
+}
+
+int jpc_mqdec_getbit_func(jpc_mqdec_t *dec);
+int jpc_mqdec_mpsexchrenormd(jpc_mqdec_t *dec);
+int jpc_mqdec_lpsexchrenormd(jpc_mqdec_t *dec);
+
+#endif
diff --git a/src/libjasper/jpc/jpc_mqenc.c b/src/libjasper/jpc/jpc_mqenc.c
new file mode 100644
index 0000000..e3f4bea
--- /dev/null
+++ b/src/libjasper/jpc/jpc_mqenc.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * MQ Arithmetic Encoder
+ *
+ * $Id: jpc_mqenc.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "jasper/jas_stream.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_math.h"
+#include "jasper/jas_debug.h"
+
+#include "jpc_mqenc.h"
+
+/******************************************************************************\
+* Macros
+\******************************************************************************/
+
+#if defined(DEBUG)
+#define	JPC_MQENC_CALL(n, x) \
+	((jas_getdbglevel() >= (n)) ? ((void)(x)) : ((void)0))
+#else
+#define	JPC_MQENC_CALL(n, x)
+#endif
+
+#define	jpc_mqenc_codemps9(areg, creg, ctreg, curctx, enc) \
+{ \
+	jpc_mqstate_t *state = *(curctx); \
+	(areg) -= state->qeval; \
+	if (!((areg) & 0x8000)) { \
+		if ((areg) < state->qeval) { \
+			(areg) = state->qeval; \
+		} else { \
+			(creg) += state->qeval; \
+		} \
+		*(curctx) = state->nmps; \
+		jpc_mqenc_renorme((areg), (creg), (ctreg), (enc)); \
+	} else { \
+		(creg) += state->qeval; \
+	} \
+}
+
+#define	jpc_mqenc_codelps2(areg, creg, ctreg, curctx, enc) \
+{ \
+	jpc_mqstate_t *state = *(curctx); \
+	(areg) -= state->qeval; \
+	if ((areg) < state->qeval) { \
+		(creg) += state->qeval; \
+	} else { \
+		(areg) = state->qeval; \
+	} \
+	*(curctx) = state->nlps; \
+	jpc_mqenc_renorme((areg), (creg), (ctreg), (enc)); \
+}
+
+#define	jpc_mqenc_renorme(areg, creg, ctreg, enc) \
+{ \
+	do { \
+		(areg) <<= 1; \
+		(creg) <<= 1; \
+		if (!--(ctreg)) { \
+			jpc_mqenc_byteout((areg), (creg), (ctreg), (enc)); \
+		} \
+	} while (!((areg) & 0x8000)); \
+}
+
+#define	jpc_mqenc_byteout(areg, creg, ctreg, enc) \
+{ /* Write out the buffered byte, then refill outbuf from the top of creg. */ \
+	if ((enc)->outbuf != 0xff) { \
+		if ((creg) & 0x8000000) { \
+			if (++((enc)->outbuf) == 0xff) { \
+				(creg) &= 0x7ffffff; \
+				jpc_mqenc_byteout2(enc); \
+				(enc)->outbuf = ((creg) >> 20) & 0xff; \
+				(creg) &= 0xfffff; \
+				(ctreg) = 7; \
+			} else { \
+				jpc_mqenc_byteout2(enc); \
+				(enc)->outbuf = ((creg) >> 19) & 0xff; \
+				(creg) &= 0x7ffff; \
+				(ctreg) = 8; \
+			} \
+		} else { \
+			jpc_mqenc_byteout2(enc); \
+			(enc)->outbuf = ((creg) >> 19) & 0xff; \
+			(creg) &= 0x7ffff; \
+			(ctreg) = 8; \
+		} \
+	} else { \
+		jpc_mqenc_byteout2(enc); \
+		(enc)->outbuf = ((creg) >> 20) & 0xff; \
+		(creg) &= 0xfffff; \
+		(ctreg) = 7; \
+	} \
+}
+
+#define	jpc_mqenc_byteout2(enc) \
+{ /* Write outbuf to the stream unless it is negative (jpc_mqenc_init sets it to -1). */ \
+	if ((enc)->outbuf >= 0) { \
+		if (jas_stream_putc((enc)->out, (unsigned char)(enc)->outbuf) == EOF) { \
+			(enc)->err |= 1; \
+		} \
+	} \
+	(enc)->lastbyte = (enc)->outbuf; \
+}
+
+/******************************************************************************\
+* Local function prototypes.
+\******************************************************************************/
+
+static void jpc_mqenc_setbits(jpc_mqenc_t *mqenc);
+
+/******************************************************************************\
+* Code for creation and destruction of encoder.
+\******************************************************************************/
+
+/* Create a MQ encoder. */
+
+jpc_mqenc_t *jpc_mqenc_create(int maxctxs, jas_stream_t *out)
+{
+	jpc_mqenc_t *mqenc;
+
+	/* Allocate memory for the MQ encoder. */
+	if (!(mqenc = jas_malloc(sizeof(jpc_mqenc_t)))) {
+		goto error;
+	}
+	mqenc->out = out;
+	mqenc->maxctxs = maxctxs;
+
+	/* Allocate memory for the per-context state information. */
+	if (!(mqenc->ctxs = jas_malloc(mqenc->maxctxs * sizeof(jpc_mqstate_t *)))) {
+		goto error;
+	}
+
+	/* Set the current context to the first one. */
+	mqenc->curctx = mqenc->ctxs;
+
+	jpc_mqenc_init(mqenc);
+
+	/* Initialize the per-context state information to something sane. */
+	jpc_mqenc_setctxs(mqenc, 0, 0);
+
+	return mqenc;
+
+error:
+	if (mqenc) {
+		jpc_mqenc_destroy(mqenc);
+	}
+	return 0;
+}
+
+/* Destroy a MQ encoder. */
+
+void jpc_mqenc_destroy(jpc_mqenc_t *mqenc)
+{
+	if (mqenc->ctxs) {
+		jas_free(mqenc->ctxs);
+	}
+	jas_free(mqenc);
+}
+
+/******************************************************************************\
+* State initialization code.
+\******************************************************************************/
+
+/* Initialize the coding state of a MQ encoder. */
+
+void jpc_mqenc_init(jpc_mqenc_t *mqenc)
+{
+	mqenc->areg = 0x8000;
+	mqenc->outbuf = -1;
+	mqenc->creg = 0;
+	mqenc->ctreg = 12;
+	mqenc->lastbyte = -1;
+	mqenc->err = 0;
+}
+
+/* Set the first min(maxctxs, numctxs) contexts from ctxs; reset the remaining ones to state 0. */
+
+void jpc_mqenc_setctxs(jpc_mqenc_t *mqenc, int numctxs, jpc_mqctx_t *ctxs)
+{
+	jpc_mqstate_t **ctx;
+	int n;
+
+	ctx = mqenc->ctxs;
+	n = JAS_MIN(mqenc->maxctxs, numctxs);
+	while (--n >= 0) {
+		*ctx = &jpc_mqstates[2 * ctxs->ind + ctxs->mps];
+		++ctx;
+		++ctxs;
+	}
+	n = mqenc->maxctxs - numctxs;
+	while (--n >= 0) {
+		*ctx = &jpc_mqstates[0];
+		++ctx;
+	}
+
+}
+
+/* Get the coding state for a MQ encoder. */
+
+void jpc_mqenc_getstate(jpc_mqenc_t *mqenc, jpc_mqencstate_t *state)
+{
+	state->areg = mqenc->areg;
+	state->creg = mqenc->creg;
+	state->ctreg = mqenc->ctreg;
+	state->lastbyte = mqenc->lastbyte;
+}
+
+/******************************************************************************\
+* Code for coding symbols.
+\******************************************************************************/
+
+/* Encode a bit. */
+
+int jpc_mqenc_putbit_func(jpc_mqenc_t *mqenc, int bit)
+{
+	const jpc_mqstate_t *state;
+	JAS_DBGLOG(100, ("jpc_mqenc_putbit(%p, %d)\n", mqenc, bit));
+	JPC_MQENC_CALL(100, jpc_mqenc_dump(mqenc, stderr));
+
+	state = *(mqenc->curctx);
+
+	if (state->mps == bit) {
+		/* Apply the CODEMPS algorithm as defined in the standard. */
+		mqenc->areg -= state->qeval;
+		if (!(mqenc->areg & 0x8000)) {
+			jpc_mqenc_codemps2(mqenc);
+		} else {
+			mqenc->creg += state->qeval;
+		}
+	} else {
+		/* Apply the CODELPS algorithm as defined in the standard. */
+		jpc_mqenc_codelps2(mqenc->areg, mqenc->creg, mqenc->ctreg, mqenc->curctx, mqenc);
+	}
+
+	return jpc_mqenc_error(mqenc) ? (-1) : 0;
+}
+
+int jpc_mqenc_codemps2(jpc_mqenc_t *mqenc)
+{
+	/* Note: This function only performs part of the work associated with
+	the CODEMPS algorithm from the standard.  Some of the work is also
+	performed by the caller. */
+
+	jpc_mqstate_t *state = *(mqenc->curctx);
+	if (mqenc->areg < state->qeval) {
+		mqenc->areg = state->qeval;
+	} else {
+		mqenc->creg += state->qeval;
+	}
+	*mqenc->curctx = state->nmps;
+	jpc_mqenc_renorme(mqenc->areg, mqenc->creg, mqenc->ctreg, mqenc);
+	return jpc_mqenc_error(mqenc) ? (-1) : 0;
+}
+
+int jpc_mqenc_codelps(jpc_mqenc_t *mqenc)
+{
+	jpc_mqenc_codelps2(mqenc->areg, mqenc->creg, mqenc->ctreg, mqenc->curctx, mqenc);
+	return jpc_mqenc_error(mqenc) ? (-1) : 0;
+}
+
+/******************************************************************************\
+* Miscellaneous code.
+\******************************************************************************/
+
+/* Terminate the code word. */
+
+int jpc_mqenc_flush(jpc_mqenc_t *mqenc, int termmode)
+{
+	int_fast16_t k;
+
+	switch (termmode) {
+	case JPC_MQENC_PTERM:
+		k = 11 - mqenc->ctreg + 1;
+		while (k > 0) {
+			mqenc->creg <<= mqenc->ctreg;
+			mqenc->ctreg = 0;
+			jpc_mqenc_byteout(mqenc->areg, mqenc->creg, mqenc->ctreg,
+			  mqenc);
+			k -= mqenc->ctreg;
+		}
+		if (mqenc->outbuf != 0xff) {
+			jpc_mqenc_byteout(mqenc->areg, mqenc->creg, mqenc->ctreg, mqenc);
+		}
+		break;
+	case JPC_MQENC_DEFTERM:
+		jpc_mqenc_setbits(mqenc);
+		mqenc->creg <<= mqenc->ctreg;
+		jpc_mqenc_byteout(mqenc->areg, mqenc->creg, mqenc->ctreg, mqenc);
+		mqenc->creg <<= mqenc->ctreg;
+		jpc_mqenc_byteout(mqenc->areg, mqenc->creg, mqenc->ctreg, mqenc);
+		if (mqenc->outbuf != 0xff) {
+			jpc_mqenc_byteout(mqenc->areg, mqenc->creg, mqenc->ctreg, mqenc);
+		}
+		break;
+	default:
+		abort();
+		break;
+	}
+	return 0;
+}
+
+static void jpc_mqenc_setbits(jpc_mqenc_t *mqenc)
+{
+	uint_fast32_t tmp = mqenc->creg + mqenc->areg;
+	mqenc->creg |= 0xffff;
+	if (mqenc->creg >= tmp) {
+		mqenc->creg -= 0x8000;
+	}
+}
+
+/* Dump a MQ encoder to a stream for debugging (arguments are cast to match the format specifiers). */
+
+int jpc_mqenc_dump(jpc_mqenc_t *mqenc, FILE *out)
+{
+	fprintf(out, "AREG = %08x, CREG = %08x, CTREG = %d\n", (unsigned)
+	  mqenc->areg, (unsigned) mqenc->creg, (int) mqenc->ctreg);
+	fprintf(out, "IND = %02d, MPS = %d, QEVAL = %04x\n",
+	  (int)(*mqenc->curctx - jpc_mqstates), (*mqenc->curctx)->mps,
+	  (unsigned)(*mqenc->curctx)->qeval);
+	return 0;
+}
diff --git a/src/libjasper/jpc/jpc_mqenc.h b/src/libjasper/jpc/jpc_mqenc.h
new file mode 100644
index 0000000..248ed43
--- /dev/null
+++ b/src/libjasper/jpc/jpc_mqenc.h
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * MQ Arithmetic Encoder
+ *
+ * $Id: jpc_mqenc.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_MQENC_H
+#define JPC_MQENC_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_types.h"
+#include "jasper/jas_stream.h"
+
+#include "jpc_mqcod.h"
+
+/******************************************************************************\
+* Constants.
+\******************************************************************************/
+
+/*
+ * Termination modes.
+ */
+
+#define	JPC_MQENC_DEFTERM	0	/* default termination */
+#define	JPC_MQENC_PTERM		1	/* predictable termination */
+
+/******************************************************************************\
+* Types.
+\******************************************************************************/
+
+/* MQ arithmetic encoder class: coding registers, context table and output state. */
+
+typedef struct {
+
+	/* The C register. */
+	uint_fast32_t creg;
+
+	/* The A register (jpc_mqenc_putbit_macro tests its 0x8000 bit). */
+	uint_fast32_t areg;
+
+	/* The CT register. */
+	uint_fast32_t ctreg;
+
+	/* The maximum number of contexts. */
+	int maxctxs;
+
+	/* The per-context information: one state pointer per context. */
+	jpc_mqstate_t **ctxs;
+
+	/* The current context: points at an entry of ctxs (see jpc_mqenc_setcurctx). */
+	jpc_mqstate_t **curctx;
+
+	/* The stream for encoder output. */
+	jas_stream_t *out;
+
+	/* The byte buffer (i.e., the B variable in the standard). */
+	int_fast16_t outbuf;
+
+	/* The last byte output. */
+	int_fast16_t lastbyte;
+
+	/* The error indicator (read through jpc_mqenc_error; nonzero means failure). */
+	int err;
+	
+} jpc_mqenc_t;
+
+/* MQ arithmetic encoder state information (presumably the snapshot filled in by jpc_mqenc_getstate -- confirm). */
+
+typedef struct {
+
+	/* The A register. */
+	unsigned areg;
+
+	/* The C register. */
+	unsigned creg;
+
+	/* The CT register. */
+	unsigned ctreg;
+
+	/* The last byte output by the encoder. */
+	int lastbyte;
+
+} jpc_mqencstate_t;
+
+/******************************************************************************\
+* Functions/macros for construction and destruction.
+\******************************************************************************/
+
+/* Create a MQ encoder. */
+jpc_mqenc_t *jpc_mqenc_create(int maxctxs, jas_stream_t *out);
+
+/* Destroy a MQ encoder. */
+void jpc_mqenc_destroy(jpc_mqenc_t *enc);
+
+/******************************************************************************\
+* Functions/macros for initialization.
+\******************************************************************************/
+
+/* Initialize a MQ encoder. */
+void jpc_mqenc_init(jpc_mqenc_t *enc);
+
+/******************************************************************************\
+* Functions/macros for context manipulation.
+\******************************************************************************/
+
+/* Set the current context to entry ctxno of ctxs.  NOTE: the expansion ends in ';', so use it only as a full statement. */
+#define	jpc_mqenc_setcurctx(enc, ctxno) \
+        ((enc)->curctx = &(enc)->ctxs[ctxno]);
+
+/* Set the state information for a particular context. */
+void jpc_mqenc_setctx(jpc_mqenc_t *enc, int ctxno, jpc_mqctx_t *ctx);
+
+/* Set the state information for multiple contexts. */
+void jpc_mqenc_setctxs(jpc_mqenc_t *enc, int numctxs, jpc_mqctx_t *ctxs);
+
+/******************************************************************************\
+* Miscellaneous functions/macros.
+\******************************************************************************/
+
+/* Get the error state of a MQ encoder: expands to the encoder's err field (jpc_mqenc_codelps treats nonzero as failure). */
+#define	jpc_mqenc_error(enc) \
+	((enc)->err)
+
+/* Get the current encoder state. */
+void jpc_mqenc_getstate(jpc_mqenc_t *enc, jpc_mqencstate_t *state);
+
+/* Terminate the code. */
+int jpc_mqenc_flush(jpc_mqenc_t *enc, int termmode);
+
+/******************************************************************************\
+* Functions/macros for encoding bits.
+\******************************************************************************/
+
+/* Encode a bit.  Expands to the inline macro form unless DEBUG is defined, in which case the function form is called. */
+#if !defined(DEBUG)
+#define	jpc_mqenc_putbit(enc, bit)	jpc_mqenc_putbit_macro(enc, bit)
+#else
+#define	jpc_mqenc_putbit(enc, bit)	jpc_mqenc_putbit_func(enc, bit)
+#endif
+
+/******************************************************************************\
+* Functions/macros for debugging.
+\******************************************************************************/
+
+int jpc_mqenc_dump(jpc_mqenc_t *mqenc, FILE *out);
+
+/******************************************************************************\
+* Implementation-specific details.
+\******************************************************************************/
+
+/* Inline bit encoder behind jpc_mqenc_putbit (enc is evaluated repeatedly, so it must have no side effects).
+  bit == mps: A -= qeval, then jpc_mqenc_codemps2() if A lacks the 0x8000 bit, else C += qeval; bit != mps: jpc_mqenc_codelps(). */
+#define	jpc_mqenc_putbit_macro(enc, bit) \
+	(((*((enc)->curctx))->mps == (bit)) ? \
+	  (((enc)->areg -= (*(enc)->curctx)->qeval), \
+	  ((!((enc)->areg & 0x8000)) ? (jpc_mqenc_codemps2(enc)) : \
+	  ((enc)->creg += (*(enc)->curctx)->qeval))) : \
+	  jpc_mqenc_codelps(enc))
+
+/* Note: These function prototypes are included only to satisfy the
+  needs of the mqenc_putbit_macro macro.  Do not call any of these
+  functions directly. */
+int jpc_mqenc_codemps2(jpc_mqenc_t *enc);
+int jpc_mqenc_codelps(jpc_mqenc_t *enc);
+
+/* Note: This function prototype is included only to satisfy the needs of
+  the mqenc_putbit macro. */
+int jpc_mqenc_putbit_func(jpc_mqenc_t *enc, int bit);
+
+#endif
diff --git a/src/libjasper/jpc/jpc_qmfb.c b/src/libjasper/jpc/jpc_qmfb.c
new file mode 100644
index 0000000..75b8566
--- /dev/null
+++ b/src/libjasper/jpc/jpc_qmfb.c
@@ -0,0 +1,3152 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Quadrature Mirror-Image Filter Bank (QMFB) Library
+ *
+ * $Id: jpc_qmfb.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+#undef WT_LENONE /* This is not needed due to normalization. */
+#define WT_DOSCALE
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <assert.h>
+#include "jasper/jas_fix.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_math.h"
+
+#include "jpc_qmfb.h"
+#include "jpc_tsfb.h"
+#include "jpc_math.h"
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+#define QMFB_SPLITBUFSIZE	4096	/* rows/columns the on-stack split buffers hold in non-VLA builds; larger inputs use the heap */
+#define	QMFB_JOINBUFSIZE	4096	/* rows/columns the on-stack join buffers hold in non-VLA builds; larger inputs use the heap */
+
+int jpc_ft_analyze(jpc_fix_t *a, int xstart, int ystart, int width, int height,
+  int stride);
+int jpc_ft_synthesize(int *a, int xstart, int ystart, int width, int height,
+  int stride);
+
+int jpc_ns_analyze(jpc_fix_t *a, int xstart, int ystart, int width, int height,
+  int stride);
+int jpc_ns_synthesize(jpc_fix_t *a, int xstart, int ystart, int width,
+  int height, int stride);
+
+void jpc_ft_fwdlift_row(jpc_fix_t *a, int numcols, int parity);
+void jpc_ft_fwdlift_col(jpc_fix_t *a, int numrows, int stride,
+  int parity);
+void jpc_ft_fwdlift_colgrp(jpc_fix_t *a, int numrows, int stride,
+  int parity);
+void jpc_ft_fwdlift_colres(jpc_fix_t *a, int numrows, int numcols,
+  int stride, int parity);
+
+void jpc_ft_invlift_row(jpc_fix_t *a, int numcols, int parity);
+void jpc_ft_invlift_col(jpc_fix_t *a, int numrows, int stride,
+  int parity);
+void jpc_ft_invlift_colgrp(jpc_fix_t *a, int numrows, int stride,
+  int parity);
+void jpc_ft_invlift_colres(jpc_fix_t *a, int numrows, int numcols,
+  int stride, int parity);
+
+void jpc_ns_fwdlift_row(jpc_fix_t *a, int numcols, int parity);
+void jpc_ns_fwdlift_colgrp(jpc_fix_t *a, int numrows, int stride, int parity);
+void jpc_ns_fwdlift_colres(jpc_fix_t *a, int numrows, int numcols, int stride,
+  int parity);
+void jpc_ns_invlift_row(jpc_fix_t *a, int numcols, int parity);
+void jpc_ns_invlift_colgrp(jpc_fix_t *a, int numrows, int stride, int parity);
+void jpc_ns_invlift_colres(jpc_fix_t *a, int numrows, int numcols, int stride,
+  int parity);
+
+void jpc_qmfb_split_row(jpc_fix_t *a, int numcols, int parity);
+void jpc_qmfb_split_col(jpc_fix_t *a, int numrows, int stride, int parity);
+void jpc_qmfb_split_colgrp(jpc_fix_t *a, int numrows, int stride, int parity);
+void jpc_qmfb_split_colres(jpc_fix_t *a, int numrows, int numcols, int stride,
+  int parity);
+
+void jpc_qmfb_join_row(jpc_fix_t *a, int numcols, int parity);
+void jpc_qmfb_join_col(jpc_fix_t *a, int numrows, int stride, int parity);
+void jpc_qmfb_join_colgrp(jpc_fix_t *a, int numrows, int stride, int parity);
+void jpc_qmfb_join_colres(jpc_fix_t *a, int numrows, int numcols, int stride,
+  int parity);
+
+double jpc_ft_lpenergywts[32] = {	/* lowpass energy weights of the "ft" filter bank (see jpc_ft_qmfb2d) */
+	1.2247448713915889,
+	1.6583123951776999,
+	2.3184046238739260,
+	3.2691742076555053,
+	4.6199296531440819,
+	6.5323713152269596,
+	9.2377452606141937,
+	13.0639951297449581,
+	18.4752262333915667,
+	26.1278968190610392,
+	36.9504194305524791,
+	52.2557819580462777,
+	73.9008347315741645,
+	104.5115624560829133,
+	147.8016689469569656,
+	209.0231247296646018,
+	295.6033378293900000,
+	418.0462494347059419,
+	591.2066756503630813,
+	836.0924988714708661,
+	/* approximations: entries 20-31 repeat the last computed weight (entry 19) */
+	836.0924988714708661,
+	836.0924988714708661,
+	836.0924988714708661,
+	836.0924988714708661,
+	836.0924988714708661,
+	836.0924988714708661,
+	836.0924988714708661,
+	836.0924988714708661,
+	836.0924988714708661,
+	836.0924988714708661,
+	836.0924988714708661,
+	836.0924988714708661
+};
+
+double jpc_ft_hpenergywts[32] = {	/* highpass energy weights of the "ft" filter bank (see jpc_ft_qmfb2d) */
+	0.8477912478906585,
+	0.9601432184835760,
+	1.2593401049756179,
+	1.7444107171191079,
+	2.4538713036750726,
+	3.4656517695088755,
+	4.8995276398597856,
+	6.9283970402160842,
+	9.7980274940131444,
+	13.8564306871112652,
+	19.5959265076535587,
+	27.7128159494245487,
+	39.1918369552045860,
+	55.4256262207444053,
+	78.3836719028959124,
+	110.8512517317256822,
+	156.7673435548526868,
+	221.7025033739244293,
+	313.5346870787551552,
+	443.4050067351659550,
+	/* approximations: entries 20-31 repeat the last computed weight (entry 19) */
+	443.4050067351659550,
+	443.4050067351659550,
+	443.4050067351659550,
+	443.4050067351659550,
+	443.4050067351659550,
+	443.4050067351659550,
+	443.4050067351659550,
+	443.4050067351659550,
+	443.4050067351659550,
+	443.4050067351659550,
+	443.4050067351659550,
+	443.4050067351659550
+};
+
+double jpc_ns_lpenergywts[32] = {	/* lowpass energy weights of the "ns" filter bank (see jpc_ns_qmfb2d) */
+	1.4021081679297411,
+	2.0303718560817923,
+	2.9011625562785555,
+	4.1152851751758002,
+	5.8245108637728071,
+	8.2387599345725171,
+	11.6519546479210838,
+	16.4785606470644375,
+	23.3042776444606794,
+	32.9572515613740435,
+	46.6086013487782793,
+	65.9145194076860861,
+	93.2172084551803977,
+	131.8290408510004283,
+	186.4344176300625691,
+	263.6580819564562148,
+	372.8688353500955373,
+	527.3161639447193920,
+	745.7376707114038936,
+	1054.6323278917823245,
+	/* approximations follow: entries 20-31 repeat the last computed weight (entry 19) */
+	1054.6323278917823245,
+	1054.6323278917823245,
+	1054.6323278917823245,
+	1054.6323278917823245,
+	1054.6323278917823245,
+	1054.6323278917823245,
+	1054.6323278917823245,
+	1054.6323278917823245,
+	1054.6323278917823245,
+	1054.6323278917823245,
+	1054.6323278917823245,
+	1054.6323278917823245
+};
+
+double jpc_ns_hpenergywts[32] = {	/* highpass energy weights of the "ns" filter bank (see jpc_ns_qmfb2d) */
+	1.4425227650161456,
+	1.9669426082455688,
+	2.8839248082788891,
+	4.1475208393432981,
+	5.8946497530677817,
+	8.3471789178590949,
+	11.8086046551047463,
+	16.7012780415647804,
+	23.6196657032246620,
+	33.4034255108592362,
+	47.2396388881632632,
+	66.8069597416714061,
+	94.4793162154500692,
+	133.6139330736999113,
+	188.9586372358249378,
+	267.2278678461869390,
+	377.9172750722391356,
+	534.4557359047058753,
+	755.8345502191498326,
+	1068.9114718353569060,
+	/* approximations follow: entries 20-31 repeat the last computed weight (entry 19) */
+	1068.9114718353569060,
+	1068.9114718353569060,
+	1068.9114718353569060,
+	1068.9114718353569060,
+	1068.9114718353569060,
+	1068.9114718353569060,
+	1068.9114718353569060,
+	1068.9114718353569060,
+	1068.9114718353569060,
+	1068.9114718353569060,
+	1068.9114718353569060, 1068.9114718353569060	/* entries 30 and 31; entry 31 used to be left out and so was 0.0 */
+};
+
+jpc_qmfb2d_t jpc_ft_qmfb2d = {	/* descriptor bundling the "ft" routines and weight tables */
+	jpc_ft_analyze,	/* analysis routine */
+	jpc_ft_synthesize,	/* synthesis routine */
+	jpc_ft_lpenergywts,	/* lowpass energy weight table */
+	jpc_ft_hpenergywts	/* highpass energy weight table */
+};
+
+jpc_qmfb2d_t jpc_ns_qmfb2d = {	/* descriptor bundling the "ns" routines and weight tables */
+	jpc_ns_analyze,	/* analysis routine */
+	jpc_ns_synthesize,	/* synthesis routine */
+	jpc_ns_lpenergywts,	/* lowpass energy weight table */
+	jpc_ns_hpenergywts	/* highpass energy weight table */
+};
+
+/******************************************************************************\
+* generic
+\******************************************************************************/
+
+/* Deinterleave one row of numcols samples in place.  Samples at offsets
+   (1 - parity), (3 - parity), ... are gathered into the highpass channel,
+   which ends up at a[hstartcol] onwards; the remaining samples are packed
+   at the start of the row as the lowpass channel.  Rows with fewer than
+   two samples are left untouched. */
+void jpc_qmfb_split_row(jpc_fix_t *a, int numcols, int parity)
+{
+	int bufsize = JPC_CEILDIVPOW2(numcols, 1);
+#if !defined(HAVE_VLA)
+	jpc_fix_t splitbuf[QMFB_SPLITBUFSIZE];
+#else
+	jpc_fix_t splitbuf[bufsize];
+#endif
+	jpc_fix_t *buf = splitbuf;
+	register jpc_fix_t *srcptr;
+	register jpc_fix_t *dstptr;
+	register int n;
+	register int m;
+	int hstartcol;
+
+#if !defined(HAVE_VLA)
+	/* The fixed-size stack buffer is too small; use the heap instead. */
+	if (bufsize > QMFB_SPLITBUFSIZE) {
+		if (!(buf = jas_malloc(bufsize * sizeof(jpc_fix_t)))) {
+			/* We have no choice but to abort in this case. */
+			abort();
+		}
+	}
+#endif
+
+	if (numcols >= 2) {
+		hstartcol = (numcols + 1 - parity) >> 1;
+		/* The highpass channel always holds numcols - hstartcol samples.
+		   The previous parity-dependent count was one short for odd
+		   numcols with parity set, which made the lowpass copy below
+		   read a[numcols], one element past the end of the row. */
+		m = numcols - hstartcol;
+		/* Save the samples destined for the highpass channel. */
+		dstptr = buf;
+		srcptr = &a[1 - parity];
+		for (n = m; n > 0; --n) {
+			*dstptr++ = *srcptr;
+			srcptr += 2;
+		}
+		/* Copy the appropriate samples into the lowpass channel. */
+		dstptr = &a[1 - parity];
+		srcptr = &a[2 - parity];
+		for (n = numcols - m - (!parity); n > 0; --n) {
+			*dstptr++ = *srcptr;
+			srcptr += 2;
+		}
+		/* Copy the saved samples into the highpass channel. */
+		dstptr = &a[hstartcol];
+		srcptr = buf;
+		for (n = m; n > 0; --n) {
+			*dstptr++ = *srcptr++;
+		}
+	}
+
+#if !defined(HAVE_VLA)
+	/* If the split buffer was allocated on the heap, free this memory. */
+	if (buf != splitbuf) {
+		jas_free(buf);
+	}
+#endif
+}
+
+/* Deinterleave one column of numrows samples (stride elements apart) in
+   place.  Rows (1 - parity), (3 - parity), ... go to the highpass channel,
+   which ends up at row hstartcol onwards; the other rows are packed at the
+   top as the lowpass channel.  Fewer than two rows: nothing is done. */
+void jpc_qmfb_split_col(jpc_fix_t *a, int numrows, int stride,
+  int parity)
+{
+	int bufsize = JPC_CEILDIVPOW2(numrows, 1);
+#if !defined(HAVE_VLA)
+	jpc_fix_t splitbuf[QMFB_SPLITBUFSIZE];
+#else
+	jpc_fix_t splitbuf[bufsize];
+#endif
+	jpc_fix_t *buf = splitbuf;
+	register jpc_fix_t *srcptr;
+	register jpc_fix_t *dstptr;
+	register int n;
+	register int m;
+	int hstartcol;
+
+#if !defined(HAVE_VLA)
+	/* The fixed-size stack buffer is too small; use the heap instead. */
+	if (bufsize > QMFB_SPLITBUFSIZE) {
+		if (!(buf = jas_malloc(bufsize * sizeof(jpc_fix_t)))) {
+			/* We have no choice but to abort in this case. */
+			abort();
+		}
+	}
+#endif
+
+	if (numrows >= 2) {
+		hstartcol = (numrows + 1 - parity) >> 1;
+		/* The highpass channel always holds numrows - hstartcol rows.  The
+		   old parity-dependent count was one short for odd numrows with
+		   parity set, so the lowpass copy read one row past the end. */
+		m = numrows - hstartcol;
+		/* Save the samples destined for the highpass channel. */
+		dstptr = buf;
+		srcptr = &a[(1 - parity) * stride];
+		for (n = m; n > 0; --n) {
+			*dstptr++ = *srcptr;
+			srcptr += stride << 1;
+		}
+		/* Copy the appropriate samples into the lowpass channel. */
+		dstptr = &a[(1 - parity) * stride];
+		srcptr = &a[(2 - parity) * stride];
+		for (n = numrows - m - (!parity); n > 0; --n) {
+			*dstptr = *srcptr;
+			dstptr += stride;
+			srcptr += stride << 1;
+		}
+		/* Copy the saved samples into the highpass channel. */
+		dstptr = &a[hstartcol * stride];
+		srcptr = buf;
+		for (n = m; n > 0; --n) {
+			*dstptr = *srcptr++;
+			dstptr += stride;
+		}
+	}
+
+#if !defined(HAVE_VLA)
+	/* If the split buffer was allocated on the heap, free this memory. */
+	if (buf != splitbuf) {
+		jas_free(buf);
+	}
+#endif
+}
+
+/* Deinterleave a group of JPC_QMFB_COLGRPSIZE adjacent columns in place.
+   Each of the numrows rows of the group starts stride elements after the
+   previous one.  Rows (1 - parity), (3 - parity), ... go to the highpass
+   channel, which ends up at row hstartcol onwards; the other rows are
+   packed at the top as the lowpass channel.  Nothing is done when there
+   are fewer than two rows.
+
+   Two defects of the earlier version are fixed here:
+   - the heap buffer was sized for one sample per saved row rather than
+     JPC_QMFB_COLGRPSIZE samples, so saving the highpass rows overran it
+     whenever the heap path was taken;
+   - the highpass row count depended on parity and was one short for odd
+     numrows with parity set, so the lowpass copy read one row past the
+     end of the column group. */
+void jpc_qmfb_split_colgrp(jpc_fix_t *a, int numrows, int stride,
+  int parity)
+{
+
+	int bufsize = JPC_CEILDIVPOW2(numrows, 1);
+#if !defined(HAVE_VLA)
+	jpc_fix_t splitbuf[QMFB_SPLITBUFSIZE * JPC_QMFB_COLGRPSIZE];
+#else
+	jpc_fix_t splitbuf[bufsize * JPC_QMFB_COLGRPSIZE];
+#endif
+	jpc_fix_t *buf = splitbuf;
+	jpc_fix_t *srcptr;
+	jpc_fix_t *dstptr;
+	register int n;
+	register int i;
+	int m;
+	int hstartcol;
+
+#if !defined(HAVE_VLA)
+	/* The fixed-size stack buffer is too small; use the heap instead.  The
+	   buffer holds JPC_QMFB_COLGRPSIZE samples for each saved row. */
+	if (bufsize > QMFB_SPLITBUFSIZE) {
+		if (!(buf = jas_malloc(sizeof(jpc_fix_t) * bufsize *
+		  JPC_QMFB_COLGRPSIZE))) {
+			/* We have no choice but to abort in this case. */
+			abort();
+		}
+	}
+#endif
+
+	if (numrows >= 2) {
+		hstartcol = (numrows + 1 - parity) >> 1;
+		/* The highpass channel always holds numrows - hstartcol rows. */
+		m = numrows - hstartcol;
+		/* Save the samples destined for the highpass channel. */
+		dstptr = buf;
+		srcptr = &a[(1 - parity) * stride];
+		for (n = m; n > 0; --n) {
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				dstptr[i] = srcptr[i];
+			}
+			dstptr += JPC_QMFB_COLGRPSIZE;
+			srcptr += stride << 1;
+		}
+		/* Copy the appropriate samples into the lowpass channel. */
+		dstptr = &a[(1 - parity) * stride];
+		srcptr = &a[(2 - parity) * stride];
+		for (n = numrows - m - (!parity); n > 0; --n) {
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				dstptr[i] = srcptr[i];
+			}
+			dstptr += stride;
+			srcptr += stride << 1;
+		}
+		/* Copy the saved samples into the highpass channel. */
+		dstptr = &a[hstartcol * stride];
+		srcptr = buf;
+		for (n = m; n > 0; --n) {
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				dstptr[i] = srcptr[i];
+			}
+			dstptr += stride;
+			srcptr += JPC_QMFB_COLGRPSIZE;
+		}
+	}
+
+#if !defined(HAVE_VLA)
+	/* If the split buffer was allocated on the heap, free this memory. */
+	if (buf != splitbuf) {
+		jas_free(buf);
+	}
+#endif
+
+}
+
+/* Deinterleave numcols adjacent columns of numrows rows in place; each row
+   starts stride elements after the previous one.  Rows (1 - parity),
+   (3 - parity), ... go to the highpass channel, which ends up at row
+   hstartcol onwards; the other rows are packed at the top as the lowpass
+   channel.  Nothing is done when there are fewer than two rows.
+   NOTE(review): in non-VLA builds the stack buffer only suffices while
+   numcols <= JPC_QMFB_COLGRPSIZE -- presumably ensured by callers; confirm.
+
+   Two defects of the earlier version are fixed here:
+   - the heap buffer was sized for one sample per saved row rather than
+     numcols samples, so saving the highpass rows could overrun it;
+   - the highpass row count depended on parity and was one short for odd
+     numrows with parity set, so the lowpass copy read one row past the
+     end of the block. */
+void jpc_qmfb_split_colres(jpc_fix_t *a, int numrows, int numcols,
+  int stride, int parity)
+{
+
+	int bufsize = JPC_CEILDIVPOW2(numrows, 1);
+#if !defined(HAVE_VLA)
+	jpc_fix_t splitbuf[QMFB_SPLITBUFSIZE * JPC_QMFB_COLGRPSIZE];
+#else
+	jpc_fix_t splitbuf[bufsize * numcols];
+#endif
+	jpc_fix_t *buf = splitbuf;
+	jpc_fix_t *srcptr;
+	jpc_fix_t *dstptr;
+	register int n;
+	register int i;
+	int m;
+	int hstartcol;
+
+#if !defined(HAVE_VLA)
+	/* The fixed-size stack buffer is too small; use the heap instead.  The
+	   buffer holds numcols samples for each saved row. */
+	if (bufsize > QMFB_SPLITBUFSIZE) {
+		if (!(buf = jas_malloc(sizeof(jpc_fix_t) * bufsize *
+		  numcols))) {
+			/* We have no choice but to abort in this case. */
+			abort();
+		}
+	}
+#endif
+
+	if (numrows >= 2) {
+		hstartcol = (numrows + 1 - parity) >> 1;
+		/* The highpass channel always holds numrows - hstartcol rows. */
+		m = numrows - hstartcol;
+		/* Save the samples destined for the highpass channel. */
+		dstptr = buf;
+		srcptr = &a[(1 - parity) * stride];
+		for (n = m; n > 0; --n) {
+			for (i = 0; i < numcols; ++i) {
+				dstptr[i] = srcptr[i];
+			}
+			dstptr += numcols;
+			srcptr += stride << 1;
+		}
+		/* Copy the appropriate samples into the lowpass channel. */
+		dstptr = &a[(1 - parity) * stride];
+		srcptr = &a[(2 - parity) * stride];
+		for (n = numrows - m - (!parity); n > 0; --n) {
+			for (i = 0; i < numcols; ++i) {
+				dstptr[i] = srcptr[i];
+			}
+			dstptr += stride;
+			srcptr += stride << 1;
+		}
+		/* Copy the saved samples into the highpass channel. */
+		dstptr = &a[hstartcol * stride];
+		srcptr = buf;
+		for (n = m; n > 0; --n) {
+			for (i = 0; i < numcols; ++i) {
+				dstptr[i] = srcptr[i];
+			}
+			dstptr += stride;
+			srcptr += numcols;
+		}
+	}
+
+#if !defined(HAVE_VLA)
+	/* If the split buffer was allocated on the heap, free this memory. */
+	if (buf != splitbuf) {
+		jas_free(buf);
+	}
+#endif
+
+}
+
+/* Interleave a row of numcols samples in place.  On entry the first
+   (numcols + 1 - parity) / 2 samples hold the lowpass channel and the rest
+   the highpass channel.  On exit the lowpass samples occupy offsets parity,
+   parity + 2, ... and the highpass samples offsets 1 - parity,
+   3 - parity, ...  Aborts if a needed heap buffer cannot be obtained. */
+void jpc_qmfb_join_row(jpc_fix_t *a, int numcols, int parity)
+{
+	int bufsize = JPC_CEILDIVPOW2(numcols, 1);
+#if !defined(HAVE_VLA)
+	jpc_fix_t localbuf[QMFB_JOINBUFSIZE];
+#else
+	jpc_fix_t localbuf[bufsize];
+#endif
+	jpc_fix_t *lowcopy = localbuf;
+	register jpc_fix_t *from;
+	register jpc_fix_t *to;
+	register int k;
+	int hstart;
+
+#if !defined(HAVE_VLA)
+	/* Fall back to the heap when the fixed-size buffer is too small. */
+	if (bufsize > QMFB_JOINBUFSIZE) {
+		lowcopy = jas_malloc(bufsize * sizeof(jpc_fix_t));
+		if (!lowcopy) {
+			/* There is no way to report failure to the caller. */
+			abort();
+		}
+	}
+#endif
+
+	/* Index of the first highpass sample, i.e. the lowpass sample count. */
+	hstart = (numcols + 1 - parity) >> 1;
+
+	/* Stash the lowpass samples; some of their slots get overwritten below. */
+	from = &a[0];
+	to = lowcopy;
+	for (k = hstart; k > 0; --k) {
+		*to++ = *from++;
+	}
+
+	/* Spread the highpass samples out to every second position. */
+	from = &a[hstart];
+	to = &a[1 - parity];
+	for (k = numcols - hstart; k > 0; --k) {
+		*to = *from++;
+		to += 2;
+	}
+
+	/* Drop the stashed lowpass samples into the remaining positions. */
+	from = lowcopy;
+	to = &a[parity];
+	for (k = hstart; k > 0; --k) {
+		*to = *from++;
+		to += 2;
+	}
+
+#if !defined(HAVE_VLA)
+	/* Release the heap buffer if one was taken. */
+	if (lowcopy != localbuf) {
+		jas_free(lowcopy);
+	}
+#endif
+}
+
+/* Interleave one column of numrows samples (stride elements apart) in place.
+   On entry the first (numrows + 1 - parity) / 2 rows hold the lowpass channel
+   and the rest the highpass channel.  On exit lowpass samples occupy rows
+   parity, parity + 2, ... and highpass samples rows 1 - parity, 3 - parity, ... */
+void jpc_qmfb_join_col(jpc_fix_t *a, int numrows, int stride,
+  int parity)
+{
+	int bufsize = JPC_CEILDIVPOW2(numrows, 1);
+#if !defined(HAVE_VLA)
+	jpc_fix_t localbuf[QMFB_JOINBUFSIZE];
+#else
+	jpc_fix_t localbuf[bufsize];
+#endif
+	jpc_fix_t *lowcopy = localbuf;
+	register jpc_fix_t *from;
+	register jpc_fix_t *to;
+	register int k;
+	int hstart;
+
+#if !defined(HAVE_VLA)
+	/* Fall back to the heap when the fixed-size buffer is too small. */
+	if (bufsize > QMFB_JOINBUFSIZE) {
+		lowcopy = jas_malloc(bufsize * sizeof(jpc_fix_t));
+		if (!lowcopy) {
+			/* There is no way to report failure to the caller. */
+			abort();
+		}
+	}
+#endif
+
+	/* Index of the first highpass row, i.e. the lowpass sample count. */
+	hstart = (numrows + 1 - parity) >> 1;
+	/* Stash the lowpass samples; some of their slots get overwritten below. */
+	from = &a[0];
+	to = lowcopy;
+	for (k = hstart; k > 0; --k) {
+		*to++ = *from;
+		from += stride;
+	}
+
+	/* Spread the highpass samples out to every second row. */
+	from = &a[hstart * stride];
+	to = &a[(1 - parity) * stride];
+	for (k = numrows - hstart; k > 0; --k) {
+		*to = *from;
+		to += 2 * stride;
+		from += stride;
+	}
+
+	/* Drop the stashed lowpass samples into the remaining rows. */
+	from = lowcopy;
+	to = &a[parity * stride];
+	for (k = hstart; k > 0; --k) {
+		*to = *from++;
+		to += 2 * stride;
+	}
+
+#if !defined(HAVE_VLA)
+	/* Release the heap buffer if one was taken. */
+	if (lowcopy != localbuf) {
+		jas_free(lowcopy);
+	}
+#endif
+}
+
+/* Interleave a group of JPC_QMFB_COLGRPSIZE adjacent columns in place,
+   undoing the deinterleaving done by the column-group split.  Each of the
+   numrows rows of the group starts stride elements after the previous one.
+   On entry the first (numrows + 1 - parity) / 2 rows hold the lowpass
+   channel and the remaining rows the highpass channel.  On exit the
+   lowpass rows occupy rows parity, parity + 2, ... and the highpass rows
+   occupy rows 1 - parity, 3 - parity, ...  Aborts if a heap buffer is
+   needed but cannot be obtained. */
+void jpc_qmfb_join_colgrp(jpc_fix_t *a, int numrows, int stride,
+  int parity)
+{
+
+	int bufsize = JPC_CEILDIVPOW2(numrows, 1);
+#if !defined(HAVE_VLA)
+	jpc_fix_t localbuf[QMFB_JOINBUFSIZE * JPC_QMFB_COLGRPSIZE];
+#else
+	jpc_fix_t localbuf[bufsize * JPC_QMFB_COLGRPSIZE];
+#endif
+	jpc_fix_t *lowcopy = localbuf;
+	jpc_fix_t *from;
+	jpc_fix_t *to;
+	register int k;
+	register int col;
+	int hstart;
+
+#if !defined(HAVE_VLA)
+	/* Fall back to the heap when the fixed-size buffer cannot hold every
+	   lowpass row of the group. */
+	if (bufsize > QMFB_JOINBUFSIZE) {
+		lowcopy = jas_malloc(bufsize * JPC_QMFB_COLGRPSIZE * sizeof(jpc_fix_t));
+		if (!lowcopy) {
+			/* There is no way to report failure to the caller. */
+			abort();
+		}
+	}
+#endif
+
+	/* Index of the first highpass row, i.e. the lowpass row count. */
+	hstart = (numrows + 1 - parity) >> 1;
+
+	/* Stash the lowpass rows, since the highpass rows are about to be
+	   written over some of them. */
+	from = &a[0];
+	to = lowcopy;
+	for (k = hstart; k > 0; --k) {
+		for (col = 0; col < JPC_QMFB_COLGRPSIZE; ++col) {
+			to[col] = from[col];
+		}
+		from += stride;
+		to += JPC_QMFB_COLGRPSIZE;
+	}
+
+	/* Spread the highpass rows out to every second row.  For parity 0 or 1
+	   the rows are visited in ascending order and the destination row never
+	   lies beyond the source row, so no unread highpass row is clobbered. */
+	from = &a[hstart * stride];
+	to = &a[(1 - parity) * stride];
+	for (k = numrows - hstart; k > 0; --k) {
+		for (col = 0; col < JPC_QMFB_COLGRPSIZE; ++col) {
+			to[col] = from[col];
+		}
+		to += 2 * stride;
+		from += stride;
+	}
+
+	/* Drop the stashed lowpass rows into the rows left free by the
+	   highpass rows. */
+	from = lowcopy;
+	to = &a[parity * stride];
+	for (k = hstart; k > 0; --k) {
+		for (col = 0; col < JPC_QMFB_COLGRPSIZE; ++col) {
+			to[col] = from[col];
+		}
+		to += 2 * stride;
+		from += JPC_QMFB_COLGRPSIZE;
+	}
+
+#if !defined(HAVE_VLA)
+	/* Release the heap buffer if one was taken. */
+	if (lowcopy != localbuf) {
+		jas_free(lowcopy);
+	}
+#endif
+
+}
+
+/* Interleave numcols adjacent columns of numrows rows in place, undoing
+   the deinterleaving done by the residual-column split.  Each row starts
+   stride elements after the previous one.  On entry the first
+   (numrows + 1 - parity) / 2 rows hold the lowpass channel and the
+   remaining rows the highpass channel.  On exit the lowpass rows occupy
+   rows parity, parity + 2, ... and the highpass rows occupy rows
+   1 - parity, 3 - parity, ...  Aborts if a heap buffer is needed but
+   cannot be obtained. */
+void jpc_qmfb_join_colres(jpc_fix_t *a, int numrows, int numcols,
+  int stride, int parity)
+{
+
+	int bufsize = JPC_CEILDIVPOW2(numrows, 1);
+#if !defined(HAVE_VLA)
+	jpc_fix_t localbuf[QMFB_JOINBUFSIZE * JPC_QMFB_COLGRPSIZE];
+#else
+	jpc_fix_t localbuf[bufsize * numcols];
+#endif
+	jpc_fix_t *lowcopy = localbuf;
+	jpc_fix_t *from;
+	jpc_fix_t *to;
+	register int k;
+	register int col;
+	int hstart;
+
+#if !defined(HAVE_VLA)
+	/* Fall back to the heap when the fixed-size buffer is too small for
+	   this many rows. */
+	if (bufsize > QMFB_JOINBUFSIZE) {
+		lowcopy = jas_malloc(bufsize * numcols * sizeof(jpc_fix_t));
+		if (!lowcopy) {
+			/* There is no way to report failure to the caller. */
+			abort();
+		}
+	}
+#endif
+
+	/* Index of the first highpass row, i.e. the lowpass row count. */
+	hstart = (numrows + 1 - parity) >> 1;
+
+	/* Stash the lowpass rows, since the highpass rows are about to be
+	   written over some of them. */
+	from = &a[0];
+	to = lowcopy;
+	for (k = hstart; k > 0; --k) {
+		for (col = 0; col < numcols; ++col) {
+			to[col] = from[col];
+		}
+		from += stride;
+		to += numcols;
+	}
+
+	/* Spread the highpass rows out to every second row.  For parity 0 or 1
+	   the rows are visited in ascending order and the destination row never
+	   lies beyond the source row, so no unread highpass row is clobbered. */
+	from = &a[hstart * stride];
+	to = &a[(1 - parity) * stride];
+	for (k = numrows - hstart; k > 0; --k) {
+		for (col = 0; col < numcols; ++col) {
+			to[col] = from[col];
+		}
+		to += 2 * stride;
+		from += stride;
+	}
+
+	/* Drop the stashed lowpass rows into the rows left free by the
+	   highpass rows. */
+	from = lowcopy;
+	to = &a[parity * stride];
+	for (k = hstart; k > 0; --k) {
+		for (col = 0; col < numcols; ++col) {
+			to[col] = from[col];
+		}
+		to += 2 * stride;
+		from += numcols;
+	}
+
+#if !defined(HAVE_VLA)
+	/* Release the heap buffer if one was taken. */
+	if (lowcopy != localbuf) {
+		jas_free(lowcopy);
+	}
+#endif
+
+}
+
+/******************************************************************************\
+* 5/3 transform
+\******************************************************************************/
+
+void jpc_ft_fwdlift_row(jpc_fix_t *a, int numcols, int parity)
+{
+	/* Forward 5/3 lifting, in place, on one row of numcols samples laid out as a lowpass half a[0..llen-1] followed by a highpass half a[llen..numcols-1]; parity is 0 or 1 (the caller in this file passes xstart & 1). */
+	register jpc_fix_t *lptr;
+	register jpc_fix_t *hptr;
+	register int n;
+	int llen;
+
+	llen = (numcols + 1 - parity) >> 1;
+
+	if (numcols > 1) {
+
+		/* Apply the first lifting step: each highpass sample gets h -= (l0 + l1) >> 1 for two adjacent lowpass samples; an edge sample handled outside the loop gets h -= l. */
+		lptr = &a[0];
+		hptr = &a[llen];
+		if (parity) {
+			hptr[0] -= lptr[0];
+			++hptr;
+		}
+		n = numcols - llen - parity - (parity == (numcols & 1));
+		while (n-- > 0) {
+			hptr[0] -= (lptr[0] + lptr[1]) >> 1;
+			++hptr;
+			++lptr;
+		}
+		if (parity == (numcols & 1)) {
+			hptr[0] -= lptr[0];
+		}
+
+		/* Apply the second lifting step: each lowpass sample gets l += (h0 + h1 + 2) >> 2 for two adjacent highpass samples; an edge sample handled outside the loop gets l += (h + 1) >> 1. */
+		lptr = &a[0];
+		hptr = &a[llen];
+		if (!parity) {
+			lptr[0] += (hptr[0] + 1) >> 1;
+			++lptr;
+		}
+		n = llen - (!parity) - (parity != (numcols & 1));
+		while (n-- > 0) {
+			lptr[0] += (hptr[0] + hptr[1] + 2) >> 2;
+			++lptr;
+			++hptr;
+		}
+		if (parity != (numcols & 1)) {
+			lptr[0] += (hptr[0] + 1) >> 1;
+		}
+
+	} else {
+		/* numcols <= 1: nothing to lift; with odd parity the sample a[0] is doubled. */
+		if (parity) {
+			lptr = &a[0];
+			lptr[0] <<= 1;
+		}
+
+	}
+
+}
+
+void jpc_ft_fwdlift_col(jpc_fix_t *a, int numrows, int stride, int parity)
+{
+	/* Forward 5/3 lifting, in place, on a single column of numrows samples spaced stride elements apart: the first llen samples are the lowpass half, the rest the highpass half; parity is 0 or 1. */
+	jpc_fix_t *lptr;
+	jpc_fix_t *hptr;
+#if 0
+	register jpc_fix_t *lptr2;
+	register jpc_fix_t *hptr2;
+	register int i;
+#endif
+	register int n;
+	int llen;
+
+	llen = (numrows + 1 - parity) >> 1;
+
+	if (numrows > 1) {
+
+		/* Apply the first lifting step: each highpass sample gets h -= (l0 + l1) >> 1 for two consecutive lowpass samples; an edge sample handled outside the loop gets h -= l. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (parity) {
+			hptr[0] -= lptr[0];
+			hptr += stride;
+		}
+		n = numrows - llen - parity - (parity == (numrows & 1));
+		while (n-- > 0) {
+			hptr[0] -= (lptr[0] + lptr[stride]) >> 1;
+			hptr += stride;
+			lptr += stride;
+		}
+		if (parity == (numrows & 1)) {
+			hptr[0] -= lptr[0];
+		}
+
+		/* Apply the second lifting step: each lowpass sample gets l += (h0 + h1 + 2) >> 2 for two consecutive highpass samples; an edge sample handled outside the loop gets l += (h + 1) >> 1. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (!parity) {
+			lptr[0] += (hptr[0] + 1) >> 1;
+			lptr += stride;
+		}
+		n = llen - (!parity) - (parity != (numrows & 1));
+		while (n-- > 0) {
+			lptr[0] += (hptr[0] + hptr[stride] + 2) >> 2;
+			lptr += stride;
+			hptr += stride;
+		}
+		if (parity != (numrows & 1)) {
+			lptr[0] += (hptr[0] + 1) >> 1;
+		}
+
+	} else {
+		/* numrows <= 1: nothing to lift; with odd parity the sample a[0] is doubled. */
+		if (parity) {
+			lptr = &a[0];
+			lptr[0] <<= 1;
+		}
+
+	}
+
+}
+
+void jpc_ft_fwdlift_colgrp(jpc_fix_t *a, int numrows, int stride, int parity)
+{
+	/* Forward 5/3 lifting, in place, on JPC_QMFB_COLGRPSIZE adjacent columns at once: rows are stride elements apart, the first llen rows are the lowpass half and the rest the highpass half; parity is 0 or 1. */
+	jpc_fix_t *lptr;
+	jpc_fix_t *hptr;
+	register jpc_fix_t *lptr2;
+	register jpc_fix_t *hptr2;
+	register int n;
+	register int i;
+	int llen;
+
+	llen = (numrows + 1 - parity) >> 1;
+
+	if (numrows > 1) {
+
+		/* Apply the first lifting step: each highpass row gets h -= (l0 + l1) >> 1 from two consecutive lowpass rows; an edge row handled outside the loop gets h -= l. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				hptr2[0] -= lptr2[0];
+				++hptr2;
+				++lptr2;
+			}
+			hptr += stride;
+		}
+		n = numrows - llen - parity - (parity == (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				hptr2[0] -= (lptr2[0] + lptr2[stride]) >> 1;
+				++lptr2;
+				++hptr2;
+			}
+			hptr += stride;
+			lptr += stride;
+		}
+		if (parity == (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				hptr2[0] -= lptr2[0];
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+		/* Apply the second lifting step: each lowpass row gets l += (h0 + h1 + 2) >> 2 from two consecutive highpass rows; an edge row handled outside the loop gets l += (h + 1) >> 1. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (!parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				lptr2[0] += (hptr2[0] + 1) >> 1;
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+		}
+		n = llen - (!parity) - (parity != (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				lptr2[0] += (hptr2[0] + hptr2[stride] + 2) >> 2;
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+			hptr += stride;
+		}
+		if (parity != (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				lptr2[0] += (hptr2[0] + 1) >> 1;
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+	} else {
+		/* numrows <= 1: nothing to lift; with odd parity the first JPC_QMFB_COLGRPSIZE samples of the row are doubled. */
+		if (parity) {
+			lptr2 = &a[0];
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				lptr2[0] <<= 1;
+				++lptr2;
+			}
+		}
+
+	}
+
+}
+
+void jpc_ft_fwdlift_colres(jpc_fix_t *a, int numrows, int numcols, int stride,
+  int parity)
+{
+	/* Forward 5/3 lifting, in place, on a strip of numcols adjacent columns: rows are stride elements apart, the first llen rows are the lowpass half and the rest the highpass half; parity is 0 or 1. Same steps as the column-group variant, with a run-time column count. */
+	jpc_fix_t *lptr;
+	jpc_fix_t *hptr;
+	register jpc_fix_t *lptr2;
+	register jpc_fix_t *hptr2;
+	register int n;
+	register int i;
+	int llen;
+
+	llen = (numrows + 1 - parity) >> 1;
+
+	if (numrows > 1) {
+
+		/* Apply the first lifting step: each highpass row gets h -= (l0 + l1) >> 1 from two consecutive lowpass rows; an edge row handled outside the loop gets h -= l. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				hptr2[0] -= lptr2[0];
+				++hptr2;
+				++lptr2;
+			}
+			hptr += stride;
+		}
+		n = numrows - llen - parity - (parity == (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				hptr2[0] -= (lptr2[0] + lptr2[stride]) >> 1;
+				++lptr2;
+				++hptr2;
+			}
+			hptr += stride;
+			lptr += stride;
+		}
+		if (parity == (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				hptr2[0] -= lptr2[0];
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+		/* Apply the second lifting step: each lowpass row gets l += (h0 + h1 + 2) >> 2 from two consecutive highpass rows; an edge row handled outside the loop gets l += (h + 1) >> 1. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (!parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				lptr2[0] += (hptr2[0] + 1) >> 1;
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+		}
+		n = llen - (!parity) - (parity != (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				lptr2[0] += (hptr2[0] + hptr2[stride] + 2) >> 2;
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+			hptr += stride;
+		}
+		if (parity != (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				lptr2[0] += (hptr2[0] + 1) >> 1;
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+	} else {
+		/* numrows <= 1: nothing to lift; with odd parity the first numcols samples of the row are doubled. */
+		if (parity) {
+			lptr2 = &a[0];
+			for (i = 0; i < numcols; ++i) {
+				lptr2[0] <<= 1;
+				++lptr2;
+			}
+		}
+
+	}
+
+}
+
+void jpc_ft_invlift_row(jpc_fix_t *a, int numcols, int parity)
+{
+	/* Inverse 5/3 lifting, in place, on one row of numcols samples laid out as a lowpass half a[0..llen-1] followed by a highpass half a[llen..numcols-1]; it applies the two forward steps in reverse order with opposite signs. parity is 0 or 1. */
+	register jpc_fix_t *lptr;
+	register jpc_fix_t *hptr;
+	register int n;
+	int llen;
+
+	llen = (numcols + 1 - parity) >> 1;
+
+	if (numcols > 1) {
+
+		/* Apply the first lifting step: each lowpass sample gets l -= (h0 + h1 + 2) >> 2 for two adjacent highpass samples; an edge sample handled outside the loop gets l -= (h + 1) >> 1. */
+		lptr = &a[0];
+		hptr = &a[llen];
+		if (!parity) {
+			lptr[0] -= (hptr[0] + 1) >> 1;
+			++lptr;
+		}
+		n = llen - (!parity) - (parity != (numcols & 1));
+		while (n-- > 0) {
+			lptr[0] -= (hptr[0] + hptr[1] + 2) >> 2;
+			++lptr;
+			++hptr;
+		}
+		if (parity != (numcols & 1)) {
+			lptr[0] -= (hptr[0] + 1) >> 1;
+		}
+
+		/* Apply the second lifting step: each highpass sample gets h += (l0 + l1) >> 1 for two adjacent lowpass samples; an edge sample handled outside the loop gets h += l. */
+		lptr = &a[0];
+		hptr = &a[llen];
+		if (parity) {
+			hptr[0] += lptr[0];
+			++hptr;
+		}
+		n = numcols - llen - parity - (parity == (numcols & 1));
+		while (n-- > 0) {
+			hptr[0] += (lptr[0] + lptr[1]) >> 1;
+			++hptr;
+			++lptr;
+		}
+		if (parity == (numcols & 1)) {
+			hptr[0] += lptr[0];
+		}
+
+	} else {
+		/* numcols <= 1: nothing to lift; with odd parity the sample a[0] is shifted right by one bit. */
+		if (parity) {
+			lptr = &a[0];
+			lptr[0] >>= 1;
+		}
+
+	}
+
+}
+
+void jpc_ft_invlift_col(jpc_fix_t *a, int numrows, int stride, int parity)
+{
+	/* Inverse 5/3 lifting, in place, on a single column of numrows samples spaced stride elements apart: the first llen samples are the lowpass half, the rest the highpass half; parity is 0 or 1. */
+	jpc_fix_t *lptr;
+	jpc_fix_t *hptr;
+#if 0
+	register jpc_fix_t *lptr2;
+	register jpc_fix_t *hptr2;
+	register int i;
+#endif
+	register int n;
+	int llen;
+
+	llen = (numrows + 1 - parity) >> 1;
+
+	if (numrows > 1) {
+
+		/* Apply the first lifting step: each lowpass sample gets l -= (h0 + h1 + 2) >> 2 for two consecutive highpass samples; an edge sample handled outside the loop gets l -= (h + 1) >> 1. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (!parity) {
+			lptr[0] -= (hptr[0] + 1) >> 1;
+			lptr += stride;
+		}
+		n = llen - (!parity) - (parity != (numrows & 1));
+		while (n-- > 0) {
+			lptr[0] -= (hptr[0] + hptr[stride] + 2) >> 2;
+			lptr += stride;
+			hptr += stride;
+		}
+		if (parity != (numrows & 1)) {
+			lptr[0] -= (hptr[0] + 1) >> 1;
+		}
+
+		/* Apply the second lifting step: each highpass sample gets h += (l0 + l1) >> 1 for two consecutive lowpass samples; an edge sample handled outside the loop gets h += l. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (parity) {
+			hptr[0] += lptr[0];
+			hptr += stride;
+		}
+		n = numrows - llen - parity - (parity == (numrows & 1));
+		while (n-- > 0) {
+			hptr[0] += (lptr[0] + lptr[stride]) >> 1;
+			hptr += stride;
+			lptr += stride;
+		}
+		if (parity == (numrows & 1)) {
+			hptr[0] += lptr[0];
+		}
+
+	} else {
+		/* numrows <= 1: nothing to lift; with odd parity the sample a[0] is shifted right by one bit. */
+		if (parity) {
+			lptr = &a[0];
+			lptr[0] >>= 1;
+		}
+
+	}
+
+}
+
+void jpc_ft_invlift_colgrp(jpc_fix_t *a, int numrows, int stride, int parity)
+{
+	/* Inverse 5/3 lifting, in place, on JPC_QMFB_COLGRPSIZE adjacent columns at once: rows are stride elements apart, the first llen rows are the lowpass half and the rest the highpass half; parity is 0 or 1. */
+	jpc_fix_t *lptr;
+	jpc_fix_t *hptr;
+	register jpc_fix_t *lptr2;
+	register jpc_fix_t *hptr2;
+	register int n;
+	register int i;
+	int llen;
+
+	llen = (numrows + 1 - parity) >> 1;
+
+	if (numrows > 1) {
+
+		/* Apply the first lifting step: each lowpass row gets l -= (h0 + h1 + 2) >> 2 from two consecutive highpass rows; an edge row handled outside the loop gets l -= (h + 1) >> 1. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (!parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				lptr2[0] -= (hptr2[0] + 1) >> 1;
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+		}
+		n = llen - (!parity) - (parity != (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				lptr2[0] -= (hptr2[0] + hptr2[stride] + 2) >> 2;
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+			hptr += stride;
+		}
+		if (parity != (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				lptr2[0] -= (hptr2[0] + 1) >> 1;
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+		/* Apply the second lifting step: each highpass row gets h += (l0 + l1) >> 1 from two consecutive lowpass rows; an edge row handled outside the loop gets h += l. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				hptr2[0] += lptr2[0];
+				++hptr2;
+				++lptr2;
+			}
+			hptr += stride;
+		}
+		n = numrows - llen - parity - (parity == (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				hptr2[0] += (lptr2[0] + lptr2[stride]) >> 1;
+				++lptr2;
+				++hptr2;
+			}
+			hptr += stride;
+			lptr += stride;
+		}
+		if (parity == (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				hptr2[0] += lptr2[0];
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+	} else {
+		/* numrows <= 1: nothing to lift; with odd parity the first JPC_QMFB_COLGRPSIZE samples of the row are shifted right by one bit. */
+		if (parity) {
+			lptr2 = &a[0];
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				lptr2[0] >>= 1;
+				++lptr2;
+			}
+		}
+
+	}
+
+}
+
+void jpc_ft_invlift_colres(jpc_fix_t *a, int numrows, int numcols, int stride,
+  int parity)
+{
+	/* Inverse 5/3 lifting, in place, on a strip of numcols adjacent columns: rows are stride elements apart, the first llen rows are the lowpass half and the rest the highpass half; parity is 0 or 1. Same steps as the column-group variant, with a run-time column count. */
+	jpc_fix_t *lptr;
+	jpc_fix_t *hptr;
+	register jpc_fix_t *lptr2;
+	register jpc_fix_t *hptr2;
+	register int n;
+	register int i;
+	int llen;
+
+	llen = (numrows + 1 - parity) >> 1;
+
+	if (numrows > 1) {
+
+		/* Apply the first lifting step: each lowpass row gets l -= (h0 + h1 + 2) >> 2 from two consecutive highpass rows; an edge row handled outside the loop gets l -= (h + 1) >> 1. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (!parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				lptr2[0] -= (hptr2[0] + 1) >> 1;
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+		}
+		n = llen - (!parity) - (parity != (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				lptr2[0] -= (hptr2[0] + hptr2[stride] + 2) >> 2;
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+			hptr += stride;
+		}
+		if (parity != (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				lptr2[0] -= (hptr2[0] + 1) >> 1;
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+		/* Apply the second lifting step: each highpass row gets h += (l0 + l1) >> 1 from two consecutive lowpass rows; an edge row handled outside the loop gets h += l. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				hptr2[0] += lptr2[0];
+				++hptr2;
+				++lptr2;
+			}
+			hptr += stride;
+		}
+		n = numrows - llen - parity - (parity == (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				hptr2[0] += (lptr2[0] + lptr2[stride]) >> 1;
+				++lptr2;
+				++hptr2;
+			}
+			hptr += stride;
+			lptr += stride;
+		}
+		if (parity == (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				hptr2[0] += lptr2[0];
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+	} else {
+		/* numrows <= 1: nothing to lift; with odd parity the first numcols samples of the row are shifted right by one bit. */
+		if (parity) {
+			lptr2 = &a[0];
+			for (i = 0; i < numcols; ++i) {
+				lptr2[0] >>= 1;
+				++lptr2;
+			}
+		}
+
+	}
+
+}
+/* Forward 2-D 5/3 analysis, in place, of a width x height region stored at a with rows stride elements apart. Columns are split and lifted first, then rows. Only the low bit of xstart / ystart is used (as column / row parity). Always returns 0. */
+int jpc_ft_analyze(jpc_fix_t *a, int xstart, int ystart, int width, int height,
+  int stride)
+{
+	int numrows = height;
+	int numcols = width;
+	int rowparity = ystart & 1;
+	int colparity = xstart & 1;
+	int i;
+	jpc_fix_t *startptr;
+	int maxcols;
+
+	maxcols = (numcols / JPC_QMFB_COLGRPSIZE) * JPC_QMFB_COLGRPSIZE;
+	startptr = &a[0];
+	for (i = 0; i < maxcols; i += JPC_QMFB_COLGRPSIZE) {
+		jpc_qmfb_split_colgrp(startptr, numrows, stride, rowparity);
+		jpc_ft_fwdlift_colgrp(startptr, numrows, stride, rowparity);
+		startptr += JPC_QMFB_COLGRPSIZE;
+	}
+	if (maxcols < numcols) {
+		jpc_qmfb_split_colres(startptr, numrows, numcols - maxcols, stride,
+		  rowparity);
+		jpc_ft_fwdlift_colres(startptr, numrows, numcols - maxcols, stride,
+		  rowparity);
+	}
+	/* Row pass: split and lift every row of the region. */
+	startptr = &a[0];
+	for (i = 0; i < numrows; ++i) {
+		jpc_qmfb_split_row(startptr, numcols, colparity);
+		jpc_ft_fwdlift_row(startptr, numcols, colparity);
+		startptr += stride;
+	}
+
+	return 0;
+
+}
+/* Inverse 2-D 5/3 synthesis, in place, of a width x height region stored at a with rows stride elements apart. Rows are inverse-lifted and joined first, then columns. Only the low bit of xstart / ystart is used (as column / row parity). Returns 0, including when the column pass is cut short by an abort request. */
+int jpc_ft_synthesize(int *a, int xstart, int ystart, int width, int height,
+  int stride)
+{
+	int numrows = height;
+	int numcols = width;
+	int rowparity = ystart & 1;
+	int colparity = xstart & 1;
+
+	int maxcols;
+	jpc_fix_t *startptr;
+	int i;
+	/* NOTE(review): a is declared int * but is assigned to a jpc_fix_t * below; confirm jpc_fix_t is int on all targets. */
+	startptr = &a[0];
+	for (i = 0; i < numrows; ++i) {
+		jpc_ft_invlift_row(startptr, numcols, colparity);
+		jpc_qmfb_join_row(startptr, numcols, colparity);
+		startptr += stride;
+	}
+	/* Column pass: whole groups of JPC_QMFB_COLGRPSIZE columns first, then one narrower remainder strip. */
+	maxcols = (numcols / JPC_QMFB_COLGRPSIZE) * JPC_QMFB_COLGRPSIZE;
+	startptr = &a[0];
+	for (i = 0; i < maxcols; i += JPC_QMFB_COLGRPSIZE) {
+    // GeoJasper: dima - progress
+    jas_do_progress( i, maxcols, "jpc: synthesize" ); // dima
+    if (jas_test_abort() == 1) return 0;
+		/* NOTE(review): the early return above leaves the remaining columns unprocessed and still reports 0; confirm callers check the abort flag themselves. */
+		jpc_ft_invlift_colgrp(startptr, numrows, stride, rowparity);
+		jpc_qmfb_join_colgrp(startptr, numrows, stride, rowparity);
+		startptr += JPC_QMFB_COLGRPSIZE;
+	}
+	if (maxcols < numcols) {
+		jpc_ft_invlift_colres(startptr, numrows, numcols - maxcols, stride,
+		  rowparity);
+		jpc_qmfb_join_colres(startptr, numrows, numcols - maxcols, stride,
+		  rowparity);
+	}
+
+	return 0;
+
+}
+
+/******************************************************************************\
+* 9/7 transform
+\******************************************************************************/
+
+#define ALPHA (-1.586134342059924) /* coefficient of the first forward lifting step (updates the highpass half) */
+#define BETA (-0.052980118572961) /* coefficient of the second forward lifting step (updates the lowpass half) */
+#define GAMMA (0.882911075530934) /* coefficient of the third forward lifting step (updates the highpass half) */
+#define DELTA (0.443506852043971) /* coefficient of the fourth forward lifting step (updates the lowpass half) */
+#define LGAIN (1.0 / 1.23017410558578) /* factor applied to the lowpass half in the forward scaling step (WT_DOSCALE only) */
+#define HGAIN (1.0 / 1.62578613134411) /* factor applied to the highpass half in the forward scaling step (WT_DOSCALE only) */
+
+void jpc_ns_fwdlift_row(jpc_fix_t *a, int numcols, int parity)
+{
+	/* Forward 9/7 lifting, in place, on one row of numcols samples laid out as a lowpass half a[0..llen-1] followed by a highpass half a[llen..numcols-1]; parity is 0 or 1. The jpc_fix_* helpers are defined elsewhere (presumably fixed-point +=, multiply and add - confirm). */
+	register jpc_fix_t *lptr;
+	register jpc_fix_t *hptr;
+	register int n;
+	int llen;
+
+	llen = (numcols + 1 - parity) >> 1;
+
+	if (numcols > 1) {
+
+		/* Apply the first lifting step: highpass samples are updated with ALPHA * (l0 + l1); an edge sample handled outside the loop uses 2 * ALPHA * l. */
+		lptr = &a[0];
+		hptr = &a[llen];
+		if (parity) {
+			jpc_fix_pluseq(hptr[0], jpc_fix_mul(jpc_dbltofix(2.0 * ALPHA),
+			  lptr[0]));
+			++hptr;
+		}
+		n = numcols - llen - parity - (parity == (numcols & 1));
+		while (n-- > 0) {
+			jpc_fix_pluseq(hptr[0], jpc_fix_mul(jpc_dbltofix(ALPHA),
+			  jpc_fix_add(lptr[0], lptr[1])));
+			++hptr;
+			++lptr;
+		}
+		if (parity == (numcols & 1)) {
+			jpc_fix_pluseq(hptr[0], jpc_fix_mul(jpc_dbltofix(2.0 * ALPHA),
+			  lptr[0]));
+		}
+
+		/* Apply the second lifting step: lowpass samples are updated with BETA * (h0 + h1); an edge sample handled outside the loop uses 2 * BETA * h. */
+		lptr = &a[0];
+		hptr = &a[llen];
+		if (!parity) {
+			jpc_fix_pluseq(lptr[0], jpc_fix_mul(jpc_dbltofix(2.0 * BETA),
+			  hptr[0]));
+			++lptr;
+		}
+		n = llen - (!parity) - (parity != (numcols & 1));
+		while (n-- > 0) {
+			jpc_fix_pluseq(lptr[0], jpc_fix_mul(jpc_dbltofix(BETA),
+			  jpc_fix_add(hptr[0], hptr[1])));
+			++lptr;
+			++hptr;
+		}
+		if (parity != (numcols & 1)) {
+			jpc_fix_pluseq(lptr[0], jpc_fix_mul(jpc_dbltofix(2.0 * BETA),
+			  hptr[0]));
+		}
+
+		/* Apply the third lifting step: same shape as the first step, with GAMMA in place of ALPHA. */
+		lptr = &a[0];
+		hptr = &a[llen];
+		if (parity) {
+			jpc_fix_pluseq(hptr[0], jpc_fix_mul(jpc_dbltofix(2.0 * GAMMA),
+			  lptr[0]));
+			++hptr;
+		}
+		n = numcols - llen - parity - (parity == (numcols & 1));
+		while (n-- > 0) {
+			jpc_fix_pluseq(hptr[0], jpc_fix_mul(jpc_dbltofix(GAMMA),
+			  jpc_fix_add(lptr[0], lptr[1])));
+			++hptr;
+			++lptr;
+		}
+		if (parity == (numcols & 1)) {
+			jpc_fix_pluseq(hptr[0], jpc_fix_mul(jpc_dbltofix(2.0 * GAMMA),
+			  lptr[0]));
+		}
+
+		/* Apply the fourth lifting step: same shape as the second step, with DELTA in place of BETA. */
+		lptr = &a[0];
+		hptr = &a[llen];
+		if (!parity) {
+			jpc_fix_pluseq(lptr[0], jpc_fix_mul(jpc_dbltofix(2.0 * DELTA),
+			  hptr[0]));
+			++lptr;
+		}
+		n = llen - (!parity) - (parity != (numcols & 1));
+		while (n-- > 0) {
+			jpc_fix_pluseq(lptr[0], jpc_fix_mul(jpc_dbltofix(DELTA),
+			  jpc_fix_add(hptr[0], hptr[1])));
+			++lptr;
+			++hptr;
+		}
+		if (parity != (numcols & 1)) {
+			jpc_fix_pluseq(lptr[0], jpc_fix_mul(jpc_dbltofix(2.0 * DELTA),
+			  hptr[0]));
+		}
+
+		/* Apply the scaling step (compiled only when WT_DOSCALE is defined): multiply the lowpass half by LGAIN and the highpass half by HGAIN. */
+#if defined(WT_DOSCALE)
+		lptr = &a[0];
+		n = llen;
+		while (n-- > 0) {
+			lptr[0] = jpc_fix_mul(lptr[0], jpc_dbltofix(LGAIN));
+			++lptr;
+		}
+		hptr = &a[llen];
+		n = numcols - llen;
+		while (n-- > 0) {
+			hptr[0] = jpc_fix_mul(hptr[0], jpc_dbltofix(HGAIN));
+			++hptr;
+		}
+#endif
+
+	} else {
+		/* numcols <= 1: the row is left untouched unless WT_LENONE is defined, in which case an odd-parity sample a[0] is doubled. */
+#if defined(WT_LENONE)
+		if (parity) {
+			lptr = &a[0];
+			lptr[0] <<= 1;
+		}
+#endif
+
+	}
+
+}
+
+void jpc_ns_fwdlift_colgrp(jpc_fix_t *a, int numrows, int stride,
+  int parity)
+{
+	/* Forward 9/7 lifting, in place, on JPC_QMFB_COLGRPSIZE adjacent columns at once: rows are stride elements apart, the first llen rows are the lowpass half and the rest the highpass half; parity is 0 or 1. The jpc_fix_* helpers are defined elsewhere (presumably fixed-point +=, multiply and add - confirm). */
+	jpc_fix_t *lptr;
+	jpc_fix_t *hptr;
+	register jpc_fix_t *lptr2;
+	register jpc_fix_t *hptr2;
+	register int n;
+	register int i;
+	int llen;
+
+	llen = (numrows + 1 - parity) >> 1;
+
+	if (numrows > 1) {
+
+		/* Apply the first lifting step: highpass rows are updated with ALPHA * (l0 + l1) from two consecutive lowpass rows; an edge row handled outside the loop uses 2 * ALPHA * l. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * ALPHA),
+				  lptr2[0]));
+				++hptr2;
+				++lptr2;
+			}
+			hptr += stride;
+		}
+		n = numrows - llen - parity - (parity == (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(ALPHA),
+				  jpc_fix_add(lptr2[0], lptr2[stride])));
+				++lptr2;
+				++hptr2;
+			}
+			hptr += stride;
+			lptr += stride;
+		}
+		if (parity == (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * ALPHA),
+				  lptr2[0]));
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+		/* Apply the second lifting step: lowpass rows are updated with BETA * (h0 + h1) from two consecutive highpass rows; an edge row handled outside the loop uses 2 * BETA * h. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (!parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * BETA),
+				  hptr2[0]));
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+		}
+		n = llen - (!parity) - (parity != (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(BETA),
+				  jpc_fix_add(hptr2[0], hptr2[stride])));
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+			hptr += stride;
+		}
+		if (parity != (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * BETA),
+				  hptr2[0]));
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+		/* Apply the third lifting step: same shape as the first step, with GAMMA in place of ALPHA. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * GAMMA),
+				  lptr2[0]));
+				++hptr2;
+				++lptr2;
+			}
+			hptr += stride;
+		}
+		n = numrows - llen - parity - (parity == (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(GAMMA),
+				  jpc_fix_add(lptr2[0], lptr2[stride])));
+				++lptr2;
+				++hptr2;
+			}
+			hptr += stride;
+			lptr += stride;
+		}
+		if (parity == (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * GAMMA),
+				  lptr2[0]));
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+		/* Apply the fourth lifting step: same shape as the second step, with DELTA in place of BETA. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (!parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * DELTA),
+				  hptr2[0]));
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+		}
+		n = llen - (!parity) - (parity != (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(DELTA),
+				  jpc_fix_add(hptr2[0], hptr2[stride])));
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+			hptr += stride;
+		}
+		if (parity != (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * DELTA),
+				  hptr2[0]));
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+		/* Apply the scaling step (compiled only when WT_DOSCALE is defined): multiply the lowpass rows by LGAIN and the highpass rows by HGAIN. */
+#if defined(WT_DOSCALE)
+		lptr = &a[0];
+		n = llen;
+		while (n-- > 0) {
+			lptr2 = lptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				lptr2[0] = jpc_fix_mul(lptr2[0], jpc_dbltofix(LGAIN));
+				++lptr2;
+			}
+			lptr += stride;
+		}
+		hptr = &a[llen * stride];
+		n = numrows - llen;
+		while (n-- > 0) {
+			hptr2 = hptr;
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				hptr2[0] = jpc_fix_mul(hptr2[0], jpc_dbltofix(HGAIN));
+				++hptr2;
+			}
+			hptr += stride;
+		}
+#endif
+
+	} else {
+		/* numrows <= 1: the data is left untouched unless WT_LENONE is defined, in which case an odd-parity row has its first JPC_QMFB_COLGRPSIZE samples doubled. */
+#if defined(WT_LENONE)
+		if (parity) {
+			lptr2 = &a[0];
+			for (i = 0; i < JPC_QMFB_COLGRPSIZE; ++i) {
+				lptr2[0] <<= 1;
+				++lptr2;
+			}
+		}
+#endif
+
+	}
+
+}
+
+void jpc_ns_fwdlift_colres(jpc_fix_t *a, int numrows, int numcols,
+  int stride, int parity)
+{
+	/* Forward 9/7 lifting, in place, on a strip of numcols adjacent columns: rows are stride elements apart, the first llen rows are the lowpass half and the rest the highpass half; parity is 0 or 1. Same steps as the column-group variant, with a run-time column count. */
+	jpc_fix_t *lptr;
+	jpc_fix_t *hptr;
+	register jpc_fix_t *lptr2;
+	register jpc_fix_t *hptr2;
+	register int n;
+	register int i;
+	int llen;
+
+	llen = (numrows + 1 - parity) >> 1;
+
+	if (numrows > 1) {
+
+		/* Apply the first lifting step: highpass rows are updated with ALPHA * (l0 + l1) from two consecutive lowpass rows; an edge row handled outside the loop uses 2 * ALPHA * l. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * ALPHA),
+				  lptr2[0]));
+				++hptr2;
+				++lptr2;
+			}
+			hptr += stride;
+		}
+		n = numrows - llen - parity - (parity == (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(ALPHA),
+				  jpc_fix_add(lptr2[0], lptr2[stride])));
+				++lptr2;
+				++hptr2;
+			}
+			hptr += stride;
+			lptr += stride;
+		}
+		if (parity == (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * ALPHA),
+				  lptr2[0]));
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+		/* Apply the second lifting step: lowpass rows are updated with BETA * (h0 + h1) from two consecutive highpass rows; an edge row handled outside the loop uses 2 * BETA * h. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (!parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * BETA),
+				  hptr2[0]));
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+		}
+		n = llen - (!parity) - (parity != (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(BETA),
+				  jpc_fix_add(hptr2[0], hptr2[stride])));
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+			hptr += stride;
+		}
+		if (parity != (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * BETA),
+				  hptr2[0]));
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+		/* Apply the third lifting step: same shape as the first step, with GAMMA in place of ALPHA. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * GAMMA),
+				  lptr2[0]));
+				++hptr2;
+				++lptr2;
+			}
+			hptr += stride;
+		}
+		n = numrows - llen - parity - (parity == (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(GAMMA),
+				  jpc_fix_add(lptr2[0], lptr2[stride])));
+				++lptr2;
+				++hptr2;
+			}
+			hptr += stride;
+			lptr += stride;
+		}
+		if (parity == (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * GAMMA),
+				  lptr2[0]));
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+		/* Apply the fourth lifting step: same shape as the second step, with DELTA in place of BETA. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (!parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * DELTA),
+				  hptr2[0]));
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+		}
+		n = llen - (!parity) - (parity != (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(DELTA),
+				  jpc_fix_add(hptr2[0], hptr2[stride])));
+				++lptr2;
+				++hptr2;
+			}
+			lptr += stride;
+			hptr += stride;
+		}
+		if (parity != (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * DELTA),
+				  hptr2[0]));
+				++lptr2;
+				++hptr2;
+			}
+		}
+
+		/* Apply the scaling step (compiled only when WT_DOSCALE is defined): multiply the lowpass rows by LGAIN and the highpass rows by HGAIN. */
+#if defined(WT_DOSCALE)
+		lptr = &a[0];
+		n = llen;
+		while (n-- > 0) {
+			lptr2 = lptr;
+			for (i = 0; i < numcols; ++i) {
+				lptr2[0] = jpc_fix_mul(lptr2[0], jpc_dbltofix(LGAIN));
+				++lptr2;
+			}
+			lptr += stride;
+		}
+		hptr = &a[llen * stride];
+		n = numrows - llen;
+		while (n-- > 0) {
+			hptr2 = hptr;
+			for (i = 0; i < numcols; ++i) {
+				hptr2[0] = jpc_fix_mul(hptr2[0], jpc_dbltofix(HGAIN));
+				++hptr2;
+			}
+			hptr += stride;
+		}
+#endif
+
+	} else {
+		/* numrows <= 1: the data is left untouched unless WT_LENONE is defined, in which case an odd-parity row has its first numcols samples doubled. */
+#if defined(WT_LENONE)
+		if (parity) {
+			lptr2 = &a[0];
+			for (i = 0; i < numcols; ++i) {
+				lptr2[0] <<= 1;
+				++lptr2;
+			}
+		}
+#endif
+
+	}
+
+}
+
+void jpc_ns_fwdlift_col(jpc_fix_t *a, int numrows, int stride,
+  int parity)
+{
+	/* Forward 9/7 lifting, in place, on a single column of numrows samples spaced stride elements apart: the first llen samples are the lowpass half, the rest the highpass half; parity is 0 or 1. The ++lptr2 / ++hptr2 statements below have no effect: both pointers are reassigned or unused after each increment. */
+	jpc_fix_t *lptr;
+	jpc_fix_t *hptr;
+	register jpc_fix_t *lptr2;
+	register jpc_fix_t *hptr2;
+	register int n;
+	int llen;
+
+	llen = (numrows + 1 - parity) >> 1;
+
+	if (numrows > 1) {
+
+		/* Apply the first lifting step: highpass samples are updated with ALPHA * (l0 + l1) from two consecutive lowpass samples; an edge sample handled outside the loop uses 2 * ALPHA * l. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * ALPHA),
+			  lptr2[0]));
+			++hptr2;
+			++lptr2;
+			hptr += stride;
+		}
+		n = numrows - llen - parity - (parity == (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(ALPHA),
+			  jpc_fix_add(lptr2[0], lptr2[stride])));
+			++lptr2;
+			++hptr2;
+			hptr += stride;
+			lptr += stride;
+		}
+		if (parity == (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * ALPHA),
+			  lptr2[0]));
+			++lptr2;
+			++hptr2;
+		}
+
+		/* Apply the second lifting step: lowpass samples are updated with BETA * (h0 + h1) from two consecutive highpass samples; an edge sample handled outside the loop uses 2 * BETA * h. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (!parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * BETA),
+			  hptr2[0]));
+			++lptr2;
+			++hptr2;
+			lptr += stride;
+		}
+		n = llen - (!parity) - (parity != (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(BETA),
+			  jpc_fix_add(hptr2[0], hptr2[stride])));
+			++lptr2;
+			++hptr2;
+			lptr += stride;
+			hptr += stride;
+		}
+		if (parity != (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * BETA),
+			  hptr2[0]));
+			++lptr2;
+			++hptr2;
+		}
+
+		/* Apply the third lifting step: same shape as the first step, with GAMMA in place of ALPHA. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * GAMMA),
+			  lptr2[0]));
+			++hptr2;
+			++lptr2;
+			hptr += stride;
+		}
+		n = numrows - llen - parity - (parity == (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(GAMMA),
+			  jpc_fix_add(lptr2[0], lptr2[stride])));
+			++lptr2;
+			++hptr2;
+			hptr += stride;
+			lptr += stride;
+		}
+		if (parity == (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			jpc_fix_pluseq(hptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * GAMMA),
+			  lptr2[0]));
+			++lptr2;
+			++hptr2;
+		}
+
+		/* Apply the fourth lifting step: same shape as the second step, with DELTA in place of BETA. */
+		lptr = &a[0];
+		hptr = &a[llen * stride];
+		if (!parity) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * DELTA),
+			  hptr2[0]));
+			++lptr2;
+			++hptr2;
+			lptr += stride;
+		}
+		n = llen - (!parity) - (parity != (numrows & 1));
+		while (n-- > 0) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(DELTA),
+			  jpc_fix_add(hptr2[0], hptr2[stride])));
+			++lptr2;
+			++hptr2;
+			lptr += stride;
+			hptr += stride;
+		}
+		if (parity != (numrows & 1)) {
+			lptr2 = lptr;
+			hptr2 = hptr;
+			jpc_fix_pluseq(lptr2[0], jpc_fix_mul(jpc_dbltofix(2.0 * DELTA),
+			  hptr2[0]));
+			++lptr2;
+			++hptr2;
+		}
+
+		/* Apply the scaling step (compiled only when WT_DOSCALE is defined): multiply the lowpass samples by LGAIN and the highpass samples by HGAIN. */
+#if defined(WT_DOSCALE)
+		lptr = &a[0];
+		n = llen;
+		while (n-- > 0) {
+			lptr2 = lptr;
+			lptr2[0] = jpc_fix_mul(lptr2[0], jpc_dbltofix(LGAIN));
+			++lptr2;
+			lptr += stride;
+		}
+		hptr = &a[llen * stride];
+		n = numrows - llen;
+		while (n-- > 0) {
+			hptr2 = hptr;
+			hptr2[0] = jpc_fix_mul(hptr2[0], jpc_dbltofix(HGAIN));
+			++hptr2;
+			hptr += stride;
+		}
+#endif
+
+	} else {
+		/* numrows <= 1: the data is left untouched unless WT_LENONE is defined, in which case an odd-parity sample a[0] is doubled. */
+#if defined(WT_LENONE)
+		if (parity) {
+			lptr2 = &a[0];
+			lptr2[0] <<= 1;
+			++lptr2;
+		}
+#endif
+
+	}
+
+}
+
+/* Helper for jpc_ns_invlift_row(): undo one lifting step that modified the
+llen low-pass samples at a[0] using the high-pass samples starting at
+a[llen].  edgecoef weights the single high-pass sample used at either end
+of the row; coef weights the sum of the two high-pass samples used for the
+positions in between. */
+static void jpc_ns_invlift_row_lstep(jpc_fix_t *a, int numcols, int parity,
+  int llen, jpc_fix_t edgecoef, jpc_fix_t coef)
+{
+	jpc_fix_t *lo = &a[0];
+	jpc_fix_t *hi = &a[llen];
+	int cnt;
+
+	if (!parity) {
+		jpc_fix_minuseq(lo[0], jpc_fix_mul(edgecoef, hi[0]));
+		++lo;
+	}
+	cnt = llen - (!parity) - (parity != (numcols & 1));
+	for (; cnt > 0; --cnt) {
+		jpc_fix_minuseq(lo[0], jpc_fix_mul(coef,
+		  jpc_fix_add(hi[0], hi[1])));
+		++lo;
+		++hi;
+	}
+	if (parity != (numcols & 1)) {
+		jpc_fix_minuseq(lo[0], jpc_fix_mul(edgecoef, hi[0]));
+	}
+}
+
+/* Helper for jpc_ns_invlift_row(): undo one lifting step that modified the
+numcols - llen high-pass samples at a[llen] using the low-pass samples
+starting at a[0].  The coefficients play the same roles as in
+jpc_ns_invlift_row_lstep(). */
+static void jpc_ns_invlift_row_hstep(jpc_fix_t *a, int numcols, int parity,
+  int llen, jpc_fix_t edgecoef, jpc_fix_t coef)
+{
+	jpc_fix_t *lo = &a[0];
+	jpc_fix_t *hi = &a[llen];
+	int cnt;
+
+	if (parity) {
+		jpc_fix_minuseq(hi[0], jpc_fix_mul(edgecoef, lo[0]));
+		++hi;
+	}
+	cnt = numcols - llen - parity - (parity == (numcols & 1));
+	for (; cnt > 0; --cnt) {
+		jpc_fix_minuseq(hi[0], jpc_fix_mul(coef,
+		  jpc_fix_add(lo[0], lo[1])));
+		++hi;
+		++lo;
+	}
+	if (parity == (numcols & 1)) {
+		jpc_fix_minuseq(hi[0], jpc_fix_mul(edgecoef, lo[0]));
+	}
+}
+
+/* Apply the inverse lifting steps, in place, to one row of numcols samples.
+The row holds llen = (numcols + 1 - parity) >> 1 low-pass samples followed
+by the high-pass samples.  parity (0 or 1) selects which row ends receive
+the single-neighbour treatment.  Rows of at most one sample are left
+untouched unless WT_LENONE is defined. */
+void jpc_ns_invlift_row(jpc_fix_t *a, int numcols, int parity)
+{
+
+	const int llen = (numcols + 1 - parity) >> 1;
+
+	if (numcols <= 1) {
+
+#if defined(WT_LENONE)
+		if (parity) {
+			a[0] >>= 1;
+		}
+#endif
+		return;
+
+	}
+
+	/* Apply the scaling step. */
+#if defined(WT_DOSCALE)
+	{
+		int k;
+		for (k = 0; k < llen; ++k) {
+			a[k] = jpc_fix_mul(a[k], jpc_dbltofix(1.0 / LGAIN));
+		}
+		for (k = llen; k < numcols; ++k) {
+			a[k] = jpc_fix_mul(a[k], jpc_dbltofix(1.0 / HGAIN));
+		}
+	}
+#endif
+
+	/* Steps one and three rewrite the low-pass half, steps two and four
+	the high-pass half; the order below must not change. */
+	jpc_ns_invlift_row_lstep(a, numcols, parity, llen,
+	  jpc_dbltofix(2.0 * DELTA), jpc_dbltofix(DELTA));
+	jpc_ns_invlift_row_hstep(a, numcols, parity, llen,
+	  jpc_dbltofix(2.0 * GAMMA), jpc_dbltofix(GAMMA));
+	jpc_ns_invlift_row_lstep(a, numcols, parity, llen,
+	  jpc_dbltofix(2.0 * BETA), jpc_dbltofix(BETA));
+	jpc_ns_invlift_row_hstep(a, numcols, parity, llen,
+	  jpc_dbltofix(2.0 * ALPHA), jpc_dbltofix(ALPHA));
+
+}
+
+/* Helper for jpc_ns_invlift_colgrp(): undo one lifting step that modified
+the llen low-pass rows at a[0] using the high-pass rows starting at
+a[llen * stride], for JPC_QMFB_COLGRPSIZE adjacent columns at once.
+edgecoef weights the single high-pass row used at either end; coef weights
+the sum of the two high-pass rows used in between. */
+static void jpc_ns_invlift_colgrp_lstep(jpc_fix_t *a, int numrows,
+  int stride, int parity, int llen, jpc_fix_t edgecoef, jpc_fix_t coef)
+{
+	jpc_fix_t *lrow = &a[0];
+	jpc_fix_t *hrow = &a[llen * stride];
+	int cnt;
+	int c;
+
+	if (!parity) {
+		for (c = 0; c < JPC_QMFB_COLGRPSIZE; ++c) {
+			jpc_fix_minuseq(lrow[c], jpc_fix_mul(edgecoef, hrow[c]));
+		}
+		lrow += stride;
+	}
+	cnt = llen - (!parity) - (parity != (numrows & 1));
+	for (; cnt > 0; --cnt) {
+		for (c = 0; c < JPC_QMFB_COLGRPSIZE; ++c) {
+			jpc_fix_minuseq(lrow[c], jpc_fix_mul(coef,
+			  jpc_fix_add(hrow[c], hrow[c + stride])));
+		}
+		lrow += stride;
+		hrow += stride;
+	}
+	if (parity != (numrows & 1)) {
+		for (c = 0; c < JPC_QMFB_COLGRPSIZE; ++c) {
+			jpc_fix_minuseq(lrow[c], jpc_fix_mul(edgecoef, hrow[c]));
+		}
+	}
+}
+
+/* Helper for jpc_ns_invlift_colgrp(): undo one lifting step that modified
+the numrows - llen high-pass rows at a[llen * stride] using the low-pass
+rows starting at a[0].  The coefficients play the same roles as in
+jpc_ns_invlift_colgrp_lstep(). */
+static void jpc_ns_invlift_colgrp_hstep(jpc_fix_t *a, int numrows,
+  int stride, int parity, int llen, jpc_fix_t edgecoef, jpc_fix_t coef)
+{
+	jpc_fix_t *lrow = &a[0];
+	jpc_fix_t *hrow = &a[llen * stride];
+	int cnt;
+	int c;
+
+	if (parity) {
+		for (c = 0; c < JPC_QMFB_COLGRPSIZE; ++c) {
+			jpc_fix_minuseq(hrow[c], jpc_fix_mul(edgecoef, lrow[c]));
+		}
+		hrow += stride;
+	}
+	cnt = numrows - llen - parity - (parity == (numrows & 1));
+	for (; cnt > 0; --cnt) {
+		for (c = 0; c < JPC_QMFB_COLGRPSIZE; ++c) {
+			jpc_fix_minuseq(hrow[c], jpc_fix_mul(coef,
+			  jpc_fix_add(lrow[c], lrow[c + stride])));
+		}
+		hrow += stride;
+		lrow += stride;
+	}
+	if (parity == (numrows & 1)) {
+		for (c = 0; c < JPC_QMFB_COLGRPSIZE; ++c) {
+			jpc_fix_minuseq(hrow[c], jpc_fix_mul(edgecoef, lrow[c]));
+		}
+	}
+}
+
+/* Apply the inverse lifting steps, in place and vertically, to a group of
+JPC_QMFB_COLGRPSIZE adjacent columns of numrows samples each.  Consecutive
+rows are stride elements apart; the first llen = (numrows + 1 - parity) >> 1
+rows are low-pass and the remaining rows are high-pass.  Groups of at most
+one row are left untouched unless WT_LENONE is defined. */
+void jpc_ns_invlift_colgrp(jpc_fix_t *a, int numrows, int stride,
+  int parity)
+{
+
+	const int llen = (numrows + 1 - parity) >> 1;
+
+	if (numrows <= 1) {
+
+#if defined(WT_LENONE)
+		if (parity) {
+			int c;
+			for (c = 0; c < JPC_QMFB_COLGRPSIZE; ++c) {
+				a[c] >>= 1;
+			}
+		}
+#endif
+		return;
+
+	}
+
+	/* Apply the scaling step. */
+#if defined(WT_DOSCALE)
+	{
+		jpc_fix_t *row;
+		int r;
+		int c;
+
+		row = &a[0];
+		for (r = 0; r < llen; ++r) {
+			for (c = 0; c < JPC_QMFB_COLGRPSIZE; ++c) {
+				row[c] = jpc_fix_mul(row[c], jpc_dbltofix(1.0 / LGAIN));
+			}
+			row += stride;
+		}
+		row = &a[llen * stride];
+		for (r = llen; r < numrows; ++r) {
+			for (c = 0; c < JPC_QMFB_COLGRPSIZE; ++c) {
+				row[c] = jpc_fix_mul(row[c], jpc_dbltofix(1.0 / HGAIN));
+			}
+			row += stride;
+		}
+	}
+#endif
+
+	/* Steps one and three rewrite the low-pass rows, steps two and four
+	the high-pass rows; the order below must not change. */
+	jpc_ns_invlift_colgrp_lstep(a, numrows, stride, parity, llen,
+	  jpc_dbltofix(2.0 * DELTA), jpc_dbltofix(DELTA));
+	jpc_ns_invlift_colgrp_hstep(a, numrows, stride, parity, llen,
+	  jpc_dbltofix(2.0 * GAMMA), jpc_dbltofix(GAMMA));
+	jpc_ns_invlift_colgrp_lstep(a, numrows, stride, parity, llen,
+	  jpc_dbltofix(2.0 * BETA), jpc_dbltofix(BETA));
+	jpc_ns_invlift_colgrp_hstep(a, numrows, stride, parity, llen,
+	  jpc_dbltofix(2.0 * ALPHA), jpc_dbltofix(ALPHA));
+
+}
+
+/* Helper for jpc_ns_invlift_colres(): undo one lifting step that modified
+the llen low-pass rows at a[0] using the high-pass rows starting at
+a[llen * stride], for numcols adjacent columns at once.  edgecoef weights
+the single high-pass row used at either end; coef weights the sum of the
+two high-pass rows used in between. */
+static void jpc_ns_invlift_colres_lstep(jpc_fix_t *a, int numrows,
+  int numcols, int stride, int parity, int llen, jpc_fix_t edgecoef,
+  jpc_fix_t coef)
+{
+	jpc_fix_t *lrow = &a[0];
+	jpc_fix_t *hrow = &a[llen * stride];
+	int cnt;
+	int c;
+
+	if (!parity) {
+		for (c = 0; c < numcols; ++c) {
+			jpc_fix_minuseq(lrow[c], jpc_fix_mul(edgecoef, hrow[c]));
+		}
+		lrow += stride;
+	}
+	cnt = llen - (!parity) - (parity != (numrows & 1));
+	for (; cnt > 0; --cnt) {
+		for (c = 0; c < numcols; ++c) {
+			jpc_fix_minuseq(lrow[c], jpc_fix_mul(coef,
+			  jpc_fix_add(hrow[c], hrow[c + stride])));
+		}
+		lrow += stride;
+		hrow += stride;
+	}
+	if (parity != (numrows & 1)) {
+		for (c = 0; c < numcols; ++c) {
+			jpc_fix_minuseq(lrow[c], jpc_fix_mul(edgecoef, hrow[c]));
+		}
+	}
+}
+
+/* Helper for jpc_ns_invlift_colres(): undo one lifting step that modified
+the numrows - llen high-pass rows at a[llen * stride] using the low-pass
+rows starting at a[0].  The coefficients play the same roles as in
+jpc_ns_invlift_colres_lstep(). */
+static void jpc_ns_invlift_colres_hstep(jpc_fix_t *a, int numrows,
+  int numcols, int stride, int parity, int llen, jpc_fix_t edgecoef,
+  jpc_fix_t coef)
+{
+	jpc_fix_t *lrow = &a[0];
+	jpc_fix_t *hrow = &a[llen * stride];
+	int cnt;
+	int c;
+
+	if (parity) {
+		for (c = 0; c < numcols; ++c) {
+			jpc_fix_minuseq(hrow[c], jpc_fix_mul(edgecoef, lrow[c]));
+		}
+		hrow += stride;
+	}
+	cnt = numrows - llen - parity - (parity == (numrows & 1));
+	for (; cnt > 0; --cnt) {
+		for (c = 0; c < numcols; ++c) {
+			jpc_fix_minuseq(hrow[c], jpc_fix_mul(coef,
+			  jpc_fix_add(lrow[c], lrow[c + stride])));
+		}
+		hrow += stride;
+		lrow += stride;
+	}
+	if (parity == (numrows & 1)) {
+		for (c = 0; c < numcols; ++c) {
+			jpc_fix_minuseq(hrow[c], jpc_fix_mul(edgecoef, lrow[c]));
+		}
+	}
+}
+
+/* Apply the inverse lifting steps, in place and vertically, to numcols
+adjacent columns of numrows samples each (the variable-width counterpart of
+the fixed-width column-group routine).  Consecutive rows are stride elements
+apart; the first llen = (numrows + 1 - parity) >> 1 rows are low-pass and
+the remaining rows are high-pass.  Inputs of at most one row are left
+untouched unless WT_LENONE is defined. */
+void jpc_ns_invlift_colres(jpc_fix_t *a, int numrows, int numcols,
+  int stride, int parity)
+{
+
+	const int llen = (numrows + 1 - parity) >> 1;
+
+	if (numrows <= 1) {
+
+#if defined(WT_LENONE)
+		if (parity) {
+			int c;
+			for (c = 0; c < numcols; ++c) {
+				a[c] >>= 1;
+			}
+		}
+#endif
+		return;
+
+	}
+
+	/* Apply the scaling step. */
+#if defined(WT_DOSCALE)
+	{
+		jpc_fix_t *row;
+		int r;
+		int c;
+
+		row = &a[0];
+		for (r = 0; r < llen; ++r) {
+			for (c = 0; c < numcols; ++c) {
+				row[c] = jpc_fix_mul(row[c], jpc_dbltofix(1.0 / LGAIN));
+			}
+			row += stride;
+		}
+		row = &a[llen * stride];
+		for (r = llen; r < numrows; ++r) {
+			for (c = 0; c < numcols; ++c) {
+				row[c] = jpc_fix_mul(row[c], jpc_dbltofix(1.0 / HGAIN));
+			}
+			row += stride;
+		}
+	}
+#endif
+
+	/* Steps one and three rewrite the low-pass rows, steps two and four
+	the high-pass rows; the order below must not change. */
+	jpc_ns_invlift_colres_lstep(a, numrows, numcols, stride, parity, llen,
+	  jpc_dbltofix(2.0 * DELTA), jpc_dbltofix(DELTA));
+	jpc_ns_invlift_colres_hstep(a, numrows, numcols, stride, parity, llen,
+	  jpc_dbltofix(2.0 * GAMMA), jpc_dbltofix(GAMMA));
+	jpc_ns_invlift_colres_lstep(a, numrows, numcols, stride, parity, llen,
+	  jpc_dbltofix(2.0 * BETA), jpc_dbltofix(BETA));
+	jpc_ns_invlift_colres_hstep(a, numrows, numcols, stride, parity, llen,
+	  jpc_dbltofix(2.0 * ALPHA), jpc_dbltofix(ALPHA));
+
+}
+
+/* Helper for jpc_ns_invlift_col(): undo one lifting step that modified the
+llen low-pass samples of a single column (starting at a[0], stride elements
+apart) using the high-pass samples starting at a[llen * stride].  edgecoef
+weights the single high-pass sample used at either end; coef weights the
+sum of the two high-pass samples used in between. */
+static void jpc_ns_invlift_col_lstep(jpc_fix_t *a, int numrows, int stride,
+  int parity, int llen, jpc_fix_t edgecoef, jpc_fix_t coef)
+{
+	jpc_fix_t *lo = &a[0];
+	jpc_fix_t *hi = &a[llen * stride];
+	int cnt;
+
+	if (!parity) {
+		jpc_fix_minuseq(lo[0], jpc_fix_mul(edgecoef, hi[0]));
+		lo += stride;
+	}
+	cnt = llen - (!parity) - (parity != (numrows & 1));
+	for (; cnt > 0; --cnt) {
+		jpc_fix_minuseq(lo[0], jpc_fix_mul(coef,
+		  jpc_fix_add(hi[0], hi[stride])));
+		lo += stride;
+		hi += stride;
+	}
+	if (parity != (numrows & 1)) {
+		jpc_fix_minuseq(lo[0], jpc_fix_mul(edgecoef, hi[0]));
+	}
+}
+
+/* Helper for jpc_ns_invlift_col(): undo one lifting step that modified the
+numrows - llen high-pass samples of a single column (starting at
+a[llen * stride]) using the low-pass samples starting at a[0].  The
+coefficients play the same roles as in jpc_ns_invlift_col_lstep(). */
+static void jpc_ns_invlift_col_hstep(jpc_fix_t *a, int numrows, int stride,
+  int parity, int llen, jpc_fix_t edgecoef, jpc_fix_t coef)
+{
+	jpc_fix_t *lo = &a[0];
+	jpc_fix_t *hi = &a[llen * stride];
+	int cnt;
+
+	if (parity) {
+		jpc_fix_minuseq(hi[0], jpc_fix_mul(edgecoef, lo[0]));
+		hi += stride;
+	}
+	cnt = numrows - llen - parity - (parity == (numrows & 1));
+	for (; cnt > 0; --cnt) {
+		jpc_fix_minuseq(hi[0], jpc_fix_mul(coef,
+		  jpc_fix_add(lo[0], lo[stride])));
+		hi += stride;
+		lo += stride;
+	}
+	if (parity == (numrows & 1)) {
+		jpc_fix_minuseq(hi[0], jpc_fix_mul(edgecoef, lo[0]));
+	}
+}
+
+/* Apply the inverse lifting steps, in place, to a single column of numrows
+samples whose consecutive elements are stride apart.  The first
+llen = (numrows + 1 - parity) >> 1 samples are low-pass and the remaining
+ones are high-pass.  Columns of at most one sample are left untouched
+unless WT_LENONE is defined. */
+void jpc_ns_invlift_col(jpc_fix_t *a, int numrows, int stride,
+  int parity)
+{
+
+	const int llen = (numrows + 1 - parity) >> 1;
+
+	if (numrows <= 1) {
+
+#if defined(WT_LENONE)
+		if (parity) {
+			a[0] >>= 1;
+		}
+#endif
+		return;
+
+	}
+
+	/* Apply the scaling step. */
+#if defined(WT_DOSCALE)
+	{
+		jpc_fix_t *p;
+		int r;
+
+		p = &a[0];
+		for (r = 0; r < llen; ++r) {
+			p[0] = jpc_fix_mul(p[0], jpc_dbltofix(1.0 / LGAIN));
+			p += stride;
+		}
+		p = &a[llen * stride];
+		for (r = llen; r < numrows; ++r) {
+			p[0] = jpc_fix_mul(p[0], jpc_dbltofix(1.0 / HGAIN));
+			p += stride;
+		}
+	}
+#endif
+
+	/* Steps one and three rewrite the low-pass samples, steps two and four
+	the high-pass samples; the order below must not change. */
+	jpc_ns_invlift_col_lstep(a, numrows, stride, parity, llen,
+	  jpc_dbltofix(2.0 * DELTA), jpc_dbltofix(DELTA));
+	jpc_ns_invlift_col_hstep(a, numrows, stride, parity, llen,
+	  jpc_dbltofix(2.0 * GAMMA), jpc_dbltofix(GAMMA));
+	jpc_ns_invlift_col_lstep(a, numrows, stride, parity, llen,
+	  jpc_dbltofix(2.0 * BETA), jpc_dbltofix(BETA));
+	jpc_ns_invlift_col_hstep(a, numrows, stride, parity, llen,
+	  jpc_dbltofix(2.0 * ALPHA), jpc_dbltofix(ALPHA));
+
+}
+
+/* Forward 2-D transform of a width x height region stored at a with stride
+elements between rows.  Columns are processed first (split, then forward
+lifting), in groups of JPC_QMFB_COLGRPSIZE plus one residual group, and
+then every row.  Only the low bit of xstart and ystart is used, as the
+column and row parity respectively.  Always returns 0. */
+int jpc_ns_analyze(jpc_fix_t *a, int xstart, int ystart, int width, int height,
+  int stride)
+{
+
+	const int rowparity = ystart & 1;
+	const int colparity = xstart & 1;
+	/* Largest multiple of the group size that fits in width. */
+	const int maxcols = (width / JPC_QMFB_COLGRPSIZE) * JPC_QMFB_COLGRPSIZE;
+	jpc_fix_t *cur;
+	int col;
+	int row;
+
+	/* Vertical pass: full column groups, then the leftover columns. */
+	cur = &a[0];
+	for (col = 0; col < maxcols; col += JPC_QMFB_COLGRPSIZE) {
+		jpc_qmfb_split_colgrp(cur, height, stride, rowparity);
+		jpc_ns_fwdlift_colgrp(cur, height, stride, rowparity);
+		cur += JPC_QMFB_COLGRPSIZE;
+	}
+	if (maxcols < width) {
+		jpc_qmfb_split_colres(cur, height, width - maxcols, stride,
+		  rowparity);
+		jpc_ns_fwdlift_colres(cur, height, width - maxcols, stride,
+		  rowparity);
+	}
+
+	/* Horizontal pass: one row at a time. */
+	cur = &a[0];
+	for (row = 0; row < height; ++row) {
+		jpc_qmfb_split_row(cur, width, colparity);
+		jpc_ns_fwdlift_row(cur, width, colparity);
+		cur += stride;
+	}
+
+	return 0;
+
+}
+
+/* Inverse 2-D transform of a width x height region stored at a with stride
+elements between rows.  Rows are processed first (inverse lifting, then
+join), followed by the columns in groups of JPC_QMFB_COLGRPSIZE plus one
+residual group.  Only the low bit of xstart and ystart is used, as the
+column and row parity respectively.  Returns 0, including when the
+column pass is cut short by an abort request. */
+int jpc_ns_synthesize(jpc_fix_t *a, int xstart, int ystart, int width,
+  int height, int stride)
+{
+
+	const int rowparity = ystart & 1;
+	const int colparity = xstart & 1;
+	/* Largest multiple of the group size that fits in width. */
+	const int maxcols = (width / JPC_QMFB_COLGRPSIZE) * JPC_QMFB_COLGRPSIZE;
+	jpc_fix_t *cur;
+	int row;
+	int col;
+
+	/* Horizontal pass: one row at a time. */
+	cur = &a[0];
+	for (row = 0; row < height; ++row) {
+		jpc_ns_invlift_row(cur, width, colparity);
+		jpc_qmfb_join_row(cur, width, colparity);
+		cur += stride;
+	}
+
+	/* Vertical pass: full column groups, then the leftover columns. */
+	cur = &a[0];
+	for (col = 0; col < maxcols; col += JPC_QMFB_COLGRPSIZE) {
+		/* GeoJasper (dima): report progress before each column group. */
+		jas_do_progress(col, maxcols, "jpc: synthesize");
+		if (jas_test_abort() == 1) {
+			/* NOTE(review): an abort leaves the data partially
+			transformed yet reports 0; presumably the caller checks
+			the abort state itself -- confirm. */
+			return 0;
+		}
+
+		jpc_ns_invlift_colgrp(cur, height, stride, rowparity);
+		jpc_qmfb_join_colgrp(cur, height, stride, rowparity);
+		cur += JPC_QMFB_COLGRPSIZE;
+	}
+	if (maxcols < width) {
+		jpc_ns_invlift_colres(cur, height, width - maxcols, stride,
+		  rowparity);
+		jpc_qmfb_join_colres(cur, height, width - maxcols, stride,
+		  rowparity);
+	}
+
+	return 0;
+
+}
+
diff --git a/src/libjasper/jpc/jpc_qmfb.h b/src/libjasper/jpc/jpc_qmfb.h
new file mode 100644
index 0000000..2586d55
--- /dev/null
+++ b/src/libjasper/jpc/jpc_qmfb.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2004 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Quadrature Mirror-Image Filter Bank (QMFB) Routines
+ *
+ * $Id: jpc_qmfb.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_QMFB_H
+#define JPC_QMFB_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_seq.h"
+
+/******************************************************************************\
+* Constants.
+\******************************************************************************/
+
+/* QMFB IDs. */
+#define	JPC_QMFB1D_FT	1	/* 5/3 */
+#define	JPC_QMFB1D_NS	2	/* 9/7 */
+
+/******************************************************************************\
+* Types.
+\******************************************************************************/
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+#if !defined(JPC_QMFB_COLGRPSIZE)
+/* The number of columns to group together during the vertical processing
+stage of the wavelet transform. */
+/* The default value for this parameter is probably not optimal for
+any particular platform.  Hopefully, it is not too unreasonable, however. */
+#define JPC_QMFB_COLGRPSIZE	16
+#endif
+
+/* Descriptor for a 2-D filter bank: a pair of transform entry points plus
+two tables of doubles.  Instances are declared below as jpc_ft_qmfb2d and
+jpc_ns_qmfb2d. */
+typedef struct {
+	/* Forward transform.  NOTE(review): presumably called as
+	(data, xstart, ystart, width, height, stride) -- confirm against the
+	implementations; note the data pointer is typed int * here. */
+	int (*analyze)(int *, int, int, int, int, int);
+	/* Inverse transform; same parameter list as analyze. */
+	int (*synthesize)(int *, int, int, int, int, int);
+	/* NOTE(review): presumably per-level energy weights for the low-pass
+	band; length and indexing are not visible here. */
+	double *lpenergywts;
+	/* NOTE(review): presumably the high-pass counterpart of lpenergywts. */
+	double *hpenergywts;
+} jpc_qmfb2d_t;
+
+extern jpc_qmfb2d_t jpc_ft_qmfb2d;
+extern jpc_qmfb2d_t jpc_ns_qmfb2d;
+
+#endif
diff --git a/src/libjasper/jpc/jpc_t1cod.c b/src/libjasper/jpc/jpc_t1cod.c
new file mode 100644
index 0000000..ea4d2b1
--- /dev/null
+++ b/src/libjasper/jpc/jpc_t1cod.c
@@ -0,0 +1,497 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * $Id: jpc_t1cod.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <math.h>
+
+#include "jasper/jas_types.h"
+#include "jasper/jas_math.h"
+
+#include "jpc_bs.h"
+#include "jpc_dec.h"
+#include "jpc_cs.h"
+#include "jpc_mqcod.h"
+#include "jpc_t1cod.h"
+#include "jpc_tsfb.h"
+
+double jpc_pow2i(int n);
+
+/******************************************************************************\
+* Global data.
+\******************************************************************************/
+
+/* All tables below except jpc_mqctxs are filled in by jpc_initluts(). */
+
+/* jpc_getzcctxno(i, orient) stored at index (orient << 8) | i. */
+int jpc_zcctxnolut[4 * 256];
+/* jpc_getspb(i << 4) stored at index i. */
+int jpc_spblut[256];
+/* jpc_getscctxno(i << 4) stored at index i. */
+int jpc_scctxnolut[256];
+/* jpc_getmagctxno() results stored at index (refine << 11) + i, with
+refine in {0, 1} and i in 0..2047. */
+int jpc_magctxnolut[4096];
+
+/* Fixed-point tables indexed by a JPC_NMSEDEC_BITS-bit value; the "0"
+variants are computed with a different formula (see jpc_initluts()). */
+jpc_fix_t jpc_signmsedec[1 << JPC_NMSEDEC_BITS];
+jpc_fix_t jpc_refnmsedec[1 << JPC_NMSEDEC_BITS];
+jpc_fix_t jpc_signmsedec0[1 << JPC_NMSEDEC_BITS];
+jpc_fix_t jpc_refnmsedec0[1 << JPC_NMSEDEC_BITS];
+
+/* Context array initialized by jpc_initmqctxs() via jpc_initctxs(). */
+jpc_mqctx_t jpc_mqctxs[JPC_NUMCTXS];
+
+/******************************************************************************\
+* Local function prototypes.
+\******************************************************************************/
+
+void jpc_initmqctxs(void);
+
+/******************************************************************************\
+* Code.
+\******************************************************************************/
+
+/* Map a coding pass number to its pass type.  Pass numbers cycle through
+JPC_CLNPASS, JPC_SIGPASS, JPC_REFPASS according to passno % 3.  Any other
+remainder (only possible for a negative passno) trips an assertion and
+yields -1. */
+int JPC_PASSTYPE(int passno)
+{
+	const int phase = passno % 3;
+
+	if (phase == 0) {
+		return JPC_CLNPASS;
+	}
+	if (phase == 1) {
+		return JPC_SIGPASS;
+	}
+	if (phase == 2) {
+		return JPC_REFPASS;
+	}
+	assert(0);
+	return -1;
+}
+
+/* Return the nominal gain for a band: always 0 when qmfbid is JPC_COX_INS;
+otherwise (qmfbid must be JPC_COX_RFT) 0 at level 0, 1 for the LH and HL
+orientations and 2 for HH.  Any other orientation at a nonzero level
+aborts the program.  numlvls is not used. */
+int JPC_NOMINALGAIN(int qmfbid, int numlvls, int lvlno, int orient)
+{
+	/* Avoid compiler warnings about unused parameters. */
+	numlvls = 0;
+
+	if (qmfbid == JPC_COX_INS) {
+		return 0;
+	}
+	assert(qmfbid == JPC_COX_RFT);
+
+	if (lvlno == 0) {
+		assert(orient == JPC_TSFB_LL);
+		return 0;
+	}
+	if (orient == JPC_TSFB_LH || orient == JPC_TSFB_HL) {
+		return 1;
+	}
+	if (orient == JPC_TSFB_HH) {
+		return 2;
+	}
+	abort();
+}
+
+/******************************************************************************\
+* Coding pass related functions.
+\******************************************************************************/
+
+/* Return the segment type used for pass passno.  Without bypass every pass
+is JPC_SEG_MQ.  With bypass, cleanup passes and all passes earlier than
+firstpassno + 10 are JPC_SEG_MQ; the rest are JPC_SEG_RAW. */
+int JPC_SEGTYPE(int passno, int firstpassno, int bypass)
+{
+	if (!bypass) {
+		return JPC_SEG_MQ;
+	}
+	if (JPC_PASSTYPE(passno) == JPC_CLNPASS) {
+		return JPC_SEG_MQ;
+	}
+	return (passno < firstpassno + 10) ? JPC_SEG_MQ : JPC_SEG_RAW;
+}
+
+/* Return how many passes, starting at passno, belong to the same segment.
+The mode-dependent count (1 with termall; with bypass, the remainder of
+the first ten passes, then 2 for a significance pass and 1 otherwise;
+JPC_PREC * 3 - 2 in the default mode) is capped at numpasses - passno. */
+int JPC_SEGPASSCNT(int passno, int firstpassno, int numpasses, int bypass, int termall)
+{
+	int cnt;
+
+	if (termall) {
+		cnt = 1;
+	} else if (!bypass) {
+		cnt = JPC_PREC * 3 - 2;
+	} else if (passno < firstpassno + 10) {
+		cnt = 10 - (passno - firstpassno);
+	} else {
+		switch (JPC_PASSTYPE(passno)) {
+		case JPC_SIGPASS:
+			cnt = 2;
+			break;
+		case JPC_REFPASS:
+		case JPC_CLNPASS:
+			cnt = 1;
+			break;
+		default:
+			cnt = -1;
+			assert(0);
+			break;
+		}
+	}
+	return JAS_MIN(cnt, numpasses - passno);
+}
+
+/* Return 1 if pass passno ends its segment, 0 otherwise: either it is the
+last of the numpasses passes counted from firstpassno, or JPC_SEGPASSCNT()
+reports at most one pass remaining in the segment.  lazy is forwarded as
+the bypass argument of JPC_SEGPASSCNT(). */
+int JPC_ISTERMINATED(int passno, int firstpassno, int numpasses, int termall,
+  int lazy)
+{
+	if (passno - firstpassno == numpasses - 1) {
+		return 1;
+	}
+	return (JPC_SEGPASSCNT(passno, firstpassno, numpasses, lazy,
+	  termall) <= 1) ? 1 : 0;
+}
+
+/******************************************************************************\
+* Lookup table code.
+\******************************************************************************/
+
+/* Populate the global lookup tables (context numbers, sign prediction
+bits and the fixed-point nmsedec tables) and initialize the global
+context array via jpc_initmqctxs(). */
+void jpc_initluts()
+{
+	/* 2^FRACBITS and 2^-FRACBITS; jpc_pow2i() depends only on its
+	argument, so compute each value once. */
+	const double scale = jpc_pow2i(JPC_NMSEDEC_FRACBITS);
+	const double invscale = jpc_pow2i(-JPC_NMSEDEC_FRACBITS);
+	int idx;
+	int orient;
+	int refine;
+	float t;
+	float u;
+	float v;
+
+	/* XXX - hack */
+	jpc_initmqctxs();
+
+	for (orient = 0; orient < 4; ++orient) {
+		for (idx = 0; idx < 256; ++idx) {
+			jpc_zcctxnolut[(orient << 8) | idx] =
+			  jpc_getzcctxno(idx, orient);
+		}
+	}
+
+	/* Both tables are indexed by the same 8-bit value, passed to the
+	lookup functions shifted left by four bits. */
+	for (idx = 0; idx < 256; ++idx) {
+		jpc_spblut[idx] = jpc_getspb(idx << 4);
+		jpc_scctxnolut[idx] = jpc_getscctxno(idx << 4);
+	}
+
+	for (refine = 0; refine < 2; ++refine) {
+		for (idx = 0; idx < 2048; ++idx) {
+			jpc_magctxnolut[(refine << 11) + idx] =
+			  jpc_getmagctxno((refine ? JPC_REFINE : 0) | idx);
+		}
+	}
+
+	for (idx = 0; idx < (1 << JPC_NMSEDEC_BITS); ++idx) {
+		t = idx * invscale;
+
+		u = t;
+		v = t - 1.5;
+		jpc_signmsedec[idx] = jpc_dbltofix(floor((u * u - v * v) * scale + 0.5) / scale);
+		/* XXX - this calc is not correct */
+		jpc_signmsedec0[idx] = jpc_dbltofix(floor((u * u) * scale + 0.5) / scale);
+
+		u = t - 1.0;
+		/* The offset depends on the top bit of the table index. */
+		v = (idx & (1 << (JPC_NMSEDEC_BITS - 1))) ? t - 1.5 : t - 0.5;
+		jpc_refnmsedec[idx] = jpc_dbltofix(floor((u * u - v * v) * scale + 0.5) / scale);
+		/* XXX - this calc is not correct */
+		jpc_refnmsedec0[idx] = jpc_dbltofix(floor((u * u) * scale + 0.5) / scale);
+	}
+}
+
+/* Function form of jpc_getsignmsedec_macro() that additionally asserts
+that x has no bits set outside JAS_ONES(bitpos + 1) (presumably the low
+bitpos + 1 bits -- confirm against the JAS_ONES definition). */
+jpc_fix_t jpc_getsignmsedec_func(jpc_fix_t x, int bitpos)
+{
+	assert((x & (~JAS_ONES(bitpos + 1))) == 0);
+	return jpc_getsignmsedec_macro(x, bitpos);
+}
+
+/* Compute the zero-coding context number for neighbourhood flags f and
+band orientation orient.  h, v and d count the flagged horizontal (W, E),
+vertical (N, S) and diagonal neighbours.  Returns JPC_ZCCTXNO + n with n in
+0..8; an orientation other than LL, LH, HL or HH yields n = 0. */
+int jpc_getzcctxno(int f, int orient)
+{
+	int h;
+	int v;
+	int d;
+	int n;
+
+	h = ((f & JPC_WSIG) != 0) + ((f & JPC_ESIG) != 0);
+	v = ((f & JPC_NSIG) != 0) + ((f & JPC_SSIG) != 0);
+	d = ((f & JPC_NWSIG) != 0) + ((f & JPC_NESIG) != 0) + ((f & JPC_SESIG) != 0) + ((f & JPC_SWSIG) != 0);
+
+	if (orient == JPC_TSFB_HH) {
+		const int hv = h + v;
+
+		switch (d) {
+		case 0:
+			n = (hv == 0) ? 0 : ((hv == 1) ? 1 : 2);
+			break;
+		case 1:
+			n = (hv == 0) ? 3 : ((hv == 1) ? 4 : 5);
+			break;
+		case 2:
+			n = (hv == 0) ? 6 : 7;
+			break;
+		default:
+			n = 8;
+			break;
+		}
+	} else if (orient == JPC_TSFB_HL || orient == JPC_TSFB_LL ||
+	  orient == JPC_TSFB_LH) {
+		if (orient == JPC_TSFB_HL) {
+			/* HL uses the LL/LH table with the horizontal and
+			vertical counts exchanged. */
+			const int tmp = h;
+			h = v;
+			v = tmp;
+		}
+		switch (h) {
+		case 0:
+			if (v == 0) {
+				n = (d == 0) ? 0 : ((d == 1) ? 1 : 2);
+			} else {
+				n = (v == 1) ? 3 : 4;
+			}
+			break;
+		case 1:
+			if (v == 0) {
+				n = (d == 0) ? 5 : 6;
+			} else {
+				n = 7;
+			}
+			break;
+		default:
+			n = 8;
+			break;
+		}
+	} else {
+		n = 0;
+	}
+
+	assert(n < JPC_NUMZCCTXS);
+	return JPC_ZCCTXNO + n;
+}
+
+/* Compute a sign prediction bit (0 or 1) from neighbourhood flags f.  For
+each of the E, W, N, S neighbours the flags are tested for "SIG set, SGN
+clear" and for "SIG and SGN both set"; the horizontal and vertical
+contributions hc and vc each end up in -1..1.  The result is 1 when hc is
+negative, or when hc is zero and vc is negative. */
+int jpc_getspb(int f)
+{
+	const int eplain = (f & (JPC_ESIG | JPC_ESGN)) == JPC_ESIG;
+	const int wplain = (f & (JPC_WSIG | JPC_WSGN)) == JPC_WSIG;
+	const int nplain = (f & (JPC_NSIG | JPC_NSGN)) == JPC_NSIG;
+	const int splain = (f & (JPC_SSIG | JPC_SSGN)) == JPC_SSIG;
+	const int esgn = (f & (JPC_ESIG | JPC_ESGN)) == (JPC_ESIG | JPC_ESGN);
+	const int wsgn = (f & (JPC_WSIG | JPC_WSGN)) == (JPC_WSIG | JPC_WSGN);
+	const int nsgn = (f & (JPC_NSIG | JPC_NSGN)) == (JPC_NSIG | JPC_NSGN);
+	const int ssgn = (f & (JPC_SSIG | JPC_SSGN)) == (JPC_SSIG | JPC_SSGN);
+	const int hc = JAS_MIN(eplain + wplain, 1) - JAS_MIN(esgn + wsgn, 1);
+	const int vc = JAS_MIN(nplain + splain, 1) - JAS_MIN(nsgn + ssgn, 1);
+
+	/* The horizontal contribution decides; the vertical one only breaks
+	a horizontal tie. */
+	if (hc != 0) {
+		return hc < 0;
+	}
+	return vc < 0;
+}
+
+/* Compute the sign-coding context number from neighbourhood flags f.  The
+horizontal and vertical contributions hc and vc (each in -1..1) are derived
+as in jpc_getspb(); a negative hc is normalized by negating both.  Returns
+JPC_SCCTXNO + n with n in 0..4. */
+int jpc_getscctxno(int f)
+{
+	const int eplain = (f & (JPC_ESIG | JPC_ESGN)) == JPC_ESIG;
+	const int wplain = (f & (JPC_WSIG | JPC_WSGN)) == JPC_WSIG;
+	const int nplain = (f & (JPC_NSIG | JPC_NSGN)) == JPC_NSIG;
+	const int splain = (f & (JPC_SSIG | JPC_SSGN)) == JPC_SSIG;
+	const int esgn = (f & (JPC_ESIG | JPC_ESGN)) == (JPC_ESIG | JPC_ESGN);
+	const int wsgn = (f & (JPC_WSIG | JPC_WSGN)) == (JPC_WSIG | JPC_WSGN);
+	const int nsgn = (f & (JPC_NSIG | JPC_NSGN)) == (JPC_NSIG | JPC_NSGN);
+	const int ssgn = (f & (JPC_SSIG | JPC_SSGN)) == (JPC_SSIG | JPC_SSGN);
+	int hc = JAS_MIN(eplain + wplain, 1) - JAS_MIN(esgn + wsgn, 1);
+	int vc = JAS_MIN(nplain + splain, 1) - JAS_MIN(nsgn + ssgn, 1);
+	/* Stays 0 if hc is neither 0 nor 1 after normalization. */
+	int n = 0;
+
+	assert(hc >= -1 && hc <= 1 && vc >= -1 && vc <= 1);
+	if (hc < 0) {
+		hc = -hc;
+		vc = -vc;
+	}
+	if (hc == 0) {
+		/* Only whether vc is zero matters here. */
+		n = (vc == 0) ? 0 : 1;
+	} else if (hc == 1) {
+		n = (vc == -1) ? 2 : ((vc == 0) ? 3 : 4);
+	}
+	assert(n < JPC_NUMSCCTXS);
+	return JPC_SCCTXNO + n;
+}
+
+/* Compute the magnitude-refinement context number from flags f: n is 2
+when JPC_REFINE is set, otherwise 1 if any JPC_OTHSIGMSK bit is set and 0
+if none is.  Returns JPC_MAGCTXNO + n. */
+int jpc_getmagctxno(int f)
+{
+	int n = 2;
+
+	if (!(f & JPC_REFINE)) {
+		n = (f & (JPC_OTHSIGMSK)) ? 1 : 0;
+	}
+
+	assert(n < JPC_NUMMAGCTXS);
+	return JPC_MAGCTXNO + n;
+}
+
+/* Initialize the JPC_NUMCTXS contexts in ctxs: mps is cleared everywhere,
+and ind is 46 for JPC_UCTXNO, 4 for JPC_ZCCTXNO, 3 for JPC_AGGCTXNO and 0
+for every other context. */
+void jpc_initctxs(jpc_mqctx_t *ctxs)
+{
+	int i;
+
+	for (i = 0; i < JPC_NUMCTXS; ++i) {
+		int ind;
+
+		if (i == JPC_UCTXNO) {
+			ind = 46;
+		} else if (i == JPC_ZCCTXNO) {
+			ind = 4;
+		} else if (i == JPC_AGGCTXNO) {
+			ind = 3;
+		} else {
+			ind = 0;
+		}
+		ctxs[i].mps = 0;
+		ctxs[i].ind = ind;
+	}
+}
+
+/* Initialize the global context array jpc_mqctxs with jpc_initctxs().
+The parameter list is spelled (void) so the definition is a real
+prototype and matches the declaration near the top of this file. */
+void jpc_initmqctxs(void)
+{
+	jpc_initctxs(jpc_mqctxs);
+}
+
+/* Calculate the real quantity exp2(n), where n is an integer.
+Uses ldexp() from <math.h>, which scales 1.0 by 2^n exactly wherever the
+result is representable, saturates to infinity on overflow and goes to
+zero on underflow.  Unlike a multiply loop it takes constant time and has
+no signed-overflow hazard when n is INT_MIN. */
+double jpc_pow2i(int n)
+{
+	return ldexp(1.0, n);
+}
diff --git a/src/libjasper/jpc/jpc_t1cod.h b/src/libjasper/jpc/jpc_t1cod.h
new file mode 100644
index 0000000..065c0af
--- /dev/null
+++ b/src/libjasper/jpc/jpc_t1cod.h
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * $Id: jpc_t1cod.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_T1COD_H
+#define JPC_T1COD_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_fix.h"
+#include "jasper/jas_math.h"
+
+#include "jpc_mqcod.h"
+#include "jpc_tsfb.h"
+
+/******************************************************************************\
+* Constants.
+\******************************************************************************/
+
+/* The number of bits used to index into various lookup tables. */
+#define JPC_NMSEDEC_BITS	7
+#define JPC_NMSEDEC_FRACBITS	(JPC_NMSEDEC_BITS - 1)
+
+/*
+ * Segment types.
+ */
+
+/* Invalid. */
+#define JPC_SEG_INVALID	0
+/* MQ. */
+#define JPC_SEG_MQ		1
+/* Raw. */
+#define JPC_SEG_RAW		2
+
+/* The nominal word size. */
+#define	JPC_PREC	32
+
+/* Tier-1 coding pass types. */
+#define	JPC_SIGPASS	0	/* significance */
+#define	JPC_REFPASS	1	/* refinement */
+#define	JPC_CLNPASS	2	/* cleanup */
+
+/*
+ * Per-sample state information for tier-1 coding.
+ */
+
+/* The northeast neighbour has been found to be significant. */
+#define	JPC_NESIG	0x0001
+/* The southeast neighbour has been found to be significant. */
+#define	JPC_SESIG	0x0002
+/* The southwest neighbour has been found to be significant. */
+#define	JPC_SWSIG	0x0004
+/* The northwest neighbour has been found to be significant. */
+#define	JPC_NWSIG	0x0008
+/* The north neighbour has been found to be significant. */
+#define	JPC_NSIG	0x0010
+/* The east neighbour has been found to be significant. */
+#define	JPC_ESIG	0x0020
+/* The south neighbour has been found to be significant. */
+#define	JPC_SSIG	0x0040
+/* The west neighbour has been found to be significant. */
+#define	JPC_WSIG	0x0080
+/* The significance mask for 8-connected neighbours. */
+#define	JPC_OTHSIGMSK \
+	(JPC_NSIG | JPC_NESIG | JPC_ESIG | JPC_SESIG | JPC_SSIG | JPC_SWSIG | JPC_WSIG | JPC_NWSIG)
+/* The significance mask for 4-connected neighbours. */
+#define	JPC_PRIMSIGMSK	(JPC_NSIG | JPC_ESIG | JPC_SSIG | JPC_WSIG)
+
+/* The north neighbour is negative in value. */
+#define	JPC_NSGN	0x0100
+/* The east neighbour is negative in value. */
+#define	JPC_ESGN	0x0200
+/* The south neighbour is negative in value. */
+#define	JPC_SSGN	0x0400
+/* The west neighbour is negative in value. */
+#define	JPC_WSGN	0x0800
+/* The sign mask for 4-connected neighbours. */
+#define	JPC_SGNMSK	(JPC_NSGN | JPC_ESGN | JPC_SSGN | JPC_WSGN)
+
+/* This sample has been found to be significant. */
+#define JPC_SIG		0x1000
+/* The sample has been refined. */
+#define	JPC_REFINE	0x2000
+/* This sample has been processed during the significance pass. */
+#define	JPC_VISIT	0x4000
+
+/* The number of aggregation contexts. */
+#define	JPC_NUMAGGCTXS	1
+/* The number of zero coding contexts. */
+#define	JPC_NUMZCCTXS	9
+/* The number of magnitude contexts. */
+#define	JPC_NUMMAGCTXS	3
+/* The number of sign coding contexts. */
+#define	JPC_NUMSCCTXS	5
+/* The number of uniform contexts. */
+#define	JPC_NUMUCTXS	1
+
+/* The context ID for the first aggregation context. */
+#define	JPC_AGGCTXNO	0
+/* The context ID for the first zero coding context. */
+#define	JPC_ZCCTXNO		(JPC_AGGCTXNO + JPC_NUMAGGCTXS)
+/* The context ID for the first magnitude context. */
+#define	JPC_MAGCTXNO	(JPC_ZCCTXNO + JPC_NUMZCCTXS)
+/* The context ID for the first sign coding context. */
+#define	JPC_SCCTXNO		(JPC_MAGCTXNO + JPC_NUMMAGCTXS)
+/* The context ID for the first uniform context. */
+#define	JPC_UCTXNO		(JPC_SCCTXNO + JPC_NUMSCCTXS)
+/* The total number of contexts. */
+#define	JPC_NUMCTXS		(JPC_UCTXNO + JPC_NUMUCTXS)
+
+/******************************************************************************\
+* External data.
+\******************************************************************************/
+
+/* These lookup tables are used by various macros/functions. */
+/* Do not access these lookup tables directly. */
+extern int jpc_zcctxnolut[];
+extern int jpc_spblut[];
+extern int jpc_scctxnolut[];
+extern int jpc_magctxnolut[];
+extern jpc_fix_t jpc_refnmsedec[];
+extern jpc_fix_t jpc_signmsedec[];
+extern jpc_fix_t jpc_refnmsedec0[];
+extern jpc_fix_t jpc_signmsedec0[];
+
+/* The initial settings for the MQ contexts. */
+extern jpc_mqctx_t jpc_mqctxs[];
+
+/******************************************************************************\
+* Functions and macros.
+\******************************************************************************/
+
+/* Initialize each entry of the MQ context array ctxs to its initial state. */
+void jpc_initctxs(jpc_mqctx_t *ctxs);
+
+/* Get the zero coding context (the macro form is a table lookup on orient and the neighbour significance bits of f). */
+int jpc_getzcctxno(int f, int orient);
+#define	JPC_GETZCCTXNO(f, orient) \
+	(jpc_zcctxnolut[((orient) << 8) | ((f) & JPC_OTHSIGMSK)])
+
+/* Get the sign prediction bit (the macro form is a table lookup on the 4-connected significance and sign bits of f). */
+int jpc_getspb(int f);
+#define	JPC_GETSPB(f) \
+	(jpc_spblut[((f) & (JPC_PRIMSIGMSK | JPC_SGNMSK)) >> 4])
+
+/* Get the sign coding context (the macro form uses the same table index as JPC_GETSPB). */
+int jpc_getscctxno(int f);
+#define	JPC_GETSCCTXNO(f) \
+	(jpc_scctxnolut[((f) & (JPC_PRIMSIGMSK | JPC_SGNMSK)) >> 4])
+
+/* Get the magnitude context (the macro form is a table lookup on the neighbour significance bits of f, with bit 11 set when JPC_REFINE is set). */
+int jpc_getmagctxno(int f);
+#define	JPC_GETMAGCTXNO(f) \
+	(jpc_magctxnolut[((f) & JPC_OTHSIGMSK) | ((((f) & JPC_REFINE) != 0) << 11)])
+
+/* Get the normalized MSE reduction for significance passes (table lookup; bitpos is evaluated more than once). */
+#define	JPC_GETSIGNMSEDEC(x, bitpos)	jpc_getsignmsedec_macro(x, bitpos)
+jpc_fix_t jpc_getsignmsedec_func(jpc_fix_t x, int bitpos);
+#define	jpc_getsignmsedec_macro(x, bitpos) \
+	(((bitpos) > JPC_NMSEDEC_FRACBITS) ? jpc_signmsedec[JPC_ASR((x), (bitpos) - JPC_NMSEDEC_FRACBITS) & JAS_ONES(JPC_NMSEDEC_BITS)] : \
+	  (jpc_signmsedec0[JPC_ASR((x), (bitpos) - JPC_NMSEDEC_FRACBITS) & JAS_ONES(JPC_NMSEDEC_BITS)]))
+
+/* Get the normalized MSE reduction for refinement passes (table lookup; bitpos is evaluated more than once). */
+#define	JPC_GETREFNMSEDEC(x, bitpos)	jpc_getrefnmsedec_macro(x, bitpos)
+jpc_fix_t jpc_refsignmsedec_func(jpc_fix_t x, int bitpos); /* NOTE(review): name does not follow the jpc_get*nmsedec_func pattern of its sibling -- confirm against the definition */
+#define	jpc_getrefnmsedec_macro(x, bitpos) \
+	(((bitpos) > JPC_NMSEDEC_FRACBITS) ? jpc_refnmsedec[JPC_ASR((x), (bitpos) - JPC_NMSEDEC_FRACBITS) & JAS_ONES(JPC_NMSEDEC_BITS)] : \
+	  (jpc_refnmsedec0[JPC_ASR((x), (bitpos) - JPC_NMSEDEC_FRACBITS) & JAS_ONES(JPC_NMSEDEC_BITS)]))
+
+/* Arithmetic shift right of x by n bits; a negative n shifts left by -n bits instead (n is evaluated twice). */
+#define	JPC_ASR(x, n) \
+	(((n) >= 0) ? ((x) >> (n)) : ((x) << (-(n))))
+
+/* Update the per-sample state information: flag the neighbours of the sample at fp as having a significant neighbour; a nonzero s also sets the matching sign flag on the 4-connected ones. */
+#define	JPC_UPDATEFLAGS4(fp, rowstep, s, vcausalflag) \
+{ \
+	register jpc_fix_t *np = (fp) - (rowstep); /* same column, one row up */ \
+	register jpc_fix_t *sp = (fp) + (rowstep); /* same column, one row down */ \
+	if ((vcausalflag)) { /* the row above (np) is left untouched in this branch */ \
+		sp[-1] |= JPC_NESIG; \
+		sp[1] |= JPC_NWSIG; \
+		if (s) { /* significance plus sign flag */ \
+			*sp |= JPC_NSIG | JPC_NSGN; \
+			(fp)[-1] |= JPC_ESIG | JPC_ESGN; \
+			(fp)[1] |= JPC_WSIG | JPC_WSGN; \
+		} else { /* significance only */ \
+			*sp |= JPC_NSIG; \
+			(fp)[-1] |= JPC_ESIG; \
+			(fp)[1] |= JPC_WSIG; \
+		} \
+	} else { /* all eight neighbours are updated */ \
+		np[-1] |= JPC_SESIG; \
+		np[1] |= JPC_SWSIG; \
+		sp[-1] |= JPC_NESIG; \
+		sp[1] |= JPC_NWSIG; \
+		if (s) { /* significance plus sign flag */ \
+			*np |= JPC_SSIG | JPC_SSGN; \
+			*sp |= JPC_NSIG | JPC_NSGN; \
+			(fp)[-1] |= JPC_ESIG | JPC_ESGN; \
+			(fp)[1] |= JPC_WSIG | JPC_WSGN; \
+		} else { /* significance only */ \
+			*np |= JPC_SSIG; \
+			*sp |= JPC_NSIG; \
+			(fp)[-1] |= JPC_ESIG; \
+			(fp)[1] |= JPC_WSIG; \
+		} \
+	} \
+}
+
+/* Initialize the lookup tables used by the codec. */
+void jpc_initluts(void);
+
+/* Get the nominal gain associated with a particular band. */
+int JPC_NOMINALGAIN(int qmfbid, int numlvls, int lvlno, int orient);
+
+/* Get the coding pass type. */
+int JPC_PASSTYPE(int passno);
+
+/* Get the segment type. */
+int JPC_SEGTYPE(int passno, int firstpassno, int bypass);
+
+/* Get the number of coding passes in the segment. */
+int JPC_SEGPASSCNT(int passno, int firstpassno, int numpasses, int bypass,
+  int termall);
+
+/* Is the coding pass terminated? */
+int JPC_ISTERMINATED(int passno, int firstpassno, int numpasses, int termall,
+  int lazy);
+
+#endif
diff --git a/src/libjasper/jpc/jpc_t1dec.c b/src/libjasper/jpc/jpc_t1dec.c
new file mode 100644
index 0000000..50df081
--- /dev/null
+++ b/src/libjasper/jpc/jpc_t1dec.c
@@ -0,0 +1,927 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tier 1 Decoder
+ *
+ * $Id: jpc_t1dec.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "jasper/jas_fix.h"
+#include "jasper/jas_stream.h"
+#include "jasper/jas_math.h"
+
+#include "jpc_bs.h"
+#include "jpc_mqdec.h"
+#include "jpc_t1dec.h"
+#include "jpc_t1cod.h"
+#include "jpc_dec.h"
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+static int jpc_dec_decodecblk(jpc_dec_t *dec, jpc_dec_tile_t *tile, jpc_dec_tcomp_t *tcomp, jpc_dec_band_t *band,
+  jpc_dec_cblk_t *cblk, int dopartial, int maxlyrs);
+static int dec_sigpass(jpc_dec_t *dec, jpc_mqdec_t *mqdec, int bitpos, int orient,
+  int vcausalflag, jas_matrix_t *flags, jas_matrix_t *data);
+static int dec_rawsigpass(jpc_dec_t *dec, jpc_bitstream_t *in, int bitpos,
+  int vcausalflag, jas_matrix_t *flags, jas_matrix_t *data);
+static int dec_refpass(jpc_dec_t *dec, jpc_mqdec_t *mqdec, int bitpos, int vcausalflag,
+  jas_matrix_t *flags, jas_matrix_t *data);
+static int dec_rawrefpass(jpc_dec_t *dec, jpc_bitstream_t *in, int bitpos,
+  int vcausalflag, jas_matrix_t *flags, jas_matrix_t *data);
+static int dec_clnpass(jpc_dec_t *dec, jpc_mqdec_t *mqdec, int bitpos, int orient,
+  int vcausalflag, int segsymflag, jas_matrix_t *flags, jas_matrix_t *data);
+
+#if defined(DEBUG) /* running count of traced symbols, used only by the debug macros below */
+static long t1dec_cnt = 0;
+#endif
+
+#if !defined(DEBUG) /* plain form: an expression that stores the next MQ-decoded bit in v */
+#define	JPC_T1D_GETBIT(mqdec, v, passtypename, symtypename) \
+	((v) = jpc_mqdec_getbit(mqdec))
+#else /* debug form: a brace block (not an expression) that also traces the symbol when the debug level is >= 100 */
+#define	JPC_T1D_GETBIT(mqdec, v, passtypename, symtypename) \
+{ \
+	(v) = jpc_mqdec_getbit(mqdec); \
+	if (jas_getdbglevel() >= 100) { \
+		jas_eprintf("index = %ld; passtype = %s; symtype = %s; sym = %d\n", t1dec_cnt, passtypename, symtypename, v); \
+		++t1dec_cnt; \
+	} \
+}
+#endif
+#define	JPC_T1D_GETBITNOSKEW(mqdec, v, passtypename, symtypename) \
+	JPC_T1D_GETBIT(mqdec, v, passtypename, symtypename)
+
+#if !defined(DEBUG) /* plain form: an expression that stores the next raw bit from the bit stream in v */
+#define	JPC_T1D_RAWGETBIT(bitstream, v, passtypename, symtypename) \
+	((v) = jpc_bitstream_getbit(bitstream))
+#else /* debug form: a brace block that also traces the symbol, sharing the t1dec_cnt counter */
+#define	JPC_T1D_RAWGETBIT(bitstream, v, passtypename, symtypename) \
+{ \
+	(v) = jpc_bitstream_getbit(bitstream); \
+	if (jas_getdbglevel() >= 100) { \
+		jas_eprintf("index = %ld; passtype = %s; symtype = %s; sym = %d\n", t1dec_cnt, passtypename, symtypename, v); \
+		++t1dec_cnt; \
+	} \
+}
+#endif
+
+/******************************************************************************\
+* Code.
+\******************************************************************************/
+
+int jpc_dec_decodecblks(jpc_dec_t *dec, jpc_dec_tile_t *tile)
+{
+	jpc_dec_tcomp_t *tcomp = tile->tcomps;
+	int compsleft;
+
+	/* Walk component -> resolution level -> band -> precinct -> code block. */
+	for (compsleft = dec->numcomps; compsleft > 0; --compsleft, ++tcomp) {
+		jpc_dec_rlvl_t *rlvl = tcomp->rlvls;
+		int rlvlsleft;
+
+		for (rlvlsleft = tcomp->numrlvls; rlvlsleft > 0; --rlvlsleft, ++rlvl) {
+			jpc_dec_band_t *band;
+			int bandsleft;
+
+			/* GeoJasper: dima - progress */
+			jas_do_progress( (tcomp->numrlvls-rlvlsleft), (tcomp->numrlvls-1), "jpc: decode" );
+			if (jas_test_abort() == 1) {
+				return -1;
+			}
+
+			if (!rlvl->bands) {
+				continue;
+			}
+			band = rlvl->bands;
+			for (bandsleft = rlvl->numbands; bandsleft > 0; --bandsleft, ++band) {
+				jpc_dec_prc_t *prc;
+				int prcsleft;
+
+				if (!band->data) {
+					continue;
+				}
+				prc = band->prcs;
+				for (prcsleft = rlvl->numprcs; prcsleft > 0; --prcsleft, ++prc) {
+					jpc_dec_cblk_t *cblk;
+					int cblksleft;
+
+					if (!prc->cblks) {
+						continue;
+					}
+					cblk = prc->cblks;
+					for (cblksleft = prc->numcblks; cblksleft > 0; --cblksleft, ++cblk) {
+						if (jpc_dec_decodecblk(dec, tile, tcomp, band, cblk, 1, JPC_MAXLYRS)) {
+							return -1;
+						}
+					}
+				}
+			}
+		}
+	}
+	return 0;
+}
+
+/*
+ * Decode the pending segments of one code block, starting at the head of
+ * cblk->segs.  A segment is decoded only while it is not cblk->curseg (or
+ * dopartial is nonzero) and maxlyrs is negative or exceeds seg->lyrno; each
+ * decoded segment is unlinked and destroyed.  Returns 0 on success or when
+ * the bit-plane number drops below zero, and -1 on failure.
+ */
+static int jpc_dec_decodecblk(jpc_dec_t *dec, jpc_dec_tile_t *tile, jpc_dec_tcomp_t *tcomp, jpc_dec_band_t *band,
+  jpc_dec_cblk_t *cblk, int dopartial, int maxlyrs)
+{
+	jpc_dec_seg_t *seg;
+	int i;
+	int bpno;
+	int passtype;
+	int ret;
+	int compno;
+	int fillmask, filldata;
+	int vcausal, segsym;
+	jpc_dec_ccp_t *ccp;
+
+	compno = tcomp - tile->tcomps;
+
+	if (!cblk->flags) {
+		/* Note: matrix is assumed to be zeroed */
+		if (!(cblk->flags = jas_matrix_create(jas_matrix_numrows(cblk->data) +
+		  2, jas_matrix_numcols(cblk->data) + 2))) {
+			return -1;
+		}
+	}
+
+	seg = cblk->segs.head;
+	while (seg && (seg != cblk->curseg || dopartial) && (maxlyrs < 0 ||
+	  seg->lyrno < maxlyrs)) {
+		assert(seg->numpasses >= seg->maxpasses || dopartial);
+		assert(seg->stream);
+		jas_stream_rewind(seg->stream);
+		jas_stream_setrwcount(seg->stream, 0);
+		if (seg->type == JPC_SEG_MQ) {
+			if (!cblk->mqdec) {
+				if (!(cblk->mqdec = jpc_mqdec_create(JPC_NUMCTXS, 0))) {
+					return -1;
+				}
+				jpc_mqdec_setctxs(cblk->mqdec, JPC_NUMCTXS, jpc_mqctxs);
+			}
+			jpc_mqdec_setinput(cblk->mqdec, seg->stream);
+			jpc_mqdec_init(cblk->mqdec);
+		} else {
+			assert(seg->type == JPC_SEG_RAW);
+			if (!cblk->nulldec) {
+				if (!(cblk->nulldec = jpc_bitstream_sopen(seg->stream, "r"))) {
+					/* Fail cleanly: an assert(0) vanishes under NDEBUG. */
+					return -1;
+				}
+			}
+		}
+
+		for (i = 0; i < seg->numpasses; ++i) {
+			if (cblk->numimsbs > band->numbps) {
+				ccp = &tile->cp->ccps[compno];
+				if (ccp->roishift <= 0) {
+					jas_eprintf("warning: corrupt code stream\n");
+				} else {
+					if (cblk->numimsbs < ccp->roishift - band->numbps) {
+						jas_eprintf("warning: corrupt code stream\n");
+					}
+				}
+			}
+			bpno = band->roishift + band->numbps - 1 - (cblk->numimsbs +
+			  (seg->passno + i - cblk->firstpassno + 2) / 3);
+			if (bpno < 0) {
+				goto premature_exit;
+			}
+			if (bpno >= 31) {
+				/* The passes compute 1 << bpno in an int; reject instead of asserting. */
+				jas_eprintf("corrupt code stream: bit plane %d out of range\n", bpno);
+				return -1;
+			}
+			ccp = &tile->cp->ccps[compno];
+			vcausal = (ccp->cblkctx & JPC_COX_VSC) != 0;
+			segsym = (ccp->cblkctx & JPC_COX_SEGSYM) != 0;
+			passtype = (seg->passno + i + 2) % 3;
+			switch (passtype) {
+			case JPC_SIGPASS:
+				ret = (seg->type == JPC_SEG_MQ) ?
+				  dec_sigpass(dec, cblk->mqdec, bpno, band->orient, vcausal, cblk->flags, cblk->data) :
+				  dec_rawsigpass(dec, cblk->nulldec, bpno, vcausal, cblk->flags, cblk->data);
+				break;
+			case JPC_REFPASS:
+				ret = (seg->type == JPC_SEG_MQ) ?
+				  dec_refpass(dec, cblk->mqdec, bpno, vcausal, cblk->flags, cblk->data) :
+				  dec_rawrefpass(dec, cblk->nulldec, bpno, vcausal, cblk->flags, cblk->data);
+				break;
+			case JPC_CLNPASS:
+				/* A cleanup pass needs the MQ decoder; any other segment type is an error. */
+				if (seg->type != JPC_SEG_MQ) {
+					ret = -1;
+					break;
+				}
+				ret = dec_clnpass(dec, cblk->mqdec, bpno, band->orient, vcausal, segsym,
+				  cblk->flags, cblk->data);
+				break;
+			default:
+				ret = -1;
+				break;
+			}
+			/* Do we need to reset after each coding pass? */
+			if ((ccp->cblkctx & JPC_COX_RESET) && cblk->mqdec) {
+				jpc_mqdec_setctxs(cblk->mqdec, JPC_NUMCTXS, jpc_mqctxs);
+			}
+			if (ret) {
+				jas_eprintf("coding pass failed passtype=%d segtype=%d\n", passtype, seg->type);
+				return -1;
+			}
+		}
+
+		/* Note: dont destroy mq decoder because context info will be lost */
+		if (seg->type != JPC_SEG_MQ) {
+			assert(seg->type == JPC_SEG_RAW);
+			if (tile->cp->ccps[compno].cblkctx & JPC_COX_PTERM) {
+				fillmask = 0x7f;
+				filldata = 0x2a;
+			} else {
+				fillmask = 0;
+				filldata = 0;
+			}
+			if ((ret = jpc_bitstream_inalign(cblk->nulldec, fillmask,
+			  filldata)) < 0) {
+				return -1;
+			} else if (ret > 0) {
+				jas_eprintf("warning: bad termination pattern detected\n");
+			}
+			jpc_bitstream_close(cblk->nulldec);
+			cblk->nulldec = 0;
+		}
+
+		cblk->curseg = seg->next;
+		jpc_seglist_remove(&cblk->segs, seg);
+		jpc_seg_destroy(seg);
+		seg = cblk->curseg;
+	}
+
+	assert(dopartial ? (!cblk->curseg) : 1);
+
+premature_exit:
+	return 0;
+}
+
+/******************************************************************************\
+* Code for significance pass.
+\******************************************************************************/
+
+#define	jpc_sigpass_step(fp, frowstep, dp, bitpos, oneplushalf, orient, mqdec, vcausalflag) \
+{ /* One sample of the MQ-coded significance pass; the bitpos argument is not used by the expansion. */ \
+	int f; \
+	int v; \
+	f = *(fp); \
+	if ((f & JPC_OTHSIGMSK) && !(f & (JPC_SIG | JPC_VISIT))) { /* has a significant neighbour, but is itself neither significant nor visited */ \
+		jpc_mqdec_setcurctx((mqdec), JPC_GETZCCTXNO(f, (orient))); \
+		JPC_T1D_GETBIT((mqdec), v, "SIG", "ZC"); \
+		if (v) { /* the sample becomes significant: decode its sign next */ \
+			jpc_mqdec_setcurctx((mqdec), JPC_GETSCCTXNO(f)); \
+			JPC_T1D_GETBIT((mqdec), v, "SIG", "SC"); \
+			v ^= JPC_GETSPB(f); /* combine with the sign prediction bit */ \
+			JPC_UPDATEFLAGS4((fp), (frowstep), v, (vcausalflag)); \
+			*(fp) |= JPC_SIG; \
+			*(dp) = (v) ? (-(oneplushalf)) : (oneplushalf); \
+		} \
+		*(fp) |= JPC_VISIT; /* set whether or not the sample became significant */ \
+	} \
+}
+
+/*
+ * Decode the significance pass of one bit plane using the MQ decoder.
+ *
+ * Parameters:
+ *   dec          not used
+ *   mqdec        MQ decoder supplying the decoded symbols
+ *   bitpos       bit plane being decoded
+ *   orient       orientation passed to the zero coding context lookup
+ *   vcausalflag  forwarded to the flag update for the first row of a stripe
+ *   flags        per-sample state flags, addressed starting at (1, 1)
+ *   data         sample values, addressed starting at (0, 0)
+ *
+ * The code block is scanned in stripes of up to four rows.  Within a stripe
+ * each column is visited from top to bottom before moving to the next column.
+ * Rows other than the first of a stripe always pass 0 in place of vcausalflag.
+ *
+ * Always returns 0.
+ */
+static int dec_sigpass(jpc_dec_t *dec, register jpc_mqdec_t *mqdec, int bitpos, int orient,
+  int vcausalflag, jas_matrix_t *flags, jas_matrix_t *data)
+{
+	/* Matrix geometry. */
+	int numcols;
+	int numrows;
+	int flagrowstep;
+	int datarowstep;
+	int flagstripestep;
+	int datastripestep;
+
+	/* The bit at bitpos together with the bit just below it (if any). */
+	int oneplushalf;
+
+	/* Scan state. */
+	jpc_fix_t *flagstripe;
+	jpc_fix_t *datastripe;
+	jpc_fix_t *flagp;
+	jpc_fix_t *datap;
+	int rowsleft;
+	int stripeheight;
+	int col;
+	int row;
+
+	/* Avoid compiler warning about unused parameters. */
+	(void) dec;
+
+	numcols = jas_matrix_numcols(data);
+	numrows = jas_matrix_numrows(data);
+	flagrowstep = jas_matrix_rowstep(flags);
+	datarowstep = jas_matrix_rowstep(data);
+	/* A stripe spans four rows. */
+	flagstripestep = flagrowstep << 2;
+	datastripestep = datarowstep << 2;
+
+	oneplushalf = (1 << bitpos) | ((1 << bitpos) >> 1);
+
+	/* Process the code block one stripe at a time. */
+	flagstripe = jas_matrix_getref(flags, 1, 1);
+	datastripe = jas_matrix_getref(data, 0, 0);
+	for (rowsleft = numrows; rowsleft > 0; rowsleft -= 4,
+	  flagstripe += flagstripestep, datastripe += datastripestep) {
+		/* The last stripe may have fewer than four rows. */
+		stripeheight = JAS_MIN(rowsleft, 4);
+
+		for (col = 0; col < numcols; ++col) {
+			flagp = flagstripe + col;
+			datap = datastripe + col;
+
+			/* First row of the stripe: the only one given vcausalflag. */
+			jpc_sigpass_step(flagp, flagrowstep, datap, bitpos,
+			  oneplushalf, orient, mqdec, vcausalflag);
+
+			/* Remaining rows of the stripe. */
+			for (row = 1; row < stripeheight; ++row) {
+				flagp += flagrowstep;
+				datap += datarowstep;
+				jpc_sigpass_step(flagp, flagrowstep, datap, bitpos,
+				  oneplushalf, orient, mqdec, 0);
+			}
+		}
+	}
+
+	return 0;
+}
+
+#define	jpc_rawsigpass_step(fp, frowstep, dp, oneplushalf, in, vcausalflag) \
+{ /* One sample of the raw significance pass; makes the enclosing function return -1 if a bit read is negative. */ \
+	jpc_fix_t f = *(fp); \
+	jpc_fix_t v; \
+	if ((f & JPC_OTHSIGMSK) && !(f & (JPC_SIG | JPC_VISIT))) { /* has a significant neighbour, but is itself neither significant nor visited */ \
+		JPC_T1D_RAWGETBIT(in, v, "SIG", "ZC"); \
+		if (v < 0) { \
+			return -1; \
+		} \
+		if (v) { /* the sample becomes significant: read its sign next */ \
+			JPC_T1D_RAWGETBIT(in, v, "SIG", "SC"); /* used as read; no sign prediction bit is applied here */ \
+			if (v < 0) { \
+				return -1; \
+			} \
+			JPC_UPDATEFLAGS4((fp), (frowstep), v, (vcausalflag)); \
+			*(fp) |= JPC_SIG; \
+			*(dp) = v ? (-oneplushalf) : (oneplushalf); \
+		} \
+		*(fp) |= JPC_VISIT; /* set whether or not the sample became significant */ \
+	} \
+}
+
+/*
+ * Decode the significance pass of one bit plane from a raw bit stream.
+ *
+ * Parameters:
+ *   dec          not used
+ *   in           bit stream supplying the symbols
+ *   bitpos       bit plane being decoded
+ *   vcausalflag  forwarded to the flag update for the first row of a stripe
+ *   flags        per-sample state flags, addressed starting at (1, 1)
+ *   data         sample values, addressed starting at (0, 0)
+ *
+ * The code block is scanned in stripes of up to four rows.  Within a stripe
+ * each column is visited from top to bottom before moving to the next column.
+ * Rows other than the first of a stripe always pass 0 in place of vcausalflag.
+ *
+ * Returns 0 on success.  Returns -1 as soon as the bit stream yields a
+ * negative value for a bit; that return is issued from inside the step
+ * macro.
+ */
+static int dec_rawsigpass(jpc_dec_t *dec, jpc_bitstream_t *in, int bitpos, int vcausalflag,
+  jas_matrix_t *flags, jas_matrix_t *data)
+{
+	/* Matrix geometry. */
+	int numcols;
+	int numrows;
+	int flagrowstep;
+	int datarowstep;
+	int flagstripestep;
+	int datastripestep;
+
+	/* The bit at bitpos together with the bit just below it (if any). */
+	int oneplushalf;
+
+	/* Scan state. */
+	jpc_fix_t *flagstripe;
+	jpc_fix_t *datastripe;
+	jpc_fix_t *flagp;
+	jpc_fix_t *datap;
+	int rowsleft;
+	int stripeheight;
+	int col;
+	int row;
+
+	/* Avoid compiler warning about unused parameters. */
+	(void) dec;
+
+	numcols = jas_matrix_numcols(data);
+	numrows = jas_matrix_numrows(data);
+	flagrowstep = jas_matrix_rowstep(flags);
+	datarowstep = jas_matrix_rowstep(data);
+	/* A stripe spans four rows. */
+	flagstripestep = flagrowstep << 2;
+	datastripestep = datarowstep << 2;
+
+	oneplushalf = (1 << bitpos) | ((1 << bitpos) >> 1);
+
+	/* Process the code block one stripe at a time. */
+	flagstripe = jas_matrix_getref(flags, 1, 1);
+	datastripe = jas_matrix_getref(data, 0, 0);
+	for (rowsleft = numrows; rowsleft > 0; rowsleft -= 4,
+	  flagstripe += flagstripestep, datastripe += datastripestep) {
+		/* The last stripe may have fewer than four rows. */
+		stripeheight = JAS_MIN(rowsleft, 4);
+
+		for (col = 0; col < numcols; ++col) {
+			flagp = flagstripe + col;
+			datap = datastripe + col;
+
+			/* First row of the stripe: the only one given vcausalflag. */
+			jpc_rawsigpass_step(flagp, flagrowstep, datap, oneplushalf,
+			  in, vcausalflag);
+
+			/* Remaining rows of the stripe. */
+			for (row = 1; row < stripeheight; ++row) {
+				flagp += flagrowstep;
+				datap += datarowstep;
+				jpc_rawsigpass_step(flagp, flagrowstep, datap,
+				  oneplushalf, in, 0);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/******************************************************************************\
+* Code for refinement pass.
+\******************************************************************************/
+
+#define	jpc_refpass_step(fp, dp, poshalf, neghalf, mqdec, vcausalflag) \
+{ /* One sample of the MQ-coded refinement pass; the vcausalflag argument is not used by the expansion. */ \
+	int v; \
+	int t; \
+	if (((*(fp)) & (JPC_SIG | JPC_VISIT)) == JPC_SIG) { /* significant and not marked visited */ \
+		jpc_mqdec_setcurctx((mqdec), JPC_GETMAGCTXNO(*(fp))); \
+		JPC_T1D_GETBITNOSKEW((mqdec), v, "REF", "MR"); \
+		t = (v ? (poshalf) : (neghalf)); \
+		*(dp) += (*(dp) < 0) ? (-t) : t; /* the adjustment is mirrored for negative samples */ \
+		*(fp) |= JPC_REFINE; \
+	} \
+}
+
+/*
+ * Decode the magnitude refinement pass of one bit plane using the MQ decoder.
+ *
+ * Parameters:
+ *   dec          not used
+ *   mqdec        MQ decoder supplying the decoded symbols
+ *   bitpos       bit plane being decoded
+ *   vcausalflag  not used
+ *   flags        per-sample state flags, addressed starting at (1, 1)
+ *   data         sample values, addressed starting at (0, 0)
+ *
+ * The code block is scanned in stripes of up to four rows.  Within a stripe
+ * each column is visited from top to bottom before moving to the next column.
+ * A sample with JPC_SIG set and JPC_VISIT clear gets one decoded bit: its
+ * value is moved away from zero by poshalf for a 1 bit, or adjusted by
+ * neghalf (a negative amount) for a 0 bit, and JPC_REFINE is set.
+ *
+ * Always returns 0.
+ */
+static int dec_refpass(jpc_dec_t *dec, register jpc_mqdec_t *mqdec, int bitpos,
+  int vcausalflag, jas_matrix_t *flags, jas_matrix_t *data)
+{
+	/* Matrix geometry. */
+	int numcols;
+	int numrows;
+	int flagrowstep;
+	int datarowstep;
+	int flagstripestep;
+	int datastripestep;
+
+	/* Adjustments applied for a decoded 1 bit and 0 bit, respectively. */
+	int poshalf;
+	int neghalf;
+
+	/* Scan state. */
+	jpc_fix_t *flagstripe;
+	jpc_fix_t *datastripe;
+	jpc_fix_t *flagp;
+	jpc_fix_t *datap;
+	int rowsleft;
+	int stripeheight;
+	int col;
+	int row;
+
+	/* Avoid compiler warning about unused parameters. */
+	(void) dec;
+	(void) vcausalflag;
+
+	numcols = jas_matrix_numcols(data);
+	numrows = jas_matrix_numrows(data);
+	flagrowstep = jas_matrix_rowstep(flags);
+	datarowstep = jas_matrix_rowstep(data);
+	/* A stripe spans four rows. */
+	flagstripestep = flagrowstep << 2;
+	datastripestep = datarowstep << 2;
+
+	/* At bit plane 0 there is no lower bit: poshalf is 0 and neghalf is
+	 * fixed at -1. */
+	poshalf = (1 << bitpos) >> 1;
+	neghalf = (bitpos > 0) ? (-poshalf) : (-1);
+
+	/* Process the code block one stripe at a time. */
+	flagstripe = jas_matrix_getref(flags, 1, 1);
+	datastripe = jas_matrix_getref(data, 0, 0);
+	for (rowsleft = numrows; rowsleft > 0; rowsleft -= 4,
+	  flagstripe += flagstripestep, datastripe += datastripestep) {
+		/* The last stripe may have fewer than four rows. */
+		stripeheight = JAS_MIN(rowsleft, 4);
+
+		for (col = 0; col < numcols; ++col) {
+			/* Every row of the stripe is refined the same way. */
+			for (row = 0; row < stripeheight; ++row) {
+				flagp = flagstripe + row * flagrowstep + col;
+				datap = datastripe + row * datarowstep + col;
+				jpc_refpass_step(flagp, datap, poshalf, neghalf,
+				  mqdec, 0);
+			}
+		}
+	}
+
+	return 0;
+}
+
+#define	jpc_rawrefpass_step(fp, dp, poshalf, neghalf, in, vcausalflag) \
+{ /* One sample of the raw refinement pass; the vcausalflag argument is not used by the expansion. */ \
+	jpc_fix_t v; \
+	jpc_fix_t t; \
+	if (((*(fp)) & (JPC_SIG | JPC_VISIT)) == JPC_SIG) { /* significant and not marked visited */ \
+		JPC_T1D_RAWGETBIT(in, v, "REF", "MAGREF"); \
+		if (v < 0) { /* a negative bit makes the enclosing function return -1 */ \
+			return -1; \
+		} \
+		t = (v ? poshalf : neghalf); \
+		*(dp) += (*(dp) < 0) ? (-t) : t; /* the adjustment is mirrored for negative samples */ \
+		*(fp) |= JPC_REFINE; \
+	} \
+}
+
+/*
+ * Decode the magnitude refinement pass of one bit plane from a raw bit stream.
+ *
+ * Parameters:
+ *   dec          not used
+ *   in           bit stream supplying the symbols
+ *   bitpos       bit plane being decoded
+ *   vcausalflag  not used
+ *   flags        per-sample state flags, addressed starting at (1, 1)
+ *   data         sample values, addressed starting at (0, 0)
+ *
+ * The code block is scanned in stripes of up to four rows.  Within a stripe
+ * each column is visited from top to bottom before moving to the next column.
+ * A sample with JPC_SIG set and JPC_VISIT clear gets one bit from the stream:
+ * its value is moved away from zero by poshalf for a 1 bit, or adjusted by
+ * neghalf (a negative amount) for a 0 bit, and JPC_REFINE is set.
+ *
+ * Returns 0 on success.  Returns -1 as soon as the bit stream yields a
+ * negative value for a bit; that return is issued from inside the step macro.
+ */
+static int dec_rawrefpass(jpc_dec_t *dec, jpc_bitstream_t *in, int bitpos, int vcausalflag,
+  jas_matrix_t *flags, jas_matrix_t *data)
+{
+	/* Matrix geometry. */
+	int numcols;
+	int numrows;
+	int flagrowstep;
+	int datarowstep;
+	int flagstripestep;
+	int datastripestep;
+
+	/* Adjustments applied for a 1 bit and a 0 bit, respectively. */
+	int poshalf;
+	int neghalf;
+
+	/* Scan state. */
+	jpc_fix_t *flagstripe;
+	jpc_fix_t *datastripe;
+	jpc_fix_t *flagp;
+	jpc_fix_t *datap;
+	int rowsleft;
+	int stripeheight;
+	int col;
+	int row;
+
+	/* Avoid compiler warning about unused parameters. */
+	(void) dec;
+	(void) vcausalflag;
+
+	numcols = jas_matrix_numcols(data);
+	numrows = jas_matrix_numrows(data);
+	flagrowstep = jas_matrix_rowstep(flags);
+	datarowstep = jas_matrix_rowstep(data);
+	/* A stripe spans four rows. */
+	flagstripestep = flagrowstep << 2;
+	datastripestep = datarowstep << 2;
+
+	poshalf = (1 << bitpos) >> 1;
+	neghalf = (bitpos > 0) ? (-poshalf) : (-1);
+
+	/* Process the code block one stripe at a time. */
+	flagstripe = jas_matrix_getref(flags, 1, 1);
+	datastripe = jas_matrix_getref(data, 0, 0);
+	for (rowsleft = numrows; rowsleft > 0; rowsleft -= 4,
+	  flagstripe += flagstripestep, datastripe += datastripestep) {
+		/* The last stripe may have fewer than four rows. */
+		stripeheight = JAS_MIN(rowsleft, 4);
+
+		for (col = 0; col < numcols; ++col) {
+			/* Every row of the stripe is refined the same way. */
+			for (row = 0; row < stripeheight; ++row) {
+				flagp = flagstripe + row * flagrowstep + col;
+				datap = datastripe + row * datarowstep + col;
+				jpc_rawrefpass_step(flagp, datap, poshalf, neghalf,
+				  in, 0);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/******************************************************************************\
+* Code for cleanup pass.
+\******************************************************************************/
+
+#define	jpc_clnpass_step(f, fp, frowstep, dp, oneplushalf, orient, mqdec, flabel, plabel, vcausalflag) \
+{ /* Cleanup-pass decoding of one sample: f is its flag word, fp and dp point at its flags and value. */ \
+	int v; \
+flabel /* pasted verbatim: a label or an empty statement supplied by the caller */ \
+	if (!((f) & (JPC_SIG | JPC_VISIT))) { /* neither JPC_SIG nor JPC_VISIT is set in f */ \
+		jpc_mqdec_setcurctx((mqdec), JPC_GETZCCTXNO((f), (orient))); \
+		JPC_T1D_GETBIT((mqdec), v, "CLN", "ZC"); \
+		if (v) { \
+plabel /* pasted verbatim: a label that lets the caller jump straight to the sign decoding */ \
+			/* Coefficient is significant. */ \
+			jpc_mqdec_setcurctx((mqdec), JPC_GETSCCTXNO(f)); \
+			JPC_T1D_GETBIT((mqdec), v, "CLN", "SC"); \
+			v ^= JPC_GETSPB(f); /* presumably undoes a sign prediction - TODO confirm */ \
+			*(dp) = (v) ? (-(oneplushalf)) : (oneplushalf); /* v selects the sign of the new value */ \
+			JPC_UPDATEFLAGS4((fp), (frowstep), v, (vcausalflag)); \
+			*(fp) |= JPC_SIG; \
+		} \
+	} \
+	/* XXX - Is this correct?  Can aggregation cause some VISIT bits not to be reset?  Check. */ \
+	*(fp) &= ~JPC_VISIT; /* executed for every sample that goes through the step */ \
+}
+/* Decoder cleanup pass for bit-plane bitpos: decodes through mqdec every sample with neither JPC_SIG nor JPC_VISIT set, then (if segsymflag) reads the segmentation symbol.  Every exit visible in this function returns 0. */
+static int dec_clnpass(jpc_dec_t *dec, register jpc_mqdec_t *mqdec, int bitpos, int orient,
+  int vcausalflag, int segsymflag, jas_matrix_t *flags, jas_matrix_t *data)
+{
+	int i;
+	int j;
+	int k;
+	int vscanlen;
+	int v;
+	int half;
+	int runlen;
+	int f;
+	int width;
+	int height;
+	int one;
+	int oneplushalf;
+
+	jpc_fix_t *fp;
+	int frowstep;
+	int fstripestep;
+	jpc_fix_t *fstripestart;
+	jpc_fix_t *fvscanstart;
+
+	jpc_fix_t *dp;
+	int drowstep;
+	int dstripestep;
+	jpc_fix_t *dstripestart;
+	jpc_fix_t *dvscanstart;
+
+	/* Avoid compiler warning about unused parameters. */
+	dec = 0;
+
+	one = 1 << bitpos;
+	half = one >> 1;
+	oneplushalf = one | half; /* magnitude stored for a sample that becomes significant */
+
+	width = jas_matrix_numcols(data);
+	height = jas_matrix_numrows(data);
+
+	frowstep = jas_matrix_rowstep(flags);
+	drowstep = jas_matrix_rowstep(data);
+	fstripestep = frowstep << 2; /* a stripe is four rows high */
+	dstripestep = drowstep << 2;
+
+	fstripestart = jas_matrix_getref(flags, 1, 1); /* flags are addressed from element (1, 1) */
+	dstripestart = jas_matrix_getref(data, 0, 0);
+	for (i = 0; i < height; i += 4, fstripestart += fstripestep,
+	  dstripestart += dstripestep) {
+		fvscanstart = fstripestart;
+		dvscanstart = dstripestart;
+		vscanlen = JAS_MIN(4, height - i); /* the last stripe may be shorter than four rows */
+		for (j = width; j > 0; --j, ++fvscanstart, ++dvscanstart) {
+			fp = fvscanstart;
+			if (vscanlen >= 4 && (!((*fp) & (JPC_SIG | JPC_VISIT |
+			  JPC_OTHSIGMSK))) && (fp += frowstep, !((*fp) & (JPC_SIG |
+			  JPC_VISIT | JPC_OTHSIGMSK))) && (fp += frowstep, !((*fp) &
+			  (JPC_SIG | JPC_VISIT | JPC_OTHSIGMSK))) && (fp += frowstep,
+			  !((*fp) & (JPC_SIG | JPC_VISIT | JPC_OTHSIGMSK)))) {
+				/* All four samples of a full column have none of JPC_SIG, JPC_VISIT and JPC_OTHSIGMSK set: decode them as a group. */
+				jpc_mqdec_setcurctx(mqdec, JPC_AGGCTXNO);
+				JPC_T1D_GETBIT(mqdec, v, "CLN", "AGG");
+				if (!v) {
+					continue; /* a 0 bit: nothing more is decoded for this column */
+				}
+				jpc_mqdec_setcurctx(mqdec, JPC_UCTXNO);
+				JPC_T1D_GETBITNOSKEW(mqdec, v, "CLN", "RL");
+				runlen = v;
+				JPC_T1D_GETBITNOSKEW(mqdec, v, "CLN", "RL");
+				runlen = (runlen << 1) | v; /* two bits, first one most significant: row at which decoding resumes */
+				f = *(fp = fvscanstart + frowstep * runlen);
+				dp = dvscanstart + drowstep * runlen;
+				k = vscanlen - runlen; /* samples left in the column from that row on */
+				switch (runlen) {
+				case 0:
+					goto clnpass_partial0;
+					break;
+				case 1:
+					goto clnpass_partial1;
+					break;
+				case 2:
+					goto clnpass_partial2;
+					break;
+				case 3:
+					goto clnpass_partial3;
+					break;
+				}
+			} else {
+				f = *(fp = fvscanstart); /* ordinary column: start with a complete step on its first sample */
+				dp = dvscanstart;
+				k = vscanlen;
+				goto clnpass_full0;
+			}
+
+			/* Process first sample in vertical scan. */
+			jpc_clnpass_step(f, fp, frowstep, dp, oneplushalf, orient,
+			  mqdec, clnpass_full0:, clnpass_partial0:,
+			  vcausalflag);
+			if (--k <= 0) {
+				continue;
+			}
+			fp += frowstep;
+			dp += drowstep;
+
+			/* Process second sample in vertical scan. */
+			f = *fp;
+			jpc_clnpass_step(f, fp, frowstep, dp, oneplushalf, orient,
+				mqdec, ;, clnpass_partial1:, 0);
+			if (--k <= 0) {
+				continue;
+			}
+			fp += frowstep;
+			dp += drowstep;
+
+			/* Process third sample in vertical scan. */
+			f = *fp;
+			jpc_clnpass_step(f, fp, frowstep, dp, oneplushalf, orient,
+				mqdec, ;, clnpass_partial2:, 0);
+			if (--k <= 0) {
+				continue;
+			}
+			fp += frowstep;
+			dp += drowstep;
+
+			/* Process fourth sample in vertical scan. */
+			f = *fp;
+			jpc_clnpass_step(f, fp, frowstep, dp, oneplushalf, orient,
+				mqdec, ;, clnpass_partial3:, 0);
+		}
+	}
+	/* Optionally read a four-bit segmentation symbol; any value other than 0xa only produces a warning. */
+	if (segsymflag) {
+		int segsymval;
+		segsymval = 0;
+		jpc_mqdec_setcurctx(mqdec, JPC_UCTXNO);
+		JPC_T1D_GETBITNOSKEW(mqdec, v, "CLN", "SEGSYM");
+		segsymval = (segsymval << 1) | (v & 1);
+		JPC_T1D_GETBITNOSKEW(mqdec, v, "CLN", "SEGSYM");
+		segsymval = (segsymval << 1) | (v & 1);
+		JPC_T1D_GETBITNOSKEW(mqdec, v, "CLN", "SEGSYM");
+		segsymval = (segsymval << 1) | (v & 1);
+		JPC_T1D_GETBITNOSKEW(mqdec, v, "CLN", "SEGSYM");
+		segsymval = (segsymval << 1) | (v & 1);
+		if (segsymval != 0xa) {
+			jas_eprintf("warning: bad segmentation symbol\n");
+		}
+	}
+
+	return 0;
+}
diff --git a/src/libjasper/jpc/jpc_t1dec.h b/src/libjasper/jpc/jpc_t1dec.h
new file mode 100644
index 0000000..133dff8
--- /dev/null
+++ b/src/libjasper/jpc/jpc_t1dec.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tier 1 Decoder
+ *
+ * $Id: jpc_t1dec.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_T1DEC_H
+#define JPC_T1DEC_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jpc_dec.h"
+#include "jpc_mqdec.h"
+#include "jpc_t1cod.h"
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+/* Decode all of the code blocks for a particular tile. */
+int jpc_dec_decodecblks(jpc_dec_t *dec, jpc_dec_tile_t *tile);
+
+#endif
diff --git a/src/libjasper/jpc/jpc_t1enc.c b/src/libjasper/jpc/jpc_t1enc.c
new file mode 100644
index 0000000..b7cc171
--- /dev/null
+++ b/src/libjasper/jpc/jpc_t1enc.c
@@ -0,0 +1,964 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tier 1 Encoder
+ *
+ * $Id: jpc_t1enc.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "jasper/jas_fix.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_math.h"
+
+#include "jpc_t1enc.h"
+#include "jpc_t1cod.h"
+#include "jpc_enc.h"
+#include "jpc_cod.h"
+#include "jpc_math.h"
+
+static int jpc_encsigpass(jpc_mqenc_t *mqenc, int bitpos, int orient, int,
+  jas_matrix_t *flags, jas_matrix_t *data, int term, long *nmsedec);
+
+static int jpc_encrefpass(jpc_mqenc_t *mqenc, int bitpos, int, jas_matrix_t *flags,
+  jas_matrix_t *data, int term, long *nmsedec);
+
+static int jpc_encclnpass(jpc_mqenc_t *mqenc, int bitpos, int orient, int,
+  int, jas_matrix_t *flags, jas_matrix_t *data, int term, long *nmsedec);
+
+static int jpc_encrawsigpass(jpc_bitstream_t *out, int bitpos, int,
+  jas_matrix_t *flags, jas_matrix_t *data, int term, long *nmsedec);
+
+static int jpc_encrawrefpass(jpc_bitstream_t *out, int bitpos, int,
+  jas_matrix_t *flags, jas_matrix_t *data, int term, long *nmsedec);
+
+/******************************************************************************\
+* Code for encoding code blocks.
+\******************************************************************************/
+
+/*
+ * Encode all of the code blocks associated with the current tile.
+ * For each precinct, numbps and numimsbs of every code block are derived from
+ * the largest coefficient magnitude before jpc_enc_enccblk() is called.
+ * Returns 0 on success, -1 if jas_test_abort() returns 1 or a block fails.
+ */
+int jpc_enc_enccblks(jpc_enc_t *enc)
+{
+	jpc_enc_tcmpt_t *tcmpt;
+	jpc_enc_tcmpt_t *endcomps;
+	jpc_enc_rlvl_t *lvl;
+	jpc_enc_rlvl_t *endlvls;
+	jpc_enc_band_t *band;
+	jpc_enc_band_t *endbands;
+	jpc_enc_cblk_t *cblk;
+	jpc_enc_cblk_t *endcblks;
+	int i;
+	int j;
+	int mx;
+	int v;
+	jpc_enc_tile_t *tile;
+	uint_fast32_t prcno;
+	jpc_enc_prc_t *prc;
+
+	tile = enc->curtile;
+
+	endcomps = &tile->tcmpts[tile->numtcmpts];
+	for (tcmpt = tile->tcmpts; tcmpt != endcomps; ++tcmpt) {
+		endlvls = &tcmpt->rlvls[tcmpt->numrlvls];
+		for (lvl = tcmpt->rlvls; lvl != endlvls; ++lvl) {
+
+			/* GeoJasper: dima - progress.  Pass the 1-based level number and the level count; pointer values cast to int truncate on LP64 and carry no progress information. */
+			jas_do_progress((int) (lvl - tcmpt->rlvls) + 1, (int) (endlvls - tcmpt->rlvls), "jpc: encode");
+			if (jas_test_abort() == 1) return -1;
+
+			if (!lvl->bands) {
+				continue;
+			}
+			endbands = &lvl->bands[lvl->numbands];
+			for (band = lvl->bands; band != endbands; ++band) {
+				if (!band->data) {
+					continue;
+				}
+				for (prcno = 0, prc = band->prcs; prcno < lvl->numprcs; ++prcno, ++prc) {
+					if (!prc->cblks) {
+						continue;
+					}
+					endcblks = &prc->cblks[prc->numcblks];
+					for (cblk = prc->cblks; cblk != endcblks; ++cblk) {
+						mx = 0; /* largest coefficient magnitude in this code block */
+						for (i = 0; i < jas_matrix_numrows(cblk->data); ++i) {
+							for (j = 0; j < jas_matrix_numcols(cblk->data); ++j) {
+								v = abs(jas_matrix_get(cblk->data, i, j));
+								if (v > mx) {
+									mx = v;
+								}
+							}
+						}
+						cblk->numbps = JAS_MAX(jpc_firstone(mx) + 1 - JPC_NUMEXTRABITS, 0);
+					}
+
+					for (cblk = prc->cblks; cblk != endcblks; ++cblk) {
+						cblk->numimsbs = band->numbps - cblk->numbps;
+						assert(cblk->numimsbs >= 0);
+					}
+
+					for (cblk = prc->cblks; cblk != endcblks; ++cblk) {
+						if (jpc_enc_enccblk(enc, cblk->stream, tcmpt, band, cblk)) {
+							return -1;
+						}
+					}
+				}
+			}
+		}
+	}
+	return 0;
+}
+
+/* Peek at the byte at absolute offset off of stream in and put the position back. */
+int getthebyte(jas_stream_t *in, long off)
+{
+	int c;
+	long oldpos = jas_stream_tell(in);
+	assert(oldpos >= 0);
+	/* EOF is returned when either seek fails, so that a byte read from the
+	 * wrong position, or a stream left at the wrong position, is reported. */
+	c = (jas_stream_seek(in, off, SEEK_SET) < 0) ? EOF : jas_stream_peekc(in);
+	return (jas_stream_seek(in, oldpos, SEEK_SET) < 0) ? EOF : c;
+}
+
+/*
+ * Encode a single code block.
+ *
+ * A fresh memory stream and MQ coder are attached to cblk, every coding pass
+ * is run in order, and the byte range of each pass is recorded in
+ * cblk->passes.  The enc and out parameters are not used.
+ * Returns 0 on success, or -1 if a coding pass reports an error.
+ */
+int jpc_enc_enccblk(jpc_enc_t *enc, jas_stream_t *out, jpc_enc_tcmpt_t *tcmpt, jpc_enc_band_t *band, jpc_enc_cblk_t *cblk)
+{
+	jpc_enc_pass_t *pass;
+	jpc_enc_pass_t *endpasses;
+	int bitpos;
+	int n;
+	int adjust;
+	int ret;
+	int passtype;
+	int t;
+	jpc_bitstream_t *bout;
+	jpc_enc_pass_t *termpass;
+	int vcausal;
+	int segsym;
+	int termmode;
+	int c;
+
+	bout = 0;
+
+	cblk->stream = jas_stream_memopen(0, 0);
+	assert(cblk->stream);
+	cblk->mqenc = jpc_mqenc_create(JPC_NUMCTXS, cblk->stream);
+	assert(cblk->mqenc);
+	jpc_mqenc_setctxs(cblk->mqenc, JPC_NUMCTXS, jpc_mqctxs);
+
+	cblk->numpasses = (cblk->numbps > 0) ? (3 * cblk->numbps - 2) : 0;
+	if (cblk->numpasses > 0) {
+		cblk->passes = jas_malloc(cblk->numpasses * sizeof(jpc_enc_pass_t));
+		assert(cblk->passes);
+	} else {
+		cblk->passes = 0;
+	}
+	endpasses = &cblk->passes[cblk->numpasses];
+	for (pass = cblk->passes; pass != endpasses; ++pass) {
+		pass->start = 0;
+		pass->end = 0;
+		pass->term = JPC_ISTERMINATED(pass - cblk->passes, 0, cblk->numpasses, (tcmpt->cblksty & JPC_COX_TERMALL) != 0, (tcmpt->cblksty & JPC_COX_LAZY) != 0);
+		pass->type = JPC_SEGTYPE(pass - cblk->passes, 0, (tcmpt->cblksty & JPC_COX_LAZY) != 0);
+		pass->lyrno = -1;
+		if (pass == endpasses - 1) {
+			assert(pass->term == 1);
+			pass->term = 1;
+		}
+	}
+
+	cblk->flags = jas_matrix_create(jas_matrix_numrows(cblk->data) + 2,
+	  jas_matrix_numcols(cblk->data) + 2);
+	assert(cblk->flags);
+
+	/* Run the coding passes, starting at the highest bit-plane of the block. */
+	bitpos = cblk->numbps - 1;
+	pass = cblk->passes;
+	n = cblk->numpasses;
+	while (--n >= 0) {
+
+		if (pass->type == JPC_SEG_MQ) {
+			/* NOP */
+		} else {
+			assert(pass->type == JPC_SEG_RAW);
+			if (!bout) {
+				bout = jpc_bitstream_sopen(cblk->stream, "w");
+				assert(bout);
+			}
+		}
+
+		passtype = (pass - cblk->passes + 2) % 3;
+		pass->start = jas_stream_tell(cblk->stream);
+		assert(bitpos >= 0);
+		vcausal = (tcmpt->cblksty & JPC_COX_VSC) != 0;
+		segsym = (tcmpt->cblksty & JPC_COX_SEGSYM) != 0;
+		if (pass->term) {
+			termmode = ((tcmpt->cblksty & JPC_COX_PTERM) ?
+			  JPC_MQENC_PTERM : JPC_MQENC_DEFTERM) + 1;
+		} else {
+			termmode = 0;
+		}
+		switch (passtype) {
+		case JPC_SIGPASS:
+			ret = (pass->type == JPC_SEG_MQ) ? jpc_encsigpass(cblk->mqenc,
+			  bitpos, band->orient, vcausal, cblk->flags,
+			  cblk->data, termmode, &pass->nmsedec) :
+			  jpc_encrawsigpass(bout, bitpos, vcausal, cblk->flags,
+			  cblk->data, termmode, &pass->nmsedec);
+			break;
+		case JPC_REFPASS:
+			ret = (pass->type == JPC_SEG_MQ) ? jpc_encrefpass(cblk->mqenc,
+			  bitpos, vcausal, cblk->flags, cblk->data, termmode,
+			  &pass->nmsedec) : jpc_encrawrefpass(bout, bitpos,
+			  vcausal, cblk->flags, cblk->data, termmode,
+			  &pass->nmsedec);
+			break;
+		case JPC_CLNPASS:
+			assert(pass->type == JPC_SEG_MQ);
+			ret = jpc_encclnpass(cblk->mqenc, bitpos, band->orient,
+			  vcausal, segsym, cblk->flags, cblk->data, termmode,
+			  &pass->nmsedec);
+			break;
+		default:
+			assert(0);
+			ret = -1;
+			break;
+		}
+		if (ret) {
+			/* A pass failed to write its output; give up on this code block. */
+			if (bout) {
+				jpc_bitstream_close(bout);
+			}
+			return -1;
+		}
+
+		if (pass->type == JPC_SEG_MQ) {
+			if (pass->term) {
+				jpc_mqenc_init(cblk->mqenc);
+			}
+			jpc_mqenc_getstate(cblk->mqenc, &pass->mqencstate);
+			pass->end = jas_stream_tell(cblk->stream);
+			if (tcmpt->cblksty & JPC_COX_RESET) {
+				jpc_mqenc_setctxs(cblk->mqenc, JPC_NUMCTXS, jpc_mqctxs);
+			}
+		} else {
+			if (pass->term) {
+				if (jpc_bitstream_pending(bout)) {
+					jpc_bitstream_outalign(bout, 0x2a);
+				}
+				jpc_bitstream_close(bout);
+				bout = 0;
+				pass->end = jas_stream_tell(cblk->stream);
+			} else {
+				pass->end = jas_stream_tell(cblk->stream) +
+				  jpc_bitstream_pending(bout);
+/* NOTE - This will not work.  need to adjust by # of pending output bytes */
+			}
+		}
+#if 0
+/* XXX - This assertion fails sometimes when various coding modes are used.
+This seems to be harmless, but why does it happen at all? */
+assert(jas_stream_tell(cblk->stream) == jas_stream_getrwcount(cblk->stream));
+#endif
+
+		pass->wmsedec = jpc_fixtodbl(band->rlvl->tcmpt->synweight) *
+		  jpc_fixtodbl(band->rlvl->tcmpt->synweight) *
+		  jpc_fixtodbl(band->synweight) *
+		  jpc_fixtodbl(band->synweight) *
+		  jpc_fixtodbl(band->absstepsize) * jpc_fixtodbl(band->absstepsize) *
+		  ((double) (1 << bitpos)) * ((double)(1 << bitpos)) *
+		  jpc_fixtodbl(pass->nmsedec);
+		pass->cumwmsedec = pass->wmsedec;
+		if (pass != cblk->passes) {
+			pass->cumwmsedec += pass[-1].cumwmsedec;
+		}
+		if (passtype == JPC_CLNPASS) {
+			--bitpos;
+		}
+		++pass;
+	}
+
+	/* Second sweep: clamp and adjust the recorded pass boundaries. */
+	n = 0;
+	endpasses = &cblk->passes[cblk->numpasses];
+	for (pass = cblk->passes; pass != endpasses; ++pass) {
+		if (pass->start < n) {
+			pass->start = n;
+		}
+		if (pass->end < n) {
+			pass->end = n;
+		}
+		if (!pass->term) {
+			termpass = pass;
+			while (termpass - pass < cblk->numpasses &&
+			  !termpass->term) {
+				++termpass;
+			}
+			if (pass->type == JPC_SEG_MQ) {
+				t = (pass->mqencstate.lastbyte == 0xff) ? 1 : 0;
+				if (pass->mqencstate.ctreg >= 5) {
+					adjust = 4 + t;
+				} else {
+					adjust = 5 + t;
+				}
+				pass->end += adjust;
+			}
+			if (pass->end > termpass->end) {
+				pass->end = termpass->end;
+			}
+			if ((c = getthebyte(cblk->stream, pass->end - 1)) == EOF) {
+				abort();
+			}
+			if (c == 0xff) {
+				++pass->end;
+			}
+			n = JAS_MAX(n, pass->end);
+		} else {
+			n = JAS_MAX(n, pass->end);
+		}
+	}
+
+	/* Close the raw bit stream if it is still open. */
+	if (bout) {
+		jpc_bitstream_close(bout);
+	}
+
+	return 0;
+}
+
+/******************************************************************************\
+* Code for significance pass.
+\******************************************************************************/
+
+#define	sigpass_step(fp, frowstep, dp, bitpos, one, nmsedec, orient, mqenc, vcausalflag) \
+{ /* Significance-pass encoding of the sample whose flags are at fp and value at dp. */ \
+	int f; \
+	int v; \
+	f = *(fp); \
+	if ((f & JPC_OTHSIGMSK) && !(f & (JPC_SIG | JPC_VISIT))) { /* some JPC_OTHSIGMSK bit set, JPC_SIG and JPC_VISIT clear */ \
+		v = (abs(*(dp)) & (one)) ? 1 : 0; /* bit of the magnitude selected by the mask one */ \
+		jpc_mqenc_setcurctx(mqenc, JPC_GETZCCTXNO(f, (orient))); \
+		jpc_mqenc_putbit(mqenc, v); \
+		if (v) { \
+			*(nmsedec) += JPC_GETSIGNMSEDEC(abs(*(dp)), (bitpos) + JPC_NUMEXTRABITS); \
+			v = ((*(dp) < 0) ? 1 : 0); /* v now holds the sign: 1 for a negative value */ \
+			jpc_mqenc_setcurctx(mqenc, JPC_GETSCCTXNO(f)); \
+			jpc_mqenc_putbit(mqenc, v ^ JPC_GETSPB(f)); /* the sign is coded XORed with JPC_GETSPB(f) */ \
+			JPC_UPDATEFLAGS4(fp, frowstep, v, vcausalflag); \
+			*(fp) |= JPC_SIG; \
+		} \
+		*(fp) |= JPC_VISIT; /* set for every sample coded here, whatever the bit was */ \
+	} \
+}
+
+/* Encode the significance pass of one bit-plane with the MQ coder, accumulating
+ * into *nmsedec.  A nonzero term flushes the coder with mode term - 1.
+ * Returns 0 on success, or -1 if the coder reports an error. */
+static int jpc_encsigpass(jpc_mqenc_t *mqenc, int bitpos, int orient, int vcausalflag,
+  jas_matrix_t *flags, jas_matrix_t *data, int term, long *nmsedec)
+{
+	jpc_fix_t *fstripe;
+	jpc_fix_t *dstripe;
+	jpc_fix_t *fcol;
+	jpc_fix_t *dcol;
+	jpc_fix_t *fp;
+	jpc_fix_t *dp;
+	int frowstep;
+	int drowstep;
+	int ncols;
+	int rowsleft;
+	int colsleft;
+	int nrows;
+	int one;
+
+	*nmsedec = 0;
+	frowstep = jas_matrix_rowstep(flags);
+	drowstep = jas_matrix_rowstep(data);
+	ncols = jas_matrix_numcols(data);
+
+	/* Mask that picks the bit of this bit-plane out of a magnitude. */
+	one = 1 << (bitpos + JPC_NUMEXTRABITS);
+
+	fstripe = jas_matrix_getref(flags, 1, 1);
+	dstripe = jas_matrix_getref(data, 0, 0);
+	rowsleft = jas_matrix_numrows(data);
+	while (rowsleft > 0) {
+		nrows = JAS_MIN(rowsleft, 4);
+		fcol = fstripe;
+		dcol = dstripe;
+		for (colsleft = ncols; colsleft > 0; --colsleft) {
+			/* Only the first sample of a column is given vcausalflag. */
+			fp = fcol;
+			dp = dcol;
+			sigpass_step(fp, frowstep, dp, bitpos, one,
+			  nmsedec, orient, mqenc, vcausalflag);
+			if (nrows > 1) {
+				fp += frowstep;
+				dp += drowstep;
+				sigpass_step(fp, frowstep, dp, bitpos, one,
+				  nmsedec, orient, mqenc, 0);
+			}
+			if (nrows > 2) {
+				fp += frowstep;
+				dp += drowstep;
+				sigpass_step(fp, frowstep, dp, bitpos, one,
+				  nmsedec, orient, mqenc, 0);
+			}
+			if (nrows > 3) {
+				fp += frowstep;
+				dp += drowstep;
+				sigpass_step(fp, frowstep, dp, bitpos, one,
+				  nmsedec, orient, mqenc, 0);
+			}
+			++fcol;
+			++dcol;
+		}
+		fstripe += frowstep << 2;
+		dstripe += drowstep << 2;
+		rowsleft -= 4;
+	}
+
+	if (term) {
+		jpc_mqenc_flush(mqenc, term - 1);
+	}
+
+	if (jpc_mqenc_error(mqenc)) {
+		return -1;
+	}
+	return 0;
+}
+
+#define	rawsigpass_step(fp, frowstep, dp, bitpos, one, nmsedec, out, vcausalflag) \
+{ /* Raw significance-pass encoding of one sample; may return -1 from the enclosing function. */ \
+	jpc_fix_t f = *(fp); \
+	jpc_fix_t v; \
+	if ((f & JPC_OTHSIGMSK) && !(f & (JPC_SIG | JPC_VISIT))) { /* same selection test as sigpass_step */ \
+		v = (abs(*(dp)) & (one)) ? 1 : 0; /* bit of the magnitude selected by the mask one */ \
+		if ((jpc_bitstream_putbit((out), v)) == EOF) { \
+			return -1; \
+		} \
+		if (v) { \
+			*(nmsedec) += JPC_GETSIGNMSEDEC(abs(*(dp)), (bitpos) + JPC_NUMEXTRABITS); \
+			v = ((*(dp) < 0) ? 1 : 0); /* sign bit, written as is */ \
+			if (jpc_bitstream_putbit(out, v) == EOF) { \
+				return -1; \
+			} \
+			JPC_UPDATEFLAGS4(fp, frowstep, v, vcausalflag); \
+			*(fp) |= JPC_SIG; \
+		} \
+		*(fp) |= JPC_VISIT; \
+	} \
+}
+
+/*
+ * Encode the significance pass of one bit-plane as raw bits written to out,
+ * accumulating into *nmsedec.  When term is nonzero the pass ends with
+ * jpc_bitstream_outalign(out, 0x2a).  Returns 0, or -1 if a bit cannot be written.
+ */
+static int jpc_encrawsigpass(jpc_bitstream_t *out, int bitpos, int vcausalflag, jas_matrix_t *flags,
+  jas_matrix_t *data, int term, long *nmsedec)
+{
+	int i;
+	int j;
+	int k;
+	int one;
+	int vscanlen;
+	int width;
+	int height;
+	int frowstep;
+	int drowstep;
+	int fstripestep;
+	int dstripestep;
+	jpc_fix_t *fstripestart;
+	jpc_fix_t *dstripestart;
+	jpc_fix_t *fp;
+	jpc_fix_t *dp;
+	jpc_fix_t *fvscanstart;
+	jpc_fix_t *dvscanstart;
+
+	*nmsedec = 0;
+	width = jas_matrix_numcols(data);
+	height = jas_matrix_numrows(data);
+	frowstep = jas_matrix_rowstep(flags);
+	drowstep = jas_matrix_rowstep(data);
+	fstripestep = frowstep << 2;
+	dstripestep = drowstep << 2;
+
+	one = 1 << (bitpos + JPC_NUMEXTRABITS);
+
+	fstripestart = jas_matrix_getref(flags, 1, 1);
+	dstripestart = jas_matrix_getref(data, 0, 0);
+	for (i = height; i > 0; i -= 4, fstripestart += fstripestep,
+	  dstripestart += dstripestep) {
+		fvscanstart = fstripestart;
+		dvscanstart = dstripestart;
+		vscanlen = JAS_MIN(i, 4);
+		for (j = width; j > 0; --j, ++fvscanstart, ++dvscanstart) {
+			fp = fvscanstart;
+			dp = dvscanstart;
+			k = vscanlen;
+
+			rawsigpass_step(fp, frowstep, dp, bitpos, one,
+			  nmsedec, out, vcausalflag);
+			if (--k <= 0) {
+				continue;
+			}
+			fp += frowstep;
+			dp += drowstep;
+
+			rawsigpass_step(fp, frowstep, dp, bitpos, one,
+			  nmsedec, out, 0);
+			if (--k <= 0) {
+				continue;
+			}
+			fp += frowstep;
+			dp += drowstep;
+
+			rawsigpass_step(fp, frowstep, dp, bitpos, one,
+			  nmsedec, out, 0);
+			if (--k <= 0) {
+				continue;
+			}
+			fp += frowstep;
+			dp += drowstep;
+
+			rawsigpass_step(fp, frowstep, dp, bitpos, one,
+			  nmsedec, out, 0);
+
+		}
+	}
+
+	if (term) {
+		jpc_bitstream_outalign(out, 0x2a);
+	}
+
+	return 0;
+}
+
+/******************************************************************************\
+* Code for refinement pass.
+\******************************************************************************/
+
+#define	refpass_step(fp, dp, bitpos, one, nmsedec, mqenc, vcausalflag) \
+{ /* Refinement-pass encoding of one sample; vcausalflag is not used. */ \
+	int v; \
+	if (((*(fp)) & (JPC_SIG | JPC_VISIT)) == JPC_SIG) { /* JPC_SIG set, JPC_VISIT clear */ \
+		(d) = *(dp); /* d is not a macro parameter: a variable named d must exist where the macro is expanded */ \
+		*(nmsedec) += JPC_GETREFNMSEDEC(abs(d), (bitpos) + JPC_NUMEXTRABITS); \
+		jpc_mqenc_setcurctx((mqenc), JPC_GETMAGCTXNO(*(fp))); \
+		v = (abs(d) & (one)) ? 1 : 0; /* bit of the magnitude selected by the mask one */ \
+		jpc_mqenc_putbit((mqenc), v); \
+		*(fp) |= JPC_REFINE; \
+	} \
+}
+
+/* Encode the refinement pass of one bit-plane with the MQ coder, accumulating
+ * into *nmsedec.  A nonzero term flushes the coder with mode term - 1.
+ * Returns 0 on success, or -1 if the coder reports an error. */
+static int jpc_encrefpass(jpc_mqenc_t *mqenc, int bitpos, int vcausalflag, jas_matrix_t *flags, jas_matrix_t *data,
+  int term, long *nmsedec)
+{
+	jpc_fix_t *fstripe;
+	jpc_fix_t *dstripe;
+	jpc_fix_t *fcol;
+	jpc_fix_t *dcol;
+	jpc_fix_t *fp;
+	jpc_fix_t *dp;
+	int frowstep;
+	int drowstep;
+	int ncols;
+	int rowsleft;
+	int colsleft;
+	int nrows;
+	int one;
+	int d;
+
+	*nmsedec = 0;
+	frowstep = jas_matrix_rowstep(flags);
+	drowstep = jas_matrix_rowstep(data);
+	ncols = jas_matrix_numcols(data);
+
+	/* Mask that picks the bit of this bit-plane out of a magnitude. */
+	one = 1 << (bitpos + JPC_NUMEXTRABITS);
+
+	fstripe = jas_matrix_getref(flags, 1, 1);
+	dstripe = jas_matrix_getref(data, 0, 0);
+	rowsleft = jas_matrix_numrows(data);
+	while (rowsleft > 0) {
+		nrows = JAS_MIN(rowsleft, 4);
+		fcol = fstripe;
+		dcol = dstripe;
+		for (colsleft = ncols; colsleft > 0; --colsleft) {
+			/* Samples of the column, from the top row of the stripe down. */
+			fp = fcol;
+			dp = dcol;
+			refpass_step(fp, dp, bitpos, one, nmsedec,
+			  mqenc, vcausalflag);
+			if (nrows > 1) {
+				fp += frowstep;
+				dp += drowstep;
+				refpass_step(fp, dp, bitpos, one, nmsedec,
+				  mqenc, 0);
+			}
+			if (nrows > 2) {
+				fp += frowstep;
+				dp += drowstep;
+				refpass_step(fp, dp, bitpos, one, nmsedec,
+				  mqenc, 0);
+			}
+			if (nrows > 3) {
+				fp += frowstep;
+				dp += drowstep;
+				refpass_step(fp, dp, bitpos, one, nmsedec,
+				  mqenc, 0);
+			}
+			++fcol;
+			++dcol;
+		}
+		fstripe += frowstep << 2;
+		dstripe += drowstep << 2;
+		rowsleft -= 4;
+	}
+
+	if (term) {
+		jpc_mqenc_flush(mqenc, term - 1);
+	}
+
+	if (jpc_mqenc_error(mqenc)) {
+		return -1;
+	}
+	return 0;
+}
+
+#define	rawrefpass_step(fp, dp, bitpos, one, nmsedec, out, vcausalflag) \
+{ /* Raw refinement-pass encoding of one sample; vcausalflag is not used. */ \
+	jpc_fix_t d; \
+	jpc_fix_t v; \
+	if (((*(fp)) & (JPC_SIG | JPC_VISIT)) == JPC_SIG) { /* JPC_SIG set, JPC_VISIT clear */ \
+		d = *(dp); \
+		*(nmsedec) += JPC_GETREFNMSEDEC(abs(d), (bitpos) + JPC_NUMEXTRABITS); \
+		v = (abs(d) & (one)) ? 1 : 0; /* bit of the magnitude selected by the mask one */ \
+		if (jpc_bitstream_putbit((out), v) == EOF) { \
+			return -1; /* returns from the function in which the macro is expanded */ \
+		} \
+		*(fp) |= JPC_REFINE; \
+	} \
+}
+
+/*
+ * Encode the refinement pass of one bit-plane as raw bits written to out,
+ * accumulating into *nmsedec.  When term is nonzero the pass ends with
+ * jpc_bitstream_outalign(out, 0x2a).  Returns 0, or -1 if a bit cannot be written.
+ */
+static int jpc_encrawrefpass(jpc_bitstream_t *out, int bitpos, int vcausalflag, jas_matrix_t *flags,
+  jas_matrix_t *data, int term, long *nmsedec)
+{
+	jpc_fix_t *fstripe;
+	jpc_fix_t *dstripe;
+	jpc_fix_t *fcol;
+	jpc_fix_t *dcol;
+	jpc_fix_t *fp;
+	jpc_fix_t *dp;
+	int frowstep;
+	int drowstep;
+	int ncols;
+	int rowsleft;
+	int colsleft;
+	int nrows;
+	int one;
+
+	*nmsedec = 0;
+	frowstep = jas_matrix_rowstep(flags);
+	drowstep = jas_matrix_rowstep(data);
+	ncols = jas_matrix_numcols(data);
+
+	/* Mask that picks the bit of this bit-plane out of a magnitude. */
+	one = 1 << (bitpos + JPC_NUMEXTRABITS);
+
+	fstripe = jas_matrix_getref(flags, 1, 1);
+	dstripe = jas_matrix_getref(data, 0, 0);
+	rowsleft = jas_matrix_numrows(data);
+	while (rowsleft > 0) {
+		nrows = JAS_MIN(rowsleft, 4);
+		fcol = fstripe;
+		dcol = dstripe;
+		for (colsleft = ncols; colsleft > 0; --colsleft) {
+			/* Samples of the column, from the top row of the stripe down. */
+			fp = fcol;
+			dp = dcol;
+			rawrefpass_step(fp, dp, bitpos, one, nmsedec,
+			  out, vcausalflag);
+			if (nrows > 1) {
+				fp += frowstep;
+				dp += drowstep;
+				rawrefpass_step(fp, dp, bitpos, one, nmsedec,
+				  out, vcausalflag);
+			}
+			if (nrows > 2) {
+				fp += frowstep;
+				dp += drowstep;
+				rawrefpass_step(fp, dp, bitpos, one, nmsedec,
+				  out, vcausalflag);
+			}
+			if (nrows > 3) {
+				fp += frowstep;
+				dp += drowstep;
+				rawrefpass_step(fp, dp, bitpos, one, nmsedec,
+				  out, vcausalflag);
+			}
+			++fcol;
+			++dcol;
+		}
+
+		fstripe += frowstep << 2;
+		dstripe += drowstep << 2;
+		rowsleft -= 4;
+	}
+
+	if (term) {
+		jpc_bitstream_outalign(out, 0x2a);
+	}
+
+	return 0;
+}
+
+/******************************************************************************\
+* Code for cleanup pass.
+\******************************************************************************/
+
+#define	clnpass_step(fp, frowstep, dp, bitpos, one, orient, nmsedec, mqenc, label1, label2, vcausalflag) \
+{ /* One cleanup-pass step; label1 and label2 are label tokens pasted in as jump targets. */ \
+	int f; \
+	int v; \
+label1 \
+	f = *(fp); \
+	if (!(f & (JPC_SIG | JPC_VISIT))) { /* neither JPC_SIG nor JPC_VISIT is set */ \
+		jpc_mqenc_setcurctx(mqenc, JPC_GETZCCTXNO(f, (orient))); \
+		v = (abs(*(dp)) & (one)) ? 1 : 0; /* magnitude bit selected by the 'one' mask */ \
+		jpc_mqenc_putbit((mqenc), v); \
+		if (v) { \
+label2 \
+			f = *(fp); /* reloaded because a jump to label2 skips the load after label1 */ \
+			/* Coefficient is significant. */ \
+			*(nmsedec) += JPC_GETSIGNMSEDEC(abs(*(dp)), (bitpos) + JPC_NUMEXTRABITS); \
+			jpc_mqenc_setcurctx((mqenc), JPC_GETSCCTXNO(f)); \
+			v = ((*(dp) < 0) ? 1 : 0); /* 1 when the sample is negative */ \
+			jpc_mqenc_putbit((mqenc), v ^ JPC_GETSPB(f)); \
+			JPC_UPDATEFLAGS4((fp), (frowstep), v, vcausalflag); \
+			*(fp) |= JPC_SIG; \
+		} \
+	} \
+	*(fp) &= ~JPC_VISIT; /* executed on every path through the step */ \
+}
+
+static int jpc_encclnpass(jpc_mqenc_t *mqenc, int bitpos, int orient, int vcausalflag, int segsymflag, jas_matrix_t *flags,
+  jas_matrix_t *data, int term, long *nmsedec)
+{
+	int i; /* rows not yet processed (stripe loop counter) */
+	int j; /* columns left in the current stripe */
+	int k; /* rows still to be stepped in the current column */
+	int vscanlen; /* rows in the current stripe, at most 4 */
+	int v; /* value of the tested magnitude bit during the run search */
+	int runlen; /* rows of the column before the first set bit */
+	jpc_fix_t *fp; /* current flag word */
+	int width;
+	int height;
+	jpc_fix_t *dp; /* current sample */
+	int one; /* mask tested against each sample magnitude */
+	int frowstep;
+	int drowstep;
+	int fstripestep; /* four flag rows */
+	int dstripestep; /* four data rows */
+	jpc_fix_t *fstripestart;
+	jpc_fix_t *dstripestart;
+	jpc_fix_t *fvscanstart; /* top flag word of the current column */
+	jpc_fix_t *dvscanstart; /* top sample of the current column */
+	/* Cleanup pass over all samples, in stripes of up to four rows; returns 0, or -1 if jpc_mqenc_error() is set. */
+	*nmsedec = 0;
+	width = jas_matrix_numcols(data);
+	height = jas_matrix_numrows(data);
+	frowstep = jas_matrix_rowstep(flags);
+	drowstep = jas_matrix_rowstep(data);
+	fstripestep = frowstep << 2;
+	dstripestep = drowstep << 2;
+
+	one = 1 << (bitpos + JPC_NUMEXTRABITS);
+
+	fstripestart = jas_matrix_getref(flags, 1, 1);
+	dstripestart = jas_matrix_getref(data, 0, 0);
+	for (i = height; i > 0; i -= 4, fstripestart += fstripestep,
+	  dstripestart += dstripestep) {
+		fvscanstart = fstripestart;
+		dvscanstart = dstripestart;
+		vscanlen = JAS_MIN(i, 4); /* the last stripe may be short */
+		for (j = width; j > 0; --j, ++fvscanstart, ++dvscanstart) {
+			/* Run mode: a full 4-row column whose flag words all have JPC_SIG, JPC_VISIT and JPC_OTHSIGMSK clear. */
+			fp = fvscanstart;
+			if (vscanlen >= 4 && !((*fp) & (JPC_SIG | JPC_VISIT |
+			  JPC_OTHSIGMSK)) && (fp += frowstep, !((*fp) & (JPC_SIG |
+			  JPC_VISIT | JPC_OTHSIGMSK))) && (fp += frowstep, !((*fp) &
+			  (JPC_SIG | JPC_VISIT | JPC_OTHSIGMSK))) && (fp += frowstep,
+			  !((*fp) & (JPC_SIG | JPC_VISIT | JPC_OTHSIGMSK)))) {
+				dp = dvscanstart;
+				for (k = 0; k < vscanlen; ++k) { /* find the first row whose tested bit is set */
+					v = (abs(*dp) & one) ? 1 : 0;
+					if (v) {
+						break;
+					}
+					dp += drowstep;
+				}
+				runlen = k; /* equals vscanlen (4) when no row has the bit set */
+				if (runlen >= 4) {
+					jpc_mqenc_setcurctx(mqenc, JPC_AGGCTXNO);
+					jpc_mqenc_putbit(mqenc, 0); /* a single 0 covers the whole column */
+					continue;
+				}
+				jpc_mqenc_setcurctx(mqenc, JPC_AGGCTXNO);
+				jpc_mqenc_putbit(mqenc, 1);
+				jpc_mqenc_setcurctx(mqenc, JPC_UCTXNO);
+				jpc_mqenc_putbit(mqenc, runlen >> 1); /* runlen is sent as two bits, high bit first */
+				jpc_mqenc_putbit(mqenc, runlen & 1);
+				fp = fvscanstart + frowstep * runlen; /* move to the row that ended the run */
+				dp = dvscanstart + drowstep * runlen;
+				k = vscanlen - runlen;
+				switch (runlen) { /* enter that row's step at its label2 target, inside the 'if (v)' branch */
+				case 0:
+					goto clnpass_partial0;
+					break;
+				case 1:
+					goto clnpass_partial1;
+					break;
+				case 2:
+					goto clnpass_partial2;
+					break;
+				case 3:
+					goto clnpass_partial3;
+					break;
+				}
+			} else {
+				runlen = 0;
+				fp = fvscanstart;
+				dp = dvscanstart;
+				k = vscanlen;
+				goto clnpass_full0; /* label1 target: start of the first step */
+			}
+			clnpass_step(fp, frowstep, dp, bitpos, one,
+			  orient, nmsedec, mqenc, clnpass_full0:, clnpass_partial0:, vcausalflag);
+			if (--k <= 0) {
+				continue;
+			}
+			fp += frowstep;
+			dp += drowstep;
+			clnpass_step(fp, frowstep, dp, bitpos, one,
+				orient, nmsedec, mqenc, ;, clnpass_partial1:, 0);
+			if (--k <= 0) {
+				continue;
+			}
+			fp += frowstep;
+			dp += drowstep;
+			clnpass_step(fp, frowstep, dp, bitpos, one,
+				orient, nmsedec, mqenc, ;, clnpass_partial2:, 0);
+			if (--k <= 0) {
+				continue;
+			}
+			fp += frowstep;
+			dp += drowstep;
+			clnpass_step(fp, frowstep, dp, bitpos, one,
+				orient, nmsedec, mqenc, ;, clnpass_partial3:, 0);
+		}
+	}
+
+	if (segsymflag) { /* append the four bits 1, 0, 1, 0 in the JPC_UCTXNO context */
+		jpc_mqenc_setcurctx(mqenc, JPC_UCTXNO);
+		jpc_mqenc_putbit(mqenc, 1);
+		jpc_mqenc_putbit(mqenc, 0);
+		jpc_mqenc_putbit(mqenc, 1);
+		jpc_mqenc_putbit(mqenc, 0);
+	}
+
+	if (term) { /* term is biased by one: 0 means no flush */
+		jpc_mqenc_flush(mqenc, term - 1);
+	}
+
+	return jpc_mqenc_error(mqenc) ? (-1) : 0;
+}
diff --git a/src/libjasper/jpc/jpc_t1enc.h b/src/libjasper/jpc/jpc_t1enc.h
new file mode 100644
index 0000000..5cdf6fa
--- /dev/null
+++ b/src/libjasper/jpc/jpc_t1enc.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tier 1 Encoder
+ *
+ * $Id: jpc_t1enc.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_T1ENC_H
+#define JPC_T1ENC_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_seq.h"
+
+#include "jpc_enc.h"
+#include "jpc_t1cod.h"
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+/* Encode all of the code blocks. */
+int jpc_enc_enccblks(jpc_enc_t *enc);
+
+/* Encode a single code block. */
+int jpc_enc_enccblk(jpc_enc_t *enc, jas_stream_t *out, jpc_enc_tcmpt_t *comp,
+  jpc_enc_band_t *band, jpc_enc_cblk_t *cblk);
+
+#endif /* JPC_T1ENC_H */
diff --git a/src/libjasper/jpc/jpc_t2cod.c b/src/libjasper/jpc/jpc_t2cod.c
new file mode 100644
index 0000000..a186d35
--- /dev/null
+++ b/src/libjasper/jpc/jpc_t2cod.c
@@ -0,0 +1,684 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tier-2 Coding Library
+ *
+ * $Id: jpc_t2cod.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#include "jasper/jas_math.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_math.h"
+
+#include "jpc_cs.h"
+#include "jpc_t2cod.h"
+#include "jpc_math.h"
+
+static int jpc_pi_nextlrcp(jpc_pi_t *pi);
+static int jpc_pi_nextrlcp(jpc_pi_t *pi);
+static int jpc_pi_nextrpcl(jpc_pi_t *pi);
+static int jpc_pi_nextpcrl(jpc_pi_t *pi);
+static int jpc_pi_nextcprl(jpc_pi_t *pi);
+
+/*
+ * Advance pi to the next packet.
+ * Returns 0 when a packet was selected (pi->valid is then set and pi->pktno
+ * has been incremented) and 1 once every progression change in pi->pchglist
+ * and the default change pi->defaultpchg have been used up.
+ */
+int jpc_pi_next(jpc_pi_t *pi)
+{
+	int status;
+	int numpchgs;
+
+	for (;;) {
+		pi->valid = false;
+		if (!pi->pchg) {
+			/* No active progression change: move on to the next one. */
+			++pi->pchgno;
+			pi->compno = 0;
+			pi->rlvlno = 0;
+			pi->prcno = 0;
+			pi->lyrno = 0;
+			pi->prgvolfirst = true;
+			numpchgs = jpc_pchglist_numpchgs(pi->pchglist);
+			if (pi->pchgno > numpchgs) {
+				return 1;
+			}
+			/* Index numpchgs selects the default change stored in pi itself. */
+			if (pi->pchgno == numpchgs) {
+				pi->pchg = &pi->defaultpchg;
+			} else {
+				pi->pchg = jpc_pchglist_get(pi->pchglist, pi->pchgno);
+			}
+		}
+		/* Hand over to the iterator for the active progression order. */
+		if (pi->pchg->prgord == JPC_COD_LRCPPRG) {
+			status = jpc_pi_nextlrcp(pi);
+		} else if (pi->pchg->prgord == JPC_COD_RLCPPRG) {
+			status = jpc_pi_nextrlcp(pi);
+		} else if (pi->pchg->prgord == JPC_COD_RPCLPRG) {
+			status = jpc_pi_nextrpcl(pi);
+		} else if (pi->pchg->prgord == JPC_COD_PCRLPRG) {
+			status = jpc_pi_nextpcrl(pi);
+		} else if (pi->pchg->prgord == JPC_COD_CPRLPRG) {
+			status = jpc_pi_nextcprl(pi);
+		} else {
+			status = -1;
+		}
+		if (status == 0) {
+			pi->valid = true;
+			++pi->pktno;
+			return 0;
+		}
+		/* Any nonzero status (including -1) retires the current change. */
+		pi->pchg = 0;
+	}
+}
+
+static int jpc_pi_nextlrcp(register jpc_pi_t *pi)
+{
+	jpc_pchg_t *pchg; /* progression change being iterated (pi->pchg) */
+	int *prclyrno; /* prclyrnos entry of the current precinct */
+	/* Next packet with the loops nested layer > resolution level > component > precinct. */
+	pchg = pi->pchg;
+	if (!pi->prgvolfirst) { /* resumed call: the loop counters kept in pi are reused */
+		prclyrno = &pi->pirlvl->prclyrnos[pi->prcno];
+		goto skip; /* re-enter at the increment step of the innermost loop */
+	} else {
+		pi->prgvolfirst = false;
+	}
+	/* Returns 0 with pi identifying the selected packet, or 1 when nothing is left. */
+	for (pi->lyrno = 0; pi->lyrno < pi->numlyrs && pi->lyrno <
+	  JAS_CAST(int, pchg->lyrnoend); ++pi->lyrno) {
+		for (pi->rlvlno = pchg->rlvlnostart; pi->rlvlno < pi->maxrlvls &&
+		  pi->rlvlno < pchg->rlvlnoend; ++pi->rlvlno) {
+			for (pi->compno = pchg->compnostart, pi->picomp =
+			  &pi->picomps[pi->compno]; pi->compno < pi->numcomps
+			  && pi->compno < JAS_CAST(int, pchg->compnoend); ++pi->compno,
+			  ++pi->picomp) {
+				if (pi->rlvlno >= pi->picomp->numrlvls) { /* this component has no such level */
+					continue;
+				}
+				pi->pirlvl = &pi->picomp->pirlvls[pi->rlvlno];
+				for (pi->prcno = 0, prclyrno =
+				  pi->pirlvl->prclyrnos; pi->prcno <
+				  pi->pirlvl->numprcs; ++pi->prcno,
+				  ++prclyrno) {
+					if (pi->lyrno >= *prclyrno) { /* layer not yet returned for this precinct */
+						*prclyrno = pi->lyrno;
+						++(*prclyrno); /* entry now holds lyrno + 1 */
+						return 0;
+					}
+skip:
+					;
+				}
+			}
+		}
+	}
+	return 1;
+}
+
+static int jpc_pi_nextrlcp(register jpc_pi_t *pi)
+{
+	jpc_pchg_t *pchg; /* progression change being iterated (pi->pchg) */
+	int *prclyrno; /* prclyrnos entry of the current precinct */
+	/* Next packet with the loops nested resolution level > layer > component > precinct. */
+	pchg = pi->pchg;
+	if (!pi->prgvolfirst) { /* resumed call: the loop counters kept in pi are reused */
+		assert(pi->prcno < pi->pirlvl->numprcs);
+		prclyrno = &pi->pirlvl->prclyrnos[pi->prcno];
+		goto skip; /* re-enter at the increment step of the innermost loop */
+	} else {
+		pi->prgvolfirst = 0;
+	}
+	/* Returns 0 with pi identifying the selected packet, or 1 when nothing is left. */
+	for (pi->rlvlno = pchg->rlvlnostart; pi->rlvlno < pi->maxrlvls &&
+	  pi->rlvlno < pchg->rlvlnoend; ++pi->rlvlno) {
+		for (pi->lyrno = 0; pi->lyrno < pi->numlyrs && pi->lyrno <
+		  JAS_CAST(int, pchg->lyrnoend); ++pi->lyrno) {
+			for (pi->compno = pchg->compnostart, pi->picomp =
+			  &pi->picomps[pi->compno]; pi->compno < pi->numcomps &&
+			  pi->compno < JAS_CAST(int, pchg->compnoend); ++pi->compno, ++pi->picomp) {
+				if (pi->rlvlno >= pi->picomp->numrlvls) { /* this component has no such level */
+					continue;
+				}
+				pi->pirlvl = &pi->picomp->pirlvls[pi->rlvlno];
+				for (pi->prcno = 0, prclyrno = pi->pirlvl->prclyrnos;
+				  pi->prcno < pi->pirlvl->numprcs; ++pi->prcno, ++prclyrno) {
+					if (pi->lyrno >= *prclyrno) { /* layer not yet returned for this precinct */
+						*prclyrno = pi->lyrno;
+						++(*prclyrno); /* entry now holds lyrno + 1 */
+						return 0;
+					}
+skip:
+					;
+				}
+			}
+		}
+	}
+	return 1;
+}
+
+/*
+ * Select the next packet with the loops nested resolution level > y > x >
+ * component > layer.  All loop state is kept in pi, so a later call jumps to
+ * 'skip' and continues from the increment step of the innermost loop.
+ * Returns 0 when a packet was selected (pi then identifies it) and 1 when
+ * this progression change has no packets left.
+ */
+static int jpc_pi_nextrpcl(register jpc_pi_t *pi)
+{
+	int rlvlno, compno;
+	int prchind, prcvind;
+	int xstep, ystep;
+	int *prclyrno;
+	jpc_pirlvl_t *pirlvl;
+	jpc_picomp_t *picomp;
+	jpc_pchg_t *pchg;
+	uint_fast32_t r, rpx, rpy, trx0, try0;
+
+	pchg = pi->pchg;
+	if (!pi->prgvolfirst) {
+		goto skip;
+	} else {
+		pi->xstep = 0; /* 0 stands for "not set yet" while the minimum is searched */
+		pi->ystep = 0;
+		for (compno = 0, picomp = pi->picomps; compno < pi->numcomps;
+		  ++compno, ++picomp) {
+			for (rlvlno = 0, pirlvl = picomp->pirlvls; rlvlno <
+			  picomp->numrlvls; ++rlvlno, ++pirlvl) {
+				xstep = picomp->hsamp * (1 << (pirlvl->prcwidthexpn +
+				  picomp->numrlvls - rlvlno - 1));
+				ystep = picomp->vsamp * (1 << (pirlvl->prcheightexpn +
+				  picomp->numrlvls - rlvlno - 1));
+				pi->xstep = (!pi->xstep) ? xstep : JAS_MIN(pi->xstep, xstep);
+				pi->ystep = (!pi->ystep) ? ystep : JAS_MIN(pi->ystep, ystep);
+			}
+		}
+		pi->prgvolfirst = 0;
+	}
+	/* x and y advance to the next multiple of the smallest step found above. */
+	for (pi->rlvlno = pchg->rlvlnostart; pi->rlvlno < pchg->rlvlnoend &&
+	  pi->rlvlno < pi->maxrlvls; ++pi->rlvlno) {
+		for (pi->y = pi->ystart; pi->y < pi->yend; pi->y +=
+		  pi->ystep - (pi->y % pi->ystep)) {
+			for (pi->x = pi->xstart; pi->x < pi->xend; pi->x +=
+			  pi->xstep - (pi->x % pi->xstep)) {
+				for (pi->compno = pchg->compnostart,
+				  pi->picomp = &pi->picomps[pi->compno];
+				  pi->compno < JAS_CAST(int, pchg->compnoend) && pi->compno <
+				  pi->numcomps; ++pi->compno, ++pi->picomp) {
+					if (pi->rlvlno >= pi->picomp->numrlvls) {
+						continue;
+					}
+					pi->pirlvl = &pi->picomp->pirlvls[pi->rlvlno];
+					if (pi->pirlvl->numprcs == 0) {
+						continue;
+					}
+					r = pi->picomp->numrlvls - 1 - pi->rlvlno;
+					rpx = r + pi->pirlvl->prcwidthexpn;
+					rpy = r + pi->pirlvl->prcheightexpn;
+					trx0 = JPC_CEILDIV(pi->xstart, pi->picomp->hsamp << r);
+					try0 = JPC_CEILDIV(pi->ystart, pi->picomp->vsamp << r);
+					if (((pi->x == pi->xstart && ((trx0 << r) % (1 << rpx)))
+					  || !(pi->x % (pi->picomp->hsamp << rpx))) && /* scaled by hsamp, as in PCRL/CPRL */
+					  ((pi->y == pi->ystart && ((try0 << r) % (1 << rpy)))
+					  || !(pi->y % (pi->picomp->vsamp << rpy)))) { /* scaled by vsamp likewise */
+						prchind = JPC_FLOORDIVPOW2(JPC_CEILDIV(pi->x, pi->picomp->hsamp
+						  << r), pi->pirlvl->prcwidthexpn) - JPC_FLOORDIVPOW2(trx0,
+						  pi->pirlvl->prcwidthexpn);
+						prcvind = JPC_FLOORDIVPOW2(JPC_CEILDIV(pi->y, pi->picomp->vsamp
+						  << r), pi->pirlvl->prcheightexpn) - JPC_FLOORDIVPOW2(try0,
+						  pi->pirlvl->prcheightexpn);
+						pi->prcno = prcvind * pi->pirlvl->numhprcs + prchind;
+						/* prcno is the row-major index built from the two precinct indices. */
+						assert(pi->prcno < pi->pirlvl->numprcs);
+						for (pi->lyrno = 0; pi->lyrno <
+						  pi->numlyrs && pi->lyrno < JAS_CAST(int, pchg->lyrnoend); ++pi->lyrno) {
+							prclyrno = &pi->pirlvl->prclyrnos[pi->prcno];
+							if (pi->lyrno >= *prclyrno) {
+								++(*prclyrno);
+								return 0;
+							}
+skip:
+							;
+						}
+					}
+				}
+			}
+		}
+	}
+	return 1;
+}
+
+static int jpc_pi_nextpcrl(register jpc_pi_t *pi)
+{
+	int rlvlno;
+	jpc_pirlvl_t *pirlvl;
+	jpc_pchg_t *pchg; /* progression change being iterated (pi->pchg) */
+	int prchind; /* horizontal precinct index */
+	int prcvind; /* vertical precinct index */
+	int *prclyrno; /* prclyrnos entry of the current precinct */
+	int compno;
+	jpc_picomp_t *picomp;
+	int xstep;
+	int ystep;
+	uint_fast32_t trx0;
+	uint_fast32_t try0;
+	uint_fast32_t r; /* levels between the current one and the highest of the component */
+	uint_fast32_t rpx;
+	uint_fast32_t rpy;
+	/* Next packet with the loops nested y > x > component > resolution level > layer. */
+	pchg = pi->pchg;
+	if (!pi->prgvolfirst) {
+		goto skip; /* resumed call: re-enter at the increment step of the layer loop */
+	} else {
+		pi->xstep = 0; /* 0 stands for "not set yet" while the minimum is searched */
+		pi->ystep = 0;
+		for (compno = 0, picomp = pi->picomps; compno < pi->numcomps;
+		  ++compno, ++picomp) {
+			for (rlvlno = 0, pirlvl = picomp->pirlvls; rlvlno <
+			  picomp->numrlvls; ++rlvlno, ++pirlvl) {
+				xstep = picomp->hsamp * (1 <<
+				  (pirlvl->prcwidthexpn + picomp->numrlvls -
+				  rlvlno - 1));
+				ystep = picomp->vsamp * (1 <<
+				  (pirlvl->prcheightexpn + picomp->numrlvls -
+				  rlvlno - 1));
+				pi->xstep = (!pi->xstep) ? xstep :
+				  JAS_MIN(pi->xstep, xstep);
+				pi->ystep = (!pi->ystep) ? ystep :
+				  JAS_MIN(pi->ystep, ystep);
+			}
+		}
+		pi->prgvolfirst = 0;
+	}
+	/* Returns 0 with pi identifying the selected packet, or 1 when nothing is left. */
+	for (pi->y = pi->ystart; pi->y < pi->yend; pi->y += pi->ystep -
+	  (pi->y % pi->ystep)) {
+		for (pi->x = pi->xstart; pi->x < pi->xend; pi->x += pi->xstep -
+		  (pi->x % pi->xstep)) {
+			for (pi->compno = pchg->compnostart, pi->picomp =
+			  &pi->picomps[pi->compno]; pi->compno < pi->numcomps
+			  && pi->compno < JAS_CAST(int, pchg->compnoend); ++pi->compno,
+			  ++pi->picomp) {
+				for (pi->rlvlno = pchg->rlvlnostart,
+				  pi->pirlvl = &pi->picomp->pirlvls[pi->rlvlno];
+				  pi->rlvlno < pi->picomp->numrlvls &&
+				  pi->rlvlno < pchg->rlvlnoend; ++pi->rlvlno,
+				  ++pi->pirlvl) {
+					if (pi->pirlvl->numprcs == 0) {
+						continue;
+					}
+					r = pi->picomp->numrlvls - 1 - pi->rlvlno;
+					trx0 = JPC_CEILDIV(pi->xstart, pi->picomp->hsamp << r);
+					try0 = JPC_CEILDIV(pi->ystart, pi->picomp->vsamp << r);
+					rpx = r + pi->pirlvl->prcwidthexpn;
+					rpy = r + pi->pirlvl->prcheightexpn;
+					if (((pi->x == pi->xstart && ((trx0 << r) % (1 << rpx))) ||
+					  !(pi->x % (pi->picomp->hsamp << rpx))) &&
+					  ((pi->y == pi->ystart && ((try0 << r) % (1 << rpy))) ||
+					  !(pi->y % (pi->picomp->vsamp << rpy)))) { /* NOTE(review): appears to test whether a precinct begins at (x, y) - confirm */
+						prchind = JPC_FLOORDIVPOW2(JPC_CEILDIV(pi->x, pi->picomp->hsamp
+						  << r), pi->pirlvl->prcwidthexpn) - JPC_FLOORDIVPOW2(trx0,
+						  pi->pirlvl->prcwidthexpn);
+						prcvind = JPC_FLOORDIVPOW2(JPC_CEILDIV(pi->y, pi->picomp->vsamp
+						  << r), pi->pirlvl->prcheightexpn) - JPC_FLOORDIVPOW2(try0,
+						  pi->pirlvl->prcheightexpn);
+						pi->prcno = prcvind * pi->pirlvl->numhprcs + prchind; /* row-major precinct index */
+						assert(pi->prcno < pi->pirlvl->numprcs);
+						for (pi->lyrno = 0; pi->lyrno < pi->numlyrs &&
+						  pi->lyrno < JAS_CAST(int, pchg->lyrnoend); ++pi->lyrno) {
+							prclyrno = &pi->pirlvl->prclyrnos[pi->prcno];
+							if (pi->lyrno >= *prclyrno) { /* layer not yet returned for this precinct */
+								++(*prclyrno);
+								return 0;
+							}
+skip:
+							;
+						}
+					}
+				}
+			}
+		}
+	}
+	return 1;
+}
+
+/*
+ * Select the next packet with the loops nested component > y > x >
+ * resolution level > layer.  A later call jumps to 'skip' and continues from
+ * the saved loop state in pi.  Returns 0 when a packet was selected, else 1.
+ */
+static int jpc_pi_nextcprl(register jpc_pi_t *pi)
+{
+	int rlvlno;
+	int prchind, prcvind;
+	int *prclyrno;
+	jpc_pirlvl_t *pirlvl;
+	jpc_pchg_t *pchg;
+	uint_fast32_t r, rpx, rpy, trx0, try0;
+
+	pchg = pi->pchg;
+	if (!pi->prgvolfirst) {
+		goto skip;
+	} else {
+		pi->prgvolfirst = 0;
+	}
+	/* Bounded by numcomps as in the other iterators, so compnoend cannot run past pi->picomps. */
+	for (pi->compno = pchg->compnostart, pi->picomp =
+	  &pi->picomps[pi->compno]; pi->compno < pi->numcomps &&
+	  pi->compno < JAS_CAST(int, pchg->compnoend); ++pi->compno, ++pi->picomp) {
+		pirlvl = pi->picomp->pirlvls;
+		pi->xstep = pi->picomp->hsamp * (1 << (pirlvl->prcwidthexpn +
+		  pi->picomp->numrlvls - 1));
+		pi->ystep = pi->picomp->vsamp * (1 << (pirlvl->prcheightexpn +
+		  pi->picomp->numrlvls - 1));
+		for (rlvlno = 1, pirlvl = &pi->picomp->pirlvls[1];
+		  rlvlno < pi->picomp->numrlvls; ++rlvlno, ++pirlvl) {
+			pi->xstep = JAS_MIN(pi->xstep, pi->picomp->hsamp * (1 <<
+			  (pirlvl->prcwidthexpn + pi->picomp->numrlvls -
+			  rlvlno - 1)));
+			pi->ystep = JAS_MIN(pi->ystep, pi->picomp->vsamp * (1 <<
+			  (pirlvl->prcheightexpn + pi->picomp->numrlvls -
+			  rlvlno - 1)));
+		}
+		for (pi->y = pi->ystart; pi->y < pi->yend;
+		  pi->y += pi->ystep - (pi->y % pi->ystep)) {
+			for (pi->x = pi->xstart; pi->x < pi->xend;
+			  pi->x += pi->xstep - (pi->x % pi->xstep)) {
+				for (pi->rlvlno = pchg->rlvlnostart,
+				  pi->pirlvl = &pi->picomp->pirlvls[pi->rlvlno];
+				  pi->rlvlno < pi->picomp->numrlvls && pi->rlvlno <
+				  pchg->rlvlnoend; ++pi->rlvlno, ++pi->pirlvl) {
+					if (pi->pirlvl->numprcs == 0) {
+						continue;
+					}
+					r = pi->picomp->numrlvls - 1 - pi->rlvlno;
+					trx0 = JPC_CEILDIV(pi->xstart, pi->picomp->hsamp << r);
+					try0 = JPC_CEILDIV(pi->ystart, pi->picomp->vsamp << r);
+					rpx = r + pi->pirlvl->prcwidthexpn;
+					rpy = r + pi->pirlvl->prcheightexpn;
+					if (((pi->x == pi->xstart && ((trx0 << r) % (1 << rpx))) ||
+					  !(pi->x % (pi->picomp->hsamp << rpx))) &&
+					  ((pi->y == pi->ystart && ((try0 << r) % (1 << rpy))) ||
+					  !(pi->y % (pi->picomp->vsamp << rpy)))) {
+						prchind = JPC_FLOORDIVPOW2(JPC_CEILDIV(pi->x, pi->picomp->hsamp
+						  << r), pi->pirlvl->prcwidthexpn) - JPC_FLOORDIVPOW2(trx0,
+						  pi->pirlvl->prcwidthexpn);
+						prcvind = JPC_FLOORDIVPOW2(JPC_CEILDIV(pi->y, pi->picomp->vsamp
+						  << r), pi->pirlvl->prcheightexpn) - JPC_FLOORDIVPOW2(try0,
+						  pi->pirlvl->prcheightexpn);
+						pi->prcno = prcvind *
+						  pi->pirlvl->numhprcs +
+						  prchind;
+						assert(pi->prcno <
+						  pi->pirlvl->numprcs);
+						for (pi->lyrno = 0; pi->lyrno <
+						  pi->numlyrs && pi->lyrno < JAS_CAST(int, pchg->lyrnoend); ++pi->lyrno) {
+							prclyrno = &pi->pirlvl->prclyrnos[pi->prcno];
+							if (pi->lyrno >= *prclyrno) {
+								++(*prclyrno);
+								return 0;
+							}
+skip:
+							;
+						}
+					}
+				}
+			}
+		}
+	}
+	return 1;
+}
+
+/* Free the prclyrnos array attached to a resolution level, if there is one. */
+static void pirlvl_destroy(jpc_pirlvl_t *rlvl)
+{
+	if (!rlvl->prclyrnos) { return; }
+	jas_free(rlvl->prclyrnos);
+}
+
+/* Destroy every resolution level of a component, then free the level array. */
+static void jpc_picomp_destroy(jpc_picomp_t *picomp)
+{
+	int idx;
+	if (!picomp->pirlvls) {
+		return;
+	}
+	for (idx = 0; idx < picomp->numrlvls; ++idx) {
+		pirlvl_destroy(&picomp->pirlvls[idx]);
+	}
+	jas_free(picomp->pirlvls);
+}
+
+/* Release a jpc_pi_t: its components, its pchglist, and finally pi itself. */
+void jpc_pi_destroy(jpc_pi_t *pi)
+{
+	int idx;
+	if (pi->picomps) {
+		/* Each component frees its own resolution levels first. */
+		for (idx = 0; idx < pi->numcomps; ++idx) {
+			jpc_picomp_destroy(&pi->picomps[idx]);
+		}
+		jas_free(pi->picomps);
+	}
+	if (pi->pchglist) {
+		jpc_pchglist_destroy(pi->pchglist);
+	}
+	jas_free(pi);
+}
+
+/* Allocate a jpc_pi_t with picomps = 0, pchgno = 0 and a new pchglist; 0 on failure. */
+jpc_pi_t *jpc_pi_create0()
+{
+	jpc_pi_t *pi = jas_malloc(sizeof(jpc_pi_t));
+	if (pi) {
+		pi->picomps = 0;
+		pi->pchgno = 0;
+		if (!(pi->pchglist = jpc_pchglist_create())) {
+			jas_free(pi);
+			pi = 0;
+		}
+	}
+	return pi;
+}
+
+int jpc_pi_addpchg(jpc_pi_t *pi, jpc_pocpchg_t *pchg)
+{
+	return jpc_pchglist_insert(pi->pchglist, -1, pchg); /* -1 appends at the end of pi->pchglist */
+}
+
+/* Allocate an empty list (no entries, no storage); returns 0 if allocation fails. */
+jpc_pchglist_t *jpc_pchglist_create()
+{
+	jpc_pchglist_t *list = jas_malloc(sizeof(jpc_pchglist_t));
+	if (list) {
+		list->pchgs = 0;
+		list->maxpchgs = 0;
+		list->numpchgs = 0;
+	}
+	return list;
+}
+
+int jpc_pchglist_insert(jpc_pchglist_t *pchglist, int pchgno, jpc_pchg_t *pchg)
+{ /* Insert pchg at index pchgno (negative: append). Returns 0 on success, -1 on failure. */
+	int i;
+	jpc_pchg_t **newpchgs;
+	if (pchgno < 0) {
+		pchgno = pchglist->numpchgs;
+	} else if (pchgno > pchglist->numpchgs) {
+		return -1; /* index past the end: would leave a gap or write out of bounds */
+	}
+	if (pchglist->numpchgs >= pchglist->maxpchgs) { /* full: grow by 128 slots */
+		if (!(newpchgs = jas_realloc(pchglist->pchgs, (pchglist->maxpchgs + 128) * sizeof(jpc_pchg_t *)))) {
+			return -1; /* the list is left untouched */
+		}
+		pchglist->maxpchgs += 128;
+		pchglist->pchgs = newpchgs;
+	}
+	for (i = pchglist->numpchgs; i > pchgno; --i) {
+		pchglist->pchgs[i] = pchglist->pchgs[i - 1]; /* shift up to open slot pchgno */
+	}
+	pchglist->pchgs[pchgno] = pchg;
+	++pchglist->numpchgs;
+	return 0;
+}
+
+/* Unlink entry pchgno, close the gap, and return the entry (it is not freed here). */
+jpc_pchg_t *jpc_pchglist_remove(jpc_pchglist_t *pchglist, int pchgno)
+{
+	int slot = pchgno;
+	jpc_pchg_t *removed;
+	assert(pchgno < pchglist->numpchgs);
+	removed = pchglist->pchgs[pchgno];
+	for (--pchglist->numpchgs; slot < pchglist->numpchgs; ++slot) {
+		pchglist->pchgs[slot] = pchglist->pchgs[slot + 1];
+	}
+	return removed;
+}
+
+/* Return a newly allocated structure copy of *pchg, or 0 if allocation fails. */
+jpc_pchg_t *jpc_pchg_copy(jpc_pchg_t *pchg)
+{
+	jpc_pchg_t *clone = jas_malloc(sizeof(jpc_pchg_t));
+	if (clone) {
+		*clone = *pchg;
+	}
+	return clone;
+}
+
+jpc_pchglist_t *jpc_pchglist_copy(jpc_pchglist_t *pchglist)
+{ /* Copy the list and each of its entries; returns 0, with nothing leaked, if any step fails. */
+	jpc_pchglist_t *newpchglist = jpc_pchglist_create();
+	jpc_pchg_t *newpchg;
+	int pchgno;
+	for (pchgno = 0; newpchglist && pchgno < pchglist->numpchgs; ++pchgno) {
+		newpchg = jpc_pchg_copy(pchglist->pchgs[pchgno]);
+		if (!newpchg || jpc_pchglist_insert(newpchglist, -1, newpchg)) {
+			if (newpchg) {
+				jpc_pchg_destroy(newpchg); /* insert failed: the new list does not own it yet */
+			}
+			jpc_pchglist_destroy(newpchglist);
+			return 0;
+		}
+	}
+	return newpchglist;
+}
+
+void jpc_pchglist_destroy(jpc_pchglist_t *pchglist)
+{ /* Destroys every entry, then the pointer array, then the list object itself. */
+	int idx = 0;
+	if (pchglist->pchgs) {
+		while (idx < pchglist->numpchgs) {
+			jpc_pchg_destroy(pchglist->pchgs[idx++]);
+		}
+		jas_free(pchglist->pchgs);
+	}
+	jas_free(pchglist);
+}
+
+void jpc_pchg_destroy(jpc_pchg_t *pchg)
+{
+	jas_free(pchg); /* a pchg is released with this single jas_free() call */
+}
+
+jpc_pchg_t *jpc_pchglist_get(jpc_pchglist_t *pchglist, int pchgno)
+{
+	return pchglist->pchgs[pchgno]; /* no range check is made on pchgno here */
+}
+
+int jpc_pchglist_numpchgs(jpc_pchglist_t *pchglist)
+{
+	return pchglist->numpchgs; /* number of entries currently stored in the list */
+}
+
+/*
+ * Reset the iteration state of pi (pchg, pchgno, pktno, valid, prgvolfirst)
+ * and zero every prclyrnos entry of every resolution level.  Always returns 0.
+ */
+int jpc_pi_init(jpc_pi_t *pi)
+{
+	int c, r, p;
+	jpc_picomp_t *comp;
+	jpc_pirlvl_t *lvl;
+
+	pi->pchg = 0;
+	pi->pchgno = -1;
+	pi->pktno = -1;
+	pi->valid = 0;
+	pi->prgvolfirst = 0;
+
+	for (c = 0; c < pi->numcomps; ++c) {
+		comp = &pi->picomps[c];
+		for (r = 0; r < comp->numrlvls; ++r) {
+			lvl = &comp->pirlvls[r];
+			for (p = 0; p < lvl->numprcs; ++p) {
+				lvl->prclyrnos[p] = 0;
+			}
+		}
+	}
+	return 0;
+}
diff --git a/src/libjasper/jpc/jpc_t2cod.h b/src/libjasper/jpc/jpc_t2cod.h
new file mode 100644
index 0000000..5eefcc2
--- /dev/null
+++ b/src/libjasper/jpc/jpc_t2cod.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tier-2 Coding Library
+ *
+ * $Id: jpc_t2cod.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_T2COD_H
+#define	JPC_T2COD_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jpc_cs.h"
+
+/******************************************************************************\
+* Types.
+\******************************************************************************/
+
+/* Progression change list. */
+
+typedef struct {
+
+	/* The number of progression changes. */
+	int numpchgs;
+
+	/* The maximum number of progression changes that can be accommodated
+	  without growing the progression change array. */
+	int maxpchgs;
+
+	/* The progression changes (an array of pointers to the elements). */
+	jpc_pchg_t **pchgs;
+
+} jpc_pchglist_t;
+
+/* Packet iterator per-resolution-level information. */
+
+typedef struct {
+
+	/* The number of precincts. */
+	int numprcs;
+
+	/* The last layer processed for each precinct (numprcs entries). */
+	int *prclyrnos;
+
+	/* The precinct width exponent. */
+	int prcwidthexpn;
+
+	/* The precinct height exponent. */
+	int prcheightexpn;
+
+	/* The number of precincts spanning the resolution level in the horizontal
+	  direction. */
+	int numhprcs;
+
+} jpc_pirlvl_t;
+
+/* Packet iterator per-component information. */
+
+typedef struct {
+
+	/* The number of resolution levels. */
+	int numrlvls;
+
+	/* The per-resolution-level information (numrlvls entries). */
+	jpc_pirlvl_t *pirlvls;
+
+	/* The horizontal sampling period. */
+	int hsamp;
+
+	/* The vertical sampling period. */
+	int vsamp;
+
+} jpc_picomp_t;
+
+/* Packet iterator class. */
+
+typedef struct {
+
+	/* The number of layers. */
+	int numlyrs;
+
+	/* The maximum number of resolution levels over all components. */
+	int maxrlvls;
+
+	/* The number of components. */
+	int numcomps;
+
+	/* The per-component information (numcomps entries). */
+	jpc_picomp_t *picomps;
+
+	/* The current component. */
+	jpc_picomp_t *picomp;
+
+	/* The current resolution level. */
+	jpc_pirlvl_t *pirlvl;
+
+	/* The number of the current component. */
+	int compno;
+
+	/* The number of the current resolution level. */
+	int rlvlno;
+
+	/* The number of the current precinct. */
+	int prcno;
+
+	/* The number of the current layer. */
+	int lyrno;
+
+	/* The x-coordinate of the current position. */
+	int x;
+
+	/* The y-coordinate of the current position. */
+	int y;
+
+	/* The horizontal step size. */
+	int xstep;
+
+	/* The vertical step size. */
+	int ystep;
+
+	/* The x-coordinate of the top-left corner of the tile on the reference
+	  grid. */
+	int xstart;
+
+	/* The y-coordinate of the top-left corner of the tile on the reference
+	  grid. */
+	int ystart;
+
+	/* The x-coordinate of the bottom-right corner of the tile on the
+	  reference grid (plus one). */
+	int xend;
+
+	/* The y-coordinate of the bottom-right corner of the tile on the
+	  reference grid (plus one). */
+	int yend;
+
+	/* The current progression change. */
+	jpc_pchg_t *pchg;
+
+	/* The progression change list. */
+	jpc_pchglist_t *pchglist;
+
+	/* The progression to use in the absence of explicit specification. */
+	jpc_pchg_t defaultpchg;
+
+	/* The current progression change number. */
+	int pchgno;
+
+	/* Is this the first time in the current progression volume? */
+	bool prgvolfirst;
+
+	/* Is the current iterator value valid? */
+	bool valid;
+
+	/* The current packet number. */
+	int pktno;
+
+} jpc_pi_t;
+
+/******************************************************************************\
+* Functions/macros for packet iterators.
+\******************************************************************************/
+
+/* Create a packet iterator. */
+jpc_pi_t *jpc_pi_create0(void);
+
+/* Destroy a packet iterator. */
+void jpc_pi_destroy(jpc_pi_t *pi);
+
+/* Add a progression change to a packet iterator. */
+int jpc_pi_addpchg(jpc_pi_t *pi, jpc_pocpchg_t *pchg);
+
+/* Prepare a packet iterator for iteration (resets its state; returns 0). */
+int jpc_pi_init(jpc_pi_t *pi);
+
+/* Set the iterator to the first packet. */
+int jpc_pi_begin(jpc_pi_t *pi);
+
+/* Proceed to the next packet in sequence (nonzero stops the decoder loop). */
+int jpc_pi_next(jpc_pi_t *pi);
+
+/* Get the index of the current packet (no validity assertion). */
+#define	jpc_pi_getind(pi)	((pi)->pktno)
+
+/* Get the component number of the current packet (asserts validity). */
+#define jpc_pi_cmptno(pi)	(assert((pi)->valid), (pi)->compno)
+
+/* Get the resolution level of the current packet (asserts validity). */
+#define jpc_pi_rlvlno(pi)	(assert((pi)->valid), (pi)->rlvlno)
+
+/* Get the layer number of the current packet (asserts validity). */
+#define jpc_pi_lyrno(pi)	(assert((pi)->valid), (pi)->lyrno)
+
+/* Get the precinct number of the current packet (asserts validity). */
+#define jpc_pi_prcno(pi)	(assert((pi)->valid), (pi)->prcno)
+
+/* Get the progression order for the current packet (asserts validity). */
+#define jpc_pi_prg(pi)	(assert((pi)->valid), (pi)->pchg->prgord)
+
+/******************************************************************************\
+* Functions/macros for progression change lists.
+\******************************************************************************/
+
+/* Create a progression change list. */
+jpc_pchglist_t *jpc_pchglist_create(void);
+
+/* Destroy a progression change list together with its elements. */
+void jpc_pchglist_destroy(jpc_pchglist_t *pchglist);
+
+/* Insert a new element into a progression change list. */
+int jpc_pchglist_insert(jpc_pchglist_t *pchglist, int pchgno, jpc_pchg_t *pchg);
+
+/* Remove an element from a progression change list. */
+jpc_pchg_t *jpc_pchglist_remove(jpc_pchglist_t *pchglist, int pchgno);
+
+/* Get an element from a progression change list (index is not checked). */
+jpc_pchg_t *jpc_pchglist_get(jpc_pchglist_t *pchglist, int pchgno);
+
+/* Copy a progression change list and its elements (returns 0 on failure). */
+jpc_pchglist_t *jpc_pchglist_copy(jpc_pchglist_t *pchglist);
+
+/* Get the number of elements in a progression change list. */
+int jpc_pchglist_numpchgs(jpc_pchglist_t *pchglist);
+
+/******************************************************************************\
+* Functions/macros for progression changes.
+\******************************************************************************/
+
+/* Destroy a progression change. */
+void jpc_pchg_destroy(jpc_pchg_t *pchg);
+
+/* Copy a progression change (returns 0 if allocation fails). */
+jpc_pchg_t *jpc_pchg_copy(jpc_pchg_t *pchg);
+
+#endif
diff --git a/src/libjasper/jpc/jpc_t2dec.c b/src/libjasper/jpc/jpc_t2dec.c
new file mode 100644
index 0000000..0a79c88
--- /dev/null
+++ b/src/libjasper/jpc/jpc_t2dec.c
@@ -0,0 +1,581 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tier 2 Decoder
+ *
+ * $Id: jpc_t2dec.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "jasper/jas_types.h"
+#include "jasper/jas_fix.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_math.h"
+#include "jasper/jas_stream.h"
+#include "jasper/jas_debug.h"
+
+#include "jpc_bs.h"
+#include "jpc_dec.h"
+#include "jpc_cs.h"
+#include "jpc_mqdec.h"
+#include "jpc_t2dec.h"
+#include "jpc_t1cod.h"
+#include "jpc_math.h"
+
+/******************************************************************************\
+*
+\******************************************************************************/
+
+long jpc_dec_lookahead(jas_stream_t *in); /* non-static; defined at the end of this file */
+static int jpc_getcommacode(jpc_bitstream_t *in);
+static int jpc_getnumnewpasses(jpc_bitstream_t *in);
+static int jpc_dec_decodepkt(jpc_dec_t *dec, jas_stream_t *pkthdrstream, jas_stream_t *in, int compno, int lvlno,
+  int prcno, int lyrno); /* lvlno here is called rlvlno in the definition */
+
+/******************************************************************************\
+* Code.
+\******************************************************************************/
+
+static int jpc_getcommacode(jpc_bitstream_t *in)
+{
+	/* Count the nonzero bits that precede the terminating zero bit.  Returns
+	  that count, or -1 if a bit cannot be read or end-of-file is flagged. */
+	int ones = 0;
+	int bit;
+
+	do {
+		/* A failed read and a flagged EOF are both reported as -1. */
+		bit = jpc_bitstream_getbit(in);
+		if (bit < 0 || jpc_bitstream_eof(in)) {
+			return -1;
+		}
+		/* The zero bit ends the code and is not counted. */
+		if (bit) {
+			++ones;
+		}
+	} while (bit);
+
+	return ones;
+}
+
+static int jpc_getnumnewpasses(jpc_bitstream_t *in)
+{
+	/* Decode the variable-length count of new coding passes.  A negative
+	  value from any bitstream read is handed back to the caller as is. */
+	int v;
+
+	/* First bit clear: one pass. */
+	if ((v = jpc_bitstream_getbit(in)) <= 0) {
+		return v ? v : 1;
+	}
+	/* Second bit clear: two passes. */
+	if ((v = jpc_bitstream_getbit(in)) <= 0) {
+		return v ? v : 2;
+	}
+	/* Two-bit field other than 3: the field value plus 3. */
+	if ((v = jpc_bitstream_getbits(in, 2)) != 3) {
+		return (v < 0) ? v : v + 3;
+	}
+	/* Five-bit field other than 31: the field value plus 6. */
+	if ((v = jpc_bitstream_getbits(in, 5)) != 31) {
+		return (v < 0) ? v : v + 6;
+	}
+	/* Seven-bit field: the field value plus 37. */
+	v = jpc_bitstream_getbits(in, 7);
+	return (v < 0) ? v : v + 37;
+}
+
+static int jpc_dec_decodepkt(jpc_dec_t *dec, jas_stream_t *pkthdrstream, jas_stream_t *in, int compno, int rlvlno,
+  int prcno, int lyrno)
+{
+	/*
+	 * Decode a single packet of the current tile, identified by component
+	 * (compno), resolution level (rlvlno), precinct (prcno) and layer
+	 * (lyrno).  The packet header is parsed from pkthdrstream and the
+	 * packet body is taken from in.  When lyrno is at or beyond
+	 * dec->maxlyrs the header is still parsed but the body is skipped
+	 * rather than stored.
+	 *
+	 * Returns 0 on success and a nonzero value on failure.  Every exit
+	 * taken while the header bitstream is open closes it first.
+	 */
+	jpc_bitstream_t *inb;
+	jpc_dec_tile_t *tile;
+	jpc_dec_cp_t *cp;
+	jpc_dec_ccp_t *ccp;
+	jpc_dec_tcomp_t *tcomp;
+	jpc_dec_rlvl_t *rlvl;
+	jpc_dec_band_t *band;
+	jpc_dec_prc_t *prc;
+	jpc_dec_cblk_t *cblk;
+	jpc_dec_seg_t *seg;
+	jpc_tagtreenode_t *leaf;
+	jpc_ms_t *ms;
+	uint_fast32_t bodylen;
+	bool discard;
+	int bandno, cblkno, usedcblkcnt;
+	int present, included, ret;
+	int numnewpasses, savenumnewpasses, mycounter;
+	int passno, maxpasses;
+	int n, m, i, len;
+	int hdrlen, hdroffstart, hdroffend;
+
+	/* Start from zero so bodylen is defined even for an empty packet. */
+	bodylen = 0;
+	discard = (lyrno >= dec->maxlyrs);
+
+	tile = dec->curtile;
+	cp = tile->cp;
+	ccp = &cp->ccps[compno];
+
+	/*
+	 * Decode the packet header.
+	 */
+
+	/* Decode the SOP marker segment if present. */
+	if (cp->csty & JPC_COD_SOP) {
+		if (jpc_dec_lookahead(in) == JPC_MS_SOP) {
+			if (!(ms = jpc_getms(in, dec->cstate))) {
+				return -1;
+			}
+			if (jpc_ms_gettype(ms) != JPC_MS_SOP) {
+				jpc_ms_destroy(ms);
+				jas_eprintf("missing SOP marker segment\n");
+				return -1;
+			}
+			jpc_ms_destroy(ms);
+		}
+	}
+
+	hdroffstart = jas_stream_getrwcount(pkthdrstream);
+
+	if (!(inb = jpc_bitstream_sopen(pkthdrstream, "r"))) {
+		return -1;
+	}
+
+	if ((present = jpc_bitstream_getbit(inb)) < 0) {
+		jpc_bitstream_close(inb);
+		return 1;
+	}
+	JAS_DBGLOG(10, ("\n"));
+	JAS_DBGLOG(10, ("present=%d ", present));
+
+	/* Is the packet non-empty? */
+	if (present) {
+		/* The packet is non-empty. */
+		tcomp = &tile->tcomps[compno];
+		rlvl = &tcomp->rlvls[rlvlno];
+		bodylen = 0;
+		for (bandno = 0, band = rlvl->bands; bandno < rlvl->numbands;
+		  ++bandno, ++band) {
+			if (!band->data) {
+				continue;
+			}
+			prc = &band->prcs[prcno];
+			if (!prc->cblks) {
+				continue;
+			}
+			usedcblkcnt = 0;
+			for (cblkno = 0, cblk = prc->cblks; cblkno < prc->numcblks;
+			  ++cblkno, ++cblk) {
+				++usedcblkcnt;
+				if (!cblk->numpasses) {
+					leaf = jpc_tagtree_getleaf(prc->incltagtree, usedcblkcnt - 1);
+					if ((included = jpc_tagtree_decode(prc->incltagtree, leaf, lyrno + 1, inb)) < 0) {
+						goto error;
+					}
+				} else {
+					if ((included = jpc_bitstream_getbit(inb)) < 0) {
+						goto error;
+					}
+				}
+				JAS_DBGLOG(10, ("\n"));
+				JAS_DBGLOG(10, ("included=%d ", included));
+				if (!included) {
+					continue;
+				}
+				if (!cblk->numpasses) {
+					i = 1;
+					leaf = jpc_tagtree_getleaf(prc->numimsbstagtree, usedcblkcnt - 1);
+					for (;;) {
+						if ((ret = jpc_tagtree_decode(prc->numimsbstagtree, leaf, i, inb)) < 0) {
+							goto error;
+						}
+						if (ret) {
+							break;
+						}
+						++i;
+					}
+					cblk->numimsbs = i - 1;
+					cblk->firstpassno = cblk->numimsbs * 3;
+				}
+				if ((numnewpasses = jpc_getnumnewpasses(inb)) < 0) {
+					goto error;
+				}
+				JAS_DBGLOG(10, ("numnewpasses=%d ", numnewpasses));
+				seg = cblk->curseg;
+				savenumnewpasses = numnewpasses;
+				mycounter = 0;
+				if (numnewpasses > 0) {
+					if ((m = jpc_getcommacode(inb)) < 0) {
+						goto error;
+					}
+					cblk->numlenbits += m;
+					JAS_DBGLOG(10, ("increment=%d ", m));
+					while (numnewpasses > 0) {
+						passno = cblk->firstpassno + cblk->numpasses + mycounter;
+	/* XXX - the maxpasses is not set precisely but this doesn't matter... */
+						maxpasses = JPC_SEGPASSCNT(passno, cblk->firstpassno, 10000, (ccp->cblkctx & JPC_COX_LAZY) != 0, (ccp->cblkctx & JPC_COX_TERMALL) != 0);
+						if (!discard && !seg) {
+							if (!(seg = jpc_seg_alloc())) {
+								goto error;
+							}
+							jpc_seglist_insert(&cblk->segs, cblk->segs.tail, seg);
+							if (!cblk->curseg) {
+								cblk->curseg = seg;
+							}
+							seg->passno = passno;
+							seg->type = JPC_SEGTYPE(seg->passno, cblk->firstpassno, (ccp->cblkctx & JPC_COX_LAZY) != 0);
+							seg->maxpasses = maxpasses;
+						}
+						n = JAS_MIN(numnewpasses, maxpasses);
+						mycounter += n;
+						numnewpasses -= n;
+						if ((len = jpc_bitstream_getbits(inb, cblk->numlenbits + jpc_floorlog2(n))) < 0) {
+							goto error;
+						}
+						JAS_DBGLOG(10, ("len=%d ", len));
+						if (!discard) {
+							seg->lyrno = lyrno;
+							seg->numpasses += n;
+							seg->cnt = len;
+							seg = seg->next;
+						}
+						bodylen += len;
+					}
+				}
+				cblk->numpasses += savenumnewpasses;
+			}
+		}
+
+		jpc_bitstream_inalign(inb, 0, 0);
+
+	} else {
+		if (jpc_bitstream_inalign(inb, 0x7f, 0)) {
+			jas_eprintf("alignment failed\n");
+			goto error;
+		}
+	}
+	jpc_bitstream_close(inb);
+
+	hdroffend = jas_stream_getrwcount(pkthdrstream);
+	hdrlen = hdroffend - hdroffstart;
+	if (jas_getdbglevel() >= 5) {
+		jas_eprintf("hdrlen=%lu bodylen=%lu \n", (unsigned long) hdrlen,
+		  (unsigned long) bodylen);
+	}
+
+	if (cp->csty & JPC_COD_EPH) {
+		if (jpc_dec_lookahead(pkthdrstream) == JPC_MS_EPH) {
+			if (!(ms = jpc_getms(pkthdrstream, dec->cstate))) {
+				jas_eprintf("cannot get (EPH) marker segment\n");
+				return -1;
+			}
+			if (jpc_ms_gettype(ms) != JPC_MS_EPH) {
+				jpc_ms_destroy(ms);
+				jas_eprintf("missing EPH marker segment\n");
+				return -1;
+			}
+			jpc_ms_destroy(ms);
+		}
+	}
+
+	/* decode the packet body. */
+
+	if (jas_getdbglevel() >= 1) {
+		jas_eprintf("packet body offset=%06ld\n", (long) jas_stream_getrwcount(in));
+	}
+
+	if (!discard) {
+		tcomp = &tile->tcomps[compno];
+		rlvl = &tcomp->rlvls[rlvlno];
+		for (bandno = 0, band = rlvl->bands; bandno < rlvl->numbands;
+		  ++bandno, ++band) {
+			if (!band->data) {
+				continue;
+			}
+			prc = &band->prcs[prcno];
+			if (!prc->cblks) {
+				continue;
+			}
+			for (cblkno = 0, cblk = prc->cblks; cblkno < prc->numcblks;
+			  ++cblkno, ++cblk) {
+				seg = cblk->curseg;
+				while (seg) {
+					if (!seg->stream) {
+						if (!(seg->stream = jas_stream_memopen(0, 0))) {
+							return -1;
+						}
+					}
+					if (seg->cnt > 0) {
+						if (jpc_getdata(in, seg->stream, seg->cnt) < 0) {
+							return -1;
+						}
+						seg->cnt = 0;
+					}
+					if (seg->numpasses >= seg->maxpasses) {
+						cblk->curseg = seg->next;
+					}
+					seg = seg->next;
+				}
+			}
+		}
+	} else {
+		if (jas_stream_gobble(in, bodylen) != JAS_CAST(int, bodylen)) {
+			return -1;
+		}
+	}
+	return 0;
+
+error:
+	/* Shared failure exit for the span in which inb is open. */
+	jpc_bitstream_close(inb);
+	return -1;
+}
+
+/********************************************************************************************/
+/********************************************************************************************/
+
+int jpc_dec_decodepkts(jpc_dec_t *dec, jas_stream_t *pkthdrstream, jas_stream_t *in)
+{
+	/* Decode packets for the current tile in packet-iterator order.
+	  Returns 0 when an EOC/SOT marker is next or dec->maxpkts has been
+	  reached, the nonzero result of jpc_pi_next() when the iterator
+	  stops, and -1 on a decoding error or an unexpected marker. */
+	jpc_dec_tile_t *tile = dec->curtile;
+	jpc_pi_t *pi = tile->pi;
+	long marker;
+	int ret;
+
+	for (;;) {
+		/* Inspect the next marker only when the tile has no packet-header
+		  stream or that stream has nothing left to read. */
+		if (!tile->pkthdrstream ||
+		  jas_stream_peekc(tile->pkthdrstream) == EOF) {
+			marker = jpc_dec_lookahead(in);
+			if (marker == JPC_MS_EOC || marker == JPC_MS_SOT) {
+				return 0;
+			}
+			if (marker != JPC_MS_SOP && marker != JPC_MS_EPH && marker != 0) {
+				return -1;
+			}
+		}
+		if ((ret = jpc_pi_next(pi))) {
+			return ret;
+		}
+		if (dec->maxpkts >= 0 && dec->numpkts >= dec->maxpkts) {
+			jas_eprintf("warning: stopping decode prematurely as requested\n");
+			return 0;
+		}
+		if (jas_getdbglevel() >= 1) {
+			jas_eprintf("packet offset=%08ld prg=%d cmptno=%02d "
+			  "rlvlno=%02d prcno=%03d lyrno=%02d\n", (long)
+			  jas_stream_getrwcount(in), jpc_pi_prg(pi), jpc_pi_cmptno(pi),
+			  jpc_pi_rlvlno(pi), jpc_pi_prcno(pi), jpc_pi_lyrno(pi));
+		}
+		if (jpc_dec_decodepkt(dec, pkthdrstream, in, jpc_pi_cmptno(pi), jpc_pi_rlvlno(pi),
+		  jpc_pi_prcno(pi), jpc_pi_lyrno(pi))) {
+			return -1;
+		}
+		++dec->numpkts;
+	}
+
+	return 0;
+}
+
+jpc_pi_t *jpc_dec_pi_create(jpc_dec_t *dec, jpc_dec_tile_t *tile)
+{
+	/* Build a packet iterator for a decoder tile: mirror the tile's
+	  component/resolution-level/precinct layout, zero the per-precinct
+	  layer counters, and seed the default progression from the tile's
+	  coding parameters.  Returns 0 if any allocation fails. */
+	jpc_pi_t *pi;
+	jpc_picomp_t *picomp;
+	jpc_pirlvl_t *pirlvl;
+	jpc_dec_tcomp_t *tcomp;
+	jpc_dec_rlvl_t *rlvl;
+	jpc_dec_cmpt_t *cmpt;
+	int *prclyrno;
+	int compno, rlvlno, prcno;
+
+	if (!(pi = jpc_pi_create0())) {
+		return 0;
+	}
+	pi->numcomps = dec->numcomps;
+	if (!(pi->picomps = jas_malloc(pi->numcomps * sizeof(jpc_picomp_t)))) {
+		jpc_pi_destroy(pi);
+		return 0;
+	}
+	for (compno = 0, picomp = pi->picomps; compno < pi->numcomps; ++compno,
+	  ++picomp) {
+		picomp->pirlvls = 0;
+	}
+
+	for (compno = 0, tcomp = tile->tcomps, picomp = pi->picomps;
+	  compno < pi->numcomps; ++compno, ++tcomp, ++picomp) {
+		picomp->numrlvls = tcomp->numrlvls;
+		if (!(picomp->pirlvls = jas_malloc(picomp->numrlvls *
+		  sizeof(jpc_pirlvl_t)))) {
+			jpc_pi_destroy(pi);
+			return 0;
+		}
+		for (rlvlno = 0, pirlvl = picomp->pirlvls; rlvlno <
+		  picomp->numrlvls; ++rlvlno, ++pirlvl) {
+			pirlvl->prclyrnos = 0;
+		}
+		for (rlvlno = 0, pirlvl = picomp->pirlvls, rlvl = tcomp->rlvls;
+		  rlvlno < picomp->numrlvls; ++rlvlno, ++pirlvl, ++rlvl) {
+			/* One counter per precinct, sized by the array's element type. */
+			pirlvl->numprcs = rlvl->numprcs;
+			if (!(pirlvl->prclyrnos = jas_malloc(pirlvl->numprcs *
+			  sizeof(*pirlvl->prclyrnos)))) {
+				jpc_pi_destroy(pi);
+				return 0;
+			}
+		}
+	}
+
+	pi->maxrlvls = 0;
+	for (compno = 0, tcomp = tile->tcomps, picomp = pi->picomps, cmpt =
+	  dec->cmpts; compno < pi->numcomps; ++compno, ++tcomp, ++picomp,
+	  ++cmpt) {
+		picomp->hsamp = cmpt->hstep;
+		picomp->vsamp = cmpt->vstep;
+		for (rlvlno = 0, pirlvl = picomp->pirlvls, rlvl = tcomp->rlvls;
+		  rlvlno < picomp->numrlvls; ++rlvlno, ++pirlvl, ++rlvl) {
+			pirlvl->prcwidthexpn = rlvl->prcwidthexpn;
+			pirlvl->prcheightexpn = rlvl->prcheightexpn;
+			for (prcno = 0, prclyrno = pirlvl->prclyrnos;
+			  prcno < pirlvl->numprcs; ++prcno, ++prclyrno) {
+				*prclyrno = 0;
+			}
+			pirlvl->numhprcs = rlvl->numhprcs;
+		}
+		if (pi->maxrlvls < tcomp->numrlvls) {
+			pi->maxrlvls = tcomp->numrlvls;
+		}
+	}
+
+	pi->numlyrs = tile->cp->numlyrs;
+	pi->xstart = tile->xstart;
+	pi->ystart = tile->ystart;
+	pi->xend = tile->xend;
+	pi->yend = tile->yend;
+
+	pi->picomp = 0;
+	pi->pirlvl = 0;
+	pi->x = 0;
+	pi->y = 0;
+	pi->compno = 0;
+	pi->rlvlno = 0;
+	pi->prcno = 0;
+	pi->lyrno = 0;
+	pi->xstep = 0;
+	pi->ystep = 0;
+	pi->pchgno = -1;
+
+	pi->defaultpchg.prgord = tile->cp->prgord;
+	pi->defaultpchg.compnostart = 0;
+	pi->defaultpchg.compnoend = pi->numcomps;
+	pi->defaultpchg.rlvlnostart = 0;
+	pi->defaultpchg.rlvlnoend = pi->maxrlvls;
+	pi->defaultpchg.lyrnoend = pi->numlyrs;
+	pi->pchg = 0;
+	pi->valid = 0;
+
+	return pi;
+}
+
+long jpc_dec_lookahead(jas_stream_t *in)
+{
+	/* Peek at the next 16-bit word without consuming it: yields the word if
+	  it is within [JPC_MS_INMIN, JPC_MS_INMAX], 0 if not, -1 on failure. */
+	uint_fast16_t word;
+	if (jpc_getuint16(in, &word)) {
+		return -1;
+	}
+	/* The low byte goes back first, so the high byte is read next. */
+	if (jas_stream_ungetc(in, word & 0xff) == EOF ||
+	  jas_stream_ungetc(in, word >> 8) == EOF) {
+		return -1;
+	}
+	return (word >= JPC_MS_INMIN && word <= JPC_MS_INMAX) ? (long) word : 0;
+}
diff --git a/src/libjasper/jpc/jpc_t2dec.h b/src/libjasper/jpc/jpc_t2dec.h
new file mode 100644
index 0000000..6517252
--- /dev/null
+++ b/src/libjasper/jpc/jpc_t2dec.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tier 2 Decoder
+ *
+ * $Id: jpc_t2dec.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_T2DEC_H
+#define JPC_T2DEC_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_fix.h"
+#include "jasper/jas_stream.h"
+
+#include "jpc_bs.h"
+#include "jpc_dec.h"
+#include "jpc_mqdec.h"
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+/* Decode the packets for a tile-part (headers from pkthdrstream, bodies from in). */
+int jpc_dec_decodepkts(jpc_dec_t *dec, jas_stream_t *pkthdrstream,
+  jas_stream_t *in);
+
+/* Create a packet iterator for the decoder (returns 0 on failure). */
+jpc_pi_t *jpc_dec_pi_create(jpc_dec_t *dec, jpc_dec_tile_t *tile);
+
+#endif
diff --git a/src/libjasper/jpc/jpc_t2enc.c b/src/libjasper/jpc/jpc_t2enc.c
new file mode 100644
index 0000000..3c807d9
--- /dev/null
+++ b/src/libjasper/jpc/jpc_t2enc.c
@@ -0,0 +1,655 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tier 2 Encoder
+ *
+ * $Id: jpc_t2enc.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "jasper/jas_fix.h"
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_math.h"
+#include "jasper/jas_debug.h"
+
+#include "jpc_flt.h"
+#include "jpc_t2enc.h"
+#include "jpc_t2cod.h"
+#include "jpc_tagtree.h"
+#include "jpc_enc.h"
+#include "jpc_math.h"
+
+/******************************************************************************\
+* Code.
+\******************************************************************************/
+
+/* Write n as a comma code: n one bits followed by a single zero bit. */
+static int jpc_putcommacode(jpc_bitstream_t *out, int n)
+{
+	int left;
+
+	assert(n >= 0);
+	for (left = n; left > 0; --left) {
+		if (jpc_bitstream_putbit(out, 1) == EOF) {
+			return -1;
+		}
+	}
+	/* The terminating zero bit; -1 is returned if any write fails. */
+	return (jpc_bitstream_putbit(out, 0) == EOF) ? (-1) : 0;
+}
+
+/* Write the codeword for the number of new coding passes, n (1..164). */
+static int jpc_putnumnewpasses(jpc_bitstream_t *out, int n)
+{
+	int status;
+
+	/* The standard has no provision for values outside of 1..164. */
+	if (n <= 0 || n > 164) {
+		return -1;
+	}
+	if (n > 36) {
+		status = jpc_bitstream_putbits(out, 16, 0xff80 | (n - 37));
+	} else if (n > 5) {
+		status = jpc_bitstream_putbits(out, 9, 0x1e0 | (n - 6));
+	} else if (n > 2) {
+		status = jpc_bitstream_putbits(out, 4, 0xc | (n - 3));
+	} else if (n == 2) {
+		status = jpc_bitstream_putbits(out, 2, 2);
+	} else {
+		status = jpc_bitstream_putbit(out, 0);
+	}
+	if (status == EOF) {
+		return -1;
+	}
+	return 0;
+}
+
+/* Encode all of the packets for the current tile, visiting them in the
+  order produced by the tile's packet iterator.  Returns 0 on success and
+  -1 as soon as a packet cannot be encoded. */
+int jpc_enc_encpkts(jpc_enc_t *enc, jas_stream_t *out)
+{
+	jpc_pi_t *iter;
+	int compno;
+	int rlvlno;
+	int prcno;
+	int lyrno;
+
+	jpc_init_t2state(enc, 0);
+	iter = enc->curtile->pi;
+	jpc_pi_init(iter);
+	while (!jpc_pi_next(iter)) {
+		compno = jpc_pi_cmptno(iter);
+		rlvlno = jpc_pi_rlvlno(iter);
+		prcno = jpc_pi_prcno(iter);
+		lyrno = jpc_pi_lyrno(iter);
+		if (jpc_enc_encpkt(enc, out, compno, rlvlno, prcno, lyrno)) {
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* Encode one packet of the current tile (component compno, resolution level
+  lvlno, precinct prcno, layer lyrno) to the stream out: an optional SOP
+  marker segment, the packet header, an optional EPH marker segment, and then
+  the code block data of the passes assigned to the layer.  Returns 0 on
+  success and -1 on failure; temporary objects are released on every path. */
+int jpc_enc_encpkt(jpc_enc_t *enc, jas_stream_t *out, int compno, int lvlno, int prcno, int lyrno)
+{
+	jpc_enc_tcmpt_t *comp;
+	jpc_enc_rlvl_t *lvl;
+	jpc_enc_band_t *band, *endbands;
+	jpc_enc_cblk_t *cblk, *endcblks;
+	jpc_bitstream_t *outb;
+	jpc_enc_pass_t *pass, *startpass, *lastpass, *endpass, *endpasses;
+	int i, n, ret, included;
+	jpc_tagtreenode_t *leaf;
+	int t1, t2, adjust, maxadjust;
+	int datalen, numnewpasses, passcount;
+	jpc_enc_tile_t *tile;
+	jpc_enc_prc_t *prc;
+	jpc_enc_cp_t *cp;
+	jpc_ms_t *ms;
+
+	tile = enc->curtile;
+	cp = enc->cp;
+
+	if (cp->tcp.csty & JPC_COD_SOP) {
+		if (!(ms = jpc_ms_create(JPC_MS_SOP))) {
+			return -1;
+		}
+		ms->parms.sop.seqno = jpc_pi_getind(tile->pi);
+		ret = jpc_putms(out, enc->cstate, ms);
+		jpc_ms_destroy(ms);	/* released even when the write fails */
+		if (ret) {
+			return -1;
+		}
+	}
+
+	if (!(outb = jpc_bitstream_sopen(out, "w+"))) {
+		return -1;
+	}
+
+	if (jpc_bitstream_putbit(outb, 1) == EOF) {
+		goto error;
+	}
+	JAS_DBGLOG(10, ("\n"));
+	JAS_DBGLOG(10, ("present. "));
+
+	comp = &tile->tcmpts[compno];
+	lvl = &comp->rlvls[lvlno];
+	endbands = &lvl->bands[lvl->numbands];
+	for (band = lvl->bands; band != endbands; ++band) {
+		if (!band->data) {
+			continue;
+		}
+		prc = &band->prcs[prcno];
+		if (!prc->cblks) {
+			continue;
+		}
+
+		endcblks = &prc->cblks[prc->numcblks];
+		for (cblk = prc->cblks; cblk != endcblks; ++cblk) {
+			if (!lyrno) {
+				leaf = jpc_tagtree_getleaf(prc->nlibtree, cblk - prc->cblks);
+				jpc_tagtree_setvalue(prc->nlibtree, leaf, cblk->numimsbs);
+			}
+			pass = cblk->curpass;
+			included = (pass && pass->lyrno == lyrno);
+			if (included && (!cblk->numencpasses)) {
+				assert(pass->lyrno == lyrno);
+				leaf = jpc_tagtree_getleaf(prc->incltree,
+				  cblk - prc->cblks);
+				jpc_tagtree_setvalue(prc->incltree, leaf, pass->lyrno);
+			}
+		}
+
+		endcblks = &prc->cblks[prc->numcblks];
+		for (cblk = prc->cblks; cblk != endcblks; ++cblk) {
+			pass = cblk->curpass;
+			included = (pass && pass->lyrno == lyrno);
+			if (!cblk->numencpasses) {
+				leaf = jpc_tagtree_getleaf(prc->incltree,
+				  cblk - prc->cblks);
+				if (jpc_tagtree_encode(prc->incltree, leaf, lyrno
+				  + 1, outb) < 0) {
+					goto error;
+				}
+			} else {
+				if (jpc_bitstream_putbit(outb, included) == EOF) {
+					goto error;
+				}
+			}
+			JAS_DBGLOG(10, ("included=%d ", included));
+			if (!included) {
+				continue;
+			}
+			if (!cblk->numencpasses) {
+				i = 1;
+				leaf = jpc_tagtree_getleaf(prc->nlibtree, cblk - prc->cblks);
+				for (;;) {
+					if ((ret = jpc_tagtree_encode(prc->nlibtree, leaf, i, outb)) < 0) {
+						goto error;
+					}
+					if (ret) {
+						break;
+					}
+					++i;
+				}
+				assert(leaf->known_ && i == leaf->value_ + 1);
+			}
+
+			endpasses = &cblk->passes[cblk->numpasses];
+			startpass = pass;
+			endpass = startpass;
+			while (endpass != endpasses && endpass->lyrno == lyrno){
+				++endpass;
+			}
+			numnewpasses = endpass - startpass;
+			if (jpc_putnumnewpasses(outb, numnewpasses)) {
+				goto error;
+			}
+			JAS_DBGLOG(10, ("numnewpasses=%d ", numnewpasses));
+
+			lastpass = endpass - 1;
+			n = startpass->start;
+			passcount = 1;
+			maxadjust = 0;
+			for (pass = startpass; pass != endpass; ++pass) {
+				if (pass->term || pass == lastpass) {
+					datalen = pass->end - n;
+					t1 = jpc_firstone(datalen) + 1;
+					t2 = cblk->numlenbits + jpc_floorlog2(passcount);
+					adjust = JAS_MAX(t1 - t2, 0);
+					maxadjust = JAS_MAX(adjust, maxadjust);
+					n += datalen;
+					passcount = 1;
+				} else {
+					++passcount;
+				}
+			}
+			if (jpc_putcommacode(outb, maxadjust)) {
+				goto error;
+			}
+			cblk->numlenbits += maxadjust;
+
+			lastpass = endpass - 1;
+			n = startpass->start;
+			passcount = 1;
+			for (pass = startpass; pass != endpass; ++pass) {
+				if (pass->term || pass == lastpass) {
+					datalen = pass->end - n;
+					assert(jpc_firstone(datalen) < cblk->numlenbits + jpc_floorlog2(passcount));
+					if (jpc_bitstream_putbits(outb, cblk->numlenbits + jpc_floorlog2(passcount), datalen) == EOF) {
+						goto error;
+					}
+					n += datalen;
+					passcount = 1;
+				} else {
+					++passcount;
+				}
+			}
+		}
+	}
+
+	jpc_bitstream_outalign(outb, 0);
+	jpc_bitstream_close(outb);
+
+	if (cp->tcp.csty & JPC_COD_EPH) {
+		if (!(ms = jpc_ms_create(JPC_MS_EPH))) {
+			return -1;
+		}
+		ret = jpc_putms(out, enc->cstate, ms);
+		jpc_ms_destroy(ms);
+		if (ret) {
+			return -1;
+		}
+	}
+
+	comp = &tile->tcmpts[compno];
+	lvl = &comp->rlvls[lvlno];
+	endbands = &lvl->bands[lvl->numbands];
+	for (band = lvl->bands; band != endbands; ++band) {
+		if (!band->data) {
+			continue;
+		}
+		prc = &band->prcs[prcno];
+		if (!prc->cblks) {
+			continue;
+		}
+		endcblks = &prc->cblks[prc->numcblks];
+		for (cblk = prc->cblks; cblk != endcblks; ++cblk) {
+			pass = cblk->curpass;
+
+			if (!pass) {
+				continue;
+			}
+			if (pass->lyrno != lyrno) {
+				assert(pass->lyrno < 0 || pass->lyrno > lyrno);
+				continue;
+			}
+
+			endpasses = &cblk->passes[cblk->numpasses];
+			startpass = pass;
+			endpass = startpass;
+			while (endpass != endpasses && endpass->lyrno == lyrno){
+				++endpass;
+			}
+			lastpass = endpass - 1;
+			numnewpasses = endpass - startpass;
+
+			jas_stream_seek(cblk->stream, startpass->start, SEEK_SET);
+			assert(jas_stream_tell(cblk->stream) == startpass->start);
+			if (jas_stream_copy(out, cblk->stream, lastpass->end - startpass->start)) {
+				return -1;
+			}
+			cblk->curpass = (endpass != endpasses) ? endpass : 0;
+			cblk->numencpasses += numnewpasses;
+		}
+	}
+
+	return 0;
+
+	/* Failure while the header bit stream is still open: close it. */
+error:
+	jpc_bitstream_close(outb);
+	return -1;
+}
+
+/* Save the tier-2 coding state of the current tile.  For each precinct that
+  has code blocks, the incltree and nlibtree tag trees are copied into
+  savincltree and savnlibtree, and the current pass, number of encoded passes
+  and number of length bits of every code block are stored in the code
+  block's "saved" fields. */
+void jpc_save_t2state(jpc_enc_t *enc)
+{
+/* stream pos in embedded T1 stream may be wrong since not saved/restored! */
+
+	jpc_enc_tile_t *curtile = enc->curtile;
+	jpc_enc_tcmpt_t *tcmpt, *tcmptsend;
+	jpc_enc_rlvl_t *rlvl, *rlvlsend;
+	jpc_enc_band_t *bnd, *bndsend;
+	jpc_enc_cblk_t *blk, *blksend;
+	jpc_enc_prc_t *prec;
+	int precno;
+
+	tcmptsend = &curtile->tcmpts[curtile->numtcmpts];
+	for (tcmpt = curtile->tcmpts; tcmpt != tcmptsend; ++tcmpt) {
+		rlvlsend = &tcmpt->rlvls[tcmpt->numrlvls];
+		for (rlvl = tcmpt->rlvls; rlvl != rlvlsend; ++rlvl) {
+			/* Skip resolution levels that have no bands. */
+			if (!rlvl->bands) {
+				continue;
+			}
+			bndsend = &rlvl->bands[rlvl->numbands];
+			for (bnd = rlvl->bands; bnd != bndsend; ++bnd) {
+				/* Skip bands that have no data. */
+				if (!bnd->data) {
+					continue;
+				}
+				prec = bnd->prcs;
+				for (precno = 0; precno < rlvl->numprcs; ++precno, ++prec) {
+					if (prec->cblks) {
+						jpc_tagtree_copy(prec->savincltree, prec->incltree);
+						jpc_tagtree_copy(prec->savnlibtree, prec->nlibtree);
+						blksend = &prec->cblks[prec->numcblks];
+						for (blk = prec->cblks; blk != blksend; ++blk) {
+							blk->savedcurpass = blk->curpass;
+							blk->savednumencpasses = blk->numencpasses;
+							blk->savednumlenbits = blk->numlenbits;
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
+/* Restore the tier-2 coding state of the current tile from the copies made
+  by jpc_save_t2state.  For each precinct that has code blocks, savincltree
+  and savnlibtree are copied back into incltree and nlibtree, and the current
+  pass, number of encoded passes and number of length bits of every code
+  block are reloaded from the code block's "saved" fields. */
+void jpc_restore_t2state(jpc_enc_t *enc)
+{
+	jpc_enc_tile_t *curtile = enc->curtile;
+	jpc_enc_tcmpt_t *tcmpt, *tcmptsend;
+	jpc_enc_rlvl_t *rlvl, *rlvlsend;
+	jpc_enc_band_t *bnd, *bndsend;
+	jpc_enc_cblk_t *blk, *blksend;
+	jpc_enc_prc_t *prec;
+	int precno;
+
+	tcmptsend = &curtile->tcmpts[curtile->numtcmpts];
+	for (tcmpt = curtile->tcmpts; tcmpt != tcmptsend; ++tcmpt) {
+		rlvlsend = &tcmpt->rlvls[tcmpt->numrlvls];
+		for (rlvl = tcmpt->rlvls; rlvl != rlvlsend; ++rlvl) {
+			/* Skip resolution levels that have no bands. */
+			if (!rlvl->bands) {
+				continue;
+			}
+			bndsend = &rlvl->bands[rlvl->numbands];
+			for (bnd = rlvl->bands; bnd != bndsend; ++bnd) {
+				/* Skip bands that have no data. */
+				if (!bnd->data) {
+					continue;
+				}
+				prec = bnd->prcs;
+				for (precno = 0; precno < rlvl->numprcs; ++precno, ++prec) {
+					if (prec->cblks) {
+						jpc_tagtree_copy(prec->incltree, prec->savincltree);
+						jpc_tagtree_copy(prec->nlibtree, prec->savnlibtree);
+						blksend = &prec->cblks[prec->numcblks];
+						for (blk = prec->cblks; blk != blksend; ++blk) {
+							blk->curpass = blk->savedcurpass;
+							blk->numencpasses = blk->savednumencpasses;
+							blk->numlenbits = blk->savednumlenbits;
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
+/* Initialize the tier-2 coding state of the current tile: reset the tag
+  trees of each precinct, and rewind each code block to its first pass with
+  no passes encoded.  If raflag is nonzero, every pass is also assigned to
+  layer zero.  band->numbps and cblk->numbps are assumed to be precomputed. */
+void jpc_init_t2state(jpc_enc_t *enc, int raflag)
+{
+	jpc_enc_tcmpt_t *comp;
+	jpc_enc_tcmpt_t *endcomps;
+	jpc_enc_rlvl_t *lvl;
+	jpc_enc_rlvl_t *endlvls;
+	jpc_enc_band_t *band;
+	jpc_enc_band_t *endbands;
+	jpc_enc_cblk_t *cblk;
+	jpc_enc_cblk_t *endcblks;
+	jpc_enc_pass_t *pass, *endpasses;
+	jpc_tagtreenode_t *leaf;
+	jpc_enc_tile_t *tile;
+	int prcno;
+	jpc_enc_prc_t *prc;
+
+	tile = enc->curtile;
+
+	endcomps = &tile->tcmpts[tile->numtcmpts];
+	for (comp = tile->tcmpts; comp != endcomps; ++comp) {
+		endlvls = &comp->rlvls[comp->numrlvls];
+		for (lvl = comp->rlvls; lvl != endlvls; ++lvl) {
+			if (!lvl->bands) {
+				continue;
+			}
+			endbands = &lvl->bands[lvl->numbands];
+			for (band = lvl->bands; band != endbands; ++band) {
+				if (!band->data) {
+					continue;
+				}
+				for (prcno = 0, prc = band->prcs; prcno < lvl->numprcs; ++prcno, ++prc) {
+					if (!prc->cblks) {
+						continue;
+					}
+					jpc_tagtree_reset(prc->incltree);
+					jpc_tagtree_reset(prc->nlibtree);
+					endcblks = &prc->cblks[prc->numcblks];
+					for (cblk = prc->cblks; cblk != endcblks; ++cblk) {
+						if (jas_stream_rewind(cblk->stream)) {
+							assert(0);
+						}
+						cblk->curpass = (cblk->numpasses > 0) ? cblk->passes : 0;
+						cblk->numencpasses = 0;
+						cblk->numlenbits = 3;
+						cblk->numimsbs = band->numbps - cblk->numbps;
+						assert(cblk->numimsbs >= 0);
+						leaf = jpc_tagtree_getleaf(prc->nlibtree, cblk - prc->cblks);
+						jpc_tagtree_setvalue(prc->nlibtree, leaf, cblk->numimsbs);
+
+						if (raflag) {
+							/* Return every pass of the code block to layer zero. */
+							endpasses = &cblk->passes[cblk->numpasses];
+							for (pass = cblk->passes; pass != endpasses; ++pass) {
+								pass->lyrno = 0;
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
+/* Create a packet iterator for the given tile of the encoder.  The iterator
+  mirrors the component / resolution level / precinct structure of the tile
+  and starts with every per-precinct layer number set to zero.  Returns the
+  iterator, or a null pointer if an allocation fails. */
+jpc_pi_t *jpc_enc_pi_create(jpc_enc_cp_t *cp, jpc_enc_tile_t *tile)
+{
+	jpc_pi_t *pi;
+	jpc_picomp_t *picomp;
+	jpc_pirlvl_t *pirlvl;
+	jpc_enc_tcmpt_t *tcomp;
+	jpc_enc_rlvl_t *rlvl;
+	int compno, rlvlno, prcno;
+	int *prclyrno;
+
+	if (!(pi = jpc_pi_create0())) {
+		return 0;
+	}
+	pi->pktno = -1;
+	pi->numcomps = cp->numcmpts;
+	if (!(pi->picomps = jas_malloc(pi->numcomps * sizeof(jpc_picomp_t)))) {
+		jpc_pi_destroy(pi);
+		return 0;
+	}
+	for (compno = 0, picomp = pi->picomps; compno < pi->numcomps; ++compno,
+	  ++picomp) {
+		picomp->pirlvls = 0;
+	}
+
+	for (compno = 0, tcomp = tile->tcmpts, picomp = pi->picomps;
+	  compno < pi->numcomps; ++compno, ++tcomp, ++picomp) {
+		picomp->numrlvls = tcomp->numrlvls;
+		if (!(picomp->pirlvls = jas_malloc(picomp->numrlvls *
+		  sizeof(jpc_pirlvl_t)))) {
+			jpc_pi_destroy(pi);
+			return 0;
+		}
+		for (rlvlno = 0, pirlvl = picomp->pirlvls; rlvlno <
+		  picomp->numrlvls; ++rlvlno, ++pirlvl) {
+			pirlvl->prclyrnos = 0;
+		}
+		for (rlvlno = 0, pirlvl = picomp->pirlvls, rlvl = tcomp->rlvls;
+		  rlvlno < picomp->numrlvls; ++rlvlno, ++pirlvl, ++rlvl) {
+			pirlvl->numprcs = rlvl->numprcs;
+			if (rlvl->numprcs) {
+				/* One int per precinct: the array is walked with an int pointer. */
+				if (!(pirlvl->prclyrnos = jas_malloc(pirlvl->numprcs *
+				  sizeof(int)))) {
+					jpc_pi_destroy(pi);
+					return 0;
+				}
+			} else {
+				pirlvl->prclyrnos = 0;
+			}
+		}
+	}
+
+	pi->maxrlvls = 0;
+	for (compno = 0, tcomp = tile->tcmpts, picomp = pi->picomps;
+	  compno < pi->numcomps; ++compno, ++tcomp, ++picomp) {
+		picomp->hsamp = cp->ccps[compno].sampgrdstepx;
+		picomp->vsamp = cp->ccps[compno].sampgrdstepy;
+		for (rlvlno = 0, pirlvl = picomp->pirlvls, rlvl = tcomp->rlvls;
+		  rlvlno < picomp->numrlvls; ++rlvlno, ++pirlvl, ++rlvl) {
+			pirlvl->prcwidthexpn = rlvl->prcwidthexpn;
+			pirlvl->prcheightexpn = rlvl->prcheightexpn;
+			for (prcno = 0, prclyrno = pirlvl->prclyrnos;
+			  prcno < pirlvl->numprcs; ++prcno, ++prclyrno) {
+				*prclyrno = 0;
+			}
+			pirlvl->numhprcs = rlvl->numhprcs;
+		}
+		if (pi->maxrlvls < tcomp->numrlvls) {
+			pi->maxrlvls = tcomp->numrlvls;
+		}
+	}
+
+	pi->numlyrs = tile->numlyrs;
+	pi->xstart = tile->tlx;
+	pi->ystart = tile->tly;
+	pi->xend = tile->brx;
+	pi->yend = tile->bry;
+
+	pi->picomp = 0;
+	pi->pirlvl = 0;
+	pi->x = 0;
+	pi->y = 0;
+	pi->compno = 0;
+	pi->rlvlno = 0;
+	pi->prcno = 0;
+	pi->lyrno = 0;
+	pi->xstep = 0;
+	pi->ystep = 0;
+	pi->pchgno = -1;
+
+	pi->defaultpchg.prgord = tile->prg;
+	pi->defaultpchg.compnostart = 0;
+	pi->defaultpchg.compnoend = pi->numcomps;
+	pi->defaultpchg.rlvlnostart = 0;
+	pi->defaultpchg.rlvlnoend = pi->maxrlvls;
+	pi->defaultpchg.lyrnoend = pi->numlyrs;
+	pi->pchg = 0;
+	pi->valid = 0;
+
+	return pi;
+}
diff --git a/src/libjasper/jpc/jpc_t2enc.h b/src/libjasper/jpc/jpc_t2enc.h
new file mode 100644
index 0000000..f2ade16
--- /dev/null
+++ b/src/libjasper/jpc/jpc_t2enc.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tier 2 Encoder
+ *
+ * $Id: jpc_t2enc.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_T2ENC_H
+#define JPC_T2ENC_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "jpc_enc.h"
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+/* Encode all of the packets for the encoder's current tile. */
+int jpc_enc_encpkts(jpc_enc_t *enc, jas_stream_t *out);
+
+/* Encode the packet for one component, level, precinct, and layer. */
+int jpc_enc_encpkt(jpc_enc_t *enc, jas_stream_t *out, int compno, int lvlno,
+  int prcno, int lyrno);
+
+/* Save the tier-2 coding state of the current tile. */
+void jpc_save_t2state(jpc_enc_t *enc);
+
+/* Restore the tier-2 coding state of the current tile. */
+void jpc_restore_t2state(jpc_enc_t *enc);
+
+/* Initialize the tier-2 coding state (raflag also resets pass layers). */
+void jpc_init_t2state(jpc_enc_t *enc, int raflag);
+
+/* Create a packet iterator for the encoder (null pointer on failure). */
+jpc_pi_t *jpc_enc_pi_create(jpc_enc_cp_t *cp, jpc_enc_tile_t *tile);
+
+#endif
diff --git a/src/libjasper/jpc/jpc_tagtree.c b/src/libjasper/jpc/jpc_tagtree.c
new file mode 100644
index 0000000..c4878d1
--- /dev/null
+++ b/src/libjasper/jpc/jpc_tagtree.c
@@ -0,0 +1,393 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tag Tree Library
+ *
+ * $Id: jpc_tagtree.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <limits.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+
+#include "jasper/jas_malloc.h"
+
+#include "jpc_tagtree.h"
+
+/******************************************************************************\
+* Prototypes.
+\******************************************************************************/
+
+static jpc_tagtree_t *jpc_tagtree_alloc(void);
+
+/******************************************************************************\
+* Code for creating and destroying tag trees.
+\******************************************************************************/
+
+/* Create a tag tree with numleafsh x numleafsv leaf nodes.  Returns the
+  new tree, or a null pointer if memory cannot be allocated (in which case
+  nothing is leaked). */
+jpc_tagtree_t *jpc_tagtree_create(int numleafsh, int numleafsv)
+{
+	int nplh[JPC_TAGTREE_MAXDEPTH];
+	int nplv[JPC_TAGTREE_MAXDEPTH];
+	jpc_tagtreenode_t *node;
+	jpc_tagtreenode_t *parentnode;
+	jpc_tagtreenode_t *parentnode0;
+	jpc_tagtree_t *tree;
+	int i, j, k;
+	int numlvls;
+	int n;
+
+	assert(numleafsh > 0 && numleafsv > 0);
+
+	if (!(tree = jpc_tagtree_alloc())) {
+		return 0;
+	}
+	tree->numleafsh_ = numleafsh;
+	tree->numleafsv_ = numleafsv;
+
+	numlvls = 0;
+	nplh[0] = numleafsh;
+	nplv[0] = numleafsv;
+	do {
+		n = nplh[numlvls] * nplv[numlvls];
+		nplh[numlvls + 1] = (nplh[numlvls] + 1) / 2;
+		nplv[numlvls + 1] = (nplv[numlvls] + 1) / 2;
+		tree->numnodes_ += n;
+		++numlvls;
+	} while (n > 1);
+
+	if (!(tree->nodes_ = jas_malloc(tree->numnodes_ * sizeof(jpc_tagtreenode_t)))) {
+		jpc_tagtree_destroy(tree);	/* do not leak the tree object */
+		return 0;
+	}
+
+	/* Initialize the parent links for all nodes in the tree. */
+
+	node = tree->nodes_;
+	parentnode = &tree->nodes_[tree->numleafsh_ * tree->numleafsv_];
+	parentnode0 = parentnode;
+
+	for (i = 0; i < numlvls - 1; ++i) {
+		for (j = 0; j < nplv[i]; ++j) {
+			k = nplh[i];
+			while (--k >= 0) {
+				node->parent_ = parentnode;
+				++node;
+				if (--k >= 0) {
+					node->parent_ = parentnode;
+					++node;
+				}
+				++parentnode;
+			}
+			if ((j & 1) || j == nplv[i] - 1) {
+				parentnode0 = parentnode;
+			} else {
+				parentnode = parentnode0;
+				parentnode0 += nplh[i];
+			}
+		}
+	}
+	node->parent_ = 0;
+
+	/* Initialize the data values to something sane. */
+
+	jpc_tagtree_reset(tree);
+
+	return tree;
+}
+
+/* Release a tag tree: its node array (when one was allocated) followed by
+  the tree object itself. */
+void jpc_tagtree_destroy(jpc_tagtree_t *tree)
+{
+	if (tree->nodes_ != 0) {
+		jas_free(tree->nodes_);
+	}
+	jas_free(tree);
+}
+
+/* Allocate a tag tree object with no leaves and no nodes (0 on failure). */
+static jpc_tagtree_t *jpc_tagtree_alloc(void)
+{
+	jpc_tagtree_t *tree;
+
+	if (!(tree = jas_malloc(sizeof(jpc_tagtree_t)))) {
+		return 0;
+	}
+	tree->numleafsh_ = 0;
+	tree->numleafsv_ = 0;
+	tree->numnodes_ = 0;
+	tree->nodes_ = 0;
+	return tree;
+}
+
+/******************************************************************************\
+* Code.
+\******************************************************************************/
+
+/* Copy the per-node state (value, lower bound, and known flag) of every
+  node from srctree to dsttree.  The parent links of dsttree are not
+  modified. */
+void jpc_tagtree_copy(jpc_tagtree_t *dsttree, jpc_tagtree_t *srctree)
+{
+	jpc_tagtreenode_t *from;
+	jpc_tagtreenode_t *to;
+	int count;
+	int i;
+
+	/* Both tag trees must have the same leaf dimensions. */
+	assert(srctree->numleafsh_ == dsttree->numleafsh_ &&
+	  srctree->numleafsv_ == dsttree->numleafsv_);
+
+	count = srctree->numnodes_;
+	from = srctree->nodes_;
+	to = dsttree->nodes_;
+	for (i = 0; i < count; ++i) {
+		to[i].value_ = from[i].value_;
+		to[i].low_ = from[i].low_;
+		to[i].known_ = from[i].known_;
+	}
+}
+
+/* Reset all of the state information associated with a tag tree: the value
+  of every node becomes INT_MAX, and its lower bound and known flag are
+  cleared.  The parent links are left as they are. */
+void jpc_tagtree_reset(jpc_tagtree_t *tree)
+{
+	jpc_tagtreenode_t *nodes = tree->nodes_;
+	int count = tree->numnodes_;
+	int i;
+
+	/* INT_MAX is the initial value so that a later jpc_tagtree_setvalue
+	  call with any smaller value takes effect. */
+	for (i = 0; i < count; ++i) {
+		nodes[i].value_ = INT_MAX;
+		nodes[i].low_ = 0;
+		nodes[i].known_ = 0;
+	}
+}
+
+/* Lower the value of the specified leaf node to the given value if its
+  current value is larger, and then do the same for each ancestor in turn.
+  The walk toward the root stops at the first node whose value is not
+  larger than the given value. */
+void jpc_tagtree_setvalue(jpc_tagtree_t *tree, jpc_tagtreenode_t *leaf,
+  int value)
+{
+	jpc_tagtreenode_t *cur;
+
+	/* Avoid compiler warnings about unused parameters. */
+	tree = 0;
+
+	assert(value >= 0);
+
+	/* Follow the parent links, starting at the leaf. */
+	for (cur = leaf; cur && cur->value_ > value; cur = cur->parent_) {
+		cur->value_ = value;
+	}
+}
+
+/* Return a pointer to leaf node number n.  The leaves are the first nodes
+  of the node array; the index is not range checked. */
+jpc_tagtreenode_t *jpc_tagtree_getleaf(jpc_tagtree_t *tree, int n)
+{
+	return tree->nodes_ + n;
+}
+
+/* Invoke the tag tree encoding procedure for the given leaf and threshold.
+  Returns -1 on a write error; else 1 if leaf->low_ < threshold, else 0. */
+int jpc_tagtree_encode(jpc_tagtree_t *tree, jpc_tagtreenode_t *leaf,
+  int threshold, jpc_bitstream_t *out)
+{
+	jpc_tagtreenode_t *stk[JPC_TAGTREE_MAXDEPTH - 1];
+	jpc_tagtreenode_t **stkptr;
+	jpc_tagtreenode_t *node;
+	int low;
+
+	/* Avoid compiler warnings about unused parameters. */
+	tree = 0;
+
+	assert(leaf);
+	assert(threshold >= 0);
+
+	/* Traverse to the root of the tree, recording the path taken. */
+	stkptr = stk;
+	node = leaf;
+	while (node->parent_) {
+		*stkptr++ = node;	/* NOTE(review): pushes are not bounds checked */
+		node = node->parent_;
+	}
+	/* Walk back down from the root to the leaf, one node per iteration. */
+	low = 0;
+	for (;;) {
+		if (low > node->low_) {
+			/* Deferred propagation of the lower bound downward in
+			  the tree. */
+			node->low_ = low;
+		} else {
+			low = node->low_;
+		}
+		/* Raise the bound toward threshold, writing a 0 bit for each step. */
+		while (low < threshold) {
+			if (low >= node->value_) {
+				/* Bound reached the value: write a 1 bit, but only once. */
+				if (!node->known_) {
+					if (jpc_bitstream_putbit(out, 1) == EOF) {
+						return -1;
+					}
+					node->known_ = 1;
+				}
+				break;
+			}
+			if (jpc_bitstream_putbit(out, 0) == EOF) {
+				return -1;
+			}
+			++low;
+		}
+		node->low_ = low;
+		if (stkptr == stk) {
+			break;	/* the path stack is empty: the leaf has been processed */
+		}
+		node = *--stkptr;
+
+	}
+	return (leaf->low_ < threshold) ? 1 : 0;
+
+}
+
+/* Invoke the tag tree decoding procedure for the given leaf and threshold.
+  Returns -1 on a read error; else 1 if the leaf value < threshold, else 0. */
+int jpc_tagtree_decode(jpc_tagtree_t *tree, jpc_tagtreenode_t *leaf,
+  int threshold, jpc_bitstream_t *in)
+{
+	jpc_tagtreenode_t *stk[JPC_TAGTREE_MAXDEPTH - 1];
+	jpc_tagtreenode_t **stkptr;
+	jpc_tagtreenode_t *node;
+	int low;
+	int ret;
+
+	/* Avoid compiler warnings about unused parameters. */
+	tree = 0;
+
+	assert(threshold >= 0);
+
+	/* Traverse to the root of the tree, recording the path taken. */
+	stkptr = stk;
+	node = leaf;
+	while (node->parent_) {
+		*stkptr++ = node;	/* NOTE(review): pushes are not bounds checked */
+		node = node->parent_;
+	}
+	/* Walk back down from the root to the leaf, one node per iteration. */
+	low = 0;
+	for (;;) {
+		if (low > node->low_) {
+			node->low_ = low;	/* inherit the larger bound from above */
+		} else {
+			low = node->low_;
+		}
+		while (low < threshold && low < node->value_) {
+			if ((ret = jpc_bitstream_getbit(in)) < 0) {
+				return -1;
+			}
+			if (ret) {
+				node->value_ = low;	/* a 1 bit fixes the value, ending the loop */
+			} else {
+				++low;	/* a 0 bit raises the lower bound by one */
+			}
+		}
+		node->low_ = low;
+		if (stkptr == stk) {
+			break;
+		}
+		node = *--stkptr;
+	}
+	/* node is the leaf here: it is the last entry taken off the path stack. */
+	return (node->value_ < threshold) ? 1 : 0;
+}
+
+/******************************************************************************\
+* Code for debugging.
+\******************************************************************************/
+
+/* Print one line for every node of the tag tree to the given stream,
+  showing the address of the node and of its parent together with the
+  value, lower bound, and known flag of the node. */
+void jpc_tagtree_dump(jpc_tagtree_t *tree, FILE *out)
+{
+	jpc_tagtreenode_t *cur = tree->nodes_;
+	int left;
+
+	for (left = tree->numnodes_; left > 0; --left, ++cur) {
+		fprintf(out, "node %p, parent %p, value %d, lower %d, known %d\n",
+		  (void *) cur, (void *) cur->parent_, cur->value_, cur->low_,
+		  cur->known_);
+	}
+}
diff --git a/src/libjasper/jpc/jpc_tagtree.h b/src/libjasper/jpc/jpc_tagtree.h
new file mode 100644
index 0000000..22c4b84
--- /dev/null
+++ b/src/libjasper/jpc/jpc_tagtree.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tag Tree Library
+ *
+ * $Id: jpc_tagtree.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_TAGTREE_H
+#define JPC_TAGTREE_H
+
+/******************************************************************************\
+* Includes
+\******************************************************************************/
+
+#include <limits.h>
+#include <stdio.h>
+
+#include "jpc_bs.h"
+
+/******************************************************************************\
+* Constants
+\******************************************************************************/
+
+/* The maximum allowable depth for a tag tree (sizes the decoder's path stack). */
+#define JPC_TAGTREE_MAXDEPTH	32
+
+/******************************************************************************\
+* Types
+\******************************************************************************/
+
+/*
+ * Tag tree node.
+ */
+
+typedef struct jpc_tagtreenode_ {
+
+	/* The parent of this node (null for the root). */
+	struct jpc_tagtreenode_ *parent_;
+
+	/* The value associated with this node. */
+	int value_;
+
+	/* The lower bound established so far on this node's value. */
+	int low_;
+
+	/* A flag indicating if the value is known exactly (set by the encoder). */
+	int known_;
+
+} jpc_tagtreenode_t;
+
+/*
+ * Tag tree.
+ */
+
+typedef struct {
+
+	/* The number of leaves in the horizontal direction. */
+	int numleafsh_;
+
+	/* The number of leaves in the vertical direction. */
+	int numleafsv_;
+
+	/* The total number of nodes in the tree. */
+	int numnodes_;
+
+	/* The nodes, stored as one array of numnodes_ entries. */
+	jpc_tagtreenode_t *nodes_;
+
+} jpc_tagtree_t;
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+/* Create a tag tree. */
+jpc_tagtree_t *jpc_tagtree_create(int numleafsh, int numleafsv);
+
+/* Destroy a tag tree. */
+void jpc_tagtree_destroy(jpc_tagtree_t *tree);
+
+/* Copy data from one tag tree to another. */
+void jpc_tagtree_copy(jpc_tagtree_t *dsttree, jpc_tagtree_t *srctree);
+
+/* Reset the tag tree state. */
+void jpc_tagtree_reset(jpc_tagtree_t *tree);
+
+/* Set the value associated with a particular leaf node of a tag tree. */
+void jpc_tagtree_setvalue(jpc_tagtree_t *tree, jpc_tagtreenode_t *leaf,
+  int value);
+
+/* Get a pointer to a particular leaf node. */
+jpc_tagtreenode_t *jpc_tagtree_getleaf(jpc_tagtree_t *tree, int n);
+
+/* Decode whether leaf's value < threshold: 1 if so, 0 if not, -1 on error. */
+int jpc_tagtree_decode(jpc_tagtree_t *tree, jpc_tagtreenode_t *leaf,
+  int threshold, jpc_bitstream_t *in);
+
+/* Run the encoder for leaf up to threshold; returns 1 or 0, or -1 on error. */
+int jpc_tagtree_encode(jpc_tagtree_t *tree, jpc_tagtreenode_t *leaf,
+  int threshold, jpc_bitstream_t *out);
+
+/* Print every node of a tag tree to out (for debugging purposes). */
+void jpc_tagtree_dump(jpc_tagtree_t *tree, FILE *out);
+
+#endif
diff --git a/src/libjasper/jpc/jpc_tsfb.c b/src/libjasper/jpc/jpc_tsfb.c
new file mode 100644
index 0000000..6f332ff
--- /dev/null
+++ b/src/libjasper/jpc/jpc_tsfb.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2004 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tree-Structured Filter Bank (TSFB) Library
+ *
+ * $Id: jpc_tsfb.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include <assert.h>
+
+#include "jasper/jas_malloc.h"
+#include "jasper/jas_seq.h"
+
+#include "jpc_tsfb.h"
+#include "jpc_cod.h"
+#include "jpc_cs.h"
+#include "jpc_util.h"
+#include "jpc_math.h"
+
+/* Helpers defined later in this file; jpc_tsfb.h does not declare them. */
+int jpc_tsfb_analyze2(jpc_tsfb_t *tsfb, int *a, int xstart, int ystart,
+  int width, int height, int stride, int numlvls);
+int jpc_tsfb_synthesize2(jpc_tsfb_t *tsfb, int *a, int xstart, int ystart,
+  int width, int height, int stride, int numlvls);
+void jpc_tsfb_getbands2(jpc_tsfb_t *tsfb, int locxstart, int locystart,
+  int xstart, int ystart, int xend, int yend, jpc_tsfb_band_t **bands, int numlvls);
+
+jpc_tsfb_t *jpc_cod_gettsfb(int qmfbid, int numlvls)
+{
+	/* Allocate the descriptor; jpc_tsfb_destroy() releases it again. */
+	jpc_tsfb_t *fb = malloc(sizeof(jpc_tsfb_t));
+
+	if (!fb) {
+		return 0;
+	}
+
+	/* With no decomposition levels there is no filter bank to select. */
+	fb->qmfb = 0;
+	fb->numlvls = numlvls;
+	if (numlvls > 0) {
+		/* JPC_COX_INS selects the ns bank; any other id gets the ft bank. */
+		if (qmfbid == JPC_COX_INS) {
+			fb->qmfb = &jpc_ns_qmfb2d;
+		} else {
+			fb->qmfb = &jpc_ft_qmfb2d;
+		}
+	}
+
+	return fb;
+}
+
+void jpc_tsfb_destroy(jpc_tsfb_t *tsfb)
+{
+	free(tsfb);	/* frees the descriptor only; tsfb->qmfb is left alone */
+}
+
+int jpc_tsfb_analyze(jpc_tsfb_t *tsfb, jas_seq2d_t *a)
+{	/* Analyze all of a over tsfb->numlvls levels; returns 0 if there are none. */
+	return (tsfb->numlvls > 0) ? jpc_tsfb_analyze2(tsfb, jas_seq2d_getref(a,
+	  jas_seq2d_xstart(a), jas_seq2d_ystart(a)), jas_seq2d_xstart(a),
+	  jas_seq2d_ystart(a), jas_seq2d_width(a),
+	  jas_seq2d_height(a), jas_seq2d_rowstep(a), tsfb->numlvls - 1) : 0;
+}
+
+int jpc_tsfb_analyze2(jpc_tsfb_t *tsfb, int *a, int xstart, int ystart,
+  int width, int height, int stride, int numlvls)
+{
+	int nextx, nexty;
+	/* An empty region ends the recursion without touching the data. */
+	if (width <= 0 || height <= 0)
+		return 0;
+	if ((*tsfb->qmfb->analyze)(a, xstart, ystart, width, height, stride))
+		return -1;
+	if (numlvls <= 0)
+		return 0;
+	nextx = JPC_CEILDIVPOW2(xstart, 1);	/* origin of the region passed */
+	nexty = JPC_CEILDIVPOW2(ystart, 1);	/* to the next recursion step */
+	return jpc_tsfb_analyze2(tsfb, a, nextx, nexty,
+	  JPC_CEILDIVPOW2(xstart + width, 1) - nextx,
+	  JPC_CEILDIVPOW2(ystart + height, 1) - nexty, stride,
+	  numlvls - 1) ? -1 : 0;
+}
+
+int jpc_tsfb_synthesize(jpc_tsfb_t *tsfb, jas_seq2d_t *a)
+{	/* Synthesize all of a over tsfb->numlvls levels; returns 0 if there are none. */
+	return (tsfb->numlvls > 0) ? jpc_tsfb_synthesize2(tsfb,
+	  jas_seq2d_getref(a, jas_seq2d_xstart(a), jas_seq2d_ystart(a)),
+	  jas_seq2d_xstart(a), jas_seq2d_ystart(a), jas_seq2d_width(a),
+	  jas_seq2d_height(a), jas_seq2d_rowstep(a), tsfb->numlvls - 1) : 0;
+}
+
+int jpc_tsfb_synthesize2(jpc_tsfb_t *tsfb, int *a, int xstart, int ystart,
+  int width, int height, int stride, int numlvls)
+{
+	int nextx, nexty;
+	/* The deeper levels are synthesized first, then this one after them. */
+	if (numlvls > 0) {
+		nextx = JPC_CEILDIVPOW2(xstart, 1);
+		nexty = JPC_CEILDIVPOW2(ystart, 1);
+		if (jpc_tsfb_synthesize2(tsfb, a, nextx, nexty,
+		  JPC_CEILDIVPOW2(xstart + width, 1) - nextx,
+		  JPC_CEILDIVPOW2(ystart + height, 1) - nexty, stride,
+		  numlvls - 1))
+			return -1;
+	}
+	if (width <= 0 || height <= 0)	/* empty region: nothing to filter */
+		return 0;
+	return (*tsfb->qmfb->synthesize)(a, xstart, ystart, width, height,
+	  stride) ? -1 : 0;
+}
+
+int jpc_tsfb_getbands(jpc_tsfb_t *tsfb, uint_fast32_t xstart,
+  uint_fast32_t ystart, uint_fast32_t xend, uint_fast32_t yend,
+  jpc_tsfb_band_t *bands)
+{
+	/* Fill bands[] for the given region; returns how many were written. */
+	jpc_tsfb_band_t *next = bands;
+	if (tsfb->numlvls > 0) {
+		/* The helper advances next past every band it writes. */
+		jpc_tsfb_getbands2(tsfb, xstart, ystart, xstart, ystart, xend, yend,
+		  &next, tsfb->numlvls);
+		return next - bands;
+	}
+	/* No decomposition levels: the whole region is a single LL band. */
+	next->xstart = xstart;
+	next->ystart = ystart;
+	next->xend = xend;
+	next->yend = yend;
+	next->locxstart = xstart;
+	next->locystart = ystart;
+	next->locxend = next->locxstart + next->xend - next->xstart;
+	next->locyend = next->locystart + next->yend - next->ystart;
+	next->orient = JPC_TSFB_LL;
+	next->synenergywt = JPC_FIX_ONE;
+	++next;
+	return next - bands;
+}
+
+void jpc_tsfb_getbands2(jpc_tsfb_t *tsfb, int locxstart, int locystart,
+  int xstart, int ystart, int xend, int yend, jpc_tsfb_band_t **bands,
+  int numlvls)
+{
+	int newxstart;
+	int newystart;
+	int newxend;
+	int newyend;
+	jpc_tsfb_band_t *band;
+	/* Append the bands for numlvls levels at *bands, advancing it past each. */
+	newxstart = JPC_CEILDIVPOW2(xstart, 1);
+	newystart = JPC_CEILDIVPOW2(ystart, 1);
+	newxend = JPC_CEILDIVPOW2(xend, 1);
+	newyend = JPC_CEILDIVPOW2(yend, 1);
+	/* new* are the ceil-halved bounds handed to the recursive call below. */
+	if (numlvls > 0) {
+		/* Recurse first, so the bands of the remaining levels precede ours. */
+		jpc_tsfb_getbands2(tsfb, locxstart, locystart, newxstart, newystart,
+		  newxend, newyend, bands, numlvls - 1);
+		/* HL band: floor-halved x range, ceil-halved y range. */
+		band = *bands;
+		band->xstart = JPC_FLOORDIVPOW2(xstart, 1);
+		band->ystart = newystart;
+		band->xend = JPC_FLOORDIVPOW2(xend, 1);
+		band->yend = newyend;
+		band->locxstart = locxstart + newxend - newxstart;
+		band->locystart = locystart;
+		band->locxend = band->locxstart + band->xend - band->xstart;
+		band->locyend = band->locystart + band->yend - band->ystart;
+		band->orient = JPC_TSFB_HL;
+		band->synenergywt = jpc_dbltofix(tsfb->qmfb->hpenergywts[
+		  tsfb->numlvls - numlvls] * tsfb->qmfb->lpenergywts[
+		  tsfb->numlvls - numlvls]);
+		++(*bands);
+		/* LH band: ceil-halved x range, floor-halved y range. */
+		band = *bands;
+		band->xstart = newxstart;
+		band->ystart = JPC_FLOORDIVPOW2(ystart, 1);
+		band->xend = newxend;
+		band->yend = JPC_FLOORDIVPOW2(yend, 1);
+		band->locxstart = locxstart;
+		band->locystart = locystart + newyend - newystart;
+		band->locxend = band->locxstart + band->xend - band->xstart;
+		band->locyend = band->locystart + band->yend - band->ystart;
+		band->orient = JPC_TSFB_LH;
+		band->synenergywt = jpc_dbltofix(tsfb->qmfb->lpenergywts[
+		  tsfb->numlvls - numlvls] * tsfb->qmfb->hpenergywts[
+		  tsfb->numlvls - numlvls]);
+		++(*bands);
+		/* HH band: floor-halved ranges in both directions. */
+		band = *bands;
+		band->xstart = JPC_FLOORDIVPOW2(xstart, 1);
+		band->ystart = JPC_FLOORDIVPOW2(ystart, 1);
+		band->xend = JPC_FLOORDIVPOW2(xend, 1);
+		band->yend = JPC_FLOORDIVPOW2(yend, 1);
+		band->locxstart = locxstart + newxend - newxstart;
+		band->locystart = locystart + newyend - newystart;
+		band->locxend = band->locxstart + band->xend - band->xstart;
+		band->locyend = band->locystart + band->yend - band->ystart;
+		band->orient = JPC_TSFB_HH;
+		band->synenergywt = jpc_dbltofix(tsfb->qmfb->hpenergywts[
+		  tsfb->numlvls - numlvls] * tsfb->qmfb->hpenergywts[
+		  tsfb->numlvls - numlvls]);
+		++(*bands);
+
+	} else {
+		/* Base case: the region that is left becomes the LL band. */
+		band = *bands;
+		band->xstart = xstart;
+		band->ystart = ystart;
+		band->xend = xend;
+		band->yend = yend;
+		band->locxstart = locxstart;
+		band->locystart = locystart;
+		band->locxend = band->locxstart + band->xend - band->xstart;
+		band->locyend = band->locystart + band->yend - band->ystart;
+		band->orient = JPC_TSFB_LL;
+		band->synenergywt = jpc_dbltofix(tsfb->qmfb->lpenergywts[
+		  tsfb->numlvls - numlvls - 1] * tsfb->qmfb->lpenergywts[
+		  tsfb->numlvls - numlvls - 1]);
+		++(*bands);
+
+	}
+
+}
diff --git a/src/libjasper/jpc/jpc_tsfb.h b/src/libjasper/jpc/jpc_tsfb.h
new file mode 100644
index 0000000..3285e51
--- /dev/null
+++ b/src/libjasper/jpc/jpc_tsfb.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2004 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * Tree-Structured Filter Bank (TSFB) Library
+ *
+ * $Id: jpc_tsfb.h,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+#ifndef JPC_TSFB_H
+#define JPC_TSFB_H
+
+/******************************************************************************\
+* Includes.
+\******************************************************************************/
+
+#include "jasper/jas_seq.h"
+
+#include "jpc_fix.h"
+#include "jpc_qmfb.h"
+
+/******************************************************************************\
+* Constants.
+\******************************************************************************/
+
+#define	JPC_TSFB_MAXBANDS	(JPC_TSFB_MAXDEPTH * 3 + 1)	/* 3 per level + LL */
+#define	JPC_TSFB_MAXDEPTH	32
+#define	JPC_TSFB_RITIMODE	JPC_QMFB1D_RITIMODE
+/* Values stored in the orient member of jpc_tsfb_band_t. */
+#define JPC_TSFB_LL	0
+#define JPC_TSFB_LH	1
+#define JPC_TSFB_HL	2
+#define JPC_TSFB_HH	3
+
+/******************************************************************************\
+* Types.
+\******************************************************************************/
+
+typedef struct {
+	int xstart;	/* band bounds, as filled in by jpc_tsfb_getbands() */
+	int ystart;
+	int xend;
+	int yend;
+	int orient;	/* one of JPC_TSFB_LL, _LH, _HL, _HH */
+	int locxstart;	/* loc* bounds: same extent as the band, separate origin */
+	int locystart;
+	int locxend;	/* locxstart + (xend - xstart) */
+	int locyend;	/* locystart + (yend - ystart) */
+	jpc_fix_t synenergywt;	/* synthesis energy weight, in fixed point */
+} jpc_tsfb_band_t;
+
+typedef struct {
+	int numlvls;	/* number of levels, as passed to jpc_cod_gettsfb() */
+	jpc_qmfb2d_t *qmfb;	/* filter bank applied at each level; 0 if numlvls <= 0 */
+} jpc_tsfb_t;
+
+/******************************************************************************\
+* Functions.
+\******************************************************************************/
+
+/* Create a TSFB (0 on allocation failure); free it with jpc_tsfb_destroy(). */
+jpc_tsfb_t *jpc_cod_gettsfb(int qmfbid, int numlevels);
+
+/* Destroy a TSFB. */
+void jpc_tsfb_destroy(jpc_tsfb_t *tsfb);
+
+/* Perform analysis on x; returns 0 on success and -1 on failure. */
+int jpc_tsfb_analyze(jpc_tsfb_t *tsfb, jas_seq2d_t *x);
+
+/* Perform synthesis on x; returns 0 on success and -1 on failure. */
+int jpc_tsfb_synthesize(jpc_tsfb_t *tsfb, jas_seq2d_t *x);
+
+/* Get band information for a TSFB; returns the number of bands written. */
+int jpc_tsfb_getbands(jpc_tsfb_t *tsfb, uint_fast32_t xstart,
+  uint_fast32_t ystart, uint_fast32_t xend, uint_fast32_t yend,
+  jpc_tsfb_band_t *bands);
+
+#endif
diff --git a/src/libjasper/jpc/jpc_util.c b/src/libjasper/jpc/jpc_util.c
new file mode 100644
index 0000000..7dfeb63
--- /dev/null
+++ b/src/libjasper/jpc/jpc_util.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 1999-2000 Image Power, Inc. and the University of
+ *   British Columbia.
+ * Copyright (c) 2001-2003 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+/*
+ * $Id: jpc_util.c,v 1.1 2008/10/17 06:15:00 scuri Exp $
+ */
+
+/******************************************************************************\
+* Includes
+\******************************************************************************/
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include "jasper/jas_math.h"
+#include "jasper/jas_malloc.h"
+
+#include "jpc_fix.h"
+#include "jpc_cs.h"
+#include "jpc_flt.h"
+#include "jpc_util.h"
+
+/******************************************************************************\
+* Miscellaneous Functions
+\******************************************************************************/
+
+int jpc_atoaf(char *s, int *numvalues, double **values)
+{
+	/*
+	 * Parse the numbers in s (separated by commas, spaces, tabs or
+	 * newlines) into a newly allocated array.  On success, returns 0 with
+	 * the count in *numvalues and the array (0 when there are no numbers)
+	 * in *values; returns -1 if the array cannot be allocated.  Only the
+	 * first sizeof(buf) - 1 characters of s are examined.
+	 */
+	static char delim[] = ", \t\n";
+	char buf[4096];
+	int n;
+	double *vs;
+	char *cp;
+
+	/* First pass: count the tokens.  strtok() modifies its argument, so
+	   work on a private, always-terminated copy of s. */
+	strncpy(buf, s, sizeof(buf));
+	buf[sizeof(buf) - 1] = '\0';
+	n = 0;
+	for (cp = strtok(buf, delim); cp; cp = strtok(0, delim)) {
+		/* strtok() never yields an empty token, so every token counts. */
+		++n;
+	}
+
+	if (n) {
+		if (!(vs = jas_malloc(n * sizeof(double)))) {
+			return -1;
+		}
+
+		/* Second pass: re-tokenize a fresh copy and convert each token. */
+		strncpy(buf, s, sizeof(buf));
+		buf[sizeof(buf) - 1] = '\0';
+		n = 0;
+		for (cp = strtok(buf, delim); cp; cp = strtok(0, delim)) {
+			vs[n] = atof(cp);
+			++n;
+		}
+	} else {
+		vs = 0;
+	}
+
+	*numvalues = n;
+	*values = vs;
+
+	return 0;
+}
+
+jas_seq_t *jpc_seq_upsample(jas_seq_t *x, int m)
+{
+	int pos;
+	jas_seq_t *up = jas_seq_create(jas_seq_start(x) * m,
+	  (jas_seq_end(x) - 1) * m + 1);
+
+	if (!up)
+		return 0;
+	/* Multiples of m carry the input samples; every other slot is zero. */
+	for (pos = jas_seq_start(up); pos < jas_seq_end(up); ++pos)
+		*jas_seq_getref(up, pos) = (!JAS_MOD(pos, m)) ?
+		  jas_seq_get(x, pos / m) : jpc_inttofix(0);
+	return up;
+}
+
+jpc_fix_t jpc_seq_norm(jas_seq_t *x)
+{
+	/* Sum the squares in fixed point; the root is taken in floating point. */
+	jpc_fix_t sumsq = jpc_inttofix(0);
+	int pos = jas_seq_start(x);
+	while (pos < jas_seq_end(x)) {
+		sumsq = jpc_fix_add(sumsq, jpc_fix_mul(jas_seq_get(x, pos),
+		  jas_seq_get(x, pos)));
+		++pos;
+	}
+	return jpc_dbltofix(sqrt(jpc_fixtodbl(sumsq)));
+}
+
+jas_seq_t *jpc_seq_conv(jas_seq_t *x, jas_seq_t *y)
+{
+	/* Returns the convolution of x and y as a new sequence, or 0 on failure. */
+	int i, j, k;
+	jas_seq_t *z;
+	jpc_fix_t s, v;
+
+	z = jas_seq_create(jas_seq_start(x) + jas_seq_start(y),
+	  jas_seq_end(x) + jas_seq_end(y) - 1);
+	/* Report allocation failure; assert() would vanish under NDEBUG. */
+	if (!z)
+		return 0;
+	for (i = jas_seq_start(z); i < jas_seq_end(z); i++) {
+		s = jpc_inttofix(0);
+		for (j = jas_seq_start(y); j < jas_seq_end(y); j++) {
+			k = i - j;
+			if (k < jas_seq_start(x) || k >= jas_seq_end(x)) {
+				v = JPC_FIX_ZERO;	/* x is taken as zero outside its range */
+			} else {
+				v = jas_seq_get(x, k);
+			}
+			s = jpc_fix_add(s, jpc_fix_mul(jas_seq_get(y, j), v));
+		}
+		*jas_seq_getref(z, i) = s;
+	}
+
+	return z;
+}
diff --git a/src/libjasper/jpc/jpc_util.h b/src/libjasper/jpc/jpc_util.h
new file mode 100644
index 0000000..526312a
--- /dev/null
+++ b/src/libjasper/jpc/jpc_util.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2001-2002 Michael David Adams.
+ * All rights reserved.
+ */
+
+/* __START_OF_JASPER_LICENSE__
+ * 
+ * JasPer License Version 2.0
+ * 
+ * Copyright (c) 2001-2006 Michael David Adams
+ * Copyright (c) 1999-2000 Image Power, Inc.
+ * Copyright (c) 1999-2000 The University of British Columbia
+ * 
+ * All rights reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person (the
+ * "User") obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, and/or sell copies of the Software, and to permit
+ * persons to whom the Software is furnished to do so, subject to the
+ * following conditions:
+ * 
+ * 1.  The above copyright notices and this permission notice (which
+ * includes the disclaimer below) shall be included in all copies or
+ * substantial portions of the Software.
+ * 
+ * 2.  The name of a copyright holder shall not be used to endorse or
+ * promote products derived from the Software without specific prior
+ * written permission.
+ * 
+ * THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS
+ * LICENSE.  NO USE OF THE SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+ * THIS DISCLAIMER.  THE SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
+ * "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+ * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.  IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+ * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  NO ASSURANCES ARE
+ * PROVIDED BY THE COPYRIGHT HOLDERS THAT THE SOFTWARE DOES NOT INFRINGE
+ * THE PATENT OR OTHER INTELLECTUAL PROPERTY RIGHTS OF ANY OTHER ENTITY.
+ * EACH COPYRIGHT HOLDER DISCLAIMS ANY LIABILITY TO THE USER FOR CLAIMS
+ * BROUGHT BY ANY OTHER ENTITY BASED ON INFRINGEMENT OF INTELLECTUAL
+ * PROPERTY RIGHTS OR OTHERWISE.  AS A CONDITION TO EXERCISING THE RIGHTS
+ * GRANTED HEREUNDER, EACH USER HEREBY ASSUMES SOLE RESPONSIBILITY TO SECURE
+ * ANY OTHER INTELLECTUAL PROPERTY RIGHTS NEEDED, IF ANY.  THE SOFTWARE
+ * IS NOT FAULT-TOLERANT AND IS NOT INTENDED FOR USE IN MISSION-CRITICAL
+ * SYSTEMS, SUCH AS THOSE USED IN THE OPERATION OF NUCLEAR FACILITIES,
+ * AIRCRAFT NAVIGATION OR COMMUNICATION SYSTEMS, AIR TRAFFIC CONTROL
+ * SYSTEMS, DIRECT LIFE SUPPORT MACHINES, OR WEAPONS SYSTEMS, IN WHICH
+ * THE FAILURE OF THE SOFTWARE OR SYSTEM COULD LEAD DIRECTLY TO DEATH,
+ * PERSONAL INJURY, OR SEVERE PHYSICAL OR ENVIRONMENTAL DAMAGE ("HIGH
+ * RISK ACTIVITIES").  THE COPYRIGHT HOLDERS SPECIFICALLY DISCLAIM ANY
+ * EXPRESS OR IMPLIED WARRANTY OF FITNESS FOR HIGH RISK ACTIVITIES.
+ * 
+ * __END_OF_JASPER_LICENSE__
+ */
+
+#ifndef JPC_UTIL_H
+#define JPC_UTIL_H
+
+/* Parse reals separated by commas or whitespace into a new array of doubles. */
+int jpc_atoaf(char *s, int *numvalues, double **values);
+
+/* Upsample a sequence by n, placing zeros between the input samples. */
+jas_seq_t *jpc_seq_upsample(jas_seq_t *seq, int n);
+
+/* Convolve two sequences, returning the result as a new sequence. */
+jas_seq_t *jpc_seq_conv(jas_seq_t *seq0, jas_seq_t *seq1);
+
+/* Compute the norm (root of the sum of squares) of a sequence. */
+jpc_fix_t jpc_seq_norm(jas_seq_t *x);
+
+#endif
diff --git a/src/libjpeg/jcapimin.c b/src/libjpeg/jcapimin.c
new file mode 100644
index 0000000..54fb8c5
--- /dev/null
+++ b/src/libjpeg/jcapimin.c
@@ -0,0 +1,280 @@
+/*
+ * jcapimin.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the compression half
+ * of the JPEG library.  These are the "minimum" API routines that may be
+ * needed in either the normal full-compression case or the transcoding-only
+ * case.
+ *
+ * Most of the routines intended to be called directly by an application
+ * are in this file or in jcapistd.c.  But also see jcparam.c for
+ * parameter-setup helper routines, jcomapi.c for routines shared by
+ * compression and decompression, and jctrans.c for the transcoding case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Initialization of a JPEG compression object (ends in CSTATE_START).
+ * The error manager must already be set up (in case memory manager fails).
+ */
+
+GLOBAL(void)
+jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize)
+{
+  int i;			/* index into the table-pointer arrays */
+
+  /* Guard against version mismatches between library and caller. */
+  cinfo->mem = NULL;		/* so jpeg_destroy knows mem mgr not called */
+  if (version != JPEG_LIB_VERSION)	/* caller's value must equal ours */
+    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
+  if (structsize != SIZEOF(struct jpeg_compress_struct))
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE, 
+	     (int) SIZEOF(struct jpeg_compress_struct), (int) structsize);
+
+  /* For debugging purposes, we zero the whole master structure.
+   * But the application has already set the err pointer, and may have set
+   * client_data, so we have to save and restore those fields.
+   * Note: if application hasn't set client_data, tools like Purify may
+   * complain here.
+   */
+  {
+    struct jpeg_error_mgr * err = cinfo->err;
+    void * client_data = cinfo->client_data; /* ignore Purify complaint here */
+    MEMZERO(cinfo, SIZEOF(struct jpeg_compress_struct));
+    cinfo->err = err;
+    cinfo->client_data = client_data;
+  }
+  cinfo->is_decompressor = FALSE;
+
+  /* Initialize a memory manager instance for this object */
+  jinit_memory_mgr((j_common_ptr) cinfo);
+
+  /* Zero out pointers to permanent structures (explicitly, after MEMZERO). */
+  cinfo->progress = NULL;
+  cinfo->dest = NULL;
+
+  cinfo->comp_info = NULL;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++)
+    cinfo->quant_tbl_ptrs[i] = NULL;
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    cinfo->dc_huff_tbl_ptrs[i] = NULL;
+    cinfo->ac_huff_tbl_ptrs[i] = NULL;
+  }
+
+  cinfo->script_space = NULL;
+
+  cinfo->input_gamma = 1.0;	/* in case application forgets */
+
+  /* OK, I'm ready */
+  cinfo->global_state = CSTATE_START;
+}
+
+
+/*
+ * Destruction of a JPEG compression object (delegates to jpeg_destroy).
+ */
+
+GLOBAL(void)
+jpeg_destroy_compress (j_compress_ptr cinfo)
+{
+  jpeg_destroy((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Abort processing of a JPEG compression operation (via jpeg_abort),
+ * but don't destroy the object itself.
+ */
+
+GLOBAL(void)
+jpeg_abort_compress (j_compress_ptr cinfo)
+{
+  jpeg_abort((j_common_ptr) cinfo); /* use common routine */
+}
+
+
+/*
+ * Forcibly suppress or un-suppress all quantization and Huffman tables.
+ * Marks all currently defined tables as already written (if suppress)
+ * or not written (if !suppress).  This will control whether they get emitted
+ * by a subsequent jpeg_start_compress call.
+ *
+ * This routine is exported for use by applications that want to produce
+ * abbreviated JPEG datastreams.  It logically belongs in jcparam.c, but
+ * since it is called by jpeg_start_compress, we put it here --- otherwise
+ * jcparam.o would be linked whether the application used it or not.
+ */
+
+GLOBAL(void)
+jpeg_suppress_tables (j_compress_ptr cinfo, boolean suppress)
+{
+  int i;
+  JQUANT_TBL * qtbl;		/* quantization table in slot i, if any */
+  JHUFF_TBL * htbl;		/* DC or AC Huffman table in slot i, if any */
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    if ((qtbl = cinfo->quant_tbl_ptrs[i]) != NULL)	/* skip empty slots */
+      qtbl->sent_table = suppress;
+  }
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    if ((htbl = cinfo->dc_huff_tbl_ptrs[i]) != NULL)	/* skip empty slots */
+      htbl->sent_table = suppress;
+    if ((htbl = cinfo->ac_huff_tbl_ptrs[i]) != NULL)
+      htbl->sent_table = suppress;
+  }
+}
+
+
+/*
+ * Finish JPEG compression.
+ *
+ * If a multipass operating mode was selected, this may do a great deal of
+ * work including most of the actual output.
+ */
+
+GLOBAL(void)
+jpeg_finish_compress (j_compress_ptr cinfo)
+{
+  JDIMENSION iMCU_row;		/* iMCU row counter for the remaining passes */
+
+  if (cinfo->global_state == CSTATE_SCANNING ||
+      cinfo->global_state == CSTATE_RAW_OK) {
+    /* Terminate first pass; every scanline must have been supplied */
+    if (cinfo->next_scanline < cinfo->image_height)
+      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
+    (*cinfo->master->finish_pass) (cinfo);
+  } else if (cinfo->global_state != CSTATE_WRCOEFS)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any remaining passes */
+  while (! cinfo->master->is_last_pass) {
+    (*cinfo->master->prepare_for_pass) (cinfo);
+    for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) {
+      if (cinfo->progress != NULL) {
+	cinfo->progress->pass_counter = (long) iMCU_row;
+	cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows;
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      }
+      /* We bypass the main controller and invoke coef controller directly;
+       * all work is done from the coefficient buffer; a FALSE return is fatal.
+       */
+      if (! (*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE) NULL))
+	ERREXIT(cinfo, JERR_CANT_SUSPEND);
+    }
+    (*cinfo->master->finish_pass) (cinfo);
+  }
+  /* Write EOI, do final cleanup */
+  (*cinfo->marker->write_file_trailer) (cinfo);
+  (*cinfo->dest->term_destination) (cinfo);
+  /* We can use jpeg_abort to release memory and reset global_state */
+  jpeg_abort((j_common_ptr) cinfo);
+}
+
+
+/*
+ * Write a special marker: a header for `marker`, then datalen bytes of dataptr.
+ * This is only recommended for writing COM or APPn markers.
+ * Must be called after jpeg_start_compress() and before
+ * first call to jpeg_write_scanlines() or jpeg_write_raw_data().
+ */
+
+GLOBAL(void)
+jpeg_write_marker (j_compress_ptr cinfo, int marker,
+		   const JOCTET *dataptr, unsigned int datalen)
+{
+  JMETHOD(void, write_marker_byte, (j_compress_ptr info, int val));
+
+  if (cinfo->next_scanline != 0 ||	/* data already written, or bad state */
+      (cinfo->global_state != CSTATE_SCANNING &&
+       cinfo->global_state != CSTATE_RAW_OK &&
+       cinfo->global_state != CSTATE_WRCOEFS))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
+  write_marker_byte = cinfo->marker->write_marker_byte;	/* copy for speed */
+  while (datalen--) {
+    (*write_marker_byte) (cinfo, *dataptr);
+    dataptr++;
+  }
+}
+
+/* Same, but piecemeal: emits only the marker header (same state check). */
+
+GLOBAL(void)
+jpeg_write_m_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
+{
+  if (cinfo->next_scanline != 0 ||	/* data already written, or bad state */
+      (cinfo->global_state != CSTATE_SCANNING &&
+       cinfo->global_state != CSTATE_RAW_OK &&
+       cinfo->global_state != CSTATE_WRCOEFS))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
+}
+
+GLOBAL(void)
+jpeg_write_m_byte (j_compress_ptr cinfo, int val)
+{
+  (*cinfo->marker->write_marker_byte) (cinfo, val); /* no state check here */
+}
+
+
+/*
+ * Alternate compression function: just write an abbreviated table file.
+ * Before calling this, all parameters and a data destination must be set up.
+ *
+ * To produce a pair of files containing abbreviated tables and abbreviated
+ * image data, one would proceed as follows:
+ *
+ *		initialize JPEG object
+ *		set JPEG parameters
+ *		set destination to table file
+ *		jpeg_write_tables(cinfo);
+ *		set destination to image file
+ *		jpeg_start_compress(cinfo, FALSE);
+ *		write data...
+ *		jpeg_finish_compress(cinfo);
+ *
+ * jpeg_write_tables has the side effect of marking all tables written
+ * (same as jpeg_suppress_tables(..., TRUE)).  Thus a subsequent start_compress
+ * will not re-emit the tables unless it is passed write_all_tables=TRUE.
+ */
+
+GLOBAL(void)
+jpeg_write_tables (j_compress_ptr cinfo)
+{
+  if (cinfo->global_state != CSTATE_START)	/* no compression in progress */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Initialize the marker writer ... bit of a crock to do it here. */
+  jinit_marker_writer(cinfo);
+  /* Write them tables! */
+  (*cinfo->marker->write_tables_only) (cinfo);
+  /* And clean up (destination only; jpeg_abort is deliberately not called). */
+  (*cinfo->dest->term_destination) (cinfo);
+  /*
+   * In library releases up through v6a, we called jpeg_abort() here to free
+   * any working memory allocated by the destination manager and marker
+   * writer.  Some applications had a problem with that: they allocated space
+   * of their own from the library memory manager, and didn't want it to go
+   * away during write_tables.  So now we do nothing.  This will cause a
+   * memory leak if an app calls write_tables repeatedly without doing a full
+   * compression cycle or otherwise resetting the JPEG object.  However, that
+   * seems less bad than unexpectedly freeing memory in the normal case.
+   * An app that prefers the old behavior can call jpeg_abort for itself after
+   * each call to jpeg_write_tables().
+   */
+}
diff --git a/src/libjpeg/jcapistd.c b/src/libjpeg/jcapistd.c
new file mode 100644
index 0000000..c0320b1
--- /dev/null
+++ b/src/libjpeg/jcapistd.c
@@ -0,0 +1,161 @@
+/*
+ * jcapistd.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the compression half
+ * of the JPEG library.  These are the "standard" API routines that are
+ * used in the normal full-compression case.  They are not used by a
+ * transcoding-only application.  Note that if an application links in
+ * jpeg_start_compress, it will end up linking in the entire compressor.
+ * We thus must separate this file from jcapimin.c to avoid linking the
+ * whole compression library into a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Compression initialization.
+ * Before calling this, all parameters and a data destination must be set up.
+ *
+ * We require a write_all_tables parameter as a failsafe check when writing
+ * multiple datastreams from the same compression object.  Since prior runs
+ * will have left all the tables marked sent_table=TRUE, a subsequent run
+ * would emit an abbreviated stream (no tables) by default.  This may be what
+ * is wanted, but for safety's sake it should not be the default behavior:
+ * programmers should have to make a deliberate choice to emit abbreviated
+ * images.  Therefore the documentation and examples should encourage people
+ * to pass write_all_tables=TRUE; then it will take active thought to do the
+ * wrong thing.
+ */
+
+GLOBAL(void)
+jpeg_start_compress (j_compress_ptr cinfo, boolean write_all_tables)
+{
+  if (cinfo->global_state != CSTATE_START)	/* no compression in progress */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (write_all_tables)
+    jpeg_suppress_tables(cinfo, FALSE);	/* mark all tables to be written */
+
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Perform master selection of active modules */
+  jinit_compress_master(cinfo);
+  /* Set up for the first pass */
+  (*cinfo->master->prepare_for_pass) (cinfo);
+  /* Ready for application to drive first pass through jpeg_write_scanlines
+   * (CSTATE_SCANNING) or jpeg_write_raw_data (CSTATE_RAW_OK).
+   */
+  cinfo->next_scanline = 0;
+  cinfo->global_state = (cinfo->raw_data_in ? CSTATE_RAW_OK : CSTATE_SCANNING);
+}
+
+
+/*
+ * Write some scanlines of data to the JPEG compressor.
+ *
+ * The return value will be the number of lines actually written.
+ * This should be less than the supplied num_lines only in case that
+ * the data destination module has requested suspension of the compressor,
+ * or if more than image_height scanlines are passed in.
+ *
+ * Note: we warn about excess calls to jpeg_write_scanlines() since
+ * this likely signals an application programmer error.  However,
+ * excess scanlines passed in the last valid call are *silently* ignored,
+ * so that the application need not adjust num_lines for end-of-image
+ * when using a multiple-scanline buffer.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_write_scanlines (j_compress_ptr cinfo, JSAMPARRAY scanlines,
+		      JDIMENSION num_lines)
+{
+  JDIMENSION row_ctr, rows_left;	/* rows consumed; rows still missing */
+
+  if (cinfo->global_state != CSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->next_scanline >= cinfo->image_height)
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);	/* note: no early return here */
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->image_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Give master control module another chance if this is first call to
+   * jpeg_write_scanlines.  This lets output of the frame/scan headers be
+   * delayed so that application can write COM, etc, markers between
+   * jpeg_start_compress and jpeg_write_scanlines.
+   */
+  if (cinfo->master->call_pass_startup)
+    (*cinfo->master->pass_startup) (cinfo);
+
+  /* Ignore any extra scanlines at bottom of image (clamp num_lines). */
+  rows_left = cinfo->image_height - cinfo->next_scanline;
+  if (num_lines > rows_left)
+    num_lines = rows_left;
+
+  row_ctr = 0;
+  (*cinfo->main->process_data) (cinfo, scanlines, &row_ctr, num_lines);
+  cinfo->next_scanline += row_ctr;
+  return row_ctr;
+}
+
+
+/*
+ * Alternate entry point to write raw data; returns the lines consumed.
+ * Processes exactly one iMCU row per call, unless suspended (returns 0).
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_write_raw_data (j_compress_ptr cinfo, JSAMPIMAGE data,
+		     JDIMENSION num_lines)
+{
+  JDIMENSION lines_per_iMCU_row;	/* max_v_samp_factor * DCTSIZE */
+
+  if (cinfo->global_state != CSTATE_RAW_OK)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->next_scanline >= cinfo->image_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long) cinfo->next_scanline;
+    cinfo->progress->pass_limit = (long) cinfo->image_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Give master control module another chance if this is first call to
+   * jpeg_write_raw_data.  This lets output of the frame/scan headers be
+   * delayed so that application can write COM, etc, markers between
+   * jpeg_start_compress and jpeg_write_raw_data.
+   */
+  if (cinfo->master->call_pass_startup)
+    (*cinfo->master->pass_startup) (cinfo);
+
+  /* Verify that at least one iMCU row has been passed. */
+  lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
+  if (num_lines < lines_per_iMCU_row)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Directly compress the row. */
+  if (! (*cinfo->coef->compress_data) (cinfo, data)) {
+    /* If compressor did not consume the whole row, suspend processing. */
+    return 0;
+  }
+
+  /* OK, we processed one iMCU row; report that many lines as consumed. */
+  cinfo->next_scanline += lines_per_iMCU_row;
+  return lines_per_iMCU_row;
+}
diff --git a/src/libjpeg/jccoefct.c b/src/libjpeg/jccoefct.c
new file mode 100644
index 0000000..1963ddb
--- /dev/null
+++ b/src/libjpeg/jccoefct.c
@@ -0,0 +1,449 @@
+/*
+ * jccoefct.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the coefficient buffer controller for compression.
+ * This controller is the top level of the JPEG compressor proper.
+ * The coefficient buffer lies between forward-DCT and entropy encoding steps.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* We use a full-image coefficient buffer when doing Huffman optimization,
+ * and also for writing multiple-scan JPEG files.  In all cases, the DCT
+ * step is run during the first pass, and subsequent passes need only read
+ * the buffered coefficients.
+ */
+#ifdef ENTROPY_OPT_SUPPORTED
+#define FULL_COEF_BUFFER_SUPPORTED
+#else
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+#define FULL_COEF_BUFFER_SUPPORTED
+#endif
+#endif
+
+
+/* Private buffer controller object; cinfo->coef is cast to this type. */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields */
+
+  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
+  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* For single-pass compression, it's sufficient to buffer just one MCU
+   * (although this may prove a bit slow in practice).  We allocate a
+   * workspace of C_MAX_BLOCKS_IN_MCU coefficient blocks, and reuse it for each
+   * MCU constructed and sent.  (On 80x86, the workspace is FAR even though
+   * it's not really very big; this is to keep the module interfaces unchanged
+   * when a large coefficient buffer is necessary.)
+   * In multi-pass modes, this array points to the current MCU's blocks
+   * within the virtual arrays.
+   */
+  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
+
+  /* Multi-pass modes: one virtual block array per component; [0] is NULL if none. */
+  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;	/* what cinfo->coef is cast to */
+
+
+/* Forward declarations */
+METHODDEF(boolean) compress_data
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+METHODDEF(boolean) compress_first_pass
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+METHODDEF(boolean) compress_output
+    JPP((j_compress_ptr cinfo, JSAMPIMAGE input_buf));
+#endif
+
+
+LOCAL(void)
+start_iMCU_row (j_compress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row (uses coef->iMCU_row_num) */
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->mcu_ctr = 0;		/* compress routines start at MCU column 0 ... */
+  coef->MCU_vert_offset = 0;	/* ... and at MCU row 0 of the iMCU row */
+}
+
+
+/*
+ * Initialize for a processing pass; pass_mode selects pub.compress_data.
+ */
+
+METHODDEF(void)
+start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  coef->iMCU_row_num = 0;
+  start_iMCU_row(cinfo);
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:		/* single-pass case; needs no virtual arrays */
+    if (coef->whole_image[0] != NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub.compress_data = compress_data;
+    break;
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+  case JBUF_SAVE_AND_PASS:	/* first pass of multi-pass; fills virtual arrays */
+    if (coef->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub.compress_data = compress_first_pass;
+    break;
+  case JBUF_CRANK_DEST:		/* later passes; read back the virtual arrays */
+    if (coef->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub.compress_data = compress_output;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data in the single-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the image.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(boolean)
+compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, bi, ci, yindex, yoffset, blockcnt;
+  JDIMENSION ypos, xpos;
+  jpeg_component_info *compptr;
+
+  /* Loop to write as much as one whole iMCU row (resumes after suspension) */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num <= last_MCU_col;
+	 MCU_col_num++) {
+      /* Determine where data comes from in input_buf and do the DCT thing.
+       * Each call on forward_DCT processes a horizontal row of DCT blocks
+       * as wide as an MCU; we rely on having allocated the MCU_buffer[] blocks
+       * sequentially.  Dummy blocks at the right or bottom edge are filled in
+       * specially.  The data in them does not matter for image reconstruction,
+       * so we fill them with values that will encode to the smallest amount of
+       * data, viz: all zeroes in the AC entries, DC entries equal to previous
+       * block's DC value.  (Thanks to Thomas Kinsman for this idea.)
+       */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						: compptr->last_col_width;
+	xpos = MCU_col_num * compptr->MCU_sample_width;
+	ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (coef->iMCU_row_num < last_iMCU_row ||
+	      yoffset+yindex < compptr->last_row_height) {
+	    (*cinfo->fdct->forward_DCT) (cinfo, compptr,
+					 input_buf[compptr->component_index],
+					 coef->MCU_buffer[blkn],
+					 ypos, xpos, (JDIMENSION) blockcnt);
+	    if (blockcnt < compptr->MCU_width) {
+	      /* Right-edge dummy blocks: zeroed, DC copied from left neighbor. */
+	      jzero_far((void FAR *) coef->MCU_buffer[blkn + blockcnt],
+			(compptr->MCU_width - blockcnt) * SIZEOF(JBLOCK));
+	      for (bi = blockcnt; bi < compptr->MCU_width; bi++) {
+		coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn+bi-1][0][0];
+	      }
+	    }
+	  } else {
+	    /* Bottom-edge dummy row: zeroed, DC copied from block blkn-1. */
+	    jzero_far((void FAR *) coef->MCU_buffer[blkn],
+		      compptr->MCU_width * SIZEOF(JBLOCK));
+	    for (bi = 0; bi < compptr->MCU_width; bi++) {
+	      coef->MCU_buffer[blkn+bi][0][0] = coef->MCU_buffer[blkn-1][0][0];
+	    }
+	  }
+	  blkn += compptr->MCU_width;
+	  ypos += DCTSIZE;
+	}
+      }
+      /* Try to write the MCU.  In event of a suspension failure, we will
+       * re-DCT the MCU on restart (a bit inefficient, could be fixed...)
+       */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->mcu_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+
+/*
+ * Process some data in the first pass of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the image.
+ * This amount of data is read from the source buffer, DCT'd and quantized,
+ * and saved into the virtual arrays.  We also generate suitable dummy blocks
+ * as needed at the right and lower edges.  (The dummy blocks are constructed
+ * in the virtual arrays, which have been padded appropriately.)  This makes
+ * it possible for subsequent passes not to worry about real vs. dummy blocks.
+ *
+ * We must also emit the data to the entropy encoder.  This is conveniently
+ * done by calling compress_output() after we've loaded the current strip
+ * of the virtual arrays.
+ *
+ * NB: input_buf contains a plane for each component in image.  All
+ * components are DCT'd and loaded into the virtual arrays in this pass.
+ * However, it may be that only a subset of the components are emitted to
+ * the entropy encoder during this first pass; be careful about looking
+ * at the scan-dependent variables (MCU dimensions, etc).
+ */
+
+METHODDEF(boolean)
+compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION blocks_across, MCUs_across, MCUindex;
+  int bi, ci, h_samp_factor, block_row, block_rows, ndummy;
+  JCOEF lastDC;			/* DC value replicated into dummy blocks */
+  jpeg_component_info *compptr;
+  JBLOCKARRAY buffer;
+  JBLOCKROW thisblockrow, lastblockrow;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Align the virtual buffer for this component. */
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[ci],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, TRUE);
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (coef->iMCU_row_num < last_iMCU_row)
+      block_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here, since may not be set! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+    }
+    blocks_across = compptr->width_in_blocks;
+    h_samp_factor = compptr->h_samp_factor;
+    /* Count number of dummy blocks to be added at the right margin. */
+    ndummy = (int) (blocks_across % h_samp_factor);
+    if (ndummy > 0)
+      ndummy = h_samp_factor - ndummy;
+    /* Perform DCT for all non-dummy blocks in this iMCU row.  Each call
+     * on forward_DCT processes a complete horizontal row of DCT blocks.
+     */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      thisblockrow = buffer[block_row];
+      (*cinfo->fdct->forward_DCT) (cinfo, compptr,
+				   input_buf[ci], thisblockrow,
+				   (JDIMENSION) (block_row * DCTSIZE),
+				   (JDIMENSION) 0, blocks_across);
+      if (ndummy > 0) {
+	/* Create dummy blocks at the right edge of the image. */
+	thisblockrow += blocks_across; /* => first dummy block */
+	jzero_far((void FAR *) thisblockrow, ndummy * SIZEOF(JBLOCK));
+	lastDC = thisblockrow[-1][0];	/* DC of the last real block */
+	for (bi = 0; bi < ndummy; bi++) {
+	  thisblockrow[bi][0] = lastDC;
+	}
+      }
+    }
+    /* If at end of image, create dummy block rows as needed.
+     * The tricky part here is that within each MCU, we want the DC values
+     * of the dummy blocks to match the last real block's DC value.
+     * This squeezes a few more bytes out of the resulting file...
+     */
+    if (coef->iMCU_row_num == last_iMCU_row) {
+      blocks_across += ndummy;	/* include lower right corner */
+      MCUs_across = blocks_across / h_samp_factor;
+      for (block_row = block_rows; block_row < compptr->v_samp_factor;
+	   block_row++) {
+	thisblockrow = buffer[block_row];
+	lastblockrow = buffer[block_row-1];
+	jzero_far((void FAR *) thisblockrow,
+		  (size_t) (blocks_across * SIZEOF(JBLOCK)));
+	for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
+	  lastDC = lastblockrow[h_samp_factor-1][0]; /* from row above, same MCU */
+	  for (bi = 0; bi < h_samp_factor; bi++) {
+	    thisblockrow[bi][0] = lastDC;
+	  }
+	  thisblockrow += h_samp_factor; /* advance to next MCU in row */
+	  lastblockrow += h_samp_factor;
+	}
+      }
+    }
+  }
+  /* NB: compress_output will increment iMCU_row_num if successful.
+   * A suspension return will result in redoing all the work above next time.
+   */
+
+  /* Emit data to the entropy encoder, sharing code with subsequent passes */
+  return compress_output(cinfo, input_buf);
+}
+
+
+/*
+ * Process some data in subsequent passes of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)
+compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int blkn, ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;		/* first block column of this MCU */
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW buffer_ptr;		/* walks the MCU's blocks within one row */
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan.
+   * NB: during first pass, this is safe only because the buffers will
+   * already be aligned properly, so jmemmgr.c won't need to do any I/O.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+  }
+
+  /* Loop to process one whole iMCU row (resumes after suspension) */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+	    coef->MCU_buffer[blkn++] = buffer_ptr++;
+	  }
+	}
+      }
+      /* Try to write the MCU. */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->mcu_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+#endif /* FULL_COEF_BUFFER_SUPPORTED */
+
+
+/*
+ * Initialize coefficient buffer controller (full-image or single-MCU buffer).
+ */
+
+GLOBAL(void)
+jinit_c_coef_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_coef_ptr coef;
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_coef_controller));
+  cinfo->coef = (struct jpeg_c_coef_controller *) coef;
+  coef->pub.start_pass = start_pass_coef;
+
+  /* Create the coefficient buffer. */
+  if (need_full_buffer) {
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+    /* Allocate a full-image virtual array for each component, */
+    /* padded to a multiple of samp_factor DCT blocks in each direction. */
+    int ci;
+    jpeg_component_info *compptr;
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+				(long) compptr->h_samp_factor),
+	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				(long) compptr->v_samp_factor),
+	 (JDIMENSION) compptr->v_samp_factor);
+    }
+#else
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+  } else {
+    /* We only need a single-MCU buffer: one allocation, sliced per block. */
+    JBLOCKROW buffer;
+    int i;
+
+    buffer = (JBLOCKROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+    for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
+      coef->MCU_buffer[i] = buffer + i;
+    }
+    coef->whole_image[0] = NULL; /* flag for no virtual arrays */
+  }
+}
diff --git a/src/libjpeg/jccolor.c b/src/libjpeg/jccolor.c
new file mode 100644
index 0000000..0a8a4b5
--- /dev/null
+++ b/src/libjpeg/jccolor.c
@@ -0,0 +1,459 @@
+/*
+ * jccolor.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains input colorspace conversion routines.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private subobject: the public converter record plus this module's state */
+
+typedef struct {
+  struct jpeg_color_converter pub; /* public fields; first, so cinfo->cconvert can be cast to this type */
+
+  /* Private state for RGB->YCC conversion */
+  INT32 * rgb_ycc_tab;		/* => table for RGB to YCbCr conversion; built by rgb_ycc_start */
+} my_color_converter;
+
+typedef my_color_converter * my_cconvert_ptr;	/* type cinfo->cconvert is cast to in this file */
+
+
+/**************** RGB -> YCbCr conversion: most common case **************/
+
+/*
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
+ * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * The conversion equations to be implemented are therefore
+ *	Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+ *	Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + CENTERJSAMPLE
+ *	Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + CENTERJSAMPLE
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
+ * rather than CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
+ * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0)
+ * were not represented exactly.  Now we sacrifice exact representation of
+ * maximum red and maximum blue in order to get exact grayscales.
+ *
+ * To avoid floating-point arithmetic, we represent the fractional constants
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
+ *
+ * For even more speed, we avoid doing any multiplications in the inner loop
+ * by precalculating the constants times R,G,B for all possible values.
+ * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
+ * for 12-bit samples it is still acceptable.  It's not very reasonable for
+ * 16-bit samples, but if you want lossless storage you shouldn't be changing
+ * colorspace anyway.
+ * The CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
+ * in the tables to save adding them separately in the inner loop.
+ */
+
+#define SCALEBITS	16	/* fraction bits; speediest right-shift on some machines */
+#define CBCR_OFFSET	((INT32) CENTERJSAMPLE << SCALEBITS)	/* CENTERJSAMPLE, scaled up */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))	/* 0.5, scaled up */
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))	/* scale x up, add 0.5, convert to INT32 */
+
+/* We allocate one big table and divide it up into eight parts (MAXJSAMPLE+1
+ * entries each), instead of doing eight alloc_small requests.  This lets us
+ * use a single table base address, which can be held in a register in the
+ * inner loops on many machines (more than can hold all eight addresses).
+ */
+
+#define R_Y_OFF		0			/* offset to R => Y section */
+#define G_Y_OFF		(1*(MAXJSAMPLE+1))	/* offset to G => Y section */
+#define B_Y_OFF		(2*(MAXJSAMPLE+1))	/* offset to B => Y section */
+#define R_CB_OFF	(3*(MAXJSAMPLE+1))	/* offset to R => Cb section */
+#define G_CB_OFF	(4*(MAXJSAMPLE+1))	/* offset to G => Cb section */
+#define B_CB_OFF	(5*(MAXJSAMPLE+1))	/* offset to B => Cb section */
+#define R_CR_OFF	B_CB_OFF		/* B=>Cb, R=>Cr are the same */
+#define G_CR_OFF	(6*(MAXJSAMPLE+1))	/* offset to G => Cr section */
+#define B_CR_OFF	(7*(MAXJSAMPLE+1))	/* offset to B => Cr section */
+#define TABLE_SIZE	(8*(MAXJSAMPLE+1))	/* total number of INT32 entries */
+
+
+/*
+ * Start-of-pass hook for RGB->YCC conversion: allocate the lookup table in
+ * the image pool, save it in the converter object, and fill all sections.
+ */
+
+METHODDEF(void)
+rgb_ycc_start (j_compress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  INT32 * tab;
+  INT32 v;			/* sample value being tabulated */
+
+  tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(TABLE_SIZE * SIZEOF(INT32)));
+  cconvert->rgb_ycc_tab = tab;
+
+  for (v = 0; v <= MAXJSAMPLE; v++) {
+    /* Y terms; the 0.5 rounding constant is folded into the B entry. */
+    tab[R_Y_OFF + v] = FIX(0.29900) * v;
+    tab[G_Y_OFF + v] = FIX(0.58700) * v;
+    tab[B_Y_OFF + v] = FIX(0.11400) * v + ONE_HALF;
+    /* Cb terms.  B=>Cb doubles as R=>Cr (R_CR_OFF == B_CB_OFF), so it is
+     * stored once.  Its rounding term is 0.5-epsilon (ONE_HALF-1) so that the
+     * largest result rounds to MAXJSAMPLE rather than MAXJSAMPLE+1 and no
+     * range-limiting is needed.
+     */
+    tab[R_CB_OFF + v] = (-FIX(0.16874)) * v;
+    tab[G_CB_OFF + v] = (-FIX(0.33126)) * v;
+    tab[B_CB_OFF + v] = FIX(0.50000) * v + CBCR_OFFSET + ONE_HALF-1;
+    tab[G_CR_OFF + v] = (-FIX(0.41869)) * v;
+    tab[B_CR_OFF + v] = (-FIX(0.08131)) * v;
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace (RGB -> YCbCr).
+ *
+ * The application supplies interleaved pixels (RGB_PIXELSIZE samples each);
+ * we emit one plane per component, so an input row holds RGB_PIXELSIZE
+ * times as many samples as each output row.
+ *
+ * output_row is the first row to write in each output plane.  No matching
+ * offset exists for the input: the caller passes input_buf already pointing
+ * at the first row to be read.
+ */
+
+METHODDEF(void)
+rgb_ycc_convert (j_compress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		 JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register INT32 * tab = cconvert->rgb_ycc_tab;
+  JDIMENSION width = cinfo->image_width;
+  register JSAMPROW src;
+  register JSAMPROW y_row, cb_row, cr_row;
+  register JDIMENSION x;
+  register int red, green, blue;
+
+  for (; num_rows > 0; num_rows--, input_buf++, output_row++) {
+    /* Pick up this row's input pointer and the three output rows. */
+    src = *input_buf;
+    y_row = output_buf[0][output_row];
+    cb_row = output_buf[1][output_row];
+    cr_row = output_buf[2][output_row];
+    /* Walk the interleaved input pixel by pixel. */
+    for (x = 0; x < width; x++, src += RGB_PIXELSIZE) {
+      red = GETJSAMPLE(src[RGB_RED]);
+      green = GETJSAMPLE(src[RGB_GREEN]);
+      blue = GETJSAMPLE(src[RGB_BLUE]);
+      /* Each table sum below stays within 0..MAXJSAMPLE after scaling as
+       * long as the inputs do, so the shifted value is never negative:
+       * a plain >> suffices (no RIGHT_SHIFT macro) and no range-limiting
+       * step is required.
+       */
+      /* Y */
+      y_row[x] = (JSAMPLE)
+	((tab[R_Y_OFF + red] + tab[G_Y_OFF + green] + tab[B_Y_OFF + blue])
+	 >> SCALEBITS);
+      /* Cb */
+      cb_row[x] = (JSAMPLE)
+	((tab[R_CB_OFF + red] + tab[G_CB_OFF + green] + tab[B_CB_OFF + blue])
+	 >> SCALEBITS);
+      /* Cr */
+      cr_row[x] = (JSAMPLE)
+	((tab[R_CR_OFF + red] + tab[G_CR_OFF + green] + tab[B_CR_OFF + blue])
+	 >> SCALEBITS);
+    }
+  }
+}
+
+
+/**************** Cases other than RGB -> YCbCr **************/
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace (RGB -> grayscale).
+ * Gray is computed exactly like the Y channel of RGB->YCbCr, so only the
+ * Y sections of the table are consulted.
+ * We assume rgb_ycc_start has been called.
+ */
+
+METHODDEF(void)
+rgb_gray_convert (j_compress_ptr cinfo,
+		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		  JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register INT32 * tab = cconvert->rgb_ycc_tab;
+  JDIMENSION width = cinfo->image_width;
+  register JSAMPROW src;
+  register JSAMPROW gray_row;
+  register JDIMENSION x;
+  register int red, green, blue;
+
+  for (; num_rows > 0; num_rows--, input_buf++, output_row++) {
+    /* Source row and destination row for this iteration. */
+    src = *input_buf;
+    gray_row = output_buf[0][output_row];
+    /* One output sample per interleaved input pixel. */
+    for (x = 0; x < width; x++, src += RGB_PIXELSIZE) {
+      red = GETJSAMPLE(src[RGB_RED]);
+      green = GETJSAMPLE(src[RGB_GREEN]);
+      blue = GETJSAMPLE(src[RGB_BLUE]);
+      /* Y */
+      gray_row[x] = (JSAMPLE)
+	((tab[R_Y_OFF + red] + tab[G_Y_OFF + green] + tab[B_Y_OFF + blue])
+	 >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace (CMYK -> YCCK).
+ * Adobe-style conversion: C, M, Y are inverted to R=1-C, G=1-M, B=1-Y and
+ * pushed through the RGB->YCbCr lookup table, while K (black) is copied
+ * to the fourth output plane unchanged.
+ * We assume rgb_ycc_start has been called.
+ */
+
+METHODDEF(void)
+cmyk_ycck_convert (j_compress_ptr cinfo,
+		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		   JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register INT32 * tab = cconvert->rgb_ycc_tab;
+  JDIMENSION width = cinfo->image_width;
+  register JSAMPROW src;
+  register JSAMPROW y_row, cb_row, cr_row, k_row;
+  register JDIMENSION x;
+  register int red, green, blue;
+
+  for (; num_rows > 0; num_rows--, input_buf++, output_row++) {
+    /* Pick up this row's input pointer and the four output rows. */
+    src = *input_buf;
+    y_row = output_buf[0][output_row];
+    cb_row = output_buf[1][output_row];
+    cr_row = output_buf[2][output_row];
+    k_row = output_buf[3][output_row];
+    for (x = 0; x < width; x++, src += 4) {
+      /* Invert C, M, Y to obtain R, G, B */
+      red = MAXJSAMPLE - GETJSAMPLE(src[0]);
+      green = MAXJSAMPLE - GETJSAMPLE(src[1]);
+      blue = MAXJSAMPLE - GETJSAMPLE(src[2]);
+      /* K passes through as-is */
+      k_row[x] = src[3];	/* don't need GETJSAMPLE here */
+      /* Each table sum below stays within 0..MAXJSAMPLE after scaling as
+       * long as the inputs do, so the shifted value is never negative:
+       * a plain >> suffices (no RIGHT_SHIFT macro) and no range-limiting
+       * step is required.
+       */
+      /* Y */
+      y_row[x] = (JSAMPLE)
+	((tab[R_Y_OFF + red] + tab[G_Y_OFF + green] + tab[B_Y_OFF + blue])
+	 >> SCALEBITS);
+      /* Cb */
+      cb_row[x] = (JSAMPLE)
+	((tab[R_CB_OFF + red] + tab[G_CB_OFF + green] + tab[B_CB_OFF + blue])
+	 >> SCALEBITS);
+      /* Cr */
+      cr_row[x] = (JSAMPLE)
+	((tab[R_CR_OFF + red] + tab[G_CR_OFF + green] + tab[B_CR_OFF + blue])
+	 >> SCALEBITS);
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * Grayscale output with no arithmetic: the first sample of every input
+ * pixel is copied as-is.
+ * The source can be either plain grayscale or YCbCr (since Y == gray).
+ */
+
+METHODDEF(void)
+grayscale_convert (j_compress_ptr cinfo,
+		   JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+		   JDIMENSION output_row, int num_rows)
+{
+  JDIMENSION width = cinfo->image_width;
+  int pixel_step = cinfo->input_components;	/* samples per input pixel */
+  register JSAMPROW src;
+  register JSAMPROW gray_row;
+  register JDIMENSION x;
+
+  for (; num_rows > 0; num_rows--, input_buf++, output_row++) {
+    /* Row pointers for this iteration */
+    src = *input_buf;
+    gray_row = output_buf[0][output_row];
+    for (x = 0; x < width; x++, src += pixel_step) {
+      gray_row[x] = src[0];	/* don't need GETJSAMPLE() here */
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * Multi-component passthrough: samples are de-interleaved into one plane
+ * per component but never modified.
+ * We assume input_components == num_components.
+ */
+
+METHODDEF(void)
+null_convert (j_compress_ptr cinfo,
+	      JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+	      JDIMENSION output_row, int num_rows)
+{
+  int ncomps = cinfo->num_components;
+  JDIMENSION width = cinfo->image_width;
+  register JSAMPROW src;
+  register JSAMPROW plane_row;
+  register JDIMENSION x;
+  register int comp;
+
+  for (; num_rows > 0; num_rows--, input_buf++, output_row++) {
+    /* It seems fastest to make a separate pass for each component. */
+    for (comp = 0; comp < ncomps; comp++) {
+      /* Rewind to the start of the input row for every component. */
+      src = *input_buf;
+      plane_row = output_buf[comp][output_row];
+      for (x = 0; x < width; x++, src += ncomps) {
+	/* Sample "comp" of pixel x; don't need GETJSAMPLE() here */
+	plane_row[x] = src[comp];
+      }
+    }
+  }
+}
+
+
+/* Empty method for start_pass.  jinit_color_converter installs it by default;
+ * it stays in place for conversions that do not need rgb_ycc_start.
+ */
+
+METHODDEF(void)
+null_method (j_compress_ptr cinfo)
+{
+  /* no work needed; cinfo is not used */
+}
+
+
+/* Module initialization routine for input colorspace conversion.  Validates
+ * component counts, then selects start_pass and color_convert for the
+ * requested (in_color_space, jpeg_color_space) pair; ERREXITs if unsupported. */
+
+GLOBAL(void)
+jinit_color_converter (j_compress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert;
+
+  cconvert = (my_cconvert_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_color_converter));
+  cinfo->cconvert = (struct jpeg_color_converter *) cconvert;
+  /* set start_pass to null method until we find out differently */
+  cconvert->pub.start_pass = null_method;
+
+  /* Make sure input_components agrees with in_color_space */
+  switch (cinfo->in_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->input_components != 1)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_RGB:
+#if RGB_PIXELSIZE != 3
+    if (cinfo->input_components != RGB_PIXELSIZE)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+#endif /* else fall through and share the 3-component check with YCbCr */
+
+  case JCS_YCbCr:
+    if (cinfo->input_components != 3)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_CMYK:
+  case JCS_YCCK:
+    if (cinfo->input_components != 4)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  default:			/* JCS_UNKNOWN can be anything */
+    if (cinfo->input_components < 1)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+  }
+
+  /* Check num_components, set conversion method based on requested space */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->num_components != 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_GRAYSCALE)
+      cconvert->pub.color_convert = grayscale_convert;
+    else if (cinfo->in_color_space == JCS_RGB) {
+      cconvert->pub.start_pass = rgb_ycc_start;	/* builds the conversion table */
+      cconvert->pub.color_convert = rgb_gray_convert;
+    } else if (cinfo->in_color_space == JCS_YCbCr)
+      cconvert->pub.color_convert = grayscale_convert;	/* copies component 0 (Y) only */
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_RGB:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_RGB && RGB_PIXELSIZE == 3)	/* passthrough needs 3-sample pixels */
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_YCbCr:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_RGB) {
+      cconvert->pub.start_pass = rgb_ycc_start;	/* builds the conversion table */
+      cconvert->pub.color_convert = rgb_ycc_convert;
+    } else if (cinfo->in_color_space == JCS_YCbCr)
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_CMYK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_CMYK)
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_YCCK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_CMYK) {
+      cconvert->pub.start_pass = rgb_ycc_start;	/* cmyk_ycck_convert uses the same table */
+      cconvert->pub.color_convert = cmyk_ycck_convert;
+    } else if (cinfo->in_color_space == JCS_YCCK)
+      cconvert->pub.color_convert = null_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  default:			/* allow null conversion of JCS_UNKNOWN */
+    if (cinfo->jpeg_color_space != cinfo->in_color_space ||
+	cinfo->num_components != cinfo->input_components)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    cconvert->pub.color_convert = null_convert;
+    break;
+  }
+}
diff --git a/src/libjpeg/jcdctmgr.c b/src/libjpeg/jcdctmgr.c
new file mode 100644
index 0000000..61fa79b
--- /dev/null
+++ b/src/libjpeg/jcdctmgr.c
@@ -0,0 +1,387 @@
+/*
+ * jcdctmgr.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the forward-DCT management logic.
+ * This code selects a particular DCT implementation to be used,
+ * and it performs related housekeeping chores including coefficient
+ * quantization.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+
+/* Private subobject for this module; cinfo->fdct is cast to this type */
+
+typedef struct {
+  struct jpeg_forward_dct pub;	/* public fields */
+
+  /* Pointer to the DCT routine actually in use (integer methods) */
+  forward_DCT_method_ptr do_dct;
+
+  /* The actual post-DCT divisors --- not identical to the quant table
+   * entries, because of scaling (especially for an unnormalized DCT).
+   * Each table is given in normal array order; NULL until first needed.
+   */
+  DCTELEM * divisors[NUM_QUANT_TBLS];
+
+#ifdef DCT_FLOAT_SUPPORTED
+  /* Same as above for the floating-point case; entries hold 1/divisor. */
+  float_DCT_method_ptr do_float_dct;
+  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
+#endif
+} my_fdct_controller;
+
+typedef my_fdct_controller * my_fdct_ptr;
+
+
+/*
+ * Initialize for a processing pass.
+ * Verify that all referenced Q-tables are present, and set up the divisor
+ * table for each one (allocated on first use, refilled on every call).
+ * In the current implementation, DCT of all components is done during
+ * the first pass, even if only some components will be output in the
+ * first scan.  Hence all components should be examined here.
+ */
+
+METHODDEF(void)
+start_pass_fdctmgr (j_compress_ptr cinfo)
+{
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  int ci, qtblno, i;
+  jpeg_component_info *compptr;
+  JQUANT_TBL * qtbl;
+  DCTELEM * dtbl;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    qtblno = compptr->quant_tbl_no;
+    /* Make sure specified quantization table is present */
+    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
+	cinfo->quant_tbl_ptrs[qtblno] == NULL)
+      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
+    qtbl = cinfo->quant_tbl_ptrs[qtblno];
+    /* Compute divisors for this quant table */
+    /* We may do this more than once for same table, but it's not a big deal */
+    switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+    case JDCT_ISLOW:
+      /* For LL&M IDCT method, divisors are equal to raw quantization
+       * coefficients multiplied by 8 (to counteract scaling).
+       */
+      if (fdct->divisors[qtblno] == NULL) {
+	fdct->divisors[qtblno] = (DCTELEM *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      DCTSIZE2 * SIZEOF(DCTELEM));
+      }
+      dtbl = fdct->divisors[qtblno];
+      for (i = 0; i < DCTSIZE2; i++) {
+	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;	/* quantval * 8 */
+      }
+      break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+    case JDCT_IFAST:
+      {
+	/* For AA&N IDCT method, divisors are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * We apply a further scale factor of 8.
+	 */
+#define CONST_BITS 14	/* fraction bits in aanscales[] */
+	static const INT16 aanscales[DCTSIZE2] = {
+	  /* precomputed values scaled up by 14 bits */
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+	};
+	SHIFT_TEMPS
+
+	if (fdct->divisors[qtblno] == NULL) {
+	  fdct->divisors[qtblno] = (DCTELEM *)
+	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+					DCTSIZE2 * SIZEOF(DCTELEM));
+	}
+	dtbl = fdct->divisors[qtblno];
+	for (i = 0; i < DCTSIZE2; i++) {
+	  dtbl[i] = (DCTELEM)
+	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+				  (INT32) aanscales[i]),
+		    CONST_BITS-3);	/* 3 bits short of CONST_BITS: presumably leaves the factor of 8 */
+	}
+      }
+      break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+    case JDCT_FLOAT:
+      {
+	/* For float AA&N IDCT method, divisors are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * We apply a further scale factor of 8.
+	 * What's actually stored is 1/divisor so that the inner loop can
+	 * use a multiplication rather than a division.
+	 */
+	FAST_FLOAT * fdtbl;
+	int row, col;
+	static const double aanscalefactor[DCTSIZE] = {
+	  1.0, 1.387039845, 1.306562965, 1.175875602,
+	  1.0, 0.785694958, 0.541196100, 0.275899379
+	};
+
+	if (fdct->float_divisors[qtblno] == NULL) {
+	  fdct->float_divisors[qtblno] = (FAST_FLOAT *)
+	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+					DCTSIZE2 * SIZEOF(FAST_FLOAT));
+	}
+	fdtbl = fdct->float_divisors[qtblno];
+	i = 0;			/* i walks the table in row-major order */
+	for (row = 0; row < DCTSIZE; row++) {
+	  for (col = 0; col < DCTSIZE; col++) {
+	    fdtbl[i] = (FAST_FLOAT)
+	      (1.0 / (((double) qtbl->quantval[i] *
+		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
+	    i++;
+	  }
+	}
+      }
+      break;
+#endif
+    default:			/* no case compiled in for this dct_method */
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+      break;
+    }
+  }
+}
+
+
+/*
+ * Perform forward DCT on one or more blocks of a component.
+ *
+ * The input samples are taken from the sample_data[] array starting at
+ * position start_row/start_col, and moving DCTSIZE columns to the right for
+ * each additional block.  Quantized coefficients go to coef_blocks[0..num_blocks-1].
+ */
+
+METHODDEF(void)
+forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
+	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+	     JDIMENSION start_row, JDIMENSION start_col,
+	     JDIMENSION num_blocks)
+/* This version is used for integer DCT implementations. */
+{
+  /* This routine is heavily used, so it's worth coding it tightly. */
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  forward_DCT_method_ptr do_dct = fdct->do_dct;
+  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];	/* filled in by start_pass_fdctmgr */
+  DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
+  JDIMENSION bi;
+
+  sample_data += start_row;	/* fold in the vertical offset once */
+
+  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
+    /* Load data into workspace, applying unsigned->signed conversion */
+    { register DCTELEM *workspaceptr;
+      register JSAMPROW elemptr;
+      register int elemr;
+
+      workspaceptr = workspace;
+      for (elemr = 0; elemr < DCTSIZE; elemr++) {
+	elemptr = sample_data[elemr] + start_col;
+#if DCTSIZE == 8		/* unroll the inner loop */
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	*workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+#else
+	{ register int elemc;
+	  for (elemc = DCTSIZE; elemc > 0; elemc--) {
+	    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
+	  }
+	}
+#endif
+      }
+    }
+
+    /* Perform the DCT; the results are read back from workspace below */
+    (*do_dct) (workspace);
+
+    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+    { register DCTELEM temp, qval;
+      register int i;
+      register JCOEFPTR output_ptr = coef_blocks[bi];
+
+      for (i = 0; i < DCTSIZE2; i++) {
+	qval = divisors[i];
+	temp = workspace[i];
+	/* Divide the coefficient value by qval, ensuring proper rounding.
+	 * Since C does not specify the direction of rounding for negative
+	 * quotients, we have to force the dividend positive for portability.
+	 *
+	 * In most files, at least half of the output values will be zero
+	 * (at default quantization settings, more like three-quarters...)
+	 * so we should ensure that this case is fast.  On many machines,
+	 * a comparison is enough cheaper than a divide to make a special test
+	 * a win.  Since both inputs will be nonnegative, we need only test
+	 * for a < b to discover whether a/b is 0.
+	 * If your machine's division is fast enough, define FAST_DIVIDE.
+	 */
+#ifdef FAST_DIVIDE
+#define DIVIDE_BY(a,b)	a /= b
+#else
+#define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0	/* bare if/else: use only as a full statement */
+#endif
+	if (temp < 0) {
+	  temp = -temp;		/* work on the magnitude, restore the sign afterwards */
+	  temp += qval>>1;	/* for rounding */
+	  DIVIDE_BY(temp, qval);
+	  temp = -temp;
+	} else {
+	  temp += qval>>1;	/* for rounding */
+	  DIVIDE_BY(temp, qval);
+	}
+	output_ptr[i] = (JCOEF) temp;
+      }
+    }
+  }
+}
+
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+METHODDEF(void)
+forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+		   JDIMENSION start_row, JDIMENSION start_col,
+		   JDIMENSION num_blocks)
+/* This version is used for floating-point DCT implementations. */
+{
+  /* This routine is heavily used, so it's worth coding it tightly. */
+  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
+  float_DCT_method_ptr do_dct = fdct->do_float_dct;
+  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];	/* reciprocals, see start_pass_fdctmgr */
+  FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
+  JDIMENSION bi;
+
+  sample_data += start_row;	/* fold in the vertical offset once */
+
+  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
+    /* Load data into workspace, applying unsigned->signed conversion */
+    { register FAST_FLOAT *workspaceptr;
+      register JSAMPROW elemptr;
+      register int elemr;
+
+      workspaceptr = workspace;
+      for (elemr = 0; elemr < DCTSIZE; elemr++) {
+	elemptr = sample_data[elemr] + start_col;
+#if DCTSIZE == 8		/* unroll the inner loop */
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	*workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+#else
+	{ register int elemc;
+	  for (elemc = DCTSIZE; elemc > 0; elemc--) {
+	    *workspaceptr++ = (FAST_FLOAT)
+	      (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
+	  }
+	}
+#endif
+      }
+    }
+
+    /* Perform the DCT; the results are read back from workspace below */
+    (*do_dct) (workspace);
+
+    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+    { register FAST_FLOAT temp;
+      register int i;
+      register JCOEFPTR output_ptr = coef_blocks[bi];
+
+      for (i = 0; i < DCTSIZE2; i++) {
+	/* Apply the quantization and scaling factor (multiply by 1/divisor) */
+	temp = workspace[i] * divisors[i];
+	/* Round to nearest integer.
+	 * The float->int cast is only applied to a nonnegative value: adding
+	 * 16384.5 first biases temp positive, and 16384 is subtracted after.
+	 * The maximum coefficient size is +-16K (for 12-bit data), so this
+	 * code should work for either 16-bit or 32-bit ints.
+	 */
+	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
+      }
+    }
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
+
+
+/*
+ * Initialize FDCT manager: create the private controller, pick the DCT
+ * routines that match cinfo->dct_method, and clear the divisor tables.
+ */
+
+GLOBAL(void)
+jinit_forward_dct (j_compress_ptr cinfo)
+{
+  int tblno;
+  my_fdct_ptr fdct = (my_fdct_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_fdct_controller));
+
+  cinfo->fdct = (struct jpeg_forward_dct *) fdct;
+  fdct->pub.start_pass = start_pass_fdctmgr;
+  /* Hook up the block-level driver and the raw DCT kernel it will call. */
+  switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+  case JDCT_ISLOW:
+    fdct->do_dct = jpeg_fdct_islow;
+    fdct->pub.forward_DCT = forward_DCT;
+    break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+  case JDCT_IFAST:
+    fdct->do_dct = jpeg_fdct_ifast;
+    fdct->pub.forward_DCT = forward_DCT;
+    break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  case JDCT_FLOAT:
+    fdct->do_float_dct = jpeg_fdct_float;
+    fdct->pub.forward_DCT = forward_DCT_float;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    break;
+  }
+
+  /* No divisor table exists yet; start_pass_fdctmgr allocates on first use. */
+  for (tblno = NUM_QUANT_TBLS - 1; tblno >= 0; tblno--) {
+#ifdef DCT_FLOAT_SUPPORTED
+    fdct->float_divisors[tblno] = NULL;
+#endif
+    fdct->divisors[tblno] = NULL;
+  }
+}
diff --git a/src/libjpeg/jchuff.c b/src/libjpeg/jchuff.c
new file mode 100644
index 0000000..f235250
--- /dev/null
+++ b/src/libjpeg/jchuff.c
@@ -0,0 +1,909 @@
+/*
+ * jchuff.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy encoding routines.
+ *
+ * Much of the complexity here has to do with supporting output suspension.
+ * If the data destination module demands suspension, we want to be able to
+ * back up to the start of the current MCU.  To do this, we copy state
+ * variables into local working storage, and update them back to the
+ * permanent JPEG objects only upon successful completion of an MCU.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jchuff.h"		/* Declarations shared with jcphuff.c */
+
+
+/* Expanded entropy encoder object for Huffman encoding.
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU; it is
+ * copied into a working_state on entry and copied back only on success.
+ */
+
+typedef struct {
+  INT32 put_buffer;		/* current bit-accumulation buffer */
+  int put_bits;			/* # of bits now in it */
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+} savable_state;
+
+/* This macro is to work around compilers with missing or broken
+ * structure assignment.  You'll need to fix this code if you have such a
+ * compiler and change MAX_COMPS_IN_SCAN: no fallback is defined unless 4.
+ */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).put_buffer = (src).put_buffer, \
+	 (dest).put_bits = (src).put_bits, \
+	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif /* MAX_COMPS_IN_SCAN == 4 */
+#endif /* NO_STRUCT_ASSIGN */
+
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  savable_state saved;		/* Bit buffer & DC state at start of MCU */
+
+  /* These fields are NOT loaded into local working state; updated in place. */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  int next_restart_num;		/* next restart number to write (0-7) */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
+  c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
+
+#ifdef ENTROPY_OPT_SUPPORTED	/* Statistics tables for optimization */
+  long * dc_count_ptrs[NUM_HUFF_TBLS];	/* 257 longs each, made on demand */
+  long * ac_count_ptrs[NUM_HUFF_TBLS];	/* 257 longs each, made on demand */
+#endif /* ENTROPY_OPT_SUPPORTED */
+} huff_entropy_encoder;
+
+typedef huff_entropy_encoder * huff_entropy_ptr; /* cinfo->entropy, downcast */
+
+/* Working state while writing an MCU: a local copy of every field the
+ * output subroutines need, written back by the caller only on success.
+ */
+
+typedef struct {
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+  savable_state cur;		/* Current bit buffer & DC state */
+  j_compress_ptr cinfo;		/* dump_buffer needs access to this */
+} working_state;
+
+
+/* Forward declarations of the methods that start_pass_huff installs */
+METHODDEF(boolean) encode_mcu_huff JPP((j_compress_ptr cinfo,
+					JBLOCKROW *MCU_data));
+METHODDEF(void) finish_pass_huff JPP((j_compress_ptr cinfo));
+#ifdef ENTROPY_OPT_SUPPORTED
+METHODDEF(boolean) encode_mcu_gather JPP((j_compress_ptr cinfo,
+					  JBLOCKROW *MCU_data));
+METHODDEF(void) finish_pass_gather JPP((j_compress_ptr cinfo));
+#endif /* ENTROPY_OPT_SUPPORTED */
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ * If gather_statistics is TRUE, we do not output anything during the scan,
+ * just count the Huffman symbols used and generate Huffman code tables.
+ */
+
+METHODDEF(void)
+start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci, dctbl, actbl;		/* scan component index; its DC/AC table nos. */
+  jpeg_component_info * compptr;
+
+  if (gather_statistics) {	/* choose this pass's method pointers */
+#ifdef ENTROPY_OPT_SUPPORTED
+    entropy->pub.encode_mcu = encode_mcu_gather;
+    entropy->pub.finish_pass = finish_pass_gather;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    entropy->pub.encode_mcu = encode_mcu_huff;
+    entropy->pub.finish_pass = finish_pass_huff;
+  }
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    dctbl = compptr->dc_tbl_no;
+    actbl = compptr->ac_tbl_no;
+    if (gather_statistics) {
+#ifdef ENTROPY_OPT_SUPPORTED
+      /* Check for invalid table indexes */
+      /* (make_c_derived_tbl does this in the other path) */
+      if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS)
+	ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
+      if (actbl < 0 || actbl >= NUM_HUFF_TBLS)
+	ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl);
+      /* Allocate (first use only) and zero (every pass) the statistics tables */
+      /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
+      if (entropy->dc_count_ptrs[dctbl] == NULL)
+	entropy->dc_count_ptrs[dctbl] = (long *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      257 * SIZEOF(long));
+      MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * SIZEOF(long));
+      if (entropy->ac_count_ptrs[actbl] == NULL)
+	entropy->ac_count_ptrs[actbl] = (long *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      257 * SIZEOF(long));
+      MEMZERO(entropy->ac_count_ptrs[actbl], 257 * SIZEOF(long));
+#endif
+    } else {
+      /* Compute derived values for Huffman tables */
+      /* We may do this more than once for a table, but it's not expensive */
+      jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl,
+			      & entropy->dc_derived_tbls[dctbl]);
+      jpeg_make_c_derived_tbl(cinfo, FALSE, actbl,
+			      & entropy->ac_derived_tbls[actbl]);
+    }
+    /* Initialize DC predictions to 0 */
+    entropy->saved.last_dc_val[ci] = 0;
+  }
+
+  /* Initialize bit buffer to empty */
+  entropy->saved.put_buffer = 0;
+  entropy->saved.put_bits = 0;
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/*
+ * Compute the derived values for a Huffman table.
+ * This routine also performs some validation checks on the table.
+ * *pdtbl is allocated here on first use and reused on later calls.
+ * Note this is also used by jcphuff.c.
+ */
+
+GLOBAL(void)
+jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
+			 c_derived_tbl ** pdtbl)
+{
+  JHUFF_TBL *htbl;
+  c_derived_tbl *dtbl;
+  int p, i, l, lastp, si, maxsymbol;
+  char huffsize[257];		/* code length per symbol, 0-terminated */
+  unsigned int huffcode[257];	/* code value per symbol */
+  unsigned int code;
+
+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
+   * paralleling the order of the symbols themselves in htbl->huffval[].
+   */
+
+  /* Find the input Huffman table */
+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+  htbl =
+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+
+  /* Allocate a workspace if we haven't already done so. */
+  if (*pdtbl == NULL)
+    *pdtbl = (c_derived_tbl *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(c_derived_tbl));
+  dtbl = *pdtbl;
+  
+  /* Figure C.1: make table of Huffman code length for each symbol */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    i = (int) htbl->bits[l];
+    if (i < 0 || p + i > 256)	/* protect against table overrun */
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    while (i--)
+      huffsize[p++] = (char) l;
+  }
+  huffsize[p] = 0;
+  lastp = p;			/* total number of symbols in the table */
+  
+  /* Figure C.2: generate the codes themselves */
+  /* We also validate that the counts represent a legal Huffman code tree. */
+
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+  while (huffsize[p]) {
+    while (((int) huffsize[p]) == si) {
+      huffcode[p++] = code;
+      code++;
+    }
+    /* code is now 1 more than the last code used for codelength si; but
+     * it must still fit in si bits, since no code is allowed to be all ones.
+     */
+    if (((INT32) code) >= (((INT32) 1) << si))
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    code <<= 1;
+    si++;
+  }
+  
+  /* Figure C.3: generate encoding tables */
+  /* These are code and size indexed by symbol value */
+
+  /* Set all codeless symbols to have code length 0;
+   * this lets us detect duplicate VAL entries here, and later
+   * allows emit_bits to detect any attempt to emit such symbols.
+   */
+  MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi));
+
+  /* This is also a convenient place to check for out-of-range
+   * and duplicated VAL entries.  We allow 0..255 for AC symbols
+   * but only 0..15 for DC.  (We could constrain them further
+   * based on data depth and mode, but this seems enough.)
+   */
+  maxsymbol = isDC ? 15 : 255;
+
+  for (p = 0; p < lastp; p++) {
+    i = htbl->huffval[p];
+    if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    dtbl->ehufco[i] = huffcode[p];
+    dtbl->ehufsi[i] = huffsize[p];
+  }
+}
+
+
+/* Outputting bytes to the file */
+/* Emit a byte, taking 'action' if must suspend.  Expands to one statement; */
+/* 'action' must not be break/continue, which would bind to the wrapper. */
+#define emit_byte(state,val,action)  \
+	do { *(state)->next_output_byte++ = (JOCTET) (val);  \
+	  if (--(state)->free_in_buffer == 0)  \
+	    if (! dump_buffer(state))  \
+	      { action; } } while (0)
+
+
+LOCAL(boolean)
+dump_buffer (working_state * state)
+/* Hand the full buffer to the destination manager; FALSE means suspend */
+{
+  struct jpeg_destination_mgr * mgr = state->cinfo->dest;
+
+  if ((*mgr->empty_output_buffer) (state->cinfo)) {
+    state->next_output_byte = mgr->next_output_byte;	/* pick up the */
+    state->free_in_buffer = mgr->free_in_buffer;	/* fresh buffer */
+    return TRUE;
+  }
+  return FALSE;
+}
+
+
+/* Outputting bits to the file */
+
+/* Only the right 24 bits of put_buffer are used; the valid bits are
+ * left-justified in this part.  At most 16 bits can be passed to emit_bits
+ * in one call, and we never retain more than 7 bits in put_buffer
+ * between calls, so 24 bits are sufficient.
+ */
+
+INLINE
+LOCAL(boolean)
+emit_bits (working_state * state, unsigned int code, int size)
+/* Emit some bits; return TRUE if successful, FALSE if must suspend */
+{
+  /* This routine is heavily used, so it's worth coding tightly. */
+  register INT32 put_buffer = (INT32) code;
+  register int put_bits = state->cur.put_bits;
+
+  /* if size is 0, caller used an invalid Huffman table entry */
+  if (size == 0)
+    ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
+
+  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
+  
+  put_bits += size;		/* new number of bits in buffer */
+  
+  put_buffer <<= 24 - put_bits; /* align incoming bits */
+
+  put_buffer |= state->cur.put_buffer; /* and merge with old buffer contents */
+  
+  while (put_bits >= 8) {	/* write out every complete byte */
+    int c = (int) ((put_buffer >> 16) & 0xFF);	/* top byte of the 24 */
+    
+    emit_byte(state, c, return FALSE);
+    if (c == 0xFF) {		/* need to stuff a zero byte? */
+      emit_byte(state, 0, return FALSE);
+    }
+    put_buffer <<= 8;
+    put_bits -= 8;
+  }
+
+  state->cur.put_buffer = put_buffer; /* update state variables */
+  state->cur.put_bits = put_bits;
+
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+flush_bits (working_state * state)
+{
+  /* Pad any pending partial byte with 1-bits; FALSE means must suspend */
+  boolean flushed = emit_bits(state, 0x7F, 7);
+  if (flushed)
+    state->cur.put_bits = state->cur.put_buffer = 0;  /* buffer now empty */
+  return flushed;
+}
+
+
+/* Encode a single block's worth of coefficients; FALSE means must suspend */
+
+LOCAL(boolean)
+encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val,
+		  c_derived_tbl *dctbl, c_derived_tbl *actbl)
+{
+  register int temp, temp2;
+  register int nbits;
+  register int k, r, i;		/* scan position, zero run, AC symbol */
+  
+  /* Encode the DC coefficient difference per section F.1.2.1 */
+  
+  temp = temp2 = block[0] - last_dc_val;
+
+  if (temp < 0) {
+    temp = -temp;		/* temp is abs value of input */
+    /* For a negative input, want temp2 = bitwise complement of abs(input) */
+    /* This code assumes we are on a two's complement machine */
+    temp2--;
+  }
+  
+  /* Find the number of bits needed for the magnitude of the coefficient */
+  nbits = 0;
+  while (temp) {
+    nbits++;
+    temp >>= 1;
+  }
+  /* Check for out-of-range coefficient values.
+   * Since we're encoding a difference, the range limit is twice as much.
+   */
+  if (nbits > MAX_COEF_BITS+1)
+    ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
+  
+  /* Emit the Huffman-coded symbol for the number of bits */
+  if (! emit_bits(state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits]))
+    return FALSE;
+
+  /* Emit that number of bits of the value, if positive, */
+  /* or the complement of its magnitude, if negative. */
+  if (nbits)			/* emit_bits rejects calls with size 0 */
+    if (! emit_bits(state, (unsigned int) temp2, nbits))
+      return FALSE;
+
+  /* Encode the AC coefficients per section F.1.2.2 */
+  
+  r = 0;			/* r = run length of zeros */
+  
+  for (k = 1; k < DCTSIZE2; k++) {
+    if ((temp = block[jpeg_natural_order[k]]) == 0) {
+      r++;
+    } else {
+      /* if run length > 15, must emit special run-length-16 codes (0xF0) */
+      while (r > 15) {
+	if (! emit_bits(state, actbl->ehufco[0xF0], actbl->ehufsi[0xF0]))
+	  return FALSE;
+	r -= 16;
+      }
+
+      temp2 = temp;
+      if (temp < 0) {
+	temp = -temp;		/* temp is abs value of input */
+	/* This code assumes we are on a two's complement machine */
+	temp2--;
+      }
+      
+      /* Find the number of bits needed for the magnitude of the coefficient */
+      nbits = 1;		/* there must be at least one 1 bit */
+      while ((temp >>= 1))
+	nbits++;
+      /* Check for out-of-range coefficient values */
+      if (nbits > MAX_COEF_BITS)
+	ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
+      
+      /* Emit Huffman symbol for run length / number of bits */
+      i = (r << 4) + nbits;	/* run in high nibble, size in low nibble */
+      if (! emit_bits(state, actbl->ehufco[i], actbl->ehufsi[i]))
+	return FALSE;
+
+      /* Emit that number of bits of the value, if positive, */
+      /* or the complement of its magnitude, if negative. */
+      if (! emit_bits(state, (unsigned int) temp2, nbits))
+	return FALSE;
+      
+      r = 0;
+    }
+  }
+
+  /* If the last coef(s) were zero, emit an end-of-block code */
+  if (r > 0)
+    if (! emit_bits(state, actbl->ehufco[0], actbl->ehufsi[0]))
+      return FALSE;
+
+  return TRUE;
+}
+
+
+/*
+ * Emit a restart marker & resynchronize predictions; FALSE = must suspend.
+ */
+
+LOCAL(boolean)
+emit_restart (working_state * state, int restart_num)
+{
+  int ci;
+
+  if (! flush_bits(state))	/* pad out the pending partial byte first */
+    return FALSE;
+
+  emit_byte(state, 0xFF, return FALSE);
+  emit_byte(state, JPEG_RST0 + restart_num, return FALSE);
+
+  /* Re-initialize DC predictions to 0 */
+  for (ci = 0; ci < state->cinfo->comps_in_scan; ci++)
+    state->cur.last_dc_val[ci] = 0;
+
+  /* The restart counter is not updated until we successfully write the MCU. */
+
+  return TRUE;
+}
+
+
+/*
+ * Encode and output one MCU; on FALSE (suspension) nothing is written back.
+ */
+
+METHODDEF(boolean)
+encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  working_state state;
+  int blkn, ci;
+  jpeg_component_info * compptr;
+
+  /* Load up working state */
+  state.next_output_byte = cinfo->dest->next_output_byte;
+  state.free_in_buffer = cinfo->dest->free_in_buffer;
+  ASSIGN_STATE(state.cur, entropy->saved);
+  state.cinfo = cinfo;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! emit_restart(&state, entropy->next_restart_num))
+	return FALSE;
+  }
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+    if (! encode_one_block(&state,
+			   MCU_data[blkn][0], state.cur.last_dc_val[ci],
+			   entropy->dc_derived_tbls[compptr->dc_tbl_no],
+			   entropy->ac_derived_tbls[compptr->ac_tbl_no]))
+      return FALSE;
+    /* Update last_dc_val */
+    state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
+  }
+
+  /* Completed MCU, so update state */
+  cinfo->dest->next_output_byte = state.next_output_byte;
+  cinfo->dest->free_in_buffer = state.free_in_buffer;
+  ASSIGN_STATE(entropy->saved, state.cur);
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;	/* restart numbers cycle 0..7 */
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Finish up at the end of a Huffman-compressed scan (cannot suspend here).
+ */
+
+METHODDEF(void)
+finish_pass_huff (j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  working_state state;
+
+  /* Load up working state ... flush_bits needs it */
+  state.next_output_byte = cinfo->dest->next_output_byte;
+  state.free_in_buffer = cinfo->dest->free_in_buffer;
+  ASSIGN_STATE(state.cur, entropy->saved);
+  state.cinfo = cinfo;
+
+  /* Flush out the last data */
+  if (! flush_bits(&state))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+
+  /* Update state */
+  cinfo->dest->next_output_byte = state.next_output_byte;
+  cinfo->dest->free_in_buffer = state.free_in_buffer;
+  ASSIGN_STATE(entropy->saved, state.cur);
+}
+
+
+/*
+ * Huffman coding optimization.
+ *
+ * We first scan the supplied data and count the number of uses of each symbol
+ * that is to be Huffman-coded. (This process MUST agree with the code above.)
+ * Then we build a Huffman coding tree for the observed counts.
+ * Symbols which are not needed at all for the particular image are not
+ * assigned any code, which saves space in the DHT marker as well as in
+ * the compressed data.
+ */
+
+#ifdef ENTROPY_OPT_SUPPORTED
+
+
+/* Count the symbols that encode_one_block would emit for this block */
+
+LOCAL(void)
+htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
+		 long dc_counts[], long ac_counts[])
+{
+  register int temp;
+  register int nbits;
+  register int k, r;
+  
+  /* Encode the DC coefficient difference per section F.1.2.1 */
+  
+  temp = block[0] - last_dc_val;
+  if (temp < 0)
+    temp = -temp;
+  
+  /* Find the number of bits needed for the magnitude of the coefficient */
+  nbits = 0;
+  while (temp) {
+    nbits++;
+    temp >>= 1;
+  }
+  /* Check for out-of-range coefficient values.
+   * Since we're encoding a difference, the range limit is twice as much.
+   */
+  if (nbits > MAX_COEF_BITS+1)
+    ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+  /* Count the Huffman symbol for the number of bits */
+  dc_counts[nbits]++;
+  
+  /* Encode the AC coefficients per section F.1.2.2 */
+  
+  r = 0;			/* r = run length of zeros */
+  
+  for (k = 1; k < DCTSIZE2; k++) {
+    if ((temp = block[jpeg_natural_order[k]]) == 0) {
+      r++;
+    } else {
+      /* if run length > 15, must count special run-length-16 codes (0xF0) */
+      while (r > 15) {
+	ac_counts[0xF0]++;
+	r -= 16;
+      }
+      
+      /* Only the magnitude selects the symbol, so take the abs value */
+      if (temp < 0)
+	temp = -temp;
+      
+      /* Find the number of bits needed for the magnitude of the coefficient */
+      nbits = 1;		/* there must be at least one 1 bit */
+      while ((temp >>= 1))
+	nbits++;
+      /* Check for out-of-range coefficient values */
+      if (nbits > MAX_COEF_BITS)
+	ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+      
+      /* Count Huffman symbol for run length / number of bits */
+      ac_counts[(r << 4) + nbits]++;
+      
+      r = 0;
+    }
+  }
+
+  /* If the last coef(s) were zero, count an end-of-block code */
+  if (r > 0)
+    ac_counts[0]++;
+}
+
+
+/*
+ * Trial-encode one MCU's worth of Huffman-compressed coefficients.
+ * No data is actually output, so no suspension return is possible.
+ */
+
+METHODDEF(boolean)
+encode_mcu_gather (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int blkn, ci;
+  jpeg_component_info * compptr;
+
+  /* Take care of restart intervals if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      /* Re-initialize DC predictions to 0 */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+	entropy->saved.last_dc_val[ci] = 0;
+      /* Update restart state */
+      entropy->restarts_to_go = cinfo->restart_interval;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  /* Count symbols block by block, tracking each component's last DC value */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+    htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],
+		    entropy->dc_count_ptrs[compptr->dc_tbl_no],
+		    entropy->ac_count_ptrs[compptr->ac_tbl_no]);
+    entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];
+  }
+  return TRUE;			/* nothing was written, so never suspends */
+}
+
+
+/*
+ * Generate the best Huffman code table for the given counts, fill htbl.
+ * freq[] needs 257 entries and is clobbered.  Also used by jcphuff.c.
+ *
+ * The JPEG standard requires that no symbol be assigned a codeword of all
+ * one bits (so that padding bits added at the end of a compressed segment
+ * can't look like a valid code).  Because of the canonical ordering of
+ * codewords, this just means that there must be an unused slot in the
+ * longest codeword length category.  Section K.2 of the JPEG spec suggests
+ * reserving such a slot by pretending that symbol 256 is a valid symbol
+ * with count 1.  In theory that's not optimal; giving it count zero but
+ * including it in the symbol set anyway should give a better Huffman code.
+ * But the theoretically better code actually seems to come out worse in
+ * practice, because it produces more all-ones bytes (which incur stuffed
+ * zero bytes in the final file).  In any case the difference is tiny.
+ *
+ * The JPEG standard requires Huffman codes to be no more than 16 bits long.
+ * If some symbols have a very small but nonzero probability, the Huffman tree
+ * must be adjusted to meet the code length restriction.  We currently use
+ * the adjustment method suggested in JPEG section K.2.  This method is *not*
+ * optimal; it may not choose the best possible limited-length code.  But
+ * typically only very-low-frequency symbols will be given less-than-optimal
+ * lengths, so the code is almost optimal.  Experimental comparisons against
+ * an optimal limited-length-code algorithm indicate that the difference is
+ * microscopic --- usually less than a hundredth of a percent of total size.
+ * So the extra complexity of an optimal algorithm doesn't seem worthwhile.
+ */
+
+GLOBAL(void)
+jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])
+{
+#define MAX_CLEN 32		/* assumed maximum initial code length */
+  UINT8 bits[MAX_CLEN+1];	/* bits[k] = # of symbols with code length k */
+  int codesize[257];		/* codesize[k] = code length of symbol k */
+  int others[257];		/* next symbol in current branch of tree */
+  int c1, c2;
+  int p, i, j;
+  long v;
+
+  /* This algorithm is explained in section K.2 of the JPEG standard */
+
+  MEMZERO(bits, SIZEOF(bits));
+  MEMZERO(codesize, SIZEOF(codesize));
+  for (i = 0; i < 257; i++)
+    others[i] = -1;		/* init links to empty */
+  
+  freq[256] = 1;		/* make sure 256 has a nonzero count */
+  /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
+   * that no real symbol is given code-value of all ones, because 256
+   * will be placed last in the largest codeword category.
+   */
+
+  /* Huffman's basic algorithm to assign optimal code lengths to symbols */
+
+  for (;;) {
+    /* Find the smallest nonzero frequency, set c1 = its symbol */
+    /* In case of ties, take the larger symbol number */
+    c1 = -1;
+    v = 1000000000L;
+    for (i = 0; i <= 256; i++) {
+      if (freq[i] && freq[i] <= v) {
+	v = freq[i];
+	c1 = i;
+      }
+    }
+
+    /* Find the next smallest nonzero frequency, set c2 = its symbol */
+    /* In case of ties, take the larger symbol number */
+    c2 = -1;
+    v = 1000000000L;
+    for (i = 0; i <= 256; i++) {
+      if (freq[i] && freq[i] <= v && i != c1) {
+	v = freq[i];
+	c2 = i;
+      }
+    }
+
+    /* Done if we've merged everything into one frequency */
+    if (c2 < 0)
+      break;
+    
+    /* Else merge the two counts/trees */
+    freq[c1] += freq[c2];
+    freq[c2] = 0;
+
+    /* Increment the codesize of everything in c1's tree branch */
+    codesize[c1]++;
+    while (others[c1] >= 0) {
+      c1 = others[c1];
+      codesize[c1]++;
+    }
+    
+    others[c1] = c2;		/* chain c2 onto c1's tree branch */
+    
+    /* Increment the codesize of everything in c2's tree branch */
+    codesize[c2]++;
+    while (others[c2] >= 0) {
+      c2 = others[c2];
+      codesize[c2]++;
+    }
+  }
+
+  /* Now count the number of symbols of each code length */
+  for (i = 0; i <= 256; i++) {
+    if (codesize[i]) {
+      /* The JPEG standard seems to think that this can't happen, */
+      /* but I'm paranoid... */
+      if (codesize[i] > MAX_CLEN)
+	ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
+
+      bits[codesize[i]]++;
+    }
+  }
+
+  /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
+   * Huffman procedure assigned any such lengths, we must adjust the coding.
+   * Here is what the JPEG spec says about how this next bit works:
+   * Since symbols are paired for the longest Huffman code, the symbols are
+   * removed from this length category two at a time.  The prefix for the pair
+   * (which is one bit shorter) is allocated to one of the pair; then,
+   * skipping the BITS entry for that prefix length, a code word from the next
+   * shortest nonzero BITS entry is converted into a prefix for two code words
+   * one bit longer.
+   */
+  for (i = MAX_CLEN; i > 16; i--) {
+    while (bits[i] > 0) {
+      j = i - 2;		/* find length of new prefix to be used */
+      while (bits[j] == 0) {
+	if (j == 0)		/* no shorter code left: never index bits[-1] */
+	  ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
+	j--;
+      }
+      bits[i] -= 2;		/* remove two symbols */
+      bits[i-1]++;		/* one goes in this length */
+      bits[j+1] += 2;		/* two new symbols in this length */
+      bits[j]--;		/* symbol of this length is now a prefix */
+    }
+  }
+
+  /* Remove the count for the pseudo-symbol 256 from the largest codelength */
+  while (bits[i] == 0)		/* find largest codelength still in use */
+    i--;
+  bits[i]--;
+  
+  /* Return final symbol counts (only for lengths 0..16) */
+  MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
+  /* Return a list of the symbols sorted by code length */
+  /* It's not real clear to me why we don't need to consider the codelength
+   * changes made above, but the JPEG spec seems to think this works.
+   */
+  p = 0;
+  for (i = 1; i <= MAX_CLEN; i++) {
+    for (j = 0; j <= 255; j++) {
+      if (codesize[j] == i) {
+	htbl->huffval[p] = (UINT8) j;
+	p++;
+      }
+    }
+  }
+
+  /* Set sent_table FALSE so updated table will be written to JPEG file. */
+  htbl->sent_table = FALSE;
+}
+
+
+/*
+ * Finish up a statistics-gathering pass and create the new Huffman tables.
+ */
+
+METHODDEF(void)
+finish_pass_gather (j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int ci, dctbl, actbl;
+  jpeg_component_info * compptr;
+  JHUFF_TBL **htblptr;
+  boolean did_dc[NUM_HUFF_TBLS];	/* TRUE once that DC table is rebuilt */
+  boolean did_ac[NUM_HUFF_TBLS];	/* TRUE once that AC table is rebuilt */
+
+  /* It's important not to apply jpeg_gen_optimal_table more than once
+   * per table, because it clobbers the input frequency counts!
+   */
+  MEMZERO(did_dc, SIZEOF(did_dc));
+  MEMZERO(did_ac, SIZEOF(did_ac));
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    dctbl = compptr->dc_tbl_no;
+    actbl = compptr->ac_tbl_no;
+    if (! did_dc[dctbl]) {
+      htblptr = & cinfo->dc_huff_tbl_ptrs[dctbl];
+      if (*htblptr == NULL)	/* create the table object on first use */
+	*htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+      jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]);
+      did_dc[dctbl] = TRUE;
+    }
+    if (! did_ac[actbl]) {
+      htblptr = & cinfo->ac_huff_tbl_ptrs[actbl];
+      if (*htblptr == NULL)	/* create the table object on first use */
+	*htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+      jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]);
+      did_ac[actbl] = TRUE;
+    }
+  }
+}
+
+
+#endif /* ENTROPY_OPT_SUPPORTED */
+
+
+/*
+ * Module initialization routine for Huffman entropy encoding.
+ */
+
+GLOBAL(void)
+jinit_huff_encoder (j_compress_ptr cinfo)
+{
+  huff_entropy_ptr enc = (huff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(huff_entropy_encoder));
+  int tbl = NUM_HUFF_TBLS;
+
+  cinfo->entropy = (struct jpeg_entropy_encoder *) enc;
+  enc->pub.start_pass = start_pass_huff;
+  /* Nothing is allocated yet: null every table slot, last to first */
+  while (--tbl >= 0) {
+    enc->ac_derived_tbls[tbl] = NULL;
+    enc->dc_derived_tbls[tbl] = NULL;
+#ifdef ENTROPY_OPT_SUPPORTED
+    enc->ac_count_ptrs[tbl] = NULL;
+    enc->dc_count_ptrs[tbl] = NULL;
+#endif
+  }
+}
diff --git a/src/libjpeg/jchuff.h b/src/libjpeg/jchuff.h
new file mode 100644
index 0000000..a9599fc
--- /dev/null
+++ b/src/libjpeg/jchuff.h
@@ -0,0 +1,47 @@
+/*
+ * jchuff.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains declarations for Huffman entropy encoding routines
+ * that are shared between the sequential encoder (jchuff.c) and the
+ * progressive encoder (jcphuff.c).  No other modules need to see these.
+ */
+
+/* The legal range of a DCT coefficient is
+ *  -1024 .. +1023  for 8-bit data;
+ * -16384 .. +16383 for 12-bit data.
+ * Hence the magnitude should always fit in 10 or 14 bits respectively.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MAX_COEF_BITS 10	/* 8-bit data */
+#else
+#define MAX_COEF_BITS 14	/* any other sample size (12-bit data) */
+#endif
+
+/* Derived data constructed for each Huffman table by jpeg_make_c_derived_tbl */
+
+typedef struct {
+  unsigned int ehufco[256];	/* code for each symbol */
+  char ehufsi[256];		/* length of code for each symbol */
+  /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */
+} c_derived_tbl;
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_make_c_derived_tbl	jMkCDerived
+#define jpeg_gen_optimal_table	jGenOptTbl
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+/* Expand a Huffman table definition into the derived format; *pdtbl is */
+EXTERN(void) jpeg_make_c_derived_tbl	/* allocated on first use */
+	JPP((j_compress_ptr cinfo, boolean isDC, int tblno,
+	     c_derived_tbl ** pdtbl));
+
+/* Generate an optimal table definition from the counts (freq[] is clobbered) */
+EXTERN(void) jpeg_gen_optimal_table
+	JPP((j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]));
diff --git a/src/libjpeg/jcinit.c b/src/libjpeg/jcinit.c
new file mode 100644
index 0000000..5efffe3
--- /dev/null
+++ b/src/libjpeg/jcinit.c
@@ -0,0 +1,72 @@
+/*
+ * jcinit.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains initialization logic for the JPEG compressor.
+ * This routine is in charge of selecting the modules to be executed and
+ * making an initialization call to each one.
+ *
+ * Logically, this code belongs in jcmaster.c.  It's split out because
+ * linking this routine implies linking the entire compression library.
+ * For a transcoding-only application, we want to be able to use jcmaster.c
+ * without linking in the whole library.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Master selection of compression modules.
+ * This is done once at the start of processing an image.  We determine
+ * which modules will be used and give them appropriate initialization calls.
+ */
+
+GLOBAL(void)
+jinit_compress_master (j_compress_ptr cinfo)
+{
+  boolean multipass;		/* TRUE when a full-image coef buffer is needed */
+
+  /* Master control comes first; it checks and processes the parameters. */
+  jinit_c_master_control(cinfo, FALSE /* full compression */);
+
+  /* The preprocessing chain is skipped when the caller supplies raw data. */
+  if (! cinfo->raw_data_in) {
+    jinit_color_converter(cinfo);
+    jinit_downsampler(cinfo);
+    jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */);
+  }
+  jinit_forward_dct(cinfo);	/* forward DCT */
+  /* Entropy encoder: arithmetic coding is rejected, Huffman is set up. */
+  if (cinfo->arith_code)
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+  else if (! cinfo->progressive_mode)
+    jinit_huff_encoder(cinfo);
+  else {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    jinit_phuff_encoder(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  }
+
+  /* Any multi-pass mode requires a full-image coefficient buffer. */
+  multipass = (boolean) (cinfo->num_scans > 1 || cinfo->optimize_coding);
+  jinit_c_coef_controller(cinfo, multipass);
+  jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
+
+  jinit_marker_writer(cinfo);
+
+  /* All modules are set up; have the memory manager realize virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Emit the datastream header (SOI) right away.  Frame and scan
+   * headers are deferred so that the application can insert special
+   * markers after the SOI.
+   */
+  (*cinfo->marker->write_file_header) (cinfo);
+}
diff --git a/src/libjpeg/jcmainct.c b/src/libjpeg/jcmainct.c
new file mode 100644
index 0000000..e0279a7
--- /dev/null
+++ b/src/libjpeg/jcmainct.c
@@ -0,0 +1,293 @@
+/*
+ * jcmainct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the main buffer controller for compression.
+ * The main buffer lies between the pre-processor and the JPEG
+ * compressor proper; it holds downsampled data in the JPEG colorspace.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Note: no current operating mode needs a full-image buffer at this step,
+ * so the full-buffer code below is compiled out by the #undef.  Such a mode
+ * could not be combined with "raw data" input anyway, since this module is
+ * bypassed in that case.  The code is kept for special applications.
+ */
+#undef FULL_MAIN_BUFFER_SUPPORTED
+
+
+/* Private state of the main buffer controller; cinfo->main is cast to this */
+
+typedef struct {
+  struct jpeg_c_main_controller pub; /* public fields (first member: see the casts of cinfo->main) */
+
+  JDIMENSION cur_iMCU_row;	/* index of the current iMCU row; 0 at pass start */
+  JDIMENSION rowgroup_ctr;	/* row groups received so far in this iMCU row */
+  boolean suspended;		/* TRUE while an input row is held back after a suspension */
+  J_BUF_MODE pass_mode;		/* mode passed to start_pass_main for this pass */
+
+  /* With only a strip buffer, this holds the entire set of buffers
+   * (one is allocated for each component).  In the full-image case, it
+   * holds the currently accessible strips of the virtual arrays.
+   */
+  JSAMPARRAY buffer[MAX_COMPONENTS];
+
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+  /* With full-image storage, these are the virtual-array control blocks,
+   * one per component.  whole_image[0] == NULL flags "no virtual arrays".
+   */
+  jvirt_sarray_ptr whole_image[MAX_COMPONENTS];
+#endif /* FULL_MAIN_BUFFER_SUPPORTED */
+} my_main_controller;
+
+typedef my_main_controller * my_main_ptr;
+
+
+/* Forward declarations: start_pass_main installs one of these as process_data */
+METHODDEF(void) process_data_simple_main
+	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
+	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+METHODDEF(void) process_data_buffer_main
+	JPP((j_compress_ptr cinfo, JSAMPARRAY input_buf,
+	     JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail));
+#endif /* FULL_MAIN_BUFFER_SUPPORTED */
+
+
+/*
+ * Per-pass setup: reset the row counters and select the process_data method.
+ */
+
+METHODDEF(void)
+start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  /* Raw-data input bypasses this module, so there is nothing to set up. */
+  if (cinfo->raw_data_in)
+    return;
+
+  mainp->pass_mode = pass_mode;	/* process_data consults this later */
+  mainp->suspended = FALSE;
+  mainp->rowgroup_ctr = 0;	/* both counters restart for the new pass */
+  mainp->cur_iMCU_row = 0;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    if (mainp->whole_image[0] != NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+    mainp->pub.process_data = process_data_simple_main;
+    break;
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+  case JBUF_SAVE_SOURCE:
+  case JBUF_CRANK_DEST:
+  case JBUF_SAVE_AND_PASS:
+    if (mainp->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    mainp->pub.process_data = process_data_buffer_main;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This routine handles the simple pass-through mode,
+ * where we have only a strip buffer.
+ */
+
+METHODDEF(void)
+process_data_simple_main (j_compress_ptr cinfo,
+			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			  JDIMENSION in_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  /* One iteration per iMCU row; the row index advances only on completion. */
+  for (; mainp->cur_iMCU_row < cinfo->total_iMCU_rows; mainp->cur_iMCU_row++) {
+    /* Top up the strip buffer while it is still short of a full iMCU row. */
+    if (mainp->rowgroup_ctr < DCTSIZE)
+      (*cinfo->prep->pre_process_data) (cinfo,
+					input_buf, in_row_ctr, in_rows_avail,
+					mainp->buffer, &mainp->rowgroup_ctr,
+					(JDIMENSION) DCTSIZE);
+
+    /* Without a complete iMCU row in the buffer we must go back to the
+     * application for more data.  (At the bottom of the image the
+     * preprocessor always pads to fill the iMCU row.)
+     */
+    if (mainp->rowgroup_ctr != DCTSIZE)
+      return;
+
+    /* Hand the completed iMCU row to the compressor. */
+    if (! (*cinfo->coef->compress_data) (cinfo, mainp->buffer)) {
+      /* The compressor did not consume the whole row, so processing is
+       * suspended and we return to the application.  The last input row
+       * is reported as not yet consumed (once only); otherwise, were it
+       * the final row of the image, the application would conclude that
+       * we had finished.
+       */
+      if (! mainp->suspended) {
+	*in_row_ctr -= 1;
+	mainp->suspended = TRUE;
+      }
+      return;
+    }
+    /* Row completed.  If an earlier call suspended, give back the input
+     * row that was withheld; then mark the strip buffer empty.
+     */
+    if (mainp->suspended) {
+      *in_row_ctr += 1;
+      mainp->suspended = FALSE;
+    }
+    mainp->rowgroup_ctr = 0;
+  }
+}
+
+
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+
+/*
+ * Process some data.
+ * This routine handles all of the modes that use a full-size buffer.
+ */
+
+METHODDEF(void)
+process_data_buffer_main (j_compress_ptr cinfo,
+			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			  JDIMENSION in_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int ci;
+  jpeg_component_info *compptr;
+  boolean writing = (mainp->pass_mode != JBUF_CRANK_DEST);
+
+  while (mainp->cur_iMCU_row < cinfo->total_iMCU_rows) {
+    /* At the start of an iMCU row, remap every component's strip. */
+    if (mainp->rowgroup_ctr == 0) {
+      for (ci = 0; ci < cinfo->num_components; ci++) {
+	compptr = &cinfo->comp_info[ci];
+	mainp->buffer[ci] = (*cinfo->mem->access_virt_sarray)
+	  ((j_common_ptr) cinfo, mainp->whole_image[ci],
+	   mainp->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE),
+	   (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing);
+      }
+      /* A read pass behaves as though source rows had just been read. */
+      if (! writing) {
+	*in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE;
+	mainp->rowgroup_ctr = DCTSIZE;
+      }
+    }
+
+    /* A write pass reads input until the current iMCU row is full. */
+    /* (The preprocessor pads as needed to fill the last iMCU row.) */
+    if (writing) {
+      (*cinfo->prep->pre_process_data) (cinfo,
+					input_buf, in_row_ctr, in_rows_avail,
+					mainp->buffer, &mainp->rowgroup_ctr,
+					(JDIMENSION) DCTSIZE);
+      /* Still short of a full iMCU row: go back for more data. */
+      if (mainp->rowgroup_ctr < DCTSIZE)
+	return;
+    }
+
+    /* Pass the row on to the compressor, except in a sink-only pass. */
+    if (mainp->pass_mode != JBUF_SAVE_SOURCE) {
+      if (! (*cinfo->coef->compress_data) (cinfo, mainp->buffer)) {
+	/* The compressor did not consume the whole row, so processing
+	 * is suspended and we return to the application.  The last input
+	 * row is reported as not yet consumed (once only); otherwise,
+	 * were it the final row of the image, the application would
+	 * conclude that we had finished.
+	 */
+	if (! mainp->suspended) {
+	  *in_row_ctr -= 1;
+	  mainp->suspended = TRUE;
+	}
+	return;
+      }
+      /* Row completed.  If an earlier call suspended, give back the
+       * input row that was withheld.
+       */
+      if (mainp->suspended) {
+	*in_row_ctr += 1;
+	mainp->suspended = FALSE;
+      }
+    }
+
+    /* This iMCU row is finished: empty the buffer and move on. */
+    mainp->rowgroup_ctr = 0;
+    mainp->cur_iMCU_row++;
+  }
+}
+
+#endif /* FULL_MAIN_BUFFER_SUPPORTED */
+
+
+/*
+ * Initialize main buffer controller.
+ */
+
+GLOBAL(void)
+jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_main_ptr mainp;
+  int ci;
+  jpeg_component_info *compptr;
+
+  mainp = (my_main_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_main_controller));
+  mainp->pub.start_pass = start_pass_main;
+  cinfo->main = (struct jpeg_c_main_controller *) mainp;
+
+  /* Raw-data mode needs the controller object but no sample buffer. */
+  if (cinfo->raw_data_in)
+    return;
+
+  /* Set up the buffer.  Since it holds downsampled data, its size can
+   * differ from one component to the next.
+   */
+  if (need_full_buffer) {
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    /* One full-image virtual array per component, with the height */
+    /* rounded up to a multiple of the iMCU height */
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      compptr = &cinfo->comp_info[ci];
+      mainp->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+	 compptr->width_in_blocks * DCTSIZE,
+	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				(long) compptr->v_samp_factor) * DCTSIZE,
+	 (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
+    }
+#else
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+  } else {
+#ifdef FULL_MAIN_BUFFER_SUPPORTED
+    mainp->whole_image[0] = NULL; /* marks "no virtual arrays" */
+#endif
+    /* One strip buffer per component */
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      compptr = &cinfo->comp_info[ci];
+      mainp->buffer[ci] = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 compptr->width_in_blocks * DCTSIZE,
+	 (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
+    }
+  }
+}
diff --git a/src/libjpeg/jcmarker.c b/src/libjpeg/jcmarker.c
new file mode 100644
index 0000000..3d1e6c6
--- /dev/null
+++ b/src/libjpeg/jcmarker.c
@@ -0,0 +1,664 @@
+/*
+ * jcmarker.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to write JPEG datastream markers.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+typedef enum {			/* JPEG marker codes: the byte emit_marker writes after 0xFF */
+  M_SOF0  = 0xc0,		/* used by write_frame_header for baseline */
+  M_SOF1  = 0xc1,		/* used for non-baseline Huffman files */
+  M_SOF2  = 0xc2,		/* used for progressive Huffman */
+  M_SOF3  = 0xc3,
+  
+  M_SOF5  = 0xc5,
+  M_SOF6  = 0xc6,
+  M_SOF7  = 0xc7,
+  
+  M_JPG   = 0xc8,
+  M_SOF9  = 0xc9,		/* used for arithmetic coding */
+  M_SOF10 = 0xca,
+  M_SOF11 = 0xcb,
+  
+  M_SOF13 = 0xcd,
+  M_SOF14 = 0xce,
+  M_SOF15 = 0xcf,
+  
+  M_DHT   = 0xc4,		/* Huffman table, see emit_dht */
+  
+  M_DAC   = 0xcc,		/* arithmetic conditioning, see emit_dac */
+  
+  M_RST0  = 0xd0,
+  M_RST1  = 0xd1,
+  M_RST2  = 0xd2,
+  M_RST3  = 0xd3,
+  M_RST4  = 0xd4,
+  M_RST5  = 0xd5,
+  M_RST6  = 0xd6,
+  M_RST7  = 0xd7,
+  
+  M_SOI   = 0xd8,		/* opens the datastream, see write_file_header */
+  M_EOI   = 0xd9,		/* closes the datastream, see write_file_trailer */
+  M_SOS   = 0xda,		/* scan header, see emit_sos */
+  M_DQT   = 0xdb,		/* quantization table, see emit_dqt */
+  M_DNL   = 0xdc,
+  M_DRI   = 0xdd,		/* restart interval, see emit_dri */
+  M_DHP   = 0xde,
+  M_EXP   = 0xdf,
+  
+  M_APP0  = 0xe0,		/* carries the JFIF header, see emit_jfif_app0 */
+  M_APP1  = 0xe1,
+  M_APP2  = 0xe2,
+  M_APP3  = 0xe3,
+  M_APP4  = 0xe4,
+  M_APP5  = 0xe5,
+  M_APP6  = 0xe6,
+  M_APP7  = 0xe7,
+  M_APP8  = 0xe8,
+  M_APP9  = 0xe9,
+  M_APP10 = 0xea,
+  M_APP11 = 0xeb,
+  M_APP12 = 0xec,
+  M_APP13 = 0xed,
+  M_APP14 = 0xee,		/* carries the Adobe marker, see emit_adobe_app14 */
+  M_APP15 = 0xef,
+  
+  M_JPG0  = 0xf0,
+  M_JPG13 = 0xfd,
+  M_COM   = 0xfe,
+  
+  M_TEM   = 0x01,
+  
+  M_ERROR = 0x100		/* does not fit in one byte, unlike every code above */
+} JPEG_MARKER;
+
+
+/* Private state of the marker writer; cinfo->marker is cast to this */
+
+typedef struct {
+  struct jpeg_marker_writer pub; /* public fields (method pointers set in jinit_marker_writer) */
+
+  unsigned int last_restart_interval; /* last DRI value emitted; 0 after SOI */
+} my_marker_writer;
+
+typedef my_marker_writer * my_marker_ptr;
+
+
+/*
+ * Basic output routines.
+ *
+ * Note that we do not support suspension while writing a marker.
+ * Therefore, an application using suspension must ensure that there is
+ * enough buffer space for the initial markers (typ. 600-700 bytes) before
+ * calling jpeg_start_compress, and enough space to write the trailing EOI
+ * (a few bytes) before calling jpeg_finish_compress.  Multipass compression
+ * modes are not supported at all with suspension, so those two are the only
+ * points where markers will be written.
+ */
+
+LOCAL(void)
+emit_byte (j_compress_ptr cinfo, int val)
+/* Append one byte to the output buffer, emptying it when it becomes full */
+{
+  struct jpeg_destination_mgr * out = cinfo->dest;
+
+  *out->next_output_byte++ = (JOCTET) val;
+  out->free_in_buffer--;
+  /* Buffer exhausted: it must be emptied now, since markers cannot suspend. */
+  if (out->free_in_buffer == 0 && ! (*out->empty_output_buffer) (cinfo))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+}
+
+
+LOCAL(void)
+emit_marker (j_compress_ptr cinfo, JPEG_MARKER mark)
+/* Emit a marker: the 0xFF prefix byte followed by the marker code byte */
+{
+  emit_byte(cinfo, 0xFF);
+  emit_byte(cinfo, (int) mark);
+}
+
+
+LOCAL(void)
+emit_2bytes (j_compress_ptr cinfo, int value)
+/* Emit the low 16 bits of value as two bytes; these are always MSB first in JPEG files */
+{
+  emit_byte(cinfo, (value >> 8) & 0xFF);
+  emit_byte(cinfo, value & 0xFF);
+}
+
+
+/*
+ * Routines to write specific marker types.
+ */
+
+LOCAL(int)
+emit_dqt (j_compress_ptr cinfo, int index)
+/* Write quantization table `index' as a DQT marker unless already sent. */
+/* Result: 1 if any entry needs 16 bits, else 0 (used for baseline checking) */
+{
+  JQUANT_TBL * qtbl = cinfo->quant_tbl_ptrs[index];
+  int prec = 0;
+  int k;
+
+  if (qtbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, index);
+
+  /* Any value above 255 forces the 16-bit entry format. */
+  for (k = 0; k < DCTSIZE2 && prec == 0; k++) {
+    if (qtbl->quantval[k] > 255)
+      prec = 1;
+  }
+
+  if (qtbl->sent_table)
+    return prec;		/* duplicate table: nothing to write */
+
+  emit_marker(cinfo, M_DQT);
+  /* Length: 2 length bytes + 1 id byte + DCTSIZE2 entries of 1 or 2 bytes */
+  emit_2bytes(cinfo, prec ? DCTSIZE2*2 + 1 + 2 : DCTSIZE2 + 1 + 2);
+  emit_byte(cinfo, index + (prec<<4));	/* precision nibble, table number */
+
+  for (k = 0; k < DCTSIZE2; k++) {
+    /* Entries go out in zigzag order, high byte first when 16-bit. */
+    unsigned int qval = qtbl->quantval[jpeg_natural_order[k]];
+    if (prec)
+      emit_byte(cinfo, (int) (qval >> 8));
+    emit_byte(cinfo, (int) (qval & 0xFF));
+  }
+
+  qtbl->sent_table = TRUE;
+
+  return prec;
+}
+
+
+LOCAL(void)
+emit_dht (j_compress_ptr cinfo, int index, boolean is_ac)
+/* Write one Huffman table as a DHT marker unless it was already sent */
+{
+  JHUFF_TBL * htbl;
+  int nsymbols = 0;
+  int k;
+
+  if (! is_ac) {
+    htbl = cinfo->dc_huff_tbl_ptrs[index];
+  } else {
+    htbl = cinfo->ac_huff_tbl_ptrs[index];
+    index += 0x10;		/* AC tables are written with bit 4 set */
+  }
+
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index);
+
+  if (htbl->sent_table)
+    return;			/* duplicate table: nothing to write */
+
+  emit_marker(cinfo, M_DHT);
+
+  /* bits[1..16] summed gives the number of huffval[] entries to write. */
+  for (k = 1; k <= 16; k++)
+    nsymbols += htbl->bits[k];
+
+  emit_2bytes(cinfo, nsymbols + 2 + 1 + 16);	/* segment length */
+  emit_byte(cinfo, index);
+  for (k = 1; k <= 16; k++)
+    emit_byte(cinfo, htbl->bits[k]);
+  for (k = 0; k < nsymbols; k++)
+    emit_byte(cinfo, htbl->huffval[k]);
+
+  htbl->sent_table = TRUE;
+}
+
+
+LOCAL(void)
+emit_dac (j_compress_ptr cinfo)
+/* Emit a DAC marker (the body is empty unless C_ARITH_CODING_SUPPORTED) */
+/* Since the useful info is so small, we want to emit all the tables in */
+/* one DAC marker.  Therefore this routine does its own scan of the table. */
+{
+#ifdef C_ARITH_CODING_SUPPORTED
+  char dc_in_use[NUM_ARITH_TBLS];
+  char ac_in_use[NUM_ARITH_TBLS];
+  int length, i;
+  jpeg_component_info *compptr;
+  
+  for (i = 0; i < NUM_ARITH_TBLS; i++)
+    dc_in_use[i] = ac_in_use[i] = 0;
+  /* Flag the tables referenced by the components of the current scan. */
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    compptr = cinfo->cur_comp_info[i];
+    dc_in_use[compptr->dc_tbl_no] = 1;
+    ac_in_use[compptr->ac_tbl_no] = 1;
+  }
+  /* Count the flagged tables; each one contributes two bytes below. */
+  length = 0;
+  for (i = 0; i < NUM_ARITH_TBLS; i++)
+    length += dc_in_use[i] + ac_in_use[i];
+  
+  emit_marker(cinfo, M_DAC);
+  
+  emit_2bytes(cinfo, length*2 + 2);
+  /* Per table in use: an id byte (AC ids have 0x10 added), then its value. */
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    if (dc_in_use[i]) {
+      emit_byte(cinfo, i);
+      emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4));
+    }
+    if (ac_in_use[i]) {
+      emit_byte(cinfo, i + 0x10);
+      emit_byte(cinfo, cinfo->arith_ac_K[i]);
+    }
+  }
+#endif /* C_ARITH_CODING_SUPPORTED */
+}
+
+
+LOCAL(void)
+emit_dri (j_compress_ptr cinfo)
+/* Emit a DRI marker carrying cinfo->restart_interval */
+{
+  emit_marker(cinfo, M_DRI);
+  
+  emit_2bytes(cinfo, 4);	/* fixed length: 2 length bytes + 2 value bytes */
+
+  emit_2bytes(cinfo, (int) cinfo->restart_interval);
+}
+
+
+LOCAL(void)
+emit_sof (j_compress_ptr cinfo, JPEG_MARKER code)
+/* Write a SOF marker of the given type describing the frame */
+{
+  int ci;
+  jpeg_component_info *compptr;
+
+  emit_marker(cinfo, code);
+
+  emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
+
+  /* Height and width are 2-byte fields, so neither may exceed 65535 */
+  if ((long) cinfo->image_height > 65535L ||
+      (long) cinfo->image_width > 65535L)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535);
+
+  emit_byte(cinfo, cinfo->data_precision);
+  emit_2bytes(cinfo, (int) cinfo->image_height);
+  emit_2bytes(cinfo, (int) cinfo->image_width);
+
+  emit_byte(cinfo, cinfo->num_components);
+  /* Per component: id, packed sampling factors (H high, V low), Q table */
+  for (ci = 0; ci < cinfo->num_components; ci++) {
+    compptr = &cinfo->comp_info[ci];
+    emit_byte(cinfo, compptr->component_id);
+    emit_byte(cinfo, (compptr->h_samp_factor << 4) + compptr->v_samp_factor);
+    emit_byte(cinfo, compptr->quant_tbl_no);
+  }
+}
+
+
+LOCAL(void)
+emit_sos (j_compress_ptr cinfo)
+/* Write the SOS marker describing the current scan */
+{
+  int k, dc_sel, ac_sel;
+  jpeg_component_info *comp;
+
+  emit_marker(cinfo, M_SOS);
+
+  emit_2bytes(cinfo, 2 * cinfo->comps_in_scan + 2 + 1 + 3); /* length */
+
+  emit_byte(cinfo, cinfo->comps_in_scan);
+
+  for (k = 0; k < cinfo->comps_in_scan; k++) {
+    comp = cinfo->cur_comp_info[k];
+    dc_sel = comp->dc_tbl_no;
+    ac_sel = comp->ac_tbl_no;
+    emit_byte(cinfo, comp->component_id);
+    if (cinfo->progressive_mode) {
+      /* A progressive scan uses only DC or only AC tables, and a
+       * Huffman-coded DC refinement scan uses no table at all.
+       * Unused selector fields are written as 0; the P&M text recommends
+       * this, though the standard does not seem to specify it.
+       */
+      if (cinfo->Ss != 0) {
+	dc_sel = 0;		/* AC scan: DC selector unused */
+      } else {
+	ac_sel = 0;		/* DC scan: AC selector unused */
+	if (cinfo->Ah != 0 && !cinfo->arith_code)
+	  dc_sel = 0;		/* DC refinement needs no table either */
+      }
+    }
+    emit_byte(cinfo, (dc_sel << 4) + ac_sel);
+  }
+
+  emit_byte(cinfo, cinfo->Ss);
+  emit_byte(cinfo, cinfo->Se);
+  emit_byte(cinfo, (cinfo->Ah << 4) + cinfo->Al);
+}
+
+
+LOCAL(void)
+emit_jfif_app0 (j_compress_ptr cinfo)
+/* Emit a JFIF-compliant APP0 marker (always written without a thumbnail) */
+{
+  /*
+   * Length of APP0 block	(2 bytes)
+   * Block ID			(4 bytes - ASCII "JFIF")
+   * Zero byte			(1 byte to terminate the ID string)
+   * Version Major, Minor	(2 bytes - major first)
+   * Units			(1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm)
+   * Xdpu			(2 bytes - dots per unit horizontal)
+   * Ydpu			(2 bytes - dots per unit vertical)
+   * Thumbnail X size		(1 byte)
+   * Thumbnail Y size		(1 byte)
+   */
+  
+  emit_marker(cinfo, M_APP0);
+  
+  emit_2bytes(cinfo, 2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* length = 16, the sum of the field sizes above */
+
+  emit_byte(cinfo, 0x4A);	/* Identifier: ASCII "JFIF" */
+  emit_byte(cinfo, 0x46);
+  emit_byte(cinfo, 0x49);
+  emit_byte(cinfo, 0x46);
+  emit_byte(cinfo, 0);		/* zero byte terminating the identifier */
+  emit_byte(cinfo, cinfo->JFIF_major_version); /* Version fields */
+  emit_byte(cinfo, cinfo->JFIF_minor_version);
+  emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */
+  emit_2bytes(cinfo, (int) cinfo->X_density);
+  emit_2bytes(cinfo, (int) cinfo->Y_density);
+  emit_byte(cinfo, 0);		/* Thumbnail X size 0: no thumbnail image */
+  emit_byte(cinfo, 0);		/* Thumbnail Y size 0 */
+}
+
+
+LOCAL(void)
+emit_adobe_app14 (j_compress_ptr cinfo)
+/* Write an Adobe APP14 marker */
+{
+  /*
+   * Length of APP14 block	(2 bytes)
+   * Block ID			(5 bytes - ASCII "Adobe")
+   * Version Number		(2 bytes - currently 100)
+   * Flags0			(2 bytes - currently 0)
+   * Flags1			(2 bytes - currently 0)
+   * Color transform		(1 byte)
+   *
+   * Adobe TN 5116 mentions Version = 101, but the Adobe files actually
+   * in circulation all seem to carry Version = 100, so we write 100.
+   *
+   * The color transform byte is written as 1 when the JPEG color space
+   * is YCbCr, 2 when it is YCCK, and 0 otherwise.  Adobe defines it by
+   * whether the encoder performed a transformation, which is pretty useless.
+   */
+  int transform;
+
+  emit_marker(cinfo, M_APP14);
+  emit_2bytes(cinfo, 2 + 5 + 2 + 2 + 2 + 1); /* length */
+
+  emit_byte(cinfo, 0x41);	/* Identifier: ASCII "Adobe" */
+  emit_byte(cinfo, 0x64);
+  emit_byte(cinfo, 0x6F);
+  emit_byte(cinfo, 0x62);
+  emit_byte(cinfo, 0x65);
+  emit_2bytes(cinfo, 100);	/* Version */
+  emit_2bytes(cinfo, 0);	/* Flags0 */
+  emit_2bytes(cinfo, 0);	/* Flags1 */
+
+  /* Map the JPEG color space onto the transform code as described
+   * in the comment at the top of this function. */
+  if (cinfo->jpeg_color_space == JCS_YCbCr) {
+    transform = 1;
+  } else if (cinfo->jpeg_color_space == JCS_YCCK) {
+    transform = 2;
+  } else {
+    transform = 0;
+  }
+  emit_byte(cinfo, transform);	/* Color transform */
+}
+
+
+/*
+ * These routines allow writing an arbitrary marker with parameters.
+ * The only intended use is to emit COM or APPn markers after calling
+ * write_file_header and before calling write_frame_header.
+ * Other uses are not guaranteed to produce desirable results.
+ * Counting the parameter bytes properly is the caller's responsibility.
+ */
+
+METHODDEF(void)
+write_marker_header (j_compress_ptr cinfo, int marker, unsigned int datalen)
+/* Emit an arbitrary marker code followed by its 2-byte length (datalen + 2) */
+{
+  if (datalen > (unsigned int) 65533)		/* datalen + 2 must fit in 2 bytes */
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  emit_marker(cinfo, (JPEG_MARKER) marker);
+
+  emit_2bytes(cinfo, (int) (datalen + 2));	/* total length, counting the length field */
+}
+
+METHODDEF(void)
+write_marker_byte (j_compress_ptr cinfo, int val)
+/* Emit one byte of marker parameters following write_marker_header; simply forwards to emit_byte */
+{
+  emit_byte(cinfo, val);
+}
+
+
+/*
+ * Write datastream header.
+ * This consists of an SOI and optional APPn markers.
+ * We recommend use of the JFIF marker, but not the Adobe marker,
+ * when using YCbCr or grayscale data.  The JFIF marker should NOT
+ * be used for any other JPEG colorspace.  The Adobe marker is helpful
+ * to distinguish RGB, CMYK, and YCCK colorspaces.
+ * Note that an application can write additional header markers after
+ * jpeg_start_compress returns.
+ */
+
+METHODDEF(void)
+write_file_header (j_compress_ptr cinfo)
+{
+  /* The datastream always opens with SOI. */
+  emit_marker(cinfo, M_SOI);
+
+  /* SOI by definition resets the restart interval to 0, so no DRI
+   * value is remembered from before this point. */
+  ((my_marker_ptr) cinfo->marker)->last_restart_interval = 0;
+
+  if (cinfo->write_JFIF_header)		/* optional JFIF APP0 comes next */
+    emit_jfif_app0(cinfo);
+  if (cinfo->write_Adobe_marker)	/* then the optional Adobe APP14 */
+    emit_adobe_app14(cinfo);
+}
+
+
+/*
+ * Write frame header.
+ * This consists of DQT and SOFn markers.
+ * Note that we do not emit the SOF until we have emitted the DQT(s).
+ * This avoids compatibility problems with incorrect implementations that
+ * try to error-check the quant table numbers as soon as they see the SOF.
+ */
+
+METHODDEF(void)
+write_frame_header (j_compress_ptr cinfo)
+{
+  int ci, prec = 0;
+  boolean is_baseline;
+  jpeg_component_info *compptr;
+  JPEG_MARKER sof_code;
+
+  /* Write a DQT for every component's quantization table; emit_dqt()
+   * itself skips tables that have already been sent.
+   */
+  for (ci = 0; ci < cinfo->num_components; ci++) {
+    compptr = &cinfo->comp_info[ci];
+    prec += emit_dqt(cinfo, compptr->quant_tbl_no);
+  }
+  /* prec != 0 now means at least one table holds 16-bit entries. */
+
+  /* Decide whether the file qualifies as baseline.
+   * This assumes the Huffman table numbers will not change later on.
+   */
+  if (cinfo->arith_code || cinfo->progressive_mode ||
+      cinfo->data_precision != 8) {
+    is_baseline = FALSE;
+  } else {
+    is_baseline = TRUE;
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      compptr = &cinfo->comp_info[ci];
+      if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1)
+	is_baseline = FALSE;
+    }
+    if (prec && is_baseline) {
+      is_baseline = FALSE;
+      /* Baseline in every respect except quantizer size: tell the user */
+      TRACEMS(cinfo, 0, JTRC_16BIT_TABLES);
+    }
+  }
+
+  /* Pick the SOF marker that matches the coding process. */
+  if (cinfo->arith_code)
+    sof_code = M_SOF9;		/* arithmetic coding */
+  else if (cinfo->progressive_mode)
+    sof_code = M_SOF2;		/* progressive Huffman */
+  else if (is_baseline)
+    sof_code = M_SOF0;		/* baseline implementation */
+  else
+    sof_code = M_SOF1;		/* non-baseline Huffman file */
+
+  emit_sof(cinfo, sof_code);
+}
+
+
+/*
+ * Write scan header.
+ * This consists of DHT or DAC markers, optional DRI, and SOS.
+ * Compressed data will be written following the SOS.
+ */
+
+METHODDEF(void)
+write_scan_header (j_compress_ptr cinfo)
+{
+  my_marker_ptr writer = (my_marker_ptr) cinfo->marker;
+  int k;
+  jpeg_component_info *comp;
+  boolean want_dc, want_ac;
+
+  if (cinfo->arith_code) {
+    /* Arithmetic conditioning info.  It may get repeated when the
+     * file has several scans, but it is so small that suppressing
+     * the duplicates is hardly worth the trouble.
+     */
+    emit_dac(cinfo);
+  } else {
+    /* Sequential scans need both DC and AC tables.  A progressive scan
+     * uses only one kind: DC tables when Ss == 0 (and no table at all
+     * for a DC refinement scan, Ah != 0), AC tables otherwise.
+     * The selection is the same for every component of the scan.
+     */
+    want_dc = (boolean) (! cinfo->progressive_mode ||
+			 (cinfo->Ss == 0 && cinfo->Ah == 0));
+    want_ac = (boolean) (! cinfo->progressive_mode || cinfo->Ss != 0);
+
+    /* emit_dht() itself skips tables that have already been sent. */
+    for (k = 0; k < cinfo->comps_in_scan; k++) {
+      comp = cinfo->cur_comp_info[k];
+      if (want_dc)
+	emit_dht(cinfo, comp->dc_tbl_no, FALSE);
+      if (want_ac)
+	emit_dht(cinfo, comp->ac_tbl_no, TRUE);
+    }
+  }
+
+  /* The restart interval may change from scan to scan, so a DRI can be
+   * needed here; it is written only when the value differs from the
+   * last one emitted, to avoid wasting space.
+   */
+  if (cinfo->restart_interval != writer->last_restart_interval) {
+    emit_dri(cinfo);
+    writer->last_restart_interval = cinfo->restart_interval;
+  }
+
+  emit_sos(cinfo);
+}
+
+
+/*
+ * Write datastream trailer: a single EOI marker.
+ */
+
+METHODDEF(void)
+write_file_trailer (j_compress_ptr cinfo)
+{
+  emit_marker(cinfo, M_EOI);	/* nothing else is written here */
+}
+
+
+/*
+ * Write an abbreviated table-specification datastream.
+ * This consists of SOI, DQT and DHT tables, and EOI.
+ * Any table that is defined and not marked sent_table = TRUE will be
+ * emitted.  Note that all tables will be marked sent_table = TRUE at exit.
+ */
+
+METHODDEF(void)
+write_tables_only (j_compress_ptr cinfo)
+{
+  int tbl;
+
+  emit_marker(cinfo, M_SOI);
+
+  /* Quantization tables first; the precision result is not needed here. */
+  for (tbl = 0; tbl < NUM_QUANT_TBLS; tbl++)
+    if (cinfo->quant_tbl_ptrs[tbl] != NULL)
+      (void) emit_dqt(cinfo, tbl);
+
+  if (! cinfo->arith_code) {
+    for (tbl = 0; tbl < NUM_HUFF_TBLS; tbl++) {
+      if (cinfo->dc_huff_tbl_ptrs[tbl] != NULL)
+	emit_dht(cinfo, tbl, FALSE);
+      if (cinfo->ac_huff_tbl_ptrs[tbl] != NULL)
+	emit_dht(cinfo, tbl, TRUE);
+    }
+  }
+
+  emit_marker(cinfo, M_EOI);
+}
+
+
+/*
+ * Initialize the marker writer module.
+ */
+
+GLOBAL(void)
+jinit_marker_writer (j_compress_ptr cinfo)
+{
+  my_marker_ptr writer;
+
+  /* Allocate the marker writer object from the JPOOL_IMAGE pool */
+  writer = (my_marker_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_marker_writer));
+  /* Private state: no DRI has been emitted yet */
+  writer->last_restart_interval = 0;
+  /* Install the public methods */
+  writer->pub.write_marker_byte = write_marker_byte;
+  writer->pub.write_marker_header = write_marker_header;
+  writer->pub.write_tables_only = write_tables_only;
+  writer->pub.write_file_trailer = write_file_trailer;
+  writer->pub.write_scan_header = write_scan_header;
+  writer->pub.write_frame_header = write_frame_header;
+  writer->pub.write_file_header = write_file_header;
+  cinfo->marker = (struct jpeg_marker_writer *) writer;
+}
diff --git a/src/libjpeg/jcmaster.c b/src/libjpeg/jcmaster.c
new file mode 100644
index 0000000..aab4020
--- /dev/null
+++ b/src/libjpeg/jcmaster.c
@@ -0,0 +1,590 @@
+/*
+ * jcmaster.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains master control logic for the JPEG compressor.
+ * These routines are concerned with parameter validation, initial setup,
+ * and inter-pass control (determining the number of passes and the work 
+ * to be done in each pass).
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state of the compression master controller */
+
+typedef enum {
+	main_pass,		/* input data; also first Huffman-opt or output step */
+	huff_opt_pass,		/* Huffman code optimization pass */
+	output_pass		/* data output pass */
+} c_pass_type;
+
+typedef struct {
+  struct jpeg_comp_master pub;	/* public fields (first; struct is cast to/from cinfo->master) */
+
+  c_pass_type pass_type;	/* the type of the current pass */
+
+  int pass_number;		/* # of passes completed */
+  int total_passes;		/* total # of passes needed */
+
+  int scan_number;		/* current index in scan_info[] */
+} my_comp_master;
+
+typedef my_comp_master * my_master_ptr;
+
+
+/*
+ * Support routines that do various essential calculations.
+ */
+
+LOCAL(void)
+initial_setup (j_compress_ptr cinfo)
+/* Validate image parameters and compute derived sizes needed before master selection phase */
+{
+  int ci;
+  jpeg_component_info *compptr;
+  long samplesperrow;
+  JDIMENSION jd_samplesperrow;
+
+  /* Sanity check on image dimensions */
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0
+      || cinfo->num_components <= 0 || cinfo->input_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  /* Make sure image isn't bigger than I can handle */
+  if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION ||
+      (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
+
+  /* Width of an input scanline must be representable as JDIMENSION. */
+  samplesperrow = (long) cinfo->image_width * (long) cinfo->input_components;
+  jd_samplesperrow = (JDIMENSION) samplesperrow;
+  if ((long) jd_samplesperrow != samplesperrow)	/* round trip changed the value */
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* For now, precision must match compiled-in value... */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Check that number of components won't exceed internal array sizes */
+  if (cinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	     MAX_COMPONENTS);
+
+  /* Compute maximum sampling factors; check factor validity */
+  cinfo->max_h_samp_factor = 1;	/* running maxima; 1 is the smallest factor accepted below */
+  cinfo->max_v_samp_factor = 1;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR ||
+	compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR)
+      ERREXIT(cinfo, JERR_BAD_SAMPLING);
+    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
+				   compptr->h_samp_factor);
+    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
+				   compptr->v_samp_factor);
+  }
+
+  /* Compute dimensions of components (needs the maxima from the loop above) */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Fill in the correct component_index value; don't rely on application */
+    compptr->component_index = ci;
+    /* For compression, we never do DCT scaling. */
+    compptr->DCT_scaled_size = DCTSIZE;
+    /* Size in DCT blocks */
+    compptr->width_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+    compptr->height_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+    /* Size in samples */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor,
+		    (long) cinfo->max_h_samp_factor);
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor,
+		    (long) cinfo->max_v_samp_factor);
+    /* Mark component needed (this flag isn't actually used for compression) */
+    compptr->component_needed = TRUE;
+  }
+
+  /* Compute number of fully interleaved MCU rows (number of times that
+   * main controller will call coefficient controller).
+   */
+  cinfo->total_iMCU_rows = (JDIMENSION)
+    jdiv_round_up((long) cinfo->image_height,
+		  (long) (cinfo->max_v_samp_factor*DCTSIZE));
+}
+
+
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+
+LOCAL(void)
+validate_script (j_compress_ptr cinfo)
+/* Verify that the scan script in cinfo->scan_info[] is valid; also
+ * determine whether it uses progressive JPEG, and set cinfo->progressive_mode.
+ */
+{
+  const jpeg_scan_info * scanptr;
+  int scanno, ncomps, ci, coefi, thisi;
+  int Ss, Se, Ah, Al;
+  boolean component_sent[MAX_COMPONENTS];	/* used in sequential mode only */
+#ifdef C_PROGRESSIVE_SUPPORTED
+  int * last_bitpos_ptr;
+  int last_bitpos[MAX_COMPONENTS][DCTSIZE2];
+  /* -1 until that coefficient has been seen; then last Al for it */
+#endif
+
+  if (cinfo->num_scans <= 0)
+    ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, 0);
+
+  /* For sequential JPEG, all scans must have Ss=0, Se=DCTSIZE2-1;
+   * for progressive JPEG, no scan can have this.
+   * The first scan's Ss/Se therefore decide which mode the script is in.
+   */
+  scanptr = cinfo->scan_info;
+  if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2-1) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    cinfo->progressive_mode = TRUE;
+    last_bitpos_ptr = & last_bitpos[0][0];
+    for (ci = 0; ci < cinfo->num_components; ci++) 
+      for (coefi = 0; coefi < DCTSIZE2; coefi++)
+	*last_bitpos_ptr++ = -1;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    cinfo->progressive_mode = FALSE;
+    for (ci = 0; ci < cinfo->num_components; ci++) 
+      component_sent[ci] = FALSE;
+  }
+
+  for (scanno = 1; scanno <= cinfo->num_scans; scanptr++, scanno++) {	/* scanno is 1-based */
+    /* Validate component indexes */
+    ncomps = scanptr->comps_in_scan;
+    if (ncomps <= 0 || ncomps > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, ncomps, MAX_COMPS_IN_SCAN);
+    for (ci = 0; ci < ncomps; ci++) {
+      thisi = scanptr->component_index[ci];
+      if (thisi < 0 || thisi >= cinfo->num_components)
+	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+      /* Components must appear in SOF order within each scan */
+      if (ci > 0 && thisi <= scanptr->component_index[ci-1])
+	ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+    }
+    /* Validate progression parameters */
+    Ss = scanptr->Ss;
+    Se = scanptr->Se;
+    Ah = scanptr->Ah;
+    Al = scanptr->Al;
+    if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+      /* The JPEG spec simply gives the ranges 0..13 for Ah and Al, but that
+       * seems wrong: the upper bound ought to depend on data precision.
+       * Perhaps they really meant 0..N+1 for N-bit precision.
+       * Here we allow 0..10 for 8-bit data; Al larger than 10 results in
+       * out-of-range reconstructed DC values during the first DC scan,
+       * which might cause problems for some decoders.
+       */
+#if BITS_IN_JSAMPLE == 8
+#define MAX_AH_AL 10
+#else
+#define MAX_AH_AL 13
+#endif
+      if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
+	  Ah < 0 || Ah > MAX_AH_AL || Al < 0 || Al > MAX_AH_AL)
+	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      if (Ss == 0) {
+	if (Se != 0)		/* DC and AC together not OK */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      } else {
+	if (ncomps != 1)	/* AC scans must be for only one component */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      }
+      for (ci = 0; ci < ncomps; ci++) {
+	last_bitpos_ptr = & last_bitpos[scanptr->component_index[ci]][0];
+	if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */
+	  ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	for (coefi = Ss; coefi <= Se; coefi++) {
+	  if (last_bitpos_ptr[coefi] < 0) {
+	    /* first scan of this coefficient */
+	    if (Ah != 0)
+	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	  } else {
+	    /* not first scan: must refine by exactly one bit from the last Al */
+	    if (Ah != last_bitpos_ptr[coefi] || Al != Ah-1)
+	      ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+	  }
+	  last_bitpos_ptr[coefi] = Al;
+	}
+      }
+#endif
+    } else {
+      /* For sequential JPEG, all progression parameters must be these: */
+      if (Ss != 0 || Se != DCTSIZE2-1 || Ah != 0 || Al != 0)
+	ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      /* Make sure components are not sent twice */
+      for (ci = 0; ci < ncomps; ci++) {
+	thisi = scanptr->component_index[ci];
+	if (component_sent[thisi])
+	  ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+	component_sent[thisi] = TRUE;
+      }
+    }
+  }
+
+  /* Now verify that everything got sent. */
+  if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    /* For progressive mode, we only check that at least some DC data
+     * got sent for each component; the spec does not require that all bits
+     * of all coefficients be transmitted.  Would it be wiser to enforce
+     * transmission of all coefficient bits??
+     */
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      if (last_bitpos[ci][0] < 0)
+	ERREXIT(cinfo, JERR_MISSING_DATA);
+    }
+#endif
+  } else {
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      if (! component_sent[ci])
+	ERREXIT(cinfo, JERR_MISSING_DATA);
+    }
+  }
+}
+
+#endif /* C_MULTISCAN_FILES_SUPPORTED */
+
+
+LOCAL(void)
+select_scan_parameters (j_compress_ptr cinfo)
+/* Set up the scan parameters (component list, Ss/Se/Ah/Al) for the current scan */
+{
+  int ci;
+
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+  if (cinfo->scan_info != NULL) {
+    /* Prepare for current scan --- the script is already validated */
+    my_master_ptr master = (my_master_ptr) cinfo->master;
+    const jpeg_scan_info * scanptr = cinfo->scan_info + master->scan_number;
+
+    cinfo->comps_in_scan = scanptr->comps_in_scan;
+    for (ci = 0; ci < scanptr->comps_in_scan; ci++) {
+      cinfo->cur_comp_info[ci] =
+	&cinfo->comp_info[scanptr->component_index[ci]];
+    }
+    cinfo->Ss = scanptr->Ss;
+    cinfo->Se = scanptr->Se;
+    cinfo->Ah = scanptr->Ah;
+    cinfo->Al = scanptr->Al;
+  }
+  else
+#endif
+  {
+    /* Prepare for single sequential-JPEG scan containing all components */
+    if (cinfo->num_components > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	       MAX_COMPS_IN_SCAN);
+    cinfo->comps_in_scan = cinfo->num_components;
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci];
+    }
+    cinfo->Ss = 0;		/* full coefficient range, no successive approximation */
+    cinfo->Se = DCTSIZE2-1;
+    cinfo->Ah = 0;
+    cinfo->Al = 0;
+  }
+}
+
+
+LOCAL(void)
+per_scan_setup (j_compress_ptr cinfo)
+/* Do computations that are needed before processing a JPEG scan */
+/* cinfo->comps_in_scan and cinfo->cur_comp_info[] are already set */
+{
+  int ci, mcublks, tmp;
+  jpeg_component_info *compptr;
+  
+  if (cinfo->comps_in_scan == 1) {
+    
+    /* Noninterleaved (single-component) scan */
+    compptr = cinfo->cur_comp_info[0];
+    
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = compptr->width_in_blocks;
+    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
+    
+    /* For noninterleaved scan, always one block per MCU */
+    compptr->MCU_width = 1;
+    compptr->MCU_height = 1;
+    compptr->MCU_blocks = 1;
+    compptr->MCU_sample_width = DCTSIZE;
+    compptr->last_col_width = 1;
+    /* For noninterleaved scans, it is convenient to define last_row_height
+     * as the number of block rows present in the last iMCU row.
+     */
+    tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+    if (tmp == 0) tmp = compptr->v_samp_factor;	/* exact multiple: last row is full */
+    compptr->last_row_height = tmp;
+    
+    /* Prepare array describing MCU composition */
+    cinfo->blocks_in_MCU = 1;
+    cinfo->MCU_membership[0] = 0;
+    
+  } else {
+    
+    /* Interleaved (multi-component) scan */
+    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
+	       MAX_COMPS_IN_SCAN);
+    
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width,
+		    (long) (cinfo->max_h_samp_factor*DCTSIZE));
+    cinfo->MCU_rows_in_scan = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height,
+		    (long) (cinfo->max_v_samp_factor*DCTSIZE));
+    
+    cinfo->blocks_in_MCU = 0;
+    
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Sampling factors give # of blocks of component in each MCU */
+      compptr->MCU_width = compptr->h_samp_factor;
+      compptr->MCU_height = compptr->v_samp_factor;
+      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
+      compptr->MCU_sample_width = compptr->MCU_width * DCTSIZE;
+      /* Figure number of non-dummy blocks in last MCU column & row */
+      tmp = (int) (compptr->width_in_blocks % compptr->MCU_width);
+      if (tmp == 0) tmp = compptr->MCU_width;
+      compptr->last_col_width = tmp;
+      tmp = (int) (compptr->height_in_blocks % compptr->MCU_height);
+      if (tmp == 0) tmp = compptr->MCU_height;
+      compptr->last_row_height = tmp;
+      /* Prepare array describing MCU composition */
+      mcublks = compptr->MCU_blocks;
+      if (cinfo->blocks_in_MCU + mcublks > C_MAX_BLOCKS_IN_MCU)
+	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+      while (mcublks-- > 0) {
+	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;	/* index into cur_comp_info[] */
+      }
+    }
+    
+  }
+
+  /* Convert restart specified in rows to actual MCU count. */
+  /* Note that count must fit in 16 bits, so we provide limiting. */
+  if (cinfo->restart_in_rows > 0) {
+    long nominal = (long) cinfo->restart_in_rows * (long) cinfo->MCUs_per_row;
+    cinfo->restart_interval = (unsigned int) MIN(nominal, 65535L);
+  }
+}
+
+
+/*
+ * Per-pass setup.
+ * This is called at the beginning of each pass.  We determine which modules
+ * will be active during this pass and give them appropriate start_pass calls.
+ * We also set is_last_pass to indicate whether any more passes will be
+ * required.
+ */
+
+METHODDEF(void)
+prepare_for_pass (j_compress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  switch (master->pass_type) {
+  case main_pass:
+    /* Initial pass: will collect input data, and do either Huffman
+     * optimization or data output for the first scan.
+     */
+    select_scan_parameters(cinfo);
+    per_scan_setup(cinfo);
+    if (! cinfo->raw_data_in) {
+      (*cinfo->cconvert->start_pass) (cinfo);
+      (*cinfo->downsample->start_pass) (cinfo);
+      (*cinfo->prep->start_pass) (cinfo, JBUF_PASS_THRU);
+    }
+    (*cinfo->fdct->start_pass) (cinfo);
+    (*cinfo->entropy->start_pass) (cinfo, cinfo->optimize_coding);
+    (*cinfo->coef->start_pass) (cinfo,
+				(master->total_passes > 1 ?
+				 JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+    (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
+    if (cinfo->optimize_coding) {
+      /* No immediate data output; postpone writing frame/scan headers */
+      master->pub.call_pass_startup = FALSE;
+    } else {
+      /* Will write frame/scan headers at first jpeg_write_scanlines call */
+      master->pub.call_pass_startup = TRUE;
+    }
+    break;
+#ifdef ENTROPY_OPT_SUPPORTED
+  case huff_opt_pass:
+    /* Do Huffman optimization for a scan after the first one. */
+    select_scan_parameters(cinfo);
+    per_scan_setup(cinfo);
+    if (cinfo->Ss != 0 || cinfo->Ah == 0 || cinfo->arith_code) {
+      (*cinfo->entropy->start_pass) (cinfo, TRUE);
+      (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
+      master->pub.call_pass_startup = FALSE;
+      break;
+    }
+    /* Special case: Huffman DC refinement scans need no Huffman table
+     * and therefore we can skip the optimization pass for them.
+     */
+    master->pass_type = output_pass;
+    master->pass_number++;	/* count the skipped optimization pass as done */
+    /*FALLTHROUGH*/
+#endif
+  case output_pass:
+    /* Do a data-output pass. */
+    /* We need not repeat per-scan setup if prior optimization pass did it. */
+    if (! cinfo->optimize_coding) {
+      select_scan_parameters(cinfo);
+      per_scan_setup(cinfo);
+    }
+    (*cinfo->entropy->start_pass) (cinfo, FALSE);
+    (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
+    /* We emit frame/scan headers now */
+    if (master->scan_number == 0)	/* frame header goes out once, before the first scan */
+      (*cinfo->marker->write_frame_header) (cinfo);
+    (*cinfo->marker->write_scan_header) (cinfo);
+    master->pub.call_pass_startup = FALSE;
+    break;
+  default:	/* reached for huff_opt_pass when ENTROPY_OPT_SUPPORTED is not defined */
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+  }
+
+  master->pub.is_last_pass = (master->pass_number == master->total_passes-1);
+
+  /* Set up progress monitor's pass info if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->completed_passes = master->pass_number;
+    cinfo->progress->total_passes = master->total_passes;
+  }
+}
+
+
+/*
+ * Special start-of-pass hook.
+ * This is called by jpeg_write_scanlines if call_pass_startup is TRUE.
+ * In single-pass processing, we need this hook because we don't want to
+ * write frame/scan headers during jpeg_start_compress; we want to let the
+ * application write COM markers etc. between jpeg_start_compress and the
+ * jpeg_write_scanlines loop.
+ * prepare_for_pass requests it only for a main pass without optimize_coding.
+ */
+
+METHODDEF(void)
+pass_startup (j_compress_ptr cinfo)
+{
+  cinfo->master->call_pass_startup = FALSE; /* reset flag so call only once */
+
+  (*cinfo->marker->write_frame_header) (cinfo);
+  (*cinfo->marker->write_scan_header) (cinfo);
+}
+
+
+/*
+ * Finish up at end of pass: close out the entropy coder, then advance state.
+ */
+
+METHODDEF(void)
+finish_pass_master (j_compress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  /* The entropy coder always needs an end-of-pass call,
+   * either to analyze statistics or to flush its output buffer.
+   */
+  (*cinfo->entropy->finish_pass) (cinfo);
+
+  /* Update state for next pass */
+  switch (master->pass_type) {
+  case main_pass:
+    /* next pass is either output of scan 0 (after optimization)
+     * or output of scan 1 (if no optimization).
+     */
+    master->pass_type = output_pass;
+    if (! cinfo->optimize_coding)
+      master->scan_number++;	/* scan 0 was already output during this pass */
+    break;
+  case huff_opt_pass:
+    /* next pass is always output of current scan */
+    master->pass_type = output_pass;
+    break;
+  case output_pass:
+    /* next pass is either optimization or output of next scan */
+    if (cinfo->optimize_coding)
+      master->pass_type = huff_opt_pass;
+    master->scan_number++;
+    break;
+  }
+
+  master->pass_number++;
+}
+
+
+/*
+ * Initialize master compression control: validate parameters and plan passes.
+ */
+
+GLOBAL(void)
+jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only)
+{
+  my_master_ptr master;
+
+  master = (my_master_ptr)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(my_comp_master));
+  cinfo->master = (struct jpeg_comp_master *) master;
+  master->pub.prepare_for_pass = prepare_for_pass;
+  master->pub.pass_startup = pass_startup;
+  master->pub.finish_pass = finish_pass_master;
+  master->pub.is_last_pass = FALSE;
+
+  /* Validate parameters, determine derived values */
+  initial_setup(cinfo);
+
+  if (cinfo->scan_info != NULL) {
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+    validate_script(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    cinfo->progressive_mode = FALSE;	/* no script: one sequential scan */
+    cinfo->num_scans = 1;
+  }
+
+  if (cinfo->progressive_mode)	/*  TEMPORARY HACK ??? */
+    cinfo->optimize_coding = TRUE; /* assume default tables no good for progressive mode */
+
+  /* Initialize my private state */
+  if (transcode_only) {
+    /* no main pass in transcoding */
+    if (cinfo->optimize_coding)
+      master->pass_type = huff_opt_pass;
+    else
+      master->pass_type = output_pass;
+  } else {
+    /* for normal compression, first pass is always this type: */
+    master->pass_type = main_pass;
+  }
+  master->scan_number = 0;
+  master->pass_number = 0;
+  if (cinfo->optimize_coding)
+    master->total_passes = cinfo->num_scans * 2;	/* two passes per scan */
+  else
+    master->total_passes = cinfo->num_scans;
+}
diff --git a/src/libjpeg/jcomapi.c b/src/libjpeg/jcomapi.c
new file mode 100644
index 0000000..9b1fa75
--- /dev/null
+++ b/src/libjpeg/jcomapi.c
@@ -0,0 +1,106 @@
+/*
+ * jcomapi.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface routines that are used for both
+ * compression and decompression.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Abort processing of a JPEG compression or decompression operation,
+ * but don't destroy the object itself.
+ *
+ * For this, we merely clean up all the nonpermanent memory pools.
+ * Note that temp files (virtual arrays) are not allowed to belong to
+ * the permanent pool, so we will be able to close all temp files here.
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.
+ */
+
+GLOBAL(void)
+jpeg_abort (j_common_ptr cinfo)
+{
+  int pool;
+
+  /* Do nothing if called on a not-initialized or destroyed JPEG object. */
+  if (cinfo->mem == NULL)
+    return;
+
+  /* Releasing pools in reverse order might help avoid fragmentation
+   * with some (brain-damaged) malloc libraries.
+   */
+  for (pool = JPOOL_NUMPOOLS-1; pool > JPOOL_PERMANENT; pool--) {	/* JPOOL_PERMANENT is kept */
+    (*cinfo->mem->free_pool) (cinfo, pool);
+  }
+
+  /* Reset overall state for possible reuse of object */
+  if (cinfo->is_decompressor) {
+    cinfo->global_state = DSTATE_START;
+    /* Try to keep application from accessing now-deleted marker list.
+     * A bit kludgy to do it here, but this is the most central place.
+     */
+    ((j_decompress_ptr) cinfo)->marker_list = NULL;
+  } else {
+    cinfo->global_state = CSTATE_START;
+  }
+}
+
+
+/*
+ * Destruction of a JPEG object.
+ *
+ * Everything gets deallocated except the master JPEG object struct itself
+ * and the error manager struct.  Both of these are supplied by the application
+ * and must be freed, if necessary, by the application.  (Often they are on
+ * the stack and so don't need to be freed anyway.)
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.
+ */
+
+GLOBAL(void)
+jpeg_destroy (j_common_ptr cinfo)
+{
+  /* We need only tell the memory manager to release everything. */
+  /* NB: mem pointer is NULL if memory mgr failed to initialize. */
+  if (cinfo->mem != NULL)
+    (*cinfo->mem->self_destruct) (cinfo);
+  cinfo->mem = NULL;		/* be safe if jpeg_destroy is called twice */
+  cinfo->global_state = 0;	/* mark it destroyed */
+}
+
+
+/*
+ * Convenience routines for allocating quantization and Huffman tables.
+ * (Would jutils.c be a more reasonable place to put these?)
+ */
+
+GLOBAL(JQUANT_TBL *)
+jpeg_alloc_quant_table (j_common_ptr cinfo)
+{
+  /* Allocate from JPOOL_PERMANENT, the pool jpeg_abort does not release */
+  JQUANT_TBL *qtbl = (JQUANT_TBL *)
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JQUANT_TBL));
+
+  qtbl->sent_table = FALSE;	/* new tables always start out unsent */
+  return qtbl;
+}
+
+
+GLOBAL(JHUFF_TBL *)
+jpeg_alloc_huff_table (j_common_ptr cinfo)
+{
+  /* Allocate from JPOOL_PERMANENT, the pool jpeg_abort does not release */
+  JHUFF_TBL *htbl = (JHUFF_TBL *)
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, SIZEOF(JHUFF_TBL));
+
+  htbl->sent_table = FALSE;	/* new tables always start out unsent */
+  return htbl;
+}
diff --git a/src/libjpeg/jconfig.h b/src/libjpeg/jconfig.h
new file mode 100644
index 0000000..678e3d3
--- /dev/null
+++ b/src/libjpeg/jconfig.h
@@ -0,0 +1,26 @@
+/*
+ * see jconfig.doc
+ */
+
+#define HAVE_PROTOTYPES
+#define HAVE_UNSIGNED_CHAR
+#define HAVE_UNSIGNED_SHORT
+#define HAVE_STDDEF_H
+#define HAVE_STDLIB_H
+
+
+#define HAVE_JFIO
+
+#include "im_binfile.h"
+/* libjpeg file I/O hooks routed through imBinFile; every macro argument is parenthesized. */
+#define JFREAD(file,buf,sizeofbuf)  \
+  ((size_t) imBinFileRead((imBinFile*)(file), (buf), (sizeofbuf), 1))
+
+#define JFWRITE(file,buf,sizeofbuf)  \
+  ((size_t) imBinFileWrite((imBinFile*)(file), (buf), (sizeofbuf), 1))
+
+#define  JFFLUSH(file) \
+  ((void)(file))	/* no-op */
+
+#define JFERROR(file) \
+  imBinFileError((imBinFile*)(file))
diff --git a/src/libjpeg/jcparam.c b/src/libjpeg/jcparam.c
new file mode 100644
index 0000000..6fc48f5
--- /dev/null
+++ b/src/libjpeg/jcparam.c
@@ -0,0 +1,610 @@
+/*
+ * jcparam.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains optional default-setting code for the JPEG compressor.
+ * Applications do not have to use this file, but those that don't use it
+ * must know a lot more about the innards of the JPEG code.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Quantization table setup routines
+ */
+
+GLOBAL(void)
+jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl,
+		      const unsigned int *basic_table,
+		      int scale_factor, boolean force_baseline)
+/* Install quantization table number which_tbl, computed as basic_table
+ * multiplied by scale_factor (a percentage).
+ * Every entry is clamped to 1..32767; when force_baseline is TRUE the
+ * upper bound drops to 255 for JPEG baseline compatibility.
+ */
+{
+  JQUANT_TBL * qtbl;
+  long scaled, upper_limit;
+  int k;
+
+  /* Tables may only be changed before start_compress has been called. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (which_tbl < 0 || which_tbl >= NUM_QUANT_TBLS)
+    ERREXIT1(cinfo, JERR_DQT_INDEX, which_tbl);
+
+  /* Create the table on first use; afterwards it is overwritten in place. */
+  if (cinfo->quant_tbl_ptrs[which_tbl] == NULL)
+    cinfo->quant_tbl_ptrs[which_tbl] =
+      jpeg_alloc_quant_table((j_common_ptr) cinfo);
+  qtbl = cinfo->quant_tbl_ptrs[which_tbl];
+
+  /* 32767 is the max quantizer needed for 12 bits; baseline stops at 255. */
+  upper_limit = force_baseline ? 255L : 32767L;
+  for (k = 0; k < DCTSIZE2; k++) {
+    scaled = ((long) basic_table[k] * scale_factor + 50L) / 100L;
+    if (scaled <= 0L)
+      scaled = 1L;
+    else if (scaled > upper_limit)
+      scaled = upper_limit;
+    qtbl->quantval[k] = (UINT16) scaled;
+  }
+  qtbl->sent_table = FALSE;	/* so the table will be written to the file */
+}
+
+
+GLOBAL(void)
+jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor,
+			 boolean force_baseline)
+/* Set or change the 'quality' (quantization) setting: the default tables are
+ * installed as tables 0 and 1, scaled by scale_factor percent, with
+ * force_baseline passed through to jpeg_add_quant_table.  jpeg_set_quality
+ * (below) is usually the better choice; use this for linear percentage scaling.
+ */
+{
+  /* These are the sample quantization tables given in JPEG spec section K.1.
+   * The spec says that the values given produce "good" quality, and
+   * when divided by 2, "very good" quality.  Each holds DCTSIZE2 entries.
+   */
+  static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
+    16,  11,  10,  16,  24,  40,  51,  61,
+    12,  12,  14,  19,  26,  58,  60,  55,
+    14,  13,  16,  24,  40,  57,  69,  56,
+    14,  17,  22,  29,  51,  87,  80,  62,
+    18,  22,  37,  56,  68, 109, 103,  77,
+    24,  35,  55,  64,  81, 104, 113,  92,
+    49,  64,  78,  87, 103, 121, 120, 101,
+    72,  92,  95,  98, 112, 100, 103,  99
+  };
+  static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
+    17,  18,  24,  47,  99,  99,  99,  99,
+    18,  21,  26,  66,  99,  99,  99,  99,
+    24,  26,  56,  99,  99,  99,  99,  99,
+    47,  66,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99,
+    99,  99,  99,  99,  99,  99,  99,  99
+  };
+
+  /* Table 0 receives the luminance values, table 1 the chrominance values */
+  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
+		       scale_factor, force_baseline);
+  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
+		       scale_factor, force_baseline);
+}
+
+
+GLOBAL(int)
+jpeg_quality_scaling (int quality)
+/* Map a user-level quality rating onto the percentage by which the base
+ * quantization tables are scaled, following our recommended curve.
+ * Ratings outside 1..100 are clamped into that range first.
+ */
+{
+  int q = quality;
+
+  /* Clamp the rating; raising 0 to 1 also protects the division below. */
+  if (q <= 0)
+    q = 1;
+  else if (q > 100)
+    q = 100;
+
+  /* Q=50 leaves the base table untouched (scaling 100).
+   * Below 50 the scaling is 5000/Q, so it grows as quality drops.
+   * From 50 up it is 200 - 2*Q, reaching 0 at Q=100; jpeg_add_quant_table
+   * then turns every table entry into 1 (minimum quantization loss).
+   */
+  if (q < 50)
+    return 5000 / q;
+  return 200 - q * 2;
+}
+
+
+GLOBAL(void)
+jpeg_set_quality (j_compress_ptr cinfo, int quality, boolean force_baseline)
+/* Standard quality-setting entry point for typical user interfaces.
+ * The 0..100 rating is mapped to a scaling percentage and applied to the
+ * default tables; applications wanting detailed control over quantization
+ * tables can call the three preceding routines themselves instead.
+ */
+{
+  int scale_percent;
+
+  /* Rating -> percentage -> two scaled default quantization tables */
+  scale_percent = jpeg_quality_scaling(quality);
+  jpeg_set_linear_quality(cinfo, scale_percent, force_baseline);
+}
+
+
+/*
+ * Huffman table setup routines
+ */
+
+LOCAL(void)
+add_huff_table (j_compress_ptr cinfo,
+		JHUFF_TBL **htblptr, const UINT8 *bits, const UINT8 *val)
+/* Fill *htblptr (allocating it if needed) from a bits[]/val[] pair */
+{
+  JHUFF_TBL * htbl;
+  int total = 0;
+  int codelen;
+
+  if (*htblptr == NULL)
+    *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+  htbl = *htblptr;
+
+  /* Take over the number-of-symbols-of-each-code-length counts */
+  MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits));
+
+  /* Add up the counts to learn how many entries of val[] are meaningful,
+   * and reject impossible totals so the copy below cannot march off the
+   * end of memory.  jchuff.c performs a more thorough check later on.
+   */
+  for (codelen = 1; codelen <= 16; codelen++)
+    total += bits[codelen];
+  if (total < 1 || total > 256)
+    ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+  MEMCOPY(htbl->huffval, val, total * SIZEOF(UINT8));
+
+  htbl->sent_table = FALSE;	/* so the table will be written to the file */
+}
+
+
+LOCAL(void)
+std_huff_tables (j_compress_ptr cinfo)
+/* Set up the standard Huffman tables (cf. JPEG standard section K.3) as
+ * tables 0 (luminance) and 1 (chrominance).  Valid for 8-bit precision only! */
+{
+  static const UINT8 bits_dc_luminance[17] =
+    { /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 };
+  static const UINT8 val_dc_luminance[] =
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+  /* In each pair, bits[k] counts the codes of length k (1..16; entry 0 unused) */
+  static const UINT8 bits_dc_chrominance[17] =
+    { /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
+  static const UINT8 val_dc_chrominance[] =
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
+  /* AC luminance: the sixteen counts add up to 162 symbols */
+  static const UINT8 bits_ac_luminance[17] =
+    { /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d };
+  static const UINT8 val_ac_luminance[] =
+    { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+      0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+      0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+      0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+      0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+      0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+      0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+      0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+      0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+      0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+      0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+      0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+      0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+      0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+      0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+      0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+      0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+      0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+      0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+      0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+      0xf9, 0xfa };
+  /* AC chrominance: again 162 symbols in total */
+  static const UINT8 bits_ac_chrominance[17] =
+    { /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 };
+  static const UINT8 val_ac_chrominance[] =
+    { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+      0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+      0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+      0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+      0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+      0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+      0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+      0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+      0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+      0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+      0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+      0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+      0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+      0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+      0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+      0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+      0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+      0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+      0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+      0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+      0xf9, 0xfa };
+  /* Luminance tables go into slot 0, chrominance tables into slot 1 */
+  add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[0],
+		 bits_dc_luminance, val_dc_luminance);
+  add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[0],
+		 bits_ac_luminance, val_ac_luminance);
+  add_huff_table(cinfo, &cinfo->dc_huff_tbl_ptrs[1],
+		 bits_dc_chrominance, val_dc_chrominance);
+  add_huff_table(cinfo, &cinfo->ac_huff_tbl_ptrs[1],
+		 bits_ac_chrominance, val_ac_chrominance);
+}
+
+
+/*
+ * Default parameter setup for compression.
+ *
+ * Applications that don't choose to use this routine must do their
+ * own setup of all these parameters.  Alternately, you can call this
+ * to establish defaults and then alter parameters selectively.  This
+ * is the recommended approach since, if we add any new parameters,
+ * your code will still work (they'll be set to reasonable defaults).
+ */
+
+GLOBAL(void)
+jpeg_set_defaults (j_compress_ptr cinfo)
+{
+  int i;
+
+  /* Safety check: defaults may only be set while still in CSTATE_START. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Allocate comp_info array large enough for maximum component count.
+   * Array is made permanent in case application wants to compress
+   * multiple images at same param settings; an existing array is kept.
+   */
+  if (cinfo->comp_info == NULL)
+    cinfo->comp_info = (jpeg_component_info *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  MAX_COMPONENTS * SIZEOF(jpeg_component_info));
+
+  /* Initialize everything not dependent on the color space */
+
+  cinfo->data_precision = BITS_IN_JSAMPLE;
+  /* Set up two quantization tables using default quality of 75,
+   * limited to the baseline range */
+  jpeg_set_quality(cinfo, 75, TRUE);
+  /* Install the standard DC and AC Huffman tables in slots 0 and 1 */
+  std_huff_tables(cinfo);
+
+  /* Initialize default arithmetic coding conditioning */
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    cinfo->arith_dc_L[i] = 0;
+    cinfo->arith_dc_U[i] = 1;
+    cinfo->arith_ac_K[i] = 5;
+  }
+
+  /* Default is no multiple-scan output (no scan script installed) */
+  cinfo->scan_info = NULL;
+  cinfo->num_scans = 0;
+
+  /* Expect normal source image, not raw downsampled data */
+  cinfo->raw_data_in = FALSE;
+
+  /* Use Huffman coding, not arithmetic coding, by default */
+  cinfo->arith_code = FALSE;
+
+  /* By default, don't do extra passes to optimize entropy coding */
+  cinfo->optimize_coding = FALSE;
+  /* The standard Huffman tables are only valid for 8-bit data precision.
+   * If the precision is higher, force optimization on so that usable
+   * tables will be computed.  This test can be removed if default tables
+   * are supplied that are valid for the desired precision.
+   */
+  if (cinfo->data_precision > 8)
+    cinfo->optimize_coding = TRUE;
+
+  /* By default, use the simpler non-cosited sampling alignment */
+  cinfo->CCIR601_sampling = FALSE;
+
+  /* No input smoothing */
+  cinfo->smoothing_factor = 0;
+
+  /* DCT algorithm preference */
+  cinfo->dct_method = JDCT_DEFAULT;
+
+  /* No restart markers: both ways of specifying the interval are zeroed */
+  cinfo->restart_interval = 0;
+  cinfo->restart_in_rows = 0;
+
+  /* Fill in default JFIF marker parameters.  Note that whether the marker
+   * will actually be written is determined by jpeg_set_colorspace.
+   *
+   * By default, the library emits JFIF version code 1.01.
+   * An application that wants to emit JFIF 1.02 extension markers should set
+   * JFIF_minor_version to 2.  We could probably get away with just defaulting
+   * to 1.02, but there may still be some decoders in use that will complain
+   * about that; saying 1.01 should minimize compatibility problems.
+   */
+  cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;	/* Pixel size is unknown by default */
+  cinfo->X_density = 1;		/* Pixel aspect ratio is square by default */
+  cinfo->Y_density = 1;
+
+  /* Choose JPEG colorspace based on input space, set defaults accordingly.
+   * This is done last; it fills in the comp_info array allocated above. */
+  jpeg_default_colorspace(cinfo);
+}
+
+
+/*
+ * Select an appropriate JPEG colorspace for in_color_space.
+ */
+
+GLOBAL(void)
+jpeg_default_colorspace (j_compress_ptr cinfo)
+{
+  J_COLOR_SPACE jpeg_space;
+
+  /* Every supported input space except RGB is stored untranslated;
+   * RGB input is converted to YCbCr by default.
+   */
+  switch (cinfo->in_color_space) {
+  case JCS_RGB:
+    jpeg_space = JCS_YCbCr;
+    break;
+  case JCS_GRAYSCALE:
+  case JCS_YCbCr:
+  case JCS_CMYK:
+  case JCS_YCCK:
+  case JCS_UNKNOWN:
+    jpeg_space = cinfo->in_color_space;
+    break;
+  default:
+    /* Input space is not one of the values handled above */
+    ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    return;
+  }
+  jpeg_set_colorspace(cinfo, jpeg_space);
+}
+
+
+/*
+ * Set the JPEG colorspace, and choose colorspace-dependent default values.
+ */
+
+GLOBAL(void)
+jpeg_set_colorspace (j_compress_ptr cinfo, J_COLOR_SPACE colorspace)
+{
+  jpeg_component_info * compptr;
+  int ci;
+  /* SET_COMP fills comp_info[index]: id, h/v sampling, quant/DC/AC table nos */
+#define SET_COMP(index,id,hsamp,vsamp,quant,dctbl,actbl)  \
+  (compptr = &cinfo->comp_info[index], \
+   compptr->component_id = (id), \
+   compptr->h_samp_factor = (hsamp), \
+   compptr->v_samp_factor = (vsamp), \
+   compptr->quant_tbl_no = (quant), \
+   compptr->dc_tbl_no = (dctbl), \
+   compptr->ac_tbl_no = (actbl) )
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* For all colorspaces, we use Q and Huff tables 0 for luminance components,
+   * tables 1 for chrominance components.
+   */
+
+  cinfo->jpeg_color_space = colorspace;
+  /* Both marker flags start out FALSE; the cases below turn one on as needed */
+  cinfo->write_JFIF_header = FALSE; /* No marker for non-JFIF colorspaces */
+  cinfo->write_Adobe_marker = FALSE; /* write no Adobe marker by default */
+
+  switch (colorspace) {
+  case JCS_GRAYSCALE:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->num_components = 1;
+    /* JFIF specifies component ID 1 */
+    SET_COMP(0, 1, 1,1, 0, 0,0);
+    break;
+  case JCS_RGB:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag RGB */
+    cinfo->num_components = 3;
+    SET_COMP(0, 0x52 /* 'R' */, 1,1, 0, 0,0);
+    SET_COMP(1, 0x47 /* 'G' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x42 /* 'B' */, 1,1, 0, 0,0);
+    break;
+  case JCS_YCbCr:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->num_components = 3;
+    /* JFIF specifies component IDs 1,2,3 */
+    /* We default to 2x2 subsamples of chrominance */
+    SET_COMP(0, 1, 2,2, 0, 0,0);
+    SET_COMP(1, 2, 1,1, 1, 1,1);
+    SET_COMP(2, 3, 1,1, 1, 1,1);
+    break;
+  case JCS_CMYK:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag CMYK */
+    cinfo->num_components = 4;
+    SET_COMP(0, 0x43 /* 'C' */, 1,1, 0, 0,0);
+    SET_COMP(1, 0x4D /* 'M' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x59 /* 'Y' */, 1,1, 0, 0,0);
+    SET_COMP(3, 0x4B /* 'K' */, 1,1, 0, 0,0);
+    break;
+  case JCS_YCCK:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag YCCK */
+    cinfo->num_components = 4;
+    SET_COMP(0, 1, 2,2, 0, 0,0);
+    SET_COMP(1, 2, 1,1, 1, 1,1);
+    SET_COMP(2, 3, 1,1, 1, 1,1);
+    SET_COMP(3, 4, 2,2, 0, 0,0); /* 4th component: same settings as the 1st */
+    break;
+  case JCS_UNKNOWN:
+    cinfo->num_components = cinfo->input_components;
+    if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	       MAX_COMPONENTS);
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      SET_COMP(ci, ci, 1,1, 0, 0,0); /* component id = index, tables 0 */
+    }
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+  }
+}
+
+
+#ifdef C_PROGRESSIVE_SUPPORTED
+
+LOCAL(jpeg_scan_info *)
+fill_a_scan (jpeg_scan_info * scanptr, int ci,
+	     int Ss, int Se, int Ah, int Al)
+/* Support routine: write one single-component scan, return the next entry */
+{
+  jpeg_scan_info * next_slot = scanptr + 1;	/* entry after the one filled */
+  scanptr->comps_in_scan = 1;
+  scanptr->component_index[0] = ci;
+  scanptr->Ss = Ss;
+  scanptr->Se = Se;
+  scanptr->Ah = Ah;
+  scanptr->Al = Al;
+  return next_slot;
+}
+
+LOCAL(jpeg_scan_info *)
+fill_scans (jpeg_scan_info * scanptr, int ncomps,
+	    int Ss, int Se, int Ah, int Al)
+/* Support routine: emit ncomps single-component scans, one for each of
+ * components 0..ncomps-1, all sharing the same Ss/Se/Ah/Al settings.
+ * Returns the script entry following the last one written.
+ */
+{
+  int comp = 0;
+
+  while (comp < ncomps) {
+    /* fill_a_scan writes one entry and returns the entry after it */
+    scanptr = fill_a_scan(scanptr, comp, Ss, Se, Ah, Al);
+    comp++;
+  }
+
+  return scanptr;
+}
+
+LOCAL(jpeg_scan_info *)
+fill_dc_scans (jpeg_scan_info * scanptr, int ncomps, int Ah, int Al)
+/* Support routine: one interleaved DC scan when the components fit into
+ * a single scan, otherwise a separate DC scan per component */
+{
+  int ci;
+
+  /* Too many components to interleave: fall back to one DC scan each */
+  if (ncomps > MAX_COMPS_IN_SCAN)
+    return fill_scans(scanptr, ncomps, 0, 0, Ah, Al);
+
+  /* Single scan listing all components, with Ss = Se = 0 */
+  for (ci = 0; ci < ncomps; ci++)
+    scanptr->component_index[ci] = ci;
+  scanptr->comps_in_scan = ncomps;
+  scanptr->Ss = 0;
+  scanptr->Se = 0;
+  scanptr->Ah = Ah;
+  scanptr->Al = Al;
+  return scanptr + 1;
+}
+
+
+/*
+ * Create a recommended progressive-JPEG script.
+ * cinfo->num_components and cinfo->jpeg_color_space must be correct.
+ */
+
+GLOBAL(void)
+jpeg_simple_progression (j_compress_ptr cinfo)
+{
+  int ncomps = cinfo->num_components;
+  boolean ycc_script;		/* TRUE when the YCbCr-specific script applies */
+  int nscans;
+  jpeg_scan_info * scanptr;
+
+  /* The scan script may only be changed before start_compress is called. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Three-component YCbCr images get a custom script; every other case
+   * uses the all-purpose one.
+   */
+  ycc_script = (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr);
+
+  /* Number of script entries.  These counts must agree with the number of
+   * scans generated further down:
+   *   YCbCr script: 10 scans;
+   *   otherwise 2 DC scans plus 4 AC scans per component, where each DC
+   *   scan splits into one per component once they cannot be interleaved.
+   */
+  if (ycc_script)
+    nscans = 10;
+  else if (ncomps > MAX_COMPS_IN_SCAN)
+    nscans = 6 * ncomps;
+  else
+    nscans = 2 + 4 * ncomps;
+
+  /* The script goes into the permanent pool so that it survives multiple
+   * compressions with unchanged settings.  Space from an earlier call is
+   * re-used when big enough, and at least 10 entries are always requested
+   * (enough for YCbCr even if first asked for grayscale), which avoids a
+   * leak when this routine is called repeatedly on one JPEG object.
+   */
+  if (cinfo->script_space == NULL || cinfo->script_space_size < nscans) {
+    cinfo->script_space_size = MAX(nscans, 10);
+    cinfo->script_space = (jpeg_scan_info *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+			cinfo->script_space_size * SIZEOF(jpeg_scan_info));
+  }
+  cinfo->num_scans = nscans;
+  cinfo->scan_info = scanptr = cinfo->script_space;
+
+  if (! ycc_script) {
+    /* All-purpose script for other color spaces. */
+    /* Successive approximation first pass */
+    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
+    scanptr = fill_scans(scanptr, ncomps, 1, 5, 0, 2);
+    scanptr = fill_scans(scanptr, ncomps, 6, 63, 0, 2);
+    /* Successive approximation second pass */
+    scanptr = fill_scans(scanptr, ncomps, 1, 63, 2, 1);
+    /* Successive approximation final pass */
+    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
+    scanptr = fill_scans(scanptr, ncomps, 1, 63, 1, 0);
+    return;
+  }
+  /* Custom script for YCbCr color images. */
+  scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);	/* initial DC scan */
+  /* Initial AC scan: get some luma data out in a hurry */
+  scanptr = fill_a_scan(scanptr, 0, 1, 5, 0, 2);
+  /* Chroma data is too small to be worth expending many scans on */
+  scanptr = fill_a_scan(scanptr, 2, 1, 63, 0, 1);
+  scanptr = fill_a_scan(scanptr, 1, 1, 63, 0, 1);
+  scanptr = fill_a_scan(scanptr, 0, 6, 63, 0, 2);	/* rest of luma AC band */
+  scanptr = fill_a_scan(scanptr, 0, 1, 63, 2, 1);	/* next bit of luma AC */
+  scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);	/* finish DC */
+  /* Finish AC; luma's bottom bit comes last as it's usually the largest scan */
+  scanptr = fill_a_scan(scanptr, 2, 1, 63, 1, 0);
+  scanptr = fill_a_scan(scanptr, 1, 1, 63, 1, 0);
+  scanptr = fill_a_scan(scanptr, 0, 1, 63, 1, 0);
+}
+
+#endif /* C_PROGRESSIVE_SUPPORTED */
diff --git a/src/libjpeg/jcphuff.c b/src/libjpeg/jcphuff.c
new file mode 100644
index 0000000..07f9178
--- /dev/null
+++ b/src/libjpeg/jcphuff.c
@@ -0,0 +1,833 @@
+/*
+ * jcphuff.c
+ *
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy encoding routines for progressive JPEG.
+ *
+ * We do not support output suspension in this module, since the library
+ * currently does not allow multiple-scan files to be written with output
+ * suspension.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jchuff.h"		/* Declarations shared with jchuff.c */
+
+#ifdef C_PROGRESSIVE_SUPPORTED
+
+/* Expanded entropy encoder object for progressive Huffman encoding. */
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  /* Mode flag: TRUE for optimization, FALSE for actual data output */
+  boolean gather_statistics;
+
+  /* Bit-level coding status.
+   * next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
+   */
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+  INT32 put_buffer;		/* current bit-accumulation buffer */
+  int put_bits;			/* # of bits now in it */
+  j_compress_ptr cinfo;		/* link to cinfo (needed for dump_buffer) */
+
+  /* Coding status for DC components */
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef, per scan component */
+
+  /* Coding status for AC components */
+  int ac_tbl_no;		/* the table number of the single component */
+  unsigned int EOBRUN;		/* run length of EOBs */
+  unsigned int BE;		/* # of buffered correction bits before MCU */
+  char * bit_buffer;		/* buffer for correction bits (1 per char) */
+  /* packing correction bits tightly would save some space but cost time... */
+
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+  int next_restart_num;		/* next restart number to write (0-7) */
+
+  /* Pointers to derived tables (these workspaces have image lifespan).
+   * Since any one scan codes only DC or only AC, we only need one set
+   * of tables, not one for DC and one for AC.
+   */
+  c_derived_tbl * derived_tbls[NUM_HUFF_TBLS];
+
+  /* Statistics tables for optimization; again, one set is enough.
+   * start_pass_phuff gives each table in use 257 zeroed long counters. */
+  long * count_ptrs[NUM_HUFF_TBLS];
+} phuff_entropy_encoder;
+/* Pointer type through which cinfo->entropy is accessed in this module */
+typedef phuff_entropy_encoder * phuff_entropy_ptr;
+
+/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
+ * buffer can hold.  Larger sizes may slightly improve compression, but
+ * 1000 is already well into the realm of overkill.
+ * The minimum safe size is 64 bits.
+ */
+/* The buffer holds one bit per char, so this is also its size in chars. */
+#define MAX_CORR_BITS  1000	/* Max # of correction bits I can buffer */
+
+/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32.
+ * We assume that int right shift is unsigned if INT32 right shift is,
+ * which should be safe.
+ */
+/* For negative x the first variant ORs in one-bits from bit 16-shft upward. */
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS	int ishift_temp;
+#define IRIGHT_SHIFT(x,shft)  \
+	((ishift_temp = (x)) < 0 ? \
+	 (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \
+	 (ishift_temp >> (shft)))
+#else
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+/* Forward declarations: the MCU encoders and pass finishers start_pass_phuff installs */
+METHODDEF(boolean) encode_mcu_DC_first JPP((j_compress_ptr cinfo,
+					    JBLOCKROW *MCU_data));
+METHODDEF(boolean) encode_mcu_AC_first JPP((j_compress_ptr cinfo,
+					    JBLOCKROW *MCU_data));
+METHODDEF(boolean) encode_mcu_DC_refine JPP((j_compress_ptr cinfo,
+					     JBLOCKROW *MCU_data));
+METHODDEF(boolean) encode_mcu_AC_refine JPP((j_compress_ptr cinfo,
+					     JBLOCKROW *MCU_data));
+METHODDEF(void) finish_pass_phuff JPP((j_compress_ptr cinfo));
+METHODDEF(void) finish_pass_gather_phuff JPP((j_compress_ptr cinfo));
+
+
+/*
+ * Initialize for a Huffman-compressed scan using progressive JPEG.
+ */
+
+METHODDEF(void)
+start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics)
+{  
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  boolean is_DC_band;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+  /* Record the back link and pass mode; the emit_* helpers consult both */
+  entropy->cinfo = cinfo;
+  entropy->gather_statistics = gather_statistics;
+
+  is_DC_band = (cinfo->Ss == 0);
+
+  /* We assume jcmaster.c already validated the scan parameters. */
+
+  /* Select execution routines: Ah == 0 means a first pass, else refinement */
+  if (cinfo->Ah == 0) {
+    if (is_DC_band)
+      entropy->pub.encode_mcu = encode_mcu_DC_first;
+    else
+      entropy->pub.encode_mcu = encode_mcu_AC_first;
+  } else {
+    if (is_DC_band)
+      entropy->pub.encode_mcu = encode_mcu_DC_refine;
+    else {
+      entropy->pub.encode_mcu = encode_mcu_AC_refine;
+      /* AC refinement needs a correction bit buffer (allocated only once) */
+      if (entropy->bit_buffer == NULL)
+	entropy->bit_buffer = (char *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      MAX_CORR_BITS * SIZEOF(char));
+    }
+  }
+  if (gather_statistics)
+    entropy->pub.finish_pass = finish_pass_gather_phuff;
+  else
+    entropy->pub.finish_pass = finish_pass_phuff;
+
+  /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1
+   * for AC coefficients.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* Initialize DC predictions to 0 */
+    entropy->last_dc_val[ci] = 0;
+    /* Get table index */
+    if (is_DC_band) {
+      if (cinfo->Ah != 0)	/* DC refinement needs no table */
+	continue;
+      tbl = compptr->dc_tbl_no;
+    } else {
+      entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;
+    }
+    if (gather_statistics) {
+      /* Check for invalid table index */
+      /* (make_c_derived_tbl does this in the other path) */
+      if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
+        ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
+      /* Allocate and zero the statistics tables */
+      /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
+      if (entropy->count_ptrs[tbl] == NULL)
+	entropy->count_ptrs[tbl] = (long *)
+	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				      257 * SIZEOF(long));
+      MEMZERO(entropy->count_ptrs[tbl], 257 * SIZEOF(long));
+    } else {
+      /* Compute derived values for Huffman table */
+      /* We may do this more than once for a table, but it's not expensive */
+      jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
+			      & entropy->derived_tbls[tbl]);
+    }
+  }
+
+  /* Initialize AC stuff: no pending EOB run, no buffered correction bits */
+  entropy->EOBRUN = 0;
+  entropy->BE = 0;
+
+  /* Initialize bit buffer to empty */
+  entropy->put_buffer = 0;
+  entropy->put_bits = 0;
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/* Outputting bytes to the file.
+ * NB: these must be called only when actually outputting,
+ * that is, entropy->gather_statistics == FALSE.
+ */
+
+/* Emit a byte, calling dump_buffer as soon as the buffer has no space left */
+#define emit_byte(entropy,val)  \
+	{ *(entropy)->next_output_byte++ = (JOCTET) (val);  \
+	  if (--(entropy)->free_in_buffer == 0)  \
+	    dump_buffer(entropy); }
+
+
+LOCAL(void)
+dump_buffer (phuff_entropy_ptr entropy)
+/* Flush the full output buffer; a suspension request is a fatal error here. */
+{
+  j_compress_ptr cinfo = entropy->cinfo;
+  struct jpeg_destination_mgr * dest = cinfo->dest;
+
+  if (! (*dest->empty_output_buffer) (cinfo))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+  entropy->free_in_buffer = dest->free_in_buffer;	/* resync local copies */
+  entropy->next_output_byte = dest->next_output_byte;
+}
+
+
+/* Outputting bits to the file */
+
+/* Only the right 24 bits of put_buffer are used; the valid bits are
+ * left-justified in this part.  At most 16 bits can be passed to emit_bits
+ * in one call, and we never retain more than 7 bits in put_buffer
+ * between calls, so 24 bits are sufficient.
+ */
+
+INLINE
+LOCAL(void)
+emit_bits (phuff_entropy_ptr entropy, unsigned int code, int size)
+/* Emit the low 'size' bits of 'code'; nothing is output in gather mode */
+{
+  /* This routine is heavily used, so it's worth coding tightly. */
+  register INT32 put_buffer = (INT32) code;
+  register int put_bits = entropy->put_bits;
+
+  /* if size is 0, caller used an invalid Huffman table entry */
+  if (size == 0)
+    ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
+
+  if (entropy->gather_statistics)
+    return;			/* do nothing if we're only getting stats */
+
+  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
+  
+  put_bits += size;		/* new number of bits in buffer */
+  
+  put_buffer <<= 24 - put_bits; /* align incoming bits */
+
+  put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */
+
+  while (put_bits >= 8) {
+    int c = (int) ((put_buffer >> 16) & 0xFF);
+    /* c is the byte held in bits 16..23, the top of the 24-bit window */
+    emit_byte(entropy, c);
+    if (c == 0xFF) {		/* need to stuff a zero byte? */
+      emit_byte(entropy, 0);
+    }
+    put_buffer <<= 8;
+    put_bits -= 8;
+  }
+
+  entropy->put_buffer = put_buffer; /* save the leftover bits and their count */
+  entropy->put_bits = put_bits;
+}
+
+
+LOCAL(void)
+flush_bits (phuff_entropy_ptr entropy)
+{
+  emit_bits(entropy, 0x7F, 7); /* seven 1-bits pad out any partial byte */
+  entropy->put_bits = 0;       /* afterwards the bit buffer counts as empty */
+  entropy->put_buffer = 0;
+}
+
+
+/*
+ * Emit (or just count) a Huffman symbol.
+ */
+
+INLINE
+LOCAL(void)
+emit_symbol (phuff_entropy_ptr entropy, int tbl_no, int symbol)
+{
+  if (! entropy->gather_statistics) {
+    c_derived_tbl * codes = entropy->derived_tbls[tbl_no];
+    emit_bits(entropy, codes->ehufco[symbol], codes->ehufsi[symbol]);
+    return;
+  }
+  entropy->count_ptrs[tbl_no][symbol]++;	/* statistics pass: only count */
+}
+
+
+/*
+ * Emit bits from a correction bit buffer.
+ */
+
+LOCAL(void)
+emit_buffered_bits (phuff_entropy_ptr entropy, char * bufstart,
+		    unsigned int nbits)
+{
+  unsigned int k;
+
+  /* Nothing is written while statistics are being gathered */
+  if (entropy->gather_statistics)
+    return;
+
+  for (k = 0; k < nbits; k++)	/* each buffer element is emitted as one bit */
+    emit_bits(entropy, (unsigned int) bufstart[k], 1);
+}
+
+
+/*
+ * Emit any pending EOBRUN symbol plus buffered correction bits; no-op if EOBRUN is 0.
+ */
+
+LOCAL(void)
+emit_eobrun (phuff_entropy_ptr entropy)
+{
+  register int temp, nbits;
+
+  if (entropy->EOBRUN > 0) {	/* if there is any pending EOBRUN */
+    temp = entropy->EOBRUN;
+    nbits = 0;			/* becomes the position of EOBRUN's highest set bit */
+    while ((temp >>= 1))
+      nbits++;
+    /* safety check: shouldn't happen given limited correction-bit buffer */
+    if (nbits > 14)
+      ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
+
+    emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4); /* symbol: nbits in high nibble, 0 in low */
+    if (nbits)			/* emit_bits rejects calls with size 0 */
+      emit_bits(entropy, entropy->EOBRUN, nbits); /* emit_bits keeps only the low nbits bits */
+
+    entropy->EOBRUN = 0;
+
+    /* Emit any buffered correction bits */
+    emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
+    entropy->BE = 0;		/* correction-bit buffer is empty again */
+  }
+}
+
+
+/*
+ * Emit a restart marker (number restart_num) & resynchronize predictions.
+ */
+
+LOCAL(void)
+emit_restart (phuff_entropy_ptr entropy, int restart_num)
+{
+  int ci;
+
+  emit_eobrun(entropy);		/* flush (or count) any pending EOB run first */
+
+  if (! entropy->gather_statistics) {	/* marker bytes are written only on a real output pass */
+    flush_bits(entropy);	/* pad to a byte boundary before the marker */
+    emit_byte(entropy, 0xFF);
+    emit_byte(entropy, JPEG_RST0 + restart_num);
+  }
+
+  if (entropy->cinfo->Ss == 0) {	/* Ss == 0: this scan starts at the DC coefficient */
+    /* Re-initialize DC predictions to 0 */
+    for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
+      entropy->last_dc_val[ci] = 0;
+  } else {
+    /* Re-initialize all AC-related fields to 0 */
+    entropy->EOBRUN = 0;
+    entropy->BE = 0;
+  }
+}
+
+
+/*
+ * MCU encoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).  Always returns TRUE.
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  register int temp, temp2;
+  register int nbits;
+  int blkn, ci;
+  int Al = cinfo->Al;		/* shift count of the point transform */
+  JBLOCKROW block;
+  jpeg_component_info * compptr;
+  ISHIFT_TEMPS
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte; /* work on a copy of the output state */
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];	/* index of the scan component owning this block */
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Compute the DC value after the required point transform by Al.
+     * This is simply an arithmetic right shift.
+     */
+    temp2 = IRIGHT_SHIFT((int) ((*block)[0]), Al);
+
+    /* DC differences are figured on the point-transformed values. */
+    temp = temp2 - entropy->last_dc_val[ci];
+    entropy->last_dc_val[ci] = temp2;	/* remember as predictor for this component's next block */
+
+    /* Encode the DC coefficient difference per section G.1.2.1 */
+    temp2 = temp;
+    if (temp < 0) {
+      temp = -temp;		/* temp is abs value of input */
+      /* For a negative input, want temp2 = bitwise complement of abs(input) */
+      /* This code assumes we are on a two's complement machine */
+      temp2--;
+    }
+    
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 0;
+    while (temp) {
+      nbits++;
+      temp >>= 1;
+    }
+    /* Check for out-of-range coefficient values.
+     * Since we're encoding a difference, the range limit is twice as much.
+     */
+    if (nbits > MAX_COEF_BITS+1)
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+    
+    /* Count/emit the Huffman-coded symbol for the number of bits */
+    emit_symbol(entropy, compptr->dc_tbl_no, nbits);
+    
+    /* Emit that number of bits of the value, if positive, */
+    /* or the complement of its magnitude, if negative. */
+    if (nbits)			/* emit_bits rejects calls with size 0 */
+      emit_bits(entropy, (unsigned int) temp2, nbits);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte; /* publish the updated output state */
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;	/* restart numbers cycle through 0..7 */
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).  Always returns TRUE.
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  register int temp, temp2;
+  register int nbits;
+  register int r, k;
+  int Se = cinfo->Se;		/* last coefficient index (zigzag order) coded by this scan */
+  int Al = cinfo->Al;		/* shift count of the point transform */
+  JBLOCKROW block;
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte; /* work on a copy of the output state */
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data block (only MCU_data[0] is examined here) */
+  block = MCU_data[0];
+
+  /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
+  
+  r = 0;			/* r = run length of zeros */
+   
+  for (k = cinfo->Ss; k <= Se; k++) {
+    if ((temp = (*block)[jpeg_natural_order[k]]) == 0) {
+      r++;
+      continue;
+    }
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value; so the code is
+     * interwoven with finding the abs value (temp) and output bits (temp2).
+     */
+    if (temp < 0) {
+      temp = -temp;		/* temp is abs value of input */
+      temp >>= Al;		/* apply the point transform */
+      /* For a negative coef, want temp2 = bitwise complement of abs(coef) */
+      temp2 = ~temp;
+    } else {
+      temp >>= Al;		/* apply the point transform */
+      temp2 = temp;
+    }
+    /* Watch out for case that nonzero coef is zero after point transform */
+    if (temp == 0) {
+      r++;
+      continue;
+    }
+
+    /* Emit any pending EOBRUN */
+    if (entropy->EOBRUN > 0)
+      emit_eobrun(entropy);
+    /* if run length > 15, must emit special run-length-16 codes (0xF0) */
+    while (r > 15) {
+      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0);
+      r -= 16;
+    }
+
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = 1;			/* there must be at least one 1 bit */
+    while ((temp >>= 1))
+      nbits++;
+    /* Check for out-of-range coefficient values */
+    if (nbits > MAX_COEF_BITS)
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+    /* Count/emit Huffman symbol for run length / number of bits */
+    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits);
+
+    /* Emit that number of bits of the value, if positive, */
+    /* or the complement of its magnitude, if negative. */
+    emit_bits(entropy, (unsigned int) temp2, nbits);
+
+    r = 0;			/* reset zero run length */
+  }
+
+  if (r > 0) {			/* If there are trailing zeroes, */
+    entropy->EOBRUN++;		/* count an EOB */
+    if (entropy->EOBRUN == 0x7FFF)
+      emit_eobrun(entropy);	/* force it out to avoid overflow */
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte; /* publish the updated output state */
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;	/* restart numbers cycle through 0..7 */
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component, although the spec
+ * is not very clear on the point.  No Huffman symbols are used; returns TRUE.
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  register int temp;
+  int blkn;
+  int Al = cinfo->Al;		/* position of the bit sent by this scan */
+  JBLOCKROW block;
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte; /* work on a copy of the output state */
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+
+    /* We simply emit the Al'th bit of the DC coefficient value. */
+    temp = (*block)[0];
+    emit_bits(entropy, (unsigned int) (temp >> Al), 1); /* emit_bits keeps only the lowest bit */
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte; /* publish the updated output state */
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;	/* restart numbers cycle through 0..7 */
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC successive approximation refinement scan.  Returns TRUE.
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  register int temp;
+  register int r, k;
+  int EOB;
+  char *BR_buffer;
+  unsigned int BR;
+  int Se = cinfo->Se;		/* last coefficient index (zigzag order) coded by this scan */
+  int Al = cinfo->Al;		/* shift count of the point transform */
+  JBLOCKROW block;
+  int absvalues[DCTSIZE2];	/* indexed by k; only entries Ss..Se are filled and read */
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte; /* work on a copy of the output state */
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data block (only MCU_data[0] is examined here) */
+  block = MCU_data[0];
+
+  /* It is convenient to make a pre-pass to determine the transformed
+   * coefficients' absolute values and the EOB position.
+   */
+  EOB = 0;
+  for (k = cinfo->Ss; k <= Se; k++) {
+    temp = (*block)[jpeg_natural_order[k]];
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value.
+     */
+    if (temp < 0)
+      temp = -temp;		/* temp is abs value of input */
+    temp >>= Al;		/* apply the point transform */
+    absvalues[k] = temp;	/* save abs value for main pass */
+    if (temp == 1)
+      EOB = k;			/* EOB = index of last newly-nonzero coef */
+  }
+
+  /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
+  
+  r = 0;			/* r = run length of zeros */
+  BR = 0;			/* BR = count of buffered bits added now */
+  BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
+
+  for (k = cinfo->Ss; k <= Se; k++) {
+    if ((temp = absvalues[k]) == 0) {
+      r++;
+      continue;
+    }
+
+    /* Emit any required ZRLs, but not if they can be folded into EOB */
+    while (r > 15 && k <= EOB) {
+      /* emit any pending EOBRUN and the BE correction bits */
+      emit_eobrun(entropy);
+      /* Emit ZRL */
+      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0);
+      r -= 16;
+      /* Emit buffered correction bits that must be associated with ZRL */
+      emit_buffered_bits(entropy, BR_buffer, BR);
+      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
+      BR = 0;
+    }
+
+    /* If the coef was previously nonzero, it only needs a correction bit.
+     * NOTE: a straight translation of the spec's figure G.7 would suggest
+     * that we also need to test r > 15.  But if r > 15, we can only get here
+     * if k > EOB, which implies that this coefficient is not 1.
+     */
+    if (temp > 1) {
+      /* The correction bit is the next bit of the absolute value. */
+      BR_buffer[BR++] = (char) (temp & 1);
+      continue;
+    }
+
+    /* Emit any pending EOBRUN and the BE correction bits */
+    emit_eobrun(entropy);
+
+    /* Count/emit Huffman symbol for run length / number of bits */
+    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1);
+
+    /* Emit output bit for newly-nonzero coef: 0 if negative, 1 otherwise */
+    temp = ((*block)[jpeg_natural_order[k]] < 0) ? 0 : 1;
+    emit_bits(entropy, (unsigned int) temp, 1);
+
+    /* Emit buffered correction bits that must be associated with this code */
+    emit_buffered_bits(entropy, BR_buffer, BR);
+    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */
+    BR = 0;
+    r = 0;			/* reset zero run length */
+  }
+
+  if (r > 0 || BR > 0) {	/* If there are trailing zeroes, */
+    entropy->EOBRUN++;		/* count an EOB */
+    entropy->BE += BR;		/* concat my correction bits to older ones */
+    /* We force out the EOB if we risk either:
+     * 1. overflow of the EOB counter;
+     * 2. overflow of the correction bit buffer during the next MCU.
+     */
+    if (entropy->EOBRUN == 0x7FFF || entropy->BE > (MAX_CORR_BITS-DCTSIZE2+1))
+      emit_eobrun(entropy);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte; /* publish the updated output state */
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;	/* restart numbers cycle through 0..7 */
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Finish up at the end of a Huffman-compressed progressive scan.
+ */
+
+METHODDEF(void)
+finish_pass_phuff (j_compress_ptr cinfo)
+{   
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte; /* work on a copy of the output state */
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Flush out any buffered data */
+  emit_eobrun(entropy);		/* pending EOB run and correction bits, if any */
+  flush_bits(entropy);		/* then pad the final partial byte */
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte; /* publish the updated output state */
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+}
+
+
+/*
+ * Finish up a statistics-gathering pass and create the new Huffman tables.
+ */
+
+METHODDEF(void)
+finish_pass_gather_phuff (j_compress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  boolean is_DC_band;
+  int ci, tbl;
+  jpeg_component_info * compptr;
+  JHUFF_TBL **htblptr;
+  boolean did[NUM_HUFF_TBLS];	/* did[t] is TRUE once table t has been generated */
+
+  /* Flush out buffered data (all we care about is counting the EOB symbol) */
+  emit_eobrun(entropy);
+
+  is_DC_band = (cinfo->Ss == 0);
+
+  /* It's important not to apply jpeg_gen_optimal_table more than once
+   * per table, because it clobbers the input frequency counts!
+   */
+  MEMZERO(did, SIZEOF(did));
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    if (is_DC_band) {
+      if (cinfo->Ah != 0)	/* DC refinement needs no table */
+	continue;
+      tbl = compptr->dc_tbl_no;
+    } else {
+      tbl = compptr->ac_tbl_no;
+    }
+    if (! did[tbl]) {
+      if (is_DC_band)
+        htblptr = & cinfo->dc_huff_tbl_ptrs[tbl];
+      else
+        htblptr = & cinfo->ac_huff_tbl_ptrs[tbl];
+      if (*htblptr == NULL)	/* allocate the table slot on first use */
+        *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+      jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]);
+      did[tbl] = TRUE;
+    }
+  }
+}
+
+
+/*
+ * Module initialization routine for progressive Huffman entropy encoding.
+ */
+
+GLOBAL(void)
+jinit_phuff_encoder (j_compress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy;
+  int i;
+
+  entropy = (phuff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(phuff_entropy_encoder));
+  cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; /* install as the entropy encoder */
+  entropy->pub.start_pass = start_pass_phuff;	/* the only method pointer set here */
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->derived_tbls[i] = NULL;
+    entropy->count_ptrs[i] = NULL;
+  }
+  entropy->bit_buffer = NULL;	/* needed only in AC refinement scan */
+}
+
+#endif /* C_PROGRESSIVE_SUPPORTED */
diff --git a/src/libjpeg/jcprepct.c b/src/libjpeg/jcprepct.c
new file mode 100644
index 0000000..fa93333
--- /dev/null
+++ b/src/libjpeg/jcprepct.c
@@ -0,0 +1,354 @@
+/*
+ * jcprepct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the compression preprocessing controller.
+ * This controller manages the color conversion, downsampling,
+ * and edge expansion steps.
+ *
+ * Most of the complexity here is associated with buffering input rows
+ * as required by the downsampler.  See the comments at the head of
+ * jcsample.c for the downsampler's needs.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* At present, jcsample.c can request context rows only for smoothing.
+ * In the future, we might also need context rows for CCIR601 sampling
+ * or other more-complex downsampling procedures.  The code to support
+ * context rows should be compiled only if needed.
+ */
+#ifdef INPUT_SMOOTHING_SUPPORTED
+#define CONTEXT_ROWS_SUPPORTED
+#endif
+
+
+/*
+ * For the simple (no-context-row) case, we just need to buffer one
+ * row group's worth of pixels for the downsampling step.  At the bottom of
+ * the image, we pad to a full row group by replicating the last pixel row.
+ * The downsampler's last output row is then replicated if needed to pad
+ * out to a full iMCU row.
+ *
+ * When providing context rows, we must buffer three row groups' worth of
+ * pixels.  Three row groups are physically allocated, but the row pointer
+ * arrays are made five row groups high, with the extra pointers above and
+ * below "wrapping around" to point to the last and first real row groups.
+ * This allows the downsampler to access the proper context rows.
+ * At the top and bottom of the image, we create dummy context rows by
+ * copying the first or last real pixel row.  This copying could be avoided
+ * by pointer hacking as is done in jdmainct.c, but it doesn't seem worth the
+ * trouble on the compression side.
+ */
+
+
+/* Private buffer controller object; cinfo->prep is cast to my_prep_ptr to reach it */
+
+typedef struct {
+  struct jpeg_c_prep_controller pub; /* public fields */
+
+  /* Downsampling input buffer.  This buffer holds color-converted data
+   * until we have enough to do a downsample step.  One array per component.
+   */
+  JSAMPARRAY color_buf[MAX_COMPONENTS];
+
+  JDIMENSION rows_to_go;	/* counts rows remaining in source image */
+  int next_buf_row;		/* index of next row to store in color_buf */
+
+#ifdef CONTEXT_ROWS_SUPPORTED	/* only needed for context case */
+  int this_row_group;		/* starting row index of group to process */
+  int next_buf_stop;		/* downsample when we reach this index */
+#endif
+} my_prep_controller;
+
+typedef my_prep_controller * my_prep_ptr;
+
+
+/*
+ * Initialize for a processing pass.  Only JBUF_PASS_THRU mode is accepted.
+ */
+
+METHODDEF(void)
+start_pass_prep (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+
+  if (pass_mode != JBUF_PASS_THRU)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Initialize total-height counter for detecting bottom of image */
+  prep->rows_to_go = cinfo->image_height;
+  /* Mark the conversion buffer empty */
+  prep->next_buf_row = 0;
+#ifdef CONTEXT_ROWS_SUPPORTED
+  /* Preset additional state variables for context mode.
+   * These aren't used in non-context mode, so we needn't test which mode.
+   */
+  prep->this_row_group = 0;
+  /* Set next_buf_stop to stop after two row groups have been read in. */
+  prep->next_buf_stop = 2 * cinfo->max_v_samp_factor;
+#endif
+}
+
+
+/*
+ * Expand an image vertically from height input_rows to height output_rows,
+ * by duplicating the bottom row (row input_rows-1) into each added row.
+ */
+
+LOCAL(void)
+expand_bottom_edge (JSAMPARRAY image_data, JDIMENSION num_cols,
+		    int input_rows, int output_rows)
+{
+  register int row;
+
+  for (row = input_rows; row < output_rows; row++) {	/* no-op if input_rows >= output_rows */
+    jcopy_sample_rows(image_data, input_rows-1, image_data, row,
+		      1, num_cols);	/* one row per call; needs input_rows >= 1 */
+  }
+}
+
+
+/*
+ * Process some data in the simple no-context case.
+ *
+ * Preprocessor output data is counted in "row groups".  A row group
+ * is defined to be v_samp_factor sample rows of each component.
+ * Downsampling will produce this much data from each max_v_samp_factor
+ * input rows.  *in_row_ctr and *out_row_group_ctr are advanced as data moves.
+ */
+
+METHODDEF(void)
+pre_process_data (j_compress_ptr cinfo,
+		  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+		  JDIMENSION in_rows_avail,
+		  JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+		  JDIMENSION out_row_groups_avail)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int numrows, ci;
+  JDIMENSION inrows;
+  jpeg_component_info * compptr;
+
+  while (*in_row_ctr < in_rows_avail &&
+	 *out_row_group_ctr < out_row_groups_avail) {
+    /* Do color conversion to fill the conversion buffer. */
+    inrows = in_rows_avail - *in_row_ctr;	/* input rows not yet consumed */
+    numrows = cinfo->max_v_samp_factor - prep->next_buf_row; /* free rows left in the buffer */
+    numrows = (int) MIN((JDIMENSION) numrows, inrows);
+    (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
+				       prep->color_buf,
+				       (JDIMENSION) prep->next_buf_row,
+				       numrows);
+    *in_row_ctr += numrows;
+    prep->next_buf_row += numrows;
+    prep->rows_to_go -= numrows;
+    /* If at bottom of image, pad to fill the conversion buffer. */
+    if (prep->rows_to_go == 0 &&
+	prep->next_buf_row < cinfo->max_v_samp_factor) {
+      for (ci = 0; ci < cinfo->num_components; ci++) {
+	expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+			   prep->next_buf_row, cinfo->max_v_samp_factor);
+      }
+      prep->next_buf_row = cinfo->max_v_samp_factor;	/* buffer now counts as full */
+    }
+    /* If we've filled the conversion buffer, empty it. */
+    if (prep->next_buf_row == cinfo->max_v_samp_factor) {
+      (*cinfo->downsample->downsample) (cinfo,
+					prep->color_buf, (JDIMENSION) 0,
+					output_buf, *out_row_group_ctr);
+      prep->next_buf_row = 0;
+      (*out_row_group_ctr)++;
+    }
+    /* If at bottom of image, pad the output to a full iMCU height.
+     * Note we assume the caller is providing a one-iMCU-height output buffer!
+     */
+    if (prep->rows_to_go == 0 &&
+	*out_row_group_ctr < out_row_groups_avail) {
+      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	   ci++, compptr++) {
+	expand_bottom_edge(output_buf[ci],
+			   compptr->width_in_blocks * DCTSIZE,
+			   (int) (*out_row_group_ctr * compptr->v_samp_factor),
+			   (int) (out_row_groups_avail * compptr->v_samp_factor));
+      }
+      *out_row_group_ctr = out_row_groups_avail;	/* report the output buffer as full */
+      break;			/* can exit outer loop without test */
+    }
+  }
+}
+
+
+#ifdef CONTEXT_ROWS_SUPPORTED
+
+/*
+ * Process some data in the context case (three-row-group circular buffer).
+ */
+
+METHODDEF(void)
+pre_process_context (j_compress_ptr cinfo,
+		     JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+		     JDIMENSION in_rows_avail,
+		     JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+		     JDIMENSION out_row_groups_avail)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int numrows, ci;
+  int buf_height = cinfo->max_v_samp_factor * 3;	/* rows in the circular buffer */
+  JDIMENSION inrows;
+
+  while (*out_row_group_ctr < out_row_groups_avail) {
+    if (*in_row_ctr < in_rows_avail) {
+      /* Do color conversion to fill the conversion buffer. */
+      inrows = in_rows_avail - *in_row_ctr;	/* input rows not yet consumed */
+      numrows = prep->next_buf_stop - prep->next_buf_row; /* rows wanted before next downsample */
+      numrows = (int) MIN((JDIMENSION) numrows, inrows);
+      (*cinfo->cconvert->color_convert) (cinfo, input_buf + *in_row_ctr,
+					 prep->color_buf,
+					 (JDIMENSION) prep->next_buf_row,
+					 numrows);
+      /* Pad at top of image, if first time through */
+      if (prep->rows_to_go == cinfo->image_height) {
+	for (ci = 0; ci < cinfo->num_components; ci++) {
+	  int row;
+	  for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
+	    jcopy_sample_rows(prep->color_buf[ci], 0,
+			      prep->color_buf[ci], -row,
+			      1, cinfo->image_width);	/* row 0 copied to rows -1..-max_v_samp_factor */
+	  }
+	}
+      }
+      *in_row_ctr += numrows;
+      prep->next_buf_row += numrows;
+      prep->rows_to_go -= numrows;
+    } else {
+      /* Return for more data, unless we are at the bottom of the image. */
+      if (prep->rows_to_go != 0)
+	break;
+      /* When at bottom of image, pad to fill the conversion buffer. */
+      if (prep->next_buf_row < prep->next_buf_stop) {
+	for (ci = 0; ci < cinfo->num_components; ci++) {
+	  expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+			     prep->next_buf_row, prep->next_buf_stop);
+	}
+	prep->next_buf_row = prep->next_buf_stop;
+      }
+    }
+    /* If we've gotten enough data, downsample a row group. */
+    if (prep->next_buf_row == prep->next_buf_stop) {
+      (*cinfo->downsample->downsample) (cinfo,
+					prep->color_buf,
+					(JDIMENSION) prep->this_row_group,
+					output_buf, *out_row_group_ctr);
+      (*out_row_group_ctr)++;
+      /* Advance pointers with wraparound as necessary. */
+      prep->this_row_group += cinfo->max_v_samp_factor;
+      if (prep->this_row_group >= buf_height)
+	prep->this_row_group = 0;
+      if (prep->next_buf_row >= buf_height)
+	prep->next_buf_row = 0;
+      prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor; /* one more row group */
+    }
+  }
+}
+
+
+/*
+ * Create the wrapped-around downsampling input buffer needed for context mode.
+ */
+
+LOCAL(void)
+create_context_buffer (j_compress_ptr cinfo)
+{
+  my_prep_ptr prep = (my_prep_ptr) cinfo->prep;
+  int rgroup_height = cinfo->max_v_samp_factor;	/* pixel rows per row group */
+  int ci, i;
+  jpeg_component_info * compptr;
+  JSAMPARRAY true_buffer, fake_buffer;
+
+  /* Grab enough space for fake row pointers for all the components;
+   * we need five row groups' worth of pointers for each component.
+   */
+  fake_buffer = (JSAMPARRAY)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(cinfo->num_components * 5 * rgroup_height) *
+				SIZEOF(JSAMPROW));
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Allocate the actual buffer space (3 row groups) for this component.
+     * We make the buffer wide enough to allow the downsampler to edge-expand
+     * horizontally within the buffer, if it so chooses.
+     */
+    true_buffer = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE *
+		      cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+       (JDIMENSION) (3 * rgroup_height));
+    /* Copy true buffer row pointers into the middle of the fake row array */
+    MEMCOPY(fake_buffer + rgroup_height, true_buffer,
+	    3 * rgroup_height * SIZEOF(JSAMPROW));
+    /* Fill in the above and below wraparound pointers */
+    for (i = 0; i < rgroup_height; i++) {
+      fake_buffer[i] = true_buffer[2 * rgroup_height + i];	/* above: alias the last real group */
+      fake_buffer[4 * rgroup_height + i] = true_buffer[i];	/* below: alias the first real group */
+    }
+    prep->color_buf[ci] = fake_buffer + rgroup_height;	/* index 0 = first real row; -1.. are valid */
+    fake_buffer += 5 * rgroup_height; /* point to space for next component */
+  }
+}
+
+#endif /* CONTEXT_ROWS_SUPPORTED */
+
+
+/*
+ * Initialize preprocessing controller.  need_full_buffer must be FALSE.
+ */
+
+GLOBAL(void)
+jinit_c_prep_controller (j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_prep_ptr prep;
+  int ci;
+  jpeg_component_info * compptr;
+
+  if (need_full_buffer)		/* safety check */
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  prep = (my_prep_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_prep_controller));
+  cinfo->prep = (struct jpeg_c_prep_controller *) prep;	/* install as the prep controller */
+  prep->pub.start_pass = start_pass_prep;
+
+  /* Allocate the color conversion buffer.
+   * We make the buffer wide enough to allow the downsampler to edge-expand
+   * horizontally within the buffer, if it so chooses.
+   */
+  if (cinfo->downsample->need_context_rows) {
+    /* Set up to provide context rows */
+#ifdef CONTEXT_ROWS_SUPPORTED
+    prep->pub.pre_process_data = pre_process_context;
+    create_context_buffer(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* No context, just make it tall enough for one row group */
+    prep->pub.pre_process_data = pre_process_data;
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      prep->color_buf[ci] = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE *
+			cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+	 (JDIMENSION) cinfo->max_v_samp_factor);
+    }
+  }
+}
diff --git a/src/libjpeg/jcsample.c b/src/libjpeg/jcsample.c
new file mode 100644
index 0000000..212ec87
--- /dev/null
+++ b/src/libjpeg/jcsample.c
@@ -0,0 +1,519 @@
+/*
+ * jcsample.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains downsampling routines.
+ *
+ * Downsampling input data is counted in "row groups".  A row group
+ * is defined to be max_v_samp_factor pixel rows of each component,
+ * from which the downsampler produces v_samp_factor sample rows.
+ * A single row group is processed in each call to the downsampler module.
+ *
+ * The downsampler is responsible for edge-expansion of its output data
+ * to fill an integral number of DCT blocks horizontally.  The source buffer
+ * may be modified if it is helpful for this purpose (the source buffer is
+ * allocated wide enough to correspond to the desired output width).
+ * The caller (the prep controller) is responsible for vertical padding.
+ *
+ * The downsampler may request "context rows" by setting need_context_rows
+ * during startup.  In this case, the input arrays will contain at least
+ * one row group's worth of pixels above and below the passed-in data;
+ * the caller will create dummy rows at image top and bottom by replicating
+ * the first or last real pixel row.
+ *
+ * An excellent reference for image resampling is
+ *   Digital Image Warping, George Wolberg, 1990.
+ *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
+ *
+ * The downsampling algorithm used here is a simple average of the source
+ * pixels covered by the output pixel.  The hi-falutin sampling literature
+ * refers to this as a "box filter".  In general the characteristics of a box
+ * filter are not very good, but for the specific cases we normally use (1:1
+ * and 2:1 ratios) the box is equivalent to a "triangle filter" which is not
+ * nearly so bad.  If you intend to use other sampling ratios, you'd be well
+ * advised to improve this code.
+ *
+ * A simple input-smoothing capability is provided.  This is mainly intended
+ * for cleaning up color-dithered GIF input files (if you find it inadequate,
+ * we suggest using an external filtering program such as pnmconvol).  When
+ * enabled, each input pixel P is replaced by a weighted sum of itself and its
+ * eight neighbors.  P's weight is 1-8*SF and each neighbor's weight is SF,
+ * where SF = (smoothing_factor / 1024).
+ * Currently, smoothing is only supported for 2h2v sampling factors.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Pointer to routine to downsample one row group of a single component */
+typedef JMETHOD(void, downsample1_ptr,
+		(j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data));
+
+/* Private subobject: the public downsampler plus per-component methods */
+
+typedef struct {
+  struct jpeg_downsampler pub;	/* public fields; first, so casts work */
+
+  /* Downsampling method pointers, one per component (set at init time) */
+  downsample1_ptr methods[MAX_COMPONENTS];
+} my_downsampler;
+
+typedef my_downsampler * my_downsample_ptr;
+
+
+/*
+ * Initialize for a downsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_downsample (j_compress_ptr cinfo)
+{
+  /* intentionally empty: no per-pass setup is needed for now */
+}
+
+
+/*
+ * Expand a component horizontally from width input_cols to width output_cols,
+ * by duplicating the rightmost samples.
+ */
+
+LOCAL(void)
+expand_right_edge (JSAMPARRAY image_data, int num_rows,
+		   JDIMENSION input_cols, JDIMENSION output_cols)
+{
+  /* Count of padding samples needed per row; nothing to do if <= 0 */
+  int pad = (int) (output_cols - input_cols);
+  int r, k;
+
+  if (pad <= 0)
+    return;
+
+  for (r = 0; r < num_rows; r++) {
+    JSAMPROW dest = image_data[r] + input_cols;	/* first padding column */
+    JSAMPLE fill = dest[-1];	/* raw copy, so no GETJSAMPLE() needed */
+
+    for (k = 0; k < pad; k++)
+      dest[k] = fill;		/* replicate the rightmost real sample */
+  }
+}
+
+
+/*
+ * Do downsampling for a whole row group (all components).
+ *
+ * In this version we simply downsample each component independently.
+ */
+
+METHODDEF(void)
+sep_downsample (j_compress_ptr cinfo,
+		JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+		JSAMPIMAGE output_buf, JDIMENSION out_row_group_index)
+{
+  my_downsample_ptr self = (my_downsample_ptr) cinfo->downsample;
+  int c;
+
+  /* Dispatch each component to the method chosen for it at init time */
+  for (c = 0; c < cinfo->num_components; c++) {
+    jpeg_component_info * cp = &cinfo->comp_info[c];
+    JSAMPARRAY src = input_buf[c] + in_row_index;
+    JSAMPARRAY dst = output_buf[c] + (out_row_group_index * cp->v_samp_factor);
+
+    (*self->methods[c]) (cinfo, cp, src, dst);
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * One row group is processed per call.
+ * This version handles arbitrary integral sampling ratios, without smoothing.
+ * Note that this version is not actually used for customary sampling ratios.
+ */
+
+METHODDEF(void)
+int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
+  JDIMENSION outcol, outcol_h;	/* outcol_h == outcol*h_expand */
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  JSAMPROW inptr, outptr;
+  INT32 outvalue;
+
+  h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor;
+  v_expand = cinfo->max_v_samp_factor / compptr->v_samp_factor;
+  numpix = h_expand * v_expand;	/* input samples averaged per output */
+  numpix2 = numpix/2;		/* added before dividing, to round */
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * h_expand);
+
+  inrow = 0;			/* first input row of the current output row */
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    for (outcol = 0, outcol_h = 0; outcol < output_cols;
+	 outcol++, outcol_h += h_expand) {
+      outvalue = 0;		/* sum over the h_expand x v_expand box */
+      for (v = 0; v < v_expand; v++) {
+	inptr = input_data[inrow+v] + outcol_h;
+	for (h = 0; h < h_expand; h++) {
+	  outvalue += (INT32) GETJSAMPLE(*inptr++);
+	}
+      }
+      *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix);
+    }
+    inrow += v_expand;
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the special case of a full-size component,
+ * without smoothing.
+ */
+
+METHODDEF(void)
+fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		     JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  /* Copy the data unchanged (a full-size component needs no averaging) */
+  jcopy_sample_rows(input_data, 0, output_data, 0,
+		    cinfo->max_v_samp_factor, cinfo->image_width);
+  /* Edge-expand the output (not the input) out to the padded block width */
+  expand_right_edge(output_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, compptr->width_in_blocks * DCTSIZE);
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the common case of 2:1 horizontal and 1:1 vertical,
+ * without smoothing.
+ *
+ * A note about the "bias" calculations: when rounding fractional values to
+ * integer, we do not want to always round 0.5 up to the next integer.
+ * If we did that, we'd introduce a noticeable bias towards larger values.
+ * Instead, this code is arranged so that 0.5 will be rounded up or down at
+ * alternate pixel locations (a simple ordered dither pattern).
+ */
+
+METHODDEF(void)
+h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int outrow;
+  JDIMENSION outcol;
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  register JSAMPROW inptr, outptr;
+  register int bias;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * 2);
+
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr = input_data[outrow];	/* 1:1 vertically: same row index */
+    bias = 0;			/* bias = 0,1,0,1,... for successive samples */
+    for (outcol = 0; outcol < output_cols; outcol++) {
+      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1])
+			      + bias) >> 1);
+      bias ^= 1;		/* 0=>1, 1=>0 */
+      inptr += 2;		/* step to the next horizontal pair */
+    }
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+ * without smoothing.
+ */
+
+METHODDEF(void)
+h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow, outrow;
+  JDIMENSION outcol;
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  register JSAMPROW inptr0, inptr1, outptr;
+  register int bias;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor,
+		    cinfo->image_width, output_cols * 2);
+
+  inrow = 0;
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr0 = input_data[inrow];		/* upper row of each 2x2 cell */
+    inptr1 = input_data[inrow+1];	/* lower row of each 2x2 cell */
+    bias = 1;			/* bias = 1,2,1,2,... for successive samples */
+    for (outcol = 0; outcol < output_cols; outcol++) {
+      *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+			      GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1])
+			      + bias) >> 2);
+      bias ^= 3;		/* 1=>2, 2=>1 */
+      inptr0 += 2; inptr1 += 2;
+    }
+    inrow += 2;			/* each output row consumes two input rows */
+  }
+}
+
+
+#ifdef INPUT_SMOOTHING_SUPPORTED
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+ * with smoothing.  One row of context is required.
+ */
+
+METHODDEF(void)
+h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+			JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int inrow, outrow;
+  JDIMENSION colctr;
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
+  INT32 membersum, neighsum, memberscale, neighscale;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop (special-casing padded output would be more
+   * efficient).  The context rows above and below are expanded as well.
+   */
+  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
+		    cinfo->image_width, output_cols * 2);
+
+  /* We don't bother to form the individual "smoothed" input pixel values;
+   * we can directly compute the output which is the average of the four
+   * smoothed values.  Each of the four member pixels contributes a fraction
+   * (1-8*SF) to its own smoothed image and a fraction SF to each of the three
+   * other smoothed pixels, therefore a total fraction (1-5*SF)/4 to the final
+   * output.  The four corner-adjacent neighbor pixels contribute a fraction
+   * SF to just one smoothed pixel, or SF/4 to the final output; while the
+   * eight edge-adjacent neighbors contribute SF to each of two smoothed
+   * pixels, or SF/2 overall.  In order to use integer arithmetic, these
+   * factors are scaled by 2^16 = 65536.
+   * Also recall that SF = smoothing_factor / 1024.
+   */
+
+  memberscale = 16384 - cinfo->smoothing_factor * 80; /* scaled (1-5*SF)/4 */
+  neighscale = cinfo->smoothing_factor * 16; /* scaled SF/4 */
+
+  inrow = 0;
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr0 = input_data[inrow];
+    inptr1 = input_data[inrow+1];
+    above_ptr = input_data[inrow-1];	/* context row above the pair */
+    below_ptr = input_data[inrow+2];	/* context row below the pair */
+
+    /* Special case for first column: pretend column -1 is same as column 0 */
+    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+	       GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) +
+	       GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]);
+    neighsum += neighsum;
+    neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) +
+		GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]);
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+    inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
+
+    for (colctr = output_cols - 2; colctr > 0; colctr--) {
+      /* sum of pixels directly mapped to this output element */
+      membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		  GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+      /* sum of edge-neighbor pixels */
+      neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+		 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+		 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) +
+		 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]);
+      /* The edge-neighbors count twice as much as corner-neighbors */
+      neighsum += neighsum;
+      /* Add in the corner-neighbors */
+      neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) +
+		  GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]);
+      /* form final output scaled up by 2^16 */
+      membersum = membersum * memberscale + neighsum * neighscale;
+      /* round, descale and output it */
+      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+      inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2;
+    }
+
+    /* Special case for last column: offset +1 stands in for offset +2 */
+    membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) +
+		GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]);
+    neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) +
+	       GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) +
+	       GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) +
+	       GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]);
+    neighsum += neighsum;
+    neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) +
+		GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]);
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
+
+    inrow += 2;
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the special case of a full-size component,
+ * with smoothing.  One row of context is required.
+ */
+
+METHODDEF(void)
+fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr,
+			    JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+  int outrow;
+  JDIMENSION colctr;
+  JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE;
+  register JSAMPROW inptr, above_ptr, below_ptr, outptr;
+  INT32 membersum, neighsum, memberscale, neighscale;
+  int colsum, lastcolsum, nextcolsum;	/* above+this+below, per column */
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop (special-casing padded output would be more
+   * efficient).  The context rows above and below are expanded as well.
+   */
+  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
+		    cinfo->image_width, output_cols);
+
+  /* Each of the eight neighbor pixels contributes a fraction SF to the
+   * smoothed pixel, while the main pixel contributes (1-8*SF).  In order
+   * to use integer arithmetic, these factors are multiplied by 2^16 = 65536.
+   * Also recall that SF = smoothing_factor / 1024.
+   */
+
+  memberscale = 65536L - cinfo->smoothing_factor * 512L; /* scaled 1-8*SF */
+  neighscale = cinfo->smoothing_factor * 64; /* scaled SF */
+
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr = input_data[outrow];
+    above_ptr = input_data[outrow-1];	/* context row above */
+    below_ptr = input_data[outrow+1];	/* context row below */
+
+    /* Special case for first column: column 0 doubles as its left neighbor */
+    colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) +
+	     GETJSAMPLE(*inptr);
+    membersum = GETJSAMPLE(*inptr++);
+    nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
+		 GETJSAMPLE(*inptr);
+    neighsum = colsum + (colsum - membersum) + nextcolsum;
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+    lastcolsum = colsum; colsum = nextcolsum;
+
+    for (colctr = output_cols - 2; colctr > 0; colctr--) {
+      membersum = GETJSAMPLE(*inptr++);
+      above_ptr++; below_ptr++;
+      nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) +
+		   GETJSAMPLE(*inptr);
+      neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
+      membersum = membersum * memberscale + neighsum * neighscale;
+      *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16);
+      lastcolsum = colsum; colsum = nextcolsum;
+    }
+
+    /* Special case for last column: it doubles as its right neighbor */
+    membersum = GETJSAMPLE(*inptr);
+    neighsum = lastcolsum + (colsum - membersum) + colsum;
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr = (JSAMPLE) ((membersum + 32768) >> 16);
+
+  }
+}
+
+#endif /* INPUT_SMOOTHING_SUPPORTED */
+
+
+/*
+ * Module initialization routine for downsampling.
+ * Note that we must select a routine for each component.
+ */
+
+GLOBAL(void)
+jinit_downsampler (j_compress_ptr cinfo)
+{
+  my_downsample_ptr downsample;
+  int ci;
+  jpeg_component_info * compptr;
+  boolean smoothok = TRUE;	/* cleared if a chosen method can't smooth */
+
+  downsample = (my_downsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_downsampler));
+  cinfo->downsample = (struct jpeg_downsampler *) downsample;
+  downsample->pub.start_pass = start_pass_downsample;
+  downsample->pub.downsample = sep_downsample;
+  downsample->pub.need_context_rows = FALSE;	/* smoothing may set it */
+
+  if (cinfo->CCIR601_sampling)
+    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
+
+  /* Verify we can handle the sampling factors, and set up method pointers */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor == cinfo->max_h_samp_factor &&
+	compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+#ifdef INPUT_SMOOTHING_SUPPORTED
+      if (cinfo->smoothing_factor) {
+	downsample->methods[ci] = fullsize_smooth_downsample;
+	downsample->pub.need_context_rows = TRUE;
+      } else
+#endif
+	downsample->methods[ci] = fullsize_downsample;
+    } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
+	       compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+      smoothok = FALSE;		/* h2v1 has no smoothing variant */
+      downsample->methods[ci] = h2v1_downsample;
+    } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
+	       compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
+#ifdef INPUT_SMOOTHING_SUPPORTED
+      if (cinfo->smoothing_factor) {
+	downsample->methods[ci] = h2v2_smooth_downsample;
+	downsample->pub.need_context_rows = TRUE;
+      } else
+#endif
+	downsample->methods[ci] = h2v2_downsample;
+    } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 &&
+	       (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) {
+      smoothok = FALSE;		/* nor does the general integral case */
+      downsample->methods[ci] = int_downsample;
+    } else
+      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
+  }
+
+#ifdef INPUT_SMOOTHING_SUPPORTED
+  if (cinfo->smoothing_factor && !smoothok)
+    TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
+#endif
+}
diff --git a/src/libjpeg/jctrans.c b/src/libjpeg/jctrans.c
new file mode 100644
index 0000000..0e6d707
--- /dev/null
+++ b/src/libjpeg/jctrans.c
@@ -0,0 +1,388 @@
+/*
+ * jctrans.c
+ *
+ * Copyright (C) 1995-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains library routines for transcoding compression,
+ * that is, writing raw DCT coefficient arrays to an output JPEG file.
+ * The routines in jcapimin.c will also be needed by a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations of file-local helpers defined later in this file */
+LOCAL(void) transencode_master_selection
+	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+LOCAL(void) transencode_coef_controller
+	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+
+
+/*
+ * Compression initialization for writing raw-coefficient data.
+ * Before calling this, all parameters and a data destination must be set up.
+ * Call jpeg_finish_compress() to actually write the data.
+ *
+ * The number of passed virtual arrays must match cinfo->num_components.
+ * Note that the virtual arrays need not be filled or even realized at
+ * the time write_coefficients is called; indeed, if the virtual arrays
+ * were requested from this compression object's memory manager, they
+ * typically will be realized during this routine and filled afterwards.
+ */
+
+GLOBAL(void)
+jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Mark all tables to be written (suppress = FALSE) */
+  jpeg_suppress_tables(cinfo, FALSE);
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Perform master selection of active modules; this writes the file header */
+  transencode_master_selection(cinfo, coef_arrays);
+  /* Wait for jpeg_finish_compress() call */
+  cinfo->next_scanline = 0;	/* so jpeg_write_marker works */
+  cinfo->global_state = CSTATE_WRCOEFS;
+}
+
+
+/*
+ * Initialize the compression object with default parameters,
+ * then copy from the source object all parameters needed for lossless
+ * transcoding.  Parameters that can be varied without loss (such as
+ * scan script and Huffman optimization) are left in their default states.
+ */
+
+GLOBAL(void)
+jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
+			       j_compress_ptr dstinfo)
+{
+  JQUANT_TBL ** qtblptr;
+  jpeg_component_info *incomp, *outcomp;
+  JQUANT_TBL *c_quant, *slot_quant;
+  int tblno, ci, coefi;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (dstinfo->global_state != CSTATE_START)
+    ERREXIT1(dstinfo, JERR_BAD_STATE, dstinfo->global_state);
+  /* Copy fundamental image dimensions */
+  dstinfo->image_width = srcinfo->image_width;
+  dstinfo->image_height = srcinfo->image_height;
+  dstinfo->input_components = srcinfo->num_components;
+  dstinfo->in_color_space = srcinfo->jpeg_color_space;
+  /* Initialize all parameters to default values */
+  jpeg_set_defaults(dstinfo);
+  /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB.
+   * Fix it to get the right header markers for the image colorspace.
+   */
+  jpeg_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
+  dstinfo->data_precision = srcinfo->data_precision;
+  dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
+  /* Copy the source's quantization tables, allocating dest slots as needed. */
+  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
+    if (srcinfo->quant_tbl_ptrs[tblno] != NULL) {
+      qtblptr = & dstinfo->quant_tbl_ptrs[tblno];
+      if (*qtblptr == NULL)
+	*qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo);
+      MEMCOPY((*qtblptr)->quantval,
+	      srcinfo->quant_tbl_ptrs[tblno]->quantval,
+	      SIZEOF((*qtblptr)->quantval));
+      (*qtblptr)->sent_table = FALSE;
+    }
+  }
+  /* Copy the source's per-component info.
+   * Note we assume jpeg_set_defaults has allocated the dest comp_info array.
+   */
+  dstinfo->num_components = srcinfo->num_components;
+  if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components,
+	     MAX_COMPONENTS);
+  for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info;
+       ci < dstinfo->num_components; ci++, incomp++, outcomp++) {
+    outcomp->component_id = incomp->component_id;
+    outcomp->h_samp_factor = incomp->h_samp_factor;
+    outcomp->v_samp_factor = incomp->v_samp_factor;
+    outcomp->quant_tbl_no = incomp->quant_tbl_no;
+    /* Make sure saved quantization table for component matches the qtable
+     * slot.  If not, the input file re-used this qtable slot.
+     * IJG encoder currently cannot duplicate this.
+     */
+    tblno = outcomp->quant_tbl_no;
+    if (tblno < 0 || tblno >= NUM_QUANT_TBLS ||
+	srcinfo->quant_tbl_ptrs[tblno] == NULL)
+      ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno);
+    slot_quant = srcinfo->quant_tbl_ptrs[tblno];
+    c_quant = incomp->quant_table;	/* component's saved table, or NULL */
+    if (c_quant != NULL) {
+      for (coefi = 0; coefi < DCTSIZE2; coefi++) {
+	if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
+	  ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
+      }
+    }
+    /* Note: we do not copy the source's Huffman table assignments;
+     * instead we rely on jpeg_set_colorspace to have made a suitable choice.
+     */
+  }
+  /* Also copy JFIF version and resolution information, if available.
+   * Strictly speaking this isn't "critical" info, but it's nearly
+   * always appropriate to copy it if available.  In particular,
+   * if the application chooses to copy JFIF 1.02 extension markers from
+   * the source file, we need to copy the version to make sure we don't
+   * emit a file that has 1.02 extensions but a claimed version of 1.01.
+   * We will *not*, however, copy version info from mislabeled "2.01" files.
+   */
+  if (srcinfo->saw_JFIF_marker) {
+    if (srcinfo->JFIF_major_version == 1) {	/* version copied only for 1.x */
+      dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
+      dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
+    }
+    dstinfo->density_unit = srcinfo->density_unit;
+    dstinfo->X_density = srcinfo->X_density;
+    dstinfo->Y_density = srcinfo->Y_density;
+  }
+}
+
+
+/*
+ * Master selection of compression modules for transcoding.
+ * This substitutes for jcinit.c's initialization of the full compressor.
+ */
+
+LOCAL(void)
+transencode_master_selection (j_compress_ptr cinfo,
+			      jvirt_barray_ptr * coef_arrays)
+{
+  /* Although we don't actually use input_components for transcoding,
+   * jcmaster.c's initial_setup will complain if input_components is 0.
+   */
+  cinfo->input_components = 1;	/* placeholder value, see above */
+  /* Initialize master control (includes parameter checking/processing) */
+  jinit_c_master_control(cinfo, TRUE /* transcode only */);
+
+  /* Entropy encoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code) {
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);	/* arithmetic coding is rejected */
+  } else {
+    if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+      jinit_phuff_encoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else
+      jinit_huff_encoder(cinfo);
+  }
+
+  /* We need a special coefficient buffer controller (defined in this file). */
+  transencode_coef_controller(cinfo, coef_arrays);
+
+  jinit_marker_writer(cinfo);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Write the datastream header (SOI, JFIF) immediately.
+   * Frame and scan headers are postponed till later.
+   * This lets application insert special markers after the SOI.
+   */
+  (*cinfo->marker->write_file_header) (cinfo);
+}
+
+
+/*
+ * The rest of this file is a special implementation of the coefficient
+ * buffer controller.  This is similar to jccoefct.c, but it handles only
+ * output from presupplied virtual arrays.  Furthermore, we generate any
+ * dummy padding blocks on-the-fly rather than expecting them to be present
+ * in the arrays.
+ */
+
+/* Private buffer controller object (installed as cinfo->coef) */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields */
+
+  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
+  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* Virtual block array for each component (supplied by the caller). */
+  jvirt_barray_ptr * whole_image;
+
+  /* Workspace for constructing dummy blocks at right/bottom edges. */
+  JBLOCKROW dummy_buffer[C_MAX_BLOCKS_IN_MCU];
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+
+LOCAL(void)
+start_iMCU_row (j_compress_ptr cinfo)
+/* Prepare the per-row bookkeeping before a new iMCU row is processed */
+{
+  my_coef_ptr ctl = (my_coef_ptr) cinfo->coef;
+  int mcu_rows;
+
+  /* Interleaved scan: exactly one MCU row per iMCU row.
+   * Noninterleaved scan: v_samp_factor MCU rows per iMCU row, except in
+   * the final iMCU row, where only last_row_height rows remain.
+   */
+  if (cinfo->comps_in_scan > 1)
+    mcu_rows = 1;
+  else if (ctl->iMCU_row_num < (cinfo->total_iMCU_rows-1))
+    mcu_rows = cinfo->cur_comp_info[0]->v_samp_factor;
+  else
+    mcu_rows = cinfo->cur_comp_info[0]->last_row_height;
+
+  ctl->MCU_rows_per_iMCU_row = mcu_rows;
+  ctl->MCU_vert_offset = 0;
+  ctl->mcu_ctr = 0;
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_coef_ptr ctl = (my_coef_ptr) cinfo->coef;
+
+  /* This controller accepts JBUF_CRANK_DEST and nothing else */
+  if (! (pass_mode == JBUF_CRANK_DEST))
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+  ctl->iMCU_row_num = 0;	/* begin again at the first iMCU row */
+  start_iMCU_row(cinfo);
+}
+
+
+/*
+ * Process some data.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)
+compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, ci, xindex, yindex, yoffset, blockcnt;
+  JDIMENSION start_col;		/* first block column of the current MCU */
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];	/* one per scan component */
+  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];	/* passed to encode_mcu */
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						: compptr->last_col_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (coef->iMCU_row_num < last_iMCU_row ||
+	      yindex+yoffset < compptr->last_row_height) {
+	    /* Fill in pointers to real blocks in this row */
+	    buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+	    for (xindex = 0; xindex < blockcnt; xindex++)
+	      MCU_buffer[blkn++] = buffer_ptr++;
+	  } else {
+	    /* At bottom of image, need a whole row of dummy blocks */
+	    xindex = 0;		/* so the loop below fills the entire row */
+	  }
+	  /* Fill in any dummy blocks needed in this row.
+	   * Dummy blocks are filled in the same way as in jccoefct.c:
+	   * all zeroes in the AC entries, DC entries equal to previous
+	   * block's DC value.  The init routine has already zeroed the
+	   * AC entries, so we need only set the DC entries correctly.
+	   */
+	  for (; xindex < compptr->MCU_width; xindex++) {
+	    MCU_buffer[blkn] = coef->dummy_buffer[blkn];
+	    MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0];	/* copy DC */
+	    blkn++;
+	  }
+	}
+      }
+      /* Try to write the MCU. */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->mcu_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Initialize coefficient buffer controller.
+ *
+ * Each passed coefficient array must be the right size for that
+ * coefficient: width_in_blocks wide and height_in_blocks high,
+ * with unitheight at least v_samp_factor.
+ */
+
+LOCAL(void)
+transencode_coef_controller (j_compress_ptr cinfo,
+			     jvirt_barray_ptr * coef_arrays)
+{
+  my_coef_ptr coef;
+  JBLOCKROW buffer;
+  int i;
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_coef_controller));
+  cinfo->coef = (struct jpeg_c_coef_controller *) coef;
+  coef->pub.start_pass = start_pass_coef;
+  coef->pub.compress_data = compress_output;
+
+  /* Save pointer to virtual arrays (the pointer is kept, not a copy) */
+  coef->whole_image = coef_arrays;
+
+  /* Allocate and pre-zero space for dummy DCT blocks. */
+  buffer = (JBLOCKROW)
+    (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+  jzero_far((void FAR *) buffer, C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+  for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
+    coef->dummy_buffer[i] = buffer + i;	/* entry i -> i'th zeroed block */
+  }
+}
diff --git a/src/libjpeg/jdapimin.c b/src/libjpeg/jdapimin.c
new file mode 100644
index 0000000..cadb59f
--- /dev/null
+++ b/src/libjpeg/jdapimin.c
@@ -0,0 +1,395 @@
+/*
+ * jdapimin.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the decompression half
+ * of the JPEG library.  These are the "minimum" API routines that may be
+ * needed in either the normal full-decompression case or the
+ * transcoding-only case.
+ *
+ * Most of the routines intended to be called directly by an application
+ * are in this file or in jdapistd.c.  But also see jcomapi.c for routines
+ * shared by compression and decompression, and jdtrans.c for the transcoding
+ * case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Initialization of a JPEG decompression object.
+ * The error manager must already be set up (in case memory manager fails).
+ */
+
+GLOBAL(void)
+jpeg_CreateDecompress (j_decompress_ptr cinfo, int version, size_t structsize)
+{
+  int tblno;
+
+  /* Reject callers built against a different library version or layout. */
+  cinfo->mem = NULL;		/* lets jpeg_destroy see no mem mgr exists yet */
+  if (version != JPEG_LIB_VERSION)
+    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
+  if (structsize != SIZEOF(struct jpeg_decompress_struct))
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE,
+	     (int) SIZEOF(struct jpeg_decompress_struct), (int) structsize);
+
+  /* Wipe the whole master structure (a debugging aid).
+   * The caller has already installed the err pointer and may have stored
+   * client_data, so both values are carried across the wipe.
+   * Note: reading a client_data that was never set may draw a complaint
+   * from tools like Purify; that complaint can be ignored.
+   */
+  {
+    void * saved_client_data = cinfo->client_data; /* Purify may complain */
+    struct jpeg_error_mgr * saved_err = cinfo->err;
+    MEMZERO(cinfo, SIZEOF(struct jpeg_decompress_struct));
+    cinfo->client_data = saved_client_data;
+    cinfo->err = saved_err;
+  }
+  cinfo->is_decompressor = TRUE;
+
+  /* Attach a memory manager instance to this object */
+  jinit_memory_mgr((j_common_ptr) cinfo);
+
+  /* No permanent structures exist yet: clear every pointer to one. */
+  cinfo->src = NULL;
+  cinfo->progress = NULL;
+
+  for (tblno = NUM_QUANT_TBLS; --tblno >= 0; )
+    cinfo->quant_tbl_ptrs[tblno] = NULL;
+
+  for (tblno = NUM_HUFF_TBLS; --tblno >= 0; ) {
+    cinfo->ac_huff_tbl_ptrs[tblno] = NULL;
+    cinfo->dc_huff_tbl_ptrs[tblno] = NULL;
+  }
+
+  /* Set up the marker reader now, so that the application can install its
+   * own COM/APPn handlers before it calls jpeg_read_header.
+   */
+  cinfo->marker_list = NULL;
+  jinit_marker_reader(cinfo);
+
+  /* The overall input controller comes next. */
+  jinit_input_controller(cinfo);
+
+  /* Object is now ready for use */
+  cinfo->global_state = DSTATE_START;
+}
+
+
+/*
+ * Destruction of a JPEG decompression object
+ */
+
+GLOBAL(void)
+jpeg_destroy_decompress (j_decompress_ptr cinfo)
+{
+  jpeg_destroy((j_common_ptr) cinfo); /* all work is done by the common-object routine */
+}
+
+
+/*
+ * Abort processing of a JPEG decompression operation,
+ * but don't destroy the object itself.
+ */
+
+GLOBAL(void)
+jpeg_abort_decompress (j_decompress_ptr cinfo)
+{
+  jpeg_abort((j_common_ptr) cinfo); /* all work is done by the common-object routine */
+}
+
+
+/*
+ * Set default decompression parameters.
+ */
+
+LOCAL(void)
+default_decompress_parms (j_decompress_ptr cinfo)
+{
+  /* Guess the input colorspace; choose the output colorspace to match. */
+  /* (Wish JPEG committee had provided a real way to specify this...) */
+  /* Note: these are only defaults; the application may override them. */
+  switch (cinfo->num_components) {
+  case 1:			/* one component: grayscale in and out */
+    cinfo->jpeg_color_space = JCS_GRAYSCALE;
+    cinfo->out_color_space = JCS_GRAYSCALE;
+    break;
+    
+  case 3:			/* three components: YCbCr or RGB in, RGB out */
+    if (cinfo->saw_JFIF_marker) {
+      cinfo->jpeg_color_space = JCS_YCbCr; /* JFIF implies YCbCr */
+    } else if (cinfo->saw_Adobe_marker) {
+      switch (cinfo->Adobe_transform) {
+      case 0:
+	cinfo->jpeg_color_space = JCS_RGB;
+	break;
+      case 1:
+	cinfo->jpeg_color_space = JCS_YCbCr;
+	break;
+      default:
+	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+	break;
+      }
+    } else {
+      /* Neither JFIF nor Adobe marker seen: guess from the component IDs */
+      int cid0 = cinfo->comp_info[0].component_id;
+      int cid1 = cinfo->comp_info[1].component_id;
+      int cid2 = cinfo->comp_info[2].component_id;
+
+      if (cid0 == 1 && cid1 == 2 && cid2 == 3)
+	cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
+      else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
+	cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
+      else {
+	TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
+	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+      }
+    }
+    /* Whatever the input guess was, default the output to RGB. */
+    cinfo->out_color_space = JCS_RGB;
+    break;
+    
+  case 4:			/* four components: CMYK or YCCK in, CMYK out */
+    if (cinfo->saw_Adobe_marker) {
+      switch (cinfo->Adobe_transform) {
+      case 0:
+	cinfo->jpeg_color_space = JCS_CMYK;
+	break;
+      case 2:
+	cinfo->jpeg_color_space = JCS_YCCK;
+	break;
+      default:
+	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+	cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */
+	break;
+      }
+    } else {
+      /* No Adobe marker seen, assume straight CMYK. */
+      cinfo->jpeg_color_space = JCS_CMYK;
+    }
+    cinfo->out_color_space = JCS_CMYK;
+    break;
+    
+  default:			/* any other count: colorspace unknown */
+    cinfo->jpeg_color_space = JCS_UNKNOWN;
+    cinfo->out_color_space = JCS_UNKNOWN;
+    break;
+  }
+
+  /* Set defaults for the remaining decompression parameters. */
+  cinfo->scale_num = 1;		/* 1:1 scaling */
+  cinfo->scale_denom = 1;
+  cinfo->output_gamma = 1.0;
+  cinfo->buffered_image = FALSE;
+  cinfo->raw_data_out = FALSE;
+  cinfo->dct_method = JDCT_DEFAULT;
+  cinfo->do_fancy_upsampling = TRUE;
+  cinfo->do_block_smoothing = TRUE;
+  cinfo->quantize_colors = FALSE;
+  /* Quantization defaults, set in case application only sets quantize_colors. */
+  cinfo->dither_mode = JDITHER_FS;
+#ifdef QUANT_2PASS_SUPPORTED
+  cinfo->two_pass_quantize = TRUE;
+#else
+  cinfo->two_pass_quantize = FALSE;
+#endif
+  cinfo->desired_number_of_colors = 256;
+  cinfo->colormap = NULL;
+  /* Initialize for no mode change in buffered-image mode. */
+  cinfo->enable_1pass_quant = FALSE;
+  cinfo->enable_external_quant = FALSE;
+  cinfo->enable_2pass_quant = FALSE;
+}
+
+
+/*
+ * Decompression startup: read start of JPEG datastream to see what's there.
+ * Need only initialize JPEG object and supply a data source before calling.
+ *
+ * This routine will read as far as the first SOS marker (ie, actual start of
+ * compressed data), and will save all tables and parameters in the JPEG
+ * object.  It will also initialize the decompression parameters to default
+ * values, and finally return JPEG_HEADER_OK.  On return, the application may
+ * adjust the decompression parameters and then call jpeg_start_decompress.
+ * (Or, if the application only wanted to determine the image parameters,
+ * the data need not be decompressed.  In that case, call jpeg_abort or
+ * jpeg_destroy to release any temporary space.)
+ * If an abbreviated (tables only) datastream is presented, the routine will
+ * return JPEG_HEADER_TABLES_ONLY upon reaching EOI.  The application may then
+ * re-use the JPEG object to read the abbreviated image datastream(s).
+ * It is unnecessary (but OK) to call jpeg_abort in this case.
+ * The JPEG_SUSPENDED return code only occurs if the data source module
+ * requests suspension of the decompressor.  In this case the application
+ * should load more source data and then re-call jpeg_read_header to resume
+ * processing.
+ * If a non-suspending data source is used and require_image is TRUE, then the
+ * return code need not be inspected since only JPEG_HEADER_OK is possible.
+ *
+ * This routine is now just a front end to jpeg_consume_input, with some
+ * extra error checking.
+ */
+
+GLOBAL(int)
+jpeg_read_header (j_decompress_ptr cinfo, boolean require_image)
+{
+  int status;
+
+  /* Legal only in the START and INHEADER states. */
+  if (! (cinfo->global_state == DSTATE_START ||
+	 cinfo->global_state == DSTATE_INHEADER))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  status = jpeg_consume_input(cinfo);
+
+  if (status == JPEG_REACHED_SOS) {
+    /* First SOS reached: the header has been read. */
+    return JPEG_HEADER_OK;
+  }
+  if (status == JPEG_REACHED_EOI) {
+    /* Tables-only datastream: an error if the caller insisted on an image */
+    if (require_image)
+      ERREXIT(cinfo, JERR_NO_IMAGE);
+    /* Go back to the start state.  Requiring the application to call
+     * jpeg_abort itself would be safer, but compatibility forbids changing
+     * that now.  As a side effect any temporary memory is freed (none is
+     * expected to exist at this point).
+     */
+    jpeg_abort((j_common_ptr) cinfo); /* sets state = DSTATE_START */
+    return JPEG_HEADER_TABLES_ONLY;
+  }
+
+  /* JPEG_SUSPENDED (or any other code) is passed through unchanged. */
+  return status;
+}
+
+
+/*
+ * Consume data in advance of what the decompressor requires.
+ * This can be called at any time once the decompressor object has
+ * been created and a data source has been set up.
+ *
+ * This routine is essentially a state machine that handles a couple
+ * of critical state-transition actions, namely initial setup and
+ * transition from header scanning to ready-for-start_decompress.
+ * All the actual input is done via the input controller's consume_input
+ * method.
+ */
+
+GLOBAL(int)
+jpeg_consume_input (j_decompress_ptr cinfo)
+{
+  int retcode = JPEG_SUSPENDED;
+
+  /* NB: every possible DSTATE value should be listed in this switch */
+  switch (cinfo->global_state) {
+  case DSTATE_START:
+    /* First call for this datastream: reset the input controller */
+    (*cinfo->inputctl->reset_input_controller) (cinfo);
+    /* Initialize application's data source module */
+    (*cinfo->src->init_source) (cinfo);
+    cinfo->global_state = DSTATE_INHEADER;
+    /*FALLTHROUGH*/
+  case DSTATE_INHEADER:
+    retcode = (*cinfo->inputctl->consume_input) (cinfo);
+    if (retcode == JPEG_REACHED_SOS) { /* Found SOS, prepare to decompress */
+      /* Set up default parameters based on header data */
+      default_decompress_parms(cinfo);
+      /* Set global state: ready for start_decompress */
+      cinfo->global_state = DSTATE_READY;
+    }
+    break;
+  case DSTATE_READY:
+    /* Can't advance past first SOS until start_decompress is called */
+    retcode = JPEG_REACHED_SOS;
+    break;
+  case DSTATE_PRELOAD:
+  case DSTATE_PRESCAN:
+  case DSTATE_SCANNING:
+  case DSTATE_RAW_OK:
+  case DSTATE_BUFIMAGE:
+  case DSTATE_BUFPOST:
+  case DSTATE_STOPPING:		/* all of these just read more input */
+    retcode = (*cinfo->inputctl->consume_input) (cinfo);
+    break;
+  default:			/* state value not listed above */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  return retcode;
+}
+
+
+/*
+ * Have we finished reading the input file?
+ */
+
+GLOBAL(boolean)
+jpeg_input_complete (j_decompress_ptr cinfo)
+{
+  /* Any decompressor state from START through STOPPING is acceptable */
+  if (! (cinfo->global_state >= DSTATE_START &&
+	 cinfo->global_state <= DSTATE_STOPPING))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return cinfo->inputctl->eoi_reached;
+}
+
+
+/*
+ * Is there more than one scan?
+ */
+
+GLOBAL(boolean)
+jpeg_has_multiple_scans (j_decompress_ptr cinfo)
+{
+  /* Meaningful only once jpeg_read_header has run to completion */
+  if (! (cinfo->global_state >= DSTATE_READY &&
+	 cinfo->global_state <= DSTATE_STOPPING))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return cinfo->inputctl->has_multiple_scans;
+}
+
+
+/*
+ * Finish JPEG decompression.
+ *
+ * This will normally just verify the file trailer and release temp storage.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_finish_decompress (j_decompress_ptr cinfo)
+{
+  /* Work out how we got here and move to the STOPPING state */
+  if (! cinfo->buffered_image && (cinfo->global_state == DSTATE_SCANNING ||
+				   cinfo->global_state == DSTATE_RAW_OK)) {
+    /* Non-buffered mode: close out the final output pass */
+    if (cinfo->output_scanline < cinfo->output_height)
+      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
+    (*cinfo->master->finish_output_pass) (cinfo);
+    cinfo->global_state = DSTATE_STOPPING;
+  } else if (cinfo->global_state == DSTATE_BUFIMAGE) {
+    /* A buffered-image operation is being finished */
+    cinfo->global_state = DSTATE_STOPPING;
+  } else if (cinfo->global_state != DSTATE_STOPPING) {
+    /* Only a repeat call after suspension (STOPPING) is legal here */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Consume the rest of the input, up to EOI */
+  while (! cinfo->inputctl->eoi_reached)
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return FALSE;		/* Suspend, come back later */
+  /* Final cleanup: let the data source shut down */
+  (*cinfo->src->term_source) (cinfo);
+  /* jpeg_abort releases memory and resets global_state */
+  jpeg_abort((j_common_ptr) cinfo);
+  return TRUE;
+}
diff --git a/src/libjpeg/jdapistd.c b/src/libjpeg/jdapistd.c
new file mode 100644
index 0000000..c8e3fa0
--- /dev/null
+++ b/src/libjpeg/jdapistd.c
@@ -0,0 +1,275 @@
+/*
+ * jdapistd.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains application interface code for the decompression half
+ * of the JPEG library.  These are the "standard" API routines that are
+ * used in the normal full-decompression case.  They are not used by a
+ * transcoding-only application.  Note that if an application links in
+ * jpeg_start_decompress, it will end up linking in the entire decompressor.
+ * We thus must separate this file from jdapimin.c to avoid linking the
+ * whole decompression library into a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations */
+LOCAL(boolean) output_pass_setup JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Decompression initialization.
+ * jpeg_read_header must be completed before calling this.
+ *
+ * If a multipass operating mode was selected, this will do all but the
+ * last pass, and thus may take a great deal of time.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_start_decompress (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize master control, select active modules */
+    jinit_master_decompress(cinfo);
+    if (cinfo->buffered_image) {
+      /* No more work here; expecting jpeg_start_output next */
+      cinfo->global_state = DSTATE_BUFIMAGE;
+      return TRUE;
+    }
+    cinfo->global_state = DSTATE_PRELOAD;
+  }
+  if (cinfo->global_state == DSTATE_PRELOAD) {
+    /* If file has multiple scans, absorb them all into the coef buffer */
+    if (cinfo->inputctl->has_multiple_scans) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+      for (;;) {
+	int retcode;
+	/* Call progress monitor hook if present */
+	if (cinfo->progress != NULL)
+	  (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+	/* Absorb some more input */
+	retcode = (*cinfo->inputctl->consume_input) (cinfo);
+	if (retcode == JPEG_SUSPENDED)
+	  return FALSE;		/* state stays PRELOAD: caller must call again */
+	if (retcode == JPEG_REACHED_EOI)
+	  break;		/* end of input: nothing left to absorb */
+	/* Advance progress counter if appropriate */
+	if (cinfo->progress != NULL &&
+	    (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+	  if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	    /* jdmaster underestimated number of scans; ratchet up one scan */
+	    cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+	  }
+	}
+      }
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+    }
+    cinfo->output_scan_number = cinfo->input_scan_number;
+  } else if (cinfo->global_state != DSTATE_PRESCAN) /* PRESCAN = resumed call */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any dummy output passes, and set up for the final pass */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Set up for an output pass, and perform any dummy pass(es) needed.
+ * Common subroutine for jpeg_start_decompress and jpeg_start_output.
+ * Entry: global_state = DSTATE_PRESCAN only if previously suspended.
+ * Exit: If done, returns TRUE and sets global_state for proper output mode.
+ *       If suspended, returns FALSE and sets global_state = DSTATE_PRESCAN.
+ */
+
+LOCAL(boolean)
+output_pass_setup (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state != DSTATE_PRESCAN) {
+    /* Not resuming after a suspension: do the one-time pass setup */
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+    cinfo->global_state = DSTATE_PRESCAN;
+  }
+  /* Loop over any required dummy passes */
+  while (cinfo->master->is_dummy_pass) {
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Crank through the dummy pass */
+    while (cinfo->output_scanline < cinfo->output_height) {
+      JDIMENSION last_scanline;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL) {
+	cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+	cinfo->progress->pass_limit = (long) cinfo->output_height;
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      }
+      /* Process some data; a dummy pass supplies no output buffer */
+      last_scanline = cinfo->output_scanline;
+      (*cinfo->main->process_data) (cinfo, (JSAMPARRAY) NULL,
+				    &cinfo->output_scanline, (JDIMENSION) 0);
+      if (cinfo->output_scanline == last_scanline)
+	return FALSE;		/* No progress made, must suspend */
+    }
+    /* Finish up dummy pass, and set up for another one */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  }
+  /* Ready for application to drive output pass through
+   * jpeg_read_scanlines or jpeg_read_raw_data; raw_data_out selects
+   * which of the two states is entered.
+   */
+  cinfo->global_state = cinfo->raw_data_out ? DSTATE_RAW_OK : DSTATE_SCANNING;
+  return TRUE;
+}
+
+
+/*
+ * Read some scanlines of data from the JPEG decompressor.
+ *
+ * The return value will be the number of lines actually read.
+ * This may be less than the number requested in several cases,
+ * including bottom of image, data source suspension, and operating
+ * modes that emit multiple scanlines at a time.
+ *
+ * Note: we warn about excess calls to jpeg_read_scanlines() since
+ * this likely signals an application programmer error.  However,
+ * an oversize buffer (max_lines > scanlines remaining) is not an error.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_scanlines (j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+		     JDIMENSION max_lines)
+{
+  JDIMENSION lines_done = 0;	/* passed by address to process_data */
+
+  if (cinfo->global_state != DSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (! (cinfo->output_scanline < cinfo->output_height)) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Report the current position to the progress monitor, if installed */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_limit = (long) cinfo->output_height;
+    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* Let the main controller produce rows; the number written comes back
+   * through lines_done. */
+  (*cinfo->main->process_data) (cinfo, scanlines, &lines_done, max_lines);
+  cinfo->output_scanline += lines_done;
+  return lines_done;
+}
+
+
+/*
+ * Alternate entry point to read raw data.
+ * Processes exactly one iMCU row per call, unless suspended.
+ */
+
+GLOBAL(JDIMENSION)
+jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data,
+		    JDIMENSION max_lines)
+{
+  JDIMENSION imcu_lines;	/* sample rows in one iMCU row */
+
+  if (cinfo->global_state != DSTATE_RAW_OK)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (! (cinfo->output_scanline < cinfo->output_height)) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Report the current position to the progress monitor, if installed */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_limit = (long) cinfo->output_height;
+    cinfo->progress->pass_counter = (long) cinfo->output_scanline;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+  }
+
+  /* The caller's buffer has to hold a complete iMCU row. */
+  imcu_lines = cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size;
+  if (imcu_lines > max_lines)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Decode straight into the caller's buffer. */
+  if ((*cinfo->coef->decompress_data) (cinfo, data)) {
+    /* One iMCU row was delivered; account for it. */
+    cinfo->output_scanline += imcu_lines;
+    return imcu_lines;
+  }
+  return 0;			/* suspension forced, can do nothing more */
+}
+
+
+/* Additional entry points for buffered-image mode. */
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Initialize for an output pass in buffered-image mode.
+ */
+
+GLOBAL(boolean)
+jpeg_start_output (j_decompress_ptr cinfo, int scan_number)
+{
+  if (! (cinfo->global_state == DSTATE_BUFIMAGE ||
+	 cinfo->global_state == DSTATE_PRESCAN))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Clamp the request: at least 1, and once EOI has been reached no
+   * larger than the input scan number. */
+  if (scan_number < 1)
+    scan_number = 1;
+  if (cinfo->inputctl->eoi_reached && cinfo->input_scan_number < scan_number)
+    scan_number = cinfo->input_scan_number;
+  cinfo->output_scan_number = scan_number;
+  /* Run any dummy output passes, then prepare the real one */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Finish up after an output pass in buffered-image mode.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_finish_output (j_decompress_ptr cinfo)
+{
+  if (cinfo->buffered_image && (cinfo->global_state == DSTATE_SCANNING ||
+				 cinfo->global_state == DSTATE_RAW_OK)) {
+    /* End the current output pass.  The pass is allowed to be incomplete:
+     * no check is made that every scanline was read. */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    cinfo->global_state = DSTATE_BUFPOST;
+  } else if (cinfo->global_state != DSTATE_BUFPOST) {
+    /* Only a repeat call after suspension (BUFPOST) is legal here */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Keep reading until input has moved past the output scan, or EOI */
+  while (! (cinfo->input_scan_number > cinfo->output_scan_number ||
+	    cinfo->inputctl->eoi_reached)) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return FALSE;		/* Suspend, come back later */
+  }
+  cinfo->global_state = DSTATE_BUFIMAGE;
+  return TRUE;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
diff --git a/src/libjpeg/jdatadst.c b/src/libjpeg/jdatadst.c
new file mode 100644
index 0000000..ccd9283
--- /dev/null
+++ b/src/libjpeg/jdatadst.c
@@ -0,0 +1,151 @@
+/*
+ * jdatadst.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains compression data destination routines for the case of
+ * emitting JPEG data to a file (or any stdio stream).  While these routines
+ * are sufficient for most applications, some will want to use a different
+ * destination manager.
+ * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
+ * JOCTETs into 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+/* Expanded data destination object for stdio output */
+
+typedef struct {
+  struct jpeg_destination_mgr pub; /* public fields; first member, since cinfo->dest is cast to my_dest_ptr */
+
+  FILE * outfile;		/* target stream, as passed to jpeg_stdio_dest */
+  JOCTET * buffer;		/* start of buffer (OUTPUT_BUF_SIZE JOCTETs) */
+} my_destination_mgr;
+
+typedef my_destination_mgr * my_dest_ptr;
+
+#define OUTPUT_BUF_SIZE  4096	/* choose an efficiently fwrite'able size */
+
+
+/*
+ * Initialize destination --- called by jpeg_start_compress
+ * before any data is actually written.
+ */
+
+METHODDEF(void)
+init_destination (j_compress_ptr cinfo)
+{
+  my_dest_ptr mgr = (my_dest_ptr) cinfo->dest;
+  JOCTET * outbuf;
+
+  /* The buffer comes from the image pool; released when done with image */
+  outbuf = (JOCTET *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, OUTPUT_BUF_SIZE * SIZEOF(JOCTET));
+
+  mgr->buffer = mgr->pub.next_output_byte = outbuf;
+  mgr->pub.free_in_buffer = OUTPUT_BUF_SIZE;
+}
+
+
+/*
+ * Empty the output buffer --- called whenever buffer fills up.
+ *
+ * In typical applications, this should write the entire output buffer
+ * (ignoring the current state of next_output_byte & free_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been dumped.
+ *
+ * In applications that need to be able to suspend compression due to output
+ * overrun, a FALSE return indicates that the buffer cannot be emptied now.
+ * In this situation, the compressor will return to its caller (possibly with
+ * an indication that it has not accepted all the supplied scanlines).  The
+ * application should resume compression after it has made more room in the
+ * output buffer.  Note that there are substantial restrictions on the use of
+ * suspension --- see the documentation.
+ *
+ * When suspending, the compressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_output_byte & free_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point will be regenerated after resumption, so do not
+ * write it out when emptying the buffer externally.
+ */
+
+METHODDEF(boolean)
+empty_output_buffer (j_compress_ptr cinfo)
+{
+  my_dest_ptr mgr = (my_dest_ptr) cinfo->dest;
+  size_t written;
+
+  /* Always dump the complete buffer, whatever the pointer/count say */
+  written = JFWRITE(mgr->outfile, mgr->buffer, OUTPUT_BUF_SIZE);
+  if (written != (size_t) OUTPUT_BUF_SIZE)
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+  /* Buffer is empty again: rewind the pointer and the free count */
+  mgr->pub.free_in_buffer = OUTPUT_BUF_SIZE;
+  mgr->pub.next_output_byte = mgr->buffer;
+  return TRUE;
+}
+
+
+/*
+ * Terminate destination --- called by jpeg_finish_compress
+ * after all data has been written.  Usually needs to flush buffer.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_destination (j_compress_ptr cinfo)
+{
+  my_dest_ptr mgr = (my_dest_ptr) cinfo->dest;
+  size_t pending = OUTPUT_BUF_SIZE - mgr->pub.free_in_buffer;
+
+  /* Write out whatever part of the buffer is still occupied; a short
+   * write is reported as a file-write error.
+   */
+  if (pending > 0 && JFWRITE(mgr->outfile, mgr->buffer, pending) != pending)
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+  JFFLUSH(mgr->outfile);
+  /* Catch any error the stream has recorded */
+  if (JFERROR(mgr->outfile))
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+}
+
+
+/*
+ * Prepare for output to a stdio stream.
+ * The caller must have already opened the stream, and is responsible
+ * for closing it after finishing compression.
+ */
+
+GLOBAL(void)
+jpeg_stdio_dest (j_compress_ptr cinfo, FILE * outfile)
+{
+  my_dest_ptr mgr;
+
+  /* Allocate the manager only once per JPEG object, in the permanent pool,
+   * so several images can go to the same file without calling
+   * jpeg_stdio_dest again.  The flip side: alternating between this manager
+   * and a different one on the same JPEG object is dangerous, since the
+   * two private structs may differ in size.  Caveat programmer.
+   */
+  if (cinfo->dest == NULL) {	/* nothing installed on this object yet */
+    cinfo->dest = (struct jpeg_destination_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  SIZEOF(my_destination_mgr));
+  }
+
+  mgr = (my_dest_ptr) cinfo->dest;
+  mgr->outfile = outfile;
+  mgr->pub.term_destination = term_destination;
+  mgr->pub.empty_output_buffer = empty_output_buffer;
+  mgr->pub.init_destination = init_destination;
+}
diff --git a/src/libjpeg/jdatasrc.c b/src/libjpeg/jdatasrc.c
new file mode 100644
index 0000000..edc752b
--- /dev/null
+++ b/src/libjpeg/jdatasrc.c
@@ -0,0 +1,212 @@
+/*
+ * jdatasrc.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains decompression data source routines for the case of
+ * reading JPEG data from a file (or any stdio stream).  While these routines
+ * are sufficient for most applications, some will want to use a different
+ * source manager.
+ * IMPORTANT: we assume that fread() will correctly transcribe an array of
+ * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+/* Expanded data source object for stdio input */
+
+typedef struct {
+  struct jpeg_source_mgr pub;	/* public fields; kept first because cinfo->src is cast to my_src_ptr */
+
+  FILE * infile;		/* source stream */
+  JOCTET * buffer;		/* start of buffer; jpeg_stdio_src allocates INPUT_BUF_SIZE JOCTETs */
+  boolean start_of_file;	/* TRUE from init_source until fill_input_buffer first returns */
+} my_source_mgr;
+
+typedef my_source_mgr * my_src_ptr;
+
+#define INPUT_BUF_SIZE  4096	/* choose an efficiently fread'able size */
+
+
+/*
+ * Initialize source --- called by jpeg_read_header
+ * before any data is actually read.
+ */
+
+METHODDEF(void)
+init_source (j_decompress_ptr cinfo)
+{
+  my_src_ptr mgr = (my_src_ptr) cinfo->src;
+
+  /* Re-arm the empty-file check for the upcoming image.  The input buffer
+   * itself is deliberately left alone: when several images are read from
+   * one stream it may already hold the beginning of the next image.
+   */
+  mgr->start_of_file = TRUE;
+}
+
+
+/*
+ * Fill the input buffer --- called whenever buffer is emptied.
+ *
+ * In typical applications, this should read fresh data into the buffer
+ * (ignoring the current state of next_input_byte & bytes_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been reloaded.  It is not necessary to
+ * fill the buffer entirely, only to obtain at least one more byte.
+ *
+ * There is no such thing as an EOF return.  If the end of the file has been
+ * reached, the routine has a choice of ERREXIT() or inserting fake data into
+ * the buffer.  In most cases, generating a warning message and inserting a
+ * fake EOI marker is the best course of action --- this will allow the
+ * decompressor to output however much of the image is there.  However,
+ * the resulting error message is misleading if the real problem is an empty
+ * input file, so we handle that case specially.
+ *
+ * In applications that need to be able to suspend decompression due to input
+ * not being available yet, a FALSE return indicates that no more data can be
+ * obtained right now, but more may be forthcoming later.  In this situation,
+ * the decompressor will return to its caller (with an indication of the
+ * number of scanlines it has read, if any).  The application should resume
+ * decompression after it has loaded more data into the input buffer.  Note
+ * that there are substantial restrictions on the use of suspension --- see
+ * the documentation.
+ *
+ * When suspending, the decompressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_input_byte & bytes_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point must be rescanned after resumption, so move it to
+ * the front of the buffer rather than discarding it.
+ */
+
+METHODDEF(boolean)
+fill_input_buffer (j_decompress_ptr cinfo)
+{
+  my_src_ptr mgr = (my_src_ptr) cinfo->src;
+  size_t got = JFREAD(mgr->infile, mgr->buffer, INPUT_BUF_SIZE);
+
+  if (got == 0) {		/* size_t is unsigned, so "no data" is exactly 0 */
+    /* Nothing at all in the file is fatal; running dry later only warns. */
+    if (mgr->start_of_file)
+      ERREXIT(cinfo, JERR_INPUT_EMPTY);
+    WARNMS(cinfo, JWRN_JPEG_EOF);
+    /* Synthesize an EOI marker so the decoder can finish what it has. */
+    mgr->buffer[0] = (JOCTET) 0xFF;
+    mgr->buffer[1] = (JOCTET) JPEG_EOI;
+    got = 2;
+  }
+
+  /* Publish the refilled buffer and note that data has now been seen. */
+  mgr->start_of_file = FALSE;
+  mgr->pub.bytes_in_buffer = got;
+  mgr->pub.next_input_byte = mgr->buffer;
+
+  return TRUE;
+}
+
+
+/*
+ * Skip data --- used to skip over a potentially large amount of
+ * uninteresting data (such as an APPn marker).
+ *
+ * Writers of suspendable-input applications must note that skip_input_data
+ * is not granted the right to give a suspension return.  If the skip extends
+ * beyond the data currently in the buffer, the buffer can be marked empty so
+ * that the next read will cause a fill_input_buffer call that can suspend.
+ * Arranging for additional bytes to be discarded before reloading the input
+ * buffer is the application writer's problem.
+ */
+
+METHODDEF(void)
+skip_input_data (j_decompress_ptr cinfo, long num_bytes)
+{
+  my_src_ptr mgr = (my_src_ptr) cinfo->src;
+
+  if (num_bytes <= 0)		/* zero or negative request: nothing to skip */
+    return;
+
+  /* Simple strategy: read and throw away whole buffers until the remaining
+   * skip distance fits in the current one.  fseek() is avoided because it
+   * cannot be used on pipes, and large skips are infrequent anyway.
+   * fill_input_buffer in this module always returns TRUE, so its result
+   * is ignored and suspension never has to be handled here.
+   */
+  while (num_bytes > (long) mgr->pub.bytes_in_buffer) {
+    num_bytes -= (long) mgr->pub.bytes_in_buffer;
+    (void) fill_input_buffer(cinfo);
+  }
+  mgr->pub.bytes_in_buffer -= (size_t) num_bytes;
+  mgr->pub.next_input_byte += (size_t) num_bytes;
+}
+
+
+/*
+ * An additional method that can be provided by data source modules is the
+ * resync_to_restart method for error recovery in the presence of RST markers.
+ * For the moment, this source module just uses the default resync method
+ * provided by the JPEG library.  That method assumes that no backtracking
+ * is possible.
+ */
+
+
+/*
+ * Terminate source --- called by jpeg_finish_decompress
+ * after all data has been read.  Often a no-op.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_source (j_decompress_ptr cinfo)
+{
+  /* Intentionally empty: nothing is released here; closing the stream is the caller's job (see jpeg_stdio_src) */
+}
+
+
+/*
+ * Prepare for input from a stdio stream.
+ * The caller must have already opened the stream, and is responsible
+ * for closing it after finishing decompression.
+ */
+
+GLOBAL(void)
+jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile)
+{
+  my_src_ptr mgr = (my_src_ptr) cinfo->src;
+
+  /* On first use for this JPEG object, create both the manager and its
+   * input buffer in the permanent pool.  Keeping them across images lets a
+   * sequence of JPEG images be read from one stream after a single call
+   * (a buffer dropped at the end of one image could take the start of the
+   * next image with it).  For the same reason, do not alternate this
+   * manager with a different source manager on one JPEG object.
+   */
+  if (mgr == NULL) {
+    mgr = (my_src_ptr)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  SIZEOF(my_source_mgr));
+    cinfo->src = (struct jpeg_source_mgr *) mgr;
+    mgr->buffer = (JOCTET *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				  INPUT_BUF_SIZE * SIZEOF(JOCTET));
+  }
+
+  /* Bind the stream; an empty buffer forces fill_input_buffer on first read. */
+  mgr->infile = infile;
+  mgr->pub.next_input_byte = NULL;
+  mgr->pub.bytes_in_buffer = 0;
+  mgr->pub.init_source = init_source;
+  mgr->pub.fill_input_buffer = fill_input_buffer;
+  mgr->pub.skip_input_data = skip_input_data;
+  mgr->pub.resync_to_restart = jpeg_resync_to_restart;
+  mgr->pub.term_source = term_source;
+}
diff --git a/src/libjpeg/jdcoefct.c b/src/libjpeg/jdcoefct.c
new file mode 100644
index 0000000..4938d20
--- /dev/null
+++ b/src/libjpeg/jdcoefct.c
@@ -0,0 +1,736 @@
+/*
+ * jdcoefct.c
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the coefficient buffer controller for decompression.
+ * This controller is the top level of the JPEG decompressor proper.
+ * The coefficient buffer lies between entropy decoding and inverse-DCT steps.
+ *
+ * In buffered-image mode, this controller is the interface between
+ * input-oriented processing and output-oriented processing.
+ * Also, the input side (only) is used when reading a file for transcoding.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+/* Block smoothing is only applicable for progressive JPEG, so: */
+#ifndef D_PROGRESSIVE_SUPPORTED
+#undef BLOCK_SMOOTHING_SUPPORTED
+#endif
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_coef_controller pub; /* public fields */
+
+  /* These variables keep track of the current location of the input side. */
+  /* cinfo->input_iMCU_row is also used for this. */
+  JDIMENSION MCU_ctr;		/* counts MCUs processed in current row; resume column after a suspension */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row; resume row after a suspension */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed; computed by start_iMCU_row */
+
+  /* The output side's location is represented by cinfo->output_iMCU_row. */
+
+  /* In single-pass modes, it's sufficient to buffer just one MCU.
+   * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks,
+   * and let the entropy decoder write into that workspace each time.
+   * (On 80x86, the workspace is FAR even though it's not really very big;
+   * this is to keep the module interfaces unchanged when a large coefficient
+   * buffer is necessary.)
+   * In multi-pass modes, this array points to the current MCU's blocks
+   * within the virtual arrays; it is used only by the input side.
+   */
+  JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* In multi-pass modes, we need a virtual block array for each component. */
+  jvirt_barray_ptr whole_image[MAX_COMPONENTS];	/* indexed by component_index */
+#endif
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  /* When doing block smoothing, we latch coefficient Al values here */
+  int * coef_bits_latch;	/* NULL until smoothing_ok allocates num_components * SAVED_COEFS ints */
+#define SAVED_COEFS  6		/* latch slots per component; smoothing_ok fills slots [1..5] only */
+#endif
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+/* Forward declarations */
+METHODDEF(int) decompress_onepass	/* single-pass output routine */
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+METHODDEF(int) decompress_data		/* multi-pass output routine */
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#endif
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+LOCAL(boolean) smoothing_ok JPP((j_decompress_ptr cinfo));	/* consulted by start_output_pass */
+METHODDEF(int) decompress_smooth_data	/* multi-pass output routine with block smoothing */
+	JPP((j_decompress_ptr cinfo, JSAMPIMAGE output_buf));
+#endif
+
+
+LOCAL(void)
+start_iMCU_row (j_decompress_ptr cinfo)
+/* Input side: begin a new iMCU row by resetting the per-row counters. */
+{
+  my_coef_ptr ctl = (my_coef_ptr) cinfo->coef;
+  int mcu_rows;			/* MCU rows making up this iMCU row */
+
+  /* Interleaved scan: one MCU row per iMCU row.
+   * Noninterleaved scan: v_samp_factor MCU rows per iMCU row, except that
+   * the final iMCU row of the image holds only last_row_height of them.
+   */
+  if (cinfo->comps_in_scan > 1)
+    mcu_rows = 1;
+  else if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows-1))
+    mcu_rows = cinfo->cur_comp_info[0]->v_samp_factor;
+  else
+    mcu_rows = cinfo->cur_comp_info[0]->last_row_height;
+
+  ctl->MCU_rows_per_iMCU_row = mcu_rows;
+  ctl->MCU_vert_offset = 0;
+  ctl->MCU_ctr = 0;
+}
+
+
+/*
+ * Initialize for an input processing pass.
+ */
+
+METHODDEF(void)
+start_input_pass (j_decompress_ptr cinfo)
+{
+  cinfo->input_iMCU_row = 0;	/* input side restarts at the first iMCU row */
+  start_iMCU_row(cinfo);	/* must come second: it reads input_iMCU_row */
+}
+
+
+/*
+ * Initialize for an output processing pass.
+ */
+
+METHODDEF(void)
+start_output_pass (j_decompress_ptr cinfo)
+{
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  my_coef_ptr ctl = (my_coef_ptr) cinfo->coef;
+
+  /* Virtual arrays exist only in multipass mode; there, pick the output
+   * routine for this pass: smoothing if requested and applicable. */
+  if (ctl->pub.coef_arrays != NULL)
+    ctl->pub.decompress_data =
+      (cinfo->do_block_smoothing && smoothing_ok(cinfo))
+	? decompress_smooth_data : decompress_data;
+
+#endif
+  cinfo->output_iMCU_row = 0;
+}
+
+
+/*
+ * Decompress and return some data in the single-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Input and output must run in lockstep since we have only a one-MCU buffer.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(int)
+decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;	/* index of the rightmost MCU column */
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;	/* index of the bottom iMCU row */
+  int blkn, ci, xindex, yindex, yoffset, useful_width;
+  JSAMPARRAY output_ptr;
+  JDIMENSION start_col, output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+
+  /* Loop to process as much as one whole iMCU row; both loops start from the counters saved at a suspension */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
+	 MCU_col_num++) {
+      /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
+      jzero_far((void FAR *) coef->MCU_buffer[0],
+		(size_t) (cinfo->blocks_in_MCU * SIZEOF(JBLOCK)));
+      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return JPEG_SUSPENDED;
+      }
+      /* Determine where data should go in output_buf and do the IDCT thing.
+       * We skip dummy blocks at the right and bottom edges (but blkn gets
+       * incremented past them!).  Note the inner loop relies on having
+       * allocated the MCU_buffer[] blocks sequentially.
+       */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	/* Don't bother to IDCT an uninteresting component. */
+	if (! compptr->component_needed) {
+	  blkn += compptr->MCU_blocks;	/* still step past all of its blocks */
+	  continue;
+	}
+	inverse_DCT = cinfo->idct->inverse_DCT[compptr->component_index];
+	useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						    : compptr->last_col_width;
+	output_ptr = output_buf[compptr->component_index] +
+	  yoffset * compptr->DCT_scaled_size;
+	start_col = MCU_col_num * compptr->MCU_sample_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (cinfo->input_iMCU_row < last_iMCU_row ||
+	      yoffset+yindex < compptr->last_row_height) {	/* not a dummy block row at the bottom edge */
+	    output_col = start_col;
+	    for (xindex = 0; xindex < useful_width; xindex++) {
+	      (*inverse_DCT) (cinfo, compptr,
+			      (JCOEFPTR) coef->MCU_buffer[blkn+xindex],
+			      output_ptr, output_col);
+	      output_col += compptr->DCT_scaled_size;
+	    }
+	  }
+	  blkn += compptr->MCU_width;	/* advance a full block row, dummy blocks included */
+	  output_ptr += compptr->DCT_scaled_size;
+	}
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  cinfo->output_iMCU_row++;	/* output advances together with input here */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Dummy consume-input routine for single-pass operation.
+ */
+
+METHODDEF(int)
+dummy_consume_data (j_decompress_ptr cinfo)
+{
+  return JPEG_SUSPENDED;	/* Always indicate nothing was done; in single-pass mode decompress_onepass does the decoding */
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Consume input data and store it in the full-image coefficient buffer.
+ * We read as much as one fully interleaved MCU row ("iMCU" row) per call,
+ * ie, v_samp_factor block rows for each component in the scan.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ */
+
+METHODDEF(int)
+consume_data (j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  int blkn, ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];	/* per scan component: this iMCU row's block rows */
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       cinfo->input_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, TRUE);
+    /* Note: entropy decoder expects buffer to be zeroed,
+     * but this is handled automatically by the memory manager
+     * because we requested a pre-zeroed array.
+     */
+  }
+
+  /* Loop to process one whole iMCU row; both loops start from the counters saved at a suspension */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+	  for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+	    coef->MCU_buffer[blkn++] = buffer_ptr++;	/* one pointer per block; decoder writes into the array */
+	  }
+	}
+      }
+      /* Try to fetch the MCU. */
+      if (! (*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->MCU_ctr = MCU_col_num;
+	return JPEG_SUSPENDED;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Decompress and return some data in the multi-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image.
+ */
+
+METHODDEF(int)
+decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;	/* index of the bottom iMCU row */
+  JDIMENSION block_num;
+  int ci, block_row, block_rows;
+  JBLOCKARRAY buffer;
+  JBLOCKROW buffer_ptr;
+  JSAMPARRAY output_ptr;
+  JDIMENSION output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number < cinfo->output_scan_number ||
+	 (cinfo->input_scan_number == cinfo->output_scan_number &&
+	  cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {	/* input has not passed the row we want to output */
+    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to IDCT an uninteresting component. */
+    if (! compptr->component_needed)
+      continue;
+    /* Align the virtual buffer for this component. */
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[ci],
+       cinfo->output_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (cinfo->output_iMCU_row < last_iMCU_row)
+      block_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;	/* remainder 0 means the last iMCU row is full */
+    }
+    inverse_DCT = cinfo->idct->inverse_DCT[ci];
+    output_ptr = output_buf[ci];
+    /* Loop over all DCT blocks to be processed. */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      buffer_ptr = buffer[block_row];
+      output_col = 0;
+      for (block_num = 0; block_num < compptr->width_in_blocks; block_num++) {
+	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr,
+			output_ptr, output_col);
+	buffer_ptr++;
+	output_col += compptr->DCT_scaled_size;
+      }
+      output_ptr += compptr->DCT_scaled_size;	/* move down to where the next block row is written */
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+
+/*
+ * This code applies interblock smoothing as described by section K.8
+ * of the JPEG standard: the first 5 AC coefficients are estimated from
+ * the DC values of a DCT block and its 8 neighboring blocks.
+ * We apply smoothing only for progressive JPEG decoding, and only if
+ * the coefficients it can estimate are not yet known to full precision.
+ */
+
+/* Natural-order array positions of the first 5 zigzag-order coefficients */
+#define Q01_POS  1
+#define Q10_POS  8
+#define Q20_POS  16
+#define Q11_POS  9
+#define Q02_POS  2
+
+/*
+ * Determine whether block smoothing is applicable and safe.
+ * We also latch the current states of the coef_bits[] entries for the
+ * AC coefficients; otherwise, if the input side of the decompressor
+ * advances into a new scan, we might think the coefficients are known
+ * more accurately than they really are.
+ */
+
+LOCAL(boolean)
+smoothing_ok (j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  boolean smoothing_useful = FALSE;	/* becomes TRUE once any latched AC entry is nonzero */
+  int ci, coefi;
+  jpeg_component_info *compptr;
+  JQUANT_TBL * qtable;
+  int * coef_bits;
+  int * coef_bits_latch;	/* write cursor into coef->coef_bits_latch */
+
+  if (! cinfo->progressive_mode || cinfo->coef_bits == NULL)
+    return FALSE;
+
+  /* Allocate latch area if not already done */
+  if (coef->coef_bits_latch == NULL)
+    coef->coef_bits_latch = (int *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  cinfo->num_components *
+				  (SAVED_COEFS * SIZEOF(int)));
+  coef_bits_latch = coef->coef_bits_latch;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* All components' quantization values must already be latched. */
+    if ((qtable = compptr->quant_table) == NULL)
+      return FALSE;
+    /* Verify DC & first 5 AC quantizers are nonzero to avoid zero-divide. */
+    if (qtable->quantval[0] == 0 ||
+	qtable->quantval[Q01_POS] == 0 ||
+	qtable->quantval[Q10_POS] == 0 ||
+	qtable->quantval[Q20_POS] == 0 ||
+	qtable->quantval[Q11_POS] == 0 ||
+	qtable->quantval[Q02_POS] == 0)
+      return FALSE;
+    /* DC values must be at least partly known for all components. */
+    coef_bits = cinfo->coef_bits[ci];
+    if (coef_bits[0] < 0)
+      return FALSE;
+    /* Block smoothing is helpful if some AC coefficients remain inaccurate. */
+    for (coefi = 1; coefi <= 5; coefi++) {	/* latch slot 0 is never written */
+      coef_bits_latch[coefi] = coef_bits[coefi];
+      if (coef_bits[coefi] != 0)
+	smoothing_useful = TRUE;
+    }
+    coef_bits_latch += SAVED_COEFS;	/* step to the next component's slots */
+  }
+
+  return smoothing_useful;
+}
+
+
+/*
+ * Variant of decompress_data for use when doing block smoothing.
+ */
+
+METHODDEF(int)
+decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;	/* index of the bottom iMCU row */
+  JDIMENSION block_num, last_block_column;
+  int ci, block_row, block_rows, access_rows;
+  JBLOCKARRAY buffer;
+  JBLOCKROW buffer_ptr, prev_block_row, next_block_row;
+  JSAMPARRAY output_ptr;
+  JDIMENSION output_col;
+  jpeg_component_info *compptr;
+  inverse_DCT_method_ptr inverse_DCT;
+  boolean first_row, last_row;	/* is this the top / bottom iMCU row of the image? */
+  JBLOCK workspace;		/* private copy of the current block; estimates go here, not into the array */
+  int *coef_bits;
+  JQUANT_TBL *quanttbl;
+  INT32 Q00,Q01,Q02,Q10,Q11,Q20, num;
+  int DC1,DC2,DC3,DC4,DC5,DC6,DC7,DC8,DC9;	/* 3x3 DC neighborhood: 1-3 row above, 4-6 this row, 7-9 row below; DC5 = current */
+  int Al, pred;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+	 ! cinfo->inputctl->eoi_reached) {
+    if (cinfo->input_scan_number == cinfo->output_scan_number) {
+      /* If input is working on current scan, we ordinarily want it to
+       * have completed the current row.  But if input scan is DC,
+       * we want it to keep one row ahead so that next block row's DC
+       * values are up to date.
+       */
+      JDIMENSION delta = (cinfo->Ss == 0) ? 1 : 0;
+      if (cinfo->input_iMCU_row > cinfo->output_iMCU_row+delta)
+	break;
+    }
+    if ((*cinfo->inputctl->consume_input)(cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to IDCT an uninteresting component. */
+    if (! compptr->component_needed)
+      continue;
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (cinfo->output_iMCU_row < last_iMCU_row) {
+      block_rows = compptr->v_samp_factor;
+      access_rows = block_rows * 2; /* this and next iMCU row */
+      last_row = FALSE;
+    } else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      block_rows = (int) (compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;	/* remainder 0 means the last iMCU row is full */
+      access_rows = block_rows; /* this iMCU row only */
+      last_row = TRUE;
+    }
+    /* Align the virtual buffer for this component. */
+    if (cinfo->output_iMCU_row > 0) {
+      access_rows += compptr->v_samp_factor; /* prior iMCU row too */
+      buffer = (*cinfo->mem->access_virt_barray)
+	((j_common_ptr) cinfo, coef->whole_image[ci],
+	 (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
+	 (JDIMENSION) access_rows, FALSE);
+      buffer += compptr->v_samp_factor;	/* point to current iMCU row */
+      first_row = FALSE;
+    } else {
+      buffer = (*cinfo->mem->access_virt_barray)
+	((j_common_ptr) cinfo, coef->whole_image[ci],
+	 (JDIMENSION) 0, (JDIMENSION) access_rows, FALSE);
+      first_row = TRUE;
+    }
+    /* Fetch component-dependent info */
+    coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);	/* values latched by smoothing_ok */
+    quanttbl = compptr->quant_table;
+    Q00 = quanttbl->quantval[0];
+    Q01 = quanttbl->quantval[Q01_POS];
+    Q10 = quanttbl->quantval[Q10_POS];
+    Q20 = quanttbl->quantval[Q20_POS];
+    Q11 = quanttbl->quantval[Q11_POS];
+    Q02 = quanttbl->quantval[Q02_POS];
+    inverse_DCT = cinfo->idct->inverse_DCT[ci];
+    output_ptr = output_buf[ci];
+    /* Loop over all DCT blocks to be processed. */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      buffer_ptr = buffer[block_row];
+      if (first_row && block_row == 0)
+	prev_block_row = buffer_ptr;	/* no row above: reuse the current row */
+      else
+	prev_block_row = buffer[block_row-1];	/* for block_row 0 this reaches back into the prior iMCU row */
+      if (last_row && block_row == block_rows-1)
+	next_block_row = buffer_ptr;	/* no row below: reuse the current row */
+      else
+	next_block_row = buffer[block_row+1];
+      /* We fetch the surrounding DC values using a sliding-register approach.
+       * Initialize all nine here so as to do the right thing on narrow pics.
+       */
+      DC1 = DC2 = DC3 = (int) prev_block_row[0][0];
+      DC4 = DC5 = DC6 = (int) buffer_ptr[0][0];
+      DC7 = DC8 = DC9 = (int) next_block_row[0][0];
+      output_col = 0;
+      last_block_column = compptr->width_in_blocks - 1;
+      for (block_num = 0; block_num <= last_block_column; block_num++) {
+	/* Fetch current DCT block into workspace so we can modify it. */
+	jcopy_block_row(buffer_ptr, (JBLOCKROW) workspace, (JDIMENSION) 1);
+	/* Update DC values */
+	if (block_num < last_block_column) {	/* at the last column the right-hand values keep the previous ones */
+	  DC3 = (int) prev_block_row[1][0];
+	  DC6 = (int) buffer_ptr[1][0];
+	  DC9 = (int) next_block_row[1][0];
+	}
+	/* Compute coefficient estimates per K.8.
+	 * An estimate is applied only if coefficient is still zero,
+	 * and is not known to be fully accurate.
+	 */
+	/* AC01 */
+	if ((Al=coef_bits[1]) != 0 && workspace[1] == 0) {
+	  num = 36 * Q00 * (DC4 - DC6);
+	  if (num >= 0) {
+	    pred = (int) (((Q01<<7) + num) / (Q01<<8));	/* num / (256*Q01), rounded to nearest */
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;	/* clamp magnitude below 2^Al */
+	  } else {
+	    pred = (int) (((Q01<<7) - num) / (Q01<<8));	/* same, computed on the magnitude */
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;	/* restore the sign */
+	  }
+	  workspace[1] = (JCOEF) pred;
+	}
+	/* AC10 */
+	if ((Al=coef_bits[2]) != 0 && workspace[8] == 0) {
+	  num = 36 * Q00 * (DC2 - DC8);
+	  if (num >= 0) {
+	    pred = (int) (((Q10<<7) + num) / (Q10<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q10<<7) - num) / (Q10<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[8] = (JCOEF) pred;
+	}
+	/* AC20 */
+	if ((Al=coef_bits[3]) != 0 && workspace[16] == 0) {
+	  num = 9 * Q00 * (DC2 + DC8 - 2*DC5);
+	  if (num >= 0) {
+	    pred = (int) (((Q20<<7) + num) / (Q20<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q20<<7) - num) / (Q20<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[16] = (JCOEF) pred;
+	}
+	/* AC11 */
+	if ((Al=coef_bits[4]) != 0 && workspace[9] == 0) {
+	  num = 5 * Q00 * (DC1 - DC3 - DC7 + DC9);
+	  if (num >= 0) {
+	    pred = (int) (((Q11<<7) + num) / (Q11<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q11<<7) - num) / (Q11<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[9] = (JCOEF) pred;
+	}
+	/* AC02 */
+	if ((Al=coef_bits[5]) != 0 && workspace[2] == 0) {
+	  num = 9 * Q00 * (DC4 + DC6 - 2*DC5);
+	  if (num >= 0) {
+	    pred = (int) (((Q02<<7) + num) / (Q02<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	  } else {
+	    pred = (int) (((Q02<<7) - num) / (Q02<<8));
+	    if (Al > 0 && pred >= (1<<Al))
+	      pred = (1<<Al)-1;
+	    pred = -pred;
+	  }
+	  workspace[2] = (JCOEF) pred;
+	}
+	/* OK, do the IDCT */
+	(*inverse_DCT) (cinfo, compptr, (JCOEFPTR) workspace,
+			output_ptr, output_col);
+	/* Advance for next column: slide each row's three DC registers left by one */
+	DC1 = DC2; DC2 = DC3;
+	DC4 = DC5; DC5 = DC6;
+	DC7 = DC8; DC8 = DC9;
+	buffer_ptr++, prev_block_row++, next_block_row++;
+	output_col += compptr->DCT_scaled_size;
+      }
+      output_ptr += compptr->DCT_scaled_size;	/* move down to where the next block row is written */
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* BLOCK_SMOOTHING_SUPPORTED */
+
+
+/*
+ * Initialize coefficient buffer controller.
+ */
+
+GLOBAL(void)
+jinit_d_coef_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_coef_ptr coef;
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_coef_controller));
+  cinfo->coef = (struct jpeg_d_coef_controller *) coef;
+  coef->pub.start_input_pass = start_input_pass;
+  coef->pub.start_output_pass = start_output_pass;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  coef->coef_bits_latch = NULL;	/* smoothing_ok allocates it on first use */
+#endif
+
+  /* Create the coefficient buffer. */
+  if (need_full_buffer) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+    /* Allocate a full-image virtual array for each component, */
+    /* padded to a multiple of samp_factor DCT blocks in each direction. */
+    /* Note we ask for a pre-zeroed array. */
+    int ci, access_rows;
+    jpeg_component_info *compptr;
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      access_rows = compptr->v_samp_factor;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+      /* If block smoothing could be used, need a bigger window */
+      if (cinfo->progressive_mode)
+	access_rows *= 3;	/* prior, current and next iMCU rows, as decompress_smooth_data accesses */
+#endif
+      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, TRUE,
+	 (JDIMENSION) jround_up((long) compptr->width_in_blocks,
+				(long) compptr->h_samp_factor),
+	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
+				(long) compptr->v_samp_factor),
+	 (JDIMENSION) access_rows);
+    }
+    coef->pub.consume_data = consume_data;
+    coef->pub.decompress_data = decompress_data;	/* start_output_pass may switch this to decompress_smooth_data */
+    coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* We only need a single-MCU buffer. */
+    JBLOCKROW buffer;
+    int i;
+
+    buffer = (JBLOCKROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  D_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+    for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
+      coef->MCU_buffer[i] = buffer + i;	/* consecutive blocks; decompress_onepass relies on this layout */
+    }
+    coef->pub.consume_data = dummy_consume_data;
+    coef->pub.decompress_data = decompress_onepass;
+    coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
+  }
+}
diff --git a/src/libjpeg/jdcolor.c b/src/libjpeg/jdcolor.c
new file mode 100644
index 0000000..6c04dfe
--- /dev/null
+++ b/src/libjpeg/jdcolor.c
@@ -0,0 +1,396 @@
+/*
+ * jdcolor.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains output colorspace conversion routines.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_deconverter pub; /* public fields; kept first for casts */
+
+  /* Private state for YCC->RGB conversion (see build_ycc_rgb_table) */
+  int * Cr_r_tab;		/* => Cr to R offsets, already rounded to int */
+  int * Cb_b_tab;		/* => Cb to B offsets, already rounded to int */
+  INT32 * Cr_g_tab;		/* => Cr to G terms, scaled by 2^SCALEBITS */
+  INT32 * Cb_g_tab;		/* => Cb to G terms, scaled, plus ONE_HALF */
+} my_color_deconverter;
+
+typedef my_color_deconverter * my_cconvert_ptr;
+
+
+/**************** YCbCr -> RGB conversion: most common case **************/
+
+/*
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
+ * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * The conversion equations to be implemented are therefore
+ *	R = Y                + 1.40200 * Cr
+ *	G = Y - 0.34414 * Cb - 0.71414 * Cr
+ *	B = Y + 1.77200 * Cb
+ * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ *
+ * To avoid floating-point arithmetic, we represent the fractional constants
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
+ * Notice that Y, being an integral input, does not contribute any fraction
+ * so it need not participate in the rounding.
+ *
+ * For even more speed, we avoid doing any multiplications in the inner loop
+ * by precalculating the constants times Cb and Cr for all possible values.
+ * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
+ * for 12-bit samples it is still acceptable.  It's not very reasonable for
+ * 16-bit samples, but if you want lossless storage you shouldn't be changing
+ * colorspace anyway.
+ * The Cr=>R and Cb=>B values can be rounded to integers in advance; the
+ * values for the G calculation are left scaled up, since we must add them
+ * together before rounding.
+ */
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))	/* 0.5 at this scale */
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5)) /* for x >= 0 */
+
+
+/*
+ * Allocate and fill the four lookup tables used for YCC->RGB conversion.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  cconvert->Cr_r_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cb_b_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cr_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+  cconvert->Cb_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input sample value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 1.40200 * x */
+    cconvert->Cr_r_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.77200 * x */
+    cconvert->Cb_b_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.71414 * x */
+    cconvert->Cr_g_tab[i] = (- FIX(0.71414)) * x;
+    /* Cb=>G value is scaled-up -0.34414 * x */
+    /* We also add in ONE_HALF here so that the inner loop need not do it */
+    cconvert->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the output colorspace.
+ *
+ * Note that we change from noninterleaved, one-plane-per-component format
+ * to interleaved-pixel format.  The output buffer is therefore three times
+ * as wide as the input buffer.
+ * A starting row offset is provided only for the input buffer.  The caller
+ * can easily adjust the passed output_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+METHODDEF(void)
+ycc_rgb_convert (j_decompress_ptr cinfo,
+		 JSAMPIMAGE input_buf, JDIMENSION input_row,
+		 JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];	/* Y plane */
+    inptr1 = input_buf[1][input_row];	/* Cb plane */
+    inptr2 = input_buf[2][input_row];	/* Cr plane */
+    input_row++;
+    outptr = *output_buf++;		/* next interleaved output row */
+    for (col = 0; col < num_cols; col++) {
+      y  = GETJSAMPLE(inptr0[col]);
+      cb = GETJSAMPLE(inptr1[col]);
+      cr = GETJSAMPLE(inptr2[col]);
+      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
+      outptr[RGB_GREEN] = range_limit[y +	/* rounding is in Cbgtab */
+			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+						 SCALEBITS))];
+      outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
+      outptr += RGB_PIXELSIZE;		/* advance one output pixel */
+    }
+  }
+}
+
+
+/**************** Cases other than YCbCr -> RGB **************/
+
+
+/*
+ * Color conversion for no colorspace change: just copy the data,
+ * converting from separate-planes to interleaved representation.
+ */
+
+METHODDEF(void)
+null_convert (j_decompress_ptr cinfo,
+	      JSAMPIMAGE input_buf, JDIMENSION input_row,
+	      JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW inptr, outptr;
+  register JDIMENSION count;
+  register int num_components = cinfo->num_components;
+  JDIMENSION num_cols = cinfo->output_width;
+  int ci;
+
+  while (--num_rows >= 0) {
+    for (ci = 0; ci < num_components; ci++) {
+      inptr = input_buf[ci][input_row];
+      outptr = output_buf[0] + ci;	/* slot ci of the first output pixel */
+      for (count = num_cols; count > 0; count--) {
+	*outptr = *inptr++;	/* needn't bother with GETJSAMPLE() here */
+	outptr += num_components;	/* same slot in the next pixel */
+      }
+    }
+    input_row++;
+    output_buf++;		/* output_buf[0] is now the next output row */
+  }
+}
+
+
+/*
+ * Color conversion for grayscale: just copy the data.
+ * This also works for YCbCr -> grayscale conversion, in which
+ * we just copy the Y (luminance) component and ignore chrominance.
+ */
+
+METHODDEF(void)
+grayscale_convert (j_decompress_ptr cinfo,
+		   JSAMPIMAGE input_buf, JDIMENSION input_row,
+		   JSAMPARRAY output_buf, int num_rows)
+{
+  jcopy_sample_rows(input_buf[0], (int) input_row, output_buf, 0,
+		    num_rows, cinfo->output_width);	/* reads plane 0 only */
+}
+
+
+/*
+ * Convert grayscale to RGB: just duplicate the graylevel three times.
+ * This is provided to support applications that don't want to cope
+ * with grayscale as a separate case.
+ */
+
+METHODDEF(void)
+gray_rgb_convert (j_decompress_ptr cinfo,
+		  JSAMPIMAGE input_buf, JDIMENSION input_row,
+		  JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr = input_buf[0][input_row++];	/* component 0 holds the gray level */
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      /* We can dispense with GETJSAMPLE() here */
+      outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
+      outptr += RGB_PIXELSIZE;		/* advance one output pixel */
+    }
+  }
+}
+
+
+/*
+ * Adobe-style YCCK->CMYK conversion.
+ * We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same
+ * conversion as above, while passing K (black) unchanged.
+ * We assume build_ycc_rgb_table has been called.
+ */
+
+METHODDEF(void)
+ycck_cmyk_convert (j_decompress_ptr cinfo,
+		   JSAMPIMAGE input_buf, JDIMENSION input_row,
+		   JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2, inptr3;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    inptr3 = input_buf[3][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = GETJSAMPLE(inptr0[col]);
+      cb = GETJSAMPLE(inptr1[col]);
+      cr = GETJSAMPLE(inptr2[col]);
+      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];	/* C, from red */
+      outptr[1] = range_limit[MAXJSAMPLE - (y +		/* M, from green */
+			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+						 SCALEBITS)))];
+      outptr[2] = range_limit[MAXJSAMPLE - (y + Cbbtab[cb])];	/* Y, from blue */
+      /* K passes through unchanged */
+      outptr[3] = inptr3[col];	/* don't need GETJSAMPLE here */
+      outptr += 4;		/* four samples (C, M, Y, K) per output pixel */
+    }
+  }
+}
+
+
+/*
+ * Empty method for start_pass.
+ */
+
+METHODDEF(void)
+start_pass_dcolor (j_decompress_ptr cinfo)
+{
+  /* no work needed: conversion tables are built at module init time */
+}
+
+
+/*
+ * Module initialization routine for output colorspace conversion.
+ */
+
+GLOBAL(void)
+jinit_color_deconverter (j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert;
+  int ci;
+
+  cconvert = (my_cconvert_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_color_deconverter));
+  cinfo->cconvert = (struct jpeg_color_deconverter *) cconvert;
+  cconvert->pub.start_pass = start_pass_dcolor;
+
+  /* Make sure num_components agrees with jpeg_color_space */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->num_components != 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_RGB:
+  case JCS_YCbCr:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_CMYK:
+  case JCS_YCCK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  default:			/* JCS_UNKNOWN can be anything */
+    if (cinfo->num_components < 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+  }
+
+  /* Set out_color_components and conversion method based on requested space.
+   * Also clear the component_needed flags for any unused components,
+   * so that earlier pipeline stages can avoid useless computation.
+   */
+
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+    cinfo->out_color_components = 1;
+    if (cinfo->jpeg_color_space == JCS_GRAYSCALE ||
+	cinfo->jpeg_color_space == JCS_YCbCr) {
+      cconvert->pub.color_convert = grayscale_convert;
+      /* For color->grayscale conversion, only the Y (0) component is needed */
+      for (ci = 1; ci < cinfo->num_components; ci++)
+	cinfo->comp_info[ci].component_needed = FALSE;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_RGB:
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    if (cinfo->jpeg_color_space == JCS_YCbCr) {
+      cconvert->pub.color_convert = ycc_rgb_convert;
+      build_ycc_rgb_table(cinfo);
+    } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+      cconvert->pub.color_convert = gray_rgb_convert;
+    } else if (cinfo->jpeg_color_space == JCS_RGB && RGB_PIXELSIZE == 3) {
+      cconvert->pub.color_convert = null_convert; /* steps by num_components */
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_CMYK:
+    cinfo->out_color_components = 4;
+    if (cinfo->jpeg_color_space == JCS_YCCK) {
+      cconvert->pub.color_convert = ycck_cmyk_convert;
+      build_ycc_rgb_table(cinfo);	/* YCCK reuses the YCC->RGB tables */
+    } else if (cinfo->jpeg_color_space == JCS_CMYK) {
+      cconvert->pub.color_convert = null_convert;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  default:
+    /* Permit null conversion to same output space */
+    if (cinfo->out_color_space == cinfo->jpeg_color_space) {
+      cinfo->out_color_components = cinfo->num_components;
+      cconvert->pub.color_convert = null_convert;
+    } else			/* unsupported non-null conversion */
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+  }
+
+  if (cinfo->quantize_colors)
+    cinfo->output_components = 1; /* single colormapped output component */
+  else
+    cinfo->output_components = cinfo->out_color_components;
+}
diff --git a/src/libjpeg/jdct.h b/src/libjpeg/jdct.h
new file mode 100644
index 0000000..04192a2
--- /dev/null
+++ b/src/libjpeg/jdct.h
@@ -0,0 +1,176 @@
+/*
+ * jdct.h
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This include file contains common declarations for the forward and
+ * inverse DCT modules.  These declarations are private to the DCT managers
+ * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
+ * The individual DCT algorithms are kept in separate files to ease 
+ * machine-dependent tuning (e.g., assembly coding).
+ */
+
+
+/*
+ * A forward DCT routine is given a pointer to a work area of type DCTELEM[];
+ * the DCT is to be performed in-place in that buffer.  Type DCTELEM is int
+ * for 8-bit samples, INT32 for 12-bit samples.  (NOTE: Floating-point DCT
+ * implementations use an array of type FAST_FLOAT, instead.)
+ * The DCT inputs are expected to be signed (range +-CENTERJSAMPLE).
+ * The DCT outputs are returned scaled up by a factor of 8; they therefore
+ * have a range of +-8K for 8-bit data, +-128K for 12-bit data.  This
+ * convention improves accuracy in integer implementations and saves some
+ * work in floating-point ones.
+ * Quantization of the output coefficients is done by jcdctmgr.c.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef int DCTELEM;		/* 16 or 32 bits is fine */
+#else
+typedef INT32 DCTELEM;		/* must have 32 bits */
+#endif
+
+typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
+typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
+
+
+/*
+ * An inverse DCT routine is given a pointer to the input JBLOCK and a pointer
+ * to an output sample array.  The routine must dequantize the input data as
+ * well as perform the IDCT; for dequantization, it uses the multiplier table
+ * pointed to by compptr->dct_table.  The output data is to be placed into the
+ * sample array starting at a specified column.  (Any row offset needed will
+ * be applied to the array pointer before it is passed to the IDCT code.)
+ * Note that the number of samples emitted by the IDCT routine is
+ * DCT_scaled_size * DCT_scaled_size.
+ */
+
+/* typedef inverse_DCT_method_ptr is declared in jpegint.h */
+
+/*
+ * Each IDCT routine has its own ideas about the best dct_table element type.
+ */
+
+typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */
+#if BITS_IN_JSAMPLE == 8
+typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */
+#define IFAST_SCALE_BITS  2	/* fractional bits in scale factors */
+#else
+typedef INT32 IFAST_MULT_TYPE;	/* need 32 bits for scaled quantizers */
+#define IFAST_SCALE_BITS  13	/* fractional bits in scale factors */
+#endif
+typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */
+
+
+/*
+ * Each IDCT routine is responsible for range-limiting its results and
+ * converting them to unsigned form (0..MAXJSAMPLE).  The raw outputs could
+ * be quite far out of range if the input data is corrupt, so a bulletproof
+ * range-limiting step is required.  We use a mask-and-table-lookup method
+ * to do the combined operations quickly.  See the comments with
+ * prepare_range_limit_table (in jdmaster.c) for more info.
+ */
+
+#define IDCT_range_limit(cinfo)  ((cinfo)->sample_range_limit + CENTERJSAMPLE)
+
+#define RANGE_MASK  (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_fdct_islow		jFDislow
+#define jpeg_fdct_ifast		jFDifast
+#define jpeg_fdct_float		jFDfloat
+#define jpeg_idct_islow		jRDislow
+#define jpeg_idct_ifast		jRDifast
+#define jpeg_idct_float		jRDfloat
+#define jpeg_idct_4x4		jRD4x4
+#define jpeg_idct_2x2		jRD2x2
+#define jpeg_idct_1x1		jRD1x1
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+/* Extern declarations for the forward and inverse DCT routines. */
+
+EXTERN(void) jpeg_fdct_islow JPP((DCTELEM * data));
+EXTERN(void) jpeg_fdct_ifast JPP((DCTELEM * data));
+EXTERN(void) jpeg_fdct_float JPP((FAST_FLOAT * data));
+
+EXTERN(void) jpeg_idct_islow
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_ifast
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_float
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_4x4
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_2x2
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+EXTERN(void) jpeg_idct_1x1
+    JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	 JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col));
+
+
+/*
+ * Macros for handling fixed-point arithmetic; these are used by many
+ * but not all of the DCT/IDCT modules.
+ *
+ * All values are expected to be of type INT32.
+ * Fractional constants are scaled left by CONST_BITS bits.
+ * CONST_BITS is defined within each module using these macros,
+ * and may differ from one module to the next.
+ */
+
+#define ONE	((INT32) 1)
+#define CONST_SCALE (ONE << CONST_BITS)
+
+/* Convert a positive real constant to an integer scaled by CONST_SCALE.
+ * Caution: some C compilers fail to reduce "FIX(constant)" at compile time,
+ * thus causing a lot of useless floating-point operations at run time.
+ */
+
+#define FIX(x)	((INT32) ((x) * CONST_SCALE + 0.5))
+
+/* Descale and correctly round an INT32 value that's scaled by N bits.
+ * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
+ * the fudge factor is correct for either sign of X.
+ */
+
+#define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * This macro is used only when the two inputs will actually be no more than
+ * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a
+ * full 32x32 multiply.  This provides a useful speedup on many machines.
+ * Unfortunately there is no way to specify a 16x16->32 multiply portably
+ * in C, but some C compilers will do the right thing if you provide the
+ * correct combination of casts.
+ */
+
+#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT16) (const)))
+#endif
+#ifdef SHORTxLCONST_32		/* known to work with Microsoft C 6.0 */
+#define MULTIPLY16C16(var,const)  (((INT16) (var)) * ((INT32) (const)))
+#endif
+
+#ifndef MULTIPLY16C16		/* default definition */
+#define MULTIPLY16C16(var,const)  ((var) * (const))
+#endif
+
+/* Same except both inputs are variables. */
+
+#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#define MULTIPLY16V16(var1,var2)  (((INT16) (var1)) * ((INT16) (var2)))
+#endif
+
+#ifndef MULTIPLY16V16		/* default definition */
+#define MULTIPLY16V16(var1,var2)  ((var1) * (var2))
+#endif
diff --git a/src/libjpeg/jddctmgr.c b/src/libjpeg/jddctmgr.c
new file mode 100644
index 0000000..bbf8d0e
--- /dev/null
+++ b/src/libjpeg/jddctmgr.c
@@ -0,0 +1,269 @@
+/*
+ * jddctmgr.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the inverse-DCT management logic.
+ * This code selects a particular IDCT implementation to be used,
+ * and it performs related housekeeping chores.  No code in this file
+ * is executed per IDCT step, only during output pass setup.
+ *
+ * Note that the IDCT routines are responsible for performing coefficient
+ * dequantization as well as the IDCT proper.  This module sets up the
+ * dequantization multiplier table needed by the IDCT routine.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+
+/*
+ * The decompressor input side (jdinput.c) saves away the appropriate
+ * quantization table for each component at the start of the first scan
+ * involving that component.  (This is necessary in order to correctly
+ * decode files that reuse Q-table slots.)
+ * When we are ready to make an output pass, the saved Q-table is converted
+ * to a multiplier table that will actually be used by the IDCT routine.
+ * The multiplier table contents are IDCT-method-dependent.  To support
+ * application changes in IDCT method between scans, we can remake the
+ * multiplier tables if necessary.
+ * In buffered-image mode, the first output pass may occur before any data
+ * has been seen for some components, and thus before their Q-tables have
+ * been saved away.  To handle this case, multiplier tables are preset
+ * to zeroes; the result of the IDCT will be a neutral gray level.
+ */
+
+
+/* Private subobject for this module */
+
+typedef struct {
+  struct jpeg_inverse_dct pub;	/* public fields; kept first for casts */
+
+  /* This array contains the IDCT method code that each multiplier table
+   * is currently set up for, or -1 if it's not yet set up.
+   * The actual multiplier tables are pointed to by dct_table in the
+   * per-component comp_info structures.
+   */
+  int cur_method[MAX_COMPONENTS];	/* indexed by component number */
+} my_idct_controller;
+
+typedef my_idct_controller * my_idct_ptr;
+
+
+/* Allocated multiplier tables: big enough for any supported variant */
+
+typedef union {
+  ISLOW_MULT_TYPE islow_array[DCTSIZE2];	/* also used by scaled IDCTs */
+#ifdef DCT_IFAST_SUPPORTED
+  IFAST_MULT_TYPE ifast_array[DCTSIZE2];
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  FLOAT_MULT_TYPE float_array[DCTSIZE2];
+#endif
+} multiplier_table;	/* this file uses only its size, via SIZEOF() */
+
+
+/* The current scaled-IDCT routines require ISLOW-style multiplier tables,
+ * so be sure to compile that code if either ISLOW or SCALING is requested.
+ */
+#ifdef DCT_ISLOW_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#else
+#ifdef IDCT_SCALING_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#endif
+#endif
+
+
+/*
+ * Prepare for an output pass.
+ * Here we select the proper IDCT routine for each component and build
+ * a matching multiplier table.
+ */
+
+METHODDEF(void)
+start_pass (j_decompress_ptr cinfo)
+{
+  my_idct_ptr idct = (my_idct_ptr) cinfo->idct;
+  int ci, i;
+  jpeg_component_info *compptr;
+  int method = 0;		/* table style that method_ptr expects */
+  inverse_DCT_method_ptr method_ptr = NULL;
+  JQUANT_TBL * qtbl;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Select the proper IDCT routine for this component's scaling */
+    switch (compptr->DCT_scaled_size) {
+#ifdef IDCT_SCALING_SUPPORTED
+    case 1:
+      method_ptr = jpeg_idct_1x1;
+      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      break;
+    case 2:
+      method_ptr = jpeg_idct_2x2;
+      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      break;
+    case 4:
+      method_ptr = jpeg_idct_4x4;
+      method = JDCT_ISLOW;	/* jidctred uses islow-style table */
+      break;
+#endif
+    case DCTSIZE:
+      switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+      case JDCT_ISLOW:
+	method_ptr = jpeg_idct_islow;
+	method = JDCT_ISLOW;
+	break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+      case JDCT_IFAST:
+	method_ptr = jpeg_idct_ifast;
+	method = JDCT_IFAST;
+	break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+      case JDCT_FLOAT:
+	method_ptr = jpeg_idct_float;
+	method = JDCT_FLOAT;
+	break;
+#endif
+      default:
+	ERREXIT(cinfo, JERR_NOT_COMPILED);
+	break;
+      }
+      break;
+    default:
+      ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->DCT_scaled_size);
+      break;
+    }
+    idct->pub.inverse_DCT[ci] = method_ptr;
+    /* Create multiplier table from quant table.
+     * However, we can skip this if the component is uninteresting
+     * or if we already built the table.  Also, if no quant table
+     * has yet been saved for the component, we leave the
+     * multiplier table all-zero; we'll be reading zeroes from the
+     * coefficient controller's buffer anyway.
+     */
+    if (! compptr->component_needed || idct->cur_method[ci] == method)
+      continue;
+    qtbl = compptr->quant_table;
+    if (qtbl == NULL)		/* happens if no data yet for component */
+      continue;
+    idct->cur_method[ci] = method;
+    switch (method) {
+#ifdef PROVIDE_ISLOW_TABLES
+    case JDCT_ISLOW:
+      {
+	/* For LL&M IDCT method, multipliers are equal to raw quantization
+	 * coefficients, stored as ISLOW_MULT_TYPE for access efficiency.
+	 */
+	ISLOW_MULT_TYPE * ismtbl = (ISLOW_MULT_TYPE *) compptr->dct_table;
+	for (i = 0; i < DCTSIZE2; i++) {
+	  ismtbl[i] = (ISLOW_MULT_TYPE) qtbl->quantval[i];
+	}
+      }
+      break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+    case JDCT_IFAST:
+      {
+	/* For AA&N IDCT method, multipliers are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 * For integer operation, the stored multipliers keep
+	 * IFAST_SCALE_BITS fractional bits.
+	 */
+	IFAST_MULT_TYPE * ifmtbl = (IFAST_MULT_TYPE *) compptr->dct_table;
+#define CONST_BITS 14		/* scale of aanscales[] below */
+	static const INT16 aanscales[DCTSIZE2] = {
+	  /* precomputed values scaled up by 14 bits */
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+	};
+	SHIFT_TEMPS
+
+	for (i = 0; i < DCTSIZE2; i++) {
+	  ifmtbl[i] = (IFAST_MULT_TYPE)
+	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
+				  (INT32) aanscales[i]),
+		    CONST_BITS-IFAST_SCALE_BITS);
+	}
+      }
+      break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+    case JDCT_FLOAT:
+      {
+	/* For float AA&N IDCT method, multipliers are equal to quantization
+	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
+	 *   scalefactor[0] = 1
+	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+	 */
+	FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table;
+	int row, col;
+	static const double aanscalefactor[DCTSIZE] = {
+	  1.0, 1.387039845, 1.306562965, 1.175875602,
+	  1.0, 0.785694958, 0.541196100, 0.275899379
+	};
+
+	i = 0;
+	for (row = 0; row < DCTSIZE; row++) {
+	  for (col = 0; col < DCTSIZE; col++) {
+	    fmtbl[i] = (FLOAT_MULT_TYPE)
+	      ((double) qtbl->quantval[i] *
+	       aanscalefactor[row] * aanscalefactor[col]);
+	    i++;
+	  }
+	}
+      }
+      break;
+#endif
+    default:
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+      break;
+    }
+  }
+}
+
+
+/*
+ * Initialize IDCT manager.
+ */
+
+GLOBAL(void)
+jinit_inverse_dct (j_decompress_ptr cinfo)
+{
+  my_idct_ptr idct;
+  int ci;
+  jpeg_component_info *compptr;
+
+  idct = (my_idct_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_idct_controller));
+  cinfo->idct = (struct jpeg_inverse_dct *) idct;
+  idct->pub.start_pass = start_pass;	/* fills the tables allocated below */
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Allocate and pre-zero a multiplier table for each component */
+    compptr->dct_table =
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(multiplier_table));
+    MEMZERO(compptr->dct_table, SIZEOF(multiplier_table));
+    /* Mark multiplier table not yet set up for any method (see start_pass) */
+    idct->cur_method[ci] = -1;
+  }
+}
diff --git a/src/libjpeg/jdhuff.c b/src/libjpeg/jdhuff.c
new file mode 100644
index 0000000..b960874
--- /dev/null
+++ b/src/libjpeg/jdhuff.c
@@ -0,0 +1,684 @@
+/*
+ * jdhuff.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy decoding routines.
+ *
+ * Much of the complexity here has to do with supporting input suspension.
+ * If the data source module demands suspension, we want to be able to back
+ * up to the start of the current MCU.  To do this, we copy state variables
+ * into local working storage, and update them back to the permanent
+ * storage only upon successful completion of an MCU.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdhuff.h"		/* Declarations shared with jdphuff.c */
+
+
+/*
+ * Expanded entropy decoder object for Huffman decoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+} savable_state;
+
+/* ASSIGN_STATE copies one savable_state into another.  Plain structure
+ * assignment is used unless NO_STRUCT_ASSIGN is defined; the member-wise
+ * fallback exists only for MAX_COMPS_IN_SCAN == 4 and must be extended
+ * by hand if you have such a compiler and change that constant. */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif /* MAX_COMPS_IN_SCAN == 4 */
+#endif /* NO_STRUCT_ASSIGN */
+
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  /* These fields are loaded into local variables at start of each MCU.
+   * In case of suspension, we exit WITHOUT updating them.
+   */
+  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
+  savable_state saved;		/* Other state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+
+  /* Derived tables by table number; NULL until built (image lifespan) */
+  d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS];
+  d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS];
+
+  /* Precalculated info set up by start_pass for use in decode_mcu: */
+
+  /* Pointers to derived tables to be used for each block within an MCU */
+  d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  /* Whether we care about the DC and AC coefficient values for each block */
+  boolean dc_needed[D_MAX_BLOCKS_IN_MCU];
+  boolean ac_needed[D_MAX_BLOCKS_IN_MCU];
+} huff_entropy_decoder;
+
+typedef huff_entropy_decoder * huff_entropy_ptr; /* cinfo->entropy is cast to this */
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass_huff_decoder (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr huff = (huff_entropy_ptr) cinfo->entropy;
+  jpeg_component_info * comp;
+  int scan_idx, blk;
+
+  /* Sequential JPEG expects Ss = 0, Se = DCTSIZE2-1 and Ah = Al = 0.
+   * Some baseline files have all zeroes in these bytes, so a mismatch
+   * is reported as a warning instead of being treated as an error.
+   */
+  if (! (cinfo->Ss == 0 && cinfo->Se == DCTSIZE2-1 &&
+	 cinfo->Ah == 0 && cinfo->Al == 0))
+    WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
+
+  for (scan_idx = 0; scan_idx < cinfo->comps_in_scan; scan_idx++) {
+    comp = cinfo->cur_comp_info[scan_idx];
+    /* (Re)derive this component's tables; repeating a shared one is cheap */
+    jpeg_make_d_derived_tbl(cinfo, TRUE, comp->dc_tbl_no,
+			    & huff->dc_derived_tbls[comp->dc_tbl_no]);
+    jpeg_make_d_derived_tbl(cinfo, FALSE, comp->ac_tbl_no,
+			    & huff->ac_derived_tbls[comp->ac_tbl_no]);
+    /* The DC prediction of every component starts the scan at 0 */
+    huff->saved.last_dc_val[scan_idx] = 0;
+  }
+
+  /* Work out, for each block of an MCU, its tables and what we need */
+  for (blk = 0; blk < cinfo->blocks_in_MCU; blk++) {
+    comp = cinfo->cur_comp_info[cinfo->MCU_membership[blk]];
+    huff->dc_cur_tbls[blk] = huff->dc_derived_tbls[comp->dc_tbl_no];
+    huff->ac_cur_tbls[blk] = huff->ac_derived_tbls[comp->ac_tbl_no];
+
+    /* Nothing is kept for a component that is not needed.  For a needed
+     * one the DC is always kept; the ACs are only kept if the output is
+     * larger than 1/8th size (DCT_scaled_size > 1).
+     */
+    if (! comp->component_needed) {
+      huff->dc_needed[blk] = FALSE;
+      huff->ac_needed[blk] = FALSE;
+    } else {
+      huff->dc_needed[blk] = TRUE;
+      huff->ac_needed[blk] = (comp->DCT_scaled_size > 1);
+    }
+  }
+
+  /* The bit buffer starts out empty.  Zeroing get_buffer is unnecessary */
+  /* but keeps Purify quiet. */
+  huff->bitstate.bits_left = 0;
+  huff->bitstate.get_buffer = 0;
+  huff->pub.insufficient_data = FALSE;
+
+  /* Begin counting down the first restart interval */
+  huff->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Compute the derived values for a Huffman table.
+ * This routine also performs some validation checks on the table.
+ *
+ * Note this is also used by jdphuff.c.
+ */
+
+GLOBAL(void)
+jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
+			 d_derived_tbl ** pdtbl)
+{
+  JHUFF_TBL *htbl;
+  d_derived_tbl *dtbl;
+  int p, i, l, si, numsymbols;
+  int lookbits, ctr;
+  char huffsize[257];		/* code length of each symbol, 0-terminated */
+  unsigned int huffcode[257];	/* code value of each symbol */
+  unsigned int code;
+
+  /* Builds *pdtbl (allocating it on first use) from the DC or AC table
+   * number tblno.  huffsize[] and huffcode[] are filled in code-length
+   * order, paralleling the order of the symbols in htbl->huffval[].
+   */
+  /* Find the input Huffman table; a bad index or missing table is fatal */
+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+  htbl =
+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+
+  /* Allocate a workspace if we haven't already done so. */
+  if (*pdtbl == NULL)
+    *pdtbl = (d_derived_tbl *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(d_derived_tbl));
+  dtbl = *pdtbl;
+  dtbl->pub = htbl;		/* fill in back link */
+  
+  /* Figure C.1: make table of Huffman code length for each symbol */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    i = (int) htbl->bits[l];
+    if (i < 0 || p + i > 256)	/* protect against table overrun */
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    while (i--)
+      huffsize[p++] = (char) l;
+  }
+  huffsize[p] = 0;		/* zero length ends the list for the loop below */
+  numsymbols = p;		/* total number of symbols in the table */
+  
+  /* Figure C.2: generate the codes themselves */
+  /* We also validate that the counts represent a legal Huffman code tree. */
+  
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+  while (huffsize[p]) {
+    while (((int) huffsize[p]) == si) {
+      huffcode[p++] = code;
+      code++;
+    }
+    /* code is now 1 more than the last code used for codelength si; but
+     * it must still fit in si bits, since no code is allowed to be all ones.
+     */
+    if (((INT32) code) >= (((INT32) 1) << si))
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    code <<= 1;
+    si++;
+  }
+
+  /* Figure F.15: generate decoding tables for bit-sequential decoding */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    if (htbl->bits[l]) {
+      /* valoffset[l] = huffval[] index of 1st symbol of code length l,
+       * minus the minimum code of length l
+       */
+      dtbl->valoffset[l] = (INT32) p - (INT32) huffcode[p];
+      p += htbl->bits[l];
+      dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */
+    } else {
+      dtbl->maxcode[l] = -1;	/* -1 if no codes of this length */
+    }
+  }
+  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
+
+  /* Compute lookahead tables to speed up decoding.
+   * First we set all the table entries to 0, indicating "too long";
+   * then we iterate through the Huffman codes that are short enough and
+   * fill in all the entries that correspond to bit sequences starting
+   * with that code.
+   */
+
+  MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
+
+  p = 0;
+  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
+    for (i = 1; i <= (int) htbl->bits[l]; i++, p++) {
+      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
+      /* Generate left-justified code followed by all possible bit sequences */
+      lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
+      for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
+	dtbl->look_nbits[lookbits] = l;
+	dtbl->look_sym[lookbits] = htbl->huffval[p];
+	lookbits++;
+      }
+    }
+  }
+
+  /* Validate symbols as being reasonable.
+   * For AC tables, we make no check, but accept all byte values 0..255.
+   * For DC tables, we require the symbols to be in range 0..15.
+   * (Tighter bounds could be applied depending on the data depth and mode,
+   * but this is sufficient to ensure safe decoding.)
+   */
+  if (isDC) {
+    for (i = 0; i < numsymbols; i++) {
+      int sym = htbl->huffval[i];
+      if (sym < 0 || sym > 15)
+	ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    }
+  }
+}
+
+
+/*
+ * Out-of-line code for bit fetching (shared with jdphuff.c).
+ * See jdhuff.h for info about usage.
+ * Note: current values of get_buffer and bits_left are passed as parameters,
+ * but are returned in the corresponding fields of the state struct.
+ *
+ * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
+ * of get_buffer to be used.  (On machines with wider words, an even larger
+ * buffer could be used.)  However, on some machines 32-bit shifts are
+ * quite slow and take time proportional to the number of places shifted.
+ * (This is true with most PC compilers, for instance.)  In this case it may
+ * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
+ * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
+ */
+
+#ifdef SLOW_SHIFT_32
+#define MIN_GET_BITS  15	/* minimum allowable value */
+#else
+#define MIN_GET_BITS  (BIT_BUF_SIZE-7)	/* 25 when BIT_BUF_SIZE is 32 */
+#endif /* SLOW_SHIFT_32 */
+
+
+GLOBAL(boolean)
+jpeg_fill_bit_buffer (bitread_working_state * state,
+		      register bit_buf_type get_buffer, register int bits_left,
+		      int nbits)
+/* Load up the bit buffer to a depth of at least nbits; FALSE = suspend */
+{
+  /* Copy heavily used state fields into locals (hopefully registers) */
+  register const JOCTET * next_input_byte = state->next_input_byte;
+  register size_t bytes_in_buffer = state->bytes_in_buffer;
+  j_decompress_ptr cinfo = state->cinfo;
+
+  /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
+  /* (It is assumed that no request will be for more than that many bits.) */
+  /* We fail to do so only if we hit a marker or are forced to suspend. */
+
+  if (cinfo->unread_marker == 0) {	/* cannot advance past a marker */
+    while (bits_left < MIN_GET_BITS) {
+      register int c;
+
+      /* Attempt to read a byte */
+      if (bytes_in_buffer == 0) {
+	if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	  return FALSE;		/* suspend; *state is left untouched */
+	next_input_byte = cinfo->src->next_input_byte;
+	bytes_in_buffer = cinfo->src->bytes_in_buffer;
+      }
+      bytes_in_buffer--;
+      c = GETJOCTET(*next_input_byte++);
+
+      /* If it's 0xFF, check and discard stuffed zero byte */
+      if (c == 0xFF) {
+	/* Loop here to discard any padding FF's on terminating marker,
+	 * so that we can save a valid unread_marker value.  NOTE: we will
+	 * accept multiple FF's followed by a 0 as meaning a single FF data
+	 * byte.  This data pattern is not valid according to the standard.
+	 */
+	do {
+	  if (bytes_in_buffer == 0) {
+	    if (! (*cinfo->src->fill_input_buffer) (cinfo))
+	      return FALSE;	/* suspend; *state is left untouched */
+	    next_input_byte = cinfo->src->next_input_byte;
+	    bytes_in_buffer = cinfo->src->bytes_in_buffer;
+	  }
+	  bytes_in_buffer--;
+	  c = GETJOCTET(*next_input_byte++);
+	} while (c == 0xFF);
+
+	if (c == 0) {
+	  /* Found FF/00, which represents an FF data byte */
+	  c = 0xFF;
+	} else {
+	  /* Oops, it's actually a marker indicating end of compressed data.
+	   * Save the marker code for later use.
+	   * Fine point: it might appear that we should save the marker into
+	   * bitread working state, not straight into permanent state.  But
+	   * once we have hit a marker, we cannot need to suspend within the
+	   * current MCU, because we will read no more bytes from the data
+	   * source.  So it is OK to update permanent state right away.
+	   */
+	  cinfo->unread_marker = c;
+	  /* See if we need to insert some fake zero bits. */
+	  goto no_more_bytes;
+	}
+      }
+
+      /* OK, load c into get_buffer */
+      get_buffer = (get_buffer << 8) | c;
+      bits_left += 8;
+    } /* end while */
+  } else {
+  no_more_bytes:
+    /* We get here if we've read the marker that terminates the compressed
+     * data segment.  There should be enough bits in the buffer register
+     * to satisfy the request; if so, no problem.
+     */
+    if (nbits > bits_left) {
+      /* Uh-oh.  Report corrupted data to user and stuff zeroes into
+       * the data stream, so that we can produce some kind of image.
+       * We use a nonvolatile flag to ensure that only one warning message
+       * appears per data segment.
+       */
+      if (! cinfo->entropy->insufficient_data) {
+	WARNMS(cinfo, JWRN_HIT_MARKER);
+	cinfo->entropy->insufficient_data = TRUE;
+      }
+      /* Fill the buffer with zero bits, up to a depth of MIN_GET_BITS */
+      get_buffer <<= MIN_GET_BITS - bits_left;
+      bits_left = MIN_GET_BITS;
+    }
+  }
+
+  /* Unload the local registers into *state, where the caller reloads them */
+  state->next_input_byte = next_input_byte;
+  state->bytes_in_buffer = bytes_in_buffer;
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  return TRUE;
+}
+
+
+/*
+ * Out-of-line code for Huffman code decoding.
+ * See jdhuff.h for info about usage.
+ */
+
+GLOBAL(int)
+jpeg_huff_decode (bitread_working_state * state,
+		  register bit_buf_type get_buffer, register int bits_left,
+		  d_derived_tbl * htbl, int min_bits)
+{
+  register int l = min_bits;	/* length of the code collected so far */
+  register INT32 code;		/* the code bits collected so far */
+
+  /* HUFF_DECODE has determined that the code is at least min_bits */
+  /* bits long, so fetch that many bits in one swoop. */
+  /* A return of -1 reports suspension to the caller. */
+  CHECK_BIT_BUFFER(*state, l, return -1);
+  code = GET_BITS(l);
+
+  /* Collect the rest of the Huffman code one bit at a time. */
+  /* This is per Figure F.16 in the JPEG spec. */
+  /* maxcode[l] is -1 for a length with no codes, so such lengths are passed. */
+  while (code > htbl->maxcode[l]) {
+    code <<= 1;
+    CHECK_BIT_BUFFER(*state, 1, return -1);
+    code |= GET_BITS(1);
+    l++;
+  }
+
+  /* Unload the local registers */
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  /* With garbage input we may reach the sentinel value l = 17. */
+
+  if (l > 16) {
+    WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
+    return 0;			/* fake a zero as the safest result */
+  }
+
+  return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ];
+}
+
+
+/*
+ * Figure F.12: extend sign bit.
+ * On some machines, a shift and add will be faster than a table lookup.
+ */
+
+#ifdef AVOID_TABLES
+/* x < 2**(s-1) means a negative value: subtract 2**s - 1 (unsigned shift) */
+#define HUFF_EXTEND(x,s)  \
+  ((x) < (1<<((s)-1)) ? (x) - (int) ((((unsigned int) 1)<<(s)) - 1) : (x))
+#else
+
+#define HUFF_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
+
+static const int extend_test[16] =   /* entry n is 2**(n-1) */
+  { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
+    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
+
+/* Offsets are written out: left-shifting -1 is undefined behavior in C. */
+static const int extend_offset[16] = /* entry n is 1 - 2**n */
+  { 0, -0x0001, -0x0003, -0x0007, -0x000F, -0x001F, -0x003F, -0x007F,
+    -0x00FF, -0x01FF, -0x03FF, -0x07FF, -0x0FFF, -0x1FFF, -0x3FFF,
+    -0x7FFF };
+
+#endif /* AVOID_TABLES */
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ * Returns FALSE if must suspend.
+ */
+
+LOCAL(boolean)
+process_restart (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr huff = (huff_entropy_ptr) cinfo->entropy;
+  int idx;
+
+  /* Whatever is left in the bit buffer is dropped.  Whole bytes among the
+   * dropped bits are added to the marker reader's discarded-byte count. */
+  cinfo->marker->discarded_bytes += huff->bitstate.bits_left / 8;
+  huff->bitstate.bits_left = 0;
+
+  /* Consume the RSTn marker; a FALSE result means we have to suspend */
+  if (! (*cinfo->marker->read_restart_marker) (cinfo)) {
+    return FALSE;
+  }
+
+  /* A new restart interval begins: zero the DC predictors, reload counter */
+  for (idx = cinfo->comps_in_scan - 1; idx >= 0; idx--)
+    huff->saved.last_dc_val[idx] = 0;
+  huff->restarts_to_go = cinfo->restart_interval;
+
+  /* The out-of-data flag is cleared only if read_restart_marker did not
+   * leave us sitting right at another marker.  If it did, the next data
+   * segment will be treated as empty, and keeping the flag set avoids
+   * producing bogus output pixels for it.
+   */
+  if (cinfo->unread_marker == 0) {
+    huff->pub.insufficient_data = FALSE;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Decode and return one MCU's worth of Huffman-compressed coefficients.
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA HAS BEEN ZEROED BY THE CALLER.
+ * (Wholesale zeroing is usually a little faster than retail...)
+ *
+ * Returns FALSE if data source requested suspension.  In that case no
+ * changes have been made to permanent state.  (Exception: some output
+ * coefficients may already have been assigned.  This is harmless for
+ * this module, since we'll just re-assign them on the next call.)
+ */
+
+METHODDEF(boolean)
+decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  int blkn;
+  BITREAD_STATE_VARS;		/* declares get_buffer, bits_left, br_state */
+  savable_state state;		/* working copy of entropy->saved */
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->pub.insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      JBLOCKROW block = MCU_data[blkn];
+      d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
+      d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
+      register int s, k, r;	/* s: symbol/size, k: coef index, r: run/bits */
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
+      if (s) {
+	CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	r = GET_BITS(s);
+	s = HUFF_EXTEND(r, s);
+      }
+
+      if (entropy->dc_needed[blkn]) {
+	/* Convert DC difference to actual value, update last_dc_val */
+	int ci = cinfo->MCU_membership[blkn];
+	s += state.last_dc_val[ci];
+	state.last_dc_val[ci] = s;
+	/* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
+	(*block)[0] = (JCOEF) s;
+      }
+
+      if (entropy->ac_needed[blkn]) {
+
+	/* Section F.2.2.2: decode the AC coefficients */
+	/* Since zeroes are skipped, output area must be cleared beforehand */
+	for (k = 1; k < DCTSIZE2; k++) {
+	  HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
+      
+	  r = s >> 4;
+	  s &= 15;
+      
+	  if (s) {
+	    k += r;
+	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	    r = GET_BITS(s);
+	    s = HUFF_EXTEND(r, s);
+	    /* Output coefficient in natural (dezigzagged) order.
+	     * Note: the extra entries in jpeg_natural_order[] will save us
+	     * if k >= DCTSIZE2, which could happen if the data is corrupted.
+	     */
+	    (*block)[jpeg_natural_order[k]] = (JCOEF) s;
+	  } else {
+	    if (r != 15)
+	      break;
+	    k += 15;
+	  }
+	}
+
+      } else {
+
+	/* Section F.2.2.2: decode the AC coefficients */
+	/* In this path we just discard the values */
+	for (k = 1; k < DCTSIZE2; k++) {
+	  HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
+      
+	  r = s >> 4;
+	  s &= 15;
+      
+	  if (s) {
+	    k += r;
+	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	    DROP_BITS(s);
+	  } else {
+	    if (r != 15)
+	      break;
+	    k += 15;
+	  }
+	}
+
+      }
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * Module initialization routine for Huffman entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_huff_decoder (j_decompress_ptr cinfo)
+{
+  int tbl;
+  huff_entropy_ptr huff = (huff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(huff_entropy_decoder));
+
+  huff->pub.start_pass = start_pass_huff_decoder;
+  huff->pub.decode_mcu = decode_mcu;
+  cinfo->entropy = (struct jpeg_entropy_decoder *) huff;
+
+  /* No derived table has a workspace yet; NULL marks it unallocated */
+  for (tbl = 0; tbl < NUM_HUFF_TBLS; tbl++) {
+    huff->dc_derived_tbls[tbl] = NULL;
+    huff->ac_derived_tbls[tbl] = NULL;
+  }
+}
+
+/*
+ * BEWARE OF KLUDGE:  This subroutine is a hack for decoding illegal JPEG-in-
+ *                    TIFF encapsulations produced by Microsoft's Wang Imaging
+ * for Windows application with the public-domain TIFF Library.  Based upon an
+ * examination of selected output files, this program apparently divides a JPEG
+ * bit-stream into consecutive horizontal TIFF "strips", such that the JPEG
+ * encoder's/decoder's DC coefficients for each image component are reset before
+ * each "strip".  Moreover, a "strip" is not necessarily encoded in a multiple
+ * of 8 bits, so one must sometimes discard 1-7 bits at the end of each "strip"
+ * for alignment to the next input-Byte storage boundary.  IJG JPEG Library
+ * decoder state is not normally exposed to client applications, so this sub-
+ * routine provides the TIFF Library with a "hook" to make these corrections.
+ * It should be called after "jpeg_start_decompress()" and before
+ * "jpeg_finish_decompress()", just before decoding each "strip" using
+ * "jpeg_read_raw_data()" or "jpeg_read_scanlines()".
+ *
+ * This kludge is not sanctioned or supported by the Independent JPEG Group, and
+ * future changes to the IJG JPEG Library might invalidate it.  Do not send bug
+ * reports about this code to IJG developers.  Instead, contact the author for
+ * advice: Scott B. Marovich <marovich@hpl.hp.com>, Hewlett-Packard Labs, 6/01.
+ */
+GLOBAL(void)
+jpeg_reset_huff_decode (register j_decompress_ptr cinfo,register float *refbw)
+{ register huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
+  register int ci = 0;
+
+  /* Re-initialize DC predictions: component ci gets -refbw[2*ci] as an int */
+  do entropy->saved.last_dc_val[ci] = -refbw[ci << 1];
+  while (++ci < cinfo->comps_in_scan); /* do-while: entry 0 is always written */
+  /* Discard encoded input bits, up to the next Byte boundary */
+  entropy->bitstate.bits_left &= ~7;	/* rounds the count down to a multiple of 8 */
+}
diff --git a/src/libjpeg/jdhuff.h b/src/libjpeg/jdhuff.h
new file mode 100644
index 0000000..ae19b6c
--- /dev/null
+++ b/src/libjpeg/jdhuff.h
@@ -0,0 +1,201 @@
+/*
+ * jdhuff.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains declarations for Huffman entropy decoding routines
+ * that are shared between the sequential decoder (jdhuff.c) and the
+ * progressive decoder (jdphuff.c).  No other modules need to see these.
+ */
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_make_d_derived_tbl	jMkDDerived
+#define jpeg_fill_bit_buffer	jFilBitBuf
+#define jpeg_huff_decode	jHufDecode
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* Derived data constructed for each Huffman table */
+
+#define HUFF_LOOKAHEAD	8	/* # of bits of lookahead */
+
+typedef struct {
+  /* Basic tables: (element [0] of each array is unused) */
+  INT32 maxcode[18];		/* largest code of length k (-1 if none) */
+  /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
+  INT32 valoffset[17];		/* huffval[] offset for codes of length k */
+  /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
+   * the smallest code of length k; so given a code of length k, the
+   * corresponding symbol is huffval[code + valoffset[k]]
+   */
+
+  /* Link to public Huffman table (needed only in jpeg_huff_decode) */
+  JHUFF_TBL *pub;
+
+  /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
+   * the input data stream.  If the next Huffman code is no more
+   * than HUFF_LOOKAHEAD bits long, we can obtain its length and
+   * the corresponding symbol directly from these tables.
+   */
+  int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
+  UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol; valid only if look_nbits != 0 */
+} d_derived_tbl;
+
+/* Expand a Huffman table definition into the derived format */
+EXTERN(void) jpeg_make_d_derived_tbl
+	JPP((j_decompress_ptr cinfo, boolean isDC, int tblno,
+	     d_derived_tbl ** pdtbl));
+
+
+/*
+ * Fetching the next N bits from the input stream is a time-critical operation
+ * for the Huffman decoders.  We implement it with a combination of inline
+ * macros and out-of-line subroutines.  Note that N (the number of bits
+ * demanded at one time) never exceeds 15 for JPEG use.
+ *
+ * We read source bytes into get_buffer and dole out bits as needed.
+ * If get_buffer already contains enough bits, they are fetched in-line
+ * by the macros CHECK_BIT_BUFFER and GET_BITS.  When there aren't enough
+ * bits, jpeg_fill_bit_buffer is called; it will attempt to fill get_buffer
+ * as full as possible (not just to the number of bits needed; this
+ * prefetching reduces the overhead cost of calling jpeg_fill_bit_buffer).
+ * Note that jpeg_fill_bit_buffer may return FALSE to indicate suspension.
+ * On TRUE return, jpeg_fill_bit_buffer guarantees that get_buffer contains
+ * at least the requested number of bits --- dummy zeroes are inserted if
+ * necessary.
+ */
+
+typedef INT32 bit_buf_type;	/* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  32	/* size of buffer in bits */
+
+/* If long is > 32 bits on your machine, and shifting/masking longs is
+ * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
+ * appropriately should be a win.  Unfortunately we can't define the size
+ * with something like  #define BIT_BUF_SIZE (sizeof(bit_buf_type)*8)
+ * because not all machines measure sizeof in 8-bit bytes.
+ */
+
+typedef struct {		/* Bitreading state saved across MCUs */
+  bit_buf_type get_buffer;	/* current bit-extraction buffer */
+  int bits_left;		/* # of unused bits in it */
+} bitread_perm_state;
+
+typedef struct {		/* Bitreading working state within an MCU */
+  /* Current data source location */
+  /* We need a copy, rather than munging the original, in case of suspension */
+  const JOCTET * next_input_byte; /* => next byte to read from source */
+  size_t bytes_in_buffer;	/* # of bytes remaining in source buffer */
+  /* Bit input buffer --- inside the inner loops these values are kept in
+   * register variables, not in this struct; the out-of-line routines
+   * store their results here and the caller reloads its locals from it. */
+  bit_buf_type get_buffer;	/* current bit-extraction buffer */
+  int bits_left;		/* # of unused bits in it */
+  /* Pointer needed by jpeg_fill_bit_buffer. */
+  j_decompress_ptr cinfo;	/* back link to decompress master record */
+} bitread_working_state;
+
+/* Macros to declare and load/save bitread local variables. */
+#define BITREAD_STATE_VARS  \
+	register bit_buf_type get_buffer;  \
+	register int bits_left;  \
+	bitread_working_state br_state
+/* Load: copy the source position and the saved bit buffer into the locals. */
+#define BITREAD_LOAD_STATE(cinfop,permstate)  \
+	br_state.cinfo = (cinfop); \
+	br_state.next_input_byte = (cinfop)->src->next_input_byte; \
+	br_state.bytes_in_buffer = (cinfop)->src->bytes_in_buffer; \
+	get_buffer = (permstate).get_buffer; \
+	bits_left = (permstate).bits_left;
+/* Save: write the locals back to the source manager and permanent state. */
+#define BITREAD_SAVE_STATE(cinfop,permstate)  \
+	(cinfop)->src->next_input_byte = br_state.next_input_byte; \
+	(cinfop)->src->bytes_in_buffer = br_state.bytes_in_buffer; \
+	(permstate).get_buffer = get_buffer; \
+	(permstate).bits_left = bits_left
+
+/*
+ * These macros provide the in-line portion of bit fetching.
+ * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer
+ * before using GET_BITS, PEEK_BITS, or DROP_BITS.
+ * The variables get_buffer and bits_left are assumed to be locals,
+ * but the state struct might not be (jpeg_huff_decode needs this).
+ *	CHECK_BIT_BUFFER(state,n,action);
+ *		Ensure there are N bits in get_buffer; if suspend, take action.
+ *      val = GET_BITS(n);
+ *		Fetch next N bits.
+ *      val = PEEK_BITS(n);
+ *		Fetch next N bits without removing them from the buffer.
+ *	DROP_BITS(n);
+ *		Discard next N bits.
+ * The value N should be a simple variable, not an expression, because it
+ * is evaluated multiple times.
+ */
+
+#define CHECK_BIT_BUFFER(state,nbits,action) \
+	{ if (bits_left < (nbits)) {  \
+	    if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits))  \
+	      { action; }  \
+	    get_buffer = (state).get_buffer; bits_left = (state).bits_left; } }
+
+#define GET_BITS(nbits) \
+	(((int) (get_buffer >> (bits_left -= (nbits)))) & ((1<<(nbits))-1))
+
+#define PEEK_BITS(nbits) \
+	(((int) (get_buffer >> (bits_left -  (nbits)))) & ((1<<(nbits))-1))
+
+#define DROP_BITS(nbits) \
+	(bits_left -= (nbits))
+
+/* Load up the bit buffer to a depth of at least nbits */
+EXTERN(boolean) jpeg_fill_bit_buffer
+	JPP((bitread_working_state * state, register bit_buf_type get_buffer,
+	     register int bits_left, int nbits));
+
+
+/*
+ * Code for extracting next Huffman-coded symbol from input bit stream.
+ * Again, this is time-critical and we make the main paths be macros.
+ *
+ * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits
+ * without looping.  Usually, more than 95% of the Huffman codes will be 8
+ * or fewer bits long.  The few overlength codes are handled with a loop,
+ * which need not be inline code.
+ *
+ * Notes about the HUFF_DECODE macro:
+ * 1. Near the end of the data segment, we may fail to get enough bits
+ *    for a lookahead.  In that case, we do it the hard way.
+ * 2. If the lookahead table contains no entry, the next code must be
+ *    more than HUFF_LOOKAHEAD bits long.
+ * 3. jpeg_huff_decode returns -1 if forced to suspend.
+ */
+
+#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \
+{ register int nb, look; \
+  if (bits_left < HUFF_LOOKAHEAD) { \
+    if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left, 0)) {failaction;} \
+    get_buffer = (state).get_buffer; bits_left = (state).bits_left; \
+    if (bits_left < HUFF_LOOKAHEAD) { \
+      nb = 1; goto slowlabel;	/* too few bits for a lookahead */ \
+    } \
+  } \
+  look = PEEK_BITS(HUFF_LOOKAHEAD); \
+  if ((nb = (htbl)->look_nbits[look]) != 0) { \
+    DROP_BITS(nb); \
+    result = (htbl)->look_sym[look]; \
+  } else { \
+    nb = HUFF_LOOKAHEAD+1;	/* code is longer than the lookahead */ \
+slowlabel: \
+    if ((result=jpeg_huff_decode(&(state),get_buffer,bits_left,htbl,nb)) < 0) \
+	{ failaction; } \
+    get_buffer = (state).get_buffer; bits_left = (state).bits_left; \
+  } \
+}
+
+/* Out-of-line case for Huffman code fetching */
+EXTERN(int) jpeg_huff_decode
+	JPP((bitread_working_state * state, register bit_buf_type get_buffer,
+	     register int bits_left, d_derived_tbl * htbl, int min_bits));
diff --git a/src/libjpeg/jdinput.c b/src/libjpeg/jdinput.c
new file mode 100644
index 0000000..0c2ac8f
--- /dev/null
+++ b/src/libjpeg/jdinput.c
@@ -0,0 +1,381 @@
+/*
+ * jdinput.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains input control logic for the JPEG decompressor.
+ * These routines are concerned with controlling the decompressor's input
+ * processing (marker reading and coefficient decoding).  The actual input
+ * reading is done in jdmarker.c, jdhuff.c, and jdphuff.c.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state of the input controller (allocated in jinit_input_controller) */
+
+typedef struct {
+  struct jpeg_input_controller pub; /* public fields; cinfo->inputctl is cast to and from a pointer to this struct */
+
+  boolean inheaders;		/* TRUE until first SOS is reached (cleared in consume_markers) */
+} my_input_controller;
+
+typedef my_input_controller * my_inputctl_ptr; /* the type cinfo->inputctl is cast to */
+
+
+/* Forward declarations: consume_markers is referenced (by finish_input_pass) before its definition */
+METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Routines to calculate various quantities related to the size of the image.
+ */
+
+LOCAL(void)
+initial_setup (j_decompress_ptr cinfo)
+/* One-time image-level setup, run when the first SOS marker is reached */
+{
+  jpeg_component_info *comp;
+  int idx;
+
+  /* Refuse images larger than the library's dimension limit */
+  if ((long) cinfo->image_width > (long) JPEG_MAX_DIMENSION ||
+      (long) cinfo->image_height > (long) JPEG_MAX_DIMENSION)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
+
+  /* Data precision has to equal the compiled-in sample size */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* The component count must fit the fixed-size internal arrays */
+  if (cinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	     MAX_COMPONENTS);
+
+  /* Validate each component's sampling factors and find the maxima */
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (idx = 0; idx < cinfo->num_components; idx++) {
+    comp = &cinfo->comp_info[idx];
+    if (comp->h_samp_factor <= 0 || comp->h_samp_factor > MAX_SAMP_FACTOR ||
+	comp->v_samp_factor <= 0 || comp->v_samp_factor > MAX_SAMP_FACTOR)
+      ERREXIT(cinfo, JERR_BAD_SAMPLING);
+    if (comp->h_samp_factor > cinfo->max_h_samp_factor)
+      cinfo->max_h_samp_factor = comp->h_samp_factor;
+    if (comp->v_samp_factor > cinfo->max_v_samp_factor)
+      cinfo->max_v_samp_factor = comp->v_samp_factor;
+  }
+
+  /* DCT_scaled_size and min_DCT_scaled_size both start out as DCTSIZE.
+   * The full decompressor lets jdmaster.c replace them later; the
+   * transcoder never runs jdmaster.c, hence the defaults are set here.
+   */
+  cinfo->min_DCT_scaled_size = DCTSIZE;
+
+  /* Derive the per-component dimensions */
+  for (idx = 0; idx < cinfo->num_components; idx++) {
+    comp = &cinfo->comp_info[idx];
+    comp->DCT_scaled_size = DCTSIZE;
+    /* Dimensions counted in DCT blocks */
+    comp->width_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) comp->h_samp_factor,
+		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+    comp->height_in_blocks = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) comp->v_samp_factor,
+		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+    /* Dimensions counted in samples.  jdmaster.c recomputes
+     * downsampled_width/height for full decompression; the transcoder
+     * library ignores them, though the calling application might not.
+     */
+    comp->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width * (long) comp->h_samp_factor,
+		    (long) cinfo->max_h_samp_factor);
+    comp->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height * (long) comp->v_samp_factor,
+		    (long) cinfo->max_v_samp_factor);
+    /* Every component counts as needed until color conversion decides */
+    comp->component_needed = TRUE;
+    /* No quantization table has been saved for it yet */
+    comp->quant_table = NULL;
+  }
+
+  /* Count the fully interleaved MCU rows in the image */
+  cinfo->total_iMCU_rows = (JDIMENSION)
+    jdiv_round_up((long) cinfo->image_height,
+		  (long) (cinfo->max_v_samp_factor * DCTSIZE));
+
+  /* The file has multiple scans when this scan omits a component
+   * or when the file is progressive.
+   */
+  cinfo->inputctl->has_multiple_scans =
+    (cinfo->comps_in_scan < cinfo->num_components ||
+     cinfo->progressive_mode) ? TRUE : FALSE;
+}
+
+
+LOCAL(void)
+per_scan_setup (j_decompress_ptr cinfo)
+/* Derive the MCU geometry for the scan that is about to be read. */
+/* Relies on comps_in_scan and cur_comp_info[] as set from the SOS marker. */
+{
+  jpeg_component_info *comp;
+  int idx, nblocks, rem;
+
+  if (cinfo->comps_in_scan == 1) {
+
+    /* Single-component scan: not interleaved */
+    comp = cinfo->cur_comp_info[0];
+
+    /* The MCU grid is simply the component's block grid */
+    cinfo->MCUs_per_row = comp->width_in_blocks;
+    cinfo->MCU_rows_in_scan = comp->height_in_blocks;
+
+    /* Each MCU holds exactly one block */
+    comp->MCU_width = 1;
+    comp->MCU_height = 1;
+    comp->MCU_blocks = 1;
+    comp->MCU_sample_width = comp->DCT_scaled_size;
+    comp->last_col_width = 1;
+    /* Here last_row_height is, by convention, the count of block rows
+     * that are present in the final iMCU row.
+     */
+    rem = (int) (comp->height_in_blocks % comp->v_samp_factor);
+    comp->last_row_height =
+      (rem != 0) ? rem : comp->v_samp_factor;
+
+    /* One block per MCU, belonging to scan component 0 */
+    cinfo->blocks_in_MCU = 1;
+    cinfo->MCU_membership[0] = 0;
+
+  } else {
+
+    /* Multi-component scan: interleaved */
+    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
+	       MAX_COMPS_IN_SCAN);
+
+    /* Image size in MCUs, each spanning max_samp_factor*DCTSIZE samples */
+    cinfo->MCUs_per_row = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width,
+		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+    cinfo->MCU_rows_in_scan = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height,
+		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+
+    cinfo->blocks_in_MCU = 0;
+
+    for (idx = 0; idx < cinfo->comps_in_scan; idx++) {
+      comp = cinfo->cur_comp_info[idx];
+      /* A component contributes h_samp_factor x v_samp_factor blocks per MCU */
+      comp->MCU_width = comp->h_samp_factor;
+      comp->MCU_height = comp->v_samp_factor;
+      comp->MCU_blocks = comp->MCU_width * comp->MCU_height;
+      comp->MCU_sample_width = comp->MCU_width * comp->DCT_scaled_size;
+      /* Count the real (non-dummy) blocks in the last MCU column and row */
+      rem = (int) (comp->width_in_blocks % comp->MCU_width);
+      comp->last_col_width =
+	(rem != 0) ? rem : comp->MCU_width;
+      rem = (int) (comp->height_in_blocks % comp->MCU_height);
+      comp->last_row_height =
+	(rem != 0) ? rem : comp->MCU_height;
+      /* Append this component's blocks to the MCU membership map */
+      nblocks = comp->MCU_blocks;
+      if (cinfo->blocks_in_MCU + nblocks > D_MAX_BLOCKS_IN_MCU)
+	ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+      for (; nblocks > 0; nblocks--) {
+	cinfo->MCU_membership[cinfo->blocks_in_MCU++] = idx;
+      }
+    }
+
+  }
+}
+
+
+/*
+ * Save away a copy of the Q-table referenced by each component present
+ * in the current scan, unless already saved during a prior scan.
+ *
+ * In a multiple-scan JPEG file, the encoder could assign different components
+ * the same Q-table slot number, but change table definitions between scans
+ * so that each component uses a different Q-table.  (The IJG encoder is not
+ * currently capable of doing this, but other encoders might.)  Since we want
+ * to be able to dequantize all the components at the end of the file, this
+ * means that we have to save away the table actually used for each component.
+ * We do this by copying the table at the start of the first scan containing
+ * the component.
+ * The JPEG spec prohibits the encoder from changing the contents of a Q-table
+ * slot between scans of a component using that slot.  If the encoder does so
+ * anyway, this decoder will simply use the Q-table values that were current
+ * at the start of the first scan for the component.
+ *
+ * The decompressor output side looks only at the saved quant tables,
+ * not at the current Q-table slots.
+ */
+
+LOCAL(void)
+latch_quant_tables (j_decompress_ptr cinfo)
+{
+  jpeg_component_info *comp;
+  JQUANT_TBL *saved;
+  int idx, slot;
+
+  for (idx = 0; idx < cinfo->comps_in_scan; idx++) {
+    comp = cinfo->cur_comp_info[idx];
+    /* Only components without a saved Q-table need any work */
+    if (comp->quant_table == NULL) {
+      /* The slot the component names must be in range and populated */
+      slot = comp->quant_tbl_no;
+      if (slot < 0 || slot >= NUM_QUANT_TBLS ||
+	  cinfo->quant_tbl_ptrs[slot] == NULL)
+	ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, slot);
+      /* Copy the table as it stands now into a JPOOL_IMAGE allocation */
+      saved = (JQUANT_TBL *)
+	(*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				    SIZEOF(JQUANT_TBL));
+      MEMCOPY(saved, cinfo->quant_tbl_ptrs[slot], SIZEOF(JQUANT_TBL));
+      comp->quant_table = saved;
+    }
+  }
+}
+
+
+/*
+ * Initialize the input modules to read a scan of compressed data.
+ * The first call to this is done by jdmaster.c after initializing
+ * the entire decompressor (during jpeg_start_decompress).
+ * Subsequent calls come from consume_markers, below.
+ */
+
+METHODDEF(void)
+start_input_pass (j_decompress_ptr cinfo)
+{
+  per_scan_setup(cinfo);	/* MCU geometry for this scan */
+  latch_quant_tables(cinfo);	/* save Q-tables of components not yet saved */
+  (*cinfo->entropy->start_pass) (cinfo);
+  (*cinfo->coef->start_input_pass) (cinfo);
+  cinfo->inputctl->consume_input = cinfo->coef->consume_data; /* input now goes to the coefficient controller */
+}
+
+
+/*
+ * Finish up after inputting a compressed-data scan.
+ * This is called by the coefficient controller after it's read all
+ * the expected data of the scan.
+ */
+
+METHODDEF(void)
+finish_input_pass (j_decompress_ptr cinfo)
+{
+  cinfo->inputctl->consume_input = consume_markers; /* switch input back to marker reading */
+}
+
+
+/*
+ * Read JPEG markers before, between, or after compressed-data scans.
+ * Change state as necessary when a new scan is reached.
+ * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ *
+ * The consume_input method pointer points either here or to the
+ * coefficient controller's consume_data routine, depending on whether
+ * we are reading a compressed data segment or inter-segment markers.
+ */
+
+METHODDEF(int)
+consume_markers (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr ctl = (my_inputctl_ptr) cinfo->inputctl;
+  int status;
+
+  /* Once EOI has been seen, nothing further is read */
+  if (ctl->pub.eoi_reached)
+    return JPEG_REACHED_EOI;
+
+  /* Read markers until SOS, EOI, or suspension is reported */
+  status = (*cinfo->marker->read_markers) (cinfo);
+
+  if (status == JPEG_REACHED_SOS) {
+    if (! ctl->inheaders) {
+      /* Second or later SOS: legal only in a multiple-scan file */
+      if (! ctl->pub.has_multiple_scans)
+	ERREXIT(cinfo, JERR_EOI_EXPECTED);
+      start_input_pass(cinfo);
+    } else {
+      /* First SOS.  start_input_pass is not called here: jdmaster.c
+       * must call it before more input is consumed, and jdapimin.c
+       * is responsible for enforcing that sequencing.
+       */
+      initial_setup(cinfo);
+      ctl->inheaders = FALSE;
+    }
+  } else if (status == JPEG_REACHED_EOI) {
+    ctl->pub.eoi_reached = TRUE;
+    if (! ctl->inheaders) {
+      /* Clamp output_scan_number to the scans actually present; this
+       * prevents an infinite loop in the coef controller's
+       * decompress_data routine if the user set it too large.
+       */
+      if (cinfo->output_scan_number > cinfo->input_scan_number)
+	cinfo->output_scan_number = cinfo->input_scan_number;
+    } else if (cinfo->marker->saw_SOF) {
+      /* Still in headers: a tables-only datastream must not have an SOF */
+      ERREXIT(cinfo, JERR_SOF_NO_SOS);
+    }
+  }
+  /* JPEG_SUSPENDED (or any other code) is passed through untouched */
+
+  return status;
+}
+
+
+/*
+ * Reset state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_input_controller (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr ctl = (my_inputctl_ptr) cinfo->inputctl;
+
+  ctl->inheaders = TRUE;
+  ctl->pub.eoi_reached = FALSE;
+  ctl->pub.has_multiple_scans = FALSE; /* stands in for "unknown" */
+  ctl->pub.consume_input = consume_markers;
+  /* Propagate the reset to the error manager and the marker reader */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->marker->reset_marker_reader) (cinfo);
+  /* Drop progression state (arguably the entropy decoder's job) */
+  cinfo->coef_bits = NULL;
+}
+
+
+/*
+ * Initialize the input controller module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_input_controller (j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr ctl;
+
+  /* The controller object is allocated from the permanent pool */
+  ctl = (my_inputctl_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				SIZEOF(my_input_controller));
+  cinfo->inputctl = (struct jpeg_input_controller *) ctl;
+  /* State is set by hand rather than through reset_input_controller,
+   * because the other modules must not be reset at this point.
+   */
+  ctl->inheaders = TRUE;
+  ctl->pub.eoi_reached = FALSE;
+  ctl->pub.has_multiple_scans = FALSE; /* stands in for "unknown" */
+  /* Install the method pointers */
+  ctl->pub.finish_input_pass = finish_input_pass;
+  ctl->pub.start_input_pass = start_input_pass;
+  ctl->pub.reset_input_controller = reset_input_controller;
+  ctl->pub.consume_input = consume_markers;
+}
diff --git a/src/libjpeg/jdmainct.c b/src/libjpeg/jdmainct.c
new file mode 100644
index 0000000..13c956f
--- /dev/null
+++ b/src/libjpeg/jdmainct.c
@@ -0,0 +1,512 @@
+/*
+ * jdmainct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the main buffer controller for decompression.
+ * The main buffer lies between the JPEG decompressor proper and the
+ * post-processor; it holds downsampled data in the JPEG colorspace.
+ *
+ * Note that this code is bypassed in raw-data mode, since the application
+ * supplies the equivalent of the main buffer in that case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * In the current system design, the main buffer need never be a full-image
+ * buffer; any full-height buffers will be found inside the coefficient or
+ * postprocessing controllers.  Nonetheless, the main controller is not
+ * trivial.  Its responsibility is to provide context rows for upsampling/
+ * rescaling, and doing this in an efficient fashion is a bit tricky.
+ *
+ * Postprocessor input data is counted in "row groups".  A row group
+ * is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
+ * sample rows of each component.  (We require DCT_scaled_size values to be
+ * chosen such that these numbers are integers.  In practice DCT_scaled_size
+ * values will likely be powers of two, so we actually have the stronger
+ * condition that DCT_scaled_size / min_DCT_scaled_size is an integer.)
+ * Upsampling will typically produce max_v_samp_factor pixel rows from each
+ * row group (times any additional scale factor that the upsampler is
+ * applying).
+ *
+ * The coefficient controller will deliver data to us one iMCU row at a time;
+ * each iMCU row contains v_samp_factor * DCT_scaled_size sample rows, or
+ * exactly min_DCT_scaled_size row groups.  (This amount of data corresponds
+ * to one row of MCUs when the image is fully interleaved.)  Note that the
+ * number of sample rows varies across components, but the number of row
+ * groups does not.  Some garbage sample rows may be included in the last iMCU
+ * row at the bottom of the image.
+ *
+ * Depending on the vertical scaling algorithm used, the upsampler may need
+ * access to the sample row(s) above and below its current input row group.
+ * The upsampler is required to set need_context_rows TRUE at global selection
+ * time if so.  When need_context_rows is FALSE, this controller can simply
+ * obtain one iMCU row at a time from the coefficient controller and dole it
+ * out as row groups to the postprocessor.
+ *
+ * When need_context_rows is TRUE, this controller guarantees that the buffer
+ * passed to postprocessing contains at least one row group's worth of samples
+ * above and below the row group(s) being processed.  Note that the context
+ * rows "above" the first passed row group appear at negative row offsets in
+ * the passed buffer.  At the top and bottom of the image, the required
+ * context rows are manufactured by duplicating the first or last real sample
+ * row; this avoids having special cases in the upsampling inner loops.
+ *
+ * The amount of context is fixed at one row group just because that's a
+ * convenient number for this controller to work with.  The existing
+ * upsamplers really only need one sample row of context.  An upsampler
+ * supporting arbitrary output rescaling might wish for more than one row
+ * group of context when shrinking the image; tough, we don't handle that.
+ * (This is justified by the assumption that downsizing will be handled mostly
+ * by adjusting the DCT_scaled_size values, so that the actual scale factor at
+ * the upsample step needn't be much less than one.)
+ *
+ * To provide the desired context, we have to retain the last two row groups
+ * of one iMCU row while reading in the next iMCU row.  (The last row group
+ * can't be processed until we have another row group for its below-context,
+ * and so we have to save the next-to-last group too for its above-context.)
+ * We could do this most simply by copying data around in our buffer, but
+ * that'd be very slow.  We can avoid copying any data by creating a rather
+ * strange pointer structure.  Here's how it works.  We allocate a workspace
+ * consisting of M+2 row groups (where M = min_DCT_scaled_size is the number
+ * of row groups per iMCU row).  We create two sets of redundant pointers to
+ * the workspace.  Labeling the physical row groups 0 to M+1, the synthesized
+ * pointer lists look like this:
+ *                   M+1                          M-1
+ * master pointer --> 0         master pointer --> 0
+ *                    1                            1
+ *                   ...                          ...
+ *                   M-3                          M-3
+ *                   M-2                           M
+ *                   M-1                          M+1
+ *                    M                           M-2
+ *                   M+1                          M-1
+ *                    0                            0
+ * We read alternate iMCU rows using each master pointer; thus the last two
+ * row groups of the previous iMCU row remain un-overwritten in the workspace.
+ * The pointer lists are set up so that the required context rows appear to
+ * be adjacent to the proper places when we pass the pointer lists to the
+ * upsampler.
+ *
+ * The above pictures describe the normal state of the pointer lists.
+ * At top and bottom of the image, we diddle the pointer lists to duplicate
+ * the first or last sample row as necessary (this is cheaper than copying
+ * sample rows around).
+ *
+ * This scheme breaks down if M < 2, ie, min_DCT_scaled_size is 1.  In that
+ * situation each iMCU row provides only one row group so the buffering logic
+ * must be different (eg, we must read two iMCU rows before we can emit the
+ * first row group).  For now, we simply do not support providing context
+ * rows when min_DCT_scaled_size is 1.  That combination seems unlikely to
+ * be worth providing --- if someone wants a 1/8th-size preview, they probably
+ * want it quick and dirty, so a context-free upsampler is sufficient.
+ */
+
+
+/* Private buffer controller object (allocated in jinit_d_main_controller) */
+
+typedef struct {
+  struct jpeg_d_main_controller pub; /* public fields; cinfo->main is cast to and from a pointer to this struct */
+
+  /* Per-component pointer to allocated workspace (M or M+2 row groups). */
+  JSAMPARRAY buffer[MAX_COMPONENTS];
+
+  boolean buffer_full;		/* Have we gotten an iMCU row from decoder? */
+  JDIMENSION rowgroup_ctr;	/* counts row groups output to postprocessor */
+
+  /* Remaining fields are only used in the context case. */
+
+  /* These are the master pointers to the funny-order pointer lists (see alloc_funny_pointers). */
+  JSAMPIMAGE xbuffer[2];	/* pointers to weird pointer lists */
+
+  int whichptr;			/* indicates which pointer set (0 or 1) is now in use */
+  int context_state;		/* process_data state machine status (a CTX_xxx value) */
+  JDIMENSION rowgroups_avail;	/* row groups available to postprocessor */
+  JDIMENSION iMCU_row_ctr;	/* counts iMCU rows to detect image top/bot */
+} my_main_controller;
+
+typedef my_main_controller * my_main_ptr; /* the type cinfo->main is cast to */
+
+/* context_state values (the states of process_data_context_main): */
+#define CTX_PREPARE_FOR_IMCU	0	/* need to prepare for MCU row */
+#define CTX_PROCESS_IMCU	1	/* feeding iMCU to postprocessor */
+#define CTX_POSTPONED_ROW	2	/* feeding postponed row group */
+
+
+/* Forward declarations of the process_data methods that start_pass_main installs */
+METHODDEF(void) process_data_simple_main
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+METHODDEF(void) process_data_context_main
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+#ifdef QUANT_2PASS_SUPPORTED
+METHODDEF(void) process_data_crank_post
+	JPP((j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+	     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail));
+#endif
+
+
+LOCAL(void)
+alloc_funny_pointers (j_decompress_ptr cinfo)
+/* Reserve storage for the funny pointer lists.
+ * Called a single time, not at the start of every pass.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int M = cinfo->min_DCT_scaled_size;
+  jpeg_component_info *comp;
+  JSAMPARRAY lists;
+  int idx, rgroup, list_len;
+
+  /* One allocation holds the component-pointer arrays of both sets;
+   * xbuffer[1] starts right after the num_components entries of xbuffer[0].
+   */
+  mainp->xbuffer[0] = (JSAMPIMAGE)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				cinfo->num_components * 2 * SIZEOF(JSAMPARRAY));
+  mainp->xbuffer[1] = mainp->xbuffer[0] + cinfo->num_components;
+
+  for (idx = 0; idx < cinfo->num_components; idx++) {
+    comp = &cinfo->comp_info[idx];
+    rgroup = (comp->v_samp_factor * comp->DCT_scaled_size) /
+      cinfo->min_DCT_scaled_size; /* sample rows per row group */
+    /* Each list spans M+4 row groups; both lists of the component
+     * come out of a single allocation.
+     */
+    list_len = rgroup * (M + 4);
+    lists = (JSAMPARRAY)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  2 * list_len * SIZEOF(JSAMPROW));
+    /* Skip rgroup entries so one row group sits at negative offsets */
+    mainp->xbuffer[0][idx] = lists + rgroup;
+    mainp->xbuffer[1][idx] = lists + rgroup + list_len;
+  }
+}
+
+
+LOCAL(void)
+make_funny_pointers (j_decompress_ptr cinfo)
+/* Create the funny pointer lists discussed in the comments above.
+ * The actual workspace is already allocated (in main->buffer),
+ * and the space for the pointer lists is allocated too.
+ * This routine just fills in the curiously ordered lists.
+ * This will be repeated at the beginning of each pass (from start_pass_main).
+ */
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup;
+  int M = cinfo->min_DCT_scaled_size;
+  jpeg_component_info *compptr;
+  JSAMPARRAY buf, xbuf0, xbuf1;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
+      cinfo->min_DCT_scaled_size; /* height of a row group of component */
+    xbuf0 = main->xbuffer[0][ci];
+    xbuf1 = main->xbuffer[1][ci];
+    /* First copy the workspace pointers (all M+2 row groups) as-is into both lists */
+    buf = main->buffer[ci];
+    for (i = 0; i < rgroup * (M + 2); i++) {
+      xbuf0[i] = xbuf1[i] = buf[i];
+    }
+    /* In the second list, put the last four row groups in swapped order:
+     * groups M-2,M-1 exchange places with groups M,M+1. */
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf1[rgroup*(M-2) + i] = buf[rgroup*M + i];
+      xbuf1[rgroup*M + i] = buf[rgroup*(M-2) + i];
+    }
+    /* The wraparound pointers at top and bottom will be filled later
+     * (see set_wraparound_pointers, below).  Initially we want the "above"
+     * pointers to duplicate the first actual data line.  This only needs
+     * to happen in xbuffer[0].
+     */
+    for (i = 0; i < rgroup; i++) {
+      xbuf0[i - rgroup] = xbuf0[0];
+    }
+  }
+}
+
+
+LOCAL(void)
+set_wraparound_pointers (j_decompress_ptr cinfo)
+/* Fill in the "wraparound" entries before and after each pointer list,
+ * taking the lists from their top-of-image state to the normal state.
+ */
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  int M = cinfo->min_DCT_scaled_size;
+  jpeg_component_info *comp;
+  JSAMPARRAY list0, list1;
+  int idx, row, rgroup;
+
+  for (idx = 0; idx < cinfo->num_components; idx++) {
+    comp = &cinfo->comp_info[idx];
+    rgroup = (comp->v_samp_factor * comp->DCT_scaled_size) /
+      cinfo->min_DCT_scaled_size; /* sample rows per row group */
+    list0 = mainp->xbuffer[0][idx];
+    list1 = mainp->xbuffer[1][idx];
+    for (row = 0; row < rgroup; row++) {
+      list0[row - rgroup] = list0[rgroup * (M + 1) + row];
+      list1[row - rgroup] = list1[rgroup * (M + 1) + row];
+      list0[rgroup * (M + 2) + row] = list0[row];
+      list1[rgroup * (M + 2) + row] = list1[row];
+    }
+  }
+}
+
+
+LOCAL(void)
+set_bottom_pointers (j_decompress_ptr cinfo)
+/* Change the pointer lists to duplicate the last sample row at the bottom
+ * of the image.  whichptr indicates which xbuffer holds the final iMCU row.
+ * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
+ */
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+  int ci, i, rgroup, iMCUheight, rows_left;
+  jpeg_component_info *compptr;
+  JSAMPARRAY xbuf;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Count sample rows in one iMCU row and in one row group */
+    iMCUheight = compptr->v_samp_factor * compptr->DCT_scaled_size;
+    rgroup = iMCUheight / cinfo->min_DCT_scaled_size;
+    /* Count nondummy sample rows remaining for this component (a full iMCU row if the height divides evenly) */
+    rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight);
+    if (rows_left == 0) rows_left = iMCUheight;
+    /* Count nondummy row groups.  Should get same answer for each component,
+     * so we need only do it once.
+     */
+    if (ci == 0) {
+      main->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
+    }
+    /* Duplicate the last real sample row rgroup*2 times; this pads out the
+     * last partial rowgroup and ensures at least one full rowgroup of context.
+     */
+    xbuf = main->xbuffer[main->whichptr][ci];
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf[rows_left + i] = xbuf[rows_left-1]; /* only the pointer is copied, not sample data */
+    }
+  }
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    mainp->buffer_full = FALSE;	/* the buffer starts out empty */
+    mainp->rowgroup_ctr = 0;
+    if (! cinfo->upsample->need_context_rows) {
+      /* No context rows wanted: the simple method suffices */
+      mainp->pub.process_data = process_data_simple_main;
+    } else {
+      mainp->pub.process_data = process_data_context_main;
+      make_funny_pointers(cinfo);	/* (re)build the xbuffer[] lists */
+      mainp->whichptr = 0;	/* first iMCU row goes into xbuffer[0] */
+      mainp->context_state = CTX_PREPARE_FOR_IMCU;
+      mainp->iMCU_row_ctr = 0;
+    }
+    break;
+#ifdef QUANT_2PASS_SUPPORTED
+  case JBUF_CRANK_DEST:
+    /* Last pass of 2-pass quantization only cranks the postprocessor */
+    mainp->pub.process_data = process_data_crank_post;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This handles the simple case where no context is required.
+ */
+
+METHODDEF(void)
+process_data_simple_main (j_decompress_ptr cinfo,
+			  JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			  JDIMENSION out_rows_avail)
+{
+  my_main_ptr mainp = (my_main_ptr) cinfo->main;
+  JDIMENSION groups;
+
+  /* An empty buffer must first be refilled with one iMCU row */
+  if (! mainp->buffer_full) {
+    if (! (*cinfo->coef->decompress_data) (cinfo, mainp->buffer))
+      return;			/* decoder suspended; nothing to hand on yet */
+    mainp->buffer_full = TRUE;
+  }
+
+  /* Every iMCU row holds min_DCT_scaled_size row groups.  At the bottom
+   * of the image some of them may be garbage; they are passed along
+   * anyway, since the postprocessor has to detect the image bottom
+   * (at row resolution) itself.
+   */
+  groups = (JDIMENSION) cinfo->min_DCT_scaled_size;
+
+  /* Hand the row groups to the postprocessor */
+  (*cinfo->post->post_process_data) (cinfo, mainp->buffer,
+				     &mainp->rowgroup_ctr, groups,
+				     output_buf, out_row_ctr, out_rows_avail);
+
+  /* Once every row group has been consumed the buffer is empty again */
+  if (mainp->rowgroup_ctr >= groups) {
+    mainp->rowgroup_ctr = 0;
+    mainp->buffer_full = FALSE;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This handles the case where context rows must be provided.
+ */
+
+METHODDEF(void)
+process_data_context_main (j_decompress_ptr cinfo,
+			   JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			   JDIMENSION out_rows_avail)
+{
+  my_main_ptr main = (my_main_ptr) cinfo->main;
+
+  /* Read input data if we haven't filled the main buffer yet */
+  if (! main->buffer_full) {
+    if (! (*cinfo->coef->decompress_data) (cinfo,
+					   main->xbuffer[main->whichptr]))
+      return;			/* suspension forced, can do nothing more */
+    main->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
+    main->iMCU_row_ctr++;	/* count rows received */
+  }
+
+  /* Postprocessor typically will not swallow all the input data it is handed
+   * in one call (due to filling the output buffer first).  Must be prepared
+   * to exit and restart.  This switch lets us keep track of how far we got
+   * (context_state records the case at which to resume on the next call).
+   * Note that each case falls through to the next on successful completion.
+   */
+  switch (main->context_state) {
+  case CTX_POSTPONED_ROW:
+    /* Call postprocessor using previously set pointers for postponed row */
+    (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
+			&main->rowgroup_ctr, main->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+    if (main->rowgroup_ctr < main->rowgroups_avail)
+      return;			/* Need to suspend */
+    main->context_state = CTX_PREPARE_FOR_IMCU;
+    if (*out_row_ctr >= out_rows_avail)
+      return;			/* Postprocessor exactly filled output buf */
+    /*FALLTHROUGH*/
+  case CTX_PREPARE_FOR_IMCU:
+    /* Prepare to process first M-1 row groups of this iMCU row */
+    main->rowgroup_ctr = 0;
+    main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size - 1);
+    /* Check for bottom of image: if so, tweak pointers to "duplicate"
+     * the last sample row, and adjust rowgroups_avail to ignore padding rows.
+     */
+    if (main->iMCU_row_ctr == cinfo->total_iMCU_rows)
+      set_bottom_pointers(cinfo);
+    main->context_state = CTX_PROCESS_IMCU;
+    /*FALLTHROUGH*/
+  case CTX_PROCESS_IMCU:
+    /* Call postprocessor using previously set pointers */
+    (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
+			&main->rowgroup_ctr, main->rowgroups_avail,
+			output_buf, out_row_ctr, out_rows_avail);
+    if (main->rowgroup_ctr < main->rowgroups_avail)
+      return;			/* Need to suspend */
+    /* After the first iMCU, change wraparound pointers to normal state */
+    if (main->iMCU_row_ctr == 1)
+      set_wraparound_pointers(cinfo);
+    /* Prepare to load new iMCU row using other xbuffer list */
+    main->whichptr ^= 1;	/* 0=>1 or 1=>0 */
+    main->buffer_full = FALSE;	/* makes the next call read a new iMCU row */
+    /* Still need to process last row group of this iMCU row, */
+    /* which is saved at index M+1 of the other xbuffer */
+    main->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_scaled_size + 1);
+    main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size + 2);
+    main->context_state = CTX_POSTPONED_ROW;
+  }
+}
+
+
+/*
+ * Process some data.
+ * Final pass of two-pass quantization: just call the postprocessor.
+ * Source data will be the postprocessor controller's internal buffer.
+ */
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+METHODDEF(void)
+process_data_crank_post (j_decompress_ptr cinfo,
+			 JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			 JDIMENSION out_rows_avail)
+{
+  (*cinfo->post->post_process_data) (cinfo, (JSAMPIMAGE) NULL, /* no input buffer is passed */
+				     (JDIMENSION *) NULL, (JDIMENSION) 0, /* no input row-group counter or count */
+				     output_buf, out_row_ctr, out_rows_avail);
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize main buffer controller.
+ */
+
+GLOBAL(void)
+jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_main_ptr mainp;
+  jpeg_component_info *comp;
+  int idx, rgroup, ngroups;
+
+  mainp = (my_main_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_main_controller));
+  cinfo->main = (struct jpeg_d_main_controller *) mainp;
+  mainp->pub.start_pass = start_pass_main;
+
+  /* A full-image main buffer is never expected to be requested */
+  if (need_full_buffer)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Work out ngroups, the number of row groups the workspace holds:
+   * M in the plain case, M+2 when context rows have to be provided.
+   */
+  ngroups = cinfo->min_DCT_scaled_size;
+  if (cinfo->upsample->need_context_rows) {
+    if (cinfo->min_DCT_scaled_size < 2)	/* M < 2 is not supported */
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    alloc_funny_pointers(cinfo);	/* storage for the xbuffer[] lists */
+    ngroups += 2;
+  }
+
+  for (idx = 0; idx < cinfo->num_components; idx++) {
+    comp = &cinfo->comp_info[idx];
+    rgroup = (comp->v_samp_factor * comp->DCT_scaled_size) /
+      cinfo->min_DCT_scaled_size; /* sample rows per row group */
+    mainp->buffer[idx] = (*cinfo->mem->alloc_sarray)
+			((j_common_ptr) cinfo, JPOOL_IMAGE,
+			 comp->width_in_blocks * comp->DCT_scaled_size,
+			 (JDIMENSION) (rgroup * ngroups));
+  }
+}
diff --git a/src/libjpeg/jdmarker.c b/src/libjpeg/jdmarker.c
new file mode 100644
index 0000000..f4cca8c
--- /dev/null
+++ b/src/libjpeg/jdmarker.c
@@ -0,0 +1,1360 @@
+/*
+ * jdmarker.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains routines to decode JPEG datastream markers.
+ * Most of the complexity arises from our desire to support input
+ * suspension: if not all of the data for a marker is available,
+ * we must exit back to the application.  On resumption, we reprocess
+ * the marker.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+typedef enum {			/* JPEG marker codes */
+  M_SOF0  = 0xc0,
+  M_SOF1  = 0xc1,
+  M_SOF2  = 0xc2,
+  M_SOF3  = 0xc3,
+  /* (0xc4 is M_DHT, listed below) */
+  M_SOF5  = 0xc5,
+  M_SOF6  = 0xc6,
+  M_SOF7  = 0xc7,
+  /* 0xc8 sits in the SOFn range but is reserved for JPEG extensions */
+  M_JPG   = 0xc8,
+  M_SOF9  = 0xc9,
+  M_SOF10 = 0xca,
+  M_SOF11 = 0xcb,
+  /* (0xcc is M_DAC, listed below) */
+  M_SOF13 = 0xcd,
+  M_SOF14 = 0xce,
+  M_SOF15 = 0xcf,
+  /* Huffman table definition (see get_dht) */
+  M_DHT   = 0xc4,
+  /* Arithmetic-coding table parameters (see get_dac) */
+  M_DAC   = 0xcc,
+  /* Restart markers; these carry no parameters */
+  M_RST0  = 0xd0,
+  M_RST1  = 0xd1,
+  M_RST2  = 0xd2,
+  M_RST3  = 0xd3,
+  M_RST4  = 0xd4,
+  M_RST5  = 0xd5,
+  M_RST6  = 0xd6,
+  M_RST7  = 0xd7,
+  /* Image/scan delimiters and the remaining table/parameter segments */
+  M_SOI   = 0xd8,
+  M_EOI   = 0xd9,
+  M_SOS   = 0xda,
+  M_DQT   = 0xdb,
+  M_DNL   = 0xdc,
+  M_DRI   = 0xdd,
+  M_DHP   = 0xde,
+  M_EXP   = 0xdf,
+  /* Application segments; handlers are indexed by (code - M_APP0) */
+  M_APP0  = 0xe0,
+  M_APP1  = 0xe1,
+  M_APP2  = 0xe2,
+  M_APP3  = 0xe3,
+  M_APP4  = 0xe4,
+  M_APP5  = 0xe5,
+  M_APP6  = 0xe6,
+  M_APP7  = 0xe7,
+  M_APP8  = 0xe8,
+  M_APP9  = 0xe9,
+  M_APP10 = 0xea,
+  M_APP11 = 0xeb,
+  M_APP12 = 0xec,
+  M_APP13 = 0xed,
+  M_APP14 = 0xee,
+  M_APP15 = 0xef,
+  /* Ends of the JPGn range (0xf0..0xfd), then the comment segment */
+  M_JPG0  = 0xf0,
+  M_JPG13 = 0xfd,
+  M_COM   = 0xfe,
+  /* Parameterless, handled together with RSTn in read_markers */
+  M_TEM   = 0x01,
+  /* Out-of-band value: too large to be a one-byte marker code */
+  M_ERROR = 0x100
+} JPEG_MARKER;
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_marker_reader pub; /* public fields (cinfo->marker is cast to this struct) */
+
+  /* Application-overridable marker processing methods */
+  jpeg_marker_parser_method process_COM;
+  jpeg_marker_parser_method process_APPn[16];	/* indexed by marker - M_APP0 */
+
+  /* Limit on marker data length to save for each marker type */
+  unsigned int length_limit_COM;
+  unsigned int length_limit_APPn[16];	/* indexed by marker - M_APP0 */
+
+  /* Status of COM/APPn marker saving (carried across suspensions) */
+  jpeg_saved_marker_ptr cur_marker;	/* NULL if not processing a marker */
+  unsigned int bytes_read;		/* data bytes read so far in marker */
+  /* Note: cur_marker is not linked into marker_list until it's all read. */
+} my_marker_reader;
+
+typedef my_marker_reader * my_marker_ptr;
+
+
+/*
+ * Macros for fetching data from the data source module.
+ *
+ * At all times, cinfo->src->next_input_byte and ->bytes_in_buffer reflect
+ * the current restart point; we update them only when we have reached a
+ * suitable place to restart if a suspension occurs.
+ */
+
+/* Declare local copies of input pointer/count; the macros below use them by name */
+#define INPUT_VARS(cinfo)  \
+	struct jpeg_source_mgr * datasrc = (cinfo)->src;  \
+	const JOCTET * next_input_byte = datasrc->next_input_byte;  \
+	size_t bytes_in_buffer = datasrc->bytes_in_buffer
+
+/* Unload the local copies (this moves the restart point) --- do this only at a restart boundary */
+#define INPUT_SYNC(cinfo)  \
+	( datasrc->next_input_byte = next_input_byte,  \
+	  datasrc->bytes_in_buffer = bytes_in_buffer )
+
+/* Reload the local copies --- used only in MAKE_BYTE_AVAIL */
+#define INPUT_RELOAD(cinfo)  \
+	( next_input_byte = datasrc->next_input_byte,  \
+	  bytes_in_buffer = datasrc->bytes_in_buffer )
+
+/* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available.
+ * Note we do *not* do INPUT_SYNC before calling fill_input_buffer,
+ * but we must reload the local copies after a successful fill.
+ */
+#define MAKE_BYTE_AVAIL(cinfo,action)  \
+	if (bytes_in_buffer == 0) {  \
+	  if (! (*datasrc->fill_input_buffer) (cinfo))  \
+	    { action; }  \
+	  INPUT_RELOAD(cinfo);  \
+	}
+
+/* Read a byte into variable V.
+ * If must suspend, take the specified action (typically "return FALSE").
+ */
+#define INPUT_BYTE(cinfo,V,action)  \
+	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V = GETJOCTET(*next_input_byte++); )
+
+/* As above, but read two bytes as an unsigned 16-bit integer, high byte first.
+ * V should be declared unsigned int or perhaps INT32.
+ */
+#define INPUT_2BYTES(cinfo,V,action)  \
+	MAKESTMT( MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V = ((unsigned int) GETJOCTET(*next_input_byte++)) << 8; \
+		  MAKE_BYTE_AVAIL(cinfo,action); \
+		  bytes_in_buffer--; \
+		  V += GETJOCTET(*next_input_byte++); )
+
+
+/*
+ * Routines to process JPEG markers.
+ *
+ * Entry condition: JPEG marker itself has been read and its code saved
+ *   in cinfo->unread_marker; input restart point is just after the marker.
+ *
+ * Exit: if return TRUE, have read and processed any parameters, and have
+ *   updated the restart point to point after the parameters.
+ *   If return FALSE, was forced to suspend before reaching end of
+ *   marker parameters; restart point has not been moved.  Same routine
+ *   will be called again after application supplies more input data.
+ *
+ * This approach to suspension assumes that all of a marker's parameters
+ * can fit into a single input bufferload.  This should hold for "normal"
+ * markers.  Some COM/APPn markers might have large parameter segments
+ * that might not fit.  If we are simply dropping such a marker, we use
+ * skip_input_data to get past it, and thereby put the problem on the
+ * source manager's shoulders.  If we are saving the marker's contents
+ * into memory, we use a slightly different convention: when forced to
+ * suspend, the marker processor updates the restart point to the end of
+ * what it's consumed (ie, the end of the buffer) before returning FALSE.
+ * On resumption, cinfo->unread_marker still contains the marker code,
+ * but the data source will point to the next chunk of marker data.
+ * The marker processor must retain internal state to deal with this.
+ *
+ * Note that we don't bother to avoid duplicate trace messages if a
+ * suspension occurs within marker parameters.  Other side effects
+ * require more care.
+ */
+
+
+LOCAL(boolean)
+get_soi (j_decompress_ptr cinfo)
+/* Process an SOI marker: refuse a second one, then load per-image defaults */
+{
+  int tbl;
+
+  TRACEMS(cinfo, 1, JTRC_SOI);
+
+  if (cinfo->marker->saw_SOI)
+    ERREXIT(cinfo, JERR_SOI_DUPLICATE);
+
+  /* Everything that is defined to be reset by SOI */
+  cinfo->restart_interval = 0;
+  for (tbl = NUM_ARITH_TBLS - 1; tbl >= 0; tbl--) {
+    cinfo->arith_dc_L[tbl] = 0;
+    cinfo->arith_dc_U[tbl] = 1;
+    cinfo->arith_ac_K[tbl] = 5;
+  }
+
+  /* Initial assumptions for colorspace etc */
+  cinfo->jpeg_color_space = JCS_UNKNOWN;
+  cinfo->CCIR601_sampling = FALSE; /* Assume non-CCIR sampling??? */
+
+  /* Defaults in case no JFIF APP0 marker follows */
+  cinfo->saw_JFIF_marker = FALSE;
+  cinfo->JFIF_major_version = 1;
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;
+  cinfo->X_density = 1;
+  cinfo->Y_density = 1;
+
+  /* Likewise for an Adobe APP14 marker */
+  cinfo->saw_Adobe_marker = FALSE;
+  cinfo->Adobe_transform = 0;
+
+  cinfo->marker->saw_SOI = TRUE;
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sof (j_decompress_ptr cinfo, boolean is_prog, boolean is_arith)
+/* Process a SOFn marker: the frame header, then one entry per component.
+ * Returns FALSE if input ran out before the whole marker was read. */
+{
+  INT32 remaining;
+  int idx, sampling;
+  jpeg_component_info * comp;
+  INPUT_VARS(cinfo);
+
+  cinfo->arith_code = is_arith;
+  cinfo->progressive_mode = is_prog;
+
+  /* Fixed part of the frame header */
+  INPUT_2BYTES(cinfo, remaining, return FALSE);
+  INPUT_BYTE(cinfo, cinfo->data_precision, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_height, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_width, return FALSE);
+  INPUT_BYTE(cinfo, cinfo->num_components, return FALSE);
+  remaining -= 8;		/* length word plus the six bytes just read */
+
+  TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker,
+	   (int) cinfo->image_width, (int) cinfo->image_height,
+	   cinfo->num_components);
+
+  if (cinfo->marker->saw_SOF)
+    ERREXIT(cinfo, JERR_SOF_DUPLICATE);
+
+  /* A zero height (to be redefined later by DNL) is not supported, so
+   * reject that along with any other empty-image combination. */
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0
+      || cinfo->num_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  /* What is left must be exactly three bytes per component */
+  if (remaining != (cinfo->num_components * 3))
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  /* Allocate the component array once only, even if we suspend and retry */
+  if (cinfo->comp_info == NULL)
+    cinfo->comp_info = (jpeg_component_info *) (*cinfo->mem->alloc_small)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       cinfo->num_components * SIZEOF(jpeg_component_info));
+
+  for (idx = 0; idx < cinfo->num_components; idx++) {
+    comp = cinfo->comp_info + idx;
+    comp->component_index = idx;
+    INPUT_BYTE(cinfo, comp->component_id, return FALSE);
+    INPUT_BYTE(cinfo, sampling, return FALSE);
+    comp->h_samp_factor = (sampling >> 4) & 15;
+    comp->v_samp_factor = sampling & 15;
+    INPUT_BYTE(cinfo, comp->quant_tbl_no, return FALSE);
+
+    TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT,
+	     comp->component_id, comp->h_samp_factor,
+	     comp->v_samp_factor, comp->quant_tbl_no);
+  }
+
+  cinfo->marker->saw_SOF = TRUE;
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sos (j_decompress_ptr cinfo)
+/* Process a SOS marker.  Returns FALSE if more input is needed.
+ * A scan that lists the same component twice is rejected as malformed. */
+{
+  INT32 length;
+  int i, ci, pi, n, c, cc;
+  jpeg_component_info * compptr;
+  INPUT_VARS(cinfo);
+
+  if (! cinfo->marker->saw_SOF)
+    ERREXIT(cinfo, JERR_SOS_NO_SOF);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  INPUT_BYTE(cinfo, n, return FALSE); /* Number of components */
+
+  TRACEMS1(cinfo, 1, JTRC_SOS, n);
+
+  if (length != (n * 2 + 6) || n < 1 || n > MAX_COMPS_IN_SCAN)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  cinfo->comps_in_scan = n;
+  /* Collect the component-spec parameters */
+  for (i = 0; i < n; i++) {
+    INPUT_BYTE(cinfo, cc, return FALSE);
+    INPUT_BYTE(cinfo, c, return FALSE);
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      if (cc == compptr->component_id)
+	goto id_found;
+    }
+    ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc);
+  id_found:
+    /* A component may be listed only once per scan */
+    for (pi = 0; pi < i; pi++) {
+      if (cinfo->cur_comp_info[pi] == compptr)
+	ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc);
+    }
+
+    cinfo->cur_comp_info[i] = compptr;
+    compptr->dc_tbl_no = (c >> 4) & 15;
+    compptr->ac_tbl_no = (c     ) & 15;
+
+    TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, cc,
+	     compptr->dc_tbl_no, compptr->ac_tbl_no);
+  }
+
+  /* Collect the additional scan parameters Ss, Se, Ah/Al. */
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ss = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Se = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ah = (c >> 4) & 15;
+  cinfo->Al = (c     ) & 15;
+
+  TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se,
+	   cinfo->Ah, cinfo->Al);
+
+  /* Prepare to scan data & restart markers */
+  cinfo->marker->next_restart_num = 0;
+
+  /* Count another SOS marker */
+  cinfo->input_scan_number++;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+#ifdef D_ARITH_CODING_SUPPORTED
+
+LOCAL(boolean)
+get_dac (j_decompress_ptr cinfo)
+/* Process a DAC marker: a list of (table index, value) byte pairs */
+{
+  INT32 remaining;
+  int tbl, setting;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, remaining, return FALSE);
+  remaining -= 2;		/* exclude the length word itself */
+
+  while (remaining > 0) {
+    INPUT_BYTE(cinfo, tbl, return FALSE);
+    INPUT_BYTE(cinfo, setting, return FALSE);
+    remaining -= 2;
+
+    TRACEMS2(cinfo, 1, JTRC_DAC, tbl, setting);
+
+    if (tbl < 0 || tbl >= (2*NUM_ARITH_TBLS))
+      ERREXIT1(cinfo, JERR_DAC_INDEX, tbl);
+
+    if (tbl < NUM_ARITH_TBLS) {	/* DC table: low nibble L, high nibble U */
+      cinfo->arith_dc_L[tbl] = (UINT8) (setting & 0x0F);
+      cinfo->arith_dc_U[tbl] = (UINT8) (setting >> 4);
+      if (cinfo->arith_dc_L[tbl] > cinfo->arith_dc_U[tbl])
+	ERREXIT1(cinfo, JERR_DAC_VALUE, setting);
+    } else {			/* AC table: the value is stored whole as K */
+      cinfo->arith_ac_K[tbl - NUM_ARITH_TBLS] = (UINT8) setting;
+    }
+  }
+
+  /* Anything but zero means the pairs did not exactly fill the segment */
+  if (remaining != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+#else /* ! D_ARITH_CODING_SUPPORTED */
+
+#define get_dac(cinfo)  skip_variable(cinfo)
+
+#endif /* D_ARITH_CODING_SUPPORTED */
+
+
+LOCAL(boolean)
+get_dht (j_decompress_ptr cinfo)
+/* Process a DHT marker; returns FALSE if more input is needed */
+{
+  INT32 length;
+  UINT8 bits[17];
+  UINT8 huffval[256];
+  int i, index, count;
+  JHUFF_TBL **htblptr;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  while (length > 16) {
+    INPUT_BYTE(cinfo, index, return FALSE);
+    TRACEMS1(cinfo, 1, JTRC_DHT, index);
+
+    bits[0] = 0;
+    count = 0;
+    for (i = 1; i <= 16; i++) {
+      INPUT_BYTE(cinfo, bits[i], return FALSE);
+      count += bits[i];
+    }
+    length -= 1 + 16;
+
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+	     bits[1], bits[2], bits[3], bits[4],
+	     bits[5], bits[6], bits[7], bits[8]);
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+	     bits[9], bits[10], bits[11], bits[12],
+	     bits[13], bits[14], bits[15], bits[16]);
+
+    /* Here we just do minimal validation of the counts to avoid walking
+     * off the end of our table space.  jdhuff.c will check more carefully. */
+    if (count > 256 || ((INT32) count) > length)
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+
+    for (i = 0; i < count; i++)
+      INPUT_BYTE(cinfo, huffval[i], return FALSE);
+    /* Zero the unused tail: the copy below is sized by the table field,
+     * not by count, and must not pick up uninitialized stack bytes. */
+    for (i = count; i < 256; i++)
+      huffval[i] = 0;
+    length -= count;
+
+    if (index & 0x10) {		/* AC table definition */
+      index -= 0x10;
+      htblptr = cinfo->ac_huff_tbl_ptrs;
+    } else {			/* DC table definition */
+      htblptr = cinfo->dc_huff_tbl_ptrs;
+    }
+    /* Range-check the table number before it is used to form an address */
+    if (index < 0 || index >= NUM_HUFF_TBLS)
+      ERREXIT1(cinfo, JERR_DHT_INDEX, index);
+    htblptr += index;
+
+    if (*htblptr == NULL)
+      *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo);
+    MEMCOPY((*htblptr)->bits, bits, SIZEOF((*htblptr)->bits));
+    MEMCOPY((*htblptr)->huffval, huffval, SIZEOF((*htblptr)->huffval));
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dqt (j_decompress_ptr cinfo)
+/* Process a DQT marker: one or more quantization tables of DCTSIZE2 entries */
+{
+  INT32 remaining;
+  int slot, k, wide;
+  unsigned int entry;
+  JQUANT_TBL *tbl;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, remaining, return FALSE);
+  remaining -= 2;
+
+  while (remaining > 0) {
+    /* Lead byte: high nibble selects 16-bit entries, low nibble the slot */
+    INPUT_BYTE(cinfo, slot, return FALSE);
+    wide = slot >> 4;
+    slot &= 0x0F;
+    TRACEMS2(cinfo, 1, JTRC_DQT, slot, wide);
+
+    if (slot >= NUM_QUANT_TBLS)
+      ERREXIT1(cinfo, JERR_DQT_INDEX, slot);
+
+    if (cinfo->quant_tbl_ptrs[slot] == NULL)
+      cinfo->quant_tbl_ptrs[slot] = jpeg_alloc_quant_table((j_common_ptr) cinfo);
+    tbl = cinfo->quant_tbl_ptrs[slot];
+
+    /* Entries arrive in zigzag order; file them in natural array order */
+    for (k = 0; k < DCTSIZE2; k++) {
+      if (wide) {
+	INPUT_2BYTES(cinfo, entry, return FALSE);
+      } else {
+	INPUT_BYTE(cinfo, entry, return FALSE);
+      }
+      tbl->quantval[jpeg_natural_order[k]] = (UINT16) entry;
+    }
+
+    if (cinfo->err->trace_level >= 2) {
+      for (k = 0; k < DCTSIZE2; k += 8) {
+	TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
+		 tbl->quantval[k],   tbl->quantval[k+1],
+		 tbl->quantval[k+2], tbl->quantval[k+3],
+		 tbl->quantval[k+4], tbl->quantval[k+5],
+		 tbl->quantval[k+6], tbl->quantval[k+7]);
+      }
+    }
+
+    remaining -= wide ? (1 + 2 * DCTSIZE2) : (1 + DCTSIZE2);
+  }
+
+  if (remaining != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dri (j_decompress_ptr cinfo)
+/* Process a DRI marker: a fixed-size segment carrying the restart
+ * interval.  Returns FALSE if more input is needed.
+ */
+{
+  INT32 seglen;
+  unsigned int interval;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, seglen, return FALSE);
+  if (seglen != 4)		/* length word + 16-bit interval */
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_2BYTES(cinfo, interval, return FALSE);
+  TRACEMS1(cinfo, 1, JTRC_DRI, interval);
+
+  cinfo->restart_interval = interval;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Routines for processing APPn and COM markers.
+ * These are either saved in memory or discarded, per application request.
+ * APP0 and APP14 are specially checked to see if they are
+ * JFIF and Adobe markers, respectively.
+ */
+
+#define APP0_DATA_LEN	14	/* Length of interesting data in APP0 */
+#define APP14_DATA_LEN	12	/* Length of interesting data in APP14 */
+#define APPN_DATA_LEN	14	/* Scratch buffer size for either; must be the largest of the above!! */
+
+
+LOCAL(void)
+examine_app0 (j_decompress_ptr cinfo, JOCTET FAR * data,
+	      unsigned int datalen, INT32 remaining)
+/* Examine first few bytes from an APP0.
+ * A JFIF header is recorded in cinfo; JFXX and anything else is only traced.
+ * datalen is # of bytes at data[], remaining is length of rest of marker data.
+ */
+{
+  INT32 totallen = (INT32) datalen + remaining;
+  int thumb_w, thumb_h;
+
+  if (datalen >= APP0_DATA_LEN &&
+      GETJOCTET(data[0]) == 0x4A &&	/* 'J' */
+      GETJOCTET(data[1]) == 0x46 &&	/* 'F' */
+      GETJOCTET(data[2]) == 0x49 &&	/* 'I' */
+      GETJOCTET(data[3]) == 0x46 &&	/* 'F' */
+      GETJOCTET(data[4]) == 0) {
+    /* JFIF APP0 marker: save info */
+    cinfo->saw_JFIF_marker = TRUE;
+    cinfo->JFIF_major_version = GETJOCTET(data[5]);
+    cinfo->JFIF_minor_version = GETJOCTET(data[6]);
+    cinfo->density_unit = GETJOCTET(data[7]);
+    cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
+    cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
+    thumb_w = GETJOCTET(data[12]);
+    thumb_h = GETJOCTET(data[13]);
+    /* A major version other than 1 signals an incompatible change, but
+     * it only draws a nonfatal warning.  The minor version is not checked.
+     */
+    if (cinfo->JFIF_major_version != 1)
+      WARNMS2(cinfo, JWRN_JFIF_MAJOR,
+	      cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
+    TRACEMS5(cinfo, 1, JTRC_JFIF,
+	     cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
+	     cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
+    /* Mention a thumbnail if one is declared, and trace a complaint when
+     * the rest of the marker is not exactly width * height * 3 bytes.
+     */
+    if (thumb_w | thumb_h)
+      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, thumb_w, thumb_h);
+    totallen -= APP0_DATA_LEN;
+    if (totallen != ((INT32) thumb_w * (INT32) thumb_h * (INT32) 3))
+      TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int) totallen);
+    return;
+  }
+
+  if (datalen >= 6 &&
+      GETJOCTET(data[0]) == 0x4A &&	/* 'J' */
+      GETJOCTET(data[1]) == 0x46 &&	/* 'F' */
+      GETJOCTET(data[2]) == 0x58 &&	/* 'X' */
+      GETJOCTET(data[3]) == 0x58 &&	/* 'X' */
+      GETJOCTET(data[4]) == 0) {
+    /* JFIF "JFXX" extension: nothing is stored, only a trace message */
+    switch (GETJOCTET(data[5])) {
+    case 0x10:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int) totallen);
+      break;
+    case 0x11:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_PALETTE, (int) totallen);
+      break;
+    case 0x13:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int) totallen);
+      break;
+    default:
+      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION,
+	       GETJOCTET(data[5]), (int) totallen);
+      break;
+    }
+    return;
+  }
+
+  /* Start of APP0 does not match "JFIF" or "JFXX", or too short */
+  TRACEMS1(cinfo, 1, JTRC_APP0, (int) totallen);
+}
+
+
+LOCAL(void)
+examine_app14 (j_decompress_ptr cinfo, JOCTET FAR * data,
+	       unsigned int datalen, INT32 remaining)
+/* Examine first few bytes from an APP14 and record the transform code if
+ * it is an Adobe marker; otherwise just trace the segment length.
+ * datalen is # of bytes at data[], remaining is length of rest of marker data.
+ */
+{
+  unsigned int version, flags0, flags1, transform;
+
+  if (datalen < APP14_DATA_LEN ||
+      GETJOCTET(data[0]) != 0x41 ||	/* 'A' */
+      GETJOCTET(data[1]) != 0x64 ||	/* 'd' */
+      GETJOCTET(data[2]) != 0x6F ||	/* 'o' */
+      GETJOCTET(data[3]) != 0x62 ||	/* 'b' */
+      GETJOCTET(data[4]) != 0x65) {	/* 'e' */
+    /* Start of APP14 does not match "Adobe", or too short */
+    TRACEMS1(cinfo, 1, JTRC_APP14, (int) (datalen + remaining));
+    return;
+  }
+
+  version = (GETJOCTET(data[5]) << 8) + GETJOCTET(data[6]);
+  flags0 = (GETJOCTET(data[7]) << 8) + GETJOCTET(data[8]);
+  flags1 = (GETJOCTET(data[9]) << 8) + GETJOCTET(data[10]);
+  transform = GETJOCTET(data[11]);
+  TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform);
+  cinfo->saw_Adobe_marker = TRUE;
+  cinfo->Adobe_transform = (UINT8) transform;
+}
+
+
+METHODDEF(boolean)
+get_interesting_appn (j_decompress_ptr cinfo)
+/* Process an APP0 or APP14 marker without saving it.
+ * Only the first APPN_DATA_LEN bytes (at most) are read and examined;
+ * whatever follows is handed to the source manager to skip.
+ */
+{
+  INT32 remaining;
+  JOCTET head[APPN_DATA_LEN];
+  unsigned int k, headlen;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, remaining, return FALSE);
+  remaining -= 2;
+
+  /* Decide how much of the marker to look at; a length word of 2 or
+   * less leaves nothing to read.
+   */
+  headlen = 0;
+  if (remaining >= APPN_DATA_LEN)
+    headlen = APPN_DATA_LEN;
+  else if (remaining > 0)
+    headlen = (unsigned int) remaining;
+  for (k = 0; k < headlen; k++)
+    INPUT_BYTE(cinfo, head[k], return FALSE);
+  remaining -= headlen;
+
+  /* Pass the bytes to the examiner that matches the marker code */
+  if (cinfo->unread_marker == M_APP0) {
+    examine_app0(cinfo, (JOCTET FAR *) head, headlen, remaining);
+  } else if (cinfo->unread_marker == M_APP14) {
+    examine_app14(cinfo, (JOCTET FAR *) head, headlen, remaining);
+  } else {
+    /* can't get here unless jpeg_save_markers chooses wrong processor */
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+  }
+
+  /* skip any remaining data -- could be lots */
+  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  if (remaining > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) remaining);
+
+  return TRUE;
+}
+
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+METHODDEF(boolean)
+save_marker (j_decompress_ptr cinfo)
+/* Save an APPn or COM marker into the marker list.
+ * May suspend mid-marker; cur_marker and bytes_read carry the state over. */
+{
+  my_marker_ptr marker = (my_marker_ptr) cinfo->marker;
+  jpeg_saved_marker_ptr cur_marker = marker->cur_marker;
+  unsigned int bytes_read, data_length;
+  JOCTET FAR * data;
+  INT32 length = 0;		/* on resume, stays 0 until recomputed below */
+  INPUT_VARS(cinfo);
+
+  if (cur_marker == NULL) {
+    /* begin reading a marker */
+    INPUT_2BYTES(cinfo, length, return FALSE);
+    length -= 2;
+    if (length >= 0) {		/* watch out for bogus length word */
+      /* figure out how much we want to save */
+      unsigned int limit;
+      if (cinfo->unread_marker == (int) M_COM)
+	limit = marker->length_limit_COM;
+      else
+	limit = marker->length_limit_APPn[cinfo->unread_marker - (int) M_APP0];
+      if ((unsigned int) length < limit)
+	limit = (unsigned int) length;
+      /* allocate and initialize the marker item */
+      cur_marker = (jpeg_saved_marker_ptr)
+	(*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				    SIZEOF(struct jpeg_marker_struct) + limit);
+      cur_marker->next = NULL;
+      cur_marker->marker = (UINT8) cinfo->unread_marker;
+      cur_marker->original_length = (unsigned int) length;
+      cur_marker->data_length = limit;
+      /* data area is just beyond the jpeg_marker_struct */
+      data = cur_marker->data = (JOCTET FAR *) (cur_marker + 1);
+      marker->cur_marker = cur_marker;
+      marker->bytes_read = 0;
+      bytes_read = 0;
+      data_length = limit;
+    } else {
+      /* deal with bogus length word */
+      bytes_read = data_length = 0;
+      data = NULL;
+    }
+  } else {
+    /* resume reading a marker */
+    bytes_read = marker->bytes_read;
+    data_length = cur_marker->data_length;
+    data = cur_marker->data + bytes_read;
+  }
+
+  while (bytes_read < data_length) {
+    INPUT_SYNC(cinfo);		/* move the restart point to here */
+    marker->bytes_read = bytes_read;
+    /* If there's not at least one byte in buffer, suspend */
+    MAKE_BYTE_AVAIL(cinfo, return FALSE);
+    /* Copy bytes with reasonable rapidity */
+    while (bytes_read < data_length && bytes_in_buffer > 0) {
+      *data++ = *next_input_byte++;
+      bytes_in_buffer--;
+      bytes_read++;
+    }
+  }
+
+  /* Done reading what we want to read */
+  if (cur_marker != NULL) {	/* will be NULL if bogus length word */
+    /* Add new marker to end of list */
+    if (cinfo->marker_list == NULL) {
+      cinfo->marker_list = cur_marker;
+    } else {
+      jpeg_saved_marker_ptr prev = cinfo->marker_list;
+      while (prev->next != NULL)
+	prev = prev->next;
+      prev->next = cur_marker;
+    }
+    /* Reset pointer & calc remaining data length */
+    data = cur_marker->data;
+    length = cur_marker->original_length - data_length;
+  }
+  /* Reset to initial state for next marker */
+  marker->cur_marker = NULL;
+
+  /* Process the marker if interesting; else just make a generic trace msg */
+  switch (cinfo->unread_marker) {
+  case M_APP0:
+    examine_app0(cinfo, data, data_length, length);
+    break;
+  case M_APP14:
+    examine_app14(cinfo, data, data_length, length);
+    break;
+  default:
+    TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker,
+	     (int) (data_length + length));
+    break;
+  }
+
+  /* skip any remaining data -- could be lots */
+  INPUT_SYNC(cinfo);		/* do before skip_input_data */
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) length);
+  return TRUE;
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+METHODDEF(boolean)
+skip_variable (j_decompress_ptr cinfo)
+/* Skip over an unknown or uninteresting variable-length marker */
+{
+  INT32 payload;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, payload, return FALSE);
+  payload -= 2;			/* exclude the length word itself */
+
+  TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int) payload);
+
+  /* Commit our position before the source manager is asked to skip */
+  INPUT_SYNC(cinfo);
+  if (payload > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long) payload);
+  return TRUE;
+}
+
+
+/*
+ * Find the next JPEG marker, save it in cinfo->unread_marker.
+ * Returns FALSE if had to suspend before reaching a marker;
+ * in that case cinfo->unread_marker is unchanged.
+ *
+ * Note that the result might not be a valid marker code,
+ * but it will never be 0 or FF.
+ */
+
+LOCAL(boolean)
+next_marker (j_decompress_ptr cinfo)
+{
+  int c;
+  INPUT_VARS(cinfo);
+
+  for (;;) {
+    INPUT_BYTE(cinfo, c, return FALSE);
+    /* Skip any non-FF bytes.
+     * This may look a bit inefficient, but it will not occur in a valid file.
+     * We sync after each discarded byte so that a suspending data source
+     * can discard the byte from its buffer.
+     */
+    while (c != 0xFF) {
+      cinfo->marker->discarded_bytes++;
+      INPUT_SYNC(cinfo);
+      INPUT_BYTE(cinfo, c, return FALSE);
+    }
+    /* This loop swallows any duplicate FF bytes.  Extra FFs are legal as
+     * pad bytes, so don't count them in discarded_bytes.  We assume there
+     * will not be so many consecutive FF bytes as to overflow a suspending
+     * data source's input buffer.
+     */
+    do {
+      INPUT_BYTE(cinfo, c, return FALSE);
+    } while (c == 0xFF);
+    if (c != 0)
+      break;			/* found a valid marker, exit loop */
+    /* Reach here if we found a stuffed-zero data sequence (FF/00).
+     * Discard it and loop back to try again.
+     */
+    cinfo->marker->discarded_bytes += 2;
+    INPUT_SYNC(cinfo);
+  }
+
+  /* Warn once about the bytes thrown away on the way here, then reset the count */
+  if (cinfo->marker->discarded_bytes != 0) {
+    WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
+    cinfo->marker->discarded_bytes = 0;
+  }
+
+  cinfo->unread_marker = c;
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+first_marker (j_decompress_ptr cinfo)
+/* Like next_marker, but used to obtain the initial SOI marker.
+ * No preceding garbage or fill is tolerated for this marker; otherwise
+ * an entire non-JPEG file might be scanned before it is rejected.
+ * An application that wants to process non-JFIF files must seek to the
+ * SOI itself before calling the JPEG library.
+ */
+{
+  int lead, code;
+  INPUT_VARS(cinfo);
+
+  INPUT_BYTE(cinfo, lead, return FALSE);
+  INPUT_BYTE(cinfo, code, return FALSE);
+
+  if (! (lead == 0xFF && code == (int) M_SOI))
+    ERREXIT2(cinfo, JERR_NO_SOI, lead, code);
+
+  cinfo->unread_marker = code;
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Read markers until SOS or EOI.
+ *
+ * Returns same codes as are defined for jpeg_consume_input:
+ * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ */
+
+METHODDEF(int)
+read_markers (j_decompress_ptr cinfo)
+{
+  /* Outer loop repeats once for each marker; it is left only at SOS, EOI, a suspension or a fatal error. */
+  for (;;) {
+    /* Collect the marker proper, unless we already did. */
+    /* NB: first_marker() enforces the requirement that SOI appear first. */
+    if (cinfo->unread_marker == 0) {
+      if (! cinfo->marker->saw_SOI) {
+	if (! first_marker(cinfo))
+	  return JPEG_SUSPENDED;
+      } else {
+	if (! next_marker(cinfo))
+	  return JPEG_SUSPENDED;
+      }
+    }
+    /* At this point cinfo->unread_marker contains the marker code and the
+     * input point is just past the marker proper, but before any parameters.
+     * A suspension will cause us to return with this state still true.
+     */
+    switch (cinfo->unread_marker) {
+    case M_SOI:
+      if (! get_soi(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF0:		/* Baseline */
+    case M_SOF1:		/* Extended sequential, Huffman */
+      if (! get_sof(cinfo, FALSE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF2:		/* Progressive, Huffman */
+      if (! get_sof(cinfo, TRUE, FALSE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF9:		/* Extended sequential, arithmetic */
+      if (! get_sof(cinfo, FALSE, TRUE))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF10:		/* Progressive, arithmetic */
+      if (! get_sof(cinfo, TRUE, TRUE))
+	return JPEG_SUSPENDED;
+      break;
+
+    /* Currently unsupported SOFn types */
+    case M_SOF3:		/* Lossless, Huffman */
+    case M_SOF5:		/* Differential sequential, Huffman */
+    case M_SOF6:		/* Differential progressive, Huffman */
+    case M_SOF7:		/* Differential lossless, Huffman */
+    case M_JPG:			/* Reserved for JPEG extensions */
+    case M_SOF11:		/* Lossless, arithmetic */
+    case M_SOF13:		/* Differential sequential, arithmetic */
+    case M_SOF14:		/* Differential progressive, arithmetic */
+    case M_SOF15:		/* Differential lossless, arithmetic */
+      ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker);
+      break;
+
+    case M_SOS:
+      if (! get_sos(cinfo))
+	return JPEG_SUSPENDED;
+      cinfo->unread_marker = 0;	/* processed the marker */
+      return JPEG_REACHED_SOS;
+    
+    case M_EOI:
+      TRACEMS(cinfo, 1, JTRC_EOI);
+      cinfo->unread_marker = 0;	/* processed the marker */
+      return JPEG_REACHED_EOI;
+      
+    case M_DAC:
+      if (! get_dac(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_DHT:
+      if (! get_dht(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_DQT:
+      if (! get_dqt(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_DRI:
+      if (! get_dri(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_APP0:
+    case M_APP1:
+    case M_APP2:
+    case M_APP3:
+    case M_APP4:
+    case M_APP5:
+    case M_APP6:
+    case M_APP7:
+    case M_APP8:
+    case M_APP9:
+    case M_APP10:
+    case M_APP11:
+    case M_APP12:
+    case M_APP13:
+    case M_APP14:
+    case M_APP15:
+      if (! (*((my_marker_ptr) cinfo->marker)->process_APPn[
+		cinfo->unread_marker - (int) M_APP0]) (cinfo))
+	return JPEG_SUSPENDED;
+      break;
+      
+    case M_COM:
+      if (! (*((my_marker_ptr) cinfo->marker)->process_COM) (cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    case M_RST0:		/* these are all parameterless */
+    case M_RST1:
+    case M_RST2:
+    case M_RST3:
+    case M_RST4:
+    case M_RST5:
+    case M_RST6:
+    case M_RST7:
+    case M_TEM:
+      TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker);
+      break;
+
+    case M_DNL:			/* Ignore DNL ... perhaps the wrong thing */
+      if (! skip_variable(cinfo))
+	return JPEG_SUSPENDED;
+      break;
+
+    default:			/* must be DHP, EXP, JPGn, or RESn */
+      /* For now, we treat the reserved markers as fatal errors since they are
+       * likely to be used to signal incompatible JPEG Part 3 extensions.
+       * Once the JPEG 3 version-number marker is well defined, this code
+       * ought to change!
+       */
+      ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+      break;
+    }
+    /* Successfully processed marker, so reset state variable */
+    cinfo->unread_marker = 0;
+  } /* end loop */
+}
+
+
+/*
+ * Read a restart marker, which is expected to appear next in the datastream;
+ * if the marker is not there, take appropriate recovery action.
+ * Returns FALSE if suspension is required.
+ *
+ * This is called by the entropy decoder after it has read an appropriate
+ * number of MCUs.  cinfo->unread_marker may be nonzero if the entropy decoder
+ * has already read a marker from the data source.  Under normal conditions
+ * cinfo->unread_marker will be reset to 0 before returning; if not reset,
+ * it holds a marker which the decoder will be unable to read past.
+ */
+
+METHODDEF(boolean)
+read_restart_marker (j_decompress_ptr cinfo)
+{
+  int expected;			/* marker code of the restart we are due */
+
+  /* Fetch a marker only if the entropy decoder has not already read one; */
+  /* next_marker itself complains about any data it has to skip. */
+  if (cinfo->unread_marker == 0 && ! next_marker(cinfo))
+    return FALSE;		/* suspend: no marker available yet */
+
+  expected = (int) M_RST0 + cinfo->marker->next_restart_num;
+
+  if (cinfo->unread_marker != expected) {
+    /* Restart sequence is damaged: hand recovery to the source manager. */
+    if (! (*cinfo->src->resync_to_restart) (cinfo,
+					    cinfo->marker->next_restart_num))
+      return FALSE;
+  } else {
+    /* Expected marker found: consume it so entropy decoding can resume. */
+    TRACEMS1(cinfo, 3, JTRC_RST, cinfo->marker->next_restart_num);
+    cinfo->unread_marker = 0;
+  }
+
+  /* Advance the expected restart number, wrapping modulo 8. */
+  cinfo->marker->next_restart_num = (cinfo->marker->next_restart_num + 1) & 7;
+
+  return TRUE;
+}
+
+
+/*
+ * This is the default resync_to_restart method for data source managers
+ * to use if they don't have any better approach.  Some data source managers
+ * may be able to back up, or may have additional knowledge about the data
+ * which permits a more intelligent recovery strategy; such managers would
+ * presumably supply their own resync method.
+ *
+ * read_restart_marker calls resync_to_restart if it finds a marker other than
+ * the restart marker it was expecting.  (This code is *not* used unless
+ * a nonzero restart interval has been declared.)  cinfo->unread_marker is
+ * the marker code actually found (might be anything, except 0 or FF).
+ * The desired restart marker number (0..7) is passed as a parameter.
+ * This routine is supposed to apply whatever error recovery strategy seems
+ * appropriate in order to position the input stream to the next data segment.
+ * Note that cinfo->unread_marker is treated as a marker appearing before
+ * the current data-source input point; usually it should be reset to zero
+ * before returning.
+ * Returns FALSE if suspension is required.
+ *
+ * This implementation is substantially constrained by wanting to treat the
+ * input as a data stream; this means we can't back up.  Therefore, we have
+ * only the following actions to work with:
+ *   1. Simply discard the marker and let the entropy decoder resume at next
+ *      byte of file.
+ *   2. Read forward until we find another marker, discarding intervening
+ *      data.  (In theory we could look ahead within the current bufferload,
+ *      without having to discard data if we don't find the desired marker.
+ *      This idea is not implemented here, in part because it makes behavior
+ *      dependent on buffer size and chance buffer-boundary positions.)
+ *   3. Leave the marker unread (by failing to zero cinfo->unread_marker).
+ *      This will cause the entropy decoder to process an empty data segment,
+ *      inserting dummy zeroes, and then we will reprocess the marker.
+ *
+ * #2 is appropriate if we think the desired marker lies ahead, while #3 is
+ * appropriate if the found marker is a future restart marker (indicating
+ * that we have missed the desired restart marker, probably because it got
+ * corrupted).
+ * We apply #2 or #3 if the found marker is a restart marker no more than
+ * two counts behind or ahead of the expected one.  We also apply #2 if the
+ * found marker is not a legal JPEG marker code (it's certainly bogus data).
+ * If the found marker is a restart marker more than 2 counts away, we do #1
+ * (too much risk that the marker is erroneous; with luck we will be able to
+ * resync at some future point).
+ * For any valid non-restart JPEG marker, we apply #3.  This keeps us from
+ * overrunning the end of a scan.  An implementation limited to single-scan
+ * files might find it better to apply #2 for markers other than EOI, since
+ * any other marker would have to be bogus data in that case.
+ */
+
+GLOBAL(boolean)
+jpeg_resync_to_restart (j_decompress_ptr cinfo, int desired)
+{
+  int found = cinfo->unread_marker;	/* marker code under consideration */
+  int action;			/* recovery action chosen: 1, 2 or 3 */
+
+  /* The mismatch is reported unconditionally. */
+  WARNMS2(cinfo, JWRN_MUST_RESYNC, found, desired);
+
+  /* Each iteration classifies one marker; action 2 fetches another. */
+  for (;;) {
+    if (found < (int) M_SOF0) {
+      action = 2;		/* not a legal marker code: skip forward */
+    } else if (found < (int) M_RST0 || found > (int) M_RST7) {
+      action = 3;		/* legal marker that is not a restart */
+    } else if (found == ((int) M_RST0 + ((desired+1) & 7)) ||
+	       found == ((int) M_RST0 + ((desired+2) & 7))) {
+      action = 3;		/* restart one or two counts ahead */
+    } else if (found == ((int) M_RST0 + ((desired-1) & 7)) ||
+	       found == ((int) M_RST0 + ((desired-2) & 7))) {
+      action = 2;		/* restart one or two counts behind */
+    } else {
+      action = 1;		/* the desired restart, or one too far off */
+    }
+    TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, found, action);
+
+    if (action == 3) {
+      /* Leave the marker pending; the entropy decoder will process an */
+      /* empty segment and the marker gets examined again afterwards. */
+      return TRUE;
+    }
+
+    if (action == 1) {
+      /* Drop the marker so the entropy decoder resumes at the next byte. */
+      cinfo->unread_marker = 0;
+      return TRUE;
+    }
+
+    /* action == 2: read ahead to the following marker and decide again. */
+    if (! next_marker(cinfo))
+      return FALSE;		/* suspension required */
+    found = cinfo->unread_marker;
+  } /* end loop */
+}
+
+
+/*
+ * Reset marker processing state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_marker_reader (j_decompress_ptr cinfo)
+{
+  my_marker_ptr reader = (my_marker_ptr) cinfo->marker;
+
+  reader->cur_marker = NULL;
+  reader->pub.discarded_bytes = 0;
+  reader->pub.saw_SOF = FALSE;		/* reader's own flags ... */
+  reader->pub.saw_SOI = FALSE;
+  cinfo->unread_marker = 0;		/* ... then cinfo: nothing pending */
+  cinfo->input_scan_number = 0;		/* no SOS encountered so far */
+  cinfo->comp_info = NULL;		/* get_sof allocates this later */
+}
+
+
+/*
+ * Initialize the marker reader module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_marker_reader (j_decompress_ptr cinfo)
+{
+  my_marker_ptr reader;
+  int n;
+
+  /* The reader object lives in the permanent pool. */
+  reader = (my_marker_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
+				SIZEOF(my_marker_reader));
+  cinfo->marker = (struct jpeg_marker_reader *) reader;
+
+  /* Hook up the public methods. */
+  reader->pub.read_restart_marker = read_restart_marker;
+  reader->pub.read_markers = read_markers;
+  reader->pub.reset_marker_reader = reset_marker_reader;
+
+  /* Default COM/APPn handling: COM and most APPn are skipped outright;
+   * APP0 and APP14 go through get_interesting_appn instead.
+   */
+  reader->length_limit_COM = 0;
+  reader->process_COM = skip_variable;
+  for (n = 0; n < 16; n++) {
+    reader->length_limit_APPn[n] = 0;
+    reader->process_APPn[n] =
+      (n == 0 || n == 14) ? get_interesting_appn : skip_variable;
+  }
+
+  reset_marker_reader(cinfo);	/* start from a clean parsing state */
+}
+
+
+/*
+ * Control saving of COM and APPn markers into marker_list.
+ */
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+GLOBAL(void)
+jpeg_save_markers (j_decompress_ptr cinfo, int marker_code,
+		   unsigned int length_limit)
+{
+  my_marker_ptr reader = (my_marker_ptr) cinfo->marker;
+  jpeg_marker_parser_method handler;
+  long ceiling;
+
+  /* Clamp the limit to the largest chunk the memory manager can hand out,
+   * less the marker header structure.
+   */
+  ceiling = cinfo->mem->max_alloc_chunk - SIZEOF(struct jpeg_marker_struct);
+  if (((long) length_limit) > ceiling)
+    length_limit = (unsigned int) ceiling;
+
+  /* Pick the handler.  A zero limit means the marker is not saved, but
+   * APP0/APP14 are then still parsed on the fly rather than skipped.
+   */
+  if (length_limit == 0) {
+    if (marker_code == (int) M_APP0 || marker_code == (int) M_APP14)
+      handler = get_interesting_appn;
+    else
+      handler = skip_variable;
+  } else {
+    handler = save_marker;
+    /* Saving: keep at least as much of APP0/APP14 as is used internally. */
+    if (marker_code == (int) M_APP0 && length_limit < APP0_DATA_LEN)
+      length_limit = APP0_DATA_LEN;
+    else if (marker_code == (int) M_APP14 && length_limit < APP14_DATA_LEN)
+      length_limit = APP14_DATA_LEN;
+  }
+
+  if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15) {
+    reader->length_limit_APPn[marker_code - (int) M_APP0] = length_limit;
+    reader->process_APPn[marker_code - (int) M_APP0] = handler;
+  } else if (marker_code == (int) M_COM) {
+    reader->length_limit_COM = length_limit;
+    reader->process_COM = handler;
+  } else
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+/*
+ * Install a special processing method for COM or APPn markers.
+ */
+
+GLOBAL(void)
+jpeg_set_marker_processor (j_decompress_ptr cinfo, int marker_code,
+			   jpeg_marker_parser_method routine)
+{
+  my_marker_ptr reader = (my_marker_ptr) cinfo->marker;
+
+  if (marker_code >= (int) M_APP0 && marker_code <= (int) M_APP15)
+    reader->process_APPn[marker_code - (int) M_APP0] = routine;
+  else if (marker_code == (int) M_COM)
+    reader->process_COM = routine;
+  else				/* only COM and APP0..APP15 are accepted */
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+}
diff --git a/src/libjpeg/jdmaster.c b/src/libjpeg/jdmaster.c
new file mode 100644
index 0000000..2802c5b
--- /dev/null
+++ b/src/libjpeg/jdmaster.c
@@ -0,0 +1,557 @@
+/*
+ * jdmaster.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains master control logic for the JPEG decompressor.
+ * These routines are concerned with selecting the modules to be executed
+ * and with determining the number of passes and the work to be done in each
+ * pass.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_decomp_master pub; /* public fields; cinfo->master is cast to/from this struct */
+
+  int pass_number;		/* # of passes completed; reported as progress->completed_passes */
+
+  boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */
+
+  /* Saved references to initialized quantizer modules (NULL if not
+   * initialized), in case we need to switch modes between output passes.
+   */
+  struct jpeg_color_quantizer * quantizer_1pass;
+  struct jpeg_color_quantizer * quantizer_2pass;
+} my_decomp_master;
+
+typedef my_decomp_master * my_master_ptr; /* what cinfo->master is cast to in this file */
+
+
+/*
+ * Determine whether merged upsample/color conversion should be used.
+ * CRUCIAL: this must match the actual capabilities of jdmerge.c!
+ */
+
+LOCAL(boolean)
+use_merged_upsample (j_decompress_ptr cinfo)
+{
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  jpeg_component_info *comp;
+  int ci;
+  /* Merged code is box-filter upsampling: no fancy/CCIR601 sampling. */
+  if (cinfo->do_fancy_upsampling || cinfo->CCIR601_sampling)
+    return FALSE;
+  /* Only 3-component YCbCr input converted to RGB output is handled. */
+  if (! (cinfo->jpeg_color_space == JCS_YCbCr && cinfo->num_components == 3 &&
+	 cinfo->out_color_space == JCS_RGB &&
+	 cinfo->out_color_components == RGB_PIXELSIZE))
+    return FALSE;
+  /* First component 2 across and at most 2 down; the other two 1x1. */
+  comp = cinfo->comp_info;
+  if (comp[0].h_samp_factor != 2 || comp[0].v_samp_factor > 2 ||
+      comp[1].h_samp_factor != 1 || comp[1].v_samp_factor != 1 ||
+      comp[2].h_samp_factor != 1 || comp[2].v_samp_factor != 1)
+    return FALSE;
+  /* All three components must use the minimum IDCT scaling. */
+  for (ci = 0; ci < 3; ci++) {
+    if (comp[ci].DCT_scaled_size != cinfo->min_DCT_scaled_size)
+      return FALSE;
+  }
+  /* ??? also need to test for upsample-time rescaling, when & if supported */
+  return TRUE;
+#else
+  return FALSE;
+#endif
+}
+
+
+/*
+ * Compute output image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ * Also note that it may be called before the master module is initialized!
+ */
+
+GLOBAL(void)
+jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
+/* Derives output size, DCT scaling, component counts, rec_outbuf_height */
+{
+#ifdef IDCT_SCALING_SUPPORTED
+  int ci;
+  jpeg_component_info *compptr;
+#endif
+
+  /* Prevent application from calling me at wrong times */
+  if (cinfo->global_state != DSTATE_READY)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+  /* Map scale_num/scale_denom onto a provided ratio: 1/8, 1/4, 1/2 or 1/1. */
+  if (cinfo->scale_num * 8 <= cinfo->scale_denom) {
+    /* Provide 1/8 scaling (requested ratio is at most 1/8) */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, 8L);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, 8L);
+    cinfo->min_DCT_scaled_size = 1;
+  } else if (cinfo->scale_num * 4 <= cinfo->scale_denom) {
+    /* Provide 1/4 scaling (requested ratio is above 1/8, at most 1/4) */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, 4L);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, 4L);
+    cinfo->min_DCT_scaled_size = 2;
+  } else if (cinfo->scale_num * 2 <= cinfo->scale_denom) {
+    /* Provide 1/2 scaling (requested ratio is above 1/4, at most 1/2) */
+    cinfo->output_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width, 2L);
+    cinfo->output_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height, 2L);
+    cinfo->min_DCT_scaled_size = 4;
+  } else {
+    /* Provide 1/1 scaling (any requested ratio above 1/2) */
+    cinfo->output_width = cinfo->image_width;
+    cinfo->output_height = cinfo->image_height;
+    cinfo->min_DCT_scaled_size = DCTSIZE;
+  }
+  /* In selecting the actual DCT scaling for each component, we try to
+   * scale up the chroma components via IDCT scaling rather than upsampling.
+   * This saves time if the upsampler gets to use 1:1 scaling.
+   * Note this code assumes that the supported DCT scalings are powers of 2.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    int ssize = cinfo->min_DCT_scaled_size; /* start from the image-wide minimum */
+    while (ssize < DCTSIZE &&
+	   (compptr->h_samp_factor * ssize * 2 <=
+	    cinfo->max_h_samp_factor * cinfo->min_DCT_scaled_size) &&
+	   (compptr->v_samp_factor * ssize * 2 <=
+	    cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size)) {
+      ssize = ssize * 2; /* doubling still fits in both directions */
+    }
+    compptr->DCT_scaled_size = ssize;
+  }
+
+  /* Recompute downsampled dimensions of components;
+   * application needs to know these if using raw downsampled data.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Size in samples, after IDCT scaling */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_width *
+		    (long) (compptr->h_samp_factor * compptr->DCT_scaled_size),
+		    (long) (cinfo->max_h_samp_factor * DCTSIZE));
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long) cinfo->image_height *
+		    (long) (compptr->v_samp_factor * compptr->DCT_scaled_size),
+		    (long) (cinfo->max_v_samp_factor * DCTSIZE));
+  }
+
+#else /* !IDCT_SCALING_SUPPORTED */
+
+  /* Hardwire it to "no scaling" */
+  cinfo->output_width = cinfo->image_width;
+  cinfo->output_height = cinfo->image_height;
+  /* jdinput.c has already initialized DCT_scaled_size to DCTSIZE,
+   * and has computed unscaled downsampled_width and downsampled_height.
+   */
+
+#endif /* IDCT_SCALING_SUPPORTED */
+
+  /* Report number of components in selected colorspace. */
+  /* Probably this should be in the color conversion module... */
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+    cinfo->out_color_components = 1;
+    break;
+  case JCS_RGB:
+#if RGB_PIXELSIZE != 3
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    break;
+#endif /* else JCS_RGB falls through and shares code with YCbCr */
+  case JCS_YCbCr:
+    cinfo->out_color_components = 3;
+    break;
+  case JCS_CMYK:
+  case JCS_YCCK:
+    cinfo->out_color_components = 4;
+    break;
+  default:			/* else must be same colorspace as in file */
+    cinfo->out_color_components = cinfo->num_components;
+    break;
+  }
+  cinfo->output_components = (cinfo->quantize_colors ? 1 : /* 1 when quantizing */
+			      cinfo->out_color_components);
+
+  /* See if upsampler will want to emit more than one row at a time */
+  if (use_merged_upsample(cinfo))
+    cinfo->rec_outbuf_height = cinfo->max_v_samp_factor;
+  else
+    cinfo->rec_outbuf_height = 1;
+}
+
+
+/*
+ * Several decompression processes need to range-limit values to the range
+ * 0..MAXJSAMPLE; the input value may fall somewhat outside this range
+ * due to noise introduced by quantization, roundoff error, etc.  These
+ * processes are inner loops and need to be as fast as possible.  On most
+ * machines, particularly CPUs with pipelines or instruction prefetch,
+ * a (subscript-check-less) C table lookup
+ *		x = sample_range_limit[x];
+ * is faster than explicit tests
+ *		if (x < 0)  x = 0;
+ *		else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
+ * These processes all use a common table prepared by the routine below.
+ *
+ * For most steps we can mathematically guarantee that the initial value
+ * of x is within MAXJSAMPLE+1 of the legal range, so a table running from
+ * -(MAXJSAMPLE+1) to 2*MAXJSAMPLE+1 is sufficient.  But for the initial
+ * limiting step (just after the IDCT), a wildly out-of-range value is 
+ * possible if the input data is corrupt.  To avoid any chance of indexing
+ * off the end of memory and getting a bad-pointer trap, we perform the
+ * post-IDCT limiting thus:
+ *		x = range_limit[x & MASK];
+ * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit
+ * samples.  Under normal circumstances this is more than enough range and
+ * a correct output will be generated; with bogus input data the mask will
+ * cause wraparound, and we will safely generate a bogus-but-in-range output.
+ * For the post-IDCT step, we want to convert the data from signed to unsigned
+ * representation by adding CENTERJSAMPLE at the same time that we limit it.
+ * So the post-IDCT limiting table ends up looking like this:
+ *   CENTERJSAMPLE,CENTERJSAMPLE+1,...,MAXJSAMPLE,
+ *   MAXJSAMPLE (repeat 2*(MAXJSAMPLE+1)-CENTERJSAMPLE times),
+ *   0          (repeat 2*(MAXJSAMPLE+1)-CENTERJSAMPLE times),
+ *   0,1,...,CENTERJSAMPLE-1
+ * Negative inputs select values from the upper half of the table after
+ * masking.
+ *
+ * We can save some space by overlapping the start of the post-IDCT table
+ * with the simpler range limiting table.  The post-IDCT table begins at
+ * sample_range_limit + CENTERJSAMPLE.
+ *
+ * Note that the table is allocated in near data space on PCs; it's small
+ * enough and used often enough to justify this.
+ */
+
+LOCAL(void)
+prepare_range_limit_table (j_decompress_ptr cinfo)
+/* Allocate (from JPOOL_IMAGE) and fill in cinfo->sample_range_limit */
+{
+  JSAMPLE * table;		/* moving base pointer into the allocation */
+  int i;
+
+  table = (JSAMPLE *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+		(5 * (MAXJSAMPLE+1) + CENTERJSAMPLE) * SIZEOF(JSAMPLE));
+  table += (MAXJSAMPLE+1);	/* allow negative subscripts of simple table */
+  cinfo->sample_range_limit = table;
+  /* First segment of "simple" table: limit[x] = 0 for x < 0 */
+  MEMZERO(table - (MAXJSAMPLE+1), (MAXJSAMPLE+1) * SIZEOF(JSAMPLE));
+  /* Main part of "simple" table: limit[x] = x */
+  for (i = 0; i <= MAXJSAMPLE; i++)
+    table[i] = (JSAMPLE) i;
+  table += CENTERJSAMPLE;	/* Point to where post-IDCT table starts */
+  /* End of simple table, rest of first half of post-IDCT table */
+  for (i = CENTERJSAMPLE; i < 2*(MAXJSAMPLE+1); i++)
+    table[i] = MAXJSAMPLE;	/* everything above the legal range clamps high */
+  /* Second half of post-IDCT table: zeroes, then a copy of limit[0..CENTERJSAMPLE-1] */
+  MEMZERO(table + (2 * (MAXJSAMPLE+1)),
+	  (2 * (MAXJSAMPLE+1) - CENTERJSAMPLE) * SIZEOF(JSAMPLE));
+  MEMCOPY(table + (4 * (MAXJSAMPLE+1) - CENTERJSAMPLE),
+	  cinfo->sample_range_limit, CENTERJSAMPLE * SIZEOF(JSAMPLE));
+}
+
+
+/*
+ * Master selection of decompression modules.
+ * This is done once at jpeg_start_decompress time.  We determine
+ * which modules will be used and give them appropriate initialization calls.
+ * We also initialize the decompressor input side to begin consuming data.
+ *
+ * Since jpeg_read_header has finished, we know what is in the SOF
+ * and (first) SOS markers.  We also have all the application parameter
+ * settings.
+ */
+
+LOCAL(void)
+master_selection (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+  boolean use_c_buffer;		/* passed to jinit_d_coef_controller below */
+  long samplesperrow;
+  JDIMENSION jd_samplesperrow;
+
+  /* Initialize dimensions and other stuff */
+  jpeg_calc_output_dimensions(cinfo);
+  prepare_range_limit_table(cinfo);
+
+  /* Width of an output scanline must be representable as JDIMENSION. */
+  samplesperrow = (long) cinfo->output_width * (long) cinfo->out_color_components;
+  jd_samplesperrow = (JDIMENSION) samplesperrow;
+  if ((long) jd_samplesperrow != samplesperrow) /* value changed in the round trip */
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* Initialize my private state */
+  master->pass_number = 0;
+  master->using_merged_upsample = use_merged_upsample(cinfo);
+
+  /* Color quantizer selection */
+  master->quantizer_1pass = NULL;
+  master->quantizer_2pass = NULL;
+  /* No mode changes if not using buffered-image mode. */
+  if (! cinfo->quantize_colors || ! cinfo->buffered_image) {
+    cinfo->enable_1pass_quant = FALSE;
+    cinfo->enable_external_quant = FALSE;
+    cinfo->enable_2pass_quant = FALSE;
+  }
+  if (cinfo->quantize_colors) {
+    if (cinfo->raw_data_out)
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    /* 2-pass quantizer only works in 3-component color space. */
+    if (cinfo->out_color_components != 3) {
+      cinfo->enable_1pass_quant = TRUE;
+      cinfo->enable_external_quant = FALSE;
+      cinfo->enable_2pass_quant = FALSE;
+      cinfo->colormap = NULL;	/* any supplied colormap is dropped here */
+    } else if (cinfo->colormap != NULL) {
+      cinfo->enable_external_quant = TRUE;
+    } else if (cinfo->two_pass_quantize) {
+      cinfo->enable_2pass_quant = TRUE;
+    } else {
+      cinfo->enable_1pass_quant = TRUE;
+    }
+
+    if (cinfo->enable_1pass_quant) {
+#ifdef QUANT_1PASS_SUPPORTED
+      jinit_1pass_quantizer(cinfo);
+      master->quantizer_1pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+
+    /* We use the 2-pass code to map to external colormaps. */
+    if (cinfo->enable_2pass_quant || cinfo->enable_external_quant) {
+#ifdef QUANT_2PASS_SUPPORTED
+      jinit_2pass_quantizer(cinfo);
+      master->quantizer_2pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+    /* If both quantizers are initialized, the 2-pass one is left active;
+     * this is necessary for starting with quantization to an external map.
+     */
+  }
+
+  /* Post-processing: in particular, color conversion first */
+  if (! cinfo->raw_data_out) {
+    if (master->using_merged_upsample) {
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+      jinit_merged_upsampler(cinfo); /* does color conversion too */
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else {
+      jinit_color_deconverter(cinfo);
+      jinit_upsampler(cinfo);
+    }
+    jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
+  }
+  /* Inverse DCT */
+  jinit_inverse_dct(cinfo);
+  /* Entropy decoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code) {
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL); /* no arithmetic decoder is set up here */
+  } else {
+    if (cinfo->progressive_mode) {
+#ifdef D_PROGRESSIVE_SUPPORTED
+      jinit_phuff_decoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else
+      jinit_huff_decoder(cinfo);
+  }
+
+  /* Initialize principal buffer controllers. */
+  use_c_buffer = cinfo->inputctl->has_multiple_scans || cinfo->buffered_image;
+  jinit_d_coef_controller(cinfo, use_c_buffer);
+
+  if (! cinfo->raw_data_out)
+    jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Initialize input side of decompressor to consume first scan. */
+  (*cinfo->inputctl->start_input_pass) (cinfo);
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* If jpeg_start_decompress will read the whole file, initialize
+   * progress monitoring appropriately.  The input step is counted
+   * as one pass.
+   */
+  if (cinfo->progress != NULL && ! cinfo->buffered_image &&
+      cinfo->inputctl->has_multiple_scans) {
+    int nscans;
+    /* Estimate number of scans to set pass_limit. */
+    if (cinfo->progressive_mode) {
+      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
+      nscans = 2 + 3 * cinfo->num_components;
+    } else {
+      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
+      nscans = cinfo->num_components;
+    }
+    cinfo->progress->pass_counter = 0L;
+    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans;
+    cinfo->progress->completed_passes = 0;
+    cinfo->progress->total_passes = (cinfo->enable_2pass_quant ? 3 : 2);
+    /* Count the input pass as done */
+    master->pass_number++;
+  }
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+}
+
+
+/*
+ * Per-pass setup.
+ * This is called at the beginning of each output pass.  We determine which
+ * modules will be active during this pass and give them appropriate
+ * start_pass calls.  We also set is_dummy_pass to indicate whether this
+ * is a "real" output pass or a dummy pass for color quantization.
+ * (In the latter case, jdapistd.c will crank the pass to completion.)
+ */
+
+METHODDEF(void)
+prepare_for_output_pass (j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr) cinfo->master;
+
+  if (master->pub.is_dummy_pass) { /* flag left set by the preceding call */
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Final pass of 2-pass quantization */
+    master->pub.is_dummy_pass = FALSE;
+    (*cinfo->cquantize->start_pass) (cinfo, FALSE);
+    (*cinfo->post->start_pass) (cinfo, JBUF_CRANK_DEST);
+    (*cinfo->main->start_pass) (cinfo, JBUF_CRANK_DEST);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  } else {
+    if (cinfo->quantize_colors && cinfo->colormap == NULL) {
+      /* Select new quantization method */
+      if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
+	cinfo->cquantize = master->quantizer_2pass;
+	master->pub.is_dummy_pass = TRUE; /* this pass is the dummy one */
+      } else if (cinfo->enable_1pass_quant) {
+	cinfo->cquantize = master->quantizer_1pass;
+      } else {
+	ERREXIT(cinfo, JERR_MODE_CHANGE); /* no enabled quantizer fits the request */
+      }
+    }
+    (*cinfo->idct->start_pass) (cinfo);
+    (*cinfo->coef->start_output_pass) (cinfo);
+    if (! cinfo->raw_data_out) {
+      if (! master->using_merged_upsample) /* cconvert is not started when merged */
+	(*cinfo->cconvert->start_pass) (cinfo);
+      (*cinfo->upsample->start_pass) (cinfo);
+      if (cinfo->quantize_colors)
+	(*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
+      (*cinfo->post->start_pass) (cinfo,
+	    (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+      (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
+    }
+  }
+
+  /* Set up progress monitor's pass info if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->completed_passes = master->pass_number;
+    cinfo->progress->total_passes = master->pass_number + /* a dummy pass adds one more */
+				    (master->pub.is_dummy_pass ? 2 : 1);
+    /* In buffered-image mode, we assume one more output pass if EOI not
+     * yet reached, but no more passes if EOI has been reached.
+     */
+    if (cinfo->buffered_image && ! cinfo->inputctl->eoi_reached) {
+      cinfo->progress->total_passes += (cinfo->enable_2pass_quant ? 2 : 1);
+    }
+  }
+}
+
+
+/*
+ * Finish up at end of an output pass.
+ */
+
+METHODDEF(void)
+finish_output_pass (j_decompress_ptr cinfo)
+{
+  my_master_ptr state = (my_master_ptr) cinfo->master;
+
+  if (cinfo->quantize_colors)	/* quantizer wraps up its pass first */
+    (*cinfo->cquantize->finish_pass) (cinfo);
+  state->pass_number += 1;	/* count this output pass as completed */
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+GLOBAL(void)
+jpeg_new_colormap (j_decompress_ptr cinfo)
+{
+  my_master_ptr state = (my_master_ptr) cinfo->master;
+
+  /* Only callable while the decompressor is in the DSTATE_BUFIMAGE state. */
+  if (cinfo->global_state != DSTATE_BUFIMAGE)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (! cinfo->quantize_colors || ! cinfo->enable_external_quant ||
+      cinfo->colormap == NULL) {
+    ERREXIT(cinfo, JERR_MODE_CHANGE);
+  } else {
+    /* Make the 2-pass quantizer active and tell it the colormap changed. */
+    cinfo->cquantize = state->quantizer_2pass;
+    (*cinfo->cquantize->new_color_map) (cinfo);
+    state->pub.is_dummy_pass = FALSE;	/* just in case */
+  }
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+/*
+ * Initialize master decompression control and select active modules.
+ * This is performed at the start of jpeg_start_decompress.
+ */
+
+GLOBAL(void)
+jinit_master_decompress (j_decompress_ptr cinfo)
+{
+  my_master_ptr state = (my_master_ptr)
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(my_decomp_master));
+
+  /* Publish the object, then fill in its method table and flags. */
+  cinfo->master = (struct jpeg_decomp_master *) state;
+  state->pub.is_dummy_pass = FALSE;
+  state->pub.finish_output_pass = finish_output_pass;
+  state->pub.prepare_for_output_pass = prepare_for_output_pass;
+
+  /* Module selection relies on cinfo->master being in place already. */
+  master_selection(cinfo);
+}
diff --git a/src/libjpeg/jdmerge.c b/src/libjpeg/jdmerge.c
new file mode 100644
index 0000000..3744446
--- /dev/null
+++ b/src/libjpeg/jdmerge.c
@@ -0,0 +1,400 @@
+/*
+ * jdmerge.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains code for merged upsampling/color conversion.
+ *
+ * This file combines functions from jdsample.c and jdcolor.c;
+ * read those files first to understand what's going on.
+ *
+ * When the chroma components are to be upsampled by simple replication
+ * (ie, box filtering), we can save some work in color conversion by
+ * calculating all the output pixels corresponding to a pair of chroma
+ * samples at one time.  In the conversion equations
+ *	R = Y           + K1 * Cr
+ *	G = Y + K2 * Cb + K3 * Cr
+ *	B = Y + K4 * Cb
+ * only the Y term varies among the group of pixels corresponding to a pair
+ * of chroma samples, so the rest of the terms can be calculated just once.
+ * At typical sampling ratios, this eliminates half or three-quarters of the
+ * multiplications needed for color conversion.
+ *
+ * This file currently provides implementations for the following cases:
+ *	YCbCr => RGB color conversion only.
+ *	Sampling ratios of 2h1v or 2h2v.
+ *	No scaling needed at upsample time.
+ *	Corner-aligned (non-CCIR601) sampling alignment.
+ * Other special cases could be added, but in most applications these are
+ * the only common cases.  (For uncommon cases we fall back on the more
+ * general code in jdsample.c and jdcolor.c.)
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_upsampler pub;	/* public fields */
+
+  /* Routine that upsamples and color-converts one row group (h2v1 or h2v2) */
+  JMETHOD(void, upmethod, (j_decompress_ptr cinfo,
+			   JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+			   JSAMPARRAY output_buf));
+
+  /* Private state for YCC->RGB conversion (built by build_ycc_rgb_table) */
+  int * Cr_r_tab;		/* => table for Cr to R conversion (rounded) */
+  int * Cb_b_tab;		/* => table for Cb to B conversion (rounded) */
+  INT32 * Cr_g_tab;		/* => table for Cr to G conversion (scaled up) */
+  INT32 * Cb_g_tab;		/* => table for Cb to G (scaled, + ONE_HALF) */
+
+  /* For 2:1 vertical sampling, we produce two output rows at a time.
+   * We need a "spare" row buffer to hold the second output row if the
+   * application provides just a one-row buffer; we also use the spare
+   * to discard the dummy last row if the image height is odd.
+   */
+  JSAMPROW spare_row;		/* NULL unless max_v_samp_factor == 2 */
+  boolean spare_full;		/* T if spare buffer is occupied */
+
+  JDIMENSION out_row_width;	/* samples per output row (width * components) */
+  JDIMENSION rows_to_go;	/* rows remaining in image (updated by 2v only) */
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;
+
+#define SCALEBITS	16	/* speediest right-shift on some machines */
+#define ONE_HALF	((INT32) 1 << (SCALEBITS-1))
+#define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))
+
+
+/*
+ * Initialize tables for YCC->RGB colorspace conversion.
+ * This is taken directly from jdcolor.c; see that file for more info.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  j_common_ptr common = (j_common_ptr) cinfo;
+  int idx;
+  INT32 chroma;
+  SHIFT_TEMPS
+
+  /* One entry per possible sample value (0..MAXJSAMPLE) in each table. */
+  upsample->Cr_r_tab = (int *) (*cinfo->mem->alloc_small)
+    (common, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cb_b_tab = (int *) (*cinfo->mem->alloc_small)
+    (common, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int));
+  upsample->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    (common, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+  upsample->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small)
+    (common, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  /*
+   * idx is the raw Cb/Cr sample value; chroma is the same value re-centered
+   * around zero, i.e. idx - CENTERJSAMPLE.
+   */
+  for (idx = 0; idx <= MAXJSAMPLE; idx++) {
+    chroma = (INT32) idx - CENTERJSAMPLE;
+    /* R and B contributions are rounded to the nearest integer here. */
+    upsample->Cr_r_tab[idx] = (int)
+      RIGHT_SHIFT(FIX(1.40200) * chroma + ONE_HALF, SCALEBITS);
+    upsample->Cb_b_tab[idx] = (int)
+      RIGHT_SHIFT(FIX(1.77200) * chroma + ONE_HALF, SCALEBITS);
+
+    /* G contributions stay scaled up by 2^SCALEBITS; the rounding term */
+    /* ONE_HALF is folded into the Cb table so the inner loops can skip it. */
+    upsample->Cr_g_tab[idx] = (- FIX(0.71414)) * chroma;
+    upsample->Cb_g_tab[idx] = (- FIX(0.34414)) * chroma + ONE_HALF;
+  }
+}
+
+
+/*
+ * Initialize for an upsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_merged_upsample (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr state = (my_upsample_ptr) cinfo->upsample;
+
+  /* Every output row is still pending at the start of a pass... */
+  state->rows_to_go = cinfo->output_height;
+  /* ...and no second row is parked in the spare buffer yet. */
+  state->spare_full = FALSE;
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ *
+ * The control routine just handles the row buffering considerations.
+ */
+
+METHODDEF(void)
+merged_2v_upsample (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,	/* not referenced here */
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+/* 2:1 vertical sampling case: emits 1 or 2 rows; may need a spare row. */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  JSAMPROW work_ptrs[2];	/* where upmethod writes its two output rows */
+  JDIMENSION num_rows;		/* number of rows returned to caller */
+
+  if (upsample->spare_full) {
+    /* If we have a spare row saved from a previous cycle, just return it. */
+    jcopy_sample_rows(& upsample->spare_row, 0, output_buf + *out_row_ctr, 0,
+		      1, upsample->out_row_width);
+    num_rows = 1;
+    upsample->spare_full = FALSE;
+  } else {
+    /* Figure number of rows to return to caller. */
+    num_rows = 2;
+    /* Not more than the distance to the end of the image. */
+    if (num_rows > upsample->rows_to_go)
+      num_rows = upsample->rows_to_go;
+    /* And not more than what the client can accept: */
+    out_rows_avail -= *out_row_ctr;	/* now: rows still free in output_buf */
+    if (num_rows > out_rows_avail)
+      num_rows = out_rows_avail;
+    /* Create output pointer array for upsampler. */
+    work_ptrs[0] = output_buf[*out_row_ctr];
+    if (num_rows > 1) {
+      work_ptrs[1] = output_buf[*out_row_ctr + 1];
+    } else {	/* only one row goes out now: second row lands in the spare */
+      work_ptrs[1] = upsample->spare_row;
+      upsample->spare_full = TRUE;
+    }
+    /* Now do the upsampling. */
+    (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, work_ptrs);
+  }
+
+  /* Adjust counts */
+  *out_row_ctr += num_rows;
+  upsample->rows_to_go -= num_rows;
+  /* Once no row is waiting in the spare buffer, this row group is consumed */
+  if (! upsample->spare_full)
+    (*in_row_group_ctr)++;
+}
+
+
+METHODDEF(void)
+merged_1v_upsample (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+/* 1:1 vertical sampling: one row group in, one row out, no spare row. */
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  JSAMPARRAY dest_rows = output_buf + *out_row_ctr;
+
+  /* Convert the current row group straight into the caller's buffer. */
+  (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, dest_rows);
+  /* One output row produced, one input row group consumed. */
+  *out_row_ctr += 1;
+  *in_row_group_ctr += 1;
+}
+
+
+/*
+ * These are the routines invoked by the control routines to do
+ * the actual upsampling/conversion.  One row group is processed per call.
+ *
+ * Note: since we may be writing directly into application-supplied buffers,
+ * we have to be honest about the output width; we can't assume the buffer
+ * has been rounded up to an even width.
+ */
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+ */
+
+METHODDEF(void)
+h2v1_merged_upsample (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+		      JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;	/* luma, and chroma terms per pair */
+  int cb, cr;
+  register JSAMPROW outptr;
+  JSAMPROW inptr0, inptr1, inptr2;	/* Y, Cb and Cr input rows */
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr = output_buf[0];
+  /* Loop for each pair of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Chroma terms are computed once and shared by both pixels of the pair */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+  }
+  /* Odd width: emit one more pixel from the next Y/Cb/Cr samples */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr0);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+  }
+}
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+ */
+
+METHODDEF(void)
+h2v2_merged_upsample (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
+		      JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;	/* luma, and chroma terms per group */
+  int cb, cr;
+  register JSAMPROW outptr0, outptr1;	/* upper and lower output rows */
+  JSAMPROW inptr00, inptr01, inptr1, inptr2;	/* two Y rows, then Cb, Cr */
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr*2];	/* two Y rows per row group */
+  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+  /* Loop for each group of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Chroma terms are computed once and shared by all 4 pixels of the group */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 4 Y values and emit 4 pixels */
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+  }
+  /* Odd width: emit one more pixel in each of the two output rows */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr00);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+    y  = GETJSAMPLE(*inptr01);
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+  }
+}
+
+
+/*
+ * Module initialization routine for merged upsampling/color conversion.
+ *
+ * NB: this is called under the conditions determined by use_merged_upsample()
+ * in jdmaster.c.  That routine MUST correspond to the actual capabilities
+ * of this module; no safety checks are made here.
+ */
+
+GLOBAL(void)
+jinit_merged_upsampler (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr merger = (my_upsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_upsampler));
+  /* Publish the object and fill in the fields common to both cases. */
+  cinfo->upsample = (struct jpeg_upsampler *) merger;
+  merger->pub.start_pass = start_pass_merged_upsample;
+  merger->pub.need_context_rows = FALSE;
+  merger->out_row_width = cinfo->output_width * cinfo->out_color_components;
+
+  if (cinfo->max_v_samp_factor != 2) {
+    /* Not 2:1 vertical sampling: single-row path, no spare row needed. */
+    merger->pub.upsample = merged_1v_upsample;
+    merger->upmethod = h2v1_merged_upsample;
+    merger->spare_row = NULL;
+  } else {
+    /* 2:1 vertical sampling: two rows per row group; the second may have */
+    /* to wait in a spare row buffer of out_row_width samples. */
+    merger->pub.upsample = merged_2v_upsample;
+    merger->upmethod = h2v2_merged_upsample;
+    merger->spare_row = (JSAMPROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+		(size_t) (merger->out_row_width * SIZEOF(JSAMPLE)));
+  }
+
+  /* Finally build the YCC->RGB lookup tables. */
+  build_ycc_rgb_table(cinfo);
+}
+
+#endif /* UPSAMPLE_MERGING_SUPPORTED */
diff --git a/src/libjpeg/jdphuff.c b/src/libjpeg/jdphuff.c
new file mode 100644
index 0000000..2267809
--- /dev/null
+++ b/src/libjpeg/jdphuff.c
@@ -0,0 +1,668 @@
+/*
+ * jdphuff.c
+ *
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains Huffman entropy decoding routines for progressive JPEG.
+ *
+ * Much of the complexity here has to do with supporting input suspension.
+ * If the data source module demands suspension, we want to be able to back
+ * up to the start of the current MCU.  To do this, we copy state variables
+ * into local working storage, and update them back to the permanent
+ * storage only upon successful completion of an MCU.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdhuff.h"		/* Declarations shared with jdhuff.c */
+
+
+#ifdef D_PROGRESSIVE_SUPPORTED
+
+/*
+ * Expanded entropy decoder object for progressive Huffman decoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  unsigned int EOBRUN;			/* blocks left in current EOB run */
+  int last_dc_val[MAX_COMPS_IN_SCAN];	/* last DC coef, per scan component */
+} savable_state;
+
+/* This macro is to work around compilers with missing or broken
+ * structure assignment.  You'll need to fix this code if you have
+ * such a compiler and you change MAX_COMPS_IN_SCAN.
+ */
+
+#ifndef NO_STRUCT_ASSIGN
+#define ASSIGN_STATE(dest,src)  ((dest) = (src))
+#else
+#if MAX_COMPS_IN_SCAN == 4
+#define ASSIGN_STATE(dest,src)  \
+	((dest).EOBRUN = (src).EOBRUN, \
+	 (dest).last_dc_val[0] = (src).last_dc_val[0], \
+	 (dest).last_dc_val[1] = (src).last_dc_val[1], \
+	 (dest).last_dc_val[2] = (src).last_dc_val[2], \
+	 (dest).last_dc_val[3] = (src).last_dc_val[3])
+#endif
+#endif
+
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  /* These fields are loaded into local variables at start of each MCU.
+   * In case of suspension, we exit WITHOUT updating them.
+   */
+  bitread_perm_state bitstate;	/* Bit buffer at start of MCU */
+  savable_state saved;		/* Other state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  unsigned int restarts_to_go;	/* MCUs left in this restart interval */
+
+  /* Derived tables, indexed by Huffman table number (image lifespan) */
+  d_derived_tbl * derived_tbls[NUM_HUFF_TBLS];
+  /* Set by start_pass for AC scans, which have exactly one component: */
+  d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */
+} phuff_entropy_decoder;
+
+typedef phuff_entropy_decoder * phuff_entropy_ptr;
+
+/* Forward declarations */
+METHODDEF(boolean) decode_mcu_DC_first JPP((j_decompress_ptr cinfo,
+					    JBLOCKROW *MCU_data));
+METHODDEF(boolean) decode_mcu_AC_first JPP((j_decompress_ptr cinfo,
+					    JBLOCKROW *MCU_data));
+METHODDEF(boolean) decode_mcu_DC_refine JPP((j_decompress_ptr cinfo,
+					     JBLOCKROW *MCU_data));
+METHODDEF(boolean) decode_mcu_AC_refine JPP((j_decompress_ptr cinfo,
+					     JBLOCKROW *MCU_data));
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass_phuff_decoder (j_decompress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  boolean is_DC_band, bad;
+  int ci, coefi, tbl;
+  int *coef_bit_ptr;
+  jpeg_component_info * compptr;
+
+  is_DC_band = (cinfo->Ss == 0);	/* band starting at coef 0 is a DC scan */
+
+  /* Validate scan parameters */
+  bad = FALSE;
+  if (is_DC_band) {
+    if (cinfo->Se != 0)
+      bad = TRUE;
+  } else {
+    /* need not check Ss/Se < 0 since they came from unsigned bytes */
+    if (cinfo->Ss > cinfo->Se || cinfo->Se >= DCTSIZE2)
+      bad = TRUE;
+    /* AC scans may have only one component */
+    if (cinfo->comps_in_scan != 1)
+      bad = TRUE;
+  }
+  if (cinfo->Ah != 0) {
+    /* Successive approximation refinement scan: must have Al = Ah-1. */
+    if (cinfo->Al != cinfo->Ah-1)
+      bad = TRUE;
+  }
+  if (cinfo->Al > 13)		/* need not check for < 0 */
+    bad = TRUE;
+  /* Arguably the maximum Al value should be less than 13 for 8-bit precision,
+   * but the spec doesn't say so, and we try to be liberal about what we
+   * accept.  Note: large Al values could result in out-of-range DC
+   * coefficients during early scans, leading to bizarre displays due to
+   * overflows in the IDCT math.  But we won't crash.
+   */
+  if (bad)
+    ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+	     cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+  /* Update progression status, and verify that scan order is legal:
+   * this scan's Ah should equal the Al recorded by the previous scan of each
+   * coefficient (0 if none).  Mismatches are warnings, not fatal errors.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    int cindex = cinfo->cur_comp_info[ci]->component_index;
+    coef_bit_ptr = & cinfo->coef_bits[cindex][0];
+    if (!is_DC_band && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
+      WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+    for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
+      int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
+      if (cinfo->Ah != expected)
+	WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+      coef_bit_ptr[coefi] = cinfo->Al;	/* remember this scan's Al */
+    }
+  }
+
+  /* Select MCU decoding routine: first vs. refinement scan, DC vs. AC band */
+  if (cinfo->Ah == 0) {
+    if (is_DC_band)
+      entropy->pub.decode_mcu = decode_mcu_DC_first;
+    else
+      entropy->pub.decode_mcu = decode_mcu_AC_first;
+  } else {
+    if (is_DC_band)
+      entropy->pub.decode_mcu = decode_mcu_DC_refine;
+    else
+      entropy->pub.decode_mcu = decode_mcu_AC_refine;
+  }
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* Make sure requested tables are present, and compute derived tables.
+     * We may build same derived table more than once, but it's not expensive.
+     */
+    if (is_DC_band) {
+      if (cinfo->Ah == 0) {	/* DC refinement needs no table */
+	tbl = compptr->dc_tbl_no;
+	jpeg_make_d_derived_tbl(cinfo, TRUE, tbl,
+				& entropy->derived_tbls[tbl]);
+      }
+    } else {
+      tbl = compptr->ac_tbl_no;
+      jpeg_make_d_derived_tbl(cinfo, FALSE, tbl,
+			      & entropy->derived_tbls[tbl]);
+      /* remember the single active table */
+      entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
+    }
+    /* Initialize DC predictions to 0 */
+    entropy->saved.last_dc_val[ci] = 0;
+  }
+
+  /* Initialize bitread state variables */
+  entropy->bitstate.bits_left = 0;
+  entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
+  entropy->pub.insufficient_data = FALSE;
+
+  /* Initialize private state variables */
+  entropy->saved.EOBRUN = 0;
+
+  /* Initialize restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Figure F.12: extend sign bit.
+ * On some machines, a shift and add will be faster than a table lookup.
+ */
+
+#ifdef AVOID_TABLES
+/* Offset is written as 1 - 2**s: left-shifting a negative int is undefined. */
+#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) + (1 - (1<<(s))) : (x))
+
+#else
+/* x is the s-bit value just read; values below 2**(s-1) map to negatives. */
+#define HUFF_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
+
+static const int extend_test[16] =   /* entry n is 2**(n-1) */
+  { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
+    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
+
+static const int extend_offset[16] = /* entry n is 1 - 2**n, ie (-1 << n) + 1 */
+  { 0, 1 - (1<<1), 1 - (1<<2), 1 - (1<<3), 1 - (1<<4),
+    1 - (1<<5), 1 - (1<<6), 1 - (1<<7), 1 - (1<<8),
+    1 - (1<<9), 1 - (1<<10), 1 - (1<<11), 1 - (1<<12),
+    1 - (1<<13), 1 - (1<<14), 1 - (1<<15) };
+
+#endif /* AVOID_TABLES */
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ * Returns FALSE if must suspend.
+ */
+
+LOCAL(boolean)
+process_restart (j_decompress_ptr cinfo)
+{
+  phuff_entropy_ptr decoder = (phuff_entropy_ptr) cinfo->entropy;
+  int comp;
+
+  /* Whatever is left in the bit buffer is dropped.  Whole bytes among */
+  /* those bits are added to the marker reader's discarded-byte count. */
+  cinfo->marker->discarded_bytes += decoder->bitstate.bits_left / 8;
+  decoder->bitstate.bits_left = 0;
+
+  /* Consume the RSTn marker; a FALSE result means we must suspend. */
+  if ((*cinfo->marker->read_restart_marker) (cinfo)) {
+    /* A new restart interval starts from scratch: DC predictions and */
+    /* the EOB run count go back to zero, and the MCU countdown is */
+    /* reloaded from restart_interval. */
+    comp = cinfo->comps_in_scan;
+    while (--comp >= 0)
+      decoder->saved.last_dc_val[comp] = 0;
+    decoder->saved.EOBRUN = 0;
+    decoder->restarts_to_go = cinfo->restart_interval;
+
+    /* Clear the out-of-data flag unless read_restart_marker stopped */
+    /* right in front of another marker.  In that case the next data */
+    /* segment will be treated as empty, and keeping the flag set */
+    /* avoids producing bogus output pixels for it. */
+    if (cinfo->unread_marker == 0)
+      decoder->pub.insufficient_data = FALSE;
+
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+
+/*
+ * Huffman MCU decoding.
+ * Each of these routines decodes and returns one MCU's worth of
+ * Huffman-compressed coefficients. 
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
+ *
+ * We return FALSE if data source requested suspension.  In that case no
+ * changes have been made to permanent state.  (Exception: some output
+ * coefficients may already have been assigned.  This is harmless for
+ * spectral selection, since we'll just re-assign them on the next call.
+ * Successive approximation AC refinement has to be more careful, however.)
+ */
+
+/*
+ * MCU decoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int Al = cinfo->Al;
+  register int s, r;
+  int blkn, ci;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  savable_state state;
+  d_derived_tbl * tbl;
+  jpeg_component_info * compptr;
+  /* Returns FALSE, with permanent state untouched, if the source suspends. */
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->pub.insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(state, entropy->saved);
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      block = MCU_data[blkn];
+      ci = cinfo->MCU_membership[blkn];
+      compptr = cinfo->cur_comp_info[ci];
+      tbl = entropy->derived_tbls[compptr->dc_tbl_no];
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      HUFF_DECODE(s, br_state, tbl, return FALSE, label1);
+      if (s) {
+	CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	r = GET_BITS(s);
+	s = HUFF_EXTEND(r, s);
+      }
+      /* Convert DC difference to actual value, update last_dc_val */
+      s += state.last_dc_val[ci];
+      state.last_dc_val[ci] = s;
+      /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0). */
+      /* s may be negative: shift as unsigned, << on a negative int is UB. */
+      (*block)[0] = (JCOEF) ((unsigned int) s << Al);
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    ASSIGN_STATE(entropy->saved, state);
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int Se = cinfo->Se;
+  int Al = cinfo->Al;
+  register int s, k, r;
+  unsigned int EOBRUN;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  d_derived_tbl * tbl;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (! entropy->pub.insufficient_data) {
+
+    /* Load up working state.
+     * We can avoid loading/saving bitread state if in an EOB run.
+     */
+    EOBRUN = entropy->saved.EOBRUN;	/* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+    if (EOBRUN > 0)		/* if it's a band of zeroes... */
+      EOBRUN--;			/* ...process it now (we do nothing) */
+    else {
+      BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+      block = MCU_data[0];
+      tbl = entropy->ac_derived_tbl;
+
+      for (k = cinfo->Ss; k <= Se; k++) {
+	HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
+	r = s >> 4;		/* high nibble: zero run, or EOB exponent */
+	s &= 15;		/* low nibble: bit size of the coefficient */
+	if (s) {
+	  k += r;
+	  CHECK_BIT_BUFFER(br_state, s, return FALSE);
+	  r = GET_BITS(s);
+	  s = HUFF_EXTEND(r, s);
+	  /* Scale and output coefficient in natural (dezigzagged) order. */
+	  /* s may be negative: shift as unsigned, << on a negative int is UB. */
+	  (*block)[jpeg_natural_order[k]] = (JCOEF) ((unsigned int) s << Al);
+	} else {
+	  if (r == 15) {	/* ZRL */
+	    k += 15;		/* skip 15 zeroes in band */
+	  } else {		/* EOBr, run length is 2^r + appended bits */
+	    EOBRUN = 1 << r;
+	    if (r) {		/* EOBr, r > 0 */
+	      CHECK_BIT_BUFFER(br_state, r, return FALSE);
+	      r = GET_BITS(r);
+	      EOBRUN += r;
+	    }
+	    EOBRUN--;		/* this band is processed at this moment */
+	    break;		/* force end-of-band */
+	  }
+	}
+      }
+
+      BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    }
+
+    /* Completed MCU, so update state */
+    entropy->saved.EOBRUN = EOBRUN;	/* only part of saved state we need */
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component, although the spec
+ * is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int bit_mask = 1 << cinfo->Al;	/* the bit position coded by this scan */
+  int blk_index;
+  JCOEFPTR dc_coef;
+  BITREAD_STATE_VARS;
+
+  /* At the start of a restart interval, first get past the RSTn marker. */
+  /* This may force suspension. */
+  if (cinfo->restart_interval && entropy->restarts_to_go == 0) {
+    if (! process_restart(cinfo))
+      return FALSE;
+  }
+
+  /* Not worth testing insufficient_data here: if we read zeroes, the */
+  /* coefficient data is left unchanged anyway. */
+
+  /* Work on a local copy of the bit-reader state. */
+  BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+
+  /* Each block of the MCU contributes exactly one bit. */
+  blk_index = 0;
+  while (blk_index < cinfo->blocks_in_MCU) {
+    dc_coef = *MCU_data[blk_index];
+
+    /* That bit is the next bit of the two's-complement DC value.  Since */
+    /* it is OR-ed in, repeating the assignment later is safe. */
+    CHECK_BIT_BUFFER(br_state, 1, return FALSE);
+    if (GET_BITS(1))
+      dc_coef[0] |= bit_mask;
+    blk_index++;
+  }
+
+  /* Whole MCU decoded: commit the bit-reader state. */
+  BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy;
+  int Se = cinfo->Se;
+  int p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
+  int m1 = -p1;			/* -1 in that position; no << on a negative */
+  register int s, k, r;
+  unsigned int EOBRUN;
+  JBLOCKROW block;
+  JCOEFPTR thiscoef;
+  BITREAD_STATE_VARS;
+  d_derived_tbl * tbl;
+  int num_newnz;		/* number of entries used in newnz_pos */
+  int newnz_pos[DCTSIZE2];	/* positions made nonzero during this call */
+  /* Returns FALSE on suspension, after re-zeroing newly nonzero coefs. */
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (! process_restart(cinfo))
+	return FALSE;
+  }
+
+  /* If we've run out of data, don't modify the MCU.
+   */
+  if (! entropy->pub.insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+    EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+    block = MCU_data[0];
+    tbl = entropy->ac_derived_tbl;
+
+    /* If we are forced to suspend, we must undo the assignments to any newly
+     * nonzero coefficients in the block, because otherwise we'd get confused
+     * next time about which coefficients were already nonzero.
+     * But we need not undo addition of bits to already-nonzero coefficients;
+     * instead, we can test the current bit to see if we already did it.
+     */
+    num_newnz = 0;
+
+    /* initialize coefficient loop counter to start of band */
+    k = cinfo->Ss;
+
+    if (EOBRUN == 0) {
+      for (; k <= Se; k++) {
+	HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
+	r = s >> 4;
+	s &= 15;
+	if (s) {
+	  if (s != 1)		/* size of new coef should always be 1 */
+	    WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
+	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	  if (GET_BITS(1))
+	    s = p1;		/* newly nonzero coef is positive */
+	  else
+	    s = m1;		/* newly nonzero coef is negative */
+	} else {
+	  if (r != 15) {
+	    EOBRUN = 1 << r;	/* EOBr, run length is 2^r + appended bits */
+	    if (r) {
+	      CHECK_BIT_BUFFER(br_state, r, goto undoit);
+	      r = GET_BITS(r);
+	      EOBRUN += r;
+	    }
+	    break;		/* rest of block is handled by EOB logic */
+	  }
+	  /* note s = 0 for processing ZRL */
+	}
+	/* Advance over already-nonzero coefs and r still-zero coefs,
+	 * appending correction bits to the nonzeroes.  A correction bit is 1
+	 * if the absolute value of the coefficient must be increased.
+	 */
+	do {
+	  thiscoef = *block + jpeg_natural_order[k];
+	  if (*thiscoef != 0) {
+	    CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	    if (GET_BITS(1)) {
+	      if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
+		if (*thiscoef >= 0)
+		  *thiscoef += p1;
+		else
+		  *thiscoef += m1;
+	      }
+	    }
+	  } else {
+	    if (--r < 0)
+	      break;		/* reached target zero coefficient */
+	  }
+	  k++;
+	} while (k <= Se);
+	if (s) {
+	  int pos = jpeg_natural_order[k];
+	  /* Output newly nonzero coefficient */
+	  (*block)[pos] = (JCOEF) s;
+	  /* Remember its position in case we have to suspend */
+	  newnz_pos[num_newnz++] = pos;
+	}
+      }
+    }
+
+    if (EOBRUN > 0) {
+      /* Scan any remaining coefficient positions after the end-of-band
+       * (the last newly nonzero coefficient, if any).  Append a correction
+       * bit to each already-nonzero coefficient.  A correction bit is 1
+       * if the absolute value of the coefficient must be increased.
+       */
+      for (; k <= Se; k++) {
+	thiscoef = *block + jpeg_natural_order[k];
+	if (*thiscoef != 0) {
+	  CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+	  if (GET_BITS(1)) {
+	    if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
+	      if (*thiscoef >= 0)
+		*thiscoef += p1;
+	      else
+		*thiscoef += m1;
+	    }
+	  }
+	}
+      }
+      /* Count one block completed in EOB run */
+      EOBRUN--;
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+    entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  entropy->restarts_to_go--;
+
+  return TRUE;
+
+undoit:
+  /* Re-zero any output coefficients that we made newly nonzero */
+  while (num_newnz > 0)
+    (*block)[newnz_pos[--num_newnz]] = 0;
+
+  return FALSE;
+}
+
+
+/*
+ * Module initialization routine for progressive Huffman entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_phuff_decoder (j_decompress_ptr cinfo)
+{
+  phuff_entropy_ptr decoder;
+  int *status;
+  int tblno, remaining;
+
+  decoder = (phuff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(phuff_entropy_decoder));
+  cinfo->entropy = (struct jpeg_entropy_decoder *) decoder;
+  decoder->pub.start_pass = start_pass_phuff_decoder;
+
+  /* No derived table exists yet; NULL marks each slot as unallocated. */
+  for (tblno = NUM_HUFF_TBLS - 1; tblno >= 0; tblno--)
+    decoder->derived_tbls[tblno] = NULL;
+
+  /* Progression status table: one int per coefficient of every component, */
+  /* all preset to -1. */
+  cinfo->coef_bits = (int (*)[DCTSIZE2])
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				cinfo->num_components*DCTSIZE2*SIZEOF(int));
+  status = & cinfo->coef_bits[0][0];
+  remaining = cinfo->num_components * DCTSIZE2;
+  while (remaining-- > 0)
+    *status++ = -1;
+}
+
+#endif /* D_PROGRESSIVE_SUPPORTED */
diff --git a/src/libjpeg/jdpostct.c b/src/libjpeg/jdpostct.c
new file mode 100644
index 0000000..571563d
--- /dev/null
+++ b/src/libjpeg/jdpostct.c
@@ -0,0 +1,290 @@
+/*
+ * jdpostct.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the decompression postprocessing controller.
+ * This controller manages the upsampling, color conversion, and color
+ * quantization/reduction steps; specifically, it controls the buffering
+ * between upsample/color conversion and color quantization/reduction.
+ *
+ * If no color quantization/reduction is required, then this module has no
+ * work to do, and it just hands off to the upsample/color conversion code.
+ * An integrated upsample/convert/quantize process would replace this module
+ * entirely.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Private buffer controller object: this module's view of cinfo->post */
+
+typedef struct {
+  struct jpeg_d_post_controller pub; /* public fields; cinfo->post is cast to/from this struct */
+
+  /* Color quantization source buffer: this holds output data from
+   * the upsample/color conversion step to be passed to the quantizer.
+   * For two-pass color quantization, we need a full-image buffer;
+   * for one-pass operation, a strip buffer is sufficient.
+   */
+  jvirt_sarray_ptr whole_image;	/* virtual array, or NULL if no full-image buffer was requested */
+  JSAMPARRAY buffer;		/* strip buffer, or current strip of the virtual array */
+  JDIMENSION strip_height;	/* buffer size in rows (set only when quantizing colors) */
+  /* for two-pass mode only: */
+  JDIMENSION starting_row;	/* row # (within whole_image) of first row in current strip */
+  JDIMENSION next_row;		/* index of next row to fill/empty in strip */
+} my_post_controller;
+
+typedef my_post_controller * my_post_ptr;
+
+
+/* Forward declarations of the post_process_data methods chosen in start_pass_dpost */
+METHODDEF(void) post_process_1pass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+#ifdef QUANT_2PASS_SUPPORTED
+METHODDEF(void) post_process_prepass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+METHODDEF(void) post_process_2pass
+	JPP((j_decompress_ptr cinfo,
+	     JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	     JDIMENSION in_row_groups_avail,
+	     JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	     JDIMENSION out_rows_avail));
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize for a processing pass: install the method matching pass_mode.
+ */
+
+METHODDEF(void)
+start_pass_dpost (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (cinfo->quantize_colors) {
+      /* Single-pass processing with color quantization. */
+      post->pub.post_process_data = post_process_1pass;
+      /* We could be doing buffered-image output before starting a 2-pass
+       * color quantization; in that case, jinit_d_post_controller did not
+       * allocate a strip buffer.  Use the virtual-array buffer as workspace.
+       */
+      if (post->buffer == NULL) {
+	post->buffer = (*cinfo->mem->access_virt_sarray)
+	  ((j_common_ptr) cinfo, post->whole_image,
+	   (JDIMENSION) 0, post->strip_height, TRUE); /* first strip; TRUE is presumably "writable" - confirm */
+      }
+    } else {
+      /* For single-pass processing without color quantization,
+       * this module has no work to do; the upsampler's own method is
+       * installed so callers reach it directly.
+       */
+      post->pub.post_process_data = cinfo->upsample->upsample;
+    }
+    break;
+#ifdef QUANT_2PASS_SUPPORTED
+  case JBUF_SAVE_AND_PASS:
+    /* First pass of 2-pass quantization; requires the full-image array */
+    if (post->whole_image == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    post->pub.post_process_data = post_process_prepass;
+    break;
+  case JBUF_CRANK_DEST:
+    /* Second pass of 2-pass quantization; requires the full-image array */
+    if (post->whole_image == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    post->pub.post_process_data = post_process_2pass;
+    break;
+#endif /* QUANT_2PASS_SUPPORTED */
+  default:			/* any other mode, incl. 2-pass modes when not compiled in */
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+  post->starting_row = post->next_row = 0; /* every pass begins at the first row of the first strip */
+}
+
+
+/*
+ * One-pass (strip buffer) postprocessing: upsample into the strip buffer,
+ * then hand those rows to the color quantizer along with the output area.
+ * The same path is used for color precision reduction.
+ */
+METHODDEF(void)
+post_process_1pass (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION room = out_rows_avail - *out_row_ctr; /* space left in output */
+  JDIMENSION limit;		/* most rows we may upsample in this call */
+  JDIMENSION produced = 0;	/* rows the upsampler actually delivered */
+
+  /* Never upsample more than fits in the strip or can be emitted at once. */
+  /* Detecting the bottom of the image is left to the upsampler. */
+  limit = (room < post->strip_height) ? room : post->strip_height;
+  (*cinfo->upsample->upsample) (cinfo,
+		input_buf, in_row_group_ctr, in_row_groups_avail,
+		post->buffer, &produced, limit);
+
+  /* Quantize the fresh rows into the caller's output area and count them. */
+  (*cinfo->cquantize->color_quantize) (cinfo,
+		post->buffer, output_buf + *out_row_ctr, (int) produced);
+  *out_row_ctr += produced;
+}
+
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+/*
+ * First pass of 2-pass quantization: fill whole_image, let quantizer scan.
+ */
+
+METHODDEF(void)
+post_process_prepass (j_decompress_ptr cinfo,
+		      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		      JDIMENSION in_row_groups_avail,
+		      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		      JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION old_next_row, num_rows; /* output_buf and out_rows_avail are not referenced in this pass */
+
+  /* Reposition virtual buffer if at start of strip. */
+  if (post->next_row == 0) {
+    post->buffer = (*cinfo->mem->access_virt_sarray)
+	((j_common_ptr) cinfo, post->whole_image,
+	 post->starting_row, post->strip_height, TRUE); /* TRUE here, FALSE in post_process_2pass: presumably "writable" - confirm */
+  }
+
+  /* Upsample some data (up to a strip height's worth). */
+  /* The upsampler advances post->next_row itself, so remember where it was. */
+  old_next_row = post->next_row;
+  (*cinfo->upsample->upsample) (cinfo,
+		input_buf, in_row_group_ctr, in_row_groups_avail,
+		post->buffer, &post->next_row, post->strip_height);
+
+  /* Allow quantizer to scan new data.  No data is emitted (output is NULL), */
+  /* but we advance out_row_ctr so outer loop can tell when we're done. */
+  if (post->next_row > old_next_row) {
+    num_rows = post->next_row - old_next_row;
+    (*cinfo->cquantize->color_quantize) (cinfo, post->buffer + old_next_row,
+					 (JSAMPARRAY) NULL, (int) num_rows);
+    *out_row_ctr += num_rows;
+  }
+
+  /* Advance to the next strip of whole_image if we filled this one. */
+  if (post->next_row >= post->strip_height) {
+    post->starting_row += post->strip_height;
+    post->next_row = 0;		/* makes the next call reposition the buffer */
+  }
+}
+
+
+/*
+ * Second pass of 2-pass quantization: quantize the rows held in whole_image.
+ */
+
+METHODDEF(void)
+post_process_2pass (j_decompress_ptr cinfo,
+		    JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+		    JDIMENSION in_row_groups_avail,
+		    JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+		    JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr) cinfo->post;
+  JDIMENSION num_rows, max_rows; /* the three input-side arguments are not referenced in this pass */
+
+  /* Reposition virtual buffer if at start of strip. */
+  if (post->next_row == 0) {
+    post->buffer = (*cinfo->mem->access_virt_sarray)
+	((j_common_ptr) cinfo, post->whole_image,
+	 post->starting_row, post->strip_height, FALSE); /* FALSE: presumably read-only access - confirm */
+  }
+
+  /* Determine number of rows to emit: the smallest of three limits. */
+  num_rows = post->strip_height - post->next_row; /* available in strip */
+  max_rows = out_rows_avail - *out_row_ctr; /* available in output area */
+  if (num_rows > max_rows)
+    num_rows = max_rows;
+  /* We have to check bottom of image here, can't depend on upsampler. */
+  max_rows = cinfo->output_height - post->starting_row; /* image rows from start of this strip */
+  if (num_rows > max_rows)
+    num_rows = max_rows;
+
+  /* Quantize and emit data. */
+  (*cinfo->cquantize->color_quantize) (cinfo,
+		post->buffer + post->next_row, output_buf + *out_row_ctr,
+		(int) num_rows);
+  *out_row_ctr += num_rows;
+
+  /* Advance to the next strip if we emptied this one. */
+  post->next_row += num_rows;
+  if (post->next_row >= post->strip_height) {
+    post->starting_row += post->strip_height;
+    post->next_row = 0;		/* makes the next call reposition the buffer */
+  }
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Create the postprocessing controller and, if needed, its quantizer buffer.
+ */
+
+GLOBAL(void)
+jinit_d_post_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_post_ptr post;
+
+  post = (my_post_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_post_controller));
+  cinfo->post = (struct jpeg_d_post_controller *) post;
+  post->pub.start_pass = start_pass_dpost;
+  post->whole_image = NULL;	/* flag for no virtual arrays */
+  post->buffer = NULL;		/* flag for no strip buffer */
+
+  /* Create the quantization buffer, if needed */
+  if (cinfo->quantize_colors) {	/* strip_height is assigned only on this path */
+    /* The buffer strip height is max_v_samp_factor, which is typically
+     * an efficient number of rows for upsampling to return.
+     * (In the presence of output rescaling, we might want to be smarter?)
+     */
+    post->strip_height = (JDIMENSION) cinfo->max_v_samp_factor;
+    if (need_full_buffer) {
+      /* Two-pass color quantization: need full-image storage. */
+      /* We round up the number of rows to a multiple of the strip height. */
+#ifdef QUANT_2PASS_SUPPORTED
+      post->whole_image = (*cinfo->mem->request_virt_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
+	 cinfo->output_width * cinfo->out_color_components, /* samples per row */
+	 (JDIMENSION) jround_up((long) cinfo->output_height,
+				(long) post->strip_height),
+	 post->strip_height);
+#else
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); /* full buffer requested but 2-pass support not compiled in */
+#endif /* QUANT_2PASS_SUPPORTED */
+    } else {
+      /* One-pass color quantization: just make a strip buffer. */
+      post->buffer = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 cinfo->output_width * cinfo->out_color_components, /* samples per row */
+	 post->strip_height);
+    }
+  }
+}
diff --git a/src/libjpeg/jdsample.c b/src/libjpeg/jdsample.c
new file mode 100644
index 0000000..80ffefb
--- /dev/null
+++ b/src/libjpeg/jdsample.c
@@ -0,0 +1,478 @@
+/*
+ * jdsample.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains upsampling routines.
+ *
+ * Upsampling input data is counted in "row groups".  A row group
+ * is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
+ * sample rows of each component.  Upsampling will normally produce
+ * max_v_samp_factor pixel rows from each row group (but this could vary
+ * if the upsampler is applying a scale factor of its own).
+ *
+ * An excellent reference for image resampling is
+ *   Digital Image Warping, George Wolberg, 1990.
+ *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Pointer to routine to upsample one row group of a single component */
+typedef JMETHOD(void, upsample1_ptr,
+		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
+
+/* Private subobject: this module's view of cinfo->upsample */
+
+typedef struct {
+  struct jpeg_upsampler pub;	/* public fields; cinfo->upsample is cast to/from this struct */
+
+  /* Color conversion buffer.  When using separate upsampling and color
+   * conversion steps, this buffer holds one upsampled row group until it
+   * has been color converted and output.
+   * Note: we do not allocate any storage for component(s) which are full-size,
+   * i.e. do not need rescaling.  The corresponding entry of color_buf[] is
+   * simply set to point to the input data array, thereby avoiding copying.
+   */
+  JSAMPARRAY color_buf[MAX_COMPONENTS];
+
+  /* Per-component upsampling method pointers, chosen in jinit_upsampler */
+  upsample1_ptr methods[MAX_COMPONENTS];
+
+  int next_row_out;		/* counts rows emitted from color_buf; max_v_samp_factor means empty */
+  JDIMENSION rows_to_go;	/* counts rows remaining in image */
+
+  /* Height of an input row group for each component. */
+  int rowgroup_height[MAX_COMPONENTS];
+
+  /* These arrays save pixel expansion factors so that int_upsample need not
+   * recompute them each time.  They are unused for other upsampling methods.
+   */
+  UINT8 h_expand[MAX_COMPONENTS];
+  UINT8 v_expand[MAX_COMPONENTS];
+} my_upsampler;
+
+typedef my_upsampler * my_upsample_ptr;
+
+
+/*
+ * Prepare the upsampler state for a new pass over the image.
+ */
+METHODDEF(void)
+start_pass_upsample (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr state = (my_upsample_ptr) cinfo->upsample;
+
+  /* All output_height rows are still to be produced; sep_upsample uses */
+  /* this counter to notice the bottom of the image */
+  state->rows_to_go = cinfo->output_height;
+  /* next_row_out at its maximum means "conversion buffer is empty" */
+  state->next_row_out = cinfo->max_v_samp_factor;
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ *
+ * In this version we upsample each component independently.
+ * We upsample one row group into the conversion buffer, then apply
+ * color conversion to as many of its rows as the caller can accept.
+ */
+
+METHODDEF(void)
+sep_upsample (j_decompress_ptr cinfo,
+	      JSAMPIMAGE input_buf, JDIMENSION *in_row_group_ctr,
+	      JDIMENSION in_row_groups_avail,
+	      JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+	      JDIMENSION out_rows_avail)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  int ci;
+  jpeg_component_info * compptr;
+  JDIMENSION num_rows;		/* in_row_groups_avail is not examined in this routine */
+
+  /* Fill the conversion buffer, if it's empty */
+  if (upsample->next_row_out >= cinfo->max_v_samp_factor) {
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+	 ci++, compptr++) {
+      /* Invoke per-component upsample method.  Notice we pass a POINTER
+       * to color_buf[ci], so that fullsize_upsample can change it.
+       */
+      (*upsample->methods[ci]) (cinfo, compptr,
+	input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]),
+	upsample->color_buf + ci);
+    }
+    upsample->next_row_out = 0;	/* buffer now holds a full, unread row group */
+  }
+
+  /* Color-convert and emit rows */
+
+  /* How many we have in the buffer: */
+  num_rows = (JDIMENSION) (cinfo->max_v_samp_factor - upsample->next_row_out);
+  /* Not more than the distance to the end of the image.  Need this test
+   * in case the image height is not a multiple of max_v_samp_factor:
+   */
+  if (num_rows > upsample->rows_to_go) 
+    num_rows = upsample->rows_to_go;
+  /* And not more than what the client can accept: */
+  out_rows_avail -= *out_row_ctr; /* from here on: free rows left in output_buf */
+  if (num_rows > out_rows_avail)
+    num_rows = out_rows_avail;
+
+  (*cinfo->cconvert->color_convert) (cinfo, upsample->color_buf,
+				     (JDIMENSION) upsample->next_row_out,
+				     output_buf + *out_row_ctr,
+				     (int) num_rows);
+
+  /* Adjust counts */
+  *out_row_ctr += num_rows;
+  upsample->rows_to_go -= num_rows;
+  upsample->next_row_out += num_rows;
+  /* When the buffer is emptied, declare this input row group consumed */
+  if (upsample->next_row_out >= cinfo->max_v_samp_factor)
+    (*in_row_group_ctr)++;
+}
+
+
+/*
+ * These are the routines invoked by sep_upsample to upsample pixel values
+ * of a single component.  One row group is processed per call.
+ */
+
+
+/*
+ * For full-size components, we just make color_buf[ci] point at the
+ * input buffer, and thus avoid copying any data.  Note that this is
+ * safe only because sep_upsample doesn't declare the input row group
+ * "consumed" until we are done color converting and emitting it.
+ * (cinfo and compptr are unused; they are part of the upsample1_ptr signature.)
+ */
+
+METHODDEF(void)
+fullsize_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		   JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  *output_data_ptr = input_data; /* alias the input rows instead of copying them */
+}
+
+
+/*
+ * This is a no-op version used for "uninteresting" components, i.e. those
+ * with component_needed false.  They will not be referenced by color conversion.
+ */
+
+METHODDEF(void)
+noop_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  *output_data_ptr = NULL;	/* safety check: leave no usable pointer in color_buf[ci] */
+}
+
+
+/*
+ * Generic upsampler for any integral horizontal/vertical expansion ratio.
+ * Typical JPEG files never reach this routine, so speed is not a concern.
+ * Accuracy is modest too: every input pixel is simply replicated onto the
+ * output pixels it covers, which the sampling literature calls a
+ * "box filter".  Box filters tend to produce visible artifacts, so
+ * anyone seriously using 3:1 or 4:1 sampling ratios would be well
+ * advised to improve this code.
+ * The expansion factors are read from the per-component h_expand/v_expand.
+ */
+
+METHODDEF(void)
+int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	      JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  JSAMPARRAY output_data = *output_data_ptr;
+  int h_factor = upsample->h_expand[compptr->component_index];
+  int v_factor = upsample->v_expand[compptr->component_index];
+  register JSAMPROW src, dst;
+  register JSAMPLE pixel;
+  register int rep;
+  JSAMPROW dst_end;
+  int src_row = 0;
+  int dst_row;
+
+  /* Each pass of the outer loop turns one input row into v_factor */
+  /* consecutive output rows */
+  for (dst_row = 0; dst_row < cinfo->max_v_samp_factor; dst_row += v_factor) {
+    /* Build the first output row of the group by horizontal replication */
+    src = input_data[src_row];
+    dst = output_data[dst_row];
+    dst_end = dst + cinfo->output_width;
+    while (dst < dst_end) {
+      pixel = *src++;		/* raw copy, so GETJSAMPLE() is not needed */
+      for (rep = 0; rep < h_factor; rep++) {
+	*dst++ = pixel;
+      }
+    }
+    /* Any further rows of the group are plain copies of that first row, */
+    /* so the horizontal expansion is done only once per input row */
+    if (v_factor >= 2) {
+      jcopy_sample_rows(output_data, dst_row, output_data, dst_row+1,
+			v_factor-1, cinfo->output_width);
+    }
+    src_row++;
+  }
+}
+
+
+/*
+ * Box-filter upsampling specialized for the common 2:1 horizontal,
+ * 1:1 vertical case: each input sample is written twice along its row.
+ */
+
+METHODDEF(void)
+h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW src, dst;
+  register JSAMPLE pixel;
+  JSAMPROW dst_end;
+  int row = 0;
+
+  while (row < cinfo->max_v_samp_factor) {
+    src = input_data[row];
+    dst = output_data[row];
+    for (dst_end = dst + cinfo->output_width; dst < dst_end; dst += 2) {
+      pixel = *src++;		/* raw copy, GETJSAMPLE() not required */
+      dst[0] = pixel;
+      dst[1] = pixel;
+    }
+    row++;
+  }
+}
+
+
+/*
+ * Box-filter upsampling specialized for the common 2:1 horizontal,
+ * 2:1 vertical case: samples are doubled along the row, then the row itself.
+ */
+
+METHODDEF(void)
+h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW src, dst;
+  register JSAMPLE pixel;
+  JSAMPROW dst_end;
+  int src_row;
+  int dst_row;
+
+  for (src_row = 0, dst_row = 0; dst_row < cinfo->max_v_samp_factor;
+       src_row++, dst_row += 2) {
+    /* Expand one input row horizontally into the even output row */
+    src = input_data[src_row];
+    dst = output_data[dst_row];
+    for (dst_end = dst + cinfo->output_width; dst < dst_end; dst += 2) {
+      pixel = *src++;		/* raw copy, GETJSAMPLE() not required */
+      dst[0] = pixel;
+      dst[1] = pixel;
+    }
+    /* The odd output row is a straight copy of the row just built */
+    jcopy_sample_rows(output_data, dst_row, output_data, dst_row+1,
+		      1, cinfo->output_width);
+  }
+}
+
+
+/*
+ * Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
+ *
+ * The upsampling algorithm is linear interpolation between pixel centers,
+ * also known as a "triangle filter".  This is a good compromise between
+ * speed and visual quality.  The centers of the output pixels are 1/4 and 3/4
+ * of the way between input pixel centers.
+ *
+ * A note about the "bias" calculations: when rounding fractional values to
+ * integer, we do not want to always round 0.5 up to the next integer.
+ * If we did that, we'd introduce a noticeable bias towards larger values.
+ * Instead, this code is arranged so that 0.5 will be rounded up or down at
+ * alternate pixel locations (a simple ordered dither pattern): the "+ 1"
+ * terms below round 0.5 down, the "+ 2" terms round it up.
+ */
+
+METHODDEF(void)
+h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW inptr, outptr;
+  register int invalue;
+  register JDIMENSION colctr;
+  int inrow;
+
+  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
+    inptr = input_data[inrow];
+    outptr = output_data[inrow];
+    /* Special case for first column: no left neighbor, so copy the sample */
+    invalue = GETJSAMPLE(*inptr++);
+    *outptr++ = (JSAMPLE) invalue;
+    *outptr++ = (JSAMPLE) ((invalue * 3 + GETJSAMPLE(*inptr) + 2) >> 2);
+
+    /* jinit_upsampler selects this routine only when downsampled_width > 2 */
+    for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
+      /* General case: 3/4 * nearer pixel + 1/4 * further pixel */
+      invalue = GETJSAMPLE(*inptr++) * 3;
+      *outptr++ = (JSAMPLE) ((invalue + GETJSAMPLE(inptr[-2]) + 1) >> 2); /* left neighbor */
+      *outptr++ = (JSAMPLE) ((invalue + GETJSAMPLE(*inptr) + 2) >> 2); /* right neighbor */
+    }
+
+    /* Special case for last column: no right neighbor, so copy the sample */
+    invalue = GETJSAMPLE(*inptr);
+    *outptr++ = (JSAMPLE) ((invalue * 3 + GETJSAMPLE(inptr[-1]) + 1) >> 2);
+    *outptr++ = (JSAMPLE) invalue;
+  }
+}
+
+
+/*
+ * Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
+ * Again a triangle filter; see comments for h2v1 case, above.
+ *
+ * It is OK for us to reference the adjacent input rows (indexes inrow-1 and
+ * inrow+1) because we demanded context from the main buffer controller.
+ */
+
+METHODDEF(void)
+h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		     JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
+{
+  JSAMPARRAY output_data = *output_data_ptr;
+  register JSAMPROW inptr0, inptr1, outptr;
+#if BITS_IN_JSAMPLE == 8
+  register int thiscolsum, lastcolsum, nextcolsum;
+#else
+  register INT32 thiscolsum, lastcolsum, nextcolsum;
+#endif
+  register JDIMENSION colctr;
+  int inrow, outrow, v;
+
+  inrow = outrow = 0;
+  while (outrow < cinfo->max_v_samp_factor) {
+    for (v = 0; v < 2; v++) {	/* two output rows per input row */
+      /* inptr0 points to nearest input row, inptr1 points to next nearest */
+      inptr0 = input_data[inrow];
+      if (v == 0)		/* next nearest is row above */
+	inptr1 = input_data[inrow-1];
+      else			/* next nearest is row below */
+	inptr1 = input_data[inrow+1];
+      outptr = output_data[outrow++];
+
+      /* Special case for first column */
+      /* Each colsum is the vertical blend 3*nearer row + 1*further row */
+      thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+      nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+      *outptr++ = (JSAMPLE) ((thiscolsum * 4 + 8) >> 4);
+      *outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4);
+      lastcolsum = thiscolsum; thiscolsum = nextcolsum;
+
+      /* jinit_upsampler selects this routine only when downsampled_width > 2 */
+      for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
+	/* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
+	/* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
+	nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++);
+	*outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4); /* "+ 8" rounds 0.5 up */
+	*outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4); /* "+ 7" rounds 0.5 down */
+	lastcolsum = thiscolsum; thiscolsum = nextcolsum;
+      }
+
+      /* Special case for last column */
+      *outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4);
+      *outptr++ = (JSAMPLE) ((thiscolsum * 4 + 7) >> 4);
+    }
+    inrow++;
+  }
+}
+
+
+/*
+ * Module initialization routine for upsampling: picks a method per component.
+ */
+
+GLOBAL(void)
+jinit_upsampler (j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample;
+  int ci;
+  jpeg_component_info * compptr;
+  boolean need_buffer, do_fancy;
+  int h_in_group, v_in_group, h_out_group, v_out_group;
+
+  upsample = (my_upsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_upsampler));
+  cinfo->upsample = (struct jpeg_upsampler *) upsample;
+  upsample->pub.start_pass = start_pass_upsample;
+  upsample->pub.upsample = sep_upsample;
+  upsample->pub.need_context_rows = FALSE; /* until we find out differently */
+
+  if (cinfo->CCIR601_sampling)	/* this isn't supported */
+    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
+
+  /* jdmainct.c doesn't support context rows when min_DCT_scaled_size = 1,
+   * so don't ask for it.
+   */
+  do_fancy = cinfo->do_fancy_upsampling && cinfo->min_DCT_scaled_size > 1;
+
+  /* Verify we can handle the sampling factors, select per-component methods,
+   * and create storage as needed.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Compute size of an "input group" after IDCT scaling.  This many samples
+     * are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
+     */
+    h_in_group = (compptr->h_samp_factor * compptr->DCT_scaled_size) /
+		 cinfo->min_DCT_scaled_size;
+    v_in_group = (compptr->v_samp_factor * compptr->DCT_scaled_size) /
+		 cinfo->min_DCT_scaled_size;
+    h_out_group = cinfo->max_h_samp_factor;
+    v_out_group = cinfo->max_v_samp_factor;
+    upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
+    need_buffer = TRUE;		/* cleared below for methods that write no output rows */
+    if (! compptr->component_needed) {
+      /* Don't bother to upsample an uninteresting component. */
+      upsample->methods[ci] = noop_upsample;
+      need_buffer = FALSE;
+    } else if (h_in_group == h_out_group && v_in_group == v_out_group) {
+      /* Fullsize components can be processed without any work. */
+      upsample->methods[ci] = fullsize_upsample;
+      need_buffer = FALSE;
+    } else if (h_in_group * 2 == h_out_group &&
+	       v_in_group == v_out_group) {
+      /* Special cases for 2h1v upsampling */
+      /* The fancy routine handles first and last columns separately, hence > 2 */
+      if (do_fancy && compptr->downsampled_width > 2)
+	upsample->methods[ci] = h2v1_fancy_upsample;
+      else
+	upsample->methods[ci] = h2v1_upsample;
+    } else if (h_in_group * 2 == h_out_group &&
+	       v_in_group * 2 == v_out_group) {
+      /* Special cases for 2h2v upsampling */
+      if (do_fancy && compptr->downsampled_width > 2) {
+	upsample->methods[ci] = h2v2_fancy_upsample;
+	upsample->pub.need_context_rows = TRUE; /* it reads the rows above and below */
+      } else
+	upsample->methods[ci] = h2v2_upsample;
+    } else if ((h_out_group % h_in_group) == 0 &&
+	       (v_out_group % v_in_group) == 0) {
+      /* Generic integral-factors upsampling method */
+      upsample->methods[ci] = int_upsample;
+      upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
+      upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
+    } else
+      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL); /* non-integral expansion ratio */
+    if (need_buffer) {
+      /* One row group of output: width rounded up to a multiple of */
+      /* max_h_samp_factor, max_v_samp_factor rows */
+      upsample->color_buf[ci] = (*cinfo->mem->alloc_sarray)
+	((j_common_ptr) cinfo, JPOOL_IMAGE,
+	 (JDIMENSION) jround_up((long) cinfo->output_width,
+				(long) cinfo->max_h_samp_factor),
+	 (JDIMENSION) cinfo->max_v_samp_factor);
+    }
+  }
+}
diff --git a/src/libjpeg/jdtrans.c b/src/libjpeg/jdtrans.c
new file mode 100644
index 0000000..6c0ab71
--- /dev/null
+++ b/src/libjpeg/jdtrans.c
@@ -0,0 +1,143 @@
+/*
+ * jdtrans.c
+ *
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains library routines for transcoding decompression,
+ * that is, reading raw DCT coefficient arrays from an input JPEG file.
+ * The routines in jdapimin.c will also be needed by a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declaration: defined at the end of this file, called from jpeg_read_coefficients */
+LOCAL(void) transdecode_master_selection JPP((j_decompress_ptr cinfo));
+
+
+/*
+ * Read the coefficient arrays from a JPEG file.
+ * jpeg_read_header must be completed before calling this.
+ *
+ * The entire image is read into a set of virtual coefficient-block arrays,
+ * one per component.  The return value is a pointer to the array of
+ * virtual-array descriptors.  These can be manipulated directly via the
+ * JPEG memory manager, or handed off to jpeg_write_coefficients().
+ * To release the memory occupied by the virtual arrays, call
+ * jpeg_finish_decompress() when done with the data.
+ *
+ * An alternative usage is to simply obtain access to the coefficient arrays
+ * during a buffered-image-mode decompression operation.  This is allowed
+ * after any jpeg_finish_output() call.  The arrays can be accessed until
+ * jpeg_finish_decompress() is called.  (Note that any call to the library
+ * may reposition the arrays, so don't rely on access_virt_barray() results
+ * to stay valid across library calls.)
+ *
+ * Returns NULL if suspended.  This case need be checked only if
+ * a suspending data source is used.  Any other state is reported
+ * through ERREXIT1 with JERR_BAD_STATE.
+ */
+
+GLOBAL(jvirt_barray_ptr *)
+jpeg_read_coefficients (j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize active modules */
+    transdecode_master_selection(cinfo);
+    cinfo->global_state = DSTATE_RDCOEFS;
+  }
+  if (cinfo->global_state == DSTATE_RDCOEFS) {
+    /* Absorb whole file into the coef buffer */
+    for (;;) {
+      int retcode;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL)
+	(*cinfo->progress->progress_monitor) ((j_common_ptr) cinfo);
+      /* Absorb some more input */
+      retcode = (*cinfo->inputctl->consume_input) (cinfo);
+      if (retcode == JPEG_SUSPENDED)
+	return NULL;		/* state stays DSTATE_RDCOEFS, so a later call resumes the loop */
+      if (retcode == JPEG_REACHED_EOI)
+	break;
+      /* Advance progress counter if appropriate */
+      if (cinfo->progress != NULL &&
+	  (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+	if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+	  /* startup underestimated number of scans; ratchet up one scan */
+	  cinfo->progress->pass_limit += (long) cinfo->total_iMCU_rows;
+	}
+      }
+    }
+    /* Set state so that jpeg_finish_decompress does the right thing */
+    cinfo->global_state = DSTATE_STOPPING;
+  }
+  /* At this point we should be in state DSTATE_STOPPING if being used
+   * standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
+   * to the coefficients during a full buffered-image-mode decompression.
+   */
+  if ((cinfo->global_state == DSTATE_STOPPING ||
+       cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
+    return cinfo->coef->coef_arrays;
+  }
+  /* Oops, improper usage */
+  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return NULL;			/* keep compiler happy */
+}
+
+
+/*
+ * Master selection of decompression modules for transcoding.
+ * This substitutes for jdmaster.c's initialization of the full decompressor.
+ * Only the entropy decoder and the coefficient controller are initialized here.
+ */
+
+LOCAL(void)
+transdecode_master_selection (j_decompress_ptr cinfo)
+{
+  /* This is effectively a buffered-image operation. */
+  cinfo->buffered_image = TRUE;	/* jpeg_read_coefficients tests this before returning the arrays */
+
+  /* Entropy decoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code) {
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL); /* arithmetic decoding is not implemented */
+  } else {
+    if (cinfo->progressive_mode) {
+#ifdef D_PROGRESSIVE_SUPPORTED
+      jinit_phuff_decoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED); /* progressive support was compiled out */
+#endif
+    } else
+      jinit_huff_decoder(cinfo);
+  }
+
+  /* Always get a full-image coefficient buffer. */
+  jinit_d_coef_controller(cinfo, TRUE);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Initialize input side of decompressor to consume first scan. */
+  (*cinfo->inputctl->start_input_pass) (cinfo);
+
+  /* Initialize progress monitoring (skipped when no monitor is installed). */
+  if (cinfo->progress != NULL) {
+    int nscans;
+    /* Estimate number of scans to set pass_limit. */
+    if (cinfo->progressive_mode) {
+      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
+      nscans = 2 + 3 * cinfo->num_components;
+    } else if (cinfo->inputctl->has_multiple_scans) {
+      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
+      nscans = cinfo->num_components;
+    } else {
+      nscans = 1;
+    }
+    cinfo->progress->pass_counter = 0L;
+    cinfo->progress->pass_limit = (long) cinfo->total_iMCU_rows * nscans; /* raised later by jpeg_read_coefficients if too low */
+    cinfo->progress->completed_passes = 0;
+    cinfo->progress->total_passes = 1; /* the whole read is reported as a single pass */
+  }
+}
diff --git a/src/libjpeg/jerror.c b/src/libjpeg/jerror.c
new file mode 100644
index 0000000..3da7be8
--- /dev/null
+++ b/src/libjpeg/jerror.c
@@ -0,0 +1,252 @@
+/*
+ * jerror.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains simple error-reporting and trace-message routines.
+ * These are suitable for Unix-like systems and others where writing to
+ * stderr is the right thing to do.  Many applications will want to replace
+ * some or all of these routines.
+ *
+ * If you define USE_WINDOWS_MESSAGEBOX in jconfig.h or in the makefile,
+ * you get a Windows-specific hack to display error messages in a dialog box.
+ * It ain't much, but it beats dropping error messages into the bit bucket,
+ * which is what happens to output to stderr under most Windows C compilers.
+ *
+ * These routines are used by both the compression and decompression code.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jversion.h"
+#include "jerror.h"
+
+#ifdef USE_WINDOWS_MESSAGEBOX
+#include <windows.h>
+#endif
+
+#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
+#define EXIT_FAILURE  1
+#endif
+
+
+/*
+ * Create the message string table.
+ * We do this from the master message list in jerror.h by re-reading
+ * jerror.h with JMESSAGE defined to emit just each entry's string, so the
+ * table is indexed by message code and ends with a NULL entry.  It is made an
+ * external symbol just in case any applications want to refer to it directly.
+ */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_std_message_table	jMsgTable
+#endif
+
+#define JMESSAGE(code,string)	string ,
+
+const char * const jpeg_std_message_table[] = {
+#include "jerror.h"
+  NULL
+};
+
+
+/*
+ * Error exit handler: must not return to caller.  This default version prints
+ * the message, destroys the JPEG object and ends the process with exit().
+ * Applications may override this if they want to get control back after
+ * an error.  Typically one would longjmp somewhere instead of exiting.
+ * The setjmp buffer can be made a private field within an expanded error
+ * handler object.  Note that the info needed to generate an error message
+ * is stored in the error object, so you can generate the message now or
+ * later, at your convenience.
+ * You should make sure that the JPEG object is cleaned up (with jpeg_abort
+ * or jpeg_destroy) at some point.
+ */
+
+METHODDEF(void)
+error_exit (j_common_ptr cinfo)
+{
+  /* Always display the message, through the installed output_message method */
+  (*cinfo->err->output_message) (cinfo);
+
+  /* Let the memory manager delete any temp files before we die */
+  jpeg_destroy(cinfo);
+
+  exit(EXIT_FAILURE);
+}
+
+
+/*
+ * Actual output of an error or trace message: the text is produced by the
+ * format_message method and then displayed.  Applications may override this
+ * method to send JPEG messages somewhere other than stderr.
+ *
+ * On Windows, printing to stderr is generally completely useless,
+ * so we provide optional code to produce an error-dialog popup.
+ * Most Windows applications will still prefer to override this routine,
+ * but if they don't, it'll do something at least marginally useful.
+ *
+ * NOTE: to use the library in an environment that doesn't support the
+ * C stdio library, you may have to delete the call to fprintf() entirely,
+ * not just not use this routine.
+ */
+
+METHODDEF(void)
+output_message (j_common_ptr cinfo)
+{
+  char buffer[JMSG_LENGTH_MAX];
+
+  /* Create the message text in the local buffer */
+  (*cinfo->err->format_message) (cinfo, buffer);
+
+#ifdef USE_WINDOWS_MESSAGEBOX
+  /* Display it in a message dialog box */
+  MessageBox(GetActiveWindow(), buffer, "JPEG Library Error",
+	     MB_OK | MB_ICONERROR);
+#else
+  /* Send it to stderr, adding a newline */
+  fprintf(stderr, "%s\n", buffer);
+#endif
+}
+
+
+/*
+ * Decide whether to emit a trace or warning message.
+ * msg_level is one of:
+ *   -1: recoverable corrupt-data warning, may want to abort (any value < 0).
+ *    0: important advisory messages (always display to user).
+ *    1: first level of tracing detail.
+ *    2,3,...: successively more detailed tracing messages.
+ * An application might override this method if it wanted to abort on warnings
+ * or change the policy about which messages to display.
+ */
+
+METHODDEF(void)
+emit_message (j_common_ptr cinfo, int msg_level)
+{
+  struct jpeg_error_mgr * err = cinfo->err;
+
+  if (msg_level < 0) {
+    /* It's a warning message.  Since corrupt files may generate many warnings,
+     * the policy implemented here is to show only the first warning,
+     * unless trace_level >= 3.
+     */
+    if (err->num_warnings == 0 || err->trace_level >= 3)
+      (*err->output_message) (cinfo);
+    /* Always count warnings in num_warnings, whether displayed or not. */
+    err->num_warnings++;
+  } else {
+    /* It's a trace message.  Show it if trace_level >= msg_level. */
+    if (err->trace_level >= msg_level)
+      (*err->output_message) (cinfo);
+  }
+}
+
+
+/*
+ * Format a message string for the most recent JPEG error or message.
+ * The message is stored into buffer, which must hold at least JMSG_LENGTH_MAX
+ * characters (the sprintf calls are unbounded).  No '\n' is added to the string.
+ * Few applications should need to override this method.
+ */
+
+METHODDEF(void)
+format_message (j_common_ptr cinfo, char * buffer)
+{
+  struct jpeg_error_mgr * err = cinfo->err;
+  int msg_code = err->msg_code;
+  const char * msgtext = NULL;
+  const char * msgptr;
+  char ch;
+  boolean isstring;
+
+  /* Look up message string in proper table (standard first, then add-on) */
+  if (msg_code > 0 && msg_code <= err->last_jpeg_message) {
+    msgtext = err->jpeg_message_table[msg_code];
+  } else if (err->addon_message_table != NULL &&
+	     msg_code >= err->first_addon_message &&
+	     msg_code <= err->last_addon_message) {
+    msgtext = err->addon_message_table[msg_code - err->first_addon_message];
+  }
+
+  /* Defend against bogus message number: report it using table entry 0 */
+  if (msgtext == NULL) {
+    err->msg_parm.i[0] = msg_code;
+    msgtext = err->jpeg_message_table[0];
+  }
+
+  /* Check for string parameter: only the first '%' in the text is examined */
+  isstring = FALSE;
+  msgptr = msgtext;
+  while ((ch = *msgptr++) != '\0') {
+    if (ch == '%') {
+      if (*msgptr == 's') isstring = TRUE;
+      break;
+    }
+  }
+
+  /* Format the message into the passed buffer (one string, or all 8 ints) */
+  if (isstring)
+    sprintf(buffer, msgtext, err->msg_parm.s);
+  else
+    sprintf(buffer, msgtext,
+	    err->msg_parm.i[0], err->msg_parm.i[1],
+	    err->msg_parm.i[2], err->msg_parm.i[3],
+	    err->msg_parm.i[4], err->msg_parm.i[5],
+	    err->msg_parm.i[6], err->msg_parm.i[7]);
+}
+
+
+/*
+ * Reset error state variables at start of a new image.
+ * This is called during compression startup.  It clears num_warnings and
+ * msg_code only; trace_level and the method pointers are left untouched.
+ * An application might possibly want to override this method if it has
+ * additional error processing state.
+ */
+
+METHODDEF(void)
+reset_error_mgr (j_common_ptr cinfo)
+{
+  cinfo->err->num_warnings = 0;
+  /* trace_level is not reset since it is an application-supplied parameter */
+  cinfo->err->msg_code = 0;	/* may be useful as a flag for "no error" */
+}
+
+
+/*
+ * Fill in the standard error-handling methods and default state in a
+ * jpeg_error_mgr object, returning err itself.  Typical call is:
+ *	struct jpeg_compress_struct cinfo;
+ *	struct jpeg_error_mgr err;
+ *
+ *	cinfo.err = jpeg_std_error(&err);
+ * after which the application may override some of the methods.
+ */
+
+GLOBAL(struct jpeg_error_mgr *)
+jpeg_std_error (struct jpeg_error_mgr * err)
+{
+  err->error_exit = error_exit;
+  err->emit_message = emit_message;
+  err->output_message = output_message;
+  err->format_message = format_message;
+  err->reset_error_mgr = reset_error_mgr;
+
+  err->trace_level = 0;		/* default = no tracing */
+  err->num_warnings = 0;	/* no warnings emitted yet */
+  err->msg_code = 0;		/* may be useful as a flag for "no error" */
+
+  /* Initialize message table pointers; last valid code is JMSG_LASTMSGCODE-1 */
+  err->jpeg_message_table = jpeg_std_message_table;
+  err->last_jpeg_message = (int) JMSG_LASTMSGCODE - 1;
+
+  err->addon_message_table = NULL;
+  err->first_addon_message = 0;	/* for safety */
+  err->last_addon_message = 0;
+
+  return err;
+}
diff --git a/src/libjpeg/jerror.h b/src/libjpeg/jerror.h
new file mode 100644
index 0000000..fc2fffe
--- /dev/null
+++ b/src/libjpeg/jerror.h
@@ -0,0 +1,291 @@
+/*
+ * jerror.h
+ *
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file defines the error and message codes for the JPEG library.
+ * Edit this file to add new codes, or to translate the message strings to
+ * some other language.
+ * A set of error-reporting macros is defined too.  Some applications using
+ * the JPEG library may wish to include this file to get the error codes
+ * and/or the macros.
+ */
+
+/*
+ * To define the enum list of message codes, include this file without
+ * defining macro JMESSAGE.  To create a message string table, include it
+ * again with a suitable JMESSAGE definition (see jerror.c for an example).
+ */
+#ifndef JMESSAGE
+#ifndef JERROR_H
+/* First time through (JERROR_H not yet defined): define the enum list */
+#define JMAKE_ENUM_LIST
+#else
+/* Repeated inclusions are no-ops unless JMESSAGE is defined: entries expand to nothing */
+#define JMESSAGE(code,string)
+#endif /* JERROR_H */
+#endif /* JMESSAGE */
+
+#ifdef JMAKE_ENUM_LIST
+
+typedef enum {
+
+#define JMESSAGE(code,string)	code ,
+
+#endif /* JMAKE_ENUM_LIST */
+
+JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry (code 0)! */
+
+/* For maintenance convenience, list is alphabetical by message code name */
+JMESSAGE(JERR_ARITH_NOTIMPL,
+	 "Sorry, there are legal restrictions on arithmetic coding")
+JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
+JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
+JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
+JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
+JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
+JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported")
+JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
+JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
+JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
+JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length")
+JMESSAGE(JERR_BAD_LIB_VERSION,
+	 "Wrong JPEG library version: library is %d, caller expects %d")
+JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
+JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
+JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
+JMESSAGE(JERR_BAD_PROGRESSION,
+	 "Invalid progressive parameters Ss=%d Se=%d Ah=%d Al=%d")
+JMESSAGE(JERR_BAD_PROG_SCRIPT,
+	 "Invalid progressive parameters at scan script entry %d")
+JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
+JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
+JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
+JMESSAGE(JERR_BAD_STRUCT_SIZE,
+	 "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
+JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access")
+JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small")
+JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here")
+JMESSAGE(JERR_CCIR601_NOTIMPL, "CCIR601 sampling not implemented yet")
+JMESSAGE(JERR_COMPONENT_COUNT, "Too many color components: %d, max %d")
+JMESSAGE(JERR_CONVERSION_NOTIMPL, "Unsupported color conversion request")
+JMESSAGE(JERR_DAC_INDEX, "Bogus DAC index %d")
+JMESSAGE(JERR_DAC_VALUE, "Bogus DAC value 0x%x")
+JMESSAGE(JERR_DHT_INDEX, "Bogus DHT index %d")
+JMESSAGE(JERR_DQT_INDEX, "Bogus DQT index %d")
+JMESSAGE(JERR_EMPTY_IMAGE, "Empty JPEG image (DNL not supported)")
+JMESSAGE(JERR_EMS_READ, "Read from EMS failed")
+JMESSAGE(JERR_EMS_WRITE, "Write to EMS failed")
+JMESSAGE(JERR_EOI_EXPECTED, "Didn't expect more than one scan")
+JMESSAGE(JERR_FILE_READ, "Input file read error")
+JMESSAGE(JERR_FILE_WRITE, "Output file write error --- out of disk space?")
+JMESSAGE(JERR_FRACT_SAMPLE_NOTIMPL, "Fractional sampling not implemented yet")
+JMESSAGE(JERR_HUFF_CLEN_OVERFLOW, "Huffman code size table overflow")
+JMESSAGE(JERR_HUFF_MISSING_CODE, "Missing Huffman code table entry")
+JMESSAGE(JERR_IMAGE_TOO_BIG, "Maximum supported image dimension is %u pixels")
+JMESSAGE(JERR_INPUT_EMPTY, "Empty input file")
+JMESSAGE(JERR_INPUT_EOF, "Premature end of input file")
+JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
+	 "Cannot transcode due to multiple use of quantization table %d")
+JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
+JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
+JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
+JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
+JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
+JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
+JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
+JMESSAGE(JERR_NO_QUANT_TABLE, "Quantization table 0x%02x was not defined")
+JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x")
+JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)")
+JMESSAGE(JERR_QUANT_COMPONENTS,
+	 "Cannot quantize more than %d color components")
+JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors")
+JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors")
+JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers")
+JMESSAGE(JERR_SOF_NO_SOS, "Invalid JPEG file structure: missing SOS marker")
+JMESSAGE(JERR_SOF_UNSUPPORTED, "Unsupported JPEG process: SOF type 0x%02x")
+JMESSAGE(JERR_SOI_DUPLICATE, "Invalid JPEG file structure: two SOI markers")
+JMESSAGE(JERR_SOS_NO_SOF, "Invalid JPEG file structure: SOS before SOF")
+JMESSAGE(JERR_TFILE_CREATE, "Failed to create temporary file %s")
+JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file")
+JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file")
+JMESSAGE(JERR_TFILE_WRITE,
+	 "Write failed on temporary file --- out of disk space?")
+JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines")
+JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x")
+JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up")
+JMESSAGE(JERR_WIDTH_OVERFLOW, "Image too wide for this implementation")
+JMESSAGE(JERR_XMS_READ, "Read from XMS failed")
+JMESSAGE(JERR_XMS_WRITE, "Write to XMS failed")
+JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT)
+JMESSAGE(JMSG_VERSION, JVERSION)
+JMESSAGE(JTRC_16BIT_TABLES,
+	 "Caution: quantization tables are too coarse for baseline JPEG")
+JMESSAGE(JTRC_ADOBE,
+	 "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
+JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u")
+JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u")
+JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x")
+JMESSAGE(JTRC_DHT, "Define Huffman Table 0x%02x")
+JMESSAGE(JTRC_DQT, "Define Quantization Table %d  precision %d")
+JMESSAGE(JTRC_DRI, "Define Restart Interval %u")
+JMESSAGE(JTRC_EMS_CLOSE, "Freed EMS handle %u")
+JMESSAGE(JTRC_EMS_OPEN, "Obtained EMS handle %u")
+JMESSAGE(JTRC_EOI, "End Of Image")
+JMESSAGE(JTRC_HUFFBITS, "        %3d %3d %3d %3d %3d %3d %3d %3d")
+JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d  %d")
+JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE,
+	 "Warning: thumbnail image size does not match data length %u")
+JMESSAGE(JTRC_JFIF_EXTENSION,
+	 "JFIF extension marker: type 0x%02x, length %u")
+JMESSAGE(JTRC_JFIF_THUMBNAIL, "    with %d x %d thumbnail image")
+JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u")
+JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x")
+JMESSAGE(JTRC_QUANTVALS, "        %4u %4u %4u %4u %4u %4u %4u %4u")
+JMESSAGE(JTRC_QUANT_3_NCOLORS, "Quantizing to %d = %d*%d*%d colors")
+JMESSAGE(JTRC_QUANT_NCOLORS, "Quantizing to %d colors")
+JMESSAGE(JTRC_QUANT_SELECTED, "Selected %d colors for quantization")
+JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d")
+JMESSAGE(JTRC_RST, "RST%d")
+JMESSAGE(JTRC_SMOOTH_NOTIMPL,
+	 "Smoothing not supported with nonstandard sampling ratios")
+JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d")
+JMESSAGE(JTRC_SOF_COMPONENT, "    Component %d: %dhx%dv q=%d")
+JMESSAGE(JTRC_SOI, "Start of Image")
+JMESSAGE(JTRC_SOS, "Start Of Scan: %d components")
+JMESSAGE(JTRC_SOS_COMPONENT, "    Component %d: dc=%d ac=%d")
+JMESSAGE(JTRC_SOS_PARAMS, "  Ss=%d, Se=%d, Ah=%d, Al=%d")
+JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s")
+JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s")
+JMESSAGE(JTRC_THUMB_JPEG,
+	 "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_PALETTE,
+	 "JFIF extension marker: palette thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_RGB,
+	 "JFIF extension marker: RGB thumbnail image, length %u")
+JMESSAGE(JTRC_UNKNOWN_IDS,
+	 "Unrecognized component IDs %d %d %d, assuming YCbCr")
+JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
+JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
+JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
+JMESSAGE(JWRN_BOGUS_PROGRESSION,
+	 "Inconsistent progression sequence for component %d coefficient %d")
+JMESSAGE(JWRN_EXTRANEOUS_DATA,
+	 "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
+JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment")
+JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code")
+JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d")
+JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file")
+JMESSAGE(JWRN_MUST_RESYNC,
+	 "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
+JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
+JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
+
+#ifdef JMAKE_ENUM_LIST
+
+  JMSG_LASTMSGCODE
+} J_MESSAGE_CODE;
+
+#undef JMAKE_ENUM_LIST
+#endif /* JMAKE_ENUM_LIST */
+
+/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
+#undef JMESSAGE
+
+
+#ifndef JERROR_H
+#define JERROR_H
+
+/* Macros to simplify using the error and trace message stuff */
+/* The first parameter is either type of cinfo pointer (it is cast to j_common_ptr) */
+
+/* Fatal errors (print message and exit); each pN is stored in msg_parm.i[N-1] */
+#define ERREXIT(cinfo,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT1(cinfo,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT2(cinfo,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT3(cinfo,code,p1,p2,p3)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXIT4(cinfo,code,p1,p2,p3,p4)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (cinfo)->err->msg_parm.i[3] = (p4), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+#define ERREXITS(cinfo,code,str)  \
+  ((cinfo)->err->msg_code = (code), \
+   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo)))
+
+#define MAKESTMT(stuff)		do { stuff } while (0)
+
+/* Nonfatal errors, emitted at msg_level -1 (we can keep going, but the data is probably corrupt) */
+#define WARNMS(cinfo,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+#define WARNMS1(cinfo,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+#define WARNMS2(cinfo,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), -1))
+
+/* Informational/debugging messages; lvl is passed to emit_message as the message level */
+#define TRACEMS(cinfo,lvl,code)  \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS1(cinfo,lvl,code,p1)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS2(cinfo,lvl,code,p1,p2)  \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+#define TRACEMS3(cinfo,lvl,code,p1,p2,p3)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS4(cinfo,lvl,code,p1,p2,p3,p4)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS5(cinfo,lvl,code,p1,p2,p3,p4,p5)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   _mp[4] = (p5); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMS8(cinfo,lvl,code,p1,p2,p3,p4,p5,p6,p7,p8)  \
+  MAKESTMT(int * _mp = (cinfo)->err->msg_parm.i; \
+	   _mp[0] = (p1); _mp[1] = (p2); _mp[2] = (p3); _mp[3] = (p4); \
+	   _mp[4] = (p5); _mp[5] = (p6); _mp[6] = (p7); _mp[7] = (p8); \
+	   (cinfo)->err->msg_code = (code); \
+	   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)); )
+#define TRACEMSS(cinfo,lvl,code,str)  /* trace message with a string parameter; str is truncated to fit msg_parm.s */ \
+  ((cinfo)->err->msg_code = (code), strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (cinfo)->err->msg_parm.s[JMSG_STR_PARM_MAX - 1] = '\0',  /* strncpy leaves no NUL on truncation */ \
+   (*(cinfo)->err->emit_message) ((j_common_ptr) (cinfo), (lvl)))
+
+#endif /* JERROR_H */
diff --git a/src/libjpeg/jfdctflt.c b/src/libjpeg/jfdctflt.c
new file mode 100644
index 0000000..79d7a00
--- /dev/null
+++ b/src/libjpeg/jfdctflt.c
@@ -0,0 +1,168 @@
+/*
+ * jfdctflt.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a floating-point implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * This implementation should be more accurate than either of the integer
+ * DCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * Perform the forward DCT on one block of samples, in place (data is overwritten).
+ */
+
+GLOBAL(void)
+jpeg_fdct_float (FAST_FLOAT * data)
+{
+  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
+  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
+  FAST_FLOAT *dataptr;
+  int ctr;
+
+  /* Pass 1: process rows (each row is DCTSIZE consecutive elements). */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+    
+    /* Even part */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+    
+    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[6] = tmp13 - z1;
+    
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
+    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
+    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
+    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns (column elements are DCTSIZE apart). */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+    
+    /* Even part */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+    
+    z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
+    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+    
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
+    z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
+    z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
+    z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE*3] = z13 - z2;
+    dataptr[DCTSIZE*1] = z11 + z4;
+    dataptr[DCTSIZE*7] = z11 - z4;
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/src/libjpeg/jfdctfst.c b/src/libjpeg/jfdctfst.c
new file mode 100644
index 0000000..ccb378a
--- /dev/null
+++ b/src/libjpeg/jfdctfst.c
@@ -0,0 +1,224 @@
+/*
+ * jfdctfst.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_IFAST_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jfdctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * Again to save a few shifts, the intermediate results between pass 1 and
+ * pass 2 are not upscaled, but are represented only to integral precision.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#define CONST_BITS  8		/* fractional bits kept in the FIX_* constants */
+
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_0_382683433  ((INT32)   98)		/* FIX(0.382683433) */
+#define FIX_0_541196100  ((INT32)  139)		/* FIX(0.541196100) */
+#define FIX_0_707106781  ((INT32)  181)		/* FIX(0.707106781) */
+#define FIX_1_306562965  ((INT32)  334)		/* FIX(1.306562965) */
+#else /* CONST_BITS != 8: no precalculated values, rely on FIX() */
+#define FIX_0_382683433  FIX(0.382683433)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_707106781  FIX(0.707106781)
+#define FIX_1_306562965  FIX(1.306562965)
+#endif /* CONST_BITS == 8 */
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE			/* replace the definition inherited from the headers */
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)	/* plain shift, no rounding addition */
+#endif /* !USE_ACCURATE_ROUNDING */
+
+
+/* Multiply a DCTELEM variable by an INT32 constant, and immediately
+ * descale the product by CONST_BITS to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
+
+
+/*
+ * Perform the forward DCT, in place, on one 8x8 block of samples in data[].
+ */
+
+GLOBAL(void)
+jpeg_fdct_ifast (DCTELEM * data)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; /* input sums/diffs */
+  DCTELEM tmp10, tmp11, tmp12, tmp13;	/* phase 2 terms */
+  DCTELEM z1, z2, z3, z4, z5, z11, z13;	/* MULTIPLY-derived terms */
+  DCTELEM *dataptr;		/* current row (pass 1) or column (pass 2) */
+  int ctr;			/* loop counter, DCTSIZE-1 down to 0 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows; each row of data[] is overwritten with its result. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];	/* pair element k with element 7-k */
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+    
+    /* Even part: built from the sums tmp0..tmp3, writes outputs 0, 4, 2, 6 */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[6] = tmp13 - z1;
+    
+    /* Odd part: built from the differences tmp4..tmp7, writes outputs 5, 3, 1, 7 */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns of the pass 1 output (element stride DCTSIZE). */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+    
+    /* Even part: same arithmetic as pass 1, applied down the column */
+    
+    tmp10 = tmp0 + tmp3;	/* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE*4] = tmp10 - tmp11;
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE*6] = tmp13 - z1;
+    
+    /* Odd part: same arithmetic as pass 1, applied down the column */
+
+    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;		/* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE*3] = z13 - z2;
+    dataptr[DCTSIZE*1] = z11 + z4;
+    dataptr[DCTSIZE*7] = z11 - z4;
+
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_IFAST_SUPPORTED */
diff --git a/src/libjpeg/jfdctint.c b/src/libjpeg/jfdctint.c
new file mode 100644
index 0000000..0a78b64
--- /dev/null
+++ b/src/libjpeg/jfdctint.c
@@ -0,0 +1,283 @@
+/*
+ * jfdctint.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_ISLOW_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true DCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D DCT,
+ * because the y0 and y4 outputs need not be divided by sqrt(N).
+ * In the IJG code, this factor of 8 is removed by the quantization step
+ * (in jcdctmgr.c), NOT in this module.
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (For 12-bit sample data, the intermediate
+ * array is INT32 anyway.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13		/* fractional bits kept in the FIX_* constants */
+#define PASS1_BITS  2		/* extra scaling bits carried out of pass 1 */
+#else /* BITS_IN_JSAMPLE != 8 */
+#define CONST_BITS  13		/* fractional bits kept in the FIX_* constants */
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif /* BITS_IN_JSAMPLE == 8 */
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#else /* CONST_BITS != 13: no precalculated values, rely on FIX() */
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif /* CONST_BITS == 13 */
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else /* wider samples: use an ordinary full-width multiplication */
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif /* BITS_IN_JSAMPLE == 8 */
+
+
+/*
+ * Perform the forward DCT, in place, on one 8x8 block of samples in data[].
+ */
+
+GLOBAL(void)
+jpeg_fdct_islow (DCTELEM * data)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; /* input sums/diffs */
+  INT32 tmp10, tmp11, tmp12, tmp13;	/* even-part terms */
+  INT32 z1, z2, z3, z4, z5;		/* intermediate sums and products */
+  DCTELEM *dataptr;		/* current row (pass 1) or column (pass 2) */
+  int ctr;			/* loop counter, DCTSIZE-1 down to 0 */
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];	/* pair element k with element 7-k */
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+    
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+    
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    /* Scale by multiplying: left-shifting a negative value is undefined in C. */
+    dataptr[0] = (DCTELEM) ((tmp10 + tmp11) * ((INT32) 1 << PASS1_BITS));
+    dataptr[4] = (DCTELEM) ((tmp10 - tmp11) * ((INT32) 1 << PASS1_BITS));
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+				   CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+				   CONST_BITS-PASS1_BITS);
+    
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+    
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+    
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    
+    z3 += z5;
+    z4 += z5;
+    
+    dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
+    
+    dataptr += DCTSIZE;		/* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+    tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+    tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+    tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+    tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+    tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+    tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+    tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+    
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+    
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
+    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
+    
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+					   CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+					   CONST_BITS+PASS1_BITS);
+    
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+    
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+    
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    
+    z3 += z5;
+    z4 += z5;
+    
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
+					   CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
+					   CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
+					   CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
+					   CONST_BITS+PASS1_BITS);
+    
+    dataptr++;			/* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/src/libjpeg/jidctflt.c b/src/libjpeg/jidctflt.c
new file mode 100644
index 0000000..0188ce3
--- /dev/null
+++ b/src/libjpeg/jidctflt.c
@@ -0,0 +1,242 @@
+/*
+ * jidctflt.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a floating-point implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * This implementation should be more accurate than either of the integer
+ * IDCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Dequantize a coefficient: cast it to FAST_FLOAT and multiply it by the
+ * multiplier-table entry; produce a float result.
+ */
+
+#define DEQUANTIZE(coef,quantval)  (((FAST_FLOAT) (coef)) * (quantval))
+
+
+/*
+ * Dequantize one coefficient block and inverse-DCT it into output_buf rows.
+ */
+
+GLOBAL(void)
+jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
+  FAST_FLOAT z5, z10, z11, z12, z13;
+  JCOEFPTR inptr;		/* walks the columns of coef_block */
+  FLOAT_MULT_TYPE * quantptr;	/* walks compptr->dct_table in step with inptr */
+  FAST_FLOAT * wsptr;		/* walks workspace[] */
+  JSAMPROW outptr;		/* start of the output row written in pass 2 */
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+      
+      wsptr[DCTSIZE*0] = dcval;	/* replicate the DC value down the column */
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part: uses the coefficients in rows 0, 2, 4, 6 of this column */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+    
+    /* Odd part: uses the coefficients in rows 1, 3, 5, 7 of this column */
+
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z13 = tmp6 + tmp5;		/* phase 6 */
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
+
+    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
+    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    wsptr[DCTSIZE*0] = tmp0 + tmp7;	/* combine even and odd halves */
+    wsptr[DCTSIZE*7] = tmp0 - tmp7;
+    wsptr[DCTSIZE*1] = tmp1 + tmp6;
+    wsptr[DCTSIZE*6] = tmp1 - tmp6;
+    wsptr[DCTSIZE*2] = tmp2 + tmp5;
+    wsptr[DCTSIZE*5] = tmp2 - tmp5;
+    wsptr[DCTSIZE*4] = tmp3 + tmp4;
+    wsptr[DCTSIZE*3] = tmp3 - tmp4;
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+  
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * And testing floats for zero is relatively expensive, so we don't bother.
+     */
+    
+    /* Even part: uses workspace elements 0, 2, 4, 6 of this row */
+
+    tmp10 = wsptr[0] + wsptr[4];
+    tmp11 = wsptr[0] - wsptr[4];
+
+    tmp13 = wsptr[2] + wsptr[6];
+    tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part: uses workspace elements 1, 3, 5, 7 of this row */
+
+    z13 = wsptr[5] + wsptr[3];
+    z10 = wsptr[5] - wsptr[3];
+    z11 = wsptr[1] + wsptr[7];
+    z12 = wsptr[1] - wsptr[7];
+
+    tmp7 = z11 + z13;
+    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
+
+    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
+    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    /* Final output stage: convert to INT32, descale by 8 (2**3), range-limit */
+
+    outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3)
+			    & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/src/libjpeg/jidctfst.c b/src/libjpeg/jidctfst.c
new file mode 100644
index 0000000..dba4216
--- /dev/null
+++ b/src/libjpeg/jidctfst.c
@@ -0,0 +1,368 @@
+/*
+ * jidctfst.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in fewer than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_IFAST_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jidctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * The dequantized coefficients are not integers because the AA&N scaling
+ * factors have been incorporated.  We represent them scaled up by PASS1_BITS,
+ * so that the first and second IDCT rounds have the same input scaling.
+ * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
+ * avoid a descaling shift; this compromises accuracy rather drastically
+ * for small quantization table entries, but it saves a lot of shifts.
+ * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
+ * so we use a much larger scaling factor to preserve accuracy.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  8
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  8
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_1_082392200  ((INT32)  277)		/* FIX(1.082392200) */
+#define FIX_1_414213562  ((INT32)  362)		/* FIX(1.414213562) */
+#define FIX_1_847759065  ((INT32)  473)		/* FIX(1.847759065) */
+#define FIX_2_613125930  ((INT32)  669)		/* FIX(2.613125930) */
+#else
+#define FIX_1_082392200  FIX(1.082392200)
+#define FIX_1_414213562  FIX(1.414213562)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_2_613125930  FIX(2.613125930)
+#endif
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE
+#define DESCALE(x,n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/* Multiply a DCTELEM variable by an INT32 constant, and immediately
+ * descale to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var,const)  ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16
+ * multiplication will do.  For 12-bit data, the multiplier table is
+ * declared INT32, so a 32-bit multiply will be used.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define DEQUANTIZE(coef,quantval)  (((IFAST_MULT_TYPE) (coef)) * (quantval))
+#else
+#define DEQUANTIZE(coef,quantval)  \
+	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
+#endif
+
+
+/* Like DESCALE, but applies to a DCTELEM and produces an int.  If right shift
+ * is unsigned (assumed for int whenever INT32's is), sign bits are ORed back.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS	DCTELEM ishift_temp;
+#if BITS_IN_JSAMPLE == 8
+#define DCTELEMBITS  16		/* DCTELEM may be 16 or 32 bits */
+#else
+#define DCTELEMBITS  32		/* DCTELEM must be 32 bits */
+#endif
+#define IRIGHT_SHIFT(x,shft)  \
+    ((ishift_temp = (x)) < 0 ? \
+     (ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \
+     (ishift_temp >> (shft)))
+#else
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif
+
+#ifdef USE_ACCURATE_ROUNDING
+#define IDESCALE(x,n)  ((int) IRIGHT_SHIFT((x) + (1 << ((n)-1)), n))
+#else
+#define IDESCALE(x,n)  ((int) IRIGHT_SHIFT(x, n))
+#endif
+
+
+/* Dequantize one 8x8 coefficient block, apply the fast (AA&N) inverse DCT,
+ * and store range-limited samples in output_buf[0..7] from column output_col.
+ */
+
+GLOBAL(void)
+jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  DCTELEM tmp10, tmp11, tmp12, tmp13;
+  DCTELEM z5, z10, z11, z12, z13;
+  JCOEFPTR inptr;
+  IFAST_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE2];	/* buffers data between passes */
+  SHIFT_TEMPS			/* for DESCALE */
+  ISHIFT_TEMPS			/* for IDESCALE */
+
+  /* Pass 1: dequantize each input column, 1-D IDCT it, store in workspace. */
+
+  inptr = coef_block;
+  quantptr = (IFAST_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero: replicate the dequantized DC term down the column */
+      int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part: coefficients 0, 2, 4, 6 of this column */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+    
+    /* Odd part: coefficients 1, 3, 5, 7; tmp10..tmp12 are reused as scratch */
+
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z13 = tmp6 + tmp5;		/* phase 6 */
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
+    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
+    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
+    wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
+    wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
+    wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
+    wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);
+    wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
+    wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
+    wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+  
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+    
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero: the whole output row is the descaled DC term */
+      JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+      
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      outptr[4] = dcval;
+      outptr[5] = dcval;
+      outptr[6] = dcval;
+      outptr[7] = dcval;
+
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+    
+    /* Even part: workspace entries 0, 2, 4, 6 of this row */
+
+    tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
+    tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
+
+    tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
+    tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562)
+	    - tmp13;
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part: entries 1, 3, 5, 7; tmp10..tmp12 are reused as scratch */
+
+    z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
+    z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
+    z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
+    z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
+    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
+    tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    /* Final output stage: descale by 2**(PASS1_BITS+3) and range-limit */
+
+    outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
+			    & RANGE_MASK];
+
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_IFAST_SUPPORTED */
diff --git a/src/libjpeg/jidctint.c b/src/libjpeg/jidctint.c
new file mode 100644
index 0000000..a72b320
--- /dev/null
+++ b/src/libjpeg/jidctint.c
@@ -0,0 +1,389 @@
+/*
+ * jidctint.c
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef DCT_ISLOW_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The rationale for this scaling scheme is as follows:
+ *
+ * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true IDCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D IDCT,
+ * because the y0 and y4 inputs need not be divided by sqrt(N).
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (To scale up 12-bit sample data further, an
+ * intermediate INT32 array would be needed.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
+#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
+#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
+#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
+#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
+#else
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce an int result.  In this module, both inputs and result
+ * are 16 bits or less, so either int or short multiply will work.
+ */
+
+#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
+
+
+/* Dequantize one 8x8 coefficient block, apply the accurate (LL&M) inverse DCT,
+ * and store range-limited samples in output_buf[0..7] from column output_col.
+ */
+
+GLOBAL(void)
+jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp1, tmp2, tmp3;
+  INT32 tmp10, tmp11, tmp12, tmp13;
+  INT32 z1, z2, z3, z4, z5;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE * quantptr;
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE2];	/* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero.  Shift as unsigned: the DC term may be negative. */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+      dcval = (int) ((unsigned int) dcval << PASS1_BITS);
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+    
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+    
+    z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    /* Shift as unsigned: left-shifting a negative value is undefined in C. */
+    tmp0 = (INT32) ((unsigned long) (z2 + z3) << CONST_BITS);
+    tmp1 = (INT32) ((unsigned long) (z2 - z3) << CONST_BITS);
+    
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+    
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    
+    z1 = tmp0 + tmp3;
+    z2 = tmp1 + tmp2;
+    z3 = tmp0 + tmp2;
+    z4 = tmp1 + tmp3;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+    
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    
+    z3 += z5;
+    z4 += z5;
+    
+    tmp0 += z1 + z3;
+    tmp1 += z2 + z4;
+    tmp2 += z2 + z3;
+    tmp3 += z1 + z4;
+    
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+    
+    wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
+    wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
+    
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+  
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+    
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+      
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      outptr[4] = dcval;
+      outptr[5] = dcval;
+      outptr[6] = dcval;
+      outptr[7] = dcval;
+
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+    
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    
+    z2 = (INT32) wsptr[2];
+    z3 = (INT32) wsptr[6];
+    
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
+    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+    z2 = (INT32) wsptr[0] + (INT32) wsptr[4];	/* z2, z3 are free for reuse */
+    z3 = (INT32) wsptr[0] - (INT32) wsptr[4];
+    tmp0 = (INT32) ((unsigned long) z2 << CONST_BITS); /* unsigned, as above */
+    tmp1 = (INT32) ((unsigned long) z3 << CONST_BITS);
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+    
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+    
+    tmp0 = (INT32) wsptr[7];
+    tmp1 = (INT32) wsptr[5];
+    tmp2 = (INT32) wsptr[3];
+    tmp3 = (INT32) wsptr[1];
+    
+    z1 = tmp0 + tmp3;
+    z2 = tmp1 + tmp2;
+    z3 = tmp0 + tmp2;
+    z4 = tmp1 + tmp3;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+    
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+    z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    
+    z3 += z5;
+    z4 += z5;
+    
+    tmp0 += z1 + z3;
+    tmp1 += z2 + z4;
+    tmp2 += z2 + z3;
+    tmp3 += z1 + z4;
+    
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+    
+    outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0,
+					  CONST_BITS+PASS1_BITS+3)
+			    & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/src/libjpeg/jidctred.c b/src/libjpeg/jidctred.c
new file mode 100644
index 0000000..421f3c7
--- /dev/null
+++ b/src/libjpeg/jidctred.c
@@ -0,0 +1,398 @@
+/*
+ * jidctred.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains inverse-DCT routines that produce reduced-size output:
+ * either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block.
+ *
+ * The implementation is based on the Loeffler, Ligtenberg and Moschytz (LL&M)
+ * algorithm used in jidctint.c.  We simply replace each 8-to-8 1-D IDCT step
+ * with an 8-to-4 step that produces the four averages of two adjacent outputs
+ * (or an 8-to-2 step producing two averages of four outputs, for 2x2 output).
+ * These steps were derived by computing the corresponding values at the end
+ * of the normal LL&M code, then simplifying as much as possible.
+ *
+ * 1x1 is trivial: just take the DC coefficient divided by 8.
+ *
+ * See jidctint.c for additional comments.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"		/* Private declarations for DCT subsystem */
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling is the same as in jidctint.c. */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_211164243  ((INT32)  1730)	/* FIX(0.211164243) */
+#define FIX_0_509795579  ((INT32)  4176)	/* FIX(0.509795579) */
+#define FIX_0_601344887  ((INT32)  4926)	/* FIX(0.601344887) */
+#define FIX_0_720959822  ((INT32)  5906)	/* FIX(0.720959822) */
+#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
+#define FIX_0_850430095  ((INT32)  6967)	/* FIX(0.850430095) */
+#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
+#define FIX_1_061594337  ((INT32)  8697)	/* FIX(1.061594337) */
+#define FIX_1_272758580  ((INT32)  10426)	/* FIX(1.272758580) */
+#define FIX_1_451774981  ((INT32)  11893)	/* FIX(1.451774981) */
+#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
+#define FIX_2_172734803  ((INT32)  17799)	/* FIX(2.172734803) */
+#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
+#define FIX_3_624509785  ((INT32)  29692)	/* FIX(3.624509785) */
+#else
+#define FIX_0_211164243  FIX(0.211164243)
+#define FIX_0_509795579  FIX(0.509795579)
+#define FIX_0_601344887  FIX(0.601344887)
+#define FIX_0_720959822  FIX(0.720959822)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_850430095  FIX(0.850430095)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_061594337  FIX(1.061594337)
+#define FIX_1_272758580  FIX(1.272758580)
+#define FIX_1_451774981  FIX(1.451774981)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_2_172734803  FIX(2.172734803)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_624509785  FIX(3.624509785)
+#endif
+
+
+/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const)  ((var) * (const))
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce an int result.  In this module, both inputs and result
+ * are 16 bits or less, so either int or short multiply will work.
+ */
+
+#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
+
+
+/*
+ * Dequantize one 8x8 coefficient block and inverse-DCT it into a reduced
+ * 4x4 block stored at output_buf[0..3][output_col .. output_col+3].
+ */
+
+GLOBAL(void)
+jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp2, tmp10, tmp12;
+  INT32 z1, z2, z3, z4;
+  JCOEFPTR inptr;		/* current column of coef_block */
+  ISLOW_MULT_TYPE * quantptr;	/* matching column of compptr->dct_table */
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE*4];	/* buffers data between passes (4 rows) */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) {
+    /* Don't bother to process column 4, because second pass won't use it */
+    if (ctr == DCTSIZE-4)	/* ctr counts down, so this is column index 4 */
+      continue;			/* the for-clause still advances the pointers */
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 &&
+	inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero; we need not examine term 4 for 4x4 output */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+      /* NOTE(review): if the DC term can be negative, "<<" above is undefined in ISO C -- confirm. */
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      
+      continue;
+    }
+    
+    /* Even part */
+    
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp0 <<= (CONST_BITS+1);	/* the extra +1 is removed by the DESCALE shifts below */
+    
+    z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865);
+    
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+    
+    /* Odd part */
+    
+    z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    
+    tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */
+	 + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
+	 + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
+	 + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
+    
+    tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */
+	 + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
+	 + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
+	 + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
+
+    /* Final output stage: one value per workspace row 0..3 for this column */
+    
+    wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1);
+    wsptr[DCTSIZE*3] = (int) DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1);
+    wsptr[DCTSIZE*1] = (int) DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1);
+    wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1);
+  }
+  
+  /* Pass 2: process 4 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* It's not clear whether a zero row test is worthwhile here ... */
+
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 &&
+	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero (wsptr[4] is not tested: pass 1 never stored column 4) */
+      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+      
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+    
+    /* Even part */
+    
+    tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1);
+    
+    tmp2 = MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
+	 + MULTIPLY((INT32) wsptr[6], - FIX_0_765366865);
+    
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+    
+    /* Odd part */
+    
+    z1 = (INT32) wsptr[7];
+    z2 = (INT32) wsptr[5];
+    z3 = (INT32) wsptr[3];
+    z4 = (INT32) wsptr[1];
+    
+    tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */
+	 + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
+	 + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
+	 + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
+    
+    tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */
+	 + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
+	 + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
+	 + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
+
+    /* Final output stage: descale, mask, and map through range_limit */
+    
+    outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2,
+					  CONST_BITS+PASS1_BITS+3+1)
+			    & RANGE_MASK];
+    outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2,
+					  CONST_BITS+PASS1_BITS+3+1)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0,
+					  CONST_BITS+PASS1_BITS+3+1)
+			    & RANGE_MASK];
+    outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0,
+					  CONST_BITS+PASS1_BITS+3+1)
+			    & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Dequantize one 8x8 coefficient block and inverse-DCT it into a reduced
+ * 2x2 block stored at output_buf[0..1][output_col .. output_col+1].
+ */
+
+GLOBAL(void)
+jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  INT32 tmp0, tmp10, z1;
+  JCOEFPTR inptr;		/* current column of coef_block */
+  ISLOW_MULT_TYPE * quantptr;	/* matching column of compptr->dct_table */
+  int * wsptr;
+  JSAMPROW outptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE*2];	/* buffers data between passes (2 rows) */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) {
+    /* Don't bother to process columns 2,4,6 (ctr counts down from DCTSIZE) */
+    if (ctr == DCTSIZE-2 || ctr == DCTSIZE-4 || ctr == DCTSIZE-6)
+      continue;			/* the for-clause still advances the pointers */
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*3] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */
+      int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
+      /* NOTE(review): if the DC term can be negative, "<<" above is undefined in ISO C -- confirm. */
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      
+      continue;
+    }
+    
+    /* Even part */
+    
+    z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp10 = z1 << (CONST_BITS+2);	/* the extra +2 is removed by the DESCALE shifts below */
+    
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+    tmp0 = MULTIPLY(z1, - FIX_0_720959822); /* sqrt(2) * (c7-c5+c3-c1) */
+    z1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp0 += MULTIPLY(z1, FIX_0_850430095); /* sqrt(2) * (-c1+c3+c5+c7) */
+    z1 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp0 += MULTIPLY(z1, - FIX_1_272758580); /* sqrt(2) * (-c1+c3-c5-c7) */
+    z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
+
+    /* Final output stage: one value per workspace row 0..1 for this column */
+    
+    wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2);
+    wsptr[DCTSIZE*1] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2);
+  }
+  
+  /* Pass 2: process 2 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 2; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* It's not clear whether a zero row test is worthwhile here ... */
+
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero (columns 2,4,6 are not tested: pass 1 never stored them) */
+      JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
+				  & RANGE_MASK];
+      
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      
+      wsptr += DCTSIZE;		/* advance pointer to next row */
+      continue;
+    }
+#endif
+    
+    /* Even part */
+    
+    tmp10 = ((INT32) wsptr[0]) << (CONST_BITS+2);
+    
+    /* Odd part */
+
+    tmp0 = MULTIPLY((INT32) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */
+	 + MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */
+	 + MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */
+	 + MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
+
+    /* Final output stage: descale, mask, and map through range_limit */
+    
+    outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0,
+					  CONST_BITS+PASS1_BITS+3+2)
+			    & RANGE_MASK];
+    outptr[1] = range_limit[(int) DESCALE(tmp10 - tmp0,
+					  CONST_BITS+PASS1_BITS+3+2)
+			    & RANGE_MASK];
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Dequantize only the DC coefficient of one block and store the single
+ * resulting sample at output_buf[0][output_col] (reduced-size 1x1 output).
+ */
+
+GLOBAL(void)
+jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+	       JCOEFPTR coef_block,
+	       JSAMPARRAY output_buf, JDIMENSION output_col)
+{
+  int dcval;
+  ISLOW_MULT_TYPE * quantptr;
+  JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  SHIFT_TEMPS
+
+  /* We hardly need an inverse DCT routine for this: just take the
+   * average pixel value, which is one-eighth of the DC coefficient.
+   */
+  quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
+  dcval = DEQUANTIZE(coef_block[0], quantptr[0]);	/* only entry 0 is read */
+  dcval = (int) DESCALE((INT32) dcval, 3);	/* the one-eighth scaling noted above */
+
+  output_buf[0][output_col] = range_limit[dcval & RANGE_MASK];
+}
+
+#endif /* IDCT_SCALING_SUPPORTED */
diff --git a/src/libjpeg/jinclude.h b/src/libjpeg/jinclude.h
new file mode 100644
index 0000000..3f943a3
--- /dev/null
+++ b/src/libjpeg/jinclude.h
@@ -0,0 +1,97 @@
+/*
+ * jinclude.h
+ *
+ * Copyright (C) 1991-1994, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file exists to provide a single place to fix any problems with
+ * including the wrong system include files.  (Common problems are taken
+ * care of by the standard jconfig symbols, but on really weird systems
+ * you may have to edit this file.)
+ *
+ * NOTE: this file is NOT intended to be included by applications using the
+ * JPEG library.  Most applications need only include jpeglib.h.
+ */
+
+
+/* Include auto-config file to find out which system include files we need. */
+
+#include "jconfig.h"		/* auto configuration options */
+#define JCONFIG_INCLUDED	/* so that jpeglib.h doesn't do it again */
+
+/*
+ * We need the NULL macro and size_t typedef.
+ * On an ANSI-conforming system it is sufficient to include <stddef.h>.
+ * Otherwise, we get them from <stdlib.h> or <stdio.h>; we may have to
+ * pull in <sys/types.h> as well.
+ * Note that the core JPEG library does not require <stdio.h>;
+ * only the default error handler and data source/destination modules do.
+ * But we must pull it in because of the references to FILE in jpeglib.h.
+ * You can remove those references if you want to compile without <stdio.h>.
+ */
+
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
+
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#ifdef NEED_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#include <stdio.h>
+
+/*
+ * We need memory copying and zeroing functions, plus strncpy().
+ * ANSI and System V implementations declare these in <string.h>.
+ * BSD doesn't have the mem() functions, but it does have bcopy()/bzero().
+ * Some systems may declare memset and memcpy in <memory.h>.
+ *
+ * NOTE: we assume the size parameters to these functions are of type size_t.
+ * Change the casts in these macros if not!
+ */
+
+#ifdef NEED_BSD_STRINGS
+
+#include <strings.h>
+#define MEMZERO(target,size)	bzero((void *)(target), (size_t)(size))
+#define MEMCOPY(dest,src,size)	bcopy((const void *)(src), (void *)(dest), (size_t)(size))
+
+#else /* not BSD, assume ANSI/SysV string lib */
+
+#include <string.h>
+#define MEMZERO(target,size)	memset((void *)(target), 0, (size_t)(size))
+#define MEMCOPY(dest,src,size)	memcpy((void *)(dest), (const void *)(src), (size_t)(size))
+
+#endif
+
+/*
+ * In ANSI C, and indeed any rational implementation, size_t is also the
+ * type returned by sizeof().  However, it seems there are some irrational
+ * implementations out there, in which sizeof() returns an int even though
+ * size_t is defined as long or unsigned long.  To ensure consistent results
+ * we always use this SIZEOF() macro in place of using sizeof() directly.
+ */
+
+#define SIZEOF(object)	((size_t) sizeof(object))
+
+/*
+ * The modules that use fread() and fwrite() always invoke them through
+ * these macros.  On some systems you may need to twiddle the argument casts.
+ * CAUTION: argument order is different from underlying functions!
+ */
+
+#ifndef HAVE_JFIO
+#define JFREAD(file,buf,sizeofbuf)  \
+  ((size_t) fread((void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
+#define JFWRITE(file,buf,sizeofbuf)  \
+  ((size_t) fwrite((const void *) (buf), (size_t) 1, (size_t) (sizeofbuf), (file)))
+#define  JFFLUSH(file) \
+  ((size_t) fflush((file)))
+#define JFERROR(file) \
+  ((size_t) ferror((file)))
+#endif
diff --git a/src/libjpeg/jmemmgr.c b/src/libjpeg/jmemmgr.c
new file mode 100644
index 0000000..d801b32
--- /dev/null
+++ b/src/libjpeg/jmemmgr.c
@@ -0,0 +1,1118 @@
+/*
+ * jmemmgr.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains the JPEG system-independent memory management
+ * routines.  This code is usable across a wide variety of machines; most
+ * of the system dependencies have been isolated in a separate file.
+ * The major functions provided here are:
+ *   * pool-based allocation and freeing of memory;
+ *   * policy decisions about how to divide available memory among the
+ *     virtual arrays;
+ *   * control logic for swapping virtual arrays between main memory and
+ *     backing storage.
+ * The separate system-dependent file provides the actual backing-storage
+ * access code, and it contains the policy decision about how much total
+ * main memory to use.
+ * This file is system-dependent in the sense that some of its functions
+ * are unnecessary in some systems.  For example, if there is enough virtual
+ * memory so that backing storage will never be used, much of the virtual
+ * array control logic could be removed.  (Of course, if you have that much
+ * memory then you shouldn't care about a little bit of unused code...)
+ */
+
+#define JPEG_INTERNALS
+#define AM_MEMORY_MANAGER	/* we define jvirt_Xarray_control structs */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"		/* import the system-dependent declarations */
+
+#ifndef NO_GETENV
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare getenv() */
+extern char * getenv JPP((const char * name));
+#endif
+#endif
+
+
+/*
+ * Some important notes:
+ *   The allocation routines provided here must never return NULL.
+ *   They should exit to error_exit if unsuccessful.
+ *
+ *   It's not a good idea to try to merge the sarray and barray routines,
+ *   even though they are textually almost the same, because samples are
+ *   usually stored as bytes while coefficients are shorts or ints.  Thus,
+ *   in machines where byte pointers have a different representation from
+ *   word pointers, the resulting machine code could not be the same.
+ */
+
+
+/*
+ * Many machines require storage alignment: longs must start on 4-byte
+ * boundaries, doubles on 8-byte boundaries, etc.  On such machines, malloc()
+ * always returns pointers that are multiples of the worst-case alignment
+ * requirement, and we had better do so too.
+ * There isn't any really portable way to determine the worst-case alignment
+ * requirement.  This module assumes that the alignment requirement is
+ * multiples of sizeof(ALIGN_TYPE).
+ * By default, we define ALIGN_TYPE as double.  This is necessary on some
+ * workstations (where doubles really do need 8-byte alignment) and will work
+ * fine on nearly everything.  If your machine has lesser alignment needs,
+ * you can save a few bytes by making ALIGN_TYPE smaller.
+ * The only place I know of where this will NOT work is certain Macintosh
+ * 680x0 compilers that define double as a 10-byte IEEE extended float.
+ * Doing 10-byte alignment is counterproductive because longwords won't be
+ * aligned well.  Put "#define ALIGN_TYPE long" in jconfig.h if you have
+ * such a compiler.
+ */
+
+#ifndef ALIGN_TYPE		/* so can override from jconfig.h */
+#define ALIGN_TYPE  double
+#endif
+
+
+/*
+ * We allocate objects from "pools", where each pool is gotten with a single
+ * request to jpeg_get_small() or jpeg_get_large().  There is no per-object
+ * overhead within a pool, except for alignment padding.  Each pool has a
+ * header with a link to the next pool of the same class.
+ * Small and large pool headers are identical except that the latter's
+ * link pointer must be FAR on 80x86 machines.
+ * Notice that the "real" header fields are union'ed with a dummy ALIGN_TYPE
+ * field.  This forces the compiler to make SIZEOF(small_pool_hdr) a multiple
+ * of the alignment requirement of ALIGN_TYPE.
+ */
+
+typedef union small_pool_struct * small_pool_ptr;
+
+typedef union small_pool_struct {
+  struct {
+    small_pool_ptr next;	/* next in list of pools */
+    size_t bytes_used;		/* how many bytes already used within pool */
+    size_t bytes_left;		/* bytes still available in this pool */
+  } hdr;
+  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
+} small_pool_hdr;
+
+typedef union large_pool_struct FAR * large_pool_ptr;
+
+typedef union large_pool_struct {
+  struct {
+    large_pool_ptr next;	/* next in list of pools */
+    size_t bytes_used;		/* how many bytes already used within pool */
+    size_t bytes_left;		/* bytes still available in this pool */
+  } hdr;
+  ALIGN_TYPE dummy;		/* included in union to ensure alignment */
+} large_pool_hdr;
+
+
+/*
+ * Here is the full definition of a memory manager object.
+ */
+
+typedef struct {
+  struct jpeg_memory_mgr pub;	/* public fields */
+
+  /* Each pool identifier (lifetime class) names a linked list of pools. */
+  small_pool_ptr small_list[JPOOL_NUMPOOLS];
+  large_pool_ptr large_list[JPOOL_NUMPOOLS];
+
+  /* Since we only have one lifetime class of virtual arrays, only one
+   * linked list is necessary (for each datatype).  Note that the virtual
+   * array control blocks being linked together are actually stored somewhere
+   * in the small-pool list.
+   */
+  jvirt_sarray_ptr virt_sarray_list;
+  jvirt_barray_ptr virt_barray_list;
+
+  /* This counts total space obtained from jpeg_get_small/large */
+  long total_space_allocated;
+
+  /* alloc_sarray and alloc_barray set this value for use by virtual
+   * array routines.
+   */
+  JDIMENSION last_rowsperchunk;	/* from most recent alloc_sarray/barray */
+} my_memory_mgr;
+
+typedef my_memory_mgr * my_mem_ptr;
+
+
+/*
+ * The control blocks for virtual arrays.
+ * Note that these blocks are allocated in the "small" pool area.
+ * System-dependent info for the associated backing store (if any) is hidden
+ * inside the backing_store_info struct.
+ */
+
+struct jvirt_sarray_control {
+  JSAMPARRAY mem_buffer;	/* => the in-memory buffer */
+  JDIMENSION rows_in_array;	/* total virtual array height */
+  JDIMENSION samplesperrow;	/* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;		/* max rows accessed by access_virt_sarray */
+  JDIMENSION rows_in_mem;	/* height of memory buffer */
+  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
+  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
+  boolean pre_zero;		/* pre-zero mode requested? */
+  boolean dirty;		/* do current buffer contents need to be written? */
+  boolean b_s_open;		/* is backing-store data valid? */
+  jvirt_sarray_ptr next;	/* link to next virtual sarray control block */
+  backing_store_info b_s_info;	/* System-dependent control info */
+};
+
+struct jvirt_barray_control {
+  JBLOCKARRAY mem_buffer;	/* => the in-memory buffer */
+  JDIMENSION rows_in_array;	/* total virtual array height */
+  JDIMENSION blocksperrow;	/* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;		/* max rows accessed by access_virt_barray */
+  JDIMENSION rows_in_mem;	/* height of memory buffer */
+  JDIMENSION rowsperchunk;	/* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;	/* first logical row # in the buffer */
+  JDIMENSION first_undef_row;	/* row # of first uninitialized row */
+  boolean pre_zero;		/* pre-zero mode requested? */
+  boolean dirty;		/* do current buffer contents need to be written? */
+  boolean b_s_open;		/* is backing-store data valid? */
+  jvirt_barray_ptr next;	/* link to next virtual barray control block */
+  backing_store_info b_s_info;	/* System-dependent control info */
+};
+
+
+#ifdef MEM_STATS		/* optional extra stuff for statistics */
+
+LOCAL(void)
+print_mem_stats (j_common_ptr cinfo, int pool_id)
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr shdr_ptr;
+  large_pool_ptr lhdr_ptr;
+
+  /* Debugging stub: prints total space and each pool header's byte counts
+   * for pool_id.  Uses fprintf directly rather than the trace message code,
+   * because the message parm array can't handle longs.
+   */
+  fprintf(stderr, "Freeing pool %d, total space = %ld\n",
+	  pool_id, mem->total_space_allocated);
+
+  for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL;
+       lhdr_ptr = lhdr_ptr->hdr.next) {
+    fprintf(stderr, "  Large chunk used %ld\n",
+	    (long) lhdr_ptr->hdr.bytes_used);	/* size_t cast to match %ld */
+  }
+
+  for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL;
+       shdr_ptr = shdr_ptr->hdr.next) {
+    fprintf(stderr, "  Small chunk used %ld free %ld\n",
+	    (long) shdr_ptr->hdr.bytes_used,
+	    (long) shdr_ptr->hdr.bytes_left);
+  }
+}
+
+#endif /* MEM_STATS */
+
+
+LOCAL(void)
+out_of_memory (j_common_ptr cinfo, int which)
+/* Report an out-of-memory error and stop execution */
+/* If we compiled MEM_STATS support, report alloc requests before dying */
+{
+#ifdef MEM_STATS
+  cinfo->err->trace_level = 2;	/* force self_destruct to report stats */
+#endif
+  ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which);	/* 'which' is the caller's site code */
+}
+
+
+/*
+ * Allocation of "small" objects.
+ *
+ * For these, we use pooled storage.  When a new pool must be created,
+ * we try to get enough space for the current request plus a "slop" factor,
+ * where the slop will be the amount of leftover space in the new pool.
+ * The speed vs. space tradeoff is largely determined by the slop values.
+ * A different slop value is provided for each pool class (lifetime),
+ * and we also distinguish the first pool of a class from later ones.
+ * NOTE: the values given work fairly well on both 16- and 32-bit-int
+ * machines, but may be too small if longs are 64 bits or more.
+ */
+
+static const size_t first_pool_slop[JPOOL_NUMPOOLS] = 
+{
+	1600,			/* first PERMANENT pool */
+	16000			/* first IMAGE pool */
+};
+
+static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = 
+{
+	0,			/* additional PERMANENT pools */
+	5000			/* additional IMAGE pools */
+};
+
+#define MIN_SLOP  50		/* greater than 0 to avoid futile looping */
+
+
+METHODDEF(void *)
+alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Allocate a "small" object of sizeofobject bytes from pool class pool_id */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr hdr_ptr, prev_hdr_ptr;
+  char * data_ptr;
+  size_t odd_bytes, min_request, slop;
+
+  /* Check for unsatisfiable request (do now to ensure no overflow below) */
+  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(small_pool_hdr)))
+    out_of_memory(cinfo, 1);	/* request exceeds malloc's ability */
+  /* NOTE(review): the round-up below runs after this check and can add up to SIZEOF(ALIGN_TYPE)-1. */
+  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
+  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
+  if (odd_bytes > 0)
+    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
+
+  /* See if space is available in any existing pool */
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+  prev_hdr_ptr = NULL;
+  hdr_ptr = mem->small_list[pool_id];
+  while (hdr_ptr != NULL) {
+    if (hdr_ptr->hdr.bytes_left >= sizeofobject)
+      break;			/* found pool with enough space */
+    prev_hdr_ptr = hdr_ptr;	/* remember list tail for appending a new pool */
+    hdr_ptr = hdr_ptr->hdr.next;
+  }
+
+  /* Time to make a new pool? */
+  if (hdr_ptr == NULL) {
+    /* min_request is what we need now, slop is what will be leftover */
+    min_request = sizeofobject + SIZEOF(small_pool_hdr);
+    if (prev_hdr_ptr == NULL)	/* first pool in class? */
+      slop = first_pool_slop[pool_id];
+    else
+      slop = extra_pool_slop[pool_id];
+    /* Don't ask for more than MAX_ALLOC_CHUNK */
+    if (slop > (size_t) (MAX_ALLOC_CHUNK-min_request))
+      slop = (size_t) (MAX_ALLOC_CHUNK-min_request);
+    /* Try to get space, if fail reduce slop and try again */
+    for (;;) {
+      hdr_ptr = (small_pool_ptr) jpeg_get_small(cinfo, min_request + slop);
+      if (hdr_ptr != NULL)
+	break;
+      slop /= 2;		/* halve the extra space before retrying */
+      if (slop < MIN_SLOP)	/* give up when it gets real small */
+	out_of_memory(cinfo, 2); /* jpeg_get_small failed */
+    }
+    mem->total_space_allocated += min_request + slop;
+    /* Success, initialize the new pool header and add to end of list */
+    hdr_ptr->hdr.next = NULL;
+    hdr_ptr->hdr.bytes_used = 0;
+    hdr_ptr->hdr.bytes_left = sizeofobject + slop;	/* header bytes are not counted */
+    if (prev_hdr_ptr == NULL)	/* first pool in class? */
+      mem->small_list[pool_id] = hdr_ptr;
+    else
+      prev_hdr_ptr->hdr.next = hdr_ptr;
+  }
+
+  /* OK, allocate the object from the current pool */
+  data_ptr = (char *) (hdr_ptr + 1); /* point to first data byte in pool */
+  data_ptr += hdr_ptr->hdr.bytes_used; /* point to place for object */
+  hdr_ptr->hdr.bytes_used += sizeofobject;
+  hdr_ptr->hdr.bytes_left -= sizeofobject;
+
+  return (void *) data_ptr;
+}
+
+
+/*
+ * Allocation of "large" objects.
+ *
+ * The external semantics of these are the same as "small" objects,
+ * except that FAR pointers are used on 80x86.  However the pool
+ * management heuristics are quite different.  We assume that each
+ * request is large enough that it may as well be passed directly to
+ * jpeg_get_large; the pool management just links everything together
+ * so that we can free it all on demand.
+ * Note: the major use of "large" objects is in JSAMPARRAY and JBLOCKARRAY
+ * structures.  The routines that create these structures (see below)
+ * deliberately bunch rows together to ensure a large request size.
+ */
+
+METHODDEF(void FAR *)
+alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Allocate a "large" object of sizeofobject bytes in its own new pool */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  large_pool_ptr hdr_ptr;
+  size_t odd_bytes;
+
+  /* Check for unsatisfiable request (do now to ensure no overflow below) */
+  if (sizeofobject > (size_t) (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)))
+    out_of_memory(cinfo, 3);	/* request exceeds malloc's ability */
+  /* NOTE(review): the round-up below runs after this check and can add up to SIZEOF(ALIGN_TYPE)-1. */
+  /* Round up the requested size to a multiple of SIZEOF(ALIGN_TYPE) */
+  odd_bytes = sizeofobject % SIZEOF(ALIGN_TYPE);
+  if (odd_bytes > 0)
+    sizeofobject += SIZEOF(ALIGN_TYPE) - odd_bytes;
+
+  /* Always make a new pool */
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  hdr_ptr = (large_pool_ptr) jpeg_get_large(cinfo, sizeofobject +
+					    SIZEOF(large_pool_hdr));
+  if (hdr_ptr == NULL)
+    out_of_memory(cinfo, 4);	/* jpeg_get_large failed */
+  mem->total_space_allocated += sizeofobject + SIZEOF(large_pool_hdr);
+
+  /* Success, initialize the new pool header and add to list */
+  hdr_ptr->hdr.next = mem->large_list[pool_id];
+  /* We maintain space counts in each pool header for statistical purposes,
+   * even though they are not needed for allocation.
+   */
+  hdr_ptr->hdr.bytes_used = sizeofobject;
+  hdr_ptr->hdr.bytes_left = 0;
+  mem->large_list[pool_id] = hdr_ptr;	/* new pool becomes the list head */
+
+  return (void FAR *) (hdr_ptr + 1); /* point to first data byte in pool */
+}
+
+
+/*
+ * Creation of 2-D sample arrays.
+ * The pointers are in near heap, the samples themselves in FAR heap.
+ *
+ * To minimize allocation overhead and to allow I/O of large contiguous
+ * blocks, we allocate the sample rows in groups of as many rows as possible
+ * without exceeding MAX_ALLOC_CHUNK total bytes per allocation request.
+ * NB: the virtual array control routines, later in this file, know about
+ * this chunking of rows.  The rowsperchunk value is left in the mem manager
+ * object so that it can be saved away if this sarray is the workspace for
+ * a virtual array.
+ */
+
+METHODDEF(JSAMPARRAY)
+alloc_sarray (j_common_ptr cinfo, int pool_id,
+	      JDIMENSION samplesperrow, JDIMENSION numrows)
+/* Allocate a 2-D sample array: numrows rows of samplesperrow samples each */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  JSAMPARRAY result;
+  JSAMPROW workspace;
+  JDIMENSION rowsperchunk, currow, i;
+  long ltemp;
+
+  /* Max rows per allocation chunk; samplesperrow is the divisor (nonzero!) */
+  ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
+	  ((long) samplesperrow * SIZEOF(JSAMPLE));
+  if (ltemp <= 0)		/* not even one row fits in a chunk */
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  if (ltemp < (long) numrows)
+    rowsperchunk = (JDIMENSION) ltemp;
+  else
+    rowsperchunk = numrows;	/* whole array fits in a single chunk */
+  mem->last_rowsperchunk = rowsperchunk; /* read back by realize_virt_arrays */
+
+  /* Get space for the array of row pointers (small object) */
+  result = (JSAMPARRAY) alloc_small(cinfo, pool_id,
+				    (size_t) (numrows * SIZEOF(JSAMPROW)));
+
+  /* Get the rows themselves (large objects), one chunk per iteration */
+  currow = 0;
+  while (currow < numrows) {
+    rowsperchunk = MIN(rowsperchunk, numrows - currow); /* short last chunk */
+    workspace = (JSAMPROW) alloc_large(cinfo, pool_id,
+	(size_t) ((size_t) rowsperchunk * (size_t) samplesperrow
+		  * SIZEOF(JSAMPLE)));
+    for (i = rowsperchunk; i > 0; i--) { /* point each row into the chunk */
+      result[currow++] = workspace;
+      workspace += samplesperrow;
+    }
+  }
+
+  return result;
+}
+
+
+/*
+ * Creation of 2-D coefficient-block arrays.
+ * This is essentially the same as the code for sample arrays, above.
+ */
+
+METHODDEF(JBLOCKARRAY)
+alloc_barray (j_common_ptr cinfo, int pool_id,
+	      JDIMENSION blocksperrow, JDIMENSION numrows)
+/* Allocate a 2-D coefficient array: numrows rows of blocksperrow JBLOCKs */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  JBLOCKARRAY result;
+  JBLOCKROW workspace;
+  JDIMENSION rowsperchunk, currow, i;
+  long ltemp;
+
+  /* Max rows per allocation chunk; blocksperrow is the divisor (nonzero!) */
+  ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
+	  ((long) blocksperrow * SIZEOF(JBLOCK));
+  if (ltemp <= 0)		/* not even one row fits in a chunk */
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  if (ltemp < (long) numrows)
+    rowsperchunk = (JDIMENSION) ltemp;
+  else
+    rowsperchunk = numrows;	/* whole array fits in a single chunk */
+  mem->last_rowsperchunk = rowsperchunk; /* read back by realize_virt_arrays */
+
+  /* Get space for the array of row pointers (small object) */
+  result = (JBLOCKARRAY) alloc_small(cinfo, pool_id,
+				     (size_t) (numrows * SIZEOF(JBLOCKROW)));
+
+  /* Get the rows themselves (large objects), one chunk per iteration */
+  currow = 0;
+  while (currow < numrows) {
+    rowsperchunk = MIN(rowsperchunk, numrows - currow); /* short last chunk */
+    workspace = (JBLOCKROW) alloc_large(cinfo, pool_id,
+	(size_t) ((size_t) rowsperchunk * (size_t) blocksperrow
+		  * SIZEOF(JBLOCK)));
+    for (i = rowsperchunk; i > 0; i--) { /* point each row into the chunk */
+      result[currow++] = workspace;
+      workspace += blocksperrow;
+    }
+  }
+
+  return result;
+}
+
+
+/*
+ * About virtual array management:
+ *
+ * The above "normal" array routines are only used to allocate strip buffers
+ * (as wide as the image, but just a few rows high).  Full-image-sized buffers
+ * are handled as "virtual" arrays.  The array is still accessed a strip at a
+ * time, but the memory manager must save the whole array for repeated
+ * accesses.  The intended implementation is that there is a strip buffer in
+ * memory (as high as is possible given the desired memory limit), plus a
+ * backing file that holds the rest of the array.
+ *
+ * The request_virt_array routines are told the total size of the image and
+ * the maximum number of rows that will be accessed at once.  The in-memory
+ * buffer must be at least as large as the maxaccess value.
+ *
+ * The request routines create control blocks but not the in-memory buffers.
+ * That is postponed until realize_virt_arrays is called.  At that time the
+ * total amount of space needed is known (approximately, anyway), so free
+ * memory can be divided up fairly.
+ *
+ * The access_virt_array routines are responsible for making a specific strip
+ * area accessible (after reading or writing the backing file, if necessary).
+ * Note that the access routines are told whether the caller intends to modify
+ * the accessed strip; during a read-only pass this saves having to rewrite
+ * data to disk.  The access routines are also responsible for pre-zeroing
+ * any newly accessed rows, if pre-zeroing was requested.
+ *
+ * In current usage, the access requests are usually for nonoverlapping
+ * strips; that is, successive access start_row numbers differ by exactly
+ * num_rows = maxaccess.  This means we can get good performance with simple
+ * buffer dump/reload logic, by making the in-memory buffer be a multiple
+ * of the access height; then there will never be accesses across bufferload
+ * boundaries.  The code will still work with overlapping access requests,
+ * but it doesn't handle bufferload overlaps very efficiently.
+ */
+
+
+METHODDEF(jvirt_sarray_ptr)
+request_virt_sarray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
+		     JDIMENSION samplesperrow, JDIMENSION numrows,
+		     JDIMENSION maxaccess)
+/* Register a virtual 2-D sample array; its buffer is allocated later */
+{
+  my_mem_ptr mgr = (my_mem_ptr) cinfo->mem;
+  jvirt_sarray_ptr ctl;
+
+  /* Virtual arrays may only live in the IMAGE pool */
+  if (! (pool_id == JPOOL_IMAGE))
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* Carve the control block out of the small-object pool */
+  ctl = (jvirt_sarray_ptr)
+    alloc_small(cinfo, pool_id, SIZEOF(struct jvirt_sarray_control));
+
+  ctl->samplesperrow = samplesperrow;
+  ctl->rows_in_array = numrows;
+  ctl->maxaccess = maxaccess;
+  ctl->pre_zero = pre_zero;
+  ctl->mem_buffer = NULL;	/* NULL means "not realized yet" */
+  ctl->b_s_open = FALSE;	/* no backing store attached so far */
+  ctl->next = mgr->virt_sarray_list; /* link at head of the sarray list */
+  mgr->virt_sarray_list = ctl;
+
+  return ctl;
+}
+
+
+METHODDEF(jvirt_barray_ptr)
+request_virt_barray (j_common_ptr cinfo, int pool_id, boolean pre_zero,
+		     JDIMENSION blocksperrow, JDIMENSION numrows,
+		     JDIMENSION maxaccess)
+/* Register a virtual 2-D coefficient array; its buffer is allocated later */
+{
+  my_mem_ptr mgr = (my_mem_ptr) cinfo->mem;
+  jvirt_barray_ptr ctl;
+
+  /* Virtual arrays may only live in the IMAGE pool */
+  if (! (pool_id == JPOOL_IMAGE))
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+  /* Carve the control block out of the small-object pool */
+  ctl = (jvirt_barray_ptr)
+    alloc_small(cinfo, pool_id, SIZEOF(struct jvirt_barray_control));
+
+  ctl->blocksperrow = blocksperrow;
+  ctl->rows_in_array = numrows;
+  ctl->maxaccess = maxaccess;
+  ctl->pre_zero = pre_zero;
+  ctl->mem_buffer = NULL;	/* NULL means "not realized yet" */
+  ctl->b_s_open = FALSE;	/* no backing store attached so far */
+  ctl->next = mgr->virt_barray_list; /* link at head of the barray list */
+  mgr->virt_barray_list = ctl;
+
+  return ctl;
+}
+
+
+METHODDEF(void)
+realize_virt_arrays (j_common_ptr cinfo)
+/* Allocate the in-memory buffers for any unrealized virtual arrays */
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  long space_per_minheight, maximum_space, avail_mem;
+  long minheights, max_minheights;
+  jvirt_sarray_ptr sptr;
+  jvirt_barray_ptr bptr;
+
+  /* Compute the minimum space needed (maxaccess rows in each buffer)
+   * and the maximum space needed (full image height in each buffer).
+   * Both totals are passed to the system-dependent jpeg_mem_available.
+   */
+  space_per_minheight = 0;
+  maximum_space = 0;
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      space_per_minheight += (long) sptr->maxaccess *
+			     (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+      maximum_space += (long) sptr->rows_in_array *
+		       (long) sptr->samplesperrow * SIZEOF(JSAMPLE);
+    }
+  }
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      space_per_minheight += (long) bptr->maxaccess *
+			     (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+      maximum_space += (long) bptr->rows_in_array *
+		       (long) bptr->blocksperrow * SIZEOF(JBLOCK);
+    }
+  }
+
+  if (space_per_minheight <= 0)
+    return;			/* no unrealized arrays, no work */
+
+  /* Determine amount of memory to actually use; this is system-dependent. */
+  avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space,
+				 mem->total_space_allocated);
+
+  /* If the maximum space needed is available, make all the buffers full
+   * height; otherwise parcel it out with the same number of minheights
+   * (units of maxaccess rows) in each buffer.
+   */
+  if (avail_mem >= maximum_space)
+    max_minheights = 1000000000L; /* huge, so the fit test below succeeds */
+  else {
+    max_minheights = avail_mem / space_per_minheight;
+    /* If there doesn't seem to be enough space, try to get the minimum
+     * anyway.  This allows a "stub" implementation of jpeg_mem_available().
+     */
+    if (max_minheights <= 0)
+      max_minheights = 1;
+  }
+
+  /* Allocate the in-memory buffers and initialize backing store as needed. */
+
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      /* array height in units of maxaccess rows, rounded up */
+      minheights = ((long) sptr->rows_in_array - 1L) / sptr->maxaccess + 1L;
+      if (minheights <= max_minheights) {
+	/* This buffer fits in memory */
+	sptr->rows_in_mem = sptr->rows_in_array;
+      } else {
+	/* It doesn't fit in memory, create backing store. */
+	sptr->rows_in_mem = (JDIMENSION) (max_minheights * sptr->maxaccess);
+	jpeg_open_backing_store(cinfo, & sptr->b_s_info,
+				(long) sptr->rows_in_array *
+				(long) sptr->samplesperrow *
+				(long) SIZEOF(JSAMPLE));
+	sptr->b_s_open = TRUE;
+      }
+      sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
+				      sptr->samplesperrow, sptr->rows_in_mem);
+      sptr->rowsperchunk = mem->last_rowsperchunk; /* set by alloc_sarray */
+      sptr->cur_start_row = 0;
+      sptr->first_undef_row = 0;
+      sptr->dirty = FALSE;
+    }
+  }
+
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      minheights = ((long) bptr->rows_in_array - 1L) / bptr->maxaccess + 1L;
+      if (minheights <= max_minheights) {
+	/* This buffer fits in memory */
+	bptr->rows_in_mem = bptr->rows_in_array;
+      } else {
+	/* It doesn't fit in memory, create backing store. */
+	bptr->rows_in_mem = (JDIMENSION) (max_minheights * bptr->maxaccess);
+	jpeg_open_backing_store(cinfo, & bptr->b_s_info,
+				(long) bptr->rows_in_array *
+				(long) bptr->blocksperrow *
+				(long) SIZEOF(JBLOCK));
+	bptr->b_s_open = TRUE;
+      }
+      bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE,
+				      bptr->blocksperrow, bptr->rows_in_mem);
+      bptr->rowsperchunk = mem->last_rowsperchunk; /* set by alloc_barray */
+      bptr->cur_start_row = 0;
+      bptr->first_undef_row = 0;
+      bptr->dirty = FALSE;
+    }
+  }
+}
+
+
+LOCAL(void)
+do_sarray_io (j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing)
+/* Transfer the in-memory window of a virtual sample array to (writing) */
+/* or from (! writing) its backing store, one allocation chunk at a time */
+{
+  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
+
+  bytesperrow = (long) ptr->samplesperrow * SIZEOF(JSAMPLE);
+  file_offset = ptr->cur_start_row * bytesperrow; /* window start in file */
+  /* Loop to read or write each allocation chunk in mem_buffer */
+  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
+    /* One chunk, but check for short chunk at end of buffer */
+    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
+    /* Transfer no more than is currently defined */
+    thisrow = (long) ptr->cur_start_row + i; /* absolute row of chunk start */
+    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
+    /* Transfer no more than fits in file */
+    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
+    if (rows <= 0)		/* this chunk might be past end of file! */
+      break;
+    byte_count = rows * bytesperrow;
+    if (writing)
+      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
+					    (void FAR *) ptr->mem_buffer[i],
+					    file_offset, byte_count);
+    else
+      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
+					   (void FAR *) ptr->mem_buffer[i],
+					   file_offset, byte_count);
+    file_offset += byte_count;	/* next chunk follows on in the file */
+  }
+
+
+LOCAL(void)
+do_barray_io (j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing)
+/* Transfer the in-memory window of a virtual block array to (writing) */
+/* or from (! writing) its backing store, one allocation chunk at a time */
+{
+  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
+
+  bytesperrow = (long) ptr->blocksperrow * SIZEOF(JBLOCK);
+  file_offset = ptr->cur_start_row * bytesperrow; /* window start in file */
+  /* Loop to read or write each allocation chunk in mem_buffer */
+  for (i = 0; i < (long) ptr->rows_in_mem; i += ptr->rowsperchunk) {
+    /* One chunk, but check for short chunk at end of buffer */
+    rows = MIN((long) ptr->rowsperchunk, (long) ptr->rows_in_mem - i);
+    /* Transfer no more than is currently defined */
+    thisrow = (long) ptr->cur_start_row + i; /* absolute row of chunk start */
+    rows = MIN(rows, (long) ptr->first_undef_row - thisrow);
+    /* Transfer no more than fits in file */
+    rows = MIN(rows, (long) ptr->rows_in_array - thisrow);
+    if (rows <= 0)		/* this chunk might be past end of file! */
+      break;
+    byte_count = rows * bytesperrow;
+    if (writing)
+      (*ptr->b_s_info.write_backing_store) (cinfo, & ptr->b_s_info,
+					    (void FAR *) ptr->mem_buffer[i],
+					    file_offset, byte_count);
+    else
+      (*ptr->b_s_info.read_backing_store) (cinfo, & ptr->b_s_info,
+					   (void FAR *) ptr->mem_buffer[i],
+					   file_offset, byte_count);
+    file_offset += byte_count;	/* next chunk follows on in the file */
+  }
+
+
+METHODDEF(JSAMPARRAY)
+access_virt_sarray (j_common_ptr cinfo, jvirt_sarray_ptr ptr,
+		    JDIMENSION start_row, JDIMENSION num_rows,
+		    boolean writable)
+/* Make rows [start_row, start_row+num_rows) of a virtual sample array */
+/* accessible and return a pointer to their row pointers.  writable must */
+/* be TRUE if the caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;
+  JDIMENSION undef_row;
+
+  /* debugging check; end_row < start_row catches unsigned wraparound */
+  if (end_row < start_row || end_row > ptr->rows_in_array ||
+      num_rows > ptr->maxaccess || ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row+ptr->rows_in_mem) {
+    if (! ptr->b_s_open)	/* whole array should already be in memory */
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_sarray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long) end_row - (long) ptr->rows_in_mem;
+      if (ltemp < 0)
+	ltemp = 0;		/* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION) ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_sarray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)		/* writer skipped over a section of array */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;	/* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)
+      ptr->first_undef_row = end_row;
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t) ptr->samplesperrow * SIZEOF(JSAMPLE);
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+	jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+	undef_row++;
+      }
+    } else {
+      if (! writable)		/* reader looking at undefined data */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return address of proper part of the buffer */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+METHODDEF(JBLOCKARRAY)
+access_virt_barray (j_common_ptr cinfo, jvirt_barray_ptr ptr,
+		    JDIMENSION start_row, JDIMENSION num_rows,
+		    boolean writable)
+/* Make rows [start_row, start_row+num_rows) of a virtual block array */
+/* accessible and return a pointer to their row pointers.  writable must */
+/* be TRUE if the caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;
+  JDIMENSION undef_row;
+
+  /* debugging check; end_row < start_row catches unsigned wraparound */
+  if (end_row < start_row || end_row > ptr->rows_in_array ||
+      num_rows > ptr->maxaccess || ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row+ptr->rows_in_mem) {
+    if (! ptr->b_s_open)	/* whole array should already be in memory */
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_barray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long) end_row - (long) ptr->rows_in_mem;
+      if (ltemp < 0)
+	ltemp = 0;		/* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION) ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_barray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)		/* writer skipped over a section of array */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;	/* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)
+      ptr->first_undef_row = end_row;
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t) ptr->blocksperrow * SIZEOF(JBLOCK);
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+	jzero_far((void FAR *) ptr->mem_buffer[undef_row], bytesperrow);
+	undef_row++;
+      }
+    } else {
+      if (! writable)		/* reader looking at undefined data */
+	ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return address of proper part of the buffer */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+/*
+ * Release all objects belonging to a specified pool.
+ */
+
+METHODDEF(void)
+free_pool (j_common_ptr cinfo, int pool_id)
+{
+  my_mem_ptr mem = (my_mem_ptr) cinfo->mem;
+  small_pool_ptr shdr_ptr;
+  large_pool_ptr lhdr_ptr;
+  size_t space_freed;
+
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id);	/* safety check */
+
+#ifdef MEM_STATS
+  if (cinfo->err->trace_level > 1)
+    print_mem_stats(cinfo, pool_id); /* print pool's memory usage statistics */
+#endif
+
+  /* If freeing IMAGE pool, close any virtual arrays' backing store first */
+  if (pool_id == JPOOL_IMAGE) {
+    jvirt_sarray_ptr sptr;
+    jvirt_barray_ptr bptr;
+
+    for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+      if (sptr->b_s_open) {	/* there may be no backing store */
+	sptr->b_s_open = FALSE;	/* prevent recursive close if error */
+	(*sptr->b_s_info.close_backing_store) (cinfo, & sptr->b_s_info);
+      }
+    }
+    mem->virt_sarray_list = NULL; /* control blocks are freed with the pool */
+    for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+      if (bptr->b_s_open) {	/* there may be no backing store */
+	bptr->b_s_open = FALSE;	/* prevent recursive close if error */
+	(*bptr->b_s_info.close_backing_store) (cinfo, & bptr->b_s_info);
+      }
+    }
+    mem->virt_barray_list = NULL; /* control blocks are freed with the pool */
+  }
+
+  /* Release large objects; detach the list first, then walk and free it */
+  lhdr_ptr = mem->large_list[pool_id];
+  mem->large_list[pool_id] = NULL;
+
+  while (lhdr_ptr != NULL) {
+    large_pool_ptr next_lhdr_ptr = lhdr_ptr->hdr.next; /* save before free */
+    space_freed = lhdr_ptr->hdr.bytes_used +
+		  lhdr_ptr->hdr.bytes_left +
+		  SIZEOF(large_pool_hdr);
+    jpeg_free_large(cinfo, (void FAR *) lhdr_ptr, space_freed);
+    mem->total_space_allocated -= space_freed;
+    lhdr_ptr = next_lhdr_ptr;
+  }
+
+  /* Release small objects; same detach-then-walk scheme */
+  shdr_ptr = mem->small_list[pool_id];
+  mem->small_list[pool_id] = NULL;
+
+  while (shdr_ptr != NULL) {
+    small_pool_ptr next_shdr_ptr = shdr_ptr->hdr.next; /* save before free */
+    space_freed = shdr_ptr->hdr.bytes_used +
+		  shdr_ptr->hdr.bytes_left +
+		  SIZEOF(small_pool_hdr);
+    jpeg_free_small(cinfo, (void *) shdr_ptr, space_freed);
+    mem->total_space_allocated -= space_freed;
+    shdr_ptr = next_shdr_ptr;
+  }
+}
+
+
+/*
+ * Close up shop entirely.
+ * Note that this cannot be called unless cinfo->mem is non-NULL.
+ */
+
+METHODDEF(void)
+self_destruct (j_common_ptr cinfo)
+{
+  int pool_id = JPOOL_NUMPOOLS;
+
+  /* Tear down every pool, highest-numbered first; free_pool also closes
+   * backing store when it frees the IMAGE pool.  Going in reverse order
+   * may reduce fragmentation with some malloc implementations.
+   */
+  while (--pool_id >= JPOOL_PERMANENT) {
+    free_pool(cinfo, pool_id);
+  }
+
+  /* The memory manager control block itself goes last. */
+  jpeg_free_small(cinfo, (void *) cinfo->mem, SIZEOF(my_memory_mgr));
+  cinfo->mem = NULL;		/* guards against a second call */
+
+  jpeg_mem_term(cinfo);		/* system-dependent cleanup */
+}
+
+
+/*
+ * Memory manager initialization.
+ * When this is called, only the error manager pointer is valid in cinfo!
+ */
+
+GLOBAL(void)
+jinit_memory_mgr (j_common_ptr cinfo)
+{
+  my_mem_ptr mem;
+  long max_to_use;
+  int pool;
+  size_t test_mac;
+
+  cinfo->mem = NULL;		/* for safety if init fails */
+
+  /* Check for configuration errors.
+   * SIZEOF(ALIGN_TYPE) should be a power of 2; otherwise, it probably
+   * doesn't reflect any real hardware alignment requirement.
+   * The test is a little tricky: for X>0, X and X-1 have no one-bits
+   * in common if and only if X is a power of 2, ie has only one one-bit.
+   * Some compilers may give an "unreachable code" warning here; ignore it.
+   */
+  if ((SIZEOF(ALIGN_TYPE) & (SIZEOF(ALIGN_TYPE)-1)) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE);
+  /* MAX_ALLOC_CHUNK must be representable as type size_t, and must be
+   * a multiple of SIZEOF(ALIGN_TYPE).
+   * Again, an "unreachable code" warning may be ignored here.
+   * But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK.
+   */
+  test_mac = (size_t) MAX_ALLOC_CHUNK; /* round-trip through size_t */
+  if ((long) test_mac != MAX_ALLOC_CHUNK ||
+      (MAX_ALLOC_CHUNK % SIZEOF(ALIGN_TYPE)) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
+
+  max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */
+
+  /* Attempt to allocate memory manager's control block */
+  mem = (my_mem_ptr) jpeg_get_small(cinfo, SIZEOF(my_memory_mgr));
+
+  if (mem == NULL) {
+    jpeg_mem_term(cinfo);	/* undo jpeg_mem_init before bailing out */
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
+  }
+
+  /* OK, fill in the method pointers */
+  mem->pub.alloc_small = alloc_small;
+  mem->pub.alloc_large = alloc_large;
+  mem->pub.alloc_sarray = alloc_sarray;
+  mem->pub.alloc_barray = alloc_barray;
+  mem->pub.request_virt_sarray = request_virt_sarray;
+  mem->pub.request_virt_barray = request_virt_barray;
+  mem->pub.realize_virt_arrays = realize_virt_arrays;
+  mem->pub.access_virt_sarray = access_virt_sarray;
+  mem->pub.access_virt_barray = access_virt_barray;
+  mem->pub.free_pool = free_pool;
+  mem->pub.self_destruct = self_destruct;
+
+  /* Make MAX_ALLOC_CHUNK accessible to other modules */
+  mem->pub.max_alloc_chunk = MAX_ALLOC_CHUNK;
+
+  /* Initialize working state */
+  mem->pub.max_memory_to_use = max_to_use;
+
+  for (pool = JPOOL_NUMPOOLS-1; pool >= JPOOL_PERMANENT; pool--) {
+    mem->small_list[pool] = NULL;
+    mem->large_list[pool] = NULL;
+  }
+  mem->virt_sarray_list = NULL;
+  mem->virt_barray_list = NULL;
+
+  mem->total_space_allocated = SIZEOF(my_memory_mgr); /* the control block */
+
+  /* Declare ourselves open for business */
+  cinfo->mem = & mem->pub;
+
+  /* Check for an environment variable JPEGMEM; if found, override the
+   * default max_memory setting from jpeg_mem_init.  Note that the
+   * surrounding application may again override this value.
+   * If your system doesn't support getenv(), define NO_GETENV to disable
+   * this feature.
+   */
+#ifndef NO_GETENV
+  { char * memenv;
+
+    if ((memenv = getenv("JPEGMEM")) != NULL) {
+      char ch = 'x';		/* stays 'x' if no suffix char is read */
+
+      if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
+	if (ch == 'm' || ch == 'M') /* "m"/"M" suffix: scale by another 1000 */
+	  max_to_use *= 1000L;
+	mem->pub.max_memory_to_use = max_to_use * 1000L;
+      }
+    }
+  }
+#endif
+
+}
diff --git a/src/libjpeg/jmemnobs.c b/src/libjpeg/jmemnobs.c
new file mode 100644
index 0000000..eb8c337
--- /dev/null
+++ b/src/libjpeg/jmemnobs.c
@@ -0,0 +1,109 @@
+/*
+ * jmemnobs.c
+ *
+ * Copyright (C) 1992-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file provides a really simple implementation of the system-
+ * dependent portion of the JPEG memory manager.  This implementation
+ * assumes that no backing-store files are needed: all required space
+ * can be obtained from malloc().
+ * This is very portable in the sense that it'll compile on almost anything,
+ * but you'd better have lots of main memory (or virtual memory) if you want
+ * to process big images.
+ * Note that the max_memory_to_use option is ignored by this implementation.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"		/* import the system-dependent declarations */
+
+#ifndef HAVE_STDLIB_H		/* <stdlib.h> should declare malloc(),free() */
+extern void * malloc JPP((size_t size));
+extern void free JPP((void *ptr));
+#endif
+
+
+/*
+ * Memory allocation and freeing are controlled by the regular library
+ * routines malloc() and free().
+ */
+
+GLOBAL(void *)
+jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void *) malloc(sizeofobject);	/* plain malloc; cinfo is not used */
+}
+
+GLOBAL(void)
+jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject)
+{
+  free(object);			/* cinfo and sizeofobject are not used */
+}
+
+
+/*
+ * "Large" objects are treated the same as "small" ones.
+ * NB: although we include FAR keywords in the routine declarations,
+ * this file won't actually work in 80x86 small/medium model; at least,
+ * you probably won't be able to process useful-size images in only 64KB.
+ */
+
+GLOBAL(void FAR *)
+jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void FAR *) malloc(sizeofobject); /* same malloc as small objects */
+}
+
+GLOBAL(void)
+jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject)
+{
+  free(object);			/* cinfo and sizeofobject are not used */
+}
+
+
+/*
+ * This routine computes the total memory space available for allocation.
+ * Here we always say, "we got all you want bud!"
+ */
+
+GLOBAL(long)
+jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed,
+		    long max_bytes_needed, long already_allocated)
+{
+  return max_bytes_needed;	/* grant the maximum; other args are ignored */
+}
+
+
+/*
+ * Backing store (temporary file) management.
+ * Since jpeg_mem_available always promised the moon,
+ * this should never be called and we can just error out.
+ */
+
+GLOBAL(void)
+jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info,
+			 long total_bytes_needed)
+{
+  ERREXIT(cinfo, JERR_NO_BACKING_STORE); /* info is never filled in here */
+}
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.  Here, there isn't any.
+ */
+
+GLOBAL(long)
+jpeg_mem_init (j_common_ptr cinfo)
+{
+  return 0;			/* jinit_memory_mgr stores this as max_memory_to_use */
+}
+
+GLOBAL(void)
+jpeg_mem_term (j_common_ptr cinfo)
+{
+  /* no work: jpeg_mem_init above acquired nothing that needs releasing */
+}
diff --git a/src/libjpeg/jmemsys.h b/src/libjpeg/jmemsys.h
new file mode 100644
index 0000000..6c3c6d3
--- /dev/null
+++ b/src/libjpeg/jmemsys.h
@@ -0,0 +1,198 @@
+/*
+ * jmemsys.h
+ *
+ * Copyright (C) 1992-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This include file defines the interface between the system-independent
+ * and system-dependent portions of the JPEG memory manager.  No other
+ * modules need include it.  (The system-independent portion is jmemmgr.c;
+ * there are several different versions of the system-dependent portion.)
+ *
+ * This file works as-is for the system-dependent memory managers supplied
+ * in the IJG distribution.  You may need to modify it if you write a
+ * custom memory manager.  If system-dependent changes are needed in
+ * this file, the best method is to #ifdef them based on a configuration
+ * symbol supplied in jconfig.h, as we have done with USE_MSDOS_MEMMGR
+ * and USE_MAC_MEMMGR.
+ */
+
+
+/* Short forms of external names for systems with brain-damaged linkers. */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES	/* remap the eight entry points declared below */
+#define jpeg_get_small		jGetSmall
+#define jpeg_free_small		jFreeSmall
+#define jpeg_get_large		jGetLarge
+#define jpeg_free_large		jFreeLarge
+#define jpeg_mem_available	jMemAvail
+#define jpeg_open_backing_store	jOpenBackStore
+#define jpeg_mem_init		jMemInit
+#define jpeg_mem_term		jMemTerm
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/*
+ * These two functions are used to allocate and release small chunks of
+ * memory.  (Typically the total amount requested through jpeg_get_small is
+ * no more than 20K or so; this will be requested in chunks of a few K each.)
+ * Behavior should be the same as for the standard library functions malloc
+ * and free; in particular, jpeg_get_small must return NULL on failure.
+ * On most systems, these ARE malloc and free.  jpeg_free_small is passed the
+ * size of the object being freed, just in case it's needed.
+ * On an 80x86 machine using small-data memory model, these manage near heap.
+ */
+
+EXTERN(void *) jpeg_get_small JPP((j_common_ptr cinfo, size_t sizeofobject));
+EXTERN(void) jpeg_free_small JPP((j_common_ptr cinfo, void * object,
+				  size_t sizeofobject));
+
+/*
+ * These two functions are used to allocate and release large chunks of
+ * memory (up to the total free space designated by jpeg_mem_available).
+ * The interface is the same as above, except that on an 80x86 machine,
+ * far pointers are used.  On most other machines these are identical to
+ * the jpeg_get/free_small routines; but we keep them separate anyway,
+ * in case a different allocation strategy is desirable for large chunks.
+ */
+
+EXTERN(void FAR *) jpeg_get_large JPP((j_common_ptr cinfo,
+				       size_t sizeofobject));
+EXTERN(void) jpeg_free_large JPP((j_common_ptr cinfo, void FAR * object,
+				  size_t sizeofobject));
+
+/*
+ * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may
+ * be requested in a single call to jpeg_get_large (and jpeg_get_small for that
+ * matter, but that case should never come into play).  This macro is needed
+ * to model the 64Kb-segment-size limit of far addressing on 80x86 machines.
+ * On those machines, we expect that jconfig.h will provide a proper value.
+ * On machines with 32-bit flat address spaces, any large constant may be used.
+ *
+ * NB: jmemmgr.c expects that MAX_ALLOC_CHUNK will be representable as type
+ * size_t and will be a multiple of sizeof(align_type).
+ */
+
+#ifndef MAX_ALLOC_CHUNK		/* may be overridden in jconfig.h */
+#define MAX_ALLOC_CHUNK  1000000000L	/* default: 10^9 bytes per request */
+#endif
+
+/*
+ * This routine computes the total space still available for allocation by
+ * jpeg_get_large.  If more space than this is needed, backing store will be
+ * used.  NOTE: any memory already allocated must not be counted.
+ *
+ * There is a minimum space requirement, corresponding to the minimum
+ * feasible buffer sizes; jmemmgr.c will request that much space even if
+ * jpeg_mem_available returns zero.  The maximum space needed, enough to hold
+ * all working storage in memory, is also passed in case it is useful.
+ * Finally, the total space already allocated is passed.  If no better
+ * method is available, cinfo->mem->max_memory_to_use - already_allocated
+ * is often a suitable calculation.
+ *
+ * It is OK for jpeg_mem_available to underestimate the space available
+ * (that'll just lead to more backing-store access than is really necessary).
+ * However, an overestimate will lead to failure.  Hence it's wise to subtract
+ * a slop factor from the true available space.  5% should be enough.
+ *
+ * On machines with lots of virtual memory, any large constant may be returned.
+ * Conversely, zero may be returned to always use the minimum amount of memory.
+ */
+
+EXTERN(long) jpeg_mem_available JPP((j_common_ptr cinfo,
+				     long min_bytes_needed,
+				     long max_bytes_needed,
+				     long already_allocated));
+
+
+/*
+ * This structure holds whatever state is needed to access a single
+ * backing-store object.  The read/write/close method pointers are called
+ * by jmemmgr.c to manipulate the backing-store object; all other fields
+ * are private to the system-dependent backing store routines.
+ */
+
+#define TEMP_NAME_LENGTH   64	/* max length of a temporary file's name */
+
+
+#ifdef USE_MSDOS_MEMMGR		/* DOS-specific junk */
+
+typedef unsigned short XMSH;	/* type of extended-memory handles */
+typedef unsigned short EMSH;	/* type of expanded-memory handles */
+
+typedef union {			/* one handle; which member applies depends on the storage kind */
+  short file_handle;		/* DOS file handle if it's a temp file */
+  XMSH xms_handle;		/* handle if it's a chunk of XMS */
+  EMSH ems_handle;		/* handle if it's a chunk of EMS */
+} handle_union;			/* type of the "handle" field of backing_store_info */
+
+#endif /* USE_MSDOS_MEMMGR */
+
+#ifdef USE_MAC_MEMMGR		/* Mac-specific junk */
+#include <Files.h>
+#endif /* USE_MAC_MEMMGR */
+
+
+typedef struct backing_store_struct * backing_store_ptr;
+
+typedef struct backing_store_struct {
+  /* Method pointers for reading/writing/closing this backing-store object */
+  JMETHOD(void, read_backing_store, (j_common_ptr cinfo,
+				     backing_store_ptr info,
+				     void FAR * buffer_address,
+				     long file_offset, long byte_count));
+  JMETHOD(void, write_backing_store, (j_common_ptr cinfo,
+				      backing_store_ptr info,
+				      void FAR * buffer_address,
+				      long file_offset, long byte_count));
+  JMETHOD(void, close_backing_store, (j_common_ptr cinfo,
+				      backing_store_ptr info));
+
+  /* Private fields; exactly one of the three layouts below is compiled in */
+#ifdef USE_MSDOS_MEMMGR
+  /* For the MS-DOS manager (jmemdos.c), we need: */
+  handle_union handle;		/* reference to backing-store storage object */
+  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
+#else				/* not USE_MSDOS_MEMMGR */
+#ifdef USE_MAC_MEMMGR
+  /* For the Mac manager (jmemmac.c), we need: */
+  short temp_file;		/* file reference number to temp file */
+  FSSpec tempSpec;		/* the FSSpec for the temp file */
+  char temp_name[TEMP_NAME_LENGTH]; /* name if it's a file */
+#else				/* neither USE_MSDOS_MEMMGR nor USE_MAC_MEMMGR */
+  /* For a typical implementation with temp files, we need: */
+  FILE * temp_file;		/* stdio reference to temp file */
+  char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
+#endif				/* USE_MAC_MEMMGR */
+#endif				/* USE_MSDOS_MEMMGR */
+} backing_store_info;
+
+
+/*
+ * Initial opening of a backing-store object.  This must fill in the
+ * read/write/close pointers in the object.  The read/write routines
+ * may take an error exit if the specified maximum file size is exceeded.
+ * (If jpeg_mem_available always returns a large value, this routine can
+ * just take an error exit.)
+ */
+
+EXTERN(void) jpeg_open_backing_store JPP((j_common_ptr cinfo,
+					  backing_store_ptr info,
+					  long total_bytes_needed));
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.  jpeg_mem_init will be called before anything is
+ * allocated (and, therefore, nothing in cinfo is of use except the error
+ * manager pointer).  It should return a suitable default value for
+ * max_memory_to_use; this may subsequently be overridden by the surrounding
+ * application.  (Note that max_memory_to_use is only important if
+ * jpeg_mem_available chooses to consult it ... no one else will.)
+ * jpeg_mem_term may assume that all requested memory has been freed and that
+ * all opened backing-store objects have been closed.
+ */
+
+EXTERN(long) jpeg_mem_init JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_mem_term JPP((j_common_ptr cinfo));
diff --git a/src/libjpeg/jmorecfg.h b/src/libjpeg/jmorecfg.h
new file mode 100644
index 0000000..2182f15
--- /dev/null
+++ b/src/libjpeg/jmorecfg.h
@@ -0,0 +1,364 @@
+/*
+ * jmorecfg.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains additional configuration options that customize the
+ * JPEG software for special applications or support machine-dependent
+ * optimizations.  Most users will not need to touch this file.
+ */
+
+
+/*
+ * Define BITS_IN_JSAMPLE as either
+ *   8   for 8-bit sample values (the usual setting)
+ *   12  for 12-bit sample values
+ * Only 8 and 12 are legal data precisions for lossy JPEG according to the
+ * JPEG standard, and the IJG code does not support anything else!
+ * We do not support run-time selection of data precision, sorry.
+ */
+
+#define BITS_IN_JSAMPLE  8	/* use 8 or 12 */
+
+
+/*
+ * Maximum number of components (color channels) allowed in JPEG image.
+ * To meet the letter of the JPEG spec, set this to 255.  However, darn
+ * few applications need more than 4 channels (maybe 5 for CMYK + alpha
+ * mask).  We recommend 10 as a reasonable compromise; use 4 if you are
+ * really short on memory.  (Each allowed component costs a hundred or so
+ * bytes of storage, whether actually used in an image or not.)
+ */
+
+#define MAX_COMPONENTS  10	/* maximum number of image components */
+
+
+/*
+ * Basic data types.
+ * You may need to change these if you have a machine with unusual data
+ * type sizes; for example, "char" not 8 bits, "short" not 16 bits,
+ * or "long" not 32 bits.  We don't care whether "int" is 16 or 32 bits,
+ * but it had better be at least 16.
+ */
+
+/* Representation of a single sample (pixel element value).
+ * We frequently allocate large arrays of these, so it's important to keep
+ * them small.  But if you have memory to burn and access to char or short
+ * arrays is very slow on your hardware, you might want to change these.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+/* JSAMPLE should be the smallest type that will hold the values 0..255.
+ * A plain (possibly signed) char works too, provided GETJSAMPLE masks with 0xFF.
+ */
+
+#ifdef HAVE_UNSIGNED_CHAR
+
+typedef unsigned char JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))	/* already 0..255: just widen */
+
+#else /* not HAVE_UNSIGNED_CHAR */
+
+typedef char JSAMPLE;
+#ifdef CHAR_IS_UNSIGNED
+#define GETJSAMPLE(value)  ((int) (value))	/* plain char is unsigned: just widen */
+#else
+#define GETJSAMPLE(value)  ((int) (value) & 0xFF)	/* widen, then strip sign extension */
+#endif /* CHAR_IS_UNSIGNED */
+
+#endif /* HAVE_UNSIGNED_CHAR */
+
+#define MAXJSAMPLE	255	/* 2^8 - 1 */
+#define CENTERJSAMPLE	128	/* 2^7 */
+
+#endif /* BITS_IN_JSAMPLE == 8 */
+
+
+#if BITS_IN_JSAMPLE == 12
+/* JSAMPLE should be the smallest type that will hold the values 0..4095.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))	/* no mask: 0..4095 fits a short unsigned-free */
+
+#define MAXJSAMPLE	4095	/* 2^12 - 1 */
+#define CENTERJSAMPLE	2048	/* 2^11 */
+
+#endif /* BITS_IN_JSAMPLE == 12 */
+
+
+/* Representation of a DCT frequency coefficient.
+ * This should be a signed value of at least 16 bits; "short" is usually OK.
+ * Again, we allocate large arrays of these, but you can change to int
+ * if you have memory to burn and "short" is really slow.
+ */
+
+typedef short JCOEF;
+
+
+/* Compressed datastreams are represented as arrays of JOCTET.
+ * These must be EXACTLY 8 bits wide, at least once they are written to
+ * external storage.  Note that when using the stdio data source/destination
+ * managers, this is also the data type passed to fread/fwrite.
+ */
+
+#ifdef HAVE_UNSIGNED_CHAR
+
+typedef unsigned char JOCTET;
+#define GETJOCTET(value)  (value)	/* identity: value is already unsigned */
+
+#else /* not HAVE_UNSIGNED_CHAR */
+
+typedef char JOCTET;
+#ifdef CHAR_IS_UNSIGNED
+#define GETJOCTET(value)  (value)	/* identity: plain char is unsigned */
+#else
+#define GETJOCTET(value)  ((value) & 0xFF)	/* signed char: keep only the low 8 bits */
+#endif /* CHAR_IS_UNSIGNED */
+
+#endif /* HAVE_UNSIGNED_CHAR */
+
+
+/* These typedefs are used for various table entries and so forth.
+ * They must be at least as wide as specified; but making them too big
+ * won't cost a huge amount of memory, so we don't provide special
+ * extraction code like we did for JSAMPLE.  (In other words, these
+ * typedefs live at a different point on the speed/space tradeoff curve.)
+ */
+
+/* UINT8 must hold at least the values 0..255. */
+
+#ifdef HAVE_UNSIGNED_CHAR
+typedef unsigned char UINT8;
+#else /* not HAVE_UNSIGNED_CHAR */
+#ifdef CHAR_IS_UNSIGNED
+typedef char UINT8;		/* plain char already covers 0..255 */
+#else /* not CHAR_IS_UNSIGNED */
+typedef short UINT8;		/* signed char cannot hold 128..255, so widen */
+#endif /* CHAR_IS_UNSIGNED */
+#endif /* HAVE_UNSIGNED_CHAR */
+
+/* UINT16 must hold at least the values 0..65535. */
+
+#ifdef HAVE_UNSIGNED_SHORT
+typedef unsigned short UINT16;
+#else /* not HAVE_UNSIGNED_SHORT */
+typedef unsigned int UINT16;	/* fallback when unsigned short is unavailable */
+#endif /* HAVE_UNSIGNED_SHORT */
+
+/* INT16 must hold at least the values -32768..32767. */
+
+#ifndef XMD_H			/* X11/xmd.h correctly defines INT16 */
+typedef short INT16;		/* only declared when XMD_H is not yet defined */
+#endif
+
+/* INT32 must hold at least signed 32-bit values. */
+
+#ifndef XMD_H			/* X11/xmd.h correctly defines INT32 */
+typedef int JINT32;		/* NOTE(review): requires int to be at least 32 bits wide */
+#define INT32 JINT32		/* INT32 is a macro alias for JINT32, not a typedef */
+#endif
+
+/* Datatype used for image dimensions.  The JPEG standard only supports
+ * images up to 64K*64K due to 16-bit fields in SOF markers.  Therefore
+ * "unsigned int" is sufficient on all machines.  However, if you need to
+ * handle larger images and you don't mind deviating from the spec, you
+ * can change this datatype.
+ */
+
+typedef unsigned int JDIMENSION;	/* image-dimension type; limit is given below */
+
+#define JPEG_MAX_DIMENSION  65500L  /* a tad under 64K to prevent overflows */
+
+
+/* These macros are used in all function definitions and extern declarations.
+ * You could modify them if you need to change function linkage conventions;
+ * in particular, you'll need to do that to make the library a Windows DLL.
+ * Another application is to make all functions global for use with debuggers
+ * or code profilers that require it.
+ */
+
+/* a function called through method pointers (file-local, "static"): */
+#define METHODDEF(type)		static type
+/* a function used only in its module (also "static"): */
+#define LOCAL(type)		static type
+/* a function referenced through EXTERNs (external linkage, no keyword): */
+#define GLOBAL(type)		type
+/* a reference to a GLOBAL function (an "extern" declaration): */
+#define EXTERN(type)		extern type
+
+
+/* This macro is used to declare a "method", that is, a function pointer.
+ * We want to supply prototype parameters if the compiler can cope.
+ * Note that the arglist parameter must be parenthesized!
+ * Again, you can customize this if you need special linkage keywords.
+ */
+
+#ifdef HAVE_PROTOTYPES		/* pointer carries its full parameter list */
+#define JMETHOD(type,methodname,arglist)  type (*methodname) arglist
+#else				/* arglist is dropped: unprototyped pointer */
+#define JMETHOD(type,methodname,arglist)  type (*methodname) ()
+#endif				/* HAVE_PROTOTYPES */
+
+
+/* Here is the pseudo-keyword for declaring pointers that must be "far"
+ * on 80x86 machines.  Most of the specialized coding for 80x86 is handled
+ * by just saying "FAR *" where such a pointer is needed.  In a few places
+ * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol.
+ */
+
+#ifdef NEED_FAR_POINTERS
+#define FAR  far		/* compiler's own "far" pointer keyword */
+#else
+#define FAR			/* expands to nothing, so "FAR *" is a plain pointer */
+#endif				/* NEED_FAR_POINTERS */
+
+
+/*
+ * On a few systems, type boolean and/or its values FALSE, TRUE may appear
+ * in standard header files.  Or you may have conflicts with application-
+ * specific header files that you want to include together with these files.
+ * Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
+ */
+
+#ifndef HAVE_BOOLEAN
+typedef int boolean;		/* skipped when the includer defines HAVE_BOOLEAN */
+#endif
+#ifndef FALSE			/* in case these macros already exist */
+#define FALSE	0		/* values of boolean */
+#endif
+#ifndef TRUE			/* guarded independently of FALSE */
+#define TRUE	1
+#endif
+
+
+/*
+ * The remaining options affect code selection within the JPEG library,
+ * but they don't need to be visible to most applications using the library.
+ * To minimize application namespace pollution, the symbols won't be
+ * defined unless JPEG_INTERNALS or JPEG_INTERNAL_OPTIONS has been defined.
+ */
+
+#ifdef JPEG_INTERNALS
+#define JPEG_INTERNAL_OPTIONS
+#endif
+
+#ifdef JPEG_INTERNAL_OPTIONS
+
+
+/*
+ * These defines indicate whether to include various optional functions.
+ * Undefining some of these symbols will produce a smaller but less capable
+ * library.  Note that you can leave certain source files out of the
+ * compilation/linking process if you've #undef'd the corresponding symbols.
+ * (You may HAVE to do that if your compiler doesn't like null source files.)
+ */
+
+/* Arithmetic coding is unsupported for legal reasons.  Complaints to IBM. */
+
+/* Capability options common to encoder and decoder: */
+
+#define DCT_ISLOW_SUPPORTED	/* slow but accurate integer algorithm */
+#define DCT_IFAST_SUPPORTED	/* faster, less accurate integer method */
+#define DCT_FLOAT_SUPPORTED	/* floating-point: accurate, fast on fast HW */
+
+/* Encoder capability options: */
+
+#undef  C_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
+#define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
+#define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
+/* Note: if you selected 12-bit data precision, it is dangerous to turn off
+ * ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only good for 8-bit
+ * precision, so jchuff.c normally uses entropy optimization to compute
+ * usable tables for higher precision.  If you don't want to do optimization,
+ * you'll have to supply different default Huffman tables.
+ * The exact same statements apply for progressive JPEG: the default tables
+ * don't work for progressive mode.  (This may get fixed, however.)
+ */
+#define INPUT_SMOOTHING_SUPPORTED   /* Input image smoothing option? */
+
+/* Decoder capability options: */
+
+#undef  D_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
+#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
+#define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
+#define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
+#define IDCT_SCALING_SUPPORTED	    /* Output rescaling via IDCT? */
+#undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
+#define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
+#define QUANT_1PASS_SUPPORTED	    /* 1-pass color quantization? */
+#define QUANT_2PASS_SUPPORTED	    /* 2-pass color quantization? */
+
+/* more capability options later, no doubt */
+
+
+/*
+ * Ordering of RGB data in scanlines passed to or from the application.
+ * If your application wants to deal with data in the order B,G,R, just
+ * change these macros.  You can also deal with formats such as R,G,B,X
+ * (one extra byte per pixel) by changing RGB_PIXELSIZE.  Note that changing
+ * the offsets will also change the order in which colormap data is organized.
+ * RESTRICTIONS:
+ * 1. The sample applications cjpeg,djpeg do NOT support modified RGB formats.
+ * 2. These macros only affect RGB<=>YCbCr color conversion, so they are not
+ *    useful if you are using JPEG color spaces other than YCbCr or grayscale.
+ * 3. The color quantizer modules will not behave desirably if RGB_PIXELSIZE
+ *    is not 3 (they don't understand about dummy color components!).  So you
+ *    can't use color quantization if you change that value.
+ */
+
+#define RGB_RED		0	/* Offset of Red in an RGB scanline element */
+#define RGB_GREEN	1	/* Offset of Green */
+#define RGB_BLUE	2	/* Offset of Blue */
+#define RGB_PIXELSIZE	3	/* JSAMPLEs per RGB scanline element */
+
+
+/* Definitions for speed-related optimizations. */
+
+
+/* If your compiler supports inline functions, define INLINE
+ * as the inline keyword; otherwise define it as empty.
+ */
+
+#ifndef INLINE			/* keep any definition supplied earlier */
+#ifdef __GNUC__			/* for instance, GNU C knows about inline */
+#define INLINE __inline__
+#endif
+#ifndef INLINE			/* still undefined, i.e. not GNU C */
+#define INLINE			/* default is to define it as empty */
+#endif
+#endif				/* outer INLINE guard */
+
+
+/* On some machines (notably 68000 series) "int" is 32 bits, but multiplying
+ * two 16-bit shorts is faster than multiplying two ints.  Define MULTIPLIER
+ * as short on such a machine.  MULTIPLIER must be at least 16 bits wide.
+ */
+
+#ifndef MULTIPLIER		/* an earlier definition takes precedence */
+#define MULTIPLIER  int		/* type for fastest integer multiply */
+#endif
+
+
+/* FAST_FLOAT should be either float or double, whichever is done faster
+ * by your compiler.  (Note that this type is only used in the floating point
+ * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.)
+ * Typically, float is faster in ANSI C compilers, while double is faster in
+ * pre-ANSI compilers (because they insist on converting to double anyway).
+ * The code below therefore chooses float if we have ANSI-style prototypes.
+ */
+
+#ifndef FAST_FLOAT		/* an earlier definition takes precedence */
+#ifdef HAVE_PROTOTYPES		/* used here as the marker for an ANSI compiler */
+#define FAST_FLOAT  float
+#else
+#define FAST_FLOAT  double
+#endif				/* HAVE_PROTOTYPES */
+#endif				/* FAST_FLOAT */
+
+#endif /* JPEG_INTERNAL_OPTIONS */
diff --git a/src/libjpeg/jpegint.h b/src/libjpeg/jpegint.h
new file mode 100644
index 0000000..95b00d4
--- /dev/null
+++ b/src/libjpeg/jpegint.h
@@ -0,0 +1,392 @@
+/*
+ * jpegint.h
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file provides common declarations for the various JPEG modules.
+ * These declarations are considered internal to the JPEG library; most
+ * applications using the library shouldn't need to include this file.
+ */
+
+
+/* Declarations for both compression & decompression */
+
+typedef enum {			/* Operating modes for buffer controllers */
+	JBUF_PASS_THRU,		/* Plain stripwise operation */
+	/* Remaining modes require a full-image buffer to have been created */
+	JBUF_SAVE_SOURCE,	/* Run source subobject only, save output */
+	JBUF_CRANK_DEST,	/* Run dest subobject only, using saved data */
+	JBUF_SAVE_AND_PASS	/* Run both subobjects, save output */
+} J_BUF_MODE;
+
+/* Values of global_state field (jdapi.c has some dependencies on ordering!) */
+#define CSTATE_START	100	/* after create_compress */
+#define CSTATE_SCANNING	101	/* start_compress done, write_scanlines OK */
+#define CSTATE_RAW_OK	102	/* start_compress done, write_raw_data OK */
+#define CSTATE_WRCOEFS	103	/* jpeg_write_coefficients done */
+#define DSTATE_START	200	/* after create_decompress */
+#define DSTATE_INHEADER	201	/* reading header markers, no SOS yet */
+#define DSTATE_READY	202	/* found SOS, ready for start_decompress */
+#define DSTATE_PRELOAD	203	/* reading multiscan file in start_decompress*/
+#define DSTATE_PRESCAN	204	/* performing dummy pass for 2-pass quant */
+#define DSTATE_SCANNING	205	/* start_decompress done, read_scanlines OK */
+#define DSTATE_RAW_OK	206	/* start_decompress done, read_raw_data OK */
+#define DSTATE_BUFIMAGE	207	/* expecting jpeg_start_output */
+#define DSTATE_BUFPOST	208	/* looking for SOS/EOI in jpeg_finish_output */
+#define DSTATE_RDCOEFS	209	/* reading file in jpeg_read_coefficients */
+#define DSTATE_STOPPING	210	/* looking for EOI in jpeg_finish_decompress */
+
+
+/* Declarations for compression modules */
+
+/* Master control module */
+struct jpeg_comp_master {
+  JMETHOD(void, prepare_for_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, pass_startup, (j_compress_ptr cinfo));
+  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean call_pass_startup;	/* True if pass_startup must be called */
+  boolean is_last_pass;		/* True during last pass */
+};
+
+/* Main buffer control (downsampled-data buffer) */
+struct jpeg_c_main_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, process_data, (j_compress_ptr cinfo,
+			       JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
+			       JDIMENSION in_rows_avail));
+};
+
+/* Compression preprocessing (downsampling input buffer control) */
+struct jpeg_c_prep_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, pre_process_data, (j_compress_ptr cinfo,
+				   JSAMPARRAY input_buf,
+				   JDIMENSION *in_row_ctr,
+				   JDIMENSION in_rows_avail,
+				   JSAMPIMAGE output_buf,
+				   JDIMENSION *out_row_group_ctr,
+				   JDIMENSION out_row_groups_avail));
+};
+
+/* Coefficient buffer control */
+struct jpeg_c_coef_controller {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(boolean, compress_data, (j_compress_ptr cinfo,
+				   JSAMPIMAGE input_buf));
+};
+
+/* Colorspace conversion */
+struct jpeg_color_converter {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, color_convert, (j_compress_ptr cinfo,
+				JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+				JDIMENSION output_row, int num_rows));
+};
+
+/* Downsampling */
+struct jpeg_downsampler {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  JMETHOD(void, downsample, (j_compress_ptr cinfo,
+			     JSAMPIMAGE input_buf, JDIMENSION in_row_index,
+			     JSAMPIMAGE output_buf,
+			     JDIMENSION out_row_group_index));
+
+  boolean need_context_rows;	/* TRUE if need rows above & below */
+};
+
+/* Forward DCT (also controls coefficient quantization) */
+struct jpeg_forward_dct {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo));
+  /* perhaps this should be an array??? */
+  JMETHOD(void, forward_DCT, (j_compress_ptr cinfo,
+			      jpeg_component_info * compptr,
+			      JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+			      JDIMENSION start_row, JDIMENSION start_col,
+			      JDIMENSION num_blocks));
+};
+
+/* Entropy encoding */
+struct jpeg_entropy_encoder {
+  JMETHOD(void, start_pass, (j_compress_ptr cinfo, boolean gather_statistics));
+  JMETHOD(boolean, encode_mcu, (j_compress_ptr cinfo, JBLOCKROW *MCU_data));
+  JMETHOD(void, finish_pass, (j_compress_ptr cinfo));
+};
+
+/* Marker writing */
+struct jpeg_marker_writer {
+  JMETHOD(void, write_file_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_frame_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_scan_header, (j_compress_ptr cinfo));
+  JMETHOD(void, write_file_trailer, (j_compress_ptr cinfo));
+  JMETHOD(void, write_tables_only, (j_compress_ptr cinfo));
+  /* These routines are exported to allow insertion of extra markers */
+  /* Probably only COM and APPn markers should be written this way */
+  JMETHOD(void, write_marker_header, (j_compress_ptr cinfo, int marker,
+				      unsigned int datalen));
+  JMETHOD(void, write_marker_byte, (j_compress_ptr cinfo, int val));
+};
+
+
+/* Declarations for decompression modules */
+
+/* Master control module */
+struct jpeg_decomp_master {
+  JMETHOD(void, prepare_for_output_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, finish_output_pass, (j_decompress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean is_dummy_pass;	/* True during 1st pass for 2-pass quant */
+};
+
+/* Input control module */
+struct jpeg_input_controller {
+  JMETHOD(int, consume_input, (j_decompress_ptr cinfo));
+  JMETHOD(void, reset_input_controller, (j_decompress_ptr cinfo));
+  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, finish_input_pass, (j_decompress_ptr cinfo));
+
+  /* State variables made visible to other modules */
+  boolean has_multiple_scans;	/* True if file has multiple scans */
+  boolean eoi_reached;		/* True when EOI has been consumed */
+};
+
+/* Main buffer control (downsampled-data buffer) */
+struct jpeg_d_main_controller {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, process_data, (j_decompress_ptr cinfo,
+			       JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+			       JDIMENSION out_rows_avail));
+};
+
+/* Coefficient buffer control */
+struct jpeg_d_coef_controller {
+  JMETHOD(void, start_input_pass, (j_decompress_ptr cinfo));
+  JMETHOD(int, consume_data, (j_decompress_ptr cinfo));
+  JMETHOD(void, start_output_pass, (j_decompress_ptr cinfo));
+  JMETHOD(int, decompress_data, (j_decompress_ptr cinfo,
+				 JSAMPIMAGE output_buf));
+  /* Pointer to array of coefficient virtual arrays, or NULL if none */
+  jvirt_barray_ptr *coef_arrays;
+};
+
+/* Decompression postprocessing (color quantization buffer control) */
+struct jpeg_d_post_controller {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, J_BUF_MODE pass_mode));
+  JMETHOD(void, post_process_data, (j_decompress_ptr cinfo,
+				    JSAMPIMAGE input_buf,
+				    JDIMENSION *in_row_group_ctr,
+				    JDIMENSION in_row_groups_avail,
+				    JSAMPARRAY output_buf,
+				    JDIMENSION *out_row_ctr,
+				    JDIMENSION out_rows_avail));
+};
+
+/* Marker reading & parsing */
+struct jpeg_marker_reader {
+  JMETHOD(void, reset_marker_reader, (j_decompress_ptr cinfo));
+  /* Read markers until SOS or EOI.
+   * Returns same codes as are defined for jpeg_consume_input:
+   * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+   */
+  JMETHOD(int, read_markers, (j_decompress_ptr cinfo));
+  /* Read a restart marker --- exported for use by entropy decoder only */
+  jpeg_marker_parser_method read_restart_marker;
+
+  /* State of marker reader --- nominally internal, but applications
+   * supplying COM or APPn handlers might like to know the state.
+   */
+  boolean saw_SOI;		/* found SOI? */
+  boolean saw_SOF;		/* found SOF? */
+  int next_restart_num;		/* next restart number expected (0-7) */
+  unsigned int discarded_bytes;	/* # of bytes skipped looking for a marker */
+};
+
+/* Entropy decoding */
+struct jpeg_entropy_decoder {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo,
+				JBLOCKROW *MCU_data));
+
+  /* This is here to share code between baseline and progressive decoders; */
+  /* other modules probably should not use it */
+  boolean insufficient_data;	/* set TRUE after emitting warning */
+};
+
+/* Inverse DCT (also performs dequantization) */
+typedef JMETHOD(void, inverse_DCT_method_ptr,
+		(j_decompress_ptr cinfo, jpeg_component_info * compptr,
+		 JCOEFPTR coef_block,
+		 JSAMPARRAY output_buf, JDIMENSION output_col));
+
+struct jpeg_inverse_dct {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  /* It is useful to allow each component to have a separate IDCT method. */
+  inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS];	/* one slot per allowed component */
+};
+
+/* Upsampling (note that upsampler must also call color converter) */
+struct jpeg_upsampler {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, upsample, (j_decompress_ptr cinfo,
+			   JSAMPIMAGE input_buf,
+			   JDIMENSION *in_row_group_ctr,
+			   JDIMENSION in_row_groups_avail,
+			   JSAMPARRAY output_buf,
+			   JDIMENSION *out_row_ctr,
+			   JDIMENSION out_rows_avail));
+
+  boolean need_context_rows;	/* TRUE if need rows above & below */
+};
+
+/* Colorspace conversion */
+struct jpeg_color_deconverter {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, color_convert, (j_decompress_ptr cinfo,
+				JSAMPIMAGE input_buf, JDIMENSION input_row,
+				JSAMPARRAY output_buf, int num_rows));
+};
+
+/* Color quantization or color precision reduction (decompression side) */
+struct jpeg_color_quantizer {
+  JMETHOD(void, start_pass, (j_decompress_ptr cinfo, boolean is_pre_scan));
+  JMETHOD(void, color_quantize, (j_decompress_ptr cinfo,
+				 JSAMPARRAY input_buf, JSAMPARRAY output_buf,
+				 int num_rows));
+  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
+  JMETHOD(void, new_color_map, (j_decompress_ptr cinfo));
+};
+
+
+/* Miscellaneous useful macros; arguments must be free of side effects */
+
+#undef MAX
+#define MAX(a,b)	((a) > (b) ? (a) : (b))	/* evaluates an argument twice */
+#undef MIN
+#define MIN(a,b)	((a) < (b) ? (a) : (b))	/* evaluates an argument twice */
+
+
+/* We assume that right shift corresponds to signed division by 2 with
+ * rounding towards minus infinity.  This is correct for typical "arithmetic
+ * shift" instructions that shift in copies of the sign bit.  But some
+ * C compilers implement >> with an unsigned shift.  For these machines you
+ * must define RIGHT_SHIFT_IS_UNSIGNED.  RIGHT_SHIFT provides a proper signed
+ * right shift of an INT32 quantity (the emulation's mask assumes 32 bits).
+ * It is only applied with constant shift counts.  SHIFT_TEMPS must be
+ * included in the variables of any routine using RIGHT_SHIFT.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define SHIFT_TEMPS	INT32 shift_temp;	/* scratch variable used by RIGHT_SHIFT */
+#define RIGHT_SHIFT(x,shft)  \
+	((shift_temp = (x)) < 0 ? \
+	 (shift_temp >> (shft)) | ((~((INT32) 0)) << (32-(shft))) : \
+	 (shift_temp >> (shft)))
+#else /* >> is used directly; no scratch variable is needed */
+#define SHIFT_TEMPS
+#define RIGHT_SHIFT(x,shft)	((x) >> (shft))
+#endif /* RIGHT_SHIFT_IS_UNSIGNED */
+
+
+/* Short aliases for the external names below, enabled on request for */
+/* systems whose linkers cannot cope with the full-length names. */
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jinit_compress_master	jICompress
+#define jinit_c_master_control	jICMaster
+#define jinit_c_main_controller	jICMainC
+#define jinit_c_prep_controller	jICPrepC
+#define jinit_c_coef_controller	jICCoefC
+#define jinit_color_converter	jICColor
+#define jinit_downsampler	jIDownsampler
+#define jinit_forward_dct	jIFDCT
+#define jinit_huff_encoder	jIHEncoder
+#define jinit_phuff_encoder	jIPHEncoder
+#define jinit_marker_writer	jIMWriter
+#define jinit_master_decompress	jIDMaster
+#define jinit_d_main_controller	jIDMainC
+#define jinit_d_coef_controller	jIDCoefC
+#define jinit_d_post_controller	jIDPostC
+#define jinit_input_controller	jIInCtlr
+#define jinit_marker_reader	jIMReader
+#define jinit_huff_decoder	jIHDecoder
+#define jinit_phuff_decoder	jIPHDecoder
+#define jinit_inverse_dct	jIIDCT
+#define jinit_upsampler		jIUpsampler
+#define jinit_color_deconverter	jIDColor
+#define jinit_1pass_quantizer	jI1Quant
+#define jinit_2pass_quantizer	jI2Quant
+#define jinit_merged_upsampler	jIMUpsampler
+#define jinit_memory_mgr	jIMemMgr
+#define jdiv_round_up		jDivRound
+#define jround_up		jRound
+#define jcopy_sample_rows	jCopySamples
+#define jcopy_block_row		jCopyBlocks
+#define jzero_far		jZeroFar
+#define jpeg_zigzag_order	jZIGTable
+#define jpeg_natural_order	jZAGTable
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* Compression module initialization routines (all take a j_compress_ptr) */
+EXTERN(void) jinit_compress_master JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_c_master_control JPP((j_compress_ptr cinfo,
+					 boolean transcode_only));
+EXTERN(void) jinit_c_main_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_c_prep_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_c_coef_controller JPP((j_compress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_phuff_encoder JPP((j_compress_ptr cinfo));
+EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo));
+/* Decompression module initialization routines (all take a j_decompress_ptr) */
+EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_d_main_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_d_coef_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo,
+					  boolean need_full_buffer));
+EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_phuff_decoder JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_1pass_quantizer JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_2pass_quantizer JPP((j_decompress_ptr cinfo));
+EXTERN(void) jinit_merged_upsampler JPP((j_decompress_ptr cinfo));
+/* Memory manager initialization (takes j_common_ptr, so either object kind) */
+EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo));
+
+/* Utility routines defined in jutils.c (bodies are not in this header) */
+EXTERN(long) jdiv_round_up JPP((long a, long b));
+EXTERN(long) jround_up JPP((long a, long b));
+EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array, int source_row,
+				    JSAMPARRAY output_array, int dest_row,
+				    int num_rows, JDIMENSION num_cols));
+EXTERN(void) jcopy_block_row JPP((JBLOCKROW input_row, JBLOCKROW output_row,
+				  JDIMENSION num_blocks));
+EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero));
+/* Constant tables defined in jutils.c */
+#if 0				/* This table is not actually needed in v6a */
+extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
+#endif /* 0: jpeg_zigzag_order declaration is disabled */
+extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
+
+/* Suppress undefined-structure complaints, if necessary, with dummy bodies. */
+
+#ifdef INCOMPLETE_TYPES_BROKEN
+#ifndef AM_MEMORY_MANAGER	/* only jmemmgr.c defines these */
+struct jvirt_sarray_control { long dummy; };
+struct jvirt_barray_control { long dummy; };
+#endif /* !AM_MEMORY_MANAGER */
+#endif /* INCOMPLETE_TYPES_BROKEN */
diff --git a/src/libjpeg/jpeglib.h b/src/libjpeg/jpeglib.h
new file mode 100644
index 0000000..d1be8dd
--- /dev/null
+++ b/src/libjpeg/jpeglib.h
@@ -0,0 +1,1096 @@
+/*
+ * jpeglib.h
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file defines the application interface for the JPEG library.
+ * Most applications using the library need only include this file,
+ * and perhaps jerror.h if they want to know the exact error codes.
+ */
+
+#ifndef JPEGLIB_H
+#define JPEGLIB_H
+
+/*
+ * First we include the configuration files that record how this
+ * installation of the JPEG library is set up.  jconfig.h can be
+ * generated automatically for many systems.  jmorecfg.h contains
+ * manual configuration options that most people need not worry about.
+ */
+
+#ifndef JCONFIG_INCLUDED	/* in case jinclude.h already did */
+#include "jconfig.h"		/* widely used configuration options */
+#endif
+#include "jmorecfg.h"		/* seldom changed options */
+
+
+/* Version ID for the JPEG library.
+ * Useful for compile-time tests such as "#if JPEG_LIB_VERSION >= 60".
+ */
+
+#define JPEG_LIB_VERSION  62	/* Version 6b */
+
+
+/* Various constants determining the sizes of things.
+ * All of these are specified by the JPEG standard, so don't change them
+ * if you want to be compatible.
+ */
+
+#define DCTSIZE		    8	/* The basic DCT block is 8x8 samples */
+#define DCTSIZE2	    64	/* DCTSIZE squared; # of elements in a block */
+#define NUM_QUANT_TBLS      4	/* Quantization tables are numbered 0..3 */
+#define NUM_HUFF_TBLS       4	/* Huffman tables are numbered 0..3 */
+#define NUM_ARITH_TBLS      16	/* Arith-coding tables are numbered 0..15 */
+#define MAX_COMPS_IN_SCAN   4	/* JPEG limit on # of components in one scan */
+#define MAX_SAMP_FACTOR     4	/* JPEG limit on sampling factors */
+/* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard;
+ * the PostScript DCT filter can emit files with many more than 10 blocks/MCU.
+ * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU
+ * to handle it.  We even let you do this from the jconfig.h file.  However,
+ * we strongly discourage changing C_MAX_BLOCKS_IN_MCU; just because Adobe
+ * sometimes emits noncompliant files doesn't mean you should too.
+ */
+#define C_MAX_BLOCKS_IN_MCU   10 /* compressor's limit on blocks per MCU */
+#ifndef D_MAX_BLOCKS_IN_MCU	/* may be overridden in jconfig.h */
+#define D_MAX_BLOCKS_IN_MCU   10 /* decompressor's limit on blocks per MCU */
+#endif /* D_MAX_BLOCKS_IN_MCU */
+
+
+/* Data structures for images (arrays of samples and of DCT coefficients).
+ * On 80x86 machines, the image arrays are too big for near pointers,
+ * but the pointer arrays can fit in near memory.
+ */
+
+typedef JSAMPLE FAR *JSAMPROW;	/* ptr to one image row of pixel samples. */
+typedef JSAMPROW *JSAMPARRAY;	/* ptr to some rows (a 2-D sample array) */
+typedef JSAMPARRAY *JSAMPIMAGE;	/* a 3-D sample array: top index is color */
+
+typedef JCOEF JBLOCK[DCTSIZE2];	/* one block of coefficients */
+typedef JBLOCK FAR *JBLOCKROW;	/* pointer to one row of coefficient blocks */
+typedef JBLOCKROW *JBLOCKARRAY;		/* ptr to some block rows (a 2-D block array) */
+typedef JBLOCKARRAY *JBLOCKIMAGE;	/* a 3-D array of coefficient blocks */
+
+typedef JCOEF FAR *JCOEFPTR;	/* ptr to coefficients; useful in a couple of places */
+
+
+/* Types for JPEG compression parameters and working tables. */
+
+
+/* One DCT coefficient quantization table. */
+
+typedef struct {
+  /* This array gives the coefficient quantizers in natural array order
+   * (not the zigzag order in which they are stored in a JPEG DQT marker).
+   * CAUTION: IJG versions prior to v6a kept this array in zigzag order.
+   */
+  UINT16 quantval[DCTSIZE2];	/* quantization step for each coefficient */
+  /* sent_table is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;		/* TRUE when table has been output */
+} JQUANT_TBL;
+
+
+/* One Huffman coding table (the type serves both DC and AC tables). */
+
+typedef struct {
+  /* These two fields directly represent the contents of a JPEG DHT marker */
+  UINT8 bits[17];		/* bits[k] = # of symbols with codes of */
+				/* length k bits; bits[0] is unused */
+  UINT8 huffval[256];		/* The symbols, in order of incr code length */
+  /* sent_table is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;		/* TRUE when table has been output */
+} JHUFF_TBL;
+
+
+/* Basic info about one component (color channel). */
+
+typedef struct {
+  /* These values are fixed over the whole image. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOF marker. */
+  int component_id;		/* identifier for this component (0..255) */
+  int component_index;		/* its index in SOF or cinfo->comp_info[] */
+  int h_samp_factor;		/* horizontal sampling factor (1..4) */
+  int v_samp_factor;		/* vertical sampling factor (1..4) */
+  int quant_tbl_no;		/* quantization table selector (0..3) */
+  /* These values may vary between scans. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOS marker. */
+  /* The decompressor output side may not use these variables. */
+  int dc_tbl_no;		/* DC entropy table selector (0..3) */
+  int ac_tbl_no;		/* AC entropy table selector (0..3) */
+  
+  /* Remaining fields should be treated as private by applications. */
+  
+  /* These values are computed during compression or decompression startup: */
+  /* Component's size in DCT blocks.
+   * Any dummy blocks added to complete an MCU are not counted; therefore
+   * these values do not depend on whether a scan is interleaved or not.
+   */
+  JDIMENSION width_in_blocks;	/* component width, in DCT blocks */
+  JDIMENSION height_in_blocks;	/* component height, in DCT blocks */
+  /* Size of a DCT block in samples.  Always DCTSIZE for compression.
+   * For decompression this is the size of the output from one DCT block,
+   * reflecting any scaling we choose to apply during the IDCT step.
+   * Values of 1,2,4,8 are likely to be supported.  Note that different
+   * components may receive different IDCT scalings.
+   */
+  int DCT_scaled_size;
+  /* The downsampled dimensions are the component's actual, unpadded number
+   * of samples at the main buffer (preprocessing/compression interface), thus
+   * downsampled_width = ceil(image_width * Hi/Hmax)
+   * and similarly for height.  For decompression, IDCT scaling is included, so
+   * downsampled_width = ceil(image_width * Hi/Hmax * DCT_scaled_size/DCTSIZE)
+   */
+  JDIMENSION downsampled_width;	 /* actual width in samples */
+  JDIMENSION downsampled_height; /* actual height in samples */
+  /* component_needed is used only for decompression.  In cases where some of
+   * the components will be ignored (eg grayscale output from YCbCr image),
+   * we can skip most computations for the unused components.
+   */
+  boolean component_needed;	/* do we need the value of this component? */
+
+  /* These values are computed before starting a scan of the component. */
+  /* The decompressor output side may not use these variables. */
+  int MCU_width;		/* number of blocks per MCU, horizontally */
+  int MCU_height;		/* number of blocks per MCU, vertically */
+  int MCU_blocks;		/* MCU_width * MCU_height */
+  int MCU_sample_width;		/* MCU width in samples, MCU_width*DCT_scaled_size */
+  int last_col_width;		/* # of non-dummy blocks across in last MCU */
+  int last_row_height;		/* # of non-dummy blocks down in last MCU */
+
+  /* Saved quantization table for component; NULL if none yet saved.
+   * See jdinput.c comments about the need for this information.
+   * This field is currently used only for decompression.
+   */
+  JQUANT_TBL * quant_table;
+
+  /* Private per-component storage for DCT or IDCT subsystem. */
+  void * dct_table;
+} jpeg_component_info;
+
+
+/* The script for encoding a multiple-scan file is an array of these,
+ * one entry per scan: */
+typedef struct {
+  int comps_in_scan;		/* number of components encoded in this scan */
+  int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */
+  int Ss, Se;			/* progressive JPEG spectral selection parms */
+  int Ah, Al;			/* progressive JPEG successive approx. parms */
+} jpeg_scan_info;
+
+/* The decompressor can save APPn and COM markers in a linked list of these: */
+
+typedef struct jpeg_marker_struct FAR * jpeg_saved_marker_ptr;
+
+struct jpeg_marker_struct {
+  jpeg_saved_marker_ptr next;	/* next in list, or NULL */
+  UINT8 marker;			/* marker code: JPEG_COM, or JPEG_APP0+n */
+  unsigned int original_length;	/* # bytes of data in the file */
+  unsigned int data_length;	/* # bytes of data saved at data[] */
+  JOCTET FAR * data;		/* the data contained in the marker */
+  /* The marker length word is counted in neither data_length nor original_length. */
+};
+
+/* Known color spaces (used for both the JPEG file and the application side). */
+
+typedef enum {
+	JCS_UNKNOWN,		/* error/unspecified */
+	JCS_GRAYSCALE,		/* monochrome */
+	JCS_RGB,		/* red/green/blue */
+	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV) */
+	JCS_CMYK,		/* C/M/Y/K */
+	JCS_YCCK		/* Y/Cb/Cr/K */
+} J_COLOR_SPACE;
+
+/* DCT/IDCT algorithm options. */
+
+typedef enum {
+	JDCT_ISLOW,		/* slow but accurate integer algorithm */
+	JDCT_IFAST,		/* faster, less accurate integer method */
+	JDCT_FLOAT		/* floating-point: accurate, fast on fast HW */
+} J_DCT_METHOD;
+
+#ifndef JDCT_DEFAULT		/* may be overridden in jconfig.h */
+#define JDCT_DEFAULT  JDCT_ISLOW
+#endif /* JDCT_DEFAULT */
+#ifndef JDCT_FASTEST		/* may be overridden in jconfig.h */
+#define JDCT_FASTEST  JDCT_IFAST
+#endif /* JDCT_FASTEST */
+
+/* Dithering options for decompression (see dither_mode; used when quantizing colors). */
+
+typedef enum {
+	JDITHER_NONE,		/* no dithering */
+	JDITHER_ORDERED,	/* simple ordered dither */
+	JDITHER_FS		/* Floyd-Steinberg error diffusion dither */
+} J_DITHER_MODE;
+
+
+/* Fields common to both JPEG master structs; the macro omits the final ';'. */
+
+#define jpeg_common_fields \
+  struct jpeg_error_mgr * err;	/* Error handler module */\
+  struct jpeg_memory_mgr * mem;	/* Memory manager module */\
+  struct jpeg_progress_mgr * progress; /* Progress monitor, or NULL if none */\
+  void * client_data;		/* Available for use by application */\
+  boolean is_decompressor;	/* So common code can tell which is which */\
+  int global_state		/* For checking call sequence validity */
+
+/* Routines that are to be used by both halves of the library are declared
+ * to receive a pointer to this structure.  There are no actual instances of
+ * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct.
+ */
+struct jpeg_common_struct {
+  jpeg_common_fields;		/* Fields common to both master struct types */
+  /* Additional fields follow in an actual jpeg_compress_struct or
+   * jpeg_decompress_struct.  All three structs must agree on these
+   * initial fields!  (This would be a lot cleaner in C++.)
+   */
+};
+
+typedef struct jpeg_common_struct * j_common_ptr;	/* either object kind */
+typedef struct jpeg_compress_struct * j_compress_ptr;	/* compression object */
+typedef struct jpeg_decompress_struct * j_decompress_ptr; /* decompression object */
+
+
+/* Master record for a compression instance */
+
+struct jpeg_compress_struct {
+  jpeg_common_fields;		/* Fields shared with jpeg_decompress_struct */
+
+  /* Destination for compressed data */
+  struct jpeg_destination_mgr * dest;
+
+  /* Description of source image --- these fields must be filled in by
+   * outer application before starting compression.  in_color_space must
+   * be correct before you can even call jpeg_set_defaults().
+   */
+
+  JDIMENSION image_width;	/* input image width */
+  JDIMENSION image_height;	/* input image height */
+  int input_components;		/* # of color components in input image */
+  J_COLOR_SPACE in_color_space;	/* colorspace of input image */
+
+  double input_gamma;		/* image gamma of input image */
+
+  /* Compression parameters --- these fields must be set before calling
+   * jpeg_start_compress().  We recommend calling jpeg_set_defaults() to
+   * initialize everything to reasonable defaults, then changing anything
+   * the application specifically wants to change.  That way you won't get
+   * burnt when new parameters are added.  Also note that there are several
+   * helper routines to simplify changing parameters.
+   */
+
+  int data_precision;		/* bits of precision in image data */
+
+  int num_components;		/* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  jpeg_component_info * comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+  
+  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
+  /* ptrs to coefficient quantization tables, or NULL if not defined */
+  
+  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+  
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  int num_scans;		/* # of entries in scan_info array */
+  const jpeg_scan_info * scan_info; /* script for multi-scan file, or NULL */
+  /* The default value of scan_info is NULL, which causes a single-scan
+   * sequential JPEG file to be emitted.  To create a multi-scan file,
+   * set num_scans and scan_info to point to an array of scan definitions.
+   */
+
+  boolean raw_data_in;		/* TRUE=caller supplies downsampled data */
+  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+  boolean optimize_coding;	/* TRUE=optimize entropy encoding parms */
+  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+  int smoothing_factor;		/* 1..100, or 0 for no input smoothing */
+  J_DCT_METHOD dct_method;	/* DCT algorithm selector */
+
+  /* The restart interval can be specified in absolute MCUs by setting
+   * restart_interval, or in MCU rows by setting restart_in_rows
+   * (in which case the correct restart_interval will be figured
+   * for each scan).
+   */
+  unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */
+  int restart_in_rows;		/* if > 0, MCU rows per restart interval */
+
+  /* Parameters controlling emission of special markers. */
+
+  boolean write_JFIF_header;	/* should a JFIF marker be written? */
+  UINT8 JFIF_major_version;	/* What to write for the JFIF version number */
+  UINT8 JFIF_minor_version;
+  /* These three values are not used by the JPEG code, merely copied */
+  /* into the JFIF APP0 marker.  density_unit can be 0 for unknown, */
+  /* 1 for dots/inch, or 2 for dots/cm.  Note that the pixel aspect */
+  /* ratio is defined by X_density/Y_density even when density_unit=0. */
+  UINT8 density_unit;		/* JFIF code for pixel size units */
+  UINT16 X_density;		/* Horizontal pixel density */
+  UINT16 Y_density;		/* Vertical pixel density */
+  boolean write_Adobe_marker;	/* should an Adobe marker be written? */
+  
+  /* State variable: index of next scanline to be written to
+   * jpeg_write_scanlines().  Application may use this to control its
+   * processing loop, e.g., "while (next_scanline < image_height)".
+   */
+
+  JDIMENSION next_scanline;	/* 0 .. image_height (== image_height when done) */
+
+  /* Remaining fields are known throughout compressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during compression startup
+   */
+  boolean progressive_mode;	/* TRUE if scan script uses progressive mode */
+  int max_h_samp_factor;	/* largest h_samp_factor */
+  int max_v_samp_factor;	/* largest v_samp_factor */
+
+  JDIMENSION total_iMCU_rows;	/* # of iMCU rows to be input to coef ctlr */
+  /* The coefficient controller receives data in units of MCU rows as defined
+   * for fully interleaved scans (whether the JPEG file is interleaved or not).
+   * There are v_samp_factor * DCTSIZE sample rows of each component in an
+   * "iMCU" (interleaved MCU) row.
+   */
+  
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   */
+  int comps_in_scan;		/* # of JPEG components in this scan */
+  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+  
+  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+  
+  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int MCU_membership[C_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th block in an MCU */
+
+  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+
+  /*
+   * Links to compression subobjects (methods and private variables of modules)
+   */
+  struct jpeg_comp_master * master;
+  struct jpeg_c_main_controller * main;
+  struct jpeg_c_prep_controller * prep;
+  struct jpeg_c_coef_controller * coef;
+  struct jpeg_marker_writer * marker;
+  struct jpeg_color_converter * cconvert;
+  struct jpeg_downsampler * downsample;
+  struct jpeg_forward_dct * fdct;
+  struct jpeg_entropy_encoder * entropy;
+  jpeg_scan_info * script_space; /* workspace for jpeg_simple_progression */
+  int script_space_size;	/* NOTE(review): presumably capacity of script_space -- confirm */
+};
+
+
+/* Master record for a decompression instance */
+
+struct jpeg_decompress_struct {
+  jpeg_common_fields;		/* Fields shared with jpeg_compress_struct */
+
+  /* Source of compressed data */
+  struct jpeg_source_mgr * src;
+
+  /* Basic description of image --- filled in by jpeg_read_header(). */
+  /* Application may inspect these values to decide how to process image. */
+
+  JDIMENSION image_width;	/* nominal image width (from SOF marker) */
+  JDIMENSION image_height;	/* nominal image height */
+  int num_components;		/* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  /* Decompression processing parameters --- these fields must be set before
+   * calling jpeg_start_decompress().  Note that jpeg_read_header() initializes
+   * them to default values.
+   */
+
+  J_COLOR_SPACE out_color_space; /* colorspace for output */
+
+  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
+
+  double output_gamma;		/* image gamma wanted in output */
+
+  boolean buffered_image;	/* TRUE=multiple output passes */
+  boolean raw_data_out;		/* TRUE=downsampled data wanted */
+
+  J_DCT_METHOD dct_method;	/* IDCT algorithm selector */
+  boolean do_fancy_upsampling;	/* TRUE=apply fancy upsampling */
+  boolean do_block_smoothing;	/* TRUE=apply interblock smoothing */
+
+  boolean quantize_colors;	/* TRUE=colormapped output wanted */
+  /* the following are ignored if not quantize_colors: */
+  J_DITHER_MODE dither_mode;	/* type of color dithering to use */
+  boolean two_pass_quantize;	/* TRUE=use two-pass color quantization */
+  int desired_number_of_colors;	/* max # colors to use in created colormap */
+  /* these are significant only in buffered-image mode: */
+  boolean enable_1pass_quant;	/* enable future use of 1-pass quantizer */
+  boolean enable_external_quant;/* enable future use of external colormap */
+  boolean enable_2pass_quant;	/* enable future use of 2-pass quantizer */
+
+  /* Description of actual output image that will be returned to application.
+   * These fields are computed by jpeg_start_decompress().
+   * You can also use jpeg_calc_output_dimensions() to determine these values
+   * in advance of calling jpeg_start_decompress().
+   */
+
+  JDIMENSION output_width;	/* scaled image width */
+  JDIMENSION output_height;	/* scaled image height */
+  int out_color_components;	/* # of color components in out_color_space */
+  int output_components;	/* # of color components returned */
+  /* output_components is 1 (a colormap index) when quantizing colors;
+   * otherwise it equals out_color_components.
+   */
+  int rec_outbuf_height;	/* min recommended height of scanline buffer */
+  /* If the buffer passed to jpeg_read_scanlines() is less than this many rows
+   * high, space and time will be wasted due to unnecessary data copying.
+   * Usually rec_outbuf_height will be 1 or 2, at most 4.
+   */
+
+  /* When quantizing colors, the output colormap is described by these fields.
+   * The application can supply a colormap by setting colormap non-NULL before
+   * calling jpeg_start_decompress; otherwise a colormap is created during
+   * jpeg_start_decompress or jpeg_start_output.
+   * The map has out_color_components rows and actual_number_of_colors columns.
+   */
+  int actual_number_of_colors;	/* number of entries in use */
+  JSAMPARRAY colormap;		/* The color map as a 2-D pixel array */
+
+  /* State variables: these variables indicate the progress of decompression.
+   * The application may examine these but must not modify them.
+   */
+
+  /* Row index of next scanline to be read from jpeg_read_scanlines().
+   * Application may use this to control its processing loop, e.g.,
+   * "while (output_scanline < output_height)".
+   */
+  JDIMENSION output_scanline;	/* 0 .. output_height (== output_height when done) */
+
+  /* Current input scan number and number of iMCU rows completed in scan.
+   * These indicate the progress of the decompressor input side.
+   */
+  int input_scan_number;	/* Number of SOS markers seen so far */
+  JDIMENSION input_iMCU_row;	/* Number of iMCU rows completed */
+
+  /* The "output scan number" is the notional scan being displayed by the
+   * output side.  The decompressor will not allow output scan/row number
+   * to get ahead of input scan/row, but it can fall arbitrarily far behind.
+   */
+  int output_scan_number;	/* Nominal scan number being displayed */
+  JDIMENSION output_iMCU_row;	/* Number of iMCU rows read */
+
+  /* Current progression status.  coef_bits[c][i] indicates the precision
+   * with which component c's DCT coefficient i (in zigzag order) is known.
+   * It is -1 when no data has yet been received, otherwise it is the point
+   * transform (shift) value for the most recent scan of the coefficient
+   * (thus, 0 at completion of the progression).
+   * This pointer is NULL when reading a non-progressive file.
+   */
+  int (*coef_bits)[DCTSIZE2];	/* -1 or current Al value for each coef */
+
+  /* Internal JPEG parameters --- the application usually need not look at
+   * these fields.  Note that the decompressor output side may not use
+   * any parameters that can change between scans.
+   */
+
+  /* Quantization and Huffman tables are carried forward across input
+   * datastreams when processing abbreviated JPEG datastreams.
+   */
+
+  JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS];
+  /* ptrs to coefficient quantization tables, or NULL if not defined */
+
+  JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+
+  /* These parameters are never carried across datastreams, since they
+   * are given in SOF/SOS markers or defined to be reset by SOI.
+   */
+
+  int data_precision;		/* bits of precision in image data */
+
+  jpeg_component_info * comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+
+  boolean progressive_mode;	/* TRUE if SOFn specifies progressive mode */
+  boolean arith_code;		/* TRUE=arithmetic coding, FALSE=Huffman */
+
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  unsigned int restart_interval; /* MCUs per restart interval, or 0 for no restart */
+
+  /* These fields record data obtained from optional markers recognized by
+   * the JPEG library.
+   */
+  boolean saw_JFIF_marker;	/* TRUE iff a JFIF APP0 marker was found */
+  /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */
+  UINT8 JFIF_major_version;	/* JFIF version number */
+  UINT8 JFIF_minor_version;
+  UINT8 density_unit;		/* JFIF code for pixel size units */
+  UINT16 X_density;		/* Horizontal pixel density */
+  UINT16 Y_density;		/* Vertical pixel density */
+  boolean saw_Adobe_marker;	/* TRUE iff an Adobe APP14 marker was found */
+  UINT8 Adobe_transform;	/* Color transform code from Adobe marker */
+
+  boolean CCIR601_sampling;	/* TRUE=first samples are cosited */
+
+  /* Aside from the specific data retained from APPn markers known to the
+   * library, the uninterpreted contents of any or all APPn and COM markers
+   * can be saved in a list for examination by the application.
+   */
+  jpeg_saved_marker_ptr marker_list; /* Head of list of saved markers */
+
+  /* Remaining fields are known throughout decompressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during decompression startup
+   */
+  int max_h_samp_factor;	/* largest h_samp_factor */
+  int max_v_samp_factor;	/* largest v_samp_factor */
+
+  int min_DCT_scaled_size;	/* smallest DCT_scaled_size of any component */
+
+  JDIMENSION total_iMCU_rows;	/* # of iMCU rows in image */
+  /* The coefficient controller's input and output progress is measured in
+   * units of "iMCU" (interleaved MCU) rows.  These are the same as MCU rows
+   * in fully interleaved JPEG scans, but are used whether the scan is
+   * interleaved or not.  We define an iMCU row as v_samp_factor DCT block
+   * rows of each component.  Therefore, the IDCT output contains
+   * v_samp_factor*DCT_scaled_size sample rows of a component per iMCU row.
+   */
+
+  JSAMPLE * sample_range_limit; /* table for fast range-limiting */
+
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   * Note that the decompressor output side must not use these fields.
+   */
+  int comps_in_scan;		/* # of JPEG components in this scan */
+  jpeg_component_info * cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+
+  JDIMENSION MCUs_per_row;	/* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;	/* # of MCU rows in the image */
+
+  int blocks_in_MCU;		/* # of DCT blocks per MCU */
+  int MCU_membership[D_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th block in an MCU */
+
+  int Ss, Se, Ah, Al;		/* progressive JPEG parameters for scan */
+
+  /* unread_marker is shared between entropy decoder and marker parser.
+   * It is either zero or the code of a JPEG marker that has been
+   * read from the data source, but has not yet been processed.
+   */
+  int unread_marker;		/* 0 when no marker is pending */
+
+  /*
+   * Links to decompression subobjects (methods, private variables of modules)
+   */
+  struct jpeg_decomp_master * master;
+  struct jpeg_d_main_controller * main;
+  struct jpeg_d_coef_controller * coef;
+  struct jpeg_d_post_controller * post;
+  struct jpeg_input_controller * inputctl;
+  struct jpeg_marker_reader * marker;
+  struct jpeg_entropy_decoder * entropy;
+  struct jpeg_inverse_dct * idct;
+  struct jpeg_upsampler * upsample;
+  struct jpeg_color_deconverter * cconvert;
+  struct jpeg_color_quantizer * cquantize;
+};
+
+
+/* "Object" declarations for JPEG modules that may be supplied or called
+ * directly by the surrounding application.
+ * As with all objects in the JPEG library, these structs only define the
+ * publicly visible methods and state variables of a module.  Additional
+ * private fields may exist after the public ones.
+ */
+
+
+/* Error handler object */
+
+struct jpeg_error_mgr {
+  /* Error exit handler: does not return to caller */
+  JMETHOD(void, error_exit, (j_common_ptr cinfo));
+  /* Conditionally emit a trace or warning message */
+  JMETHOD(void, emit_message, (j_common_ptr cinfo, int msg_level));
+  /* Routine that actually outputs a trace or error message */
+  JMETHOD(void, output_message, (j_common_ptr cinfo));
+  /* Format a message string for the most recent JPEG error or message */
+  JMETHOD(void, format_message, (j_common_ptr cinfo, char * buffer));
+#define JMSG_LENGTH_MAX  200	/* recommended size of format_message buffer */
+  /* Reset error state variables at start of a new image */
+  JMETHOD(void, reset_error_mgr, (j_common_ptr cinfo));
+  
+  /* The message ID code and any parameters are saved here.
+   * A message can have one string parameter or up to 8 int parameters.
+   */
+  int msg_code;			/* ID code of the saved message */
+#define JMSG_STR_PARM_MAX  80
+  union {
+    int i[8];			/* the int parameters (at most 8), or ... */
+    char s[JMSG_STR_PARM_MAX];	/* ... the single string parameter */
+  } msg_parm;
+  
+  /* Standard state variables for error facility */
+  
+  int trace_level;		/* max msg_level that will be displayed */
+  
+  /* For recoverable corrupt-data errors, we emit a warning message,
+   * but keep going unless emit_message chooses to abort.  emit_message
+   * should count warnings in num_warnings.  The surrounding application
+   * can check for bad data by seeing if num_warnings is nonzero at the
+   * end of processing.
+   */
+  long num_warnings;		/* number of corrupt-data warnings */
+
+  /* These fields point to the table(s) of error message strings.
+   * An application can change the table pointer to switch to a different
+   * message list (typically, to change the language in which errors are
+   * reported).  Some applications may wish to add additional error codes
+   * that will be handled by the JPEG library error mechanism; the second
+   * table pointer is used for this purpose.
+   *
+   * First table includes all errors generated by JPEG library itself.
+   * Error code 0 is reserved for a "no such error string" message.
+   */
+  const char * const * jpeg_message_table; /* Library errors */
+  int last_jpeg_message;    /* Table contains strings 0..last_jpeg_message */
+  /* Second table can be added by application (see cjpeg/djpeg for example).
+   * It contains strings numbered first_addon_message..last_addon_message.
+   */
+  const char * const * addon_message_table; /* Non-library errors */
+  int first_addon_message;	/* code for first string in addon table */
+  int last_addon_message;	/* code for last string in addon table */
+};
+
+
+/* Progress monitor object */
+
+struct jpeg_progress_mgr {
+  JMETHOD(void, progress_monitor, (j_common_ptr cinfo));
+
+  long pass_counter;		/* work units completed in this pass */
+  long pass_limit;		/* total number of work units in this pass */
+  int completed_passes;		/* passes completed so far */
+  int total_passes;		/* total number of passes expected */
+};
+
+
+/* Data destination object for compression */
+
+struct jpeg_destination_mgr {
+  JOCTET * next_output_byte;	/* => next byte to write in buffer */
+  size_t free_in_buffer;	/* # of byte spaces remaining in buffer */
+
+  JMETHOD(void, init_destination, (j_compress_ptr cinfo));
+  JMETHOD(boolean, empty_output_buffer, (j_compress_ptr cinfo));
+  JMETHOD(void, term_destination, (j_compress_ptr cinfo));
+};
+
+
+/* Data source object for decompression */
+
+struct jpeg_source_mgr {
+  const JOCTET * next_input_byte; /* => next byte to read from buffer */
+  size_t bytes_in_buffer;	/* # of bytes remaining in buffer */
+
+  JMETHOD(void, init_source, (j_decompress_ptr cinfo));
+  JMETHOD(boolean, fill_input_buffer, (j_decompress_ptr cinfo));
+  JMETHOD(void, skip_input_data, (j_decompress_ptr cinfo, long num_bytes));
+  JMETHOD(boolean, resync_to_restart, (j_decompress_ptr cinfo, int desired));
+  JMETHOD(void, term_source, (j_decompress_ptr cinfo));
+};
+
+
+/* Memory manager object.
+ * Allocates "small" objects (a few K total), "large" objects (tens of K),
+ * and "really big" objects (virtual arrays with backing store if needed).
+ * The memory manager does not allow individual objects to be freed; rather,
+ * each created object is assigned to a pool, and whole pools can be freed
+ * at once.  This is faster and more convenient than remembering exactly what
+ * to free, especially where malloc()/free() are not too speedy.
+ * NB: alloc routines never return NULL.  They exit to error_exit if not
+ * successful.
+ */
+
+#define JPOOL_PERMANENT	0	/* lasts until master record is destroyed */
+#define JPOOL_IMAGE	1	/* lasts until done with image/datastream */
+#define JPOOL_NUMPOOLS	2
+
+typedef struct jvirt_sarray_control * jvirt_sarray_ptr;
+typedef struct jvirt_barray_control * jvirt_barray_ptr;
+
+
+struct jpeg_memory_mgr {
+  /* Method pointers */
+  JMETHOD(void *, alloc_small, (j_common_ptr cinfo, int pool_id,
+				size_t sizeofobject));
+  JMETHOD(void FAR *, alloc_large, (j_common_ptr cinfo, int pool_id,
+				     size_t sizeofobject));
+  JMETHOD(JSAMPARRAY, alloc_sarray, (j_common_ptr cinfo, int pool_id,
+				     JDIMENSION samplesperrow,
+				     JDIMENSION numrows));
+  JMETHOD(JBLOCKARRAY, alloc_barray, (j_common_ptr cinfo, int pool_id,
+				      JDIMENSION blocksperrow,
+				      JDIMENSION numrows));
+  JMETHOD(jvirt_sarray_ptr, request_virt_sarray, (j_common_ptr cinfo,
+						  int pool_id,
+						  boolean pre_zero,
+						  JDIMENSION samplesperrow,
+						  JDIMENSION numrows,
+						  JDIMENSION maxaccess));
+  JMETHOD(jvirt_barray_ptr, request_virt_barray, (j_common_ptr cinfo,
+						  int pool_id,
+						  boolean pre_zero,
+						  JDIMENSION blocksperrow,
+						  JDIMENSION numrows,
+						  JDIMENSION maxaccess));
+  JMETHOD(void, realize_virt_arrays, (j_common_ptr cinfo));
+  JMETHOD(JSAMPARRAY, access_virt_sarray, (j_common_ptr cinfo,
+					   jvirt_sarray_ptr ptr,
+					   JDIMENSION start_row,
+					   JDIMENSION num_rows,
+					   boolean writable));
+  JMETHOD(JBLOCKARRAY, access_virt_barray, (j_common_ptr cinfo,
+					    jvirt_barray_ptr ptr,
+					    JDIMENSION start_row,
+					    JDIMENSION num_rows,
+					    boolean writable));
+  JMETHOD(void, free_pool, (j_common_ptr cinfo, int pool_id));
+  JMETHOD(void, self_destruct, (j_common_ptr cinfo));
+
+  /* Limit on memory allocation for this JPEG object.  (Note that this is
+   * merely advisory, not a guaranteed maximum; it only affects the space
+   * used for virtual-array buffers.)  May be changed by outer application
+   * after creating the JPEG object.
+   */
+  long max_memory_to_use;
+
+  /* Maximum allocation request accepted by alloc_large. */
+  long max_alloc_chunk;
+};
+
+
+/* Routine signature for application-supplied marker processing methods.
+ * Need not pass marker code since it is stored in cinfo->unread_marker.
+ */
+typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo));
+
+
+/* Declarations for routines called by application.
+ * The JPP macro hides prototype parameters from compilers that can't cope.
+ * Note JPP requires double parentheses.
+ */
+
+#ifdef HAVE_PROTOTYPES
+#define JPP(arglist)	arglist
+#else
+#define JPP(arglist)	()
+#endif
+
+
+/* Short forms of external names for systems with brain-damaged linkers.
+ * We shorten external names to be unique in the first six letters, which
+ * is good enough for all known systems.
+ * (If your compiler itself needs names to be unique in fewer than 15
+ * characters, you are out of luck.  Get a better compiler.)
+ */
+
+#ifdef NEED_SHORT_EXTERNAL_NAMES
+#define jpeg_std_error		jStdError
+#define jpeg_CreateCompress	jCreaCompress
+#define jpeg_CreateDecompress	jCreaDecompress
+#define jpeg_destroy_compress	jDestCompress
+#define jpeg_destroy_decompress	jDestDecompress
+#define jpeg_stdio_dest		jStdDest
+#define jpeg_stdio_src		jStdSrc
+#define jpeg_set_defaults	jSetDefaults
+#define jpeg_set_colorspace	jSetColorspace
+#define jpeg_default_colorspace	jDefColorspace
+#define jpeg_set_quality	jSetQuality
+#define jpeg_set_linear_quality	jSetLQuality
+#define jpeg_add_quant_table	jAddQuantTable
+#define jpeg_quality_scaling	jQualityScaling
+#define jpeg_simple_progression	jSimProgress
+#define jpeg_suppress_tables	jSuppressTables
+#define jpeg_alloc_quant_table	jAlcQTable
+#define jpeg_alloc_huff_table	jAlcHTable
+#define jpeg_start_compress	jStrtCompress
+#define jpeg_write_scanlines	jWrtScanlines
+#define jpeg_finish_compress	jFinCompress
+#define jpeg_write_raw_data	jWrtRawData
+#define jpeg_write_marker	jWrtMarker
+#define jpeg_write_m_header	jWrtMHeader
+#define jpeg_write_m_byte	jWrtMByte
+#define jpeg_write_tables	jWrtTables
+#define jpeg_read_header	jReadHeader
+#define jpeg_start_decompress	jStrtDecompress
+#define jpeg_read_scanlines	jReadScanlines
+#define jpeg_finish_decompress	jFinDecompress
+#define jpeg_read_raw_data	jReadRawData
+#define jpeg_has_multiple_scans	jHasMultScn
+#define jpeg_start_output	jStrtOutput
+#define jpeg_finish_output	jFinOutput
+#define jpeg_input_complete	jInComplete
+#define jpeg_new_colormap	jNewCMap
+#define jpeg_consume_input	jConsumeInput
+#define jpeg_calc_output_dimensions	jCalcDimensions
+#define jpeg_save_markers	jSaveMarkers
+#define jpeg_set_marker_processor	jSetMarker
+#define jpeg_read_coefficients	jReadCoefs
+#define jpeg_write_coefficients	jWrtCoefs
+#define jpeg_copy_critical_parameters	jCopyCrit
+#define jpeg_abort_compress	jAbrtCompress
+#define jpeg_abort_decompress	jAbrtDecompress
+#define jpeg_abort		jAbort
+#define jpeg_destroy		jDestroy
+#define jpeg_resync_to_restart	jResyncRestart
+#endif /* NEED_SHORT_EXTERNAL_NAMES */
+
+
+/* Default error-management setup */
+EXTERN(struct jpeg_error_mgr *) jpeg_std_error
+	JPP((struct jpeg_error_mgr * err));
+
+/* Initialization of JPEG compression objects.
+ * jpeg_create_compress() and jpeg_create_decompress() are the exported
+ * names that applications should call.  These expand to calls on
+ * jpeg_CreateCompress and jpeg_CreateDecompress with additional information
+ * passed for version mismatch checking.
+ * NB: you must set up the error-manager BEFORE calling jpeg_create_xxx.
+ */
+#define jpeg_create_compress(cinfo) \
+    jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \
+			(size_t) sizeof(struct jpeg_compress_struct))
+#define jpeg_create_decompress(cinfo) \
+    jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
+			  (size_t) sizeof(struct jpeg_decompress_struct))
+EXTERN(void) jpeg_CreateCompress JPP((j_compress_ptr cinfo,
+				      int version, size_t structsize));
+EXTERN(void) jpeg_CreateDecompress JPP((j_decompress_ptr cinfo,
+					int version, size_t structsize));
+/* Destruction of JPEG compression objects */
+EXTERN(void) jpeg_destroy_compress JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo));
+
+/* Standard data source and destination managers: stdio streams. */
+/* Caller is responsible for opening the file before and closing after. */
+EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile));
+EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile));
+
+/* Default parameter setup for compression */
+EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo));
+/* Compression parameter setup aids */
+EXTERN(void) jpeg_set_colorspace JPP((j_compress_ptr cinfo,
+				      J_COLOR_SPACE colorspace));
+EXTERN(void) jpeg_default_colorspace JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality,
+				   boolean force_baseline));
+EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo,
+					  int scale_factor,
+					  boolean force_baseline));
+EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl,
+				       const unsigned int *basic_table,
+				       int scale_factor,
+				       boolean force_baseline));
+EXTERN(int) jpeg_quality_scaling JPP((int quality));
+EXTERN(void) jpeg_simple_progression JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_suppress_tables JPP((j_compress_ptr cinfo,
+				       boolean suppress));
+EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table JPP((j_common_ptr cinfo));
+EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table JPP((j_common_ptr cinfo));
+
+/* Main entry points for compression */
+EXTERN(void) jpeg_start_compress JPP((j_compress_ptr cinfo,
+				      boolean write_all_tables));
+EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo,
+					     JSAMPARRAY scanlines,
+					     JDIMENSION num_lines));
+EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo));
+
+/* Replaces jpeg_write_scanlines when writing raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo,
+					    JSAMPIMAGE data,
+					    JDIMENSION num_lines));
+
+/* Write a special marker.  See libjpeg.doc concerning safe usage. */
+EXTERN(void) jpeg_write_marker
+	JPP((j_compress_ptr cinfo, int marker,
+	     const JOCTET * dataptr, unsigned int datalen));
+/* Same, but piecemeal. */
+EXTERN(void) jpeg_write_m_header
+	JPP((j_compress_ptr cinfo, int marker, unsigned int datalen));
+EXTERN(void) jpeg_write_m_byte
+	JPP((j_compress_ptr cinfo, int val));
+
+/* Alternate compression function: just write an abbreviated table file */
+EXTERN(void) jpeg_write_tables JPP((j_compress_ptr cinfo));
+
+/* Decompression startup: read start of JPEG datastream to see what's there */
+EXTERN(int) jpeg_read_header JPP((j_decompress_ptr cinfo,
+				  boolean require_image));
+/* Return value is one of: */
+#define JPEG_SUSPENDED		0 /* Suspended due to lack of input data */
+#define JPEG_HEADER_OK		1 /* Found valid image datastream */
+#define JPEG_HEADER_TABLES_ONLY	2 /* Found valid table-specs-only datastream */
+/* If you pass require_image = TRUE (normal case), you need not check for
+ * a TABLES_ONLY return code; an abbreviated file will cause an error exit.
+ * JPEG_SUSPENDED is only possible if you use a data source module that can
+ * give a suspension return (the stdio source module doesn't).
+ */
+
+/* Main entry points for decompression */
+EXTERN(boolean) jpeg_start_decompress JPP((j_decompress_ptr cinfo));
+EXTERN(JDIMENSION) jpeg_read_scanlines JPP((j_decompress_ptr cinfo,
+					    JSAMPARRAY scanlines,
+					    JDIMENSION max_lines));
+EXTERN(boolean) jpeg_finish_decompress JPP((j_decompress_ptr cinfo));
+
+/* Replaces jpeg_read_scanlines when reading raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_read_raw_data JPP((j_decompress_ptr cinfo,
+					   JSAMPIMAGE data,
+					   JDIMENSION max_lines));
+
+/* Additional entry points for buffered-image mode. */
+EXTERN(boolean) jpeg_has_multiple_scans JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_start_output JPP((j_decompress_ptr cinfo,
+				       int scan_number));
+EXTERN(boolean) jpeg_finish_output JPP((j_decompress_ptr cinfo));
+EXTERN(boolean) jpeg_input_complete JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_new_colormap JPP((j_decompress_ptr cinfo));
+EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo));
+/* Return value is one of: */
+/* #define JPEG_SUSPENDED	0    Suspended due to lack of input data */
+#define JPEG_REACHED_SOS	1 /* Reached start of new scan */
+#define JPEG_REACHED_EOI	2 /* Reached end of image */
+#define JPEG_ROW_COMPLETED	3 /* Completed one iMCU row */
+#define JPEG_SCAN_COMPLETED	4 /* Completed last iMCU row of a scan */
+
+/* Precalculate output dimensions for current decompression parameters. */
+EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo));
+
+/* Control saving of COM and APPn markers into marker_list. */
+EXTERN(void) jpeg_save_markers
+	JPP((j_decompress_ptr cinfo, int marker_code,
+	     unsigned int length_limit));
+
+/* Install a special processing method for COM or APPn markers. */
+EXTERN(void) jpeg_set_marker_processor
+	JPP((j_decompress_ptr cinfo, int marker_code,
+	     jpeg_marker_parser_method routine));
+
+/* Read or write raw DCT coefficients --- useful for lossless transcoding. */
+EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients JPP((j_decompress_ptr cinfo));
+EXTERN(void) jpeg_write_coefficients JPP((j_compress_ptr cinfo,
+					  jvirt_barray_ptr * coef_arrays));
+EXTERN(void) jpeg_copy_critical_parameters JPP((j_decompress_ptr srcinfo,
+						j_compress_ptr dstinfo));
+
+/* If you choose to abort compression or decompression before completing
+ * jpeg_finish_(de)compress, then you need to clean up to release memory,
+ * temporary files, etc.  You can just call jpeg_destroy_(de)compress
+ * if you're done with the JPEG object, but if you want to clean it up and
+ * reuse it, call this:
+ */
+EXTERN(void) jpeg_abort_compress JPP((j_compress_ptr cinfo));
+EXTERN(void) jpeg_abort_decompress JPP((j_decompress_ptr cinfo));
+
+/* Generic versions of jpeg_abort and jpeg_destroy that work on either
+ * flavor of JPEG object.  These may be more convenient in some places.
+ */
+EXTERN(void) jpeg_abort JPP((j_common_ptr cinfo));
+EXTERN(void) jpeg_destroy JPP((j_common_ptr cinfo));
+
+/* Default restart-marker-resync procedure for use by data source modules */
+EXTERN(boolean) jpeg_resync_to_restart JPP((j_decompress_ptr cinfo,
+					    int desired));
+
+
+/* These marker codes are exported since applications and data source modules
+ * are likely to want to use them.
+ */
+
+#define JPEG_RST0	0xD0	/* RST0 marker code */
+#define JPEG_EOI	0xD9	/* EOI marker code */
+#define JPEG_APP0	0xE0	/* APP0 marker code */
+#define JPEG_COM	0xFE	/* COM marker code */
+
+
+/* If we have a brain-damaged compiler that emits warnings (or worse, errors)
+ * for structure definitions that are never filled in, keep it quiet by
+ * supplying dummy definitions for the various substructures.
+ */
+
+#ifdef INCOMPLETE_TYPES_BROKEN
+#ifndef JPEG_INTERNALS		/* will be defined in jpegint.h */
+struct jvirt_sarray_control { long dummy; };
+struct jvirt_barray_control { long dummy; };
+struct jpeg_comp_master { long dummy; };
+struct jpeg_c_main_controller { long dummy; };
+struct jpeg_c_prep_controller { long dummy; };
+struct jpeg_c_coef_controller { long dummy; };
+struct jpeg_marker_writer { long dummy; };
+struct jpeg_color_converter { long dummy; };
+struct jpeg_downsampler { long dummy; };
+struct jpeg_forward_dct { long dummy; };
+struct jpeg_entropy_encoder { long dummy; };
+struct jpeg_decomp_master { long dummy; };
+struct jpeg_d_main_controller { long dummy; };
+struct jpeg_d_coef_controller { long dummy; };
+struct jpeg_d_post_controller { long dummy; };
+struct jpeg_input_controller { long dummy; };
+struct jpeg_marker_reader { long dummy; };
+struct jpeg_entropy_decoder { long dummy; };
+struct jpeg_inverse_dct { long dummy; };
+struct jpeg_upsampler { long dummy; };
+struct jpeg_color_deconverter { long dummy; };
+struct jpeg_color_quantizer { long dummy; };
+#endif /* JPEG_INTERNALS */
+#endif /* INCOMPLETE_TYPES_BROKEN */
+
+
+/*
+ * The JPEG library modules define JPEG_INTERNALS before including this file.
+ * The internal structure declarations are read only when that is true.
+ * Applications using the library should not include jpegint.h, but may wish
+ * to include jerror.h.
+ */
+
+#ifdef JPEG_INTERNALS
+#include "jpegint.h"		/* fetch private declarations */
+#include "jerror.h"		/* fetch error codes too */
+#endif
+
+#endif /* JPEGLIB_H */
diff --git a/src/libjpeg/jquant1.c b/src/libjpeg/jquant1.c
new file mode 100644
index 0000000..b2f96aa
--- /dev/null
+++ b/src/libjpeg/jquant1.c
@@ -0,0 +1,856 @@
+/*
+ * jquant1.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains 1-pass color quantization (color mapping) routines.
+ * These routines provide mapping to a fixed color map using equally spaced
+ * color values.  Optional Floyd-Steinberg or ordered dithering is available.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef QUANT_1PASS_SUPPORTED
+
+
+/*
+ * The main purpose of 1-pass quantization is to provide a fast, if not very
+ * high quality, colormapped output capability.  A 2-pass quantizer usually
+ * gives better visual quality; however, for quantized grayscale output this
+ * quantizer is perfectly adequate.  Dithering is highly recommended with this
+ * quantizer, though you can turn it off if you really want to.
+ *
+ * In 1-pass quantization the colormap must be chosen in advance of seeing the
+ * image.  We use a map consisting of all combinations of Ncolors[i] color
+ * values for the i'th component.  The Ncolors[] values are chosen so that
+ * their product, the total number of colors, is no more than that requested.
+ * (In most cases, the product will be somewhat less.)
+ *
+ * Since the colormap is orthogonal, the representative value for each color
+ * component can be determined without considering the other components;
+ * then these indexes can be combined into a colormap index by a standard
+ * N-dimensional-array-subscript calculation.  Most of the arithmetic involved
+ * can be precalculated and stored in the lookup table colorindex[].
+ * colorindex[i][j] maps pixel value j in component i to the nearest
+ * representative value (grid plane) for that component; this index is
+ * multiplied by the array stride for component i, so that the
+ * index of the colormap entry closest to a given pixel value is just
+ *    sum( colorindex[component-number][pixel-component-value] )
+ * Aside from being fast, this scheme allows for variable spacing between
+ * representative values with no additional lookup cost.
+ *
+ * If gamma correction has been applied in color conversion, it might be wise
+ * to adjust the color grid spacing so that the representative colors are
+ * equidistant in linear space.  At this writing, gamma correction is not
+ * implemented by jdcolor, so nothing is done here.
+ */
+
+
+/* Declarations for ordered dithering.
+ *
+ * We use a standard 16x16 ordered dither array.  The basic concept of ordered
+ * dithering is described in many references, for instance Dale Schumacher's
+ * chapter II.2 of Graphics Gems II (James Arvo, ed. Academic Press, 1991).
+ * In place of Schumacher's comparisons against a "threshold" value, we add a
+ * "dither" value to the input pixel and then round the result to the nearest
+ * output value.  The dither value is equivalent to (0.5 - threshold) times
+ * the distance between output values.  For ordered dithering, we assume that
+ * the output colors are equally spaced; if not, results will probably be
+ * worse, since the dither may be too much or too little at a given point.
+ *
+ * The normal calculation would be to form pixel value + dither, range-limit
+ * this to 0..MAXJSAMPLE, and then index into the colorindex table as usual.
+ * We can skip the separate range-limiting step by extending the colorindex
+ * table in both directions.
+ */
+
+#define ODITHER_SIZE  16	/* dimension of dither matrix */
+/* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */
+#define ODITHER_CELLS (ODITHER_SIZE*ODITHER_SIZE)	/* # cells in matrix */
+#define ODITHER_MASK  (ODITHER_SIZE-1) /* mask for wrapping around counters */
+
+typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE];
+typedef int (*ODITHER_MATRIX_PTR)[ODITHER_SIZE];
+
+static const UINT8 base_dither_matrix[ODITHER_SIZE][ODITHER_SIZE] = {
+  /* Bayer's order-4 dither array.  Generated by the code given in
+   * Stephen Hawley's article "Ordered Dithering" in Graphics Gems I.
+   * The values in this array must range from 0 to ODITHER_CELLS-1.
+   */
+  {   0,192, 48,240, 12,204, 60,252,  3,195, 51,243, 15,207, 63,255 },
+  { 128, 64,176,112,140, 76,188,124,131, 67,179,115,143, 79,191,127 },
+  {  32,224, 16,208, 44,236, 28,220, 35,227, 19,211, 47,239, 31,223 },
+  { 160, 96,144, 80,172,108,156, 92,163, 99,147, 83,175,111,159, 95 },
+  {   8,200, 56,248,  4,196, 52,244, 11,203, 59,251,  7,199, 55,247 },
+  { 136, 72,184,120,132, 68,180,116,139, 75,187,123,135, 71,183,119 },
+  {  40,232, 24,216, 36,228, 20,212, 43,235, 27,219, 39,231, 23,215 },
+  { 168,104,152, 88,164,100,148, 84,171,107,155, 91,167,103,151, 87 },
+  {   2,194, 50,242, 14,206, 62,254,  1,193, 49,241, 13,205, 61,253 },
+  { 130, 66,178,114,142, 78,190,126,129, 65,177,113,141, 77,189,125 },
+  {  34,226, 18,210, 46,238, 30,222, 33,225, 17,209, 45,237, 29,221 },
+  { 162, 98,146, 82,174,110,158, 94,161, 97,145, 81,173,109,157, 93 },
+  {  10,202, 58,250,  6,198, 54,246,  9,201, 57,249,  5,197, 53,245 },
+  { 138, 74,186,122,134, 70,182,118,137, 73,185,121,133, 69,181,117 },
+  {  42,234, 26,218, 38,230, 22,214, 41,233, 25,217, 37,229, 21,213 },
+  { 170,106,154, 90,166,102,150, 86,169,105,153, 89,165,101,149, 85 }
+};
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *		...	(here)	7/16
+ *		3/16	5/16	1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array is indexed [component#][position].
+ * We provide (#columns + 2) entries per component; the extra entry at each
+ * end saves us from special-casing the first and last pixels.
+ *
+ * Note: on a wide image, we might not have enough room in a PC's near data
+ * segment to hold the error array; so it is allocated with alloc_large.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;		/* 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+#else
+typedef INT32 FSERROR;		/* may need more than 16 bits */
+typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+
+
+/* Private subobject */
+
+#define MAX_Q_COMPS 4		/* max components I can handle */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields */
+
+  /* Initially allocated colormap is saved here */
+  JSAMPARRAY sv_colormap;	/* The color map as a 2-D pixel array */
+  int sv_actual;		/* number of entries in use */
+
+  JSAMPARRAY colorindex;	/* Precomputed mapping for speed */
+  /* colorindex[i][j] = index of color closest to pixel value j in component i,
+   * premultiplied as described above.  Since colormap indexes must fit into
+   * JSAMPLEs, the entries of this array will too.
+   */
+  boolean is_padded;		/* is the colorindex padded for odither? */
+
+  int Ncolors[MAX_Q_COMPS];	/* # of values alloced to each component */
+
+  /* Variables for ordered dithering */
+  int row_index;		/* cur row's vertical index in dither matrix */
+  ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors */
+  boolean on_odd_row;		/* flag to remember which row we are on */
+} my_cquantizer;
+
+typedef my_cquantizer * my_cquantize_ptr;
+
+
+/*
+ * Policy-making subroutines for create_colormap and create_colorindex.
+ * These routines determine the colormap to be used.  The rest of the module
+ * only assumes that the colormap is orthogonal.
+ *
+ *  * select_ncolors decides how to divvy up the available colors
+ *    among the components.
+ *  * output_value defines the set of representative values for a component.
+ *  * largest_input_value defines the mapping from input values to
+ *    representative values for a component.
+ * Note that the latter two routines may impose different policies for
+ * different components, though this is not currently done.
+ */
+
+
+LOCAL(int)
+select_ncolors (j_decompress_ptr cinfo, int Ncolors[])
+/* Split the requested color budget among the output components: */
+/* Ncolors[] receives the number of levels chosen for each component. */
+/* Returns the product of those counts, i.e. the total number of colors. */
+{
+  static const int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
+  int ncomps = cinfo->out_color_components;
+  long budget = (long) cinfo->desired_number_of_colors;
+  boolean is_rgb = (cinfo->out_color_space == JCS_RGB);
+  int product, base, pass, comp;
+  boolean grew;
+  long trial;
+
+  /* base = floor(ncomps'th root of budget), found by trying powers */
+  base = 1;
+  do {
+    base++;
+    trial = base;
+    for (pass = 1; pass < ncomps; pass++)
+      trial *= base;
+  } while (trial <= budget);
+  base--;
+
+  /* Need at least 2 levels per component; here trial is 2**ncomps */
+  if (base < 2)
+    ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, (int) trial);
+
+  /* Start every component at base levels */
+  product = 1;
+  for (comp = 0; comp < ncomps; comp++) {
+    Ncolors[comp] = base;
+    product *= base;
+  }
+  /* Spend leftover budget one level at a time.  Not every component can
+   * grow (else base would have been larger), but one may grow twice:
+   * 16 colors goes 2*2*2 -> 3*2*2 -> 4*2*2.  For RGB output the order
+   * tried is G, then R, then B; otherwise components go in index order. */
+  for (;;) {
+    grew = FALSE;
+    for (pass = 0; pass < ncomps; pass++) {
+      comp = is_rgb ? RGB_order[pass] : pass;
+      /* total if this component gained a level; long avoids overflow */
+      trial = product / Ncolors[comp];
+      trial *= Ncolors[comp]+1;
+      if (trial > budget)
+        break;                  /* no room: abandon the rest of this pass */
+      Ncolors[comp]++;
+      product = (int) trial;
+      grew = TRUE;
+    }
+    if (! grew)
+      break;                    /* a full pass changed nothing: finished */
+  }
+
+  return product;
+}
+
+
+LOCAL(int)
+output_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Compute the j'th of maxj+1 representative sample values (j = 0..maxj). */
+/* Results increase with j and stay within 0..MAXJSAMPLE. */
+{
+  INT32 scaled;
+
+  /* Levels are evenly spaced with the ends pinned at 0 and MAXJSAMPLE,
+   * so dithering cannot leave the gamut; maxj/2 rounds to nearest. */
+  scaled = (INT32) j * MAXJSAMPLE + maxj/2;
+  return (int) (scaled / maxj);
+}
+
+
+LOCAL(int)
+largest_input_value (j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Largest input mapped to output level j: the midpoint between adjacent */
+/* output_value() results.  Gives >= 0 for j=0, >= MAXJSAMPLE for j=maxj. */
+{
+  INT32 numer = (INT32) (2*j + 1) * MAXJSAMPLE + maxj;
+  return (int) (numer / (2*maxj));
+}
+
+
+/*
+ * Create the colormap.
+ */
+
+LOCAL(void)
+create_colormap (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPARRAY colormap;		/* the map being built */
+  JSAMPROW maprow;		/* colormap row of the current component */
+  int total_colors;		/* number of entries in the map */
+  int nc = cinfo->out_color_components;
+  int i, j, k, nci, blksize, blkdist, ptr, val;
+
+  /* Decide how many levels each component gets */
+  total_colors = select_ncolors(cinfo, cquantize->Ncolors);
+
+  /* Trace the outcome; per-component counts are reported for 3 components */
+  if (nc != 3) {
+    TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors);
+  } else {
+    TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS,
+             total_colors, cquantize->Ncolors[0],
+             cquantize->Ncolors[1], cquantize->Ncolors[2]);
+  }
+
+  /* Allocate the map: one row per component, one column per color.
+   * Colors are in row-major order, so the last component varies fastest.
+   */
+  colormap = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) total_colors, (JDIMENSION) nc);
+
+  /* blksize = length of a run of equal values within one component row */
+  /* blkdist = spacing between successive runs holding the same value */
+  blkdist = total_colors;
+
+  for (i = 0; i < nc; i++) {
+    maprow = colormap[i];
+    nci = cquantize->Ncolors[i]; /* levels available to this component */
+    blksize = blkdist / nci;
+    for (j = 0; j < nci; j++) {
+      val = output_value(cinfo, i, j, nci-1); /* j'th level out of nci */
+      /* Store val in every run that belongs to level j */
+      for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) {
+        for (k = ptr; k < ptr + blksize; k++)
+          maprow[k] = (JSAMPLE) val;
+      }
+    }
+    blkdist = blksize;		/* next component repeats within one run */
+  }
+
+  /* Keep the map in the private object so that it outlives
+   * changes of the color quantization mode.
+   */
+  cquantize->sv_colormap = colormap;
+  cquantize->sv_actual = total_colors;
+}
+
+
+/*
+ * Create the color index table.
+ */
+
+LOCAL(void)
+create_colorindex (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPROW indexptr;
+  int i, j, k, nci, blksize, val, pad;
+
+  /* Ordered dither can present index values from -MAXJSAMPLE up to
+   * 2*MAXJSAMPLE, so for that mode each table gets MAXJSAMPLE extra
+   * entries on both sides.  The other dither modes do not need this.
+   * is_padded records the choice in case the dither mode changes later.
+   */
+  pad = (cinfo->dither_mode == JDITHER_ORDERED) ? MAXJSAMPLE*2 : 0;
+  cquantize->is_padded = (pad != 0) ? TRUE : FALSE;
+
+  cquantize->colorindex = (*cinfo->mem->alloc_sarray)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE,
+     (JDIMENSION) (MAXJSAMPLE+1 + pad),
+     (JDIMENSION) cinfo->out_color_components);
+
+  /* blksize = colormap run length of one level; shrinks per component */
+  blksize = cquantize->sv_actual;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    /* build the lookup table of the i'th color component */
+    nci = cquantize->Ncolors[i]; /* levels available to this component */
+    blksize /= nci;
+
+    /* shift the row pointer so the padding sits at negative indexes */
+    if (pad != 0)
+      cquantize->colorindex[i] += MAXJSAMPLE;
+    indexptr = cquantize->colorindex[i];
+
+    /* Walk the input range once: val is the current output level and
+     * k the largest input value that still maps to it.
+     */
+    val = 0;
+    k = largest_input_value(cinfo, i, 0, nci-1);
+    for (j = 0; j <= MAXJSAMPLE; j++) {
+      while (k < j) {		/* crossed a boundary: next level */
+        val++;
+        k = largest_input_value(cinfo, i, val, nci-1);
+      }
+      /* store the level pre-scaled by blksize to spare a multiply later */
+      indexptr[j] = (JSAMPLE) (val * blksize);
+    }
+
+    /* Replicate the end entries into the padding, if any */
+    if (pad != 0) {
+      for (j = 1; j <= MAXJSAMPLE; j++) {
+        indexptr[-j] = indexptr[0];
+        indexptr[MAXJSAMPLE+j] = indexptr[MAXJSAMPLE];
+      }
+    }
+  }
+}
+
+
+/*
+ * Create an ordered-dither array for a component having ncolors
+ * distinct output values.
+ */
+
+LOCAL(ODITHER_MATRIX_PTR)
+make_odither_array (j_decompress_ptr cinfo, int ncolors)
+{
+  ODITHER_MATRIX_PTR odither;
+  int row, col;
+  INT32 num, den;
+
+  odither = (ODITHER_MATRIX_PTR)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                SIZEOF(ODITHER_MATRIX));
+  /* Adjacent levels of this component lie MAXJSAMPLE/(ncolors-1) apart,
+   * so a cell with fill order f (f=0..N-1) gets the dither value
+   * (N-1-2*f)/(2*N) * MAXJSAMPLE/(ncolors-1); INT32 avoids 16-bit overflow.
+   */
+  den = 2 * ODITHER_CELLS * ((INT32) (ncolors - 1));
+  for (row = 0; row < ODITHER_SIZE; row++) {
+    for (col = 0; col < ODITHER_SIZE; col++) {
+      num = ((INT32) (ODITHER_CELLS-1 - 2*((int)base_dither_matrix[row][col])))
+            * MAXJSAMPLE;
+      /* divide magnitudes: truncate toward zero whatever C does for negatives */
+      if (num < 0)
+        odither[row][col] = (int) -((-num)/den);
+      else
+        odither[row][col] = (int) (num/den);
+    }
+  }
+  return odither;
+}
+
+
+/*
+ * Create the ordered-dither tables.
+ * Components having the same number of representative colors may 
+ * share a dither table.
+ */
+
+LOCAL(void)
+create_odither_tables (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  ODITHER_MATRIX_PTR odither;
+  int i, j, nci;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    nci = cquantize->Ncolors[i]; /* levels used by this component */
+    /* Scan the earlier components for the first one that has the
+     * same level count; j == i afterwards means there is none.
+     */
+    j = 0;
+    while (j < i && cquantize->Ncolors[j] != nci)
+      j++;
+    odither = (j < i) ? cquantize->odither[j] : NULL;
+    if (odither == NULL)	/* nothing to share: build a table */
+      odither = make_odither_array(cinfo, nci);
+    cquantize->odither[i] = odither;
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+color_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+                JSAMPARRAY output_buf, int num_rows)
+/* Undithered mapping for any number of components */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPARRAY colorindex = cquantize->colorindex;
+  register JSAMPROW src, dst;
+  register int code, comp;
+  register int ncomps = cinfo->out_color_components;
+  JDIMENSION remaining;
+  JDIMENSION width = cinfo->output_width;
+  int row;
+
+  for (row = 0; row < num_rows; row++) {
+    src = input_buf[row];
+    dst = output_buf[row];
+    for (remaining = width; remaining > 0; remaining--) {
+      /* a pixel's code is the sum of its per-component table entries */
+      code = 0;
+      for (comp = 0; comp < ncomps; comp++)
+        code += GETJSAMPLE(colorindex[comp][GETJSAMPLE(*src++)]);
+      *dst++ = (JSAMPLE) code;
+    }
+  }
+}
+
+
+METHODDEF(void)
+color_quantize3 (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+                 JSAMPARRAY output_buf, int num_rows)
+/* Undithered mapping specialized for exactly three components */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPROW colorindex0 = cquantize->colorindex[0];
+  JSAMPROW colorindex1 = cquantize->colorindex[1];
+  JSAMPROW colorindex2 = cquantize->colorindex[2];
+  register JSAMPROW src, dst;
+  register int code;
+  JDIMENSION width = cinfo->output_width;
+  JDIMENSION remaining;
+  int row;
+
+  for (row = 0; row < num_rows; row++) {
+    src = input_buf[row];
+    dst = output_buf[row];
+    for (remaining = width; remaining > 0; remaining--, src += 3) {
+      code  = GETJSAMPLE(colorindex0[GETJSAMPLE(src[0])]);
+      code += GETJSAMPLE(colorindex1[GETJSAMPLE(src[1])]);
+      code += GETJSAMPLE(colorindex2[GETJSAMPLE(src[2])]);
+      *dst++ = (JSAMPLE) code;
+    }
+  }
+}
+
+
+METHODDEF(void)
+quantize_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+                     JSAMPARRAY output_buf, int num_rows)
+/* Ordered dithering for any number of components */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register JSAMPROW src;
+  register JSAMPROW dst;
+  JSAMPROW colorindex_ci;
+  int * dither;			/* dither matrix row in use */
+  int row_index, col_index;	/* position within the dither matrix */
+  int nc = cinfo->out_color_components;
+  int ci;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    /* Clear the output row first: the components are handled one at a
+     * time and each of them adds its share to every pixel code.
+     */
+    jzero_far((void FAR *) output_buf[row],
+              (size_t) (width * SIZEOF(JSAMPLE)));
+    row_index = cquantize->row_index;
+    /* one sweep across the row per component */
+    for (ci = 0; ci < nc; ci++) {
+      src = input_buf[row] + ci;
+      dst = output_buf[row];
+      colorindex_ci = cquantize->colorindex[ci];
+      dither = cquantize->odither[ci][row_index];
+      col_index = 0;
+
+      /* Pixel value plus dither may fall outside 0..MAXJSAMPLE.  No
+       * explicit clamp is needed: the colorindex table was padded so
+       * that out-of-range indexes give the right answer.  The dither
+       * magnitude is at most MAXJSAMPLE, which fixes the padding size.
+       */
+      for (col = 0; col < width; col++) {
+        dst[col] += colorindex_ci[GETJSAMPLE(*src)+dither[col_index]];
+        src += nc;
+        col_index = (col_index + 1) & ODITHER_MASK;
+      }
+    }
+    /* Step to the next dither matrix row and save it for later calls */
+    row_index = (row_index + 1) & ODITHER_MASK;
+    cquantize->row_index = row_index;
+  }
+}
+
+
+METHODDEF(void)
+quantize3_ord_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+                      JSAMPARRAY output_buf, int num_rows)
+/* Ordered dithering specialized for exactly three components */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  JSAMPROW colorindex0 = cquantize->colorindex[0];
+  JSAMPROW colorindex1 = cquantize->colorindex[1];
+  JSAMPROW colorindex2 = cquantize->colorindex[2];
+  register JSAMPROW src;
+  register JSAMPROW dst;
+  register int code;
+  int * dither0;		/* dither matrix rows in use, one per component */
+  int * dither1;
+  int * dither2;
+  int row_index, col_index;	/* position within the dither matrix */
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    row_index = cquantize->row_index;
+    src = input_buf[row];
+    dst = output_buf[row];
+    dither0 = cquantize->odither[0][row_index];
+    dither1 = cquantize->odither[1][row_index];
+    dither2 = cquantize->odither[2][row_index];
+
+    col_index = 0;
+    for (col = width; col > 0; col--) {
+      /* sample + dither indexes the table directly, with no explicit clamp */
+      code  = GETJSAMPLE(colorindex0[GETJSAMPLE(src[0]) + dither0[col_index]]);
+      code += GETJSAMPLE(colorindex1[GETJSAMPLE(src[1]) + dither1[col_index]]);
+      code += GETJSAMPLE(colorindex2[GETJSAMPLE(src[2]) + dither2[col_index]]);
+      src += 3;
+      *dst++ = (JSAMPLE) code;
+      col_index = (col_index + 1) & ODITHER_MASK;
+    }
+    /* step to the next dither matrix row and save it for later calls */
+    row_index = (row_index + 1) & ODITHER_MASK;
+    cquantize->row_index = row_index;
+  }
+}
+
+
+METHODDEF(void)
+quantize_fs_dither (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+		    JSAMPARRAY output_buf, int num_rows)
+/* Floyd-Steinberg dithering, any component count; rows alternate direction */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register LOCFSERROR cur;	/* current error or pixel value */
+  LOCFSERROR belowerr;		/* error for pixel below cur */
+  LOCFSERROR bpreverr;		/* error for below/prev col */
+  LOCFSERROR bnexterr;		/* error for below/next col */
+  LOCFSERROR delta;		/* twice the current error, used to step 1x->3x->5x->7x */
+  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  register JSAMPROW input_ptr;
+  register JSAMPROW output_ptr;
+  JSAMPROW colorindex_ci;	/* color index table of the current component */
+  JSAMPROW colormap_ci;		/* colormap row of the current component */
+  int pixcode;
+  int nc = cinfo->out_color_components;
+  int dir;			/* 1 for left-to-right, -1 for right-to-left */
+  int dirnc;			/* dir * nc */
+  int ci;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  SHIFT_TEMPS
+
+  for (row = 0; row < num_rows; row++) {
+    /* Zero the output row: each component pass adds its part of the code */
+    jzero_far((void FAR *) output_buf[row],
+	      (size_t) (width * SIZEOF(JSAMPLE)));
+    for (ci = 0; ci < nc; ci++) {
+      input_ptr = input_buf[row] + ci;
+      output_ptr = output_buf[row];
+      if (cquantize->on_odd_row) {
+	/* odd row: process the pixels from right to left */
+	input_ptr += (width-1) * nc; /* so point to rightmost pixel */
+	output_ptr += width-1;
+	dir = -1;
+	dirnc = -nc;
+	errorptr = cquantize->fserrors[ci] + (width+1); /* => entry after last column */
+      } else {
+	/* even row: process the pixels from left to right */
+	dir = 1;
+	dirnc = nc;
+	errorptr = cquantize->fserrors[ci]; /* => entry before first column */
+      }
+      colorindex_ci = cquantize->colorindex[ci];
+      colormap_ci = cquantize->sv_colormap[ci];
+      /* Preset error values: the first pixel gets no error from its neighbor */
+      cur = 0;
+      /* and nothing has been accumulated for the row below yet */
+      belowerr = bpreverr = 0;
+
+      for (col = width; col > 0; col--) {
+	/* cur holds the error propagated from the previous pixel on the
+	 * current line.  Add the error propagated from the previous line
+	 * to form the complete error correction term for this pixel, and
+	 * round the error term (which is expressed * 16) to an integer.
+	 * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+	 * for either sign of the error value.
+	 * Note: errorptr points to *previous* column's array entry.
+	 */
+	cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
+	/* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+	 * The maximum error is +- MAXJSAMPLE; this sets the required size
+	 * of the range_limit array.
+	 */
+	cur += GETJSAMPLE(*input_ptr);
+	cur = GETJSAMPLE(range_limit[cur]);
+	/* Look up this component's part of the code and add it to the pixel */
+	pixcode = GETJSAMPLE(colorindex_ci[cur]);
+	*output_ptr += (JSAMPLE) pixcode;
+	/* Compute actual representation error at this pixel */
+	/* Note: we can do this even though we don't have the final */
+	/* pixel code, because the colormap is orthogonal. */
+	cur -= GETJSAMPLE(colormap_ci[pixcode]);
+	/* Compute error fractions to be propagated to adjacent pixels.
+	 * Add these into the running sums, and simultaneously shift the
+	 * next-line error sums left by 1 column.
+	 */
+	bnexterr = cur;
+	delta = cur * 2;
+	cur += delta;		/* form error * 3 */
+	errorptr[0] = (FSERROR) (bpreverr + cur);
+	cur += delta;		/* form error * 5 */
+	bpreverr = belowerr + cur;
+	belowerr = bnexterr;
+	cur += delta;		/* form error * 7 */
+	/* At this point cur contains the 7/16 error value to be propagated
+	 * to the next pixel on the current line, and all the errors for the
+	 * next line have been shifted over. We are therefore ready to move on.
+	 */
+	input_ptr += dirnc;	/* advance input ptr to next column */
+	output_ptr += dir;	/* advance output ptr to next column */
+	errorptr += dir;	/* advance errorptr to current column */
+      }
+      /* Post-loop cleanup: we must unload the final error value into the
+       * final fserrors[] entry.  Note we need not unload belowerr because
+       * it is for the dummy column before or after the actual array.
+       */
+      errorptr[0] = (FSERROR) bpreverr; /* unload prev err into array */
+    }
+    cquantize->on_odd_row = (cquantize->on_odd_row ? FALSE : TRUE);
+  }
+}
+
+
+/*
+ * Allocate workspace for Floyd-Steinberg errors.
+ */
+
+LOCAL(void)
+alloc_fs_workspace (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  size_t nbytes;
+  int ci;
+
+  /* one error entry per output column plus two extra entries */
+  nbytes = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
+  for (ci = 0; ci < cinfo->out_color_components; ci++)
+    cquantize->fserrors[ci] = (FSERRPTR) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, nbytes);
+}
+
+
+/*
+ * Initialize for one-pass color quantization.
+ */
+
+METHODDEF(void)
+start_pass_1_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  int three_comps = (cinfo->out_color_components == 3);
+  size_t arraysize;
+  int i;
+
+  /* Publish the colormap built at init time */
+  cinfo->colormap = cquantize->sv_colormap;
+  cinfo->actual_number_of_colors = cquantize->sv_actual;
+
+  /* Pick the quantize method and prepare state for the dither mode */
+  switch (cinfo->dither_mode) {
+  case JDITHER_NONE:
+    /* plain table lookup; no further state to prepare */
+    cquantize->pub.color_quantize =
+      three_comps ? color_quantize3 : color_quantize;
+    break;
+  case JDITHER_ORDERED:
+    /* table lookup offset by a dither matrix value */
+    cquantize->pub.color_quantize =
+      three_comps ? quantize3_ord_dither : quantize_ord_dither;
+    cquantize->row_index = 0;	/* restart at dither matrix row 0 */
+    /* A color index table made for another dither mode lacks padding;
+     * rebuild it.  This costs extra space, but a mode switch like
+     * this is presumably rare.
+     */
+    if (! cquantize->is_padded)
+      create_colorindex(cinfo);
+    /* Build the ordered-dither tables unless they already exist */
+    if (cquantize->odither[0] == NULL)
+      create_odither_tables(cinfo);
+    break;
+  case JDITHER_FS:
+    cquantize->pub.color_quantize = quantize_fs_dither;
+    cquantize->on_odd_row = FALSE; /* first row runs left to right */
+    /* Get the error workspace unless it already exists */
+    if (cquantize->fserrors[0] == NULL)
+      alloc_fs_workspace(cinfo);
+    /* Start the pass with all propagated errors at zero */
+    arraysize = (size_t) ((cinfo->output_width + 2) * SIZEOF(FSERROR));
+    for (i = 0; i < cinfo->out_color_components; i++)
+      jzero_far((void FAR *) cquantize->fserrors[i], arraysize);
+    break;
+  default:
+    /* any other dither mode is rejected */
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    break;
+  }
+}
+
+
+/*
+ * Finish up at the end of the pass.
+ */
+
+METHODDEF(void)
+finish_pass_1_quant (j_decompress_ptr cinfo)
+{
+  /* intentionally empty: the 1-pass quantizer has no end-of-pass work */
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ * Shouldn't get to this module!
+ */
+
+METHODDEF(void)
+new_color_map_1_quant (j_decompress_ptr cinfo)
+{
+  ERREXIT(cinfo, JERR_MODE_CHANGE); /* unconditional: request is never honored here */
+}
+
+
+/*
+ * Module initialization routine for 1-pass color quantization.
+ */
+
+GLOBAL(void)
+jinit_1pass_quantizer (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+                                SIZEOF(my_cquantizer));
+
+  /* Hook the new object into the decompressor and set its methods */
+  cinfo->cquantize = (struct jpeg_color_quantizer *) cquantize;
+  cquantize->pub.start_pass = start_pass_1_quant;
+  cquantize->pub.finish_pass = finish_pass_1_quant;
+  cquantize->pub.new_color_map = new_color_map_1_quant;
+  /* NULL in slot 0 marks the lazily created tables as not yet made */
+  cquantize->odither[0] = NULL;	/* ordered-dither matrices */
+  cquantize->fserrors[0] = NULL; /* Floyd-Steinberg error workspace */
+
+  /* The per-component arrays hold at most MAX_Q_COMPS entries */
+  if (cinfo->out_color_components > MAX_Q_COMPS)
+    ERREXIT1(cinfo, JERR_QUANT_COMPONENTS, MAX_Q_COMPS);
+  /* Every colormap index has to fit in a JSAMPLE */
+  if (cinfo->desired_number_of_colors > (MAXJSAMPLE+1))
+    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXJSAMPLE+1);
+
+  /* Build the colormap first; the index table depends on its size */
+  create_colormap(cinfo);
+  create_colorindex(cinfo);
+
+  /* For F-S dithering grab the error workspace right away: it is FAR
+   * storage and can influence the memory manager's space calculations.
+   * A later switch to F-S mode allocates it then instead, which may
+   * exceed the max_memory_to_use setting.
+   */
+  if (cinfo->dither_mode == JDITHER_FS)
+    alloc_fs_workspace(cinfo);
+}
+
+#endif /* QUANT_1PASS_SUPPORTED */
diff --git a/src/libjpeg/jquant2.c b/src/libjpeg/jquant2.c
new file mode 100644
index 0000000..af601e3
--- /dev/null
+++ b/src/libjpeg/jquant2.c
@@ -0,0 +1,1310 @@
+/*
+ * jquant2.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains 2-pass color quantization (color mapping) routines.
+ * These routines provide selection of a custom color map for an image,
+ * followed by mapping of the image to that color map, with optional
+ * Floyd-Steinberg dithering.
+ * It is also possible to use just the second pass to map to an arbitrary
+ * externally-given color map.
+ *
+ * Note: ordered dithering is not supported, since there isn't any fast
+ * way to compute intercolor distances; it's unclear that ordered dither's
+ * fundamental assumptions even hold with an irregularly spaced color map.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+
+/*
+ * This module implements the well-known Heckbert paradigm for color
+ * quantization.  Most of the ideas used here can be traced back to
+ * Heckbert's seminal paper
+ *   Heckbert, Paul.  "Color Image Quantization for Frame Buffer Display",
+ *   Proc. SIGGRAPH '82, Computer Graphics v.16 #3 (July 1982), pp 297-304.
+ *
+ * In the first pass over the image, we accumulate a histogram showing the
+ * usage count of each possible color.  To keep the histogram to a reasonable
+ * size, we reduce the precision of the input; typical practice is to retain
+ * 5 or 6 bits per color, so that 8 or 4 different input values are counted
+ * in the same histogram cell.
+ *
+ * Next, the color-selection step begins with a box representing the whole
+ * color space, and repeatedly splits the "largest" remaining box until we
+ * have as many boxes as desired colors.  Then the mean color in each
+ * remaining box becomes one of the possible output colors.
+ * 
+ * The second pass over the image maps each input pixel to the closest output
+ * color (optionally after applying a Floyd-Steinberg dithering correction).
+ * This mapping is logically trivial, but making it go fast enough requires
+ * considerable care.
+ *
+ * Heckbert-style quantizers vary a good deal in their policies for choosing
+ * the "largest" box and deciding where to cut it.  The particular policies
+ * used here have proved out well in experimental comparisons, but better ones
+ * may yet be found.
+ *
+ * In earlier versions of the IJG code, this module quantized in YCbCr color
+ * space, processing the raw upsampled data without a color conversion step.
+ * This allowed the color conversion math to be done only once per colormap
+ * entry, not once per pixel.  However, that optimization precluded other
+ * useful optimizations (such as merging color conversion with upsampling)
+ * and it also interfered with desired capabilities such as quantizing to an
+ * externally-supplied colormap.  We have therefore abandoned that approach.
+ * The present code works in the post-conversion color space, typically RGB.
+ *
+ * To improve the visual quality of the results, we actually work in scaled
+ * RGB space, giving G distances more weight than R, and R in turn more than
+ * B.  To do everything in integer math, we must use integer scale factors.
+ * The 2/3/1 scale factors used here correspond loosely to the relative
+ * weights of the colors in the NTSC grayscale equation.
+ * If you want to use this code to quantize a non-RGB color space, you'll
+ * probably need to change these scale factors.
+ */
+
+#define R_SCALE 2		/* weight applied to R distances */
+#define G_SCALE 3		/* weight applied to G distances (largest) */
+#define B_SCALE 1		/* weight applied to B distances (smallest) */
+
+/* Relabel R/G/B as components 0/1/2, respecting the RGB ordering defined
+ * in jmorecfg.h.  As the code stands, it will do the right thing for R,G,B
+ * and B,G,R orders.  If you define some other weird order in jmorecfg.h,
+ * you'll get compile errors until you extend this logic.  In that case
+ * you'll probably want to tweak the histogram sizes too.
+ */
+
+#if RGB_RED == 0		/* R,G,B order: component 0 is red */
+#define C0_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 0		/* B,G,R order: component 0 is blue */
+#define C0_SCALE B_SCALE
+#endif
+#if RGB_GREEN == 1		/* green is component 1 in both supported orders */
+#define C1_SCALE G_SCALE
+#endif
+#if RGB_RED == 2		/* B,G,R order: component 2 is red */
+#define C2_SCALE R_SCALE
+#endif
+#if RGB_BLUE == 2		/* R,G,B order: component 2 is blue */
+#define C2_SCALE B_SCALE
+#endif
+
+
+/*
+ * First we have the histogram data structure and routines for creating it.
+ *
+ * The number of bits of precision can be adjusted by changing these symbols.
+ * We recommend keeping 6 bits for G and 5 each for R and B.
+ * If you have plenty of memory and cycles, 6 bits all around gives marginally
+ * better results; if you are short of memory, 5 bits all around will save
+ * some space but degrade the results.
+ * To maintain a fully accurate histogram, we'd need to allocate a "long"
+ * (preferably unsigned long) for each cell.  In practice this is overkill;
+ * we can get by with 16 bits per cell.  Few of the cell counts will overflow,
+ * and clamping those that do overflow to the maximum value will give close-
+ * enough results.  This reduces the recommended histogram size from 256Kb
+ * to 128Kb, which is a useful savings on PC-class machines.
+ * (In the second pass the histogram space is re-used for pixel mapping data;
+ * in that capacity, each cell must be able to store zero to the number of
+ * desired colors.  16 bits/cell is plenty for that too.)
+ * Since the JPEG code is intended to run in small memory model on 80x86
+ * machines, we can't just allocate the histogram in one chunk.  Instead
+ * of a true 3-D array, we use a row of pointers to 2-D arrays.  Each
+ * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and
+ * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.  Note that
+ * on 80x86 machines, the pointer row is in near memory but the actual
+ * arrays are in far memory (same arrangement as we use for image arrays).
+ */
+
+#define MAXNUMCOLORS  (MAXJSAMPLE+1) /* maximum size of colormap */
+
+/* These will do the right thing for either R,G,B or B,G,R color order,
+ * but you may not like the results for other color orders.
+ */
+#define HIST_C0_BITS  5		/* bits of precision in R/B histogram */
+#define HIST_C1_BITS  6		/* bits of precision in G histogram */
+#define HIST_C2_BITS  5		/* bits of precision in B/R histogram */
+
+/* Number of elements along histogram axes (2 ** bits of precision). */
+#define HIST_C0_ELEMS  (1<<HIST_C0_BITS)
+#define HIST_C1_ELEMS  (1<<HIST_C1_BITS)
+#define HIST_C2_ELEMS  (1<<HIST_C2_BITS)
+
+/* Right-shift counts that reduce a full-precision sample to a histogram index. */
+#define C0_SHIFT  (BITS_IN_JSAMPLE-HIST_C0_BITS)
+#define C1_SHIFT  (BITS_IN_JSAMPLE-HIST_C1_BITS)
+#define C2_SHIFT  (BITS_IN_JSAMPLE-HIST_C2_BITS)
+
+
+typedef UINT16 histcell;	/* histogram cell; prefer an unsigned type */
+
+typedef histcell FAR * histptr;	/* for pointers to histogram cells */
+
+typedef histcell hist1d[HIST_C2_ELEMS]; /* one row of cells along the C2 axis */
+typedef hist1d FAR * hist2d;	/* 2nd-level pointer: indexed [c1][c2] */
+typedef hist2d * hist3d;	/* top-level pointer: indexed [c0][c1][c2] */
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *		...	(here)	7/16
+ *		3/16	5/16	1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array has (#columns + 2) entries; the extra entry at
+ * each end saves us from special-casing the first and last pixels.
+ * Each entry is three values long, one value for each color component.
+ *
+ * Note: on a wide image, we might not have enough room in a PC's near data
+ * segment to hold the error array; so it is allocated with alloc_large.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;		/* stored error entry: 16 bits should be enough */
+typedef int LOCFSERROR;		/* use 'int' for calculation temps */
+#else
+typedef INT32 FSERROR;		/* stored error entry: may need more than 16 bits */
+typedef INT32 LOCFSERROR;	/* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR FAR *FSERRPTR;	/* pointer to error array (in FAR storage!) */
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields; first member of the object */
+
+  /* Space for the eventually created colormap is stashed here */
+  JSAMPARRAY sv_colormap;	/* colormap allocated at init time */
+  int desired;			/* desired # of colors = size of colormap */
+
+  /* Variables for accumulating image statistics */
+  hist3d histogram;		/* pointer to the histogram */
+
+  boolean needs_zeroed;		/* TRUE if next pass must zero histogram */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors;		/* accumulated errors */
+  boolean on_odd_row;		/* flag to remember which row we are on */
+  int * error_limiter;		/* table for clamping the applied error */
+} my_cquantizer;
+
+typedef my_cquantizer * my_cquantize_ptr; /* pointer type for the private object */
+
+
+/*
+ * Prescan some rows of pixels.
+ * In this module the prescan simply updates the histogram, which has been
+ * initialized to zeroes by start_pass.
+ * An output_buf parameter is required by the method signature, but no data
+ * is actually output (in fact the buffer controller is probably passing a
+ * NULL pointer).
+ */
+
+METHODDEF(void)
+prescan_quantize (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+                  JSAMPARRAY output_buf, int num_rows)
+/* Tally every pixel of the given rows in the histogram; writes no output */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  register hist3d histogram = cquantize->histogram;
+  register JSAMPROW pixel;
+  register histptr cell;
+  JDIMENSION width = cinfo->output_width;
+  JDIMENSION remaining;
+  int row;
+
+  for (row = 0; row < num_rows; row++) {
+    pixel = input_buf[row];
+    for (remaining = width; remaining > 0; remaining--, pixel += 3) {
+      /* locate the histogram cell for this pixel's reduced-precision color */
+      cell = & histogram[GETJSAMPLE(pixel[0]) >> C0_SHIFT]
+                        [GETJSAMPLE(pixel[1]) >> C1_SHIFT]
+                        [GETJSAMPLE(pixel[2]) >> C2_SHIFT];
+      /* count it; on overflow undo the increment so the cell stays clamped */
+      if (++(*cell) <= 0)
+        (*cell)--;
+    }
+  }
+}
+
+
+/*
+ * Next we have the really interesting routines: selection of a colormap
+ * given the completed histogram.
+ * These routines work with a list of "boxes", each representing a rectangular
+ * subset of the input color space (to histogram precision).
+ */
+
+typedef struct {
+  /* The bounds of the box (inclusive on both ends), as histogram indexes */
+  int c0min, c0max;
+  int c1min, c1max;
+  int c2min, c2max;
+  /* "Volume": sum of squared scaled edge lengths; 0 means unsplittable */
+  INT32 volume;
+  /* The number of nonzero histogram cells (distinct colors) in this box */
+  long colorcount;
+} box;
+
+typedef box * boxptr;
+
+
+LOCAL(boxptr)
+find_biggest_color_pop (boxptr boxlist, int numboxes)
+/* Among boxes with nonzero volume, pick the one holding the most colors. */
+/* The earliest such box wins ties; NULL means nothing is left to split. */
+{
+  boxptr best = NULL;
+  long best_count = 0;
+  int idx;
+
+  for (idx = 0; idx < numboxes; idx++) {
+    boxptr candidate = &boxlist[idx];
+    if (candidate->colorcount <= best_count || candidate->volume <= 0)
+      continue;			/* no better than the current best, or unsplittable */
+    best = candidate;
+    best_count = candidate->colorcount;
+  }
+  return best;
+}
+
+
+LOCAL(boxptr)
+find_biggest_volume (boxptr boxlist, int numboxes)
+/* Pick the box whose (scaled) volume is largest; earliest box wins ties. */
+/* A box of zero volume is never chosen, so NULL means nothing to split. */
+{
+  boxptr best = NULL;
+  INT32 best_volume = 0;
+  int idx;
+
+  for (idx = 0; idx < numboxes; idx++) {
+    boxptr candidate = &boxlist[idx];
+    if (candidate->volume > best_volume) {
+      best_volume = candidate->volume;
+      best = candidate;
+    }
+  }
+  return best;
+}
+
+
+LOCAL(void)
+update_box (j_decompress_ptr cinfo, boxptr boxp)
+/* Shrink the min/max bounds of a box so that they enclose only nonzero */
+/* histogram cells, then recompute its volume and color count, in place. */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  histptr histp;
+  int c0,c1,c2;
+  int c0min,c0max,c1min,c1max,c2min,c2max;
+  INT32 dist0,dist1,dist2;
+  long ccount;
+  /* Work on local copies of the bounds; every tightening is stored back too */
+  c0min = boxp->c0min;  c0max = boxp->c0max;
+  c1min = boxp->c1min;  c1max = boxp->c1max;
+  c2min = boxp->c2min;  c2max = boxp->c2max;
+  /* Each scan stops at the first nonzero cell; an axis one cell wide is skipped */
+  if (c0max > c0min)
+    for (c0 = c0min; c0 <= c0max; c0++)
+      for (c1 = c1min; c1 <= c1max; c1++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c0min = c0min = c0;	/* lowest occupied c0 plane */
+	    goto have_c0min;
+	  }
+      }
+ have_c0min:	/* reached by goto, or by fall-through if no nonzero cell was found */
+  if (c0max > c0min)
+    for (c0 = c0max; c0 >= c0min; c0--)
+      for (c1 = c1min; c1 <= c1max; c1++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c0max = c0max = c0;	/* highest occupied c0 plane */
+	    goto have_c0max;
+	  }
+      }
+ have_c0max:	/* the c1 scans below already use the tightened c0 range */
+  if (c1max > c1min)
+    for (c1 = c1min; c1 <= c1max; c1++)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c1min = c1min = c1;	/* lowest occupied c1 plane */
+	    goto have_c1min;
+	  }
+      }
+ have_c1min:
+  if (c1max > c1min)
+    for (c1 = c1max; c1 >= c1min; c1--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1][c2min];
+	for (c2 = c2min; c2 <= c2max; c2++)
+	  if (*histp++ != 0) {
+	    boxp->c1max = c1max = c1;	/* highest occupied c1 plane */
+	    goto have_c1max;
+	  }
+      }
+ have_c1max:	/* the c2 scans walk along c1, stepping histp by HIST_C2_ELEMS */
+  if (c2max > c2min)
+    for (c2 = c2min; c2 <= c2max; c2++)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1min][c2];
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+	  if (*histp != 0) {
+	    boxp->c2min = c2min = c2;	/* lowest occupied c2 plane */
+	    goto have_c2min;
+	  }
+      }
+ have_c2min:
+  if (c2max > c2min)
+    for (c2 = c2max; c2 >= c2min; c2--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+	histp = & histogram[c0][c1min][c2];
+	for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+	  if (*histp != 0) {
+	    boxp->c2max = c2max = c2;	/* highest occupied c2 plane */
+	    goto have_c2max;
+	  }
+      }
+ have_c2max:
+
+  /* Update box volume.
+   * We use 2-norm rather than real volume here; this biases the method
+   * against making long narrow boxes, and it has the side benefit that
+   * a box is splittable iff norm > 0 (norm is 0 only for a one-cell box).
+   * Since the differences are expressed in histogram-cell units,
+   * we have to shift back to JSAMPLE units to get consistent distances;
+   * after which, we scale according to the selected distance scale factors.
+   */
+  dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
+  dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
+  dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
+  boxp->volume = dist0*dist0 + dist1*dist1 + dist2*dist2;
+  /* (the volume is a sum of squares, so it is never negative) */
+  /* Now scan the shrunken box and count its nonzero cells (its population) */
+  ccount = 0;
+  for (c0 = c0min; c0 <= c0max; c0++)
+    for (c1 = c1min; c1 <= c1max; c1++) {
+      histp = & histogram[c0][c1][c2min];
+      for (c2 = c2min; c2 <= c2max; c2++, histp++)
+	if (*histp != 0) {
+	  ccount++;
+	}
+    }
+  boxp->colorcount = ccount;
+}
+
+
+LOCAL(int)
+median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
+	    int desired_colors)
+/* Split boxes until desired_colors exist or none can be split; returns the box count */
+{
+  int n,lb;
+  int c0,c1,c2,cmax;
+  register boxptr b1,b2;
+  /* boxlist[] needs room for desired_colors entries: new boxes are appended */
+  while (numboxes < desired_colors) {
+    /* Select box to split.
+     * Current algorithm: by population for first half, then by volume.
+     */
+    if (numboxes*2 <= desired_colors) {
+      b1 = find_biggest_color_pop(boxlist, numboxes);
+    } else {
+      b1 = find_biggest_volume(boxlist, numboxes);
+    }
+    if (b1 == NULL)		/* no splittable boxes left! */
+      break;
+    b2 = &boxlist[numboxes];	/* where new box will go */
+    /* Copy the color bounds to the new box; the split then narrows both. */
+    b2->c0max = b1->c0max; b2->c1max = b1->c1max; b2->c2max = b1->c2max;
+    b2->c0min = b1->c0min; b2->c1min = b1->c1min; b2->c2min = b1->c2min;
+    /* Choose which axis to split the box on.
+     * Current algorithm: longest scaled axis.
+     * See notes in update_box about scaling distances.
+     */
+    c0 = ((b1->c0max - b1->c0min) << C0_SHIFT) * C0_SCALE;
+    c1 = ((b1->c1max - b1->c1min) << C1_SHIFT) * C1_SCALE;
+    c2 = ((b1->c2max - b1->c2min) << C2_SHIFT) * C2_SCALE;
+    /* We want to break any ties in favor of green, then red, blue last.
+     * This code does the right thing for R,G,B or B,G,R color orders only.
+     * (Axis 1 starts as the winner; the strict '>' tests keep it on ties.)
+     */
+#if RGB_RED == 0
+    cmax = c1; n = 1;
+    if (c0 > cmax) { cmax = c0; n = 0; }
+    if (c2 > cmax) { n = 2; }
+#else
+    cmax = c1; n = 1;
+    if (c2 > cmax) { cmax = c2; n = 2; }
+    if (c0 > cmax) { n = 0; }
+#endif
+    /* Choose split point along selected axis, and update box bounds.
+     * Current algorithm: split at halfway point.
+     * (Since the box has been shrunk to minimum volume,
+     * any split will produce two nonempty subboxes.)
+     * Note that lb value is max for lower box, so must be < old max.
+     * b1 keeps the lower half [min..lb]; b2 receives the upper half [lb+1..max].
+     */
+    switch (n) {
+    case 0:
+      lb = (b1->c0max + b1->c0min) / 2;
+      b1->c0max = lb;
+      b2->c0min = lb+1;
+      break;
+    case 1:
+      lb = (b1->c1max + b1->c1min) / 2;
+      b1->c1max = lb;
+      b2->c1min = lb+1;
+      break;
+    case 2:
+      lb = (b1->c2max + b1->c2min) / 2;
+      b1->c2max = lb;
+      b2->c2min = lb+1;
+      break;
+    }
+    /* Re-shrink both halves and refresh their volume and color count */
+    update_box(cinfo, b1);
+    update_box(cinfo, b2);
+    numboxes++;
+  }
+  return numboxes;
+}
+
+
+LOCAL(void)
+compute_color (j_decompress_ptr cinfo, boxptr boxp, int icolor)
+/* Store the pixel-weighted mean color of *boxp into colormap entry icolor */
+{
+  /* Every nonzero cell contributes its center value, weighted by the cell's
+   * pixel count (so the mean is over pixels, not over distinct colors).
+   * The sums are then divided with round-to-nearest.
+   */
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d hist = cquantize->histogram;
+  histptr cell;
+  int i0, i1, i2;
+  long npix;			/* pixel count of the current cell */
+  long weight = 0;		/* pixels seen so far in the whole box */
+  long sum0 = 0, sum1 = 0, sum2 = 0;	/* weighted sums of cell centers */
+  long half;
+
+  for (i0 = boxp->c0min; i0 <= boxp->c0max; i0++) {
+    for (i1 = boxp->c1min; i1 <= boxp->c1max; i1++) {
+      cell = & hist[i0][i1][boxp->c2min];
+      for (i2 = boxp->c2min; i2 <= boxp->c2max; i2++, cell++) {
+	npix = *cell;
+	if (npix == 0)
+	  continue;		/* empty cells carry no weight */
+	weight += npix;
+	sum0 += ((i0 << C0_SHIFT) + ((1<<C0_SHIFT)>>1)) * npix;
+	sum1 += ((i1 << C1_SHIFT) + ((1<<C1_SHIFT)>>1)) * npix;
+	sum2 += ((i2 << C2_SHIFT) + ((1<<C2_SHIFT)>>1)) * npix;
+      }
+    }
+  }
+
+  /* Adding half the divisor first makes the division round to nearest. */
+  half = weight >> 1;
+  cinfo->colormap[0][icolor] = (JSAMPLE) ((sum0 + half) / weight);
+  cinfo->colormap[1][icolor] = (JSAMPLE) ((sum1 + half) / weight);
+  cinfo->colormap[2][icolor] = (JSAMPLE) ((sum2 + half) / weight);
+}
+
+
+LOCAL(void)
+select_colors (j_decompress_ptr cinfo, int desired_colors)
+/* Drive color selection: build the box list, then fill the colormap */
+{
+  boxptr boxes;
+  boxptr whole;
+  int nboxes;
+  int k;
+
+  /* Workspace: room for one box per requested color */
+  boxes = (boxptr) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, desired_colors * SIZEOF(box));
+  /* Start from a single box spanning the entire histogram ... */
+  whole = boxes;
+  whole->c0min = whole->c1min = whole->c2min = 0;
+  whole->c0max = MAXJSAMPLE >> C0_SHIFT;
+  whole->c1max = MAXJSAMPLE >> C1_SHIFT;
+  whole->c2max = MAXJSAMPLE >> C2_SHIFT;
+  /* ... trimmed to the colors actually present, with its statistics set */
+  update_box(cinfo, whole);
+  /* Split boxes by median cut until enough exist or none can be split */
+  nboxes = median_cut(cinfo, boxes, 1, desired_colors);
+  /* One colormap entry per box: its representative color */
+  for (k = 0; k < nboxes; k++) {
+    compute_color(cinfo, boxes + k, k);
+  }
+  cinfo->actual_number_of_colors = nboxes;
+  TRACEMS1(cinfo, 1, JTRC_QUANT_SELECTED, nboxes);
+}
+
+
+/*
+ * These routines are concerned with the time-critical task of mapping input
+ * colors to the nearest color in the selected colormap.
+ *
+ * We re-use the histogram space as an "inverse color map", essentially a
+ * cache for the results of nearest-color searches.  All colors within a
+ * histogram cell will be mapped to the same colormap entry, namely the one
+ * closest to the cell's center.  This may not be quite the closest entry to
+ * the actual input color, but it's almost as good.  A zero in the cache
+ * indicates we haven't found the nearest color for that cell yet; the array
+ * is cleared to zeroes before starting the mapping pass.  When we find the
+ * nearest color for a cell, its colormap index plus one is recorded in the
+ * cache for future use.  The pass2 scanning routines call fill_inverse_cmap
+ * when they need to use an unfilled entry in the cache.
+ *
+ * Our method of efficiently finding nearest colors is based on the "locally
+ * sorted search" idea described by Heckbert and on the incremental distance
+ * calculation described by Spencer W. Thomas in chapter III.1 of Graphics
+ * Gems II (James Arvo, ed.  Academic Press, 1991).  Thomas points out that
+ * the distances from a given colormap entry to each cell of the histogram can
+ * be computed quickly using an incremental method: the differences between
+ * distances to adjacent cells themselves differ by a constant.  This allows a
+ * fairly fast implementation of the "brute force" approach of computing the
+ * distance from every colormap entry to every histogram cell.  Unfortunately,
+ * it needs a work array to hold the best-distance-so-far for each histogram
+ * cell (because the inner loop has to be over cells, not colormap entries).
+ * The work array elements have to be INT32s, so the work array would need
+ * 256Kb at our recommended precision.  This is not feasible in DOS machines.
+ *
+ * To get around these problems, we apply Thomas' method to compute the
+ * nearest colors for only the cells within a small subbox of the histogram.
+ * The work array need be only as big as the subbox, so the memory usage
+ * problem is solved.  Furthermore, we need not fill subboxes that are never
+ * referenced in pass2; many images use only part of the color gamut, so a
+ * fair amount of work is saved.  An additional advantage of this
+ * approach is that we can apply Heckbert's locality criterion to quickly
+ * eliminate colormap entries that are far away from the subbox; typically
+ * three-fourths of the colormap entries are rejected by Heckbert's criterion,
+ * and we need not compute their distances to individual cells in the subbox.
+ * The speed of this approach is heavily influenced by the subbox size: too
+ * small means too much overhead, too big loses because Heckbert's criterion
+ * can't eliminate as many colormap entries.  Empirically the best subbox
+ * size seems to be about 1/512th of the histogram (1/8th in each direction).
+ *
+ * Thomas' article also describes a refined method which is asymptotically
+ * faster than the brute-force method, but it is also far more complex and
+ * cannot efficiently be applied to small subboxes.  It is therefore not
+ * useful for programs intended to be portable to DOS machines.  On machines
+ * with plenty of memory, filling the whole histogram in one shot with Thomas'
+ * refined method might be faster than the present code --- but then again,
+ * it might not be any faster, and it's certainly more complicated.
+ */
+
+
+/* log2(# of histogram cells along one update-box edge), per axis; adjustable */
+#define BOX_C0_LOG  (HIST_C0_BITS-3)
+#define BOX_C1_LOG  (HIST_C1_BITS-3)
+#define BOX_C2_LOG  (HIST_C2_BITS-3)
+
+#define BOX_C0_ELEMS  (1<<BOX_C0_LOG) /* # of hist cells along each box edge */
+#define BOX_C1_ELEMS  (1<<BOX_C1_LOG)
+#define BOX_C2_ELEMS  (1<<BOX_C2_LOG)
+
+#define BOX_C0_SHIFT  (C0_SHIFT + BOX_C0_LOG) /* box ID << this = sample coord */
+#define BOX_C1_SHIFT  (C1_SHIFT + BOX_C1_LOG)
+#define BOX_C2_SHIFT  (C2_SHIFT + BOX_C2_LOG)
+
+
+/*
+ * The next three routines implement inverse colormap filling.  They could
+ * all be folded into one big routine, but splitting them up this way saves
+ * some stack space (the mindist[] and bestdist[] arrays need not coexist)
+ * and may allow some compilers to produce better code by registerizing more
+ * inner-loop variables.
+ */
+
+LOCAL(int)
+find_nearby_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+		    JSAMPLE colorlist[])
+/* Locate the colormap entries close enough to an update box to be candidates
+ * for the nearest entry to some cell(s) in the update box.  The update box
+ * is specified by the center coordinates of its first cell.  The number of
+ * candidate colormap entries is returned, and their colormap indexes are
+ * placed in colorlist[] in ascending order.
+ * This routine uses Heckbert's "locally sorted search" criterion to select
+ * the colors that need further consideration.
+ */
+{
+  int numcolors = cinfo->actual_number_of_colors;
+  int maxc0, maxc1, maxc2;
+  int centerc0, centerc1, centerc2;
+  int i, x, ncolors;
+  INT32 minmaxdist, min_dist, max_dist, tdist;
+  INT32 mindist[MAXNUMCOLORS];	/* min distance to colormap entry i */
+
+  /* Compute true coordinates of update box's upper corner and center.
+   * Actually we compute the coordinates of the center of the upper-corner
+   * histogram cell, which are the upper bounds of the volume we care about.
+   * Note that since ">>" rounds down, the "center" values may be closer to
+   * min than to max; hence comparisons to them must be "<=", not "<".
+   */
+  maxc0 = minc0 + ((1 << BOX_C0_SHIFT) - (1 << C0_SHIFT));
+  centerc0 = (minc0 + maxc0) >> 1;
+  maxc1 = minc1 + ((1 << BOX_C1_SHIFT) - (1 << C1_SHIFT));
+  centerc1 = (minc1 + maxc1) >> 1;
+  maxc2 = minc2 + ((1 << BOX_C2_SHIFT) - (1 << C2_SHIFT));
+  centerc2 = (minc2 + maxc2) >> 1;
+
+  /* For each color in colormap, find:
+   *  1. its minimum squared-distance to any point in the update box
+   *     (zero if color is within update box);
+   *  2. its maximum squared-distance to any point in the update box.
+   * Both of these can be found by considering only the corners of the box.
+   * We save the minimum distance for each color in mindist[];
+   * only the smallest maximum distance is of interest.
+   */
+  minmaxdist = 0x7FFFFFFFL;	/* start at the largest INT32, then take minima */
+
+  for (i = 0; i < numcolors; i++) {
+    /* We compute the squared-c0-distance term, then add in the other two. */
+    x = GETJSAMPLE(cinfo->colormap[0][i]);
+    if (x < minc0) {		/* below the box: near face is min, far is max */
+      tdist = (x - minc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - maxc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else if (x > maxc0) {	/* above the box: near face is max, far is min */
+      tdist = (x - maxc0) * C0_SCALE;
+      min_dist = tdist*tdist;
+      tdist = (x - minc0) * C0_SCALE;
+      max_dist = tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      min_dist = 0;
+      if (x <= centerc0) {	/* the farther face is the one across the center */
+	tdist = (x - maxc0) * C0_SCALE;
+	max_dist = tdist*tdist;
+      } else {
+	tdist = (x - minc0) * C0_SCALE;
+	max_dist = tdist*tdist;
+      }
+    }
+    /* Same three cases for component 1, accumulated with "+=" */
+    x = GETJSAMPLE(cinfo->colormap[1][i]);
+    if (x < minc1) {
+      tdist = (x - minc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc1) {
+      tdist = (x - maxc1) * C1_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc1) * C1_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc1) {
+	tdist = (x - maxc1) * C1_SCALE;
+	max_dist += tdist*tdist;
+      } else {
+	tdist = (x - minc1) * C1_SCALE;
+	max_dist += tdist*tdist;
+      }
+    }
+    /* ... and again for component 2 */
+    x = GETJSAMPLE(cinfo->colormap[2][i]);
+    if (x < minc2) {
+      tdist = (x - minc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - maxc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else if (x > maxc2) {
+      tdist = (x - maxc2) * C2_SCALE;
+      min_dist += tdist*tdist;
+      tdist = (x - minc2) * C2_SCALE;
+      max_dist += tdist*tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc2) {
+	tdist = (x - maxc2) * C2_SCALE;
+	max_dist += tdist*tdist;
+      } else {
+	tdist = (x - minc2) * C2_SCALE;
+	max_dist += tdist*tdist;
+      }
+    }
+
+    mindist[i] = min_dist;	/* save away the results */
+    if (max_dist < minmaxdist)
+      minmaxdist = max_dist;	/* keep the smallest of the maximum distances */
+  }
+
+  /* Now we know that no cell in the update box is more than minmaxdist
+   * away from some colormap entry.  Therefore, only colors that are
+   * within minmaxdist of some part of the box need be considered.
+   */
+  ncolors = 0;
+  for (i = 0; i < numcolors; i++) {
+    if (mindist[i] <= minmaxdist)
+      colorlist[ncolors++] = (JSAMPLE) i;	/* index i is stored as a JSAMPLE */
+  }
+  return ncolors;
+}
+
+
+LOCAL(void)
+find_best_colors (j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+		  int numcolors, JSAMPLE colorlist[], JSAMPLE bestcolor[])
+/* Find the closest colormap entry for each cell in the update box,
+ * given the list of candidate colors prepared by find_nearby_colors.
+ * Return the indexes of the closest entries in the bestcolor[] array,
+ * one per cell, ordered with c2 varying fastest, then c1, then c0.
+ * This routine uses Thomas' incremental distance calculation method to
+ * find the distance from a colormap entry to successive cells in the box. */
+{
+  int ic0, ic1, ic2;
+  int i, icolor;
+  register INT32 * bptr;	/* pointer into bestdist[] array */
+  JSAMPLE * cptr;		/* pointer into bestcolor[] array */
+  INT32 dist0, dist1;		/* initial distance values */
+  register INT32 dist2;		/* current distance in inner loop */
+  INT32 xx0, xx1;		/* distance increments */
+  register INT32 xx2;
+  INT32 inc0, inc1, inc2;	/* initial values for increments */
+  /* This array holds the distance to the nearest-so-far color for each cell */
+  INT32 bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Initialize best-distance for each cell of the update box */
+  bptr = bestdist;
+  for (i = BOX_C0_ELEMS*BOX_C1_ELEMS*BOX_C2_ELEMS-1; i >= 0; i--)
+    *bptr++ = 0x7FFFFFFFL;	/* largest INT32, so any real distance is smaller */
+  
+  /* For each color selected by find_nearby_colors,
+   * compute its distance to the center of each cell in the box.
+   * If that's less than best-so-far, update best distance and color number.
+   */
+  
+  /* Nominal steps between cell centers ("x" in Thomas article) */
+#define STEP_C0  ((1 << C0_SHIFT) * C0_SCALE)
+#define STEP_C1  ((1 << C1_SHIFT) * C1_SCALE)
+#define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
+  /* (these STEP_* macros are not #undef'd inside this function) */
+  for (i = 0; i < numcolors; i++) {
+    icolor = GETJSAMPLE(colorlist[i]);
+    /* Compute (square of) distance from minc0/c1/c2 to this color */
+    inc0 = (minc0 - GETJSAMPLE(cinfo->colormap[0][icolor])) * C0_SCALE;
+    dist0 = inc0*inc0;
+    inc1 = (minc1 - GETJSAMPLE(cinfo->colormap[1][icolor])) * C1_SCALE;
+    dist0 += inc1*inc1;
+    inc2 = (minc2 - GETJSAMPLE(cinfo->colormap[2][icolor])) * C2_SCALE;
+    dist0 += inc2*inc2;
+    /* Form the initial difference increments: (d+s)^2 - d^2 = 2*d*s + s^2 */
+    inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
+    inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1;
+    inc2 = inc2 * (2 * STEP_C2) + STEP_C2 * STEP_C2;
+    /* Now loop over all cells in box, updating distance per Thomas method */
+    bptr = bestdist;
+    cptr = bestcolor;
+    xx0 = inc0;
+    for (ic0 = BOX_C0_ELEMS-1; ic0 >= 0; ic0--) {
+      dist1 = dist0;
+      xx1 = inc1;
+      for (ic1 = BOX_C1_ELEMS-1; ic1 >= 0; ic1--) {
+	dist2 = dist1;
+	xx2 = inc2;
+	for (ic2 = BOX_C2_ELEMS-1; ic2 >= 0; ic2--) {
+	  if (dist2 < *bptr) {	/* strict '<': the earlier candidate wins ties */
+	    *bptr = dist2;
+	    *cptr = (JSAMPLE) icolor;
+	  }
+	  dist2 += xx2;		/* distance to the next cell along c2 */
+	  xx2 += 2 * STEP_C2 * STEP_C2;	/* the increment grows by a constant */
+	  bptr++;
+	  cptr++;
+	}
+	dist1 += xx1;		/* start distance for the next c1 row */
+	xx1 += 2 * STEP_C1 * STEP_C1;
+      }
+      dist0 += xx0;		/* start distance for the next c0 plane */
+      xx0 += 2 * STEP_C0 * STEP_C0;
+    }
+  }
+}
+
+
+LOCAL(void)
+fill_inverse_cmap (j_decompress_ptr cinfo, int c0, int c1, int c2)
+/* Populate the inverse-colormap cache for the whole update box that */
+/* holds histogram cell c0/c1/c2.  (Just that cell is strictly required; */
+/* the rest of its update box is filled in the same call.) */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d cache = cquantize->histogram;
+
+  /* Which update box the cell falls in, per axis */
+  int box0 = c0 >> BOX_C0_LOG;
+  int box1 = c1 >> BOX_C1_LOG;
+  int box2 = c2 >> BOX_C2_LOG;
+  /* Histogram indexes of that box's first cell */
+  int base0 = box0 << BOX_C0_LOG;
+  int base1 = box1 << BOX_C1_LOG;
+  int base2 = box2 << BOX_C2_LOG;
+
+  /* Sample-space coordinates of the center of that first cell; these are
+   * the lower bounds of the volume the search routines care about.
+   */
+  int minc0 = (box0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1);
+  int minc1 = (box1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1);
+  int minc2 = (box2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1);
+
+  int i0, i1, i2;
+  int ncandidates;		/* entries of candidates[] in use */
+  register JSAMPLE * src;	/* walks nearest[] */
+  register histptr dst;		/* walks one c2 row of the cache */
+  /* Colormap indexes worth examining for this update box */
+  JSAMPLE candidates[MAXNUMCOLORS];
+  /* Nearest colormap index for each cell of the update box */
+  JSAMPLE nearest[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Weed out colormap entries that cannot be nearest to any cell here */
+  ncandidates = find_nearby_colors(cinfo, minc0, minc1, minc2, candidates);
+
+  /* Pick the true nearest candidate for every cell of the box */
+  find_best_colors(cinfo, minc0, minc1, minc2, ncandidates, candidates,
+		   nearest);
+
+  /* Copy the winners into the cache, biased by one so that zero keeps
+   * meaning "not yet computed".
+   */
+  src = nearest;
+  for (i0 = 0; i0 < BOX_C0_ELEMS; i0++) {
+    for (i1 = 0; i1 < BOX_C1_ELEMS; i1++) {
+      dst = & cache[base0 + i0][base1 + i1][base2];
+      for (i2 = 0; i2 < BOX_C2_ELEMS; i2++) {
+	*dst++ = (histcell) (GETJSAMPLE(*src++) + 1);
+      }
+    }
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+pass2_no_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+/* Map each pixel straight to its nearest colormap entry (no dithering) */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d cache = cquantize->histogram;
+  register JSAMPROW src, dst;
+  register histptr slot;
+  register int i0, i1, i2;
+  JDIMENSION ncols = cinfo->output_width;
+  JDIMENSION x;
+  int y;
+
+  for (y = 0; y < num_rows; y++) {
+    src = input_buf[y];
+    dst = output_buf[y];
+    for (x = 0; x < ncols; x++, src += 3, dst++) {
+      /* Reduce the three samples to histogram (cache) coordinates */
+      i0 = GETJSAMPLE(src[0]) >> C0_SHIFT;
+      i1 = GETJSAMPLE(src[1]) >> C1_SHIFT;
+      i2 = GETJSAMPLE(src[2]) >> C2_SHIFT;
+      slot = & cache[i0][i1][i2];
+      /* A zero slot has not been resolved yet: fill in its update box, */
+      /* which stores (colormap index + 1) into the slot */
+      if (*slot == 0)
+	fill_inverse_cmap(cinfo, i0, i1, i2);
+      /* Undo the +1 bias to recover the colormap index */
+      *dst = (JSAMPLE) (*slot - 1);
+    }
+  }
+}
+
+
+METHODDEF(void)
+pass2_fs_dither (j_decompress_ptr cinfo,
+		 JSAMPARRAY input_buf, JSAMPARRAY output_buf, int num_rows)
+/* This version performs Floyd-Steinberg dithering (alternating row direction) */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  register LOCFSERROR cur0, cur1, cur2;	/* current error or pixel value */
+  LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
+  LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
+  register FSERRPTR errorptr;	/* => fserrors[] at column before current */
+  JSAMPROW inptr;		/* => current input pixel */
+  JSAMPROW outptr;		/* => current output pixel */
+  histptr cachep;
+  int dir;			/* +1 or -1 depending on direction */
+  int dir3;			/* 3*dir, for advancing inptr & errorptr */
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  JSAMPLE *range_limit = cinfo->sample_range_limit;
+  int *error_limit = cquantize->error_limiter;
+  JSAMPROW colormap0 = cinfo->colormap[0];
+  JSAMPROW colormap1 = cinfo->colormap[1];
+  JSAMPROW colormap2 = cinfo->colormap[2];
+  SHIFT_TEMPS
+
+  for (row = 0; row < num_rows; row++) {
+    inptr = input_buf[row];
+    outptr = output_buf[row];
+    if (cquantize->on_odd_row) {
+      /* work right to left in this row */
+      inptr += (width-1) * 3;	/* so point to rightmost pixel */
+      outptr += width-1;
+      dir = -1;
+      dir3 = -3;
+      errorptr = cquantize->fserrors + (width+1)*3; /* => entry after last column */
+      cquantize->on_odd_row = FALSE; /* flip for next time */
+    } else {
+      /* work left to right in this row */
+      dir = 1;
+      dir3 = 3;
+      errorptr = cquantize->fserrors; /* => entry before first real column */
+      cquantize->on_odd_row = TRUE; /* flip for next time */
+    }
+    /* Preset error values: no error propagated to first pixel from left */
+    cur0 = cur1 = cur2 = 0;
+    /* and no error propagated to row below yet */
+    belowerr0 = belowerr1 = belowerr2 = 0;
+    bpreverr0 = bpreverr1 = bpreverr2 = 0;
+
+    for (col = width; col > 0; col--) {
+      /* curN holds the error propagated from the previous pixel on the
+       * current line.  Add the error propagated from the previous line
+       * to form the complete error correction term for this pixel, and
+       * round the error term (which is expressed * 16) to an integer.
+       * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+       * for either sign of the error value.
+       * Note: errorptr points to *previous* column's array entry.
+       */
+      cur0 = RIGHT_SHIFT(cur0 + errorptr[dir3+0] + 8, 4);
+      cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3+1] + 8, 4);
+      cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3+2] + 8, 4);
+      /* Limit the error using transfer function set by init_error_limit.
+       * See comments with init_error_limit for rationale.
+       */
+      cur0 = error_limit[cur0];	/* index may be negative; the table allows it */
+      cur1 = error_limit[cur1];
+      cur2 = error_limit[cur2];
+      /* Form pixel value + error, and range-limit to 0..MAXJSAMPLE.
+       * The maximum error is +- MAXJSAMPLE (or less with error limiting);
+       * this sets the required size of the range_limit array.
+       */
+      cur0 += GETJSAMPLE(inptr[0]);
+      cur1 += GETJSAMPLE(inptr[1]);
+      cur2 += GETJSAMPLE(inptr[2]);
+      cur0 = GETJSAMPLE(range_limit[cur0]);
+      cur1 = GETJSAMPLE(range_limit[cur1]);
+      cur2 = GETJSAMPLE(range_limit[cur2]);
+      /* Index into the cache with adjusted pixel value */
+      cachep = & histogram[cur0>>C0_SHIFT][cur1>>C1_SHIFT][cur2>>C2_SHIFT];
+      /* If we have not seen this color before, find nearest colormap */
+      /* entry and update the cache */
+      if (*cachep == 0)
+	fill_inverse_cmap(cinfo, cur0>>C0_SHIFT,cur1>>C1_SHIFT,cur2>>C2_SHIFT);
+      /* Now emit the colormap index for this cell (cache holds index + 1) */
+      { register int pixcode = *cachep - 1;
+	*outptr = (JSAMPLE) pixcode;
+	/* Compute representation error for this pixel */
+	cur0 -= GETJSAMPLE(colormap0[pixcode]);
+	cur1 -= GETJSAMPLE(colormap1[pixcode]);
+	cur2 -= GETJSAMPLE(colormap2[pixcode]);
+      }
+      /* Compute error fractions to be propagated to adjacent pixels.
+       * Add these into the running sums, and simultaneously shift the
+       * next-line error sums left by 1 column.
+       */
+      { register LOCFSERROR bnexterr, delta;
+	/* Per component: 3/16 below-behind, 5/16 below, 1/16 below-ahead, 7/16 ahead */
+	bnexterr = cur0;	/* Process component 0 */
+	delta = cur0 * 2;
+	cur0 += delta;		/* form error * 3 */
+	errorptr[0] = (FSERROR) (bpreverr0 + cur0);
+	cur0 += delta;		/* form error * 5 */
+	bpreverr0 = belowerr0 + cur0;
+	belowerr0 = bnexterr;
+	cur0 += delta;		/* form error * 7 */
+	bnexterr = cur1;	/* Process component 1 */
+	delta = cur1 * 2;
+	cur1 += delta;		/* form error * 3 */
+	errorptr[1] = (FSERROR) (bpreverr1 + cur1);
+	cur1 += delta;		/* form error * 5 */
+	bpreverr1 = belowerr1 + cur1;
+	belowerr1 = bnexterr;
+	cur1 += delta;		/* form error * 7 */
+	bnexterr = cur2;	/* Process component 2 */
+	delta = cur2 * 2;
+	cur2 += delta;		/* form error * 3 */
+	errorptr[2] = (FSERROR) (bpreverr2 + cur2);
+	cur2 += delta;		/* form error * 5 */
+	bpreverr2 = belowerr2 + cur2;
+	belowerr2 = bnexterr;
+	cur2 += delta;		/* form error * 7 */
+      }
+      /* At this point curN contains the 7/16 error value to be propagated
+       * to the next pixel on the current line, and all the errors for the
+       * next line have been shifted over.  We are therefore ready to move on.
+       */
+      inptr += dir3;		/* Advance pixel pointers to next column */
+      outptr += dir;
+      errorptr += dir3;		/* advance errorptr to current column */
+    }
+    /* Post-loop cleanup: we must unload the final error values into the
+     * final fserrors[] entry.  Note we need not unload belowerrN because
+     * it is for the dummy column before or after the actual array.
+     */
+    errorptr[0] = (FSERROR) bpreverr0; /* unload prev errs into array */
+    errorptr[1] = (FSERROR) bpreverr1;
+    errorptr[2] = (FSERROR) bpreverr2;
+  }
+}
+
+
+/*
+ * Initialize the error-limiting transfer function (lookup table).
+ * The raw F-S error computation can potentially compute error values of up to
+ * +- MAXJSAMPLE.  But we want the maximum correction applied to a pixel to be
+ * much less, otherwise obviously wrong pixels will be created.  (Typical
+ * effects include weird fringes at color-area boundaries, isolated bright
+ * pixels in a dark area, etc.)  The standard advice for avoiding this problem
+ * is to ensure that the "corners" of the color cube are allocated as output
+ * colors; then repeated errors in the same direction cannot cause cascading
+ * error buildup.  However, that only prevents the error from getting
+ * completely out of hand; Aaron Giles reports that error limiting improves
+ * the results even with corner colors allocated.
+ * A simple clamping of the error values to about +- MAXJSAMPLE/8 works pretty
+ * well, but the smoother transfer function used below is even better.  Thanks
+ * to Aaron Giles for this idea.
+ */
+
+LOCAL(void)
+init_error_limit (j_decompress_ptr cinfo)
+/* Build the symmetric error-limiting lookup table for F-S dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr) cinfo->cquantize;
+  int * limiter;
+  int err = 0;			/* raw error magnitude (table index) */
+  int limited = 0;		/* limited magnitude stored for +-err */
+
+  limiter = (int *) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE*2+1) * SIZEOF(int));
+  limiter += MAXJSAMPLE;	/* center it: valid indexes are -MAXJSAMPLE .. +MAXJSAMPLE */
+  cquantize->error_limiter = limiter;
+
+#define STEPSIZE ((MAXJSAMPLE+1)/16)
+  /* Slope 1: errors below STEPSIZE pass through unchanged */
+  while (err < STEPSIZE) {
+    limiter[err] = limited; limiter[-err] = -limited;
+    err++; limited++;
+  }
+  /* Slope 1/2: the output advances only when the next index is even */
+  while (err < STEPSIZE*3) {
+    limiter[err] = limited; limiter[-err] = -limited;
+    if ((++err & 1) == 0) limited++;
+  }
+  /* Flat: every larger error is clamped to the last output value */
+  for (; err <= MAXJSAMPLE; err++) { limiter[err] = limited; limiter[-err] = -limited; }
+#undef STEPSIZE
+}
+
+
+/*
+ * Finish up at the end of each pass.
+ */
+
+METHODDEF(void)
+finish_pass1 (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr quant = (my_cquantize_ptr) cinfo->cquantize;
+
+  /* Point cinfo->colormap at the saved map, then let select_colors()
+   * choose the representative colors that fill it. */
+  cinfo->colormap = quant->sv_colormap;
+  select_colors(cinfo, quant->desired);
+  quant->needs_zeroed = TRUE;	/* make the next pass zero the color index table */
+}
+
+
+METHODDEF(void)
+finish_pass2 (j_decompress_ptr cinfo)
+{
+  /* Deliberately empty: no end-of-pass work is performed by this hook. */
+}
+
+
+/*
+ * Initialize for each processing pass.
+ */
+
+METHODDEF(void)
+start_pass_2_quant (j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr quant = (my_cquantize_ptr) cinfo->cquantize;
+  hist3d hist = quant->histogram;
+  int n;			/* color count, later the histogram slab index */
+
+  /* Ordered dither is not implemented here: any dithering request
+   * other than "none" is carried out as Floyd-Steinberg. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  if (! is_pre_scan) {
+    /* Mapping pass: install the routine matching the dither mode */
+    if (cinfo->dither_mode != JDITHER_FS)
+      quant->pub.color_quantize = pass2_no_dither;
+    else
+      quant->pub.color_quantize = pass2_fs_dither;
+    quant->pub.finish_pass = finish_pass2;
+
+    /* Reject a colormap that is empty or has too many entries */
+    n = cinfo->actual_number_of_colors;
+    if (n < 1)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 1);
+    if (n > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+
+    if (cinfo->dither_mode == JDITHER_FS) {
+      /* Three error values per column, for output_width + 2 columns */
+      size_t fs_bytes = (size_t) ((cinfo->output_width + 2) *
+				  (3 * SIZEOF(FSERROR)));
+      /* The workspace is created on first use and cleared on every pass */
+      if (quant->fserrors == NULL)
+	quant->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+	  ((j_common_ptr) cinfo, JPOOL_IMAGE, fs_bytes);
+      jzero_far((void FAR *) quant->fserrors, fs_bytes);
+      /* Likewise, build the error-limit table only if it is missing */
+      if (quant->error_limiter == NULL)
+	init_error_limit(cinfo);
+      quant->on_odd_row = FALSE;
+    }
+  } else {
+    /* Prescan (histogram-gathering) pass */
+    quant->pub.color_quantize = prescan_quantize;
+    quant->pub.finish_pass = finish_pass1;
+    quant->needs_zeroed = TRUE;	/* the histogram is always zeroed first */
+  }
+
+  /* Clear the histogram / inverse color map when it is flagged as stale */
+  if (quant->needs_zeroed) {
+    for (n = 0; n < HIST_C0_ELEMS; n++) {
+      jzero_far((void FAR *) hist[n],
+		HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+    }
+    quant->needs_zeroed = FALSE;
+  }
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+METHODDEF(void)
+new_color_map_2_quant (j_decompress_ptr cinfo)
+{
+  /* Flag the inverse color map as stale.  start_pass_2_quant zeroes the
+   * histogram storage whenever it finds this flag set.
+   */
+  ((my_cquantize_ptr) cinfo->cquantize)->needs_zeroed = TRUE;
+}
+
+
+/*
+ * Module initialization routine for 2-pass color quantization.
+ */
+
+GLOBAL(void)
+jinit_2pass_quantizer (j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr quant;
+  int slab;			/* index over the first histogram axis */
+
+  /* Create the quantizer object and register its entry points */
+  quant = (my_cquantize_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_cquantizer));
+  cinfo->cquantize = (struct jpeg_color_quantizer *) quant;
+  quant->pub.start_pass = start_pass_2_quant;
+  quant->pub.new_color_map = new_color_map_2_quant;
+  /* NULL marks the optional work areas as not allocated yet */
+  quant->fserrors = NULL;
+  quant->error_limiter = NULL;
+
+  /* Only three-component output is implemented by this module */
+  if (cinfo->out_color_components != 3)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  /* Histogram/inverse colormap: a small table of HIST_C0_ELEMS pointers,
+   * each referring to its own separately allocated large slab. */
+  quant->histogram = (hist3d) (*cinfo->mem->alloc_small)
+    ((j_common_ptr) cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * SIZEOF(hist2d));
+  for (slab = 0; slab < HIST_C0_ELEMS; slab++) {
+    quant->histogram[slab] = (hist2d) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE,
+       HIST_C1_ELEMS*HIST_C2_ELEMS * SIZEOF(histcell));
+  }
+  quant->needs_zeroed = TRUE;	/* nothing has cleared the histogram yet */
+
+  /* The completed-colormap storage is FAR memory, so it is requested now
+   * where it can be included in the memory manager's space calculations.
+   */
+  if (! cinfo->enable_2pass_quant) {
+    quant->sv_colormap = NULL;
+  } else {
+    int ncolors = cinfo->desired_number_of_colors;
+    /* Fewer than 8 colors is refused (the bound is somewhat arbitrary) */
+    if (ncolors < 8)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 8);
+    /* Colormap indexes have to be representable as JSAMPLEs */
+    if (ncolors > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+    quant->sv_colormap = (*cinfo->mem->alloc_sarray)
+      ((j_common_ptr) cinfo,JPOOL_IMAGE, (JDIMENSION) ncolors, (JDIMENSION) 3);
+    quant->desired = ncolors;
+  }
+
+  /* Ordered dither is not available: any dithering request becomes F-S */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  /* The Floyd-Steinberg workspace is not needed before pass 2, but it is
+   * FAR storage too.  A later change of dither_mode is coped with, but
+   * max_memory_to_use is not promised to be honored in that case.
+   */
+  if (cinfo->dither_mode == JDITHER_FS) {
+    size_t fs_bytes = (size_t) ((cinfo->output_width + 2) * (3 * SIZEOF(FSERROR)));
+    quant->fserrors = (FSERRPTR) (*cinfo->mem->alloc_large)
+      ((j_common_ptr) cinfo, JPOOL_IMAGE, fs_bytes);
+    /* The error-limiting table is created at the same time */
+    init_error_limit(cinfo);
+  }
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
diff --git a/src/libjpeg/jutils.c b/src/libjpeg/jutils.c
new file mode 100644
index 0000000..d18a955
--- /dev/null
+++ b/src/libjpeg/jutils.c
@@ -0,0 +1,179 @@
+/*
+ * jutils.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains tables and miscellaneous utility routines needed
+ * for both compression and decompression.
+ * Note we prefix all global names with "j" to minimize conflicts with
+ * a surrounding application.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * jpeg_zigzag_order[i] is the zigzag-order position of the i'th element
+ * of a DCT block read in natural order (left to right, top to bottom).
+ */
+
+#if 0				/* This table is not actually needed in v6a */
+
+const int jpeg_zigzag_order[DCTSIZE2] = {
+   0,  1,  5,  6, 14, 15, 27, 28,
+   2,  4,  7, 13, 16, 26, 29, 42,
+   3,  8, 12, 17, 25, 30, 41, 43,
+   9, 11, 18, 24, 31, 40, 44, 53,
+  10, 19, 23, 32, 39, 45, 52, 54,
+  20, 22, 33, 38, 46, 51, 55, 60,
+  21, 34, 37, 47, 50, 56, 59, 61,
+  35, 36, 48, 49, 57, 58, 62, 63
+};
+
+#endif				/* end of disabled jpeg_zigzag_order table */
+
+/*
+ * jpeg_natural_order[i] is the natural-order position of the i'th element
+ * of zigzag order.
+ *
+ * When reading corrupted data, the Huffman decoders could attempt
+ * to reference an entry beyond the end of this array (if the decoded
+ * zero run length reaches past the end of the block).  To prevent
+ * wild stores without adding an inner-loop test, we put some extra
+ * "63"s after the real entries.  This will cause the extra coefficient
+ * to be stored in location 63 of the block, not somewhere random.
+ * The worst case would be a run-length of 15, which means we need 16
+ * fake entries.
+ */
+
+const int jpeg_natural_order[DCTSIZE2+16] = {	/* real entries, then 16 padding entries */
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 32, 25, 18, 11,  4,  5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13,  6,  7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63	/* second half of the 16 padding entries */
+};
+
+
+/*
+ * Arithmetic utilities
+ */
+
+GLOBAL(long)
+jdiv_round_up (long a, long b)
+/* Ceiling division, ie, ceil(a/b); callers must pass a >= 0 and b > 0 */
+{
+  long biased = a + b - 1L;	/* bump a so truncating division rounds up */
+  return biased / b;
+}
+
+
+GLOBAL(long)
+jround_up (long a, long b)
+/* Smallest multiple of b that is >= a, ie, ceil(a/b)*b */
+/* Callers must pass a >= 0 and b > 0 */
+{
+  long bumped = a + b - 1L;	/* reaches the next multiple unless a is one already */
+  return bumped - (bumped % b);
+}
+
+
+/* On normal machines we can apply MEMCOPY() and MEMZERO() to sample arrays
+ * and coefficient-block arrays.  This won't work on 80x86 because the arrays
+ * are FAR and we're assuming a small-pointer memory model.  However, some
+ * DOS compilers provide far-pointer versions of memcpy() and memset() even
+ * in the small-model libraries.  These will be used if USE_FMEM is defined.
+ * Otherwise, the routines below do it the hard way.  (The performance cost
+ * is not all that great, because these routines aren't very heavily used.)
+ */
+
+#ifndef NEED_FAR_POINTERS	/* normal case, same as regular macros */
+#define FMEMCOPY(dest,src,size)	MEMCOPY(dest,src,size)
+#define FMEMZERO(target,size)	MEMZERO(target,size)
+#else				/* 80x86 case, define if we can */
+#ifdef USE_FMEM
+#define FMEMCOPY(dest,src,size)	_fmemcpy((void FAR *)(dest), (const void FAR *)(src), (size_t)(size))
+#define FMEMZERO(target,size)	_fmemset((void FAR *)(target), 0, (size_t)(size))
+#endif				/* USE_FMEM; without it both macros stay undefined */
+#endif				/* NEED_FAR_POINTERS */
+
+
+GLOBAL(void)
+jcopy_sample_rows (JSAMPARRAY input_array, int source_row,
+		   JSAMPARRAY output_array, int dest_row,
+		   int num_rows, JDIMENSION num_cols)
+/* Copy num_rows consecutive sample rows, each num_cols samples wide.
+ * Row source_row+k of input_array goes to row dest_row+k of output_array,
+ * in ascending k; the two areas may overlap for duplication.
+ * Both arrays must be at least num_cols samples wide.
+ */
+{
+  register JSAMPROW from, to;
+#ifdef FMEMCOPY
+  register size_t rowbytes = (size_t) (num_cols * SIZEOF(JSAMPLE));
+#else
+  register JDIMENSION col;
+#endif
+  register int k;
+
+  input_array += source_row;	/* step to the first row of each area */
+  output_array += dest_row;
+
+  for (k = 0; k < num_rows; k++) {
+    from = input_array[k];
+    to = output_array[k];
+#ifdef FMEMCOPY
+    FMEMCOPY(to, from, rowbytes);
+#else
+    for (col = 0; col < num_cols; col++)
+      to[col] = from[col];	/* plain copy; GETJSAMPLE() is not needed here */
+#endif
+  }
+}
+
+
+GLOBAL(void)
+jcopy_block_row (JBLOCKROW input_row, JBLOCKROW output_row,
+		 JDIMENSION num_blocks)
+/* Copy num_blocks coefficient blocks from input_row to output_row. */
+{
+#ifndef FMEMCOPY
+  /* No block-copy primitive: move the coefficients one at a time */
+  register JCOEFPTR src = (JCOEFPTR) input_row;
+  register JCOEFPTR dst = (JCOEFPTR) output_row;
+  register long remaining = (long) num_blocks * DCTSIZE2;
+
+  while (remaining-- > 0)
+    *dst++ = *src++;
+#else
+  /* Byte count: DCTSIZE2 coefficients of type JCOEF per block */
+  FMEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * SIZEOF(JCOEF)));
+#endif
+}
+
+
+GLOBAL(void)
+jzero_far (void FAR * target, size_t bytestozero)
+/* Fill bytestozero bytes of FAR memory, starting at target, with zeroes. */
+/* Meant for sample-array data, block-array data, or alloc_large data. */
+{
+#ifndef FMEMZERO
+  /* No far memset is available: clear the area byte by byte */
+  register char FAR * p = (char FAR *) target;
+  register size_t left = bytestozero;
+
+  while (left-- > 0)
+    *p++ = 0;
+#else
+  FMEMZERO(target, bytestozero);
+#endif
+}
diff --git a/src/libjpeg/jversion.h b/src/libjpeg/jversion.h
new file mode 100644
index 0000000..6472c58
--- /dev/null
+++ b/src/libjpeg/jversion.h
@@ -0,0 +1,14 @@
+/*
+ * jversion.h
+ *
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains software version identification.
+ */
+
+
+#define JVERSION	"6b  27-Mar-1998"	/* version label followed by a date */
+
+#define JCOPYRIGHT	"Copyright (C) 1998, Thomas G. Lane"	/* copyright notice text */
diff --git a/src/liblzf/lzf.h b/src/liblzf/lzf.h
new file mode 100644
index 0000000..8538609
--- /dev/null
+++ b/src/liblzf/lzf.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2000-2005 Marc Alexander Lehmann <schmorp@schmorp.de>
+ * 
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ * 
+ *   1.  Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ * 
+ *   2.  Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ * 
+ *   3.  The name of the author may not be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License version 2 (the "GPL"), in which case the
+ * provisions of the GPL are applicable instead of the above. If you wish to
+ * allow the use of your version of this file only under the terms of the
+ * GPL and not to allow others to use your version of this file under the
+ * BSD license, indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by the GPL. If
+ * you do not delete the provisions above, a recipient may use your version
+ * of this file under either the BSD or the GPL.
+ */
+
+#ifndef LZF_H
+#define LZF_H
+
+/***********************************************************************
+**
+**	lzf -- an extremely fast/free compression/decompression-method
+**	http://liblzf.plan9.de/
+**
+**	This algorithm is believed to be patent-free.
+**
+***********************************************************************/
+
+#define LZF_VERSION 0x0105 /* 1.5 */
+
+/*
+ * Compress in_len bytes stored at the memory block starting at
+ * in_data and write the result to out_data, up to a maximum length
+ * of out_len bytes.
+ *
+ * Returns 0 if the output buffer is not large enough or any error
+ * occurs, otherwise the number of bytes used. The output might be
+ * considerably larger than in_len, so it makes sense to always use
+ * out_len == in_len - 1 to ensure _some_ compression, and to store
+ * the data uncompressed when 0 is returned.
+ *
+ * lzf_compress might use different algorithms on different systems and
+ * even different runs, thus might result in different compressed strings
+ * depending on the phase of the moon or similar factors. However, all
+ * these strings are architecture-independent and will result in the
+ * original data when decompressed using lzf_decompress.
+ *
+ * The buffers must not be overlapping.
+ *
+ * If the option LZF_STATE_ARG is enabled, an extra argument must be
+ * supplied which is not reflected in this header file. Refer to lzfP.h
+ * and lzf_c.c.
+ *
+ */
+unsigned int 
+lzf_compress (const void *const in_data,  unsigned int in_len,
+              void             *out_data, unsigned int out_len);
+
+/*
+ * Decompress data compressed with some version of the lzf_compress
+ * function and stored at location in_data and length in_len. The result
+ * will be stored at out_data up to a maximum of out_len characters.
+ *
+ * If the output buffer is not large enough to hold the decompressed
+ * data, a 0 is returned and errno is set to E2BIG. Otherwise the number
+ * of decompressed bytes (i.e. the original length of the data) is
+ * returned.
+ *
+ * If an error in the compressed data is detected, a zero is returned and
+ * errno is set to EINVAL. (lzfP.h notes that enabling its AVOID_ERRNO
+ * option breaks the errno behaviour documented here.)
+ * This function is very fast, about as fast as a copying loop.
+ */
+unsigned int 
+lzf_decompress (const void *const in_data,  unsigned int in_len,
+                void             *out_data, unsigned int out_len);
+
+#endif
+
diff --git a/src/liblzf/lzfP.h b/src/liblzf/lzfP.h
new file mode 100644
index 0000000..48963b2
--- /dev/null
+++ b/src/liblzf/lzfP.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2000-2005 Marc Alexander Lehmann <schmorp@schmorp.de>
+ * 
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ * 
+ *   1.  Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ * 
+ *   2.  Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ * 
+ *   3.  The name of the author may not be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License version 2 (the "GPL"), in which case the
+ * provisions of the GPL are applicable instead of the above. If you wish to
+ * allow the use of your version of this file only under the terms of the
+ * GPL and not to allow others to use your version of this file under the
+ * BSD license, indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by the GPL. If
+ * you do not delete the provisions above, a recipient may use your version
+ * of this file under either the BSD or the GPL.
+ */
+
+#ifndef LZFP_h
+#define LZFP_h
+
+#define STANDALONE 1 /* at the moment, this is ok. */
+
+#ifndef STANDALONE
+# include "lzf.h"
+#endif
+
+/*
+ * size of hashtable is (1 << HLOG) * sizeof (char *)
+ * decompression is independent of the hash table size
+ * the difference between 15 and 14 is very small
+ * for small blocks (and 14 is usually a bit faster).
+ * For a low-memory/faster configuration, use HLOG == 13;
+ * For best compression, use 15 or 16 (or more).
+ */
+#ifndef HLOG
+# define HLOG 14
+#endif
+
+/*
+ * sacrifice very little compression quality in favour of compression speed.
+ * This gives almost the same compression as the default code, and is
+ * (very roughly) 15% faster. This is the preferable mode of operation.
+ */
+
+#ifndef VERY_FAST
+# define VERY_FAST 1
+#endif
+
+/*
+ * sacrifice some more compression quality in favour of compression speed.
+ * (roughly 1-2% worse compression for large blocks and
+ * 9-10% for small, redundant, blocks and >>20% better speed in both cases)
+ * In short: when in need for speed, enable this for binary data,
+ * possibly disable this for text data.
+ */
+#ifndef ULTRA_FAST
+# define ULTRA_FAST 0
+#endif
+
+/* unconditionally aligning does not cost very much, so do it if unsure */
+#if !defined(STRICT_ALIGN) && (defined(__i386) || defined (__amd64))
+# define STRICT_ALIGN 0 /* plain 0/1: "defined" must not reach #if via a macro */
+#elif !defined(STRICT_ALIGN)
+# define STRICT_ALIGN 1
+#endif
+
+/*
+ * use string functions to copy memory.
+ * this is usually a loss, even with glibc's optimized memcpy
+ */
+#ifndef USE_MEMCPY
+# define USE_MEMCPY 0
+#endif
+
+/*
+ * you may choose to pre-set the hash table (might be faster on some
+ * modern cpus and large (>>64k) blocks)
+ */
+#ifndef INIT_HTAB
+# define INIT_HTAB 0
+#endif
+
+/*
+ * avoid assigning values to errno variable? for some embedding purposes
+ * (linux kernel for example), this is necessary. NOTE: this breaks
+ * the documentation in lzf.h.
+ */
+#ifndef AVOID_ERRNO
+# define AVOID_ERRNO 0
+#endif
+
+/*
+ * Whether to pass the LZF_STATE variable as argument, or allocate it
+ * on the stack. For small-stack environments, define this to 1.
+ * NOTE: this breaks the prototype in lzf.h.
+ */
+#ifndef LZF_STATE_ARG
+# define LZF_STATE_ARG 0
+#endif
+
+/*****************************************************************************/
+/* nothing should be changed below */
+
+typedef unsigned char u8;
+
+typedef const u8 *LZF_STATE[1 << (HLOG)];
+
+#if !STRICT_ALIGN
+/* for unaligned accesses we need a 16 bit datatype. */
+# include <limits.h>
+# if USHRT_MAX == 65535
+    typedef unsigned short u16;
+# elif UINT_MAX == 65535
+    typedef unsigned int u16;
+# else
+#  undef STRICT_ALIGN
+#  define STRICT_ALIGN 1
+# endif
+#endif
+
+#if ULTRA_FAST
+# if defined(VERY_FAST)
+#  undef VERY_FAST
+# endif
+#endif
+
+#if USE_MEMCPY || INIT_HTAB
+# ifdef __cplusplus
+#  include <cstring>
+# else
+#  include <string.h>
+# endif
+#endif
+
+#endif
+
diff --git a/src/liblzf/lzf_c.c b/src/liblzf/lzf_c.c
new file mode 100644
index 0000000..b9b9ef9
--- /dev/null
+++ b/src/liblzf/lzf_c.c
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2000-2005 Marc Alexander Lehmann <schmorp@schmorp.de>
+ * 
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ * 
+ *   1.  Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ * 
+ *   2.  Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ * 
+ *   3.  The name of the author may not be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License version 2 (the "GPL"), in which case the
+ * provisions of the GPL are applicable instead of the above. If you wish to
+ * allow the use of your version of this file only under the terms of the
+ * GPL and not to allow others to use your version of this file under the
+ * BSD license, indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by the GPL. If
+ * you do not delete the provisions above, a recipient may use your version
+ * of this file under either the BSD or the GPL.
+ */
+
+#include "lzfP.h"
+
+#define HSIZE (1 << (HLOG)) /* number of slots in the hash table */
+
+/*
+ * don't play with this unless you benchmark!
+ * decompression is not dependent on the hash function
+ * the hashing function might seem strange, just believe me
+ * it works ;)  (parameters are parenthesized so any argument expression is safe)
+ */
+#ifndef FRST
+# define FRST(p) ((((p)[0]) << 8) | (p)[1])
+# define NEXT(v,p) (((v) << 8) | (p)[2])
+# define IDX(h) (((((h) ^ ((h) << 5)) >> (3*8 - HLOG)) - (h)*5) & (HSIZE - 1))
+#endif
+/*
+ * IDX works because it is very similar to a multiplicative hash, e.g.
+ * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1))
+ * the latter is also quite fast on newer CPUs, and slightly better
+ *
+ * the next one is also quite good, albeit slow ;)
+ * (int)(cos(h & 0xffffff) * 1e6)
+ */
+
+#if 0
+/* original lzv-like hash function, much worse and thus slower */
+# define FRST(p) (p[0] << 5) ^ p[1]
+# define NEXT(v,p) ((v) << 5) ^ p[2]
+# define IDX(h) ((h) & (HSIZE - 1))
+#endif
+
+#define        MAX_LIT        (1 <<  5) /* longest literal run before it is flushed */
+#define        MAX_OFF        (1 << 13) /* back-reference offsets must be below this */
+#define        MAX_REF        ((1 <<  8) + (1 << 3)) /* cap on the match length scan */
+
+/*
+ * lzf_compress: compress in_len bytes at in_data into out_data, which has
+ * room for out_len bytes.  Returns the number of bytes written, or 0 when
+ * the output would not fit (an empty input also yields 0).
+ * compressed format:
+ *   000LLLLL <L+1>    ; literal
+ *   LLLooooo oooooooo ; backref L
+ *   111ooooo LLLLLLLL oooooooo ; backref L+7
+ */
+unsigned int
+lzf_compress (const void *const in_data, unsigned int in_len,
+	      void *out_data, unsigned int out_len
+#if LZF_STATE_ARG
+              , LZF_STATE *htab
+#endif
+              )
+{
+#if !LZF_STATE_ARG
+  LZF_STATE htab;
+#endif
+  const u8 **hslot;
+  const u8 *ip = (const u8 *)in_data;
+        u8 *op = (u8 *)out_data;
+  const u8 *in_end  = ip + in_len;
+        u8 *out_end = op + out_len;
+  const u8 *ref;
+  /* seed the hash only if two input bytes exist; shorter inputs never use it */
+  unsigned int hval = in_len >= 2 ? FRST (ip) : 0;
+  unsigned long off;
+           int lit = 0;
+
+#if INIT_HTAB
+# if USE_MEMCPY
+    memset (htab, 0, sizeof (htab));
+# else
+    for (hslot = htab; hslot < htab + HSIZE; hslot++)
+      *hslot = ip; /* one store per slot; the loop header does the increment */
+# endif
+#endif
+
+  for (;;)
+    {
+      if (in_end - ip > 2) /* at least 3 bytes left, so ip[2] is readable */
+        {
+          hval = NEXT (hval, ip);
+          hslot = htab + IDX (hval);
+          ref = *hslot; *hslot = ip;
+          /* ref may be stale; the range tests below run before it is read */
+          if (1
+#if INIT_HTAB && !USE_MEMCPY
+              && ref < ip /* the next test will actually take care of this, but this is faster */
+#endif
+              && (off = ip - ref - 1) < MAX_OFF
+              && ip + 4 < in_end
+              && ref > (u8 *)in_data
+#if STRICT_ALIGN
+              && ref[0] == ip[0]
+              && ref[1] == ip[1]
+              && ref[2] == ip[2]
+#else
+              && *(u16 *)ref == *(u16 *)ip
+              && ref[2] == ip[2]
+#endif
+            )
+            {
+              /* match found at *ref++ */
+              unsigned int len = 2;
+              unsigned int maxlen = in_end - ip - len;
+              maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
+              /* room check: pending literals + their header + 3 backref bytes */
+              if (op + lit + 1 + 3 >= out_end)
+                return 0;
+
+              do
+                len++;
+              while (len < maxlen && ref[len] == ip[len]);
+
+              if (lit)
+                {
+                  *op++ = lit - 1;
+                  lit = -lit;
+                  do
+                    *op++ = ip[lit];
+                  while (++lit);
+                }
+
+              len -= 2;
+              ip++;
+
+              if (len < 7)
+                {
+                  *op++ = (off >> 8) + (len << 5);
+                }
+              else
+                {
+                  *op++ = (off >> 8) + (  7 << 5);
+                  *op++ = len - 7;
+                }
+
+              *op++ = off;
+
+#if ULTRA_FAST || VERY_FAST
+              ip += len;
+#if VERY_FAST && !ULTRA_FAST
+              --ip;
+#endif
+              hval = FRST (ip);
+
+              hval = NEXT (hval, ip);
+              htab[IDX (hval)] = ip;
+              ip++;
+
+#if VERY_FAST && !ULTRA_FAST
+              hval = NEXT (hval, ip);
+              htab[IDX (hval)] = ip;
+              ip++;
+#endif
+#else
+              do
+                {
+                  hval = NEXT (hval, ip);
+                  htab[IDX (hval)] = ip;
+                  ip++;
+                }
+              while (len--);
+#endif
+              continue;
+            }
+        }
+      else if (ip == in_end)
+        break;
+
+      /* one more literal byte we must copy */
+      lit++;
+      ip++;
+
+      if (lit == MAX_LIT)
+        {
+          if (op + 1 + MAX_LIT >= out_end)
+            return 0;
+
+          *op++ = MAX_LIT - 1;
+#if USE_MEMCPY
+          memcpy (op, ip - MAX_LIT, MAX_LIT);
+          op += MAX_LIT;
+          lit = 0;
+#else
+          lit = -lit;
+          do
+            *op++ = ip[lit];
+          while (++lit);
+#endif
+        }
+    }
+  /* flush the literal run still pending at the end of the input */
+  if (lit)
+    {
+      if (op + lit + 1 >= out_end)
+	return 0;
+
+      *op++ = lit - 1;
+      lit = -lit;
+      do
+	*op++ = ip[lit];
+      while (++lit);
+    }
+
+  return op - (u8 *) out_data;
+}
diff --git a/src/liblzf/lzf_d.c b/src/liblzf/lzf_d.c
new file mode 100644
index 0000000..d0229d7
--- /dev/null
+++ b/src/liblzf/lzf_d.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2000-2005 Marc Alexander Lehmann <schmorp@schmorp.de>
+ * 
+ * Redistribution and use in source and binary forms, with or without modifica-
+ * tion, are permitted provided that the following conditions are met:
+ * 
+ *   1.  Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimer.
+ * 
+ *   2.  Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ * 
+ *   3.  The name of the author may not be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * the GNU General Public License version 2 (the "GPL"), in which case the
+ * provisions of the GPL are applicable instead of the above. If you wish to
+ * allow the use of your version of this file only under the terms of the
+ * GPL and not to allow others to use your version of this file under the
+ * BSD license, indicate your decision by deleting the provisions above and
+ * replace them with the notice and other provisions required by the GPL. If
+ * you do not delete the provisions above, a recipient may use your version
+ * of this file under either the BSD or the GPL.
+ */
+
+#include "lzfP.h"
+
+#if AVOID_ERRNO
+# define SET_ERRNO(n)
+#else
+# include <errno.h>
+# define SET_ERRNO(n) errno = (n)
+#endif
+
+unsigned int 
+lzf_decompress (const void *const in_data,  unsigned int in_len,
+                void             *out_data, unsigned int out_len)
+{
+  u8 const *ip = (const u8 *)in_data;
+  u8       *op = (u8 *)out_data;
+  u8 const *const in_end  = ip + in_len;
+  u8       *const out_end = op + out_len;
+
+  /* Returns the decoded size, or 0 with errno = E2BIG (output too small)
+   * or EINVAL (corrupt/truncated input).  Empty input decodes to 0 bytes. */
+  if (in_len == 0)
+    return 0;
+  do
+    {
+      unsigned int ctrl = *ip++;
+
+      if (ctrl < (1 << 5)) /* literal run of ctrl + 1 bytes */
+        {
+          ctrl++;
+          if (ctrl > (unsigned int)(out_end - op))
+            goto too_big;
+          if (ctrl > (unsigned int)(in_end - ip)) /* run is cut short */
+            goto invalid;
+
+#if USE_MEMCPY
+          memcpy (op, ip, ctrl);
+          op += ctrl;
+          ip += ctrl;
+#else
+          do
+            *op++ = *ip++;
+          while (--ctrl);
+#endif
+        }
+      else /* back reference: copy len + 2 bytes from earlier output */
+        {
+          unsigned int len = ctrl >> 5;
+          unsigned int off = ((ctrl & 0x1f) << 8) + 1;
+          u8 const *ref;
+          /* need the low offset byte, plus a length byte when len == 7 */
+          if ((unsigned int)(in_end - ip) < (len == 7 ? 2u : 1u))
+            goto invalid;
+          if (len == 7)
+            len += *ip++;
+          off += *ip++;
+
+          if (len + 2 > (unsigned int)(out_end - op))
+            goto too_big;
+          if (off > (unsigned int)(op - (u8 *)out_data)) /* before start */
+            goto invalid;
+
+          /* byte-wise on purpose: source and destination may overlap */
+          ref = op - off;
+          for (len += 2; len; len--)
+            *op++ = *ref++;
+        }
+    }
+  while (op < out_end && ip < in_end);
+
+  return op - (u8 *)out_data;
+too_big:
+  SET_ERRNO (E2BIG);
+  return 0;
+invalid:
+  SET_ERRNO (EINVAL);
+  return 0;
+}
+
diff --git a/src/libpng/png.c b/src/libpng/png.c
new file mode 100644
index 0000000..40dd854
--- /dev/null
+++ b/src/libpng/png.c
@@ -0,0 +1,798 @@
+
+/* png.c - location for general purpose libpng functions
+ *
+ * Last changed in libpng 1.2.21 October 4, 2007
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+#define PNG_INTERNAL
+#define PNG_NO_EXTERN
+#include "png.h"
+
+/* Generate a compiler error if there is an old png.h in the search path. */
+typedef version_1_2_22 Your_png_h_is_not_version_1_2_22;
+
+/* Version information for C files.  This had better match the version
+ * string defined in png.h.  */
+
+#ifdef PNG_USE_GLOBAL_ARRAYS
+/* png_libpng_ver was changed to a function in version 1.0.5c */
+PNG_CONST char png_libpng_ver[18] = PNG_LIBPNG_VER_STRING;
+
+#ifdef PNG_READ_SUPPORTED
+
+/* png_sig was changed to a function in version 1.0.5c */
+/* Place to hold the signature string for a PNG file. */
+PNG_CONST png_byte FARDATA png_sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+#endif /* PNG_READ_SUPPORTED */
+
+/* Invoke global declarations for constant strings for known chunk types */
+PNG_IHDR;
+PNG_IDAT;
+PNG_IEND;
+PNG_PLTE;
+PNG_bKGD;
+PNG_cHRM;
+PNG_gAMA;
+PNG_hIST;
+PNG_iCCP;
+PNG_iTXt;
+PNG_oFFs;
+PNG_pCAL;
+PNG_sCAL;
+PNG_pHYs;
+PNG_sBIT;
+PNG_sPLT;
+PNG_sRGB;
+PNG_tEXt;
+PNG_tIME;
+PNG_tRNS;
+PNG_zTXt;
+
+#ifdef PNG_READ_SUPPORTED
+/* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+/* start of interlace block */
+PNG_CONST int FARDATA png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
+
+/* offset to next interlace block */
+PNG_CONST int FARDATA png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+
+/* start of interlace block in the y direction */
+PNG_CONST int FARDATA png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1};
+
+/* offset to next interlace block in the y direction */
+PNG_CONST int FARDATA png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2};
+
+/* Height of interlace block.  This is not currently used - if you need
+ * it, uncomment it here and in png.h
+PNG_CONST int FARDATA png_pass_height[] = {8, 8, 4, 4, 2, 2, 1};
+*/
+
+/* Mask to determine which pixels are valid in a pass */
+PNG_CONST int FARDATA png_pass_mask[] = {0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff};
+
+/* Mask to determine which pixels to overwrite while displaying */
+PNG_CONST int FARDATA png_pass_dsp_mask[]
+   = {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55, 0xff};
+
+#endif /* PNG_READ_SUPPORTED */
+#endif /* PNG_USE_GLOBAL_ARRAYS */
+
+/* Tells libpng that we have already handled the first "num_bytes" bytes
+ * of the PNG file signature.  If the PNG data is embedded into another
+ * stream we can set num_bytes = 8 so that libpng will not attempt to read
+ * or write any of the magic bytes before it starts on the IHDR.
+ */
+
+#ifdef PNG_READ_SUPPORTED
+void PNGAPI
+png_set_sig_bytes(png_structp png_ptr, int num_bytes)
+{
+   if (png_ptr == NULL)
+      return;
+   png_debug(1, "in png_set_sig_bytes\n");
+   if (num_bytes > 8)
+      png_error(png_ptr, "Too many bytes for PNG signature.");
+   png_ptr->sig_bytes = (png_byte)((num_bytes > 0) ? num_bytes : 0); /* negatives store 0 */
+}
+
+/* Checks whether the supplied bytes match the PNG signature.  We allow
+ * checking less than the full 8-byte signature so that those apps that
+ * already read the first few bytes of a file to determine the file type
+ * can simply check the remaining bytes for extra assurance.  Returns
+ * an integer less than, equal to, or greater than zero if sig is found,
+ * respectively, to be less than, to match, or be greater than the correct
+ * PNG signature (this is the same behaviour as strcmp, memcmp, etc).
+ */
+int PNGAPI
+png_sig_cmp(png_bytep sig, png_size_t start, png_size_t num_to_check)
+{
+   png_byte magic[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+   png_size_t remaining;
+
+   /* Nothing to compare, or the offset lies past the signature. */
+   if (num_to_check < 1 || start > 7)
+      return (-1);
+
+   /* Never compare beyond the end of the 8-byte signature. */
+   remaining = 8 - start;
+   if (num_to_check > remaining)
+      num_to_check = remaining;
+
+   return ((int)(png_memcmp(sig + start, magic + start, num_to_check)));
+}
+
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+/* (Obsolete) function to check signature bytes.  It does not allow one
+ * to check a partial signature.  This function might be removed in the
+ * future - use png_sig_cmp().  Returns true (nonzero) if the file is PNG.
+ */
+int PNGAPI
+png_check_sig(png_bytep sig, int num)
+{
+  return (png_sig_cmp(sig, (png_size_t)0, (png_size_t)num) == 0); /* 1 on match */
+}
+#endif
+#endif /* PNG_READ_SUPPORTED */
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+/* Function to allocate memory for zlib and clear it to 0. */
+#ifdef PNG_1_0_X
+voidpf PNGAPI
+#else
+voidpf /* private */
+#endif
+png_zalloc(voidpf png_ptr, uInt items, uInt size)
+{
+   png_voidp ptr;
+   png_structp p=(png_structp)png_ptr;
+   png_uint_32 save_flags;
+   png_uint_32 num_bytes;
+
+   /* Test for NULL before anything reads p->flags. */
+   if (p == NULL) return (NULL);
+   /* A zero size cannot overflow and must never be used as a divisor. */
+   if (size != 0 && items > PNG_UINT_32_MAX/size)
+   {
+     png_warning (p, "Potential overflow in png_zalloc()");
+     return (NULL);
+   }
+   num_bytes = (png_uint_32)items * size;
+
+   /* Set PNG_FLAG_MALLOC_NULL_MEM_OK only for the duration of the call. */
+   save_flags = p->flags;
+   p->flags|=PNG_FLAG_MALLOC_NULL_MEM_OK;
+   ptr = (png_voidp)png_malloc(p, num_bytes);
+   p->flags=save_flags;
+
+#if defined(PNG_1_0_X) && !defined(PNG_NO_ZALLOC_ZERO)
+   if (ptr == NULL) return ((voidpf)ptr);
+   if (num_bytes > (png_uint_32)0x8000L)
+   {
+      png_memset(ptr, 0, (png_size_t)0x8000L);
+      png_memset((png_bytep)ptr + (png_size_t)0x8000L, 0,
+         (png_size_t)(num_bytes - (png_uint_32)0x8000L));
+   }
+   else
+      png_memset(ptr, 0, (png_size_t)num_bytes);
+#endif
+   return ((voidpf)ptr);
+}
+
+/* function to free memory for zlib */
+#ifdef PNG_1_0_X
+void PNGAPI
+#else
+void /* private */
+#endif
+png_zfree(voidpf png_ptr, voidpf ptr)
+{
+   png_free((png_structp)png_ptr, (png_voidp)ptr); /* cast and forward to png_free() */
+}
+
+/* Reset the running CRC to zlib's starting value: crc32() called with a
+ * Z_NULL buffer returns the initial value required for a new checksum.
+ */
+void /* PRIVATE */
+png_reset_crc(png_structp png_ptr)
+{
+   png_ptr->crc = crc32(0, Z_NULL, 0);
+}
+
+/* Calculate the CRC over a section of data.  We can only pass as
+ * much data to this routine as the largest single buffer size.  We
+ * also check that this data will actually be used before going to the
+ * trouble of calculating it.
+ */
+void /* PRIVATE */
+png_calculate_crc(png_structp png_ptr, png_bytep ptr, png_size_t length)
+{
+   /* Bit 0x20 of the first chunk-name byte marks the chunk as ancillary. */
+   if (png_ptr->chunk_name[0] & 0x20)
+   {
+      /* ancillary: no CRC when both the USE and NOWARN flags are set */
+      if ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) ==
+          (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN))
+         return;
+   }
+   else if (png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE)
+   {
+      /* critical: no CRC when PNG_FLAG_CRC_CRITICAL_IGNORE is set */
+      return;
+   }
+
+   /* Fold this buffer into the running CRC. */
+   png_ptr->crc = crc32(png_ptr->crc, ptr, (uInt)length);
+}
+
+/* Allocate the memory for an info_struct for the application.  We don't
+ * really need the png_ptr, but it could potentially be useful in the
+ * future.  This should be used in favour of malloc(png_sizeof(png_info))
+ * and png_info_init() so that applications that want to use a shared
+ * libpng don't have to be recompiled if png_info changes size.
+ */
+png_infop PNGAPI
+png_create_info_struct(png_structp png_ptr)
+{
+   png_infop new_info;
+   png_debug(1, "in png_create_info_struct\n");
+   if (png_ptr == NULL)
+      return (NULL);
+#ifdef PNG_USER_MEM_SUPPORTED
+   new_info = (png_infop)png_create_struct_2(PNG_STRUCT_INFO,
+      png_ptr->malloc_fn, png_ptr->mem_ptr);
+#else
+   new_info = (png_infop)png_create_struct(PNG_STRUCT_INFO);
+#endif
+   if (new_info == NULL)
+      return (NULL);
+   png_info_init_3(&new_info, png_sizeof(png_info));
+   return (new_info);
+}
+
+/* This function frees the memory associated with a single info struct.
+ * Normally, one would use either png_destroy_read_struct() or
+ * png_destroy_write_struct() to free an info struct, but this may be
+ * useful for some applications.
+ */
+void PNGAPI
+png_destroy_info_struct(png_structp png_ptr, png_infopp info_ptr_ptr)
+{
+   png_infop doomed;
+
+   if (png_ptr == NULL)
+      return;
+
+   png_debug(1, "in png_destroy_info_struct\n");
+   doomed = (info_ptr_ptr == NULL) ? NULL : *info_ptr_ptr;
+   if (doomed == NULL)
+      return;
+
+   /* release what the struct owns, then the struct itself */
+   png_info_destroy(png_ptr, doomed);
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_destroy_struct_2((png_voidp)doomed, png_ptr->free_fn,
+       png_ptr->mem_ptr);
+#else
+   png_destroy_struct((png_voidp)doomed);
+#endif
+   *info_ptr_ptr = NULL;   /* the caller's pointer no longer refers to it */
+}
+
+/* Initialize the info structure.  This is now an internal function (0.89)
+ * and applications using it are urged to use png_create_info_struct()
+ * instead.
+ */
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+#undef png_info_init
+void PNGAPI
+png_info_init(png_infop info_ptr)
+{
+   /* We only come here via pre-1.0.12-compiled applications */
+   png_info_init_3(&info_ptr, 0); /* NOTE(review): size 0 makes png_info_init_3 replace the struct; only this local pointer is updated */
+}
+#endif
+
+void PNGAPI
+png_info_init_3(png_infopp ptr_ptr, png_size_t png_info_struct_size)
+{
+   png_infop info_ptr;
+
+   if (ptr_ptr == NULL || *ptr_ptr == NULL) return;
+   info_ptr = *ptr_ptr;
+   png_debug(1, "in png_info_init_3\n");
+   /* The caller's struct is smaller than png_info: swap in a full-size one. */
+   if(png_sizeof(png_info) > png_info_struct_size)
+     {
+       png_destroy_struct(info_ptr);
+       info_ptr = (png_infop)png_create_struct(PNG_STRUCT_INFO);
+       *ptr_ptr = info_ptr;
+       if (info_ptr == NULL) return;   /* allocation failed: nothing to clear */
+     }
+   /* set everything to 0 */
+   png_memset(info_ptr, 0, png_sizeof (png_info));
+}
+
+#ifdef PNG_FREE_ME_SUPPORTED
+void PNGAPI
+png_data_freer(png_structp png_ptr, png_infop info_ptr,
+   int freer, png_uint_32 mask)
+{
+   png_debug(1, "in png_data_freer\n");
+   if (!png_ptr || !info_ptr)
+      return;
+   /* set the mask bits in free_me for DESTROY, clear them for USER */
+   if (freer == PNG_DESTROY_WILL_FREE_DATA)
+      info_ptr->free_me = info_ptr->free_me | mask;
+   else if (freer == PNG_USER_WILL_FREE_DATA)
+      info_ptr->free_me = info_ptr->free_me & ~mask;
+   else
+      png_warning(png_ptr, "Unknown freer parameter in png_data_freer.");
+}
+#endif
+
+void PNGAPI
+png_free_data(png_structp png_ptr, png_infop info_ptr, png_uint_32 mask,
+   int num)
+{
+   png_debug(1, "in png_free_data\n");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* mask selects which item kinds to free; num picks one entry, or -1 for all */
+#if defined(PNG_TEXT_SUPPORTED)
+/* Free text item num or, if num == -1, every text item and the text array. */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_TEXT) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_TEXT)
+#endif
+{
+   if (num != -1)   /* NOTE: num is used as an index without a range check */
+   {
+     if (info_ptr->text && info_ptr->text[num].key)
+     {
+         png_free(png_ptr, info_ptr->text[num].key);
+         info_ptr->text[num].key = NULL;
+     }
+   }
+   else
+   {
+       int i;
+       for (i = 0; i < info_ptr->num_text; i++)
+           png_free_data(png_ptr, info_ptr, PNG_FREE_TEXT, i);
+       png_free(png_ptr, info_ptr->text);
+       info_ptr->text = NULL;
+       info_ptr->num_text=0;
+   }
+}
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED)
+/* Free any tRNS entry and clear its bit in info_ptr->valid. */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_TRNS) & info_ptr->free_me)
+#else
+if ((mask & PNG_FREE_TRNS) && (png_ptr->flags & PNG_FLAG_FREE_TRNS))
+#endif
+{
+    png_free(png_ptr, info_ptr->trans);
+    info_ptr->valid &= ~PNG_INFO_tRNS;
+#ifndef PNG_FREE_ME_SUPPORTED
+    png_ptr->flags &= ~PNG_FLAG_FREE_TRNS;
+#endif
+    info_ptr->trans = NULL;
+}
+#endif
+
+#if defined(PNG_sCAL_SUPPORTED)
+/* Free any sCAL entry; the strings exist only in fixed-point-only builds. */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_SCAL) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_SCAL)
+#endif
+{
+#if defined(PNG_FIXED_POINT_SUPPORTED) && !defined(PNG_FLOATING_POINT_SUPPORTED)
+    png_free(png_ptr, info_ptr->scal_s_width);
+    png_free(png_ptr, info_ptr->scal_s_height);
+    info_ptr->scal_s_width = NULL;
+    info_ptr->scal_s_height = NULL;
+#endif
+    info_ptr->valid &= ~PNG_INFO_sCAL;
+}
+#endif
+
+#if defined(PNG_pCAL_SUPPORTED)
+/* Free any pCAL entry: purpose, units, each parameter, then the array. */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_PCAL) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_PCAL)
+#endif
+{
+    png_free(png_ptr, info_ptr->pcal_purpose);
+    png_free(png_ptr, info_ptr->pcal_units);
+    info_ptr->pcal_purpose = NULL;
+    info_ptr->pcal_units = NULL;
+    if (info_ptr->pcal_params != NULL)
+    {
+        int i;
+        for (i = 0; i < (int)info_ptr->pcal_nparams; i++)
+        {
+          png_free(png_ptr, info_ptr->pcal_params[i]);
+          info_ptr->pcal_params[i]=NULL;
+        }
+        png_free(png_ptr, info_ptr->pcal_params);
+        info_ptr->pcal_params = NULL;
+    }
+    info_ptr->valid &= ~PNG_INFO_pCAL;
+}
+#endif
+
+#if defined(PNG_iCCP_SUPPORTED)
+/* Free any iCCP entry (profile name and profile data). */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_ICCP) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_ICCP)
+#endif
+{
+    png_free(png_ptr, info_ptr->iccp_name);
+    png_free(png_ptr, info_ptr->iccp_profile);
+    info_ptr->iccp_name = NULL;
+    info_ptr->iccp_profile = NULL;
+    info_ptr->valid &= ~PNG_INFO_iCCP;
+}
+#endif
+
+#if defined(PNG_sPLT_SUPPORTED)
+/* Free a given sPLT entry or, if num == -1, all sPLT entries and the array. */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_SPLT) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_SPLT)
+#endif
+{
+   if (num != -1)   /* NOTE: num is used as an index without a range check */
+   {
+      if(info_ptr->splt_palettes)
+      {
+          png_free(png_ptr, info_ptr->splt_palettes[num].name);
+          png_free(png_ptr, info_ptr->splt_palettes[num].entries);
+          info_ptr->splt_palettes[num].name = NULL;
+          info_ptr->splt_palettes[num].entries = NULL;
+      }
+   }
+   else
+   {
+       if(info_ptr->splt_palettes_num)
+       {
+         int i;
+         for (i = 0; i < (int)info_ptr->splt_palettes_num; i++)
+            png_free_data(png_ptr, info_ptr, PNG_FREE_SPLT, i);
+
+         png_free(png_ptr, info_ptr->splt_palettes);
+         info_ptr->splt_palettes = NULL;
+         info_ptr->splt_palettes_num = 0;
+       }
+       info_ptr->valid &= ~PNG_INFO_sPLT;
+   }
+}
+#endif
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+  if(png_ptr->unknown_chunk.data)   /* freed on every call, whatever mask says */
+  {
+    png_free(png_ptr, png_ptr->unknown_chunk.data);
+    png_ptr->unknown_chunk.data = NULL;
+  }
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_UNKN) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_UNKN)
+#endif
+{
+   if (num != -1)   /* NOTE: num is used as an index without a range check */
+   {
+       if(info_ptr->unknown_chunks)
+       {
+          png_free(png_ptr, info_ptr->unknown_chunks[num].data);
+          info_ptr->unknown_chunks[num].data = NULL;
+       }
+   }
+   else
+   {
+       int i;
+
+       if(info_ptr->unknown_chunks_num)
+       {
+         for (i = 0; i < (int)info_ptr->unknown_chunks_num; i++)
+            png_free_data(png_ptr, info_ptr, PNG_FREE_UNKN, i);
+
+         png_free(png_ptr, info_ptr->unknown_chunks);
+         info_ptr->unknown_chunks = NULL;
+         info_ptr->unknown_chunks_num = 0;
+       }
+   }
+}
+#endif
+
+#if defined(PNG_hIST_SUPPORTED)
+/* Free any hIST entry and clear its bit in info_ptr->valid. */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_HIST)  & info_ptr->free_me)
+#else
+if ((mask & PNG_FREE_HIST) && (png_ptr->flags & PNG_FLAG_FREE_HIST))
+#endif
+{
+    png_free(png_ptr, info_ptr->hist);
+    info_ptr->hist = NULL;
+    info_ptr->valid &= ~PNG_INFO_hIST;
+#ifndef PNG_FREE_ME_SUPPORTED
+    png_ptr->flags &= ~PNG_FLAG_FREE_HIST;
+#endif
+}
+#endif
+
+/* Free any PLTE entry that was internally allocated and reset num_palette. */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_PLTE) & info_ptr->free_me)
+#else
+if ((mask & PNG_FREE_PLTE) && (png_ptr->flags & PNG_FLAG_FREE_PLTE))
+#endif
+{
+    png_zfree(png_ptr, info_ptr->palette);   /* png_zfree() forwards to png_free() */
+    info_ptr->palette = NULL;
+    info_ptr->valid &= ~PNG_INFO_PLTE;
+#ifndef PNG_FREE_ME_SUPPORTED
+    png_ptr->flags &= ~PNG_FLAG_FREE_PLTE;
+#endif
+    info_ptr->num_palette = 0;
+}
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+/* Free any image rows attached to the info structure, then the row array. */
+#ifdef PNG_FREE_ME_SUPPORTED
+if ((mask & PNG_FREE_ROWS) & info_ptr->free_me)
+#else
+if (mask & PNG_FREE_ROWS)
+#endif
+{
+    if(info_ptr->row_pointers)
+    {
+       int row;
+       for (row = 0; row < (int)info_ptr->height; row++)
+       {
+          png_free(png_ptr, info_ptr->row_pointers[row]);
+          info_ptr->row_pointers[row]=NULL;
+       }
+       png_free(png_ptr, info_ptr->row_pointers);
+       info_ptr->row_pointers=NULL;
+    }
+    info_ptr->valid &= ~PNG_INFO_IDAT;
+}
+#endif
+
+#ifdef PNG_FREE_ME_SUPPORTED
+   if(num == -1)   /* everything selected was freed: drop all mask bits */
+     info_ptr->free_me &= ~mask;
+   else            /* single-entry call: the PNG_FREE_MUL bits stay set */
+     info_ptr->free_me &= ~(mask & ~PNG_FREE_MUL);
+#endif
+}
+
+/* This is an internal routine to free any memory that the info struct is
+ * pointing to before re-using it or freeing the struct itself.  Recall
+ * that png_free() checks for NULL pointers for us.
+ */
+void /* PRIVATE */
+png_info_destroy(png_structp png_ptr, png_infop info_ptr)
+{
+   png_debug(1, "in png_info_destroy\n");
+   /* free everything selected by PNG_FREE_ALL; num == -1 means all entries */
+   png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+   if (png_ptr->num_chunk_list)
+   {
+       png_free(png_ptr, png_ptr->chunk_list);
+       png_ptr->chunk_list=NULL;
+       png_ptr->num_chunk_list=0;
+   }
+#endif
+   /* finally zero the info struct itself */
+   png_info_init_3(&info_ptr, png_sizeof(png_info));
+}
+#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */
+
+/* This function returns a pointer to the io_ptr associated with the user
+ * functions.  The application should free any memory associated with this
+ * pointer before png_write_destroy() or png_read_destroy() are called.
+ */
+png_voidp PNGAPI
+png_get_io_ptr(png_structp png_ptr)
+{
+   /* a NULL png_ptr has no I/O pointer to report */
+   return ((png_ptr != NULL) ? png_ptr->io_ptr : NULL);
+}
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+#if !defined(PNG_NO_STDIO)
+/* Initialize the default input/output functions for the PNG file.  If you
+ * use your own read or write routines, you can call either png_set_read_fn()
+ * or png_set_write_fn() instead of png_init_io().  If you have defined
+ * PNG_NO_STDIO, you must use a function of your own because "FILE *" isn't
+ * necessarily available.
+ */
+void PNGAPI
+png_init_io(png_structp png_ptr, png_FILE_p fp)
+{
+   png_debug(1, "in png_init_io\n");
+   if (png_ptr != NULL)
+      png_ptr->io_ptr = (png_voidp)fp;   /* kept as an untyped pointer */
+}
+#endif
+
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+/* Convert the supplied time into an RFC 1123 string suitable for use in
+ * a "Creation Time" or other text-based time string.
+ */
+png_charp PNGAPI
+png_convert_to_rfc1123(png_structp png_ptr, png_timep ptime)
+{
+   static PNG_CONST char short_months[12][4] =
+        {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
+   int month_idx;   /* 0-based index into short_months, always 0..11 */
+   if(png_ptr == NULL || ptime == NULL) return (NULL);
+   if (png_ptr->time_buffer == NULL)
+   {
+      png_ptr->time_buffer = (png_charp)png_malloc(png_ptr, (png_uint_32)(29*
+         png_sizeof(char)));
+   }
+   month_idx = (int)(((unsigned int)ptime->month + 11U) % 12U); /* (month-1)%12, never negative */
+#if defined(_WIN32_WCE)
+   {
+      wchar_t time_buf[29];
+      wsprintf(time_buf, TEXT("%d %S %d %02d:%02d:%02d +0000"),
+          ptime->day % 32, short_months[month_idx],
+        ptime->year, ptime->hour % 24, ptime->minute % 60,
+          ptime->second % 61);
+      WideCharToMultiByte(CP_ACP, 0, time_buf, -1, png_ptr->time_buffer, 29,
+          NULL, NULL);
+   }
+#else
+#ifdef USE_FAR_KEYWORD
+   {
+      char near_time_buf[29];
+      png_snprintf6(near_time_buf,29,"%d %s %d %02d:%02d:%02d +0000",
+          ptime->day % 32, short_months[month_idx],
+          ptime->year, ptime->hour % 24, ptime->minute % 60,
+          ptime->second % 61);
+      png_memcpy(png_ptr->time_buffer, near_time_buf,
+          29*png_sizeof(char));
+   }
+#else
+   png_snprintf6(png_ptr->time_buffer,29,"%d %s %d %02d:%02d:%02d +0000",
+       ptime->day % 32, short_months[month_idx],
+       ptime->year, ptime->hour % 24, ptime->minute % 60,
+       ptime->second % 61);
+#endif
+#endif /* _WIN32_WCE */
+   return ((png_charp)png_ptr->time_buffer);
+}
+#endif /* PNG_TIME_RFC1123_SUPPORTED */
+
+#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */
+
+png_charp PNGAPI
+png_get_copyright(png_structp png_ptr)
+{
+   (void)png_ptr;  /* parameter is intentionally unused */
+   return ((png_charp) "\n libpng version 1.2.22 - October 13, 2007\n\
+   Copyright (c) 1998-2007 Glenn Randers-Pehrson\n\
+   Copyright (c) 1996-1997 Andreas Dilger\n\
+   Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.\n");
+}
+
+/* The following return the library version as a short string in the
+ * format 1.0.0 through 99.99.99zz.  To get the version of *.h files
+ * used with your application, print out PNG_LIBPNG_VER_STRING, which
+ * is defined in png.h.
+ * Note: now there is no difference between png_get_libpng_ver() and
+ * png_get_header_ver().  Due to the version_nn_nn_nn typedef guard,
+ * it is guaranteed that png.c uses the correct version of png.h.
+ */
+png_charp PNGAPI
+png_get_libpng_ver(png_structp png_ptr)
+{
+   /* Version string the library's *.c files were built with */
+   (void)png_ptr;  /* parameter is intentionally unused */
+   return ((png_charp) PNG_LIBPNG_VER_STRING);
+}
+
+png_charp PNGAPI
+png_get_header_ver(png_structp png_ptr)
+{
+   /* Version string of the *.h files the library was built with */
+   (void)png_ptr;  /* parameter is intentionally unused */
+   return ((png_charp) PNG_LIBPNG_VER_STRING);
+}
+
+png_charp PNGAPI
+png_get_header_version(png_structp png_ptr)
+{
+   /* Longer string holding both version and date, newline-terminated */
+   (void)png_ptr;  /* parameter is intentionally unused */
+   return ((png_charp) PNG_HEADER_VERSION_STRING
+#ifndef PNG_READ_SUPPORTED
+   "     (NO READ SUPPORT)"
+#endif
+   "\n");
+}
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+int PNGAPI
+png_handle_as_unknown(png_structp png_ptr, png_bytep chunk_name)
+{
+   /* Look chunk_name up in chunk_list, whose entries are 5 bytes each: a
+    * 4-byte name followed by its "keep" value.  Later entries are tested
+    * first; the result is 0 when the name is not listed. */
+   int idx;
+   if (png_ptr == NULL || chunk_name == NULL || png_ptr->num_chunk_list <= 0)
+      return 0;
+   for (idx = png_ptr->num_chunk_list - 1; idx >= 0; idx--)
+      if (png_memcmp(chunk_name, png_ptr->chunk_list + 5*idx, 4) == 0)
+         return ((int)png_ptr->chunk_list[5*idx + 4]);
+   return 0;
+}
+#endif
+
+/* This function, added to libpng-1.0.6g, is untested. */
+int PNGAPI
+png_reset_zstream(png_structp png_ptr)
+{
+   /* without a png_ptr there is no stream to reset */
+   return ((png_ptr == NULL) ? Z_STREAM_ERROR : inflateReset(&png_ptr->zstream));
+}
+#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */
+
+/* This function was added to libpng-1.0.7 */
+png_uint_32 PNGAPI
+png_access_version_number(void)
+{
+   /* Numeric PNG_LIBPNG_VER of the *.c files used when building libpng */
+   return((png_uint_32) PNG_LIBPNG_VER);
+}
+
+
+#if defined(PNG_READ_SUPPORTED) && defined(PNG_ASSEMBLER_CODE_SUPPORTED)
+#if !defined(PNG_1_0_X)
+/* this function was added to libpng 1.2.0 */
+int PNGAPI
+png_mmx_support(void)
+{
+   /* Obsolete stub, to be removed from libpng-1.4.0; always returns -1. */
+    return -1;
+}
+#endif /* PNG_1_0_X */
+#endif /* PNG_READ_SUPPORTED && PNG_ASSEMBLER_CODE_SUPPORTED */
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+#ifdef PNG_SIZE_T
+/* Added at libpng version 1.2.6 */
+   PNG_EXTERN png_size_t PNGAPI png_convert_size PNGARG((size_t size));
+png_size_t PNGAPI
+png_convert_size(size_t size)
+{
+  if (size > (png_size_t)-1)   /* value does not fit in png_size_t */
+     PNG_ABORT();  /* We haven't got access to png_ptr, so no png_error() */
+  return ((png_size_t)size);
+}
+#endif /* PNG_SIZE_T */
+#endif /* defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED) */
diff --git a/src/libpng/png.h b/src/libpng/png.h
new file mode 100644
index 0000000..81af541
--- /dev/null
+++ b/src/libpng/png.h
@@ -0,0 +1,3540 @@
+
+/* png.h - header file for PNG reference library
+ *
+ * libpng version 1.2.22 - October 13, 2007
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * Authors and maintainers:
+ *  libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat
+ *  libpng versions 0.89c, June 1996, through 0.96, May 1997: Andreas Dilger
+ *  libpng versions 0.97, January 1998, through 1.2.22 - October 13, 2007: Glenn
+ *  See also "Contributing Authors", below.
+ *
+ * Note about libpng version numbers:
+ *
+ *    Due to various miscommunications, unforeseen code incompatibilities
+ *    and occasional factors outside the authors' control, version numbering
+ *    on the library has not always been consistent and straightforward.
+ *    The following table summarizes matters since version 0.89c, which was
+ *    the first widely used release:
+ *
+ *    source                 png.h  png.h  shared-lib
+ *    version                string   int  version
+ *    -------                ------ -----  ----------
+ *    0.89c "1.0 beta 3"     0.89      89  1.0.89
+ *    0.90  "1.0 beta 4"     0.90      90  0.90  [should have been 2.0.90]
+ *    0.95  "1.0 beta 5"     0.95      95  0.95  [should have been 2.0.95]
+ *    0.96  "1.0 beta 6"     0.96      96  0.96  [should have been 2.0.96]
+ *    0.97b "1.00.97 beta 7" 1.00.97   97  1.0.1 [should have been 2.0.97]
+ *    0.97c                  0.97      97  2.0.97
+ *    0.98                   0.98      98  2.0.98
+ *    0.99                   0.99      98  2.0.99
+ *    0.99a-m                0.99      99  2.0.99
+ *    1.00                   1.00     100  2.1.0 [100 should be 10000]
+ *    1.0.0      (from here on, the   100  2.1.0 [100 should be 10000]
+ *    1.0.1       png.h string is   10001  2.1.0
+ *    1.0.1a-e    identical to the  10002  from here on, the shared library
+ *    1.0.2       source version)   10002  is 2.V where V is the source code
+ *    1.0.2a-b                      10003  version, except as noted.
+ *    1.0.3                         10003
+ *    1.0.3a-d                      10004
+ *    1.0.4                         10004
+ *    1.0.4a-f                      10005
+ *    1.0.5 (+ 2 patches)           10005
+ *    1.0.5a-d                      10006
+ *    1.0.5e-r                      10100 (not source compatible)
+ *    1.0.5s-v                      10006 (not binary compatible)
+ *    1.0.6 (+ 3 patches)           10006 (still binary incompatible)
+ *    1.0.6d-f                      10007 (still binary incompatible)
+ *    1.0.6g                        10007
+ *    1.0.6h                        10007  10.6h (testing xy.z so-numbering)
+ *    1.0.6i                        10007  10.6i
+ *    1.0.6j                        10007  2.1.0.6j (incompatible with 1.0.0)
+ *    1.0.7beta11-14        DLLNUM  10007  2.1.0.7beta11-14 (binary compatible)
+ *    1.0.7beta15-18           1    10007  2.1.0.7beta15-18 (binary compatible)
+ *    1.0.7rc1-2               1    10007  2.1.0.7rc1-2 (binary compatible)
+ *    1.0.7                    1    10007  (still compatible)
+ *    1.0.8beta1-4             1    10008  2.1.0.8beta1-4
+ *    1.0.8rc1                 1    10008  2.1.0.8rc1
+ *    1.0.8                    1    10008  2.1.0.8
+ *    1.0.9beta1-6             1    10009  2.1.0.9beta1-6
+ *    1.0.9rc1                 1    10009  2.1.0.9rc1
+ *    1.0.9beta7-10            1    10009  2.1.0.9beta7-10
+ *    1.0.9rc2                 1    10009  2.1.0.9rc2
+ *    1.0.9                    1    10009  2.1.0.9
+ *    1.0.10beta1              1    10010  2.1.0.10beta1
+ *    1.0.10rc1                1    10010  2.1.0.10rc1
+ *    1.0.10                   1    10010  2.1.0.10
+ *    1.0.11beta1-3            1    10011  2.1.0.11beta1-3
+ *    1.0.11rc1                1    10011  2.1.0.11rc1
+ *    1.0.11                   1    10011  2.1.0.11
+ *    1.0.12beta1-2            2    10012  2.1.0.12beta1-2
+ *    1.0.12rc1                2    10012  2.1.0.12rc1
+ *    1.0.12                   2    10012  2.1.0.12
+ *    1.1.0a-f                 -    10100  2.1.1.0a-f (branch abandoned)
+ *    1.2.0beta1-2             2    10200  2.1.2.0beta1-2
+ *    1.2.0beta3-5             3    10200  3.1.2.0beta3-5
+ *    1.2.0rc1                 3    10200  3.1.2.0rc1
+ *    1.2.0                    3    10200  3.1.2.0
+ *    1.2.1beta1-4             3    10201  3.1.2.1beta1-4
+ *    1.2.1rc1-2               3    10201  3.1.2.1rc1-2
+ *    1.2.1                    3    10201  3.1.2.1
+ *    1.2.2beta1-6            12    10202  12.so.0.1.2.2beta1-6
+ *    1.0.13beta1             10    10013  10.so.0.1.0.13beta1
+ *    1.0.13rc1               10    10013  10.so.0.1.0.13rc1
+ *    1.2.2rc1                12    10202  12.so.0.1.2.2rc1
+ *    1.0.13                  10    10013  10.so.0.1.0.13
+ *    1.2.2                   12    10202  12.so.0.1.2.2
+ *    1.2.3rc1-6              12    10203  12.so.0.1.2.3rc1-6
+ *    1.2.3                   12    10203  12.so.0.1.2.3
+ *    1.2.4beta1-3            13    10204  12.so.0.1.2.4beta1-3
+ *    1.0.14rc1               13    10014  10.so.0.1.0.14rc1
+ *    1.2.4rc1                13    10204  12.so.0.1.2.4rc1
+ *    1.0.14                  10    10014  10.so.0.1.0.14
+ *    1.2.4                   13    10204  12.so.0.1.2.4
+ *    1.2.5beta1-2            13    10205  12.so.0.1.2.5beta1-2
+ *    1.0.15rc1-3             10    10015  10.so.0.1.0.15rc1-3
+ *    1.2.5rc1-3              13    10205  12.so.0.1.2.5rc1-3
+ *    1.0.15                  10    10015  10.so.0.1.0.15
+ *    1.2.5                   13    10205  12.so.0.1.2.5
+ *    1.2.6beta1-4            13    10206  12.so.0.1.2.6beta1-4
+ *    1.0.16                  10    10016  10.so.0.1.0.16
+ *    1.2.6                   13    10206  12.so.0.1.2.6
+ *    1.2.7beta1-2            13    10207  12.so.0.1.2.7beta1-2
+ *    1.0.17rc1               10    10017  10.so.0.1.0.17rc1
+ *    1.2.7rc1                13    10207  12.so.0.1.2.7rc1
+ *    1.0.17                  10    10017  10.so.0.1.0.17
+ *    1.2.7                   13    10207  12.so.0.1.2.7
+ *    1.2.8beta1-5            13    10208  12.so.0.1.2.8beta1-5
+ *    1.0.18rc1-5             10    10018  10.so.0.1.0.18rc1-5
+ *    1.2.8rc1-5              13    10208  12.so.0.1.2.8rc1-5
+ *    1.0.18                  10    10018  10.so.0.1.0.18
+ *    1.2.8                   13    10208  12.so.0.1.2.8
+ *    1.2.9beta1-3            13    10209  12.so.0.1.2.9beta1-3
+ *    1.2.9beta4-11           13    10209  12.so.0.9[.0]
+ *    1.2.9rc1                13    10209  12.so.0.9[.0]
+ *    1.2.9                   13    10209  12.so.0.9[.0]
+ *    1.2.10beta1-8           13    10210  12.so.0.10[.0]
+ *    1.2.10rc1-3             13    10210  12.so.0.10[.0]
+ *    1.2.10                  13    10210  12.so.0.10[.0]
+ *    1.2.11beta1-4           13    10211  12.so.0.11[.0]
+ *    1.0.19rc1-5             10    10019  10.so.0.19[.0]
+ *    1.2.11rc1-5             13    10211  12.so.0.11[.0]
+ *    1.0.19                  10    10019  10.so.0.19[.0]
+ *    1.2.11                  13    10211  12.so.0.11[.0]
+ *    1.0.20                  10    10020  10.so.0.20[.0]
+ *    1.2.12                  13    10212  12.so.0.12[.0]
+ *    1.2.13beta1             13    10213  12.so.0.13[.0]
+ *    1.0.21                  10    10021  10.so.0.21[.0]
+ *    1.2.13                  13    10213  12.so.0.13[.0]
+ *    1.2.14beta1-2           13    10214  12.so.0.14[.0]
+ *    1.0.22rc1               10    10022  10.so.0.22[.0]
+ *    1.2.14rc1               13    10214  12.so.0.14[.0]
+ *    1.0.22                  10    10022  10.so.0.22[.0]
+ *    1.2.14                  13    10214  12.so.0.14[.0]
+ *    1.2.15beta1-6           13    10215  12.so.0.15[.0]
+ *    1.0.23rc1-5             10    10023  10.so.0.23[.0]
+ *    1.2.15rc1-5             13    10215  12.so.0.15[.0]
+ *    1.0.23                  10    10023  10.so.0.23[.0]
+ *    1.2.15                  13    10215  12.so.0.15[.0]
+ *    1.2.16beta1-2           13    10216  12.so.0.16[.0]
+ *    1.2.16rc1               13    10216  12.so.0.16[.0]
+ *    1.0.24                  10    10024  10.so.0.24[.0]
+ *    1.2.16                  13    10216  12.so.0.16[.0]
+ *    1.2.17beta1-2           13    10217  12.so.0.17[.0]
+ *    1.0.25rc1               10    10025  10.so.0.25[.0]
+ *    1.2.17rc1-3             13    10217  12.so.0.17[.0]
+ *    1.0.25                  10    10025  10.so.0.25[.0]
+ *    1.2.17                  13    10217  12.so.0.17[.0]
+ *    1.0.26                  10    10026  10.so.0.26[.0]
+ *    1.2.18                  13    10218  12.so.0.18[.0]
+ *    1.2.19beta1-31          13    10219  12.so.0.19[.0]
+ *    1.0.27rc1-6             10    10027  10.so.0.27[.0]
+ *    1.2.19rc1-6             13    10219  12.so.0.19[.0]
+ *    1.0.27                  10    10027  10.so.0.27[.0]
+ *    1.2.19                  13    10219  12.so.0.19[.0]
+ *    1.2.20beta01-04         13    10220  12.so.0.20[.0]
+ *    1.0.28rc1-6             10    10028  10.so.0.28[.0]
+ *    1.2.20rc1-6             13    10220  12.so.0.20[.0]
+ *    1.0.28                  10    10028  10.so.0.28[.0]
+ *    1.2.20                  13    10220  12.so.0.20[.0]
+ *    1.2.21beta1-2           13    10221  12.so.0.21[.0]
+ *    1.2.21rc1-3             13    10221  12.so.0.21[.0]
+ *    1.0.29                  10    10029  10.so.0.29[.0]
+ *    1.2.21                  13    10221  12.so.0.21[.0]
+ *    1.2.22beta1-4           13    10222  12.so.0.22[.0]
+ *    1.0.30rc1               10    10030  10.so.0.30[.0]
+ *    1.2.22rc1               13    10222  12.so.0.22[.0]
+ *    1.0.30                  10    10030  10.so.0.30[.0]
+ *    1.2.22                  13    10222  12.so.0.22[.0]
+ *
+ *    Henceforth the source version will match the shared-library major
+ *    and minor numbers; the shared-library major version number will be
+ *    used for changes in backward compatibility, as it is intended.  The
+ *    PNG_LIBPNG_VER macro, which is not used within libpng but is available
+ *    for applications, is an unsigned integer of the form xyyzz corresponding
+ *    to the source version x.y.z (leading zeros in y and z).  Beta versions
+ *    were given the previous public release number plus a letter, until
+ *    version 1.0.6j; from then on they were given the upcoming public
+ *    release number plus "betaNN" or "rcN".
+ *
+ *    Binary incompatibility exists only when applications make direct access
+ *    to the info_ptr or png_ptr members through png.h, and the compiled
+ *    application is loaded with a different version of the library.
+ *
+ *    DLLNUM will change each time there are forward or backward changes
+ *    in binary compatibility (e.g., when a new feature is added).
+ *
+ * See libpng.txt or libpng.3 for more information.  The PNG specification
+ * is available as a W3C Recommendation and as an ISO Specification,
+ * <http://www.w3.org/TR/2003/REC-PNG-20031110/>
+ */
+
+/*
+ * COPYRIGHT NOTICE, DISCLAIMER, and LICENSE:
+ *
+ * If you modify libpng you may insert additional notices immediately following
+ * this sentence.
+ *
+ * libpng versions 1.2.6, August 15, 2004, through 1.2.22, October 13, 2007, are
+ * Copyright (c) 2004, 2006-2007 Glenn Randers-Pehrson, and are
+ * distributed according to the same disclaimer and license as libpng-1.2.5
+ * with the following individual added to the list of Contributing Authors:
+ *
+ *    Cosmin Truta
+ *
+ * libpng versions 1.0.7, July 1, 2000, through 1.2.5, October 3, 2002, are
+ * Copyright (c) 2000-2002 Glenn Randers-Pehrson, and are
+ * distributed according to the same disclaimer and license as libpng-1.0.6
+ * with the following individuals added to the list of Contributing Authors:
+ *
+ *    Simon-Pierre Cadieux
+ *    Eric S. Raymond
+ *    Gilles Vollant
+ *
+ * and with the following additions to the disclaimer:
+ *
+ *    There is no warranty against interference with your enjoyment of the
+ *    library or against infringement.  There is no warranty that our
+ *    efforts or the library will fulfill any of your particular purposes
+ *    or needs.  This library is provided with all faults, and the entire
+ *    risk of satisfactory quality, performance, accuracy, and effort is with
+ *    the user.
+ *
+ * libpng versions 0.97, January 1998, through 1.0.6, March 20, 2000, are
+ * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson, and are
+ * distributed according to the same disclaimer and license as libpng-0.96,
+ * with the following individuals added to the list of Contributing Authors:
+ *
+ *    Tom Lane
+ *    Glenn Randers-Pehrson
+ *    Willem van Schaik
+ *
+ * libpng versions 0.89, June 1996, through 0.96, May 1997, are
+ * Copyright (c) 1996, 1997 Andreas Dilger
+ * Distributed according to the same disclaimer and license as libpng-0.88,
+ * with the following individuals added to the list of Contributing Authors:
+ *
+ *    John Bowler
+ *    Kevin Bracey
+ *    Sam Bushell
+ *    Magnus Holmgren
+ *    Greg Roelofs
+ *    Tom Tanner
+ *
+ * libpng versions 0.5, May 1995, through 0.88, January 1996, are
+ * Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.
+ *
+ * For the purposes of this copyright and license, "Contributing Authors"
+ * is defined as the following set of individuals:
+ *
+ *    Andreas Dilger
+ *    Dave Martindale
+ *    Guy Eric Schalnat
+ *    Paul Schmidt
+ *    Tim Wegner
+ *
+ * The PNG Reference Library is supplied "AS IS".  The Contributing Authors
+ * and Group 42, Inc. disclaim all warranties, expressed or implied,
+ * including, without limitation, the warranties of merchantability and of
+ * fitness for any purpose.  The Contributing Authors and Group 42, Inc.
+ * assume no liability for direct, indirect, incidental, special, exemplary,
+ * or consequential damages, which may result from the use of the PNG
+ * Reference Library, even if advised of the possibility of such damage.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute this
+ * source code, or portions hereof, for any purpose, without fee, subject
+ * to the following restrictions:
+ *
+ * 1. The origin of this source code must not be misrepresented.
+ *
+ * 2. Altered versions must be plainly marked as such and
+ * must not be misrepresented as being the original source.
+ *
+ * 3. This Copyright notice may not be removed or altered from
+ *    any source or altered source distribution.
+ *
+ * The Contributing Authors and Group 42, Inc. specifically permit, without
+ * fee, and encourage the use of this source code as a component to
+ * supporting the PNG file format in commercial products.  If you use this
+ * source code in a product, acknowledgment is not required but would be
+ * appreciated.
+ */
+
+/*
+ * A "png_get_copyright" function is available, for convenient use in "about"
+ * boxes and the like:
+ *
+ * printf("%s",png_get_copyright(NULL));
+ *
+ * Also, the PNG logo (in PNG format, of course) is supplied in the
+ * files "pngbar.png" and "pngbar.jpg" (88x31) and "pngnow.png" (98x31).
+ */
+
+/*
+ * Libpng is OSI Certified Open Source Software.  OSI Certified is a
+ * certification mark of the Open Source Initiative.
+ */
+
+/*
+ * The contributing authors would like to thank all those who helped
+ * with testing, bug fixes, and patience.  This wouldn't have been
+ * possible without all of you.
+ *
+ * Thanks to Frank J. T. Wojcik for helping with the documentation.
+ */
+
+/*
+ * Y2K compliance in libpng:
+ * =========================
+ *
+ *    October 13, 2007
+ *
+ *    Since the PNG Development group is an ad-hoc body, we can't make
+ *    an official declaration.
+ *
+ *    This is your unofficial assurance that libpng from version 0.71 and
+ *    upward through 1.2.22 is Y2K compliant.  It is my belief that earlier
+ *    versions were also Y2K compliant.
+ *
+ *    Libpng only has three year fields.  One is a 2-byte unsigned integer
+ *    that will hold years up to 65535.  The other two hold the date in text
+ *    format, and will hold years up to 9999.
+ *
+ *    The integer is
+ *        "png_uint_16 year" in png_time_struct.
+ *
+ *    The strings are
+ *        "png_charp time_buffer" in png_struct and
+ *        "near_time_buffer", which is a local character string in png.c.
+ *
+ *    There are seven time-related functions:
+ *        png_convert_to_rfc_1123() in png.c
+ *          (formerly png_convert_to_rfc_1152() in error)
+ *        png_convert_from_struct_tm() in pngwrite.c, called in pngwrite.c
+ *        png_convert_from_time_t() in pngwrite.c
+ *        png_get_tIME() in pngget.c
+ *        png_handle_tIME() in pngrutil.c, called in pngread.c
+ *        png_set_tIME() in pngset.c
+ *        png_write_tIME() in pngwutil.c, called in pngwrite.c
+ *
+ *    All handle dates properly in a Y2K environment.  The
+ *    png_convert_from_time_t() function calls gmtime() to convert from system
+ *    clock time, which returns (year - 1900), which we properly convert to
+ *    the full 4-digit year.  There is a possibility that applications using
+ *    libpng are not passing 4-digit years into the png_convert_to_rfc_1123()
+ *    function, or that they are incorrectly passing only a 2-digit year
+ *    instead of "year - 1900" into the png_convert_from_struct_tm() function,
+ *    but this is not under our control.  The libpng documentation has always
+ *    stated that it works with 4-digit years, and the APIs have been
+ *    documented as such.
+ *
+ *    The tIME chunk itself is also Y2K compliant.  It uses a 2-byte unsigned
+ *    integer to hold the year, and can hold years as large as 65535.
+ *
+ *    zlib, upon which libpng depends, is also Y2K compliant.  It contains
+ *    no date-related code.
+ *
+ *       Glenn Randers-Pehrson
+ *       libpng maintainer
+ *       PNG Development Group
+ */
+
+#ifndef PNG_H
+#define PNG_H
+
+/* This is not the place to learn how to use libpng.  The file libpng.txt
+ * describes how to use libpng, and the file example.c summarizes it
+ * with some code on which to build.  This file is useful for looking
+ * at the actual function definitions and structure components.
+ */
+
+/* Version information for png.h - this should match the version in png.c */
+#define PNG_LIBPNG_VER_STRING "1.2.22"
+#define PNG_HEADER_VERSION_STRING \
+   " libpng version 1.2.22 - October 13, 2007\n"
+
+#define PNG_LIBPNG_VER_SONUM   0
+#define PNG_LIBPNG_VER_DLLNUM  13
+
+/* These should match the first 3 components of PNG_LIBPNG_VER_STRING: */
+#define PNG_LIBPNG_VER_MAJOR   1
+#define PNG_LIBPNG_VER_MINOR   2
+#define PNG_LIBPNG_VER_RELEASE 22
+/* This should match the numeric part of the final component of
+ * PNG_LIBPNG_VER_STRING, omitting any leading zero: */
+
+#define PNG_LIBPNG_VER_BUILD  0
+
+/* Release Status */
+#define PNG_LIBPNG_BUILD_ALPHA    1
+#define PNG_LIBPNG_BUILD_BETA     2
+#define PNG_LIBPNG_BUILD_RC       3
+#define PNG_LIBPNG_BUILD_STABLE   4
+#define PNG_LIBPNG_BUILD_RELEASE_STATUS_MASK 7
+  
+/* Release-Specific Flags */
+#define PNG_LIBPNG_BUILD_PATCH    8 /* Can be OR'ed with
+                                       PNG_LIBPNG_BUILD_STABLE only */
+#define PNG_LIBPNG_BUILD_PRIVATE 16 /* Cannot be OR'ed with
+                                       PNG_LIBPNG_BUILD_SPECIAL */
+#define PNG_LIBPNG_BUILD_SPECIAL 32 /* Cannot be OR'ed with
+                                       PNG_LIBPNG_BUILD_PRIVATE */
+
+#define PNG_LIBPNG_BUILD_BASE_TYPE PNG_LIBPNG_BUILD_STABLE
+
+/* Careful here.  At one time, Guy wanted to use 082, but that would be octal.
+ * We must not include leading zeros.
+ * Versions 0.7 through 1.0.0 were in the range 0 to 100 here (only
+ * version 1.0.0 was mis-numbered 100 instead of 10000).  From
+ * version 1.0.1 it's xyyzz, where x=major, yy=minor, zz=release */
+#define PNG_LIBPNG_VER 10222 /* 1.2.22 */
+
+#ifndef PNG_VERSION_INFO_ONLY
+/* include the compression library's header */
+#include "zlib.h"
+#endif
+
+/* include all user configurable info, including optional assembler routines */
+#include "pngconf.h"
+
+/*
+ * Added at libpng-1.2.8 */
+/* Ref MSDN: Private has priority over Special
+ * VS_FF_PRIVATEBUILD File *was not* built using standard release
+ * procedures. If this value is given, the StringFileInfo block must
+ * contain a PrivateBuild string. 
+ *
+ * VS_FF_SPECIALBUILD File *was* built by the original company using
+ * standard release procedures but is a variation of the standard
+ * file of the same version number. If this value is given, the
+ * StringFileInfo block must contain a SpecialBuild string. 
+ */
+
+#if defined(PNG_USER_PRIVATEBUILD)
+#  define PNG_LIBPNG_BUILD_TYPE \
+          (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_PRIVATE)
+#else
+#  if defined(PNG_LIBPNG_SPECIALBUILD)
+#    define PNG_LIBPNG_BUILD_TYPE \
+            (PNG_LIBPNG_BUILD_BASE_TYPE | PNG_LIBPNG_BUILD_SPECIAL)
+#  else
+#    define PNG_LIBPNG_BUILD_TYPE (PNG_LIBPNG_BUILD_BASE_TYPE)
+#  endif
+#endif
+
+#ifndef PNG_VERSION_INFO_ONLY
+
+/* Inhibit C++ name-mangling for libpng functions but not for system calls. */
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* This file is arranged in several sections.  The first section contains
+ * structure and type definitions.  The second section contains the external
+ * library functions, while the third has the internal library functions,
+ * which applications aren't expected to use directly.
+ */
+
+#ifndef PNG_NO_TYPECAST_NULL
+#define int_p_NULL                (int *)NULL
+#define png_bytep_NULL            (png_bytep)NULL
+#define png_bytepp_NULL           (png_bytepp)NULL
+#define png_doublep_NULL          (png_doublep)NULL
+#define png_error_ptr_NULL        (png_error_ptr)NULL
+#define png_flush_ptr_NULL        (png_flush_ptr)NULL
+#define png_free_ptr_NULL         (png_free_ptr)NULL
+#define png_infopp_NULL           (png_infopp)NULL
+#define png_malloc_ptr_NULL       (png_malloc_ptr)NULL
+#define png_read_status_ptr_NULL  (png_read_status_ptr)NULL
+#define png_rw_ptr_NULL           (png_rw_ptr)NULL
+#define png_structp_NULL          (png_structp)NULL
+#define png_uint_16p_NULL         (png_uint_16p)NULL
+#define png_voidp_NULL            (png_voidp)NULL
+#define png_write_status_ptr_NULL (png_write_status_ptr)NULL
+#else
+#define int_p_NULL                NULL
+#define png_bytep_NULL            NULL
+#define png_bytepp_NULL           NULL
+#define png_doublep_NULL          NULL
+#define png_error_ptr_NULL        NULL
+#define png_flush_ptr_NULL        NULL
+#define png_free_ptr_NULL         NULL
+#define png_infopp_NULL           NULL
+#define png_malloc_ptr_NULL       NULL
+#define png_read_status_ptr_NULL  NULL
+#define png_rw_ptr_NULL           NULL
+#define png_structp_NULL          NULL
+#define png_uint_16p_NULL         NULL
+#define png_voidp_NULL            NULL
+#define png_write_status_ptr_NULL NULL
+#endif
+
+/* variables declared in png.c - only it needs to define PNG_NO_EXTERN */
+#if !defined(PNG_NO_EXTERN) || defined(PNG_ALWAYS_EXTERN)
+/* Version information for C files, stored in png.c.  This had better match
+ * the version above.
+ */
+#ifdef PNG_USE_GLOBAL_ARRAYS
+PNG_EXPORT_VAR (PNG_CONST char) png_libpng_ver[18];
+  /* need room for 99.99.99beta99z */
+#else
+#define png_libpng_ver png_get_header_ver(NULL)
+#endif
+
+#ifdef PNG_USE_GLOBAL_ARRAYS
+/* This was removed in version 1.0.5c */
+/* Structures to facilitate easy interlacing.  See png.c for more details */
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_start[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_inc[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_ystart[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_yinc[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_mask[7];
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_dsp_mask[7];
+/* This isn't currently used.  If you need it, see png.c for more details.
+PNG_EXPORT_VAR (PNG_CONST int FARDATA) png_pass_height[7];
+*/
+#endif
+
+#endif /* PNG_NO_EXTERN */
+
+/* Three color definitions.  The order of the red, green, and blue, (and the
+ * exact size) is not important, although the size of the fields needs to
+ * be png_byte or png_uint_16 (as defined below).
+ */
+typedef struct png_color_struct  /* one color, a png_byte per component */
+{
+   png_byte red;
+   png_byte green;
+   png_byte blue;
+} png_color;
+typedef png_color FAR * png_colorp;        /* pointer to a png_color */
+typedef png_color FAR * FAR * png_colorpp; /* pointer to a png_colorp */
+
+typedef struct png_color_16_struct
+{
+   png_byte index;    /* used for palette files */
+   png_uint_16 red;   /* for use in red green blue files */
+   png_uint_16 green; /* for use in red green blue files */
+   png_uint_16 blue;  /* for use in red green blue files */
+   png_uint_16 gray;  /* for use in grayscale files */
+} png_color_16;
+typedef png_color_16 FAR * png_color_16p;        /* pointer to a png_color_16 */
+typedef png_color_16 FAR * FAR * png_color_16pp; /* pointer to a png_color_16p */
+
+typedef struct png_color_8_struct  /* one png_byte value per channel */
+{
+   png_byte red;   /* for use in red green blue files */
+   png_byte green; /* for use in red green blue files */
+   png_byte blue;  /* for use in red green blue files */
+   png_byte gray;  /* for use in grayscale files */
+   png_byte alpha; /* for alpha channel files */
+} png_color_8;
+typedef png_color_8 FAR * png_color_8p;        /* pointer to a png_color_8 */
+typedef png_color_8 FAR * FAR * png_color_8pp; /* pointer to a png_color_8p */
+
+/*
+ * The following two structures are used for the in-core representation
+ * of sPLT chunks.
+ */
+typedef struct png_sPLT_entry_struct  /* one entry of an sPLT palette */
+{
+   png_uint_16 red;       /* in the LSB when the palette depth is 8 */
+   png_uint_16 green;     /* in the LSB when the palette depth is 8 */
+   png_uint_16 blue;      /* in the LSB when the palette depth is 8 */
+   png_uint_16 alpha;     /* in the LSB when the palette depth is 8 */
+   png_uint_16 frequency; /* always occupies the full 16 bits */
+} png_sPLT_entry;
+typedef png_sPLT_entry FAR * png_sPLT_entryp;        /* pointer to an entry */
+typedef png_sPLT_entry FAR * FAR * png_sPLT_entrypp; /* pointer to a png_sPLT_entryp */
+
+/*  When the depth of the sPLT palette is 8 bits, the color and alpha samples
+ *  occupy the LSB of their respective members, and the MSB of each member
+ *  is zero-filled.  The frequency member always occupies the full 16 bits.
+ */
+
+typedef struct png_sPLT_struct  /* in-core representation of an sPLT chunk */
+{
+   png_charp name;           /* palette name */
+   png_byte depth;           /* depth of palette samples */
+   png_sPLT_entryp entries;  /* palette entries */
+   png_int_32 nentries;      /* number of palette entries */
+} png_sPLT_t;
+typedef png_sPLT_t FAR * png_sPLT_tp;        /* pointer to a png_sPLT_t */
+typedef png_sPLT_t FAR * FAR * png_sPLT_tpp; /* pointer to a png_sPLT_tp */
+
+#ifdef PNG_TEXT_SUPPORTED
+/* png_text holds the contents of a text/ztxt/itxt chunk in a PNG file,
+ * and whether that contents is compressed or not.  The "key" field
+ * points to a regular zero-terminated C string.  The "text", "lang", and
+ * "lang_key" fields can be regular C strings, empty strings, or NULL pointers.
+ * However, the structure returned by png_get_text() will always contain
+ * regular zero-terminated C strings (possibly empty), never NULL pointers,
+ * so they can be safely used in printf() and other string-handling functions.
+ */
+typedef struct png_text_struct
+{
+   int  compression;       /* compression value:
+                             -1: tEXt, none
+                              0: zTXt, deflate
+                              1: iTXt, none
+                              2: iTXt, deflate  */
+   png_charp key;          /* keyword, 1-79 character description of "text" */
+   png_charp text;         /* comment, may be an empty string (ie "")
+                              or a NULL pointer */
+   png_size_t text_length; /* length of the text string */
+#ifdef PNG_iTXt_SUPPORTED
+   png_size_t itxt_length; /* length of the itxt string */
+   png_charp lang;         /* language code, 0-79 characters
+                              or a NULL pointer */
+   png_charp lang_key;     /* keyword translated UTF-8 string, 0 or more
+                              chars or a NULL pointer */
+#endif /* PNG_iTXt_SUPPORTED */
+} png_text;
+typedef png_text FAR * png_textp;        /* pointer to a png_text */
+typedef png_text FAR * FAR * png_textpp; /* pointer to a png_textp */
+#endif
+
+/* Supported compression types for text in PNG files (tEXt, and zTXt).
+ * The values of the PNG_TEXT_COMPRESSION_ defines should NOT be changed. */
+#define PNG_TEXT_COMPRESSION_NONE_WR -3
+#define PNG_TEXT_COMPRESSION_zTXt_WR -2
+#define PNG_TEXT_COMPRESSION_NONE    -1
+#define PNG_TEXT_COMPRESSION_zTXt     0
+#define PNG_ITXT_COMPRESSION_NONE     1
+#define PNG_ITXT_COMPRESSION_zTXt     2
+#define PNG_TEXT_COMPRESSION_LAST     3  /* Not a valid value */
+
+/* png_time is a way to hold the time in a machine-independent way.
+ * Two conversions are provided, both from time_t and struct tm.  There
+ * is no portable way to convert to either of these structures, as far
+ * as I know.  If you know of a portable way, send it to me.  As a side
+ * note - PNG has always been Year 2000 compliant!
+ */
+typedef struct png_time_struct
+{
+   png_uint_16 year; /* full year, as in, 1995 */
+   png_byte month;   /* month of year, 1 - 12 */
+   png_byte day;     /* day of month, 1 - 31 */
+   png_byte hour;    /* hour of day, 0 - 23 */
+   png_byte minute;  /* minute of hour, 0 - 59 */
+   png_byte second;  /* second of minute, 0 - 60 (for leap seconds) */
+} png_time;
+typedef png_time FAR * png_timep;        /* pointer to a png_time */
+typedef png_time FAR * FAR * png_timepp; /* pointer to a png_timep */
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+/* png_unknown_chunk is a structure to hold queued chunks for which there is
+ * no specific support.  The idea is that we can use this to queue
+ * up private chunks for output even though the library doesn't actually
+ * know about their semantics.
+ */
+typedef struct png_unknown_chunk_t
+{
+    png_byte name[5];  /* presumably 4 chunk-type bytes plus a NUL -- confirm */
+    png_byte *data;    /* chunk payload; presumably "size" bytes -- confirm */
+    png_size_t size;   /* NOTE(review): looks like the length of data -- confirm */
+
+    /* libpng-using applications should NOT directly modify this byte. */
+    png_byte location; /* mode of operation at read time */
+}
+png_unknown_chunk;
+typedef png_unknown_chunk FAR * png_unknown_chunkp;        /* pointer to a chunk */
+typedef png_unknown_chunk FAR * FAR * png_unknown_chunkpp; /* pointer to a png_unknown_chunkp */
+#endif
+
+/* png_info is a structure that holds the information in a PNG file so
+ * that the application can find out the characteristics of the image.
+ * If you are reading the file, this structure will tell you what is
+ * in the PNG file.  If you are writing the file, fill in the information
+ * you want to put into the PNG file, then call png_write_info().
+ * The names chosen should be very close to the PNG specification, so
+ * consult that document for information about the meaning of each field.
+ *
+ * With libpng < 0.95, it was only possible to directly set and read the
+ * values in the png_info_struct, which meant that the contents and
+ * order of the values had to remain fixed.  With libpng 0.95 and later,
+ * however, there are now functions that abstract the contents of
+ * png_info_struct from the application, so this makes it easier to use
+ * libpng with dynamic libraries, and even makes it possible to use
+ * libraries that don't have all of the libpng ancillary chunk-handling
+ * functionality.
+ *
+ * In any case, the order of the parameters in png_info_struct should NOT
+ * be changed for as long as possible to keep compatibility with applications
+ * that use the old direct-access method with png_info_struct.
+ *
+ * The following members may have allocated storage attached that should be
+ * cleaned up before the structure is discarded: palette, trans, text,
+ * pcal_purpose, pcal_units, pcal_params, hist, iccp_name, iccp_profile,
+ * splt_palettes, scal_unit, row_pointers, and unknowns.   By default, these
+ * are automatically freed when the info structure is deallocated, if they were
+ * allocated internally by libpng.  This behavior can be changed by means
+ * of the png_data_freer() function.
+ *
+ * More allocation details: all the chunk-reading functions that
+ * change these members go through the corresponding png_set_*
+ * functions.  A function to clear these members is available: see
+ * png_free_data().  The png_set_* functions do not depend on being
+ * able to point info structure members to any of the storage they are
+ * passed (they make their own copies), EXCEPT that the png_set_text
+ * functions use the same storage passed to them in the text_ptr or
+ * itxt_ptr structure argument, and the png_set_rows and png_set_unknowns
+ * functions do not make their own copies.
+ */
+typedef struct png_info_struct
+{
+   /* the following are necessary for every PNG file */
+   png_uint_32 width;       /* width of image in pixels (from IHDR) */
+   png_uint_32 height;      /* height of image in pixels (from IHDR) */
+   png_uint_32 valid;       /* valid chunk data (see PNG_INFO_ below) */
+   png_uint_32 rowbytes;    /* bytes needed to hold an untransformed row */
+   png_colorp palette;      /* array of color values (valid & PNG_INFO_PLTE) */
+   png_uint_16 num_palette; /* number of color entries in "palette" (PLTE) */
+   png_uint_16 num_trans;   /* number of transparent palette colors (tRNS) */
+   png_byte bit_depth;      /* 1, 2, 4, 8, or 16 bits/channel (from IHDR) */
+   png_byte color_type;     /* see PNG_COLOR_TYPE_ below (from IHDR) */
+   /* The following three should have been named *_method not *_type */
+   png_byte compression_type; /* must be PNG_COMPRESSION_TYPE_BASE (IHDR) */
+   png_byte filter_type;    /* must be PNG_FILTER_TYPE_BASE (from IHDR) */
+   png_byte interlace_type; /* One of PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */
+
+   /* The following are informational only on read, and not used on writes. */
+   png_byte channels;       /* number of data channels per pixel (1, 2, 3, 4) */
+   png_byte pixel_depth;    /* number of bits per pixel */
+   png_byte spare_byte;     /* to align the data, and for future use */
+   png_byte signature[8];   /* magic bytes read by libpng from start of file */
+
+   /* The rest of the data is optional.  If you are reading, check the
+    * valid field to see if the information in these is valid.  If you
+    * are writing, set the valid field to those chunks you want written,
+    * and initialize the appropriate fields below.
+    */
+
+#if defined(PNG_gAMA_SUPPORTED) && defined(PNG_FLOATING_POINT_SUPPORTED)
+   /* The gAMA chunk describes the gamma characteristics of the system
+    * on which the image was created, normally in the range [1.0, 2.5].
+    * Data is valid if (valid & PNG_INFO_gAMA) is non-zero.
+    */
+   float gamma; /* gamma value of image, if (valid & PNG_INFO_gAMA) */
+#endif
+
+#if defined(PNG_sRGB_SUPPORTED)
+    /* GR-P, 0.96a */
+    /* Data valid if (valid & PNG_INFO_sRGB) non-zero. */
+   png_byte srgb_intent; /* sRGB rendering intent [0, 1, 2, or 3] */
+#endif
+
+#if defined(PNG_TEXT_SUPPORTED)
+   /* The tEXt, zTXt, and iTXt chunks contain human-readable textual data in
+    * uncompressed, compressed, and optionally compressed forms, respectively.
+    * The data in "text" is an array of pointers to uncompressed,
+    * null-terminated C strings. Each chunk has a keyword that describes the
+    * textual data contained in that chunk.  Keywords are not required to be
+    * unique, and the text string may be empty.  Any number of text chunks may
+    * be in an image.
+    */
+   int num_text; /* number of comments read/to write */
+   int max_text; /* current size of text array */
+   png_textp text; /* array of comments read/to write */
+#endif /* PNG_TEXT_SUPPORTED */
+
+#if defined(PNG_tIME_SUPPORTED)
+   /* The tIME chunk holds the last time the displayed image data was
+    * modified.  See the png_time struct for the contents of this struct.
+    */
+   png_time mod_time;
+#endif
+
+#if defined(PNG_sBIT_SUPPORTED)
+   /* The sBIT chunk specifies the number of significant high-order bits
+    * in the pixel data.  Values are in the range [1, bit_depth], and are
+    * only specified for the channels in the pixel data.  The contents of
+    * the low-order bits is not specified.  Data is valid if
+    * (valid & PNG_INFO_sBIT) is non-zero.
+    */
+   png_color_8 sig_bit; /* significant bits in color channels */
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_EXPAND_SUPPORTED) || \
+defined(PNG_READ_BACKGROUND_SUPPORTED)
+   /* The tRNS chunk supplies transparency data for paletted images and
+    * other image types that don't need a full alpha channel.  There are
+    * "num_trans" transparency values for a paletted image, stored in the
+    * same order as the palette colors, starting from index 0.  Values
+    * for the data are in the range [0, 255], ranging from fully transparent
+    * to fully opaque, respectively.  For non-paletted images, there is a
+    * single color specified that should be treated as fully transparent.
+    * Data is valid if (valid & PNG_INFO_tRNS) is non-zero.
+    */
+   png_bytep trans; /* transparent values for paletted image */
+   png_color_16 trans_values; /* transparent color for non-palette image */
+#endif
+
+#if defined(PNG_bKGD_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   /* The bKGD chunk gives the suggested image background color if the
+    * display program does not have its own background color and the image
+    * needs to be composited onto a background before display.  The colors
+    * in "background" are normally in the same color space/depth as the
+    * pixel data.  Data is valid if (valid & PNG_INFO_bKGD) is non-zero.
+    */
+   png_color_16 background;
+#endif
+
+#if defined(PNG_oFFs_SUPPORTED)
+   /* The oFFs chunk gives the offset in "offset_unit_type" units rightwards
+    * and downwards from the top-left corner of the display, page, or other
+    * application-specific co-ordinate space.  See the PNG_OFFSET_ defines
+    * below for the unit types.  Valid if (valid & PNG_INFO_oFFs) non-zero.
+    */
+   png_int_32 x_offset; /* x offset on page */
+   png_int_32 y_offset; /* y offset on page */
+   png_byte offset_unit_type; /* offset units type */
+#endif
+
+#if defined(PNG_pHYs_SUPPORTED)
+   /* The pHYs chunk gives the physical pixel density of the image for
+    * display or printing in "phys_unit_type" units (see PNG_RESOLUTION_
+    * defines below).  Data is valid if (valid & PNG_INFO_pHYs) is non-zero.
+    */
+   png_uint_32 x_pixels_per_unit; /* horizontal pixel density */
+   png_uint_32 y_pixels_per_unit; /* vertical pixel density */
+   png_byte phys_unit_type; /* resolution type (see PNG_RESOLUTION_ below) */
+#endif
+
+#if defined(PNG_hIST_SUPPORTED)
+   /* The hIST chunk contains the relative frequency or importance of the
+    * various palette entries, so that a viewer can intelligently select a
+    * reduced-color palette, if required.  Data is an array of "num_palette"
+    * values in the range [0,65535]. Data valid if (valid & PNG_INFO_hIST)
+    * is non-zero.
+    */
+   png_uint_16p hist;
+#endif
+
+#ifdef PNG_cHRM_SUPPORTED
+   /* The cHRM chunk describes the CIE color characteristics of the monitor
+    * on which the PNG was created.  This data allows the viewer to do gamut
+    * mapping of the input image to ensure that the viewer sees the same
+    * colors in the image as the creator.  Values are in the range
+    * [0.0, 0.8].  Data valid if (valid & PNG_INFO_cHRM) non-zero.
+    */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   float x_white;
+   float y_white;
+   float x_red;
+   float y_red;
+   float x_green;
+   float y_green;
+   float x_blue;
+   float y_blue;
+#endif
+#endif
+
+#if defined(PNG_pCAL_SUPPORTED)
+   /* The pCAL chunk describes a transformation between the stored pixel
+    * values and original physical data values used to create the image.
+    * The integer range [0, 2^bit_depth - 1] maps to the floating-point
+    * range given by [pcal_X0, pcal_X1], and are further transformed by a
+    * (possibly non-linear) transformation function given by "pcal_type"
+    * and "pcal_params" into "pcal_units".  Please see the PNG_EQUATION_
+    * defines below, and the PNG-Group's PNG extensions document for a
+    * complete description of the transformations and how they should be
+    * implemented, and for a description of the ASCII parameter strings.
+    * Data values are valid if (valid & PNG_INFO_pCAL) non-zero.
+    */
+   png_charp pcal_purpose;  /* pCAL chunk description string */
+   png_int_32 pcal_X0;      /* minimum value */
+   png_int_32 pcal_X1;      /* maximum value */
+   png_charp pcal_units;    /* Latin-1 string giving physical units */
+   png_charpp pcal_params;  /* ASCII strings containing parameter values */
+   png_byte pcal_type;      /* equation type (see PNG_EQUATION_ below) */
+   png_byte pcal_nparams;   /* number of parameters given in pcal_params */
+#endif
+
+/* New members added in libpng-1.0.6 */
+#ifdef PNG_FREE_ME_SUPPORTED
+   png_uint_32 free_me;     /* flags items libpng is responsible for freeing */
+#endif
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+   /* storage for unknown chunks that the library doesn't recognize. */
+   png_unknown_chunkp unknown_chunks;
+   png_size_t unknown_chunks_num;
+#endif
+
+#if defined(PNG_iCCP_SUPPORTED)
+   /* iCCP chunk data. */
+   png_charp iccp_name;     /* profile name */
+   png_charp iccp_profile;  /* International Color Consortium profile data */
+                            /* Note to maintainer: should be png_bytep */
+   png_uint_32 iccp_proflen;  /* ICC profile data length */
+   png_byte iccp_compression; /* Always zero */
+#endif
+
+#if defined(PNG_sPLT_SUPPORTED)
+   /* data on sPLT chunks (there may be more than one). */
+   png_sPLT_tp splt_palettes;
+   png_uint_32 splt_palettes_num;
+#endif
+
+#if defined(PNG_sCAL_SUPPORTED)
+   /* The sCAL chunk describes the actual physical dimensions of the
+    * subject matter of the graphic.  The chunk contains a unit specification
+    * (a byte value) and two ASCII strings representing floating-point
+    * values.  The values are width and height corresponding to one pixel
+    * in the image.  This external representation is converted to double
+    * here.  Data values are valid if (valid & PNG_INFO_sCAL) is non-zero.
+    */
+   png_byte scal_unit;         /* unit of physical scale */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   double scal_pixel_width;    /* width of one pixel */
+   double scal_pixel_height;   /* height of one pixel */
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_charp scal_s_width;     /* string containing width */
+   png_charp scal_s_height;    /* string containing height */
+#endif
+#endif
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+   /* Memory has been allocated if (valid & PNG_ALLOCATED_INFO_ROWS) non-zero */
+   /* Data valid if (valid & PNG_INFO_IDAT) non-zero */
+   png_bytepp row_pointers;        /* the image bits */
+#endif
+
+#if defined(PNG_FIXED_POINT_SUPPORTED) && defined(PNG_gAMA_SUPPORTED)
+   png_fixed_point int_gamma; /* gamma of image, if (valid & PNG_INFO_gAMA) */
+#endif
+
+#if defined(PNG_cHRM_SUPPORTED) && defined(PNG_FIXED_POINT_SUPPORTED)
+   png_fixed_point int_x_white;
+   png_fixed_point int_y_white;
+   png_fixed_point int_x_red;
+   png_fixed_point int_y_red;
+   png_fixed_point int_x_green;
+   png_fixed_point int_y_green;
+   png_fixed_point int_x_blue;
+   png_fixed_point int_y_blue;
+#endif
+
+} png_info;
+
+typedef png_info FAR * png_infop;
+typedef png_info FAR * FAR * png_infopp;
+
+/* Maximum positive integer used in PNG is (2^31)-1 */
+#define PNG_UINT_31_MAX ((png_uint_32)0x7fffffffL)
+#define PNG_UINT_32_MAX ((png_uint_32)(-1))
+#define PNG_SIZE_MAX ((png_size_t)(-1))
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* PNG_MAX_UINT is deprecated; use PNG_UINT_31_MAX instead. */
+#define PNG_MAX_UINT PNG_UINT_31_MAX
+#endif
+
+/* These describe the color_type field in png_info. */
+/* color type masks: single-bit flags that are OR-ed to form a color type */
+#define PNG_COLOR_MASK_PALETTE    1
+#define PNG_COLOR_MASK_COLOR      2
+#define PNG_COLOR_MASK_ALPHA      4
+
+/* color types (GRAY=0, PALETTE=3, RGB=2, RGB_ALPHA=6, GRAY_ALPHA=4); only these mask combinations are defined */
+#define PNG_COLOR_TYPE_GRAY 0
+#define PNG_COLOR_TYPE_PALETTE  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_PALETTE)
+#define PNG_COLOR_TYPE_RGB        (PNG_COLOR_MASK_COLOR)
+#define PNG_COLOR_TYPE_RGB_ALPHA  (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_ALPHA)
+#define PNG_COLOR_TYPE_GRAY_ALPHA (PNG_COLOR_MASK_ALPHA)
+/* shorter aliases for the two alpha color types */
+#define PNG_COLOR_TYPE_RGBA  PNG_COLOR_TYPE_RGB_ALPHA
+#define PNG_COLOR_TYPE_GA  PNG_COLOR_TYPE_GRAY_ALPHA
+
+/* This is for compression type. PNG 1.0-1.2 only define the single type. */
+#define PNG_COMPRESSION_TYPE_BASE 0 /* Deflate method 8, 32K window */
+#define PNG_COMPRESSION_TYPE_DEFAULT PNG_COMPRESSION_TYPE_BASE
+
+/* This is for filter type. PNG 1.0-1.2 only define the single type. */
+#define PNG_FILTER_TYPE_BASE      0 /* Single row per-byte filtering */
+#define PNG_INTRAPIXEL_DIFFERENCING 64 /* Used only in MNG datastreams */
+#define PNG_FILTER_TYPE_DEFAULT   PNG_FILTER_TYPE_BASE
+
+/* These are for the interlacing type.  These values should NOT be changed. */
+#define PNG_INTERLACE_NONE        0 /* Non-interlaced image */
+#define PNG_INTERLACE_ADAM7       1 /* Adam7 interlacing */
+#define PNG_INTERLACE_LAST        2 /* Not a valid value */
+
+/* These are for the oFFs chunk.  These values should NOT be changed. */
+#define PNG_OFFSET_PIXEL          0 /* Offset in pixels */
+#define PNG_OFFSET_MICROMETER     1 /* Offset in micrometers (1/10^6 meter) */
+#define PNG_OFFSET_LAST           2 /* Not a valid value */
+
+/* These are for the pCAL chunk.  These values should NOT be changed. */
+#define PNG_EQUATION_LINEAR       0 /* Linear transformation */
+#define PNG_EQUATION_BASE_E       1 /* Exponential base e transform */
+#define PNG_EQUATION_ARBITRARY    2 /* Arbitrary base exponential transform */
+#define PNG_EQUATION_HYPERBOLIC   3 /* Hyperbolic sine transformation */
+#define PNG_EQUATION_LAST         4 /* Not a valid value */
+
+/* These are for the sCAL chunk.  These values should NOT be changed. */
+#define PNG_SCALE_UNKNOWN         0 /* unknown unit (image scale) */
+#define PNG_SCALE_METER           1 /* meters per pixel */
+#define PNG_SCALE_RADIAN          2 /* radians per pixel */
+#define PNG_SCALE_LAST            3 /* Not a valid value */
+
+/* These are for the pHYs chunk.  These values should NOT be changed. */
+#define PNG_RESOLUTION_UNKNOWN    0 /* pixels/unknown unit (aspect ratio) */
+#define PNG_RESOLUTION_METER      1 /* pixels/meter */
+#define PNG_RESOLUTION_LAST       2 /* Not a valid value */
+
+/* These are for the sRGB chunk.  These values should NOT be changed. */
+#define PNG_sRGB_INTENT_PERCEPTUAL 0
+#define PNG_sRGB_INTENT_RELATIVE   1
+#define PNG_sRGB_INTENT_SATURATION 2
+#define PNG_sRGB_INTENT_ABSOLUTE   3
+#define PNG_sRGB_INTENT_LAST       4 /* Not a valid value */
+
+/* This is for text chunks */
+#define PNG_KEYWORD_MAX_LENGTH     79
+
+/* Maximum number of entries in PLTE/sPLT/tRNS arrays */
+#define PNG_MAX_PALETTE_LENGTH    256
+
+/* These determine if an ancillary chunk's data has been successfully read
+ * from the PNG header, or if the application has filled in the corresponding
+ * data in the info_struct to be written into the output file.  The values
+ * of the PNG_INFO_<chunk> defines should NOT be changed.
+ */
+#define PNG_INFO_gAMA 0x0001
+#define PNG_INFO_sBIT 0x0002
+#define PNG_INFO_cHRM 0x0004
+#define PNG_INFO_PLTE 0x0008
+#define PNG_INFO_tRNS 0x0010
+#define PNG_INFO_bKGD 0x0020
+#define PNG_INFO_hIST 0x0040
+#define PNG_INFO_pHYs 0x0080
+#define PNG_INFO_oFFs 0x0100
+#define PNG_INFO_tIME 0x0200
+#define PNG_INFO_pCAL 0x0400
+#define PNG_INFO_sRGB 0x0800   /* GR-P, 0.96a */
+#define PNG_INFO_iCCP 0x1000   /* ESR, 1.0.6 */
+#define PNG_INFO_sPLT 0x2000   /* ESR, 1.0.6 */
+#define PNG_INFO_sCAL 0x4000   /* ESR, 1.0.6 */
+#define PNG_INFO_IDAT 0x8000L  /* ESR, 1.0.6 */
+
+/* This is used for the transformation routines, as some of them
+ * change these values for the row.  It also should enable using
+ * the routines for other purposes.
+ */
+typedef struct png_row_info_struct
+{
+   png_uint_32 width; /* width of row */
+   png_uint_32 rowbytes; /* number of bytes in row */
+   png_byte color_type; /* color type of row */
+   png_byte bit_depth; /* bit depth of row */
+   png_byte channels; /* number of channels (1, 2, 3, or 4) */
+   png_byte pixel_depth; /* bits per pixel (bit_depth * channels) */
+} png_row_info;
+
+typedef png_row_info FAR * png_row_infop;
+typedef png_row_info FAR * FAR * png_row_infopp;
+
+/* These are the function types for the I/O functions and for the functions
+ * that allow the user to override the default I/O functions with his or her
+ * own.  The png_error_ptr type should match that of user-supplied warning
+ * and error functions, while the png_rw_ptr type should match that of the
+ * user read/write data functions.
+ */
+typedef struct png_struct_def png_struct;
+typedef png_struct FAR * png_structp;
+
+typedef void (PNGAPI *png_error_ptr) PNGARG((png_structp, png_const_charp));
+typedef void (PNGAPI *png_rw_ptr) PNGARG((png_structp, png_bytep, png_size_t));
+typedef void (PNGAPI *png_flush_ptr) PNGARG((png_structp));
+typedef void (PNGAPI *png_read_status_ptr) PNGARG((png_structp, png_uint_32,
+   int));
+typedef void (PNGAPI *png_write_status_ptr) PNGARG((png_structp, png_uint_32,
+   int));
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+typedef void (PNGAPI *png_progressive_info_ptr) PNGARG((png_structp, png_infop));
+typedef void (PNGAPI *png_progressive_end_ptr) PNGARG((png_structp, png_infop));
+typedef void (PNGAPI *png_progressive_row_ptr) PNGARG((png_structp, png_bytep,
+   png_uint_32, int));
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_LEGACY_SUPPORTED)
+typedef void (PNGAPI *png_user_transform_ptr) PNGARG((png_structp,
+    png_row_infop, png_bytep));
+#endif
+
+#if defined(PNG_USER_CHUNKS_SUPPORTED)
+typedef int (PNGAPI *png_user_chunk_ptr) PNGARG((png_structp, png_unknown_chunkp));
+#endif
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+typedef void (PNGAPI *png_unknown_chunk_ptr) PNGARG((png_structp));
+#endif
+
+/* Transform masks for the high-level interface */
+#define PNG_TRANSFORM_IDENTITY       0x0000    /* read and write */
+#define PNG_TRANSFORM_STRIP_16       0x0001    /* read only */
+#define PNG_TRANSFORM_STRIP_ALPHA    0x0002    /* read only */
+#define PNG_TRANSFORM_PACKING        0x0004    /* read and write */
+#define PNG_TRANSFORM_PACKSWAP       0x0008    /* read and write */
+#define PNG_TRANSFORM_EXPAND         0x0010    /* read only */
+#define PNG_TRANSFORM_INVERT_MONO    0x0020    /* read and write */
+#define PNG_TRANSFORM_SHIFT          0x0040    /* read and write */
+#define PNG_TRANSFORM_BGR            0x0080    /* read and write */
+#define PNG_TRANSFORM_SWAP_ALPHA     0x0100    /* read and write */
+#define PNG_TRANSFORM_SWAP_ENDIAN    0x0200    /* read and write */
+#define PNG_TRANSFORM_INVERT_ALPHA   0x0400    /* read and write */
+#define PNG_TRANSFORM_STRIP_FILLER   0x0800    /* WRITE only */
+
+/* Flags for MNG supported features (bit flags) */
+#define PNG_FLAG_MNG_EMPTY_PLTE     0x01
+#define PNG_FLAG_MNG_FILTER_64      0x04
+#define PNG_ALL_MNG_FEATURES        0x05 /* EMPTY_PLTE | FILTER_64 */
+
+typedef png_voidp (*png_malloc_ptr) PNGARG((png_structp, png_size_t));
+typedef void (*png_free_ptr) PNGARG((png_structp, png_voidp));
+
+/* The structure that holds the information to read and write PNG files.
+ * The only people who need to care about what is inside of this are the
+ * people who will be modifying the library for their own special needs.
+ * It should NOT be accessed directly by an application, except to store
+ * the jmp_buf.
+ */
+
+struct png_struct_def
+{
+#ifdef PNG_SETJMP_SUPPORTED
+   jmp_buf jmpbuf;            /* used in png_error */
+#endif
+   png_error_ptr error_fn;    /* function for printing errors and aborting */
+   png_error_ptr warning_fn;  /* function for printing warnings */
+   png_voidp error_ptr;       /* user supplied struct for error functions */
+   png_rw_ptr write_data_fn;  /* function for writing output data */
+   png_rw_ptr read_data_fn;   /* function for reading input data */
+   png_voidp io_ptr;          /* ptr to application struct for I/O functions */
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+   png_user_transform_ptr read_user_transform_fn; /* user read transform */
+#endif
+
+#if defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+   png_user_transform_ptr write_user_transform_fn; /* user write transform */
+#endif
+
+/* These were added in libpng-1.0.2 */
+#if defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+   png_voidp user_transform_ptr; /* user supplied struct for user transform */
+   png_byte user_transform_depth;    /* bit depth of user transformed pixels */
+   png_byte user_transform_channels; /* channels in user transformed pixels */
+#endif
+#endif
+
+   png_uint_32 mode;          /* tells us where we are in the PNG file */
+   png_uint_32 flags;         /* flags indicating various things to libpng */
+   png_uint_32 transformations; /* which transformations to perform */
+
+   z_stream zstream;          /* zlib stream structure (held by value) */
+   png_bytep zbuf;            /* buffer for zlib */
+   png_size_t zbuf_size;      /* size of zbuf */
+   int zlib_level;            /* holds zlib compression level */
+   int zlib_method;           /* holds zlib compression method */
+   int zlib_window_bits;      /* holds zlib compression window bits */
+   int zlib_mem_level;        /* holds zlib compression memory level */
+   int zlib_strategy;         /* holds zlib compression strategy */
+
+   png_uint_32 width;         /* width of image in pixels */
+   png_uint_32 height;        /* height of image in pixels */
+   png_uint_32 num_rows;      /* number of rows in current pass */
+   png_uint_32 usr_width;     /* width of row at start of write */
+   png_uint_32 rowbytes;      /* size of row in bytes */
+   png_uint_32 irowbytes;     /* size of current interlaced row in bytes */
+   png_uint_32 iwidth;        /* width of current interlaced row in pixels */
+   png_uint_32 row_number;    /* current row in interlace pass */
+   png_bytep prev_row;        /* buffer to save previous (unfiltered) row */
+   png_bytep row_buf;         /* buffer to save current (unfiltered) row */
+   png_bytep sub_row;         /* buffer to save "sub" row when filtering */
+   png_bytep up_row;          /* buffer to save "up" row when filtering */
+   png_bytep avg_row;         /* buffer to save "avg" row when filtering */
+   png_bytep paeth_row;       /* buffer to save "Paeth" row when filtering */
+   png_row_info row_info;     /* used for transformation routines */
+
+   png_uint_32 idat_size;     /* current IDAT size for read */
+   png_uint_32 crc;           /* current chunk CRC value */
+   png_colorp palette;        /* palette from the input file */
+   png_uint_16 num_palette;   /* number of color entries in palette */
+   png_uint_16 num_trans;     /* number of transparency values */
+   png_byte chunk_name[5];    /* null-terminated name of current chunk */
+   png_byte compression;      /* file compression type (always 0) */
+   png_byte filter;           /* file filter type (always 0) */
+   png_byte interlaced;       /* PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */
+   png_byte pass;             /* current interlace pass (0 - 6) */
+   png_byte do_filter;        /* row filter flags (see PNG_FILTER_ below) */
+   png_byte color_type;       /* color type of file */
+   png_byte bit_depth;        /* bit depth of file */
+   png_byte usr_bit_depth;    /* bit depth of users row */
+   png_byte pixel_depth;      /* number of bits per pixel */
+   png_byte channels;         /* number of channels in file */
+   png_byte usr_channels;     /* channels at start of write */
+   png_byte sig_bytes;        /* magic bytes read/written from start of file */
+
+#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
+#ifdef PNG_LEGACY_SUPPORTED
+   png_byte filler;           /* filler byte for pixel expansion */
+#else
+   png_uint_16 filler;           /* filler bytes for pixel expansion */
+#endif
+#endif
+
+#if defined(PNG_bKGD_SUPPORTED)
+   png_byte background_gamma_type;
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+   float background_gamma;
+#  endif
+   png_color_16 background;   /* background color in screen gamma space */
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   png_color_16 background_1; /* background normalized to gamma 1.0 */
+#endif
+#endif /* PNG_bKGD_SUPPORTED */
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+   png_flush_ptr output_flush_fn;/* Function for flushing output */
+   png_uint_32 flush_dist;    /* how many rows apart to flush, 0 - no flush */
+   png_uint_32 flush_rows;    /* number of rows written since last flush */
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   int gamma_shift;      /* number of "insignificant" bits in 16-bit gamma */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   float gamma;          /* file gamma value */
+   float screen_gamma;   /* screen gamma value (display_exponent) */
+#endif
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   png_bytep gamma_table;     /* gamma table for 8-bit depth files */
+   png_bytep gamma_from_1;    /* converts from 1.0 to screen */
+   png_bytep gamma_to_1;      /* converts from file to 1.0 */
+   png_uint_16pp gamma_16_table; /* gamma table for 16-bit depth files */
+   png_uint_16pp gamma_16_from_1; /* converts from 1.0 to screen */
+   png_uint_16pp gamma_16_to_1; /* converts from file to 1.0 */
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_sBIT_SUPPORTED)
+   png_color_8 sig_bit;       /* significant bits in each available channel */
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
+   png_color_8 shift;         /* shift for significant bit transformation */
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) \
+ || defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   png_bytep trans;           /* transparency values for paletted files */
+   png_color_16 trans_values; /* transparency values for non-paletted files */
+#endif
+
+   png_read_status_ptr read_row_fn;   /* called after each row is decoded */
+   png_write_status_ptr write_row_fn; /* called after each row is encoded */
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+   png_progressive_info_ptr info_fn; /* called after header data fully read */
+   png_progressive_row_ptr row_fn;   /* called after each prog. row is decoded */
+   png_progressive_end_ptr end_fn;   /* called after image is complete */
+   png_bytep save_buffer_ptr;        /* current location in save_buffer */
+   png_bytep save_buffer;            /* buffer for previously read data */
+   png_bytep current_buffer_ptr;     /* current location in current_buffer */
+   png_bytep current_buffer;         /* buffer for recently used data */
+   png_uint_32 push_length;          /* size of current input chunk */
+   png_uint_32 skip_length;          /* bytes to skip in input data */
+   png_size_t save_buffer_size;      /* amount of data now in save_buffer */
+   png_size_t save_buffer_max;       /* total size of save_buffer */
+   png_size_t buffer_size;           /* total amount of available input data */
+   png_size_t current_buffer_size;   /* amount of data now in current_buffer */
+   int process_mode;                 /* what push library is currently doing */
+   int cur_palette;                  /* current push library palette index */
+
+#  if defined(PNG_TEXT_SUPPORTED)
+     png_size_t current_text_size;   /* current size of text input data */
+     png_size_t current_text_left;   /* how much text left to read in input */
+     png_charp current_text;         /* current text chunk buffer */
+     png_charp current_text_ptr;     /* current location in current_text */
+#  endif /* PNG_TEXT_SUPPORTED */
+#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
+
+#if defined(__TURBOC__) && !defined(_Windows) && !defined(__FLAT__)
+/* for the Borland special 64K segment handler */
+   png_bytepp offset_table_ptr;
+   png_bytep offset_table;
+   png_uint_16 offset_table_number;
+   png_uint_16 offset_table_count;
+   png_uint_16 offset_table_count_free;
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+   png_bytep palette_lookup;         /* lookup table for dithering */
+   png_bytep dither_index;           /* index translation for palette files */
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED) || defined(PNG_hIST_SUPPORTED)
+   png_uint_16p hist;                /* histogram */
+#endif
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+   png_byte heuristic_method;        /* heuristic for row filter selection */
+   png_byte num_prev_filters;        /* number of weights for previous rows */
+   png_bytep prev_filters;           /* filter type(s) of previous row(s) */
+   png_uint_16p filter_weights;      /* weight(s) for previous line(s) */
+   png_uint_16p inv_filter_weights;  /* 1/weight(s) for previous line(s) */
+   png_uint_16p filter_costs;        /* relative filter calculation cost */
+   png_uint_16p inv_filter_costs;    /* 1/relative filter calculation cost */
+#endif
+
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+   png_charp time_buffer;            /* String to hold RFC 1123 time text */
+#endif
+
+/* New members added in libpng-1.0.6 */
+
+#ifdef PNG_FREE_ME_SUPPORTED
+   png_uint_32 free_me;       /* flags items libpng is responsible for freeing */
+#endif
+
+#if defined(PNG_USER_CHUNKS_SUPPORTED)
+   png_voidp user_chunk_ptr;
+   png_user_chunk_ptr read_user_chunk_fn; /* user read chunk handler */
+#endif
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+   int num_chunk_list;
+   png_bytep chunk_list;
+#endif
+
+/* New members added in libpng-1.0.3 */
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+   png_byte rgb_to_gray_status;
+   /* These were changed from png_byte in libpng-1.0.6 */
+   png_uint_16 rgb_to_gray_red_coeff;
+   png_uint_16 rgb_to_gray_green_coeff;
+   png_uint_16 rgb_to_gray_blue_coeff;
+#endif
+
+/* New member added in libpng-1.0.4 (renamed in 1.0.9) */
+#if defined(PNG_MNG_FEATURES_SUPPORTED) || \
+    defined(PNG_READ_EMPTY_PLTE_SUPPORTED) || \
+    defined(PNG_WRITE_EMPTY_PLTE_SUPPORTED)
+/* changed from png_byte to png_uint_32 at version 1.2.0 */
+#ifdef PNG_1_0_X
+   png_byte mng_features_permitted;
+#else
+   png_uint_32 mng_features_permitted;
+#endif /* PNG_1_0_X */
+#endif
+
+/* New member added in libpng-1.0.7 */
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+   png_fixed_point int_gamma;
+#endif
+
+/* New member added in libpng-1.0.9, ifdef'ed out in 1.0.12, enabled in 1.2.0 */
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   png_byte filter_type;
+#endif
+
+#if defined(PNG_1_0_X)
+/* New member added in libpng-1.0.10, ifdef'ed out in 1.2.0 */
+   png_uint_32 row_buf_size;
+#endif
+
+/* New members added in libpng-1.2.0 */
+#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
+#  if !defined(PNG_1_0_X)
+#    if defined(PNG_MMX_CODE_SUPPORTED)
+   png_byte     mmx_bitdepth_threshold;
+   png_uint_32  mmx_rowbytes_threshold;
+#    endif
+   png_uint_32  asm_flags;
+#  endif
+#endif
+
+/* New members added in libpng-1.0.2 but first enabled by default in 1.2.0 */
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_voidp mem_ptr;                /* user supplied struct for mem functions */
+   png_malloc_ptr malloc_fn;         /* function for allocating memory */
+   png_free_ptr free_fn;             /* function for freeing memory */
+#endif
+
+/* New member added in libpng-1.0.13 and 1.2.0 */
+   png_bytep big_row_buf;         /* buffer to save current (unfiltered) row */
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+/* The following three members were added at version 1.0.14 and 1.2.4 */
+   png_bytep dither_sort;            /* working sort array */
+   png_bytep index_to_palette;       /* where the original index currently is */
+                                     /* in the palette */
+   png_bytep palette_to_index;       /* which original index points to this */
+                                     /* palette color */
+#endif
+
+/* New members added in libpng-1.0.16 and 1.2.6 */
+   png_byte compression_type;
+
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   png_uint_32 user_width_max;
+   png_uint_32 user_height_max;
+#endif
+
+/* New member added in libpng-1.0.25 and 1.2.17 */
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+   /* storage for unknown chunk that the library doesn't recognize. */
+   png_unknown_chunk unknown_chunk;
+#endif
+};
+
+
+/* This triggers a compiler error in png.c, if png.c and png.h
+ * do not agree upon the version number.
+ */
+typedef png_structp version_1_2_22;
+
+typedef png_struct FAR * FAR * png_structpp;
+
+/* Here are the function definitions most commonly used.  This is not
+ * the place to find out how to use libpng.  See libpng.txt for the
+ * full explanation, see example.c for the summary.  This just provides
+ * a simple one line description of the use of each function.
+ */
+
+/* Returns the version number of the library */
+extern PNG_EXPORT(png_uint_32,png_access_version_number) PNGARG((void));
+
+/* Tell lib we have already handled the first <num_bytes> magic bytes.
+ * Handling more than 8 bytes from the beginning of the file is an error.
+ */
+extern PNG_EXPORT(void,png_set_sig_bytes) PNGARG((png_structp png_ptr,
+   int num_bytes));
+
+/* Check sig[start] through sig[start + num_to_check - 1] to see if it's a
+ * PNG file.  Returns zero if the supplied bytes match the 8-byte PNG
+ * signature, and non-zero otherwise.  Having num_to_check == 0 or
+ * start > 7 will always fail (ie return non-zero).
+ */
+extern PNG_EXPORT(int,png_sig_cmp) PNGARG((png_bytep sig, png_size_t start,
+   png_size_t num_to_check));
+
+/* Simple signature checking function.  This is the same as calling
+ * png_check_sig(sig, n) := !png_sig_cmp(sig, 0, n).
+ */
+extern PNG_EXPORT(int,png_check_sig) PNGARG((png_bytep sig, int num));
+
+/* Allocate and initialize png_ptr struct for reading, and any other memory. */
+extern PNG_EXPORT(png_structp,png_create_read_struct)
+   PNGARG((png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn));
+
+/* Allocate and initialize png_ptr struct for writing, and any other memory */
+extern PNG_EXPORT(png_structp,png_create_write_struct)
+   PNGARG((png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn));
+
+#ifdef PNG_WRITE_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_compression_buffer_size)
+   PNGARG((png_structp png_ptr));
+#endif
+
+#ifdef PNG_WRITE_SUPPORTED
+extern PNG_EXPORT(void,png_set_compression_buffer_size)
+   PNGARG((png_structp png_ptr, png_uint_32 size));
+#endif
+
+/* Reset the compression stream */
+extern PNG_EXPORT(int,png_reset_zstream) PNGARG((png_structp png_ptr));
+
+/* New functions added in libpng-1.0.2 (not enabled by default until 1.2.0) */
+#ifdef PNG_USER_MEM_SUPPORTED
+extern PNG_EXPORT(png_structp,png_create_read_struct_2)
+   PNGARG((png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
+   png_malloc_ptr malloc_fn, png_free_ptr free_fn));
+extern PNG_EXPORT(png_structp,png_create_write_struct_2)
+   PNGARG((png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
+   png_malloc_ptr malloc_fn, png_free_ptr free_fn));
+#endif
+
+/* Write a PNG chunk - size, type, (optional) data, CRC. */
+extern PNG_EXPORT(void,png_write_chunk) PNGARG((png_structp png_ptr,
+   png_bytep chunk_name, png_bytep data, png_size_t length));
+
+/* Write the start of a PNG chunk - length and chunk name. */
+extern PNG_EXPORT(void,png_write_chunk_start) PNGARG((png_structp png_ptr,
+   png_bytep chunk_name, png_uint_32 length));
+
+/* Write the data of a PNG chunk started with png_write_chunk_start(). */
+extern PNG_EXPORT(void,png_write_chunk_data) PNGARG((png_structp png_ptr,
+   png_bytep data, png_size_t length));
+
+/* Finish a chunk started with png_write_chunk_start() (includes CRC). */
+extern PNG_EXPORT(void,png_write_chunk_end) PNGARG((png_structp png_ptr));
+
+/* Allocate and initialize the info structure */
+extern PNG_EXPORT(png_infop,png_create_info_struct)
+   PNGARG((png_structp png_ptr));
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* Initialize the info structure (DEPRECATED: maps to png_info_init_3()) */
+extern PNG_EXPORT(void,png_info_init) PNGARG((png_infop info_ptr));
+#undef png_info_init
+#define png_info_init(info_ptr) png_info_init_3(&(info_ptr),\
+    png_sizeof(png_info))
+#endif
+
+extern PNG_EXPORT(void,png_info_init_3) PNGARG((png_infopp info_ptr,
+    png_size_t png_info_struct_size));
+
+/* Writes all the PNG information before the image. */
+extern PNG_EXPORT(void,png_write_info_before_PLTE) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+extern PNG_EXPORT(void,png_write_info) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* read the information before the actual image data. */
+extern PNG_EXPORT(void,png_read_info) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+#endif
+
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+extern PNG_EXPORT(png_charp,png_convert_to_rfc1123)
+   PNGARG((png_structp png_ptr, png_timep ptime));
+#endif
+
+#if !defined(_WIN32_WCE)
+/* "time.h" functions are not supported on WindowsCE */
+#if defined(PNG_WRITE_tIME_SUPPORTED)
+/* convert from a struct tm to png_time */
+extern PNG_EXPORT(void,png_convert_from_struct_tm) PNGARG((png_timep ptime,
+   struct tm FAR * ttime));
+
+/* convert from time_t to png_time.  Uses gmtime() */
+extern PNG_EXPORT(void,png_convert_from_time_t) PNGARG((png_timep ptime,
+   time_t ttime));
+#endif /* PNG_WRITE_tIME_SUPPORTED */
+#endif /* _WIN32_WCE */
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+/* Expand data to 24-bit RGB, or 8-bit grayscale, with alpha if available. */
+extern PNG_EXPORT(void,png_set_expand) PNGARG((png_structp png_ptr));
+#if !defined(PNG_1_0_X)
+extern PNG_EXPORT(void,png_set_expand_gray_1_2_4_to_8) PNGARG((png_structp
+  png_ptr));
+#endif
+extern PNG_EXPORT(void,png_set_palette_to_rgb) PNGARG((png_structp png_ptr));
+extern PNG_EXPORT(void,png_set_tRNS_to_alpha) PNGARG((png_structp png_ptr));
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* Deprecated */
+extern PNG_EXPORT(void,png_set_gray_1_2_4_to_8) PNGARG((png_structp png_ptr));
+#endif
+#endif
+
+#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
+/* Use blue, green, red order for pixels. */
+extern PNG_EXPORT(void,png_set_bgr) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+/* Expand the grayscale to 24-bit RGB if necessary. */
+extern PNG_EXPORT(void,png_set_gray_to_rgb) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+/* Reduce RGB to grayscale. */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_rgb_to_gray) PNGARG((png_structp png_ptr,
+   int error_action, double red, double green ));
+#endif
+extern PNG_EXPORT(void,png_set_rgb_to_gray_fixed) PNGARG((png_structp png_ptr,
+   int error_action, png_fixed_point red, png_fixed_point green ));
+extern PNG_EXPORT(png_byte,png_get_rgb_to_gray_status) PNGARG((png_structp
+   png_ptr));
+#endif
+
+extern PNG_EXPORT(void,png_build_grayscale_palette) PNGARG((int bit_depth,
+   png_colorp palette));
+
+#if defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+extern PNG_EXPORT(void,png_set_strip_alpha) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED) || \
+    defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+extern PNG_EXPORT(void,png_set_swap_alpha) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED) || \
+    defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+extern PNG_EXPORT(void,png_set_invert_alpha) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
+/* Add a filler byte to 8-bit Gray or 24-bit RGB images. */
+extern PNG_EXPORT(void,png_set_filler) PNGARG((png_structp png_ptr,
+   png_uint_32 filler, int flags));
+/* The values of the PNG_FILLER_ defines should NOT be changed */
+#define PNG_FILLER_BEFORE 0
+#define PNG_FILLER_AFTER 1
+/* Add an alpha byte to 8-bit Gray or 24-bit RGB images. */
+#if !defined(PNG_1_0_X)
+extern PNG_EXPORT(void,png_set_add_alpha) PNGARG((png_structp png_ptr,
+   png_uint_32 filler, int flags));
+#endif
+#endif /* PNG_READ_FILLER_SUPPORTED || PNG_WRITE_FILLER_SUPPORTED */
+
+#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
+/* Swap bytes in 16-bit depth files. */
+extern PNG_EXPORT(void,png_set_swap) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED) || defined(PNG_WRITE_PACK_SUPPORTED)
+/* Use 1 byte per pixel in 1, 2, or 4-bit depth files. */
+extern PNG_EXPORT(void,png_set_packing) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED) || defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+/* Swap packing order of pixels in bytes. */
+extern PNG_EXPORT(void,png_set_packswap) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
+/* Converts files to legal bit depths. */
+extern PNG_EXPORT(void,png_set_shift) PNGARG((png_structp png_ptr,
+   png_color_8p true_bits));
+#endif
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED) || \
+    defined(PNG_WRITE_INTERLACING_SUPPORTED)
+/* Have the code handle the interlacing.  Returns the number of passes. */
+extern PNG_EXPORT(int,png_set_interlace_handling) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
+/* Invert monochrome files */
+extern PNG_EXPORT(void,png_set_invert_mono) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+/* Handle alpha and tRNS by replacing with a background color. */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_background) PNGARG((png_structp png_ptr,
+   png_color_16p background_color, int background_gamma_code,
+   int need_expand, double background_gamma));
+#endif
+#define PNG_BACKGROUND_GAMMA_UNKNOWN 0
+#define PNG_BACKGROUND_GAMMA_SCREEN  1
+#define PNG_BACKGROUND_GAMMA_FILE    2
+#define PNG_BACKGROUND_GAMMA_UNIQUE  3
+#endif
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+/* strip the second byte of information from a 16-bit depth file. */
+extern PNG_EXPORT(void,png_set_strip_16) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+/* Turn on dithering, and reduce the palette to the number of colors available. */
+extern PNG_EXPORT(void,png_set_dither) PNGARG((png_structp png_ptr,
+   png_colorp palette, int num_palette, int maximum_colors,
+   png_uint_16p histogram, int full_dither));
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+/* Handle gamma correction. Screen_gamma=(display_exponent) */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_gamma) PNGARG((png_structp png_ptr,
+   double screen_gamma, double default_file_gamma));
+#endif
+#endif
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+#if defined(PNG_READ_EMPTY_PLTE_SUPPORTED) || \
+    defined(PNG_WRITE_EMPTY_PLTE_SUPPORTED)
+/* Permit or disallow empty PLTE (0: not permitted, 1: permitted) */
+/* Deprecated and will be removed.  Use png_permit_mng_features() instead. */
+extern PNG_EXPORT(void,png_permit_empty_plte) PNGARG((png_structp png_ptr,
+   int empty_plte_permitted));
+#endif
+#endif
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+/* Set how many lines between output flushes - 0 for no flushing */
+extern PNG_EXPORT(void,png_set_flush) PNGARG((png_structp png_ptr, int nrows));
+/* Flush the current PNG output buffer */
+extern PNG_EXPORT(void,png_write_flush) PNGARG((png_structp png_ptr));
+#endif
+
+/* optional update palette with requested transformations */
+extern PNG_EXPORT(void,png_start_read_image) PNGARG((png_structp png_ptr));
+
+/* optional call to update the users info structure */
+extern PNG_EXPORT(void,png_read_update_info) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* read one or more rows of image data. */
+extern PNG_EXPORT(void,png_read_rows) PNGARG((png_structp png_ptr,
+   png_bytepp row, png_bytepp display_row, png_uint_32 num_rows));
+#endif
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* read a row of data. */
+extern PNG_EXPORT(void,png_read_row) PNGARG((png_structp png_ptr,
+   png_bytep row,
+   png_bytep display_row));
+#endif
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* read the whole image into memory at once. */
+extern PNG_EXPORT(void,png_read_image) PNGARG((png_structp png_ptr,
+   png_bytepp image));
+#endif
+
+/* write a row of image data */
+extern PNG_EXPORT(void,png_write_row) PNGARG((png_structp png_ptr,
+   png_bytep row));
+
+/* write a few rows of image data */
+extern PNG_EXPORT(void,png_write_rows) PNGARG((png_structp png_ptr,
+   png_bytepp row, png_uint_32 num_rows));
+
+/* write the image data */
+extern PNG_EXPORT(void,png_write_image) PNGARG((png_structp png_ptr,
+   png_bytepp image));
+
+/* writes the end of the PNG file. */
+extern PNG_EXPORT(void,png_write_end) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* read the end of the PNG file. */
+extern PNG_EXPORT(void,png_read_end) PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+#endif
+
+/* free any memory associated with the png_info_struct */
+extern PNG_EXPORT(void,png_destroy_info_struct) PNGARG((png_structp png_ptr,
+   png_infopp info_ptr_ptr));
+
+/* free any memory associated with the png_struct and the png_info_structs */
+extern PNG_EXPORT(void,png_destroy_read_struct) PNGARG((png_structpp
+   png_ptr_ptr, png_infopp info_ptr_ptr, png_infopp end_info_ptr_ptr));
+
+/* free all memory used by the read (old method - NOT DLL EXPORTED) */
+extern void png_read_destroy PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_infop end_info_ptr));
+
+/* free any memory associated with the png_struct and the png_info_structs */
+extern PNG_EXPORT(void,png_destroy_write_struct)
+   PNGARG((png_structpp png_ptr_ptr, png_infopp info_ptr_ptr));
+
+/* free any memory used in png_ptr struct (old method - NOT DLL EXPORTED) */
+extern void png_write_destroy PNGARG((png_structp png_ptr));
+
+/* set the libpng method of handling chunk CRC errors */
+extern PNG_EXPORT(void,png_set_crc_action) PNGARG((png_structp png_ptr,
+   int crit_action, int ancil_action));
+
+/* Values for png_set_crc_action() to say how to handle CRC errors in
+ * ancillary and critical chunks, and whether to use the data contained
+ * therein.  Note that it is impossible to "discard" data in a critical
+ * chunk.  For versions prior to 0.90, the action was always error/quit,
+ * whereas in version 0.90 and later, the action for CRC errors in ancillary
+ * chunks is warn/discard.  These values should NOT be changed.
+ *
+ *      value                       action:critical     action:ancillary
+ */
+#define PNG_CRC_DEFAULT       0  /* error/quit          warn/discard data */
+#define PNG_CRC_ERROR_QUIT    1  /* error/quit          error/quit        */
+#define PNG_CRC_WARN_DISCARD  2  /* (INVALID)           warn/discard data */
+#define PNG_CRC_WARN_USE      3  /* warn/use data       warn/use data     */
+#define PNG_CRC_QUIET_USE     4  /* quiet/use data      quiet/use data    */
+#define PNG_CRC_NO_CHANGE     5  /* use current value   use current value */
+
+/* These functions give the user control over the scan-line filtering in
+ * libpng and the compression methods used by zlib.  These functions are
+ * mainly useful for testing, as the defaults should work with most users.
+ * Those users who are tight on memory or want faster performance at the
+ * expense of compression can modify them.  See the compression library
+ * header file (zlib.h) for an explanation of the compression functions.
+ */
+
+/* set the filtering method(s) used by libpng.  Currently, the only valid
+ * value for "method" is 0.
+ */
+extern PNG_EXPORT(void,png_set_filter) PNGARG((png_structp png_ptr, int method,
+   int filters));
+
+/* Flags for png_set_filter() to say which filters to use.  The flags
+ * are chosen so that they don't conflict with real filter types
+ * below, in case they are supplied instead of the #defined constants.
+ * These values should NOT be changed.
+ */
+#define PNG_NO_FILTERS     0x00
+#define PNG_FILTER_NONE    0x08
+#define PNG_FILTER_SUB     0x10
+#define PNG_FILTER_UP      0x20
+#define PNG_FILTER_AVG     0x40
+#define PNG_FILTER_PAETH   0x80
+#define PNG_ALL_FILTERS (PNG_FILTER_NONE | PNG_FILTER_SUB | PNG_FILTER_UP | \
+                         PNG_FILTER_AVG | PNG_FILTER_PAETH)
+
+/* Filter values (not flags) - used in pngwrite.c, pngwutil.c for now.
+ * These defines should NOT be changed.
+ */
+#define PNG_FILTER_VALUE_NONE  0
+#define PNG_FILTER_VALUE_SUB   1
+#define PNG_FILTER_VALUE_UP    2
+#define PNG_FILTER_VALUE_AVG   3
+#define PNG_FILTER_VALUE_PAETH 4
+#define PNG_FILTER_VALUE_LAST  5
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED) /* EXPERIMENTAL */
+/* The "heuristic_method" is given by one of the PNG_FILTER_HEURISTIC_
+ * defines, either the default (minimum-sum-of-absolute-differences), or
+ * the experimental method (weighted-minimum-sum-of-absolute-differences).
+ *
+ * Weights are factors >= 1.0, indicating how important it is to keep the
+ * filter type consistent between rows.  Larger numbers mean the current
+ * filter is that many times as likely to be the same as the "num_weights"
+ * previous filters.  This is cumulative for each previous row with a weight.
+ * There needs to be "num_weights" values in "filter_weights", or it can be
+ * NULL if the weights aren't being specified.  Weights have no influence on
+ * the selection of the first row filter.  Well chosen weights can (in theory)
+ * improve the compression for a given image.
+ *
+ * Costs are factors >= 1.0 indicating the relative decoding costs of a
+ * filter type.  Higher costs indicate more decoding expense, and are
+ * therefore less likely to be selected over a filter with lower computational
+ * costs.  There needs to be a value in "filter_costs" for each valid filter
+ * type (given by PNG_FILTER_VALUE_LAST), or it can be NULL if you aren't
+ * setting the costs.  Costs try to improve the speed of decompression without
+ * unduly increasing the compressed image size.
+ *
+ * A negative weight or cost indicates the default value is to be used, and
+ * values in the range [0.0, 1.0) indicate the value is to remain unchanged.
+ * The default values for both weights and costs are currently 1.0, but may
+ * change if good general weighting/cost heuristics can be found.  If both
+ * the weights and costs are set to 1.0, this degenerates the WEIGHTED method
+ * to the UNWEIGHTED method, but with added encoding time/computation.
+ */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_filter_heuristics) PNGARG((png_structp png_ptr,
+   int heuristic_method, int num_weights, png_doublep filter_weights,
+   png_doublep filter_costs));
+#endif
+#endif /*  PNG_WRITE_WEIGHTED_FILTER_SUPPORTED */
+
+/* Heuristic used for row filter selection.  These defines should NOT be
+ * changed.
+ */
+#define PNG_FILTER_HEURISTIC_DEFAULT    0  /* Currently "UNWEIGHTED" */
+#define PNG_FILTER_HEURISTIC_UNWEIGHTED 1  /* Used by libpng < 0.95 */
+#define PNG_FILTER_HEURISTIC_WEIGHTED   2  /* Experimental feature */
+#define PNG_FILTER_HEURISTIC_LAST       3  /* Not a valid value */
+
+/* Set the library compression level.  Currently, valid values range from
+ * 0 - 9, corresponding directly to the zlib compression levels 0 - 9
+ * (0 - no compression, 9 - "maximal" compression).  Note that tests have
+ * shown that zlib compression levels 3-6 usually perform as well as level 9
+ * for PNG images, and do considerably fewer calculations.  In the future,
+ * these values may not correspond directly to the zlib compression levels.
+ */
+extern PNG_EXPORT(void,png_set_compression_level) PNGARG((png_structp png_ptr,
+   int level));
+
+extern PNG_EXPORT(void,png_set_compression_mem_level)
+   PNGARG((png_structp png_ptr, int mem_level));
+
+extern PNG_EXPORT(void,png_set_compression_strategy)
+   PNGARG((png_structp png_ptr, int strategy));
+
+extern PNG_EXPORT(void,png_set_compression_window_bits)
+   PNGARG((png_structp png_ptr, int window_bits));
+
+extern PNG_EXPORT(void,png_set_compression_method) PNGARG((png_structp png_ptr,
+   int method));
+
+/* These next functions are called for input/output, memory, and error
+ * handling.  They are in the file pngrio.c, pngwio.c, and pngerror.c,
+ * and call standard C I/O routines such as fread(), fwrite(), and
+ * fprintf().  These functions can be made to use other I/O routines
+ * at run time for those applications that need to handle I/O in a
+ * different manner by calling png_set_???_fn().  See libpng.txt for
+ * more information.
+ */
+
+#if !defined(PNG_NO_STDIO)
+/* Initialize the input/output for the PNG file to the default functions. */
+extern PNG_EXPORT(void,png_init_io) PNGARG((png_structp png_ptr, png_FILE_p fp));
+#endif
+
+/* Replace the (error and abort), and warning functions with user
+ * supplied functions.  If no messages are to be printed you must still
+ * write and use replacement functions. The replacement error_fn should
+ * still do a longjmp to the last setjmp location if you are using this
+ * method of error handling.  If error_fn or warning_fn is NULL, the
+ * default function will be used.
+ */
+
+extern PNG_EXPORT(void,png_set_error_fn) PNGARG((png_structp png_ptr,
+   png_voidp error_ptr, png_error_ptr error_fn, png_error_ptr warning_fn));
+
+/* Return the user pointer associated with the error functions */
+extern PNG_EXPORT(png_voidp,png_get_error_ptr) PNGARG((png_structp png_ptr));
+
+/* Replace the default data output functions with a user supplied one(s).
+ * If buffered output is not used, then output_flush_fn can be set to NULL.
+ * If PNG_WRITE_FLUSH_SUPPORTED is not defined at libpng compile time
+ * output_flush_fn will be ignored (and thus can be NULL).
+ */
+extern PNG_EXPORT(void,png_set_write_fn) PNGARG((png_structp png_ptr,
+   png_voidp io_ptr, png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn));
+
+/* Replace the default data input function with a user supplied one. */
+extern PNG_EXPORT(void,png_set_read_fn) PNGARG((png_structp png_ptr,
+   png_voidp io_ptr, png_rw_ptr read_data_fn));
+
+/* Return the user pointer associated with the I/O functions */
+extern PNG_EXPORT(png_voidp,png_get_io_ptr) PNGARG((png_structp png_ptr));
+
+extern PNG_EXPORT(void,png_set_read_status_fn) PNGARG((png_structp png_ptr,
+   png_read_status_ptr read_row_fn));
+
+extern PNG_EXPORT(void,png_set_write_status_fn) PNGARG((png_structp png_ptr,
+   png_write_status_ptr write_row_fn));
+
+#ifdef PNG_USER_MEM_SUPPORTED
+/* Replace the default memory allocation functions with user supplied one(s). */
+extern PNG_EXPORT(void,png_set_mem_fn) PNGARG((png_structp png_ptr,
+   png_voidp mem_ptr, png_malloc_ptr malloc_fn, png_free_ptr free_fn));
+/* Return the user pointer associated with the memory functions */
+extern PNG_EXPORT(png_voidp,png_get_mem_ptr) PNGARG((png_structp png_ptr));
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_LEGACY_SUPPORTED)
+extern PNG_EXPORT(void,png_set_read_user_transform_fn) PNGARG((png_structp
+   png_ptr, png_user_transform_ptr read_user_transform_fn));
+#endif
+
+#if defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_LEGACY_SUPPORTED)
+extern PNG_EXPORT(void,png_set_write_user_transform_fn) PNGARG((png_structp
+   png_ptr, png_user_transform_ptr write_user_transform_fn));
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_LEGACY_SUPPORTED)
+extern PNG_EXPORT(void,png_set_user_transform_info) PNGARG((png_structp
+   png_ptr, png_voidp user_transform_ptr, int user_transform_depth,
+   int user_transform_channels));
+/* Return the user pointer associated with the user transform functions */
+extern PNG_EXPORT(png_voidp,png_get_user_transform_ptr)
+   PNGARG((png_structp png_ptr));
+#endif
+
+#ifdef PNG_USER_CHUNKS_SUPPORTED
+extern PNG_EXPORT(void,png_set_read_user_chunk_fn) PNGARG((png_structp png_ptr,
+   png_voidp user_chunk_ptr, png_user_chunk_ptr read_user_chunk_fn));
+extern PNG_EXPORT(png_voidp,png_get_user_chunk_ptr) PNGARG((png_structp
+   png_ptr));
+#endif
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+/* Sets the function callbacks for the push reader, and a pointer to a
+ * user-defined structure available to the callback functions.
+ */
+extern PNG_EXPORT(void,png_set_progressive_read_fn) PNGARG((png_structp png_ptr,
+   png_voidp progressive_ptr,
+   png_progressive_info_ptr info_fn, png_progressive_row_ptr row_fn,
+   png_progressive_end_ptr end_fn));
+
+/* returns the user pointer associated with the push read functions */
+extern PNG_EXPORT(png_voidp,png_get_progressive_ptr)
+   PNGARG((png_structp png_ptr));
+
+/* function to be called when data becomes available */
+extern PNG_EXPORT(void,png_process_data) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_bytep buffer, png_size_t buffer_size));
+
+/* function that combines rows.  Not very much different than the
+ * png_combine_row() call.  Is this even used?????
+ */
+extern PNG_EXPORT(void,png_progressive_combine_row) PNGARG((png_structp png_ptr,
+   png_bytep old_row, png_bytep new_row));
+#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
+
+extern PNG_EXPORT(png_voidp,png_malloc) PNGARG((png_structp png_ptr,
+   png_uint_32 size));
+
+#if defined(PNG_1_0_X)
+#  define png_malloc_warn png_malloc
+#else
+/* Added at libpng version 1.2.4 */
+extern PNG_EXPORT(png_voidp,png_malloc_warn) PNGARG((png_structp png_ptr,
+   png_uint_32 size));
+#endif
+
+/* frees a pointer allocated by png_malloc() */
+extern PNG_EXPORT(void,png_free) PNGARG((png_structp png_ptr, png_voidp ptr));
+
+#if defined(PNG_1_0_X)
+/* Function to allocate memory for zlib. */
+extern PNG_EXPORT(voidpf,png_zalloc) PNGARG((voidpf png_ptr, uInt items,
+   uInt size));
+
+/* Function to free memory for zlib */
+extern PNG_EXPORT(void,png_zfree) PNGARG((voidpf png_ptr, voidpf ptr));
+#endif
+
+/* Free data that was allocated internally */
+extern PNG_EXPORT(void,png_free_data) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 free_me, int num));
+#ifdef PNG_FREE_ME_SUPPORTED
+/* Reassign responsibility for freeing existing data, whether allocated
+ * by libpng or by the application */
+extern PNG_EXPORT(void,png_data_freer) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int freer, png_uint_32 mask));
+#endif
+/* assignments for png_data_freer */
+#define PNG_DESTROY_WILL_FREE_DATA 1
+#define PNG_SET_WILL_FREE_DATA 1
+#define PNG_USER_WILL_FREE_DATA 2
+/* Flags for png_ptr->free_me and info_ptr->free_me */
+#define PNG_FREE_HIST 0x0008
+#define PNG_FREE_ICCP 0x0010
+#define PNG_FREE_SPLT 0x0020
+#define PNG_FREE_ROWS 0x0040
+#define PNG_FREE_PCAL 0x0080
+#define PNG_FREE_SCAL 0x0100
+#define PNG_FREE_UNKN 0x0200
+#define PNG_FREE_LIST 0x0400
+#define PNG_FREE_PLTE 0x1000
+#define PNG_FREE_TRNS 0x2000
+#define PNG_FREE_TEXT 0x4000
+#define PNG_FREE_ALL  0x7fff
+#define PNG_FREE_MUL  0x4220 /* PNG_FREE_SPLT|PNG_FREE_TEXT|PNG_FREE_UNKN */
+
+#ifdef PNG_USER_MEM_SUPPORTED
+extern PNG_EXPORT(png_voidp,png_malloc_default) PNGARG((png_structp png_ptr,
+   png_uint_32 size));
+extern PNG_EXPORT(void,png_free_default) PNGARG((png_structp png_ptr,
+   png_voidp ptr));
+#endif
+
+extern PNG_EXPORT(png_voidp,png_memcpy_check) PNGARG((png_structp png_ptr,
+   png_voidp s1, png_voidp s2, png_uint_32 size));
+
+extern PNG_EXPORT(png_voidp,png_memset_check) PNGARG((png_structp png_ptr,
+   png_voidp s1, int value, png_uint_32 size));
+
+#if defined(USE_FAR_KEYWORD)  /* memory model conversion function */
+extern void *png_far_to_near PNGARG((png_structp png_ptr,png_voidp ptr,
+   int check));
+#endif /* USE_FAR_KEYWORD */
+
+#ifndef PNG_NO_ERROR_TEXT
+/* Fatal error in PNG image or libpng - can't continue */
+extern PNG_EXPORT(void,png_error) PNGARG((png_structp png_ptr,
+   png_const_charp error_message));
+
+/* The same, but the chunk name is prepended to the error string. */
+extern PNG_EXPORT(void,png_chunk_error) PNGARG((png_structp png_ptr,
+   png_const_charp error_message));
+#else
+/* Fatal error in PNG image or libpng - can't continue */
+extern PNG_EXPORT(void,png_err) PNGARG((png_structp png_ptr));
+#endif
+
+#ifndef PNG_NO_WARNINGS
+/* Non-fatal error in libpng.  Can continue, but may have a problem. */
+extern PNG_EXPORT(void,png_warning) PNGARG((png_structp png_ptr,
+   png_const_charp warning_message));
+
+#ifdef PNG_READ_SUPPORTED
+/* Non-fatal error in libpng, chunk name is prepended to message. */
+extern PNG_EXPORT(void,png_chunk_warning) PNGARG((png_structp png_ptr,
+   png_const_charp warning_message));
+#endif /* PNG_READ_SUPPORTED */
+#endif /* PNG_NO_WARNINGS */
+
+/* The png_set_<chunk> functions are for storing values in the png_info_struct.
+ * Similarly, the png_get_<chunk> calls are used to read values from the
+ * png_info_struct, either storing the parameters in the passed variables, or
+ * setting pointers into the png_info_struct where the data is stored.  The
+ * png_get_<chunk> functions return a non-zero value if the data was available
+ * in info_ptr, or return zero and do not change any of the parameters if the
+ * data was not available.
+ *
+ * These functions should be used instead of directly accessing png_info
+ * to avoid problems with future changes in the size and internal layout of
+ * png_info_struct.
+ */
+/* Returns "flag" if chunk data is valid in info_ptr. */
+extern PNG_EXPORT(png_uint_32,png_get_valid) PNGARG((png_structp png_ptr,
+png_infop info_ptr, png_uint_32 flag));
+
+/* Returns number of bytes needed to hold a transformed row. */
+extern PNG_EXPORT(png_uint_32,png_get_rowbytes) PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+/* Returns row_pointers, which is an array of pointers to scanlines that was
+returned from png_read_png(). */
+extern PNG_EXPORT(png_bytepp,png_get_rows) PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+/* Set row_pointers, which is an array of pointers to scanlines for use
+by png_write_png(). */
+extern PNG_EXPORT(void,png_set_rows) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_bytepp row_pointers));
+#endif
+
+/* Returns number of color channels in image. */
+extern PNG_EXPORT(png_byte,png_get_channels) PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+#ifdef PNG_EASY_ACCESS_SUPPORTED
+/* Returns image width in pixels. */
+extern PNG_EXPORT(png_uint_32, png_get_image_width) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image height in pixels. */
+extern PNG_EXPORT(png_uint_32, png_get_image_height) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image bit_depth. */
+extern PNG_EXPORT(png_byte, png_get_bit_depth) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image color_type. */
+extern PNG_EXPORT(png_byte, png_get_color_type) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image filter_type. */
+extern PNG_EXPORT(png_byte, png_get_filter_type) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image interlace_type. */
+extern PNG_EXPORT(png_byte, png_get_interlace_type) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image compression_type. */
+extern PNG_EXPORT(png_byte, png_get_compression_type) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns image resolution in pixels per meter, from pHYs chunk data. */
+extern PNG_EXPORT(png_uint_32, png_get_pixels_per_meter) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_uint_32, png_get_x_pixels_per_meter) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_uint_32, png_get_y_pixels_per_meter) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+/* Returns pixel aspect ratio, computed from pHYs chunk data.  */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(float, png_get_pixel_aspect_ratio) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+#endif
+
+/* Returns image x, y offset in pixels or microns, from oFFs chunk data. */
+extern PNG_EXPORT(png_int_32, png_get_x_offset_pixels) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_int_32, png_get_y_offset_pixels) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_int_32, png_get_x_offset_microns) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+extern PNG_EXPORT(png_int_32, png_get_y_offset_microns) PNGARG((png_structp
+png_ptr, png_infop info_ptr));
+
+#endif /* PNG_EASY_ACCESS_SUPPORTED */
+
+/* Returns pointer to signature string read from PNG header */
+extern PNG_EXPORT(png_bytep,png_get_signature) PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+#if defined(PNG_bKGD_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_bKGD) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_color_16p *background));
+#endif
+
+#if defined(PNG_bKGD_SUPPORTED)
+extern PNG_EXPORT(void,png_set_bKGD) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_color_16p background));
+#endif
+
+#if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_cHRM) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, double *white_x, double *white_y, double *red_x,
+   double *red_y, double *green_x, double *green_y, double *blue_x,
+   double *blue_y));
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_cHRM_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point *int_white_x, png_fixed_point
+   *int_white_y, png_fixed_point *int_red_x, png_fixed_point *int_red_y,
+   png_fixed_point *int_green_x, png_fixed_point *int_green_y, png_fixed_point
+   *int_blue_x, png_fixed_point *int_blue_y));
+#endif
+#endif
+
+#if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_cHRM) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, double white_x, double white_y, double red_x,
+   double red_y, double green_x, double green_y, double blue_x, double blue_y));
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_cHRM_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point int_white_x, png_fixed_point int_white_y,
+   png_fixed_point int_red_x, png_fixed_point int_red_y, png_fixed_point
+   int_green_x, png_fixed_point int_green_y, png_fixed_point int_blue_x,
+   png_fixed_point int_blue_y));
+#endif
+#endif
+
+#if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_gAMA) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, double *file_gamma));
+#endif
+extern PNG_EXPORT(png_uint_32,png_get_gAMA_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point *int_file_gamma));
+#endif
+
+#if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_gAMA) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, double file_gamma));
+#endif
+extern PNG_EXPORT(void,png_set_gAMA_fixed) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_fixed_point int_file_gamma));
+#endif
+
+#if defined(PNG_hIST_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_hIST) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_16p *hist));
+#endif
+
+#if defined(PNG_hIST_SUPPORTED)
+extern PNG_EXPORT(void,png_set_hIST) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_16p hist));
+#endif
+
+extern PNG_EXPORT(png_uint_32,png_get_IHDR) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 *width, png_uint_32 *height,
+   int *bit_depth, int *color_type, int *interlace_method,
+   int *compression_method, int *filter_method));
+
+extern PNG_EXPORT(void,png_set_IHDR) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 width, png_uint_32 height, int bit_depth,
+   int color_type, int interlace_method, int compression_method,
+   int filter_method));
+
+#if defined(PNG_oFFs_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_oFFs) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_int_32 *offset_x, png_int_32 *offset_y,
+   int *unit_type));
+#endif
+
+#if defined(PNG_oFFs_SUPPORTED)
+extern PNG_EXPORT(void,png_set_oFFs) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_int_32 offset_x, png_int_32 offset_y,
+   int unit_type));
+#endif
+
+#if defined(PNG_pCAL_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_pCAL) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_charp *purpose, png_int_32 *X0, png_int_32 *X1,
+   int *type, int *nparams, png_charp *units, png_charpp *params));
+#endif
+
+#if defined(PNG_pCAL_SUPPORTED)
+extern PNG_EXPORT(void,png_set_pCAL) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_charp purpose, png_int_32 X0, png_int_32 X1,
+   int type, int nparams, png_charp units, png_charpp params));
+#endif
+
+#if defined(PNG_pHYs_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_pHYs) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type));
+#endif
+
+#if defined(PNG_pHYs_SUPPORTED)
+extern PNG_EXPORT(void,png_set_pHYs) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 res_x, png_uint_32 res_y, int unit_type));
+#endif
+
+extern PNG_EXPORT(png_uint_32,png_get_PLTE) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_colorp *palette, int *num_palette));
+
+extern PNG_EXPORT(void,png_set_PLTE) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_colorp palette, int num_palette));
+
+#if defined(PNG_sBIT_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_sBIT) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_color_8p *sig_bit));
+#endif
+
+#if defined(PNG_sBIT_SUPPORTED)
+extern PNG_EXPORT(void,png_set_sBIT) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_color_8p sig_bit));
+#endif
+
+#if defined(PNG_sRGB_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_sRGB) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int *intent));
+#endif
+
+#if defined(PNG_sRGB_SUPPORTED)
+extern PNG_EXPORT(void,png_set_sRGB) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int intent));
+extern PNG_EXPORT(void,png_set_sRGB_gAMA_and_cHRM) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int intent));
+#endif
+
+#if defined(PNG_iCCP_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_iCCP) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_charpp name, int *compression_type,
+   png_charpp profile, png_uint_32 *proflen));
+   /* Note to maintainer: profile should be png_bytepp */
+#endif
+
+#if defined(PNG_iCCP_SUPPORTED)
+extern PNG_EXPORT(void,png_set_iCCP) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_charp name, int compression_type,
+   png_charp profile, png_uint_32 proflen));
+   /* Note to maintainer: profile should be png_bytep */
+#endif
+
+#if defined(PNG_sPLT_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_sPLT) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_sPLT_tpp entries));
+#endif
+
+#if defined(PNG_sPLT_SUPPORTED)
+extern PNG_EXPORT(void,png_set_sPLT) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_sPLT_tp entries, int nentries));
+#endif
+
+#if defined(PNG_TEXT_SUPPORTED)
+/* png_get_text also returns the number of text chunks in *num_text */
+extern PNG_EXPORT(png_uint_32,png_get_text) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_textp *text_ptr, int *num_text));
+#endif
+
+/*
+ *  Note: while png_set_text() will accept a structure whose text,
+ *  language, and translated keywords are NULL pointers, the structure
+ *  returned by png_get_text will always contain regular
+ *  zero-terminated C strings.  They might be empty strings but
+ *  they will never be NULL pointers.
+ */
+
+#if defined(PNG_TEXT_SUPPORTED)
+extern PNG_EXPORT(void,png_set_text) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_textp text_ptr, int num_text));
+#endif
+
+#if defined(PNG_tIME_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_tIME) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_timep *mod_time));
+#endif
+
+#if defined(PNG_tIME_SUPPORTED)
+extern PNG_EXPORT(void,png_set_tIME) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_timep mod_time));
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED)
+extern PNG_EXPORT(png_uint_32,png_get_tRNS) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_bytep *trans, int *num_trans,
+   png_color_16p *trans_values));
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED)
+extern PNG_EXPORT(void,png_set_tRNS) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_bytep trans, int num_trans,
+   png_color_16p trans_values));
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED)
+#endif
+
+#if defined(PNG_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_sCAL) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int *unit, double *width, double *height));
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_get_sCAL_s) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int *unit, png_charpp swidth, png_charpp sheight));
+#endif
+#endif
+#endif /* PNG_sCAL_SUPPORTED */
+
+#if defined(PNG_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_sCAL) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int unit, double width, double height));
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+extern PNG_EXPORT(void,png_set_sCAL_s) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int unit, png_charp swidth, png_charp sheight));
+#endif
+#endif
+#endif /* PNG_sCAL_SUPPORTED */
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+/* provide a list of chunks and how they are to be handled, if the built-in
+   handling or default unknown chunk handling is not desired.  Any chunks not
+   listed will be handled in the default manner.  The IHDR and IEND chunks
+   must not be listed.
+      keep = 0: follow default behaviour
+           = 1: do not keep
+           = 2: keep only if safe-to-copy
+           = 3: keep even if unsafe-to-copy
+*/
+extern PNG_EXPORT(void, png_set_keep_unknown_chunks) PNGARG((png_structp
+   png_ptr, int keep, png_bytep chunk_list, int num_chunks));
+extern PNG_EXPORT(void, png_set_unknown_chunks) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_unknown_chunkp unknowns, int num_unknowns));
+extern PNG_EXPORT(void, png_set_unknown_chunk_location)
+   PNGARG((png_structp png_ptr, png_infop info_ptr, int chunk, int location));
+extern PNG_EXPORT(png_uint_32,png_get_unknown_chunks) PNGARG((png_structp
+   png_ptr, png_infop info_ptr, png_unknown_chunkpp entries));
+#endif
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+PNG_EXPORT(int,png_handle_as_unknown) PNGARG((png_structp png_ptr, png_bytep
+   chunk_name));
+#endif
+
+/* png_free_data() will turn off the "valid" flag for anything it frees.
+   If you need to turn it off for a chunk that your application has freed,
+   you can use png_set_invalid(png_ptr, info_ptr, PNG_INFO_CHNK); */
+extern PNG_EXPORT(void, png_set_invalid) PNGARG((png_structp png_ptr,
+   png_infop info_ptr, int mask));
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+/* The "params" pointer is currently not used and is for future expansion. */
+extern PNG_EXPORT(void, png_read_png) PNGARG((png_structp png_ptr,
+                        png_infop info_ptr,
+                        int transforms,
+                        png_voidp params));
+extern PNG_EXPORT(void, png_write_png) PNGARG((png_structp png_ptr,
+                        png_infop info_ptr,
+                        int transforms,
+                        png_voidp params));
+#endif
+
+/* Define PNG_DEBUG at compile time for debugging information.  Higher
+ * numbers mean more debugging information; the png_debug*() macros only
+ * produce output when PNG_DEBUG > 1.  Debug support has only been added
+ * since version 0.95, so it is not implemented throughout libpng yet.
+ * The stdio variants are do { } while (0): end each call with ';'. */
+#ifdef PNG_DEBUG
+#if (PNG_DEBUG > 0)
+#if !defined(PNG_DEBUG_FILE) && defined(_MSC_VER)
+#include <crtdbg.h>
+#if (PNG_DEBUG > 1)
+#define png_debug(l,m)  _RPT0(_CRT_WARN,m)
+#define png_debug1(l,m,p1)  _RPT1(_CRT_WARN,m,p1)
+#define png_debug2(l,m,p1,p2) _RPT2(_CRT_WARN,m,p1,p2)
+#endif
+#else /* PNG_DEBUG_FILE || !_MSC_VER */
+#ifndef PNG_DEBUG_FILE
+#define PNG_DEBUG_FILE stderr
+#endif /* PNG_DEBUG_FILE */
+#if (PNG_DEBUG > 1)
+#define png_debug(l,m) \
+do { \
+     int num_tabs=(l); \
+     fprintf(PNG_DEBUG_FILE,"%s"m,(num_tabs==1 ? "\t" : \
+       (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":"")))); \
+} while (0)
+#define png_debug1(l,m,p1) \
+do { \
+     int num_tabs=(l); \
+     fprintf(PNG_DEBUG_FILE,"%s"m,(num_tabs==1 ? "\t" : \
+       (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))),p1); \
+} while (0)
+#define png_debug2(l,m,p1,p2) \
+do { \
+     int num_tabs=(l); \
+     fprintf(PNG_DEBUG_FILE,"%s"m,(num_tabs==1 ? "\t" : \
+       (num_tabs==2 ? "\t\t":(num_tabs>2 ? "\t\t\t":""))),p1,p2); \
+} while (0)
+#endif /* (PNG_DEBUG > 1) */
+#endif /* _MSC_VER */
+#endif /* (PNG_DEBUG > 0) */
+#endif /* PNG_DEBUG */
+#ifndef png_debug
+#define png_debug(l, m)
+#endif
+#ifndef png_debug1
+#define png_debug1(l, m, p1)
+#endif
+#ifndef png_debug2
+#define png_debug2(l, m, p1, p2)
+#endif
+
+extern PNG_EXPORT(png_charp,png_get_copyright) PNGARG((png_structp png_ptr));
+extern PNG_EXPORT(png_charp,png_get_header_ver) PNGARG((png_structp png_ptr));
+extern PNG_EXPORT(png_charp,png_get_header_version) PNGARG((png_structp png_ptr));
+extern PNG_EXPORT(png_charp,png_get_libpng_ver) PNGARG((png_structp png_ptr));
+
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+extern PNG_EXPORT(png_uint_32,png_permit_mng_features) PNGARG((png_structp
+   png_ptr, png_uint_32 mng_features_permitted));
+#endif
+
+/* For use in png_set_keep_unknown_chunks, added to version 1.2.6 */
+#define PNG_HANDLE_CHUNK_AS_DEFAULT   0
+#define PNG_HANDLE_CHUNK_NEVER        1
+#define PNG_HANDLE_CHUNK_IF_SAFE      2
+#define PNG_HANDLE_CHUNK_ALWAYS       3
+
+/* Added to version 1.2.0 */
+#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
+#if defined(PNG_MMX_CODE_SUPPORTED)
+#define PNG_ASM_FLAG_MMX_SUPPORT_COMPILED  0x01  /* not user-settable */
+#define PNG_ASM_FLAG_MMX_SUPPORT_IN_CPU    0x02  /* not user-settable */
+#define PNG_ASM_FLAG_MMX_READ_COMBINE_ROW  0x04
+#define PNG_ASM_FLAG_MMX_READ_INTERLACE    0x08
+#define PNG_ASM_FLAG_MMX_READ_FILTER_SUB   0x10
+#define PNG_ASM_FLAG_MMX_READ_FILTER_UP    0x20
+#define PNG_ASM_FLAG_MMX_READ_FILTER_AVG   0x40
+#define PNG_ASM_FLAG_MMX_READ_FILTER_PAETH 0x80
+#define PNG_ASM_FLAGS_INITIALIZED          0x80000000  /* not user-settable */
+
+#define PNG_MMX_READ_FLAGS ( PNG_ASM_FLAG_MMX_READ_COMBINE_ROW  \
+                           | PNG_ASM_FLAG_MMX_READ_INTERLACE    \
+                           | PNG_ASM_FLAG_MMX_READ_FILTER_SUB   \
+                           | PNG_ASM_FLAG_MMX_READ_FILTER_UP    \
+                           | PNG_ASM_FLAG_MMX_READ_FILTER_AVG   \
+                           | PNG_ASM_FLAG_MMX_READ_FILTER_PAETH )
+#define PNG_MMX_WRITE_FLAGS ( 0 )
+
+#define PNG_MMX_FLAGS ( PNG_ASM_FLAG_MMX_SUPPORT_COMPILED \
+                      | PNG_ASM_FLAG_MMX_SUPPORT_IN_CPU   \
+                      | PNG_MMX_READ_FLAGS                \
+                      | PNG_MMX_WRITE_FLAGS )
+
+#define PNG_SELECT_READ   1
+#define PNG_SELECT_WRITE  2
+#endif /* PNG_MMX_CODE_SUPPORTED */
+
+#if !defined(PNG_1_0_X)
+/* pngget.c */
+extern PNG_EXPORT(png_uint_32,png_get_mmx_flagmask)
+   PNGARG((int flag_select, int *compilerID));
+
+/* pngget.c */
+extern PNG_EXPORT(png_uint_32,png_get_asm_flagmask)
+   PNGARG((int flag_select));
+
+/* pngget.c */
+extern PNG_EXPORT(png_uint_32,png_get_asm_flags)
+   PNGARG((png_structp png_ptr));
+
+/* pngget.c */
+extern PNG_EXPORT(png_byte,png_get_mmx_bitdepth_threshold)
+   PNGARG((png_structp png_ptr));
+
+/* pngget.c */
+extern PNG_EXPORT(png_uint_32,png_get_mmx_rowbytes_threshold)
+   PNGARG((png_structp png_ptr));
+
+/* pngset.c */
+extern PNG_EXPORT(void,png_set_asm_flags)
+   PNGARG((png_structp png_ptr, png_uint_32 asm_flags));
+
+/* pngset.c */
+extern PNG_EXPORT(void,png_set_mmx_thresholds)
+   PNGARG((png_structp png_ptr, png_byte mmx_bitdepth_threshold,
+   png_uint_32 mmx_rowbytes_threshold));
+
+#endif /* PNG_1_0_X */
+
+#if !defined(PNG_1_0_X)
+/* png.c, pnggccrd.c, or pngvcrd.c */
+extern PNG_EXPORT(int,png_mmx_support) PNGARG((void));
+#endif /* PNG_1_0_X */
+#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
+#if !defined(PNG_1_0_X)
+/* Strip the prepended error numbers ("#nnn ") from error and warning
+ * messages before passing them to the error or warning handler. */
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+extern PNG_EXPORT(void,png_set_strip_error_numbers) PNGARG((png_structp
+   png_ptr, png_uint_32 strip_mode));
+#endif
+#endif /* PNG_1_0_X */
+
+/* Added at libpng-1.2.6 */
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+extern PNG_EXPORT(void,png_set_user_limits) PNGARG((png_structp
+   png_ptr, png_uint_32 user_width_max, png_uint_32 user_height_max));
+extern PNG_EXPORT(png_uint_32,png_get_user_width_max) PNGARG((png_structp
+   png_ptr));
+extern PNG_EXPORT(png_uint_32,png_get_user_height_max) PNGARG((png_structp
+   png_ptr));
+#endif
+
+/* Maintainer: Put new public prototypes here ^, in libpng.3, and project defs */
+
+#ifdef PNG_READ_COMPOSITE_NODIV_SUPPORTED
+/* With these routines we avoid an integer divide, which will be slower on
+ * most machines.  However, it does take more operations than the corresponding
+ * divide method, so it may be slower on a few RISC systems.  There are two
+ * shifts (by 8 or 16 bits) and an addition, versus a single integer divide.
+ *
+ * Note that the rounding factors are NOT supposed to be the same!  128 and
+ * 32768 are correct for the NODIV code; 127 and 32767 are correct for the
+ * standard method.
+ * Both variants are used as statements: "png_composite(c, fg, a, bg);".
+ * [Optimized code by Greg Roelofs and Mark Adler...blame us for bugs. :-) ]
+ */
+
+ /* fg and bg should be in `gamma 1.0' space; alpha is the opacity          */
+
+#  define png_composite(composite, fg, alpha, bg)                            \
+     do { png_uint_16 temp = (png_uint_16)((png_uint_16)(fg) * (png_uint_16)(alpha) \
+                        +        (png_uint_16)(bg)*(png_uint_16)(255 -       \
+                        (png_uint_16)(alpha)) + (png_uint_16)128);           \
+       (composite) = (png_byte)((temp + (temp >> 8)) >> 8); } while (0)
+
+#  define png_composite_16(composite, fg, alpha, bg)                         \
+     do { png_uint_32 temp = (png_uint_32)((png_uint_32)(fg) * (png_uint_32)(alpha) \
+                        + (png_uint_32)(bg)*(png_uint_32)(65535L -           \
+                        (png_uint_32)(alpha)) + (png_uint_32)32768L);        \
+       (composite) = (png_uint_16)((temp + (temp >> 16)) >> 16); } while (0)
+
+#else  /* standard method using integer division */
+
+#  define png_composite(composite, fg, alpha, bg)                            \
+     (composite) = (png_byte)(((png_uint_16)(fg) * (png_uint_16)(alpha) +    \
+       (png_uint_16)(bg) * (png_uint_16)(255 - (png_uint_16)(alpha)) +       \
+       (png_uint_16)127) / 255)
+
+#  define png_composite_16(composite, fg, alpha, bg)                         \
+     (composite) = (png_uint_16)(((png_uint_32)(fg) * (png_uint_32)(alpha) + \
+       (png_uint_32)(bg)*(png_uint_32)(65535L - (png_uint_32)(alpha)) +      \
+       (png_uint_32)32767) / (png_uint_32)65535L)
+
+#endif /* PNG_READ_COMPOSITE_NODIV_SUPPORTED */
+
+/* Inline macros to do direct reads of bytes from the input buffer.  These
+ * require that you are using an architecture that uses PNG byte ordering
+ * (MSB first) and supports unaligned data storage.  I think that PowerPC
+ * in big-endian mode and 680x0 are the only ones that will support this.
+ * The x86 line of processors definitely do not.  The png_get_int_32()
+ * routine also assumes we are using two's complement format for negative
+ * values, which is almost certainly true.
+ */
+#if defined(PNG_READ_BIG_ENDIAN_SUPPORTED)
+#  define png_get_uint_32(buf) ( *((png_uint_32p) (buf)))
+#  define png_get_uint_16(buf) ( *((png_uint_16p) (buf)))
+#  define png_get_int_32(buf)  ( *((png_int_32p)  (buf)))
+#else
+extern PNG_EXPORT(png_uint_32,png_get_uint_32) PNGARG((png_bytep buf));
+extern PNG_EXPORT(png_uint_16,png_get_uint_16) PNGARG((png_bytep buf));
+extern PNG_EXPORT(png_int_32,png_get_int_32) PNGARG((png_bytep buf));
+#endif /* !PNG_READ_BIG_ENDIAN_SUPPORTED */
+extern PNG_EXPORT(png_uint_32,png_get_uint_31)
+  PNGARG((png_structp png_ptr, png_bytep buf));
+/* No png_get_int_16 -- may be added if there's a real need for it. */
+
+/* Place a 32-bit number into a buffer in PNG byte order (big-endian).
+ */
+extern PNG_EXPORT(void,png_save_uint_32)
+   PNGARG((png_bytep buf, png_uint_32 i));
+extern PNG_EXPORT(void,png_save_int_32)
+   PNGARG((png_bytep buf, png_int_32 i));
+
+/* Place a 16-bit number into a buffer in PNG byte order.
+ * The parameter is declared unsigned int, not png_uint_16,
+ * just to avoid potential problems on pre-ANSI C compilers.
+ */
+extern PNG_EXPORT(void,png_save_uint_16)
+   PNGARG((png_bytep buf, unsigned int i));
+/* No png_save_int_16 -- may be added if there's a real need for it. */
+
+/* ************************************************************************* */
+
+/* These next functions are used internally in the code.  They generally
+ * shouldn't be used unless you are writing code to add or replace some
+ * functionality in libpng.  More information about most functions can
+ * be found in the files where the functions are located.
+ */
+
+
+/* Various modes of operation, that are visible to applications because
+ * they are used for unknown chunk location.
+ */
+#define PNG_HAVE_IHDR               0x01
+#define PNG_HAVE_PLTE               0x02
+#define PNG_HAVE_IDAT               0x04
+#define PNG_AFTER_IDAT              0x08 /* Have complete zlib datastream */
+#define PNG_HAVE_IEND               0x10
+
+#if defined(PNG_INTERNAL)
+
+/* More modes of operation.  Note that after an init, mode is set to
+ * zero automatically when the structure is created.
+ */
+#define PNG_HAVE_gAMA               0x20
+#define PNG_HAVE_cHRM               0x40
+#define PNG_HAVE_sRGB               0x80
+#define PNG_HAVE_CHUNK_HEADER      0x100
+#define PNG_WROTE_tIME             0x200
+#define PNG_WROTE_INFO_BEFORE_PLTE 0x400
+#define PNG_BACKGROUND_IS_GRAY     0x800
+#define PNG_HAVE_PNG_SIGNATURE    0x1000
+#define PNG_HAVE_CHUNK_AFTER_IDAT 0x2000 /* Have another chunk after IDAT */
+
+/* flags for the transformations the PNG library does on the image data */
+#define PNG_BGR                0x0001
+#define PNG_INTERLACE          0x0002
+#define PNG_PACK               0x0004
+#define PNG_SHIFT              0x0008
+#define PNG_SWAP_BYTES         0x0010
+#define PNG_INVERT_MONO        0x0020
+#define PNG_DITHER             0x0040
+#define PNG_BACKGROUND         0x0080
+#define PNG_BACKGROUND_EXPAND  0x0100
+                          /*   0x0200 unused */
+#define PNG_16_TO_8            0x0400
+#define PNG_RGBA               0x0800
+#define PNG_EXPAND             0x1000
+#define PNG_GAMMA              0x2000
+#define PNG_GRAY_TO_RGB        0x4000
+#define PNG_FILLER             0x8000L
+#define PNG_PACKSWAP          0x10000L
+#define PNG_SWAP_ALPHA        0x20000L
+#define PNG_STRIP_ALPHA       0x40000L
+#define PNG_INVERT_ALPHA      0x80000L
+#define PNG_USER_TRANSFORM   0x100000L
+#define PNG_RGB_TO_GRAY_ERR  0x200000L
+#define PNG_RGB_TO_GRAY_WARN 0x400000L
+#define PNG_RGB_TO_GRAY      0x600000L  /* two bits, RGB_TO_GRAY_ERR|WARN */
+                       /*    0x800000L     Unused */
+#define PNG_ADD_ALPHA       0x1000000L  /* Added to libpng-1.2.7 */
+#define PNG_EXPAND_tRNS     0x2000000L  /* Added to libpng-1.2.9 */
+                       /*   0x4000000L  unused */
+                       /*   0x8000000L  unused */
+                       /*  0x10000000L  unused */
+                       /*  0x20000000L  unused */
+                       /*  0x40000000L  unused */
+
+/* flags for png_create_struct */
+#define PNG_STRUCT_PNG   0x0001
+#define PNG_STRUCT_INFO  0x0002
+
+/* Scaling factor for filter heuristic weighting calculations */
+#define PNG_WEIGHT_SHIFT 8
+#define PNG_WEIGHT_FACTOR (1<<(PNG_WEIGHT_SHIFT))
+#define PNG_COST_SHIFT 3
+#define PNG_COST_FACTOR (1<<(PNG_COST_SHIFT))
+
+/* flags for the png_ptr->flags rather than declaring a byte for each one */
+#define PNG_FLAG_ZLIB_CUSTOM_STRATEGY     0x0001
+#define PNG_FLAG_ZLIB_CUSTOM_LEVEL        0x0002
+#define PNG_FLAG_ZLIB_CUSTOM_MEM_LEVEL    0x0004
+#define PNG_FLAG_ZLIB_CUSTOM_WINDOW_BITS  0x0008
+#define PNG_FLAG_ZLIB_CUSTOM_METHOD       0x0010
+#define PNG_FLAG_ZLIB_FINISHED            0x0020
+#define PNG_FLAG_ROW_INIT                 0x0040
+#define PNG_FLAG_FILLER_AFTER             0x0080
+#define PNG_FLAG_CRC_ANCILLARY_USE        0x0100
+#define PNG_FLAG_CRC_ANCILLARY_NOWARN     0x0200
+#define PNG_FLAG_CRC_CRITICAL_USE         0x0400
+#define PNG_FLAG_CRC_CRITICAL_IGNORE      0x0800
+#define PNG_FLAG_FREE_PLTE                0x1000
+#define PNG_FLAG_FREE_TRNS                0x2000
+#define PNG_FLAG_FREE_HIST                0x4000
+#define PNG_FLAG_KEEP_UNKNOWN_CHUNKS      0x8000L
+#define PNG_FLAG_KEEP_UNSAFE_CHUNKS       0x10000L
+#define PNG_FLAG_LIBRARY_MISMATCH         0x20000L
+#define PNG_FLAG_STRIP_ERROR_NUMBERS      0x40000L
+#define PNG_FLAG_STRIP_ERROR_TEXT         0x80000L
+#define PNG_FLAG_MALLOC_NULL_MEM_OK       0x100000L
+#define PNG_FLAG_ADD_ALPHA                0x200000L  /* Added to libpng-1.2.8 */
+#define PNG_FLAG_STRIP_ALPHA              0x400000L  /* Added to libpng-1.2.8 */
+                                  /*      0x800000L  unused */
+                                  /*     0x1000000L  unused */
+                                  /*     0x2000000L  unused */
+                                  /*     0x4000000L  unused */
+                                  /*     0x8000000L  unused */
+                                  /*    0x10000000L  unused */
+                                  /*    0x20000000L  unused */
+                                  /*    0x40000000L  unused */
+
+#define PNG_FLAG_CRC_ANCILLARY_MASK (PNG_FLAG_CRC_ANCILLARY_USE | \
+                                     PNG_FLAG_CRC_ANCILLARY_NOWARN)
+
+#define PNG_FLAG_CRC_CRITICAL_MASK  (PNG_FLAG_CRC_CRITICAL_USE | \
+                                     PNG_FLAG_CRC_CRITICAL_IGNORE)
+
+#define PNG_FLAG_CRC_MASK           (PNG_FLAG_CRC_ANCILLARY_MASK | \
+                                     PNG_FLAG_CRC_CRITICAL_MASK)
+
+/* Save typing: sum of absolute red, green and blue differences of c1, c2 */
+
+#define PNG_COLOR_DIST(c1, c2) (abs((int)((c1).red) - (int)((c2).red)) + \
+   abs((int)((c1).green) - (int)((c2).green)) + \
+   abs((int)((c1).blue) - (int)((c2).blue)))
+
+/* Row size in bytes, rounded up to a whole byte (added to libpng-1.2.6 JB) */
+#define PNG_ROWBYTES(pixel_bits, width) \
+    ((pixel_bits) >= 8 ? \
+    ((width) * (((png_uint_32)(pixel_bits)) >> 3)) : \
+    (( ((width) * ((png_uint_32)(pixel_bits))) + 7) >> 3) )
+
+/* PNG_OUT_OF_RANGE returns true if value is outside the range
+   ideal-delta..ideal+delta.  Each argument may be evaluated twice.
+   "ideal" and "delta" should be constants, normally simple
+   integers, "value" a variable. Added to libpng-1.2.6 JB */
+#define PNG_OUT_OF_RANGE(value, ideal, delta) \
+        ( (value) < (ideal)-(delta) || (value) > (ideal)+(delta) )
+
+/* variables declared in png.c - only it needs to define PNG_NO_EXTERN */
+#if !defined(PNG_NO_EXTERN) || defined(PNG_ALWAYS_EXTERN)
+/* place to hold the signature string for a PNG file. */
+#ifdef PNG_USE_GLOBAL_ARRAYS
+   PNG_EXPORT_VAR (PNG_CONST png_byte FARDATA) png_sig[8];
+#else
+#endif
+#endif /* PNG_NO_EXTERN */
+
+/* Constant strings for known chunk types.  If you need to add a chunk,
+ * define the name here, and add an invocation of the macro in png.c and
+ * wherever it's needed.
+ */
+#define PNG_IHDR png_byte png_IHDR[5] = { 73,  72,  68,  82, '\0'}
+#define PNG_IDAT png_byte png_IDAT[5] = { 73,  68,  65,  84, '\0'}
+#define PNG_IEND png_byte png_IEND[5] = { 73,  69,  78,  68, '\0'}
+#define PNG_PLTE png_byte png_PLTE[5] = { 80,  76,  84,  69, '\0'}
+#define PNG_bKGD png_byte png_bKGD[5] = { 98,  75,  71,  68, '\0'}
+#define PNG_cHRM png_byte png_cHRM[5] = { 99,  72,  82,  77, '\0'}
+#define PNG_gAMA png_byte png_gAMA[5] = {103,  65,  77,  65, '\0'}
+#define PNG_hIST png_byte png_hIST[5] = {104,  73,  83,  84, '\0'}
+#define PNG_iCCP png_byte png_iCCP[5] = {105,  67,  67,  80, '\0'}
+#define PNG_iTXt png_byte png_iTXt[5] = {105,  84,  88, 116, '\0'}
+#define PNG_oFFs png_byte png_oFFs[5] = {111,  70,  70, 115, '\0'}
+#define PNG_pCAL png_byte png_pCAL[5] = {112,  67,  65,  76, '\0'}
+#define PNG_sCAL png_byte png_sCAL[5] = {115,  67,  65,  76, '\0'}
+#define PNG_pHYs png_byte png_pHYs[5] = {112,  72,  89, 115, '\0'}
+#define PNG_sBIT png_byte png_sBIT[5] = {115,  66,  73,  84, '\0'}
+#define PNG_sPLT png_byte png_sPLT[5] = {115,  80,  76,  84, '\0'}
+#define PNG_sRGB png_byte png_sRGB[5] = {115,  82,  71,  66, '\0'}
+#define PNG_tEXt png_byte png_tEXt[5] = {116,  69,  88, 116, '\0'}
+#define PNG_tIME png_byte png_tIME[5] = {116,  73,  77,  69, '\0'}
+#define PNG_tRNS png_byte png_tRNS[5] = {116,  82,  78,  83, '\0'}
+#define PNG_zTXt png_byte png_zTXt[5] = {122,  84,  88, 116, '\0'}
+
+#ifdef PNG_USE_GLOBAL_ARRAYS
+PNG_EXPORT_VAR (png_byte FARDATA) png_IHDR[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_IDAT[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_IEND[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_PLTE[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_bKGD[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_cHRM[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_gAMA[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_hIST[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_iCCP[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_iTXt[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_oFFs[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_pCAL[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_sCAL[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_pHYs[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_sBIT[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_sPLT[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_sRGB[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_tEXt[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_tIME[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_tRNS[5];
+PNG_EXPORT_VAR (png_byte FARDATA) png_zTXt[5];
+#endif /* PNG_USE_GLOBAL_ARRAYS */
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* Initialize png_ptr struct for reading, and allocate any other memory
+ * (old interface - DEPRECATED - use png_create_read_struct instead).
+ * The macro form has no trailing ';': write "png_read_init(png_ptr);". */
+extern PNG_EXPORT(void,png_read_init) PNGARG((png_structp png_ptr));
+#undef png_read_init
+#define png_read_init(png_ptr) png_read_init_3(&(png_ptr), \
+    PNG_LIBPNG_VER_STRING,  png_sizeof(png_struct))
+#endif
+
+extern PNG_EXPORT(void,png_read_init_3) PNGARG((png_structpp ptr_ptr,
+    png_const_charp user_png_ver, png_size_t png_struct_size));
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+extern PNG_EXPORT(void,png_read_init_2) PNGARG((png_structp png_ptr,
+    png_const_charp user_png_ver, png_size_t png_struct_size, png_size_t
+    png_info_size));
+#endif
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* Initialize png_ptr struct for writing, and allocate any other memory
+ * (old interface - DEPRECATED - use png_create_write_struct instead).
+ * The macro form has no trailing ';': write "png_write_init(png_ptr);". */
+extern PNG_EXPORT(void,png_write_init) PNGARG((png_structp png_ptr));
+#undef png_write_init
+#define png_write_init(png_ptr) png_write_init_3(&(png_ptr), \
+    PNG_LIBPNG_VER_STRING, png_sizeof(png_struct))
+#endif
+
+extern PNG_EXPORT(void,png_write_init_3) PNGARG((png_structpp ptr_ptr,
+    png_const_charp user_png_ver, png_size_t png_struct_size));
+extern PNG_EXPORT(void,png_write_init_2) PNGARG((png_structp png_ptr,
+    png_const_charp user_png_ver, png_size_t png_struct_size, png_size_t
+    png_info_size));
+
+/* Allocate memory for an internal libpng struct */
+PNG_EXTERN png_voidp png_create_struct PNGARG((int type));
+
+/* Free memory from internal libpng struct */
+PNG_EXTERN void png_destroy_struct PNGARG((png_voidp struct_ptr));
+
+PNG_EXTERN png_voidp png_create_struct_2 PNGARG((int type, png_malloc_ptr
+  malloc_fn, png_voidp mem_ptr));
+PNG_EXTERN void png_destroy_struct_2 PNGARG((png_voidp struct_ptr,
+   png_free_ptr free_fn, png_voidp mem_ptr));
+
+/* Free any memory that info_ptr points to and reset struct. */
+PNG_EXTERN void png_info_destroy PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+#ifndef PNG_1_0_X
+/* Function to allocate memory for zlib. */
+PNG_EXTERN voidpf png_zalloc PNGARG((voidpf png_ptr, uInt items, uInt size));
+
+/* Function to free memory for zlib */
+PNG_EXTERN void png_zfree PNGARG((voidpf png_ptr, voidpf ptr));
+
+#ifdef PNG_SIZE_T
+/* Function to convert the sizeof an item (a size_t) to a png_size_t */
+   PNG_EXTERN png_size_t PNGAPI png_convert_size PNGARG((size_t size));
+#endif
+
+/* Next four functions are used internally as callbacks.  PNGAPI is required
+ * but not PNG_EXPORT.  PNGAPI added at libpng version 1.2.3. */
+
+PNG_EXTERN void PNGAPI png_default_read_data PNGARG((png_structp png_ptr,
+   png_bytep data, png_size_t length));
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+PNG_EXTERN void PNGAPI png_push_fill_buffer PNGARG((png_structp png_ptr,
+   png_bytep buffer, png_size_t length));
+#endif
+
+PNG_EXTERN void PNGAPI png_default_write_data PNGARG((png_structp png_ptr,
+   png_bytep data, png_size_t length));
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+#if !defined(PNG_NO_STDIO)
+PNG_EXTERN void PNGAPI png_default_flush PNGARG((png_structp png_ptr));
+#endif
+#endif
+#else /* PNG_1_0_X */
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+PNG_EXTERN void png_push_fill_buffer PNGARG((png_structp png_ptr,
+   png_bytep buffer, png_size_t length));
+#endif
+#endif /* PNG_1_0_X */
+
+/* Reset the CRC variable */
+PNG_EXTERN void png_reset_crc PNGARG((png_structp png_ptr));
+
+/* Write the "data" buffer to whatever output you are using. */
+PNG_EXTERN void png_write_data PNGARG((png_structp png_ptr, png_bytep data,
+   png_size_t length));
+
+/* Read data from whatever input you are using into the "data" buffer */
+PNG_EXTERN void png_read_data PNGARG((png_structp png_ptr, png_bytep data,
+   png_size_t length));
+
+/* Read bytes into buf, and update png_ptr->crc */
+PNG_EXTERN void png_crc_read PNGARG((png_structp png_ptr, png_bytep buf,
+   png_size_t length));
+
+/* Decompress data in a chunk that uses compression */
+#if defined(PNG_zTXt_SUPPORTED) || defined(PNG_iTXt_SUPPORTED) || \
+    defined(PNG_iCCP_SUPPORTED) || defined(PNG_sPLT_SUPPORTED)
+PNG_EXTERN png_charp png_decompress_chunk PNGARG((png_structp png_ptr,
+   int comp_type, png_charp chunkdata, png_size_t chunklength,
+   png_size_t prefix_length, png_size_t *data_length));
+#endif
+
+/* Read "skip" bytes, read the file crc, and (optionally) verify png_ptr->crc */
+PNG_EXTERN int png_crc_finish PNGARG((png_structp png_ptr, png_uint_32 skip));
+
+/* Read the CRC from the file and compare it to the libpng calculated CRC */
+PNG_EXTERN int png_crc_error PNGARG((png_structp png_ptr));
+
+/* Calculate the CRC over a section of data.  Note that we are only
+ * passing a maximum of 64K on systems that have this as a memory limit,
+ * since this is the maximum buffer size we can specify.
+ */
+PNG_EXTERN void png_calculate_crc PNGARG((png_structp png_ptr, png_bytep ptr,
+   png_size_t length));
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+PNG_EXTERN void png_flush PNGARG((png_structp png_ptr));
+#endif
+
+/* simple function to write the signature */
+PNG_EXTERN void png_write_sig PNGARG((png_structp png_ptr));
+
+/* write various chunks */
+
+/* Write the IHDR chunk, and update the png_struct with the necessary
+ * information.
+ */
+PNG_EXTERN void png_write_IHDR PNGARG((png_structp png_ptr, png_uint_32 width,
+   png_uint_32 height,
+   int bit_depth, int color_type, int compression_method, int filter_method,
+   int interlace_method));
+
+PNG_EXTERN void png_write_PLTE PNGARG((png_structp png_ptr, png_colorp palette,
+   png_uint_32 num_pal));
+
+PNG_EXTERN void png_write_IDAT PNGARG((png_structp png_ptr, png_bytep data,
+   png_size_t length));
+
+PNG_EXTERN void png_write_IEND PNGARG((png_structp png_ptr));
+
+#if defined(PNG_WRITE_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+PNG_EXTERN void png_write_gAMA PNGARG((png_structp png_ptr, double file_gamma));
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+PNG_EXTERN void png_write_gAMA_fixed PNGARG((png_structp png_ptr, png_fixed_point
+    file_gamma));
+#endif
+#endif
+
+#if defined(PNG_WRITE_sBIT_SUPPORTED)
+PNG_EXTERN void png_write_sBIT PNGARG((png_structp png_ptr, png_color_8p sbit,
+   int color_type));
+#endif
+
+#if defined(PNG_WRITE_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+PNG_EXTERN void png_write_cHRM PNGARG((png_structp png_ptr,
+   double white_x, double white_y,
+   double red_x, double red_y, double green_x, double green_y,
+   double blue_x, double blue_y));
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+PNG_EXTERN void png_write_cHRM_fixed PNGARG((png_structp png_ptr,
+   png_fixed_point int_white_x, png_fixed_point int_white_y,
+   png_fixed_point int_red_x, png_fixed_point int_red_y, png_fixed_point
+   int_green_x, png_fixed_point int_green_y, png_fixed_point int_blue_x,
+   png_fixed_point int_blue_y));
+#endif
+#endif
+
+#if defined(PNG_WRITE_sRGB_SUPPORTED)
+PNG_EXTERN void png_write_sRGB PNGARG((png_structp png_ptr,
+   int intent));
+#endif
+
+#if defined(PNG_WRITE_iCCP_SUPPORTED)
+PNG_EXTERN void png_write_iCCP PNGARG((png_structp png_ptr,
+   png_charp name, int compression_type,
+   png_charp profile, int proflen));
+   /* Note to maintainer: profile should be png_bytep */
+#endif
+
+#if defined(PNG_WRITE_sPLT_SUPPORTED)
+PNG_EXTERN void png_write_sPLT PNGARG((png_structp png_ptr,
+   png_sPLT_tp palette));
+#endif
+
+#if defined(PNG_WRITE_tRNS_SUPPORTED)
+PNG_EXTERN void png_write_tRNS PNGARG((png_structp png_ptr, png_bytep trans,
+   png_color_16p values, int number, int color_type));
+#endif
+
+#if defined(PNG_WRITE_bKGD_SUPPORTED)
+PNG_EXTERN void png_write_bKGD PNGARG((png_structp png_ptr,
+   png_color_16p values, int color_type));
+#endif
+
+#if defined(PNG_WRITE_hIST_SUPPORTED)
+PNG_EXTERN void png_write_hIST PNGARG((png_structp png_ptr, png_uint_16p hist,
+   int num_hist));
+#endif
+
+#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_pCAL_SUPPORTED) || \
+    defined(PNG_WRITE_iCCP_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED)
+PNG_EXTERN png_size_t png_check_keyword PNGARG((png_structp png_ptr,
+   png_charp key, png_charpp new_key));
+#endif
+
+#if defined(PNG_WRITE_tEXt_SUPPORTED)
+PNG_EXTERN void png_write_tEXt PNGARG((png_structp png_ptr, png_charp key,
+   png_charp text, png_size_t text_len));
+#endif
+
+#if defined(PNG_WRITE_zTXt_SUPPORTED)
+PNG_EXTERN void png_write_zTXt PNGARG((png_structp png_ptr, png_charp key,
+   png_charp text, png_size_t text_len, int compression));
+#endif
+
+#if defined(PNG_WRITE_iTXt_SUPPORTED)
+PNG_EXTERN void png_write_iTXt PNGARG((png_structp png_ptr,
+   int compression, png_charp key, png_charp lang, png_charp lang_key,
+   png_charp text));
+#endif
+
+#if defined(PNG_TEXT_SUPPORTED)  /* Added at version 1.0.14 and 1.2.4 */
+PNG_EXTERN int png_set_text_2 PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_textp text_ptr, int num_text));
+#endif
+
+#if defined(PNG_WRITE_oFFs_SUPPORTED)
+PNG_EXTERN void png_write_oFFs PNGARG((png_structp png_ptr,
+   png_int_32 x_offset, png_int_32 y_offset, int unit_type));
+#endif
+
+#if defined(PNG_WRITE_pCAL_SUPPORTED)
+PNG_EXTERN void png_write_pCAL PNGARG((png_structp png_ptr, png_charp purpose,
+   png_int_32 X0, png_int_32 X1, int type, int nparams,
+   png_charp units, png_charpp params));
+#endif
+
+#if defined(PNG_WRITE_pHYs_SUPPORTED)
+PNG_EXTERN void png_write_pHYs PNGARG((png_structp png_ptr,
+   png_uint_32 x_pixels_per_unit, png_uint_32 y_pixels_per_unit,
+   int unit_type));
+#endif
+
+#if defined(PNG_WRITE_tIME_SUPPORTED)
+PNG_EXTERN void png_write_tIME PNGARG((png_structp png_ptr,
+   png_timep mod_time));
+#endif
+
+#if defined(PNG_WRITE_sCAL_SUPPORTED)
+#if defined(PNG_FLOATING_POINT_SUPPORTED) && !defined(PNG_NO_STDIO)
+PNG_EXTERN void png_write_sCAL PNGARG((png_structp png_ptr,
+   int unit, double width, double height));
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+PNG_EXTERN void png_write_sCAL_s PNGARG((png_structp png_ptr,
+   int unit, png_charp width, png_charp height));
+#endif
+#endif
+#endif
+
+/* Called when finished processing a row of data */
+PNG_EXTERN void png_write_finish_row PNGARG((png_structp png_ptr));
+
+/* Internal use only.   Called before first row of data */
+PNG_EXTERN void png_write_start_row PNGARG((png_structp png_ptr));
+
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+PNG_EXTERN void png_build_gamma_table PNGARG((png_structp png_ptr));
+#endif
+
+/* combine a row of data, dealing with alpha, etc. if requested */
+PNG_EXTERN void png_combine_row PNGARG((png_structp png_ptr, png_bytep row,
+   int mask));
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED)
+/* expand an interlaced row */
+/* OLD pre-1.0.9 interface:
+PNG_EXTERN void png_do_read_interlace PNGARG((png_row_infop row_info,
+   png_bytep row, int pass, png_uint_32 transformations));
+ */
+PNG_EXTERN void png_do_read_interlace PNGARG((png_structp png_ptr));
+#endif
+
+/* GRR TO DO (2.0 or whenever):  simplify other internal calling interfaces */
+
+#if defined(PNG_WRITE_INTERLACING_SUPPORTED)
+/* grab pixels out of a row for an interlaced pass */
+PNG_EXTERN void png_do_write_interlace PNGARG((png_row_infop row_info,
+   png_bytep row, int pass));
+#endif
+
+/* unfilter a row */
+PNG_EXTERN void png_read_filter_row PNGARG((png_structp png_ptr,
+   png_row_infop row_info, png_bytep row, png_bytep prev_row, int filter));
+
+/* Choose the best filter to use and filter the row data */
+PNG_EXTERN void png_write_find_filter PNGARG((png_structp png_ptr,
+   png_row_infop row_info));
+
+/* Write out the filtered row. */
+PNG_EXTERN void png_write_filtered_row PNGARG((png_structp png_ptr,
+   png_bytep filtered_row));
+/* finish a row while reading, dealing with interlacing passes, etc. */
+PNG_EXTERN void png_read_finish_row PNGARG((png_structp png_ptr));
+
+/* initialize the row buffers, etc. */
+PNG_EXTERN void png_read_start_row PNGARG((png_structp png_ptr));
+/* optional call to update the user's info structure */
+PNG_EXTERN void png_read_transform_info PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+
+/* these are the functions that do the transformations */
+#if defined(PNG_READ_FILLER_SUPPORTED)
+PNG_EXTERN void png_do_read_filler PNGARG((png_row_infop row_info,
+   png_bytep row, png_uint_32 filler, png_uint_32 flags));
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED)
+PNG_EXTERN void png_do_read_swap_alpha PNGARG((png_row_infop row_info,
+   png_bytep row));
+#endif
+
+#if defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+PNG_EXTERN void png_do_write_swap_alpha PNGARG((png_row_infop row_info,
+   png_bytep row));
+#endif
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED)
+PNG_EXTERN void png_do_read_invert_alpha PNGARG((png_row_infop row_info,
+   png_bytep row));
+#endif
+
+#if defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+PNG_EXTERN void png_do_write_invert_alpha PNGARG((png_row_infop row_info,
+   png_bytep row));
+#endif
+
+#if defined(PNG_WRITE_FILLER_SUPPORTED) || \
+    defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+PNG_EXTERN void png_do_strip_filler PNGARG((png_row_infop row_info,
+   png_bytep row, png_uint_32 flags));
+#endif
+
+#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
+PNG_EXTERN void png_do_swap PNGARG((png_row_infop row_info, png_bytep row));
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED) || defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+PNG_EXTERN void png_do_packswap PNGARG((png_row_infop row_info, png_bytep row));
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+PNG_EXTERN int png_do_rgb_to_gray PNGARG((png_structp png_ptr, png_row_infop
+   row_info, png_bytep row));
+#endif
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+PNG_EXTERN void png_do_gray_to_rgb PNGARG((png_row_infop row_info,
+   png_bytep row));
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED)
+PNG_EXTERN void png_do_unpack PNGARG((png_row_infop row_info, png_bytep row));
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED)
+PNG_EXTERN void png_do_unshift PNGARG((png_row_infop row_info, png_bytep row,
+   png_color_8p sig_bits));
+#endif
+
+#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
+PNG_EXTERN void png_do_invert PNGARG((png_row_infop row_info, png_bytep row));
+#endif
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+PNG_EXTERN void png_do_chop PNGARG((png_row_infop row_info, png_bytep row));
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+PNG_EXTERN void png_do_dither PNGARG((png_row_infop row_info,
+   png_bytep row, png_bytep palette_lookup, png_bytep dither_lookup));
+
+#  if defined(PNG_CORRECT_PALETTE_SUPPORTED)
+PNG_EXTERN void png_correct_palette PNGARG((png_structp png_ptr,
+   png_colorp palette, int num_palette));
+#  endif
+#endif
+
+#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
+PNG_EXTERN void png_do_bgr PNGARG((png_row_infop row_info, png_bytep row));
+#endif
+
+#if defined(PNG_WRITE_PACK_SUPPORTED)
+PNG_EXTERN void png_do_pack PNGARG((png_row_infop row_info,
+   png_bytep row, png_uint_32 bit_depth));
+#endif
+
+#if defined(PNG_WRITE_SHIFT_SUPPORTED)
+PNG_EXTERN void png_do_shift PNGARG((png_row_infop row_info, png_bytep row,
+   png_color_8p bit_depth));
+#endif
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+PNG_EXTERN void png_do_background PNGARG((png_row_infop row_info, png_bytep row,
+   png_color_16p trans_values, png_color_16p background,
+   png_color_16p background_1,
+   png_bytep gamma_table, png_bytep gamma_from_1, png_bytep gamma_to_1,
+   png_uint_16pp gamma_16, png_uint_16pp gamma_16_from_1,
+   png_uint_16pp gamma_16_to_1, int gamma_shift));
+#else
+PNG_EXTERN void png_do_background PNGARG((png_row_infop row_info, png_bytep row,
+   png_color_16p trans_values, png_color_16p background));
+#endif
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+PNG_EXTERN void png_do_gamma PNGARG((png_row_infop row_info, png_bytep row,
+   png_bytep gamma_table, png_uint_16pp gamma_16_table,
+   int gamma_shift));
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+PNG_EXTERN void png_do_expand_palette PNGARG((png_row_infop row_info,
+   png_bytep row, png_colorp palette, png_bytep trans, int num_trans));
+PNG_EXTERN void png_do_expand PNGARG((png_row_infop row_info,
+   png_bytep row, png_color_16p trans_value));
+#endif
+
+/* The following decodes the appropriate chunks, and does error correction,
+ * then calls the appropriate callback for the chunk if it is valid.
+ */
+
+/* decode the IHDR chunk */
+PNG_EXTERN void png_handle_IHDR PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+PNG_EXTERN void png_handle_PLTE PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+PNG_EXTERN void png_handle_IEND PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+
+#if defined(PNG_READ_bKGD_SUPPORTED)
+PNG_EXTERN void png_handle_bKGD PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_cHRM_SUPPORTED)
+PNG_EXTERN void png_handle_cHRM PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_gAMA_SUPPORTED)
+PNG_EXTERN void png_handle_gAMA PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_hIST_SUPPORTED)
+PNG_EXTERN void png_handle_hIST PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_iCCP_SUPPORTED)
+PNG_EXTERN void png_handle_iCCP PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif /* PNG_READ_iCCP_SUPPORTED */
+
+#if defined(PNG_READ_iTXt_SUPPORTED)
+PNG_EXTERN void png_handle_iTXt PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_oFFs_SUPPORTED)
+PNG_EXTERN void png_handle_oFFs PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_pCAL_SUPPORTED)
+PNG_EXTERN void png_handle_pCAL PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_pHYs_SUPPORTED)
+PNG_EXTERN void png_handle_pHYs PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_sBIT_SUPPORTED)
+PNG_EXTERN void png_handle_sBIT PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_sCAL_SUPPORTED)
+PNG_EXTERN void png_handle_sCAL PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_sPLT_SUPPORTED)
+PNG_EXTERN void png_handle_sPLT PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif /* PNG_READ_sPLT_SUPPORTED */
+
+#if defined(PNG_READ_sRGB_SUPPORTED)
+PNG_EXTERN void png_handle_sRGB PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_tEXt_SUPPORTED)
+PNG_EXTERN void png_handle_tEXt PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_tIME_SUPPORTED)
+PNG_EXTERN void png_handle_tIME PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_tRNS_SUPPORTED)
+PNG_EXTERN void png_handle_tRNS PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+#if defined(PNG_READ_zTXt_SUPPORTED)
+PNG_EXTERN void png_handle_zTXt PNGARG((png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 length));
+#endif
+
+PNG_EXTERN void png_handle_unknown PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 length));
+
+PNG_EXTERN void png_check_chunk_name PNGARG((png_structp png_ptr,
+   png_bytep chunk_name));
+
+/* handle the transformations for reading and writing */
+PNG_EXTERN void png_do_read_transformations PNGARG((png_structp png_ptr));
+PNG_EXTERN void png_do_write_transformations PNGARG((png_structp png_ptr));
+
+PNG_EXTERN void png_init_read_transformations PNGARG((png_structp png_ptr));
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+PNG_EXTERN void png_push_read_chunk PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+PNG_EXTERN void png_push_read_sig PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+PNG_EXTERN void png_push_check_crc PNGARG((png_structp png_ptr));
+PNG_EXTERN void png_push_crc_skip PNGARG((png_structp png_ptr,
+   png_uint_32 length));
+PNG_EXTERN void png_push_crc_finish PNGARG((png_structp png_ptr));
+PNG_EXTERN void png_push_save_buffer PNGARG((png_structp png_ptr));
+PNG_EXTERN void png_push_restore_buffer PNGARG((png_structp png_ptr,
+   png_bytep buffer, png_size_t buffer_length));
+PNG_EXTERN void png_push_read_IDAT PNGARG((png_structp png_ptr));
+PNG_EXTERN void png_process_IDAT_data PNGARG((png_structp png_ptr,
+   png_bytep buffer, png_size_t buffer_length));
+PNG_EXTERN void png_push_process_row PNGARG((png_structp png_ptr));
+PNG_EXTERN void png_push_handle_unknown PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 length));
+PNG_EXTERN void png_push_have_info PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+PNG_EXTERN void png_push_have_end PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+PNG_EXTERN void png_push_have_row PNGARG((png_structp png_ptr, png_bytep row));
+PNG_EXTERN void png_push_read_end PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+PNG_EXTERN void png_process_some_data PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+PNG_EXTERN void png_read_push_finish_row PNGARG((png_structp png_ptr));
+#if defined(PNG_READ_tEXt_SUPPORTED)
+PNG_EXTERN void png_push_handle_tEXt PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 length));
+PNG_EXTERN void png_push_read_tEXt PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+PNG_EXTERN void png_push_handle_zTXt PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 length));
+PNG_EXTERN void png_push_read_zTXt PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+PNG_EXTERN void png_push_handle_iTXt PNGARG((png_structp png_ptr,
+   png_infop info_ptr, png_uint_32 length));
+PNG_EXTERN void png_push_read_iTXt PNGARG((png_structp png_ptr,
+   png_infop info_ptr));
+#endif
+
+#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
+
+#ifdef PNG_MNG_FEATURES_SUPPORTED
+PNG_EXTERN void png_do_read_intrapixel PNGARG((png_row_infop row_info,
+   png_bytep row));
+PNG_EXTERN void png_do_write_intrapixel PNGARG((png_row_infop row_info,
+   png_bytep row));
+#endif
+
+#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
+#if defined(PNG_MMX_CODE_SUPPORTED)
+/* png.c */ /* PRIVATE */
+PNG_EXTERN void png_init_mmx_flags PNGARG((png_structp png_ptr));
+#endif
+#endif
+
+#if defined(PNG_INCH_CONVERSIONS) && defined(PNG_FLOATING_POINT_SUPPORTED)
+PNG_EXTERN png_uint_32 png_get_pixels_per_inch PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+PNG_EXTERN png_uint_32 png_get_x_pixels_per_inch PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+PNG_EXTERN png_uint_32 png_get_y_pixels_per_inch PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+PNG_EXTERN float png_get_x_offset_inches PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+PNG_EXTERN float png_get_y_offset_inches PNGARG((png_structp png_ptr,
+png_infop info_ptr));
+
+#if defined(PNG_pHYs_SUPPORTED)
+PNG_EXTERN png_uint_32 png_get_pHYs_dpi PNGARG((png_structp png_ptr,
+png_infop info_ptr, png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type));
+#endif /* PNG_pHYs_SUPPORTED */
+#endif  /* PNG_INCH_CONVERSIONS && PNG_FLOATING_POINT_SUPPORTED */
+
+/* Maintainer: Put new private prototypes here ^ and in libpngpf.3 */
+
+#endif /* PNG_INTERNAL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PNG_VERSION_INFO_ONLY */
+/* do not put anything past this line */
+#endif /* PNG_H */
diff --git a/src/libpng/pngconf.h b/src/libpng/pngconf.h
new file mode 100644
index 0000000..64f3ff0
--- /dev/null
+++ b/src/libpng/pngconf.h
@@ -0,0 +1,1483 @@
+
+/* pngconf.h - machine configurable file for libpng
+ *
+ * libpng version 1.2.22 - October 13, 2007
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+/* Any machine specific code is near the front of this file, so if you
+ * are configuring libpng for a machine, you may want to read the section
+ * starting here down to where it starts to typedef png_color, png_text,
+ * and png_info.
+ */
+
+#ifndef PNGCONF_H
+#define PNGCONF_H
+
+#define PNG_1_2_X
+
+/* 
+ * PNG_USER_CONFIG has to be defined on the compiler command line. This
+ * includes the resource compiler for Windows DLL configurations.
+ */
+#ifdef PNG_USER_CONFIG
+#  ifndef PNG_USER_PRIVATEBUILD
+#    define PNG_USER_PRIVATEBUILD
+#  endif
+#include "pngusr.h"
+#endif
+
+/* PNG_CONFIGURE_LIBPNG is set by the "configure" script. */
+#ifdef PNG_CONFIGURE_LIBPNG
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+/*
+ * Added at libpng-1.2.8
+ *  
+ * If you create a private DLL you need to define in "pngusr.h" the following:
+ * #define PNG_USER_PRIVATEBUILD <Describes by whom and why this version of
+ *        the DLL was built>
+ *  e.g. #define PNG_USER_PRIVATEBUILD "Build by MyCompany for xyz reasons."
+ * #define PNG_USER_DLLFNAME_POSTFIX <two-letter postfix that serves to
+ *        distinguish your DLL from those of the official release. These
+ *        correspond to the trailing letters that come after the version
+ *        number and must match your private DLL name>
+ *  e.g. // private DLL "libpng13gx.dll"
+ *       #define PNG_USER_DLLFNAME_POSTFIX "gx"
+ * 
+ * The following macros are also at your disposal if you want to complete the 
+ * DLL VERSIONINFO structure.
+ * - PNG_USER_VERSIONINFO_COMMENTS
+ * - PNG_USER_VERSIONINFO_COMPANYNAME
+ * - PNG_USER_VERSIONINFO_LEGALTRADEMARKS
+ */
+
+#ifdef __STDC__
+#ifdef SPECIALBUILD
+#  pragma message("PNG_LIBPNG_SPECIALBUILD (and deprecated SPECIALBUILD)\
+ are now LIBPNG reserved macros. Use PNG_USER_PRIVATEBUILD instead.")
+#endif
+
+#ifdef PRIVATEBUILD
+# pragma message("PRIVATEBUILD is deprecated.\
+ Use PNG_USER_PRIVATEBUILD instead.")
+# define PNG_USER_PRIVATEBUILD PRIVATEBUILD
+#endif
+#endif /* __STDC__ */
+
+#ifndef PNG_VERSION_INFO_ONLY
+
+/* End of material added to libpng-1.2.8 */
+
+/* Added at libpng-1.2.19, removed at libpng-1.2.20 because it caused trouble.
+   Restored at libpng-1.2.21. */
+#if !defined(PNG_NO_WARN_UNINITIALIZED_ROW) && \
+    !defined(PNG_WARN_UNINITIALIZED_ROW)
+#  define PNG_WARN_UNINITIALIZED_ROW 1
+#endif
+/* End of material added at libpng-1.2.19/1.2.21 */
+
+/* This is the size of the compression buffer, and thus the size of
+ * an IDAT chunk.  Make this whatever size you feel is best for your
+ * machine.  One of these will be allocated per png_struct.  When this
+ * is full, it writes the data to the disk, and does some other
+ * calculations.  Making this an extremely small size will slow
+ * the library down, but you may want to experiment to determine
+ * where it becomes significant, if you are concerned with memory
+ * usage.  Note that zlib allocates at least 32Kb also.  For readers,
+ * this describes the size of the buffer available to read the data in.
+ * Unless this gets smaller than the size of a row (compressed),
+ * it should not make much difference how big this is.
+ */
+
+#ifndef PNG_ZBUF_SIZE
+#  define PNG_ZBUF_SIZE 8192
+#endif
+
+/* Define PNG_NO_READ_SUPPORTED if you want a write-only libpng */
+
+#ifndef PNG_NO_READ_SUPPORTED
+#  define PNG_READ_SUPPORTED
+#endif
+
+/* Define PNG_NO_WRITE_SUPPORTED if you want a read-only libpng */
+
+#ifndef PNG_NO_WRITE_SUPPORTED
+#  define PNG_WRITE_SUPPORTED
+#endif
+
+/* Enabled by default in 1.2.0.  You can disable this if you don't need to
+   support PNGs that are embedded in MNG datastreams */
+#if !defined(PNG_1_0_X) && !defined(PNG_NO_MNG_FEATURES)
+#  ifndef PNG_MNG_FEATURES_SUPPORTED
+#    define PNG_MNG_FEATURES_SUPPORTED
+#  endif
+#endif
+
+#ifndef PNG_NO_FLOATING_POINT_SUPPORTED
+#  ifndef PNG_FLOATING_POINT_SUPPORTED
+#    define PNG_FLOATING_POINT_SUPPORTED
+#  endif
+#endif
+
+/* If you are running on a machine where you cannot allocate more
+ * than 64K of memory at once, uncomment this.  While libpng will not
+ * normally need that much memory in a chunk (unless you load up a very
+ * large file), zlib needs to know how big of a chunk it can use, and
+ * libpng thus makes sure to check any memory allocation to verify it
+ * will fit into memory.
+#define PNG_MAX_MALLOC_64K
+ */
+#if defined(MAXSEG_64K) && !defined(PNG_MAX_MALLOC_64K)
+#  define PNG_MAX_MALLOC_64K
+#endif
+
+/* Special munging to support doing things the 'cygwin' way:
+ * 'Normal' png-on-win32 defines/defaults:
+ *   PNG_BUILD_DLL -- building dll
+ *   PNG_USE_DLL   -- building an application, linking to dll
+ *   (no define)   -- building static library, or building an
+ *                    application and linking to the static lib
+ * 'Cygwin' defines/defaults:
+ *   PNG_BUILD_DLL -- (ignored) building the dll
+ *   (no define)   -- (ignored) building an application, linking to the dll
+ *   PNG_STATIC    -- (ignored) building the static lib, or building an 
+ *                    application that links to the static lib.
+ *   ALL_STATIC    -- (ignored) building various static libs, or building an 
+ *                    application that links to the static libs.
+ * Thus,
+ * a cygwin user should define either PNG_BUILD_DLL or PNG_STATIC, and
+ * this bit of #ifdefs will define the 'correct' config variables based on
+ * that. If a cygwin user *wants* to define 'PNG_USE_DLL' that's okay, but
+ * unnecessary.
+ *
+ * Also, the precedence order is:
+ *   ALL_STATIC (since we can't #undef something outside our namespace)
+ *   PNG_BUILD_DLL
+ *   PNG_STATIC
+ *   (nothing) == PNG_USE_DLL
+ * 
+ * CYGWIN (2002-01-20): The preceding is now obsolete. With the advent
+ *   of auto-import in binutils, we no longer need to worry about 
+ *   __declspec(dllexport) / __declspec(dllimport) and friends.  Therefore,
+ *   we don't need to worry about PNG_STATIC or ALL_STATIC when it comes
+ *   to __declspec() stuff.  However, we DO need to worry about 
+ *   PNG_BUILD_DLL and PNG_STATIC because those change some defaults
+ *   such as CONSOLE_IO and whether GLOBAL_ARRAYS are allowed.
+ */
+#if defined(__CYGWIN__)
+#  if defined(ALL_STATIC)
+#    if defined(PNG_BUILD_DLL)
+#      undef PNG_BUILD_DLL
+#    endif
+#    if defined(PNG_USE_DLL)
+#      undef PNG_USE_DLL
+#    endif
+#    if defined(PNG_DLL)
+#      undef PNG_DLL
+#    endif
+#    if !defined(PNG_STATIC)
+#      define PNG_STATIC
+#    endif
+#  else
+#    if defined (PNG_BUILD_DLL)
+#      if defined(PNG_STATIC)
+#        undef PNG_STATIC
+#      endif
+#      if defined(PNG_USE_DLL)
+#        undef PNG_USE_DLL
+#      endif
+#      if !defined(PNG_DLL)
+#        define PNG_DLL
+#      endif
+#    else
+#      if defined(PNG_STATIC)
+#        if defined(PNG_USE_DLL)
+#          undef PNG_USE_DLL
+#        endif
+#        if defined(PNG_DLL)
+#          undef PNG_DLL
+#        endif
+#      else
+#        if !defined(PNG_USE_DLL)
+#          define PNG_USE_DLL
+#        endif
+#        if !defined(PNG_DLL)
+#          define PNG_DLL
+#        endif
+#      endif  
+#    endif  
+#  endif
+#endif
+
+/* This protects us against compilers that run on a windowing system
+ * and thus don't have or would rather we not use the stdio types:
+ * stdin, stdout, and stderr.  The only one currently used is stderr
+ * in png_error() and png_warning().  #defining PNG_NO_CONSOLE_IO will
+ * prevent these from being compiled and used. #defining PNG_NO_STDIO
+ * will also prevent these, plus will prevent the entire set of stdio
+ * macros and functions (FILE *, printf, etc.) from being compiled and used,
+ * unless (PNG_DEBUG > 0) has been #defined.
+ *
+ * #define PNG_NO_CONSOLE_IO
+ * #define PNG_NO_STDIO
+ */
+
+#if defined(_WIN32_WCE)
+#  include <windows.h>
+   /* Console I/O functions are not supported on WindowsCE */
+#  define PNG_NO_CONSOLE_IO
+#  ifdef PNG_DEBUG
+#    undef PNG_DEBUG
+#  endif
+#endif
+
+#ifdef PNG_BUILD_DLL
+#  ifndef PNG_CONSOLE_IO_SUPPORTED
+#    ifndef PNG_NO_CONSOLE_IO
+#      define PNG_NO_CONSOLE_IO
+#    endif
+#  endif
+#endif
+
+#  ifdef PNG_NO_STDIO
+#    ifndef PNG_NO_CONSOLE_IO
+#      define PNG_NO_CONSOLE_IO
+#    endif
+#    ifdef PNG_DEBUG
+#      if (PNG_DEBUG > 0)
+#        include <stdio.h>
+#      endif
+#    endif
+#  else
+#    if !defined(_WIN32_WCE)
+/* "stdio.h" functions are not supported on WindowsCE */
+#      include <stdio.h>
+#    endif
+#  endif
+
+/* This macro protects us against machines that don't have function
+ * prototypes (i.e., K&R style headers).  If your compiler does not handle
+ * function prototypes, define this macro and use the included ansi2knr.
+ * I've always been able to use _NO_PROTO as the indicator, but you may
+ * need to drag the empty declaration out in front of here, or change the
+ * ifdef to suit your own needs.
+ */
+#ifndef PNGARG
+
+#ifdef OF /* zlib prototype munger */
+#  define PNGARG(arglist) OF(arglist)
+#else
+
+#ifdef _NO_PROTO
+#  define PNGARG(arglist) ()
+#  ifndef PNG_TYPECAST_NULL
+#     define PNG_TYPECAST_NULL
+#  endif
+#else
+#  define PNGARG(arglist) arglist
+#endif /* _NO_PROTO */
+
+
+#endif /* OF */
+
+#endif /* PNGARG */
+
+/* Try to determine if we are compiling on a Mac.  Note that testing for
+ * just __MWERKS__ is not good enough, because Codewarrior is now used
+ * on non-Mac platforms.
+ */
+#ifndef MACOS
+#  if (defined(__MWERKS__) && defined(macintosh)) || defined(applec) || \
+      defined(THINK_C) || defined(__SC__) || defined(TARGET_OS_MAC)
+#    define MACOS
+#  endif
+#endif
+
+/* enough people need this for various reasons to include it here */
+#if !defined(MACOS) && !defined(RISCOS) && !defined(_WIN32_WCE)
+#  include <sys/types.h>
+#endif
+
+#if !defined(PNG_SETJMP_NOT_SUPPORTED) && !defined(PNG_NO_SETJMP_SUPPORTED)
+#  define PNG_SETJMP_SUPPORTED
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+/* This is an attempt to force a single setjmp behaviour on Linux.  If
+ * the X config stuff didn't define _BSD_SOURCE we wouldn't need this.
+ */
+
+#  ifdef __linux__
+#    ifdef _BSD_SOURCE
+#      define PNG_SAVE_BSD_SOURCE
+#      undef _BSD_SOURCE
+#    endif
+#    ifdef _SETJMP_H
+     /* If you encounter a compiler error here, see the explanation
+      * near the end of INSTALL.
+      */
+         __png.h__ already includes setjmp.h;
+         __dont__ include it again.;
+#    endif
+#  endif /* __linux__ */
+
+   /* include setjmp.h for error handling */
+#  include <setjmp.h>
+
+#  ifdef __linux__
+#    ifdef PNG_SAVE_BSD_SOURCE
+#      define _BSD_SOURCE
+#      undef PNG_SAVE_BSD_SOURCE
+#    endif
+#  endif /* __linux__ */
+#endif /* PNG_SETJMP_SUPPORTED */
+
+#ifdef BSD
+#  include <strings.h>
+#else
+#  include <string.h>
+#endif
+
+/* Other defines for things like memory and the like can go here.  */
+#ifdef PNG_INTERNAL
+
+#include <stdlib.h>
+
+/* The functions exported by PNG_EXTERN are PNG_INTERNAL functions, which
+ * aren't usually used outside the library (as far as I know), so it is
+ * debatable if they should be exported at all.  In the future, when it is
+ * possible to have run-time registry of chunk-handling functions, some of
+ * these will be made available again.
+#define PNG_EXTERN extern
+ */
+#define PNG_EXTERN
+
+/* Other defines specific to compilers can go here.  Try to keep
+ * them inside an appropriate ifdef/endif pair for portability.
+ */
+
+#if defined(PNG_FLOATING_POINT_SUPPORTED)
+#  if defined(MACOS)
+     /* We need to check that <math.h> hasn't already been included earlier
+      * as it seems it doesn't agree with <fp.h>, yet we should really use
+      * <fp.h> if possible.
+      */
+#    if !defined(__MATH_H__) && !defined(__MATH_H) && !defined(__cmath__)
+#      include <fp.h>
+#    endif
+#  else
+#    include <math.h>
+#  endif
+#  if defined(_AMIGA) && defined(__SASC) && defined(_M68881)
+     /* Amiga SAS/C: We must include builtin FPU functions when compiling using
+      * MATH=68881
+      */
+#    include <m68881.h>
+#  endif
+#endif
+
+/* Codewarrior on NT has linking problems without this. */
+#if (defined(__MWERKS__) && defined(WIN32)) || defined(__STDC__)
+#  define PNG_ALWAYS_EXTERN
+#endif
+
+/* This provides the non-ANSI (far) memory allocation routines. */
+#if defined(__TURBOC__) && defined(__MSDOS__)
+#  include <mem.h>
+#  include <alloc.h>
+#endif
+
+/* I have no idea why this is necessary... */
+#if defined(_MSC_VER) && (defined(WIN32) || defined(_Windows) || \
+    defined(_WINDOWS) || defined(_WIN32) || defined(__WIN32__))
+#  include <malloc.h>
+#endif
+
+/* This controls how fine the dithering gets.  As this allocates
+ * a largish chunk of memory (32K), those who are not as concerned
+ * with dithering quality can decrease some or all of these.
+ */
+#ifndef PNG_DITHER_RED_BITS
+#  define PNG_DITHER_RED_BITS 5
+#endif
+#ifndef PNG_DITHER_GREEN_BITS
+#  define PNG_DITHER_GREEN_BITS 5
+#endif
+#ifndef PNG_DITHER_BLUE_BITS
+#  define PNG_DITHER_BLUE_BITS 5
+#endif
+
+/* This controls how fine the gamma correction becomes when you
+ * are only interested in 8 bits anyway.  Increasing this value
+ * results in more memory being used, and more pow() functions
+ * being called to fill in the gamma tables.  Don't set this value
+ * less than 8, and even that may not work (I haven't tested it).
+ */
+
+#ifndef PNG_MAX_GAMMA_8
+#  define PNG_MAX_GAMMA_8 11
+#endif
+
+/* This controls how much a difference in gamma we can tolerate before
+ * we actually start doing gamma conversion.
+ */
+#ifndef PNG_GAMMA_THRESHOLD
+#  define PNG_GAMMA_THRESHOLD 0.05
+#endif
+
+#endif /* PNG_INTERNAL */
+
+/* The following uses const char * instead of char * for error
+ * and warning message functions, so some compilers won't complain.
+ * If you do not want to use const, define PNG_NO_CONST here.
+ */
+
+#ifndef PNG_NO_CONST
+#  define PNG_CONST const
+#else
+#  define PNG_CONST
+#endif
+
+/* The following defines give you the ability to remove code from the
+ * library that you will not be using.  I wish I could figure out how to
+ * automate this, but I can't do that without making it seriously hard
+ * on the users.  So if you are not using an ability, change the #define
+ * to an #undef, and that part of the library will not be compiled.  If
+ * your linker can't find a function, you may want to make sure the
+ * ability is defined here.  Some of these depend upon some others being
+ * defined.  I haven't figured out all the interactions here, so you may
+ * have to experiment awhile to get everything to compile.  If you are
+ * creating or using a shared library, you probably shouldn't touch this,
+ * as it will affect the size of the structures, and this will cause bad
+ * things to happen if the library and/or application ever change.
+ */
+
+/* Any features you will not be using can be undef'ed here */
+
+/* GR-P, 0.96a: Set "*TRANSFORMS_SUPPORTED" as default but allow user
+ * to turn it off with "*TRANSFORMS_NOT_SUPPORTED" or *PNG_NO_*_TRANSFORMS
+ * on the compile line, then pick and choose which ones to define without
+ * having to edit this file. It is safe to use the *TRANSFORMS_NOT_SUPPORTED
+ * if you only want to have a png-compliant reader/writer but don't need
+ * any of the extra transformations.  This saves about 80 kbytes in a
+ * typical installation of the library. (PNG_NO_* form added in version
+ * 1.0.1c, for consistency)
+ */
+
+/* The size of the png_text structure changed in libpng-1.0.6 when
+ * iTXt support was added.  iTXt support was turned off by default through
+ * libpng-1.2.x, to support old apps that malloc the png_text structure
+ * instead of calling png_set_text() and letting libpng malloc it.  It
+ * was turned on by default in libpng-1.3.0.
+ */
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+#  ifndef PNG_NO_iTXt_SUPPORTED
+#    define PNG_NO_iTXt_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_iTXt
+#    define PNG_NO_READ_iTXt
+#  endif
+#  ifndef PNG_NO_WRITE_iTXt
+#    define PNG_NO_WRITE_iTXt
+#  endif
+#endif
+
+#if !defined(PNG_NO_iTXt_SUPPORTED)
+#  if !defined(PNG_READ_iTXt_SUPPORTED) && !defined(PNG_NO_READ_iTXt)
+#    define PNG_READ_iTXt
+#  endif
+#  if !defined(PNG_WRITE_iTXt_SUPPORTED) && !defined(PNG_NO_WRITE_iTXt)
+#    define PNG_WRITE_iTXt
+#  endif
+#endif
+
+/* The following support, added after version 1.0.0, can be turned off here en
+ * masse by defining PNG_LEGACY_SUPPORTED in case you need binary compatibility
+ * with old applications that require the length of png_struct and png_info
+ * to remain unchanged.
+ */
+
+#ifdef PNG_LEGACY_SUPPORTED
+#  define PNG_NO_FREE_ME
+#  define PNG_NO_READ_UNKNOWN_CHUNKS
+#  define PNG_NO_WRITE_UNKNOWN_CHUNKS
+#  define PNG_NO_READ_USER_CHUNKS
+#  define PNG_NO_READ_iCCP
+#  define PNG_NO_WRITE_iCCP
+#  define PNG_NO_READ_iTXt
+#  define PNG_NO_WRITE_iTXt
+#  define PNG_NO_READ_sCAL
+#  define PNG_NO_WRITE_sCAL
+#  define PNG_NO_READ_sPLT
+#  define PNG_NO_WRITE_sPLT
+#  define PNG_NO_INFO_IMAGE
+#  define PNG_NO_READ_RGB_TO_GRAY
+#  define PNG_NO_READ_USER_TRANSFORM
+#  define PNG_NO_WRITE_USER_TRANSFORM
+#  define PNG_NO_USER_MEM
+#  define PNG_NO_READ_EMPTY_PLTE
+#  define PNG_NO_MNG_FEATURES
+#  define PNG_NO_FIXED_POINT_SUPPORTED
+#endif
+
+/* Ignore attempt to turn off both floating and fixed point support */
+#if !defined(PNG_FLOATING_POINT_SUPPORTED) || \
+    !defined(PNG_NO_FIXED_POINT_SUPPORTED)
+#  define PNG_FIXED_POINT_SUPPORTED
+#endif
+
+#ifndef PNG_NO_FREE_ME
+#  define PNG_FREE_ME_SUPPORTED
+#endif
+
+#if defined(PNG_READ_SUPPORTED)
+
+#if !defined(PNG_READ_TRANSFORMS_NOT_SUPPORTED) && \
+      !defined(PNG_NO_READ_TRANSFORMS)
+#  define PNG_READ_TRANSFORMS_SUPPORTED
+#endif
+
+#ifdef PNG_READ_TRANSFORMS_SUPPORTED
+#  ifndef PNG_NO_READ_EXPAND
+#    define PNG_READ_EXPAND_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_SHIFT
+#    define PNG_READ_SHIFT_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_PACK
+#    define PNG_READ_PACK_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_BGR
+#    define PNG_READ_BGR_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_SWAP
+#    define PNG_READ_SWAP_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_PACKSWAP
+#    define PNG_READ_PACKSWAP_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_INVERT
+#    define PNG_READ_INVERT_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_DITHER
+#    define PNG_READ_DITHER_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_BACKGROUND
+#    define PNG_READ_BACKGROUND_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_16_TO_8
+#    define PNG_READ_16_TO_8_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_FILLER
+#    define PNG_READ_FILLER_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_GAMMA
+#    define PNG_READ_GAMMA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_GRAY_TO_RGB
+#    define PNG_READ_GRAY_TO_RGB_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_SWAP_ALPHA
+#    define PNG_READ_SWAP_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_INVERT_ALPHA
+#    define PNG_READ_INVERT_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_STRIP_ALPHA
+#    define PNG_READ_STRIP_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_USER_TRANSFORM
+#    define PNG_READ_USER_TRANSFORM_SUPPORTED
+#  endif
+#  ifndef PNG_NO_READ_RGB_TO_GRAY
+#    define PNG_READ_RGB_TO_GRAY_SUPPORTED
+#  endif
+#endif /* PNG_READ_TRANSFORMS_SUPPORTED */
+
+#if !defined(PNG_NO_PROGRESSIVE_READ) && \
+ !defined(PNG_PROGRESSIVE_READ_SUPPORTED) /* if you don't do progressive   */
+#  define PNG_PROGRESSIVE_READ_SUPPORTED  /* reading.  This is not talking */
+#endif                            /* about interlacing capability!  You'll */
+           /* still have interlacing unless you change the following line: */
+
+#define PNG_READ_INTERLACING_SUPPORTED /* required in PNG-compliant decoders */
+
+#ifndef PNG_NO_READ_COMPOSITE_NODIV
+#  ifndef PNG_NO_READ_COMPOSITED_NODIV  /* libpng-1.0.x misspelling */
+#    define PNG_READ_COMPOSITE_NODIV_SUPPORTED  /* well tested on Intel, SGI */
+#  endif
+#endif
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* Deprecated, will be removed from version 2.0.0.
+   Use PNG_MNG_FEATURES_SUPPORTED instead. */
+#ifndef PNG_NO_READ_EMPTY_PLTE
+#  define PNG_READ_EMPTY_PLTE_SUPPORTED
+#endif
+#endif
+
+#endif /* PNG_READ_SUPPORTED */
+
+#if defined(PNG_WRITE_SUPPORTED)
+
+# if !defined(PNG_WRITE_TRANSFORMS_NOT_SUPPORTED) && \
+    !defined(PNG_NO_WRITE_TRANSFORMS)
+#  define PNG_WRITE_TRANSFORMS_SUPPORTED
+#endif
+
+#ifdef PNG_WRITE_TRANSFORMS_SUPPORTED
+#  ifndef PNG_NO_WRITE_SHIFT
+#    define PNG_WRITE_SHIFT_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_PACK
+#    define PNG_WRITE_PACK_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_BGR
+#    define PNG_WRITE_BGR_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_SWAP
+#    define PNG_WRITE_SWAP_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_PACKSWAP
+#    define PNG_WRITE_PACKSWAP_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_INVERT
+#    define PNG_WRITE_INVERT_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_FILLER
+#    define PNG_WRITE_FILLER_SUPPORTED   /* same as WRITE_STRIP_ALPHA */
+#  endif
+#  ifndef PNG_NO_WRITE_SWAP_ALPHA
+#    define PNG_WRITE_SWAP_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_INVERT_ALPHA
+#    define PNG_WRITE_INVERT_ALPHA_SUPPORTED
+#  endif
+#  ifndef PNG_NO_WRITE_USER_TRANSFORM
+#    define PNG_WRITE_USER_TRANSFORM_SUPPORTED
+#  endif
+#endif /* PNG_WRITE_TRANSFORMS_SUPPORTED */
+
+#if !defined(PNG_NO_WRITE_INTERLACING_SUPPORTED) && \
+    !defined(PNG_WRITE_INTERLACING_SUPPORTED)
+#define PNG_WRITE_INTERLACING_SUPPORTED  /* not required for PNG-compliant
+                                            encoders, but can cause trouble
+                                            if left undefined */
+#endif
+
+#if !defined(PNG_NO_WRITE_WEIGHTED_FILTER) && \
+    !defined(PNG_WRITE_WEIGHTED_FILTER) && \
+     defined(PNG_FLOATING_POINT_SUPPORTED)
+#  define PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
+#endif
+
+#ifndef PNG_NO_WRITE_FLUSH
+#  define PNG_WRITE_FLUSH_SUPPORTED
+#endif
+
+#if defined(PNG_1_0_X) || defined (PNG_1_2_X)
+/* Deprecated, see PNG_MNG_FEATURES_SUPPORTED, above */
+#ifndef PNG_NO_WRITE_EMPTY_PLTE
+#  define PNG_WRITE_EMPTY_PLTE_SUPPORTED
+#endif
+#endif
+
+#endif /* PNG_WRITE_SUPPORTED */
+
+#ifndef PNG_1_0_X
+#  ifndef PNG_NO_ERROR_NUMBERS
+#    define PNG_ERROR_NUMBERS_SUPPORTED
+#  endif
+#endif /* PNG_1_0_X */
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+#  ifndef PNG_NO_USER_TRANSFORM_PTR
+#    define PNG_USER_TRANSFORM_PTR_SUPPORTED
+#  endif
+#endif
+
+#ifndef PNG_NO_STDIO
+#  define PNG_TIME_RFC1123_SUPPORTED
+#endif
+
+/* This adds extra functions in pngget.c for accessing data from the
+ * info pointer (added in version 0.99)
+ * png_get_image_width()
+ * png_get_image_height()
+ * png_get_bit_depth()
+ * png_get_color_type()
+ * png_get_compression_type()
+ * png_get_filter_type()
+ * png_get_interlace_type()
+ * png_get_pixel_aspect_ratio()
+ * png_get_pixels_per_meter()
+ * png_get_x_offset_pixels()
+ * png_get_y_offset_pixels()
+ * png_get_x_offset_microns()
+ * png_get_y_offset_microns()
+ */
+#if !defined(PNG_NO_EASY_ACCESS) && !defined(PNG_EASY_ACCESS_SUPPORTED)
+#  define PNG_EASY_ACCESS_SUPPORTED
+#endif
+
+/* PNG_ASSEMBLER_CODE was enabled by default in version 1.2.0 
+ * and removed from version 1.2.20.  The following will be removed
+ * from libpng-1.4.0
+*/
+
+#if defined(PNG_READ_SUPPORTED) && !defined(PNG_NO_OPTIMIZED_CODE)
+#  ifndef PNG_OPTIMIZED_CODE_SUPPORTED
+#    define PNG_OPTIMIZED_CODE_SUPPORTED
+#  endif
+#endif
+
+#if defined(PNG_READ_SUPPORTED) && !defined(PNG_NO_ASSEMBLER_CODE)
+#  ifndef PNG_ASSEMBLER_CODE_SUPPORTED
+#    define PNG_ASSEMBLER_CODE_SUPPORTED
+#  endif
+
+#  if defined(__GNUC__) && defined(__x86_64__) && (__GNUC__ < 4)
+     /* work around 64-bit gcc compiler bugs in gcc-3.x */
+#    if !defined(PNG_MMX_CODE_SUPPORTED) && !defined(PNG_NO_MMX_CODE)
+#      define PNG_NO_MMX_CODE
+#    endif
+#  endif
+
+#  if defined(__APPLE__) /* MMX code is off by default on Apple targets */
+#    if !defined(PNG_MMX_CODE_SUPPORTED) && !defined(PNG_NO_MMX_CODE)
+#      define PNG_NO_MMX_CODE
+#    endif
+#  endif
+
+#  if (defined(__MWERKS__) && ((__MWERKS__ < 0x0900) || macintosh)) /* off by default for CodeWarrior < 0x0900 or macintosh */
+#    if !defined(PNG_MMX_CODE_SUPPORTED) && !defined(PNG_NO_MMX_CODE)
+#      define PNG_NO_MMX_CODE
+#    endif
+#  endif
+
+#  if !defined(PNG_MMX_CODE_SUPPORTED) && !defined(PNG_NO_MMX_CODE) /* default: MMX code on unless disabled above or by the user */
+#    define PNG_MMX_CODE_SUPPORTED
+#  endif
+
+#endif /* PNG_READ_SUPPORTED && !PNG_NO_ASSEMBLER_CODE */
+/* end of obsolete code to be removed from libpng-1.4.0 */
+
+#if !defined(PNG_1_0_X)
+#if !defined(PNG_NO_USER_MEM) && !defined(PNG_USER_MEM_SUPPORTED)
+#  define PNG_USER_MEM_SUPPORTED
+#endif
+#endif /* PNG_1_0_X */
+
+/* Added at libpng-1.2.6 */
+#if !defined(PNG_1_0_X)
+#ifndef PNG_SET_USER_LIMITS_SUPPORTED
+#if !defined(PNG_NO_SET_USER_LIMITS) && !defined(PNG_SET_USER_LIMITS_SUPPORTED)
+#  define PNG_SET_USER_LIMITS_SUPPORTED
+#endif
+#endif
+#endif /* PNG_1_0_X */
+
+/* Added at libpng-1.0.16 and 1.2.6.  To accept all valid PNGs no matter
+ * how large, set these limits to 0x7fffffffL
+ */
+#ifndef PNG_USER_WIDTH_MAX
+#  define PNG_USER_WIDTH_MAX 1000000L
+#endif
+#ifndef PNG_USER_HEIGHT_MAX
+#  define PNG_USER_HEIGHT_MAX 1000000L
+#endif
+
+/* These are currently experimental features, define them if you want */
+
+/* very little testing */
+/*
+#ifdef PNG_READ_SUPPORTED
+#  ifndef PNG_READ_16_TO_8_ACCURATE_SCALE_SUPPORTED
+#    define PNG_READ_16_TO_8_ACCURATE_SCALE_SUPPORTED
+#  endif
+#endif
+*/
+
+/* This is only for PowerPC big-endian and 680x0 systems */
+/* some testing */
+/*
+#ifndef PNG_READ_BIG_ENDIAN_SUPPORTED
+#  define PNG_READ_BIG_ENDIAN_SUPPORTED
+#endif
+*/
+
+/* Buggy compilers (e.g., gcc 2.7.2.2) need this */
+/*
+#define PNG_NO_POINTER_INDEXING
+*/
+
+/* These functions are turned off by default, as they will be phased out. */
+/*
+#define  PNG_USELESS_TESTS_SUPPORTED
+#define  PNG_CORRECT_PALETTE_SUPPORTED
+*/
+
+/* Any chunks you are not interested in, you can undef here.  The
+ * ones that allocate memory may be especially important (hIST,
+ * tEXt, zTXt, tRNS, pCAL).  Others will just save time and make png_info
+ * a bit smaller.
+ */
+
+#if defined(PNG_READ_SUPPORTED) && \
+    !defined(PNG_READ_ANCILLARY_CHUNKS_NOT_SUPPORTED) && \
+    !defined(PNG_NO_READ_ANCILLARY_CHUNKS)
+#  define PNG_READ_ANCILLARY_CHUNKS_SUPPORTED
+#endif
+
+#if defined(PNG_WRITE_SUPPORTED) && \
+    !defined(PNG_WRITE_ANCILLARY_CHUNKS_NOT_SUPPORTED) && \
+    !defined(PNG_NO_WRITE_ANCILLARY_CHUNKS)
+#  define PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED
+#endif
+
+#ifdef PNG_READ_ANCILLARY_CHUNKS_SUPPORTED
+
+#ifdef PNG_NO_READ_TEXT
+#  define PNG_NO_READ_iTXt
+#  define PNG_NO_READ_tEXt
+#  define PNG_NO_READ_zTXt
+#endif
+#ifndef PNG_NO_READ_bKGD
+#  define PNG_READ_bKGD_SUPPORTED
+#  define PNG_bKGD_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_cHRM
+#  define PNG_READ_cHRM_SUPPORTED
+#  define PNG_cHRM_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_gAMA
+#  define PNG_READ_gAMA_SUPPORTED
+#  define PNG_gAMA_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_hIST
+#  define PNG_READ_hIST_SUPPORTED
+#  define PNG_hIST_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_iCCP
+#  define PNG_READ_iCCP_SUPPORTED
+#  define PNG_iCCP_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_iTXt
+#  ifndef PNG_READ_iTXt_SUPPORTED
+#    define PNG_READ_iTXt_SUPPORTED
+#  endif
+#  ifndef PNG_iTXt_SUPPORTED
+#    define PNG_iTXt_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_READ_oFFs
+#  define PNG_READ_oFFs_SUPPORTED
+#  define PNG_oFFs_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_pCAL
+#  define PNG_READ_pCAL_SUPPORTED
+#  define PNG_pCAL_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_sCAL
+#  define PNG_READ_sCAL_SUPPORTED
+#  define PNG_sCAL_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_pHYs
+#  define PNG_READ_pHYs_SUPPORTED
+#  define PNG_pHYs_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_sBIT
+#  define PNG_READ_sBIT_SUPPORTED
+#  define PNG_sBIT_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_sPLT
+#  define PNG_READ_sPLT_SUPPORTED
+#  define PNG_sPLT_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_sRGB
+#  define PNG_READ_sRGB_SUPPORTED
+#  define PNG_sRGB_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_tEXt
+#  define PNG_READ_tEXt_SUPPORTED
+#  define PNG_tEXt_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_tIME
+#  define PNG_READ_tIME_SUPPORTED
+#  define PNG_tIME_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_tRNS
+#  define PNG_READ_tRNS_SUPPORTED
+#  define PNG_tRNS_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_zTXt
+#  define PNG_READ_zTXt_SUPPORTED
+#  define PNG_zTXt_SUPPORTED
+#endif
+#ifndef PNG_NO_READ_UNKNOWN_CHUNKS
+#  define PNG_READ_UNKNOWN_CHUNKS_SUPPORTED
+#  ifndef PNG_UNKNOWN_CHUNKS_SUPPORTED
+#    define PNG_UNKNOWN_CHUNKS_SUPPORTED
+#  endif
+#  ifndef PNG_NO_HANDLE_AS_UNKNOWN
+#    define PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+#  endif
+#endif
+#if !defined(PNG_NO_READ_USER_CHUNKS) && \
+     defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+#  define PNG_READ_USER_CHUNKS_SUPPORTED
+#  define PNG_USER_CHUNKS_SUPPORTED
+#  ifdef PNG_NO_READ_UNKNOWN_CHUNKS
+#    undef PNG_NO_READ_UNKNOWN_CHUNKS
+#  endif
+#  ifdef PNG_NO_HANDLE_AS_UNKNOWN
+#    undef PNG_NO_HANDLE_AS_UNKNOWN
+#  endif
+#endif
+#ifndef PNG_NO_READ_OPT_PLTE
+#  define PNG_READ_OPT_PLTE_SUPPORTED /* only affects support of the */
+#endif                      /* optional PLTE chunk in RGB and RGBA images */
+#if defined(PNG_READ_iTXt_SUPPORTED) || defined(PNG_READ_tEXt_SUPPORTED) || \
+    defined(PNG_READ_zTXt_SUPPORTED)
+#  define PNG_READ_TEXT_SUPPORTED
+#  define PNG_TEXT_SUPPORTED
+#endif
+
+#endif /* PNG_READ_ANCILLARY_CHUNKS_SUPPORTED */
+
+#ifdef PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED
+
+#ifdef PNG_NO_WRITE_TEXT
+#  define PNG_NO_WRITE_iTXt
+#  define PNG_NO_WRITE_tEXt
+#  define PNG_NO_WRITE_zTXt
+#endif
+#ifndef PNG_NO_WRITE_bKGD
+#  define PNG_WRITE_bKGD_SUPPORTED
+#  ifndef PNG_bKGD_SUPPORTED
+#    define PNG_bKGD_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_cHRM
+#  define PNG_WRITE_cHRM_SUPPORTED
+#  ifndef PNG_cHRM_SUPPORTED
+#    define PNG_cHRM_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_gAMA
+#  define PNG_WRITE_gAMA_SUPPORTED
+#  ifndef PNG_gAMA_SUPPORTED
+#    define PNG_gAMA_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_hIST
+#  define PNG_WRITE_hIST_SUPPORTED
+#  ifndef PNG_hIST_SUPPORTED
+#    define PNG_hIST_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_iCCP
+#  define PNG_WRITE_iCCP_SUPPORTED
+#  ifndef PNG_iCCP_SUPPORTED
+#    define PNG_iCCP_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_iTXt
+#  ifndef PNG_WRITE_iTXt_SUPPORTED
+#    define PNG_WRITE_iTXt_SUPPORTED
+#  endif
+#  ifndef PNG_iTXt_SUPPORTED
+#    define PNG_iTXt_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_oFFs
+#  define PNG_WRITE_oFFs_SUPPORTED
+#  ifndef PNG_oFFs_SUPPORTED
+#    define PNG_oFFs_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_pCAL
+#  define PNG_WRITE_pCAL_SUPPORTED
+#  ifndef PNG_pCAL_SUPPORTED
+#    define PNG_pCAL_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_sCAL
+#  define PNG_WRITE_sCAL_SUPPORTED
+#  ifndef PNG_sCAL_SUPPORTED
+#    define PNG_sCAL_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_pHYs
+#  define PNG_WRITE_pHYs_SUPPORTED
+#  ifndef PNG_pHYs_SUPPORTED
+#    define PNG_pHYs_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_sBIT
+#  define PNG_WRITE_sBIT_SUPPORTED
+#  ifndef PNG_sBIT_SUPPORTED
+#    define PNG_sBIT_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_sPLT
+#  define PNG_WRITE_sPLT_SUPPORTED
+#  ifndef PNG_sPLT_SUPPORTED
+#    define PNG_sPLT_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_sRGB
+#  define PNG_WRITE_sRGB_SUPPORTED
+#  ifndef PNG_sRGB_SUPPORTED
+#    define PNG_sRGB_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_tEXt
+#  define PNG_WRITE_tEXt_SUPPORTED
+#  ifndef PNG_tEXt_SUPPORTED
+#    define PNG_tEXt_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_tIME
+#  define PNG_WRITE_tIME_SUPPORTED
+#  ifndef PNG_tIME_SUPPORTED
+#    define PNG_tIME_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_tRNS
+#  define PNG_WRITE_tRNS_SUPPORTED
+#  ifndef PNG_tRNS_SUPPORTED
+#    define PNG_tRNS_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_zTXt
+#  define PNG_WRITE_zTXt_SUPPORTED
+#  ifndef PNG_zTXt_SUPPORTED
+#    define PNG_zTXt_SUPPORTED
+#  endif
+#endif
+#ifndef PNG_NO_WRITE_UNKNOWN_CHUNKS
+#  define PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED
+#  ifndef PNG_UNKNOWN_CHUNKS_SUPPORTED
+#    define PNG_UNKNOWN_CHUNKS_SUPPORTED
+#  endif
+#  ifndef PNG_NO_HANDLE_AS_UNKNOWN
+#     ifndef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+#       define PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+#     endif
+#  endif
+#endif
+#if defined(PNG_WRITE_iTXt_SUPPORTED) || defined(PNG_WRITE_tEXt_SUPPORTED) || \
+    defined(PNG_WRITE_zTXt_SUPPORTED)
+#  define PNG_WRITE_TEXT_SUPPORTED
+#  ifndef PNG_TEXT_SUPPORTED
+#    define PNG_TEXT_SUPPORTED
+#  endif
+#endif
+
+#endif /* PNG_WRITE_ANCILLARY_CHUNKS_SUPPORTED */
+
+/* Turn this off to disable png_read_png() and
+ * png_write_png() and leave the row_pointers member
+ * out of the info structure.
+ */
+#ifndef PNG_NO_INFO_IMAGE
+#  define PNG_INFO_IMAGE_SUPPORTED
+#endif
+
+/* need the time information for reading tIME chunks */
+#if defined(PNG_tIME_SUPPORTED)
+#  if !defined(_WIN32_WCE)
+     /* "time.h" functions are not supported on WindowsCE */
+#    include <time.h>
+#  endif
+#endif
+
+/* Some typedefs to get us started.  These should be safe on most of the
+ * common platforms.  The typedefs should be at least as large as the
+ * numbers suggest (a png_uint_32 must be at least 32 bits long), but they
+ * don't have to be exactly that size.  Some compilers dislike passing
+ * unsigned shorts as function parameters, so you may be better off using
+ * unsigned int for png_uint_16.  Likewise, for 64-bit systems, you may
+ * want to have unsigned int for png_uint_32 instead of unsigned long.
+ */
+
+typedef unsigned int png_uint_32;   /* IMLIB - changed long to int */
+typedef int png_int_32;             /* IMLIB - changed long to int */
+typedef unsigned short png_uint_16;
+typedef short png_int_16;
+typedef unsigned char png_byte;
+
+/* This is usually size_t.  It is typedef'ed just in case you need it to
+   change (I'm not sure if you will or not, so I thought I'd be safe) */
+#ifdef PNG_SIZE_T
+   typedef PNG_SIZE_T png_size_t;
+#  define png_sizeof(x) png_convert_size(sizeof (x))
+#else
+   typedef size_t png_size_t;
+#  define png_sizeof(x) sizeof (x)
+#endif
+
+/* The following is needed for medium model support.  It cannot be in the
+ * PNG_INTERNAL section.  Needs modification for other compilers besides
+ * MSC.  Model independent support declares all arrays and pointers to be
+ * large using the far keyword.  The zlib version used must also support
+ * model independent data.  As of version zlib 1.0.4, the necessary changes
+ * have been made in zlib.  The USE_FAR_KEYWORD define triggers other
+ * changes that are needed. (Tim Wegner)
+ */
+
+/* Separate compiler dependencies (problem here is that zlib.h always
+   defines FAR). (SJT) */
+#ifdef __BORLANDC__
+#  if defined(__LARGE__) || defined(__HUGE__) || defined(__COMPACT__)
+#    define LDATA 1
+#  else
+#    define LDATA 0
+#  endif
+   /* GRR:  why is Cygwin in here?  Cygwin is not Borland C... */
+#  if !defined(__WIN32__) && !defined(__FLAT__) && !defined(__CYGWIN__)
+#    define PNG_MAX_MALLOC_64K
+#    if (LDATA != 1)
+#      ifndef FAR
+#        define FAR __far
+#      endif
+#      define USE_FAR_KEYWORD
+#    endif   /* LDATA != 1 */
+     /* Possibly useful for moving data out of default segment.
+      * Uncomment it if you want. Could also define FARDATA as
+      * const if your compiler supports it. (SJT)
+#    define FARDATA FAR
+      */
+#  endif  /* __WIN32__, __FLAT__, __CYGWIN__ */
+#endif   /* __BORLANDC__ */
+
+
+/* Suggest testing for specific compiler first before testing for
+ * FAR.  The Watcom compiler defines both __MEDIUM__ and M_I86MM,
+ * making reliance on certain keywords suspect. (SJT)
+ */
+
+/* MSC Medium model */
+#if defined(FAR)
+#  if defined(M_I86MM)
+#    define USE_FAR_KEYWORD
+#    define FARDATA FAR
+#    include <dos.h>
+#  endif
+#endif
+
+/* SJT: default case */
+#ifndef FAR
+#  define FAR
+#endif
+
+/* At this point FAR is always defined */
+#ifndef FARDATA
+#  define FARDATA
+#endif
+
+/* Typedef for floating-point numbers that are converted
+   to fixed-point with a multiple of 100,000, e.g., int_gamma */
+typedef png_int_32 png_fixed_point;
+
+/* Add typedefs for pointers */
+typedef void            FAR * png_voidp;
+typedef png_byte        FAR * png_bytep;
+typedef png_uint_32     FAR * png_uint_32p;
+typedef png_int_32      FAR * png_int_32p;
+typedef png_uint_16     FAR * png_uint_16p;
+typedef png_int_16      FAR * png_int_16p;
+typedef PNG_CONST char  FAR * png_const_charp;
+typedef char            FAR * png_charp;
+typedef png_fixed_point FAR * png_fixed_point_p;
+
+#ifndef PNG_NO_STDIO
+#if defined(_WIN32_WCE)
+typedef HANDLE                png_FILE_p;
+#else
+typedef FILE                * png_FILE_p;
+#endif
+#endif
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+typedef double          FAR * png_doublep;
+#endif
+
+/* Pointers to pointers; i.e. arrays */
+typedef png_byte        FAR * FAR * png_bytepp;
+typedef png_uint_32     FAR * FAR * png_uint_32pp;
+typedef png_int_32      FAR * FAR * png_int_32pp;
+typedef png_uint_16     FAR * FAR * png_uint_16pp;
+typedef png_int_16      FAR * FAR * png_int_16pp;
+typedef PNG_CONST char  FAR * FAR * png_const_charpp;
+typedef char            FAR * FAR * png_charpp;
+typedef png_fixed_point FAR * FAR * png_fixed_point_pp;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+typedef double          FAR * FAR * png_doublepp;
+#endif
+
+/* Pointers to pointers to pointers; i.e., pointer to array */
+typedef char            FAR * FAR * FAR * png_charppp;
+
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+/* SPC -  Is this stuff deprecated? */
+/* It'll be removed as of libpng-1.3.0 - GR-P */
+/* libpng typedefs for types in zlib. If zlib changes
+ * or another compression library is used, then change these.
+ * Eliminates need to change all the source files.
+ */
+typedef charf *         png_zcharp;
+typedef charf * FAR *   png_zcharpp;
+typedef z_stream FAR *  png_zstreamp;
+#endif /* (PNG_1_0_X) || defined(PNG_1_2_X) */
+
+/*
+ * Define PNG_BUILD_DLL if the module being built is a Windows
+ * LIBPNG DLL.
+ *
+ * Define PNG_USE_DLL if you want to *link* to the Windows LIBPNG DLL.
+ * It is equivalent to Microsoft predefined macro _DLL that is
+ * automatically defined when you compile using the shared
+ * version of the CRT (C Run-Time library)
+ *
+ * The cygwin mods make this behavior a little different:
+ * Define PNG_BUILD_DLL if you are building a dll for use with cygwin
+ * Define PNG_STATIC if you are building a static library for use with cygwin,
+ *   -or- if you are building an application that you want to link to the
+ *   static library.
+ * PNG_USE_DLL is defined by default (no user action needed) unless one of
+ *   the other flags is defined.
+ */
+
+#if !defined(PNG_DLL) && (defined(PNG_BUILD_DLL) || defined(PNG_USE_DLL))
+#  define PNG_DLL
+#endif
+/* If CYGWIN, then disallow GLOBAL ARRAYS unless building a static lib.
+ * When building a static lib, default to no GLOBAL ARRAYS, but allow
+ * command-line override
+ */
+#if defined(__CYGWIN__)
+#  if !defined(PNG_STATIC)
+#    if defined(PNG_USE_GLOBAL_ARRAYS)
+#      undef PNG_USE_GLOBAL_ARRAYS
+#    endif
+#    if !defined(PNG_USE_LOCAL_ARRAYS)
+#      define PNG_USE_LOCAL_ARRAYS
+#    endif
+#  else
+#    if defined(PNG_USE_LOCAL_ARRAYS) || defined(PNG_NO_GLOBAL_ARRAYS)
+#      if defined(PNG_USE_GLOBAL_ARRAYS)
+#        undef PNG_USE_GLOBAL_ARRAYS
+#      endif
+#    endif
+#  endif
+#  if !defined(PNG_USE_LOCAL_ARRAYS) && !defined(PNG_USE_GLOBAL_ARRAYS)
+#    define PNG_USE_LOCAL_ARRAYS
+#  endif
+#endif
+
+/* Do not use global arrays (helps with building DLL's)
+ * They are no longer used in libpng itself, since version 1.0.5c,
+ * but might be required for some pre-1.0.5c applications.
+ */
+#if !defined(PNG_USE_LOCAL_ARRAYS) && !defined(PNG_USE_GLOBAL_ARRAYS)
+#  if defined(PNG_NO_GLOBAL_ARRAYS) || \
+      (defined(__GNUC__) && defined(PNG_DLL)) || defined(_MSC_VER)
+#    define PNG_USE_LOCAL_ARRAYS
+#  else
+#    define PNG_USE_GLOBAL_ARRAYS
+#  endif
+#endif
+
+#if defined(__CYGWIN__)
+#  undef PNGAPI
+#  define PNGAPI __cdecl
+#  undef PNG_IMPEXP
+#  define PNG_IMPEXP
+#endif  
+
+/* If you define PNGAPI, e.g., with compiler option "-DPNGAPI=__stdcall",
+ * you may get warnings regarding the linkage of png_zalloc and png_zfree.
+ * Don't ignore those warnings; you must also reset the default calling
+ * convention in your compiler to match your PNGAPI, and you must build
+ * zlib and your applications the same way you build libpng.
+ */
+
+#if defined(__MINGW32__) && !defined(PNG_MODULEDEF)
+#  ifndef PNG_NO_MODULEDEF
+#    define PNG_NO_MODULEDEF
+#  endif
+#endif
+
+#if defined(PNG_BUILD_DLL) && !defined(PNG_NO_MODULEDEF) && !defined(PNG_IMPEXP)
+#  define PNG_IMPEXP  /* expands to nothing */
+#endif
+
+#if defined(PNG_DLL) || defined(_DLL) || defined(__DLL__ ) || \
+    (( defined(_Windows) || defined(_WINDOWS) || \
+       defined(WIN32) || defined(_WIN32) || defined(__WIN32__) ))
+
+#  ifndef PNGAPI
+#     if defined(__GNUC__) || (defined (_MSC_VER) && (_MSC_VER >= 800))
+#        define PNGAPI __cdecl
+#     else
+#        define PNGAPI _cdecl
+#     endif
+#  endif
+
+#  if !defined(PNG_IMPEXP) && (!defined(PNG_DLL) || \
+       0 /* WINCOMPILER_WITH_NO_SUPPORT_FOR_DECLIMPEXP */)
+#     define PNG_IMPEXP
+#  endif
+
+#  if !defined(PNG_IMPEXP)
+
+#     define PNG_EXPORT_TYPE1(type,symbol)  PNG_IMPEXP type PNGAPI symbol
+#     define PNG_EXPORT_TYPE2(type,symbol)  type PNG_IMPEXP PNGAPI symbol
+
+      /* Borland/Microsoft: PNG_EXPORT form depends on the compiler version */
+#     if defined(_MSC_VER) || defined(__BORLANDC__)
+#        if (_MSC_VER >= 800) || (__BORLANDC__ >= 0x500)
+#           define PNG_EXPORT PNG_EXPORT_TYPE1
+#        else
+#           define PNG_EXPORT PNG_EXPORT_TYPE2
+#           if defined(PNG_BUILD_DLL)
+#              define PNG_IMPEXP __export
+#           else
+#              define PNG_IMPEXP /*__import */ /* doesn't exist AFAIK in
+                                                 VC++ */
+#           endif                             /* Exists in Borland C++ for
+                                                 C++ classes (== huge) */
+#        endif
+#     endif
+
+#     if !defined(PNG_IMPEXP)
+#        if defined(PNG_BUILD_DLL)
+#           define PNG_IMPEXP __declspec(dllexport)
+#        else
+#           define PNG_IMPEXP __declspec(dllimport)
+#        endif
+#     endif
+#  endif  /* !defined(PNG_IMPEXP) */
+#else /* !(DLL || non-cygwin WINDOWS) */
+#   if (defined(__IBMC__) || defined(__IBMCPP__)) && defined(__OS2__)
+#      ifndef PNGAPI
+#         define PNGAPI _System
+#      endif
+#   else
+#      if 0 /* ... other platforms, with other meanings */
+#      endif
+#   endif
+#endif
+
+#if !defined(PNGAPI)      /* nothing chosen so far: expands to nothing */
+#  define PNGAPI
+#endif
+#if !defined(PNG_IMPEXP)  /* nothing chosen so far: expands to nothing */
+#  define PNG_IMPEXP
+#endif
+
+/* With PNG_BUILDSYMS the export macros expand to PNG_FUNCTION_EXPORT /
+ * PNG_DATA_EXPORT marker tokens, unless they are already defined. */
+#if defined(PNG_BUILDSYMS)
+#  if !defined(PNG_EXPORT)
+#    define PNG_EXPORT(type,symbol) PNG_FUNCTION_EXPORT symbol END
+#  endif
+#  if defined(PNG_USE_GLOBAL_ARRAYS) && !defined(PNG_EXPORT_VAR)
+#    define PNG_EXPORT_VAR(type) PNG_DATA_EXPORT
+#  endif
+#endif
+
+#if !defined(PNG_EXPORT)  /* default form of an exported function */
+#  define PNG_EXPORT(type,symbol) PNG_IMPEXP type PNGAPI symbol
+#endif
+
+/* Default form of an exported data declaration; only provided when
+ * PNG_USE_GLOBAL_ARRAYS is defined and no other form was chosen. */
+#if defined(PNG_USE_GLOBAL_ARRAYS) && !defined(PNG_EXPORT_VAR)
+#  define PNG_EXPORT_VAR(type) extern PNG_IMPEXP type
+#endif
+
+/* User may want to use these so they are not in PNG_INTERNAL. Any library
+ * functions that are passed far data must be model independent.
+ */
+
+#if !defined(PNG_ABORT)  /* overridable; the default calls abort() */
+#  define PNG_ABORT() abort()
+#endif
+
+#if !defined(PNG_SETJMP_SUPPORTED)  /* expands to a self-describing name */
+#  define png_jmpbuf(png_ptr) \
+   (LIBPNG_WAS_COMPILED_WITH__PNG_SETJMP_NOT_SUPPORTED)
+#else                               /* expands to the jmpbuf member */
+#  define png_jmpbuf(png_ptr) ((png_ptr)->jmpbuf)
+#endif
+
+#if defined(USE_FAR_KEYWORD)  /* memory model independent fns */
+/* Use CVT_PTR / CVT_PTR_NOCHECK to make far-to-near assignments. */
+#  define CHECK   1
+#  define NOCHECK 0
+#  define CVT_PTR(ptr) (png_far_to_near(png_ptr,ptr,CHECK))
+#  define CVT_PTR_NOCHECK(ptr) (png_far_to_near(png_ptr,ptr,NOCHECK))
+#  define png_snprintf _fsnprintf   /* Added to v 1.2.19 */
+#  define png_strcpy  _fstrcpy
+#  define png_strncpy _fstrncpy   /* Added to v 1.2.6 */
+#  define png_strlen  _fstrlen
+#  define png_memcmp  _fmemcmp    /* SJT: added */
+#  define png_memcpy  _fmemcpy
+#  define png_memset  _fmemset
+#else /* no far keyword: map onto the usual C library functions */
+#  define CVT_PTR(ptr)         (ptr)
+#  define CVT_PTR_NOCHECK(ptr) (ptr)
+#  ifndef PNG_NO_SNPRINTF
+#    ifdef _MSC_VER
+#      define png_snprintf _snprintf   /* Added to v 1.2.19 */
+#      define png_snprintf2 _snprintf
+#      define png_snprintf6 _snprintf
+#    else
+#      define png_snprintf snprintf   /* Added to v 1.2.19 */
+#      define png_snprintf2 snprintf
+#      define png_snprintf6 snprintf
+#    endif
+#  else
+     /* You don't have or don't want to use snprintf().  Caution: Using
+      * sprintf instead of snprintf exposes your application to accidental
+      * or malevolent buffer overflows.  If you don't have snprintf()
+      * as a general rule you should provide one (you can get one from
+      * Portable OpenSSH). */
+#    define png_snprintf(s1,n,fmt,x1) sprintf(s1,fmt,x1)
+#    define png_snprintf2(s1,n,fmt,x1,x2) sprintf(s1,fmt,x1,x2)
+#    define png_snprintf6(s1,n,fmt,x1,x2,x3,x4,x5,x6) \
+        sprintf(s1,fmt,x1,x2,x3,x4,x5,x6)
+#  endif
+#  define png_strcpy  strcpy
+#  define png_strncpy strncpy     /* Added to v 1.2.6 */
+#  define png_strlen  strlen
+#  define png_memcmp  memcmp      /* SJT: added */
+#  define png_memcpy  memcpy
+#  define png_memset  memset
+#endif
+/* End of memory model independent support */
+
+/* Guard against contradictory settings: when PNG_MAX_MALLOC_64K is defined,
+ * an oversized PNG_ZBUF_SIZE is replaced by 65536L.
+ */
+#if defined(PNG_MAX_MALLOC_64K) && (PNG_ZBUF_SIZE > 65536L)
+#  undef PNG_ZBUF_SIZE
+#  define PNG_ZBUF_SIZE 65536L
+#endif
+
+/* Added at libpng-1.2.8 */
+#endif /* PNG_VERSION_INFO_ONLY */
+
+#endif /* PNGCONF_H */
diff --git a/src/libpng/pngerror.c b/src/libpng/pngerror.c
new file mode 100644
index 0000000..bdafdc2
--- /dev/null
+++ b/src/libpng/pngerror.c
@@ -0,0 +1,341 @@
+
+/* pngerror.c - stub functions for i/o and memory allocation
+ *
+ * Last changed in libpng 1.2.22 [October 13, 2007]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file provides a location for all error handling.  Users who
+ * need special error handling are expected to write replacement functions
+ * and use png_set_error_fn() to use those functions.  See the instructions
+ * at each function.
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+/* Forward declarations of the default handlers defined later in this file. */
+static void /* PRIVATE */
+png_default_error PNGARG((png_structp png_ptr, png_const_charp error_message));
+#if !defined(PNG_NO_WARNINGS)
+static void /* PRIVATE */
+png_default_warning PNGARG((png_structp png_ptr,
+  png_const_charp warning_message));
+#endif /* PNG_NO_WARNINGS */
+
+/* This function is called whenever there is a fatal error.  This function
+ * should not be changed.  If there is a need to handle errors differently,
+ * you should supply a replacement error function and use png_set_error_fn()
+ * to replace the error function at run-time.
+ */
+#ifndef PNG_NO_ERROR_TEXT
+void PNGAPI
+png_error(png_structp png_ptr, png_const_charp error_message)
+{
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+   char number_only[16];   /* holds "nnnn" when the text is stripped */
+   if (png_ptr != NULL && (png_ptr->flags &
+       (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT)) != 0)
+   {
+      int strip_text = (png_ptr->flags & PNG_FLAG_STRIP_ERROR_TEXT) != 0;
+
+      if (error_message[0] != '#')
+      {
+         /* Unnumbered message: with text stripped only "0" remains. */
+         if (strip_text)
+         {
+            number_only[0] = '0';
+            number_only[1] = '\0';
+            error_message = number_only;
+         }
+      }
+      else
+      {
+         int end = 1, k;
+
+         /* Find the blank that ends the "#nnnn" prefix (within 15 chars). */
+         while (end < 15 && error_message[end] != ' ')
+            end++;
+         if (!strip_text)
+            error_message += end;        /* keep text, drop the number */
+         else
+         {
+            for (k = 0; k < end-1; k++)   /* keep number, drop '#' and text */
+               number_only[k] = error_message[k+1];
+            number_only[k] = '\0';
+            error_message = number_only;
+         }
+      }
+   }
+#endif
+   if (png_ptr != NULL && png_ptr->error_fn != NULL)
+      (*(png_ptr->error_fn))(png_ptr, error_message);
+
+   /* Reached when no custom handler is installed or when it returned;
+      the default handler is expected not to return. */
+   png_default_error(png_ptr, error_message);
+}
+#else
+void PNGAPI
+png_err(png_structp png_ptr)
+{
+   /* '\0' is a null pointer constant here: no message text is passed. */
+   if (png_ptr != NULL)
+      if (png_ptr->error_fn != NULL)
+         (*(png_ptr->error_fn))(png_ptr, '\0');
+
+   png_default_error(png_ptr, '\0');   /* used if no handler or it returned */
+}
+#endif /* PNG_NO_ERROR_TEXT */
+
+#ifndef PNG_NO_WARNINGS
+/* This function is called whenever there is a non-fatal error.  A leading
+ * "#nnnn" number is skipped (only if a strip flag is set when error numbers
+ * are supported) and the message goes to the warning function installed by
+ * png_set_error_fn(), or to png_default_warning() if there is none.
+ */
+void PNGAPI
+png_warning(png_structp png_ptr, png_const_charp warning_message)
+{
+   int offset = 0;
+   if (png_ptr != NULL)
+   {
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+   if (png_ptr->flags&
+     (PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT))
+#endif
+     {
+       if (*warning_message == '#')
+       {
+           for (offset=1; offset<15; offset++)
+              if (*(warning_message+offset) == ' ')
+                  break;
+       }
+     }
+   }
+   if (png_ptr != NULL && png_ptr->warning_fn != NULL)
+      (*(png_ptr->warning_fn))(png_ptr, warning_message+offset);
+   else
+      png_default_warning(png_ptr, warning_message+offset);
+}
+#endif /* PNG_NO_WARNINGS */
+
+
+/* These utilities are used internally to build an error message that relates
+ * to the current chunk.  The chunk name comes from png_ptr->chunk_name,
+ * this is used to prefix the message.  The message is limited in length
+ * to 63 bytes, the name characters are output as hex digits wrapped in []
+ * if the character is invalid.
+ */
+/* isnonalpha: true unless c is an ASCII letter (65..90 or 97..122). */
+#define isnonalpha(c) \
+   (!(((c) >= 65 && (c) <= 90) || ((c) >= 97 && (c) <= 122)))
+static PNG_CONST char png_digit[16] = {   /* hex digits, indexed by nibble */
+   '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F' };
+
+#if !defined(PNG_NO_WARNINGS) || !defined(PNG_NO_ERROR_TEXT)
+/* Write the chunk name (bytes that are not ASCII letters appear as [XX] in
+ * hex) into buffer, followed by ": " and at most 63 bytes of error_message.
+ */
+static void /* PRIVATE */
+png_format_buffer(png_structp png_ptr, png_charp buffer, png_const_charp
+   error_message)
+{
+   int pos = 0, idx;
+   for (idx = 0; idx < 4; idx++)
+   {
+      int ch = png_ptr->chunk_name[idx];
+      if (!isnonalpha(ch))
+      {
+         buffer[pos++] = (png_byte)ch;
+         continue;
+      }
+      buffer[pos++] = '[';
+      buffer[pos++] = png_digit[(ch & 0xf0) >> 4];
+      buffer[pos++] = png_digit[ch & 0x0f];
+      buffer[pos++] = ']';
+   }
+
+   if (error_message != NULL)
+   {
+      buffer[pos++] = ':';
+      buffer[pos++] = ' ';
+      png_strncpy(buffer+pos, error_message, 63);
+      buffer[pos+63] = '\0';
+   }
+   else
+      buffer[pos] = '\0';
+}
+
+#ifdef PNG_READ_SUPPORTED
+/* png_error() with the current chunk name prefixed to the message. */
+void PNGAPI
+png_chunk_error(png_structp png_ptr, png_const_charp error_message)
+{
+   char prefixed[18+64];
+   if (png_ptr != NULL)
+   {
+      png_format_buffer(png_ptr, prefixed, error_message);
+      error_message = prefixed;
+   }
+   png_error(png_ptr, error_message);
+}
+#endif /* PNG_READ_SUPPORTED */
+#endif /* !defined(PNG_NO_WARNINGS) || !defined(PNG_NO_ERROR_TEXT) */
+
+#ifndef PNG_NO_WARNINGS
+/* png_warning() with the current chunk name prefixed to the message. */
+void PNGAPI
+png_chunk_warning(png_structp png_ptr, png_const_charp warning_message)
+{
+   char prefixed[18+64];
+   if (png_ptr != NULL)
+   {
+      png_format_buffer(png_ptr, prefixed, warning_message);
+      warning_message = prefixed;
+   }
+   png_warning(png_ptr, warning_message);
+}
+#endif /* PNG_NO_WARNINGS */
+
+
+/* This is the default error handling function.  Note that replacements for
+ * this function MUST NOT RETURN, or the program will likely crash.  This
+ * function is used by default, or if the program supplies NULL for the
+ * error function pointer in png_set_error_fn().  After printing the message
+ * it longjmps via png_ptr->jmpbuf; if that is not possible (no setjmp
+ * support, or png_ptr is NULL) it calls PNG_ABORT() rather than returning.
+ */
+static void /* PRIVATE */
+png_default_error(png_structp png_ptr, png_const_charp error_message)
+{
+#ifndef PNG_NO_CONSOLE_IO
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+   if (*error_message == '#')
+   {
+     int offset;
+     char error_number[16];
+     for (offset=0; offset<15; offset++)
+     {
+         error_number[offset] = *(error_message+offset+1);
+         if (*(error_message+offset) == ' ')
+             break;
+     }
+     if((offset > 1) && (offset < 15))
+     {
+       error_number[offset-1]='\0';
+       fprintf(stderr, "libpng error no. %s: %s\n", error_number,
+          error_message+offset);
+     }
+     else
+       fprintf(stderr, "libpng error: %s, offset=%d\n", error_message,offset);
+   }
+   else
+#endif
+   fprintf(stderr, "libpng error: %s\n", error_message);
+#else
+   error_message = error_message; /* make compiler happy */
+#endif
+#ifdef PNG_SETJMP_SUPPORTED
+   if (png_ptr)
+   {
+#  ifdef USE_FAR_KEYWORD
+   {
+      jmp_buf jmpbuf;
+      png_memcpy(jmpbuf, png_ptr->jmpbuf, png_sizeof(jmp_buf));
+      longjmp(jmpbuf, 1);
+   }
+#  else
+   longjmp(png_ptr->jmpbuf, 1);
+#  endif
+   }
+#endif
+   /* Here if there is no setjmp support or if png_ptr is NULL. */
+   PNG_ABORT();
+}
+
+#ifndef PNG_NO_WARNINGS
+/* Default warning handler: print the message to stderr (nothing is printed
+ * with PNG_NO_CONSOLE_IO) and return.  png_ptr is not used here.
+ */
+static void /* PRIVATE */
+png_default_warning(png_structp png_ptr, png_const_charp warning_message)
+{
+#ifdef PNG_NO_CONSOLE_IO
+   warning_message = warning_message; /* make compiler happy */
+#else
+#  ifdef PNG_ERROR_NUMBERS_SUPPORTED
+   if (*warning_message == '#')
+   {
+     char digits[16];
+     int pos = 0;
+     /* Copy what follows '#' until the blank ending the number is seen. */
+     while (pos < 15)
+     {
+        digits[pos] = warning_message[pos+1];
+        if (warning_message[pos] == ' ')
+            break;
+        pos++;
+     }
+     if (pos <= 1 || pos >= 15)
+       fprintf(stderr, "libpng warning: %s\n", warning_message);
+     else
+     {
+       digits[pos-1] = '\0';
+       fprintf(stderr, "libpng warning no. %s: %s\n", digits,
+          warning_message+pos);
+     }
+   }
+   else
+#  endif
+     fprintf(stderr, "libpng warning: %s\n", warning_message);
+#endif
+   png_ptr = png_ptr; /* make compiler happy */
+}
+#endif /* PNG_NO_WARNINGS */
+
+/* This function is called when the application wants to use another method
+ * of handling errors and warnings.  Note that the error function MUST NOT
+ * return to the calling routine or serious problems will occur.  The return
+ * method used in the default routine calls longjmp(png_ptr->jmpbuf, 1)
+ */
+void PNGAPI
+png_set_error_fn(png_structp png_ptr, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warning_fn)
+{
+   /* Store the two callbacks and the caller's pointer; no-op on NULL. */
+   if (png_ptr == NULL) return;
+   png_ptr->warning_fn = warning_fn;
+   png_ptr->error_fn = error_fn;
+   png_ptr->error_ptr = error_ptr;
+}
+
+
+/* This function returns a pointer to the error_ptr associated with the user
+ * functions.  The application should free any memory associated with this
+ * pointer before png_write_destroy and png_read_destroy are called.
+ */
+png_voidp PNGAPI
+png_get_error_ptr(png_structp png_ptr)
+{
+   /* A NULL png_ptr yields NULL; otherwise the stored error_ptr is
+      handed back unchanged. */
+   return (png_ptr == NULL ? NULL : (png_voidp)png_ptr->error_ptr);
+}
+
+
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+/* Replace the two strip flags in png_ptr->flags with the corresponding bits
+ * of strip_mode; all other flag bits are preserved.  No-op on NULL. */
+void PNGAPI
+png_set_strip_error_numbers(png_structp png_ptr, png_uint_32 strip_mode)
+{
+   png_uint_32 m = PNG_FLAG_STRIP_ERROR_NUMBERS|PNG_FLAG_STRIP_ERROR_TEXT;
+   if (png_ptr != NULL)
+      png_ptr->flags = (png_ptr->flags & ~m) | (strip_mode & m);
+}
+#endif
+#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/src/libpng/pnggccrd.c b/src/libpng/pnggccrd.c
new file mode 100644
index 0000000..a7248d6
--- /dev/null
+++ b/src/libpng/pnggccrd.c
@@ -0,0 +1,101 @@
+/* pnggccrd.c was removed from libpng-1.2.20. */
+
+/* This code snippet is for use by configure's compilation test. */
+
+#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && \
+    defined(PNG_MMX_CODE_SUPPORTED)
+int PNGAPI png_dummy_mmx_support(void);
+
+static int _mmx_supported = 2; // 0: no MMX; 1: MMX supported; 2: not tested
+
+int PNGAPI
+png_dummy_mmx_support(void) __attribute__((noinline));
+
+int PNGAPI
+png_dummy_mmx_support(void)
+{
+   int result;
+#if defined(PNG_MMX_CODE_SUPPORTED)  // redundant: enclosing #if requires it
+    __asm__ __volatile__ (
+#if defined(__x86_64__)
+        "pushq %%rbx          \n\t"  // rbx gets clobbered by CPUID instruction
+        "pushq %%rcx          \n\t"  // so does rcx...
+        "pushq %%rdx          \n\t"  // ...and rdx (but rcx & rdx safe on Linux)
+        "pushfq               \n\t"  // save Eflag to stack
+        "popq %%rax           \n\t"  // get Eflag from stack into rax
+        "movq %%rax, %%rcx    \n\t"  // make another copy of Eflag in rcx
+        "xorl $0x200000, %%eax \n\t" // toggle ID bit in Eflag (i.e., bit 21)
+        "pushq %%rax          \n\t"  // save modified Eflag back to stack
+        "popfq                \n\t"  // restore modified value to Eflag reg
+        "pushfq               \n\t"  // save Eflag to stack
+        "popq %%rax           \n\t"  // get Eflag from stack
+        "pushq %%rcx          \n\t"  // save original Eflag to stack
+        "popfq                \n\t"  // restore original Eflag
+#else
+        "pushl %%ebx          \n\t"  // ebx gets clobbered by CPUID instruction
+        "pushl %%ecx          \n\t"  // so does ecx...
+        "pushl %%edx          \n\t"  // ...and edx (but ecx & edx safe on Linux)
+        "pushfl               \n\t"  // save Eflag to stack
+        "popl %%eax           \n\t"  // get Eflag from stack into eax
+        "movl %%eax, %%ecx    \n\t"  // make another copy of Eflag in ecx
+        "xorl $0x200000, %%eax \n\t" // toggle ID bit in Eflag (i.e., bit 21)
+        "pushl %%eax          \n\t"  // save modified Eflag back to stack
+        "popfl                \n\t"  // restore modified value to Eflag reg
+        "pushfl               \n\t"  // save Eflag to stack
+        "popl %%eax           \n\t"  // get Eflag from stack
+        "pushl %%ecx          \n\t"  // save original Eflag to stack
+        "popfl                \n\t"  // restore original Eflag
+#endif
+        "xorl %%ecx, %%eax    \n\t"  // compare new Eflag with original Eflag
+        "jz 0f                \n\t"  // if same, CPUID instr. is not supported
+
+        "xorl %%eax, %%eax    \n\t"  // set eax to zero
+//      ".byte  0x0f, 0xa2    \n\t"  // CPUID instruction (two-byte opcode)
+        "cpuid                \n\t"  // get the CPU identification info
+        "cmpl $1, %%eax       \n\t"  // make sure eax returned a non-zero value
+        "jl 0f                \n\t"  // if eax is zero, MMX is not supported
+
+        "xorl %%eax, %%eax    \n\t"  // set eax to zero and...
+        "incl %%eax           \n\t"  // ...increment eax to 1.  This pair is
+                                     // faster than the instruction "mov eax, 1"
+        "cpuid                \n\t"  // get the CPU identification info again
+        "andl $0x800000, %%edx \n\t" // mask out all bits but MMX bit (23)
+        "cmpl $0, %%edx       \n\t"  // 0 = MMX not supported
+        "jz 0f                \n\t"  // non-zero = yes, MMX IS supported
+
+        "movl $1, %%eax       \n\t"  // set return value to 1
+        "jmp  1f              \n\t"  // DONE:  have MMX support
+
+    "0:                       \n\t"  // .NOT_SUPPORTED: target label for jump instructions
+        "movl $0, %%eax       \n\t"  // set return value to 0
+    "1:                       \n\t"  // .RETURN: target label for jump instructions
+#if defined(__x86_64__)
+        "popq %%rdx           \n\t"  // restore rdx
+        "popq %%rcx           \n\t"  // restore rcx
+        "popq %%rbx           \n\t"  // restore rbx
+#else
+        "popl %%edx           \n\t"  // restore edx
+        "popl %%ecx           \n\t"  // restore ecx
+        "popl %%ebx           \n\t"  // restore ebx
+#endif
+
+//      "ret                  \n\t"  // DONE:  no MMX support
+                                     // (fall through to standard C "ret")
+
+        : "=a" (result)              // output list
+
+        :                            // any variables used on input (none)
+
+                                     // no clobber list
+//      , "%ebx", "%ecx", "%edx"     // GRR:  we handle these manually
+//      , "memory"   // if write to a variable gcc thought was in a reg
+//      , "cc"       // "condition codes" (flag bits)
+    );
+    _mmx_supported = result;
+#else
+    _mmx_supported = 0;
+#endif /* PNG_MMX_CODE_SUPPORTED */
+
+    return _mmx_supported;
+}
+#endif
diff --git a/src/libpng/pngget.c b/src/libpng/pngget.c
new file mode 100644
index 0000000..a0e90bb
--- /dev/null
+++ b/src/libpng/pngget.c
@@ -0,0 +1,901 @@
+
+/* pngget.c - retrieval of values from info struct
+ *
+ * Last changed in libpng 1.2.15 January 5, 2007
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+
+/* Return the bits of "flag" that are set in info_ptr->valid (0 on NULL). */
+png_uint_32 PNGAPI
+png_get_valid(png_structp png_ptr, png_infop info_ptr, png_uint_32 flag)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+   return (info_ptr->valid & flag);
+}
+
+/* Return info_ptr->rowbytes, or 0 when either argument is NULL. */
+png_uint_32 PNGAPI
+png_get_rowbytes(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+   return (info_ptr->rowbytes);
+}
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+/* Return info_ptr->row_pointers, or 0 when either argument is NULL. */
+png_bytepp PNGAPI
+png_get_rows(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+   return (info_ptr->row_pointers);
+}
+#endif
+
+#ifdef PNG_EASY_ACCESS_SUPPORTED
+/* easy access to info, added in libpng-0.99 */
+/* Return info_ptr->width, or 0 when either argument is NULL. */
+png_uint_32 PNGAPI
+png_get_image_width(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+
+   return info_ptr->width;
+}
+
+/* Return info_ptr->height, or 0 when either argument is NULL. */
+png_uint_32 PNGAPI
+png_get_image_height(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+
+   return info_ptr->height;
+}
+
+/* Return info_ptr->bit_depth, or 0 when either argument is NULL. */
+png_byte PNGAPI
+png_get_bit_depth(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+
+   return info_ptr->bit_depth;
+}
+
+/* Return info_ptr->color_type, or 0 when either argument is NULL. */
+png_byte PNGAPI
+png_get_color_type(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+
+   return info_ptr->color_type;
+}
+
+/* Return info_ptr->filter_type, or 0 when either argument is NULL. */
+png_byte PNGAPI
+png_get_filter_type(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+
+   return info_ptr->filter_type;
+}
+
+/* Return info_ptr->interlace_type, or 0 when either argument is NULL. */
+png_byte PNGAPI
+png_get_interlace_type(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+
+   return info_ptr->interlace_type;
+}
+
+/* Return info_ptr->compression_type, or 0 when either argument is NULL. */
+png_byte PNGAPI
+png_get_compression_type(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+
+   return info_ptr->compression_type;
+}
+
+/* Return info_ptr->x_pixels_per_unit when pHYs is valid and its unit is
+ * PNG_RESOLUTION_METER; return 0 in every other case. */
+png_uint_32 PNGAPI
+png_get_x_pixels_per_meter(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+#if defined(PNG_pHYs_SUPPORTED)
+   if ((info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_x_pixels_per_meter");
+      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER)
+         return (info_ptr->x_pixels_per_unit);
+   }
+#endif
+   return (0);
+}
+
+/* Return info_ptr->y_pixels_per_unit when pHYs is valid and its unit is
+ * PNG_RESOLUTION_METER; return 0 in every other case. */
+png_uint_32 PNGAPI
+png_get_y_pixels_per_meter(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+#if defined(PNG_pHYs_SUPPORTED)
+   if ((info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_y_pixels_per_meter");
+      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER)
+         return (info_ptr->y_pixels_per_unit);
+   }
+#endif
+   return (0);
+}
+
+/* Return the pHYs resolution when the unit is PNG_RESOLUTION_METER and the
+ * x and y values are equal; return 0 in every other case. */
+png_uint_32 PNGAPI
+png_get_pixels_per_meter(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+#if defined(PNG_pHYs_SUPPORTED)
+   if ((info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_pixels_per_meter");
+      if (info_ptr->phys_unit_type == PNG_RESOLUTION_METER &&
+          info_ptr->x_pixels_per_unit == info_ptr->y_pixels_per_unit)
+         return (info_ptr->x_pixels_per_unit);
+   }
+#endif
+   return (0);
+}
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+/* Return y_pixels_per_unit / x_pixels_per_unit as a float.  The result is
+ * 0.0 when pHYs is not valid, when x_pixels_per_unit is zero, or when
+ * either argument is NULL. */
+float PNGAPI
+png_get_pixel_aspect_ratio(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return ((float)0.0);
+#if defined(PNG_pHYs_SUPPORTED)
+   if ((info_ptr->valid & PNG_INFO_pHYs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_aspect_ratio");
+      if (info_ptr->x_pixels_per_unit != 0)
+         return ((float)((float)info_ptr->y_pixels_per_unit
+            /(float)info_ptr->x_pixels_per_unit));
+   }
+#endif
+   return ((float)0.0);
+}
+#endif
+
+/* Return info_ptr->x_offset when oFFs is valid and its unit is
+ * PNG_OFFSET_MICROMETER; return 0 in every other case. */
+png_int_32 PNGAPI
+png_get_x_offset_microns(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+#if defined(PNG_oFFs_SUPPORTED)
+   if ((info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_x_offset_microns");
+      if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER)
+         return (info_ptr->x_offset);
+   }
+#endif
+   return (0);
+}
+
+/* Return info_ptr->y_offset when oFFs is valid and its unit is
+ * PNG_OFFSET_MICROMETER; return 0 in every other case. */
+png_int_32 PNGAPI
+png_get_y_offset_microns(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+#if defined(PNG_oFFs_SUPPORTED)
+   if ((info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_y_offset_microns");
+      if (info_ptr->offset_unit_type == PNG_OFFSET_MICROMETER)
+         return (info_ptr->y_offset);
+   }
+#endif
+   return (0);
+}
+
+/* Return info_ptr->x_offset when oFFs is valid and its unit is
+ * PNG_OFFSET_PIXEL; return 0 in every other case. */
+png_int_32 PNGAPI
+png_get_x_offset_pixels(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+#if defined(PNG_oFFs_SUPPORTED)
+   if ((info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_x_offset_pixels");
+      if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL)
+         return (info_ptr->x_offset);
+   }
+#endif
+   return (0);
+}
+
+/* Return info_ptr->y_offset when oFFs is valid and its unit is
+ * PNG_OFFSET_PIXEL; return 0 in every other case. */
+png_int_32 PNGAPI
+png_get_y_offset_pixels(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+#if defined(PNG_oFFs_SUPPORTED)
+   if ((info_ptr->valid & PNG_INFO_oFFs) != 0)
+   {
+      png_debug1(1, "in %s retrieval function\n", "png_get_y_offset_pixels");
+      if (info_ptr->offset_unit_type == PNG_OFFSET_PIXEL)
+         return (info_ptr->y_offset);
+   }
+#endif
+   return (0);
+}
+
+#if defined(PNG_INCH_CONVERSIONS) && defined(PNG_FLOATING_POINT_SUPPORTED)
+png_uint_32 PNGAPI
+png_get_pixels_per_inch(png_structp png_ptr, png_infop info_ptr)
+{  /* per-meter value times .0254, plus .5 before the truncating cast */
+   return ((png_uint_32)(
+      (float)png_get_pixels_per_meter(png_ptr, info_ptr)*.0254 +.5));
+}
+
+png_uint_32 PNGAPI
+png_get_x_pixels_per_inch(png_structp png_ptr, png_infop info_ptr)
+{  /* per-meter value times .0254, plus .5 before the truncating cast */
+   return ((png_uint_32)(
+      (float)png_get_x_pixels_per_meter(png_ptr, info_ptr)*.0254 +.5));
+}
+
+png_uint_32 PNGAPI
+png_get_y_pixels_per_inch(png_structp png_ptr, png_infop info_ptr)
+{  /* per-meter value times .0254, plus .5 before the truncating cast */
+   return ((png_uint_32)(
+      (float)png_get_y_pixels_per_meter(png_ptr, info_ptr)*.0254 +.5));
+}
+
+float PNGAPI
+png_get_x_offset_inches(png_structp png_ptr, png_infop info_ptr)
+{  /* png_get_x_offset_microns() scaled by .00003937 */
+   return (
+      (float)png_get_x_offset_microns(png_ptr, info_ptr)*.00003937);
+}
+
+float PNGAPI
+png_get_y_offset_inches(png_structp png_ptr, png_infop info_ptr)
+{  /* png_get_y_offset_microns() scaled by .00003937 */
+   return (
+      (float)png_get_y_offset_microns(png_ptr, info_ptr)*.00003937);
+}
+
+#if defined(PNG_pHYs_SUPPORTED)
+/* Fetch the pHYs resolution.  Each non-NULL output is filled in and
+ * PNG_INFO_pHYs is returned if at least one was.  The resolutions are
+ * scaled by .0254 only when unit_type is non-NULL and the unit is 1.
+ */
+png_uint_32 PNGAPI
+png_get_pHYs_dpi(png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type)
+{
+   png_uint_32 found = 0;
+
+   if (png_ptr == NULL || info_ptr == NULL ||
+       !(info_ptr->valid & PNG_INFO_pHYs))
+      return (found);
+
+   png_debug1(1, "in %s retrieval function\n", "pHYs");
+   if (res_x != NULL)
+      *res_x = info_ptr->x_pixels_per_unit;
+   if (res_y != NULL)
+      *res_y = info_ptr->y_pixels_per_unit;
+   if (unit_type != NULL)
+      *unit_type = (int)info_ptr->phys_unit_type;
+   if (res_x != NULL || res_y != NULL || unit_type != NULL)
+      found = PNG_INFO_pHYs;
+
+   /* Values are scaled only if the caller asked for the unit. */
+   if (unit_type != NULL && *unit_type == 1)
+   {
+      if (res_x != NULL) *res_x = (png_uint_32)(*res_x * .0254 + .50);
+      if (res_y != NULL) *res_y = (png_uint_32)(*res_y * .0254 + .50);
+   }
+   return (found);
+}
+#endif /* PNG_pHYs_SUPPORTED */
+#endif  /* PNG_INCH_CONVERSIONS && PNG_FLOATING_POINT_SUPPORTED */
+
+/* png_get_channels really belongs in here, too, but it's been around longer */
+
+#endif  /* PNG_EASY_ACCESS_SUPPORTED */
+
+/* Return info_ptr->channels, or 0 when either argument is NULL. */
+png_byte PNGAPI
+png_get_channels(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (0);
+   return (info_ptr->channels);
+}
+
+/* Return info_ptr->signature, or NULL when either argument is NULL. */
+png_bytep PNGAPI
+png_get_signature(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return (NULL);
+   return (info_ptr->signature);
+}
+
+#if defined(PNG_bKGD_SUPPORTED)
+/* Point *background at info_ptr->background; PNG_INFO_bKGD on success. */
+png_uint_32 PNGAPI
+png_get_bKGD(png_structp png_ptr, png_infop info_ptr,
+   png_color_16p *background)
+{
+   if (png_ptr == NULL || info_ptr == NULL || background == NULL)
+      return (0);
+   if ((info_ptr->valid & PNG_INFO_bKGD) == 0)
+      return (0);
+   png_debug1(1, "in %s retrieval function\n", "bKGD");
+   *background = &(info_ptr->background);
+   return (PNG_INFO_bKGD);
+}
+#endif
+
+#if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+/* Copy the cHRM values from info_ptr, as doubles, into every output
+ * pointer that is not NULL.  Returns PNG_INFO_cHRM, or 0 when the cHRM
+ * data is not marked valid or png_ptr / info_ptr is NULL.
+ */
+png_uint_32 PNGAPI
+png_get_cHRM(png_structp png_ptr, png_infop info_ptr,
+   double *white_x, double *white_y, double *red_x, double *red_y,
+   double *green_x, double *green_y, double *blue_x, double *blue_y)
+{
+   if (png_ptr == NULL || info_ptr == NULL ||
+       (info_ptr->valid & PNG_INFO_cHRM) == 0)
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "cHRM");
+   /* white */
+   if (white_x != NULL) *white_x = (double)info_ptr->x_white;
+   if (white_y != NULL) *white_y = (double)info_ptr->y_white;
+   /* red */
+   if (red_x != NULL) *red_x = (double)info_ptr->x_red;
+   if (red_y != NULL) *red_y = (double)info_ptr->y_red;
+   /* green */
+   if (green_x != NULL) *green_x = (double)info_ptr->x_green;
+   if (green_y != NULL) *green_y = (double)info_ptr->y_green;
+   /* blue */
+   if (blue_x != NULL) *blue_x = (double)info_ptr->x_blue;
+   if (blue_y != NULL) *blue_y = (double)info_ptr->y_blue;
+   return (PNG_INFO_cHRM);
+}
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Copy the int_* cHRM values from info_ptr into every output pointer that
+ * is not NULL.  Returns PNG_INFO_cHRM, or 0 when the cHRM data is not
+ * marked valid or png_ptr / info_ptr is NULL.
+ */
+png_uint_32 PNGAPI
+png_get_cHRM_fixed(png_structp png_ptr, png_infop info_ptr,
+   png_fixed_point *white_x, png_fixed_point *white_y, png_fixed_point *red_x,
+   png_fixed_point *red_y, png_fixed_point *green_x, png_fixed_point *green_y,
+   png_fixed_point *blue_x, png_fixed_point *blue_y)
+{
+   if (png_ptr == NULL || info_ptr == NULL ||
+       (info_ptr->valid & PNG_INFO_cHRM) == 0)
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "cHRM");
+   /* white */
+   if (white_x != NULL) *white_x = info_ptr->int_x_white;
+   if (white_y != NULL) *white_y = info_ptr->int_y_white;
+   /* red */
+   if (red_x != NULL) *red_x = info_ptr->int_x_red;
+   if (red_y != NULL) *red_y = info_ptr->int_y_red;
+   /* green */
+   if (green_x != NULL) *green_x = info_ptr->int_x_green;
+   if (green_y != NULL) *green_y = info_ptr->int_y_green;
+   /* blue */
+   if (blue_x != NULL) *blue_x = info_ptr->int_x_blue;
+   if (blue_y != NULL) *blue_y = info_ptr->int_y_blue;
+   return (PNG_INFO_cHRM);
+}
+#endif
+#endif
+
+#if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+/* Store the file gamma as a double; returns 0 when it is not available. */
+png_uint_32 PNGAPI
+png_get_gAMA(png_structp png_ptr, png_infop info_ptr, double *file_gamma)
+{
+   if (png_ptr == NULL || info_ptr == NULL || file_gamma == NULL ||
+      !(info_ptr->valid & PNG_INFO_gAMA))
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "gAMA");
+   *file_gamma = (double)info_ptr->gamma;
+   return (PNG_INFO_gAMA);
+}
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Store the fixed-point file gamma; returns 0 when it is not available. */
+png_uint_32 PNGAPI
+png_get_gAMA_fixed(png_structp png_ptr, png_infop info_ptr,
+    png_fixed_point *int_file_gamma)
+{
+   if (png_ptr == NULL || info_ptr == NULL || int_file_gamma == NULL ||
+      !(info_ptr->valid & PNG_INFO_gAMA))
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "gAMA");
+   *int_file_gamma = info_ptr->int_gamma;
+   return (PNG_INFO_gAMA);
+}
+#endif
+#endif
+
+#if defined(PNG_sRGB_SUPPORTED)
+/* Store the sRGB rendering intent; returns 0 when it is not available. */
+png_uint_32 PNGAPI
+png_get_sRGB(png_structp png_ptr, png_infop info_ptr, int *file_srgb_intent)
+{
+   if (png_ptr == NULL || info_ptr == NULL || file_srgb_intent == NULL ||
+      !(info_ptr->valid & PNG_INFO_sRGB))
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "sRGB");
+   *file_srgb_intent = (int)info_ptr->srgb_intent;
+   return (PNG_INFO_sRGB);
+}
+#endif
+
+#if defined(PNG_iCCP_SUPPORTED)
+/* Fetch the iCCP name, profile and length; compression_type is optional. */
+png_uint_32 PNGAPI
+png_get_iCCP(png_structp png_ptr, png_infop info_ptr,
+             png_charpp name, int *compression_type,
+             png_charpp profile, png_uint_32 *proflen)
+{
+   if (png_ptr == NULL || info_ptr == NULL || name == NULL || profile == NULL
+      || proflen == NULL || !(info_ptr->valid & PNG_INFO_iCCP))
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "iCCP");
+   *name = info_ptr->iccp_name;
+   *profile = info_ptr->iccp_profile;
+   *proflen = info_ptr->iccp_proflen;   /* no (int) cast: keep all 32 bits */
+   /* compression_type is a dummy kept for API stability, so NULL is fine. */
+   if (compression_type != NULL)
+      *compression_type = (int)info_ptr->iccp_compression;
+   return (PNG_INFO_iCCP);
+}
+#endif
+
+#if defined(PNG_sPLT_SUPPORTED)
+/* Point *spalettes at the stored suggested palettes; returns their count. */
+png_uint_32 PNGAPI
+png_get_sPLT(png_structp png_ptr, png_infop info_ptr,
+             png_sPLT_tpp spalettes)
+{
+   if (png_ptr == NULL || info_ptr == NULL || spalettes == NULL)
+      return (0);
+
+   *spalettes = info_ptr->splt_palettes;
+   return ((png_uint_32)info_ptr->splt_palettes_num);
+}
+#endif
+
+#if defined(PNG_hIST_SUPPORTED)
+/* Point *hist at the stored hIST data; returns 0 when it is not available. */
+png_uint_32 PNGAPI
+png_get_hIST(png_structp png_ptr, png_infop info_ptr, png_uint_16p *hist)
+{
+   if (png_ptr == NULL || info_ptr == NULL || hist == NULL ||
+      !(info_ptr->valid & PNG_INFO_hIST))
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "hIST");
+   *hist = info_ptr->hist;
+   return (PNG_INFO_hIST);
+}
+#endif
+
+png_uint_32 PNGAPI
+png_get_IHDR(png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 *width, png_uint_32 *height, int *bit_depth,
+   int *color_type, int *interlace_type, int *compression_type,
+   int *filter_type)
+   /* Copies the IHDR fields out of info_ptr and sanity-checks them. */
+{
+   if (png_ptr != NULL && info_ptr != NULL && width != NULL && height != NULL &&
+      bit_depth != NULL && color_type != NULL)   /* the four required outputs */
+   {
+      png_debug1(1, "in %s retrieval function\n", "IHDR");
+      *width = info_ptr->width;
+      *height = info_ptr->height;
+      *bit_depth = info_ptr->bit_depth;
+      if (info_ptr->bit_depth < 1 || info_ptr->bit_depth > 16)
+        png_error(png_ptr, "Invalid bit depth");   /* outputs above already set */
+      *color_type = info_ptr->color_type;
+      if (info_ptr->color_type > 6)
+        png_error(png_ptr, "Invalid color type");
+      if (compression_type != NULL)   /* the remaining outputs are optional */
+         *compression_type = info_ptr->compression_type;
+      if (filter_type != NULL)
+         *filter_type = info_ptr->filter_type;
+      if (interlace_type != NULL)
+         *interlace_type = info_ptr->interlace_type;
+
+      /* check for potential overflow of rowbytes */
+      if (*width == 0 || *width > PNG_UINT_31_MAX)
+        png_error(png_ptr, "Invalid image width");
+      if (*height == 0 || *height > PNG_UINT_31_MAX)
+        png_error(png_ptr, "Invalid image height");
+      if (info_ptr->width > (PNG_UINT_32_MAX
+                 >> 3)      /* 8-byte RGBA pixels */
+                 - 64       /* bigrowbuf hack */
+                 - 1        /* filter byte */
+                 - 7*8      /* rounding of width to multiple of 8 pixels */
+                 - 8)       /* extra max_pixel_depth pad */
+      {
+         png_warning(png_ptr,   /* only a warning: the header is still returned */
+            "Width too large for libpng to process image data.");
+      }
+      return (1);   /* success is reported as 1, not as a PNG_INFO_* flag */
+   }
+   return (0);   /* png_ptr, info_ptr or a required output was NULL */
+}
+
+#if defined(PNG_oFFs_SUPPORTED)
+/* Fetch the oFFs offsets and unit; all three outputs are required. */
+png_uint_32 PNGAPI
+png_get_oFFs(png_structp png_ptr, png_infop info_ptr,
+   png_int_32 *offset_x, png_int_32 *offset_y, int *unit_type)
+{
+   if (png_ptr == NULL || info_ptr == NULL || offset_x == NULL ||
+      offset_y == NULL || unit_type == NULL || !(info_ptr->valid & PNG_INFO_oFFs))
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "oFFs");
+   *offset_x = info_ptr->x_offset;
+   *offset_y = info_ptr->y_offset;
+   *unit_type = (int)info_ptr->offset_unit_type;
+   return (PNG_INFO_oFFs);
+}
+#endif
+
+#if defined(PNG_pCAL_SUPPORTED)
+/* Fetch every stored pCAL field; all seven outputs are required. */
+png_uint_32 PNGAPI
+png_get_pCAL(png_structp png_ptr, png_infop info_ptr,
+   png_charp *purpose, png_int_32 *X0, png_int_32 *X1, int *type, int *nparams,
+   png_charp *units, png_charpp *params)
+{
+   if (png_ptr == NULL || info_ptr == NULL || purpose == NULL || X0 == NULL ||
+      X1 == NULL || type == NULL || nparams == NULL || units == NULL ||
+      params == NULL || !(info_ptr->valid & PNG_INFO_pCAL))
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "pCAL");
+   *purpose = info_ptr->pcal_purpose;
+   *X0 = info_ptr->pcal_X0;
+   *X1 = info_ptr->pcal_X1;
+   *type = (int)info_ptr->pcal_type;
+   *nparams = (int)info_ptr->pcal_nparams;
+   *units = info_ptr->pcal_units;
+   *params = info_ptr->pcal_params;
+   return (PNG_INFO_pCAL);
+}
+#endif
+
+#if defined(PNG_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+/* Fetch the sCAL unit and pixel size; all three outputs are required. */
+png_uint_32 PNGAPI
+png_get_sCAL(png_structp png_ptr, png_infop info_ptr,
+             int *unit, double *width, double *height)
+{
+    if (png_ptr == NULL || info_ptr == NULL || unit == NULL ||
+       width == NULL || height == NULL || !(info_ptr->valid & PNG_INFO_sCAL))
+        return (0);   /* includes a NULL output pointer */
+
+    *unit = info_ptr->scal_unit;
+    *width = info_ptr->scal_pixel_width;
+    *height = info_ptr->scal_pixel_height;
+    return (PNG_INFO_sCAL);
+}
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Fetch the sCAL unit and size strings; all three outputs are required. */
+png_uint_32 PNGAPI
+png_get_sCAL_s(png_structp png_ptr, png_infop info_ptr,
+             int *unit, png_charpp width, png_charpp height)
+{
+    if (png_ptr == NULL || info_ptr == NULL || unit == NULL ||
+       width == NULL || height == NULL || !(info_ptr->valid & PNG_INFO_sCAL))
+        return (0);   /* includes a NULL output pointer */
+
+    *unit = info_ptr->scal_unit;
+    *width = info_ptr->scal_s_width;
+    *height = info_ptr->scal_s_height;
+    return (PNG_INFO_sCAL);
+}
+#endif
+#endif
+#endif
+
+#if defined(PNG_pHYs_SUPPORTED)
+/* Copy the stored pHYs values into whichever outputs are supplied; every
+ * output pointer is optional.  Returns PNG_INFO_pHYs when the chunk data
+ * is marked valid and at least one value was stored, otherwise 0.
+ */
+png_uint_32 PNGAPI
+png_get_pHYs(png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 *res_x, png_uint_32 *res_y, int *unit_type)
+{
+   png_uint_32 found = 0;
+
+   if (png_ptr == NULL || info_ptr == NULL ||
+      !(info_ptr->valid & PNG_INFO_pHYs))
+      return (found);
+
+   png_debug1(1, "in %s retrieval function\n", "pHYs");
+   /* deliver each requested field */
+   if (res_x != NULL)
+      *res_x = info_ptr->x_pixels_per_unit;
+   if (res_y != NULL)
+      *res_y = info_ptr->y_pixels_per_unit;
+   if (unit_type != NULL)
+      *unit_type = (int)info_ptr->phys_unit_type;
+   /* any delivered field counts as success */
+   if (res_x != NULL || res_y != NULL || unit_type != NULL)
+      found = PNG_INFO_pHYs;
+
+   return (found);
+}
+#endif
+
+/* Fetch the palette pointer and, if num_palette is non-NULL, its size. */
+png_uint_32 PNGAPI
+png_get_PLTE(png_structp png_ptr, png_infop info_ptr, png_colorp *palette,
+   int *num_palette)
+{
+   if (png_ptr == NULL || info_ptr == NULL || palette == NULL ||
+      !(info_ptr->valid & PNG_INFO_PLTE))
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "PLTE");
+   *palette = info_ptr->palette;
+   if (num_palette != NULL) *num_palette = info_ptr->num_palette;
+   png_debug1(3, "num_palette = %d\n", (int)info_ptr->num_palette);
+   return (PNG_INFO_PLTE);
+}
+
+#if defined(PNG_sBIT_SUPPORTED)
+/* Point *sig_bit at the stored sBIT data; returns 0 if it is unavailable. */
+png_uint_32 PNGAPI
+png_get_sBIT(png_structp png_ptr, png_infop info_ptr, png_color_8p *sig_bit)
+{
+   if (png_ptr == NULL || info_ptr == NULL || sig_bit == NULL ||
+      !(info_ptr->valid & PNG_INFO_sBIT))
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "sBIT");
+   *sig_bit = &(info_ptr->sig_bit);
+   return (PNG_INFO_sBIT);
+}
+#endif
+
+#if defined(PNG_TEXT_SUPPORTED)
+/* Report the stored text chunks; returns how many there are (0 if none). */
+png_uint_32 PNGAPI
+png_get_text(png_structp png_ptr, png_infop info_ptr, png_textp *text_ptr,
+   int *num_text)
+{
+   if (png_ptr == NULL || info_ptr == NULL || !(info_ptr->num_text > 0))
+   {
+      /* nothing stored: only the count output is touched */
+      if (num_text != NULL) *num_text = 0;
+      return(0);
+   }
+
+   png_debug1(1, "in %s retrieval function\n",
+      (png_ptr->chunk_name[0] == '\0' ? "text"
+          : (png_const_charp)png_ptr->chunk_name));
+   if (text_ptr != NULL) *text_ptr = info_ptr->text;
+   if (num_text != NULL) *num_text = info_ptr->num_text;
+   return ((png_uint_32)info_ptr->num_text);
+}
+#endif
+
+#if defined(PNG_tIME_SUPPORTED)
+/* Point *mod_time at the stored tIME data; returns 0 if it is unavailable. */
+png_uint_32 PNGAPI
+png_get_tIME(png_structp png_ptr, png_infop info_ptr, png_timep *mod_time)
+{
+   if (png_ptr == NULL || info_ptr == NULL || mod_time == NULL ||
+      !(info_ptr->valid & PNG_INFO_tIME))
+      return (0);
+
+   png_debug1(1, "in %s retrieval function\n", "tIME");
+   *mod_time = &(info_ptr->mod_time);
+   return (PNG_INFO_tIME);
+}
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED)
+/* Report tRNS data.  The flag is returned if num_trans was stored, or the
+ * output matching the color type (trans for palette, else trans_values). */
+png_uint_32 PNGAPI
+png_get_tRNS(png_structp png_ptr, png_infop info_ptr,
+   png_bytep *trans, int *num_trans, png_color_16p *trans_values)
+{
+   png_uint_32 got = 0;
+   int paletted;
+
+   if (png_ptr == NULL || info_ptr == NULL ||
+      !(info_ptr->valid & PNG_INFO_tRNS))
+      return (got);
+
+   png_debug1(1, "in %s retrieval function\n", "tRNS");
+   paletted = (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE);
+
+   /* the stored trans pointer is only handed out for palette images */
+   if (trans != NULL)
+   {
+      *trans = paletted ? info_ptr->trans : NULL;
+      if (paletted)
+         got |= PNG_INFO_tRNS;
+   }
+   if (trans_values != NULL)
+   {
+      *trans_values = &(info_ptr->trans_values);
+      if (!paletted)
+         got |= PNG_INFO_tRNS;
+   }
+   if (num_trans != NULL)
+   {
+      *num_trans = info_ptr->num_trans;
+      got |= PNG_INFO_tRNS;
+   }
+   return (got);
+}
+#endif
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+/* Point *unknowns at the stored unknown chunks and return their count. */
+png_uint_32 PNGAPI
+png_get_unknown_chunks(png_structp png_ptr, png_infop info_ptr,
+             png_unknown_chunkpp unknowns)
+{
+   if (png_ptr == NULL || info_ptr == NULL || unknowns == NULL)
+      return (0);
+
+   *unknowns = info_ptr->unknown_chunks;
+   return ((png_uint_32)info_ptr->unknown_chunks_num);
+}
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+png_byte PNGAPI
+png_get_rgb_to_gray_status (png_structp png_ptr)
+{
+   return (png_byte)(png_ptr == NULL ? 0 : png_ptr->rgb_to_gray_status);
+}
+#endif
+
+#if defined(PNG_USER_CHUNKS_SUPPORTED)
+png_voidp PNGAPI
+png_get_user_chunk_ptr(png_structp png_ptr)
+{
+   return (png_ptr == NULL ? NULL : png_ptr->user_chunk_ptr);
+}
+#endif
+
+#ifdef PNG_WRITE_SUPPORTED
+png_uint_32 PNGAPI
+png_get_compression_buffer_size(png_structp png_ptr)
+{
+   return (png_uint_32)(png_ptr == NULL ? 0L : png_ptr->zbuf_size);
+}
+#endif
+
+#ifdef PNG_ASSEMBLER_CODE_SUPPORTED
+#ifndef PNG_1_0_X
+/* this function was added to libpng 1.2.0 and should exist by default */
+png_uint_32 PNGAPI
+png_get_asm_flags (png_structp png_ptr)
+{
+    /* Obsolete stub, to be removed from libpng-1.4.0: the result is always 0. */
+    return (png_ptr == NULL ? 0L : 0L);   /* png_ptr is tested but unused */
+}
+
+/* this function was added to libpng 1.2.0 and should exist by default */
+png_uint_32 PNGAPI
+png_get_asm_flagmask (int flag_select)
+{
+    /* Obsolete stub, to be removed from libpng-1.4.0: the result is always 0. */
+    (void)flag_select;   /* parameter is intentionally ignored */
+    return 0L;
+}
+
+    /* GRR:  could add this:   && defined(PNG_MMX_CODE_SUPPORTED) */
+/* this function was added to libpng 1.2.0 */
+png_uint_32 PNGAPI
+png_get_mmx_flagmask (int flag_select, int *compilerID)
+{
+    /* Obsolete stub, to be removed from libpng-1.4.0; compilerID may be NULL. */
+    flag_select=flag_select;
+    if (compilerID != NULL) *compilerID = -1;   /* unknown: no asm/MMX code */
+    return 0L;
+}
+
+/* this function was added to libpng 1.2.0 */
+png_byte PNGAPI
+png_get_mmx_bitdepth_threshold (png_structp png_ptr)
+{
+    /* Obsolete stub, to be removed from libpng-1.4.0: the result is always 0. */
+    return (png_ptr == NULL ? 0 : 0);   /* png_ptr is tested but unused */
+}
+
+/* this function was added to libpng 1.2.0 */
+png_uint_32 PNGAPI
+png_get_mmx_rowbytes_threshold (png_structp png_ptr)
+{
+    /* Obsolete stub, to be removed from libpng-1.4.0: the result is always 0. */
+    return (png_ptr == NULL ? 0L : 0L);   /* png_ptr is tested but unused */
+}
+#endif /* ?PNG_1_0_X */
+#endif /* ?PNG_ASSEMBLER_CODE_SUPPORTED */
+
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+/* these functions were added to libpng 1.2.6 */
+png_uint_32 PNGAPI
+png_get_user_width_max (png_structp png_ptr)
+{
+    return (png_ptr == NULL ? 0 : png_ptr->user_width_max);
+}
+png_uint_32 PNGAPI
+png_get_user_height_max (png_structp png_ptr)
+{
+    return (png_ptr == NULL ? 0 : png_ptr->user_height_max);
+}
+#endif /* ?PNG_SET_USER_LIMITS_SUPPORTED */
+ 
+
+#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/src/libpng/pngmem.c b/src/libpng/pngmem.c
new file mode 100644
index 0000000..248060f
--- /dev/null
+++ b/src/libpng/pngmem.c
@@ -0,0 +1,608 @@
+
+/* pngmem.c - stub functions for memory allocation
+ *
+ * Last changed in libpng 1.2.13 November 13, 2006
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2006 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file provides a location for all memory allocation.  Users who
+ * need special memory handling are expected to supply replacement
+ * functions for png_malloc() and png_free(), and to use
+ * png_create_read_struct_2() and png_create_write_struct_2() to
+ * identify the replacement functions.
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+
+/* Borland DOS special memory handler */
+#if defined(__TURBOC__) && !defined(_Windows) && !defined(__FLAT__)
+/* if you change this, be sure to change the one in png.h also */
+
+/* Allocate memory for a png_struct.  The malloc and memset can be replaced
+   by a single call to calloc() if this is thought to improve performance. */
+png_voidp /* PRIVATE */
+png_create_struct(int type)
+{
+#ifdef PNG_USER_MEM_SUPPORTED
+   return (png_create_struct_2(type, png_malloc_ptr_NULL, png_voidp_NULL));
+}
+
+/* Alternate version of png_create_struct, for use with user-defined malloc.
+   Without PNG_USER_MEM_SUPPORTED the body below belongs to png_create_struct. */
+png_voidp /* PRIVATE */
+png_create_struct_2(int type, png_malloc_ptr malloc_fn, png_voidp mem_ptr)
+{
+#endif /* PNG_USER_MEM_SUPPORTED */
+   png_size_t size;
+   png_voidp struct_ptr;
+   /* pick the allocation size from the requested structure kind */
+   if (type == PNG_STRUCT_INFO)
+     size = png_sizeof(png_info);
+   else if (type == PNG_STRUCT_PNG)
+     size = png_sizeof(png_struct);
+   else   /* NOTE(review): unknown type returns the copyright string, not NULL */
+     return (png_get_copyright(NULL));
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   if(malloc_fn != NULL)
+   {
+      png_struct dummy_struct;   /* stack struct that only carries mem_ptr */
+      png_structp png_ptr = &dummy_struct;
+      png_ptr->mem_ptr=mem_ptr;
+      struct_ptr = (*(malloc_fn))(png_ptr, (png_uint_32)size);
+   }
+   else
+#endif /* PNG_USER_MEM_SUPPORTED */
+      struct_ptr = (png_voidp)farmalloc(size);
+   if (struct_ptr != NULL)
+      png_memset(struct_ptr, 0, size);   /* callers receive zero-filled storage */
+   return (struct_ptr);
+}
+
+/* Free memory allocated by a png_create_struct() call */
+void /* PRIVATE */
+png_destroy_struct(png_voidp struct_ptr)
+{
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_destroy_struct_2(struct_ptr, png_free_ptr_NULL, png_voidp_NULL);
+}
+
+/* Free memory allocated by a png_create_struct() call; NULL is ignored. */
+void /* PRIVATE */
+png_destroy_struct_2(png_voidp struct_ptr, png_free_ptr free_fn,
+    png_voidp mem_ptr)
+{
+#endif /* PNG_USER_MEM_SUPPORTED */
+   if (struct_ptr != NULL)
+   {
+#ifdef PNG_USER_MEM_SUPPORTED
+      if(free_fn != NULL)
+      {
+         png_struct dummy_struct;   /* stack struct that only carries mem_ptr */
+         png_structp png_ptr = &dummy_struct;
+         png_ptr->mem_ptr=mem_ptr;
+         (*(free_fn))(png_ptr, struct_ptr);
+         return;   /* the user callback has released the block */
+      }
+#endif /* PNG_USER_MEM_SUPPORTED */
+      farfree (struct_ptr);
+   }
+}
+
+/* Allocate memory.  For reasonable files, size should never exceed
+ * 64K.  However, zlib may allocate more than 64K if you don't tell
+ * it not to.  See zconf.h and png.h for more information. zlib does
+ * need to allocate exactly 64K, so whatever you call here must
+ * have the ability to do that.
+ *
+ * Borland seems to have a problem in DOS mode for exactly 64K.
+ * It gives you a segment with an offset of 8 (perhaps to store its
+ * memory stuff).  zlib doesn't like this at all, so we have to
+ * detect and deal with it.  This code should not be needed in
+ * Windows or OS/2 modes, and only in 16 bit mode.  This code has
+ * been updated by Alexander Lehmann for version 0.89 to waste less
+ * memory.
+ *
+ * Note that we can't use png_size_t for the "size" declaration,
+ * since on some systems a png_size_t is a 16-bit quantity, and as a
+ * result, we would be truncating potentially larger memory requests
+ * (which should cause a fatal error) and introducing major problems.
+ */
+
+png_voidp PNGAPI
+png_malloc(png_structp png_ptr, png_uint_32 size)
+{
+   png_voidp ret;
+
+   if (png_ptr == NULL || size == 0)
+      return (NULL);
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   if(png_ptr->malloc_fn != NULL)   /* a user allocator takes precedence */
+       ret = ((png_voidp)(*(png_ptr->malloc_fn))(png_ptr, (png_size_t)size));
+   else
+       ret = (png_malloc_default(png_ptr, size));
+   if (ret == NULL && (png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+       png_error(png_ptr, "Out of memory!");
+   return (ret);
+}
+
+png_voidp PNGAPI
+png_malloc_default(png_structp png_ptr, png_uint_32 size)
+{
+   png_voidp ret;
+#endif /* PNG_USER_MEM_SUPPORTED */
+
+   if (png_ptr == NULL || size == 0)
+      return (NULL);
+
+#ifdef PNG_MAX_MALLOC_64K
+   if (size > (png_uint_32)65536L)
+   {
+      png_warning(png_ptr, "Cannot Allocate > 64K");
+      ret = NULL;
+   }
+   else
+#endif
+
+   if (size != (size_t)size)   /* size does not survive conversion to size_t */
+     ret = NULL;
+   else if (size == (png_uint_32)65536L)   /* exact-64K requests use the table */
+   {
+      if (png_ptr->offset_table == NULL)
+      {
+         /* try to see if we need to do any of this fancy stuff */
+         ret = farmalloc(size);
+         if (ret == NULL || ((png_size_t)ret & 0xffff))
+         {
+            int num_blocks;
+            png_uint_32 total_size;
+            png_bytep table;
+            int i;
+            png_byte huge * hptr;
+
+            if (ret != NULL)
+            {
+               farfree(ret);   /* unusable: low 16 bits of the pointer are set */
+               ret = NULL;
+            }
+
+            /* block count is derived from the zlib window and memory levels */
+            if(png_ptr->zlib_window_bits > 14)
+               num_blocks = (int)(1 << (png_ptr->zlib_window_bits - 14));
+            else
+               num_blocks = 1;
+            if (png_ptr->zlib_mem_level >= 7)
+               num_blocks += (int)(1 << (png_ptr->zlib_mem_level - 7));
+            else
+               num_blocks++;
+
+            /* 16 spare bytes leave room for the rounding of hptr below */
+            total_size = ((png_uint_32)65536L) * (png_uint_32)num_blocks+16;
+
+            table = farmalloc(total_size);
+
+            if (table == NULL)
+            {
+#ifndef PNG_USER_MEM_SUPPORTED
+               if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+                  png_error(png_ptr, "Out Of Memory."); /* Note "O" and "M" */
+               else
+                  png_warning(png_ptr, "Out Of Memory.");
+#endif
+               return (NULL);
+            }
+
+            if ((png_size_t)table & 0xfff0)
+            {
+#ifndef PNG_USER_MEM_SUPPORTED
+               if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+                  png_error(png_ptr,
+                    "Farmalloc didn't return normalized pointer");
+               else
+                  png_warning(png_ptr,
+                    "Farmalloc didn't return normalized pointer");
+#endif
+               return (NULL);   /* NOTE(review): table is not freed on this path */
+            }
+
+            png_ptr->offset_table = table;
+            png_ptr->offset_table_ptr = farmalloc(num_blocks *
+               png_sizeof (png_bytep));
+
+            if (png_ptr->offset_table_ptr == NULL)
+            {
+#ifndef PNG_USER_MEM_SUPPORTED
+               if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+                  png_error(png_ptr, "Out Of memory."); /* Note "O" and "M" */
+               else
+                  png_warning(png_ptr, "Out Of memory.");
+#endif
+               return (NULL);
+            }
+
+            hptr = (png_byte huge *)table;
+            if ((png_size_t)hptr & 0xf)   /* round hptr up to a multiple of 16 */
+            {
+               hptr = (png_byte huge *)((long)(hptr) & 0xfffffff0L);
+               hptr = hptr + 16L;  /* "hptr += 16L" fails on Turbo C++ 3.0 */
+            }
+            for (i = 0; i < num_blocks; i++)   /* one table entry per 64K block */
+            {
+               png_ptr->offset_table_ptr[i] = (png_bytep)hptr;
+               hptr = hptr + (png_uint_32)65536L;  /* "+=" fails on TC++3.0 */
+            }
+
+            png_ptr->offset_table_number = num_blocks;
+            png_ptr->offset_table_count = 0;
+            png_ptr->offset_table_count_free = 0;
+         }
+      }
+
+      if (png_ptr->offset_table_count >= png_ptr->offset_table_number)
+      {
+#ifndef PNG_USER_MEM_SUPPORTED
+         if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+            png_error(png_ptr, "Out of Memory."); /* Note "o" and "M" */
+         else
+            png_warning(png_ptr, "Out of Memory.");
+#endif
+         return (NULL);   /* every table slot has already been handed out */
+      }
+
+      ret = png_ptr->offset_table_ptr[png_ptr->offset_table_count++];
+   }
+   else
+      ret = farmalloc(size);
+
+#ifndef PNG_USER_MEM_SUPPORTED
+   if (ret == NULL)
+   {
+      if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+         png_error(png_ptr, "Out of memory."); /* Note "o" and "m" */
+      else
+         png_warning(png_ptr, "Out of memory."); /* Note "o" and "m" */
+   }
+#endif
+
+   return (ret);
+}
+
+/* free a pointer allocated by png_malloc().  In the default
+   configuration, png_ptr is not used, but is passed in case it
+   is needed.  If ptr is NULL, return without taking any action. */
+void PNGAPI
+png_free(png_structp png_ptr, png_voidp ptr)
+{
+   if (png_ptr == NULL || ptr == NULL)
+      return;
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   if (png_ptr->free_fn != NULL)   /* a user deallocator takes precedence */
+   {
+      (*(png_ptr->free_fn))(png_ptr, ptr);
+      return;
+   }
+   else png_free_default(png_ptr, ptr);
+}
+
+void PNGAPI
+png_free_default(png_structp png_ptr, png_voidp ptr)
+{
+#endif /* PNG_USER_MEM_SUPPORTED */
+
+   if(png_ptr == NULL) return;
+
+   if (png_ptr->offset_table != NULL)
+   {
+      int i;
+
+      /* is ptr one of the blocks handed out from the offset table? */
+      for (i = 0; i < png_ptr->offset_table_count; i++)
+      {
+         if (ptr == png_ptr->offset_table_ptr[i])
+         {
+            ptr = NULL;   /* table slot: must not reach farfree(ptr) below */
+            png_ptr->offset_table_count_free++;
+            break;
+         }
+      }
+      /* free count has caught up with the hand-out count: drop both tables */
+      if (png_ptr->offset_table_count_free == png_ptr->offset_table_count)
+      {
+         farfree(png_ptr->offset_table);
+         farfree(png_ptr->offset_table_ptr);
+         png_ptr->offset_table = NULL;
+         png_ptr->offset_table_ptr = NULL;
+      }
+   }
+   if (ptr != NULL)   /* still set, so it did not come from the table */
+   {
+      farfree(ptr);
+   }
+}
+
+#else /* Not the Borland DOS special memory handler */
+
+/* Allocate memory for a png_struct or a png_info.  The malloc and
+   memset can be replaced by a single call to calloc() if this is thought
+   to improve performance noticeably. */
+png_voidp /* PRIVATE */
+png_create_struct(int type)
+{
+#ifdef PNG_USER_MEM_SUPPORTED
+   /* No allocator callback here: delegate with NULL function and context. */
+   return (png_create_struct_2(type, png_malloc_ptr_NULL, png_voidp_NULL));
+}
+
+/* Create a zero-filled png_struct (PNG_STRUCT_PNG) or png_info
+   (PNG_STRUCT_INFO).  A non-NULL malloc_fn does the allocating and is
+   handed mem_ptr through a temporary png_struct; otherwise the platform
+   allocator is used.  NULL is returned for any other type, or when the
+   allocator returns NULL. */
+png_voidp /* PRIVATE */
+png_create_struct_2(int type, png_malloc_ptr malloc_fn, png_voidp mem_ptr)
+{
+#endif /* PNG_USER_MEM_SUPPORTED */
+   png_size_t nbytes;
+   png_voidp new_struct;
+
+   if (type != PNG_STRUCT_INFO && type != PNG_STRUCT_PNG)
+      return (NULL);
+   nbytes = (type == PNG_STRUCT_INFO) ? png_sizeof(png_info)
+                                      : png_sizeof(png_struct);
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   if (malloc_fn != NULL)
+   {
+      png_struct scratch;   /* exists only to carry mem_ptr */
+
+      scratch.mem_ptr = mem_ptr;
+      new_struct = (*(malloc_fn))(&scratch, nbytes);
+   }
+   else
+#endif /* PNG_USER_MEM_SUPPORTED */
+   {
+#if defined(__TURBOC__) && !defined(__FLAT__)
+      new_struct = (png_voidp)farmalloc(nbytes);
+#elif defined(_MSC_VER) && defined(MAXSEG_64K)
+      new_struct = (png_voidp)halloc(nbytes,1);
+#else
+      new_struct = (png_voidp)malloc(nbytes);
+#endif
+   }
+
+   /* Both allocation routes finish the same way: clear, then return. */
+   if (new_struct != NULL)
+      png_memset(new_struct, 0, nbytes);
+
+   return (new_struct);
+}
+
+
+/* Free memory allocated by a png_create_struct() call */
+void /* PRIVATE */
+png_destroy_struct(png_voidp struct_ptr)
+{
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_destroy_struct_2(struct_ptr, png_free_ptr_NULL, png_voidp_NULL);
+}
+
+/* Release a structure obtained from png_create_struct().  A non-NULL
+   free_fn is called with a temporary png_struct carrying mem_ptr;
+   otherwise the platform deallocator is used.  NULL is ignored. */
+void /* PRIVATE */
+png_destroy_struct_2(png_voidp struct_ptr, png_free_ptr free_fn,
+    png_voidp mem_ptr)
+{
+#endif /* PNG_USER_MEM_SUPPORTED */
+   if (struct_ptr == NULL)
+      return;
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   if (free_fn != NULL)
+   {
+      png_struct scratch;   /* exists only to carry mem_ptr */
+
+      scratch.mem_ptr = mem_ptr;
+      (*(free_fn))(&scratch, struct_ptr);
+      return;
+   }
+#endif /* PNG_USER_MEM_SUPPORTED */
+#if defined(__TURBOC__) && !defined(__FLAT__)
+   farfree(struct_ptr);
+#elif defined(_MSC_VER) && defined(MAXSEG_64K)
+   hfree(struct_ptr);
+#else
+   free(struct_ptr);
+#endif
+}
+
+/* Allocate memory.  For reasonable files, size should never exceed
+   64K.  However, zlib may allocate more than 64K if you don't tell
+   it not to.  See zconf.h and png.h for more information.  zlib does
+   need to allocate exactly 64K, so whatever you call here must
+   have the ability to do that. */
+
+png_voidp PNGAPI
+png_malloc(png_structp png_ptr, png_uint_32 size)
+{
+   png_voidp ret;
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   if (png_ptr == NULL || size == 0)
+      return (NULL);
+   /* a registered user allocator takes precedence over the default one */
+   if(png_ptr->malloc_fn != NULL)
+       ret = ((png_voidp)(*(png_ptr->malloc_fn))(png_ptr, (png_size_t)size));
+   else
+       ret = (png_malloc_default(png_ptr, size));
+   if (ret == NULL && (png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+       png_error(png_ptr, "Out of Memory!");
+   return (ret);
+}
+/* Default allocator; returns NULL for a NULL png_ptr or a zero size. */
+png_voidp PNGAPI
+png_malloc_default(png_structp png_ptr, png_uint_32 size)
+{
+   png_voidp ret;
+#endif /* PNG_USER_MEM_SUPPORTED */
+
+   if (png_ptr == NULL || size == 0)
+      return (NULL);
+
+#ifdef PNG_MAX_MALLOC_64K
+   if (size > (png_uint_32)65536L)
+   {
+#ifndef PNG_USER_MEM_SUPPORTED
+      if ((png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+         png_error(png_ptr, "Cannot Allocate > 64K");
+      else
+#endif
+         return NULL;
+   }
+#endif
+
+ /* Check for overflow: refuse sizes the allocator's argument cannot hold */
+#if defined(__TURBOC__) && !defined(__FLAT__)
+ if (size != (unsigned long)size)
+   ret = NULL;
+ else
+   ret = farmalloc(size);
+#else
+# if defined(_MSC_VER) && defined(MAXSEG_64K)
+ if (size != (unsigned long)size)
+   ret = NULL;
+ else
+   ret = halloc(size, 1);
+# else
+ if (size != (size_t)size)
+   ret = NULL;
+ else
+   ret = malloc((size_t)size);
+# endif
+#endif
+
+#ifndef PNG_USER_MEM_SUPPORTED
+   if (ret == NULL && (png_ptr->flags&PNG_FLAG_MALLOC_NULL_MEM_OK) == 0)
+      png_error(png_ptr, "Out of Memory");
+#endif
+
+   return (ret);   /* may be NULL when PNG_FLAG_MALLOC_NULL_MEM_OK is set */
+}
+
+/* Free a pointer allocated by png_malloc().  If ptr is NULL, return
+   without taking any action. */
+void PNGAPI
+png_free(png_structp png_ptr, png_voidp ptr)
+{
+   if (png_ptr == NULL || ptr == NULL)
+      return;
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   /* Prefer the application's deallocator when one is registered. */
+   if (png_ptr->free_fn == NULL)
+      png_free_default(png_ptr, ptr);
+   else
+      (*(png_ptr->free_fn))(png_ptr, ptr);
+}
+
+/* Built-in deallocator: hands ptr to the platform's free routine.
+   Does nothing when either argument is NULL. */
+void PNGAPI
+png_free_default(png_structp png_ptr, png_voidp ptr)
+{
+   if (png_ptr == NULL || ptr == NULL)
+      return;
+
+#endif /* PNG_USER_MEM_SUPPORTED */
+
+#if defined(__TURBOC__) && !defined(__FLAT__)
+   farfree(ptr);
+#elif defined(_MSC_VER) && defined(MAXSEG_64K)
+   hfree(ptr);
+#else
+   free(ptr);
+#endif
+}
+
+#endif /* Not Borland DOS special memory handler */
+
+#if defined(PNG_1_0_X)
+#  define png_malloc_warn png_malloc
+#else
+/* This function was added at libpng version 1.2.3.  The png_malloc_warn()
+ * function will set up png_malloc() to issue a png_warning and return NULL
+ * instead of issuing a png_error, if it fails to allocate the requested
+ * memory.
+ */
+png_voidp PNGAPI
+png_malloc_warn(png_structp png_ptr, png_uint_32 size)
+{
+   png_voidp block;
+   png_uint_32 old_flags;
+   if (png_ptr == NULL)
+      return (NULL);
+   old_flags = png_ptr->flags;   /* restored once the attempt is over */
+   png_ptr->flags = old_flags | PNG_FLAG_MALLOC_NULL_MEM_OK;
+   block = (png_voidp)png_malloc((png_structp)png_ptr, size);
+   png_ptr->flags = old_flags;
+   return(block);
+}
+#endif
+
+/* png_memcpy() guarded against a length that does not fit png_size_t. */
+png_voidp PNGAPI
+png_memcpy_check (png_structp png_ptr, png_voidp s1, png_voidp s2,
+   png_uint_32 length)
+{
+   png_size_t nbytes = (png_size_t)length;
+
+   /* a narrower png_size_t would not survive the round trip */
+   if (length != (png_uint_32)nbytes)
+      png_error(png_ptr,"Overflow in png_memcpy_check.");
+   return(png_memcpy (s1, s2, nbytes));
+}
+
+/* png_memset() guarded against a length that does not fit png_size_t. */
+png_voidp PNGAPI
+png_memset_check (png_structp png_ptr, png_voidp s1, int value,
+   png_uint_32 length)
+{
+   png_size_t nbytes = (png_size_t)length;
+
+   /* a narrower png_size_t would not survive the round trip */
+   if (length != (png_uint_32)nbytes)
+      png_error(png_ptr,"Overflow in png_memset_check.");
+
+   return (png_memset (s1, value, nbytes));
+}
+
+#ifdef PNG_USER_MEM_SUPPORTED
+/* This function is called when the application wants to use another method
+ * of allocating and freeing memory.
+ */
+void PNGAPI
+png_set_mem_fn(png_structp png_ptr, png_voidp mem_ptr, png_malloc_ptr
+  malloc_fn, png_free_ptr free_fn)
+{
+   if (png_ptr == NULL)
+      return;   /* nothing to configure */
+   png_ptr->mem_ptr = mem_ptr;
+   png_ptr->malloc_fn = malloc_fn;
+   png_ptr->free_fn = free_fn;
+}
+
+/* This function returns a pointer to the mem_ptr associated with the user
+ * functions.  The application should free any memory associated with this
+ * pointer before png_write_destroy and png_read_destroy are called.
+ */
+png_voidp PNGAPI
+png_get_mem_ptr(png_structp png_ptr)
+{
+   /* without a png_struct there is no mem_ptr to report */
+   return (png_ptr != NULL ? (png_voidp)png_ptr->mem_ptr : NULL);
+}
+#endif /* PNG_USER_MEM_SUPPORTED */
+#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/src/libpng/pngpread.c b/src/libpng/pngpread.c
new file mode 100644
index 0000000..cb6f6d3
--- /dev/null
+++ b/src/libpng/pngpread.c
@@ -0,0 +1,1584 @@
+
+/* pngpread.c - read a png file in push mode
+ *
+ * Last changed in libpng 1.2.22 [October 13, 2007]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+
+/* push model modes */
+#define PNG_READ_SIG_MODE   0
+#define PNG_READ_CHUNK_MODE 1
+#define PNG_READ_IDAT_MODE  2
+#define PNG_SKIP_MODE       3
+#define PNG_READ_tEXt_MODE  4
+#define PNG_READ_zTXt_MODE  5
+#define PNG_READ_DONE_MODE  6
+#define PNG_READ_iTXt_MODE  7
+#define PNG_ERROR_MODE      8
+
+void PNGAPI
+png_process_data(png_structp png_ptr, png_infop info_ptr,
+   png_bytep buffer, png_size_t buffer_size)
+{
+   /* Feed buffer_size bytes at buffer to the push reader.  info_ptr is
+    * dereferenced by the signature stage, so a NULL one is rejected too. */
+   if (png_ptr == NULL || info_ptr == NULL) return;
+   png_push_restore_buffer(png_ptr, buffer, buffer_size);
+
+   while (png_ptr->buffer_size)
+      png_process_some_data(png_ptr, info_ptr);
+}
+
+/* Dispatch on the push reader's current state: what happens to the incoming
+ * bytes depends on what was in progress when the data last ran out.
+ */
+void /* PRIVATE */
+png_process_some_data(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr == NULL)
+      return;
+
+   switch (png_ptr->process_mode)
+   {
+      case PNG_READ_SIG_MODE:
+         /* signature bytes are still outstanding */
+         png_push_read_sig(png_ptr, info_ptr);
+         break;
+      case PNG_READ_CHUNK_MODE:
+         /* expecting a chunk header or a complete chunk */
+         png_push_read_chunk(png_ptr, info_ptr);
+         break;
+      case PNG_READ_IDAT_MODE:
+         /* inside the IDAT stream; note that info_ptr is not passed
+          * to this handler */
+         png_push_read_IDAT(png_ptr);
+         break;
+#if defined(PNG_READ_tEXt_SUPPORTED)
+      case PNG_READ_tEXt_MODE:
+         /* a tEXt chunk is in progress
+          * (state entered by png_push_handle_tEXt) */
+         png_push_read_tEXt(png_ptr, info_ptr);
+         break;
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+      case PNG_READ_zTXt_MODE:
+         /* a zTXt chunk is in progress
+          * (only built when zTXt reading is supported) */
+         png_push_read_zTXt(png_ptr, info_ptr);
+         break;
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+      case PNG_READ_iTXt_MODE:
+         /* an iTXt chunk is in progress
+          * (only built when iTXt reading is supported) */
+         png_push_read_iTXt(png_ptr, info_ptr);
+         break;
+#endif
+      case PNG_SKIP_MODE:
+         /* skipping skip_length bytes of chunk data
+          * (state entered by png_push_crc_skip) */
+         png_push_crc_finish(png_ptr);
+         break;
+      default:
+         /* PNG_READ_DONE_MODE, PNG_ERROR_MODE or anything unexpected:
+          * drop the remaining input so the caller's loop terminates */
+         png_ptr->buffer_size = 0;
+         break;
+   }
+}
+
+/* Pull whatever signature bytes are still outstanding out of the input and
+ * compare them with the PNG signature.  Some bytes may already be counted
+ * in png_ptr->sig_bytes, either because the calling application checked
+ * them itself or because an earlier call to this routine consumed part of
+ * the signature.
+ */
+void /* PRIVATE */
+png_push_read_sig(png_structp png_ptr, png_infop info_ptr)
+{
+   png_size_t have = png_ptr->sig_bytes;  /* bytes accounted for on entry */
+   png_size_t want = 8 - have;            /* bytes still to be read */
+
+   /* Never take more than is buffered right now. */
+   if (want > png_ptr->buffer_size)
+      want = png_ptr->buffer_size;
+
+   png_push_fill_buffer(png_ptr, &(info_ptr->signature[have]), want);
+   png_ptr->sig_bytes = (png_byte)(png_ptr->sig_bytes + want);
+
+   if (png_sig_cmp(info_ptr->signature, have, want) == 0)
+   {
+      /* The new bytes match; leave signature mode once all 8 are in. */
+      if (png_ptr->sig_bytes >= 8)
+         png_ptr->process_mode = PNG_READ_CHUNK_MODE;
+   }
+   else if (have < 4 &&
+            png_sig_cmp(info_ptr->signature, have, want - 4))
+   {
+      /* mismatch reported as a non-PNG stream */
+      png_error(png_ptr, "Not a PNG file");
+   }
+   else
+   {
+      /* mismatch reported as text-mode damage */
+      png_error(png_ptr, "PNG file corrupted by ASCII conversion");
+   }
+}
+/* Parse the next chunk header (unless one is already held) and dispatch on the chunk name. */
+void /* PRIVATE */
+png_push_read_chunk(png_structp png_ptr, png_infop info_ptr)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+      PNG_CONST PNG_IHDR;
+      PNG_CONST PNG_IDAT;
+      PNG_CONST PNG_IEND;
+      PNG_CONST PNG_PLTE;
+#if defined(PNG_READ_bKGD_SUPPORTED)
+      PNG_CONST PNG_bKGD;
+#endif
+#if defined(PNG_READ_cHRM_SUPPORTED)
+      PNG_CONST PNG_cHRM;
+#endif
+#if defined(PNG_READ_gAMA_SUPPORTED)
+      PNG_CONST PNG_gAMA;
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+      PNG_CONST PNG_hIST;
+#endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+      PNG_CONST PNG_iCCP;
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+      PNG_CONST PNG_iTXt;
+#endif
+#if defined(PNG_READ_oFFs_SUPPORTED)
+      PNG_CONST PNG_oFFs;
+#endif
+#if defined(PNG_READ_pCAL_SUPPORTED)
+      PNG_CONST PNG_pCAL;
+#endif
+#if defined(PNG_READ_pHYs_SUPPORTED)
+      PNG_CONST PNG_pHYs;
+#endif
+#if defined(PNG_READ_sBIT_SUPPORTED)
+      PNG_CONST PNG_sBIT;
+#endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+      PNG_CONST PNG_sCAL;
+#endif
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      PNG_CONST PNG_sRGB;
+#endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+      PNG_CONST PNG_sPLT;
+#endif
+#if defined(PNG_READ_tEXt_SUPPORTED)
+      PNG_CONST PNG_tEXt;
+#endif
+#if defined(PNG_READ_tIME_SUPPORTED)
+      PNG_CONST PNG_tIME;
+#endif
+#if defined(PNG_READ_tRNS_SUPPORTED)
+      PNG_CONST PNG_tRNS;
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+      PNG_CONST PNG_zTXt;
+#endif
+#endif /* PNG_USE_LOCAL_ARRAYS */
+   /* First we make sure we have enough data for the 4 byte chunk name
+    * and the 4 byte chunk length before proceeding with decoding the
+    * chunk data.  To fully decode each of these chunks, we also make
+    * sure we have enough data in the buffer for the 4 byte CRC at the
+    * end of every chunk (except IDAT, which is handled separately).
+    */
+   if (!(png_ptr->mode & PNG_HAVE_CHUNK_HEADER))
+   {
+      png_byte chunk_length[4];
+
+      if (png_ptr->buffer_size < 8) /* length + name not buffered yet */
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+
+      png_push_fill_buffer(png_ptr, chunk_length, 4);
+      png_ptr->push_length = png_get_uint_31(png_ptr,chunk_length);
+      png_reset_crc(png_ptr);
+      png_crc_read(png_ptr, png_ptr->chunk_name, 4);
+      png_ptr->mode |= PNG_HAVE_CHUNK_HEADER;
+   }
+   /* An IDAT seen in chunk mode once PNG_AFTER_IDAT is set gets flagged. */
+   if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+     if(png_ptr->mode & PNG_AFTER_IDAT)
+        png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT;
+
+   if (!png_memcmp(png_ptr->chunk_name, png_IHDR, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size) /* data + CRC incomplete */
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_IHDR(png_ptr, info_ptr, png_ptr->push_length);
+   }
+   else if (!png_memcmp(png_ptr->chunk_name, png_IEND, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_IEND(png_ptr, info_ptr, png_ptr->push_length);
+
+      png_ptr->process_mode = PNG_READ_DONE_MODE;
+      png_push_have_end(png_ptr, info_ptr);
+   }
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+   else if (png_handle_as_unknown(png_ptr, png_ptr->chunk_name))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+         png_ptr->mode |= PNG_HAVE_IDAT;
+      png_handle_unknown(png_ptr, info_ptr, png_ptr->push_length);
+      if (!png_memcmp(png_ptr->chunk_name, png_PLTE, 4))
+         png_ptr->mode |= PNG_HAVE_PLTE;
+      else if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+      {
+         if (!(png_ptr->mode & PNG_HAVE_IHDR))
+            png_error(png_ptr, "Missing IHDR before IDAT");
+         else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+                  !(png_ptr->mode & PNG_HAVE_PLTE))
+            png_error(png_ptr, "Missing PLTE before IDAT");
+      }
+   }
+#endif
+   else if (!png_memcmp(png_ptr->chunk_name, png_PLTE, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_PLTE(png_ptr, info_ptr, png_ptr->push_length);
+   }
+   else if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+   {
+      /* If we reach an IDAT chunk, this means we have read all of the
+       * header chunks, and we can start reading the image (or if this
+       * is called after the image has been read - we have an error).
+       */
+     if (!(png_ptr->mode & PNG_HAVE_IHDR))
+       png_error(png_ptr, "Missing IHDR before IDAT");
+     else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+         !(png_ptr->mode & PNG_HAVE_PLTE))
+       png_error(png_ptr, "Missing PLTE before IDAT");
+
+      if (png_ptr->mode & PNG_HAVE_IDAT)
+      {
+         if (!(png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT))
+           if (png_ptr->push_length == 0)
+              return; /* NOTE(review): PNG_HAVE_CHUNK_HEADER stays set here - confirm intended */
+
+         if (png_ptr->mode & PNG_AFTER_IDAT)
+            png_error(png_ptr, "Too many IDAT's found");
+      }
+
+      png_ptr->idat_size = png_ptr->push_length;
+      png_ptr->mode |= PNG_HAVE_IDAT;
+      png_ptr->process_mode = PNG_READ_IDAT_MODE;
+      png_push_have_info(png_ptr, info_ptr);
+      png_ptr->zstream.avail_out = (uInt)png_ptr->irowbytes;
+      png_ptr->zstream.next_out = png_ptr->row_buf;
+      return; /* header flag stays set; png_push_read_IDAT continues with idat_size */
+   }
+#if defined(PNG_READ_gAMA_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_gAMA, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_gAMA(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_sBIT_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_sBIT, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_sBIT(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_cHRM_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_cHRM, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_cHRM(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_sRGB_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_sRGB, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_sRGB(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_iCCP, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_iCCP(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_sPLT, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_sPLT(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_tRNS_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_tRNS, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_tRNS(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_bKGD_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_bKGD, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_bKGD(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_hIST, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_hIST(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_pHYs_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_pHYs, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_pHYs(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_oFFs_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_oFFs, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_oFFs(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_pCAL_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_pCAL, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_pCAL(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_sCAL, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_sCAL(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_tIME_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_tIME, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_handle_tIME(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_tEXt_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_tEXt, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_push_handle_tEXt(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_zTXt, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_push_handle_zTXt(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+   else if (!png_memcmp(png_ptr->chunk_name, png_iTXt, 4))
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_push_handle_iTXt(png_ptr, info_ptr, png_ptr->push_length);
+   }
+#endif
+   else /* none of the chunk names tested above matched */
+   {
+      if (png_ptr->push_length + 4 > png_ptr->buffer_size)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+      png_push_handle_unknown(png_ptr, info_ptr, png_ptr->push_length);
+   }
+
+   png_ptr->mode &= ~PNG_HAVE_CHUNK_HEADER; /* next call reads a fresh header */
+}
+/* Record how many bytes to skip and put the push reader into PNG_SKIP_MODE. */
+void /* PRIVATE */
+png_push_crc_skip(png_structp png_ptr, png_uint_32 skip)
+{
+   png_ptr->skip_length = skip;
+   png_ptr->process_mode = PNG_SKIP_MODE;
+}
+
+void /* PRIVATE */
+png_push_crc_finish(png_structp png_ptr)
+{
+   /* Consume up to skip_length bytes, running each span through
+    * png_calculate_crc(): first from the save buffer, then from the current
+    * buffer.  When nothing is left to skip and 4 more bytes are buffered,
+    * png_crc_finish() is called and the reader returns to chunk mode.
+    */
+   png_size_t take;
+
+   if (png_ptr->skip_length != 0 && png_ptr->save_buffer_size != 0)
+   {
+      /* take = min(skip_length, save_buffer_size) */
+      take = (png_ptr->skip_length < (png_uint_32)png_ptr->save_buffer_size) ?
+         (png_size_t)png_ptr->skip_length : png_ptr->save_buffer_size;
+      png_calculate_crc(png_ptr, png_ptr->save_buffer_ptr, take);
+
+      png_ptr->save_buffer_ptr += take;
+      png_ptr->save_buffer_size -= take;
+      png_ptr->buffer_size -= take;
+      png_ptr->skip_length -= take;
+   }
+   if (png_ptr->skip_length != 0 && png_ptr->current_buffer_size != 0)
+   {
+      /* take = min(skip_length, current_buffer_size) */
+      take = (png_ptr->skip_length <
+            (png_uint_32)png_ptr->current_buffer_size) ?
+         (png_size_t)png_ptr->skip_length : png_ptr->current_buffer_size;
+      png_calculate_crc(png_ptr, png_ptr->current_buffer_ptr, take);
+
+      png_ptr->current_buffer_ptr += take;
+      png_ptr->current_buffer_size -= take;
+      png_ptr->buffer_size -= take;
+      png_ptr->skip_length -= take;
+   }
+   if (png_ptr->skip_length != 0)
+      return;
+
+   if (png_ptr->buffer_size < 4)
+   {
+      /* fewer than 4 bytes buffered; stash them and wait for more */
+      png_push_save_buffer(png_ptr);
+      return;
+   }
+   png_crc_finish(png_ptr, 0);
+   png_ptr->process_mode = PNG_READ_CHUNK_MODE;
+}
+
+void PNGAPI
+png_push_fill_buffer(png_structp png_ptr, png_bytep buffer, png_size_t length)
+{
+   /* Copy up to length bytes into buffer, draining the save buffer before
+    * the current buffer, and advance the read positions accordingly.  Fewer
+    * bytes are copied if the two buffers together hold less than length.
+    */
+   png_bytep out = buffer;
+   png_size_t n;
+
+   if (png_ptr == NULL)
+      return;
+
+   if (png_ptr->save_buffer_size != 0)
+   {
+      /* n = min(length, save_buffer_size) */
+      n = (length < png_ptr->save_buffer_size) ?
+         length : png_ptr->save_buffer_size;
+      png_memcpy(out, png_ptr->save_buffer_ptr, n);
+
+      out += n;
+      length -= n;
+      png_ptr->save_buffer_ptr += n;
+      png_ptr->save_buffer_size -= n;
+      png_ptr->buffer_size -= n;
+   }
+
+   if (length != 0 && png_ptr->current_buffer_size != 0)
+   {
+      /* n = min(length, current_buffer_size) */
+      n = (length < png_ptr->current_buffer_size) ?
+         length : png_ptr->current_buffer_size;
+      png_memcpy(out, png_ptr->current_buffer_ptr, n);
+      png_ptr->current_buffer_ptr += n;
+      png_ptr->current_buffer_size -= n;
+      png_ptr->buffer_size -= n;
+   }
+}
+
+void /* PRIVATE */
+png_push_save_buffer(png_structp png_ptr)
+{
+   /* Stash all unconsumed input in save_buffer for the next
+    * png_process_data() call and report the input as used (buffer_size 0). */
+   if (png_ptr->save_buffer_size &&
+       png_ptr->save_buffer_ptr != png_ptr->save_buffer)
+   {
+      /* Slide the unread tail down to the start of the save buffer. */
+      png_size_t i,istop;
+      png_bytep sp, dp;
+      istop = png_ptr->save_buffer_size;
+      for (i = 0, sp = png_ptr->save_buffer_ptr, dp = png_ptr->save_buffer;
+         i < istop; i++, sp++, dp++)
+         *dp = *sp;
+   }
+   if (png_ptr->save_buffer_size + png_ptr->current_buffer_size >
+      png_ptr->save_buffer_max)
+   {
+      /* Grow the save buffer to fit both parts plus 256 bytes of slack. */
+      png_size_t new_max;
+      png_bytep old_buffer;
+      if (png_ptr->save_buffer_size > PNG_SIZE_MAX -
+         (png_ptr->current_buffer_size + 256))
+        png_error(png_ptr, "Potential overflow of save_buffer");
+      new_max = png_ptr->save_buffer_size + png_ptr->current_buffer_size + 256;
+      old_buffer = png_ptr->save_buffer;
+      png_ptr->save_buffer = (png_bytep)png_malloc(png_ptr,
+         (png_uint_32)new_max);
+      /* old_buffer may be NULL (nothing allocated yet); memcpy from a null
+       * pointer is undefined even when the length is zero, so skip it. */
+      if (old_buffer != NULL)
+         png_memcpy(png_ptr->save_buffer, old_buffer,
+            png_ptr->save_buffer_size);
+      png_free(png_ptr, old_buffer);
+      png_ptr->save_buffer_max = new_max;
+   }
+   if (png_ptr->current_buffer_size)
+   {
+      /* Append the remaining current-buffer bytes behind the saved ones. */
+      png_memcpy(png_ptr->save_buffer + png_ptr->save_buffer_size,
+         png_ptr->current_buffer_ptr, png_ptr->current_buffer_size);
+      png_ptr->save_buffer_size += png_ptr->current_buffer_size;
+      png_ptr->current_buffer_size = 0;
+   }
+   png_ptr->save_buffer_ptr = png_ptr->save_buffer;
+   png_ptr->buffer_size = 0;
+}
+/* Point the reader at a new input block; buffer_size also counts saved bytes. */
+void /* PRIVATE */
+png_push_restore_buffer(png_structp png_ptr, png_bytep buffer,
+   png_size_t buffer_length)
+{
+   png_ptr->current_buffer = buffer;
+   png_ptr->current_buffer_ptr = buffer;
+   png_ptr->current_buffer_size = buffer_length;
+   png_ptr->buffer_size = png_ptr->save_buffer_size + buffer_length;
+}
+/* Push-mode IDAT reader: pass buffered IDAT bytes to png_process_IDAT_data(), chunk by chunk. */
+void /* PRIVATE */
+png_push_read_IDAT(png_structp png_ptr)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_CONST PNG_IDAT;
+#endif
+   if (!(png_ptr->mode & PNG_HAVE_CHUNK_HEADER))
+   {
+      png_byte chunk_length[4];
+
+      if (png_ptr->buffer_size < 8) /* need 4 length + 4 name bytes */
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+
+      png_push_fill_buffer(png_ptr, chunk_length, 4);
+      png_ptr->push_length = png_get_uint_31(png_ptr,chunk_length);
+      png_reset_crc(png_ptr);
+      png_crc_read(png_ptr, png_ptr->chunk_name, 4);
+      png_ptr->mode |= PNG_HAVE_CHUNK_HEADER;
+
+      if (png_memcmp(png_ptr->chunk_name, png_IDAT, 4)) /* not another IDAT */
+      {
+         /* The header just read stays held for png_push_read_chunk. */
+         png_ptr->process_mode = PNG_READ_CHUNK_MODE;
+         if (!(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED))
+            png_error(png_ptr, "Not enough compressed data");
+         return;
+      }
+
+      png_ptr->idat_size = png_ptr->push_length;
+   }
+   if (png_ptr->idat_size && png_ptr->save_buffer_size) /* saved bytes first */
+   {
+      png_size_t save_size;
+
+      if (png_ptr->idat_size < (png_uint_32)png_ptr->save_buffer_size)
+      {
+         save_size = (png_size_t)png_ptr->idat_size;
+         /* check for overflow */
+         if((png_uint_32)save_size != png_ptr->idat_size)
+            png_error(png_ptr, "save_size overflowed in pngpread");
+      }
+      else
+         save_size = png_ptr->save_buffer_size;
+
+      /* The CRC covers every byte; inflation stops once zlib is finished. */
+      png_calculate_crc(png_ptr, png_ptr->save_buffer_ptr, save_size);
+      if (!(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED))
+         png_process_IDAT_data(png_ptr, png_ptr->save_buffer_ptr, save_size);
+      png_ptr->idat_size -= save_size;
+      png_ptr->buffer_size -= save_size;
+      png_ptr->save_buffer_size -= save_size;
+      png_ptr->save_buffer_ptr += save_size;
+   }
+   if (png_ptr->idat_size && png_ptr->current_buffer_size) /* then the current buffer */
+   {
+      png_size_t save_size;
+
+      if (png_ptr->idat_size < (png_uint_32)png_ptr->current_buffer_size)
+      {
+         save_size = (png_size_t)png_ptr->idat_size;
+         /* check for overflow */
+         if((png_uint_32)save_size != png_ptr->idat_size)
+            png_error(png_ptr, "save_size overflowed in pngpread");
+      }
+      else
+         save_size = png_ptr->current_buffer_size;
+
+      png_calculate_crc(png_ptr, png_ptr->current_buffer_ptr, save_size);
+      if (!(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED))
+        png_process_IDAT_data(png_ptr, png_ptr->current_buffer_ptr, save_size);
+
+      png_ptr->idat_size -= save_size;
+      png_ptr->buffer_size -= save_size;
+      png_ptr->current_buffer_size -= save_size;
+      png_ptr->current_buffer_ptr += save_size;
+   }
+   if (!png_ptr->idat_size) /* chunk data consumed; 4 more bytes needed to finish */
+   {
+      if (png_ptr->buffer_size < 4)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+
+      png_crc_finish(png_ptr, 0);
+      png_ptr->mode &= ~PNG_HAVE_CHUNK_HEADER; /* next call reads a new header */
+      png_ptr->mode |= PNG_AFTER_IDAT;
+   }
+}
+/* Inflate buffer_length bytes of IDAT data, processing a row each time the row buffer fills. */
+void /* PRIVATE */
+png_process_IDAT_data(png_structp png_ptr, png_bytep buffer,
+   png_size_t buffer_length)
+{
+   int ret;
+
+   if ((png_ptr->flags & PNG_FLAG_ZLIB_FINISHED) && buffer_length)
+      png_error(png_ptr, "Extra compression data");
+
+   png_ptr->zstream.next_in = buffer;
+   png_ptr->zstream.avail_in = (uInt)buffer_length;
+   for(;;)
+   {
+      ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH);
+      if (ret != Z_OK)
+      {
+         if (ret == Z_STREAM_END) /* zlib reports the end of the stream */
+         {
+            if (png_ptr->zstream.avail_in)
+               png_error(png_ptr, "Extra compressed data");
+            if (!(png_ptr->zstream.avail_out)) /* a full final row is pending */
+            {
+               png_push_process_row(png_ptr);
+            }
+
+            png_ptr->mode |= PNG_AFTER_IDAT;
+            png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
+            break;
+         }
+         else if (ret == Z_BUF_ERROR) /* zlib could make no progress; try again with more data */
+            break;
+         else
+            png_error(png_ptr, "Decompression Error");
+      }
+      if (!(png_ptr->zstream.avail_out)) /* row buffer is full */
+      {
+         /* All expected rows are already in: stop inflating instead of
+          * processing another row (the condition is split by the #if). */
+         if ((
+#if defined(PNG_READ_INTERLACING_SUPPORTED)
+             png_ptr->interlaced && png_ptr->pass > 6) ||
+             (!png_ptr->interlaced &&
+#endif
+             png_ptr->row_number == png_ptr->num_rows))
+         {
+           if (png_ptr->zstream.avail_in)
+             png_warning(png_ptr, "Too much data in IDAT chunks");
+           png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
+           break;
+         }
+         png_push_process_row(png_ptr);
+         png_ptr->zstream.avail_out = (uInt)png_ptr->irowbytes; /* re-arm output for the next row */
+         png_ptr->zstream.next_out = png_ptr->row_buf;
+      }
+      else /* row not complete yet */
+         break;
+   }
+}
+/* Unfilter and transform the freshly inflated row, then hand it to png_push_have_row(). */
+void /* PRIVATE */
+png_push_process_row(png_structp png_ptr)
+{
+   png_ptr->row_info.color_type = png_ptr->color_type;
+   png_ptr->row_info.width = png_ptr->iwidth;
+   png_ptr->row_info.channels = png_ptr->channels;
+   png_ptr->row_info.bit_depth = png_ptr->bit_depth;
+   png_ptr->row_info.pixel_depth = png_ptr->pixel_depth;
+
+   png_ptr->row_info.rowbytes = PNG_ROWBYTES(png_ptr->row_info.pixel_depth,
+       png_ptr->row_info.width);
+
+   /* row_buf[0] is passed as the filter argument; the row data starts at +1. */
+   png_read_filter_row(png_ptr, &(png_ptr->row_info),
+      png_ptr->row_buf + 1, png_ptr->prev_row + 1,
+      (int)(png_ptr->row_buf[0]));
+
+   png_memcpy_check(png_ptr, png_ptr->prev_row, png_ptr->row_buf,
+      png_ptr->rowbytes + 1); /* becomes prev_row for the next filter call */
+
+   if (png_ptr->transformations || (png_ptr->flags&PNG_FLAG_STRIP_ALPHA))
+      png_do_read_transformations(png_ptr);
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED)
+   /* blow up interlaced rows to full size */
+   if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE))
+   {
+      if (png_ptr->pass < 6)
+/*       old interface (pre-1.0.9):
+         png_do_read_interlace(&(png_ptr->row_info),
+            png_ptr->row_buf + 1, png_ptr->pass, png_ptr->transformations);
+ */
+         png_do_read_interlace(png_ptr);
+
+    switch (png_ptr->pass)
+    {
+         case 0:
+         {
+            int i;
+            for (i = 0; i < 8 && png_ptr->pass == 0; i++)
+            {
+               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+               png_read_push_finish_row(png_ptr); /* updates png_ptr->pass */
+            }
+            if (png_ptr->pass == 2) /* pass 1 might be empty */
+            {
+               for (i = 0; i < 4 && png_ptr->pass == 2; i++)
+               {
+                  png_push_have_row(png_ptr, png_bytep_NULL); /* NULL row: presumably "no new data" - confirm in png_push_have_row */
+                  png_read_push_finish_row(png_ptr);
+               }
+            }
+            if (png_ptr->pass == 4 && png_ptr->height <= 4)
+            {
+               for (i = 0; i < 2 && png_ptr->pass == 4; i++)
+               {
+                  png_push_have_row(png_ptr, png_bytep_NULL);
+                  png_read_push_finish_row(png_ptr);
+               }
+            }
+            if (png_ptr->pass == 6 && png_ptr->height <= 4)
+            {
+                png_push_have_row(png_ptr, png_bytep_NULL);
+                png_read_push_finish_row(png_ptr);
+            }
+            break;
+         }
+         case 1:
+         {
+            int i;
+            for (i = 0; i < 8 && png_ptr->pass == 1; i++)
+            {
+               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+               png_read_push_finish_row(png_ptr);
+            }
+            if (png_ptr->pass == 2) /* skip top 4 generated rows */
+            {
+               for (i = 0; i < 4 && png_ptr->pass == 2; i++)
+               {
+                  png_push_have_row(png_ptr, png_bytep_NULL);
+                  png_read_push_finish_row(png_ptr);
+               }
+            }
+            break;
+         }
+         case 2:
+         {
+            int i;
+            for (i = 0; i < 4 && png_ptr->pass == 2; i++)
+            {
+               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+               png_read_push_finish_row(png_ptr);
+            }
+            for (i = 0; i < 4 && png_ptr->pass == 2; i++)
+            {
+               png_push_have_row(png_ptr, png_bytep_NULL);
+               png_read_push_finish_row(png_ptr);
+            }
+            if (png_ptr->pass == 4) /* pass 3 might be empty */
+            {
+               for (i = 0; i < 2 && png_ptr->pass == 4; i++)
+               {
+                  png_push_have_row(png_ptr, png_bytep_NULL);
+                  png_read_push_finish_row(png_ptr);
+               }
+            }
+            break;
+         }
+         case 3:
+         {
+            int i;
+            for (i = 0; i < 4 && png_ptr->pass == 3; i++)
+            {
+               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+               png_read_push_finish_row(png_ptr);
+            }
+            if (png_ptr->pass == 4) /* skip top two generated rows */
+            {
+               for (i = 0; i < 2 && png_ptr->pass == 4; i++)
+               {
+                  png_push_have_row(png_ptr, png_bytep_NULL);
+                  png_read_push_finish_row(png_ptr);
+               }
+            }
+            break;
+         }
+         case 4:
+         {
+            int i;
+            for (i = 0; i < 2 && png_ptr->pass == 4; i++)
+            {
+               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+               png_read_push_finish_row(png_ptr);
+            }
+            for (i = 0; i < 2 && png_ptr->pass == 4; i++)
+            {
+               png_push_have_row(png_ptr, png_bytep_NULL);
+               png_read_push_finish_row(png_ptr);
+            }
+            if (png_ptr->pass == 6) /* pass 5 might be empty */
+            {
+               png_push_have_row(png_ptr, png_bytep_NULL);
+               png_read_push_finish_row(png_ptr);
+            }
+            break;
+         }
+         case 5:
+         {
+            int i;
+            for (i = 0; i < 2 && png_ptr->pass == 5; i++)
+            {
+               png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+               png_read_push_finish_row(png_ptr);
+            }
+            if (png_ptr->pass == 6) /* skip top generated row */
+            {
+               png_push_have_row(png_ptr, png_bytep_NULL);
+               png_read_push_finish_row(png_ptr);
+            }
+            break;
+         }
+         case 6:
+         {
+            png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+            png_read_push_finish_row(png_ptr);
+            if (png_ptr->pass != 6)
+               break;
+            png_push_have_row(png_ptr, png_bytep_NULL);
+            png_read_push_finish_row(png_ptr);
+         } /* last case, so no break; pass values above 6 fall out of the switch */
+      }
+   }
+   else
+#endif
+   {
+      /* Not de-interlacing here: every decoded row is delivered as-is. */
+      png_push_have_row(png_ptr, png_ptr->row_buf + 1);
+      png_read_push_finish_row(png_ptr);
+   }
+}
+/* Count one finished row; at the end of an interlace pass, set up the next pass. */
+void /* PRIVATE */
+png_read_push_finish_row(png_structp png_ptr)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* start of interlace block */
+   PNG_CONST int FARDATA png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* offset to next interlace block */
+   PNG_CONST int FARDATA png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
+
+   /* start of interlace block in the y direction */
+   PNG_CONST int FARDATA png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1};
+
+   /* offset to next interlace block in the y direction */
+   PNG_CONST int FARDATA png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2};
+
+   /* Height of interlace block.  This is not currently used - if you need
+    * it, uncomment it here and in png.h
+   PNG_CONST int FARDATA png_pass_height[] = {8, 8, 4, 4, 2, 2, 1};
+   */
+#endif
+
+   png_ptr->row_number++;
+   if (png_ptr->row_number < png_ptr->num_rows)
+      return; /* more rows remain before num_rows is reached */
+
+   if (png_ptr->interlaced)
+   {
+      png_ptr->row_number = 0;
+      png_memset_check(png_ptr, png_ptr->prev_row, 0,
+         png_ptr->rowbytes + 1); /* the new pass starts with a zeroed prev_row */
+      do
+      {
+         png_ptr->pass++;
+         /* Passes 1, 3 and 5 are stepped over for these narrow widths. */
+         if ((png_ptr->pass == 1 && png_ptr->width < 5) ||
+             (png_ptr->pass == 3 && png_ptr->width < 3) ||
+             (png_ptr->pass == 5 && png_ptr->width < 2))
+           png_ptr->pass++;
+
+         if (png_ptr->pass > 7) /* clamp: pass never exceeds 7 */
+            png_ptr->pass--;
+         if (png_ptr->pass >= 7) /* all seven passes (0 - 6) are done */
+            break;
+
+         png_ptr->iwidth = (png_ptr->width +
+            png_pass_inc[png_ptr->pass] - 1 -
+            png_pass_start[png_ptr->pass]) /
+            png_pass_inc[png_ptr->pass];
+
+         png_ptr->irowbytes = PNG_ROWBYTES(png_ptr->pixel_depth,
+            png_ptr->iwidth) + 1;
+
+         if (png_ptr->transformations & PNG_INTERLACE)
+            break; /* num_rows is left unchanged in this case */
+
+         png_ptr->num_rows = (png_ptr->height +
+            png_pass_yinc[png_ptr->pass] - 1 -
+            png_pass_ystart[png_ptr->pass]) /
+            png_pass_yinc[png_ptr->pass];
+
+      } while (png_ptr->iwidth == 0 || png_ptr->num_rows == 0); /* skip empty passes */
+   }
+}
+
+#if defined(PNG_READ_tEXt_SUPPORTED)
+void /* PRIVATE */
+png_push_handle_tEXt(png_structp png_ptr, png_infop info_ptr, png_uint_32
+   length)
+{
+   png_charp text_buf;
+
+   /* A tEXt chunk is only legal after IHDR and before IEND. */
+   if ((png_ptr->mode & PNG_HAVE_IEND) || !(png_ptr->mode & PNG_HAVE_IHDR))
+   {
+      png_error(png_ptr, "Out of place tEXt");
+      info_ptr = info_ptr; /* to quiet some compiler warnings */
+   }
+#ifdef PNG_MAX_MALLOC_64K
+   /* Keep at most 65535 bytes and record the excess in skip_length. */
+   png_ptr->skip_length = 0;
+   if (length > (png_uint_32)65535L)
+   {
+      png_warning(png_ptr, "tEXt chunk too large to fit in memory");
+      png_ptr->skip_length = length - (png_uint_32)65535L;
+      length = (png_uint_32)65535L;
+   }
+#endif
+   /* One spare byte keeps the buffered text NUL terminated. */
+   text_buf = (png_charp)png_malloc(png_ptr, (png_uint_32)(length+1));
+   text_buf[length] = '\0';
+   png_ptr->current_text = png_ptr->current_text_ptr = text_buf;
+   png_ptr->current_text_size = png_ptr->current_text_left = (png_size_t)length;
+   png_ptr->process_mode = PNG_READ_tEXt_MODE;
+}
+
+void /* PRIVATE */
+png_push_read_tEXt(png_structp png_ptr, png_infop info_ptr)
+{
+   /* First move whatever is buffered into the pending text buffer. */
+   if (png_ptr->buffer_size && png_ptr->current_text_left)
+   {
+      png_size_t chunk = png_ptr->current_text_left;
+
+      if (png_ptr->buffer_size < chunk)
+         chunk = png_ptr->buffer_size;
+      png_crc_read(png_ptr, (png_bytep)png_ptr->current_text_ptr, chunk);
+      png_ptr->current_text_left -= chunk;
+      png_ptr->current_text_ptr += chunk;
+   }
+   /* Once the whole payload has arrived, finish the chunk and store it. */
+   if (png_ptr->current_text_left == 0)
+   {
+      png_textp entry;
+      png_charp keyword, value;
+      int failed;
+
+      if (png_ptr->buffer_size < 4)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+
+      png_push_crc_finish(png_ptr);
+
+#if defined(PNG_MAX_MALLOC_64K)
+      if (png_ptr->skip_length)
+         return;
+#endif
+
+      /* The keyword ends at the first NUL; the text starts just after it. */
+      keyword = png_ptr->current_text;
+      value = keyword;
+      while (*value)
+         value++;
+      if (value < keyword + png_ptr->current_text_size)
+         value++;
+
+      entry = (png_textp)png_malloc(png_ptr,
+         (png_uint_32)png_sizeof(png_text));
+      entry->key = keyword;
+      entry->text = value;
+      entry->compression = PNG_TEXT_COMPRESSION_NONE;
+#ifdef PNG_iTXt_SUPPORTED
+      entry->lang = NULL;
+      entry->lang_key = NULL;
+#endif
+
+      failed = png_set_text_2(png_ptr, info_ptr, entry, 1);
+
+      /* Our copies are no longer needed once png_set_text_2 has run. */
+      png_free(png_ptr, keyword);
+      png_free(png_ptr, entry);
+      png_ptr->current_text = NULL;
+
+      if (failed)
+        png_warning(png_ptr, "Insufficient memory to store text chunk.");
+   }
+}
+#endif
+
+#if defined(PNG_READ_zTXt_SUPPORTED)
+void /* PRIVATE */
+png_push_handle_zTXt(png_structp png_ptr, png_infop info_ptr, png_uint_32
+   length)
+{
+   png_charp text_buf;
+
+   /* A zTXt chunk is only legal after IHDR and before IEND. */
+   if ((png_ptr->mode & PNG_HAVE_IEND) || !(png_ptr->mode & PNG_HAVE_IHDR))
+   {
+      png_error(png_ptr, "Out of place zTXt");
+      info_ptr = info_ptr; /* to quiet some compiler warnings */
+   }
+#ifdef PNG_MAX_MALLOC_64K
+   /* A zTXt chunk over 64K cannot be handled because there is not enough
+    * space to store the uncompressed data, so it is skipped.  The threshold
+    * is probably around 32K, but it isn't as definite as 64K is.
+    */
+   if (length > (png_uint_32)65535L)
+   {
+      png_warning(png_ptr, "zTXt chunk too large to fit in memory");
+      png_push_crc_skip(png_ptr, length);
+      return;
+   }
+#endif
+   /* One spare byte keeps the buffered data NUL terminated. */
+   text_buf = (png_charp)png_malloc(png_ptr, (png_uint_32)(length+1));
+   text_buf[length] = '\0';
+   png_ptr->current_text = png_ptr->current_text_ptr = text_buf;
+   png_ptr->current_text_size = png_ptr->current_text_left = (png_size_t)length;
+   png_ptr->process_mode = PNG_READ_zTXt_MODE;
+}
+
+void /* PRIVATE */
+png_push_read_zTXt(png_structp png_ptr, png_infop info_ptr)
+{  /* Gather a zTXt payload; once it is complete, inflate and store it. */
+   if (png_ptr->buffer_size && png_ptr->current_text_left)
+   {
+      png_size_t text_size;
+
+      if (png_ptr->buffer_size < (png_uint_32)png_ptr->current_text_left)
+         text_size = png_ptr->buffer_size;
+      else
+         text_size = png_ptr->current_text_left;
+      png_crc_read(png_ptr, (png_bytep)png_ptr->current_text_ptr, text_size);
+      png_ptr->current_text_left -= text_size;
+      png_ptr->current_text_ptr += text_size;
+   }
+   if (!(png_ptr->current_text_left))
+   {
+      png_textp text_ptr;
+      png_charp text;
+      png_charp key;
+      int ret;
+      png_size_t text_size, key_size;
+
+      if (png_ptr->buffer_size < 4)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+
+      png_push_crc_finish(png_ptr);
+
+      key = png_ptr->current_text;
+
+      for (text = key; *text; text++)
+         /* empty loop */ ;
+      /* zTXt can't have zero text: the keyword's NUL must be followed by the
+       * compression byte and at least one byte of compressed data. */
+      if ((png_size_t)(text - key) + 2 >= png_ptr->current_text_size)
+      {
+         png_ptr->current_text = NULL;
+         png_free(png_ptr, key);
+         return;
+      }
+
+      text++;
+
+      if (*text != PNG_TEXT_COMPRESSION_zTXt) /* check compression byte */
+      {
+         png_ptr->current_text = NULL;
+         png_free(png_ptr, key);
+         return;
+      }
+
+      text++;
+
+      png_ptr->zstream.next_in = (png_bytep )text;
+      png_ptr->zstream.avail_in = (uInt)(png_ptr->current_text_size -
+         (text - key));
+      png_ptr->zstream.next_out = png_ptr->zbuf;
+      png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+
+      key_size = text - key;   /* keyword, its NUL and the compression byte */
+      text_size = 0;
+      text = NULL;
+      ret = Z_STREAM_END;
+      /* Inflate through zbuf, appending each batch of output to "text". */
+      while (png_ptr->zstream.avail_in)
+      {
+         ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH);
+         if (ret != Z_OK && ret != Z_STREAM_END)
+         {
+            inflateReset(&png_ptr->zstream);
+            png_ptr->zstream.avail_in = 0;
+            png_ptr->current_text = NULL;
+            png_free(png_ptr, key);
+            png_free(png_ptr, text);
+            return;
+         }
+         if (!(png_ptr->zstream.avail_out) || ret == Z_STREAM_END)
+         {
+            if (text == NULL)   /* first batch: key prefix + inflated bytes */
+            {
+               text = (png_charp)png_malloc(png_ptr,
+                  (png_uint_32)(png_ptr->zbuf_size - png_ptr->zstream.avail_out
+                     + key_size + 1));
+               png_memcpy(text + key_size, png_ptr->zbuf,
+                  png_ptr->zbuf_size - png_ptr->zstream.avail_out);
+               png_memcpy(text, key, key_size);
+               text_size = key_size + png_ptr->zbuf_size -
+                  png_ptr->zstream.avail_out;
+               *(text + text_size) = '\0';
+            }
+            else   /* later batches: allocate bigger, copy, free the old one */
+            {
+               png_charp tmp;
+
+               tmp = text;
+               text = (png_charp)png_malloc(png_ptr, text_size +
+                  (png_uint_32)(png_ptr->zbuf_size - png_ptr->zstream.avail_out
+                   + 1));
+               png_memcpy(text, tmp, text_size);
+               png_free(png_ptr, tmp);
+               png_memcpy(text + text_size, png_ptr->zbuf,
+                  png_ptr->zbuf_size - png_ptr->zstream.avail_out);
+               text_size += png_ptr->zbuf_size - png_ptr->zstream.avail_out;
+               *(text + text_size) = '\0';
+            }
+            if (ret != Z_STREAM_END)
+            {
+               png_ptr->zstream.next_out = png_ptr->zbuf;
+               png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+            }
+         }
+         else   /* input used up before the stream ended */
+         {
+            break;
+         }
+
+         if (ret == Z_STREAM_END)
+            break;
+      }
+
+      inflateReset(&png_ptr->zstream);
+      png_ptr->zstream.avail_in = 0;
+
+      if (ret != Z_STREAM_END)   /* incomplete stream: discard everything */
+      {
+         png_ptr->current_text = NULL;
+         png_free(png_ptr, key);
+         png_free(png_ptr, text);
+         return;
+      }
+
+      png_ptr->current_text = NULL;
+      png_free(png_ptr, key);
+      key = text;   /* the new buffer begins with a copy of the keyword */
+      text += key_size;
+
+      text_ptr = (png_textp)png_malloc(png_ptr,
+          (png_uint_32)png_sizeof(png_text));
+      text_ptr->compression = PNG_TEXT_COMPRESSION_zTXt;
+      text_ptr->key = key;
+#ifdef PNG_iTXt_SUPPORTED
+      text_ptr->lang = NULL;
+      text_ptr->lang_key = NULL;
+#endif
+      text_ptr->text = text;
+
+      ret = png_set_text_2(png_ptr, info_ptr, text_ptr, 1);
+
+      png_free(png_ptr, key);
+      png_free(png_ptr, text_ptr);
+
+      if (ret)
+        png_warning(png_ptr, "Insufficient memory to store text chunk.");
+   }
+}
+#endif
+
+#if defined(PNG_READ_iTXt_SUPPORTED)
+void /* PRIVATE */
+png_push_handle_iTXt(png_structp png_ptr, png_infop info_ptr, png_uint_32
+   length)
+{
+   png_charp text_buf;
+
+   /* An iTXt chunk is only legal after IHDR and before IEND. */
+   if ((png_ptr->mode & PNG_HAVE_IEND) || !(png_ptr->mode & PNG_HAVE_IHDR))
+   {
+      png_error(png_ptr, "Out of place iTXt");
+      info_ptr = info_ptr; /* to quiet some compiler warnings */
+   }
+#ifdef PNG_MAX_MALLOC_64K
+   /* Keep at most 65535 bytes and record the excess in skip_length. */
+   png_ptr->skip_length = 0;
+   if (length > (png_uint_32)65535L)
+   {
+      png_warning(png_ptr, "iTXt chunk too large to fit in memory");
+      png_ptr->skip_length = length - (png_uint_32)65535L;
+      length = (png_uint_32)65535L;
+   }
+#endif
+   /* One spare byte keeps the buffered text NUL terminated. */
+   text_buf = (png_charp)png_malloc(png_ptr, (png_uint_32)(length+1));
+   text_buf[length] = '\0';
+   png_ptr->current_text = png_ptr->current_text_ptr = text_buf;
+   png_ptr->current_text_size = png_ptr->current_text_left = (png_size_t)length;
+   png_ptr->process_mode = PNG_READ_iTXt_MODE;
+}
+
+void /* PRIVATE */
+png_push_read_iTXt(png_structp png_ptr, png_infop info_ptr)
+{
+   /* Gather the iTXt payload; once complete, split it into fields and store. */
+   if (png_ptr->buffer_size && png_ptr->current_text_left)
+   {
+      png_size_t text_size;
+
+      if (png_ptr->buffer_size < png_ptr->current_text_left)
+         text_size = png_ptr->buffer_size;
+      else
+         text_size = png_ptr->current_text_left;
+      png_crc_read(png_ptr, (png_bytep)png_ptr->current_text_ptr, text_size);
+      png_ptr->current_text_left -= text_size;
+      png_ptr->current_text_ptr += text_size;
+   }
+   if (!(png_ptr->current_text_left))
+   {
+      png_textp text_ptr;
+      png_charp key;
+      int comp_flag;
+      png_charp lang;
+      png_charp lang_key;
+      png_charp text;
+      int ret;
+
+      if (png_ptr->buffer_size < 4)
+      {
+         png_push_save_buffer(png_ptr);
+         return;
+      }
+
+      png_push_crc_finish(png_ptr);
+
+#if defined(PNG_MAX_MALLOC_64K)
+      if (png_ptr->skip_length)
+         return;
+#endif
+
+      key = png_ptr->current_text;
+
+      for (lang = key; *lang; lang++)
+         /* empty loop */ ;
+
+      /* The keyword's NUL must be followed by comp_flag, comp_type and a
+       * NUL-terminated language tag; drop a chunk that is too short. */
+      if ((png_size_t)(lang - key) + 3 > png_ptr->current_text_size ||
+          (png_size_t)(lang - key) + 3 + png_strlen(lang + 3) >=
+             png_ptr->current_text_size)
+      {
+         png_ptr->current_text = NULL;
+         png_free(png_ptr, key);
+         return;
+      }
+
+      comp_flag = lang[1];
+      lang += 3;     /* skip the keyword NUL, comp_flag and comp_type */
+      lang_key = lang + png_strlen(lang) + 1;   /* just past the lang NUL */
+
+      for (text = lang_key; *text; text++)
+         /* empty loop */ ;
+      if (text < key + png_ptr->current_text_size)
+         text++;
+
+      text_ptr = (png_textp)png_malloc(png_ptr,
+         (png_uint_32)png_sizeof(png_text));
+      text_ptr->compression = comp_flag + 2;
+      text_ptr->key = key;
+      text_ptr->lang = lang;
+      text_ptr->lang_key = lang_key;
+      text_ptr->text = text;
+      text_ptr->text_length = 0;
+      text_ptr->itxt_length = png_strlen(text);
+
+      ret = png_set_text_2(png_ptr, info_ptr, text_ptr, 1);
+
+      png_ptr->current_text = NULL;
+      png_free(png_ptr, key);   /* release our buffer, as the tEXt reader does */
+      png_free(png_ptr, text_ptr);
+      if (ret)
+        png_warning(png_ptr, "Insufficient memory to store iTXt chunk.");
+   }
+}
+#endif
+
+/* Called when no handler was found for the current chunk.  An unknown
+ * critical chunk is an error unless it is to be kept or a user callback is
+ * installed.  Otherwise the chunk is read and stored when
+ * PNG_FLAG_KEEP_UNKNOWN_CHUNKS is set, and skipped when it is not. */
+void /* PRIVATE */
+png_push_handle_unknown(png_structp png_ptr, png_infop info_ptr, png_uint_32
+   length)
+{
+   png_uint_32 skip=0;
+   png_check_chunk_name(png_ptr, png_ptr->chunk_name);
+
+   if (!(png_ptr->chunk_name[0] & 0x20))   /* bit 0x20 clear: critical chunk */
+   {
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+     if(png_handle_as_unknown(png_ptr, png_ptr->chunk_name) !=
+          PNG_HANDLE_CHUNK_ALWAYS
+#if defined(PNG_READ_USER_CHUNKS_SUPPORTED)
+          && png_ptr->read_user_chunk_fn == NULL
+#endif
+        )
+#endif
+        png_chunk_error(png_ptr, "unknown critical chunk");
+
+     info_ptr = info_ptr; /* to quiet some compiler warnings */
+   }
+
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+   if (png_ptr->flags & PNG_FLAG_KEEP_UNKNOWN_CHUNKS)
+   {
+#ifdef PNG_MAX_MALLOC_64K
+      if (length > (png_uint_32)65535L)
+      {
+          png_warning(png_ptr, "unknown chunk too large to fit in memory");
+          skip = length - (png_uint_32)65535L;
+          length = (png_uint_32)65535L;
+      }
+#endif
+      png_strncpy((png_charp)png_ptr->unknown_chunk.name,
+         (png_charp)png_ptr->chunk_name, 4);
+      png_ptr->unknown_chunk.name[4] = '\0';
+      png_ptr->unknown_chunk.data = (png_bytep)png_malloc(png_ptr, length);
+      png_ptr->unknown_chunk.size = (png_size_t)length;
+      png_crc_read(png_ptr, (png_bytep)png_ptr->unknown_chunk.data, length);
+#if defined(PNG_READ_USER_CHUNKS_SUPPORTED)
+      if(png_ptr->read_user_chunk_fn != NULL)
+      {
+         /* callback to user unknown chunk handler */
+         int ret;
+         ret = (*(png_ptr->read_user_chunk_fn))
+           (png_ptr, &png_ptr->unknown_chunk);
+         if (ret < 0)
+            png_chunk_error(png_ptr, "error in user chunk");
+         if (ret == 0)   /* callback declined the chunk: keep it ourselves */
+         {
+            if (!(png_ptr->chunk_name[0] & 0x20))
+               if(png_handle_as_unknown(png_ptr, png_ptr->chunk_name) !=
+                    PNG_HANDLE_CHUNK_ALWAYS)
+                  png_chunk_error(png_ptr, "unknown critical chunk");
+            png_set_unknown_chunks(png_ptr, info_ptr,
+               &png_ptr->unknown_chunk, 1);
+         }
+      }
+      else  /* no user callback installed: the chunk must still be kept */
+#endif
+         png_set_unknown_chunks(png_ptr, info_ptr, &png_ptr->unknown_chunk, 1);
+      png_free(png_ptr, png_ptr->unknown_chunk.data);
+      png_ptr->unknown_chunk.data = NULL;
+   }
+   else
+#endif
+      skip=length;
+   png_push_crc_skip(png_ptr, skip);
+}
+
+void /* PRIVATE */
+png_push_have_info(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr->info_fn == NULL) return;   /* the callback is optional */
+   (*(png_ptr->info_fn))(png_ptr, info_ptr);
+}
+
+void /* PRIVATE */
+png_push_have_end(png_structp png_ptr, png_infop info_ptr)
+{
+   if (png_ptr->end_fn == NULL) return;   /* the callback is optional */
+   (*(png_ptr->end_fn))(png_ptr, info_ptr);
+}
+
+void /* PRIVATE */
+png_push_have_row(png_structp png_ptr, png_bytep row)
+{
+   if (png_ptr->row_fn == NULL) return;   /* the callback is optional */
+   (*(png_ptr->row_fn))(png_ptr, row, png_ptr->row_number,
+      (int)png_ptr->pass);
+}
+
+void PNGAPI
+png_progressive_combine_row (png_structp png_ptr,
+   png_bytep old_row, png_bytep new_row)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_CONST int FARDATA png_pass_dsp_mask[7] =   /* indexed by pass (0 - 6) */
+      {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55, 0xff};
+#endif
+   /* Nothing to do without a struct or a new row (== png_ptr->row_buf). */
+   if (png_ptr == NULL || new_row == NULL) return;
+   png_combine_row(png_ptr, old_row, png_pass_dsp_mask[png_ptr->pass]);
+}
+
+void PNGAPI
+png_set_progressive_read_fn(png_structp png_ptr, png_voidp progressive_ptr,
+   png_progressive_info_ptr info_fn, png_progressive_row_ptr row_fn,
+   png_progressive_end_ptr end_fn)
+{
+   /* Record the three callbacks, then read through png_push_fill_buffer. */
+   if (png_ptr == NULL) return;
+   png_ptr->end_fn = end_fn;
+   png_ptr->row_fn = row_fn;
+   png_ptr->info_fn = info_fn;
+   png_set_read_fn(png_ptr, progressive_ptr, png_push_fill_buffer);
+}
+
+png_voidp PNGAPI
+png_get_progressive_ptr(png_structp png_ptr)
+{
+   /* The progressive pointer is kept in io_ptr; NULL when there is no struct. */
+   return (png_ptr != NULL) ? png_ptr->io_ptr : NULL;
+}
+#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
diff --git a/src/libpng/pngread.c b/src/libpng/pngread.c
new file mode 100644
index 0000000..02efb0d
--- /dev/null
+++ b/src/libpng/pngread.c
@@ -0,0 +1,1472 @@
+
+/* pngread.c - read a PNG file
+ *
+ * Last changed in libpng 1.2.20 September 7, 2007
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file contains routines that an application calls directly to
+ * read a PNG file or stream.
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED)
+
+/* Create a PNG structure for reading, and allocate any memory needed. */
+png_structp PNGAPI
+png_create_read_struct(png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn)
+{
+   /* With PNG_USER_MEM_SUPPORTED this only forwards to the _2 variant. */
+#ifdef PNG_USER_MEM_SUPPORTED
+   return (png_create_read_struct_2(user_png_ver, error_ptr, error_fn,
+      warn_fn, png_voidp_NULL, png_malloc_ptr_NULL, png_free_ptr_NULL));
+}
+
+/* Alternate create PNG structure for reading, and allocate any memory needed. */
+png_structp PNGAPI
+png_create_read_struct_2(png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
+   png_malloc_ptr malloc_fn, png_free_ptr free_fn)
+{
+#endif /* PNG_USER_MEM_SUPPORTED */
+   /* Returns the new read struct, or NULL when set-up fails. */
+   png_structp png_ptr;
+
+#ifdef PNG_SETJMP_SUPPORTED
+#ifdef USE_FAR_KEYWORD
+   jmp_buf jmpbuf;
+#endif
+#endif
+
+   int i;
+
+   png_debug(1, "in png_create_read_struct\n");
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_ptr = (png_structp)png_create_struct_2(PNG_STRUCT_PNG,
+      (png_malloc_ptr)malloc_fn, (png_voidp)mem_ptr);
+#else
+   png_ptr = (png_structp)png_create_struct(PNG_STRUCT_PNG);
+#endif
+   if (png_ptr == NULL)
+      return (NULL);
+
+   /* added at libpng-1.2.6 */
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   png_ptr->user_width_max=PNG_USER_WIDTH_MAX;
+   png_ptr->user_height_max=PNG_USER_HEIGHT_MAX;
+#endif
+   /* A longjmp during the set-up below lands here: free and return NULL. */
+#ifdef PNG_SETJMP_SUPPORTED
+#ifdef USE_FAR_KEYWORD
+   if (setjmp(jmpbuf))
+#else
+   if (setjmp(png_ptr->jmpbuf))
+#endif
+   {
+      png_free(png_ptr, png_ptr->zbuf);
+      png_ptr->zbuf=NULL;
+#ifdef PNG_USER_MEM_SUPPORTED
+      png_destroy_struct_2((png_voidp)png_ptr,
+         (png_free_ptr)free_fn, (png_voidp)mem_ptr);
+#else
+      png_destroy_struct((png_voidp)png_ptr);
+#endif
+      return (NULL);
+   }
+#ifdef USE_FAR_KEYWORD
+   png_memcpy(png_ptr->jmpbuf,jmpbuf,png_sizeof(jmp_buf));
+#endif
+#endif
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_set_mem_fn(png_ptr, mem_ptr, malloc_fn, free_fn);
+#endif
+
+   png_set_error_fn(png_ptr, error_ptr, error_fn, warn_fn);
+
+   /* A NULL version string is a mismatch; stop at the first difference. */
+   for (i = 0; user_png_ver != NULL && png_libpng_ver[i] != '\0' &&
+        user_png_ver[i] == png_libpng_ver[i]; i++)
+      /* empty loop */ ;
+   if (user_png_ver == NULL || user_png_ver[i] != png_libpng_ver[i])
+      png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
+
+   if (png_ptr->flags & PNG_FLAG_LIBRARY_MISMATCH)
+   {
+     /* Libpng 0.90 and later are binary incompatible with libpng 0.89, so
+      * we must recompile any applications that use any older library version.
+      * For versions after libpng 1.0, we will be compatible, so we need
+      * only check the first digit.
+      */
+     if (user_png_ver == NULL || user_png_ver[0] != png_libpng_ver[0] ||
+         (user_png_ver[0] == '1' && user_png_ver[2] != png_libpng_ver[2]) ||
+         (user_png_ver[0] == '0' && user_png_ver[2] < '9'))
+     {
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+        char msg[80];
+        if (user_png_ver)
+        {
+          png_snprintf(msg, 80,
+             "Application was compiled with png.h from libpng-%.20s",
+             user_png_ver);
+          png_warning(png_ptr, msg);
+        }
+        png_snprintf(msg, 80,
+             "Application  is  running with png.c from libpng-%.20s",
+           png_libpng_ver);
+        png_warning(png_ptr, msg);
+#endif
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+        png_ptr->flags=0;
+#endif
+        png_error(png_ptr,
+           "Incompatible libpng version in application and library");
+     }
+   }
+
+   /* initialize zbuf - compression buffer */
+   png_ptr->zbuf_size = PNG_ZBUF_SIZE;
+   png_ptr->zbuf = (png_bytep)png_malloc(png_ptr,
+     (png_uint_32)png_ptr->zbuf_size);
+   png_ptr->zstream.zalloc = png_zalloc;
+   png_ptr->zstream.zfree = png_zfree;
+   png_ptr->zstream.opaque = (voidpf)png_ptr;
+
+   switch (inflateInit(&png_ptr->zstream))
+   {
+     case Z_OK: /* Do nothing */ break;
+     case Z_MEM_ERROR:
+     case Z_STREAM_ERROR: png_error(png_ptr, "zlib memory error"); break;
+     case Z_VERSION_ERROR: png_error(png_ptr, "zlib version error"); break;
+     default: png_error(png_ptr, "Unknown zlib error");
+   }
+
+   png_ptr->zstream.next_out = png_ptr->zbuf;
+   png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+
+   png_set_read_fn(png_ptr, png_voidp_NULL, png_rw_ptr_NULL);
+
+#ifdef PNG_SETJMP_SUPPORTED
+/* Applications that neglect to set up their own setjmp() and then encounter
+   a png_error() will longjmp here.  Since the jmpbuf is then meaningless we
+   abort instead of returning. */
+#ifdef USE_FAR_KEYWORD
+   if (setjmp(jmpbuf))
+      PNG_ABORT();
+   png_memcpy(png_ptr->jmpbuf,jmpbuf,png_sizeof(jmp_buf));
+#else
+   if (setjmp(png_ptr->jmpbuf))
+      PNG_ABORT();
+#endif
+#endif
+   return (png_ptr);
+}
+
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+/* Initialize PNG structure for reading, and allocate any memory needed.
+   This interface is deprecated in favour of the png_create_read_struct(),
+   and it will disappear as of libpng-1.3.0. */
+#undef png_read_init
+void PNGAPI
+png_read_init(png_structp png_ptr)
+{
+   /* Pre-1.0.7 applications land here; they supply no version or sizes. */
+   png_read_init_2(png_ptr, "1.0.6 or earlier", (png_size_t)0, (png_size_t)0);
+}
+
+void PNGAPI
+png_read_init_2(png_structp png_ptr, png_const_charp user_png_ver,
+   png_size_t png_struct_size, png_size_t png_info_size)
+{
+   /* We only come here via pre-1.0.12-compiled applications */
+   if (png_ptr == NULL) return;
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+   if (png_struct_size < png_sizeof(png_struct) ||
+       png_info_size < png_sizeof(png_info))
+   {  /* warn with both the application's and the library's version */
+      char msg[80];
+      png_ptr->warning_fn = NULL;
+      if (user_png_ver != NULL)
+      {
+         png_snprintf(msg, 80,
+            "Application was compiled with png.h from libpng-%.20s",
+            user_png_ver);
+         png_warning(png_ptr, msg);
+      }
+      png_snprintf(msg, 80,
+         "Application  is  running with png.c from libpng-%.20s",
+         png_libpng_ver);
+      png_warning(png_ptr, msg);
+   }
+#endif
+   if (png_struct_size < png_sizeof(png_struct))
+   {  /* fatal: the caller's png_struct is smaller than the library's */
+      png_ptr->error_fn = NULL;
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+      png_ptr->flags = 0;
+#endif
+      png_error(png_ptr,
+      "The png struct allocated by the application for reading is too small.");
+   }
+   if (png_info_size < png_sizeof(png_info))
+   {  /* fatal: the caller's png_info is smaller than the library's */
+      png_ptr->error_fn = NULL;
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+      png_ptr->flags = 0;
+#endif
+      png_error(png_ptr,
+        "The info struct allocated by application for reading is too small.");
+   }
+   png_read_init_3(&png_ptr, user_png_ver, png_struct_size);
+}
+#endif /* PNG_1_0_X || PNG_1_2_X */
+
+void PNGAPI
+png_read_init_3(png_structpp ptr_ptr, png_const_charp user_png_ver,
+   png_size_t png_struct_size)
+{
+#ifdef PNG_SETJMP_SUPPORTED
+   jmp_buf tmp_jmp;  /* to save current jump buffer */
+#endif
+
+   int i=0;
+
+   png_structp png_ptr = (ptr_ptr != NULL) ? *ptr_ptr : NULL;
+
+   if(png_ptr == NULL) return;
+   /* Compare version strings.  A NULL string is a mismatch, and the scan
+    * stops at the first difference so user_png_ver is never over-read. */
+   for (; user_png_ver != NULL && png_libpng_ver[i] != '\0' &&
+        user_png_ver[i] == png_libpng_ver[i]; i++)
+      /* empty loop */ ;
+   if (user_png_ver == NULL || user_png_ver[i] != png_libpng_ver[i])
+   {
+#ifdef PNG_LEGACY_SUPPORTED
+       png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
+#else
+       png_ptr->warning_fn=NULL;
+       png_warning(png_ptr,
+        "Application uses deprecated png_read_init() and should be recompiled.");
+#endif
+   }
+
+   png_debug(1, "in png_read_init_3\n");
+
+#ifdef PNG_SETJMP_SUPPORTED
+   /* save jump buffer and error functions */
+   png_memcpy(tmp_jmp, png_ptr->jmpbuf, png_sizeof (jmp_buf));
+#endif
+
+   if(png_sizeof(png_struct) > png_struct_size)
+     {
+       png_destroy_struct(png_ptr);
+       *ptr_ptr = (png_structp)png_create_struct(PNG_STRUCT_PNG);
+       png_ptr = *ptr_ptr;
+     }
+   if (png_ptr == NULL) return;   /* reallocation failed; *ptr_ptr is NULL */
+   /* reset all variables to 0 */
+   png_memset(png_ptr, 0, png_sizeof (png_struct));
+
+#ifdef PNG_SETJMP_SUPPORTED
+   /* restore jump buffer */
+   png_memcpy(png_ptr->jmpbuf, tmp_jmp, png_sizeof (jmp_buf));
+#endif
+
+   /* added at libpng-1.2.6 */
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   png_ptr->user_width_max=PNG_USER_WIDTH_MAX;
+   png_ptr->user_height_max=PNG_USER_HEIGHT_MAX;
+#endif
+
+   /* initialize zbuf - compression buffer */
+   png_ptr->zbuf_size = PNG_ZBUF_SIZE;
+   png_ptr->zbuf = (png_bytep)png_malloc(png_ptr,
+     (png_uint_32)png_ptr->zbuf_size);
+   png_ptr->zstream.zalloc = png_zalloc;
+   png_ptr->zstream.zfree = png_zfree;
+   png_ptr->zstream.opaque = (voidpf)png_ptr;
+
+   switch (inflateInit(&png_ptr->zstream))
+   {
+     case Z_OK: /* Do nothing */ break;
+     case Z_MEM_ERROR:
+     case Z_STREAM_ERROR: png_error(png_ptr, "zlib memory"); break;
+     case Z_VERSION_ERROR: png_error(png_ptr, "zlib version"); break;
+     default: png_error(png_ptr, "Unknown zlib error");
+   }
+
+   png_ptr->zstream.next_out = png_ptr->zbuf;
+   png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+
+   png_set_read_fn(png_ptr, png_voidp_NULL, png_rw_ptr_NULL);
+}
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* Read the information before the actual image data.  This has been
+ * changed in v0.90 to allow reading a file that already has the magic
+ * bytes read from the stream.  You can tell libpng how many bytes have
+ * been read from the beginning of the stream (up to the maximum of 8)
+ * via png_set_sig_bytes(), and we will only check the remaining bytes
+ * here.  The application can then have access to the signature bytes we
+ * read if it is determined that this isn't a valid PNG file.
+ */
+void PNGAPI
+png_read_info(png_structp png_ptr, png_infop info_ptr)
+{
+   if(png_ptr == NULL || info_ptr == NULL) return;  /* both are dereferenced */
+   png_debug(1, "in png_read_info\n");
+   /* If we haven't checked all of the PNG signature bytes, do so now. */
+   if (png_ptr->sig_bytes < 8)
+   {
+      png_size_t num_checked = png_ptr->sig_bytes,
+                 num_to_check = 8 - num_checked;
+
+      png_read_data(png_ptr, &(info_ptr->signature[num_checked]), num_to_check);
+      png_ptr->sig_bytes = 8;
+
+      if (png_sig_cmp(info_ptr->signature, num_checked, num_to_check))
+      {
+         if (num_checked < 4 &&
+             png_sig_cmp(info_ptr->signature, num_checked, num_to_check - 4))
+            png_error(png_ptr, "Not a PNG file");
+         else
+            png_error(png_ptr, "PNG file corrupted by ASCII conversion");
+      }
+      if (num_checked < 3)
+         png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE;
+   }
+   /* Read and dispatch chunks one by one; the loop ends at the first IDAT. */
+   for(;;)
+   {
+#ifdef PNG_USE_LOCAL_ARRAYS
+      PNG_CONST PNG_IHDR;
+      PNG_CONST PNG_IDAT;
+      PNG_CONST PNG_IEND;
+      PNG_CONST PNG_PLTE;
+#if defined(PNG_READ_bKGD_SUPPORTED)
+      PNG_CONST PNG_bKGD;
+#endif
+#if defined(PNG_READ_cHRM_SUPPORTED)
+      PNG_CONST PNG_cHRM;
+#endif
+#if defined(PNG_READ_gAMA_SUPPORTED)
+      PNG_CONST PNG_gAMA;
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+      PNG_CONST PNG_hIST;
+#endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+      PNG_CONST PNG_iCCP;
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+      PNG_CONST PNG_iTXt;
+#endif
+#if defined(PNG_READ_oFFs_SUPPORTED)
+      PNG_CONST PNG_oFFs;
+#endif
+#if defined(PNG_READ_pCAL_SUPPORTED)
+      PNG_CONST PNG_pCAL;
+#endif
+#if defined(PNG_READ_pHYs_SUPPORTED)
+      PNG_CONST PNG_pHYs;
+#endif
+#if defined(PNG_READ_sBIT_SUPPORTED)
+      PNG_CONST PNG_sBIT;
+#endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+      PNG_CONST PNG_sCAL;
+#endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+      PNG_CONST PNG_sPLT;
+#endif
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      PNG_CONST PNG_sRGB;
+#endif
+#if defined(PNG_READ_tEXt_SUPPORTED)
+      PNG_CONST PNG_tEXt;
+#endif
+#if defined(PNG_READ_tIME_SUPPORTED)
+      PNG_CONST PNG_tIME;
+#endif
+#if defined(PNG_READ_tRNS_SUPPORTED)
+      PNG_CONST PNG_tRNS;
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+      PNG_CONST PNG_zTXt;
+#endif
+#endif /* PNG_USE_LOCAL_ARRAYS */
+      png_byte chunk_length[4];
+      png_uint_32 length;
+
+      png_read_data(png_ptr, chunk_length, 4);
+      length = png_get_uint_31(png_ptr,chunk_length);
+
+      png_reset_crc(png_ptr);
+      png_crc_read(png_ptr, png_ptr->chunk_name, 4);
+
+      png_debug2(0, "Reading %s chunk, length=%lu.\n", png_ptr->chunk_name,
+         length);
+
+      /* This should be a binary subdivision search or a hash for
+       * matching the chunk name rather than a linear search.
+       */
+      if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+        if(png_ptr->mode & PNG_AFTER_IDAT)
+          png_ptr->mode |= PNG_HAVE_CHUNK_AFTER_IDAT;
+
+      if (!png_memcmp(png_ptr->chunk_name, png_IHDR, 4))
+         png_handle_IHDR(png_ptr, info_ptr, length);
+      else if (!png_memcmp(png_ptr->chunk_name, png_IEND, 4))
+         png_handle_IEND(png_ptr, info_ptr, length);
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+      else if (png_handle_as_unknown(png_ptr, png_ptr->chunk_name))
+      {
+         if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+            png_ptr->mode |= PNG_HAVE_IDAT;
+         png_handle_unknown(png_ptr, info_ptr, length);
+         if (!png_memcmp(png_ptr->chunk_name, png_PLTE, 4))
+            png_ptr->mode |= PNG_HAVE_PLTE;
+         else if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+         {
+            if (!(png_ptr->mode & PNG_HAVE_IHDR))
+               png_error(png_ptr, "Missing IHDR before IDAT");
+            else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+                     !(png_ptr->mode & PNG_HAVE_PLTE))
+               png_error(png_ptr, "Missing PLTE before IDAT");
+            break;
+         }
+      }
+#endif
+      else if (!png_memcmp(png_ptr->chunk_name, png_PLTE, 4))
+         png_handle_PLTE(png_ptr, info_ptr, length);
+      else if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+      {
+         if (!(png_ptr->mode & PNG_HAVE_IHDR))
+            png_error(png_ptr, "Missing IHDR before IDAT");
+         else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+                  !(png_ptr->mode & PNG_HAVE_PLTE))
+            png_error(png_ptr, "Missing PLTE before IDAT");
+
+         png_ptr->idat_size = length;   /* IDAT data itself is not read here */
+         png_ptr->mode |= PNG_HAVE_IDAT;
+         break;
+      }
+#if defined(PNG_READ_bKGD_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_bKGD, 4))
+         png_handle_bKGD(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_cHRM_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_cHRM, 4))
+         png_handle_cHRM(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_gAMA_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_gAMA, 4))
+         png_handle_gAMA(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_hIST, 4))
+         png_handle_hIST(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_oFFs_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_oFFs, 4))
+         png_handle_oFFs(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_pCAL_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_pCAL, 4))
+         png_handle_pCAL(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sCAL, 4))
+         png_handle_sCAL(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_pHYs_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_pHYs, 4))
+         png_handle_pHYs(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sBIT_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sBIT, 4))
+         png_handle_sBIT(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sRGB, 4))
+         png_handle_sRGB(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_iCCP, 4))
+         png_handle_iCCP(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sPLT, 4))
+         png_handle_sPLT(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_tEXt_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_tEXt, 4))
+         png_handle_tEXt(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_tIME_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_tIME, 4))
+         png_handle_tIME(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_tRNS_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_tRNS, 4))
+         png_handle_tRNS(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_zTXt, 4))
+         png_handle_zTXt(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_iTXt, 4))
+         png_handle_iTXt(png_ptr, info_ptr, length);
+#endif
+      else
+         png_handle_unknown(png_ptr, info_ptr, length);
+   }
+}
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+
+/* Optional call that updates the user's info_ptr structure */
+void PNGAPI
+png_read_update_info(png_structp png_ptr, png_infop info_ptr)
+{
+   png_debug(1, "in png_read_update_info\n");
+   if (png_ptr == NULL) return;
+   if (png_ptr->flags & PNG_FLAG_ROW_INIT) /* row setup was already done */
+      png_warning(png_ptr,
+      "Ignoring extra png_read_update_info() call; row buffer not reallocated");
+   else
+      png_read_start_row(png_ptr);
+   png_read_transform_info(png_ptr, info_ptr);
+}
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* Set up the palette, background and related state once all
+ * transformations have been requested but before any row is read, so
+ * that the caller can fetch, e.g., a gamma-corrected palette.  Calling
+ * this is optional; png_read_row() performs the same setup on demand.
+ */
+void PNGAPI
+png_start_read_image(png_structp png_ptr)
+{
+   png_debug(1, "in png_start_read_image\n");
+   if (png_ptr == NULL || (png_ptr->flags & PNG_FLAG_ROW_INIT))
+      return;
+   png_read_start_row(png_ptr);
+}
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+void PNGAPI /* read one row; deliver it to row and/or dsp_row when non-NULL */
+png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_CONST PNG_IDAT;
+   PNG_CONST int png_pass_dsp_mask[7] = {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55,
+     0xff};
+   PNG_CONST int png_pass_mask[7] = {0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff};
+#endif
+   int ret; /* status returned by inflate() */
+   if(png_ptr == NULL) return;
+   png_debug2(1, "in png_read_row (row %lu, pass %d)\n",
+      png_ptr->row_number, png_ptr->pass);
+   if (!(png_ptr->flags & PNG_FLAG_ROW_INIT))
+      png_read_start_row(png_ptr);
+   if (png_ptr->row_number == 0 && png_ptr->pass == 0)
+   {
+   /* check for transforms that have been set but were defined out */
+#if defined(PNG_WRITE_INVERT_SUPPORTED) && !defined(PNG_READ_INVERT_SUPPORTED)
+   if (png_ptr->transformations & PNG_INVERT_MONO)
+      png_warning(png_ptr, "PNG_READ_INVERT_SUPPORTED is not defined.");
+#endif
+#if defined(PNG_WRITE_FILLER_SUPPORTED) && !defined(PNG_READ_FILLER_SUPPORTED)
+   if (png_ptr->transformations & PNG_FILLER)
+      png_warning(png_ptr, "PNG_READ_FILLER_SUPPORTED is not defined.");
+#endif
+#if defined(PNG_WRITE_PACKSWAP_SUPPORTED) && !defined(PNG_READ_PACKSWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACKSWAP)
+      png_warning(png_ptr, "PNG_READ_PACKSWAP_SUPPORTED is not defined.");
+#endif
+#if defined(PNG_WRITE_PACK_SUPPORTED) && !defined(PNG_READ_PACK_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACK)
+      png_warning(png_ptr, "PNG_READ_PACK_SUPPORTED is not defined.");
+#endif
+#if defined(PNG_WRITE_SHIFT_SUPPORTED) && !defined(PNG_READ_SHIFT_SUPPORTED)
+   if (png_ptr->transformations & PNG_SHIFT)
+      png_warning(png_ptr, "PNG_READ_SHIFT_SUPPORTED is not defined.");
+#endif
+#if defined(PNG_WRITE_BGR_SUPPORTED) && !defined(PNG_READ_BGR_SUPPORTED)
+   if (png_ptr->transformations & PNG_BGR)
+      png_warning(png_ptr, "PNG_READ_BGR_SUPPORTED is not defined.");
+#endif
+#if defined(PNG_WRITE_SWAP_SUPPORTED) && !defined(PNG_READ_SWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_SWAP_BYTES)
+      png_warning(png_ptr, "PNG_READ_SWAP_SUPPORTED is not defined.");
+#endif
+   }
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED)
+   /* if interlaced and we do not need a new row, combine row and return */
+   if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE))
+   {
+      switch (png_ptr->pass)
+      {
+         case 0: /* only rows 0, 8, 16, ... are decoded in this pass */
+            if (png_ptr->row_number & 0x07)
+            {
+               if (dsp_row != NULL)
+                  png_combine_row(png_ptr, dsp_row,
+                     png_pass_dsp_mask[png_ptr->pass]);
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 1: /* same rows as pass 0; nothing decoded if width < 5 */
+            if ((png_ptr->row_number & 0x07) || png_ptr->width < 5)
+            {
+               if (dsp_row != NULL)
+                  png_combine_row(png_ptr, dsp_row,
+                     png_pass_dsp_mask[png_ptr->pass]);
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 2: /* only rows 4, 12, 20, ... are decoded */
+            if ((png_ptr->row_number & 0x07) != 4)
+            {
+               if (dsp_row != NULL && (png_ptr->row_number & 4))
+                  png_combine_row(png_ptr, dsp_row,
+                     png_pass_dsp_mask[png_ptr->pass]);
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 3: /* rows 0, 4, 8, ...; nothing decoded if width < 3 */
+            if ((png_ptr->row_number & 3) || png_ptr->width < 3)
+            {
+               if (dsp_row != NULL)
+                  png_combine_row(png_ptr, dsp_row,
+                     png_pass_dsp_mask[png_ptr->pass]);
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 4: /* only rows 2, 6, 10, ... are decoded */
+            if ((png_ptr->row_number & 3) != 2)
+            {
+               if (dsp_row != NULL && (png_ptr->row_number & 2))
+                  png_combine_row(png_ptr, dsp_row,
+                     png_pass_dsp_mask[png_ptr->pass]);
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 5: /* even rows; nothing decoded if width < 2 */
+            if ((png_ptr->row_number & 1) || png_ptr->width < 2)
+            {
+               if (dsp_row != NULL)
+                  png_combine_row(png_ptr, dsp_row,
+                     png_pass_dsp_mask[png_ptr->pass]);
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 6: /* odd rows only */
+            if (!(png_ptr->row_number & 1))
+            {
+               png_read_finish_row(png_ptr);
+               return;
+            }
+            break;
+      }
+   }
+#endif
+
+   if (!(png_ptr->mode & PNG_HAVE_IDAT))
+      png_error(png_ptr, "Invalid attempt to read row data");
+   /* inflate into row_buf until irowbytes bytes have been produced */
+   png_ptr->zstream.next_out = png_ptr->row_buf;
+   png_ptr->zstream.avail_out = (uInt)png_ptr->irowbytes;
+   do
+   {
+      if (!(png_ptr->zstream.avail_in))
+      {
+         while (!png_ptr->idat_size) /* chunk used up: next one must be IDAT */
+         {
+            png_byte chunk_length[4];
+
+            png_crc_finish(png_ptr, 0);
+
+            png_read_data(png_ptr, chunk_length, 4);
+            png_ptr->idat_size = png_get_uint_31(png_ptr,chunk_length);
+
+            png_reset_crc(png_ptr);
+            png_crc_read(png_ptr, png_ptr->chunk_name, 4);
+            if (png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+               png_error(png_ptr, "Not enough image data");
+         }
+         png_ptr->zstream.avail_in = (uInt)png_ptr->zbuf_size;
+         png_ptr->zstream.next_in = png_ptr->zbuf;
+         if (png_ptr->zbuf_size > png_ptr->idat_size)
+            png_ptr->zstream.avail_in = (uInt)png_ptr->idat_size;
+         png_crc_read(png_ptr, png_ptr->zbuf,
+            (png_size_t)png_ptr->zstream.avail_in);
+         png_ptr->idat_size -= png_ptr->zstream.avail_in;
+      }
+      ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH);
+      if (ret == Z_STREAM_END)
+      {
+         if (png_ptr->zstream.avail_out || png_ptr->zstream.avail_in ||
+            png_ptr->idat_size)
+            png_error(png_ptr, "Extra compressed data");
+         png_ptr->mode |= PNG_AFTER_IDAT;
+         png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
+         break;
+      }
+      if (ret != Z_OK)
+         png_error(png_ptr, png_ptr->zstream.msg ? png_ptr->zstream.msg :
+                   "Decompression error");
+
+   } while (png_ptr->zstream.avail_out);
+
+   png_ptr->row_info.color_type = png_ptr->color_type;
+   png_ptr->row_info.width = png_ptr->iwidth;
+   png_ptr->row_info.channels = png_ptr->channels;
+   png_ptr->row_info.bit_depth = png_ptr->bit_depth;
+   png_ptr->row_info.pixel_depth = png_ptr->pixel_depth;
+   png_ptr->row_info.rowbytes = PNG_ROWBYTES(png_ptr->row_info.pixel_depth,
+       png_ptr->row_info.width);
+   /* row_buf[0] selects the filter to undo; zero skips the call entirely */
+   if(png_ptr->row_buf[0])
+   png_read_filter_row(png_ptr, &(png_ptr->row_info),
+      png_ptr->row_buf + 1, png_ptr->prev_row + 1,
+      (int)(png_ptr->row_buf[0]));
+   /* keep a copy in prev_row, which the next row's filter call reads */
+   png_memcpy_check(png_ptr, png_ptr->prev_row, png_ptr->row_buf,
+      png_ptr->rowbytes + 1);
+
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   if((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
+      (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING))
+   {
+      /* Intrapixel differencing */
+      png_do_read_intrapixel(&(png_ptr->row_info), png_ptr->row_buf + 1);
+   }
+#endif
+
+
+   if (png_ptr->transformations || (png_ptr->flags&PNG_FLAG_STRIP_ALPHA))
+      png_do_read_transformations(png_ptr);
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED)
+   /* blow up interlaced rows to full size */
+   if (png_ptr->interlaced &&
+      (png_ptr->transformations & PNG_INTERLACE))
+   {
+      if (png_ptr->pass < 6)
+/*       old interface (pre-1.0.9):
+         png_do_read_interlace(&(png_ptr->row_info),
+            png_ptr->row_buf + 1, png_ptr->pass, png_ptr->transformations);
+ */
+         png_do_read_interlace(png_ptr);
+
+      if (dsp_row != NULL)
+         png_combine_row(png_ptr, dsp_row,
+            png_pass_dsp_mask[png_ptr->pass]);
+      if (row != NULL)
+         png_combine_row(png_ptr, row,
+            png_pass_mask[png_ptr->pass]);
+   }
+   else
+#endif
+   {
+      if (row != NULL)
+         png_combine_row(png_ptr, row, 0xff);
+      if (dsp_row != NULL)
+         png_combine_row(png_ptr, dsp_row, 0xff);
+   }
+   png_read_finish_row(png_ptr);
+   /* notify the optional per-row status callback, if one is installed */
+   if (png_ptr->read_row_fn != NULL)
+      (*(png_ptr->read_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass);
+}
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* Read one or more rows of image data (num_rows of them).
+ *
+ * "row" holds the actual image: pixels are stored in it as they
+ * arrive, so an image displayed after each pass appears to "sparkle"
+ * in.  "display_row" can instead be used to show a "chunky"
+ * progressive image, with finer detail added as it becomes available.
+ *
+ * If the image is interlaced and png_set_interlace_handling() has been
+ * called, the buffers need to contain the rows from the previous pass.
+ * If the image has alpha or transparency and png_handle_alpha()[*] has
+ * been called, the buffers must be initialized to the contents of the
+ * screen.
+ *
+ * Pass NULL for display_row if the "chunky" display is not wanted.
+ * Pass NULL for row if the sparkle display is not wanted and
+ * png_handle_alpha() has not been called.  Once png_handle_alpha() has
+ * been called on an image with an alpha channel or a transparency
+ * chunk, a row buffer is required; display_row stays optional.  If the
+ * image is not interlaced, or png_set_interlace_handling() was not
+ * called, display_row gains nothing over row, so pass NULL for it.
+ *
+ * When both row and display_row are NULL no rows are read at all.
+ *
+ * [*] png_handle_alpha() does not exist yet, as of this version of libpng
+ */
+
+void PNGAPI
+png_read_rows(png_structp png_ptr, png_bytepp row,
+   png_bytepp display_row, png_uint_32 num_rows)
+{
+   png_uint_32 remaining;
+   png_bytepp next_row = row;
+   png_bytepp next_dsp = display_row;
+
+   png_debug(1, "in png_read_rows\n");
+   if (png_ptr == NULL)
+      return;
+
+   /* With no destination buffer at all there is nothing to read into. */
+   if (next_row == NULL && next_dsp == NULL)
+      return;
+
+   /* One png_read_row() call per requested row; each supplied pointer
+    * array is walked in step, a missing one contributes NULL.
+    */
+   for (remaining = num_rows; remaining > 0; remaining--)
+   {
+      png_bytep out = png_bytep_NULL;
+      png_bytep dsp = png_bytep_NULL;
+
+      if (next_row != NULL)
+         out = *next_row++;
+
+      if (next_dsp != NULL)
+         dsp = *next_dsp++;
+
+      png_read_row(png_ptr, out, dsp);
+   }
+}
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* Read the entire image into the rows pointed to by "image".
+ *
+ * If the image has an alpha channel or a tRNS chunk and
+ * png_handle_alpha()[*] has been called, the rows must first be
+ * initialized with the image that PNG will be overlaying.
+ *
+ * num_rows is set again here because png_read_start_row() may have set
+ * it incorrectly if png_read_update_info() or png_start_read_image()
+ * ran before png_set_interlace_handling(), which should have been
+ * called ahead of either of them.
+ *
+ * Call this function only once.  To obtain an image for each pass of
+ * an interlaced image, use png_read_rows() instead.
+ *
+ * [*] png_handle_alpha() does not exist yet, as of this version of libpng
+ */
+void PNGAPI
+png_read_image(png_structp png_ptr, png_bytepp image)
+{
+   png_uint_32 y, height;
+   int num_passes, p;
+
+   png_debug(1, "in png_read_image\n");
+   if (png_ptr == NULL)
+      return;
+
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+   num_passes = png_set_interlace_handling(png_ptr);
+#else
+   if (png_ptr->interlaced)
+      png_error(png_ptr,
+        "Cannot read interlaced image -- interlace handler disabled.");
+   num_passes = 1;
+#endif
+
+   /* Re-establish num_rows from the image height (see header comment). */
+   height = png_ptr->height;
+   png_ptr->num_rows = height;
+
+   for (p = 0; p < num_passes; p++)
+   {
+      for (y = 0; y < height; y++)
+         png_read_row(png_ptr, image[y], png_bytep_NULL);
+   }
+}
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+/* Read the end of the PNG file.  Loops over the chunks that follow the
+ * image data, handing each to its handler, until IEND has been seen.
+ * Reads any comments or time information found there, if info is not NULL.
+ */
+void PNGAPI
+png_read_end(png_structp png_ptr, png_infop info_ptr)
+{
+   png_byte chunk_length[4]; /* raw 4-byte chunk length field */
+   png_uint_32 length; /* decoded length of the current chunk */
+
+   png_debug(1, "in png_read_end\n");
+   if(png_ptr == NULL) return;
+   png_crc_finish(png_ptr, 0); /* Finish off CRC from last IDAT chunk */
+   /* one chunk per iteration, until the IEND handler sets PNG_HAVE_IEND */
+   do
+   {
+#ifdef PNG_USE_LOCAL_ARRAYS
+      PNG_CONST PNG_IHDR;
+      PNG_CONST PNG_IDAT;
+      PNG_CONST PNG_IEND;
+      PNG_CONST PNG_PLTE;
+#if defined(PNG_READ_bKGD_SUPPORTED)
+      PNG_CONST PNG_bKGD;
+#endif
+#if defined(PNG_READ_cHRM_SUPPORTED)
+      PNG_CONST PNG_cHRM;
+#endif
+#if defined(PNG_READ_gAMA_SUPPORTED)
+      PNG_CONST PNG_gAMA;
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+      PNG_CONST PNG_hIST;
+#endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+      PNG_CONST PNG_iCCP;
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+      PNG_CONST PNG_iTXt;
+#endif
+#if defined(PNG_READ_oFFs_SUPPORTED)
+      PNG_CONST PNG_oFFs;
+#endif
+#if defined(PNG_READ_pCAL_SUPPORTED)
+      PNG_CONST PNG_pCAL;
+#endif
+#if defined(PNG_READ_pHYs_SUPPORTED)
+      PNG_CONST PNG_pHYs;
+#endif
+#if defined(PNG_READ_sBIT_SUPPORTED)
+      PNG_CONST PNG_sBIT;
+#endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+      PNG_CONST PNG_sCAL;
+#endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+      PNG_CONST PNG_sPLT;
+#endif
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      PNG_CONST PNG_sRGB;
+#endif
+#if defined(PNG_READ_tEXt_SUPPORTED)
+      PNG_CONST PNG_tEXt;
+#endif
+#if defined(PNG_READ_tIME_SUPPORTED)
+      PNG_CONST PNG_tIME;
+#endif
+#if defined(PNG_READ_tRNS_SUPPORTED)
+      PNG_CONST PNG_tRNS;
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+      PNG_CONST PNG_zTXt;
+#endif
+#endif /* PNG_USE_LOCAL_ARRAYS */
+      /* chunk header: a 4-byte length followed by the 4-byte chunk name */
+      png_read_data(png_ptr, chunk_length, 4);
+      length = png_get_uint_31(png_ptr,chunk_length);
+      /* the CRC is reset here, after the length and before the name */
+      png_reset_crc(png_ptr);
+      png_crc_read(png_ptr, png_ptr->chunk_name, 4);
+
+      png_debug1(0, "Reading %s chunk.\n", png_ptr->chunk_name);
+
+      if (!png_memcmp(png_ptr->chunk_name, png_IHDR, 4))
+         png_handle_IHDR(png_ptr, info_ptr, length);
+      else if (!png_memcmp(png_ptr->chunk_name, png_IEND, 4))
+         png_handle_IEND(png_ptr, info_ptr, length);
+#ifdef PNG_HANDLE_AS_UNKNOWN_SUPPORTED
+      else if (png_handle_as_unknown(png_ptr, png_ptr->chunk_name))
+      {
+         if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+         {
+            if ((length > 0) || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT))
+               png_error(png_ptr, "Too many IDAT's found");
+         }
+         png_handle_unknown(png_ptr, info_ptr, length);
+         if (!png_memcmp(png_ptr->chunk_name, png_PLTE, 4))
+            png_ptr->mode |= PNG_HAVE_PLTE;
+      }
+#endif
+      else if (!png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+      {
+         /* Zero length IDATs are legal after the last IDAT has been
+          * read, but not after other chunks have been read.
+          */
+         if ((length > 0) || (png_ptr->mode & PNG_HAVE_CHUNK_AFTER_IDAT))
+            png_error(png_ptr, "Too many IDAT's found");
+         png_crc_finish(png_ptr, length);
+      }
+      else if (!png_memcmp(png_ptr->chunk_name, png_PLTE, 4))
+         png_handle_PLTE(png_ptr, info_ptr, length);
+#if defined(PNG_READ_bKGD_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_bKGD, 4))
+         png_handle_bKGD(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_cHRM_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_cHRM, 4))
+         png_handle_cHRM(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_gAMA_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_gAMA, 4))
+         png_handle_gAMA(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_hIST, 4))
+         png_handle_hIST(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_oFFs_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_oFFs, 4))
+         png_handle_oFFs(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_pCAL_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_pCAL, 4))
+         png_handle_pCAL(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sCAL_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sCAL, 4))
+         png_handle_sCAL(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_pHYs_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_pHYs, 4))
+         png_handle_pHYs(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sBIT_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sBIT, 4))
+         png_handle_sBIT(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sRGB, 4))
+         png_handle_sRGB(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_iCCP_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_iCCP, 4))
+         png_handle_iCCP(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_sPLT_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_sPLT, 4))
+         png_handle_sPLT(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_tEXt_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_tEXt, 4))
+         png_handle_tEXt(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_tIME_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_tIME, 4))
+         png_handle_tIME(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_tRNS_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_tRNS, 4))
+         png_handle_tRNS(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_zTXt_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_zTXt, 4))
+         png_handle_zTXt(png_ptr, info_ptr, length);
+#endif
+#if defined(PNG_READ_iTXt_SUPPORTED)
+      else if (!png_memcmp(png_ptr->chunk_name, png_iTXt, 4))
+         png_handle_iTXt(png_ptr, info_ptr, length);
+#endif
+      else
+         png_handle_unknown(png_ptr, info_ptr, length);
+   } while (!(png_ptr->mode & PNG_HAVE_IEND));
+}
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+
+/* free all memory used by the read; does nothing if there is no png_struct */
+void PNGAPI
+png_destroy_read_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr,
+   png_infopp end_info_ptr_ptr)
+{
+   png_structp png_ptr = NULL;
+   png_infop info_ptr = NULL, end_info_ptr = NULL;
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_free_ptr free_fn;
+   png_voidp mem_ptr;
+#endif
+
+   png_debug(1, "in png_destroy_read_struct\n");
+   if (png_ptr_ptr != NULL)
+      png_ptr = *png_ptr_ptr;
+   if (png_ptr == NULL) return; /* nothing to free; png_ptr is used below */
+   if (info_ptr_ptr != NULL)
+      info_ptr = *info_ptr_ptr;
+
+   if (end_info_ptr_ptr != NULL)
+      end_info_ptr = *end_info_ptr_ptr;
+   /* keep the free hooks: png_read_destroy() clears the png_struct */
+#ifdef PNG_USER_MEM_SUPPORTED
+   free_fn = png_ptr->free_fn;
+   mem_ptr = png_ptr->mem_ptr;
+#endif
+
+   png_read_destroy(png_ptr, info_ptr, end_info_ptr);
+
+   if (info_ptr != NULL)
+   {
+#if defined(PNG_TEXT_SUPPORTED)
+      png_free_data(png_ptr, info_ptr, PNG_FREE_TEXT, -1);
+#endif
+
+#ifdef PNG_USER_MEM_SUPPORTED
+      png_destroy_struct_2((png_voidp)info_ptr, (png_free_ptr)free_fn,
+          (png_voidp)mem_ptr);
+#else
+      png_destroy_struct((png_voidp)info_ptr);
+#endif
+      *info_ptr_ptr = NULL;
+   }
+
+   if (end_info_ptr != NULL)
+   {
+#if defined(PNG_READ_TEXT_SUPPORTED)
+      png_free_data(png_ptr, end_info_ptr, PNG_FREE_TEXT, -1);
+#endif
+#ifdef PNG_USER_MEM_SUPPORTED
+      png_destroy_struct_2((png_voidp)end_info_ptr, (png_free_ptr)free_fn,
+         (png_voidp)mem_ptr);
+#else
+      png_destroy_struct((png_voidp)end_info_ptr);
+#endif
+      *end_info_ptr_ptr = NULL;
+   }
+
+   if (png_ptr != NULL)
+   {
+#ifdef PNG_USER_MEM_SUPPORTED
+      png_destroy_struct_2((png_voidp)png_ptr, (png_free_ptr)free_fn,
+          (png_voidp)mem_ptr);
+#else
+      png_destroy_struct((png_voidp)png_ptr);
+#endif
+      *png_ptr_ptr = NULL;
+   }
+}
+
+/* free the buffers owned by png_ptr, then reset the struct (old method) */
+void /* PRIVATE */
+png_read_destroy(png_structp png_ptr, png_infop info_ptr, png_infop end_info_ptr)
+{
+#ifdef PNG_SETJMP_SUPPORTED
+   jmp_buf tmp_jmp;
+#endif
+   png_error_ptr error_fn;
+   png_error_ptr warning_fn;
+   png_voidp error_ptr;
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_free_ptr free_fn;
+#endif
+
+   png_debug(1, "in png_read_destroy\n");
+   if (info_ptr != NULL)
+      png_info_destroy(png_ptr, info_ptr);
+
+   if (end_info_ptr != NULL)
+      png_info_destroy(png_ptr, end_info_ptr);
+   /* release the working buffers hanging off the read struct */
+   png_free(png_ptr, png_ptr->zbuf);
+   png_free(png_ptr, png_ptr->big_row_buf);
+   png_free(png_ptr, png_ptr->prev_row);
+#if defined(PNG_READ_DITHER_SUPPORTED)
+   png_free(png_ptr, png_ptr->palette_lookup);
+   png_free(png_ptr, png_ptr->dither_index);
+#endif
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   png_free(png_ptr, png_ptr->gamma_table);
+#endif
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   png_free(png_ptr, png_ptr->gamma_from_1);
+   png_free(png_ptr, png_ptr->gamma_to_1);
+#endif
+#ifdef PNG_FREE_ME_SUPPORTED
+   if (png_ptr->free_me & PNG_FREE_PLTE)
+      png_zfree(png_ptr, png_ptr->palette);
+   png_ptr->free_me &= ~PNG_FREE_PLTE;
+#else
+   if (png_ptr->flags & PNG_FLAG_FREE_PLTE)
+      png_zfree(png_ptr, png_ptr->palette);
+   png_ptr->flags &= ~PNG_FLAG_FREE_PLTE;
+#endif
+#if defined(PNG_tRNS_SUPPORTED) || \
+    defined(PNG_READ_EXPAND_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+#ifdef PNG_FREE_ME_SUPPORTED
+   if (png_ptr->free_me & PNG_FREE_TRNS)
+      png_free(png_ptr, png_ptr->trans);
+   png_ptr->free_me &= ~PNG_FREE_TRNS;
+#else
+   if (png_ptr->flags & PNG_FLAG_FREE_TRNS)
+      png_free(png_ptr, png_ptr->trans);
+   png_ptr->flags &= ~PNG_FLAG_FREE_TRNS;
+#endif
+#endif
+#if defined(PNG_READ_hIST_SUPPORTED)
+#ifdef PNG_FREE_ME_SUPPORTED
+   if (png_ptr->free_me & PNG_FREE_HIST)
+      png_free(png_ptr, png_ptr->hist);
+   png_ptr->free_me &= ~PNG_FREE_HIST;
+#else
+   if (png_ptr->flags & PNG_FLAG_FREE_HIST)
+      png_free(png_ptr, png_ptr->hist);
+   png_ptr->flags &= ~PNG_FLAG_FREE_HIST;
+#endif
+#endif
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   if (png_ptr->gamma_16_table != NULL)
+   {
+      int i;
+      int istop = (1 << (8 - png_ptr->gamma_shift)); /* sub-table count */
+      for (i = 0; i < istop; i++)
+      {
+         png_free(png_ptr, png_ptr->gamma_16_table[i]);
+      }
+   png_free(png_ptr, png_ptr->gamma_16_table);
+   }
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   if (png_ptr->gamma_16_from_1 != NULL)
+   {
+      int i;
+      int istop = (1 << (8 - png_ptr->gamma_shift));
+      for (i = 0; i < istop; i++)
+      {
+         png_free(png_ptr, png_ptr->gamma_16_from_1[i]);
+      }
+   png_free(png_ptr, png_ptr->gamma_16_from_1);
+   }
+   if (png_ptr->gamma_16_to_1 != NULL)
+   {
+      int i;
+      int istop = (1 << (8 - png_ptr->gamma_shift));
+      for (i = 0; i < istop; i++)
+      {
+         png_free(png_ptr, png_ptr->gamma_16_to_1[i]);
+      }
+   png_free(png_ptr, png_ptr->gamma_16_to_1);
+   }
+#endif
+#endif
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+   png_free(png_ptr, png_ptr->time_buffer);
+#endif
+
+   inflateEnd(&png_ptr->zstream);
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+   png_free(png_ptr, png_ptr->save_buffer);
+#endif
+
+#ifdef PNG_PROGRESSIVE_READ_SUPPORTED
+#ifdef PNG_TEXT_SUPPORTED
+   png_free(png_ptr, png_ptr->current_text);
+#endif /* PNG_TEXT_SUPPORTED */
+#endif /* PNG_PROGRESSIVE_READ_SUPPORTED */
+
+   /* Save the important info out of the png_struct, in case it is
+    * being used again.
+    */
+#ifdef PNG_SETJMP_SUPPORTED
+   png_memcpy(tmp_jmp, png_ptr->jmpbuf, png_sizeof (jmp_buf));
+#endif
+
+   error_fn = png_ptr->error_fn;
+   warning_fn = png_ptr->warning_fn;
+   error_ptr = png_ptr->error_ptr;
+#ifdef PNG_USER_MEM_SUPPORTED
+   free_fn = png_ptr->free_fn;
+#endif
+   /* zero the whole struct; the values saved above are put back below */
+   png_memset(png_ptr, 0, png_sizeof (png_struct));
+
+   png_ptr->error_fn = error_fn;
+   png_ptr->warning_fn = warning_fn;
+   png_ptr->error_ptr = error_ptr;
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_ptr->free_fn = free_fn;
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+   png_memcpy(png_ptr->jmpbuf, tmp_jmp, png_sizeof (jmp_buf));
+#endif
+
+}
+
+void PNGAPI /* store the per-row status callback used by png_read_row() */
+png_set_read_status_fn(png_structp png_ptr, png_read_status_ptr read_row_fn)
+{
+   if (png_ptr != NULL)
+      png_ptr->read_row_fn = read_row_fn;
+}
+
+
+#ifndef PNG_NO_SEQUENTIAL_READ_SUPPORTED
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+/* High-level read: read the whole PNG (info, image rows, trailing chunks)
+ * into info_ptr, applying the transforms selected by "transforms". */
+void PNGAPI
+png_read_png(png_structp png_ptr, png_infop info_ptr,
+                           int transforms,
+                           voidp params)
+{
+   int row;
+
+   if(png_ptr == NULL) return;
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED)
+   /* invert the alpha channel from opacity to transparency */
+   if (transforms & PNG_TRANSFORM_INVERT_ALPHA)
+       png_set_invert_alpha(png_ptr);
+#endif
+
+   /* png_read_info() gives us all of the information from the
+    * PNG file before the first IDAT (image data chunk).
+    */
+   png_read_info(png_ptr, info_ptr);
+   if (info_ptr->height > PNG_UINT_32_MAX/png_sizeof(png_bytep))
+      png_error(png_ptr,"Image is too high to process with png_read_png()");
+
+   /* -------------- image transformations start here ------------------- */
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+   /* tell libpng to strip 16 bit/color files down to 8 bits per color */
+   if (transforms & PNG_TRANSFORM_STRIP_16)
+       png_set_strip_16(png_ptr);
+#endif
+
+#if defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+   /* Strip alpha bytes from the input data without combining with
+    * the background (not recommended).
+    */
+   if (transforms & PNG_TRANSFORM_STRIP_ALPHA)
+       png_set_strip_alpha(png_ptr);
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED) && !defined(PNG_READ_EXPAND_SUPPORTED)
+   /* Extract multiple pixels with bit depths of 1, 2, or 4 from a single
+    * byte into separate bytes (useful for paletted and grayscale images).
+    */
+   if (transforms & PNG_TRANSFORM_PACKING)
+       png_set_packing(png_ptr);
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+   /* Change the order of packed pixels to least significant bit first
+    * (not useful if you are using png_set_packing).
+    */
+   if (transforms & PNG_TRANSFORM_PACKSWAP)
+       png_set_packswap(png_ptr);
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+   /* Expand paletted colors into true RGB triplets
+    * Expand grayscale images to full 8 bits from 1, 2, or 4 bits/pixel
+    * Expand paletted or RGB images with transparency to full alpha
+    * channels so the data will be available as RGBA quartets.
+    */
+   if (transforms & PNG_TRANSFORM_EXPAND)
+       if ((png_ptr->bit_depth < 8) ||
+           (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) ||
+           (png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)))
+         png_set_expand(png_ptr);
+#endif
+
+   /* We don't handle background color or gamma transformation or dithering.
+    */
+
+#if defined(PNG_READ_INVERT_SUPPORTED)
+   /* invert monochrome files to have 0 as white and 1 as black */
+   if (transforms & PNG_TRANSFORM_INVERT_MONO)
+       png_set_invert_mono(png_ptr);
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED)
+   /* If you want to shift the pixel values from the range [0,255] or
+    * [0,65535] to the original [0,7] or [0,31], or whatever range the
+    * colors were originally in:
+    */
+   if ((transforms & PNG_TRANSFORM_SHIFT)
+       && png_get_valid(png_ptr, info_ptr, PNG_INFO_sBIT))
+   {
+      png_color_8p sig_bit;
+
+      png_get_sBIT(png_ptr, info_ptr, &sig_bit);
+      png_set_shift(png_ptr, sig_bit);
+   }
+#endif
+
+#if defined(PNG_READ_BGR_SUPPORTED)
+   /* flip the RGB pixels to BGR (or RGBA to BGRA) */
+   if (transforms & PNG_TRANSFORM_BGR)
+       png_set_bgr(png_ptr);
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED)
+   /* swap the RGBA or GA data to ARGB or AG (or BGRA to ABGR) */
+   if (transforms & PNG_TRANSFORM_SWAP_ALPHA)
+       png_set_swap_alpha(png_ptr);
+#endif
+
+#if defined(PNG_READ_SWAP_SUPPORTED)
+   /* swap bytes of 16 bit files to least significant byte first */
+   if (transforms & PNG_TRANSFORM_SWAP_ENDIAN)
+       png_set_swap(png_ptr);
+#endif
+
+   /* We don't handle adding filler bytes */
+
+   /* Optional call to gamma correct and add the background to the palette
+    * and update info structure.  REQUIRED if you are expecting libpng to
+    * update the palette for you (i.e., you selected such a transform above).
+    */
+   png_read_update_info(png_ptr, info_ptr);
+
+   /* -------------- image transformations end here ------------------- */
+
+#ifdef PNG_FREE_ME_SUPPORTED
+   png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0);
+#endif
+   if(info_ptr->row_pointers == NULL)
+   {
+      info_ptr->row_pointers = (png_bytepp)png_malloc(png_ptr,
+         info_ptr->height * png_sizeof(png_bytep));
+      /* Zero the table before PNG_FREE_ROWS is armed: if a row allocation
+       * below fails, cleanup must not be handed uninitialized pointers.
+       */
+      png_memset(info_ptr->row_pointers, 0,
+         info_ptr->height * png_sizeof(png_bytep));
+#ifdef PNG_FREE_ME_SUPPORTED
+      info_ptr->free_me |= PNG_FREE_ROWS;
+#endif
+      for (row = 0; row < (int)info_ptr->height; row++)
+      {
+         info_ptr->row_pointers[row] = (png_bytep)png_malloc(png_ptr,
+            png_get_rowbytes(png_ptr, info_ptr));
+      }
+   }
+
+   png_read_image(png_ptr, info_ptr->row_pointers);
+   info_ptr->valid |= PNG_INFO_IDAT;
+
+   /* read rest of file, and get additional chunks in info_ptr - REQUIRED */
+   png_read_end(png_ptr, info_ptr);
+
+   transforms = transforms; /* quiet compiler warnings */
+   params = params;
+}
+#endif /* PNG_INFO_IMAGE_SUPPORTED */
+#endif /* PNG_NO_SEQUENTIAL_READ_SUPPORTED */
+#endif /* PNG_READ_SUPPORTED */
diff --git a/src/libpng/pngrio.c b/src/libpng/pngrio.c
new file mode 100644
index 0000000..7d2522f
--- /dev/null
+++ b/src/libpng/pngrio.c
@@ -0,0 +1,167 @@
+
+/* pngrio.c - functions for data input
+ *
+ * Last changed in libpng 1.2.13 November 13, 2006
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2006 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file provides a location for all input.  Users who need
+ * special handling are expected to write a function that has the same
+ * arguments as this and performs a similar function, but that possibly
+ * has a different input method.  Note that you shouldn't change this
+ * function, but rather write a replacement function and then make
+ * libpng use it at run time with png_set_read_fn(...).
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED)
+
+/* Fetch "length" bytes of input into "data" by calling the read callback
+   stored in png_ptr; if none is installed, raise a png_error.  The default
+   callback reads from a file pointer.  Requests can be very small, so a
+   replacement doing unbuffered reads should add simple buffering.  It should
+   never be asked to read more than 64K on a 16 bit machine. */
+void /* PRIVATE */
+png_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   png_debug1(4,"reading %d bytes\n", (int)length);
+   if (png_ptr->read_data_fn == NULL)
+      png_error(png_ptr, "Call to NULL read function");
+   else
+      (*(png_ptr->read_data_fn))(png_ptr, data, length);
+}
+
+#if !defined(PNG_NO_STDIO)
+/* This is the function that does the actual reading of data.  If you are
+   not reading from a standard C stream, you should create a replacement
+   read_data function and use it at run time with png_set_read_fn(), rather
+   than changing the library. */
+#ifndef USE_FAR_KEYWORD
+void PNGAPI
+png_default_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   png_size_t got;   /* bytes actually delivered by the read call */
+
+   if (png_ptr == NULL)
+      return;
+   /* A count short of "length" (0 included) is reported as a read error. */
+#if defined(_WIN32_WCE)
+   if ( !ReadFile((HANDLE)(png_ptr->io_ptr), data, length, &got, NULL) )
+      got = 0;
+#else
+   got = (png_size_t)fread(data, (png_size_t)1, length,
+      (png_FILE_p)png_ptr->io_ptr);
+#endif
+
+   if (got == length)
+      return;
+   png_error(png_ptr, "Read Error");
+}
+#else
+/* This is the model-independent version.  The standard I/O library cannot
+   handle far buffers in the medium and small models, so the data is read
+   into a local buffer of NEAR_BUF_SIZE bytes and copied out from there.
+*/
+
+#define NEAR_BUF_SIZE 1024
+#define MIN(a,b) ((a) <= (b) ? (a) : (b))
+
+static void PNGAPI
+png_default_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   int total;            /* bytes successfully read so far */
+   png_byte *near_dst;
+   png_FILE_p stream;
+
+   if (png_ptr == NULL) return;
+   near_dst = (png_byte *)CVT_PTR_NOCHECK(data);
+   stream = (png_FILE_p)CVT_PTR(png_ptr->io_ptr);
+   if ((png_bytep)near_dst != data)
+   {
+      /* data is not near: stage through a local buffer, chunk by chunk */
+      png_byte staging[NEAR_BUF_SIZE];
+      png_size_t chunk, left, got;
+      total = 0;
+      left = length;
+      do
+      {
+         chunk = MIN(NEAR_BUF_SIZE, left);
+#if defined(_WIN32_WCE)
+         if ( !ReadFile((HANDLE)(stream), staging, chunk, &got, NULL) )
+            got = 0;
+#else
+         got = fread(staging, (png_size_t)1, chunk, stream);
+#endif
+         png_memcpy(data, staging, chunk); /* local buffer -> caller's buffer */
+         if (got != chunk)
+            break;
+         total += got;
+         data += chunk;
+         left -= chunk;
+      }
+      while (left != 0);
+   }
+   else
+   {
+      /* data really is near, so the usual direct read works */
+#if defined(_WIN32_WCE)
+      if ( !ReadFile((HANDLE)(png_ptr->io_ptr), data, length, &total, NULL) )
+         total = 0;
+#else
+      total = fread(near_dst, 1, length, stream);
+#endif
+   }
+   if ((png_uint_32)total != (png_uint_32)length)
+      png_error(png_ptr, "read Error");
+}
+#endif
+#endif
+
+/* This function allows the application to supply a new input function
+   for libpng if standard C streams aren't being used.
+
+   This function takes as its arguments:
+   png_ptr      - pointer to a png input data structure
+   io_ptr       - pointer to user supplied structure containing info about
+                  the input functions.  May be NULL.
+   read_data_fn - pointer to a new input function that takes as its
+                  arguments a pointer to a png_struct, a pointer to
+                  a location where input data can be stored, and a
+                  png_size_t that is the number of bytes to be read.
+                  To exit and output any fatal error messages the new read
+                  function should call png_error(png_ptr, "Error msg"). */
+void PNGAPI
+png_set_read_fn(png_structp png_ptr, png_voidp io_ptr,
+   png_rw_ptr read_data_fn)
+{
+   if (png_ptr == NULL)
+      return;
+
+   png_ptr->io_ptr = io_ptr;
+   png_ptr->read_data_fn = read_data_fn;
+#if !defined(PNG_NO_STDIO)
+   /* With stdio available, a NULL callback selects the built-in reader. */
+   if (read_data_fn == NULL)
+      png_ptr->read_data_fn = png_default_read_data;
+#endif
+
+   /* A struct set up for reading must not keep a write callback. */
+   if (png_ptr->write_data_fn != NULL)
+   {
+      png_ptr->write_data_fn = NULL;
+      png_warning(png_ptr,
+         "It's an error to set both read_data_fn and write_data_fn in the ");
+      png_warning(png_ptr,
+         "same structure.  Resetting write_data_fn to NULL.");
+   }
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+   /* Likewise drop any flush callback. */
+   png_ptr->output_flush_fn = NULL;
+#endif
+}
+#endif /* PNG_READ_SUPPORTED */
diff --git a/src/libpng/pngrtran.c b/src/libpng/pngrtran.c
new file mode 100644
index 0000000..cda3921
--- /dev/null
+++ b/src/libpng/pngrtran.c
@@ -0,0 +1,4284 @@
+
+/* pngrtran.c - transforms the data in a row for PNG readers
+ *
+ * Last changed in libpng 1.2.22 [October 13, 2007]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file contains functions optionally called by an application
+ * in order to tell libpng how to handle data when reading a PNG.
+ * Transformations that are used in both reading and writing are
+ * in pngtrans.c.
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED)
+
+/* Set the action on getting a CRC error for an ancillary or critical chunk. */
+void PNGAPI
+png_set_crc_action(png_structp png_ptr, int crit_action, int ancil_action)
+{
+   png_debug(1, "in png_set_crc_action\n");
+
+   /* crit_action and ancil_action are PNG_CRC_* codes selecting the reaction
+    * to a CRC error in critical and ancillary chunks respectively.  The
+    * choice is recorded in png_ptr->flags.  A NULL png_ptr is ignored.
+    */
+   if (png_ptr == NULL)
+      return;
+
+   /* Critical chunks.  PNG_CRC_NO_CHANGE leaves the flags untouched; every
+    * other value (known or not) first clears the critical CRC bits, and
+    * only the two "use data" actions set anything afterwards.
+    */
+   if (crit_action != PNG_CRC_NO_CHANGE)
+   {
+      /* Discarding is not a valid action for critical data: warn, then
+       * treat it like the default.
+       */
+      if (crit_action == PNG_CRC_WARN_DISCARD)
+         png_warning(png_ptr, "Can't discard critical data on CRC error.");
+
+      png_ptr->flags &= ~PNG_FLAG_CRC_CRITICAL_MASK;
+
+      if (crit_action == PNG_CRC_WARN_USE)               /* warn/use data */
+         png_ptr->flags |= PNG_FLAG_CRC_CRITICAL_USE;
+      else if (crit_action == PNG_CRC_QUIET_USE)        /* quiet/use data */
+         png_ptr->flags |= PNG_FLAG_CRC_CRITICAL_USE |
+                           PNG_FLAG_CRC_CRITICAL_IGNORE;
+      /* PNG_CRC_ERROR_QUIT, PNG_CRC_DEFAULT, others: bits stay cleared */
+   }
+
+   /* Ancillary chunks follow the same scheme: clear the ancillary CRC bits
+    * unless the caller asked for no change, then set the bits the chosen
+    * action needs.
+    */
+   if (ancil_action != PNG_CRC_NO_CHANGE)
+   {
+      png_ptr->flags &= ~PNG_FLAG_CRC_ANCILLARY_MASK;
+
+      if (ancil_action == PNG_CRC_WARN_USE)              /* warn/use data */
+         png_ptr->flags |= PNG_FLAG_CRC_ANCILLARY_USE;
+      else if (ancil_action == PNG_CRC_QUIET_USE)       /* quiet/use data */
+         png_ptr->flags |= PNG_FLAG_CRC_ANCILLARY_USE |
+                           PNG_FLAG_CRC_ANCILLARY_NOWARN;
+      else if (ancil_action == PNG_CRC_ERROR_QUIT)          /* error/quit */
+         png_ptr->flags |= PNG_FLAG_CRC_ANCILLARY_NOWARN;
+      /* PNG_CRC_WARN_DISCARD, PNG_CRC_DEFAULT, others: bits stay cleared */
+   }
+}
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) && \
+    defined(PNG_FLOATING_POINT_SUPPORTED)
+/* Record the background color (and gamma) used to handle alpha and tRNS. */
+void PNGAPI
+png_set_background(png_structp png_ptr,
+   png_color_16p background_color, int background_gamma_code,
+   int need_expand, double background_gamma)
+{
+   png_debug(1, "in png_set_background\n");
+   if (png_ptr == NULL) return;
+   if (background_gamma_code == PNG_BACKGROUND_GAMMA_UNKNOWN)
+   {
+      png_warning(png_ptr, "Application must supply a known background gamma");
+      return;
+   }
+   png_memcpy(&(png_ptr->background), background_color,
+      png_sizeof(png_color_16));
+   png_ptr->background_gamma_type = (png_byte)(background_gamma_code);
+   png_ptr->background_gamma = (float)background_gamma;
+   png_ptr->transformations |= PNG_BACKGROUND;
+   if (need_expand)
+      png_ptr->transformations |= PNG_BACKGROUND_EXPAND;
+}
+#endif
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+/* Flag the read transformation that strips 16 bit depth files to 8 bit. */
+void PNGAPI
+png_set_strip_16(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_strip_16\n");
+   if (png_ptr != NULL)
+      png_ptr->transformations |= PNG_16_TO_8;
+}
+#endif
+
+#if defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+void PNGAPI
+png_set_strip_alpha(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_strip_alpha\n");
+   if (png_ptr != NULL)   /* a NULL png_ptr is silently ignored */
+      png_ptr->flags |= PNG_FLAG_STRIP_ALPHA;
+}
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+/* Dither file to 8 bit.  Supply a palette, the current number
+ * of elements in the palette, the maximum number of elements
+ * allowed, and a histogram if possible.  If the current number
+ * of colors is greater than the maximum number, the palette will be
+ * modified to fit in the maximum number.  "full_dither" indicates
+ * whether we need a dithering cube set up for RGB images, or if we
+ * simply are reducing the number of colors in a paletted image.
+ */
+
+typedef struct png_dsort_struct   /* one pair of close colors (png_set_dither) */
+{
+   struct png_dsort_struct FAR * next;   /* next pair in the same hash chain */
+   png_byte left;                        /* index of the first color of the pair */
+   png_byte right;                       /* index of the second color of the pair */
+} png_dsort;
+typedef png_dsort FAR *       png_dsortp;
+typedef png_dsort FAR * FAR * png_dsortpp;
+
+void PNGAPI
+png_set_dither(png_structp png_ptr, png_colorp palette,
+   int num_palette, int maximum_colors, png_uint_16p histogram,
+   int full_dither)
+{  /* Reduce "palette" to maximum_colors if needed; build the dither tables. */
+   png_debug(1, "in png_set_dither\n");
+   if(png_ptr == NULL) return;
+   png_ptr->transformations |= PNG_DITHER;
+
+   if (!full_dither)
+   {
+      int i;
+
+      png_ptr->dither_index = (png_bytep)png_malloc(png_ptr,
+         (png_uint_32)(num_palette * png_sizeof (png_byte)));
+      for (i = 0; i < num_palette; i++)
+         png_ptr->dither_index[i] = (png_byte)i;   /* start as identity map */
+   }
+
+   if (num_palette > maximum_colors)
+   {
+      if (histogram != NULL)
+      {
+         /* This is easy enough, just throw out the least used colors.
+            Perhaps not the best solution, but good enough. */
+
+         int i;
+
+         /* initialize an array to sort colors */
+         png_ptr->dither_sort = (png_bytep)png_malloc(png_ptr,
+            (png_uint_32)(num_palette * png_sizeof (png_byte)));
+
+         /* initialize the dither_sort array */
+         for (i = 0; i < num_palette; i++)
+            png_ptr->dither_sort[i] = (png_byte)i;
+
+         /* Find the least used palette entries by starting a
+            bubble sort, and running it until we have sorted
+            out enough colors.  Note that we don't care about
+            sorting all the colors, just finding which are
+            least used. */
+
+         for (i = num_palette - 1; i >= maximum_colors; i--)
+         {
+            int done; /* to stop early if the list is pre-sorted */
+            int j;
+
+            done = 1;
+            for (j = 0; j < i; j++)
+            {
+               if (histogram[png_ptr->dither_sort[j]]
+                   < histogram[png_ptr->dither_sort[j + 1]])
+               {
+                  png_byte t;
+
+                  t = png_ptr->dither_sort[j];
+                  png_ptr->dither_sort[j] = png_ptr->dither_sort[j + 1];
+                  png_ptr->dither_sort[j + 1] = t;
+                  done = 0;
+               }
+            }
+            if (done)
+               break;
+         }
+
+         /* swap the palette around, and set up a table, if necessary */
+         if (full_dither)
+         {
+            int j = num_palette;
+
+            /* put all the useful colors within the max, but don't
+               move the others */
+            for (i = 0; i < maximum_colors; i++)
+            {
+               if ((int)png_ptr->dither_sort[i] >= maximum_colors)
+               {
+                  do
+                     j--;
+                  while ((int)png_ptr->dither_sort[j] >= maximum_colors);
+                  palette[i] = palette[j];
+               }
+            }
+         }
+         else
+         {
+            int j = num_palette;
+
+            /* move all the used colors inside the max limit, and
+               develop a translation table */
+            for (i = 0; i < maximum_colors; i++)
+            {
+               /* only move the colors we need to */
+               if ((int)png_ptr->dither_sort[i] >= maximum_colors)
+               {
+                  png_color tmp_color;
+
+                  do
+                     j--;
+                  while ((int)png_ptr->dither_sort[j] >= maximum_colors);
+
+                  tmp_color = palette[j];
+                  palette[j] = palette[i];
+                  palette[i] = tmp_color;
+                  /* indicate where the color went */
+                  png_ptr->dither_index[j] = (png_byte)i;
+                  png_ptr->dither_index[i] = (png_byte)j;
+               }
+            }
+
+            /* find closest color for those colors we are not using */
+            for (i = 0; i < num_palette; i++)
+            {
+               if ((int)png_ptr->dither_index[i] >= maximum_colors)
+               {
+                  int min_d, k, min_k, d_index;
+
+                  /* find the closest color to one we threw out */
+                  d_index = png_ptr->dither_index[i];
+                  min_d = PNG_COLOR_DIST(palette[d_index], palette[0]);
+                  for (k = 1, min_k = 0; k < maximum_colors; k++)
+                  {
+                     int d;
+
+                     d = PNG_COLOR_DIST(palette[d_index], palette[k]);
+
+                     if (d < min_d)
+                     {
+                        min_d = d;
+                        min_k = k;
+                     }
+                  }
+                  /* point to closest color */
+                  png_ptr->dither_index[i] = (png_byte)min_k;
+               }
+            }
+         }
+         png_free(png_ptr, png_ptr->dither_sort);
+         png_ptr->dither_sort=NULL;
+      }
+      else
+      {
+         /* This is much harder to do simply (and quickly).  Perhaps
+            we need to go through a median cut routine, but those
+            don't always behave themselves with only a few colors
+            as input.  So we will just find the closest two colors,
+            and throw out one of them (chosen somewhat randomly).
+            [We don't understand this at all, so if someone wants to
+             work on improving it, be our guest - AED, GRP]
+            */
+         int i;
+         int max_d;
+         int num_new_palette;
+         png_dsortp t;
+         png_dsortpp hash;
+
+         t=NULL;
+
+         /* initialize palette index arrays */
+         png_ptr->index_to_palette = (png_bytep)png_malloc(png_ptr,
+            (png_uint_32)(num_palette * png_sizeof (png_byte)));
+         png_ptr->palette_to_index = (png_bytep)png_malloc(png_ptr,
+            (png_uint_32)(num_palette * png_sizeof (png_byte)));
+
+         /* initialize the sort array */
+         for (i = 0; i < num_palette; i++)
+         {
+            png_ptr->index_to_palette[i] = (png_byte)i;
+            png_ptr->palette_to_index[i] = (png_byte)i;
+         }
+
+         hash = (png_dsortpp)png_malloc(png_ptr, (png_uint_32)(769 *
+            png_sizeof (png_dsortp)));
+         for (i = 0; i < 769; i++)
+            hash[i] = NULL;   /* one chain per color distance d */
+/*         png_memset(hash, 0, 769 * png_sizeof (png_dsortp)); */
+
+         num_new_palette = num_palette;
+
+         /* initial wild guess at how far apart the farthest pixel
+            pair we will be eliminating will be.  Larger
+            numbers mean more areas will be allocated, Smaller
+            numbers run the risk of not saving enough data, and
+            having to do this all over again.
+
+            I have not done extensive checking on this number.
+            */
+         max_d = 96;
+
+         while (num_new_palette > maximum_colors)
+         {
+            for (i = 0; i < num_new_palette - 1; i++)
+            {
+               int j;
+
+               for (j = i + 1; j < num_new_palette; j++)
+               {
+                  int d;
+
+                  d = PNG_COLOR_DIST(palette[i], palette[j]);
+
+                  if (d <= max_d)
+                  {
+
+                     t = (png_dsortp)png_malloc_warn(png_ptr,
+                         (png_uint_32)(png_sizeof(png_dsort)));
+                     if (t == NULL)   /* presumably allocation failure */
+                         break;
+                     t->next = hash[d];
+                     t->left = (png_byte)i;
+                     t->right = (png_byte)j;
+                     hash[d] = t;
+                  }
+               }
+               if (t == NULL)
+                  break;
+            }
+
+            if (t != NULL)
+            for (i = 0; i <= max_d; i++)   /* NOTE(review): hash has 769 slots; confirm max_d stays below that */
+            {
+               if (hash[i] != NULL)
+               {
+                  png_dsortp p;
+
+                  for (p = hash[i]; p; p = p->next)
+                  {
+                     if ((int)png_ptr->index_to_palette[p->left]
+                        < num_new_palette &&
+                        (int)png_ptr->index_to_palette[p->right]
+                        < num_new_palette)
+                     {
+                        int j, next_j;
+
+                        if (num_new_palette & 0x01)
+                        {
+                           j = p->left;
+                           next_j = p->right;
+                        }
+                        else
+                        {
+                           j = p->right;
+                           next_j = p->left;
+                        }
+
+                        num_new_palette--;
+                        palette[png_ptr->index_to_palette[j]]
+                          = palette[num_new_palette];
+                        if (!full_dither)
+                        {
+                           int k;
+
+                           for (k = 0; k < num_palette; k++)
+                           {
+                              if (png_ptr->dither_index[k] ==
+                                 png_ptr->index_to_palette[j])
+                                 png_ptr->dither_index[k] =
+                                    png_ptr->index_to_palette[next_j];
+                              if ((int)png_ptr->dither_index[k] ==
+                                 num_new_palette)
+                                 png_ptr->dither_index[k] =
+                                    png_ptr->index_to_palette[j];
+                           }
+                        }
+
+                        png_ptr->index_to_palette[png_ptr->palette_to_index
+                           [num_new_palette]] = png_ptr->index_to_palette[j];
+                        png_ptr->palette_to_index[png_ptr->index_to_palette[j]]
+                           = png_ptr->palette_to_index[num_new_palette];
+
+                        png_ptr->index_to_palette[j] = (png_byte)num_new_palette;
+                        png_ptr->palette_to_index[num_new_palette] = (png_byte)j;
+                     }
+                     if (num_new_palette <= maximum_colors)
+                        break;
+                  }
+                  if (num_new_palette <= maximum_colors)
+                     break;
+               }
+            }
+
+            for (i = 0; i < 769; i++)   /* release every chain before the next pass */
+            {
+               if (hash[i] != NULL)
+               {
+                  png_dsortp p = hash[i];
+                  while (p)
+                  {
+                     t = p->next;
+                     png_free(png_ptr, p);
+                     p = t;
+                  }
+               }
+               hash[i] = 0;
+            }
+            max_d += 96;   /* widen the distance limit for the next pass */
+         }
+         png_free(png_ptr, hash);
+         png_free(png_ptr, png_ptr->palette_to_index);
+         png_free(png_ptr, png_ptr->index_to_palette);
+         png_ptr->palette_to_index=NULL;
+         png_ptr->index_to_palette=NULL;
+      }
+      num_palette = maximum_colors;
+   }
+   if (png_ptr->palette == NULL)
+   {
+      png_ptr->palette = palette;
+   }
+   png_ptr->num_palette = (png_uint_16)num_palette;
+
+   if (full_dither)
+   {
+      int i;
+      png_bytep distance;
+      int total_bits = PNG_DITHER_RED_BITS + PNG_DITHER_GREEN_BITS +
+         PNG_DITHER_BLUE_BITS;
+      int num_red = (1 << PNG_DITHER_RED_BITS);
+      int num_green = (1 << PNG_DITHER_GREEN_BITS);
+      int num_blue = (1 << PNG_DITHER_BLUE_BITS);
+      png_size_t num_entries = ((png_size_t)1 << total_bits);
+
+      png_ptr->palette_lookup = (png_bytep )png_malloc(png_ptr,
+         (png_uint_32)(num_entries * png_sizeof (png_byte)));
+
+      png_memset(png_ptr->palette_lookup, 0, num_entries *
+         png_sizeof (png_byte));
+
+      distance = (png_bytep)png_malloc(png_ptr, (png_uint_32)(num_entries *
+         png_sizeof(png_byte)));
+
+      png_memset(distance, 0xff, num_entries * png_sizeof(png_byte));
+
+      for (i = 0; i < num_palette; i++)
+      {
+         int ir, ig, ib;
+         int r = (palette[i].red >> (8 - PNG_DITHER_RED_BITS));
+         int g = (palette[i].green >> (8 - PNG_DITHER_GREEN_BITS));
+         int b = (palette[i].blue >> (8 - PNG_DITHER_BLUE_BITS));
+
+         for (ir = 0; ir < num_red; ir++)
+         {
+            /* int dr = abs(ir - r); */
+            int dr = ((ir > r) ? ir - r : r - ir);
+            int index_r = (ir << (PNG_DITHER_BLUE_BITS + PNG_DITHER_GREEN_BITS));
+
+            for (ig = 0; ig < num_green; ig++)
+            {
+               /* int dg = abs(ig - g); */
+               int dg = ((ig > g) ? ig - g : g - ig);
+               int dt = dr + dg;
+               int dm = ((dr > dg) ? dr : dg);
+               int index_g = index_r | (ig << PNG_DITHER_BLUE_BITS);
+
+               for (ib = 0; ib < num_blue; ib++)
+               {
+                  int d_index = index_g | ib;
+                  /* int db = abs(ib - b); */
+                  int db = ((ib > b) ? ib - b : b - ib);
+                  int dmax = ((dm > db) ? dm : db);
+                  int d = dmax + dt + db;
+
+                  if (d < (int)distance[d_index])   /* closer than the best so far */
+                  {
+                     distance[d_index] = (png_byte)d;
+                     png_ptr->palette_lookup[d_index] = (png_byte)i;
+                  }
+               }
+            }
+         }
+      }
+
+      png_free(png_ptr, distance);
+   }
+}
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED) && defined(PNG_FLOATING_POINT_SUPPORTED)
+/* Transform the image from the file_gamma to the screen_gamma.  We
+ * only do transformations on images where the file_gamma and screen_gamma
+ * are not close reciprocals, otherwise it slows things down slightly, and
+ * also needlessly introduces small errors.
+ *
+ * We will turn off gamma transformation later if no semitransparent entries
+ * are present in the tRNS array for palette images.  We can't do it here
+ * because we don't necessarily have the tRNS chunk yet.
+ */
+void PNGAPI
+png_set_gamma(png_structp png_ptr, double scrn_gamma, double file_gamma)
+{
+   png_debug(1, "in png_set_gamma\n");
+   if (png_ptr == NULL) return;
+   png_ptr->screen_gamma = (float)scrn_gamma;
+   png_ptr->gamma = (float)file_gamma;
+   if ((png_ptr->color_type == PNG_COLOR_TYPE_PALETTE) ||
+       (png_ptr->color_type & PNG_COLOR_MASK_ALPHA) ||
+       (fabs(scrn_gamma * file_gamma - 1.0) > PNG_GAMMA_THRESHOLD))
+      png_ptr->transformations |= PNG_GAMMA;   /* gamma work is needed */
+}
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+/* Request expansion on read: paletted images to RGB, grayscale images of
+ * less than 8-bit depth to 8-bit depth, and tRNS chunks to alpha channels.
+ * Sets both PNG_EXPAND and PNG_EXPAND_tRNS; a NULL png_ptr is ignored.
+ */
+void PNGAPI
+png_set_expand(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_expand\n");
+   if (png_ptr == NULL) return;
+#ifdef PNG_WARN_UNINITIALIZED_ROW
+   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
+#endif
+   png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS);
+}
+
+/* GRR 19990627:  the following three functions currently are identical
+ *  to png_set_expand().  However, it is entirely reasonable that someone
+ *  might wish to expand an indexed image to RGB but *not* expand a single,
+ *  fully transparent palette entry to a full alpha channel--perhaps instead
+ *  convert tRNS to the grayscale/RGB format (16-bit RGB value), or replace
+ *  the transparent color with a particular RGB value, or drop tRNS entirely.
+ *  IOW, a future version of the library may make the transformations flag
+ *  a bit more fine-grained, with separate bits for each of these three
+ *  functions.
+ *
+ *  More to the point, these functions make it obvious what libpng will be
+ *  doing, whereas "expand" can (and does) mean any number of things.
+ *
+ *  GRP 20060307: In libpng-1.4.0, png_set_gray_1_2_4_to_8() was modified
+ *  to expand only the sample depth but not to expand the tRNS to alpha.
+ */
+
+/* Expand paletted images to RGB (currently the same flags as png_set_expand). */
+void PNGAPI
+png_set_palette_to_rgb(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_palette_to_rgb\n");
+   if (png_ptr == NULL) return;
+#ifdef PNG_WARN_UNINITIALIZED_ROW
+   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
+#endif
+   png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS);
+}
+
+#if !defined(PNG_1_0_X)
+/* Expand grayscale images of less than 8-bit depth to 8 bits (PNG_EXPAND only). */
+void PNGAPI
+png_set_expand_gray_1_2_4_to_8(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_expand_gray_1_2_4_to_8\n");
+   if (png_ptr == NULL) return;
+#ifdef PNG_WARN_UNINITIALIZED_ROW
+   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
+#endif
+   png_ptr->transformations |= PNG_EXPAND;
+}
+#endif
+
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+/* Deprecated as of libpng-1.2.9.  Expands grayscale images of less than
+ * 8-bit depth to 8 bits; this variant also sets PNG_EXPAND_tRNS. */
+void PNGAPI
+png_set_gray_1_2_4_to_8(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_gray_1_2_4_to_8\n");
+   if (png_ptr != NULL)
+      png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS);
+}
+#endif
+
+/* Expand tRNS chunks to alpha channels.  A NULL png_ptr is ignored. */
+void PNGAPI
+png_set_tRNS_to_alpha(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_tRNS_to_alpha\n");
+   if(png_ptr == NULL) return;   /* same NULL guard as the other png_set_* calls */
+   png_ptr->transformations |= (PNG_EXPAND | PNG_EXPAND_tRNS);
+#ifdef PNG_WARN_UNINITIALIZED_ROW
+   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
+#endif
+}
+#endif /* defined(PNG_READ_EXPAND_SUPPORTED) */
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+void PNGAPI
+png_set_gray_to_rgb(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_gray_to_rgb\n");
+   if(png_ptr == NULL) return;   /* same NULL guard as the other png_set_* calls */
+   png_ptr->transformations |= PNG_GRAY_TO_RGB;   /* request gray -> RGB on read */
+#ifdef PNG_WARN_UNINITIALIZED_ROW
+   png_ptr->flags &= ~PNG_FLAG_ROW_INIT;
+#endif
+}
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+#if defined(PNG_FLOATING_POINT_SUPPORTED)
+/* Floating-point front end for RGB -> grayscale conversion (e.g. 24 bpp RGB
+ * to 8 bpp gray): scales the red and green weights by 100000, adds 0.5, and
+ * hands them to png_set_rgb_to_gray_fixed().  A NULL png_ptr is ignored. */
+void PNGAPI
+png_set_rgb_to_gray(png_structp png_ptr, int error_action, double red,
+   double green)
+{
+   int red_scaled, green_scaled;
+   if (png_ptr == NULL) return;
+   red_scaled = (int)((float)red*100000.0 + 0.5);
+   green_scaled = (int)((float)green*100000.0 + 0.5);
+   png_set_rgb_to_gray_fixed(png_ptr, error_action, red_scaled, green_scaled);
+}
+#endif
+
+void PNGAPI
+png_set_rgb_to_gray_fixed(png_structp png_ptr, int error_action,
+   png_fixed_point red, png_fixed_point green)
+{  /* Fixed-point form: red and green are gray weights multiplied by 100000. */
+   png_debug(1, "in png_set_rgb_to_gray\n");
+   if(png_ptr == NULL) return;
+   switch(error_action)   /* any value other than 1, 2 or 3 sets no flag */
+   {
+      case 1: png_ptr->transformations |= PNG_RGB_TO_GRAY;
+              break;
+      case 2: png_ptr->transformations |= PNG_RGB_TO_GRAY_WARN;
+              break;
+      case 3: png_ptr->transformations |= PNG_RGB_TO_GRAY_ERR;
+   }
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)   /* body chosen by the #if */
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+      png_ptr->transformations |= PNG_EXPAND;
+#else
+   {
+      png_warning(png_ptr, "Cannot do RGB_TO_GRAY without EXPAND_SUPPORTED.");
+      png_ptr->transformations &= ~PNG_RGB_TO_GRAY;
+   }
+#endif
+   {   /* derive the three coefficients, rescaled from 100000 to 32768 */
+      png_uint_16 red_int, green_int;
+      if(red < 0 || green < 0)   /* a negative weight selects the defaults */
+      {
+         red_int   =  6968; /* .212671 * 32768 + .5 */
+         green_int = 23434; /* .715160 * 32768 + .5 */
+      }
+      else if(red + green < 100000L)   /* accepted only when the sum is below 100000 */
+      {
+        red_int = (png_uint_16)(((png_uint_32)red*32768L)/100000L);
+        green_int = (png_uint_16)(((png_uint_32)green*32768L)/100000L);
+      }
+      else
+      {
+         png_warning(png_ptr, "ignoring out of range rgb_to_gray coefficients");
+         red_int   =  6968;   /* same defaults as the negative case */
+         green_int = 23434;
+      }
+      png_ptr->rgb_to_gray_red_coeff   = red_int;
+      png_ptr->rgb_to_gray_green_coeff = green_int;
+      png_ptr->rgb_to_gray_blue_coeff  = (png_uint_16)(32768-red_int-green_int);
+   }
+}
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_LEGACY_SUPPORTED)
+void PNGAPI
+png_set_read_user_transform_fn(png_structp png_ptr, png_user_transform_ptr
+   read_user_transform_fn)
+{
+   png_debug(1, "in png_set_read_user_transform_fn\n");
+   if (png_ptr == NULL) return;
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+   png_ptr->read_user_transform_fn = read_user_transform_fn; /* store callback */
+   png_ptr->transformations |= PNG_USER_TRANSFORM;      /* and flag its use */
+#endif
+#if defined(PNG_LEGACY_SUPPORTED)
+   if (read_user_transform_fn != NULL)   /* legacy builds warn about a callback */
+      png_warning(png_ptr,
+        "This version of libpng does not support user transforms");
+#endif
+}
+#endif
+
+/* Prepare png_ptr for row reads: normalise the background colour (and the
+ * tRNS gray value), build the gamma tables, and pre-apply background,
+ * gamma and shift to the palette entries.  Updates png_ptr in place. */
+void /* PRIVATE */
+png_init_read_transformations(png_structp png_ptr)
+{
+   png_debug(1, "in png_init_read_transformations\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if(png_ptr != NULL)
+#endif
+  {  /* guarded by the NULL test only where that test is compiled in */
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) || defined(PNG_READ_SHIFT_SUPPORTED) \
+ || defined(PNG_READ_GAMMA_SUPPORTED)
+   int color_type = png_ptr->color_type;
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED) && defined(PNG_READ_BACKGROUND_SUPPORTED)
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+   /* Detect gray background and attempt to enable optimization
+    * for gray --> RGB case */
+   /* Note:  if PNG_BACKGROUND_EXPAND is set and color_type is either RGB or
+    * RGB_ALPHA (in which case need_expand is superfluous anyway), the
+    * background color might actually be gray yet not be flagged as such.
+    * This is not a problem for the current code, which uses
+    * PNG_BACKGROUND_IS_GRAY only to decide when to do the
+    * png_do_gray_to_rgb() transformation.
+    */
+   if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) &&
+       !(color_type & PNG_COLOR_MASK_COLOR))
+   {
+          png_ptr->mode |= PNG_BACKGROUND_IS_GRAY;
+   } else if ((png_ptr->transformations & PNG_BACKGROUND) &&
+              !(png_ptr->transformations & PNG_BACKGROUND_EXPAND) &&
+              (png_ptr->transformations & PNG_GRAY_TO_RGB) &&
+              png_ptr->background.red == png_ptr->background.green &&
+              png_ptr->background.red == png_ptr->background.blue)
+   {
+          png_ptr->mode |= PNG_BACKGROUND_IS_GRAY;
+          png_ptr->background.gray = png_ptr->background.red;
+   }
+#endif
+
+   if ((png_ptr->transformations & PNG_BACKGROUND_EXPAND) &&
+       (png_ptr->transformations & PNG_EXPAND))
+   {
+      if (!(color_type & PNG_COLOR_MASK_COLOR))  /* i.e., GRAY or GRAY_ALPHA */
+      {
+         /* scale the background and tRNS gray values up to the 8-bit range */
+         switch (png_ptr->bit_depth)
+         {
+            case 1:
+               png_ptr->background.gray *= (png_uint_16)0xff;
+               png_ptr->background.red = png_ptr->background.green
+                 =  png_ptr->background.blue = png_ptr->background.gray;
+               if (!(png_ptr->transformations & PNG_EXPAND_tRNS))
+               {
+                 png_ptr->trans_values.gray *= (png_uint_16)0xff;
+                 png_ptr->trans_values.red = png_ptr->trans_values.green
+                   = png_ptr->trans_values.blue = png_ptr->trans_values.gray;
+               }
+               break;
+            case 2:
+               png_ptr->background.gray *= (png_uint_16)0x55;
+               png_ptr->background.red = png_ptr->background.green
+                 = png_ptr->background.blue = png_ptr->background.gray;
+               if (!(png_ptr->transformations & PNG_EXPAND_tRNS))
+               {
+                 png_ptr->trans_values.gray *= (png_uint_16)0x55;
+                 png_ptr->trans_values.red = png_ptr->trans_values.green
+                   = png_ptr->trans_values.blue = png_ptr->trans_values.gray;
+               }
+               break;
+            case 4:
+               png_ptr->background.gray *= (png_uint_16)0x11;
+               png_ptr->background.red = png_ptr->background.green
+                 = png_ptr->background.blue = png_ptr->background.gray;
+               if (!(png_ptr->transformations & PNG_EXPAND_tRNS))
+               {
+                 png_ptr->trans_values.gray *= (png_uint_16)0x11;
+                 png_ptr->trans_values.red = png_ptr->trans_values.green
+                   = png_ptr->trans_values.blue = png_ptr->trans_values.gray;
+               }
+               break;
+            case 8:
+            case 16:  /* no scaling needed: only mirror gray into r/g/b */
+               png_ptr->background.red = png_ptr->background.green
+                 = png_ptr->background.blue = png_ptr->background.gray;
+               break;
+         }
+      }
+      else if (color_type == PNG_COLOR_TYPE_PALETTE)
+      {  /* background is given as a palette index: fetch its RGB */
+         png_ptr->background.red   =
+            png_ptr->palette[png_ptr->background.index].red;
+         png_ptr->background.green =
+            png_ptr->palette[png_ptr->background.index].green;
+         png_ptr->background.blue  =
+            png_ptr->palette[png_ptr->background.index].blue;
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED)
+        if (png_ptr->transformations & PNG_INVERT_ALPHA)
+        {
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+           if (!(png_ptr->transformations & PNG_EXPAND_tRNS))
+#endif
+           {
+           /* invert the alpha channel (in tRNS) unless the pixels are
+              going to be expanded, in which case leave it for later */
+              int i,istop;
+              istop=(int)png_ptr->num_trans;
+              for (i=0; i<istop; i++)
+                 png_ptr->trans[i] = (png_byte)(255 - png_ptr->trans[i]);
+           }
+        }
+#endif
+
+      }
+   }
+#endif
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) && defined(PNG_READ_GAMMA_SUPPORTED)
+   png_ptr->background_1 = png_ptr->background;  /* start from a copy */
+#endif
+#if defined(PNG_READ_GAMMA_SUPPORTED) && defined(PNG_FLOATING_POINT_SUPPORTED)
+   /* Palette + tRNS with no partial alpha and net gamma ~1: drop PNG_GAMMA */
+   if ((color_type == PNG_COLOR_TYPE_PALETTE && png_ptr->num_trans != 0)
+       && (fabs(png_ptr->screen_gamma * png_ptr->gamma - 1.0)
+         < PNG_GAMMA_THRESHOLD))
+   {
+    int i,k;
+    k=0;
+    for (i=0; i<png_ptr->num_trans; i++)
+    {
+      if (png_ptr->trans[i] != 0 && png_ptr->trans[i] != 0xff)
+        k=1; /* partial transparency is present */
+    }
+    if (k == 0)
+      png_ptr->transformations &= ~PNG_GAMMA;
+   }
+
+   if ((png_ptr->transformations & (PNG_GAMMA | PNG_RGB_TO_GRAY)) &&
+        png_ptr->gamma != 0.0)
+   {
+      png_build_gamma_table(png_ptr);
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+      if (png_ptr->transformations & PNG_BACKGROUND)
+      {
+         if (color_type == PNG_COLOR_TYPE_PALETTE)
+         {
+           /* NOTE(review): the original comment here was truncated ("could
+            * skip if no transparency and"); the intent is unconfirmed */
+            png_color back, back_1;
+            png_colorp palette = png_ptr->palette;
+            int num_palette = png_ptr->num_palette;
+            int i;
+            if (png_ptr->background_gamma_type == PNG_BACKGROUND_GAMMA_FILE)
+            {
+               back.red = png_ptr->gamma_table[png_ptr->background.red];
+               back.green = png_ptr->gamma_table[png_ptr->background.green];
+               back.blue = png_ptr->gamma_table[png_ptr->background.blue];
+
+               back_1.red = png_ptr->gamma_to_1[png_ptr->background.red];
+               back_1.green = png_ptr->gamma_to_1[png_ptr->background.green];
+               back_1.blue = png_ptr->gamma_to_1[png_ptr->background.blue];
+            }
+            else
+            {
+               double g, gs;
+
+               switch (png_ptr->background_gamma_type)
+               {
+                  case PNG_BACKGROUND_GAMMA_SCREEN:
+                     g = (png_ptr->screen_gamma);
+                     gs = 1.0;
+                     break;
+                  case PNG_BACKGROUND_GAMMA_FILE: /* excluded by the if above */
+                     g = 1.0 / (png_ptr->gamma);
+                     gs = 1.0 / (png_ptr->gamma * png_ptr->screen_gamma);
+                     break;
+                  case PNG_BACKGROUND_GAMMA_UNIQUE:
+                     g = 1.0 / (png_ptr->background_gamma);
+                     gs = 1.0 / (png_ptr->background_gamma *
+                                 png_ptr->screen_gamma);
+                     break;
+                  default:
+                     g = 1.0;    /* back_1 */
+                     gs = 1.0;   /* back */
+               }
+
+               if ( fabs(gs - 1.0) < PNG_GAMMA_THRESHOLD)
+               {
+                  back.red   = (png_byte)png_ptr->background.red;
+                  back.green = (png_byte)png_ptr->background.green;
+                  back.blue  = (png_byte)png_ptr->background.blue;
+               }
+               else
+               {
+                  back.red = (png_byte)(pow(
+                     (double)png_ptr->background.red/255, gs) * 255.0 + .5);
+                  back.green = (png_byte)(pow(
+                     (double)png_ptr->background.green/255, gs) * 255.0 + .5);
+                  back.blue = (png_byte)(pow(
+                     (double)png_ptr->background.blue/255, gs) * 255.0 + .5);
+               }
+
+               back_1.red = (png_byte)(pow(
+                  (double)png_ptr->background.red/255, g) * 255.0 + .5);
+               back_1.green = (png_byte)(pow(
+                  (double)png_ptr->background.green/255, g) * 255.0 + .5);
+               back_1.blue = (png_byte)(pow(
+                  (double)png_ptr->background.blue/255, g) * 255.0 + .5);
+            }
+            for (i = 0; i < num_palette; i++)
+            {
+               if (i < (int)png_ptr->num_trans && png_ptr->trans[i] != 0xff)
+               {
+                  if (png_ptr->trans[i] == 0)
+                  {
+                     palette[i] = back;
+                  }
+                  else /* if (png_ptr->trans[i] != 0xff) */
+                  {
+                     png_byte v, w;
+
+                     v = png_ptr->gamma_to_1[palette[i].red];
+                     png_composite(w, v, png_ptr->trans[i], back_1.red);
+                     palette[i].red = png_ptr->gamma_from_1[w];
+
+                     v = png_ptr->gamma_to_1[palette[i].green];
+                     png_composite(w, v, png_ptr->trans[i], back_1.green);
+                     palette[i].green = png_ptr->gamma_from_1[w];
+
+                     v = png_ptr->gamma_to_1[palette[i].blue];
+                     png_composite(w, v, png_ptr->trans[i], back_1.blue);
+                     palette[i].blue = png_ptr->gamma_from_1[w];
+                  }
+               }
+               else
+               {
+                  palette[i].red = png_ptr->gamma_table[palette[i].red];
+                  palette[i].green = png_ptr->gamma_table[palette[i].green];
+                  palette[i].blue = png_ptr->gamma_table[palette[i].blue];
+               }
+            }
+         }
+         /* if (png_ptr->background_gamma_type!=PNG_BACKGROUND_GAMMA_UNKNOWN) */
+         else
+         /* color_type != PNG_COLOR_TYPE_PALETTE */
+         {
+            double m = (double)(((png_uint_32)1 << png_ptr->bit_depth) - 1);
+            double g = 1.0;
+            double gs = 1.0;
+
+            switch (png_ptr->background_gamma_type)
+            {
+               case PNG_BACKGROUND_GAMMA_SCREEN:
+                  g = (png_ptr->screen_gamma);
+                  gs = 1.0;
+                  break;
+               case PNG_BACKGROUND_GAMMA_FILE:
+                  g = 1.0 / (png_ptr->gamma);
+                  gs = 1.0 / (png_ptr->gamma * png_ptr->screen_gamma);
+                  break;
+               case PNG_BACKGROUND_GAMMA_UNIQUE:
+                  g = 1.0 / (png_ptr->background_gamma);
+                  gs = 1.0 / (png_ptr->background_gamma *
+                     png_ptr->screen_gamma);
+                  break;
+            }
+            /* m (set above) is the largest sample value at this bit depth */
+            png_ptr->background_1.gray = (png_uint_16)(pow(
+               (double)png_ptr->background.gray / m, g) * m + .5);
+            png_ptr->background.gray = (png_uint_16)(pow(
+               (double)png_ptr->background.gray / m, gs) * m + .5);
+
+            if ((png_ptr->background.red != png_ptr->background.green) ||
+                (png_ptr->background.red != png_ptr->background.blue) ||
+                (png_ptr->background.red != png_ptr->background.gray))
+            {
+               /* RGB or RGBA with color background */
+               png_ptr->background_1.red = (png_uint_16)(pow(
+                  (double)png_ptr->background.red / m, g) * m + .5);
+               png_ptr->background_1.green = (png_uint_16)(pow(
+                  (double)png_ptr->background.green / m, g) * m + .5);
+               png_ptr->background_1.blue = (png_uint_16)(pow(
+                  (double)png_ptr->background.blue / m, g) * m + .5);
+               png_ptr->background.red = (png_uint_16)(pow(
+                  (double)png_ptr->background.red / m, gs) * m + .5);
+               png_ptr->background.green = (png_uint_16)(pow(
+                  (double)png_ptr->background.green / m, gs) * m + .5);
+               png_ptr->background.blue = (png_uint_16)(pow(
+                  (double)png_ptr->background.blue / m, gs) * m + .5);
+            }
+            else
+            {
+               /* GRAY, GRAY ALPHA, RGB, or RGBA with gray background */
+               png_ptr->background_1.red = png_ptr->background_1.green
+                 = png_ptr->background_1.blue = png_ptr->background_1.gray;
+               png_ptr->background.red = png_ptr->background.green
+                 = png_ptr->background.blue = png_ptr->background.gray;
+            }
+         }
+      }
+      else
+      /* transformation does not include PNG_BACKGROUND */
+#endif /* PNG_READ_BACKGROUND_SUPPORTED */
+      if (color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         png_colorp palette = png_ptr->palette;
+         int num_palette = png_ptr->num_palette;
+         int i;
+
+         for (i = 0; i < num_palette; i++)
+         {
+            palette[i].red = png_ptr->gamma_table[palette[i].red];
+            palette[i].green = png_ptr->gamma_table[palette[i].green];
+            palette[i].blue = png_ptr->gamma_table[palette[i].blue];
+         }
+      }
+   }
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   else  /* pairs with the gamma "if" above when both features are built */
+#endif
+#endif /* PNG_READ_GAMMA_SUPPORTED && PNG_FLOATING_POINT_SUPPORTED */
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   /* No GAMMA transformation */
+   if ((png_ptr->transformations & PNG_BACKGROUND) &&
+       (color_type == PNG_COLOR_TYPE_PALETTE))
+   {
+      int i;
+      int istop = (int)png_ptr->num_trans;
+      png_color back;
+      png_colorp palette = png_ptr->palette;
+
+      back.red   = (png_byte)png_ptr->background.red;
+      back.green = (png_byte)png_ptr->background.green;
+      back.blue  = (png_byte)png_ptr->background.blue;
+
+      for (i = 0; i < istop; i++)
+      {
+         if (png_ptr->trans[i] == 0)
+         {
+            palette[i] = back;
+         }
+         else if (png_ptr->trans[i] != 0xff)
+         {
+            /* The png_composite() macro is defined in png.h */
+            png_composite(palette[i].red, palette[i].red,
+               png_ptr->trans[i], back.red);
+            png_composite(palette[i].green, palette[i].green,
+               png_ptr->trans[i], back.green);
+            png_composite(palette[i].blue, palette[i].blue,
+               png_ptr->trans[i], back.blue);
+         }
+      }
+   }
+#endif /* PNG_READ_BACKGROUND_SUPPORTED */
+
+#if defined(PNG_READ_SHIFT_SUPPORTED)
+   if ((png_ptr->transformations & PNG_SHIFT) &&
+      (color_type == PNG_COLOR_TYPE_PALETTE))
+   {
+      png_uint_16 i;
+      png_uint_16 istop = png_ptr->num_palette;
+      int sr = 8 - png_ptr->sig_bit.red;
+      int sg = 8 - png_ptr->sig_bit.green;
+      int sb = 8 - png_ptr->sig_bit.blue;
+      /* ignore sig_bit values that would give a shift outside 0..8 */
+      if (sr < 0 || sr > 8)
+         sr = 0;
+      if (sg < 0 || sg > 8)
+         sg = 0;
+      if (sb < 0 || sb > 8)
+         sb = 0;
+      for (i = 0; i < istop; i++)
+      {
+         png_ptr->palette[i].red >>= sr;
+         png_ptr->palette[i].green >>= sg;
+         png_ptr->palette[i].blue >>= sb;
+      }
+   }
+#endif  /* PNG_READ_SHIFT_SUPPORTED */
+ }
+#if !defined(PNG_READ_GAMMA_SUPPORTED) && !defined(PNG_READ_SHIFT_SUPPORTED) \
+ && !defined(PNG_READ_BACKGROUND_SUPPORTED)
+   if(png_ptr)
+      return;  /* NOTE(review): presumably only keeps png_ptr referenced */
+#endif
+}
+
+/* Update info_ptr (color type, bit depth, channels, rowbytes, ...) to
+ * describe rows as they look after the transformations set in png_ptr.
+ * The result should be usable for writing a PNG file, provided the
+ * transformations yield valid PNG data. */
+void /* PRIVATE */
+png_read_transform_info(png_structp png_ptr, png_infop info_ptr)
+{
+   png_debug(1, "in png_read_transform_info\n");
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+   if (png_ptr->transformations & PNG_EXPAND)
+   {
+      if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         if (png_ptr->num_trans && (png_ptr->transformations & PNG_EXPAND_tRNS))
+            info_ptr->color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+         else
+            info_ptr->color_type = PNG_COLOR_TYPE_RGB;
+         info_ptr->bit_depth = 8;
+         info_ptr->num_trans = 0;
+      }
+      else
+      {
+         if (png_ptr->num_trans)
+         {
+            if (png_ptr->transformations & PNG_EXPAND_tRNS)
+              info_ptr->color_type |= PNG_COLOR_MASK_ALPHA;
+            /* Without PNG_EXPAND_tRNS the expand step receives no tRNS
+             * value and adds no channels, so color_type stays as it is. */
+         }
+         if (info_ptr->bit_depth < 8)
+            info_ptr->bit_depth = 8;
+         info_ptr->num_trans = 0;
+      }
+   }
+#endif
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   if (png_ptr->transformations & PNG_BACKGROUND)
+   {
+      info_ptr->color_type &= ~PNG_COLOR_MASK_ALPHA;
+      info_ptr->num_trans = 0;
+      info_ptr->background = png_ptr->background;
+   }
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   if (png_ptr->transformations & PNG_GAMMA)
+   {
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+      info_ptr->gamma = png_ptr->gamma;
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+      info_ptr->int_gamma = png_ptr->int_gamma;
+#endif
+   }
+#endif
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+   if ((png_ptr->transformations & PNG_16_TO_8) && (info_ptr->bit_depth == 16))
+      info_ptr->bit_depth = 8;
+#endif
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+   if (png_ptr->transformations & PNG_GRAY_TO_RGB)
+      info_ptr->color_type |= PNG_COLOR_MASK_COLOR;
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+   if (png_ptr->transformations & PNG_RGB_TO_GRAY)
+      info_ptr->color_type &= ~PNG_COLOR_MASK_COLOR;
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+   if (png_ptr->transformations & PNG_DITHER)
+   {
+      if (((info_ptr->color_type == PNG_COLOR_TYPE_RGB) ||
+         (info_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)) &&
+         png_ptr->palette_lookup && info_ptr->bit_depth == 8)
+      {
+         info_ptr->color_type = PNG_COLOR_TYPE_PALETTE;
+      }
+   }
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED)
+   if ((png_ptr->transformations & PNG_PACK) && (info_ptr->bit_depth < 8))
+      info_ptr->bit_depth = 8;
+#endif
+   /* Base channel count from the final color type; alpha is added below */
+   if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      info_ptr->channels = 1;
+   else if (info_ptr->color_type & PNG_COLOR_MASK_COLOR)
+      info_ptr->channels = 3;
+   else
+      info_ptr->channels = 1;
+
+#if defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+   if (png_ptr->flags & PNG_FLAG_STRIP_ALPHA)
+      info_ptr->color_type &= ~PNG_COLOR_MASK_ALPHA;
+#endif
+
+   if (info_ptr->color_type & PNG_COLOR_MASK_ALPHA)
+      info_ptr->channels++;
+
+#if defined(PNG_READ_FILLER_SUPPORTED)
+   /* STRIP_ALPHA and FILLER allowed:  MASK_ALPHA bit stripped above */
+   if ((png_ptr->transformations & PNG_FILLER) &&
+       ((info_ptr->color_type == PNG_COLOR_TYPE_RGB) ||
+       (info_ptr->color_type == PNG_COLOR_TYPE_GRAY)))
+   {
+      info_ptr->channels++;
+      /* if adding a true alpha channel not just filler */
+#if !defined(PNG_1_0_X)
+      if (png_ptr->transformations & PNG_ADD_ALPHA)
+        info_ptr->color_type |= PNG_COLOR_MASK_ALPHA;
+#endif
+   }
+#endif
+
+#if defined(PNG_USER_TRANSFORM_PTR_SUPPORTED) && \
+defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+   if(png_ptr->transformations & PNG_USER_TRANSFORM)
+     {
+       if(info_ptr->bit_depth < png_ptr->user_transform_depth)
+         info_ptr->bit_depth = png_ptr->user_transform_depth;
+       if(info_ptr->channels < png_ptr->user_transform_channels)
+         info_ptr->channels = png_ptr->user_transform_channels;
+     }
+#endif
+
+   info_ptr->pixel_depth = (png_byte)(info_ptr->channels *
+      info_ptr->bit_depth);
+
+   info_ptr->rowbytes = PNG_ROWBYTES(info_ptr->pixel_depth,info_ptr->width);
+
+#if !defined(PNG_READ_EXPAND_SUPPORTED)
+   if(png_ptr)
+      return;  /* NOTE(review): presumably only keeps png_ptr referenced */
+#endif
+}
+
+/* Apply every enabled read transformation to the current row, in place in
+ * png_ptr->row_buf + 1, handing png_ptr->row_info to each step.  The order
+ * of the steps is significant and very touchy: if you add a transformation,
+ * decide carefully how it fits in with the others here. */
+void /* PRIVATE */
+png_do_read_transformations(png_structp png_ptr)
+{
+   png_debug(1, "in png_do_read_transformations\n");
+   if (png_ptr->row_buf == NULL)
+   {
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+      char msg[50];
+
+      png_snprintf2(msg, 50,
+         "NULL row buffer for row %ld, pass %d", png_ptr->row_number,
+         png_ptr->pass);
+      png_error(png_ptr, msg);
+#else
+      png_error(png_ptr, "NULL row buffer");
+#endif
+   }
+#ifdef PNG_WARN_UNINITIALIZED_ROW
+   if (!(png_ptr->flags & PNG_FLAG_ROW_INIT))
+      /* Application has failed to call either png_read_start_image()
+       * or png_read_update_info() after setting transforms that expand
+       * pixels.  This check added to libpng-1.2.19 */
+#if (PNG_WARN_UNINITIALIZED_ROW==1)
+      png_error(png_ptr, "Uninitialized row");
+#else
+      png_warning(png_ptr, "Uninitialized row");
+#endif
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+   if (png_ptr->transformations & PNG_EXPAND)
+   {
+      if (png_ptr->row_info.color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         png_do_expand_palette(&(png_ptr->row_info), png_ptr->row_buf + 1,
+            png_ptr->palette, png_ptr->trans, png_ptr->num_trans);
+      }
+      else
+      {
+         if (png_ptr->num_trans &&
+             (png_ptr->transformations & PNG_EXPAND_tRNS))
+            png_do_expand(&(png_ptr->row_info), png_ptr->row_buf + 1,
+               &(png_ptr->trans_values));
+         else
+            png_do_expand(&(png_ptr->row_info), png_ptr->row_buf + 1,
+               NULL);
+      }
+   }
+#endif
+
+#if defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+   if (png_ptr->flags & PNG_FLAG_STRIP_ALPHA)
+      png_do_strip_filler(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         PNG_FLAG_FILLER_AFTER | (png_ptr->flags & PNG_FLAG_STRIP_ALPHA));
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+   if (png_ptr->transformations & PNG_RGB_TO_GRAY)
+   {
+      int rgb_error =
+         png_do_rgb_to_gray(png_ptr, &(png_ptr->row_info), png_ptr->row_buf + 1);
+      if(rgb_error)
+      {
+         png_ptr->rgb_to_gray_status=1;  /* a non-gray pixel was seen */
+         if((png_ptr->transformations & PNG_RGB_TO_GRAY) == 
+             PNG_RGB_TO_GRAY_WARN)
+            png_warning(png_ptr, "png_do_rgb_to_gray found nongray pixel");
+         if((png_ptr->transformations & PNG_RGB_TO_GRAY) ==
+             PNG_RGB_TO_GRAY_ERR)
+            png_error(png_ptr, "png_do_rgb_to_gray found nongray pixel");
+      }
+   }
+#endif
+
+/*
+From Andreas Dilger e-mail to png-implement, 26 March 1998:
+
+  In most cases, the "simple transparency" should be done prior to doing
+  gray-to-RGB, or you will have to test 3x as many bytes to check if a
+  pixel is transparent.  You would also need to make sure that the
+  transparency information is upgraded to RGB.
+
+  To summarize, the current flow is:
+  - Gray + simple transparency -> compare 1 or 2 gray bytes and composite
+                                  with background "in place" if transparent,
+                                  convert to RGB if necessary
+  - Gray + alpha -> composite with gray background and remove alpha bytes,
+                                  convert to RGB if necessary
+
+  To support RGB backgrounds for gray images we need:
+  - Gray + simple transparency -> convert to RGB + simple transparency, compare
+                                  3 or 6 bytes and composite with background
+                                  "in place" if transparent (3x compare/pixel
+                                  compared to doing composite with gray bkgrnd)
+  - Gray + alpha -> convert to RGB + alpha, composite with background and
+                                  remove alpha bytes (3x float operations/pixel
+                                  compared with composite on gray background)
+
+  Greg's change will do this.  The reason it wasn't done before is for
+  performance, as this increases the per-pixel operations.  If we would check
+  in advance if the background was gray or RGB, and position the gray-to-RGB
+  transform appropriately, then it would save a lot of work/time.
+ */
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+   /* if gray -> RGB, do so now only if background is non-gray; else do later
+    * for performance reasons */
+   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) &&
+       !(png_ptr->mode & PNG_BACKGROUND_IS_GRAY))
+      png_do_gray_to_rgb(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   if ((png_ptr->transformations & PNG_BACKGROUND) &&
+      ((png_ptr->num_trans != 0 ) ||
+      (png_ptr->color_type & PNG_COLOR_MASK_ALPHA)))
+      png_do_background(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         &(png_ptr->trans_values), &(png_ptr->background)
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+         , &(png_ptr->background_1),
+         png_ptr->gamma_table, png_ptr->gamma_from_1,
+         png_ptr->gamma_to_1, png_ptr->gamma_16_table,
+         png_ptr->gamma_16_from_1, png_ptr->gamma_16_to_1,
+         png_ptr->gamma_shift
+#endif
+);
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   if ((png_ptr->transformations & PNG_GAMMA) &&
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+      !((png_ptr->transformations & PNG_BACKGROUND) &&
+      ((png_ptr->num_trans != 0) ||
+      (png_ptr->color_type & PNG_COLOR_MASK_ALPHA))) &&
+#endif
+      (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE))
+      png_do_gamma(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         png_ptr->gamma_table, png_ptr->gamma_16_table,
+         png_ptr->gamma_shift);
+#endif
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+   if (png_ptr->transformations & PNG_16_TO_8)
+      png_do_chop(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+   if (png_ptr->transformations & PNG_DITHER)
+   {
+      png_do_dither((png_row_infop)&(png_ptr->row_info), png_ptr->row_buf + 1,
+         png_ptr->palette_lookup, png_ptr->dither_index);
+      if(png_ptr->row_info.rowbytes == (png_uint_32)0)
+         png_error(png_ptr, "png_do_dither returned rowbytes=0");
+   }
+#endif
+
+#if defined(PNG_READ_INVERT_SUPPORTED)
+   if (png_ptr->transformations & PNG_INVERT_MONO)
+      png_do_invert(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED)
+   if (png_ptr->transformations & PNG_SHIFT)
+      png_do_unshift(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         &(png_ptr->shift));
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACK)
+      png_do_unpack(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_BGR_SUPPORTED)
+   if (png_ptr->transformations & PNG_BGR)
+      png_do_bgr(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACKSWAP)
+      png_do_packswap(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+   /* if gray -> RGB, do so now only if we did not do so above */
+   if ((png_ptr->transformations & PNG_GRAY_TO_RGB) &&
+       (png_ptr->mode & PNG_BACKGROUND_IS_GRAY))
+      png_do_gray_to_rgb(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_FILLER_SUPPORTED)
+   if (png_ptr->transformations & PNG_FILLER)
+      png_do_read_filler(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         (png_uint_32)png_ptr->filler, png_ptr->flags);
+#endif
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED)
+   if (png_ptr->transformations & PNG_INVERT_ALPHA)
+      png_do_read_invert_alpha(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED)
+   if (png_ptr->transformations & PNG_SWAP_ALPHA)
+      png_do_read_swap_alpha(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_SWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_SWAP_BYTES)
+      png_do_swap(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED)
+   if (png_ptr->transformations & PNG_USER_TRANSFORM)
+    {
+      if(png_ptr->read_user_transform_fn != NULL)
+        (*(png_ptr->read_user_transform_fn)) /* user read transform function */
+          (png_ptr,                    /* png_ptr */
+           &(png_ptr->row_info),       /* row_info:     */
+             /*  png_uint_32 width;          width of row */
+             /*  png_uint_32 rowbytes;       number of bytes in row */
+             /*  png_byte color_type;        color type of pixels */
+             /*  png_byte bit_depth;         bit depth of samples */
+             /*  png_byte channels;          number of channels (1-4) */
+             /*  png_byte pixel_depth;       bits per pixel (depth*channels) */
+           png_ptr->row_buf + 1);      /* start of pixel data for row */
+#if defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
+      if(png_ptr->user_transform_depth)  /* nonzero overrides row_info */
+         png_ptr->row_info.bit_depth = png_ptr->user_transform_depth;
+      if(png_ptr->user_transform_channels)
+         png_ptr->row_info.channels = png_ptr->user_transform_channels;
+#endif
+      png_ptr->row_info.pixel_depth = (png_byte)(png_ptr->row_info.bit_depth *
+         png_ptr->row_info.channels);
+      png_ptr->row_info.rowbytes = PNG_ROWBYTES(png_ptr->row_info.pixel_depth,
+         png_ptr->row_info.width);
+   }
+#endif
+
+}
+
+#if defined(PNG_READ_PACK_SUPPORTED)
+/* Unpack one row of depth-bit samples (depth must be 1, 2 or 4) into one
+ * byte per sample, in place.  The row is walked from its last pixel to
+ * its first, so each packed byte is read before the unpacked output can
+ * reach it.  Sample values are copied unscaled.
+ *
+ * row        first byte of pixel data; packed on entry, unpacked on return
+ * row_width  number of pixels in the row
+ * depth      bits per packed sample
+ */
+static void
+png_unpack_packed_row(png_bytep row, png_uint_32 row_width, int depth)
+{
+   /* Highest pixel slot inside one packed byte: 7, 3 or 1. */
+   const png_uint_32 last_slot = (png_uint_32)(8 / depth - 1);
+
+   /* log2 of the number of pixels per packed byte: 3, 2 or 1. */
+   const int pixels_log2 = (depth == 1) ? 3 : ((depth == 2) ? 2 : 1);
+
+   /* Shift that selects the first (leftmost) pixel of a packed byte. */
+   const png_uint_32 top_shift = (png_uint_32)(8 - depth);
+
+   /* All-ones mask exactly one sample wide: 0x01, 0x03 or 0x0f. */
+   const int sample_mask = (1 << depth) - 1;
+
+   /* Start on the packed byte that holds the final pixel ... */
+   png_bytep packed = row + (png_size_t)((row_width - 1) >> pixels_log2);
+
+   /* ... and on the byte that pixel occupies once unpacked. */
+   png_bytep unpacked = row + (png_size_t)row_width - 1;
+
+   /* Bit position of the final pixel within its packed byte. */
+   png_uint_32 shift =
+      (last_slot - ((row_width + last_slot) & last_slot)) *
+      (png_uint_32)depth;
+
+   png_uint_32 remaining;
+
+   for (remaining = row_width; remaining != 0; remaining--)
+   {
+      *unpacked = (png_byte)((*packed >> shift) & sample_mask);
+      unpacked--;
+
+      if (shift != top_shift)
+      {
+         /* The next pixel to the left lives in the same byte. */
+         shift += (png_uint_32)depth;
+      }
+      else
+      {
+         /* Byte exhausted: continue with the one before it. */
+         shift = 0;
+         packed--;
+      }
+   }
+}
+
+/* Expand a row of 1-, 2- or 4-bit samples to one byte per sample without
+ * changing the sample values (a 1-bit row ends up as bytes holding 0 or
+ * 1); use png_do_shift() afterwards if 0 and 255 are wanted.  Rows whose
+ * depth is already 8 or more are left untouched.  On return row_info
+ * describes an 8-bit row.
+ *
+ * row_info   description of the row; updated in place
+ * row        first byte of the row's pixel data
+ */
+void /* PRIVATE */
+png_do_unpack(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_unpack\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL && row_info->bit_depth < 8)
+#else
+   if (row_info->bit_depth < 8)
+#endif
+   {
+      const png_uint_32 row_width = row_info->width;
+      const int depth = row_info->bit_depth;
+
+      /* Any other depth below 8 gets no pixel work, only relabelling. */
+      if (depth == 1 || depth == 2 || depth == 4)
+         png_unpack_packed_row(row, row_width, depth);
+
+      /* From here on the row is described as one byte per sample. */
+      row_info->bit_depth = 8;
+      row_info->pixel_depth = (png_byte)(8 * row_info->channels);
+      row_info->rowbytes = row_width * row_info->channels;
+   }
+}
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED)
+/* Reverse the effects of png_do_shift.  Each sample of a non-palette row is
+ * shifted right, in place, by (bit_depth - significant bits) of its channel.
+ * Thus, if you have a row of bit depth 8, but only 5 are significant, this
+ * will shift the values back to 0 through 31.  Unusable shifts are ignored.
+ */
+void /* PRIVATE */
+png_do_unshift(png_row_infop row_info, png_bytep row, png_color_8p sig_bits)
+{
+   png_debug(1, "in png_do_unshift\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL && sig_bits != NULL &&
+#endif
+       row_info->color_type != PNG_COLOR_TYPE_PALETTE)
+   {
+      int shift[4];
+      int channels = 0;
+      int c;
+      png_uint_16 value = 0;
+      png_uint_32 row_width = row_info->width;
+      /* One shift count per channel, in the order the samples are stored. */
+      if (row_info->color_type & PNG_COLOR_MASK_COLOR)
+      {
+         shift[channels++] = row_info->bit_depth - sig_bits->red;
+         shift[channels++] = row_info->bit_depth - sig_bits->green;
+         shift[channels++] = row_info->bit_depth - sig_bits->blue;
+      }
+      else
+      {
+         shift[channels++] = row_info->bit_depth - sig_bits->gray;
+      }
+      if (row_info->color_type & PNG_COLOR_MASK_ALPHA)
+      {
+         shift[channels++] = row_info->bit_depth - sig_bits->alpha;
+      }
+      /* "value" doubles as a flag: nonzero once a usable shift is found. */
+      for (c = 0; c < channels; c++)
+      {
+         if (shift[c] <= 0 || shift[c] >= (int)row_info->bit_depth)
+            shift[c] = 0;
+         else
+            value = 1;
+      }
+      /* A shift >= bit_depth (sig_bits of 0) would zero the samples: skip it. */
+      if (!value)
+         return;
+
+      switch (row_info->bit_depth)
+      {
+         case 2:
+         {
+            png_bytep bp;
+            png_uint_32 i;
+            png_uint_32 istop = row_info->rowbytes;
+
+            for (bp = row, i = 0; i < istop; i++)
+            {
+               *bp >>= 1;      /* the only usable shift at depth 2 is 1 */
+               *bp++ &= 0x55;  /* drop the bit pulled in from the next pixel */
+            }
+            break;
+         }
+         case 4:
+         {
+            png_bytep bp = row;
+            png_uint_32 i;
+            png_uint_32 istop = row_info->rowbytes;
+            png_byte mask = (png_byte)((((int)0xf0 >> shift[0]) & (int)0xf0) |
+               (png_byte)((int)0xf >> shift[0]));
+            /* mask keeps only the low (4 - shift) bits of each nibble */
+            for (i = 0; i < istop; i++)
+            {
+               *bp >>= shift[0];
+               *bp++ &= mask;
+            }
+            break;
+         }
+         case 8:
+         {
+            png_bytep bp = row;
+            png_uint_32 i;
+            png_uint_32 istop = row_width * channels;
+
+            for (i = 0; i < istop; i++)
+            {
+               *bp++ >>= shift[i%channels];
+            }
+            break;
+         }
+         case 16:
+         {
+            png_bytep bp = row;
+            png_uint_32 i;
+            png_uint_32 istop = channels * row_width;
+
+            for (i = 0; i < istop; i++)
+            {
+               value = (png_uint_16)((*bp << 8) + *(bp + 1)); /* high byte first */
+               value >>= shift[i%channels];
+               *bp++ = (png_byte)(value >> 8);
+               *bp++ = (png_byte)(value & 0xff);
+            }
+            break;
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_READ_16_TO_8_SUPPORTED)
+/* Chop rows of bit depth 16 down to 8, in place, and update row_info to match */
+void /* PRIVATE */
+png_do_chop(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_chop\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL && row_info->bit_depth == 16)
+#else
+   if (row_info->bit_depth == 16)
+#endif
+   {
+      png_bytep sp = row;
+      png_bytep dp = row;
+      png_uint_32 i;
+      png_uint_32 istop = row_info->width * row_info->channels;
+      /* dp advances 1 byte per 2 bytes of sp, so it never overtakes sp */
+      for (i = 0; i<istop; i++, sp += 2, dp++)
+      {
+#if defined(PNG_READ_16_TO_8_ACCURATE_SCALE_SUPPORTED)
+      /* This does a more accurate scaling of the 16-bit color
+       * value, rather than a simple low-byte truncation.
+       *
+       * What the ideal calculation should be:
+       *   *dp = (((((png_uint_32)(*sp) << 8) |
+       *          (png_uint_32)(*(sp + 1))) * 255 + 127) / (png_uint_32)65535L;
+       *
+       * GRR: no, I think this is what it really should be:
+       *   *dp = (((((png_uint_32)(*sp) << 8) |
+       *           (png_uint_32)(*(sp + 1))) + 128L) / (png_uint_32)257L;
+       *
+       * GRR: here's the exact calculation with shifts:
+       *   temp = (((png_uint_32)(*sp) << 8) | (png_uint_32)(*(sp + 1))) + 128L;
+       *   *dp = (temp - (temp >> 8)) >> 8;
+       *
+       * Approximate calculation with shift/add instead of multiply/divide:
+       *   *dp = ((((png_uint_32)(*sp) << 8) |
+       *          (png_uint_32)((int)(*(sp + 1)) - *sp)) + 128) >> 8;
+       *
+       * What we actually do to avoid extra shifting and conversion:
+       */
+      /* i.e. keep the high byte, plus 1 if (low byte - high byte) > 128 */
+         *dp = *sp + ((((int)(*(sp + 1)) - *sp) > 128) ? 1 : 0);
+#else
+       /* Simply discard the low order byte (keep the high byte as is) */
+         *dp = *sp;
+#endif
+      }
+      row_info->bit_depth = 8;
+      row_info->pixel_depth = (png_byte)(8 * row_info->channels);
+      row_info->rowbytes = row_info->width * row_info->channels;
+   }
+}
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED)
+void /* PRIVATE: move each pixel's trailing alpha sample to the front, in place */
+png_do_read_swap_alpha(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_read_swap_alpha\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      png_uint_32 row_width = row_info->width;
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+      {
+         /* This converts from RGBA to ARGB */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep sp = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_byte save;
+            png_uint_32 i;
+            /* Walk the row from its end: save A, slide B, G, R up, store A */
+            for (i = 0; i < row_width; i++)
+            {
+               save = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = save;
+            }
+         }
+         /* This converts from RRGGBBAA to AARRGGBB (any bit depth other than 8) */
+         else
+         {
+            png_bytep sp = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_byte save[2];
+            png_uint_32 i;
+            /* save[0] is the later alpha byte, save[1] the earlier one */
+            for (i = 0; i < row_width; i++)
+            {
+               save[0] = *(--sp);
+               save[1] = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = save[0];
+               *(--dp) = save[1];
+            }
+         }
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      {
+         /* This converts from GA to AG */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep sp = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_byte save;
+            png_uint_32 i;
+            /* Walk the row from its end, exchanging the two bytes of a pixel */
+            for (i = 0; i < row_width; i++)
+            {
+               save = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = save;
+            }
+         }
+         /* This converts from GGAA to AAGG (any bit depth other than 8) */
+         else
+         {
+            png_bytep sp = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_byte save[2];
+            png_uint_32 i;
+            /* The two alpha bytes keep their relative order after the move */
+            for (i = 0; i < row_width; i++)
+            {
+               save[0] = *(--sp);
+               save[1] = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = save[0];
+               *(--dp) = save[1];
+            }
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED)
+void /* PRIVATE: replace every alpha byte by (255 - byte), in place */
+png_do_read_invert_alpha(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_read_invert_alpha\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      png_uint_32 row_width = row_info->width;
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+      {
+         /* This inverts the alpha channel in RGBA */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep sp = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_uint_32 i;
+
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = (png_byte)(255 - *(--sp));
+               /* sp == dp here, so copying R, G, B would change nothing */
+/*             This does nothing:
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               We can replace it with:
+*/
+               sp-=3;
+               dp=sp;
+            }
+         }
+         /* This inverts the alpha channel in RRGGBBAA (bit depth other than 8) */
+         else
+         {
+            png_bytep sp = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_uint_32 i;
+
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = (png_byte)(255 - *(--sp));
+               *(--dp) = (png_byte)(255 - *(--sp));
+               /* both alpha bytes are inverted; the six color bytes are skipped */
+/*             This does nothing:
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               We can replace it with:
+*/
+               sp-=6;
+               dp=sp;
+            }
+         }
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      {
+         /* This inverts the alpha channel in GA */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep sp = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_uint_32 i;
+
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = (png_byte)(255 - *(--sp));
+               *(--dp) = *(--sp);   /* sp == dp: the gray byte is unchanged */
+            }
+         }
+         /* This inverts the alpha channel in GGAA (bit depth other than 8) */
+         else
+         {
+            png_bytep sp  = row + row_info->rowbytes;
+            png_bytep dp = sp;
+            png_uint_32 i;
+
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = (png_byte)(255 - *(--sp));
+               *(--dp) = (png_byte)(255 - *(--sp));
+/*             The two gray bytes stay as they are, so instead of:
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               the pointers simply step over them: */
+               sp-=2;
+               dp=sp;
+            }
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_READ_FILLER_SUPPORTED)
+/* Add a filler channel to 8- or 16-bit GRAY or RGB rows, in place */
+void /* PRIVATE */
+png_do_read_filler(png_row_infop row_info, png_bytep row,
+   png_uint_32 filler, png_uint_32 flags)
+{
+   png_uint_32 i;
+   png_uint_32 row_width = row_info->width;
+   /* 16-bit samples are stored high byte first; split the filler to match. */
+   png_byte hi_filler = (png_byte)((filler>>8) & 0xff);
+   png_byte lo_filler = (png_byte)(filler & 0xff);
+   /* Rows are expanded from the end so no source byte is overwritten early. */
+   png_debug(1, "in png_do_read_filler\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL  && row_info != NULL &&
+#endif
+       row_info->color_type == PNG_COLOR_TYPE_GRAY)
+   {
+      if(row_info->bit_depth == 8)
+      {
+         /* This changes the data from G to GX */
+         if (flags & PNG_FLAG_FILLER_AFTER)
+         {
+            png_bytep sp = row + (png_size_t)row_width;
+            png_bytep dp =  sp + (png_size_t)row_width;
+            for (i = 1; i < row_width; i++)  /* first pixel's G is in place */
+            {
+               *(--dp) = lo_filler;
+               *(--dp) = *(--sp);
+            }
+            *(--dp) = lo_filler;
+            row_info->channels = 2;
+            row_info->pixel_depth = 16;
+            row_info->rowbytes = row_width * 2;
+         }
+      /* This changes the data from G to XG */
+         else
+         {
+            png_bytep sp = row + (png_size_t)row_width;
+            png_bytep dp = sp  + (png_size_t)row_width;
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = *(--sp);
+               *(--dp) = lo_filler;
+            }
+            row_info->channels = 2;
+            row_info->pixel_depth = 16;
+            row_info->rowbytes = row_width * 2;
+         }
+      }
+      else if(row_info->bit_depth == 16)
+      {
+         /* This changes the data from GG to GGXX (XX = filler, high byte first) */
+         if (flags & PNG_FLAG_FILLER_AFTER)
+         {
+            png_bytep sp = row + (png_size_t)row_width * 2;
+            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            for (i = 1; i < row_width; i++)  /* first pixel's GG is in place */
+            {
+               *(--dp) = lo_filler;   /* written backwards: low byte, then high */
+               *(--dp) = hi_filler;
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+            }
+            *(--dp) = lo_filler;
+            *(--dp) = hi_filler;
+            row_info->channels = 2;
+            row_info->pixel_depth = 32;
+            row_info->rowbytes = row_width * 4;
+         }
+         /* This changes the data from GG to XXGG (XX = filler, high byte first) */
+         else
+         {
+            png_bytep sp = row + (png_size_t)row_width * 2;
+            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = lo_filler;
+               *(--dp) = hi_filler;
+            }
+            row_info->channels = 2;
+            row_info->pixel_depth = 32;
+            row_info->rowbytes = row_width * 4;
+         }
+      }
+   } /* COLOR_TYPE == GRAY */
+   else if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+   {
+      if(row_info->bit_depth == 8)
+      {
+         /* This changes the data from RGB to RGBX */
+         if (flags & PNG_FLAG_FILLER_AFTER)
+         {
+            png_bytep sp = row + (png_size_t)row_width * 3;
+            png_bytep dp = sp  + (png_size_t)row_width;
+            for (i = 1; i < row_width; i++)  /* first pixel's RGB is in place */
+            {
+               *(--dp) = lo_filler;
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+            }
+            *(--dp) = lo_filler;
+            row_info->channels = 4;
+            row_info->pixel_depth = 32;
+            row_info->rowbytes = row_width * 4;
+         }
+      /* This changes the data from RGB to XRGB */
+         else
+         {
+            png_bytep sp = row + (png_size_t)row_width * 3;
+            png_bytep dp = sp + (png_size_t)row_width;
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = lo_filler;
+            }
+            row_info->channels = 4;
+            row_info->pixel_depth = 32;
+            row_info->rowbytes = row_width * 4;
+         }
+      }
+      else if(row_info->bit_depth == 16)
+      {
+         /* This changes the data from RRGGBB to RRGGBBXX (XX high byte first) */
+         if (flags & PNG_FLAG_FILLER_AFTER)
+         {
+            png_bytep sp = row + (png_size_t)row_width * 6;
+            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            for (i = 1; i < row_width; i++)  /* first pixel's data is in place */
+            {
+               *(--dp) = lo_filler;   /* written backwards: low byte, then high */
+               *(--dp) = hi_filler;
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+            }
+            *(--dp) = lo_filler;
+            *(--dp) = hi_filler;
+            row_info->channels = 4;
+            row_info->pixel_depth = 64;
+            row_info->rowbytes = row_width * 8;
+         }
+         /* This changes the data from RRGGBB to XXRRGGBB (XX high byte first) */
+         else
+         {
+            png_bytep sp = row + (png_size_t)row_width * 6;
+            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            for (i = 0; i < row_width; i++)
+            {
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = *(--sp);
+               *(--dp) = lo_filler;
+               *(--dp) = hi_filler;
+            }
+            row_info->channels = 4;
+            row_info->pixel_depth = 64;
+            row_info->rowbytes = row_width * 8;
+         }
+      }
+   } /* COLOR_TYPE == RGB */
+}
+#endif
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+/* Expand 8- or 16-bit grayscale rows to RGB in place, with or without alpha */
+void /* PRIVATE */
+png_do_gray_to_rgb(png_row_infop row_info, png_bytep row)
+{
+   png_uint_32 i;
+   png_uint_32 row_width = row_info->width;
+   /* Every loop below runs from the last pixel to the first: dp >= sp always */
+   png_debug(1, "in png_do_gray_to_rgb\n");
+   if (row_info->bit_depth >= 8 &&
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+      !(row_info->color_type & PNG_COLOR_MASK_COLOR))
+   {
+      if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
+      {
+         if (row_info->bit_depth == 8)
+         {
+            /* G -> GGG: sp is the last gray byte, dp the last output byte */
+            png_bytep sp = row + (png_size_t)row_width - 1;
+            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            for (i = 0; i < row_width; i++)
+            {
+               *(dp--) = *sp;
+               *(dp--) = *sp;
+               *(dp--) = *(sp--);
+            }
+         }
+         else
+         {
+            /* GG -> GGGGGG: each two-byte sample is written three times */
+            png_bytep sp = row + (png_size_t)row_width * 2 - 1;
+            png_bytep dp = sp  + (png_size_t)row_width * 4;
+            for (i = 0; i < row_width; i++)
+            {
+               *(dp--) = *sp;
+               *(dp--) = *(sp - 1);
+               *(dp--) = *sp;
+               *(dp--) = *(sp - 1);
+               *(dp--) = *(sp--);
+               *(dp--) = *(sp--);
+            }
+         }
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      {
+         if (row_info->bit_depth == 8)
+         {
+            /* GA -> GGGA: alpha is copied first, then gray three times */
+            png_bytep sp = row + (png_size_t)row_width * 2 - 1;
+            png_bytep dp = sp  + (png_size_t)row_width * 2;
+            for (i = 0; i < row_width; i++)
+            {
+               *(dp--) = *(sp--);
+               *(dp--) = *sp;
+               *(dp--) = *sp;
+               *(dp--) = *(sp--);
+            }
+         }
+         else
+         {
+            /* GGAA -> GGGGGGAA: two alpha bytes, then the gray pair three times */
+            png_bytep sp = row + (png_size_t)row_width * 4 - 1;
+            png_bytep dp = sp  + (png_size_t)row_width * 4;
+            for (i = 0; i < row_width; i++)
+            {
+               *(dp--) = *(sp--);
+               *(dp--) = *(sp--);
+               *(dp--) = *sp;
+               *(dp--) = *(sp - 1);
+               *(dp--) = *sp;
+               *(dp--) = *(sp - 1);
+               *(dp--) = *(sp--);
+               *(dp--) = *(sp--);
+            }
+         }
+      }
+      row_info->channels += (png_byte)2;   /* one gray channel became three */
+      row_info->color_type |= PNG_COLOR_MASK_COLOR;
+      row_info->pixel_depth = (png_byte)(row_info->channels *
+         row_info->bit_depth);
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,row_width);
+   }
+}
+#endif
+
+#if defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+/* reduce RGB files to grayscale, with or without alpha
+ * using the equation given in Poynton's ColorFAQ at
+ * <http://www.inforamp.net/~poynton/>
+ * Copyright (c) 1998-01-04 Charles Poynton poynton at inforamp.net
+ *
+ *     Y = 0.212671 * R + 0.715160 * G + 0.072169 * B
+ *
+ *  We approximate this with
+ *
+ *     Y = 0.21268 * R    + 0.7151 * G    + 0.07217 * B
+ *
+ *  which can be expressed with integers as
+ *
+ *     Y = (6969 * R + 23434 * G + 2365 * B)/32768
+ *
+ *  The calculation is to be done in a linear colorspace.
+ *
+ *  Other integer coefficients can be used via png_set_rgb_to_gray().
+ */
+int /* PRIVATE */
+png_do_rgb_to_gray(png_structp png_ptr, png_row_infop row_info, png_bytep row)
+/* Converts an RGB/RGBA row to gray/gray+alpha in place; returns rgb_error. */
+{
+   png_uint_32 i;
+   /* rgb_error becomes 1 once a pixel's red, green and blue are found unequal */
+   png_uint_32 row_width = row_info->width;
+   int rgb_error = 0;
+
+   png_debug(1, "in png_do_rgb_to_gray\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+      (row_info->color_type & PNG_COLOR_MASK_COLOR))
+   {
+      png_uint_32 rc = png_ptr->rgb_to_gray_red_coeff;
+      png_uint_32 gc = png_ptr->rgb_to_gray_green_coeff;
+      png_uint_32 bc = png_ptr->rgb_to_gray_blue_coeff;
+      /* Integer weights; every weighted sum below is scaled back by >>15 */
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+      {
+         if (row_info->bit_depth == 8)
+         {
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+            if (png_ptr->gamma_from_1 != NULL && png_ptr->gamma_to_1 != NULL)
+            {
+               png_bytep sp = row;
+               png_bytep dp = row;
+
+               for (i = 0; i < row_width; i++)
+               {
+                  png_byte red   = png_ptr->gamma_to_1[*(sp++)];
+                  png_byte green = png_ptr->gamma_to_1[*(sp++)];
+                  png_byte blue  = png_ptr->gamma_to_1[*(sp++)];
+                  if(red != green || red != blue)
+                  {
+                     rgb_error |= 1;
+                     *(dp++) = png_ptr->gamma_from_1[
+                       (rc*red+gc*green+bc*blue)>>15];
+                  }
+                  else
+                     *(dp++) = *(sp-1);   /* the original (unmapped) blue byte */
+               }
+            }
+            else
+#endif
+            {
+               png_bytep sp = row;
+               png_bytep dp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_byte red   = *(sp++);
+                  png_byte green = *(sp++);
+                  png_byte blue  = *(sp++);
+                  if(red != green || red != blue)
+                  {
+                     rgb_error |= 1;
+                     *(dp++) = (png_byte)((rc*red+gc*green+bc*blue)>>15);
+                  }
+                  else
+                     *(dp++) = *(sp-1);   /* already gray: copy the blue byte */
+               }
+            }
+         }
+
+         else /* RGB bit_depth == 16 */
+         {
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+            if (png_ptr->gamma_16_to_1 != NULL &&
+                png_ptr->gamma_16_from_1 != NULL)
+            {
+               png_bytep sp = row;
+               png_bytep dp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 red, green, blue, w;
+                  /* 16-bit samples are read high byte first */
+                  red   = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+                  green = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+                  blue  = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+
+                  if(red == green && red == blue)
+                     w = red;
+                  else
+                  {
+                     png_uint_16 red_1   = png_ptr->gamma_16_to_1[(red&0xff) >>
+                                  png_ptr->gamma_shift][red>>8];
+                     png_uint_16 green_1 = png_ptr->gamma_16_to_1[(green&0xff) >>
+                                  png_ptr->gamma_shift][green>>8];
+                     png_uint_16 blue_1  = png_ptr->gamma_16_to_1[(blue&0xff) >>
+                                  png_ptr->gamma_shift][blue>>8];
+                     png_uint_16 gray16  = (png_uint_16)((rc*red_1 + gc*green_1
+                                  + bc*blue_1)>>15);
+                     w = png_ptr->gamma_16_from_1[(gray16&0xff) >>
+                         png_ptr->gamma_shift][gray16 >> 8];
+                     rgb_error |= 1;
+                  }
+
+                  *(dp++) = (png_byte)((w>>8) & 0xff);
+                  *(dp++) = (png_byte)(w & 0xff);
+               }
+            }
+            else
+#endif
+            {
+               png_bytep sp = row;
+               png_bytep dp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 red, green, blue, gray16;
+
+                  red   = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+                  green = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+                  blue  = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+
+                  if(red != green || red != blue)
+                     rgb_error |= 1;
+                  gray16  = (png_uint_16)((rc*red + gc*green + bc*blue)>>15);
+                  *(dp++) = (png_byte)((gray16>>8) & 0xff);
+                  *(dp++) = (png_byte)(gray16 & 0xff);
+               }
+            }
+         }
+      }
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+      {
+         if (row_info->bit_depth == 8)
+         {
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+            if (png_ptr->gamma_from_1 != NULL && png_ptr->gamma_to_1 != NULL)
+            {
+               png_bytep sp = row;
+               png_bytep dp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_byte red   = png_ptr->gamma_to_1[*(sp++)];
+                  png_byte green = png_ptr->gamma_to_1[*(sp++)];
+                  png_byte blue  = png_ptr->gamma_to_1[*(sp++)];
+                  if(red != green || red != blue)
+                     rgb_error |= 1;
+                  *(dp++) =  png_ptr->gamma_from_1
+                             [(rc*red + gc*green + bc*blue)>>15];
+                  *(dp++) = *(sp++);  /* alpha */
+               }
+            }
+            else
+#endif
+            {
+               png_bytep sp = row;
+               png_bytep dp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_byte red   = *(sp++);
+                  png_byte green = *(sp++);
+                  png_byte blue  = *(sp++);
+                  if(red != green || red != blue)
+                     rgb_error |= 1;
+                  *(dp++) =  (png_byte)((rc*red + gc*green + bc*blue)>>15);
+                  *(dp++) = *(sp++);  /* alpha */
+               }
+            }
+         }
+         else /* RGBA bit_depth == 16 */
+         {
+#if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED)
+            if (png_ptr->gamma_16_to_1 != NULL &&
+                png_ptr->gamma_16_from_1 != NULL)
+            {
+               png_bytep sp = row;
+               png_bytep dp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 red, green, blue, w;
+
+                  red   = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+                  green = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+                  blue  = (png_uint_16)(((*(sp))<<8) | *(sp+1)); sp+=2;
+
+                  if(red == green && red == blue)
+                     w = red;
+                  else
+                  {
+                     png_uint_16 red_1   = png_ptr->gamma_16_to_1[(red&0xff) >>
+                                  png_ptr->gamma_shift][red>>8];
+                     png_uint_16 green_1 = png_ptr->gamma_16_to_1[(green&0xff) >>
+                                  png_ptr->gamma_shift][green>>8];
+                     png_uint_16 blue_1  = png_ptr->gamma_16_to_1[(blue&0xff) >>
+                                  png_ptr->gamma_shift][blue>>8];
+                     png_uint_16 gray16  = (png_uint_16)((rc * red_1
+                                  + gc * green_1 + bc * blue_1)>>15);
+                     w = png_ptr->gamma_16_from_1[(gray16&0xff) >>
+                         png_ptr->gamma_shift][gray16 >> 8];
+                     rgb_error |= 1;
+                  }
+
+                  *(dp++) = (png_byte)((w>>8) & 0xff);
+                  *(dp++) = (png_byte)(w & 0xff);
+                  *(dp++) = *(sp++);  /* alpha */
+                  *(dp++) = *(sp++);
+               }
+            }
+            else
+#endif
+            {
+               png_bytep sp = row;
+               png_bytep dp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 red, green, blue, gray16;
+                  red   = (png_uint_16)((*(sp)<<8) | *(sp+1)); sp+=2;
+                  green = (png_uint_16)((*(sp)<<8) | *(sp+1)); sp+=2;
+                  blue  = (png_uint_16)((*(sp)<<8) | *(sp+1)); sp+=2;
+                  if(red != green || red != blue)
+                     rgb_error |= 1;
+                  gray16  = (png_uint_16)((rc*red + gc*green + bc*blue)>>15);
+                  *(dp++) = (png_byte)((gray16>>8) & 0xff);
+                  *(dp++) = (png_byte)(gray16 & 0xff);
+                  *(dp++) = *(sp++);  /* alpha */
+                  *(dp++) = *(sp++);
+               }
+            }
+         }
+      }
+   row_info->channels -= (png_byte)2;
+      row_info->color_type &= ~PNG_COLOR_MASK_COLOR;
+      row_info->pixel_depth = (png_byte)(row_info->channels *
+         row_info->bit_depth);
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,row_width);
+   }
+   return rgb_error;
+}
+#endif
+
+/* Build a grayscale palette.  Palette is assumed to hold 1 << bit_depth
+ * png_color entries (bit_depth 1, 2, 4 or 8); any other depth, or a NULL
+ * palette, leaves it untouched.  This lets grayscale images be treated as
+ * paletted.  Most useful for gamma correction and simplification of code.
+ */
+void PNGAPI
+png_build_grayscale_palette(int bit_depth, png_colorp palette)
+{
+   int num_palette;
+   int color_inc;
+   int i;
+   int v;
+
+   png_debug(1, "in png_build_grayscale_palette\n");
+   if (palette == NULL)
+      return;
+   /* color_inc is the step between gray levels: 255 / (num_palette - 1) */
+   switch (bit_depth)
+   {
+      case 1:
+         num_palette = 2;
+         color_inc = 0xff;
+         break;
+      case 2:
+         num_palette = 4;
+         color_inc = 0x55;
+         break;
+      case 4:
+         num_palette = 16;
+         color_inc = 0x11;
+         break;
+      case 8:
+         num_palette = 256;
+         color_inc = 1;
+         break;
+      default:
+         num_palette = 0;
+         color_inc = 0;
+         break;
+   }
+   /* Entry i is the gray level i * color_inc, from 0 (black) up to 255 */
+   for (i = 0, v = 0; i < num_palette; i++, v += color_inc)
+   {
+      palette[i].red = (png_byte)v;
+      palette[i].green = (png_byte)v;
+      palette[i].blue = (png_byte)v;
+   }
+}
+
+/* This function is currently unused.  Do we really need it? */
+#if defined(PNG_READ_DITHER_SUPPORTED) && defined(PNG_CORRECT_PALETTE_SUPPORTED)
+void /* PRIVATE */
+png_correct_palette(png_structp png_ptr, png_colorp palette,
+   int num_palette)
+{
+   png_debug(1, "in png_correct_palette\n");
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) && \
+    defined(PNG_READ_GAMMA_SUPPORTED) && defined(PNG_FLOATING_POINT_SUPPORTED)
+   if (png_ptr->transformations & (PNG_GAMMA | PNG_BACKGROUND))
+   {
+      png_color back, back_1;
+
+      if (png_ptr->background_gamma_type == PNG_BACKGROUND_GAMMA_FILE)
+      {
+         back.red = png_ptr->gamma_table[png_ptr->background.red];
+         back.green = png_ptr->gamma_table[png_ptr->background.green];
+         back.blue = png_ptr->gamma_table[png_ptr->background.blue];
+
+         back_1.red = png_ptr->gamma_to_1[png_ptr->background.red];
+         back_1.green = png_ptr->gamma_to_1[png_ptr->background.green];
+         back_1.blue = png_ptr->gamma_to_1[png_ptr->background.blue];
+      }
+      else
+      {
+         double g;
+
+         g = 1.0 / (png_ptr->background_gamma * png_ptr->screen_gamma);
+
+         if (png_ptr->background_gamma_type == PNG_BACKGROUND_GAMMA_SCREEN ||
+             fabs(g - 1.0) < PNG_GAMMA_THRESHOLD)
+         {
+            back.red = png_ptr->background.red;
+            back.green = png_ptr->background.green;
+            back.blue = png_ptr->background.blue;
+         }
+         else
+         {
+            back.red =
+               (png_byte)(pow((double)png_ptr->background.red/255, g) *
+                255.0 + 0.5);
+            back.green =
+               (png_byte)(pow((double)png_ptr->background.green/255, g) *
+                255.0 + 0.5);
+            back.blue =
+               (png_byte)(pow((double)png_ptr->background.blue/255, g) *
+                255.0 + 0.5);
+         }
+
+         g = 1.0 / png_ptr->background_gamma;
+
+         back_1.red =
+            (png_byte)(pow((double)png_ptr->background.red/255, g) *
+             255.0 + 0.5);
+         back_1.green =
+            (png_byte)(pow((double)png_ptr->background.green/255, g) *
+             255.0 + 0.5);
+         back_1.blue =
+            (png_byte)(pow((double)png_ptr->background.blue/255, g) *
+             255.0 + 0.5);
+      }
+
+      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         png_uint_32 i;
+
+         for (i = 0; i < (png_uint_32)num_palette; i++)
+         {
+            if (i < png_ptr->num_trans && png_ptr->trans[i] == 0)
+            {
+               palette[i] = back;
+            }
+            else if (i < png_ptr->num_trans && png_ptr->trans[i] != 0xff)
+            {
+               png_byte v, w;
+
+               v = png_ptr->gamma_to_1[png_ptr->palette[i].red];
+               png_composite(w, v, png_ptr->trans[i], back_1.red);
+               palette[i].red = png_ptr->gamma_from_1[w];
+
+               v = png_ptr->gamma_to_1[png_ptr->palette[i].green];
+               png_composite(w, v, png_ptr->trans[i], back_1.green);
+               palette[i].green = png_ptr->gamma_from_1[w];
+
+               v = png_ptr->gamma_to_1[png_ptr->palette[i].blue];
+               png_composite(w, v, png_ptr->trans[i], back_1.blue);
+               palette[i].blue = png_ptr->gamma_from_1[w];
+            }
+            else
+            {
+               palette[i].red = png_ptr->gamma_table[palette[i].red];
+               palette[i].green = png_ptr->gamma_table[palette[i].green];
+               palette[i].blue = png_ptr->gamma_table[palette[i].blue];
+            }
+         }
+      }
+      else
+      {
+         int i;
+
+         for (i = 0; i < num_palette; i++)
+         {
+            if (palette[i].red == (png_byte)png_ptr->trans_values.gray)
+            {
+               palette[i] = back;
+            }
+            else
+            {
+               palette[i].red = png_ptr->gamma_table[palette[i].red];
+               palette[i].green = png_ptr->gamma_table[palette[i].green];
+               palette[i].blue = png_ptr->gamma_table[palette[i].blue];
+            }
+         }
+      }
+   }
+   else
+#endif
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   if (png_ptr->transformations & PNG_GAMMA)
+   {
+      int i;
+
+      for (i = 0; i < num_palette; i++)
+      {
+         palette[i].red = png_ptr->gamma_table[palette[i].red];
+         palette[i].green = png_ptr->gamma_table[palette[i].green];
+         palette[i].blue = png_ptr->gamma_table[palette[i].blue];
+      }
+   }
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   else
+#endif
+#endif
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+   if (png_ptr->transformations & PNG_BACKGROUND)
+   {
+      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         png_color back;
+
+         back.red   = (png_byte)png_ptr->background.red;
+         back.green = (png_byte)png_ptr->background.green;
+         back.blue  = (png_byte)png_ptr->background.blue;
+
+         for (i = 0; i < (int)png_ptr->num_trans; i++)
+         {
+            if (png_ptr->trans[i] == 0)
+            {
+               palette[i].red = back.red;
+               palette[i].green = back.green;
+               palette[i].blue = back.blue;
+            }
+            else if (png_ptr->trans[i] != 0xff)
+            {
+               png_composite(palette[i].red, png_ptr->palette[i].red,
+                  png_ptr->trans[i], back.red);
+               png_composite(palette[i].green, png_ptr->palette[i].green,
+                  png_ptr->trans[i], back.green);
+               png_composite(palette[i].blue, png_ptr->palette[i].blue,
+                  png_ptr->trans[i], back.blue);
+            }
+         }
+      }
+      else /* assume grayscale palette (what else could it be?) */
+      {
+         int i;
+
+         for (i = 0; i < num_palette; i++)
+         {
+            if (i == (png_byte)png_ptr->trans_values.gray)
+            {
+               palette[i].red = (png_byte)png_ptr->background.red;
+               palette[i].green = (png_byte)png_ptr->background.green;
+               palette[i].blue = (png_byte)png_ptr->background.blue;
+            }
+         }
+      }
+   }
+#endif
+}
+#endif
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED)
+/* Replace any alpha or transparency with the supplied background color.
+ * "background" is already in the screen gamma, while "background_1" is
+ * at a gamma of 1.0.  Paletted files have already been taken care of.
+ *
+ * The row is rewritten in place.  Rows without an alpha channel have
+ * every pixel equal to "trans_values" replaced by "background"; rows
+ * with an alpha channel are composited over the background and the
+ * alpha channel is dropped, after which row_info is updated to match.
+ * When the gamma tables are supplied, samples are gamma corrected as
+ * well and partial alpha is composited in linear space against
+ * "background_1".  Without the tables the samples are composited
+ * directly against "background", as no linear-space data is available.
+ */
+void /* PRIVATE */
+png_do_background(png_row_infop row_info, png_bytep row,
+   png_color_16p trans_values, png_color_16p background
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+   , png_color_16p background_1,
+   png_bytep gamma_table, png_bytep gamma_from_1, png_bytep gamma_to_1,
+   png_uint_16pp gamma_16, png_uint_16pp gamma_16_from_1,
+   png_uint_16pp gamma_16_to_1, int gamma_shift
+#endif
+   )
+{
+   png_bytep sp, dp;
+   png_uint_32 i;
+   png_uint_32 row_width=row_info->width;
+   int shift;
+
+   png_debug(1, "in png_do_background\n");
+   if (background != NULL &&
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+      (!(row_info->color_type & PNG_COLOR_MASK_ALPHA) ||
+      (row_info->color_type != PNG_COLOR_TYPE_PALETTE && trans_values)))
+   {
+      switch (row_info->color_type)
+      {
+         case PNG_COLOR_TYPE_GRAY:
+         {
+            switch (row_info->bit_depth)
+            {
+               case 1:
+               {
+                  sp = row;
+                  shift = 7;
+                  for (i = 0; i < row_width; i++)
+                  {
+                     if ((png_uint_16)((*sp >> shift) & 0x01)
+                        == trans_values->gray)
+                     {
+                        *sp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
+                        *sp |= (png_byte)(background->gray << shift);
+                     }
+                     if (!shift)
+                     {
+                        shift = 7;
+                        sp++;
+                     }
+                     else
+                        shift--;
+                  }
+                  break;
+               }
+               case 2:
+               {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+                  if (gamma_table != NULL)
+                  {
+                     sp = row;
+                     shift = 6;
+                     for (i = 0; i < row_width; i++)
+                     {
+                        if ((png_uint_16)((*sp >> shift) & 0x03)
+                            == trans_values->gray)
+                        {
+                           *sp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
+                           *sp |= (png_byte)(background->gray << shift);
+                        }
+                        else
+                        {
+                           png_byte p = (png_byte)((*sp >> shift) & 0x03);
+                           png_byte g = (png_byte)((gamma_table [p | (p << 2) |
+                               (p << 4) | (p << 6)] >> 6) & 0x03);
+                           *sp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
+                           *sp |= (png_byte)(g << shift);
+                        }
+                        if (!shift)
+                        {
+                           shift = 6;
+                           sp++;
+                        }
+                        else
+                           shift -= 2;
+                     }
+                  }
+                  else
+#endif
+                  {
+                     sp = row;
+                     shift = 6;
+                     for (i = 0; i < row_width; i++)
+                     {
+                        if ((png_uint_16)((*sp >> shift) & 0x03)
+                            == trans_values->gray)
+                        {
+                           *sp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
+                           *sp |= (png_byte)(background->gray << shift);
+                        }
+                        if (!shift)
+                        {
+                           shift = 6;
+                           sp++;
+                        }
+                        else
+                           shift -= 2;
+                     }
+                  }
+                  break;
+               }
+               case 4:
+               {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+                  if (gamma_table != NULL)
+                  {
+                     sp = row;
+                     shift = 4;
+                     for (i = 0; i < row_width; i++)
+                     {
+                        if ((png_uint_16)((*sp >> shift) & 0x0f)
+                            == trans_values->gray)
+                        {
+                           *sp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
+                           *sp |= (png_byte)(background->gray << shift);
+                        }
+                        else
+                        {
+                           png_byte p = (png_byte)((*sp >> shift) & 0x0f);
+                           png_byte g = (png_byte)((gamma_table[p |
+                             (p << 4)] >> 4) & 0x0f);
+                           *sp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
+                           *sp |= (png_byte)(g << shift);
+                        }
+                        if (!shift)
+                        {
+                           shift = 4;
+                           sp++;
+                        }
+                        else
+                           shift -= 4;
+                     }
+                  }
+                  else
+#endif
+                  {
+                     sp = row;
+                     shift = 4;
+                     for (i = 0; i < row_width; i++)
+                     {
+                        if ((png_uint_16)((*sp >> shift) & 0x0f)
+                            == trans_values->gray)
+                        {
+                           *sp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
+                           *sp |= (png_byte)(background->gray << shift);
+                        }
+                        if (!shift)
+                        {
+                           shift = 4;
+                           sp++;
+                        }
+                        else
+                           shift -= 4;
+                     }
+                  }
+                  break;
+               }
+               case 8:
+               {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+                  if (gamma_table != NULL)
+                  {
+                     sp = row;
+                     for (i = 0; i < row_width; i++, sp++)
+                     {
+                        if (*sp == trans_values->gray)
+                        {
+                           *sp = (png_byte)background->gray;
+                        }
+                        else
+                        {
+                           *sp = gamma_table[*sp];
+                        }
+                     }
+                  }
+                  else
+#endif
+                  {
+                     sp = row;
+                     for (i = 0; i < row_width; i++, sp++)
+                     {
+                        if (*sp == trans_values->gray)
+                        {
+                           *sp = (png_byte)background->gray;
+                        }
+                     }
+                  }
+                  break;
+               }
+               case 16:
+               {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+                  if (gamma_16 != NULL)
+                  {
+                     sp = row;
+                     for (i = 0; i < row_width; i++, sp += 2)
+                     {
+                        png_uint_16 v;
+
+                        v = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                        if (v == trans_values->gray)
+                        {
+                           /* background is already in screen gamma */
+                           *sp = (png_byte)((background->gray >> 8) & 0xff);
+                           *(sp + 1) = (png_byte)(background->gray & 0xff);
+                        }
+                        else
+                        {
+                           v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
+                           *sp = (png_byte)((v >> 8) & 0xff);
+                           *(sp + 1) = (png_byte)(v & 0xff);
+                        }
+                     }
+                  }
+                  else
+#endif
+                  {
+                     sp = row;
+                     for (i = 0; i < row_width; i++, sp += 2)
+                     {
+                        png_uint_16 v;
+
+                        v = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                        if (v == trans_values->gray)
+                        {
+                           *sp = (png_byte)((background->gray >> 8) & 0xff);
+                           *(sp + 1) = (png_byte)(background->gray & 0xff);
+                        }
+                     }
+                  }
+                  break;
+               }
+            }
+            break;
+         }
+         case PNG_COLOR_TYPE_RGB:
+         {
+            if (row_info->bit_depth == 8)
+            {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+               if (gamma_table != NULL)
+               {
+                  sp = row;
+                  for (i = 0; i < row_width; i++, sp += 3)
+                  {
+                     if (*sp == trans_values->red &&
+                        *(sp + 1) == trans_values->green &&
+                        *(sp + 2) == trans_values->blue)
+                     {
+                        *sp = (png_byte)background->red;
+                        *(sp + 1) = (png_byte)background->green;
+                        *(sp + 2) = (png_byte)background->blue;
+                     }
+                     else
+                     {
+                        *sp = gamma_table[*sp];
+                        *(sp + 1) = gamma_table[*(sp + 1)];
+                        *(sp + 2) = gamma_table[*(sp + 2)];
+                     }
+                  }
+               }
+               else
+#endif
+               {
+                  sp = row;
+                  for (i = 0; i < row_width; i++, sp += 3)
+                  {
+                     if (*sp == trans_values->red &&
+                        *(sp + 1) == trans_values->green &&
+                        *(sp + 2) == trans_values->blue)
+                     {
+                        *sp = (png_byte)background->red;
+                        *(sp + 1) = (png_byte)background->green;
+                        *(sp + 2) = (png_byte)background->blue;
+                     }
+                  }
+               }
+            }
+            else /* if (row_info->bit_depth == 16) */
+            {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+               if (gamma_16 != NULL)
+               {
+                  sp = row;
+                  for (i = 0; i < row_width; i++, sp += 6)
+                  {
+                     png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                     png_uint_16 g = (png_uint_16)(((*(sp+2)) << 8) + *(sp+3));
+                     png_uint_16 b = (png_uint_16)(((*(sp+4)) << 8) + *(sp+5));
+                     if (r == trans_values->red && g == trans_values->green &&
+                        b == trans_values->blue)
+                     {
+                        /* background is already in screen gamma */
+                        *sp = (png_byte)((background->red >> 8) & 0xff);
+                        *(sp + 1) = (png_byte)(background->red & 0xff);
+                        *(sp + 2) = (png_byte)((background->green >> 8) & 0xff);
+                        *(sp + 3) = (png_byte)(background->green & 0xff);
+                        *(sp + 4) = (png_byte)((background->blue >> 8) & 0xff);
+                        *(sp + 5) = (png_byte)(background->blue & 0xff);
+                     }
+                     else
+                     {
+                        png_uint_16 v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
+                        *sp = (png_byte)((v >> 8) & 0xff);
+                        *(sp + 1) = (png_byte)(v & 0xff);
+                        v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)];
+                        *(sp + 2) = (png_byte)((v >> 8) & 0xff);
+                        *(sp + 3) = (png_byte)(v & 0xff);
+                        v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)];
+                        *(sp + 4) = (png_byte)((v >> 8) & 0xff);
+                        *(sp + 5) = (png_byte)(v & 0xff);
+                     }
+                  }
+               }
+               else
+#endif
+               {
+                  sp = row;
+                  for (i = 0; i < row_width; i++, sp += 6)
+                  {
+                     png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp+1));
+                     png_uint_16 g = (png_uint_16)(((*(sp+2)) << 8) + *(sp+3));
+                     png_uint_16 b = (png_uint_16)(((*(sp+4)) << 8) + *(sp+5));
+
+                     if (r == trans_values->red && g == trans_values->green &&
+                        b == trans_values->blue)
+                     {
+                        *sp = (png_byte)((background->red >> 8) & 0xff);
+                        *(sp + 1) = (png_byte)(background->red & 0xff);
+                        *(sp + 2) = (png_byte)((background->green >> 8) & 0xff);
+                        *(sp + 3) = (png_byte)(background->green & 0xff);
+                        *(sp + 4) = (png_byte)((background->blue >> 8) & 0xff);
+                        *(sp + 5) = (png_byte)(background->blue & 0xff);
+                     }
+                  }
+               }
+            }
+            break;
+         }
+         case PNG_COLOR_TYPE_GRAY_ALPHA:
+         {
+            if (row_info->bit_depth == 8)
+            {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+               if (gamma_to_1 != NULL && gamma_from_1 != NULL &&
+                   gamma_table != NULL)
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 2, dp++)
+                  {
+                     png_uint_16 a = *(sp + 1);
+
+                     if (a == 0xff)
+                     {
+                        *dp = gamma_table[*sp];
+                     }
+                     else if (a == 0)
+                     {
+                        /* background is already in screen gamma */
+                        *dp = (png_byte)background->gray;
+                     }
+                     else
+                     {
+                        png_byte v, w;
+
+                        v = gamma_to_1[*sp];
+                        png_composite(w, v, a, background_1->gray);
+                        *dp = gamma_from_1[w];
+                     }
+                  }
+               }
+               else
+#endif
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 2, dp++)
+                  {
+                     png_byte a = *(sp + 1);
+
+                     if (a == 0xff)
+                     {
+                        *dp = *sp;
+                     }
+                     else if (a == 0)
+                     {
+                        *dp = (png_byte)background->gray;
+                     }
+                     else
+                     {
+                        /* No gamma tables, so composite directly against
+                         * "background", matching the RGB_ALPHA case below.
+                         */
+                        png_composite(*dp, *sp, a, background->gray);
+                     }
+                  }
+               }
+            }
+            else /* if (png_ptr->bit_depth == 16) */
+            {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+               if (gamma_16 != NULL && gamma_16_from_1 != NULL &&
+                   gamma_16_to_1 != NULL)
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 4, dp += 2)
+                  {
+                     png_uint_16 a = (png_uint_16)(((*(sp+2)) << 8) + *(sp+3));
+
+                     if (a == (png_uint_16)0xffff)
+                     {
+                        png_uint_16 v;
+
+                        v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
+                        *dp = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(v & 0xff);
+                     }
+                     else if (a == 0)
+                     {
+                        /* background is already in screen gamma */
+                        *dp = (png_byte)((background->gray >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(background->gray & 0xff);
+                     }
+                     else
+                     {
+                        png_uint_16 g, v, w;
+
+                        g = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp];
+                        png_composite_16(v, g, a, background_1->gray);
+                        w = gamma_16_from_1[(v&0xff) >> gamma_shift][v >> 8];
+                        *dp = (png_byte)((w >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(w & 0xff);
+                     }
+                  }
+               }
+               else
+#endif
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 4, dp += 2)
+                  {
+                     png_uint_16 a = (png_uint_16)(((*(sp+2)) << 8) + *(sp+3));
+                     if (a == (png_uint_16)0xffff)
+                     {
+                        png_memcpy(dp, sp, 2);
+                     }
+                     else if (a == 0)
+                     {
+                        *dp = (png_byte)((background->gray >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(background->gray & 0xff);
+                     }
+                     else
+                     {
+                        png_uint_16 g, v;
+
+                        g = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                        /* No gamma tables, so composite directly against
+                         * "background", matching the RGB_ALPHA case below.
+                         */
+                        png_composite_16(v, g, a, background->gray);
+                        *dp = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(v & 0xff);
+                     }
+                  }
+               }
+            }
+            break;
+         }
+         case PNG_COLOR_TYPE_RGB_ALPHA:
+         {
+            if (row_info->bit_depth == 8)
+            {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+               if (gamma_to_1 != NULL && gamma_from_1 != NULL &&
+                   gamma_table != NULL)
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 4, dp += 3)
+                  {
+                     png_byte a = *(sp + 3);
+
+                     if (a == 0xff)
+                     {
+                        *dp = gamma_table[*sp];
+                        *(dp + 1) = gamma_table[*(sp + 1)];
+                        *(dp + 2) = gamma_table[*(sp + 2)];
+                     }
+                     else if (a == 0)
+                     {
+                        /* background is already in screen gamma */
+                        *dp = (png_byte)background->red;
+                        *(dp + 1) = (png_byte)background->green;
+                        *(dp + 2) = (png_byte)background->blue;
+                     }
+                     else
+                     {
+                        png_byte v, w;
+
+                        v = gamma_to_1[*sp];
+                        png_composite(w, v, a, background_1->red);
+                        *dp = gamma_from_1[w];
+                        v = gamma_to_1[*(sp + 1)];
+                        png_composite(w, v, a, background_1->green);
+                        *(dp + 1) = gamma_from_1[w];
+                        v = gamma_to_1[*(sp + 2)];
+                        png_composite(w, v, a, background_1->blue);
+                        *(dp + 2) = gamma_from_1[w];
+                     }
+                  }
+               }
+               else
+#endif
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 4, dp += 3)
+                  {
+                     png_byte a = *(sp + 3);
+
+                     if (a == 0xff)
+                     {
+                        *dp = *sp;
+                        *(dp + 1) = *(sp + 1);
+                        *(dp + 2) = *(sp + 2);
+                     }
+                     else if (a == 0)
+                     {
+                        *dp = (png_byte)background->red;
+                        *(dp + 1) = (png_byte)background->green;
+                        *(dp + 2) = (png_byte)background->blue;
+                     }
+                     else
+                     {
+                        png_composite(*dp, *sp, a, background->red);
+                        png_composite(*(dp + 1), *(sp + 1), a,
+                           background->green);
+                        png_composite(*(dp + 2), *(sp + 2), a,
+                           background->blue);
+                     }
+                  }
+               }
+            }
+            else /* if (row_info->bit_depth == 16) */
+            {
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+               if (gamma_16 != NULL && gamma_16_from_1 != NULL &&
+                   gamma_16_to_1 != NULL)
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 8, dp += 6)
+                  {
+                     png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6))
+                         << 8) + (png_uint_16)(*(sp + 7)));
+                     if (a == (png_uint_16)0xffff)
+                     {
+                        png_uint_16 v;
+
+                        v = gamma_16[*(sp + 1) >> gamma_shift][*sp];
+                        *dp = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(v & 0xff);
+                        v = gamma_16[*(sp + 3) >> gamma_shift][*(sp + 2)];
+                        *(dp + 2) = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 3) = (png_byte)(v & 0xff);
+                        v = gamma_16[*(sp + 5) >> gamma_shift][*(sp + 4)];
+                        *(dp + 4) = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 5) = (png_byte)(v & 0xff);
+                     }
+                     else if (a == 0)
+                     {
+                        /* background is already in screen gamma */
+                        *dp = (png_byte)((background->red >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(background->red & 0xff);
+                        *(dp + 2) = (png_byte)((background->green >> 8) & 0xff);
+                        *(dp + 3) = (png_byte)(background->green & 0xff);
+                        *(dp + 4) = (png_byte)((background->blue >> 8) & 0xff);
+                        *(dp + 5) = (png_byte)(background->blue & 0xff);
+                     }
+                     else
+                     {
+                        png_uint_16 v, w, x;
+
+                        v = gamma_16_to_1[*(sp + 1) >> gamma_shift][*sp];
+                        png_composite_16(w, v, a, background_1->red);
+                        x = gamma_16_from_1[((w&0xff) >> gamma_shift)][w >> 8];
+                        *dp = (png_byte)((x >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(x & 0xff);
+                        v = gamma_16_to_1[*(sp + 3) >> gamma_shift][*(sp + 2)];
+                        png_composite_16(w, v, a, background_1->green);
+                        x = gamma_16_from_1[((w&0xff) >> gamma_shift)][w >> 8];
+                        *(dp + 2) = (png_byte)((x >> 8) & 0xff);
+                        *(dp + 3) = (png_byte)(x & 0xff);
+                        v = gamma_16_to_1[*(sp + 5) >> gamma_shift][*(sp + 4)];
+                        png_composite_16(w, v, a, background_1->blue);
+                        x = gamma_16_from_1[(w & 0xff) >> gamma_shift][w >> 8];
+                        *(dp + 4) = (png_byte)((x >> 8) & 0xff);
+                        *(dp + 5) = (png_byte)(x & 0xff);
+                     }
+                  }
+               }
+               else
+#endif
+               {
+                  sp = row;
+                  dp = row;
+                  for (i = 0; i < row_width; i++, sp += 8, dp += 6)
+                  {
+                     png_uint_16 a = (png_uint_16)(((png_uint_16)(*(sp + 6))
+                        << 8) + (png_uint_16)(*(sp + 7)));
+                     if (a == (png_uint_16)0xffff)
+                     {
+                        png_memcpy(dp, sp, 6);
+                     }
+                     else if (a == 0)
+                     {
+                        *dp = (png_byte)((background->red >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(background->red & 0xff);
+                        *(dp + 2) = (png_byte)((background->green >> 8) & 0xff);
+                        *(dp + 3) = (png_byte)(background->green & 0xff);
+                        *(dp + 4) = (png_byte)((background->blue >> 8) & 0xff);
+                        *(dp + 5) = (png_byte)(background->blue & 0xff);
+                     }
+                     else
+                     {
+                        png_uint_16 v;
+
+                        png_uint_16 r = (png_uint_16)(((*sp) << 8) + *(sp + 1));
+                        png_uint_16 g = (png_uint_16)(((*(sp + 2)) << 8)
+                            + *(sp + 3));
+                        png_uint_16 b = (png_uint_16)(((*(sp + 4)) << 8)
+                            + *(sp + 5));
+
+                        png_composite_16(v, r, a, background->red);
+                        *dp = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 1) = (png_byte)(v & 0xff);
+                        png_composite_16(v, g, a, background->green);
+                        *(dp + 2) = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 3) = (png_byte)(v & 0xff);
+                        png_composite_16(v, b, a, background->blue);
+                        *(dp + 4) = (png_byte)((v >> 8) & 0xff);
+                        *(dp + 5) = (png_byte)(v & 0xff);
+                     }
+                  }
+               }
+            }
+            break;
+         }
+      }
+
+      /* The alpha channel has been removed; fix up the row description. */
+      if (row_info->color_type & PNG_COLOR_MASK_ALPHA)
+      {
+         row_info->color_type &= ~PNG_COLOR_MASK_ALPHA;
+         row_info->channels--;
+         row_info->pixel_depth = (png_byte)(row_info->channels *
+            row_info->bit_depth);
+         row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,row_width);
+      }
+   }
+}
+#endif
+
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+/* Gamma correct one row of the image in place, avoiding the alpha channel.
+ * Make sure you do this after you deal with the transparency issue on
+ * grayscale or RGB images.
+ *
+ *   row_info       - describes the row; width, color_type and bit_depth
+ *                    are read and nothing is written back.
+ *   row            - the packed pixel data, corrected in place.
+ *   gamma_table    - byte-indexed lookup used when bit_depth <= 8.
+ *   gamma_16_table - two-level lookup used when bit_depth == 16: the first
+ *                    index is the low byte of the sample shifted right by
+ *                    gamma_shift, the second index is the high byte.
+ *   gamma_shift    - number of low-order bits dropped from the low byte.
+ *
+ * Build the tables with png_build_gamma_table().  The row is left alone
+ * when the table matching its bit depth is NULL, and for color types or
+ * gray depths that have no case below (e.g. 1-bit gray).
+ */
+void /* PRIVATE */
+png_do_gamma(png_row_infop row_info, png_bytep row,
+   png_bytep gamma_table, png_uint_16pp gamma_16_table,
+   int gamma_shift)
+{
+   png_bytep sp;
+   png_uint_32 i;
+   png_uint_32 row_width;
+
+   png_debug(1, "in png_do_gamma\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+       ((row_info->bit_depth <= 8 && gamma_table != NULL) ||
+        (row_info->bit_depth == 16 && gamma_16_table != NULL)))
+   {
+      /* Only touch row_info once the optional NULL test has passed */
+      row_width = row_info->width;
+
+      switch (row_info->color_type)
+      {
+         case PNG_COLOR_TYPE_RGB:
+         {
+            if (row_info->bit_depth == 8)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  *sp = gamma_table[*sp];
+                  sp++;
+                  *sp = gamma_table[*sp];
+                  sp++;
+                  *sp = gamma_table[*sp];
+                  sp++;
+               }
+            }
+            else /* if (row_info->bit_depth == 16) */
+            {
+               /* 16-bit samples are stored high byte first */
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 v;
+
+                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 2;
+                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 2;
+                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 2;
+               }
+            }
+            break;
+         }
+         case PNG_COLOR_TYPE_RGB_ALPHA:
+         {
+            if (row_info->bit_depth == 8)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  *sp = gamma_table[*sp];
+                  sp++;
+                  *sp = gamma_table[*sp];
+                  sp++;
+                  *sp = gamma_table[*sp];
+                  sp++;
+                  /* step over the alpha byte */
+                  sp++;
+               }
+            }
+            else /* if (row_info->bit_depth == 16) */
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 2;
+                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 2;
+                  v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  /* blue sample plus the two alpha bytes */
+                  sp += 4;
+               }
+            }
+            break;
+         }
+         case PNG_COLOR_TYPE_GRAY_ALPHA:
+         {
+            if (row_info->bit_depth == 8)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  *sp = gamma_table[*sp];
+                  /* gray byte plus the alpha byte */
+                  sp += 2;
+               }
+            }
+            else /* if (row_info->bit_depth == 16) */
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  /* gray sample plus the two alpha bytes */
+                  sp += 4;
+               }
+            }
+            break;
+         }
+         case PNG_COLOR_TYPE_GRAY:
+         {
+            if (row_info->bit_depth == 2)
+            {
+               /* Four pixels per byte.  Each 2-bit sample is replicated
+                * to 8 bits for the lookup and the top two bits of the
+                * result are put back in the sample's own position.
+                */
+               sp = row;
+               for (i = 0; i < row_width; i += 4)
+               {
+                  int a = *sp & 0xc0;
+                  int b = *sp & 0x30;
+                  int c = *sp & 0x0c;
+                  int d = *sp & 0x03;
+
+                  *sp = (png_byte)(
+                        ((((int)gamma_table[a|(a>>2)|(a>>4)|(a>>6)])   ) & 0xc0)|
+                        ((((int)gamma_table[(b<<2)|b|(b>>2)|(b>>4)])>>2) & 0x30)|
+                        ((((int)gamma_table[(c<<4)|(c<<2)|c|(c>>2)])>>4) & 0x0c)|
+                        ((((int)gamma_table[(d<<6)|(d<<4)|(d<<2)|d])>>6) ));
+                  sp++;
+               }
+            }
+            else if (row_info->bit_depth == 4)
+            {
+               /* Two pixels per byte, handled like the 2-bit case: each
+                * nibble is replicated to 8 bits and the top nibble of
+                * the looked-up value is kept.
+                */
+               sp = row;
+               for (i = 0; i < row_width; i += 2)
+               {
+                  int msb = *sp & 0xf0;
+                  int lsb = *sp & 0x0f;
+
+                  *sp = (png_byte)((((int)gamma_table[msb | (msb >> 4)]) & 0xf0)
+                          | (((int)gamma_table[(lsb << 4) | lsb]) >> 4));
+                  sp++;
+               }
+            }
+            else if (row_info->bit_depth == 8)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  *sp = gamma_table[*sp];
+                  sp++;
+               }
+            }
+            else if (row_info->bit_depth == 16)
+            {
+               sp = row;
+               for (i = 0; i < row_width; i++)
+               {
+                  png_uint_16 v = gamma_16_table[*(sp + 1) >> gamma_shift][*sp];
+                  *sp = (png_byte)((v >> 8) & 0xff);
+                  *(sp + 1) = (png_byte)(v & 0xff);
+                  sp += 2;
+               }
+            }
+            break;
+         }
+         default:
+            /* other color types are not gamma corrected here */
+            break;
+      }
+   }
+}
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+/* Expands a palette row in place to an 8-bit RGB or RGBA row depending
+ * upon whether you supply trans and num_trans.
+ *
+ *   row_info  - describes the row; updated to the expanded layout.
+ *   row       - the pixel data.  The expansion is done in place, back to
+ *               front, so the buffer must have room for 3 * width bytes
+ *               (4 * width when trans is given).
+ *   palette   - color table indexed by each pixel value.
+ *               NOTE(review): the index is not range checked here;
+ *               presumably the caller guarantees the table is big enough.
+ *   trans     - per-index alpha values, or NULL for plain RGB output.
+ *   num_trans - number of entries in trans; indexes at or beyond it are
+ *               given an alpha of 0xff.
+ *
+ * Rows whose color type is not PNG_COLOR_TYPE_PALETTE are left untouched.
+ */
+void /* PRIVATE */
+png_do_expand_palette(png_row_infop row_info, png_bytep row,
+   png_colorp palette, png_bytep trans, int num_trans)
+{
+   int shift, value;
+   png_bytep sp, dp;
+   png_uint_32 i;
+   png_uint_32 row_width;
+
+   png_debug(1, "in png_do_expand_palette\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+       row_info->color_type == PNG_COLOR_TYPE_PALETTE)
+   {
+      /* Only touch row_info once the optional NULL test has passed */
+      row_width = row_info->width;
+
+      /* Step 1: unpack 1, 2 or 4 bit indexes to one index per byte.
+       * Source and destination share the buffer, so walk from the last
+       * pixel towards the first; shift is the bit position of the
+       * current pixel inside *sp.
+       */
+      if (row_info->bit_depth < 8)
+      {
+         switch (row_info->bit_depth)
+         {
+            case 1:
+            {
+               sp = row + (png_size_t)((row_width - 1) >> 3);
+               dp = row + (png_size_t)row_width - 1;
+               shift = 7 - (int)((row_width + 7) & 0x07);
+               for (i = 0; i < row_width; i++)
+               {
+                  if ((*sp >> shift) & 0x01)
+                     *dp = 1;
+                  else
+                     *dp = 0;
+                  if (shift == 7)
+                  {
+                     shift = 0;
+                     sp--;
+                  }
+                  else
+                     shift++;
+
+                  dp--;
+               }
+               break;
+            }
+            case 2:
+            {
+               sp = row + (png_size_t)((row_width - 1) >> 2);
+               dp = row + (png_size_t)row_width - 1;
+               shift = (int)((3 - ((row_width + 3) & 0x03)) << 1);
+               for (i = 0; i < row_width; i++)
+               {
+                  value = (*sp >> shift) & 0x03;
+                  *dp = (png_byte)value;
+                  if (shift == 6)
+                  {
+                     shift = 0;
+                     sp--;
+                  }
+                  else
+                     shift += 2;
+
+                  dp--;
+               }
+               break;
+            }
+            case 4:
+            {
+               sp = row + (png_size_t)((row_width - 1) >> 1);
+               dp = row + (png_size_t)row_width - 1;
+               shift = (int)((row_width & 0x01) << 2);
+               for (i = 0; i < row_width; i++)
+               {
+                  value = (*sp >> shift) & 0x0f;
+                  *dp = (png_byte)value;
+                  if (shift == 4)
+                  {
+                     shift = 0;
+                     sp--;
+                  }
+                  else
+                     shift += 4;
+
+                  dp--;
+               }
+               break;
+            }
+         }
+         row_info->bit_depth = 8;
+         row_info->pixel_depth = 8;
+         row_info->rowbytes = row_width;
+      }
+
+      /* Step 2: replace every 8-bit index by its palette color, again
+       * back to front so that unread indexes are never overwritten.
+       */
+      switch (row_info->bit_depth)
+      {
+         case 8:
+         {
+            if (trans != NULL)
+            {
+               sp = row + (png_size_t)row_width - 1;
+               dp = row + (png_size_t)(row_width << 2) - 1;
+
+               for (i = 0; i < row_width; i++)
+               {
+                  /* bytes are stored last-to-first: A, B, G, R */
+                  if ((int)(*sp) >= num_trans)
+                     *dp-- = 0xff;
+                  else
+                     *dp-- = trans[*sp];
+                  *dp-- = palette[*sp].blue;
+                  *dp-- = palette[*sp].green;
+                  *dp-- = palette[*sp].red;
+                  sp--;
+               }
+               row_info->bit_depth = 8;
+               row_info->pixel_depth = 32;
+               row_info->rowbytes = row_width * 4;
+               row_info->color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+               row_info->channels = 4;
+            }
+            else
+            {
+               sp = row + (png_size_t)row_width - 1;
+               dp = row + (png_size_t)(row_width * 3) - 1;
+
+               for (i = 0; i < row_width; i++)
+               {
+                  *dp-- = palette[*sp].blue;
+                  *dp-- = palette[*sp].green;
+                  *dp-- = palette[*sp].red;
+                  sp--;
+               }
+               row_info->bit_depth = 8;
+               row_info->pixel_depth = 24;
+               row_info->rowbytes = row_width * 3;
+               row_info->color_type = PNG_COLOR_TYPE_RGB;
+               row_info->channels = 3;
+            }
+            break;
+         }
+      }
+   }
+}
+
+/* If the bit depth < 8, it is expanded to 8.  Also, if the already
+ * expanded transparency value is supplied, an alpha channel is built.
+ *
+ *   row_info    - describes the row; updated to the expanded layout.
+ *   row         - the pixel data.  Everything is done in place, back to
+ *                 front, so the buffer must have room for the widened row.
+ *   trans_value - the color to be treated as fully transparent, or NULL
+ *                 to skip building an alpha channel.
+ *
+ * Gray rows are first unpacked to 8 bits per sample (the transparent gray
+ * level is scaled the same way); with trans_value they then become
+ * gray+alpha.  RGB rows are only touched when trans_value is given, and
+ * become RGBA.  A pixel equal to the transparent color gets alpha 0, all
+ * others get full alpha.  Other color types are left alone.
+ */
+void /* PRIVATE */
+png_do_expand(png_row_infop row_info, png_bytep row,
+   png_color_16p trans_value)
+{
+   int shift, value;
+   png_bytep sp, dp;
+   png_uint_32 i;
+   png_uint_32 row_width;
+
+   png_debug(1, "in png_do_expand\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      /* Only touch row_info once the optional NULL test has passed */
+      row_width = row_info->width;
+
+      if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
+      {
+         png_uint_16 gray = (png_uint_16)(trans_value ? trans_value->gray : 0);
+
+         if (row_info->bit_depth < 8)
+         {
+            /* Unpack from the last pixel towards the first; shift is the
+             * bit position of the current pixel inside *sp.
+             */
+            switch (row_info->bit_depth)
+            {
+               case 1:
+               {
+                  gray = (png_uint_16)((gray&0x01)*0xff);
+                  sp = row + (png_size_t)((row_width - 1) >> 3);
+                  dp = row + (png_size_t)row_width - 1;
+                  shift = 7 - (int)((row_width + 7) & 0x07);
+                  for (i = 0; i < row_width; i++)
+                  {
+                     if ((*sp >> shift) & 0x01)
+                        *dp = 0xff;
+                     else
+                        *dp = 0;
+                     if (shift == 7)
+                     {
+                        shift = 0;
+                        sp--;
+                     }
+                     else
+                        shift++;
+
+                     dp--;
+                  }
+                  break;
+               }
+               case 2:
+               {
+                  gray = (png_uint_16)((gray&0x03)*0x55);
+                  sp = row + (png_size_t)((row_width - 1) >> 2);
+                  dp = row + (png_size_t)row_width - 1;
+                  shift = (int)((3 - ((row_width + 3) & 0x03)) << 1);
+                  for (i = 0; i < row_width; i++)
+                  {
+                     value = (*sp >> shift) & 0x03;
+                     /* replicate the two bits across the byte */
+                     *dp = (png_byte)(value | (value << 2) | (value << 4) |
+                        (value << 6));
+                     if (shift == 6)
+                     {
+                        shift = 0;
+                        sp--;
+                     }
+                     else
+                        shift += 2;
+
+                     dp--;
+                  }
+                  break;
+               }
+               case 4:
+               {
+                  gray = (png_uint_16)((gray&0x0f)*0x11);
+                  sp = row + (png_size_t)((row_width - 1) >> 1);
+                  dp = row + (png_size_t)row_width - 1;
+                  shift = (int)((1 - ((row_width + 1) & 0x01)) << 2);
+                  for (i = 0; i < row_width; i++)
+                  {
+                     value = (*sp >> shift) & 0x0f;
+                     /* replicate the nibble into both halves */
+                     *dp = (png_byte)(value | (value << 4));
+                     if (shift == 4)
+                     {
+                        shift = 0;
+                        sp--;
+                     }
+                     else
+                        shift = 4;
+
+                     dp--;
+                  }
+                  break;
+               }
+            }
+            row_info->bit_depth = 8;
+            row_info->pixel_depth = 8;
+            row_info->rowbytes = row_width;
+         }
+
+         if (trans_value != NULL)
+         {
+            if (row_info->bit_depth == 8)
+            {
+               gray = gray & 0xff;
+               sp = row + (png_size_t)row_width - 1;
+               dp = row + (png_size_t)(row_width << 1) - 1;
+               for (i = 0; i < row_width; i++)
+               {
+                  /* alpha is written first because we walk backwards */
+                  if (*sp == gray)
+                     *dp-- = 0;
+                  else
+                     *dp-- = 0xff;
+                  *dp-- = *sp--;
+               }
+            }
+            else if (row_info->bit_depth == 16)
+            {
+               png_byte gray_high = (gray >> 8) & 0xff;
+               png_byte gray_low = gray & 0xff;
+               sp = row + row_info->rowbytes - 1;
+               dp = row + (row_info->rowbytes << 1) - 1;
+               for (i = 0; i < row_width; i++)
+               {
+                  /* sp points at the low byte of the current sample */
+                  if (*(sp-1) == gray_high && *(sp) == gray_low) 
+                  {
+                     *dp-- = 0;
+                     *dp-- = 0;
+                  }
+                  else
+                  {
+                     *dp-- = 0xff;
+                     *dp-- = 0xff;
+                  }
+                  *dp-- = *sp--;
+                  *dp-- = *sp--;
+               }
+            }
+            row_info->color_type = PNG_COLOR_TYPE_GRAY_ALPHA;
+            row_info->channels = 2;
+            row_info->pixel_depth = (png_byte)(row_info->bit_depth << 1);
+            row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,
+               row_width);
+         }
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_RGB && trans_value)
+      {
+         if (row_info->bit_depth == 8)
+         {
+            png_byte red = trans_value->red & 0xff;
+            png_byte green = trans_value->green & 0xff;
+            png_byte blue = trans_value->blue & 0xff;
+            sp = row + (png_size_t)row_info->rowbytes - 1;
+            dp = row + (png_size_t)(row_width << 2) - 1;
+            for (i = 0; i < row_width; i++)
+            {
+               /* sp points at the blue byte of the current pixel */
+               if (*(sp - 2) == red && *(sp - 1) == green && *(sp) == blue)
+                  *dp-- = 0;
+               else
+                  *dp-- = 0xff;
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+            }
+         }
+         else if (row_info->bit_depth == 16)
+         {
+            png_byte red_high = (trans_value->red >> 8) & 0xff;
+            png_byte green_high = (trans_value->green >> 8) & 0xff;
+            png_byte blue_high = (trans_value->blue >> 8) & 0xff;
+            png_byte red_low = trans_value->red & 0xff;
+            png_byte green_low = trans_value->green & 0xff;
+            png_byte blue_low = trans_value->blue & 0xff;
+            sp = row + row_info->rowbytes - 1;
+            dp = row + (png_size_t)(row_width << 3) - 1;
+            for (i = 0; i < row_width; i++)
+            {
+               /* sp points at the low blue byte of the current pixel */
+               if (*(sp - 5) == red_high &&
+                  *(sp - 4) == red_low &&
+                  *(sp - 3) == green_high &&
+                  *(sp - 2) == green_low &&
+                  *(sp - 1) == blue_high &&
+                  *(sp    ) == blue_low)
+               {
+                  *dp-- = 0;
+                  *dp-- = 0;
+               }
+               else
+               {
+                  *dp-- = 0xff;
+                  *dp-- = 0xff;
+               }
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+               *dp-- = *sp--;
+            }
+         }
+         row_info->color_type = PNG_COLOR_TYPE_RGB_ALPHA;
+         row_info->channels = 4;
+         row_info->pixel_depth = (png_byte)(row_info->bit_depth << 2);
+         row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,row_width);
+      }
+   }
+}
+#endif
+
+#if defined(PNG_READ_DITHER_SUPPORTED)
+/* Reduce one 8-bit row to palette indexes, in place.
+ *
+ *   row_info       - describes the row; switched to an 8-bit palette
+ *                    layout when an RGB or RGBA row is converted.
+ *   row            - the pixel data, rewritten in place.
+ *   palette_lookup - maps a packed RGB key (the top PNG_DITHER_*_BITS of
+ *                    each channel) to a palette index; used for RGB and
+ *                    RGBA rows, whose alpha byte is discarded.
+ *   dither_lookup  - maps an existing palette index to a new one; used for
+ *                    rows that are already palette based.
+ *
+ * Nothing is done for other color types or bit depths, or when the lookup
+ * table the row would need is NULL.
+ */
+void /* PRIVATE */
+png_do_dither(png_row_infop row_info, png_bytep row,
+    png_bytep palette_lookup, png_bytep dither_lookup)
+{
+   png_bytep sp, dp;
+   png_uint_32 i;
+   png_uint_32 row_width;
+
+   png_debug(1, "in png_do_dither\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      /* Only touch row_info once the optional NULL test has passed */
+      row_width = row_info->width;
+
+      if ((row_info->color_type == PNG_COLOR_TYPE_RGB ||
+         row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA) &&
+         palette_lookup != NULL && row_info->bit_depth == 8)
+      {
+         int r, g, b, p;
+         int has_alpha =
+            (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA);
+
+         /* dp never overtakes sp: one byte is written per pixel read */
+         sp = row;
+         dp = row;
+         for (i = 0; i < row_width; i++)
+         {
+            r = *sp++;
+            g = *sp++;
+            b = *sp++;
+            if (has_alpha)
+               sp++;
+
+            /* this looks real messy, but the compiler will reduce
+               it down to a reasonable formula.  For example, with
+               5 bits per color, we get:
+               p = (((r >> 3) & 0x1f) << 10) |
+                  (((g >> 3) & 0x1f) << 5) |
+                  ((b >> 3) & 0x1f);
+               */
+            p = (((r >> (8 - PNG_DITHER_RED_BITS)) &
+               ((1 << PNG_DITHER_RED_BITS) - 1)) <<
+               (PNG_DITHER_GREEN_BITS + PNG_DITHER_BLUE_BITS)) |
+               (((g >> (8 - PNG_DITHER_GREEN_BITS)) &
+               ((1 << PNG_DITHER_GREEN_BITS) - 1)) <<
+               (PNG_DITHER_BLUE_BITS)) |
+               ((b >> (8 - PNG_DITHER_BLUE_BITS)) &
+               ((1 << PNG_DITHER_BLUE_BITS) - 1));
+
+            *dp++ = palette_lookup[p];
+         }
+         row_info->color_type = PNG_COLOR_TYPE_PALETTE;
+         row_info->channels = 1;
+         row_info->pixel_depth = row_info->bit_depth;
+         row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,row_width);
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_PALETTE &&
+         dither_lookup && row_info->bit_depth == 8)
+      {
+         /* already indexed: remap each index, layout is unchanged */
+         sp = row;
+         for (i = 0; i < row_width; i++, sp++)
+         {
+            *sp = dither_lookup[*sp];
+         }
+      }
+   }
+}
+#endif
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+#if defined(PNG_READ_GAMMA_SUPPORTED)
+/* Multipliers, indexed by the gamma shift, used as (i * mult) >> 4 to
+ * stretch an (8 - shift)-bit table index i back to an approximate 8-bit
+ * low byte of a 16-bit sample.
+ */
+static PNG_CONST int png_gamma_shift[] =
+   {0x10, 0x21, 0x42, 0x84, 0x110, 0x248, 0x550, 0xff0, 0x00};
+
+/* We build the 8- or 16-bit gamma tables here.  Note that for 16-bit
+ * tables, we don't make a full table if we are reducing to 8-bit in
+ * the future.  Note also how the gamma_16 tables are segmented so that
+ * we don't need to allocate > 64K chunks for a full 16-bit table.
+ *
+ * For bit depths up to 8 this fills png_ptr->gamma_table and, when
+ * background or rgb-to-gray processing is requested, gamma_to_1 and
+ * gamma_from_1.  For 16-bit data it sets png_ptr->gamma_shift and fills
+ * gamma_16_table (plus gamma_16_to_1 / gamma_16_from_1 under the same
+ * condition).  A 16-bit table is an array of (1 << (8 - gamma_shift))
+ * rows of 256 entries: the row is selected by the reduced low byte of a
+ * sample and the column by its high byte.
+ *
+ * Every row-pointer array is cleared right after it is allocated so that
+ * it never holds indeterminate pointers while its rows are still being
+ * allocated one by one.
+ */
+void /* PRIVATE */
+png_build_gamma_table(png_structp png_ptr)
+{
+  png_debug(1, "in png_build_gamma_table\n");
+
+  if (png_ptr->bit_depth <= 8)
+  {
+     int i;
+     double g;
+
+     /* exponent for file -> screen; 1.0 when no screen gamma is set */
+     if (png_ptr->screen_gamma > .000001)
+        g = 1.0 / (png_ptr->gamma * png_ptr->screen_gamma);
+     else
+        g = 1.0;
+
+     png_ptr->gamma_table = (png_bytep)png_malloc(png_ptr,
+        (png_uint_32)256);
+
+     for (i = 0; i < 256; i++)
+     {
+        png_ptr->gamma_table[i] = (png_byte)(pow((double)i / 255.0,
+           g) * 255.0 + .5);
+     }
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
+   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+     if (png_ptr->transformations & ((PNG_BACKGROUND) | PNG_RGB_TO_GRAY))
+     {
+
+        /* file samples -> gamma 1.0 */
+        g = 1.0 / (png_ptr->gamma);
+
+        png_ptr->gamma_to_1 = (png_bytep)png_malloc(png_ptr,
+           (png_uint_32)256);
+
+        for (i = 0; i < 256; i++)
+        {
+           png_ptr->gamma_to_1[i] = (png_byte)(pow((double)i / 255.0,
+              g) * 255.0 + .5);
+        }
+
+
+        png_ptr->gamma_from_1 = (png_bytep)png_malloc(png_ptr,
+           (png_uint_32)256);
+
+        /* gamma 1.0 -> screen (or back to the file gamma) */
+        if(png_ptr->screen_gamma > 0.000001)
+           g = 1.0 / png_ptr->screen_gamma;
+        else
+           g = png_ptr->gamma;   /* probably doing rgb_to_gray */
+
+        for (i = 0; i < 256; i++)
+        {
+           png_ptr->gamma_from_1[i] = (png_byte)(pow((double)i / 255.0,
+              g) * 255.0 + .5);
+
+        }
+     }
+#endif /* PNG_READ_BACKGROUND_SUPPORTED || PNG_RGB_TO_GRAY_SUPPORTED */
+  }
+  else
+  {
+     double g;
+     int i, j, shift, num;
+     int sig_bit;
+     png_uint_32 ig;
+
+     /* Use the largest significant-bit count of the color channels
+      * (or the gray one) to decide how many low bits can be dropped.
+      */
+     if (png_ptr->color_type & PNG_COLOR_MASK_COLOR)
+     {
+        sig_bit = (int)png_ptr->sig_bit.red;
+        if ((int)png_ptr->sig_bit.green > sig_bit)
+           sig_bit = png_ptr->sig_bit.green;
+        if ((int)png_ptr->sig_bit.blue > sig_bit)
+           sig_bit = png_ptr->sig_bit.blue;
+     }
+     else
+     {
+        sig_bit = (int)png_ptr->sig_bit.gray;
+     }
+
+     if (sig_bit > 0)
+        shift = 16 - sig_bit;
+     else
+        shift = 0;
+
+     if (png_ptr->transformations & PNG_16_TO_8)
+     {
+        if (shift < (16 - PNG_MAX_GAMMA_8))
+           shift = (16 - PNG_MAX_GAMMA_8);
+     }
+
+     /* shift only ever applies to the low byte, so keep it in 0..8 */
+     if (shift > 8)
+        shift = 8;
+     if (shift < 0)
+        shift = 0;
+
+     png_ptr->gamma_shift = (png_byte)shift;
+
+     /* number of 256-entry rows in each 16-bit table */
+     num = (1 << (8 - shift));
+
+     if (png_ptr->screen_gamma > .000001)
+        g = 1.0 / (png_ptr->gamma * png_ptr->screen_gamma);
+     else
+        g = 1.0;
+
+     png_ptr->gamma_16_table = (png_uint_16pp)png_malloc(png_ptr,
+        (png_uint_32)(num * png_sizeof (png_uint_16p)));
+
+     /* Clear the row pointers before any row is allocated.
+      * NOTE(review): presumably a failing png_malloc() leaves through
+      * png_error() and the table is later released row by row; confirm.
+      */
+     for (i = 0; i < num; i++)
+        png_ptr->gamma_16_table[i] = NULL;
+
+     if (png_ptr->transformations & (PNG_16_TO_8 | PNG_BACKGROUND))
+     {
+        double fin, fout;
+        png_uint_32 last, max;
+
+        for (i = 0; i < num; i++)
+        {
+           png_ptr->gamma_16_table[i] = (png_uint_16p)png_malloc(png_ptr,
+              (png_uint_32)(256 * png_sizeof (png_uint_16)));
+        }
+
+        /* Work backwards from the 256 possible 8-bit outputs: for each
+         * output level i find the largest input that still maps to it
+         * and fill every table slot up to that input with i (replicated
+         * into both bytes).
+         */
+        g = 1.0 / g;
+        last = 0;
+        for (i = 0; i < 256; i++)
+        {
+           fout = ((double)i + 0.5) / 256.0;
+           fin = pow(fout, g);
+           max = (png_uint_32)(fin * (double)((png_uint_32)num << 8));
+           while (last <= max)
+           {
+              png_ptr->gamma_16_table[(int)(last & (0xff >> shift))]
+                 [(int)(last >> (8 - shift))] = (png_uint_16)(
+                 (png_uint_16)i | ((png_uint_16)i << 8));
+              last++;
+           }
+        }
+        /* anything left over saturates */
+        while (last < ((png_uint_32)num << 8))
+        {
+           png_ptr->gamma_16_table[(int)(last & (0xff >> shift))]
+              [(int)(last >> (8 - shift))] = (png_uint_16)65535L;
+           last++;
+        }
+     }
+     else
+     {
+        for (i = 0; i < num; i++)
+        {
+           png_ptr->gamma_16_table[i] = (png_uint_16p)png_malloc(png_ptr,
+              (png_uint_32)(256 * png_sizeof (png_uint_16)));
+
+           /* ig is the low byte this row stands for, j the high byte */
+           ig = (((png_uint_32)i * (png_uint_32)png_gamma_shift[shift]) >> 4);
+           for (j = 0; j < 256; j++)
+           {
+              png_ptr->gamma_16_table[i][j] =
+                 (png_uint_16)(pow((double)(ig + ((png_uint_32)j << 8)) /
+                    65535.0, g) * 65535.0 + .5);
+           }
+        }
+     }
+
+#if defined(PNG_READ_BACKGROUND_SUPPORTED) || \
+   defined(PNG_READ_RGB_TO_GRAY_SUPPORTED)
+     if (png_ptr->transformations & (PNG_BACKGROUND | PNG_RGB_TO_GRAY))
+     {
+
+        /* file samples -> gamma 1.0 */
+        g = 1.0 / (png_ptr->gamma);
+
+        png_ptr->gamma_16_to_1 = (png_uint_16pp)png_malloc(png_ptr,
+           (png_uint_32)(num * png_sizeof (png_uint_16p )));
+
+        /* clear the row pointers before any row is allocated */
+        for (i = 0; i < num; i++)
+           png_ptr->gamma_16_to_1[i] = NULL;
+
+        for (i = 0; i < num; i++)
+        {
+           png_ptr->gamma_16_to_1[i] = (png_uint_16p)png_malloc(png_ptr,
+              (png_uint_32)(256 * png_sizeof (png_uint_16)));
+
+           ig = (((png_uint_32)i *
+              (png_uint_32)png_gamma_shift[shift]) >> 4);
+           for (j = 0; j < 256; j++)
+           {
+              png_ptr->gamma_16_to_1[i][j] =
+                 (png_uint_16)(pow((double)(ig + ((png_uint_32)j << 8)) /
+                    65535.0, g) * 65535.0 + .5);
+           }
+        }
+
+        /* gamma 1.0 -> screen (or back to the file gamma) */
+        if(png_ptr->screen_gamma > 0.000001)
+           g = 1.0 / png_ptr->screen_gamma;
+        else
+           g = png_ptr->gamma;   /* probably doing rgb_to_gray */
+
+        png_ptr->gamma_16_from_1 = (png_uint_16pp)png_malloc(png_ptr,
+           (png_uint_32)(num * png_sizeof (png_uint_16p)));
+
+        /* clear the row pointers before any row is allocated */
+        for (i = 0; i < num; i++)
+           png_ptr->gamma_16_from_1[i] = NULL;
+
+        for (i = 0; i < num; i++)
+        {
+           png_ptr->gamma_16_from_1[i] = (png_uint_16p)png_malloc(png_ptr,
+              (png_uint_32)(256 * png_sizeof (png_uint_16)));
+
+           ig = (((png_uint_32)i *
+              (png_uint_32)png_gamma_shift[shift]) >> 4);
+           for (j = 0; j < 256; j++)
+           {
+              png_ptr->gamma_16_from_1[i][j] =
+                 (png_uint_16)(pow((double)(ig + ((png_uint_32)j << 8)) /
+                    65535.0, g) * 65535.0 + .5);
+           }
+        }
+     }
+#endif /* PNG_READ_BACKGROUND_SUPPORTED || PNG_RGB_TO_GRAY_SUPPORTED */
+  }
+}
+#endif
+/* To do: install integer version of png_build_gamma_table here */
+#endif
+
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+/* Undoes intrapixel differencing on one row, in place: the second sample
+ * of every pixel (green) is added back to the first (red) and the third
+ * (blue), modulo 256 for 8-bit rows and modulo 65536 for 16-bit rows.
+ * Only RGB and RGBA rows of depth 8 or 16 are changed; an alpha sample,
+ * if present, is not touched.
+ */
+void /* PRIVATE */
+png_do_read_intrapixel(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_read_intrapixel\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+       (row_info->color_type & PNG_COLOR_MASK_COLOR))
+   {
+      png_uint_32 pixels_left = row_info->width;
+      png_bytep px = row;
+      int samples_per_pixel;
+
+      /* anything that is not plain RGB or RGBA is left as it is */
+      switch (row_info->color_type)
+      {
+         case PNG_COLOR_TYPE_RGB:
+            samples_per_pixel = 3;
+            break;
+         case PNG_COLOR_TYPE_RGB_ALPHA:
+            samples_per_pixel = 4;
+            break;
+         default:
+            return;
+      }
+
+      switch (row_info->bit_depth)
+      {
+         case 8:
+         {
+            while (pixels_left > 0)
+            {
+               px[0] = (png_byte)((256 + px[0] + px[1])&0xff);
+               px[2] = (png_byte)((256 + px[2] + px[1])&0xff);
+               px += samples_per_pixel;
+               pixels_left--;
+            }
+            break;
+         }
+         case 16:
+         {
+            /* two bytes per sample, high byte first */
+            int pixel_bytes = samples_per_pixel * 2;
+
+            while (pixels_left > 0)
+            {
+               png_uint_32 red_diff  = (px[0] << 8) | px[1];
+               png_uint_32 green     = (px[2] << 8) | px[3];
+               png_uint_32 blue_diff = (px[4] << 8) | px[5];
+               png_uint_32 red  =
+                  (png_uint_32)((red_diff+green+65536L) & 0xffffL);
+               png_uint_32 blue =
+                  (png_uint_32)((blue_diff+green+65536L) & 0xffffL);
+
+               px[0] = (png_byte)((red >> 8) & 0xff);
+               px[1] = (png_byte)(red & 0xff);
+               px[4] = (png_byte)((blue >> 8) & 0xff);
+               px[5] = (png_byte)(blue & 0xff);
+               px += pixel_bytes;
+               pixels_left--;
+            }
+            break;
+         }
+         default:
+            break;
+      }
+   }
+}
+#endif /* PNG_MNG_FEATURES_SUPPORTED */
+#endif /* PNG_READ_SUPPORTED */
diff --git a/src/libpng/pngrutil.c b/src/libpng/pngrutil.c
new file mode 100644
index 0000000..ad4f1a9
--- /dev/null
+++ b/src/libpng/pngrutil.c
@@ -0,0 +1,3163 @@
+
+/* pngrutil.c - utilities to read a PNG file
+ *
+ * Last changed in libpng 1.2.22 [October 13, 2007]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file contains routines that are only called from within
+ * libpng itself during the course of reading an image.
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED)
+
+#if defined(_WIN32_WCE) && (_WIN32_WCE<0x500)
+#  define WIN32_WCE_OLD
+#endif
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+#  if defined(WIN32_WCE_OLD)
+/* strtod() function is not supported on WindowsCE; emulate it with wcstod() */
+__inline double png_strtod(png_structp png_ptr, PNG_CONST char *nptr, char **endptr)
+{
+   double result = 0;
+   int len;
+   wchar_t *str, *end;
+   *endptr = (char *)nptr; /* as strtod(): nothing converted => *endptr == nptr */
+   len = MultiByteToWideChar(CP_ACP, 0, nptr, -1, NULL, 0); /* ask for the size */
+   str = (wchar_t *)png_malloc(png_ptr, len * sizeof(wchar_t));
+   if ( NULL != str )
+   {
+      MultiByteToWideChar(CP_ACP, 0, nptr, -1, str, len);
+      result = wcstod(str, &end);
+      len = WideCharToMultiByte(CP_ACP, 0, end, -1, NULL, 0, NULL, NULL); /* unparsed tail */
+      *endptr = (char *)nptr + (png_strlen(nptr) - len + 1); /* map tail back onto nptr */
+      png_free(png_ptr, str);
+   }
+   return result;
+}
+#  else
+#    define png_strtod(p,a,b) strtod(a,b)
+#  endif
+#endif
+
+png_uint_32 PNGAPI
+png_get_uint_31(png_structp png_ptr, png_bytep buf)
+{
+   png_uint_32 value = png_get_uint_32(buf);   /* 32-bit value read from buf */
+   if (!(value <= PNG_UINT_31_MAX))            /* larger than PNG_UINT_31_MAX */
+      png_error(png_ptr, "PNG unsigned integer out of range.");
+   return value;
+}
+#ifndef PNG_READ_BIG_ENDIAN_SUPPORTED
+/* Decode four bytes at buf, most significant first, as an unsigned 32-bit value. */
+png_uint_32 PNGAPI
+png_get_uint_32(png_bytep buf)
+{
+   png_uint_32 value = (png_uint_32)buf[0] << 24;
+
+   value |= (png_uint_32)buf[1] << 16;
+   value |= (png_uint_32)buf[2] << 8;
+   value |= (png_uint_32)buf[3];
+   return value;
+}
+
+/* Grab a signed 32-bit integer from a buffer in big-endian format.  The
+ * data is stored in the PNG file in two's complement format; it is decoded
+ * with unsigned arithmetic so no signed shift or negation can overflow. */
+png_int_32 PNGAPI
+png_get_int_32(png_bytep buf)
+{
+   png_uint_32 u = png_get_uint_32(buf);
+   if ((u & 0x80000000UL) == 0)   /* sign bit clear: non-negative value */
+      return ((png_int_32)u);
+   /* Negative: the value is -(2^32 - u), computed as -(~u) - 1. */
+   u ^= 0xffffffffUL;
+   return (-(png_int_32)u - 1);
+}
+
+/* Decode two bytes at buf, high byte first, as an unsigned 16-bit value. */
+png_uint_16 PNGAPI
+png_get_uint_16(png_bytep buf)
+{
+   unsigned int high = buf[0];
+   unsigned int low = buf[1];
+
+   return (png_uint_16)((high << 8) | low);
+}
+#endif /* PNG_READ_BIG_ENDIAN_SUPPORTED */
+
+/* Read length bytes into buf, then hand them to png_calculate_crc(). */
+void /* PRIVATE */
+png_crc_read(png_structp png_ptr, png_bytep buf, png_size_t length)
+{
+   if (!png_ptr) return;   /* no read structure: nothing to do */
+   png_read_data(png_ptr, buf, length);
+   png_calculate_crc(png_ptr, buf, length);
+}
+
+/* Consume 'skip' bytes of chunk data via png_crc_read(), then check the CRC.
+   On a mismatch, an ancillary chunk without PNG_FLAG_CRC_ANCILLARY_NOWARN or
+   a critical chunk with PNG_FLAG_CRC_CRITICAL_USE only gets a warning; any
+   other case calls png_chunk_error().  Returns 1 on mismatch, 0 otherwise. */
+int /* PRIVATE */
+png_crc_finish(png_structp png_ptr, png_uint_32 skip)
+{
+   png_size_t remaining = (png_size_t)skip;
+   png_size_t bufsize = png_ptr->zbuf_size;
+   int ancillary, warn_only;
+
+   /* Drain whole buffers first, then whatever is left over. */
+   while (remaining > bufsize)
+   {
+      png_crc_read(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size);
+      remaining -= bufsize;
+   }
+   if (remaining != 0)
+      png_crc_read(png_ptr, png_ptr->zbuf, remaining);
+
+   if (!png_crc_error(png_ptr))
+      return (0);
+
+   /* Bit 0x20 of the first chunk-name byte marks an ancillary chunk. */
+   ancillary = (png_ptr->chunk_name[0] & 0x20) != 0;
+   if (ancillary)
+      warn_only = !(png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN);
+   else
+      warn_only = (png_ptr->flags & PNG_FLAG_CRC_CRITICAL_USE) != 0;
+
+   if (warn_only)
+      png_chunk_warning(png_ptr, "CRC error");
+   else
+      png_chunk_error(png_ptr, "CRC error");
+
+   return (1);
+}
+
+/* Read the four CRC bytes that end a chunk and compare them with png_ptr->crc.
+   Returns 1 on a mismatch; returns 0 on a match or when the flags waive it. */
+int /* PRIVATE */
+png_crc_error(png_structp png_ptr)
+{
+   png_byte stored[4];
+   int skip_check;
+
+   if (png_ptr->chunk_name[0] & 0x20)
+   {
+      /* Ancillary chunk: waived only when both USE and NOWARN are set. */
+      skip_check = ((png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_MASK) ==
+          (PNG_FLAG_CRC_ANCILLARY_USE | PNG_FLAG_CRC_ANCILLARY_NOWARN));
+   }
+   else
+   {
+      /* Critical chunk: waived by PNG_FLAG_CRC_CRITICAL_IGNORE. */
+      skip_check = ((png_ptr->flags & PNG_FLAG_CRC_CRITICAL_IGNORE) != 0);
+   }
+
+   /* The stored CRC is always consumed, even when it is not compared. */
+   png_read_data(png_ptr, stored, 4);
+
+   if (skip_check)
+      return (0);
+
+   if (png_get_uint_32(stored) != png_ptr->crc)
+      return (1);
+
+   return (0);
+}
+
+#if defined(PNG_READ_zTXt_SUPPORTED) || defined(PNG_READ_iTXt_SUPPORTED) || \
+    defined(PNG_READ_iCCP_SUPPORTED)
+/*
+ * Decompress trailing data in a chunk.  chunkdata is an allocated area of
+ * chunklength bytes: prefix_size bytes to keep, then compressed data.  It
+ * is freed and replaced by an allocated, NUL-terminated area holding the
+ * prefix plus the inflated data (prefix only if inflation fails), with the
+ * length in *newlength.  An unknown comp_type cuts chunkdata at the prefix.
+ */
+png_charp /* PRIVATE */
+png_decompress_chunk(png_structp png_ptr, int comp_type,
+                              png_charp chunkdata, png_size_t chunklength,
+                              png_size_t prefix_size, png_size_t *newlength)
+{
+   static PNG_CONST char msg[] = "Error decoding compressed text";
+   png_charp text;        /* output area being built (NULL until first use) */
+   png_size_t text_size;  /* bytes used in text, excluding the final NUL */
+
+   if (comp_type == PNG_COMPRESSION_TYPE_BASE)
+   {
+      int ret = Z_OK;
+      png_ptr->zstream.next_in = (png_bytep)(chunkdata + prefix_size);
+      png_ptr->zstream.avail_in = (uInt)(chunklength - prefix_size);
+      png_ptr->zstream.next_out = png_ptr->zbuf;
+      png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+
+      text_size = 0;
+      text = NULL;
+
+      while (png_ptr->zstream.avail_in)
+      {
+         ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH);
+         if (ret != Z_OK && ret != Z_STREAM_END)
+         {
+            if (png_ptr->zstream.msg != NULL)
+               png_warning(png_ptr, png_ptr->zstream.msg);
+            else
+               png_warning(png_ptr, msg);
+            inflateReset(&png_ptr->zstream);
+            png_ptr->zstream.avail_in = 0;
+
+            if (text ==  NULL)
+            {
+               text_size = prefix_size + png_sizeof(msg) + 1;
+               text = (png_charp)png_malloc_warn(png_ptr, text_size);
+               if (text ==  NULL)
+                 {
+                    png_free(png_ptr,chunkdata);
+                    png_error(png_ptr,"Not enough memory to decompress chunk");
+                 }
+               png_memcpy(text, chunkdata, prefix_size);
+            }
+
+            text[text_size - 1] = 0x00;
+            /* Copy what we can of the error message into the text chunk.  The
+               room is derived from text's own size, never from chunkdata. */
+            text_size = text_size - prefix_size - 1;
+            text_size = png_sizeof(msg) > text_size ? text_size :
+               png_sizeof(msg);
+            png_memcpy(text + prefix_size, msg, text_size);
+            break;
+         }
+         if (!png_ptr->zstream.avail_out || ret == Z_STREAM_END)
+         {
+            if (text == NULL)   /* first block of output: prefix + zbuf */
+            {
+               text_size = prefix_size +
+                   png_ptr->zbuf_size - png_ptr->zstream.avail_out;
+               text = (png_charp)png_malloc_warn(png_ptr, text_size + 1);
+               if (text ==  NULL)
+                 {
+                    png_free(png_ptr,chunkdata);
+                    png_error(png_ptr,"Not enough memory to decompress chunk.");
+                 }
+               png_memcpy(text + prefix_size, png_ptr->zbuf,
+                    text_size - prefix_size);
+               png_memcpy(text, chunkdata, prefix_size);
+               *(text + text_size) = 0x00;
+            }
+            else   /* later block: grow by reallocating and copying */
+            {
+               png_charp tmp;
+
+               tmp = text;
+               text = (png_charp)png_malloc_warn(png_ptr,
+                  (png_uint_32)(text_size +
+                  png_ptr->zbuf_size - png_ptr->zstream.avail_out + 1));
+               if (text == NULL)
+               {
+                  png_free(png_ptr, tmp);
+                  png_free(png_ptr, chunkdata);
+                  png_error(png_ptr,"Not enough memory to decompress chunk..");
+               }
+               png_memcpy(text, tmp, text_size);
+               png_free(png_ptr, tmp);
+               png_memcpy(text + text_size, png_ptr->zbuf,
+                  (png_ptr->zbuf_size - png_ptr->zstream.avail_out));
+               text_size += png_ptr->zbuf_size - png_ptr->zstream.avail_out;
+               *(text + text_size) = 0x00;
+            }
+            if (ret == Z_STREAM_END)
+               break;
+            else
+            {
+               png_ptr->zstream.next_out = png_ptr->zbuf;
+               png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+            }
+         }
+      }
+      if (ret != Z_STREAM_END)   /* error or truncated stream: keep prefix only */
+      {
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+         char umsg[52];
+
+         if (ret == Z_BUF_ERROR)
+            png_snprintf(umsg, 52,
+                "Buffer error in compressed datastream in %s chunk",
+                png_ptr->chunk_name);
+         else if (ret == Z_DATA_ERROR)
+            png_snprintf(umsg, 52,
+                "Data error in compressed datastream in %s chunk",
+                png_ptr->chunk_name);
+         else
+            png_snprintf(umsg, 52,
+                "Incomplete compressed datastream in %s chunk",
+                png_ptr->chunk_name);
+         png_warning(png_ptr, umsg);
+#else
+         png_warning(png_ptr,
+            "Incomplete compressed datastream in chunk other than IDAT");
+#endif
+         text_size=prefix_size;
+         if (text ==  NULL)
+         {
+            text = (png_charp)png_malloc_warn(png_ptr, text_size+1);
+            if (text == NULL)
+              {
+                png_free(png_ptr, chunkdata);
+                png_error(png_ptr,"Not enough memory for text.");
+              }
+            png_memcpy(text, chunkdata, prefix_size);
+         }
+         *(text + text_size) = 0x00;
+      }
+
+      inflateReset(&png_ptr->zstream);
+      png_ptr->zstream.avail_in = 0;
+
+      png_free(png_ptr, chunkdata);   /* the input area is replaced by text */
+      chunkdata = text;
+      *newlength=text_size;
+   }
+   else /* if (comp_type != PNG_COMPRESSION_TYPE_BASE) */
+   {
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+      char umsg[50];
+
+      png_snprintf(umsg, 50,
+         "Unknown zTXt compression type %d", comp_type);
+      png_warning(png_ptr, umsg);
+#else
+      png_warning(png_ptr, "Unknown zTXt compression type");
+#endif
+
+      *(chunkdata + prefix_size) = 0x00;   /* terminate right after the prefix */
+      *newlength=prefix_size;
+   }
+
+   return chunkdata;
+}
+#endif
+
+/* read and check the IHDR chunk; stores the header fields in png_ptr */
+void /* PRIVATE */
+png_handle_IHDR(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_byte buf[13];   /* raw IHDR payload */
+   png_uint_32 width, height;
+   int bit_depth, color_type, compression_type, filter_type;
+   int interlace_type;
+
+   png_debug(1, "in png_handle_IHDR\n");
+
+   if (png_ptr->mode & PNG_HAVE_IHDR)
+      png_error(png_ptr, "Out of place IHDR");
+
+   /* check the length */
+   if (length != 13)
+      png_error(png_ptr, "Invalid IHDR chunk");
+
+   png_ptr->mode |= PNG_HAVE_IHDR;
+
+   png_crc_read(png_ptr, buf, 13);
+   png_crc_finish(png_ptr, 0);   /* nothing left to skip; only checks the CRC */
+
+   width = png_get_uint_31(png_ptr, buf);        /* payload bytes 0-3 */
+   height = png_get_uint_31(png_ptr, buf + 4);   /* payload bytes 4-7 */
+   bit_depth = buf[8];
+   color_type = buf[9];
+   compression_type = buf[10];
+   filter_type = buf[11];
+   interlace_type = buf[12];
+
+   /* set internal variables */
+   png_ptr->width = width;
+   png_ptr->height = height;
+   png_ptr->bit_depth = (png_byte)bit_depth;
+   png_ptr->interlaced = (png_byte)interlace_type;
+   png_ptr->color_type = (png_byte)color_type;
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   png_ptr->filter_type = (png_byte)filter_type;
+#endif
+   png_ptr->compression_type = (png_byte)compression_type;
+
+   /* find number of channels */
+   switch (png_ptr->color_type)
+   {
+      case PNG_COLOR_TYPE_GRAY:
+      case PNG_COLOR_TYPE_PALETTE:
+         png_ptr->channels = 1;
+         break;
+      case PNG_COLOR_TYPE_RGB:
+         png_ptr->channels = 3;
+         break;
+      case PNG_COLOR_TYPE_GRAY_ALPHA:
+         png_ptr->channels = 2;
+         break;
+      case PNG_COLOR_TYPE_RGB_ALPHA:
+         png_ptr->channels = 4;
+         break;
+   }   /* no default: channels is left unchanged for any other color_type */
+
+   /* set up other useful info */
+   png_ptr->pixel_depth = (png_byte)(png_ptr->bit_depth *
+   png_ptr->channels);
+   png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth,png_ptr->width);
+   png_debug1(3,"bit_depth = %d\n", png_ptr->bit_depth);
+   png_debug1(3,"channels = %d\n", png_ptr->channels);
+   png_debug1(3,"rowbytes = %lu\n", png_ptr->rowbytes);
+   png_set_IHDR(png_ptr, info_ptr, width, height, bit_depth,
+      color_type, interlace_type, compression_type, filter_type);
+}
+
+/* read and check the palette, then hand it to png_set_PLTE() */
+void /* PRIVATE */
+png_handle_PLTE(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_color palette[PNG_MAX_PALETTE_LENGTH];
+   int num, i;   /* num: number of palette entries (length / 3) */
+#ifndef PNG_NO_POINTER_INDEXING
+   png_colorp pal_ptr;
+#endif
+
+   png_debug(1, "in png_handle_PLTE\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before PLTE");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid PLTE after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->mode & PNG_HAVE_PLTE)
+      png_error(png_ptr, "Duplicate PLTE chunk");
+
+   png_ptr->mode |= PNG_HAVE_PLTE;
+
+   if (!(png_ptr->color_type&PNG_COLOR_MASK_COLOR))
+   {
+      png_warning(png_ptr,
+        "Ignoring PLTE chunk in grayscale PNG");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+#if !defined(PNG_READ_OPT_PLTE_SUPPORTED)
+   if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
+   {
+      png_crc_finish(png_ptr, length);   /* skip the chunk without a warning */
+      return;
+   }
+#endif
+
+   /* the length must be a multiple of 3 and fit the local palette array */
+   if (length > 3*PNG_MAX_PALETTE_LENGTH || length % 3)
+   {
+      if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
+      {
+         png_warning(png_ptr, "Invalid palette chunk");
+         png_crc_finish(png_ptr, length);
+         return;
+      }
+      else
+      {
+         png_error(png_ptr, "Invalid palette chunk");
+      }
+   }
+
+   num = (int)length / 3;
+
+#ifndef PNG_NO_POINTER_INDEXING
+   for (i = 0, pal_ptr = palette; i < num; i++, pal_ptr++)
+   {
+      png_byte buf[3];   /* one entry: red, green, blue */
+
+      png_crc_read(png_ptr, buf, 3);
+      pal_ptr->red = buf[0];
+      pal_ptr->green = buf[1];
+      pal_ptr->blue = buf[2];
+   }
+#else
+   for (i = 0; i < num; i++)
+   {
+      png_byte buf[3];   /* one entry: red, green, blue */
+
+      png_crc_read(png_ptr, buf, 3);
+      /* don't depend upon png_color being any order */
+      palette[i].red = buf[0];
+      palette[i].green = buf[1];
+      palette[i].blue = buf[2];
+   }
+#endif
+
+   /* If we actually NEED the PLTE chunk (ie for a paletted image), we do
+      whatever the normal CRC configuration tells us.  However, if we
+      have an RGB image, the PLTE can be considered ancillary, so
+      we will act as though it is. */
+#if !defined(PNG_READ_OPT_PLTE_SUPPORTED)
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+#endif
+   {
+      png_crc_finish(png_ptr, 0);
+   }
+#if !defined(PNG_READ_OPT_PLTE_SUPPORTED)
+   else if (png_crc_error(png_ptr))  /* Only if we have a CRC error */
+   {  /* NOTE(review): non-palette images returned above under this same #if, so this looks unreachable -- confirm */
+      /* If we don't want to use the data from an ancillary chunk,
+         we have two options: an error abort, or a warning and we
+         ignore the data in this chunk (which should be OK, since
+         it's considered ancillary for a RGB or RGBA image). */
+      if (!(png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_USE))
+      {
+         if (png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN)
+         {
+            png_chunk_error(png_ptr, "CRC error");
+         }
+         else
+         {
+            png_chunk_warning(png_ptr, "CRC error");
+            return;
+         }
+      }
+      /* Otherwise, we (optionally) emit a warning and use the chunk. */
+      else if (!(png_ptr->flags & PNG_FLAG_CRC_ANCILLARY_NOWARN))
+      {
+         png_chunk_warning(png_ptr, "CRC error");
+      }
+   }
+#endif
+
+   png_set_PLTE(png_ptr, info_ptr, palette, num);
+
+#if defined(PNG_READ_tRNS_SUPPORTED)
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+   {
+      /* a tRNS seen earlier may not describe more entries than the palette */
+      if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS))
+      {
+         if (png_ptr->num_trans > (png_uint_16)num)
+         {
+            png_warning(png_ptr, "Truncating incorrect tRNS chunk length");
+            png_ptr->num_trans = (png_uint_16)num;
+         }
+         if (info_ptr->num_trans > (png_uint_16)num)
+         {
+            png_warning(png_ptr, "Truncating incorrect info tRNS chunk length");
+            info_ptr->num_trans = (png_uint_16)num;
+         }
+      }
+   }
+#endif
+
+}
+
+void /* PRIVATE */
+png_handle_IEND(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   /* Handle the IEND chunk: it is only acceptable once both IHDR and IDAT
+      have been seen; its data (normally none) is skipped and CRC-checked. */
+   png_debug(1, "in png_handle_IEND\n");
+
+   if ((png_ptr->mode & PNG_HAVE_IHDR) == 0 ||
+       (png_ptr->mode & PNG_HAVE_IDAT) == 0)
+      png_error(png_ptr, "No image in file");
+
+   png_ptr->mode |= PNG_HAVE_IEND;
+   png_ptr->mode |= PNG_AFTER_IDAT;
+
+   if (length)
+      png_warning(png_ptr, "Incorrect IEND chunk length");
+   png_crc_finish(png_ptr, length);
+
+   info_ptr = info_ptr; /* the parameter is otherwise unused */
+}
+
+#if defined(PNG_READ_gAMA_SUPPORTED)
+void /* PRIVATE */
+png_handle_gAMA(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{  /* read the gAMA chunk, validate it, and record the file gamma */
+   png_fixed_point igamma;   /* gamma as stored in the file (value * 100000) */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   float file_gamma;
+#endif
+   png_byte buf[4];
+
+   png_debug(1, "in png_handle_gAMA\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before gAMA");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid gAMA after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->mode & PNG_HAVE_PLTE)
+      /* Should be an error, but we can cope with it */
+      png_warning(png_ptr, "Out of place gAMA chunk");
+
+   /* a gAMA already recorded counts as a duplicate unless sRGB is present */
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_gAMA)
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      && !(info_ptr->valid & PNG_INFO_sRGB)
+#endif
+      )
+   {
+      png_warning(png_ptr, "Duplicate gAMA chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   if (length != 4)
+   {
+      png_warning(png_ptr, "Incorrect gAMA chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, 4);
+   if (png_crc_finish(png_ptr, 0))   /* CRC mismatch: ignore the chunk */
+      return;
+
+   igamma = (png_fixed_point)png_get_uint_32(buf);
+   /* check for zero gamma */
+   if (igamma == 0)
+      {
+         png_warning(png_ptr,
+           "Ignoring gAMA chunk with gamma=0");
+         return;
+      }
+
+#if defined(PNG_READ_sRGB_SUPPORTED)
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sRGB))
+      if (PNG_OUT_OF_RANGE(igamma, 45500L, 500))   /* compared against 45500 */
+      {
+         png_warning(png_ptr,
+           "Ignoring incorrect gAMA value when sRGB is also present");
+#ifndef PNG_NO_CONSOLE_IO
+         fprintf(stderr, "gamma = (%d/100000)\n", (int)igamma);
+#endif
+         return;
+      }
+#endif /* PNG_READ_sRGB_SUPPORTED */
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   file_gamma = (float)igamma / (float)100000.0;
+#  ifdef PNG_READ_GAMMA_SUPPORTED
+     png_ptr->gamma = file_gamma;
+#  endif
+     png_set_gAMA(png_ptr, info_ptr, file_gamma);
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_set_gAMA_fixed(png_ptr, info_ptr, igamma);
+#endif
+}
+#endif
+
+#if defined(PNG_READ_sBIT_SUPPORTED)
+void /* PRIVATE */
+png_handle_sBIT(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{  /* read the sBIT chunk into png_ptr->sig_bit and pass it to png_set_sBIT() */
+   png_size_t truelen;   /* payload length expected for this color type */
+   png_byte buf[4];
+
+   png_debug(1, "in png_handle_sBIT\n");
+
+   buf[0] = buf[1] = buf[2] = buf[3] = 0;   /* bytes not read below stay 0 */
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before sBIT");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid sBIT after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->mode & PNG_HAVE_PLTE)
+   {
+      /* Should be an error, but we can cope with it */
+      png_warning(png_ptr, "Out of place sBIT chunk");
+   }
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sBIT))
+   {
+      png_warning(png_ptr, "Duplicate sBIT chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      truelen = 3;
+   else
+      truelen = (png_size_t)png_ptr->channels;
+
+   if (length != truelen || length > 4)   /* also keeps the read inside buf */
+   {
+      png_warning(png_ptr, "Incorrect sBIT chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, truelen);
+   if (png_crc_finish(png_ptr, 0))   /* CRC mismatch: ignore the chunk */
+      return;
+
+   if (png_ptr->color_type & PNG_COLOR_MASK_COLOR)
+   {
+      png_ptr->sig_bit.red = buf[0];
+      png_ptr->sig_bit.green = buf[1];
+      png_ptr->sig_bit.blue = buf[2];
+      png_ptr->sig_bit.alpha = buf[3];
+   }
+   else   /* no color bit: buf[0] is used for gray and all three colors */
+   {
+      png_ptr->sig_bit.gray = buf[0];
+      png_ptr->sig_bit.red = buf[0];
+      png_ptr->sig_bit.green = buf[0];
+      png_ptr->sig_bit.blue = buf[0];
+      png_ptr->sig_bit.alpha = buf[1];
+   }
+   png_set_sBIT(png_ptr, info_ptr, &(png_ptr->sig_bit));
+}
+#endif
+
+#if defined(PNG_READ_cHRM_SUPPORTED)
+void /* PRIVATE */
+png_handle_cHRM(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{  /* read the cHRM chunk (white, red, green, blue x/y pairs) and record it */
+   png_byte buf[4];
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   float white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y;
+#endif
+   png_fixed_point int_x_white, int_y_white, int_x_red, int_y_red, int_x_green,
+      int_y_green, int_x_blue, int_y_blue;
+
+   png_uint_32 uint_x, uint_y;   /* the pair currently being read */
+
+   png_debug(1, "in png_handle_cHRM\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before cHRM");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid cHRM after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->mode & PNG_HAVE_PLTE)
+      /* Should be an error, but we can cope with it */
+      png_warning(png_ptr, "Out of place cHRM chunk");
+
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM)
+#if defined(PNG_READ_sRGB_SUPPORTED)
+      && !(info_ptr->valid & PNG_INFO_sRGB)
+#endif
+      )
+   {
+      png_warning(png_ptr, "Duplicate cHRM chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   if (length != 32)
+   {
+      png_warning(png_ptr, "Incorrect cHRM chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_x = png_get_uint_32(buf);
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_y = png_get_uint_32(buf);
+
+   if (uint_x > 80000L || uint_y > 80000L ||
+      uint_x + uint_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid cHRM white point");
+      png_crc_finish(png_ptr, 24);   /* skip the 24 bytes not yet read */
+      return;
+   }
+   int_x_white = (png_fixed_point)uint_x;
+   int_y_white = (png_fixed_point)uint_y;
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_x = png_get_uint_32(buf);
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_y = png_get_uint_32(buf);
+
+   /* each coordinate is bounded first so that the sum cannot wrap around */
+   if (uint_x > 100000L || uint_y > 100000L || uint_x + uint_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid cHRM red point");
+      png_crc_finish(png_ptr, 16);
+      return;
+   }
+   int_x_red = (png_fixed_point)uint_x;
+   int_y_red = (png_fixed_point)uint_y;
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_x = png_get_uint_32(buf);
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_y = png_get_uint_32(buf);
+
+   if (uint_x > 100000L || uint_y > 100000L || uint_x + uint_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid cHRM green point");
+      png_crc_finish(png_ptr, 8);
+      return;
+   }
+   int_x_green = (png_fixed_point)uint_x;
+   int_y_green = (png_fixed_point)uint_y;
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_x = png_get_uint_32(buf);
+
+   png_crc_read(png_ptr, buf, 4);
+   uint_y = png_get_uint_32(buf);
+
+   if (uint_x > 100000L || uint_y > 100000L || uint_x + uint_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid cHRM blue point");
+      png_crc_finish(png_ptr, 0);
+      return;
+   }
+   int_x_blue = (png_fixed_point)uint_x;
+   int_y_blue = (png_fixed_point)uint_y;
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   white_x = (float)int_x_white / (float)100000.0;
+   white_y = (float)int_y_white / (float)100000.0;
+   red_x   = (float)int_x_red   / (float)100000.0;
+   red_y   = (float)int_y_red   / (float)100000.0;
+   green_x = (float)int_x_green / (float)100000.0;
+   green_y = (float)int_y_green / (float)100000.0;
+   blue_x  = (float)int_x_blue  / (float)100000.0;
+   blue_y  = (float)int_y_blue  / (float)100000.0;
+#endif
+
+#if defined(PNG_READ_sRGB_SUPPORTED)
+   if ((info_ptr != NULL) && (info_ptr->valid & PNG_INFO_sRGB))
+      {  /* with sRGB present the values are only checked, never stored */
+      if (PNG_OUT_OF_RANGE(int_x_white, 31270,  1000) ||
+          PNG_OUT_OF_RANGE(int_y_white, 32900,  1000) ||
+          PNG_OUT_OF_RANGE(int_x_red,   64000L, 1000) ||
+          PNG_OUT_OF_RANGE(int_y_red,   33000,  1000) ||
+          PNG_OUT_OF_RANGE(int_x_green, 30000,  1000) ||
+          PNG_OUT_OF_RANGE(int_y_green, 60000L, 1000) ||
+          PNG_OUT_OF_RANGE(int_x_blue,  15000,  1000) ||
+          PNG_OUT_OF_RANGE(int_y_blue,   6000,  1000))
+         {
+            png_warning(png_ptr,
+              "Ignoring incorrect cHRM value when sRGB is also present");
+#ifndef PNG_NO_CONSOLE_IO
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+            fprintf(stderr,"wx=%f, wy=%f, rx=%f, ry=%f\n",
+               white_x, white_y, red_x, red_y);
+            fprintf(stderr,"gx=%f, gy=%f, bx=%f, by=%f\n",
+               green_x, green_y, blue_x, blue_y);
+#else
+            fprintf(stderr,"wx=%ld, wy=%ld, rx=%ld, ry=%ld\n",
+               int_x_white, int_y_white, int_x_red, int_y_red);
+            fprintf(stderr,"gx=%ld, gy=%ld, bx=%ld, by=%ld\n",
+               int_x_green, int_y_green, int_x_blue, int_y_blue);
+#endif
+#endif /* PNG_NO_CONSOLE_IO */
+         }
+         png_crc_finish(png_ptr, 0);   /* reached whether or not a warning was issued */
+         return;
+      }
+#endif /* PNG_READ_sRGB_SUPPORTED */
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   png_set_cHRM(png_ptr, info_ptr,
+      white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y);
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_set_cHRM_fixed(png_ptr, info_ptr,
+      int_x_white, int_y_white, int_x_red, int_y_red, int_x_green,
+      int_y_green, int_x_blue, int_y_blue);
+#endif
+   if (png_crc_finish(png_ptr, 0))   /* the CRC is checked after the values are set */
+      return;
+}
+#endif
+
+#if defined(PNG_READ_sRGB_SUPPORTED)
+void /* PRIVATE */
+png_handle_sRGB(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{  /* read the sRGB chunk and pass its intent to png_set_sRGB_gAMA_and_cHRM() */
+   int intent;
+   png_byte buf[1];
+
+   png_debug(1, "in png_handle_sRGB\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before sRGB");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid sRGB after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->mode & PNG_HAVE_PLTE)
+      /* Should be an error, but we can cope with it */
+      png_warning(png_ptr, "Out of place sRGB chunk");
+
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sRGB))
+   {
+      png_warning(png_ptr, "Duplicate sRGB chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   if (length != 1)
+   {
+      png_warning(png_ptr, "Incorrect sRGB chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, 1);
+   if (png_crc_finish(png_ptr, 0))   /* CRC mismatch: ignore the chunk */
+      return;
+
+   intent = buf[0];
+   /* check for bad intent */
+   if (intent >= PNG_sRGB_INTENT_LAST)
+   {
+      png_warning(png_ptr, "Unknown sRGB intent");
+      return;
+   }
+
+#if defined(PNG_READ_gAMA_SUPPORTED) && defined(PNG_READ_GAMMA_SUPPORTED)
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_gAMA))
+   {
+   png_fixed_point igamma;   /* gamma recorded earlier in info_ptr */
+#ifdef PNG_FIXED_POINT_SUPPORTED
+      igamma=info_ptr->int_gamma;
+#else
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+      igamma=(png_fixed_point)(info_ptr->gamma * 100000.);
+#  endif
+#endif
+      if (PNG_OUT_OF_RANGE(igamma, 45500L, 500))
+      {
+         png_warning(png_ptr,
+           "Ignoring incorrect gAMA value when sRGB is also present");
+#ifndef PNG_NO_CONSOLE_IO
+#  ifdef PNG_FIXED_POINT_SUPPORTED
+         fprintf(stderr,"incorrect gamma=(%d/100000)\n",(int)igamma); /* the value tested above */
+#  else
+#    ifdef PNG_FLOATING_POINT_SUPPORTED
+         fprintf(stderr,"incorrect gamma=%f\n",info_ptr->gamma); /* source of igamma */
+#    endif
+#  endif
+#endif
+      }
+   }
+#endif /* PNG_READ_gAMA_SUPPORTED */
+
+#ifdef PNG_READ_cHRM_SUPPORTED
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_cHRM))
+      if (PNG_OUT_OF_RANGE(info_ptr->int_x_white, 31270,  1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_y_white, 32900,  1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_x_red,   64000L, 1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_y_red,   33000,  1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_x_green, 30000,  1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_y_green, 60000L, 1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_x_blue,  15000,  1000) ||
+          PNG_OUT_OF_RANGE(info_ptr->int_y_blue,   6000,  1000))
+         {
+            png_warning(png_ptr,
+              "Ignoring incorrect cHRM value when sRGB is also present");
+         }
+#endif /* PNG_FIXED_POINT_SUPPORTED */
+#endif /* PNG_READ_cHRM_SUPPORTED */
+
+   png_set_sRGB_gAMA_and_cHRM(png_ptr, info_ptr, intent);
+}
+#endif /* PNG_READ_sRGB_SUPPORTED */
+
+#if defined(PNG_READ_iCCP_SUPPORTED)
+void /* PRIVATE */
+png_handle_iCCP(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+/* Note: this does not properly handle chunks that are > 64K under DOS */
+{  /* read the iCCP chunk: name, NUL, compression type, compressed profile */
+   png_charp chunkdata;
+   png_byte compression_type;
+   png_bytep pC;
+   png_charp profile;
+   png_uint_32 skip = 0;
+   png_uint_32 profile_size, profile_length;
+   png_size_t slength, prefix_length, data_length;
+
+   png_debug(1, "in png_handle_iCCP\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before iCCP");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid iCCP after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->mode & PNG_HAVE_PLTE)
+      /* Should be an error, but we can cope with it */
+      png_warning(png_ptr, "Out of place iCCP chunk");
+
+   if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_iCCP))
+   {
+      png_warning(png_ptr, "Duplicate iCCP chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+#ifdef PNG_MAX_MALLOC_64K
+   if (length > (png_uint_32)65535L)
+   {
+      png_warning(png_ptr, "iCCP chunk too large to fit in memory");
+      skip = length - (png_uint_32)65535L;
+      length = (png_uint_32)65535L;
+   }
+#endif
+
+   chunkdata = (png_charp)png_malloc(png_ptr, length + 1);   /* +1 for the NUL */
+   slength = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)chunkdata, slength);
+
+   if (png_crc_finish(png_ptr, skip))
+   {
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+
+   chunkdata[slength] = 0x00;   /* guarantees the name scan below terminates */
+
+   for (profile = chunkdata; *profile; profile++)
+      /* empty loop to find end of name */ ;
+
+   ++profile;
+
+   /* there should be at least one zero (the compression type byte)
+      following the separator, and we should be on it  */
+   if ( (png_size_t)(profile - chunkdata) + 1 >= slength)   /* offset form: no pointer before chunkdata */
+   {
+      png_free(png_ptr, chunkdata);
+      png_warning(png_ptr, "Malformed iCCP chunk");
+      return;
+   }
+
+   /* compression_type should always be zero */
+   compression_type = *profile++;
+   if (compression_type)
+   {
+      png_warning(png_ptr, "Ignoring nonzero compression type in iCCP chunk");
+      compression_type=0x00;  /* Reset it to zero (libpng-1.0.6 through 1.0.8
+                                 wrote nonzero) */
+   }
+
+   prefix_length = profile - chunkdata;
+   chunkdata = png_decompress_chunk(png_ptr, compression_type, chunkdata,
+                                    slength, prefix_length, &data_length);
+
+   profile_length = data_length - prefix_length;   /* validated just below */
+
+   if ( prefix_length > data_length || profile_length < 4)
+   {
+      png_free(png_ptr, chunkdata);
+      png_warning(png_ptr, "Profile size field missing from iCCP chunk");
+      return;
+   }
+
+   /* Check the profile_size recorded in the first 32 bits of the ICC profile */
+   pC = (png_bytep)(chunkdata+prefix_length);
+   profile_size = ((png_uint_32)(*(pC  ))<<24) |   /* widen before shifting */
+                  ((png_uint_32)(*(pC+1))<<16) |
+                  ((png_uint_32)(*(pC+2))<< 8) |
+                  ((png_uint_32)(*(pC+3))    );
+
+   if(profile_size < profile_length)
+      profile_length = profile_size;
+
+   if(profile_size > profile_length)
+   {
+      png_free(png_ptr, chunkdata);
+      png_warning(png_ptr, "Ignoring truncated iCCP profile.");
+      return;
+   }
+
+   png_set_iCCP(png_ptr, info_ptr, chunkdata, compression_type,
+                chunkdata + prefix_length, profile_length);
+   png_free(png_ptr, chunkdata);
+}
+#endif /* PNG_READ_iCCP_SUPPORTED */
+
+#if defined(PNG_READ_sPLT_SUPPORTED)
+/* Read an sPLT (suggested palette) chunk and register it via png_set_sPLT().
+ *
+ * The chunk data is parsed as a NUL-terminated palette name, a sample-depth
+ * byte, and a sequence of fixed-size entries (6 bytes each when the depth
+ * is 8, otherwise 10 bytes each).  The entries are decoded into a
+ * temporary png_sPLT_entry array.  Both the raw chunk buffer and the entry
+ * array are freed before every return that follows their allocation.
+ *
+ * png_ptr  - read struct; its mode flags are used for ordering checks
+ * info_ptr - info struct handed to png_set_sPLT()
+ * length   - length of the chunk data as given by the chunk header
+ */
+void /* PRIVATE */
+png_handle_sPLT(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+/* Note: this does not properly handle chunks that are > 64K under DOS */
+{
+   png_bytep chunkdata;
+   png_bytep entry_start;
+   png_sPLT_t new_palette;
+#ifdef PNG_NO_POINTER_INDEXING
+   png_sPLT_entryp pp;
+#endif
+   int data_length, entry_size, i;
+   png_uint_32 skip = 0;
+   png_size_t slength;
+
+   png_debug(1, "in png_handle_sPLT\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before sPLT");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid sPLT after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+#ifdef PNG_MAX_MALLOC_64K
+   if (length > (png_uint_32)65535L)
+   {
+      png_warning(png_ptr, "sPLT chunk too large to fit in memory");
+      skip = length - (png_uint_32)65535L;
+      length = (png_uint_32)65535L;
+   }
+#endif
+
+   /* One extra byte so the data can always be NUL-terminated. */
+   chunkdata = (png_bytep)png_malloc(png_ptr, length + 1);
+   slength = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)chunkdata, slength);
+
+   if (png_crc_finish(png_ptr, skip))
+   {
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+
+   chunkdata[slength] = 0x00;
+
+   for (entry_start = chunkdata; *entry_start; entry_start++)
+      /* empty loop to find end of name */ ;
+   ++entry_start;
+
+   /* a sample depth should follow the separator, and we should be on it.
+      The test is done on offsets so that no pointer before the buffer is
+      formed for very short chunks. */
+   if ((png_size_t)(entry_start - chunkdata) + 2 > slength)
+   {
+      png_free(png_ptr, chunkdata);
+      png_warning(png_ptr, "malformed sPLT chunk");
+      return;
+   }
+
+   new_palette.depth = *entry_start++;
+   entry_size = (new_palette.depth == 8 ? 6 : 10);
+   data_length = (slength - (entry_start - chunkdata));
+
+   /* integrity-check the data length */
+   if (data_length % entry_size)
+   {
+      png_free(png_ptr, chunkdata);
+      png_warning(png_ptr, "sPLT chunk has bad length");
+      return;
+   }
+
+   new_palette.nentries = (png_int_32) ( data_length / entry_size);
+   if ((png_uint_32) new_palette.nentries > (png_uint_32) (PNG_SIZE_MAX /
+       png_sizeof(png_sPLT_entry)))
+   {
+       /* Release the raw chunk buffer; it was previously leaked here. */
+       png_free(png_ptr, chunkdata);
+       png_warning(png_ptr, "sPLT chunk too long");
+       return;
+   }
+   new_palette.entries = (png_sPLT_entryp)png_malloc_warn(
+       png_ptr, new_palette.nentries * png_sizeof(png_sPLT_entry));
+   if (new_palette.entries == NULL)
+   {
+       /* Release the raw chunk buffer; it was previously leaked here. */
+       png_free(png_ptr, chunkdata);
+       png_warning(png_ptr, "sPLT chunk requires too much memory");
+       return;
+   }
+
+#ifndef PNG_NO_POINTER_INDEXING
+   for (i = 0; i < new_palette.nentries; i++)
+   {
+      png_sPLT_entryp pp = new_palette.entries + i;
+
+      if (new_palette.depth == 8)
+      {
+          pp->red = *entry_start++;
+          pp->green = *entry_start++;
+          pp->blue = *entry_start++;
+          pp->alpha = *entry_start++;
+      }
+      else
+      {
+          pp->red   = png_get_uint_16(entry_start); entry_start += 2;
+          pp->green = png_get_uint_16(entry_start); entry_start += 2;
+          pp->blue  = png_get_uint_16(entry_start); entry_start += 2;
+          pp->alpha = png_get_uint_16(entry_start); entry_start += 2;
+      }
+      pp->frequency = png_get_uint_16(entry_start); entry_start += 2;
+   }
+#else
+   pp = new_palette.entries;
+   for (i = 0; i < new_palette.nentries; i++)
+   {
+
+      if (new_palette.depth == 8)
+      {
+          pp[i].red   = *entry_start++;
+          pp[i].green = *entry_start++;
+          pp[i].blue  = *entry_start++;
+          pp[i].alpha = *entry_start++;
+      }
+      else
+      {
+          pp[i].red   = png_get_uint_16(entry_start); entry_start += 2;
+          pp[i].green = png_get_uint_16(entry_start); entry_start += 2;
+          pp[i].blue  = png_get_uint_16(entry_start); entry_start += 2;
+          pp[i].alpha = png_get_uint_16(entry_start); entry_start += 2;
+      }
+      /* Must index by i: pp always points at the first entry here. */
+      pp[i].frequency = png_get_uint_16(entry_start); entry_start += 2;
+   }
+#endif
+
+   /* discard all chunk data except the name and stash that */
+   new_palette.name = (png_charp)chunkdata;
+
+   png_set_sPLT(png_ptr, info_ptr, &new_palette, 1);
+
+   png_free(png_ptr, chunkdata);
+   png_free(png_ptr, new_palette.entries);
+}
+#endif /* PNG_READ_sPLT_SUPPORTED */
+
+#if defined(PNG_READ_tRNS_SUPPORTED)
+/* Read a tRNS chunk and record the transparency data via png_set_tRNS().
+ *
+ * Grayscale images carry one 2-byte sample, RGB images three 2-byte
+ * samples, and palette images up to num_palette single-byte entries.
+ * Any other colour type is rejected with a warning.
+ */
+void /* PRIVATE */
+png_handle_tRNS(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_byte alpha_table[PNG_MAX_PALETTE_LENGTH];
+   int depth_mask;
+
+   png_debug(1, "in png_handle_tRNS\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before tRNS");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid tRNS after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tRNS))
+   {
+      png_warning(png_ptr, "Duplicate tRNS chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   /* Non-indexed samples are masked down to the image bit depth because
+    * some creators wrote extra bits there.  Indexed colour does not use
+    * the mask. */
+   depth_mask = (1 << png_ptr->bit_depth) - 1;
+
+   switch (png_ptr->color_type)
+   {
+      case PNG_COLOR_TYPE_GRAY:
+      {
+         png_byte sample[2];
+
+         if (length != 2)
+         {
+            png_warning(png_ptr, "Incorrect tRNS chunk length");
+            png_crc_finish(png_ptr, length);
+            return;
+         }
+
+         png_crc_read(png_ptr, sample, 2);
+         png_ptr->num_trans = 1;
+         png_ptr->trans_values.gray = png_get_uint_16(sample) & depth_mask;
+         break;
+      }
+
+      case PNG_COLOR_TYPE_RGB:
+      {
+         png_byte samples[6];
+
+         if (length != 6)
+         {
+            png_warning(png_ptr, "Incorrect tRNS chunk length");
+            png_crc_finish(png_ptr, length);
+            return;
+         }
+
+         png_crc_read(png_ptr, samples, (png_size_t)length);
+         png_ptr->num_trans = 1;
+         png_ptr->trans_values.red = png_get_uint_16(samples) & depth_mask;
+         png_ptr->trans_values.green =
+            png_get_uint_16(samples + 2) & depth_mask;
+         png_ptr->trans_values.blue =
+            png_get_uint_16(samples + 4) & depth_mask;
+         break;
+      }
+
+      case PNG_COLOR_TYPE_PALETTE:
+      {
+         if (!(png_ptr->mode & PNG_HAVE_PLTE))
+         {
+            /* Should be an error, but we can cope with it. */
+            png_warning(png_ptr, "Missing PLTE before tRNS");
+         }
+
+         /* At most one alpha byte per palette entry. */
+         if (length > (png_uint_32)png_ptr->num_palette ||
+             length > PNG_MAX_PALETTE_LENGTH)
+         {
+            png_warning(png_ptr, "Incorrect tRNS chunk length");
+            png_crc_finish(png_ptr, length);
+            return;
+         }
+
+         if (length == 0)
+         {
+            png_warning(png_ptr, "Zero length tRNS chunk");
+            png_crc_finish(png_ptr, length);
+            return;
+         }
+
+         png_crc_read(png_ptr, alpha_table, (png_size_t)length);
+         png_ptr->num_trans = (png_uint_16)length;
+         break;
+      }
+
+      default:
+      {
+         png_warning(png_ptr, "tRNS chunk not allowed with alpha channel");
+         png_crc_finish(png_ptr, length);
+         return;
+      }
+   }
+
+   /* A CRC failure invalidates whatever was just read. */
+   if (png_crc_finish(png_ptr, 0))
+   {
+      png_ptr->num_trans = 0;
+      return;
+   }
+
+   png_set_tRNS(png_ptr, info_ptr, alpha_table, png_ptr->num_trans,
+      &(png_ptr->trans_values));
+}
+#endif
+
+#if defined(PNG_READ_bKGD_SUPPORTED)
+/* Read a bKGD chunk and record the background colour via png_set_bKGD().
+ *
+ * The expected data length depends on the colour type: 1 byte (a palette
+ * index) for palette images, 6 bytes for other colour images, and 2 bytes
+ * for grayscale.  The decoded value is stored in png_ptr->background.
+ *
+ * png_ptr  - read struct; supplies mode, colour type and palette
+ * info_ptr - info struct; may be NULL
+ * length   - length of the chunk data as given by the chunk header
+ */
+void /* PRIVATE */
+png_handle_bKGD(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_size_t truelen;
+   png_byte buf[6];
+
+   png_debug(1, "in png_handle_bKGD\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before bKGD");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid bKGD after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE &&
+            !(png_ptr->mode & PNG_HAVE_PLTE))
+   {
+      png_warning(png_ptr, "Missing PLTE before bKGD");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_bKGD))
+   {
+      png_warning(png_ptr, "Duplicate bKGD chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      truelen = 1;
+   else if (png_ptr->color_type & PNG_COLOR_MASK_COLOR)
+      truelen = 6;
+   else
+      truelen = 2;
+
+   if (length != truelen)
+   {
+      png_warning(png_ptr, "Incorrect bKGD chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, buf, truelen);
+   if (png_crc_finish(png_ptr, 0))
+      return;
+
+   /* We convert the index value into RGB components so that we can allow
+    * arbitrary RGB values for background when we have transparency, and
+    * so it is easy to determine the RGB values of the background color
+    * from the info_ptr struct. */
+   if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+   {
+      png_ptr->background.index = buf[0];
+      /* info_ptr is allowed to be NULL (see the duplicate check above). */
+      if(info_ptr != NULL && info_ptr->num_palette)
+      {
+          /* Valid indices are 0 .. num_palette-1, so an index equal to
+           * num_palette is already out of range. */
+          if(buf[0] >= info_ptr->num_palette)
+          {
+             png_warning(png_ptr, "Incorrect bKGD chunk index value");
+             return;
+          }
+          png_ptr->background.red =
+             (png_uint_16)png_ptr->palette[buf[0]].red;
+          png_ptr->background.green =
+             (png_uint_16)png_ptr->palette[buf[0]].green;
+          png_ptr->background.blue =
+             (png_uint_16)png_ptr->palette[buf[0]].blue;
+      }
+   }
+   else if (!(png_ptr->color_type & PNG_COLOR_MASK_COLOR)) /* GRAY */
+   {
+      png_ptr->background.red =
+      png_ptr->background.green =
+      png_ptr->background.blue =
+      png_ptr->background.gray = png_get_uint_16(buf);
+   }
+   else
+   {
+      png_ptr->background.red = png_get_uint_16(buf);
+      png_ptr->background.green = png_get_uint_16(buf + 2);
+      png_ptr->background.blue = png_get_uint_16(buf + 4);
+   }
+
+   png_set_bKGD(png_ptr, info_ptr, &(png_ptr->background));
+}
+#endif
+
+#if defined(PNG_READ_hIST_SUPPORTED)
+/* Read a hIST chunk (one 16-bit value per palette entry) and pass the
+ * values to png_set_hIST().  The chunk is only accepted after PLTE and
+ * before IDAT, and its entry count must equal png_ptr->num_palette.
+ */
+void /* PRIVATE */
+png_handle_hIST(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_uint_16 histogram[PNG_MAX_PALETTE_LENGTH];
+   png_byte pair[2];
+   unsigned int entries, n;
+
+   png_debug(1, "in png_handle_hIST\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before hIST");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid hIST after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (!(png_ptr->mode & PNG_HAVE_PLTE))
+   {
+      png_warning(png_ptr, "Missing PLTE before hIST");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_hIST))
+   {
+      png_warning(png_ptr, "Duplicate hIST chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   /* Two bytes per entry. */
+   entries = length / 2 ;
+   if (entries != (unsigned int) png_ptr->num_palette ||
+       entries > (unsigned int) PNG_MAX_PALETTE_LENGTH)
+   {
+      png_warning(png_ptr, "Incorrect hIST chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   n = 0;
+   while (n < entries)
+   {
+      png_crc_read(png_ptr, pair, 2);
+      histogram[n] = png_get_uint_16(pair);
+      n++;
+   }
+
+   if (png_crc_finish(png_ptr, 0))
+      return;
+
+   png_set_hIST(png_ptr, info_ptr, histogram);
+}
+#endif
+
+#if defined(PNG_READ_pHYs_SUPPORTED)
+/* Read a pHYs chunk: two 32-bit unsigned values followed by a one-byte
+ * unit specifier (9 bytes in total), forwarded to png_set_pHYs().
+ */
+void /* PRIVATE */
+png_handle_pHYs(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_byte raw[9];
+   png_uint_32 x_res, y_res;
+   int units;
+
+   png_debug(1, "in png_handle_pHYs\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before pHYs");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid pHYs after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pHYs))
+   {
+      png_warning(png_ptr, "Duplicate pHYs chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   /* The chunk has a fixed size. */
+   if (length != 9)
+   {
+      png_warning(png_ptr, "Incorrect pHYs chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, raw, 9);
+   if (png_crc_finish(png_ptr, 0))
+      return;
+
+   /* Layout: bytes 0-3 x, bytes 4-7 y, byte 8 unit. */
+   units = raw[8];
+   y_res = png_get_uint_32(raw + 4);
+   x_res = png_get_uint_32(raw);
+
+   png_set_pHYs(png_ptr, info_ptr, x_res, y_res, units);
+}
+#endif
+
+#if defined(PNG_READ_oFFs_SUPPORTED)
+/* Read an oFFs chunk: two 32-bit signed offsets followed by a one-byte
+ * unit specifier (9 bytes in total), forwarded to png_set_oFFs().
+ */
+void /* PRIVATE */
+png_handle_oFFs(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_byte raw[9];
+   png_int_32 x_off, y_off;
+   int units;
+
+   png_debug(1, "in png_handle_oFFs\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before oFFs");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid oFFs after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_oFFs))
+   {
+      png_warning(png_ptr, "Duplicate oFFs chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   /* The chunk has a fixed size. */
+   if (length != 9)
+   {
+      png_warning(png_ptr, "Incorrect oFFs chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, raw, 9);
+   if (png_crc_finish(png_ptr, 0))
+      return;
+
+   /* Layout: bytes 0-3 x, bytes 4-7 y, byte 8 unit. */
+   units = raw[8];
+   y_off = png_get_int_32(raw + 4);
+   x_off = png_get_int_32(raw);
+
+   png_set_oFFs(png_ptr, info_ptr, x_off, y_off, units);
+}
+#endif
+
+#if defined(PNG_READ_pCAL_SUPPORTED)
+/* Read the pCAL chunk (described in the PNG Extensions document) and pass
+ * its contents to png_set_pCAL().
+ *
+ * The chunk data is read into one buffer and parsed in place as: a
+ * NUL-terminated purpose string, two 32-bit signed values (X0, X1), an
+ * equation-type byte, a parameter-count byte, a NUL-terminated units
+ * string, and nparams NUL-separated parameter strings.  An array of
+ * pointers into the buffer is built for the parameters.  Both the buffer
+ * and the pointer array are freed before every return that follows their
+ * allocation.
+ *
+ * png_ptr  - read struct; its mode flags are used for ordering checks
+ * info_ptr - info struct; may be NULL as far as this function is concerned
+ * length   - length of the chunk data as given by the chunk header
+ */
+void /* PRIVATE */
+png_handle_pCAL(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_charp purpose;
+   png_int_32 X0, X1;
+   png_byte type, nparams;
+   png_charp buf, units, endptr;
+   png_charpp params;
+   png_size_t slength;
+   int i;
+
+   png_debug(1, "in png_handle_pCAL\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before pCAL");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid pCAL after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_pCAL))
+   {
+      png_warning(png_ptr, "Duplicate pCAL chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_debug1(2, "Allocating and reading pCAL chunk data (%lu bytes)\n",
+      length + 1);
+   purpose = (png_charp)png_malloc_warn(png_ptr, length + 1);
+   if (purpose == NULL)
+     {
+       png_warning(png_ptr, "No memory for pCAL purpose.");
+       /* Nothing has been read yet: skip the chunk data and its CRC, as
+          the other early exits do, so the input is not left positioned
+          inside this chunk. */
+       png_crc_finish(png_ptr, length);
+       return;
+     }
+   slength = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)purpose, slength);
+
+   if (png_crc_finish(png_ptr, 0))
+   {
+      png_free(png_ptr, purpose);
+      return;
+   }
+
+   purpose[slength] = 0x00; /* null terminate the last string */
+
+   png_debug(3, "Finding end of pCAL purpose string\n");
+   for (buf = purpose; *buf; buf++)
+      /* empty loop */ ;
+
+   endptr = purpose + slength;
+
+   /* We need to have at least 12 bytes after the purpose string
+      in order to get the parameter information.  The distance is compared
+      rather than forming buf + 12, which could point past the buffer. */
+   if ((png_size_t)(endptr - buf) <= 12)
+   {
+      png_warning(png_ptr, "Invalid pCAL data");
+      png_free(png_ptr, purpose);
+      return;
+   }
+
+   png_debug(3, "Reading pCAL X0, X1, type, nparams, and units\n");
+   X0 = png_get_int_32((png_bytep)buf+1);
+   X1 = png_get_int_32((png_bytep)buf+5);
+   type = buf[9];
+   nparams = buf[10];
+   units = buf + 11;
+
+   png_debug(3, "Checking pCAL equation type and number of parameters\n");
+   /* Check that we have the right number of parameters for known
+      equation types. */
+   if ((type == PNG_EQUATION_LINEAR && nparams != 2) ||
+       (type == PNG_EQUATION_BASE_E && nparams != 3) ||
+       (type == PNG_EQUATION_ARBITRARY && nparams != 3) ||
+       (type == PNG_EQUATION_HYPERBOLIC && nparams != 4))
+   {
+      png_warning(png_ptr, "Invalid pCAL parameters for equation type");
+      png_free(png_ptr, purpose);
+      return;
+   }
+   else if (type >= PNG_EQUATION_LAST)
+   {
+      png_warning(png_ptr, "Unrecognized equation type for pCAL chunk");
+   }
+
+   for (buf = units; *buf; buf++)
+      /* Empty loop to move past the units string. */ ;
+
+   png_debug(3, "Allocating pCAL parameters array\n");
+   params = (png_charpp)png_malloc_warn(png_ptr, (png_uint_32)(nparams
+      *png_sizeof(png_charp))) ;
+   if (params == NULL)
+     {
+       png_free(png_ptr, purpose);
+       png_warning(png_ptr, "No memory for pCAL params.");
+       return;
+     }
+
+   /* Get pointers to the start of each parameter string. */
+   for (i = 0; i < (int)nparams; i++)
+   {
+      buf++; /* Skip the null string terminator from previous parameter. */
+
+      png_debug1(3, "Reading pCAL parameter %d\n", i);
+      /* endptr addresses the terminator written above, so it may be read. */
+      for (params[i] = buf; buf <= endptr && *buf != 0x00; buf++)
+         /* Empty loop to move past each parameter string */ ;
+
+      /* Make sure we haven't run out of data yet */
+      if (buf > endptr)
+      {
+         png_warning(png_ptr, "Invalid pCAL data");
+         png_free(png_ptr, purpose);
+         png_free(png_ptr, params);
+         return;
+      }
+   }
+
+   png_set_pCAL(png_ptr, info_ptr, purpose, X0, X1, type, nparams,
+      units, params);
+
+   png_free(png_ptr, purpose);
+   png_free(png_ptr, params);
+}
+#endif
+
+#if defined(PNG_READ_sCAL_SUPPORTED)
+/* Read the sCAL chunk and pass its contents to png_set_sCAL() (floating
+ * point builds) or png_set_sCAL_s() (fixed point only builds).
+ *
+ * The chunk data is a unit byte followed by two NUL-separated ASCII
+ * numbers (width, then height).  With floating point support the numbers
+ * are converted with png_strtod() and must both be positive; otherwise
+ * they are copied into separately allocated NUL-terminated strings.
+ * Every buffer allocated here is freed before returning.
+ *
+ * png_ptr  - read struct; its mode flags are used for ordering checks
+ * info_ptr - info struct; may be NULL as far as this function is concerned
+ * length   - length of the chunk data as given by the chunk header
+ */
+void /* PRIVATE */
+png_handle_sCAL(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_charp buffer, ep;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   double width, height;
+   png_charp vp;
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_charp swidth, sheight;
+   png_size_t field_len;
+#endif
+#endif
+   png_size_t slength;
+
+   png_debug(1, "in png_handle_sCAL\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before sCAL");
+   else if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+      png_warning(png_ptr, "Invalid sCAL after IDAT");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_sCAL))
+   {
+      png_warning(png_ptr, "Duplicate sCAL chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   /* Need unit byte, width, NUL, height: at least 4 bytes.  This also keeps
+      buffer + 1 inside the allocation below. */
+   if (length < 4)
+   {
+      png_warning(png_ptr, "sCAL chunk too short");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_debug1(2, "Allocating and reading sCAL chunk data (%lu bytes)\n",
+      length + 1);
+   buffer = (png_charp)png_malloc_warn(png_ptr, length + 1);
+   if (buffer == NULL)
+     {
+       png_warning(png_ptr, "Out of memory while processing sCAL chunk");
+       /* Nothing has been read yet: skip the chunk data and its CRC. */
+       png_crc_finish(png_ptr, length);
+       return;
+     }
+   slength = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)buffer, slength);
+
+   if (png_crc_finish(png_ptr, 0))
+   {
+      png_free(png_ptr, buffer);
+      return;
+   }
+
+   buffer[slength] = 0x00; /* null terminate the last string */
+
+   ep = buffer + 1;        /* skip unit byte */
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   width = png_strtod(png_ptr, ep, &vp);
+   if (*vp)
+   {
+       png_warning(png_ptr, "malformed width string in sCAL chunk");
+       png_free(png_ptr, buffer);
+       return;
+   }
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   field_len = png_strlen(ep) + 1;   /* include the terminating NUL */
+   swidth = (png_charp)png_malloc_warn(png_ptr, (png_uint_32)field_len);
+   if (swidth == NULL)
+     {
+       png_warning(png_ptr, "Out of memory while processing sCAL chunk width");
+       png_free(png_ptr, buffer);
+       return;
+     }
+   png_memcpy(swidth, ep, field_len);
+#endif
+#endif
+
+   /* Advance to the byte after the first NUL: the start of the height. */
+   for (ep = buffer; *ep; ep++)
+      /* empty loop */ ;
+   ep++;
+
+   if (buffer + slength < ep)
+   {
+       png_warning(png_ptr, "Truncated sCAL chunk");
+#if defined(PNG_FIXED_POINT_SUPPORTED) && \
+    !defined(PNG_FLOATING_POINT_SUPPORTED)
+       png_free(png_ptr, swidth);
+#endif
+      png_free(png_ptr, buffer);
+       return;
+   }
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   height = png_strtod(png_ptr, ep, &vp);
+   if (*vp)
+   {
+       png_warning(png_ptr, "malformed height string in sCAL chunk");
+       png_free(png_ptr, buffer);
+       return;
+   }
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   field_len = png_strlen(ep) + 1;   /* include the terminating NUL */
+   sheight = (png_charp)png_malloc_warn(png_ptr, (png_uint_32)field_len);
+   if (sheight == NULL)
+     {
+       png_warning(png_ptr, "Out of memory while processing sCAL chunk height");
+       png_free(png_ptr, swidth);
+       png_free(png_ptr, buffer);
+       return;
+     }
+   png_memcpy(sheight, ep, field_len);
+#endif
+#endif
+
+   if (buffer + slength < ep
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+      || width <= 0. || height <= 0.
+#endif
+      )
+   {
+      png_warning(png_ptr, "Invalid sCAL data");
+      png_free(png_ptr, buffer);
+#if defined(PNG_FIXED_POINT_SUPPORTED) && !defined(PNG_FLOATING_POINT_SUPPORTED)
+      png_free(png_ptr, swidth);
+      png_free(png_ptr, sheight);
+#endif
+      return;
+   }
+
+
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   png_set_sCAL(png_ptr, info_ptr, buffer[0], width, height);
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_set_sCAL_s(png_ptr, info_ptr, buffer[0], swidth, sheight);
+#endif
+#endif
+
+   png_free(png_ptr, buffer);
+#if defined(PNG_FIXED_POINT_SUPPORTED) && !defined(PNG_FLOATING_POINT_SUPPORTED)
+   png_free(png_ptr, swidth);
+   png_free(png_ptr, sheight);
+#endif
+}
+#endif
+
+#if defined(PNG_READ_tIME_SUPPORTED)
+/* Read a tIME chunk (7 bytes: 16-bit year, then month, day, hour, minute
+ * and second as single bytes) and pass it to png_set_tIME().  When the
+ * chunk arrives after IDAT, the PNG_AFTER_IDAT mode flag is set.
+ */
+void /* PRIVATE */
+png_handle_tIME(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_byte raw[7];
+   png_time stamp;
+
+   png_debug(1, "in png_handle_tIME\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Out of place tIME chunk");
+   else if (info_ptr != NULL && (info_ptr->valid & PNG_INFO_tIME))
+   {
+      png_warning(png_ptr, "Duplicate tIME chunk");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   if (png_ptr->mode & PNG_HAVE_IDAT)
+      png_ptr->mode |= PNG_AFTER_IDAT;
+
+   /* The chunk has a fixed size. */
+   if (length != 7)
+   {
+      png_warning(png_ptr, "Incorrect tIME chunk length");
+      png_crc_finish(png_ptr, length);
+      return;
+   }
+
+   png_crc_read(png_ptr, raw, 7);
+   if (png_crc_finish(png_ptr, 0))
+      return;
+
+   /* Fields in the order they appear in the chunk. */
+   stamp.year   = png_get_uint_16(raw);
+   stamp.month  = raw[2];
+   stamp.day    = raw[3];
+   stamp.hour   = raw[4];
+   stamp.minute = raw[5];
+   stamp.second = raw[6];
+
+   png_set_tIME(png_ptr, info_ptr, &stamp);
+}
+#endif
+
+#if defined(PNG_READ_tEXt_SUPPORTED)
+/* Read a tEXt chunk and store it with png_set_text_2().
+ *
+ * The chunk data is read into one buffer and split in place at the first
+ * NUL into a keyword and its text; if there is no NUL inside the data the
+ * text is empty.  The buffer and the temporary png_text record are freed
+ * before returning.  When the chunk arrives after IDAT, the
+ * PNG_AFTER_IDAT mode flag is set.
+ *
+ * Note: this does not properly handle chunks that are > 64K under DOS
+ */
+void /* PRIVATE */
+png_handle_tEXt(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_textp text_ptr;
+   png_charp key;
+   png_charp text;
+   png_uint_32 skip = 0;
+   png_size_t slength;
+   int ret;
+
+   png_debug(1, "in png_handle_tEXt\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before tEXt");
+
+   if (png_ptr->mode & PNG_HAVE_IDAT)
+      png_ptr->mode |= PNG_AFTER_IDAT;
+
+#ifdef PNG_MAX_MALLOC_64K
+   if (length > (png_uint_32)65535L)
+   {
+      png_warning(png_ptr, "tEXt chunk too large to fit in memory");
+      skip = length - (png_uint_32)65535L;
+      length = (png_uint_32)65535L;
+   }
+#endif
+
+   /* One extra byte so the data can always be NUL-terminated. */
+   key = (png_charp)png_malloc_warn(png_ptr, length + 1);
+   if (key == NULL)
+   {
+     png_warning(png_ptr, "No memory to process text chunk.");
+     /* Nothing has been read yet: skip all of the chunk data (including
+        any part already set aside in skip) and its CRC. */
+     png_crc_finish(png_ptr, length + skip);
+     return;
+   }
+   slength = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)key, slength);
+
+   if (png_crc_finish(png_ptr, skip))
+   {
+      png_free(png_ptr, key);
+      return;
+   }
+
+   key[slength] = 0x00;
+
+   for (text = key; *text; text++)
+      /* empty loop to find end of key */ ;
+
+   /* Step over the separator unless we stopped on our own terminator. */
+   if (text != key + slength)
+      text++;
+
+   text_ptr = (png_textp)png_malloc_warn(png_ptr,
+      (png_uint_32)png_sizeof(png_text));
+   if (text_ptr == NULL)
+   {
+     png_warning(png_ptr, "Not enough memory to process text chunk.");
+     png_free(png_ptr, key);
+     return;
+   }
+   text_ptr->compression = PNG_TEXT_COMPRESSION_NONE;
+   text_ptr->key = key;
+#ifdef PNG_iTXt_SUPPORTED
+   text_ptr->lang = NULL;
+   text_ptr->lang_key = NULL;
+   text_ptr->itxt_length = 0;
+#endif
+   text_ptr->text = text;
+   text_ptr->text_length = png_strlen(text);
+
+   ret=png_set_text_2(png_ptr, info_ptr, text_ptr, 1);
+
+   png_free(png_ptr, key);
+   png_free(png_ptr, text_ptr);
+   if (ret)
+     png_warning(png_ptr, "Insufficient memory to process text chunk.");
+}
+#endif
+
+#if defined(PNG_READ_zTXt_SUPPORTED)
+/* Read a zTXt chunk and store it with png_set_text_2().
+ *
+ * The chunk data is parsed as a NUL-terminated keyword, one
+ * compression-method byte, and the remaining bytes, which are run through
+ * png_decompress_chunk().  The buffer and the temporary png_text record
+ * are freed before returning; a failure reported by png_set_text_2() is
+ * raised with png_error() after they have been freed.  When the chunk
+ * arrives after IDAT, the PNG_AFTER_IDAT mode flag is set.
+ *
+ * note: this does not correctly handle chunks that are > 64K under DOS
+ */
+void /* PRIVATE */
+png_handle_zTXt(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_textp text_ptr;
+   png_charp chunkdata;
+   png_charp text;
+   int comp_type;
+   int ret;
+   png_size_t slength, prefix_len, data_len;
+
+   png_debug(1, "in png_handle_zTXt\n");
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before zTXt");
+
+   if (png_ptr->mode & PNG_HAVE_IDAT)
+      png_ptr->mode |= PNG_AFTER_IDAT;
+
+#ifdef PNG_MAX_MALLOC_64K
+   /* We will no doubt have problems with chunks even half this size, but
+      there is no hard and fast rule to tell us where to stop. */
+   if (length > (png_uint_32)65535L)
+   {
+     png_warning(png_ptr,"zTXt chunk too large to fit in memory");
+     png_crc_finish(png_ptr, length);
+     return;
+   }
+#endif
+
+   /* One extra byte so the data can always be NUL-terminated. */
+   chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1);
+   if (chunkdata == NULL)
+   {
+     png_warning(png_ptr,"Out of memory processing zTXt chunk.");
+     /* Nothing has been read yet: skip the chunk data and its CRC. */
+     png_crc_finish(png_ptr, length);
+     return;
+   }
+   slength = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)chunkdata, slength);
+   if (png_crc_finish(png_ptr, 0))
+   {
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+
+   chunkdata[slength] = 0x00;
+
+   for (text = chunkdata; *text; text++)
+      /* empty loop */ ;
+
+   /* zTXt must have some text after the chunkdataword.  The test is done
+      on offsets so that no pointer before the buffer is formed for very
+      short chunks. */
+   if ((png_size_t)(text - chunkdata) + 2 >= slength)
+   {
+      png_warning(png_ptr, "Truncated zTXt chunk");
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+   else
+   {
+       comp_type = *(++text);
+       if (comp_type != PNG_TEXT_COMPRESSION_zTXt)
+       {
+          png_warning(png_ptr, "Unknown compression type in zTXt chunk");
+          comp_type = PNG_TEXT_COMPRESSION_zTXt;
+       }
+       text++;        /* skip the compression_method byte */
+   }
+   prefix_len = text - chunkdata;
+
+   chunkdata = (png_charp)png_decompress_chunk(png_ptr, comp_type, chunkdata,
+                                    (png_size_t)length, prefix_len, &data_len);
+
+   text_ptr = (png_textp)png_malloc_warn(png_ptr,
+     (png_uint_32)png_sizeof(png_text));
+   if (text_ptr == NULL)
+   {
+     png_warning(png_ptr,"Not enough memory to process zTXt chunk.");
+     png_free(png_ptr, chunkdata);
+     return;
+   }
+   text_ptr->compression = comp_type;
+   text_ptr->key = chunkdata;
+#ifdef PNG_iTXt_SUPPORTED
+   text_ptr->lang = NULL;
+   text_ptr->lang_key = NULL;
+   text_ptr->itxt_length = 0;
+#endif
+   text_ptr->text = chunkdata + prefix_len;
+   text_ptr->text_length = data_len;
+
+   ret=png_set_text_2(png_ptr, info_ptr, text_ptr, 1);
+
+   png_free(png_ptr, text_ptr);
+   png_free(png_ptr, chunkdata);
+   if (ret)
+     png_error(png_ptr, "Insufficient memory to store zTXt chunk.");
+}
+#endif
+
+#if defined(PNG_READ_iTXt_SUPPORTED)
+/* Read an iTXt chunk and store it with png_set_text_2().
+ *
+ * The chunk data is parsed as a NUL-terminated keyword, a compression
+ * flag byte, a compression method byte, a NUL-terminated language tag, a
+ * NUL-terminated translated keyword, and the text.  When the compression
+ * flag is nonzero the data is run through png_decompress_chunk().  The
+ * buffer and the temporary png_text record are freed before returning; a
+ * failure reported by png_set_text_2() is raised with png_error() after
+ * they have been freed.  When the chunk arrives after IDAT, the
+ * PNG_AFTER_IDAT mode flag is set.
+ *
+ * note: this does not correctly handle chunks that are > 64K under DOS
+ */
+void /* PRIVATE */
+png_handle_iTXt(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_textp text_ptr;
+   png_charp chunkdata;
+   png_charp lang, text, lang_key;
+   int comp_flag;
+   int comp_type = 0;
+   int ret;
+   png_size_t slength, prefix_len, data_len;
+   png_size_t lang_offset, lang_key_offset;
+
+   png_debug(1, "in png_handle_iTXt\n");
+
+   if (!(png_ptr->mode & PNG_HAVE_IHDR))
+      png_error(png_ptr, "Missing IHDR before iTXt");
+
+   if (png_ptr->mode & PNG_HAVE_IDAT)
+      png_ptr->mode |= PNG_AFTER_IDAT;
+
+#ifdef PNG_MAX_MALLOC_64K
+   /* We will no doubt have problems with chunks even half this size, but
+      there is no hard and fast rule to tell us where to stop. */
+   if (length > (png_uint_32)65535L)
+   {
+     png_warning(png_ptr,"iTXt chunk too large to fit in memory");
+     png_crc_finish(png_ptr, length);
+     return;
+   }
+#endif
+
+   /* One extra byte so the data can always be NUL-terminated. */
+   chunkdata = (png_charp)png_malloc_warn(png_ptr, length + 1);
+   if (chunkdata == NULL)
+   {
+     png_warning(png_ptr, "No memory to process iTXt chunk.");
+     /* Nothing has been read yet: skip the chunk data and its CRC. */
+     png_crc_finish(png_ptr, length);
+     return;
+   }
+   slength = (png_size_t)length;
+   png_crc_read(png_ptr, (png_bytep)chunkdata, slength);
+   if (png_crc_finish(png_ptr, 0))
+   {
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+
+   chunkdata[slength] = 0x00;
+
+   for (lang = chunkdata; *lang; lang++)
+      /* empty loop */ ;
+   lang++;        /* skip NUL separator */
+
+   /* iTXt must have a language tag (possibly empty), two compression bytes,
+      translated keyword (possibly empty), and possibly some text after the
+      keyword.  The test is done on offsets so that no pointer before the
+      buffer is formed for very short chunks. */
+
+   if ((png_size_t)(lang - chunkdata) + 3 >= slength)
+   {
+      png_warning(png_ptr, "Truncated iTXt chunk");
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+   else
+   {
+       comp_flag = *lang++;
+       comp_type = *lang++;
+   }
+
+   for (lang_key = lang; *lang_key; lang_key++)
+      /* empty loop */ ;
+   lang_key++;        /* skip NUL separator */
+
+   if (lang_key >= chunkdata + slength)
+   {
+      png_warning(png_ptr, "Truncated iTXt chunk");
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+
+   for (text = lang_key; *text; text++)
+      /* empty loop */ ;
+   text++;        /* skip NUL separator */
+   if (text >= chunkdata + slength)
+   {
+      png_warning(png_ptr, "Malformed iTXt chunk");
+      png_free(png_ptr, chunkdata);
+      return;
+   }
+
+   /* Record field positions as offsets now: png_decompress_chunk() returns
+      the buffer to use afterwards, which need not be the one passed in, so
+      the old pointers must not be used once it has been called. */
+   prefix_len = text - chunkdata;
+   lang_offset = lang - chunkdata;
+   lang_key_offset = lang_key - chunkdata;
+
+   if (comp_flag)
+       chunkdata = png_decompress_chunk(png_ptr, comp_type, chunkdata,
+          (size_t)length, prefix_len, &data_len);
+   else
+       data_len=png_strlen(chunkdata + prefix_len);
+   text_ptr = (png_textp)png_malloc_warn(png_ptr,
+      (png_uint_32)png_sizeof(png_text));
+   if (text_ptr == NULL)
+   {
+     png_warning(png_ptr,"Not enough memory to process iTXt chunk.");
+     png_free(png_ptr, chunkdata);
+     return;
+   }
+   text_ptr->compression = (int)comp_flag + 1;
+   text_ptr->lang_key = chunkdata + lang_key_offset;
+   text_ptr->lang = chunkdata + lang_offset;
+   text_ptr->itxt_length = data_len;
+   text_ptr->text_length = 0;
+   text_ptr->key = chunkdata;
+   text_ptr->text = chunkdata + prefix_len;
+
+   ret=png_set_text_2(png_ptr, info_ptr, text_ptr, 1);
+
+   png_free(png_ptr, text_ptr);
+   png_free(png_ptr, chunkdata);
+   if (ret)
+     png_error(png_ptr, "Insufficient memory to store iTXt chunk.");
+}
+#endif
+
+/* Fallback for a chunk that has no dedicated handler.  The chunk name is
+   validated, and an unknown critical chunk is an error unless it is listed
+   as "always handle" or a user chunk callback is installed.  If
+   PNG_FLAG_KEEP_UNKNOWN_CHUNKS is set or a user callback is installed the
+   data is read, offered to the callback and/or stored; else it is skipped. */
+void /* PRIVATE */
+png_handle_unknown(png_structp png_ptr, png_infop info_ptr, png_uint_32 length)
+{
+   png_uint_32 skip = 0;   /* bytes of chunk data left for png_crc_finish */
+
+   png_debug(1, "in png_handle_unknown\n");
+
+   if (png_ptr->mode & PNG_HAVE_IDAT)
+   {
+#ifdef PNG_USE_LOCAL_ARRAYS
+      PNG_CONST PNG_IDAT;
+#endif
+      if (png_memcmp(png_ptr->chunk_name, png_IDAT, 4))  /* not an IDAT */
+         png_ptr->mode |= PNG_AFTER_IDAT;
+   }
+
+   png_check_chunk_name(png_ptr, png_ptr->chunk_name);
+
+   if (!(png_ptr->chunk_name[0] & 0x20))   /* bit 0x20 clear: critical chunk */
+   {
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+      if(png_handle_as_unknown(png_ptr, png_ptr->chunk_name) !=
+           PNG_HANDLE_CHUNK_ALWAYS
+#if defined(PNG_READ_USER_CHUNKS_SUPPORTED)
+           && png_ptr->read_user_chunk_fn == NULL
+#endif
+        )
+#endif
+          png_chunk_error(png_ptr, "unknown critical chunk");
+   }
+
+#if defined(PNG_READ_UNKNOWN_CHUNKS_SUPPORTED)
+   if ((png_ptr->flags & PNG_FLAG_KEEP_UNKNOWN_CHUNKS) ||
+       (png_ptr->read_user_chunk_fn != NULL))
+   {
+#ifdef PNG_MAX_MALLOC_64K
+       if (length > (png_uint_32)65535L)
+       {
+           png_warning(png_ptr, "unknown chunk too large to fit in memory");
+           skip = length - (png_uint_32)65535L;   /* excess is discarded */
+           length = (png_uint_32)65535L;
+       }
+#endif
+       png_strncpy((png_charp)png_ptr->unknown_chunk.name,
+	 (png_charp)png_ptr->chunk_name, 4);
+       png_ptr->unknown_chunk.name[4] = '\0';
+       png_ptr->unknown_chunk.data = (png_bytep)png_malloc(png_ptr, length);
+       png_ptr->unknown_chunk.size = (png_size_t)length;
+       png_crc_read(png_ptr, (png_bytep)png_ptr->unknown_chunk.data, length);
+#if defined(PNG_READ_USER_CHUNKS_SUPPORTED)
+       if(png_ptr->read_user_chunk_fn != NULL)
+       {
+          /* callback to user unknown chunk handler */
+          int ret;   /* <0: error, 0: not handled by the callback */
+          ret = (*(png_ptr->read_user_chunk_fn))
+            (png_ptr, &png_ptr->unknown_chunk);
+          if (ret < 0)
+             png_chunk_error(png_ptr, "error in user chunk");
+          if (ret == 0)
+          {
+             if (!(png_ptr->chunk_name[0] & 0x20))
+                if(png_handle_as_unknown(png_ptr, png_ptr->chunk_name) !=
+                     PNG_HANDLE_CHUNK_ALWAYS)
+                   png_chunk_error(png_ptr, "unknown critical chunk");
+             png_set_unknown_chunks(png_ptr, info_ptr,
+               &png_ptr->unknown_chunk, 1);
+          }
+       }
+       else   /* no callback: store it, it used to be read and then dropped */
+#endif
+       png_set_unknown_chunks(png_ptr, info_ptr, &png_ptr->unknown_chunk, 1);
+       png_free(png_ptr, png_ptr->unknown_chunk.data);
+       png_ptr->unknown_chunk.data = NULL;
+   }
+   else
+#endif
+      skip = length;   /* chunk is not kept: skip all of its data */
+
+   png_crc_finish(png_ptr, skip);
+
+#if !defined(PNG_READ_USER_CHUNKS_SUPPORTED)
+   info_ptr = info_ptr; /* quiet compiler warnings about unused info_ptr */
+#endif
+}
+
+/* Validate a four-byte chunk type: every byte must be an ASCII letter
+   (A-Z or a-z); otherwise png_chunk_error() is raised with "invalid chunk
+   type".  Whether the chunk is critical is deliberately not examined here,
+   so that unknown critical chunks can still be routed to user functions
+   once the chunk name itself has been accepted. */
+
+#define isnonalpha(c) ((c) < 65 || (c) > 122 || ((c) > 90 && (c) < 97))
+
+void /* PRIVATE */
+png_check_chunk_name(png_structp png_ptr, png_bytep chunk_name)
+{
+   int pos = 0;   /* index of the first non-letter byte, 4 if there is none */
+   png_debug(1, "in png_check_chunk_name\n");
+   while (pos < 4 && !isnonalpha(chunk_name[pos]))
+      pos++;
+   if (pos < 4)
+      png_chunk_error(png_ptr, "invalid chunk type");
+}
+
+/* Merges the row most recently read (png_ptr->row_buf + 1) into the
+   caller's row buffer.  The mask value describes which pixels are to be
+   combined with the row and is what distinguishes the two methods of
+   progressive display of interlaced images.  The pattern always repeats
+   every 8 pixels, so just 8 bits are needed; it is tested from the most
+   significant bit down.  A one indicates the pixel is copied from the new
+   row, a zero indicates the destination pixel is left as it is.  If you
+   want all pixels to be combined, pass 0xff (255) in mask: the row is then
+   copied with one png_memcpy.  Depths 1, 2 and 4 are merged bit-wise, all
+   other depths are copied as (pixel_depth >> 3) whole bytes per pixel.  */
+
+void /* PRIVATE */
+png_combine_row(png_structp png_ptr, png_bytep row, int mask)
+{
+   png_debug(1,"in png_combine_row\n");
+   if (mask == 0xff)
+   {
+      png_memcpy(row, png_ptr->row_buf + 1,
+         PNG_ROWBYTES(png_ptr->row_info.pixel_depth, png_ptr->width));
+   }
+   else
+   {
+      switch (png_ptr->row_info.pixel_depth)
+      {
+         case 1:
+         {
+            png_bytep sp = png_ptr->row_buf + 1;   /* source: new row */
+            png_bytep dp = row;                    /* destination row */
+            int s_inc, s_start, s_end;
+            int m = 0x80;   /* current mask bit, walks 0x80 down to 0x01 */
+            int shift;      /* bit position of the current pixel */
+            png_uint_32 i;
+            png_uint_32 row_width = png_ptr->width;
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+                s_start = 0;
+                s_end = 7;
+                s_inc = 1;
+            }
+            else
+#endif
+            {
+                s_start = 7;
+                s_end = 0;
+                s_inc = -1;
+            }
+
+            shift = s_start;
+
+            for (i = 0; i < row_width; i++)
+            {
+               if (m & mask)
+               {
+                  int value;
+
+                  value = (*sp >> shift) & 0x01;
+                  *dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff); /* clear bit */
+                  *dp |= (png_byte)(value << shift);
+               }
+
+               if (shift == s_end)   /* last pixel of this byte: next byte */
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+
+               if (m == 1)   /* mask pattern restarts every 8 pixels */
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+         case 2:
+         {
+            png_bytep sp = png_ptr->row_buf + 1;
+            png_bytep dp = row;
+            int s_start, s_end, s_inc;
+            int m = 0x80;
+            int shift;
+            png_uint_32 i;
+            png_uint_32 row_width = png_ptr->width;
+            int value;
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+               s_start = 0;
+               s_end = 6;
+               s_inc = 2;
+            }
+            else
+#endif
+            {
+               s_start = 6;
+               s_end = 0;
+               s_inc = -2;
+            }
+
+            shift = s_start;
+
+            for (i = 0; i < row_width; i++)
+            {
+               if (m & mask)
+               {
+                  value = (*sp >> shift) & 0x03;
+                  *dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff); /* clear 2 bits */
+                  *dp |= (png_byte)(value << shift);
+               }
+
+               if (shift == s_end)
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+               if (m == 1)
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+         case 4:
+         {
+            png_bytep sp = png_ptr->row_buf + 1;
+            png_bytep dp = row;
+            int s_start, s_end, s_inc;
+            int m = 0x80;
+            int shift;
+            png_uint_32 i;
+            png_uint_32 row_width = png_ptr->width;
+            int value;
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (png_ptr->transformations & PNG_PACKSWAP)
+            {
+               s_start = 0;
+               s_end = 4;
+               s_inc = 4;
+            }
+            else
+#endif
+            {
+               s_start = 4;
+               s_end = 0;
+               s_inc = -4;
+            }
+            shift = s_start;
+
+            for (i = 0; i < row_width; i++)
+            {
+               if (m & mask)
+               {
+                  value = (*sp >> shift) & 0xf;
+                  *dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff); /* clear nibble */
+                  *dp |= (png_byte)(value << shift);
+               }
+
+               if (shift == s_end)
+               {
+                  shift = s_start;
+                  sp++;
+                  dp++;
+               }
+               else
+                  shift += s_inc;
+               if (m == 1)
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+         default:
+         {
+            png_bytep sp = png_ptr->row_buf + 1;
+            png_bytep dp = row;
+            png_size_t pixel_bytes = (png_ptr->row_info.pixel_depth >> 3);
+            png_uint_32 i;
+            png_uint_32 row_width = png_ptr->width;
+            png_byte m = 0x80;
+
+            /* whole-byte pixels: copy pixel_bytes bytes per selected pixel */
+            for (i = 0; i < row_width; i++)
+            {
+               if (m & mask)
+               {
+                  png_memcpy(dp, sp, pixel_bytes);
+               }
+
+               sp += pixel_bytes;
+               dp += pixel_bytes;
+
+               if (m == 1)
+                  m = 0x80;
+               else
+                  m >>= 1;
+            }
+            break;
+         }
+      }
+   }
+}
+
+#ifdef PNG_READ_INTERLACING_SUPPORTED
+/* Widens the row in png_ptr->row_buf in place by writing every pixel
+   png_pass_inc[pass] times, walking from the last pixel back to the first,
+   then stores the new width and rowbytes in png_ptr->row_info.  (The OLD
+   pre-1.0.9 interface took row_info, row, pass and transformations.) */
+void /* PRIVATE */
+png_do_read_interlace(png_structp png_ptr)
+{
+   png_row_infop row_info = &(png_ptr->row_info);
+   png_bytep row = png_ptr->row_buf + 1;
+   int pass = png_ptr->pass;
+   png_uint_32 transformations = png_ptr->transformations;
+#ifdef PNG_USE_LOCAL_ARRAYS
+   /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+   /* offset to next interlace block */
+   PNG_CONST int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+#endif
+
+   png_debug(1,"in png_do_read_interlace\n");
+   if (row != NULL && row_info != NULL)
+   {
+      png_uint_32 final_width;   /* row width after replication */
+
+      final_width = row_info->width * png_pass_inc[pass];
+
+      switch (row_info->pixel_depth)
+      {
+         case 1:
+         {
+            png_bytep sp = row + (png_size_t)((row_info->width - 1) >> 3);
+            png_bytep dp = row + (png_size_t)((final_width - 1) >> 3);
+            int sshift, dshift;   /* bit positions of source / dest pixel */
+            int s_start, s_end, s_inc;
+            int jstop = png_pass_inc[pass];   /* copies written per pixel */
+            png_byte v;
+            png_uint_32 i;
+            int j;
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (transformations & PNG_PACKSWAP)
+            {
+                sshift = (int)((row_info->width + 7) & 0x07);
+                dshift = (int)((final_width + 7) & 0x07);
+                s_start = 7;
+                s_end = 0;
+                s_inc = -1;
+            }
+            else
+#endif
+            {
+                sshift = 7 - (int)((row_info->width + 7) & 0x07);
+                dshift = 7 - (int)((final_width + 7) & 0x07);
+                s_start = 0;
+                s_end = 7;
+                s_inc = 1;
+            }
+
+            for (i = 0; i < row_info->width; i++)
+            {
+               v = (png_byte)((*sp >> sshift) & 0x01);
+               for (j = 0; j < jstop; j++)
+               {
+                  *dp &= (png_byte)((0x7f7f >> (7 - dshift)) & 0xff); /* clear bit */
+                  *dp |= (png_byte)(v << dshift);
+                  if (dshift == s_end)   /* byte done: step to previous byte */
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+                  else
+                     dshift += s_inc;
+               }
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+               else
+                  sshift += s_inc;
+            }
+            break;
+         }
+         case 2:
+         {
+            png_bytep sp = row + (png_uint_32)((row_info->width - 1) >> 2);
+            png_bytep dp = row + (png_uint_32)((final_width - 1) >> 2);
+            int sshift, dshift;
+            int s_start, s_end, s_inc;
+            int jstop = png_pass_inc[pass];
+            png_uint_32 i;
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (transformations & PNG_PACKSWAP)
+            {
+               sshift = (int)(((row_info->width + 3) & 0x03) << 1);
+               dshift = (int)(((final_width + 3) & 0x03) << 1);
+               s_start = 6;
+               s_end = 0;
+               s_inc = -2;
+            }
+            else
+#endif
+            {
+               sshift = (int)((3 - ((row_info->width + 3) & 0x03)) << 1);
+               dshift = (int)((3 - ((final_width + 3) & 0x03)) << 1);
+               s_start = 0;
+               s_end = 6;
+               s_inc = 2;
+            }
+
+            for (i = 0; i < row_info->width; i++)
+            {
+               png_byte v;
+               int j;
+
+               v = (png_byte)((*sp >> sshift) & 0x03);
+               for (j = 0; j < jstop; j++)
+               {
+                  *dp &= (png_byte)((0x3f3f >> (6 - dshift)) & 0xff); /* clear 2 bits */
+                  *dp |= (png_byte)(v << dshift);
+                  if (dshift == s_end)
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+                  else
+                     dshift += s_inc;
+               }
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+               else
+                  sshift += s_inc;
+            }
+            break;
+         }
+         case 4:
+         {
+            png_bytep sp = row + (png_size_t)((row_info->width - 1) >> 1);
+            png_bytep dp = row + (png_size_t)((final_width - 1) >> 1);
+            int sshift, dshift;
+            int s_start, s_end, s_inc;
+            png_uint_32 i;
+            int jstop = png_pass_inc[pass];
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)
+            if (transformations & PNG_PACKSWAP)
+            {
+               sshift = (int)(((row_info->width + 1) & 0x01) << 2);
+               dshift = (int)(((final_width + 1) & 0x01) << 2);
+               s_start = 4;
+               s_end = 0;
+               s_inc = -4;
+            }
+            else
+#endif
+            {
+               sshift = (int)((1 - ((row_info->width + 1) & 0x01)) << 2);
+               dshift = (int)((1 - ((final_width + 1) & 0x01)) << 2);
+               s_start = 0;
+               s_end = 4;
+               s_inc = 4;
+            }
+
+            for (i = 0; i < row_info->width; i++)
+            {
+               png_byte v = (png_byte)((*sp >> sshift) & 0xf);
+               int j;
+
+               for (j = 0; j < jstop; j++)
+               {
+                  *dp &= (png_byte)((0xf0f >> (4 - dshift)) & 0xff); /* clear nibble */
+                  *dp |= (png_byte)(v << dshift);
+                  if (dshift == s_end)
+                  {
+                     dshift = s_start;
+                     dp--;
+                  }
+                  else
+                     dshift += s_inc;
+               }
+               if (sshift == s_end)
+               {
+                  sshift = s_start;
+                  sp--;
+               }
+               else
+                  sshift += s_inc;
+            }
+            break;
+         }
+         default:
+         {
+            png_size_t pixel_bytes = (row_info->pixel_depth >> 3);
+            png_bytep sp = row + (png_size_t)(row_info->width - 1) * pixel_bytes;
+            png_bytep dp = row + (png_size_t)(final_width - 1) * pixel_bytes;
+
+            int jstop = png_pass_inc[pass];
+            png_uint_32 i;
+
+            for (i = 0; i < row_info->width; i++)
+            {
+               png_byte v[8];   /* one pixel; pixel_bytes bytes are copied in */
+               int j;
+
+               png_memcpy(v, sp, pixel_bytes);
+               for (j = 0; j < jstop; j++)
+               {
+                  png_memcpy(dp, v, pixel_bytes);
+                  dp -= pixel_bytes;
+               }
+               sp -= pixel_bytes;
+            }
+            break;
+         }
+      }
+      row_info->width = final_width;
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,final_width);
+   }
+#if !defined(PNG_READ_PACKSWAP_SUPPORTED)
+   transformations = transformations; /* silence compiler warning */
+#endif
+}
+#endif /* PNG_READ_INTERLACING_SUPPORTED */
+/* Undo the filter type `filter` on `row` in place; prev_row feeds UP, AVG and PAETH. */
+void /* PRIVATE */
+png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep row,
+   png_bytep prev_row, int filter)
+{
+   png_debug(1, "in png_read_filter_row\n");
+   png_debug2(2,"row = %lu, filter = %d\n", png_ptr->row_number, filter);
+   switch (filter)
+   {
+      case PNG_FILTER_VALUE_NONE:
+         break;
+      case PNG_FILTER_VALUE_SUB:   /* add the byte bpp positions to the left */
+      {
+         png_uint_32 i;
+         png_uint_32 istop = row_info->rowbytes;
+         png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3; /* bytes per pixel, rounded up */
+         png_bytep rp = row + bpp;
+         png_bytep lp = row;
+
+         for (i = bpp; i < istop; i++)
+         {
+            *rp = (png_byte)(((int)(*rp) + (int)(*lp++)) & 0xff);
+            rp++;
+         }
+         break;
+      }
+      case PNG_FILTER_VALUE_UP:   /* add the byte at the same offset in prev_row */
+      {
+         png_uint_32 i;
+         png_uint_32 istop = row_info->rowbytes;
+         png_bytep rp = row;
+         png_bytep pp = prev_row;
+
+         for (i = 0; i < istop; i++)
+         {
+            *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+            rp++;
+         }
+         break;
+      }
+      case PNG_FILTER_VALUE_AVG:   /* add the average of left and prev_row bytes */
+      {
+         png_uint_32 i;
+         png_bytep rp = row;
+         png_bytep pp = prev_row;
+         png_bytep lp = row;
+         png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
+         png_uint_32 istop = row_info->rowbytes - bpp;
+
+         for (i = 0; i < bpp; i++)   /* first pixel: no left neighbour */
+         {
+            *rp = (png_byte)(((int)(*rp) +
+               ((int)(*pp++) / 2 )) & 0xff);
+            rp++;
+         }
+
+         for (i = 0; i < istop; i++)
+         {
+            *rp = (png_byte)(((int)(*rp) +
+               (int)(*pp++ + *lp++) / 2 ) & 0xff);
+            rp++;
+         }
+         break;
+      }
+      case PNG_FILTER_VALUE_PAETH:
+      {
+         png_uint_32 i;
+         png_bytep rp = row;
+         png_bytep pp = prev_row;   /* byte at the same offset in prev_row */
+         png_bytep lp = row;        /* byte bpp to the left in row */
+         png_bytep cp = prev_row;   /* byte bpp to the left in prev_row */
+         png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
+         png_uint_32 istop=row_info->rowbytes - bpp;
+
+         for (i = 0; i < bpp; i++)   /* first pixel: only prev_row is added */
+         {
+            *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+            rp++;
+         }
+
+         for (i = 0; i < istop; i++)   /* use leftover rp,pp */
+         {
+            int a, b, c, pa, pb, pc, p;
+
+            a = *lp++;
+            b = *pp++;
+            c = *cp++;
+
+            p = b - c;
+            pc = a - c;
+
+#ifdef PNG_USE_ABS
+            pa = abs(p);
+            pb = abs(pc);
+            pc = abs(p + pc);
+#else
+            pa = p < 0 ? -p : p;
+            pb = pc < 0 ? -pc : pc;
+            pc = (p + pc) < 0 ? -(p + pc) : p + pc;
+#endif
+
+            /*
+               if (pa <= pb && pa <= pc)
+                  p = a;
+               else if (pb <= pc)
+                  p = b;
+               else
+                  p = c;
+             */
+
+            p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
+
+            *rp = (png_byte)(((int)(*rp) + p) & 0xff);
+            rp++;
+         }
+         break;
+      }
+      default:   /* unknown filter: warn and zero only the first byte of row */
+         png_warning(png_ptr, "Ignoring bad adaptive filter type");
+         *row=0;
+         break;
+   }
+}
+/* Advance to the next row or interlace pass; after the last row, drain and reset zlib. */
+void /* PRIVATE */
+png_read_finish_row(png_structp png_ptr)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* start of interlace block */
+   PNG_CONST int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* offset to next interlace block */
+   PNG_CONST int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+
+   /* start of interlace block in the y direction */
+   PNG_CONST int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+
+   /* offset to next interlace block in the y direction */
+   PNG_CONST int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+#endif
+
+   png_debug(1, "in png_read_finish_row\n");
+   png_ptr->row_number++;
+   if (png_ptr->row_number < png_ptr->num_rows)
+      return;   /* more rows remain in the current pass (or image) */
+
+   if (png_ptr->interlaced)
+   {
+      png_ptr->row_number = 0;
+      png_memset_check(png_ptr, png_ptr->prev_row, 0,
+         png_ptr->rowbytes + 1);
+      do
+      {
+         png_ptr->pass++;
+         if (png_ptr->pass >= 7)
+            break;   /* all seven passes done: fall through to cleanup */
+         png_ptr->iwidth = (png_ptr->width +
+            png_pass_inc[png_ptr->pass] - 1 -
+            png_pass_start[png_ptr->pass]) /
+            png_pass_inc[png_ptr->pass];
+
+         png_ptr->irowbytes = PNG_ROWBYTES(png_ptr->pixel_depth,
+            png_ptr->iwidth) + 1;
+         /* without PNG_INTERLACE the pass height is tracked in num_rows */
+         if (!(png_ptr->transformations & PNG_INTERLACE))
+         {
+            png_ptr->num_rows = (png_ptr->height +
+               png_pass_yinc[png_ptr->pass] - 1 -
+               png_pass_ystart[png_ptr->pass]) /
+               png_pass_yinc[png_ptr->pass];
+            if (!(png_ptr->num_rows))
+               continue;   /* jumps to the loop test, which sees num_rows */
+         }
+         else  /* if (png_ptr->transformations & PNG_INTERLACE) */
+            break;
+      } while (png_ptr->num_rows == 0 || png_ptr->iwidth == 0);
+      /* the loop skips passes with no rows as well as passes with no width */
+      if (png_ptr->pass < 7)
+         return;
+   }
+
+   if (!(png_ptr->flags & PNG_FLAG_ZLIB_FINISHED))
+   {
+#ifdef PNG_USE_LOCAL_ARRAYS
+      PNG_CONST PNG_IDAT;
+#endif
+      char extra;   /* one-byte output buffer: any output means extra data */
+      int ret;
+
+      png_ptr->zstream.next_out = (Byte *)&extra;
+      png_ptr->zstream.avail_out = (uInt)1;
+      for(;;)
+      {
+         if (!(png_ptr->zstream.avail_in))
+         {
+            while (!png_ptr->idat_size)   /* current IDAT used up: read next */
+            {
+               png_byte chunk_length[4];
+
+               png_crc_finish(png_ptr, 0);
+
+               png_read_data(png_ptr, chunk_length, 4);
+               png_ptr->idat_size = png_get_uint_31(png_ptr, chunk_length);
+               png_reset_crc(png_ptr);
+               png_crc_read(png_ptr, png_ptr->chunk_name, 4);
+               if (png_memcmp(png_ptr->chunk_name, png_IDAT, 4))
+                  png_error(png_ptr, "Not enough image data");
+
+            }
+            png_ptr->zstream.avail_in = (uInt)png_ptr->zbuf_size;
+            png_ptr->zstream.next_in = png_ptr->zbuf;
+            if (png_ptr->zbuf_size > png_ptr->idat_size)
+               png_ptr->zstream.avail_in = (uInt)png_ptr->idat_size;
+            png_crc_read(png_ptr, png_ptr->zbuf, png_ptr->zstream.avail_in);
+            png_ptr->idat_size -= png_ptr->zstream.avail_in;
+         }
+         ret = inflate(&png_ptr->zstream, Z_PARTIAL_FLUSH);
+         if (ret == Z_STREAM_END)
+         {
+            if (!(png_ptr->zstream.avail_out) || png_ptr->zstream.avail_in ||
+               png_ptr->idat_size)
+               png_warning(png_ptr, "Extra compressed data");
+            png_ptr->mode |= PNG_AFTER_IDAT;
+            png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
+            break;
+         }
+         if (ret != Z_OK)
+            png_error(png_ptr, png_ptr->zstream.msg ? png_ptr->zstream.msg :
+                      "Decompression Error");
+
+         if (!(png_ptr->zstream.avail_out))   /* a byte was produced */
+         {
+            png_warning(png_ptr, "Extra compressed data.");
+            png_ptr->mode |= PNG_AFTER_IDAT;
+            png_ptr->flags |= PNG_FLAG_ZLIB_FINISHED;
+            break;
+         }
+
+      }
+      png_ptr->zstream.avail_out = 0;
+   }
+
+   if (png_ptr->idat_size || png_ptr->zstream.avail_in)
+      png_warning(png_ptr, "Extra compression data");
+
+   inflateReset(&png_ptr->zstream);
+
+   png_ptr->mode |= PNG_AFTER_IDAT;
+}
+/* Setup before rows are read: pass geometry, then row_buf / prev_row allocation. */
+void /* PRIVATE */
+png_read_start_row(png_structp png_ptr)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* start of interlace block */
+   PNG_CONST int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* offset to next interlace block */
+   PNG_CONST int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+
+   /* start of interlace block in the y direction */
+   PNG_CONST int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+
+   /* offset to next interlace block in the y direction */
+   PNG_CONST int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+#endif
+
+   int max_pixel_depth;     /* pixel depth used to size the row buffer */
+   png_uint_32 row_bytes;
+
+   png_debug(1, "in png_read_start_row\n");
+   png_ptr->zstream.avail_in = 0;
+   png_init_read_transformations(png_ptr);
+   if (png_ptr->interlaced)
+   {
+      if (!(png_ptr->transformations & PNG_INTERLACE))
+         png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 -
+            png_pass_ystart[0]) / png_pass_yinc[0];
+      else
+         png_ptr->num_rows = png_ptr->height;
+
+      png_ptr->iwidth = (png_ptr->width +
+         png_pass_inc[png_ptr->pass] - 1 -
+         png_pass_start[png_ptr->pass]) /
+         png_pass_inc[png_ptr->pass];
+
+         row_bytes = PNG_ROWBYTES(png_ptr->pixel_depth,png_ptr->iwidth) + 1;
+
+         png_ptr->irowbytes = (png_size_t)row_bytes;
+         if((png_uint_32)png_ptr->irowbytes != row_bytes) /* lost in the cast */
+            png_error(png_ptr, "Rowbytes overflow in png_read_start_row");
+   }
+   else
+   {
+      png_ptr->num_rows = png_ptr->height;
+      png_ptr->iwidth = png_ptr->width;
+      png_ptr->irowbytes = png_ptr->rowbytes + 1;
+   }
+   max_pixel_depth = png_ptr->pixel_depth;
+   /* max_pixel_depth is adjusted below for each enabled transformation */
+#if defined(PNG_READ_PACK_SUPPORTED)
+   if ((png_ptr->transformations & PNG_PACK) && png_ptr->bit_depth < 8)
+      max_pixel_depth = 8;
+#endif
+
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+   if (png_ptr->transformations & PNG_EXPAND)
+   {
+      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      {
+         if (png_ptr->num_trans)
+            max_pixel_depth = 32;
+         else
+            max_pixel_depth = 24;
+      }
+      else if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
+      {
+         if (max_pixel_depth < 8)
+            max_pixel_depth = 8;
+         if (png_ptr->num_trans)
+            max_pixel_depth *= 2;
+      }
+      else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
+      {
+         if (png_ptr->num_trans)
+         {
+            max_pixel_depth *= 4;
+            max_pixel_depth /= 3;
+         }
+      }
+   }
+#endif
+
+#if defined(PNG_READ_FILLER_SUPPORTED)
+   if (png_ptr->transformations & (PNG_FILLER))
+   {
+      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+         max_pixel_depth = 32;
+      else if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY)
+      {
+         if (max_pixel_depth <= 8)
+            max_pixel_depth = 16;
+         else
+            max_pixel_depth = 32;
+      }
+      else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
+      {
+         if (max_pixel_depth <= 32)
+            max_pixel_depth = 32;
+         else
+            max_pixel_depth = 64;
+      }
+   }
+#endif
+
+#if defined(PNG_READ_GRAY_TO_RGB_SUPPORTED)
+   if (png_ptr->transformations & PNG_GRAY_TO_RGB)
+   {
+      if (
+#if defined(PNG_READ_EXPAND_SUPPORTED)
+        (png_ptr->num_trans && (png_ptr->transformations & PNG_EXPAND)) ||
+#endif
+#if defined(PNG_READ_FILLER_SUPPORTED)
+        (png_ptr->transformations & (PNG_FILLER)) ||
+#endif
+        png_ptr->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      {
+         if (max_pixel_depth <= 16)
+            max_pixel_depth = 32;
+         else
+            max_pixel_depth = 64;
+      }
+      else
+      {
+         if (max_pixel_depth <= 8)
+           {
+             if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+               max_pixel_depth = 32;
+             else
+               max_pixel_depth = 24;
+           }
+         else if (png_ptr->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+            max_pixel_depth = 64;
+         else
+            max_pixel_depth = 48;
+      }
+   }
+#endif
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) && \
+defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
+   if(png_ptr->transformations & PNG_USER_TRANSFORM)
+     {
+       int user_pixel_depth=png_ptr->user_transform_depth*
+         png_ptr->user_transform_channels;
+       if(user_pixel_depth > max_pixel_depth)
+         max_pixel_depth=user_pixel_depth;
+     }
+#endif
+
+   /* align the width on the next larger 8 pixels.  Mainly used
+      for interlacing */
+   row_bytes = ((png_ptr->width + 7) & ~((png_uint_32)7));
+   /* calculate the maximum bytes needed, adding a byte and a pixel
+      for safety's sake */
+   row_bytes = PNG_ROWBYTES(max_pixel_depth,row_bytes) +
+      1 + ((max_pixel_depth + 7) >> 3);
+#ifdef PNG_MAX_MALLOC_64K
+   if (row_bytes > (png_uint_32)65536L)
+      png_error(png_ptr, "This image requires a row greater than 64KB");
+#endif
+   png_ptr->big_row_buf = (png_bytep)png_malloc(png_ptr, row_bytes+64);
+   png_ptr->row_buf = png_ptr->big_row_buf+32; /* 32 spare bytes on each side */
+
+#ifdef PNG_MAX_MALLOC_64K
+   if ((png_uint_32)png_ptr->rowbytes + 1 > (png_uint_32)65536L)
+      png_error(png_ptr, "This image requires a row greater than 64KB");
+#endif
+   if ((png_uint_32)png_ptr->rowbytes > (png_uint_32)(PNG_SIZE_MAX - 1))
+      png_error(png_ptr, "Row has too many bytes to allocate in memory.");
+   png_ptr->prev_row = (png_bytep)png_malloc(png_ptr, (png_uint_32)(
+      png_ptr->rowbytes + 1));
+   /* prev_row starts out as all zero bytes */
+   png_memset_check(png_ptr, png_ptr->prev_row, 0, png_ptr->rowbytes + 1);
+
+   png_debug1(3, "width = %lu,\n", png_ptr->width);
+   png_debug1(3, "height = %lu,\n", png_ptr->height);
+   png_debug1(3, "iwidth = %lu,\n", png_ptr->iwidth);
+   png_debug1(3, "num_rows = %lu\n", png_ptr->num_rows);
+   png_debug1(3, "rowbytes = %lu,\n", png_ptr->rowbytes);
+   png_debug1(3, "irowbytes = %lu,\n", png_ptr->irowbytes);
+
+   png_ptr->flags |= PNG_FLAG_ROW_INIT;
+}
+#endif /* PNG_READ_SUPPORTED */
diff --git a/src/libpng/pngset.c b/src/libpng/pngset.c
new file mode 100644
index 0000000..b54a111
--- /dev/null
+++ b/src/libpng/pngset.c
@@ -0,0 +1,1246 @@
+
+/* pngset.c - storage of image information into info struct
+ *
+ * Last changed in libpng 1.2.22 [October 13, 2007]
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * The functions here are used during reads to store data from the file
+ * into the info struct, and during writes to store application data
+ * into the info struct for writing into the file.  This abstracts the
+ * info struct and allows us to change the structure in the future.
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+
+#if defined(PNG_bKGD_SUPPORTED)
+void PNGAPI
+png_set_bKGD(png_structp png_ptr, png_infop info_ptr, png_color_16p background)
+{
+   png_debug1(1, "in %s storage function\n", "bKGD");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return; /* no struct to store into: silently do nothing */
+   /* Keep a by-value copy of the bKGD colour and flag the chunk as present. */
+   png_memcpy(&(info_ptr->background), background, png_sizeof(png_color_16));
+   info_ptr->valid |= PNG_INFO_bKGD;
+}
+#endif
+
+#if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+void PNGAPI
+png_set_cHRM(png_structp png_ptr, png_infop info_ptr,
+   double white_x, double white_y, double red_x, double red_y,
+   double green_x, double green_y, double blue_x, double blue_y)
+{
+   png_debug1(1, "in %s storage function\n", "cHRM");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* Store the cHRM chromaticities; any out-of-range value aborts the store. */
+   if (white_x < 0.0 || white_y < 0.0 ||
+         red_x < 0.0 ||   red_y < 0.0 ||
+       green_x < 0.0 || green_y < 0.0 ||
+        blue_x < 0.0 ||  blue_y < 0.0)
+   {
+      png_warning(png_ptr,
+        "Ignoring attempt to set negative chromaticity value");
+      return;
+   }
+   if (white_x > 21474.83 || white_y > 21474.83 ||
+         red_x > 21474.83 ||   red_y > 21474.83 ||
+       green_x > 21474.83 || green_y > 21474.83 ||
+        blue_x > 21474.83 ||  blue_y > 21474.83)
+   {
+      png_warning(png_ptr,
+        "Ignoring attempt to set chromaticity value exceeding 21474.83");
+      return;
+   }
+   /* Keep float copies and, when enabled, integer copies scaled by 100000. */
+   info_ptr->x_white = (float)white_x;
+   info_ptr->y_white = (float)white_y;
+   info_ptr->x_red   = (float)red_x;
+   info_ptr->y_red   = (float)red_y;
+   info_ptr->x_green = (float)green_x;
+   info_ptr->y_green = (float)green_y;
+   info_ptr->x_blue  = (float)blue_x;
+   info_ptr->y_blue  = (float)blue_y;
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   info_ptr->int_x_white = (png_fixed_point)(white_x*100000.+0.5);
+   info_ptr->int_y_white = (png_fixed_point)(white_y*100000.+0.5);
+   info_ptr->int_x_red   = (png_fixed_point)(  red_x*100000.+0.5);
+   info_ptr->int_y_red   = (png_fixed_point)(  red_y*100000.+0.5);
+   info_ptr->int_x_green = (png_fixed_point)(green_x*100000.+0.5);
+   info_ptr->int_y_green = (png_fixed_point)(green_y*100000.+0.5);
+   info_ptr->int_x_blue  = (png_fixed_point)( blue_x*100000.+0.5);
+   info_ptr->int_y_blue  = (png_fixed_point)( blue_y*100000.+0.5);
+#endif
+   info_ptr->valid |= PNG_INFO_cHRM;
+}
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Store the cHRM chromaticities given as fixed-point integers, i.e. the
+ * real value times 100000 (31270 stands for 0.31270).
+ *
+ * The call warns and stores nothing if any value is negative or larger
+ * than PNG_UINT_31_MAX; it silently does nothing if png_ptr or info_ptr
+ * is NULL.  Float mirrors are filled in when floating point is enabled.
+ */
+void PNGAPI
+png_set_cHRM_fixed(png_structp png_ptr, png_infop info_ptr,
+   png_fixed_point white_x, png_fixed_point white_y, png_fixed_point red_x,
+   png_fixed_point red_y, png_fixed_point green_x, png_fixed_point green_y,
+   png_fixed_point blue_x, png_fixed_point blue_y)
+{
+   png_debug1(1, "in %s storage function\n", "cHRM");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   if (white_x < 0 || white_y < 0 ||
+         red_x < 0 ||   red_y < 0 ||
+       green_x < 0 || green_y < 0 ||
+        blue_x < 0 ||  blue_y < 0)
+   {
+      png_warning(png_ptr,
+        "Ignoring attempt to set negative chromaticity value");
+      return;
+   }
+   /* The arguments are already scaled, so the limit is the largest scaled
+    * value itself and is the same with or without floating point.  Dividing
+    * the limit by 100000 (as the non-float build used to) rejected ordinary
+    * values such as the sRGB white point x = 31270. */
+   if (white_x > (png_fixed_point) PNG_UINT_31_MAX ||
+       white_y > (png_fixed_point) PNG_UINT_31_MAX ||
+         red_x > (png_fixed_point) PNG_UINT_31_MAX ||
+         red_y > (png_fixed_point) PNG_UINT_31_MAX ||
+       green_x > (png_fixed_point) PNG_UINT_31_MAX ||
+       green_y > (png_fixed_point) PNG_UINT_31_MAX ||
+        blue_x > (png_fixed_point) PNG_UINT_31_MAX ||
+        blue_y > (png_fixed_point) PNG_UINT_31_MAX)
+   {
+      png_warning(png_ptr,
+        "Ignoring attempt to set chromaticity value exceeding 21474.83");
+      return;
+   }
+   info_ptr->int_x_white = white_x;
+   info_ptr->int_y_white = white_y;
+   info_ptr->int_x_red   = red_x;
+   info_ptr->int_y_red   = red_y;
+   info_ptr->int_x_green = green_x;
+   info_ptr->int_y_green = green_y;
+   info_ptr->int_x_blue  = blue_x;
+   info_ptr->int_y_blue  = blue_y;
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   info_ptr->x_white = (float)(white_x/100000.);
+   info_ptr->y_white = (float)(white_y/100000.);
+   info_ptr->x_red   = (float)(  red_x/100000.);
+   info_ptr->y_red   = (float)(  red_y/100000.);
+   info_ptr->x_green = (float)(green_x/100000.);
+   info_ptr->y_green = (float)(green_y/100000.);
+   info_ptr->x_blue  = (float)( blue_x/100000.);
+   info_ptr->y_blue  = (float)( blue_y/100000.);
+#endif
+   info_ptr->valid |= PNG_INFO_cHRM;
+}
+#endif
+#endif
+
+#if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+void PNGAPI
+png_set_gAMA(png_structp png_ptr, png_infop info_ptr, double file_gamma)
+{
+   double gamma;
+   png_debug1(1, "in %s storage function\n", "gAMA");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* Store the gAMA file gamma as a float and, when enabled, scaled by 100000.
+      Check for overflow: anything above 21474.83 is clamped to that value. */
+   if (file_gamma > 21474.83)
+   {
+      png_warning(png_ptr, "Limiting gamma to 21474.83");
+      gamma=21474.83;
+   }
+   else
+      gamma=file_gamma;
+   info_ptr->gamma = (float)gamma;
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   info_ptr->int_gamma = (int)(gamma*100000.+.5);
+#endif
+   info_ptr->valid |= PNG_INFO_gAMA;
+   if(gamma == 0.0) /* zero has already been stored above; this only warns */
+      png_warning(png_ptr, "Setting gamma=0");
+}
+#endif
+void PNGAPI
+png_set_gAMA_fixed(png_structp png_ptr, png_infop info_ptr, png_fixed_point
+   int_gamma)
+{
+   png_fixed_point gamma;
+   /* Fixed-point gAMA setter: int_gamma is the gamma value times 100000. */
+   png_debug1(1, "in %s storage function\n", "gAMA");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* Clamp int_gamma into [0, PNG_UINT_31_MAX], warning when it is changed. */
+   if (int_gamma > (png_fixed_point) PNG_UINT_31_MAX)
+   {
+     png_warning(png_ptr, "Limiting gamma to 21474.83");
+     gamma=PNG_UINT_31_MAX;
+   }
+   else
+   {
+     if (int_gamma < 0)
+     {
+       png_warning(png_ptr, "Setting negative gamma to zero");
+       gamma=0;
+     }
+     else
+       gamma=int_gamma;
+   }
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   info_ptr->gamma = (float)(gamma/100000.);
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   info_ptr->int_gamma = gamma;
+#endif
+   info_ptr->valid |= PNG_INFO_gAMA;
+   if(gamma == 0) /* also reached when a negative value was clamped to zero */
+      png_warning(png_ptr, "Setting gamma=0");
+}
+#endif
+
+#if defined(PNG_hIST_SUPPORTED)
+void PNGAPI
+png_set_hIST(png_structp png_ptr, png_infop info_ptr, png_uint_16p hist)
+{
+   int i;
+   /* Store a copy of the hIST histogram; a palette must already be present. */
+   png_debug1(1, "in %s storage function\n", "hIST");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   if (info_ptr->num_palette == 0 || info_ptr->num_palette
+       > PNG_MAX_PALETTE_LENGTH)
+   {
+       png_warning(png_ptr,
+          "Invalid palette size, hIST allocation skipped.");
+       return;
+   }
+
+#ifdef PNG_FREE_ME_SUPPORTED
+   png_free_data(png_ptr, info_ptr, PNG_FREE_HIST, 0);
+#endif
+   /* Changed from info->num_palette to PNG_MAX_PALETTE_LENGTH in version
+      1.2.1 */
+   png_ptr->hist = (png_uint_16p)png_malloc_warn(png_ptr,
+      (png_uint_32)(PNG_MAX_PALETTE_LENGTH * png_sizeof (png_uint_16)));
+   if (png_ptr->hist == NULL)
+     {
+       png_warning(png_ptr, "Insufficient memory for hIST chunk data.");
+       return;
+     }
+   /* Only the first num_palette slots of the full-size buffer are written. */
+   for (i = 0; i < info_ptr->num_palette; i++)
+       png_ptr->hist[i] = hist[i];
+   info_ptr->hist = png_ptr->hist;
+   info_ptr->valid |= PNG_INFO_hIST;
+
+#ifdef PNG_FREE_ME_SUPPORTED
+   info_ptr->free_me |= PNG_FREE_HIST;
+#else
+   png_ptr->flags |= PNG_FLAG_FREE_HIST;
+#endif
+}
+#endif
+
+void PNGAPI
+png_set_IHDR(png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 width, png_uint_32 height, int bit_depth,
+   int color_type, int interlace_type, int compression_type,
+   int filter_type)
+{
+   png_debug1(1, "in %s storage function\n", "IHDR");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* Validate the IHDR fields (mostly via png_error) before storing any. */
+   /* check for width and height valid values */
+   if (width == 0 || height == 0)
+      png_error(png_ptr, "Image width or height is zero in IHDR");
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   if (width > png_ptr->user_width_max || height > png_ptr->user_height_max)
+      png_error(png_ptr, "image size exceeds user limits in IHDR");
+#else
+   if (width > PNG_USER_WIDTH_MAX || height > PNG_USER_HEIGHT_MAX)
+      png_error(png_ptr, "image size exceeds user limits in IHDR");
+#endif
+   if (width > PNG_UINT_31_MAX || height > PNG_UINT_31_MAX)
+      png_error(png_ptr, "Invalid image size in IHDR");
+   if ( width > (PNG_UINT_32_MAX
+                 >> 3)      /* 8-byte RGBA pixels */
+                 - 64       /* bigrowbuf hack */
+                 - 1        /* filter byte */
+                 - 7*8      /* rounding of width to multiple of 8 pixels */
+                 - 8)       /* extra max_pixel_depth pad */
+      png_warning(png_ptr, "Width is too large for libpng to process pixels");
+   /* (an over-wide image only warns here; its rowbytes is set to 0 below) */
+   /* check other values */
+   if (bit_depth != 1 && bit_depth != 2 && bit_depth != 4 &&
+      bit_depth != 8 && bit_depth != 16)
+      png_error(png_ptr, "Invalid bit depth in IHDR");
+
+   if (color_type < 0 || color_type == 1 ||
+      color_type == 5 || color_type > 6)
+      png_error(png_ptr, "Invalid color type in IHDR");
+
+   if (((color_type == PNG_COLOR_TYPE_PALETTE) && bit_depth > 8) ||
+       ((color_type == PNG_COLOR_TYPE_RGB ||
+         color_type == PNG_COLOR_TYPE_GRAY_ALPHA ||
+         color_type == PNG_COLOR_TYPE_RGB_ALPHA) && bit_depth < 8))
+      png_error(png_ptr, "Invalid color type/bit depth combination in IHDR");
+
+   if (interlace_type >= PNG_INTERLACE_LAST)
+      png_error(png_ptr, "Unknown interlace method in IHDR");
+
+   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
+      png_error(png_ptr, "Unknown compression method in IHDR");
+
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   /* Accept filter_method 64 (intrapixel differencing) only if
+    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
+    * 2. Libpng did not read a PNG signature (this filter_method is only
+    *    used in PNG datastreams that are embedded in MNG datastreams) and
+    * 3. The application called png_permit_mng_features with a mask that
+    *    included PNG_FLAG_MNG_FILTER_64 and
+    * 4. The filter_method is 64 and
+    * 5. The color_type is RGB or RGBA
+    */
+   if((png_ptr->mode&PNG_HAVE_PNG_SIGNATURE)&&png_ptr->mng_features_permitted)
+      png_warning(png_ptr,"MNG features are not allowed in a PNG datastream");
+   if(filter_type != PNG_FILTER_TYPE_BASE)
+   {
+     if(!((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
+        (filter_type == PNG_INTRAPIXEL_DIFFERENCING) &&
+        ((png_ptr->mode&PNG_HAVE_PNG_SIGNATURE) == 0) &&
+        (color_type == PNG_COLOR_TYPE_RGB ||
+         color_type == PNG_COLOR_TYPE_RGB_ALPHA)))
+        png_error(png_ptr, "Unknown filter method in IHDR");
+     if(png_ptr->mode&PNG_HAVE_PNG_SIGNATURE)
+        png_warning(png_ptr, "Invalid filter method in IHDR");
+   }
+#else
+   if(filter_type != PNG_FILTER_TYPE_BASE)
+      png_error(png_ptr, "Unknown filter method in IHDR");
+#endif
+   /* Record the header fields, then derive channels and pixel_depth. */
+   info_ptr->width = width;
+   info_ptr->height = height;
+   info_ptr->bit_depth = (png_byte)bit_depth;
+   info_ptr->color_type =(png_byte) color_type;
+   info_ptr->compression_type = (png_byte)compression_type;
+   info_ptr->filter_type = (png_byte)filter_type;
+   info_ptr->interlace_type = (png_byte)interlace_type;
+   if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      info_ptr->channels = 1;
+   else if (info_ptr->color_type & PNG_COLOR_MASK_COLOR)
+      info_ptr->channels = 3;
+   else
+      info_ptr->channels = 1;
+   if (info_ptr->color_type & PNG_COLOR_MASK_ALPHA)
+      info_ptr->channels++;
+   info_ptr->pixel_depth = (png_byte)(info_ptr->channels * info_ptr->bit_depth);
+
+   /* check for potential overflow */
+   if (width > (PNG_UINT_32_MAX
+                 >> 3)      /* 8-byte RGBA pixels */
+                 - 64       /* bigrowbuf hack */
+                 - 1        /* filter byte */
+                 - 7*8      /* rounding of width to multiple of 8 pixels */
+                 - 8)       /* extra max_pixel_depth pad */
+      info_ptr->rowbytes = (png_size_t)0;
+   else
+      info_ptr->rowbytes = PNG_ROWBYTES(info_ptr->pixel_depth,width);
+}
+
+#if defined(PNG_oFFs_SUPPORTED)
+void PNGAPI
+png_set_oFFs(png_structp png_ptr, png_infop info_ptr,
+   png_int_32 offset_x, png_int_32 offset_y, int unit_type)
+{
+   png_debug1(1, "in %s storage function\n", "oFFs");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* Store the oFFs offsets and unit as given and flag the chunk as present. */
+   info_ptr->x_offset = offset_x;
+   info_ptr->y_offset = offset_y;
+   info_ptr->offset_unit_type = (png_byte)unit_type;
+   info_ptr->valid |= PNG_INFO_oFFs;
+}
+#endif
+
+#if defined(PNG_pCAL_SUPPORTED)
+/* Store pCAL data as private copies of purpose, units and nparams parameter
+ * strings.  An allocation failure warns and returns with pCAL left unmarked. */
+void PNGAPI
+png_set_pCAL(png_structp png_ptr, png_infop info_ptr,
+   png_charp purpose, png_int_32 X0, png_int_32 X1, int type, int nparams,
+   png_charp units, png_charpp params)
+{
+   png_uint_32 length;
+   int i;
+
+   png_debug1(1, "in %s storage function\n", "pCAL");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   length = png_strlen(purpose) + 1;
+   png_debug1(3, "allocating purpose for info (%lu bytes)\n", length);
+   info_ptr->pcal_purpose = (png_charp)png_malloc_warn(png_ptr, length);
+   if (info_ptr->pcal_purpose == NULL)
+     {
+       png_warning(png_ptr, "Insufficient memory for pCAL purpose.");
+       return;
+     }
+   png_memcpy(info_ptr->pcal_purpose, purpose, (png_size_t)length);
+
+   png_debug(3, "storing X0, X1, type, and nparams in info\n");
+   info_ptr->pcal_X0 = X0;
+   info_ptr->pcal_X1 = X1;
+   info_ptr->pcal_type = (png_byte)type;
+   info_ptr->pcal_nparams = (png_byte)nparams;
+
+   length = png_strlen(units) + 1;
+   png_debug1(3, "allocating units for info (%lu bytes)\n", length);
+   info_ptr->pcal_units = (png_charp)png_malloc_warn(png_ptr, length);
+   if (info_ptr->pcal_units == NULL)
+     {
+       png_warning(png_ptr, "Insufficient memory for pCAL units.");
+       return;
+     }
+   png_memcpy(info_ptr->pcal_units, units, (png_size_t)length);
+
+   info_ptr->pcal_params = (png_charpp)png_malloc_warn(png_ptr,
+      (png_uint_32)((nparams + 1) * png_sizeof(png_charp)));
+   if (info_ptr->pcal_params == NULL)
+     {
+       png_warning(png_ptr, "Insufficient memory for pCAL params.");
+       return;
+     }
+   /* Clear every slot (terminator included) so that after a failure in the
+      loop below the unfilled entries are NULL rather than indeterminate. */
+   png_memset(info_ptr->pcal_params, 0, (nparams + 1) * png_sizeof(png_charp));
+   for (i = 0; i < nparams; i++)
+   {
+      length = png_strlen(params[i]) + 1;
+      png_debug2(3, "allocating parameter %d for info (%lu bytes)\n", i, length);
+      info_ptr->pcal_params[i] = (png_charp)png_malloc_warn(png_ptr, length);
+      if (info_ptr->pcal_params[i] == NULL)
+        {
+          png_warning(png_ptr, "Insufficient memory for pCAL parameter.");
+          return;
+        }
+      png_memcpy(info_ptr->pcal_params[i], params[i], (png_size_t)length);
+   }
+   info_ptr->valid |= PNG_INFO_pCAL;
+#ifdef PNG_FREE_ME_SUPPORTED
+   info_ptr->free_me |= PNG_FREE_PCAL;
+#endif
+}
+#endif
+
+#if defined(PNG_READ_sCAL_SUPPORTED) || defined(PNG_WRITE_sCAL_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+void PNGAPI
+png_set_sCAL(png_structp png_ptr, png_infop info_ptr,
+             int unit, double width, double height)
+{
+   png_debug1(1, "in %s storage function\n", "sCAL");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* Store the sCAL unit and pixel width/height exactly as passed in. */
+   info_ptr->scal_unit = (png_byte)unit;
+   info_ptr->scal_pixel_width = width;
+   info_ptr->scal_pixel_height = height;
+
+   info_ptr->valid |= PNG_INFO_sCAL;
+}
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Store sCAL as private string copies of swidth and sheight.  If either
+ * allocation fails: warn, return, and leave no dangling pointer behind. */
+void PNGAPI
+png_set_sCAL_s(png_structp png_ptr, png_infop info_ptr,
+             int unit, png_charp swidth, png_charp sheight)
+{
+   png_uint_32 length;
+
+   png_debug1(1, "in %s storage function\n", "sCAL");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   info_ptr->scal_unit = (png_byte)unit;
+   length = png_strlen(swidth) + 1;
+   png_debug1(3, "allocating unit for info (%d bytes)\n", length);
+   info_ptr->scal_s_width = (png_charp)png_malloc_warn(png_ptr, length);
+   if (info_ptr->scal_s_width == NULL)
+   {
+      png_warning(png_ptr, "Memory allocation failed while processing sCAL.");
+      return; /* previously fell through and copied into a NULL buffer */
+   }
+   png_memcpy(info_ptr->scal_s_width, swidth, (png_size_t)length);
+
+   length = png_strlen(sheight) + 1;
+   png_debug1(3, "allocating unit for info (%d bytes)\n", length);
+   info_ptr->scal_s_height = (png_charp)png_malloc_warn(png_ptr, length);
+   if (info_ptr->scal_s_height == NULL)
+   {
+      png_free(png_ptr, info_ptr->scal_s_width);
+      info_ptr->scal_s_width = NULL; /* the width copy was just released */
+      png_warning(png_ptr, "Memory allocation failed while processing sCAL.");
+      return;
+   }
+   png_memcpy(info_ptr->scal_s_height, sheight, (png_size_t)length);
+   info_ptr->valid |= PNG_INFO_sCAL;
+#ifdef PNG_FREE_ME_SUPPORTED
+   info_ptr->free_me |= PNG_FREE_SCAL;
+#endif
+}
+#endif
+#endif
+#endif
+
+#if defined(PNG_pHYs_SUPPORTED)
+void PNGAPI
+png_set_pHYs(png_structp png_ptr, png_infop info_ptr,
+   png_uint_32 res_x, png_uint_32 res_y, int unit_type)
+{
+   png_debug1(1, "in %s storage function\n", "pHYs");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* Store the pHYs pixels-per-unit values and unit type as passed in. */
+   info_ptr->x_pixels_per_unit = res_x;
+   info_ptr->y_pixels_per_unit = res_y;
+   info_ptr->phys_unit_type = (png_byte)unit_type;
+   info_ptr->valid |= PNG_INFO_pHYs;
+}
+#endif
+
+void PNGAPI
+png_set_PLTE(png_structp png_ptr, png_infop info_ptr,
+   png_colorp palette, int num_palette)
+{
+   /* Store a copy of the first num_palette entries of palette (PLTE chunk). */
+   png_debug1(1, "in %s storage function\n", "PLTE");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* A bad length is a png_error for palette images, else warn and ignore. */
+   if (num_palette < 0 || num_palette > PNG_MAX_PALETTE_LENGTH)
+     {
+       if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+         png_error(png_ptr, "Invalid palette length");
+       else
+       {
+         png_warning(png_ptr, "Invalid palette length");
+         return;
+       }
+     }
+
+   /*
+    * It may not actually be necessary to set png_ptr->palette here;
+    * we do it for backward compatibility with the way the png_handle_tRNS
+    * function used to do the allocation.
+    */
+#ifdef PNG_FREE_ME_SUPPORTED
+   png_free_data(png_ptr, info_ptr, PNG_FREE_PLTE, 0);
+#endif
+
+   /* Changed in libpng-1.2.1 to allocate PNG_MAX_PALETTE_LENGTH instead
+      of num_palette entries,
+      in case of an invalid PNG file that has too-large sample values. */
+   png_ptr->palette = (png_colorp)png_malloc(png_ptr,
+      PNG_MAX_PALETTE_LENGTH * png_sizeof(png_color));
+   png_memset(png_ptr->palette, 0, PNG_MAX_PALETTE_LENGTH *
+      png_sizeof(png_color));
+   png_memcpy(png_ptr->palette, palette, num_palette * png_sizeof (png_color));
+   info_ptr->palette = png_ptr->palette;
+   info_ptr->num_palette = png_ptr->num_palette = (png_uint_16)num_palette;
+
+#ifdef PNG_FREE_ME_SUPPORTED
+   info_ptr->free_me |= PNG_FREE_PLTE;
+#else
+   png_ptr->flags |= PNG_FLAG_FREE_PLTE;
+#endif
+
+   info_ptr->valid |= PNG_INFO_PLTE;
+}
+
+#if defined(PNG_sBIT_SUPPORTED)
+void PNGAPI
+png_set_sBIT(png_structp png_ptr, png_infop info_ptr,
+   png_color_8p sig_bit)
+{
+   png_debug1(1, "in %s storage function\n", "sBIT");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* Keep a by-value copy of *sig_bit and flag the sBIT data as present. */
+   png_memcpy(&(info_ptr->sig_bit), sig_bit, png_sizeof (png_color_8));
+   info_ptr->valid |= PNG_INFO_sBIT;
+}
+#endif
+
+#if defined(PNG_sRGB_SUPPORTED)
+void PNGAPI
+png_set_sRGB(png_structp png_ptr, png_infop info_ptr, int intent)
+{
+   png_debug1(1, "in %s storage function\n", "sRGB");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* Store the sRGB intent (narrowed to a byte) and flag it as present. */
+   info_ptr->srgb_intent = (png_byte)intent;
+   info_ptr->valid |= PNG_INFO_sRGB;
+}
+
+void PNGAPI
+png_set_sRGB_gAMA_and_cHRM(png_structp png_ptr, png_infop info_ptr,
+   int intent)
+{
+#if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   float file_gamma;
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_fixed_point int_file_gamma;
+#endif
+#endif
+#if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   float white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y;
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   png_fixed_point int_white_x, int_white_y, int_red_x, int_red_y, int_green_x,
+      int_green_y, int_blue_x, int_blue_y;
+#endif
+#endif
+   png_debug1(1, "in %s storage function\n", "sRGB_gAMA_and_cHRM");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   /* Record the intent, then store the constant gAMA and cHRM values below. */
+   png_set_sRGB(png_ptr, info_ptr, intent);
+   /* (when float and fixed support are both enabled, both setters are called) */
+#if defined(PNG_gAMA_SUPPORTED)
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   file_gamma = (float).45455;
+   png_set_gAMA(png_ptr, info_ptr, file_gamma);
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   int_file_gamma = 45455L;
+   png_set_gAMA_fixed(png_ptr, info_ptr, int_file_gamma);
+#endif
+#endif
+
+#if defined(PNG_cHRM_SUPPORTED)
+#ifdef PNG_FIXED_POINT_SUPPORTED
+   int_white_x = 31270L;
+   int_white_y = 32900L;
+   int_red_x   = 64000L;
+   int_red_y   = 33000L;
+   int_green_x = 30000L;
+   int_green_y = 60000L;
+   int_blue_x  = 15000L;
+   int_blue_y  =  6000L;
+
+   png_set_cHRM_fixed(png_ptr, info_ptr,
+      int_white_x, int_white_y, int_red_x, int_red_y, int_green_x, int_green_y,
+      int_blue_x, int_blue_y);
+#endif
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+   white_x = (float).3127;
+   white_y = (float).3290;
+   red_x   = (float).64;
+   red_y   = (float).33;
+   green_x = (float).30;
+   green_y = (float).60;
+   blue_x  = (float).15;
+   blue_y  = (float).06;
+
+   png_set_cHRM(png_ptr, info_ptr,
+      white_x, white_y, red_x, red_y, green_x, green_y, blue_x, blue_y);
+#endif
+#endif
+}
+#endif
+
+
+#if defined(PNG_iCCP_SUPPORTED)
+void PNGAPI
+png_set_iCCP(png_structp png_ptr, png_infop info_ptr,
+             png_charp name, int compression_type,
+             png_charp profile, png_uint_32 proflen)
+{
+   png_charp new_iccp_name;
+   png_charp new_iccp_profile;
+   /* Store private copies of the iCCP profile name and its proflen bytes. */
+   png_debug1(1, "in %s storage function\n", "iCCP");
+   if (png_ptr == NULL || info_ptr == NULL || name == NULL || profile == NULL)
+      return;
+   /* Both copies are made before png_free_data is asked to drop old data. */
+   new_iccp_name = (png_charp)png_malloc_warn(png_ptr, png_strlen(name)+1);
+   if (new_iccp_name == NULL)
+   {
+      png_warning(png_ptr, "Insufficient memory to process iCCP chunk.");
+      return;
+   }
+   png_strncpy(new_iccp_name, name, png_strlen(name));
+   new_iccp_name[png_strlen(name)] = '\0';
+   new_iccp_profile = (png_charp)png_malloc_warn(png_ptr, proflen);
+   if (new_iccp_profile == NULL)
+   {
+      png_free (png_ptr, new_iccp_name);
+      png_warning(png_ptr, "Insufficient memory to process iCCP profile.");
+      return;
+   }
+   png_memcpy(new_iccp_profile, profile, (png_size_t)proflen);
+
+   png_free_data(png_ptr, info_ptr, PNG_FREE_ICCP, 0);
+
+   info_ptr->iccp_proflen = proflen;
+   info_ptr->iccp_name = new_iccp_name;
+   info_ptr->iccp_profile = new_iccp_profile;
+   /* Compression is always zero but is here so the API and info structure
+    * does not have to change if we introduce multiple compression types */
+   info_ptr->iccp_compression = (png_byte)compression_type;
+#ifdef PNG_FREE_ME_SUPPORTED
+   info_ptr->free_me |= PNG_FREE_ICCP;
+#endif
+   info_ptr->valid |= PNG_INFO_iCCP;
+}
+#endif
+
+#if defined(PNG_TEXT_SUPPORTED)
+/* Public wrapper around png_set_text_2(): a non-zero (failure) result from
+ * it is reported through png_error() instead of being returned. */
+void PNGAPI
+png_set_text(png_structp png_ptr, png_infop info_ptr, png_textp text_ptr,
+   int num_text)
+{
+   if (png_set_text_2(png_ptr, info_ptr, text_ptr, num_text) != 0)
+      png_error(png_ptr, "Insufficient memory to store text");
+}
+
+/* Append num_text entries from text_ptr to info_ptr->text, copying every
+ * string.  Returns 0 on success (or when there is nothing to do) and 1 if
+ * an allocation fails; entries whose key is NULL are skipped. */
+int /* PRIVATE */
+png_set_text_2(png_structp png_ptr, png_infop info_ptr, png_textp text_ptr,
+   int num_text)
+{
+   int i;
+
+   png_debug1(1, "in %s storage function\n", ((png_ptr == NULL ||
+      png_ptr->chunk_name[0] == '\0') ? "text" :
+      (png_const_charp)png_ptr->chunk_name));
+   if (png_ptr == NULL || info_ptr == NULL || num_text == 0)
+      return(0);
+
+   /* Make sure we have enough space in the "text" array in info_struct
+    * to hold all of the incoming text_ptr objects.
+    */
+   if (info_ptr->num_text + num_text > info_ptr->max_text)
+   {
+      if (info_ptr->text != NULL)
+      {
+         png_textp old_text;
+         int old_max;
+
+         old_max = info_ptr->max_text;
+         info_ptr->max_text = info_ptr->num_text + num_text + 8;
+         old_text = info_ptr->text;
+         info_ptr->text = (png_textp)png_malloc_warn(png_ptr,
+            (png_uint_32)(info_ptr->max_text * png_sizeof (png_text)));
+         if (info_ptr->text == NULL)
+           {
+             png_free(png_ptr, old_text);
+             return(1);
+           }
+         png_memcpy(info_ptr->text, old_text, (png_size_t)(old_max *
+            png_sizeof(png_text)));
+         png_free(png_ptr, old_text);
+      }
+      else
+      {
+         info_ptr->max_text = num_text + 8;
+         info_ptr->num_text = 0;
+         info_ptr->text = (png_textp)png_malloc_warn(png_ptr,
+            (png_uint_32)(info_ptr->max_text * png_sizeof (png_text)));
+         if (info_ptr->text == NULL)
+           return(1);
+#ifdef PNG_FREE_ME_SUPPORTED
+         info_ptr->free_me |= PNG_FREE_TEXT;
+#endif
+      }
+      png_debug1(3, "allocated %d entries for info_ptr->text\n",
+         info_ptr->max_text);
+   }
+   for (i = 0; i < num_text; i++)
+   {
+      png_size_t text_length, key_len, lang_len, lang_key_len;
+      png_textp textp = &(info_ptr->text[info_ptr->num_text]);
+
+      if (text_ptr[i].key == NULL)
+          continue;
+
+      key_len = png_strlen(text_ptr[i].key);
+
+      if(text_ptr[i].compression <= 0)
+      {
+        lang_len = 0;
+        lang_key_len = 0;
+      }
+      else
+#ifdef PNG_iTXt_SUPPORTED
+      {
+        /* set iTXt data */
+        if (text_ptr[i].lang != NULL)
+          lang_len = png_strlen(text_ptr[i].lang);
+        else
+          lang_len = 0;
+        if (text_ptr[i].lang_key != NULL)
+          lang_key_len = png_strlen(text_ptr[i].lang_key);
+        else
+          lang_key_len = 0;
+      }
+#else
+      {
+        png_warning(png_ptr, "iTXt chunk not supported.");
+        continue;
+      }
+#endif
+
+      if (text_ptr[i].text == NULL || text_ptr[i].text[0] == '\0')
+      {
+         text_length = 0;
+#ifdef PNG_iTXt_SUPPORTED
+         if(text_ptr[i].compression > 0)
+            textp->compression = PNG_ITXT_COMPRESSION_NONE;
+         else
+#endif
+            textp->compression = PNG_TEXT_COMPRESSION_NONE;
+      }
+      else
+      {
+         text_length = png_strlen(text_ptr[i].text);
+         textp->compression = text_ptr[i].compression;
+      }
+
+      textp->key = (png_charp)png_malloc_warn(png_ptr,
+         (png_uint_32)(key_len + text_length + lang_len + lang_key_len + 4));
+      if (textp->key == NULL)
+        return(1);
+      png_debug2(2, "Allocated %lu bytes at %x in png_set_text\n",
+         (png_uint_32)(key_len + lang_len + lang_key_len + text_length + 4),
+         (int)textp->key);
+
+      png_memcpy(textp->key, text_ptr[i].key, (png_size_t)(key_len));
+      *(textp->key+key_len) = '\0';
+#ifdef PNG_iTXt_SUPPORTED
+      if (text_ptr[i].compression > 0)
+      {
+         textp->lang=textp->key + key_len + 1;
+         png_memcpy(textp->lang, text_ptr[i].lang, lang_len);
+         *(textp->lang+lang_len) = '\0';
+         textp->lang_key=textp->lang + lang_len + 1;
+         png_memcpy(textp->lang_key, text_ptr[i].lang_key, lang_key_len);
+         *(textp->lang_key+lang_key_len) = '\0';
+         textp->text=textp->lang_key + lang_key_len + 1;
+      }
+      else
+#endif
+      {
+#ifdef PNG_iTXt_SUPPORTED
+         textp->lang=NULL;
+         textp->lang_key=NULL;
+#endif
+         textp->text=textp->key + key_len + 1;
+      }
+      if(text_length)
+         png_memcpy(textp->text, text_ptr[i].text, (png_size_t)(text_length));
+      *(textp->text+text_length) = '\0';
+
+#ifdef PNG_iTXt_SUPPORTED
+      if(textp->compression > 0)
+      {
+         textp->text_length = 0;
+         textp->itxt_length = text_length;
+      }
+      else
+#endif
+      {
+         textp->text_length = text_length;
+#ifdef PNG_iTXt_SUPPORTED
+         textp->itxt_length = 0;
+#endif
+      }
+      info_ptr->num_text++;
+      png_debug1(3, "transferred text chunk %d\n", info_ptr->num_text);
+   }
+   return(0);
+}
+#endif
+
+#if defined(PNG_tIME_SUPPORTED)
+void PNGAPI
+png_set_tIME(png_structp png_ptr, png_infop info_ptr, png_timep mod_time)
+{
+   png_debug1(1, "in %s storage function\n", "tIME");
+   if (png_ptr == NULL || info_ptr == NULL ||
+       (png_ptr->mode & PNG_WROTE_tIME))
+      return;
+   /* Copy *mod_time; the call is ignored once PNG_WROTE_tIME is set in mode. */
+   png_memcpy(&(info_ptr->mod_time), mod_time, png_sizeof (png_time));
+   info_ptr->valid |= PNG_INFO_tIME;
+}
+#endif
+
+#if defined(PNG_tRNS_SUPPORTED)
+/* Store tRNS data: a copy of the trans[] byte array (when trans != NULL)
+ * and/or the colour in *trans_values (when trans_values != NULL). */
+void PNGAPI
+png_set_tRNS(png_structp png_ptr, png_infop info_ptr,
+   png_bytep trans, int num_trans, png_color_16p trans_values)
+{
+   png_debug1(1, "in %s storage function\n", "tRNS");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   if (trans != NULL)
+   {
+       /* It may not actually be necessary to set png_ptr->trans here;
+        * we do it for backward compatibility with the way the png_handle_tRNS
+        * function used to do the allocation. */
+#ifdef PNG_FREE_ME_SUPPORTED
+       png_free_data(png_ptr, info_ptr, PNG_FREE_TRNS, 0);
+#endif
+       /* Changed from num_trans to PNG_MAX_PALETTE_LENGTH in version 1.2.1 */
+       png_ptr->trans = info_ptr->trans = (png_bytep)png_malloc(png_ptr,
+           (png_uint_32)PNG_MAX_PALETTE_LENGTH);
+       /* A negative count would turn into a huge size_t, so copy nothing. */
+       if (num_trans > 0 && num_trans <= PNG_MAX_PALETTE_LENGTH)
+         png_memcpy(info_ptr->trans, trans, (png_size_t)num_trans);
+#ifdef PNG_FREE_ME_SUPPORTED
+       info_ptr->free_me |= PNG_FREE_TRNS;
+#else
+       png_ptr->flags |= PNG_FLAG_FREE_TRNS;
+#endif
+   }
+
+   if (trans_values != NULL)
+   {
+      png_memcpy(&(info_ptr->trans_values), trans_values,
+         png_sizeof(png_color_16));
+      if (num_trans == 0)
+        num_trans = 1;
+   }
+   info_ptr->num_trans = (png_uint_16)num_trans;
+   info_ptr->valid |= PNG_INFO_tRNS;
+}
+#endif
+
+#if defined(PNG_sPLT_SUPPORTED)
+/* Append deep copies of nentries suggested palettes to info_ptr.  A palette
+ * whose name or entries cannot be allocated is left as an empty slot. */
+void PNGAPI
+png_set_sPLT(png_structp png_ptr,
+             png_infop info_ptr, png_sPLT_tp entries, int nentries)
+{
+    png_sPLT_tp np;
+    int i;
+
+    if (png_ptr == NULL || info_ptr == NULL)
+       return;
+    np = (png_sPLT_tp)png_malloc_warn(png_ptr,
+        (info_ptr->splt_palettes_num + nentries) * png_sizeof(png_sPLT_t));
+    if (np == NULL)
+    {
+      png_warning(png_ptr, "No memory for sPLT palettes.");
+      return;
+    }
+    png_memcpy(np, info_ptr->splt_palettes,
+           info_ptr->splt_palettes_num * png_sizeof(png_sPLT_t));
+    png_free(png_ptr, info_ptr->splt_palettes);
+    info_ptr->splt_palettes=NULL;
+
+    for (i = 0; i < nentries; i++)
+    {
+        png_sPLT_tp to = np + info_ptr->splt_palettes_num + i;
+        png_sPLT_tp from = entries + i;
+        png_uint_32 length = png_strlen(from->name) + 1;
+
+        /* Start from an empty slot so a failed copy never leaves junk in it. */
+        to->entries = NULL;
+        to->nentries = 0;
+        to->depth = from->depth;
+        to->name = (png_charp)png_malloc_warn(png_ptr, length);
+        if (to->name == NULL)
+        {
+           png_warning(png_ptr, "Out of memory while processing sPLT chunk");
+           continue;
+        }
+        png_memcpy(to->name, from->name, (png_size_t)length);
+        to->entries = (png_sPLT_entryp)png_malloc_warn(png_ptr,
+            from->nentries * png_sizeof(png_sPLT_entry));
+        if (to->entries == NULL)
+        {
+           png_warning(png_ptr, "Out of memory while processing sPLT chunk");
+           png_free(png_ptr,to->name);
+           to->name = NULL;
+           continue;
+        }
+        png_memcpy(to->entries, from->entries,
+            from->nentries * png_sizeof(png_sPLT_entry));
+        to->nentries = from->nentries;
+    }
+
+    info_ptr->splt_palettes = np;
+    info_ptr->splt_palettes_num += nentries;
+    info_ptr->valid |= PNG_INFO_sPLT;
+#ifdef PNG_FREE_ME_SUPPORTED
+    info_ptr->free_me |= PNG_FREE_SPLT;
+#endif
+}
+#endif /* PNG_sPLT_SUPPORTED */
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+/* Append copies of 'num_unknowns' chunks from 'unknowns' to info_ptr's list.
+ * If a chunk's data cannot be allocated a warning is issued and the slot is
+ * stored with data == NULL and size == 0. */
+void PNGAPI
+png_set_unknown_chunks(png_structp png_ptr,
+   png_infop info_ptr, png_unknown_chunkp unknowns, int num_unknowns)
+{
+    png_unknown_chunkp np;
+    int i;
+    if (png_ptr == NULL || info_ptr == NULL || num_unknowns == 0)
+        return;
+
+    np = (png_unknown_chunkp)png_malloc_warn(png_ptr,
+        (info_ptr->unknown_chunks_num + num_unknowns) *
+        png_sizeof(png_unknown_chunk));
+    if (np == NULL)
+    {
+       png_warning(png_ptr,
+          "Out of memory while processing unknown chunk.");
+       return;
+    }
+
+    png_memcpy(np, info_ptr->unknown_chunks,
+           info_ptr->unknown_chunks_num * png_sizeof(png_unknown_chunk));
+    png_free(png_ptr, info_ptr->unknown_chunks);
+    info_ptr->unknown_chunks=NULL;
+
+    for (i = 0; i < num_unknowns; i++)
+    {
+        png_unknown_chunkp to = np + info_ptr->unknown_chunks_num + i;
+        png_unknown_chunkp from = unknowns + i;
+
+        png_strncpy((png_charp)to->name, (png_charp)from->name, 4);
+        to->name[4] = '\0';
+        to->size = from->size;
+        /* note our location in the read or write sequence */
+        to->location = (png_byte)(png_ptr->mode & 0xff);
+        to->data = (png_bytep)png_malloc_warn(png_ptr, from->size);
+        if (to->data == NULL)
+        {
+           png_warning(png_ptr,
+              "Out of memory while processing unknown chunk.");
+           to->size = 0;   /* keep the failed slot self-consistent */
+           continue;
+        }
+        png_memcpy(to->data, from->data, from->size);
+    }
+
+    info_ptr->unknown_chunks = np;
+    info_ptr->unknown_chunks_num += num_unknowns;
+#ifdef PNG_FREE_ME_SUPPORTED
+    info_ptr->free_me |= PNG_FREE_UNKN;
+#endif
+}
+void PNGAPI   /* overwrite the location byte of unknown chunk 'chunk' */
+png_set_unknown_chunk_location(png_structp png_ptr, png_infop info_ptr,
+   int chunk, int location)
+{
+   if (png_ptr == NULL || info_ptr == NULL) return;
+   if (chunk >= 0 && chunk < (int)info_ptr->unknown_chunks_num)
+      info_ptr->unknown_chunks[chunk].location = (png_byte)location;
+}
+#endif
+
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+#if defined(PNG_READ_EMPTY_PLTE_SUPPORTED) || \
+    defined(PNG_WRITE_EMPTY_PLTE_SUPPORTED)
+/* Deprecated: use png_permit_mng_features(); to be removed in libpng-1.3.0.
+ * Copies the PNG_FLAG_MNG_EMPTY_PLTE bit of 'empty_plte_permitted' into
+ * png_ptr->mng_features_permitted and leaves the other bits untouched. */
+void PNGAPI
+png_permit_empty_plte (png_structp png_ptr, int empty_plte_permitted)
+{
+   png_debug(1, "in png_permit_empty_plte, DEPRECATED.\n");
+   if (png_ptr != NULL)
+      png_ptr->mng_features_permitted = (png_byte)
+         ((png_ptr->mng_features_permitted & (~PNG_FLAG_MNG_EMPTY_PLTE)) |
+          (empty_plte_permitted & PNG_FLAG_MNG_EMPTY_PLTE));
+}
+#endif
+#endif
+
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+/* Store 'mng_features' masked by PNG_ALL_MNG_FEATURES; returns the result. */
+png_uint_32 PNGAPI
+png_permit_mng_features (png_structp png_ptr, png_uint_32 mng_features)
+{
+   png_byte permitted = (png_byte)(mng_features & PNG_ALL_MNG_FEATURES);
+   png_debug(1, "in png_permit_mng_features\n");
+   if (png_ptr == NULL) return (png_uint_32)0;
+   png_ptr->mng_features_permitted = permitted;
+   return (png_uint_32)png_ptr->mng_features_permitted;
+}
+#endif
+
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+/* Set how unknown chunks are handled.  With num_chunks == 0, 'keep' sets
+ * the global keep flags; otherwise the 5-byte entries of 'chunk_list' are
+ * appended to png_ptr->chunk_list with 'keep' stored in each fifth byte. */
+void PNGAPI
+png_set_keep_unknown_chunks(png_structp png_ptr, int keep, png_bytep
+   chunk_list, int num_chunks)
+{
+    png_bytep merged, keep_byte;
+    int n, prev_count;
+    if (png_ptr == NULL)
+       return;
+    if (num_chunks == 0)
+    {
+      png_ptr->flags &= ~PNG_FLAG_KEEP_UNKNOWN_CHUNKS;
+      png_ptr->flags &= ~PNG_FLAG_KEEP_UNSAFE_CHUNKS;
+      if (keep == PNG_HANDLE_CHUNK_ALWAYS)
+        png_ptr->flags |= PNG_FLAG_KEEP_UNKNOWN_CHUNKS |
+                          PNG_FLAG_KEEP_UNSAFE_CHUNKS;
+      else if (keep == PNG_HANDLE_CHUNK_IF_SAFE)
+        png_ptr->flags |= PNG_FLAG_KEEP_UNKNOWN_CHUNKS;
+      return;
+    }
+    if (chunk_list == NULL)
+      return;
+    prev_count = png_ptr->num_chunk_list;
+    merged = (png_bytep)png_malloc(png_ptr,
+       (png_uint_32)(5*(num_chunks+prev_count)));
+    if (png_ptr->chunk_list != NULL)
+    {
+       png_memcpy(merged, png_ptr->chunk_list, (png_size_t)(5*prev_count));
+       png_free(png_ptr, png_ptr->chunk_list);
+       png_ptr->chunk_list = NULL;
+    }
+    png_memcpy(merged + 5*prev_count, chunk_list, (png_size_t)(5*num_chunks));
+    keep_byte = merged + 5*prev_count + 4;
+    for (n = 0; n < num_chunks; n++, keep_byte += 5)
+       *keep_byte = (png_byte)keep;
+    png_ptr->num_chunk_list = prev_count + num_chunks;
+    png_ptr->chunk_list = merged;
+#ifdef PNG_FREE_ME_SUPPORTED
+    png_ptr->free_me |= PNG_FREE_LIST;
+#endif
+}
+#endif
+
+#if defined(PNG_READ_USER_CHUNKS_SUPPORTED)
+/* Store read_user_chunk_fn together with user_chunk_ptr in png_ptr. */
+void PNGAPI
+png_set_read_user_chunk_fn(png_structp png_ptr, png_voidp user_chunk_ptr,
+   png_user_chunk_ptr read_user_chunk_fn)
+{
+   png_debug(1, "in png_set_read_user_chunk_fn\n");
+   if (png_ptr == NULL) return;
+   png_ptr->user_chunk_ptr = user_chunk_ptr;
+   png_ptr->read_user_chunk_fn = read_user_chunk_fn;
+}
+#endif
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+/* Install 'row_pointers' as the image rows of info_ptr.  A different,
+ * previously stored row array is released via png_free_data() first. */
+void PNGAPI
+png_set_rows(png_structp png_ptr, png_infop info_ptr, png_bytepp row_pointers)
+{
+   png_debug1(1, "in %s storage function\n", "rows");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   if (info_ptr->row_pointers != NULL &&
+       info_ptr->row_pointers != row_pointers)
+      png_free_data(png_ptr, info_ptr, PNG_FREE_ROWS, 0);
+   info_ptr->row_pointers = row_pointers;
+   if (row_pointers != NULL) info_ptr->valid |= PNG_INFO_IDAT;
+}
+#endif
+
+#ifdef PNG_WRITE_SUPPORTED
+/* Replace the zlib output buffer of png_ptr with a new one of 'size' bytes. */
+void PNGAPI
+png_set_compression_buffer_size(png_structp png_ptr, png_uint_32 size)
+{
+    if (png_ptr == NULL) return;
+    if (png_ptr->zbuf != NULL) png_free(png_ptr, png_ptr->zbuf);
+    png_ptr->zbuf = NULL;   /* no stale pointer left if png_malloc fails */
+    png_ptr->zbuf_size = (png_size_t)size;
+    png_ptr->zbuf = (png_bytep)png_malloc(png_ptr, size);
+    png_ptr->zstream.next_out = png_ptr->zbuf;
+    png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+}
+#endif
+
+void PNGAPI   /* clear the bits of 'mask' in info_ptr->valid */
+png_set_invalid(png_structp png_ptr, png_infop info_ptr, int mask)
+{
+   if (png_ptr == NULL || info_ptr == NULL) return;
+   info_ptr->valid &= ~mask;
+}
+
+
+#ifndef PNG_1_0_X
+#ifdef PNG_ASSEMBLER_CODE_SUPPORTED
+/* function was added to libpng 1.2.0 and should always exist by default */
+void PNGAPI
+png_set_asm_flags (png_structp png_ptr, png_uint_32 asm_flags)
+{
+    /* Obsolete as of libpng-1.2.20 and will be removed from libpng-1.4.0;
+     * the 'asm_flags' argument is ignored and the field is reset to 0. */
+    if (png_ptr != NULL) png_ptr->asm_flags = 0;
+}
+
+/* this function was added to libpng 1.2.0 */
+void PNGAPI
+png_set_mmx_thresholds (png_structp png_ptr,
+                        png_byte mmx_bitdepth_threshold,
+                        png_uint_32 mmx_rowbytes_threshold)
+{
+    /* Obsolete as of libpng-1.2.20 and will be removed from libpng-1.4.0.
+     * Both thresholds are ignored; the function stores nothing. */
+    if (png_ptr == NULL) return;
+}
+#endif /* ?PNG_ASSEMBLER_CODE_SUPPORTED */
+
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+/* this function was added to libpng 1.2.6 */
+void PNGAPI
+png_set_user_limits (png_structp png_ptr, png_uint_32 user_width_max,
+    png_uint_32 user_height_max)
+{
+    /* Images with dimensions larger than these limits will be
+     * rejected by png_set_IHDR().  To accept any PNG datastream
+     * regardless of dimensions, set both limits to 0x7fffffffL.
+     */
+    if(png_ptr == NULL) return;
+    png_ptr->user_width_max = user_width_max;
+    png_ptr->user_height_max = user_height_max;
+}
+#endif /* ?PNG_SET_USER_LIMITS_SUPPORTED */
+
+#endif /* ?PNG_1_0_X */
+#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/src/libpng/pngtrans.c b/src/libpng/pngtrans.c
new file mode 100644
index 0000000..1640095
--- /dev/null
+++ b/src/libpng/pngtrans.c
@@ -0,0 +1,662 @@
+
+/* pngtrans.c - transforms the data in a row (used by both readers and writers)
+ *
+ * Last changed in libpng 1.2.17 May 15, 2007
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+
+#if defined(PNG_READ_SUPPORTED) || defined(PNG_WRITE_SUPPORTED)
+#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
+/* Request the PNG_BGR transformation (BGR-to-RGB mapping) on png_ptr. */
+void PNGAPI
+png_set_bgr(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_bgr\n");
+   if (png_ptr != NULL)
+      png_ptr->transformations |= PNG_BGR;
+}
+#endif
+
+#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
+/* Request 16-bit byte swapping; ignored unless the bit depth is 16. */
+void PNGAPI
+png_set_swap(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_swap\n");
+   if (png_ptr == NULL || png_ptr->bit_depth != 16)
+      return;
+   png_ptr->transformations |= PNG_SWAP_BYTES;
+}
+#endif
+
+#if defined(PNG_READ_PACK_SUPPORTED) || defined(PNG_WRITE_PACK_SUPPORTED)
+/* Request the PNG_PACK transformation when the bit depth is below 8 and
+ * set usr_bit_depth to 8.  Has no effect at bit depths of 8 or more. */
+void PNGAPI
+png_set_packing(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_packing\n");
+   if (png_ptr == NULL || png_ptr->bit_depth >= 8)
+      return;
+
+   png_ptr->usr_bit_depth = 8;
+   png_ptr->transformations |= PNG_PACK;
+}
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)||defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+/* Request packed-pixel swapping; only applies when bit depth is below 8. */
+void PNGAPI
+png_set_packswap(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_packswap\n");
+   if (png_ptr == NULL || png_ptr->bit_depth >= 8)
+      return;
+   png_ptr->transformations |= PNG_PACKSWAP;
+}
+#endif
+
+#if defined(PNG_READ_SHIFT_SUPPORTED) || defined(PNG_WRITE_SHIFT_SUPPORTED)
+void PNGAPI   /* enable PNG_SHIFT and copy *true_bits into png_ptr->shift */
+png_set_shift(png_structp png_ptr, png_color_8p true_bits)
+{
+   png_debug(1, "in png_set_shift\n");
+   if (!png_ptr) return;
+   png_ptr->shift = *true_bits;
+   png_ptr->transformations |= PNG_SHIFT;
+}
+#endif
+
+#if defined(PNG_READ_INTERLACING_SUPPORTED) || \
+    defined(PNG_WRITE_INTERLACING_SUPPORTED)
+/* Enable interlace handling.  Returns 7 when png_ptr->interlaced is set (and
+ * turns on PNG_INTERLACE), otherwise 1; a NULL png_ptr also yields 1. */
+int PNGAPI
+png_set_interlace_handling(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_interlace handling\n");
+   if (png_ptr == NULL || !png_ptr->interlaced)
+      return (1);
+
+   png_ptr->transformations |= PNG_INTERLACE;
+   return (7);
+}
+#endif
+
+#if defined(PNG_READ_FILLER_SUPPORTED) || defined(PNG_WRITE_FILLER_SUPPORTED)
+/* Add a filler byte on read, or remove a filler or alpha byte on write.
+ * The filler type has changed in v0.95 to allow future 2-byte fillers
+ * for 48-bit input data, as well as to avoid problems with some compilers
+ * that don't like bytes as parameters.
+ */
+void PNGAPI
+png_set_filler(png_structp png_ptr, png_uint_32 filler, int filler_loc)
+{
+   png_debug(1, "in png_set_filler\n");
+   if (png_ptr == NULL)
+      return;
+
+   /* Only the low byte of 'filler' is kept. */
+   png_ptr->filler = (png_byte)filler;
+   png_ptr->transformations |= PNG_FILLER;
+
+   /* Any filler_loc other than PNG_FILLER_AFTER clears the AFTER flag. */
+   if (filler_loc != PNG_FILLER_AFTER)
+      png_ptr->flags &= ~PNG_FLAG_FILLER_AFTER;
+   else
+      png_ptr->flags |= PNG_FLAG_FILLER_AFTER;
+
+   /* The channel-count updates below should probably go in the
+    * "do_read_filler" routine.  That was attempted in libpng-1.0.1a but
+    * caused problems, so they were restored here in libpng-1.0.2a.
+    */
+   if (png_ptr->color_type == PNG_COLOR_TYPE_RGB)
+      png_ptr->usr_channels = 4;
+
+   /* Also added in libpng-1.0.2a (what happens when we expand
+    * a less-than-8-bit grayscale to GA?)
+    */
+   if (png_ptr->color_type == PNG_COLOR_TYPE_GRAY && png_ptr->bit_depth >= 8)
+      png_ptr->usr_channels = 2;
+}
+
+#if !defined(PNG_1_0_X)
+/* Added to libpng-1.2.7: png_set_filler() plus the PNG_ADD_ALPHA flag. */
+void PNGAPI
+png_set_add_alpha(png_structp png_ptr, png_uint_32 filler, int filler_loc)
+{
+   png_debug(1, "in png_set_add_alpha\n");
+   if (!png_ptr) return;
+   png_ptr->transformations |= PNG_ADD_ALPHA;
+   png_set_filler(png_ptr, filler, filler_loc);
+}
+#endif
+
+#endif
+
+#if defined(PNG_READ_SWAP_ALPHA_SUPPORTED) || \
+    defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+void PNGAPI   /* request the PNG_SWAP_ALPHA transformation */
+png_set_swap_alpha(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_swap_alpha\n");
+   if (png_ptr != NULL)
+      png_ptr->transformations |= PNG_SWAP_ALPHA;
+}
+#endif
+
+#if defined(PNG_READ_INVERT_ALPHA_SUPPORTED) || \
+    defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+void PNGAPI   /* request the PNG_INVERT_ALPHA transformation */
+png_set_invert_alpha(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_invert_alpha\n");
+   if (png_ptr != NULL)
+      png_ptr->transformations |= PNG_INVERT_ALPHA;
+}
+#endif
+
+#if defined(PNG_READ_INVERT_SUPPORTED) || defined(PNG_WRITE_INVERT_SUPPORTED)
+void PNGAPI   /* request the PNG_INVERT_MONO transformation */
+png_set_invert_mono(png_structp png_ptr)
+{
+   png_debug(1, "in png_set_invert_mono\n");
+   if (png_ptr != NULL)
+      png_ptr->transformations |= PNG_INVERT_MONO;
+}
+
+/* Invert the gray samples of a row in place.  Every byte of a GRAY row is
+ * complemented; in a GRAY_ALPHA row (8 or 16 bit) only the first sample of
+ * each pixel is complemented and the second is left as it is.
+ */
+void /* PRIVATE */
+png_do_invert(png_row_infop row_info, png_bytep row)
+{
+   png_uint_32 stride;       /* bytes from one inverted sample to the next */
+   png_uint_32 wide;         /* non-zero when the sample is two bytes wide */
+   png_uint_32 pos, limit;
+   png_bytep px;
+
+   png_debug(1, "in png_do_invert\n");
+  /* This test removed from libpng version 1.0.13 and 1.2.0:
+   *   if (row_info->bit_depth == 1 &&
+   */
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row == NULL || row_info == NULL)
+     return;
+#endif
+
+   if (row_info->color_type == PNG_COLOR_TYPE_GRAY)
+   {
+      stride = 1;
+      wide = 0;
+   }
+   else if (row_info->color_type != PNG_COLOR_TYPE_GRAY_ALPHA)
+      return;
+   else if (row_info->bit_depth == 8)
+   {
+      stride = 2;
+      wide = 0;
+   }
+   else if (row_info->bit_depth == 16)
+   {
+      stride = 4;
+      wide = 1;
+   }
+   else
+      return;
+
+   /* The bound is rowbytes, so the walk is counted in bytes rather than
+    * in pixels.
+    */
+   limit = row_info->rowbytes;
+   for (pos = 0, px = row; pos < limit; pos += stride, px += stride)
+   {
+      px[0] = (png_byte)(~px[0]);
+      if (wide)
+         px[1] = (png_byte)(~px[1]);
+   }
+}
+#endif
+
+#if defined(PNG_READ_SWAP_SUPPORTED) || defined(PNG_WRITE_SWAP_SUPPORTED)
+/* Exchange the two bytes of every 16-bit sample in the row, in place.
+ * Rows with any other bit depth are left untouched. */
+void /* PRIVATE */
+png_do_swap(png_row_infop row_info, png_bytep row)
+{
+   png_uint_32 remaining;
+   png_bytep sample;
+
+   png_debug(1, "in png_do_swap\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row == NULL || row_info == NULL)
+      return;
+#endif
+   if (row_info->bit_depth != 16)
+      return;
+   remaining = row_info->width * row_info->channels;
+   for (sample = row; remaining > 0; remaining--, sample += 2)
+   {
+      png_byte hold = sample[0];
+      sample[0] = sample[1];
+      sample[1] = hold;
+   }
+}
+#endif
+
+#if defined(PNG_READ_PACKSWAP_SUPPORTED)||defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+static PNG_CONST png_byte onebppswaptable[256] = { /* [b] = b bit-reversed */
+   0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0,
+   0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
+   0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8,
+   0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
+   0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4,
+   0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+   0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
+   0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
+   0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2,
+   0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
+   0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA,
+   0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+   0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6,
+   0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
+   0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE,
+   0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
+   0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1,
+   0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+   0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9,
+   0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
+   0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
+   0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
+   0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED,
+   0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+   0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3,
+   0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+   0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB,
+   0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
+   0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7,
+   0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+   0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF,
+   0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+};
+
+static PNG_CONST png_byte twobppswaptable[256] = { /* 2-bit fields reversed */
+   0x00, 0x40, 0x80, 0xC0, 0x10, 0x50, 0x90, 0xD0,
+   0x20, 0x60, 0xA0, 0xE0, 0x30, 0x70, 0xB0, 0xF0,
+   0x04, 0x44, 0x84, 0xC4, 0x14, 0x54, 0x94, 0xD4,
+   0x24, 0x64, 0xA4, 0xE4, 0x34, 0x74, 0xB4, 0xF4,
+   0x08, 0x48, 0x88, 0xC8, 0x18, 0x58, 0x98, 0xD8,
+   0x28, 0x68, 0xA8, 0xE8, 0x38, 0x78, 0xB8, 0xF8,
+   0x0C, 0x4C, 0x8C, 0xCC, 0x1C, 0x5C, 0x9C, 0xDC,
+   0x2C, 0x6C, 0xAC, 0xEC, 0x3C, 0x7C, 0xBC, 0xFC,
+   0x01, 0x41, 0x81, 0xC1, 0x11, 0x51, 0x91, 0xD1,
+   0x21, 0x61, 0xA1, 0xE1, 0x31, 0x71, 0xB1, 0xF1,
+   0x05, 0x45, 0x85, 0xC5, 0x15, 0x55, 0x95, 0xD5,
+   0x25, 0x65, 0xA5, 0xE5, 0x35, 0x75, 0xB5, 0xF5,
+   0x09, 0x49, 0x89, 0xC9, 0x19, 0x59, 0x99, 0xD9,
+   0x29, 0x69, 0xA9, 0xE9, 0x39, 0x79, 0xB9, 0xF9,
+   0x0D, 0x4D, 0x8D, 0xCD, 0x1D, 0x5D, 0x9D, 0xDD,
+   0x2D, 0x6D, 0xAD, 0xED, 0x3D, 0x7D, 0xBD, 0xFD,
+   0x02, 0x42, 0x82, 0xC2, 0x12, 0x52, 0x92, 0xD2,
+   0x22, 0x62, 0xA2, 0xE2, 0x32, 0x72, 0xB2, 0xF2,
+   0x06, 0x46, 0x86, 0xC6, 0x16, 0x56, 0x96, 0xD6,
+   0x26, 0x66, 0xA6, 0xE6, 0x36, 0x76, 0xB6, 0xF6,
+   0x0A, 0x4A, 0x8A, 0xCA, 0x1A, 0x5A, 0x9A, 0xDA,
+   0x2A, 0x6A, 0xAA, 0xEA, 0x3A, 0x7A, 0xBA, 0xFA,
+   0x0E, 0x4E, 0x8E, 0xCE, 0x1E, 0x5E, 0x9E, 0xDE,
+   0x2E, 0x6E, 0xAE, 0xEE, 0x3E, 0x7E, 0xBE, 0xFE,
+   0x03, 0x43, 0x83, 0xC3, 0x13, 0x53, 0x93, 0xD3,
+   0x23, 0x63, 0xA3, 0xE3, 0x33, 0x73, 0xB3, 0xF3,
+   0x07, 0x47, 0x87, 0xC7, 0x17, 0x57, 0x97, 0xD7,
+   0x27, 0x67, 0xA7, 0xE7, 0x37, 0x77, 0xB7, 0xF7,
+   0x0B, 0x4B, 0x8B, 0xCB, 0x1B, 0x5B, 0x9B, 0xDB,
+   0x2B, 0x6B, 0xAB, 0xEB, 0x3B, 0x7B, 0xBB, 0xFB,
+   0x0F, 0x4F, 0x8F, 0xCF, 0x1F, 0x5F, 0x9F, 0xDF,
+   0x2F, 0x6F, 0xAF, 0xEF, 0x3F, 0x7F, 0xBF, 0xFF
+};
+
+static PNG_CONST png_byte fourbppswaptable[256] = { /* nibbles exchanged */
+   0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+   0x80, 0x90, 0xA0, 0xB0, 0xC0, 0xD0, 0xE0, 0xF0,
+   0x01, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71,
+   0x81, 0x91, 0xA1, 0xB1, 0xC1, 0xD1, 0xE1, 0xF1,
+   0x02, 0x12, 0x22, 0x32, 0x42, 0x52, 0x62, 0x72,
+   0x82, 0x92, 0xA2, 0xB2, 0xC2, 0xD2, 0xE2, 0xF2,
+   0x03, 0x13, 0x23, 0x33, 0x43, 0x53, 0x63, 0x73,
+   0x83, 0x93, 0xA3, 0xB3, 0xC3, 0xD3, 0xE3, 0xF3,
+   0x04, 0x14, 0x24, 0x34, 0x44, 0x54, 0x64, 0x74,
+   0x84, 0x94, 0xA4, 0xB4, 0xC4, 0xD4, 0xE4, 0xF4,
+   0x05, 0x15, 0x25, 0x35, 0x45, 0x55, 0x65, 0x75,
+   0x85, 0x95, 0xA5, 0xB5, 0xC5, 0xD5, 0xE5, 0xF5,
+   0x06, 0x16, 0x26, 0x36, 0x46, 0x56, 0x66, 0x76,
+   0x86, 0x96, 0xA6, 0xB6, 0xC6, 0xD6, 0xE6, 0xF6,
+   0x07, 0x17, 0x27, 0x37, 0x47, 0x57, 0x67, 0x77,
+   0x87, 0x97, 0xA7, 0xB7, 0xC7, 0xD7, 0xE7, 0xF7,
+   0x08, 0x18, 0x28, 0x38, 0x48, 0x58, 0x68, 0x78,
+   0x88, 0x98, 0xA8, 0xB8, 0xC8, 0xD8, 0xE8, 0xF8,
+   0x09, 0x19, 0x29, 0x39, 0x49, 0x59, 0x69, 0x79,
+   0x89, 0x99, 0xA9, 0xB9, 0xC9, 0xD9, 0xE9, 0xF9,
+   0x0A, 0x1A, 0x2A, 0x3A, 0x4A, 0x5A, 0x6A, 0x7A,
+   0x8A, 0x9A, 0xAA, 0xBA, 0xCA, 0xDA, 0xEA, 0xFA,
+   0x0B, 0x1B, 0x2B, 0x3B, 0x4B, 0x5B, 0x6B, 0x7B,
+   0x8B, 0x9B, 0xAB, 0xBB, 0xCB, 0xDB, 0xEB, 0xFB,
+   0x0C, 0x1C, 0x2C, 0x3C, 0x4C, 0x5C, 0x6C, 0x7C,
+   0x8C, 0x9C, 0xAC, 0xBC, 0xCC, 0xDC, 0xEC, 0xFC,
+   0x0D, 0x1D, 0x2D, 0x3D, 0x4D, 0x5D, 0x6D, 0x7D,
+   0x8D, 0x9D, 0xAD, 0xBD, 0xCD, 0xDD, 0xED, 0xFD,
+   0x0E, 0x1E, 0x2E, 0x3E, 0x4E, 0x5E, 0x6E, 0x7E,
+   0x8E, 0x9E, 0xAE, 0xBE, 0xCE, 0xDE, 0xEE, 0xFE,
+   0x0F, 0x1F, 0x2F, 0x3F, 0x4F, 0x5F, 0x6F, 0x7F,
+   0x8F, 0x9F, 0xAF, 0xBF, 0xCF, 0xDF, 0xEF, 0xFF
+};
+
+/* Reverse the order of the packed pixels inside every byte of the row,
+ * using the lookup table for the row's bit depth (1, 2 or 4).  Rows with
+ * any other bit depth are returned unchanged.
+ */
+void /* PRIVATE */
+png_do_packswap(png_row_infop row_info, png_bytep row)
+{
+   png_bytep lut, cur, stop;
+
+   png_debug(1, "in png_do_packswap\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row == NULL || row_info == NULL)
+      return;
+#endif
+
+   switch (row_info->bit_depth)
+   {
+      case 1:  lut = (png_bytep)onebppswaptable;  break;
+      case 2:  lut = (png_bytep)twobppswaptable;  break;
+      case 4:  lut = (png_bytep)fourbppswaptable; break;
+      default: return;
+   }
+
+   /* every byte is replaced by its table entry */
+   stop = row + row_info->rowbytes;
+   for (cur = row; cur < stop; cur++)
+      *cur = lut[*cur];
+}
+#endif /* PNG_READ_PACKSWAP_SUPPORTED or PNG_WRITE_PACKSWAP_SUPPORTED */
+
+#if defined(PNG_WRITE_FILLER_SUPPORTED) || \
+    defined(PNG_READ_STRIP_ALPHA_SUPPORTED)
+/* remove filler or alpha byte(s) in place and update row_info to match */
+void /* PRIVATE */
+png_do_strip_filler(png_row_infop row_info, png_bytep row, png_uint_32 flags)
+{
+   png_debug(1, "in png_do_strip_filler\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      png_bytep sp=row;   /* read position */
+      png_bytep dp=row;   /* write position; never ahead of sp */
+      png_uint_32 row_width=row_info->width;
+      png_uint_32 i;
+
+      if ((row_info->color_type == PNG_COLOR_TYPE_RGB ||
+         (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA &&
+         (flags & PNG_FLAG_STRIP_ALPHA))) &&
+         row_info->channels == 4)
+      {
+         if (row_info->bit_depth == 8)
+         {
+            /* This converts from RGBX or RGBA to RGB */
+            if (flags & PNG_FLAG_FILLER_AFTER)
+            {
+               dp+=3; sp+=4;   /* first pixel is already in place */
+               for (i = 1; i < row_width; i++)
+               {
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  sp++;
+               }
+            }
+            /* This converts from XRGB or ARGB to RGB */
+            else
+            {
+               for (i = 0; i < row_width; i++)
+               {
+                  sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+               }
+            }
+            row_info->pixel_depth = 24;
+            row_info->rowbytes = row_width * 3;
+         }
+         else /* if (row_info->bit_depth == 16) */
+         {
+            if (flags & PNG_FLAG_FILLER_AFTER)
+            {
+               /* This converts from RRGGBBXX or RRGGBBAA to RRGGBB */
+               sp += 8; dp += 6;   /* first pixel is already in place */
+               for (i = 1; i < row_width; i++)
+               {
+                  /* This could be (although png_memcpy is probably slower):
+                  png_memcpy(dp, sp, 6);
+                  sp += 8;
+                  dp += 6;
+                  */
+
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  sp += 2;
+               }
+            }
+            else
+            {
+               /* This converts from XXRRGGBB or AARRGGBB to RRGGBB */
+               for (i = 0; i < row_width; i++)
+               {
+                  /* This could be (although png_memcpy is probably slower):
+                  png_memcpy(dp, sp, 6);
+                  sp += 8;
+                  dp += 6;
+                  */
+
+                  sp+=2;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+               }
+            }
+            row_info->pixel_depth = 48;
+            row_info->rowbytes = row_width * 6;
+         }
+         row_info->channels = 3;
+      }
+      else if ((row_info->color_type == PNG_COLOR_TYPE_GRAY ||
+         (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA &&
+         (flags & PNG_FLAG_STRIP_ALPHA))) &&
+          row_info->channels == 2)
+      {
+         if (row_info->bit_depth == 8)
+         {
+            /* This converts from GX or GA to G */
+            if (flags & PNG_FLAG_FILLER_AFTER)
+            {
+               for (i = 0; i < row_width; i++)
+               {
+                  *dp++ = *sp++;
+                  sp++;
+               }
+            }
+            /* This converts from XG or AG to G */
+            else
+            {
+               for (i = 0; i < row_width; i++)
+               {
+                  sp++;
+                  *dp++ = *sp++;
+               }
+            }
+            row_info->pixel_depth = 8;
+            row_info->rowbytes = row_width;
+         }
+         else /* if (row_info->bit_depth == 16) */
+         {
+            if (flags & PNG_FLAG_FILLER_AFTER)
+            {
+               /* This converts from GGXX or GGAA to GG */
+               sp += 4; dp += 2;   /* first pixel is already in place */
+               for (i = 1; i < row_width; i++)
+               {
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+                  sp += 2;
+               }
+            }
+            else
+            {
+               /* This converts from XXGG or AAGG to GG */
+               for (i = 0; i < row_width; i++)
+               {
+                  sp += 2;
+                  *dp++ = *sp++;
+                  *dp++ = *sp++;
+               }
+            }
+            row_info->pixel_depth = 16;
+            row_info->rowbytes = row_width * 2;
+         }
+         row_info->channels = 1;
+      }
+      if (flags & PNG_FLAG_STRIP_ALPHA)   /* applied even if nothing moved */
+        row_info->color_type &= ~PNG_COLOR_MASK_ALPHA;
+   }
+}
+#endif
+
+#if defined(PNG_READ_BGR_SUPPORTED) || defined(PNG_WRITE_BGR_SUPPORTED)
+/* swaps red and blue samples in place; RGB and RGB_ALPHA, 8 or 16 bit */
+void /* PRIVATE */
+png_do_bgr(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_bgr\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+       (row_info->color_type & PNG_COLOR_MASK_COLOR))
+   {
+      png_uint_32 row_width = row_info->width;
+      if (row_info->bit_depth == 8)
+      {
+         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+         {
+            png_bytep rp;
+            png_uint_32 i;
+
+            for (i = 0, rp = row; i < row_width; i++, rp += 3)
+            {
+               png_byte save = *rp;
+               *rp = *(rp + 2);
+               *(rp + 2) = save;
+            }
+         }
+         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+         {
+            png_bytep rp;
+            png_uint_32 i;
+
+            for (i = 0, rp = row; i < row_width; i++, rp += 4)
+            {
+               png_byte save = *rp;   /* byte 3 of each pixel is not touched */
+               *rp = *(rp + 2);
+               *(rp + 2) = save;
+            }
+         }
+      }
+      else if (row_info->bit_depth == 16)
+      {
+         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+         {
+            png_bytep rp;
+            png_uint_32 i;
+
+            for (i = 0, rp = row; i < row_width; i++, rp += 6)
+            {
+               png_byte save = *rp;   /* swap bytes 0<->4, then 1<->5 */
+               *rp = *(rp + 4);
+               *(rp + 4) = save;
+               save = *(rp + 1);
+               *(rp + 1) = *(rp + 5);
+               *(rp + 5) = save;
+            }
+         }
+         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+         {
+            png_bytep rp;
+            png_uint_32 i;
+
+            for (i = 0, rp = row; i < row_width; i++, rp += 8)
+            {
+               png_byte save = *rp;   /* bytes 6..7 of each pixel untouched */
+               *rp = *(rp + 4);
+               *(rp + 4) = save;
+               save = *(rp + 1);
+               *(rp + 1) = *(rp + 5);
+               *(rp + 5) = save;
+            }
+         }
+      }
+   }
+}
+#endif /* PNG_READ_BGR_SUPPORTED or PNG_WRITE_BGR_SUPPORTED */
+
+#if defined(PNG_READ_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) || \
+    defined(PNG_LEGACY_SUPPORTED)
+void PNGAPI   /* store user transform info, or warn when unsupported */
+png_set_user_transform_info(png_structp png_ptr, png_voidp
+   user_transform_ptr, int user_transform_depth, int user_transform_channels)
+{
+   png_debug(1, "in png_set_user_transform_info\n");
+   if (!png_ptr) return;
+#if defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
+   png_ptr->user_transform_channels = (png_byte)user_transform_channels;
+   png_ptr->user_transform_depth = (png_byte)user_transform_depth;
+   png_ptr->user_transform_ptr = user_transform_ptr;
+#else
+   if (user_transform_channels || user_transform_depth || user_transform_ptr)
+      png_warning(png_ptr,
+        "This version of libpng does not support user transform info");
+#endif
+}
+#endif
+
+/* This function returns a pointer to the user_transform_ptr associated with
+ * the user transform functions.  The application should free any memory
+ * associated with this pointer before png_write_destroy and png_read_destroy
+ * are called.
+ */
+png_voidp PNGAPI
+png_get_user_transform_ptr(png_structp png_ptr)
+{
+#if defined(PNG_USER_TRANSFORM_PTR_SUPPORTED)
+   if (png_ptr != NULL)
+      return ((png_voidp)png_ptr->user_transform_ptr);
+#endif
+   /* NULL png_ptr, or built without PNG_USER_TRANSFORM_PTR_SUPPORTED */
+   return (NULL);
+}
+#endif /* PNG_READ_SUPPORTED || PNG_WRITE_SUPPORTED */
diff --git a/src/libpng/pngvcrd.c b/src/libpng/pngvcrd.c
new file mode 100644
index 0000000..ce4233e
--- /dev/null
+++ b/src/libpng/pngvcrd.c
@@ -0,0 +1 @@
+/* pngvcrd.c was removed from libpng-1.2.20. */
diff --git a/src/libpng/pngwio.c b/src/libpng/pngwio.c
new file mode 100644
index 0000000..371a4fa
--- /dev/null
+++ b/src/libpng/pngwio.c
@@ -0,0 +1,234 @@
+
+/* pngwio.c - functions for data output
+ *
+ * Last changed in libpng 1.2.13 November 13, 2006
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2006 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ *
+ * This file provides a location for all output.  Users who need
+ * special handling are expected to write functions that have the same
+ * arguments as these and perform similar functions, but that possibly
+ * use different output methods.  Note that you shouldn't change these
+ * functions, but rather write replacement functions and then change
+ * them at run time with png_set_write_fn(...).
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+#ifdef PNG_WRITE_SUPPORTED
+
+/* Write the data to whatever output you are using.  The default routine
+   writes to a file pointer.  Note that this routine sometimes gets called
+   with very small lengths, so you should implement some kind of simple
+   buffering if you are using unbuffered writes.  This should never be asked
+   to write more than 64K on a 16 bit machine.  */
+
+void /* PRIVATE */
+png_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   if (png_ptr->write_data_fn == NULL) /* no writer installed: fatal */
+      png_error(png_ptr, "Call to NULL write function");
+   else
+      png_ptr->write_data_fn(png_ptr, data, length);
+}
+
+#if !defined(PNG_NO_STDIO)
+/* This is the function that does the actual writing of data.  If you are
+   not writing to a standard C stream, you should create a replacement
+   write_data function and use it at run time with png_set_write_fn(), rather
+   than changing the library. */
+#ifndef USE_FAR_KEYWORD
+void PNGAPI
+png_default_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   png_uint_32 check;
+
+   if(png_ptr == NULL) return;
+#if defined(_WIN32_WCE)
+   if ( !WriteFile((HANDLE)(png_ptr->io_ptr), data, length, &check, NULL) )
+      check = 0;
+#else
+   check = fwrite(data, 1, length, (png_FILE_p)(png_ptr->io_ptr));
+#endif
+   if (check != length)
+      png_error(png_ptr, "Write Error");
+}
+#else
+/* this is the model-independent version. Since the standard I/O library
+   can't handle far buffers in the medium and small models, we have to copy
+   the data.
+*/
+
+#define NEAR_BUF_SIZE 1024 /* size of the near staging buffer, in bytes */
+#define MIN(a,b) ((a) <= (b) ? (a) : (b)) /* NB: evaluates an argument twice */
+
+void PNGAPI
+png_default_write_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   png_uint_32 check;
+   png_byte *near_data;  /* Needs to be "png_byte *" instead of "png_bytep" */
+   png_FILE_p io_ptr;
+
+   if(png_ptr == NULL) return;
+   /* Check if data really is near. If so, use usual code. */
+   near_data = (png_byte *)CVT_PTR_NOCHECK(data);
+   io_ptr = (png_FILE_p)CVT_PTR(png_ptr->io_ptr);
+   if ((png_bytep)near_data == data)
+   {
+#if defined(_WIN32_WCE)
+      if ( !WriteFile(io_ptr, near_data, length, &check, NULL) )
+         check = 0;
+#else
+      check = fwrite(near_data, 1, length, io_ptr);
+#endif
+   }
+   else
+   {
+      png_byte buf[NEAR_BUF_SIZE];
+      png_size_t written, remaining, err;
+      check = 0;
+      remaining = length;
+      do
+      {
+         written = MIN(NEAR_BUF_SIZE, remaining);
+         png_memcpy(buf, data, written); /* copy far buffer to near buffer */
+#if defined(_WIN32_WCE)
+         if ( !WriteFile(io_ptr, buf, written, &err, NULL) )
+            err = 0;
+#else
+         err = fwrite(buf, 1, written, io_ptr);
+#endif
+         if (err != written)
+            break;
+         else
+            check += err;
+         data += written;
+         remaining -= written;
+      }
+      while (remaining != 0);
+   }
+   if (check != length)
+      png_error(png_ptr, "Write Error");
+}
+
+#endif
+#endif
+
+/* This function is called to output any data pending writing (normally
+   to disk).  After png_flush is called, there should be no data pending
+   writing in any buffers. */
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+void /* PRIVATE */
+png_flush(png_structp png_ptr)
+{
+   if (png_ptr->output_flush_fn != NULL)
+      (*(png_ptr->output_flush_fn))(png_ptr);
+}
+
+#if !defined(PNG_NO_STDIO)
+void PNGAPI
+png_default_flush(png_structp png_ptr)
+{
+#if !defined(_WIN32_WCE)
+   png_FILE_p io_ptr;
+#endif
+   if(png_ptr == NULL) return;
+#if !defined(_WIN32_WCE)
+   io_ptr = (png_FILE_p)CVT_PTR((png_ptr->io_ptr));
+   if (io_ptr != NULL)
+      fflush(io_ptr);
+#endif
+}
+#endif
+#endif
+
+/* This function allows the application to supply new output functions for
+   libpng if standard C streams aren't being used.
+
+   This function takes as its arguments:
+   png_ptr       - pointer to a png output data structure
+   io_ptr        - pointer to user supplied structure containing info about
+                   the output functions.  May be NULL.
+   write_data_fn - pointer to a new output function that takes as its
+                   arguments a pointer to a png_struct, a pointer to
+                   data to be written, and a 32-bit unsigned int that is
+                   the number of bytes to be written.  The new write
+                   function should call png_error(png_ptr, "Error msg")
+                   to exit and output any fatal error messages.
+   flush_data_fn - pointer to a new flush function that takes as its
+                   arguments a pointer to a png_struct.  After a call to
+                   the flush function, there should be no data in any buffers
+                   or pending transmission.  If the output method doesn't do
+                   any buffering of output, a function prototype must still be
+                   supplied although it doesn't have to do anything.  If
+                   PNG_WRITE_FLUSH_SUPPORTED is not defined at libpng compile
+                   time, output_flush_fn will be ignored, although it must be
+                   supplied for compatibility. */
+void PNGAPI
+png_set_write_fn(png_structp png_ptr, png_voidp io_ptr,
+   png_rw_ptr write_data_fn, png_flush_ptr output_flush_fn)
+{
+   if(png_ptr == NULL) return;
+   png_ptr->io_ptr = io_ptr;
+
+#if !defined(PNG_NO_STDIO)
+   if (write_data_fn != NULL)
+      png_ptr->write_data_fn = write_data_fn;
+   else
+      png_ptr->write_data_fn = png_default_write_data;
+#else
+   png_ptr->write_data_fn = write_data_fn;
+#endif
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+#if !defined(PNG_NO_STDIO)
+   if (output_flush_fn != NULL)
+      png_ptr->output_flush_fn = output_flush_fn;
+   else
+      png_ptr->output_flush_fn = png_default_flush;
+#else
+   png_ptr->output_flush_fn = output_flush_fn;
+#endif
+#endif /* PNG_WRITE_FLUSH_SUPPORTED */
+
+   /* It is an error to read while writing a png file */
+   if (png_ptr->read_data_fn != NULL)
+   {
+      png_ptr->read_data_fn = NULL;
+      png_warning(png_ptr,
+         "Attempted to set both read_data_fn and write_data_fn in");
+      png_warning(png_ptr,
+         "the same structure.  Resetting read_data_fn to NULL.");
+   }
+}
+
+#if defined(USE_FAR_KEYWORD)
+#if defined(_MSC_VER)
+void *png_far_to_near(png_structp png_ptr,png_voidp ptr, int check)
+{
+   void *near_ptr;
+   void FAR *far_ptr;
+   FP_OFF(near_ptr) = FP_OFF(ptr);
+   far_ptr = (void FAR *)near_ptr;
+   if(check != 0)
+      if(FP_SEG(ptr) != FP_SEG(far_ptr))
+         png_error(png_ptr,"segment lost in conversion");
+   return(near_ptr);
+}
+#  else
+void *png_far_to_near(png_structp png_ptr,png_voidp ptr, int check)
+{
+   void *near_ptr;
+   void FAR *far_ptr;
+   near_ptr = (void FAR *)ptr;
+   far_ptr = (void FAR *)near_ptr;
+   if(check != 0)
+      if(far_ptr != ptr)
+         png_error(png_ptr,"segment lost in conversion");
+   return(near_ptr);
+}
+#   endif
+#   endif
+#endif /* PNG_WRITE_SUPPORTED */
diff --git a/src/libpng/pngwrite.c b/src/libpng/pngwrite.c
new file mode 100644
index 0000000..c6df1ef
--- /dev/null
+++ b/src/libpng/pngwrite.c
@@ -0,0 +1,1514 @@
+
+/* pngwrite.c - general routines to write a PNG file
+ *
+ * Last changed in libpng 1.2.15 January 5, 2007
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+/* get internal access to png.h */
+#define PNG_INTERNAL
+#include "png.h"
+#ifdef PNG_WRITE_SUPPORTED
+
+/* Writes all the PNG information.  This is the suggested way to use the
+ * library.  If you have a new chunk to add, make a function to write it,
+ * and put it in the correct location here.  If you want the chunk written
+ * after the image data, put it in png_write_end().  I strongly encourage
+ * you to supply a PNG_INFO_ flag, and check info_ptr->valid before writing
+ * the chunk, as that will keep the code from breaking if you want to just
+ * write a plain PNG file.  If you have long comments, I suggest writing
+ * them in png_write_end(), and compressing them.
+ */
+void PNGAPI
+png_write_info_before_PLTE(png_structp png_ptr, png_infop info_ptr)
+{
+   png_debug(1, "in png_write_info_before_PLTE\n");
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+   if (!(png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE))
+   {
+   png_write_sig(png_ptr); /* write PNG signature */
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   if((png_ptr->mode&PNG_HAVE_PNG_SIGNATURE)&&(png_ptr->mng_features_permitted))
+   {
+      png_warning(png_ptr,"MNG features are not allowed in a PNG datastream");
+      png_ptr->mng_features_permitted=0;
+   }
+#endif
+   /* write IHDR information. */
+   png_write_IHDR(png_ptr, info_ptr->width, info_ptr->height,
+      info_ptr->bit_depth, info_ptr->color_type, info_ptr->compression_type,
+      info_ptr->filter_type,
+#if defined(PNG_WRITE_INTERLACING_SUPPORTED)
+      info_ptr->interlace_type);
+#else
+      0);
+#endif
+   /* the rest of these check to see if the valid field has the appropriate
+      flag set, and if it does, writes the chunk. */
+#if defined(PNG_WRITE_gAMA_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_gAMA)
+   {
+#  ifdef PNG_FLOATING_POINT_SUPPORTED
+      png_write_gAMA(png_ptr, info_ptr->gamma);
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+      png_write_gAMA_fixed(png_ptr, info_ptr->int_gamma);
+#  endif
+#endif
+   }
+#endif
+#if defined(PNG_WRITE_sRGB_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_sRGB)
+      png_write_sRGB(png_ptr, (int)info_ptr->srgb_intent);
+#endif
+#if defined(PNG_WRITE_iCCP_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_iCCP)
+      png_write_iCCP(png_ptr, info_ptr->iccp_name, PNG_COMPRESSION_TYPE_BASE,
+                     info_ptr->iccp_profile, (int)info_ptr->iccp_proflen);
+#endif
+#if defined(PNG_WRITE_sBIT_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_sBIT)
+      png_write_sBIT(png_ptr, &(info_ptr->sig_bit), info_ptr->color_type);
+#endif
+#if defined(PNG_WRITE_cHRM_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_cHRM)
+   {
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+      png_write_cHRM(png_ptr,
+         info_ptr->x_white, info_ptr->y_white,
+         info_ptr->x_red, info_ptr->y_red,
+         info_ptr->x_green, info_ptr->y_green,
+         info_ptr->x_blue, info_ptr->y_blue);
+#else
+#  ifdef PNG_FIXED_POINT_SUPPORTED
+      png_write_cHRM_fixed(png_ptr,
+         info_ptr->int_x_white, info_ptr->int_y_white,
+         info_ptr->int_x_red, info_ptr->int_y_red,
+         info_ptr->int_x_green, info_ptr->int_y_green,
+         info_ptr->int_x_blue, info_ptr->int_y_blue);
+#  endif
+#endif
+   }
+#endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   if (info_ptr->unknown_chunks_num)
+   {
+       png_unknown_chunk *up;
+
+       png_debug(5, "writing extra chunks\n");
+
+       for (up = info_ptr->unknown_chunks;
+            up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
+            up++)
+       {
+         int keep=png_handle_as_unknown(png_ptr, up->name);
+         if (keep != PNG_HANDLE_CHUNK_NEVER &&
+            up->location && !(up->location & PNG_HAVE_PLTE) &&
+            !(up->location & PNG_HAVE_IDAT) &&
+            ((up->name[3] & 0x20) || keep == PNG_HANDLE_CHUNK_ALWAYS ||
+            (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS)))
+         {
+            png_write_chunk(png_ptr, up->name, up->data, up->size);
+         }
+       }
+   }
+#endif
+      png_ptr->mode |= PNG_WROTE_INFO_BEFORE_PLTE;
+   }
+}
+
+void PNGAPI
+png_write_info(png_structp png_ptr, png_infop info_ptr)
+{
+#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED)
+   int i;
+#endif
+
+   png_debug(1, "in png_write_info\n");
+
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+
+   png_write_info_before_PLTE(png_ptr, info_ptr);
+
+   if (info_ptr->valid & PNG_INFO_PLTE)
+      png_write_PLTE(png_ptr, info_ptr->palette,
+         (png_uint_32)info_ptr->num_palette);
+   else if (info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+      png_error(png_ptr, "Valid palette required for paletted images");
+
+#if defined(PNG_WRITE_tRNS_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_tRNS)
+      {
+#if defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+         /* invert the alpha channel (in tRNS) */
+         if ((png_ptr->transformations & PNG_INVERT_ALPHA) &&
+            info_ptr->color_type == PNG_COLOR_TYPE_PALETTE)
+         {
+            int j;
+            for (j=0; j<(int)info_ptr->num_trans; j++)
+               info_ptr->trans[j] = (png_byte)(255 - info_ptr->trans[j]);
+         }
+#endif
+      png_write_tRNS(png_ptr, info_ptr->trans, &(info_ptr->trans_values),
+         info_ptr->num_trans, info_ptr->color_type);
+      }
+#endif
+#if defined(PNG_WRITE_bKGD_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_bKGD)
+      png_write_bKGD(png_ptr, &(info_ptr->background), info_ptr->color_type);
+#endif
+#if defined(PNG_WRITE_hIST_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_hIST)
+      png_write_hIST(png_ptr, info_ptr->hist, info_ptr->num_palette);
+#endif
+#if defined(PNG_WRITE_oFFs_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_oFFs)
+      png_write_oFFs(png_ptr, info_ptr->x_offset, info_ptr->y_offset,
+         info_ptr->offset_unit_type);
+#endif
+#if defined(PNG_WRITE_pCAL_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_pCAL)
+      png_write_pCAL(png_ptr, info_ptr->pcal_purpose, info_ptr->pcal_X0,
+         info_ptr->pcal_X1, info_ptr->pcal_type, info_ptr->pcal_nparams,
+         info_ptr->pcal_units, info_ptr->pcal_params);
+#endif
+#if defined(PNG_WRITE_sCAL_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_sCAL)
+#if defined(PNG_FLOATING_POINT_SUPPORTED) && !defined(PNG_NO_STDIO)
+      png_write_sCAL(png_ptr, (int)info_ptr->scal_unit,
+          info_ptr->scal_pixel_width, info_ptr->scal_pixel_height);
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+      png_write_sCAL_s(png_ptr, (int)info_ptr->scal_unit,
+          info_ptr->scal_s_width, info_ptr->scal_s_height);
+#else
+      png_warning(png_ptr,
+          "png_write_sCAL not supported; sCAL chunk not written.");
+#endif
+#endif
+#endif
+#if defined(PNG_WRITE_pHYs_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_pHYs)
+      png_write_pHYs(png_ptr, info_ptr->x_pixels_per_unit,
+         info_ptr->y_pixels_per_unit, info_ptr->phys_unit_type);
+#endif
+#if defined(PNG_WRITE_tIME_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_tIME)
+   {
+      png_write_tIME(png_ptr, &(info_ptr->mod_time));
+      png_ptr->mode |= PNG_WROTE_tIME;
+   }
+#endif
+#if defined(PNG_WRITE_sPLT_SUPPORTED)
+   if (info_ptr->valid & PNG_INFO_sPLT)
+     for (i = 0; i < (int)info_ptr->splt_palettes_num; i++)
+       png_write_sPLT(png_ptr, info_ptr->splt_palettes + i);
+#endif
+#if defined(PNG_WRITE_TEXT_SUPPORTED)
+   /* Check to see if we need to write text chunks */
+   for (i = 0; i < info_ptr->num_text; i++)
+   {
+      png_debug2(2, "Writing header text chunk %d, type %d\n", i,
+         info_ptr->text[i].compression);
+      /* an internationalized chunk? */
+      if (info_ptr->text[i].compression > 0)
+      {
+#if defined(PNG_WRITE_iTXt_SUPPORTED)
+          /* write international chunk */
+          png_write_iTXt(png_ptr,
+                         info_ptr->text[i].compression,
+                         info_ptr->text[i].key,
+                         info_ptr->text[i].lang,
+                         info_ptr->text[i].lang_key,
+                         info_ptr->text[i].text);
+#else
+          png_warning(png_ptr, "Unable to write international text");
+#endif
+          /* Mark this chunk as written */
+          info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
+      }
+      /* If we want a compressed text chunk */
+      else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_zTXt)
+      {
+#if defined(PNG_WRITE_zTXt_SUPPORTED)
+         /* write compressed chunk */
+         png_write_zTXt(png_ptr, info_ptr->text[i].key,
+            info_ptr->text[i].text, 0,
+            info_ptr->text[i].compression);
+#else
+         png_warning(png_ptr, "Unable to write compressed text");
+#endif
+         /* Mark this chunk as written */
+         info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
+      }
+      else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
+      {
+#if defined(PNG_WRITE_tEXt_SUPPORTED)
+         /* write uncompressed chunk */
+         png_write_tEXt(png_ptr, info_ptr->text[i].key,
+                         info_ptr->text[i].text,
+                         0);
+#else
+         png_warning(png_ptr, "Unable to write uncompressed text");
+#endif
+         /* Mark this chunk as written */
+         info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
+      }
+   }
+#endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   if (info_ptr->unknown_chunks_num)
+   {
+       png_unknown_chunk *up;
+
+       png_debug(5, "writing extra chunks\n");
+
+       for (up = info_ptr->unknown_chunks;
+            up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
+            up++)
+       {
+         int keep=png_handle_as_unknown(png_ptr, up->name);
+         if (keep != PNG_HANDLE_CHUNK_NEVER &&
+            up->location && (up->location & PNG_HAVE_PLTE) &&
+            !(up->location & PNG_HAVE_IDAT) &&
+            ((up->name[3] & 0x20) || keep == PNG_HANDLE_CHUNK_ALWAYS ||
+            (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS)))
+         {
+            png_write_chunk(png_ptr, up->name, up->data, up->size);
+         }
+       }
+   }
+#endif
+}
+
+/* Writes the end of the PNG file.  If you don't want to write comments or
+ * time information, you can pass NULL for info.  If you already wrote these
+ * in png_write_info(), do not write them again here.  If you have long
+ * comments, I suggest writing them here, and compressing them.
+ */
+void PNGAPI
+png_write_end(png_structp png_ptr, png_infop info_ptr)
+{
+   png_debug(1, "in png_write_end\n");
+   if (png_ptr == NULL)
+      return;
+   if (!(png_ptr->mode & PNG_HAVE_IDAT))
+      png_error(png_ptr, "No IDATs written into file");
+
+   /* see if user wants us to write information chunks */
+   if (info_ptr != NULL)
+   {
+#if defined(PNG_WRITE_TEXT_SUPPORTED)
+      int i; /* local index variable */
+#endif
+#if defined(PNG_WRITE_tIME_SUPPORTED)
+      /* check to see if user has supplied a time chunk */
+      if ((info_ptr->valid & PNG_INFO_tIME) &&
+         !(png_ptr->mode & PNG_WROTE_tIME))
+         png_write_tIME(png_ptr, &(info_ptr->mod_time));
+#endif
+#if defined(PNG_WRITE_TEXT_SUPPORTED)
+      /* loop through comment chunks */
+      for (i = 0; i < info_ptr->num_text; i++)
+      {
+         png_debug2(2, "Writing trailer text chunk %d, type %d\n", i,
+            info_ptr->text[i].compression);
+         /* an internationalized chunk? */
+         if (info_ptr->text[i].compression > 0)
+         {
+#if defined(PNG_WRITE_iTXt_SUPPORTED)
+             /* write international chunk */
+             png_write_iTXt(png_ptr,
+                         info_ptr->text[i].compression,
+                         info_ptr->text[i].key,
+                         info_ptr->text[i].lang,
+                         info_ptr->text[i].lang_key,
+                         info_ptr->text[i].text);
+#else
+             png_warning(png_ptr, "Unable to write international text");
+#endif
+             /* Mark this chunk as written */
+             info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
+         }
+         else if (info_ptr->text[i].compression >= PNG_TEXT_COMPRESSION_zTXt)
+         {
+#if defined(PNG_WRITE_zTXt_SUPPORTED)
+            /* write compressed chunk */
+            png_write_zTXt(png_ptr, info_ptr->text[i].key,
+               info_ptr->text[i].text, 0,
+               info_ptr->text[i].compression);
+#else
+            png_warning(png_ptr, "Unable to write compressed text");
+#endif
+            /* Mark this chunk as written */
+            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_zTXt_WR;
+         }
+         else if (info_ptr->text[i].compression == PNG_TEXT_COMPRESSION_NONE)
+         {
+#if defined(PNG_WRITE_tEXt_SUPPORTED)
+            /* write uncompressed chunk */
+            png_write_tEXt(png_ptr, info_ptr->text[i].key,
+               info_ptr->text[i].text, 0);
+#else
+            png_warning(png_ptr, "Unable to write uncompressed text");
+#endif
+
+            /* Mark this chunk as written */
+            info_ptr->text[i].compression = PNG_TEXT_COMPRESSION_NONE_WR;
+         }
+      }
+#endif
+#if defined(PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED)
+   if (info_ptr->unknown_chunks_num)
+   {
+       png_unknown_chunk *up;
+
+       png_debug(5, "writing extra chunks\n");
+
+       for (up = info_ptr->unknown_chunks;
+            up < info_ptr->unknown_chunks + info_ptr->unknown_chunks_num;
+            up++)
+       {
+         int keep=png_handle_as_unknown(png_ptr, up->name);
+         if (keep != PNG_HANDLE_CHUNK_NEVER &&
+            up->location && (up->location & PNG_AFTER_IDAT) &&
+            ((up->name[3] & 0x20) || keep == PNG_HANDLE_CHUNK_ALWAYS ||
+            (png_ptr->flags & PNG_FLAG_KEEP_UNSAFE_CHUNKS)))
+         {
+            png_write_chunk(png_ptr, up->name, up->data, up->size);
+         }
+       }
+   }
+#endif
+   }
+
+   png_ptr->mode |= PNG_AFTER_IDAT;
+
+   /* write end of PNG file */
+   png_write_IEND(png_ptr);
+}
+
+#if defined(PNG_WRITE_tIME_SUPPORTED)
+#if !defined(_WIN32_WCE)
+/* "time.h" functions are not supported on WindowsCE */
+void PNGAPI
+png_convert_from_struct_tm(png_timep ptime, struct tm FAR * ttime)
+{
+   png_debug(1, "in png_convert_from_struct_tm\n");
+   ptime->year = (png_uint_16)(1900 + ttime->tm_year);
+   ptime->month = (png_byte)(ttime->tm_mon + 1);
+   ptime->day = (png_byte)ttime->tm_mday;
+   ptime->hour = (png_byte)ttime->tm_hour;
+   ptime->minute = (png_byte)ttime->tm_min;
+   ptime->second = (png_byte)ttime->tm_sec;
+}
+
+void PNGAPI
+png_convert_from_time_t(png_timep ptime, time_t ttime)
+{  /* Convert a time_t to png_time (UTC); zeroes *ptime if gmtime() fails */
+   struct tm *tbuf; /* UTC breakdown of ttime; NULL when gmtime() fails */
+   png_debug(1, "in png_convert_from_time_t\n");
+   tbuf = gmtime(&ttime);
+   if (tbuf == NULL) { png_memset(ptime, 0, png_sizeof(png_time)); return; }
+   png_convert_from_struct_tm(ptime, tbuf);
+}
+#endif
+#endif
+
+/* Create and initialize a png_struct for writing; returns NULL on failure */
+png_structp PNGAPI
+png_create_write_struct(png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn)
+{
+#ifdef PNG_USER_MEM_SUPPORTED
+   return (png_create_write_struct_2(user_png_ver, error_ptr, error_fn,
+      warn_fn, png_voidp_NULL, png_malloc_ptr_NULL, png_free_ptr_NULL));
+}
+
+/* Same as above, but with caller-supplied mem_ptr, malloc_fn and free_fn */
+png_structp PNGAPI
+png_create_write_struct_2(png_const_charp user_png_ver, png_voidp error_ptr,
+   png_error_ptr error_fn, png_error_ptr warn_fn, png_voidp mem_ptr,
+   png_malloc_ptr malloc_fn, png_free_ptr free_fn)
+{
+#endif /* PNG_USER_MEM_SUPPORTED */
+   png_structp png_ptr;
+#ifdef PNG_SETJMP_SUPPORTED
+#ifdef USE_FAR_KEYWORD
+   jmp_buf jmpbuf;
+#endif
+#endif
+   int i; /* index into the version strings */
+   png_debug(1, "in png_create_write_struct\n");
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_ptr = (png_structp)png_create_struct_2(PNG_STRUCT_PNG,
+      (png_malloc_ptr)malloc_fn, (png_voidp)mem_ptr);
+#else
+   png_ptr = (png_structp)png_create_struct(PNG_STRUCT_PNG);
+#endif /* PNG_USER_MEM_SUPPORTED */
+   if (png_ptr == NULL)
+      return (NULL);
+
+   /* added at libpng-1.2.6 */
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   png_ptr->user_width_max=PNG_USER_WIDTH_MAX;
+   png_ptr->user_height_max=PNG_USER_HEIGHT_MAX;
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+#ifdef USE_FAR_KEYWORD
+   if (setjmp(jmpbuf))
+#else
+   if (setjmp(png_ptr->jmpbuf))
+#endif
+   {
+      png_free(png_ptr, png_ptr->zbuf);
+      png_ptr->zbuf=NULL;
+      png_destroy_struct(png_ptr);
+      return (NULL);
+   }
+#ifdef USE_FAR_KEYWORD
+   png_memcpy(png_ptr->jmpbuf,jmpbuf,png_sizeof(jmp_buf));
+#endif
+#endif
+
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_set_mem_fn(png_ptr, mem_ptr, malloc_fn, free_fn);
+#endif /* PNG_USER_MEM_SUPPORTED */
+   png_set_error_fn(png_ptr, error_ptr, error_fn, warn_fn);
+   /* a NULL user_png_ver or the first differing character is a mismatch */
+   i=0;
+   do
+   {
+     if(user_png_ver == NULL || user_png_ver[i] != png_libpng_ver[i])
+        { png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH; break; }
+   } while (png_libpng_ver[i++]);
+
+   if (png_ptr->flags & PNG_FLAG_LIBRARY_MISMATCH)
+   {
+     /* Libpng 0.90 and later are binary incompatible with libpng 0.89, so
+      * we must recompile any applications that use any older library version.
+      * For versions after libpng 1.0, we will be compatible, so we need
+      * only check the first digit.
+      */
+     if (user_png_ver == NULL || user_png_ver[0] != png_libpng_ver[0] ||
+         (user_png_ver[0] == '1' && user_png_ver[2] != png_libpng_ver[2]) ||
+         (user_png_ver[0] == '0' && user_png_ver[2] < '9'))
+     {
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+        char msg[80];
+        if (user_png_ver)
+        {
+          png_snprintf(msg, 80,
+             "Application was compiled with png.h from libpng-%.20s",
+             user_png_ver);
+          png_warning(png_ptr, msg);
+        }
+        png_snprintf(msg, 80,
+           "Application  is  running with png.c from libpng-%.20s",
+           png_libpng_ver);
+        png_warning(png_ptr, msg);
+#endif
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+        png_ptr->flags=0;
+#endif
+        png_error(png_ptr,
+           "Incompatible libpng version in application and library");
+     }
+   }
+
+   /* initialize zbuf - compression buffer */
+   png_ptr->zbuf_size = PNG_ZBUF_SIZE;
+   png_ptr->zbuf = (png_bytep)png_malloc(png_ptr,
+      (png_uint_32)png_ptr->zbuf_size);
+
+   png_set_write_fn(png_ptr, png_voidp_NULL, png_rw_ptr_NULL,
+      png_flush_ptr_NULL);
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+   png_set_filter_heuristics(png_ptr, PNG_FILTER_HEURISTIC_DEFAULT,
+      1, png_doublep_NULL, png_doublep_NULL);
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+/* Applications that neglect to set up their own setjmp() and then encounter
+   a png_error() will longjmp here.  Since the jmpbuf is then meaningless we
+   abort instead of returning. */
+#ifdef USE_FAR_KEYWORD
+   if (setjmp(jmpbuf))
+      PNG_ABORT();
+   png_memcpy(png_ptr->jmpbuf,jmpbuf,png_sizeof(jmp_buf));
+#else
+   if (setjmp(png_ptr->jmpbuf))
+      PNG_ABORT();
+#endif
+#endif
+   return (png_ptr);
+}
+
+/* Initialize png_ptr structure, and allocate any memory needed */
+#if defined(PNG_1_0_X) || defined(PNG_1_2_X)
+/* Deprecated. */
+#undef png_write_init
+void PNGAPI
+png_write_init(png_structp png_ptr)
+{
+   /* We only come here via pre-1.0.7-compiled applications */
+   png_write_init_2(png_ptr, "1.0.6 or earlier", 0, 0);
+}
+
+void PNGAPI
+png_write_init_2(png_structp png_ptr, png_const_charp user_png_ver,
+   png_size_t png_struct_size, png_size_t png_info_size)
+{
+   /* We only come here via pre-1.0.12-compiled applications */
+   if(png_ptr == NULL) return;
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+   if(png_sizeof(png_struct) > png_struct_size ||
+      png_sizeof(png_info) > png_info_size)
+   {
+      char msg[80];
+      png_ptr->warning_fn=NULL;
+      if (user_png_ver)
+      {
+        png_snprintf(msg, 80,
+           "Application was compiled with png.h from libpng-%.20s",
+           user_png_ver);
+        png_warning(png_ptr, msg);
+      }
+      png_snprintf(msg, 80,
+         "Application  is  running with png.c from libpng-%.20s",
+         png_libpng_ver);
+      png_warning(png_ptr, msg);
+   }
+#endif
+   if(png_sizeof(png_struct) > png_struct_size)
+     {
+       png_ptr->error_fn=NULL;
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+       png_ptr->flags=0;
+#endif
+       png_error(png_ptr,
+       "The png struct allocated by the application for writing is too small.");
+     }
+   if(png_sizeof(png_info) > png_info_size)
+     {
+       png_ptr->error_fn=NULL;
+#ifdef PNG_ERROR_NUMBERS_SUPPORTED
+       png_ptr->flags=0;
+#endif
+       png_error(png_ptr,
+       "The info struct allocated by the application for writing is too small.");
+     }
+   png_write_init_3(&png_ptr, user_png_ver, png_struct_size);
+}
+#endif /* PNG_1_0_X || PNG_1_2_X */
+
+/* Reset *ptr_ptr for writing, re-allocating it if the caller's is too small */
+void PNGAPI
+png_write_init_3(png_structpp ptr_ptr, png_const_charp user_png_ver,
+   png_size_t png_struct_size)
+{
+   png_structp png_ptr=*ptr_ptr;
+#ifdef PNG_SETJMP_SUPPORTED
+   jmp_buf tmp_jmp; /* to save current jump buffer */
+#endif
+
+   int i = 0;
+
+   if (png_ptr == NULL)
+      return;
+
+   do
+   {
+     if (user_png_ver == NULL || user_png_ver[i] != png_libpng_ver[i])
+     {
+#ifdef PNG_LEGACY_SUPPORTED
+       png_ptr->flags |= PNG_FLAG_LIBRARY_MISMATCH;
+#else
+       png_ptr->warning_fn=NULL;
+       png_warning(png_ptr,
+     "Application uses deprecated png_write_init() and should be recompiled.");
+#endif
+       break; /* stop at first mismatch; never read past a short string */
+     }
+   } while (png_libpng_ver[i++]);
+
+   png_debug(1, "in png_write_init_3\n");
+
+#ifdef PNG_SETJMP_SUPPORTED
+   /* save jump buffer and error functions */
+   png_memcpy(tmp_jmp, png_ptr->jmpbuf, png_sizeof (jmp_buf));
+#endif
+
+   if (png_sizeof(png_struct) > png_struct_size)
+     {
+       png_destroy_struct(png_ptr);
+       png_ptr = (png_structp)png_create_struct(PNG_STRUCT_PNG);
+       *ptr_ptr = png_ptr;
+     }
+   if (png_ptr == NULL) return; /* replacement allocation failed */
+   /* reset all variables to 0 */
+   png_memset(png_ptr, 0, png_sizeof (png_struct));
+
+   /* added at libpng-1.2.6 */
+#ifdef PNG_SET_USER_LIMITS_SUPPORTED
+   png_ptr->user_width_max=PNG_USER_WIDTH_MAX;
+   png_ptr->user_height_max=PNG_USER_HEIGHT_MAX;
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+   /* restore jump buffer */
+   png_memcpy(png_ptr->jmpbuf, tmp_jmp, png_sizeof (jmp_buf));
+#endif
+
+   png_set_write_fn(png_ptr, png_voidp_NULL, png_rw_ptr_NULL,
+      png_flush_ptr_NULL);
+
+   /* initialize zbuf - compression buffer */
+   png_ptr->zbuf_size = PNG_ZBUF_SIZE;
+   png_ptr->zbuf = (png_bytep)png_malloc(png_ptr,
+      (png_uint_32)png_ptr->zbuf_size);
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+   png_set_filter_heuristics(png_ptr, PNG_FILTER_HEURISTIC_DEFAULT,
+      1, png_doublep_NULL, png_doublep_NULL);
+#endif
+}
+
+/* Write a few rows of image data.  If the image is interlaced,
+ * either you will have to write the 7 sub images, or, if you
+ * have called png_set_interlace_handling(), you will have to
+ * "write" the image seven times.
+ */
+void PNGAPI
+png_write_rows(png_structp png_ptr, png_bytepp row,
+   png_uint_32 num_rows)
+{
+   png_uint_32 remaining; /* rows still to be written */
+   png_bytepp next_row;   /* cursor into the caller's row-pointer array */
+
+   png_debug(1, "in png_write_rows\n");
+
+   if (png_ptr == NULL)
+      return;
+   /* hand each row pointer, in order, to png_write_row() */
+   next_row = row;
+   for (remaining = num_rows; remaining > 0; remaining--)
+   {
+      png_write_row(png_ptr, *next_row++);
+   }
+}
+
+/* Write the image.  You only need to call this function once, even
+ * if you are writing an interlaced image.
+ */
+void PNGAPI
+png_write_image(png_structp png_ptr, png_bytepp image)
+{
+   png_uint_32 y;     /* index of the row being written */
+   int passes_left;   /* interlace passes still to run */
+   png_bytepp cur;    /* cursor into the row-pointer array */
+
+   if (png_ptr == NULL)
+      return;
+
+   png_debug(1, "in png_write_image\n");
+#if defined(PNG_WRITE_INTERLACING_SUPPORTED)
+   /* initialize interlace handling.  If image is not interlaced,
+      this will set the number of passes to 1 */
+   passes_left = png_set_interlace_handling(png_ptr);
+#else
+   passes_left = 1;
+#endif
+   /* every pass walks the whole row-pointer array from the top */
+   while (passes_left-- > 0)
+   {
+      cur = image;
+      for (y = 0; y < png_ptr->height; y++)
+      {
+         png_write_row(png_ptr, *cur++);
+      }
+   }
+}
+
+/* Called by user to write one row of image data; no-op if png_ptr is NULL */
+void PNGAPI
+png_write_row(png_structp png_ptr, png_bytep row)
+{
+   if (png_ptr == NULL)
+      return;
+   png_debug2(1, "in png_write_row (row %ld, pass %d)\n",
+      png_ptr->row_number, png_ptr->pass);
+
+   /* first row of the first pass: one-time checks, then start the rows */
+   if (png_ptr->row_number == 0 && png_ptr->pass == 0)
+   {
+   /* make sure we wrote the header info */
+   if (!(png_ptr->mode & PNG_WROTE_INFO_BEFORE_PLTE))
+      png_error(png_ptr,
+         "png_write_info was never called before png_write_row.");
+
+   /* warn about transforms that were requested but compiled out */
+#if !defined(PNG_WRITE_INVERT_SUPPORTED) && defined(PNG_READ_INVERT_SUPPORTED)
+   if (png_ptr->transformations & PNG_INVERT_MONO)
+      png_warning(png_ptr, "PNG_WRITE_INVERT_SUPPORTED is not defined.");
+#endif
+#if !defined(PNG_WRITE_FILLER_SUPPORTED) && defined(PNG_READ_FILLER_SUPPORTED)
+   if (png_ptr->transformations & PNG_FILLER)
+      png_warning(png_ptr, "PNG_WRITE_FILLER_SUPPORTED is not defined.");
+#endif
+#if !defined(PNG_WRITE_PACKSWAP_SUPPORTED) && defined(PNG_READ_PACKSWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACKSWAP)
+      png_warning(png_ptr, "PNG_WRITE_PACKSWAP_SUPPORTED is not defined.");
+#endif
+#if !defined(PNG_WRITE_PACK_SUPPORTED) && defined(PNG_READ_PACK_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACK)
+      png_warning(png_ptr, "PNG_WRITE_PACK_SUPPORTED is not defined.");
+#endif
+#if !defined(PNG_WRITE_SHIFT_SUPPORTED) && defined(PNG_READ_SHIFT_SUPPORTED)
+   if (png_ptr->transformations & PNG_SHIFT)
+      png_warning(png_ptr, "PNG_WRITE_SHIFT_SUPPORTED is not defined.");
+#endif
+#if !defined(PNG_WRITE_BGR_SUPPORTED) && defined(PNG_READ_BGR_SUPPORTED)
+   if (png_ptr->transformations & PNG_BGR)
+      png_warning(png_ptr, "PNG_WRITE_BGR_SUPPORTED is not defined.");
+#endif
+#if !defined(PNG_WRITE_SWAP_SUPPORTED) && defined(PNG_READ_SWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_SWAP_BYTES)
+      png_warning(png_ptr, "PNG_WRITE_SWAP_SUPPORTED is not defined.");
+#endif
+
+      png_write_start_row(png_ptr);
+   }
+
+#if defined(PNG_WRITE_INTERLACING_SUPPORTED)
+   /* if interlaced and this row is not part of the current pass, skip it */
+   if (png_ptr->interlaced && (png_ptr->transformations & PNG_INTERLACE))
+   {
+      switch (png_ptr->pass)
+      {
+         case 0:
+            if (png_ptr->row_number & 0x07)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 1:
+            if ((png_ptr->row_number & 0x07) || png_ptr->width < 5)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 2:
+            if ((png_ptr->row_number & 0x07) != 4)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 3:
+            if ((png_ptr->row_number & 0x03) || png_ptr->width < 3)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 4:
+            if ((png_ptr->row_number & 0x03) != 2)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 5:
+            if ((png_ptr->row_number & 0x01) || png_ptr->width < 2)
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+         case 6:
+            if (!(png_ptr->row_number & 0x01))
+            {
+               png_write_finish_row(png_ptr);
+               return;
+            }
+            break;
+      }
+   }
+#endif
+
+   /* set up row info for transformations, from the user-side (usr_*) format */
+   png_ptr->row_info.color_type = png_ptr->color_type;
+   png_ptr->row_info.width = png_ptr->usr_width;
+   png_ptr->row_info.channels = png_ptr->usr_channels;
+   png_ptr->row_info.bit_depth = png_ptr->usr_bit_depth;
+   png_ptr->row_info.pixel_depth = (png_byte)(png_ptr->row_info.bit_depth *
+      png_ptr->row_info.channels);
+
+   png_ptr->row_info.rowbytes = PNG_ROWBYTES(png_ptr->row_info.pixel_depth,
+      png_ptr->row_info.width);
+
+   png_debug1(3, "row_info->color_type = %d\n", png_ptr->row_info.color_type);
+   png_debug1(3, "row_info->width = %lu\n", png_ptr->row_info.width);
+   png_debug1(3, "row_info->channels = %d\n", png_ptr->row_info.channels);
+   png_debug1(3, "row_info->bit_depth = %d\n", png_ptr->row_info.bit_depth);
+   png_debug1(3, "row_info->pixel_depth = %d\n", png_ptr->row_info.pixel_depth);
+   png_debug1(3, "row_info->rowbytes = %lu\n", png_ptr->row_info.rowbytes);
+
+   /* Copy user's row into buffer, leaving room for filter byte. */
+   png_memcpy_check(png_ptr, png_ptr->row_buf + 1, row,
+      png_ptr->row_info.rowbytes);
+
+#if defined(PNG_WRITE_INTERLACING_SUPPORTED)
+   /* handle interlacing (the final pass, 6, is left untouched) */
+   if (png_ptr->interlaced && png_ptr->pass < 6 &&
+      (png_ptr->transformations & PNG_INTERLACE))
+   {
+      png_do_write_interlace(&(png_ptr->row_info),
+         png_ptr->row_buf + 1, png_ptr->pass);
+      /* this should always get caught above, but still ... */
+      if (!(png_ptr->row_info.width))
+      {
+         png_write_finish_row(png_ptr);
+         return;
+      }
+   }
+#endif
+
+   /* handle other transformations */
+   if (png_ptr->transformations)
+      png_do_write_transformations(png_ptr);
+
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   /* Write filter_method 64 (intrapixel differencing) only if
+    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
+    * 2. Libpng did not write a PNG signature (this filter_method is only
+    *    used in PNG datastreams that are embedded in MNG datastreams) and
+    * 3. The application called png_permit_mng_features with a mask that
+    *    included PNG_FLAG_MNG_FILTER_64 and
+    * 4. The filter_method is 64 and
+    * 5. The color_type is RGB or RGBA
+    * NOTE: only 1, 3 and 4 are tested here; 2 and 5 are not checked below. */
+   if((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
+      (png_ptr->filter_type == PNG_INTRAPIXEL_DIFFERENCING))
+   {
+      /* Intrapixel differencing */
+      png_do_write_intrapixel(&(png_ptr->row_info), png_ptr->row_buf + 1);
+   }
+#endif
+
+   /* Find a filter if necessary, filter the row and write it out. */
+   png_write_find_filter(png_ptr, &(png_ptr->row_info));
+
+   if (png_ptr->write_row_fn != NULL) /* set by png_set_write_status_fn() */
+      (*(png_ptr->write_row_fn))(png_ptr, png_ptr->row_number, png_ptr->pass);
+}
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+/* Set the automatic flush interval, in rows, or 0 to turn flushing off */
+void PNGAPI
+png_set_flush(png_structp png_ptr, int nrows)
+{
+   png_debug(1, "in png_set_flush\n");
+   if (png_ptr == NULL)
+      return;
+   png_ptr->flush_dist = (nrows < 0 ? 0 : nrows); /* negative means off too */
+}
+
+/* flush now: Z_SYNC_FLUSH the zlib stream, write IDATs, call png_flush() */
+void PNGAPI
+png_write_flush(png_structp png_ptr)
+{
+   int wrote_IDAT; /* 1 when deflate() filled zbuf, so it must run again */
+
+   png_debug(1, "in png_write_flush\n");
+   if (png_ptr == NULL)
+      return;
+   /* We have already written out all of the data */
+   if (png_ptr->row_number >= png_ptr->num_rows)
+     return;
+
+   do
+   {
+      int ret;
+
+      /* ask zlib for everything it is holding back (sync flush) */
+      ret = deflate(&png_ptr->zstream, Z_SYNC_FLUSH);
+      wrote_IDAT = 0;
+
+      /* check for compression errors; png_error() is called on any of them */
+      if (ret != Z_OK)
+      {
+         if (png_ptr->zstream.msg != NULL)
+            png_error(png_ptr, png_ptr->zstream.msg);
+         else
+            png_error(png_ptr, "zlib error");
+      }
+
+      if (!(png_ptr->zstream.avail_out))
+      {
+         /* zbuf is full: write it as one IDAT and hand zlib the buffer again */
+         png_write_IDAT(png_ptr, png_ptr->zbuf,
+                        png_ptr->zbuf_size);
+         png_ptr->zstream.next_out = png_ptr->zbuf;
+         png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+         wrote_IDAT = 1;
+      }
+   } while(wrote_IDAT == 1);
+
+   /* If there is any data left to be output, write it into a new IDAT */
+   if (png_ptr->zbuf_size != png_ptr->zstream.avail_out)
+   {
+      /* write only the bytes zlib produced, then reset the output buffer */
+      png_write_IDAT(png_ptr, png_ptr->zbuf,
+                     png_ptr->zbuf_size - png_ptr->zstream.avail_out);
+      png_ptr->zstream.next_out = png_ptr->zbuf;
+      png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+   }
+   png_ptr->flush_rows = 0;
+   png_flush(png_ptr);
+}
+#endif /* PNG_WRITE_FLUSH_SUPPORTED */
+
+/* Free all memory used by the write.  Either argument, and the pointer it
+ * refers to, may be NULL; each destroyed struct's pointer is reset to NULL. */
+void PNGAPI
+png_destroy_write_struct(png_structpp png_ptr_ptr, png_infopp info_ptr_ptr)
+{
+   png_structp png_ptr = NULL;
+   png_infop info_ptr = NULL;
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_free_ptr free_fn = NULL;
+   png_voidp mem_ptr = NULL;
+#endif
+
+   png_debug(1, "in png_destroy_write_struct\n");
+   if (png_ptr_ptr != NULL)
+      png_ptr = *png_ptr_ptr;
+#ifdef PNG_USER_MEM_SUPPORTED
+   /* *png_ptr_ptr may be NULL: only read the hooks from a real struct */
+   if (png_ptr != NULL)
+   {
+      free_fn = png_ptr->free_fn;
+      mem_ptr = png_ptr->mem_ptr;
+   }
+#endif
+
+   if (info_ptr_ptr != NULL)
+      info_ptr = *info_ptr_ptr;
+
+   if (info_ptr != NULL)
+   {
+      png_free_data(png_ptr, info_ptr, PNG_FREE_ALL, -1);
+#if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED)
+      if (png_ptr != NULL && png_ptr->num_chunk_list) /* png_ptr may be NULL */
+      {
+         png_free(png_ptr, png_ptr->chunk_list);
+         png_ptr->chunk_list=NULL;
+         png_ptr->num_chunk_list=0;
+      }
+#endif
+
+#ifdef PNG_USER_MEM_SUPPORTED
+      png_destroy_struct_2((png_voidp)info_ptr, free_fn, mem_ptr);
+#else
+      png_destroy_struct((png_voidp)info_ptr);
+#endif
+      *info_ptr_ptr = NULL;
+   }
+
+   if (png_ptr != NULL)
+   {
+      png_write_destroy(png_ptr);
+#ifdef PNG_USER_MEM_SUPPORTED
+      png_destroy_struct_2((png_voidp)png_ptr, free_fn, mem_ptr);
+#else
+      png_destroy_struct((png_voidp)png_ptr);
+#endif
+      *png_ptr_ptr = NULL;
+   }
+}
+
+
+/* Free png_ptr's buffers, then zero it except error/free hooks (old method) */
+void /* PRIVATE */
+png_write_destroy(png_structp png_ptr)
+{
+#ifdef PNG_SETJMP_SUPPORTED
+   jmp_buf tmp_jmp; /* save jump buffer */
+#endif
+   png_error_ptr error_fn;
+   png_error_ptr warning_fn;
+   png_voidp error_ptr;
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_free_ptr free_fn;
+#endif
+
+   png_debug(1, "in png_write_destroy\n");
+   /* free any memory zlib uses */
+   deflateEnd(&png_ptr->zstream);
+
+   /* free our memory.  png_free checks NULL for us. */
+   png_free(png_ptr, png_ptr->zbuf);
+   png_free(png_ptr, png_ptr->row_buf);
+   png_free(png_ptr, png_ptr->prev_row);
+   png_free(png_ptr, png_ptr->sub_row);
+   png_free(png_ptr, png_ptr->up_row);
+   png_free(png_ptr, png_ptr->avg_row);
+   png_free(png_ptr, png_ptr->paeth_row);
+
+#if defined(PNG_TIME_RFC1123_SUPPORTED)
+   png_free(png_ptr, png_ptr->time_buffer);
+#endif
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+   png_free(png_ptr, png_ptr->prev_filters);
+   png_free(png_ptr, png_ptr->filter_weights);
+   png_free(png_ptr, png_ptr->inv_filter_weights);
+   png_free(png_ptr, png_ptr->filter_costs);
+   png_free(png_ptr, png_ptr->inv_filter_costs);
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+   /* keep the jump buffer across the memset below */
+   png_memcpy(tmp_jmp, png_ptr->jmpbuf, png_sizeof (jmp_buf));
+#endif
+
+   error_fn = png_ptr->error_fn;
+   warning_fn = png_ptr->warning_fn;
+   error_ptr = png_ptr->error_ptr;
+#ifdef PNG_USER_MEM_SUPPORTED
+   free_fn = png_ptr->free_fn;
+#endif
+
+   png_memset(png_ptr, 0, png_sizeof (png_struct)); /* clears freed pointers */
+
+   png_ptr->error_fn = error_fn;
+   png_ptr->warning_fn = warning_fn;
+   png_ptr->error_ptr = error_ptr;
+#ifdef PNG_USER_MEM_SUPPORTED
+   png_ptr->free_fn = free_fn;
+#endif
+
+#ifdef PNG_SETJMP_SUPPORTED
+   png_memcpy(png_ptr->jmpbuf, tmp_jmp, png_sizeof (jmp_buf));
+#endif
+}
+
+/* Select the row filter(s) to use; any unsupported method is a png_error. */
+void PNGAPI
+png_set_filter(png_structp png_ptr, int method, int filters)
+{
+   png_debug(1, "in png_set_filter\n");
+   if (png_ptr == NULL)
+      return;
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   if((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
+      (method == PNG_INTRAPIXEL_DIFFERENCING))
+         method = PNG_FILTER_TYPE_BASE;
+#endif
+   if (method == PNG_FILTER_TYPE_BASE)
+   {
+      switch (filters & (PNG_ALL_FILTERS | 0x07))
+      {
+#ifndef PNG_NO_WRITE_FILTER
+         case 5:
+         case 6:
+         case 7: png_warning(png_ptr, "Unknown row filter for method 0");
+#endif /* PNG_NO_WRITE_FILTER; note that 5-7 fall through to VALUE_NONE */
+         case PNG_FILTER_VALUE_NONE:
+              png_ptr->do_filter=PNG_FILTER_NONE; break;
+#ifndef PNG_NO_WRITE_FILTER
+         case PNG_FILTER_VALUE_SUB:
+              png_ptr->do_filter=PNG_FILTER_SUB; break;
+         case PNG_FILTER_VALUE_UP:
+              png_ptr->do_filter=PNG_FILTER_UP; break;
+         case PNG_FILTER_VALUE_AVG:
+              png_ptr->do_filter=PNG_FILTER_AVG; break;
+         case PNG_FILTER_VALUE_PAETH:
+              png_ptr->do_filter=PNG_FILTER_PAETH; break;
+         default: png_ptr->do_filter = (png_byte)filters; break;
+#else
+         default: png_warning(png_ptr, "Unknown row filter for method 0");
+#endif /* PNG_NO_WRITE_FILTER */
+      }
+
+      /* If we have allocated the row_buf, this means we have already started
+       * with the image and we should have allocated all of the filter buffers
+       * that have been selected.  If prev_row isn't already allocated, then
+       * it is too late to start using the filters that need it, since we
+       * will be missing the data in the previous row.  If an application
+       * wants to start and stop using particular filters during compression,
+       * it should start out with all of the filters, and then add and
+       * remove them after the start of compression.
+       */
+      if (png_ptr->row_buf != NULL)
+      {
+#ifndef PNG_NO_WRITE_FILTER
+         if ((png_ptr->do_filter & PNG_FILTER_SUB) && png_ptr->sub_row == NULL)
+         {
+            png_ptr->sub_row = (png_bytep)png_malloc(png_ptr,
+              (png_ptr->rowbytes + 1));
+            png_ptr->sub_row[0] = PNG_FILTER_VALUE_SUB; /* leading filter byte */
+         }
+
+         if ((png_ptr->do_filter & PNG_FILTER_UP) && png_ptr->up_row == NULL)
+         {
+            if (png_ptr->prev_row == NULL)
+            {
+               png_warning(png_ptr, "Can't add Up filter after starting");
+               png_ptr->do_filter &= ~PNG_FILTER_UP;
+            }
+            else
+            {
+               png_ptr->up_row = (png_bytep)png_malloc(png_ptr,
+                  (png_ptr->rowbytes + 1));
+               png_ptr->up_row[0] = PNG_FILTER_VALUE_UP;
+            }
+         }
+
+         if ((png_ptr->do_filter & PNG_FILTER_AVG) && png_ptr->avg_row == NULL)
+         {
+            if (png_ptr->prev_row == NULL)
+            {
+               png_warning(png_ptr, "Can't add Average filter after starting");
+               png_ptr->do_filter &= ~PNG_FILTER_AVG;
+            }
+            else
+            {
+               png_ptr->avg_row = (png_bytep)png_malloc(png_ptr,
+                  (png_ptr->rowbytes + 1));
+               png_ptr->avg_row[0] = PNG_FILTER_VALUE_AVG;
+            }
+         }
+
+         if ((png_ptr->do_filter & PNG_FILTER_PAETH) &&
+             png_ptr->paeth_row == NULL)
+         {
+            if (png_ptr->prev_row == NULL)
+            {
+               png_warning(png_ptr, "Can't add Paeth filter after starting");
+               png_ptr->do_filter &= (png_byte)(~PNG_FILTER_PAETH);
+            }
+            else
+            {
+               png_ptr->paeth_row = (png_bytep)png_malloc(png_ptr,
+                  (png_ptr->rowbytes + 1));
+               png_ptr->paeth_row[0] = PNG_FILTER_VALUE_PAETH;
+            }
+         }
+
+         if (png_ptr->do_filter == PNG_NO_FILTERS)
+#endif /* PNG_NO_WRITE_FILTER; without it the next line is unconditional */
+            png_ptr->do_filter = PNG_FILTER_NONE;
+      }
+   }
+   else
+      png_error(png_ptr, "Unknown custom filter method");
+}
+
+/* This allows us to influence the way in which libpng chooses the "best"
+ * filter for the current scanline.  filter_weights[] must hold num_weights
+ * entries (NULL disables weighting); filter_costs[] must hold
+ * PNG_FILTER_VALUE_LAST entries (NULL selects the default costs).  A weight
+ * <= 0 or a cost < 0 selects the default for that entry.  The weight buffers
+ * are rebuilt on every weighted call, so num_weights may change between calls.
+ */
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)      /* GRR 970116 */
+void PNGAPI
+png_set_filter_heuristics(png_structp png_ptr, int heuristic_method,
+   int num_weights, png_doublep filter_weights,
+   png_doublep filter_costs)
+{
+   int i;
+
+   png_debug(1, "in png_set_filter_heuristics\n");
+   if (png_ptr == NULL)
+      return;
+   if (heuristic_method >= PNG_FILTER_HEURISTIC_LAST)
+   {
+      png_warning(png_ptr, "Unknown filter heuristic method");
+      return;
+   }
+
+   if (heuristic_method == PNG_FILTER_HEURISTIC_DEFAULT)
+   {
+      heuristic_method = PNG_FILTER_HEURISTIC_UNWEIGHTED;
+   }
+
+   if (num_weights < 0 || filter_weights == NULL ||
+      heuristic_method == PNG_FILTER_HEURISTIC_UNWEIGHTED)
+   {
+      num_weights = 0;
+   }
+
+   png_ptr->num_prev_filters = (png_byte)num_weights;
+   png_ptr->heuristic_method = (png_byte)heuristic_method;
+
+   if (num_weights > 0)
+   {
+      /* The buffers may have been sized by an earlier call with a smaller
+       * num_weights, so always replace them.  Each pointer is cleared before
+       * the next png_malloc so that an allocation failure cannot leave a
+       * stale pointer to be freed again in png_write_destroy().
+       */
+      png_free(png_ptr, png_ptr->prev_filters);
+      png_ptr->prev_filters = NULL;
+      png_free(png_ptr, png_ptr->filter_weights);
+      png_ptr->filter_weights = NULL;
+      png_free(png_ptr, png_ptr->inv_filter_weights);
+      png_ptr->inv_filter_weights = NULL;
+
+      png_ptr->prev_filters = (png_bytep)png_malloc(png_ptr,
+         (png_uint_32)(png_sizeof(png_byte) * num_weights));
+
+      /* To make sure that the weighting starts out fairly */
+      for (i = 0; i < num_weights; i++)
+      {
+         png_ptr->prev_filters[i] = 255;
+      }
+
+      png_ptr->filter_weights = (png_uint_16p)png_malloc(png_ptr,
+         (png_uint_32)(png_sizeof(png_uint_16) * num_weights));
+      png_ptr->inv_filter_weights = (png_uint_16p)png_malloc(png_ptr,
+         (png_uint_32)(png_sizeof(png_uint_16) * num_weights));
+
+      for (i = 0; i < num_weights; i++)
+      {
+         if (filter_weights[i] <= 0.0) /* 0 would divide by zero below */
+         {
+            png_ptr->inv_filter_weights[i] =
+            png_ptr->filter_weights[i] = PNG_WEIGHT_FACTOR;
+         }
+         else
+         {
+            png_ptr->inv_filter_weights[i] =
+               (png_uint_16)((double)PNG_WEIGHT_FACTOR*filter_weights[i]+0.5);
+            png_ptr->filter_weights[i] =
+               (png_uint_16)((double)PNG_WEIGHT_FACTOR/filter_weights[i]+0.5);
+         }
+      }
+   }
+
+   /* If, in the future, there are other filter methods, this would
+    * need to be based on png_ptr->filter.
+    */
+   if (png_ptr->filter_costs == NULL)
+   {
+      png_ptr->filter_costs = (png_uint_16p)png_malloc(png_ptr,
+         (png_uint_32)(png_sizeof(png_uint_16) * PNG_FILTER_VALUE_LAST));
+
+      png_ptr->inv_filter_costs = (png_uint_16p)png_malloc(png_ptr,
+         (png_uint_32)(png_sizeof(png_uint_16) * PNG_FILTER_VALUE_LAST));
+
+      for (i = 0; i < PNG_FILTER_VALUE_LAST; i++)
+      {
+         png_ptr->inv_filter_costs[i] =
+         png_ptr->filter_costs[i] = PNG_COST_FACTOR;
+      }
+   }
+
+   /* Here is where we set the relative costs of the different filters.  We
+    * should take the desired compression level into account when setting
+    * the costs, so that Paeth, for instance, has a high relative cost at low
+    * compression levels, while it has a lower relative cost at higher
+    * compression settings.  The filter types are in order of increasing
+    * relative cost, so it would be possible to do this with an algorithm.
+    */
+   for (i = 0; i < PNG_FILTER_VALUE_LAST; i++)
+   {
+      if (filter_costs == NULL || filter_costs[i] < 0.0)
+      {
+         png_ptr->inv_filter_costs[i] =
+         png_ptr->filter_costs[i] = PNG_COST_FACTOR;
+      }
+      else if (filter_costs[i] >= 1.0) /* costs in [0,1) leave the entry as is */
+      {
+         png_ptr->inv_filter_costs[i] =
+            (png_uint_16)((double)PNG_COST_FACTOR / filter_costs[i] + 0.5);
+         png_ptr->filter_costs[i] =
+            (png_uint_16)((double)PNG_COST_FACTOR * filter_costs[i] + 0.5);
+      }
+   }
+}
+#endif /* PNG_WRITE_WEIGHTED_FILTER_SUPPORTED */
+
+void PNGAPI /* record a caller-selected zlib compression level */
+png_set_compression_level(png_structp png_ptr, int level)
+{
+   png_debug(1, "in png_set_compression_level\n");
+   if (png_ptr == NULL)
+      return;
+   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_LEVEL; /* level was set explicitly */
+   png_ptr->zlib_level = level; /* stored as given; not range-checked here */
+}
+
+void PNGAPI /* record a caller-selected zlib memory level */
+png_set_compression_mem_level(png_structp png_ptr, int mem_level)
+{
+   png_debug(1, "in png_set_compression_mem_level\n");
+   if (png_ptr == NULL)
+      return;
+   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_MEM_LEVEL; /* set explicitly */
+   png_ptr->zlib_mem_level = mem_level; /* stored as given; not range-checked */
+}
+
+void PNGAPI /* record a caller-selected zlib compression strategy */
+png_set_compression_strategy(png_structp png_ptr, int strategy)
+{
+   png_debug(1, "in png_set_compression_strategy\n");
+   if (png_ptr == NULL)
+      return;
+   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_STRATEGY; /* set explicitly */
+   png_ptr->zlib_strategy = strategy; /* stored as given; not validated here */
+}
+
+void PNGAPI /* record a caller-selected zlib window size, as log2(bytes) */
+png_set_compression_window_bits(png_structp png_ptr, int window_bits)
+{
+   if (png_ptr == NULL)
+      return;
+   if (window_bits > 15) /* out-of-range values only warn; still stored below */
+      png_warning(png_ptr, "Only compression windows <= 32k supported by PNG");
+   else if (window_bits < 8)
+      png_warning(png_ptr, "Only compression windows >= 256 supported by PNG");
+#ifndef WBITS_8_OK
+   /* avoid libpng bug with 256-byte windows: use the next size up instead */
+   if (window_bits == 8)
+     {
+       png_warning(png_ptr, "Compression window is being reset to 512");
+       window_bits=9;
+     }
+#endif
+   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_WINDOW_BITS;
+   png_ptr->zlib_window_bits = window_bits;
+}
+
+void PNGAPI /* record a caller-selected zlib compression method */
+png_set_compression_method(png_structp png_ptr, int method)
+{
+   png_debug(1, "in png_set_compression_method\n");
+   if (png_ptr == NULL)
+      return;
+   if (method != 8) /* warning only: the value is still stored below */
+      png_warning(png_ptr, "Only compression method 8 is supported by PNG");
+   png_ptr->flags |= PNG_FLAG_ZLIB_CUSTOM_METHOD;
+   png_ptr->zlib_method = method;
+}
+
+void PNGAPI /* install the callback png_write_row() calls after each row */
+png_set_write_status_fn(png_structp png_ptr, png_write_status_ptr write_row_fn)
+{
+   if (png_ptr == NULL)
+      return;
+   png_ptr->write_row_fn = write_row_fn; /* NULL means no callback is made */
+}
+
+#if defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+void PNGAPI /* install a user transform applied to each row being written */
+png_set_write_user_transform_fn(png_structp png_ptr, png_user_transform_ptr
+   write_user_transform_fn)
+{
+   png_debug(1, "in png_set_write_user_transform_fn\n");
+   if (png_ptr == NULL)
+      return;
+   png_ptr->transformations |= PNG_USER_TRANSFORM; /* set even for a NULL fn */
+   png_ptr->write_user_transform_fn = write_user_transform_fn;
+}
+#endif
+
+
+#if defined(PNG_INFO_IMAGE_SUPPORTED)
+void PNGAPI /* one-call write: header, selected transforms, rows, end */
+png_write_png(png_structp png_ptr, png_infop info_ptr,
+              int transforms, voidp params)
+{
+   if (png_ptr == NULL || info_ptr == NULL)
+      return;
+#if defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+   /* invert the alpha channel from opacity to transparency */
+   if (transforms & PNG_TRANSFORM_INVERT_ALPHA)
+       png_set_invert_alpha(png_ptr);
+#endif
+
+   /* Write the file header information. */
+   png_write_info(png_ptr, info_ptr);
+
+   /* ------ these transformations don't touch the info structure ------- */
+
+#if defined(PNG_WRITE_INVERT_SUPPORTED)
+   /* invert monochrome pixels */
+   if (transforms & PNG_TRANSFORM_INVERT_MONO)
+       png_set_invert_mono(png_ptr);
+#endif
+
+#if defined(PNG_WRITE_SHIFT_SUPPORTED)
+   /* Shift the pixels up to a legal bit depth and fill in
+    * as appropriate to correctly scale the image.
+    */
+   if ((transforms & PNG_TRANSFORM_SHIFT)
+               && (info_ptr->valid & PNG_INFO_sBIT))
+       png_set_shift(png_ptr, &info_ptr->sig_bit);
+#endif
+
+#if defined(PNG_WRITE_PACK_SUPPORTED)
+   /* pack pixels into bytes */
+   if (transforms & PNG_TRANSFORM_PACKING)
+       png_set_packing(png_ptr);
+#endif
+
+#if defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+   /* swap location of alpha bytes from ARGB to RGBA */
+   if (transforms & PNG_TRANSFORM_SWAP_ALPHA)
+       png_set_swap_alpha(png_ptr);
+#endif
+
+#if defined(PNG_WRITE_FILLER_SUPPORTED)
+   /* Get rid of filler (OR ALPHA) bytes, pack XRGB/RGBX/ARGB/RGBA into
+    * RGB (4 channels -> 3 channels). The second parameter is not used.
+    */
+   if (transforms & PNG_TRANSFORM_STRIP_FILLER)
+       png_set_filler(png_ptr, 0, PNG_FILLER_BEFORE);
+#endif
+
+#if defined(PNG_WRITE_BGR_SUPPORTED)
+   /* flip BGR pixels to RGB */
+   if (transforms & PNG_TRANSFORM_BGR)
+       png_set_bgr(png_ptr);
+#endif
+
+#if defined(PNG_WRITE_SWAP_SUPPORTED)
+   /* swap bytes of 16-bit files to most significant byte first */
+   if (transforms & PNG_TRANSFORM_SWAP_ENDIAN)
+       png_set_swap(png_ptr);
+#endif
+
+#if defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+   /* swap bits of 1, 2, 4 bit packed pixel formats */
+   if (transforms & PNG_TRANSFORM_PACKSWAP)
+       png_set_packswap(png_ptr);
+#endif
+
+   /* ----------------------- end of transformations ------------------- */
+
+   /* write the rows in info_ptr->row_pointers, if PNG_INFO_IDAT is valid */
+   if (info_ptr->valid & PNG_INFO_IDAT)
+       png_write_image(png_ptr, info_ptr->row_pointers);
+
+   /* It is REQUIRED to call this to finish writing the rest of the file */
+   png_write_end(png_ptr, info_ptr);
+
+   transforms = transforms; /* quiet compiler warnings */
+   params = params; /* params is not otherwise used in this function */
+}
+#endif
+#endif /* PNG_WRITE_SUPPORTED */
diff --git a/src/libpng/pngwtran.c b/src/libpng/pngwtran.c
new file mode 100644
index 0000000..0372fe6
--- /dev/null
+++ b/src/libpng/pngwtran.c
@@ -0,0 +1,572 @@
+
+/* pngwtran.c - transforms the data in a row for PNG writers
+ *
+ * Last changed in libpng 1.2.9 April 14, 2006
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2006 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+#ifdef PNG_WRITE_SUPPORTED
+
+/* Run every write transformation enabled in png_ptr->transformations over
+ * the current row (png_ptr->row_buf + 1).  The order of the steps below is
+ * significant.  Returns without doing anything when png_ptr is NULL. */
+void /* PRIVATE */
+png_do_write_transformations(png_structp png_ptr)
+{
+   png_debug(1, "in png_do_write_transformations\n");
+
+   if (png_ptr == NULL)
+      return;
+
+#if defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED)
+   if (png_ptr->transformations & PNG_USER_TRANSFORM) /* user hook goes first */
+      if(png_ptr->write_user_transform_fn != NULL)
+        (*(png_ptr->write_user_transform_fn)) /* user write transform function */
+          (png_ptr,                    /* png_ptr */
+           &(png_ptr->row_info),       /* row_info:     */
+             /*  png_uint_32 width;          width of row */
+             /*  png_uint_32 rowbytes;       number of bytes in row */
+             /*  png_byte color_type;        color type of pixels */
+             /*  png_byte bit_depth;         bit depth of samples */
+             /*  png_byte channels;          number of channels (1-4) */
+             /*  png_byte pixel_depth;       bits per pixel (depth*channels) */
+           png_ptr->row_buf + 1);      /* start of pixel data for row */
+#endif
+#if defined(PNG_WRITE_FILLER_SUPPORTED)
+   if (png_ptr->transformations & PNG_FILLER)
+      png_do_strip_filler(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         png_ptr->flags);
+#endif
+#if defined(PNG_WRITE_PACKSWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACKSWAP)
+      png_do_packswap(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+#if defined(PNG_WRITE_PACK_SUPPORTED)
+   if (png_ptr->transformations & PNG_PACK)
+      png_do_pack(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         (png_uint_32)png_ptr->bit_depth); /* the true (packed) bit depth */
+#endif
+#if defined(PNG_WRITE_SWAP_SUPPORTED)
+   if (png_ptr->transformations & PNG_SWAP_BYTES)
+      png_do_swap(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+#if defined(PNG_WRITE_SHIFT_SUPPORTED)
+   if (png_ptr->transformations & PNG_SHIFT)
+      png_do_shift(&(png_ptr->row_info), png_ptr->row_buf + 1,
+         &(png_ptr->shift)); /* true number of bits per channel */
+#endif
+#if defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+   if (png_ptr->transformations & PNG_SWAP_ALPHA)
+      png_do_write_swap_alpha(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+#if defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+   if (png_ptr->transformations & PNG_INVERT_ALPHA)
+      png_do_write_invert_alpha(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+#if defined(PNG_WRITE_BGR_SUPPORTED)
+   if (png_ptr->transformations & PNG_BGR)
+      png_do_bgr(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+#if defined(PNG_WRITE_INVERT_SUPPORTED)
+   if (png_ptr->transformations & PNG_INVERT_MONO)
+      png_do_invert(&(png_ptr->row_info), png_ptr->row_buf + 1);
+#endif
+}
+
+#if defined(PNG_WRITE_PACK_SUPPORTED)
+/* Pack one-pixel-per-byte data into 1, 2 or 4 bits per pixel, in place.
+ * Pass the true bit depth in bit_depth.  Nothing happens unless the
+ * row_info bit depth is 8 and the channel count is 1.  On return row_info
+ * (bit_depth, pixel_depth, rowbytes) describes the packed row. */
+void /* PRIVATE */
+png_do_pack(png_row_infop row_info, png_bytep row, png_uint_32 bit_depth)
+{
+   png_debug(1, "in png_do_pack\n");
+   if (row_info->bit_depth == 8 &&
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL && /* NOTE(review): row_info was already dereferenced above */
+#endif
+      row_info->channels == 1)
+   {
+      switch ((int)bit_depth) /* any other depth leaves the pixel bytes as is */
+      {
+         case 1:
+         {
+            png_bytep sp, dp; /* read and write cursors over the same row */
+            int mask, v;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            sp = row;
+            dp = row;
+            mask = 0x80; /* start at the most significant bit */
+            v = 0;
+
+            for (i = 0; i < row_width; i++)
+            {
+               if (*sp != 0) /* any non-zero source byte sets the bit */
+                  v |= mask;
+               sp++;
+               if (mask > 1)
+                  mask >>= 1;
+               else
+               {
+                  /* eight pixels gathered: emit the byte and start over */
+                  mask = 0x80;
+                  *dp = (png_byte)v;
+                  dp++;
+                  v = 0;
+               }
+            }
+            if (mask != 0x80) /* flush a partially filled last byte */
+               *dp = (png_byte)v;
+            break;
+         }
+         case 2:
+         {
+            png_bytep sp, dp;
+            int shift, v;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            sp = row;
+            dp = row;
+            shift = 6; /* first pixel goes into the top two bits */
+            v = 0;
+            for (i = 0; i < row_width; i++)
+            {
+               png_byte value;
+
+               value = (png_byte)(*sp & 0x03); /* keep the low two bits only */
+               v |= (value << shift);
+               if (shift == 0)
+               {
+                  /* four pixels gathered: emit the byte and start over */
+                  shift = 6;
+                  *dp = (png_byte)v;
+                  dp++;
+                  v = 0;
+               }
+               else
+                  shift -= 2;
+               sp++;
+            }
+            if (shift != 6) /* flush a partially filled last byte */
+               *dp = (png_byte)v;
+            break;
+         }
+         case 4:
+         {
+            png_bytep sp, dp;
+            int shift, v;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            sp = row;
+            dp = row;
+            shift = 4; /* first pixel goes into the high nibble */
+            v = 0;
+            for (i = 0; i < row_width; i++)
+            {
+               png_byte value;
+
+               value = (png_byte)(*sp & 0x0f); /* keep the low four bits only */
+               v |= (value << shift);
+
+               if (shift == 0)
+               {
+                  /* two pixels gathered: emit the byte and start over */
+                  shift = 4;
+                  *dp = (png_byte)v;
+                  dp++;
+                  v = 0;
+               }
+               else
+                  shift -= 4;
+
+               sp++;
+            }
+            if (shift != 4) /* flush a partially filled last byte */
+               *dp = (png_byte)v;
+            break;
+         }
+      }
+      /* describe the row as packed, whatever bit_depth was passed */
+      row_info->bit_depth = (png_byte)bit_depth;
+      row_info->pixel_depth = (png_byte)(bit_depth * row_info->channels);
+      row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,
+         row_info->width);
+   }
+}
+#endif
+
+#if defined(PNG_WRITE_SHIFT_SUPPORTED)
+/* Shift pixel values to take advantage of whole range.  bit_depth holds
+ * the true number of significant bits per channel; the row must be packed
+ * according to row_info->bit_depth.  E.g. for a row of bit depth 4 whose
+ * pixels only use 0 to 7, pass 3 and the data is rescaled to 0 to 15.
+ * Palette rows are left untouched.
+ * NOTE(review): a 0 entry in bit_depth makes the loop step (shift_dec) 0,
+ * so the replication loops below would never end - confirm callers. */
+void /* PRIVATE */
+png_do_shift(png_row_infop row_info, png_bytep row, png_color_8p bit_depth)
+{
+   png_debug(1, "in png_do_shift\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL &&
+#else
+   if (
+#endif
+      row_info->color_type != PNG_COLOR_TYPE_PALETTE)
+   {
+      /* per channel: first left shift, and the step between repeats */
+      int shift_start[4], shift_dec[4];
+      int channels = 0;
+
+      if (row_info->color_type & PNG_COLOR_MASK_COLOR)
+      {
+         shift_start[channels] = row_info->bit_depth - bit_depth->red;
+         shift_dec[channels] = bit_depth->red;
+         channels++;
+         shift_start[channels] = row_info->bit_depth - bit_depth->green;
+         shift_dec[channels] = bit_depth->green;
+         channels++;
+         shift_start[channels] = row_info->bit_depth - bit_depth->blue;
+         shift_dec[channels] = bit_depth->blue;
+         channels++;
+      }
+      else
+      {
+         shift_start[channels] = row_info->bit_depth - bit_depth->gray;
+         shift_dec[channels] = bit_depth->gray;
+         channels++;
+      }
+      if (row_info->color_type & PNG_COLOR_MASK_ALPHA)
+      {
+         shift_start[channels] = row_info->bit_depth - bit_depth->alpha;
+         shift_dec[channels] = bit_depth->alpha;
+         channels++;
+      }
+
+      /* with low row depths, could only be grayscale, so one channel */
+      if (row_info->bit_depth < 8)
+      {
+         png_bytep bp = row;
+         png_uint_32 i;
+         png_byte mask; /* applied only to the right-shifted copies below */
+         png_uint_32 row_bytes = row_info->rowbytes;
+
+         if (bit_depth->gray == 1 && row_info->bit_depth == 2)
+            mask = 0x55;
+         else if (row_info->bit_depth == 4 && bit_depth->gray == 3)
+            mask = 0x11;
+         else
+            mask = 0xff;
+
+         /* every packed pixel in a byte is processed at the same time */
+         for (i = 0; i < row_bytes; i++, bp++)
+         {
+            png_uint_16 v;
+            int j;
+
+            v = *bp;
+            *bp = 0;
+            /* OR together copies of v, each shift_dec[0] bits lower */
+            for (j = shift_start[0]; j > -shift_dec[0]; j -= shift_dec[0])
+            {
+               if (j > 0)
+                  *bp |= (png_byte)((v << j) & 0xff);
+               else
+                  *bp |= (png_byte)((v >> (-j)) & mask);
+            }
+         }
+      }
+      else if (row_info->bit_depth == 8)
+      {
+         png_bytep bp = row;
+         png_uint_32 i;
+         png_uint_32 istop = channels * row_info->width; /* samples in row */
+
+         for (i = 0; i < istop; i++, bp++)
+         {
+
+            png_uint_16 v;
+            int j;
+            int c = (int)(i%channels); /* channel of this sample */
+
+            v = *bp;
+            *bp = 0;
+            for (j = shift_start[c]; j > -shift_dec[c]; j -= shift_dec[c])
+            {
+               if (j > 0)
+                  *bp |= (png_byte)((v << j) & 0xff);
+               else
+                  *bp |= (png_byte)((v >> (-j)) & 0xff);
+            }
+         }
+      }
+      else
+      {
+         /* any other depth: samples are read as two bytes, high byte first */
+         png_bytep bp;
+         png_uint_32 i;
+         png_uint_32 istop = channels * row_info->width;
+
+         for (bp = row, i = 0; i < istop; i++)
+         {
+            int c = (int)(i%channels);
+            png_uint_16 value, v;
+            int j;
+
+            v = (png_uint_16)(((png_uint_16)(*bp) << 8) + *(bp + 1));
+            value = 0;
+            for (j = shift_start[c]; j > -shift_dec[c]; j -= shift_dec[c])
+            {
+               if (j > 0)
+                  value |= (png_uint_16)((v << j) & (png_uint_16)0xffff);
+               else
+                  value |= (png_uint_16)((v >> (-j)) & (png_uint_16)0xffff);
+            }
+            *bp++ = (png_byte)(value >> 8);
+            *bp++ = (png_byte)(value & 0xff);
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_WRITE_SWAP_ALPHA_SUPPORTED)
+/* Move the alpha sample of every pixel from the front to the back, in
+ * place.  Only RGB+alpha and gray+alpha rows are changed; any other color
+ * type is left untouched.  A bit depth of 8 is handled as one byte per
+ * sample and every other depth as two bytes per sample:
+ *
+ *    ARGB     -> RGBA          AG   -> GA
+ *    AARRGGBB -> RRGGBBAA      AAGG -> GGAA
+ *
+ * row_info->width gives the number of pixels in row.
+ */
+void /* PRIVATE */
+png_do_write_swap_alpha(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_write_swap_alpha\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      png_bytep px = row;                   /* first byte of current pixel */
+      png_uint_32 todo = row_info->width;   /* pixels still to convert */
+
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+      {
+         if (row_info->bit_depth == 8)
+         {
+            /* ARGB -> RGBA */
+            for (; todo > 0; todo--, px += 4)
+            {
+               png_byte alpha = px[0];
+
+               px[0] = px[1];
+               px[1] = px[2];
+               px[2] = px[3];
+               px[3] = alpha;
+            }
+         }
+         else
+         {
+            /* AARRGGBB -> RRGGBBAA */
+            for (; todo > 0; todo--, px += 8)
+            {
+               png_byte alpha_hi = px[0];
+               png_byte alpha_lo = px[1];
+
+               px[0] = px[2];
+               px[1] = px[3];
+               px[2] = px[4];
+               px[3] = px[5];
+               px[4] = px[6];
+               px[5] = px[7];
+               px[6] = alpha_hi;
+               px[7] = alpha_lo;
+            }
+         }
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      {
+         if (row_info->bit_depth == 8)
+         {
+            /* AG -> GA */
+            for (; todo > 0; todo--, px += 2)
+            {
+               png_byte alpha = px[0];
+
+               px[0] = px[1];
+               px[1] = alpha;
+            }
+         }
+         else
+         {
+            /* AAGG -> GGAA */
+            for (; todo > 0; todo--, px += 4)
+            {
+               png_byte alpha_hi = px[0];
+               png_byte alpha_lo = px[1];
+
+               px[0] = px[2];
+               px[1] = px[3];
+               px[2] = alpha_hi;
+               px[3] = alpha_lo;
+            }
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_WRITE_INVERT_ALPHA_SUPPORTED)
+void /* PRIVATE: replaces every alpha byte a by 255 - a, in place */
+png_do_write_invert_alpha(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_write_invert_alpha\n");
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL)
+#endif
+   {
+      if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+      {
+         /* This inverts the alpha channel in RGBA */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep sp, dp;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+            for (i = 0, sp = dp = row; i < row_width; i++)
+            {
+               /* The three color bytes stay where they are, so they
+                * are not copied (copying would write each byte onto
+                * itself).  Step over them and aim dp at the alpha
+                * byte that sp now points to.
+                */
+               sp+=3; dp = sp;
+               *(dp++) = (png_byte)(255 - *(sp++));
+            }
+         }
+         /* This inverts the alpha channel in RRGGBBAA */
+         else
+         {
+            png_bytep sp, dp;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            for (i = 0, sp = dp = row; i < row_width; i++)
+            {
+               /* The six color bytes stay where they are, so they
+                * are not copied.  Step over them and aim dp at the
+                * first alpha byte.  Inverting both bytes of the
+                * sample with 255 - byte is the same as computing
+                * 65535 - sample:
+                *    65535 - (256 * hi + lo)
+                *       = 256 * (255 - hi) + (255 - lo)
+                */
+               sp+=6; dp = sp;
+               *(dp++) = (png_byte)(255 - *(sp++));
+               *(dp++) = (png_byte)(255 - *(sp++));
+            }
+         }
+      }
+      else if (row_info->color_type == PNG_COLOR_TYPE_GRAY_ALPHA)
+      {
+         /* This inverts the alpha channel in GA */
+         if (row_info->bit_depth == 8)
+         {
+            png_bytep sp, dp;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            for (i = 0, sp = dp = row; i < row_width; i++)
+            {
+               *(dp++) = *(sp++); /* gray byte written onto itself (sp == dp) */
+               *(dp++) = (png_byte)(255 - *(sp++));
+            }
+         }
+         /* This inverts the alpha channel in GGAA */
+         else
+         {
+            png_bytep sp, dp;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            for (i = 0, sp = dp = row; i < row_width; i++)
+            {
+               /* The two gray bytes stay where they are, so they
+                * are not copied.  Step over them and aim dp at the
+                * first alpha byte.
+                */
+               sp+=2; dp = sp;
+               *(dp++) = (png_byte)(255 - *(sp++));
+               *(dp++) = (png_byte)(255 - *(sp++));
+            }
+         }
+      }
+   }
+}
+#endif
+
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+/* applies intrapixel differencing: red and blue each become (x - green) */
+void /* PRIVATE */
+png_do_write_intrapixel(png_row_infop row_info, png_bytep row)
+{
+   png_debug(1, "in png_do_write_intrapixel\n");
+   if (
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+       row != NULL && row_info != NULL &&
+#endif
+       (row_info->color_type & PNG_COLOR_MASK_COLOR))
+   {
+      int bytes_per_pixel;
+      png_uint_32 row_width = row_info->width;
+      if (row_info->bit_depth == 8)
+      {
+         png_bytep rp;
+         png_uint_32 i;
+
+         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+            bytes_per_pixel = 3;
+         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+            bytes_per_pixel = 4;
+         else
+            return; /* color bit set but neither RGB nor RGBA: leave row */
+
+         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
+         {
+            /* bytes 0, 1, 2 are red, green, blue; result is modulo 256 */
+            *(rp)   = (png_byte)((*rp     - *(rp+1))&0xff);
+            *(rp+2) = (png_byte)((*(rp+2) - *(rp+1))&0xff);
+         }
+      }
+      else if (row_info->bit_depth == 16)
+      {
+         png_bytep rp;
+         png_uint_32 i;
+
+         if (row_info->color_type == PNG_COLOR_TYPE_RGB)
+            bytes_per_pixel = 6;
+         else if (row_info->color_type == PNG_COLOR_TYPE_RGB_ALPHA)
+            bytes_per_pixel = 8;
+         else
+            return; /* color bit set but neither RGB nor RGBA: leave row */
+
+         for (i = 0, rp = row; i < row_width; i++, rp += bytes_per_pixel)
+         {
+            /* samples are two bytes each, high byte first */
+            png_uint_32 s0   = (*(rp  ) << 8) | *(rp+1);
+            png_uint_32 s1   = (*(rp+2) << 8) | *(rp+3);
+            png_uint_32 s2   = (*(rp+4) << 8) | *(rp+5);
+            png_uint_32 red  = (png_uint_32)((s0-s1) & 0xffffL); /* mod 65536 */
+            png_uint_32 blue = (png_uint_32)((s2-s1) & 0xffffL); /* mod 65536 */
+            *(rp  ) = (png_byte)((red >> 8) & 0xff);
+            *(rp+1) = (png_byte)(red & 0xff);
+            *(rp+4) = (png_byte)((blue >> 8) & 0xff);
+            *(rp+5) = (png_byte)(blue & 0xff);
+         }
+      }
+   }
+}
+#endif /* PNG_MNG_FEATURES_SUPPORTED */
+#endif /* PNG_WRITE_SUPPORTED */
diff --git a/src/libpng/pngwutil.c b/src/libpng/pngwutil.c
new file mode 100644
index 0000000..fef38ae
--- /dev/null
+++ b/src/libpng/pngwutil.c
@@ -0,0 +1,2792 @@
+
+/* pngwutil.c - utilities to write a PNG file
+ *
+ * Last changed in libpng 1.2.20 September 3, 2007
+ * For conditions of distribution and use, see copyright notice in png.h
+ * Copyright (c) 1998-2007 Glenn Randers-Pehrson
+ * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
+ * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
+ */
+
+#define PNG_INTERNAL
+#include "png.h"
+#ifdef PNG_WRITE_SUPPORTED
+
+/* Store a 32-bit number in a buffer in PNG byte order (most significant
+ * byte first).  Values are handled as unsigned for convenience, even though
+ * one supported ancillary chunk carries signed (two's complement) numbers.
+ */
+void PNGAPI
+png_save_uint_32(png_bytep buf, png_uint_32 i)
+{
+   int shift;
+
+   for (shift = 24; shift >= 0; shift -= 8)
+      *buf++ = (png_byte)((i >> shift) & 0xff);
+}
+
+/* Store a signed 32-bit number in a buffer in PNG byte order.  The value is
+ * converted to unsigned before shifting: that conversion is defined to be
+ * modulo 2^N, so the bytes written are the two's complement form whatever
+ * the host's signed representation or right-shift behaviour. */
+void PNGAPI
+png_save_int_32(png_bytep buf, png_int_32 i)
+{
+   buf[0] = (png_byte)(((png_uint_32)i >> 24) & 0xff);
+   buf[1] = (png_byte)(((png_uint_32)i >> 16) & 0xff);
+   buf[2] = (png_byte)(((png_uint_32)i >> 8) & 0xff);
+   buf[3] = (png_byte)((png_uint_32)i & 0xff);
+}
+
+/* Store a 16-bit number in a buffer in PNG byte order (high byte first).
+ * i is declared unsigned int, not png_uint_16, to avoid potential problems
+ * on pre-ANSI C compilers. */
+void PNGAPI
+png_save_uint_16(png_bytep buf, unsigned int i)
+{
+   png_byte low = (png_byte)(i & 0xff);
+   buf[0] = (png_byte)((i >> 8) & 0xff);
+   buf[1] = low;
+}
+
+/* Write a complete PNG chunk in one call.  chunk_name is an array of at
+ * least 4 ASCII characters naming the chunk; it does not need to be null
+ * terminated.  To be safe, pass the pre-defined chunk names, and define any
+ * new one where the others are defined.  length is the length of data, all
+ * of which must be present; otherwise use png_write_chunk_start(),
+ * png_write_chunk_data() and png_write_chunk_end() instead.
+ */
+void PNGAPI
+png_write_chunk(png_structp png_ptr, png_bytep chunk_name,
+   png_bytep data, png_size_t length)
+{
+   if (png_ptr != NULL)
+   {
+      png_write_chunk_start(png_ptr, chunk_name, (png_uint_32)length);
+      png_write_chunk_data(png_ptr, data, length);
+      png_write_chunk_end(png_ptr);
+   }
+}
+
+/* Write the start of a PNG chunk: the 4-byte length, most significant byte
+ * first, followed by the 4-byte chunk name; then reset the CRC and run it
+ * over the name.  length is the total of all the data that will be passed
+ * to png_write_chunk_data().  Only the debug trace runs if png_ptr is NULL. */
+void PNGAPI
+png_write_chunk_start(png_structp png_ptr, png_bytep chunk_name,
+   png_uint_32 length)
+{
+   png_byte buf[4];
+   /* chunk_name need not be null terminated, so print at most 4 bytes */
+   png_debug2(0, "Writing %.4s chunk (%lu bytes)\n", chunk_name,
+      (unsigned long)length);
+   if(png_ptr == NULL) return;
+   /* write the length */
+   png_save_uint_32(buf, length);
+   png_write_data(png_ptr, buf, (png_size_t)4);
+   /* write the chunk name */
+   png_write_data(png_ptr, chunk_name, (png_size_t)4);
+   /* reset the crc and run it over the chunk name */
+   png_reset_crc(png_ptr);
+   png_calculate_crc(png_ptr, chunk_name, (png_size_t)4);
+}
+
+/* Write part (or all) of the data of a chunk begun with
+ * png_write_chunk_start(), folding it into the running CRC first.  May be
+ * called several times per chunk; the lengths of all the calls *must* add
+ * up to the total length given to png_write_chunk_start().  A NULL png_ptr,
+ * a NULL data pointer or a length that is not positive makes this a no-op.
+ */
+void PNGAPI
+png_write_chunk_data(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+   if (png_ptr == NULL || data == NULL || !(length > 0))
+      return;
+
+   /* run the CRC over the data, then write it */
+   png_calculate_crc(png_ptr, data, length);
+   png_write_data(png_ptr, data, length);
+}
+
+/* Finish a chunk started with png_write_chunk_start(): emit the CRC. */
+void PNGAPI
+png_write_chunk_end(png_structp png_ptr)
+{
+   png_byte crc_bytes[4];
+
+   if (png_ptr != NULL)
+   {
+      /* serialise the accumulated CRC in PNG byte order and write it */
+      png_save_uint_32(crc_bytes, png_ptr->crc);
+      png_write_data(png_ptr, crc_bytes, (png_size_t)4);
+   }
+}
+
+/* Write the PNG file signature, skipping the first png_ptr->sig_bytes bytes.
+ * Those are bytes the application has already written, or more likely does
+ * not want because the PNG stream is embedded in another stream; it reports
+ * the count with png_set_sig_bytes().  PNG_HAVE_PNG_SIGNATURE is recorded in
+ * png_ptr->mode only when fewer than 3 bytes were skipped.
+ */
+void /* PRIVATE */
+png_write_sig(png_structp png_ptr)
+{
+   png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+   png_size_t remaining = (png_size_t)8 - png_ptr->sig_bytes;
+
+   png_write_data(png_ptr, png_signature + png_ptr->sig_bytes, remaining);
+   if (png_ptr->sig_bytes < 3)
+      png_ptr->mode |= PNG_HAVE_PNG_SIGNATURE;
+}
+
+#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_iCCP_SUPPORTED)
+/*
+ * This pair of functions encapsulates the operation of (a) compressing a
+ * text string, and (b) issuing it later as a series of chunk data writes.
+ * The compression_state structure is shared context for these functions
+ * set up by the caller in order to make the whole mess thread-safe.
+ */
+
+typedef struct
+{
+    char *input;   /* text to write uncompressed; NULL when it was compressed */
+    int input_len;   /* its length in bytes */
+    int num_output_ptr; /* number of output_ptr entries holding a saved buffer */
+    int max_output_ptr; /* allocated capacity of output_ptr, in entries */
+    png_charpp output_ptr; /* array of pointers to saved compressed output */
+} compression_state;
+
+/* Compress text (text_len bytes) with the zlib stream in png_ptr, ready for
+ * png_write_compressed_data_out().  With PNG_TEXT_COMPRESSION_NONE nothing
+ * is compressed: comp just records text and its length.  Otherwise each
+ * time png_ptr->zbuf fills up a copy of it is appended to comp->output_ptr,
+ * and the final partial buffer is left in png_ptr->zbuf.  Returns the
+ * number of bytes that will be written.  zlib failures are reported
+ * through png_error(); an unknown compression type only draws a warning.
+ */
+static int /* PRIVATE */
+png_text_compress(png_structp png_ptr,
+        png_charp text, png_size_t text_len, int compression,
+        compression_state *comp)
+{
+   int ret;
+
+   comp->num_output_ptr = 0;
+   comp->max_output_ptr = 0;
+   comp->output_ptr = NULL;
+   comp->input = NULL;
+   comp->input_len = 0;
+
+   /* we may just want to pass the text right through */
+   if (compression == PNG_TEXT_COMPRESSION_NONE)
+   {
+       comp->input = text;
+       comp->input_len = (int)text_len;
+       return((int)text_len);
+   }
+
+   if (compression >= PNG_TEXT_COMPRESSION_LAST)
+   {
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+      char msg[50];
+      png_snprintf(msg, 50, "Unknown compression type %d", compression);
+      png_warning(png_ptr, msg);
+#else
+      png_warning(png_ptr, "Unknown compression type");
+#endif
+   }
+
+   /* We can't write the chunk until we find out how much data we have,
+    * which means we need to run the compressor first and save the
+    * output in an array of malloc'd buffers.  (A single buffer of
+    * (1001 * text_len / 1000) + 12 bytes would always be large enough,
+    * but it might be more than a constrained or badly behaved
+    * application can allocate in one piece.)
+    */
+
+   /* set up the compression buffers */
+   png_ptr->zstream.avail_in = (uInt)text_len;
+   png_ptr->zstream.next_in = (Bytef *)text;
+   png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+   png_ptr->zstream.next_out = (Bytef *)png_ptr->zbuf;
+
+   /* this is the same compression loop as in png_write_row() */
+   do
+   {
+      /* compress the data */
+      ret = deflate(&png_ptr->zstream, Z_NO_FLUSH);
+      if (ret != Z_OK)
+      {
+         /* error */
+         if (png_ptr->zstream.msg != NULL)
+            png_error(png_ptr, png_ptr->zstream.msg);
+         else
+            png_error(png_ptr, "zlib error");
+      }
+      /* check to see if we need more room */
+      if (!(png_ptr->zstream.avail_out))
+      {
+         /* make sure the output array has room */
+         if (comp->num_output_ptr >= comp->max_output_ptr)
+         {
+            int old_max;
+
+            old_max = comp->max_output_ptr;
+            comp->max_output_ptr = comp->num_output_ptr + 4;
+            if (comp->output_ptr != NULL)
+            {
+               png_charpp old_ptr;
+
+               old_ptr = comp->output_ptr;
+               comp->output_ptr = (png_charpp)png_malloc(png_ptr,
+                  (png_uint_32)(comp->max_output_ptr *
+                  png_sizeof (png_charp)));
+               png_memcpy(comp->output_ptr, old_ptr, old_max
+                  * png_sizeof (png_charp));
+               png_free(png_ptr, old_ptr);
+            }
+            else
+               comp->output_ptr = (png_charpp)png_malloc(png_ptr,
+                  (png_uint_32)(comp->max_output_ptr *
+                  png_sizeof (png_charp)));
+         }
+
+         /* save the data */
+         comp->output_ptr[comp->num_output_ptr] = (png_charp)png_malloc(png_ptr,
+            (png_uint_32)png_ptr->zbuf_size);
+         png_memcpy(comp->output_ptr[comp->num_output_ptr], png_ptr->zbuf,
+            png_ptr->zbuf_size);
+         comp->num_output_ptr++;
+
+         /* and reset the buffer */
+         png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+         png_ptr->zstream.next_out = png_ptr->zbuf;
+      }
+   /* continue until we don't have any more to compress */
+   } while (png_ptr->zstream.avail_in);
+
+   /* finish the compression */
+   do
+   {
+      /* tell zlib we are finished */
+      ret = deflate(&png_ptr->zstream, Z_FINISH);
+
+      if (ret == Z_OK)
+      {
+         /* check to see if we need more room */
+         if (!(png_ptr->zstream.avail_out))
+         {
+            /* check to make sure our output array has room */
+            if (comp->num_output_ptr >= comp->max_output_ptr)
+            {
+               int old_max;
+
+               old_max = comp->max_output_ptr;
+               comp->max_output_ptr = comp->num_output_ptr + 4;
+               if (comp->output_ptr != NULL)
+               {
+                  png_charpp old_ptr;
+
+                  old_ptr = comp->output_ptr;
+                  /* This could be optimized to realloc() */
+                  comp->output_ptr = (png_charpp)png_malloc(png_ptr,
+                     (png_uint_32)(comp->max_output_ptr *
+                     png_sizeof (png_charp)));
+                  png_memcpy(comp->output_ptr, old_ptr,
+                     old_max * png_sizeof (png_charp));
+                  png_free(png_ptr, old_ptr);
+               }
+               else
+                  comp->output_ptr = (png_charpp)png_malloc(png_ptr,
+                     (png_uint_32)(comp->max_output_ptr *
+                     png_sizeof (png_charp)));
+            }
+
+            /* save off the data */
+            comp->output_ptr[comp->num_output_ptr] =
+               (png_charp)png_malloc(png_ptr, (png_uint_32)png_ptr->zbuf_size);
+            png_memcpy(comp->output_ptr[comp->num_output_ptr], png_ptr->zbuf,
+               png_ptr->zbuf_size);
+            comp->num_output_ptr++;
+
+            /* and reset the buffer pointers */
+            png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+            png_ptr->zstream.next_out = png_ptr->zbuf;
+         }
+      }
+      else if (ret != Z_STREAM_END)
+      {
+         /* we got an error */
+         if (png_ptr->zstream.msg != NULL)
+            png_error(png_ptr, png_ptr->zstream.msg);
+         else
+            png_error(png_ptr, "zlib error");
+      }
+   } while (ret != Z_STREAM_END);
+
+   /* text length is number of buffers plus last buffer */
+   text_len = png_ptr->zbuf_size * comp->num_output_ptr;
+   if (png_ptr->zstream.avail_out < png_ptr->zbuf_size)
+      text_len += png_ptr->zbuf_size - (png_size_t)png_ptr->zstream.avail_out;
+
+   return((int)text_len);
+}
+
+/* Ship the text prepared by png_text_compress() out via chunk data writes:
+ * either the uncompressed input, or every saved buffer followed by what is
+ * left in png_ptr->zbuf.  In the compressed case the saved buffers are
+ * freed and zlib is reset afterwards. */
+static void /* PRIVATE */
+png_write_compressed_data_out(png_structp png_ptr, compression_state *comp)
+{
+   int i;
+
+   /* handle the no-compression case */
+   if (comp->input)
+   {
+       png_write_chunk_data(png_ptr, (png_bytep)comp->input,
+                            (png_size_t)comp->input_len);
+       return;
+   }
+   /* write saved output buffers, if any */
+   for (i = 0; i < comp->num_output_ptr; i++)
+   {
+      png_write_chunk_data(png_ptr,(png_bytep)comp->output_ptr[i],
+         png_ptr->zbuf_size);
+      png_free(png_ptr, comp->output_ptr[i]);
+      comp->output_ptr[i]=NULL;
+   }
+   if (comp->max_output_ptr != 0)
+      png_free(png_ptr, comp->output_ptr);
+   comp->output_ptr=NULL;   /* always cleared, whether or not it was freed */
+   /* write anything left in zbuf */
+   if (png_ptr->zstream.avail_out < (png_uint_32)png_ptr->zbuf_size)
+      png_write_chunk_data(png_ptr, png_ptr->zbuf,
+         png_ptr->zbuf_size - png_ptr->zstream.avail_out);
+   /* reset zlib for another zTXt/iTXt or image data */
+   deflateReset(&png_ptr->zstream);
+   png_ptr->zstream.data_type = Z_BINARY;
+}
+#endif
+
+/* Write the IHDR chunk, and update the png_struct with the necessary
+ * information.  Note that the rest of this code depends upon this
+ * information being correct.
+ *
+ * Steps, in order:
+ *   1. Check the color_type/bit_depth combination and set
+ *      png_ptr->channels; an invalid combination is reported through
+ *      png_error().
+ *   2. Replace an unsupported compression, filter or interlace type with
+ *      a supported value after a png_warning().
+ *   3. Record the image parameters in png_ptr and derive pixel_depth and
+ *      rowbytes from them.
+ *   4. Pack the 13-byte IHDR payload and write the chunk.
+ *   5. Fill in any zlib parameters the application has not overridden,
+ *      initialize the deflate stream and point it at zbuf.
+ *
+ * On return png_ptr->mode is set to PNG_HAVE_IHDR.
+ */
+void /* PRIVATE */
+png_write_IHDR(png_structp png_ptr, png_uint_32 width, png_uint_32 height,
+   int bit_depth, int color_type, int compression_type, int filter_type,
+   int interlace_type)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_IHDR;
+#endif
+   png_byte buf[13]; /* buffer to store the IHDR info */
+
+   png_debug(1, "in png_write_IHDR\n");
+   /* Check that we have valid input data from the application info */
+   switch (color_type)
+   {
+      case PNG_COLOR_TYPE_GRAY:
+         switch (bit_depth)
+         {
+            case 1:
+            case 2:
+            case 4:
+            case 8:
+            case 16: png_ptr->channels = 1; break;
+            default: png_error(png_ptr,"Invalid bit depth for grayscale image");
+         }
+         break;
+      case PNG_COLOR_TYPE_RGB:
+         if (bit_depth != 8 && bit_depth != 16)
+            png_error(png_ptr, "Invalid bit depth for RGB image");
+         png_ptr->channels = 3;
+         break;
+      case PNG_COLOR_TYPE_PALETTE:
+         switch (bit_depth)
+         {
+            case 1:
+            case 2:
+            case 4:
+            case 8: png_ptr->channels = 1; break;
+            default: png_error(png_ptr, "Invalid bit depth for paletted image");
+         }
+         break;
+      case PNG_COLOR_TYPE_GRAY_ALPHA:
+         if (bit_depth != 8 && bit_depth != 16)
+            png_error(png_ptr, "Invalid bit depth for grayscale+alpha image");
+         png_ptr->channels = 2;
+         break;
+      case PNG_COLOR_TYPE_RGB_ALPHA:
+         if (bit_depth != 8 && bit_depth != 16)
+            png_error(png_ptr, "Invalid bit depth for RGBA image");
+         png_ptr->channels = 4;
+         break;
+      default:
+         png_error(png_ptr, "Invalid image color type specified");
+   }
+
+   /* only the base compression type is accepted; anything else is
+    * replaced by it after a warning */
+   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
+   {
+      png_warning(png_ptr, "Invalid compression type specified");
+      compression_type = PNG_COMPRESSION_TYPE_BASE;
+   }
+
+   /* Write filter_method 64 (intrapixel differencing) only if
+    * 1. Libpng was compiled with PNG_MNG_FEATURES_SUPPORTED and
+    * 2. Libpng did not write a PNG signature (this filter_method is only
+    *    used in PNG datastreams that are embedded in MNG datastreams) and
+    * 3. The application called png_permit_mng_features with a mask that
+    *    included PNG_FLAG_MNG_FILTER_64 and
+    * 4. The filter_method is 64 and
+    * 5. The color_type is RGB or RGBA
+    *
+    * Without PNG_MNG_FEATURES_SUPPORTED the condition below reduces to
+    * "filter_type != PNG_FILTER_TYPE_BASE".
+    */
+   if (
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+      !((png_ptr->mng_features_permitted & PNG_FLAG_MNG_FILTER_64) &&
+      ((png_ptr->mode&PNG_HAVE_PNG_SIGNATURE) == 0) &&
+      (color_type == PNG_COLOR_TYPE_RGB ||
+       color_type == PNG_COLOR_TYPE_RGB_ALPHA) &&
+      (filter_type == PNG_INTRAPIXEL_DIFFERENCING)) &&
+#endif
+      filter_type != PNG_FILTER_TYPE_BASE)
+   {
+      png_warning(png_ptr, "Invalid filter type specified");
+      filter_type = PNG_FILTER_TYPE_BASE;
+   }
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   /* note: an unrecognized interlace type falls back to ADAM7, not NONE */
+   if (interlace_type != PNG_INTERLACE_NONE &&
+      interlace_type != PNG_INTERLACE_ADAM7)
+   {
+      png_warning(png_ptr, "Invalid interlace type specified");
+      interlace_type = PNG_INTERLACE_ADAM7;
+   }
+#else
+   /* without interlacing support the request is silently overridden */
+   interlace_type=PNG_INTERLACE_NONE;
+#endif
+
+   /* save off the relevant information */
+   png_ptr->bit_depth = (png_byte)bit_depth;
+   png_ptr->color_type = (png_byte)color_type;
+   png_ptr->interlaced = (png_byte)interlace_type;
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+   png_ptr->filter_type = (png_byte)filter_type;
+#endif
+   png_ptr->compression_type = (png_byte)compression_type;
+   png_ptr->width = width;
+   png_ptr->height = height;
+
+   /* bits per pixel, and from that the row size computed by PNG_ROWBYTES */
+   png_ptr->pixel_depth = (png_byte)(bit_depth * png_ptr->channels);
+   png_ptr->rowbytes = PNG_ROWBYTES(png_ptr->pixel_depth, width);
+   /* set the usr info, so any transformations can modify it */
+   png_ptr->usr_width = png_ptr->width;
+   png_ptr->usr_bit_depth = png_ptr->bit_depth;
+   png_ptr->usr_channels = png_ptr->channels;
+
+   /* pack the header information into the buffer:
+    * bytes 0-3 width, 4-7 height, then one byte each for bit depth,
+    * color type, compression type, filter type and interlace type */
+   png_save_uint_32(buf, width);
+   png_save_uint_32(buf + 4, height);
+   buf[8] = (png_byte)bit_depth;
+   buf[9] = (png_byte)color_type;
+   buf[10] = (png_byte)compression_type;
+   buf[11] = (png_byte)filter_type;
+   buf[12] = (png_byte)interlace_type;
+
+   /* write the chunk */
+   png_write_chunk(png_ptr, png_IHDR, buf, (png_size_t)13);
+
+   /* initialize zlib with PNG info */
+   png_ptr->zstream.zalloc = png_zalloc;
+   png_ptr->zstream.zfree = png_zfree;
+   png_ptr->zstream.opaque = (voidpf)png_ptr;
+   /* choose a filter set only if none has been selected yet: no filtering
+    * for palette images and bit depths below 8, all filters otherwise */
+   if (!(png_ptr->do_filter))
+   {
+      if (png_ptr->color_type == PNG_COLOR_TYPE_PALETTE ||
+         png_ptr->bit_depth < 8)
+         png_ptr->do_filter = PNG_FILTER_NONE;
+      else
+         png_ptr->do_filter = PNG_ALL_FILTERS;
+   }
+   /* each zlib parameter below is defaulted only when the matching
+    * PNG_FLAG_ZLIB_CUSTOM_* flag is clear */
+   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_STRATEGY))
+   {
+      if (png_ptr->do_filter != PNG_FILTER_NONE)
+         png_ptr->zlib_strategy = Z_FILTERED;
+      else
+         png_ptr->zlib_strategy = Z_DEFAULT_STRATEGY;
+   }
+   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_LEVEL))
+      png_ptr->zlib_level = Z_DEFAULT_COMPRESSION;
+   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_MEM_LEVEL))
+      png_ptr->zlib_mem_level = 8;
+   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_WINDOW_BITS))
+      png_ptr->zlib_window_bits = 15;
+   if (!(png_ptr->flags & PNG_FLAG_ZLIB_CUSTOM_METHOD))
+      png_ptr->zlib_method = 8;
+   if (deflateInit2(&png_ptr->zstream, png_ptr->zlib_level,
+      png_ptr->zlib_method, png_ptr->zlib_window_bits,
+      png_ptr->zlib_mem_level, png_ptr->zlib_strategy) != Z_OK)
+       png_error(png_ptr, "zlib failed to initialize compressor");
+   /* compressed output is collected in zbuf */
+   png_ptr->zstream.next_out = png_ptr->zbuf;
+   png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+   /* libpng is not interested in zstream.data_type */
+   /* set it to a predefined value, to avoid its evaluation inside zlib */
+   png_ptr->zstream.data_type = Z_BINARY;
+
+   /* plain assignment, not |=: any mode bits set earlier are replaced */
+   png_ptr->mode = PNG_HAVE_IHDR;
+}
+
+/* Write the PLTE chunk.  Every entry is copied field by field (red,
+ * green, blue) into a three-byte buffer before it is written, so the
+ * layout of png_color is never assumed to match the file layout and
+ * people can redefine it to any convenient structure.
+ *
+ * An invalid entry count is an error for palette images and a warning
+ * (chunk skipped) otherwise; a PLTE request for a grayscale image is
+ * ignored with a warning.
+ */
+void /* PRIVATE */
+png_write_PLTE(png_structp png_ptr, png_colorp palette, png_uint_32 num_pal)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_PLTE;
+#endif
+   png_byte rgb[3];
+   png_colorp entry;
+   png_uint_32 n;
+
+   png_debug(1, "in png_write_PLTE\n");
+
+   /* more than 256 entries is never valid; zero entries is valid only
+    * when MNG support is compiled in and empty PLTE is permitted */
+   if (num_pal > 256 || (
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+        !(png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) &&
+#endif
+        num_pal == 0))
+   {
+      if (png_ptr->color_type != PNG_COLOR_TYPE_PALETTE)
+      {
+         png_warning(png_ptr, "Invalid number of colors in palette");
+         return;
+      }
+      png_error(png_ptr, "Invalid number of colors in palette");
+   }
+
+   if ((png_ptr->color_type & PNG_COLOR_MASK_COLOR) == 0)
+   {
+      png_warning(png_ptr,
+        "Ignoring request to write a PLTE chunk in grayscale PNG");
+      return;
+   }
+
+   png_ptr->num_palette = (png_uint_16)num_pal;
+   png_debug1(3, "num_palette = %d\n", png_ptr->num_palette);
+
+   png_write_chunk_start(png_ptr, png_PLTE, num_pal * 3);
+#ifndef PNG_NO_POINTER_INDEXING
+   entry = palette;
+   for (n = num_pal; n > 0; n--)
+   {
+      rgb[0] = entry->red;
+      rgb[1] = entry->green;
+      rgb[2] = entry->blue;
+      png_write_chunk_data(png_ptr, rgb, (png_size_t)3);
+      entry++;
+   }
+#else
+   /* Indexed variant: a little slower, but needed by some buggy compilers */
+   entry = palette;
+   n = 0;
+   while (n < num_pal)
+   {
+      rgb[0] = entry[n].red;
+      rgb[1] = entry[n].green;
+      rgb[2] = entry[n].blue;
+      png_write_chunk_data(png_ptr, rgb, (png_size_t)3);
+      n++;
+   }
+#endif
+   png_write_chunk_end(png_ptr);
+   png_ptr->mode |= PNG_HAVE_PLTE;
+}
+
+/* write an IDAT chunk
+ *
+ * data/length describe the bytes to be written as the chunk payload.
+ *
+ * While PNG_HAVE_IDAT is not yet set in png_ptr->mode (i.e. before this
+ * function has completed once) and the compression type is the base
+ * type, the first two bytes of data are treated as a zlib stream header
+ * and may be rewritten in place to advertise a smaller window size; see
+ * the comments in the body.  png_error() is raised if the first byte
+ * does not carry method 8 with a window-size field of at most 7.
+ *
+ * NOTE(review): data[0] is read before any check on length; presumably
+ * callers never pass an empty buffer for the first IDAT - confirm.
+ */
+void /* PRIVATE */
+png_write_IDAT(png_structp png_ptr, png_bytep data, png_size_t length)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_IDAT;
+#endif
+   png_debug(1, "in png_write_IDAT\n");
+
+   /* Optimize the CMF field in the zlib stream. */
+   /* This hack of the zlib stream is compliant to the stream specification. */
+   if (!(png_ptr->mode & PNG_HAVE_IDAT) &&
+       png_ptr->compression_type == PNG_COMPRESSION_TYPE_BASE)
+   {
+      unsigned int z_cmf = data[0];  /* zlib compression method and flags */
+      /* low nibble: compression method (must be 8);
+       * high nibble: window-size field (must not exceed 7) */
+      if ((z_cmf & 0x0f) == 8 && (z_cmf & 0xf0) <= 0x70)
+      {
+         /* Avoid memory underflows and multiplication overflows. */
+         /* The conditions below are practically always satisfied;
+            however, they still must be checked. */
+         if (length >= 2 &&
+             png_ptr->height < 16384 && png_ptr->width < 16384)
+         {
+            /* size estimate: height times the bytes of one row, where
+             * the row is taken as (bits + 15) >> 3 bytes */
+            png_uint_32 uncompressed_idat_size = png_ptr->height *
+               ((png_ptr->width *
+               png_ptr->channels * png_ptr->bit_depth + 15) >> 3);
+            unsigned int z_cinfo = z_cmf >> 4;
+            unsigned int half_z_window_size = 1 << (z_cinfo + 7);
+            /* step the window-size field down while the whole image
+             * still fits in half the window; stop once the half window
+             * has dropped below 256 */
+            while (uncompressed_idat_size <= half_z_window_size &&
+                   half_z_window_size >= 256)
+            {
+               z_cinfo--;
+               half_z_window_size >>= 1;
+            }
+            z_cmf = (z_cmf & 0x0f) | (z_cinfo << 4);
+            if (data[0] != (png_byte)z_cmf)
+            {
+               /* store the new first byte, clear the low five bits of
+                * the second byte and refill them so that the 16-bit
+                * value (data[0] << 8) + data[1] is again a multiple of
+                * 0x1f */
+               data[0] = (png_byte)z_cmf;
+               data[1] &= 0xe0;
+               data[1] += (png_byte)(0x1f - ((z_cmf << 8) + data[1]) % 0x1f);
+            }
+         }
+      }
+      else
+         png_error(png_ptr,
+            "Invalid zlib compression method or flags in IDAT");
+   }
+
+   png_write_chunk(png_ptr, png_IDAT, data, length);
+   /* from now on the header patch-up above is skipped */
+   png_ptr->mode |= PNG_HAVE_IDAT;
+}
+
+/* Write the IEND chunk, which carries no data, and record in
+ * png_ptr->mode that it has been written.
+ */
+void /* PRIVATE */
+png_write_IEND(png_structp png_ptr)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_IEND;
+#endif
+   png_debug(1, "in png_write_IEND\n");
+
+   /* zero-length payload: NULL data pointer, length 0 */
+   png_write_chunk(png_ptr, png_IEND, png_bytep_NULL, (png_size_t)0);
+
+   png_ptr->mode = png_ptr->mode | PNG_HAVE_IEND;
+}
+
+#if defined(PNG_WRITE_gAMA_SUPPORTED)
+/* write a gAMA chunk */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+/* Floating-point entry point for the gAMA chunk.  file_gamma is scaled
+ * by 100000, rounded by adding 0.5 before truncation, and written as a
+ * four-byte value.
+ */
+void /* PRIVATE */
+png_write_gAMA(png_structp png_ptr, double file_gamma)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_gAMA;
+#endif
+   png_byte payload[4];
+
+   png_debug(1, "in png_write_gAMA\n");
+
+   /* the file stores gamma in units of 1/100,000 */
+   png_save_uint_32(payload, (png_uint_32)(file_gamma * 100000.0 + 0.5));
+   png_write_chunk(png_ptr, png_gAMA, payload, (png_size_t)4);
+}
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Fixed-point entry point for the gAMA chunk.  file_gamma is written
+ * unscaled as a four-byte value; it is expected to be in units of
+ * 1/100,000 already.
+ */
+void /* PRIVATE */
+png_write_gAMA_fixed(png_structp png_ptr, png_fixed_point file_gamma)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_gAMA;
+#endif
+   png_byte payload[4];
+   png_uint_32 igamma;
+
+   png_debug(1, "in png_write_gAMA\n");
+
+   igamma = (png_uint_32)file_gamma;
+   png_save_uint_32(payload, igamma);
+   png_write_chunk(png_ptr, png_gAMA, payload, (png_size_t)4);
+}
+#endif
+#endif
+
+#if defined(PNG_WRITE_sRGB_SUPPORTED)
+/* Write the sRGB chunk: a single byte holding the rendering intent.
+ * An intent at or beyond PNG_sRGB_INTENT_LAST only produces a warning;
+ * the chunk is written regardless.
+ */
+void /* PRIVATE */
+png_write_sRGB(png_structp png_ptr, int srgb_intent)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_sRGB;
+#endif
+   png_byte intent_byte[1];
+
+   png_debug(1, "in png_write_sRGB\n");
+
+   if (!(srgb_intent < PNG_sRGB_INTENT_LAST))
+   {
+      png_warning(png_ptr,
+         "Invalid sRGB rendering intent specified");
+   }
+
+   intent_byte[0] = (png_byte)srgb_intent;
+   png_write_chunk(png_ptr, png_sRGB, intent_byte, (png_size_t)1);
+}
+#endif
+
+#if defined(PNG_WRITE_iCCP_SUPPORTED)
+/* Write an iCCP chunk.
+ *
+ * name             - profile name; cleaned up by png_check_keyword()
+ * compression_type - only PNG_COMPRESSION_TYPE_BASE is known; any other
+ *                    value produces a warning and base compression is
+ *                    used anyway
+ * profile          - profile data, may be NULL (treated as empty)
+ * profile_len      - number of bytes available at profile
+ *
+ * The first four bytes of the profile are read as a big-endian length.
+ * The chunk is not written when that length exceeds profile_len or does
+ * not fit a non-negative int; when it is smaller than profile_len the
+ * profile is truncated to it.  The keyword copy allocated by
+ * png_check_keyword() is released on every path that returns after it
+ * was obtained.
+ */
+void /* PRIVATE */
+png_write_iCCP(png_structp png_ptr, png_charp name, int compression_type,
+   png_charp profile, int profile_len)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_iCCP;
+#endif
+   png_size_t name_len;
+   png_charp new_name;
+   compression_state comp;
+   int embedded_profile_len = 0;
+
+   png_debug(1, "in png_write_iCCP\n");
+
+   comp.num_output_ptr = 0;
+   comp.max_output_ptr = 0;
+   comp.output_ptr = NULL;
+   comp.input = NULL;
+   comp.input_len = 0;
+
+   if (name == NULL || (name_len = png_check_keyword(png_ptr, name,
+      &new_name)) == 0)
+   {
+      png_warning(png_ptr, "Empty keyword in iCCP chunk");
+      return;
+   }
+
+   if (compression_type != PNG_COMPRESSION_TYPE_BASE)
+      png_warning(png_ptr, "Unknown compression type in iCCP chunk");
+
+   if (profile == NULL)
+      profile_len = 0;
+
+   if (profile_len > 3)
+   {
+      /* Assemble the length as unsigned so that a set top bit neither
+       * overflows a signed shift nor yields a negative length. */
+      png_bytep header = (png_bytep)profile;
+      png_uint_32 declared_len =
+          ((png_uint_32)header[0] << 24) |
+          ((png_uint_32)header[1] << 16) |
+          ((png_uint_32)header[2] <<  8) |
+          ((png_uint_32)header[3]      );
+
+      /* A value that would be negative as an int must not be copied
+       * into profile_len below: it would reach the compressor as a
+       * huge png_size_t. */
+      if (declared_len > (png_uint_32)0x7fffffffL)
+      {
+         png_warning(png_ptr,
+           "Embedded profile length in iCCP chunk is negative");
+         png_free(png_ptr, new_name);
+         return;
+      }
+      embedded_profile_len = (int)declared_len;
+   }
+
+   if (profile_len < embedded_profile_len)
+   {
+      png_warning(png_ptr,
+        "Embedded profile length too large in iCCP chunk");
+      /* do not leak the keyword copy on this early exit */
+      png_free(png_ptr, new_name);
+      return;
+   }
+
+   if (profile_len > embedded_profile_len)
+   {
+      png_warning(png_ptr,
+        "Truncating profile to actual length in iCCP chunk");
+      profile_len = embedded_profile_len;
+   }
+
+   /* compress now: the chunk length must be known before the chunk starts */
+   if (profile_len)
+      profile_len = png_text_compress(png_ptr, profile,
+         (png_size_t)profile_len, PNG_COMPRESSION_TYPE_BASE, &comp);
+
+   /* make sure we include the NULL after the name and the compression type */
+   png_write_chunk_start(png_ptr, png_iCCP,
+          (png_uint_32)name_len+profile_len+2);
+   /* the byte after the terminating NUL is the compression method (0) */
+   new_name[name_len+1]=0x00;
+   png_write_chunk_data(png_ptr, (png_bytep)new_name, name_len + 2);
+
+   if (profile_len)
+      png_write_compressed_data_out(png_ptr, &comp);
+
+   png_write_chunk_end(png_ptr);
+   png_free(png_ptr, new_name);
+}
+#endif
+
+#if defined(PNG_WRITE_sPLT_SUPPORTED)
+/* Write a sPLT (suggested palette) chunk.
+ *
+ * The chunk holds the cleaned-up palette name with its terminating NUL,
+ * one byte of sample depth, and one record per entry.  A record is 6
+ * bytes when the depth is 8 (four one-byte samples plus a two-byte
+ * frequency) and 10 bytes otherwise (four two-byte samples plus a
+ * two-byte frequency).
+ *
+ * Nothing is written if the name is NULL or reduces to an empty keyword.
+ */
+void /* PRIVATE */
+png_write_sPLT(png_structp png_ptr, png_sPLT_tp spalette)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_sPLT;
+#endif
+   png_size_t name_len;
+   png_charp new_name;
+   png_byte entrybuf[10];
+   int entry_size = (spalette->depth == 8 ? 6 : 10);
+   int palette_size = entry_size * spalette->nentries;
+   png_sPLT_entryp ep;
+#ifdef PNG_NO_POINTER_INDEXING
+   int i;
+#endif
+
+   png_debug(1, "in png_write_sPLT\n");
+   if (spalette->name == NULL || (name_len = png_check_keyword(png_ptr,
+      spalette->name, &new_name))==0)
+   {
+      png_warning(png_ptr, "Empty keyword in sPLT chunk");
+      return;
+   }
+
+   /* make sure we include the NULL after the name */
+   png_write_chunk_start(png_ptr, png_sPLT,
+          (png_uint_32)(name_len + 2 + palette_size));
+   png_write_chunk_data(png_ptr, (png_bytep)new_name, name_len + 1);
+   png_write_chunk_data(png_ptr, (png_bytep)&spalette->depth, 1);
+
+   /* loop through each palette entry, writing appropriately */
+#ifndef PNG_NO_POINTER_INDEXING
+   for (ep = spalette->entries; ep<spalette->entries+spalette->nentries; ep++)
+   {
+       if (spalette->depth == 8)
+       {
+           entrybuf[0] = (png_byte)ep->red;
+           entrybuf[1] = (png_byte)ep->green;
+           entrybuf[2] = (png_byte)ep->blue;
+           entrybuf[3] = (png_byte)ep->alpha;
+           png_save_uint_16(entrybuf + 4, ep->frequency);
+       }
+       else
+       {
+           png_save_uint_16(entrybuf + 0, ep->red);
+           png_save_uint_16(entrybuf + 2, ep->green);
+           png_save_uint_16(entrybuf + 4, ep->blue);
+           png_save_uint_16(entrybuf + 6, ep->alpha);
+           png_save_uint_16(entrybuf + 8, ep->frequency);
+       }
+       png_write_chunk_data(png_ptr, entrybuf, (png_size_t)entry_size);
+   }
+#else
+   ep=spalette->entries;
+   /* The bound must be "i < nentries": with '>' the loop body never ran
+    * and the chunk was emitted shorter than its declared length. */
+   for (i=0; i<spalette->nentries; i++)
+   {
+       if (spalette->depth == 8)
+       {
+           entrybuf[0] = (png_byte)ep[i].red;
+           entrybuf[1] = (png_byte)ep[i].green;
+           entrybuf[2] = (png_byte)ep[i].blue;
+           entrybuf[3] = (png_byte)ep[i].alpha;
+           png_save_uint_16(entrybuf + 4, ep[i].frequency);
+       }
+       else
+       {
+           png_save_uint_16(entrybuf + 0, ep[i].red);
+           png_save_uint_16(entrybuf + 2, ep[i].green);
+           png_save_uint_16(entrybuf + 4, ep[i].blue);
+           png_save_uint_16(entrybuf + 6, ep[i].alpha);
+           png_save_uint_16(entrybuf + 8, ep[i].frequency);
+       }
+       png_write_chunk_data(png_ptr, entrybuf, (png_size_t)entry_size);
+   }
+#endif
+
+   png_write_chunk_end(png_ptr);
+   png_free(png_ptr, new_name);
+}
+#endif
+
+#if defined(PNG_WRITE_sBIT_SUPPORTED)
+/* Write the sBIT chunk.  The significant-bit counts are copied out of
+ * sbit one field at a time, so the member order of png_color_8 is not
+ * relied upon.
+ *
+ * Grayscale images contribute one byte, color images three; one more
+ * byte follows when color_type has an alpha channel.  Any count that is
+ * zero or above the permitted maximum causes a warning and the chunk is
+ * not written.
+ */
+void /* PRIVATE */
+png_write_sBIT(png_structp png_ptr, png_color_8p sbit, int color_type)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_sBIT;
+#endif
+   png_byte depths[4];
+   png_size_t count;
+
+   png_debug(1, "in png_write_sBIT\n");
+
+   if (!(color_type & PNG_COLOR_MASK_COLOR))
+   {
+      /* grayscale: a single count, limited by the user bit depth */
+      if (sbit->gray == 0 || sbit->gray > png_ptr->usr_bit_depth)
+      {
+         png_warning(png_ptr, "Invalid sBIT depth specified");
+         return;
+      }
+      depths[0] = sbit->gray;
+      count = 1;
+   }
+   else
+   {
+      /* color: palette images are limited to 8, others to the bit depth */
+      png_byte limit;
+
+      if (color_type == PNG_COLOR_TYPE_PALETTE)
+         limit = 8;
+      else
+         limit = (png_byte)png_ptr->usr_bit_depth;
+
+      if (sbit->red == 0 || sbit->red > limit ||
+          sbit->green == 0 || sbit->green > limit ||
+          sbit->blue == 0 || sbit->blue > limit)
+      {
+         png_warning(png_ptr, "Invalid sBIT depth specified");
+         return;
+      }
+      depths[0] = sbit->red;
+      depths[1] = sbit->green;
+      depths[2] = sbit->blue;
+      count = 3;
+   }
+
+   if (color_type & PNG_COLOR_MASK_ALPHA)
+   {
+      if (sbit->alpha == 0 || sbit->alpha > png_ptr->usr_bit_depth)
+      {
+         png_warning(png_ptr, "Invalid sBIT depth specified");
+         return;
+      }
+      depths[count] = sbit->alpha;
+      count++;
+   }
+
+   png_write_chunk(png_ptr, png_sBIT, depths, count);
+}
+#endif
+
+#if defined(PNG_WRITE_cHRM_SUPPORTED)
+/* write the cHRM chunk */
+#ifdef PNG_FLOATING_POINT_SUPPORTED
+/* Floating-point entry point for the cHRM chunk.
+ *
+ * The white point and the red, green and blue primaries are validated in
+ * that order; the first invalid point produces a warning and the chunk
+ * is not written.  Valid coordinates are scaled by 100000, rounded by
+ * adding 0.5 before truncation, and stored as eight four-byte values.
+ */
+void /* PRIVATE */
+png_write_cHRM(png_structp png_ptr, double white_x, double white_y,
+   double red_x, double red_y, double green_x, double green_y,
+   double blue_x, double blue_y)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_cHRM;
+#endif
+   png_byte chrm[32];
+
+   png_debug(1, "in png_write_cHRM\n");
+
+   /* white point: each coordinate within [0, 0.8], sum at most 1 */
+   if (white_x < 0 || white_x > 0.8 || white_y < 0 || white_y > 0.8 ||
+       white_x + white_y > 1.0)
+   {
+      png_warning(png_ptr, "Invalid cHRM white point specified");
+#if !defined(PNG_NO_CONSOLE_IO)
+      fprintf(stderr,"white_x=%f, white_y=%f\n",white_x, white_y);
+#endif
+      return;
+   }
+
+   /* primaries: non-negative coordinates, sum at most 1 */
+   if (red_x < 0 ||  red_y < 0 || red_x + red_y > 1.0)
+   {
+      png_warning(png_ptr, "Invalid cHRM red point specified");
+      return;
+   }
+
+   if (green_x < 0 || green_y < 0 || green_x + green_y > 1.0)
+   {
+      png_warning(png_ptr, "Invalid cHRM green point specified");
+      return;
+   }
+
+   if (blue_x < 0 || blue_y < 0 || blue_x + blue_y > 1.0)
+   {
+      png_warning(png_ptr, "Invalid cHRM blue point specified");
+      return;
+   }
+
+   /* each value is saved in 1/100,000ths */
+   png_save_uint_32(chrm,      (png_uint_32)(white_x * 100000.0 + 0.5));
+   png_save_uint_32(chrm + 4,  (png_uint_32)(white_y * 100000.0 + 0.5));
+   png_save_uint_32(chrm + 8,  (png_uint_32)(red_x * 100000.0 + 0.5));
+   png_save_uint_32(chrm + 12, (png_uint_32)(red_y * 100000.0 + 0.5));
+   png_save_uint_32(chrm + 16, (png_uint_32)(green_x * 100000.0 + 0.5));
+   png_save_uint_32(chrm + 20, (png_uint_32)(green_y * 100000.0 + 0.5));
+   png_save_uint_32(chrm + 24, (png_uint_32)(blue_x * 100000.0 + 0.5));
+   png_save_uint_32(chrm + 28, (png_uint_32)(blue_y * 100000.0 + 0.5));
+
+   png_write_chunk(png_ptr, png_cHRM, chrm, (png_size_t)32);
+}
+#endif
+#ifdef PNG_FIXED_POINT_SUPPORTED
+/* Fixed-point entry point for the cHRM chunk.
+ *
+ * Coordinates are stored as given (they are expected to be in units of
+ * 1/100,000 already).  The white point and the red, green and blue
+ * primaries are validated in that order; the first invalid point
+ * produces a warning and the chunk is not written.
+ */
+void /* PRIVATE */
+png_write_cHRM_fixed(png_structp png_ptr, png_fixed_point white_x,
+   png_fixed_point white_y, png_fixed_point red_x, png_fixed_point red_y,
+   png_fixed_point green_x, png_fixed_point green_y, png_fixed_point blue_x,
+   png_fixed_point blue_y)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_cHRM;
+#endif
+   png_byte chrm[32];
+
+   png_debug(1, "in png_write_cHRM\n");
+
+   /* white point: each coordinate at most 80000, sum at most 100000 */
+   if (white_x > 80000L || white_y > 80000L || white_x + white_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid fixed cHRM white point specified");
+#if !defined(PNG_NO_CONSOLE_IO)
+      fprintf(stderr,"white_x=%ld, white_y=%ld\n",white_x, white_y);
+#endif
+      return;
+   }
+
+   /* primaries: the two coordinates may sum to at most 100000 */
+   if (red_x + red_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid cHRM fixed red point specified");
+      return;
+   }
+
+   if (green_x + green_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid fixed cHRM green point specified");
+      return;
+   }
+
+   if (blue_x + blue_y > 100000L)
+   {
+      png_warning(png_ptr, "Invalid fixed cHRM blue point specified");
+      return;
+   }
+
+   /* all four points passed; pack them in file order */
+   png_save_uint_32(chrm,      (png_uint_32)white_x);
+   png_save_uint_32(chrm + 4,  (png_uint_32)white_y);
+   png_save_uint_32(chrm + 8,  (png_uint_32)red_x);
+   png_save_uint_32(chrm + 12, (png_uint_32)red_y);
+   png_save_uint_32(chrm + 16, (png_uint_32)green_x);
+   png_save_uint_32(chrm + 20, (png_uint_32)green_y);
+   png_save_uint_32(chrm + 24, (png_uint_32)blue_x);
+   png_save_uint_32(chrm + 28, (png_uint_32)blue_y);
+
+   png_write_chunk(png_ptr, png_cHRM, chrm, (png_size_t)32);
+}
+#endif
+#endif
+
+#if defined(PNG_WRITE_tRNS_SUPPORTED)
+/* Write the tRNS chunk.  The payload depends on color_type:
+ *   palette   - the first num_trans bytes of trans, written as they are
+ *   grayscale - tran->gray as one 16-bit value
+ *   RGB       - tran->red, green and blue as three 16-bit values
+ * Any other color type only produces a warning.  Out-of-range input
+ * likewise produces a warning and no chunk.
+ */
+void /* PRIVATE */
+png_write_tRNS(png_structp png_ptr, png_bytep trans, png_color_16p tran,
+   int num_trans, int color_type)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_tRNS;
+#endif
+   png_byte samples[6];
+
+   png_debug(1, "in png_write_tRNS\n");
+
+   switch (color_type)
+   {
+      case PNG_COLOR_TYPE_PALETTE:
+         /* between 1 and num_palette entries are allowed */
+         if (num_trans <= 0 || num_trans > (int)png_ptr->num_palette)
+         {
+            png_warning(png_ptr,"Invalid number of transparent colors specified");
+            return;
+         }
+         png_write_chunk(png_ptr, png_tRNS, trans, (png_size_t)num_trans);
+         break;
+
+      case PNG_COLOR_TYPE_GRAY:
+         /* the gray level must be representable at the image bit depth */
+         if(tran->gray >= (1 << png_ptr->bit_depth))
+         {
+            png_warning(png_ptr,
+              "Ignoring attempt to write tRNS chunk out-of-range for bit_depth");
+            return;
+         }
+         png_save_uint_16(samples, tran->gray);
+         png_write_chunk(png_ptr, png_tRNS, samples, (png_size_t)2);
+         break;
+
+      case PNG_COLOR_TYPE_RGB:
+         png_save_uint_16(samples, tran->red);
+         png_save_uint_16(samples + 2, tran->green);
+         png_save_uint_16(samples + 4, tran->blue);
+         /* at bit depth 8 the high byte of every sample must be zero */
+         if(png_ptr->bit_depth == 8 && (samples[0] | samples[2] | samples[4]))
+         {
+            png_warning(png_ptr,
+              "Ignoring attempt to write 16-bit tRNS chunk when bit_depth is 8");
+            return;
+         }
+         png_write_chunk(png_ptr, png_tRNS, samples, (png_size_t)6);
+         break;
+
+      default:
+         png_warning(png_ptr, "Can't write tRNS with an alpha channel");
+         break;
+   }
+}
+#endif
+
+#if defined(PNG_WRITE_bKGD_SUPPORTED)
+/* Write the bKGD (background color) chunk.  The payload depends on
+ * color_type:
+ *   palette         - one byte, back->index
+ *   any color type  - back->red, green and blue as three 16-bit values
+ *   grayscale       - back->gray as one 16-bit value
+ *
+ * A palette index that does not address an existing palette entry, a
+ * 16-bit color value for an 8-bit image, or a gray level too large for
+ * the bit depth produces a warning and no chunk is written.
+ */
+void /* PRIVATE */
+png_write_bKGD(png_structp png_ptr, png_color_16p back, int color_type)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_bKGD;
+#endif
+   png_byte buf[6];
+
+   png_debug(1, "in png_write_bKGD\n");
+   if (color_type == PNG_COLOR_TYPE_PALETTE)
+   {
+      /* Valid indices are 0 .. num_palette-1, so the test is ">=".
+       * With MNG support the test is skipped only when the palette is
+       * empty and an empty PLTE has been permitted. */
+      if (
+#if defined(PNG_MNG_FEATURES_SUPPORTED)
+          (png_ptr->num_palette ||
+          (!(png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE))) &&
+#endif
+         back->index >= png_ptr->num_palette)
+      {
+         png_warning(png_ptr, "Invalid background palette index");
+         return;
+      }
+      buf[0] = back->index;
+      png_write_chunk(png_ptr, png_bKGD, buf, (png_size_t)1);
+   }
+   else if (color_type & PNG_COLOR_MASK_COLOR)
+   {
+      png_save_uint_16(buf, back->red);
+      png_save_uint_16(buf + 2, back->green);
+      png_save_uint_16(buf + 4, back->blue);
+      /* at bit depth 8 the high byte of every sample must be zero */
+      if(png_ptr->bit_depth == 8 && (buf[0] | buf[2] | buf[4]))
+         {
+            png_warning(png_ptr,
+              "Ignoring attempt to write 16-bit bKGD chunk when bit_depth is 8");
+            return;
+         }
+      png_write_chunk(png_ptr, png_bKGD, buf, (png_size_t)6);
+   }
+   else
+   {
+      /* the gray level must be representable at the image bit depth */
+      if(back->gray >= (1 << png_ptr->bit_depth))
+      {
+         png_warning(png_ptr,
+           "Ignoring attempt to write bKGD chunk out-of-range for bit_depth");
+         return;
+      }
+      png_save_uint_16(buf, back->gray);
+      png_write_chunk(png_ptr, png_bKGD, buf, (png_size_t)2);
+   }
+}
+#endif
+
+#if defined(PNG_WRITE_hIST_SUPPORTED)
+/* Write the hIST chunk: num_hist 16-bit frequencies taken from hist.
+ * A histogram with more entries than the palette is rejected with a
+ * warning and nothing is written.
+ */
+void /* PRIVATE */
+png_write_hIST(png_structp png_ptr, png_uint_16p hist, int num_hist)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_hIST;
+#endif
+   png_byte entry[3];
+   int n;
+
+   png_debug(1, "in png_write_hIST\n");
+
+   if (num_hist > (int)png_ptr->num_palette)
+   {
+      png_debug2(3, "num_hist = %d, num_palette = %d\n", num_hist,
+         png_ptr->num_palette);
+      png_warning(png_ptr, "Invalid number of histogram entries specified");
+      return;
+   }
+
+   /* two bytes per histogram entry */
+   png_write_chunk_start(png_ptr, png_hIST, (png_uint_32)(num_hist * 2));
+
+   n = 0;
+   while (n < num_hist)
+   {
+      png_save_uint_16(entry, hist[n]);
+      png_write_chunk_data(png_ptr, entry, (png_size_t)2);
+      n++;
+   }
+
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#if defined(PNG_WRITE_TEXT_SUPPORTED) || defined(PNG_WRITE_pCAL_SUPPORTED) || \
+    defined(PNG_WRITE_iCCP_SUPPORTED) || defined(PNG_WRITE_sPLT_SUPPORTED)
+/* Check that the tEXt or zTXt keyword is valid per PNG 1.0 specification,
+ * and if invalid, correct the keyword rather than discarding the entire
+ * chunk.  The PNG 1.0 specification requires keywords 1-79 characters in
+ * length, forbids leading or trailing whitespace, multiple internal spaces,
+ * and the non-break space (0x80) from ISO 8859-1.  Returns keyword length.
+ *
+ * The new_key is allocated to hold the corrected keyword and must be freed
+ * by the calling routine.  This avoids problems with trying to write to
+ * static keywords without having to have duplicate copies of the strings.
+ *
+ * When 0 is returned no buffer is handed to the caller: *new_key is NULL
+ * (either nothing was allocated, or the buffer was already freed here).
+ *
+ * The buffer is sized from the length of the original key plus two, so
+ * callers have one spare byte after the terminating NUL.
+ */
+png_size_t /* PRIVATE */
+png_check_keyword(png_structp png_ptr, png_charp key, png_charpp new_key)
+{
+   png_size_t key_len;
+   png_charp kp, dp;
+   int kflag;
+   int kwarn=0;
+
+   png_debug(1, "in png_check_keyword\n");
+   *new_key = NULL;
+
+   if (key == NULL || (key_len = png_strlen(key)) == 0)
+   {
+      png_warning(png_ptr, "zero length keyword");
+      return ((png_size_t)0);
+   }
+
+   png_debug1(2, "Keyword to be checked is '%s'\n", key);
+
+   *new_key = (png_charp)png_malloc_warn(png_ptr, (png_uint_32)(key_len + 2));
+   if (*new_key == NULL)
+   {
+      png_warning(png_ptr, "Out of memory while procesing keyword");
+      return ((png_size_t)0);
+   }
+
+   /* Replace non-printing characters with a blank and print a warning */
+   for (kp = key, dp = *new_key; *kp != '\0'; kp++, dp++)
+   {
+      if ((png_byte)*kp < 0x20 ||
+         ((png_byte)*kp > 0x7E && (png_byte)*kp < 0xA1))
+      {
+#if !defined(PNG_NO_STDIO) && !defined(_WIN32_WCE)
+         char msg[40];
+
+         png_snprintf(msg, 40,
+           "invalid keyword character 0x%02X", (png_byte)*kp);
+         png_warning(png_ptr, msg);
+#else
+         png_warning(png_ptr, "invalid character in keyword");
+#endif
+         *dp = ' ';
+      }
+      else
+      {
+         *dp = *kp;
+      }
+   }
+   *dp = '\0';
+
+   /* Remove any trailing white space. */
+   if ((*new_key)[key_len - 1] == ' ')
+   {
+      png_warning(png_ptr, "trailing spaces removed from keyword");
+
+      /* Bounded by key_len: a keyword made only of blanks must not make
+       * the scan step in front of the buffer. */
+      while (key_len > 0 && (*new_key)[key_len - 1] == ' ')
+      {
+        (*new_key)[--key_len] = '\0';
+      }
+   }
+
+   /* Remove any leading white space. */
+   kp = *new_key;
+   if (*kp == ' ')
+   {
+      png_warning(png_ptr, "leading spaces removed from keyword");
+
+      while (*kp == ' ')
+      {
+        kp++;
+        key_len--;
+      }
+   }
+
+   png_debug1(2, "Checking for multiple internal spaces in '%s'\n", kp);
+
+   /* Remove multiple internal spaces.  kp reads from the first non-blank
+    * character, dp writes from the start of the buffer, so this pass also
+    * shifts the keyword left over any leading blanks skipped above.
+    * kflag is 1 while the previous character kept was a space. */
+   for (kflag = 0, dp = *new_key; *kp != '\0'; kp++)
+   {
+      if (*kp == ' ' && kflag == 0)
+      {
+         *(dp++) = *kp;
+         kflag = 1;
+      }
+      else if (*kp == ' ')
+      {
+         key_len--;
+         kwarn=1;
+      }
+      else
+      {
+         *(dp++) = *kp;
+         kflag = 0;
+      }
+   }
+   *dp = '\0';
+   if(kwarn)
+      png_warning(png_ptr, "extra interior spaces removed from keyword");
+
+   if (key_len == 0)
+   {
+      png_free(png_ptr, *new_key);
+      *new_key=NULL;
+      png_warning(png_ptr, "Zero length keyword");
+      return ((png_size_t)0);
+   }
+
+   if (key_len > 79)
+   {
+      png_warning(png_ptr, "keyword length must be 1 - 79 characters");
+      /* Terminate the string itself.  Indexing new_key directly would
+       * address the 80th element of the caller's pointer and store a
+       * NULL pointer outside of it. */
+      (*new_key)[79] = '\0';
+      key_len = 79;
+   }
+
+   return (key_len);
+}
+#endif
+
+#if defined(PNG_WRITE_tEXt_SUPPORTED)
+/* Write a tEXt chunk: the cleaned-up keyword, its terminating NUL, then
+ * the text without a terminator.
+ *
+ * The text_len argument is not consulted; the length is always measured
+ * from text itself (zero for a NULL or empty string).  Nothing is
+ * written if key is NULL or reduces to an empty keyword.
+ */
+void /* PRIVATE */
+png_write_tEXt(png_structp png_ptr, png_charp key, png_charp text,
+   png_size_t text_len)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_tEXt;
+#endif
+   png_charp new_key;
+   png_size_t key_len;
+
+   png_debug(1, "in png_write_tEXt\n");
+
+   key_len = (key != NULL) ? png_check_keyword(png_ptr, key, &new_key) : 0;
+   if (key_len == 0)
+   {
+      png_warning(png_ptr, "Empty keyword in tEXt chunk");
+      return;
+   }
+
+   /* the caller-supplied length is replaced by the measured one */
+   text_len = (text != NULL && *text != '\0') ? png_strlen(text) : 0;
+
+   /* make sure we include the 0 after the key */
+   png_write_chunk_start(png_ptr, png_tEXt, (png_uint_32)key_len+text_len+1);
+
+   /*
+    * We leave it to the application to meet PNG-1.0 requirements on the
+    * contents of the text.  PNG-1.0 through PNG-1.2 discourage the use of
+    * any non-Latin-1 characters except for NEWLINE.  ISO PNG will forbid them.
+    * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG.
+    */
+   png_write_chunk_data(png_ptr, (png_bytep)new_key, key_len + 1);
+   if (text_len != 0)
+   {
+      png_write_chunk_data(png_ptr, (png_bytep)text, text_len);
+   }
+
+   png_write_chunk_end(png_ptr);
+   png_free(png_ptr, new_key);
+}
+#endif
+
+#if defined(PNG_WRITE_zTXt_SUPPORTED)
+/* Write a zTXt (compressed text) chunk: the cleaned-up keyword and its
+ * terminating NUL, one byte holding the compression value, then the
+ * compressed text.
+ *
+ * If text is NULL or empty, or compression is PNG_TEXT_COMPRESSION_NONE,
+ * the request is handed to png_write_tEXt() instead.  The text_len
+ * argument is not consulted; the length is measured from text.  Nothing
+ * is written if key is NULL or reduces to an empty keyword.
+ */
+void /* PRIVATE */
+png_write_zTXt(png_structp png_ptr, png_charp key, png_charp text,
+   png_size_t text_len, int compression)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_zTXt;
+#endif
+   compression_state comp;
+   png_charp new_key;
+   png_size_t key_len;
+   char method[1];
+
+   png_debug(1, "in png_write_zTXt\n");
+
+   /* start from an empty compression state */
+   comp.input = NULL;
+   comp.input_len = 0;
+   comp.output_ptr = NULL;
+   comp.num_output_ptr = 0;
+   comp.max_output_ptr = 0;
+
+   key_len = (key != NULL) ? png_check_keyword(png_ptr, key, &new_key) : 0;
+   if (key_len == 0)
+   {
+      png_warning(png_ptr, "Empty keyword in zTXt chunk");
+      return;
+   }
+
+   /* nothing to compress, or compression not wanted: write plain tEXt */
+   if (compression==PNG_TEXT_COMPRESSION_NONE || text == NULL ||
+       *text == '\0')
+   {
+      png_write_tEXt(png_ptr, new_key, text, (png_size_t)0);
+      png_free(png_ptr, new_key);
+      return;
+   }
+
+   /* compress first: the chunk length has to be known up front */
+   text_len = png_text_compress(png_ptr, text, png_strlen(text),
+       compression, &comp);
+
+   /* chunk length: key, its NUL, the compression byte, compressed text */
+   png_write_chunk_start(png_ptr, png_zTXt, (png_uint_32)
+      (key_len+text_len+2));
+
+   /* keyword with terminator; the copy is no longer needed afterwards */
+   png_write_chunk_data(png_ptr, (png_bytep)new_key, key_len + 1);
+   png_free(png_ptr, new_key);
+
+   /* compression byte */
+   method[0] = (png_byte)compression;
+   png_write_chunk_data(png_ptr, (png_bytep)method, (png_size_t)1);
+
+   /* compressed text */
+   png_write_compressed_data_out(png_ptr, &comp);
+
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#if defined(PNG_WRITE_iTXt_SUPPORTED)
+/* Write an iTXt chunk: keyword, compression flag and method, language tag,
+ * translated keyword and text.  A NULL "key", or one for which
+ * png_check_keyword() returns 0, only produces a warning; a NULL or rejected
+ * "lang" and a NULL "lang_key" are written as empty (NUL only) fields.
+ */
+void /* PRIVATE */
+png_write_iTXt(png_structp png_ptr, int compression, png_charp key,
+    png_charp lang, png_charp lang_key, png_charp text)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_iTXt;
+#endif
+   png_size_t lang_len, key_len, lang_key_len, text_len;
+   png_charp new_lang, new_key;
+   png_byte cbuf[2];
+   compression_state comp;
+
+   png_debug(1, "in png_write_iTXt\n");
+
+   /* start from an empty compression state, as png_write_zTXt does */
+   comp.num_output_ptr = 0;
+   comp.max_output_ptr = 0;
+   comp.output_ptr = NULL;
+   comp.input = NULL;
+   comp.input_len = 0;
+
+   if (key == NULL || (key_len = png_check_keyword(png_ptr, key, &new_key))==0)
+   {
+      png_warning(png_ptr, "Empty keyword in iTXt chunk");
+      return;
+   }
+   if (lang == NULL || (lang_len = png_check_keyword(png_ptr, lang, &new_lang))==0)
+   {
+      png_warning(png_ptr, "Empty language field in iTXt chunk");
+      new_lang = NULL;
+      lang_len = 0;
+   }
+
+   if (lang_key == NULL)
+      lang_key_len = 0;
+   else
+      lang_key_len = png_strlen(lang_key);
+
+   if (text == NULL)
+      text_len = 0;
+   else
+      text_len = png_strlen(text);
+
+   /* compute the compressed data; do it now for the length */
+   text_len = png_text_compress(png_ptr, text, text_len, compression-2, &comp);
+
+   /* the chunk length includes the compression flag, the compression
+    * method byte, and the NULs after the key, lang, and lang_key parts */
+   png_write_chunk_start(png_ptr, png_iTXt, (png_uint_32)(
+      5 /* comp byte, comp flag, terminators for key, lang and lang_key */
+      + key_len + lang_len + lang_key_len + text_len));
+
+   /*
+    * We leave it to the application to meet PNG-1.0 requirements on the
+    * contents of the text.  PNG-1.0 through PNG-1.2 discourage the use of
+    * any non-Latin-1 characters except for NEWLINE.  ISO PNG will forbid them.
+    * The NUL character is forbidden by PNG-1.0 through PNG-1.2 and ISO PNG.
+    */
+   png_write_chunk_data(png_ptr, (png_bytep)new_key, key_len + 1);
+
+   /* set the compression flag */
+   if (compression == PNG_ITXT_COMPRESSION_NONE ||
+       compression == PNG_TEXT_COMPRESSION_NONE)
+      cbuf[0] = 0;
+   else /* compression == PNG_ITXT_COMPRESSION_zTXt */
+      cbuf[0] = 1;
+   /* set the compression method */
+   cbuf[1] = 0;
+   png_write_chunk_data(png_ptr, cbuf, 2);
+
+   /* cbuf[0] now doubles as the lone NUL written for an absent field */
+   cbuf[0] = 0;
+   png_write_chunk_data(png_ptr, (new_lang ? (png_bytep)new_lang : cbuf), lang_len + 1);
+   png_write_chunk_data(png_ptr, (lang_key ? (png_bytep)lang_key : cbuf), lang_key_len + 1);
+   png_write_compressed_data_out(png_ptr, &comp);
+
+   png_write_chunk_end(png_ptr);
+   png_free(png_ptr, new_key);
+   if (new_lang)
+      png_free(png_ptr, new_lang);
+}
+#endif
+
+#if defined(PNG_WRITE_oFFs_SUPPORTED)
+/* Emit the oFFs chunk: x offset, y offset (4 bytes each) and the unit byte.
+ * An unknown unit type only triggers a warning; the chunk is still written. */
+void /* PRIVATE */
+png_write_oFFs(png_structp png_ptr, png_int_32 x_offset, png_int_32 y_offset,
+   int unit_type)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_oFFs;
+#endif
+   png_byte offs_data[9];
+
+   png_debug(1, "in png_write_oFFs\n");
+
+   if (!(unit_type < PNG_OFFSET_LAST))
+      png_warning(png_ptr, "Unrecognized unit type for oFFs chunk");
+   png_save_int_32(&offs_data[0], x_offset);
+   png_save_int_32(&offs_data[4], y_offset);
+   offs_data[8] = (png_byte)unit_type;
+   png_write_chunk(png_ptr, png_oFFs, offs_data, (png_size_t)9);
+}
+#endif
+#if defined(PNG_WRITE_pCAL_SUPPORTED)
+/* Write the pCAL chunk (described in the PNG extensions document). */
+void /* PRIVATE */
+png_write_pCAL(png_structp png_ptr, png_charp purpose, png_int_32 X0,
+   png_int_32 X1, int type, int nparams, png_charp units, png_charpp params)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_pCAL;
+#endif
+   png_size_t purpose_len, units_len, total_len;
+   png_uint_32p params_len;
+   png_byte buf[10];
+   png_charp new_purpose;
+   int i;
+
+   png_debug1(1, "in png_write_pCAL (%d parameters)\n", nparams);
+   if (type >= PNG_EQUATION_LAST)
+      png_warning(png_ptr, "Unrecognized equation type for pCAL chunk");
+
+   /* no usable purpose keyword: skip the chunk, as zTXt/iTXt do for theirs */
+   if (purpose == NULL ||
+       (purpose_len = png_check_keyword(png_ptr, purpose, &new_purpose)) == 0)
+   {
+      png_warning(png_ptr, "Empty purpose in pCAL chunk");
+      return;
+   }
+   purpose_len++;   /* the NUL that follows the purpose is written too */
+   png_debug1(3, "pCAL purpose length = %d\n", (int)purpose_len);
+   units_len = png_strlen(units) + (nparams == 0 ? 0 : 1);
+   png_debug1(3, "pCAL units length = %d\n", (int)units_len);
+   total_len = purpose_len + units_len + 10;  /* 10 = X0, X1, type, nparams */
+
+   params_len = (png_uint_32p)png_malloc(png_ptr, (png_uint_32)(nparams
+      *png_sizeof(png_uint_32)));
+   /* Sum each parameter's length; the last one has no null terminator. */
+   for (i = 0; i < nparams; i++)
+   {
+      params_len[i] = png_strlen(params[i]) + (i == nparams - 1 ? 0 : 1);
+      png_debug2(3, "pCAL parameter %d length = %lu\n", i, params_len[i]);
+      total_len += (png_size_t)params_len[i];
+   }
+
+   png_debug1(3, "pCAL total length = %d\n", (int)total_len);
+   png_write_chunk_start(png_ptr, png_pCAL, (png_uint_32)total_len);
+   png_write_chunk_data(png_ptr, (png_bytep)new_purpose, purpose_len);
+   png_save_int_32(buf, X0);
+   png_save_int_32(buf + 4, X1);
+   buf[8] = (png_byte)type;
+   buf[9] = (png_byte)nparams;
+   png_write_chunk_data(png_ptr, buf, (png_size_t)10);
+   png_write_chunk_data(png_ptr, (png_bytep)units, (png_size_t)units_len);
+   png_free(png_ptr, new_purpose);
+   for (i = 0; i < nparams; i++)
+      png_write_chunk_data(png_ptr, (png_bytep)params[i],
+         (png_size_t)params_len[i]);
+   png_free(png_ptr, params_len);
+   png_write_chunk_end(png_ptr);
+}
+#endif
+
+#if defined(PNG_WRITE_sCAL_SUPPORTED)
+/* write the sCAL chunk */
+#if defined(PNG_FLOATING_POINT_SUPPORTED) && !defined(PNG_NO_STDIO)
+void /* PRIVATE */
+png_write_sCAL(png_structp png_ptr, int unit, double width, double height)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_sCAL;
+#endif
+   char buf[64];           /* unit byte, width text, NUL, height text */
+   png_size_t total_len;   /* number of bytes of buf filled in so far */
+   /* Writes the sCAL chunk; width and height are formatted with %12.12e. */
+   png_debug(1, "in png_write_sCAL\n");
+
+   buf[0] = (char)unit;
+#if defined(_WIN32_WCE)
+/* sprintf() function is not supported on WindowsCE */
+   {
+      wchar_t wc_buf[32];
+      size_t wc_len;
+      swprintf(wc_buf, TEXT("%12.12e"), width);
+      wc_len = wcslen(wc_buf);
+      WideCharToMultiByte(CP_ACP, 0, wc_buf, -1, buf + 1, wc_len, NULL, NULL);
+      total_len = wc_len + 2;   /* assumes a NUL separator was stored - TODO confirm */
+      swprintf(wc_buf, TEXT("%12.12e"), height);
+      wc_len = wcslen(wc_buf);
+      WideCharToMultiByte(CP_ACP, 0, wc_buf, -1, buf + total_len, wc_len,
+         NULL, NULL);
+      total_len += wc_len;
+   }
+#else
+   png_snprintf(buf + 1, 63, "%12.12e", width);
+   total_len = 1 + png_strlen(buf + 1) + 1;  /* unit + width + its NUL */
+   png_snprintf(buf + total_len, 64-total_len, "%12.12e", height);
+   total_len += png_strlen(buf + total_len); /* height's NUL is not counted */
+#endif
+
+   png_debug1(3, "sCAL total length = %u\n", (unsigned int)total_len);
+   png_write_chunk(png_ptr, png_sCAL, (png_bytep)buf, total_len);
+}
+#else
+#ifdef PNG_FIXED_POINT_SUPPORTED
+void /* PRIVATE */
+png_write_sCAL_s(png_structp png_ptr, int unit, png_charp width,
+   png_charp height)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_sCAL;
+#endif
+   /* chunk layout: unit byte, width string, NUL, height string (no NUL) */
+   png_byte chunk[64];
+   png_size_t width_len, height_len, chunk_len;
+
+   png_debug(1, "in png_write_sCAL_s\n");
+
+   width_len = png_strlen(width);
+   height_len = png_strlen(height);
+   chunk_len = 2 + width_len + height_len;
+   if (chunk_len > 64)
+   {
+      png_warning(png_ptr, "Can't write sCAL (buffer too small)");
+      return;
+   }
+
+   chunk[0] = (png_byte)unit;
+   png_memcpy(&chunk[1], width, width_len + 1);         /* keeps the '\0' */
+   png_memcpy(&chunk[width_len + 2], height, height_len);  /* drops the '\0' */
+   png_debug1(3, "sCAL total length = %u\n", (unsigned int)chunk_len);
+   png_write_chunk(png_ptr, png_sCAL, chunk, chunk_len);
+}
+#endif
+#endif
+#endif
+
+#if defined(PNG_WRITE_pHYs_SUPPORTED)
+/* Emit the pHYs chunk: x and y pixels-per-unit (4 bytes each), then the
+ * unit byte.  An unrecognized unit type is warned about but still written. */
+void /* PRIVATE */
+png_write_pHYs(png_structp png_ptr, png_uint_32 x_pixels_per_unit,
+   png_uint_32 y_pixels_per_unit, int unit_type)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_pHYs;
+#endif
+   png_byte phys_data[9];
+
+   png_debug(1, "in png_write_pHYs\n");
+
+   if (!(unit_type < PNG_RESOLUTION_LAST))
+      png_warning(png_ptr, "Unrecognized unit type for pHYs chunk");
+
+   png_save_uint_32(&phys_data[0], x_pixels_per_unit);
+   png_save_uint_32(&phys_data[4], y_pixels_per_unit);
+   phys_data[8] = (png_byte)unit_type;
+   png_write_chunk(png_ptr, png_pHYs, phys_data, (png_size_t)9);
+}
+#endif
+
+#if defined(PNG_WRITE_tIME_SUPPORTED)
+/* Write the tIME chunk.  Use either png_convert_from_struct_tm()
+ * or png_convert_from_time_t(), or fill in the structure yourself.
+ * Out-of-range fields (second may be 60) cause a warning and no chunk. */
+void /* PRIVATE */
+png_write_tIME(png_structp png_ptr, png_timep mod_time)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   PNG_tIME;
+#endif
+   png_byte buf[7];
+
+   png_debug(1, "in png_write_tIME\n");
+   if (mod_time->month  > 12 || mod_time->month  < 1 ||
+       mod_time->day    > 31 || mod_time->day    < 1 ||
+       mod_time->hour   > 23 || mod_time->minute > 59 ||
+       mod_time->second > 60)
+   {
+      png_warning(png_ptr, "Invalid time specified for tIME chunk");
+      return;
+   }
+
+   png_save_uint_16(buf, mod_time->year);   /* year fills buf[0..1] */
+   buf[2] = mod_time->month;
+   buf[3] = mod_time->day;
+   buf[4] = mod_time->hour;
+   buf[5] = mod_time->minute;
+   buf[6] = mod_time->second;
+   png_write_chunk(png_ptr, png_tIME, buf, (png_size_t)7);
+}
+#endif
+
+/* Initializes the row writing capability of libpng: allocates the row
+ * buffer and whichever filter buffers do_filter asks for, sets num_rows and
+ * usr_width for the first pass, and points the zlib output at zbuf.
+ */
+void /* PRIVATE */
+png_write_start_row(png_structp png_ptr)
+{
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+#ifdef PNG_USE_LOCAL_ARRAYS
+   /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+   /* start of interlace block */
+   int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+   /* offset to next interlace block */
+   int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+   /* start of interlace block in the y direction */
+   int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+   /* offset to next interlace block in the y direction */
+   int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+#endif
+#endif
+
+   png_size_t buf_size;
+
+   png_debug(1, "in png_write_start_row\n");
+   buf_size = (png_size_t)(PNG_ROWBYTES(
+      png_ptr->usr_channels*png_ptr->usr_bit_depth,png_ptr->width)+1);
+
+   /* set up row buffer */
+   png_ptr->row_buf = (png_bytep)png_malloc(png_ptr, (png_uint_32)buf_size);
+   png_ptr->row_buf[0] = PNG_FILTER_VALUE_NONE;
+
+#ifndef PNG_NO_WRITE_FILTERING
+   /* NOTE(review): png_write_find_filter uses PNG_NO_WRITE_FILTER - confirm */
+   /* set up filtering buffer, if using this filter */
+   if (png_ptr->do_filter & PNG_FILTER_SUB)
+   {
+      png_ptr->sub_row = (png_bytep)png_malloc(png_ptr,
+         (png_ptr->rowbytes + 1));
+      png_ptr->sub_row[0] = PNG_FILTER_VALUE_SUB;
+   }
+
+   /* We only need to keep the previous row if we are using one of these. */
+   if (png_ptr->do_filter & (PNG_FILTER_AVG | PNG_FILTER_UP | PNG_FILTER_PAETH))
+   {
+      /* set up previous row buffer */
+      png_ptr->prev_row = (png_bytep)png_malloc(png_ptr, (png_uint_32)buf_size);
+      png_memset(png_ptr->prev_row, 0, buf_size);
+
+      if (png_ptr->do_filter & PNG_FILTER_UP)
+      {
+         png_ptr->up_row = (png_bytep)png_malloc(png_ptr,
+            (png_ptr->rowbytes + 1));
+         png_ptr->up_row[0] = PNG_FILTER_VALUE_UP;
+      }
+
+      if (png_ptr->do_filter & PNG_FILTER_AVG)
+      {
+         png_ptr->avg_row = (png_bytep)png_malloc(png_ptr,
+            (png_ptr->rowbytes + 1));
+         png_ptr->avg_row[0] = PNG_FILTER_VALUE_AVG;
+      }
+
+      if (png_ptr->do_filter & PNG_FILTER_PAETH)
+      {
+         png_ptr->paeth_row = (png_bytep)png_malloc(png_ptr,
+            (png_ptr->rowbytes + 1));
+         png_ptr->paeth_row[0] = PNG_FILTER_VALUE_PAETH;
+      }
+   }  /* closed inside the #ifndef so braces balance when it is compiled out */
+#endif /* PNG_NO_WRITE_FILTERING */
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   /* if interlaced, we need to set up width and height of pass */
+   if (png_ptr->interlaced)
+   {
+      if (!(png_ptr->transformations & PNG_INTERLACE))
+      {
+         png_ptr->num_rows = (png_ptr->height + png_pass_yinc[0] - 1 -
+            png_pass_ystart[0]) / png_pass_yinc[0];
+         png_ptr->usr_width = (png_ptr->width + png_pass_inc[0] - 1 -
+            png_pass_start[0]) / png_pass_inc[0];
+      }
+      else
+      {
+         png_ptr->num_rows = png_ptr->height;
+         png_ptr->usr_width = png_ptr->width;
+      }
+   }
+   else
+#endif
+   {
+      png_ptr->num_rows = png_ptr->height;
+      png_ptr->usr_width = png_ptr->width;
+   }
+   png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+   png_ptr->zstream.next_out = png_ptr->zbuf;
+}
+
+/* Internal use only.  Called when finished processing a row of data.
+ * Advances row_number; at the end of an interlace pass moves on to the next
+ * pass, and after the last row finishes the zlib stream and writes out the
+ * remaining IDAT data.
+ */
+void /* PRIVATE */
+png_write_finish_row(png_structp png_ptr)
+{
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+#ifdef PNG_USE_LOCAL_ARRAYS
+   /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* start of interlace block */
+   int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+   /* offset to next interlace block */
+   int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+   /* start of interlace block in the y direction */
+   int png_pass_ystart[7] = {0, 0, 4, 0, 2, 0, 1};
+   /* offset to next interlace block in the y direction */
+   int png_pass_yinc[7] = {8, 8, 8, 4, 4, 2, 2};
+#endif
+#endif
+
+   int ret;
+
+   png_debug(1, "in png_write_finish_row\n");
+   /* next row */
+   png_ptr->row_number++;
+
+   /* see if we are done */
+   if (png_ptr->row_number < png_ptr->num_rows)
+      return;
+
+#ifdef PNG_WRITE_INTERLACING_SUPPORTED
+   /* if interlaced, go to next pass */
+   if (png_ptr->interlaced)
+   {
+      png_ptr->row_number = 0;
+      if (png_ptr->transformations & PNG_INTERLACE)
+      {
+         png_ptr->pass++;
+      }
+      else
+      {
+         /* loop until we find a non-zero width or height pass */
+         do
+         {
+            png_ptr->pass++;
+            if (png_ptr->pass >= 7)
+               break;
+            png_ptr->usr_width = (png_ptr->width +
+               png_pass_inc[png_ptr->pass] - 1 -
+               png_pass_start[png_ptr->pass]) /
+               png_pass_inc[png_ptr->pass];
+            png_ptr->num_rows = (png_ptr->height +
+               png_pass_yinc[png_ptr->pass] - 1 -
+               png_pass_ystart[png_ptr->pass]) /
+               png_pass_yinc[png_ptr->pass];
+            /* PNG_INTERLACE is known to be clear in this branch */
+         } while (png_ptr->usr_width == 0 || png_ptr->num_rows == 0);
+
+      }
+
+      /* reset the row above the image for the next pass */
+      if (png_ptr->pass < 7)
+      {
+         if (png_ptr->prev_row != NULL)
+            png_memset(png_ptr->prev_row, 0,
+               (png_size_t)(PNG_ROWBYTES(png_ptr->usr_channels*
+               png_ptr->usr_bit_depth,png_ptr->width))+1);
+         return;
+      }
+   }
+#endif
+
+   /* if we get here, we've just written the last row, so we need
+      to flush the compressor */
+   do
+   {
+      /* tell the compressor we are done */
+      ret = deflate(&png_ptr->zstream, Z_FINISH);
+      /* check for an error */
+      if (ret == Z_OK)
+      {
+         /* check to see if we need more room */
+         if (!(png_ptr->zstream.avail_out))
+         {
+            png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size);
+            png_ptr->zstream.next_out = png_ptr->zbuf;
+            png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+         }
+      }
+      else if (ret != Z_STREAM_END)
+      {
+         if (png_ptr->zstream.msg != NULL)
+            png_error(png_ptr, png_ptr->zstream.msg);
+         else
+            png_error(png_ptr, "zlib error");
+      }
+   } while (ret != Z_STREAM_END);
+
+   /* write any extra space */
+   if (png_ptr->zstream.avail_out < png_ptr->zbuf_size)
+   {
+      png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size -
+         png_ptr->zstream.avail_out);
+   }
+
+   deflateReset(&png_ptr->zstream);
+   png_ptr->zstream.data_type = Z_BINARY;
+}
+
+#if defined(PNG_WRITE_INTERLACING_SUPPORTED)
+/* Pick out the correct pixels for the interlace pass.
+ * The basic idea here is to go through the row with a source pointer and a
+ * destination pointer (sp and dp), and copy the correct pixels for the pass.
+ * As the row gets compacted, sp will always be >= dp, so we never overwrite
+ * anything.  See the default: case for the easiest code to understand.
+ * For pass < 6, row_info->width and rowbytes are updated to the new size.
+ */
+void /* PRIVATE */
+png_do_write_interlace(png_row_infop row_info, png_bytep row, int pass)
+{
+#ifdef PNG_USE_LOCAL_ARRAYS
+   /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */
+
+   /* start of interlace block */
+   int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
+
+   /* offset to next interlace block */
+   int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
+#endif
+
+   png_debug(1, "in png_do_write_interlace\n");
+   /* we don't have to do anything on the last pass (6) */
+#if defined(PNG_USELESS_TESTS_SUPPORTED)
+   if (row != NULL && row_info != NULL && pass < 6)
+#else
+   if (pass < 6)
+#endif
+   {
+      /* each pixel depth is handled separately */
+      switch (row_info->pixel_depth)
+      {
+         case 1:
+         {
+            png_bytep sp;
+            png_bytep dp;
+            int shift;
+            int d;
+            int value;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            dp = row;
+            d = 0;       /* output byte being assembled */
+            shift = 7;   /* bit position for the next pixel, high bit first */
+            for (i = png_pass_start[pass]; i < row_width;
+               i += png_pass_inc[pass])
+            {
+               sp = row + (png_size_t)(i >> 3);
+               value = (int)(*sp >> (7 - (int)(i & 0x07))) & 0x01;
+               d |= (value << shift);
+
+               if (shift == 0)
+               {
+                  shift = 7;
+                  *dp++ = (png_byte)d;
+                  d = 0;
+               }
+               else
+                  shift--;
+
+            }
+            if (shift != 7)   /* flush a partly filled last byte */
+               *dp = (png_byte)d;
+            break;
+         }
+         case 2:
+         {
+            png_bytep sp;
+            png_bytep dp;
+            int shift;
+            int d;
+            int value;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            dp = row;
+            shift = 6;   /* four 2-bit pixels per byte, high bits first */
+            d = 0;
+            for (i = png_pass_start[pass]; i < row_width;
+               i += png_pass_inc[pass])
+            {
+               sp = row + (png_size_t)(i >> 2);
+               value = (*sp >> ((3 - (int)(i & 0x03)) << 1)) & 0x03;
+               d |= (value << shift);
+
+               if (shift == 0)
+               {
+                  shift = 6;
+                  *dp++ = (png_byte)d;
+                  d = 0;
+               }
+               else
+                  shift -= 2;
+            }
+            if (shift != 6)   /* flush a partly filled last byte */
+                   *dp = (png_byte)d;
+            break;
+         }
+         case 4:
+         {
+            png_bytep sp;
+            png_bytep dp;
+            int shift;
+            int d;
+            int value;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+
+            dp = row;
+            shift = 4;   /* two 4-bit pixels per byte, high nibble first */
+            d = 0;
+            for (i = png_pass_start[pass]; i < row_width;
+               i += png_pass_inc[pass])
+            {
+               sp = row + (png_size_t)(i >> 1);
+               value = (*sp >> ((1 - (int)(i & 0x01)) << 2)) & 0x0f;
+               d |= (value << shift);
+
+               if (shift == 0)
+               {
+                  shift = 4;
+                  *dp++ = (png_byte)d;
+                  d = 0;
+               }
+               else
+                  shift -= 4;
+            }
+            if (shift != 4)   /* flush a partly filled last byte */
+               *dp = (png_byte)d;
+            break;
+         }
+         default:
+         {
+            png_bytep sp;
+            png_bytep dp;
+            png_uint_32 i;
+            png_uint_32 row_width = row_info->width;
+            png_size_t pixel_bytes;
+
+            /* start at the beginning */
+            dp = row;
+            /* find out how many bytes each pixel takes up */
+            pixel_bytes = (row_info->pixel_depth >> 3);
+            /* loop through the row, only looking at the pixels that
+               matter */
+            for (i = png_pass_start[pass]; i < row_width;
+               i += png_pass_inc[pass])
+            {
+               /* find out where the original pixel is */
+               sp = row + (png_size_t)i * pixel_bytes;
+               /* move the pixel (skipped when it is already in place) */
+               if (dp != sp)
+                  png_memcpy(dp, sp, pixel_bytes);
+               /* next pixel */
+               dp += pixel_bytes;
+            }
+            break;
+         }
+      }
+      /* set new row width: the number of pixels this pass takes from the row */
+      row_info->width = (row_info->width +
+         png_pass_inc[pass] - 1 -
+         png_pass_start[pass]) /
+         png_pass_inc[pass];
+         row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,
+            row_info->width);
+   }
+}
+#endif
+
+/* This filters the row, chooses which filter to use, if it has not already
+ * been specified by the application, and then writes the row out with the
+ * chosen filter.
+ */
+#define PNG_MAXSUM (((png_uint_32)(-1)) >> 1)
+#define PNG_HISHIFT 10
+#define PNG_LOMASK ((png_uint_32)0xffffL)
+#define PNG_HIMASK ((png_uint_32)(~PNG_LOMASK >> PNG_HISHIFT))
+void /* PRIVATE */
+png_write_find_filter(png_structp png_ptr, png_row_infop row_info)
+{
+   png_bytep best_row;
+#ifndef PNG_NO_WRITE_FILTER
+   png_bytep prev_row, row_buf;
+   png_uint_32 mins, bpp;
+   png_byte filter_to_do = png_ptr->do_filter;
+   png_uint_32 row_bytes = row_info->rowbytes;
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+   int num_p_filters = (int)png_ptr->num_prev_filters;
+#endif
+
+   png_debug(1, "in png_write_find_filter\n");
+   /* find out how many bytes offset each pixel is */
+   bpp = (row_info->pixel_depth + 7) >> 3;
+
+   prev_row = png_ptr->prev_row;
+#endif
+   best_row = png_ptr->row_buf;
+#ifndef PNG_NO_WRITE_FILTER
+   row_buf = best_row;
+   mins = PNG_MAXSUM;
+
+   /* The prediction method we use is to find which method provides the
+    * smallest value when summing the absolute values of the distances
+    * from zero, using anything >= 128 as negative numbers.  This is known
+    * as the "minimum sum of absolute differences" heuristic.  Other
+    * heuristics are the "weighted minimum sum of absolute differences"
+    * (experimental and can in theory improve compression), and the "zlib
+    * predictive" method (not implemented yet), which does test compressions
+    * of lines using different filter methods, and then chooses the
+    * (series of) filter(s) that give minimum compressed data size (VERY
+    * computationally expensive).
+    *
+    * GRR 980525:  consider also
+    *   (1) minimum sum of absolute differences from running average (i.e.,
+    *       keep running sum of non-absolute differences & count of bytes)
+    *       [track dispersion, too?  restart average if dispersion too large?]
+    *  (1b) minimum sum of absolute differences from sliding average, probably
+    *       with window size <= deflate window (usually 32K)
+    *   (2) minimum sum of squared differences from zero or running average
+    *       (i.e., ~ root-mean-square approach)
+    */
+
+
+   /* We don't need to test the 'no filter' case if this is the only filter
+    * that has been chosen, as it doesn't actually do anything to the data.
+    */
+   if ((filter_to_do & PNG_FILTER_NONE) &&
+       filter_to_do != PNG_FILTER_NONE)
+   {
+      png_bytep rp;
+      png_uint_32 sum = 0;
+      png_uint_32 i;
+      int v;
+
+      for (i = 0, rp = row_buf + 1; i < row_bytes; i++, rp++)
+      {
+         v = *rp;
+         sum += (v < 128) ? v : 256 - v;
+      }
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         png_uint_32 sumhi, sumlo;
+         int j;
+         sumlo = sum & PNG_LOMASK;
+         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK; /* Gives us some footroom */
+
+         /* Reduce the sum if we match any of the previous rows */
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_NONE)
+            {
+               sumlo = (sumlo * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               sumhi = (sumhi * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         /* Factor in the cost of this filter (this is here for completeness,
+          * but it makes no sense to have a "cost" for the NONE filter, as
+          * it has the minimum possible computational cost - none).
+          */
+         sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_NONE]) >>
+            PNG_COST_SHIFT;
+         sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_NONE]) >>
+            PNG_COST_SHIFT;
+
+         if (sumhi > PNG_HIMASK)
+            sum = PNG_MAXSUM;
+         else
+            sum = (sumhi << PNG_HISHIFT) + sumlo;
+      }
+#endif
+      mins = sum;
+   }
+
+   /* sub filter */
+   if (filter_to_do == PNG_FILTER_SUB)
+   /* it's the only filter so no testing is needed */
+   {
+      png_bytep rp, lp, dp;
+      png_uint_32 i;
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->sub_row + 1; i < bpp;
+           i++, rp++, dp++)
+      {
+         *dp = *rp;
+      }
+      for (lp = row_buf + 1; i < row_bytes;
+         i++, rp++, lp++, dp++)
+      {
+         *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff);
+      }
+      best_row = png_ptr->sub_row;
+   }
+
+   else if (filter_to_do & PNG_FILTER_SUB)
+   {
+      png_bytep rp, dp, lp;
+      png_uint_32 sum = 0, lmins = mins;
+      png_uint_32 i;
+      int v;
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      /* We temporarily increase the "minimum sum" by the factor we
+       * would reduce the sum of this filter, so that we can do the
+       * early exit comparison without scaling the sum each time.
+       */
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 lmhi, lmlo;
+         lmlo = lmins & PNG_LOMASK;
+         lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_SUB)
+            {
+               lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >>
+            PNG_COST_SHIFT;
+         lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >>
+            PNG_COST_SHIFT;
+
+         if (lmhi > PNG_HIMASK)
+            lmins = PNG_MAXSUM;
+         else
+            lmins = (lmhi << PNG_HISHIFT) + lmlo;
+      }
+#endif
+
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->sub_row + 1; i < bpp;
+           i++, rp++, dp++)
+      {
+         v = *dp = *rp;
+
+         sum += (v < 128) ? v : 256 - v;
+      }
+      for (lp = row_buf + 1; i < row_bytes;
+         i++, rp++, lp++, dp++)
+      {
+         v = *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff);
+
+         sum += (v < 128) ? v : 256 - v;
+
+         if (sum > lmins)  /* We are already worse, don't continue. */
+            break;
+      }
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 sumhi, sumlo;
+         sumlo = sum & PNG_LOMASK;
+         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_SUB)
+            {
+               sumlo = (sumlo * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               sumhi = (sumhi * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         sumlo = (sumlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >>
+            PNG_COST_SHIFT;
+         sumhi = (sumhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_SUB]) >>
+            PNG_COST_SHIFT;
+
+         if (sumhi > PNG_HIMASK)
+            sum = PNG_MAXSUM;
+         else
+            sum = (sumhi << PNG_HISHIFT) + sumlo;
+      }
+#endif
+
+      if (sum < mins)
+      {
+         mins = sum;
+         best_row = png_ptr->sub_row;
+      }
+   }
+
+   /* up filter */
+   if (filter_to_do == PNG_FILTER_UP)
+   {
+      png_bytep rp, dp, pp;
+      png_uint_32 i;
+
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->up_row + 1,
+           pp = prev_row + 1; i < row_bytes;
+           i++, rp++, pp++, dp++)
+      {
+         *dp = (png_byte)(((int)*rp - (int)*pp) & 0xff);
+      }
+      best_row = png_ptr->up_row;
+   }
+
+   else if (filter_to_do & PNG_FILTER_UP)
+   {
+      png_bytep rp, dp, pp;
+      png_uint_32 sum = 0, lmins = mins;
+      png_uint_32 i;
+      int v;
+
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 lmhi, lmlo;
+         lmlo = lmins & PNG_LOMASK;
+         lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_UP)
+            {
+               lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_UP]) >>
+            PNG_COST_SHIFT;
+         lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_UP]) >>
+            PNG_COST_SHIFT;
+
+         if (lmhi > PNG_HIMASK)
+            lmins = PNG_MAXSUM;
+         else
+            lmins = (lmhi << PNG_HISHIFT) + lmlo;
+      }
+#endif
+
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->up_row + 1,
+           pp = prev_row + 1; i < row_bytes; i++)
+      {
+         v = *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff);
+
+         sum += (v < 128) ? v : 256 - v;
+
+         if (sum > lmins)  /* We are already worse, don't continue. */
+            break;
+      }
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 sumhi, sumlo;
+         sumlo = sum & PNG_LOMASK;
+         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_UP)
+            {
+               sumlo = (sumlo * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               sumhi = (sumhi * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_UP]) >>
+            PNG_COST_SHIFT;
+         sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_UP]) >>
+            PNG_COST_SHIFT;
+
+         if (sumhi > PNG_HIMASK)
+            sum = PNG_MAXSUM;
+         else
+            sum = (sumhi << PNG_HISHIFT) + sumlo;
+      }
+#endif
+
+      if (sum < mins)
+      {
+         mins = sum;
+         best_row = png_ptr->up_row;
+      }
+   }
+
+   /* avg filter */
+   if (filter_to_do == PNG_FILTER_AVG)
+   {
+      png_bytep rp, dp, pp, lp;
+      png_uint_32 i;
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->avg_row + 1,
+           pp = prev_row + 1; i < bpp; i++)
+      {
+         *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff);
+      }
+      for (lp = row_buf + 1; i < row_bytes; i++)
+      {
+         *dp++ = (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2))
+                 & 0xff);
+      }
+      best_row = png_ptr->avg_row;
+   }
+
+   else if (filter_to_do & PNG_FILTER_AVG)
+   {
+      png_bytep rp, dp, pp, lp;
+      png_uint_32 sum = 0, lmins = mins;
+      png_uint_32 i;
+      int v;
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 lmhi, lmlo;
+         lmlo = lmins & PNG_LOMASK;
+         lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_AVG)
+            {
+               lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_AVG]) >>
+            PNG_COST_SHIFT;
+         lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_AVG]) >>
+            PNG_COST_SHIFT;
+
+         if (lmhi > PNG_HIMASK)
+            lmins = PNG_MAXSUM;
+         else
+            lmins = (lmhi << PNG_HISHIFT) + lmlo;
+      }
+#endif
+
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->avg_row + 1,
+           pp = prev_row + 1; i < bpp; i++)
+      {
+         v = *dp++ = (png_byte)(((int)*rp++ - ((int)*pp++ / 2)) & 0xff);
+
+         sum += (v < 128) ? v : 256 - v;
+      }
+      for (lp = row_buf + 1; i < row_bytes; i++)
+      {
+         v = *dp++ =
+          (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2)) & 0xff);
+
+         sum += (v < 128) ? v : 256 - v;
+
+         if (sum > lmins)  /* We are already worse, don't continue. */
+            break;
+      }
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 sumhi, sumlo;
+         sumlo = sum & PNG_LOMASK;
+         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_NONE)
+            {
+               sumlo = (sumlo * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               sumhi = (sumhi * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_AVG]) >>
+            PNG_COST_SHIFT;
+         sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_AVG]) >>
+            PNG_COST_SHIFT;
+
+         if (sumhi > PNG_HIMASK)
+            sum = PNG_MAXSUM;
+         else
+            sum = (sumhi << PNG_HISHIFT) + sumlo;
+      }
+#endif
+
+      if (sum < mins)
+      {
+         mins = sum;
+         best_row = png_ptr->avg_row;
+      }
+   }
+
+   /* Paeth filter */
+   if (filter_to_do == PNG_FILTER_PAETH)
+   {
+      png_bytep rp, dp, pp, cp, lp;
+      png_uint_32 i;
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->paeth_row + 1,
+           pp = prev_row + 1; i < bpp; i++)
+      {
+         *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff);
+      }
+
+      for (lp = row_buf + 1, cp = prev_row + 1; i < row_bytes; i++)
+      {
+         int a, b, c, pa, pb, pc, p;
+
+         b = *pp++;
+         c = *cp++;
+         a = *lp++;
+
+         p = b - c;
+         pc = a - c;
+
+#ifdef PNG_USE_ABS
+         pa = abs(p);
+         pb = abs(pc);
+         pc = abs(p + pc);
+#else
+         pa = p < 0 ? -p : p;
+         pb = pc < 0 ? -pc : pc;
+         pc = (p + pc) < 0 ? -(p + pc) : p + pc;
+#endif
+
+         p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
+
+         *dp++ = (png_byte)(((int)*rp++ - p) & 0xff);
+      }
+      best_row = png_ptr->paeth_row;
+   }
+
+   else if (filter_to_do & PNG_FILTER_PAETH)
+   {
+      png_bytep rp, dp, pp, cp, lp;
+      png_uint_32 sum = 0, lmins = mins;
+      png_uint_32 i;
+      int v;
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 lmhi, lmlo;
+         lmlo = lmins & PNG_LOMASK;
+         lmhi = (lmins >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_PAETH)
+            {
+               lmlo = (lmlo * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               lmhi = (lmhi * png_ptr->inv_filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         lmlo = (lmlo * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_PAETH]) >>
+            PNG_COST_SHIFT;
+         lmhi = (lmhi * png_ptr->inv_filter_costs[PNG_FILTER_VALUE_PAETH]) >>
+            PNG_COST_SHIFT;
+
+         if (lmhi > PNG_HIMASK)
+            lmins = PNG_MAXSUM;
+         else
+            lmins = (lmhi << PNG_HISHIFT) + lmlo;
+      }
+#endif
+
+      for (i = 0, rp = row_buf + 1, dp = png_ptr->paeth_row + 1,
+           pp = prev_row + 1; i < bpp; i++)
+      {
+         v = *dp++ = (png_byte)(((int)*rp++ - (int)*pp++) & 0xff);
+
+         sum += (v < 128) ? v : 256 - v;
+      }
+
+      for (lp = row_buf + 1, cp = prev_row + 1; i < row_bytes; i++)
+      {
+         int a, b, c, pa, pb, pc, p;
+
+         b = *pp++;
+         c = *cp++;
+         a = *lp++;
+
+#ifndef PNG_SLOW_PAETH
+         p = b - c;
+         pc = a - c;
+#ifdef PNG_USE_ABS
+         pa = abs(p);
+         pb = abs(pc);
+         pc = abs(p + pc);
+#else
+         pa = p < 0 ? -p : p;
+         pb = pc < 0 ? -pc : pc;
+         pc = (p + pc) < 0 ? -(p + pc) : p + pc;
+#endif
+         p = (pa <= pb && pa <=pc) ? a : (pb <= pc) ? b : c;
+#else /* PNG_SLOW_PAETH */
+         p = a + b - c;
+         pa = abs(p - a);
+         pb = abs(p - b);
+         pc = abs(p - c);
+         if (pa <= pb && pa <= pc)
+            p = a;
+         else if (pb <= pc)
+            p = b;
+         else
+            p = c;
+#endif /* PNG_SLOW_PAETH */
+
+         v = *dp++ = (png_byte)(((int)*rp++ - p) & 0xff);
+
+         sum += (v < 128) ? v : 256 - v;
+
+         if (sum > lmins)  /* We are already worse, don't continue. */
+            break;
+      }
+
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+      if (png_ptr->heuristic_method == PNG_FILTER_HEURISTIC_WEIGHTED)
+      {
+         int j;
+         png_uint_32 sumhi, sumlo;
+         sumlo = sum & PNG_LOMASK;
+         sumhi = (sum >> PNG_HISHIFT) & PNG_HIMASK;
+
+         for (j = 0; j < num_p_filters; j++)
+         {
+            if (png_ptr->prev_filters[j] == PNG_FILTER_VALUE_PAETH)
+            {
+               sumlo = (sumlo * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+               sumhi = (sumhi * png_ptr->filter_weights[j]) >>
+                  PNG_WEIGHT_SHIFT;
+            }
+         }
+
+         sumlo = (sumlo * png_ptr->filter_costs[PNG_FILTER_VALUE_PAETH]) >>
+            PNG_COST_SHIFT;
+         sumhi = (sumhi * png_ptr->filter_costs[PNG_FILTER_VALUE_PAETH]) >>
+            PNG_COST_SHIFT;
+
+         if (sumhi > PNG_HIMASK)
+            sum = PNG_MAXSUM;
+         else
+            sum = (sumhi << PNG_HISHIFT) + sumlo;
+      }
+#endif
+
+      if (sum < mins)
+      {
+         best_row = png_ptr->paeth_row;
+      }
+   }
+#endif /* PNG_NO_WRITE_FILTER */
+   /* Do the actual writing of the filtered row data from the chosen filter. */
+
+   png_write_filtered_row(png_ptr, best_row);
+
+#ifndef PNG_NO_WRITE_FILTER
+#if defined(PNG_WRITE_WEIGHTED_FILTER_SUPPORTED)
+   /* Save the type of filter we picked this time for future calculations */
+   if (png_ptr->num_prev_filters > 0)
+   {
+      int j;
+      for (j = 1; j < num_p_filters; j++)
+      {
+         png_ptr->prev_filters[j] = png_ptr->prev_filters[j - 1];
+      }
+      png_ptr->prev_filters[j] = best_row[0];
+   }
+#endif
+#endif /* PNG_NO_WRITE_FILTER */
+}
+
+
+/* Do the actual writing of a previously filtered row: push it through
+ * zlib, writing an IDAT chunk every time the output buffer fills up.
+ */
+void /* PRIVATE */
+png_write_filtered_row(png_structp png_ptr, png_bytep filtered_row)
+{
+   png_debug(1, "in png_write_filtered_row\n");
+   png_debug1(2, "filter = %d\n", filtered_row[0]);
+
+   /* Hand zlib rowbytes + 1 bytes, starting at filtered_row[0]. */
+   png_ptr->zstream.next_in = filtered_row;
+   png_ptr->zstream.avail_in = (uInt)png_ptr->row_info.rowbytes + 1;
+
+   /* Keep deflating until zlib has taken every input byte. */
+   do
+   {
+      int zrc = deflate(&png_ptr->zstream, Z_NO_FLUSH);
+
+      /* Anything other than Z_OK is reported through png_error(),
+       * using zlib's own message when it supplied one.
+       */
+      if (zrc != Z_OK)
+      {
+         if (png_ptr->zstream.msg == NULL)
+            png_error(png_ptr, "zlib error");
+         else
+            png_error(png_ptr, png_ptr->zstream.msg);
+      }
+
+      /* Output buffer full: write it out as an IDAT and start it over. */
+      if (png_ptr->zstream.avail_out == 0)
+      {
+         png_write_IDAT(png_ptr, png_ptr->zbuf, png_ptr->zbuf_size);
+         png_ptr->zstream.next_out = png_ptr->zbuf;
+         png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size;
+      }
+   } while (png_ptr->zstream.avail_in != 0);
+
+   /* Exchange row_buf and prev_row (only when a previous row is kept). */
+   if (png_ptr->prev_row != NULL)
+   {
+      png_bytep old_prev = png_ptr->prev_row;
+
+      png_ptr->prev_row = png_ptr->row_buf;
+      png_ptr->row_buf = old_prev;
+   }
+
+   /* finish row - updates counters and flushes zlib if last row */
+   png_write_finish_row(png_ptr);
+
+#if defined(PNG_WRITE_FLUSH_SUPPORTED)
+   png_ptr->flush_rows++;
+
+   if (png_ptr->flush_dist > 0 &&
+       png_ptr->flush_rows >= png_ptr->flush_dist)
+   {
+      png_write_flush(png_ptr);
+   }
+#endif
+}
+#endif /* PNG_WRITE_SUPPORTED */
diff --git a/src/libtiff/t4.h b/src/libtiff/t4.h
new file mode 100644
index 0000000..b8f08fd
--- /dev/null
+++ b/src/libtiff/t4.h
@@ -0,0 +1,285 @@
+/* $Id: t4.h,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef _T4_
+#define	_T4_
+/*
+ * CCITT T.4 1D Huffman runlength codes and
+ * related definitions.  Given the small sizes
+ * of these tables it does not seem
+ * worthwhile to make code & length 8 bits.
+ */
+typedef struct tableentry {
+    unsigned short length;	/* number of bits in the g3 code */
+    unsigned short code;	/* g3 code value, held in the low `length' bits */
+    short	runlen;		/* run length in bits, or a G3CODE_* status */
+} tableentry;
+
+#define	EOL	0x001	/* EOL code value - 0000 0000 0000 1 */
+
+/* status values returned instead of a run length */
+#define	G3CODE_EOL	-1	/* NB: ACT_EOL - ACT_WRUNT */
+#define	G3CODE_INVALID	-2	/* NB: ACT_INVALID - ACT_WRUNT */
+#define	G3CODE_EOF	-3	/* end of input data */
+#define	G3CODE_INCOMP	-4	/* incomplete run code */
+
+/*
+ * Note that these tables are ordered such that the
+ * index into the table is known to be either the
+ * run length, or (run length / 64) + a fixed offset.
+ *
+ * NB: The G3CODE_INVALID entries are only used
+ *     during state generation (see mkg3states.c).
+ */
+#ifdef G3CODES
+const tableentry TIFFFaxWhiteCodes[] = {
+    { 8, 0x35, 0 },	/* 0011 0101; entries 0-63: index == run length */
+    { 6, 0x7, 1 },	/* 0001 11 */
+    { 4, 0x7, 2 },	/* 0111 */
+    { 4, 0x8, 3 },	/* 1000 */
+    { 4, 0xB, 4 },	/* 1011 */
+    { 4, 0xC, 5 },	/* 1100 */
+    { 4, 0xE, 6 },	/* 1110 */
+    { 4, 0xF, 7 },	/* 1111 */
+    { 5, 0x13, 8 },	/* 1001 1 */
+    { 5, 0x14, 9 },	/* 1010 0 */
+    { 5, 0x7, 10 },	/* 0011 1 */
+    { 5, 0x8, 11 },	/* 0100 0 */
+    { 6, 0x8, 12 },	/* 0010 00 */
+    { 6, 0x3, 13 },	/* 0000 11 */
+    { 6, 0x34, 14 },	/* 1101 00 */
+    { 6, 0x35, 15 },	/* 1101 01 */
+    { 6, 0x2A, 16 },	/* 1010 10 */
+    { 6, 0x2B, 17 },	/* 1010 11 */
+    { 7, 0x27, 18 },	/* 0100 111 */
+    { 7, 0xC, 19 },	/* 0001 100 */
+    { 7, 0x8, 20 },	/* 0001 000 */
+    { 7, 0x17, 21 },	/* 0010 111 */
+    { 7, 0x3, 22 },	/* 0000 011 */
+    { 7, 0x4, 23 },	/* 0000 100 */
+    { 7, 0x28, 24 },	/* 0101 000 */
+    { 7, 0x2B, 25 },	/* 0101 011 */
+    { 7, 0x13, 26 },	/* 0010 011 */
+    { 7, 0x24, 27 },	/* 0100 100 */
+    { 7, 0x18, 28 },	/* 0011 000 */
+    { 8, 0x2, 29 },	/* 0000 0010 */
+    { 8, 0x3, 30 },	/* 0000 0011 */
+    { 8, 0x1A, 31 },	/* 0001 1010 */
+    { 8, 0x1B, 32 },	/* 0001 1011 */
+    { 8, 0x12, 33 },	/* 0001 0010 */
+    { 8, 0x13, 34 },	/* 0001 0011 */
+    { 8, 0x14, 35 },	/* 0001 0100 */
+    { 8, 0x15, 36 },	/* 0001 0101 */
+    { 8, 0x16, 37 },	/* 0001 0110 */
+    { 8, 0x17, 38 },	/* 0001 0111 */
+    { 8, 0x28, 39 },	/* 0010 1000 */
+    { 8, 0x29, 40 },	/* 0010 1001 */
+    { 8, 0x2A, 41 },	/* 0010 1010 */
+    { 8, 0x2B, 42 },	/* 0010 1011 */
+    { 8, 0x2C, 43 },	/* 0010 1100 */
+    { 8, 0x2D, 44 },	/* 0010 1101 */
+    { 8, 0x4, 45 },	/* 0000 0100 */
+    { 8, 0x5, 46 },	/* 0000 0101 */
+    { 8, 0xA, 47 },	/* 0000 1010 */
+    { 8, 0xB, 48 },	/* 0000 1011 */
+    { 8, 0x52, 49 },	/* 0101 0010 */
+    { 8, 0x53, 50 },	/* 0101 0011 */
+    { 8, 0x54, 51 },	/* 0101 0100 */
+    { 8, 0x55, 52 },	/* 0101 0101 */
+    { 8, 0x24, 53 },	/* 0010 0100 */
+    { 8, 0x25, 54 },	/* 0010 0101 */
+    { 8, 0x58, 55 },	/* 0101 1000 */
+    { 8, 0x59, 56 },	/* 0101 1001 */
+    { 8, 0x5A, 57 },	/* 0101 1010 */
+    { 8, 0x5B, 58 },	/* 0101 1011 */
+    { 8, 0x4A, 59 },	/* 0100 1010 */
+    { 8, 0x4B, 60 },	/* 0100 1011 */
+    { 8, 0x32, 61 },	/* 0011 0010 */
+    { 8, 0x33, 62 },	/* 0011 0011 */
+    { 8, 0x34, 63 },	/* 0011 0100 */
+    { 5, 0x1B, 64 },	/* 1101 1; from here index == run length / 64 + 63 */
+    { 5, 0x12, 128 },	/* 1001 0 */
+    { 6, 0x17, 192 },	/* 0101 11 */
+    { 7, 0x37, 256 },	/* 0110 111 */
+    { 8, 0x36, 320 },	/* 0011 0110 */
+    { 8, 0x37, 384 },	/* 0011 0111 */
+    { 8, 0x64, 448 },	/* 0110 0100 */
+    { 8, 0x65, 512 },	/* 0110 0101 */
+    { 8, 0x68, 576 },	/* 0110 1000 */
+    { 8, 0x67, 640 },	/* 0110 0111 */
+    { 9, 0xCC, 704 },	/* 0110 0110 0 */
+    { 9, 0xCD, 768 },	/* 0110 0110 1 */
+    { 9, 0xD2, 832 },	/* 0110 1001 0 */
+    { 9, 0xD3, 896 },	/* 0110 1001 1 */
+    { 9, 0xD4, 960 },	/* 0110 1010 0 */
+    { 9, 0xD5, 1024 },	/* 0110 1010 1 */
+    { 9, 0xD6, 1088 },	/* 0110 1011 0 */
+    { 9, 0xD7, 1152 },	/* 0110 1011 1 */
+    { 9, 0xD8, 1216 },	/* 0110 1100 0 */
+    { 9, 0xD9, 1280 },	/* 0110 1100 1 */
+    { 9, 0xDA, 1344 },	/* 0110 1101 0 */
+    { 9, 0xDB, 1408 },	/* 0110 1101 1 */
+    { 9, 0x98, 1472 },	/* 0100 1100 0 */
+    { 9, 0x99, 1536 },	/* 0100 1100 1 */
+    { 9, 0x9A, 1600 },	/* 0100 1101 0 */
+    { 6, 0x18, 1664 },	/* 0110 00 */
+    { 9, 0x9B, 1728 },	/* 0100 1101 1 */
+    { 11, 0x8, 1792 },	/* 0000 0001 000; 1792-2560 match the black table */
+    { 11, 0xC, 1856 },	/* 0000 0001 100 */
+    { 11, 0xD, 1920 },	/* 0000 0001 101 */
+    { 12, 0x12, 1984 },	/* 0000 0001 0010 */
+    { 12, 0x13, 2048 },	/* 0000 0001 0011 */
+    { 12, 0x14, 2112 },	/* 0000 0001 0100 */
+    { 12, 0x15, 2176 },	/* 0000 0001 0101 */
+    { 12, 0x16, 2240 },	/* 0000 0001 0110 */
+    { 12, 0x17, 2304 },	/* 0000 0001 0111 */
+    { 12, 0x1C, 2368 },	/* 0000 0001 1100 */
+    { 12, 0x1D, 2432 },	/* 0000 0001 1101 */
+    { 12, 0x1E, 2496 },	/* 0000 0001 1110 */
+    { 12, 0x1F, 2560 },	/* 0000 0001 1111 */
+    { 12, 0x1, G3CODE_EOL },	/* 0000 0000 0001; status entries from here */
+    { 9, 0x1, G3CODE_INVALID },	/* 0000 0000 1 */
+    { 10, 0x1, G3CODE_INVALID },	/* 0000 0000 01 */
+    { 11, 0x1, G3CODE_INVALID },	/* 0000 0000 001 */
+    { 12, 0x0, G3CODE_INVALID },	/* 0000 0000 0000 */
+};
+
+const tableentry TIFFFaxBlackCodes[] = {
+    { 10, 0x37, 0 },	/* 0000 1101 11; entries 0-63: index == run length */
+    { 3, 0x2, 1 },	/* 010 */
+    { 2, 0x3, 2 },	/* 11 */
+    { 2, 0x2, 3 },	/* 10 */
+    { 3, 0x3, 4 },	/* 011 */
+    { 4, 0x3, 5 },	/* 0011 */
+    { 4, 0x2, 6 },	/* 0010 */
+    { 5, 0x3, 7 },	/* 0001 1 */
+    { 6, 0x5, 8 },	/* 0001 01 */
+    { 6, 0x4, 9 },	/* 0001 00 */
+    { 7, 0x4, 10 },	/* 0000 100 */
+    { 7, 0x5, 11 },	/* 0000 101 */
+    { 7, 0x7, 12 },	/* 0000 111 */
+    { 8, 0x4, 13 },	/* 0000 0100 */
+    { 8, 0x7, 14 },	/* 0000 0111 */
+    { 9, 0x18, 15 },	/* 0000 1100 0 */
+    { 10, 0x17, 16 },	/* 0000 0101 11 */
+    { 10, 0x18, 17 },	/* 0000 0110 00 */
+    { 10, 0x8, 18 },	/* 0000 0010 00 */
+    { 11, 0x67, 19 },	/* 0000 1100 111 */
+    { 11, 0x68, 20 },	/* 0000 1101 000 */
+    { 11, 0x6C, 21 },	/* 0000 1101 100 */
+    { 11, 0x37, 22 },	/* 0000 0110 111 */
+    { 11, 0x28, 23 },	/* 0000 0101 000 */
+    { 11, 0x17, 24 },	/* 0000 0010 111 */
+    { 11, 0x18, 25 },	/* 0000 0011 000 */
+    { 12, 0xCA, 26 },	/* 0000 1100 1010 */
+    { 12, 0xCB, 27 },	/* 0000 1100 1011 */
+    { 12, 0xCC, 28 },	/* 0000 1100 1100 */
+    { 12, 0xCD, 29 },	/* 0000 1100 1101 */
+    { 12, 0x68, 30 },	/* 0000 0110 1000 */
+    { 12, 0x69, 31 },	/* 0000 0110 1001 */
+    { 12, 0x6A, 32 },	/* 0000 0110 1010 */
+    { 12, 0x6B, 33 },	/* 0000 0110 1011 */
+    { 12, 0xD2, 34 },	/* 0000 1101 0010 */
+    { 12, 0xD3, 35 },	/* 0000 1101 0011 */
+    { 12, 0xD4, 36 },	/* 0000 1101 0100 */
+    { 12, 0xD5, 37 },	/* 0000 1101 0101 */
+    { 12, 0xD6, 38 },	/* 0000 1101 0110 */
+    { 12, 0xD7, 39 },	/* 0000 1101 0111 */
+    { 12, 0x6C, 40 },	/* 0000 0110 1100 */
+    { 12, 0x6D, 41 },	/* 0000 0110 1101 */
+    { 12, 0xDA, 42 },	/* 0000 1101 1010 */
+    { 12, 0xDB, 43 },	/* 0000 1101 1011 */
+    { 12, 0x54, 44 },	/* 0000 0101 0100 */
+    { 12, 0x55, 45 },	/* 0000 0101 0101 */
+    { 12, 0x56, 46 },	/* 0000 0101 0110 */
+    { 12, 0x57, 47 },	/* 0000 0101 0111 */
+    { 12, 0x64, 48 },	/* 0000 0110 0100 */
+    { 12, 0x65, 49 },	/* 0000 0110 0101 */
+    { 12, 0x52, 50 },	/* 0000 0101 0010 */
+    { 12, 0x53, 51 },	/* 0000 0101 0011 */
+    { 12, 0x24, 52 },	/* 0000 0010 0100 */
+    { 12, 0x37, 53 },	/* 0000 0011 0111 */
+    { 12, 0x38, 54 },	/* 0000 0011 1000 */
+    { 12, 0x27, 55 },	/* 0000 0010 0111 */
+    { 12, 0x28, 56 },	/* 0000 0010 1000 */
+    { 12, 0x58, 57 },	/* 0000 0101 1000 */
+    { 12, 0x59, 58 },	/* 0000 0101 1001 */
+    { 12, 0x2B, 59 },	/* 0000 0010 1011 */
+    { 12, 0x2C, 60 },	/* 0000 0010 1100 */
+    { 12, 0x5A, 61 },	/* 0000 0101 1010 */
+    { 12, 0x66, 62 },	/* 0000 0110 0110 */
+    { 12, 0x67, 63 },	/* 0000 0110 0111 */
+    { 10, 0xF, 64 },	/* 0000 0011 11; from here index == run length / 64 + 63 */
+    { 12, 0xC8, 128 },	/* 0000 1100 1000 */
+    { 12, 0xC9, 192 },	/* 0000 1100 1001 */
+    { 12, 0x5B, 256 },	/* 0000 0101 1011 */
+    { 12, 0x33, 320 },	/* 0000 0011 0011 */
+    { 12, 0x34, 384 },	/* 0000 0011 0100 */
+    { 12, 0x35, 448 },	/* 0000 0011 0101 */
+    { 13, 0x6C, 512 },	/* 0000 0011 0110 0 */
+    { 13, 0x6D, 576 },	/* 0000 0011 0110 1 */
+    { 13, 0x4A, 640 },	/* 0000 0010 0101 0 */
+    { 13, 0x4B, 704 },	/* 0000 0010 0101 1 */
+    { 13, 0x4C, 768 },	/* 0000 0010 0110 0 */
+    { 13, 0x4D, 832 },	/* 0000 0010 0110 1 */
+    { 13, 0x72, 896 },	/* 0000 0011 1001 0 */
+    { 13, 0x73, 960 },	/* 0000 0011 1001 1 */
+    { 13, 0x74, 1024 },	/* 0000 0011 1010 0 */
+    { 13, 0x75, 1088 },	/* 0000 0011 1010 1 */
+    { 13, 0x76, 1152 },	/* 0000 0011 1011 0 */
+    { 13, 0x77, 1216 },	/* 0000 0011 1011 1 */
+    { 13, 0x52, 1280 },	/* 0000 0010 1001 0 */
+    { 13, 0x53, 1344 },	/* 0000 0010 1001 1 */
+    { 13, 0x54, 1408 },	/* 0000 0010 1010 0 */
+    { 13, 0x55, 1472 },	/* 0000 0010 1010 1 */
+    { 13, 0x5A, 1536 },	/* 0000 0010 1101 0 */
+    { 13, 0x5B, 1600 },	/* 0000 0010 1101 1 */
+    { 13, 0x64, 1664 },	/* 0000 0011 0010 0 */
+    { 13, 0x65, 1728 },	/* 0000 0011 0010 1 */
+    { 11, 0x8, 1792 },	/* 0000 0001 000; 1792-2560 match the white table */
+    { 11, 0xC, 1856 },	/* 0000 0001 100 */
+    { 11, 0xD, 1920 },	/* 0000 0001 101 */
+    { 12, 0x12, 1984 },	/* 0000 0001 0010 */
+    { 12, 0x13, 2048 },	/* 0000 0001 0011 */
+    { 12, 0x14, 2112 },	/* 0000 0001 0100 */
+    { 12, 0x15, 2176 },	/* 0000 0001 0101 */
+    { 12, 0x16, 2240 },	/* 0000 0001 0110 */
+    { 12, 0x17, 2304 },	/* 0000 0001 0111 */
+    { 12, 0x1C, 2368 },	/* 0000 0001 1100 */
+    { 12, 0x1D, 2432 },	/* 0000 0001 1101 */
+    { 12, 0x1E, 2496 },	/* 0000 0001 1110 */
+    { 12, 0x1F, 2560 },	/* 0000 0001 1111 */
+    { 12, 0x1, G3CODE_EOL },	/* 0000 0000 0001; status entries from here */
+    { 9, 0x1, G3CODE_INVALID },	/* 0000 0000 1 */
+    { 10, 0x1, G3CODE_INVALID },	/* 0000 0000 01 */
+    { 11, 0x1, G3CODE_INVALID },	/* 0000 0000 001 */
+    { 12, 0x0, G3CODE_INVALID },	/* 0000 0000 0000 */
+};
+#else
+extern	const tableentry TIFFFaxWhiteCodes[];
+extern	const tableentry TIFFFaxBlackCodes[];
+#endif
+#endif /* _T4_ */
diff --git a/src/libtiff/tif_aux.c b/src/libtiff/tif_aux.c
new file mode 100644
index 0000000..d33a144
--- /dev/null
+++ b/src/libtiff/tif_aux.c
@@ -0,0 +1,267 @@
+/* $Id: tif_aux.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ *
+ * Auxiliary Support Routines.
+ */
+#include "tiffiop.h"
+#include "tif_predict.h"
+#include <math.h>
+
+tdata_t
+_TIFFCheckMalloc(TIFF* tif, size_t nmemb, size_t elem_size, const char* what)
+{
+	tsize_t	total = nmemb * elem_size;
+	tdata_t block = NULL;
+
+	/* Allocate only for a non-empty request whose size did not wrap
+	 * (dividing the product back must give nmemb); a refused request
+	 * and a failed _TIFFmalloc() are both reported as "No space". */
+	if (nmemb != 0 && elem_size != 0 && total / elem_size == nmemb)
+		block = _TIFFmalloc(total);
+
+	if (!block)
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "No space %s", what);
+
+	return block;
+}
+
+static int
+TIFFDefaultTransferFunction(TIFFDirectory* td)
+{
+	uint16 **curve = td->td_transferfunction;
+	tsize_t idx, count, nbytes;
+
+	/* Returns 1 with the tables built, 0 with all three slots cleared. */
+	curve[0] = curve[1] = curve[2] = 0;
+	if (td->td_bitspersample >= sizeof(tsize_t) * 8 - 2)
+		return 0;
+
+	/* Table 0: entry i is 65535 * (i/(count-1))^2.2, rounded to nearest. */
+	count = 1<<td->td_bitspersample;
+	nbytes = count * sizeof (uint16);
+	if (!(curve[0] = (uint16 *)_TIFFmalloc(nbytes)))
+		return 0;
+	curve[0][0] = 0;
+	for (idx = 1; idx < count; idx++) {
+		double t = (double)idx/((double) count-1.);
+		curve[0][idx] = (uint16)floor(65535.*pow(t, 2.2) + .5);
+	}
+
+	/* Several non-extra samples: tables 1 and 2 are copies of table 0. */
+	if (td->td_samplesperpixel - td->td_extrasamples > 1) {
+		for (idx = 1; idx <= 2; idx++) {
+			curve[idx] = (uint16 *)_TIFFmalloc(nbytes);
+			if (curve[idx] == NULL)
+				goto bad;
+			_TIFFmemcpy(curve[idx], curve[0], nbytes);
+		}
+	}
+	return 1;
+
+bad:
+	for (idx = 0; idx < 3; idx++)
+		if (curve[idx])
+			_TIFFfree(curve[idx]);
+	curve[0] = curve[1] = curve[2] = 0;
+	return 0;
+}
+
+/*
+ * Like TIFFVGetField, but when the tag is not set store its default through
+ * the pointer(s) in ap.  Returns 1 on success, 0 if the tag has no default
+ * here (or, for Predictor, if tif_data is NULL; for TransferFunction, if the
+ * default tables cannot be built).
+ * NB:	We use the value in the directory, rather than explicit values, so
+ *	that defaults exist in only one place -- in TIFFDefaultDirectory.
+ */
+int
+TIFFVGetFieldDefaulted(TIFF* tif, ttag_t tag, va_list ap)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+
+	if (TIFFVGetField(tif, tag, ap))
+		return (1);
+	switch (tag) {
+	case TIFFTAG_SUBFILETYPE:
+		*va_arg(ap, uint32 *) = td->td_subfiletype;
+		return (1);
+	case TIFFTAG_BITSPERSAMPLE:
+		*va_arg(ap, uint16 *) = td->td_bitspersample;
+		return (1);
+	case TIFFTAG_THRESHHOLDING:
+		*va_arg(ap, uint16 *) = td->td_threshholding;
+		return (1);
+	case TIFFTAG_FILLORDER:
+		*va_arg(ap, uint16 *) = td->td_fillorder;
+		return (1);
+	case TIFFTAG_ORIENTATION:
+		*va_arg(ap, uint16 *) = td->td_orientation;
+		return (1);
+	case TIFFTAG_SAMPLESPERPIXEL:
+		*va_arg(ap, uint16 *) = td->td_samplesperpixel;
+		return (1);
+	case TIFFTAG_ROWSPERSTRIP:
+		*va_arg(ap, uint32 *) = td->td_rowsperstrip;
+		return (1);
+	case TIFFTAG_MINSAMPLEVALUE:
+		*va_arg(ap, uint16 *) = td->td_minsamplevalue;
+		return (1);
+	case TIFFTAG_MAXSAMPLEVALUE:
+		*va_arg(ap, uint16 *) = td->td_maxsamplevalue;
+		return (1);
+	case TIFFTAG_PLANARCONFIG:
+		*va_arg(ap, uint16 *) = td->td_planarconfig;
+		return (1);
+	case TIFFTAG_RESOLUTIONUNIT:
+		*va_arg(ap, uint16 *) = td->td_resolutionunit;
+		return (1);
+	case TIFFTAG_PREDICTOR:
+		if (tif->tif_data == NULL)	/* no predictor state to read from */
+			return (0);
+		*va_arg(ap, uint16 *) = (uint16)
+		    ((TIFFPredictorState*) tif->tif_data)->predictor;
+		return (1);
+	case TIFFTAG_DOTRANGE:
+		*va_arg(ap, uint16 *) = 0;
+		*va_arg(ap, uint16 *) = (1<<td->td_bitspersample)-1;
+		return (1);
+	case TIFFTAG_INKSET:
+		*va_arg(ap, uint16 *) = INKSET_CMYK;
+		return 1;
+	case TIFFTAG_NUMBEROFINKS:
+		*va_arg(ap, uint16 *) = 4;
+		return (1);
+	case TIFFTAG_EXTRASAMPLES:
+		*va_arg(ap, uint16 *) = td->td_extrasamples;
+		*va_arg(ap, uint16 **) = td->td_sampleinfo;
+		return (1);
+	case TIFFTAG_MATTEING:
+		*va_arg(ap, uint16 *) =
+		    (td->td_extrasamples == 1 &&
+		     td->td_sampleinfo[0] == EXTRASAMPLE_ASSOCALPHA);
+		return (1);
+	case TIFFTAG_TILEDEPTH:
+		*va_arg(ap, uint32 *) = td->td_tiledepth;
+		return (1);
+	case TIFFTAG_DATATYPE:
+		*va_arg(ap, uint16 *) = td->td_sampleformat-1;
+		return (1);
+	case TIFFTAG_SAMPLEFORMAT:
+		*va_arg(ap, uint16 *) = td->td_sampleformat;
+                return(1);
+	case TIFFTAG_IMAGEDEPTH:
+		*va_arg(ap, uint32 *) = td->td_imagedepth;
+		return (1);
+	case TIFFTAG_YCBCRCOEFFICIENTS:
+		{
+			/* defaults are from CCIR Recommendation 601-1 */
+			static float ycbcrcoeffs[] = { 0.299f, 0.587f, 0.114f };
+			*va_arg(ap, float **) = ycbcrcoeffs;
+			return 1;
+		}
+	case TIFFTAG_YCBCRSUBSAMPLING:
+		*va_arg(ap, uint16 *) = td->td_ycbcrsubsampling[0];
+		*va_arg(ap, uint16 *) = td->td_ycbcrsubsampling[1];
+		return (1);
+	case TIFFTAG_YCBCRPOSITIONING:
+		*va_arg(ap, uint16 *) = td->td_ycbcrpositioning;
+		return (1);
+	case TIFFTAG_WHITEPOINT:
+		{
+			static float whitepoint[2];
+
+			/* TIFF 6.0 specification tells that it is no default
+			   value for the WhitePoint, but AdobePhotoshop TIFF
+			   Technical Note tells that it should be CIE D50. */
+			whitepoint[0] =	D50_X0 / (D50_X0 + D50_Y0 + D50_Z0);
+			whitepoint[1] =	D50_Y0 / (D50_X0 + D50_Y0 + D50_Z0);
+			*va_arg(ap, float **) = whitepoint;
+			return 1;
+		}
+	case TIFFTAG_TRANSFERFUNCTION:
+		if (!td->td_transferfunction[0] &&
+		    !TIFFDefaultTransferFunction(td)) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "No space for \"TransferFunction\" tag");
+			return (0);
+		}
+		*va_arg(ap, uint16 **) = td->td_transferfunction[0];
+		if (td->td_samplesperpixel - td->td_extrasamples > 1) {
+			*va_arg(ap, uint16 **) = td->td_transferfunction[1];
+			*va_arg(ap, uint16 **) = td->td_transferfunction[2];
+		}
+		return (1);
+	case TIFFTAG_REFERENCEBLACKWHITE:
+		{
+			int i;
+			static float ycbcr_refblackwhite[] =
+			{ 0.0F, 255.0F, 128.0F, 255.0F, 128.0F, 255.0F };
+			static float rgb_refblackwhite[6];
+
+			for (i = 0; i < 3; i++) {
+				rgb_refblackwhite[2 * i + 0] = 0.0F;
+				rgb_refblackwhite[2 * i + 1] =
+					(float)((1L<<td->td_bitspersample)-1L);
+			}
+
+			if (td->td_photometric == PHOTOMETRIC_YCBCR) {
+				/*
+				 * YCbCr (Class Y) images must have the
+				 * ReferenceBlackWhite tag set. Fix the
+				 * broken images, which lacks that tag.
+				 */
+				*va_arg(ap, float **) = ycbcr_refblackwhite;
+			} else {
+				/*
+				 * Assume RGB (Class R)
+				 */
+				*va_arg(ap, float **) = rgb_refblackwhite;
+			}
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Variadic front end for TIFFVGetFieldDefaulted(): packs the arguments
+ * that follow tag into a va_list and returns that function's result.
+ */
+int
+TIFFGetFieldDefaulted(TIFF* tif, ttag_t tag, ...)
+{
+	va_list args;
+	int status;
+
+	va_start(args, tag);
+	status = TIFFVGetFieldDefaulted(tif, tag, args);
+	va_end(args);
+	return status;
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_close.c b/src/libtiff/tif_close.c
new file mode 100644
index 0000000..5047d37
--- /dev/null
+++ b/src/libtiff/tif_close.c
@@ -0,0 +1,119 @@
+/* $Id: tif_close.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ */
+#include "tiffiop.h"
+
+/************************************************************************/
+/*                            TIFFCleanup()                             */
+/************************************************************************/
+
+/**
+ * Auxiliary function to free the TIFF structure. The given structure will be
+ * completely freed, so you should save the opened file handle and the pointer
+ * to the close procedure in external variables before calling
+ * TIFFCleanup(), if you will need them to close the file.
+ * 
+ * @param tif A TIFF pointer.
+ */
+
+void
+TIFFCleanup(TIFF* tif)
+{
+	TIFFClientInfoLink *node;
+	size_t idx;
+
+	/* Flush buffered data and the directory (if dirty) unless the
+	 * file is open read-only, then run the tif_cleanup hook. */
+	if (tif->tif_mode != O_RDONLY)
+		TIFFFlush(tif);
+	(*tif->tif_cleanup)(tif);
+	TIFFFreeDirectory(tif);
+
+	if (tif->tif_dirlist != NULL)
+		_TIFFfree(tif->tif_dirlist);
+
+	/* Unlink and free every client info entry together with its name. */
+	while ((node = tif->tif_clientinfo) != NULL) {
+		tif->tif_clientinfo = node->next;
+		_TIFFfree(node->name);
+		_TIFFfree(node);
+	}
+
+	/* Free the raw data buffer only when the TIFF_MYBUFFER flag is set. */
+	if (tif->tif_rawdata && (tif->tif_flags & TIFF_MYBUFFER) != 0)
+		_TIFFfree(tif->tif_rawdata);
+	if (isMapped(tif))
+		TIFFUnmapFileContents(tif, tif->tif_base, tif->tif_size);
+
+	/*
+	 * Custom fields whose name starts with "Tag " have both the name and
+	 * the entry released here; the field table itself is freed whenever
+	 * it holds at least one entry.
+	 */
+	if (tif->tif_nfields > 0) {
+		for (idx = 0; idx < tif->tif_nfields; idx++) {
+			TIFFFieldInfo *info = tif->tif_fieldinfo[idx];
+
+			if (info->field_bit != FIELD_CUSTOM)
+				continue;
+			if (strncmp("Tag ", info->field_name, 4) != 0)
+				continue;
+			_TIFFfree(info->field_name);
+			_TIFFfree(info);
+		}
+		_TIFFfree(tif->tif_fieldinfo);
+	}
+
+	_TIFFfree(tif);
+}
+
+/************************************************************************/
+/*                            TIFFClose()                               */
+/************************************************************************/
+
+/**
+ * Close a previously opened TIFF file.
+ *
+ * TIFFClose closes a file that was previously opened with TIFFOpen().
+ * Any buffered data are flushed to the file, including the contents of
+ * the current directory (if modified); and all resources are reclaimed.
+ * 
+ * @param tif A TIFF pointer.
+ */
+
+void
+TIFFClose(TIFF* tif)
+{
+	/* Read both before TIFFCleanup(): it frees the TIFF structure. */
+	thandle_t handle = tif->tif_clientdata;
+	TIFFCloseProc do_close = tif->tif_closeproc;
+	TIFFCleanup(tif);
+	(void) do_close(handle);
+}
+
diff --git a/src/libtiff/tif_codec.c b/src/libtiff/tif_codec.c
new file mode 100644
index 0000000..377e083
--- /dev/null
+++ b/src/libtiff/tif_codec.c
@@ -0,0 +1,150 @@
+/* $Id: tif_codec.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library
+ *
+ * Builtin Compression Scheme Configuration Support.
+ */
+#include "tiffiop.h"
+
+static	int NotConfigured(TIFF*, int);	/* init stub, defined further down */
+
+#ifndef	LZW_SUPPORT
+#define	TIFFInitLZW		NotConfigured
+#endif
+#ifndef	PACKBITS_SUPPORT
+#define	TIFFInitPackBits	NotConfigured
+#endif
+#ifndef	THUNDER_SUPPORT
+#define	TIFFInitThunderScan	NotConfigured
+#endif
+#ifndef	NEXT_SUPPORT
+#define	TIFFInitNeXT		NotConfigured
+#endif
+#ifndef	JPEG_SUPPORT
+#define	TIFFInitJPEG		NotConfigured
+#endif
+#ifndef	OJPEG_SUPPORT
+#define	TIFFInitOJPEG		NotConfigured
+#endif
+#ifndef	CCITT_SUPPORT	/* one switch covers all four CCITT init routines */
+#define	TIFFInitCCITTRLE	NotConfigured
+#define	TIFFInitCCITTRLEW	NotConfigured
+#define	TIFFInitCCITTFax3	NotConfigured
+#define	TIFFInitCCITTFax4	NotConfigured
+#endif
+#ifndef JBIG_SUPPORT
+#define	TIFFInitJBIG		NotConfigured
+#endif
+#ifndef	ZIP_SUPPORT
+#define	TIFFInitZIP		NotConfigured
+#endif
+#ifndef	PIXARLOG_SUPPORT
+#define	TIFFInitPixarLog	NotConfigured
+#endif
+#ifndef LOGLUV_SUPPORT
+#define TIFFInitSGILog		NotConfigured
+#endif
+
+/*
+ * Compression schemes statically built into the library.  Codecs compiled
+ * out by the #ifndef block above use NotConfigured as their init routine. */
+#ifdef VMS
+const TIFFCodec _TIFFBuiltinCODECS[] = {
+#else
+TIFFCodec _TIFFBuiltinCODECS[] = {
+#endif
+    { "None",		COMPRESSION_NONE,	TIFFInitDumpMode },
+    { "LZW",		COMPRESSION_LZW,	TIFFInitLZW },
+    { "PackBits",	COMPRESSION_PACKBITS,	TIFFInitPackBits },
+    { "ThunderScan",	COMPRESSION_THUNDERSCAN,TIFFInitThunderScan },
+    { "NeXT",		COMPRESSION_NEXT,	TIFFInitNeXT },
+    { "JPEG",		COMPRESSION_JPEG,	TIFFInitJPEG },
+    { "Old-style JPEG",	COMPRESSION_OJPEG,	TIFFInitOJPEG },
+    { "CCITT RLE",	COMPRESSION_CCITTRLE,	TIFFInitCCITTRLE },
+    { "CCITT RLE/W",	COMPRESSION_CCITTRLEW,	TIFFInitCCITTRLEW },
+    { "CCITT Group 3",	COMPRESSION_CCITTFAX3,	TIFFInitCCITTFax3 },
+    { "CCITT Group 4",	COMPRESSION_CCITTFAX4,	TIFFInitCCITTFax4 },
+    { "ISO JBIG",	COMPRESSION_JBIG,	TIFFInitJBIG },
+    { "Deflate",	COMPRESSION_DEFLATE,	TIFFInitZIP },
+    { "AdobeDeflate",   COMPRESSION_ADOBE_DEFLATE , TIFFInitZIP }, 
+    { "PixarLog",	COMPRESSION_PIXARLOG,	TIFFInitPixarLog },
+    { "SGILog",		COMPRESSION_SGILOG,	TIFFInitSGILog },
+    { "SGILog24",	COMPRESSION_SGILOG24,	TIFFInitSGILog },
+    { NULL,             0,                      NULL }	/* NULL name ends the table */
+};
+
+static int
+_notConfigured(TIFF* tif)
+{
+	const TIFFCodec* c = TIFFFindCODEC(tif->tif_dir.td_compression);
+
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+	    "%s compression support is not configured", c->name);
+	return (0);
+}
+
+static int
+NotConfigured(TIFF* tif, int scheme)
+{
+    /* Mark both directions unusable and install the failing setup hook. */
+    tif->tif_setupencode = _notConfigured;
+    tif->tif_setupdecode = _notConfigured;
+    tif->tif_encodestatus = FALSE;
+    tif->tif_decodestatus = FALSE;
+    (void) scheme;	/* unused */
+    return (1);
+}
+
+/************************************************************************/
+/*                       TIFFIsCODECConfigured()                        */
+/************************************************************************/
+
+/**
+ * Check whether we have working codec for the specific coding scheme.
+ * 
+ * @return returns 1 if the codec is configured and working. Otherwise
+ * 0 will be returned.
+ */
+
+int
+TIFFIsCODECConfigured(uint16 scheme)
+{
+	const TIFFCodec* entry = TIFFFindCODEC(scheme);
+	int usable;
+
+	/*
+	 * A scheme counts as configured only when it is known, has an
+	 * init routine, and that routine is not the NotConfigured stub.
+	 */
+	usable = entry != NULL
+	    && entry->init != NULL
+	    && entry->init != NotConfigured;
+
+	return usable ? 1 : 0;
+}
+
diff --git a/src/libtiff/tif_color.c b/src/libtiff/tif_color.c
new file mode 100644
index 0000000..9e9481e
--- /dev/null
+++ b/src/libtiff/tif_color.c
@@ -0,0 +1,275 @@
+/* $Id: tif_color.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * CIE L*a*b* to CIE XYZ and CIE XYZ to RGB conversion routines are taken
+ * from the VIPS library (http://www.vips.ecs.soton.ac.uk) with
+ * the permission of John Cupitt, the VIPS author.
+ */
+
+/*
+ * TIFF Library.
+ *
+ * Color space conversion routines.
+ */
+
+#include "tiffiop.h"
+#include <math.h>
+
+/*
+ * Convert color value from the CIE L*a*b* 1976 space to CIE XYZ.
+ */
+void
+TIFFCIELabToXYZ(TIFFCIELabToRGB *cielab, uint32 l, int32 a, int32 b,
+		float *X, float *Y, float *Z)
+{
+	/* Scale l by 100/255 (presumably an 8-bit L* code; confirm at callers) */
+	float lstar = (float)l * 100.0F / 255.0F;
+	float fy, fx, fz;
+
+	/* Y and its intermediate term: linear below 8.856, cubic otherwise */
+	if( lstar >= 8.856F ) {
+		fy = (lstar + 16.0F) / 116.0F;
+		*Y = cielab->Y0 * fy * fy * fy;
+	} else {
+		*Y = (lstar * cielab->Y0) / 903.292F;
+		fy = 7.787F * (*Y / cielab->Y0) + 16.0F / 116.0F;
+	}
+
+	/* X is derived from a and the Y term */
+	fx = (float)a / 500.0F + fy;
+	*X = ( fx < 0.2069F ) ? cielab->X0 * (fx - 0.13793F) / 7.787F
+			      : cielab->X0 * fx * fx * fx;
+
+	/* Z is derived from b and the Y term */
+	fz = fy - (float)b / 200.0F;
+	*Z = ( fz < 0.2069F ) ? cielab->Z0 * (fz - 0.13793F) / 7.787F
+			      : cielab->Z0 * fz * fz * fz;
+}
+
+#define RINT(R) ((uint32)((R)>0?((R)+0.5):((R)-0.5)))	/* +/-0.5, then truncate */
+/*
+ * Convert color value from the XYZ space to RGB.
+ */
+void
+TIFFXYZToRGB(TIFFCIELabToRGB *cielab, float X, float Y, float Z,
+	     uint32 *r, uint32 *g, uint32 *b)
+{
+	int i;
+	float Yr, Yg, Yb;
+	float *matrix = &cielab->display.d_mat[0][0];	/* indexed below as 9 floats */
+
+	/* Multiply through the matrix to get luminosity values. */
+	Yr =  matrix[0] * X + matrix[1] * Y + matrix[2] * Z;
+	Yg =  matrix[3] * X + matrix[4] * Y + matrix[5] * Z;
+	Yb =  matrix[6] * X + matrix[7] * Y + matrix[8] * Z;
+
+	/* Clip input: raise each value to at least the display's d_Y0* level */
+	Yr = TIFFmax(Yr, cielab->display.d_Y0R);
+	Yg = TIFFmax(Yg, cielab->display.d_Y0G);
+	Yb = TIFFmax(Yb, cielab->display.d_Y0B);
+
+	/* Avoid overflow in case of wrong input values: cap at d_YC* */
+	Yr = TIFFmin(Yr, cielab->display.d_YCR);
+	Yg = TIFFmin(Yg, cielab->display.d_YCG);
+	Yb = TIFFmin(Yb, cielab->display.d_YCB);
+
+	/* Turn luminosity to colour value: step-sized table index, then lookup */
+	i = (int)((Yr - cielab->display.d_Y0R) / cielab->rstep);
+	i = TIFFmin(cielab->range, i);	/* never index past entry 'range' */
+	*r = RINT(cielab->Yr2r[i]);
+
+	i = (int)((Yg - cielab->display.d_Y0G) / cielab->gstep);
+	i = TIFFmin(cielab->range, i);
+	*g = RINT(cielab->Yg2g[i]);
+
+	i = (int)((Yb - cielab->display.d_Y0B) / cielab->bstep);
+	i = TIFFmin(cielab->range, i);
+	*b = RINT(cielab->Yb2b[i]);
+
+	/* Clip output to the display's d_Vrw* maxima. */
+	*r = TIFFmin(*r, cielab->display.d_Vrwr);
+	*g = TIFFmin(*g, cielab->display.d_Vrwg);
+	*b = TIFFmin(*b, cielab->display.d_Vrwb);
+}
+#undef RINT
+
+/*
+ * Fill caller-provided state: copy the display, build the Yr,Yg,Yb => r,g,b
+ * lookup tables, store the reference white point.  Always returns 0.
+ */
+int
+TIFFCIELabToRGBInit(TIFFCIELabToRGB* cielab,
+		    TIFFDisplay *display, float *refWhite)
+{
+	int i;
+	double gamma;
+
+	cielab->range = CIELABTORGB_TABLE_RANGE;
+
+	_TIFFmemcpy(&cielab->display, display, sizeof(TIFFDisplay));
+
+	/* Red: each table has range+1 entries, one per luminance step */
+	gamma = 1.0 / cielab->display.d_gammaR ;
+	cielab->rstep =
+		(cielab->display.d_YCR - cielab->display.d_Y0R)	/ cielab->range;
+	for(i = 0; i <= cielab->range; i++) {
+		cielab->Yr2r[i] = cielab->display.d_Vrwr
+		    * ((float)pow((double)i / cielab->range, gamma));
+	}
+
+	/* Green: the step must span the green luminance range, not red's */
+	gamma = 1.0 / cielab->display.d_gammaG ;
+	cielab->gstep =
+	    (cielab->display.d_YCG - cielab->display.d_Y0G) / cielab->range;
+	for(i = 0; i <= cielab->range; i++) {
+		cielab->Yg2g[i] = cielab->display.d_Vrwg
+		    * ((float)pow((double)i / cielab->range, gamma));
+	}
+
+	/* Blue: likewise uses the blue luminance range */
+	gamma = 1.0 / cielab->display.d_gammaB ;
+	cielab->bstep =
+	    (cielab->display.d_YCB - cielab->display.d_Y0B) / cielab->range;
+	for(i = 0; i <= cielab->range; i++) {
+		cielab->Yb2b[i] = cielab->display.d_Vrwb
+		    * ((float)pow((double)i / cielab->range, gamma));
+	}
+
+	/* Init reference white point from refWhite[0..2] = X0, Y0, Z0 */
+	cielab->X0 = refWhite[0];
+	cielab->Y0 = refWhite[1];
+	cielab->Z0 = refWhite[2];
+
+	return 0;
+}
+
+/* 
+ * Convert color value from the YCbCr space to RGB.
+ * The colorspace conversion algorithm comes from the IJG v5a code;
+ * see below for more information on how it works.
+ */
+#define	SHIFT			16
+#define	FIX(x)			((int32)((x) * (1L<<SHIFT) + 0.5))
+#define	ONE_HALF		((int32)(1<<(SHIFT-1)))
+#define	Code2V(c, RB, RW, CR)	((((c)-(int32)(RB))*(float)(CR))/(float)(((RW)-(RB)) ? ((RW)-(RB)) : 1))
+#define	CLAMP(f,min,max)	((f)<(min)?(min):(f)>(max)?(max):(f))
+#define HICLAMP(f,max)		((f)>(max)?(max):(f))
+
+void
+TIFFYCbCrtoRGB(TIFFYCbCrToRGB *ycbcr, uint32 Y, int32 Cb, int32 Cr,
+	       uint32 *r, uint32 *g, uint32 *b)
+{
+	/* XXX: Only 8-bit YCbCr input supported for now; force codes to 0..255 */
+	const uint32 luma = HICLAMP(Y, 255);
+	const int32 cb = CLAMP(Cb, 0, 255), cr = CLAMP(Cr, 0, 255);
+	*r = ycbcr->clamptab[ycbcr->Y_tab[luma] + ycbcr->Cr_r_tab[cr]];
+	*g = ycbcr->clamptab[ycbcr->Y_tab[luma]
+	    + (int)((ycbcr->Cb_g_tab[cb] + ycbcr->Cr_g_tab[cr]) >> SHIFT)];
+	*b = ycbcr->clamptab[ycbcr->Y_tab[luma] + ycbcr->Cb_b_tab[cb]];
+}
+
+/*
+ * Initialize the YCbCr->RGB conversion tables.  The conversion
+ * is done according to the 6.0 spec:
+ *
+ *    R = Y + Cr*(2 - 2*LumaRed)
+ *    B = Y + Cb*(2 - 2*LumaBlue)
+ *    G =   Y
+ *        - LumaBlue*Cb*(2-2*LumaBlue)/LumaGreen
+ *        - LumaRed*Cr*(2-2*LumaRed)/LumaGreen
+ *
+ * To avoid floating point arithmetic the fractional constants that
+ * come out of the equations are represented as fixed point values
+ * in the range 0...2^16.  We also eliminate multiplications by
+ * pre-calculating possible values indexed by Cb and Cr (this code
+ * assumes conversion is being done for 8-bit samples).
+ */
+int
+TIFFYCbCrToRGBInit(TIFFYCbCrToRGB* ycbcr, float *luma, float *refBlackWhite)
+{
+    TIFFRGBValue* clamptab;
+    int i;
+    /* Shorthand for the three luma coefficients; #undef'd again below. */
+#define LumaRed	    luma[0]
+#define LumaGreen   luma[1]
+#define LumaBlue    luma[2]
+    /* Tables are carved from the memory following *ycbcr (size rounded up). */
+    clamptab = (TIFFRGBValue*)(
+	(tidata_t) ycbcr+TIFFroundup(sizeof (TIFFYCbCrToRGB), sizeof (long)));
+    _TIFFmemset(clamptab, 0, 256);		/* v < 0 => 0 */
+    ycbcr->clamptab = (clamptab += 256);	/* index 0 sits after the zeros */
+    for (i = 0; i < 256; i++)
+	clamptab[i] = (TIFFRGBValue) i;
+    _TIFFmemset(clamptab+256, 255, 2*256);	/* v > 255 => 255 */
+    ycbcr->Cr_r_tab = (int*) (clamptab + 3*256);
+    ycbcr->Cb_b_tab = ycbcr->Cr_r_tab + 256;
+    ycbcr->Cr_g_tab = (int32*) (ycbcr->Cb_b_tab + 256);
+    ycbcr->Cb_g_tab = ycbcr->Cr_g_tab + 256;
+    ycbcr->Y_tab = ycbcr->Cb_g_tab + 256;
+    /* D1..D4: the equation constants converted to fixed point by FIX(). */
+    { float f1 = 2-2*LumaRed;		int32 D1 = FIX(f1);
+      float f2 = LumaRed*f1/LumaGreen;	int32 D2 = -FIX(f2);
+      float f3 = 2-2*LumaBlue;		int32 D3 = FIX(f3);
+      float f4 = LumaBlue*f3/LumaGreen;	int32 D4 = -FIX(f4);
+      int x;
+
+#undef LumaBlue
+#undef LumaGreen
+#undef LumaRed
+      
+      /*
+       * i is the actual input pixel value in the range 0..255
+       * Cb and Cr values are in the range -128..127 (actually
+       * they are in a range defined by the ReferenceBlackWhite
+       * tag) so there is some range shifting to do here when
+       * constructing tables indexed by the raw pixel data.
+       */
+      for (i = 0, x = -128; i < 256; i++, x++) {
+	    int32 Cr = (int32)Code2V(x, refBlackWhite[4] - 128.0F,
+			    refBlackWhite[5] - 128.0F, 127);
+	    int32 Cb = (int32)Code2V(x, refBlackWhite[2] - 128.0F,
+			    refBlackWhite[3] - 128.0F, 127);
+
+	    ycbcr->Cr_r_tab[i] = (int32)((D1*Cr + ONE_HALF)>>SHIFT);
+	    ycbcr->Cb_b_tab[i] = (int32)((D3*Cb + ONE_HALF)>>SHIFT);
+	    ycbcr->Cr_g_tab[i] = D2*Cr;	/* left unshifted */
+	    ycbcr->Cb_g_tab[i] = D4*Cb + ONE_HALF;	/* carries the rounding term */
+	    ycbcr->Y_tab[i] =
+		    (int32)Code2V(x + 128, refBlackWhite[0], refBlackWhite[1], 255);
+      }
+    }
+
+    return 0;
+}
+#undef	HICLAMP
+#undef	CLAMP
+#undef	Code2V
+#undef	SHIFT
+#undef	ONE_HALF
+#undef	FIX
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_compress.c b/src/libtiff/tif_compress.c
new file mode 100644
index 0000000..cbbb295
--- /dev/null
+++ b/src/libtiff/tif_compress.c
@@ -0,0 +1,286 @@
+/* $Id: tif_compress.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library
+ *
+ * Compression Scheme Configuration Support.
+ */
+#include "tiffiop.h"
+
+static int
+TIFFNoEncode(TIFF* tif, const char* method)
+{
+	/* Name the codec when it is known; else report the numeric scheme. */
+	const TIFFCodec* codec = TIFFFindCODEC(tif->tif_dir.td_compression);
+	if (codec == NULL) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "Compression scheme %u %s encoding is not implemented",
+		    tif->tif_dir.td_compression, method);
+		return (-1);
+	}
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+	    "%s %s encoding is not implemented", codec->name, method);
+	return (-1);
+}
+
+int
+_TIFFNoRowEncode(TIFF* tif, tidata_t pp, tsize_t cc, tsample_t s)
+{
+	(void) s, (void) cc, (void) pp;	/* stub: data arguments are ignored */
+	return TIFFNoEncode(tif, "scanline");
+}
+
+int
+_TIFFNoStripEncode(TIFF* tif, tidata_t pp, tsize_t cc, tsample_t s)
+{
+	(void) s, (void) cc, (void) pp;	/* stub: data arguments are ignored */
+	return TIFFNoEncode(tif, "strip");
+}
+
+int
+_TIFFNoTileEncode(TIFF* tif, tidata_t pp, tsize_t cc, tsample_t s)
+{
+	(void) s, (void) cc, (void) pp;	/* stub: data arguments are ignored */
+	return TIFFNoEncode(tif, "tile");
+}
+
+static int
+TIFFNoDecode(TIFF* tif, const char* method)
+{
+	/* Name the codec when it is known; else report the numeric scheme. */
+	const TIFFCodec* codec = TIFFFindCODEC(tif->tif_dir.td_compression);
+	if (codec == NULL)
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "Compression scheme %u %s decoding is not implemented",
+		    tif->tif_dir.td_compression, method);
+	else
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "%s %s decoding is not implemented", codec->name, method);
+	return (-1);
+}
+
+int
+_TIFFNoRowDecode(TIFF* tif, tidata_t pp, tsize_t cc, tsample_t s)
+{
+	(void) s, (void) cc, (void) pp;	/* stub: data arguments are ignored */
+	return TIFFNoDecode(tif, "scanline");
+}
+
+int
+_TIFFNoStripDecode(TIFF* tif, tidata_t pp, tsize_t cc, tsample_t s)
+{
+	(void) s, (void) cc, (void) pp;	/* stub: data arguments are ignored */
+	return TIFFNoDecode(tif, "strip");
+}
+
+int
+_TIFFNoTileDecode(TIFF* tif, tidata_t pp, tsize_t cc, tsample_t s)
+{
+	(void) s, (void) cc, (void) pp;	/* stub: data arguments are ignored */
+	return TIFFNoDecode(tif, "tile");
+}
+
+int
+_TIFFNoSeek(TIFF* tif, uint32 off)
+{	/* Seek hook that only reports the error and returns 0. */
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+	    "Compression algorithm does not support random access");
+	(void) off;	/* unused */
+	return 0;
+}
+
+int
+_TIFFNoPreCode(TIFF* tif, tsample_t s)
+{
+	(void) s, (void) tif;	/* no-op hook: nothing to do, report success */
+	return 1;
+}
+
+static int _TIFFtrue(TIFF* tif) { (void) tif; return 1; }	/* always 1 */
+static void _TIFFvoid(TIFF* tif) { (void) tif; return; }	/* does nothing */
+
+void
+_TIFFSetDefaultCompressionState(TIFF* tif)
+{
+	tif->tif_flags &= ~TIFF_NOBITREV;	/* clear the no-bit-reversal flag */
+	tif->tif_encodestatus = TRUE;		/* status flags */
+	tif->tif_decodestatus = TRUE;
+	tif->tif_setupencode = _TIFFtrue;	/* setup/pre/post hooks succeed */
+	tif->tif_setupdecode = _TIFFtrue;
+	tif->tif_preencode = _TIFFNoPreCode;
+	tif->tif_predecode = _TIFFNoPreCode;
+	tif->tif_postencode = _TIFFtrue;
+	tif->tif_encoderow = _TIFFNoRowEncode;	/* data hooks report errors */
+	tif->tif_encodestrip = _TIFFNoStripEncode;
+	tif->tif_encodetile = _TIFFNoTileEncode;
+	tif->tif_decoderow = _TIFFNoRowDecode;
+	tif->tif_decodestrip = _TIFFNoStripDecode;
+	tif->tif_decodetile = _TIFFNoTileDecode;
+	tif->tif_seek = _TIFFNoSeek;
+	tif->tif_close = _TIFFvoid;		/* close/cleanup do nothing */
+	tif->tif_cleanup = _TIFFvoid;
+	tif->tif_defstripsize = _TIFFDefaultStripSize;
+	tif->tif_deftilesize = _TIFFDefaultTileSize;
+}
+
+int
+TIFFSetCompressionScheme(TIFF* tif, int scheme)
+{
+	const TIFFCodec *codec = TIFFFindCODEC((uint16) scheme);
+	_TIFFSetDefaultCompressionState(tif);
+	/*
+	 * An unknown scheme is deliberately not an error: report success so
+	 * applications can still open files whose data the library has no
+	 * builtin support for, but which may still be meaningful.
+	 */
+	if (codec == NULL)
+		return (1);
+	return ((*codec->init)(tif, scheme));
+}
+
+/*
+ * Other compression schemes may be registered.  Registered
+ * schemes can also override the builtin versions provided
+ * by this library.
+ */
+typedef struct _codec {
+	struct _codec*	next;	/* next registered codec; NULL ends the list */
+	TIFFCodec*	info;	/* codec record for this entry */
+} codec_t;
+static	codec_t* registeredCODECS = NULL;	/* head of the registered list */
+
+const TIFFCodec*
+TIFFFindCODEC(uint16 scheme)
+{
+	const TIFFCodec* builtin = _TIFFBuiltinCODECS;
+	codec_t* node = registeredCODECS;
+	/* Registered codecs are searched first so they can override builtins. */
+	for (; node != NULL; node = node->next)
+		if (node->info->scheme == scheme)
+			return ((const TIFFCodec*) node->info);
+	for (; builtin->name != NULL; builtin++)
+		if (builtin->scheme == scheme)
+			return (builtin);
+	return ((const TIFFCodec*) 0);
+}
+
+TIFFCodec*
+TIFFRegisterCODEC(uint16 scheme, const char* name, TIFFInitMethod init)
+{
+	/* One allocation holds the list node, the TIFFCodec and the name copy. */
+	TIFFCodec* entry;
+	codec_t* node = (codec_t*)
+	    _TIFFmalloc(sizeof (codec_t) + sizeof (TIFFCodec) + strlen(name)+1);
+	if (node == NULL) {
+		TIFFErrorExt(0, "TIFFRegisterCODEC",
+		    "No space to register compression scheme %s", name);
+		return NULL;
+	}
+	entry = (TIFFCodec*) ((tidata_t) node + sizeof (codec_t));
+	entry->name = (char*) ((tidata_t) entry + sizeof (TIFFCodec));
+	strcpy(entry->name, name);
+	entry->scheme = scheme;
+	entry->init = init;
+	node->info = entry;
+	node->next = registeredCODECS;	/* push onto the head of the list */
+	registeredCODECS = node;
+	return (entry);
+}
+
+void
+TIFFUnRegisterCODEC(TIFFCodec* c)
+{
+	codec_t** link;	/* address of the pointer that refers to the node */
+	for (link = &registeredCODECS; *link != NULL; link = &(*link)->next) {
+		codec_t* node = *link;
+		if (node->info != c)
+			continue;
+		*link = node->next;	/* unlink, then free the node */
+		_TIFFfree(node);
+		return;
+	}
+	TIFFErrorExt(0, "TIFFUnRegisterCODEC",
+	    "Cannot remove compression scheme %s; not registered", c->name);
+}
+
+/************************************************************************/
+/*                       TIFFGetConfiguredCODECs()                      */
+/************************************************************************/
+
+/**
+ * Get list of configured codecs: registered ones first, then the built-in
+ * ones that are configured.  Caller is responsible to free the array.
+ *
+ * @return newly allocated array of TIFFCodec records terminated by a
+ * zero-filled record, or NULL if memory allocation failed.
+ */
+
+TIFFCodec*
+TIFFGetConfiguredCODECs()
+{
+	int		i = 1;	/* records needed once the next one is added */
+	codec_t		*cd;
+	const TIFFCodec	*c;
+	TIFFCodec	*codecs = NULL, *new_codecs;
+	/* Copy the codec record (cd->info), not the list node cd itself. */
+	for (cd = registeredCODECS; cd; cd = cd->next) {
+		new_codecs = (TIFFCodec *)
+			_TIFFrealloc(codecs, i * sizeof(TIFFCodec));
+		if (!new_codecs) {
+			_TIFFfree (codecs);
+			return NULL;
+		}
+		codecs = new_codecs;
+		_TIFFmemcpy(codecs + i - 1, cd->info, sizeof(TIFFCodec));
+		i++;
+	}
+	for (c = _TIFFBuiltinCODECS; c->name; c++) {
+		if (TIFFIsCODECConfigured(c->scheme)) {
+			new_codecs = (TIFFCodec *)
+				_TIFFrealloc(codecs, i * sizeof(TIFFCodec));
+			if (!new_codecs) {
+				_TIFFfree (codecs);
+				return NULL;
+			}
+			codecs = new_codecs;
+			_TIFFmemcpy(codecs + i - 1, (const tdata_t)c, sizeof(TIFFCodec));
+			i++;
+		}
+	}
+	/* Grow once more and append the zero-filled terminator record. */
+	new_codecs = (TIFFCodec *) _TIFFrealloc(codecs, i * sizeof(TIFFCodec));
+	if (!new_codecs) {
+		_TIFFfree (codecs);
+		return NULL;
+	}
+	codecs = new_codecs;
+	_TIFFmemset(codecs + i - 1, 0, sizeof(TIFFCodec));
+
+	return codecs;
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_config.h b/src/libtiff/tif_config.h
new file mode 100644
index 0000000..f9db461
--- /dev/null
+++ b/src/libtiff/tif_config.h
@@ -0,0 +1,263 @@
+/* libtiff/tif_config.h.in.  IMLIB - Modified for IM  */
+
+/* Support CCITT Group 3 & 4 algorithms */
+#define CCITT_SUPPORT
+
+/* Pick up YCbCr subsampling info from the JPEG data stream to support files
+   lacking the tag (default enabled). */
+#define CHECK_JPEG_YCBCR_SUBSAMPLING
+
+/* Support C++ stream API (requires C++ compiler) */
+#undef CXX_SUPPORT
+
+/* Treat extra sample as alpha (default enabled). The RGBA interface will
+   treat a fourth sample with no EXTRASAMPLE_ value as being ASSOCALPHA. Many
+   packages produce RGBA files but don't mark the alpha properly. */
+#define DEFAULT_EXTRASAMPLE_AS_ALPHA
+
+/* Use the Apple OpenGL framework. */
+#undef HAVE_APPLE_OPENGL_FRAMEWORK
+
+/* Define to 1 if you have the <assert.h> header file. */
+#define HAVE_ASSERT_H
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#define HAVE_FCNTL_H
+
+/* Define to 1 if you have the `floor' function. */
+#define HAVE_FLOOR
+
+/* Define to 1 if you have the `getopt' function. */
+#undef HAVE_GETOPT
+
+/* Define as 0 or 1 according to the floating point format supported by the
+   machine */
+#define HAVE_IEEEFP
+
+/* Define to 1 if the system has the type `int16'. */
+/* #undef HAVE_INT16 */
+
+/* Define to 1 if the system has the type `int32'. */
+/* #undef HAVE_INT32 */
+
+/* Define to 1 if the system has the type `int8'. */
+/* #undef HAVE_INT8 */
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the `isascii' function. */
+#undef HAVE_ISASCII
+
+/* Define to 1 if you have the `lfind' function. */
+#undef HAVE_LFIND
+
+/* Define to 1 if you have the `c' library (-lc). */
+#undef HAVE_LIBC
+
+/* Define to 1 if you have the `m' library (-lm). */
+#undef HAVE_LIBM
+
+/* Define to 1 if you have the <limits.h> header file. */
+#undef HAVE_LIMITS_H
+
+/* Define to 1 if you have the <malloc.h> header file. */
+#undef HAVE_MALLOC_H
+
+/* Define to 1 if you have the `memmove' function. */
+#undef HAVE_MEMMOVE
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define to 1 if you have the `memset' function. */
+#undef HAVE_MEMSET
+
+/* Define to 1 if you have the `mmap' function. */
+#undef HAVE_MMAP
+
+/* Define to 1 if you have the `pow' function. */
+#undef HAVE_POW
+
+/* Define if you have POSIX threads libraries and header files. */
+#undef HAVE_PTHREAD
+
+/* Define to 1 if you have the <search.h> header file. */
+/* #undef HAVE_SEARCH_H */
+
+/* Define to 1 if you have the `sqrt' function. */
+#undef HAVE_SQRT
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the `strcasecmp' function. */
+#undef HAVE_STRCASECMP
+
+/* Define to 1 if you have the `strchr' function. */
+#undef HAVE_STRCHR
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H
+
+/* Define to 1 if you have the `strrchr' function. */
+#undef HAVE_STRRCHR
+
+/* Define to 1 if you have the `strstr' function. */
+#undef HAVE_STRSTR
+
+/* Define to 1 if you have the `strtol' function. */
+#undef HAVE_STRTOL
+
+/* Define to 1 if you have the `strtoul' function. */
+#undef HAVE_STRTOUL
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#undef HAVE_SYS_TIME_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to 1 if you have the <windows.h> header file. */
+#undef HAVE_WINDOWS_H
+
+/* Native cpu byte order: 1 if big-endian (Motorola) or 0 if little-endian
+   (Intel) */
+#undef HOST_BIGENDIAN
+
+/* Set the native cpu bit order (FILLORDER_LSB2MSB or FILLORDER_MSB2LSB) */
+#define HOST_FILLORDER FILLORDER_LSB2MSB  /* Not used by IM */
+
+/* Support JPEG compression (requires IJG JPEG library) */
+#define JPEG_SUPPORT
+
+/* Support LogLuv high dynamic range encoding */
+#define LOGLUV_SUPPORT
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#undef LT_OBJDIR
+
+/* Support LZW algorithm */
+#define LZW_SUPPORT
+
+/* Support Microsoft Document Imaging format */
+#define MDI_SUPPORT 1
+
+/* Support NeXT 2-bit RLE algorithm */
+#define NEXT_SUPPORT
+
+/* Define to 1 if your C compiler doesn't accept -c and -o together. */
+#undef NO_MINUS_C_MINUS_O
+
+/* Support Old JPEG compression (read contrib/ojpeg/README first! Compilation
+   fails with unpatched IJG JPEG library) */
+#define OJPEG_SUPPORT
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Support Macintosh PackBits algorithm */
+#define PACKBITS_SUPPORT
+
+/* Support Pixar log-format algorithm (requires Zlib) */
+#define PIXARLOG_SUPPORT
+
+/* Define to necessary symbol if this constant uses a non-standard name on
+   your system. */
+#undef PTHREAD_CREATE_JOINABLE
+
+/* The size of a `int', as computed by sizeof. */
+#define SIZEOF_INT 4
+
+/* The size of a `long', as computed by sizeof. */
+/* #define SIZEOF_LONG 8  -  Used by "tif_fax3.c" for 64 bits. */
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Support strip chopping (whether or not to convert single-strip uncompressed
+   images to multiple strips of specified size to reduce memory usage) */
+#undef STRIPCHOP_DEFAULT
+
+/* Default size of the strip in bytes (when strip chopping enabled) */
+#undef STRIP_SIZE_DEFAULT
+
+/* Enable SubIFD tag (330) support */
+#define SUBIFD_SUPPORT
+
+/* Support ThunderScan 4-bit RLE algorithm */
+#define THUNDER_SUPPORT
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#undef TIME_WITH_SYS_TIME
+
+/* Define to 1 if your <sys/time.h> declares `struct tm'. */
+#undef TM_IN_SYS_TIME
+
+/* Version number of package */
+#undef VERSION
+
+/* Define to 1 if your processor stores words with the most significant byte
+   first (like Motorola and SPARC, unlike Intel and VAX). */
+/* #undef WORDS_BIGENDIAN  -  Defined in the config.mak for IM */
+
+/* Define to 1 if the X Window System is missing or not being used. */
+#undef X_DISPLAY_MISSING
+
+/* Support Deflate compression */
+#define ZIP_SUPPORT
+
+/* Number of bits in a file offset, on hosts where this is settable. */
+#undef _FILE_OFFSET_BITS
+
+/* Define for large files, on AIX-style hosts. */
+#undef _LARGE_FILES
+
+/* Define to empty if `const' does not conform to ANSI C. */
+/* #undef const */
+
+/* Define to `__inline__' or `__inline' if that's what the C compiler
+   calls it, or to nothing if 'inline' is not supported under any name.  */
+#ifndef __cplusplus
+#define inline
+#endif
+
+/* Define to `long' if <sys/types.h> does not define. */
+/* #undef off_t */
+
+/* Define to `unsigned' if <sys/types.h> does not define. */
+/* #undef size_t */
+
+/* To avoid the inclusion of <windows.h> */
+#define AVOID_WIN32_FILEIO 1
diff --git a/src/libtiff/tif_dir.c b/src/libtiff/tif_dir.c
new file mode 100644
index 0000000..f2d1ee7
--- /dev/null
+++ b/src/libtiff/tif_dir.c
@@ -0,0 +1,1350 @@
+/* $Id: tif_dir.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ *
+ * Directory Tag Get & Set Routines.
+ * (and also some miscellaneous stuff)
+ */
+#include "tiffiop.h"
+
+/*
+ * These are used in the backwards compatibility code...
+ */
+#define DATATYPE_VOID		0       /* !untyped data */
+#define DATATYPE_INT		1       /* !signed integer data */
+#define DATATYPE_UINT		2       /* !unsigned integer data */
+#define DATATYPE_IEEEFP		3       /* !IEEE floating point data */
+
+static void	/* Replace *vpp with a private copy of nmemb elements of elem_size bytes taken from vp. */
+setByteArray(void** vpp, void* vp, size_t nmemb, size_t elem_size)
+{
+	if (*vpp)				/* release the previously stored copy */
+		_TIFFfree(*vpp), *vpp = 0;
+	if (vp) {				/* vp == NULL only clears *vpp */
+		tsize_t	bytes = nmemb * elem_size;
+		if (elem_size && bytes / elem_size == nmemb)	/* allocate only if the product round-trips */
+			*vpp = (void*) _TIFFmalloc(bytes);
+		if (*vpp)			/* still 0 when the allocation was skipped or failed */
+			_TIFFmemcpy(*vpp, vp, bytes);
+	}
+}
+void _TIFFsetByteArray(void** vpp, void* vp, uint32 n)	/* copy n elements of 1 byte */
+    { setByteArray(vpp, vp, n, 1); }
+void _TIFFsetString(char** cpp, char* cp)	/* copy strlen(cp)+1 bytes; cp is handed to strlen() as is */
+    { setByteArray((void**) cpp, (void*) cp, strlen(cp)+1, 1); }
+void _TIFFsetNString(char** cpp, char* cp, uint32 n)	/* copy exactly n bytes; no terminator is appended */
+    { setByteArray((void**) cpp, (void*) cp, n, 1); }
+void _TIFFsetShortArray(uint16** wpp, uint16* wp, uint32 n)	/* copy n uint16 elements */
+    { setByteArray((void**) wpp, (void*) wp, n, sizeof (uint16)); }
+void _TIFFsetLongArray(uint32** lpp, uint32* lp, uint32 n)	/* copy n uint32 elements */
+    { setByteArray((void**) lpp, (void*) lp, n, sizeof (uint32)); }
+void _TIFFsetFloatArray(float** fpp, float* fp, uint32 n)	/* copy n float elements */
+    { setByteArray((void**) fpp, (void*) fp, n, sizeof (float)); }
+void _TIFFsetDoubleArray(double** dpp, double* dp, uint32 n)	/* copy n double elements */
+    { setByteArray((void**) dpp, (void*) dp, n, sizeof (double)); }
+
+/*
+ * Install extra samples information: ap supplies a count (stored in *v) and a uint16 array of that many codes.
+ */
+static int	/* 1 when the values were installed in td, 0 when they were rejected */
+setExtraSamples(TIFFDirectory* td, va_list ap, uint32* v)
+{
+	uint16* va;
+	uint32 i;
+
+	*v = va_arg(ap, uint32);
+	if (*v > td->td_samplesperpixel)	/* compare untruncated: the loop below runs *v times */
+		return (0);
+	va = va_arg(ap, uint16*);
+	if (*v > 0 && va == NULL)		/* typically missing param */
+		return (0);
+	for (i = 0; i < *v; i++)		/* every code must be a known EXTRASAMPLE_* value */
+		if (va[i] > EXTRASAMPLE_UNASSALPHA)
+			return (0);
+	td->td_extrasamples = (uint16) *v;
+	_TIFFsetShortArray(&td->td_sampleinfo, va, td->td_extrasamples);
+	return (1);
+}
+
+static uint32	/* bytes used by the td_samplesperpixel NUL-terminated names, or 0 after reporting an error */
+checkInkNamesString(TIFF* tif, uint32 slen, const char* s)
+{
+	TIFFDirectory* td = &tif->tif_dir;
+	uint16 i = td->td_samplesperpixel;	/* names still to be found */
+
+	if (slen > 0) {
+		const char* ep = s+slen;
+		const char* cp = s;
+		for (; i > 0; i--) {
+			for (; cp < ep && *cp != '\0'; cp++) {}	/* test the bound before reading *cp */
+			if (cp >= ep)			/* name not terminated inside the buffer */
+				goto bad;
+			cp++;				/* skip \0 */
+		}
+		return (cp-s);
+	}
+bad:
+	TIFFErrorExt(tif->tif_clientdata, "TIFFSetField",
+	    "%s: Invalid InkNames value; expecting %d names, found %d",
+	    tif->tif_name,
+	    td->td_samplesperpixel,
+	    td->td_samplesperpixel-i);
+	return (0);
+}
+
+/*
+ * Default vsetfield method: store the value of `tag', pulled from ap, in the
+ * current directory; on success flag the field as set and the directory as
+ * dirty.  Returns 1 on success and 0 on failure.  ap stays owned by the
+ * caller that did va_start(), so va_end() is deliberately not called here.
+ */
+static int
+_TIFFVSetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+	static const char module[] = "_TIFFVSetField";
+	TIFFDirectory* td = &tif->tif_dir;
+	int status = 1;
+	uint32 v32, i, v;
+	char* s;
+
+	switch (tag) {
+	case TIFFTAG_SUBFILETYPE:
+		td->td_subfiletype = va_arg(ap, uint32);
+		break;
+	case TIFFTAG_IMAGEWIDTH:
+		td->td_imagewidth = va_arg(ap, uint32);
+		break;
+	case TIFFTAG_IMAGELENGTH:
+		td->td_imagelength = va_arg(ap, uint32);
+		break;
+	case TIFFTAG_BITSPERSAMPLE:
+		td->td_bitspersample = (uint16) va_arg(ap, int);
+		/*
+		 * If the data require post-decoding processing to byte-swap
+		 * samples, set it up here.  Note that since tags are required
+		 * to be ordered, compression code can override this behaviour
+		 * in the setup method if it wants to roll the post decoding
+		 * work in with its normal work.
+		 */
+		if (tif->tif_flags & TIFF_SWAB) {
+			if (td->td_bitspersample == 16)
+				tif->tif_postdecode = _TIFFSwab16BitData;
+			else if (td->td_bitspersample == 24)
+				tif->tif_postdecode = _TIFFSwab24BitData;
+			else if (td->td_bitspersample == 32)
+				tif->tif_postdecode = _TIFFSwab32BitData;
+			else if (td->td_bitspersample == 64)
+				tif->tif_postdecode = _TIFFSwab64BitData;
+			else if (td->td_bitspersample == 128) /* two 64's */
+				tif->tif_postdecode = _TIFFSwab64BitData;
+		}
+		break;
+	case TIFFTAG_COMPRESSION:
+		v = va_arg(ap, uint32) & 0xffff;
+		/*
+		 * If we're changing the compression scheme, then notify the
+		 * previous module so that it can cleanup any state it's
+		 * setup.
+		 */
+		if (TIFFFieldSet(tif, FIELD_COMPRESSION)) {
+			if (td->td_compression == v)
+				break;
+			(*tif->tif_cleanup)(tif);
+			tif->tif_flags &= ~TIFF_CODERSETUP;
+		}
+		/*
+		 * Setup new compression routine state.
+		 */
+		if( (status = TIFFSetCompressionScheme(tif, v)) != 0 )
+                    td->td_compression = (uint16) v;
+                else
+                    status = 0;
+		break;
+	case TIFFTAG_PHOTOMETRIC:
+		td->td_photometric = (uint16) va_arg(ap, int);
+		break;
+	case TIFFTAG_THRESHHOLDING:
+		td->td_threshholding = (uint16) va_arg(ap, int);
+		break;
+	case TIFFTAG_FILLORDER:
+		v = va_arg(ap, uint32);
+		if (v != FILLORDER_LSB2MSB && v != FILLORDER_MSB2LSB)
+			goto badvalue;
+		td->td_fillorder = (uint16) v;
+		break;
+	case TIFFTAG_ORIENTATION:
+		v = va_arg(ap, uint32);
+		if (v < ORIENTATION_TOPLEFT || ORIENTATION_LEFTBOT < v) {
+			TIFFWarningExt(tif->tif_clientdata, tif->tif_name,
+			    "Bad value %lu for \"%s\" tag ignored",
+			    (unsigned long) v, _TIFFFieldWithTag(tif, tag)->field_name);
+		} else
+			td->td_orientation = (uint16) v;
+		break;
+	case TIFFTAG_SAMPLESPERPIXEL:
+		/* XXX should cross check -- e.g. if palette, then 1 */
+		v = va_arg(ap, uint32);
+		if (v == 0)
+			goto badvalue;
+		td->td_samplesperpixel = (uint16) v;
+		break;
+	case TIFFTAG_ROWSPERSTRIP:
+		v32 = va_arg(ap, uint32);
+		if (v32 == 0)
+			goto badvalue32;
+		td->td_rowsperstrip = v32;
+		if (!TIFFFieldSet(tif, FIELD_TILEDIMENSIONS)) {
+			td->td_tilelength = v32;
+			td->td_tilewidth = td->td_imagewidth;
+		}
+		break;
+	case TIFFTAG_MINSAMPLEVALUE:
+		td->td_minsamplevalue = (uint16) va_arg(ap, int);
+		break;
+	case TIFFTAG_MAXSAMPLEVALUE:
+		td->td_maxsamplevalue = (uint16) va_arg(ap, int);
+		break;
+	case TIFFTAG_SMINSAMPLEVALUE:
+		td->td_sminsamplevalue = va_arg(ap, double);
+		break;
+	case TIFFTAG_SMAXSAMPLEVALUE:
+		td->td_smaxsamplevalue = va_arg(ap, double);
+		break;
+	case TIFFTAG_XRESOLUTION:
+		td->td_xresolution = (float) va_arg(ap, double);
+		break;
+	case TIFFTAG_YRESOLUTION:
+		td->td_yresolution = (float) va_arg(ap, double);
+		break;
+	case TIFFTAG_PLANARCONFIG:
+		v = va_arg(ap, uint32);
+		if (v != PLANARCONFIG_CONTIG && v != PLANARCONFIG_SEPARATE)
+			goto badvalue;
+		td->td_planarconfig = (uint16) v;
+		break;
+	case TIFFTAG_XPOSITION:
+		td->td_xposition = (float) va_arg(ap, double);
+		break;
+	case TIFFTAG_YPOSITION:
+		td->td_yposition = (float) va_arg(ap, double);
+		break;
+	case TIFFTAG_RESOLUTIONUNIT:
+		v = va_arg(ap, uint32);
+		if (v < RESUNIT_NONE || RESUNIT_CENTIMETER < v)
+			goto badvalue;
+		td->td_resolutionunit = (uint16) v;
+		break;
+	case TIFFTAG_PAGENUMBER:
+		td->td_pagenumber[0] = (uint16) va_arg(ap, int);
+		td->td_pagenumber[1] = (uint16) va_arg(ap, int);
+		break;
+	case TIFFTAG_HALFTONEHINTS:
+		td->td_halftonehints[0] = (uint16) va_arg(ap, int);
+		td->td_halftonehints[1] = (uint16) va_arg(ap, int);
+		break;
+	case TIFFTAG_COLORMAP:
+		v32 = (uint32)(1L<<td->td_bitspersample);
+		_TIFFsetShortArray(&td->td_colormap[0], va_arg(ap, uint16*), v32);
+		_TIFFsetShortArray(&td->td_colormap[1], va_arg(ap, uint16*), v32);
+		_TIFFsetShortArray(&td->td_colormap[2], va_arg(ap, uint16*), v32);
+		break;
+	case TIFFTAG_EXTRASAMPLES:
+		if (!setExtraSamples(td, ap, &v))
+			goto badvalue;
+		break;
+	case TIFFTAG_MATTEING:
+		td->td_extrasamples = (uint16) (va_arg(ap, int) != 0);
+		if (td->td_extrasamples) {
+			uint16 sv = EXTRASAMPLE_ASSOCALPHA;
+			_TIFFsetShortArray(&td->td_sampleinfo, &sv, 1);
+		}
+		break;
+	case TIFFTAG_TILEWIDTH:
+		v32 = va_arg(ap, uint32);
+		if (v32 % 16) {
+			if (tif->tif_mode != O_RDONLY)
+				goto badvalue32;
+			TIFFWarningExt(tif->tif_clientdata, tif->tif_name,
+				"Nonstandard tile width %d, convert file", v32);
+		}
+		td->td_tilewidth = v32;
+		tif->tif_flags |= TIFF_ISTILED;
+		break;
+	case TIFFTAG_TILELENGTH:
+		v32 = va_arg(ap, uint32);
+		if (v32 % 16) {
+			if (tif->tif_mode != O_RDONLY)
+				goto badvalue32;
+			TIFFWarningExt(tif->tif_clientdata, tif->tif_name,
+			    "Nonstandard tile length %d, convert file", v32);
+		}
+		td->td_tilelength = v32;
+		tif->tif_flags |= TIFF_ISTILED;
+		break;
+	case TIFFTAG_TILEDEPTH:
+		v32 = va_arg(ap, uint32);
+		if (v32 == 0)
+			goto badvalue32;
+		td->td_tiledepth = v32;
+		break;
+	case TIFFTAG_DATATYPE:
+		v = va_arg(ap, uint32);
+		switch (v) {
+		case DATATYPE_VOID:	v = SAMPLEFORMAT_VOID;	break;
+		case DATATYPE_INT:	v = SAMPLEFORMAT_INT;	break;
+		case DATATYPE_UINT:	v = SAMPLEFORMAT_UINT;	break;
+		case DATATYPE_IEEEFP:	v = SAMPLEFORMAT_IEEEFP;break;
+		default:		goto badvalue;
+		}
+		td->td_sampleformat = (uint16) v;
+		break;
+	case TIFFTAG_SAMPLEFORMAT:
+		v = va_arg(ap, uint32);
+		if (v < SAMPLEFORMAT_UINT || SAMPLEFORMAT_COMPLEXIEEEFP < v)
+			goto badvalue;
+		td->td_sampleformat = (uint16) v;
+
+                /*  Try to fix up the SWAB function for complex data. */
+                if( td->td_sampleformat == SAMPLEFORMAT_COMPLEXINT 
+                    && td->td_bitspersample == 32
+                    && tif->tif_postdecode == _TIFFSwab32BitData )
+                    tif->tif_postdecode = _TIFFSwab16BitData;
+                else if( (td->td_sampleformat == SAMPLEFORMAT_COMPLEXINT 
+                          || td->td_sampleformat == SAMPLEFORMAT_COMPLEXIEEEFP)
+                         && td->td_bitspersample == 64
+                         && tif->tif_postdecode == _TIFFSwab64BitData )
+                    tif->tif_postdecode = _TIFFSwab32BitData;
+		break;
+	case TIFFTAG_IMAGEDEPTH:
+		td->td_imagedepth = va_arg(ap, uint32);
+		break;
+	case TIFFTAG_SUBIFD:
+		if ((tif->tif_flags & TIFF_INSUBIFD) == 0) {
+			td->td_nsubifd = (uint16) va_arg(ap, int);
+			_TIFFsetLongArray(&td->td_subifd, va_arg(ap, uint32*),
+			    (long) td->td_nsubifd);
+		} else {
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: Sorry, cannot nest SubIFDs",
+				  tif->tif_name);
+			status = 0;
+		}
+		break;
+	case TIFFTAG_YCBCRPOSITIONING:
+		td->td_ycbcrpositioning = (uint16) va_arg(ap, int);
+		break;
+	case TIFFTAG_YCBCRSUBSAMPLING:
+		td->td_ycbcrsubsampling[0] = (uint16) va_arg(ap, int);
+		td->td_ycbcrsubsampling[1] = (uint16) va_arg(ap, int);
+		break;
+	case TIFFTAG_TRANSFERFUNCTION:
+		v = (td->td_samplesperpixel - td->td_extrasamples) > 1 ? 3 : 1;
+		for (i = 0; i < v; i++)
+			_TIFFsetShortArray(&td->td_transferfunction[i],
+			    va_arg(ap, uint16*), 1L<<td->td_bitspersample);
+		break;
+	case TIFFTAG_INKNAMES:
+		v = va_arg(ap, uint32);
+		s = va_arg(ap, char*);
+		v = checkInkNamesString(tif, v, s);
+                status = v > 0;
+		if( v > 0 ) {
+			_TIFFsetNString(&td->td_inknames, s, v);
+			td->td_inknameslen = v;
+		}
+		break;
+        default: {
+            const TIFFFieldInfo* fip = _TIFFFindFieldInfo(tif, tag, TIFF_ANY);
+            TIFFTagValue *tv;
+            int tv_size, iCustom;
+
+            /*
+	     * This can happen if multiple images are open with different
+	     * codecs which have private tags.  The global tag information
+	     * table may then have tags that are valid for one file but not
+	     * the other. If the client tries to set a tag that is not valid
+	     * for the image's codec then we'll arrive here.  This
+	     * happens, for example, when tiffcp is used to convert between
+	     * compression schemes and codec-specific tags are blindly copied.
+             */
+            if(fip == NULL || fip->field_bit != FIELD_CUSTOM) {
+		TIFFErrorExt(tif->tif_clientdata, module,
+		    "%s: Invalid %stag \"%s\" (not supported by codec)",
+		    tif->tif_name, isPseudoTag(tag) ? "pseudo-" : "",
+		    _TIFFFieldWithTag(tif, tag)->field_name);
+		status = 0;
+		break;
+            }
+
+            /*
+             * Find the existing entry for this custom value.
+             */
+            tv = NULL;
+            for(iCustom = 0; iCustom < td->td_customValueCount; iCustom++) {
+                if(td->td_customValues[iCustom].info == fip) {
+                    tv = td->td_customValues + iCustom;
+                    if(tv->value != NULL)
+                    {
+                        _TIFFfree(tv->value);
+                        tv->value = NULL;
+                    }
+                    break;
+                }
+            }
+
+            /*
+             * Grow the custom list if the entry was not found.
+             */
+            if(tv == NULL) {
+		TIFFTagValue	*new_customValues;
+		
+		/* Ask for one more slot; the count is bumped only once realloc succeeds. */
+		new_customValues = (TIFFTagValue *)
+			_TIFFrealloc(td->td_customValues,
+				     sizeof(TIFFTagValue) * (td->td_customValueCount + 1));
+		if (!new_customValues) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+		"%s: Failed to allocate space for list of custom values",
+				  tif->tif_name);
+			status = 0;
+			goto end;
+		}
+		td->td_customValueCount++;
+		td->td_customValues = new_customValues;
+
+                tv = td->td_customValues + (td->td_customValueCount-1);
+                tv->info = fip;
+                tv->value = NULL;
+                tv->count = 0;
+            }
+
+            /*
+             * Set custom value ... save a copy of the custom tag value.
+             */
+	    tv_size = _TIFFDataSize(fip->field_type);
+	    if (tv_size == 0) {
+		    status = 0;
+		    TIFFErrorExt(tif->tif_clientdata, module,
+				 "%s: Bad field type %d for \"%s\"",
+				 tif->tif_name, fip->field_type,
+				 fip->field_name);
+		    goto end;
+	    }
+           
+            if(fip->field_passcount) {
+		    if (fip->field_writecount == TIFF_VARIABLE2)
+			tv->count = (uint32) va_arg(ap, uint32);
+		    else
+			tv->count = (int) va_arg(ap, int);
+	    } else if (fip->field_writecount == TIFF_VARIABLE
+		       || fip->field_writecount == TIFF_VARIABLE2)
+		tv->count = 1;
+	    else if (fip->field_writecount == TIFF_SPP)
+		tv->count = td->td_samplesperpixel;
+	    else
+                tv->count = fip->field_writecount;
+            
+	    if (fip->field_type == TIFF_ASCII)
+		    _TIFFsetString((char **)&tv->value, va_arg(ap, char *));
+	    else {
+                tv->value = _TIFFmalloc(tv_size * tv->count);
+		if (!tv->value) {
+		    status = 0;
+		    goto end;
+		}
+
+		if ((fip->field_passcount
+		    || fip->field_writecount == TIFF_VARIABLE
+		    || fip->field_writecount == TIFF_VARIABLE2
+		    || fip->field_writecount == TIFF_SPP
+		    || tv->count > 1)
+		    && fip->field_tag != TIFFTAG_PAGENUMBER
+		    && fip->field_tag != TIFFTAG_HALFTONEHINTS
+		    && fip->field_tag != TIFFTAG_YCBCRSUBSAMPLING
+		    && fip->field_tag != TIFFTAG_DOTRANGE) {
+                    _TIFFmemcpy(tv->value, va_arg(ap, void *),
+				tv->count * tv_size);
+		} else {
+		    /*
+		     * XXX: The following loop required to handle
+		     * TIFFTAG_PAGENUMBER, TIFFTAG_HALFTONEHINTS,
+		     * TIFFTAG_YCBCRSUBSAMPLING and TIFFTAG_DOTRANGE tags.
+		     * These tags are actually arrays and should be passed as
+		     * array pointers to TIFFSetField() function, but actually
+		     * passed as a list of separate values. This behaviour
+		     * must be changed in the future!
+		     */
+		    int i;
+		    char *val = (char *)tv->value;
+
+		    for (i = 0; i < tv->count; i++, val += tv_size) {
+			    switch (fip->field_type) {
+				case TIFF_BYTE:
+				case TIFF_UNDEFINED:
+				    {
+					uint8 v = (uint8)va_arg(ap, int);
+					_TIFFmemcpy(val, &v, tv_size);
+				    }
+				    break;
+				case TIFF_SBYTE:
+				    {
+					int8 v = (int8)va_arg(ap, int);
+					_TIFFmemcpy(val, &v, tv_size);
+				    }
+				    break;
+				case TIFF_SHORT:
+				    {
+					uint16 v = (uint16)va_arg(ap, int);
+					_TIFFmemcpy(val, &v, tv_size);
+				    }
+				    break;
+				case TIFF_SSHORT:
+				    {
+					int16 v = (int16)va_arg(ap, int);
+					_TIFFmemcpy(val, &v, tv_size);
+				    }
+				    break;
+				case TIFF_LONG:
+				case TIFF_IFD:
+				    {
+					uint32 v = va_arg(ap, uint32);
+					_TIFFmemcpy(val, &v, tv_size);
+				    }
+				    break;
+				case TIFF_SLONG:
+				    {
+					int32 v = va_arg(ap, int32);
+					_TIFFmemcpy(val, &v, tv_size);
+				    }
+				    break;
+				case TIFF_RATIONAL:
+				case TIFF_SRATIONAL:
+				case TIFF_FLOAT:
+				    {
+					float v = (float)va_arg(ap, double);
+					_TIFFmemcpy(val, &v, tv_size);
+				    }
+				    break;
+				case TIFF_DOUBLE:
+				    {
+					double v = va_arg(ap, double);
+					_TIFFmemcpy(val, &v, tv_size);
+				    }
+				    break;
+				default:
+				    _TIFFmemset(val, 0, tv_size);
+				    status = 0;
+				    break;
+			    }
+		    }
+		}
+	    }
+          }
+	}
+	if (status) {
+		TIFFSetFieldBit(tif, _TIFFFieldWithTag(tif, tag)->field_bit);
+		tif->tif_flags |= TIFF_DIRTYDIRECT;
+	}
+
+end:
+	return (status);
+badvalue:
+	TIFFErrorExt(tif->tif_clientdata, module, "%s: Bad value %d for \"%s\"",
+		  tif->tif_name, v, _TIFFFieldWithTag(tif, tag)->field_name);
+	return (0);
+badvalue32:
+	TIFFErrorExt(tif->tif_clientdata, module, "%s: Bad value %ld for \"%s\"",
+		   tif->tif_name, (long) v32, _TIFFFieldWithTag(tif, tag)->field_name);
+	return (0);
+}
+
+/*
+ * Decide whether a tag may be set right now; returns 1 when the
+ * change is allowed and 0 (after reporting an error) when it is not.
+ * ImageLength stays changeable at all times so that images can be
+ * appended to and extended.  Every other tag is frozen once writing
+ * has begun, unless the field info table marks it as having no
+ * effect on the format of the data that is written.
+ * Tags with no field info at all are rejected as unknown.
+ */
+static int
+OkToChangeTag(TIFF* tif, ttag_t tag)
+{
+	const TIFFFieldInfo* info = _TIFFFindFieldInfo(tif, tag, TIFF_ANY);
+	int writing = (tif->tif_flags & TIFF_BEENWRITING) != 0;
+
+	if (info == NULL) {
+		TIFFErrorExt(tif->tif_clientdata, "TIFFSetField", "%s: Unknown %stag %u",
+		    tif->tif_name, isPseudoTag(tag) ? "pseudo-" : "", tag);
+		return (0);
+	}
+	/*
+	 * The info table says whether a tag can be changed after
+	 * writing has started; only tags that don't/shouldn't affect
+	 * the compression and/or format of the data qualify.
+	 */
+	if (writing && tag != TIFFTAG_IMAGELENGTH && !info->field_oktochange) {
+		TIFFErrorExt(tif->tif_clientdata, "TIFFSetField",
+		    "%s: Cannot modify tag \"%s\" while writing",
+		    tif->tif_name, info->field_name);
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * Record the value of a field in the internal directory
+ * structure; the field will be written to the file when/if
+ * the directory structure is updated.  The variable arguments
+ * are wrapped in a va_list and handed to TIFFVSetField(),
+ * whose status is returned unchanged.
+ */
+int
+TIFFSetField(TIFF* tif, ttag_t tag, ...)
+{
+	va_list ap;
+	int status;
+
+	va_start(ap, tag);			/* values follow the tag argument */
+	status = TIFFVSetField(tif, tag, ap);
+	va_end(ap);
+	return (status);
+}
+
+/*
+ * va_list flavour of TIFFSetField, useful for building higher-level
+ * interfaces on top of the library.  Returns 0 without calling the
+ * vsetfield tag method when OkToChangeTag() refuses the tag.
+ */
+int
+TIFFVSetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+	if (!OkToChangeTag(tif, tag))
+		return 0;
+	return (*tif->tif_tagmethods.vsetfield)(tif, tag, ap);
+}
+
+/* Default vgetfield method: copy the stored value of `tag' out through the
+ * pointer argument(s) pulled from ap.  Returns 1 on success, 0 otherwise. */
+static int
+_TIFFVGetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+    TIFFDirectory* td = &tif->tif_dir;
+    int            ret_val = 1;
+
+    switch (tag) {
+	case TIFFTAG_SUBFILETYPE:
+            *va_arg(ap, uint32*) = td->td_subfiletype;
+            break;
+	case TIFFTAG_IMAGEWIDTH:
+            *va_arg(ap, uint32*) = td->td_imagewidth;
+            break;
+	case TIFFTAG_IMAGELENGTH:
+            *va_arg(ap, uint32*) = td->td_imagelength;
+            break;
+	case TIFFTAG_BITSPERSAMPLE:
+            *va_arg(ap, uint16*) = td->td_bitspersample;
+            break;
+	case TIFFTAG_COMPRESSION:
+            *va_arg(ap, uint16*) = td->td_compression;
+            break;
+	case TIFFTAG_PHOTOMETRIC:
+            *va_arg(ap, uint16*) = td->td_photometric;
+            break;
+	case TIFFTAG_THRESHHOLDING:
+            *va_arg(ap, uint16*) = td->td_threshholding;
+            break;
+	case TIFFTAG_FILLORDER:
+            *va_arg(ap, uint16*) = td->td_fillorder;
+            break;
+	case TIFFTAG_ORIENTATION:
+            *va_arg(ap, uint16*) = td->td_orientation;
+            break;
+	case TIFFTAG_SAMPLESPERPIXEL:
+            *va_arg(ap, uint16*) = td->td_samplesperpixel;
+            break;
+	case TIFFTAG_ROWSPERSTRIP:
+            *va_arg(ap, uint32*) = td->td_rowsperstrip;
+            break;
+	case TIFFTAG_MINSAMPLEVALUE:
+            *va_arg(ap, uint16*) = td->td_minsamplevalue;
+            break;
+	case TIFFTAG_MAXSAMPLEVALUE:
+            *va_arg(ap, uint16*) = td->td_maxsamplevalue;
+            break;
+	case TIFFTAG_SMINSAMPLEVALUE:
+            *va_arg(ap, double*) = td->td_sminsamplevalue;
+            break;
+	case TIFFTAG_SMAXSAMPLEVALUE:
+            *va_arg(ap, double*) = td->td_smaxsamplevalue;
+            break;
+	case TIFFTAG_XRESOLUTION:
+            *va_arg(ap, float*) = td->td_xresolution;
+            break;
+	case TIFFTAG_YRESOLUTION:
+            *va_arg(ap, float*) = td->td_yresolution;
+            break;
+	case TIFFTAG_PLANARCONFIG:
+            *va_arg(ap, uint16*) = td->td_planarconfig;
+            break;
+	case TIFFTAG_XPOSITION:
+            *va_arg(ap, float*) = td->td_xposition;
+            break;
+	case TIFFTAG_YPOSITION:
+            *va_arg(ap, float*) = td->td_yposition;
+            break;
+	case TIFFTAG_RESOLUTIONUNIT:
+            *va_arg(ap, uint16*) = td->td_resolutionunit;
+            break;
+	case TIFFTAG_PAGENUMBER:
+            *va_arg(ap, uint16*) = td->td_pagenumber[0];
+            *va_arg(ap, uint16*) = td->td_pagenumber[1];
+            break;
+	case TIFFTAG_HALFTONEHINTS:
+            *va_arg(ap, uint16*) = td->td_halftonehints[0];
+            *va_arg(ap, uint16*) = td->td_halftonehints[1];
+            break;
+	case TIFFTAG_COLORMAP:	/* hands back the directory's own arrays, not copies */
+            *va_arg(ap, uint16**) = td->td_colormap[0];
+            *va_arg(ap, uint16**) = td->td_colormap[1];
+            *va_arg(ap, uint16**) = td->td_colormap[2];
+            break;
+	case TIFFTAG_STRIPOFFSETS:
+	case TIFFTAG_TILEOFFSETS:
+            *va_arg(ap, uint32**) = td->td_stripoffset;
+            break;
+	case TIFFTAG_STRIPBYTECOUNTS:
+	case TIFFTAG_TILEBYTECOUNTS:
+            *va_arg(ap, uint32**) = td->td_stripbytecount;
+            break;
+	case TIFFTAG_MATTEING:	/* 1 only for exactly one extra sample that is associated alpha */
+            *va_arg(ap, uint16*) =
+                (td->td_extrasamples == 1 &&
+                 td->td_sampleinfo[0] == EXTRASAMPLE_ASSOCALPHA);
+            break;
+	case TIFFTAG_EXTRASAMPLES:
+            *va_arg(ap, uint16*) = td->td_extrasamples;
+            *va_arg(ap, uint16**) = td->td_sampleinfo;
+            break;
+	case TIFFTAG_TILEWIDTH:
+            *va_arg(ap, uint32*) = td->td_tilewidth;
+            break;
+	case TIFFTAG_TILELENGTH:
+            *va_arg(ap, uint32*) = td->td_tilelength;
+            break;
+	case TIFFTAG_TILEDEPTH:
+            *va_arg(ap, uint32*) = td->td_tiledepth;
+            break;
+	case TIFFTAG_DATATYPE:
+            switch (td->td_sampleformat) {	/* NOTE(review): no default -- other formats store nothing, yet 1 is returned */
+		case SAMPLEFORMAT_UINT:
+                    *va_arg(ap, uint16*) = DATATYPE_UINT;
+                    break;
+		case SAMPLEFORMAT_INT:
+                    *va_arg(ap, uint16*) = DATATYPE_INT;
+                    break;
+		case SAMPLEFORMAT_IEEEFP:
+                    *va_arg(ap, uint16*) = DATATYPE_IEEEFP;
+                    break;
+		case SAMPLEFORMAT_VOID:
+                    *va_arg(ap, uint16*) = DATATYPE_VOID;
+                    break;
+            }
+            break;
+	case TIFFTAG_SAMPLEFORMAT:
+            *va_arg(ap, uint16*) = td->td_sampleformat;
+            break;
+	case TIFFTAG_IMAGEDEPTH:
+            *va_arg(ap, uint32*) = td->td_imagedepth;
+            break;
+	case TIFFTAG_SUBIFD:
+            *va_arg(ap, uint16*) = td->td_nsubifd;
+            *va_arg(ap, uint32**) = td->td_subifd;
+            break;
+	case TIFFTAG_YCBCRPOSITIONING:
+            *va_arg(ap, uint16*) = td->td_ycbcrpositioning;
+            break;
+	case TIFFTAG_YCBCRSUBSAMPLING:
+            *va_arg(ap, uint16*) = td->td_ycbcrsubsampling[0];
+            *va_arg(ap, uint16*) = td->td_ycbcrsubsampling[1];
+            break;
+	case TIFFTAG_TRANSFERFUNCTION:	/* one table, or three when more than one non-extra sample */
+            *va_arg(ap, uint16**) = td->td_transferfunction[0];
+            if (td->td_samplesperpixel - td->td_extrasamples > 1) {
+                *va_arg(ap, uint16**) = td->td_transferfunction[1];
+                *va_arg(ap, uint16**) = td->td_transferfunction[2];
+            }
+            break;
+	case TIFFTAG_INKNAMES:
+            *va_arg(ap, char**) = td->td_inknames;
+            break;
+        default:
+        {
+            const TIFFFieldInfo* fip = _TIFFFindFieldInfo(tif, tag, TIFF_ANY);
+            int           i;
+            /*
+             * This can happen if multiple images are open with
+             * different codecs which have private tags.  The
+             * global tag information table may then have tags
+             * that are valid for one file but not the other. 
+             * If the client tries to get a tag that is not valid
+             * for the image's codec then we'll arrive here.
+             */
+            if( fip == NULL || fip->field_bit != FIELD_CUSTOM )
+            {
+				TIFFErrorExt(tif->tif_clientdata, "_TIFFVGetField",
+                          "%s: Invalid %stag \"%s\" (not supported by codec)",
+                          tif->tif_name, isPseudoTag(tag) ? "pseudo-" : "",
+                          _TIFFFieldWithTag(tif, tag)->field_name);
+                ret_val = 0;
+                break;
+            }
+
+            /*
+	     * Do we have a custom value?
+	     */
+            ret_val = 0;	/* stays 0 unless a matching stored value is delivered below */
+            for (i = 0; i < td->td_customValueCount; i++) {
+		TIFFTagValue *tv = td->td_customValues + i;
+
+		if (tv->info->field_tag != tag)
+			continue;
+		if (fip->field_passcount) {	/* count goes out first, then the stored pointer */
+			if (fip->field_readcount == TIFF_VARIABLE2) 
+				*va_arg(ap, uint32*) = (uint32)tv->count;
+			else	/* Assume TIFF_VARIABLE */
+				*va_arg(ap, uint16*) = (uint16)tv->count;
+			*va_arg(ap, void **) = tv->value;
+			ret_val = 1;
+                } else {
+			if ((fip->field_type == TIFF_ASCII
+			    || fip->field_readcount == TIFF_VARIABLE
+			    || fip->field_readcount == TIFF_VARIABLE2
+			    || fip->field_readcount == TIFF_SPP
+			    || tv->count > 1)
+			    && fip->field_tag != TIFFTAG_PAGENUMBER
+			    && fip->field_tag != TIFFTAG_HALFTONEHINTS
+			    && fip->field_tag != TIFFTAG_YCBCRSUBSAMPLING
+			    && fip->field_tag != TIFFTAG_DOTRANGE) {
+				*va_arg(ap, void **) = tv->value;
+				ret_val = 1;
+			} else {	/* one pointer argument is consumed per stored element */
+			    int j;
+			    char *val = (char *)tv->value;
+
+			    for (j = 0; j < tv->count;
+				 j++, val += _TIFFDataSize(tv->info->field_type)) {
+				switch (fip->field_type) {
+					case TIFF_BYTE:
+					case TIFF_UNDEFINED:
+						*va_arg(ap, uint8*) =
+							*(uint8 *)val;
+						ret_val = 1;
+						break;
+					case TIFF_SBYTE:
+						*va_arg(ap, int8*) =
+							*(int8 *)val;
+						ret_val = 1;
+						break;
+					case TIFF_SHORT:
+						*va_arg(ap, uint16*) =
+							*(uint16 *)val;
+						ret_val = 1;
+						break;
+					case TIFF_SSHORT:
+						*va_arg(ap, int16*) =
+							*(int16 *)val;
+						ret_val = 1;
+						break;
+					case TIFF_LONG:
+					case TIFF_IFD:
+						*va_arg(ap, uint32*) =
+							*(uint32 *)val;
+						ret_val = 1;
+						break;
+					case TIFF_SLONG:
+						*va_arg(ap, int32*) =
+							*(int32 *)val;
+						ret_val = 1;
+						break;
+					case TIFF_RATIONAL:
+					case TIFF_SRATIONAL:
+					case TIFF_FLOAT:
+						*va_arg(ap, float*) =
+							*(float *)val;
+						ret_val = 1;
+						break;
+					case TIFF_DOUBLE:
+						*va_arg(ap, double*) =
+							*(double *)val;
+						ret_val = 1;
+						break;
+					default:
+						ret_val = 0;
+						break;
+				}
+			    }
+			}
+                }
+		break;	/* only the first entry matching the tag is used */
+            }
+        }
+    }
+    return(ret_val);
+}
+
+/*
+ * Return the value of a field in the internal directory structure.
+ * The arguments after tag are forwarded to TIFFVGetField() as a va_list.
+ */
+int
+TIFFGetField(TIFF* tif, ttag_t tag, ...)
+{
+	int status;
+	va_list ap;
+
+	va_start(ap, tag);			/* destinations follow the tag argument */
+	status = TIFFVGetField(tif, tag, ap);
+	va_end(ap);
+	return (status);
+}
+
+/*
+ * va_list flavour of TIFFGetField; 0 if the tag is unknown or (non-pseudo) not set.
+ */
+int
+TIFFVGetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+	const TIFFFieldInfo* info = _TIFFFindFieldInfo(tif, tag, TIFF_ANY);
+	if (info == NULL)
+		return 0;
+	if (!isPseudoTag(tag) && !TIFFFieldSet(tif, info->field_bit))
+		return 0;
+	return (*tif->tif_tagmethods.vgetfield)(tif, tag, ap);
+}
+
+#define	CleanupField(member) {		\
+    if (td->member) {			\
+	_TIFFfree(td->member);		\
+	td->member = 0;			\
+    }					\
+}
+
+/*
+ * Release storage associated with a directory and clear its field-set bits.
+ */
+void
+TIFFFreeDirectory(TIFF* tif)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+	int            i;
+
+	_TIFFmemset(td->td_fieldsset, 0, sizeof(td->td_fieldsset));	/* whole bit array, not FIELD_SETLONGS bytes */
+	CleanupField(td_colormap[0]);
+	CleanupField(td_colormap[1]);
+	CleanupField(td_colormap[2]);
+	CleanupField(td_sampleinfo);
+	CleanupField(td_subifd);
+	CleanupField(td_inknames);
+	CleanupField(td_transferfunction[0]);
+	CleanupField(td_transferfunction[1]);
+	CleanupField(td_transferfunction[2]);
+	CleanupField(td_stripoffset);
+	CleanupField(td_stripbytecount);
+	TIFFClrFieldBit(tif, FIELD_YCBCRSUBSAMPLING);
+	TIFFClrFieldBit(tif, FIELD_YCBCRPOSITIONING);
+
+	/* Cleanup custom tag values */
+	for( i = 0; i < td->td_customValueCount; i++ ) {
+		if (td->td_customValues[i].value)
+			_TIFFfree(td->td_customValues[i].value);
+	}
+
+	td->td_customValueCount = 0;	/* reset the count before the list itself is freed */
+	CleanupField(td_customValues);
+}
+#undef CleanupField
+
+/*
+ * Client Tag extension support (from Niles Ritter).
+ */
+static TIFFExtendProc _TIFFextender = (TIFFExtendProc) NULL;	/* currently installed hook, NULL if none */
+
+TIFFExtendProc	/* returns the hook that was installed before this call */
+TIFFSetTagExtender(TIFFExtendProc extender)
+{
+	TIFFExtendProc prev = _TIFFextender;
+	_TIFFextender = extender;
+	return (prev);
+}
+
+/*
+ * Setup for a new directory.  Should we automatically call
+ * TIFFWriteDirectory() if the current one is dirty?
+ *
+ * The newly created directory will not exist on the file till
+ * TIFFWriteDirectory(), TIFFFlush() or TIFFClose() is called.
+ */
+int
+TIFFCreateDirectory(TIFF* tif)
+{
+    TIFFDefaultDirectory(tif);		/* reset tif_dir to the default values */
+    tif->tif_diroff = 0;
+    tif->tif_nextdiroff = 0;
+    tif->tif_curoff = 0;
+    tif->tif_row = (uint32) -1;		/* all-ones value: no current row */
+    tif->tif_curstrip = (tstrip_t) -1;	/* all-ones value: no current strip */
+
+    return 0;				/* this function always returns 0 */
+}
+
+/*
+ * Reset tif_dir and the tag methods to library defaults; always returns 1.
+ */
+int
+TIFFDefaultDirectory(TIFF* tif)
+{
+	register TIFFDirectory* td = &tif->tif_dir;
+
+	size_t tiffFieldInfoCount;
+	const TIFFFieldInfo *tiffFieldInfo =
+		_TIFFGetFieldInfo(&tiffFieldInfoCount);
+	_TIFFSetupFieldInfo(tif, tiffFieldInfo, tiffFieldInfoCount);
+
+	_TIFFmemset(td, 0, sizeof (*td));	/* zeroes pointers without freeing them */
+	td->td_fillorder = FILLORDER_MSB2LSB;
+	td->td_bitspersample = 1;
+	td->td_threshholding = THRESHHOLD_BILEVEL;
+	td->td_orientation = ORIENTATION_TOPLEFT;
+	td->td_samplesperpixel = 1;
+	td->td_rowsperstrip = (uint32) -1;
+	td->td_tilewidth = 0;
+	td->td_tilelength = 0;
+	td->td_tiledepth = 1;
+	td->td_stripbytecountsorted = 1; /* Our own arrays always sorted. */
+	td->td_resolutionunit = RESUNIT_INCH;
+	td->td_sampleformat = SAMPLEFORMAT_UINT;
+	td->td_imagedepth = 1;
+	td->td_ycbcrsubsampling[0] = 2;
+	td->td_ycbcrsubsampling[1] = 2;
+	td->td_ycbcrpositioning = YCBCRPOSITION_CENTERED;
+	tif->tif_postdecode = _TIFFNoPostDecode;
+        tif->tif_foundfield = NULL;
+	tif->tif_tagmethods.vsetfield = _TIFFVSetField;
+	tif->tif_tagmethods.vgetfield = _TIFFVGetField;
+	tif->tif_tagmethods.printdir = NULL;
+	/*
+	 *  Give client code a chance to install their own
+	 *  tag extensions & methods, prior to compression overloads.
+	 */
+	if (_TIFFextender)
+		(*_TIFFextender)(tif);
+	(void) TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_NONE);
+	/*
+	 * NB: The directory is marked dirty as a result of setting
+	 * up the default compression scheme.  However, this really
+	 * isn't correct -- we want TIFF_DIRTYDIRECT to be set only
+	 * if the user does something.  We could just do the setup
+	 * by hand, but it seems better to use the normal mechanism
+	 * (i.e. TIFFSetField).
+	 */
+	tif->tif_flags &= ~TIFF_DIRTYDIRECT;
+
+        /*
+         * As per http://bugzilla.remotesensing.org/show_bug.cgi?id=19
+         * we clear the ISTILED flag when setting up a new directory.
+         * Should we also be clearing stuff like INSUBIFD?
+         */
+        tif->tif_flags &= ~TIFF_ISTILED;
+
+	return (1);
+}
+
+static int	/* 1 on success, 0 on failure (after reporting via TIFFErrorExt) */
+TIFFAdvanceDirectory(TIFF* tif, uint32* nextdir, toff_t* off)
+{	/* Replace *nextdir with the link stored after the directory at *nextdir. */
+    static const char module[] = "TIFFAdvanceDirectory";
+    uint16 dircount;	/* entry count read from the directory at *nextdir */
+    if (isMapped(tif))	/* mapped file: copy straight out of tif_base */
+    {
+        toff_t poff=*nextdir;
+        if (poff+sizeof(uint16) > tif->tif_size)
+        {
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: Error fetching directory count",
+                      tif->tif_name);
+            return (0);
+        }
+        _TIFFmemcpy(&dircount, tif->tif_base+poff, sizeof (uint16));
+        if (tif->tif_flags & TIFF_SWAB)
+            TIFFSwabShort(&dircount);
+        poff+=sizeof (uint16)+dircount*sizeof (TIFFDirEntry);	/* skip count + entries: poff is now the link field */
+        if (off != NULL)	/* reported even if the link itself turns out unreadable */
+            *off = poff;
+        if (((toff_t) (poff+sizeof (uint32))) > tif->tif_size)	/* NOTE(review): sum is narrowed to toff_t and may wrap near its maximum -- confirm */
+        {
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: Error fetching directory link",
+                      tif->tif_name);
+            return (0);
+        }
+        _TIFFmemcpy(nextdir, tif->tif_base+poff, sizeof (uint32));
+        if (tif->tif_flags & TIFF_SWAB)
+            TIFFSwabLong(nextdir);
+        return (1);
+    }
+    else	/* otherwise the same walk via SeekOK/ReadOK */
+    {
+        if (!SeekOK(tif, *nextdir) ||
+            !ReadOK(tif, &dircount, sizeof (uint16))) {
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: Error fetching directory count",
+                      tif->tif_name);
+            return (0);
+        }
+        if (tif->tif_flags & TIFF_SWAB)
+            TIFFSwabShort(&dircount);
+        if (off != NULL)	/* NOTE(review): presumably TIFFSeekFile returns the new position -- confirm */
+            *off = TIFFSeekFile(tif,
+                                dircount*sizeof (TIFFDirEntry), SEEK_CUR);
+        else
+            (void) TIFFSeekFile(tif,
+                                dircount*sizeof (TIFFDirEntry), SEEK_CUR);
+        if (!ReadOK(tif, nextdir, sizeof (uint32))) {
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: Error fetching directory link",
+                      tif->tif_name);
+            return (0);
+        }
+        if (tif->tif_flags & TIFF_SWAB)
+            TIFFSwabLong(nextdir);
+        return (1);
+    }
+}
+
+/*
+ * Walk the directory chain from the header and count the links followed.
+ */
+tdir_t
+TIFFNumberOfDirectories(TIFF* tif)
+{
+    toff_t diroff = tif->tif_header.tiff_diroff;
+    tdir_t count;
+    for (count = 0; diroff != 0; count++)
+        if (!TIFFAdvanceDirectory(tif, &diroff, NULL))
+            break;
+    return (count);
+}
+
+/*
+ * Make directory number dirn (counting from 0) the current directory.
+ * Returns 0 if the chain cannot be walked, else TIFFReadDirectory()'s result.
+ */
+int
+TIFFSetDirectory(TIFF* tif, tdir_t dirn)
+{
+	toff_t diroff = tif->tif_header.tiff_diroff;
+	tdir_t walked = 0;
+
+	while (walked < dirn && diroff != 0) {
+		if (!TIFFAdvanceDirectory(tif, &diroff, NULL))
+			return (0);
+		walked++;
+	}
+	/*
+	 * TIFFReadDirectory bumps tif_curdir once the directory has been
+	 * read, so park it one below the index actually reached.
+	 */
+	tif->tif_curdir = walked - 1;
+	tif->tif_nextdiroff = diroff;
+	/*
+	 * A fresh walk starts here: clear tif_dirnumber so the list of seen
+	 * directories (the IFD loop guard) begins anew.
+	 */
+	tif->tif_dirnumber = 0;
+	return (TIFFReadDirectory(tif));
+}
+
+/*
+ * Read the directory stored at file offset diroff and make it
+ * current.  Mainly useful for directories reached through the
+ * SubIFD tag (e.g. thumbnail images).  Returns whatever
+ * TIFFReadDirectory() returns.
+ */
+int
+TIFFSetSubDirectory(TIFF* tif, uint32 diroff)
+{
+	/*
+	 * Start a new list of seen directories (tif_dirnumber = 0);
+	 * that list is what guards against IFD loops.
+	 */
+	tif->tif_dirnumber = 0;
+	tif->tif_nextdiroff = diroff;
+	return (TIFFReadDirectory(tif));
+}
+
+/*
+ * Report the file offset recorded for the current directory (tif_diroff).
+ */
+uint32
+TIFFCurrentDirOffset(TIFF* tif)
+{
+	return tif->tif_diroff;
+}
+
+/*
+ * Return 1 when no directory follows the current one (the next
+ * directory offset is zero), otherwise 0.
+ */
+int
+TIFFLastDirectory(TIFF* tif)
+{
+	return tif->tif_nextdiroff ? 0 : 1;
+}
+
+/*
+ * Unlink directory dirn (1-based here) from the chain; 1 on success, else 0.
+ */
+int
+TIFFUnlinkDirectory(TIFF* tif, tdir_t dirn)
+{
+	static const char module[] = "TIFFUnlinkDirectory";
+	toff_t nextdir;
+	toff_t off;
+	tdir_t n;
+
+	if (tif->tif_mode == O_RDONLY) {
+		TIFFErrorExt(tif->tif_clientdata, module,
+                             "Can not unlink directory in read-only file");
+		return (0);
+	}
+	/*
+	 * Go to the directory before the one we want
+	 * to unlink and nab the offset of the link
+	 * field we'll need to patch.
+	 */
+	nextdir = tif->tif_header.tiff_diroff;
+	off = sizeof (uint16) + sizeof (uint16);	/* NOTE(review): presumably the header's diroff field, after two uint16s -- confirm */
+	for (n = dirn-1; n > 0; n--) {
+		if (nextdir == 0) {
+			TIFFErrorExt(tif->tif_clientdata, module, "Directory %d does not exist", dirn);
+			return (0);
+		}
+		if (!TIFFAdvanceDirectory(tif, &nextdir, &off))
+			return (0);
+	}
+	/*
+	 * Advance to the directory to be unlinked and fetch
+	 * the offset of the directory that follows.
+	 */
+	if (!TIFFAdvanceDirectory(tif, &nextdir, NULL))
+		return (0);
+	/*
+	 * Go back and patch the link field of the preceding
+	 * directory to point to the offset of the directory
+	 * that follows.
+	 */
+	(void) TIFFSeekFile(tif, off, SEEK_SET);	/* seek result is deliberately ignored */
+	if (tif->tif_flags & TIFF_SWAB)
+		TIFFSwabLong(&nextdir);
+	if (!WriteOK(tif, &nextdir, sizeof (uint32))) {
+		TIFFErrorExt(tif->tif_clientdata, module, "Error writing directory link");
+		return (0);
+	}
+	/*
+	 * Leave directory state setup safely.  We don't have
+	 * facilities for doing inserting and removing directories,
+	 * so it's safest to just invalidate everything.  This
+	 * means that the caller can only append to the directory
+	 * chain.
+	 */
+	(*tif->tif_cleanup)(tif);
+	if ((tif->tif_flags & TIFF_MYBUFFER) && tif->tif_rawdata) {
+		_TIFFfree(tif->tif_rawdata);
+		tif->tif_rawdata = NULL;
+		tif->tif_rawcc = 0;
+	}
+	tif->tif_flags &= ~(TIFF_BEENWRITING|TIFF_BUFFERSETUP|TIFF_POSTENCODE);
+	TIFFFreeDirectory(tif);
+	TIFFDefaultDirectory(tif);
+	tif->tif_diroff = 0;			/* force link on next write */
+	tif->tif_nextdiroff = 0;		/* next write must be at end */
+	tif->tif_curoff = 0;
+	tif->tif_row = (uint32) -1;
+	tif->tif_curstrip = (tstrip_t) -1;
+	return (1);
+}
+
+/*			[BFC]
+ *
+ * Author: Bruce Cameron <cameron@petris.com>
+ *
+ * Keep a static list of tag IDs to be treated as 'IGNORE' during directory
+ * processing: TIS_STORE adds a tag, TIS_EXTRACT reports whether a tag is
+ * listed, TIS_EMPTY clears the list.  The result is TRUE or FALSE.
+ *
+ * FIXME: this is never used properly. Should be removed in the future.
+ */
+int
+TIFFReassignTagToIgnore (enum TIFFIgnoreSense task, int TIFFtagID)
+{
+    static int TIFFignoretags [FIELD_LAST];
+    static int tagcount = 0 ;
+    int		pos;				/* Scan position */
+    int		listed = 0;			/* Tag already stored? */
+
+    /* Clearing the list needs no lookup. */
+    if ( task == TIS_EMPTY )
+    {
+        tagcount = 0 ;
+        return (TRUE) ;
+    }
+
+    /* Anything other than store/extract is not a known request. */
+    if ( task != TIS_STORE && task != TIS_EXTRACT )
+        return (FALSE) ;
+
+    /* Storing is refused once tagcount reaches FIELD_LAST - 1. */
+    if ( task == TIS_STORE && tagcount >= (FIELD_LAST - 1) )
+        return (FALSE) ;
+
+    /* Both remaining requests start by looking the tag up. */
+    for ( pos = 0 ; pos < tagcount && !listed ; ++pos )
+        listed = ( TIFFignoretags [pos] == TIFFtagID ) ;
+
+    /* TIS_EXTRACT: report membership only. */
+    if ( task == TIS_EXTRACT )
+    {
+        if ( listed )
+            return (TRUE) ;
+        return (FALSE) ;
+    }
+
+    /* TIS_STORE: append unless it would be a duplicate. */
+    if ( !listed )
+        TIFFignoretags [tagcount++] = TIFFtagID ;
+
+    return (TRUE) ;
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_dir.h b/src/libtiff/tif_dir.h
new file mode 100644
index 0000000..039a9cc
--- /dev/null
+++ b/src/libtiff/tif_dir.h
@@ -0,0 +1,199 @@
+/* $Id: tif_dir.h,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef _TIFFDIR_
+#define	_TIFFDIR_
+/*
+ * ``Library-private'' Directory-related Definitions.
+ */
+
+/*
+ * Library-private representation of one TIFF directory and its tag values.
+ */
+typedef	struct {
+#define	FIELD_SETLONGS	4
+	/* bit vector of fields that are set; each element carries 32 field bits */
+	unsigned long	td_fieldsset[FIELD_SETLONGS];
+
+	uint32	td_imagewidth, td_imagelength, td_imagedepth;
+	uint32	td_tilewidth, td_tilelength, td_tiledepth;
+	uint32	td_subfiletype;
+	uint16	td_bitspersample;
+	uint16	td_sampleformat;
+	uint16	td_compression;
+	uint16	td_photometric;
+	uint16	td_threshholding;
+	uint16	td_fillorder;
+	uint16	td_orientation;
+	uint16	td_samplesperpixel;
+	uint32	td_rowsperstrip;
+	uint16	td_minsamplevalue, td_maxsamplevalue;
+	double	td_sminsamplevalue, td_smaxsamplevalue;
+	float	td_xresolution, td_yresolution;
+	uint16	td_resolutionunit;
+	uint16	td_planarconfig;
+	float	td_xposition, td_yposition;
+	uint16	td_pagenumber[2];
+	uint16*	td_colormap[3];
+	uint16	td_halftonehints[2];
+	uint16	td_extrasamples;
+	uint16*	td_sampleinfo;
+	tstrip_t td_stripsperimage;
+	tstrip_t td_nstrips;		/* size of offset & bytecount arrays */
+	uint32*	td_stripoffset;
+	uint32*	td_stripbytecount;
+	int	td_stripbytecountsorted; /* is the bytecount array sorted ascending? */
+	uint16	td_nsubifd;		/* NOTE(review): presumably the length of td_subifd -- confirm */
+	uint32*	td_subifd;
+	/* YCbCr parameters */
+	uint16	td_ycbcrsubsampling[2];
+	uint16	td_ycbcrpositioning;
+	/* Colorimetry parameters */
+	uint16*	td_transferfunction[3];
+	/* CMYK parameters */
+	int	td_inknameslen;
+	char*	td_inknames;
+
+	int     td_customValueCount;	/* number of entries in td_customValues */
+        TIFFTagValue *td_customValues;
+} TIFFDirectory;
+
+/*
+ * Field flags used to indicate fields that have
+ * been set in a directory, and to reference fields
+ * when manipulating a directory.
+ */
+
+/*
+ * FIELD_IGNORE is used to signify tags that are to
+ * be processed but otherwise ignored.  This permits
+ * antiquated tags to be quietly read and discarded.
+ * Note that a bit *is* allocated for ignored tags;
+ * this is understood by the directory reading logic
+ * which uses this fact to avoid special-case handling
+ */ 
+#define	FIELD_IGNORE			0
+
+/* multi-item fields */
+#define	FIELD_IMAGEDIMENSIONS		1
+#define FIELD_TILEDIMENSIONS		2
+#define	FIELD_RESOLUTION		3
+#define	FIELD_POSITION			4
+
+/* single-item fields */
+#define	FIELD_SUBFILETYPE		5
+#define	FIELD_BITSPERSAMPLE		6
+#define	FIELD_COMPRESSION		7
+#define	FIELD_PHOTOMETRIC		8
+#define	FIELD_THRESHHOLDING		9
+#define	FIELD_FILLORDER			10
+#define	FIELD_ORIENTATION		15
+#define	FIELD_SAMPLESPERPIXEL		16
+#define	FIELD_ROWSPERSTRIP		17
+#define	FIELD_MINSAMPLEVALUE		18
+#define	FIELD_MAXSAMPLEVALUE		19
+#define	FIELD_PLANARCONFIG		20
+#define	FIELD_RESOLUTIONUNIT		22
+#define	FIELD_PAGENUMBER		23
+#define	FIELD_STRIPBYTECOUNTS		24
+#define	FIELD_STRIPOFFSETS		25
+#define	FIELD_COLORMAP			26
+#define	FIELD_EXTRASAMPLES		31
+#define FIELD_SAMPLEFORMAT		32
+#define	FIELD_SMINSAMPLEVALUE		33
+#define	FIELD_SMAXSAMPLEVALUE		34
+#define FIELD_IMAGEDEPTH		35
+#define FIELD_TILEDEPTH			36
+#define	FIELD_HALFTONEHINTS		37
+#define FIELD_YCBCRSUBSAMPLING		39
+#define FIELD_YCBCRPOSITIONING		40
+#define	FIELD_TRANSFERFUNCTION		44
+#define	FIELD_INKNAMES			46
+#define	FIELD_SUBIFD			49
+/*      FIELD_CUSTOM (see tiffio.h)     65 */
+/* end of support for well-known tags; codec-private tags follow */
+#define	FIELD_CODEC			66	/* base of codec-private tags */
+
+
+/*
+ * Pseudo-tags don't normally need field bits since they
+ * are not written to an output file (by definition).
+ * The library also has express logic to always query a
+ * codec for a pseudo-tag so allocating a field bit for
+ * one is a waste.   If codec wants to promote the notion
+ * of a pseudo-tag being ``set'' or ``unset'' then it can
+ * do using internal state flags without polluting the
+ * field bit space defined for real tags.
+ */
+#define	FIELD_PSEUDO			0
+
+#define	FIELD_LAST			(32*FIELD_SETLONGS-1)
+
+#define	TIFFExtractData(tif, type, v) \
+    ((uint32) ((tif)->tif_header.tiff_magic == TIFF_BIGENDIAN ? \
+        ((v) >> (tif)->tif_typeshift[type]) & (tif)->tif_typemask[type] : \
+	(v) & (tif)->tif_typemask[type]))
+#define	TIFFInsertData(tif, type, v) \
+    ((uint32) ((tif)->tif_header.tiff_magic == TIFF_BIGENDIAN ? \
+        ((v) & (tif)->tif_typemask[type]) << (tif)->tif_typeshift[type] : \
+	(v) & (tif)->tif_typemask[type]))
+
+/* Field-bit helpers: bit (n & 0x1f) of word n/32 in a fields-set vector. */
+#define BITn(n)				(((unsigned long)1L)<<((n)&0x1f))
+#define BITFIELDn(tif, n)		((tif)->tif_dir.td_fieldsset[(n)/32])
+#define TIFFFieldSet(tif, field)	(BITFIELDn(tif, field) & BITn(field))
+#define TIFFSetFieldBit(tif, field)	(BITFIELDn(tif, field) |= BITn(field))
+#define TIFFClrFieldBit(tif, field)	(BITFIELDn(tif, field) &= ~BITn(field))
+
+#define	FieldSet(fields, f)		((fields)[(f)/32] & BITn(f))
+#define	ResetFieldBit(fields, f)	((fields)[(f)/32] &= ~BITn(f))
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+extern	const TIFFFieldInfo *_TIFFGetFieldInfo(size_t *);
+extern	const TIFFFieldInfo *_TIFFGetExifFieldInfo(size_t *);
+extern	void _TIFFSetupFieldInfo(TIFF*, const TIFFFieldInfo[], size_t);
+extern	void _TIFFPrintFieldInfo(TIFF*, FILE*);
+extern	TIFFDataType _TIFFSampleToTagType(TIFF*);
+extern  const TIFFFieldInfo* _TIFFFindOrRegisterFieldInfo( TIFF *tif,
+							   ttag_t tag,
+							   TIFFDataType dt );
+extern  TIFFFieldInfo* _TIFFCreateAnonFieldInfo( TIFF *tif, ttag_t tag,
+                                                 TIFFDataType dt );
+
+#define _TIFFMergeFieldInfo	    TIFFMergeFieldInfo
+#define _TIFFFindFieldInfo	    TIFFFindFieldInfo
+#define _TIFFFindFieldInfoByName    TIFFFindFieldInfoByName
+#define _TIFFFieldWithTag	    TIFFFieldWithTag
+#define _TIFFFieldWithName	    TIFFFieldWithName
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* _TIFFDIR_ */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_dirinfo.c b/src/libtiff/tif_dirinfo.c
new file mode 100644
index 0000000..0f9020b
--- /dev/null
+++ b/src/libtiff/tif_dirinfo.c
@@ -0,0 +1,846 @@
+/* $Id: tif_dirinfo.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ *
+ * Core Directory Tag Support.
+ */
+#include "tiffiop.h"
+#include <stdlib.h>
+
+/*
+ * NB: THIS ARRAY IS ASSUMED TO BE SORTED BY TAG.
+ *       If a tag can have both LONG and SHORT types then the LONG must be
+ *       placed before the SHORT for writing to work properly.
+ *
+ * NOTE: The second field (field_readcount) and third field (field_writecount)
+ *       sometimes use the values TIFF_VARIABLE (-1), TIFF_VARIABLE2 (-3)
+ *       and TIFF_SPP (-2). The macros should be used but would throw off
+ *       the formatting of the code, so please interpret the -1, -2 and -3
+ *       values accordingly.
+ */
+static const TIFFFieldInfo
+tiffFieldInfo[] = {
+    { TIFFTAG_SUBFILETYPE,	 1, 1,	TIFF_LONG,	FIELD_SUBFILETYPE,
+      1,	0,	"SubfileType" },
+/* XXX SHORT for compatibility w/ old versions of the library */
+    { TIFFTAG_SUBFILETYPE,	 1, 1,	TIFF_SHORT,	FIELD_SUBFILETYPE,
+      1,	0,	"SubfileType" },
+    { TIFFTAG_OSUBFILETYPE,	 1, 1,	TIFF_SHORT,	FIELD_SUBFILETYPE,
+      1,	0,	"OldSubfileType" },
+    { TIFFTAG_IMAGEWIDTH,	 1, 1,	TIFF_LONG,	FIELD_IMAGEDIMENSIONS,
+      0,	0,	"ImageWidth" },
+    { TIFFTAG_IMAGEWIDTH,	 1, 1,	TIFF_SHORT,	FIELD_IMAGEDIMENSIONS,
+      0,	0,	"ImageWidth" },
+    { TIFFTAG_IMAGELENGTH,	 1, 1,	TIFF_LONG,	FIELD_IMAGEDIMENSIONS,
+      1,	0,	"ImageLength" },
+    { TIFFTAG_IMAGELENGTH,	 1, 1,	TIFF_SHORT,	FIELD_IMAGEDIMENSIONS,
+      1,	0,	"ImageLength" },
+    { TIFFTAG_BITSPERSAMPLE,	-1,-1,	TIFF_SHORT,	FIELD_BITSPERSAMPLE,
+      0,	0,	"BitsPerSample" },
+/* XXX LONG for compatibility with some broken TIFF writers */
+    { TIFFTAG_BITSPERSAMPLE,	-1,-1,	TIFF_LONG,	FIELD_BITSPERSAMPLE,
+      0,	0,	"BitsPerSample" },
+    { TIFFTAG_COMPRESSION,	-1, 1,	TIFF_SHORT,	FIELD_COMPRESSION,
+      0,	0,	"Compression" },
+/* XXX LONG for compatibility with some broken TIFF writers */
+    { TIFFTAG_COMPRESSION,	-1, 1,	TIFF_LONG,	FIELD_COMPRESSION,
+      0,	0,	"Compression" },
+    { TIFFTAG_PHOTOMETRIC,	 1, 1,	TIFF_SHORT,	FIELD_PHOTOMETRIC,
+      0,	0,	"PhotometricInterpretation" },
+/* XXX LONG for compatibility with some broken TIFF writers */
+    { TIFFTAG_PHOTOMETRIC,	 1, 1,	TIFF_LONG,	FIELD_PHOTOMETRIC,
+      0,	0,	"PhotometricInterpretation" },
+    { TIFFTAG_THRESHHOLDING,	 1, 1,	TIFF_SHORT,	FIELD_THRESHHOLDING,
+      1,	0,	"Threshholding" },
+    { TIFFTAG_CELLWIDTH,	 1, 1,	TIFF_SHORT,	FIELD_IGNORE,
+      1,	0,	"CellWidth" },
+    { TIFFTAG_CELLLENGTH,	 1, 1,	TIFF_SHORT,	FIELD_IGNORE,
+      1,	0,	"CellLength" },
+    { TIFFTAG_FILLORDER,	 1, 1,	TIFF_SHORT,	FIELD_FILLORDER,
+      0,	0,	"FillOrder" },
+    { TIFFTAG_DOCUMENTNAME,	-1,-1,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"DocumentName" },
+    { TIFFTAG_IMAGEDESCRIPTION,	-1,-1,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"ImageDescription" },
+    { TIFFTAG_MAKE,		-1,-1,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"Make" },
+    { TIFFTAG_MODEL,		-1,-1,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"Model" },
+    { TIFFTAG_STRIPOFFSETS,	-1,-1,	TIFF_LONG,	FIELD_STRIPOFFSETS,
+      0,	0,	"StripOffsets" },
+    { TIFFTAG_STRIPOFFSETS,	-1,-1,	TIFF_SHORT,	FIELD_STRIPOFFSETS,
+      0,	0,	"StripOffsets" },
+    { TIFFTAG_ORIENTATION,	 1, 1,	TIFF_SHORT,	FIELD_ORIENTATION,
+      0,	0,	"Orientation" },
+    { TIFFTAG_SAMPLESPERPIXEL,	 1, 1,	TIFF_SHORT,	FIELD_SAMPLESPERPIXEL,
+      0,	0,	"SamplesPerPixel" },
+    { TIFFTAG_ROWSPERSTRIP,	 1, 1,	TIFF_LONG,	FIELD_ROWSPERSTRIP,
+      0,	0,	"RowsPerStrip" },
+    { TIFFTAG_ROWSPERSTRIP,	 1, 1,	TIFF_SHORT,	FIELD_ROWSPERSTRIP,
+      0,	0,	"RowsPerStrip" },
+    { TIFFTAG_STRIPBYTECOUNTS,	-1,-1,	TIFF_LONG,	FIELD_STRIPBYTECOUNTS,
+      0,	0,	"StripByteCounts" },
+    { TIFFTAG_STRIPBYTECOUNTS,	-1,-1,	TIFF_SHORT,	FIELD_STRIPBYTECOUNTS,
+      0,	0,	"StripByteCounts" },
+    { TIFFTAG_MINSAMPLEVALUE,	-2,-1,	TIFF_SHORT,	FIELD_MINSAMPLEVALUE,
+      1,	0,	"MinSampleValue" },
+    { TIFFTAG_MAXSAMPLEVALUE,	-2,-1,	TIFF_SHORT,	FIELD_MAXSAMPLEVALUE,
+      1,	0,	"MaxSampleValue" },
+    { TIFFTAG_XRESOLUTION,	 1, 1,	TIFF_RATIONAL,	FIELD_RESOLUTION,
+      1,	0,	"XResolution" },
+    { TIFFTAG_YRESOLUTION,	 1, 1,	TIFF_RATIONAL,	FIELD_RESOLUTION,
+      1,	0,	"YResolution" },
+    { TIFFTAG_PLANARCONFIG,	 1, 1,	TIFF_SHORT,	FIELD_PLANARCONFIG,
+      0,	0,	"PlanarConfiguration" },
+    { TIFFTAG_PAGENAME,		-1,-1,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"PageName" },
+    { TIFFTAG_XPOSITION,	 1, 1,	TIFF_RATIONAL,	FIELD_POSITION,
+      1,	0,	"XPosition" },
+    { TIFFTAG_YPOSITION,	 1, 1,	TIFF_RATIONAL,	FIELD_POSITION,
+      1,	0,	"YPosition" },
+    { TIFFTAG_FREEOFFSETS,	-1,-1,	TIFF_LONG,	FIELD_IGNORE,
+      0,	0,	"FreeOffsets" },
+    { TIFFTAG_FREEBYTECOUNTS,	-1,-1,	TIFF_LONG,	FIELD_IGNORE,
+      0,	0,	"FreeByteCounts" },
+    { TIFFTAG_GRAYRESPONSEUNIT,	 1, 1,	TIFF_SHORT,	FIELD_IGNORE,
+      1,	0,	"GrayResponseUnit" },
+    { TIFFTAG_GRAYRESPONSECURVE,-1,-1,	TIFF_SHORT,	FIELD_IGNORE,
+      1,	0,	"GrayResponseCurve" },
+    { TIFFTAG_RESOLUTIONUNIT,	 1, 1,	TIFF_SHORT,	FIELD_RESOLUTIONUNIT,
+      1,	0,	"ResolutionUnit" },
+    { TIFFTAG_PAGENUMBER,	 2, 2,	TIFF_SHORT,	FIELD_PAGENUMBER,
+      1,	0,	"PageNumber" },
+    { TIFFTAG_COLORRESPONSEUNIT, 1, 1,	TIFF_SHORT,	FIELD_IGNORE,
+      1,	0,	"ColorResponseUnit" },
+    { TIFFTAG_TRANSFERFUNCTION,	-1,-1,	TIFF_SHORT,	FIELD_TRANSFERFUNCTION,
+      1,	0,	"TransferFunction" },
+    { TIFFTAG_SOFTWARE,		-1,-1,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"Software" },
+    { TIFFTAG_DATETIME,		20,20,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"DateTime" },
+    { TIFFTAG_ARTIST,		-1,-1,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"Artist" },
+    { TIFFTAG_HOSTCOMPUTER,	-1,-1,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"HostComputer" },
+    { TIFFTAG_WHITEPOINT,	 2, 2,	TIFF_RATIONAL,	FIELD_CUSTOM,
+      1,	0,	"WhitePoint" },
+    { TIFFTAG_PRIMARYCHROMATICITIES,6,6,TIFF_RATIONAL,	FIELD_CUSTOM,
+      1,	0,	"PrimaryChromaticities" },
+    { TIFFTAG_COLORMAP,		-1,-1,	TIFF_SHORT,	FIELD_COLORMAP,
+      1,	0,	"ColorMap" },
+    { TIFFTAG_HALFTONEHINTS,	 2, 2,	TIFF_SHORT,	FIELD_HALFTONEHINTS,
+      1,	0,	"HalftoneHints" },
+    { TIFFTAG_TILEWIDTH,	 1, 1,	TIFF_LONG,	FIELD_TILEDIMENSIONS,
+      0,	0,	"TileWidth" },
+    { TIFFTAG_TILEWIDTH,	 1, 1,	TIFF_SHORT,	FIELD_TILEDIMENSIONS,
+      0,	0,	"TileWidth" },
+    { TIFFTAG_TILELENGTH,	 1, 1,	TIFF_LONG,	FIELD_TILEDIMENSIONS,
+      0,	0,	"TileLength" },
+    { TIFFTAG_TILELENGTH,	 1, 1,	TIFF_SHORT,	FIELD_TILEDIMENSIONS,
+      0,	0,	"TileLength" },
+    { TIFFTAG_TILEOFFSETS,	-1, 1,	TIFF_LONG,	FIELD_STRIPOFFSETS,
+      0,	0,	"TileOffsets" },
+    { TIFFTAG_TILEBYTECOUNTS,	-1, 1,	TIFF_LONG,	FIELD_STRIPBYTECOUNTS,
+      0,	0,	"TileByteCounts" },
+    { TIFFTAG_TILEBYTECOUNTS,	-1, 1,	TIFF_SHORT,	FIELD_STRIPBYTECOUNTS,
+      0,	0,	"TileByteCounts" },
+    { TIFFTAG_SUBIFD,		-1,-1,	TIFF_IFD,	FIELD_SUBIFD,
+      1,	1,	"SubIFD" },
+    { TIFFTAG_SUBIFD,		-1,-1,	TIFF_LONG,	FIELD_SUBIFD,
+      1,	1,	"SubIFD" },
+    { TIFFTAG_INKSET,		 1, 1,	TIFF_SHORT,	FIELD_CUSTOM,
+      0,	0,	"InkSet" },
+    { TIFFTAG_INKNAMES,		-1,-1,	TIFF_ASCII,	FIELD_INKNAMES,
+      1,	1,	"InkNames" },
+    { TIFFTAG_NUMBEROFINKS,	 1, 1,	TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"NumberOfInks" },
+    { TIFFTAG_DOTRANGE,		 2, 2,	TIFF_SHORT,	FIELD_CUSTOM,
+      0,	0,	"DotRange" },
+    { TIFFTAG_DOTRANGE,		 2, 2,	TIFF_BYTE,	FIELD_CUSTOM,
+      0,	0,	"DotRange" },
+    { TIFFTAG_TARGETPRINTER,	-1,-1,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"TargetPrinter" },
+    { TIFFTAG_EXTRASAMPLES,	-1,-1,	TIFF_SHORT,	FIELD_EXTRASAMPLES,
+      0,	1,	"ExtraSamples" },
+/* XXX for bogus Adobe Photoshop v2.5 files */
+    { TIFFTAG_EXTRASAMPLES,	-1,-1,	TIFF_BYTE,	FIELD_EXTRASAMPLES,
+      0,	1,	"ExtraSamples" },
+    { TIFFTAG_SAMPLEFORMAT,	-1,-1,	TIFF_SHORT,	FIELD_SAMPLEFORMAT,
+      0,	0,	"SampleFormat" },
+    { TIFFTAG_SMINSAMPLEVALUE,	-2,-1,	TIFF_ANY,	FIELD_SMINSAMPLEVALUE,
+      1,	0,	"SMinSampleValue" },
+    { TIFFTAG_SMAXSAMPLEVALUE,	-2,-1,	TIFF_ANY,	FIELD_SMAXSAMPLEVALUE,
+      1,	0,	"SMaxSampleValue" },
+    { TIFFTAG_CLIPPATH,		-1, -3, TIFF_BYTE,	FIELD_CUSTOM,
+      0,	1,	"ClipPath" },
+    { TIFFTAG_XCLIPPATHUNITS,	 1, 1,	TIFF_SLONG,	FIELD_CUSTOM,
+      0,	0,	"XClipPathUnits" },
+    { TIFFTAG_XCLIPPATHUNITS,	 1, 1,	TIFF_SSHORT,	FIELD_CUSTOM,
+      0,	0,	"XClipPathUnits" },
+    { TIFFTAG_XCLIPPATHUNITS,	 1, 1,	TIFF_SBYTE,	FIELD_CUSTOM,
+      0,	0,	"XClipPathUnits" },
+    { TIFFTAG_YCLIPPATHUNITS,	 1, 1,	TIFF_SLONG,	FIELD_CUSTOM,
+      0,	0,	"YClipPathUnits" },
+    { TIFFTAG_YCLIPPATHUNITS,	 1, 1,	TIFF_SSHORT,	FIELD_CUSTOM,
+      0,	0,	"YClipPathUnits" },
+    { TIFFTAG_YCLIPPATHUNITS,	 1, 1,	TIFF_SBYTE,	FIELD_CUSTOM,
+      0,	0,	"YClipPathUnits" },
+    { TIFFTAG_YCBCRCOEFFICIENTS, 3, 3,	TIFF_RATIONAL,	FIELD_CUSTOM,
+      0,	0,	"YCbCrCoefficients" },
+    { TIFFTAG_YCBCRSUBSAMPLING,	 2, 2,	TIFF_SHORT,	FIELD_YCBCRSUBSAMPLING,
+      0,	0,	"YCbCrSubsampling" },
+    { TIFFTAG_YCBCRPOSITIONING,	 1, 1,	TIFF_SHORT,	FIELD_YCBCRPOSITIONING,
+      0,	0,	"YCbCrPositioning" },
+    { TIFFTAG_REFERENCEBLACKWHITE, 6, 6, TIFF_RATIONAL,	FIELD_CUSTOM,
+      1,	0,	"ReferenceBlackWhite" },
+/* XXX temporarily accept LONG for backwards compatibility */
+    { TIFFTAG_REFERENCEBLACKWHITE, 6, 6, TIFF_LONG,	FIELD_CUSTOM,
+      1,	0,	"ReferenceBlackWhite" },
+    { TIFFTAG_XMLPACKET,	-3,-3,	TIFF_BYTE,	FIELD_CUSTOM,
+      0,	1,	"XMLPacket" },
+/* begin SGI tags */
+    { TIFFTAG_MATTEING,		 1, 1,	TIFF_SHORT,	FIELD_EXTRASAMPLES,
+      0,	0,	"Matteing" },
+    { TIFFTAG_DATATYPE,		-2,-1,	TIFF_SHORT,	FIELD_SAMPLEFORMAT,
+      0,	0,	"DataType" },
+    { TIFFTAG_IMAGEDEPTH,	 1, 1,	TIFF_LONG,	FIELD_IMAGEDEPTH,
+      0,	0,	"ImageDepth" },
+    { TIFFTAG_IMAGEDEPTH,	 1, 1,	TIFF_SHORT,	FIELD_IMAGEDEPTH,
+      0,	0,	"ImageDepth" },
+    { TIFFTAG_TILEDEPTH,	 1, 1,	TIFF_LONG,	FIELD_TILEDEPTH,
+      0,	0,	"TileDepth" },
+    { TIFFTAG_TILEDEPTH,	 1, 1,	TIFF_SHORT,	FIELD_TILEDEPTH,
+      0,	0,	"TileDepth" },
+/* end SGI tags */
+/* begin Pixar tags */
+    { TIFFTAG_PIXAR_IMAGEFULLWIDTH,  1, 1, TIFF_LONG,	FIELD_CUSTOM,
+      1,	0,	"ImageFullWidth" },
+    { TIFFTAG_PIXAR_IMAGEFULLLENGTH, 1, 1, TIFF_LONG,	FIELD_CUSTOM,
+      1,	0,	"ImageFullLength" },
+    { TIFFTAG_PIXAR_TEXTUREFORMAT,  -1, -1, TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"TextureFormat" },
+    { TIFFTAG_PIXAR_WRAPMODES,	    -1, -1, TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"TextureWrapModes" },
+    { TIFFTAG_PIXAR_FOVCOT,	     1, 1, TIFF_FLOAT,	FIELD_CUSTOM,
+      1,	0,	"FieldOfViewCotangent" },
+    { TIFFTAG_PIXAR_MATRIX_WORLDTOSCREEN,	16,16,	TIFF_FLOAT,
+      FIELD_CUSTOM,	1,	0,	"MatrixWorldToScreen" },
+    { TIFFTAG_PIXAR_MATRIX_WORLDTOCAMERA,	16,16,	TIFF_FLOAT,
+       FIELD_CUSTOM,	1,	0,	"MatrixWorldToCamera" },
+    { TIFFTAG_COPYRIGHT,	-1, -1,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"Copyright" },
+/* end Pixar tags */
+    { TIFFTAG_RICHTIFFIPTC, -3, -3,	TIFF_LONG,	FIELD_CUSTOM, 
+      0,    1,   "RichTIFFIPTC" },
+    { TIFFTAG_PHOTOSHOP,    -3, -3,	TIFF_BYTE,	FIELD_CUSTOM, 
+      0,    1,   "Photoshop" },
+    { TIFFTAG_EXIFIFD,		1, 1,	TIFF_LONG,	FIELD_CUSTOM,
+      0,	0,	"EXIFIFDOffset" },
+    { TIFFTAG_ICCPROFILE,	-3, -3,	TIFF_UNDEFINED,	FIELD_CUSTOM,
+      0,	1,	"ICC Profile" },
+    { TIFFTAG_GPSIFD,		1, 1,	TIFF_LONG,	FIELD_CUSTOM,
+      0,	0,	"GPSIFDOffset" },
+    { TIFFTAG_STONITS,		 1, 1,	TIFF_DOUBLE,	FIELD_CUSTOM,
+      0,	0,	"StoNits" },
+    { TIFFTAG_INTEROPERABILITYIFD, 1, 1, TIFF_LONG,	FIELD_CUSTOM,
+      0,	0,	"InteroperabilityIFDOffset" },
+/* begin DNG tags */
+    { TIFFTAG_DNGVERSION,	4, 4,	TIFF_BYTE,	FIELD_CUSTOM, 
+      0,	0,	"DNGVersion" },
+    { TIFFTAG_DNGBACKWARDVERSION, 4, 4,	TIFF_BYTE,	FIELD_CUSTOM, 
+      0,	0,	"DNGBackwardVersion" },
+    { TIFFTAG_UNIQUECAMERAMODEL,    -1, -1, TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"UniqueCameraModel" },
+    { TIFFTAG_LOCALIZEDCAMERAMODEL, -1, -1, TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"LocalizedCameraModel" },
+    { TIFFTAG_LOCALIZEDCAMERAMODEL, -1, -1, TIFF_BYTE,	FIELD_CUSTOM,
+      1,	1,	"LocalizedCameraModel" },
+    { TIFFTAG_CFAPLANECOLOR,	-1, -1,	TIFF_BYTE,	FIELD_CUSTOM, 
+      0,	1,	"CFAPlaneColor" },
+    { TIFFTAG_CFALAYOUT,	1, 1,	TIFF_SHORT,	FIELD_CUSTOM, 
+      0,	0,	"CFALayout" },
+    { TIFFTAG_LINEARIZATIONTABLE, -1, -1, TIFF_SHORT,	FIELD_CUSTOM, 
+      0,	1,	"LinearizationTable" },
+    { TIFFTAG_BLACKLEVELREPEATDIM, 2, 2, TIFF_SHORT,	FIELD_CUSTOM, 
+      0,	0,	"BlackLevelRepeatDim" },
+    { TIFFTAG_BLACKLEVEL,	-1, -1,	TIFF_LONG,	FIELD_CUSTOM, 
+      0,	1,	"BlackLevel" },
+    { TIFFTAG_BLACKLEVEL,	-1, -1,	TIFF_SHORT,	FIELD_CUSTOM, 
+      0,	1,	"BlackLevel" },
+    { TIFFTAG_BLACKLEVEL,	-1, -1,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	1,	"BlackLevel" },
+    { TIFFTAG_BLACKLEVELDELTAH,	-1, -1,	TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      0,	1,	"BlackLevelDeltaH" },
+    { TIFFTAG_BLACKLEVELDELTAV,	-1, -1,	TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      0,	1,	"BlackLevelDeltaV" },
+    { TIFFTAG_WHITELEVEL,	-2, -2,	TIFF_LONG,	FIELD_CUSTOM, 
+      0,	0,	"WhiteLevel" },
+    { TIFFTAG_WHITELEVEL,	-2, -2,	TIFF_SHORT,	FIELD_CUSTOM, 
+      0,	0,	"WhiteLevel" },
+    { TIFFTAG_DEFAULTSCALE,	2, 2,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	0,	"DefaultScale" },
+    { TIFFTAG_BESTQUALITYSCALE,	1, 1,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	0,	"BestQualityScale" },
+    { TIFFTAG_DEFAULTCROPORIGIN,	2, 2,	TIFF_LONG,	FIELD_CUSTOM, 
+      0,	0,	"DefaultCropOrigin" },
+    { TIFFTAG_DEFAULTCROPORIGIN,	2, 2,	TIFF_SHORT,	FIELD_CUSTOM, 
+      0,	0,	"DefaultCropOrigin" },
+    { TIFFTAG_DEFAULTCROPORIGIN,	2, 2,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	0,	"DefaultCropOrigin" },
+    { TIFFTAG_DEFAULTCROPSIZE,	2, 2,	TIFF_LONG,	FIELD_CUSTOM, 
+      0,	0,	"DefaultCropSize" },
+    { TIFFTAG_DEFAULTCROPSIZE,	2, 2,	TIFF_SHORT,	FIELD_CUSTOM, 
+      0,	0,	"DefaultCropSize" },
+    { TIFFTAG_DEFAULTCROPSIZE,	2, 2,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	0,	"DefaultCropSize" },
+    { TIFFTAG_COLORMATRIX1,	-1, -1,	TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      0,	1,	"ColorMatrix1" },
+    { TIFFTAG_COLORMATRIX2,	-1, -1,	TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      0,	1,	"ColorMatrix2" },
+    { TIFFTAG_CAMERACALIBRATION1,	-1, -1,	TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      0,	1,	"CameraCalibration1" },
+    { TIFFTAG_CAMERACALIBRATION2,	-1, -1,	TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      0,	1,	"CameraCalibration2" },
+    { TIFFTAG_REDUCTIONMATRIX1,	-1, -1,	TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      0,	1,	"ReductionMatrix1" },
+    { TIFFTAG_REDUCTIONMATRIX2,	-1, -1,	TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      0,	1,	"ReductionMatrix2" },
+    { TIFFTAG_ANALOGBALANCE,	-1, -1,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	1,	"AnalogBalance" },
+    { TIFFTAG_ASSHOTNEUTRAL,	-1, -1,	TIFF_SHORT,	FIELD_CUSTOM, 
+      0,	1,	"AsShotNeutral" },
+    { TIFFTAG_ASSHOTNEUTRAL,	-1, -1,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	1,	"AsShotNeutral" },
+    { TIFFTAG_ASSHOTWHITEXY,	2, 2,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	0,	"AsShotWhiteXY" },
+    { TIFFTAG_BASELINEEXPOSURE,	1, 1,	TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      0,	0,	"BaselineExposure" },
+    { TIFFTAG_BASELINENOISE,	1, 1,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	0,	"BaselineNoise" },
+    { TIFFTAG_BASELINESHARPNESS,	1, 1,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	0,	"BaselineSharpness" },
+    { TIFFTAG_BAYERGREENSPLIT,	1, 1,	TIFF_LONG,	FIELD_CUSTOM, 
+      0,	0,	"BayerGreenSplit" },
+    { TIFFTAG_LINEARRESPONSELIMIT,	1, 1,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	0,	"LinearResponseLimit" },
+    { TIFFTAG_CAMERASERIALNUMBER,    -1, -1, TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"CameraSerialNumber" },
+    { TIFFTAG_LENSINFO,	4, 4,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	0,	"LensInfo" },
+    { TIFFTAG_CHROMABLURRADIUS,	1, 1,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	0,	"ChromaBlurRadius" },
+    { TIFFTAG_ANTIALIASSTRENGTH,	1, 1,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	0,	"AntiAliasStrength" },
+    { TIFFTAG_SHADOWSCALE,	1, 1,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      0,	0,	"ShadowScale" },
+    { TIFFTAG_DNGPRIVATEDATA,    -1, -1, TIFF_BYTE,	FIELD_CUSTOM,
+      0,	1,	"DNGPrivateData" },
+    { TIFFTAG_MAKERNOTESAFETY,	1, 1,	TIFF_SHORT,	FIELD_CUSTOM, 
+      0,	0,	"MakerNoteSafety" },
+    { TIFFTAG_CALIBRATIONILLUMINANT1,	1, 1,	TIFF_SHORT,	FIELD_CUSTOM, 
+      0,	0,	"CalibrationIlluminant1" },
+    { TIFFTAG_CALIBRATIONILLUMINANT2,	1, 1,	TIFF_SHORT,	FIELD_CUSTOM, 
+      0,	0,	"CalibrationIlluminant2" },
+    { TIFFTAG_RAWDATAUNIQUEID,	16, 16,	TIFF_BYTE,	FIELD_CUSTOM, 
+      0,	0,	"RawDataUniqueID" },
+    { TIFFTAG_ORIGINALRAWFILENAME,    -1, -1, TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"OriginalRawFileName" },
+    { TIFFTAG_ORIGINALRAWFILENAME,    -1, -1, TIFF_BYTE,	FIELD_CUSTOM,
+      1,	1,	"OriginalRawFileName" },
+    { TIFFTAG_ORIGINALRAWFILEDATA,    -1, -1, TIFF_UNDEFINED,	FIELD_CUSTOM,
+      0,	1,	"OriginalRawFileData" },
+    { TIFFTAG_ACTIVEAREA,	4, 4,	TIFF_LONG,	FIELD_CUSTOM, 
+      0,	0,	"ActiveArea" },
+    { TIFFTAG_ACTIVEAREA,	4, 4,	TIFF_SHORT,	FIELD_CUSTOM, 
+      0,	0,	"ActiveArea" },
+    { TIFFTAG_MASKEDAREAS,	-1, -1,	TIFF_LONG,	FIELD_CUSTOM, 
+      0,	1,	"MaskedAreas" },
+    { TIFFTAG_ASSHOTICCPROFILE,    -1, -1, TIFF_UNDEFINED,	FIELD_CUSTOM,
+      0,	1,	"AsShotICCProfile" },
+    { TIFFTAG_ASSHOTPREPROFILEMATRIX,	-1, -1,	TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      0,	1,	"AsShotPreProfileMatrix" },
+    { TIFFTAG_CURRENTICCPROFILE,    -1, -1, TIFF_UNDEFINED,	FIELD_CUSTOM,
+      0,	1,	"CurrentICCProfile" },
+    { TIFFTAG_CURRENTPREPROFILEMATRIX,	-1, -1,	TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      0,	1,	"CurrentPreProfileMatrix" },
+/* end DNG tags */
+};
+
+/* Field definitions for the EXIF tags; handed out by _TIFFGetExifFieldInfo(). */
+static const TIFFFieldInfo exifFieldInfo[] = {
+    { EXIFTAG_EXPOSURETIME,	1, 1,		TIFF_RATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"ExposureTime" },
+    { EXIFTAG_FNUMBER,		1, 1,		TIFF_RATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"FNumber" },
+    { EXIFTAG_EXPOSUREPROGRAM,	1, 1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"ExposureProgram" },
+    { EXIFTAG_SPECTRALSENSITIVITY,    -1, -1,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"SpectralSensitivity" },
+    { EXIFTAG_ISOSPEEDRATINGS,  -1, -1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	1,	"ISOSpeedRatings" },
+    { EXIFTAG_OECF,	-1, -1,			TIFF_UNDEFINED,	FIELD_CUSTOM,
+      1,	1,	"OptoelectricConversionFactor" },
+    { EXIFTAG_EXIFVERSION,	4, 4,		TIFF_UNDEFINED,	FIELD_CUSTOM,
+      1,	0,	"ExifVersion" },
+    { EXIFTAG_DATETIMEORIGINAL,	20, 20,		TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"DateTimeOriginal" },
+    { EXIFTAG_DATETIMEDIGITIZED, 20, 20,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"DateTimeDigitized" },
+    { EXIFTAG_COMPONENTSCONFIGURATION,	 4, 4,	TIFF_UNDEFINED,	FIELD_CUSTOM,
+      1,	0,	"ComponentsConfiguration" },
+    { EXIFTAG_COMPRESSEDBITSPERPIXEL,	 1, 1,	TIFF_RATIONAL,	FIELD_CUSTOM,
+      1,	0,	"CompressedBitsPerPixel" },
+    { EXIFTAG_SHUTTERSPEEDVALUE,	1, 1,	TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"ShutterSpeedValue" },
+    { EXIFTAG_APERTUREVALUE,	1, 1,		TIFF_RATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"ApertureValue" },
+    { EXIFTAG_BRIGHTNESSVALUE,	1, 1,		TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"BrightnessValue" },
+    { EXIFTAG_EXPOSUREBIASVALUE,	1, 1,	TIFF_SRATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"ExposureBiasValue" },
+    { EXIFTAG_MAXAPERTUREVALUE,	1, 1,		TIFF_RATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"MaxApertureValue" },
+    { EXIFTAG_SUBJECTDISTANCE,	1, 1,		TIFF_RATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"SubjectDistance" },
+    { EXIFTAG_METERINGMODE,	1, 1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"MeteringMode" },
+    { EXIFTAG_LIGHTSOURCE,	1, 1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"LightSource" },
+    { EXIFTAG_FLASH,	1, 1,			TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"Flash" },
+    { EXIFTAG_FOCALLENGTH,	1, 1,		TIFF_RATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"FocalLength" },
+    { EXIFTAG_SUBJECTAREA,	-1, -1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	1,	"SubjectArea" },
+    { EXIFTAG_MAKERNOTE,	-1, -1,		TIFF_UNDEFINED,	FIELD_CUSTOM,
+      1,	1,	"MakerNote" },
+    { EXIFTAG_USERCOMMENT,	-1, -1,		TIFF_UNDEFINED,	FIELD_CUSTOM,
+      1,	1,	"UserComment" },
+    { EXIFTAG_SUBSECTIME,    -1, -1,		TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"SubSecTime" },
+    { EXIFTAG_SUBSECTIMEORIGINAL, -1, -1,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"SubSecTimeOriginal" },
+    { EXIFTAG_SUBSECTIMEDIGITIZED,-1, -1,	TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"SubSecTimeDigitized" },
+    { EXIFTAG_FLASHPIXVERSION,	4, 4,		TIFF_UNDEFINED,	FIELD_CUSTOM,
+      1,	0,	"FlashpixVersion" },
+    { EXIFTAG_PIXELXDIMENSION,	1, 1,		TIFF_LONG,	FIELD_CUSTOM,
+      1,	0,	"PixelXDimension" },
+    { EXIFTAG_PIXELXDIMENSION,	1, 1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"PixelXDimension" },
+    { EXIFTAG_PIXELYDIMENSION,	1, 1,		TIFF_LONG,	FIELD_CUSTOM,
+      1,	0,	"PixelYDimension" },
+    { EXIFTAG_PIXELYDIMENSION,	1, 1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"PixelYDimension" },
+    { EXIFTAG_RELATEDSOUNDFILE,	13, 13,		TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"RelatedSoundFile" },
+    { EXIFTAG_FLASHENERGY,	1, 1,		TIFF_RATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"FlashEnergy" },
+    { EXIFTAG_SPATIALFREQUENCYRESPONSE,	-1, -1,	TIFF_UNDEFINED,	FIELD_CUSTOM,
+      1,	1,	"SpatialFrequencyResponse" },
+    { EXIFTAG_FOCALPLANEXRESOLUTION,	1, 1,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"FocalPlaneXResolution" },
+    { EXIFTAG_FOCALPLANEYRESOLUTION,	1, 1,	TIFF_RATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"FocalPlaneYResolution" },
+    { EXIFTAG_FOCALPLANERESOLUTIONUNIT,	1, 1,	TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"FocalPlaneResolutionUnit" },
+    { EXIFTAG_SUBJECTLOCATION,	2, 2,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"SubjectLocation" },
+    { EXIFTAG_EXPOSUREINDEX,	1, 1,		TIFF_RATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"ExposureIndex" },
+    { EXIFTAG_SENSINGMETHOD,	1, 1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"SensingMethod" },
+    { EXIFTAG_FILESOURCE,	1, 1,		TIFF_UNDEFINED,	FIELD_CUSTOM,
+      1,	0,	"FileSource" },
+    { EXIFTAG_SCENETYPE,	1, 1,		TIFF_UNDEFINED,	FIELD_CUSTOM,
+      1,	0,	"SceneType" },
+    { EXIFTAG_CFAPATTERN,	-1, -1,		TIFF_UNDEFINED,	FIELD_CUSTOM,
+      1,	1,	"CFAPattern" },
+    { EXIFTAG_CUSTOMRENDERED,	1, 1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"CustomRendered" },
+    { EXIFTAG_EXPOSUREMODE,	1, 1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"ExposureMode" },
+    { EXIFTAG_WHITEBALANCE,	1, 1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"WhiteBalance" },
+    { EXIFTAG_DIGITALZOOMRATIO,	1, 1,		TIFF_RATIONAL,	FIELD_CUSTOM, 
+      1,	0,	"DigitalZoomRatio" },
+    { EXIFTAG_FOCALLENGTHIN35MMFILM, 1, 1,	TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"FocalLengthIn35mmFilm" },
+    { EXIFTAG_SCENECAPTURETYPE,	1, 1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"SceneCaptureType" },
+    { EXIFTAG_GAINCONTROL,	1, 1,		TIFF_SHORT,	FIELD_CUSTOM,	/* EXIF defines GainControl as SHORT */
+      1,	0,	"GainControl" },
+    { EXIFTAG_CONTRAST,		1, 1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"Contrast" },
+    { EXIFTAG_SATURATION,	1, 1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"Saturation" },
+    { EXIFTAG_SHARPNESS,	1, 1,		TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"Sharpness" },
+    { EXIFTAG_DEVICESETTINGDESCRIPTION,	-1, -1,	TIFF_UNDEFINED,	FIELD_CUSTOM,
+      1,	1,	"DeviceSettingDescription" },
+    { EXIFTAG_SUBJECTDISTANCERANGE, 1, 1,	TIFF_SHORT,	FIELD_CUSTOM,
+      1,	0,	"SubjectDistanceRange" },
+    { EXIFTAG_IMAGEUNIQUEID,	33, 33,		TIFF_ASCII,	FIELD_CUSTOM,
+      1,	0,	"ImageUniqueID" }
+};
+
+/* Expose the static tiffFieldInfo table; *size receives its entry count. */
+const TIFFFieldInfo *_TIFFGetFieldInfo(size_t *size)
+{
+	*size = TIFFArrayCount(tiffFieldInfo);
+	return (tiffFieldInfo);
+}
+
+/* Expose the static exifFieldInfo table; *size receives its entry count. */
+const TIFFFieldInfo *_TIFFGetExifFieldInfo(size_t *size)
+{
+	*size = TIFFArrayCount(exifFieldInfo);
+	return (exifFieldInfo);
+}
+
+/* Replace tif's field table with info[0..n-1].  Old entries flagged FIELD_CUSTOM
+ * and named "Tag ..." are the malloc'ed anonymous fields, so they are freed too. */
+void
+_TIFFSetupFieldInfo(TIFF* tif, const TIFFFieldInfo info[], size_t n)
+{
+	if (tif->tif_fieldinfo != NULL) {
+		size_t idx = 0;
+
+		while (idx < tif->tif_nfields) {
+			TIFFFieldInfo *entry = tif->tif_fieldinfo[idx++];
+			if (entry->field_bit != FIELD_CUSTOM ||
+			    strncmp("Tag ", entry->field_name, 4) != 0)
+				continue;
+			_TIFFfree(entry->field_name);
+			_TIFFfree(entry);
+		}
+		_TIFFfree(tif->tif_fieldinfo);
+		tif->tif_nfields = 0;
+	}
+	_TIFFMergeFieldInfo(tif, info, n);
+}
+
+/* qsort/bsearch comparator: ascending tag (compared, never subtracted), then descending type. */
+static int tagCompare(const void* a, const void* b)
+{
+	const TIFFFieldInfo* lhs = *(const TIFFFieldInfo**) a;
+	const TIFFFieldInfo* rhs = *(const TIFFFieldInfo**) b;
+	if (lhs->field_tag < rhs->field_tag)
+		return -1;
+	if (lhs->field_tag > rhs->field_tag)
+		return 1;
+	return ((int)rhs->field_type - (int)lhs->field_type);
+}
+
+/* lfind() comparator: 0 only when names match and types match (a first-argument type of TIFF_ANY matches any type). */
+static int tagNameCompare(const void* a, const void* b)
+{
+	const TIFFFieldInfo* ta = *(const TIFFFieldInfo**) a;
+	const TIFFFieldInfo* tb = *(const TIFFFieldInfo**) b;
+	int ret = strcmp(ta->field_name, tb->field_name);
+	return (ret != 0 || ta->field_type == TIFF_ANY) ? ret : ((int)tb->field_type - (int)ta->field_type);
+}
+
+/*
+ * Append info[0..n-1] to tif's field table and re-sort the table with tagCompare.
+ * The table stores pointers into info[]; allocation failure is only assert()ed.
+ */
+void
+_TIFFMergeFieldInfo(TIFF* tif, const TIFFFieldInfo info[], int n)
+{
+	TIFFFieldInfo** slot;
+	int k;
+
+	tif->tif_foundfield = NULL;	/* drop the cached lookup result */
+	if (tif->tif_nfields > 0)
+		tif->tif_fieldinfo = (TIFFFieldInfo**) _TIFFrealloc(tif->tif_fieldinfo,
+		    (tif->tif_nfields+n) * sizeof (TIFFFieldInfo*));
+	else
+		tif->tif_fieldinfo = (TIFFFieldInfo**)
+		    _TIFFmalloc(n * sizeof (TIFFFieldInfo*));
+	assert(tif->tif_fieldinfo != NULL);
+	slot = tif->tif_fieldinfo + tif->tif_nfields;
+	for (k = 0; k < n; k++)
+		slot[k] = (TIFFFieldInfo*) (info + k);	/* XXX */
+	tif->tif_nfields += n;
+	qsort(tif->tif_fieldinfo, tif->tif_nfields,
+	      sizeof (TIFFFieldInfo*), tagCompare);
+}
+
+/* Dump every entry of tif's field table to fd, one "field[i] ..." line each. */
+void
+_TIFFPrintFieldInfo(TIFF* tif, FILE* fd)
+{
+	size_t idx = 0;
+
+	fprintf(fd, "%s: \n", tif->tif_name);
+	while (idx < tif->tif_nfields) {
+		const TIFFFieldInfo* entry = tif->tif_fieldinfo[idx];
+		const char* changeable = entry->field_oktochange ? "TRUE" : "FALSE";
+		const char* counted = entry->field_passcount ? "TRUE" : "FALSE";
+
+		fprintf(fd, "field[%2d] %5lu, %2d, %2d, %d, %2d, %5s, %5s, %s\n",
+			(int)idx, (unsigned long) entry->field_tag,
+			entry->field_readcount, entry->field_writecount,
+			entry->field_type, entry->field_bit,
+			changeable, counted, entry->field_name);
+		idx++;
+	}
+}
+
+/*
+ * Return size of TIFFDataType in bytes
+ */
+int
+TIFFDataWidth(TIFFDataType type)
+{
+	/* byte width per numeric TIFFDataType code, indexed 0..13 */
+	static const int widths[] = {
+		1,	/*  0: nothing */
+		1,	/*  1: TIFF_BYTE */
+		1,	/*  2: TIFF_ASCII */
+		2,	/*  3: TIFF_SHORT */
+		4,	/*  4: TIFF_LONG */
+		8,	/*  5: TIFF_RATIONAL */
+		1,	/*  6: TIFF_SBYTE */
+		1,	/*  7: TIFF_UNDEFINED */
+		2,	/*  8: TIFF_SSHORT */
+		4,	/*  9: TIFF_SLONG */
+		8,	/* 10: TIFF_SRATIONAL */
+		4,	/* 11: TIFF_FLOAT */
+		8,	/* 12: TIFF_DOUBLE */
+		4	/* 13: TIFF_IFD */
+	};
+	const int code = (int) type;
+
+	/* anything outside the table is an unknown type: width 0 */
+	if (code < 0 || code > 13)
+		return 0;
+	return widths[code];
+}
+
+/*
+ * Return size of TIFFDataType in bytes.
+ *
+ * XXX: We need a separate function to determine the space needed
+ * to store the value. For TIFF_RATIONAL values TIFFDataWidth() returns 8,
+ * but we use 4-byte float to represent rationals.
+ */
+int
+_TIFFDataSize(TIFFDataType type)
+{
+	int nbytes = 0;		/* stays 0 for any type not listed below */
+	switch (type) {
+	case TIFF_DOUBLE:
+		nbytes = 8;
+		break;
+	/* rationals are kept as 4-byte floats in memory, hence 4 and not 8 */
+	case TIFF_RATIONAL: case TIFF_SRATIONAL:
+	case TIFF_LONG: case TIFF_SLONG:
+	case TIFF_FLOAT: case TIFF_IFD:
+		nbytes = 4;
+		break;
+	case TIFF_SHORT: case TIFF_SSHORT:
+		nbytes = 2;
+		break;
+	case TIFF_BYTE: case TIFF_SBYTE:
+	case TIFF_ASCII: case TIFF_UNDEFINED:
+		nbytes = 1;
+		break;
+	default: break;
+	}
+	return nbytes;
+}
+
+/*
+ * Return nearest TIFFDataType to the sample type of an image.
+ */
+TIFFDataType
+_TIFFSampleToTagType(TIFF* tif)
+{
+	const uint32 nbytes = TIFFhowmany8(tif->tif_dir.td_bitspersample);
+	TIFFDataType result = TIFF_UNDEFINED;	/* SAMPLEFORMAT_VOID and any other format */
+	switch (tif->tif_dir.td_sampleformat) {
+	case SAMPLEFORMAT_IEEEFP:
+		result = (nbytes == 4) ? TIFF_FLOAT : TIFF_DOUBLE;
+		break;
+	case SAMPLEFORMAT_INT:
+		if (nbytes <= 1) result = TIFF_SBYTE;
+		else result = (nbytes <= 2) ? TIFF_SSHORT : TIFF_SLONG;
+		break;
+	case SAMPLEFORMAT_UINT:
+		if (nbytes <= 1) result = TIFF_BYTE;
+		else result = (nbytes <= 2) ? TIFF_SHORT : TIFF_LONG;
+		break;
+	}
+	return (result);
+}
+
+/* Find the table entry for tag.  A specific dt is bsearch()ed as a (tag, dt) key;
+ * TIFF_ANY takes the first entry carrying the tag and caches it in tif_foundfield.
+ * Returns NULL when nothing matches. */
+const TIFFFieldInfo*
+_TIFFFindFieldInfo(TIFF* tif, ttag_t tag, TIFFDataType dt)
+{
+	const TIFFFieldInfo* cached = tif->tif_foundfield;
+	int idx, count;
+
+	if (cached && cached->field_tag == tag &&
+	    (dt == TIFF_ANY || dt == cached->field_type))
+		return (cached);
+	if (dt != TIFF_ANY) {
+		TIFFFieldInfo key = {0, 0, 0, TIFF_NOTYPE, 0, 0, 0, 0};
+		TIFFFieldInfo* pkey = &key;
+		const TIFFFieldInfo **hit;
+
+		key.field_tag = tag;
+		key.field_type = dt;
+		hit = (const TIFFFieldInfo **) bsearch(&pkey, tif->tif_fieldinfo,
+		    tif->tif_nfields, sizeof(TIFFFieldInfo *), tagCompare);
+		return (hit != NULL) ? (*hit) : NULL;
+	}
+	count = tif->tif_nfields;
+	for (idx = 0; idx < count; idx++) {
+		const TIFFFieldInfo* fip = tif->tif_fieldinfo[idx];
+		if (fip->field_tag == tag)
+			return (tif->tif_foundfield = fip);
+	}
+	return ((const TIFFFieldInfo *)0);
+}
+
+/*
+ * Find the field table entry called field_name.  A specific dt is handed to
+ * lfind() with tagNameCompare; TIFF_ANY takes the first entry with that name
+ * and caches it in tif_foundfield.  Returns NULL when nothing matches.
+ */
+const TIFFFieldInfo*
+_TIFFFindFieldInfoByName(TIFF* tif, const char *field_name, TIFFDataType dt)
+{
+	const TIFFFieldInfo* cached = tif->tif_foundfield;
+	int idx, count;
+
+	if (cached && streq(cached->field_name, field_name)
+	    && (dt == TIFF_ANY || dt == cached->field_type))
+		return (cached);
+	if (dt != TIFF_ANY) {
+		TIFFFieldInfo key = {0, 0, 0, TIFF_NOTYPE, 0, 0, 0, 0};
+		TIFFFieldInfo* pkey = &key;
+		const TIFFFieldInfo **hit;
+
+		key.field_name = (char *)field_name;
+		key.field_type = dt;
+		hit = (const TIFFFieldInfo **) lfind(&pkey, tif->tif_fieldinfo,
+		    &tif->tif_nfields, sizeof(TIFFFieldInfo *), tagNameCompare);
+		return (hit != NULL) ? (*hit) : NULL;
+	}
+	count = tif->tif_nfields;
+	for (idx = 0; idx < count; idx++) {
+		const TIFFFieldInfo* fip = tif->tif_fieldinfo[idx];
+		if (streq(fip->field_name, field_name))
+			return (tif->tif_foundfield = fip);
+	}
+	return ((const TIFFFieldInfo *)0);
+}
+
+const TIFFFieldInfo*
+_TIFFFieldWithTag(TIFF* tif, ttag_t tag)
+{
+	const TIFFFieldInfo* fip = _TIFFFindFieldInfo(tif, tag, TIFF_ANY);
+	if (!fip) {
+		TIFFErrorExt(tif->tif_clientdata, "TIFFFieldWithTag",
+			  "Internal error, unknown tag 0x%x",
+                          (unsigned int) tag);
+		assert(fip != NULL);
+		/*NOTREACHED*/
+	}
+	return (fip);
+}
+
+const TIFFFieldInfo*
+_TIFFFieldWithName(TIFF* tif, const char *field_name)
+{
+	const TIFFFieldInfo* fip =
+		_TIFFFindFieldInfoByName(tif, field_name, TIFF_ANY);
+	if (!fip) {
+		TIFFErrorExt(tif->tif_clientdata, "TIFFFieldWithName",
+			  "Internal error, unknown tag %s", field_name);
+		assert(fip != NULL);
+		/*NOTREACHED*/
+	}
+	return (fip);
+}
+
+/* Find the field info for (tag, dt); if none is registered, create an anonymous
+ * "Tag N" entry and merge it into tif's table.  Returns NULL when that entry
+ * cannot be allocated. */
+const TIFFFieldInfo*
+_TIFFFindOrRegisterFieldInfo( TIFF *tif, ttag_t tag, TIFFDataType dt )
+{
+    const TIFFFieldInfo *fld = _TIFFFindFieldInfo( tif, tag, dt );
+
+    if( fld == NULL ) {
+        fld = _TIFFCreateAnonFieldInfo( tif, tag, dt );
+        if( fld != NULL )	/* a NULL entry would be dereferenced by the sort */
+            _TIFFMergeFieldInfo( tif, fld, 1 );
+    }
+    return fld;
+}
+
+/*
+ * Allocate a FIELD_CUSTOM field description for tag with the given type:
+ * variable read/write counts, passcount set, name "Tag <number>".
+ * Returns NULL (with nothing left allocated) if either allocation fails.
+ */
+TIFFFieldInfo*
+_TIFFCreateAnonFieldInfo(TIFF *tif, ttag_t tag, TIFFDataType field_type)
+{
+	TIFFFieldInfo *anon = (TIFFFieldInfo *) _TIFFmalloc(sizeof (TIFFFieldInfo));
+
+	(void) tif;
+	if (anon == NULL)
+		return NULL;
+	_TIFFmemset( anon, 0, sizeof(TIFFFieldInfo) );
+	anon->field_name = (char *) _TIFFmalloc(32);
+	if (anon->field_name == NULL) {
+		_TIFFfree(anon);
+		return NULL;
+	}
+	/* note that this name is a special sign to TIFFClose() and
+	 * _TIFFSetupFieldInfo() to free the field */
+	sprintf(anon->field_name, "Tag %d", (int) tag);
+	anon->field_tag = tag;
+	anon->field_type = field_type;
+	anon->field_bit = FIELD_CUSTOM;
+	anon->field_readcount = TIFF_VARIABLE;
+	anon->field_writecount = TIFF_VARIABLE;
+	anon->field_oktochange = TRUE;
+	anon->field_passcount = TRUE;
+	return anon;
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_dirread.c b/src/libtiff/tif_dirread.c
new file mode 100644
index 0000000..5c8c708
--- /dev/null
+++ b/src/libtiff/tif_dirread.c
@@ -0,0 +1,1789 @@
+/* $Id: tif_dirread.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ *
+ * Directory Read Support Routines.
+ */
+#include "tiffiop.h"
+
+#define	IGNORE	0		/* tag value 0: marks directory entries already handled or to be skipped */
+/* With HAVE_IEEEFP the float/double conversion hooks expand to nothing. */
+#ifdef HAVE_IEEEFP
+# define	TIFFCvtIEEEFloatToNative(tif, n, fp)
+# define	TIFFCvtIEEEDoubleToNative(tif, n, dp)
+#else
+extern	void TIFFCvtIEEEFloatToNative(TIFF*, uint32, float*);
+extern	void TIFFCvtIEEEDoubleToNative(TIFF*, uint32, double*);
+#endif
+/* Forward declarations of the file-local (static) helpers. */
+static	int EstimateStripByteCounts(TIFF*, TIFFDirEntry*, uint16);
+static	void MissingRequired(TIFF*, const char*);
+static	int CheckDirCount(TIFF*, TIFFDirEntry*, uint32);
+static	tsize_t TIFFFetchData(TIFF*, TIFFDirEntry*, char*);
+static	tsize_t TIFFFetchString(TIFF*, TIFFDirEntry*, char*);
+static	float TIFFFetchRational(TIFF*, TIFFDirEntry*);
+static	int TIFFFetchNormalTag(TIFF*, TIFFDirEntry*);
+static	int TIFFFetchPerSampleShorts(TIFF*, TIFFDirEntry*, uint16*);
+static	int TIFFFetchPerSampleLongs(TIFF*, TIFFDirEntry*, uint32*);
+static	int TIFFFetchPerSampleAnys(TIFF*, TIFFDirEntry*, double*);
+static	int TIFFFetchShortArray(TIFF*, TIFFDirEntry*, uint16*);
+static	int TIFFFetchStripThing(TIFF*, TIFFDirEntry*, long, uint32**);
+static	int TIFFFetchRefBlackWhite(TIFF*, TIFFDirEntry*);
+static	float TIFFFetchFloat(TIFF*, TIFFDirEntry*);
+static	int TIFFFetchFloatArray(TIFF*, TIFFDirEntry*, float*);
+static	int TIFFFetchDoubleArray(TIFF*, TIFFDirEntry*, double*);
+static	int TIFFFetchAnyArray(TIFF*, TIFFDirEntry*, double*);
+static	int TIFFFetchShortPair(TIFF*, TIFFDirEntry*);
+static	void ChopUpSingleUncompressedStrip(TIFF*);
+
+/*
+ * Read the next TIFF directory from a file
+ * and convert it to the internal format.
+ * We read directories sequentially.
+ */
+int
+TIFFReadDirectory(TIFF* tif)
+{
+	static const char module[] = "TIFFReadDirectory";
+
+	int n;
+	TIFFDirectory* td;
+	TIFFDirEntry *dp, *dir = NULL;
+	uint16 iv;
+	uint32 v;
+	const TIFFFieldInfo* fip;
+	size_t fix;
+	uint16 dircount;
+	toff_t nextdiroff;
+	int diroutoforderwarning = 0;
+	toff_t* new_dirlist;
+
+	tif->tif_diroff = tif->tif_nextdiroff;
+	if (tif->tif_diroff == 0)		/* no more directories */
+		return (0);
+
+	/*
+	 * XXX: Trick to prevent IFD looping. One can create a TIFF file
+	 * with looped directory pointers, so we maintain a list of already
+	 * seen directories and check every IFD offset against this list.
+	 */
+	for (n = 0; n < tif->tif_dirnumber; n++) {
+		if (tif->tif_dirlist[n] == tif->tif_diroff)
+			return (0);
+	}
+	tif->tif_dirnumber++;
+	new_dirlist = (toff_t *)_TIFFrealloc(tif->tif_dirlist,
+					tif->tif_dirnumber * sizeof(toff_t));
+	if (!new_dirlist) {
+		TIFFErrorExt(tif->tif_clientdata, module,
+			  "%s: Failed to allocate space for IFD list",
+			  tif->tif_name);
+		return (0);
+	}
+	tif->tif_dirlist = new_dirlist;
+	tif->tif_dirlist[tif->tif_dirnumber - 1] = tif->tif_diroff;
+
+	/*
+	 * Cleanup any previous compression state.
+	 */
+	(*tif->tif_cleanup)(tif);
+	tif->tif_curdir++;
+	nextdiroff = 0;
+	if (!isMapped(tif)) {
+		if (!SeekOK(tif, tif->tif_diroff)) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "%s: Seek error accessing TIFF directory",
+                            tif->tif_name);
+			return (0);
+		}
+		if (!ReadOK(tif, &dircount, sizeof (uint16))) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "%s: Can not read TIFF directory count",
+                            tif->tif_name);
+			return (0);
+		}
+		if (tif->tif_flags & TIFF_SWAB)
+			TIFFSwabShort(&dircount);
+		dir = (TIFFDirEntry *)_TIFFCheckMalloc(tif, dircount,
+						       sizeof (TIFFDirEntry),
+						"to read TIFF directory");
+		if (dir == NULL)
+			return (0);
+		if (!ReadOK(tif, dir, dircount*sizeof (TIFFDirEntry))) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+                                  "%.100s: Can not read TIFF directory",
+                                  tif->tif_name);
+			goto bad;
+		}
+		/*
+		 * Read offset to next directory for sequential scans.
+		 */
+		(void) ReadOK(tif, &nextdiroff, sizeof (uint32));
+	} else {
+		toff_t off = tif->tif_diroff;
+
+		if (off + sizeof (uint16) > tif->tif_size) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "%s: Can not read TIFF directory count",
+                            tif->tif_name);
+			return (0);
+		} else
+			_TIFFmemcpy(&dircount, tif->tif_base + off, sizeof (uint16));
+		off += sizeof (uint16);
+		if (tif->tif_flags & TIFF_SWAB)
+			TIFFSwabShort(&dircount);
+		dir = (TIFFDirEntry *)_TIFFCheckMalloc(tif, dircount,
+						       sizeof (TIFFDirEntry),
+						"to read TIFF directory");
+		if (dir == NULL)
+			return (0);
+		if (off + dircount*sizeof (TIFFDirEntry) > tif->tif_size) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+                                  "%s: Can not read TIFF directory",
+                                  tif->tif_name);
+			goto bad;
+		} else {
+			_TIFFmemcpy(dir, tif->tif_base + off,
+				    dircount*sizeof (TIFFDirEntry));
+		}
+		off += dircount* sizeof (TIFFDirEntry);
+		if (off + sizeof (uint32) <= tif->tif_size)
+			_TIFFmemcpy(&nextdiroff, tif->tif_base+off, sizeof (uint32));
+	}
+	if (tif->tif_flags & TIFF_SWAB)
+		TIFFSwabLong(&nextdiroff);
+	tif->tif_nextdiroff = nextdiroff;
+
+	tif->tif_flags &= ~TIFF_BEENWRITING;	/* reset before new dir */
+	/*
+	 * Setup default value and then make a pass over
+	 * the fields to check type and tag information,
+	 * and to extract info required to size data
+	 * structures.  A second pass is made afterwards
+	 * to read in everything not taken in the first pass.
+	 */
+	td = &tif->tif_dir;
+	/* free any old stuff and reinit */
+	TIFFFreeDirectory(tif);
+	TIFFDefaultDirectory(tif);
+	/*
+	 * Electronic Arts writes gray-scale TIFF files
+	 * without a PlanarConfiguration directory entry.
+	 * Thus we setup a default value here, even though
+	 * the TIFF spec says there is no default value.
+	 */
+	TIFFSetField(tif, TIFFTAG_PLANARCONFIG, PLANARCONFIG_CONTIG);
+
+	/*
+	 * Sigh, we must make a separate pass through the
+	 * directory for the following reason:
+	 *
+	 * We must process the Compression tag in the first pass
+	 * in order to merge in codec-private tag definitions (otherwise
+	 * we may get complaints about unknown tags).  However, the
+	 * Compression tag may be dependent on the SamplesPerPixel
+	 * tag value because older TIFF specs permitted Compression
+	 * to be written as a SamplesPerPixel-count tag entry.
+	 * Thus if we don't first figure out the correct SamplesPerPixel
+	 * tag value then we may end up ignoring the Compression tag
+	 * value because it has an incorrect count value (if the
+	 * true value of SamplesPerPixel is not 1).
+	 *
+	 * It sure would have been nice if Aldus had really thought
+	 * this stuff through carefully.
+	 */ 
+	for (dp = dir, n = dircount; n > 0; n--, dp++) {
+		if (tif->tif_flags & TIFF_SWAB) {
+			TIFFSwabArrayOfShort(&dp->tdir_tag, 2);
+			TIFFSwabArrayOfLong(&dp->tdir_count, 2);
+		}
+		if (dp->tdir_tag == TIFFTAG_SAMPLESPERPIXEL) {
+			if (!TIFFFetchNormalTag(tif, dp))
+				goto bad;
+			dp->tdir_tag = IGNORE;
+		}
+	}
+	/*
+	 * First real pass over the directory.
+	 */
+	fix = 0;
+	for (dp = dir, n = dircount; n > 0; n--, dp++) {
+
+		if (fix >= tif->tif_nfields || dp->tdir_tag == IGNORE)
+			continue;
+               
+		/*
+		 * Silicon Beach (at least) writes unordered
+		 * directory tags (violating the spec).  Handle
+		 * it here, but be obnoxious (maybe they'll fix it?).
+		 */
+		if (dp->tdir_tag < tif->tif_fieldinfo[fix]->field_tag) {
+			if (!diroutoforderwarning) {
+				TIFFWarningExt(tif->tif_clientdata, module,
+	"%s: invalid TIFF directory; tags are not sorted in ascending order",
+					       tif->tif_name);
+				diroutoforderwarning = 1;
+			}
+			fix = 0;			/* O(n^2) */
+		}
+		while (fix < tif->tif_nfields &&
+		       tif->tif_fieldinfo[fix]->field_tag < dp->tdir_tag)
+			fix++;
+		if (fix >= tif->tif_nfields ||
+		    tif->tif_fieldinfo[fix]->field_tag != dp->tdir_tag) {
+
+					TIFFWarningExt(tif->tif_clientdata,
+						       module,
+                        "%s: unknown field with tag %d (0x%x) encountered",
+						       tif->tif_name,
+						       dp->tdir_tag,
+						       dp->tdir_tag,
+						       dp->tdir_type);
+
+                    TIFFMergeFieldInfo(tif,
+                                       _TIFFCreateAnonFieldInfo(tif,
+						dp->tdir_tag,
+						(TIFFDataType) dp->tdir_type),
+				       1 );
+                    fix = 0;
+                    while (fix < tif->tif_nfields &&
+                           tif->tif_fieldinfo[fix]->field_tag < dp->tdir_tag)
+			fix++;
+		}
+		/*
+		 * Null out old tags that we ignore.
+		 */
+		if (tif->tif_fieldinfo[fix]->field_bit == FIELD_IGNORE) {
+	ignore:
+			dp->tdir_tag = IGNORE;
+			continue;
+		}
+		/*
+		 * Check data type.
+		 */
+		fip = tif->tif_fieldinfo[fix];
+		while (dp->tdir_type != (unsigned short) fip->field_type
+                       && fix < tif->tif_nfields) {
+			if (fip->field_type == TIFF_ANY)	/* wildcard */
+				break;
+                        fip = tif->tif_fieldinfo[++fix];
+			if (fix >= tif->tif_nfields ||
+			    fip->field_tag != dp->tdir_tag) {
+				TIFFWarningExt(tif->tif_clientdata, module,
+			"%s: wrong data type %d for \"%s\"; tag ignored",
+					    tif->tif_name, dp->tdir_type,
+					    tif->tif_fieldinfo[fix-1]->field_name);
+				goto ignore;
+			}
+		}
+		/*
+		 * Check count if known in advance.
+		 */
+		if (fip->field_readcount != TIFF_VARIABLE
+		    && fip->field_readcount != TIFF_VARIABLE2) {
+			uint32 expected = (fip->field_readcount == TIFF_SPP) ?
+			    (uint32) td->td_samplesperpixel :
+			    (uint32) fip->field_readcount;
+			if (!CheckDirCount(tif, dp, expected))
+				goto ignore;
+		}
+
+		switch (dp->tdir_tag) {
+		case TIFFTAG_COMPRESSION:
+			/*
+			 * The 5.0 spec says the Compression tag has
+			 * one value, while earlier specs say it has
+			 * one value per sample.  Because of this, we
+			 * accept the tag if one value is supplied.
+			 */
+			if (dp->tdir_count == 1) {
+				v = TIFFExtractData(tif,
+				    dp->tdir_type, dp->tdir_offset);
+				if (!TIFFSetField(tif, dp->tdir_tag, (uint16)v))
+					goto bad;
+				break;
+			/* XXX: workaround for broken TIFFs */
+			} else if (dp->tdir_type == TIFF_LONG) {
+				if (!TIFFFetchPerSampleLongs(tif, dp, &v) ||
+				    !TIFFSetField(tif, dp->tdir_tag, (uint16)v))
+					goto bad;
+			} else {
+				if (!TIFFFetchPerSampleShorts(tif, dp, &iv)
+				    || !TIFFSetField(tif, dp->tdir_tag, iv))
+					goto bad;
+			}
+			dp->tdir_tag = IGNORE;
+			break;
+		case TIFFTAG_STRIPOFFSETS:
+		case TIFFTAG_STRIPBYTECOUNTS:
+		case TIFFTAG_TILEOFFSETS:
+		case TIFFTAG_TILEBYTECOUNTS:
+			TIFFSetFieldBit(tif, fip->field_bit);
+			break;
+		case TIFFTAG_IMAGEWIDTH:
+		case TIFFTAG_IMAGELENGTH:
+		case TIFFTAG_IMAGEDEPTH:
+		case TIFFTAG_TILELENGTH:
+		case TIFFTAG_TILEWIDTH:
+		case TIFFTAG_TILEDEPTH:
+		case TIFFTAG_PLANARCONFIG:
+		case TIFFTAG_ROWSPERSTRIP:
+		case TIFFTAG_EXTRASAMPLES:
+			if (!TIFFFetchNormalTag(tif, dp))
+				goto bad;
+			dp->tdir_tag = IGNORE;
+			break;
+		}
+	}
+
+	/*
+	 * Allocate directory structure and setup defaults.
+	 */
+	if (!TIFFFieldSet(tif, FIELD_IMAGEDIMENSIONS)) {
+		MissingRequired(tif, "ImageLength");
+		goto bad;
+	}
+	/* 
+ 	 * Setup appropriate structures (by strip or by tile)
+	 */
+	if (!TIFFFieldSet(tif, FIELD_TILEDIMENSIONS)) {
+		td->td_nstrips = TIFFNumberOfStrips(tif);
+		td->td_tilewidth = td->td_imagewidth;
+		td->td_tilelength = td->td_rowsperstrip;
+		td->td_tiledepth = td->td_imagedepth;
+		tif->tif_flags &= ~TIFF_ISTILED;
+	} else {
+		td->td_nstrips = TIFFNumberOfTiles(tif);
+		tif->tif_flags |= TIFF_ISTILED;
+	}
+	if (!td->td_nstrips) {
+		TIFFErrorExt(tif->tif_clientdata, module,
+			     "%s: cannot handle zero number of %s",
+			     tif->tif_name, isTiled(tif) ? "tiles" : "strips");
+		goto bad;
+	}
+	td->td_stripsperimage = td->td_nstrips;
+	if (td->td_planarconfig == PLANARCONFIG_SEPARATE)
+		td->td_stripsperimage /= td->td_samplesperpixel;
+	if (!TIFFFieldSet(tif, FIELD_STRIPOFFSETS)) {
+		MissingRequired(tif,
+				isTiled(tif) ? "TileOffsets" : "StripOffsets");
+		goto bad;
+	}
+
+	/*
+	 * Second pass: extract other information.
+	 */
+	for (dp = dir, n = dircount; n > 0; n--, dp++) {
+		if (dp->tdir_tag == IGNORE)
+			continue;
+		switch (dp->tdir_tag) {
+		case TIFFTAG_MINSAMPLEVALUE:
+		case TIFFTAG_MAXSAMPLEVALUE:
+		case TIFFTAG_BITSPERSAMPLE:
+		case TIFFTAG_DATATYPE:
+		case TIFFTAG_SAMPLEFORMAT:
+			/*
+			 * The 5.0 spec says the Compression tag has
+			 * one value, while earlier specs say it has
+			 * one value per sample.  Because of this, we
+			 * accept the tag if one value is supplied.
+			 *
+                         * The MinSampleValue, MaxSampleValue, BitsPerSample
+                         * DataType and SampleFormat tags are supposed to be
+                         * written as one value/sample, but some vendors
+                         * incorrectly write one value only -- so we accept
+                         * that as well (yech). Other vendors write correct
+			 * value for NumberOfSamples, but incorrect one for
+			 * BitsPerSample and friends, and we will read this
+			 * too.
+			 */
+			if (dp->tdir_count == 1) {
+				v = TIFFExtractData(tif,
+				    dp->tdir_type, dp->tdir_offset);
+				if (!TIFFSetField(tif, dp->tdir_tag, (uint16)v))
+					goto bad;
+			/* XXX: workaround for broken TIFFs */
+			} else if (dp->tdir_tag == TIFFTAG_BITSPERSAMPLE
+				   && dp->tdir_type == TIFF_LONG) {
+				if (!TIFFFetchPerSampleLongs(tif, dp, &v) ||
+				    !TIFFSetField(tif, dp->tdir_tag, (uint16)v))
+					goto bad;
+			} else {
+				if (!TIFFFetchPerSampleShorts(tif, dp, &iv) ||
+				    !TIFFSetField(tif, dp->tdir_tag, iv))
+					goto bad;
+			}
+			break;
+		case TIFFTAG_SMINSAMPLEVALUE:
+		case TIFFTAG_SMAXSAMPLEVALUE:
+			{
+				double dv = 0.0;
+				if (!TIFFFetchPerSampleAnys(tif, dp, &dv) ||
+				    !TIFFSetField(tif, dp->tdir_tag, dv))
+					goto bad;
+			}
+			break;
+		case TIFFTAG_STRIPOFFSETS:
+		case TIFFTAG_TILEOFFSETS:
+			if (!TIFFFetchStripThing(tif, dp,
+			    td->td_nstrips, &td->td_stripoffset))
+				goto bad;
+			break;
+		case TIFFTAG_STRIPBYTECOUNTS:
+		case TIFFTAG_TILEBYTECOUNTS:
+			if (!TIFFFetchStripThing(tif, dp,
+			    td->td_nstrips, &td->td_stripbytecount))
+				goto bad;
+			break;
+		case TIFFTAG_COLORMAP:
+		case TIFFTAG_TRANSFERFUNCTION:
+			{
+				char* cp;
+				/*
+				 * TransferFunction can have either 1x or 3x
+				 * data values; Colormap can have only 3x
+				 * items.
+				 */
+				v = 1L<<td->td_bitspersample;
+				if (dp->tdir_tag == TIFFTAG_COLORMAP ||
+				    dp->tdir_count != v) {
+					if (!CheckDirCount(tif, dp, 3 * v))
+						break;
+				}
+				v *= sizeof(uint16);
+				cp = (char *)_TIFFCheckMalloc(tif,
+							      dp->tdir_count,
+							      sizeof (uint16),
+					"to read \"TransferFunction\" tag");
+				if (cp != NULL) {
+					if (TIFFFetchData(tif, dp, cp)) {
+						/*
+						 * This deals with there being
+						 * only one array to apply to
+						 * all samples.
+						 */
+						uint32 c = 1L << td->td_bitspersample;
+						if (dp->tdir_count == c)
+							v = 0L;
+						TIFFSetField(tif, dp->tdir_tag,
+						    cp, cp+v, cp+2*v);
+					}
+					_TIFFfree(cp);
+				}
+				break;
+			}
+		case TIFFTAG_PAGENUMBER:
+		case TIFFTAG_HALFTONEHINTS:
+		case TIFFTAG_YCBCRSUBSAMPLING:
+		case TIFFTAG_DOTRANGE:
+			(void) TIFFFetchShortPair(tif, dp);
+			break;
+		case TIFFTAG_REFERENCEBLACKWHITE:
+			(void) TIFFFetchRefBlackWhite(tif, dp);
+			break;
+/* BEGIN REV 4.0 COMPATIBILITY */
+		case TIFFTAG_OSUBFILETYPE:
+			v = 0L;
+			switch (TIFFExtractData(tif, dp->tdir_type,
+			    dp->tdir_offset)) {
+			case OFILETYPE_REDUCEDIMAGE:
+				v = FILETYPE_REDUCEDIMAGE;
+				break;
+			case OFILETYPE_PAGE:
+				v = FILETYPE_PAGE;
+				break;
+			}
+			if (v)
+				TIFFSetField(tif, TIFFTAG_SUBFILETYPE, v);
+			break;
+/* END REV 4.0 COMPATIBILITY */
+		default:
+			(void) TIFFFetchNormalTag(tif, dp);
+			break;
+		}
+	}
+	/*
+	 * Verify Palette image has a Colormap.
+	 */
+	if (td->td_photometric == PHOTOMETRIC_PALETTE &&
+	    !TIFFFieldSet(tif, FIELD_COLORMAP)) {
+		MissingRequired(tif, "Colormap");
+		goto bad;
+	}
+	/*
+	 * Attempt to deal with a missing StripByteCounts tag.
+	 */
+	if (!TIFFFieldSet(tif, FIELD_STRIPBYTECOUNTS)) {
+		/*
+		 * Some manufacturers violate the spec by not giving
+		 * the size of the strips.  In this case, assume there
+		 * is one uncompressed strip of data.
+		 */
+		if ((td->td_planarconfig == PLANARCONFIG_CONTIG &&
+		    td->td_nstrips > 1) ||
+		    (td->td_planarconfig == PLANARCONFIG_SEPARATE &&
+		     td->td_nstrips != td->td_samplesperpixel)) {
+		    MissingRequired(tif, "StripByteCounts");
+		    goto bad;
+		}
+		TIFFWarningExt(tif->tif_clientdata, module,
+			"%s: TIFF directory is missing required "
+			"\"%s\" field, calculating from imagelength",
+			tif->tif_name,
+		        _TIFFFieldWithTag(tif,TIFFTAG_STRIPBYTECOUNTS)->field_name);
+		if (EstimateStripByteCounts(tif, dir, dircount) < 0)
+		    goto bad;
+/* 
+ * Assume we have wrong StripByteCount value (in case of single strip) in
+ * following cases:
+ *   - it is equal to zero along with StripOffset;
+ *   - it is larger than file itself (in case of uncompressed image);
+ *   - it is smaller than the size of the bytes per row multiplied on the
+ *     number of rows.  The last case should not be checked in the case of
+ *     writing new image, because we may do not know the exact strip size
+ *     until the whole image will be written and directory dumped out.
+ */
+#define	BYTECOUNTLOOKSBAD \
+    ( (td->td_stripbytecount[0] == 0 && td->td_stripoffset[0] != 0) || \
+      (td->td_compression == COMPRESSION_NONE && \
+       td->td_stripbytecount[0] > TIFFGetFileSize(tif) - td->td_stripoffset[0]) || \
+      (tif->tif_mode == O_RDONLY && \
+       td->td_compression == COMPRESSION_NONE && \
+       td->td_stripbytecount[0] < TIFFScanlineSize(tif) * td->td_imagelength) )
+
+	} else if (td->td_nstrips == 1 
+                   && td->td_stripoffset[0] != 0 
+                   && BYTECOUNTLOOKSBAD) {
+		/*
+		 * XXX: Plexus (and others) sometimes give a value of zero for
+		 * a tag when they don't know what the correct value is!  Try
+		 * and handle the simple case of estimating the size of a one
+		 * strip image.
+		 */
+		TIFFWarningExt(tif->tif_clientdata, module,
+	"%s: Bogus \"%s\" field, ignoring and calculating from imagelength",
+                            tif->tif_name,
+		            _TIFFFieldWithTag(tif,TIFFTAG_STRIPBYTECOUNTS)->field_name);
+		if(EstimateStripByteCounts(tif, dir, dircount) < 0)
+		    goto bad;
+	} else if (td->td_planarconfig == PLANARCONFIG_CONTIG
+		   && td->td_nstrips > 2
+		   && td->td_compression == COMPRESSION_NONE
+		   && td->td_stripbytecount[0] != td->td_stripbytecount[1]) {
+		/*
+		 * XXX: Some vendors fill StripByteCount array with absolutely
+		 * wrong values (it can be equal to StripOffset array, for
+		 * example). Catch this case here.
+		 */
+		TIFFWarningExt(tif->tif_clientdata, module,
+	"%s: Wrong \"%s\" field, ignoring and calculating from imagelength",
+                            tif->tif_name,
+		            _TIFFFieldWithTag(tif,TIFFTAG_STRIPBYTECOUNTS)->field_name);
+		if (EstimateStripByteCounts(tif, dir, dircount) < 0)
+		    goto bad;
+	}
+	if (dir) {
+		_TIFFfree((char *)dir);
+		dir = NULL;
+	}
+	if (!TIFFFieldSet(tif, FIELD_MAXSAMPLEVALUE))
+		td->td_maxsamplevalue = (uint16)((1L<<td->td_bitspersample)-1);
+	/*
+	 * Setup default compression scheme.
+	 */
+
+	/*
+	 * XXX: We can optimize checking for the strip bounds using the sorted
+	 * bytecounts array. See also comments for TIFFAppendToStrip()
+	 * function in tif_write.c.
+	 */
+	if (td->td_nstrips > 1) {
+		tstrip_t strip;
+
+		td->td_stripbytecountsorted = 1;
+		for (strip = 1; strip < td->td_nstrips; strip++) {
+			if (td->td_stripoffset[strip - 1] >
+			    td->td_stripoffset[strip]) {
+				td->td_stripbytecountsorted = 0;
+				break;
+			}
+		}
+	}
+
+	if (!TIFFFieldSet(tif, FIELD_COMPRESSION))
+		TIFFSetField(tif, TIFFTAG_COMPRESSION, COMPRESSION_NONE);
+        /*
+         * Some manufacturers make life difficult by writing
+	 * large amounts of uncompressed data as a single strip.
+	 * This is contrary to the recommendations of the spec.
+         * The following makes an attempt at breaking such images
+	 * into strips closer to the recommended 8k bytes.  A
+	 * side effect, however, is that the RowsPerStrip tag
+	 * value may be changed.
+         */
+	if (td->td_nstrips == 1 && td->td_compression == COMPRESSION_NONE &&
+	    (tif->tif_flags & (TIFF_STRIPCHOP|TIFF_ISTILED)) == TIFF_STRIPCHOP)
+		ChopUpSingleUncompressedStrip(tif);
+
+	/*
+	 * Reinitialize i/o since we are starting on a new directory.
+	 */
+	tif->tif_row = (uint32) -1;
+	tif->tif_curstrip = (tstrip_t) -1;
+	tif->tif_col = (uint32) -1;
+	tif->tif_curtile = (ttile_t) -1;
+	tif->tif_tilesize = (tsize_t) -1;
+
+	tif->tif_scanlinesize = TIFFScanlineSize(tif);
+	if (!tif->tif_scanlinesize) {
+		TIFFErrorExt(tif->tif_clientdata, module, "%s: cannot handle zero scanline size",
+			  tif->tif_name);
+		return (0);
+	}
+
+	if (isTiled(tif)) {
+		tif->tif_tilesize = TIFFTileSize(tif);
+		if (!tif->tif_tilesize) {
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: cannot handle zero tile size",
+				  tif->tif_name);
+			return (0);
+		}
+	} else {
+		if (!TIFFStripSize(tif)) {
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: cannot handle zero strip size",
+				  tif->tif_name);
+			return (0);
+		}
+	}
+	return (1);
+bad:
+	if (dir)
+		_TIFFfree(dir);
+	return (0);
+}
+
+/*
+ * Read a custom directory from an arbitrary offset.
+ * The code is very similar to TIFFReadDirectory().
+ *
+ * The field table info[0..n-1] is installed on tif, the directory at
+ * diroff is loaded (from the file or from the mapped image) and every
+ * entry that passes the tag, type and count checks is handed to
+ * TIFFFetchNormalTag().  Unknown tags are merged as anonymous fields.
+ * Returns 1 on success and 0 if the directory could not be read.
+ */
+int
+TIFFReadCustomDirectory(TIFF* tif, toff_t diroff,
+			const TIFFFieldInfo info[], size_t n)
+{
+	static const char module[] = "TIFFReadCustomDirectory";
+	TIFFDirectory* td = &tif->tif_dir;
+	TIFFDirEntry *dp, *dir = NULL;
+	const TIFFFieldInfo* fip;
+	size_t fix;
+	uint16 i, dircount;
+
+	_TIFFSetupFieldInfo(tif, info, n);
+	tif->tif_diroff = diroff;
+
+	if (!isMapped(tif)) {
+		if (!SeekOK(tif, diroff)) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "%s: Seek error accessing TIFF directory",
+			    tif->tif_name);
+			return (0);
+		}
+		if (!ReadOK(tif, &dircount, sizeof (uint16))) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "%s: Can not read TIFF directory count",
+			    tif->tif_name);
+			return (0);
+		}
+		if (tif->tif_flags & TIFF_SWAB)
+			TIFFSwabShort(&dircount);
+		dir = (TIFFDirEntry *)_TIFFCheckMalloc(tif, dircount,
+						       sizeof (TIFFDirEntry),
+					"to read TIFF custom directory");
+		if (dir == NULL)
+			return (0);
+		if (!ReadOK(tif, dir, dircount * sizeof (TIFFDirEntry))) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "%.100s: Can not read TIFF directory",
+			    tif->tif_name);
+			goto bad;
+		}
+	} else {
+		toff_t off = diroff;
+
+		if (off + sizeof (uint16) > tif->tif_size) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "%s: Can not read TIFF directory count",
+			    tif->tif_name);
+			return (0);
+		}
+		_TIFFmemcpy(&dircount, tif->tif_base + off, sizeof (uint16));
+		off += sizeof (uint16);
+		if (tif->tif_flags & TIFF_SWAB)
+			TIFFSwabShort(&dircount);
+		dir = (TIFFDirEntry *)_TIFFCheckMalloc(tif, dircount,
+						       sizeof (TIFFDirEntry),
+					"to read TIFF custom directory");
+		if (dir == NULL)
+			return (0);
+		if (off + dircount * sizeof (TIFFDirEntry) > tif->tif_size) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "%s: Can not read TIFF directory",
+			    tif->tif_name);
+			goto bad;
+		}
+		_TIFFmemcpy(dir, tif->tif_base + off,
+			    dircount * sizeof (TIFFDirEntry));
+	}
+
+	TIFFFreeDirectory(tif);
+
+	fix = 0;
+	for (dp = dir, i = dircount; i > 0; i--, dp++) {
+		if (tif->tif_flags & TIFF_SWAB) {
+			TIFFSwabArrayOfShort(&dp->tdir_tag, 2);
+			TIFFSwabArrayOfLong(&dp->tdir_count, 2);
+		}
+
+		if (fix >= tif->tif_nfields || dp->tdir_tag == IGNORE)
+			continue;
+
+		while (fix < tif->tif_nfields &&
+		       tif->tif_fieldinfo[fix]->field_tag < dp->tdir_tag)
+			fix++;
+
+		if (fix >= tif->tif_nfields ||
+		    tif->tif_fieldinfo[fix]->field_tag != dp->tdir_tag) {
+			TIFFWarningExt(tif->tif_clientdata, module,
+			"%s: unknown field with tag %d (0x%x) encountered",
+				    tif->tif_name,
+				    dp->tdir_tag, dp->tdir_tag);
+			TIFFMergeFieldInfo(tif,
+					   _TIFFCreateAnonFieldInfo(tif,
+						dp->tdir_tag,
+						(TIFFDataType)dp->tdir_type),
+					   1);
+			/* Restart the lookup now that a field was merged. */
+			fix = 0;
+			while (fix < tif->tif_nfields &&
+			       tif->tif_fieldinfo[fix]->field_tag < dp->tdir_tag)
+				fix++;
+		}
+		/*
+		 * Null out old tags that we ignore (or that could not be merged).
+		 */
+		if (fix >= tif->tif_nfields ||
+		    tif->tif_fieldinfo[fix]->field_bit == FIELD_IGNORE) {
+	ignore:
+			dp->tdir_tag = IGNORE;
+			continue;
+		}
+		/*
+		 * Check data type, validating the index before each table access.
+		 */
+		fip = tif->tif_fieldinfo[fix];
+		while (dp->tdir_type != (unsigned short) fip->field_type) {
+			if (fip->field_type == TIFF_ANY)	/* wildcard */
+				break;
+			if (++fix >= tif->tif_nfields ||
+			    tif->tif_fieldinfo[fix]->field_tag != dp->tdir_tag) {
+				TIFFWarningExt(tif->tif_clientdata, module,
+			"%s: wrong data type %d for \"%s\"; tag ignored",
+					    tif->tif_name, dp->tdir_type,
+					    tif->tif_fieldinfo[fix-1]->field_name);
+				goto ignore;
+			}
+			fip = tif->tif_fieldinfo[fix];
+		}
+		/*
+		 * Check count if known in advance.
+		 */
+		if (fip->field_readcount != TIFF_VARIABLE
+		    && fip->field_readcount != TIFF_VARIABLE2) {
+			uint32 expected = (fip->field_readcount == TIFF_SPP) ?
+			    (uint32) td->td_samplesperpixel :
+			    (uint32) fip->field_readcount;
+			if (!CheckDirCount(tif, dp, expected))
+				goto ignore;
+		}
+		(void) TIFFFetchNormalTag(tif, dp);
+	}
+
+	if (dir)
+		_TIFFfree(dir);
+	return 1;
+
+bad:
+	if (dir)
+		_TIFFfree(dir);
+	return 0;
+}
+
+/*
+ * Read an EXIF directory: a custom IFD that is decoded with the EXIF
+ * field table.  Returns the result of TIFFReadCustomDirectory().
+ */
+int
+TIFFReadEXIFDirectory(TIFF* tif, toff_t diroff)
+{
+	size_t nfields;
+	const TIFFFieldInfo *fields;
+
+	fields = _TIFFGetExifFieldInfo(&nfields);
+	return TIFFReadCustomDirectory(tif, diroff, fields, nfields);
+}
+
+/*
+ * Estimate the strip byte counts when the tag is missing or bogus.
+ * Returns 1 on success and -1 on allocation failure or unknown type.
+ */
+static int
+EstimateStripByteCounts(TIFF* tif, TIFFDirEntry* dir, uint16 dircount)
+{
+	static const char module[] = "EstimateStripByteCounts";
+	register TIFFDirEntry *dp;
+	register TIFFDirectory *td = &tif->tif_dir;
+	tstrip_t i;	/* as wide as td_nstrips so the loops terminate */
+
+	if (td->td_stripbytecount)
+		_TIFFfree(td->td_stripbytecount);
+	td->td_stripbytecount = (uint32*)
+	    _TIFFCheckMalloc(tif, td->td_nstrips, sizeof (uint32),
+		"for \"StripByteCounts\" array");
+	if (td->td_stripbytecount == NULL)	/* do not fill a null array */
+		return -1;
+	if (td->td_compression != COMPRESSION_NONE) {
+		uint32 space = (uint32)(sizeof (TIFFHeader) + sizeof (uint16)
+		    + (dircount * sizeof (TIFFDirEntry))
+		    + sizeof (uint32));
+		toff_t filesize = TIFFGetFileSize(tif);
+		uint16 n;
+		/* calculate amount of space used by indirect values */
+		for (dp = dir, n = dircount; n > 0; n--, dp++) {
+			uint32 cc = TIFFDataWidth((TIFFDataType) dp->tdir_type);
+			if (cc == 0) {
+				TIFFErrorExt(tif->tif_clientdata, module,
+			"%s: Cannot determine size of unknown tag type %d",
+					  tif->tif_name, dp->tdir_type);
+				return -1;
+			}
+			cc = cc * dp->tdir_count;
+			if (cc > sizeof (uint32))
+				space += cc;
+		}
+		space = filesize - space;
+		if (td->td_planarconfig == PLANARCONFIG_SEPARATE)
+			space /= td->td_samplesperpixel;
+		for (i = 0; i < td->td_nstrips; i++)
+			td->td_stripbytecount[i] = space;
+		/*
+		 * This gross hack handles a last strip whose offset is past
+		 * the place where we think it should begin: strips are
+		 * contiguous, so the overestimate is trimmed back.
+		 */
+		i--;
+		if (((toff_t)(td->td_stripoffset[i]+td->td_stripbytecount[i]))
+		    > filesize)
+			td->td_stripbytecount[i] =
+			    filesize - td->td_stripoffset[i];
+	} else {
+		uint32 rowbytes = TIFFScanlineSize(tif);
+		uint32 rowsperstrip = td->td_imagelength/td->td_stripsperimage;
+		for (i = 0; i < td->td_nstrips; i++)
+			td->td_stripbytecount[i] = rowbytes*rowsperstrip;
+	}
+	TIFFSetFieldBit(tif, FIELD_STRIPBYTECOUNTS);
+	if (!TIFFFieldSet(tif, FIELD_ROWSPERSTRIP))
+		td->td_rowsperstrip = td->td_imagelength;
+	return 1;
+}
+
+/* Emit the error for a required field, tagname, that is absent. */
+static void
+MissingRequired(TIFF* tif, const char* tagname)
+{
+	static const char module[] = "MissingRequired";
+	static const char fmt[] =
+	    "%s: TIFF directory is missing required \"%s\" field";
+	TIFFErrorExt(tif->tif_clientdata, module, fmt, tif->tif_name, tagname);
+}
+
+/*
+ * Check the count field of a directory entry against a known value.
+ * Returns 0 (the caller is expected to skip/ignore the tag) when the
+ * entry holds fewer than count values and 1 otherwise; a warning is
+ * issued on any mismatch.  Counts are cast to match the %lu format.
+ */
+static int
+CheckDirCount(TIFF* tif, TIFFDirEntry* dir, uint32 count)
+{
+	if (count > dir->tdir_count) {
+		TIFFWarningExt(tif->tif_clientdata, tif->tif_name,
+	"incorrect count for field \"%s\" (%lu, expecting %lu); tag ignored",
+		    _TIFFFieldWithTag(tif, dir->tdir_tag)->field_name,
+		    (unsigned long) dir->tdir_count, (unsigned long) count);
+		return (0);
+	} else if (count < dir->tdir_count) {
+		TIFFWarningExt(tif->tif_clientdata, tif->tif_name,
+	"incorrect count for field \"%s\" (%lu, expecting %lu); tag trimmed",
+		    _TIFFFieldWithTag(tif, dir->tdir_tag)->field_name,
+		    (unsigned long) dir->tdir_count, (unsigned long) count);
+		return (1);
+	}
+	return (1);
+}
+
+/*
+ * Fetch a contiguous directory item into cp.
+ * Returns the number of bytes fetched, or 0 after reporting an error.
+ */
+static tsize_t
+TIFFFetchData(TIFF* tif, TIFFDirEntry* dir, char* cp)
+{
+	int w = TIFFDataWidth((TIFFDataType) dir->tdir_type);
+	tsize_t cc = dir->tdir_count * w;
+
+	/* Check for overflow, including a size that wrapped negative. */
+	if (!dir->tdir_count || !w || cc <= 0 ||
+	    cc / w != (tsize_t)dir->tdir_count)
+		goto bad;
+	if (!isMapped(tif)) {
+		if (!SeekOK(tif, dir->tdir_offset))
+			goto bad;
+		if (!ReadOK(tif, cp, cc))
+			goto bad;
+	} else {
+		/* Unsigned sum: wraparound is well defined and detectable. */
+		toff_t end = dir->tdir_offset + (toff_t) cc;
+		if (end < dir->tdir_offset || end > tif->tif_size)
+			goto bad;
+		_TIFFmemcpy(cp, tif->tif_base + dir->tdir_offset, cc);
+	}
+	if (tif->tif_flags & TIFF_SWAB) {
+		switch (dir->tdir_type) {
+		case TIFF_SHORT:
+		case TIFF_SSHORT:
+			TIFFSwabArrayOfShort((uint16*) cp, dir->tdir_count);
+			break;
+		case TIFF_LONG:
+		case TIFF_SLONG:
+		case TIFF_FLOAT:
+			TIFFSwabArrayOfLong((uint32*) cp, dir->tdir_count);
+			break;
+		case TIFF_RATIONAL:
+		case TIFF_SRATIONAL:
+			TIFFSwabArrayOfLong((uint32*) cp, 2*dir->tdir_count);
+			break;
+		case TIFF_DOUBLE:
+			TIFFSwabArrayOfDouble((double*) cp, dir->tdir_count);
+			break;
+		}
+	}
+	return (cc);
+bad:
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		     "Error fetching data for field \"%s\"",
+		     _TIFFFieldWithTag(tif, dir->tdir_tag)->field_name);
+	return (tsize_t) 0;
+}
+
+/*
+ * Fetch an ASCII item; up to four bytes live in the offset field itself.
+ */
+static tsize_t
+TIFFFetchString(TIFF* tif, TIFFDirEntry* dir, char* cp)
+{
+	uint32 inl;
+	if (dir->tdir_count > 4)
+		return (TIFFFetchData(tif, dir, cp));
+	inl = dir->tdir_offset;
+	if (tif->tif_flags & TIFF_SWAB)
+		TIFFSwabLong(&inl);
+	_TIFFmemcpy(cp, &inl, dir->tdir_count);
+	return (1);
+}
+
+/*
+ * Convert numerator+denominator to float; fails (0) on zero denominator.
+ */
+static int
+cvtRational(TIFF* tif, TIFFDirEntry* dir, uint32 num, uint32 denom, float* rv)
+{
+	if (denom == 0) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "%s: Rational with zero denominator (num = %lu)",
+		    _TIFFFieldWithTag(tif, dir->tdir_tag)->field_name,
+		    (unsigned long) num);	/* match the %lu conversion */
+		return (0);
+	}
+	if (dir->tdir_type == TIFF_RATIONAL)
+		*rv = ((float)num / (float)denom);
+	else
+		*rv = ((float)(int32)num / (float)(int32)denom);
+	return (1);
+}
+
+/*
+ * Fetch a rational item from the file and return it as a float;
+ * 1.0 is returned if the fetch or the conversion fails.
+ */
+static float
+TIFFFetchRational(TIFF* tif, TIFFDirEntry* dir)
+{
+	uint32 pair[2];
+	float value;
+	if (TIFFFetchData(tif, dir, (char *)pair) &&
+	    cvtRational(tif, dir, pair[0], pair[1], &value))
+		return (value);
+	return (1.0f);
+}
+
+/*
+ * Fetch a single floating point value from the offset field: the
+ * extracted bits are copied into a float and converted to native form.
+ */
+static float
+TIFFFetchFloat(TIFF* tif, TIFFDirEntry* dir)
+{
+	int32 bits;
+	float result;
+	bits = TIFFExtractData(tif, dir->tdir_type, dir->tdir_offset);
+	_TIFFmemcpy(&result, &bits, sizeof(float));
+	TIFFCvtIEEEFloatToNative(tif, 1, &result);
+	return (result);
+}
+
+/*
+ * Fetch an array of BYTE or SBYTE values.
+ *
+ * Up to four values are stored in the offset field itself and are
+ * unpacked according to the byte order of the file; the signed and
+ * unsigned types share the same unpacking.  Larger arrays are read
+ * with TIFFFetchData().
+ */
+static int
+TIFFFetchByteArray(TIFF* tif, TIFFDirEntry* dir, uint8* v)
+{
+	uint32 packed;
+
+	if (dir->tdir_count > 4)
+		return (TIFFFetchData(tif, dir, (char*) v) != 0);	/* XXX */
+
+	/*
+	 * Extract data from offset field.
+	 */
+	packed = dir->tdir_offset;
+	if (tif->tif_header.tiff_magic == TIFF_BIGENDIAN) {
+		/* first value in the most significant byte */
+		switch (dir->tdir_count) {
+		case 4: v[3] = packed & 0xff;
+			/* fall thru... */
+		case 3: v[2] = (packed >> 8) & 0xff;
+			/* fall thru... */
+		case 2: v[1] = (packed >> 16) & 0xff;
+			/* fall thru... */
+		case 1: v[0] = packed >> 24;
+		}
+	} else {
+		/* first value in the least significant byte */
+		switch (dir->tdir_count) {
+		case 4: v[3] = packed >> 24;
+			/* fall thru... */
+		case 3: v[2] = (packed >> 16) & 0xff;
+			/* fall thru... */
+		case 2: v[1] = (packed >> 8) & 0xff;
+			/* fall thru... */
+		case 1: v[0] = packed & 0xff;
+		}
+	}
+	return (1);
+}
+
+/*
+ * Fetch an array of SHORT or SSHORT values; one or two values are
+ * unpacked from the offset field in file byte order.
+ */
+static int
+TIFFFetchShortArray(TIFF* tif, TIFFDirEntry* dir, uint16* v)
+{
+	uint16 hi, lo;
+	int msb_first;
+
+	if (dir->tdir_count > 2)
+		return (TIFFFetchData(tif, dir, (char *)v) != 0);
+
+	hi = (uint16) (dir->tdir_offset >> 16);
+	lo = (uint16) (dir->tdir_offset & 0xffff);
+	msb_first = (tif->tif_header.tiff_magic == TIFF_BIGENDIAN);
+	if (dir->tdir_count >= 1)
+		v[0] = msb_first ? hi : lo;
+	if (dir->tdir_count == 2)
+		v[1] = msb_first ? lo : hi;
+	return (1);
+}
+
+/*
+ * Fetch a pair of SHORT or BYTE values; a tag may have either type.
+ * Counts above two are refused: they would overrun the local arrays.
+ */
+static int
+TIFFFetchShortPair(TIFF* tif, TIFFDirEntry* dir)
+{
+	uint8 b[4] = { 0, 0, 0, 0 };	/* zeroed: count may be below two */
+	uint16 s[2] = { 0, 0 };
+
+	if (dir->tdir_count > 2) {
+		TIFFWarningExt(tif->tif_clientdata, tif->tif_name,
+		    "unexpected count for field \"%s\"; tag ignored",
+		    _TIFFFieldWithTag(tif, dir->tdir_tag)->field_name);
+		return 0;
+	}
+
+	/* Either element type is handed on as two integer arguments. */
+	if (dir->tdir_type == TIFF_BYTE || dir->tdir_type == TIFF_SBYTE)
+		return TIFFFetchByteArray(tif, dir, b)
+			&& TIFFSetField(tif, dir->tdir_tag, b[0], b[1]);
+	if (dir->tdir_type == TIFF_SHORT || dir->tdir_type == TIFF_SSHORT)
+		return TIFFFetchShortArray(tif, dir, s)
+			&& TIFFSetField(tif, dir->tdir_tag, s[0], s[1]);
+	return 0;
+}
+
+/*
+ * Fetch an array of LONG or SLONG values; a single value is taken
+ * directly from the offset field.
+ */
+static int
+TIFFFetchLongArray(TIFF* tif, TIFFDirEntry* dir, uint32* v)
+{
+	if (dir->tdir_count != 1)
+		return (TIFFFetchData(tif, dir, (char*) v) != 0);
+	v[0] = dir->tdir_offset;
+	return (1);
+}
+
+/*
+ * Fetch an array of RATIONAL or SRATIONAL values: the raw pairs are
+ * read into a scratch buffer and converted into v one at a time.
+ */
+static int
+TIFFFetchRationalArray(TIFF* tif, TIFFDirEntry* dir, float* v)
+{
+	uint32 idx;
+	int status = 0;
+	uint32* pairs = (uint32*)_TIFFCheckMalloc(tif,
+	    dir->tdir_count, TIFFDataWidth((TIFFDataType) dir->tdir_type),
+	    "to fetch array of rationals");
+
+	if (pairs == NULL)
+		return (0);
+	if (TIFFFetchData(tif, dir, (char *)pairs)) {
+		for (idx = 0; idx < dir->tdir_count; idx++) {
+			const uint32* p = pairs + 2*idx;
+			status = cvtRational(tif, dir, p[0], p[1], &v[idx]);
+			if (!status)
+				break;
+		}
+	}
+	_TIFFfree((char *)pairs);
+	return (status);
+}
+
+/*
+ * Fetch an array of FLOAT values.  A lone value is copied bytewise out
+ * of the offset field instead of being read through a cast pointer.
+ */
+static int
+TIFFFetchFloatArray(TIFF* tif, TIFFDirEntry* dir, float* v)
+{
+	if (dir->tdir_count == 1) {
+		_TIFFmemcpy(v, &dir->tdir_offset, sizeof (float));
+		TIFFCvtIEEEFloatToNative(tif, dir->tdir_count, v);
+		return (1);
+	}
+	if (!TIFFFetchData(tif, dir, (char*) v))
+		return (0);
+	TIFFCvtIEEEFloatToNative(tif, dir->tdir_count, v);
+	return (1);
+}
+
+/*
+ * Fetch an array of DOUBLE values and convert them to native form.
+ */
+static int
+TIFFFetchDoubleArray(TIFF* tif, TIFFDirEntry* dir, double* v)
+{
+	if (!TIFFFetchData(tif, dir, (char*) v))
+		return (0);
+
+	TIFFCvtIEEEDoubleToNative(tif, dir->tdir_count, v);
+	return (1);
+}
+
+/*
+ * Fetch an array of ANY values.  The values are returned as
+ * doubles, which should be able to hold all the types (yes,
+ * there really should be a tany_t to avoid this potential
+ * non-portability).  The caller's vector v must have room for
+ * tdir_count doubles: it is first used as the buffer into which
+ * the base type vector is read and is then converted in place
+ * to double, from end to front so that no unread element is
+ * overwritten.  Returns 1 on success; 0 if the fetch fails or
+ * the type (NOTYPE, ASCII, UNDEFINED, ...) cannot be converted.
+ */
+static int
+TIFFFetchAnyArray(TIFF* tif, TIFFDirEntry* dir, double* v)
+{
+	int i;	/* signed so the downward loops can stop below zero */
+
+	switch (dir->tdir_type) {
+	case TIFF_BYTE:
+	case TIFF_SBYTE:
+		if (!TIFFFetchByteArray(tif, dir, (uint8*) v))
+			return (0);
+		if (dir->tdir_type == TIFF_BYTE) {
+			uint8* vp = (uint8*) v;
+			for (i = dir->tdir_count-1; i >= 0; i--)
+				v[i] = vp[i];
+		} else {
+			int8* vp = (int8*) v;
+			for (i = dir->tdir_count-1; i >= 0; i--)
+				v[i] = vp[i];
+		}
+		break;
+	case TIFF_SHORT:
+	case TIFF_SSHORT:
+		if (!TIFFFetchShortArray(tif, dir, (uint16*) v))
+			return (0);
+		if (dir->tdir_type == TIFF_SHORT) {
+			uint16* vp = (uint16*) v;
+			for (i = dir->tdir_count-1; i >= 0; i--)
+				v[i] = vp[i];
+		} else {
+			int16* vp = (int16*) v;
+			for (i = dir->tdir_count-1; i >= 0; i--)
+				v[i] = vp[i];
+		}
+		break;
+	case TIFF_LONG:
+	case TIFF_SLONG:
+		if (!TIFFFetchLongArray(tif, dir, (uint32*) v))
+			return (0);
+		if (dir->tdir_type == TIFF_LONG) {
+			uint32* vp = (uint32*) v;
+			for (i = dir->tdir_count-1; i >= 0; i--)
+				v[i] = vp[i];
+		} else {
+			int32* vp = (int32*) v;
+			for (i = dir->tdir_count-1; i >= 0; i--)
+				v[i] = vp[i];
+		}
+		break;
+	case TIFF_RATIONAL:
+	case TIFF_SRATIONAL:
+		/* rationals are delivered as floats, then widened below */
+		if (!TIFFFetchRationalArray(tif, dir, (float*) v))
+			return (0);
+		{ float* vp = (float*) v;
+		  for (i = dir->tdir_count-1; i >= 0; i--)
+			v[i] = vp[i];
+		}
+		break;
+	case TIFF_FLOAT:
+		if (!TIFFFetchFloatArray(tif, dir, (float*) v))
+			return (0);
+		{ float* vp = (float*) v;
+		  for (i = dir->tdir_count-1; i >= 0; i--)
+			v[i] = vp[i];
+		}
+		break;
+	case TIFF_DOUBLE:
+		return (TIFFFetchDoubleArray(tif, dir, (double*) v));
+	default:
+		/* TIFF_NOTYPE, TIFF_ASCII, TIFF_UNDEFINED */
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			     "cannot read TIFF_ANY type %d for field \"%s\"",
+			     dir->tdir_type,
+			     _TIFFFieldWithTag(tif, dir->tdir_tag)->field_name);
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * Fetch a tag that is not handled by special case code and hand its
+ * value to TIFFSetField().  Returns 0 if nothing could be stored.
+ */
+static int
+TIFFFetchNormalTag(TIFF* tif, TIFFDirEntry* dp)
+{
+	static const char mesg[] = "to fetch tag value";
+	int ok = 0;
+	const TIFFFieldInfo* fip = _TIFFFieldWithTag(tif, dp->tdir_tag);
+
+	if (dp->tdir_count > 1) {		/* array of values */
+		char* cp = NULL;
+
+		switch (dp->tdir_type) {
+		case TIFF_BYTE:
+		case TIFF_SBYTE:
+			cp = (char *)_TIFFCheckMalloc(tif,
+			    dp->tdir_count, sizeof (uint8), mesg);
+			ok = cp && TIFFFetchByteArray(tif, dp, (uint8*) cp);
+			break;
+		case TIFF_SHORT:
+		case TIFF_SSHORT:
+			cp = (char *)_TIFFCheckMalloc(tif,
+			    dp->tdir_count, sizeof (uint16), mesg);
+			ok = cp && TIFFFetchShortArray(tif, dp, (uint16*) cp);
+			break;
+		case TIFF_LONG:
+		case TIFF_SLONG:
+			cp = (char *)_TIFFCheckMalloc(tif,
+			    dp->tdir_count, sizeof (uint32), mesg);
+			ok = cp && TIFFFetchLongArray(tif, dp, (uint32*) cp);
+			break;
+		case TIFF_RATIONAL:
+		case TIFF_SRATIONAL:
+			cp = (char *)_TIFFCheckMalloc(tif,
+			    dp->tdir_count, sizeof (float), mesg);
+			ok = cp && TIFFFetchRationalArray(tif, dp, (float*) cp);
+			break;
+		case TIFF_FLOAT:
+			cp = (char *)_TIFFCheckMalloc(tif,
+			    dp->tdir_count, sizeof (float), mesg);
+			ok = cp && TIFFFetchFloatArray(tif, dp, (float*) cp);
+			break;
+		case TIFF_DOUBLE:
+			cp = (char *)_TIFFCheckMalloc(tif,
+			    dp->tdir_count, sizeof (double), mesg);
+			ok = cp && TIFFFetchDoubleArray(tif, dp, (double*) cp);
+			break;
+		case TIFF_ASCII:
+		case TIFF_UNDEFINED:		/* bit of a cheat... */
+			/*
+			 * Some vendors write strings w/o the trailing
+			 * NULL byte, so always append one just in case.
+			 */
+			cp = (char *)_TIFFCheckMalloc(tif, dp->tdir_count + 1,
+						      1, mesg);
+			if( (ok = (cp && TIFFFetchString(tif, dp, cp))) != 0 )
+				cp[dp->tdir_count] = '\0';	/* XXX */
+			break;
+		}
+		if (ok) {
+			ok = (fip->field_passcount ?
+			    TIFFSetField(tif, dp->tdir_tag, dp->tdir_count, cp)
+			  : TIFFSetField(tif, dp->tdir_tag, cp));
+		}
+		if (cp != NULL)
+			_TIFFfree(cp);
+	} else if (CheckDirCount(tif, dp, 1)) {	/* singleton value */
+		switch (dp->tdir_type) {
+		case TIFF_BYTE:
+		case TIFF_SBYTE:
+		case TIFF_SHORT:
+		case TIFF_SSHORT:
+			/*
+			 * If the tag is also acceptable as a LONG or SLONG
+			 * then TIFFSetField will expect an uint32 parameter
+			 * passed to it (through varargs).  Thus, for machines
+			 * where sizeof (int) != sizeof (uint32) we must do
+			 * a careful check here.  It's hard to say if this
+			 * is worth optimizing.
+			 * NB: We use TIFFFieldWithTag here knowing that
+			 *     it returns us the first entry in the table
+			 *     for the tag and that that entry is for the
+			 *     widest potential data type the tag may have.
+			 */
+			{ TIFFDataType type = fip->field_type;
+			  if (type != TIFF_LONG && type != TIFF_SLONG) {
+				uint16 v = (uint16)
+			   TIFFExtractData(tif, dp->tdir_type, dp->tdir_offset);
+				ok = (fip->field_passcount ?
+				    TIFFSetField(tif, dp->tdir_tag, 1, &v)
+				  : TIFFSetField(tif, dp->tdir_tag, v));
+				break;
+			  }
+			}
+			/* fall thru... */
+		case TIFF_LONG:
+		case TIFF_SLONG:
+			{ uint32 v32 =
+		    TIFFExtractData(tif, dp->tdir_type, dp->tdir_offset);
+			  ok = (fip->field_passcount ? 
+			      TIFFSetField(tif, dp->tdir_tag, 1, &v32)
+			    : TIFFSetField(tif, dp->tdir_tag, v32));
+			}
+			break;
+		case TIFF_RATIONAL:
+		case TIFF_SRATIONAL:
+		case TIFF_FLOAT:
+			{ float v = (dp->tdir_type == TIFF_FLOAT ? 
+			      TIFFFetchFloat(tif, dp)
+			    : TIFFFetchRational(tif, dp));
+			  ok = (fip->field_passcount ?
+			      TIFFSetField(tif, dp->tdir_tag, 1, &v)
+			    : TIFFSetField(tif, dp->tdir_tag, v));
+			}
+			break;
+		case TIFF_DOUBLE:
+			{ double v;
+			  ok = (TIFFFetchDoubleArray(tif, dp, &v) &&
+			    (fip->field_passcount ?
+			      TIFFSetField(tif, dp->tdir_tag, 1, &v)
+			    : TIFFSetField(tif, dp->tdir_tag, v))
+			  );
+			}
+			break;
+		case TIFF_ASCII:
+		case TIFF_UNDEFINED:		/* bit of a cheat... */
+			{ char c[2];
+			  if( (ok = (TIFFFetchString(tif, dp, c) != 0)) != 0 ) {
+				c[1] = '\0';		/* XXX paranoid */
+				ok = (fip->field_passcount ?
+					TIFFSetField(tif, dp->tdir_tag, 1, c)
+				      : TIFFSetField(tif, dp->tdir_tag, c));
+			  }
+			}
+			break;
+		}
+	}
+	return (ok);
+}
+
+#define	NITEMS(x)	(sizeof (x) / sizeof (x[0]))
+/*
+ * Read the SHORT values stored per sample for the given tag and
+ * reduce them to the single value *pl.  Returns 1 when all of the
+ * inspected values agree, 0 otherwise.
+ */
+static int
+TIFFFetchPerSampleShorts(TIFF* tif, TIFFDirEntry* dir, uint16* pl)
+{
+    uint16 spp = tif->tif_dir.td_samplesperpixel;
+    uint16 local[10];
+    uint16* vals = local;
+    int ok = 0;
+
+    if (!CheckDirCount(tif, dir, (uint32) spp))
+        return (0);
+    /* The local array covers small counts; larger ones are allocated. */
+    if (dir->tdir_count > NITEMS(local))
+        vals = (uint16*) _TIFFCheckMalloc(tif, dir->tdir_count, sizeof(uint16),
+                                  "to fetch per-sample values");
+    if (vals && TIFFFetchShortArray(tif, dir, vals)) {
+        /* Compare no more entries than there are samples. */
+        int limit = dir->tdir_count;
+        uint16 k;
+        if (spp < limit)
+            limit = spp;
+        for (k = 1; k < limit && vals[k] == vals[0]; k++)
+            continue;
+        if (k < limit) {
+            TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+                      "Cannot handle different per-sample values for field \"%s\"",
+                      _TIFFFieldWithTag(tif, dir->tdir_tag)->field_name);
+        } else {
+            *pl = vals[0];
+            ok = 1;
+        }
+    }
+    if (vals && vals != local)
+        _TIFFfree(vals);
+    return (ok);
+}
+
+/*
+ * Read the LONG values stored per sample for the given tag and
+ * reduce them to the single value *pl.  Returns 1 when all of the
+ * inspected values agree, 0 otherwise.
+ */
+static int
+TIFFFetchPerSampleLongs(TIFF* tif, TIFFDirEntry* dir, uint32* pl)
+{
+    uint16 spp = tif->tif_dir.td_samplesperpixel;
+    uint32 local[10];
+    uint32* vals = local;
+    int ok = 0;
+
+    if (!CheckDirCount(tif, dir, (uint32) spp))
+        return (0);
+    /* The local array covers small counts; larger ones are allocated. */
+    if (dir->tdir_count > NITEMS(local))
+        vals = (uint32*) _TIFFCheckMalloc(tif, dir->tdir_count, sizeof(uint32),
+                                  "to fetch per-sample values");
+    if (vals && TIFFFetchLongArray(tif, dir, vals)) {
+        /* Compare no more entries than there are samples. */
+        int limit = dir->tdir_count;
+        uint16 k;
+        if (spp < limit)
+            limit = spp;
+        for (k = 1; k < limit && vals[k] == vals[0]; k++)
+            continue;
+        if (k < limit) {
+            TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+                      "Cannot handle different per-sample values for field \"%s\"",
+                      _TIFFFieldWithTag(tif, dir->tdir_tag)->field_name);
+        } else {
+            *pl = vals[0];
+            ok = 1;
+        }
+    }
+    if (vals && vals != local)
+        _TIFFfree(vals);
+    return (ok);
+}
+
+/*
+ * Read the per-sample values of the given tag as doubles and reduce them
+ * to *pl.  Returns 1 when all of the inspected values agree, 0 otherwise.
+ */
+static int
+TIFFFetchPerSampleAnys(TIFF* tif, TIFFDirEntry* dir, double* pl)
+{
+    uint16 spp = tif->tif_dir.td_samplesperpixel;
+    double local[10];
+    double* vals = local;
+    int ok = 0;
+
+    if (!CheckDirCount(tif, dir, (uint32) spp))
+        return (0);
+    /* The local array covers small counts; larger ones are allocated. */
+    if (dir->tdir_count > NITEMS(local))
+        vals = (double*) _TIFFCheckMalloc(tif, dir->tdir_count, sizeof (double),
+                                  "to fetch per-sample values");
+    if (vals && TIFFFetchAnyArray(tif, dir, vals)) {
+        /* Compare no more entries than there are samples. */
+        int limit = dir->tdir_count;
+        uint16 k;
+        if (spp < limit)
+            limit = spp;
+        for (k = 1; k < limit && vals[k] == vals[0]; k++)
+            continue;
+        if (k < limit) {
+            TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+                      "Cannot handle different per-sample values for field \"%s\"",
+                      _TIFFFieldWithTag(tif, dir->tdir_tag)->field_name);
+        } else {
+            *pl = vals[0];
+            ok = 1;
+        }
+    }
+    if (vals && vals != local)
+        _TIFFfree(vals);
+    return (ok);
+}
+#undef NITEMS
+
+/*
+ * Fetch a set of strip offsets or strip byte counts into *lpp as uint32s.
+ * While this routine says "strips", in fact it's also used for tiles.
+ * The array is allocated here when *lpp is NULL.  Returns 0 on failure.
+ */
+static int
+TIFFFetchStripThing(TIFF* tif, TIFFDirEntry* dir, long nstrips, uint32** lpp)
+{
+	uint32* out;
+	int ok;
+	int k;
+
+	/* The result of the count check is not examined. */
+	CheckDirCount(tif, dir, (uint32) nstrips);
+
+	/* Obtain the output array if the caller has none yet, then zero it. */
+	if (*lpp == NULL) {
+		*lpp = (uint32 *)_TIFFCheckMalloc(tif,
+		    nstrips, sizeof (uint32), "for strip array");
+		if (*lpp == NULL)
+			return (0);
+	}
+	out = *lpp;
+	_TIFFmemset( out, 0, sizeof(uint32) * nstrips );
+
+	if (dir->tdir_type == (int)TIFF_SHORT) {
+		/* SHORT entries: read into scratch space, widen to uint32. */
+		uint16* narrow = (uint16*) _TIFFCheckMalloc(tif,
+		    dir->tdir_count, sizeof (uint16), "to fetch strip tag");
+
+		if (narrow == NULL)
+			return (0);
+		ok = TIFFFetchShortArray(tif, dir, narrow);
+		if (ok != 0) {
+			for (k = 0; k < nstrips && k < (int) dir->tdir_count; k++)
+				out[k] = narrow[k];
+		}
+		_TIFFfree((char*) narrow);
+		return (ok);
+	}
+	if (nstrips == (int) dir->tdir_count) {
+		/* Counts agree: read straight into the output array. */
+		return (TIFFFetchLongArray(tif, dir, out));
+	}
+
+	/*
+	 * Counts disagree: read everything the entry holds into scratch
+	 * space and keep only as many values as both sides can take.
+	 */
+	{
+		uint32* wide = (uint32*) _TIFFCheckMalloc(tif,
+		    dir->tdir_count, sizeof (uint32), "to fetch strip tag");
+
+		if (wide == NULL)
+			return (0);
+		ok = TIFFFetchLongArray(tif, dir, wide);
+		if (ok != 0) {
+			for (k = 0; k < nstrips && k < (int) dir->tdir_count; k++)
+				out[k] = wide[k];
+		}
+		_TIFFfree( (char *) wide );
+	}
+	return (ok);
+}
+
+/*
+ * Fetch and set the RefBlackWhite tag; non-RATIONAL data is read as LONGs and converted to float.
+ */
+static int
+TIFFFetchRefBlackWhite(TIFF* tif, TIFFDirEntry* dir)
+{
+	static const char mesg[] = "for \"ReferenceBlackWhite\" array";
+	uint32* raw;
+	float* conv;
+	uint32 k;
+	int ok = 0;
+
+	if (dir->tdir_type == TIFF_RATIONAL)
+		return (TIFFFetchNormalTag(tif, dir));
+	/* Backward compatibility: accept the values stored as LONGs. */
+	raw = (uint32*) _TIFFCheckMalloc(tif, dir->tdir_count,
+					 sizeof (uint32), mesg);
+	if (raw == NULL)
+		return (0);
+	if (TIFFFetchLongArray(tif, dir, raw)) {
+		conv = (float*)
+		    _TIFFCheckMalloc(tif, dir->tdir_count, sizeof (float), mesg);
+		if (conv != NULL) {
+			for (k = 0; k < dir->tdir_count; k++)
+				conv[k] = (float) raw[k];
+			ok = TIFFSetField(tif, dir->tdir_tag, conv);
+			_TIFFfree((char*) conv);
+		}
+	}
+	_TIFFfree((char*) raw);
+	return (ok);
+}
+
+/*
+ * Replace a single strip (tile) of uncompressed data by
+ * multiple strips (tiles), each approximately STRIP_SIZE_DEFAULT
+ * bytes.  This is useful for dealing with large images or
+ * for dealing with machines with a limited amount of
+ * memory.  On any obstacle the original layout is left alone.
+ */
+static void
+ChopUpSingleUncompressedStrip(TIFF* tif)
+{
+	TIFFDirectory* dirp = &tif->tif_dir;
+	uint32 left = dirp->td_stripbytecount[0];	/* bytes not yet assigned */
+	uint32 pos = dirp->td_stripoffset[0];		/* start of the next piece */
+	tsize_t rowbytes = TIFFVTileSize(tif, 1);
+	tsize_t piece;
+	tstrip_t k, npieces, rps;
+	uint32* counts;
+	uint32* offsets;
+
+	/*
+	 * A piece holds at least one row; rows narrower than the default
+	 * strip size are grouped so a piece comes close to that size.
+	 */
+	if (rowbytes > STRIP_SIZE_DEFAULT) {
+		rps = 1;
+		piece = rowbytes;
+	} else if (rowbytes > 0) {
+		rps = STRIP_SIZE_DEFAULT / rowbytes;
+		piece = rowbytes * rps;
+	} else {
+		return;
+	}
+
+	/* Give up unless the new rows-per-strip is below the current one. */
+	if (rps >= dirp->td_rowsperstrip)
+		return;
+	npieces = (tstrip_t) TIFFhowmany(left, piece);
+	if (npieces == 0)	/* something is wonky, do nothing. */
+		return;
+
+	counts = (uint32*) _TIFFCheckMalloc(tif, npieces, sizeof (uint32),
+				"for chopped \"StripByteCounts\" array");
+	offsets = (uint32*) _TIFFCheckMalloc(tif, npieces, sizeof (uint32),
+				"for chopped \"StripOffsets\" array");
+	if (counts == NULL || offsets == NULL) {
+		/*
+		 * Unable to allocate new strip information, give
+		 * up and use the original one strip information.
+		 */
+		if (counts != NULL)
+			_TIFFfree(counts);
+		if (offsets != NULL)
+			_TIFFfree(offsets);
+		return;
+	}
+
+	/*
+	 * Hand the data out piece by piece; each piece is clamped
+	 * to the number of bytes that are still unassigned.
+	 */
+	for (k = 0; k < npieces; k++) {
+		if (piece > (tsize_t) left)
+			piece = left;
+		counts[k] = piece;
+		offsets[k] = pos;
+		pos += piece;
+		left -= piece;
+	}
+
+	/* Install the new arrays in place of the single-strip ones. */
+	dirp->td_nstrips = npieces;
+	dirp->td_stripsperimage = dirp->td_nstrips;
+	TIFFSetField(tif, TIFFTAG_ROWSPERSTRIP, rps);
+
+	_TIFFfree(dirp->td_stripbytecount);
+	_TIFFfree(dirp->td_stripoffset);
+	dirp->td_stripbytecount = counts;
+	dirp->td_stripoffset = offsets;
+	dirp->td_stripbytecountsorted = 1;
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_dirwrite.c b/src/libtiff/tif_dirwrite.c
new file mode 100644
index 0000000..d775354
--- /dev/null
+++ b/src/libtiff/tif_dirwrite.c
@@ -0,0 +1,1243 @@
+/* $Id: tif_dirwrite.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ *
+ * Directory Write Support Routines.
+ */
+#include "tiffiop.h"
+
+#ifdef HAVE_IEEEFP	/* with HAVE_IEEEFP the two conversions below expand to nothing */
+# define	TIFFCvtNativeToIEEEFloat(tif, n, fp)
+# define	TIFFCvtNativeToIEEEDouble(tif, n, dp)
+#else			/* otherwise they are functions defined in another file */
+extern	void TIFFCvtNativeToIEEEFloat(TIFF*, uint32, float*);
+extern	void TIFFCvtNativeToIEEEDouble(TIFF*, uint32, double*);
+#endif
+
+static	int TIFFWriteNormalTag(TIFF*, TIFFDirEntry*, const TIFFFieldInfo*);
+static	void TIFFSetupShortLong(TIFF*, ttag_t, TIFFDirEntry*, uint32);
+static	void TIFFSetupShort(TIFF*, ttag_t, TIFFDirEntry*, uint16);
+static	int TIFFSetupShortPair(TIFF*, ttag_t, TIFFDirEntry*);
+static	int TIFFWritePerSampleShorts(TIFF*, ttag_t, TIFFDirEntry*);
+static	int TIFFWritePerSampleAnys(TIFF*, TIFFDataType, ttag_t, TIFFDirEntry*);
+static	int TIFFWriteShortTable(TIFF*, ttag_t, TIFFDirEntry*, uint32, uint16**);
+static	int TIFFWriteShortArray(TIFF*, TIFFDirEntry*, uint16*);
+static	int TIFFWriteLongArray(TIFF *, TIFFDirEntry*, uint32*);
+static	int TIFFWriteRationalArray(TIFF *, TIFFDirEntry*, float*);
+static	int TIFFWriteFloatArray(TIFF *, TIFFDirEntry*, float*);
+static	int TIFFWriteDoubleArray(TIFF *, TIFFDirEntry*, double*);
+static	int TIFFWriteByteArray(TIFF*, TIFFDirEntry*, char*);
+static	int TIFFWriteAnyArray(TIFF*,
+	    TIFFDataType, ttag_t, TIFFDirEntry*, uint32, double*);
+static	int TIFFWriteTransferFunction(TIFF*, TIFFDirEntry*);
+static	int TIFFWriteInkNames(TIFF*, TIFFDirEntry*);
+static	int TIFFWriteData(TIFF*, TIFFDirEntry*, char*);
+static	int TIFFLinkDirectory(TIFF*);
+
+#define	WriteRationalPair(type, tag1, v1, tag2, v2) {	/* uses tif, dir and label bad of the expanding function */	\
+	TIFFWriteRational((tif), (type), (tag1), (dir), (v1))	\
+	TIFFWriteRational((tif), (type), (tag2), (dir)+1, (v2))	\
+	(dir)++;	/* skip the first entry only */		\
+}
+#define	TIFFWriteRational(tif, type, tag, dir, v)	/* expands to statements, not an expression */	\
+	(dir)->tdir_tag = (tag);				\
+	(dir)->tdir_type = (type);				\
+	(dir)->tdir_count = 1;					\
+	if (!TIFFWriteRationalArray((tif), (dir), &(v)))	\
+		goto bad;	/* label must exist where the macro is expanded */
+
+/*
+ * Write the contents of the current directory to the file.  With ``done''
+ * set, the encoder is shut down first and the in-memory directory is freed
+ * and recreated afterwards.  This routine doesn't handle overwriting a
+ * directory with auxiliary storage that's been changed.  Returns 1 or 0.
+ */
+static int
+_TIFFWriteDirectory(TIFF* tif, int done)
+{
+	uint16 dircount;
+	toff_t diroff;
+	ttag_t tag;
+	uint32 nfields;
+	tsize_t dirsize;
+	char* data;
+	TIFFDirEntry* dir;
+	TIFFDirectory* td;
+	unsigned long b, fields[FIELD_SETLONGS];
+	int fi, nfi;
+
+	if (tif->tif_mode == O_RDONLY)
+		return (1);	/* nothing is written for read-only files */
+	/*
+	 * Clear write state so that subsequent images with
+	 * different characteristics get the right buffers
+	 * setup for them.
+	 */
+	if (done)
+	{
+	    if (tif->tif_flags & TIFF_POSTENCODE) {
+		    tif->tif_flags &= ~TIFF_POSTENCODE;
+		    if (!(*tif->tif_postencode)(tif)) {
+				TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+				"Error post-encoding before directory write");
+			    return (0);
+		    }
+	    }
+	    (*tif->tif_close)(tif);		/* shutdown encoder */
+	    /*
+	     * Flush any data that might have been written
+	     * by the compression close+cleanup routines.
+	     */
+	    if (tif->tif_rawcc > 0 && !TIFFFlushData1(tif)) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			"Error flushing data before directory write");
+		    return (0);
+	    }
+	    if ((tif->tif_flags & TIFF_MYBUFFER) && tif->tif_rawdata) {
+		    _TIFFfree(tif->tif_rawdata);
+		    tif->tif_rawdata = NULL;
+		    tif->tif_rawcc = 0;
+		    tif->tif_rawdatasize = 0;
+	    }
+	    tif->tif_flags &= ~(TIFF_BEENWRITING|TIFF_BUFFERSETUP);
+	}
+
+	td = &tif->tif_dir;
+	/*
+	 * Size the directory so that we can calculate
+	 * offsets for the data items that aren't kept
+	 * in-place in each field.
+	 */
+	nfields = 0;
+	for (b = 0; b <= FIELD_LAST; b++)
+		if (TIFFFieldSet(tif, b) && b != FIELD_CUSTOM)
+			nfields += (b < FIELD_SUBFILETYPE ? 2 : 1);
+        nfields += td->td_customValueCount;
+	dirsize = nfields * sizeof (TIFFDirEntry);
+	data = (char*) _TIFFmalloc(dirsize);
+	if (data == NULL) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "Cannot write directory, out of space");
+		return (0);
+	}
+	/* Zero the entries so bytes no tag writer fills in are never file garbage. */
+	_TIFFmemset(data, 0, dirsize);
+	/*
+	 * Directory hasn't been placed yet, put
+	 * it at the end of the file and link it
+	 * into the existing directory structure.
+	 */
+	if (tif->tif_diroff == 0 && !TIFFLinkDirectory(tif))
+		goto bad;
+	tif->tif_dataoff = (toff_t)(
+	    tif->tif_diroff + sizeof (uint16) + dirsize + sizeof (toff_t));
+	if (tif->tif_dataoff & 1)
+		tif->tif_dataoff++;
+	(void) TIFFSeekFile(tif, tif->tif_dataoff, SEEK_SET);
+	tif->tif_curdir++;
+	dir = (TIFFDirEntry*) data;
+	/*
+	 * Setup external form of directory
+	 * entries and write data items.
+	 */
+	_TIFFmemcpy(fields, td->td_fieldsset, sizeof (fields));
+	/*
+	 * Write out ExtraSamples tag only if
+	 * extra samples are present in the data.
+	 */
+	if (FieldSet(fields, FIELD_EXTRASAMPLES) && !td->td_extrasamples) {
+		ResetFieldBit(fields, FIELD_EXTRASAMPLES);
+		nfields--;
+		dirsize -= sizeof (TIFFDirEntry);
+	}								/*XXX*/
+	for (fi = 0, nfi = tif->tif_nfields; nfi > 0; nfi--, fi++) {
+		const TIFFFieldInfo* fip = tif->tif_fieldinfo[fi];
+
+                /*
+                ** For custom fields, we test to see if the custom field
+                ** is set or not.  For normal fields, we just use the
+                ** FieldSet test. 
+                */
+                if( fip->field_bit == FIELD_CUSTOM )
+                {
+                    int ci, is_set = FALSE;
+
+                    for( ci = 0; ci < td->td_customValueCount; ci++ )
+                        is_set |= (td->td_customValues[ci].info == fip);
+
+                    if( !is_set )
+                        continue;
+                }
+		else if (!FieldSet(fields, fip->field_bit))
+                    continue;
+
+                /*
+                ** Handle other fields.
+                */
+		switch (fip->field_bit)
+                {
+		case FIELD_STRIPOFFSETS:
+			/*
+			 * We use one field bit for both strip and tile
+			 * offsets, and so must be careful in selecting
+			 * the appropriate field descriptor (so that tags
+			 * are written in sorted order).
+			 */
+			tag = isTiled(tif) ?
+			    TIFFTAG_TILEOFFSETS : TIFFTAG_STRIPOFFSETS;
+			if (tag != fip->field_tag)
+				continue;
+			
+			dir->tdir_tag = (uint16) tag;
+			dir->tdir_type = (uint16) TIFF_LONG;
+			dir->tdir_count = (uint32) td->td_nstrips;
+			if (!TIFFWriteLongArray(tif, dir, td->td_stripoffset))
+				goto bad;
+			break;
+		case FIELD_STRIPBYTECOUNTS:
+			/*
+			 * We use one field bit for both strip and tile
+			 * byte counts, and so must be careful in selecting
+			 * the appropriate field descriptor (so that tags
+			 * are written in sorted order).
+			 */
+			tag = isTiled(tif) ?
+			    TIFFTAG_TILEBYTECOUNTS : TIFFTAG_STRIPBYTECOUNTS;
+			if (tag != fip->field_tag)
+				continue;
+			
+			dir->tdir_tag = (uint16) tag;
+			dir->tdir_type = (uint16) TIFF_LONG;
+			dir->tdir_count = (uint32) td->td_nstrips;
+			if (!TIFFWriteLongArray(tif, dir,
+						td->td_stripbytecount))
+				goto bad;
+			break;
+		case FIELD_ROWSPERSTRIP:
+			TIFFSetupShortLong(tif, TIFFTAG_ROWSPERSTRIP,
+			    dir, td->td_rowsperstrip);
+			break;
+		case FIELD_COLORMAP:
+			if (!TIFFWriteShortTable(tif, TIFFTAG_COLORMAP, dir,
+			    3, td->td_colormap))
+				goto bad;
+			break;
+		case FIELD_IMAGEDIMENSIONS:
+			TIFFSetupShortLong(tif, TIFFTAG_IMAGEWIDTH,
+			    dir++, td->td_imagewidth);
+			TIFFSetupShortLong(tif, TIFFTAG_IMAGELENGTH,
+			    dir, td->td_imagelength);
+			break;
+		case FIELD_TILEDIMENSIONS:
+			TIFFSetupShortLong(tif, TIFFTAG_TILEWIDTH,
+			    dir++, td->td_tilewidth);
+			TIFFSetupShortLong(tif, TIFFTAG_TILELENGTH,
+			    dir, td->td_tilelength);
+			break;
+		case FIELD_COMPRESSION:
+			TIFFSetupShort(tif, TIFFTAG_COMPRESSION,
+			    dir, td->td_compression);
+			break;
+		case FIELD_PHOTOMETRIC:
+			TIFFSetupShort(tif, TIFFTAG_PHOTOMETRIC,
+			    dir, td->td_photometric);
+			break;
+		case FIELD_POSITION:
+			WriteRationalPair(TIFF_RATIONAL,
+			    TIFFTAG_XPOSITION, td->td_xposition,
+			    TIFFTAG_YPOSITION, td->td_yposition);
+			break;
+		case FIELD_RESOLUTION:
+			WriteRationalPair(TIFF_RATIONAL,
+			    TIFFTAG_XRESOLUTION, td->td_xresolution,
+			    TIFFTAG_YRESOLUTION, td->td_yresolution);
+			break;
+		case FIELD_BITSPERSAMPLE:
+		case FIELD_MINSAMPLEVALUE:
+		case FIELD_MAXSAMPLEVALUE:
+		case FIELD_SAMPLEFORMAT:
+			if (!TIFFWritePerSampleShorts(tif, fip->field_tag, dir))
+				goto bad;
+			break;
+		case FIELD_SMINSAMPLEVALUE:
+		case FIELD_SMAXSAMPLEVALUE:
+			if (!TIFFWritePerSampleAnys(tif,
+			    _TIFFSampleToTagType(tif), fip->field_tag, dir))
+				goto bad;
+			break;
+		case FIELD_PAGENUMBER:
+		case FIELD_HALFTONEHINTS:
+		case FIELD_YCBCRSUBSAMPLING:
+			if (!TIFFSetupShortPair(tif, fip->field_tag, dir))
+				goto bad;
+			break;
+		case FIELD_INKNAMES:
+			if (!TIFFWriteInkNames(tif, dir))
+				goto bad;
+			break;
+		case FIELD_TRANSFERFUNCTION:
+			if (!TIFFWriteTransferFunction(tif, dir))
+				goto bad;
+			break;
+		case FIELD_SUBIFD:
+			/*
+			 * XXX: Always write this field using LONG type
+			 * for backward compatibility.
+			 */
+			dir->tdir_tag = (uint16) fip->field_tag;
+			dir->tdir_type = (uint16) TIFF_LONG;
+			dir->tdir_count = (uint32) td->td_nsubifd;
+			if (!TIFFWriteLongArray(tif, dir, td->td_subifd))
+				goto bad;
+			/*
+			 * Total hack: if this directory includes a SubIFD
+			 * tag then force the next <n> directories to be
+			 * written as ``sub directories'' of this one.  This
+			 * is used to write things like thumbnails and
+			 * image masks that one wants to keep out of the
+			 * normal directory linkage access mechanism.
+			 */
+			if (dir->tdir_count > 0) {
+				tif->tif_flags |= TIFF_INSUBIFD;
+				tif->tif_nsubifd = (uint16) dir->tdir_count;
+				if (dir->tdir_count > 1)
+					tif->tif_subifdoff = dir->tdir_offset;
+				else
+					tif->tif_subifdoff = (uint32)(
+					      tif->tif_diroff
+					    + sizeof (uint16)
+					    + ((char*)&dir->tdir_offset-data));
+			}
+			break;
+		default:
+			/* XXX: Should be fixed and removed. */
+			if (fip->field_tag == TIFFTAG_DOTRANGE) {
+				if (!TIFFSetupShortPair(tif, fip->field_tag, dir))
+					goto bad;
+			}
+			else if (!TIFFWriteNormalTag(tif, dir, fip))
+				goto bad;
+			break;
+		}
+		dir++;
+                
+                if( fip->field_bit != FIELD_CUSTOM )
+                    ResetFieldBit(fields, fip->field_bit);
+	}
+
+	/*
+	 * Write directory.
+	 */
+	dircount = (uint16) nfields;
+	diroff = (uint32) tif->tif_nextdiroff;
+	if (tif->tif_flags & TIFF_SWAB) {
+		/*
+		 * The file's byte order is opposite to the
+		 * native machine architecture.  We overwrite
+		 * the directory information with impunity
+		 * because it'll be released below after we
+		 * write it to the file.  Note that all the
+		 * other tag construction routines assume that
+		 * we do this byte-swapping; i.e. they only
+		 * byte-swap indirect data.
+		 */
+		for (dir = (TIFFDirEntry*) data; dircount; dir++, dircount--) {
+			TIFFSwabArrayOfShort(&dir->tdir_tag, 2);
+			TIFFSwabArrayOfLong(&dir->tdir_count, 2);
+		}
+		dircount = (uint16) nfields;
+		TIFFSwabShort(&dircount);
+		TIFFSwabLong(&diroff);
+	}
+	(void) TIFFSeekFile(tif, tif->tif_diroff, SEEK_SET);
+	if (!WriteOK(tif, &dircount, sizeof (dircount))) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Error writing directory count");
+		goto bad;
+	}
+	if (!WriteOK(tif, data, dirsize)) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Error writing directory contents");
+		goto bad;
+	}
+	if (!WriteOK(tif, &diroff, sizeof (diroff))) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Error writing directory link");
+		goto bad;
+	}
+	if (done) {
+		TIFFFreeDirectory(tif);
+		tif->tif_flags &= ~TIFF_DIRTYDIRECT;
+		(*tif->tif_cleanup)(tif);
+
+		/*
+		* Reset directory-related state for subsequent
+		* directories.
+		*/
+		TIFFCreateDirectory(tif);
+	}
+	_TIFFfree(data);
+	return (1);
+bad:
+	_TIFFfree(data);
+	return (0);
+}
+#undef WriteRationalPair
+
+int
+TIFFWriteDirectory(TIFF* tif)
+{	/* Full write: TRUE requests the encoder shutdown and directory reset. */
+	return (_TIFFWriteDirectory(tif, TRUE));
+}
+
+/*
+ * Like TIFFWriteDirectory(), but keeps every in-memory data structure
+ * so the directory can be written again later; this makes a partially
+ * written TIFF file readable before it is completed/closed.  Afterwards
+ * the write offset is moved to the end of the file.  Returns the write status.
+ */
+int
+TIFFCheckpointDirectory(TIFF* tif)
+{
+	int written;
+
+	if (!tif->tif_dir.td_stripoffset)	/* strip arrays not set up yet */
+		(void) TIFFSetupStrips(tif);
+	written = _TIFFWriteDirectory(tif, FALSE);
+	(void) TIFFSetWriteOffset(tif, TIFFSeekFile(tif, 0, SEEK_END));
+	return (written);
+}
+
+/*
+ * Process tags that are not special cased: fill in the entry from the field info, obtain the value(s) with TIFFGetField() and store them.  Returns 0 when a write helper fails, else 1.  NOTE(review): TIFFGetField() results are never checked here.
+ */
+static int
+TIFFWriteNormalTag(TIFF* tif, TIFFDirEntry* dir, const TIFFFieldInfo* fip)
+{
+	uint16 wc = (uint16) fip->field_writecount;	/* declared write count, cut to 16 bits */
+	uint32 wc2;					/* receives counts of TIFF_VARIABLE2 fields */
+
+	dir->tdir_tag = (uint16) fip->field_tag;
+	dir->tdir_type = (uint16) fip->field_type;
+	dir->tdir_count = wc;
+	
+	switch (fip->field_type) {
+	case TIFF_SHORT:
+	case TIFF_SSHORT:
+		if (fip->field_passcount) {	/* TIFFGetField() hands back a count and a pointer */
+			uint16* wp;
+			if (wc == (uint16) TIFF_VARIABLE2) {
+				TIFFGetField(tif, fip->field_tag, &wc2, &wp);
+				dir->tdir_count = wc2;
+			} else {	/* Assume TIFF_VARIABLE */
+				TIFFGetField(tif, fip->field_tag, &wc, &wp);
+				dir->tdir_count = wc;
+			}
+			if (!TIFFWriteShortArray(tif, dir, wp))
+				return 0;
+		} else {
+			if (wc == 1) {	/* single value: placed in the entry itself */
+				uint16 sv;
+				TIFFGetField(tif, fip->field_tag, &sv);
+				dir->tdir_offset =
+					TIFFInsertData(tif, dir->tdir_type, sv);
+			} else {
+				uint16* wp;
+				TIFFGetField(tif, fip->field_tag, &wp);
+				if (!TIFFWriteShortArray(tif, dir, wp))
+					return 0;
+			}
+		}
+		break;
+	case TIFF_LONG:
+	case TIFF_SLONG:
+	case TIFF_IFD:
+		if (fip->field_passcount) {
+			uint32* lp;
+			if (wc == (uint16) TIFF_VARIABLE2) {
+				TIFFGetField(tif, fip->field_tag, &wc2, &lp);
+				dir->tdir_count = wc2;
+			} else {	/* Assume TIFF_VARIABLE */
+				TIFFGetField(tif, fip->field_tag, &wc, &lp);
+				dir->tdir_count = wc;
+			}
+			if (!TIFFWriteLongArray(tif, dir, lp))
+				return 0;
+		} else {
+			if (wc == 1) {
+				/* XXX handle LONG->SHORT conversion */
+				TIFFGetField(tif, fip->field_tag,
+					     &dir->tdir_offset);
+			} else {
+				uint32* lp;
+				TIFFGetField(tif, fip->field_tag, &lp);
+				if (!TIFFWriteLongArray(tif, dir, lp))
+					return 0;
+			}
+		}
+		break;
+	case TIFF_RATIONAL:
+	case TIFF_SRATIONAL:
+		if (fip->field_passcount) {
+			float* fp;
+			if (wc == (uint16) TIFF_VARIABLE2) {
+				TIFFGetField(tif, fip->field_tag, &wc2, &fp);
+				dir->tdir_count = wc2;
+			} else {	/* Assume TIFF_VARIABLE */
+				TIFFGetField(tif, fip->field_tag, &wc, &fp);
+				dir->tdir_count = wc;
+			}
+			if (!TIFFWriteRationalArray(tif, dir, fp))
+				return 0;
+		} else {
+			if (wc == 1) {
+				float fv;
+				TIFFGetField(tif, fip->field_tag, &fv);
+				if (!TIFFWriteRationalArray(tif, dir, &fv))
+					return 0;
+			} else {
+				float* fp;
+				TIFFGetField(tif, fip->field_tag, &fp);
+				if (!TIFFWriteRationalArray(tif, dir, fp))
+					return 0;
+			}
+		}
+		break;
+	case TIFF_FLOAT:
+		if (fip->field_passcount) {
+			float* fp;
+			if (wc == (uint16) TIFF_VARIABLE2) {
+				TIFFGetField(tif, fip->field_tag, &wc2, &fp);
+				dir->tdir_count = wc2;
+			} else {	/* Assume TIFF_VARIABLE */
+				TIFFGetField(tif, fip->field_tag, &wc, &fp);
+				dir->tdir_count = wc;
+			}
+			if (!TIFFWriteFloatArray(tif, dir, fp))
+				return 0;
+		} else {
+			if (wc == 1) {
+				float fv;
+				TIFFGetField(tif, fip->field_tag, &fv);
+				if (!TIFFWriteFloatArray(tif, dir, &fv))
+					return 0;
+			} else {
+				float* fp;
+				TIFFGetField(tif, fip->field_tag, &fp);
+				if (!TIFFWriteFloatArray(tif, dir, fp))
+					return 0;
+			}
+		}
+		break;
+	case TIFF_DOUBLE:
+		if (fip->field_passcount) {
+			double* dp;
+			if (wc == (uint16) TIFF_VARIABLE2) {
+				TIFFGetField(tif, fip->field_tag, &wc2, &dp);
+				dir->tdir_count = wc2;
+			} else {	/* Assume TIFF_VARIABLE */
+				TIFFGetField(tif, fip->field_tag, &wc, &dp);
+				dir->tdir_count = wc;
+			}
+			if (!TIFFWriteDoubleArray(tif, dir, dp))
+				return 0;
+		} else {
+			if (wc == 1) {
+				double dv;
+				TIFFGetField(tif, fip->field_tag, &dv);
+				if (!TIFFWriteDoubleArray(tif, dir, &dv))
+					return 0;
+			} else {
+				double* dp;
+				TIFFGetField(tif, fip->field_tag, &dp);
+				if (!TIFFWriteDoubleArray(tif, dir, dp))
+					return 0;
+			}
+		}
+		break;
+	case TIFF_ASCII:
+		{ 
+                    char* cp;
+                    if (fip->field_passcount)	/* a count fetched here is superseded by strlen() below */
+                        TIFFGetField(tif, fip->field_tag, &wc, &cp);
+                    else
+                        TIFFGetField(tif, fip->field_tag, &cp);
+
+                    dir->tdir_count = (uint32) (strlen(cp) + 1);
+                    if (!TIFFWriteByteArray(tif, dir, cp))
+                        return (0);
+		}
+		break;
+
+        case TIFF_BYTE:
+        case TIFF_SBYTE:          
+		if (fip->field_passcount) {
+			char* cp;
+			if (wc == (uint16) TIFF_VARIABLE2) {
+				TIFFGetField(tif, fip->field_tag, &wc2, &cp);
+				dir->tdir_count = wc2;
+			} else {	/* Assume TIFF_VARIABLE */
+				TIFFGetField(tif, fip->field_tag, &wc, &cp);
+				dir->tdir_count = wc;
+			}
+			if (!TIFFWriteByteArray(tif, dir, cp))
+				return 0;
+		} else {
+			if (wc == 1) {
+				char cv;
+				TIFFGetField(tif, fip->field_tag, &cv);
+				if (!TIFFWriteByteArray(tif, dir, &cv))
+					return 0;
+			} else {
+				char* cp;
+				TIFFGetField(tif, fip->field_tag, &cp);
+				if (!TIFFWriteByteArray(tif, dir, cp))
+					return 0;
+			}
+		}
+                break;
+
+	case TIFF_UNDEFINED:
+		{ char* cp;
+		  if (wc == (unsigned short) TIFF_VARIABLE) {
+			TIFFGetField(tif, fip->field_tag, &wc, &cp);
+			dir->tdir_count = wc;
+		  } else if (wc == (unsigned short) TIFF_VARIABLE2) {
+			TIFFGetField(tif, fip->field_tag, &wc2, &cp);
+			dir->tdir_count = wc2;
+		  } else 	/* fixed count: tdir_count keeps the declared write count */
+			TIFFGetField(tif, fip->field_tag, &cp);
+		  if (!TIFFWriteByteArray(tif, dir, cp))
+			return (0);
+		}
+		break;
+
+        case TIFF_NOTYPE:	/* only tag, type and count are filled in; no value is stored */
+                break;
+	}
+	return (1);
+}
+
+/*
+ * Fill in a single-value directory entry, using type SHORT when
+ * the value fits in 16 bits and type LONG otherwise.
+ */
+static void
+TIFFSetupShortLong(TIFF* tif, ttag_t tag, TIFFDirEntry* dir, uint32 v)
+{
+	dir->tdir_tag = (uint16) tag;
+	dir->tdir_count = 1;
+	if (v <= 0xffffL) {
+		dir->tdir_type = (short) TIFF_SHORT;
+		dir->tdir_offset = TIFFInsertData(tif, (int) TIFF_SHORT, v);
+		return;
+	}
+	dir->tdir_type = (short) TIFF_LONG;
+	dir->tdir_offset = v;
+}
+
+/*
+ * Fill in a directory entry holding exactly one SHORT value.
+ */
+static void
+TIFFSetupShort(TIFF* tif, ttag_t tag, TIFFDirEntry* dir, uint16 v)
+{
+	dir->tdir_type = (short) TIFF_SHORT;
+	dir->tdir_count = 1;
+	dir->tdir_tag = (uint16) tag;
+	dir->tdir_offset = TIFFInsertData(tif, (int) TIFF_SHORT, v);
+}
+#undef MakeShortDirent
+
+#define	NITEMS(x)	(sizeof (x) / sizeof (x[0]))
+/*
+ * Emit a SHORT directory entry with one value per sample, each
+ * value being the single setting TIFFGetField() reports for the
+ * tag; values are written indirectly when they do not fit in
+ * the entry.  Returns the status of TIFFWriteShortArray().
+ */
+static int
+TIFFWritePerSampleShorts(TIFF* tif, ttag_t tag, TIFFDirEntry* dir)
+{
+	uint16 onstack[10], setting;
+	uint16* vals = onstack;
+	uint16 nsamples = tif->tif_dir.td_samplesperpixel, k = 0;
+	int rc;
+
+	if (nsamples > NITEMS(onstack)) {
+		vals = (uint16*) _TIFFmalloc(nsamples * sizeof (uint16));
+		if (vals == NULL) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			    "No space to write per-sample shorts");
+			return (0);
+		}
+	}
+	TIFFGetField(tif, tag, &setting);
+	while (k < nsamples)
+		vals[k++] = setting;
+
+	dir->tdir_type = (uint16) TIFF_SHORT;
+	dir->tdir_tag = (uint16) tag;
+	dir->tdir_count = nsamples;
+	rc = TIFFWriteShortArray(tif, dir, vals);
+	if (vals != onstack)
+		_TIFFfree((char*) vals);
+	return (rc);
+}
+
+/*
+ * Emit a directory entry of the given ``type'' with one value per sample,
+ * each being the single setting reported for the tag.  TIFFGetField() must
+ * return that setting as a double.  Returns the TIFFWriteAnyArray() status.
+ */
+static int
+TIFFWritePerSampleAnys(TIFF* tif,
+    TIFFDataType type, ttag_t tag, TIFFDirEntry* dir)
+{
+	double onstack[10], setting;
+	double* vals = onstack;
+	uint16 nsamples = tif->tif_dir.td_samplesperpixel, k = 0;
+	int rc;
+
+	if (nsamples > NITEMS(onstack)) {
+		vals = (double*) _TIFFmalloc(nsamples * sizeof (double));
+		if (vals == NULL) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			    "No space to write per-sample values");
+			return (0);
+		}
+	}
+	TIFFGetField(tif, tag, &setting);
+	while (k < nsamples)
+		vals[k++] = setting;
+	rc = TIFFWriteAnyArray(tif, type, tag, dir, nsamples, vals);
+	if (vals != onstack)
+		_TIFFfree(vals);
+	return (rc);
+}
+#undef NITEMS
+
+/*
+ * Build a two-element SHORT entry for a tag whose two values
+ * TIFFGetField() hands back by value rather than as an array.
+ */
+static int
+TIFFSetupShortPair(TIFF* tif, ttag_t tag, TIFFDirEntry* dir)
+{
+	uint16 pair[2];
+
+	/* Collect both values into one small array so they can be written together. */
+	TIFFGetField(tif, tag, pair, pair + 1);
+	dir->tdir_count = 2;
+	dir->tdir_type = (uint16) TIFF_SHORT;
+	dir->tdir_tag = (uint16) tag;
+	return (TIFFWriteShortArray(tif, dir, pair));
+}
+
+/*
+ * Setup a directory entry for an NxM table of shorts,
+ * where M is known to be 2**bitspersample, and write
+ * the associated indirect data.  ``table'' supplies N column pointers;
+ * the columns are written back to back starting at the current
+ * tif_dataoff.  Returns 1 on success, 0 as soon as one column fails.
+ */
+static int
+TIFFWriteShortTable(TIFF* tif,
+    ttag_t tag, TIFFDirEntry* dir, uint32 n, uint16** table)
+{
+	uint32 start = tif->tif_dataoff;	/* offset of the first column */
+	uint32 col = 0;
+
+	dir->tdir_tag = (uint16) tag;
+	dir->tdir_type = (short) TIFF_SHORT;
+	/*
+	 * TIFFWriteData() sizes its write from the entry's count, so
+	 * describe a single column while the columns go out one by one.
+	 */
+	dir->tdir_count = (uint32) (1L<<tif->tif_dir.td_bitspersample);
+	while (col < n) {
+		if (!TIFFWriteData(tif, dir, (char *)table[col]))
+			return (0);
+		col++;
+	}
+	/* Now make the entry describe the whole table */
+	dir->tdir_offset = start;
+	dir->tdir_count *= n;
+	return (1);
+}
+
+/*
+ * Write/copy data associated with an ASCII or opaque tag value.
+ *
+ * Values longer than 4 bytes are written out of line by TIFFWriteData().
+ * Shorter values are packed into dir->tdir_offset using the same
+ * file-byte-order rule as TIFFWriteShortArray(): for a big-endian file the
+ * first byte occupies the most significant position of the 32-bit word,
+ * for a little-endian file the least significant one.  Packing by value
+ * (instead of copying raw bytes into the field) keeps the bytes in file
+ * order when the word is byte-swapped for output on a host whose byte
+ * order differs from the file's.
+ * NOTE(review): relies on the directory writer swapping tdir_offset as a
+ * 32-bit quantity, as TIFFWriteShortArray() already assumes -- confirm.
+ *
+ * Unused bytes of the field are set to zero.  Returns 1 on success and 0
+ * if the out-of-line write fails.
+ */
+static int
+TIFFWriteByteArray(TIFF* tif, TIFFDirEntry* dir, char* cp)
+{
+	/* unsigned view so that bytes >= 0x80 do not sign-extend */
+	const unsigned char* bp = (const unsigned char*) cp;
+	uint32 packed = 0;
+	uint32 i;
+
+	if (dir->tdir_count > 4)
+		return (TIFFWriteData(tif, dir, cp));
+
+	for (i = 0; i < dir->tdir_count; i++) {
+		if (tif->tif_header.tiff_magic == TIFF_BIGENDIAN)
+			packed |= (uint32) bp[i] << (24 - 8*i);
+		else
+			packed |= (uint32) bp[i] << (8*i);
+	}
+	dir->tdir_offset = packed;
+	return (1);
+}
+
+/*
+ * Setup a directory entry of an array of SHORT
+ * or SSHORT and write the associated indirect values.
+ *
+ * Up to two values are packed into dir->tdir_offset itself; which half of
+ * the word the first value occupies depends on the byte order recorded in
+ * the file header.  Longer arrays are written through TIFFWriteData().
+ */
+static int
+TIFFWriteShortArray(TIFF* tif, TIFFDirEntry* dir, uint16* v)
+{
+	int bigendian;
+
+	if (dir->tdir_count > 2)
+		return (TIFFWriteData(tif, dir, (char*) v));
+
+	bigendian = (tif->tif_header.tiff_magic == TIFF_BIGENDIAN);
+
+	/* first value: high half for big-endian files, low half otherwise */
+	if (bigendian)
+		dir->tdir_offset = (uint32) ((long) v[0] << 16);
+	else
+		dir->tdir_offset = v[0] & 0xffff;
+
+	/* optional second value goes in the other half */
+	if (dir->tdir_count == 2) {
+		if (bigendian)
+			dir->tdir_offset |= v[1] & 0xffff;
+		else
+			dir->tdir_offset |= (long) v[1] << 16;
+	}
+	return (1);
+}
+
+/*
+ * Setup a directory entry of an array of LONG
+ * or SLONG and write the associated indirect values.
+ * A single value is stored directly in dir->tdir_offset; any other count
+ * is written through TIFFWriteData().
+ */
+static int
+TIFFWriteLongArray(TIFF* tif, TIFFDirEntry* dir, uint32* v)
+{
+	if (dir->tdir_count != 1)
+		return (TIFFWriteData(tif, dir, (char*) v));
+
+	dir->tdir_offset = v[0];
+	return (1);
+}
+
+/*
+ * Setup a directory entry of an array of RATIONAL
+ * or SRATIONAL and write the associated indirect values.
+ *
+ * Each of the dir->tdir_count floats in ``v'' becomes a pair of 32-bit
+ * words (numerator, denominator) in a temporary buffer.  The denominator
+ * starts at 1; value and denominator are both multiplied by 8 for as long
+ * as each is below 2**28, and the scaled value is then rounded to give the
+ * numerator.  A negative value is only representable as SRATIONAL; for
+ * RATIONAL a warning is issued and zero is stored instead.
+ *
+ * Returns 0 if the temporary buffer cannot be allocated, otherwise the
+ * status of TIFFWriteData().
+ */
+static int
+TIFFWriteRationalArray(TIFF* tif, TIFFDirEntry* dir, float* v)
+{
+	uint32 i;
+	uint32* t;
+	int status;
+
+	t = (uint32*) _TIFFmalloc(2 * dir->tdir_count * sizeof (uint32));
+	if (t == NULL) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "No space to write RATIONAL array");
+		return (0);
+	}
+	for (i = 0; i < dir->tdir_count; i++) {
+		float fv = v[i];
+		int sign = 1;
+		uint32 den;
+
+		if (fv < 0) {
+			if (dir->tdir_type == TIFF_RATIONAL) {
+				TIFFWarningExt(tif->tif_clientdata, tif->tif_name,
+	"\"%s\": Information lost writing value (%g) as (unsigned) RATIONAL",
+				_TIFFFieldWithTag(tif,dir->tdir_tag)->field_name,
+				fv);
+				fv = 0;
+			} else
+				fv = -fv, sign = -1;
+		}
+		den = 1L;
+		if (fv > 0) {
+			while (fv < 1L<<(31-3) && den < 1L<<(31-3))
+				fv *= 1<<3, den *= 1L<<3;
+		}
+		/*
+		 * Round the magnitude first and apply the sign in integer
+		 * arithmetic.  Converting a negative floating value directly
+		 * to an unsigned type is undefined in C, and some targets
+		 * yield 0 for it instead of the two's complement pattern.
+		 */
+		if (sign < 0)
+			t[2*i+0] = (uint32) -(int32) (fv + 0.5);
+		else
+			t[2*i+0] = (uint32) (fv + 0.5);
+		t[2*i+1] = den;
+	}
+	status = TIFFWriteData(tif, dir, (char *)t);
+	_TIFFfree((char*) t);
+	return (status);
+}
+
+/*
+ * Setup a directory entry of an array of FLOAT and write the associated
+ * values.  The values in ``v'' are first passed through
+ * TIFFCvtNativeToIEEEFloat().  A single value is stored in
+ * dir->tdir_offset itself; any other count is written through
+ * TIFFWriteData().  Returns 1 on success, 0 on a write failure.
+ */
+static int
+TIFFWriteFloatArray(TIFF* tif, TIFFDirEntry* dir, float* v)
+{
+	TIFFCvtNativeToIEEEFloat(tif, dir->tdir_count, v);
+	if (dir->tdir_count == 1) {
+		/*
+		 * Copy the 32-bit pattern rather than reading the float
+		 * through a uint32 pointer, which violates the C aliasing
+		 * rules and may be miscompiled by optimizing compilers.
+		 */
+		_TIFFmemcpy(&dir->tdir_offset, &v[0], sizeof (uint32));
+		return (1);
+	}
+	return (TIFFWriteData(tif, dir, (char*) v));
+}
+
+/*
+ * Setup a directory entry of an array of DOUBLE and write the associated
+ * values.  The values are passed through TIFFCvtNativeToIEEEDouble() and
+ * then always written through TIFFWriteData(), whose status is returned.
+ */
+static int
+TIFFWriteDoubleArray(TIFF* tif, TIFFDirEntry* dir, double* v)
+{
+	int status;
+
+	TIFFCvtNativeToIEEEDouble(tif, dir->tdir_count, v);
+	status = TIFFWriteData(tif, dir, (char*) v);
+	return (status);
+}
+
+/*
+ * Write an array of ``type'' values for a specified tag (i.e. this is a tag
+ * which is allowed to have different types, e.g. SMaxSampleType).
+ * Internally the data values are represented as double since a double can
+ * hold any of the TIFF tag types (yes, this should really be an abstract
+ * type tany_t for portability).  The data is converted into the specified
+ * type in a temporary buffer and then handed off to the appropriate array
+ * writer.
+ *
+ * ``dir'' receives the tag, type and count; ``v'' holds the ``n'' source
+ * values.  For TIFF_DOUBLE no conversion is needed and ``v'' itself is
+ * passed to TIFFWriteDoubleArray().  The temporary buffer lives on the
+ * stack unless the converted data needs more than 10*sizeof(double) bytes.
+ *
+ * Returns 1 on success.  Returns 0 if the buffer cannot be allocated, if
+ * the underlying writer fails, or if ``type'' is not one of the integer or
+ * floating point types handled below.
+ */
+static int
+TIFFWriteAnyArray(TIFF* tif,
+    TIFFDataType type, ttag_t tag, TIFFDirEntry* dir, uint32 n, double* v)
+{
+	char buf[10 * sizeof(double)];
+	char* w = buf;
+	int i, status = 0;
+
+	if (n * TIFFDataWidth(type) > sizeof buf) {
+		w = (char*) _TIFFmalloc(n * TIFFDataWidth(type));
+		if (w == NULL) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			    "No space to write array");
+			return (0);
+		}
+	}
+
+	dir->tdir_tag = (uint16) tag;
+	dir->tdir_type = (uint16) type;
+	dir->tdir_count = n;
+
+	switch (type) {
+	case TIFF_BYTE:
+		{ 
+			uint8* bp = (uint8*) w;
+			for (i = 0; i < (int) n; i++)
+				bp[i] = (uint8) v[i];
+			if (!TIFFWriteByteArray(tif, dir, (char*) bp))
+				goto out;
+		}
+		break;
+	case TIFF_SBYTE:
+		{ 
+			int8* bp = (int8*) w;
+			for (i = 0; i < (int) n; i++)
+				bp[i] = (int8) v[i];
+			if (!TIFFWriteByteArray(tif, dir, (char*) bp))
+				goto out;
+		}
+		break;
+	case TIFF_SHORT:
+		{
+			uint16* bp = (uint16*) w;
+			for (i = 0; i < (int) n; i++)
+				bp[i] = (uint16) v[i];
+			if (!TIFFWriteShortArray(tif, dir, (uint16*)bp))
+				goto out;
+		}
+		break;
+	case TIFF_SSHORT:
+		{ 
+			int16* bp = (int16*) w;
+			for (i = 0; i < (int) n; i++)
+				bp[i] = (int16) v[i];
+			if (!TIFFWriteShortArray(tif, dir, (uint16*)bp))
+				goto out;
+		}
+		break;
+	case TIFF_LONG:
+		{
+			uint32* bp = (uint32*) w;
+			for (i = 0; i < (int) n; i++)
+				bp[i] = (uint32) v[i];
+			if (!TIFFWriteLongArray(tif, dir, bp))
+				goto out;
+		}
+		break;
+	case TIFF_SLONG:
+		{
+			int32* bp = (int32*) w;
+			for (i = 0; i < (int) n; i++)
+				bp[i] = (int32) v[i];
+			if (!TIFFWriteLongArray(tif, dir, (uint32*) bp))
+				goto out;
+		}
+		break;
+	case TIFF_FLOAT:
+		{ 
+			float* bp = (float*) w;
+			for (i = 0; i < (int) n; i++)
+				bp[i] = (float) v[i];
+			if (!TIFFWriteFloatArray(tif, dir, bp))
+				goto out;
+		}
+		break;
+	case TIFF_DOUBLE:
+		/*
+		 * The scratch buffer is not used here, but for n > 10 it
+		 * has still been allocated above.  Leave through the common
+		 * exit so that it is released instead of returning directly.
+		 */
+		if (!TIFFWriteDoubleArray(tif, dir, v))
+			goto out;
+		break;
+	default:
+		/* TIFF_NOTYPE */
+		/* TIFF_ASCII */
+		/* TIFF_UNDEFINED */
+		/* TIFF_RATIONAL */
+		/* TIFF_SRATIONAL */
+		goto out;
+	}
+	status = 1;
+ out:
+	if (w != buf)
+		_TIFFfree(w);
+	return (status);
+}
+
+/*
+ * Write the TransferFunction tag from td_transferfunction, as either one
+ * column or three columns of 2**bitspersample shorts.
+ */
+static int
+TIFFWriteTransferFunction(TIFF* tif, TIFFDirEntry* dir)
+{
+	TIFFDirectory* td = &tif->tif_dir;
+	uint16** tf = td->td_transferfunction;
+	tsize_t n = (1L<<td->td_bitspersample) * sizeof (uint16);
+	int ncols = 1;		/* one column unless the columns differ */
+
+	/*
+	 * Check if the table can be written as a single column,
+	 * or if it must be written as 3 columns.  Note that we
+	 * write a 3-column tag if there are 2 samples/pixel and
+	 * a single column of data won't suffice--hmm.
+	 */
+	switch (td->td_samplesperpixel - td->td_extrasamples) {
+	case 0:
+	case 1:
+		break;
+	default:
+		if (_TIFFmemcmp(tf[0], tf[2], n)) {
+			ncols = 3;
+			break;
+		}
+		/* third column equals the first: go on to check the second */
+	case 2:
+		if (_TIFFmemcmp(tf[0], tf[1], n))
+			ncols = 3;
+		break;
+	}
+	return (TIFFWriteShortTable(tif,
+	    TIFFTAG_TRANSFERFUNCTION, dir, ncols, tf));
+}
+
+/*
+ * Write the InkNames tag: td_inknameslen bytes taken from td_inknames,
+ * stored as an ASCII entry through TIFFWriteByteArray().
+ */
+static int
+TIFFWriteInkNames(TIFF* tif, TIFFDirEntry* dir)
+{
+	TIFFDirectory* td = &tif->tif_dir;
+
+	dir->tdir_count = td->td_inknameslen;
+	dir->tdir_type = (short) TIFF_ASCII;
+	dir->tdir_tag = TIFFTAG_INKNAMES;
+
+	return (TIFFWriteByteArray(tif, dir, td->td_inknames));
+}
+
+/*
+ * Write a contiguous directory item.
+ *
+ * The count and type already stored in ``dir'' determine how many bytes of
+ * ``cp'' are written.  If the file is flagged TIFF_SWAB the buffer is
+ * byte-swapped in place first, according to the entry's type.  The data is
+ * written at the current tif_dataoff, which is recorded in the entry and
+ * then advanced by the byte count rounded up to an even number.
+ * Returns 1 on success; on a seek or write failure an error is reported
+ * and 0 is returned.
+ */
+static int
+TIFFWriteData(TIFF* tif, TIFFDirEntry* dir, char* cp)
+{
+	tsize_t nbytes;
+
+	if (tif->tif_flags & TIFF_SWAB) {
+		if (dir->tdir_type == TIFF_SHORT ||
+		    dir->tdir_type == TIFF_SSHORT)
+			TIFFSwabArrayOfShort((uint16*) cp, dir->tdir_count);
+		else if (dir->tdir_type == TIFF_LONG ||
+		    dir->tdir_type == TIFF_SLONG ||
+		    dir->tdir_type == TIFF_FLOAT)
+			TIFFSwabArrayOfLong((uint32*) cp, dir->tdir_count);
+		else if (dir->tdir_type == TIFF_RATIONAL ||
+		    dir->tdir_type == TIFF_SRATIONAL)
+			/* two longs per rational */
+			TIFFSwabArrayOfLong((uint32*) cp, 2*dir->tdir_count);
+		else if (dir->tdir_type == TIFF_DOUBLE)
+			TIFFSwabArrayOfDouble((double*) cp, dir->tdir_count);
+	}
+	dir->tdir_offset = tif->tif_dataoff;
+	nbytes = dir->tdir_count * TIFFDataWidth((TIFFDataType) dir->tdir_type);
+	if (!SeekOK(tif, dir->tdir_offset) ||
+	    !WriteOK(tif, cp, nbytes)) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Error writing data for field \"%s\"",
+		    _TIFFFieldWithTag(tif, dir->tdir_tag)->field_name);
+		return (0);
+	}
+	/* keep the next data item on an even offset */
+	tif->tif_dataoff += (nbytes + 1) & ~1;
+	return (1);
+}
+
+/*
+ * Similar to TIFFWriteDirectory(), but if the directory has already
+ * been written once, it is relocated to the end of the file, in case it
+ * has changed in size.  Note that this will result in the loss of the 
+ * previously used directory space. 
+ *
+ * The link that currently leads to this directory (the first-directory
+ * offset in the file header, or the next-directory field found by walking
+ * the directory chain) is overwritten with zero and tif_diroff is cleared;
+ * TIFFWriteDirectory() is then called.  Returns 0 if a link cannot be read
+ * or rewritten, otherwise the result of TIFFWriteDirectory().
+ */ 
+
+int 
+TIFFRewriteDirectory( TIFF *tif )
+{
+    static const char module[] = "TIFFRewriteDirectory";
+
+    /* We don't need to do anything special if it hasn't been written. */
+    if( tif->tif_diroff == 0 )
+        return TIFFWriteDirectory( tif );
+
+    /*
+    ** Find and zero the pointer to this directory, so that TIFFLinkDirectory
+    ** will cause it to be added after this directory's current pre-link.
+    */
+    
+    /* Is it the first directory in the file? */
+    if (tif->tif_header.tiff_diroff == tif->tif_diroff) 
+    {
+        tif->tif_header.tiff_diroff = 0;	/* in-memory header no longer points anywhere */
+        tif->tif_diroff = 0;
+
+        TIFFSeekFile(tif, (toff_t)(TIFF_MAGIC_SIZE+TIFF_VERSION_SIZE),
+		     SEEK_SET);	/* header's directory offset follows magic and version */
+        if (!WriteOK(tif, &(tif->tif_header.tiff_diroff), 
+                     sizeof (tif->tif_diroff))) 	/* value is zero, so byte order is irrelevant */
+        {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Error updating TIFF header");
+            return (0);
+        }
+    }
+    else
+    {
+        toff_t  nextdir, off;
+
+	nextdir = tif->tif_header.tiff_diroff;	/* start the walk at the first directory */
+	do {
+		uint16 dircount;
+
+		if (!SeekOK(tif, nextdir) ||
+		    !ReadOK(tif, &dircount, sizeof (dircount))) {
+			TIFFErrorExt(tif->tif_clientdata, module, "Error fetching directory count");
+			return (0);
+		}
+		if (tif->tif_flags & TIFF_SWAB)
+			TIFFSwabShort(&dircount);
+		(void) TIFFSeekFile(tif,
+		    dircount * sizeof (TIFFDirEntry), SEEK_CUR);	/* skip over the entries to the link field */
+		if (!ReadOK(tif, &nextdir, sizeof (nextdir))) {
+			TIFFErrorExt(tif->tif_clientdata, module, "Error fetching directory link");
+			return (0);
+		}
+		if (tif->tif_flags & TIFF_SWAB)
+			TIFFSwabLong(&nextdir);
+	} while (nextdir != tif->tif_diroff && nextdir != 0);	/* stop at the link to this directory, or at the end of the chain */
+        off = TIFFSeekFile(tif, 0, SEEK_CUR); /* get current offset */
+        (void) TIFFSeekFile(tif, off - (toff_t)sizeof(nextdir), SEEK_SET);	/* back up onto the link just read */
+        tif->tif_diroff = 0;
+	if (!WriteOK(tif, &(tif->tif_diroff), sizeof (nextdir))) {	/* overwrite that link with zero */
+		TIFFErrorExt(tif->tif_clientdata, module, "Error writing directory link");
+		return (0);
+	}
+    }
+
+    /*
+    ** Now use TIFFWriteDirectory() normally.
+    */
+
+    return TIFFWriteDirectory( tif );
+}
+
+
+/*
+ * Link the current directory into the
+ * directory chain for the file.
+ *
+ * tif_diroff is set to the end of the file rounded up to an even offset.
+ * That offset (byte-swapped first if the file is flagged TIFF_SWAB) is
+ * then stored in one of three places: the current SubIFD slot when
+ * TIFF_INSUBIFD is set, the file header when there is no first directory
+ * yet, or the link field of the last directory in the chain.
+ * Returns 1 on success, 0 after reporting a read or write error.
+ */
+static int
+TIFFLinkDirectory(TIFF* tif)
+{
+	static const char module[] = "TIFFLinkDirectory";
+	toff_t nextdir;
+	toff_t diroff, off;
+
+	tif->tif_diroff = (TIFFSeekFile(tif, (toff_t) 0, SEEK_END)+1) &~ 1;	/* end of file, rounded up to even */
+	diroff = tif->tif_diroff;	/* copy that gets written to the file */
+	if (tif->tif_flags & TIFF_SWAB)
+		TIFFSwabLong(&diroff);
+
+	/*
+	 * Handle SubIFDs
+	 */
+        if (tif->tif_flags & TIFF_INSUBIFD) {
+		(void) TIFFSeekFile(tif, tif->tif_subifdoff, SEEK_SET);
+		if (!WriteOK(tif, &diroff, sizeof (diroff))) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "%s: Error writing SubIFD directory link",
+			    tif->tif_name);
+			return (0);
+		}
+		/*
+		 * Advance to the next SubIFD or, if this is
+		 * the last one configured, revert back to the
+		 * normal directory linkage.
+		 */
+		if (--tif->tif_nsubifd)
+			tif->tif_subifdoff += sizeof (diroff);
+		else
+			tif->tif_flags &= ~TIFF_INSUBIFD;
+		return (1);
+	}
+
+	if (tif->tif_header.tiff_diroff == 0) {
+		/*
+		 * First directory, overwrite offset in header.
+		 */
+		tif->tif_header.tiff_diroff = tif->tif_diroff;	/* in-memory header keeps the unswapped value */
+		(void) TIFFSeekFile(tif,
+				    (toff_t)(TIFF_MAGIC_SIZE+TIFF_VERSION_SIZE),
+                                    SEEK_SET);
+		if (!WriteOK(tif, &diroff, sizeof (diroff))) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Error writing TIFF header");
+			return (0);
+		}
+		return (1);
+	}
+	/*
+	 * Not the first directory, search to the last and append.
+	 */
+	nextdir = tif->tif_header.tiff_diroff;
+	do {
+		uint16 dircount;
+
+		if (!SeekOK(tif, nextdir) ||
+		    !ReadOK(tif, &dircount, sizeof (dircount))) {
+			TIFFErrorExt(tif->tif_clientdata, module, "Error fetching directory count");
+			return (0);
+		}
+		if (tif->tif_flags & TIFF_SWAB)
+			TIFFSwabShort(&dircount);
+		(void) TIFFSeekFile(tif,
+		    dircount * sizeof (TIFFDirEntry), SEEK_CUR);	/* skip over the entries to the link field */
+		if (!ReadOK(tif, &nextdir, sizeof (nextdir))) {
+			TIFFErrorExt(tif->tif_clientdata, module, "Error fetching directory link");
+			return (0);
+		}
+		if (tif->tif_flags & TIFF_SWAB)
+			TIFFSwabLong(&nextdir);
+	} while (nextdir != 0);	/* a zero link marks the last directory */
+        off = TIFFSeekFile(tif, 0, SEEK_CUR); /* get current offset */
+        (void) TIFFSeekFile(tif, off - (toff_t)sizeof(nextdir), SEEK_SET);	/* back up onto the zero link just read */
+	if (!WriteOK(tif, &diroff, sizeof (diroff))) {
+		TIFFErrorExt(tif->tif_clientdata, module, "Error writing directory link");
+		return (0);
+	}
+	return (1);
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_dumpmode.c b/src/libtiff/tif_dumpmode.c
new file mode 100644
index 0000000..d58a98b
--- /dev/null
+++ b/src/libtiff/tif_dumpmode.c
@@ -0,0 +1,117 @@
+/* $Header: /cvsroot/imtoolkit/im/src/libtiff/tif_dumpmode.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ *
+ * "Null" Compression Algorithm Support.
+ */
+#include "tiffiop.h"
+
+/*
+ * Encode a hunk of pixels.
+ *
+ * "Encoding" is a plain copy of ``cc'' bytes from ``pp'' into the raw data
+ * buffer, done in pieces no larger than the room left in that buffer; the
+ * buffer is flushed with TIFFFlushData1() each time it fills up.
+ * Returns 1 on success and -1 if a flush fails.
+ */
+static int
+DumpModeEncode(TIFF* tif, tidata_t pp, tsize_t cc, tsample_t s)
+{
+	(void) s;
+	for (; cc > 0; ) {
+		tsize_t n = cc;
+
+		/* clip the piece to what still fits in the raw buffer */
+		if (tif->tif_rawcc + n > tif->tif_rawdatasize)
+			n = tif->tif_rawdatasize - tif->tif_rawcc;
+
+                assert( n > 0 );
+
+		/*
+		 * When the client has pointed the raw data buffer at its
+		 * own data, source and destination coincide and there is
+		 * nothing to copy.
+		 */
+		if (tif->tif_rawcp != pp)
+			_TIFFmemcpy(tif->tif_rawcp, pp, n);
+		tif->tif_rawcp += n;
+		tif->tif_rawcc += n;
+		pp += n;
+		cc -= n;
+
+		if (tif->tif_rawcc < tif->tif_rawdatasize)
+			continue;
+		if (!TIFFFlushData1(tif))
+			return (-1);
+	}
+	return (1);
+}
+
+/*
+ * Decode a hunk of pixels.
+ *
+ * "Decoding" copies ``cc'' bytes from the raw data buffer into ``buf'' and
+ * consumes them.  Returns 1 on success; if fewer than ``cc'' raw bytes are
+ * available an error is reported and 0 is returned.
+ */
+static int
+DumpModeDecode(TIFF* tif, tidata_t buf, tsize_t cc, tsample_t s)
+{
+	(void) s;
+	if (cc <= tif->tif_rawcc) {
+		/*
+		 * Skip the copy when the client has set up the raw data
+		 * buffer so that it already is the destination.
+		 */
+		if (buf != tif->tif_rawcp)
+			_TIFFmemcpy(buf, tif->tif_rawcp, cc);
+		tif->tif_rawcc -= cc;
+		tif->tif_rawcp += cc;
+		return (1);
+	}
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+	    "DumpModeDecode: Not enough data for scanline %d",
+	    tif->tif_row);
+	return (0);
+}
+
+/*
+ * Seek forwards nrows in the current strip: the raw data pointer moves
+ * ahead by nrows scanlines and the remaining byte count shrinks by the
+ * same amount.  Always returns 1.
+ */
+static int
+DumpModeSeek(TIFF* tif, uint32 nrows)
+{
+	tif->tif_rawcc -= nrows * tif->tif_scanlinesize;
+	tif->tif_rawcp += nrows * tif->tif_scanlinesize;
+
+	return (1);
+}
+
+/*
+ * Initialize dump mode: install the copy-only routines above as the row,
+ * strip and tile coder methods and as the seek method.  ``scheme'' is not
+ * used.  Always returns 1.
+ */
+int
+TIFFInitDumpMode(TIFF* tif, int scheme)
+{
+	(void) scheme;
+
+	/* encoding side */
+	tif->tif_encoderow = DumpModeEncode;
+	tif->tif_encodestrip = DumpModeEncode;
+	tif->tif_encodetile = DumpModeEncode;
+
+	/* decoding side */
+	tif->tif_decoderow = DumpModeDecode;
+	tif->tif_decodestrip = DumpModeDecode;
+	tif->tif_decodetile = DumpModeDecode;
+
+	tif->tif_seek = DumpModeSeek;
+	return (1);
+}
diff --git a/src/libtiff/tif_error.c b/src/libtiff/tif_error.c
new file mode 100644
index 0000000..456e6f9
--- /dev/null
+++ b/src/libtiff/tif_error.c
@@ -0,0 +1,73 @@
+/* $Header: /cvsroot/imtoolkit/im/src/libtiff/tif_error.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ */
+#include "tiffiop.h"
+
+TIFFErrorHandlerExt _TIFFerrorHandlerExt = NULL;	/* extended error handler; NULL until one is set with TIFFSetErrorHandlerExt() */
+
+/*
+ * Install ``handler'' as the error handler and return the one that was
+ * installed before.
+ */
+TIFFErrorHandler
+TIFFSetErrorHandler(TIFFErrorHandler handler)
+{
+	TIFFErrorHandler previous;
+
+	previous = _TIFFerrorHandler;
+	_TIFFerrorHandler = handler;
+	return (previous);
+}
+
+/*
+ * Install ``handler'' as the extended error handler (the variant that also
+ * receives a client handle) and return the one that was installed before.
+ */
+TIFFErrorHandlerExt
+TIFFSetErrorHandlerExt(TIFFErrorHandlerExt handler)
+{
+	TIFFErrorHandlerExt previous;
+
+	previous = _TIFFerrorHandlerExt;
+	_TIFFerrorHandlerExt = handler;
+	return (previous);
+}
+
+/*
+ * Report an error.  ``module'' and the printf-style ``fmt'' with its
+ * arguments are forwarded to the plain error handler and to the extended
+ * error handler, whichever of the two are installed; the extended handler
+ * is given 0 as its client handle.
+ */
+void
+TIFFError(const char* module, const char* fmt, ...)
+{
+	va_list ap;
+
+	/*
+	 * Once a va_list has been handed to a function that consumes it,
+	 * its value is indeterminate, so the list is restarted for each
+	 * handler instead of being shared between the two calls.
+	 */
+	if (_TIFFerrorHandler) {
+		va_start(ap, fmt);
+		(*_TIFFerrorHandler)(module, fmt, ap);
+		va_end(ap);
+	}
+	if (_TIFFerrorHandlerExt) {
+		va_start(ap, fmt);
+		(*_TIFFerrorHandlerExt)(0, module, fmt, ap);
+		va_end(ap);
+	}
+}
+
+/*
+ * Report an error on behalf of the client handle ``fd''.  ``module'' and
+ * the printf-style ``fmt'' with its arguments are forwarded to the plain
+ * error handler and to the extended error handler, whichever of the two
+ * are installed; only the extended handler receives ``fd''.
+ */
+void
+TIFFErrorExt(thandle_t fd, const char* module, const char* fmt, ...)
+{
+	va_list ap;
+
+	/*
+	 * Once a va_list has been handed to a function that consumes it,
+	 * its value is indeterminate, so the list is restarted for each
+	 * handler instead of being shared between the two calls.
+	 */
+	if (_TIFFerrorHandler) {
+		va_start(ap, fmt);
+		(*_TIFFerrorHandler)(module, fmt, ap);
+		va_end(ap);
+	}
+	if (_TIFFerrorHandlerExt) {
+		va_start(ap, fmt);
+		(*_TIFFerrorHandlerExt)(fd, module, fmt, ap);
+		va_end(ap);
+	}
+}
+
diff --git a/src/libtiff/tif_extension.c b/src/libtiff/tif_extension.c
new file mode 100644
index 0000000..0d32a07
--- /dev/null
+++ b/src/libtiff/tif_extension.c
@@ -0,0 +1,111 @@
+/* $Header: /cvsroot/imtoolkit/im/src/libtiff/tif_extension.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ *
+ * Various routines support external extension of the tag set, and other
+ * application extension capabilities. 
+ */
+
+#include "tiffiop.h"
+
+/*
+** Return the number of custom tag values held by the current directory.
+*/
+int TIFFGetTagListCount( TIFF *tif )
+
+{
+    return tif->tif_dir.td_customValueCount;
+}
+
+/*
+** Return the tag number of the custom value at position tag_index in the
+** current directory, or (ttag_t) -1 when the index is out of range.
+*/
+ttag_t TIFFGetTagListEntry( TIFF *tif, int tag_index )
+
+{
+    TIFFDirectory* td = &tif->tif_dir;
+
+    if( tag_index >= 0 && tag_index < td->td_customValueCount )
+        return td->td_customValues[tag_index].info->field_tag;
+
+    return (ttag_t) -1;
+}
+
+/*
+** Hand application code a pointer to the TIFFTagMethods stored inside the
+** TIFF structure, so the methods can be read or replaced without the
+** application needing the definition of the private TIFF structure.
+*/
+TIFFTagMethods *TIFFAccessTagMethods( TIFF *tif )
+
+{
+    TIFFTagMethods *methods = &(tif->tif_tagmethods);
+
+    return methods;
+}
+
+/*
+** Look up the client info registered under ``name''.  Returns the stored
+** data pointer of the first list entry whose name matches, or NULL when
+** no entry matches.
+*/
+void *TIFFGetClientInfo( TIFF *tif, const char *name )
+
+{
+    TIFFClientInfoLink *link;
+
+    for( link = tif->tif_clientinfo; link != NULL; link = link->next )
+    {
+        if( strcmp(link->name,name) == 0 )
+            return link->data;
+    }
+
+    return NULL;
+}
+
+/*
+** Associate ``data'' with ``name'' on this TIFF handle.  If an entry with
+** that name already exists its data pointer is replaced; otherwise a new
+** entry holding a private copy of the name is pushed on the front of the
+** client info list.
+**
+** If memory for a new entry cannot be obtained, an error is reported
+** through TIFFErrorExt() and the list is left unchanged (a later
+** TIFFGetClientInfo() for the name then returns NULL).  Allocation
+** failure is checked explicitly rather than with assert(), which
+** disappears in NDEBUG builds and would let a NULL pointer be
+** dereferenced.
+*/
+void TIFFSetClientInfo( TIFF *tif, void *data, const char *name )
+
+{
+    TIFFClientInfoLink *link = tif->tif_clientinfo;
+    char *namecopy;
+
+    /*
+    ** Do we have an existing link with this name?  If so, just
+    ** set it.
+    */
+    while( link != NULL && strcmp(link->name,name) != 0 )
+        link = link->next;
+
+    if( link != NULL )
+    {
+        link->data = data;
+        return;
+    }
+
+    /*
+    ** Create a new link.
+    */
+
+    link = (TIFFClientInfoLink *) _TIFFmalloc(sizeof(TIFFClientInfoLink));
+    namecopy = (char *) _TIFFmalloc(strlen(name)+1);
+    if( link == NULL || namecopy == NULL )
+    {
+        /* release whichever half was obtained so nothing leaks */
+        if( link != NULL )
+            _TIFFfree(link);
+        if( namecopy != NULL )
+            _TIFFfree(namecopy);
+        TIFFErrorExt(tif->tif_clientdata, "TIFFSetClientInfo",
+                     "No space to register client info \"%s\"", name);
+        return;
+    }
+
+    strcpy(namecopy, name);
+    link->name = namecopy;
+    link->data = data;
+    link->next = tif->tif_clientinfo;
+
+    tif->tif_clientinfo = link;
+}
diff --git a/src/libtiff/tif_fax3.c b/src/libtiff/tif_fax3.c
new file mode 100644
index 0000000..96f5d13
--- /dev/null
+++ b/src/libtiff/tif_fax3.c
@@ -0,0 +1,1566 @@
+/* $Id: tif_fax3.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1990-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#include "tiffiop.h"
+#ifdef CCITT_SUPPORT
+/*
+ * TIFF Library.
+ *
+ * CCITT Group 3 (T.4) and Group 4 (T.6) Compression Support.
+ *
+ * This file contains support for decoding and encoding TIFF
+ * compression algorithms 2, 3, 4, and 32771.
+ *
+ * Decoder support is derived, with permission, from the code
+ * in Frank Cringle's viewfax program;
+ *      Copyright (C) 1990, 1995  Frank D. Cringle.
+ */
+#include "tif_fax3.h"
+#define	G3CODES
+#include "t4.h"
+#include <stdio.h>
+
+/*
+ * Compression+decompression state blocks are
+ * derived from this ``base state'' block.
+ * Fax3CodecState embeds it as its first member, which is what allows the
+ * same tif_data pointer to be viewed through Fax3State() as well as
+ * through DecoderState()/EncoderState().
+ */
+typedef struct {
+        int     rw_mode;                /* O_RDONLY for decode, else encode */
+	int	mode;			/* operating mode */
+	uint32	rowbytes;		/* bytes in a decoded scanline */
+	uint32	rowpixels;		/* pixels in a scanline */
+
+	uint16	cleanfaxdata;		/* CleanFaxData tag */
+	uint32	badfaxrun;		/* BadFaxRun tag */
+	uint32	badfaxlines;		/* BadFaxLines tag */
+	uint32	groupoptions;		/* Group 3/4 options tag */
+	uint32	recvparams;		/* encoded Class 2 session params */
+	char*	subaddress;		/* subaddress string */
+	uint32	recvtime;		/* time spent receiving (secs) */
+	char*	faxdcs;			/* Table 2/T.30 encoded session params */
+	TIFFVGetMethod vgetparent;	/* super-class method */
+	TIFFVSetMethod vsetparent;	/* super-class method */
+} Fax3BaseState;
+#define	Fax3State(tif)		((Fax3BaseState*) (tif)->tif_data)	/* codec-private data viewed as the base state */
+
+typedef enum { G3_1D, G3_2D } Ttag;	/* type of the encoder's ``tag'' state field below */
+typedef struct {
+	Fax3BaseState b;		/* must stay first: the state is also accessed as Fax3BaseState */
+
+	/* Decoder state info */
+	const unsigned char* bitmap;	/* bit reversal table */
+	uint32	data;			/* current i/o byte/word */
+	int	bit;			/* current i/o bit in byte */
+	int	EOLcnt;			/* count of EOL codes recognized */
+	TIFFFaxFillFunc fill;		/* fill routine */
+	uint32*	runs;			/* b&w runs for current/previous row */
+	uint32*	refruns;		/* runs for reference line */
+	uint32*	curruns;		/* runs for current line */
+
+	/* Encoder state info */
+	Ttag    tag;			/* encoding state */
+	unsigned char*	refline;	/* reference line for 2d decoding */
+	int	k;			/* #rows left that can be 2d encoded */
+	int	maxk;			/* max #rows that can be 2d encoded */
+} Fax3CodecState;
+#define	DecoderState(tif)	((Fax3CodecState*) Fax3State(tif))	/* same object as EncoderState(); the names differ only in intent */
+#define	EncoderState(tif)	((Fax3CodecState*) Fax3State(tif))
+
+#define	is2DEncoding(sp) \
+	(sp->b.groupoptions & GROUP3OPT_2DENCODING)	/* non-zero when the 2D-encoding bit is set in the group options */
+#define	isAligned(p,t)	((((unsigned long)(p)) & (sizeof (t)-1)) == 0)	/* true when address p is a multiple of sizeof (t) (a power of two is assumed) */
+
+/*
+ * Group 3 and Group 4 Decoding.
+ */
+
+/*
+ * These macros glue the TIFF library state to
+ * the state expected by Frank's decoder.
+ *
+ * DECLARE_STATE declares the local variables a decode routine works with
+ * (including the ``module'' name used in diagnostics); its expansion ends
+ * without a semicolon, which the invoking line supplies.
+ * DECLARE_STATE_2D adds the two extra locals used for 2D decoding.
+ */
+#define	DECLARE_STATE(tif, sp, mod)					\
+    static const char module[] = mod;					\
+    Fax3CodecState* sp = DecoderState(tif);				\
+    int a0;				/* reference element */		\
+    int lastx = sp->b.rowpixels;	/* last element in row */	\
+    uint32 BitAcc;			/* bit accumulator */		\
+    int BitsAvail;			/* # valid bits in BitAcc */	\
+    int RunLength;			/* length of current run */	\
+    unsigned char* cp;			/* next byte of input data */	\
+    unsigned char* ep;			/* end of input data */		\
+    uint32* pa;				/* place to stuff next run */	\
+    uint32* thisrun;			/* current row's run array */	\
+    int EOLcnt;				/* # EOL codes recognized */	\
+    const unsigned char* bitmap = sp->bitmap;	/* input data bit reverser */	\
+    const TIFFFaxTabEnt* TabEnt
+#define	DECLARE_STATE_2D(tif, sp, mod)					\
+    DECLARE_STATE(tif, sp, mod);					\
+    int b1;				/* next change on prev line */	\
+    uint32* pb				/* next run in reference line */\
+/*
+ * Load any state that may be changed during decoding: the bit accumulator,
+ * its bit count and the EOL count come from sp, the input window from tif.
+ */
+#define	CACHE_STATE(tif, sp) do {					\
+    BitAcc = sp->data;							\
+    BitsAvail = sp->bit;						\
+    EOLcnt = sp->EOLcnt;						\
+    cp = (unsigned char*) tif->tif_rawcp;				\
+    ep = cp + tif->tif_rawcc;						\
+} while (0)
+/*
+ * Save state possibly changed during decoding; tif_rawcc is reduced by the
+ * number of input bytes consumed before tif_rawcp is moved up to cp.
+ */
+#define	UNCACHE_STATE(tif, sp) do {					\
+    sp->bit = BitsAvail;						\
+    sp->data = BitAcc;							\
+    sp->EOLcnt = EOLcnt;						\
+    tif->tif_rawcc -= (tidata_t) cp - tif->tif_rawcp;			\
+    tif->tif_rawcp = (tidata_t) cp;					\
+} while (0)
+
+/*
+ * Setup state for decoding a strip: the bit accumulator and EOL count are
+ * reset, the bit reversal table is chosen from the FillOrder tag and, when
+ * a reference line exists, it is initialized to all white.
+ * Always returns 1.
+ */
+static int
+Fax3PreDecode(TIFF* tif, tsample_t s)
+{
+	Fax3CodecState* sp = DecoderState(tif);
+	uint32* ref;
+
+	(void) s;
+	assert(sp != NULL);
+
+	sp->EOLcnt = 0;			/* force initial scan for EOL */
+	sp->data = 0;
+	sp->bit = 0;			/* force initial read */
+
+	/*
+	 * Decoder assumes lsb-to-msb bit order.  The table is picked here
+	 * rather than in Fax3SetupState so that a viewer can keep the image
+	 * open, change the FillOrder tag value and decode again; otherwise
+	 * the image would have to be closed and reopened to reset the
+	 * state.
+	 */
+	sp->bitmap =
+	    TIFFGetBitRevTable(tif->tif_dir.td_fillorder != FILLORDER_LSB2MSB);
+
+	ref = sp->refruns;
+	if (ref) {			/* init reference line to white */
+		ref[0] = (uint32) sp->b.rowpixels;
+		ref[1] = 0;
+	}
+	return (1);
+}
+
+/*
+ * Routine for handling various errors/conditions.
+ * Note how they are "glued into the decoder" by
+ * overriding the definitions used by the decoder.
+ */
+
+/*
+ * Report (as an error) a bad code word met at column a0 of the given line
+ * of the current strip or tile.
+ */
+static void
+Fax3Unexpected(const char* module, TIFF* tif, uint32 line, uint32 a0)
+{
+	const char* unit = isTiled(tif) ? "tile" : "strip";
+	unsigned long unitno =
+	    (unsigned long) (isTiled(tif) ? tif->tif_curtile : tif->tif_curstrip);
+
+	TIFFErrorExt(tif->tif_clientdata, module, "%s: Bad code word at line %lu of %s %lu (x %lu)",
+	    tif->tif_name, (unsigned long) line, unit, unitno,
+	    (unsigned long) a0);
+}
+#define	unexpected(table, a0)	Fax3Unexpected(module, tif, line, a0)
+
+/*
+ * Report (as an error) uncompressed-mode data, which is not supported, met
+ * at column a0 of the given line of the current strip or tile.
+ */
+static void
+Fax3Extension(const char* module, TIFF* tif, uint32 line, uint32 a0)
+{
+	const char* unit = isTiled(tif) ? "tile" : "strip";
+	unsigned long unitno =
+	    (unsigned long) (isTiled(tif) ? tif->tif_curtile : tif->tif_curstrip);
+
+	TIFFErrorExt(tif->tif_clientdata, module,
+	    "%s: Uncompressed data (not supported) at line %lu of %s %lu (x %lu)",
+	    tif->tif_name, (unsigned long) line, unit, unitno,
+	    (unsigned long) a0);
+}
+#define	extension(a0)	Fax3Extension(module, tif, line, a0)
+
+/*
+ * Warn that a decoded row has the wrong length: ``a0'' is the length that
+ * was decoded and ``lastx'' the expected one.  The message says "Premature
+ * EOL" when the row came out short and "Line length mismatch" otherwise.
+ * Every value printed with %lu is cast to unsigned long, because a uint32
+ * argument is not guaranteed to have the width that %lu expects.
+ */
+static void
+Fax3BadLength(const char* module, TIFF* tif, uint32 line, uint32 a0, uint32 lastx)
+{
+	TIFFWarningExt(tif->tif_clientdata, module, "%s: %s at line %lu of %s %lu (got %lu, expected %lu)",
+	    tif->tif_name,
+	    a0 < lastx ? "Premature EOL" : "Line length mismatch",
+	    (unsigned long) line, isTiled(tif) ? "tile" : "strip",
+        (unsigned long) (isTiled(tif) ? tif->tif_curtile : tif->tif_curstrip),
+        (unsigned long) a0, (unsigned long) lastx);
+}
+#define	badlength(a0,lastx)	Fax3BadLength(module, tif, line, a0, lastx)
+
+/*
+ * Warn that the input ended early, at column a0 of the given line of the
+ * current strip or tile.
+ */
+static void
+Fax3PrematureEOF(const char* module, TIFF* tif, uint32 line, uint32 a0)
+{
+	const char* unit = isTiled(tif) ? "tile" : "strip";
+	unsigned long unitno =
+	    (unsigned long) (isTiled(tif) ? tif->tif_curtile : tif->tif_curstrip);
+
+	TIFFWarningExt(tif->tif_clientdata, module, "%s: Premature EOF at line %lu of %s %lu (x %lu)",
+	    tif->tif_name,
+	    (unsigned long) line, unit, unitno,
+	    (unsigned long) a0);
+}
+#define	prematureEOF(a0)	Fax3PrematureEOF(module, tif, line, a0)
+
+#define	Nop
+
+/*
+ * Decode the requested amount of G3 1D-encoded data.
+ *
+ * Rows are decoded one at a time for as long as ``occ'' is positive: the
+ * runs of a row are collected in sp->curruns and then rendered into
+ * ``buf'' by the sp->fill routine, after which ``buf'' and ``occ'' move on
+ * by one row (sp->b.rowbytes).  SYNC_EOL, EXPAND1D and CLEANUP_RUNS are
+ * macros defined outside this part of the file; they work on the locals
+ * declared by DECLARE_STATE and are given the label to jump to, presumably
+ * when the input runs out.  ``s'' is not used.
+ * Returns 1 when all rows were decoded and -1 after a premature end of
+ * data (the row in progress is still passed to the fill routine).
+ */
+static int
+Fax3Decode1D(TIFF* tif, tidata_t buf, tsize_t occ, tsample_t s)
+{
+	DECLARE_STATE(tif, sp, "Fax3Decode1D");
+        int line = 0;	/* rows decoded so far in this call; used by the diagnostic macros */
+
+	(void) s;
+	CACHE_STATE(tif, sp);
+	thisrun = sp->curruns;
+	while ((long)occ > 0) {
+		a0 = 0;	/* start each row at column 0 with an empty run list */
+		RunLength = 0;
+		pa = thisrun;
+#ifdef FAX3_DEBUG
+		printf("\nBitAcc=%08X, BitsAvail = %d\n", BitAcc, BitsAvail);
+		printf("-------------------- %d\n", tif->tif_row);
+		fflush(stdout);
+#endif
+		SYNC_EOL(EOF1D);
+		EXPAND1D(EOF1Da);
+		(*sp->fill)(buf, thisrun, pa, lastx);
+		buf += sp->b.rowbytes;
+		occ -= sp->b.rowbytes;
+                line++;
+		continue;	/* normal path: skip the EOF handling below */
+	EOF1D:				/* premature EOF */
+		CLEANUP_RUNS();
+	EOF1Da:				/* premature EOF */
+		(*sp->fill)(buf, thisrun, pa, lastx);
+		UNCACHE_STATE(tif, sp);
+		return (-1);
+	}
+	UNCACHE_STATE(tif, sp);
+	return (1);
+}
+
+/* Exchange a and b, both of type t, through a block-local temporary. */
+#define	SWAP(t,a,b)	{ t x; x = (a); (a) = (b); (b) = x; }
+/*
+ * Decode the requested amount of G3 2D-encoded data.
+ *
+ * Each row starts with a one-bit tag that selects 1D or 2D expansion for
+ * that row.  After a row is rendered, the current and reference run
+ * lists are swapped so the row just decoded becomes the reference for
+ * the next one.  Returns 1 once the loop completes and -1 when control
+ * reaches one of the EOF labels; in that case the runs collected so far
+ * are still passed to the fill routine.  The sample argument s is unused.
+ *
+ * NOTE(review): a0, RunLength, pa, pb, b1, thisrun, lastx, BitAcc,
+ * BitsAvail and EOLcnt are presumably introduced by DECLARE_STATE_2D,
+ * which is defined outside this chunk -- confirm.
+ */
+static int
+Fax3Decode2D(TIFF* tif, tidata_t buf, tsize_t occ, tsample_t s)
+{
+	DECLARE_STATE_2D(tif, sp, "Fax3Decode2D");
+	/* row counter; the badlength/prematureEOF macros refer to it by name */
+        int line = 0;
+	int is1D;			/* current line is 1d/2d-encoded */
+
+	(void) s;
+	CACHE_STATE(tif, sp);
+	while ((long)occ > 0) {
+		/* start each row at pixel 0 with an empty run list */
+		a0 = 0;
+		RunLength = 0;
+		pa = thisrun = sp->curruns;
+#ifdef FAX3_DEBUG
+		printf("\nBitAcc=%08X, BitsAvail = %d EOLcnt = %d",
+		    BitAcc, BitsAvail, EOLcnt);
+#endif
+		SYNC_EOL(EOF2D);
+		NeedBits8(1, EOF2D);
+		is1D = GetBits(1);	/* 1D/2D-encoding tag bit */
+		ClrBits(1);
+#ifdef FAX3_DEBUG
+		printf(" %s\n-------------------- %d\n",
+		    is1D ? "1D" : "2D", tif->tif_row);
+		fflush(stdout);
+#endif
+		/* load the first entry of the reference row's run list */
+		pb = sp->refruns;
+		b1 = *pb++;
+		if (is1D)
+			EXPAND1D(EOF2Da);
+		else
+			EXPAND2D(EOF2Da);
+		(*sp->fill)(buf, thisrun, pa, lastx);
+		SETVALUE(0);		/* imaginary change for reference */
+		/* the row just decoded becomes the next row's reference */
+		SWAP(uint32*, sp->curruns, sp->refruns);
+		buf += sp->b.rowbytes;
+		occ -= sp->b.rowbytes;
+                line++;
+		continue;
+	EOF2D:				/* premature EOF */
+		CLEANUP_RUNS();
+	EOF2Da:				/* premature EOF */
+		(*sp->fill)(buf, thisrun, pa, lastx);
+		UNCACHE_STATE(tif, sp);
+		return (-1);
+	}
+	UNCACHE_STATE(tif, sp);
+	return (1);
+}
+#undef SWAP
+
+/*
+ * The ZERO & FILL macros must handle spans < 2*sizeof(long) bytes.
+ * For machines with 64-bit longs this is <16 bytes; otherwise
+ * this is <8 bytes.  We optimize the code here to reflect the
+ * machine characteristics.
+ *
+ * FILL(n, cp) stores 0xff and ZERO(n, cp) stores 0 into the n bytes at
+ * cp, then advances cp by n.  Both depend on deliberate case
+ * fall-through: the switch enters at `case n' and runs down to case 1.
+ * A value of n outside the listed cases matches nothing, so no byte is
+ * written and cp is left unchanged.  n is evaluated more than once.
+ */
+#if SIZEOF_LONG == 8
+# define FILL(n, cp)							    \
+    switch (n) {							    \
+    case 15:(cp)[14] = 0xff; case 14:(cp)[13] = 0xff; case 13: (cp)[12] = 0xff;\
+    case 12:(cp)[11] = 0xff; case 11:(cp)[10] = 0xff; case 10: (cp)[9] = 0xff;\
+    case  9: (cp)[8] = 0xff; case  8: (cp)[7] = 0xff; case  7: (cp)[6] = 0xff;\
+    case  6: (cp)[5] = 0xff; case  5: (cp)[4] = 0xff; case  4: (cp)[3] = 0xff;\
+    case  3: (cp)[2] = 0xff; case  2: (cp)[1] = 0xff;			      \
+    case  1: (cp)[0] = 0xff; (cp) += (n); case 0:  ;			      \
+    }
+# define ZERO(n, cp)							\
+    switch (n) {							\
+    case 15:(cp)[14] = 0; case 14:(cp)[13] = 0; case 13: (cp)[12] = 0;	\
+    case 12:(cp)[11] = 0; case 11:(cp)[10] = 0; case 10: (cp)[9] = 0;	\
+    case  9: (cp)[8] = 0; case  8: (cp)[7] = 0; case  7: (cp)[6] = 0;	\
+    case  6: (cp)[5] = 0; case  5: (cp)[4] = 0; case  4: (cp)[3] = 0;	\
+    case  3: (cp)[2] = 0; case  2: (cp)[1] = 0;				\
+    case  1: (cp)[0] = 0; (cp) += (n); case 0:  ;			\
+    }
+#else
+# define FILL(n, cp)							    \
+    switch (n) {							    \
+    case 7: (cp)[6] = 0xff; case 6: (cp)[5] = 0xff; case 5: (cp)[4] = 0xff; \
+    case 4: (cp)[3] = 0xff; case 3: (cp)[2] = 0xff; case 2: (cp)[1] = 0xff; \
+    case 1: (cp)[0] = 0xff; (cp) += (n); case 0:  ;			    \
+    }
+# define ZERO(n, cp)							\
+    switch (n) {							\
+    case 7: (cp)[6] = 0; case 6: (cp)[5] = 0; case 5: (cp)[4] = 0;	\
+    case 4: (cp)[3] = 0; case 3: (cp)[2] = 0; case 2: (cp)[1] = 0;	\
+    case 1: (cp)[0] = 0; (cp) += (n); case 0:  ;			\
+    }
+#endif
+
+/*
+ * Bit-fill a row according to the white/black
+ * runs generated during G3/G4 decoding.
+ *
+ * buf   - output row, written most-significant bit first
+ * runs  - start of the run-length list; entries are taken in pairs,
+ *         the first of each pair is cleared to 0 bits and the second
+ *         is set to 1 bits
+ * erun  - one past the last run; when the list holds an odd number of
+ *         entries a zero-length run is appended at *erun
+ * lastx - row width in pixels; a run that would pass it is clamped in
+ *         place (the list itself is modified)
+ *
+ * On return the runs must add up to exactly lastx (asserted).
+ */
+void
+_TIFFFax3fillruns(unsigned char* buf, uint32* runs, uint32* erun, uint32 lastx)
+{
+	/* _fillmasks[k] has its k most-significant bits set */
+	static const unsigned char _fillmasks[] =
+	    { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
+	unsigned char* cp;
+	uint32 x, bx, run;
+	int32 n, nw;
+	long* lp;
+
+	/* pad to an even count so the loop can always consume pairs */
+	if ((erun-runs)&1)
+	    *erun++ = 0;
+	x = 0;
+	for (; runs < erun; runs += 2) {
+	    /* first run of the pair: clear bits */
+	    run = runs[0];
+	    /* the second test also catches wrap-around of x+run */
+	    if (x+run > lastx || run > lastx )
+		run = runs[0] = (uint32) (lastx - x);
+	    if (run) {
+		cp = buf + (x>>3);
+		bx = x&7;
+		if (run > 8-bx) {
+		    if (bx) {			/* align to byte boundary */
+			*cp++ &= 0xff << (8-bx);
+			run -= 8-bx;
+		    }
+		    if( (n = run >> 3) != 0 ) {	/* multiple bytes to fill */
+			if ((n/sizeof (long)) > 1) {
+			    /*
+			     * Align to longword boundary and fill.
+			     */
+			    for (; n && !isAligned(cp, long); n--)
+				    *cp++ = 0x00;
+			    lp = (long*) cp;
+			    nw = (int32)(n / sizeof (long));
+			    n -= nw * sizeof (long);
+			    do {
+				    *lp++ = 0L;
+			    } while (--nw);
+			    cp = (unsigned char*) lp;
+			}
+			/* leftover whole bytes */
+			ZERO(n, cp);
+			run &= 7;
+		    }
+		    /* leftover bits at the top of the next byte */
+		    if (run)
+			cp[0] &= 0xff >> run;
+		} else
+		    /* the whole run lies inside one byte */
+		    cp[0] &= ~(_fillmasks[run]>>bx);
+		x += runs[0];
+	    }
+	    /* second run of the pair: set bits */
+	    run = runs[1];
+	    if (x+run > lastx || run > lastx )
+		run = runs[1] = lastx - x;
+	    if (run) {
+		cp = buf + (x>>3);
+		bx = x&7;
+		if (run > 8-bx) {
+		    if (bx) {			/* align to byte boundary */
+			*cp++ |= 0xff >> bx;
+			run -= 8-bx;
+		    }
+		    if( (n = run>>3) != 0 ) {	/* multiple bytes to fill */
+			if ((n/sizeof (long)) > 1) {
+			    /*
+			     * Align to longword boundary and fill.
+			     */
+			    for (; n && !isAligned(cp, long); n--)
+				*cp++ = 0xff;
+			    lp = (long*) cp;
+			    nw = (int32)(n / sizeof (long));
+			    n -= nw * sizeof (long);
+			    do {
+				*lp++ = -1L;
+			    } while (--nw);
+			    cp = (unsigned char*) lp;
+			}
+			/* leftover whole bytes */
+			FILL(n, cp);
+			run &= 7;
+		    }
+		    /* leftover bits at the top of the next byte */
+		    if (run)
+			cp[0] |= 0xff00 >> run;
+		} else
+		    /* the whole run lies inside one byte */
+		    cp[0] |= _fillmasks[run]>>bx;
+		x += runs[1];
+	    }
+	}
+	assert(x == lastx);
+}
+#undef	ZERO
+#undef	FILL
+
+/*
+ * Setup G3/G4-related compression/decompression state
+ * before data is processed.  This routine is called once
+ * per image -- it sets up different state based on whether
+ * or not decoding or encoding is being done and whether
+ * 1D- or 2D-encoded data is involved.
+ *
+ * Returns 1 on success.  Returns 0 when BitsPerSample is not 1 or when
+ * either allocation fails.
+ *
+ * NOTE(review): nothing here releases a runs or refline buffer left by
+ * an earlier call, and this function is installed as both the
+ * setupdecode and the setupencode hook.  Verify it cannot run twice on
+ * the same state block.
+ */
+static int
+Fax3SetupState(TIFF* tif)
+{
+	TIFFDirectory* td = &tif->tif_dir;
+	Fax3BaseState* sp = Fax3State(tif);
+	int needsRefLine;
+	/* the same state block as sp, viewed through the full codec type */
+	Fax3CodecState* dsp = (Fax3CodecState*) Fax3State(tif);
+	uint32 rowbytes, rowpixels, nruns;
+
+	if (td->td_bitspersample != 1) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "Bits/sample must be 1 for Group 3/4 encoding/decoding");
+		return (0);
+	}
+	/*
+	 * Calculate the scanline/tile widths.
+	 */
+	if (isTiled(tif)) {
+		rowbytes = TIFFTileRowSize(tif);
+		rowpixels = td->td_tilewidth;
+	} else {
+		rowbytes = TIFFScanlineSize(tif);
+		rowpixels = td->td_imagewidth;
+	}
+	sp->rowbytes = (uint32) rowbytes;
+	sp->rowpixels = (uint32) rowpixels;
+	/*
+	 * Allocate any additional space required for decoding/encoding.
+	 */
+	/* a reference row is needed for G3 2D encoding and for all G4 data */
+	needsRefLine = (
+	    (sp->groupoptions & GROUP3OPT_2DENCODING) ||
+	    td->td_compression == COMPRESSION_CCITTFAX4
+	);
+
+	nruns = needsRefLine ? 2*TIFFroundup(rowpixels,32) : rowpixels;
+
+	dsp->runs = (uint32*) _TIFFCheckMalloc(tif, 2*nruns+3, sizeof (uint32),
+					  "for Group 3/4 run arrays");
+	if (dsp->runs == NULL)
+		return (0);
+	dsp->curruns = dsp->runs;
+	/*
+	 * NOTE(review): refruns begins nruns/2 entries into an allocation
+	 * of 2*nruns+3 entries -- confirm this leaves the current row's
+	 * run list enough room before it reaches the reference list.
+	 */
+	if (needsRefLine)
+		dsp->refruns = dsp->runs + (nruns>>1);
+	else
+		dsp->refruns = NULL;
+	if (td->td_compression == COMPRESSION_CCITTFAX3
+	    && is2DEncoding(dsp)) {	/* NB: default is 1D routine */
+		tif->tif_decoderow = Fax3Decode2D;
+		tif->tif_decodestrip = Fax3Decode2D;
+		tif->tif_decodetile = Fax3Decode2D;
+	}
+
+	if (needsRefLine) {		/* 2d encoding */
+		Fax3CodecState* esp = EncoderState(tif);
+		/*
+		 * 2d encoding requires a scanline
+		 * buffer for the ``reference line''; the
+		 * scanline against which delta encoding
+		 * is referenced.  The reference line must
+		 * be initialized to be ``white'' (done elsewhere).
+		 */
+		esp->refline = (unsigned char*) _TIFFmalloc(rowbytes);
+		if (esp->refline == NULL) {
+			/* dsp->runs stays allocated; Fax3Cleanup frees it */
+			TIFFErrorExt(tif->tif_clientdata, "Fax3SetupState",
+			    "%s: No space for Group 3/4 reference line",
+			    tif->tif_name);
+			return (0);
+		}
+	} else					/* 1d encoding */
+		EncoderState(tif)->refline = NULL;
+
+	return (1);
+}
+
+/*
+ * CCITT Group 3 FAX Encoding.
+ */
+
+/*
+ * Append the pending byte (sp)->data to the raw output buffer, calling
+ * TIFFFlushData1 first when the buffer is already full (its result is
+ * discarded), then reset the accumulator to empty: data 0, bit 8.
+ */
+#define	Fax3FlushBits(tif, sp) {				\
+	if ((tif)->tif_rawcc >= (tif)->tif_rawdatasize)		\
+		(void) TIFFFlushData1(tif);			\
+	*(tif)->tif_rawcp++ = (tidataval_t) (sp)->data;		\
+	(tif)->tif_rawcc++;					\
+	(sp)->data = 0, (sp)->bit = 8;				\
+}
+/*
+ * Same as Fax3FlushBits, but operates on variables named `data' and
+ * `bit' that must exist in the scope where the macro is expanded.
+ */
+#define	_FlushBits(tif) {					\
+	if ((tif)->tif_rawcc >= (tif)->tif_rawdatasize)		\
+		(void) TIFFFlushData1(tif);			\
+	*(tif)->tif_rawcp++ = (tidataval_t) data;		\
+	(tif)->tif_rawcc++;					\
+	data = 0, bit = 8;					\
+}
+/* _msbmask[k] keeps the k least-significant bits of a value */
+static const int _msbmask[9] =
+    { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
+/*
+ * Append the low `length' bits of `bits' to the output, most
+ * significant bit first.  `bit' is the number of unused bits left in
+ * the pending byte `data'; both must be variables in the expanding
+ * scope.  The macro assigns to `length', so that argument must be a
+ * modifiable variable, and `bits' and `length' are used unparenthesized.
+ */
+#define	_PutBits(tif, bits, length) {				\
+	while (length > bit) {					\
+		data |= bits >> (length - bit);			\
+		length -= bit;					\
+		_FlushBits(tif);				\
+	}							\
+	data |= (bits & _msbmask[length]) << (bit - length);	\
+	bit -= length;						\
+	if (bit == 0)						\
+		_FlushBits(tif);				\
+}
+	
+/*
+ * Write a variable-length bit-value to
+ * the output stream.  Values are
+ * assumed to be at most 16 bits.
+ *
+ * The encoder's bit accumulator is copied into the locals `bit' and
+ * `data', which _PutBits updates by name, and written back afterwards.
+ */
+static void
+Fax3PutBits(TIFF* tif, unsigned int bits, unsigned int length)
+{
+	Fax3CodecState* sp = EncoderState(tif);
+	unsigned int bit = sp->bit;	/* unused bits left in data */
+	int data = sp->data;		/* partially filled output byte */
+
+	_PutBits(tif, bits, length);
+
+	sp->data = data;
+	sp->bit = bit;
+}
+
+/*
+ * Write a code to the output stream.
+ * te points at a table entry; its code and length members are emitted.
+ */
+#define putcode(tif, te)	Fax3PutBits(tif, (te)->code, (te)->length)
+
+#ifdef FAX3_DEBUG
+/*
+ * Debug helpers.  They expand against variables named tab, data, bit,
+ * code and length in the enclosing scope.  DEBUG_COLOR appends "W" to
+ * the label when tab is the white-code table and "B" otherwise;
+ * DEBUG_PRINT prints the accumulator state, the label, the run length
+ * and the code's bits, most-significant bit first.
+ */
+#define	DEBUG_COLOR(w) (tab == TIFFFaxWhiteCodes ? w "W" : w "B")
+#define	DEBUG_PRINT(what,len) {						\
+    int t;								\
+    printf("%08X/%-2d: %s%5d\t", data, bit, DEBUG_COLOR(what), len);	\
+    for (t = length-1; t >= 0; t--)					\
+	putchar(code & (1<<t) ? '1' : '0');				\
+    putchar('\n');							\
+}
+#endif
+
+/*
+ * Write the sequence of codes that describes
+ * the specified span of zero's or one's.  The
+ * appropriate table that holds the make-up and
+ * terminating codes is supplied.
+ *
+ * As indexed here, tab[0..63] are the terminating codes and
+ * tab[63 + k] is the make-up code for a run of 64*k pixels.
+ */
+static void
+putspan(TIFF* tif, int32 span, const tableentry* tab)
+{
+	Fax3CodecState* sp = EncoderState(tif);
+	/* local copies of the bit accumulator; _PutBits updates them by name */
+	unsigned int bit = sp->bit;
+	int data = sp->data;
+	unsigned int code, length;
+
+	/* very long spans: repeat the 2560-pixel make-up code */
+	while (span >= 2624) {
+		const tableentry* te = &tab[63 + (2560>>6)];
+		code = te->code, length = te->length;
+#ifdef FAX3_DEBUG
+		DEBUG_PRINT("MakeUp", te->runlen);
+#endif
+		_PutBits(tif, code, length);
+		span -= te->runlen;
+	}
+	/* one make-up code for the largest multiple of 64 that fits */
+	if (span >= 64) {
+		const tableentry* te = &tab[63 + (span>>6)];
+		assert(te->runlen == 64*(span>>6));
+		code = te->code, length = te->length;
+#ifdef FAX3_DEBUG
+		DEBUG_PRINT("MakeUp", te->runlen);
+#endif
+		_PutBits(tif, code, length);
+		span -= te->runlen;
+	}
+	/* a terminating code is always written, even for a remainder of 0 */
+	code = tab[span].code, length = tab[span].length;
+#ifdef FAX3_DEBUG
+	DEBUG_PRINT("  Term", tab[span].runlen);
+#endif
+	_PutBits(tif, code, length);
+
+	sp->data = data;
+	sp->bit = bit;
+}
+
+/*
+ * Write an EOL code to the output stream.  The zero-fill
+ * logic for byte-aligning encoded scanlines is handled
+ * here.  We also handle writing the tag bit for the next
+ * scanline when doing 2d encoding.
+ *
+ * The EOL code is 12 bits long.  With 2D encoding a 13th bit follows:
+ * 1 when the encoder's tag is G3_1D, 0 otherwise.
+ */
+static void
+Fax3PutEOL(TIFF* tif)
+{
+	Fax3CodecState* sp = EncoderState(tif);
+	/* local copies of the bit accumulator; _PutBits updates them by name */
+	unsigned int bit = sp->bit;
+	int data = sp->data;
+	unsigned int code, length, tparm;
+
+	if (sp->b.groupoptions & GROUP3OPT_FILLBITS) {
+		/*
+		 * Force bit alignment so EOL will terminate on
+		 * a byte boundary.  That is, force the bit alignment
+		 * to 16-12 = 4 before putting out the EOL code.
+		 */
+		int align = 8 - 4;
+		if (align != sp->bit) {
+			/* number of zero bits needed to leave 4 free bits */
+			if (align > sp->bit)
+				align = sp->bit + (8 - align);
+			else
+				align = sp->bit - align;
+			code = 0;
+			/* _PutBits assigns to its length argument, so pass a scratch copy */
+			tparm=align; 
+			_PutBits(tif, 0, tparm);
+		}
+	}
+	code = EOL, length = 12;
+	if (is2DEncoding(sp))
+		code = (code<<1) | (sp->tag == G3_1D), length++;
+	_PutBits(tif, code, length);
+
+	sp->data = data;
+	sp->bit = bit;
+}
+
+/*
+ * Prepare the encoder for a new strip: empty the bit accumulator, mark
+ * the next row as 1D, blank the reference line if there is one, and
+ * choose the K parameter used for 2D encoding.  Always returns 1.
+ * The sample argument s is unused.
+ */
+static int
+Fax3PreEncode(TIFF* tif, tsample_t s)
+{
+	Fax3CodecState* enc = EncoderState(tif);
+
+	(void) s;
+	assert(enc != NULL);
+	enc->tag = G3_1D;
+	enc->data = 0;
+	enc->bit = 8;
+	/*
+	 * Group 4 needs the reference line cleared up front.  For
+	 * Group 3 it makes no difference, since the first scanline of
+	 * each strip ends up copied into the reference line anyway.
+	 */
+	if (enc->refline != NULL)
+		_TIFFmemset(enc->refline, 0x00, enc->b.rowbytes);
+	if (is2DEncoding(enc)) {
+		/*
+		 * Per the CCITT spec, 2d encoding is applied to K
+		 * consecutive scanlines, with K depending on the vertical
+		 * resolution (2 for <= 200 lpi, 4 for > 200 lpi).  The
+		 * directory code initializes td_yresolution to 0, so K
+		 * is 2 unless the YResolution tag has been set.  The
+		 * threshold is fudged down to 150 lpi to sidestep
+		 * problems with units conversion.
+		 */
+		float lpi = tif->tif_dir.td_yresolution;
+
+		if (tif->tif_dir.td_resolutionunit == RESUNIT_CENTIMETER)
+			lpi *= 2.54f;		/* per cm -> per inch */
+		if (lpi > 150)
+			enc->maxk = 4;
+		else
+			enc->maxk = 2;
+		enc->k = enc->maxk - 1;
+	} else {
+		enc->maxk = 0;
+		enc->k = 0;
+	}
+	return (1);
+}
+
+/*
+ * zeroruns[b] is the number of consecutive 0 bits at the
+ * most-significant end of byte value b (8 for 0x00, 0 for 0x80-0xff).
+ */
+static const unsigned char zeroruns[256] = {
+    8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,	/* 0x00 - 0x0f */
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,	/* 0x10 - 0x1f */
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,	/* 0x20 - 0x2f */
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,	/* 0x30 - 0x3f */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0x40 - 0x4f */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0x50 - 0x5f */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0x60 - 0x6f */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0x70 - 0x7f */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x80 - 0x8f */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x90 - 0x9f */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0xa0 - 0xaf */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0xb0 - 0xbf */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0xc0 - 0xcf */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0xd0 - 0xdf */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0xe0 - 0xef */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0xf0 - 0xff */
+};
+/*
+ * oneruns[b] is the number of consecutive 1 bits at the
+ * most-significant end of byte value b (8 for 0xff, 0 for 0x00-0x7f).
+ */
+static const unsigned char oneruns[256] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x0f */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x1f */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x20 - 0x2f */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x30 - 0x3f */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x40 - 0x4f */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x50 - 0x5f */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x60 - 0x6f */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x70 - 0x7f */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0x80 - 0x8f */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0x90 - 0x9f */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0xa0 - 0xaf */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0xb0 - 0xbf */
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,	/* 0xc0 - 0xcf */
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,	/* 0xd0 - 0xdf */
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,	/* 0xe0 - 0xef */
+    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8,	/* 0xf0 - 0xff */
+};
+
+/*
+ * On certain systems it pays to inline
+ * the routines that find pixel spans.
+ */
+#ifdef VAXC
+static	int32 find0span(unsigned char*, int32, int32);
+static	int32 find1span(unsigned char*, int32, int32);
+#pragma inline(find0span,find1span)
+#endif
+
+/*
+ * Find a span of ones or zeros using the supplied
+ * table.  The ``base'' of the bit string is supplied
+ * along with the start+end bit indices.
+ *
+ * find0span returns the length of the run of 0 bits that starts at bit
+ * index bs of bp, counting bits most-significant first within each
+ * byte, and never looking past bit index be.  The result is 0 when
+ * be <= bs.
+ */
+inline static int32
+find0span(unsigned char* bp, int32 bs, int32 be)
+{
+	int32 bits = be - bs;		/* bits still to examine */
+	int32 n, span;
+
+	bp += bs>>3;
+	/*
+	 * Check partial byte on lhs.
+	 */
+	if (bits > 0 && (n = (bs & 7))) {
+		/* shift the n bits before the start out of the byte */
+		span = zeroruns[(*bp << n) & 0xff];
+		if (span > 8-n)		/* table value too generous */
+			span = 8-n;
+		if (span > bits)	/* constrain span to bit range */
+			span = bits;
+		if (n+span < 8)		/* doesn't extend to edge of byte */
+			return (span);
+		bits -= span;
+		bp++;
+	} else
+		span = 0;
+	/* the long-at-a-time scan is used only for at least two longs of bits */
+	if (bits >= (int32)(2 * 8 * sizeof(long))) {
+		long* lp;
+		/*
+		 * Align to longword boundary and check longwords.
+		 */
+		while (!isAligned(bp, long)) {
+			if (*bp != 0x00)
+				return (span + zeroruns[*bp]);
+			span += 8, bits -= 8;
+			bp++;
+		}
+		lp = (long*) bp;
+		while ((bits >= (int32)(8 * sizeof(long))) && (0 == *lp)) {
+			span += 8*sizeof (long), bits -= 8*sizeof (long);
+			lp++;
+		}
+		bp = (unsigned char*) lp;
+	}
+	/*
+	 * Scan full bytes for all 0's.
+	 */
+	while (bits >= 8) {
+		if (*bp != 0x00)	/* end of run */
+			return (span + zeroruns[*bp]);
+		span += 8, bits -= 8;
+		bp++;
+	}
+	/*
+	 * Check partial byte on rhs.
+	 */
+	if (bits > 0) {
+		n = zeroruns[*bp];
+		span += (n > bits ? bits : n);
+	}
+	return (span);
+}
+
+/*
+ * Return the length of the run of 1 bits that starts at bit index bs of
+ * bp, counting bits most-significant first within each byte, and never
+ * looking past bit index be.  The result is 0 when be <= bs.
+ */
+inline static int32
+find1span(unsigned char* bp, int32 bs, int32 be)
+{
+	int32 bits = be - bs;		/* bits still to examine */
+	int32 n, span;
+
+	bp += bs>>3;
+	/*
+	 * Check partial byte on lhs.
+	 */
+	if (bits > 0 && (n = (bs & 7))) {
+		/* shift the n bits before the start out of the byte */
+		span = oneruns[(*bp << n) & 0xff];
+		if (span > 8-n)		/* table value too generous */
+			span = 8-n;
+		if (span > bits)	/* constrain span to bit range */
+			span = bits;
+		if (n+span < 8)		/* doesn't extend to edge of byte */
+			return (span);
+		bits -= span;
+		bp++;
+	} else
+		span = 0;
+	/* the long-at-a-time scan is used only for at least two longs of bits */
+	if (bits >= (int32)(2 * 8 * sizeof(long))) {
+		long* lp;
+		/*
+		 * Align to longword boundary and check longwords.
+		 */
+		while (!isAligned(bp, long)) {
+			if (*bp != 0xff)
+				return (span + oneruns[*bp]);
+			span += 8, bits -= 8;
+			bp++;
+		}
+		lp = (long*) bp;
+		/* ~0 is int -1; converted to long it compares equal to an all-ones word */
+		while ((bits >= (int32)(8 * sizeof(long))) && (~0 == *lp)) {
+			span += 8*sizeof (long), bits -= 8*sizeof (long);
+			lp++;
+		}
+		bp = (unsigned char*) lp;
+	}
+	/*
+	 * Scan full bytes for all 1's.
+	 */
+	while (bits >= 8) {
+		if (*bp != 0xff)	/* end of run */
+			return (span + oneruns[*bp]);
+		span += 8, bits -= 8;
+		bp++;
+	}
+	/*
+	 * Check partial byte on rhs.
+	 */
+	if (bits > 0) {
+		n = oneruns[*bp];
+		span += (n > bits ? bits : n);
+	}
+	return (span);
+}
+
+/*
+ * Return the offset of the next bit in the range
+ * [bs..be] that is different from the specified
+ * color.  The end, be, is returned if no such bit
+ * exists.
+ *
+ * A non-zero _color scans a run of ones, zero scans a run of zeros.
+ * The arguments are expanded unparenthesized and _bs is evaluated
+ * twice, so pass simple expressions without side effects.
+ */
+#define	finddiff(_cp, _bs, _be, _color)	\
+	(_bs + (_color ? find1span(_cp,_bs,_be) : find0span(_cp,_bs,_be)))
+/*
+ * Like finddiff, but also check the starting bit
+ * against the end in case start > end.
+ * When _bs >= _be the result is _be and _color is not evaluated.
+ */
+#define	finddiff2(_cp, _bs, _be, _color) \
+	(_bs < _be ? finddiff(_cp,_bs,_be,_color) : _be)
+
+/*
+ * 1d-encode one row of `bits' pixels taken from bp.  The row is written
+ * as alternating white and black spans, white first, each coded with
+ * putspan.  When byte or word alignment is requested through the fax
+ * mode, the output is padded after the row.  Always returns 1.
+ */
+static int
+Fax3Encode1DRow(TIFF* tif, unsigned char* bp, uint32 bits)
+{
+	Fax3CodecState* enc = EncoderState(tif);
+	uint32 pos = 0;		/* first pixel not yet encoded */
+	int black = 0;		/* colour of the next span; rows start white */
+
+	do {
+		int32 run;
+
+		if (black) {
+			run = find1span(bp, pos, bits);		/* black span */
+			putspan(tif, run, TIFFFaxBlackCodes);
+		} else {
+			run = find0span(bp, pos, bits);		/* white span */
+			putspan(tif, run, TIFFFaxWhiteCodes);
+		}
+		pos += run;
+		black = !black;
+	} while (pos < bits);
+
+	if ((enc->b.mode & (FAXMODE_BYTEALIGN|FAXMODE_WORDALIGN)) == 0)
+		return (1);
+	if (enc->bit != 8) {			/* byte-align */
+		Fax3FlushBits(tif, enc);
+	}
+	if ((enc->b.mode & FAXMODE_WORDALIGN) &&
+	    !isAligned(tif->tif_rawcp, uint16)) {	/* word-align */
+		Fax3FlushBits(tif, enc);
+	}
+	return (1);
+}
+
+/*
+ * Mode codes used by the 2D encoder.  Each entry lists the code length
+ * in bits, then the code value, then a run length of 0; the bit pattern
+ * is spelled out in the trailing comment.
+ */
+static const tableentry horizcode =
+    { 3, 0x1, 0 };	/* 001 */
+static const tableentry passcode =
+    { 4, 0x1, 0 };	/* 0001 */
+/* vertical-mode codes, indexed by (b1 - a1) + 3 for differences -3..3 */
+static const tableentry vcodes[7] = {
+    { 7, 0x03, 0 },	/* 0000 011 */
+    { 6, 0x03, 0 },	/* 0000 11 */
+    { 3, 0x03, 0 },	/* 011 */
+    { 1, 0x1, 0 },	/* 1 */
+    { 3, 0x2, 0 },	/* 010 */
+    { 6, 0x02, 0 },	/* 0000 10 */
+    { 7, 0x02, 0 }	/* 0000 010 */
+};
+
+/*
+ * 2d-encode a row of pixels.  Consult the CCITT
+ * documentation for the algorithm.
+ *
+ * bp is the row to encode, rp the reference row, bits the row width in
+ * pixels.  Position names follow the code below: a0 is the current
+ * position, a1 the next colour change at or after it on the coding row,
+ * a2 the change after a1, and b1/b2 the changes located on the
+ * reference row.  Always returns 1.
+ */
+static int
+Fax3Encode2DRow(TIFF* tif, unsigned char* bp, unsigned char* rp, uint32 bits)
+{
+/* value (0 or 1) of pixel ix in buf, most-significant bit first */
+#define	PIXEL(buf,ix)	((((buf)[(ix)>>3]) >> (7-((ix)&7))) & 1)
+        uint32 a0 = 0;
+	/* a row that begins with a set pixel has its first change at 0 */
+	uint32 a1 = (PIXEL(bp, 0) != 0 ? 0 : finddiff(bp, 0, bits, 0));
+	uint32 b1 = (PIXEL(rp, 0) != 0 ? 0 : finddiff(rp, 0, bits, 0));
+	uint32 a2, b2;
+
+	for (;;) {
+		b2 = finddiff2(rp, b1, bits, PIXEL(rp,b1));
+		if (b2 >= a1) {
+			int32 d = b1 - a1;
+			if (!(-3 <= d && d <= 3)) {	/* horizontal mode */
+				a2 = finddiff2(bp, a1, bits, PIXEL(bp,a1));
+				putcode(tif, &horizcode);
+				/* at the very start of the row the first span is coded as white */
+				if (a0+a1 == 0 || PIXEL(bp, a0) == 0) {
+					putspan(tif, a1-a0, TIFFFaxWhiteCodes);
+					putspan(tif, a2-a1, TIFFFaxBlackCodes);
+				} else {
+					putspan(tif, a1-a0, TIFFFaxBlackCodes);
+					putspan(tif, a2-a1, TIFFFaxWhiteCodes);
+				}
+				a0 = a2;
+			} else {			/* vertical mode */
+				putcode(tif, &vcodes[d+3]);
+				a0 = a1;
+			}
+		} else {				/* pass mode */
+			putcode(tif, &passcode);
+			a0 = b2;
+		}
+		if (a0 >= bits)
+			break;
+		/* relocate a1 and b1 relative to the new a0 */
+		a1 = finddiff(bp, a0, bits, PIXEL(bp,a0));
+		b1 = finddiff(rp, a0, bits, !PIXEL(bp,a0));
+		b1 = finddiff(rp, b1, bits, PIXEL(bp,a0));
+	}
+	return (1);
+#undef PIXEL
+}
+
+/*
+ * Encode cc bytes of pixel data from bp, one row of sp->b.rowbytes
+ * bytes at a time.  Unless FAXMODE_NOEOL is set, each row is preceded
+ * by an EOL code.  With 2D encoding, 1D-coded and 2D-coded rows
+ * alternate under control of the k counter.  Returns 1 on success and
+ * 0 as soon as a row encoder reports failure.  The sample argument s
+ * is unused.
+ */
+static int
+Fax3Encode(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	Fax3CodecState* enc = EncoderState(tif);
+
+	(void) s;
+	for (; (long)cc > 0; bp += enc->b.rowbytes, cc -= enc->b.rowbytes) {
+		if (!(enc->b.mode & FAXMODE_NOEOL))
+			Fax3PutEOL(tif);
+		if (is2DEncoding(enc)) {
+			if (enc->tag != G3_1D) {
+				if (Fax3Encode2DRow(tif, bp, enc->refline,
+				    enc->b.rowpixels) == 0)
+					return (0);
+				enc->k--;
+			} else {
+				if (Fax3Encode1DRow(tif, bp,
+				    enc->b.rowpixels) == 0)
+					return (0);
+				enc->tag = G3_2D;
+			}
+			if (enc->k != 0) {
+				/* this row is the reference for the next one */
+				_TIFFmemcpy(enc->refline, bp, enc->b.rowbytes);
+			} else {
+				/* K rows done: the next row is 1D-coded again */
+				enc->tag = G3_1D;
+				enc->k = enc->maxk - 1;
+			}
+		} else {
+			if (Fax3Encode1DRow(tif, bp, enc->b.rowpixels) == 0)
+				return (0);
+		}
+	}
+	return (1);
+}
+
+/*
+ * Finish a strip: if the bit accumulator holds a partial byte, write it
+ * out.  Always returns 1.
+ */
+static int
+Fax3PostEncode(TIFF* tif)
+{
+	Fax3CodecState* enc = EncoderState(tif);
+
+	if (enc->bit == 8)		/* nothing pending */
+		return (1);
+	Fax3FlushBits(tif, enc);
+	return (1);
+}
+
+/*
+ * Unless FAXMODE_NORTC is set, write six consecutive EOL codes and then
+ * flush the bit accumulator.  With 2D encoding each EOL carries the
+ * extra tag bit, as in Fax3PutEOL.
+ */
+static void
+Fax3Close(TIFF* tif)
+{
+	Fax3CodecState* enc;
+	unsigned int eol_code = EOL;
+	unsigned int eol_len = 12;
+	int remaining;
+
+	if (Fax3State(tif)->mode & FAXMODE_NORTC)
+		return;
+
+	enc = EncoderState(tif);
+	if (is2DEncoding(enc)) {
+		eol_code = (eol_code << 1) | (enc->tag == G3_1D);
+		eol_len++;
+	}
+	for (remaining = 6; remaining > 0; remaining--)
+		Fax3PutBits(tif, eol_code, eol_len);
+	Fax3FlushBits(tif, enc);
+}
+
+/*
+ * Release everything the codec attached to tif: restore the parent
+ * get/set field methods, free the run arrays, the reference line, both
+ * codec-owned tag strings and the state block itself, then reinstall
+ * the default compression state.
+ */
+static void
+Fax3Cleanup(TIFF* tif)
+{
+	Fax3CodecState* sp = DecoderState(tif);
+
+	assert(sp != 0);
+
+	tif->tif_tagmethods.vgetfield = sp->b.vgetparent;
+	tif->tif_tagmethods.vsetfield = sp->b.vsetparent;
+
+	if (sp->runs)
+		_TIFFfree(sp->runs);
+	if (sp->refline)
+		_TIFFfree(sp->refline);
+
+	if (Fax3State(tif)->subaddress)
+		_TIFFfree(Fax3State(tif)->subaddress);
+	/*
+	 * faxdcs is stored through _TIFFsetString exactly like subaddress,
+	 * so it has to be released the same way or it leaks.
+	 */
+	if (Fax3State(tif)->faxdcs)
+		_TIFFfree(Fax3State(tif)->faxdcs);
+	_TIFFfree(tif->tif_data);
+	tif->tif_data = NULL;
+
+	_TIFFSetDefaultCompressionState(tif);
+}
+
+/* Field-set bit numbers for the codec's tags, counted from FIELD_CODEC. */
+#define	FIELD_BADFAXLINES	(FIELD_CODEC+0)
+#define	FIELD_CLEANFAXDATA	(FIELD_CODEC+1)
+#define	FIELD_BADFAXRUN		(FIELD_CODEC+2)
+#define	FIELD_RECVPARAMS	(FIELD_CODEC+3)
+#define	FIELD_SUBADDRESS	(FIELD_CODEC+4)
+#define	FIELD_RECVTIME		(FIELD_CODEC+5)
+#define	FIELD_FAXDCS		(FIELD_CODEC+6)
+
+/* shared by Group3Options and Group4Options */
+#define	FIELD_OPTIONS		(FIELD_CODEC+7)
+
+/*
+ * Tags registered for both Group 3 and Group 4.  BadFaxLines and
+ * ConsecutiveBadFaxLines each appear twice, once as TIFF_LONG and once
+ * as TIFF_SHORT.  FaxMode and FaxFillFunc are pseudo-tags
+ * (FIELD_PSEUDO).
+ */
+static const TIFFFieldInfo faxFieldInfo[] = {
+    { TIFFTAG_FAXMODE,		 0, 0,	TIFF_ANY,	FIELD_PSEUDO,
+      FALSE,	FALSE,	"FaxMode" },
+    { TIFFTAG_FAXFILLFUNC,	 0, 0,	TIFF_ANY,	FIELD_PSEUDO,
+      FALSE,	FALSE,	"FaxFillFunc" },
+    { TIFFTAG_BADFAXLINES,	 1, 1,	TIFF_LONG,	FIELD_BADFAXLINES,
+      TRUE,	FALSE,	"BadFaxLines" },
+    { TIFFTAG_BADFAXLINES,	 1, 1,	TIFF_SHORT,	FIELD_BADFAXLINES,
+      TRUE,	FALSE,	"BadFaxLines" },
+    { TIFFTAG_CLEANFAXDATA,	 1, 1,	TIFF_SHORT,	FIELD_CLEANFAXDATA,
+      TRUE,	FALSE,	"CleanFaxData" },
+    { TIFFTAG_CONSECUTIVEBADFAXLINES,1,1, TIFF_LONG,	FIELD_BADFAXRUN,
+      TRUE,	FALSE,	"ConsecutiveBadFaxLines" },
+    { TIFFTAG_CONSECUTIVEBADFAXLINES,1,1, TIFF_SHORT,	FIELD_BADFAXRUN,
+      TRUE,	FALSE,	"ConsecutiveBadFaxLines" },
+    { TIFFTAG_FAXRECVPARAMS,	 1, 1, TIFF_LONG,	FIELD_RECVPARAMS,
+      TRUE,	FALSE,	"FaxRecvParams" },
+    { TIFFTAG_FAXSUBADDRESS,	-1,-1, TIFF_ASCII,	FIELD_SUBADDRESS,
+      TRUE,	FALSE,	"FaxSubAddress" },
+    { TIFFTAG_FAXRECVTIME,	 1, 1, TIFF_LONG,	FIELD_RECVTIME,
+      TRUE,	FALSE,	"FaxRecvTime" },
+    { TIFFTAG_FAXDCS,		-1,-1, TIFF_ASCII,	FIELD_FAXDCS,
+      TRUE,	FALSE,	"FaxDcs" },
+};
+/* merged by TIFFInitCCITTFax3 only */
+static const TIFFFieldInfo fax3FieldInfo[] = {
+    { TIFFTAG_GROUP3OPTIONS,	 1, 1,	TIFF_LONG,	FIELD_OPTIONS,
+      FALSE,	FALSE,	"Group3Options" },
+};
+/* the Group 4 counterpart of fax3FieldInfo */
+static const TIFFFieldInfo fax4FieldInfo[] = {
+    { TIFFTAG_GROUP4OPTIONS,	 1, 1,	TIFF_LONG,	FIELD_OPTIONS,
+      FALSE,	FALSE,	"Group4Options" },
+};
+/* number of elements in array a */
+#define	N(a)	(sizeof (a) / sizeof (a[0]))
+
+/*
+ * Set-field hook for the fax codec.  Stores the tags the codec owns in
+ * its state block and forwards every other tag to the parent method
+ * saved in sp->vsetparent.  The pseudo-tags FaxMode and FaxFillFunc
+ * return at once; for the remaining codec tags the tag's field bit is
+ * set and the directory is marked dirty.  Returns 1 for tags handled
+ * here, otherwise whatever the parent method returns.
+ */
+static int
+Fax3VSetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+	Fax3BaseState* sp = Fax3State(tif);
+
+	assert(sp != 0);
+	assert(sp->vsetparent != 0);
+
+	switch (tag) {
+	case TIFFTAG_FAXMODE:
+		sp->mode = va_arg(ap, int);
+		return (1);			/* NB: pseudo tag */
+	case TIFFTAG_FAXFILLFUNC:
+		DecoderState(tif)->fill = va_arg(ap, TIFFFaxFillFunc);
+		return (1);			/* NB: pseudo tag */
+	case TIFFTAG_GROUP3OPTIONS:
+		/* XXX: avoid reading options if compression mismatches. */
+		/* the field bit below is still set when the value is skipped */
+		if (tif->tif_dir.td_compression == COMPRESSION_CCITTFAX3)
+			sp->groupoptions = va_arg(ap, uint32);
+		break;
+	case TIFFTAG_GROUP4OPTIONS:
+		/* XXX: avoid reading options if compression mismatches. */
+		/* the field bit below is still set when the value is skipped */
+		if (tif->tif_dir.td_compression == COMPRESSION_CCITTFAX4)
+			sp->groupoptions = va_arg(ap, uint32);
+		break;
+	case TIFFTAG_BADFAXLINES:
+		sp->badfaxlines = va_arg(ap, uint32);
+		break;
+	case TIFFTAG_CLEANFAXDATA:
+		/* fetched as int, then narrowed to uint16 */
+		sp->cleanfaxdata = (uint16) va_arg(ap, int);
+		break;
+	case TIFFTAG_CONSECUTIVEBADFAXLINES:
+		sp->badfaxrun = va_arg(ap, uint32);
+		break;
+	case TIFFTAG_FAXRECVPARAMS:
+		sp->recvparams = va_arg(ap, uint32);
+		break;
+	case TIFFTAG_FAXSUBADDRESS:
+		_TIFFsetString(&sp->subaddress, va_arg(ap, char*));
+		break;
+	case TIFFTAG_FAXRECVTIME:
+		sp->recvtime = va_arg(ap, uint32);
+		break;
+	case TIFFTAG_FAXDCS:
+		_TIFFsetString(&sp->faxdcs, va_arg(ap, char*));
+		break;
+	default:
+		return (*sp->vsetparent)(tif, tag, ap);
+	}
+	/* NOTE(review): the lookup result is dereferenced unchecked -- confirm it cannot be NULL here */
+	TIFFSetFieldBit(tif, _TIFFFieldWithTag(tif, tag)->field_bit);
+	tif->tif_flags |= TIFF_DIRTYDIRECT;
+	return (1);
+}
+
+/*
+ * Get-field hook for the fax codec.  For a tag the codec owns, the next
+ * variadic argument is taken as a pointer to the destination, the
+ * stored value is written through it, and 1 is returned.  Any other tag
+ * is forwarded to the parent method saved in vgetparent.
+ */
+static int
+Fax3VGetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+	Fax3BaseState* base = Fax3State(tif);
+
+	switch (tag) {
+	case TIFFTAG_FAXMODE:
+		*va_arg(ap, int*) = base->mode;
+		return (1);
+	case TIFFTAG_FAXFILLFUNC:
+		*va_arg(ap, TIFFFaxFillFunc*) = DecoderState(tif)->fill;
+		return (1);
+	case TIFFTAG_GROUP3OPTIONS:
+	case TIFFTAG_GROUP4OPTIONS:
+		/* both option tags share one stored value */
+		*va_arg(ap, uint32*) = base->groupoptions;
+		return (1);
+	case TIFFTAG_BADFAXLINES:
+		*va_arg(ap, uint32*) = base->badfaxlines;
+		return (1);
+	case TIFFTAG_CLEANFAXDATA:
+		*va_arg(ap, uint16*) = base->cleanfaxdata;
+		return (1);
+	case TIFFTAG_CONSECUTIVEBADFAXLINES:
+		*va_arg(ap, uint32*) = base->badfaxrun;
+		return (1);
+	case TIFFTAG_FAXRECVPARAMS:
+		*va_arg(ap, uint32*) = base->recvparams;
+		return (1);
+	case TIFFTAG_FAXSUBADDRESS:
+		*va_arg(ap, char**) = base->subaddress;
+		return (1);
+	case TIFFTAG_FAXRECVTIME:
+		*va_arg(ap, uint32*) = base->recvtime;
+		return (1);
+	case TIFFTAG_FAXDCS:
+		*va_arg(ap, char**) = base->faxdcs;
+		return (1);
+	default:
+		break;
+	}
+	return (*base->vgetparent)(tif, tag, ap);
+}
+
+/*
+ * Print the codec's directory fields that are set on tif to fd, one
+ * labelled line per field.  The option bits are listed by name,
+ * followed by the raw value in decimal and hex.  flags is unused.
+ */
+static void
+Fax3PrintDir(TIFF* tif, FILE* fd, long flags)
+{
+	Fax3BaseState* base = Fax3State(tif);
+
+	(void) flags;
+	if (TIFFFieldSet(tif,FIELD_OPTIONS)) {
+		/* text printed before the next option name */
+		const char* glue = " ";
+
+		if (tif->tif_dir.td_compression != COMPRESSION_CCITTFAX4) {
+			fprintf(fd, "  Group 3 Options:");
+			if (base->groupoptions & GROUP3OPT_2DENCODING) {
+				fprintf(fd, "%s2-d encoding", glue);
+				glue = "+";
+			}
+			if (base->groupoptions & GROUP3OPT_FILLBITS) {
+				fprintf(fd, "%sEOL padding", glue);
+				glue = "+";
+			}
+			if (base->groupoptions & GROUP3OPT_UNCOMPRESSED)
+				fprintf(fd, "%suncompressed data", glue);
+		} else {
+			fprintf(fd, "  Group 4 Options:");
+			if (base->groupoptions & GROUP4OPT_UNCOMPRESSED)
+				fprintf(fd, "%suncompressed data", glue);
+		}
+		fprintf(fd, " (%lu = 0x%lx)\n",
+		    (unsigned long) base->groupoptions,
+		    (unsigned long) base->groupoptions);
+	}
+	if (TIFFFieldSet(tif,FIELD_CLEANFAXDATA)) {
+		fprintf(fd, "  Fax Data:");
+		if (base->cleanfaxdata == CLEANFAXDATA_CLEAN)
+			fprintf(fd, " clean");
+		else if (base->cleanfaxdata == CLEANFAXDATA_REGENERATED)
+			fprintf(fd, " receiver regenerated");
+		else if (base->cleanfaxdata == CLEANFAXDATA_UNCLEAN)
+			fprintf(fd, " uncorrected errors");
+		fprintf(fd, " (%u = 0x%x)\n",
+		    base->cleanfaxdata, base->cleanfaxdata);
+	}
+	if (TIFFFieldSet(tif,FIELD_BADFAXLINES)) {
+		fprintf(fd, "  Bad Fax Lines: %lu\n",
+		    (unsigned long) base->badfaxlines);
+	}
+	if (TIFFFieldSet(tif,FIELD_BADFAXRUN)) {
+		fprintf(fd, "  Consecutive Bad Fax Lines: %lu\n",
+		    (unsigned long) base->badfaxrun);
+	}
+	if (TIFFFieldSet(tif,FIELD_RECVPARAMS)) {
+		fprintf(fd, "  Fax Receive Parameters: %08lx\n",
+		    (unsigned long) base->recvparams);
+	}
+	if (TIFFFieldSet(tif,FIELD_SUBADDRESS)) {
+		fprintf(fd, "  Fax SubAddress: %s\n", base->subaddress);
+	}
+	if (TIFFFieldSet(tif,FIELD_RECVTIME)) {
+		fprintf(fd, "  Fax Receive Time: %lu secs\n",
+		    (unsigned long) base->recvtime);
+	}
+	if (TIFFFieldSet(tif,FIELD_FAXDCS)) {
+		fprintf(fd, "  Fax DCS: %s\n", base->faxdcs);
+	}
+}
+
+/*
+ * Common initialisation for the fax codecs: allocate and zero the
+ * state block, register the shared fax tags, hook the tag get/set/print
+ * methods, and install the Group 3 codec entry points.  Returns 1 on
+ * success and 0 when the state block cannot be allocated.
+ */
+static int
+InitCCITTFax3(TIFF* tif)
+{
+	Fax3BaseState* sp;
+
+	/*
+	 * Allocate state block so tag methods have storage to record values.
+	 */
+	tif->tif_data = (tidata_t)
+		_TIFFmalloc(sizeof (Fax3CodecState));
+
+	if (tif->tif_data == NULL) {
+		TIFFErrorExt(tif->tif_clientdata, "TIFFInitCCITTFax3",
+		    "%s: No space for state block", tif->tif_name);
+		return (0);
+	}
+	/*
+	 * Start from an all-zero block.  Only some members are assigned
+	 * below; without this, the tag values that Fax3VGetField hands
+	 * back (badfaxlines, cleanfaxdata, badfaxrun, recvtime) and the
+	 * run-list pointers would hold indeterminate values until set.
+	 */
+	_TIFFmemset(tif->tif_data, 0, sizeof (Fax3CodecState));
+
+	sp = Fax3State(tif);
+	sp->rw_mode = tif->tif_mode;
+
+	/*
+	 * Merge codec-specific tag information and
+	 * override parent get/set field methods.
+	 */
+	_TIFFMergeFieldInfo(tif, faxFieldInfo, N(faxFieldInfo));
+	sp->vgetparent = tif->tif_tagmethods.vgetfield;
+	tif->tif_tagmethods.vgetfield = Fax3VGetField; /* hook for codec tags */
+	sp->vsetparent = tif->tif_tagmethods.vsetfield;
+	tif->tif_tagmethods.vsetfield = Fax3VSetField; /* hook for codec tags */
+	tif->tif_tagmethods.printdir = Fax3PrintDir;   /* hook for codec tags */
+	sp->groupoptions = 0;
+	sp->recvparams = 0;
+	sp->subaddress = NULL;
+	sp->faxdcs = NULL;
+
+	if (sp->rw_mode == O_RDONLY) /* FIXME: improve for in place update */
+		tif->tif_flags |= TIFF_NOBITREV; /* decoder does bit reversal */
+	DecoderState(tif)->runs = NULL;
+	/* goes through Fax3VSetField, which was hooked above */
+	TIFFSetField(tif, TIFFTAG_FAXFILLFUNC, _TIFFFax3fillruns);
+	EncoderState(tif)->refline = NULL;
+
+	/*
+	 * Install codec methods.
+	 */
+	tif->tif_setupdecode = Fax3SetupState;
+	tif->tif_predecode = Fax3PreDecode;
+	tif->tif_decoderow = Fax3Decode1D;
+	tif->tif_decodestrip = Fax3Decode1D;
+	tif->tif_decodetile = Fax3Decode1D;
+	tif->tif_setupencode = Fax3SetupState;
+	tif->tif_preencode = Fax3PreEncode;
+	tif->tif_postencode = Fax3PostEncode;
+	tif->tif_encoderow = Fax3Encode;
+	tif->tif_encodestrip = Fax3Encode;
+	tif->tif_encodetile = Fax3Encode;
+	tif->tif_close = Fax3Close;
+	tif->tif_cleanup = Fax3Cleanup;
+
+	return (1);
+}
+
+/*
+ * Initialise the CCITT Group 3 codec on tif: run the common fax
+ * set-up, register the Group3Options tag and select the default fax
+ * mode.  Returns 0 when the common set-up fails, otherwise the result
+ * of setting the FaxMode pseudo-tag.  scheme is unused.
+ */
+int
+TIFFInitCCITTFax3(TIFF* tif, int scheme)
+{
+	(void) scheme;
+	if (!InitCCITTFax3(tif))
+		return (0);
+
+	_TIFFMergeFieldInfo(tif, fax3FieldInfo, N(fax3FieldInfo));
+
+	/*
+	 * The default format is Class/F-style w/o RTC.
+	 */
+	return TIFFSetField(tif, TIFFTAG_FAXMODE, FAXMODE_CLASSF);
+}
+
+/*
+ * CCITT Group 4 (T.6) Facsimile-compatible
+ * Compression Scheme Support.
+ */
+
+#define	SWAP(t,a,b)	{ t x; x = (a); (a) = (b); (b) = x; }
+/*
+ * Decode G4 rows into buf: 1 once occ is used up, -1 if a row hits EOFG4.
+ */
+static int
+Fax4Decode(TIFF* tif, tidata_t buf, tsize_t occ, tsample_t s)
+{
+	DECLARE_STATE_2D(tif, sp, "Fax4Decode");
+        int line = 0;	/* NOTE(review): only counted here; presumably read by macros defined outside this view */
+
+	(void) s;
+	CACHE_STATE(tif, sp);
+	while ((long)occ > 0) {		/* each pass decodes one row */
+		a0 = 0;
+		RunLength = 0;
+		pa = thisrun = sp->curruns;	/* run array filled for this row */
+		pb = sp->refruns;		/* run array of the reference row */
+		b1 = *pb++;
+#ifdef FAX3_DEBUG
+		printf("\nBitAcc=%08X, BitsAvail = %d\n", BitAcc, BitsAvail);
+		printf("-------------------- %d\n", tif->tif_row);
+		fflush(stdout);
+#endif
+		EXPAND2D(EOFG4);
+                if (EOLcnt)		/* EXPAND2D sets this when it meets an EOL code */
+                    goto EOFG4;
+		(*sp->fill)(buf, thisrun, pa, lastx);
+		SETVALUE(0);		/* imaginary change for reference */
+		SWAP(uint32*, sp->curruns, sp->refruns);	/* this row is the next reference */
+		buf += sp->b.rowbytes;
+		occ -= sp->b.rowbytes;
+                line++;
+		continue;
+	EOFG4:
+                NeedBits16( 13, BADG4 );	/* try to load the 13 bits dropped below */
+        BADG4:
+#ifdef FAX3_DEBUG
+                if( GetBits(13) != 0x1001 )
+                    fputs( "Bad RTC\n", stderr );
+#endif                
+                ClrBits( 13 );
+		(*sp->fill)(buf, thisrun, pa, lastx);	/* image whatever runs were stored */
+		UNCACHE_STATE(tif, sp);
+		return (-1);
+	}
+	UNCACHE_STATE(tif, sp);
+	return (1);
+}
+#undef	SWAP
+
+/*
+ * Encode cc bytes of raster data from bp, one row per pass.
+ * Returns 1 when done, 0 as soon as a row fails to encode.
+ */
+static int
+Fax4Encode(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	Fax3CodecState *sp = EncoderState(tif);
+
+	(void) s;
+	/* A row that encodes cleanly is copied into refline for the next pass. */
+	for (; (long)cc > 0; bp += sp->b.rowbytes, cc -= sp->b.rowbytes) {
+		if (!Fax3Encode2DRow(tif, bp, sp->refline, sp->b.rowpixels))
+			return (0);
+		_TIFFmemcpy(sp->refline, bp, sp->b.rowbytes);
+	}
+	return (1);
+}
+
+static int
+Fax4PostEncode(TIFF* tif)
+{
+	Fax3CodecState *sp = EncoderState(tif);
+	int eol;
+
+	for (eol = 0; eol < 2; eol++)	/* terminate strip w/ EOFB: EOL twice */
+		Fax3PutBits(tif, EOL, 12);
+	if (sp->bit != 8)		/* NOTE(review): presumably bits are still pending - confirm */
+		Fax3FlushBits(tif, sp);
+	return (1);
+}
+
+int
+TIFFInitCCITTFax4(TIFF* tif, int scheme)
+{
+	(void) scheme;
+	if (!InitCCITTFax3(tif))		/* reuse G3 support */
+		return (0);
+	_TIFFMergeFieldInfo(tif, fax4FieldInfo, N(fax4FieldInfo));
+
+	/* Swap in the G4 coder for every row/strip/tile entry point. */
+	tif->tif_decoderow = Fax4Decode;
+	tif->tif_encoderow = Fax4Encode;
+	tif->tif_decodestrip = Fax4Decode;
+	tif->tif_encodestrip = Fax4Encode;
+	tif->tif_decodetile = Fax4Decode;
+	tif->tif_encodetile = Fax4Encode;
+	tif->tif_postencode = Fax4PostEncode;
+	/*
+	 * Suppress RTC at the end of each strip.
+	 */
+	return TIFFSetField(tif, TIFFTAG_FAXMODE, FAXMODE_NORTC);
+}
+
+/*
+ * CCITT Group 3 1-D Modified Huffman RLE Compression Support.
+ * (Compression algorithms 2 and 32771)
+ */
+
+/*
+ * Decode RLE rows into buf: 1 once occ is used up, -1 on premature EOF.
+ */
+static int
+Fax3DecodeRLE(TIFF* tif, tidata_t buf, tsize_t occ, tsample_t s)
+{
+	DECLARE_STATE(tif, sp, "Fax3DecodeRLE");
+	int mode = sp->b.mode;
+        int line = 0;	/* NOTE(review): only counted here; presumably read by macros defined outside this view */
+
+	(void) s;
+	CACHE_STATE(tif, sp);
+	thisrun = sp->curruns;		/* same run array is reused for every row */
+	while ((long)occ > 0) {		/* each pass decodes one row */
+		a0 = 0;
+		RunLength = 0;
+		pa = thisrun;
+#ifdef FAX3_DEBUG
+		printf("\nBitAcc=%08X, BitsAvail = %d\n", BitAcc, BitsAvail);
+		printf("-------------------- %d\n", tif->tif_row);
+		fflush(stdout);
+#endif
+		EXPAND1D(EOFRLE);
+		(*sp->fill)(buf, thisrun, pa, lastx);
+		/*
+		 * Cleanup at the end of the row.
+		 */
+		if (mode & FAXMODE_BYTEALIGN) {
+			int n = BitsAvail - (BitsAvail &~ 7);	/* i.e. BitsAvail & 7 */
+			ClrBits(n);
+		} else if (mode & FAXMODE_WORDALIGN) {
+			int n = BitsAvail - (BitsAvail &~ 15);	/* i.e. BitsAvail & 15 */
+			ClrBits(n);
+			if (BitsAvail == 0 && !isAligned(cp, uint16))
+			    cp++;		/* step cp over one more input byte */
+		}
+		buf += sp->b.rowbytes;
+		occ -= sp->b.rowbytes;
+                line++;
+		continue;
+	EOFRLE:				/* premature EOF */
+		(*sp->fill)(buf, thisrun, pa, lastx);	/* image whatever runs were stored */
+		UNCACHE_STATE(tif, sp);
+		return (-1);
+	}
+	UNCACHE_STATE(tif, sp);
+	return (1);
+}
+
+int
+TIFFInitCCITTRLE(TIFF* tif, int scheme)
+{
+	(void) scheme;
+	if (!InitCCITTFax3(tif))		/* reuse G3 support */
+		return (0);
+	/* Only the decode hooks are replaced; the rest stays as set above. */
+	tif->tif_decodetile = Fax3DecodeRLE;
+	tif->tif_decodestrip = Fax3DecodeRLE;
+	tif->tif_decoderow = Fax3DecodeRLE;
+	/*
+	 * Suppress RTC+EOLs when encoding and byte-align data.
+	 */
+	return TIFFSetField(tif, TIFFTAG_FAXMODE,
+	    FAXMODE_NORTC|FAXMODE_NOEOL|FAXMODE_BYTEALIGN);
+}
+
+int
+TIFFInitCCITTRLEW(TIFF* tif, int scheme)
+{
+	(void) scheme;
+	if (!InitCCITTFax3(tif))		/* reuse G3 support */
+		return (0);
+	/* Only the decode hooks are replaced; the rest stays as set above. */
+	tif->tif_decodetile = Fax3DecodeRLE;
+	tif->tif_decodestrip = Fax3DecodeRLE;
+	tif->tif_decoderow = Fax3DecodeRLE;
+	/*
+	 * Suppress RTC+EOLs when encoding and word-align data.
+	 */
+	return TIFFSetField(tif, TIFFTAG_FAXMODE,
+	    FAXMODE_NORTC|FAXMODE_NOEOL|FAXMODE_WORDALIGN);
+}
+#endif /* CCITT_SUPPORT */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_fax3.h b/src/libtiff/tif_fax3.h
new file mode 100644
index 0000000..99ced5f
--- /dev/null
+++ b/src/libtiff/tif_fax3.h
@@ -0,0 +1,525 @@
+/* $Id: tif_fax3.h,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1990-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef _FAX3_
+#define	_FAX3_
+/*
+ * TIFF Library.
+ *
+ * CCITT Group 3 (T.4) and Group 4 (T.6) Decompression Support.
+ *
+ * Decoder support is derived, with permission, from the code
+ * in Frank Cringle's viewfax program;
+ *      Copyright (C) 1990, 1995  Frank D. Cringle.
+ */
+#include "tiff.h"
+
+/*
+ * To override the default routine used to image decoded
+ * spans one can use the pseudo tag TIFFTAG_FAXFILLFUNC.
+ * The routine must have the type signature given below;
+ * for example:
+ *
+ * fillruns(unsigned char* buf, uint32* runs, uint32* erun, uint32 lastx)
+ *
+ * where buf is the place to set the bits, runs is the array of b&w run
+ * lengths (white then black), erun points just past the last run, and
+ * lastx is the width of the row in pixels.  Fill routines can assume
+ * the run array has room for at least lastx runs and can overwrite
+ * data in the run array as needed (e.g. to append zero runs to bring
+ * the count up to a nice multiple).
+ */
+typedef	void (*TIFFFaxFillFunc)(unsigned char*, uint32*, uint32*, uint32);
+
+/*
+ * The default run filler (same signature); external for other decoders.
+ */
+#if defined(__cplusplus)
+extern "C" {
+#endif
+extern	void _TIFFFax3fillruns(unsigned char*, uint32*, uint32*, uint32);
+#if defined(__cplusplus)
+}
+#endif
+
+
+/* finite state machine codes */
+#define S_Null		0	/* no case in the expanders: takes their default branch */
+#define S_Pass		1	/* 2-D: advance a0 along the reference row, run continues */
+#define S_Horiz		2	/* 2-D: two 1-D coded runs follow */
+#define S_V0		3	/* 2-D: run ends exactly at b1 */
+#define S_VR		4	/* 2-D: run ends Param to the right of b1 */
+#define S_VL		5	/* 2-D: run ends Param to the left of b1 */
+#define S_Ext		6	/* 2-D: extension code; decoding of the row stops */
+#define S_TermW		7	/* white lookup: code that ends the run */
+#define S_TermB		8	/* black lookup: code that ends the run */
+#define S_MakeUpW	9	/* white lookup: adds Param, run continues */
+#define S_MakeUpB	10	/* black lookup: adds Param, run continues */
+#define S_MakeUp	11	/* accepted by both lookups: adds Param, run continues */
+#define S_EOL		12	/* end-of-line code */
+
+typedef struct {		/* state table entry */
+	unsigned char State;	/* see above */
+	unsigned char Width;	/* width of code in bits */
+	uint32	Param;		/* unsigned 32-bit run length in bits */
+} TIFFFaxTabEnt;
+
+extern	const TIFFFaxTabEnt TIFFFaxMainTable[];	/* 2-D codes, indexed with 7 bits */
+extern	const TIFFFaxTabEnt TIFFFaxWhiteTable[];	/* white runs, indexed with 12 bits */
+extern	const TIFFFaxTabEnt TIFFFaxBlackTable[];	/* black runs, indexed with 13 bits */
+
+/*
+ * The following macros define the majority of the G3/G4 decoder
+ * algorithm using the state tables defined elsewhere.  To build
+ * a decoder you need some setup code and some glue code. Note
+ * that you may also need/want to change the way the NeedBits*
+ * macros get input data if, for example, you know the data to be
+ * decoded is properly aligned and oriented (doing so before running
+ * the decoder can be a big performance win).
+ *
+ * Consult the decoder in the TIFF library for an idea of what you
+ * need to define and setup to make use of these definitions.
+ *
+ * NB: to enable a debugging version of these macros define FAX3_DEBUG
+ *     before including this file.  Trace output goes to stdout.
+ */
+
+#ifndef EndOfData
+#define EndOfData()	(cp >= ep)	/* true once cp (advanced by NeedBits*) reaches ep */
+#endif
+/*
+ * Need <=8 or <=16 bits of input data.  Unlike viewfax we
+ * cannot use/assume a word-aligned, properly bit swizzled
+ * input data set because data may come from an arbitrarily
+ * aligned, read-only source such as a memory-mapped file.
+ * Note also that the viewfax decoder does not check for
+ * running off the end of the input data buffer.  This is
+ * possible for G3-encoded data because it prescans the input
+ * data to count EOL markers, but can cause problems for G4
+ * data.  In any event, we don't prescan and must watch for
+ * running out of data since we can't permit the library to
+ * scan past the end of the input data buffer.
+ *
+ * Finally, note that we must handle remaindered data at the end
+ * of a strip specially.  The coder asks for a fixed number of
+ * bits when scanning for the next code.  This may be more bits
+ * than are actually present in the data stream.  If we appear
+ * to run out of data but still have some number of valid bits
+ * remaining then we makeup the requested amount with zeros and
+ * return successfully.  If the returned data is incorrect then
+ * we should be called again and get a premature EOF error;
+ * otherwise we should get the right answer.
+ */
+#ifndef NeedBits8
+#define NeedBits8(n,eoflab) do {	/* reads at most one input byte */	\
+    if (BitsAvail < (n)) {						\
+	if (EndOfData()) {						\
+	    if (BitsAvail == 0)			/* no valid bits */	\
+		goto eoflab;						\
+	    BitsAvail = (n);			/* pad with zeros */	\
+	} else {			/* new byte goes above held bits */	\
+	    BitAcc |= ((uint32) bitmap[*cp++])<<BitsAvail;		\
+	    BitsAvail += 8;						\
+	}								\
+    }									\
+} while (0)
+#endif
+#ifndef NeedBits16
+#define NeedBits16(n,eoflab) do {	/* reads at most two input bytes */	\
+    if (BitsAvail < (n)) {						\
+	if (EndOfData()) {						\
+	    if (BitsAvail == 0)			/* no valid bits */	\
+		goto eoflab;						\
+	    BitsAvail = (n);			/* pad with zeros */	\
+	} else {			/* first byte goes above held bits */	\
+	    BitAcc |= ((uint32) bitmap[*cp++])<<BitsAvail;		\
+	    if ((BitsAvail += 8) < (n)) {	/* still short: second byte */	\
+		if (EndOfData()) {					\
+		    /* NB: we know BitsAvail is non-zero here */	\
+		    BitsAvail = (n);		/* pad with zeros */	\
+		} else {						\
+		    BitAcc |= ((uint32) bitmap[*cp++])<<BitsAvail;	\
+		    BitsAvail += 8;					\
+		}							\
+	    }								\
+	}								\
+    }									\
+} while (0)
+#endif
+#define GetBits(n)	(BitAcc & ((1<<(n))-1))	/* low n bits of BitAcc, not consumed */
+#define ClrBits(n) do {			/* drop the low n bits of BitAcc */	\
+    BitsAvail -= (n);							\
+    BitAcc >>= (n);							\
+} while (0)
+
+#ifdef FAX3_DEBUG
+static const char* StateNames[] = {	/* indexed by TabEnt->State (S_* codes) */
+    "Null   ",
+    "Pass   ",
+    "Horiz  ",
+    "V0     ",
+    "VR     ",
+    "VL     ",
+    "Ext    ",
+    "TermW  ",
+    "TermB  ",
+    "MakeUpW",
+    "MakeUpB",
+    "MakeUp ",
+    "EOL    ",
+};
+#define DEBUG_SHOW putchar(BitAcc & (1 << t) ? '1' : '0')	/* prints bit t of BitAcc; t comes from the caller */
+#define LOOKUP8(wid,tab,eoflab) do {	/* traced lookup via NeedBits8 */	\
+    int t;				/* bit index used by DEBUG_SHOW */	\
+    NeedBits8(wid,eoflab);						\
+    TabEnt = tab + GetBits(wid);	/* low wid bits select the entry */	\
+    printf("%08lX/%d: %s%5d\t", (unsigned long) BitAcc, BitsAvail,	\
+	   StateNames[TabEnt->State], (int) TabEnt->Param);		\
+    for (t = 0; t < TabEnt->Width; t++)	/* show the code's bits */	\
+	DEBUG_SHOW;							\
+    putchar('\n');							\
+    fflush(stdout);							\
+    ClrBits(TabEnt->Width);		/* consume only the matched code */	\
+} while (0)
+#define LOOKUP16(wid,tab,eoflab) do {	/* traced lookup via NeedBits16 */	\
+    int t;				/* bit index used by DEBUG_SHOW */	\
+    NeedBits16(wid,eoflab);						\
+    TabEnt = tab + GetBits(wid);	/* low wid bits select the entry */	\
+    printf("%08lX/%d: %s%5d\t", (unsigned long) BitAcc, BitsAvail,	\
+	   StateNames[TabEnt->State], (int) TabEnt->Param);		\
+    for (t = 0; t < TabEnt->Width; t++)	/* show the code's bits */	\
+	DEBUG_SHOW;							\
+    putchar('\n');							\
+    fflush(stdout);							\
+    ClrBits(TabEnt->Width);		/* consume only the matched code */	\
+} while (0)
+
+#define SETVALUE(x) do {		/* traced: store run, then reset */	\
+    *pa++ = RunLength + (x);		/* pending make-up total plus x */	\
+    printf("SETVALUE: %d\t%d\n", RunLength + (x), a0);			\
+    a0 += (x);				/* argument parenthesized like the non-debug form */ \
+    RunLength = 0;							\
+} while (0)
+#else
+#define LOOKUP8(wid,tab,eoflab) do {	/* table lookup via NeedBits8 */	\
+    NeedBits8(wid,eoflab);						\
+    TabEnt = tab + GetBits(wid);	/* low wid bits select the entry */	\
+    ClrBits(TabEnt->Width);		/* consume only the matched code */	\
+} while (0)
+#define LOOKUP16(wid,tab,eoflab) do {	/* table lookup via NeedBits16 */	\
+    NeedBits16(wid,eoflab);						\
+    TabEnt = tab + GetBits(wid);	/* low wid bits select the entry */	\
+    ClrBits(TabEnt->Width);		/* consume only the matched code */	\
+} while (0)
+
+/*
+ * Append a run to the run length array for the current row,
+ * advance a0 by x and clear the pending make-up total.
+ */
+#define SETVALUE(x) do {							\
+    *pa++ = RunLength + (x);		/* pending make-up total plus x */	\
+    a0 += (x);								\
+    RunLength = 0;							\
+} while (0)
+#endif
+
+/*
+ * Synchronize input decoding at the start of each
+ * row by scanning for an EOL (if appropriate) and
+ * skipping any trash data that might be present
+ * after a decoding error.  Note that the decoding
+ * done elsewhere that recognizes an EOL only consumes
+ * 11 consecutive zero bits.  This means that if EOLcnt
+ * is non-zero then we still need to scan for the final flag
+ * bit that is part of the EOL code.
+ */
+#define	SYNC_EOL(eoflab) do {						\
+    if (EOLcnt == 0) {			/* no EOL seen yet */		\
+	for (;;) {			/* hunt for 11 zero bits */	\
+	    NeedBits16(11,eoflab);					\
+	    if (GetBits(11) == 0)					\
+		break;							\
+	    ClrBits(1);			/* slide one bit and retry */	\
+	}								\
+    }									\
+    for (;;) {				/* skip whole zero bytes */	\
+	NeedBits8(8,eoflab);						\
+	if (GetBits(8))							\
+	    break;							\
+	ClrBits(8);							\
+    }									\
+    while (GetBits(1) == 0)		/* skip remaining zero bits */	\
+	ClrBits(1);							\
+    ClrBits(1);				/* EOL bit */			\
+    EOLcnt = 0;				/* reset EOL counter/flag */	\
+} while (0)
+
+/*
+ * Cleanup the array of runs after decoding a row.
+ * We adjust final runs to insure the user buffer is not
+ * overwritten and/or undecoded area is white filled.
+ */
+#define	CLEANUP_RUNS() do {						\
+    if (RunLength)			/* store pending make-up total */ \
+	SETVALUE(0);							\
+    if (a0 != lastx) {			/* row is short or too long */	\
+	badlength(a0, lastx);						\
+	while (a0 > lastx && pa > thisrun)	/* drop trailing runs */	\
+	    a0 -= *--pa;						\
+	if (a0 < lastx) {						\
+	    if (a0 < 0)							\
+		a0 = 0;							\
+	    if ((pa-thisrun)&1)		/* odd count: add a zero run first */ \
+		SETVALUE(0);						\
+	    SETVALUE(lastx - a0);	/* pad the row out to lastx */	\
+	} else if (a0 > lastx) {	/* only when no runs are left */ \
+	    SETVALUE(lastx);						\
+	    SETVALUE(0);							\
+	}								\
+    }									\
+} while (0)
+
+/*
+ * Decode a line of 1D-encoded data.
+ *
+ * The line expanders are written as macros so that they can be reused
+ * but still have direct access to the local variables of the "calling"
+ * function.
+ *
+ * Note that unlike the original version we have to explicitly test for
+ * a0 >= lastx after each black/white run is decoded.  This is because
+ * the original code depended on the input data being zero-padded to
+ * insure the decoder recognized an EOL before running out of data.
+ */
+#define EXPAND1D(eoflab) do {						\
+    for (;;) {				/* one white + one black run */	\
+	for (;;) {			/* white: 12-bit lookup */	\
+	    LOOKUP16(12, TIFFFaxWhiteTable, eof1d);			\
+	    switch (TabEnt->State) {					\
+	    case S_EOL:							\
+		EOLcnt = 1;						\
+		goto done1d;						\
+	    case S_TermW:						\
+		SETVALUE(TabEnt->Param);					\
+		goto doneWhite1d;					\
+	    case S_MakeUpW:						\
+	    case S_MakeUp:		/* accumulate, run continues */	\
+		a0 += TabEnt->Param;					\
+		RunLength += TabEnt->Param;				\
+		break;							\
+	    default:							\
+		unexpected("WhiteTable", a0);				\
+		goto done1d;						\
+	    }								\
+	}								\
+    doneWhite1d:							\
+	if (a0 >= lastx)		/* row is full */		\
+	    goto done1d;						\
+	for (;;) {			/* black: 13-bit lookup */	\
+	    LOOKUP16(13, TIFFFaxBlackTable, eof1d);			\
+	    switch (TabEnt->State) {					\
+	    case S_EOL:							\
+		EOLcnt = 1;						\
+		goto done1d;						\
+	    case S_TermB:						\
+		SETVALUE(TabEnt->Param);					\
+		goto doneBlack1d;					\
+	    case S_MakeUpB:						\
+	    case S_MakeUp:		/* accumulate, run continues */	\
+		a0 += TabEnt->Param;					\
+		RunLength += TabEnt->Param;				\
+		break;							\
+	    default:							\
+		unexpected("BlackTable", a0);				\
+		goto done1d;						\
+	    }								\
+	}								\
+    doneBlack1d:							\
+	if (a0 >= lastx)		/* row is full */		\
+	    goto done1d;						\
+        if( *(pa-1) == 0 && *(pa-2) == 0 )	/* drop a zero/zero pair */	\
+            pa -= 2;                                                    \
+    }									\
+eof1d:					/* input ran out mid-row */	\
+    prematureEOF(a0);							\
+    CLEANUP_RUNS();							\
+    goto eoflab;							\
+done1d:									\
+    CLEANUP_RUNS();							\
+} while (0)
+
+/*
+ * Update the value of b1 using the array
+ * of runs for the reference line.
+ */
+#define CHECK_b1 do {			/* no-op until a run is stored */	\
+    if (pa != thisrun) while (b1 <= a0 && b1 < lastx) {			\
+	b1 += pb[0] + pb[1];		/* step over two reference runs */ \
+	pb += 2;							\
+    }									\
+} while (0)
+
+/*
+ * Expand a row of 2D-encoded data; jumps to eoflab if input runs out.
+ */
+#define EXPAND2D(eoflab) do {						\
+    while (a0 < lastx) {		/* until the row is full */	\
+	LOOKUP8(7, TIFFFaxMainTable, eof2d);				\
+	switch (TabEnt->State) {					\
+	case S_Pass:			/* no run stored here */	\
+	    CHECK_b1;							\
+	    b1 += *pb++;						\
+	    RunLength += b1 - a0;					\
+	    a0 = b1;							\
+	    b1 += *pb++;						\
+	    break;							\
+	case S_Horiz:			/* two 1-D coded runs */	\
+	    if ((pa-thisrun)&1) {	/* odd number of runs so far */	\
+		for (;;) {	/* black first */			\
+		    LOOKUP16(13, TIFFFaxBlackTable, eof2d);		\
+		    switch (TabEnt->State) {				\
+		    case S_TermB:					\
+			SETVALUE(TabEnt->Param);				\
+			goto doneWhite2da;				\
+		    case S_MakeUpB:					\
+		    case S_MakeUp:					\
+			a0 += TabEnt->Param;				\
+			RunLength += TabEnt->Param;			\
+			break;						\
+		    default:						\
+			goto badBlack2d;				\
+		    }							\
+		}							\
+	    doneWhite2da:;						\
+		for (;;) {	/* then white */			\
+		    LOOKUP16(12, TIFFFaxWhiteTable, eof2d);		\
+		    switch (TabEnt->State) {				\
+		    case S_TermW:					\
+			SETVALUE(TabEnt->Param);				\
+			goto doneBlack2da;				\
+		    case S_MakeUpW:					\
+		    case S_MakeUp:					\
+			a0 += TabEnt->Param;				\
+			RunLength += TabEnt->Param;			\
+			break;						\
+		    default:						\
+			goto badWhite2d;				\
+		    }							\
+		}							\
+	    doneBlack2da:;						\
+	    } else {							\
+		for (;;) {	/* white first */			\
+		    LOOKUP16(12, TIFFFaxWhiteTable, eof2d);		\
+		    switch (TabEnt->State) {				\
+		    case S_TermW:					\
+			SETVALUE(TabEnt->Param);				\
+			goto doneWhite2db;				\
+		    case S_MakeUpW:					\
+		    case S_MakeUp:					\
+			a0 += TabEnt->Param;				\
+			RunLength += TabEnt->Param;			\
+			break;						\
+		    default:						\
+			goto badWhite2d;				\
+		    }							\
+		}							\
+	    doneWhite2db:;						\
+		for (;;) {	/* then black */			\
+		    LOOKUP16(13, TIFFFaxBlackTable, eof2d);		\
+		    switch (TabEnt->State) {				\
+		    case S_TermB:					\
+			SETVALUE(TabEnt->Param);				\
+			goto doneBlack2db;				\
+		    case S_MakeUpB:					\
+		    case S_MakeUp:					\
+			a0 += TabEnt->Param;				\
+			RunLength += TabEnt->Param;			\
+			break;						\
+		    default:						\
+			goto badBlack2d;				\
+		    }							\
+		}							\
+	    doneBlack2db:;						\
+	    }								\
+	    CHECK_b1;							\
+	    break;							\
+	case S_V0:			/* run ends at b1 */		\
+	    CHECK_b1;							\
+	    SETVALUE(b1 - a0);						\
+	    b1 += *pb++;						\
+	    break;							\
+	case S_VR:			/* run ends Param right of b1 */ \
+	    CHECK_b1;							\
+	    SETVALUE(b1 - a0 + TabEnt->Param);				\
+	    b1 += *pb++;						\
+	    break;							\
+	case S_VL:			/* run ends Param left of b1 */	\
+	    CHECK_b1;							\
+	    SETVALUE(b1 - a0 - TabEnt->Param);				\
+	    b1 -= *--pb;						\
+	    break;							\
+	case S_Ext:			/* rest of row as one run */	\
+	    *pa++ = lastx - a0;						\
+	    extension(a0);						\
+	    goto eol2d;							\
+	case S_EOL:			/* rest of row as one run */	\
+	    *pa++ = lastx - a0;						\
+	    NeedBits8(4,eof2d);		/* 7 zero bits matched; look at 4 more */ \
+	    if (GetBits(4))						\
+		unexpected("EOL", a0);					\
+            ClrBits(4);                                                 \
+	    EOLcnt = 1;							\
+	    goto eol2d;							\
+	default:							\
+	badMain2d:							\
+	    unexpected("MainTable", a0);				\
+	    goto eol2d;							\
+	badBlack2d:							\
+	    unexpected("BlackTable", a0);				\
+	    goto eol2d;							\
+	badWhite2d:							\
+	    unexpected("WhiteTable", a0);				\
+	    goto eol2d;							\
+	eof2d:				/* input ran out mid-row */	\
+	    prematureEOF(a0);						\
+	    CLEANUP_RUNS();						\
+	    goto eoflab;						\
+	}								\
+    }									\
+    if (RunLength) {			/* make-up total still pending */ \
+	if (RunLength + a0 < lastx) {					\
+	    /* expect a final V0 */					\
+	    NeedBits8(1,eof2d);						\
+	    if (!GetBits(1))						\
+		goto badMain2d;						\
+	    ClrBits(1);							\
+	}								\
+	SETVALUE(0);							\
+    }									\
+eol2d:									\
+    CLEANUP_RUNS();							\
+} while (0)
+#endif /* _FAX3_ */
diff --git a/src/libtiff/tif_fax3sm.c b/src/libtiff/tif_fax3sm.c
new file mode 100644
index 0000000..08ce1ad
--- /dev/null
+++ b/src/libtiff/tif_fax3sm.c
@@ -0,0 +1,1253 @@
+/* WARNING, this file was automatically generated by the
+    mkg3states program */
+#include "tiff.h"
+#include "tif_fax3.h"
+ const TIFFFaxTabEnt TIFFFaxMainTable[128] = {	/* Emitted by mkg3states (see file header) -- regenerate, do not hand-edit. 128 (2^7) three-field entries; field order presumably {State, Width, Param} as declared for TIFFFaxTabEnt in tif_fax3.h, and presumably indexed by 7 input bits -- TODO confirm both */
+{12,7,0},{3,1,0},{5,3,1},{3,1,0},{2,3,0},{3,1,0},{4,3,1},{3,1,0},{1,4,0},{3,1,0},
+{5,3,1},{3,1,0},{2,3,0},{3,1,0},{4,3,1},{3,1,0},{5,6,2},{3,1,0},{5,3,1},{3,1,0},
+{2,3,0},{3,1,0},{4,3,1},{3,1,0},{1,4,0},{3,1,0},{5,3,1},{3,1,0},{2,3,0},{3,1,0},
+{4,3,1},{3,1,0},{5,7,3},{3,1,0},{5,3,1},{3,1,0},{2,3,0},{3,1,0},{4,3,1},{3,1,0},
+{1,4,0},{3,1,0},{5,3,1},{3,1,0},{2,3,0},{3,1,0},{4,3,1},{3,1,0},{4,6,2},{3,1,0},
+{5,3,1},{3,1,0},{2,3,0},{3,1,0},{4,3,1},{3,1,0},{1,4,0},{3,1,0},{5,3,1},{3,1,0},
+{2,3,0},{3,1,0},{4,3,1},{3,1,0},{6,7,0},{3,1,0},{5,3,1},{3,1,0},{2,3,0},{3,1,0},
+{4,3,1},{3,1,0},{1,4,0},{3,1,0},{5,3,1},{3,1,0},{2,3,0},{3,1,0},{4,3,1},{3,1,0},
+{5,6,2},{3,1,0},{5,3,1},{3,1,0},{2,3,0},{3,1,0},{4,3,1},{3,1,0},{1,4,0},{3,1,0},
+{5,3,1},{3,1,0},{2,3,0},{3,1,0},{4,3,1},{3,1,0},{4,7,3},{3,1,0},{5,3,1},{3,1,0},
+{2,3,0},{3,1,0},{4,3,1},{3,1,0},{1,4,0},{3,1,0},{5,3,1},{3,1,0},{2,3,0},{3,1,0},
+{4,3,1},{3,1,0},{4,6,2},{3,1,0},{5,3,1},{3,1,0},{2,3,0},{3,1,0},{4,3,1},{3,1,0},
+{1,4,0},{3,1,0},{5,3,1},{3,1,0},{2,3,0},{3,1,0},{4,3,1},{3,1,0}
+};
+ const TIFFFaxTabEnt TIFFFaxWhiteTable[4096] = {
+{12,11,0},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},
+{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},
+{7,8,39},{7,6,16},{9,8,576},{7,4,6},{7,7,19},{7,5,8},{7,8,55},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,45},{7,4,3},{7,5,11},{7,4,5},{7,8,53},{7,5,9},{9,8,448},{7,4,6},
+{7,8,35},{9,5,128},{7,8,51},{7,6,15},{7,8,63},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},
+{9,9,1472},{7,4,5},{7,8,43},{7,6,17},{9,9,1216},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,29},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},
+{9,6,1664},{7,4,6},{7,8,33},{9,5,128},{7,8,49},{7,6,14},{7,8,61},{7,4,4},{7,4,2},{7,4,7},
+{7,8,47},{7,4,3},{7,8,59},{7,4,5},{7,8,41},{7,6,16},{9,9,960},{7,4,6},{7,8,31},{7,5,8},
+{7,8,57},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},
+{7,7,26},{7,5,9},{9,9,704},{7,4,6},{7,8,37},{9,5,128},{7,7,25},{7,6,15},{9,8,320},{7,4,4},
+{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},
+{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{11,11,1792},{7,4,3},
+{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},
+{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,40},{7,6,16},
+{9,9,832},{7,4,6},{7,7,19},{7,5,8},{7,8,56},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,8,46},{7,4,3},{7,5,11},{7,4,5},{7,8,54},{7,5,9},{9,8,512},{7,4,6},{7,8,36},{9,5,128},
+{7,8,52},{7,6,15},{7,8,0},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1600},{7,4,5},
+{7,8,44},{7,6,17},{9,9,1344},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,30},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},
+{7,8,34},{9,5,128},{7,8,50},{7,6,14},{7,8,62},{7,4,4},{7,4,2},{7,4,7},{7,8,48},{7,4,3},
+{7,8,60},{7,4,5},{7,8,42},{7,6,16},{9,9,1088},{7,4,6},{7,8,32},{7,5,8},{7,8,58},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},
+{9,8,640},{7,4,6},{7,8,38},{9,5,128},{7,7,25},{7,6,15},{9,8,384},{7,4,4},{7,4,2},{7,4,7},
+{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},
+{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{0,0,0},{7,4,3},{7,5,11},{7,4,5},
+{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},
+{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,39},{7,6,16},{9,8,576},{7,4,6},
+{7,7,19},{7,5,8},{7,8,55},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,45},{7,4,3},
+{7,5,11},{7,4,5},{7,8,53},{7,5,9},{9,8,448},{7,4,6},{7,8,35},{9,5,128},{7,8,51},{7,6,15},
+{7,8,63},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1536},{7,4,5},{7,8,43},{7,6,17},
+{9,9,1280},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,8,29},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,33},{9,5,128},
+{7,8,49},{7,6,14},{7,8,61},{7,4,4},{7,4,2},{7,4,7},{7,8,47},{7,4,3},{7,8,59},{7,4,5},
+{7,8,41},{7,6,16},{9,9,1024},{7,4,6},{7,8,31},{7,5,8},{7,8,57},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,9,768},{7,4,6},
+{7,8,37},{9,5,128},{7,7,25},{7,6,15},{9,8,320},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},
+{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{11,11,1856},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},
+{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},
+{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,40},{7,6,16},{9,9,896},{7,4,6},{7,7,19},{7,5,8},
+{7,8,56},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,46},{7,4,3},{7,5,11},{7,4,5},
+{7,8,54},{7,5,9},{9,8,512},{7,4,6},{7,8,36},{9,5,128},{7,8,52},{7,6,15},{7,8,0},{7,4,4},
+{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1728},{7,4,5},{7,8,44},{7,6,17},{9,9,1408},{7,4,6},
+{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,30},{7,4,3},
+{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,34},{9,5,128},{7,8,50},{7,6,14},
+{7,8,62},{7,4,4},{7,4,2},{7,4,7},{7,8,48},{7,4,3},{7,8,60},{7,4,5},{7,8,42},{7,6,16},
+{9,9,1152},{7,4,6},{7,8,32},{7,5,8},{7,8,58},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,8,640},{7,4,6},{7,8,38},{9,5,128},
+{7,7,25},{7,6,15},{9,8,384},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},
+{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{0,0,0},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},
+{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},
+{7,7,27},{7,4,5},{7,8,39},{7,6,16},{9,8,576},{7,4,6},{7,7,19},{7,5,8},{7,8,55},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,45},{7,4,3},{7,5,11},{7,4,5},{7,8,53},{7,5,9},
+{9,8,448},{7,4,6},{7,8,35},{9,5,128},{7,8,51},{7,6,15},{7,8,63},{7,4,4},{7,4,2},{7,4,7},
+{7,6,13},{7,4,3},{9,9,1472},{7,4,5},{7,8,43},{7,6,17},{9,9,1216},{7,4,6},{7,6,1},{7,5,8},
+{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,29},{7,4,3},{7,5,11},{7,4,5},
+{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,33},{9,5,128},{7,8,49},{7,6,14},{7,8,61},{7,4,4},
+{7,4,2},{7,4,7},{7,8,47},{7,4,3},{7,8,59},{7,4,5},{7,8,41},{7,6,16},{9,9,960},{7,4,6},
+{7,8,31},{7,5,8},{7,8,57},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},
+{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,9,704},{7,4,6},{7,8,37},{9,5,128},{7,7,25},{7,6,15},
+{9,8,320},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},
+{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{11,12,2112},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},
+{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},
+{7,8,40},{7,6,16},{9,9,832},{7,4,6},{7,7,19},{7,5,8},{7,8,56},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,46},{7,4,3},{7,5,11},{7,4,5},{7,8,54},{7,5,9},{9,8,512},{7,4,6},
+{7,8,36},{9,5,128},{7,8,52},{7,6,15},{7,8,0},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},
+{9,9,1600},{7,4,5},{7,8,44},{7,6,17},{9,9,1344},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,30},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},
+{9,6,1664},{7,4,6},{7,8,34},{9,5,128},{7,8,50},{7,6,14},{7,8,62},{7,4,4},{7,4,2},{7,4,7},
+{7,8,48},{7,4,3},{7,8,60},{7,4,5},{7,8,42},{7,6,16},{9,9,1088},{7,4,6},{7,8,32},{7,5,8},
+{7,8,58},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},
+{7,7,26},{7,5,9},{9,8,640},{7,4,6},{7,8,38},{9,5,128},{7,7,25},{7,6,15},{9,8,384},{7,4,4},
+{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},
+{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{0,0,0},{7,4,3},
+{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},
+{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,39},{7,6,16},
+{9,8,576},{7,4,6},{7,7,19},{7,5,8},{7,8,55},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,8,45},{7,4,3},{7,5,11},{7,4,5},{7,8,53},{7,5,9},{9,8,448},{7,4,6},{7,8,35},{9,5,128},
+{7,8,51},{7,6,15},{7,8,63},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1536},{7,4,5},
+{7,8,43},{7,6,17},{9,9,1280},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,29},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},
+{7,8,33},{9,5,128},{7,8,49},{7,6,14},{7,8,61},{7,4,4},{7,4,2},{7,4,7},{7,8,47},{7,4,3},
+{7,8,59},{7,4,5},{7,8,41},{7,6,16},{9,9,1024},{7,4,6},{7,8,31},{7,5,8},{7,8,57},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},
+{9,9,768},{7,4,6},{7,8,37},{9,5,128},{7,7,25},{7,6,15},{9,8,320},{7,4,4},{7,4,2},{7,4,7},
+{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},
+{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{11,12,2368},{7,4,3},{7,5,11},{7,4,5},
+{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},
+{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,40},{7,6,16},{9,9,896},{7,4,6},
+{7,7,19},{7,5,8},{7,8,56},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,46},{7,4,3},
+{7,5,11},{7,4,5},{7,8,54},{7,5,9},{9,8,512},{7,4,6},{7,8,36},{9,5,128},{7,8,52},{7,6,15},
+{7,8,0},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1728},{7,4,5},{7,8,44},{7,6,17},
+{9,9,1408},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,8,30},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,34},{9,5,128},
+{7,8,50},{7,6,14},{7,8,62},{7,4,4},{7,4,2},{7,4,7},{7,8,48},{7,4,3},{7,8,60},{7,4,5},
+{7,8,42},{7,6,16},{9,9,1152},{7,4,6},{7,8,32},{7,5,8},{7,8,58},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,8,640},{7,4,6},
+{7,8,38},{9,5,128},{7,7,25},{7,6,15},{9,8,384},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},
+{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{0,0,0},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},
+{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},
+{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,39},{7,6,16},{9,8,576},{7,4,6},{7,7,19},{7,5,8},
+{7,8,55},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,45},{7,4,3},{7,5,11},{7,4,5},
+{7,8,53},{7,5,9},{9,8,448},{7,4,6},{7,8,35},{9,5,128},{7,8,51},{7,6,15},{7,8,63},{7,4,4},
+{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1472},{7,4,5},{7,8,43},{7,6,17},{9,9,1216},{7,4,6},
+{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,29},{7,4,3},
+{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,33},{9,5,128},{7,8,49},{7,6,14},
+{7,8,61},{7,4,4},{7,4,2},{7,4,7},{7,8,47},{7,4,3},{7,8,59},{7,4,5},{7,8,41},{7,6,16},
+{9,9,960},{7,4,6},{7,8,31},{7,5,8},{7,8,57},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,9,704},{7,4,6},{7,8,37},{9,5,128},
+{7,7,25},{7,6,15},{9,8,320},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},
+{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{11,12,1984},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},
+{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},
+{7,7,27},{7,4,5},{7,8,40},{7,6,16},{9,9,832},{7,4,6},{7,7,19},{7,5,8},{7,8,56},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,46},{7,4,3},{7,5,11},{7,4,5},{7,8,54},{7,5,9},
+{9,8,512},{7,4,6},{7,8,36},{9,5,128},{7,8,52},{7,6,15},{7,8,0},{7,4,4},{7,4,2},{7,4,7},
+{7,6,13},{7,4,3},{9,9,1600},{7,4,5},{7,8,44},{7,6,17},{9,9,1344},{7,4,6},{7,6,1},{7,5,8},
+{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,30},{7,4,3},{7,5,11},{7,4,5},
+{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,34},{9,5,128},{7,8,50},{7,6,14},{7,8,62},{7,4,4},
+{7,4,2},{7,4,7},{7,8,48},{7,4,3},{7,8,60},{7,4,5},{7,8,42},{7,6,16},{9,9,1088},{7,4,6},
+{7,8,32},{7,5,8},{7,8,58},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},
+{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,8,640},{7,4,6},{7,8,38},{9,5,128},{7,7,25},{7,6,15},
+{9,8,384},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},
+{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{0,0,0},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},
+{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},
+{7,8,39},{7,6,16},{9,8,576},{7,4,6},{7,7,19},{7,5,8},{7,8,55},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,45},{7,4,3},{7,5,11},{7,4,5},{7,8,53},{7,5,9},{9,8,448},{7,4,6},
+{7,8,35},{9,5,128},{7,8,51},{7,6,15},{7,8,63},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},
+{9,9,1536},{7,4,5},{7,8,43},{7,6,17},{9,9,1280},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,29},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},
+{9,6,1664},{7,4,6},{7,8,33},{9,5,128},{7,8,49},{7,6,14},{7,8,61},{7,4,4},{7,4,2},{7,4,7},
+{7,8,47},{7,4,3},{7,8,59},{7,4,5},{7,8,41},{7,6,16},{9,9,1024},{7,4,6},{7,8,31},{7,5,8},
+{7,8,57},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},
+{7,7,26},{7,5,9},{9,9,768},{7,4,6},{7,8,37},{9,5,128},{7,7,25},{7,6,15},{9,8,320},{7,4,4},
+{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},
+{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{11,11,1920},{7,4,3},
+{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},
+{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,40},{7,6,16},
+{9,9,896},{7,4,6},{7,7,19},{7,5,8},{7,8,56},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,8,46},{7,4,3},{7,5,11},{7,4,5},{7,8,54},{7,5,9},{9,8,512},{7,4,6},{7,8,36},{9,5,128},
+{7,8,52},{7,6,15},{7,8,0},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1728},{7,4,5},
+{7,8,44},{7,6,17},{9,9,1408},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,30},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},
+{7,8,34},{9,5,128},{7,8,50},{7,6,14},{7,8,62},{7,4,4},{7,4,2},{7,4,7},{7,8,48},{7,4,3},
+{7,8,60},{7,4,5},{7,8,42},{7,6,16},{9,9,1152},{7,4,6},{7,8,32},{7,5,8},{7,8,58},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},
+{9,8,640},{7,4,6},{7,8,38},{9,5,128},{7,7,25},{7,6,15},{9,8,384},{7,4,4},{7,4,2},{7,4,7},
+{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},
+{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{0,0,0},{7,4,3},{7,5,11},{7,4,5},
+{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},
+{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,39},{7,6,16},{9,8,576},{7,4,6},
+{7,7,19},{7,5,8},{7,8,55},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,45},{7,4,3},
+{7,5,11},{7,4,5},{7,8,53},{7,5,9},{9,8,448},{7,4,6},{7,8,35},{9,5,128},{7,8,51},{7,6,15},
+{7,8,63},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1472},{7,4,5},{7,8,43},{7,6,17},
+{9,9,1216},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,8,29},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,33},{9,5,128},
+{7,8,49},{7,6,14},{7,8,61},{7,4,4},{7,4,2},{7,4,7},{7,8,47},{7,4,3},{7,8,59},{7,4,5},
+{7,8,41},{7,6,16},{9,9,960},{7,4,6},{7,8,31},{7,5,8},{7,8,57},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,9,704},{7,4,6},
+{7,8,37},{9,5,128},{7,7,25},{7,6,15},{9,8,320},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},
+{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{11,12,2240},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},
+{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},
+{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,40},{7,6,16},{9,9,832},{7,4,6},{7,7,19},{7,5,8},
+{7,8,56},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,46},{7,4,3},{7,5,11},{7,4,5},
+{7,8,54},{7,5,9},{9,8,512},{7,4,6},{7,8,36},{9,5,128},{7,8,52},{7,6,15},{7,8,0},{7,4,4},
+{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1600},{7,4,5},{7,8,44},{7,6,17},{9,9,1344},{7,4,6},
+{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,30},{7,4,3},
+{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,34},{9,5,128},{7,8,50},{7,6,14},
+{7,8,62},{7,4,4},{7,4,2},{7,4,7},{7,8,48},{7,4,3},{7,8,60},{7,4,5},{7,8,42},{7,6,16},
+{9,9,1088},{7,4,6},{7,8,32},{7,5,8},{7,8,58},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,8,640},{7,4,6},{7,8,38},{9,5,128},
+{7,7,25},{7,6,15},{9,8,384},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},
+{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{0,0,0},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},
+{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},
+{7,7,27},{7,4,5},{7,8,39},{7,6,16},{9,8,576},{7,4,6},{7,7,19},{7,5,8},{7,8,55},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,45},{7,4,3},{7,5,11},{7,4,5},{7,8,53},{7,5,9},
+{9,8,448},{7,4,6},{7,8,35},{9,5,128},{7,8,51},{7,6,15},{7,8,63},{7,4,4},{7,4,2},{7,4,7},
+{7,6,13},{7,4,3},{9,9,1536},{7,4,5},{7,8,43},{7,6,17},{9,9,1280},{7,4,6},{7,6,1},{7,5,8},
+{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,29},{7,4,3},{7,5,11},{7,4,5},
+{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,33},{9,5,128},{7,8,49},{7,6,14},{7,8,61},{7,4,4},
+{7,4,2},{7,4,7},{7,8,47},{7,4,3},{7,8,59},{7,4,5},{7,8,41},{7,6,16},{9,9,1024},{7,4,6},
+{7,8,31},{7,5,8},{7,8,57},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},
+{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,9,768},{7,4,6},{7,8,37},{9,5,128},{7,7,25},{7,6,15},
+{9,8,320},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},
+{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{11,12,2496},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},
+{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},
+{7,8,40},{7,6,16},{9,9,896},{7,4,6},{7,7,19},{7,5,8},{7,8,56},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,46},{7,4,3},{7,5,11},{7,4,5},{7,8,54},{7,5,9},{9,8,512},{7,4,6},
+{7,8,36},{9,5,128},{7,8,52},{7,6,15},{7,8,0},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},
+{9,9,1728},{7,4,5},{7,8,44},{7,6,17},{9,9,1408},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,30},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},
+{9,6,1664},{7,4,6},{7,8,34},{9,5,128},{7,8,50},{7,6,14},{7,8,62},{7,4,4},{7,4,2},{7,4,7},
+{7,8,48},{7,4,3},{7,8,60},{7,4,5},{7,8,42},{7,6,16},{9,9,1152},{7,4,6},{7,8,32},{7,5,8},
+{7,8,58},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},
+{7,7,26},{7,5,9},{9,8,640},{7,4,6},{7,8,38},{9,5,128},{7,7,25},{7,6,15},{9,8,384},{7,4,4},
+{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},
+{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{12,11,0},{7,4,3},
+{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},
+{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,39},{7,6,16},
+{9,8,576},{7,4,6},{7,7,19},{7,5,8},{7,8,55},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,8,45},{7,4,3},{7,5,11},{7,4,5},{7,8,53},{7,5,9},{9,8,448},{7,4,6},{7,8,35},{9,5,128},
+{7,8,51},{7,6,15},{7,8,63},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1472},{7,4,5},
+{7,8,43},{7,6,17},{9,9,1216},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,29},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},
+{7,8,33},{9,5,128},{7,8,49},{7,6,14},{7,8,61},{7,4,4},{7,4,2},{7,4,7},{7,8,47},{7,4,3},
+{7,8,59},{7,4,5},{7,8,41},{7,6,16},{9,9,960},{7,4,6},{7,8,31},{7,5,8},{7,8,57},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},
+{9,9,704},{7,4,6},{7,8,37},{9,5,128},{7,7,25},{7,6,15},{9,8,320},{7,4,4},{7,4,2},{7,4,7},
+{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},
+{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{11,11,1792},{7,4,3},{7,5,11},{7,4,5},
+{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},
+{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,40},{7,6,16},{9,9,832},{7,4,6},
+{7,7,19},{7,5,8},{7,8,56},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,46},{7,4,3},
+{7,5,11},{7,4,5},{7,8,54},{7,5,9},{9,8,512},{7,4,6},{7,8,36},{9,5,128},{7,8,52},{7,6,15},
+{7,8,0},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1600},{7,4,5},{7,8,44},{7,6,17},
+{9,9,1344},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,8,30},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,34},{9,5,128},
+{7,8,50},{7,6,14},{7,8,62},{7,4,4},{7,4,2},{7,4,7},{7,8,48},{7,4,3},{7,8,60},{7,4,5},
+{7,8,42},{7,6,16},{9,9,1088},{7,4,6},{7,8,32},{7,5,8},{7,8,58},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,8,640},{7,4,6},
+{7,8,38},{9,5,128},{7,7,25},{7,6,15},{9,8,384},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},
+{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{0,0,0},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},
+{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},
+{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,39},{7,6,16},{9,8,576},{7,4,6},{7,7,19},{7,5,8},
+{7,8,55},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,45},{7,4,3},{7,5,11},{7,4,5},
+{7,8,53},{7,5,9},{9,8,448},{7,4,6},{7,8,35},{9,5,128},{7,8,51},{7,6,15},{7,8,63},{7,4,4},
+{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1536},{7,4,5},{7,8,43},{7,6,17},{9,9,1280},{7,4,6},
+{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,29},{7,4,3},
+{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,33},{9,5,128},{7,8,49},{7,6,14},
+{7,8,61},{7,4,4},{7,4,2},{7,4,7},{7,8,47},{7,4,3},{7,8,59},{7,4,5},{7,8,41},{7,6,16},
+{9,9,1024},{7,4,6},{7,8,31},{7,5,8},{7,8,57},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,9,768},{7,4,6},{7,8,37},{9,5,128},
+{7,7,25},{7,6,15},{9,8,320},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},
+{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{11,11,1856},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},
+{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},
+{7,7,27},{7,4,5},{7,8,40},{7,6,16},{9,9,896},{7,4,6},{7,7,19},{7,5,8},{7,8,56},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,46},{7,4,3},{7,5,11},{7,4,5},{7,8,54},{7,5,9},
+{9,8,512},{7,4,6},{7,8,36},{9,5,128},{7,8,52},{7,6,15},{7,8,0},{7,4,4},{7,4,2},{7,4,7},
+{7,6,13},{7,4,3},{9,9,1728},{7,4,5},{7,8,44},{7,6,17},{9,9,1408},{7,4,6},{7,6,1},{7,5,8},
+{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,30},{7,4,3},{7,5,11},{7,4,5},
+{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,34},{9,5,128},{7,8,50},{7,6,14},{7,8,62},{7,4,4},
+{7,4,2},{7,4,7},{7,8,48},{7,4,3},{7,8,60},{7,4,5},{7,8,42},{7,6,16},{9,9,1152},{7,4,6},
+{7,8,32},{7,5,8},{7,8,58},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},
+{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,8,640},{7,4,6},{7,8,38},{9,5,128},{7,7,25},{7,6,15},
+{9,8,384},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},
+{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{0,0,0},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},
+{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},
+{7,8,39},{7,6,16},{9,8,576},{7,4,6},{7,7,19},{7,5,8},{7,8,55},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,45},{7,4,3},{7,5,11},{7,4,5},{7,8,53},{7,5,9},{9,8,448},{7,4,6},
+{7,8,35},{9,5,128},{7,8,51},{7,6,15},{7,8,63},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},
+{9,9,1472},{7,4,5},{7,8,43},{7,6,17},{9,9,1216},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,29},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},
+{9,6,1664},{7,4,6},{7,8,33},{9,5,128},{7,8,49},{7,6,14},{7,8,61},{7,4,4},{7,4,2},{7,4,7},
+{7,8,47},{7,4,3},{7,8,59},{7,4,5},{7,8,41},{7,6,16},{9,9,960},{7,4,6},{7,8,31},{7,5,8},
+{7,8,57},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},
+{7,7,26},{7,5,9},{9,9,704},{7,4,6},{7,8,37},{9,5,128},{7,7,25},{7,6,15},{9,8,320},{7,4,4},
+{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},
+{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{11,12,2176},{7,4,3},
+{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},
+{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,40},{7,6,16},
+{9,9,832},{7,4,6},{7,7,19},{7,5,8},{7,8,56},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,8,46},{7,4,3},{7,5,11},{7,4,5},{7,8,54},{7,5,9},{9,8,512},{7,4,6},{7,8,36},{9,5,128},
+{7,8,52},{7,6,15},{7,8,0},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1600},{7,4,5},
+{7,8,44},{7,6,17},{9,9,1344},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,30},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},
+{7,8,34},{9,5,128},{7,8,50},{7,6,14},{7,8,62},{7,4,4},{7,4,2},{7,4,7},{7,8,48},{7,4,3},
+{7,8,60},{7,4,5},{7,8,42},{7,6,16},{9,9,1088},{7,4,6},{7,8,32},{7,5,8},{7,8,58},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},
+{9,8,640},{7,4,6},{7,8,38},{9,5,128},{7,7,25},{7,6,15},{9,8,384},{7,4,4},{7,4,2},{7,4,7},
+{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},
+{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{0,0,0},{7,4,3},{7,5,11},{7,4,5},
+{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},
+{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,39},{7,6,16},{9,8,576},{7,4,6},
+{7,7,19},{7,5,8},{7,8,55},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,45},{7,4,3},
+{7,5,11},{7,4,5},{7,8,53},{7,5,9},{9,8,448},{7,4,6},{7,8,35},{9,5,128},{7,8,51},{7,6,15},
+{7,8,63},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1536},{7,4,5},{7,8,43},{7,6,17},
+{9,9,1280},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,8,29},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,33},{9,5,128},
+{7,8,49},{7,6,14},{7,8,61},{7,4,4},{7,4,2},{7,4,7},{7,8,47},{7,4,3},{7,8,59},{7,4,5},
+{7,8,41},{7,6,16},{9,9,1024},{7,4,6},{7,8,31},{7,5,8},{7,8,57},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,9,768},{7,4,6},
+{7,8,37},{9,5,128},{7,7,25},{7,6,15},{9,8,320},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},
+{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{11,12,2432},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},
+{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},
+{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,40},{7,6,16},{9,9,896},{7,4,6},{7,7,19},{7,5,8},
+{7,8,56},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,46},{7,4,3},{7,5,11},{7,4,5},
+{7,8,54},{7,5,9},{9,8,512},{7,4,6},{7,8,36},{9,5,128},{7,8,52},{7,6,15},{7,8,0},{7,4,4},
+{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1728},{7,4,5},{7,8,44},{7,6,17},{9,9,1408},{7,4,6},
+{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,30},{7,4,3},
+{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,34},{9,5,128},{7,8,50},{7,6,14},
+{7,8,62},{7,4,4},{7,4,2},{7,4,7},{7,8,48},{7,4,3},{7,8,60},{7,4,5},{7,8,42},{7,6,16},
+{9,9,1152},{7,4,6},{7,8,32},{7,5,8},{7,8,58},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,8,640},{7,4,6},{7,8,38},{9,5,128},
+{7,7,25},{7,6,15},{9,8,384},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},
+{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{0,0,0},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},
+{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},
+{7,7,27},{7,4,5},{7,8,39},{7,6,16},{9,8,576},{7,4,6},{7,7,19},{7,5,8},{7,8,55},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,45},{7,4,3},{7,5,11},{7,4,5},{7,8,53},{7,5,9},
+{9,8,448},{7,4,6},{7,8,35},{9,5,128},{7,8,51},{7,6,15},{7,8,63},{7,4,4},{7,4,2},{7,4,7},
+{7,6,13},{7,4,3},{9,9,1472},{7,4,5},{7,8,43},{7,6,17},{9,9,1216},{7,4,6},{7,6,1},{7,5,8},
+{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,29},{7,4,3},{7,5,11},{7,4,5},
+{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,33},{9,5,128},{7,8,49},{7,6,14},{7,8,61},{7,4,4},
+{7,4,2},{7,4,7},{7,8,47},{7,4,3},{7,8,59},{7,4,5},{7,8,41},{7,6,16},{9,9,960},{7,4,6},
+{7,8,31},{7,5,8},{7,8,57},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},
+{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,9,704},{7,4,6},{7,8,37},{9,5,128},{7,7,25},{7,6,15},
+{9,8,320},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},
+{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{11,12,2048},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},
+{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},
+{7,8,40},{7,6,16},{9,9,832},{7,4,6},{7,7,19},{7,5,8},{7,8,56},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,46},{7,4,3},{7,5,11},{7,4,5},{7,8,54},{7,5,9},{9,8,512},{7,4,6},
+{7,8,36},{9,5,128},{7,8,52},{7,6,15},{7,8,0},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},
+{9,9,1600},{7,4,5},{7,8,44},{7,6,17},{9,9,1344},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,30},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},
+{9,6,1664},{7,4,6},{7,8,34},{9,5,128},{7,8,50},{7,6,14},{7,8,62},{7,4,4},{7,4,2},{7,4,7},
+{7,8,48},{7,4,3},{7,8,60},{7,4,5},{7,8,42},{7,6,16},{9,9,1088},{7,4,6},{7,8,32},{7,5,8},
+{7,8,58},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},
+{7,7,26},{7,5,9},{9,8,640},{7,4,6},{7,8,38},{9,5,128},{7,7,25},{7,6,15},{9,8,384},{7,4,4},
+{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},
+{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{0,0,0},{7,4,3},
+{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},
+{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,39},{7,6,16},
+{9,8,576},{7,4,6},{7,7,19},{7,5,8},{7,8,55},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,8,45},{7,4,3},{7,5,11},{7,4,5},{7,8,53},{7,5,9},{9,8,448},{7,4,6},{7,8,35},{9,5,128},
+{7,8,51},{7,6,15},{7,8,63},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1536},{7,4,5},
+{7,8,43},{7,6,17},{9,9,1280},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,29},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},
+{7,8,33},{9,5,128},{7,8,49},{7,6,14},{7,8,61},{7,4,4},{7,4,2},{7,4,7},{7,8,47},{7,4,3},
+{7,8,59},{7,4,5},{7,8,41},{7,6,16},{9,9,1024},{7,4,6},{7,8,31},{7,5,8},{7,8,57},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},
+{9,9,768},{7,4,6},{7,8,37},{9,5,128},{7,7,25},{7,6,15},{9,8,320},{7,4,4},{7,4,2},{7,4,7},
+{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},
+{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{11,11,1920},{7,4,3},{7,5,11},{7,4,5},
+{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},
+{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,40},{7,6,16},{9,9,896},{7,4,6},
+{7,7,19},{7,5,8},{7,8,56},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,46},{7,4,3},
+{7,5,11},{7,4,5},{7,8,54},{7,5,9},{9,8,512},{7,4,6},{7,8,36},{9,5,128},{7,8,52},{7,6,15},
+{7,8,0},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1728},{7,4,5},{7,8,44},{7,6,17},
+{9,9,1408},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,8,30},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,34},{9,5,128},
+{7,8,50},{7,6,14},{7,8,62},{7,4,4},{7,4,2},{7,4,7},{7,8,48},{7,4,3},{7,8,60},{7,4,5},
+{7,8,42},{7,6,16},{9,9,1152},{7,4,6},{7,8,32},{7,5,8},{7,8,58},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,8,640},{7,4,6},
+{7,8,38},{9,5,128},{7,7,25},{7,6,15},{9,8,384},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},
+{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{0,0,0},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},
+{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},
+{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,39},{7,6,16},{9,8,576},{7,4,6},{7,7,19},{7,5,8},
+{7,8,55},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,45},{7,4,3},{7,5,11},{7,4,5},
+{7,8,53},{7,5,9},{9,8,448},{7,4,6},{7,8,35},{9,5,128},{7,8,51},{7,6,15},{7,8,63},{7,4,4},
+{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1472},{7,4,5},{7,8,43},{7,6,17},{9,9,1216},{7,4,6},
+{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,29},{7,4,3},
+{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,33},{9,5,128},{7,8,49},{7,6,14},
+{7,8,61},{7,4,4},{7,4,2},{7,4,7},{7,8,47},{7,4,3},{7,8,59},{7,4,5},{7,8,41},{7,6,16},
+{9,9,960},{7,4,6},{7,8,31},{7,5,8},{7,8,57},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,9,704},{7,4,6},{7,8,37},{9,5,128},
+{7,7,25},{7,6,15},{9,8,320},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},
+{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{11,12,2304},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},
+{7,7,20},{9,5,128},{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},
+{7,7,27},{7,4,5},{7,8,40},{7,6,16},{9,9,832},{7,4,6},{7,7,19},{7,5,8},{7,8,56},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,46},{7,4,3},{7,5,11},{7,4,5},{7,8,54},{7,5,9},
+{9,8,512},{7,4,6},{7,8,36},{9,5,128},{7,8,52},{7,6,15},{7,8,0},{7,4,4},{7,4,2},{7,4,7},
+{7,6,13},{7,4,3},{9,9,1600},{7,4,5},{7,8,44},{7,6,17},{9,9,1344},{7,4,6},{7,6,1},{7,5,8},
+{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,30},{7,4,3},{7,5,11},{7,4,5},
+{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,8,34},{9,5,128},{7,8,50},{7,6,14},{7,8,62},{7,4,4},
+{7,4,2},{7,4,7},{7,8,48},{7,4,3},{7,8,60},{7,4,5},{7,8,42},{7,6,16},{9,9,1088},{7,4,6},
+{7,8,32},{7,5,8},{7,8,58},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},
+{7,5,11},{7,4,5},{7,7,26},{7,5,9},{9,8,640},{7,4,6},{7,8,38},{9,5,128},{7,7,25},{7,6,15},
+{9,8,384},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},
+{9,7,256},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{0,0,0},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},
+{7,7,24},{7,6,14},{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},
+{7,8,39},{7,6,16},{9,8,576},{7,4,6},{7,7,19},{7,5,8},{7,8,55},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,45},{7,4,3},{7,5,11},{7,4,5},{7,8,53},{7,5,9},{9,8,448},{7,4,6},
+{7,8,35},{9,5,128},{7,8,51},{7,6,15},{7,8,63},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},
+{9,9,1536},{7,4,5},{7,8,43},{7,6,17},{9,9,1280},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,8,29},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},
+{9,6,1664},{7,4,6},{7,8,33},{9,5,128},{7,8,49},{7,6,14},{7,8,61},{7,4,4},{7,4,2},{7,4,7},
+{7,8,47},{7,4,3},{7,8,59},{7,4,5},{7,8,41},{7,6,16},{9,9,1024},{7,4,6},{7,8,31},{7,5,8},
+{7,8,57},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},
+{7,7,26},{7,5,9},{9,9,768},{7,4,6},{7,8,37},{9,5,128},{7,7,25},{7,6,15},{9,8,320},{7,4,4},
+{7,4,2},{7,4,7},{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},
+{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},{11,12,2560},{7,4,3},
+{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},{7,7,20},{9,5,128},{7,7,24},{7,6,14},
+{7,7,28},{7,4,4},{7,4,2},{7,4,7},{7,7,23},{7,4,3},{7,7,27},{7,4,5},{7,8,40},{7,6,16},
+{9,9,896},{7,4,6},{7,7,19},{7,5,8},{7,8,56},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7},
+{7,8,46},{7,4,3},{7,5,11},{7,4,5},{7,8,54},{7,5,9},{9,8,512},{7,4,6},{7,8,36},{9,5,128},
+{7,8,52},{7,6,15},{7,8,0},{7,4,4},{7,4,2},{7,4,7},{7,6,13},{7,4,3},{9,9,1728},{7,4,5},
+{7,8,44},{7,6,17},{9,9,1408},{7,4,6},{7,6,1},{7,5,8},{9,6,192},{9,5,64},{7,5,10},{7,4,4},
+{7,4,2},{7,4,7},{7,8,30},{7,4,3},{7,5,11},{7,4,5},{7,6,12},{7,5,9},{9,6,1664},{7,4,6},
+{7,8,34},{9,5,128},{7,8,50},{7,6,14},{7,8,62},{7,4,4},{7,4,2},{7,4,7},{7,8,48},{7,4,3},
+{7,8,60},{7,4,5},{7,8,42},{7,6,16},{9,9,1152},{7,4,6},{7,8,32},{7,5,8},{7,8,58},{9,5,64},
+{7,5,10},{7,4,4},{7,4,2},{7,4,7},{7,7,22},{7,4,3},{7,5,11},{7,4,5},{7,7,26},{7,5,9},
+{9,8,640},{7,4,6},{7,8,38},{9,5,128},{7,7,25},{7,6,15},{9,8,384},{7,4,4},{7,4,2},{7,4,7},
+{7,6,13},{7,4,3},{7,7,18},{7,4,5},{7,7,21},{7,6,17},{9,7,256},{7,4,6},{7,6,1},{7,5,8},
+{9,6,192},{9,5,64},{7,5,10},{7,4,4},{7,4,2},{7,4,7}
+};
+ const TIFFFaxTabEnt TIFFFaxBlackTable[8192] = {
+{12,11,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,9,15},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,18},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,17},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,11,1792},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,11,23},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,20},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,11,25},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,12,128},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,56},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,30},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,11,1856},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,57},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,11,21},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,54},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,9,15},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,52},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,48},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{11,12,2112},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,44},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,36},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,12,384},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,28},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,60},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,40},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,12,2368},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,16},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{10,10,64},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,9,15},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,18},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,10,17},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{11,12,1984},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,50},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,34},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,13,1664},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,26},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,13,1408},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,32},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,11,1920},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,61},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,42},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{10,13,1024},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,9,15},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{10,13,768},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,62},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,12,2240},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,46},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,38},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,13,512},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,11,19},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,24},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,22},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{11,12,2496},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,10,16},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,0},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,10,64},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{12,11,0},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,9,15},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,10,18},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,17},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,11,1792},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,23},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,20},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,11,25},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{10,12,192},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,13,1280},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,31},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{11,11,1856},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,58},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,11,21},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,13,896},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,9,15},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,13,640},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,49},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,12,2176},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,45},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,37},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{10,12,448},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,29},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{10,13,1536},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,41},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,12,2432},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,16},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,10,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,10,64},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,9,15},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,18},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,17},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{11,12,2048},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,51},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,35},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,12,320},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,27},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,59},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,33},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,11,1920},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,12,256},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,43},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{10,13,1152},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,9,15},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,55},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,63},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{11,12,2304},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,47},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,39},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,53},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,19},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,24},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,22},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,12,2560},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,10,16},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,0},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{10,10,64},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{12,11,0},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,9,15},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,10,18},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,10,17},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,11,1792},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,23},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,11,20},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,25},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{10,12,128},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,56},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,30},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{11,11,1856},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,57},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,21},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,54},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,9,15},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,52},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,48},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,12,2112},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,44},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,36},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{10,12,384},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,28},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,60},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,40},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{11,12,2368},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,16},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,10,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,10,64},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,9,15},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,18},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,17},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,12,1984},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,50},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,34},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{10,13,1728},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,26},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{10,13,1472},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,32},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,11,1920},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,61},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,42},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,13,1088},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,9,15},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,13,832},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,62},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{11,12,2240},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,46},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,38},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,13,576},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,19},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,11,24},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,22},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,12,2496},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,16},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{10,10,64},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{12,11,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,9,15},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,18},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,10,17},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{11,11,1792},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,23},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,11,20},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,25},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,12,192},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,13,1344},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,31},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,11,1856},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,58},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,21},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{10,13,960},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,9,15},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{10,13,704},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,49},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,12,2176},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,45},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,37},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,12,448},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,29},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,13,1600},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,41},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{11,12,2432},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,10,16},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,0},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,10,64},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,9,15},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,10,18},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,17},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,12,2048},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,51},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,35},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{10,12,320},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,27},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,59},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,33},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{11,11,1920},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,12,256},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,43},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,13,1216},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{0,0,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,8,13},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,9,15},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,55},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,63},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,12,2304},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,12,47},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,12,39},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,12,53},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,12},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{0,0,0},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,8,13},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,11,19},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,11,24},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,11,22},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{11,12,2560},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,7,10},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,10,16},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2},{8,10,0},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},
+{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{10,10,64},{8,2,3},
+{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,9},{8,2,3},{8,3,1},{8,2,2},
+{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,11},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},
+{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},
+{8,8,14},{8,2,3},{8,3,1},{8,2,2},{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,6,8},{8,2,3},
+{8,3,1},{8,2,2},{8,4,5},{8,2,3},{8,3,4},{8,2,2},{8,7,12},{8,2,3},{8,3,1},{8,2,2},
+{8,4,6},{8,2,3},{8,3,4},{8,2,2},{8,5,7},{8,2,3},{8,3,1},{8,2,2},{8,4,5},{8,2,3},
+{8,3,4},{8,2,2}
+};
diff --git a/src/libtiff/tif_flush.c b/src/libtiff/tif_flush.c
new file mode 100644
index 0000000..140a58a
--- /dev/null
+++ b/src/libtiff/tif_flush.c
@@ -0,0 +1,67 @@
+/* $Header: /cvsroot/imtoolkit/im/src/libtiff/tif_flush.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ */
+#include "tiffiop.h"
+
+int
+TIFFFlush(TIFF* tif)
+{
+	/* Push buffered data, then a dirty directory, out to the file. */
+	if (tif->tif_mode == O_RDONLY)
+		return (1);		/* read-only handle: nothing to flush */
+	if (!TIFFFlushData(tif))
+		return (0);
+	if ((tif->tif_flags & TIFF_DIRTYDIRECT) == 0)
+		return (1);		/* directory has no pending changes */
+
+	return (TIFFWriteDirectory(tif) ? 1 : 0);
+}
+
+/*
+ * Flush buffered data to the file.
+ *
+ * Returns 1 when TIFF_BEENWRITING is not set: not having been writing is
+ * not an error, and TIFFFlush() must be allowed to go on and write out a
+ * dirty directory (Frank Warmerdam, 2000).  Otherwise runs the pending
+ * post-encode hook, if any, and returns 0 when it fails or else the
+ * result of TIFFFlushData1().
+ */
+int
+TIFFFlushData(TIFF* tif)
+{
+	if ((tif->tif_flags & TIFF_BEENWRITING) == 0)
+		return (1);
+	if (tif->tif_flags & TIFF_POSTENCODE) {
+		tif->tif_flags &= ~TIFF_POSTENCODE;
+		if (!(*tif->tif_postencode)(tif))
+			return (0);
+	}
+	return (TIFFFlushData1(tif));
+}
+
diff --git a/src/libtiff/tif_getimage.c b/src/libtiff/tif_getimage.c
new file mode 100644
index 0000000..251733c
--- /dev/null
+++ b/src/libtiff/tif_getimage.c
@@ -0,0 +1,2598 @@
+/* $Id: tif_getimage.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library
+ *
+ * Read and return a packed RGBA image.
+ */
+#include "tiffiop.h"
+#include <stdio.h>
+
+static	int gtTileContig(TIFFRGBAImage*, uint32*, uint32, uint32);	/* tiled, contiguous */
+static	int gtTileSeparate(TIFFRGBAImage*, uint32*, uint32, uint32);	/* tiled, separate planes */
+static	int gtStripContig(TIFFRGBAImage*, uint32*, uint32, uint32);	/* stripped, contiguous */
+static	int gtStripSeparate(TIFFRGBAImage*, uint32*, uint32, uint32);	/* stripped, separate planes */
+static	int pickTileContigCase(TIFFRGBAImage*);		/* 0 = image can not be handled */
+static	int pickTileSeparateCase(TIFFRGBAImage*);	/* 0 = image can not be handled */
+
+static	const char photoTag[] = "PhotometricInterpretation";	/* tag name quoted in error messages */
+
+/*
+ * Helper constants used in Orientation tag handling (bits of the setorientation() result)
+ */
+#define FLIP_VERTICALLY 0x01	/* raster rows are written in reverse order */
+#define FLIP_HORIZONTALLY 0x02	/* pixels within each row are mirrored */
+
+/*
+ * Color conversion constants. We will define display types here.
+ * NOTE(review): rows look like XYZ -> linear sRGB (R, G, B) -- confirm.
+ */
+TIFFDisplay display_sRGB = {
+	{			/* XYZ -> luminance matrix */
+		{  3.2410F, -1.5374F, -0.4986F },
+		{  -0.9692F, 1.8760F, 0.0416F },
+		{  0.0556F, -0.2040F, 1.0570F }
+	},	
+	100.0F, 100.0F, 100.0F,	/* Light o/p for reference white */
+	255, 255, 255,		/* Pixel values for ref. white */
+	1.0F, 1.0F, 1.0F,	/* Residual light o/p for black pixel */
+	2.4F, 2.4F, 2.4F,	/* Gamma values for the three guns */
+};
+
+/*
+ * Report whether TIFFReadRGBAImage is able to process the current
+ * directory of tif.  The result is 1 when the image is acceptable and 0
+ * when it is not; on rejection emsg is filled in with a description of
+ * the reason.
+ */
+int
+TIFFRGBAImageOK(TIFF* tif, char emsg[1024])
+{
+	TIFFDirectory* dir = &tif->tif_dir;
+	int bits = dir->td_bitspersample;
+	int nsamples = dir->td_samplesperpixel;
+	int ncolor = nsamples - dir->td_extrasamples;
+	uint16 photometric;
+
+	/* Give up when the decoder for this compression is not configured. */
+	if (!tif->tif_decodestatus) {
+		sprintf(emsg, "Sorry, requested compression method is not configured");
+		return 0;
+	}
+	/* Only 1, 2, 4, 8 and 16 bits per sample are supported. */
+	if (bits != 1 && bits != 2 && bits != 4 && bits != 8 && bits != 16) {
+		sprintf(emsg, "Sorry, can not handle images with %d-bit samples",
+		    bits);
+		return 0;
+	}
+
+	/*
+	 * Without a PhotometricInterpretation tag, guess from the number of
+	 * color channels; anything other than 1 or 3 channels is rejected.
+	 */
+	if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometric)) {
+		if (ncolor == 1)
+			photometric = PHOTOMETRIC_MINISBLACK;
+		else if (ncolor == 3)
+			photometric = PHOTOMETRIC_RGB;
+		else {
+			sprintf(emsg, "Missing needed %s tag", photoTag);
+			return 0;
+		}
+	}
+
+	switch (photometric) {
+	case PHOTOMETRIC_MINISWHITE:
+	case PHOTOMETRIC_MINISBLACK:
+	case PHOTOMETRIC_PALETTE:
+		/*
+		 * Extra samples ought to be validated here (ignored or alpha);
+		 * for now only packed sub-byte multi-sample data is refused.
+		 */
+		if (dir->td_planarconfig == PLANARCONFIG_CONTIG &&
+		    nsamples != 1 && bits < 8) {
+			sprintf(emsg,
+			    "Sorry, can not handle contiguous data with %s=%d, "
+			    "and %s=%d and Bits/Sample=%d",
+			    photoTag, photometric,
+			    "Samples/pixel", nsamples,
+			    bits);
+			return 0;
+		}
+		break;
+	case PHOTOMETRIC_YCBCR:
+		if (dir->td_planarconfig != PLANARCONFIG_CONTIG) {
+			sprintf(emsg, "Sorry, can not handle YCbCr images with %s=%d",
+			    "Planarconfiguration", dir->td_planarconfig);
+			return 0;
+		}
+		break;
+	case PHOTOMETRIC_RGB:
+		if (ncolor < 3) {
+			sprintf(emsg, "Sorry, can not handle RGB image with %s=%d",
+			    "Color channels", ncolor);
+			return 0;
+		}
+		break;
+	case PHOTOMETRIC_SEPARATED: {
+		uint16 ink;
+
+		TIFFGetFieldDefaulted(tif, TIFFTAG_INKSET, &ink);
+		if (ink != INKSET_CMYK) {
+			sprintf(emsg,
+			    "Sorry, can not handle separated image with %s=%d",
+			    "InkSet", ink);
+			return 0;
+		}
+		if (nsamples < 4) {
+			sprintf(emsg,
+			    "Sorry, can not handle separated image with %s=%d",
+			    "Samples/pixel", nsamples);
+			return 0;
+		}
+		break;
+	}
+	case PHOTOMETRIC_LOGL:
+		if (dir->td_compression != COMPRESSION_SGILOG) {
+			sprintf(emsg, "Sorry, LogL data must have %s=%d",
+			    "Compression", COMPRESSION_SGILOG);
+			return 0;
+		}
+		break;
+	case PHOTOMETRIC_LOGLUV:
+		if (dir->td_compression != COMPRESSION_SGILOG &&
+		    dir->td_compression != COMPRESSION_SGILOG24) {
+			sprintf(emsg, "Sorry, LogLuv data must have %s=%d or %d",
+			    "Compression", COMPRESSION_SGILOG, COMPRESSION_SGILOG24);
+			return 0;
+		}
+		if (dir->td_planarconfig != PLANARCONFIG_CONTIG) {
+			sprintf(emsg, "Sorry, can not handle LogLuv images with %s=%d",
+			    "Planarconfiguration", dir->td_planarconfig);
+			return 0;
+		}
+		break;
+	case PHOTOMETRIC_CIELAB:
+		break;
+	default:
+		sprintf(emsg, "Sorry, can not handle image with %s=%d",
+		    photoTag, photometric);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Release the lookup tables and colormap copies that hang off img.  Every
+ * pointer is cleared as soon as it has been freed, so calling this again
+ * on the same image cannot free anything twice.
+ */
+void
+TIFFRGBAImageEnd(TIFFRGBAImage* img)
+{
+	if (img->Map) _TIFFfree(img->Map), img->Map = NULL;
+	if (img->BWmap) _TIFFfree(img->BWmap), img->BWmap = NULL;
+	if (img->PALmap) _TIFFfree(img->PALmap), img->PALmap = NULL;
+	if (img->ycbcr) _TIFFfree(img->ycbcr), img->ycbcr = NULL;
+	if (img->cielab) _TIFFfree(img->cielab), img->cielab = NULL;
+
+	/* Each colormap copy is tested on its own: the three allocations are
+	 * independent, so some may exist while another one failed. */
+	if (img->redcmap) _TIFFfree(img->redcmap), img->redcmap = NULL;
+	if (img->greencmap) _TIFFfree(img->greencmap), img->greencmap = NULL;
+	if (img->bluecmap) _TIFFfree(img->bluecmap), img->bluecmap = NULL;
+}
+
+/* Tell whether the image's Compression tag names one of the CCITT schemes. */
+static int
+isCCITTCompression(TIFF* tif)
+{
+    uint16 scheme;
+    TIFFGetField(tif, TIFFTAG_COMPRESSION, &scheme);
+    if (scheme == COMPRESSION_CCITTFAX3 || scheme == COMPRESSION_CCITTFAX4)
+	return 1;
+    return (scheme == COMPRESSION_CCITTRLE || scheme == COMPRESSION_CCITTRLEW);
+}
+
+/*
+ * Set up img for reading the current directory of tif as RGBA: record the
+ * sample layout and geometry, copy the colormap of palette images, pick
+ * the "get" routine and let pickTileContigCase()/pickTileSeparateCase()
+ * complete the setup.  stop becomes the stop-on-error flag.  Returns 1 on
+ * success; otherwise 0 with the reason in emsg and no colormap copy kept.
+ */
+int
+TIFFRGBAImageBegin(TIFFRGBAImage* img, TIFF* tif, int stop, char emsg[1024])
+{
+    uint16* sampleinfo;
+    uint16 extrasamples, planarconfig, compress;
+    int colorchannels;
+    uint16 *red_orig, *green_orig, *blue_orig;
+    int n_color;
+
+    /* Initialize to normal values; every pointer starts out NULL */
+    img->row_offset = 0;
+    img->col_offset = 0;
+    img->redcmap = NULL;
+    img->greencmap = NULL;
+    img->bluecmap = NULL;
+    img->Map = NULL;
+    img->BWmap = NULL;
+    img->PALmap = NULL;
+    img->ycbcr = NULL;
+    img->cielab = NULL;
+    img->req_orientation = ORIENTATION_BOTLEFT;	    /* It is the default */
+    img->tif = tif;
+    img->stoponerr = stop;
+    TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &img->bitspersample);
+    switch (img->bitspersample) {
+    case 1: case 2: case 4: case 8: case 16:
+	break;
+    default:
+	sprintf(emsg, "Sorry, can not handle images with %d-bit samples",
+	    img->bitspersample);
+	return (0);
+    }
+    img->alpha = 0;
+    TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &img->samplesperpixel);
+    TIFFGetFieldDefaulted(tif, TIFFTAG_EXTRASAMPLES,
+	&extrasamples, &sampleinfo);
+    if (extrasamples >= 1)
+    {
+	switch (sampleinfo[0]) {
+	case EXTRASAMPLE_UNSPECIFIED:	/* Workaround for some images without */
+		if (img->samplesperpixel > 3)	/* correct info about alpha channel */
+			img->alpha = EXTRASAMPLE_ASSOCALPHA;
+		break;
+	case EXTRASAMPLE_ASSOCALPHA:	/* data is pre-multiplied */
+	case EXTRASAMPLE_UNASSALPHA:	/* data is not pre-multiplied */
+		img->alpha = sampleinfo[0];
+		break;
+	}
+    }
+
+#ifdef DEFAULT_EXTRASAMPLE_AS_ALPHA
+    if( !TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &img->photometric))
+        img->photometric = PHOTOMETRIC_MINISWHITE;
+
+    if( extrasamples == 0 
+        && img->samplesperpixel == 4 
+        && img->photometric == PHOTOMETRIC_RGB )
+    {
+        img->alpha = EXTRASAMPLE_ASSOCALPHA;
+        extrasamples = 1;
+    }
+#endif
+
+    colorchannels = img->samplesperpixel - extrasamples;
+    TIFFGetFieldDefaulted(tif, TIFFTAG_COMPRESSION, &compress);
+    TIFFGetFieldDefaulted(tif, TIFFTAG_PLANARCONFIG, &planarconfig);
+    if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &img->photometric)) {
+	switch (colorchannels) {
+	case 1:
+	    if (isCCITTCompression(tif))
+		img->photometric = PHOTOMETRIC_MINISWHITE;
+	    else
+		img->photometric = PHOTOMETRIC_MINISBLACK;
+	    break;
+	case 3:
+	    img->photometric = PHOTOMETRIC_RGB;
+	    break;
+	default:
+	    sprintf(emsg, "Missing needed %s tag", photoTag);
+	    return (0);
+	}
+    }
+    switch (img->photometric) {
+    case PHOTOMETRIC_PALETTE:
+	if (!TIFFGetField(tif, TIFFTAG_COLORMAP,
+	    &red_orig, &green_orig, &blue_orig)) {
+	    sprintf(emsg, "Missing required \"Colormap\" tag");
+	    return (0);
+	}
+        /* copy the colormaps so we can modify them */
+        n_color = (1L << img->bitspersample);
+        img->redcmap = (uint16 *) _TIFFmalloc(sizeof(uint16)*n_color);
+        img->greencmap = (uint16 *) _TIFFmalloc(sizeof(uint16)*n_color);
+        img->bluecmap = (uint16 *) _TIFFmalloc(sizeof(uint16)*n_color);
+        if( !img->redcmap || !img->greencmap || !img->bluecmap ) {
+	    sprintf(emsg, "Out of memory for colormap copy");
+	    goto fail_return;	/* some of the three may have been allocated */
+        }
+        _TIFFmemcpy( img->redcmap, red_orig, n_color * 2 );
+        _TIFFmemcpy( img->greencmap, green_orig, n_color * 2 );
+        _TIFFmemcpy( img->bluecmap, blue_orig, n_color * 2 );
+	/* fall thru... */
+    case PHOTOMETRIC_MINISWHITE:
+    case PHOTOMETRIC_MINISBLACK:
+	if (planarconfig == PLANARCONFIG_CONTIG 
+            && img->samplesperpixel != 1
+            && img->bitspersample < 8 ) {
+	    sprintf(emsg,
+                    "Sorry, can not handle contiguous data with %s=%d, "
+                    "and %s=%d and Bits/Sample=%d",
+                    photoTag, img->photometric,
+                    "Samples/pixel", img->samplesperpixel,
+                    img->bitspersample);
+	    goto fail_return;	/* a palette image owns colormap copies by now */
+	}
+	break;
+    case PHOTOMETRIC_YCBCR:
+	if (planarconfig != PLANARCONFIG_CONTIG) {
+	    sprintf(emsg, "Sorry, can not handle YCbCr images with %s=%d",
+		"Planarconfiguration", planarconfig);
+	    return (0);
+	}
+	/* It would probably be nice to have a reality check here. */
+	/* can rely on libjpeg to convert to RGB (data is contiguous here) */
+	/* XXX should restore current state on exit */
+	if (compress == COMPRESSION_OJPEG || compress == COMPRESSION_JPEG) {
+	    TIFFSetField(tif, TIFFTAG_JPEGCOLORMODE, JPEGCOLORMODE_RGB);
+	    img->photometric = PHOTOMETRIC_RGB;
+	}
+	break;
+    case PHOTOMETRIC_RGB: 
+	if (colorchannels < 3) {
+	    sprintf(emsg, "Sorry, can not handle RGB image with %s=%d",
+		"Color channels", colorchannels);
+	    return (0);
+	}
+	break;
+    case PHOTOMETRIC_SEPARATED: {
+	uint16 inkset;
+	TIFFGetFieldDefaulted(tif, TIFFTAG_INKSET, &inkset);
+	if (inkset != INKSET_CMYK) {
+	    sprintf(emsg, "Sorry, can not handle separated image with %s=%d",
+		"InkSet", inkset);
+	    return (0);
+	}
+	if (img->samplesperpixel < 4) {
+	    sprintf(emsg, "Sorry, can not handle separated image with %s=%d",
+		"Samples/pixel", img->samplesperpixel);
+	    return (0);
+	}
+	break;
+    }
+    case PHOTOMETRIC_LOGL:
+	if (compress != COMPRESSION_SGILOG) {
+	    sprintf(emsg, "Sorry, LogL data must have %s=%d",
+		"Compression", COMPRESSION_SGILOG);
+	    return (0);
+	}
+	TIFFSetField(tif, TIFFTAG_SGILOGDATAFMT, SGILOGDATAFMT_8BIT);
+	img->photometric = PHOTOMETRIC_MINISBLACK;	/* little white lie */
+	img->bitspersample = 8;
+	break;
+    case PHOTOMETRIC_LOGLUV:
+	if (compress != COMPRESSION_SGILOG && compress != COMPRESSION_SGILOG24) {
+	    sprintf(emsg, "Sorry, LogLuv data must have %s=%d or %d",
+		"Compression", COMPRESSION_SGILOG, COMPRESSION_SGILOG24);
+	    return (0);
+	}
+	if (planarconfig != PLANARCONFIG_CONTIG) {
+	    sprintf(emsg, "Sorry, can not handle LogLuv images with %s=%d",
+		"Planarconfiguration", planarconfig);
+	    return (0);
+	}
+	TIFFSetField(tif, TIFFTAG_SGILOGDATAFMT, SGILOGDATAFMT_8BIT);
+	img->photometric = PHOTOMETRIC_RGB;		/* little white lie */
+	img->bitspersample = 8;
+	break;
+    case PHOTOMETRIC_CIELAB:
+	break;
+    default:
+	sprintf(emsg, "Sorry, can not handle image with %s=%d",
+	    photoTag, img->photometric);
+	return (0);
+    }
+    TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &img->width);
+    TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &img->height);
+    TIFFGetFieldDefaulted(tif, TIFFTAG_ORIENTATION, &img->orientation);
+    img->isContig =
+	!(planarconfig == PLANARCONFIG_SEPARATE && colorchannels > 1);
+    if (img->isContig) {
+	img->get = TIFFIsTiled(tif) ? gtTileContig : gtStripContig;
+	if (!pickTileContigCase(img)) {
+		sprintf(emsg, "Sorry, can not handle image");
+		goto fail_return;
+	}
+    } else {
+	img->get = TIFFIsTiled(tif) ? gtTileSeparate : gtStripSeparate;
+	if (!pickTileSeparateCase(img)) {
+		sprintf(emsg, "Sorry, can not handle image");
+		goto fail_return;
+	}
+    }
+    return 1;
+
+fail_return:
+    /* Callers do not run TIFFRGBAImageEnd() after a failure: free here. */
+    if (img->redcmap) _TIFFfree(img->redcmap);
+    if (img->greencmap) _TIFFfree(img->greencmap);
+    if (img->bluecmap) _TIFFfree(img->bluecmap);
+    img->redcmap = img->greencmap = img->bluecmap = NULL;
+    return 0;
+}
+
+int
+TIFFRGBAImageGet(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h)
+{
+    if (img->get == NULL) {
+		TIFFErrorExt(img->tif->tif_clientdata, TIFFFileName(img->tif), "No \"get\" routine setup");
+		return (0);
+	}
+	if (img->put.any == NULL) {
+		TIFFErrorExt(img->tif->tif_clientdata, TIFFFileName(img->tif),
+		"No \"put\" routine setupl; probably can not handle image format");
+		return (0);
+    }
+    return (*img->get)(img, raster, w, h);
+}
+
+/*
+ * Read the specified image into an ABGR-format raster, taking into account
+ * the specified orientation.  Returns what TIFFRGBAImageGet() returns, or
+ * 0 after reporting emsg when the image is rejected.  emsg is passed to
+ * TIFFErrorExt() as an argument, never as the format string itself.
+ */
+int
+TIFFReadRGBAImageOriented(TIFF* tif,
+			  uint32 rwidth, uint32 rheight, uint32* raster,
+			  int orientation, int stop)
+{
+    char emsg[1024] = "";
+    TIFFRGBAImage img;
+    int ok = 0;
+	if (TIFFRGBAImageOK(tif, emsg) && TIFFRGBAImageBegin(&img, tif, stop, emsg)) {
+		img.req_orientation = orientation;
+		/* XXX verify rwidth and rheight against width and height */
+		ok = TIFFRGBAImageGet(&img, raster+(rheight-img.height)*rwidth,
+			rwidth, img.height);
+		TIFFRGBAImageEnd(&img);
+	} else {
+		TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "%s", emsg);
+    }
+    return (ok);
+}
+
+/*
+ * Convenience form of TIFFReadRGBAImageOriented() that always asks for
+ * a raster whose origin is the bottom-left corner.
+ */
+int
+TIFFReadRGBAImage(TIFF* tif,
+		  uint32 rwidth, uint32 rheight, uint32* raster, int stop)
+{
+	const int origin = ORIENTATION_BOTLEFT;
+	return TIFFReadRGBAImageOriented(tif, rwidth, rheight, raster, origin, stop);
+}
+
+/*
+ * Work out how the decoded raster must be mirrored so that an image stored
+ * with img->orientation comes out in img->req_orientation.  The result is
+ * a combination of FLIP_HORIZONTALLY and FLIP_VERTICALLY, or 0 when no
+ * mirroring is needed or when either orientation value is not recognized.
+ *
+ * Only the corner in which the origin lies matters here: the row/column
+ * swapped variants (LEFTTOP, RIGHTTOP, RIGHTBOT, LEFTBOT) are treated the
+ * same as their plain counterparts.  For example:
+ *
+ *	stored TOPLEFT,  requested BOTLEFT   ->  FLIP_VERTICALLY
+ *	stored TOPLEFT,  requested TOPRIGHT  ->  FLIP_HORIZONTALLY
+ *	stored BOTRIGHT, requested TOPLEFT   ->  both flags
+ *	stored TOPRIGHT, requested BOTLEFT   ->  both flags
+ *	stored BOTLEFT,  requested LEFTBOT   ->  0
+ */
+static int
+setorientation(TIFFRGBAImage* img)
+{
+	int corner[2];
+	int which;
+
+	corner[0] = img->orientation;		/* as stored in the file */
+	corner[1] = img->req_orientation;	/* as asked for by the caller */
+
+	/*
+	 * Replace each orientation by the flips that lead from the top-left
+	 * corner to its own corner.
+	 */
+	for (which = 0; which < 2; which++) {
+		switch (corner[which]) {
+		case ORIENTATION_TOPLEFT:
+		case ORIENTATION_LEFTTOP:
+			corner[which] = 0;
+			break;
+		case ORIENTATION_TOPRIGHT:
+		case ORIENTATION_RIGHTTOP:
+			corner[which] = FLIP_HORIZONTALLY;
+			break;
+		case ORIENTATION_BOTRIGHT:
+		case ORIENTATION_RIGHTBOT:
+			corner[which] = FLIP_HORIZONTALLY | FLIP_VERTICALLY;
+			break;
+		case ORIENTATION_BOTLEFT:
+		case ORIENTATION_LEFTBOT:
+			corner[which] = FLIP_VERTICALLY;
+			break;
+		default:
+			/* unknown orientation: leave the raster alone */
+			return 0;
+		}
+	}
+
+	/*
+	 * A flip present in exactly one of the two is what separates the
+	 * stored corner from the requested one; a flip present in both
+	 * cancels out.
+	 */
+	return corner[0] ^ corner[1];
+}
+
+/*
+ * Get a tile-organized image that has PlanarConfiguration contiguous if
+ * SamplesPerPixel > 1, or SamplesPerPixel == 1.  Each tile is read and
+ * passed to img->put.contig to be placed in the w by h raster.  Returns 0
+ * if the buffer cannot be allocated or a read fails with stoponerr set.
+ */
+static int
+gtTileContig(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h)
+{
+    TIFF* tif = img->tif;
+    tileContigRoutine put = img->put.contig;	/* called once per tile below */
+    uint32 col, row, y, rowstoread;
+    uint32 pos;
+    uint32 tw, th;
+    unsigned char* buf;
+    int32 fromskew, toskew;
+    uint32 nrow;
+    int ret = 1, flip;
+
+    buf = (unsigned char*) _TIFFmalloc(TIFFTileSize(tif));
+    if (buf == 0) {
+		TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "No space for tile buffer");
+		return (0);
+    }
+    _TIFFmemset(buf, 0, TIFFTileSize(tif));
+    TIFFGetField(tif, TIFFTAG_TILEWIDTH, &tw);
+    TIFFGetField(tif, TIFFTAG_TILELENGTH, &th);
+    /* NOTE(review): both results are unchecked; th == 0 would divide by zero below. */
+    flip = setorientation(img);
+    if (flip & FLIP_VERTICALLY) {
+	    y = h - 1;	/* begin at the last raster row; y drops by nrow per band */
+	    toskew = -(int32)(tw + w);
+    }
+    else {
+	    y = 0;
+	    toskew = -(int32)(tw - w);
+    }
+    /* One pass per band of image rows that fall inside the same row of tiles. */
+    for (row = 0; row < h; row += nrow)
+    {
+        rowstoread = th - (row + img->row_offset) % th;	/* rows left in this tile row */
+    	nrow = (row + rowstoread > h ? h - row : rowstoread);	/* clipped to the raster */
+	for (col = 0; col < w; col += tw) 
+        {
+            if (TIFFReadTile(tif, buf, col+img->col_offset,
+                             row+img->row_offset, 0, 0) < 0 && img->stoponerr)
+            {
+                ret = 0;
+                break;	/* leaves the column loop only; later bands are still read */
+            }
+            /* Offset in buf of the first wanted row of the tile just read. */
+            pos = ((row+img->row_offset) % th) * TIFFTileRowSize(tif);
+
+    	    if (col + tw > w) 
+            {
+                /*
+                 * Tile is clipped horizontally.  Calculate
+                 * visible portion and skewing factors.
+                 */
+                uint32 npix = w - col;
+                fromskew = tw - npix;
+                (*put)(img, raster+y*w+col, col, y,
+                       npix, nrow, fromskew, toskew + fromskew, buf + pos);
+            }
+            else 
+            {
+                (*put)(img, raster+y*w+col, col, y, tw, nrow, 0, toskew, buf + pos);
+            }
+        }
+
+        y += (flip & FLIP_VERTICALLY ? -(int32) nrow : (int32) nrow);
+    }
+    _TIFFfree(buf);
+
+    if (flip & FLIP_HORIZONTALLY) {
+	    uint32 line;
+
+	    for (line = 0; line < h; line++) {
+		    uint32 *left = raster + (line * w);
+		    uint32 *right = left + w - 1;
+		    /* Swap pixels pairwise, working inward from both ends of the row. */
+		    while ( left < right ) {
+			    uint32 temp = *left;
+			    *left = *right;
+			    *right = temp;
+			    left++, right--;
+		    }
+	    }
+    }
+
+    return (ret);
+}
+
+/*
+ * Get a tile-organized image that has
+ *	 SamplesPerPixel > 1
+ *	 PlanarConfiguration separated
+ * We assume that all such images are RGB.
+ *
+ * One buffer holding four tile-sized planes (r, g, b, a) is
+ * allocated.  Samples 0..2 are read into r, g and b; sample 3 is
+ * read into a only when img->alpha is set, otherwise the a plane
+ * is preset to 0xff.
+ *
+ * Returns 1 on success; 0 if the buffer cannot be allocated or if
+ * a tile read fails while img->stoponerr is set.
+ */	
+static int
+gtTileSeparate(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h)
+{
+    TIFF* tif = img->tif;
+    tileSeparateRoutine put = img->put.separate;
+    uint32 col, row, y, rowstoread;
+    uint32 pos;
+    uint32 tw, th;
+    unsigned char* buf;
+    unsigned char* r;
+    unsigned char* g;
+    unsigned char* b;
+    unsigned char* a;
+    tsize_t tilesize;
+    int32 fromskew, toskew;
+    int alpha = img->alpha;
+    uint32 nrow;
+    int ret = 1, flip;
+
+    tilesize = TIFFTileSize(tif);
+    buf = (unsigned char*) _TIFFmalloc(4*tilesize);
+    if (buf == 0) {
+		TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "No space for tile buffer");
+		return (0);
+    }
+    _TIFFmemset(buf, 0, 4*tilesize);
+    r = buf;
+    g = r + tilesize;
+    b = g + tilesize;
+    a = b + tilesize;
+    if (!alpha)
+	_TIFFmemset(a, 0xff, tilesize);
+    TIFFGetField(tif, TIFFTAG_TILEWIDTH, &tw);
+    TIFFGetField(tif, TIFFTAG_TILELENGTH, &th);
+
+    /*
+     * toskew moves the raster pointer from the end of a written
+     * tile line (tw pixels) to the start of the next raster line,
+     * or of the previous one when flipping vertically.
+     */
+    flip = setorientation(img);
+    if (flip & FLIP_VERTICALLY) {
+	    y = h - 1;
+	    toskew = -(int32)(tw + w);
+    }
+    else {
+	    y = 0;
+	    toskew = -(int32)(tw - w);
+    }
+
+    /*
+     * The "break" statements below only leave the column loop, so
+     * the row loop also tests ret to stop once a read has failed.
+     */
+    for (row = 0; ret != 0 && row < h; row += nrow)
+    {
+        /* rows left in the current tile, clipped to the window */
+        rowstoread = th - (row + img->row_offset) % th;
+    	nrow = (row + rowstoread > h ? h - row : rowstoread);
+        for (col = 0; col < w; col += tw)
+        {
+            if (TIFFReadTile(tif, r, col+img->col_offset,
+                             row+img->row_offset,0,0) < 0 && img->stoponerr)
+            {
+                ret = 0;
+                break;
+            }
+            if (TIFFReadTile(tif, g, col+img->col_offset,
+                             row+img->row_offset,0,1) < 0 && img->stoponerr)
+            {
+                ret = 0;
+                break;
+            }
+            if (TIFFReadTile(tif, b, col+img->col_offset,
+                             row+img->row_offset,0,2) < 0 && img->stoponerr)
+            {
+                ret = 0;
+                break;
+            }
+            if (alpha && TIFFReadTile(tif,a,col+img->col_offset,
+                                      row+img->row_offset,0,3) < 0 && img->stoponerr)
+            {
+                ret = 0;
+                break;
+            }
+
+            /* byte offset of the first wanted row inside each plane */
+            pos = ((row+img->row_offset) % th) * TIFFTileRowSize(tif);
+
+            if (col + tw > w)
+            {
+                /*
+                 * Tile is clipped horizontally.  Calculate
+                 * visible portion and skewing factors.
+                 */
+                uint32 npix = w - col;
+                fromskew = tw - npix;
+                (*put)(img, raster+y*w+col, col, y,
+                       npix, nrow, fromskew, toskew + fromskew,
+                       r + pos, g + pos, b + pos, a + pos);
+            } else {
+                (*put)(img, raster+y*w+col, col, y,
+                       tw, nrow, 0, toskew, r + pos, g + pos, b + pos, a + pos);
+            }
+        }
+
+        y += (flip & FLIP_VERTICALLY ?-(int32) nrow : (int32) nrow);
+    }
+
+    /* mirror every raster line in place */
+    if (flip & FLIP_HORIZONTALLY) {
+	    uint32 line;
+
+	    for (line = 0; line < h; line++) {
+		    uint32 *left = raster + (line * w);
+		    uint32 *right = left + w - 1;
+
+		    while ( left < right ) {
+			    uint32 temp = *left;
+			    *left = *right;
+			    *right = temp;
+			    left++, right--;
+		    }
+	    }
+    }
+
+    _TIFFfree(buf);
+    return (ret);
+}
+
+/*
+ * Get a strip-organized image that has
+ *	PlanarConfiguration contiguous if SamplesPerPixel > 1
+ * or
+ *	SamplesPerPixel == 1
+ *
+ * Each strip touched by the h requested rows is decoded into a
+ * scratch buffer and passed to the contiguous "put" routine; the
+ * raster lines are mirrored afterwards if a horizontal flip is
+ * required.  Returns 1 on success, 0 on allocation failure or on
+ * a read failure while img->stoponerr is set.
+ */	
+static int
+gtStripContig(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h)
+{
+    TIFF* tif = img->tif;
+    tileContigRoutine put = img->put.contig;
+    uint32 imagewidth = img->width;
+    uint32 rowsperstrip;
+    uint32 row, y, nrow;
+    uint32 pos;
+    unsigned char* buf;
+    tsize_t scanline;
+    int32 fromskew, toskew;
+    int flip;
+    int ret = 1;
+
+    buf = (unsigned char*) _TIFFmalloc(TIFFStripSize(tif));
+    if (buf == 0) {
+		TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "No space for strip buffer");
+		return (0);
+    }
+    _TIFFmemset(buf, 0, TIFFStripSize(tif));
+
+    flip = setorientation(img);
+    if (flip & FLIP_VERTICALLY) {
+	    /* start at the last raster line and walk upwards */
+	    y = h - 1;
+	    toskew = -(int32)(w + w);
+    } else {
+	    y = 0;
+	    toskew = -(int32)(w - w);
+    }
+
+    TIFFGetFieldDefaulted(tif, TIFFTAG_ROWSPERSTRIP, &rowsperstrip);
+    scanline = TIFFScanlineSize(tif);
+    fromskew = (w < imagewidth ? imagewidth - w : 0);
+
+    row = 0;
+    while (row < h)
+    {
+        /* index of the first wanted row inside its strip */
+        uint32 striprow = (row + img->row_offset) % rowsperstrip;
+        uint32 rowstoread = rowsperstrip - striprow;
+
+        nrow = (row + rowstoread > h ? h - row : rowstoread);
+        if (TIFFReadEncodedStrip(tif,
+                                 TIFFComputeStrip(tif,row+img->row_offset, 0),
+                                 buf,
+                                 (striprow + nrow) * scanline) < 0
+            && img->stoponerr)
+        {
+            ret = 0;
+            break;
+        }
+
+        pos = striprow * scanline;
+        (*put)(img, raster+y*w, 0, y, w, nrow, fromskew, toskew, buf + pos);
+
+        if (flip & FLIP_VERTICALLY)
+            y -= nrow;
+        else
+            y += nrow;
+        row += nrow;
+    }
+
+    if (flip & FLIP_HORIZONTALLY) {
+	    uint32 line;
+
+	    /* reverse the pixels of every raster line in place */
+	    for (line = 0; line < h; line++) {
+		    uint32 *lo = raster + (line * w);
+		    uint32 *hi = lo + w - 1;
+
+		    for (; lo < hi; lo++, hi--) {
+			    uint32 swap = *lo;
+			    *lo = *hi;
+			    *hi = swap;
+		    }
+	    }
+    }
+
+    _TIFFfree(buf);
+    return (ret);
+}
+
+/*
+ * Get a strip-organized image with
+ *	 SamplesPerPixel > 1
+ *	 PlanarConfiguration separated
+ * We assume that all such images are RGB.
+ *
+ * One buffer holding four strip-sized planes (r, g, b, a) is
+ * allocated.  Samples 0..2 are decoded into r, g and b; sample 3
+ * is decoded into a only when img->alpha is set, otherwise the a
+ * plane is preset to 0xff.
+ *
+ * Returns 1 on success; 0 if the buffer cannot be allocated or if
+ * a strip read fails while img->stoponerr is set.
+ */
+static int
+gtStripSeparate(TIFFRGBAImage* img, uint32* raster, uint32 w, uint32 h)
+{
+    TIFF* tif = img->tif;
+    tileSeparateRoutine put = img->put.separate;
+    unsigned char *buf;
+    unsigned char *r, *g, *b, *a;
+    uint32 row, y, nrow, rowstoread;
+    uint32 pos;
+    tsize_t scanline;
+    tsize_t nbytes;
+    uint32 rowsperstrip, offset_row;
+    uint32 imagewidth = img->width;
+    tsize_t stripsize;
+    int32 fromskew, toskew;
+    int alpha = img->alpha;
+    int	ret = 1, flip;
+
+    stripsize = TIFFStripSize(tif);
+    r = buf = (unsigned char *)_TIFFmalloc(4*stripsize);
+    if (buf == 0) {
+		TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "No space for strip buffer");
+		return (0);
+    }
+    _TIFFmemset(buf, 0, 4*stripsize);
+    g = r + stripsize;
+    b = g + stripsize;
+    a = b + stripsize;
+    if (!alpha)
+	_TIFFmemset(a, 0xff, stripsize);
+
+    flip = setorientation(img);
+    if (flip & FLIP_VERTICALLY) {
+	    /* start at the last raster line and walk upwards */
+	    y = h - 1;
+	    toskew = -(int32)(w + w);
+    }
+    else {
+	    y = 0;
+	    toskew = -(int32)(w - w);
+    }
+
+    TIFFGetFieldDefaulted(tif, TIFFTAG_ROWSPERSTRIP, &rowsperstrip);
+    scanline = TIFFScanlineSize(tif);
+    fromskew = (w < imagewidth ? imagewidth - w : 0);
+    for (row = 0; row < h; row += nrow)
+    {
+        offset_row = row + img->row_offset;
+        /* rows left in the current strip, clipped to the window */
+        rowstoread = rowsperstrip - offset_row % rowsperstrip;
+        nrow = (row + rowstoread > h ? h - row : rowstoread);
+        /* decode from the strip start through the last wanted row */
+        nbytes = (offset_row % rowsperstrip + nrow) * scanline;
+
+    	if (TIFFReadEncodedStrip(tif, TIFFComputeStrip(tif, offset_row, 0),
+                                 r, nbytes) < 0
+            && img->stoponerr)
+        {
+            ret = 0;
+            break;
+        }
+        if (TIFFReadEncodedStrip(tif, TIFFComputeStrip(tif, offset_row, 1),
+                                 g, nbytes) < 0
+            && img->stoponerr)
+        {
+            ret = 0;
+            break;
+        }
+        if (TIFFReadEncodedStrip(tif, TIFFComputeStrip(tif, offset_row, 2),
+                                 b, nbytes) < 0
+            && img->stoponerr)
+        {
+            ret = 0;
+            break;
+        }
+        if (alpha &&
+            (TIFFReadEncodedStrip(tif, TIFFComputeStrip(tif, offset_row, 3),
+                                  a, nbytes) < 0
+             && img->stoponerr))
+        {
+            ret = 0;
+            break;
+        }
+
+        /* byte offset of the first wanted row inside each plane */
+        pos = (offset_row % rowsperstrip) * scanline;
+        (*put)(img, raster+y*w, 0, y, w, nrow, fromskew, toskew, r + pos, g + pos,
+               b + pos, a + pos);
+        y += (flip & FLIP_VERTICALLY ? -(int32) nrow : (int32) nrow);
+    }
+
+    /* mirror every raster line in place */
+    if (flip & FLIP_HORIZONTALLY) {
+	    uint32 line;
+
+	    for (line = 0; line < h; line++) {
+		    uint32 *left = raster + (line * w);
+		    uint32 *right = left + w - 1;
+
+		    while ( left < right ) {
+			    uint32 temp = *left;
+			    *left = *right;
+			    *right = temp;
+			    left++, right--;
+		    }
+	    }
+    }
+
+    _TIFFfree(buf);
+    return (ret);
+}
+
+/*
+ * The following routines move decoded data returned
+ * from the TIFF library into rasters filled with packed
+ * ABGR pixels (i.e. suitable for passing to lrecwrite.)
+ *
+ * The routines have been created according to the most
+ * important cases and optimized.  pickTileContigCase and
+ * pickTileSeparateCase analyze the parameters and select
+ * the appropriate "put" routine to use.
+ *
+ * Helper macros defined below:
+ *
+ *   REPEATn(op)         expands op n times (n = 2, 4, 8).
+ *   CASE8/CASE4(x,op)   run op x times for x in 1..7 (1..3) by
+ *                       falling through the case labels.
+ *   UNROLLn(w,op1,op2)  for every full group of n out of w items,
+ *                       run op1 once and op2 n times; for the
+ *                       remaining items run op1 once and op2 once
+ *                       per remaining item.
+ *   SKEW/SKEW4          add skew to three/four plane pointers.
+ *   PACK/PACK4          build a 32-bit pixel with r in bits 0-7,
+ *                       g in bits 8-15, b in bits 16-23 and alpha
+ *                       in bits 24-31 (A1, i.e. 0xff, for PACK).
+ *   W2B(v)              high-order byte of a 16-bit value.
+ *   PACKW/PACKW4        as PACK/PACK4, applying W2B to each input.
+ *   DECLAREContigPutFunc(name)
+ *                       emits the header of a static "put" routine
+ *                       taking (img, cp, x, y, w, h, fromskew,
+ *                       toskew, pp).
+ */
+#define	REPEAT8(op)	REPEAT4(op); REPEAT4(op)
+#define	REPEAT4(op)	REPEAT2(op); REPEAT2(op)
+#define	REPEAT2(op)	op; op
+#define	CASE8(x,op)			\
+    switch (x) {			\
+    case 7: op; case 6: op; case 5: op;	\
+    case 4: op; case 3: op; case 2: op;	\
+    case 1: op;				\
+    }
+#define	CASE4(x,op)	switch (x) { case 3: op; case 2: op; case 1: op; }
+#define	NOP
+
+#define	UNROLL8(w, op1, op2) {		\
+    uint32 _x;				\
+    for (_x = w; _x >= 8; _x -= 8) {	\
+	op1;				\
+	REPEAT8(op2);			\
+    }					\
+    if (_x > 0) {			\
+	op1;				\
+	CASE8(_x,op2);			\
+    }					\
+}
+#define	UNROLL4(w, op1, op2) {		\
+    uint32 _x;				\
+    for (_x = w; _x >= 4; _x -= 4) {	\
+	op1;				\
+	REPEAT4(op2);			\
+    }					\
+    if (_x > 0) {			\
+	op1;				\
+	CASE4(_x,op2);			\
+    }					\
+}
+#define	UNROLL2(w, op1, op2) {		\
+    uint32 _x;				\
+    for (_x = w; _x >= 2; _x -= 2) {	\
+	op1;				\
+	REPEAT2(op2);			\
+    }					\
+    if (_x) {				\
+	op1;				\
+	op2;				\
+    }					\
+}
+    
+#define	SKEW(r,g,b,skew)	{ r += skew; g += skew; b += skew; }
+#define	SKEW4(r,g,b,a,skew)	{ r += skew; g += skew; b += skew; a+= skew; }
+
+#define A1 (((uint32)0xffL)<<24)
+#define	PACK(r,g,b)	\
+	((uint32)(r)|((uint32)(g)<<8)|((uint32)(b)<<16)|A1)
+#define	PACK4(r,g,b,a)	\
+	((uint32)(r)|((uint32)(g)<<8)|((uint32)(b)<<16)|((uint32)(a)<<24))
+#define W2B(v) (((v)>>8)&0xff)
+#define	PACKW(r,g,b)	\
+	((uint32)W2B(r)|((uint32)W2B(g)<<8)|((uint32)W2B(b)<<16)|A1)
+#define	PACKW4(r,g,b,a)	\
+	((uint32)W2B(r)|((uint32)W2B(g)<<8)|((uint32)W2B(b)<<16)|((uint32)W2B(a)<<24))
+
+#define	DECLAREContigPutFunc(name) \
+static void name(\
+    TIFFRGBAImage* img, \
+    uint32* cp, \
+    uint32 x, uint32 y, \
+    uint32 w, uint32 h, \
+    int32 fromskew, int32 toskew, \
+    unsigned char* pp \
+)
+
+/*
+ * 8-bit palette => colormap/RGB
+ *
+ * The first sample of every pixel indexes img->PALmap; entry 0 of
+ * the selected row is the output pixel.
+ */
+DECLAREContigPutFunc(put8bitcmaptile)
+{
+    const int stride = img->samplesperpixel;
+    uint32** lut = img->PALmap;
+    uint32 line, n;
+
+    (void) x; (void) y;
+    for (line = 0; line < h; line++) {
+	for (n = 0; n < w; n++, pp += stride)
+	    *cp++ = lut[*pp][0];
+	cp += toskew;
+	pp += fromskew;
+    }
+}
+
+/*
+ * 4-bit palette => colormap/RGB
+ *
+ * Every source byte indexes img->PALmap; the selected row supplies
+ * the output pixels for that byte (two per byte, or one when a
+ * single pixel is left at the end of a line).
+ */
+DECLAREContigPutFunc(put4bitcmaptile)
+{
+    uint32** PALmap = img->PALmap;
+
+    (void) x; (void) y;
+    fromskew /= 2;	/* pixels -> source bytes, 2 pixels per byte */
+    while (h-- > 0) {
+	uint32* bw;
+	UNROLL2(w, bw = PALmap[*pp++], *cp++ = *bw++);
+	cp += toskew;
+	pp += fromskew;
+    }
+}
+
+/*
+ * 2-bit palette => colormap/RGB
+ *
+ * Every source byte indexes img->PALmap; the selected row supplies
+ * the output pixels for that byte (four per byte, fewer for the
+ * last byte of a line whose width is not a multiple of four).
+ */
+DECLAREContigPutFunc(put2bitcmaptile)
+{
+    uint32** PALmap = img->PALmap;
+
+    (void) x; (void) y;
+    fromskew /= 4;	/* pixels -> source bytes, 4 pixels per byte */
+    while (h-- > 0) {
+	uint32* bw;
+	UNROLL4(w, bw = PALmap[*pp++], *cp++ = *bw++);
+	cp += toskew;
+	pp += fromskew;
+    }
+}
+
+/*
+ * 1-bit palette => colormap/RGB
+ *
+ * Every source byte indexes img->PALmap; the selected row supplies
+ * the output pixels for that byte (eight per byte, fewer for the
+ * last byte of a line whose width is not a multiple of eight).
+ */
+DECLAREContigPutFunc(put1bitcmaptile)
+{
+    uint32** PALmap = img->PALmap;
+
+    (void) x; (void) y;
+    fromskew /= 8;	/* pixels -> source bytes, 8 pixels per byte */
+    while (h-- > 0) {
+	uint32* bw;
+	UNROLL8(w, bw = PALmap[*pp++], *cp++ = *bw++);
+	cp += toskew;
+	pp += fromskew;
+    }
+}
+
+/*
+ * 8-bit greyscale => colormap/RGB
+ *
+ * The first sample of every pixel indexes img->BWmap; entry 0 of
+ * the selected row is the output pixel.
+ */
+DECLAREContigPutFunc(putgreytile)
+{
+    const int stride = img->samplesperpixel;
+    uint32** greymap = img->BWmap;
+    uint32 line, n;
+
+    (void) x; (void) y;
+    for (line = 0; line < h; line++) {
+	for (n = 0; n < w; n++, pp += stride)
+	    *cp++ = greymap[*pp][0];
+	cp += toskew;
+	pp += fromskew;
+    }
+}
+
+/*
+ * 16-bit greyscale => colormap/RGB
+ *
+ * Only the high-order byte of each 16-bit sample is used to index
+ * img->BWmap.
+ */
+DECLAREContigPutFunc(put16bitbwtile)
+{
+    const int stride = img->samplesperpixel;
+    uint32** greymap = img->BWmap;
+    uint32 line, n;
+
+    (void) x; (void) y;
+    for (line = 0; line < h; line++) {
+	/* word view of the current line; pp tracks the same data in bytes */
+	uint16 *sample = (uint16 *) pp;
+
+	for (n = 0; n < w; n++) {
+	    *cp++ = greymap[*sample >> 8][0];
+	    sample += stride;
+	    pp += 2 * stride;
+	}
+	cp += toskew;
+	pp += fromskew;
+    }
+}
+
+/*
+ * 1-bit bilevel => colormap/RGB
+ *
+ * Every source byte indexes img->BWmap; the selected row supplies
+ * the output pixels for that byte (eight per byte, fewer for the
+ * last byte of a line whose width is not a multiple of eight).
+ */
+DECLAREContigPutFunc(put1bitbwtile)
+{
+    uint32** BWmap = img->BWmap;
+
+    (void) x; (void) y;
+    fromskew /= 8;	/* pixels -> source bytes, 8 pixels per byte */
+    while (h-- > 0) {
+	uint32* bw;
+	UNROLL8(w, bw = BWmap[*pp++], *cp++ = *bw++);
+	cp += toskew;
+	pp += fromskew;
+    }
+}
+
+/*
+ * 2-bit greyscale => colormap/RGB
+ *
+ * Every source byte indexes img->BWmap; the selected row supplies
+ * the output pixels for that byte (four per byte, fewer for the
+ * last byte of a line whose width is not a multiple of four).
+ */
+DECLAREContigPutFunc(put2bitbwtile)
+{
+    uint32** BWmap = img->BWmap;
+
+    (void) x; (void) y;
+    fromskew /= 4;	/* pixels -> source bytes, 4 pixels per byte */
+    while (h-- > 0) {
+	uint32* bw;
+	UNROLL4(w, bw = BWmap[*pp++], *cp++ = *bw++);
+	cp += toskew;
+	pp += fromskew;
+    }
+}
+
+/*
+ * 4-bit greyscale => colormap/RGB
+ *
+ * Every source byte indexes img->BWmap; the selected row supplies
+ * the output pixels for that byte (two per byte, or one when a
+ * single pixel is left at the end of a line).
+ */
+DECLAREContigPutFunc(put4bitbwtile)
+{
+    uint32** BWmap = img->BWmap;
+
+    (void) x; (void) y;
+    fromskew /= 2;	/* pixels -> source bytes, 2 pixels per byte */
+    while (h-- > 0) {
+	uint32* bw;
+	UNROLL2(w, bw = BWmap[*pp++], *cp++ = *bw++);
+	cp += toskew;
+	pp += fromskew;
+    }
+}
+
+/*
+ * 8-bit packed samples, no Map => RGB
+ *
+ * The first three samples of each pixel are packed with PACK();
+ * the source pointer then advances by img->samplesperpixel, so
+ * any further samples are skipped.
+ */
+DECLAREContigPutFunc(putRGBcontig8bittile)
+{
+    int samplesperpixel = img->samplesperpixel;
+
+    (void) x; (void) y;
+    fromskew *= samplesperpixel;	/* pixels -> source bytes */
+    while (h-- > 0) {
+	UNROLL8(w, NOP,
+	    *cp++ = PACK(pp[0], pp[1], pp[2]);
+	    pp += samplesperpixel);
+	cp += toskew;
+	pp += fromskew;
+    }
+}
+
+/*
+ * 8-bit packed samples, w/ Map => RGB
+ *
+ * Each of the first three samples of a pixel is translated through
+ * img->Map before being packed.
+ */
+DECLAREContigPutFunc(putRGBcontig8bitMaptile)
+{
+    const int stride = img->samplesperpixel;
+    const int32 rowskip = fromskew * stride;	/* skew in bytes */
+    TIFFRGBValue* lut = img->Map;
+    uint32 line, n;
+
+    (void) x; (void) y;
+    for (line = 0; line < h; line++) {
+	for (n = 0; n < w; n++, pp += stride)
+	    *cp++ = PACK(lut[pp[0]], lut[pp[1]], lut[pp[2]]);
+	pp += rowskip;
+	cp += toskew;
+    }
+}
+
+/*
+ * 8-bit packed samples => RGBA w/ associated alpha
+ * (known to have Map == NULL)
+ *
+ * The first four samples of each pixel are packed unchanged with
+ * PACK4(); the source pointer then advances by
+ * img->samplesperpixel.
+ */
+DECLAREContigPutFunc(putRGBAAcontig8bittile)
+{
+    int samplesperpixel = img->samplesperpixel;
+
+    (void) x; (void) y;
+    fromskew *= samplesperpixel;	/* pixels -> source bytes */
+    while (h-- > 0) {
+	UNROLL8(w, NOP,
+	    *cp++ = PACK4(pp[0], pp[1], pp[2], pp[3]);
+	    pp += samplesperpixel);
+	cp += toskew;
+	pp += fromskew;
+    }
+}
+
+/*
+ * 8-bit packed samples => RGBA w/ unassociated alpha
+ * (known to have Map == NULL)
+ *
+ * Each colour sample is multiplied by the pixel's alpha and
+ * divided by 255 before packing; alpha itself is stored as is.
+ */
+DECLAREContigPutFunc(putRGBUAcontig8bittile)
+{
+    const int stride = img->samplesperpixel;
+    const int32 rowskip = fromskew * stride;	/* skew in bytes */
+    uint32 line, n;
+
+    (void) x; (void) y;
+    for (line = 0; line < h; line++) {
+	for (n = 0; n < w; n++, pp += stride) {
+	    uint32 alpha = pp[3];
+	    uint32 red = (pp[0] * alpha) / 255;
+	    uint32 green = (pp[1] * alpha) / 255;
+	    uint32 blue = (pp[2] * alpha) / 255;
+
+	    *cp++ = PACK4(red,green,blue,alpha);
+	}
+	cp += toskew;
+	pp += rowskip;
+    }
+}
+
+/*
+ * 16-bit packed samples => RGB
+ *
+ * The high-order byte of each of the first three 16-bit samples
+ * of a pixel is packed (see PACKW).
+ */
+DECLAREContigPutFunc(putRGBcontig16bittile)
+{
+    const int stride = img->samplesperpixel;
+    const int32 rowskip = fromskew * stride;	/* skew in samples */
+    uint16 *src = (uint16 *)pp;
+    uint32 line, n;
+
+    (void) x; (void) y;
+    for (line = 0; line < h; line++) {
+	for (n = 0; n < w; n++, src += stride)
+	    *cp++ = PACKW(src[0], src[1], src[2]);
+	cp += toskew;
+	src += rowskip;
+    }
+}
+
+/*
+ * 16-bit packed samples => RGBA w/ associated alpha
+ * (known to have Map == NULL)
+ *
+ * The high-order byte of each of the first four 16-bit samples
+ * of a pixel is packed (see PACKW4).
+ */
+DECLAREContigPutFunc(putRGBAAcontig16bittile)
+{
+    const int stride = img->samplesperpixel;
+    const int32 rowskip = fromskew * stride;	/* skew in samples */
+    uint16 *src = (uint16 *)pp;
+    uint32 line, n;
+
+    (void) x; (void) y;
+    for (line = 0; line < h; line++) {
+	for (n = 0; n < w; n++, src += stride)
+	    *cp++ = PACKW4(src[0], src[1], src[2], src[3]);
+	cp += toskew;
+	src += rowskip;
+    }
+}
+
+/*
+ * 16-bit packed samples => RGBA w/ unassociated alpha
+ * (known to have Map == NULL)
+ *
+ * Each 16-bit colour sample is scaled by the pixel's alpha and
+ * reduced to 8 bits; alpha is stored as its high-order byte.
+ */
+DECLAREContigPutFunc(putRGBUAcontig16bittile)
+{
+    int samplesperpixel = img->samplesperpixel;
+    uint16 *wp = (uint16 *)pp;
+
+    (void) y;
+    fromskew *= samplesperpixel;	/* pixels -> 16-bit samples */
+    while (h-- > 0) {
+	uint32 r,g,b,a;
+	/*
+	 * We shift alpha down four bits just in case unsigned
+	 * arithmetic doesn't handle the full range.
+	 * We still have plenty of accuracy, since the output is 8 bits.
+	 * So we have (r * 0xffff) * (a * 0xfff)) = r*a * (0xffff*0xfff)
+	 * Since we want r*a * 0xff for eight bit output,
+	 * we divide by (0xffff * 0xfff) / 0xff == 0x100eff.
+	 */
+	for (x = w; x-- > 0;) {
+	    a = wp[3] >> 4;
+	    r = (wp[0] * a) / 0x100eff;
+	    g = (wp[1] * a) / 0x100eff;
+	    b = (wp[2] * a) / 0x100eff;
+	    /*
+	     * a still holds 12 bits here; drop four more so that
+	     * only the top 8 bits of the sample reach the pixel.
+	     */
+	    *cp++ = PACK4(r,g,b,a >> 4);
+	    wp += samplesperpixel;
+	}
+	cp += toskew;
+	wp += fromskew;
+    }
+}
+
+/*
+ * 8-bit packed CMYK samples w/o Map => RGB
+ *
+ * NB: The conversion of CMYK->RGB is *very* crude.
+ *
+ * For each pixel k = 255 - pp[3]; every output channel is
+ * k * (255 - sample) / 255 for samples 0, 1 and 2 respectively.
+ */
+DECLAREContigPutFunc(putRGBcontig8bitCMYKtile)
+{
+    int samplesperpixel = img->samplesperpixel;
+    uint16 r, g, b, k;
+
+    (void) x; (void) y;
+    fromskew *= samplesperpixel;	/* pixels -> source bytes */
+    while (h-- > 0) {
+	UNROLL8(w, NOP,
+	    k = 255 - pp[3];
+	    r = (k*(255-pp[0]))/255;
+	    g = (k*(255-pp[1]))/255;
+	    b = (k*(255-pp[2]))/255;
+	    *cp++ = PACK(r, g, b);
+	    pp += samplesperpixel);
+	cp += toskew;
+	pp += fromskew;
+    }
+}
+
+/*
+ * 8-bit packed CMYK samples w/Map => RGB
+ *
+ * NB: The conversion of CMYK->RGB is *very* crude.
+ *
+ * Each channel is computed as k * (255 - sample) / 255 with
+ * k = 255 - pp[3], then translated through img->Map.
+ */
+DECLAREContigPutFunc(putRGBcontig8bitCMYKMaptile)
+{
+    const int stride = img->samplesperpixel;
+    const int32 rowskip = fromskew * stride;	/* skew in bytes */
+    TIFFRGBValue* lut = img->Map;
+    uint32 line, n;
+
+    (void) x; (void) y;
+    for (line = 0; line < h; line++) {
+	for (n = 0; n < w; n++, pp += stride) {
+	    uint16 black = 255 - pp[3];
+	    uint16 red = (black*(255-pp[0]))/255;
+	    uint16 green = (black*(255-pp[1]))/255;
+	    uint16 blue = (black*(255-pp[2]))/255;
+
+	    *cp++ = PACK(lut[red], lut[green], lut[blue]);
+	}
+	pp += rowskip;
+	cp += toskew;
+    }
+}
+
+#define	DECLARESepPutFunc(name) /* header of a static "put" routine fed from separate r, g, b, a planes */ \
+static void name(\
+    TIFFRGBAImage* img,\
+    uint32* cp,\
+    uint32 x, uint32 y, \
+    uint32 w, uint32 h,\
+    int32 fromskew, int32 toskew,\
+    unsigned char* r, unsigned char* g, unsigned char* b, unsigned char* a\
+)
+
+/*
+ * 8-bit unpacked samples => RGB
+ *
+ * One byte is taken from each of the r, g and b planes per output
+ * pixel; img and the a plane are not used.
+ */
+DECLARESepPutFunc(putRGBseparate8bittile)
+{
+    (void) img; (void) x; (void) y; (void) a;
+    while (h-- > 0) {
+	UNROLL8(w, NOP, *cp++ = PACK(*r++, *g++, *b++));
+	SKEW(r, g, b, fromskew);	/* skip the unused rest of the source line */
+	cp += toskew;
+    }
+}
+
+/*
+ * 8-bit unpacked samples, w/ Map => RGB
+ *
+ * One byte is taken from each of the r, g and b planes per output
+ * pixel and translated through img->Map; the a plane is not used.
+ */
+DECLARESepPutFunc(putRGBseparate8bitMaptile)
+{
+    TIFFRGBValue* lut = img->Map;
+    uint32 line, n;
+
+    (void) x; (void) y; (void) a;
+    for (line = 0; line < h; line++) {
+	for (n = 0; n < w; n++) {
+	    unsigned char rv = *r++;
+	    unsigned char gv = *g++;
+	    unsigned char bv = *b++;
+
+	    *cp++ = PACK(lut[rv], lut[gv], lut[bv]);
+	}
+	SKEW(r, g, b, fromskew);
+	cp += toskew;
+    }
+}
+
+/*
+ * 8-bit unpacked samples => RGBA w/ associated alpha
+ *
+ * One byte is taken from each of the r, g, b and a planes per
+ * output pixel and packed unchanged; img is not used.
+ */
+DECLARESepPutFunc(putRGBAAseparate8bittile)
+{
+    (void) img; (void) x; (void) y;
+    while (h-- > 0) {
+	UNROLL8(w, NOP, *cp++ = PACK4(*r++, *g++, *b++, *a++));
+	SKEW4(r, g, b, a, fromskew);	/* skip the unused rest of the source line */
+	cp += toskew;
+    }
+}
+
+/*
+ * 8-bit unpacked samples => RGBA w/ unassociated alpha
+ *
+ * Each colour sample is multiplied by the matching alpha sample
+ * and divided by 255 before packing; alpha is stored as is.
+ */
+DECLARESepPutFunc(putRGBUAseparate8bittile)
+{
+    uint32 line, n;
+
+    (void) img; (void) x; (void) y;
+    for (line = 0; line < h; line++) {
+	for (n = 0; n < w; n++) {
+	    uint32 alpha = *a++;
+	    uint32 red = (*r++ * alpha) / 255;
+	    uint32 green = (*g++ * alpha) / 255;
+	    uint32 blue = (*b++ * alpha) / 255;
+
+	    *cp++ = PACK4(red,green,blue,alpha);
+	}
+	SKEW4(r, g, b, a, fromskew);
+	cp += toskew;
+    }
+}
+
+/*
+ * 16-bit unpacked samples => RGB
+ *
+ * The r, g and b planes are read as 16-bit samples and the
+ * high-order byte of each is packed (see PACKW); the a plane is
+ * not used.
+ */
+DECLARESepPutFunc(putRGBseparate16bittile)
+{
+    uint16 *red = (uint16*) r;
+    uint16 *green = (uint16*) g;
+    uint16 *blue = (uint16*) b;
+    uint32 line, n;
+
+    (void) img; (void) x; (void) y; (void) a;
+    for (line = 0; line < h; line++) {
+	for (n = w; n > 0; n--) {
+	    uint16 rv = *red++;
+	    uint16 gv = *green++;
+	    uint16 bv = *blue++;
+
+	    *cp++ = PACKW(rv, gv, bv);
+	}
+	SKEW(red, green, blue, fromskew);
+	cp += toskew;
+    }
+}
+
+/*
+ * 16-bit unpacked samples => RGBA w/ associated alpha
+ *
+ * The r, g, b and a planes are read as 16-bit samples and the
+ * high-order byte of each is packed (see PACKW4).
+ */
+DECLARESepPutFunc(putRGBAAseparate16bittile)
+{
+    uint16 *red = (uint16*) r;
+    uint16 *green = (uint16*) g;
+    uint16 *blue = (uint16*) b;
+    uint16 *alpha = (uint16*) a;
+    uint32 line, n;
+
+    (void) img; (void) x; (void) y;
+    for (line = 0; line < h; line++) {
+	for (n = w; n > 0; n--) {
+	    uint16 rv = *red++;
+	    uint16 gv = *green++;
+	    uint16 bv = *blue++;
+	    uint16 av = *alpha++;
+
+	    *cp++ = PACKW4(rv, gv, bv, av);
+	}
+	SKEW4(red, green, blue, alpha, fromskew);
+	cp += toskew;
+    }
+}
+
+/*
+ * 16-bit unpacked samples => RGBA w/ unassociated alpha
+ *
+ * Each 16-bit colour sample is scaled by the matching alpha
+ * sample and reduced to 8 bits; alpha is stored as its
+ * high-order byte.
+ */
+DECLARESepPutFunc(putRGBUAseparate16bittile)
+{
+    uint16 *wr = (uint16*) r;
+    uint16 *wg = (uint16*) g;
+    uint16 *wb = (uint16*) b;
+    uint16 *wa = (uint16*) a;
+
+    (void) img; (void) y;
+    while (h-- > 0) {
+	uint32 r,g,b,a;
+	/*
+	 * We shift alpha down four bits just in case unsigned
+	 * arithmetic doesn't handle the full range.
+	 * We still have plenty of accuracy, since the output is 8 bits.
+	 * So we have (r * 0xffff) * (a * 0xfff)) = r*a * (0xffff*0xfff)
+	 * Since we want r*a * 0xff for eight bit output,
+	 * we divide by (0xffff * 0xfff) / 0xff == 0x100eff.
+	 */
+	for (x = w; x-- > 0;) {
+	    a = *wa++ >> 4;
+	    r = (*wr++ * a) / 0x100eff;
+	    g = (*wg++ * a) / 0x100eff;
+	    b = (*wb++ * a) / 0x100eff;
+	    /*
+	     * a still holds 12 bits here; drop four more so that
+	     * only the top 8 bits of the sample reach the pixel.
+	     */
+	    *cp++ = PACK4(r,g,b,a >> 4);
+	}
+	SKEW4(wr, wg, wb, wa, fromskew);
+	cp += toskew;
+    }
+}
+
+/*
+ * 8-bit packed CIE L*a*b 1976 samples => RGB
+ *
+ * Every pixel is three bytes: the first is passed as unsigned,
+ * the other two as signed, to TIFFCIELabToXYZ(); the resulting
+ * XYZ triple is converted with TIFFXYZToRGB() and packed.
+ */
+DECLAREContigPutFunc(putcontig8bitCIELab)
+{
+	const int32 rowskip = fromskew * 3;	/* skew in bytes */
+	uint32 line, n;
+
+	(void) x; (void) y;
+	for (line = 0; line < h; line++) {
+		for (n = 0; n < w; n++, pp += 3) {
+			float X, Y, Z;
+			uint32 r, g, b;
+
+			TIFFCIELabToXYZ(img->cielab,
+					(unsigned char)pp[0],
+					(signed char)pp[1],
+					(signed char)pp[2],
+					&X, &Y, &Z);
+			TIFFXYZToRGB(img->cielab, X, Y, Z, &r, &g, &b);
+			*cp++ = PACK(r, g, b);
+		}
+		cp += toskew;
+		pp += rowskip;
+	}
+}
+
+/*
+ * YCbCr -> RGB conversion and packing routines.
+ *
+ * YCbCrtoRGB(dst, Y) converts the luminance sample Y, together
+ * with the variables Cb and Cr that must be in scope where the
+ * macro is expanded, through TIFFYCbCrtoRGB() using img->ycbcr,
+ * and stores the result in dst as a pixel built with PACK().
+ */
+
+#define	YCbCrtoRGB(dst, Y) {						\
+	uint32 r, g, b;							\
+	TIFFYCbCrtoRGB(img->ycbcr, (Y), Cb, Cr, &r, &g, &b);		\
+	dst = PACK(r, g, b);						\
+}
+
+/*
+ * 8-bit packed YCbCr samples => RGB 
+ * This function is generic for different sampling sizes, 
+ * and can handle blocks sizes that aren't multiples of the
+ * sampling size.  However, it is substantially less optimized
+ * than the specific sampling cases.  It is used as a fallback
+ * for difficult blocks.
+ *
+ * NOTE: the routine is wrapped in "#ifdef notdef", so it is only
+ * compiled if notdef is defined.  Its body is unfinished: it uses
+ * yy, xx and v_line_group, none of which is declared in it, and
+ * it contains incomplete statements.
+ */
+#ifdef notdef
+static void putcontig8bitYCbCrGenericTile( 
+    TIFFRGBAImage* img, 
+    uint32* cp, 
+    uint32 x, uint32 y, 
+    uint32 w, uint32 h, 
+    int32 fromskew, int32 toskew, 
+    unsigned char* pp,
+    int h_group, 
+    int v_group )
+
+{
+    uint32* cp1 = cp+w+toskew;
+    uint32* cp2 = cp1+w+toskew;
+    uint32* cp3 = cp2+w+toskew;
+    int32 incr = 3*w+4*toskew;
+    int32   Cb, Cr;
+    int     group_size = v_group * h_group + 2;
+
+    (void) y;
+    fromskew = (fromskew * group_size) / h_group;
+
+    for( yy = 0; yy < h; yy++ )
+    {
+        unsigned char *pp_line;
+        int     y_line_group = yy / v_group;
+        int     y_remainder = yy - y_line_group * v_group;
+
+        pp_line = pp + v_line_group * 
+
+        
+        for( xx = 0; xx < w; xx++ )
+        {
+            Cb = pp
+        }
+    }
+    for (; h >= 4; h -= 4) {
+	x = w>>2;
+	do {
+	    Cb = pp[16];
+	    Cr = pp[17];
+
+	    YCbCrtoRGB(cp [0], pp[ 0]);
+	    YCbCrtoRGB(cp [1], pp[ 1]);
+	    YCbCrtoRGB(cp [2], pp[ 2]);
+	    YCbCrtoRGB(cp [3], pp[ 3]);
+	    YCbCrtoRGB(cp1[0], pp[ 4]);
+	    YCbCrtoRGB(cp1[1], pp[ 5]);
+	    YCbCrtoRGB(cp1[2], pp[ 6]);
+	    YCbCrtoRGB(cp1[3], pp[ 7]);
+	    YCbCrtoRGB(cp2[0], pp[ 8]);
+	    YCbCrtoRGB(cp2[1], pp[ 9]);
+	    YCbCrtoRGB(cp2[2], pp[10]);
+	    YCbCrtoRGB(cp2[3], pp[11]);
+	    YCbCrtoRGB(cp3[0], pp[12]);
+	    YCbCrtoRGB(cp3[1], pp[13]);
+	    YCbCrtoRGB(cp3[2], pp[14]);
+	    YCbCrtoRGB(cp3[3], pp[15]);
+
+	    cp += 4, cp1 += 4, cp2 += 4, cp3 += 4;
+	    pp += 18;
+	} while (--x);
+	cp += incr, cp1 += incr, cp2 += incr, cp3 += incr;
+	pp += fromskew;
+    }
+}
+#endif
+
+/*
+ * 8-bit packed YCbCr samples w/ 4,4 subsampling => RGB
+ *
+ * The source is read in 18-byte groups: 16 luminance samples for
+ * a block of 4x4 pixels followed by one Cb and one Cr sample.
+ * Four raster lines (cp, cp1, cp2, cp3) are filled at a time.
+ * The fast path is taken when both w and h are multiples of 4;
+ * otherwise the general path clips the last group of each line
+ * and the last band of lines.
+ */
+DECLAREContigPutFunc(putcontig8bitYCbCr44tile)
+{
+    uint32* cp1 = cp+w+toskew;
+    uint32* cp2 = cp1+w+toskew;
+    uint32* cp3 = cp2+w+toskew;
+    int32 incr = 3*w+4*toskew;
+
+    (void) y;
+    /*
+     * fromskew arrives in pixels.  Convert it to bytes by counting
+     * whole 18-byte groups: dividing first keeps the result a
+     * multiple of the group size when the number of skipped pixels
+     * is not a multiple of 4.
+     */
+    fromskew = (fromskew / 4) * 18;
+    if ((h & 3) == 0 && (w & 3) == 0) {
+        for (; h >= 4; h -= 4) {
+            /* a counted loop, so that w == 0 writes nothing */
+            for (x = w>>2; x > 0; x--) {
+                int32 Cb = pp[16];
+                int32 Cr = pp[17];
+
+                YCbCrtoRGB(cp [0], pp[ 0]);
+                YCbCrtoRGB(cp [1], pp[ 1]);
+                YCbCrtoRGB(cp [2], pp[ 2]);
+                YCbCrtoRGB(cp [3], pp[ 3]);
+                YCbCrtoRGB(cp1[0], pp[ 4]);
+                YCbCrtoRGB(cp1[1], pp[ 5]);
+                YCbCrtoRGB(cp1[2], pp[ 6]);
+                YCbCrtoRGB(cp1[3], pp[ 7]);
+                YCbCrtoRGB(cp2[0], pp[ 8]);
+                YCbCrtoRGB(cp2[1], pp[ 9]);
+                YCbCrtoRGB(cp2[2], pp[10]);
+                YCbCrtoRGB(cp2[3], pp[11]);
+                YCbCrtoRGB(cp3[0], pp[12]);
+                YCbCrtoRGB(cp3[1], pp[13]);
+                YCbCrtoRGB(cp3[2], pp[14]);
+                YCbCrtoRGB(cp3[3], pp[15]);
+
+                cp += 4, cp1 += 4, cp2 += 4, cp3 += 4;
+                pp += 18;
+            }
+            cp += incr, cp1 += incr, cp2 += incr, cp3 += incr;
+            pp += fromskew;
+        }
+    } else {
+        while (h > 0) {
+            for (x = w; x > 0;) {
+                int32 Cb = pp[16];
+                int32 Cr = pp[17];
+                /*
+                 * The outer switch selects how many columns of the
+                 * group are visible, each inner switch how many
+                 * lines; both rely on falling through.
+                 */
+                switch (x) {
+                default:
+                    switch (h) {
+                    default: YCbCrtoRGB(cp3[3], pp[15]); /* FALLTHROUGH */
+                    case 3:  YCbCrtoRGB(cp2[3], pp[11]); /* FALLTHROUGH */
+                    case 2:  YCbCrtoRGB(cp1[3], pp[ 7]); /* FALLTHROUGH */
+                    case 1:  YCbCrtoRGB(cp [3], pp[ 3]); /* FALLTHROUGH */
+                    }                                    /* FALLTHROUGH */
+                case 3:
+                    switch (h) {
+                    default: YCbCrtoRGB(cp3[2], pp[14]); /* FALLTHROUGH */
+                    case 3:  YCbCrtoRGB(cp2[2], pp[10]); /* FALLTHROUGH */
+                    case 2:  YCbCrtoRGB(cp1[2], pp[ 6]); /* FALLTHROUGH */
+                    case 1:  YCbCrtoRGB(cp [2], pp[ 2]); /* FALLTHROUGH */
+                    }                                    /* FALLTHROUGH */
+                case 2:
+                    switch (h) {
+                    default: YCbCrtoRGB(cp3[1], pp[13]); /* FALLTHROUGH */
+                    case 3:  YCbCrtoRGB(cp2[1], pp[ 9]); /* FALLTHROUGH */
+                    case 2:  YCbCrtoRGB(cp1[1], pp[ 5]); /* FALLTHROUGH */
+                    case 1:  YCbCrtoRGB(cp [1], pp[ 1]); /* FALLTHROUGH */
+                    }                                    /* FALLTHROUGH */
+                case 1:
+                    switch (h) {
+                    default: YCbCrtoRGB(cp3[0], pp[12]); /* FALLTHROUGH */
+                    case 3:  YCbCrtoRGB(cp2[0], pp[ 8]); /* FALLTHROUGH */
+                    case 2:  YCbCrtoRGB(cp1[0], pp[ 4]); /* FALLTHROUGH */
+                    case 1:  YCbCrtoRGB(cp [0], pp[ 0]); /* FALLTHROUGH */
+                    }                                    /* FALLTHROUGH */
+                }
+                if (x < 4) {
+                    cp += x; cp1 += x; cp2 += x; cp3 += x;
+                    x = 0;
+                }
+                else {
+                    cp += 4; cp1 += 4; cp2 += 4; cp3 += 4;
+                    x -= 4;
+                }
+                pp += 18;
+            }
+            if (h <= 4)
+                break;
+            h -= 4;
+            cp += incr, cp1 += incr, cp2 += incr, cp3 += incr;
+            pp += fromskew;
+        }
+    }
+}
+
+/*
+ * 8-bit packed YCbCr samples w/ 4,2 subsampling => RGB
+ *
+ * The source is read in 10-byte groups: 8 luminance samples for a
+ * block of 4 pixels by 2 lines followed by one Cb and one Cr
+ * sample.  Two raster lines (cp, cp1) are filled at a time.  The
+ * fast path is taken when w is a multiple of 4 and h is even;
+ * otherwise the general path clips the last group of each line
+ * and the last line.
+ */
+DECLAREContigPutFunc(putcontig8bitYCbCr42tile)
+{
+    uint32* cp1 = cp+w+toskew;
+    int32 incr = 2*toskew+w;
+
+    (void) y;
+    /*
+     * fromskew arrives in pixels.  Convert it to bytes by counting
+     * whole 10-byte groups: dividing first keeps the result a
+     * multiple of the group size when the number of skipped pixels
+     * is not a multiple of 4.
+     */
+    fromskew = (fromskew / 4) * 10;
+    /*
+     * The fast loop consumes 4 pixels across and 2 lines down per
+     * group, so it needs w % 4 == 0 and h % 2 == 0.
+     */
+    if ((w & 3) == 0 && (h & 1) == 0) {
+        for (; h >= 2; h -= 2) {
+            /* a counted loop, so that w == 0 writes nothing */
+            for (x = w>>2; x > 0; x--) {
+                int32 Cb = pp[8];
+                int32 Cr = pp[9];
+
+                YCbCrtoRGB(cp [0], pp[0]);
+                YCbCrtoRGB(cp [1], pp[1]);
+                YCbCrtoRGB(cp [2], pp[2]);
+                YCbCrtoRGB(cp [3], pp[3]);
+                YCbCrtoRGB(cp1[0], pp[4]);
+                YCbCrtoRGB(cp1[1], pp[5]);
+                YCbCrtoRGB(cp1[2], pp[6]);
+                YCbCrtoRGB(cp1[3], pp[7]);
+
+                cp += 4, cp1 += 4;
+                pp += 10;
+            }
+            cp += incr, cp1 += incr;
+            pp += fromskew;
+        }
+    } else {
+        while (h > 0) {
+            for (x = w; x > 0;) {
+                int32 Cb = pp[8];
+                int32 Cr = pp[9];
+                /*
+                 * The outer switch selects how many columns of the
+                 * group are visible, each inner switch whether the
+                 * second line exists; both rely on falling through.
+                 */
+                switch (x) {
+                default:
+                    switch (h) {
+                    default: YCbCrtoRGB(cp1[3], pp[ 7]); /* FALLTHROUGH */
+                    case 1:  YCbCrtoRGB(cp [3], pp[ 3]); /* FALLTHROUGH */
+                    }                                    /* FALLTHROUGH */
+                case 3:
+                    switch (h) {
+                    default: YCbCrtoRGB(cp1[2], pp[ 6]); /* FALLTHROUGH */
+                    case 1:  YCbCrtoRGB(cp [2], pp[ 2]); /* FALLTHROUGH */
+                    }                                    /* FALLTHROUGH */
+                case 2:
+                    switch (h) {
+                    default: YCbCrtoRGB(cp1[1], pp[ 5]); /* FALLTHROUGH */
+                    case 1:  YCbCrtoRGB(cp [1], pp[ 1]); /* FALLTHROUGH */
+                    }                                    /* FALLTHROUGH */
+                case 1:
+                    switch (h) {
+                    default: YCbCrtoRGB(cp1[0], pp[ 4]); /* FALLTHROUGH */
+                    case 1:  YCbCrtoRGB(cp [0], pp[ 0]); /* FALLTHROUGH */
+                    }                                    /* FALLTHROUGH */
+                }
+                if (x < 4) {
+                    cp += x; cp1 += x;
+                    x = 0;
+                }
+                else {
+                    cp += 4; cp1 += 4;
+                    x -= 4;
+                }
+                pp += 10;
+            }
+            if (h <= 2)
+                break;
+            h -= 2;
+            cp += incr, cp1 += incr;
+            pp += fromskew;
+        }
+    }
+}
+
+/* 8-bit packed YCbCr samples w/ 4,1 subsampling => RGB.
+ * Each 6-byte clump read here holds four Y samples, then one Cb and one Cr;
+ * the w%4 leftover pixels of a row come from one extra, partly used clump. */
+DECLAREContigPutFunc(putcontig8bitYCbCr41tile)
+{
+    (void) y;
+    fromskew = (fromskew / 4) * 6;	/* skipped pixels -> whole 6-byte clumps */
+    do {
+	x = w>>2;
+	for (; x > 0; x--) {	/* do/while(--x) ran past the buffers when w < 4 */
+	    int32 Cb = pp[4];
+	    int32 Cr = pp[5];
+
+	    YCbCrtoRGB(cp [0], pp[0]);
+	    YCbCrtoRGB(cp [1], pp[1]);
+	    YCbCrtoRGB(cp [2], pp[2]);
+	    YCbCrtoRGB(cp [3], pp[3]);
+
+	    cp += 4;
+	    pp += 6;
+	}
+
+        if( (w&3) != 0 )
+        {
+	    int32 Cb = pp[4];
+	    int32 Cr = pp[5];
+
+            switch( (w&3) ) {
+              case 3: YCbCrtoRGB(cp [2], pp[2]); /* FALLTHROUGH */
+              case 2: YCbCrtoRGB(cp [1], pp[1]); /* FALLTHROUGH */
+              case 1: YCbCrtoRGB(cp [0], pp[0]); /* FALLTHROUGH */
+              case 0: break;
+            }
+
+            cp += (w&3);
+            pp += 6;	/* the partly used clump is still 6 bytes long */
+        }
+
+	cp += toskew;
+	pp += fromskew;
+    } while (--h);
+
+}
+
+/* 8-bit packed YCbCr samples w/ 2,2 subsampling => RGB.
+ * A 6-byte clump carries a 2x2 pixel square: Y for the row at cp (pp[0..1]),
+ * Y for the row at cp1 (pp[2..3]), then one Cb and one Cr shared by all four. */
+DECLAREContigPutFunc(putcontig8bitYCbCr22tile)
+{
+    uint32* cp1 = cp+w+toskew;	/* second output row of the current row pair */
+    int32 incr = 2*toskew+w;	/* after a pair, step both pointers over one more row */
+
+    (void) y;
+    fromskew = (fromskew / 2) * 6;	/* whole clumps only; (skew*6)/2 overshot odd skews */
+    if ((h & 1) == 0 && (w & 1) == 0) {
+        for (; h >= 2; h -= 2) {
+            x = w>>1;
+            do {
+                int32 Cb = pp[4];
+                int32 Cr = pp[5];
+
+                YCbCrtoRGB(cp [0], pp[0]);
+                YCbCrtoRGB(cp [1], pp[1]);
+                YCbCrtoRGB(cp1[0], pp[2]);
+                YCbCrtoRGB(cp1[1], pp[3]);
+
+                cp += 2, cp1 += 2;
+                pp += 6;
+            } while (--x);
+            cp += incr, cp1 += incr;
+            pp += fromskew;
+        }
+    } else {
+        while (h > 0) {
+            for (x = w; x > 0;) {
+                int32 Cb = pp[4];
+                int32 Cr = pp[5];
+                switch (x) {
+                default:
+                    switch (h) {
+                    default: YCbCrtoRGB(cp1[1], pp[ 3]); /* FALLTHROUGH */
+                    case 1:  YCbCrtoRGB(cp [1], pp[ 1]); /* FALLTHROUGH */
+                    }                                    /* FALLTHROUGH */
+                case 1:
+                    switch (h) {
+                    default: YCbCrtoRGB(cp1[0], pp[ 2]); /* FALLTHROUGH */
+                    case 1:  YCbCrtoRGB(cp [0], pp[ 0]); /* FALLTHROUGH */
+                    }                                    /* FALLTHROUGH */
+                }
+                if (x < 2) {
+                    cp += x; cp1 += x;
+                    x = 0;
+                }
+                else {
+                    cp += 2; cp1 += 2;
+                    x -= 2;
+                }
+                pp += 6;
+            }
+            if (h <= 2)	/* last (possibly single) row of the tile is done */
+                break;
+            h -= 2;
+            cp += incr, cp1 += incr;
+            pp += fromskew;
+        }
+    }
+}
+
+/* 8-bit packed YCbCr samples w/ 2,1 subsampling => RGB.
+ * Each 4-byte clump holds two Y samples, then one Cb and one Cr; an odd
+ * trailing pixel takes its Y, Cb and Cr from one more, partly used clump. */
+DECLAREContigPutFunc(putcontig8bitYCbCr21tile)
+{
+    (void) y;
+    fromskew = (fromskew / 2) * 4;	/* skipped pixels -> whole 4-byte clumps */
+    do {
+	x = w>>1;
+	for (; x > 0; x--) {	/* do/while(--x) ran past the buffers when w == 1 */
+	    int32 Cb = pp[2];
+	    int32 Cr = pp[3];
+
+	    YCbCrtoRGB(cp[0], pp[0]);
+	    YCbCrtoRGB(cp[1], pp[1]);
+
+	    cp += 2;
+	    pp += 4;
+	}
+
+        if( (w&1) != 0 )
+        {
+	    int32 Cb = pp[2];
+	    int32 Cr = pp[3];
+            
+            YCbCrtoRGB(cp [0], pp[0]);
+
+	    cp += 1;
+	    pp += 4;	/* the partly used clump is still 4 bytes long */
+        }
+
+	cp += toskew;
+	pp += fromskew;
+    } while (--h);
+}
+
+/*
+ * 8-bit packed YCbCr samples w/ no subsampling => RGB (3 bytes: Y, Cb, Cr per pixel)
+ */
+DECLAREContigPutFunc(putcontig8bitYCbCr11tile)
+{
+    (void) y;
+    fromskew *= 3;	/* skipped pixels -> bytes, 3 per pixel */
+    do {
+        x = w; /* was x = w>>1; patched 2000/09/25 warmerda@home.com */ 
+	do {	/* NOTE(review): body runs before x is tested, so w == 0 is not handled */
+	    int32 Cb = pp[1];
+	    int32 Cr = pp[2];
+
+	    YCbCrtoRGB(*cp++, pp[0]);
+
+	    pp += 3;
+	} while (--x);
+	cp += toskew;
+	pp += fromskew;
+    } while (--h);
+}
+#undef	YCbCrtoRGB
+
+static tileContigRoutine	/* put routine matching the subsampling, or NULL */
+initYCbCrConversion(TIFFRGBAImage* img)
+{
+	static char module[] = "initYCbCrConversion";	/* name shown in error messages */
+
+	float *luma, *refBlackWhite;
+	uint16 hs, vs;
+
+	if (img->ycbcr == NULL) {	/* conversion state is allocated once and then reused */
+	    img->ycbcr = (TIFFYCbCrToRGB*) _TIFFmalloc(
+		    TIFFroundup(sizeof (TIFFYCbCrToRGB), sizeof (long))
+		    + 4*256*sizeof (TIFFRGBValue)
+		    + 2*256*sizeof (int)
+		    + 3*256*sizeof (int32)
+	    );
+	    if (img->ycbcr == NULL) {
+			TIFFErrorExt(img->tif->tif_clientdata, module,
+			      "No space for YCbCr->RGB conversion state");
+		    return (NULL);
+	    }
+	}
+
+	TIFFGetFieldDefaulted(img->tif, TIFFTAG_YCBCRCOEFFICIENTS, &luma);
+	TIFFGetFieldDefaulted(img->tif, TIFFTAG_REFERENCEBLACKWHITE,
+			      &refBlackWhite);
+	if (TIFFYCbCrToRGBInit(img->ycbcr, luma, refBlackWhite) < 0)
+		return NULL;
+
+	/*
+	 * The 6.0 spec says that subsampling must be
+	 * one of 1, 2, or 4, and that vertical subsampling
+	 * must always be <= horizontal subsampling; so
+	 * there are only a few possibilities and we just
+	 * enumerate the cases.
+	 */
+	TIFFGetFieldDefaulted(img->tif, TIFFTAG_YCBCRSUBSAMPLING, &hs, &vs);
+	switch ((hs<<4)|vs) {	/* horizontal factor in the high nibble, vertical in the low */
+		case 0x44: return (putcontig8bitYCbCr44tile);
+		case 0x42: return (putcontig8bitYCbCr42tile);
+		case 0x41: return (putcontig8bitYCbCr41tile);
+		case 0x22: return (putcontig8bitYCbCr22tile);
+		case 0x21: return (putcontig8bitYCbCr21tile);
+		case 0x11: return (putcontig8bitYCbCr11tile);
+	}
+
+	return (NULL);	/* any other subsampling combination is not supported */
+}
+
+static tileContigRoutine	/* putcontig8bitCIELab, or NULL on failure */
+initCIELabConversion(TIFFRGBAImage* img)
+{
+	static char module[] = "initCIELabConversion";
+
+	float   *whitePoint;
+	float   refWhite[3];
+
+	if (!img->cielab) {	/* conversion state is allocated once and then reused */
+		img->cielab = (TIFFCIELabToRGB *)
+			_TIFFmalloc(sizeof(TIFFCIELabToRGB));
+		if (!img->cielab) {
+			TIFFErrorExt(img->tif->tif_clientdata, module,
+			    "No space for CIE L*a*b*->RGB conversion state.");
+			return NULL;
+		}
+	}
+
+	TIFFGetFieldDefaulted(img->tif, TIFFTAG_WHITEPOINT, &whitePoint);
+	refWhite[1] = 100.0F;	/* Y fixed at 100; X and Z derived from the x,y chromaticity */
+	refWhite[0] = whitePoint[0] / whitePoint[1] * refWhite[1];
+	refWhite[2] = (1.0F - whitePoint[0] - whitePoint[1])
+		      / whitePoint[1] * refWhite[1];
+	if (TIFFCIELabToRGBInit(img->cielab, &display_sRGB, refWhite) < 0) {
+		TIFFErrorExt(img->tif->tif_clientdata, module,
+		    "Failed to initialize CIE L*a*b*->RGB conversion state.");
+		_TIFFfree(img->cielab), img->cielab = NULL;	/* leave no dangling pointer behind */
+		return NULL;
+	}
+
+	return putcontig8bitCIELab;
+}
+
+/*
+ * Greyscale images with less than 8 bits/sample are handled
+ * with a table to avoid lots of shifts and masks.  The table
+ * is set up so that put*bwtile (below) can retrieve 8/bitspersample
+ * pixel values simply by indexing into the table with one
+ * number.  Returns 1 on success, 0 if the table can't be allocated.
+ */
+static int
+makebwmap(TIFFRGBAImage* img)
+{
+    TIFFRGBValue* Map = img->Map;
+    int bitspersample = img->bitspersample;
+    int nsamples = 8 / bitspersample;	/* pixels packed into one input byte */
+    int i;
+    uint32* p;
+
+    if( nsamples == 0 )	/* 16 bits/sample: 8/16 == 0, still one pixel per entry */
+        nsamples = 1;
+
+    img->BWmap = (uint32**) _TIFFmalloc(
+	256*sizeof (uint32 *)+(256*nsamples*sizeof(uint32)));	/* row pointers, then pixels */
+    if (img->BWmap == NULL) {
+		TIFFErrorExt(img->tif->tif_clientdata, TIFFFileName(img->tif), "No space for B&W mapping table");
+		return (0);
+    }
+    p = (uint32*)(img->BWmap + 256);	/* pixel storage starts right after the 256 pointers */
+    for (i = 0; i < 256; i++) {
+	TIFFRGBValue c;
+	img->BWmap[i] = p;	/* entry i -> the nsamples pixels unpacked from byte value i */
+	switch (bitspersample) {
+#define	GREY(x)	c = Map[x]; *p++ = PACK(c,c,c);
+	case 1:
+	    GREY(i>>7);
+	    GREY((i>>6)&1);
+	    GREY((i>>5)&1);
+	    GREY((i>>4)&1);
+	    GREY((i>>3)&1);
+	    GREY((i>>2)&1);
+	    GREY((i>>1)&1);
+	    GREY(i&1);
+	    break;
+	case 2:
+	    GREY(i>>6);
+	    GREY((i>>4)&3);
+	    GREY((i>>2)&3);
+	    GREY(i&3);
+	    break;
+	case 4:
+	    GREY(i>>4);
+	    GREY(i&0xf);
+	    break;
+	case 8:
+        case 16:
+	    GREY(i);
+	    break;
+	}
+#undef	GREY
+    }
+    return (1);
+}
+
+/*
+ * Construct a mapping table to convert from the range
+ * of the data samples to [0,255] --for display.  This
+ * process also handles inverting B&W images when needed.
+ */ 
+static int
+setupMap(TIFFRGBAImage* img)
+{
+    int32 x, range;
+
+    range = (int32)((1L<<img->bitspersample)-1);	/* largest sample value */
+    
+    /* treat 16 bit the same as eight bit */
+    if( img->bitspersample == 16 )
+        range = (int32) 255;
+
+    img->Map = (TIFFRGBValue*) _TIFFmalloc((range+1) * sizeof (TIFFRGBValue));
+    if (img->Map == NULL) {
+		TIFFErrorExt(img->tif->tif_clientdata, TIFFFileName(img->tif),
+			"No space for photometric conversion table");
+		return (0);
+    }
+    if (img->photometric == PHOTOMETRIC_MINISWHITE) {
+	for (x = 0; x <= range; x++)	/* inverted ramp: sample 0 maps to 255 */
+	    img->Map[x] = (TIFFRGBValue) (((range - x) * 255) / range);
+    } else {
+	for (x = 0; x <= range; x++)	/* linear ramp scaled to [0,255] */
+	    img->Map[x] = (TIFFRGBValue) ((x * 255) / range);
+    }
+    if (img->bitspersample <= 16 &&
+	(img->photometric == PHOTOMETRIC_MINISBLACK ||
+	 img->photometric == PHOTOMETRIC_MINISWHITE)) {
+	/*
+	 * Use photometric mapping table to construct
+	 * unpacking tables for greyscale samples (<= 16 bits).
+	 */
+	if (!makebwmap(img))
+	    return (0);
+	/* no longer need Map, free it */
+	_TIFFfree(img->Map), img->Map = NULL;
+    }
+    return (1);
+}
+
+static int
+checkcmap(TIFFRGBAImage* img)
+{
+    const uint16* red = img->redcmap;
+    const uint16* green = img->greencmap;
+    const uint16* blue = img->bluecmap;
+    long entries = 1L<<img->bitspersample, k;
+    /* 16 as soon as any channel value needs more than 8 bits, otherwise 8 */
+    for (k = 0; k < entries; k++)
+	if (red[k] > 255 || green[k] > 255 || blue[k] > 255)
+	    return (16);
+    return (8);
+}
+
+static void
+cvtcmap(TIFFRGBAImage* img)
+{
+    uint16* red = img->redcmap;
+    uint16* green = img->greencmap;
+    uint16* blue = img->bluecmap;
+    long k, entries = 1L<<img->bitspersample;
+    /* Scale every 16-bit colormap entry down to 8 bits, in place. */
+    for (k = 0; k < entries; k++) {
+	/* keep only the high byte of each channel */
+	red[k] = (uint16)(red[k]>>8);
+	green[k] = (uint16)(green[k]>>8);
+	blue[k] = (uint16)(blue[k]>>8);
+	/* entries are independent, so the visiting order is irrelevant */
+    }
+}
+
+/*
+ * Palette images with <= 8 bits/sample are handled
+ * with a table to avoid lots of shifts and masks.  The table
+ * is set up so that put*cmaptile (below) can retrieve 8/bitspersample
+ * pixel values simply by indexing into the table with one
+ * number.  Returns 1 on success, 0 if the table can't be allocated.
+ */
+static int
+makecmap(TIFFRGBAImage* img)
+{
+    int bitspersample = img->bitspersample;
+    int nsamples = 8 / bitspersample;	/* pixels packed into one input byte */
+    uint16* r = img->redcmap;
+    uint16* g = img->greencmap;
+    uint16* b = img->bluecmap;
+    uint32 *p;
+    int i;
+
+    img->PALmap = (uint32**) _TIFFmalloc(
+	256*sizeof (uint32 *)+(256*nsamples*sizeof(uint32)));	/* row pointers, then pixels */
+    if (img->PALmap == NULL) {
+		TIFFErrorExt(img->tif->tif_clientdata, TIFFFileName(img->tif), "No space for Palette mapping table");
+		return (0);
+	}
+    p = (uint32*)(img->PALmap + 256);	/* pixel storage starts right after the 256 pointers */
+    for (i = 0; i < 256; i++) {
+	TIFFRGBValue c;
+	img->PALmap[i] = p;	/* entry i -> the nsamples pixels unpacked from byte value i */
+#define	CMAP(x)	c = (TIFFRGBValue) x; *p++ = PACK(r[c]&0xff, g[c]&0xff, b[c]&0xff);
+	switch (bitspersample) {
+	case 1:
+	    CMAP(i>>7);
+	    CMAP((i>>6)&1);
+	    CMAP((i>>5)&1);
+	    CMAP((i>>4)&1);
+	    CMAP((i>>3)&1);
+	    CMAP((i>>2)&1);
+	    CMAP((i>>1)&1);
+	    CMAP(i&1);
+	    break;
+	case 2:
+	    CMAP(i>>6);
+	    CMAP((i>>4)&3);
+	    CMAP((i>>2)&3);
+	    CMAP(i&3);
+	    break;
+	case 4:
+	    CMAP(i>>4);
+	    CMAP(i&0xf);
+	    break;
+	case 8:
+	    CMAP(i);
+	    break;
+	}
+#undef CMAP
+    }
+    return (1);
+}
+
+/* 
+ * Construct any mapping table used
+ * by the associated put routine.  Returns 1 on success, 0 on failure.
+ */
+static int
+buildMap(TIFFRGBAImage* img)
+{
+    switch (img->photometric) {
+    case PHOTOMETRIC_RGB:
+    case PHOTOMETRIC_YCBCR:
+    case PHOTOMETRIC_SEPARATED:
+	if (img->bitspersample == 8)	/* 8-bit samples need no table */
+	    break;
+	/* fall thru... */
+    case PHOTOMETRIC_MINISBLACK:
+    case PHOTOMETRIC_MINISWHITE:
+	if (!setupMap(img))
+	    return (0);
+	break;
+    case PHOTOMETRIC_PALETTE:
+	/*
+	 * Convert 16-bit colormap to 8-bit (unless it looks
+	 * like an old-style 8-bit colormap).
+	 */
+	if (checkcmap(img) == 16)
+	    cvtcmap(img);
+	else
+	    TIFFWarningExt(img->tif->tif_clientdata, TIFFFileName(img->tif), "Assuming 8-bit colormap");
+	/*
+	 * Use mapping table and colormap to construct
+	 * unpacking tables for samples <= 8 bits.
+	 */
+	if (img->bitspersample <= 8 && !makecmap(img))
+	    return (0);
+	break;
+    }
+    return (1);
+}
+
+/*
+ * Select the appropriate conversion routine for packed data.
+ */
+static int
+pickTileContigCase(TIFFRGBAImage* img)
+{
+    tileContigRoutine put = 0;	/* stays 0 when no routine fits or buildMap fails */
+
+    if (buildMap(img)) {
+	switch (img->photometric) {
+	case PHOTOMETRIC_RGB:
+	    switch (img->bitspersample) {
+	    case 8:
+		if (!img->Map) {
+		    if (img->alpha == EXTRASAMPLE_ASSOCALPHA)
+			put = putRGBAAcontig8bittile;
+		    else if (img->alpha == EXTRASAMPLE_UNASSALPHA)
+			put = putRGBUAcontig8bittile;
+		    else
+			put = putRGBcontig8bittile;
+		} else
+		    put = putRGBcontig8bitMaptile;
+		break;
+	    case 16:
+		put = putRGBcontig16bittile;	/* default; replaced below when alpha is present */
+		if (!img->Map) {
+		    if (img->alpha == EXTRASAMPLE_ASSOCALPHA)
+			put = putRGBAAcontig16bittile;
+		    else if (img->alpha == EXTRASAMPLE_UNASSALPHA)
+			put = putRGBUAcontig16bittile;
+		}
+		break;
+	    }
+	    break;
+	case PHOTOMETRIC_SEPARATED:
+	    if (img->bitspersample == 8) {
+		if (!img->Map)
+		    put = putRGBcontig8bitCMYKtile;
+		else
+		    put = putRGBcontig8bitCMYKMaptile;
+	    }
+	    break;
+	case PHOTOMETRIC_PALETTE:
+	    switch (img->bitspersample) {
+	    case 8:	put = put8bitcmaptile; break;
+	    case 4: put = put4bitcmaptile; break;
+	    case 2: put = put2bitcmaptile; break;
+	    case 1: put = put1bitcmaptile; break;
+	    }
+	    break;
+	case PHOTOMETRIC_MINISWHITE:
+	case PHOTOMETRIC_MINISBLACK:
+	    switch (img->bitspersample) {
+            case 16: put = put16bitbwtile; break;
+	    case 8:  put = putgreytile; break;
+	    case 4:  put = put4bitbwtile; break;
+	    case 2:  put = put2bitbwtile; break;
+	    case 1:  put = put1bitbwtile; break;
+	    }
+	    break;
+	case PHOTOMETRIC_YCBCR:
+	    if (img->bitspersample == 8)
+		put = initYCbCrConversion(img);	/* may return NULL */
+	    break;
+	case PHOTOMETRIC_CIELAB:
+	    if (img->bitspersample == 8)
+		put = initCIELabConversion(img);	/* may return NULL */
+	    break;
+	}
+    }
+    return ((img->put.contig = put) != 0);	/* record the choice; nonzero if one was found */
+}
+
+/*
+ * Select the appropriate conversion routine for unpacked data.
+ *
+ * NB: we assume that unpacked single channel data is directed
+ *	 to the "packed" routines.
+ */
+static int
+pickTileSeparateCase(TIFFRGBAImage* img)
+{
+    tileSeparateRoutine put = 0;	/* stays 0 when no routine fits or buildMap fails */
+
+    if (buildMap(img)) {
+	switch (img->photometric) {
+	case PHOTOMETRIC_RGB:	/* the only photometric handled for separate planes */
+	    switch (img->bitspersample) {
+	    case 8:
+		if (!img->Map) {
+		    if (img->alpha == EXTRASAMPLE_ASSOCALPHA)
+			put = putRGBAAseparate8bittile;
+		    else if (img->alpha == EXTRASAMPLE_UNASSALPHA)
+			put = putRGBUAseparate8bittile;
+		    else
+			put = putRGBseparate8bittile;
+		} else
+		    put = putRGBseparate8bitMaptile;
+		break;
+	    case 16:
+		put = putRGBseparate16bittile;	/* default; replaced below when alpha is present */
+		if (!img->Map) {
+		    if (img->alpha == EXTRASAMPLE_ASSOCALPHA)
+			put = putRGBAAseparate16bittile;
+		    else if (img->alpha == EXTRASAMPLE_UNASSALPHA)
+			put = putRGBUAseparate16bittile;
+		}
+		break;
+	    }
+	    break;
+	}
+    }
+    return ((img->put.separate = put) != 0);	/* record the choice; nonzero if one was found */
+}
+
+/*
+ * Read a whole strip of data from the file, and convert to RGBA form.
+ * If this is the last strip, then it will only contain the portion of
+ * the strip that is actually within the image space.  The result is
+ * organized in bottom to top form.  Returns 1 on success, 0 on failure.
+ */
+
+
+int
+TIFFReadRGBAStrip(TIFF* tif, uint32 row, uint32 * raster )
+
+{
+    char 	emsg[1024] = "";
+    TIFFRGBAImage img;
+    int 	ok;
+    uint32	rowsperstrip, rows_to_read;
+
+    if( TIFFIsTiled( tif ) )
+    {
+		TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif),
+                  "Can't use TIFFReadRGBAStrip() with tiled file.");
+	return (0);
+    }
+    
+    TIFFGetFieldDefaulted(tif, TIFFTAG_ROWSPERSTRIP, &rowsperstrip);
+    if( (row % rowsperstrip) != 0 )	/* row must be the first row of a strip */
+    {
+		TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif),
+				"Row passed to TIFFReadRGBAStrip() must be first in a strip.");
+		return (0);
+    }
+
+    if (TIFFRGBAImageOK(tif, emsg) && TIFFRGBAImageBegin(&img, tif, 0, emsg)) {
+
+        img.row_offset = row;
+        img.col_offset = 0;
+
+        if( row + rowsperstrip > img.height )	/* last strip may be short */
+            rows_to_read = img.height - row;
+        else
+            rows_to_read = rowsperstrip;
+        
+	ok = TIFFRGBAImageGet(&img, raster, img.width, rows_to_read );
+        
+	TIFFRGBAImageEnd(&img);
+    } else {
+		/* emsg is message text, never a format string */
+		TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "%s", emsg); ok = 0;
+    }
+    
+    return (ok);
+}
+
+/*
+ * Read a whole tile of data from the file, and convert to RGBA form.
+ * The returned RGBA data is organized from bottom to top of tile,
+ * and may include zeroed areas if the tile extends off the image.
+ */
+
+int
+TIFFReadRGBATile(TIFF* tif, uint32 col, uint32 row, uint32 * raster)
+
+{
+    char 	emsg[1024] = "";
+    TIFFRGBAImage img;
+    int 	ok;
+    uint32	tile_xsize, tile_ysize;
+    uint32	read_xsize, read_ysize;
+    uint32	i_row;
+
+    /*
+     * Verify that our request is legal - on a tile file, and on a
+     * tile boundary.
+     */
+    
+    if( !TIFFIsTiled( tif ) )
+    {
+		TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif),
+				  "Can't use TIFFReadRGBATile() with stripped file.");
+		return (0);
+    }
+    
+    TIFFGetFieldDefaulted(tif, TIFFTAG_TILEWIDTH, &tile_xsize);
+    TIFFGetFieldDefaulted(tif, TIFFTAG_TILELENGTH, &tile_ysize);
+    if( (col % tile_xsize) != 0 || (row % tile_ysize) != 0 )
+    {
+		TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif),
+                  "Row/col passed to TIFFReadRGBATile() must be top "
+                  "left corner of a tile.");
+	return (0);
+    }
+
+    /*
+     * Setup the RGBA reader.
+     */
+    
+    if (!TIFFRGBAImageOK(tif, emsg) 
+	|| !TIFFRGBAImageBegin(&img, tif, 0, emsg)) {
+	    TIFFErrorExt(tif->tif_clientdata, TIFFFileName(tif), "%s", emsg);	/* emsg is text, not a format */
+	    return( 0 );
+    }
+
+    /*
+     * The TIFFRGBAImageGet() function doesn't allow us to get off the
+     * edge of the image, even to fill an otherwise valid tile.  So we
+     * figure out how much we can read, and fix up the tile buffer to
+     * a full tile configuration afterwards.
+     */
+
+    if( row + tile_ysize > img.height )
+        read_ysize = img.height - row;
+    else
+        read_ysize = tile_ysize;
+    
+    if( col + tile_xsize > img.width )
+        read_xsize = img.width - col;
+    else
+        read_xsize = tile_xsize;
+
+    /*
+     * Read the chunk of imagery.
+     */
+    
+    img.row_offset = row;
+    img.col_offset = col;
+
+    ok = TIFFRGBAImageGet(&img, raster, read_xsize, read_ysize );
+        
+    TIFFRGBAImageEnd(&img);
+
+    /*
+     * If our read was incomplete we will need to fix up the tile by
+     * shifting the data around as if a full tile of data is being returned.
+     *
+     * This is all the more complicated because the image is organized in
+     * bottom to top format. 
+     */
+
+    if( read_xsize == tile_xsize && read_ysize == tile_ysize )
+        return( ok );
+
+    /* Spread the packed rows out to full tile stride, last buffer row first. */
+    for( i_row = 0; i_row < read_ysize; i_row++ ) {
+        memmove( raster + (tile_ysize - i_row - 1) * tile_xsize,
+                 raster + (read_ysize - i_row - 1) * read_xsize,
+                 read_xsize * sizeof(uint32) );
+        _TIFFmemset( raster + (tile_ysize - i_row - 1) * tile_xsize+read_xsize,
+                     0, sizeof(uint32) * (tile_xsize - read_xsize) );
+    }
+    /* Zero the tile rows that received no image data. */
+    for( i_row = read_ysize; i_row < tile_ysize; i_row++ ) {
+        _TIFFmemset( raster + (tile_ysize - i_row - 1) * tile_xsize,
+                     0, sizeof(uint32) * tile_xsize );
+    }
+
+    return (ok);
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_jpeg.c b/src/libtiff/tif_jpeg.c
new file mode 100644
index 0000000..63f943e
--- /dev/null
+++ b/src/libtiff/tif_jpeg.c
@@ -0,0 +1,1946 @@
+/* $Id: tif_jpeg.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1994-1997 Sam Leffler
+ * Copyright (c) 1994-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#define WIN32_LEAN_AND_MEAN
+#define VC_EXTRALEAN
+
+#include "tiffiop.h"
+#ifdef JPEG_SUPPORT
+
+/*
+ * TIFF Library
+ *
+ * JPEG Compression support per TIFF Technical Note #2
+ * (*not* per the original TIFF 6.0 spec).
+ *
+ * This file is simply an interface to the libjpeg library written by
+ * the Independent JPEG Group.  You need release 5 or later of the IJG
+ * code, which you can find on the Internet at ftp.uu.net:/graphics/jpeg/.
+ *
+ * Contributed by Tom Lane <tgl@sss.pgh.pa.us>.
+ */
+#include <setjmp.h>
+
+int TIFFFillStrip(TIFF*, tstrip_t);
+int TIFFFillTile(TIFF*, ttile_t);
+
+/* We undefine FAR to avoid conflict with JPEG definition */
+
+#ifdef FAR
+#undef FAR
+#endif
+
+/*
+  Libjpeg's jmorecfg.h defines INT16 and INT32, but only if XMD_H is
+  not defined.  Unfortunately, the MinGW and Borland compilers include
+  a typedef for INT32, which causes a conflict.  MSVC does not include
+  a conflicting typedef given the headers which are included.
+*/
+#if defined(__BORLANDC__) || defined(__MINGW32__)
+# define XMD_H 1
+#endif
+
+/*
+   The windows RPCNDR.H file defines boolean, but defines it with the
+   unsigned char size.  You should compile JPEG library using appropriate
+   definitions in jconfig.h header, but many users compile library in wrong
+   way. That causes errors of the following type:
+
+   "JPEGLib: JPEG parameter struct mismatch: library thinks size is 432,
+   caller expects 464"
+
+   For such users we will fix the problem here. See install.doc file from
+   the JPEG library distribution for details.
+*/
+
+/* Define "boolean" as unsigned char, not int, per Windows custom. */
+#if 0  /* IMLIB - libjpeg already configured, the following definitions are not necessary */
+#if defined(WIN32) && !defined(__MINGW32__)
+# ifndef __RPCNDR_H__            /* don't conflict if rpcndr.h already read */
+   typedef unsigned char boolean;
+# endif
+# define HAVE_BOOLEAN            /* prevent jmorecfg.h from redefining it */
+#endif
+#endif
+
+#include "jpeglib.h"
+#include "jerror.h"
+
+/*
+ * We are using width_in_blocks which is supposed to be private to
+ * libjpeg. Unfortunately, the libjpeg delivered with Cygwin has
+ * renamed this member to width_in_data_units.  Since the header has
+ * also renamed a define, use that unique define name in order to
+ * detect the problem header and adjust to suit.
+ */
+#if defined(D_MAX_DATA_UNITS_IN_MCU)
+#define width_in_blocks width_in_data_units
+#endif
+
+/*
+ * On some machines it may be worthwhile to use _setjmp or sigsetjmp
+ * in place of plain setjmp.  These macros will make it easier.
+ */
+#define SETJMP(jbuf)		setjmp(jbuf)
+#define LONGJMP(jbuf,code)	longjmp(jbuf,code)
+#define JMP_BUF			jmp_buf
+
+typedef struct jpeg_destination_mgr jpeg_destination_mgr;
+typedef struct jpeg_source_mgr jpeg_source_mgr;
+typedef	struct jpeg_error_mgr jpeg_error_mgr;
+
+/*
+ * State block for each open TIFF file using
+ * libjpeg to do JPEG compression/decompression.
+ *
+ * libjpeg's visible state is either a jpeg_compress_struct
+ * or jpeg_decompress_struct depending on which way we
+ * are going.  comm can be used to refer to the fields
+ * which are common to both.
+ *
+ * NB: cinfo is required to be the first member of JPEGState,
+ *     so we can safely cast JPEGState* -> jpeg_xxx_struct*
+ *     and vice versa!
+ */
+typedef	struct {
+	union {
+		struct jpeg_compress_struct c;
+		struct jpeg_decompress_struct d;
+		struct jpeg_common_struct comm;
+	} cinfo;			/* NB: must be first */
+        int             cinfo_initialized;	/* NOTE(review): presumably set once cinfo is created -- confirm */
+
+	jpeg_error_mgr	err;		/* libjpeg error manager */
+	JMP_BUF		exit_jmpbuf;	/* for catching libjpeg failures */
+	/*
+	 * The following two members could be a union, but
+	 * they're small enough that it's not worth the effort.
+	 */
+	jpeg_destination_mgr dest;	/* data dest for compression */
+	jpeg_source_mgr	src;		/* data source for decompression */
+					/* private state */
+	TIFF*		tif;		/* back link needed by some code */
+	uint16		photometric;	/* copy of PhotometricInterpretation */
+	uint16		h_sampling;	/* luminance sampling factors */
+	uint16		v_sampling;
+	tsize_t		bytesperline;	/* decompressed bytes per scanline */
+	/* pointers to intermediate buffers when processing downsampled data */
+	JSAMPARRAY	ds_buffer[MAX_COMPONENTS];
+	int		scancount;	/* number of "scanlines" accumulated */
+	int		samplesperclump;	/* NOTE(review): meaning not visible in this part of the file */
+
+	TIFFVGetMethod	vgetparent;	/* super-class method */
+	TIFFVSetMethod	vsetparent;	/* super-class method */
+	TIFFStripMethod	defsparent;	/* super-class method */
+	TIFFTileMethod	deftparent;	/* super-class method */
+					/* pseudo-tag fields */
+	void*		jpegtables;	/* JPEGTables tag value, or NULL */
+	uint32		jpegtables_length; /* number of bytes in same */
+	int		jpegquality;	/* Compression quality level */
+	int		jpegcolormode;	/* Auto RGB<=>YCbCr convert? */
+	int		jpegtablesmode;	/* What to put in JPEGTables */
+
+        int             ycbcrsampling_fetched;	/* NOTE(review): looks like a one-shot flag -- confirm */
+	uint32		recvparams;	/* encoded Class 2 session params */
+	char*		subaddress;	/* subaddress string */
+	uint32		recvtime;	/* time spent receiving (secs) */
+	char*		faxdcs;		/* encoded fax parameters (DCS, Table 2/T.30) */
+} JPEGState;
+
+#define	JState(tif)	((JPEGState*)(tif)->tif_data)
+
+static	int JPEGDecode(TIFF*, tidata_t, tsize_t, tsample_t);
+static	int JPEGDecodeRaw(TIFF*, tidata_t, tsize_t, tsample_t);
+static	int JPEGEncode(TIFF*, tidata_t, tsize_t, tsample_t);
+static	int JPEGEncodeRaw(TIFF*, tidata_t, tsize_t, tsample_t);
+static  int JPEGInitializeLibJPEG( TIFF * tif,
+								   int force_encode, int force_decode );
+
+#define	FIELD_JPEGTABLES	(FIELD_CODEC+0)
+#define	FIELD_RECVPARAMS	(FIELD_CODEC+1)
+#define	FIELD_SUBADDRESS	(FIELD_CODEC+2)
+#define	FIELD_RECVTIME		(FIELD_CODEC+3)
+#define	FIELD_FAXDCS		(FIELD_CODEC+4)
+
+static const TIFFFieldInfo jpegFieldInfo[] = {
+    { TIFFTAG_JPEGTABLES,	 -3,-3,	TIFF_UNDEFINED,	FIELD_JPEGTABLES,
+      FALSE,	TRUE,	"JPEGTables" },
+    { TIFFTAG_JPEGQUALITY,	 0, 0,	TIFF_ANY,	FIELD_PSEUDO,
+      TRUE,	FALSE,	"" },
+    { TIFFTAG_JPEGCOLORMODE,	 0, 0,	TIFF_ANY,	FIELD_PSEUDO,
+      FALSE,	FALSE,	"" },
+    { TIFFTAG_JPEGTABLESMODE,	 0, 0,	TIFF_ANY,	FIELD_PSEUDO,
+      FALSE,	FALSE,	"" },
+    /* Specific for JPEG in faxes */
+    { TIFFTAG_FAXRECVPARAMS,	 1, 1, TIFF_LONG,	FIELD_RECVPARAMS,
+      TRUE,	FALSE,	"FaxRecvParams" },
+    { TIFFTAG_FAXSUBADDRESS,	-1,-1, TIFF_ASCII,	FIELD_SUBADDRESS,
+      TRUE,	FALSE,	"FaxSubAddress" },
+    { TIFFTAG_FAXRECVTIME,	 1, 1, TIFF_LONG,	FIELD_RECVTIME,
+      TRUE,	FALSE,	"FaxRecvTime" },
+    { TIFFTAG_FAXDCS,		-1, -1, TIFF_ASCII,	FIELD_FAXDCS,
+	  TRUE,	FALSE,	"FaxDcs" },
+};
+#define	N(a)	(sizeof (a) / sizeof (a[0]))
+
+/*
+ * libjpeg interface layer.
+ *
+ * We use setjmp/longjmp to return control to libtiff
+ * when a fatal error is encountered within the JPEG
+ * library.  We also direct libjpeg error and warning
+ * messages through the appropriate libtiff handlers.
+ */
+
+/*
+ * Error handling routines (these replace corresponding
+ * IJG routines from jerror.c).  These are used for both
+ * compression and decompression.
+ */
+static void
+TIFFjpeg_error_exit(j_common_ptr cinfo)
+{
+	JPEGState *sp = (JPEGState *) cinfo;	/* NB: cinfo assumed first */
+	char buffer[JMSG_LENGTH_MAX];
+
+	(*cinfo->err->format_message) (cinfo, buffer);
+	TIFFErrorExt(sp->tif->tif_clientdata, "JPEGLib", "%s", buffer);	/* message is text, not a format */
+	jpeg_abort(cinfo);			/* clean up libjpeg state */
+	LONGJMP(sp->exit_jmpbuf, 1);		/* return to libtiff caller */
+}
+
+/*
+ * This routine is invoked only for warning messages,
+ * since error_exit does its own thing and trace_level
+ * is never set > 0.
+ */
+static void
+TIFFjpeg_output_message(j_common_ptr cinfo)
+{
+	char buffer[JMSG_LENGTH_MAX];
+
+	(*cinfo->err->format_message) (cinfo, buffer);	/* buffer is then passed as data, not as a format */
+	TIFFWarningExt(((JPEGState *) cinfo)->tif->tif_clientdata, "JPEGLib", "%s", buffer);
+}
+
+/*
+ * Interface routines.  This layer of routines exists
+ * primarily to limit side-effects from using setjmp.
+ * Also, normal/error returns are converted into return
+ * values per libtiff practice.
+ */
+#define	CALLJPEG(sp, fail, op)	(SETJMP((sp)->exit_jmpbuf) ? (fail) : (op))
+#define	CALLVJPEG(sp, op)	CALLJPEG(sp, 0, ((op),1))
+
+static int	/* 1 if OK, 0 if libjpeg bailed out through TIFFjpeg_error_exit */
+TIFFjpeg_create_compress(JPEGState* sp)
+{
+	/* initialize JPEG error handling */
+	sp->cinfo.c.err = jpeg_std_error(&sp->err);
+	sp->err.error_exit = TIFFjpeg_error_exit;
+	sp->err.output_message = TIFFjpeg_output_message;
+
+	return CALLVJPEG(sp, jpeg_create_compress(&sp->cinfo.c));
+}
+
+static int	/* 1 if OK, 0 if libjpeg bailed out through TIFFjpeg_error_exit */
+TIFFjpeg_create_decompress(JPEGState* sp)
+{
+	/* initialize JPEG error handling */
+	sp->cinfo.d.err = jpeg_std_error(&sp->err);
+	sp->err.error_exit = TIFFjpeg_error_exit;
+	sp->err.output_message = TIFFjpeg_output_message;
+
+	return CALLVJPEG(sp, jpeg_create_decompress(&sp->cinfo.d));
+}
+
+static int	/* 1 if OK, 0 if libjpeg bailed out through TIFFjpeg_error_exit */
+TIFFjpeg_set_defaults(JPEGState* sp)
+{
+	return CALLVJPEG(sp, jpeg_set_defaults(&sp->cinfo.c));
+}
+
+static int	/* 1 if OK, 0 if libjpeg bailed out through TIFFjpeg_error_exit */
+TIFFjpeg_set_colorspace(JPEGState* sp, J_COLOR_SPACE colorspace)
+{
+	return CALLVJPEG(sp, jpeg_set_colorspace(&sp->cinfo.c, colorspace));
+}
+
+static int	/* 1 if OK, 0 if libjpeg bailed out through TIFFjpeg_error_exit */
+TIFFjpeg_set_quality(JPEGState* sp, int quality, boolean force_baseline)
+{
+	return CALLVJPEG(sp,
+	    jpeg_set_quality(&sp->cinfo.c, quality, force_baseline));
+}
+
+static int	/* 1 if OK, 0 if libjpeg bailed out through TIFFjpeg_error_exit */
+TIFFjpeg_suppress_tables(JPEGState* sp, boolean suppress)
+{
+	return CALLVJPEG(sp, jpeg_suppress_tables(&sp->cinfo.c, suppress));
+}
+
+static int	/* 1 if OK, 0 if libjpeg bailed out through TIFFjpeg_error_exit */
+TIFFjpeg_start_compress(JPEGState* sp, boolean write_all_tables)
+{
+	return CALLVJPEG(sp,
+	    jpeg_start_compress(&sp->cinfo.c, write_all_tables));
+}
+
+static int	/* jpeg_write_scanlines() result, or -1 if libjpeg bailed out */
+TIFFjpeg_write_scanlines(JPEGState* sp, JSAMPARRAY scanlines, int num_lines)
+{
+	return CALLJPEG(sp, -1, (int) jpeg_write_scanlines(&sp->cinfo.c,
+	    scanlines, (JDIMENSION) num_lines));
+}
+
+static int	/* jpeg_write_raw_data() result, or -1 if libjpeg bailed out */
+TIFFjpeg_write_raw_data(JPEGState* sp, JSAMPIMAGE data, int num_lines)
+{
+	return CALLJPEG(sp, -1, (int) jpeg_write_raw_data(&sp->cinfo.c,
+	    data, (JDIMENSION) num_lines));
+}
+
+static int	/* 1 if OK, 0 if libjpeg bailed out through TIFFjpeg_error_exit */
+TIFFjpeg_finish_compress(JPEGState* sp)
+{
+	return CALLVJPEG(sp, jpeg_finish_compress(&sp->cinfo.c));
+}
+
+static int	/* 1 if OK, 0 if libjpeg bailed out through TIFFjpeg_error_exit */
+TIFFjpeg_write_tables(JPEGState* sp)
+{
+	return CALLVJPEG(sp, jpeg_write_tables(&sp->cinfo.c));
+}
+
+static int	/* jpeg_read_header() result, or -1 if libjpeg bailed out */
+TIFFjpeg_read_header(JPEGState* sp, boolean require_image)
+{
+	return CALLJPEG(sp, -1, jpeg_read_header(&sp->cinfo.d, require_image));
+}
+
+/*
+ * Run jpeg_start_decompress on the decompressor through CALLVJPEG.
+ * JPEGPreDecode treats a zero return as failure.
+ */
+static int
+TIFFjpeg_start_decompress(JPEGState* sp)
+{
+	return CALLVJPEG(sp, jpeg_start_decompress(&sp->cinfo.d));
+}
+
+/*
+ * Ask jpeg_read_scanlines for up to max_lines scanlines and return its
+ * result cast to int.  JPEGDecode reads one line at a time and treats any
+ * result other than 1 as failure.  The -1 handed to CALLJPEG is presumably
+ * the error result -- confirm against the CALLJPEG definition.
+ */
+static int
+TIFFjpeg_read_scanlines(JPEGState* sp, JSAMPARRAY scanlines, int max_lines)
+{
+	return CALLJPEG(sp, -1, (int) jpeg_read_scanlines(&sp->cinfo.d,
+	    scanlines, (JDIMENSION) max_lines));
+}
+
+/*
+ * Ask jpeg_read_raw_data for up to max_lines rows of raw (per-component)
+ * data and return its result cast to int.  JPEGDecodeRaw treats any result
+ * other than the requested row count as failure.  The -1 handed to
+ * CALLJPEG is presumably the error result -- confirm against the macro.
+ */
+static int
+TIFFjpeg_read_raw_data(JPEGState* sp, JSAMPIMAGE data, int max_lines)
+{
+	return CALLJPEG(sp, -1, (int) jpeg_read_raw_data(&sp->cinfo.d,
+	    data, (JDIMENSION) max_lines));
+}
+
+/*
+ * Run jpeg_finish_decompress on the decompressor and return its result
+ * cast to int.  Unlike most wrappers here this uses CALLJPEG with -1
+ * rather than CALLVJPEG.
+ * NOTE(review): JPEGDecode/JPEGDecodeRaw use the result as the right-hand
+ * operand of `||`; if -1 really is the error result (CALLJPEG is defined
+ * outside this excerpt) an error would read as "true" there -- confirm.
+ */
+static int
+TIFFjpeg_finish_decompress(JPEGState* sp)
+{
+	return CALLJPEG(sp, -1, (int) jpeg_finish_decompress(&sp->cinfo.d));
+}
+
+/*
+ * Run jpeg_abort on the common view of the libjpeg object
+ * (sp->cinfo.comm) through CALLVJPEG.  JPEGPreDecode uses it to reset
+ * decoder state and treats a zero return as failure.
+ */
+static int
+TIFFjpeg_abort(JPEGState* sp)
+{
+	return CALLVJPEG(sp, jpeg_abort(&sp->cinfo.comm));
+}
+
+/*
+ * Run jpeg_destroy on the common view of the libjpeg object
+ * (sp->cinfo.comm) through CALLVJPEG.  Presumably returns zero on failure
+ * like the other CALLVJPEG wrappers (macro defined outside this excerpt).
+ */
+static int
+TIFFjpeg_destroy(JPEGState* sp)
+{
+	return CALLVJPEG(sp, jpeg_destroy(&sp->cinfo.comm));
+}
+
+/*
+ * Allocate a 2-D sample array of numrows rows by samplesperrow samples
+ * from the libjpeg memory manager (mem->alloc_sarray) in the given pool.
+ * (JSAMPARRAY) NULL is the failure value handed to CALLJPEG;
+ * alloc_downsampled_buffers treats a NULL result as failure.
+ */
+static JSAMPARRAY
+TIFFjpeg_alloc_sarray(JPEGState* sp, int pool_id,
+		      JDIMENSION samplesperrow, JDIMENSION numrows)
+{
+	return CALLJPEG(sp, (JSAMPARRAY) NULL,
+	    (*sp->cinfo.comm.mem->alloc_sarray)
+		(&sp->cinfo.comm, pool_id, samplesperrow, numrows));
+}
+
+/*
+ * JPEG library destination data manager.
+ * These routines direct compressed data from libjpeg into the
+ * libtiff output buffer.
+ */
+
+/*
+ * Destination-manager init hook: point libjpeg's output cursor at the
+ * start of the libtiff raw data buffer and advertise its full size.
+ */
+static void
+std_init_destination(j_compress_ptr cinfo)
+{
+	JPEGState* state = (JPEGState*) cinfo;
+
+	state->dest.free_in_buffer = (size_t) state->tif->tif_rawdatasize;
+	state->dest.next_output_byte = (JOCTET*) state->tif->tif_rawdata;
+}
+
+/*
+ * Destination-manager hook called when the output buffer is full:
+ * mark the whole raw buffer as used, flush it through libtiff, then
+ * hand the same buffer back to libjpeg as empty.  Always returns TRUE.
+ */
+static boolean
+std_empty_output_buffer(j_compress_ptr cinfo)
+{
+	JPEGState* state = (JPEGState*) cinfo;
+	TIFF* out = state->tif;
+
+	/* every byte of the raw buffer holds compressed data at this point */
+	out->tif_rawcc = out->tif_rawdatasize;
+	TIFFFlushData1(out);
+
+	/* restart at the beginning of the (now flushed) buffer */
+	state->dest.free_in_buffer = (size_t) out->tif_rawdatasize;
+	state->dest.next_output_byte = (JOCTET*) out->tif_rawdata;
+
+	return (TRUE);
+}
+
+/*
+ * Destination-manager termination hook: record in the TIFF structure
+ * how far libjpeg got in the raw buffer (write pointer and byte count).
+ * The buffer itself is not flushed here; libtiff does the final flush.
+ */
+static void
+std_term_destination(j_compress_ptr cinfo)
+{
+	JPEGState* state = (JPEGState*) cinfo;
+	TIFF* out = state->tif;
+	tsize_t unused = (tsize_t) state->dest.free_in_buffer;
+
+	out->tif_rawcp = (tidata_t) state->dest.next_output_byte;
+	out->tif_rawcc = out->tif_rawdatasize - unused;
+}
+
+/*
+ * Install the standard destination manager, which writes compressed
+ * data into the libtiff output buffer.  The tif argument is unused.
+ */
+static void
+TIFFjpeg_data_dest(JPEGState* sp, TIFF* tif)
+{
+	(void) tif;
+
+	sp->dest.term_destination = std_term_destination;
+	sp->dest.empty_output_buffer = std_empty_output_buffer;
+	sp->dest.init_destination = std_init_destination;
+	sp->cinfo.c.dest = &sp->dest;
+}
+
+/*
+ * Alternate destination manager for outputting to JPEGTables field.
+ */
+
+/*
+ * Init hook of the JPEGTables destination manager: libjpeg writes into
+ * the jpegtables buffer.  While the tables are being built,
+ * jpegtables_length holds the allocated size of that buffer.
+ */
+static void
+tables_init_destination(j_compress_ptr cinfo)
+{
+	JPEGState* state = (JPEGState*) cinfo;
+
+	state->dest.free_in_buffer = (size_t) state->jpegtables_length;
+	state->dest.next_output_byte = (JOCTET*) state->jpegtables;
+}
+
+/*
+ * Buffer-full hook of the JPEGTables destination manager: grow the
+ * jpegtables buffer by 1000 bytes and let libjpeg continue writing right
+ * after the data already emitted.  An allocation failure is reported to
+ * libjpeg through ERREXIT1.  Returns TRUE.
+ */
+static boolean
+tables_empty_output_buffer(j_compress_ptr cinfo)
+{
+	JPEGState* state = (JPEGState*) cinfo;
+	void* grown = _TIFFrealloc((tdata_t) state->jpegtables,
+				   (tsize_t) (state->jpegtables_length + 1000));
+
+	if (grown == NULL)
+		ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 100);
+
+	state->jpegtables = grown;
+	/* resume at the old end of the buffer, i.e. before the length grows */
+	state->dest.next_output_byte = (JOCTET*) grown + state->jpegtables_length;
+	state->jpegtables_length += 1000;
+	state->dest.free_in_buffer = (size_t) 1000;
+	return (TRUE);
+}
+
+/*
+ * Termination hook of the JPEGTables destination manager: shrink
+ * jpegtables_length from the allocated size to the number of bytes
+ * libjpeg actually wrote.
+ */
+static void
+tables_term_destination(j_compress_ptr cinfo)
+{
+	JPEGState* state = (JPEGState*) cinfo;
+
+	state->jpegtables_length =
+	    state->jpegtables_length - state->dest.free_in_buffer;
+}
+
+/*
+ * Install the JPEGTables destination manager.
+ * Any previous jpegtables buffer is released and a fresh 1000-byte
+ * working buffer is allocated (usually adequate; it is enlarged on
+ * demand by tables_empty_output_buffer).  The tif argument is unused.
+ * Returns 1 on success, 0 (after reporting an error) if the buffer
+ * cannot be allocated.
+ */
+static int
+TIFFjpeg_tables_dest(JPEGState* sp, TIFF* tif)
+{
+	(void) tif;
+
+	if (sp->jpegtables)
+		_TIFFfree(sp->jpegtables);
+	sp->jpegtables_length = 1000;
+	sp->jpegtables = (void*) _TIFFmalloc((tsize_t) sp->jpegtables_length);
+	if (sp->jpegtables != NULL) {
+		sp->dest.term_destination = tables_term_destination;
+		sp->dest.empty_output_buffer = tables_empty_output_buffer;
+		sp->dest.init_destination = tables_init_destination;
+		sp->cinfo.c.dest = &sp->dest;
+		return (1);
+	}
+
+	/* allocation failed: leave the state describing an empty table */
+	sp->jpegtables_length = 0;
+	TIFFErrorExt(sp->tif->tif_clientdata, "TIFFjpeg_tables_dest", "No space for JPEGTables");
+	return (0);
+}
+
+/*
+ * JPEG library source data manager.
+ * These routines supply compressed data to libjpeg.
+ */
+
+/*
+ * Source-manager init hook: expose the libtiff raw data buffer
+ * (tif_rawdata, tif_rawcc bytes) to libjpeg as its input.
+ */
+static void
+std_init_source(j_decompress_ptr cinfo)
+{
+	JPEGState* state = (JPEGState*) cinfo;
+
+	state->src.bytes_in_buffer = (size_t) state->tif->tif_rawcc;
+	state->src.next_input_byte = (const JOCTET*) state->tif->tif_rawdata;
+}
+
+/*
+ * Source-manager hook called when libjpeg runs out of input.
+ * This is not expected to happen, because the entire strip/tile is read
+ * into memory before the decompressor is called and was supplied by
+ * init_source.  If it does, a warning is raised and a fake EOI marker is
+ * supplied as the remaining input.  Always returns TRUE.
+ */
+static boolean
+std_fill_input_buffer(j_decompress_ptr cinfo)
+{
+	static const JOCTET fake_eoi[2] = { 0xFF, JPEG_EOI };
+	JPEGState* state = (JPEGState* ) cinfo;
+
+	WARNMS(cinfo, JWRN_JPEG_EOF);
+
+	/* feed the two-byte end-of-image marker */
+	state->src.bytes_in_buffer = sizeof(fake_eoi);
+	state->src.next_input_byte = fake_eoi;
+	return (TRUE);
+}
+
+/*
+ * Source-manager hook: skip num_bytes of input.
+ * Non-positive requests are ignored.  A request larger than what is left
+ * in the buffer is treated as an overrun and handled by
+ * std_fill_input_buffer; otherwise the read cursor is simply advanced.
+ */
+static void
+std_skip_input_data(j_decompress_ptr cinfo, long num_bytes)
+{
+	JPEGState* state = (JPEGState*) cinfo;
+
+	if (num_bytes <= 0)
+		return;
+
+	if (num_bytes <= (long) state->src.bytes_in_buffer) {
+		state->src.bytes_in_buffer -= (size_t) num_bytes;
+		state->src.next_input_byte += (size_t) num_bytes;
+	} else {
+		/* oops, buffer overrun */
+		(void) std_fill_input_buffer(cinfo);
+	}
+}
+
+/*
+ * Source-manager termination hook.  Intentionally does nothing; cinfo is
+ * referenced only to silence unused-parameter warnings.
+ */
+static void
+std_term_source(j_decompress_ptr cinfo)
+{
+	/* No work necessary here */
+	/* Open question: should tif->tif_rawcp and tif->tif_rawcc be updated? */
+	/* (if so, an empty tables_term_source would be needed as well) */
+	(void) cinfo;
+}
+
+/*
+ * Install the standard source manager, which feeds libjpeg from the
+ * libtiff raw data buffer.  The input cursor starts out empty; it is
+ * filled in by the init_source hook.  The tif argument is unused.
+ */
+static void
+TIFFjpeg_data_src(JPEGState* sp, TIFF* tif)
+{
+	(void) tif;
+
+	/* start with an empty buffer, for safety */
+	sp->src.next_input_byte = NULL;
+	sp->src.bytes_in_buffer = 0;
+
+	sp->src.term_source = std_term_source;
+	sp->src.resync_to_restart = jpeg_resync_to_restart;
+	sp->src.skip_input_data = std_skip_input_data;
+	sp->src.fill_input_buffer = std_fill_input_buffer;
+	sp->src.init_source = std_init_source;
+
+	sp->cinfo.d.src = &sp->src;
+}
+
+/*
+ * Alternate source manager for reading from JPEGTables.
+ * We can share all the code except for the init routine.
+ */
+
+/*
+ * Init hook of the JPEGTables source manager: libjpeg reads from the
+ * jpegtables buffer, jpegtables_length bytes long.
+ */
+static void
+tables_init_source(j_decompress_ptr cinfo)
+{
+	JPEGState* state = (JPEGState*) cinfo;
+
+	state->src.bytes_in_buffer = (size_t) state->jpegtables_length;
+	state->src.next_input_byte = (const JOCTET*) state->jpegtables;
+}
+
+/*
+ * Install the source manager used for reading the JPEGTables field.
+ * It is the standard source manager with only the init_source hook
+ * replaced, so the override must come after TIFFjpeg_data_src.
+ */
+static void
+TIFFjpeg_tables_src(JPEGState* sp, TIFF* tif)
+{
+	TIFFjpeg_data_src(sp, tif);
+	sp->src.init_source = tables_init_source;
+}
+
+/*
+ * Allocate the per-component buffers needed for downsampled I/O.
+ * The sizes come from values libjpeg computed in jpeg_start_compress or
+ * jpeg_start_decompress.  The buffers are taken from libjpeg's image
+ * pool so that they are released automatically when the strip/tile is
+ * done.  As a side effect sp->samplesperclump is set to the sum of
+ * h_samp_factor * v_samp_factor over all components.
+ * Returns 1 on success, 0 if any allocation fails (in which case
+ * samplesperclump is left untouched).
+ */
+static int
+alloc_downsampled_buffers(TIFF* tif, jpeg_component_info* comp_info,
+			  int num_components)
+{
+	JPEGState* sp = JState(tif);
+	int clump_samples = 0;
+	int ci;
+
+	for (ci = 0; ci < num_components; ci++) {
+		jpeg_component_info* comp = &comp_info[ci];
+		JSAMPARRAY rows;
+
+		clump_samples += comp->h_samp_factor * comp->v_samp_factor;
+		rows = TIFFjpeg_alloc_sarray(sp, JPOOL_IMAGE,
+				comp->width_in_blocks * DCTSIZE,
+				(JDIMENSION) (comp->v_samp_factor*DCTSIZE));
+		if (rows == NULL)
+			return (0);
+		sp->ds_buffer[ci] = rows;
+	}
+	sp->samplesperclump = clump_samples;
+	return (1);
+}
+
+
+/*
+ * JPEG Decoding.
+ */
+
+/*
+ * One-time decoder setup for a directory.
+ * Makes sure libjpeg is initialized for decompression, loads the
+ * JPEGTables field (if present) into the decompressor, records the
+ * photometric interpretation and subsampling that apply to every
+ * strip/tile, and installs the normal data source.
+ * Returns 1 on success, 0 if the JPEGTables field cannot be parsed as a
+ * tables-only datastream.
+ */
+static int
+JPEGSetupDecode(TIFF* tif)
+{
+	JPEGState* sp = JState(tif);
+	TIFFDirectory *dir = &tif->tif_dir;
+
+        JPEGInitializeLibJPEG( tif, 0, 1 );
+
+	assert(sp != NULL);
+	assert(sp->cinfo.comm.is_decompressor);
+
+	/* Read JPEGTables if it is present */
+	if (TIFFFieldSet(tif,FIELD_JPEGTABLES)) {
+		int header_status;
+
+		TIFFjpeg_tables_src(sp, tif);
+		header_status = TIFFjpeg_read_header(sp,FALSE);
+		if (header_status != JPEG_HEADER_TABLES_ONLY) {
+			TIFFErrorExt(tif->tif_clientdata, "JPEGSetupDecode", "Bogus JPEGTables field");
+			return (0);
+		}
+	}
+
+	/* Grab parameters that are same for all strips/tiles */
+	sp->photometric = dir->td_photometric;
+	if (sp->photometric == PHOTOMETRIC_YCBCR) {
+		sp->h_sampling = dir->td_ycbcrsubsampling[0];
+		sp->v_sampling = dir->td_ycbcrsubsampling[1];
+	} else {
+		/* TIFF 6.0 forbids subsampling of all other color spaces */
+		sp->h_sampling = 1;
+		sp->v_sampling = 1;
+	}
+
+	/* Set up for reading normal data */
+	TIFFjpeg_data_src(sp, tif);
+	tif->tif_postdecode = _TIFFNoPostDecode; /* override byte swapping */
+	return (1);
+}
+
+/*
+ * Set up for decoding a strip or tile.
+ *
+ * Resets the decompressor, reads the JPEG header of the segment, checks
+ * it against the TIFF directory (size, component count, precision,
+ * sampling factors), selects the colorspace handling and the decode
+ * routine, and starts the decompressor.
+ * s is the sample plane being decoded; it only matters for
+ * PLANARCONFIG_SEPARATE, where planes other than 0 are expected at the
+ * subsampled size.
+ * Returns 1 on success, 0 on failure.  A size mismatch or an unexpected
+ * sampling factor on component 0 only produces a warning.
+ */
+static int
+JPEGPreDecode(TIFF* tif, tsample_t s)
+{
+	JPEGState *sp = JState(tif);
+	TIFFDirectory *td = &tif->tif_dir;
+	static const char module[] = "JPEGPreDecode";
+	uint32 segment_width, segment_height;
+	int downsampled_output;
+	int ci;
+
+	assert(sp != NULL);
+	assert(sp->cinfo.comm.is_decompressor);
+	/*
+	 * Reset decoder state from any previous strip/tile,
+	 * in case application didn't read the whole strip.
+	 */
+	if (!TIFFjpeg_abort(sp))
+		return (0);
+	/*
+	 * Read the header for this strip/tile.
+	 */
+	if (TIFFjpeg_read_header(sp, TRUE) != JPEG_HEADER_OK)
+		return (0);
+	/*
+	 * Check image parameters and set decompression parameters.
+	 * The strip defaults computed first are overridden for tiled files.
+	 */
+	segment_width = td->td_imagewidth;
+	segment_height = td->td_imagelength - tif->tif_row;
+	if (isTiled(tif)) {
+                segment_width = td->td_tilewidth;
+                segment_height = td->td_tilelength;
+		sp->bytesperline = TIFFTileRowSize(tif);
+	} else {
+		/* the last strip may be shorter than RowsPerStrip */
+		if (segment_height > td->td_rowsperstrip)
+			segment_height = td->td_rowsperstrip;
+		sp->bytesperline = TIFFScanlineSize(tif);
+	}
+	if (td->td_planarconfig == PLANARCONFIG_SEPARATE && s > 0) {
+		/*
+		 * For PC 2, scale down the expected strip/tile size
+		 * to match a downsampled component
+		 */
+		segment_width = TIFFhowmany(segment_width, sp->h_sampling);
+		segment_height = TIFFhowmany(segment_height, sp->v_sampling);
+	}
+	/* a size mismatch is reported but decoding continues */
+	if (sp->cinfo.d.image_width != segment_width ||
+	    sp->cinfo.d.image_height != segment_height) {
+		TIFFWarningExt(tif->tif_clientdata, module,
+                 "Improper JPEG strip/tile size, expected %dx%d, got %dx%d",
+                          segment_width, 
+                          segment_height,
+                          sp->cinfo.d.image_width, 
+                          sp->cinfo.d.image_height);
+	}
+	/* contiguous data carries all samples; separate planes carry one */
+	if (sp->cinfo.d.num_components !=
+	    (td->td_planarconfig == PLANARCONFIG_CONTIG ?
+	     td->td_samplesperpixel : 1)) {
+		TIFFErrorExt(tif->tif_clientdata, module, "Improper JPEG component count");
+		return (0);
+	}
+#ifdef JPEG_LIB_MK1
+	/* MK1 build: accept 8 or 12 bits and push the depth into libjpeg */
+	if (12 != td->td_bitspersample && 8 != td->td_bitspersample) {
+			TIFFErrorExt(tif->tif_clientdata, module, "Improper JPEG data precision");
+            return (0);
+	}
+        sp->cinfo.d.data_precision = td->td_bitspersample;
+        sp->cinfo.d.bits_in_jsample = td->td_bitspersample;
+#else
+	/* otherwise the JPEG precision must equal BitsPerSample */
+	if (sp->cinfo.d.data_precision != td->td_bitspersample) {
+			TIFFErrorExt(tif->tif_clientdata, module, "Improper JPEG data precision");
+            return (0);
+	}
+#endif
+	if (td->td_planarconfig == PLANARCONFIG_CONTIG) {
+		/* Component 0 should have expected sampling factors */
+		if (sp->cinfo.d.comp_info[0].h_samp_factor != sp->h_sampling ||
+		    sp->cinfo.d.comp_info[0].v_samp_factor != sp->v_sampling) {
+				TIFFWarningExt(tif->tif_clientdata, module,
+                                    "Improper JPEG sampling factors %d,%d\n"
+                                    "Apparently should be %d,%d.",
+                                    sp->cinfo.d.comp_info[0].h_samp_factor,
+                                    sp->cinfo.d.comp_info[0].v_samp_factor,
+                                    sp->h_sampling, sp->v_sampling);
+
+			    /*
+			     * XXX: Files written by the Intergraph software
+			     * have different sampling factors stored in the
+			     * TIFF tags and in the JPEG structures. We try
+			     * to detect Intergraph files by the presence
+			     * of the tag 33918.  When no field info for that
+			     * tag is found, the factors from the JPEG stream
+			     * replace the ones taken from the TIFF tags.
+			     */
+			    if (!_TIFFFindFieldInfo(tif, 33918, TIFF_ANY)) {
+					TIFFWarningExt(tif->tif_clientdata, module,
+					"Decompressor will try reading with "
+					"sampling %d,%d.",
+					sp->cinfo.d.comp_info[0].h_samp_factor,
+					sp->cinfo.d.comp_info[0].v_samp_factor);
+
+				    sp->h_sampling = (uint16)
+					sp->cinfo.d.comp_info[0].h_samp_factor;
+				    sp->v_sampling = (uint16)
+					sp->cinfo.d.comp_info[0].v_samp_factor;
+			    }
+		}
+		/* Rest should have sampling factors 1,1 */
+		for (ci = 1; ci < sp->cinfo.d.num_components; ci++) {
+			if (sp->cinfo.d.comp_info[ci].h_samp_factor != 1 ||
+			    sp->cinfo.d.comp_info[ci].v_samp_factor != 1) {
+				TIFFErrorExt(tif->tif_clientdata, module, "Improper JPEG sampling factors");
+				return (0);
+			}
+		}
+	} else {
+		/* PC 2's single component should have sampling factors 1,1 */
+		if (sp->cinfo.d.comp_info[0].h_samp_factor != 1 ||
+		    sp->cinfo.d.comp_info[0].v_samp_factor != 1) {
+			TIFFErrorExt(tif->tif_clientdata, module, "Improper JPEG sampling factors");
+			return (0);
+		}
+	}
+	downsampled_output = FALSE;
+	if (td->td_planarconfig == PLANARCONFIG_CONTIG &&
+	    sp->photometric == PHOTOMETRIC_YCBCR &&
+	    sp->jpegcolormode == JPEGCOLORMODE_RGB) {
+	/* Convert YCbCr to RGB */
+		sp->cinfo.d.jpeg_color_space = JCS_YCbCr;
+		sp->cinfo.d.out_color_space = JCS_RGB;
+	} else {
+			/* Suppress colorspace handling */
+		sp->cinfo.d.jpeg_color_space = JCS_UNKNOWN;
+		sp->cinfo.d.out_color_space = JCS_UNKNOWN;
+ 		tif->tif_flags |= TIFF_UPSAMPLED;  /* IMLIB - allow upsampling when there is no colorspace handling.
+		   The downsampled-output selection that follows is deliberately
+		   left disabled inside this comment, so downsampled_output
+		   stays FALSE:
+		if (td->td_planarconfig == PLANARCONFIG_CONTIG &&
+		    (sp->h_sampling != 1 || sp->v_sampling != 1))
+			downsampled_output = TRUE;  */
+		/* XXX what about up-sampling? */
+	}
+	if (downsampled_output) {
+		/* Need to use raw-data interface to libjpeg */
+		sp->cinfo.d.raw_data_out = TRUE;
+		tif->tif_decoderow = JPEGDecodeRaw;
+		tif->tif_decodestrip = JPEGDecodeRaw;
+		tif->tif_decodetile = JPEGDecodeRaw;
+	} else {
+		/* Use normal interface to libjpeg */
+		sp->cinfo.d.raw_data_out = FALSE;
+		tif->tif_decoderow = JPEGDecode;
+		tif->tif_decodestrip = JPEGDecode;
+		tif->tif_decodetile = JPEGDecode;
+	}
+	/* Start JPEG decompressor */
+	if (!TIFFjpeg_start_decompress(sp))
+		return (0);
+	/* Allocate downsampled-data buffers if needed */
+	if (downsampled_output) {
+		if (!alloc_downsampled_buffers(tif, sp->cinfo.d.comp_info,
+					       sp->cinfo.d.num_components))
+			return (0);
+		sp->scancount = DCTSIZE;	/* mark buffer empty */
+	}
+	return (1);
+}
+
+/*
+ * Decode a chunk of pixels.
+ * "Standard" case: returned data is not downsampled.
+ *
+ * buf receives the decoded data and cc is its size in bytes; only whole
+ * scanlines (sp->bytesperline bytes each) are decoded and the row count
+ * is capped at the JPEG image height.  s is unused.
+ * When a temporary line buffer is needed (12-bit data, or always for the
+ * MK1 library) the samples are read into it and repacked into buf.
+ * Returns 0 if the work buffer cannot be allocated or a scanline cannot
+ * be read; the work buffer is released on every exit path.  Otherwise
+ * returns nonzero while the strip/tile is unfinished, or the result of
+ * TIFFjpeg_finish_decompress once all scanlines have been output.
+ */
+/*ARGSUSED*/ static int
+JPEGDecode(TIFF* tif, tidata_t buf, tsize_t cc, tsample_t s)
+{
+    JPEGState *sp = JState(tif);
+    tsize_t nrows;
+    (void) s;
+
+    nrows = cc / sp->bytesperline;
+    if (cc % sp->bytesperline)
+		TIFFWarningExt(tif->tif_clientdata, tif->tif_name, "fractional scanline not read");
+
+    if( nrows > (int) sp->cinfo.d.image_height )
+        nrows = sp->cinfo.d.image_height;
+
+    /* data is expected to be read in multiples of a scanline */
+    if (nrows)
+    {
+        JSAMPROW line_work_buf = NULL;
+
+        /*
+        ** For 6B, only use temporary buffer for 12 bit imagery. 
+        ** For Mk1 always use it. 
+        */
+#if !defined(JPEG_LIB_MK1)        
+        if( sp->cinfo.d.data_precision == 12 )
+#endif
+        {
+            line_work_buf = (JSAMPROW) 
+                _TIFFmalloc(sizeof(short) * sp->cinfo.d.output_width 
+                            * sp->cinfo.d.num_components );
+            /*
+            ** Without the work buffer we must not fall through to the
+            ** direct-read path below: it is only meant for the case in
+            ** which no work buffer was requested at all.
+            */
+            if( line_work_buf == NULL )
+            {
+                TIFFErrorExt(tif->tif_clientdata, "JPEGDecode",
+                             "No space for scanline work buffer");
+                return (0);
+            }
+        }
+
+        do {
+            if( line_work_buf != NULL )
+            {
+                /* 
+                ** In the MK1 case, we always read into a 16bit buffer, and
+                ** then pack down to 12bit or 8bit.  In 6B case we only read
+                ** into 16 bit buffer for 12bit data, which we need to repack. 
+                */
+                if (TIFFjpeg_read_scanlines(sp, &line_work_buf, 1) != 1)
+                {
+                    /* do not leak the work buffer on a failed read */
+                    _TIFFfree( line_work_buf );
+                    return (0);
+                }
+
+                if( sp->cinfo.d.data_precision == 12 )
+                {
+                    /* pack two 12-bit samples into three output bytes */
+                    int value_pairs = (sp->cinfo.d.output_width 
+                                       * sp->cinfo.d.num_components) / 2;
+                    int iPair;
+
+                    for( iPair = 0; iPair < value_pairs; iPair++ )
+                    {
+                        unsigned char *out_ptr = 
+                            ((unsigned char *) buf) + iPair * 3;
+                        JSAMPLE *in_ptr = line_work_buf + iPair * 2;
+
+                        out_ptr[0] = (in_ptr[0] & 0xff0) >> 4;
+                        out_ptr[1] = ((in_ptr[0] & 0xf) << 4)
+                            | ((in_ptr[1] & 0xf00) >> 8);
+                        out_ptr[2] = ((in_ptr[1] & 0xff) >> 0);
+                    }
+                }
+                else if( sp->cinfo.d.data_precision == 8 )
+                {
+                    /* keep the low byte of every sample */
+                    int value_count = (sp->cinfo.d.output_width 
+                                       * sp->cinfo.d.num_components);
+                    int iValue;
+
+                    for( iValue = 0; iValue < value_count; iValue++ )
+                    {
+                        ((unsigned char *) buf)[iValue] = 
+                            line_work_buf[iValue] & 0xff;
+                    }
+                }
+            }
+            else
+            {
+                /*
+                ** In the libjpeg6b 8bit case.  We read directly into the 
+                ** TIFF buffer.
+                */
+                JSAMPROW bufptr = (JSAMPROW)buf;
+  
+                if (TIFFjpeg_read_scanlines(sp, &bufptr, 1) != 1)
+                    return (0);
+            }
+
+            ++tif->tif_row;
+            buf += sp->bytesperline;
+            cc -= sp->bytesperline;
+        } while (--nrows > 0);
+
+        if( line_work_buf != NULL )
+            _TIFFfree( line_work_buf );
+    }
+
+    /* Close down the decompressor if we've finished the strip or tile. */
+    return sp->cinfo.d.output_scanline < sp->cinfo.d.output_height
+        || TIFFjpeg_finish_decompress(sp);
+}
+
+/*
+ * Decode a chunk of pixels.
+ * Returned data is downsampled per sampling factors.
+ *
+ * Rows are fetched from libjpeg's raw-data interface into sp->ds_buffer
+ * in batches and then interleaved into buf one "clump" at a time, each
+ * clump holding samples_per_clump samples.  The number of rows processed
+ * is the JPEG image height; s is unused.
+ * With the MK1 library the clumps are assembled in a temporary 16-bit
+ * buffer and packed down to 8 or 12 bits; that buffer is checked after
+ * allocation and released on every exit path.
+ * Returns 0 if that buffer cannot be allocated or raw data cannot be
+ * read.  Otherwise returns nonzero while the strip/tile is unfinished, or
+ * the result of TIFFjpeg_finish_decompress once all scanlines are output.
+ */
+/*ARGSUSED*/ static int
+JPEGDecodeRaw(TIFF* tif, tidata_t buf, tsize_t cc, tsample_t s)
+{
+    JPEGState *sp = JState(tif);
+    tsize_t nrows;
+    (void) s;
+
+    /* data is expected to be read in multiples of a scanline */
+    if ( (nrows = sp->cinfo.d.image_height) ) {
+        /* Cb,Cr both have sampling factors 1, so this is correct */
+        JDIMENSION clumps_per_line = sp->cinfo.d.comp_info[1].downsampled_width;
+        int samples_per_clump = sp->samplesperclump;
+
+#ifdef JPEG_LIB_MK1
+        unsigned short* tmpbuf = _TIFFmalloc(sizeof(unsigned short) *
+                                             sp->cinfo.d.output_width *
+                                             sp->cinfo.d.num_components);
+
+        if (tmpbuf == NULL) {
+            TIFFErrorExt(tif->tif_clientdata, "JPEGDecodeRaw",
+                         "No space for sample work buffer");
+            return (0);
+        }
+#endif
+
+        do {
+            jpeg_component_info *compptr;
+            int ci, clumpoffset;
+
+            /* Reload downsampled-data buffer if needed */
+            if (sp->scancount >= DCTSIZE) {
+                int n = sp->cinfo.d.max_v_samp_factor * DCTSIZE;
+                if (TIFFjpeg_read_raw_data(sp, sp->ds_buffer, n)
+                    != n) {
+#ifdef JPEG_LIB_MK1
+                    /* do not leak the work buffer on a failed read */
+                    _TIFFfree(tmpbuf);
+#endif
+                    return (0);
+                }
+                sp->scancount = 0;
+            }
+            /*
+             * Fastest way to unseparate data is to make one pass
+             * over the scanline for each row of each component.
+             */
+            clumpoffset = 0;	/* first sample in clump */
+            for (ci = 0, compptr = sp->cinfo.d.comp_info;
+                 ci < sp->cinfo.d.num_components;
+                 ci++, compptr++) {
+                int hsamp = compptr->h_samp_factor;
+                int vsamp = compptr->v_samp_factor;
+                int ypos;
+
+                for (ypos = 0; ypos < vsamp; ypos++) {
+                    JSAMPLE *inptr = sp->ds_buffer[ci][sp->scancount*vsamp + ypos];
+#ifdef JPEG_LIB_MK1
+                    JSAMPLE *outptr = (JSAMPLE*)tmpbuf + clumpoffset;
+#else
+                    JSAMPLE *outptr = (JSAMPLE*)buf + clumpoffset;
+#endif
+                    JDIMENSION nclump;
+
+                    if (hsamp == 1) {
+                        /* fast path for at least Cb and Cr */
+                        for (nclump = clumps_per_line; nclump-- > 0; ) {
+                            outptr[0] = *inptr++;
+                            outptr += samples_per_clump;
+                        }
+                    } else {
+                        int xpos;
+
+                        /* general case */
+                        for (nclump = clumps_per_line; nclump-- > 0; ) {
+                            for (xpos = 0; xpos < hsamp; xpos++)
+                                outptr[xpos] = *inptr++;
+                            outptr += samples_per_clump;
+                        }
+                    }
+                    clumpoffset += hsamp;
+                }
+            }
+
+#ifdef JPEG_LIB_MK1
+            {
+                if (sp->cinfo.d.data_precision == 8)
+                {
+                    /* keep the low byte of every sample */
+                    int i=0;
+                    int len = sp->cinfo.d.output_width * sp->cinfo.d.num_components;
+                    for (i=0; i<len; i++)
+                    {
+                        ((unsigned char*)buf)[i] = tmpbuf[i] & 0xff;
+                    }
+                }
+                else
+                {         /* 12-bit: pack two samples into three bytes */
+                    int value_pairs = (sp->cinfo.d.output_width
+                                       * sp->cinfo.d.num_components) / 2;
+                    int iPair;
+                    for( iPair = 0; iPair < value_pairs; iPair++ )
+                    {
+                        unsigned char *out_ptr = ((unsigned char *) buf) + iPair * 3;
+                        JSAMPLE *in_ptr = tmpbuf + iPair * 2;
+                        out_ptr[0] = (in_ptr[0] & 0xff0) >> 4;
+                        out_ptr[1] = ((in_ptr[0] & 0xf) << 4)
+                            | ((in_ptr[1] & 0xf00) >> 8);
+                        out_ptr[2] = ((in_ptr[1] & 0xff) >> 0);
+                    }
+                }
+            }
+#endif
+
+            ++sp->scancount;
+            ++tif->tif_row;
+            buf += sp->bytesperline;
+            cc -= sp->bytesperline;
+        } while (--nrows > 0);
+
+#ifdef JPEG_LIB_MK1
+        _TIFFfree(tmpbuf);
+#endif
+
+    }
+
+    /* Close down the decompressor if done. */
+    return sp->cinfo.d.output_scanline < sp->cinfo.d.output_height
+        || TIFFjpeg_finish_decompress(sp);
+}
+
+
+/*
+ * JPEG Encoding.
+ */
+
+/*
+ * Clear the sent_table flag of quantization table tblno, if that table
+ * exists, so that the compressor will emit it.
+ */
+static void
+unsuppress_quant_table (JPEGState* sp, int tblno)
+{
+	JQUANT_TBL* table = sp->cinfo.c.quant_tbl_ptrs[tblno];
+
+	if (table != NULL)
+		table->sent_table = FALSE;
+}
+
+/*
+ * Clear the sent_table flag of the DC and AC Huffman tables numbered
+ * tblno, for whichever of the two exists, so that they will be emitted.
+ */
+static void
+unsuppress_huff_table (JPEGState* sp, int tblno)
+{
+	JHUFF_TBL* dc_table = sp->cinfo.c.dc_huff_tbl_ptrs[tblno];
+	JHUFF_TBL* ac_table = sp->cinfo.c.ac_huff_tbl_ptrs[tblno];
+
+	if (dc_table != NULL)
+		dc_table->sent_table = FALSE;
+	if (ac_table != NULL)
+		ac_table->sent_table = FALSE;
+}
+
+/*
+ * Build the contents of the JPEGTables field.
+ * Quantization tables are initialized for the current quality setting,
+ * all tables are suppressed, and then only those selected by
+ * jpegtablesmode are re-enabled (the second table of each kind only for
+ * YCbCr data).  The tables-only datastream is written into
+ * sp->jpegtables.  Returns 1 on success, 0 on failure.
+ */
+static int
+prepare_JPEGTables(TIFF* tif)
+{
+	JPEGState* sp = JState(tif);
+	int has_chroma;
+
+        JPEGInitializeLibJPEG( tif, 0, 0 );
+
+	/* Initialize quant tables for current quality setting */
+	if (!TIFFjpeg_set_quality(sp, sp->jpegquality, FALSE))
+		return (0);
+	/* Mark only the tables we want for output */
+	if (!TIFFjpeg_suppress_tables(sp, TRUE))
+		return (0);
+
+	/* NB: chrominance tables are currently used only with YCbCr */
+	has_chroma = (sp->photometric == PHOTOMETRIC_YCBCR);
+	if (sp->jpegtablesmode & JPEGTABLESMODE_QUANT) {
+		unsuppress_quant_table(sp, 0);
+		if (has_chroma)
+			unsuppress_quant_table(sp, 1);
+	}
+	if (sp->jpegtablesmode & JPEGTABLESMODE_HUFF) {
+		unsuppress_huff_table(sp, 0);
+		if (has_chroma)
+			unsuppress_huff_table(sp, 1);
+	}
+
+	/* Direct libjpeg output into jpegtables */
+	if (!TIFFjpeg_tables_dest(sp, tif))
+		return (0);
+	/* Emit tables-only datastream */
+	return TIFFjpeg_write_tables(sp) ? (1) : (0);
+}
+
+/*
+ * One-time encoder setup for a directory.
+ * Makes sure libjpeg is initialized for compression, loads libjpeg's
+ * default parameters, records photometric interpretation and subsampling,
+ * validates BitsPerSample and the strip/tile geometry, builds the
+ * JPEGTables field when jpegtablesmode asks for it, and finally directs
+ * libjpeg output to the libtiff output buffer.
+ * Returns 1 on success, 0 (after reporting an error) on failure.
+ */
+static int
+JPEGSetupEncode(TIFF* tif)
+{
+	JPEGState* sp = JState(tif);
+	TIFFDirectory *td = &tif->tif_dir;
+	static const char module[] = "JPEGSetupEncode";
+
+        JPEGInitializeLibJPEG( tif, 1, 0 );
+
+	assert(sp != NULL);
+	assert(!sp->cinfo.comm.is_decompressor);
+
+	/*
+	 * Initialize all JPEG parameters to default values.
+	 * Note that jpeg_set_defaults needs legal values for
+	 * in_color_space and input_components.
+	 */
+	sp->cinfo.c.in_color_space = JCS_UNKNOWN;
+	sp->cinfo.c.input_components = 1;
+	if (!TIFFjpeg_set_defaults(sp))
+		return (0);
+	/* Set per-file parameters */
+	sp->photometric = td->td_photometric;
+	switch (sp->photometric) {
+	case PHOTOMETRIC_YCBCR:
+		sp->h_sampling = td->td_ycbcrsubsampling[0];
+		sp->v_sampling = td->td_ycbcrsubsampling[1];
+		/*
+		 * A ReferenceBlackWhite field *must* be present since the
+		 * default value is inappropriate for YCbCr.  Fill in the
+		 * proper value if application didn't set it.
+		 */
+		{
+			float *ref;
+			if (!TIFFGetField(tif, TIFFTAG_REFERENCEBLACKWHITE,
+					  &ref)) {
+				float refbw[6];
+				/*
+				 * NOTE(review): BitsPerSample is only
+				 * validated further down, so this shift runs
+				 * on an unchecked value -- confirm that an
+				 * out-of-range depth cannot reach here.
+				 */
+				long top = 1L << td->td_bitspersample;
+				/* pairs of values built from 0, top-1 and top/2 */
+				refbw[0] = 0;
+				refbw[1] = (float)(top-1L);
+				refbw[2] = (float)(top>>1);
+				refbw[3] = refbw[1];
+				refbw[4] = refbw[2];
+				refbw[5] = refbw[1];
+				TIFFSetField(tif, TIFFTAG_REFERENCEBLACKWHITE,
+					     refbw);
+			}
+		}
+		break;
+	case PHOTOMETRIC_PALETTE:		/* disallowed by Tech Note */
+	case PHOTOMETRIC_MASK:
+		TIFFErrorExt(tif->tif_clientdata, module,
+			  "PhotometricInterpretation %d not allowed for JPEG",
+			  (int) sp->photometric);
+		return (0);
+	default:
+		/* TIFF 6.0 forbids subsampling of all other color spaces */
+		sp->h_sampling = 1;
+		sp->v_sampling = 1;
+		break;
+	}
+
+	/* Verify miscellaneous parameters */
+
+	/*
+	 * This would need work if libtiff ever supports different
+	 * depths for different components, or if libjpeg ever supports
+	 * run-time selection of depth.  Neither is imminent.
+	 */
+#ifdef JPEG_LIB_MK1
+        /* BITS_IN_JSAMPLE now permits 8 and 12 --- dgilbert */
+	if (td->td_bitspersample != 8 && td->td_bitspersample != 12) 
+#else
+	if (td->td_bitspersample != BITS_IN_JSAMPLE ) 
+#endif
+        {
+		TIFFErrorExt(tif->tif_clientdata, module, "BitsPerSample %d not allowed for JPEG",
+			  (int) td->td_bitspersample);
+		return (0);
+	}
+	sp->cinfo.c.data_precision = td->td_bitspersample;
+#ifdef JPEG_LIB_MK1
+        sp->cinfo.c.bits_in_jsample = td->td_bitspersample;
+#endif
+	/* tile dimensions must be multiples of sampling factor * DCTSIZE */
+	if (isTiled(tif)) {
+		if ((td->td_tilelength % (sp->v_sampling * DCTSIZE)) != 0) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+				  "JPEG tile height must be multiple of %d",
+				  sp->v_sampling * DCTSIZE);
+			return (0);
+		}
+		if ((td->td_tilewidth % (sp->h_sampling * DCTSIZE)) != 0) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+				  "JPEG tile width must be multiple of %d",
+				  sp->h_sampling * DCTSIZE);
+			return (0);
+		}
+	} else {
+		/* the multiple rule is skipped when RowsPerStrip >= ImageLength */
+		if (td->td_rowsperstrip < td->td_imagelength &&
+		    (td->td_rowsperstrip % (sp->v_sampling * DCTSIZE)) != 0) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+				  "RowsPerStrip must be multiple of %d for JPEG",
+				  sp->v_sampling * DCTSIZE);
+			return (0);
+		}
+	}
+
+	/* Create a JPEGTables field if appropriate */
+	if (sp->jpegtablesmode & (JPEGTABLESMODE_QUANT|JPEGTABLESMODE_HUFF)) {
+		if (!prepare_JPEGTables(tif))
+			return (0);
+		/* Mark the field present */
+		/* Can't use TIFFSetField since BEENWRITING is already set! */
+		TIFFSetFieldBit(tif, FIELD_JPEGTABLES);
+		tif->tif_flags |= TIFF_DIRTYDIRECT;
+	} else {
+		/* We do not support application-supplied JPEGTables, */
+		/* so mark the field not present */
+		TIFFClrFieldBit(tif, FIELD_JPEGTABLES);
+	}
+
+	/* Direct libjpeg output to libtiff's output buffer */
+	TIFFjpeg_data_dest(sp, tif);
+
+	return (1);
+}
+
+/*
+ * Set encoding state at the start of a strip or tile.
+ *
+ * Computes the segment size, rejects segments larger than 65535 in
+ * either dimension, configures colorspace, sampling factors and table
+ * handling, selects the encode routine and starts the compressor.
+ * s is the sample plane being written; it only matters for
+ * PLANARCONFIG_SEPARATE.
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+JPEGPreEncode(TIFF* tif, tsample_t s)
+{
+	JPEGState *sp = JState(tif);
+	TIFFDirectory *td = &tif->tif_dir;
+	static const char module[] = "JPEGPreEncode";
+	uint32 segment_width, segment_height;
+	int downsampled_input;
+
+	assert(sp != NULL);
+	assert(!sp->cinfo.comm.is_decompressor);
+	/*
+	 * Set encoding parameters for this strip/tile.
+	 */
+	if (isTiled(tif)) {
+		segment_width = td->td_tilewidth;
+		segment_height = td->td_tilelength;
+		sp->bytesperline = TIFFTileRowSize(tif);
+	} else {
+		segment_width = td->td_imagewidth;
+		segment_height = td->td_imagelength - tif->tif_row;
+		/* the last strip may be shorter than RowsPerStrip */
+		if (segment_height > td->td_rowsperstrip)
+			segment_height = td->td_rowsperstrip;
+		sp->bytesperline = TIFFScanlineSize(tif);
+	}
+	if (td->td_planarconfig == PLANARCONFIG_SEPARATE && s > 0) {
+		/* for PC 2, scale down the strip/tile size
+		 * to match a downsampled component
+		 */
+		segment_width = TIFFhowmany(segment_width, sp->h_sampling);
+		segment_height = TIFFhowmany(segment_height, sp->v_sampling);
+	}
+	if (segment_width > 65535 || segment_height > 65535) {
+		TIFFErrorExt(tif->tif_clientdata, module, "Strip/tile too large for JPEG");
+		return (0);
+	}
+	sp->cinfo.c.image_width = segment_width;
+	sp->cinfo.c.image_height = segment_height;
+	downsampled_input = FALSE;
+	if (td->td_planarconfig == PLANARCONFIG_CONTIG) {
+		sp->cinfo.c.input_components = td->td_samplesperpixel;
+		if (sp->photometric == PHOTOMETRIC_YCBCR) {
+			if (sp->jpegcolormode == JPEGCOLORMODE_RGB) {
+				sp->cinfo.c.in_color_space = JCS_RGB;
+			} else {
+				sp->cinfo.c.in_color_space = JCS_YCbCr;
+				tif->tif_flags |= TIFF_UPSAMPLED; /* IMLIB - allow upsampling in the input data.
+				   The downsampled-input selection that follows is
+				   deliberately left disabled inside this comment,
+				   so downsampled_input stays FALSE:
+				if (sp->h_sampling != 1 || sp->v_sampling != 1)
+					downsampled_input = TRUE;  */
+			}
+			if (!TIFFjpeg_set_colorspace(sp, JCS_YCbCr))
+				return (0);
+			/*
+			 * Set Y sampling factors;
+			 * we assume jpeg_set_colorspace() set the rest to 1
+			 */
+			sp->cinfo.c.comp_info[0].h_samp_factor = sp->h_sampling;
+			sp->cinfo.c.comp_info[0].v_samp_factor = sp->v_sampling;
+		} else {
+			sp->cinfo.c.in_color_space = JCS_UNKNOWN;
+			if (!TIFFjpeg_set_colorspace(sp, JCS_UNKNOWN))
+				return (0);
+			/* jpeg_set_colorspace set all sampling factors to 1 */
+		}
+	} else {
+		/* separate planes: each plane is a one-component image */
+		sp->cinfo.c.input_components = 1;
+		sp->cinfo.c.in_color_space = JCS_UNKNOWN;
+		if (!TIFFjpeg_set_colorspace(sp, JCS_UNKNOWN))
+			return (0);
+		sp->cinfo.c.comp_info[0].component_id = s;
+		/* jpeg_set_colorspace() set sampling factors to 1 */
+		if (sp->photometric == PHOTOMETRIC_YCBCR && s > 0) {
+			/* planes other than 0 use table slot 1 */
+			sp->cinfo.c.comp_info[0].quant_tbl_no = 1;
+			sp->cinfo.c.comp_info[0].dc_tbl_no = 1;
+			sp->cinfo.c.comp_info[0].ac_tbl_no = 1;
+		}
+	}
+	/* ensure libjpeg won't write any extraneous markers */
+	sp->cinfo.c.write_JFIF_header = FALSE;
+	sp->cinfo.c.write_Adobe_marker = FALSE;
+	/* set up table handling correctly */
+	if (! (sp->jpegtablesmode & JPEGTABLESMODE_QUANT)) {
+		/* quant tables are not in JPEGTables: emit them per segment */
+		if (!TIFFjpeg_set_quality(sp, sp->jpegquality, FALSE))
+			return (0);
+		unsuppress_quant_table(sp, 0);
+		unsuppress_quant_table(sp, 1);
+	}
+	/* Huffman optimization is enabled only when the tables are not shared */
+	if (sp->jpegtablesmode & JPEGTABLESMODE_HUFF)
+		sp->cinfo.c.optimize_coding = FALSE;
+	else
+		sp->cinfo.c.optimize_coding = TRUE;
+	if (downsampled_input) {
+		/* Need to use raw-data interface to libjpeg */
+		sp->cinfo.c.raw_data_in = TRUE;
+		tif->tif_encoderow = JPEGEncodeRaw;
+		tif->tif_encodestrip = JPEGEncodeRaw;
+		tif->tif_encodetile = JPEGEncodeRaw;
+	} else {
+		/* Use normal interface to libjpeg */
+		sp->cinfo.c.raw_data_in = FALSE;
+		tif->tif_encoderow = JPEGEncode;
+		tif->tif_encodestrip = JPEGEncode;
+		tif->tif_encodetile = JPEGEncode;
+	}
+	/* Start JPEG compressor */
+	if (!TIFFjpeg_start_compress(sp, FALSE))
+		return (0);
+	/* Allocate downsampled-data buffers if needed */
+	if (downsampled_input) {
+		if (!alloc_downsampled_buffers(tif, sp->cinfo.c.comp_info,
+					       sp->cinfo.c.num_components))
+			return (0);
+	}
+	sp->scancount = 0;
+
+	return (1);
+}
+
+/*
+ * Encode a chunk of pixels through the ordinary libjpeg scanline interface.
+ * "Standard" case: incoming data is not downsampled.
+ *
+ * buf holds cc bytes of pixel data.  Only the whole scanlines it contains
+ * (cc / bytesperline) are handed to the compressor; a warning is issued
+ * when a fractional scanline is left over.  The sample argument is unused.
+ *
+ * Returns 1 on success, 0 as soon as a scanline cannot be written.
+ */
+static int
+JPEGEncode(TIFF* tif, tidata_t buf, tsize_t cc, tsample_t s)
+{
+	JPEGState *state = JState(tif);
+	tsize_t remaining;
+	JSAMPROW scanline[1];
+
+	(void) s;
+	assert(state != NULL);
+
+	/* data is expected to be supplied in multiples of a scanline */
+	remaining = cc / state->bytesperline;
+	if (cc % state->bytesperline != 0)
+		TIFFWarningExt(tif->tif_clientdata, tif->tif_name, "fractional scanline discarded");
+
+	for (; remaining > 0; remaining--) {
+		scanline[0] = (JSAMPROW) buf;
+		if (TIFFjpeg_write_scanlines(state, scanline, 1) != 1)
+			return (0);
+		/* the row counter is not advanced after the final scanline */
+		if (remaining > 1)
+			tif->tif_row++;
+		buf += state->bytesperline;
+	}
+	return (1);
+}
+
+/*
+ * Encode a chunk of pixels.
+ * Incoming data is expected to be downsampled per sampling factors.
+ *
+ * Each input scanline is treated as a sequence of "clumps" of
+ * sp->samplesperclump samples.  For every component the routine copies
+ * h_samp_factor samples per clump, for each of v_samp_factor rows, into
+ * sp->ds_buffer[component], and pads each row out to
+ * width_in_blocks * DCTSIZE samples by repeating the last sample.
+ * Once DCTSIZE input lines have been gathered (counted in sp->scancount)
+ * the buffers are handed to TIFFjpeg_write_raw_data() and the count is
+ * reset.
+ *
+ * A trailing partial scanline in buf only produces a warning.  The sample
+ * argument is unused.
+ *
+ * Returns 1 on success, 0 if the raw-data write does not accept the
+ * full bufferload.
+ */
+static int
+JPEGEncodeRaw(TIFF* tif, tidata_t buf, tsize_t cc, tsample_t s)
+{
+	JPEGState *sp = JState(tif);
+	JSAMPLE* inptr;
+	JSAMPLE* outptr;
+	tsize_t nrows;
+	JDIMENSION clumps_per_line, nclump;
+	int clumpoffset, ci, xpos, ypos;
+	jpeg_component_info* compptr;
+	int samples_per_clump = sp->samplesperclump;
+
+	(void) s;
+	assert(sp != NULL);
+	/* data is expected to be supplied in multiples of a scanline */
+	nrows = cc / sp->bytesperline;
+	if (cc % sp->bytesperline)
+		TIFFWarningExt(tif->tif_clientdata, tif->tif_name, "fractional scanline discarded");
+
+	/* Cb,Cr both have sampling factors 1, so this is correct */
+	clumps_per_line = sp->cinfo.c.comp_info[1].downsampled_width;
+
+	while (nrows-- > 0) {
+		/*
+		 * Fastest way to separate the data is to make one pass
+		 * over the scanline for each row of each component.
+		 */
+		clumpoffset = 0;		/* first sample in clump */
+		for (ci = 0, compptr = sp->cinfo.c.comp_info;
+		     ci < sp->cinfo.c.num_components;
+		     ci++, compptr++) {
+		    int hsamp = compptr->h_samp_factor;
+		    int vsamp = compptr->v_samp_factor;
+		    int padding = (int) (compptr->width_in_blocks * DCTSIZE -
+					 clumps_per_line * hsamp);	/* samples needed to fill out the row */
+		    for (ypos = 0; ypos < vsamp; ypos++) {
+			inptr = ((JSAMPLE*) buf) + clumpoffset;
+			outptr = sp->ds_buffer[ci][sp->scancount*vsamp + ypos];
+			if (hsamp == 1) {
+			    /* fast path for at least Cb and Cr */
+			    for (nclump = clumps_per_line; nclump-- > 0; ) {
+				*outptr++ = inptr[0];
+				inptr += samples_per_clump;
+			    }
+			} else {
+			    /* general case */
+			    for (nclump = clumps_per_line; nclump-- > 0; ) {
+				for (xpos = 0; xpos < hsamp; xpos++)
+				    *outptr++ = inptr[xpos];
+				inptr += samples_per_clump;
+			    }
+			}
+			/* pad each scanline as needed */
+			for (xpos = 0; xpos < padding; xpos++) {
+			    *outptr = outptr[-1];
+			    outptr++;
+			}
+			clumpoffset += hsamp;	/* next row of samples starts hsamp further into each clump */
+		    }
+		}
+		sp->scancount++;
+		if (sp->scancount >= DCTSIZE) {	/* a full bufferload has been gathered */
+			int n = sp->cinfo.c.max_v_samp_factor * DCTSIZE;
+			if (TIFFjpeg_write_raw_data(sp, sp->ds_buffer, n) != n)
+				return (0);
+			sp->scancount = 0;
+		}
+		if (nrows > 0)	/* the row counter is not advanced after the final scanline */
+			tif->tif_row++;
+		buf += sp->bytesperline;
+	}
+	return (1);
+}
+
+/*
+ * Finish up at the end of a strip or tile.
+ *
+ * When sp->scancount shows that a partial bufferload of downsampled data
+ * is still pending, the missing rows of every component are filled by
+ * copying the row above them and the bufferload is written out.  The
+ * compressor is then finished.
+ *
+ * Returns 0 if the raw-data write falls short, otherwise the result of
+ * TIFFjpeg_finish_compress().
+ */
+static int
+JPEGPostEncode(TIFF* tif)
+{
+	JPEGState *state = JState(tif);
+
+	if (state->scancount > 0) {
+		int comp;
+		int nlines;
+
+		/*
+		 * Need to emit a partial bufferload of downsampled data.
+		 * Pad the data vertically.
+		 */
+		for (comp = 0; comp < state->cinfo.c.num_components; comp++) {
+			jpeg_component_info* info =
+				&state->cinfo.c.comp_info[comp];
+			int vsamp = info->v_samp_factor;
+			tsize_t nbytes = info->width_in_blocks * DCTSIZE
+				* sizeof(JSAMPLE);
+			int row = state->scancount * vsamp;
+
+			while (row < DCTSIZE * vsamp) {
+				/* replicate the previous row downwards */
+				_TIFFmemcpy((tdata_t)state->ds_buffer[comp][row],
+					    (tdata_t)state->ds_buffer[comp][row-1],
+					    nbytes);
+				row++;
+			}
+		}
+
+		nlines = state->cinfo.c.max_v_samp_factor * DCTSIZE;
+		if (TIFFjpeg_write_raw_data(state, state->ds_buffer, nlines) != nlines)
+			return (0);
+	}
+
+	return (TIFFjpeg_finish_compress(state));
+}
+
+/*
+ * Tear down the JPEG codec state attached to tif.
+ *
+ * Restores the parent get/set field methods, destroys the libjpeg object
+ * if one was created, frees the tag values owned by the state block
+ * (JPEGTables and the fax strings stored by JPEGVSetField), frees the
+ * state block itself and resets the compression state to the default.
+ */
+static void
+JPEGCleanup(TIFF* tif)
+{
+	JPEGState *sp = JState(tif);
+	
+	assert(sp != 0);
+
+	tif->tif_tagmethods.vgetfield = sp->vgetparent;
+	tif->tif_tagmethods.vsetfield = sp->vsetparent;
+
+	if( sp->cinfo_initialized )
+	    TIFFjpeg_destroy(sp);	/* release libjpeg resources */
+	if (sp->jpegtables)		/* tag value */
+		_TIFFfree(sp->jpegtables);
+	/*
+	 * The fax strings are copies made by _TIFFsetString() in
+	 * JPEGVSetField(); release them too so they are not leaked.
+	 */
+	if (sp->subaddress)		/* tag value */
+		_TIFFfree(sp->subaddress);
+	if (sp->faxdcs)			/* tag value */
+		_TIFFfree(sp->faxdcs);
+	_TIFFfree(tif->tif_data);	/* release local state */
+	tif->tif_data = NULL;
+
+	_TIFFSetDefaultCompressionState(tif);
+}
+
+/*
+ * Codec hook for setting tags.
+ *
+ * Handles the JPEG-specific tags and pseudo tags plus the fax tags kept
+ * in the JPEG state block; every other tag is passed to the parent
+ * method.  Pseudo tags return 1 immediately.  Real tags additionally get
+ * their field bit set and mark the directory dirty.
+ *
+ * Returns 0 when JPEGTables is given with a zero length, otherwise 1 or
+ * whatever the parent method returns.
+ */
+static int
+JPEGVSetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+	JPEGState* state = JState(tif);
+	TIFFDirectory* dir = &tif->tif_dir;
+	uint32 nbytes;
+	void* tables;
+
+	assert(state != NULL);
+
+	switch (tag) {
+	case TIFFTAG_JPEGTABLES:
+		/* arguments are a byte count followed by the table data */
+		nbytes = va_arg(ap, uint32);
+		if (nbytes == 0) {
+			/* XXX */
+			return (0);
+		}
+		tables = va_arg(ap, void*);
+		_TIFFsetByteArray(&state->jpegtables, tables, (long) nbytes);
+		state->jpegtables_length = nbytes;
+		TIFFSetFieldBit(tif, FIELD_JPEGTABLES);
+		break;
+	case TIFFTAG_JPEGQUALITY:
+		state->jpegquality = va_arg(ap, int);
+		return (1);			/* pseudo tag */
+	case TIFFTAG_JPEGCOLORMODE:
+		state->jpegcolormode = va_arg(ap, int);
+		/*
+		 * Mark whether returned data is up-sampled or not
+		 * so TIFFStripSize and TIFFTileSize return values
+		 * that reflect the true amount of data.
+		 */
+		tif->tif_flags &= ~TIFF_UPSAMPLED;
+		if (dir->td_planarconfig == PLANARCONFIG_CONTIG &&
+		    dir->td_photometric == PHOTOMETRIC_YCBCR &&
+		    state->jpegcolormode == JPEGCOLORMODE_RGB)
+			tif->tif_flags |= TIFF_UPSAMPLED;
+		/*
+		 * XXX what about up-sampling?  Contiguous data whose
+		 * YCbCr subsampling is not 1x1 gets no special handling.
+		 */
+
+		/*
+		 * Must recalculate cached tile size
+		 * in case sampling state changed.
+		 */
+		if (isTiled(tif))
+			tif->tif_tilesize = TIFFTileSize(tif);
+		else
+			tif->tif_tilesize = (tsize_t) -1;
+		return (1);			/* pseudo tag */
+	case TIFFTAG_JPEGTABLESMODE:
+		state->jpegtablesmode = va_arg(ap, int);
+		return (1);			/* pseudo tag */
+	case TIFFTAG_YCBCRSUBSAMPLING:
+		/* mark the fact that we have a real ycbcrsubsampling! */
+		state->ycbcrsampling_fetched = 1;
+		return (*state->vsetparent)(tif, tag, ap);
+	case TIFFTAG_FAXRECVPARAMS:
+		state->recvparams = va_arg(ap, uint32);
+		break;
+	case TIFFTAG_FAXSUBADDRESS:
+		_TIFFsetString(&state->subaddress, va_arg(ap, char*));
+		break;
+	case TIFFTAG_FAXRECVTIME:
+		state->recvtime = va_arg(ap, uint32);
+		break;
+	case TIFFTAG_FAXDCS:
+		_TIFFsetString(&state->faxdcs, va_arg(ap, char*));
+		break;
+	default:
+		return (*state->vsetparent)(tif, tag, ap);
+	}
+
+	/* a real tag was stored: record it and flag the directory as dirty */
+	TIFFSetFieldBit(tif, _TIFFFieldWithTag(tif, tag)->field_bit);
+	tif->tif_flags |= TIFF_DIRTYDIRECT;
+	return (1);
+}
+
+/*
+ * Some JPEG-in-TIFF producers do not emit the YCBCRSUBSAMPLING values in
+ * the TIFF tags, but still use non-default (2,2) values within the jpeg
+ * data stream itself.  In order for TIFF applications to work properly
+ * - for instance to get the strip buffer size right - it is imperative
+ * that the subsampling be available before we start reading the image
+ * data normally.  This function will attempt to load the first strip in
+ * order to get the sampling values from the jpeg data stream.  Various
+ * hacks in various places are done to ensure this function gets called
+ * before the td_ycbcrsubsampling values are used from the directory structure,
+ * including calling TIFFGetField() for the YCBCRSUBSAMPLING field from 
+ * TIFFStripSize(), and the printing code in tif_print.c. 
+ *
+ * Note that JPEGPreDecode() will produce a fairly loud warning when the
+ * discovered sampling does not match the default sampling (2,2) or whatever
+ * was actually in the tiff tags. 
+ *
+ * Problems:
+ *  o This code will cause one whole strip/tile of compressed data to be
+ *    loaded just to get the tags right, even if the imagery is never read.
+ *    It would be more efficient to just load a bit of the header, and
+ *    initialize things from that. 
+ *
+ * See the bug in bugzilla for details:
+ *
+ * http://bugzilla.remotesensing.org/show_bug.cgi?id=168
+ *
+ * Frank Warmerdam, July 2002
+ */
+
+static void 
+JPEGFixupTestSubsampling( TIFF * tif )
+{
+#ifdef CHECK_JPEG_YCBCR_SUBSAMPLING
+    JPEGState *state = JState(tif);
+    TIFFDirectory *dir = &tif->tif_dir;
+    int filled;
+
+    JPEGInitializeLibJPEG( tif, 0, 0 );
+
+    /*
+     * Some JPEG-in-TIFF files don't provide the ycbcrsampling tags, 
+     * and use a sampling schema other than the default 2,2.  To handle
+     * this we actually have to scan the header of a strip or tile of
+     * jpeg data to get the sampling.  
+     *
+     * Nothing to do unless we are decompressing YCbCr data whose
+     * sampling has not been fetched yet.
+     */
+    if( !state->cinfo.comm.is_decompressor )
+        return;
+    if( state->ycbcrsampling_fetched )
+        return;
+    if( dir->td_photometric != PHOTOMETRIC_YCBCR )
+        return;
+
+    /* set before the fill so this is attempted only once */
+    state->ycbcrsampling_fetched = 1;
+
+    if( TIFFIsTiled( tif ) )
+        filled = TIFFFillTile( tif, 0 );
+    else
+        filled = TIFFFillStrip( tif, 0 );
+    if( !filled )
+        return;
+
+    TIFFSetField( tif, TIFFTAG_YCBCRSUBSAMPLING, 
+                  (uint16) state->h_sampling, (uint16) state->v_sampling );
+#endif /* CHECK_JPEG_YCBCR_SUBSAMPLING */
+}
+
+/*
+ * Codec hook for reading tags.
+ *
+ * Values kept in the JPEG state block are stored through the pointers
+ * supplied in ap.  YCBCRSUBSAMPLING first runs
+ * JPEGFixupTestSubsampling() and is then answered by the parent method,
+ * as is every tag not handled here.
+ *
+ * Returns 1 for tags answered locally, otherwise the parent's result.
+ */
+static int
+JPEGVGetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+	JPEGState* state = JState(tif);
+
+	assert(state != NULL);
+
+	if (tag == TIFFTAG_YCBCRSUBSAMPLING) {
+		JPEGFixupTestSubsampling( tif );
+		return (*state->vgetparent)(tif, tag, ap);
+	}
+
+	switch (tag) {
+	case TIFFTAG_JPEGTABLES:
+		/* two out-parameters: byte count, then data pointer */
+		*va_arg(ap, uint32*) = state->jpegtables_length;
+		*va_arg(ap, void**) = state->jpegtables;
+		return (1);
+	case TIFFTAG_JPEGQUALITY:
+		*va_arg(ap, int*) = state->jpegquality;
+		return (1);
+	case TIFFTAG_JPEGCOLORMODE:
+		*va_arg(ap, int*) = state->jpegcolormode;
+		return (1);
+	case TIFFTAG_JPEGTABLESMODE:
+		*va_arg(ap, int*) = state->jpegtablesmode;
+		return (1);
+	case TIFFTAG_FAXRECVPARAMS:
+		*va_arg(ap, uint32*) = state->recvparams;
+		return (1);
+	case TIFFTAG_FAXSUBADDRESS:
+		*va_arg(ap, char**) = state->subaddress;
+		return (1);
+	case TIFFTAG_FAXRECVTIME:
+		*va_arg(ap, uint32*) = state->recvtime;
+		return (1);
+	case TIFFTAG_FAXDCS:
+		*va_arg(ap, char**) = state->faxdcs;
+		return (1);
+	default:
+		break;
+	}
+	return (*state->vgetparent)(tif, tag, ap);
+}
+
+/*
+ * Print the codec-specific tags that are set in the current directory
+ * (JPEGTables length and the fax fields) to fd.  flags is unused.
+ */
+static void
+JPEGPrintDir(TIFF* tif, FILE* fd, long flags)
+{
+	JPEGState* state = JState(tif);
+
+	assert(state != NULL);
+	(void) flags;
+
+	if (TIFFFieldSet(tif,FIELD_JPEGTABLES)) {
+		unsigned long nbytes = (unsigned long) state->jpegtables_length;
+
+		fprintf(fd, "  JPEG Tables: (%lu bytes)\n", nbytes);
+	}
+	if (TIFFFieldSet(tif,FIELD_RECVPARAMS)) {
+		unsigned long params = (unsigned long) state->recvparams;
+
+		fprintf(fd, "  Fax Receive Parameters: %08lx\n", params);
+	}
+	if (TIFFFieldSet(tif,FIELD_SUBADDRESS)) {
+		fprintf(fd, "  Fax SubAddress: %s\n", state->subaddress);
+	}
+	if (TIFFFieldSet(tif,FIELD_RECVTIME)) {
+		unsigned long secs = (unsigned long) state->recvtime;
+
+		fprintf(fd, "  Fax Receive Time: %lu secs\n", secs);
+	}
+	if (TIFFFieldSet(tif,FIELD_FAXDCS)) {
+		fprintf(fd, "  Fax DCS: %s\n", state->faxdcs);
+	}
+}
+
+/*
+ * Default strip size for JPEG data: take the parent's choice and, when
+ * it is smaller than the image length, round it up to a multiple of the
+ * vertical YCbCr subsampling times DCTSIZE.
+ */
+static uint32
+JPEGDefaultStripSize(TIFF* tif, uint32 s)
+{
+	JPEGState* state = JState(tif);
+	TIFFDirectory *dir = &tif->tif_dir;
+	uint32 rows = (*state->defsparent)(tif, s);
+
+	if (rows >= dir->td_imagelength)
+		return (rows);
+	return (TIFFroundup(rows, dir->td_ycbcrsubsampling[1] * DCTSIZE));
+}
+
+/*
+ * Default tile size for JPEG data: take the parent's choice and round
+ * the width and height up to multiples of the horizontal and vertical
+ * YCbCr subsampling times DCTSIZE.
+ */
+static void
+JPEGDefaultTileSize(TIFF* tif, uint32* tw, uint32* th)
+{
+	JPEGState* state = JState(tif);
+	TIFFDirectory *dir = &tif->tif_dir;
+	uint32 hstep;
+	uint32 vstep;
+
+	(*state->deftparent)(tif, tw, th);
+
+	hstep = dir->td_ycbcrsubsampling[0] * DCTSIZE;
+	*tw = TIFFroundup(*tw, hstep);
+
+	vstep = dir->td_ycbcrsubsampling[1] * DCTSIZE;
+	*th = TIFFroundup(*th, vstep);
+}
+
+/*
+ * The JPEG library initialization used to be done in TIFFInitJPEG(), but
+ * now that we allow a TIFF file to be opened in update mode it is necessary
+ * to have some way of deciding whether compression or decompression is
+ * desired other than looking at tif->tif_mode.  We accomplish this by 
+ * examining {TILE/STRIP}BYTECOUNTS to see if there is a non-zero entry.
+ * If so, we assume decompression is desired. 
+ *
+ * This is tricky, because TIFFInitJPEG() is called while the directory is
+ * being read, and generally speaking the BYTECOUNTS tag won't have been read
+ * at that point.  So we try to defer jpeg library initialization till we
+ * do have that tag ... basically any access that might require the compressor
+ * or decompressor that occurs after the reading of the directory. 
+ *
+ * In an ideal world compressors or decompressors would be setup
+ * at the point where a single tile or strip was accessed (for read or write)
+ * so that stuff like update of missing tiles, or replacement of tiles could
+ * be done. However, we aren't trying to crack that nut just yet ...
+ *
+ * NFW, Feb 3rd, 2003.
+ */
+
+static int JPEGInitializeLibJPEG( TIFF * tif, int force_encode, int force_decode )
+{
+    JPEGState* state = JState(tif);
+    uint32 *counts = NULL;
+    int     no_data = TRUE;
+    int     want_decoder;
+    ttag_t  counts_tag;
+
+    /* libjpeg is set up at most once per state block */
+    if( state->cinfo_initialized )
+        return 1;
+
+    /*
+     * Do we have tile/strip data already?  Make sure we initialize
+     * the state in decompressor mode if we have data, even if we
+     * are not in read-only file access mode.  Only the first byte
+     * count entry is examined.
+     */
+    if( TIFFIsTiled( tif ) )
+        counts_tag = TIFFTAG_TILEBYTECOUNTS;
+    else
+        counts_tag = TIFFTAG_STRIPBYTECOUNTS;
+
+    if( TIFFGetField( tif, counts_tag, &counts ) && counts != NULL )
+        no_data = counts[0] == 0;
+
+    /*
+     * An explicit request wins (decode before encode); otherwise a
+     * read-only file or existing data selects the decompressor.
+     */
+    if( force_decode )
+        want_decoder = 1;
+    else if( force_encode )
+        want_decoder = 0;
+    else
+        want_decoder = ( tif->tif_mode == O_RDONLY || !no_data );
+
+    /*
+     * Initialize libjpeg.
+     */
+    if( want_decoder )
+    {
+        if( !TIFFjpeg_create_decompress(state) )
+            return (0);
+    }
+    else
+    {
+        if( !TIFFjpeg_create_compress(state) )
+            return (0);
+    }
+
+    state->cinfo_initialized = TRUE;
+
+    return 1;
+}
+
+/*
+ * Install the JPEG codec on tif.
+ *
+ * Allocates and zeroes the JPEGState block, merges the codec's tag
+ * definitions, hooks the get/set/print tag methods (remembering the
+ * parent get/set methods), sets the codec defaults and installs the
+ * encode/decode/cleanup methods.  libjpeg itself is not created here;
+ * that is deferred to JPEGInitializeLibJPEG().
+ *
+ * Returns 1 on success, 0 if the state block or the placeholder
+ * JPEGTables buffer cannot be allocated.
+ */
+int
+TIFFInitJPEG(TIFF* tif, int scheme)
+{
+	JPEGState* sp;
+
+	(void) scheme;				/* only used by the assert */
+	assert(scheme == COMPRESSION_JPEG);
+
+	/*
+	 * Allocate state block so tag methods have storage to record values.
+	 */
+	tif->tif_data = (tidata_t) _TIFFmalloc(sizeof (JPEGState));
+
+	if (tif->tif_data == NULL) {
+		TIFFErrorExt(tif->tif_clientdata, "TIFFInitJPEG", "No space for JPEG state block");
+		return (0);
+	}
+	_TIFFmemset( tif->tif_data, 0, sizeof(JPEGState));
+
+	sp = JState(tif);
+	sp->tif = tif;				/* back link */
+
+	/*
+	 * Merge codec-specific tag information and override parent get/set
+	 * field methods.
+	 */
+	_TIFFMergeFieldInfo(tif, jpegFieldInfo, N(jpegFieldInfo));
+	sp->vgetparent = tif->tif_tagmethods.vgetfield;
+	tif->tif_tagmethods.vgetfield = JPEGVGetField; /* hook for codec tags */
+	sp->vsetparent = tif->tif_tagmethods.vsetfield;
+	tif->tif_tagmethods.vsetfield = JPEGVSetField; /* hook for codec tags */
+	tif->tif_tagmethods.printdir = JPEGPrintDir;   /* hook for codec tags */
+
+	/* Default values for codec-specific fields */
+	sp->jpegtables = NULL;
+	sp->jpegtables_length = 0;
+	sp->jpegquality = 75;			/* Default IJG quality */
+	sp->jpegcolormode = JPEGCOLORMODE_RAW;
+	sp->jpegtablesmode = JPEGTABLESMODE_QUANT | JPEGTABLESMODE_HUFF;
+
+	sp->recvparams = 0;
+	sp->subaddress = NULL;
+	sp->faxdcs = NULL;
+
+	sp->ycbcrsampling_fetched = 0;
+
+	/*
+	 * Install codec methods.
+	 */
+	tif->tif_setupdecode = JPEGSetupDecode;
+	tif->tif_predecode = JPEGPreDecode;
+	tif->tif_decoderow = JPEGDecode;
+	tif->tif_decodestrip = JPEGDecode;
+	tif->tif_decodetile = JPEGDecode;
+	tif->tif_setupencode = JPEGSetupEncode;
+	tif->tif_preencode = JPEGPreEncode;
+	tif->tif_postencode = JPEGPostEncode;
+	tif->tif_encoderow = JPEGEncode;
+	tif->tif_encodestrip = JPEGEncode;
+	tif->tif_encodetile = JPEGEncode;
+	tif->tif_cleanup = JPEGCleanup;
+	sp->defsparent = tif->tif_defstripsize;
+	tif->tif_defstripsize = JPEGDefaultStripSize;
+	sp->deftparent = tif->tif_deftilesize;
+	tif->tif_deftilesize = JPEGDefaultTileSize;
+	tif->tif_flags |= TIFF_NOBITREV;	/* no bit reversal, please */
+
+	sp->cinfo_initialized = FALSE;
+
+	/*
+	 * Create a JPEGTables field if no directory has yet been created.
+	 * We do this just to ensure that sufficient space is reserved for
+	 * the JPEGTables field.  It will be properly created the right
+	 * size later.
+	 */
+	if( tif->tif_diroff == 0 )
+	{
+#define SIZE_OF_JPEGTABLES 2000
+		sp->jpegtables = (void *) _TIFFmalloc(SIZE_OF_JPEGTABLES);
+		if (sp->jpegtables == NULL) {
+			/*
+			 * Do not touch the buffer or advertise the field
+			 * when the allocation failed.  The state block
+			 * stays attached to tif, with JPEGCleanup
+			 * installed to release it.
+			 */
+			TIFFErrorExt(tif->tif_clientdata, "TIFFInitJPEG", "Failed to allocate memory for JPEG tables");
+			return (0);
+		}
+		_TIFFmemset(sp->jpegtables, 0, SIZE_OF_JPEGTABLES);
+		sp->jpegtables_length = SIZE_OF_JPEGTABLES;
+		TIFFSetFieldBit(tif, FIELD_JPEGTABLES);
+#undef SIZE_OF_JPEGTABLES
+	}
+
+	/*
+	 * Mark the TIFFTAG_YCBCRSUBSAMPLING as present even if it is not
+	 * see: JPEGFixupTestSubsampling().
+	 */
+	TIFFSetFieldBit( tif, FIELD_YCBCRSUBSAMPLING );
+
+	return 1;
+}
+#endif /* JPEG_SUPPORT */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_luv.c b/src/libtiff/tif_luv.c
new file mode 100644
index 0000000..484bc4f
--- /dev/null
+++ b/src/libtiff/tif_luv.c
@@ -0,0 +1,1606 @@
+/* $Id: tif_luv.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1997 Greg Ward Larson
+ * Copyright (c) 1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler, Greg Larson and Silicon Graphics may not be used in any
+ * advertising or publicity relating to the software without the specific,
+ * prior written permission of Sam Leffler, Greg Larson and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER, GREG LARSON OR SILICON GRAPHICS BE LIABLE
+ * FOR ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#include "tiffiop.h"
+#ifdef LOGLUV_SUPPORT
+
+/*
+ * TIFF Library.
+ * LogLuv compression support for high dynamic range images.
+ *
+ * Contributed by Greg Larson.
+ *
+ * LogLuv image support uses the TIFF library to store 16 or 10-bit
+ * log luminance values with 8 bits each of u and v or a 14-bit index.
+ *
+ * The codec can take as input and produce as output 32-bit IEEE float values 
+ * as well as 16-bit integer values.  A 16-bit luminance is interpreted
+ * as a sign bit followed by a 15-bit integer that is converted
+ * to and from a linear magnitude using the transformation:
+ *
+ *	L = 2^( (Le+.5)/256 - 64 )		# real from 15-bit
+ *
+ *	Le = floor( 256*(log2(L) + 64) )	# 15-bit from real
+ *
+ * The actual conversion to world luminance units in candelas per sq. meter
+ * requires an additional multiplier, which is stored in the TIFFTAG_STONITS.
+ * This value is usually set such that a reasonable exposure comes from
+ * clamping decoded luminances above 1 to 1 in the displayed image.
+ *
+ * The 16-bit values for u and v may be converted to real values by dividing
+ * each by 32768.  (This allows for negative values, which aren't useful as
+ * far as we know, but are left in case of future improvements in human
+ * color vision.)
+ *
+ * Conversion from (u,v), which is actually the CIE (u',v') system for
+ * you color scientists, is accomplished by the following transformation:
+ *
+ *	u = 4*x / (-2*x + 12*y + 3)
+ *	v = 9*y / (-2*x + 12*y + 3)
+ *
+ *	x = 9*u / (6*u - 16*v + 12)
+ *	y = 4*v / (6*u - 16*v + 12)
+ *
+ * This process is greatly simplified by passing 32-bit IEEE floats
+ * for each of three CIE XYZ coordinates.  The codec then takes care
+ * of conversion to and from LogLuv, though the application is still
+ * responsible for interpreting the TIFFTAG_STONITS calibration factor.
+ *
+ * By definition, a CIE XYZ vector of [1 1 1] corresponds to a neutral white
+ * point of (x,y)=(1/3,1/3).  However, most color systems assume some other
+ * white point, such as D65, and an absolute color conversion to XYZ then
+ * to another color space with a different white point may introduce an
+ * unwanted color cast to the image.  It is often desirable, therefore, to
+ * perform a white point conversion that maps the input white to [1 1 1]
+ * in XYZ, then record the original white point using the TIFFTAG_WHITEPOINT
+ * tag value.  A decoder that demands absolute color calibration may use
+ * this white point tag to get back the original colors, but usually it
+ * will be ignored and the new white point will be used instead that
+ * matches the output color space.
+ *
+ * Pixel information is compressed into one of two basic encodings, depending
+ * on the setting of the compression tag, which is one of COMPRESSION_SGILOG
+ * or COMPRESSION_SGILOG24.  For COMPRESSION_SGILOG, greyscale data is
+ * stored as:
+ *
+ *	 1       15
+ *	|-+---------------|
+ *
+ * COMPRESSION_SGILOG color data is stored as:
+ *
+ *	 1       15           8        8
+ *	|-+---------------|--------+--------|
+ *	 S       Le           ue       ve
+ *
+ * For the 24-bit COMPRESSION_SGILOG24 color format, the data is stored as:
+ *
+ *	     10           14
+ *	|----------|--------------|
+ *	     Le'          Ce
+ *
+ * There is no sign bit in the 24-bit case, and the (u,v) chromaticity is
+ * encoded as an index for optimal color resolution.  The 10 log bits are
+ * defined by the following conversions:
+ *
+ *	L = 2^((Le'+.5)/64 - 12)		# real from 10-bit
+ *
+ *	Le' = floor( 64*(log2(L) + 12) )	# 10-bit from real
+ *
+ * The 10 bits of the smaller format may be converted into the 15 bits of
+ * the larger format by multiplying by 4 and adding 13314.  Obviously,
+ * a smaller range of magnitudes is covered (about 5 orders of magnitude
+ * instead of 38), and the lack of a sign bit means that negative luminances
+ * are not allowed.  (Well, they aren't allowed in the real world, either,
+ * but they are useful for certain types of image processing.)
+ *
+ * The desired user format is controlled by setting the internal
+ * pseudo tag TIFFTAG_SGILOGDATAFMT to one of:
+ *  SGILOGDATAFMT_FLOAT       = IEEE 32-bit float XYZ values
+ *  SGILOGDATAFMT_16BIT	      = 16-bit integer encodings of logL, u and v
+ * Raw data i/o is also possible using:
+ *  SGILOGDATAFMT_RAW         = 32-bit unsigned integer with encoded pixel
+ * In addition, the following decoding is provided for ease of display:
+ *  SGILOGDATAFMT_8BIT        = 8-bit default RGB gamma-corrected values
+ *
+ * For grayscale images, we provide the following data formats:
+ *  SGILOGDATAFMT_FLOAT       = IEEE 32-bit float Y values
+ *  SGILOGDATAFMT_16BIT       = 16-bit integer w/ encoded luminance
+ *  SGILOGDATAFMT_8BIT        = 8-bit gray monitor values
+ *
+ * Note that the COMPRESSION_SGILOG applies a simple run-length encoding
+ * scheme by separating the logL, u and v bytes for each row and applying
+ * a PackBits type of compression.  Since the 24-bit encoding is not
+ * adaptive, the 32-bit color format takes less space in many cases.
+ *
+ * Further control is provided over the conversion from higher-resolution
+ * formats to final encoded values through the pseudo tag
+ * TIFFTAG_SGILOGENCODE:
+ *  SGILOGENCODE_NODITHER     = do not dither encoded values
+ *  SGILOGENCODE_RANDITHER    = apply random dithering during encoding
+ *
+ * The default value of this tag is SGILOGENCODE_NODITHER for
+ * COMPRESSION_SGILOG to maximize run-length encoding and
+ * SGILOGENCODE_RANDITHER for COMPRESSION_SGILOG24 to turn
+ * quantization errors into noise.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+/*
+ * State block for each open TIFF
+ * file using LogLuv compression/decompression.
+ *
+ * The block is reached through tif->tif_data (see the DecoderState and
+ * EncoderState macros).  The row coders divide the byte count they are
+ * given by pixel_size to obtain a pixel count, and use tbuf/tfunc to
+ * translate whenever user_datafmt is not the format they work in.
+ */
+typedef	struct logLuvState LogLuvState;
+
+struct logLuvState {
+	int			user_datafmt;	/* user data format (an SGILOGDATAFMT_* value) */
+	int			encode_meth;	/* encoding method */
+	int			pixel_size;	/* bytes per pixel in the user's buffer */
+
+	tidata_t*		tbuf;		/* translation buffer; the coders cast it
+						 * to int16* or uint32* as needed */
+	int			tbuflen;	/* buffer length, compared against pixel counts */
+	void (*tfunc)(LogLuvState*, tidata_t, int);	/* translation routine, called
+							 * with the state, the user
+							 * buffer and a pixel count */
+
+	TIFFVSetMethod		vgetparent;	/* super-class method; NOTE(review): declared
+						 * with the "set" method type - confirm
+						 * this is intended */
+	TIFFVSetMethod		vsetparent;	/* super-class method */
+};
+
+#define	DecoderState(tif)	((LogLuvState*) (tif)->tif_data)
+#define	EncoderState(tif)	((LogLuvState*) (tif)->tif_data)
+
+#define SGILOGDATAFMT_UNKNOWN	-1
+
+#define MINRUN		4	/* minimum run length */
+
+/*
+ * Decode a string of 16-bit gray pixels.
+ *
+ * occ is the size in bytes of the output row op; occ / pixel_size pixels
+ * are decoded.  The raw data holds two run-length coded byte planes, the
+ * high byte of every pixel followed by the low byte.  A code byte >= 128
+ * introduces a run of (code - 126) copies of the byte that follows; a
+ * smaller code byte gives the number of literal bytes that follow.
+ *
+ * When the user format is not SGILOGDATAFMT_16BIT the pixels are decoded
+ * into the translation buffer; sp->tfunc is called in either case.
+ *
+ * Returns 1 on success, 0 (after reporting an error) when the raw data
+ * runs out before a plane is complete.  tif_rawcp/tif_rawcc are updated
+ * in both cases.
+ */
+static int
+LogL16Decode(TIFF* tif, tidata_t op, tsize_t occ, tsample_t s)
+{
+	LogLuvState* sp = DecoderState(tif);
+	int shft, i, npixels;
+	unsigned char* bp;
+	int16* tp;
+	int16 b;
+	int cc, rc;
+
+	assert(s == 0);
+	assert(sp != NULL);
+
+	npixels = occ / sp->pixel_size;
+
+	if (sp->user_datafmt == SGILOGDATAFMT_16BIT)
+		tp = (int16*) op;
+	else {
+		assert(sp->tbuflen >= npixels);
+		tp = (int16*) sp->tbuf;
+	}
+	/* planes are OR-ed in below, so start from all zero bits */
+	_TIFFmemset((tdata_t) tp, 0, npixels*sizeof (tp[0]));
+
+	bp = (unsigned char*) tif->tif_rawcp;
+	cc = tif->tif_rawcc;
+					/* get each byte string */
+	for (shft = 2*8; (shft -= 8) >= 0; ) {
+		for (i = 0; i < npixels && cc > 0; ) {
+			if (*bp >= 128) {		/* run */
+				/*
+				 * A run needs the code byte and a value
+				 * byte; stop rather than read past the
+				 * end of truncated data.
+				 */
+				if (cc < 2)
+					break;
+				rc = *bp++ + (2-128);
+				b = (int16)(*bp++ << shft);
+				cc -= 2;
+				while (rc-- && i < npixels)
+					tp[i++] |= b;
+			} else {			/* non-run */
+				rc = *bp++;		/* nul is noop */
+				while (--cc && rc-- && i < npixels)
+					tp[i++] |= (int16)*bp++ << shft;
+			}
+		}
+		if (i != npixels) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		"LogL16Decode: Not enough data at row %d (short %d pixels)",
+			    tif->tif_row, npixels - i);
+			tif->tif_rawcp = (tidata_t) bp;
+			tif->tif_rawcc = cc;
+			return (0);
+		}
+	}
+	(*sp->tfunc)(sp, op, npixels);
+	tif->tif_rawcp = (tidata_t) bp;
+	tif->tif_rawcc = cc;
+	return (1);
+}
+
+/*
+ * Decode a string of 24-bit pixels.
+ *
+ * occ is the size in bytes of the output row op; occ / pixel_size pixels
+ * are decoded.  Each pixel is three raw bytes, most significant first,
+ * unpacked into one uint32.  When the user format is not
+ * SGILOGDATAFMT_RAW the pixels are unpacked into the translation buffer;
+ * sp->tfunc is called in either case.
+ *
+ * Returns 1 on success, 0 (after reporting an error) when the raw data
+ * does not hold enough complete pixels.  tif_rawcp/tif_rawcc are updated
+ * in both cases.
+ */
+static int
+LogLuvDecode24(TIFF* tif, tidata_t op, tsize_t occ, tsample_t s)
+{
+	LogLuvState* sp = DecoderState(tif);
+	int cc, i, npixels;
+	unsigned char* bp;
+	uint32* tp;
+
+	assert(s == 0);
+	assert(sp != NULL);
+
+	npixels = occ / sp->pixel_size;
+
+	if (sp->user_datafmt == SGILOGDATAFMT_RAW)
+		tp = (uint32 *)op;
+	else {
+		assert(sp->tbuflen >= npixels);
+		tp = (uint32 *) sp->tbuf;
+	}
+					/* copy to array of uint32 */
+	bp = (unsigned char*) tif->tif_rawcp;
+	cc = tif->tif_rawcc;
+	/*
+	 * Every pixel consumes three bytes, so require three to be left;
+	 * testing only cc > 0 would read past the end of truncated data.
+	 */
+	for (i = 0; i < npixels && cc >= 3; i++) {
+		tp[i] = bp[0] << 16 | bp[1] << 8 | bp[2];
+		bp += 3;
+		cc -= 3;
+	}
+	tif->tif_rawcp = (tidata_t) bp;
+	tif->tif_rawcc = cc;
+	if (i != npixels) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+	    "LogLuvDecode24: Not enough data at row %d (short %d pixels)",
+		    tif->tif_row, npixels - i);
+		return (0);
+	}
+	(*sp->tfunc)(sp, op, npixels);
+	return (1);
+}
+
+/*
+ * Decode a string of 32-bit pixels.
+ *
+ * occ is the size in bytes of the output row op; occ / pixel_size pixels
+ * are decoded.  The raw data holds four run-length coded byte planes,
+ * most significant byte first.  A code byte >= 128 introduces a run of
+ * (code - 126) copies of the byte that follows; a smaller code byte
+ * gives the number of literal bytes that follow.
+ *
+ * When the user format is not SGILOGDATAFMT_RAW the pixels are decoded
+ * into the translation buffer; sp->tfunc is called in either case.
+ *
+ * Returns 1 on success, 0 (after reporting an error) when the raw data
+ * runs out before a plane is complete.  tif_rawcp/tif_rawcc are updated
+ * in both cases.
+ */
+static int
+LogLuvDecode32(TIFF* tif, tidata_t op, tsize_t occ, tsample_t s)
+{
+	LogLuvState* sp;
+	int shft, i, npixels;
+	unsigned char* bp;
+	uint32* tp;
+	uint32 b;
+	int cc, rc;
+
+	assert(s == 0);
+	sp = DecoderState(tif);
+	assert(sp != NULL);
+
+	npixels = occ / sp->pixel_size;
+
+	if (sp->user_datafmt == SGILOGDATAFMT_RAW)
+		tp = (uint32*) op;
+	else {
+		assert(sp->tbuflen >= npixels);
+		tp = (uint32*) sp->tbuf;
+	}
+	/* planes are OR-ed in below, so start from all zero bits */
+	_TIFFmemset((tdata_t) tp, 0, npixels*sizeof (tp[0]));
+
+	bp = (unsigned char*) tif->tif_rawcp;
+	cc = tif->tif_rawcc;
+					/* get each byte string */
+	for (shft = 4*8; (shft -= 8) >= 0; ) {
+		for (i = 0; i < npixels && cc > 0; ) {
+			if (*bp >= 128) {		/* run */
+				/*
+				 * A run needs the code byte and a value
+				 * byte; stop rather than read past the
+				 * end of truncated data.
+				 */
+				if (cc < 2)
+					break;
+				rc = *bp++ + (2-128);
+				b = (uint32)*bp++ << shft;
+				cc -= 2;
+				while (rc-- && i < npixels)
+					tp[i++] |= b;
+			} else {			/* non-run */
+				rc = *bp++;		/* nul is noop */
+				while (--cc && rc-- && i < npixels)
+					tp[i++] |= (uint32)*bp++ << shft;
+			}
+		}
+		if (i != npixels) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		"LogLuvDecode32: Not enough data at row %d (short %d pixels)",
+			    tif->tif_row, npixels - i);
+			tif->tif_rawcp = (tidata_t) bp;
+			tif->tif_rawcc = cc;
+			return (0);
+		}
+	}
+	(*sp->tfunc)(sp, op, npixels);
+	tif->tif_rawcp = (tidata_t) bp;
+	tif->tif_rawcc = cc;
+	return (1);
+}
+
+/*
+ * Decode a strip of pixels.  We break it into rows to
+ * maintain synchrony with the encode algorithm, which
+ * is row by row.
+ *
+ * Each row of TIFFScanlineSize() bytes is passed to the installed
+ * tif_decoderow method.  Returns 1 when all cc bytes were decoded, 0 if
+ * a row fails or the scanline size is not positive.
+ */
+static int
+LogLuvDecodeStrip(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	tsize_t rowlen = TIFFScanlineSize(tif);
+
+	/* avoid dividing by zero (and a loop that never advances) below */
+	if (rowlen <= 0)
+		return (0);
+
+	assert(cc%rowlen == 0);
+	while (cc && (*tif->tif_decoderow)(tif, bp, rowlen, s)) {
+		bp += rowlen;
+		cc -= rowlen;
+	}
+	return (cc == 0);
+}
+
+/*
+ * Decode a tile of pixels.  We break it into rows to
+ * maintain synchrony with the encode algorithm, which
+ * is row by row.
+ *
+ * Each row of TIFFTileRowSize() bytes is passed to the installed
+ * tif_decoderow method.  Returns 1 when all cc bytes were decoded, 0 if
+ * a row fails or the tile row size is not positive.
+ */
+static int
+LogLuvDecodeTile(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	tsize_t rowlen = TIFFTileRowSize(tif);
+
+	/* avoid dividing by zero (and a loop that never advances) below */
+	if (rowlen <= 0)
+		return (0);
+
+	assert(cc%rowlen == 0);
+	while (cc && (*tif->tif_decoderow)(tif, bp, rowlen, s)) {
+		bp += rowlen;
+		cc -= rowlen;
+	}
+	return (cc == 0);
+}
+
+/*
+ * Encode a row of 16-bit pixels.
+ *
+ * cc is the size in bytes of the input row bp; cc / pixel_size pixels
+ * are encoded.  Unless the user format is SGILOGDATAFMT_16BIT the row is
+ * first translated into sp->tbuf by sp->tfunc.
+ *
+ * The pixels are written as two byte planes (high byte, then low byte).
+ * Within a plane, a run of at least MINRUN equal bytes (at most 127+2)
+ * is written as a code byte 128-2+length followed by the value; bytes
+ * between runs are written as literal groups of up to 127, each preceded
+ * by its length.  A group of two or three identical bytes in front of a
+ * run is written as a short run instead of as literals.
+ *
+ * The raw buffer is flushed with TIFFFlushData1() whenever too little
+ * room is left for the next item.
+ *
+ * Returns 0 once the row has been encoded and -1 if a flush fails.
+ * NOTE(review): 0 on completion differs from the decoders in this file,
+ * which return 1 on success - confirm what the callers expect.
+ */
+static int
+LogL16Encode(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	LogLuvState* sp = EncoderState(tif);
+	int shft, i, j, npixels;
+	tidata_t op;
+	int16* tp;
+	int16 b;
+	int occ, rc=0, mask, beg;
+
+	assert(s == 0);
+	assert(sp != NULL);
+	npixels = cc / sp->pixel_size;
+
+	if (sp->user_datafmt == SGILOGDATAFMT_16BIT)
+		tp = (int16*) bp;
+	else {
+		tp = (int16*) sp->tbuf;
+		assert(sp->tbuflen >= npixels);
+		(*sp->tfunc)(sp, bp, npixels);
+	}
+					/* compress each byte string */
+	op = tif->tif_rawcp;
+	occ = tif->tif_rawdatasize - tif->tif_rawcc;	/* room left in the raw buffer */
+	for (shft = 2*8; (shft -= 8) >= 0; )
+		for (i = 0; i < npixels; i += rc) {
+			if (occ < 4) {	/* too little room left: flush the raw buffer */
+				tif->tif_rawcp = op;
+				tif->tif_rawcc = tif->tif_rawdatasize - occ;
+				if (!TIFFFlushData1(tif))
+					return (-1);
+				op = tif->tif_rawcp;
+				occ = tif->tif_rawdatasize - tif->tif_rawcc;
+			}
+			mask = 0xff << shft;		/* find next run */
+			for (beg = i; beg < npixels; beg += rc) {
+				b = (int16) (tp[beg] & mask);
+				rc = 1;
+				while (rc < 127+2 && beg+rc < npixels &&
+						(tp[beg+rc] & mask) == b)
+					rc++;
+				if (rc >= MINRUN)
+					break;		/* long enough */
+			}
+			if (beg-i > 1 && beg-i < MINRUN) {
+				b = (int16) (tp[i] & mask);/*check short run */
+				j = i+1;
+				while ((tp[j++] & mask) == b)
+                                    if (j == beg) {
+                                        *op++ = (tidataval_t)(128-2+j-i);
+                                        *op++ = (tidataval_t) (b >> shft);
+                                        occ -= 2;
+                                        i = beg;
+                                        break;
+                                    }
+			}
+			while (i < beg) {		/* write out non-run */
+				if ((j = beg-i) > 127) j = 127;
+				if (occ < j+3) {
+                                    tif->tif_rawcp = op;
+                                    tif->tif_rawcc = tif->tif_rawdatasize - occ;
+                                    if (!TIFFFlushData1(tif))
+                                        return (-1);
+                                    op = tif->tif_rawcp;
+                                    occ = tif->tif_rawdatasize - tif->tif_rawcc;
+				}
+				*op++ = (tidataval_t) j; occ--;
+				while (j--) {
+					*op++ = (tidataval_t) (tp[i++] >> shft & 0xff);
+					occ--;
+				}
+			}
+			if (rc >= MINRUN) {		/* write out run */
+				*op++ = (tidataval_t) (128-2+rc);
+				*op++ = (tidataval_t) (tp[beg] >> shft & 0xff);
+				occ -= 2;
+			} else
+				rc = 0;	/* no run was written, so the loop step must not skip anything */
+		}
+	tif->tif_rawcp = op;
+	tif->tif_rawcc = tif->tif_rawdatasize - occ;
+
+	return (0);
+}
+
+/*
+ * Encode a row of 24-bit pixels: three bytes per pixel, most significant byte first.
+ */
+static int
+LogLuvEncode24(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	LogLuvState* sp = EncoderState(tif);
+	int left, npixels, room;
+	tidata_t out;
+	uint32* src;
+
+	assert(s == 0);
+	assert(sp != NULL);
+	npixels = cc / sp->pixel_size;
+
+	if (sp->user_datafmt != SGILOGDATAFMT_RAW) {
+		src = (uint32*) sp->tbuf;	/* convert into the translation buffer */
+		assert(sp->tbuflen >= npixels);
+		(*sp->tfunc)(sp, bp, npixels);
+	} else
+		src = (uint32*) bp;		/* caller supplied packed pixels */
+					/* copy pixels out, flushing when full */
+	out = tif->tif_rawcp;
+	room = tif->tif_rawdatasize - tif->tif_rawcc;
+	for (left = npixels; left != 0; left--, src++) {
+		if (room < 3) {
+			tif->tif_rawcp = out;
+			tif->tif_rawcc = tif->tif_rawdatasize - room;
+			if (!TIFFFlushData1(tif))
+				return (-1);
+			out = tif->tif_rawcp;
+			room = tif->tif_rawdatasize - tif->tif_rawcc;
+		}
+		out[0] = (tidataval_t)(*src >> 16);
+		out[1] = (tidataval_t)(*src >> 8 & 0xff);
+		out[2] = (tidataval_t)(*src & 0xff);
+		out += 3;
+		room -= 3;
+	}
+	tif->tif_rawcp = out;
+	tif->tif_rawcc = tif->tif_rawdatasize - room;
+
+	return (0);
+}
+
+/*
+ * Encode a row of 32-bit pixels: each of the four byte planes is run-length coded, high byte first.  Returns 0, or -1 when flushing the raw buffer fails.
+ */
+static int
+LogLuvEncode32(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	LogLuvState* sp = EncoderState(tif);
+	int shft, i, j, npixels;
+	tidata_t op;
+	uint32* tp;
+	uint32 b, mask;		/* mask is unsigned: 0xff << 24 does not fit in a signed int */
+	int occ, rc=0, beg;
+
+	assert(s == 0);
+	assert(sp != NULL);
+
+	npixels = cc / sp->pixel_size;	/* cc divided by the per-pixel size chosen at setup */
+
+	if (sp->user_datafmt == SGILOGDATAFMT_RAW)
+		tp = (uint32*) bp;	/* input already holds packed pixels: use it in place */
+	else {
+		tp = (uint32*) sp->tbuf;	/* otherwise encode from the translation buffer */
+		assert(sp->tbuflen >= npixels);
+		(*sp->tfunc)(sp, bp, npixels);	/* format converter installed at setup time */
+	}
+					/* compress each byte string */
+	op = tif->tif_rawcp;
+	occ = tif->tif_rawdatasize - tif->tif_rawcc;	/* room left in the raw buffer */
+	for (shft = 4*8; (shft -= 8) >= 0; )	/* shft takes the values 24, 16, 8, 0 */
+		for (i = 0; i < npixels; i += rc) {
+			if (occ < 4) {	/* keep at least 4 bytes free before starting a group */
+				tif->tif_rawcp = op;
+				tif->tif_rawcc = tif->tif_rawdatasize - occ;
+				if (!TIFFFlushData1(tif))
+					return (-1);
+				op = tif->tif_rawcp;
+				occ = tif->tif_rawdatasize - tif->tif_rawcc;
+			}
+			mask = (uint32)0xff << shft;	/* find next run */
+			for (beg = i; beg < npixels; beg += rc) {
+				b = tp[beg] & mask;
+				rc = 1;
+				while (rc < 127+2 && beg+rc < npixels &&
+						(tp[beg+rc] & mask) == b)
+					rc++;	/* extend the run, 129 pixels at most */
+				if (rc >= MINRUN)
+					break;		/* long enough */
+			}
+			if (beg-i > 1 && beg-i < MINRUN) {	/* a few pixels precede the run */
+				b = tp[i] & mask;	/* check short run */
+				j = i+1;
+				while ((tp[j++] & mask) == b)
+					if (j == beg) {	/* all equal: emit them as a short run */
+						*op++ = (tidataval_t)(128-2+j-i);	/* run header: 126 + length */
+						*op++ = (tidataval_t)(b >> shft);
+						occ -= 2;
+						i = beg;
+						break;
+					}
+			}
+			while (i < beg) {		/* write out non-run */
+				if ((j = beg-i) > 127) j = 127;	/* a literal chunk holds at most 127 bytes */
+				if (occ < j+3) {	/* j+1 bytes here plus 2 for a run that may follow */
+					tif->tif_rawcp = op;
+					tif->tif_rawcc = tif->tif_rawdatasize - occ;
+					if (!TIFFFlushData1(tif))
+						return (-1);
+					op = tif->tif_rawcp;
+					occ = tif->tif_rawdatasize - tif->tif_rawcc;
+				}
+				*op++ = (tidataval_t) j; occ--;	/* literal header: plain count */
+				while (j--) {
+					*op++ = (tidataval_t)(tp[i++] >> shft & 0xff);
+					occ--;
+				}
+			}
+			if (rc >= MINRUN) {		/* write out run */
+				*op++ = (tidataval_t) (128-2+rc);
+				*op++ = (tidataval_t)(tp[beg] >> shft & 0xff);
+				occ -= 2;
+			} else
+				rc = 0;	/* no run written: the outer loop's i += rc must not advance */
+		}
+	tif->tif_rawcp = op;	/* publish the write position and byte count */
+	tif->tif_rawcc = tif->tif_rawdatasize - occ;
+
+	return (0);
+}
+
+/*
+ * Encode a strip one scanline at a time so that no run crosses a row
+ * boundary.  Returns nonzero when every row was taken by the row encoder.
+ */
+static int
+LogLuvEncodeStrip(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	const tsize_t rowlen = TIFFScanlineSize(tif);	/* bytes per scanline */
+
+	assert(cc%rowlen == 0);
+	for (; cc != 0; bp += rowlen, cc -= rowlen)	/* a row encoder result of 0 means success */
+		if ((*tif->tif_encoderow)(tif, bp, rowlen, s) != 0) break;
+	return (cc == 0);
+}
+
+/*
+ * Encode a tile one tile-row at a time so that no run crosses a row
+ * boundary.  Returns nonzero when every row was taken by the row encoder.
+ */
+static int
+LogLuvEncodeTile(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	const tsize_t rowlen = TIFFTileRowSize(tif);	/* bytes per tile row */
+
+	assert(cc%rowlen == 0);
+	for (; cc != 0; bp += rowlen, cc -= rowlen)	/* a row encoder result of 0 means success */
+		if ((*tif->tif_encoderow)(tif, bp, rowlen, s) != 0) break;
+	return (cc == 0);
+}
+
+/*
+ * Encode/Decode functions for converting to and from user formats.
+ */
+
+#include "uvcode.h"
+
+#ifndef UVSCALE
+#define U_NEU		0.210526316
+#define V_NEU		0.473684211
+#define UVSCALE		410.
+#endif
+
+#ifndef	M_LN2
+#define M_LN2		0.69314718055994530942
+#endif
+#ifndef M_PI
+#define M_PI		3.14159265358979323846
+#endif
+#define log2(x)		((1./M_LN2)*log(x))	/* base-2 logarithm built from the natural log */
+#define exp2(x)		exp(M_LN2*(x))		/* 2 raised to the power x */
+
+#define itrunc(x,m)	((m)==SGILOGENCODE_NODITHER ? \
+				(int)(x) : \
+				(int)((x) + rand()*(1./RAND_MAX) - .5))	/* otherwise add noise in [-.5,.5] from rand() before truncating */
+
+#if !LOGLUV_PUBLIC
+static
+#endif
+double
+LogL16toY(int p16)		/* map a 16-bit LogL code to luminance */
+{
+	const int	mag = p16 & 0x7fff;	/* low 15 bits: log magnitude */
+	double	lum;
+
+	if (mag == 0)			/* a zero magnitude decodes to exactly zero */
+		return (0.);
+	lum = exp(M_LN2/256.*(mag+.5) - M_LN2*64.);
+	return ((p16 & 0x8000) ? -lum : lum);	/* bit 15 carries the sign */
+}
+
+#if !LOGLUV_PUBLIC
+static
+#endif
+int
+LogL16fromY(double Y, int em)	/* quantise luminance Y to a 16-bit LogL code */
+{
+	if (Y >= 1.8371976e19)		/* clamp to the largest positive code */
+		return (0x7fff);
+	else if (Y <= -1.8371976e19)	/* clamp to the largest negative code */
+		return (0xffff);
+	else if (Y > 5.4136769e-20)
+		return itrunc(256.*(log2(Y) + 64.), em);
+	else if (Y < -5.4136769e-20)	/* negative: set every bit above the low 15 */
+		return (~0x7fff | itrunc(256.*(log2(-Y) + 64.), em));
+	return (0);			/* magnitude too small to encode */
+}
+
+static void
+L16toY(LogLuvState* sp, tidata_t op, int n)
+{
+	const int16* src = (const int16*) sp->tbuf;	/* 16-bit LogL codes */
+	float* dst = (float*) op;			/* one float per pixel */
+	int k;
+	for (k = 0; k < n; k++)		/* decode each code to a luminance */
+		dst[k] = (float)LogL16toY(src[k]);
+}
+
+static void
+L16toGry(LogLuvState* sp, tidata_t op, int n)
+{
+	const int16* src = (const int16*) sp->tbuf;	/* 16-bit LogL codes */
+	uint8* dst = (uint8*) op;			/* one grey byte per pixel */
+	int k;
+	for (k = 0; k < n; k++) {	/* clamp to [0,1], then square-root scale to a byte */
+		const double lum = LogL16toY(src[k]);
+		dst[k] = (uint8) ((lum <= 0.) ? 0 : (lum >= 1.) ? 255 : (int)(256.*sqrt(lum)));
+	}
+}
+
+static void
+L16fromY(LogLuvState* sp, tidata_t op, int n)
+{
+	int16* dst = (int16*) sp->tbuf;		/* encoded codes go to the translation buffer */
+	const float* src = (const float*) op;	/* caller's luminance values */
+	int k;
+	for (k = 0; k < n; k++)		/* dithering follows sp->encode_meth */
+		dst[k] = (int16) (LogL16fromY(src[k], sp->encode_meth));
+}
+
+#if !LOGLUV_PUBLIC
+static
+#endif
+void
+XYZtoRGB24(float xyz[3], uint8 rgb[3])
+{
+	double	lin[3];			/* linear R, G, B */
+	int	c;
+					/* XYZ -> linear RGB, assuming CCIR-709 primaries */
+	lin[0] =  2.690*xyz[0] + -1.276*xyz[1] + -0.414*xyz[2];
+	lin[1] = -1.022*xyz[0] +  1.978*xyz[1] +  0.044*xyz[2];
+	lin[2] =  0.061*xyz[0] + -0.224*xyz[1] +  1.163*xyz[2];
+					/* gamma 2.0 via sqrt, clamped to 0..255 */
+	for (c = 0; c < 3; c++)
+		rgb[c] = (uint8)((lin[c]<=0.) ? 0 : (lin[c] >= 1.) ? 255 :
+				(int)(256.*sqrt(lin[c])));
+}
+
+#if !LOGLUV_PUBLIC
+static
+#endif
+double
+LogL10toY(int p10)		/* map a 10-bit LogL code to luminance */
+{
+	/* code 0 decodes to exactly zero; all others go through the log scale */
+	return (p10 == 0) ? 0. :
+		exp(M_LN2/64.*(p10+.5) - M_LN2*12.);
+}
+
+#if !LOGLUV_PUBLIC
+static
+#endif
+int
+LogL10fromY(double Y, int em)	/* quantise luminance Y to a 10-bit LogL code */
+{
+	if (Y <= .00024283)		/* at or below the lower limit: code 0 */
+		return (0);
+	if (Y >= 15.742)		/* at or above the upper limit: top code */
+		return (0x3ff);
+	/* in between: 64 steps per power of two, offset by 12 */
+	return itrunc(64.*(log2(Y) + 12.), em);
+}
+
+#define NANGLES		100
+#define uv2ang(u, v)	( (NANGLES*.499999999/M_PI) \
+				* atan2((v)-V_NEU,(u)-U_NEU) + .5*NANGLES )	/* angle about (U_NEU,V_NEU), scaled to a bin number */
+
+static int
+oog_encode(double u, double v)		/* encode out-of-gamut chroma */
+{
+	static int	oog_table[NANGLES];	/* per angle bin: index of a perimeter cell */
+	static int	initialized = 0;	/* table is built on the first call only */
+	register int	i;
+	
+	if (!initialized) {		/* set up perimeter table */
+		double	eps[NANGLES], ua, va, ang, epsa;
+		int	ui, vi, ustep;
+		for (i = NANGLES; i--; )
+			eps[i] = 2.;	/* 2. marks a bin that has no entry yet */
+		for (vi = UV_NVS; vi--; ) {
+			va = UV_VSTART + (vi+.5)*UV_SQSIZ;
+			ustep = uv_row[vi].nus-1;	/* jump from the last cell of the row to the first */
+			if (vi == UV_NVS-1 || vi == 0 || ustep <= 0)
+				ustep = 1;	/* first or last row (or a one-cell row): visit every cell */
+			for (ui = uv_row[vi].nus-1; ui >= 0; ui -= ustep) {
+				ua = uv_row[vi].ustart + (ui+.5)*UV_SQSIZ;
+				ang = uv2ang(ua, va);
+                                i = (int) ang;
+				epsa = fabs(ang - (i+.5));	/* distance from the centre of bin i */
+				if (epsa < eps[i]) {	/* keep the cell closest to the bin centre */
+					oog_table[i] = uv_row[vi].ncum + ui;
+					eps[i] = epsa;
+				}
+			}
+		}
+		for (i = NANGLES; i--; )	/* fill any holes */
+			if (eps[i] > 1.5) {	/* still at the initial 2.: nothing landed in this bin */
+				int	i1, i2;
+				for (i1 = 1; i1 < NANGLES/2; i1++)	/* distance to the nearest filled bin upward (wrapping) */
+					if (eps[(i+i1)%NANGLES] < 1.5)
+						break;
+				for (i2 = 1; i2 < NANGLES/2; i2++)	/* and downward (wrapping) */
+					if (eps[(i+NANGLES-i2)%NANGLES] < 1.5)
+						break;
+				if (i1 < i2)	/* copy from whichever neighbour is closer */
+					oog_table[i] =
+						oog_table[(i+i1)%NANGLES];
+				else
+					oog_table[i] =
+						oog_table[(i+NANGLES-i2)%NANGLES];
+			}
+		initialized = 1;
+	}
+	i = (int) uv2ang(u, v);		/* look up hue angle */
+	return (oog_table[i]);
+}
+
+#undef uv2ang
+#undef NANGLES
+
+#if !LOGLUV_PUBLIC
+static
+#endif
+int
+uv_encode(double u, double v, int em)	/* encode (u',v') coordinates; returns a uv table index */
+{
+	int	vi, ui;
+	if (u != u || v != v)		/* NaN compares unequal to itself: use neutral chroma */
+		u = U_NEU, v = V_NEU;
+	if (v < UV_VSTART)		/* below the first row: out of gamut */
+		return oog_encode(u, v);
+	vi = itrunc((v - UV_VSTART)*(1./UV_SQSIZ), em);
+	if (vi < 0 || vi >= UV_NVS)	/* a negative row must never index uv_row */
+		return oog_encode(u, v);
+	if (u < uv_row[vi].ustart)	/* left of this row's first cell */
+		return oog_encode(u, v);
+	ui = itrunc((u - uv_row[vi].ustart)*(1./UV_SQSIZ), em);
+	if (ui < 0 || ui >= uv_row[vi].nus)	/* outside this row's cells */
+		return oog_encode(u, v);
+	return (uv_row[vi].ncum + ui);	/* row's starting index plus column */
+}
+
+#if !LOGLUV_PUBLIC
+static
+#endif
+int
+uv_decode(double *up, double *vp, int c)	/* decode (u',v') index */
+{
+	int	lo = 0, hi = UV_NVS;	/* candidate rows are [lo, hi) */
+	int	row, col;
+
+	if (c < 0 || c >= UV_NDIVS)
+		return (-1);
+					/* bisect on each row's starting index */
+	while (hi - lo > 1) {
+		const int mid = (lo + hi) >> 1;
+		const int delta = c - uv_row[mid].ncum;
+
+		if (delta < 0) {	/* c lies in an earlier row */
+			hi = mid;
+			continue;
+		}
+		lo = mid;		/* row mid starts at or before c */
+		if (delta == 0)
+			break;		/* exact hit on a row start */
+	}
+	row = lo;
+	col = c - uv_row[row].ncum;
+					/* report the centre of the selected cell */
+	*up = uv_row[row].ustart + (col+.5)*UV_SQSIZ;
+	*vp = UV_VSTART + (row+.5)*UV_SQSIZ;
+	return (0);
+}
+
+#if !LOGLUV_PUBLIC
+static
+#endif
+void
+LogLuv24toXYZ(uint32 p, float XYZ[3])
+{
+	int	Ce;
+	double	u, v, scale, x, y;
+	const double	Y = LogL10toY(p>>14 & 0x3ff);	/* luminance: the 10 bits above bit 14 */
+
+	if (Y <= 0.) {				/* no luminance: output is all zero */
+		XYZ[0] = XYZ[1] = XYZ[2] = 0.;
+		return;
+	}
+	Ce = p & 0x3fff;			/* low 14 bits: chroma index */
+	if (uv_decode(&u, &v, Ce) < 0) {	/* bad index: use neutral chroma */
+		u = U_NEU; v = V_NEU;
+	}
+					/* (u',v') -> (x,y) */
+	scale = 1./(6.*u - 16.*v + 12.);
+	x = 9.*u * scale;
+	y = 4.*v * scale;
+					/* (x,y) and Y -> XYZ */
+	XYZ[0] = (float)(x/y * Y);
+	XYZ[1] = (float)Y;
+	XYZ[2] = (float)((1.-x-y)/y * Y);
+}
+
+#if !LOGLUV_PUBLIC
+static
+#endif
+uint32
+LogLuv24fromXYZ(float XYZ[3], int em)
+{
+	const int	Le = LogL10fromY(XYZ[1], em);	/* 10-bit luminance code */
+	const double	s = XYZ[0] + 15.*XYZ[1] + 3.*XYZ[2];
+	double	u = U_NEU, v = V_NEU;		/* neutral unless chroma is computed */
+	int	Ce;
+
+	/*
+	 * Chroma is computed only for a non-zero luminance code and a sum that is not <= 0.
+	 */
+	if (Le != 0 && !(s <= 0.)) {
+		u = 4.*XYZ[0] / s;
+		v = 9.*XYZ[1] / s;
+	}
+	Ce = uv_encode(u, v, em);
+	if (Ce < 0)			/* never happens */
+		Ce = uv_encode(U_NEU, V_NEU, SGILOGENCODE_NODITHER);
+
+					/* luminance above bit 14, chroma index below */
+	return (Le << 14 | Ce);
+}
+
+static void
+Luv24toXYZ(LogLuvState* sp, tidata_t op, int n)
+{
+	const uint32* src = (const uint32*) sp->tbuf;	/* packed pixels */
+	float* dst = (float*) op;			/* three floats per pixel */
+
+	/*
+	 * Expand every packed pixel into an XYZ triple.
+	 */
+	for (; n > 0; n--, src++, dst += 3)
+		LogLuv24toXYZ(*src, dst);
+}
+
+static void
+Luv24toLuv48(LogLuvState* sp, tidata_t op, int n)
+{
+	const uint32* src = (const uint32*) sp->tbuf;	/* packed pixels */
+	int16* dst = (int16*) op;			/* three 16-bit words per pixel */
+
+	for (; n > 0; n--, src++, dst += 3) {
+		double u, v;
+						/* first word: luminance */
+		dst[0] = (int16)((*src >> 12 & 0xffd) + 13314);
+		if (uv_decode(&u, &v, *src&0x3fff) < 0) {
+			u = U_NEU;
+			v = V_NEU;
+		}
+						/* chroma scaled by 2^15 */
+		dst[1] = (int16)(u * (1L<<15));
+		dst[2] = (int16)(v * (1L<<15));
+	}
+}
+
+static void
+Luv24toRGB(LogLuvState* sp, tidata_t op, int n)
+{
+	const uint32* src = (const uint32*) sp->tbuf;	/* packed pixels */
+	uint8* dst = (uint8*) op;			/* three bytes per pixel */
+	float xyz[3];				/* scratch triple, rewritten for each pixel */
+
+	for (; n > 0; n--, src++, dst += 3) {
+		/* two steps: packed LogLuv -> XYZ -> 8-bit RGB */
+		LogLuv24toXYZ(*src, xyz);
+
+		XYZtoRGB24(xyz, dst);
+	}
+}
+
+static void
+Luv24fromXYZ(LogLuvState* sp, tidata_t op, int n)
+{
+	uint32* dst = (uint32*) sp->tbuf;	/* one packed word per pixel */
+	float* src = (float*) op;		/* caller's XYZ triples */
+
+	/* pack every triple; the dithering choice
+	 * is taken from sp->encode_meth */
+	for (; n > 0; n--, src += 3)
+		*dst++ = LogLuv24fromXYZ(src, sp->encode_meth);
+}
+
+static void
+Luv24fromLuv48(LogLuvState* sp, tidata_t op, int n)	/* pack n Luv48 pixels from op into 24-bit words in sp->tbuf */
+{
+	uint32* luv = (uint32*) sp->tbuf;
+	int16* luv3 = (int16*) op;
+	while (n-- > 0) {
+		int Le, Ce;
+		/* inverse of Luv24toLuv48, where L16 = 4*L10 + 13314; result clamped to 10 bits */
+		if (luv3[0] <= 13314)
+			Le = 0;
+		else if (luv3[0] >= (1<<12)+13314)
+			Le = (1<<10) - 1;
+		else if (sp->encode_meth == SGILOGENCODE_NODITHER)
+			Le = (luv3[0]-13314) >> 2;
+		else
+			Le = itrunc(.25*(luv3[0]-13314.), sp->encode_meth);
+		if (Le > (1<<10) - 1)	/* dithering can round the top step up past 10 bits */
+			Le = (1<<10) - 1;
+		Ce = uv_encode((luv3[1]+.5)/(1<<15), (luv3[2]+.5)/(1<<15),
+					sp->encode_meth);
+		if (Ce < 0)	/* never happens */
+			Ce = uv_encode(U_NEU, V_NEU, SGILOGENCODE_NODITHER);
+		*luv++ = (uint32)Le << 14 | Ce;	/* luminance above bit 14, chroma index below */
+		luv3 += 3;
+	}
+}
+
+#if !LOGLUV_PUBLIC
+static
+#endif
+void
+LogLuv32toXYZ(uint32 p, float XYZ[3])
+{
+	double	u, v, scale, x, y;
+	const double	Y = LogL16toY((int)p >> 16);	/* luminance from the upper 16 bits */
+
+	if (Y <= 0.) {				/* zero or negative: output is all zero */
+		XYZ[0] = XYZ[1] = XYZ[2] = 0.;
+		return;
+	}
+					/* chroma: one byte each, divided by UVSCALE */
+	u = 1./UVSCALE * ((p>>8 & 0xff) + .5);
+	v = 1./UVSCALE * ((p & 0xff) + .5);
+	scale = 1./(6.*u - 16.*v + 12.);
+	x = 9.*u * scale;
+	y = 4.*v * scale;
+					/* (x,y) and Y -> XYZ */
+	XYZ[0] = (float)(x/y * Y);
+	XYZ[1] = (float)Y;
+	XYZ[2] = (float)((1.-x-y)/y * Y);
+}
+
+#if !LOGLUV_PUBLIC
+static
+#endif
+uint32
+LogLuv32fromXYZ(float XYZ[3], int em)
+{
+	const unsigned int	Le = (unsigned int)LogL16fromY(XYZ[1], em);
+	const double	s = XYZ[0] + 15.*XYZ[1] + 3.*XYZ[2];
+	double	u = U_NEU, v = V_NEU;	/* neutral unless chroma is computed */
+	unsigned int	ue = 0, ve = 0;
+
+	if (Le != 0 && !(s <= 0.)) {
+		u = 4.*XYZ[0] / s;
+		v = 9.*XYZ[1] / s;
+	}
+					/* quantise u', clamped to one byte */
+	if (!(u <= 0.))
+		ue = itrunc(UVSCALE*u, em);
+	if (ue > 255)
+		ue = 255;
+					/* quantise v', clamped to one byte */
+	if (!(v <= 0.))
+		ve = itrunc(UVSCALE*v, em);
+	if (ve > 255)
+		ve = 255;
+					/* luminance | u' | v' */
+	return (Le << 16 | ue << 8 | ve);
+}
+
+static void
+Luv32toXYZ(LogLuvState* sp, tidata_t op, int n)
+{
+	const uint32* src = (const uint32*) sp->tbuf;	/* packed pixels */
+	float* dst = (float*) op;			/* three floats per pixel */
+
+	/* expand every packed pixel into an XYZ triple;
+	 * both cursors advance in step */
+	for (; n > 0; n--, src++, dst += 3)
+		LogLuv32toXYZ(*src, dst);
+}
+
+static void
+Luv32toLuv48(LogLuvState* sp, tidata_t op, int n)
+{
+	const uint32* src = (const uint32*) sp->tbuf;	/* packed pixels */
+	int16* dst = (int16*) op;			/* three 16-bit words per pixel */
+
+	for (; n > 0; n--, src++, dst += 3) {
+		/* chroma bytes are offset by .5 and divided by UVSCALE */
+		const double u = 1./UVSCALE * ((*src>>8 & 0xff) + .5);
+		const double v = 1./UVSCALE * ((*src & 0xff) + .5);
+
+		dst[0] = (int16)(*src >> 16);	/* upper 16 bits copied through */
+						/* chroma scaled by 2^15 */
+		dst[1] = (int16)(u * (1L<<15));
+		dst[2] = (int16)(v * (1L<<15));
+	}
+}
+
+static void
+Luv32toRGB(LogLuvState* sp, tidata_t op, int n)
+{
+	const uint32* src = (const uint32*) sp->tbuf;	/* packed pixels */
+	uint8* dst = (uint8*) op;			/* three bytes per pixel */
+	float xyz[3];				/* scratch triple, rewritten for each pixel */
+
+	for (; n > 0; n--, src++, dst += 3) {
+		/* two steps: packed LogLuv -> XYZ -> 8-bit RGB */
+		LogLuv32toXYZ(*src, xyz);
+
+		XYZtoRGB24(xyz, dst);
+	}
+}
+
+static void
+Luv32fromXYZ(LogLuvState* sp, tidata_t op, int n)
+{
+	uint32* dst = (uint32*) sp->tbuf;	/* one packed word per pixel */
+	float* src = (float*) op;		/* caller's XYZ triples */
+
+	/* pack every triple; the dithering choice
+	 * is taken from sp->encode_meth */
+	for (; n > 0; n--, src += 3)
+		*dst++ = LogLuv32fromXYZ(src, sp->encode_meth);
+}
+
+static void
+Luv32fromLuv48(LogLuvState* sp, tidata_t op, int n)	/* pack n Luv48 pixels from op into 32-bit words in sp->tbuf */
+{
+	uint32* luv = (uint32*) sp->tbuf;
+	int16* luv3 = (int16*) op;
+
+	if (sp->encode_meth == SGILOGENCODE_NODITHER) {	/* integer-only path when no dithering is requested */
+		while (n-- > 0) {
+			*luv++ = (uint32)luv3[0] << 16 |	/* first word goes to the upper 16 bits unchanged */
+				(luv3[1]*(uint32)(UVSCALE+.5) >> 7 & 0xff00) |	/* u scaled and masked into bits 8..15 */
+				(luv3[2]*(uint32)(UVSCALE+.5) >> 15 & 0xff);	/* v scaled and masked into bits 0..7 */
+			luv3 += 3;
+		}
+		return;
+	}
+	while (n-- > 0) {	/* otherwise quantise through itrunc with the selected method */
+		*luv++ = (uint32)luv3[0] << 16 |
+	(itrunc(luv3[1]*(UVSCALE/(1<<15)), sp->encode_meth) << 8 & 0xff00) |
+		(itrunc(luv3[2]*(UVSCALE/(1<<15)), sp->encode_meth) & 0xff);
+		luv3 += 3;
+	}
+}
+
+static void
+_logLuvNop(LogLuvState* sp, tidata_t op, int n)	/* converter that does nothing; installed as the initial sp->tfunc */
+{
+	(void) sp; (void) op; (void) n;	/* mark the parameters as deliberately unused */
+}
+
+static int
+LogL16GuessDataFmt(TIFFDirectory *td)
+{
+#define	PACK(s,b,f)	(((b)<<6)|((s)<<3)|(f))
+	/* fold samples/pixel, bits/sample and sample format into one key */
+	const int key = PACK(td->td_samplesperpixel, td->td_bitspersample, td->td_sampleformat);
+	if (key == PACK(1, 32, SAMPLEFORMAT_IEEEFP))
+		return (SGILOGDATAFMT_FLOAT);
+	if (key == PACK(1, 16, SAMPLEFORMAT_VOID) ||
+	    key == PACK(1, 16, SAMPLEFORMAT_INT) ||
+	    key == PACK(1, 16, SAMPLEFORMAT_UINT))
+		return (SGILOGDATAFMT_16BIT);
+	if (key == PACK(1,  8, SAMPLEFORMAT_VOID) ||
+	    key == PACK(1,  8, SAMPLEFORMAT_UINT))
+		return (SGILOGDATAFMT_8BIT);
+#undef PACK
+	return (SGILOGDATAFMT_UNKNOWN);	/* no listed combination matched */
+}
+
+static uint32
+multiply(size_t m1, size_t m2)
+{
+	/* the product is narrowed to 32 bits before it is checked */
+	const uint32	product = (uint32)(m1 * m2);
+
+	if (m1 != 0 && product / m1 != m2)	/* dividing back exposes any lost bits */
+		return 0;
+	return product;
+}
+
+static int
+LogL16InitState(TIFF* tif)	/* choose the pixel size for the user format and allocate sp->tbuf; 1 on success, 0 on error */
+{
+	TIFFDirectory *td = &tif->tif_dir;
+	LogLuvState* sp = DecoderState(tif);
+	static const char module[] = "LogL16InitState";
+	assert(sp != NULL);
+	assert(td->td_photometric == PHOTOMETRIC_LOGL);
+	if (sp->user_datafmt == SGILOGDATAFMT_UNKNOWN)	/* for some reason, we can't do this in TIFFInitLogL16 */
+		sp->user_datafmt = LogL16GuessDataFmt(td);
+	switch (sp->user_datafmt) {		/* bytes per pixel exchanged with the caller */
+	case SGILOGDATAFMT_FLOAT:
+		sp->pixel_size = sizeof (float);
+		break;
+	case SGILOGDATAFMT_16BIT:
+		sp->pixel_size = sizeof (int16);
+		break;
+	case SGILOGDATAFMT_8BIT:
+		sp->pixel_size = sizeof (uint8);
+		break;
+	default:
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "No support for converting user data format to LogL");
+		return (0);
+	}
+	if (isTiled(tif))		/* buffer covers one tile ... */
+		sp->tbuflen = multiply(td->td_tilewidth, td->td_tilelength);
+	else				/* ... or one strip, never taller than the image itself */
+		sp->tbuflen = multiply(td->td_imagewidth, td->td_rowsperstrip <
+		    td->td_imagelength ? td->td_rowsperstrip : td->td_imagelength);
+	if (multiply(sp->tbuflen, sizeof (int16)) == 0 ||
+	    (sp->tbuf = (tidata_t*) _TIFFmalloc(sp->tbuflen * sizeof (int16))) == NULL) {
+		TIFFErrorExt(tif->tif_clientdata, module, "%s: No space for SGILog translation buffer", tif->tif_name);
+		return (0);
+	}
+	return (1);
+}
+
+static int
+LogLuvGuessDataFmt(TIFFDirectory *td)
+{
+	int fmt = SGILOGDATAFMT_UNKNOWN;
+
+	/*
+	 * The user gave no data format, so infer one from the
+	 * bits-per-sample / sample-format pair.  Pairs not listed
+	 * here leave the format unknown.
+	 */
+#define	PACK(a,b)	(((a)<<3)|(b))
+	switch (PACK(td->td_bitspersample, td->td_sampleformat)) {
+	case PACK(32, SAMPLEFORMAT_IEEEFP):
+		fmt = SGILOGDATAFMT_FLOAT;
+		break;
+	case PACK(32, SAMPLEFORMAT_VOID):
+	case PACK(32, SAMPLEFORMAT_UINT):
+	case PACK(32, SAMPLEFORMAT_INT):
+		fmt = SGILOGDATAFMT_RAW;
+		break;
+	case PACK(16, SAMPLEFORMAT_VOID):
+	case PACK(16, SAMPLEFORMAT_INT):
+	case PACK(16, SAMPLEFORMAT_UINT):
+		fmt = SGILOGDATAFMT_16BIT;
+		break;
+	case PACK( 8, SAMPLEFORMAT_VOID):
+	case PACK( 8, SAMPLEFORMAT_UINT):
+		fmt = SGILOGDATAFMT_8BIT;
+		break;
+	default:
+		break;
+#undef PACK
+	}
+
+	/*
+	 * Cross-check against samples per pixel: the raw format is
+	 * accepted only with one sample, every other format only
+	 * with three, and any other sample count is rejected
+	 * outright.
+	 */
+	if (td->td_samplesperpixel == 1) {
+		if (fmt != SGILOGDATAFMT_RAW)
+			fmt = SGILOGDATAFMT_UNKNOWN;
+	} else if (td->td_samplesperpixel == 3) {
+		if (fmt == SGILOGDATAFMT_RAW)
+			fmt = SGILOGDATAFMT_UNKNOWN;
+	} else
+		fmt = SGILOGDATAFMT_UNKNOWN;
+
+	return (fmt);
+}
+
+static int
+LogLuvInitState(TIFF* tif)	/* choose the pixel size for the user format and allocate sp->tbuf; 1 on success, 0 on error */
+{
+	TIFFDirectory* td = &tif->tif_dir;
+	LogLuvState* sp = DecoderState(tif);
+	static const char module[] = "LogLuvInitState";
+	assert(sp != NULL);
+	assert(td->td_photometric == PHOTOMETRIC_LOGLUV);
+	if (td->td_planarconfig != PLANARCONFIG_CONTIG) {	/* for some reason, we can't do this in TIFFInitLogLuv */
+		TIFFErrorExt(tif->tif_clientdata, module,
+		    "SGILog compression cannot handle non-contiguous data");
+		return (0);
+	}
+	if (sp->user_datafmt == SGILOGDATAFMT_UNKNOWN)
+		sp->user_datafmt = LogLuvGuessDataFmt(td);
+	switch (sp->user_datafmt) {		/* bytes per pixel exchanged with the caller */
+	case SGILOGDATAFMT_FLOAT:
+		sp->pixel_size = 3*sizeof (float);
+		break;
+	case SGILOGDATAFMT_16BIT:
+		sp->pixel_size = 3*sizeof (int16);
+		break;
+	case SGILOGDATAFMT_RAW:
+		sp->pixel_size = sizeof (uint32);
+		break;
+	case SGILOGDATAFMT_8BIT:
+		sp->pixel_size = 3*sizeof (uint8);
+		break;
+	default:
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "No support for converting user data format to LogLuv");
+		return (0);
+	}
+	if (isTiled(tif))		/* buffer covers one tile ... */
+		sp->tbuflen = multiply(td->td_tilewidth, td->td_tilelength);
+	else				/* ... or one strip, never taller than the image itself */
+		sp->tbuflen = multiply(td->td_imagewidth, td->td_rowsperstrip <
+		    td->td_imagelength ? td->td_rowsperstrip : td->td_imagelength);
+	if (multiply(sp->tbuflen, sizeof (uint32)) == 0 ||
+	    (sp->tbuf = (tidata_t*) _TIFFmalloc(sp->tbuflen * sizeof (uint32))) == NULL) {
+		TIFFErrorExt(tif->tif_clientdata, module, "%s: No space for SGILog translation buffer", tif->tif_name);
+		return (0);
+	}
+	return (1);
+}
+
+static int
+LogLuvSetupDecode(TIFF* tif)
+{
+	LogLuvState* sp = DecoderState(tif);
+	TIFFDirectory* td = &tif->tif_dir;
+
+	tif->tif_postdecode = _TIFFNoPostDecode;
+
+	/*
+	 * Colour data: the row decoder depends on the compression scheme
+	 * and the converter on the format the application asked for.
+	 * Formats without a converter below keep whatever sp->tfunc
+	 * already holds.
+	 */
+	if (td->td_photometric == PHOTOMETRIC_LOGLUV) {
+		if (!LogLuvInitState(tif))
+			return (0);
+		if (td->td_compression == COMPRESSION_SGILOG24) {
+			tif->tif_decoderow = LogLuvDecode24;
+			if (sp->user_datafmt == SGILOGDATAFMT_FLOAT)
+				sp->tfunc = Luv24toXYZ;
+			else if (sp->user_datafmt == SGILOGDATAFMT_16BIT)
+				sp->tfunc = Luv24toLuv48;
+			else if (sp->user_datafmt == SGILOGDATAFMT_8BIT)
+				sp->tfunc = Luv24toRGB;
+		} else {
+			tif->tif_decoderow = LogLuvDecode32;
+			if (sp->user_datafmt == SGILOGDATAFMT_FLOAT)
+				sp->tfunc = Luv32toXYZ;
+			else if (sp->user_datafmt == SGILOGDATAFMT_16BIT)
+				sp->tfunc = Luv32toLuv48;
+			else if (sp->user_datafmt == SGILOGDATAFMT_8BIT)
+				sp->tfunc = Luv32toRGB;
+		}
+		return (1);
+	}
+
+	/*
+	 * Luminance-only data: a single row decoder, with converters
+	 * for float and 8-bit grey output.
+	 */
+	if (td->td_photometric == PHOTOMETRIC_LOGL) {
+		if (!LogL16InitState(tif))
+			return (0);
+		tif->tif_decoderow = LogL16Decode;
+		if (sp->user_datafmt == SGILOGDATAFMT_FLOAT)
+			sp->tfunc = L16toY;
+		else if (sp->user_datafmt == SGILOGDATAFMT_8BIT)
+			sp->tfunc = L16toGry;
+		return (1);
+	}
+
+	/*
+	 * Any other photometric interpretation cannot be decoded here.
+	 */
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+    "Inappropriate photometric interpretation %d for SGILog compression; %s",
+	    td->td_photometric, "must be either LogLUV or LogL");
+	return (0);
+}
+
+static int
+LogLuvSetupEncode(TIFF* tif)	/* pick the row encoder and input converter; 1 on success, 0 on any failure */
+{
+	LogLuvState* sp = EncoderState(tif);
+	TIFFDirectory* td = &tif->tif_dir;
+
+	switch (td->td_photometric) {
+	case PHOTOMETRIC_LOGLUV:
+		if (!LogLuvInitState(tif))
+			return (0);	/* state setup failed: report it, as the decode setup does */
+		if (td->td_compression == COMPRESSION_SGILOG24) {
+			tif->tif_encoderow = LogLuvEncode24;
+			switch (sp->user_datafmt) {
+			case SGILOGDATAFMT_FLOAT:
+				sp->tfunc = Luv24fromXYZ;
+				break;
+			case SGILOGDATAFMT_16BIT:
+				sp->tfunc = Luv24fromLuv48;
+				break;
+			case SGILOGDATAFMT_RAW:	/* raw input needs no converter */
+				break;
+			default:
+				goto notsupported;
+			}
+		} else {
+			tif->tif_encoderow = LogLuvEncode32;
+			switch (sp->user_datafmt) {
+			case SGILOGDATAFMT_FLOAT:
+				sp->tfunc = Luv32fromXYZ;
+				break;
+			case SGILOGDATAFMT_16BIT:
+				sp->tfunc = Luv32fromLuv48;
+				break;
+			case SGILOGDATAFMT_RAW:	/* raw input needs no converter */
+				break;
+			default:
+				goto notsupported;
+			}
+		}
+		break;
+	case PHOTOMETRIC_LOGL:
+		if (!LogL16InitState(tif))
+			return (0);	/* state setup failed */
+		tif->tif_encoderow = LogL16Encode;
+		switch (sp->user_datafmt) {
+		case SGILOGDATAFMT_FLOAT:
+			sp->tfunc = L16fromY;
+			break;
+		case SGILOGDATAFMT_16BIT:	/* 16-bit input is encoded in place */
+			break;
+		default:
+			goto notsupported;
+		}
+		break;
+	default:
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+    "Inappropriate photometric interpretation %d for SGILog compression; %s",
+    		    td->td_photometric, "must be either LogLUV or LogL");
+		return (0);	/* no encoder was installed, so this is a failure */
+	}
+	return (1);
+notsupported:
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+	    "SGILog compression supported only for %s, or raw data",
+	    td->td_photometric == PHOTOMETRIC_LOGL ? "Y, L" : "XYZ, Luv");
+	return (0);
+}
+
+static void
+LogLuvClose(TIFF* tif)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+
+	/*
+	 * Whatever format the application exchanged with us, the tags
+	 * recorded in the file are always the same: 16-bit signed
+	 * samples, one per pixel for LogL and three otherwise.  This
+	 * runs after the tags were set but before they are recorded
+	 * in the file, so they are overridden here.
+	 */
+	td->td_sampleformat = SAMPLEFORMAT_INT;
+	td->td_bitspersample = 16;
+	td->td_samplesperpixel =
+	    (td->td_photometric != PHOTOMETRIC_LOGL) ? 3 : 1;
+}
+
+static void
+LogLuvCleanup(TIFF* tif)
+{
+	LogLuvState* sp = (LogLuvState *)tif->tif_data;
+
+	assert(sp != 0);
+					/* hand the tag hooks back to the parent methods */
+	tif->tif_tagmethods.vsetfield = sp->vsetparent;
+	tif->tif_tagmethods.vgetfield = sp->vgetparent;
+					/* release the translation buffer, then the state */
+	if (sp->tbuf != NULL)
+		_TIFFfree(sp->tbuf);
+	_TIFFfree(sp);
+	tif->tif_data = NULL;
+
+	_TIFFSetDefaultCompressionState(tif);
+}
+
+static int
+LogLuvVSetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+	LogLuvState* sp = DecoderState(tif);
+	int bps, fmt;
+
+	if (tag == TIFFTAG_SGILOGENCODE) {		/* dithering method */
+		sp->encode_meth = va_arg(ap, int);
+		if (sp->encode_meth == SGILOGENCODE_NODITHER ||
+		    sp->encode_meth == SGILOGENCODE_RANDITHER)
+			return (1);
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			"Unknown encoding %d for LogLuv compression",
+			sp->encode_meth);
+		return (0);
+	}
+	if (tag != TIFFTAG_SGILOGDATAFMT)		/* not ours: defer to the parent method */
+		return (*sp->vsetparent)(tif, tag, ap);
+
+	sp->user_datafmt = va_arg(ap, int);
+
+	/*
+	 * Rewrite the bits/sample and sample-format tags so that the rest
+	 * of the library knows the size of the data passed between the
+	 * application and the library.  The application is assumed to
+	 * cope with the altered header.
+	 */
+	switch (sp->user_datafmt) {
+	case SGILOGDATAFMT_FLOAT:
+		bps = 32;
+		fmt = SAMPLEFORMAT_IEEEFP;
+		break;
+	case SGILOGDATAFMT_16BIT:
+		bps = 16;
+		fmt = SAMPLEFORMAT_INT;
+		break;
+	case SGILOGDATAFMT_RAW:
+		bps = 32;
+		fmt = SAMPLEFORMAT_UINT;
+		TIFFSetField(tif, TIFFTAG_SAMPLESPERPIXEL, 1);
+		break;
+	case SGILOGDATAFMT_8BIT:
+		bps = 8;
+		fmt = SAMPLEFORMAT_UINT;
+		break;
+	default:
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "Unknown data format %d for LogLuv compression",
+		    sp->user_datafmt);
+		return (0);
+	}
+	TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, bps);
+	TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, fmt);
+	tif->tif_tilesize = isTiled(tif) ? TIFFTileSize(tif) : (tsize_t) -1;	/* sizes depend on bits/sample */
+	tif->tif_scanlinesize = TIFFScanlineSize(tif);
+	return (1);
+}
+
+static int
+LogLuvVGetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+	LogLuvState *sp = (LogLuvState *)tif->tif_data;
+
+	/* only the data-format pseudo-tag is answered here */
+	if (tag != TIFFTAG_SGILOGDATAFMT)
+		return (*sp->vgetparent)(tif, tag, ap);
+
+	/* store the current format through the caller's int pointer */
+	*va_arg(ap, int*) = sp->user_datafmt;
+	return (1);
+}
+
+static const TIFFFieldInfo LogLuvFieldInfo[] = {	/* codec pseudo-tags, merged into the field table by TIFFInitSGILog */
+    { TIFFTAG_SGILOGDATAFMT,	  0, 0,	TIFF_SHORT,	FIELD_PSEUDO,
+      TRUE,	FALSE,	"SGILogDataFmt"},	/* handled by LogLuvVSetField and LogLuvVGetField */
+    { TIFFTAG_SGILOGENCODE,	  0, 0, TIFF_SHORT,	FIELD_PSEUDO,
+      TRUE,	FALSE,	"SGILogEncode"}	/* handled by LogLuvVSetField only */
+};
+
+int
+TIFFInitSGILog(TIFF* tif, int scheme)
+{
+	static const char module[] = "TIFFInitSGILog";
+	LogLuvState* sp;
+
+	assert(scheme == COMPRESSION_SGILOG24 || scheme == COMPRESSION_SGILOG);
+
+	/*
+	 * The tag methods need storage for their values, so the state
+	 * block is created first and cleared.
+	 */
+	sp = (LogLuvState*) _TIFFmalloc(sizeof (LogLuvState));
+	tif->tif_data = (tidata_t) sp;
+	if (sp == NULL) {
+		TIFFErrorExt(tif->tif_clientdata, module,
+			     "%s: No space for LogLuv state block", tif->tif_name);
+		return (0);
+	}
+	_TIFFmemset((tdata_t)sp, 0, sizeof (*sp));
+	sp->tfunc = _logLuvNop;
+	sp->user_datafmt = SGILOGDATAFMT_UNKNOWN;
+	if (scheme == COMPRESSION_SGILOG24)
+		sp->encode_meth = SGILOGENCODE_RANDITHER;
+	else
+		sp->encode_meth = SGILOGENCODE_NODITHER;
+
+	/*
+	 * Codec entry points.  The per-row encoder and decoder are
+	 * filled in later, at setup time.
+	 */
+	tif->tif_setupdecode = LogLuvSetupDecode;
+	tif->tif_setupencode = LogLuvSetupEncode;
+	tif->tif_decodestrip = LogLuvDecodeStrip;
+	tif->tif_encodestrip = LogLuvEncodeStrip;
+	tif->tif_decodetile = LogLuvDecodeTile;
+	tif->tif_encodetile = LogLuvEncodeTile;
+	tif->tif_close = LogLuvClose;
+	tif->tif_cleanup = LogLuvCleanup;
+
+	/* register the pseudo-tags, then put our get/set hooks in front of the parent's */
+	_TIFFMergeFieldInfo(tif, LogLuvFieldInfo,
+			    TIFFArrayCount(LogLuvFieldInfo));
+	sp->vsetparent = tif->tif_tagmethods.vsetfield;
+	sp->vgetparent = tif->tif_tagmethods.vgetfield;
+	tif->tif_tagmethods.vsetfield = LogLuvVSetField;
+	tif->tif_tagmethods.vgetfield = LogLuvVGetField;
+	return (1);
+}
+#endif /* LOGLUV_SUPPORT */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_lzw.c b/src/libtiff/tif_lzw.c
new file mode 100644
index 0000000..79d4091
--- /dev/null
+++ b/src/libtiff/tif_lzw.c
@@ -0,0 +1,1084 @@
+/* $Id: tif_lzw.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#include "tiffiop.h"
+#ifdef LZW_SUPPORT
+/*
+ * TIFF Library.
+ * Rev 5.0 Lempel-Ziv & Welch Compression Support
+ *
+ * This code is derived from the compress program whose code is
+ * derived from software contributed to Berkeley by James A. Woods,
+ * derived from original work by Spencer Thomas and Joseph Orost.
+ *
+ * The original Berkeley copyright notice appears below in its entirety.
+ */
+#include "tif_predict.h"
+
+#include <stdio.h>
+
+/*
+ * NB: The 5.0 spec describes a different algorithm than Aldus
+ *     implements.  Specifically, Aldus does code length transitions
+ *     one code earlier than should be done (for real LZW).
+ *     Earlier versions of this library implemented the correct
+ *     LZW algorithm, but emitted codes in a bit order opposite
+ *     to the TIFF spec.  Thus, to maintain compatibility w/ Aldus
+ *     we interpret MSB-LSB ordered codes to be images written w/
+ *     old versions of this library, but otherwise adhere to the
+ *     Aldus "off by one" algorithm.
+ *
+ * Future revisions to the TIFF spec are expected to "clarify this issue".
+ */
+#define	LZW_COMPAT		/* include backwards compatibility code */
+/*
+ * Each strip of data is supposed to be terminated by a CODE_EOI.
+ * If the following #define is included, the decoder will also
+ * check for end-of-strip w/o seeing this code.  This makes the
+ * library more robust, but also slower.
+ */
+#define	LZW_CHECKEOS		/* include checks for strips w/o EOI code */
+
+#define MAXCODE(n)	((1L<<(n))-1)
+/*
+ * The TIFF spec specifies that encoded bit
+ * strings range from 9 to 12 bits.
+ */
+#define	BITS_MIN	9		/* start with 9 bits */
+#define	BITS_MAX	12		/* max of 12 bit strings */
+/* predefined codes */
+#define	CODE_CLEAR	256		/* code to clear string table */
+#define	CODE_EOI	257		/* end-of-information code */
+#define CODE_FIRST	258		/* first free code entry */
+#define	CODE_MAX	MAXCODE(BITS_MAX)
+#define	HSIZE		9001L		/* 91% occupancy */
+#define	HSHIFT		(13-8)
+#ifdef LZW_COMPAT
+/* NB: +1024 is for compatibility with old files */
+#define	CSIZE		(MAXCODE(BITS_MAX)+1024L)
+#else
+#define	CSIZE		(MAXCODE(BITS_MAX)+1L)
+#endif
+
+/*
+ * State block for each open TIFF file using LZW
+ * compression/decompression.  Note that the predictor
+ * state block must be first in this data structure.
+ */
+typedef	struct {
+	TIFFPredictorState predict;	/* predictor super class; must stay first */
+
+	unsigned short	nbits;		/* current # of bits/code */
+	unsigned short	maxcode;	/* maximum code for lzw_nbits */
+	unsigned short	free_ent;	/* next free entry in hash table */
+	long		nextdata;	/* bit accumulator: code bits being packed/unpacked */
+	long		nextbits;	/* # of valid bits in lzw_nextdata */
+
+        int             rw_mode;        /* tif_mode as saved by TIFFInitLZW */
+} LZWBaseState;
+
+#define	lzw_nbits	base.nbits
+#define	lzw_maxcode	base.maxcode
+#define	lzw_free_ent	base.free_ent
+#define	lzw_nextdata	base.nextdata
+#define	lzw_nextbits	base.nextbits
+
+/*
+ * Encoding-specific state.
+ */
+typedef uint16 hcode_t;			/* codes fit in 16 bits */
+typedef struct {
+	long	hash;		/* key: (char << BITS_MAX) + prefix code; -1 marks an empty slot */
+	hcode_t	code;		/* code assigned to that prefix+char string */
+} hash_t;
+
+/*
+ * Decoding-specific state.
+ */
+typedef struct code_ent {
+	struct code_ent *next;		/* entry of the prefix string; NULL for the pre-loaded single bytes */
+	unsigned short	length;		/* string len, including this token */
+	unsigned char	value;		/* data value (last byte of the string) */
+	unsigned char	firstchar;	/* first token of string */
+} code_t;
+
+typedef	int (*decodeFunc)(TIFF*, tidata_t, tsize_t, tsample_t);
+
+typedef struct {
+	LZWBaseState base;		/* must be first: tif_data is cast to LZWBaseState* */
+
+	/* Decoding specific data */
+	long	dec_nbitsmask;		/* lzw_nbits 1 bits, right adjusted */
+	long	dec_restart;		/* bytes of dec_codep's string already output; 0 = nothing pending */
+#ifdef LZW_CHECKEOS
+	long	dec_bitsleft;		/* available bits in raw data */
+#endif
+	decodeFunc dec_decode;		/* regular or backwards compatible */
+	code_t*	dec_codep;		/* string that was cut short when the output buffer filled */
+	code_t*	dec_oldcodep;		/* previously recognized code */
+	code_t*	dec_free_entp;		/* next free entry */
+	code_t*	dec_maxcodep;		/* max available entry */
+	code_t*	dec_codetab;		/* kept separate for small machines */
+
+	/* Encoding specific data */
+	int	enc_oldcode;		/* last code encountered; (hcode_t)-1 = none yet */
+	long	enc_checkpoint;		/* input count at which the ratio is next checked */
+#define CHECK_GAP	10000		/* enc_ratio check interval */
+	long	enc_ratio;		/* current compression ratio */
+	long	enc_incount;		/* (input) data bytes encoded */
+	long	enc_outcount;		/* encoded (output) bits: PutNextCode adds nbits per code */
+	tidata_t enc_rawlimit;		/* bound on tif_rawdata buffer */
+	hash_t*	enc_hashtab;		/* kept separate for small machines */
+} LZWCodecState;
+
+#define	LZWState(tif)		((LZWBaseState*) (tif)->tif_data)
+#define	DecoderState(tif)	((LZWCodecState*) LZWState(tif))
+#define	EncoderState(tif)	((LZWCodecState*) LZWState(tif))
+
+static	int LZWDecode(TIFF*, tidata_t, tsize_t, tsample_t);
+#ifdef LZW_COMPAT
+static	int LZWDecodeCompat(TIFF*, tidata_t, tsize_t, tsample_t);
+#endif
+static  void cl_hash(LZWCodecState*);
+
+/*
+ * LZW Decoder.
+ */
+
+#ifdef LZW_CHECKEOS
+/*
+ * Fetch the next code via _get, or yield CODE_EOI with a warning once fewer than
+ * nbits bits remain (strips are supposed to end with CODE_EOI). Uses the caller's nbits.
+ */
+#define	NextCode(_tif, _sp, _bp, _code, _get) {				\
+	if ((_sp)->dec_bitsleft < nbits) {				\
+		TIFFWarningExt(_tif->tif_clientdata, _tif->tif_name,				\
+		    "LZWDecode: Strip %d not terminated with EOI code", \
+		    _tif->tif_curstrip);				\
+		_code = CODE_EOI;					\
+	} else {							\
+		_get(_sp,_bp,_code);					\
+		(_sp)->dec_bitsleft -= nbits;				\
+	}								\
+}
+#else
+#define	NextCode(tif, sp, bp, code, get) get(sp, bp, code)
+#endif
+
+/* Ensure the decoder state block and its pre-loaded code table exist; 1 on success, 0 if out of memory. */
+static int
+LZWSetupDecode(TIFF* tif)
+{
+	LZWCodecState* sp = DecoderState(tif);
+	static const char module[] = "LZWSetupDecode";
+	int code;
+
+	if (sp == NULL) {
+		/* No state block yet: allocate one so tag methods have storage. */
+		tif->tif_data = (tidata_t) _TIFFmalloc(sizeof(LZWCodecState));
+		if (tif->tif_data == NULL) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "No space for LZW state block");
+			return (0);
+		}
+		/*
+		 * Null every owned table pointer, the encoder's included, so
+		 * LZWCleanup never frees an uninitialized pointer.
+		 */
+		DecoderState(tif)->dec_codetab = NULL;
+		DecoderState(tif)->dec_decode = NULL;
+		EncoderState(tif)->enc_hashtab = NULL;
+		/* Setup predictor setup. */
+		(void) TIFFPredictorInit(tif);
+		sp = DecoderState(tif);
+	}
+	assert(sp != NULL);
+
+	if (sp->dec_codetab == NULL) {
+		sp->dec_codetab = (code_t*)_TIFFmalloc(CSIZE*sizeof (code_t));
+		if (sp->dec_codetab == NULL) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "No space for LZW code table");
+			return (0);
+		}
+		/* Pre-load the table with the 256 single-byte strings. */
+		for (code = 0; code < 256; code++) {
+			sp->dec_codetab[code].value = (unsigned char) code;
+			sp->dec_codetab[code].firstchar = (unsigned char) code;
+			sp->dec_codetab[code].length = 1;
+			sp->dec_codetab[code].next = NULL;
+		}
+		/*
+		 * CODE_CLEAR and CODE_EOI never name strings; zero their
+		 * slots so corrupt input cannot follow garbage pointers.
+		 */
+		_TIFFmemset(&sp->dec_codetab[CODE_CLEAR], 0,
+		    (CODE_FIRST - CODE_CLEAR) * sizeof (code_t));
+	}
+	return (1);
+}
+
+/*
+ * Setup state for decoding a strip.  Returns 1; the only 0 return is in
+ * the !LZW_COMPAT build, for strips that use old-style codes.
+ */
+static int
+LZWPreDecode(TIFF* tif, tsample_t s)
+{
+	LZWCodecState *sp = DecoderState(tif);
+
+	(void) s;
+	assert(sp != NULL);
+	/* Check for old bit-reversed codes (the test needs two bytes of raw data). */
+	if (tif->tif_rawcc >= 2 &&
+	    tif->tif_rawdata[0] == 0 && (tif->tif_rawdata[1] & 0x1)) {
+#ifdef LZW_COMPAT
+		if (!sp->dec_decode) {
+			TIFFWarningExt(tif->tif_clientdata, tif->tif_name,
+			    "Old-style LZW codes, convert file");
+			/*
+			 * Override default decoding methods with
+			 * ones that deal with the old coding.
+			 * Otherwise the predictor versions set
+			 * above will call the compatibility routines
+			 * through the dec_decode method.
+			 */
+			tif->tif_decoderow = LZWDecodeCompat;
+			tif->tif_decodestrip = LZWDecodeCompat;
+			tif->tif_decodetile = LZWDecodeCompat;
+			/*
+			 * If doing horizontal differencing, must
+			 * re-setup the predictor logic since we
+			 * switched the basic decoder methods...
+			 */
+			(*tif->tif_setupdecode)(tif);
+			sp->dec_decode = LZWDecodeCompat;
+		}
+		sp->lzw_maxcode = MAXCODE(BITS_MIN);
+#else /* !LZW_COMPAT */
+		if (!sp->dec_decode) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			    "Old-style LZW codes not supported");
+			sp->dec_decode = LZWDecode;
+		}
+		return (0);
+#endif/* !LZW_COMPAT */
+	} else {
+		sp->lzw_maxcode = MAXCODE(BITS_MIN)-1;
+		sp->dec_decode = LZWDecode;
+	}
+	sp->lzw_nbits = BITS_MIN;
+	sp->lzw_nextbits = 0;
+	sp->lzw_nextdata = 0;
+
+	sp->dec_restart = 0;
+	sp->dec_nbitsmask = MAXCODE(BITS_MIN);
+#ifdef LZW_CHECKEOS
+	sp->dec_bitsleft = ((long) tif->tif_rawcc) << 3;	/* widen before shifting */
+#endif
+	sp->dec_free_entp = sp->dec_codetab + CODE_FIRST;
+	/*
+	 * Zero entries that are not yet filled in.  We do
+	 * this to guard against bogus input data that causes
+	 * us to index into undefined entries.  If you can
+	 * come up with a way to safely bounds-check input codes
+	 * while decoding then you can remove this operation.
+	 */
+	_TIFFmemset(sp->dec_free_entp, 0, (CSIZE-CODE_FIRST)*sizeof (code_t));
+	sp->dec_oldcodep = &sp->dec_codetab[-1];
+	sp->dec_maxcodep = &sp->dec_codetab[sp->dec_nbitsmask-1];
+	return (1);
+}
+
+/* Decode a "hunk of data".
+ * GetNextCode pulls one MSB-first, nbits-wide code from bp, using the
+ * caller's nextdata/nextbits/nbits/nbitsmask locals; sp is unused. */
+#define	GetNextCode(sp, bp, code) {				\
+	nextdata = (nextdata<<8) | *(bp)++;			\
+	nextbits += 8;						\
+	if (nextbits < nbits) {					\
+		nextdata = (nextdata<<8) | *(bp)++;		\
+		nextbits += 8;					\
+	}							\
+	code = (hcode_t)((nextdata >> (nextbits-nbits)) & nbitsmask);	\
+	nextbits -= nbits;					\
+}
+
+static void
+codeLoop(TIFF* tif)
+{	/* Called by LZWDecode when a string's prefix chain outlasts its recorded length. */
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+	    "LZWDecode: Bogus encoding, loop in the code table; scanline %d",
+	    tif->tif_row);
+}
+
+/*
+ * Decode occ0 bytes into op0 from the MSB-first codes at tif_rawcp, resuming any
+ * string cut short by the previous call; returns 1, or 0 on corrupt/short data.
+ */
+static int
+LZWDecode(TIFF* tif, tidata_t op0, tsize_t occ0, tsample_t s)
+{
+	LZWCodecState *sp = DecoderState(tif);
+	char *op = (char*) op0;
+	long occ = (long) occ0;
+	char *tp;
+	unsigned char *bp;
+	hcode_t code;
+	int len;
+	long nbits, nextbits, nextdata, nbitsmask;
+	code_t *codep, *free_entp, *maxcodep, *oldcodep;
+
+	(void) s;
+	assert(sp != NULL);
+	/* Restart interrupted output operation. */
+	if (sp->dec_restart) {
+		long residue;
+
+		codep = sp->dec_codep;
+		residue = codep->length - sp->dec_restart;
+		if (residue > occ) {
+			/*
+			 * Residue from previous decode is sufficient
+			 * to satisfy decode request.  Skip to the
+			 * start of the decoded string, place decoded
+			 * values in the output buffer, and return.
+			 */
+			sp->dec_restart += occ;
+			do {
+				codep = codep->next;
+			} while (--residue > occ && codep);
+			if (codep) {
+				tp = op + occ;
+				do {
+					*--tp = codep->value;
+					codep = codep->next;
+				} while (--occ && codep);
+			}
+			return (1);
+		}
+		/* Residue satisfies only part of the decode request. */
+		op += residue, occ -= residue;
+		tp = op;
+		do {
+			int t;
+			--tp;
+			t = codep->value;
+			codep = codep->next;
+			*tp = t;
+		} while (--residue && codep);
+		sp->dec_restart = 0;
+	}
+
+	bp = (unsigned char *)tif->tif_rawcp;
+	nbits = sp->lzw_nbits;
+	nextdata = sp->lzw_nextdata;
+	nextbits = sp->lzw_nextbits;
+	nbitsmask = sp->dec_nbitsmask;
+	oldcodep = sp->dec_oldcodep;
+	free_entp = sp->dec_free_entp;
+	maxcodep = sp->dec_maxcodep;
+
+	while (occ > 0) {
+		NextCode(tif, sp, bp, code, GetNextCode);
+		if (code == CODE_EOI)
+			break;
+		if (code == CODE_CLEAR) {
+			free_entp = sp->dec_codetab + CODE_FIRST;
+			/* Drop stale strings so later codes cannot name them. */
+			_TIFFmemset(free_entp, 0,
+			    (CSIZE-CODE_FIRST)*sizeof (code_t));
+			nbits = BITS_MIN;
+			nbitsmask = MAXCODE(BITS_MIN);
+			maxcodep = sp->dec_codetab + nbitsmask-1;
+			NextCode(tif, sp, bp, code, GetNextCode);
+			if (code == CODE_EOI)
+				break;
+			/* Only a literal byte may follow a clear code. */
+			if (code >= CODE_CLEAR) {
+				TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+				"LZWDecode: Corrupted LZW table at scanline %d",
+				tif->tif_row);
+				return (0);
+			}
+			*op++ = (char)code, occ--;
+			oldcodep = sp->dec_codetab + code;
+			continue;
+		}
+		codep = sp->dec_codetab + code;
+
+		/* Add the new entry to the code table, never writing outside it. */
+		if (free_entp < &sp->dec_codetab[0] ||
+		    free_entp >= &sp->dec_codetab[CSIZE] ||
+		    oldcodep < &sp->dec_codetab[0] ||
+		    oldcodep >= &sp->dec_codetab[CSIZE]) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			"LZWDecode: Corrupted LZW table at scanline %d",
+			tif->tif_row);
+			return (0);
+		}
+		free_entp->next = oldcodep;
+		free_entp->firstchar = free_entp->next->firstchar;
+		free_entp->length = free_entp->next->length+1;
+		free_entp->value = (codep < free_entp) ?
+		    codep->firstchar : free_entp->firstchar;
+		if (++free_entp > maxcodep) {
+			if (++nbits > BITS_MAX)		/* should not happen */
+				nbits = BITS_MAX;
+			nbitsmask = MAXCODE(nbits);
+			maxcodep = sp->dec_codetab + nbitsmask-1;
+		}
+		oldcodep = codep;
+		if (code >= 256) {
+			/* Code maps to a string, copy string
+			 * value to output (written in reverse). */
+			if(codep->length == 0) {
+				TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			    "LZWDecode: Wrong length of decoded string: "
+			    "data probably corrupted at scanline %d",
+			    tif->tif_row);
+			    return (0);
+			}
+			if (codep->length > occ) {
+				/*
+				 * String is too long for decode buffer,
+				 * locate portion that will fit, copy to
+				 * the decode buffer, and setup restart
+				 * logic for the next decoding call.
+				 */
+				sp->dec_codep = codep;
+				do {
+					codep = codep->next;
+				} while (codep && codep->length > occ);
+				if (codep) {
+					sp->dec_restart = occ;
+					tp = op + occ;
+					do  {
+						*--tp = codep->value;
+						codep = codep->next;
+					}  while (--occ && codep);
+					if (codep)
+						codeLoop(tif);
+				}
+				break;
+			}
+			len = codep->length;
+			tp = op + len;
+			do {
+				int t;
+				--tp;
+				t = codep->value;
+				codep = codep->next;
+				*tp = t;
+			} while (codep && tp > op);
+			if (codep) {
+			    codeLoop(tif);
+			    break;
+			}
+			op += len, occ -= len;
+		} else
+			*op++ = (char)code, occ--;
+	}
+
+	tif->tif_rawcp = (tidata_t) bp;
+	sp->lzw_nbits = (unsigned short) nbits;
+	sp->lzw_nextdata = nextdata;
+	sp->lzw_nextbits = nextbits;
+	sp->dec_nbitsmask = nbitsmask;
+	sp->dec_oldcodep = oldcodep;
+	sp->dec_free_entp = free_entp;
+	sp->dec_maxcodep = maxcodep;
+
+	if (occ > 0) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		"LZWDecode: Not enough data at scanline %d (short %ld bytes)",
+		    tif->tif_row, occ);
+		return (0);
+	}
+	return (1);
+}
+
+#ifdef LZW_COMPAT
+/* Decode a "hunk of data" for old images.
+ * GetNextCodeCompat pulls one LSB-first, nbits-wide code from bp, using the
+ * caller's nextdata/nextbits/nbits/nbitsmask locals; sp is unused. */
+#define	GetNextCodeCompat(sp, bp, code) {			\
+	nextdata |= (unsigned long) *(bp)++ << nextbits;	\
+	nextbits += 8;						\
+	if (nextbits < nbits) {					\
+		nextdata |= (unsigned long) *(bp)++ << nextbits;\
+		nextbits += 8;					\
+	}							\
+	code = (hcode_t)(nextdata & nbitsmask);			\
+	nextdata >>= nbits;					\
+	nextbits -= nbits;					\
+}
+
+/* Decode occ0 bytes into op0 from old-style LSB-first codes; returns 1, or 0 on corrupt/short data. */
+static int
+LZWDecodeCompat(TIFF* tif, tidata_t op0, tsize_t occ0, tsample_t s)
+{
+	LZWCodecState *sp = DecoderState(tif);
+	char *op = (char*) op0;
+	long occ = (long) occ0;
+	char *tp;
+	unsigned char *bp;
+	int code, nbits, len;
+	long nextbits, nextdata, nbitsmask;
+	code_t *codep, *free_entp, *maxcodep, *oldcodep;
+
+	(void) s;
+	assert(sp != NULL);
+	/* Restart interrupted output operation. */
+	if (sp->dec_restart) {
+		long residue;
+
+		codep = sp->dec_codep;
+		residue = codep->length - sp->dec_restart;
+		if (residue > occ) {
+			/* Residue from previous decode satisfies the whole request:
+			 * skip to the wanted part of the string, emit it, return. */
+			sp->dec_restart += occ;
+			do {
+				codep = codep->next;
+			} while (--residue > occ && codep);
+			if (codep) {
+				tp = op + occ;
+				do {
+					*--tp = codep->value;
+					codep = codep->next;
+				} while (--occ && codep);
+			}
+			return (1);
+		}
+		/* Residue satisfies only part of the decode request. */
+		op += residue, occ -= residue;
+		tp = op;
+		do {
+			*--tp = codep->value;
+			codep = codep->next;
+		} while (--residue && codep);
+		sp->dec_restart = 0;
+	}
+
+	bp = (unsigned char *)tif->tif_rawcp;
+	nbits = sp->lzw_nbits;
+	nextdata = sp->lzw_nextdata;
+	nextbits = sp->lzw_nextbits;
+	nbitsmask = sp->dec_nbitsmask;
+	oldcodep = sp->dec_oldcodep;
+	free_entp = sp->dec_free_entp;
+	maxcodep = sp->dec_maxcodep;
+
+	while (occ > 0) {
+		NextCode(tif, sp, bp, code, GetNextCodeCompat);
+		if (code == CODE_EOI)
+			break;
+		if (code == CODE_CLEAR) {
+			free_entp = sp->dec_codetab + CODE_FIRST;
+			/* Drop stale strings so later codes cannot name them. */
+			_TIFFmemset(free_entp, 0,
+			    (CSIZE-CODE_FIRST)*sizeof (code_t));
+			nbits = BITS_MIN;
+			nbitsmask = MAXCODE(BITS_MIN);
+			maxcodep = sp->dec_codetab + nbitsmask;
+			NextCode(tif, sp, bp, code, GetNextCodeCompat);
+			if (code == CODE_EOI)
+				break;
+			if (code >= CODE_CLEAR) {	/* only a literal may follow a clear */
+				TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+				"LZWDecodeCompat: Corrupted LZW table at scanline %d",
+				tif->tif_row);
+				return (0);
+			}
+			*op++ = code, occ--;
+			oldcodep = sp->dec_codetab + code;
+			continue;
+		}
+		codep = sp->dec_codetab + code;
+
+		/* Add the new entry to the code table, never writing outside it. */
+		if (free_entp < &sp->dec_codetab[0] ||
+		    free_entp >= &sp->dec_codetab[CSIZE] ||
+		    oldcodep < &sp->dec_codetab[0] ||
+		    oldcodep >= &sp->dec_codetab[CSIZE]) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			"LZWDecodeCompat: Corrupted LZW table at scanline %d",
+			tif->tif_row);
+			return (0);
+		}
+		free_entp->next = oldcodep;
+		free_entp->firstchar = free_entp->next->firstchar;
+		free_entp->length = free_entp->next->length+1;
+		free_entp->value = (codep < free_entp) ?
+		    codep->firstchar : free_entp->firstchar;
+		if (++free_entp > maxcodep) {
+			if (++nbits > BITS_MAX)		/* should not happen */
+				nbits = BITS_MAX;
+			nbitsmask = MAXCODE(nbits);
+			maxcodep = sp->dec_codetab + nbitsmask;
+		}
+		oldcodep = codep;
+		if (code >= 256) {
+			/* Code maps to a string, copy string
+			 * value to output (written in reverse). */
+			if(codep->length == 0) {
+				TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			    "LZWDecodeCompat: Wrong length of decoded "
+			    "string: data probably corrupted at scanline %d",
+			    tif->tif_row);
+			    return (0);
+			}
+			if (codep->length > occ) {
+				/* String is too long for the decode buffer: copy the part
+				 * that fits and set up the restart logic for the next call. */
+				sp->dec_codep = codep;
+				do {
+					codep = codep->next;
+				} while (codep && codep->length > occ);
+				if (codep) {
+					sp->dec_restart = occ;
+					tp = op + occ;
+					do  {
+						*--tp = codep->value;
+						codep = codep->next;
+					}  while (--occ && codep);
+					if (codep)
+						codeLoop(tif);
+				}
+				break;
+			}
+			len = codep->length;
+			tp = op + len;
+			do {
+				*--tp = codep->value;
+				codep = codep->next;
+			} while (codep && tp > op);	/* never write below op */
+			if (codep) {
+				codeLoop(tif);
+				break;
+			}
+			op += len, occ -= len;
+		} else
+			*op++ = code, occ--;
+	}
+
+	tif->tif_rawcp = (tidata_t) bp;
+	sp->lzw_nbits = nbits;
+	sp->lzw_nextdata = nextdata;
+	sp->lzw_nextbits = nextbits;
+	sp->dec_nbitsmask = nbitsmask;
+	sp->dec_oldcodep = oldcodep;
+	sp->dec_free_entp = free_entp;
+	sp->dec_maxcodep = maxcodep;
+
+	if (occ > 0) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+	    "LZWDecodeCompat: Not enough data at scanline %d (short %ld bytes)",
+		    tif->tif_row, occ);
+		return (0);
+	}
+	return (1);
+}
+#endif /* LZW_COMPAT */
+
+/*
+ * LZW Encoding.
+ */
+
+static int
+LZWSetupEncode(TIFF* tif)
+{
+	LZWCodecState* sp = EncoderState(tif);
+	static const char module[] = "LZWSetupEncode";
+	assert(sp != NULL);
+	if (sp->enc_hashtab == NULL)	/* reuse, rather than leak, a table from an earlier setup */
+		sp->enc_hashtab = (hash_t*) _TIFFmalloc(HSIZE*sizeof (hash_t));
+	if (sp->enc_hashtab == NULL) {
+		TIFFErrorExt(tif->tif_clientdata, module, "No space for LZW hash table");
+		return (0);	/* out of memory */
+	}
+	return (1);
+}
+
+/*
+ * Put the encoder back into its start-of-strip state; always returns 1.
+ */
+static int
+LZWPreEncode(TIFF* tif, tsample_t s)
+{
+	LZWCodecState *enc = EncoderState(tif);
+
+	(void) s;
+	assert(enc != NULL);
+	/* Code width and table bookkeeping start over. */
+	enc->lzw_nbits = BITS_MIN;
+	enc->lzw_maxcode = MAXCODE(BITS_MIN);
+	enc->lzw_free_ent = CODE_FIRST;
+	/* Nothing is pending in the bit accumulator. */
+	enc->lzw_nextdata = 0;
+	enc->lzw_nextbits = 0;
+	/* Compression-ratio tracking restarts. */
+	enc->enc_incount = 0;
+	enc->enc_outcount = 0;
+	enc->enc_ratio = 0;
+	enc->enc_checkpoint = CHECK_GAP;
+	/* The 4 keeps space for 2 max-sized codes in LZWEncode and LZWPostEncode. */
+	enc->enc_rawlimit = tif->tif_rawdata + tif->tif_rawdatasize-1 - 4;
+	cl_hash(enc);				/* clear hash table */
+	enc->enc_oldcode = (hcode_t) -1;	/* generates CODE_CLEAR in LZWEncode */
+	return (1);
+}
+
+#define	CALCRATIO(sp, rat) { /* rat = caller's incount/outcount, 8 fractional bits */ \
+	if (incount > 0x007fffff) { /* NB: shift will overflow */\
+		rat = outcount >> 8;				\
+		rat = (rat == 0 ? 0x7fffffff : incount/rat);	\
+	} else							\
+		rat = (incount<<8) / outcount;			\
+}
+#define	PutNextCode(op, c) { /* append c as nbits bits, MSB first; uses caller's locals */ \
+	nextdata = (nextdata << nbits) | c;			\
+	nextbits += nbits;					\
+	*op++ = (unsigned char)(nextdata >> (nextbits-8));		\
+	nextbits -= 8;						\
+	if (nextbits >= 8) {					\
+		*op++ = (unsigned char)(nextdata >> (nextbits-8));	\
+		nextbits -= 8;					\
+	}							\
+	outcount += nbits;					\
+}
+
+/*
+ * Encode a chunk of pixels; returns 1 on success, 0 if there is no
+ * encoder state or a full raw buffer could not be flushed.
+ *
+ * Uses an open addressing double hashing (no chaining) on the
+ * prefix code/next character combination.  We do a variant of
+ * Knuth's algorithm D (vol. 3, sec. 6.4) along with G. Knott's
+ * relatively-prime secondary probe.  Here, the modular division
+ * first probe gives way to a faster exclusive-or manipulation.
+ * Also do block compression with an adaptive reset, whereby the
+ * code table is cleared when the compression ratio decreases,
+ * but after the table fills.  The variable-length output codes
+ * are re-sized at this point, and a CODE_CLEAR is generated
+ * for the decoder.
+ */
+static int
+LZWEncode(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	register LZWCodecState *sp = EncoderState(tif);
+	register long fcode;
+	register hash_t *hp;
+	register int h, c;
+	hcode_t ent;
+	long disp;
+	long incount, outcount, checkpoint;
+	long nextdata, nextbits;
+	int free_ent, maxcode, nbits;
+	tidata_t op, limit;
+
+	(void) s;
+	if (sp == NULL)
+		return (0);
+	/*
+	 * Load local state.
+	 */
+	incount = sp->enc_incount;
+	outcount = sp->enc_outcount;
+	checkpoint = sp->enc_checkpoint;
+	nextdata = sp->lzw_nextdata;
+	nextbits = sp->lzw_nextbits;
+	free_ent = sp->lzw_free_ent;
+	maxcode = sp->lzw_maxcode;
+	nbits = sp->lzw_nbits;
+	op = tif->tif_rawcp;
+	limit = sp->enc_rawlimit;
+	ent = sp->enc_oldcode;
+
+	if (ent == (hcode_t) -1 && cc > 0) {
+		/*
+		 * NB: This is safe because it can only happen
+		 *     at the start of a strip where we know there
+		 *     is space in the data buffer.
+		 */
+		PutNextCode(op, CODE_CLEAR);
+		ent = *bp++; cc--; incount++;
+	}
+	while (cc > 0) {
+		c = *bp++; cc--; incount++;
+		fcode = ((long)c << BITS_MAX) + ent;
+		h = (c << HSHIFT) ^ ent;	/* xor hashing */
+#ifdef _WINDOWS
+		/*
+		 * Check hash index for an overflow.
+		 */
+		if (h >= HSIZE)
+			h -= HSIZE;
+#endif
+		hp = &sp->enc_hashtab[h];
+		if (hp->hash == fcode) {
+			ent = hp->code;
+			continue;
+		}
+		if (hp->hash >= 0) {
+			/*
+			 * Primary hash failed, check secondary hash.
+			 */
+			disp = HSIZE - h;
+			if (h == 0)
+				disp = 1;
+			do {
+				/*
+				 * Avoid pointer arithmetic 'cuz of
+				 * wraparound problems with segments.
+				 */
+				if ((h -= disp) < 0)
+					h += HSIZE;
+				hp = &sp->enc_hashtab[h];
+				if (hp->hash == fcode) {
+					ent = hp->code;
+					goto hit;
+				}
+			} while (hp->hash >= 0);
+		}
+		/*
+		 * New entry, emit code and add to table.  First verify
+		 * there is space in the buffer for the code and any
+		 * potential Clear code that might be emitted below.  The
+		 * value of limit is setup so that there are at least 4
+		 * bytes free--room for 2 codes.
+		 */
+		if (op > limit) {
+			tif->tif_rawcc = (tsize_t)(op - tif->tif_rawdata);
+			if (!TIFFFlushData1(tif))
+				return (0);
+			op = tif->tif_rawdata;
+		}
+		PutNextCode(op, ent);
+		ent = c;
+		hp->code = free_ent++;
+		hp->hash = fcode;
+		if (free_ent == CODE_MAX-1) {
+			/* table is full, emit clear code and reset */
+			cl_hash(sp);
+			sp->enc_ratio = 0;
+			incount = 0;
+			outcount = 0;
+			free_ent = CODE_FIRST;
+			PutNextCode(op, CODE_CLEAR);
+			nbits = BITS_MIN;
+			maxcode = MAXCODE(BITS_MIN);
+		} else {
+			/*
+			 * If the next entry is going to be too big for
+			 * the code size, then increase it, if possible.
+			 */
+			if (free_ent > maxcode) {
+				nbits++;
+				assert(nbits <= BITS_MAX);
+				maxcode = (int) MAXCODE(nbits);
+			} else if (incount >= checkpoint) {
+				long rat;
+				/*
+				 * Check compression ratio and, if things seem
+				 * to be slipping, clear the hash table and
+				 * reset state.  The compression ratio is a
+				 * 24+8-bit fractional number.
+				 */
+				checkpoint = incount+CHECK_GAP;
+				CALCRATIO(sp, rat);
+				if (rat <= sp->enc_ratio) {
+					cl_hash(sp);
+					sp->enc_ratio = 0;
+					incount = 0;
+					outcount = 0;
+					free_ent = CODE_FIRST;
+					PutNextCode(op, CODE_CLEAR);
+					nbits = BITS_MIN;
+					maxcode = MAXCODE(BITS_MIN);
+				} else
+					sp->enc_ratio = rat;
+			}
+		}
+	hit:
+		;
+	}
+
+	/*
+	 * Restore global state.
+	 */
+	sp->enc_incount = incount;
+	sp->enc_outcount = outcount;
+	sp->enc_checkpoint = checkpoint;
+	sp->enc_oldcode = ent;
+	sp->lzw_nextdata = nextdata;
+	sp->lzw_nextbits = nextbits;
+	sp->lzw_free_ent = free_ent;
+	sp->lzw_maxcode = maxcode;
+	sp->lzw_nbits = nbits;
+	tif->tif_rawcp = op;
+	return (1);
+}
+
+/*
+ * Finish off an encoded strip: flush the last string, tack on an End Of
+ * Information code, pad the final byte.  Returns 1, or 0 if a flush fails. */
+static int
+LZWPostEncode(TIFF* tif)
+{
+	register LZWCodecState *sp = EncoderState(tif);
+	tidata_t op = tif->tif_rawcp;
+	long nextbits = sp->lzw_nextbits;
+	long nextdata = sp->lzw_nextdata;
+	long outcount = sp->enc_outcount;	/* updated by PutNextCode; not written back */
+	int nbits = sp->lzw_nbits;
+
+	if (op > sp->enc_rawlimit) {
+		tif->tif_rawcc = (tsize_t)(op - tif->tif_rawdata);
+		if (!TIFFFlushData1(tif))
+			return (0);
+		op = tif->tif_rawdata;
+	}
+	if (sp->enc_oldcode != (hcode_t) -1) {
+		PutNextCode(op, sp->enc_oldcode);
+		sp->enc_oldcode = (hcode_t) -1;
+	}
+	PutNextCode(op, CODE_EOI);
+	if (nextbits > 0)
+		*op++ = (unsigned char)(nextdata << (8-nextbits));
+	tif->tif_rawcc = (tsize_t)(op - tif->tif_rawdata);
+	return (1);
+}
+
+/*
+ * Reset encoding hash table.
+ *
+ * Every slot's hash key is set to -1, the value LZWEncode treats
+ * as "empty": its probe loops only keep going while hash >= 0.
+ * The code member of each slot is left untouched.  All HSIZE
+ * slots are visited, exactly as many as LZWSetupEncode allocates
+ * for the table.
+ */
+static void
+cl_hash(LZWCodecState* sp)
+{
+	hash_t *slot = sp->enc_hashtab;
+	long remaining;
+
+	/*
+	 * A plain forward sweep over the table; the order in which
+	 * the slots are cleared has no observable effect.
+	 */
+	for (remaining = HSIZE; remaining > 0; remaining--) {
+		slot->hash = -1;
+		slot++;
+	}
+}
+
+static void
+LZWCleanup(TIFF* tif)
+{
+	LZWCodecState* state;
+
+	/* Predictor cleanup first, then the tables, then the state block itself. */
+	(void)TIFFPredictorCleanup(tif);
+	assert(tif->tif_data != 0);
+
+	state = DecoderState(tif);
+	if (state->dec_codetab != NULL)
+		_TIFFfree(state->dec_codetab);
+	if (state->enc_hashtab != NULL)
+		_TIFFfree(state->enc_hashtab);
+	_TIFFfree(state);
+	tif->tif_data = NULL;
+	_TIFFSetDefaultCompressionState(tif);
+}
+
+int
+TIFFInitLZW(TIFF* tif, int scheme)
+{
+	LZWCodecState* state;
+
+	assert(scheme == COMPRESSION_LZW);
+	/*
+	 * The state block is allocated up front so that tag methods
+	 * have storage to record values.
+	 */
+	state = (LZWCodecState*) _TIFFmalloc(sizeof (LZWCodecState));
+	tif->tif_data = (tidata_t) state;
+	if (state == NULL) {
+		TIFFErrorExt(tif->tif_clientdata, "TIFFInitLZW", 
+		     "No space for LZW state block");
+		return (0);
+	}
+	state->dec_codetab = NULL;
+	state->dec_decode = NULL;
+	state->enc_hashtab = NULL;
+	state->base.rw_mode = tif->tif_mode;
+
+	/* Install codec methods. */
+	tif->tif_setupdecode = LZWSetupDecode;
+	tif->tif_predecode = LZWPreDecode;
+	tif->tif_decoderow = LZWDecode;
+	tif->tif_decodestrip = LZWDecode;
+	tif->tif_decodetile = LZWDecode;
+	tif->tif_setupencode = LZWSetupEncode;
+	tif->tif_preencode = LZWPreEncode;
+	tif->tif_postencode = LZWPostEncode;
+	tif->tif_encoderow = LZWEncode;
+	tif->tif_encodestrip = LZWEncode;
+	tif->tif_encodetile = LZWEncode;
+	tif->tif_cleanup = LZWCleanup;
+
+	/* Predictor setup comes last. */
+	(void) TIFFPredictorInit(tif);
+	return (1);
+}
+
+/*
+ * Copyright (c) 1985, 1986 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * James A. Woods, derived from original work by Spencer Thomas
+ * and Joseph Orost.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley.  The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+#endif /* LZW_SUPPORT */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_next.c b/src/libtiff/tif_next.c
new file mode 100644
index 0000000..7223f7b
--- /dev/null
+++ b/src/libtiff/tif_next.c
@@ -0,0 +1,144 @@
+/* $Id: tif_next.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#include "tiffiop.h"
+#ifdef NEXT_SUPPORT
+/*
+ * TIFF Library.
+ *
+ * NeXT 2-bit Grey Scale Compression Algorithm Support
+ */
+
+#define SETPIXEL(op, v) {	/* store 2-bit v in the next pixel slot */	\
+	switch (npixels++ & 3) {	/* uses the caller's npixels counter */	\
+	case 0:	op[0]  = (unsigned char) ((v) << 6); break;	/* top bits; resets byte */ \
+	case 1:	op[0] |= (v) << 4; break;	\
+	case 2:	op[0] |= (v) << 2; break;	\
+	case 3:	*op++ |= (v);	   break;	/* byte is full: advance op */ \
+	}					\
+}
+
+#define LITERALROW	0x00	/* opcode: a whole scanline of literal bytes follows */
+#define LITERALSPAN	0x40	/* opcode: 2-byte offset, 2-byte length, then literal bytes */
+#define WHITE   	((1<<2)-1)	/* 2-bit value 3; not referenced elsewhere in this file */
+
+/*
+ * Decode occ bytes of NeXT 2-bit grey data from the raw buffer into buf,
+ * one scanline per opcode.  Returns 1 on success, or 0 (after reporting an
+ * error) if the compressed data is truncated or would overrun a row.
+ */
+static int
+NeXTDecode(TIFF* tif, tidata_t buf, tsize_t occ, tsample_t s)
+{
+	register unsigned char *bp, *op;
+	register tsize_t cc;
+	register int n;
+	tidata_t row;
+	tsize_t scanline = tif->tif_scanlinesize;
+
+	(void) s;
+	/* Each scanline starts off all white (``min-is-black'' is assumed). */
+	for (op = buf, cc = occ; cc-- > 0;)
+		*op++ = 0xff;
+	/* A partial trailing row would be decoded past the end of buf. */
+	if (scanline <= 0 || occ % scanline != 0) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "NeXTDecode: Fractional scanlines cannot be read");
+		return (0);
+	}
+	bp = (unsigned char *)tif->tif_rawcp;
+	cc = tif->tif_rawcc;
+	for (row = buf; (long)occ > 0; occ -= scanline, row += scanline) {
+		if (cc <= 0)		/* no opcode byte left to read */
+			goto bad;
+		n = *bp++, cc--;
+		switch (n) {
+		case LITERALROW:
+			/* The entire scanline is given as literal values. */
+			if (cc < scanline)
+				goto bad;
+			_TIFFmemcpy(row, bp, scanline);
+			bp += scanline;
+			cc -= scanline;
+			break;
+		case LITERALSPAN: {
+			int off;
+			/* Literal span: 2-byte offset, 2-byte length, data. */
+			if (cc < 4)
+				goto bad;
+			off = (bp[0] * 256) + bp[1];
+			n = (bp[2] * 256) + bp[3];
+			if (cc < 4+n || off+n > scanline)
+				goto bad;
+			_TIFFmemcpy(row+off, bp+4, n);
+			bp += 4+n;
+			cc -= 4+n;
+			break;
+		}
+		default: {
+			register int npixels = 0, grey;
+			unsigned long imagewidth = tif->tif_dir.td_imagewidth;
+
+			/* Run mode: expand <color><npixels> codes until the row is full. */
+			op = row;
+			for (;;) {
+				grey = (n>>6) & 0x3;
+				n &= 0x3f;
+				/* Never let a run write beyond this row. */
+				while (n-- > 0 && npixels < (int) imagewidth &&
+				    (npixels >> 2) < scanline)
+					SETPIXEL(op, grey);
+				if (npixels >= (int) imagewidth)
+					break;
+				if ((npixels >> 2) >= scanline || cc == 0)
+					goto bad;
+				n = *bp++, cc--;
+			}
+			break;
+		}
+		}
+	}
+	tif->tif_rawcp = (tidata_t) bp;
+	tif->tif_rawcc = cc;
+	return (1);
+bad:
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "NeXTDecode: Not enough data for scanline %ld",
+	    (long) tif->tif_row);
+	return (0);
+}
+
+/* Install the NeXT decoder for row, strip and tile reads; always returns 1. */
+int
+TIFFInitNeXT(TIFF* tif, int scheme)
+{
+	(void) scheme;		/* the scheme number is not consulted */
+	tif->tif_decoderow = tif->tif_decodestrip =
+	    tif->tif_decodetile = NeXTDecode;
+	return (1);
+}
+#endif /* NEXT_SUPPORT */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_ojpeg.c b/src/libtiff/tif_ojpeg.c
new file mode 100644
index 0000000..ecc5e08
--- /dev/null
+++ b/src/libtiff/tif_ojpeg.c
@@ -0,0 +1,2629 @@
+/* $Id: tif_ojpeg.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+#include "tiffiop.h"
+#ifdef OJPEG_SUPPORT
+
+/* JPEG Compression support, as per the original TIFF 6.0 specification.
+
+   WARNING: KLUDGE ALERT!  The type of JPEG encapsulation defined by the TIFF
+                           Version 6.0 specification is now totally obsolete and
+   deprecated for new applications and images.  This file is an unsupported hack
+   that was created solely in order to read (but NOT write!) a few old,
+   unconverted images still present on some users' computer systems.  The code
+   isn't pretty or robust, and it won't read every "old format" JPEG-in-TIFF
+   file (see Samuel Leffler's draft "TIFF Technical Note No. 2" for a long and
+   incomplete list of known problems), but it seems to work well enough in the
+   few cases of practical interest to the author; so, "caveat emptor"!  This
+   file should NEVER be enhanced to write new images using anything other than
+   the latest approved JPEG-in-TIFF encapsulation method, implemented by the
+   "tif_jpeg.c" file elsewhere in this library.
+
+   This file interfaces with Release 6B of the JPEG Library written by the
+   Independent JPEG Group, which you can find on the Internet at:
+   ftp://ftp.uu.net:/graphics/jpeg/.
+
+   The "C" Preprocessor macros, "[CD]_LOSSLESS_SUPPORTED", are defined by your
+   JPEG Library Version 6B only if you have applied a (massive!) patch by Ken
+   Murchison of Oceana Matrix Ltd. <ken@oceana.com> to support lossless Huffman
+   encoding (TIFF "JPEGProc" tag value = 14).  This patch can be found on the
+   Internet at: ftp://ftp.oceana.com/pub/ljpeg-6b.tar.gz.
+
+   Some old files produced by the Wang Imaging application for Microsoft Windows
+   apparently can be decoded only with a special patch to the JPEG Library,
+   which defines a subroutine named "jpeg_reset_huff_decode()" in its "jdhuff.c"
+   module (the "jdshuff.c" module, if Ken Murchison's patch has been applied).
+   Unfortunately the patch differs slightly in each case, and some TIFF Library
+   users have reported problems finding the code, so both versions appear below;
+   you should carefully extract and apply only the version that applies to your
+   JPEG Library!
+
+   Contributed by Scott Marovich <marovich@hpl.hp.com> with considerable help
+   from Charles Auer <Bumble731@msn.com> to unravel the mysteries of image files
+   created by the Wang Imaging application for Microsoft Windows.
+*/
+#if 0  /* Patch for JPEG Library WITHOUT lossless Huffman coding */
+*** jdhuff.c.orig	Mon Oct 20 17:51:10 1997
+--- jdhuff.c	Sun Nov 11 17:33:58 2001
+***************
+*** 648,651 ****
+--- 648,683 ----
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
+    }
+  }
++ 
++ /*
++  * BEWARE OF KLUDGE:  This subroutine is a hack for decoding illegal JPEG-in-
++  *                    TIFF encapsulations produced by Microsoft's Wang Imaging
++  * for Windows application with the public-domain TIFF Library.  Based upon an
++  * examination of selected output files, this program apparently divides a JPEG
++  * bit-stream into consecutive horizontal TIFF "strips", such that the JPEG
++  * encoder's/decoder's DC coefficients for each image component are reset before
++  * each "strip".  Moreover, a "strip" is not necessarily encoded in a multiple
++  * of 8 bits, so one must sometimes discard 1-7 bits at the end of each "strip"
++  * for alignment to the next input-Byte storage boundary.  IJG JPEG Library
++  * decoder state is not normally exposed to client applications, so this sub-
++  * routine provides the TIFF Library with a "hook" to make these corrections.
++  * It should be called after "jpeg_start_decompress()" and before
++  * "jpeg_finish_decompress()", just before decoding each "strip" using
++  * "jpeg_read_raw_data()" or "jpeg_read_scanlines()".
++  *
++  * This kludge is not sanctioned or supported by the Independent JPEG Group, and
++  * future changes to the IJG JPEG Library might invalidate it.  Do not send bug
++  * reports about this code to IJG developers.  Instead, contact the author for
++  * advice: Scott B. Marovich <marovich@hpl.hp.com>, Hewlett-Packard Labs, 6/01.
++  */
++ GLOBAL(void)
++ jpeg_reset_huff_decode (register j_decompress_ptr cinfo)
++ { register huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
++   register int ci = 0;
++ 
++   /* Discard encoded input bits, up to the next Byte boundary */
++   entropy->bitstate.bits_left &= ~7;
++   /* Re-initialize DC predictions to 0 */
++   do entropy->saved.last_dc_val[ci] = 0; while (++ci < cinfo->comps_in_scan);
++ }
+#endif /* Patch for JPEG Library WITHOUT lossless Huffman coding */
+#if 0  /* Patch for JPEG Library WITH lossless Huffman coding */
+*** jdshuff.c.orig	Mon Mar 11 16:44:54 2002
+--- jdshuff.c	Mon Mar 11 16:44:54 2002
+***************
+*** 357,360 ****
+--- 357,393 ----
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
+    }
+  }
++ 
++ /*
++  * BEWARE OF KLUDGE:  This subroutine is a hack for decoding illegal JPEG-in-
++  *                    TIFF encapsulations produced by Microsoft's Wang Imaging
++  * for Windows application with the public-domain TIFF Library.  Based upon an
++  * examination of selected output files, this program apparently divides a JPEG
++  * bit-stream into consecutive horizontal TIFF "strips", such that the JPEG
++  * encoder's/decoder's DC coefficients for each image component are reset before
++  * each "strip".  Moreover, a "strip" is not necessarily encoded in a multiple
++  * of 8 bits, so one must sometimes discard 1-7 bits at the end of each "strip"
++  * for alignment to the next input-Byte storage boundary.  IJG JPEG Library
++  * decoder state is not normally exposed to client applications, so this sub-
++  * routine provides the TIFF Library with a "hook" to make these corrections.
++  * It should be called after "jpeg_start_decompress()" and before
++  * "jpeg_finish_decompress()", just before decoding each "strip" using
++  * "jpeg_read_raw_data()" or "jpeg_read_scanlines()".
++  *
++  * This kludge is not sanctioned or supported by the Independent JPEG Group, and
++  * future changes to the IJG JPEG Library might invalidate it.  Do not send bug
++  * reports about this code to IJG developers.  Instead, contact the author for
++  * advice: Scott B. Marovich <marovich@hpl.hp.com>, Hewlett-Packard Labs, 6/01.
++  */
++ GLOBAL(void)
++ jpeg_reset_huff_decode (register j_decompress_ptr cinfo)
++ { register shuff_entropy_ptr entropy = (shuff_entropy_ptr)
++                                        ((j_lossy_d_ptr)cinfo->codec)->entropy_private;
++   register int ci = 0;
++ 
++   /* Discard encoded input bits, up to the next Byte boundary */
++   entropy->bitstate.bits_left &= ~7;
++   /* Re-initialize DC predictions to 0 */
++   do entropy->saved.last_dc_val[ci] = 0; while (++ci < cinfo->comps_in_scan);
++ }
+#endif /* Patch for JPEG Library WITH lossless Huffman coding */
+#include <setjmp.h>
+#include <stdio.h>
+#ifdef FAR
+#undef FAR /* Undefine FAR to avoid conflict with JPEG definition */
+#endif
+#define JPEG_INTERNALS /* Include "jpegint.h" for "DSTATE_*" symbols */
+#define JPEG_CJPEG_DJPEG /* Include all Version 6B+ "jconfig.h" options */
+#undef INLINE
+#include "jpeglib.h"
+#undef JPEG_CJPEG_DJPEG
+#undef JPEG_INTERNALS
+
+/* Hack for files produced by Wang Imaging application on Microsoft Windows */
+extern void jpeg_reset_huff_decode(j_decompress_ptr);
+
+/* On some machines, it may be worthwhile to use "_setjmp()" or "sigsetjmp()"
+   instead of "setjmp()".  These macros make it easier:
+*/
+#define SETJMP(jbuf)setjmp(jbuf)
+#define LONGJMP(jbuf,code)longjmp(jbuf,code)
+#define JMP_BUF jmp_buf
+
+#define TIFFTAG_WANG_PAGECONTROL 32934
+
+/* Bit-vector offsets for keeping track of TIFF records that we've parsed. */
+
+#define FIELD_JPEGPROC FIELD_CODEC
+#define FIELD_JPEGIFOFFSET (FIELD_CODEC+1)
+#define FIELD_JPEGIFBYTECOUNT (FIELD_CODEC+2)
+#define FIELD_JPEGRESTARTINTERVAL (FIELD_CODEC+3)
+#define FIELD_JPEGTABLES (FIELD_CODEC+4) /* New, post-6.0 JPEG-in-TIFF tag! */
+#define FIELD_JPEGLOSSLESSPREDICTORS (FIELD_CODEC+5)
+#define FIELD_JPEGPOINTTRANSFORM (FIELD_CODEC+6)
+#define FIELD_JPEGQTABLES (FIELD_CODEC+7)
+#define FIELD_JPEGDCTABLES (FIELD_CODEC+8)
+#define FIELD_JPEGACTABLES (FIELD_CODEC+9)
+#define FIELD_WANG_PAGECONTROL (FIELD_CODEC+10)
+#define FIELD_JPEGCOLORMODE (FIELD_CODEC+11)
+
+typedef struct jpeg_destination_mgr jpeg_destination_mgr;
+typedef struct jpeg_source_mgr jpeg_source_mgr;
+typedef struct jpeg_error_mgr jpeg_error_mgr;
+
+/* State variable for each open TIFF file that uses "libjpeg" for JPEG
+   decompression.  (Note:  This file should NEVER perform JPEG compression
+   except in the manner implemented by the "tif_jpeg.c" file, elsewhere in this
+   library; see comments above.)  JPEG Library internal state is recorded in a
+   "jpeg_{de}compress_struct", while a "jpeg_common_struct" records a few items
+   common to both compression and expansion.  The "cinfo" field containing JPEG
+   Library state MUST be the 1st member of our own state variable, so that we
+   can safely "cast" pointers back and forth.
+*/
+typedef struct             /* This module's private, per-image state variable */
+  {
+    union         /* JPEG Library state variable; this MUST be our 1st field! */
+      {
+        struct jpeg_compress_struct c;                    /* Compression view */
+        struct jpeg_decompress_struct d;                /* Decompression view */
+        struct jpeg_common_struct comm;         /* Items common to both views */
+      } cinfo;
+    jpeg_error_mgr err;                         /* JPEG Library error manager */
+    JMP_BUF exit_jmpbuf;             /* ...for catching JPEG Library failures */
+#   ifdef never
+
+ /* (The following two fields could be a "union", but they're small enough that
+    it's not worth the effort.)
+ */
+    jpeg_destination_mgr dest;             /* Destination for compressed data */
+#   endif
+    jpeg_source_mgr src;       /* Supplies compressed input to the JPEG Library */
+    JSAMPARRAY ds_buffer[MAX_COMPONENTS]; /* ->Temporary downsampling buffers */
+    TIFF *tif;                        /* Reverse pointer, needed by some code */
+    TIFFVGetMethod vgetparent;                    /* "Super class" methods... */
+    TIFFVSetMethod vsetparent;
+    TIFFStripMethod defsparent;
+    TIFFTileMethod deftparent;
+    void *jpegtables;           /* ->"New" JPEG tables, if we synthesized any */
+    uint32 is_WANG,    /* <=> Wang Imaging for Microsoft Windows output file? */
+           jpegtables_length;   /* Length of "new" JPEG tables, if they exist */
+    tsize_t bytesperline;          /* No. of decompressed Bytes per scan line */
+    int jpegquality,                             /* Compression quality level */
+        jpegtablesmode,                          /* What to put in JPEGTables */
+        samplesperclump,   /* Sum over components of h_samp_factor*v_samp_factor */
+        scancount;                           /* No. of scan lines accumulated */
+    J_COLOR_SPACE photometric;          /* IJG JPEG Library's photometry code */
+    unsigned char h_sampling,                          /* Luminance sampling factors */
+           v_sampling,
+           jpegcolormode;           /* Who performs RGB <-> YCbCr conversion? */
+			/* JPEGCOLORMODE_RAW <=> TIFF Library or its client */
+			/* JPEGCOLORMODE_RGB <=> JPEG Library               */
+    /* These fields are added to support TIFFGetField */
+    uint16 jpegproc;
+    uint32 jpegifoffset;
+    uint32 jpegifbytecount;
+    uint32 jpegrestartinterval;
+    void* jpeglosslesspredictors;
+    uint16 jpeglosslesspredictors_length;
+    void* jpegpointtransform;
+    uint32 jpegpointtransform_length;
+    void* jpegqtables;
+    uint32 jpegqtables_length;
+    void* jpegdctables;
+    uint32 jpegdctables_length;
+    void* jpegactables;
+    uint32 jpegactables_length;
+
+  } OJPEGState;
+#define OJState(tif)((OJPEGState*)(tif)->tif_data)
+
+static const TIFFFieldInfo ojpegFieldInfo[]=/* JPEG-specific TIFF-record tags */
+  {
+ /* NOTE(review): entries look like {tag, read count, write count, type, field bit, 2 flags, name}; confirm vs. TIFFFieldInfo. */
+ /* This is the current JPEG-in-TIFF metadata-encapsulation tag, and its
+    treatment in this file is idiosyncratic.  It should never appear in a
+    "source" image conforming to the TIFF Version 6.0 specification, so we
+    arrange to report an error if it appears.  But in order to support possible
+    future conversion of "old" JPEG-in-TIFF encapsulations to "new" ones, we
+    might wish to synthesize an equivalent value to be returned by the TIFF
+    Library's "getfield" method.  So, this table tells the TIFF Library to pass
+    these records to us in order to filter them below.
+ */
+    {
+      TIFFTAG_JPEGTABLES            ,TIFF_VARIABLE2,TIFF_VARIABLE2,
+      TIFF_UNDEFINED,FIELD_JPEGTABLES            ,FALSE,TRUE ,"JPEGTables"
+    },
+
+ /* These tags are defined by the TIFF Version 6.0 specification and are now
+    obsolete.  This module reads them from an old "source" image, but it never
+    writes them to a new "destination" image.
+ */
+    {
+      TIFFTAG_JPEGPROC              ,1            ,1            ,
+      TIFF_SHORT    ,FIELD_JPEGPROC              ,FALSE,FALSE,"JPEGProc"
+    },
+    {
+      TIFFTAG_JPEGIFOFFSET          ,1            ,1            ,
+      TIFF_LONG     ,FIELD_JPEGIFOFFSET          ,FALSE,FALSE,"JPEGInterchangeFormat"
+    },
+    {
+      TIFFTAG_JPEGIFBYTECOUNT       ,1            ,1            ,
+      TIFF_LONG     ,FIELD_JPEGIFBYTECOUNT       ,FALSE,FALSE,"JPEGInterchangeFormatLength"
+    },
+    {
+      TIFFTAG_JPEGRESTARTINTERVAL   ,1            ,1            ,
+      TIFF_SHORT    ,FIELD_JPEGRESTARTINTERVAL   ,FALSE,FALSE,"JPEGRestartInterval"
+    },
+    {
+      TIFFTAG_JPEGLOSSLESSPREDICTORS,TIFF_VARIABLE,TIFF_VARIABLE,
+      TIFF_SHORT    ,FIELD_JPEGLOSSLESSPREDICTORS,FALSE,TRUE ,"JPEGLosslessPredictors"
+    },
+    {
+      TIFFTAG_JPEGPOINTTRANSFORM    ,TIFF_VARIABLE,TIFF_VARIABLE,
+      TIFF_SHORT    ,FIELD_JPEGPOINTTRANSFORM    ,FALSE,TRUE ,"JPEGPointTransforms"
+    },
+    {
+      TIFFTAG_JPEGQTABLES           ,TIFF_VARIABLE,TIFF_VARIABLE,
+      TIFF_LONG     ,FIELD_JPEGQTABLES           ,FALSE,TRUE ,"JPEGQTables"
+    },
+    {
+      TIFFTAG_JPEGDCTABLES          ,TIFF_VARIABLE,TIFF_VARIABLE,
+      TIFF_LONG     ,FIELD_JPEGDCTABLES          ,FALSE,TRUE ,"JPEGDCTables"
+    },
+    {
+      TIFFTAG_JPEGACTABLES          ,TIFF_VARIABLE,TIFF_VARIABLE,
+      TIFF_LONG     ,FIELD_JPEGACTABLES          ,FALSE,TRUE ,"JPEGACTables"
+    },
+    {
+      TIFFTAG_WANG_PAGECONTROL      ,TIFF_VARIABLE,1            ,
+      TIFF_LONG     ,FIELD_WANG_PAGECONTROL      ,FALSE,FALSE,"WANG PageControl"
+    },
+
+ /* This is a pseudo tag intended for internal use only by the TIFF Library and
+    its clients, which should never appear in an input/output image file.  It
+    specifies whether the TIFF Library (or its client) should do YCbCr <-> RGB
+    color-space conversion (JPEGCOLORMODE_RAW <=> 0) or whether we should ask
+    the JPEG Library to do it (JPEGCOLORMODE_RGB <=> 1).
+ */
+    {
+      TIFFTAG_JPEGCOLORMODE         ,0            ,0            ,
+      TIFF_ANY      ,FIELD_PSEUDO                ,FALSE,FALSE,"JPEGColorMode"
+    }
+  };
+static const char JPEGLib_name[]={"JPEG Library"}, /* module name for JPEG Library diagnostics */
+                  bad_bps[]={"%u BitsPerSample not allowed for JPEG"}, /* format: takes the bit depth */
+                  bad_photometry[]={"PhotometricInterpretation %u not allowed for JPEG"},
+                  bad_subsampling[]={"invalid YCbCr subsampling factor(s)"},
+#                 ifdef never
+                  no_write_frac[]={"fractional scan line discarded"}, /* used only by the disabled encoder */
+#                 endif
+                  no_read_frac[]={"fractional scan line not read"},
+                  no_jtable_space[]={"No space for JPEGTables"};
+
+/* The following diagnostic subroutines interface with and replace default
+   subroutines in the JPEG Library.  Our basic strategy is to use "setjmp()"/
+   "longjmp()" in order to return control to the TIFF Library when the JPEG
+   library detects an error, and to use TIFF Library subroutines for displaying
+   diagnostic messages to a client application.
+*/
+static void
+TIFFojpeg_error_exit(register j_common_ptr cinfo)
+{
+    char buffer[JMSG_LENGTH_MAX];
+    int code = cinfo->err->msg_code;
+    /* Wang Imaging files provoke duplicate SOF/SOI errors; tolerate only those. */
+    if (((OJPEGState *)cinfo)->is_WANG) {
+	if (code == JERR_SOF_DUPLICATE || code == JERR_SOI_DUPLICATE)
+	    return;	    /* ignore it */
+    }
+    /* The message is data, not a format: pass it as the argument of "%s". */
+    (*cinfo->err->format_message)(cinfo,buffer);
+    TIFFError(JPEGLib_name,"%s",buffer); /* Display error message */
+    jpeg_abort(cinfo); /* Clean up JPEG Library state */
+    LONGJMP(((OJPEGState *)cinfo)->exit_jmpbuf,1); /* Return to TIFF client */
+}
+
+static void
+TIFFojpeg_output_message(register j_common_ptr cinfo)
+  { char buffer[JMSG_LENGTH_MAX];
+
+ /* Invoked only for warning messages, since the JPEG Library's "error_exit"
+    method does its own thing and "trace_level" is never set > 0.  The text is
+    passed as a "%s" argument, not as the format: it may contain '%' characters.
+ */
+    (*cinfo->err->format_message)(cinfo,buffer);
+    TIFFWarning(JPEGLib_name,"%s",buffer);
+  }
+
+/* The following subroutines, which also interface with the JPEG Library, exist
+   mainly to limit the side effects of "setjmp()" and convert JPEG normal/error
+   conditions into TIFF Library return codes.
+*/
+#define CALLJPEG(sp,fail,op)(SETJMP((sp)->exit_jmpbuf)?(fail):(op))
+#define CALLVJPEG(sp,op)CALLJPEG(sp,0,((op),1))
+#ifdef never
+
+static int
+TIFFojpeg_create_compress(register OJPEGState *sp)
+  {
+    sp->cinfo.c.err = jpeg_std_error(&sp->err); /* Initialize error handling */
+    sp->err.error_exit = TIFFojpeg_error_exit; /* Fatal errors "longjmp()" back to us */
+    sp->err.output_message = TIFFojpeg_output_message; /* Warnings go to "TIFFWarning()" */
+    return CALLVJPEG(sp,jpeg_create_compress(&sp->cinfo.c)); /* 1 = OK, 0 = JPEG Library error */
+  }
+
+/* The following subroutines comprise a JPEG Library "destination" data manager
+   by directing compressed data from the JPEG Library to a TIFF Library output
+   buffer.
+*/
+static void
+std_init_destination(register j_compress_ptr cinfo){} /* "Dummy" stub: intentionally does nothing */
+
+static boolean
+std_empty_output_buffer(register j_compress_ptr cinfo)
+  {
+#   define sp ((OJPEGState *)cinfo)
+    register TIFF *tif = sp->tif;
+ /* Flush the full raw-data buffer, then offer the whole buffer for reuse. */
+    tif->tif_rawcc = tif->tif_rawdatasize; /* Entire buffer has been filled */
+    TIFFFlushData1(tif); /* NOTE(review): the result of this call is discarded */
+    sp->dest.next_output_byte = (JOCTET *)tif->tif_rawdata;
+    sp->dest.free_in_buffer = (size_t)tif->tif_rawdatasize;
+    return TRUE;
+#   undef sp
+  }
+
+static void
+std_term_destination(register j_compress_ptr cinfo)
+  {
+#   define sp ((OJPEGState *)cinfo)
+    register TIFF *tif = sp->tif;
+ /* Record where output stopped and how many Bytes of the buffer were used. */
+ /* NB: The TIFF Library does the final buffer flush. */
+    tif->tif_rawcp = (tidata_t)sp->dest.next_output_byte;
+    tif->tif_rawcc = tif->tif_rawdatasize - (tsize_t)sp->dest.free_in_buffer;
+#   undef sp
+  }
+
+/* Alternate destination manager to output JPEGTables field: */
+
+static void
+tables_init_destination(register j_compress_ptr cinfo)
+  {
+#   define sp ((OJPEGState *)cinfo)
+ /* The "jpegtables_length" field is the allocated buffer size while building */
+    sp->dest.next_output_byte = (JOCTET *)sp->jpegtables; /* Start of the tables buffer */
+    sp->dest.free_in_buffer = (size_t)sp->jpegtables_length; /* All of it is free */
+#   undef sp
+  }
+
+static boolean
+tables_empty_output_buffer(register j_compress_ptr cinfo)
+  { void *newbuf;
+#   define sp ((OJPEGState *)cinfo)
+ /* "newbuf" keeps the old pointer intact if "_TIFFrealloc()" fails. */
+ /* The entire buffer has been filled, so enlarge it by 1000 bytes. */
+    if (!( newbuf = _TIFFrealloc( (tdata_t)sp->jpegtables
+                                , (tsize_t)(sp->jpegtables_length + 1000)
+                                )
+         )
+       ) ERREXIT1(cinfo,JERR_OUT_OF_MEMORY,100);
+    sp->dest.next_output_byte = (JOCTET *)newbuf + sp->jpegtables_length; /* Resume after old data */
+    sp->dest.free_in_buffer = (size_t)1000; /* Only the newly added space is free */
+    sp->jpegtables = newbuf;
+    sp->jpegtables_length += 1000;
+    return TRUE;
+#   undef sp
+  }
+
+static void
+tables_term_destination(register j_compress_ptr cinfo)
+  {
+#   define sp ((OJPEGState *)cinfo)
+ /* Set tables length to no. of Bytes actually emitted. */
+    sp->jpegtables_length -= sp->dest.free_in_buffer; /* Allocated size minus unused space */
+#   undef sp
+  }
+
+/*ARGSUSED*/ static int
+TIFFojpeg_tables_dest(register OJPEGState *sp, TIFF *tif)
+  {
+ /* Returns 1 once the "tables" destination is installed, 0 if allocation fails. */
+ /* Allocate a working buffer for building tables.  The initial size is 1000
+    Bytes, which is usually adequate.
+ */
+    if (sp->jpegtables) _TIFFfree(sp->jpegtables); /* Discard any earlier buffer */
+    if (!(sp->jpegtables = (void*)
+                           _TIFFmalloc((tsize_t)(sp->jpegtables_length = 1000))
+         )
+       )
+      {
+        sp->jpegtables_length = 0; /* No buffer, so no length */
+        TIFFError("TIFFojpeg_tables_dest",no_jtable_space);
+        return 0;
+      };
+    sp->cinfo.c.dest = &sp->dest; /* Route compressor output to the methods below */
+    sp->dest.init_destination = tables_init_destination;
+    sp->dest.empty_output_buffer = tables_empty_output_buffer;
+    sp->dest.term_destination = tables_term_destination;
+    return 1;
+  }
+#else /* well, hardly ever */
+
+static int
+_notSupported(register TIFF *tif)
+  { const TIFFCodec *c = TIFFFindCODEC(tif->tif_dir.td_compression);
+ /* Report the unsupported scheme and return 0; "c" is null if no codec matched. */
+    TIFFError(tif->tif_name,"%s compression not supported",c ? c->name : "Unknown");
+    return 0;
+  }
+#endif /* never */
+
+/* The following subroutines comprise a JPEG Library "source" data manager by
+   directing compressed data to the JPEG Library from a TIFF Library input
+   buffer.
+*/
+static void
+std_init_source(register j_decompress_ptr cinfo)
+  { OJPEGState *state = (OJPEGState *)cinfo;
+    jpeg_source_mgr *source = &state->src;
+    TIFF *image = state->tif;
+
+ /* Hand the JPEG Library the TIFF raw-data buffer, but only when the source
+    manager holds no pending input; otherwise leave its pointers untouched.
+ */
+    if (source->bytes_in_buffer != 0) return;
+    source->next_input_byte = (const JOCTET *)image->tif_rawdata;
+    source->bytes_in_buffer = (size_t)image->tif_rawcc;
+  }
+
+static boolean
+std_fill_input_buffer(register j_decompress_ptr cinfo)
+  { static const JOCTET fake_EOI[2]={0xFF,JPEG_EOI};
+    jpeg_source_mgr *source = &((OJPEGState *)cinfo)->src;
+
+ /* A whole strip/tile is read into memory before the decompressor is called,
+    so "init_source" should already have supplied all of the data and control
+    should never get here.  If it does anyway, issue a warning and feed the
+    JPEG Library a synthetic EOI marker in place of the missing input.
+ */
+    WARNMS(cinfo,JWRN_JPEG_EOF);
+    source->bytes_in_buffer = sizeof fake_EOI;
+    source->next_input_byte = fake_EOI;
+    return TRUE;
+  }
+
+static void
+std_skip_input_data(register j_decompress_ptr cinfo, long num_bytes)
+  { jpeg_source_mgr *source = &((OJPEGState *)cinfo)->src;
+
+ /* Advance the input position by "num_bytes"; zero or negative requests are
+    ignored.  A request that is larger than what remains in the buffer cannot
+    be honored, so it is handled like an exhausted input buffer instead.
+ */
+    if (num_bytes <= 0) return;
+    if (num_bytes > (long)source->bytes_in_buffer)
+      {
+        (void)std_fill_input_buffer(cinfo);
+        return;
+      }
+    source->next_input_byte += (size_t)num_bytes;
+    source->bytes_in_buffer -= (size_t)num_bytes;
+  }
+
+/*ARGSUSED*/ static void
+std_term_source(register j_decompress_ptr cinfo){} /* "Dummy" stub: intentionally does nothing */
+
+/* Allocate temporary I/O buffers for downsampled data, using values computed in
+   "jpeg_start_{de}compress()".  We use the JPEG Library's allocator so that
+   buffers will be released automatically when done with a strip/tile.  This is
+   also a handy place to compute samplesperclump, bytesperline, etc.
+*/
+static int
+alloc_downsampled_buffers(TIFF *tif,jpeg_component_info *comp_info,
+                          int num_components)
+  { register OJPEGState *sp = OJState(tif);
+ /* Returns 1 on success, or 0 as soon as one buffer allocation fails. */
+    sp->samplesperclump = 0;
+    if (num_components > 0)
+      { tsize_t size = sp->cinfo.comm.is_decompressor /* Data-unit size: DCTSIZE unless lossless support overrides it */
+#                    ifdef D_LOSSLESS_SUPPORTED
+                     ? sp->cinfo.d.min_codec_data_unit
+#                    else
+                     ? DCTSIZE
+#                    endif
+#                    ifdef C_LOSSLESS_SUPPORTED
+                     : sp->cinfo.c.data_unit;
+#                    else
+                     : DCTSIZE;
+#                    endif
+        int ci = 0;
+        register jpeg_component_info *compptr = comp_info;
+ /* One sample array per component; "samplesperclump" accumulates h*v factors. */
+        do
+          { JSAMPARRAY buf;
+ /* "CALLJPEG" yields 0 if the JPEG Library bails out via "longjmp()". */
+            sp->samplesperclump +=
+              compptr->h_samp_factor * compptr->v_samp_factor;
+#           if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+            if (!(buf = CALLJPEG(sp,0,(*sp->cinfo.comm.mem->alloc_sarray)(&sp->cinfo.comm,JPOOL_IMAGE,compptr->width_in_data_units*size,compptr->v_samp_factor*size))))
+#           else
+            if (!(buf = CALLJPEG(sp,0,(*sp->cinfo.comm.mem->alloc_sarray)(&sp->cinfo.comm,JPOOL_IMAGE,compptr->width_in_blocks*size,compptr->v_samp_factor*size))))
+#           endif
+              return 0;
+            sp->ds_buffer[ci] = buf;
+          }
+        while (++compptr,++ci < num_components);
+      };
+    return 1;
+  }
+#ifdef never
+
+/* JPEG Encoding begins here. */
+
+/*ARGSUSED*/ static int
+OJPEGEncode(register TIFF *tif,tidata_t buf,tsize_t cc,tsample_t s)
+  { tsize_t rows;                          /* No. of unprocessed rows in file */
+    register OJPEGState *sp = OJState(tif);
+ /* Encode whole scan lines of NOT down-sampled data (the standard case).  A
+    trailing partial line draws a warning and is dropped; rows beyond the image
+    height are ignored.  Returns 1 on success, 0 if the JPEG Library fails.
+ */
+    if (cc % sp->bytesperline) TIFFWarning(tif->tif_name,no_write_frac);
+    if ( (cc /= sp->bytesperline)  /* No. of complete rows in caller's buffer */
+       > (rows = sp->cinfo.c.image_height - sp->cinfo.c.next_scanline)
+       ) cc = rows;
+    while (--cc >= 0)
+      {
+        if (   CALLJPEG(sp,-1,jpeg_write_scanlines(&sp->cinfo.c,(JSAMPARRAY)&buf,1))
+            != 1
+           ) return 0;
+        ++tif->tif_row;
+        buf += sp->bytesperline;
+      };
+    return 1;
+  }
+
+/*ARGSUSED*/ static int
+OJPEGEncodeRaw(register TIFF *tif,tidata_t buf,tsize_t cc,tsample_t s)
+  { tsize_t rows;                          /* No. of unprocessed rows in file */
+    JDIMENSION lines_per_MCU, size;
+    register OJPEGState *sp = OJState(tif);
+
+ /* Encode a chunk of pixels, where returned data is down-sampled as per the
+    sampling factors.  The data is expected to be written in scan-line
+    multiples; a trailing partial line draws a warning and is dropped.  Returns
+    1 on success, or 0 if the JPEG Library does not accept a full MCU row.
+ */
+    if (cc % sp->bytesperline) TIFFWarning(tif->tif_name,no_write_frac);
+    if ( (cc /= sp->bytesperline)  /* No. of complete rows in caller's buffer */
+       > (rows = sp->cinfo.c.image_height - sp->cinfo.c.next_scanline)
+       ) cc = rows;
+#   ifdef C_LOSSLESS_SUPPORTED
+    lines_per_MCU = sp->cinfo.c.max_samp_factor*(size = sp->cinfo.c.data_unit);
+#   else
+    lines_per_MCU = sp->cinfo.c.max_samp_factor*(size = DCTSIZE);
+#   endif
+    while (--cc >= 0)
+      { int ci = 0, clumpoffset = 0;
+        register jpeg_component_info *compptr = sp->cinfo.c.comp_info;
+
+     /* The fastest way to separate the data is to make 1 pass over the scan
+        line for each row of each component.
+     */
+        do
+          { int ypos = 0;
+
+            do
+              { int padding;
+                register JSAMPLE *inptr = (JSAMPLE*)buf + clumpoffset,
+                                 *outptr =
+                  sp->ds_buffer[ci][sp->scancount*compptr->v_samp_factor+ypos];
+             /* Cb,Cr both have sampling factors 1, so this is correct */
+                register int clumps_per_line =
+                  sp->cinfo.c.comp_info[1].downsampled_width,
+                             xpos;
+
+                padding = (int)
+#                         ifdef C_LOSSLESS_SUPPORTED
+                          ( compptr->width_in_data_units * size
+#                         else
+                          ( compptr->width_in_blocks * size
+#                         endif
+                          - clumps_per_line * compptr->h_samp_factor
+                          );
+                if (compptr->h_samp_factor == 1) /* Cb & Cr fast path */
+                  do *outptr++ = *inptr;
+                  while ((inptr += sp->samplesperclump),--clumps_per_line > 0);
+                else /* general case */
+                  do
+                    {
+                      xpos = 0;
+                      do *outptr++ = inptr[xpos];
+                      while (++xpos < compptr->h_samp_factor);
+                    }
+                  while ((inptr += sp->samplesperclump),--clumps_per_line > 0);
+                xpos = 0; /* Pad each scan line as needed */
+                do outptr[0] = outptr[-1]; while (++outptr,++xpos < padding);
+                clumpoffset += compptr->h_samp_factor;
+              }
+            while (++ypos < compptr->v_samp_factor);
+          }
+        while (++compptr,++ci < sp->cinfo.c.num_components);
+        if (++sp->scancount >= size)
+          {
+            if (   CALLJPEG(sp,-1,jpeg_write_raw_data(&sp->cinfo.c,sp->ds_buffer,lines_per_MCU))
+                != lines_per_MCU
+               ) return 0;
+            sp->scancount = 0;
+          };
+        ++tif->tif_row;
+        buf += sp->bytesperline;
+      };
+    return 1;
+  }
+
+static int
+OJPEGSetupEncode(register TIFF *tif)
+  { static const char module[]={"OJPEGSetupEncode"};
+    uint32 segment_height, segment_width;
+    int status = 1;                              /* Assume success by default */
+    register OJPEGState *sp = OJState(tif);
+#   define td (&tif->tif_dir)
+ /* Check the TIFF fields, set up the JPEG compressor, start it; 0 = failure. */
+ /* Verify miscellaneous parameters.  This will need work if the TIFF Library
+    ever supports different depths for different components, or if the JPEG
+    Library ever supports run-time depth selection.  Neither seems imminent.
+ */
+    if (td->td_bitspersample != 8)
+      {
+        TIFFError(module,bad_bps,td->td_bitspersample);
+        status = 0;
+      };
+
+ /* The TIFF Version 6.0 specification and IJG JPEG Library accept different
+    sets of color spaces, so verify that our image belongs to the common subset
+    and map its photometry code, then initialize to handle subsampling and
+    optional JPEG Library YCbCr <-> RGB color-space conversion.
+ */
+    switch (td->td_photometric)
+      {
+        case PHOTOMETRIC_YCBCR     :
+
+       /* ISO IS 10918-1 requires that JPEG subsampling factors be 1-4, but
+          TIFF Version 6.0 is more restrictive: only 1, 2, and 4 are allowed.
+       */
+          if (   (   td->td_ycbcrsubsampling[0] == 1
+                  || td->td_ycbcrsubsampling[0] == 2
+                  || td->td_ycbcrsubsampling[0] == 4
+                 )
+              && (   td->td_ycbcrsubsampling[1] == 1
+                  || td->td_ycbcrsubsampling[1] == 2
+                  || td->td_ycbcrsubsampling[1] == 4
+                 )
+             )
+            sp->cinfo.c.raw_data_in = /* Raw input unless sampling is 1x1 (011) */
+              ( (sp->h_sampling = td->td_ycbcrsubsampling[0]) << 3
+              | (sp->v_sampling = td->td_ycbcrsubsampling[1])
+              ) != 011;
+          else
+            {
+              TIFFError(module,bad_subsampling);
+              status = 0;
+            };
+
+       /* A ReferenceBlackWhite field MUST be present, since the default value
+          is inappropriate for YCbCr.  Fill in the proper value if the
+          application didn't set it.
+       */
+          if (!TIFFFieldSet(tif,FIELD_REFBLACKWHITE))
+            { float refbw[6];
+              long top = 1L << td->td_bitspersample;
+ 
+              refbw[0] = 0;
+              refbw[1] = (float)(top-1L);
+              refbw[2] = (float)(top>>1);
+              refbw[3] = refbw[1];
+              refbw[4] = refbw[2];
+              refbw[5] = refbw[1];
+              TIFFSetField(tif,TIFFTAG_REFERENCEBLACKWHITE,refbw);
+            };
+          sp->cinfo.c.jpeg_color_space = JCS_YCbCr;
+          if (sp->jpegcolormode == JPEGCOLORMODE_RGB)
+            {
+              sp->cinfo.c.raw_data_in = FALSE;
+              sp->in_color_space = JCS_RGB; /* NOTE(review): probably meant "sp->cinfo.c.in_color_space" -- confirm */
+              break;
+            };
+          goto L2;
+        case PHOTOMETRIC_MINISBLACK:
+          sp->cinfo.c.jpeg_color_space = JCS_GRAYSCALE;
+          goto L1;
+        case PHOTOMETRIC_RGB       :
+          sp->cinfo.c.jpeg_color_space = JCS_RGB;
+          goto L1;
+        case PHOTOMETRIC_SEPARATED :
+          sp->cinfo.c.jpeg_color_space = JCS_CMYK;
+      L1: sp->jpegcolormode = JPEGCOLORMODE_RAW; /* No JPEG Lib. conversion */
+      L2: sp->cinfo.c.in_color_space = sp->cinfo.c.jpeg_color_space;
+          break;
+        default                    :
+          TIFFError(module,bad_photometry,td->td_photometric);
+          status = 0;
+      };
+    tif->tif_encoderow = tif->tif_encodestrip = tif->tif_encodetile =
+      sp->cinfo.c.raw_data_in ? OJPEGEncodeRaw : OJPEGEncode;
+    if (isTiled(tif))
+      { tsize_t size;
+
+#       ifdef C_LOSSLESS_SUPPORTED
+        if ((size = sp->v_sampling*sp->cinfo.c.data_unit) < 16) size = 16;
+#       else
+        if ((size = sp->v_sampling*DCTSIZE) < 16) size = 16;
+#       endif
+        if ((segment_height = td->td_tilelength) % size)
+          {
+            TIFFError(module,"JPEG tile height must be multiple of %d",size);
+            status = 0;
+          };
+#       ifdef C_LOSSLESS_SUPPORTED
+        if ((size = sp->h_sampling*sp->cinfo.c.data_unit) < 16) size = 16;
+#       else
+        if ((size = sp->h_sampling*DCTSIZE) < 16) size = 16;
+#       endif
+        if ((segment_width = td->td_tilewidth) % size)
+          {
+            TIFFError(module,"JPEG tile width must be multiple of %d",size);
+            status = 0;
+          };
+        sp->bytesperline = TIFFTileRowSize(tif);
+      }
+    else
+      { tsize_t size;
+
+#       ifdef C_LOSSLESS_SUPPORTED
+        if ((size = sp->v_sampling*sp->cinfo.c.data_unit) < 16) size = 16;
+#       else
+        if ((size = sp->v_sampling*DCTSIZE) < 16) size = 16;
+#       endif
+        if (td->td_rowsperstrip < (segment_height = td->td_imagelength))
+          {
+            if (td->td_rowsperstrip % size)
+              {
+                TIFFError(module,"JPEG RowsPerStrip must be multiple of %d",size);
+                status = 0;
+              };
+            segment_height = td->td_rowsperstrip;
+          };
+        segment_width = td->td_imagewidth;
+        sp->bytesperline = tif->tif_scanlinesize;
+      };
+    if (segment_width > 65535 || segment_height > 65535)
+      {
+        TIFFError(module,"Strip/tile too large for JPEG");
+        status = 0;
+      };
+
+ /* Initialize all JPEG parameters to default values.  Note that the JPEG
+    Library's "jpeg_set_defaults()" method needs legal values for the
+    "in_color_space" and "input_components" fields.
+ */
+    sp->cinfo.c.input_components = 1; /* Default for JCS_UNKNOWN */
+    if (!CALLVJPEG(sp,jpeg_set_defaults(&sp->cinfo.c))) status = 0;
+    switch (sp->jpegtablesmode & (JPEGTABLESMODE_HUFF|JPEGTABLESMODE_QUANT))
+      { register JHUFF_TBL *htbl;
+        register JQUANT_TBL *qtbl;
+
+        case 0                                       :
+          sp->cinfo.c.optimize_coding = TRUE; /* ...then falls through */
+        case JPEGTABLESMODE_HUFF                     :
+          if (!CALLVJPEG(sp,jpeg_set_quality(&sp->cinfo.c,sp->jpegquality,FALSE)))
+            return 0;
+          if (qtbl = sp->cinfo.c.quant_tbl_ptrs[0]) qtbl->sent_table = FALSE;
+          if (qtbl = sp->cinfo.c.quant_tbl_ptrs[1]) qtbl->sent_table = FALSE;
+          goto L3;
+        case JPEGTABLESMODE_QUANT                    :
+          sp->cinfo.c.optimize_coding = TRUE;
+
+       /* We do not support application-supplied JPEG tables, so mark the field
+          "not present".
+       */
+      L3: TIFFClrFieldBit(tif,FIELD_JPEGTABLES);
+          break;
+        case JPEGTABLESMODE_HUFF|JPEGTABLESMODE_QUANT:
+          if (   !CALLVJPEG(sp,jpeg_set_quality(&sp->cinfo.c,sp->jpegquality,FALSE))
+              || !CALLVJPEG(sp,jpeg_suppress_tables(&sp->cinfo.c,TRUE))
+             )
+            {
+              status = 0;
+              break;
+            };
+          if (qtbl = sp->cinfo.c.quant_tbl_ptrs[0]) qtbl->sent_table = FALSE;
+          if (htbl = sp->cinfo.c.dc_huff_tbl_ptrs[0]) htbl->sent_table = FALSE;
+          if (htbl = sp->cinfo.c.ac_huff_tbl_ptrs[0]) htbl->sent_table = FALSE;
+          if (sp->cinfo.c.jpeg_color_space == JCS_YCbCr)
+            {
+              if (qtbl = sp->cinfo.c.quant_tbl_ptrs[1])
+                qtbl->sent_table = FALSE;
+              if (htbl = sp->cinfo.c.dc_huff_tbl_ptrs[1])
+                htbl->sent_table = FALSE;
+              if (htbl = sp->cinfo.c.ac_huff_tbl_ptrs[1])
+                htbl->sent_table = FALSE;
+            };
+          if (   TIFFojpeg_tables_dest(sp,tif)
+              && CALLVJPEG(sp,jpeg_write_tables(&sp->cinfo.c))
+             )
+            {
+    
+           /* Mark the field "present".  We can't use "TIFFSetField()" because
+              "BEENWRITING" is already set!
+           */
+              TIFFSetFieldBit(tif,FIELD_JPEGTABLES);
+              tif->tif_flags |= TIFF_DIRTYDIRECT;
+            }
+          else status = 0;
+      };
+    if (   sp->cinfo.c.raw_data_in
+        && !alloc_downsampled_buffers(tif,sp->cinfo.c.comp_info,
+                                      sp->cinfo.c.num_components)
+       ) status = 0;
+    if (status == 0) return 0; /* If TIFF errors, don't bother to continue */
+ /* Grab parameters that are same for all strips/tiles. */
+
+    sp->dest.init_destination = std_init_destination;
+    sp->dest.empty_output_buffer = std_empty_output_buffer;
+    sp->dest.term_destination = std_term_destination;
+    sp->cinfo.c.dest = &sp->dest;
+    sp->cinfo.c.data_precision = td->td_bitspersample;
+    sp->cinfo.c.write_JFIF_header = /* Don't write extraneous markers */
+    sp->cinfo.c.write_Adobe_marker = FALSE;
+    sp->cinfo.c.image_width = segment_width;
+    sp->cinfo.c.image_height = segment_height;
+    sp->cinfo.c.comp_info[0].h_samp_factor =
+    sp->cinfo.c.comp_info[0].v_samp_factor = 1;
+    return CALLVJPEG(sp,jpeg_start_compress(&sp->cinfo.c,FALSE));
+#   undef td
+  }
+
+static int
+OJPEGPreEncode(register TIFF *tif,tsample_t s)
+  { register OJPEGState *sp = OJState(tif);
+#   define td (&tif->tif_dir)
+
+ /* If we are about to write the first row of an image plane, which should
+    coincide with a JPEG "scan", reset the JPEG Library's compressor.  Otherwise
+    let the compressor run "as is" and return a "success" status without further
+    ado.  "s" is the sample (plane) number; this subroutine always returns 1.
+ */
+    if (     (isTiled(tif) ? tif->tif_curtile : tif->tif_curstrip)
+           % td->td_stripsperimage
+        == 0
+       )
+      {
+        if (   (sp->cinfo.c.comp_info[0].component_id = s) == 1
+            && sp->cinfo.c.jpeg_color_space == JCS_YCbCr
+           )
+          {
+            sp->cinfo.c.comp_info[0].quant_tbl_no =
+            sp->cinfo.c.comp_info[0].dc_tbl_no =
+            sp->cinfo.c.comp_info[0].ac_tbl_no = 1;
+            sp->cinfo.c.comp_info[0].h_samp_factor = sp->h_sampling;
+            sp->cinfo.c.comp_info[0].v_samp_factor = sp->v_sampling;
+    
+         /* Scale expected strip/tile size to match a downsampled component. */
+         /* NOTE(review): "segment_width"/"segment_height" are not declared in this subroutine -- TODO supply them */
+            sp->cinfo.c.image_width = TIFFhowmany(segment_width,sp->h_sampling);
+            sp->cinfo.c.image_height=TIFFhowmany(segment_height,sp->v_sampling);
+          };
+        sp->scancount = 0; /* Mark subsampling buffer(s) empty */
+      };
+    return 1;
+#   undef td
+  }
+
+static int
+OJPEGPostEncode(register TIFF *tif)
+  { register OJPEGState *sp = OJState(tif);
+
+ /* Finish up at the end of a strip or tile:  if rows of down-sampled data are
+    still buffered, replicate the last one to fill out the buffer, hand the
+    buffer to the JPEG Library, then finish the compression.  Returns 0 if the
+    library does not accept all of the rows.
+ */
+    if (sp->scancount > 0) /* emit partial buffer of down-sampled data */
+      { JDIMENSION n;
+ /* "size" is declared here, not in the padding block, as it's needed below. */
+#       ifdef C_LOSSLESS_SUPPORTED
+        int size = sp->cinfo.c.data_unit;
+#       else
+        int size = DCTSIZE;
+#       endif
+
+        if (sp->scancount < size && sp->cinfo.c.num_components > 0)
+          { int ci = 0;                            /* Pad the data vertically */
+            register jpeg_component_info *compptr = sp->cinfo.c.comp_info;
+
+            do /* Brace is outside "#ifdef" so that both variants open the block */
+               {
+#              ifdef C_LOSSLESS_SUPPORTED
+                 tsize_t row_width = compptr->width_in_data_units
+#              else
+                 tsize_t row_width = compptr->width_in_blocks
+#              endif
+                   *size*sizeof(JSAMPLE);
+                 int ypos = sp->scancount*compptr->v_samp_factor;
+
+              /* Copy the preceding row into each row not yet filled. */
+                 do _TIFFmemcpy( (tdata_t)sp->ds_buffer[ci][ypos]
+                               , (tdata_t)sp->ds_buffer[ci][ypos-1]
+                               , row_width
+                               );
+                 while (++ypos < compptr->v_samp_factor*size);
+               }
+            while (++compptr,++ci < sp->cinfo.c.num_components);
+          };
+        n = sp->cinfo.c.max_v_samp_factor*size;
+        if (CALLJPEG(sp,-1,jpeg_write_raw_data(&sp->cinfo.c,sp->ds_buffer,n)) != n)
+          return 0;
+      };
+    return CALLVJPEG(sp,jpeg_finish_compress(&sp->cinfo.c));
+  }
+#endif /* never */
+
+/* JPEG Decoding begins here. */
+
+/*ARGSUSED*/ static int
+OJPEGDecode(register TIFF *tif,tidata_t buf,tsize_t cc,tsample_t s)
+  { tsize_t row_bytes = isTiled(tif)   /* Bytes per row of caller's buffer */
+                      ? TIFFTileRowSize(tif)
+                      : tif->tif_scanlinesize,
+            remaining,                 /* No. of unprocessed rows in file */
+            nrows;                     /* No. of rows to decode in this call */
+    register OJPEGState *sp = OJState(tif);
+
+ /* Decode whole scan lines into the caller's buffer.  This subroutine serves
+    when the input data has NOT been down-sampled, or when the TIFF Library's
+    client has used the "JPEGColorMode" TIFF pseudo-tag to ask the JPEG Library
+    for color-space conversion; that is the normal case.  It is called for
+    pixel-interleaved and for separate color planes alike.  Returns 1, or 0
+    as soon as the JPEG Library fails to deliver a scan line.
+
+    WARNING:  The row stride used here is computed locally ("row_bytes")
+              rather than taken from "sp->bytesperline", as is done in
+    "OJPEGDecodeRawContig()", below; the latter might be a little smaller.
+    That happens for an old tiled image whose width isn't a multiple of 8
+    pixels, which the TIFF Version 6 specification forbids, although some test
+    files, like "zackthecat.tif", were built that way.  For such files, the
+    image's true width is embedded in the caller's buffer (presumably
+    allocated for the expected tile width), which in effect "pads" the end of
+    each row with unused Bytes.
+ */
+    nrows = cc / row_bytes;        /* No. of complete rows in caller's buffer */
+    remaining = sp->cinfo.d.output_height - sp->cinfo.d.output_scanline;
+    if (nrows > remaining) nrows = remaining;
+    for (; nrows > 0; --nrows)
+      {
+     /* Ask for exactly 1 scan line, written directly at "buf". */
+        if (CALLJPEG(sp,-1,jpeg_read_scanlines(&sp->cinfo.d,(JSAMPARRAY)&buf,1)) != 1)
+          return 0;
+        buf += row_bytes;
+        ++tif->tif_row;
+      };
+
+ /* BEWARE OF KLUDGE:  Files written by Microsoft's Wang Imaging for Windows
+                       application need special treatment at the end of each
+    TIFF "strip":  the DC coefficients of every JPEG image component (Y,Cb,Cr)
+    must be reset, and whatever JPEG data bits remain in the current Byte of
+    the decoder's input buffer must be thrown away.  An "ad hoc" interface
+    created for this purpose in the "jdhuff.c" module of IJG JPEG Library
+    Version 6 ("jdshuff.c", if Ken Murchison's lossless-Huffman patch is
+    applied) is invoked here after each "strip" has been decoded.
+ */
+    if (sp->is_WANG) jpeg_reset_huff_decode(&sp->cinfo.d);
+    return 1;
+  }
+
+/*ARGSUSED*/ static int
+OJPEGDecodeRawContig(register TIFF *tif,tidata_t buf,tsize_t cc,tsample_t s)
+  { tsize_t rows;                          /* No. of unprocessed rows in file */
+    JDIMENSION lines_per_MCU, size; /* "lines_per_MCU" = max_v_samp_factor * "size"; "size" = DCTSIZE or codec data unit */
+    register OJPEGState *sp = OJState(tif);
+
+ /* Decode a chunk of pixels, where the input data has pixel-interleaved color
+    planes, some of which have been down-sampled, but the TIFF Library's client
+    has NOT used the "JPEGColorMode" TIFF pseudo-tag to request that the JPEG
+    Library do color-space conversion.  In other words, we must up-sample/
+    expand/duplicate image components according to the image's sampling factors,
+    without changing its color space.  The data is expected to be read in scan-
+    line multiples.  Returns 1 normally, or 0 if a buffer reload falls short.
+ */
+    if ( (cc /= sp->bytesperline)  /* No. of complete rows in caller's buffer */
+       > (rows = sp->cinfo.d.output_height - sp->cinfo.d.output_scanline)
+       ) cc = rows;
+    lines_per_MCU = sp->cinfo.d.max_v_samp_factor
+#   ifdef D_LOSSLESS_SUPPORTED
+                  * (size = sp->cinfo.d.min_codec_data_unit);
+#   else
+                  * (size = DCTSIZE);
+#   endif
+    while (--cc >= 0) /* One pass per row of the caller's buffer */
+      { int clumpoffset, ci;
+        register jpeg_component_info *compptr;
+
+        if (sp->scancount >= size) /* reload downsampled-data buffers */
+          {
+            if (   CALLJPEG(sp,-1,jpeg_read_raw_data(&sp->cinfo.d,sp->ds_buffer,lines_per_MCU))
+                != lines_per_MCU
+               ) return 0;
+            sp->scancount = 0;
+          };
+
+     /* The fastest way to separate the data is: make 1 pass over the scan
+        line for each row of each component.
+     */
+        clumpoffset = ci = 0; /* "clumpoffset" = sample offset within each output clump */
+        compptr = sp->cinfo.d.comp_info;
+        do
+          { int ypos = 0; /* Row of this component within the current "scancount" group */
+
+            if (compptr->h_samp_factor == 1) /* fast path */
+              do
+                { register JSAMPLE *inptr =
+                    sp->ds_buffer[ci][sp->scancount*compptr->v_samp_factor+ypos],
+                                   *outptr = (JSAMPLE *)buf + clumpoffset;
+                  register int clumps_per_line = compptr->downsampled_width;
+
+                  do *outptr = *inptr++; /* 1 sample per clump */
+                  while ((outptr += sp->samplesperclump),--clumps_per_line > 0);
+                }
+              while ( (clumpoffset += compptr->h_samp_factor)
+                    , ++ypos < compptr->v_samp_factor
+                    );
+            else /* general case */
+              do
+                { register JSAMPLE *inptr =
+                    sp->ds_buffer[ci][sp->scancount*compptr->v_samp_factor+ypos],
+                                   *outptr = (JSAMPLE *)buf + clumpoffset;
+                  register int clumps_per_line = compptr->downsampled_width;
+
+                  do
+                    { register int xpos = 0;
+
+                      do outptr[xpos] = *inptr++; /* "h_samp_factor" samples per clump */
+                      while (++xpos < compptr->h_samp_factor);
+                    }
+                  while ((outptr += sp->samplesperclump),--clumps_per_line > 0);
+                }
+              while ( (clumpoffset += compptr->h_samp_factor)
+                    , ++ypos < compptr->v_samp_factor
+                    );
+          }
+        while (++compptr,++ci < sp->cinfo.d.num_components);
+        ++sp->scancount; /* One more row group of "ds_buffer" has been consumed */
+        buf += sp->bytesperline;
+        ++tif->tif_row;
+      };
+
+ /* BEWARE OF KLUDGE:  If our input file was produced by Microsoft's Wang
+                       Imaging for Windows application, the DC coefficients of
+    each JPEG image component (Y,Cb,Cr) must be reset at the end of each TIFF
+    "strip", and any JPEG data bits remaining in the current Byte of the
+    decoder's input buffer must be discarded.  To do so, we create an "ad hoc"
+    interface in the "jdhuff.c" module of IJG JPEG Library Version 6 (module
+    "jdshuff.c", if Ken Murchison's lossless-Huffman patch is applied), and we
+    invoke that interface here after decoding each "strip".
+ */
+    if (sp->is_WANG) jpeg_reset_huff_decode(&sp->cinfo.d);
+    return 1;
+  }
+
+/*ARGSUSED*/ static int
+OJPEGDecodeRawSeparate(TIFF *tif,register tidata_t buf,tsize_t cc,tsample_t s)
+  { tsize_t rows;                          /* No. of unprocessed rows in file */
+    JDIMENSION lines_per_MCU,
+               size,                                             /* ...of MCU */
+               v;                   /* Component's vertical up-sampling ratio */
+    register OJPEGState *sp = OJState(tif);
+    register jpeg_component_info *compptr = sp->cinfo.d.comp_info + s;
+
+ /* Decode a chunk of pixels, where the input data has separate color planes,
+    some of which have been down-sampled, but the TIFF Library's client has NOT
+    used the "JPEGColorMode" TIFF pseudo-tag to request that the JPEG Library
+    do color-space conversion.  The data is expected to be read in scan-line
+    multiples.  Only component "s" is copied, as stored; returns 1 once the caller's rows are used up, or 0 if a reload falls short.
+ */
+    v = sp->cinfo.d.max_v_samp_factor/compptr->v_samp_factor;
+    if ( (cc /= compptr->downsampled_width) /* No. of rows in caller's buffer */
+       > (rows = (sp->cinfo.d.output_height-sp->cinfo.d.output_scanline+v-1)/v)
+       ) cc = rows; /* No. of rows of "clumps" to read */
+    lines_per_MCU = sp->cinfo.d.max_v_samp_factor
+#   ifdef D_LOSSLESS_SUPPORTED
+                  * (size = sp->cinfo.d.min_codec_data_unit);
+#   else
+                  * (size = DCTSIZE);
+#   endif
+ L: if (sp->scancount >= size) /* reload downsampled-data buffers */
+      {
+        if (   CALLJPEG(sp,-1,jpeg_read_raw_data(&sp->cinfo.d,sp->ds_buffer,lines_per_MCU))
+            != lines_per_MCU
+           ) return 0;
+        sp->scancount = 0;
+      };
+    rows = 0; /* "rows" is reused from here on as the row index within this component's row group */
+    do /* NOTE(review): a row is copied before "cc" is tested, even if "cc" started at 0 -- confirm callers always supply at least 1 row */
+      { register JSAMPLE *inptr =
+          sp->ds_buffer[s][sp->scancount*compptr->v_samp_factor + rows];
+        register int clumps_per_line = compptr->downsampled_width;
+
+        do *buf++ = *inptr++; while (--clumps_per_line > 0); /* Copy scanline */
+        tif->tif_row += v; /* Each stored row is counted as "v" image rows */
+        if (--cc <= 0) return 1; /* End of caller's buffer? */
+      }
+    while (++rows < compptr->v_samp_factor);
+    ++sp->scancount; /* This row group is finished; go around again, reloading if necessary */
+    goto L;
+  }
+
+/* "OJPEGSetupDecode()" temporarily forces the JPEG Library to use the following
+   subroutine as a "dummy" input reader in order to fool the library into
+   thinking that it has read the image's first "Start of Scan" (SOS) marker, so
+   that it initializes accordingly.
+*/
+/*ARGSUSED*/ METHODDEF(int)      /* Always claims that an SOS marker was reached */
+fake_SOS_marker(j_decompress_ptr cinfo){ (void)cinfo; return JPEG_REACHED_SOS; }
+
+/*ARGSUSED*/ METHODDEF(int)      /* Always reports JPEG_SUSPENDED; "cinfo" is ignored */
+suspend(j_decompress_ptr cinfo){ (void)cinfo; return JPEG_SUSPENDED; }
+
+/* The JPEG Library's "null" color-space converter actually re-packs separate
+   color planes (its native image representation) into a pixel-interleaved,
+   contiguous plane.  But if our TIFF Library client is trying to process a
+   PLANARCONFIG_SEPARATE image, we don't want that; so here are modifications of
+   code in the JPEG Library's "jdcolor.c" file, which simply copy Bytes to a
+   color plane specified by the current JPEG "scan".
+*/
+METHODDEF(void)
+ycc_rgb_convert(register j_decompress_ptr cinfo,JSAMPIMAGE in,JDIMENSION row,
+                register JSAMPARRAY out,register int nrows)
+  { typedef struct                /* "jdcolor.c" color-space conversion state */
+      {
+
+     /* WARNING:  This declaration is ugly and dangerous!  It's supposed to be
+                  private to the JPEG Library's "jdcolor.c" module, but we also
+        need it here.  Since the library's copy might change without notice, be
+        sure to keep this one synchronized or the following code will break!
+     */
+        struct jpeg_color_deconverter pub; /* Public fields */
+     /* Private state for YCC->RGB conversion */
+        int *Cr_r_tab,   /* ->Cr to R conversion table */
+            *Cb_b_tab;   /* ->Cb to B conversion table */
+        INT32 *Cr_g_tab, /* ->Cr to G conversion table */
+              *Cb_g_tab; /* ->Cb to G conversion table */
+      } *my_cconvert_ptr;
+    my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
+    JSAMPARRAY irow0p = in[0] + row; /* Rows of input plane 0 (read as "Y" below), starting at "row" */
+    register JSAMPLE *range_limit = cinfo->sample_range_limit;
+    register JSAMPROW outp, Y;
+ /* Write "nrows" rows of ONE output plane only (R, G or B), chosen by the current scan number minus 1. */
+    switch (cinfo->output_scan_number - 1)
+      { JSAMPARRAY irow1p, irow2p; /* Rows of input planes 1 and 2 (read as "Cb" and "Cr" below) */
+        register INT32 *table0, *table1;
+        SHIFT_TEMPS
+
+        case RGB_RED  : irow2p = in[2] + row;
+                        table0 = (INT32 *)cconvert->Cr_r_tab; /* NOTE(review): "int *" cast to "INT32 *"; assumes equal sizes -- confirm */
+                        while (--nrows >= 0)
+                          { register JSAMPROW Cr = *irow2p++;
+                             register int i = cinfo->output_width;
+
+                             Y = *irow0p++;
+                             outp = *out++;
+                             while (--i >= 0) /* R = limit[Y + Cr_r_tab[Cr]] */
+                               *outp++ = range_limit[*Y++ + table0[*Cr++]];
+                          };
+                        return;
+        case RGB_GREEN: irow1p = in[1] + row;
+                        irow2p = in[2] + row;
+                        table0 = cconvert->Cb_g_tab;
+                        table1 = cconvert->Cr_g_tab;
+                        while (--nrows >= 0)
+                          { register JSAMPROW Cb = *irow1p++,
+                                              Cr = *irow2p++;
+                             register int i = cinfo->output_width;
+
+                             Y = *irow0p++;
+                             outp = *out++;
+                             while (--i >= 0) /* G = limit[Y + ((Cb_g_tab[Cb] + Cr_g_tab[Cr]) >> 16)] */
+                               *outp++ =
+                                 range_limit[ *Y++
+                                            + RIGHT_SHIFT(table0[*Cb++]+table1[*Cr++],16)
+                                            ];
+                          };
+                        return;
+        case RGB_BLUE : irow1p = in[1] + row;
+                        table0 = (INT32 *)cconvert->Cb_b_tab; /* NOTE(review): same "int *" to "INT32 *" cast as for red */
+                        while (--nrows >= 0)
+                          { register JSAMPROW Cb = *irow1p++;
+                             register int i = cinfo->output_width;
+
+                             Y = *irow0p++;
+                             outp = *out++;
+                             while (--i >= 0) /* B = limit[Y + Cb_b_tab[Cb]] */
+                               *outp++ = range_limit[*Y++ + table0[*Cb++]];
+                          }
+      } /* No "default": for any other scan number nothing is written */
+  }
+
+METHODDEF(void)
+null_convert(register j_decompress_ptr cinfo,JSAMPIMAGE in,JDIMENSION row,
+             register JSAMPARRAY out,register int nrows)
+  { register JSAMPARRAY src = in[cinfo->output_scan_number - 1] + row;
+ /* Copy "nrows" rows of "output_width" Bytes from the current scan's plane. */
+    for (; nrows > 0; --nrows, ++out, ++src) _TIFFmemcpy(*out,*src,cinfo->output_width);
+  }
+
+static int
+OJPEGSetupDecode(register TIFF *tif)
+  { static char module[]={"OJPEGSetupDecode"};
+    J_COLOR_SPACE jpeg_color_space,   /* Color space of JPEG-compressed image */
+                  out_color_space;       /* Color space of decompressed image */
+    uint32 segment_width;
+    int status = 1;                              /* Assume success by default */
+    boolean downsampled_output=FALSE, /* <=> Want JPEG Library's "raw" image? */
+            is_JFIF;                                       /* <=> JFIF image? */
+    register OJPEGState *sp = OJState(tif);
+#   define td (&tif->tif_dir)
+
+ /* Verify miscellaneous parameters.  This will need work if the TIFF Library
+    ever supports different depths for different components, or if the JPEG
+    Library ever supports run-time depth selection.  Neither seems imminent.
+ */
+    if (td->td_bitspersample != sp->cinfo.d.data_precision)
+      {
+        TIFFError(module,bad_bps,td->td_bitspersample);
+        status = 0;
+      };
+
+ /* The TIFF Version 6.0 specification and IJG JPEG Library accept different
+    sets of color spaces, so verify that our image belongs to the common subset
+    and map its photometry code, then initialize to handle subsampling and
+    optional JPEG Library YCbCr <-> RGB color-space conversion.
+ */
+    switch (td->td_photometric)
+      {
+        case PHOTOMETRIC_YCBCR     :
+
+       /* ISO IS 10918-1 requires that JPEG subsampling factors be 1-4, but
+          TIFF Version 6.0 is more restrictive: only 1, 2, and 4 are allowed.
+       */
+          if (   (   td->td_ycbcrsubsampling[0] == 1
+                  || td->td_ycbcrsubsampling[0] == 2
+                  || td->td_ycbcrsubsampling[0] == 4
+                 )
+              && (   td->td_ycbcrsubsampling[1] == 1
+                  || td->td_ycbcrsubsampling[1] == 2
+                  || td->td_ycbcrsubsampling[1] == 4
+                 )
+             )
+            downsampled_output =
+              (
+                (sp->h_sampling = td->td_ycbcrsubsampling[0]) << 3
+              | (sp->v_sampling = td->td_ycbcrsubsampling[1])
+              ) != 011;
+          else
+            {
+              TIFFError(module,bad_subsampling);
+              status = 0;
+            };
+          jpeg_color_space = JCS_YCbCr;
+          if (sp->jpegcolormode == JPEGCOLORMODE_RGB)
+            {
+              downsampled_output = FALSE;
+              out_color_space = JCS_RGB;
+              break;
+            };
+          goto L2;
+        case PHOTOMETRIC_MINISBLACK:
+          jpeg_color_space = JCS_GRAYSCALE;
+          goto L1;
+        case PHOTOMETRIC_RGB       :
+          jpeg_color_space = JCS_RGB;
+          goto L1;
+        case PHOTOMETRIC_SEPARATED :
+          jpeg_color_space = JCS_CMYK;
+      L1: sp->jpegcolormode = JPEGCOLORMODE_RAW; /* No JPEG Lib. conversion */
+      L2: out_color_space = jpeg_color_space;
+          break;
+        default                    :
+          TIFFError(module,bad_photometry,td->td_photometric);
+          status = 0;
+      };
+    if (status == 0) return 0; /* If TIFF errors, don't bother to continue */
+
+ /* Set parameters that are same for all strips/tiles. */
+
+    sp->cinfo.d.src = &sp->src;
+    sp->src.init_source = std_init_source;
+    sp->src.fill_input_buffer = std_fill_input_buffer;
+    sp->src.skip_input_data = std_skip_input_data;
+    sp->src.resync_to_restart = jpeg_resync_to_restart;
+    sp->src.term_source = std_term_source;
+
+ /* BOGOSITY ALERT!  The Wang Imaging application for Microsoft Windows produces
+                     images containing "JPEGInterchangeFormat[Length]" TIFF
+    records that resemble JFIF-in-TIFF encapsulations but, in fact, violate the
+    TIFF Version 6 specification in several ways; nevertheless, we try to handle
+    them gracefully because there are apparently a lot of them around.  The
+    purported "JFIF" data stream in one of these files vaguely resembles a JPEG
+    "tables only" data stream, except that there's no trailing EOI marker.  The
+    rest of the JPEG data stream lies in a discontiguous file region, identified
+    by the 0th Strip offset (which is *also* illegal!), where it begins with an
+    SOS marker and apparently continues to the end of the file.  There is no
+    trailing EOI marker here, either.
+ */
+    is_JFIF = !sp->is_WANG && TIFFFieldSet(tif,FIELD_JPEGIFOFFSET);
+
+ /* Initialize decompression parameters that won't be overridden by JPEG Library
+    defaults set during the "jpeg_read_header()" call, below.
+ */
+    segment_width = td->td_imagewidth;
+    if (isTiled(tif))
+      {
+        if (sp->is_WANG) /* we don't know how to handle it */
+          {
+            TIFFError(module,"Tiled Wang image not supported");
+            return 0;
+          };
+
+     /* BOGOSITY ALERT!  "TIFFTileRowSize()" seems to work fine for modern JPEG-
+                         in-TIFF encapsulations where the image width--like the
+        tile width--is a multiple of 8 or 16 pixels.  But image widths and
+        heights aren't restricted to 8- or 16-bit multiples, and we need
+        the exact Byte count of decompressed scan lines when we call the JPEG
+        Library.  At least one old file ("zackthecat.tif") in the TIFF Library
+        test suite has widths and heights slightly less than the tile sizes, and
+        it apparently used the bogus computation below to determine the number
+        of Bytes per scan line (was this due to an old, broken version of
+        "TIFFhowmany()"?).  Before we get here, "OJPEGSetupDecode()" verified
+        that our image uses 8-bit samples, so the following check appears to
+        return the correct answer in all known cases tested to date.
+     */
+        if (is_JFIF || (segment_width & 7) == 0)
+          sp->bytesperline = TIFFTileRowSize(tif); /* Normal case */
+        else
+          {
+            /* Was the file-encoder's segment-width calculation bogus? */
+            segment_width = (segment_width/sp->h_sampling + 1) * sp->h_sampling;
+            sp->bytesperline = segment_width * td->td_samplesperpixel;
+          }
+      }
+    else sp->bytesperline = TIFFVStripSize(tif,1);
+
+ /* BEWARE OF KLUDGE:  If we have JPEG Interchange File Format (JFIF) image,
+                       then we want to read "metadata" in the bit-stream's
+    header and validate it against corresponding information in TIFF records.
+    But if we have a *really old* JPEG file that's not JFIF, then we simply
+    assign TIFF-record values to JPEG Library variables without checking.
+ */
+    if (is_JFIF) /* JFIF image */
+      { unsigned char *end_of_data;
+        int subsampling_factors;
+        register unsigned char *p;
+        register int i;
+
+     /* WARNING:  Although the image file contains a JFIF bit stream, it might
+                  also contain some old TIFF records causing "OJPEGVSetField()"
+        to have allocated quantization or Huffman decoding tables.  But when the
+        JPEG Library reads and parses the JFIF header below, it reallocates these
+        tables anew without checking for "dangling" pointers, thereby causing a
+        memory "leak".  We have enough information to potentially deallocate the
+        old tables here, but unfortunately JPEG Library Version 6B uses a "pool"
+        allocator for small objects, with no deallocation procedure; instead, it
+        reclaims a whole pool when an image is closed/destroyed, so well-behaved
+        TIFF client applications (i.e., those which close their JPEG images as
+        soon as they're no longer needed) will waste memory for a short time but
+        recover it eventually.  But ill-behaved TIFF clients (i.e., those which
+        keep many JPEG images open gratuitously) can exhaust memory prematurely.
+        If the JPEG Library ever implements a deallocation procedure, insert
+        this clean-up code:
+     */
+#       ifdef someday
+        if (sp->jpegtablesmode & JPEGTABLESMODE_QUANT) /* free quant. tables */
+          { register int i = 0;
+
+            do
+              { register JQUANT_TBL *q;
+
+                if (q = sp->cinfo.d.quant_tbl_ptrs[i])
+                  {
+                    jpeg_free_small(&sp->cinfo.comm,q,sizeof *q);
+                    sp->cinfo.d.quant_tbl_ptrs[i] = 0;
+                  }
+              }
+            while (++i < NUM_QUANT_TBLS);
+          };
+        if (sp->jpegtablesmode & JPEGTABLESMODE_HUFF) /* free Huffman tables */
+          { register int i = 0;
+
+            do
+              { register JHUFF_TBL *h;
+
+                if (h = sp->cinfo.d.dc_huff_tbl_ptrs[i])
+                  {
+                    jpeg_free_small(&sp->cinfo.comm,h,sizeof *h);
+                    sp->cinfo.d.dc_huff_tbl_ptrs[i] = 0;
+                  };
+                if (h = sp->cinfo.d.ac_huff_tbl_ptrs[i])
+                  {
+                    jpeg_free_small(&sp->cinfo.comm,h,sizeof *h);
+                    sp->cinfo.d.ac_huff_tbl_ptrs[i] = 0;
+                  }
+              }
+            while (++i < NUM_HUFF_TBLS);
+          };
+#       endif /* someday */
+
+     /* Since we might someday wish to try rewriting "old format" JPEG-in-TIFF
+        encapsulations in "new format" files, try to synthesize the value of a
+        modern "JPEGTables" TIFF record by scanning the JPEG data from just past
+        the "Start of Image" (SOI) marker until something other than a
+        legitimate "table" marker is found, as defined in ISO IS 10918-1
+        Annex B.2.4; namely:
+
+        -- Define Quantization Table (DQT)
+        -- Define Huffman Table (DHT)
+        -- Define Arithmetic Coding table (DAC)
+        -- Define Restart Interval (DRI)
+        -- Comment (COM)
+        -- Application data (APPn)
+
+        For convenience, we also accept "Expansion" (EXP) markers, although they
+        are apparently not a part of normal "table" data.
+     */
+        sp->jpegtables = p = (unsigned char *)sp->src.next_input_byte;
+        end_of_data = p + sp->src.bytes_in_buffer;
+        p += 2;
+        while (p < end_of_data && p[0] == 0xFF)
+          switch (p[1])
+            {
+              default  : goto L;
+              case 0xC0: /* SOF0  */
+              case 0xC1: /* SOF1  */
+              case 0xC2: /* SOF2  */
+              case 0xC3: /* SOF3  */
+              case 0xC4: /* DHT   */
+              case 0xC5: /* SOF5  */
+              case 0xC6: /* SOF6  */
+              case 0xC7: /* SOF7  */
+              case 0xC9: /* SOF9  */
+              case 0xCA: /* SOF10 */
+              case 0xCB: /* SOF11 */
+              case 0xCC: /* DAC   */
+              case 0xCD: /* SOF13 */
+              case 0xCE: /* SOF14 */
+              case 0xCF: /* SOF15 */
+              case 0xDB: /* DQT   */
+              case 0xDD: /* DRI   */
+              case 0xDF: /* EXP   */
+              case 0xE0: /* APP0  */
+              case 0xE1: /* APP1  */
+              case 0xE2: /* APP2  */
+              case 0xE3: /* APP3  */
+              case 0xE4: /* APP4  */
+              case 0xE5: /* APP5  */
+              case 0xE6: /* APP6  */
+              case 0xE7: /* APP7  */
+              case 0xE8: /* APP8  */
+              case 0xE9: /* APP9  */
+              case 0xEA: /* APP10 */
+              case 0xEB: /* APP11 */
+              case 0xEC: /* APP12 */
+              case 0xED: /* APP13 */
+              case 0xEE: /* APP14 */
+              case 0xEF: /* APP15 */
+              case 0xFE: /* COM   */
+                         p += (p[2] << 8 | p[3]) + 2;
+            };
+     L: if (p - (unsigned char *)sp->jpegtables > 2) /* fake "JPEGTables" */
+          {
+
+         /* In case our client application asks, pretend that this image file
+            contains a modern "JPEGTables" TIFF record by copying to a buffer
+            the initial part of the JFIF bit-stream that we just scanned, from
+            the SOI marker through the "metadata" tables, then append an EOI
+            marker and flag the "JPEGTables" TIFF record as "present".
+         */
+            sp->jpegtables_length = p - (unsigned char*)sp->jpegtables + 2;
+            p = sp->jpegtables;
+            if (!(sp->jpegtables = _TIFFmalloc(sp->jpegtables_length)))
+              {
+                TIFFError(module,no_jtable_space);
+                return 0;
+              };
+            _TIFFmemcpy(sp->jpegtables,p,sp->jpegtables_length-2);
+            p = (unsigned char *)sp->jpegtables + sp->jpegtables_length;
+            p[-2] = 0xFF; p[-1] = JPEG_EOI; /* Append EOI marker */
+            TIFFSetFieldBit(tif,FIELD_JPEGTABLES);
+            tif->tif_flags |= TIFF_DIRTYDIRECT;
+          }
+        else sp->jpegtables = 0; /* Don't simulate "JPEGTables" */
+        if (   CALLJPEG(sp,-1,jpeg_read_header(&sp->cinfo.d,TRUE))
+            != JPEG_HEADER_OK
+           ) return 0;
+        if (   sp->cinfo.d.image_width  != segment_width
+            || sp->cinfo.d.image_height != td->td_imagelength 
+           )
+          {
+            TIFFError(module,"Improper JPEG strip/tile size");
+            return 0;
+          };
+        if (sp->cinfo.d.num_components != td->td_samplesperpixel)
+          {
+            TIFFError(module,"Improper JPEG component count");
+            return 0;
+          };
+        if (sp->cinfo.d.data_precision != td->td_bitspersample)
+          {
+            TIFFError(module,"Improper JPEG data precision");
+            return 0;
+          };
+
+     /* Check that JPEG image components all have the same subsampling factors
+        declared (or defaulted) in the TIFF file, since TIFF Version 6.0 is more
+        restrictive than JPEG:  Only the 0th component may have horizontal and
+        vertical subsampling factors other than <1,1>.
+     */
+        subsampling_factors = sp->h_sampling << 3 | sp->v_sampling;
+        i = 0;
+        do
+          {
+            if (   ( sp->cinfo.d.comp_info[i].h_samp_factor << 3
+                   | sp->cinfo.d.comp_info[i].v_samp_factor
+                   )
+                != subsampling_factors
+               )
+              {
+                TIFFError(module,"Improper JPEG subsampling factors");
+                return 0;
+              };
+            subsampling_factors = 011; /* Required for image components > 0 */
+          }
+        while (++i < sp->cinfo.d.num_components);
+      }
+    else /* not JFIF image */
+      { int (*save)(j_decompress_ptr cinfo) = sp->cinfo.d.marker->read_markers;
+        register int i;
+
+     /* We're not assuming that this file's JPEG bit stream has any header
+        "metadata", so fool the JPEG Library into thinking that we read a
+        "Start of Image" (SOI) marker and a "Start of Frame" (SOFx) marker, then
+        force it to read a simulated "Start of Scan" (SOS) marker when we call
+        "jpeg_read_header()" below.  This should cause the JPEG Library to
+        establish reasonable defaults.
+     */
+        sp->cinfo.d.marker->saw_SOI =       /* Pretend we saw SOI marker */
+        sp->cinfo.d.marker->saw_SOF = TRUE; /* Pretend we saw SOF marker */
+        sp->cinfo.d.marker->read_markers =
+          sp->is_WANG ? suspend : fake_SOS_marker;
+        sp->cinfo.d.global_state = DSTATE_INHEADER;
+        sp->cinfo.d.Se = DCTSIZE2-1; /* Suppress JPEG Library warning */
+        sp->cinfo.d.image_width  = segment_width;
+        sp->cinfo.d.image_height = td->td_imagelength;
+
+     /* The following color-space initialization, including the complicated
+        "switch"-statement below, essentially duplicates the logic used by the
+        JPEG Library's "jpeg_init_colorspace()" subroutine during compression.
+     */
+        sp->cinfo.d.num_components = td->td_samplesperpixel;
+        sp->cinfo.d.comp_info = (jpeg_component_info *)
+          (*sp->cinfo.d.mem->alloc_small)
+            ( &sp->cinfo.comm
+            , JPOOL_IMAGE
+            , sp->cinfo.d.num_components * sizeof *sp->cinfo.d.comp_info
+            );
+        i = 0;
+        do
+          {
+            sp->cinfo.d.comp_info[i].component_index = i;
+            sp->cinfo.d.comp_info[i].component_needed = TRUE;
+            sp->cinfo.d.cur_comp_info[i] = &sp->cinfo.d.comp_info[i];
+          }
+        while (++i < sp->cinfo.d.num_components);
+        switch (jpeg_color_space)
+          {
+            case JCS_UNKNOWN  :
+              i = 0;
+              do
+                {
+                  sp->cinfo.d.comp_info[i].component_id = i;
+                  sp->cinfo.d.comp_info[i].h_samp_factor =
+                  sp->cinfo.d.comp_info[i].v_samp_factor = 1;
+                }
+              while (++i < sp->cinfo.d.num_components);
+              break;
+            case JCS_GRAYSCALE:
+              sp->cinfo.d.comp_info[0].component_id =
+              sp->cinfo.d.comp_info[0].h_samp_factor =
+              sp->cinfo.d.comp_info[0].v_samp_factor = 1;
+              break;
+            case JCS_RGB      :
+              sp->cinfo.d.comp_info[0].component_id = 'R';
+              sp->cinfo.d.comp_info[1].component_id = 'G';
+              sp->cinfo.d.comp_info[2].component_id = 'B';
+              i = 0;
+              do sp->cinfo.d.comp_info[i].h_samp_factor =
+                 sp->cinfo.d.comp_info[i].v_samp_factor = 1;
+              while (++i < sp->cinfo.d.num_components);
+              break;
+            case JCS_CMYK     :
+              sp->cinfo.d.comp_info[0].component_id = 'C';
+              sp->cinfo.d.comp_info[1].component_id = 'M';
+              sp->cinfo.d.comp_info[2].component_id = 'Y';
+              sp->cinfo.d.comp_info[3].component_id = 'K';
+              i = 0;
+              do sp->cinfo.d.comp_info[i].h_samp_factor =
+                 sp->cinfo.d.comp_info[i].v_samp_factor = 1;
+              while (++i < sp->cinfo.d.num_components);
+              break;
+            case JCS_YCbCr    :
+              i = 0;
+              do
+                {
+                  sp->cinfo.d.comp_info[i].component_id = i+1;
+                  sp->cinfo.d.comp_info[i].h_samp_factor =
+                  sp->cinfo.d.comp_info[i].v_samp_factor = 1;
+                  sp->cinfo.d.comp_info[i].quant_tbl_no =
+                  sp->cinfo.d.comp_info[i].dc_tbl_no =
+                  sp->cinfo.d.comp_info[i].ac_tbl_no = i > 0;
+                }
+              while (++i < sp->cinfo.d.num_components);
+              sp->cinfo.d.comp_info[0].h_samp_factor = sp->h_sampling;
+              sp->cinfo.d.comp_info[0].v_samp_factor = sp->v_sampling;
+          };
+        sp->cinfo.d.comps_in_scan = td->td_planarconfig == PLANARCONFIG_CONTIG
+                                  ? sp->cinfo.d.num_components
+                                  : 1;
+        i = CALLJPEG(sp,-1,jpeg_read_header(&sp->cinfo.d,!sp->is_WANG));
+        sp->cinfo.d.marker->read_markers = save; /* Restore input method */
+        if (sp->is_WANG) /* produced by Wang Imaging on Microsoft Windows */
+          {
+            if (i != JPEG_SUSPENDED) return 0;
+
+         /* BOGOSITY ALERT!  Files produced by the Wang Imaging application for
+                             Microsoft Windows are a special--and, technically
+            illegal--case.  A JPEG SOS marker and rest of the data stream should
+            be located at the end of the file, in a position identified by the
+            0th Strip offset.
+         */
+            i = td->td_nstrips - 1;
+            sp->src.next_input_byte = tif->tif_base + td->td_stripoffset[0];
+            sp->src.bytes_in_buffer = td->td_stripoffset[i] -
+              td->td_stripoffset[0] + td->td_stripbytecount[i];
+            i = CALLJPEG(sp,-1,jpeg_read_header(&sp->cinfo.d,TRUE));
+          };
+        if (i != JPEG_HEADER_OK) return 0;
+      };
+
+ /* Some of our initialization must wait until the JPEG Library is initialized
+    above, in order to override its defaults.
+ */
+    if (   (sp->cinfo.d.raw_data_out = downsampled_output)
+        && !alloc_downsampled_buffers(tif,sp->cinfo.d.comp_info,
+                                      sp->cinfo.d.num_components)
+       ) return 0;
+    sp->cinfo.d.jpeg_color_space = jpeg_color_space;
+    sp->cinfo.d.out_color_space = out_color_space;
+    sp->cinfo.d.dither_mode = JDITHER_NONE; /* Reduce image "noise" */
+    sp->cinfo.d.two_pass_quantize = FALSE;
+
+ /* If the image consists of separate, discontiguous TIFF "samples" (= color
+    planes, hopefully = JPEG "scans"), then we must use the JPEG Library's
+    "buffered image" mode to decompress the entire image into temporary buffers,
+    because the JPEG Library must parse the entire JPEG bit-stream in order to
+    be satisfied that it has a complete set of color components for each pixel,
+    but the TIFF Library must allow our client to extract 1 component at a time.
+    Initializing the JPEG Library's "buffered image" mode is tricky:  First, we
+    start its decompressor, then we tell the decompressor to "consume" (i.e.,
+    buffer) the entire bit-stream.
+
+    WARNING:  Disabling "fancy" up-sampling seems to slightly reduce "noise" for
+              certain old Wang Imaging files, but it absolutely *must* be
+    enabled if the image has separate color planes, since in that case, the JPEG
+    Library doesn't use an "sp->cinfo.d.cconvert" structure (so de-referencing
+    this pointer below will cause a fatal crash) but writing our own code to up-
+    sample separate color planes is too much work for right now.  Maybe someday?
+ */
+    sp->cinfo.d.do_fancy_upsampling = /* Always let this default (to TRUE)? */
+    sp->cinfo.d.buffered_image = td->td_planarconfig == PLANARCONFIG_SEPARATE;
+    if (!CALLJPEG(sp,0,jpeg_start_decompress(&sp->cinfo.d))) return 0;
+    if (sp->cinfo.d.buffered_image) /* separate color planes */
+      {
+        if (sp->cinfo.d.raw_data_out)
+          tif->tif_decoderow = tif->tif_decodestrip = tif->tif_decodetile =
+            OJPEGDecodeRawSeparate;
+        else
+          {
+            tif->tif_decoderow = tif->tif_decodestrip = tif->tif_decodetile =
+              OJPEGDecode;
+
+         /* In JPEG Library Version 6B, color-space conversion isn't implemented
+            for separate color planes, so we must do it ourself if our TIFF
+            client doesn't want to:
+         */
+            sp->cinfo.d.cconvert->color_convert =
+              sp->cinfo.d.jpeg_color_space == sp->cinfo.d.out_color_space
+              ? null_convert : ycc_rgb_convert;
+          };
+    L3: switch (CALLJPEG(sp,0,jpeg_consume_input(&sp->cinfo.d)))
+          {
+            default              : goto L3;
+
+         /* If no JPEG "End of Image" (EOI) marker is found when bit-
+            stream parsing ends, check whether we have enough data to proceed
+            before reporting an error.
+         */
+            case JPEG_SUSPENDED  : if (  sp->cinfo.d.input_scan_number
+                                        *sp->cinfo.d.image_height
+                                       + sp->cinfo.d.input_iMCU_row
+                                        *sp->cinfo.d.max_v_samp_factor
+#                                       ifdef D_LOSSLESS_SUPPORTED
+                                        *sp->cinfo.d.data_units_in_MCU
+                                        *sp->cinfo.d.min_codec_data_unit
+#                                       else
+                                        *sp->cinfo.d.blocks_in_MCU
+                                        *DCTSIZE
+#                                       endif
+                                      < td->td_samplesperpixel
+                                       *sp->cinfo.d.image_height
+                                      )
+                                     {
+                                       TIFFError(tif->tif_name,
+                                         "Premature end of JPEG bit-stream");
+                                       return 0;
+                                     }
+            case JPEG_REACHED_EOI: ;
+          }
+      }
+    else /* pixel-interleaved color planes */
+      tif->tif_decoderow = tif->tif_decodestrip = tif->tif_decodetile =
+        downsampled_output ? OJPEGDecodeRawContig : OJPEGDecode;
+    return 1;
+#   undef td
+  }
+
+static int
+OJPEGPreDecode(register TIFF *tif,tsample_t s)
+  { register OJPEGState *sp = OJState(tif);
+
+ /* Nothing needs doing unless the current strip/tile index is an exact
+    multiple of the directory's "td_stripsperimage" count, i.e., unless we
+    are about to read the first row of an image plane (hopefully coincident
+    with a JPEG "scan").  In every other case, let the decompressor run "as
+    is" and simply report success.
+ */
+    if ((isTiled(tif) ? tif->tif_curtile : tif->tif_curstrip)
+        % tif->tif_dir.td_stripsperimage)
+      return 1;
+
+ /* In "buffered image" mode, ask the JPEG Library to start output for scan
+    number "s+1"; report failure if that request yields a zero status.
+ */
+    if (sp->cinfo.d.buffered_image)
+      {
+        if (!CALLJPEG(sp,0,jpeg_start_output(&sp->cinfo.d,s+1))) return 0;
+      }
+    sp->cinfo.d.output_scanline = 0; /* Rewind the scan-line counter */
+
+ /* Mark subsampling buffers "empty". */
+#   ifdef D_LOSSLESS_SUPPORTED
+    sp->scancount = sp->cinfo.d.min_codec_data_unit;
+#   else
+    sp->scancount = DCTSIZE;
+#   endif
+    return 1;
+  }
+
+/*ARGSUSED*/ static void
+OJPEGPostDecode(register TIFF *tif,tidata_t buf,tsize_t cc)
+  { register OJPEGState *sp = OJState(tif);
+
+ /* Nothing to do until the decompressor's "output_scanline" counter has
+    reached "output_height", i.e., until the end of a sample/scan.
+ */
+    if (sp->cinfo.d.output_scanline < sp->cinfo.d.output_height) return;
+
+ /* With separate color planes ("buffered image" mode), end the JPEG scan
+    just completed; the result of the call is discarded.
+ */
+    if (sp->cinfo.d.buffered_image)
+      CALLJPEG(sp,-1,jpeg_finish_output(&sp->cinfo.d));
+
+ /* At the image's last strip/tile, also stop the JPEG decompressor. */
+    if ((isTiled(tif) ? tif->tif_curtile : tif->tif_curstrip)
+        >= tif->tif_dir.td_nstrips-1)
+      CALLJPEG(sp,0,jpeg_finish_decompress(&sp->cinfo.d));
+  }
+
+static int
+OJPEGVSetField(register TIFF *tif,ttag_t tag,va_list ap)
+{
+    uint32 v32;
+    register OJPEGState *sp = OJState(tif);
+#   define td (&tif->tif_dir)
+    toff_t tiffoff=0;
+    uint32 bufoff=0;
+    uint32 code_count=0;
+    int i2=0;
+    int k2=0;
+
+    switch (tag)
+      {
+        default                            : return
+                                               (*sp->vsetparent)(tif,tag,ap);
+
+     /* BEWARE OF KLUDGE:  Some old-format JPEG-in-TIFF files, including those
+                           produced by the Wang Imaging application for Micro-
+        soft Windows, illegally omit a "ReferenceBlackWhite" TIFF tag, even
+        though the TIFF specification's default is intended for the RGB color
+        space and is inappropriate for the YCbCr color space ordinarily used for
+        JPEG images.  Since many TIFF client applications request the value of
+        this tag immediately after a TIFF image directory is parsed, and before
+        any other code in this module receives control, we are forced to fix
+        this problem very early in image-file processing.  Fortunately, legal
+        TIFF files are supposed to store their tags in numeric order, so a
+        mandatory "PhotometricInterpretation" tag should always appear before
+        an optional "ReferenceBlackWhite" tag.  Hence, we slyly peek ahead when
+        we discover the desired photometry, by installing modified black and
+        white reference levels.
+     */
+        case TIFFTAG_PHOTOMETRIC           :
+          if (   (v32 = (*sp->vsetparent)(tif,tag,ap))
+              && td->td_photometric == PHOTOMETRIC_YCBCR
+             )
+	  {
+		float *ref;
+		if (!TIFFGetField(tif, TIFFTAG_REFERENCEBLACKWHITE, &ref)) {
+			float refbw[6];
+			long top = 1L << td->td_bitspersample;
+			refbw[0] = 0;
+			refbw[1] = (float)(top-1L);
+			refbw[2] = (float)(top>>1);
+			refbw[3] = refbw[1];
+			refbw[4] = refbw[2];
+			refbw[5] = refbw[1];
+			TIFFSetField(tif, TIFFTAG_REFERENCEBLACKWHITE, refbw);
+		}
+	  }
+          return v32;
+
+     /* BEWARE OF KLUDGE:  According to Charles Auer <Bumble731@msn.com>, if our
+                           input is a multi-image (multi-directory) JPEG-in-TIFF
+        file produced by the Wang Imaging application on Microsoft Windows,
+        for some reason the first directory excludes the vendor-specific "WANG
+        PageControl" tag (32934) that we check below, so the only other way to
+        identify these directories is apparently to look for a software-
+        identification tag with the substring, "Wang Labs".  Single-image files
+        can apparently pass both tests, which causes no harm here, but what a
+        mess this is!
+     */
+        case TIFFTAG_SOFTWARE              :
+        {
+            char *software;
+
+            v32 = (*sp->vsetparent)(tif,tag,ap);
+            if( TIFFGetField( tif, TIFFTAG_SOFTWARE, &software )
+                && strstr( software, "Wang Labs" ) )
+                sp->is_WANG = 1;
+            return v32;
+        }
+
+        case TIFFTAG_JPEGPROC              :
+        case TIFFTAG_JPEGIFOFFSET          :
+        case TIFFTAG_JPEGIFBYTECOUNT       :
+        case TIFFTAG_JPEGRESTARTINTERVAL   :
+        case TIFFTAG_JPEGLOSSLESSPREDICTORS:
+        case TIFFTAG_JPEGPOINTTRANSFORM    :
+        case TIFFTAG_JPEGQTABLES           :
+        case TIFFTAG_JPEGDCTABLES          :
+        case TIFFTAG_JPEGACTABLES          :
+        case TIFFTAG_WANG_PAGECONTROL      :
+        case TIFFTAG_JPEGCOLORMODE         : ;
+      };
+    v32 = va_arg(ap,uint32); /* No. of values in this TIFF record */
+
+    /* This switch statement is added for OJPEGVSetField */
+    if(v32 !=0){
+        switch(tag){
+            case TIFFTAG_JPEGPROC:
+                sp->jpegproc=v32;
+                break;
+            case TIFFTAG_JPEGIFOFFSET:
+                sp->jpegifoffset=v32;
+		break;
+            case TIFFTAG_JPEGIFBYTECOUNT:
+		sp->jpegifbytecount=v32;
+		break;
+            case TIFFTAG_JPEGRESTARTINTERVAL:
+		sp->jpegrestartinterval=v32;
+		break;
+            case TIFFTAG_JPEGLOSSLESSPREDICTORS:
+		sp->jpeglosslesspredictors_length=v32;
+		break;
+            case TIFFTAG_JPEGPOINTTRANSFORM:
+		sp->jpegpointtransform_length=v32;
+		break;
+            case TIFFTAG_JPEGQTABLES:
+		sp->jpegqtables_length=v32;
+		break;
+            case TIFFTAG_JPEGACTABLES:
+		sp->jpegactables_length=v32;
+		break;
+            case TIFFTAG_JPEGDCTABLES:
+		sp->jpegdctables_length=v32;
+		break;
+            default:
+		break;
+        }
+    }
+
+ /* BEWARE:  The following actions apply only if we are reading a "source" TIFF
+             image to be decompressed for a client application program.  If we
+    ever enhance this file's CODEC to write "destination" JPEG-in-TIFF images,
+    we'll need an "if"- and another "switch"-statement below, because we'll
+    probably want to store these records' values in some different places.  Most
+    of these need not be parsed here in order to decode JPEG bit stream, so we
+    set boolean flags to note that they have been seen, but we otherwise ignore
+    them.
+ */
+    switch (tag)
+      { JHUFF_TBL **h;
+
+     /* Validate the JPEG-process code. */
+
+        case TIFFTAG_JPEGPROC              :
+          switch (v32)
+            {
+              default               : TIFFError(tif->tif_name,
+                                        "Unknown JPEG process");
+                                      return 0;
+#             ifdef C_LOSSLESS_SUPPORTED
+
+           /* Image uses (lossy) baseline sequential coding. */
+
+              case JPEGPROC_BASELINE: sp->cinfo.d.process = JPROC_SEQUENTIAL;
+                                      sp->cinfo.d.data_unit = DCTSIZE;
+                                      break;
+
+           /* Image uses (lossless) Huffman coding. */
+
+              case JPEGPROC_LOSSLESS: sp->cinfo.d.process = JPROC_LOSSLESS;
+                                      sp->cinfo.d.data_unit = 1;
+#             else /* not C_LOSSLESS_SUPPORTED */
+              case JPEGPROC_LOSSLESS: TIFFError(JPEGLib_name,
+                                        "Does not support lossless Huffman coding");
+                                      return 0;
+              case JPEGPROC_BASELINE: ;
+#             endif /* C_LOSSLESS_SUPPORTED */
+            };
+          break;
+
+     /* The TIFF Version 6.0 specification says that if the value of a TIFF
+        "JPEGInterchangeFormat" record is 0, then we are to behave as if this
+        record were absent; i.e., the data does *not* represent a JPEG Inter-
+        change Format File (JFIF), so don't even set the boolean "I've been
+        here" flag below.  Otherwise, the field's value represents the file
+        offset of the JPEG SOI marker.
+     */
+        case TIFFTAG_JPEGIFOFFSET          :
+          if (v32)
+            {
+              sp->src.next_input_byte = tif->tif_base + v32;
+              break;
+            };
+          return 1;
+        case TIFFTAG_JPEGIFBYTECOUNT       :
+          sp->src.bytes_in_buffer = v32;
+          break;
+
+     /* The TIFF Version 6.0 specification says that if the JPEG "Restart"
+        marker interval is 0, then the data has no "Restart" markers; i.e., we
+        must behave as if this TIFF record were absent.  So, don't even set the
+        boolean "I've been here" flag below.
+     */
+     /*
+      * Instead, set the field bit so TIFFGetField can get whether or not
+      * it was set.
+      */
+        case TIFFTAG_JPEGRESTARTINTERVAL   :
+          if (v32)
+              sp->cinfo.d.restart_interval = v32;
+              break;
+     /* The TIFF Version 6.0 specification says that this tag is supposed to be
+        a vector containing a value for each image component, but for lossless
+        Huffman coding (the only JPEG process defined by the specification for
+        which this tag should be needed), ISO IS 10918-1 uses only a single
+        value, equivalent to the "Ss" field in a JPEG bit-stream's "Start of
+        Scan" (SOS) marker.  So, we extract the first vector element and ignore
+        the rest.  (I hope this is correct!)
+     */
+        case TIFFTAG_JPEGLOSSLESSPREDICTORS:
+           if (v32)
+             {
+               sp->cinfo.d.Ss = *va_arg(ap,uint16 *);
+               sp->jpeglosslesspredictors = 
+		    _TIFFmalloc(sp->jpeglosslesspredictors_length
+				* sizeof(uint16));
+               if(sp->jpeglosslesspredictors==NULL){return(0);}
+               for(i2=0;i2<sp->jpeglosslesspredictors_length;i2++){
+                ((uint16*)sp->jpeglosslesspredictors)[i2] =
+			((uint16*)sp->cinfo.d.Ss)[i2];
+               }
+               sp->jpeglosslesspredictors_length*=sizeof(uint16);
+               break;
+             };
+           return v32;
+
+     /* The TIFF Version 6.0 specification says that this tag is supposed to be
+        a vector containing a value for each image component, but for lossless
+        Huffman coding (the only JPEG process defined by the specification for
+        which this tag should be needed), ISO IS 10918-1 uses only a single
+        value, equivalent to the "Al" field in a JPEG bit-stream's "Start of
+        Scan" (SOS) marker.  So, we extract the first vector element and ignore
+        the rest.  (I hope this is correct!)
+     */
+        case TIFFTAG_JPEGPOINTTRANSFORM    :
+           if (v32)
+             {
+               sp->cinfo.d.Al = *va_arg(ap,uint16 *);
+               sp->jpegpointtransform =
+		    _TIFFmalloc(sp->jpegpointtransform_length*sizeof(uint16));
+               if(sp->jpegpointtransform==NULL){return(0);}
+               for(i2=0;i2<sp->jpegpointtransform_length;i2++) {
+                ((uint16*)sp->jpegpointtransform)[i2] =
+			((uint16*)sp->cinfo.d.Al)[i2];
+               }
+               sp->jpegpointtransform_length*=sizeof(uint16);
+               break;
+             }
+           return v32;
+
+     /* We have a vector of offsets to quantization tables, so load 'em! */
+
+        case TIFFTAG_JPEGQTABLES           :
+          if (v32)
+            { uint32 *v;
+              int i;
+              if (v32 > NUM_QUANT_TBLS)
+                {
+                  TIFFError(tif->tif_name,"Too many quantization tables");
+                  return 0;
+                };
+              i = 0;
+              v = va_arg(ap,uint32 *);
+                sp->jpegqtables=_TIFFmalloc(64*sp->jpegqtables_length);
+                if(sp->jpegqtables==NULL){return(0);}
+                tiffoff = TIFFSeekFile(tif, 0, SEEK_CUR);
+                bufoff=0;
+                for(i2=0;i2<sp->jpegqtables_length;i2++){
+                    TIFFSeekFile(tif, v[i2], SEEK_SET);
+                    TIFFReadFile(tif, &(((unsigned char*)(sp->jpegqtables))[bufoff]),
+				 64);
+                    bufoff+=64;
+                }
+                sp->jpegqtables_length=bufoff;
+                TIFFSeekFile(tif, tiffoff, SEEK_SET);
+
+              do /* read quantization table */
+                { register UINT8 *from = tif->tif_base + *v++;
+                  register UINT16 *to;
+                  register int j = DCTSIZE2;
+
+                  if (!( sp->cinfo.d.quant_tbl_ptrs[i]
+                       = CALLJPEG(sp,0,jpeg_alloc_quant_table(&sp->cinfo.comm))
+                       )
+                     )
+                    {
+                      TIFFError(JPEGLib_name,"No space for quantization table");
+                      return 0;
+                    };
+                  to = sp->cinfo.d.quant_tbl_ptrs[i]->quantval;
+                  do *to++ = *from++; while (--j > 0);
+                }
+              while (++i < v32);
+              sp->jpegtablesmode |= JPEGTABLESMODE_QUANT;
+            };
+          break;
+
+     /* We have a vector of offsets to DC Huffman tables, so load 'em! */
+
+        case TIFFTAG_JPEGDCTABLES          :
+          h = sp->cinfo.d.dc_huff_tbl_ptrs;
+          goto L;
+
+     /* We have a vector of offsets to AC Huffman tables, so load 'em! */
+
+        case TIFFTAG_JPEGACTABLES          :
+          h = sp->cinfo.d.ac_huff_tbl_ptrs;
+       L: if (v32)
+            { uint32 *v;
+              int i;
+              if (v32 > NUM_HUFF_TBLS)
+                {
+                  TIFFError(tif->tif_name,"Too many Huffman tables");
+                  return 0;
+                };
+              v = va_arg(ap,uint32 *);
+                if(tag == TIFFTAG_JPEGDCTABLES) {
+                    sp->jpegdctables=_TIFFmalloc(272*sp->jpegdctables_length);
+                    if(sp->jpegdctables==NULL){return(0);}
+                    tiffoff = TIFFSeekFile(tif, 0, SEEK_CUR);
+                    bufoff=0;
+                    code_count=0;                
+                    for(i2=0;i2<sp->jpegdctables_length;i2++){
+                        TIFFSeekFile(tif, v[i2], SEEK_SET);
+                        TIFFReadFile(tif,
+				     &(((unsigned char*)(sp->jpegdctables))[bufoff]),
+				     16);
+                        code_count=0;
+                        for(k2=0;k2<16;k2++){
+                            code_count+=((unsigned char*)(sp->jpegdctables))[k2+bufoff];
+                        }
+                        TIFFReadFile(tif,
+				     &(((unsigned char*)(sp->jpegdctables))[bufoff+16]),
+				     code_count);
+                        bufoff+=16;
+                        bufoff+=code_count;
+                    }
+                    sp->jpegdctables_length=bufoff;
+                    TIFFSeekFile(tif, tiffoff, SEEK_SET);
+                }
+                if(tag==TIFFTAG_JPEGACTABLES){
+                    sp->jpegactables=_TIFFmalloc(272*sp->jpegactables_length);
+                    if(sp->jpegactables==NULL){return(0);}
+                    tiffoff = TIFFSeekFile(tif, 0, SEEK_CUR);
+                    bufoff=0;
+                    code_count=0;                
+                    for(i2=0;i2<sp->jpegactables_length;i2++){
+                        TIFFSeekFile(tif, v[i2], SEEK_SET);
+                        TIFFReadFile(tif, &(((unsigned char*)(sp->jpegactables))[bufoff]), 16);
+                        code_count=0;
+                        for(k2=0;k2<16;k2++){
+                            code_count+=((unsigned char*)(sp->jpegactables))[k2+bufoff];
+                        }
+                        TIFFReadFile(tif, &(((unsigned char*)(sp->jpegactables))[bufoff+16]), code_count);
+                        bufoff+=16;
+                        bufoff+=code_count;
+                    }
+                    sp->jpegactables_length=bufoff;
+                    TIFFSeekFile(tif, tiffoff, SEEK_SET);
+                }
+              i = 0;
+              do /* copy each Huffman table */
+                { int size = 0;
+                  register UINT8 *from = tif->tif_base + *v++, *to;
+                  register int j = sizeof (*h)->bits;
+
+               /* WARNING:  This code relies on the fact that an image file not
+                            "memory mapped" was read entirely into a single
+                  buffer by "TIFFInitOJPEG()", so we can do a fast memory-to-
+                  memory copy here.  Each table consists of 16 Bytes, which are
+                  suffixed to a 0 Byte when copied, followed by a variable
+                  number of Bytes whose length is the sum of the first 16.
+               */
+                  if (!( *h
+                       = CALLJPEG(sp,0,jpeg_alloc_huff_table(&sp->cinfo.comm))
+                       )
+                     )
+                    {
+                      TIFFError(JPEGLib_name,"No space for Huffman table");
+                      return 0;
+                    };
+                  to = (*h++)->bits;
+                  *to++ = 0;
+                  while (--j > 0) size += *to++ = *from++; /* Copy 16 Bytes */
+                  if (size > sizeof (*h)->huffval/sizeof *(*h)->huffval)
+                    {
+                      TIFFError(tif->tif_name,"Huffman table too big");
+                      return 0;
+                    };
+                  if ((j = size) > 0) do *to++ = *from++; while (--j > 0);
+                  while (++size <= sizeof (*h)->huffval/sizeof *(*h)->huffval)
+                    *to++ = 0; /* Zero the rest of the table for cleanliness */
+                }
+              while (++i < v32);
+              sp->jpegtablesmode |= JPEGTABLESMODE_HUFF;
+            };
+          break;
+
+     /* The following vendor-specific TIFF tag occurs in (highly illegal) files
+        produced by the Wang Imaging application for Microsoft Windows.  These
+        can apparently have several "pages", in which case this tag specifies
+        the offset of a "page control" structure, which we don't currently know
+        how to handle.  0 indicates a 1-page image with no "page control", which
+        we make a feeble effort to handle.
+     */
+        case TIFFTAG_WANG_PAGECONTROL      :
+          if (v32 == 0) v32 = -1;
+          sp->is_WANG = v32;
+          tag = TIFFTAG_JPEGPROC+FIELD_WANG_PAGECONTROL-FIELD_JPEGPROC;
+          break;
+
+     /* This pseudo tag indicates whether our caller is expected to do YCbCr <->
+        RGB color-space conversion (JPEGCOLORMODE_RAW <=> 0) or whether we must
+        ask the JPEG Library to do it (JPEGCOLORMODE_RGB <=> 1).
+     */
+        case TIFFTAG_JPEGCOLORMODE         :
+          sp->jpegcolormode = v32;
+
+       /* Mark the image to indicate whether returned data is up-sampled, so
+          that "TIFF{Strip,Tile}Size()" reflect the true amount of data present.
+       */
+          v32 = tif->tif_flags; /* Save flags temporarily */
+          tif->tif_flags &= ~TIFF_UPSAMPLED;
+          if (   td->td_photometric == PHOTOMETRIC_YCBCR
+              &&    (td->td_ycbcrsubsampling[0]<<3 | td->td_ycbcrsubsampling[1])
+                 != 011
+              && sp->jpegcolormode == JPEGCOLORMODE_RGB
+             ) tif->tif_flags |= TIFF_UPSAMPLED;
+
+       /* If the up-sampling state changed, re-calculate tile size. */
+
+          if ((tif->tif_flags ^ v32) & TIFF_UPSAMPLED)
+            {
+              tif->tif_tilesize = isTiled(tif) ? TIFFTileSize(tif) : (tsize_t) -1;
+              tif->tif_flags |= TIFF_DIRTYDIRECT;
+            };
+          return 1;
+      };
+    TIFFSetFieldBit(tif,tag-TIFFTAG_JPEGPROC+FIELD_JPEGPROC);
+    return 1;
+#   undef td
+  }
+
+static int
+OJPEGVGetField(register TIFF *tif,ttag_t tag,va_list ap)
+  { register OJPEGState *sp = OJState(tif);
+
+ /* Store the value of a CODEC-specific tag through the pointer(s) which our
+    caller passed in "ap" and return 1, or return 0 when there is nothing to
+    report.  Any tag not listed below is handed to the saved parent method.
+ */
+    switch (tag)
+      {
+
+     /* If this file has managed to synthesize a set of consolidated "metadata"
+        tables for the current (post-TIFF Version 6.0 specification) JPEG-in-
+        TIFF encapsulation strategy, then tell our caller about them; otherwise,
+        keep mum.  The failure return below must be explicit:  without it,
+        control falls into the next case, which stores the color mode through
+        what our caller supplied as a table-length pointer and reports success.
+     */
+        case TIFFTAG_JPEGTABLES            :
+          if (sp->jpegtables_length) /* we have "new"-style JPEG tables */
+            {
+              *va_arg(ap,uint32 *) = sp->jpegtables_length;
+              *va_arg(ap,char **) = sp->jpegtables;
+              return 1;
+            };
+          return 0; /* no tables synthesized, so nothing is stored */
+
+     /* This pseudo tag indicates whether our caller is expected to do YCbCr <->
+        RGB color-space conversion (JPEGCOLORMODE_RAW <=> 0) or whether we must
+        ask the JPEG Library to do it (JPEGCOLORMODE_RGB <=> 1).
+     */
+        case TIFFTAG_JPEGCOLORMODE         :
+          *va_arg(ap,uint32 *) = sp->jpegcolormode;
+          return 1;
+
+     /* The following tags are defined by the TIFF Version 6.0 specification
+        and are obsolete, but we report whatever our state block holds for
+        them.  Scalar tags store one value; vector tags store a length and
+        then a pointer to the buffer which our state block owns.
+     */
+        case TIFFTAG_JPEGPROC              :
+          *va_arg(ap,uint16 *) = sp->jpegproc;
+          return 1;
+        case TIFFTAG_JPEGIFOFFSET          :
+          *va_arg(ap,uint32 *) = sp->jpegifoffset;
+          return 1;
+        case TIFFTAG_JPEGIFBYTECOUNT       :
+          *va_arg(ap,uint32 *) = sp->jpegifbytecount;
+          return 1;
+        case TIFFTAG_JPEGRESTARTINTERVAL   :
+          *va_arg(ap,uint32 *) = sp->jpegrestartinterval;
+          return 1;
+        case TIFFTAG_JPEGLOSSLESSPREDICTORS:
+          *va_arg(ap,uint32 *) = sp->jpeglosslesspredictors_length;
+          *va_arg(ap,void **) = sp->jpeglosslesspredictors;
+          return 1;
+        case TIFFTAG_JPEGPOINTTRANSFORM    :
+          *va_arg(ap,uint32 *) = sp->jpegpointtransform_length;
+          *va_arg(ap,void **) = sp->jpegpointtransform;
+          return 1;
+        case TIFFTAG_JPEGQTABLES           :
+          *va_arg(ap,uint32 *) = sp->jpegqtables_length;
+          *va_arg(ap,void **) = sp->jpegqtables;
+          return 1;
+        case TIFFTAG_JPEGDCTABLES          :
+          *va_arg(ap,uint32 *) = sp->jpegdctables_length;
+          *va_arg(ap,void **) = sp->jpegdctables;
+          return 1;
+        case TIFFTAG_JPEGACTABLES          :
+          *va_arg(ap,uint32 *) = sp->jpegactables_length;
+          *va_arg(ap,void **) = sp->jpegactables;
+          return 1;
+      };
+    return (*sp->vgetparent)(tif,tag,ap);
+  }
+
+static void
+OJPEGPrintDir(register TIFF *tif,FILE *fd,long flags)
+  { register OJPEGState *sp = OJState(tif);
+    const long tables = TIFFPRINT_JPEGQTABLES|TIFFPRINT_JPEGDCTABLES
+                      | TIFFPRINT_JPEGACTABLES;
+ /* Report the synthesized JPEG tables' size when any table detail was asked
+    for.  "%lu" requires "unsigned long", so cast the stored length to match.
+ */
+    if ((flags & tables) && sp->jpegtables_length)
+      fprintf(fd,"  JPEG Table Data: <present>, %lu bytes\n",
+        (unsigned long)sp->jpegtables_length);
+  }
+
+static uint32
+OJPEGDefaultStripSize(register TIFF *tif,register uint32 s)
+  { register OJPEGState *sp = OJState(tif);
+    register tsize_t unit;
+ /* Take the strip size which the saved parent method suggests and, unless it
+    already covers the image, round it up to whole subsampled data units.
+ */
+    s = (*sp->defsparent)(tif,s);
+    if (s >= tif->tif_dir.td_imagelength) return s;
+    if (sp->cinfo.comm.is_decompressor)
+#   ifdef D_LOSSLESS_SUPPORTED
+      unit = sp->cinfo.d.min_codec_data_unit;
+#   else
+      unit = DCTSIZE;
+#   endif
+    else
+#   ifdef C_LOSSLESS_SUPPORTED
+      unit = sp->cinfo.c.data_unit;
+#   else
+      unit = DCTSIZE;
+#   endif
+    unit = TIFFroundup(unit,16);
+    return TIFFroundup(s,tif->tif_dir.td_ycbcrsubsampling[1]*unit);
+  }
+
+static void
+OJPEGDefaultTileSize(register TIFF *tif,register uint32 *tw,register uint32 *th)
+  { register OJPEGState *sp = OJState(tif);
+    register tsize_t unit;
+ /* Round the saved parent method's tile size up to whole subsampled data
+    units, each unit itself being rounded up to a multiple of 16. */
+    if (sp->cinfo.comm.is_decompressor)
+#   ifdef D_LOSSLESS_SUPPORTED
+      unit = sp->cinfo.d.min_codec_data_unit;
+#   else
+      unit = DCTSIZE;
+#   endif
+    else
+#   ifdef C_LOSSLESS_SUPPORTED
+      unit = sp->cinfo.c.data_unit;
+#   else
+      unit = DCTSIZE;
+#   endif
+    unit = TIFFroundup(unit,16);
+    (*sp->deftparent)(tif,tw,th);
+    *tw = TIFFroundup(*tw,tif->tif_dir.td_ycbcrsubsampling[0]*unit);
+    *th = TIFFroundup(*th,tif->tif_dir.td_ycbcrsubsampling[1]*unit);
+  }
+
+static void
+OJPEGCleanUp(register TIFF *tif)
+  { register OJPEGState *sp = OJState(tif);
+
+ /* Tear down everything which this CODEC attached to "tif":  the JPEG Library
+    object, every buffer hanging off our state block, the whole-file buffer
+    (when we had to make one) and, last of all, the state block itself.
+ */
+    if (!sp) return; /* no state block, so there is nothing to release */
+    CALLVJPEG(sp,jpeg_destroy(&sp->cinfo.comm)); /* Free JPEG Lib. vars. */
+#   define OJPEG_RELEASE(p) do { if (p) { _TIFFfree(p); (p) = 0; } } while (0)
+    OJPEG_RELEASE(sp->jpegtables);
+    OJPEG_RELEASE(sp->jpeglosslesspredictors);
+    OJPEG_RELEASE(sp->jpegpointtransform);
+    OJPEG_RELEASE(sp->jpegqtables);
+    OJPEG_RELEASE(sp->jpegactables);
+    OJPEG_RELEASE(sp->jpegdctables);
+#   undef OJPEG_RELEASE
+
+ /* A file which isn't "memory mapped" was read into one large buffer of our
+    own making, so give that buffer back too.
+ */
+    if (!isMapped(tif) && tif->tif_base)
+      {
+        _TIFFfree(tif->tif_base);
+        tif->tif_base = 0;
+        tif->tif_size = 0;
+      }
+
+    _TIFFfree(sp); /* Release local variables */
+    tif->tif_data = 0;
+  }
+
+/* Install the decode-only "old JPEG" CODEC on "tif"; "scheme" is not used.
+   Returns 1 on success, or 0 after releasing whatever this call allocated if
+   a buffer, the file's content or the JPEG Library object can't be obtained.
+*/
+int
+TIFFInitOJPEG(register TIFF *tif,int scheme)
+  { register OJPEGState *sp;
+#   define td (&tif->tif_dir)
+#   ifndef never
+
+ /* This module supports a decompression-only CODEC, which is intended strictly
+    for viewing old image files using the obsolete JPEG-in-TIFF encapsulation
+    specified by the TIFF Version 6.0 specification.  It does not, and never
+    should, support compression for new images.  If a client application asks us
+    to, refuse and complain loudly!
+ */
+    if (tif->tif_mode != O_RDONLY) return _notSupported(tif);
+#   endif /* never */
+    if (!isMapped(tif))
+      {
+
+     /* BEWARE OF KLUDGE:  If our host operating-system doesn't let an image
+                           file be "memory mapped", then we read the entire
+        file into a single (possibly large) memory buffer as if it had been
+        "memory mapped".  This is likely to waste space, but very old JPEG-in-
+        TIFF encapsulations aren't guaranteed to be JFIF bit streams, or to
+        have a TIFF "JPEGTables" record or much other "metadata" to help us
+        locate the decoding tables and entropy-coded data, so we're likely to
+        do a lot of random-access reading and must have the JPEG Library scan
+        much of the file anyway.  A file too big to buffer is simply refused.
+     */
+        if (!(tif->tif_base = _TIFFmalloc(tif->tif_size=TIFFGetFileSize(tif))))
+          {
+            TIFFError(tif->tif_name,"Cannot allocate file buffer");
+            return 0;
+          };
+        if (!SeekOK(tif,0) || !ReadOK(tif,tif->tif_base,tif->tif_size))
+          {
+            TIFFError(tif->tif_name,"Cannot read file");
+            goto bad_file;
+          }
+      };
+
+ /* Allocate storage for this module's per-file variables. */
+
+    if (!(tif->tif_data = (tidata_t)_TIFFmalloc(sizeof *sp)))
+      {
+        TIFFError("TIFFInitOJPEG","No space for JPEG state block");
+        goto bad_file;
+      };
+    (sp = OJState(tif))->tif = tif; /* Initialize reverse pointer */
+    sp->cinfo.d.err = jpeg_std_error(&sp->err); /* Initialize error handling */
+    sp->err.error_exit = TIFFojpeg_error_exit;
+    sp->err.output_message = TIFFojpeg_output_message;
+    if (!CALLVJPEG(sp,jpeg_create_decompress(&sp->cinfo.d))) goto bad_state;
+
+ /* Install CODEC-specific tag information and override default TIFF Library
+    "method" subroutines with our own, CODEC-specific methods, saving some of
+    the ones we replace for "fall back" in case our own methods fail.
+ */
+    _TIFFMergeFieldInfo(tif,ojpegFieldInfo,
+      sizeof ojpegFieldInfo/sizeof *ojpegFieldInfo);
+    sp->defsparent = tif->tif_defstripsize;
+    sp->deftparent = tif->tif_deftilesize;
+    sp->vgetparent = tif->tif_tagmethods.vgetfield;
+    sp->vsetparent = tif->tif_tagmethods.vsetfield;
+    tif->tif_defstripsize = OJPEGDefaultStripSize;
+    tif->tif_deftilesize = OJPEGDefaultTileSize;
+    tif->tif_tagmethods.vgetfield = OJPEGVGetField;
+    tif->tif_tagmethods.vsetfield = OJPEGVSetField;
+    tif->tif_tagmethods.printdir = OJPEGPrintDir;
+#   ifdef never
+    tif->tif_setupencode = OJPEGSetupEncode;
+    tif->tif_preencode = OJPEGPreEncode;
+    tif->tif_postencode = OJPEGPostEncode;
+#   else /* well, hardly ever */
+    tif->tif_setupencode = tif->tif_postencode = _notSupported;
+    tif->tif_preencode = (TIFFPreMethod)_notSupported;
+#   endif /* never */
+    tif->tif_setupdecode = OJPEGSetupDecode;
+    tif->tif_predecode = OJPEGPreDecode;
+    tif->tif_postdecode = OJPEGPostDecode;
+    tif->tif_cleanup = OJPEGCleanUp;
+
+ /* Without "JPEGInterchangeFormat[Length]" TIFF records to guide us, we have
+    few clues about where the encapsulated JPEG bit stream lies, so establish
+    defaults:  If the Image File Directory doesn't immediately follow the TIFF
+    header, assume that the JPEG data lies in between; otherwise, after it.
+ */
+    if (tif->tif_header.tiff_diroff > sizeof tif->tif_header)
+      {
+        sp->src.next_input_byte = tif->tif_base + sizeof tif->tif_header;
+        sp->src.bytes_in_buffer = tif->tif_header.tiff_diroff
+                                - sizeof tif->tif_header;
+      }
+    else /* this case is ugly! */
+      { uint32 maxoffset = tif->tif_size;
+        uint16 dircount;
+     /* Arrange to read the file from the end of the Image File Directory to
+        the next Image File Directory, if there is one, or else to end of file.
+     */
+        if (tif->tif_nextdiroff) maxoffset = tif->tif_nextdiroff; /* Not EOF */
+        _TIFFmemcpy(&dircount,(const tdata_t)
+          (sp->src.next_input_byte = tif->tif_base+tif->tif_header.tiff_diroff),
+          sizeof dircount);
+        if (tif->tif_flags & TIFF_SWAB) TIFFSwabShort(&dircount);
+        sp->src.next_input_byte += dircount*sizeof(TIFFDirEntry)
+                                + sizeof maxoffset + sizeof dircount;
+        sp->src.bytes_in_buffer = tif->tif_base - sp->src.next_input_byte
+                                + maxoffset;
+      };
+
+ /* IJG JPEG Library Version 6B can be configured for either 8- or 12-bit sample
+    precision, but we assume that "old JPEG" TIFF clients only need 8 bits.
+ */
+    sp->cinfo.d.data_precision = 8;
+#   ifdef C_LOSSLESS_SUPPORTED
+ /* If the "JPEGProc" TIFF tag is missing from the Image File Directory, the
+    JPEG Library will use its (lossy) baseline sequential process by default.
+ */
+    sp->cinfo.d.data_unit = DCTSIZE;
+#   endif /* C_LOSSLESS_SUPPORTED */
+
+ /* Initialize other CODEC-specific variables requiring default values. */
+    tif->tif_flags |= TIFF_NOBITREV; /* No bit-reversal within data bytes */
+    sp->h_sampling = sp->v_sampling = 1; /* No subsampling by default */
+    sp->is_WANG = 0; /* Assume not a MS Windows Wang Imaging file by default */
+    sp->jpegtables = 0; sp->jpegtables_length = 0; /* No "new"-style tables */
+    sp->jpegquality = 75; /* Default IJG quality */
+    sp->jpegcolormode = JPEGCOLORMODE_RAW;
+    sp->jpegtablesmode = 0; /* No tables found yet */
+    sp->jpeglosslesspredictors = 0; sp->jpeglosslesspredictors_length = 0;
+    sp->jpegpointtransform = 0; sp->jpegpointtransform_length = 0;
+    sp->jpegqtables = 0; sp->jpegqtables_length = 0;
+    sp->jpegdctables = 0; sp->jpegdctables_length = 0;
+    sp->jpegactables = 0; sp->jpegactables_length = 0;
+    return 1;
+ /* Failure exits:  "OJPEGCleanUp()" isn't installed yet when we get here, so
+    release the state block and/or the whole-file buffer ourselves.
+ */
+ bad_state:
+    _TIFFfree(tif->tif_data);
+    tif->tif_data = 0;
+ bad_file:
+    if (!isMapped(tif) && tif->tif_base)
+      {
+        _TIFFfree(tif->tif_base);
+        tif->tif_base = 0;
+        tif->tif_size = 0;
+      };
+    return 0;
+#   undef td
+  }
+#endif /* OJPEG_SUPPORT */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_open.c b/src/libtiff/tif_open.c
new file mode 100644
index 0000000..98b30fa
--- /dev/null
+++ b/src/libtiff/tif_open.c
@@ -0,0 +1,683 @@
+/* $Id: tif_open.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ */
+#include "tiffiop.h"
+
+/*
+ * Mask and shift tables installed by TIFFInitOrder(), with one
+ * entry per TIFF data type in the order named by the entry
+ * comments.  NOTE(review): presumably these isolate a value that
+ * is stored inline in a directory entry -- confirm against the
+ * code that reads tif_typemask and tif_typeshift.
+ */
+static const long typemask[13] = {
+	0L,			/* TIFF_NOTYPE */
+	0xffL,			/* TIFF_BYTE */
+	(long)0xffffffffL,	/* TIFF_ASCII */
+	0xffffL,		/* TIFF_SHORT */
+	(long)0xffffffffL,	/* TIFF_LONG */
+	(long)0xffffffffL,	/* TIFF_RATIONAL */
+	0xffL,			/* TIFF_SBYTE */
+	0xffL,			/* TIFF_UNDEFINED */
+	0xffffL,		/* TIFF_SSHORT */
+	(long)0xffffffffL,	/* TIFF_SLONG */
+	(long)0xffffffffL,	/* TIFF_SRATIONAL */
+	(long)0xffffffffL,	/* TIFF_FLOAT */
+	(long)0xffffffffL,	/* TIFF_DOUBLE */
+};
+
+/* Shifts selected when the file's magic number is TIFF_BIGENDIAN. */
+static const int bigTypeshift[13] = {
+	0,		/* TIFF_NOTYPE */
+	24,		/* TIFF_BYTE */
+	0,		/* TIFF_ASCII */
+	16,		/* TIFF_SHORT */
+	0,		/* TIFF_LONG */
+	0,		/* TIFF_RATIONAL */
+	24,		/* TIFF_SBYTE */
+	24,		/* TIFF_UNDEFINED */
+	16,		/* TIFF_SSHORT */
+	0,		/* TIFF_SLONG */
+	0,		/* TIFF_SRATIONAL */
+	0,		/* TIFF_FLOAT */
+	0,		/* TIFF_DOUBLE */
+};
+
+/*
+ * Shifts selected for any other magic number: every type uses a
+ * shift of zero, so the whole table is simply zero-initialized.
+ */
+static const int litTypeshift[13] = { 0 };
+
+/*
+ * Dummy functions to fill the omitted client procedures.
+ */
+static int
+_tiffDummyMapProc(thandle_t fd, tdata_t* pbase, toff_t* psize)
+{
+	/* Stand-in mapper: touches none of its arguments and reports 0. */
+	return ((void) fd, (void) pbase, (void) psize, 0);
+}
+
+static void
+_tiffDummyUnmapProc(thandle_t fd, tdata_t base, toff_t size)
+{
+	(void) size, (void) base, (void) fd;	/* stand-in: nothing to unmap */
+}
+
+/*
+ * Install the type mask table and pick the type shift table
+ * that matches the file's magic number, then flag the file
+ * for byte swapping when its byte order differs from the
+ * byte order this library was built for.
+ */
+static void
+TIFFInitOrder(TIFF* tif, int magic)
+{
+	const int bigfile = (magic == TIFF_BIGENDIAN);
+
+	tif->tif_typemask = typemask;
+	tif->tif_typeshift = bigfile ? bigTypeshift : litTypeshift;
+#ifdef WORDS_BIGENDIAN
+	if (!bigfile)		/* host is big-endian, file is not */
+		tif->tif_flags |= TIFF_SWAB;
+#else
+	if (bigfile)		/* file is big-endian, host is not */
+		tif->tif_flags |= TIFF_SWAB;
+#endif
+}
+
+int
+_TIFFgetMode(const char* mode, const char* module)
+{
+	/*
+	 * Translate the leading character(s) of an open mode string
+	 * into open(2)-style flags.  Any other mode is reported under
+	 * the name "module" and yields -1.
+	 */
+	const char kind = mode[0];
+
+	if (kind == 'r')	/* "r" reads, "r+" reads and writes */
+		return (mode[1] == '+') ? O_RDWR : O_RDONLY;
+
+	if (kind == 'w')	/* create, discarding existing content */
+		return (O_RDWR|O_CREAT|O_TRUNC);
+
+	if (kind == 'a')	/* create, keeping existing content */
+		return (O_RDWR|O_CREAT);
+
+	/* Anything else is rejected. */
+	TIFFErrorExt(0, module, "\"%s\": Bad mode", mode);
+	return (-1);
+}
+
+TIFF*
+TIFFClientOpen(
+	const char* name, const char* mode,
+	thandle_t clientdata,
+	TIFFReadWriteProc readproc,
+	TIFFReadWriteProc writeproc,
+	TIFFSeekProc seekproc,
+	TIFFCloseProc closeproc,
+	TIFFSizeProc sizeproc,
+	TIFFMapFileProc mapproc,
+	TIFFUnmapFileProc unmapproc
+)
+{
+	static const char module[] = "TIFFClientOpen";
+	TIFF *tif;
+	int m;
+	const char* cp;
+
+	m = _TIFFgetMode(mode, module);
+	if (m == -1)
+		goto bad2;
+	tif = (TIFF *)_TIFFmalloc(sizeof (TIFF) + strlen(name) + 1);
+	if (tif == NULL) {
+		TIFFErrorExt(clientdata, module, "%s: Out of memory (TIFF structure)", name);
+		goto bad2;
+	}
+	_TIFFmemset(tif, 0, sizeof (*tif));
+	tif->tif_name = (char *)tif + sizeof (TIFF);
+	strcpy(tif->tif_name, name);
+	tif->tif_mode = m &~ (O_CREAT|O_TRUNC);
+	tif->tif_curdir = (tdir_t) -1;		/* non-existent directory */
+	tif->tif_curoff = 0;
+	tif->tif_curstrip = (tstrip_t) -1;	/* invalid strip */
+	tif->tif_row = (uint32) -1;		/* read/write pre-increment */
+	tif->tif_clientdata = clientdata;
+	if (!readproc || !writeproc || !seekproc || !closeproc || !sizeproc) {
+		TIFFErrorExt(clientdata, module,
+			  "One of the client procedures is NULL pointer.");
+		goto bad2;
+	}
+	tif->tif_readproc = readproc;
+	tif->tif_writeproc = writeproc;
+	tif->tif_seekproc = seekproc;
+	tif->tif_closeproc = closeproc;
+	tif->tif_sizeproc = sizeproc;
+        if (mapproc)
+		tif->tif_mapproc = mapproc;
+	else
+		tif->tif_mapproc = _tiffDummyMapProc;
+	if (unmapproc)
+		tif->tif_unmapproc = unmapproc;
+	else
+		tif->tif_unmapproc = _tiffDummyUnmapProc;
+	_TIFFSetDefaultCompressionState(tif);	/* setup default state */
+	/*
+	 * Default is to return data MSB2LSB and enable the
+	 * use of memory-mapped files and strip chopping when
+	 * a file is opened read-only.
+	 */
+	tif->tif_flags = FILLORDER_MSB2LSB;
+	if (m == O_RDONLY )
+            tif->tif_flags |= TIFF_MAPPED;
+
+#ifdef STRIPCHOP_DEFAULT
+	if (m == O_RDONLY || m == O_RDWR)
+		tif->tif_flags |= STRIPCHOP_DEFAULT;
+#endif
+
+	/*
+	 * Process library-specific flags in the open mode string.
+	 * The following flags may be used to control intrinsic library
+	 * behaviour that may or may not be desirable (usually for
+	 * compatibility with some application that claims to support
+	 * TIFF but only supports some braindead idea of what the
+	 * vendor thinks TIFF is):
+	 *
+	 * 'l'		use little-endian byte order for creating a file
+	 * 'b'		use big-endian byte order for creating a file
+	 * 'L'		read/write information using LSB2MSB bit order
+	 * 'B'		read/write information using MSB2LSB bit order
+	 * 'H'		read/write information using host bit order
+	 * 'M'		enable use of memory-mapped files when supported
+	 * 'm'		disable use of memory-mapped files
+	 * 'C'		enable strip chopping support when reading
+	 * 'c'		disable strip chopping support
+	 * 'h'		read TIFF header only, do not load the first IFD
+	 *
+	 * The use of the 'l' and 'b' flags is strongly discouraged.
+	 * These flags are provided solely because numerous vendors,
+	 * typically on the PC, do not correctly support TIFF; they
+	 * only support the Intel little-endian byte order.  This
+	 * support is not configured by default because it supports
+	 * the violation of the TIFF spec that says that readers *MUST*
+	 * support both byte orders.  It is strongly recommended that
+	 * you not use this feature except to deal with busted apps
+	 * that write invalid TIFF.  And even in those cases you should
+	 * bang on the vendors to fix their software.
+	 *
+	 * The 'L', 'B', and 'H' flags are intended for applications
+	 * that can optimize operations on data by using a particular
+	 * bit order.  By default the library returns data in MSB2LSB
+	 * bit order for compatibility with older versions of this
+	 * library.  Returning data in the bit order of the native cpu
+	 * makes the most sense but also requires applications to check
+	 * the value of the FillOrder tag; something they probably do
+	 * not do right now.
+	 *
+	 * The 'M' and 'm' flags are provided because some virtual memory
+	 * systems exhibit poor behaviour when large images are mapped.
+	 * These options permit clients to control the use of memory-mapped
+	 * files on a per-file basis.
+	 *
+	 * The 'C' and 'c' flags are provided because the library support
+	 * for chopping up large strips into multiple smaller strips is not
+	 * application-transparent and as such can cause problems.  The 'c'
+	 * option permits applications that only want to look at the tags,
+	 * for example, to get the unadulterated TIFF tag information.
+	 */
+	for (cp = mode; *cp; cp++)
+		switch (*cp) {
+		case 'b':
+#ifndef WORDS_BIGENDIAN
+		    if (m&O_CREAT)
+				tif->tif_flags |= TIFF_SWAB;
+#endif
+			break;
+		case 'l':
+#ifdef WORDS_BIGENDIAN
+			if ((m&O_CREAT))
+				tif->tif_flags |= TIFF_SWAB;
+#endif
+			break;
+		case 'B':
+			tif->tif_flags = (tif->tif_flags &~ TIFF_FILLORDER) |
+			    FILLORDER_MSB2LSB;
+			break;
+		case 'L':
+			tif->tif_flags = (tif->tif_flags &~ TIFF_FILLORDER) |
+			    FILLORDER_LSB2MSB;
+			break;
+		case 'H':
+			tif->tif_flags = (tif->tif_flags &~ TIFF_FILLORDER) |
+			    HOST_FILLORDER;
+			break;
+		case 'M':
+			if (m == O_RDONLY)
+				tif->tif_flags |= TIFF_MAPPED;
+			break;
+		case 'm':
+			if (m == O_RDONLY)
+				tif->tif_flags &= ~TIFF_MAPPED;
+			break;
+		case 'C':
+			if (m == O_RDONLY)
+				tif->tif_flags |= TIFF_STRIPCHOP;
+			break;
+		case 'c':
+			if (m == O_RDONLY)
+				tif->tif_flags &= ~TIFF_STRIPCHOP;
+			break;
+		case 'h':
+			tif->tif_flags |= TIFF_HEADERONLY;
+			break;
+		}
+	/*
+	 * Read in TIFF header.
+	 */
+	if (tif->tif_mode & O_TRUNC ||
+	    !ReadOK(tif, &tif->tif_header, sizeof (TIFFHeader))) {
+		if (tif->tif_mode == O_RDONLY) {
+			TIFFErrorExt(tif->tif_clientdata, name, "Cannot read TIFF header");
+			goto bad;
+		}
+		/*
+		 * Setup header and write.
+		 */
+#ifdef WORDS_BIGENDIAN
+		tif->tif_header.tiff_magic = tif->tif_flags & TIFF_SWAB
+		    ? TIFF_LITTLEENDIAN : TIFF_BIGENDIAN;
+#else
+		tif->tif_header.tiff_magic = tif->tif_flags & TIFF_SWAB
+		    ? TIFF_BIGENDIAN : TIFF_LITTLEENDIAN;
+#endif
+		tif->tif_header.tiff_version = TIFF_VERSION;
+		if (tif->tif_flags & TIFF_SWAB)
+			TIFFSwabShort(&tif->tif_header.tiff_version);
+		tif->tif_header.tiff_diroff = 0;	/* filled in later */
+
+
+                /*
+                 * The doc for "fopen" for some STD_C_LIBs says that if you 
+                 * open a file for modify ("+"), then you must fseek (or 
+                 * fflush?) between any freads and fwrites.  This is not
+                 * necessary on most systems, but has been shown to be needed
+                 * on Solaris. 
+                 */
+                TIFFSeekFile( tif, 0, SEEK_SET );
+               
+		if (!WriteOK(tif, &tif->tif_header, sizeof (TIFFHeader))) {
+			TIFFErrorExt(tif->tif_clientdata, name, "Error writing TIFF header");
+			goto bad;
+		}
+		/*
+		 * Setup the byte order handling.
+		 */
+		TIFFInitOrder(tif, tif->tif_header.tiff_magic);
+		/*
+		 * Setup default directory.
+		 */
+		if (!TIFFDefaultDirectory(tif))
+			goto bad;
+		tif->tif_diroff = 0;
+		tif->tif_dirlist = NULL;
+		tif->tif_dirnumber = 0;
+		return (tif);
+	}
+	/*
+	 * Setup the byte order handling.
+	 */
+	if (tif->tif_header.tiff_magic != TIFF_BIGENDIAN &&
+	    tif->tif_header.tiff_magic != TIFF_LITTLEENDIAN
+#if MDI_SUPPORT
+	    &&
+#if HOST_BIGENDIAN
+	    tif->tif_header.tiff_magic != MDI_BIGENDIAN
+#else
+	    tif->tif_header.tiff_magic != MDI_LITTLEENDIAN
+#endif
+	    ) {
+		TIFFErrorExt(tif->tif_clientdata, name,  "Not a TIFF or MDI file, bad magic number %d (0x%x)",
+#else
+	    ) {
+		TIFFErrorExt(tif->tif_clientdata, name,  "Not a TIFF file, bad magic number %d (0x%x)",
+#endif
+		    tif->tif_header.tiff_magic,
+		    tif->tif_header.tiff_magic);
+		goto bad;
+	}
+	TIFFInitOrder(tif, tif->tif_header.tiff_magic);
+	/*
+	 * Swap header if required.
+	 */
+	if (tif->tif_flags & TIFF_SWAB) {
+		TIFFSwabShort(&tif->tif_header.tiff_version);
+		TIFFSwabLong(&tif->tif_header.tiff_diroff);
+	}
+	/*
+	 * Now check version (if needed, it's been byte-swapped).
+	 * Note that this isn't actually a version number, it's a
+	 * magic number that doesn't change (stupid).
+	 */
+	if (tif->tif_header.tiff_version == TIFF_BIGTIFF_VERSION) {
+		TIFFErrorExt(tif->tif_clientdata, name,
+                          "This is a BigTIFF file.  This format not supported\n"
+                          "by this version of libtiff." );
+		goto bad;
+	}
+	if (tif->tif_header.tiff_version != TIFF_VERSION) {
+		TIFFErrorExt(tif->tif_clientdata, name,
+		    "Not a TIFF file, bad version number %d (0x%x)",
+		    tif->tif_header.tiff_version,
+		    tif->tif_header.tiff_version); 
+		goto bad;
+	}
+	tif->tif_flags |= TIFF_MYBUFFER;
+	tif->tif_rawcp = tif->tif_rawdata = 0;
+	tif->tif_rawdatasize = 0;
+
+	/*
+	 * Sometimes we do not want to read the first directory (for example,
+	 * it may be broken) and want to proceed to other directories. In this
+	 * case we use the TIFF_HEADERONLY flag to open file and return
+	 * immediately after reading TIFF header.
+	 */
+	if (tif->tif_flags & TIFF_HEADERONLY)
+		return (tif);
+
+	/*
+	 * Setup initial directory.
+	 */
+	switch (mode[0]) {
+	case 'r':
+		tif->tif_nextdiroff = tif->tif_header.tiff_diroff;
+		/*
+		 * Try to use a memory-mapped file if the client
+		 * has not explicitly suppressed usage with the
+		 * 'm' flag in the open mode (see above).
+		 */
+		if ((tif->tif_flags & TIFF_MAPPED) &&
+	!TIFFMapFileContents(tif, (tdata_t*) &tif->tif_base, &tif->tif_size))
+			tif->tif_flags &= ~TIFF_MAPPED;
+		if (TIFFReadDirectory(tif)) {
+			tif->tif_rawcc = -1;
+			tif->tif_flags |= TIFF_BUFFERSETUP;
+			return (tif);
+		}
+		break;
+	case 'a':
+		/*
+		 * New directories are automatically appended
+		 * to the end of the directory chain when they
+		 * are written out (see TIFFWriteDirectory).
+		 */
+		if (!TIFFDefaultDirectory(tif))
+			goto bad;
+		return (tif);
+	}
+bad:
+	tif->tif_mode = O_RDONLY;	/* XXX avoid flush */
+        TIFFCleanup(tif);
+bad2:
+	return ((TIFF*)0);
+}
+
+/*
+ * Query functions to access private data.
+ */
+
+/*
+ * Return the name stored for this open file (tif_name).
+ */
+const char *
+TIFFFileName(TIFF* tif)
+{
+	return tif->tif_name;
+}
+
+/*
+ * Replace the stored file name and return the previous one.
+ */
+const char *
+TIFFSetFileName(TIFF* tif, const char *name)
+{
+	const char* previous = tif->tif_name;
+	tif->tif_name = (char *)name;	/* the pointer is stored, not copied */
+	return previous;
+}
+
+/*
+ * Return the I/O descriptor (tif_fd) of the open file.
+ */
+int
+TIFFFileno(TIFF* tif)
+{
+	return tif->tif_fd;
+}
+
+/*
+ * Install a new I/O descriptor and hand back the one it replaces.
+ */
+int
+TIFFSetFileno(TIFF* tif, int fd)
+{
+	const int previous = tif->tif_fd;
+	tif->tif_fd = fd;
+	return (previous);
+}
+
+/*
+ * Return the client data handle (tif_clientdata) of the open file.
+ */
+thandle_t
+TIFFClientdata(TIFF* tif)
+{
+	return tif->tif_clientdata;
+}
+
+/*
+ * Install a new client data handle and hand back the old one.
+ */
+thandle_t
+TIFFSetClientdata(TIFF* tif, thandle_t newvalue)
+{
+	thandle_t previous = tif->tif_clientdata;
+	tif->tif_clientdata = newvalue;
+	return (previous);
+}
+
+/*
+ * Return the read/write mode (tif_mode) of the open file.
+ */
+int
+TIFFGetMode(TIFF* tif)
+{
+	return tif->tif_mode;
+}
+
+/*
+ * Set read/write mode, and return previous value.
+ */
+int
+TIFFSetMode(TIFF* tif, int mode)
+{
+	const int previous = tif->tif_mode;
+	tif->tif_mode = mode;
+	return previous;
+}
+
+/*
+ * Return nonzero when the file is organized in tiles
+ * and zero when it is organized as strips.
+ */
+int
+TIFFIsTiled(TIFF* tif)
+{
+	return isTiled(tif);
+}
+
+/*
+ * Return the row currently being read/written (tif_row).
+ */
+uint32
+TIFFCurrentRow(TIFF* tif)
+{
+	return tif->tif_row;
+}
+
+/*
+ * Return the index of the current directory (tif_curdir).
+ */
+tdir_t
+TIFFCurrentDirectory(TIFF* tif)
+{
+	return tif->tif_curdir;
+}
+
+/*
+ * Return the current strip (tif_curstrip).
+ */
+tstrip_t
+TIFFCurrentStrip(TIFF* tif)
+{
+	return tif->tif_curstrip;
+}
+
+/*
+ * Return the current tile (tif_curtile).
+ */
+ttile_t
+TIFFCurrentTile(TIFF* tif)
+{
+	return tif->tif_curtile;
+}
+
+/*
+ * Return 1 if the TIFF_SWAB flag is set (byte-swapped data), else 0.
+ */
+int
+TIFFIsByteSwapped(TIFF* tif)
+{
+	return (tif->tif_flags & TIFF_SWAB) ? 1 : 0;
+}
+
+/*
+ * Return nonzero if the data is handed back up-sampled.
+ */
+int
+TIFFIsUpSampled(TIFF* tif)
+{
+	return isUpSampled(tif);
+}
+
+/*
+ * Return nonzero if the fill order is FILLORDER_MSB2LSB.
+ */
+int
+TIFFIsMSB2LSB(TIFF* tif)
+{
+	return isFillOrder(tif, FILLORDER_MSB2LSB);
+}
+
+/*
+ * Return nonzero if the header magic is TIFF_BIGENDIAN.
+ */
+int
+TIFFIsBigEndian(TIFF* tif)
+{
+	return tif->tif_header.tiff_magic == TIFF_BIGENDIAN;
+}
+
+/*
+ * Return the file read method (tif_readproc).
+ */
+TIFFReadWriteProc
+TIFFGetReadProc(TIFF* tif)
+{
+	return tif->tif_readproc;
+}
+
+/*
+ * Return the file write method (tif_writeproc).
+ */
+TIFFReadWriteProc
+TIFFGetWriteProc(TIFF* tif)
+{
+	return tif->tif_writeproc;
+}
+
+/*
+ * Return the file seek method (tif_seekproc).
+ */
+TIFFSeekProc
+TIFFGetSeekProc(TIFF* tif)
+{
+	return tif->tif_seekproc;
+}
+
+/*
+ * Return the file close method (tif_closeproc).
+ */
+TIFFCloseProc
+TIFFGetCloseProc(TIFF* tif)
+{
+	return tif->tif_closeproc;
+}
+
+/*
+ * Return the file size query method (tif_sizeproc).
+ */
+TIFFSizeProc
+TIFFGetSizeProc(TIFF* tif)
+{
+	return tif->tif_sizeproc;
+}
+
+/*
+ * Return the memory mapping method (tif_mapproc).
+ */
+TIFFMapFileProc
+TIFFGetMapFileProc(TIFF* tif)
+{
+	return tif->tif_mapproc;
+}
+
+/*
+ * Return the memory unmapping method (tif_unmapproc).
+ */
+TIFFUnmapFileProc
+TIFFGetUnmapFileProc(TIFF* tif)
+{
+	return tif->tif_unmapproc;
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_packbits.c b/src/libtiff/tif_packbits.c
new file mode 100644
index 0000000..644d0e5
--- /dev/null
+++ b/src/libtiff/tif_packbits.c
@@ -0,0 +1,293 @@
+/* $Id: tif_packbits.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#include "tiffiop.h"
+#ifdef PACKBITS_SUPPORT
+/*
+ * TIFF Library.
+ *
+ * PackBits Compression Algorithm Support
+ */
+#include <stdio.h>
+
+static int
+PackBitsPreEncode(TIFF* tif, tsample_t s)
+{
+	tsize_t* rowbytes;
+
+	(void) s;
+	/* tif_data holds a single tsize_t: the size in bytes of one
+	 * scanline, or of one tile row for tiled files.  Returns 0
+	 * when that scratch value cannot be allocated, 1 otherwise. */
+	rowbytes = (tsize_t*) _TIFFmalloc(sizeof(tsize_t));
+	tif->tif_data = (tidata_t) rowbytes;
+	if (!rowbytes)
+		return (0);
+	*rowbytes = isTiled(tif) ? TIFFTileRowSize(tif) : TIFFScanlineSize(tif);
+	return (1);
+}
+
+static int
+PackBitsPostEncode(TIFF* tif)
+{
+	if (tif->tif_data)	/* row-size scratch from PackBitsPreEncode */
+		_TIFFfree(tif->tif_data);
+	tif->tif_data = NULL;	/* never leave the freed pointer behind */
+	return (1);
+}
+/*
+ * NB: tidata is the element type that tidata_t points to (it is used
+ *     below to cast run-length codes); if tidata_t is made signed
+ *     then this type must be adjusted accordingly.
+ */
+typedef unsigned char tidata;
+
+/*
+ * Encode a run of pixels: cc bytes at buf; 1 on success, -1 if a flush fails.
+ */
+static int
+PackBitsEncode(TIFF* tif, tidata_t buf, tsize_t cc, tsample_t s)
+{
+	unsigned char* bp = (unsigned char*) buf;
+	tidata_t op, ep, lastliteral;	/* output cursor, end of raw buffer, count byte of the open literal */
+	long n, slop;			/* current run length; literal bytes carried across a flush */
+	int b;				/* byte value of the current run */
+	enum { BASE, LITERAL, RUN, LITERAL_RUN } state;	/* what was emitted last */
+
+	(void) s;
+	op = tif->tif_rawcp;
+	ep = tif->tif_rawdata + tif->tif_rawdatasize;
+	state = BASE;
+	lastliteral = 0;
+	while (cc > 0) {
+		/*
+		 * Find the longest string of identical bytes.
+		 */
+		b = *bp++, cc--, n = 1;
+		for (; cc > 0 && b == *bp; cc--, bp++)
+			n++;
+	again:
+		if (op + 2 >= ep) {		/* insure space for new data */
+			/*
+			 * Be careful about writing the last
+			 * literal.  Must write up to that point
+			 * and then copy the remainder to the
+			 * front of the buffer.
+			 */
+			if (state == LITERAL || state == LITERAL_RUN) {
+				slop = op - lastliteral;	/* bytes from the literal's count byte on */
+				tif->tif_rawcc += lastliteral - tif->tif_rawcp;	/* flush only what precedes it */
+				if (!TIFFFlushData1(tif))
+					return (-1);
+				op = tif->tif_rawcp;
+				while (slop-- > 0)
+					*op++ = *lastliteral++;
+				lastliteral = tif->tif_rawcp;
+			} else {
+				tif->tif_rawcc += op - tif->tif_rawcp;
+				if (!TIFFFlushData1(tif))
+					return (-1);
+				op = tif->tif_rawcp;
+			}
+		}
+		switch (state) {
+		case BASE:		/* initial state, set run/literal */
+			if (n > 1) {
+				state = RUN;
+				if (n > 128) {
+					*op++ = (tidata) -127;	/* emit 128 copies, then retry with the rest */
+					*op++ = (tidataval_t) b;
+					n -= 128;
+					goto again;
+				}
+				*op++ = (tidataval_t)(-(n-1));
+				*op++ = (tidataval_t) b;
+			} else {
+				lastliteral = op;
+				*op++ = 0;	/* count byte 0: a literal of one byte */
+				*op++ = (tidataval_t) b;
+				state = LITERAL;
+			}
+			break;
+		case LITERAL:		/* last object was literal string */
+			if (n > 1) {
+				state = LITERAL_RUN;
+				if (n > 128) {
+					*op++ = (tidata) -127;
+					*op++ = (tidataval_t) b;
+					n -= 128;
+					goto again;
+				}
+				*op++ = (tidataval_t)(-(n-1));	/* encode run */
+				*op++ = (tidataval_t) b;
+			} else {			/* extend literal */
+				if (++(*lastliteral) == 127)	/* count byte at its maximum */
+					state = BASE;
+				*op++ = (tidataval_t) b;
+			}
+			break;
+		case RUN:		/* last object was run */
+			if (n > 1) {
+				if (n > 128) {
+					*op++ = (tidata) -127;
+					*op++ = (tidataval_t) b;
+					n -= 128;
+					goto again;
+				}
+				*op++ = (tidataval_t)(-(n-1));
+				*op++ = (tidataval_t) b;
+			} else {
+				lastliteral = op;
+				*op++ = 0;
+				*op++ = (tidataval_t) b;
+				state = LITERAL;
+			}
+			break;
+		case LITERAL_RUN:	/* literal followed by a run */
+			/*
+			 * Check to see if previous run should
+			 * be converted to a literal, in which
+			 * case we convert literal-run-literal
+			 * to a single literal.
+			 */
+			if (n == 1 && op[-2] == (tidata) -1 &&
+			    *lastliteral < 126) {
+				state = (((*lastliteral) += 2) == 127 ?
+				    BASE : LITERAL);
+				op[-2] = op[-1];	/* replicate */
+			} else
+				state = RUN;
+			goto again;
+		}
+	}
+	tif->tif_rawcc += op - tif->tif_rawcp;
+	tif->tif_rawcp = op;
+	return (1);
+}
+
+/*
+ * Encode a rectangular chunk of pixels.  We break it up
+ * into row-sized pieces to insure that encoded runs do
+ * not span rows.  Otherwise, there can be problems with
+ * the decoder if data is read, for example, by scanlines
+ * when it was encoded by strips.  Returns 1 on success
+ * and -1 as soon as PackBitsEncode fails.
+ */
+static int
+PackBitsEncodeChunk(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	tsize_t rowsize = *(tsize_t*)tif->tif_data;
+
+	while ((long)cc > 0) {
+		tsize_t chunk = rowsize;
+		/* a row size <= 0 would never consume input: take it all */
+		if (chunk <= 0 || cc < chunk)
+			chunk = cc;
+		if (PackBitsEncode(tif, bp, chunk, s) < 0)
+			return (-1);
+		bp += chunk;
+		cc -= chunk;
+	}
+	return (1);
+}
+
+static int
+PackBitsDecode(TIFF* tif, tidata_t op, tsize_t occ, tsample_t s)
+{
+	/* Decode raw PackBits data into op; 1 once occ bytes are produced, else 0. */
+	char *bp = (char*) tif->tif_rawcp;
+	tsize_t cc = tif->tif_rawcc;
+	long n;
+	int b;
+
+	(void) s;
+	while (cc > 0 && (long)occ > 0) {
+		n = (long) *bp++, cc--;
+		if (n >= 128)	/* plain char was not sign extended */
+			n -= 256;
+		if (n < 0) {		/* replicate next byte -n+1 times */
+			if (n == -128)	/* nop */
+				continue;
+			n = -n + 1;
+			if (occ < n) {
+				TIFFWarningExt(tif->tif_clientdata, tif->tif_name,
+				    "PackBitsDecode: discarding %ld bytes "
+				    "to avoid buffer overrun", (long) (n - occ));
+				n = occ;
+			}
+			if (cc == 0) {	/* run count without its data byte */
+				TIFFWarningExt(tif->tif_clientdata, tif->tif_name,
+				    "Terminating PackBitsDecode due to lack of data.");
+				break;
+			}
+			occ -= n;
+			b = *bp++, cc--;
+			while (n-- > 0)
+				*op++ = (tidataval_t) b;
+		} else {		/* copy next n+1 bytes literally */
+			if (occ < n + 1) {
+				TIFFWarningExt(tif->tif_clientdata, tif->tif_name,
+				    "PackBitsDecode: discarding %ld bytes "
+				    "to avoid buffer overrun", (long) (n - occ + 1));
+				n = occ - 1;
+			}
+			if (cc < n + 1) {	/* literal longer than the input */
+				TIFFWarningExt(tif->tif_clientdata, tif->tif_name,
+				    "Terminating PackBitsDecode due to lack of data.");
+				break;
+			}
+			_TIFFmemcpy(op, bp, ++n);
+			op += n; occ -= n; bp += n; cc -= n;
+		}
+	}
+	tif->tif_rawcp = (tidata_t) bp;
+	tif->tif_rawcc = cc;
+	if (occ > 0) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "PackBitsDecode: Not enough data for scanline %ld",
+		    (long) tif->tif_row);
+		return (0);
+	}
+	return (1);
+}
+
+int
+TIFFInitPackBits(TIFF* tif, int scheme)
+{
+	(void) scheme;		/* unused */
+	tif->tif_preencode = PackBitsPreEncode;
+	tif->tif_postencode = PackBitsPostEncode;
+	tif->tif_encoderow = PackBitsEncode;
+	tif->tif_encodestrip = PackBitsEncodeChunk;	/* row by row */
+	tif->tif_encodetile = PackBitsEncodeChunk;	/* row by row */
+	tif->tif_decoderow = PackBitsDecode;
+	tif->tif_decodestrip = PackBitsDecode;
+	tif->tif_decodetile = PackBitsDecode;
+	return 1;
+}
+#endif /* PACKBITS_SUPPORT */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_pixarlog.c b/src/libtiff/tif_pixarlog.c
new file mode 100644
index 0000000..b7acbaa
--- /dev/null
+++ b/src/libtiff/tif_pixarlog.c
@@ -0,0 +1,1342 @@
+/* $Id: tif_pixarlog.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1996-1997 Sam Leffler
+ * Copyright (c) 1996 Pixar
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Pixar, Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Pixar, Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL PIXAR, SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#include "tiffiop.h"
+#ifdef PIXARLOG_SUPPORT
+
+/*
+ * TIFF Library.
+ * PixarLog Compression Support
+ *
+ * Contributed by Dan McCoy.
+ *
+ * PixarLog film support uses the TIFF library to store companded
+ * 11 bit values into a tiff file, which are compressed using the 
+ * zip compressor.  
+ *
+ * The codec can take as input and produce as output 32-bit IEEE float values 
+ * as well as 16-bit or 8-bit unsigned integer values.
+ *
+ * On writing any of the above are converted into the internal
+ * 11-bit log format.   In the case of  8 and 16 bit values, the
+ * input is assumed to be unsigned linear color values that represent
+ * the range 0-1.  In the case of IEEE values, the 0-1 range is assumed to
+ * be the normal linear color range, in addition over 1 values are
+ * accepted up to a value of about 25.0 to encode "hot" highlights and such.
+ * The encoding is lossless for 8-bit values, slightly lossy for the
+ * other bit depths.  The actual color precision should be better
+ * than the human eye can perceive with extra room to allow for
+ * error introduced by further image computation.  As with any quantized
+ * color format, it is possible to perform image calculations which
+ * expose the quantization error. This format should certainly be less 
+ * susceptible to such errors than standard 8-bit encodings, but more
+ * susceptible than straight 16-bit or 32-bit encodings.
+ *
+ * On reading the internal format is converted to the desired output format.
+ * The program can request which format it desires by setting the internal
+ * pseudo tag TIFFTAG_PIXARLOGDATAFMT to one of these possible values:
+ *  PIXARLOGDATAFMT_FLOAT     = provide IEEE float values.
+ *  PIXARLOGDATAFMT_16BIT     = provide unsigned 16-bit integer values
+ *  PIXARLOGDATAFMT_8BIT      = provide unsigned 8-bit integer values
+ *
+ * alternately PIXARLOGDATAFMT_8BITABGR provides unsigned 8-bit integer
+ * values with the difference that if there are exactly three or four channels
+ * (rgb or rgba) it swaps the channel order (bgr or abgr).
+ *
+ * PIXARLOGDATAFMT_11BITLOG provides the internal encoding directly
+ * packed in 16-bit values.   However no tools are supplied for interpreting
+ * these values.
+ *
+ * "hot" (over 1.0) areas written in floating point get clamped to
+ * 1.0 in the integer data types.
+ *
+ * When the file is closed after writing, the bit depth and sample format
+ * are set always to appear as if 8-bit data has been written into it.
+ * That way a naive program unaware of the particulars of the encoding
+ * gets the format it is most likely able to handle.
+ *
+ * The codec does its own horizontal differencing step on the coded
+ * values so the library's predictor stuff should be turned off.
+ * The codec also handles byte swapping the encoded values as necessary
+ * since the library does not have the information necessary
+ * to know the bit depth of the raw unencoded buffer.
+ * 
+ */
+
+#include "tif_predict.h"
+#include "zlib.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* Tables for converting to/from 11 bit coded values */
+
+#define  TSIZE	 2048		/* decode table size (11-bit tokens) */
+#define  TSIZEP1 2049		/* Plus one for slop */
+#define  ONE	 1250		/* token value of 1.0 exactly */
+#define  RATIO	 1.004		/* nominal ratio for log part */
+
+#define CODE_MASK 0x7ff         /* 11 bits. */
+
+static float  Fltsize;		/* set by PixarLogMakeTables to lt2size/2 */
+static float  LogK1, LogK2;	/* set by PixarLogMakeTables to 1/c and 1/b */
+
+#define REPEAT(n, op)   { int i; i=n; do { i--; op; } while (i>0); }	/* runs op n times, but at least once */
+
+static void
+horizontalAccumulateF(uint16 *wp, int n, int stride, float *op, 
+	float *ToLinearF)
+{
+    /*
+     * Undo the horizontal differencing of the n coded samples at wp
+     * (stride samples per pixel) and store ToLinearF[code] for each
+     * in op.  Every code, including the first pixel's, is masked to
+     * 11 bits before it indexes the table, so corrupt input cannot
+     * read outside ToLinearF.  Nothing is done when n < stride.
+     *
+     * NOTE(review): the generic-stride loop adds into wp[stride], which
+     * on its last pass lies one pixel beyond the n samples -- confirm
+     * that the caller's buffer has room for that.
+     */
+    register unsigned int  cr, cg, cb, ca;
+    const unsigned int  mask = CODE_MASK;
+
+    if (n >= stride) {
+	if (stride == 3) {
+	    /* first pixel is used as-is (no running sum yet) */
+	    op[0] = ToLinearF[cr = wp[0] & mask];
+	    op[1] = ToLinearF[cg = wp[1] & mask];
+	    op[2] = ToLinearF[cb = wp[2] & mask];
+	    n -= 3;
+	    /* later samples are added to per-channel running sums */
+	    while (n > 0) {
+		wp += 3;
+		op += 3;
+		n -= 3;
+		op[0] = ToLinearF[(cr += wp[0]) & mask];
+		op[1] = ToLinearF[(cg += wp[1]) & mask];
+		op[2] = ToLinearF[(cb += wp[2]) & mask];
+	    }
+	} else if (stride == 4) {
+	    /* same scheme with a fourth running sum */
+	    op[0] = ToLinearF[cr = wp[0] & mask];
+	    op[1] = ToLinearF[cg = wp[1] & mask];
+	    op[2] = ToLinearF[cb = wp[2] & mask];
+	    op[3] = ToLinearF[ca = wp[3] & mask];
+	    n -= 4;
+	    while (n > 0) {
+		wp += 4;
+		op += 4;
+		n -= 4;
+		op[0] = ToLinearF[(cr += wp[0]) & mask];
+		op[1] = ToLinearF[(cg += wp[1]) & mask];
+		op[2] = ToLinearF[(cb += wp[2]) & mask];
+		op[3] = ToLinearF[(ca += wp[3]) & mask];
+	    }
+	} else {
+	    /* other strides: sums are accumulated in place inside wp */
+	    REPEAT(stride, *op = ToLinearF[*wp&mask]; wp++; op++)
+	    n -= stride;
+	    while (n > 0) {
+		REPEAT(stride,
+		    wp[stride] += *wp; *op = ToLinearF[*wp&mask]; wp++; op++)
+		n -= stride;
+	    }
+	}
+    }
+}
+
+static void
+horizontalAccumulate12(uint16 *wp, int n, int stride, int16 *op,
+	float *ToLinearF)
+{
+    /* Undo horizontal differencing of the n coded samples at wp and store
+     * ToLinearF[code] * SCALE12, clamped to 3071, in op.  All codes, the
+     * first pixel's included, are masked to 11 bits before the lookup. */
+    register unsigned int  cr, cg, cb, ca, mask = CODE_MASK;
+    register float  t0, t1, t2, t3;
+#define SCALE12 2048.0F
+#define CLAMP12(t) (((t) < 3071) ? (uint16) (t) : 3071)
+    if (n >= stride) {
+	if (stride == 3) {
+	    t0 = ToLinearF[cr = wp[0] & mask] * SCALE12;
+	    t1 = ToLinearF[cg = wp[1] & mask] * SCALE12;
+	    t2 = ToLinearF[cb = wp[2] & mask] * SCALE12;
+	    op[0] = CLAMP12(t0);
+	    op[1] = CLAMP12(t1);
+	    op[2] = CLAMP12(t2);
+	    n -= 3;
+	    while (n > 0) {
+		wp += 3;
+		op += 3;
+		n -= 3;
+		t0 = ToLinearF[(cr += wp[0]) & mask] * SCALE12;
+		t1 = ToLinearF[(cg += wp[1]) & mask] * SCALE12;
+		t2 = ToLinearF[(cb += wp[2]) & mask] * SCALE12;
+		op[0] = CLAMP12(t0);
+		op[1] = CLAMP12(t1);
+		op[2] = CLAMP12(t2);
+	    }
+	} else if (stride == 4) {
+	    t0 = ToLinearF[cr = wp[0] & mask] * SCALE12;
+	    t1 = ToLinearF[cg = wp[1] & mask] * SCALE12;
+	    t2 = ToLinearF[cb = wp[2] & mask] * SCALE12;
+	    t3 = ToLinearF[ca = wp[3] & mask] * SCALE12;
+	    op[0] = CLAMP12(t0);
+	    op[1] = CLAMP12(t1);
+	    op[2] = CLAMP12(t2);
+	    op[3] = CLAMP12(t3);
+	    n -= 4;
+	    while (n > 0) {
+		wp += 4;
+		op += 4;
+		n -= 4;
+		t0 = ToLinearF[(cr += wp[0]) & mask] * SCALE12;
+		t1 = ToLinearF[(cg += wp[1]) & mask] * SCALE12;
+		t2 = ToLinearF[(cb += wp[2]) & mask] * SCALE12;
+		t3 = ToLinearF[(ca += wp[3]) & mask] * SCALE12;
+		op[0] = CLAMP12(t0);
+		op[1] = CLAMP12(t1);
+		op[2] = CLAMP12(t2);
+		op[3] = CLAMP12(t3);
+	    }
+	} else {
+	    REPEAT(stride, t0 = ToLinearF[*wp&mask] * SCALE12;
+                           *op = CLAMP12(t0); wp++; op++)
+	    n -= stride;
+	    while (n > 0) {
+		REPEAT(stride,
+		    wp[stride] += *wp; t0 = ToLinearF[wp[stride]&mask]*SCALE12;
+		    *op = CLAMP12(t0);  wp++; op++)
+		n -= stride;
+	    }
+	}
+    }
+}
+
+static void
+horizontalAccumulate16(uint16 *wp, int n, int stride, uint16 *op,
+	uint16 *ToLinear16)
+{
+    /* Undo horizontal differencing of n coded samples; op gets ToLinear16[code].
+     * Every code, the first pixel's included, is masked to 11 bits first. */
+    register unsigned int  cr, cg, cb, ca, mask = CODE_MASK;
+    if (n >= stride) {
+	if (stride == 3) {
+	    op[0] = ToLinear16[cr = wp[0] & mask];
+	    op[1] = ToLinear16[cg = wp[1] & mask];
+	    op[2] = ToLinear16[cb = wp[2] & mask];
+	    n -= 3;
+	    while (n > 0) {
+		wp += 3;
+		op += 3;
+		n -= 3;
+		op[0] = ToLinear16[(cr += wp[0]) & mask];
+		op[1] = ToLinear16[(cg += wp[1]) & mask];
+		op[2] = ToLinear16[(cb += wp[2]) & mask];
+	    }
+	} else if (stride == 4) {
+	    op[0] = ToLinear16[cr = wp[0] & mask];
+	    op[1] = ToLinear16[cg = wp[1] & mask];
+	    op[2] = ToLinear16[cb = wp[2] & mask];
+	    op[3] = ToLinear16[ca = wp[3] & mask];
+	    n -= 4;
+	    while (n > 0) {
+		wp += 4;
+		op += 4;
+		n -= 4;
+		op[0] = ToLinear16[(cr += wp[0]) & mask];
+		op[1] = ToLinear16[(cg += wp[1]) & mask];
+		op[2] = ToLinear16[(cb += wp[2]) & mask];
+		op[3] = ToLinear16[(ca += wp[3]) & mask];
+	    }
+	} else {
+	    REPEAT(stride, *op = ToLinear16[*wp&mask]; wp++; op++)
+	    n -= stride;
+	    while (n > 0) {
+		REPEAT(stride,
+		    wp[stride] += *wp; *op = ToLinear16[*wp&mask]; wp++; op++)
+		n -= stride;
+	    }
+	}
+    }
+}
+
+/* 
+ * Returns the log encoded 11-bit values with the horizontal
+ * differencing undone.  Every value written to op, including the
+ * first pixel's, is masked to 11 bits.  Does nothing if n < stride.
+ */
+static void
+horizontalAccumulate11(uint16 *wp, int n, int stride, uint16 *op)
+{
+    register unsigned int  cr, cg, cb, ca, mask = CODE_MASK;
+
+    if (n >= stride) {
+	if (stride == 3) {
+	    op[0] = cr = wp[0] & mask;  op[1] = cg = wp[1] & mask;
+	    op[2] = cb = wp[2] & mask;
+	    n -= 3;
+	    while (n > 0) {
+		wp += 3;
+		op += 3;
+		n -= 3;
+		op[0] = (cr += wp[0]) & mask;
+		op[1] = (cg += wp[1]) & mask;
+		op[2] = (cb += wp[2]) & mask;
+	    }
+	} else if (stride == 4) {
+	    op[0] = cr = wp[0] & mask;  op[1] = cg = wp[1] & mask;
+	    op[2] = cb = wp[2] & mask;  op[3] = ca = wp[3] & mask;
+	    n -= 4;
+	    while (n > 0) {
+		wp += 4;
+		op += 4;
+		n -= 4;
+		op[0] = (cr += wp[0]) & mask;
+		op[1] = (cg += wp[1]) & mask;
+		op[2] = (cb += wp[2]) & mask;
+		op[3] = (ca += wp[3]) & mask;
+	    } 
+	} else {
+	    REPEAT(stride, *op = *wp&mask; wp++; op++)
+	    n -= stride;
+	    while (n > 0) {
+		REPEAT(stride,
+		    wp[stride] += *wp; *op = *wp&mask; wp++; op++)
+		n -= stride;
+	    }
+	}
+    }
+}
+
+static void
+horizontalAccumulate8(uint16 *wp, int n, int stride, unsigned char *op,
+	unsigned char *ToLinear8)
+{
+    /* Undo horizontal differencing of n coded samples; op gets ToLinear8[code].
+     * Every code, the first pixel's included, is masked to 11 bits first. */
+    register unsigned int  cr, cg, cb, ca, mask = CODE_MASK;
+    if (n >= stride) {
+	if (stride == 3) {
+	    op[0] = ToLinear8[cr = wp[0] & mask];
+	    op[1] = ToLinear8[cg = wp[1] & mask];
+	    op[2] = ToLinear8[cb = wp[2] & mask];
+	    n -= 3;
+	    while (n > 0) {
+		n -= 3;
+		wp += 3;
+		op += 3;
+		op[0] = ToLinear8[(cr += wp[0]) & mask];
+		op[1] = ToLinear8[(cg += wp[1]) & mask];
+		op[2] = ToLinear8[(cb += wp[2]) & mask];
+	    }
+	} else if (stride == 4) {
+	    op[0] = ToLinear8[cr = wp[0] & mask];
+	    op[1] = ToLinear8[cg = wp[1] & mask];
+	    op[2] = ToLinear8[cb = wp[2] & mask];
+	    op[3] = ToLinear8[ca = wp[3] & mask];
+	    n -= 4;
+	    while (n > 0) {
+		n -= 4;
+		wp += 4;
+		op += 4;
+		op[0] = ToLinear8[(cr += wp[0]) & mask];
+		op[1] = ToLinear8[(cg += wp[1]) & mask];
+		op[2] = ToLinear8[(cb += wp[2]) & mask];
+		op[3] = ToLinear8[(ca += wp[3]) & mask];
+	    }
+	} else {
+	    REPEAT(stride, *op = ToLinear8[*wp&mask]; wp++; op++)
+	    n -= stride;
+	    while (n > 0) {
+		REPEAT(stride,
+		    wp[stride] += *wp; *op = ToLinear8[*wp&mask]; wp++; op++)
+		n -= stride;
+	    }
+	}
+    }
+}
+
+
+static void
+horizontalAccumulate8abgr(uint16 *wp, int n, int stride, unsigned char *op,
+	unsigned char *ToLinear8)
+{
+    /* As horizontalAccumulate8, but strides 3 and 4 are written as 4 bytes in
+     * reversed order (0,b,g,r / a,b,g,r).  All codes are masked to 11 bits. */
+    register unsigned int  cr, cg, cb, ca, mask = CODE_MASK;
+    register unsigned char  t0, t1, t2, t3;
+    if (n >= stride) {
+	if (stride == 3) {
+	    op[0] = 0;
+	    t1 = ToLinear8[cb = wp[2] & mask];
+	    t2 = ToLinear8[cg = wp[1] & mask];
+	    t3 = ToLinear8[cr = wp[0] & mask];
+	    op[1] = t1;
+	    op[2] = t2;
+	    op[3] = t3;
+	    n -= 3;
+	    while (n > 0) {
+		n -= 3;
+		wp += 3;
+		op += 4;
+		op[0] = 0;
+		t1 = ToLinear8[(cb += wp[2]) & mask];
+		t2 = ToLinear8[(cg += wp[1]) & mask];
+		t3 = ToLinear8[(cr += wp[0]) & mask];
+		op[1] = t1;
+		op[2] = t2;
+		op[3] = t3;
+	    }
+	} else if (stride == 4) {
+	    t0 = ToLinear8[ca = wp[3] & mask];
+	    t1 = ToLinear8[cb = wp[2] & mask];
+	    t2 = ToLinear8[cg = wp[1] & mask];
+	    t3 = ToLinear8[cr = wp[0] & mask];
+	    op[0] = t0;
+	    op[1] = t1;
+	    op[2] = t2;
+	    op[3] = t3;
+	    n -= 4;
+	    while (n > 0) {
+		n -= 4;
+		wp += 4;
+		op += 4;
+		t0 = ToLinear8[(ca += wp[3]) & mask];
+		t1 = ToLinear8[(cb += wp[2]) & mask];
+		t2 = ToLinear8[(cg += wp[1]) & mask];
+		t3 = ToLinear8[(cr += wp[0]) & mask];
+		op[0] = t0;
+		op[1] = t1;
+		op[2] = t2;
+		op[3] = t3;
+	    }
+	} else {
+	    REPEAT(stride, *op = ToLinear8[*wp&mask]; wp++; op++)
+	    n -= stride;
+	    while (n > 0) {
+		REPEAT(stride,
+		    wp[stride] += *wp; *op = ToLinear8[*wp&mask]; wp++; op++)
+		n -= stride;
+	    }
+	}
+    }
+}
+
+/*
+ * State block for each open TIFF
+ * file using PixarLog compression/decompression.
+ */
+typedef	struct {
+	TIFFPredictorState	predict;
+	z_stream		stream;		/* zlib stream state */
+	uint16			*tbuf; 		/* buffer of 16-bit codes, allocated in PixarLogSetupDecode */
+	uint16			stride;		/* samples per pixel if contiguous, else 1 */
+	int			state;		/* NOTE(review): presumably PLSTATE_* flags -- confirm */
+	int			user_datafmt;	/* PIXARLOGDATAFMT_* value, guessed when left unknown */
+	int			quality;
+#define PLSTATE_INIT 1
+
+	TIFFVSetMethod		vgetparent;	/* super-class method */
+	TIFFVSetMethod		vsetparent;	/* super-class method */
+
+	float *ToLinearF;		/* this and the tables below are built by PixarLogMakeTables */
+	uint16 *ToLinear16;
+	unsigned char *ToLinear8;
+	uint16  *FromLT2;
+	uint16  *From14; /* Really for 16-bit data, but we shift down 2 */
+	uint16  *From8;
+	
+} PixarLogState;
+
+static int
+PixarLogMakeTables(PixarLogState *sp)
+{
+    /* Returns 1 with all six tables stored in sp; 0 (table pointers NULL) if an allocation fails. */
+/*
+ *    We make several tables here to convert between various external
+ *    representations (float, 16-bit, and 8-bit) and the internal
+ *    11-bit companded representation.  The 11-bit representation has two
+ *    distinct regions.  A linear bottom end up through .018316 in steps
+ *    of about .000073, and a region of constant ratio up to about 25.
+ *    These floating point numbers are stored in the main table ToLinearF. 
+ *    All other tables are derived from this one.  The tables (and the
+ *    ratios) are continuous at the internal seam.
+ */
+
+    int  nlin, lt2size;
+    int  i, j;
+    double  b, c, linstep, v;
+    float *ToLinearF;
+    uint16 *ToLinear16;
+    unsigned char *ToLinear8;
+    uint16  *FromLT2;
+    uint16  *From14; /* Really for 16-bit data, but we shift down 2 */
+    uint16  *From8;
+
+    c = log(RATIO);	
+    nlin = (int)(1./c);	/* nlin must be an integer */
+    c = 1./nlin;
+    b = exp(-c*ONE);	/* multiplicative scale factor [b*exp(c*ONE) = 1] */
+    linstep = b*c*exp(1.);
+
+    LogK1 = (float)(1./c);	/* if (v >= 2)  token = k1*log(v*k2) */
+    LogK2 = (float)(1./b);
+    lt2size = (int)(2./linstep) + 1;	/* FromLT2 entry i stands for the value i*linstep */
+    FromLT2 = (uint16 *)_TIFFmalloc(lt2size*sizeof(uint16));
+    From14 = (uint16 *)_TIFFmalloc(16384*sizeof(uint16));
+    From8 = (uint16 *)_TIFFmalloc(256*sizeof(uint16));
+    ToLinearF = (float *)_TIFFmalloc(TSIZEP1 * sizeof(float));
+    ToLinear16 = (uint16 *)_TIFFmalloc(TSIZEP1 * sizeof(uint16));
+    ToLinear8 = (unsigned char *)_TIFFmalloc(TSIZEP1 * sizeof(unsigned char));
+    if (FromLT2 == NULL || From14  == NULL || From8   == NULL ||
+	 ToLinearF == NULL || ToLinear16 == NULL || ToLinear8 == NULL) {
+	if (FromLT2) _TIFFfree(FromLT2);	/* release whichever tables were obtained */
+	if (From14) _TIFFfree(From14);
+	if (From8) _TIFFfree(From8);
+	if (ToLinearF) _TIFFfree(ToLinearF);
+	if (ToLinear16) _TIFFfree(ToLinear16);
+	if (ToLinear8) _TIFFfree(ToLinear8);
+	sp->FromLT2 = NULL;
+	sp->From14 = NULL;
+	sp->From8 = NULL;
+	sp->ToLinearF = NULL;
+	sp->ToLinear16 = NULL;
+	sp->ToLinear8 = NULL;
+	return 0;
+    }
+
+    j = 0;
+
+    for (i = 0; i < nlin; i++)  {	/* linear region: entries linstep apart */
+	v = i * linstep;
+	ToLinearF[j++] = (float)v;
+    }
+
+    for (i = nlin; i < TSIZE; i++)	/* constant-ratio region */
+	ToLinearF[j++] = (float)(b*exp(c*i));
+
+    ToLinearF[2048] = ToLinearF[2047];	/* slop entry repeats the last code */
+
+    for (i = 0; i < TSIZEP1; i++)  {	/* integer tables: scale, round, clamp */
+	v = ToLinearF[i]*65535.0 + 0.5;
+	ToLinear16[i] = (v > 65535.0) ? 65535 : (uint16)v;
+	v = ToLinearF[i]*255.0  + 0.5;
+	ToLinear8[i]  = (v > 255.0) ? 255 : (unsigned char)v;
+    }
+
+    j = 0;
+    for (i = 0; i < lt2size; i++)  {
+	if ((i*linstep)*(i*linstep) > ToLinearF[j]*ToLinearF[j+1])	/* past the geometric mean of entries j and j+1 */
+	    j++;
+	FromLT2[i] = j;
+    }
+
+    /*
+     * Since we lose info anyway on 16-bit data, we set up a 14-bit
+     * table and shift 16-bit values down two bits on input.
+     * saves a little table space.
+     */
+    j = 0;
+    for (i = 0; i < 16384; i++)  {
+	while ((i/16383.)*(i/16383.) > ToLinearF[j]*ToLinearF[j+1])
+	    j++;
+	From14[i] = j;
+    }
+
+    j = 0;
+    for (i = 0; i < 256; i++)  {
+	while ((i/255.)*(i/255.) > ToLinearF[j]*ToLinearF[j+1])
+	    j++;
+	From8[i] = j;
+    }
+
+    Fltsize = (float)(lt2size/2);
+
+    sp->ToLinearF = ToLinearF;
+    sp->ToLinear16 = ToLinear16;
+    sp->ToLinear8 = ToLinear8;
+    sp->FromLT2 = FromLT2;
+    sp->From14 = From14;
+    sp->From8 = From8;
+
+    return 1;
+}
+
+#define	DecoderState(tif)	((PixarLogState*) (tif)->tif_data)	/* codec state kept in tif_data */
+#define	EncoderState(tif)	((PixarLogState*) (tif)->tif_data)	/* same pointer, encoder spelling */
+
+static	int PixarLogEncode(TIFF*, tidata_t, tsize_t, tsample_t);
+static	int PixarLogDecode(TIFF*, tidata_t, tsize_t, tsample_t);
+
+#define N(a)   (sizeof(a)/sizeof(a[0]))	/* element count; a must be a real array */
+#define PIXARLOGDATAFMT_UNKNOWN	-1	/* user data format not determined */
+
+static int
+PixarLogGuessDataFmt(TIFFDirectory *td)
+{
+	/*
+	 * If the user didn't tell us his datafmt, take our best guess
+	 * from the directory: the bits per sample select the candidate
+	 * and the sample format must be one that candidate accepts.
+	 * Every other combination yields PIXARLOGDATAFMT_UNKNOWN.
+	 */
+	const int format = td->td_sampleformat;
+	const int bits = td->td_bitspersample;
+	const int is_void = (format == SAMPLEFORMAT_VOID);
+	const int is_uint = (format == SAMPLEFORMAT_UINT);
+
+	/* 32 bits: IEEEFP only */
+	if (bits == 32 && format == SAMPLEFORMAT_IEEEFP)
+		return PIXARLOGDATAFMT_FLOAT;
+	/* 16 bits: VOID or UINT */
+	if (bits == 16 && (is_void || is_uint))
+		return PIXARLOGDATAFMT_16BIT;
+	/* 12 bits: VOID or INT */
+	if (bits == 12 && (is_void || format == SAMPLEFORMAT_INT))
+		return PIXARLOGDATAFMT_12BITPICIO;
+	/* 11 bits: VOID or UINT */
+	if (bits == 11 && (is_void || is_uint))
+		return PIXARLOGDATAFMT_11BITLOG;
+	/* 8 bits: VOID or UINT */
+	if (bits == 8 && (is_void || is_uint))
+		return PIXARLOGDATAFMT_8BIT;
+
+	return PIXARLOGDATAFMT_UNKNOWN;
+}
+
+static uint32
+multiply(size_t m1, size_t m2)
+{
+	/* The product is narrowed to 32 bits.  A result of 0 means the
+	 * narrowed value does not divide back to m2, or a factor was 0. */
+	uint32	product = m1 * m2;
+	if (m1 == 0 || product / m1 == m2)
+		return product;
+	return 0;
+}
+
+static int
+PixarLogSetupDecode(TIFF* tif)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+	PixarLogState* sp = DecoderState(tif);
+	tsize_t tbuf_size;
+	static const char module[] = "PixarLogSetupDecode";
+
+	assert(sp != NULL);
+
+	/* Make sure no byte swapping happens on the data
+	 * after decompression. */
+	tif->tif_postdecode = _TIFFNoPostDecode;
+
+	/* for some reason, we can't do this in TIFFInitPixarLog */
+
+	sp->stride = (td->td_planarconfig == PLANARCONFIG_CONTIG ?
+	    td->td_samplesperpixel : 1);
+	tbuf_size = multiply(multiply(multiply(sp->stride, td->td_imagewidth),
+				      td->td_rowsperstrip), sizeof(uint16));
+	if (tbuf_size == 0)
+		return (0);
+	sp->tbuf = (uint16 *) _TIFFmalloc(tbuf_size);
+	if (sp->tbuf == NULL)
+		return (0);
+	if (sp->user_datafmt == PIXARLOGDATAFMT_UNKNOWN)
+		sp->user_datafmt = PixarLogGuessDataFmt(td);
+	if (sp->user_datafmt == PIXARLOGDATAFMT_UNKNOWN) {
+		TIFFErrorExt(tif->tif_clientdata, module,
+			"PixarLog compression can't handle bits depth/data format combination (depth: %d)", 
+			td->td_bitspersample);
+		return (0);
+	}
+
+	if (inflateInit(&sp->stream) != Z_OK) {
+		TIFFErrorExt(tif->tif_clientdata, module, "%s: %s", tif->tif_name, sp->stream.msg);
+		return (0);
+	} else {
+		sp->state |= PLSTATE_INIT;
+		return (1);
+	}
+}
+
+/*
+ * Setup state for decoding a strip.
+ */
+static int
+PixarLogPreDecode(TIFF* tif, tsample_t s)
+{
+	PixarLogState* sp = DecoderState(tif);
+
+	(void) s;
+	assert(sp != NULL);
+	sp->stream.next_in = tif->tif_rawdata;
+	sp->stream.avail_in = tif->tif_rawcc;
+	return (inflateReset(&sp->stream) == Z_OK);
+}
+
+static int
+PixarLogDecode(TIFF* tif, tidata_t op, tsize_t occ, tsample_t s)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+	PixarLogState* sp = DecoderState(tif);
+	static const char module[] = "PixarLogDecode";
+	int i, nsamples, llen;
+	uint16 *up;
+	/* Inflate into sp->tbuf, then expand row by row into op; returns 1 on success, 0 on error. */
+	(void) s;
+	assert(sp != NULL);	/* must precede the first dereference of sp */
+	switch (sp->user_datafmt) {
+	case PIXARLOGDATAFMT_FLOAT:
+		nsamples = occ / sizeof(float);	/* XXX float == 32 bits */
+		break;
+	case PIXARLOGDATAFMT_16BIT:
+	case PIXARLOGDATAFMT_12BITPICIO:
+	case PIXARLOGDATAFMT_11BITLOG:
+		nsamples = occ / sizeof(uint16); /* XXX uint16 == 16 bits */
+		break;
+	case PIXARLOGDATAFMT_8BIT:
+	case PIXARLOGDATAFMT_8BITABGR:
+		nsamples = occ;
+		break;
+	default:
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			"%d bit input not supported in PixarLog", td->td_bitspersample);
+		return 0;
+	}
+	llen = sp->stride * td->td_imagewidth;	/* samples consumed per loop pass */
+
+	sp->stream.next_out = (unsigned char *) sp->tbuf;
+	sp->stream.avail_out = nsamples * sizeof(uint16); /* NOTE(review): not checked here against tbuf's allocated size */
+	do {
+		int state = inflate(&sp->stream, Z_PARTIAL_FLUSH);
+		if (state == Z_STREAM_END)
+			break;			/* XXX */
+		if (state == Z_DATA_ERROR) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "%s: Decoding error at scanline %d, %s",
+			    tif->tif_name, tif->tif_row, sp->stream.msg);
+			if (inflateSync(&sp->stream) != Z_OK)
+				return (0);
+			continue;
+		}
+		if (state != Z_OK) {
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: zlib error: %s",
+			    tif->tif_name, sp->stream.msg);
+			return (0);
+		}
+	} while (sp->stream.avail_out > 0);
+
+	/* hopefully, we got all the bytes we needed */
+	if (sp->stream.avail_out != 0) {
+		TIFFErrorExt(tif->tif_clientdata, module,
+		    "%s: Not enough data at scanline %d (short %d bytes)",
+		    tif->tif_name, tif->tif_row, sp->stream.avail_out);
+		return (0);
+	}
+
+	up = sp->tbuf;
+	/* Swap bytes in the data if from a different endian machine. */
+	if (tif->tif_flags & TIFF_SWAB)
+		TIFFSwabArrayOfShort(up, nsamples);
+
+	if (nsamples % llen) {	/* a trailing partial row would overrun up and op below */
+		TIFFErrorExt(tif->tif_clientdata, module,
+		    "%s: partial row at scanline %d, data truncated", tif->tif_name, tif->tif_row);
+		nsamples -= nsamples % llen;
+	}
+	for (i = 0; i < nsamples; i += llen, up += llen) {
+		switch (sp->user_datafmt)  {
+		case PIXARLOGDATAFMT_FLOAT:
+			horizontalAccumulateF(up, llen, sp->stride,
+					(float *)op, sp->ToLinearF);
+			op += llen * sizeof(float);
+			break;
+		case PIXARLOGDATAFMT_16BIT:
+			horizontalAccumulate16(up, llen, sp->stride,
+					(uint16 *)op, sp->ToLinear16);
+			op += llen * sizeof(uint16);
+			break;
+		case PIXARLOGDATAFMT_12BITPICIO:
+			horizontalAccumulate12(up, llen, sp->stride,
+					(int16 *)op, sp->ToLinearF);
+			op += llen * sizeof(int16);
+			break;
+		case PIXARLOGDATAFMT_11BITLOG:
+			horizontalAccumulate11(up, llen, sp->stride,
+					(uint16 *)op);
+			op += llen * sizeof(uint16);
+			break;
+		case PIXARLOGDATAFMT_8BIT:
+			horizontalAccumulate8(up, llen, sp->stride,
+					(unsigned char *)op, sp->ToLinear8);
+			op += llen * sizeof(unsigned char);
+			break;
+		case PIXARLOGDATAFMT_8BITABGR:
+			horizontalAccumulate8abgr(up, llen, sp->stride,
+					(unsigned char *)op, sp->ToLinear8);
+			op += llen * sizeof(unsigned char);
+			break;
+		default:
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			    "PixarLogDecode: unsupported bits/sample: %d", td->td_bitspersample);
+			return (0);
+		}
+	}
+	return (1);
+}
+
+static int
+PixarLogSetupEncode(TIFF* tif)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+	PixarLogState* sp = EncoderState(tif);
+	tsize_t tbuf_size;
+	static const char module[] = "PixarLogSetupEncode";
+
+	assert(sp != NULL);
+
+	/* for some reason, we can't do this in TIFFInitPixarLog */
+
+	sp->stride = (td->td_planarconfig == PLANARCONFIG_CONTIG ?
+	    td->td_samplesperpixel : 1);
+	tbuf_size = multiply(multiply(multiply(sp->stride, td->td_imagewidth),
+				      td->td_rowsperstrip), sizeof(uint16));
+	if (tbuf_size == 0)
+		return (0);
+	sp->tbuf = (uint16 *) _TIFFmalloc(tbuf_size);
+	if (sp->tbuf == NULL)
+		return (0);
+	if (sp->user_datafmt == PIXARLOGDATAFMT_UNKNOWN)
+		sp->user_datafmt = PixarLogGuessDataFmt(td);
+	if (sp->user_datafmt == PIXARLOGDATAFMT_UNKNOWN) {
+		TIFFErrorExt(tif->tif_clientdata, module, "PixarLog compression can't handle %d bit linear encodings", td->td_bitspersample);
+		return (0);
+	}
+
+	if (deflateInit(&sp->stream, sp->quality) != Z_OK) {
+		TIFFErrorExt(tif->tif_clientdata, module, "%s: %s", tif->tif_name, sp->stream.msg);
+		return (0);
+	} else {
+		sp->state |= PLSTATE_INIT;
+		return (1);
+	}
+}
+
+/*
+ * Reset encoding state at the start of a strip.
+ */
+static int
+PixarLogPreEncode(TIFF* tif, tsample_t s)
+{
+	PixarLogState *sp = EncoderState(tif);
+
+	(void) s;
+	assert(sp != NULL);
+	sp->stream.next_out = tif->tif_rawdata;
+	sp->stream.avail_out = tif->tif_rawdatasize;
+	return (deflateReset(&sp->stream) == Z_OK);
+}
+
+static void
+horizontalDifferenceF(float *ip, int n, int stride, uint16 *wp, uint16 *FromLT2)
+{
+    /* Store n float samples from ip as codes in wp; pixels after the first hold masked per-channel deltas. */
+    int32 r1, g1, b1, a1, r2, g2, b2, a2, mask;
+    float fltsize = Fltsize;
+
+#define  CLAMP(v) ( (v<(float)0.)   ? 0				\
+		  : (v<(float)2.)   ? FromLT2[(int)(v*fltsize)]	\
+		  : (v>(float)24.2) ? 2047			\
+		  : LogK1*log(v*LogK2) + 0.5 )
+
+    mask = CODE_MASK;
+    if (n >= stride) {
+	if (stride == 3) {
+	    r2 = wp[0] = (uint16) CLAMP(ip[0]);
+	    g2 = wp[1] = (uint16) CLAMP(ip[1]);
+	    b2 = wp[2] = (uint16) CLAMP(ip[2]);
+	    n -= 3;
+	    while (n > 0) {
+		n -= 3;
+		wp += 3;
+		ip += 3;
+		r1 = (int32) CLAMP(ip[0]); wp[0] = (r1-r2) & mask; r2 = r1;
+		g1 = (int32) CLAMP(ip[1]); wp[1] = (g1-g2) & mask; g2 = g1;
+		b1 = (int32) CLAMP(ip[2]); wp[2] = (b1-b2) & mask; b2 = b1;
+	    }
+	} else if (stride == 4) {
+	    r2 = wp[0] = (uint16) CLAMP(ip[0]);
+	    g2 = wp[1] = (uint16) CLAMP(ip[1]);
+	    b2 = wp[2] = (uint16) CLAMP(ip[2]);
+	    a2 = wp[3] = (uint16) CLAMP(ip[3]);
+	    n -= 4;
+	    while (n > 0) {
+		n -= 4;
+		wp += 4;
+		ip += 4;
+		r1 = (int32) CLAMP(ip[0]); wp[0] = (r1-r2) & mask; r2 = r1;
+		g1 = (int32) CLAMP(ip[1]); wp[1] = (g1-g2) & mask; g2 = g1;
+		b1 = (int32) CLAMP(ip[2]); wp[2] = (b1-b2) & mask; b2 = b1;
+		a1 = (int32) CLAMP(ip[3]); wp[3] = (a1-a2) & mask; a2 = a1;
+	    }
+	} else {
+	    /* Generic stride: walk forward; the first pixel is stored as-is,
+	     * later samples as masked deltas against the previous pixel.
+	     * With n a multiple of stride nothing past element n-1 is touched. */
+	    REPEAT(stride, wp[0] = (uint16) CLAMP(ip[0]); wp++; ip++)
+	    n -= stride;
+	    while (n > 0) {
+		REPEAT(stride,
+		    wp[0] = (uint16)(((int32)CLAMP(ip[0])-(int32)CLAMP(ip[-stride])) & mask);
+		    wp++; ip++)
+		n -= stride;
+	    }
+	}
+    }
+}
+
+static void
+horizontalDifference16(unsigned short *ip, int n, int stride, 
+	unsigned short *wp, uint16 *From14)
+{
+    register int  r1, g1, b1, a1, r2, g2, b2, a2, mask;
+    /* Store n 16-bit samples from ip as From14 codes in wp; pixels after the first hold masked deltas. */
+/* assumption is unsigned pixel values */
+#undef   CLAMP
+#define  CLAMP(v) From14[(v) >> 2]
+
+    mask = CODE_MASK;
+    if (n >= stride) {
+	if (stride == 3) {
+	    r2 = wp[0] = CLAMP(ip[0]);  g2 = wp[1] = CLAMP(ip[1]);
+	    b2 = wp[2] = CLAMP(ip[2]);
+	    n -= 3;
+	    while (n > 0) {
+		n -= 3;
+		wp += 3;
+		ip += 3;
+		r1 = CLAMP(ip[0]); wp[0] = (r1-r2) & mask; r2 = r1;
+		g1 = CLAMP(ip[1]); wp[1] = (g1-g2) & mask; g2 = g1;
+		b1 = CLAMP(ip[2]); wp[2] = (b1-b2) & mask; b2 = b1;
+	    }
+	} else if (stride == 4) {
+	    r2 = wp[0] = CLAMP(ip[0]);  g2 = wp[1] = CLAMP(ip[1]);
+	    b2 = wp[2] = CLAMP(ip[2]);  a2 = wp[3] = CLAMP(ip[3]);
+	    n -= 4;
+	    while (n > 0) {
+		n -= 4;
+		wp += 4;
+		ip += 4;
+		r1 = CLAMP(ip[0]); wp[0] = (r1-r2) & mask; r2 = r1;
+		g1 = CLAMP(ip[1]); wp[1] = (g1-g2) & mask; g2 = g1;
+		b1 = CLAMP(ip[2]); wp[2] = (b1-b2) & mask; b2 = b1;
+		a1 = CLAMP(ip[3]); wp[3] = (a1-a2) & mask; a2 = a1;
+	    }
+	} else {
+	    /* Generic stride: walk forward; the first pixel is stored as-is,
+	     * later samples as masked deltas against the previous pixel.
+	     * With n a multiple of stride nothing past element n-1 is touched. */
+	    REPEAT(stride, wp[0] = CLAMP(ip[0]); wp++; ip++)
+	    n -= stride;
+	    while (n > 0) {
+		REPEAT(stride,
+		    wp[0] = (uint16)((CLAMP(ip[0])-CLAMP(ip[-stride])) & mask);
+		    wp++; ip++)
+		n -= stride;
+	    }
+	}
+    }
+}
+
+
+static void
+horizontalDifference8(unsigned char *ip, int n, int stride, 
+	unsigned short *wp, uint16 *From8)
+{
+    register int  r1, g1, b1, a1, r2, g2, b2, a2, mask;
+    /* Store n 8-bit samples from ip as From8 codes in wp; pixels after the first hold masked deltas. */
+#undef	 CLAMP
+#define  CLAMP(v) (From8[(v)])
+
+    mask = CODE_MASK;
+    if (n >= stride) {
+	if (stride == 3) {
+	    r2 = wp[0] = CLAMP(ip[0]);  g2 = wp[1] = CLAMP(ip[1]);
+	    b2 = wp[2] = CLAMP(ip[2]);
+	    n -= 3;
+	    while (n > 0) {
+		n -= 3;
+		r1 = CLAMP(ip[3]); wp[3] = (r1-r2) & mask; r2 = r1;
+		g1 = CLAMP(ip[4]); wp[4] = (g1-g2) & mask; g2 = g1;
+		b1 = CLAMP(ip[5]); wp[5] = (b1-b2) & mask; b2 = b1;
+		wp += 3;
+		ip += 3;
+	    }
+	} else if (stride == 4) {
+	    r2 = wp[0] = CLAMP(ip[0]);  g2 = wp[1] = CLAMP(ip[1]);
+	    b2 = wp[2] = CLAMP(ip[2]);  a2 = wp[3] = CLAMP(ip[3]);
+	    n -= 4;
+	    while (n > 0) {
+		n -= 4;
+		r1 = CLAMP(ip[4]); wp[4] = (r1-r2) & mask; r2 = r1;
+		g1 = CLAMP(ip[5]); wp[5] = (g1-g2) & mask; g2 = g1;
+		b1 = CLAMP(ip[6]); wp[6] = (b1-b2) & mask; b2 = b1;
+		a1 = CLAMP(ip[7]); wp[7] = (a1-a2) & mask; a2 = a1;
+		wp += 4;
+		ip += 4;
+	    }
+	} else {
+	    /* Generic stride: walk forward; the first pixel is stored as-is,
+	     * later samples as masked deltas against the previous pixel.
+	     * With n a multiple of stride nothing past element n-1 is touched. */
+	    REPEAT(stride, wp[0] = CLAMP(ip[0]); wp++; ip++)
+	    n -= stride;
+	    while (n > 0) {
+		REPEAT(stride,
+		    wp[0] = (uint16)((CLAMP(ip[0])-CLAMP(ip[-stride])) & mask);
+		    wp++; ip++)
+		n -= stride;
+	    }
+	}
+    }
+}
+
+/*
+ * Encode a chunk of pixels.
+ */
+static int
+PixarLogEncode(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+	PixarLogState *sp = EncoderState(tif);
+	static const char module[] = "PixarLogEncode";
+	int 	i, n, llen;
+	unsigned short * up;
+
+	(void) s;
+
+	switch (sp->user_datafmt) {
+	case PIXARLOGDATAFMT_FLOAT:
+		n = cc / sizeof(float);		/* XXX float == 32 bits */
+		break;
+	case PIXARLOGDATAFMT_16BIT:
+	case PIXARLOGDATAFMT_12BITPICIO:
+	case PIXARLOGDATAFMT_11BITLOG:
+		n = cc / sizeof(uint16);	/* XXX uint16 == 16 bits */
+		break;
+	case PIXARLOGDATAFMT_8BIT:
+	case PIXARLOGDATAFMT_8BITABGR:
+		n = cc;
+		break;
+	default:
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			"%d bit input not supported in PixarLog",
+			td->td_bitspersample);
+		return 0;
+	}
+
+	llen = sp->stride * td->td_imagewidth;
+
+	for (i = 0, up = sp->tbuf; i < n; i += llen, up += llen) {
+		switch (sp->user_datafmt)  {
+		case PIXARLOGDATAFMT_FLOAT:
+			horizontalDifferenceF((float *)bp, llen, 
+				sp->stride, up, sp->FromLT2);
+			bp += llen * sizeof(float);
+			break;
+		case PIXARLOGDATAFMT_16BIT:
+			horizontalDifference16((uint16 *)bp, llen, 
+				sp->stride, up, sp->From14);
+			bp += llen * sizeof(uint16);
+			break;
+		case PIXARLOGDATAFMT_8BIT:
+			horizontalDifference8((unsigned char *)bp, llen, 
+				sp->stride, up, sp->From8);
+			bp += llen * sizeof(unsigned char);
+			break;
+		default:
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+				"%d bit input not supported in PixarLog",
+				td->td_bitspersample);
+			return 0;
+		}
+	}
+ 
+	sp->stream.next_in = (unsigned char *) sp->tbuf;
+	sp->stream.avail_in = n * sizeof(uint16);
+
+	do {
+		if (deflate(&sp->stream, Z_NO_FLUSH) != Z_OK) {
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: Encoder error: %s",
+			    tif->tif_name, sp->stream.msg);
+			return (0);
+		}
+		if (sp->stream.avail_out == 0) {
+			tif->tif_rawcc = tif->tif_rawdatasize;
+			TIFFFlushData1(tif);
+			sp->stream.next_out = tif->tif_rawdata;
+			sp->stream.avail_out = tif->tif_rawdatasize;
+		}
+	} while (sp->stream.avail_in > 0);
+	return (1);
+}
+
+/*
+ * Finish off an encoded strip by flushing the last
+ * string and tacking on an End Of Information code.
+ */
+
+static int
+PixarLogPostEncode(TIFF* tif)
+{
+	PixarLogState *sp = EncoderState(tif);
+	static const char module[] = "PixarLogPostEncode";
+	int state;
+
+	sp->stream.avail_in = 0;
+
+	do {
+		state = deflate(&sp->stream, Z_FINISH);
+		switch (state) {
+		case Z_STREAM_END:
+		case Z_OK:
+		    if (sp->stream.avail_out != (uint32)tif->tif_rawdatasize) {
+			    tif->tif_rawcc =
+				tif->tif_rawdatasize - sp->stream.avail_out;
+			    TIFFFlushData1(tif);
+			    sp->stream.next_out = tif->tif_rawdata;
+			    sp->stream.avail_out = tif->tif_rawdatasize;
+		    }
+		    break;
+		default:
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: zlib error: %s",
+			tif->tif_name, sp->stream.msg);
+		    return (0);
+		}
+	} while (state != Z_STREAM_END);
+	return (1);
+}
+
+static void
+PixarLogClose(TIFF* tif)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+
+	/* In a really sneaky maneuver, on close, we covertly modify both
+	 * bitspersample and sampleformat in the directory to indicate
+	 * 8-bit linear.  This way, the decode "just works" even for
+	 * readers that don't know about PixarLog, or how to set
+	 * the PIXARLOGDATAFMT pseudo-tag.
+	 */
+	td->td_bitspersample = 8;
+	td->td_sampleformat = SAMPLEFORMAT_UINT;
+}
+
+static void
+PixarLogCleanup(TIFF* tif)
+{
+	PixarLogState* sp = (PixarLogState*) tif->tif_data;
+
+	assert(sp != 0);
+
+	(void)TIFFPredictorCleanup(tif);
+
+	tif->tif_tagmethods.vgetfield = sp->vgetparent;
+	tif->tif_tagmethods.vsetfield = sp->vsetparent;
+
+	if (sp->FromLT2) _TIFFfree(sp->FromLT2);
+	if (sp->From14) _TIFFfree(sp->From14);
+	if (sp->From8) _TIFFfree(sp->From8);
+	if (sp->ToLinearF) _TIFFfree(sp->ToLinearF);
+	if (sp->ToLinear16) _TIFFfree(sp->ToLinear16);
+	if (sp->ToLinear8) _TIFFfree(sp->ToLinear8);
+	if (sp->state&PLSTATE_INIT) {
+		if (tif->tif_mode == O_RDONLY)
+			inflateEnd(&sp->stream);
+		else
+			deflateEnd(&sp->stream);
+	}
+	if (sp->tbuf)
+		_TIFFfree(sp->tbuf);
+	_TIFFfree(sp);
+	tif->tif_data = NULL;
+
+	_TIFFSetDefaultCompressionState(tif);
+}
+
+static int
+PixarLogVSetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+    PixarLogState *sp = (PixarLogState *)tif->tif_data;
+    int result;
+    static const char module[] = "PixarLogVSetField";
+
+    switch (tag) {
+     case TIFFTAG_PIXARLOGQUALITY:
+		sp->quality = va_arg(ap, int);
+		if (tif->tif_mode != O_RDONLY && (sp->state&PLSTATE_INIT)) {
+			if (deflateParams(&sp->stream,
+			    sp->quality, Z_DEFAULT_STRATEGY) != Z_OK) {
+				TIFFErrorExt(tif->tif_clientdata, module, "%s: zlib error: %s",
+					tif->tif_name, sp->stream.msg);
+				return (0);
+			}
+		}
+		return (1);
+     case TIFFTAG_PIXARLOGDATAFMT:
+	sp->user_datafmt = va_arg(ap, int);
+	/* Tweak the TIFF header so that the rest of libtiff knows what
+	 * size of data will be passed between app and library, and
+	 * assume that the app knows what it is doing and is not
+	 * confused by these header manipulations...
+	 */
+	switch (sp->user_datafmt) {
+	 case PIXARLOGDATAFMT_8BIT:
+	 case PIXARLOGDATAFMT_8BITABGR:
+	    TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, 8);
+	    TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_UINT);
+	    break;
+	 case PIXARLOGDATAFMT_11BITLOG:
+	    TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, 16);
+	    TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_UINT);
+	    break;
+	 case PIXARLOGDATAFMT_12BITPICIO:
+	    TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, 16);
+	    TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_INT);
+	    break;
+	 case PIXARLOGDATAFMT_16BIT:
+	    TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, 16);
+	    TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_UINT);
+	    break;
+	 case PIXARLOGDATAFMT_FLOAT:
+	    TIFFSetField(tif, TIFFTAG_BITSPERSAMPLE, 32);
+	    TIFFSetField(tif, TIFFTAG_SAMPLEFORMAT, SAMPLEFORMAT_IEEEFP);
+	    break;
+	}
+	/*
+	 * Must recalculate sizes should bits/sample change.
+	 */
+	tif->tif_tilesize = isTiled(tif) ? TIFFTileSize(tif) : (tsize_t) -1;
+	tif->tif_scanlinesize = TIFFScanlineSize(tif);
+	result = 1;		/* NB: pseudo tag */
+	break;
+     default:
+	result = (*sp->vsetparent)(tif, tag, ap);
+    }
+    return (result);
+}
+
+static int
+PixarLogVGetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+    PixarLogState *state = (PixarLogState *)tif->tif_data;
+    int *out;
+
+    /* Any tag other than the two codec pseudo-tags is passed on to
+     * the getter saved in vgetparent, and its result is returned
+     * unchanged. */
+    if (tag != TIFFTAG_PIXARLOGQUALITY && tag != TIFFTAG_PIXARLOGDATAFMT)
+	return (*state->vgetparent)(tif, tag, ap);
+
+    /* Both pseudo-tags are read through an int* taken from ap. */
+    out = va_arg(ap, int*);
+    *out = (tag == TIFFTAG_PIXARLOGQUALITY) ? state->quality : state->user_datafmt;
+    return (1);
+}
+
+static const TIFFFieldInfo pixarlogFieldInfo[] = {
+    {TIFFTAG_PIXARLOGDATAFMT,0,0,TIFF_ANY,  FIELD_PSEUDO,FALSE,FALSE,""},
+    {TIFFTAG_PIXARLOGQUALITY,0,0,TIFF_ANY,  FIELD_PSEUDO,FALSE,FALSE,""}
+};
+
+int
+TIFFInitPixarLog(TIFF* tif, int scheme)
+{
+	PixarLogState* sp;
+
+	assert(scheme == COMPRESSION_PIXARLOG);
+
+	/*
+	 * Allocate state block so tag methods have storage to record values.
+	 */
+	tif->tif_data = (tidata_t) _TIFFmalloc(sizeof (PixarLogState));
+	if (tif->tif_data == NULL)
+		goto bad;
+	sp = (PixarLogState*) tif->tif_data;
+	_TIFFmemset(sp, 0, sizeof (*sp));
+	sp->stream.data_type = Z_BINARY;
+	sp->user_datafmt = PIXARLOGDATAFMT_UNKNOWN;
+
+	/*
+	 * Install codec methods.
+	 */
+	tif->tif_setupdecode = PixarLogSetupDecode;
+	tif->tif_predecode = PixarLogPreDecode;
+	tif->tif_decoderow = PixarLogDecode;
+	tif->tif_decodestrip = PixarLogDecode;
+	tif->tif_decodetile = PixarLogDecode;
+	tif->tif_setupencode = PixarLogSetupEncode;
+	tif->tif_preencode = PixarLogPreEncode;
+	tif->tif_postencode = PixarLogPostEncode;
+	tif->tif_encoderow = PixarLogEncode;
+	tif->tif_encodestrip = PixarLogEncode;
+	tif->tif_encodetile = PixarLogEncode;
+	tif->tif_close = PixarLogClose;
+	tif->tif_cleanup = PixarLogCleanup;
+
+	/* Override SetField so we can handle our private pseudo-tag */
+	_TIFFMergeFieldInfo(tif, pixarlogFieldInfo, N(pixarlogFieldInfo));
+	sp->vgetparent = tif->tif_tagmethods.vgetfield;
+	tif->tif_tagmethods.vgetfield = PixarLogVGetField;   /* hook for codec tags */
+	sp->vsetparent = tif->tif_tagmethods.vsetfield;
+	tif->tif_tagmethods.vsetfield = PixarLogVSetField;   /* hook for codec tags */
+
+	/* Default values for codec-specific fields */
+	sp->quality = Z_DEFAULT_COMPRESSION; /* default comp. level */
+	sp->state = 0;
+
+	/* we don't wish to use the predictor, 
+	 * the default is none, which is predictor value 1
+	 */
+	(void) TIFFPredictorInit(tif);
+
+	/*
+	 * build the companding tables 
+	 */
+	PixarLogMakeTables(sp);
+
+	return (1);
+bad:
+	TIFFErrorExt(tif->tif_clientdata, "TIFFInitPixarLog",
+		     "No space for PixarLog state block");
+	return (0);
+}
+#endif /* PIXARLOG_SUPPORT */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_predict.c b/src/libtiff/tif_predict.c
new file mode 100644
index 0000000..e738ca3
--- /dev/null
+++ b/src/libtiff/tif_predict.c
@@ -0,0 +1,626 @@
+/* $Id: tif_predict.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ *
+ * Predictor Tag Support (used by multiple codecs).
+ */
+#include "tiffiop.h"
+#include "tif_predict.h"
+
+#define	PredictorState(tif)	((TIFFPredictorState*) (tif)->tif_data)
+
+static	void horAcc8(TIFF*, tidata_t, tsize_t);
+static	void horAcc16(TIFF*, tidata_t, tsize_t);
+static	void swabHorAcc16(TIFF*, tidata_t, tsize_t);
+static	void horDiff8(TIFF*, tidata_t, tsize_t);
+static	void horDiff16(TIFF*, tidata_t, tsize_t);
+static	void fpAcc(TIFF*, tidata_t, tsize_t);
+static	void fpDiff(TIFF*, tidata_t, tsize_t);
+static	int PredictorDecodeRow(TIFF*, tidata_t, tsize_t, tsample_t);
+static	int PredictorDecodeTile(TIFF*, tidata_t, tsize_t, tsample_t);
+static	int PredictorEncodeRow(TIFF*, tidata_t, tsize_t, tsample_t);
+static	int PredictorEncodeTile(TIFF*, tidata_t, tsize_t, tsample_t);
+
+static int
+PredictorSetup(TIFF* tif)
+{
+	static const char module[] = "PredictorSetup";
+
+	TIFFPredictorState* sp = PredictorState(tif);
+	TIFFDirectory* td = &tif->tif_dir;
+
+	switch (sp->predictor)		/* no differencing */
+	{
+		case PREDICTOR_NONE:
+			return 1;
+		case PREDICTOR_HORIZONTAL:
+			if (td->td_bitspersample != 8
+			    && td->td_bitspersample != 16) {
+				TIFFErrorExt(tif->tif_clientdata, module,
+    "Horizontal differencing \"Predictor\" not supported with %d-bit samples",
+					  td->td_bitspersample);
+				return 0;
+			}
+			break;
+		case PREDICTOR_FLOATINGPOINT:
+			if (td->td_sampleformat != SAMPLEFORMAT_IEEEFP) {
+				TIFFErrorExt(tif->tif_clientdata, module,
+	"Floating point \"Predictor\" not supported with %d data format",
+					  td->td_sampleformat);
+				return 0;
+			}
+			break;
+		default:
+			TIFFErrorExt(tif->tif_clientdata, module,
+				  "\"Predictor\" value %d not supported",
+				  sp->predictor);
+			return 0;
+	}
+	sp->stride = (td->td_planarconfig == PLANARCONFIG_CONTIG ?
+	    td->td_samplesperpixel : 1);
+	/*
+	 * Calculate the scanline/tile-width size in bytes.
+	 */
+	if (isTiled(tif))
+		sp->rowsize = TIFFTileRowSize(tif);
+	else
+		sp->rowsize = TIFFScanlineSize(tif);
+
+	return 1;
+}
+
+static int
+PredictorSetupDecode(TIFF* tif)
+{
+	TIFFPredictorState* sp = PredictorState(tif);
+	TIFFDirectory* td = &tif->tif_dir;
+
+	if (!(*sp->setupdecode)(tif) || !PredictorSetup(tif))
+		return 0;
+
+	if (sp->predictor == 2) {
+		switch (td->td_bitspersample) {
+			case 8:  sp->pfunc = horAcc8; break;
+			case 16: sp->pfunc = horAcc16; break;
+		}
+		/*
+		 * Override default decoding method with one that does the
+		 * predictor stuff.
+		 */
+		sp->coderow = tif->tif_decoderow;
+		tif->tif_decoderow = PredictorDecodeRow;
+		sp->codestrip = tif->tif_decodestrip;
+		tif->tif_decodestrip = PredictorDecodeTile;
+		sp->codetile = tif->tif_decodetile;
+		tif->tif_decodetile = PredictorDecodeTile;
+		/*
+		 * If the data is horizontally differenced 16-bit data that
+		 * requires byte-swapping, then it must be byte swapped before
+		 * the accumulation step.  We do this with a special-purpose
+		 * routine and override the normal post decoding logic that
+		 * the library setup when the directory was read.
+		 */
+		if (tif->tif_flags & TIFF_SWAB) {
+			if (sp->pfunc == horAcc16) {
+				sp->pfunc = swabHorAcc16;
+				tif->tif_postdecode = _TIFFNoPostDecode;
+			} /* else handle 32-bit case... */
+		}
+	}
+
+	else if (sp->predictor == 3) {
+		sp->pfunc = fpAcc;
+		/*
+		 * Override default decoding method with one that does the
+		 * predictor stuff.
+		 */
+		sp->coderow = tif->tif_decoderow;
+		tif->tif_decoderow = PredictorDecodeRow;
+		sp->codestrip = tif->tif_decodestrip;
+		tif->tif_decodestrip = PredictorDecodeTile;
+		sp->codetile = tif->tif_decodetile;
+		tif->tif_decodetile = PredictorDecodeTile;
+		/*
+		 * The data should not be swapped outside of the floating
+		 * point predictor, the accumulation routine should return
+		 * bytes in the native order.
+		 */
+		if (tif->tif_flags & TIFF_SWAB) {
+			tif->tif_postdecode = _TIFFNoPostDecode;
+		}
+		/*
+		 * Allocate buffer to keep the decoded bytes before
+		 * rearranging in the right order
+		 */
+	}
+
+	return 1;
+}
+
+static int
+PredictorSetupEncode(TIFF* tif)
+{
+	TIFFPredictorState* sp = PredictorState(tif);
+	TIFFDirectory* td = &tif->tif_dir;
+
+	if (!(*sp->setupencode)(tif) || !PredictorSetup(tif))
+		return 0;
+
+	if (sp->predictor == 2) {
+		switch (td->td_bitspersample) {
+			case 8:  sp->pfunc = horDiff8; break;
+			case 16: sp->pfunc = horDiff16; break;
+		}
+		/*
+		 * Override default encoding method with one that does the
+		 * predictor stuff.
+		 */
+		sp->coderow = tif->tif_encoderow;
+		tif->tif_encoderow = PredictorEncodeRow;
+		sp->codestrip = tif->tif_encodestrip;
+		tif->tif_encodestrip = PredictorEncodeTile;
+		sp->codetile = tif->tif_encodetile;
+		tif->tif_encodetile = PredictorEncodeTile;
+	}
+	
+	else if (sp->predictor == 3) {
+		sp->pfunc = fpDiff;
+		/*
+		 * Override default encoding method with one that does the
+		 * predictor stuff.
+		 */
+		sp->coderow = tif->tif_encoderow;
+		tif->tif_encoderow = PredictorEncodeRow;
+		sp->codestrip = tif->tif_encodestrip;
+		tif->tif_encodestrip = PredictorEncodeTile;
+		sp->codetile = tif->tif_encodetile;
+		tif->tif_encodetile = PredictorEncodeTile;
+	}
+
+	return 1;
+}
+
+#define REPEAT4(n, op)		\
+    switch (n) {		\
+    default: { int i; for (i = n-4; i > 0; i--) { op; } } \
+    case 4:  op;		\
+    case 3:  op;		\
+    case 2:  op;		\
+    case 1:  op;		\
+    case 0:  ;			\
+    }
+
+static void
+horAcc8(TIFF* tif, tidata_t cp0, tsize_t cc)
+{
+	tsize_t stride = PredictorState(tif)->stride;
+
+	char* cp = (char*) cp0;
+	if (cc > stride) {
+		cc -= stride;
+		/*
+		 * Pipeline the most common cases.
+		 */
+		if (stride == 3)  {
+			unsigned int cr = cp[0];
+			unsigned int cg = cp[1];
+			unsigned int cb = cp[2];
+			do {
+				cc -= 3, cp += 3;
+				cp[0] = (char) (cr += cp[0]);
+				cp[1] = (char) (cg += cp[1]);
+				cp[2] = (char) (cb += cp[2]);
+			} while ((int32) cc > 0);
+		} else if (stride == 4)  {
+			unsigned int cr = cp[0];
+			unsigned int cg = cp[1];
+			unsigned int cb = cp[2];
+			unsigned int ca = cp[3];
+			do {
+				cc -= 4, cp += 4;
+				cp[0] = (char) (cr += cp[0]);
+				cp[1] = (char) (cg += cp[1]);
+				cp[2] = (char) (cb += cp[2]);
+				cp[3] = (char) (ca += cp[3]);
+			} while ((int32) cc > 0);
+		} else  {
+			do {
+				REPEAT4(stride, cp[stride] =
+					(char) (cp[stride] + *cp); cp++)
+				cc -= stride;
+			} while ((int32) cc > 0);
+		}
+	}
+}
+
+/*
+ * Byte-swap a row of 16-bit samples, then undo horizontal differencing.
+ * The swap is unconditional: a row no wider than one pixel
+ * (wc <= stride) needs no accumulation but still has to be swapped,
+ * because PredictorSetupDecode replaces the generic post-decode step
+ * with _TIFFNoPostDecode when it selects this routine.
+ */
+static void
+swabHorAcc16(TIFF* tif, tidata_t cp0, tsize_t cc)
+{
+	uint16* wp = (uint16*) cp0;
+	tsize_t wc = cc / 2;
+
+	TIFFSwabArrayOfShort(wp, wc);
+	horAcc16(tif, cp0, cc);
+}
+
+static void
+horAcc16(TIFF* tif, tidata_t cp0, tsize_t cc)
+{
+	tsize_t step = PredictorState(tif)->stride;
+	uint16* sample = (uint16*) cp0;
+	tsize_t remaining = cc / 2;	/* count of 16-bit words */
+	/* Nothing to accumulate unless the row holds more than one pixel. */
+	if (remaining <= step)
+		return;
+	remaining -= step;
+	do {
+		REPEAT4(step, sample[step] += sample[0]; sample++)
+		remaining -= step;
+	} while ((int32) remaining > 0);
+}
+
+/*
+ * Floating point predictor accumulation routine.
+ */
+static void
+fpAcc(TIFF* tif, tidata_t cp0, tsize_t cc)
+{
+	tsize_t stride = PredictorState(tif)->stride;
+	uint32 bps = tif->tif_dir.td_bitspersample / 8;
+	tsize_t wc = cc / bps;
+	tsize_t count = cc;
+	uint8 *cp = (uint8 *) cp0;
+	uint8 *tmp = (uint8 *)_TIFFmalloc(cc);
+
+	if (!tmp)
+		return;
+
+	while (count > stride) {
+		REPEAT4(stride, cp[stride] += cp[0]; cp++)
+		count -= stride;
+	}
+
+	_TIFFmemcpy(tmp, cp0, cc);
+	cp = (uint8 *) cp0;
+	for (count = 0; count < wc; count++) {
+		uint32 byte;
+		for (byte = 0; byte < bps; byte++) {
+#if WORDS_BIGENDIAN
+			cp[bps * count + byte] = tmp[byte * wc + count];
+#else
+			cp[bps * count + byte] =
+				tmp[(bps - byte - 1) * wc + count];
+#endif
+		}
+	}
+	_TIFFfree(tmp);
+}
+
+/*
+ * Decode a scanline and apply the predictor routine.
+ */
+static int
+PredictorDecodeRow(TIFF* tif, tidata_t op0, tsize_t occ0, tsample_t s)
+{
+	TIFFPredictorState *pred = PredictorState(tif);
+
+	assert(pred != NULL);
+	assert(pred->coderow != NULL);
+	assert(pred->pfunc != NULL);
+	/* Run the saved codec row decoder; give up if it reports failure. */
+	if (!(*pred->coderow)(tif, op0, occ0, s))
+		return 0;
+	/* Then apply the predictor routine in place to the decoded bytes. */
+	(*pred->pfunc)(tif, op0, occ0);
+	return 1;
+}
+
+/*
+ * Decode a tile/strip and apply the predictor routine.
+ * Note that horizontal differencing must be done on a
+ * row-by-row basis.  The width of a "row" has already
+ * been calculated at pre-decode time according to the
+ * strip/tile dimensions.
+ */
+static int
+PredictorDecodeTile(TIFF* tif, tidata_t op0, tsize_t occ0, tsample_t s)
+{
+	TIFFPredictorState *sp = PredictorState(tif);
+
+	assert(sp != NULL);
+	assert(sp->codetile != NULL);
+
+	if ((*sp->codetile)(tif, op0, occ0, s)) {
+		tsize_t rowsize = sp->rowsize;
+		assert(rowsize > 0);
+		assert(sp->pfunc != NULL);
+		while ((long)occ0 > 0) {
+			(*sp->pfunc)(tif, op0, (tsize_t) rowsize);
+			occ0 -= rowsize;
+			op0 += rowsize;
+		}
+		return 1;
+	} else
+		return 0;
+}
+
+static void	/* in-place horizontal differencing of the cc bytes at cp0 */
+horDiff8(TIFF* tif, tidata_t cp0, tsize_t cc)
+{
+	TIFFPredictorState* sp = PredictorState(tif);
+	tsize_t stride = sp->stride;
+	char* cp = (char*) cp0;
+
+	if (cc > stride) {	/* the first `stride' bytes have no predecessor */
+		cc -= stride;
+		/*
+		 * Pipeline the most common cases (stride 3 and 4, front to back).
+		 */
+		if (stride == 3) {
+			int r1, g1, b1;
+			int r2 = cp[0];	/* r2/g2/b2 hold the previous, undifferenced values */
+			int g2 = cp[1];
+			int b2 = cp[2];
+			do {
+				r1 = cp[3]; cp[3] = r1-r2; r2 = r1;
+				g1 = cp[4]; cp[4] = g1-g2; g2 = g1;
+				b1 = cp[5]; cp[5] = b1-b2; b2 = b1;
+				cp += 3;
+			} while ((int32)(cc -= 3) > 0);
+		} else if (stride == 4) {
+			int r1, g1, b1, a1;
+			int r2 = cp[0];	/* as above, with a fourth channel */
+			int g2 = cp[1];
+			int b2 = cp[2];
+			int a2 = cp[3];
+			do {
+				r1 = cp[4]; cp[4] = r1-r2; r2 = r1;
+				g1 = cp[5]; cp[5] = g1-g2; g2 = g1;
+				b1 = cp[6]; cp[6] = b1-b2; b2 = b1;
+				a1 = cp[7]; cp[7] = a1-a2; a2 = a1;
+				cp += 4;
+			} while ((int32)(cc -= 4) > 0);
+		} else {
+			cp += cc - 1;	/* other strides: start near the end, step back */
+			do {	/* REPEAT4 is a macro defined earlier in this file */
+				REPEAT4(stride, cp[stride] -= cp[0]; cp--)
+			} while ((int32)(cc -= stride) > 0);
+		}
+	}
+}
+
+static void	/* in-place horizontal differencing of 16-bit words at cp0 */
+horDiff16(TIFF* tif, tidata_t cp0, tsize_t cc)
+{
+	TIFFPredictorState* sp = PredictorState(tif);
+	tsize_t stride = sp->stride;
+	int16 *wp = (int16*) cp0;
+	tsize_t wc = cc/2;	/* cc is in bytes; wc counts 16-bit words */
+
+	if (wc > stride) {	/* the first `stride' words have no predecessor */
+		wc -= stride;
+		wp += wc - 1;	/* start near the end and step backwards */
+		do {	/* REPEAT4 is a macro defined earlier in this file */
+			REPEAT4(stride, wp[stride] -= wp[0]; wp--)
+			wc -= stride;
+		} while ((int32) wc > 0);
+	}
+}
+
+/*
+ * Floating point predictor differencing routine (works in place on cp0).
+ */
+static void
+fpDiff(TIFF* tif, tidata_t cp0, tsize_t cc)
+{
+	tsize_t stride = PredictorState(tif)->stride;
+	uint32 bps = tif->tif_dir.td_bitspersample / 8;	/* bytes per sample */
+	tsize_t wc = cc / bps;	/* number of whole samples in cc bytes */
+	tsize_t count;
+	uint8 *cp = (uint8 *) cp0;
+	uint8 *tmp = (uint8 *)_TIFFmalloc(cc);	/* scratch copy of the input */
+
+	if (!tmp)
+		return;	/* NB: on allocation failure the data is left untouched */
+
+	_TIFFmemcpy(tmp, cp0, cc);
+	for (count = 0; count < wc; count++) {	/* gather byte k of every sample */
+		uint32 byte;	/* into its own run of wc bytes */
+		for (byte = 0; byte < bps; byte++) {
+#if WORDS_BIGENDIAN
+			cp[byte * wc + count] =	tmp[bps * count + byte];
+#else
+			cp[(bps - byte - 1) * wc + count] =	/* run order is flipped here */
+				tmp[bps * count + byte];
+#endif
+		}
+	}
+	_TIFFfree(tmp);
+
+	cp = (uint8 *) cp0;
+	cp += cc - stride - 1;	/* byte-wise differencing, from the end backwards */
+	for (count = cc; count > stride; count -= stride)
+		REPEAT4(stride, cp[stride] -= cp[0]; cp--)
+}
+
+static int
+PredictorEncodeRow(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	TIFFPredictorState *state = PredictorState(tif);
+
+	assert(state != NULL);
+	assert(state->pfunc != NULL);
+	assert(state->coderow != NULL);
+
+	/* NB: the predictor rewrites the caller's buffer in place. */
+	(*state->pfunc)(tif, bp, cc);
+	return (*state->coderow)(tif, bp, cc, s);
+}
+
+static int
+PredictorEncodeTile(TIFF* tif, tidata_t bp0, tsize_t cc0, tsample_t s)
+{
+	TIFFPredictorState *sp = PredictorState(tif);
+	tsize_t cc = cc0, rowsize;
+	unsigned char* bp = bp0;
+
+	assert(sp != NULL);
+	assert(sp->pfunc != NULL);
+	assert(sp->codetile != NULL);
+
+	rowsize = sp->rowsize;
+	assert(rowsize > 0);
+	/* Refuse partial rows: pfunc would write past the end of bp0. */
+	if (rowsize <= 0 || cc0 % rowsize != 0)
+		return 0;
+	for (; (long)cc > 0; cc -= rowsize, bp += rowsize)
+		(*sp->pfunc)(tif, bp, rowsize);	/* alters the caller's data, row by row */
+	return (*sp->codetile)(tif, bp0, cc0, s);
+}
+
+#define	FIELD_PREDICTOR	(FIELD_CODEC+0)		/* XXX */
+
+static const TIFFFieldInfo predictFieldInfo[] = {	/* merged in by TIFFPredictorInit */
+    { TIFFTAG_PREDICTOR,	 1, 1, TIFF_SHORT,	FIELD_PREDICTOR,
+      FALSE,	FALSE,	"Predictor" },	/* last member is the printable tag name */
+};
+#define	N(a)	(sizeof (a) / sizeof (a[0]))
+
+static int
+PredictorVSetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+	TIFFPredictorState *state = PredictorState(tif);
+
+	assert(state != NULL);
+	assert(state->vsetparent != NULL);
+
+	/* Only the Predictor tag is stored here; */
+	/* every other tag goes to the method we displaced. */
+	if (tag != TIFFTAG_PREDICTOR)
+		return (*state->vsetparent)(tif, tag, ap);
+
+	/* The value is fetched as int from the varargs list. */
+	state->predictor = (uint16) va_arg(ap, int);
+	TIFFSetFieldBit(tif, FIELD_PREDICTOR);
+	tif->tif_flags |= TIFF_DIRTYDIRECT;
+	return 1;
+}
+
+static int
+PredictorVGetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+	TIFFPredictorState *state = PredictorState(tif);
+
+	assert(state != NULL);
+	assert(state->vgetparent != NULL);
+
+	/* Only the Predictor tag is answered here; */
+	/* every other tag goes to the method we displaced. */
+	if (tag != TIFFTAG_PREDICTOR)
+		return (*state->vgetparent)(tif, tag, ap);
+
+	/* The caller supplies a uint16* that receives the value. */
+	*va_arg(ap, uint16*) = state->predictor;
+	return 1;
+}
+
+static void
+PredictorPrintDir(TIFF* tif, FILE* fd, long flags)
+{
+	TIFFPredictorState* state = PredictorState(tif);
+
+	(void) flags;
+	if (TIFFFieldSet(tif,FIELD_PREDICTOR)) {
+		static const char* const labels[] = { "", "none ",
+		    "horizontal differencing ", "floating point predictor " };
+		fprintf(fd, "  Predictor: ");
+		if (state->predictor >= 1 && state->predictor <= 3)
+			fprintf(fd, "%s", labels[state->predictor]);
+		fprintf(fd, "%u (0x%x)\n", state->predictor, state->predictor);
+	}
+	/* Chain to the print method that was installed before this one. */
+	if (state->printdir)
+		(*state->printdir)(tif, fd, flags);
+}
+
+int
+TIFFPredictorInit(TIFF* tif)
+{
+	TIFFPredictorState* state = PredictorState(tif);
+
+	assert(state != 0);
+
+	/*
+	 * Merge the Predictor tag's field info into
+	 * this TIFF handle.
+	 */
+	_TIFFMergeFieldInfo(tif, predictFieldInfo, N(predictFieldInfo));
+
+	/* Remember the methods that are about to be displaced... */
+	state->vgetparent = tif->tif_tagmethods.vgetfield;
+	state->vsetparent = tif->tif_tagmethods.vsetfield;
+	state->printdir = tif->tif_tagmethods.printdir;
+	state->setupdecode = tif->tif_setupdecode;
+	state->setupencode = tif->tif_setupencode;
+
+	/* ...and hook the predictor versions in their place. */
+	tif->tif_tagmethods.vgetfield = PredictorVGetField;
+	tif->tif_tagmethods.vsetfield = PredictorVSetField;
+	tif->tif_tagmethods.printdir = PredictorPrintDir;
+	tif->tif_setupdecode = PredictorSetupDecode;
+	tif->tif_setupencode = PredictorSetupEncode;
+
+	state->predictor = 1;			/* default value */
+	state->pfunc = NULL;			/* no predictor routine */
+	return 1;
+}
+
+int
+TIFFPredictorCleanup(TIFF* tif)
+{
+	TIFFPredictorState* state = PredictorState(tif);
+
+	assert(state != 0);
+	/* Hand every hooked method back to the value saved in the state block. */
+	tif->tif_setupencode = state->setupencode;
+	tif->tif_setupdecode = state->setupdecode;
+	tif->tif_tagmethods.printdir = state->printdir;
+	tif->tif_tagmethods.vsetfield = state->vsetparent;
+	tif->tif_tagmethods.vgetfield = state->vgetparent;
+
+	return 1;
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_predict.h b/src/libtiff/tif_predict.h
new file mode 100644
index 0000000..594f973
--- /dev/null
+++ b/src/libtiff/tif_predict.h
@@ -0,0 +1,64 @@
+/* $Id: tif_predict.h,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1995-1997 Sam Leffler
+ * Copyright (c) 1995-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef _TIFFPREDICT_
+#define	_TIFFPREDICT_
+/*
+ * ``Library-private'' Support for the Predictor Tag
+ */
+
+/*
+ * Codecs that want to support the Predictor tag must place
+ * this structure first in their private state block so that
+ * the predictor code can cast tif_data to find its state.
+ */
+typedef struct {
+	int		predictor;	/* predictor tag value (init sets 1) */
+	int		stride;		/* sample stride over data */
+	tsize_t		rowsize;	/* tile/strip row size */
+
+	TIFFPostMethod	pfunc;		/* differencer/accumulator, or NULL */
+	TIFFCodeMethod	coderow;	/* parent codec encode/decode row */
+	TIFFCodeMethod	codestrip;	/* parent codec encode/decode strip */
+	TIFFCodeMethod	codetile;	/* parent codec encode/decode tile */
+	TIFFVGetMethod	vgetparent;	/* displaced tif_tagmethods.vgetfield */
+	TIFFVSetMethod	vsetparent;	/* displaced tif_tagmethods.vsetfield */
+	TIFFPrintMethod	printdir;	/* displaced tif_tagmethods.printdir */
+	TIFFBoolMethod	setupdecode;	/* displaced tif_setupdecode */
+	TIFFBoolMethod	setupencode;	/* displaced tif_setupencode */
+} TIFFPredictorState;
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+extern	int TIFFPredictorInit(TIFF*);
+extern	int TIFFPredictorCleanup(TIFF*);
+#if defined(__cplusplus)
+}
+#endif
+#endif /* _TIFFPREDICT_ */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_print.c b/src/libtiff/tif_print.c
new file mode 100644
index 0000000..01429be
--- /dev/null
+++ b/src/libtiff/tif_print.c
@@ -0,0 +1,639 @@
+/* $Id: tif_print.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ *
+ * Directory Printing Support
+ */
+#include "tiffiop.h"
+#include <stdio.h>
+
+#include <ctype.h>
+
+static const char *photoNames[] = {		/* indexed by td_photometric */
+    "min-is-white",				/* PHOTOMETRIC_MINISWHITE */
+    "min-is-black",				/* PHOTOMETRIC_MINISBLACK */
+    "RGB color",				/* PHOTOMETRIC_RGB */
+    "palette color (RGB from colormap)",	/* PHOTOMETRIC_PALETTE */
+    "transparency mask",			/* PHOTOMETRIC_MASK */
+    "separated",				/* PHOTOMETRIC_SEPARATED */
+    "YCbCr",					/* PHOTOMETRIC_YCBCR */
+    "7 (0x7)",					/* index 7: numeric placeholder */
+    "CIE L*a*b*",				/* PHOTOMETRIC_CIELAB */
+};
+#define	NPHOTONAMES	(sizeof (photoNames) / sizeof (photoNames[0]))
+
+static const char *orientNames[] = {		/* indexed by td_orientation */
+    "0 (0x0)",					/* index 0: numeric placeholder */
+    "row 0 top, col 0 lhs",			/* ORIENTATION_TOPLEFT */
+    "row 0 top, col 0 rhs",			/* ORIENTATION_TOPRIGHT */
+    "row 0 bottom, col 0 rhs",			/* ORIENTATION_BOTRIGHT */
+    "row 0 bottom, col 0 lhs",			/* ORIENTATION_BOTLEFT */
+    "row 0 lhs, col 0 top",			/* ORIENTATION_LEFTTOP */
+    "row 0 rhs, col 0 top",			/* ORIENTATION_RIGHTTOP */
+    "row 0 rhs, col 0 bottom",			/* ORIENTATION_RIGHTBOT */
+    "row 0 lhs, col 0 bottom",			/* ORIENTATION_LEFTBOT */
+};
+#define	NORIENTNAMES	(sizeof (orientNames) / sizeof (orientNames[0]))
+
+/* Print one tag generically as "  <name>: v0,v1,...": raw_data holds
+ * value_count items of fip->field_type (ASCII is printed once as a string). */
+static void
+_TIFFPrintField(FILE* fd, const TIFFFieldInfo *fip,
+		uint32 value_count, void *raw_data)
+{
+	uint32 j;
+	fprintf(fd, "  %s: ", fip->field_name);
+
+	for(j = 0; j < value_count; j++) {
+		if(fip->field_type == TIFF_BYTE)
+			fprintf(fd, "%u", ((uint8 *) raw_data)[j]);
+		else if(fip->field_type == TIFF_UNDEFINED)
+			fprintf(fd, "0x%x",
+				(unsigned int) ((unsigned char *) raw_data)[j]);
+		else if(fip->field_type == TIFF_SBYTE)
+			fprintf(fd, "%d", ((int8 *) raw_data)[j]);
+		else if(fip->field_type == TIFF_SHORT)
+			fprintf(fd, "%u", ((uint16 *) raw_data)[j]);
+		else if(fip->field_type == TIFF_SSHORT)
+			fprintf(fd, "%d", ((int16 *) raw_data)[j]);
+		else if(fip->field_type == TIFF_LONG)
+			fprintf(fd, "%lu",
+				(unsigned long)((uint32 *) raw_data)[j]);
+		else if(fip->field_type == TIFF_SLONG)
+			fprintf(fd, "%ld", (long)((int32 *) raw_data)[j]);
+		else if(fip->field_type == TIFF_RATIONAL
+			|| fip->field_type == TIFF_SRATIONAL
+			|| fip->field_type == TIFF_FLOAT)
+			fprintf(fd, "%f", ((float *) raw_data)[j]);
+		else if(fip->field_type == TIFF_IFD)
+			fprintf(fd, "0x%lx",	/* was "0x%ulx": decimal + literal "lx" */
+				(unsigned long)((uint32 *) raw_data)[j]);
+		else if(fip->field_type == TIFF_ASCII) {
+			fprintf(fd, "%s", (char *) raw_data);
+			break;
+		}
+		else if(fip->field_type == TIFF_DOUBLE)
+			fprintf(fd, "%f", ((double *) raw_data)[j]);
+		else {
+			fprintf(fd, "<unsupported data type in TIFFPrint>");
+			break;
+		}
+
+		if(j < value_count - 1)
+			fprintf(fd, ",");
+	}
+
+	fprintf(fd, "\n");
+}
+
+/* Pretty-print the tags that need special formatting.  Returns 1 if tag was
+ * printed here, 0 if the caller should fall back to the generic printer. */
+static int
+_TIFFPrettyPrintField(TIFF* tif, FILE* fd, ttag_t tag,
+		      uint32 value_count, void *raw_data)
+{
+	(void) tif;	/* kept in the signature; not needed by any case below */
+
+	switch (tag) {
+		case TIFFTAG_INKSET:
+			fprintf(fd, "  Ink Set: ");
+			switch (*((uint16*)raw_data)) {
+				case INKSET_CMYK:
+					fprintf(fd, "CMYK\n");
+					break;
+				default:
+					fprintf(fd, "%u (0x%x)\n",
+						*((uint16*)raw_data),
+						*((uint16*)raw_data));
+					break;
+			}
+			return 1;
+		case TIFFTAG_DOTRANGE:
+			fprintf(fd, "  Dot Range: %u-%u\n",
+				((uint16*)raw_data)[0], ((uint16*)raw_data)[1]);
+			return 1;
+		case TIFFTAG_WHITEPOINT:
+			fprintf(fd, "  White Point: %g-%g\n",
+				((float *)raw_data)[0], ((float *)raw_data)[1]);
+			return 1;
+		case TIFFTAG_REFERENCEBLACKWHITE:
+		{
+			uint16 i;
+			/* The tag holds 6 values (3 pairs), whatever samples/pixel is. */
+			fprintf(fd, "  Reference Black/White:\n");
+			for (i = 0; i < 3; i++)
+				fprintf(fd, "    %2d: %5g %5g\n", i,
+					((float *)raw_data)[2*i+0],
+					((float *)raw_data)[2*i+1]);
+			return 1;
+		}
+		case TIFFTAG_XMLPACKET:
+		{
+			uint32 i;
+			fprintf(fd, "  XMLPacket (XMP Metadata):\n" );
+			for(i = 0; i < value_count; i++)
+				fputc(((char *)raw_data)[i], fd);
+			fprintf( fd, "\n" );
+			return 1;
+		}
+		case TIFFTAG_RICHTIFFIPTC:
+			/*
+			 * XXX: for some weird reason RichTIFFIPTC tag
+			 * defined as array of LONG values.
+			 */
+			fprintf(fd,
+				"  RichTIFFIPTC Data: <present>, %lu bytes\n",
+				(unsigned long) value_count * 4);
+			return 1;
+		case TIFFTAG_PHOTOSHOP:
+			fprintf(fd, "  Photoshop Data: <present>, %lu bytes\n",
+				(unsigned long) value_count);
+			return 1;
+		case TIFFTAG_ICCPROFILE:
+			fprintf(fd, "  ICC Profile: <present>, %lu bytes\n",
+				(unsigned long) value_count);
+			return 1;
+		case TIFFTAG_STONITS:
+			fprintf(fd,
+				"  Sample to Nits conversion factor: %.4e\n",
+				*((double*)raw_data));
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Print the current directory of tif to the stdio stream fd; flags
+ * (TIFFPRINT_COLORMAP, _CURVES, _STRIPS) enable the bulky tables.
+ */
+void
+TIFFPrintDirectory(TIFF* tif, FILE* fd, long flags)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+	char *sep;
+	uint16 i;
+	long l, n;
+
+	fprintf(fd, "TIFF Directory at offset 0x%lx (%lu)\n",
+		(unsigned long)tif->tif_diroff, (unsigned long)tif->tif_diroff);
+	if (TIFFFieldSet(tif,FIELD_SUBFILETYPE)) {
+		fprintf(fd, "  Subfile Type:");
+		sep = " ";
+		if (td->td_subfiletype & FILETYPE_REDUCEDIMAGE) {
+			fprintf(fd, "%sreduced-resolution image", sep);
+			sep = "/";
+		}
+		if (td->td_subfiletype & FILETYPE_PAGE) {
+			fprintf(fd, "%smulti-page document", sep);
+			sep = "/";
+		}
+		if (td->td_subfiletype & FILETYPE_MASK)
+			fprintf(fd, "%stransparency mask", sep);
+		fprintf(fd, " (%lu = 0x%lx)\n",
+		    (unsigned long) td->td_subfiletype, (unsigned long) td->td_subfiletype);
+	}
+	if (TIFFFieldSet(tif,FIELD_IMAGEDIMENSIONS)) {
+		fprintf(fd, "  Image Width: %lu Image Length: %lu",
+		    (unsigned long) td->td_imagewidth, (unsigned long) td->td_imagelength);
+		if (TIFFFieldSet(tif,FIELD_IMAGEDEPTH))
+			fprintf(fd, " Image Depth: %lu",
+			    (unsigned long) td->td_imagedepth);
+		fprintf(fd, "\n");
+	}
+	if (TIFFFieldSet(tif,FIELD_TILEDIMENSIONS)) {
+		fprintf(fd, "  Tile Width: %lu Tile Length: %lu",
+		    (unsigned long) td->td_tilewidth, (unsigned long) td->td_tilelength);
+		if (TIFFFieldSet(tif,FIELD_TILEDEPTH))
+			fprintf(fd, " Tile Depth: %lu",
+			    (unsigned long) td->td_tiledepth);
+		fprintf(fd, "\n");
+	}
+	if (TIFFFieldSet(tif,FIELD_RESOLUTION)) {
+		fprintf(fd, "  Resolution: %g, %g",
+		    td->td_xresolution, td->td_yresolution);
+		if (TIFFFieldSet(tif,FIELD_RESOLUTIONUNIT)) {
+			switch (td->td_resolutionunit) {
+			case RESUNIT_NONE:
+				fprintf(fd, " (unitless)");
+				break;
+			case RESUNIT_INCH:
+				fprintf(fd, " pixels/inch");
+				break;
+			case RESUNIT_CENTIMETER:
+				fprintf(fd, " pixels/cm");
+				break;
+			default:
+				fprintf(fd, " (unit %u = 0x%x)",
+				    td->td_resolutionunit,
+				    td->td_resolutionunit);
+				break;
+			}
+		}
+		fprintf(fd, "\n");
+	}
+	if (TIFFFieldSet(tif,FIELD_POSITION))
+		fprintf(fd, "  Position: %g, %g\n",
+		    td->td_xposition, td->td_yposition);
+	if (TIFFFieldSet(tif,FIELD_BITSPERSAMPLE))
+		fprintf(fd, "  Bits/Sample: %u\n", td->td_bitspersample);
+	if (TIFFFieldSet(tif,FIELD_SAMPLEFORMAT)) {
+		fprintf(fd, "  Sample Format: ");
+		switch (td->td_sampleformat) {
+		case SAMPLEFORMAT_VOID:
+			fprintf(fd, "void\n");
+			break;
+		case SAMPLEFORMAT_INT:
+			fprintf(fd, "signed integer\n");
+			break;
+		case SAMPLEFORMAT_UINT:
+			fprintf(fd, "unsigned integer\n");
+			break;
+		case SAMPLEFORMAT_IEEEFP:
+			fprintf(fd, "IEEE floating point\n");
+			break;
+		case SAMPLEFORMAT_COMPLEXINT:
+			fprintf(fd, "complex signed integer\n");
+			break;
+		case SAMPLEFORMAT_COMPLEXIEEEFP:
+			fprintf(fd, "complex IEEE floating point\n");
+			break;
+		default:
+			fprintf(fd, "%u (0x%x)\n",
+			    td->td_sampleformat, td->td_sampleformat);
+			break;
+		}
+	}
+	if (TIFFFieldSet(tif,FIELD_COMPRESSION)) {
+		const TIFFCodec* c = TIFFFindCODEC(td->td_compression);
+		fprintf(fd, "  Compression Scheme: ");
+		if (c)
+			fprintf(fd, "%s\n", c->name);
+		else
+			fprintf(fd, "%u (0x%x)\n",
+			    td->td_compression, td->td_compression);
+	}
+	if (TIFFFieldSet(tif,FIELD_PHOTOMETRIC)) {
+		fprintf(fd, "  Photometric Interpretation: ");
+		if (td->td_photometric < NPHOTONAMES)
+			fprintf(fd, "%s\n", photoNames[td->td_photometric]);
+		else {
+			switch (td->td_photometric) {
+			case PHOTOMETRIC_LOGL:
+				fprintf(fd, "CIE Log2(L)\n");
+				break;
+			case PHOTOMETRIC_LOGLUV:
+				fprintf(fd, "CIE Log2(L) (u',v')\n");
+				break;
+			default:
+				fprintf(fd, "%u (0x%x)\n",
+				    td->td_photometric, td->td_photometric);
+				break;
+			}
+		}
+	}
+	if (TIFFFieldSet(tif,FIELD_EXTRASAMPLES) && td->td_extrasamples) {
+		fprintf(fd, "  Extra Samples: %u<", td->td_extrasamples);
+		sep = "";
+		for (i = 0; i < td->td_extrasamples; i++) {
+			switch (td->td_sampleinfo[i]) {
+			case EXTRASAMPLE_UNSPECIFIED:
+				fprintf(fd, "%sunspecified", sep);
+				break;
+			case EXTRASAMPLE_ASSOCALPHA:
+				fprintf(fd, "%sassoc-alpha", sep);
+				break;
+			case EXTRASAMPLE_UNASSALPHA:
+				fprintf(fd, "%sunassoc-alpha", sep);
+				break;
+			default:
+				fprintf(fd, "%s%u (0x%x)", sep,
+				    td->td_sampleinfo[i], td->td_sampleinfo[i]);
+				break;
+			}
+			sep = ", ";
+		}
+		fprintf(fd, ">\n");
+	}
+	if (TIFFFieldSet(tif,FIELD_INKNAMES)) {
+		char* cp;
+		fprintf(fd, "  Ink Names: ");
+		i = td->td_samplesperpixel;
+		sep = "";
+		for (cp = td->td_inknames; i > 0; cp = strchr(cp,'\0')+1, i--) {
+			fputs(sep, fd);
+			_TIFFprintAscii(fd, cp);
+			sep = ", ";
+		}
+                fputs("\n", fd);
+	}
+	if (TIFFFieldSet(tif,FIELD_THRESHHOLDING)) {
+		fprintf(fd, "  Thresholding: ");
+		switch (td->td_threshholding) {
+		case THRESHHOLD_BILEVEL:
+			fprintf(fd, "bilevel art scan\n");
+			break;
+		case THRESHHOLD_HALFTONE:
+			fprintf(fd, "halftone or dithered scan\n");
+			break;
+		case THRESHHOLD_ERRORDIFFUSE:
+			fprintf(fd, "error diffused\n");
+			break;
+		default:
+			fprintf(fd, "%u (0x%x)\n",
+			    td->td_threshholding, td->td_threshholding);
+			break;
+		}
+	}
+	if (TIFFFieldSet(tif,FIELD_FILLORDER)) {
+		fprintf(fd, "  FillOrder: ");
+		switch (td->td_fillorder) {
+		case FILLORDER_MSB2LSB:
+			fprintf(fd, "msb-to-lsb\n");
+			break;
+		case FILLORDER_LSB2MSB:
+			fprintf(fd, "lsb-to-msb\n");
+			break;
+		default:
+			fprintf(fd, "%u (0x%x)\n",
+			    td->td_fillorder, td->td_fillorder);
+			break;
+		}
+	}
+	if (TIFFFieldSet(tif,FIELD_YCBCRSUBSAMPLING))
+        {
+            /*
+             * For hacky reasons (see tif_jpeg.c - JPEGFixupTestSubsampling),
+             * we need to fetch this rather than trust what is in our
+             * structures.
+             */
+            uint16 subsampling[2];
+
+            TIFFGetField( tif, TIFFTAG_YCBCRSUBSAMPLING, 
+                          subsampling + 0, subsampling + 1 );
+		fprintf(fd, "  YCbCr Subsampling: %u, %u\n",
+                        subsampling[0], subsampling[1] );
+        }
+	if (TIFFFieldSet(tif,FIELD_YCBCRPOSITIONING)) {
+		fprintf(fd, "  YCbCr Positioning: ");
+		switch (td->td_ycbcrpositioning) {
+		case YCBCRPOSITION_CENTERED:
+			fprintf(fd, "centered\n");
+			break;
+		case YCBCRPOSITION_COSITED:
+			fprintf(fd, "cosited\n");
+			break;
+		default:
+			fprintf(fd, "%u (0x%x)\n",
+			    td->td_ycbcrpositioning, td->td_ycbcrpositioning);
+			break;
+		}
+	}
+	if (TIFFFieldSet(tif,FIELD_HALFTONEHINTS))
+		fprintf(fd, "  Halftone Hints: light %u dark %u\n",
+		    td->td_halftonehints[0], td->td_halftonehints[1]);
+	if (TIFFFieldSet(tif,FIELD_ORIENTATION)) {
+		fprintf(fd, "  Orientation: ");
+		if (td->td_orientation < NORIENTNAMES)
+			fprintf(fd, "%s\n", orientNames[td->td_orientation]);
+		else
+			fprintf(fd, "%u (0x%x)\n",
+			    td->td_orientation, td->td_orientation);
+	}
+	if (TIFFFieldSet(tif,FIELD_SAMPLESPERPIXEL))
+		fprintf(fd, "  Samples/Pixel: %u\n", td->td_samplesperpixel);
+	if (TIFFFieldSet(tif,FIELD_ROWSPERSTRIP)) {
+		fprintf(fd, "  Rows/Strip: ");
+		if (td->td_rowsperstrip == (uint32) -1)
+			fprintf(fd, "(infinite)\n");
+		else
+			fprintf(fd, "%lu\n", (unsigned long) td->td_rowsperstrip);
+	}
+	if (TIFFFieldSet(tif,FIELD_MINSAMPLEVALUE))
+		fprintf(fd, "  Min Sample Value: %u\n", td->td_minsamplevalue);
+	if (TIFFFieldSet(tif,FIELD_MAXSAMPLEVALUE))
+		fprintf(fd, "  Max Sample Value: %u\n", td->td_maxsamplevalue);
+	if (TIFFFieldSet(tif,FIELD_SMINSAMPLEVALUE))
+		fprintf(fd, "  SMin Sample Value: %g\n",
+		    td->td_sminsamplevalue);
+	if (TIFFFieldSet(tif,FIELD_SMAXSAMPLEVALUE))
+		fprintf(fd, "  SMax Sample Value: %g\n",
+		    td->td_smaxsamplevalue);
+	if (TIFFFieldSet(tif,FIELD_PLANARCONFIG)) {
+		fprintf(fd, "  Planar Configuration: ");
+		switch (td->td_planarconfig) {
+		case PLANARCONFIG_CONTIG:
+			fprintf(fd, "single image plane\n");
+			break;
+		case PLANARCONFIG_SEPARATE:
+			fprintf(fd, "separate image planes\n");
+			break;
+		default:
+			fprintf(fd, "%u (0x%x)\n",
+			    td->td_planarconfig, td->td_planarconfig);
+			break;
+		}
+	}
+	if (TIFFFieldSet(tif,FIELD_PAGENUMBER))
+		fprintf(fd, "  Page Number: %u-%u\n",
+		    td->td_pagenumber[0], td->td_pagenumber[1]);
+	if (TIFFFieldSet(tif,FIELD_COLORMAP)) {
+		fprintf(fd, "  Color Map: ");
+		if (flags & TIFFPRINT_COLORMAP) {
+			fprintf(fd, "\n");
+			n = 1L<<td->td_bitspersample;
+			for (l = 0; l < n; l++)
+				fprintf(fd, "   %5lu: %5u %5u %5u\n",
+				    (unsigned long) l,
+				    td->td_colormap[0][l],
+				    td->td_colormap[1][l],
+				    td->td_colormap[2][l]);
+		} else
+			fprintf(fd, "(present)\n");
+	}
+	if (TIFFFieldSet(tif,FIELD_TRANSFERFUNCTION)) {
+		fprintf(fd, "  Transfer Function: ");
+		if (flags & TIFFPRINT_CURVES) {
+			fprintf(fd, "\n");
+			n = 1L<<td->td_bitspersample;
+			for (l = 0; l < n; l++) {
+				fprintf(fd, "    %2lu: %5u",
+				    (unsigned long) l, td->td_transferfunction[0][l]);
+				for (i = 1; i < td->td_samplesperpixel; i++)
+					fprintf(fd, " %5u",
+					    td->td_transferfunction[i][l]);
+				fputc('\n', fd);
+			}
+		} else
+			fprintf(fd, "(present)\n");
+	}
+	if (TIFFFieldSet(tif, FIELD_SUBIFD)) {
+		fprintf(fd, "  SubIFD Offsets:");
+		for (i = 0; i < td->td_nsubifd; i++)
+			fprintf(fd, " %5lu", (unsigned long) td->td_subifd[i]);
+		fputc('\n', fd);
+	}
+
+        /*
+        ** Custom tag support.
+        */
+        {
+            int  i;
+            short count;
+
+            count = (short) TIFFGetTagListCount(tif);
+            for(i = 0; i < count; i++) {
+                ttag_t  tag = TIFFGetTagListEntry(tif, i);
+                const TIFFFieldInfo *fip;
+                uint32 value_count;
+                int mem_alloc = 0;
+                void *raw_data;
+
+                fip = TIFFFieldWithTag(tif, tag);
+                if(fip == NULL)
+			continue;
+
+		if(fip->field_passcount) {
+			if (fip->field_readcount == TIFF_VARIABLE2) {	/* uint32 count */
+				if(TIFFGetField(tif, tag, &value_count, &raw_data) != 1)
+					continue;
+			} else {
+				uint16 small_count;	/* other pass-count tags: uint16 */
+				if(TIFFGetField(tif, tag, &small_count, &raw_data) != 1)
+					continue;
+				value_count = small_count;
+			}
+		} else {
+			if (fip->field_readcount == TIFF_VARIABLE
+			    || fip->field_readcount == TIFF_VARIABLE2)
+				value_count = 1;
+			else if (fip->field_readcount == TIFF_SPP)
+				value_count = td->td_samplesperpixel;
+			else
+				value_count = fip->field_readcount;
+			if ((fip->field_type == TIFF_ASCII
+			     || fip->field_readcount == TIFF_VARIABLE
+			     || fip->field_readcount == TIFF_VARIABLE2
+			     || fip->field_readcount == TIFF_SPP
+			     || value_count > 1)
+			    && fip->field_tag != TIFFTAG_PAGENUMBER
+			    && fip->field_tag != TIFFTAG_HALFTONEHINTS
+			    && fip->field_tag != TIFFTAG_YCBCRSUBSAMPLING
+			    && fip->field_tag != TIFFTAG_DOTRANGE) {
+				if(TIFFGetField(tif, tag, &raw_data) != 1)
+					continue;
+			} else if (fip->field_tag != TIFFTAG_PAGENUMBER
+				   && fip->field_tag != TIFFTAG_HALFTONEHINTS
+				   && fip->field_tag != TIFFTAG_YCBCRSUBSAMPLING
+				   && fip->field_tag != TIFFTAG_DOTRANGE) {
+				raw_data = _TIFFmalloc(
+				    _TIFFDataSize(fip->field_type) * value_count);
+				if (raw_data == NULL)
+					continue;
+				mem_alloc = 1;
+				if(TIFFGetField(tif, tag, raw_data) != 1) {
+					_TIFFfree(raw_data);
+					continue;
+				}
+			} else {
+				/* 
+				 * XXX: Should be fixed and removed, see the
+				 * notes related to TIFFTAG_PAGENUMBER,
+				 * TIFFTAG_HALFTONEHINTS,
+				 * TIFFTAG_YCBCRSUBSAMPLING and
+				 * TIFFTAG_DOTRANGE tags in tif_dir.c. */
+				raw_data = _TIFFmalloc(
+				    _TIFFDataSize(fip->field_type) * value_count);
+				if (raw_data == NULL)
+					continue;
+				mem_alloc = 1;
+				if(TIFFGetField(tif, tag, raw_data, (char *) raw_data
+				    + _TIFFDataSize(fip->field_type)) != 1) {
+					_TIFFfree(raw_data);
+					continue;
+				}
+			}
+		}
+
+		/*
+		 * Tags with a dedicated pretty-printer are handled by
+		 * _TIFFPrettyPrintField(); all others are printed generically.
+		 */
+		if (!_TIFFPrettyPrintField(tif, fd, tag, value_count, raw_data))
+			_TIFFPrintField(fd, fip, value_count, raw_data);
+
+		if(mem_alloc)
+			_TIFFfree(raw_data);
+            }
+        }
+        
+	if (tif->tif_tagmethods.printdir)
+		(*tif->tif_tagmethods.printdir)(tif, fd, flags);
+	if ((flags & TIFFPRINT_STRIPS) &&
+	    TIFFFieldSet(tif,FIELD_STRIPOFFSETS)) {
+		tstrip_t s;
+
+		fprintf(fd, "  %lu %s:\n",
+		    (unsigned long) td->td_nstrips,
+		    isTiled(tif) ? "Tiles" : "Strips");
+		for (s = 0; s < td->td_nstrips; s++)
+			fprintf(fd, "    %3lu: [%8lu, %8lu]\n",
+			    (unsigned long) s,
+			    (unsigned long) td->td_stripoffset[s],
+			    (unsigned long) td->td_stripbytecount[s]);
+	}
+}
+
+void	/* write the string cp to fd, escaping non-printable characters */
+_TIFFprintAscii(FILE* fd, const char* cp)
+{
+	for (; *cp != '\0'; cp++) {
+		const char* tp;
+		/* <ctype.h> needs an unsigned char value; plain char may be < 0 */
+		if (isprint((unsigned char)*cp)) {
+			fputc(*cp, fd);
+			continue;
+		}
+		for (tp = "\tt\bb\rr\nn\vv"; *tp; tp++)	/* (char, letter) pairs */
+			if (*tp++ == *cp)
+				break;
+		if (*tp)	/* matched: tp now points at the escape letter */
+			fprintf(fd, "\\%c", *tp);
+		else		/* no short escape: fall back to 3-digit octal */
+			fprintf(fd, "\\%03o", *cp & 0xff);
+	}
+}
+
+void	/* print `  name: "value"' with value escaped by _TIFFprintAscii */
+_TIFFprintAsciiTag(FILE* fd, const char* name, const char* value)
+{
+	fprintf(fd, "  %s: \"", name);	/* label and opening quote */
+	_TIFFprintAscii(fd, value);
+	fputs("\"\n", fd);		/* closing quote ends the line */
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_read.c b/src/libtiff/tif_read.c
new file mode 100644
index 0000000..7bc580e
--- /dev/null
+++ b/src/libtiff/tif_read.c
@@ -0,0 +1,650 @@
+/* $Id: tif_read.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ * Scanline-oriented Read Support
+ */
+#include "tiffiop.h"
+#include <stdio.h>
+
+	int TIFFFillStrip(TIFF*, tstrip_t);	/* non-static; defined below */
+	int TIFFFillTile(TIFF*, ttile_t);	/* non-static; defined below */
+static	int TIFFStartStrip(TIFF*, tstrip_t);	/* called by TIFFSeek and TIFFFillStrip */
+static	int TIFFStartTile(TIFF*, ttile_t);	/* called by TIFFFillTile */
+static	int TIFFCheckRead(TIFF*, int);	/* 2nd argument: non-zero for tile access */
+
+#define	NOSTRIP	((tstrip_t) -1)			/* tif_curstrip: undefined state */
+#define	NOTILE	((ttile_t) -1)			/* tif_curtile: undefined state */
+
+/*
+ * Position the decoder at row `row` of plane `sample`.
+ * Returns 1 on success, 0 on failure.
+ */
+static int
+TIFFSeek(TIFF* tif, uint32 row, tsample_t sample)
+{
+	TIFFDirectory *dir = &tif->tif_dir;
+	tstrip_t target = 0;
+
+	if (row >= dir->td_imagelength) {	/* out of range */
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "%lu: Row out of range, max %lu",
+		    (unsigned long) row, (unsigned long) dir->td_imagelength);
+		return (0);
+	}
+	if (dir->td_planarconfig == PLANARCONFIG_SEPARATE) {
+		if (sample >= dir->td_samplesperpixel) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			    "%lu: Sample out of range, max %lu",
+			    (unsigned long) sample, (unsigned long) dir->td_samplesperpixel);
+			return (0);
+		}
+		/* skip the strips that belong to the preceding samples */
+		target = sample * dir->td_stripsperimage;
+	}
+	target += row / dir->td_rowsperstrip;
+
+	if (target == tif->tif_curstrip) {
+		/*
+		 * Same strip.  Going backwards means restarting the strip
+		 * and decoding forward again (below).
+		 *
+		 * NB: for heavy random access within a strip it is better
+		 * to decode the whole strip once and index the result.
+		 */
+		if (row < tif->tif_row && !TIFFStartStrip(tif, target))
+			return (0);
+	} else if (!TIFFFillStrip(tif, target)) {	/* different strip, refill */
+		return (0);
+	}
+
+	if (row != tif->tif_row) {
+		/* Seek forward to the desired row. */
+		if (!(*tif->tif_seek)(tif, row - tif->tif_row))
+			return (0);
+		tif->tif_row = row;
+	}
+	return (1);
+}
+
+/* Decode scanline `row` (for sample `sample`) into buf; returns 1, or -1 on error. */
+int
+TIFFReadScanline(TIFF* tif, tdata_t buf, uint32 row, tsample_t sample)
+{
+	int status;
+
+	if (!TIFFCheckRead(tif, 0))
+		return (-1);
+	status = TIFFSeek(tif, row, sample);
+	if (status == 0)
+		return (-1);
+
+	/* Decompress desired row into user buffer. */
+	status = (*tif->tif_decoderow)(tif, (tidata_t) buf,
+	    tif->tif_scanlinesize, sample);
+
+	/* we are now poised at the beginning of the next row */
+	tif->tif_row = row + 1;
+	if (status)
+		(*tif->tif_postdecode)(tif, (tidata_t) buf,
+		    tif->tif_scanlinesize);
+	return (status > 0 ? 1 : -1);
+}
+
+/*
+ * Decode strip `strip` into buf.  At most `size` bytes are produced;
+ * size == -1 means the whole strip.  Returns the byte count or -1.
+ */
+tsize_t
+TIFFReadEncodedStrip(TIFF* tif, tstrip_t strip, tdata_t buf, tsize_t size)
+{
+	TIFFDirectory *dir = &tif->tif_dir;
+	uint32 rows;
+	tsize_t full;
+	tstrip_t per_plane, index;
+
+	if (!TIFFCheckRead(tif, 0))
+		return (-1);
+	if (strip >= dir->td_nstrips) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "%ld: Strip out of range, max %ld",
+		    (long) strip, (long) dir->td_nstrips);
+		return (-1);
+	}
+
+	/*
+	 * Work out how many rows this strip holds: only the last strip
+	 * of each separation can be truncated.
+	 */
+	per_plane = 1;
+	if (dir->td_rowsperstrip < dir->td_imagelength)
+		per_plane = (dir->td_imagelength+dir->td_rowsperstrip-1)
+		    / dir->td_rowsperstrip;
+	index = strip % per_plane;
+
+	rows = dir->td_rowsperstrip;
+	if (index == per_plane-1) {
+		uint32 tail = dir->td_imagelength % dir->td_rowsperstrip;
+		if (tail != 0)
+			rows = tail;
+	}
+
+	full = TIFFVStripSize(tif, rows);
+	if (size == (tsize_t) -1 || size > full)
+		size = full;
+
+	if (!TIFFFillStrip(tif, strip))
+		return ((tsize_t) -1);
+	if ((*tif->tif_decodestrip)(tif, (tidata_t) buf, size,
+	    (tsample_t)(strip / dir->td_stripsperimage)) <= 0)
+		return ((tsize_t) -1);
+	(*tif->tif_postdecode)(tif, (tidata_t) buf, size);
+	return (size);
+}
+
+/* Copy `size` raw bytes of strip `strip` into buf; returns size, or -1 on error. */
+static tsize_t
+TIFFReadRawStrip1(TIFF* tif,
+    tstrip_t strip, tdata_t buf, tsize_t size, const char* module)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+
+	if (!isMapped(tif)) {
+		tsize_t cc;
+
+		if (!SeekOK(tif, td->td_stripoffset[strip])) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "%s: Seek error at scanline %lu, strip %lu",
+			    tif->tif_name,
+			    (unsigned long) tif->tif_row, (unsigned long) strip);
+			return (-1);
+		}
+		cc = TIFFReadFile(tif, buf, size);
+		if (cc != size) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+		"%s: Read error at scanline %lu; got %lu bytes, expected %lu",
+			    tif->tif_name,
+			    (unsigned long) tif->tif_row, (unsigned long) cc,
+			    (unsigned long) size);
+			return (-1);
+		}
+	} else {
+		/* Compare without adding: offset + size can wrap around. */
+		if (td->td_stripoffset[strip] > tif->tif_size ||
+		    (toff_t) size > tif->tif_size - td->td_stripoffset[strip]) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+    "%s: Read error at scanline %lu, strip %lu; got %lu bytes, expected %lu",
+			    tif->tif_name,
+			    (unsigned long) tif->tif_row, (unsigned long) strip,
+			    (unsigned long) tif->tif_size - td->td_stripoffset[strip],
+			    (unsigned long) size);
+			return (-1);
+		}
+		_TIFFmemcpy(buf, tif->tif_base + td->td_stripoffset[strip], size);
+	}
+	return (size);
+}
+
+/*
+ * Read up to `size` raw bytes of a strip (size == -1: its stored byte count).
+ */
+tsize_t
+TIFFReadRawStrip(TIFF* tif, tstrip_t strip, tdata_t buf, tsize_t size)
+{
+	static const char module[] = "TIFFReadRawStrip";
+	TIFFDirectory *dir = &tif->tif_dir;
+	tsize_t nbytes;
+
+	if (!TIFFCheckRead(tif, 0))
+		return ((tsize_t) -1);
+	if (strip >= dir->td_nstrips) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "%lu: Strip out of range, max %lu",
+		    (unsigned long) strip, (unsigned long) dir->td_nstrips);
+		return ((tsize_t) -1);
+	}
+	nbytes = dir->td_stripbytecount[strip];
+	if (nbytes <= 0) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "%lu: Invalid strip byte count, strip %lu",
+		    (unsigned long) nbytes, (unsigned long) strip);
+		return ((tsize_t) -1);
+	}
+	if (!(size == (tsize_t)-1 || size >= nbytes))
+		nbytes = size;
+	return (TIFFReadRawStrip1(tif, strip, buf, nbytes, module));
+}
+
+/*
+ * Read the specified strip and setup for decoding.  The data buffer
+ * is expanded, as necessary, to hold the strip's data.  Returns 0 on
+ * failure, otherwise the result of TIFFStartStrip().
+ */
+int
+TIFFFillStrip(TIFF* tif, tstrip_t strip)
+{
+	static const char module[] = "TIFFFillStrip";
+	TIFFDirectory *td = &tif->tif_dir;
+	tsize_t bytecount;
+
+	bytecount = td->td_stripbytecount[strip];
+	if (bytecount <= 0) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "%lu: Invalid strip byte count, strip %lu",
+		    (unsigned long) bytecount, (unsigned long) strip);
+		return (0);
+	}
+	if (isMapped(tif) &&
+	    (isFillOrder(tif, td->td_fillorder)
+             || (tif->tif_flags & TIFF_NOBITREV))) {
+		/*
+		 * The image is mapped into memory and we either don't
+		 * need to flip bits or the compression routine is going
+		 * to handle this operation itself.  In this case, avoid
+		 * copying the raw data and instead just reference the
+		 * data from the memory mapped file image.  This assumes
+		 * that the decompression routines do not modify the
+		 * contents of the raw data buffer (if they try to,
+		 * the application will get a fault since the file is
+		 * mapped read-only).
+		 */
+		if ((tif->tif_flags & TIFF_MYBUFFER) && tif->tif_rawdata)
+			_TIFFfree(tif->tif_rawdata);
+		tif->tif_flags &= ~TIFF_MYBUFFER;
+		if (td->td_stripoffset[strip] > tif->tif_size ||
+		    (toff_t) bytecount > tif->tif_size - td->td_stripoffset[strip]) {
+			/*
+			 * Checked without adding offset and count, since the
+			 * sum can wrap.  The message mirrors a failed read.
+			 */
+			TIFFErrorExt(tif->tif_clientdata, module,
+		    "%s: Read error on strip %lu; got %lu bytes, expected %lu",
+			    tif->tif_name, (unsigned long) strip,
+			    (unsigned long) tif->tif_size - td->td_stripoffset[strip],
+			    (unsigned long) bytecount);
+			tif->tif_curstrip = NOSTRIP;
+			return (0);
+		}
+		tif->tif_rawdatasize = bytecount;
+		tif->tif_rawdata = tif->tif_base + td->td_stripoffset[strip];
+	} else {
+		/*
+		 * Expand raw data buffer, if needed, to
+		 * hold data strip coming from file
+		 * (perhaps should set upper bound on
+		 *  the size of a buffer we'll use?).
+		 */
+		if (bytecount > tif->tif_rawdatasize) {
+			tif->tif_curstrip = NOSTRIP;
+			if ((tif->tif_flags & TIFF_MYBUFFER) == 0) {
+				TIFFErrorExt(tif->tif_clientdata, module,
+				"%s: Data buffer too small to hold strip %lu",
+				    tif->tif_name, (unsigned long) strip);
+				return (0);
+			}
+			if (!TIFFReadBufferSetup(tif, 0,
+			    TIFFroundup(bytecount, 1024)))
+				return (0);
+		}
+		if (TIFFReadRawStrip1(tif, strip, (unsigned char *)tif->tif_rawdata,
+		    bytecount, module) != bytecount)
+			return (0);
+		if (!isFillOrder(tif, td->td_fillorder) &&
+		    (tif->tif_flags & TIFF_NOBITREV) == 0)
+			TIFFReverseBits(tif->tif_rawdata, bytecount);
+	}
+	return (TIFFStartStrip(tif, strip));
+}
+
+/*
+ * Tile-oriented Read Support
+ * Contributed by Nancy Cam (Silicon Graphics).
+ */
+
+/*
+ * Read and decompress the tile selected by the (x,y,z,s)
+ * coordinates.  Returns -1 if either check fails.
+ */
+tsize_t
+TIFFReadTile(TIFF* tif,
+    tdata_t buf, uint32 x, uint32 y, uint32 z, tsample_t s)
+{
+	if (TIFFCheckRead(tif, 1) && TIFFCheckTile(tif, x, y, z, s))
+		return (TIFFReadEncodedTile(tif,
+		    TIFFComputeTile(tif, x, y, z, s), buf, (tsize_t) -1));
+	return (-1);
+}
+
+/*
+ * Decode tile `tile` into buf.  At most `size` bytes are produced;
+ * size == -1 means a full tile.  Returns the byte count or -1.
+ */
+tsize_t
+TIFFReadEncodedTile(TIFF* tif, ttile_t tile, tdata_t buf, tsize_t size)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+	tsize_t tilesize = tif->tif_tilesize;
+
+	if (!TIFFCheckRead(tif, 1))
+		return (-1);
+	if (tile >= td->td_nstrips) {
+		/* both values are cast to long to match the %ld conversions */
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "%ld: Tile out of range, max %ld",
+		    (long) tile, (long) td->td_nstrips);
+		return (-1);
+	}
+	/* clamp the request to the size of one decoded tile */
+	if (size == (tsize_t) -1 || size > tilesize)
+		size = tilesize;
+	if (TIFFFillTile(tif, tile) && (*tif->tif_decodetile)(tif,
+	    (tidata_t) buf, size, (tsample_t)(tile/td->td_stripsperimage))) {
+		(*tif->tif_postdecode)(tif, (tidata_t) buf, size);
+		return (size);
+	} else
+		return (-1);
+}
+
+/* Copy `size` raw bytes of tile `tile` into buf; returns size, or -1 on error. */
+static tsize_t
+TIFFReadRawTile1(TIFF* tif,
+    ttile_t tile, tdata_t buf, tsize_t size, const char* module)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+
+	if (!isMapped(tif)) {
+		tsize_t cc;
+
+		if (!SeekOK(tif, td->td_stripoffset[tile])) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+			    "%s: Seek error at row %ld, col %ld, tile %ld",
+			    tif->tif_name,
+			    (long) tif->tif_row, (long) tif->tif_col,
+			    (long) tile);
+			return ((tsize_t) -1);
+		}
+		cc = TIFFReadFile(tif, buf, size);
+		if (cc != size) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+	    "%s: Read error at row %ld, col %ld; got %lu bytes, expected %lu",
+			    tif->tif_name,
+			    (long) tif->tif_row, (long) tif->tif_col,
+			    (unsigned long) cc,
+			    (unsigned long) size);
+			return ((tsize_t) -1);
+		}
+	} else {
+		/* Compare without adding: offset + size can wrap around. */
+		if (td->td_stripoffset[tile] > tif->tif_size ||
+		    (toff_t) size > tif->tif_size - td->td_stripoffset[tile]) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+    "%s: Read error at row %ld, col %ld, tile %ld; got %lu bytes, expected %lu",
+			    tif->tif_name,
+			    (long) tif->tif_row, (long) tif->tif_col,
+			    (long) tile,
+			    (unsigned long) tif->tif_size - td->td_stripoffset[tile],
+			    (unsigned long) size);
+			return ((tsize_t) -1);
+		}
+		_TIFFmemcpy(buf, tif->tif_base + td->td_stripoffset[tile], size);
+	}
+	return (size);
+}
+
+/*
+ * Read up to `size` raw bytes of a tile (size == -1: its stored byte count).
+ */
+tsize_t
+TIFFReadRawTile(TIFF* tif, ttile_t tile, tdata_t buf, tsize_t size)
+{
+	static const char module[] = "TIFFReadRawTile";
+	TIFFDirectory *dir = &tif->tif_dir;
+	tsize_t nbytes;
+
+	if (!TIFFCheckRead(tif, 1))
+		return ((tsize_t) -1);
+	if (tile >= dir->td_nstrips) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "%lu: Tile out of range, max %lu",
+		    (unsigned long) tile, (unsigned long) dir->td_nstrips);
+		return ((tsize_t) -1);
+	}
+	nbytes = dir->td_stripbytecount[tile];
+	if (!(size == (tsize_t) -1 || size >= nbytes))
+		nbytes = size;
+	return (TIFFReadRawTile1(tif, tile, buf, nbytes, module));
+}
+
+/*
+ * Read the specified tile and setup for decoding.  The data buffer
+ * is expanded, as necessary, to hold the tile's data.  Returns 0 on
+ * failure, otherwise the result of TIFFStartTile().
+ */
+int
+TIFFFillTile(TIFF* tif, ttile_t tile)
+{
+	static const char module[] = "TIFFFillTile";
+	TIFFDirectory *td = &tif->tif_dir;
+	tsize_t bytecount;
+
+	bytecount = td->td_stripbytecount[tile];
+	if (bytecount <= 0) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "%lu: Invalid tile byte count, tile %lu",
+		    (unsigned long) bytecount, (unsigned long) tile);
+		return (0);
+	}
+	if (isMapped(tif) &&
+	    (isFillOrder(tif, td->td_fillorder)
+             || (tif->tif_flags & TIFF_NOBITREV))) {
+		/*
+		 * The image is mapped into memory and we either don't
+		 * need to flip bits or the compression routine is going
+		 * to handle this operation itself.  In this case, avoid
+		 * copying the raw data and just reference it in the memory
+		 * mapped file image.  This assumes the decompression
+		 * routines do not modify the raw data buffer (if they try to,
+		 * the application faults since the file is mapped read-only).
+		 */
+		if ((tif->tif_flags & TIFF_MYBUFFER) && tif->tif_rawdata)
+			_TIFFfree(tif->tif_rawdata);
+		tif->tif_flags &= ~TIFF_MYBUFFER;
+		/* Compare without adding: offset + count can wrap around. */
+		if (td->td_stripoffset[tile] > tif->tif_size ||
+		    (toff_t) bytecount > tif->tif_size - td->td_stripoffset[tile]) {
+			tif->tif_curtile = NOTILE;
+			return (0);
+		}
+		tif->tif_rawdatasize = bytecount;
+		tif->tif_rawdata = tif->tif_base + td->td_stripoffset[tile];
+	} else {
+		/*
+		 * Expand raw data buffer, if needed, to
+		 * hold data tile coming from file
+		 * (perhaps should set upper bound on
+		 *  the size of a buffer we'll use?).
+		 */
+		if (bytecount > tif->tif_rawdatasize) {
+			tif->tif_curtile = NOTILE;
+			if ((tif->tif_flags & TIFF_MYBUFFER) == 0) {
+				TIFFErrorExt(tif->tif_clientdata, module,
+				"%s: Data buffer too small to hold tile %ld",
+				    tif->tif_name, (long) tile);
+				return (0);
+			}
+			if (!TIFFReadBufferSetup(tif, 0,
+			    TIFFroundup(bytecount, 1024)))
+				return (0);
+		}
+		if (TIFFReadRawTile1(tif, tile,
+                                     (unsigned char *)tif->tif_rawdata,
+                                     bytecount, module) != bytecount)
+			return (0);
+		if (!isFillOrder(tif, td->td_fillorder) &&
+		    (tif->tif_flags & TIFF_NOBITREV) == 0)
+			TIFFReverseBits(tif->tif_rawdata, bytecount);
+	}
+	return (TIFFStartTile(tif, tile));
+}
+
+/*
+ * Install the raw data buffer used for reading
+ * raw strip data.  If bp is zero, the library
+ * allocates a buffer itself, sized with
+ * TIFFroundup(size, 1024).  Otherwise bp is used
+ * as given and the client must guarantee that it
+ * is large enough to hold any individual strip of
+ * raw data.  Returns 1, or 0 if allocation fails.
+ */
+int
+TIFFReadBufferSetup(TIFF* tif, tdata_t bp, tsize_t size)
+{
+	static const char module[] = "TIFFReadBufferSetup";
+
+	/* drop any previous buffer; only TIFF_MYBUFFER ones are freed */
+	if (tif->tif_rawdata && (tif->tif_flags & TIFF_MYBUFFER))
+		_TIFFfree(tif->tif_rawdata);
+	tif->tif_rawdata = NULL;
+
+	if (bp) {	/* caller-supplied storage */
+		tif->tif_flags &= ~TIFF_MYBUFFER;
+		tif->tif_rawdata = (tidata_t) bp;
+		tif->tif_rawdatasize = size;
+		return (1);
+	}
+
+	tif->tif_flags |= TIFF_MYBUFFER;
+	tif->tif_rawdatasize = TIFFroundup(size, 1024);
+	tif->tif_rawdata = (tidata_t) _TIFFmalloc(tif->tif_rawdatasize);
+	if (tif->tif_rawdata != NULL)
+		return (1);
+	TIFFErrorExt(tif->tif_clientdata, module,
+	    "%s: No space for data buffer at scanline %ld",
+	    tif->tif_name, (long) tif->tif_row);
+	tif->tif_rawdatasize = 0;
+	return (0);
+}
+
+/*
+ * Make the decoder state look as if `strip` had just been read in.
+ */
+static int
+TIFFStartStrip(TIFF* tif, tstrip_t strip)
+{
+	TIFFDirectory *dir = &tif->tif_dir;
+	tsample_t plane;
+
+	if (!(tif->tif_flags & TIFF_CODERSETUP)) {
+		if (!(*tif->tif_setupdecode)(tif))
+			return (0);
+		tif->tif_flags |= TIFF_CODERSETUP;
+	}
+	tif->tif_curstrip = strip;
+	tif->tif_row = (strip % dir->td_stripsperimage) * dir->td_rowsperstrip;
+	tif->tif_rawcp = tif->tif_rawdata;
+	tif->tif_rawcc = dir->td_stripbytecount[strip];
+	plane = (tsample_t)(strip / dir->td_stripsperimage);
+	return ((*tif->tif_predecode)(tif, plane));
+}
+
+/*
+ * Make the decoder state look as if `tile` had just been read in.
+ * Returns 0 if decoder setup fails, else the tif_predecode result.
+ */
+static int
+TIFFStartTile(TIFF* tif, ttile_t tile)
+{
+	TIFFDirectory *dir = &tif->tif_dir;
+
+	if (!(tif->tif_flags & TIFF_CODERSETUP)) {
+		if (!(*tif->tif_setupdecode)(tif))
+			return (0);
+		tif->tif_flags |= TIFF_CODERSETUP;
+	}
+	tif->tif_curtile = tile;
+	/* NOTE(review): row and column are both derived with `%`; confirm
+	 * whether tif_row was meant to use tile / tiles-across instead. */
+	tif->tif_row = (tile % TIFFhowmany(dir->td_imagewidth, dir->td_tilewidth))
+	    * dir->td_tilelength;
+	tif->tif_col = (tile % TIFFhowmany(dir->td_imagelength, dir->td_tilelength))
+	    * dir->td_tilewidth;
+	tif->tif_rawcp = tif->tif_rawdata;
+	tif->tif_rawcc = dir->td_stripbytecount[tile];
+	return ((*tif->tif_predecode)(tif,
+	    (tsample_t)(tile/dir->td_stripsperimage)));
+}
+
+/* Return 1 if tif can be read the requested way (tiles != 0: by tile), else 0. */
+static int
+TIFFCheckRead(TIFF* tif, int tiles)
+{
+	if (tif->tif_mode != O_WRONLY) {
+		if (!(tiles ^ isTiled(tif)))
+			return (1);
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name, tiles ?
+		    "Can not read tiles from a stripped image" :
+		    "Can not read scanlines from a tiled image");
+	} else {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "File not open for reading");
+	}
+	return (0);
+}
+
+void	/* does nothing; all three arguments are ignored */
+_TIFFNoPostDecode(TIFF* tif, tidata_t buf, tsize_t cc)
+{
+    (void) cc; (void) buf; (void) tif;
+}
+
+void	/* hands buf to TIFFSwabArrayOfShort() as cc/2 16-bit items */
+_TIFFSwab16BitData(TIFF* tif, tidata_t buf, tsize_t cc)
+{
+    (void) tif;
+    assert(cc % 2 == 0);	/* whole number of 2-byte items */
+    TIFFSwabArrayOfShort((uint16*) buf, cc / 2);
+}
+
+void	/* hands buf to TIFFSwabArrayOfTriples() as cc/3 3-byte items */
+_TIFFSwab24BitData(TIFF* tif, tidata_t buf, tsize_t cc)
+{
+    (void) tif;
+    assert(cc % 3 == 0);	/* whole number of 3-byte items */
+    TIFFSwabArrayOfTriples((uint8*) buf, cc / 3);
+}
+
+void	/* hands buf to TIFFSwabArrayOfLong() as cc/4 32-bit items */
+_TIFFSwab32BitData(TIFF* tif, tidata_t buf, tsize_t cc)
+{
+    (void) tif;
+    assert(cc % 4 == 0);	/* whole number of 4-byte items */
+    TIFFSwabArrayOfLong((uint32*) buf, cc / 4);
+}
+
+void	/* hands buf to TIFFSwabArrayOfDouble() as cc/8 doubles */
+_TIFFSwab64BitData(TIFF* tif, tidata_t buf, tsize_t cc)
+{
+    (void) tif;
+    assert(cc % 8 == 0);	/* whole number of 8-byte items */
+    TIFFSwabArrayOfDouble((double*) buf, cc / 8);
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_strip.c b/src/libtiff/tif_strip.c
new file mode 100644
index 0000000..e6a0fb7
--- /dev/null
+++ b/src/libtiff/tif_strip.c
@@ -0,0 +1,294 @@
+/* $Id: tif_strip.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ *
+ * Strip-organized Image Support Routines.
+ */
+#include "tiffiop.h"
+
+/* Return summand1 + summand2 as a uint32, or 0 (after reporting) on overflow. */
+static uint32
+summarize(TIFF* tif, size_t summand1, size_t summand2, const char* where)
+{
+	/*
+	 * XXX: The sum is narrowed to uint32 here, because sizeof(size_t)
+	 * may be larger than sizeof(uint32) on 64-bit architectures.
+	 */
+	uint32	total = summand1 + summand2;
+
+	if (total - summand1 == summand2)
+		return (total);
+
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Integer overflow in %s", where);
+	return (0);
+}
+
+/* Return nmemb * elem_size as a uint32, or 0 (after reporting) on overflow. */
+static uint32
+multiply(TIFF* tif, size_t nmemb, size_t elem_size, const char* where)
+{
+	uint32	product = nmemb * elem_size;
+
+	/* dividing back must give nmemb again, else the product did not fit */
+	if (elem_size == 0 || product / elem_size == nmemb)
+		return (product);
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Integer overflow in %s", where);
+	return (0);
+}
+
+/*
+ * Compute which strip a (row,sample) value is in.  The sample only
+ * matters for PLANARCONFIG_SEPARATE images; a bad sample yields strip 0.
+ */
+tstrip_t
+TIFFComputeStrip(TIFF* tif, uint32 row, tsample_t sample)
+{
+	TIFFDirectory *dir = &tif->tif_dir;
+	tstrip_t index = row / dir->td_rowsperstrip;
+
+	if (dir->td_planarconfig != PLANARCONFIG_SEPARATE)
+		return (index);
+
+	if (sample >= dir->td_samplesperpixel) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "%lu: Sample out of range, max %lu",
+		    (unsigned long) sample, (unsigned long) dir->td_samplesperpixel);
+		return ((tstrip_t) 0);
+	}
+	/* skip the strips that belong to the preceding samples */
+	return (index + sample*dir->td_stripsperimage);
+}
+
+/*
+ * Compute how many strips are in an image (times samples if separate).
+ */
+tstrip_t
+TIFFNumberOfStrips(TIFF* tif)
+{
+	TIFFDirectory *dir = &tif->tif_dir;
+	tstrip_t count = 1;	/* rowsperstrip of (uint32) -1: one strip */
+
+	if (dir->td_rowsperstrip != (uint32) -1)
+		count = TIFFhowmany(dir->td_imagelength, dir->td_rowsperstrip);
+	if (dir->td_planarconfig != PLANARCONFIG_SEPARATE)
+		return (count);
+	return (multiply(tif, count, dir->td_samplesperpixel,
+	    "TIFFNumberOfStrips"));
+}
+
+/*
+ * Compute the # bytes in a row-aligned strip of nrows rows; 0 on bad YCbCr subsampling.
+ */
+tsize_t
+TIFFVStripSize(TIFF* tif, uint32 nrows)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+
+	if (nrows == (uint32) -1)
+		nrows = td->td_imagelength;	/* -1 selects the full image length */
+	if (td->td_planarconfig == PLANARCONFIG_CONTIG &&
+	    td->td_photometric == PHOTOMETRIC_YCBCR &&
+	    !isUpSampled(tif)) {
+		/*
+		 * Packed YCbCr data contain one Cb+Cr for every
+		 * HorizontalSampling*VerticalSampling Y values.
+		 * Width and height must also be rounded up when
+		 * calculating, since images that are not a multiple
+		 * of the horizontal/vertical subsampling area include
+		 * YCbCr data for the extended image.
+		 */
+                uint16 ycbcrsubsampling[2];
+                tsize_t w, scanline, samplingarea;
+
+                TIFFGetField( tif, TIFFTAG_YCBCRSUBSAMPLING, 
+                              ycbcrsubsampling + 0, 
+                              ycbcrsubsampling + 1 );
+
+		samplingarea = ycbcrsubsampling[0]*ycbcrsubsampling[1];
+		if (samplingarea == 0) {	/* also guards the division below */
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+				     "Invalid YCbCr subsampling");
+			return 0;
+		}
+
+		w = TIFFroundup(td->td_imagewidth, ycbcrsubsampling[0]);
+		scanline = TIFFhowmany8(multiply(tif, w, td->td_bitspersample,
+						 "TIFFVStripSize"));
+		nrows = TIFFroundup(nrows, ycbcrsubsampling[1]);
+		/* NB: TIFFhowmany is not needed here because everything is rounded */
+		scanline = multiply(tif, nrows, scanline, "TIFFVStripSize");
+		return ((tsize_t)
+		    summarize(tif, scanline,
+			      multiply(tif, 2, scanline / samplingarea,
+				       "TIFFVStripSize"), "TIFFVStripSize"));
+	} else
+		return ((tsize_t) multiply(tif, nrows, TIFFScanlineSize(tif),
+					   "TIFFVStripSize"));
+}
+
+
+/*
+ * Return the stored byte count of a raw strip, or -1 if it is not positive.
+ */
+tsize_t
+TIFFRawStripSize(TIFF* tif, tstrip_t strip)
+{
+	TIFFDirectory* dir = &tif->tif_dir;
+	tsize_t nbytes = dir->td_stripbytecount[strip];
+
+	if (nbytes > 0)
+		return nbytes;
+
+	/* report the offending count together with the strip number */
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		  "%lu: Invalid strip byte count, strip %lu",
+		  (unsigned long) nbytes, (unsigned long) strip);
+	return ((tsize_t) -1);
+}
+
+/*
+ * Compute the # bytes in a (row-aligned) strip.
+ *
+ * The row count used is RowsPerStrip, clamped to
+ * the recorded ImageLength, so that the size
+ * reflects the actual space required to hold the
+ * strip when RowsPerStrip is larger than the image.
+ */
+tsize_t
+TIFFStripSize(TIFF* tif)
+{
+	TIFFDirectory* dir = &tif->tif_dir;
+	uint32 rows = dir->td_imagelength;
+	if (dir->td_rowsperstrip <= rows)
+		rows = dir->td_rowsperstrip;
+	return (TIFFVStripSize(tif, rows));
+}
+
+/*
+ * Compute a default strip size based on the image
+ * characteristics and a requested value, by calling the
+ * tif_defstripsize hook of this TIFF.  The handling of a
+ * request <1 is implemented in _TIFFDefaultStripSize().
+ */
+uint32
+TIFFDefaultStripSize(TIFF* tif, uint32 request)
+{
+	return (*tif->tif_defstripsize)(tif, request);	/* result is returned unchanged */
+}
+
+/* Return s unchanged when it is >= 1, otherwise pick a default (at least 1). */
+uint32
+_TIFFDefaultStripSize(TIFF* tif, uint32 s)
+{
+	tsize_t rowbytes;
+
+	if ((int32) s >= 1)
+		return (s);
+	/*
+	 * RowsPerStrip is unspecified: try to break the image up into
+	 * strips that are approximately STRIP_SIZE_DEFAULT bytes long.
+	 */
+	rowbytes = TIFFScanlineSize(tif);
+	s = (uint32)STRIP_SIZE_DEFAULT / (rowbytes == 0 ? 1 : rowbytes);
+	return (s == 0 ? 1 : s);	/* very wide images */
+}
+
+/*
+ * Return the number of bytes to read/write in a call to
+ * one of the scanline-oriented i/o routines.  Note that
+ * this number may be 1/samples-per-pixel if data is stored
+ * as separate planes.  Returns 0 on invalid YCbCr subsampling.
+ */
+tsize_t
+TIFFScanlineSize(TIFF* tif)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+	tsize_t scanline;
+	
+	if (td->td_planarconfig == PLANARCONFIG_CONTIG) {
+		if (td->td_photometric == PHOTOMETRIC_YCBCR
+		    && !isUpSampled(tif)) {
+			uint16 ycbcrsubsampling[2];
+
+			TIFFGetField(tif, TIFFTAG_YCBCRSUBSAMPLING, 
+				     ycbcrsubsampling + 0,
+				     ycbcrsubsampling + 1);
+
+			if (ycbcrsubsampling[0] == 0) {	/* guards the division below */
+				TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+					     "Invalid YCbCr subsampling");
+				return 0;
+			}
+
+			scanline = TIFFroundup(td->td_imagewidth,
+					       ycbcrsubsampling[0]);
+			scanline = TIFFhowmany8(multiply(tif, scanline,
+							 td->td_bitspersample,
+							 "TIFFScanlineSize"));
+			return ((tsize_t)	/* overflow is reported under this function's name */
+				summarize(tif, scanline,
+					  multiply(tif, 2,
+						scanline / ycbcrsubsampling[0],
+						"TIFFScanlineSize"),
+					  "TIFFScanlineSize"));
+		} else {
+			scanline = multiply(tif, td->td_imagewidth,
+					    td->td_samplesperpixel,
+					    "TIFFScanlineSize");
+		}
+	} else
+		scanline = td->td_imagewidth;
+	return ((tsize_t) TIFFhowmany8(multiply(tif, scanline,
+						td->td_bitspersample,
+						"TIFFScanlineSize")));
+}
+
+/*
+ * Return the number of bytes required to store a complete
+ * decoded and packed raster scanline (as opposed to the
+ * I/O size returned by TIFFScanlineSize which may be less
+ * if data is stored as separate planes).
+ */
+tsize_t
+TIFFRasterScanlineSize(TIFF* tif)
+{
+	static const char module[] = "TIFFRasterScanlineSize";
+	TIFFDirectory *dir = &tif->tif_dir;
+	tsize_t bits;
+
+	bits = multiply (tif, dir->td_bitspersample, dir->td_imagewidth, module);
+
+	/* not contiguous: convert to bytes first, then scale by samples */
+	if (dir->td_planarconfig != PLANARCONFIG_CONTIG)
+		return ((tsize_t) multiply (tif, TIFFhowmany8(bits),
+		    dir->td_samplesperpixel, module));
+	/* contiguous: scale by samples first, then convert to bytes */
+	bits = multiply (tif, bits, dir->td_samplesperpixel, module);
+	return ((tsize_t) TIFFhowmany8(bits));
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_swab.c b/src/libtiff/tif_swab.c
new file mode 100644
index 0000000..f8e977e
--- /dev/null
+++ b/src/libtiff/tif_swab.c
@@ -0,0 +1,235 @@
+/* $Id: tif_swab.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library Bit & Byte Swapping Support.
+ *
+ * XXX We assume short = 16-bits and long = 32-bits XXX
+ */
+#include "tiffiop.h"
+
+#ifndef TIFFSwabShort
+void
+TIFFSwabShort(uint16* wp)
+{
+	/* Exchange the two bytes of *wp in place. */
+	unsigned char* bytes = (unsigned char*) wp;
+	unsigned char first = bytes[0];
+	bytes[0] = bytes[1]; bytes[1] = first;
+}
+#endif
+
+#ifndef TIFFSwabLong
+void
+TIFFSwabLong(uint32* lp)
+{
+	/* Reverse the four bytes of *lp: outer pair first, then inner pair. */
+	unsigned char* bytes = (unsigned char*) lp;
+	unsigned char tmp;
+	tmp = bytes[0]; bytes[0] = bytes[3]; bytes[3] = tmp;
+	tmp = bytes[1]; bytes[1] = bytes[2]; bytes[2] = tmp;
+}
+#endif
+
+#ifndef TIFFSwabArrayOfShort
+void
+TIFFSwabArrayOfShort(uint16* wp, register unsigned long n)
+{
+	/*
+	 * Exchange the two bytes of each of the n words
+	 * starting at wp, in place.
+	 */
+	for (; n > 0; n--, wp++) {
+		unsigned char* bytes = (unsigned char*) wp;
+		unsigned char first = bytes[0];
+		bytes[0] = bytes[1]; bytes[1] = first;
+	}
+}
+#endif
+
+#ifndef TIFFSwabArrayOfTriples
+void
+TIFFSwabArrayOfTriples(uint8* tp, unsigned long n)
+{
+	/*
+	 * Exchange the first and third byte of each of the n
+	 * three-element groups at tp; the middle one is untouched.
+	 */
+	for (; n > 0; n--, tp += 3) {
+		unsigned char* bytes = (unsigned char*) tp;
+		unsigned char first = bytes[0];
+		bytes[0] = bytes[2]; bytes[2] = first;
+	}
+}
+#endif
+
+#ifndef TIFFSwabArrayOfLong
+void
+TIFFSwabArrayOfLong(register uint32* lp, register unsigned long n)
+{
+	/*
+	 * Reverse the four bytes of each of the n words
+	 * starting at lp, in place.
+	 */
+	for (; n > 0; n--, lp++) {
+		unsigned char* bytes = (unsigned char*) lp;
+		unsigned char tmp;
+		tmp = bytes[0]; bytes[0] = bytes[3]; bytes[3] = tmp;
+		tmp = bytes[1]; bytes[1] = bytes[2]; bytes[2] = tmp;
+	}
+}
+#endif
+
+#ifndef TIFFSwabDouble
+void
+TIFFSwabDouble(double *dp)
+{
+	/* Reverse the bytes of *dp in place, via unsigned char (no uint32 alias). */
+	unsigned char *lo = (unsigned char*) dp;
+	unsigned char *hi = lo + sizeof (double) - 1;
+
+	for (; lo < hi; lo++, hi--) { unsigned char t = *lo; *lo = *hi; *hi = t; }
+}
+#endif
+
+#ifndef TIFFSwabArrayOfDouble
+void
+TIFFSwabArrayOfDouble(double* dp, register unsigned long n)
+{
+	/* Reverse the bytes of each of the n doubles at dp, via unsigned char. */
+	for (; n > 0; n--, dp++) {
+		unsigned char *lo = (unsigned char*) dp;
+		unsigned char *hi = lo + sizeof (double) - 1;
+		for (; lo < hi; lo++, hi--) {
+			unsigned char t = *lo; *lo = *hi; *hi = t;
+		}
+	}
+}
+#endif
+
+/*
+ * Bit reversal tables.  TIFFBitRevTable[<byte>] gives
+ * the bit reversed value of <byte>.  Used in various
+ * places in the library when the FillOrder requires
+ * bit reversal of byte values (e.g. CCITT Fax 3
+ * encoding/decoding).  TIFFNoBitRevTable is provided
+ * for algorithms that want an equivalent table that
+ * does not reverse bit values.
+ */
+static const unsigned char TIFFBitRevTable[256] = {
+    0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
+    0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
+    0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
+    0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
+    0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
+    0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
+    0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
+    0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
+    0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
+    0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
+    0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
+    0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
+    0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
+    0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
+    0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
+    0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
+    0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
+    0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
+    0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
+    0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
+    0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
+    0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
+    0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
+    0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
+    0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
+    0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
+    0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
+    0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
+    0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
+    0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
+    0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
+    0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff
+};
+static const unsigned char TIFFNoBitRevTable[256] = {
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 
+    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 
+    0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 
+    0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 
+    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 
+    0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 
+    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 
+    0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 
+    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 
+    0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 
+    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 
+    0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 
+    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 
+    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 
+    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 
+    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 
+    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 
+    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 
+    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 
+    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 
+    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 
+    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 
+    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 
+    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 
+    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 
+    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 
+    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 
+    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 
+};
+
+const unsigned char*
+TIFFGetBitRevTable(int reversed)
+{	/* zero selects the identity table, anything else the reversing one */
+	return (reversed == 0 ? TIFFNoBitRevTable : TIFFBitRevTable);
+}
+
+void
+TIFFReverseBits(register unsigned char* cp, register unsigned long n)
+{
+	/*
+	 * Replace each of the n bytes starting at cp with the
+	 * entry TIFFBitRevTable holds for it, in place.
+	 *
+	 * Every byte is mapped independently of its neighbours,
+	 * so one plain loop yields the same buffer as a manually
+	 * unrolled one; unrolling is left to the compiler.
+	 */
+	unsigned long i;
+
+	for (i = 0; i < n; i++) {
+		cp[i] = TIFFBitRevTable[cp[i]];
+	}
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_thunder.c b/src/libtiff/tif_thunder.c
new file mode 100644
index 0000000..40f0618
--- /dev/null
+++ b/src/libtiff/tif_thunder.c
@@ -0,0 +1,158 @@
+/* $Id: tif_thunder.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#include "tiffiop.h"
+#ifdef THUNDER_SUPPORT
+/*
+ * TIFF Library.
+ *
+ * ThunderScan 4-bit Compression Algorithm Support
+ */
+
+/*
+ * ThunderScan uses an encoding scheme designed for
+ * 4-bit pixel values.  Data is encoded in bytes, with
+ * each byte split into a 2-bit code word and a 6-bit
+ * data value.  The encoding gives raw data, runs of
+ * pixels, or pixel values encoded as a delta from the
+ * previous pixel value.  For the latter, either 2-bit
+ * or 3-bit delta values are used, with the deltas packed
+ * into a single byte.
+ */
+#define	THUNDER_DATA		0x3f	/* mask for 6-bit data */
+#define	THUNDER_CODE		0xc0	/* mask for 2-bit code word */
+/* code values */
+#define	THUNDER_RUN		0x00	/* run of pixels w/ encoded count */
+#define	THUNDER_2BITDELTAS	0x40	/* 3 pixels w/ encoded 2-bit deltas */
+#define	    DELTA2_SKIP		2	/* skip code for 2-bit deltas */
+#define	THUNDER_3BITDELTAS	0x80	/* 2 pixels w/ encoded 3-bit deltas */
+#define	    DELTA3_SKIP		4	/* skip code for 3-bit deltas */
+#define	THUNDER_RAW		0xc0	/* raw data encoded */
+
+static const int twobitdeltas[4] = { 0, 1, 0, -1 };
+static const int threebitdeltas[8] = { 0, 1, 2, 3, 0, -3, -2, -1 };
+
+#define	SETPIXEL(op, v) { \
+	lastpixel = (v) & 0xf; \
+	if (npixels < maxpixels) {	/* never store past the row */ \
+	    if (npixels++ & 1) *op++ |= lastpixel; \
+	    else op[0] = (tidataval_t) (lastpixel << 4); \
+	} \
+}
+
+/*
+ * Decode maxpixels 4-bit pixels from tif_rawcp into op, two per byte with
+ * the first in the high nibble.  Returns 1 on an exact pixel count, else 0.
+ */
+static int
+ThunderDecode(TIFF* tif, tidata_t op, tsize_t maxpixels)
+{
+	register unsigned char *bp = (unsigned char *)tif->tif_rawcp;
+	register tsize_t cc = tif->tif_rawcc;
+	unsigned int lastpixel = 0;
+	tsize_t npixels = 0;
+
+	while (cc > 0 && npixels < maxpixels) {
+		int n, delta;
+
+		n = *bp++, cc--;
+		switch (n & THUNDER_CODE) {
+		case THUNDER_RUN:		/* pixel run */
+			/*
+			 * Replicate the last pixel n times,
+			 * where n is the lower-order 6 bits.
+			 */
+			if (npixels & 1) {
+				op[0] |= lastpixel;
+				lastpixel = *op++; npixels++; n--;
+			} else
+				lastpixel |= lastpixel << 4;
+			npixels += n;
+			if (npixels < maxpixels) {
+				for (; n > 0; n -= 2)
+					*op++ = (tidataval_t) lastpixel;
+			}
+			if (n == -1)	/* odd count: step back, clear the low nibble */
+				*--op &= 0xf0;
+			lastpixel &= 0xf;
+			break;
+		case THUNDER_2BITDELTAS:	/* 2-bit deltas */
+			if ((delta = ((n >> 4) & 3)) != DELTA2_SKIP)
+				SETPIXEL(op, lastpixel + twobitdeltas[delta]);
+			if ((delta = ((n >> 2) & 3)) != DELTA2_SKIP)
+				SETPIXEL(op, lastpixel + twobitdeltas[delta]);
+			if ((delta = (n & 3)) != DELTA2_SKIP)
+				SETPIXEL(op, lastpixel + twobitdeltas[delta]);
+			break;
+		case THUNDER_3BITDELTAS:	/* 3-bit deltas */
+			if ((delta = ((n >> 3) & 7)) != DELTA3_SKIP)
+				SETPIXEL(op, lastpixel + threebitdeltas[delta]);
+			if ((delta = (n & 7)) != DELTA3_SKIP)
+				SETPIXEL(op, lastpixel + threebitdeltas[delta]);
+			break;
+		case THUNDER_RAW:		/* raw data */
+			SETPIXEL(op, n);
+			break;
+		}
+	}
+	tif->tif_rawcp = (tidata_t) bp;
+	tif->tif_rawcc = cc;
+	if (npixels != maxpixels) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		    "ThunderDecode: %s data at scanline %ld (%lu != %lu)",
+		    npixels < maxpixels ? "Not enough" : "Too much",
+		    (long) tif->tif_row, (unsigned long) npixels, (unsigned long) maxpixels);
+		return (0);
+	}
+	return (1);
+}
+
+static int
+ThunderDecodeRow(TIFF* tif, tidata_t buf, tsize_t occ, tsample_t s)
+{
+	/* Decode scanline after scanline into buf until occ bytes are covered. */
+	tsize_t left;
+
+	(void) s;	/* the sample index is not consulted here */
+	for (left = occ; (long)left > 0; left -= tif->tif_scanlinesize) {
+		if (ThunderDecode(tif, buf, tif->tif_dir.td_imagewidth) == 0)
+			return (0);
+		buf += tif->tif_scanlinesize;
+	}
+	return (1);
+}
+
+int
+TIFFInitThunderScan(TIFF* tif, int scheme)
+{
+	(void) scheme;					/* accepted but not consulted */
+	tif->tif_decodestrip = ThunderDecodeRow;	/* strip decoding ... */
+	tif->tif_decoderow = ThunderDecodeRow;		/* ... and row decoding share one routine */
+	return (1);					/* always 1 */
+}
+#endif /* THUNDER_SUPPORT */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_tile.c b/src/libtiff/tif_tile.c
new file mode 100644
index 0000000..4d90e11
--- /dev/null
+++ b/src/libtiff/tif_tile.c
@@ -0,0 +1,273 @@
+/* $Id: tif_tile.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1991-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ *
+ * Tiled Image Support Routines.
+ */
+#include "tiffiop.h"
+
+static uint32
+summarize(TIFF* tif, size_t summand1, size_t summand2, const char* where)
+{
+	/*
+	 * The sum is narrowed to uint32 on purpose: size_t can be wider
+	 * than uint32 (64-bit hosts).  Undoing the addition on the narrowed
+	 * value exposes any sum that did not survive the narrowing.
+	 */
+	uint32 total = summand1 + summand2;
+
+	if (total - summand1 == summand2)
+		return (total);
+
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Integer overflow in %s", where);
+	return (0);
+}
+
+static uint32
+multiply(TIFF* tif, size_t nmemb, size_t elem_size, const char* where)
+{
+	/* Product narrowed to uint32; dividing it back must recover nmemb. */
+	uint32 product = nmemb * elem_size;
+
+	if (elem_size == 0 || product / elem_size == nmemb)
+		return (product);
+
+	TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Integer overflow in %s", where);
+	return (0);
+}
+
+/*
+ * Compute which tile an (x,y,z,s) value is in.
+ */
+ttile_t
+TIFFComputeTile(TIFF* tif, uint32 x, uint32 y, uint32 z, tsample_t s)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+	uint32 tw = td->td_tilewidth;
+	uint32 tl = td->td_tilelength;
+	uint32 tdp = td->td_tiledepth;
+	uint32 across, down, deep;
+	uint32 index;
+
+	/* z is ignored when the image depth is 1 */
+	if (td->td_imagedepth == 1)
+		z = 0;
+	/* a tile dimension of (uint32) -1 is replaced by the image's */
+	if (tw == (uint32) -1)
+		tw = td->td_imagewidth;
+	if (tl == (uint32) -1)
+		tl = td->td_imagelength;
+	if (tdp == (uint32) -1)
+		tdp = td->td_imagedepth;
+	/* any zero tile dimension: the result is 1 */
+	if (tw == 0 || tl == 0 || tdp == 0)
+		return (1);
+	across = TIFFhowmany(td->td_imagewidth, tw);
+	down = TIFFhowmany(td->td_imagelength, tl);
+	deep = TIFFhowmany(td->td_imagedepth, tdp);
+	/* tiles are numbered x fastest, then y, then z, then by sample plane */
+	index = (across*down)*(z/tdp) + across*(y/tl) + x/tw;
+	if (td->td_planarconfig == PLANARCONFIG_SEPARATE)
+		index += (across*down*deep)*s;
+	return (index);
+}
+
+/*
+ * Check an (x,y,z,s) coordinate
+ * against the image bounds.
+ */
+int
+TIFFCheckTile(TIFF* tif, uint32 x, uint32 y, uint32 z, tsample_t s)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+
+	/*
+	 * Coordinates are tested in the order x, y, z, s and only the
+	 * first one found out of range is reported.  The sample index
+	 * is tested only for PLANARCONFIG_SEPARATE data.  Returns 1 when
+	 * every tested coordinate is in range, 0 otherwise.
+	 */
+	if (x >= td->td_imagewidth)
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			     "%lu: Col out of range, max %lu",
+			     (unsigned long) x,
+			     (unsigned long) (td->td_imagewidth - 1));
+	else if (y >= td->td_imagelength)
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			     "%lu: Row out of range, max %lu",
+			     (unsigned long) y,
+			     (unsigned long) (td->td_imagelength - 1));
+	else if (z >= td->td_imagedepth)
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			     "%lu: Depth out of range, max %lu",
+			     (unsigned long) z,
+			     (unsigned long) (td->td_imagedepth - 1));
+	else if (td->td_planarconfig == PLANARCONFIG_SEPARATE &&
+		 s >= td->td_samplesperpixel)
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			     "%lu: Sample out of range, max %lu",
+			     (unsigned long) s,
+			     (unsigned long) (td->td_samplesperpixel - 1));
+	else
+		return (1);
+	return (0);
+}
+
+/*
+ * Compute how many tiles are in an image.
+ */
+ttile_t
+TIFFNumberOfTiles(TIFF* tif)
+{
+	static const char module[] = "TIFFNumberOfTiles";
+	TIFFDirectory *td = &tif->tif_dir;
+	uint32 tw = (td->td_tilewidth == (uint32) -1) ?
+	    td->td_imagewidth : td->td_tilewidth;
+	uint32 tl = (td->td_tilelength == (uint32) -1) ?
+	    td->td_imagelength : td->td_tilelength;
+	uint32 tdp = (td->td_tiledepth == (uint32) -1) ?
+	    td->td_imagedepth : td->td_tiledepth;
+	uint32 layer;
+	ttile_t count = 0;
+
+	if (tw != 0 && tl != 0 && tdp != 0) {
+		/* tiles across times tiles down, then times tiles deep */
+		layer = multiply(tif, TIFFhowmany(td->td_imagewidth, tw),
+				 TIFFhowmany(td->td_imagelength, tl), module);
+		count = multiply(tif, layer,
+				 TIFFhowmany(td->td_imagedepth, tdp), module);
+	}
+	if (td->td_planarconfig == PLANARCONFIG_SEPARATE)
+		count = multiply(tif, count, td->td_samplesperpixel, module);
+	return (count);
+}
+
+/*
+ * Compute the # bytes in each row of a tile.
+ */
+tsize_t
+TIFFTileRowSize(TIFF* tif)
+{
+	static const char module[] = "TIFFTileRowSize";
+	TIFFDirectory *td = &tif->tif_dir;
+	tsize_t bits;
+
+	if (td->td_tilelength == 0 || td->td_tilewidth == 0)
+		return ((tsize_t) 0);
+	bits = multiply(tif, td->td_bitspersample, td->td_tilewidth, module);
+	/* contiguous data carries every sample of a pixel in the same row */
+	if (td->td_planarconfig == PLANARCONFIG_CONTIG)
+		bits = multiply(tif, bits, td->td_samplesperpixel, module);
+	return ((tsize_t) TIFFhowmany8(bits));
+}
+
+/*
+ * Compute the # bytes in a variable length, row-aligned tile.
+ */
+tsize_t
+TIFFVTileSize(TIFF* tif, uint32 nrows)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+	tsize_t tilesize;
+
+	if (td->td_tilelength == 0 || td->td_tilewidth == 0 ||
+	    td->td_tiledepth == 0)
+		return ((tsize_t) 0);
+	if (td->td_planarconfig == PLANARCONFIG_CONTIG &&
+	    td->td_photometric == PHOTOMETRIC_YCBCR &&
+	    !isUpSampled(tif)) {
+		/*
+		 * Packed YCbCr data contain one Cb+Cr for every
+		 * HorizontalSampling*VerticalSampling Y values.
+		 * Must also roundup width and height when calculating
+		 * since images that are not a multiple of the
+		 * horizontal/vertical subsampling area include
+		 * YCbCr data for the extended image.
+		 */
+		tsize_t w, rowsize;
+		tsize_t samplingarea =
+		    td->td_ycbcrsubsampling[0]*td->td_ycbcrsubsampling[1];
+		/* reject zero factors before either one is used to round up */
+		if (samplingarea == 0) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name, "Invalid YCbCr subsampling");
+			return 0;
+		}
+		w = TIFFroundup(td->td_tilewidth, td->td_ycbcrsubsampling[0]);
+		rowsize = TIFFhowmany8(multiply(tif, w, td->td_bitspersample,
+						"TIFFVTileSize"));
+		nrows = TIFFroundup(nrows, td->td_ycbcrsubsampling[1]);
+		/* NB: don't need TIFFhowmany here 'cuz everything is rounded */
+		tilesize = multiply(tif, nrows, rowsize, "TIFFVTileSize");
+		tilesize = summarize(tif, tilesize,
+				     multiply(tif, 2, tilesize / samplingarea,
+					      "TIFFVTileSize"),
+				     "TIFFVTileSize");
+	} else
+		tilesize = multiply(tif, nrows, TIFFTileRowSize(tif),
+				    "TIFFVTileSize");
+	return ((tsize_t)
+	    multiply(tif, tilesize, td->td_tiledepth, "TIFFVTileSize"));
+}
+
+/*
+ * Compute the # bytes in a row-aligned tile.
+ */
+tsize_t
+TIFFTileSize(TIFF* tif)
+{	/* a full tile is TIFFVTileSize() asked for td_tilelength rows */
+	return (TIFFVTileSize(tif, tif->tif_dir.td_tilelength));
+}
+
+/*
+ * Compute a default tile size based on the image
+ * characteristics and a requested value.  If a
+ * request is <1 then we choose a size according
+ * to certain heuristics.
+ */
+void
+TIFFDefaultTileSize(TIFF* tif, uint32* tw, uint32* th)
+{	/* forwards to the routine stored in tif->tif_deftilesize; *tw and *th are in/out */
+	(*tif->tif_deftilesize)(tif, tw, th);
+}
+
+void
+_TIFFDefaultTileSize(TIFF* tif, uint32* tw, uint32* th)
+{
+	(void) tif;
+	/* each dimension: 256 if read as int32 it is < 1, then a multiple of 16 per the spec */
+	if (*(int32*) tw < 1)
+		*tw = 256;
+	if (*tw & 0xf)
+		*tw = TIFFroundup(*tw, 16);
+	if (*(int32*) th < 1)
+		*th = 256;
+	if (*th & 0xf)
+		*th = TIFFroundup(*th, 16);
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_version.c b/src/libtiff/tif_version.c
new file mode 100644
index 0000000..c0f25bf
--- /dev/null
+++ b/src/libtiff/tif_version.c
@@ -0,0 +1,33 @@
+/* $Header: /cvsroot/imtoolkit/im/src/libtiff/tif_version.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+/*
+ * Copyright (c) 1992-1997 Sam Leffler
+ * Copyright (c) 1992-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+#include "tiffiop.h"
+
+static const char TIFFVersion[] = TIFFLIB_VERSION_STR;
+
+const char*
+TIFFGetVersion(void)
+{	/* returns the file-static TIFFVersion string (initialised from TIFFLIB_VERSION_STR) */
+	return (TIFFVersion);
+}
diff --git a/src/libtiff/tif_warning.c b/src/libtiff/tif_warning.c
new file mode 100644
index 0000000..cb9d53f
--- /dev/null
+++ b/src/libtiff/tif_warning.c
@@ -0,0 +1,74 @@
+/* $Header: /cvsroot/imtoolkit/im/src/libtiff/tif_warning.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ */
+#include "tiffiop.h"
+
+TIFFErrorHandlerExt _TIFFwarningHandlerExt = NULL;
+
+TIFFErrorHandler
+TIFFSetWarningHandler(TIFFErrorHandler handler)
+{
+	TIFFErrorHandler replaced = _TIFFwarningHandler;	/* outgoing handler */
+	_TIFFwarningHandler = handler;
+	return (replaced);
+}
+
+TIFFErrorHandlerExt
+TIFFSetWarningHandlerExt(TIFFErrorHandlerExt handler)
+{
+	TIFFErrorHandlerExt replaced = _TIFFwarningHandlerExt;	/* outgoing handler */
+	_TIFFwarningHandlerExt = handler;
+	return (replaced);
+}
+
+void
+TIFFWarning(const char* module, const char* fmt, ...)
+{
+	va_list ap;	/* restarted per handler: a consumed va_list is indeterminate */
+	if (_TIFFwarningHandler) {
+		va_start(ap, fmt); (*_TIFFwarningHandler)(module, fmt, ap); va_end(ap);
+	}
+	if (_TIFFwarningHandlerExt) {
+		va_start(ap, fmt); (*_TIFFwarningHandlerExt)(0, module, fmt, ap); va_end(ap);
+	}
+}
+
+void
+TIFFWarningExt(thandle_t fd, const char* module, const char* fmt, ...)
+{
+	va_list ap;	/* restarted per handler: a consumed va_list is indeterminate */
+	if (_TIFFwarningHandler) {
+		va_start(ap, fmt); (*_TIFFwarningHandler)(module, fmt, ap); va_end(ap);
+	}
+	if (_TIFFwarningHandlerExt) {
+		va_start(ap, fmt); (*_TIFFwarningHandlerExt)(fd, module, fmt, ap); va_end(ap);
+	}
+}
+
+
diff --git a/src/libtiff/tif_write.c b/src/libtiff/tif_write.c
new file mode 100644
index 0000000..a07606f
--- /dev/null
+++ b/src/libtiff/tif_write.c
@@ -0,0 +1,725 @@
+/* $Id: tif_write.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+/*
+ * TIFF Library.
+ *
+ * Scanline-oriented Write Support
+ */
+#include "tiffiop.h"
+#include <stdio.h>
+
+#define	STRIPINCR	20		/* expansion factor on strip array */
+/* Lazy write-path setup; each expands to non-zero once its check or setup has succeeded. */
+#define	WRITECHECKSTRIPS(tif, module)				\
+	(((tif)->tif_flags&TIFF_BEENWRITING) || TIFFWriteCheck((tif),0,module))
+#define	WRITECHECKTILES(tif, module)				\
+	(((tif)->tif_flags&TIFF_BEENWRITING) || TIFFWriteCheck((tif),1,module))
+#define	BUFFERCHECK(tif)					\
+	((((tif)->tif_flags & TIFF_BUFFERSETUP) && (tif)->tif_rawdata) ||	\
+	    TIFFWriteBufferSetup((tif), NULL, (tsize_t) -1))
+
+static	int TIFFGrowStrips(TIFF*, int, const char*);
+static	int TIFFAppendToStrip(TIFF*, tstrip_t, tidata_t, tsize_t);
+/*
+ * Encode one scanline and append it to the strip holding `row' (with
+ * PLANARCONFIG_SEPARATE, the strip of plane `sample').  Returns the row
+ * encoder's status, or -1 when a check, setup, flush or seek step fails.
+ * NB: buf may be altered in place (see the swab step near the end).
+ */
+int
+TIFFWriteScanline(TIFF* tif, tdata_t buf, uint32 row, tsample_t sample)
+{
+	static const char module[] = "TIFFWriteScanline";
+	register TIFFDirectory *td;
+	int status, imagegrew = 0;
+	tstrip_t strip;
+
+	if (!WRITECHECKSTRIPS(tif, module))
+		return (-1);
+	/* Delayed buffer allocation: lets it be sized from directory information. */
+	if (!BUFFERCHECK(tif))
+		return (-1);
+	td = &tif->tif_dir;
+	/* Extend image length if needed (but only for PlanarConfig=1). */
+	if (row >= td->td_imagelength) {	/* extend image */
+		if (td->td_planarconfig == PLANARCONFIG_SEPARATE) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		"Can not change \"ImageLength\" when using separate planes");
+			return (-1);
+		}
+		td->td_imagelength = row+1;
+		imagegrew = 1;
+	}
+	/* Calculate strip and check for crossings. */
+	if (td->td_planarconfig == PLANARCONFIG_SEPARATE) {
+		if (sample >= td->td_samplesperpixel) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+			    "%d: Sample out of range, max %d",
+			    sample, td->td_samplesperpixel);
+			return (-1);
+		}
+		strip = sample*td->td_stripsperimage + row/td->td_rowsperstrip;
+	} else
+		strip = row / td->td_rowsperstrip;
+	/*
+	 * Check strip array to make sure there's space. We don't support
+	 * dynamically growing files that have data organized in separate
+	 * bitplanes because it's too painful.  In that case we require that
+	 * the imagelength be set properly before the first write (so that the
+	 * strips array will be fully allocated above).
+	 */
+	if (strip >= td->td_nstrips && !TIFFGrowStrips(tif, 1, module))
+		return (-1);
+	if (strip != tif->tif_curstrip) {
+		/*
+		 * Changing strips -- flush any data present.
+		 */
+		if (!TIFFFlushData(tif))
+			return (-1);
+		tif->tif_curstrip = strip;
+		/*
+		 * Watch out for a growing image.  The value of strips/image
+		 * will initially be 1 (since it can't be deduced until the
+		 * imagelength is known).
+		 */
+		if (strip >= td->td_stripsperimage && imagegrew)
+			td->td_stripsperimage =
+			    TIFFhowmany(td->td_imagelength,td->td_rowsperstrip);
+		if (td->td_stripsperimage == 0) {	/* it is the divisor just below */
+			TIFFErrorExt(tif->tif_clientdata, module, "Zero strips per image");
+			return (-1);
+		}
+		tif->tif_row =
+		    (strip % td->td_stripsperimage) * td->td_rowsperstrip;
+		if ((tif->tif_flags & TIFF_CODERSETUP) == 0) {
+			if (!(*tif->tif_setupencode)(tif))
+				return (-1);
+			tif->tif_flags |= TIFF_CODERSETUP;
+		}
+
+		tif->tif_rawcc = 0;
+		tif->tif_rawcp = tif->tif_rawdata;
+
+		if( td->td_stripbytecount[strip] > 0 )
+		{
+			/* if we are writing over an existing strip, zero its length */
+			td->td_stripbytecount[strip] = 0;
+
+			/* this forces TIFFAppendToStrip() to do a seek */
+			tif->tif_curoff = 0;
+		}
+
+		if (!(*tif->tif_preencode)(tif, sample))
+			return (-1);
+		tif->tif_flags |= TIFF_POSTENCODE;
+	}
+	/*
+	 * Ensure the write is either sequential or at the
+	 * beginning of a strip (or that we can randomly
+	 * access the data -- i.e. no encoding).
+	 */
+	if (row != tif->tif_row) {
+		if (row < tif->tif_row) {
+			/*
+			 * Moving backwards within the same strip:
+			 * backup to the start and then decode
+			 * forward (below).
+			 */
+			tif->tif_row = (strip % td->td_stripsperimage) *
+			    td->td_rowsperstrip;
+			tif->tif_rawcp = tif->tif_rawdata;
+		}
+		/*
+		 * Seek forward to the desired row.
+		 */
+		if (!(*tif->tif_seek)(tif, row - tif->tif_row))
+			return (-1);
+		tif->tif_row = row;
+	}
+
+	/* swab if needed - note that source buffer will be altered */
+	tif->tif_postdecode( tif, (tidata_t) buf, tif->tif_scanlinesize );
+
+	status = (*tif->tif_encoderow)(tif, (tidata_t) buf,
+	    tif->tif_scanlinesize, sample);
+
+	/* we are now poised at the beginning of the next row */
+	tif->tif_row = row + 1;
+	return (status);
+}
+
+/*
+ * Encode the supplied data and write it to the
+ * specified strip.  Returns cc on success, 0 if the
+ * strip encoder fails, and -1 on any other failure.
+ * The caller's buffer may be altered (swabbed) in place.
+ *
+ * NB: Image length must be setup before writing.
+ */
+tsize_t
+TIFFWriteEncodedStrip(TIFF* tif, tstrip_t strip, tdata_t data, tsize_t cc)
+{
+	static const char module[] = "TIFFWriteEncodedStrip";
+	TIFFDirectory *td = &tif->tif_dir;
+	tsample_t sample;
+
+	if (!WRITECHECKSTRIPS(tif, module))
+		return ((tsize_t) -1);
+	/*
+	 * Check strip array to make sure there's space.  Growing is not
+	 * supported for data organized in separate bitplanes; there the
+	 * imagelength must be set properly before the first write so that
+	 * the strips array is fully allocated up front.
+	 */
+	if (strip >= td->td_nstrips) {
+		if (td->td_planarconfig == PLANARCONFIG_SEPARATE) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		"Can not grow image by strips when using separate planes");
+			return ((tsize_t) -1);
+		}
+		if (!TIFFGrowStrips(tif, 1, module))
+			return ((tsize_t) -1);
+		td->td_stripsperimage =
+		    TIFFhowmany(td->td_imagelength, td->td_rowsperstrip);
+	}
+	/* Delayed buffer allocation: lets it be sized from the directory info. */
+	if (!BUFFERCHECK(tif))
+		return ((tsize_t) -1);
+	tif->tif_curstrip = strip;
+	/* strips/image is a divisor below (row and sample); zero is undefined there. */
+	if (td->td_stripsperimage == 0) {
+		TIFFErrorExt(tif->tif_clientdata, module, "Zero strips per image");
+		return ((tsize_t) -1);
+	}
+	tif->tif_row = (strip % td->td_stripsperimage) * td->td_rowsperstrip;
+	if ((tif->tif_flags & TIFF_CODERSETUP) == 0) {
+		if (!(*tif->tif_setupencode)(tif))
+			return ((tsize_t) -1);
+		tif->tif_flags |= TIFF_CODERSETUP;
+	}
+
+	tif->tif_rawcc = 0;
+	tif->tif_rawcp = tif->tif_rawdata;
+
+	if( td->td_stripbytecount[strip] > 0 )
+	{
+		/* if we are writing over an existing strip, zero its length. */
+		td->td_stripbytecount[strip] = 0;
+
+		/* this forces TIFFAppendToStrip() to do a seek */
+		tif->tif_curoff = 0;
+	}
+
+	tif->tif_flags &= ~TIFF_POSTENCODE;
+	sample = (tsample_t)(strip / td->td_stripsperimage);
+	if (!(*tif->tif_preencode)(tif, sample))
+		return ((tsize_t) -1);
+
+	/* swab if needed - note that source buffer will be altered */
+	tif->tif_postdecode( tif, (tidata_t) data, cc );
+
+	if (!(*tif->tif_encodestrip)(tif, (tidata_t) data, cc, sample))
+		return ((tsize_t) 0);
+	if (!(*tif->tif_postencode)(tif))
+		return ((tsize_t) -1);
+	if (!isFillOrder(tif, td->td_fillorder) &&
+	    (tif->tif_flags & TIFF_NOBITREV) == 0)
+		TIFFReverseBits(tif->tif_rawdata, tif->tif_rawcc);
+	if (tif->tif_rawcc > 0 &&
+	    !TIFFAppendToStrip(tif, strip, tif->tif_rawdata, tif->tif_rawcc))
+		return ((tsize_t) -1);
+	tif->tif_rawcc = 0;
+	tif->tif_rawcp = tif->tif_rawdata;
+	return (cc);
+}
+
+/*
+ * Write the supplied data to the specified strip as is (no codec is run).
+ * Returns cc on success and -1 on failure.
+ *
+ * NB: Image length must be setup before writing.
+ */
+tsize_t
+TIFFWriteRawStrip(TIFF* tif, tstrip_t strip, tdata_t data, tsize_t cc)
+{
+	static const char module[] = "TIFFWriteRawStrip";
+	TIFFDirectory *td = &tif->tif_dir;
+
+	if (!WRITECHECKSTRIPS(tif, module))
+		return ((tsize_t) -1);
+	/*
+	 * Make sure the strip arrays have space.  Growing is unsupported for
+	 * separate bitplanes: set the imagelength before the first write.
+	 */
+	if (strip >= td->td_nstrips) {
+		if (td->td_planarconfig == PLANARCONFIG_SEPARATE) {
+			TIFFErrorExt(tif->tif_clientdata, tif->tif_name,
+		"Can not grow image by strips when using separate planes");
+			return ((tsize_t) -1);
+		}
+		/*
+		 * Watch out for a growing image: strips/image is initially 1
+		 * (it can't be deduced until the imagelength is known).
+		 */
+		if (strip >= td->td_stripsperimage)
+			td->td_stripsperimage =
+			    TIFFhowmany(td->td_imagelength,td->td_rowsperstrip);
+		if (!TIFFGrowStrips(tif, 1, module))
+			return ((tsize_t) -1);
+	}
+	tif->tif_curstrip = strip;
+	/* strips/image is the divisor just below; zero is undefined there. */
+	if (td->td_stripsperimage == 0) {
+		TIFFErrorExt(tif->tif_clientdata, module, "Zero strips per image");
+		return ((tsize_t) -1);
+	}
+	tif->tif_row = (strip % td->td_stripsperimage) * td->td_rowsperstrip;
+	return (TIFFAppendToStrip(tif, strip, (tidata_t) data, cc) ?
+	    cc : (tsize_t) -1);
+}
+
+/*
+ * Compress and write the tile that contains sample (x,y,z,s).
+ * Returns -1 if TIFFCheckTile rejects the coordinates.
+ */
+tsize_t
+TIFFWriteTile(TIFF* tif,
+    tdata_t buf, uint32 x, uint32 y, uint32 z, tsample_t s)
+{
+	ttile_t tile;
+	if (TIFFCheckTile(tif, x, y, z, s) == 0)
+		return (-1);
+	tile = TIFFComputeTile(tif, x, y, z, s);
+	/*
+	 * NB: the size is given as -1, not tif_tilesize: the tile size may be
+	 *     undefined until TIFFWriteBufferSetup has set up the output
+	 *     buffer, and TIFFWriteEncodedTile clamps the count anyway.
+	 */
+	return (TIFFWriteEncodedTile(tif, tile, buf, (tsize_t) -1));
+}
+
+/*
+ * Encode the supplied data and write it to the
+ * specified tile.  There must be space for the
+ * data.  The function clamps individual writes
+ * to a tile to the tile size, but does not (and
+ * can not) check that multiple writes to the same
+ * tile do not write more than tile size data.
+ * Returns the clamped count; 0 if the tile encoder fails; -1 on other errors.
+ *
+ * NB: Image length must be setup before writing; this
+ *     interface does not support automatically growing
+ *     the image on each write (as TIFFWriteScanline does).
+ */
+tsize_t
+TIFFWriteEncodedTile(TIFF* tif, ttile_t tile, tdata_t data, tsize_t cc)
+{
+	static const char module[] = "TIFFWriteEncodedTile";
+	TIFFDirectory *td;
+	tsample_t sample;
+	uint32 tiles_down, tiles_across;	/* tile rows / tile columns in the image */
+
+	if (!WRITECHECKTILES(tif, module))
+		return ((tsize_t) -1);
+	td = &tif->tif_dir;
+	if (tile >= td->td_nstrips) {
+		TIFFErrorExt(tif->tif_clientdata, module, "%s: Tile %lu out of range, max %lu",
+		    tif->tif_name, (unsigned long) tile, (unsigned long) td->td_nstrips);
+		return ((tsize_t) -1);
+	}
+	/* Delayed buffer allocation: lets it be sized from directory information. */
+	if (!BUFFERCHECK(tif))
+		return ((tsize_t) -1);
+	tif->tif_curtile = tile;
+
+	tif->tif_rawcc = 0;
+	tif->tif_rawcp = tif->tif_rawdata;
+
+	if( td->td_stripbytecount[tile] > 0 )
+	{
+		/* if we are writing over existing tiles, zero length. */
+		td->td_stripbytecount[tile] = 0;
+
+		/* this forces TIFFAppendToStrip() to do a seek */
+		tif->tif_curoff = 0;
+	}
+
+	/*
+	 * Tiles down and across the image give the current row and column.
+	 * Both are used as moduli, so a zero count must be rejected first.
+	 */
+	tiles_down = TIFFhowmany(td->td_imagelength, td->td_tilelength);
+	tiles_across = TIFFhowmany(td->td_imagewidth, td->td_tilewidth);
+	if (tiles_down == 0 || tiles_across == 0) {
+		TIFFErrorExt(tif->tif_clientdata, module, "Zero tiles");
+		return ((tsize_t) -1);
+	}
+	tif->tif_row = (tile % tiles_down) * td->td_tilelength;
+	tif->tif_col = (tile % tiles_across) * td->td_tilewidth;
+
+	if ((tif->tif_flags & TIFF_CODERSETUP) == 0) {
+		if (!(*tif->tif_setupencode)(tif))
+			return ((tsize_t) -1);
+		tif->tif_flags |= TIFF_CODERSETUP;
+	}
+	tif->tif_flags &= ~TIFF_POSTENCODE;
+	sample = (tsample_t)(tile/td->td_stripsperimage);
+	if (!(*tif->tif_preencode)(tif, sample))
+		return ((tsize_t) -1);
+	/*
+	 * Clamp the count to the tile size so callers may pass e.g. -1.
+	 */
+	if ( cc < 1 || cc > tif->tif_tilesize)
+		cc = tif->tif_tilesize;
+
+	/* swab if needed - note that source buffer will be altered */
+	tif->tif_postdecode( tif, (tidata_t) data, cc );
+
+	if (!(*tif->tif_encodetile)(tif, (tidata_t) data, cc, sample))
+		return ((tsize_t) 0);
+	if (!(*tif->tif_postencode)(tif))
+		return ((tsize_t) -1);
+	if (!isFillOrder(tif, td->td_fillorder) &&
+	    (tif->tif_flags & TIFF_NOBITREV) == 0)
+		TIFFReverseBits((unsigned char *)tif->tif_rawdata, tif->tif_rawcc);
+	if (tif->tif_rawcc > 0 && !TIFFAppendToStrip(tif, tile,
+	    tif->tif_rawdata, tif->tif_rawcc))
+		return ((tsize_t) -1);
+	tif->tif_rawcc = 0;
+	tif->tif_rawcp = tif->tif_rawdata;
+	return (cc);
+}
+
+/*
+ * Append caller-supplied raw bytes to the specified tile; no codec is
+ * run.  There must be space for the data; overlap is not checked!
+ * Returns cc on success and -1 on failure.
+ * NB: Image length must be setup before writing; this
+ *     interface does not support automatically growing
+ *     the image on each write (as TIFFWriteScanline does).
+ */
+tsize_t
+TIFFWriteRawTile(TIFF* tif, ttile_t tile, tdata_t data, tsize_t cc)
+{
+	static const char module[] = "TIFFWriteRawTile";
+	TIFFDirectory *dir = &tif->tif_dir;
+
+	if (!WRITECHECKTILES(tif, module))
+		return ((tsize_t) -1);
+	if (tile >= dir->td_nstrips) {
+		TIFFErrorExt(tif->tif_clientdata, module, "%s: Tile %lu out of range, max %lu",
+		    tif->tif_name, (unsigned long) tile, (unsigned long) dir->td_nstrips);
+		return ((tsize_t) -1);
+	}
+	if (!TIFFAppendToStrip(tif, tile, (tidata_t) data, cc))
+		return ((tsize_t) -1);
+	return (cc);
+}
+
+#define	isUnspecified(tif, f) \
+    (TIFFFieldSet(tif,f) && (tif)->tif_dir.td_imagelength == 0)
+/* Size, allocate and zero the strip (or tile) offset and byte-count arrays; returns 1, or 0 if out of memory. */
+int
+TIFFSetupStrips(TIFF* tif)
+{
+	TIFFDirectory* td = &tif->tif_dir;
+
+	if (isTiled(tif))
+		td->td_stripsperimage = isUnspecified(tif, FIELD_TILEDIMENSIONS) ?
+		    td->td_samplesperpixel : TIFFNumberOfTiles(tif);
+	else
+		td->td_stripsperimage = isUnspecified(tif, FIELD_ROWSPERSTRIP) ?
+		    td->td_samplesperpixel : TIFFNumberOfStrips(tif);
+	td->td_nstrips = td->td_stripsperimage;
+	if (td->td_planarconfig == PLANARCONFIG_SEPARATE)
+		td->td_stripsperimage /= td->td_samplesperpixel;
+	td->td_stripoffset = (uint32 *) _TIFFmalloc(td->td_nstrips * sizeof (uint32));
+	td->td_stripbytecount = (uint32 *) _TIFFmalloc(td->td_nstrips * sizeof (uint32));
+	if (td->td_stripoffset == NULL || td->td_stripbytecount == NULL) {
+		/* Drop a lone survivor: TIFFWriteCheck retries this setup only while
+		 * td_stripoffset is NULL, so half a pair would later be used as is. */
+		if (td->td_stripoffset != NULL)
+			_TIFFfree(td->td_stripoffset);
+		if (td->td_stripbytecount != NULL)
+			_TIFFfree(td->td_stripbytecount);
+		td->td_stripoffset = td->td_stripbytecount = NULL;
+		return (0);
+	}
+	_TIFFmemset(td->td_stripoffset, 0, td->td_nstrips*sizeof (uint32));	/* offset 0: data goes at end-of-file */
+	_TIFFmemset(td->td_stripbytecount, 0, td->td_nstrips*sizeof (uint32));
+	TIFFSetFieldBit(tif, FIELD_STRIPOFFSETS);
+	TIFFSetFieldBit(tif, FIELD_STRIPBYTECOUNTS);
+	return (1);
+}
+#undef isUnspecified
+
+/*
+ * Gate used by the write entry points.  Verifies that the file is open
+ * for writing in the requested layout (tiles != 0 for tiles, 0 for
+ * strips/scanlines) and that the directory information needed for
+ * writing is set up, creating the strip arrays on first use.  On success
+ * the directory is "frozen" by setting TIFF_BEENWRITING.
+ * Returns 1 if writing may proceed, 0 after reporting why not.
+ */
+int
+TIFFWriteCheck(TIFF* tif, int tiles, const char* module)
+{
+	TIFFDirectory *dir = &tif->tif_dir;
+
+	if (tif->tif_mode == O_RDONLY) {
+		TIFFErrorExt(tif->tif_clientdata, module, "%s: File not open for writing",
+		    tif->tif_name);
+		return (0);
+	}
+	if (tiles ^ isTiled(tif)) {
+		TIFFErrorExt(tif->tif_clientdata, tif->tif_name, tiles ?
+		    "Can not write tiles to a stripped image" :
+		    "Can not write scanlines to a tiled image");
+		return (0);
+	}
+
+	/*
+	 * First write: check that the required information has been set
+	 * and build the data structures that had to wait for it.  The
+	 * result is assumed to stay valid because, once TIFF_BEENWRITING
+	 * is set, TIFFSetField only allows the image's length to change.
+	 */
+	if (!TIFFFieldSet(tif, FIELD_IMAGEDIMENSIONS)) {
+		TIFFErrorExt(tif->tif_clientdata, module,
+		    "%s: Must set \"ImageWidth\" before writing data",
+		    tif->tif_name);
+		return (0);
+	}
+	if (dir->td_samplesperpixel != 1) {
+		if (!TIFFFieldSet(tif, FIELD_PLANARCONFIG)) {
+			TIFFErrorExt(tif->tif_clientdata, module,
+		    "%s: Must set \"PlanarConfiguration\" before writing data",
+			    tif->tif_name);
+			return (0);
+		}
+	} else {
+		/*
+		 * PlanarConfiguration is irrelevant for single band images,
+		 * but it is set anyway because other parts of the library
+		 * use the field even in the single band case.
+		 */
+		dir->td_planarconfig = PLANARCONFIG_CONTIG;
+	}
+	if (dir->td_stripoffset == NULL && !TIFFSetupStrips(tif)) {
+		dir->td_nstrips = 0;
+		TIFFErrorExt(tif->tif_clientdata, module, "%s: No space for %s arrays",
+		    tif->tif_name, isTiled(tif) ? "tile" : "strip");
+		return (0);
+	}
+
+	tif->tif_tilesize = isTiled(tif) ? TIFFTileSize(tif) : (tsize_t) -1;
+	tif->tif_scanlinesize = TIFFScanlineSize(tif);
+	tif->tif_flags |= TIFF_BEENWRITING;
+	return (1);
+}
+
+/*
+ * Setup the raw data buffer used for encoding.  With bp NULL or size -1
+ * (then sized from the tile/strip size, 8K minimum) the library allocates
+ * and owns it; otherwise bp/size are adopted.  Returns 1, or 0 if out of memory.
+ */
+int
+TIFFWriteBufferSetup(TIFF* tif, tdata_t bp, tsize_t size)
+{
+	static const char module[] = "TIFFWriteBufferSetup";
+
+	if (tif->tif_rawdata != NULL && (tif->tif_flags & TIFF_MYBUFFER) != 0) {
+		_TIFFfree(tif->tif_rawdata);	/* only a buffer we allocated is freed */
+		tif->tif_flags &= ~TIFF_MYBUFFER;
+	}
+	tif->tif_rawdata = NULL;
+	if (size == (tsize_t) -1) {
+		if (isTiled(tif))
+			size = tif->tif_tilesize;
+		else
+			size = TIFFStripSize(tif);
+		if (size < 8*1024)	/* make raw data buffer at least 8K */
+			size = 8*1024;
+		bp = NULL;			/* NB: force malloc */
+	}
+	if (bp != NULL) {
+		tif->tif_flags &= ~TIFF_MYBUFFER;
+	} else {
+		bp = _TIFFmalloc(size);
+		if (bp == NULL) {
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: No space for output buffer",
+			    tif->tif_name);
+			return (0);
+		}
+		tif->tif_flags |= TIFF_MYBUFFER;
+	}
+	tif->tif_rawdata = (tidata_t) bp;
+	tif->tif_rawdatasize = size;
+	tif->tif_rawcc = 0;
+	tif->tif_rawcp = tif->tif_rawdata;
+	tif->tif_flags |= TIFF_BUFFERSETUP;
+	return (1);
+}
+
+/*
+ * Grow the strip data structures by delta strips.  Returns 1 on success;
+ * 0 after reporting the error, with td_nstrips and the old entries intact.
+ */
+static int
+TIFFGrowStrips(TIFF* tif, int delta, const char* module)
+{
+	TIFFDirectory	*td = &tif->tif_dir;
+	uint32		*new_stripoffset, *new_stripbytecount;
+
+	assert(td->td_planarconfig == PLANARCONFIG_CONTIG);
+	new_stripoffset = (uint32*)_TIFFrealloc(td->td_stripoffset,
+		(td->td_nstrips + delta) * sizeof (uint32));
+	new_stripbytecount = (uint32*)_TIFFrealloc(td->td_stripbytecount,
+		(td->td_nstrips + delta) * sizeof (uint32));
+	/*
+	 * A successful realloc may have released the old block: adopt each new
+	 * pointer even if the other call failed, so no stale pointer survives.
+	 */
+	if (new_stripoffset != NULL)
+		td->td_stripoffset = new_stripoffset;
+	if (new_stripbytecount != NULL)
+		td->td_stripbytecount = new_stripbytecount;
+	if (new_stripoffset == NULL || new_stripbytecount == NULL) {
+		TIFFErrorExt(tif->tif_clientdata, module, "%s: No space to expand strip arrays",
+			  tif->tif_name);
+		return (0);
+	}
+	_TIFFmemset(td->td_stripoffset + td->td_nstrips, 0, delta*sizeof (uint32));
+	_TIFFmemset(td->td_stripbytecount + td->td_nstrips, 0, delta*sizeof (uint32));
+	td->td_nstrips += delta;
+	return (1);
+}
+
+/*
+ * Append cc bytes of data to the specified strip (or tile); returns 1 on success, 0 on a seek or write error.
+ */
+static int
+TIFFAppendToStrip(TIFF* tif, tstrip_t strip, tidata_t data, tsize_t cc)
+{
+	TIFFDirectory *td = &tif->tif_dir;
+	static const char module[] = "TIFFAppendToStrip";
+
+	if (td->td_stripoffset[strip] == 0 || tif->tif_curoff == 0) {
+		/*
+		 * No current offset, set the current strip.
+		 */
+		assert(td->td_nstrips > 0);
+		if (td->td_stripoffset[strip] != 0) {
+			/*
+			 * Prevent overlapping of the data chunks.  This is needed
+			 * to allow in place updating of compressed images.  A
+			 * block that would overlap is moved to the end of the
+			 * file; the space it leaves behind is not reused, so the
+			 * scheme is not very space efficient.
+			 */
+			if (td->td_stripbytecountsorted) {	/* NOTE(review): presumably strips lie in file-offset order -- confirm; only the next strip is checked */
+				if (strip == td->td_nstrips - 1
+				    || td->td_stripoffset[strip + 1] <
+					td->td_stripoffset[strip] + cc) {
+					td->td_stripoffset[strip] =
+						TIFFSeekFile(tif, (toff_t)0,
+							     SEEK_END);
+				}
+			} else {	/* otherwise every strip is tested for starting inside the range about to be written */
+				tstrip_t i;
+				for (i = 0; i < td->td_nstrips; i++) {
+					if (td->td_stripoffset[i] > 
+						td->td_stripoffset[strip]
+					    && td->td_stripoffset[i] <
+						td->td_stripoffset[strip] + cc) {
+						td->td_stripoffset[strip] =
+							TIFFSeekFile(tif,
+								     (toff_t)0,
+								     SEEK_END);
+					}
+				}
+			}
+
+			if (!SeekOK(tif, td->td_stripoffset[strip])) {
+				TIFFErrorExt(tif->tif_clientdata, module,
+					  "%s: Seek error at scanline %lu",
+					  tif->tif_name,
+					  (unsigned long)tif->tif_row);
+				return (0);
+			}
+		} else	/* nothing recorded for this strip yet: it starts at the current end of file */
+			td->td_stripoffset[strip] =
+			    TIFFSeekFile(tif, (toff_t) 0, SEEK_END);
+		tif->tif_curoff = td->td_stripoffset[strip];
+	}
+
+	if (!WriteOK(tif, data, cc)) {
+		TIFFErrorExt(tif->tif_clientdata, module, "%s: Write error at scanline %lu",
+		    tif->tif_name, (unsigned long) tif->tif_row);
+		return (0);
+	}
+	tif->tif_curoff += cc;	/* keep the cached write position and the strip's size in step */
+	td->td_stripbytecount[strip] += cc;
+	return (1);
+}
+
+/*
+ * Internal version of TIFFFlushData that can be called by ``encodestrip
+ * routines'' w/o concern for infinite recursion.  Writes pending raw bytes
+ * to the current strip/tile; returns 1 on success or nothing pending, else 0.
+ */
+int
+TIFFFlushData1(TIFF* tif)
+{
+	tstrip_t target;
+
+	if (tif->tif_rawcc <= 0)
+		return (1);
+	if (!isFillOrder(tif, tif->tif_dir.td_fillorder) &&
+	    (tif->tif_flags & TIFF_NOBITREV) == 0)
+		TIFFReverseBits((unsigned char *)tif->tif_rawdata, tif->tif_rawcc);
+	target = isTiled(tif) ? tif->tif_curtile : tif->tif_curstrip;
+	if (!TIFFAppendToStrip(tif, target, tif->tif_rawdata, tif->tif_rawcc))
+		return (0);
+	tif->tif_rawcc = 0;
+	tif->tif_rawcp = tif->tif_rawdata;
+	return (1);
+}
+
+/*
+ * Set the current write offset (tif_curoff).  This should only be
+ * used to set the offset to a known previous location
+ * (very carefully), or to 0 so that the next write gets
+ * appended to the end of the file.
+ */
+void
+TIFFSetWriteOffset(TIFF* tif, toff_t off)
+{
+	tif->tif_curoff = off;
+}
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tif_zip.c b/src/libtiff/tif_zip.c
new file mode 100644
index 0000000..41781f8
--- /dev/null
+++ b/src/libtiff/tif_zip.c
@@ -0,0 +1,378 @@
+/* $Id: tif_zip.c,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1995-1997 Sam Leffler
+ * Copyright (c) 1995-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#include "tiffiop.h"
+#ifdef ZIP_SUPPORT
+/*
+ * TIFF Library.
+ *
+ * ZIP (aka Deflate) Compression Support
+ *
+ * This file is simply an interface to the zlib library written by
+ * Jean-loup Gailly and Mark Adler.  You must use version 1.0 or later
+ * of the library: this code assumes the 1.0 API and also depends on
+ * the ability to write the zlib header multiple times (one per strip)
+ * which was not possible with versions prior to 0.95.  Note also that
+ * older versions of this codec avoided this bug by suppressing the header
+ * entirely.  This means that files written with the old library cannot
+ * be read; they should be converted to a different compression scheme
+ * and then reconverted.
+ *
+ * The data format used by the zlib library is described in the files
+ * zlib-3.1.doc, deflate-1.1.doc and gzip-4.1.doc, available in the
+ * directory ftp://ftp.uu.net/pub/archiving/zip/doc.  The library was
+ * last found at ftp://ftp.uu.net/pub/archiving/zip/zlib/zlib-0.99.tar.gz.
+ */
+#include "tif_predict.h"
+#include "zlib.h"
+
+#include <stdio.h>
+
+/*
+ * Sigh, ZLIB_VERSION is defined as a string so there's no
+ * way to do a proper check here.  Instead we guess based
+ * on the presence of #defines that were added between the
+ * 0.95 and 1.0 distributions.
+ */
+#if !defined(Z_NO_COMPRESSION) || !defined(Z_DEFLATED)
+#error "Antiquated ZLIB software; you must use version 1.0 or later"
+#endif
+
+/*
+ * State block for each open TIFF
+ * file using ZIP compression/decompression.
+ */
+typedef	struct {
+	TIFFPredictorState predict;	/* NOTE(review): presumably must stay first so predictor code can share tif_data -- confirm in tif_predict.h */
+	z_stream	stream;		/* the one zlib stream, used by both the inflate and deflate paths */
+	int		zipquality;		/* compression level */
+	int		state;			/* state flags */
+#define	ZSTATE_INIT	0x1		/* zlib setup successfully */
+
+	TIFFVGetMethod	vgetparent;		/* super-class method */
+	TIFFVSetMethod	vsetparent;		/* super-class method */
+} ZIPState;
+
+#define	ZState(tif)		((ZIPState*) (tif)->tif_data)	/* codec state hangs off tif_data */
+#define	DecoderState(tif)	ZState(tif)
+#define	EncoderState(tif)	ZState(tif)
+
+static	int ZIPEncode(TIFF*, tidata_t, tsize_t, tsample_t);
+static	int ZIPDecode(TIFF*, tidata_t, tsize_t, tsample_t);
+/* One-time inflate setup for the codec: returns 1 on success, 0 after reporting the zlib error. */
+static int
+ZIPSetupDecode(TIFF* tif)
+{
+	ZIPState* sp = DecoderState(tif);
+	static const char module[] = "ZIPSetupDecode";
+
+	assert(sp != NULL);
+	if (inflateInit(&sp->stream) != Z_OK) {
+		TIFFErrorExt(tif->tif_clientdata, module, "%s: %s", tif->tif_name,
+		    sp->stream.msg ? sp->stream.msg : "");	/* zlib may leave msg NULL */
+		return (0);
+	}
+	sp->state |= ZSTATE_INIT;
+	return (1);
+}
+
+/*
+ * Hand the current raw data to zlib and restart inflation (1 = ok, 0 = failed).
+ */
+static int
+ZIPPreDecode(TIFF* tif, tsample_t s)
+{
+	ZIPState* sp = DecoderState(tif);
+
+	(void) s;	/* the sample number plays no part here */
+	assert(sp != NULL);
+	sp->stream.avail_in = tif->tif_rawcc;
+	sp->stream.next_in = tif->tif_rawdata;
+	return (inflateReset(&sp->stream) != Z_OK) ? 0 : 1;
+}
+/* Inflate until occ bytes are stored at op; returns 1, or 0 after reporting a zlib error or short data. */
+static int
+ZIPDecode(TIFF* tif, tidata_t op, tsize_t occ, tsample_t s)
+{
+	ZIPState* sp = DecoderState(tif);
+	static const char module[] = "ZIPDecode";
+	(void) s;
+	assert(sp != NULL);
+	sp->stream.next_out = op;
+	sp->stream.avail_out = occ;
+	do {
+		int state = inflate(&sp->stream, Z_PARTIAL_FLUSH);
+		if (state == Z_STREAM_END)
+			break;
+		if (state == Z_DATA_ERROR) {
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: Decoding error at scanline %lu, %s",
+			    tif->tif_name, (unsigned long) tif->tif_row,
+			    sp->stream.msg ? sp->stream.msg : "");	/* msg may be NULL */
+			if (inflateSync(&sp->stream) != Z_OK)
+				return (0);
+			continue;
+		}
+		if (state != Z_OK) {
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: zlib error: %s",
+			    tif->tif_name, sp->stream.msg ? sp->stream.msg : "");
+			return (0);
+		}
+	} while (sp->stream.avail_out > 0);
+	if (sp->stream.avail_out != 0) {
+		TIFFErrorExt(tif->tif_clientdata, module,
+		    "%s: Not enough data at scanline %lu (short %lu bytes)",
+		    tif->tif_name, (unsigned long) tif->tif_row,
+		    (unsigned long) sp->stream.avail_out);
+		return (0);
+	}
+	return (1);
+}
+/* One-time deflate setup at the configured zipquality: returns 1 on success, 0 after reporting the zlib error. */
+static int
+ZIPSetupEncode(TIFF* tif)
+{
+	ZIPState* sp = EncoderState(tif);
+	static const char module[] = "ZIPSetupEncode";
+
+	assert(sp != NULL);
+	if (deflateInit(&sp->stream, sp->zipquality) != Z_OK) {
+		TIFFErrorExt(tif->tif_clientdata, module, "%s: %s", tif->tif_name,
+		    sp->stream.msg ? sp->stream.msg : "");	/* zlib may leave msg NULL */
+		return (0);
+	}
+	sp->state |= ZSTATE_INIT;
+	return (1);
+}
+
+/*
+ * Aim zlib's output at the whole raw buffer and restart deflation (1 = ok, 0 = failed).
+ */
+static int
+ZIPPreEncode(TIFF* tif, tsample_t s)
+{
+	ZIPState *sp = EncoderState(tif);
+
+	(void) s;	/* the sample number plays no part here */
+	assert(sp != NULL);
+	sp->stream.avail_out = tif->tif_rawdatasize;
+	sp->stream.next_out = tif->tif_rawdata;
+	return (deflateReset(&sp->stream) != Z_OK) ? 0 : 1;
+}
+
+/*
+ * Encode a chunk of pixels.  Returns 1 on success, 0 on a zlib or write error.
+ */
+static int
+ZIPEncode(TIFF* tif, tidata_t bp, tsize_t cc, tsample_t s)
+{
+	ZIPState *sp = EncoderState(tif);
+	static const char module[] = "ZIPEncode";
+	(void) s;
+	sp->stream.next_in = bp;
+	sp->stream.avail_in = cc;
+	do {
+		if (deflate(&sp->stream, Z_NO_FLUSH) != Z_OK) {
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: Encoder error: %s",
+			    tif->tif_name, sp->stream.msg ? sp->stream.msg : "");	/* msg may be NULL */
+			return (0);
+		}
+		if (sp->stream.avail_out == 0) {	/* raw buffer full: write it out */
+			tif->tif_rawcc = tif->tif_rawdatasize;
+			if (!TIFFFlushData1(tif))	/* a failed write must stop the encode */
+				return (0);
+			sp->stream.next_out = tif->tif_rawdata;
+			sp->stream.avail_out = tif->tif_rawdatasize;
+		}
+	} while (sp->stream.avail_in > 0);
+	return (1);
+}
+
+/*
+ * Finish off an encoded strip: drive deflate with Z_FINISH until the
+ * stream ends, writing out the raw buffer whenever it holds data.
+ * Returns 1 on success, 0 on a zlib or write error.
+ */
+static int
+ZIPPostEncode(TIFF* tif)
+{
+	ZIPState *sp = EncoderState(tif);
+	static const char module[] = "ZIPPostEncode";
+	int state;
+
+	sp->stream.avail_in = 0;
+	do {
+		state = deflate(&sp->stream, Z_FINISH);
+		switch (state) {
+		case Z_STREAM_END:
+		case Z_OK:
+			if ((int)sp->stream.avail_out != (int)tif->tif_rawdatasize) {
+				tif->tif_rawcc = tif->tif_rawdatasize - sp->stream.avail_out;
+				if (!TIFFFlushData1(tif))	/* a failed write must not be ignored */
+					return (0);
+				sp->stream.next_out = tif->tif_rawdata;
+				sp->stream.avail_out = tif->tif_rawdatasize;
+			}
+			break;
+		default:
+			TIFFErrorExt(tif->tif_clientdata, module, "%s: zlib error: %s",
+			    tif->tif_name, sp->stream.msg ? sp->stream.msg : "");	/* msg may be NULL */
+			return (0);
+		}
+	} while (state != Z_STREAM_END);
+	return (1);
+}
+/* Undo TIFFInitZIP: restore the saved tag methods, end the zlib stream if one was set up, free the state. */
+static void
+ZIPCleanup(TIFF* tif)
+{
+	ZIPState* sp = ZState(tif);
+
+	assert(sp != 0);
+
+	(void)TIFFPredictorCleanup(tif);
+
+	/* put the parent get/set field methods back */
+	tif->tif_tagmethods.vsetfield = sp->vsetparent;
+	tif->tif_tagmethods.vgetfield = sp->vgetparent;
+
+	/* NB: avoid problems in the library -- the end call is chosen from the open mode */
+	if ((sp->state & ZSTATE_INIT) != 0 && tif->tif_mode == O_RDONLY)
+		inflateEnd(&sp->stream);
+	else if ((sp->state & ZSTATE_INIT) != 0)
+		deflateEnd(&sp->stream);
+
+	tif->tif_data = NULL;
+	_TIFFfree(sp);
+
+	_TIFFSetDefaultCompressionState(tif);
+}
+/* Tag setter: handles the ZIPQUALITY pseudo-tag here and passes every other tag to the saved parent method. */
+static int
+ZIPVSetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+	ZIPState* sp = ZState(tif);
+	static const char module[] = "ZIPVSetField";
+
+	switch (tag) {
+	case TIFFTAG_ZIPQUALITY:
+		sp->zipquality = va_arg(ap, int);
+		if (tif->tif_mode != O_RDONLY && (sp->state&ZSTATE_INIT)) {	/* stream already set up: apply the level now */
+			if (deflateParams(&sp->stream,
+			    sp->zipquality, Z_DEFAULT_STRATEGY) != Z_OK) {
+				TIFFErrorExt(tif->tif_clientdata, module, "%s: zlib error: %s",
+				    tif->tif_name, sp->stream.msg ? sp->stream.msg : "");	/* msg may be NULL */
+				return (0);
+			}
+		}
+		return (1);
+	default:
+		return (*sp->vsetparent)(tif, tag, ap);
+	}
+	/*NOTREACHED*/
+}
+/* Tag getter: answers the ZIPQUALITY pseudo-tag itself and passes every other tag to the saved parent method. */
+static int
+ZIPVGetField(TIFF* tif, ttag_t tag, va_list ap)
+{
+	ZIPState* sp = ZState(tif);
+	int* level_out;
+
+	if (tag != TIFFTAG_ZIPQUALITY)
+		return (*sp->vgetparent)(tif, tag, ap);
+
+	/* the next variadic argument is where the level is stored */
+	level_out = va_arg(ap, int*);
+	*level_out = sp->zipquality;
+	return (1);
+}
+/* Descriptor for the ZIPQUALITY pseudo-tag (FIELD_PSEUDO); TIFFInitZIP merges it into the TIFF's field info. */
+static const TIFFFieldInfo zipFieldInfo[] = {
+    { TIFFTAG_ZIPQUALITY,	 0, 0,	TIFF_ANY,	FIELD_PSEUDO,
+      TRUE,	FALSE,	"" },
+};
+/*
+ * Attach the ZIP (Deflate) codec to tif: allocate its state block, put
+ * the codec's tag get/set hooks in front of the current ones, install
+ * the encode/decode/cleanup methods and initialise the predictor.
+ * Returns 1 on success, 0 if the state block cannot be allocated.
+ */
+int
+TIFFInitZIP(TIFF* tif, int scheme)
+{
+	ZIPState* sp;
+
+	assert( (scheme == COMPRESSION_DEFLATE)
+		|| (scheme == COMPRESSION_ADOBE_DEFLATE));
+
+	/* The tag methods need this block to record their values in. */
+	tif->tif_data = (tidata_t) _TIFFmalloc(sizeof (ZIPState));
+	if (tif->tif_data == NULL) {
+		TIFFErrorExt(tif->tif_clientdata, "TIFFInitZIP",
+			     "No space for ZIP state block");
+		return (0);
+	}
+	sp = ZState(tif);
+
+	/* zlib stream fields set by the caller, then the codec's own defaults */
+	sp->stream.zalloc = NULL;
+	sp->stream.zfree = NULL;
+	sp->stream.opaque = NULL;
+	sp->stream.data_type = Z_BINARY;
+	sp->zipquality = Z_DEFAULT_COMPRESSION;	/* default comp. level */
+	sp->state = 0;
+
+	/*
+	 * Merge codec-specific tag information, then remember the current
+	 * get/set field methods and put the codec's hooks in their place.
+	 */
+	_TIFFMergeFieldInfo(tif, zipFieldInfo, TIFFArrayCount(zipFieldInfo));
+	sp->vgetparent = tif->tif_tagmethods.vgetfield;
+	sp->vsetparent = tif->tif_tagmethods.vsetfield;
+	tif->tif_tagmethods.vgetfield = ZIPVGetField;
+	tif->tif_tagmethods.vsetfield = ZIPVSetField;
+
+	/*
+	 * Install codec methods; rows, strips and tiles share one decoder and one encoder.
+	 */
+	tif->tif_setupdecode = ZIPSetupDecode;
+	tif->tif_predecode = ZIPPreDecode;
+	tif->tif_decoderow = ZIPDecode;
+	tif->tif_decodestrip = ZIPDecode;
+	tif->tif_decodetile = ZIPDecode;
+	tif->tif_setupencode = ZIPSetupEncode;
+	tif->tif_preencode = ZIPPreEncode;
+	tif->tif_postencode = ZIPPostEncode;
+	tif->tif_encoderow = ZIPEncode;
+	tif->tif_encodestrip = ZIPEncode;
+	tif->tif_encodetile = ZIPEncode;
+	tif->tif_cleanup = ZIPCleanup;
+	/* Predictor setup comes last. */
+	(void) TIFFPredictorInit(tif);
+	return (1);
+}
+#endif /* ZIP_SUPPORT */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tiff.h b/src/libtiff/tiff.h
new file mode 100644
index 0000000..0697268
--- /dev/null
+++ b/src/libtiff/tiff.h
@@ -0,0 +1,647 @@
+/* $Id: tiff.h,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef _TIFF_
+#define	_TIFF_
+
+#include "tif_config.h"  /* IMLIB */
+
+/*
+ * Tag Image File Format (TIFF)
+ *
+ * Based on Rev 6.0 from:
+ *    Developer's Desk
+ *    Aldus Corporation
+ *    411 First Ave. South
+ *    Suite 200
+ *    Seattle, WA  98104
+ *    206-622-5500
+ *    
+ *    (http://partners.adobe.com/asn/developer/PDFS/TN/TIFF6.pdf)
+ *
+ * For Big TIFF design notes see the following link
+ *    http://gdal.maptools.org/twiki/bin/view/libtiff/BigTIFFDesign
+ */
+#define	TIFF_VERSION	        42
+#define TIFF_BIGTIFF_VERSION    43
+
+#define	TIFF_BIGENDIAN		0x4d4d
+#define	TIFF_LITTLEENDIAN	0x4949
+#define	MDI_LITTLEENDIAN        0x5045
+#define	MDI_BIGENDIAN           0x4550
+/*
+ * Intrinsic data types required by the file format:
+ *
+ * 8-bit quantities	int8/uint8
+ * 16-bit quantities	int16/uint16
+ * 32-bit quantities	int32/uint32
+ * strings		unsigned char*
+ */
+
+#ifndef HAVE_INT8
+typedef	signed char int8;	/* NB: non-ANSI compilers may not grok */
+#endif /* !HAVE_INT8 */
+typedef	unsigned char uint8;
+#ifndef HAVE_INT16
+typedef	short int16;
+#endif /* !HAVE_INT16 */
+typedef	unsigned short uint16;	/* sizeof (uint16) must == 2 */
+#if SIZEOF_INT == 4
+#ifndef HAVE_INT32
+typedef	int int32;
+#endif /* !HAVE_INT32 */
+typedef	unsigned int uint32;	/* sizeof (uint32) must == 4 */
+#elif SIZEOF_LONG == 4
+#ifndef HAVE_INT32
+typedef	long int32;
+#endif /* !HAVE_INT32 */
+typedef	unsigned long uint32;	/* sizeof (uint32) must == 4 */
+#endif /* NB: no #else -- int32/uint32 stay undefined if neither int nor long is 4 bytes */
+
+/* For TIFFReassignTagToIgnore */
+enum TIFFIgnoreSense /* IGNORE tag table */
+{
+	TIS_STORE,
+	TIS_EXTRACT,
+	TIS_EMPTY
+};
+
+/*
+ * TIFF header.
+ */
+typedef	struct {
+	uint16	tiff_magic;	/* magic number (defines byte order) */
+#define TIFF_MAGIC_SIZE		2
+	uint16	tiff_version;	/* TIFF version number */
+#define TIFF_VERSION_SIZE	2
+	uint32	tiff_diroff;	/* byte offset to first directory */
+#define TIFF_DIROFFSET_SIZE	4
+} TIFFHeader;
+
+
+/*
+ * TIFF Image File Directories are comprised of a table of field
+ * descriptors of the form shown below.  The table is sorted in
+ * ascending order by tag.  The values associated with each entry are
+ * disjoint and may appear anywhere in the file (so long as they are
+ * placed on a word boundary).
+ *
+ * If the value is 4 bytes or less, then it is placed in the offset
+ * field to save space.  If the value is less than 4 bytes, it is
+ * left-justified in the offset field.
+ */
+typedef	struct {
+	uint16		tdir_tag;	/* see below */
+	uint16		tdir_type;	/* data type; see below */
+	uint32		tdir_count;	/* number of items; length in spec */
+	uint32		tdir_offset;	/* byte offset to field data */
+} TIFFDirEntry;
+
+/*
+ * NB: In the comments below,
+ *  - items marked with a + are obsoleted by revision 5.0,
+ *  - items marked with a ! are introduced in revision 6.0.
+ *  - items marked with a % are introduced post revision 6.0.
+ *  - items marked with a $ are obsoleted by revision 6.0.
+ *  - items marked with a & are introduced by Adobe DNG specification.
+ */
+
+/*
+ * Tag data type information.
+ *
+ * Note: RATIONALs are the ratio of two 32-bit integer values.
+ */
+typedef	enum {
+	TIFF_NOTYPE	= 0,	/* placeholder */
+	TIFF_BYTE	= 1,	/* 8-bit unsigned integer */
+	TIFF_ASCII	= 2,	/* 8-bit bytes w/ last byte null */
+	TIFF_SHORT	= 3,	/* 16-bit unsigned integer */
+	TIFF_LONG	= 4,	/* 32-bit unsigned integer */
+	TIFF_RATIONAL	= 5,	/* 64-bit unsigned fraction */
+	TIFF_SBYTE	= 6,	/* !8-bit signed integer */
+	TIFF_UNDEFINED	= 7,	/* !8-bit untyped data */
+	TIFF_SSHORT	= 8,	/* !16-bit signed integer */
+	TIFF_SLONG	= 9,	/* !32-bit signed integer */
+	TIFF_SRATIONAL	= 10,	/* !64-bit signed fraction */
+	TIFF_FLOAT	= 11,	/* !32-bit IEEE floating point */
+	TIFF_DOUBLE	= 12,	/* !64-bit IEEE floating point */
+	TIFF_IFD	= 13	/* %32-bit unsigned integer (offset) */
+} TIFFDataType;
+
+/*
+ * TIFF Tag Definitions.
+ */
+#define	TIFFTAG_SUBFILETYPE		254	/* subfile data descriptor */
+#define	    FILETYPE_REDUCEDIMAGE	0x1	/* reduced resolution version */
+#define	    FILETYPE_PAGE		0x2	/* one page of many */
+#define	    FILETYPE_MASK		0x4	/* transparency mask */
+#define	TIFFTAG_OSUBFILETYPE		255	/* +kind of data in subfile */
+#define	    OFILETYPE_IMAGE		1	/* full resolution image data */
+#define	    OFILETYPE_REDUCEDIMAGE	2	/* reduced size image data */
+#define	    OFILETYPE_PAGE		3	/* one page of many */
+#define	TIFFTAG_IMAGEWIDTH		256	/* image width in pixels */
+#define	TIFFTAG_IMAGELENGTH		257	/* image height in pixels */
+#define	TIFFTAG_BITSPERSAMPLE		258	/* bits per channel (sample) */
+#define	TIFFTAG_COMPRESSION		259	/* data compression technique */
+#define	    COMPRESSION_NONE		1	/* dump mode */
+#define	    COMPRESSION_CCITTRLE	2	/* CCITT modified Huffman RLE */
+#define	    COMPRESSION_CCITTFAX3	3	/* CCITT Group 3 fax encoding */
+#define     COMPRESSION_CCITT_T4        3       /* CCITT T.4 (TIFF 6 name) */
+#define	    COMPRESSION_CCITTFAX4	4	/* CCITT Group 4 fax encoding */
+#define     COMPRESSION_CCITT_T6        4       /* CCITT T.6 (TIFF 6 name) */
+#define	    COMPRESSION_LZW		5       /* Lempel-Ziv  & Welch */
+#define	    COMPRESSION_OJPEG		6	/* !6.0 JPEG */
+#define	    COMPRESSION_JPEG		7	/* %JPEG DCT compression */
+#define	    COMPRESSION_NEXT		32766	/* NeXT 2-bit RLE */
+#define	    COMPRESSION_CCITTRLEW	32771	/* #1 w/ word alignment */
+#define	    COMPRESSION_PACKBITS	32773	/* Macintosh RLE */
+#define	    COMPRESSION_THUNDERSCAN	32809	/* ThunderScan RLE */
+/* codes 32895-32898 are reserved for ANSI IT8 TIFF/IT <dkelly@apago.com> */
+#define	    COMPRESSION_IT8CTPAD	32895   /* IT8 CT w/padding */
+#define	    COMPRESSION_IT8LW		32896   /* IT8 Linework RLE */
+#define	    COMPRESSION_IT8MP		32897   /* IT8 Monochrome picture */
+#define	    COMPRESSION_IT8BL		32898   /* IT8 Binary line art */
+/* compression codes 32908-32911 are reserved for Pixar */
+#define     COMPRESSION_PIXARFILM	32908   /* Pixar companded 10bit LZW */
+#define	    COMPRESSION_PIXARLOG	32909   /* Pixar companded 11bit ZIP */
+#define	    COMPRESSION_DEFLATE		32946	/* Deflate compression */
+#define     COMPRESSION_ADOBE_DEFLATE   8       /* Deflate compression,
+						   as recognized by Adobe */
+/* compression code 32947 is reserved for Oceana Matrix <dev@oceana.com> */
+#define     COMPRESSION_DCS             32947   /* Kodak DCS encoding */
+#define	    COMPRESSION_JBIG		34661	/* ISO JBIG */
+#define     COMPRESSION_SGILOG		34676	/* SGI Log Luminance RLE */
+#define     COMPRESSION_SGILOG24	34677	/* SGI Log 24-bit packed */
+#define     COMPRESSION_JP2000          34712   /* Leadtools JPEG2000 */
+#define	TIFFTAG_PHOTOMETRIC		262	/* photometric interpretation */
+#define	    PHOTOMETRIC_MINISWHITE	0	/* min value is white */
+#define	    PHOTOMETRIC_MINISBLACK	1	/* min value is black */
+#define	    PHOTOMETRIC_RGB		2	/* RGB color model */
+#define	    PHOTOMETRIC_PALETTE		3	/* color map indexed */
+#define	    PHOTOMETRIC_MASK		4	/* $holdout mask */
+#define	    PHOTOMETRIC_SEPARATED	5	/* !color separations */
+#define	    PHOTOMETRIC_YCBCR		6	/* !CCIR 601 */
+#define	    PHOTOMETRIC_CIELAB		8	/* !1976 CIE L*a*b* */
+#define	    PHOTOMETRIC_ICCLAB		9	/* ICC L*a*b* [Adobe TIFF Technote 4] */
+#define	    PHOTOMETRIC_ITULAB		10	/* ITU L*a*b* */
+#define     PHOTOMETRIC_LOGL		32844	/* CIE Log2(L) */
+#define     PHOTOMETRIC_LOGLUV		32845	/* CIE Log2(L) (u',v') */
+#define	TIFFTAG_THRESHHOLDING		263	/* +thresholding used on data */
+#define	    THRESHHOLD_BILEVEL		1	/* b&w art scan */
+#define	    THRESHHOLD_HALFTONE		2	/* or dithered scan */
+#define	    THRESHHOLD_ERRORDIFFUSE	3	/* usually floyd-steinberg */
+#define	TIFFTAG_CELLWIDTH		264	/* +dithering matrix width */
+#define	TIFFTAG_CELLLENGTH		265	/* +dithering matrix height */
+#define	TIFFTAG_FILLORDER		266	/* data order within a byte */
+#define	    FILLORDER_MSB2LSB		1	/* most significant -> least */
+#define	    FILLORDER_LSB2MSB		2	/* least significant -> most */
+#define	TIFFTAG_DOCUMENTNAME		269	/* name of doc. image is from */
+#define	TIFFTAG_IMAGEDESCRIPTION	270	/* info about image */
+#define	TIFFTAG_MAKE			271	/* scanner manufacturer name */
+#define	TIFFTAG_MODEL			272	/* scanner model name/number */
+#define	TIFFTAG_STRIPOFFSETS		273	/* offsets to data strips */
+#define	TIFFTAG_ORIENTATION		274	/* +image orientation */
+#define	    ORIENTATION_TOPLEFT		1	/* row 0 top, col 0 lhs */
+#define	    ORIENTATION_TOPRIGHT	2	/* row 0 top, col 0 rhs */
+#define	    ORIENTATION_BOTRIGHT	3	/* row 0 bottom, col 0 rhs */
+#define	    ORIENTATION_BOTLEFT		4	/* row 0 bottom, col 0 lhs */
+#define	    ORIENTATION_LEFTTOP		5	/* row 0 lhs, col 0 top */
+#define	    ORIENTATION_RIGHTTOP	6	/* row 0 rhs, col 0 top */
+#define	    ORIENTATION_RIGHTBOT	7	/* row 0 rhs, col 0 bottom */
+#define	    ORIENTATION_LEFTBOT		8	/* row 0 lhs, col 0 bottom */
+#define	TIFFTAG_SAMPLESPERPIXEL		277	/* samples per pixel */
+#define	TIFFTAG_ROWSPERSTRIP		278	/* rows per strip of data */
+#define	TIFFTAG_STRIPBYTECOUNTS		279	/* bytes counts for strips */
+#define	TIFFTAG_MINSAMPLEVALUE		280	/* +minimum sample value */
+#define	TIFFTAG_MAXSAMPLEVALUE		281	/* +maximum sample value */
+#define	TIFFTAG_XRESOLUTION		282	/* pixels/resolution in x */
+#define	TIFFTAG_YRESOLUTION		283	/* pixels/resolution in y */
+#define	TIFFTAG_PLANARCONFIG		284	/* storage organization */
+#define	    PLANARCONFIG_CONTIG		1	/* single image plane */
+#define	    PLANARCONFIG_SEPARATE	2	/* separate planes of data */
+#define	TIFFTAG_PAGENAME		285	/* page name image is from */
+#define	TIFFTAG_XPOSITION		286	/* x page offset of image lhs */
+#define	TIFFTAG_YPOSITION		287	/* y page offset of image lhs */
+#define	TIFFTAG_FREEOFFSETS		288	/* +byte offset to free block */
+#define	TIFFTAG_FREEBYTECOUNTS		289	/* +sizes of free blocks */
+#define	TIFFTAG_GRAYRESPONSEUNIT	290	/* $gray scale curve accuracy */
+#define	    GRAYRESPONSEUNIT_10S	1	/* tenths of a unit */
+#define	    GRAYRESPONSEUNIT_100S	2	/* hundredths of a unit */
+#define	    GRAYRESPONSEUNIT_1000S	3	/* thousandths of a unit */
+#define	    GRAYRESPONSEUNIT_10000S	4	/* ten-thousandths of a unit */
+#define	    GRAYRESPONSEUNIT_100000S	5	/* hundred-thousandths */
+#define	TIFFTAG_GRAYRESPONSECURVE	291	/* $gray scale response curve */
+#define	TIFFTAG_GROUP3OPTIONS		292	/* 32 flag bits */
+#define	TIFFTAG_T4OPTIONS		292	/* TIFF 6.0 proper name alias */
+#define	    GROUP3OPT_2DENCODING	0x1	/* 2-dimensional coding */
+#define	    GROUP3OPT_UNCOMPRESSED	0x2	/* data not compressed */
+#define	    GROUP3OPT_FILLBITS		0x4	/* fill to byte boundary */
+#define	TIFFTAG_GROUP4OPTIONS		293	/* 32 flag bits */
+#define TIFFTAG_T6OPTIONS               293     /* TIFF 6.0 proper name */
+#define	    GROUP4OPT_UNCOMPRESSED	0x2	/* data not compressed */
+#define	TIFFTAG_RESOLUTIONUNIT		296	/* units of resolutions */
+#define	    RESUNIT_NONE		1	/* no meaningful units */
+#define	    RESUNIT_INCH		2	/* english */
+#define	    RESUNIT_CENTIMETER		3	/* metric */
+#define	TIFFTAG_PAGENUMBER		297	/* page numbers of multi-page */
+#define	TIFFTAG_COLORRESPONSEUNIT	300	/* $color curve accuracy */
+#define	    COLORRESPONSEUNIT_10S	1	/* tenths of a unit */
+#define	    COLORRESPONSEUNIT_100S	2	/* hundredths of a unit */
+#define	    COLORRESPONSEUNIT_1000S	3	/* thousandths of a unit */
+#define	    COLORRESPONSEUNIT_10000S	4	/* ten-thousandths of a unit */
+#define	    COLORRESPONSEUNIT_100000S	5	/* hundred-thousandths */
+#define	TIFFTAG_TRANSFERFUNCTION	301	/* !colorimetry info */
+#define	TIFFTAG_SOFTWARE		305	/* name & release */
+#define	TIFFTAG_DATETIME		306	/* creation date and time */
+#define	TIFFTAG_ARTIST			315	/* creator of image */
+#define	TIFFTAG_HOSTCOMPUTER		316	/* machine where created */
+#define	TIFFTAG_PREDICTOR		317	/* prediction scheme w/ LZW */
+#define     PREDICTOR_NONE		1	/* no prediction scheme used */
+#define     PREDICTOR_HORIZONTAL	2	/* horizontal differencing */
+#define     PREDICTOR_FLOATINGPOINT	3	/* floating point predictor */
+#define	TIFFTAG_WHITEPOINT		318	/* image white point */
+#define	TIFFTAG_PRIMARYCHROMATICITIES	319	/* !primary chromaticities */
+#define	TIFFTAG_COLORMAP		320	/* RGB map for palette image */
+#define	TIFFTAG_HALFTONEHINTS		321	/* !highlight+shadow info */
+#define	TIFFTAG_TILEWIDTH		322	/* !tile width in pixels */
+#define	TIFFTAG_TILELENGTH		323	/* !tile height in pixels */
+#define TIFFTAG_TILEOFFSETS		324	/* !offsets to data tiles */
+#define TIFFTAG_TILEBYTECOUNTS		325	/* !byte counts for tiles */
+#define	TIFFTAG_BADFAXLINES		326	/* lines w/ wrong pixel count */
+#define	TIFFTAG_CLEANFAXDATA		327	/* regenerated line info */
+#define	    CLEANFAXDATA_CLEAN		0	/* no errors detected */
+#define	    CLEANFAXDATA_REGENERATED	1	/* receiver regenerated lines */
+#define	    CLEANFAXDATA_UNCLEAN	2	/* uncorrected errors exist */
+#define	TIFFTAG_CONSECUTIVEBADFAXLINES	328	/* max consecutive bad lines */
+#define	TIFFTAG_SUBIFD			330	/* subimage descriptors */
+#define	TIFFTAG_INKSET			332	/* !inks in separated image */
+#define	    INKSET_CMYK			1	/* !cyan-magenta-yellow-black color */
+#define	    INKSET_MULTIINK		2	/* !multi-ink or hi-fi color */
+#define	TIFFTAG_INKNAMES		333	/* !ascii names of inks */
+#define	TIFFTAG_NUMBEROFINKS		334	/* !number of inks */
+#define	TIFFTAG_DOTRANGE		336	/* !0% and 100% dot codes */
+#define	TIFFTAG_TARGETPRINTER		337	/* !separation target */
+#define	TIFFTAG_EXTRASAMPLES		338	/* !info about extra samples */
+#define	    EXTRASAMPLE_UNSPECIFIED	0	/* !unspecified data */
+#define	    EXTRASAMPLE_ASSOCALPHA	1	/* !associated alpha data */
+#define	    EXTRASAMPLE_UNASSALPHA	2	/* !unassociated alpha data */
+#define	TIFFTAG_SAMPLEFORMAT		339	/* !data sample format */
+#define	    SAMPLEFORMAT_UINT		1	/* !unsigned integer data */
+#define	    SAMPLEFORMAT_INT		2	/* !signed integer data */
+#define	    SAMPLEFORMAT_IEEEFP		3	/* !IEEE floating point data */
+#define	    SAMPLEFORMAT_VOID		4	/* !untyped data */
+#define	    SAMPLEFORMAT_COMPLEXINT	5	/* !complex signed int */
+#define	    SAMPLEFORMAT_COMPLEXIEEEFP	6	/* !complex ieee floating */
+#define	TIFFTAG_SMINSAMPLEVALUE		340	/* !variable MinSampleValue */
+#define	TIFFTAG_SMAXSAMPLEVALUE		341	/* !variable MaxSampleValue */
+#define	TIFFTAG_CLIPPATH		343	/* %ClipPath
+						   [Adobe TIFF technote 2] */
+#define	TIFFTAG_XCLIPPATHUNITS		344	/* %XClipPathUnits
+						   [Adobe TIFF technote 2] */
+#define	TIFFTAG_YCLIPPATHUNITS		345	/* %YClipPathUnits
+						   [Adobe TIFF technote 2] */
+#define	TIFFTAG_INDEXED			346	/* %Indexed
+						   [Adobe TIFF Technote 3] */
+#define	TIFFTAG_JPEGTABLES		347	/* %JPEG table stream */
+#define	TIFFTAG_OPIPROXY		351	/* %OPI Proxy [Adobe TIFF technote] */
+/*
+ * Tags 512-521 are obsoleted by Technical Note #2 which specifies a
+ * revised JPEG-in-TIFF scheme.
+ */
+#define	TIFFTAG_JPEGPROC		512	/* !JPEG processing algorithm */
+#define	    JPEGPROC_BASELINE		1	/* !baseline sequential */
+#define	    JPEGPROC_LOSSLESS		14	/* !Huffman coded lossless */
+#define	TIFFTAG_JPEGIFOFFSET		513	/* !pointer to SOI marker */
+#define	TIFFTAG_JPEGIFBYTECOUNT		514	/* !JFIF stream length */
+#define	TIFFTAG_JPEGRESTARTINTERVAL	515	/* !restart interval length */
+#define	TIFFTAG_JPEGLOSSLESSPREDICTORS	517	/* !lossless proc predictor */
+#define	TIFFTAG_JPEGPOINTTRANSFORM	518	/* !lossless point transform */
+#define	TIFFTAG_JPEGQTABLES		519	/* !Q matrice offsets */
+#define	TIFFTAG_JPEGDCTABLES		520	/* !DCT table offsets */
+#define	TIFFTAG_JPEGACTABLES		521	/* !AC coefficient offsets */
+#define	TIFFTAG_YCBCRCOEFFICIENTS	529	/* !RGB -> YCbCr transform */
+#define	TIFFTAG_YCBCRSUBSAMPLING	530	/* !YCbCr subsampling factors */
+#define	TIFFTAG_YCBCRPOSITIONING	531	/* !subsample positioning */
+#define	    YCBCRPOSITION_CENTERED	1	/* !as in PostScript Level 2 */
+#define	    YCBCRPOSITION_COSITED	2	/* !as in CCIR 601-1 */
+#define	TIFFTAG_REFERENCEBLACKWHITE	532	/* !colorimetry info */
+#define	TIFFTAG_XMLPACKET		700	/* %XML packet
+						   [Adobe XMP Specification,
+						   January 2004] */
+#define TIFFTAG_OPIIMAGEID		32781	/* %OPI ImageID
+						   [Adobe TIFF technote] */
+/* tags 32952-32956 are private tags registered to Island Graphics */
+#define TIFFTAG_REFPTS			32953	/* image reference points */
+#define TIFFTAG_REGIONTACKPOINT		32954	/* region-xform tack point */
+#define TIFFTAG_REGIONWARPCORNERS	32955	/* warp quadrilateral */
+#define TIFFTAG_REGIONAFFINE		32956	/* affine transformation mat */
+/* tags 32995-32999 are private tags registered to SGI */
+#define	TIFFTAG_MATTEING		32995	/* $use ExtraSamples */
+#define	TIFFTAG_DATATYPE		32996	/* $use SampleFormat */
+#define	TIFFTAG_IMAGEDEPTH		32997	/* z depth of image */
+#define	TIFFTAG_TILEDEPTH		32998	/* z depth/data tile */
+/* tags 33300-33309 are private tags registered to Pixar */
+/*
+ * TIFFTAG_PIXAR_IMAGEFULLWIDTH and TIFFTAG_PIXAR_IMAGEFULLLENGTH
+ * are set when an image has been cropped out of a larger image.  
+ * They reflect the size of the original uncropped image.
+ * The TIFFTAG_XPOSITION and TIFFTAG_YPOSITION can be used
+ * to determine the position of the smaller image in the larger one.
+ */
+#define TIFFTAG_PIXAR_IMAGEFULLWIDTH    33300   /* full image size in x */
+#define TIFFTAG_PIXAR_IMAGEFULLLENGTH   33301   /* full image size in y */
+ /* Tags 33302-33306 are used to identify special image modes and data
+  * used by Pixar's texture formats.
+  */
+#define TIFFTAG_PIXAR_TEXTUREFORMAT	33302	/* texture map format */
+#define TIFFTAG_PIXAR_WRAPMODES		33303	/* s & t wrap modes */
+#define TIFFTAG_PIXAR_FOVCOT		33304	/* cotan(fov) for env. maps */
+#define TIFFTAG_PIXAR_MATRIX_WORLDTOSCREEN 33305
+#define TIFFTAG_PIXAR_MATRIX_WORLDTOCAMERA 33306
+/* tag 33405 is a private tag registered to Eastman Kodak */
+#define TIFFTAG_WRITERSERIALNUMBER      33405   /* device serial number */
+/* tag 33432 is listed in the 6.0 spec w/ unknown ownership */
+#define	TIFFTAG_COPYRIGHT		33432	/* copyright string */
+/* IPTC TAG from RichTIFF specifications */
+#define TIFFTAG_RICHTIFFIPTC		33723
+/* 34016-34029 are reserved for ANSI IT8 TIFF/IT <dkelly@apago.com> */
+#define TIFFTAG_IT8SITE			34016	/* site name */
+#define TIFFTAG_IT8COLORSEQUENCE	34017	/* color seq. [RGB,CMYK,etc] */
+#define TIFFTAG_IT8HEADER		34018	/* DDES Header */
+#define TIFFTAG_IT8RASTERPADDING	34019	/* raster scanline padding */
+#define TIFFTAG_IT8BITSPERRUNLENGTH	34020	/* # of bits in short run */
+#define TIFFTAG_IT8BITSPEREXTENDEDRUNLENGTH 34021/* # of bits in long run */
+#define TIFFTAG_IT8COLORTABLE		34022	/* LW colortable */
+#define TIFFTAG_IT8IMAGECOLORINDICATOR	34023	/* BP/BL image color switch */
+#define TIFFTAG_IT8BKGCOLORINDICATOR	34024	/* BP/BL bg color switch */
+#define TIFFTAG_IT8IMAGECOLORVALUE	34025	/* BP/BL image color value */
+#define TIFFTAG_IT8BKGCOLORVALUE	34026	/* BP/BL bg color value */
+#define TIFFTAG_IT8PIXELINTENSITYRANGE	34027	/* MP pixel intensity value */
+#define TIFFTAG_IT8TRANSPARENCYINDICATOR 34028	/* HC transparency switch */
+#define TIFFTAG_IT8COLORCHARACTERIZATION 34029	/* color character. table */
+#define TIFFTAG_IT8HCUSAGE		34030	/* HC usage indicator */
+#define TIFFTAG_IT8TRAPINDICATOR	34031	/* Trapping indicator
+						   (untrapped=0, trapped=1) */
+#define TIFFTAG_IT8CMYKEQUIVALENT	34032	/* CMYK color equivalents */
+/* tags 34232-34236 are private tags registered to Texas Instruments */
+#define TIFFTAG_FRAMECOUNT              34232   /* Sequence Frame Count */
+/* tag 34377 is private tag registered to Adobe for PhotoShop */
+#define TIFFTAG_PHOTOSHOP		34377 
+/* tags 34665, 34853 and 40965 are documented in EXIF specification */
+#define TIFFTAG_EXIFIFD			34665	/* Pointer to EXIF private directory */
+/* tag 34675 is a private tag registered to Adobe? */
+#define TIFFTAG_ICCPROFILE		34675	/* ICC profile data */
+/* tag 34750 is a private tag registered to Pixel Magic */
+#define	TIFFTAG_JBIGOPTIONS		34750	/* JBIG options */
+#define TIFFTAG_GPSIFD			34853	/* Pointer to GPS private directory */
+/* tags 34908-34914 are private tags registered to SGI */
+#define	TIFFTAG_FAXRECVPARAMS		34908	/* encoded Class 2 ses. parms */
+#define	TIFFTAG_FAXSUBADDRESS		34909	/* received SubAddr string */
+#define	TIFFTAG_FAXRECVTIME		34910	/* receive time (secs) */
+#define	TIFFTAG_FAXDCS			34911	/* encoded fax ses. params, Table 2/T.30 */
+/* tags 37439-37443 are registered to SGI <gregl@sgi.com> */
+#define TIFFTAG_STONITS			37439	/* Sample value to Nits */
+/* tag 34929 is a private tag registered to FedEx */
+#define	TIFFTAG_FEDEX_EDR		34929	/* unknown use */
+#define TIFFTAG_INTEROPERABILITYIFD	40965	/* Pointer to Interoperability private directory */
+/* Adobe Digital Negative (DNG) format tags */
+#define TIFFTAG_DNGVERSION		50706	/* &DNG version number */
+#define TIFFTAG_DNGBACKWARDVERSION	50707	/* &DNG compatibility version */
+#define TIFFTAG_UNIQUECAMERAMODEL	50708	/* &name for the camera model */
+#define TIFFTAG_LOCALIZEDCAMERAMODEL	50709	/* &localized camera model
+						   name */
+#define TIFFTAG_CFAPLANECOLOR		50710	/* &CFAPattern->LinearRaw space
+						   mapping */
+#define TIFFTAG_CFALAYOUT		50711	/* &spatial layout of the CFA */
+#define TIFFTAG_LINEARIZATIONTABLE	50712	/* &lookup table description */
+#define TIFFTAG_BLACKLEVELREPEATDIM	50713	/* &repeat pattern size for
+						   the BlackLevel tag */
+#define TIFFTAG_BLACKLEVEL		50714	/* &zero light encoding level */
+#define TIFFTAG_BLACKLEVELDELTAH	50715	/* &zero light encoding level
+						   differences (columns) */
+#define TIFFTAG_BLACKLEVELDELTAV	50716	/* &zero light encoding level
+						   differences (rows) */
+#define TIFFTAG_WHITELEVEL		50717	/* &fully saturated encoding
+						   level */
+#define TIFFTAG_DEFAULTSCALE		50718	/* &default scale factors */
+#define TIFFTAG_DEFAULTCROPORIGIN	50719	/* &origin of the final image
+						   area */
+#define TIFFTAG_DEFAULTCROPSIZE		50720	/* &size of the final image 
+						   area */
+#define TIFFTAG_COLORMATRIX1		50721	/* &XYZ->reference color space
+						   transformation matrix 1 */
+#define TIFFTAG_COLORMATRIX2		50722	/* &XYZ->reference color space
+						   transformation matrix 2 */
+#define TIFFTAG_CAMERACALIBRATION1	50723	/* &calibration matrix 1 */
+#define TIFFTAG_CAMERACALIBRATION2	50724	/* &calibration matrix 2 */
+#define TIFFTAG_REDUCTIONMATRIX1	50725	/* &dimensionality reduction
+						   matrix 1 */
+#define TIFFTAG_REDUCTIONMATRIX2	50726	/* &dimensionality reduction
+						   matrix 2 */
+#define TIFFTAG_ANALOGBALANCE		50727	/* &gain applied to the stored raw
+						   values */
+#define TIFFTAG_ASSHOTNEUTRAL		50728	/* &selected white balance in
+						   linear reference space */
+#define TIFFTAG_ASSHOTWHITEXY		50729	/* &selected white balance in
+						   x-y chromaticity
+						   coordinates */
+#define TIFFTAG_BASELINEEXPOSURE	50730	/* &how much to move the zero
+						   point */
+#define TIFFTAG_BASELINENOISE		50731	/* &relative noise level */
+#define TIFFTAG_BASELINESHARPNESS	50732	/* &relative amount of
+						   sharpening */
+#define TIFFTAG_BAYERGREENSPLIT		50733	/* &how closely the values of
+						   the green pixels in the
+						   blue/green rows track the
+						   values of the green pixels
+						   in the red/green rows */
+#define TIFFTAG_LINEARRESPONSELIMIT	50734	/* &non-linear encoding range */
+#define TIFFTAG_CAMERASERIALNUMBER	50735	/* &camera's serial number */
+#define TIFFTAG_LENSINFO		50736	/* &info about the lens */
+#define TIFFTAG_CHROMABLURRADIUS	50737	/* &chroma blur radius */
+#define TIFFTAG_ANTIALIASSTRENGTH	50738	/* &relative strength of the
+						   camera's anti-alias filter */
+#define TIFFTAG_SHADOWSCALE		50739	/* &used by Adobe Camera Raw */
+#define TIFFTAG_DNGPRIVATEDATA		50740	/* &manufacturer's private data */
+#define TIFFTAG_MAKERNOTESAFETY		50741	/* &whether the EXIF MakerNote
+						   tag is safe to preserve
+						   along with the rest of the
+						   EXIF data */
+#define	TIFFTAG_CALIBRATIONILLUMINANT1	50778	/* &illuminant 1 */
+#define TIFFTAG_CALIBRATIONILLUMINANT2	50779	/* &illuminant 2 */
+#define TIFFTAG_BESTQUALITYSCALE	50780	/* &best quality multiplier */
+#define TIFFTAG_RAWDATAUNIQUEID		50781	/* &unique identifier for
+						   the raw image data */
+#define TIFFTAG_ORIGINALRAWFILENAME	50827	/* &file name of the original
+						   raw file */
+#define TIFFTAG_ORIGINALRAWFILEDATA	50828	/* &contents of the original
+						   raw file */
+#define TIFFTAG_ACTIVEAREA		50829	/* &active (non-masked) pixels
+						   of the sensor */
+#define TIFFTAG_MASKEDAREAS		50830	/* &list of coordinates
+						   of fully masked pixels */
+#define TIFFTAG_ASSHOTICCPROFILE	50831	/* &these two tags used to */
+#define TIFFTAG_ASSHOTPREPROFILEMATRIX	50832	/* map camera's color space
+						   into ICC profile space */
+#define TIFFTAG_CURRENTICCPROFILE	50833	/* & */
+#define TIFFTAG_CURRENTPREPROFILEMATRIX	50834	/* & */
+/* tag 65535 is an undefined tag used by Eastman Kodak */
+#define TIFFTAG_DCSHUESHIFTVALUES       65535   /* hue shift correction data */
+
+/*
+ * The following are ``pseudo tags'' that can be used to control
+ * codec-specific functionality.  These tags are not written to file.
+ * Note that these values start at 0xffff+1 so that they'll never
+ * collide with Aldus-assigned tags.
+ *
+ * If you want your private pseudo tags ``registered'' (i.e. added to
+ * this file), please post a bug report via the tracking system at
+ * http://www.remotesensing.org/libtiff/bugs.html with the appropriate
+ * C definitions to add.
+ */
+#define	TIFFTAG_FAXMODE			65536	/* Group 3/4 format control */
+#define	    FAXMODE_CLASSIC	0x0000		/* default, include RTC */
+#define	    FAXMODE_NORTC	0x0001		/* no RTC at end of data */
+#define	    FAXMODE_NOEOL	0x0002		/* no EOL code at end of row */
+#define	    FAXMODE_BYTEALIGN	0x0004		/* byte align row */
+#define	    FAXMODE_WORDALIGN	0x0008		/* word align row */
+#define	    FAXMODE_CLASSF	FAXMODE_NORTC	/* TIFF Class F */
+#define	TIFFTAG_JPEGQUALITY		65537	/* Compression quality level */
+/* Note: quality level is on the IJG 0-100 scale.  Default value is 75 */
+#define	TIFFTAG_JPEGCOLORMODE		65538	/* Auto RGB<=>YCbCr convert? */
+#define	    JPEGCOLORMODE_RAW	0x0000		/* no conversion (default) */
+#define	    JPEGCOLORMODE_RGB	0x0001		/* do auto conversion */
+#define	TIFFTAG_JPEGTABLESMODE		65539	/* What to put in JPEGTables */
+#define	    JPEGTABLESMODE_QUANT 0x0001		/* include quantization tbls */
+#define	    JPEGTABLESMODE_HUFF	0x0002		/* include Huffman tbls */
+/* Note: default is JPEGTABLESMODE_QUANT | JPEGTABLESMODE_HUFF */
+#define	TIFFTAG_FAXFILLFUNC		65540	/* G3/G4 fill function */
+#define	TIFFTAG_PIXARLOGDATAFMT		65549	/* PixarLogCodec I/O data sz */
+#define	    PIXARLOGDATAFMT_8BIT	0	/* regular u_char samples */
+#define	    PIXARLOGDATAFMT_8BITABGR	1	/* ABGR-order u_chars */
+#define	    PIXARLOGDATAFMT_11BITLOG	2	/* 11-bit log-encoded (raw) */
+#define	    PIXARLOGDATAFMT_12BITPICIO	3	/* as per PICIO (1.0==2048) */
+#define	    PIXARLOGDATAFMT_16BIT	4	/* signed short samples */
+#define	    PIXARLOGDATAFMT_FLOAT	5	/* IEEE float samples */
+/* 65550-65556 are allocated to Oceana Matrix <dev@oceana.com> */
+#define TIFFTAG_DCSIMAGERTYPE           65550   /* imager model & filter */
+#define     DCSIMAGERMODEL_M3           0       /* M3 chip (1280 x 1024) */
+#define     DCSIMAGERMODEL_M5           1       /* M5 chip (1536 x 1024) */
+#define     DCSIMAGERMODEL_M6           2       /* M6 chip (3072 x 2048) */
+#define     DCSIMAGERFILTER_IR          0       /* infrared filter */
+#define     DCSIMAGERFILTER_MONO        1       /* monochrome filter */
+#define     DCSIMAGERFILTER_CFA         2       /* color filter array */
+#define     DCSIMAGERFILTER_OTHER       3       /* other filter */
+#define TIFFTAG_DCSINTERPMODE           65551   /* interpolation mode */
+#define     DCSINTERPMODE_NORMAL        0x0     /* whole image, default */
+#define     DCSINTERPMODE_PREVIEW       0x1     /* preview of image (384x256) */
+#define TIFFTAG_DCSBALANCEARRAY         65552   /* color balance values */
+#define TIFFTAG_DCSCORRECTMATRIX        65553   /* color correction values */
+#define TIFFTAG_DCSGAMMA                65554   /* gamma value */
+#define TIFFTAG_DCSTOESHOULDERPTS       65555   /* toe & shoulder points */
+#define TIFFTAG_DCSCALIBRATIONFD        65556   /* calibration file desc */
+/* Note: quality level is on the ZLIB 1-9 scale. Default value is -1 */
+#define	TIFFTAG_ZIPQUALITY		65557	/* compression quality level */
+#define	TIFFTAG_PIXARLOGQUALITY		65558	/* PixarLog uses same scale */
+/* 65559 is allocated to Oceana Matrix <dev@oceana.com> */
+#define TIFFTAG_DCSCLIPRECTANGLE	65559	/* area of image to acquire */
+#define TIFFTAG_SGILOGDATAFMT		65560	/* SGILog user data format */
+#define     SGILOGDATAFMT_FLOAT		0	/* IEEE float samples */
+#define     SGILOGDATAFMT_16BIT		1	/* 16-bit samples */
+#define     SGILOGDATAFMT_RAW		2	/* uninterpreted data */
+#define     SGILOGDATAFMT_8BIT		3	/* 8-bit RGB monitor values */
+#define TIFFTAG_SGILOGENCODE		65561 /* SGILog data encoding control*/
+#define     SGILOGENCODE_NODITHER	0     /* do not dither encoded values*/
+#define     SGILOGENCODE_RANDITHER	1     /* randomly dither encd values */
+
+/*
+ * EXIF tags
+ */
+#define EXIFTAG_EXPOSURETIME		33434	/* Exposure time */
+#define EXIFTAG_FNUMBER			33437	/* F number */
+#define EXIFTAG_EXPOSUREPROGRAM		34850	/* Exposure program */
+#define EXIFTAG_SPECTRALSENSITIVITY	34852	/* Spectral sensitivity */
+#define EXIFTAG_ISOSPEEDRATINGS		34855	/* ISO speed rating */
+#define EXIFTAG_OECF			34856	/* Optoelectric conversion
+						   factor */
+#define EXIFTAG_EXIFVERSION		36864	/* Exif version */
+#define EXIFTAG_DATETIMEORIGINAL	36867	/* Date and time of original
+						   data generation */
+#define EXIFTAG_DATETIMEDIGITIZED	36868	/* Date and time of digital
+						   data generation */
+#define EXIFTAG_COMPONENTSCONFIGURATION	37121	/* Meaning of each component */
+#define EXIFTAG_COMPRESSEDBITSPERPIXEL	37122	/* Image compression mode */
+#define EXIFTAG_SHUTTERSPEEDVALUE	37377	/* Shutter speed */
+#define EXIFTAG_APERTUREVALUE		37378	/* Aperture */
+#define EXIFTAG_BRIGHTNESSVALUE		37379	/* Brightness */
+#define EXIFTAG_EXPOSUREBIASVALUE	37380	/* Exposure bias */
+#define EXIFTAG_MAXAPERTUREVALUE	37381	/* Maximum lens aperture */
+#define EXIFTAG_SUBJECTDISTANCE		37382	/* Subject distance */
+#define EXIFTAG_METERINGMODE		37383	/* Metering mode */
+#define EXIFTAG_LIGHTSOURCE		37384	/* Light source */
+#define EXIFTAG_FLASH			37385	/* Flash */
+#define EXIFTAG_FOCALLENGTH		37386	/* Lens focal length */
+#define EXIFTAG_SUBJECTAREA		37396	/* Subject area */
+#define EXIFTAG_MAKERNOTE		37500	/* Manufacturer notes */
+#define EXIFTAG_USERCOMMENT		37510	/* User comments */
+#define EXIFTAG_SUBSECTIME		37520	/* DateTime subseconds */
+#define EXIFTAG_SUBSECTIMEORIGINAL	37521	/* DateTimeOriginal subseconds */
+#define EXIFTAG_SUBSECTIMEDIGITIZED	37522	/* DateTimeDigitized subseconds */
+#define EXIFTAG_FLASHPIXVERSION		40960	/* Supported Flashpix version */
+#define EXIFTAG_COLORSPACE		40961	/* Color space information */
+#define EXIFTAG_PIXELXDIMENSION		40962	/* Valid image width */
+#define EXIFTAG_PIXELYDIMENSION		40963	/* Valid image height */
+#define EXIFTAG_RELATEDSOUNDFILE	40964	/* Related audio file */
+#define EXIFTAG_FLASHENERGY		41483	/* Flash energy */
+#define EXIFTAG_SPATIALFREQUENCYRESPONSE 41484	/* Spatial frequency response */
+#define EXIFTAG_FOCALPLANEXRESOLUTION	41486	/* Focal plane X resolution */
+#define EXIFTAG_FOCALPLANEYRESOLUTION	41487	/* Focal plane Y resolution */
+#define EXIFTAG_FOCALPLANERESOLUTIONUNIT 41488	/* Focal plane resolution unit */
+#define EXIFTAG_SUBJECTLOCATION		41492	/* Subject location */
+#define EXIFTAG_EXPOSUREINDEX		41493	/* Exposure index */
+#define EXIFTAG_SENSINGMETHOD		41495	/* Sensing method */
+#define EXIFTAG_FILESOURCE		41728	/* File source */
+#define EXIFTAG_SCENETYPE		41729	/* Scene type */
+#define EXIFTAG_CFAPATTERN		41730	/* CFA pattern */
+#define EXIFTAG_CUSTOMRENDERED		41985	/* Custom image processing */
+#define EXIFTAG_EXPOSUREMODE		41986	/* Exposure mode */
+#define EXIFTAG_WHITEBALANCE		41987	/* White balance */
+#define EXIFTAG_DIGITALZOOMRATIO	41988	/* Digital zoom ratio */
+#define EXIFTAG_FOCALLENGTHIN35MMFILM	41989	/* Focal length in 35 mm film */
+#define EXIFTAG_SCENECAPTURETYPE	41990	/* Scene capture type */
+#define EXIFTAG_GAINCONTROL		41991	/* Gain control */
+#define EXIFTAG_CONTRAST		41992	/* Contrast */
+#define EXIFTAG_SATURATION		41993	/* Saturation */
+#define EXIFTAG_SHARPNESS		41994	/* Sharpness */
+#define EXIFTAG_DEVICESETTINGDESCRIPTION 41995	/* Device settings description */
+#define EXIFTAG_SUBJECTDISTANCERANGE	41996	/* Subject distance range */
+#define EXIFTAG_GAINCONTROL		41991	/* Gain control (redundant: repeats the definition above) */
+#define EXIFTAG_GAINCONTROL		41991	/* Gain control (redundant: repeats the definition above) */
+#define EXIFTAG_IMAGEUNIQUEID		42016	/* Unique image ID */
+
+#endif /* _TIFF_ */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tiffio.h b/src/libtiff/tiffio.h
new file mode 100644
index 0000000..3a5eebd
--- /dev/null
+++ b/src/libtiff/tiffio.h
@@ -0,0 +1,515 @@
+/* $Id: tiffio.h,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef _TIFFIO_
+#define	_TIFFIO_
+
+/*
+ * TIFF I/O Library Definitions.
+ */
+#include "tiff.h"
+#include "tiffvers.h"
+
+/*
+ * TIFF is defined as an incomplete type to hide the
+ * library's internal data structures from clients.
+ */
+typedef	struct tiff TIFF;
+
+/*
+ * The following typedefs define the intrinsic size of
+ * data types used in the *exported* interfaces.  These
+ * definitions depend on the proper definition of types
+ * in tiff.h.  Note also that the varargs interface used
+ * to pass tag types and values uses the types defined in
+ * tiff.h directly.
+ *
+ * NB: ttag_t is unsigned int and not unsigned short because
+ *     ANSI C requires that the type before the ellipsis be a
+ *     promoted type (i.e. one of int, unsigned int, pointer,
+ *     or double) and because we defined pseudo-tags that are
+ *     outside the range of legal Aldus-assigned tags.
+ * NB: tsize_t is int32 and not uint32 because some functions
+ *     return -1.
+ * NB: toff_t is not off_t for many reasons; TIFFs max out at
+ *     32-bit file offsets being the most important, and to ensure
+ *     that it is unsigned, rather than signed.
+ */
+typedef	uint32 ttag_t;		/* directory tag */
+typedef	uint16 tdir_t;		/* directory index */
+typedef	uint16 tsample_t;	/* sample number */
+typedef	uint32 tstrip_t;	/* strip number */
+typedef uint32 ttile_t;		/* tile number */
+typedef	int32 tsize_t;		/* i/o size in bytes */
+typedef	void* tdata_t;		/* image data ref */
+typedef	uint32 toff_t;		/* file offset */
+
+#if !defined(__WIN32__) && (defined(_WIN32) || defined(WIN32))
+#define __WIN32__
+#endif
+
+/*
+ * On windows you should define USE_WIN32_FILEIO if you are using tif_win32.c
+ * or AVOID_WIN32_FILEIO if you are using something else (like tif_unix.c).
+ *
+ * By default tif_unix.c is assumed.
+ */
+
+#if defined(_WINDOWS) || defined(__WIN32__) || defined(_Windows)
+#  if !defined(__CYGWIN) && !defined(AVOID_WIN32_FILEIO) && !defined(USE_WIN32_FILEIO)
+#    define AVOID_WIN32_FILEIO
+#  endif
+#endif
+
+#if defined(USE_WIN32_FILEIO)
+# define VC_EXTRALEAN
+# include <windows.h>
+# ifdef __WIN32__
+DECLARE_HANDLE(thandle_t);	/* Win32 file handle */
+# else
+typedef	HFILE thandle_t;	/* client data handle */
+# endif /* __WIN32__ */
+#else
+typedef	void* thandle_t;	/* client data handle */
+#endif /* USE_WIN32_FILEIO */
+
+#ifndef NULL
+# define NULL	((void *)0)	/* fallback only; parenthesized so it stays one operand inside larger expressions */
+#endif
+
+/*
+ * Flags to pass to TIFFPrintDirectory to control
+ * printing of data structures that are potentially
+ * very large.   Bit-or these flags to enable printing
+ * multiple items.
+ */
+#define	TIFFPRINT_NONE		0x0		/* no extra info */
+#define	TIFFPRINT_STRIPS	0x1		/* strips/tiles info */
+#define	TIFFPRINT_CURVES	0x2		/* color/gray response curves */
+#define	TIFFPRINT_COLORMAP	0x4		/* colormap */
+#define	TIFFPRINT_JPEGQTABLES	0x100		/* JPEG Q matrices */
+#define	TIFFPRINT_JPEGACTABLES	0x200		/* JPEG AC tables */
+#define	TIFFPRINT_JPEGDCTABLES	0x200		/* JPEG DC tables (NB: same bit value as TIFFPRINT_JPEGACTABLES) */
+
+/* 
+ * Colour conversion stuff
+ */
+
+/* reference white */
+#define D65_X0 (95.0470F)
+#define D65_Y0 (100.0F)
+#define D65_Z0 (108.8827F)
+
+#define D50_X0 (96.4250F)
+#define D50_Y0 (100.0F)
+#define D50_Z0 (82.4680F)
+
+/* Structure for holding information about a display device. */
+
+typedef	unsigned char TIFFRGBValue;		/* 8-bit samples */
+
+typedef struct {
+	float d_mat[3][3]; 		/* XYZ -> luminance matrix */
+	float d_YCR;			/* Light o/p for reference white */
+	float d_YCG;
+	float d_YCB;
+	uint32 d_Vrwr;			/* Pixel values for ref. white */
+	uint32 d_Vrwg;
+	uint32 d_Vrwb;
+	float d_Y0R;			/* Residual light for black pixel */
+	float d_Y0G;
+	float d_Y0B;
+	float d_gammaR;			/* Gamma values for the three guns */
+	float d_gammaG;
+	float d_gammaB;
+} TIFFDisplay;
+
+typedef struct {				/* YCbCr->RGB support */
+	TIFFRGBValue* clamptab;			/* range clamping table */
+	int*	Cr_r_tab;
+	int*	Cb_b_tab;
+	int32*	Cr_g_tab;
+	int32*	Cb_g_tab;
+        int32*  Y_tab;
+} TIFFYCbCrToRGB;
+
+typedef struct {				/* CIE Lab 1976->RGB support */
+	int	range;				/* Size of conversion table */
+#define CIELABTORGB_TABLE_RANGE 1500
+	float	rstep, gstep, bstep;
+	float	X0, Y0, Z0;			/* Reference white point */
+	TIFFDisplay display;
+	float	Yr2r[CIELABTORGB_TABLE_RANGE + 1];  /* Conversion of Yr to r */
+	float	Yg2g[CIELABTORGB_TABLE_RANGE + 1];  /* Conversion of Yg to g */
+	float	Yb2b[CIELABTORGB_TABLE_RANGE + 1];  /* Conversion of Yb to b */
+} TIFFCIELabToRGB;
+
+/*
+ * RGBA-style image support.
+ */
+typedef struct _TIFFRGBAImage TIFFRGBAImage;
+/*
+ * The image reading and conversion routines invoke
+ * ``put routines'' to copy/image/whatever tiles of
+ * raw image data.  A default set of routines is
+ * provided to convert/copy raw image data to 8-bit
+ * packed ABGR format rasters.  Applications can supply
+ * alternate routines that unpack the data into a
+ * different format or, for example, unpack the data
+ * and draw the unpacked raster on the display.
+ */
+typedef void (*tileContigRoutine)
+    (TIFFRGBAImage*, uint32*, uint32, uint32, uint32, uint32, int32, int32,
+	unsigned char*);
+typedef void (*tileSeparateRoutine)
+    (TIFFRGBAImage*, uint32*, uint32, uint32, uint32, uint32, int32, int32,
+	unsigned char*, unsigned char*, unsigned char*, unsigned char*);
+/*
+ * RGBA-reader state.
+ */
+struct _TIFFRGBAImage {
+	TIFF*	tif;				/* image handle */
+	int	stoponerr;			/* stop on read error */
+	int	isContig;			/* data is packed/separate */
+	int	alpha;				/* type of alpha data present */
+	uint32	width;				/* image width */
+	uint32	height;				/* image height */
+	uint16	bitspersample;			/* image bits/sample */
+	uint16	samplesperpixel;		/* image samples/pixel */
+	uint16	orientation;			/* image orientation */
+	uint16	req_orientation;		/* requested orientation */
+	uint16	photometric;			/* image photometric interp */
+	uint16*	redcmap;			/* colormap palette (red) */
+	uint16*	greencmap;			/* colormap palette (green) */
+	uint16*	bluecmap;			/* colormap palette (blue) */
+						/* get image data routine */
+	int	(*get)(TIFFRGBAImage*, uint32*, uint32, uint32);
+	union {
+	    void (*any)(TIFFRGBAImage*);
+	    tileContigRoutine	contig;
+	    tileSeparateRoutine	separate;
+	} put;					/* put decoded strip/tile */
+	TIFFRGBValue* Map;			/* sample mapping array */
+	uint32** BWmap;				/* black&white map */
+	uint32** PALmap;			/* palette image map */
+	TIFFYCbCrToRGB* ycbcr;			/* YCbCr conversion state */
+        TIFFCIELabToRGB* cielab;		/* CIE L*a*b conversion state */
+
+        int	row_offset;			/* NOTE(review): undocumented; presumably a row offset applied when reading -- confirm */
+        int     col_offset;			/* NOTE(review): undocumented; presumably a column offset applied when reading -- confirm */
+};
+
+/*
+ * Macros for extracting components from the
+ * packed ABGR form returned by TIFFReadRGBAImage.
+ */
+#define	TIFFGetR(abgr)	((abgr) & 0xff)
+#define	TIFFGetG(abgr)	(((abgr) >> 8) & 0xff)
+#define	TIFFGetB(abgr)	(((abgr) >> 16) & 0xff)
+#define	TIFFGetA(abgr)	(((abgr) >> 24) & 0xff)
+
+/*
+ * A CODEC is a software package that implements decoding,
+ * encoding, or decoding+encoding of a compression algorithm.
+ * The library provides a collection of builtin codecs.
+ * More codecs may be registered through calls to the library
+ * and/or the builtin implementations may be overridden.
+ */
+typedef	int (*TIFFInitMethod)(TIFF*, int);
+typedef struct {
+	char*		name;
+	uint16		scheme;
+	TIFFInitMethod	init;
+} TIFFCodec;
+
+#include <stdio.h>
+#include <stdarg.h>
+
+/* share internal LogLuv conversion routines? */
+#ifndef LOGLUV_PUBLIC
+#define LOGLUV_PUBLIC		1
+#endif
+
+#if defined(c_plusplus) || defined(__cplusplus)
+extern "C" {
+#endif
+typedef	void (*TIFFErrorHandler)(const char*, const char*, va_list);
+typedef	void (*TIFFErrorHandlerExt)(thandle_t, const char*, const char*, va_list);
+typedef	tsize_t (*TIFFReadWriteProc)(thandle_t, tdata_t, tsize_t);
+typedef	toff_t (*TIFFSeekProc)(thandle_t, toff_t, int);
+typedef	int (*TIFFCloseProc)(thandle_t);
+typedef	toff_t (*TIFFSizeProc)(thandle_t);
+typedef	int (*TIFFMapFileProc)(thandle_t, tdata_t*, toff_t*);
+typedef	void (*TIFFUnmapFileProc)(thandle_t, tdata_t, toff_t);
+typedef	void (*TIFFExtendProc)(TIFF*); 
+
+extern	const char* TIFFGetVersion(void);
+
+extern	const TIFFCodec* TIFFFindCODEC(uint16);
+extern	TIFFCodec* TIFFRegisterCODEC(uint16, const char*, TIFFInitMethod);
+extern	void TIFFUnRegisterCODEC(TIFFCodec*);
+extern  int TIFFIsCODECConfigured(uint16);
+extern	TIFFCodec* TIFFGetConfiguredCODECs(void);
+
+/*
+ * Auxiliary functions.
+ */
+
+extern	tdata_t _TIFFmalloc(tsize_t);
+extern	tdata_t _TIFFrealloc(tdata_t, tsize_t);
+extern	void _TIFFmemset(tdata_t, int, tsize_t);
+extern	void _TIFFmemcpy(tdata_t, const tdata_t, tsize_t);
+extern	int _TIFFmemcmp(const tdata_t, const tdata_t, tsize_t);
+extern	void _TIFFfree(tdata_t);
+
+/*
+** Stuff, related to tag handling and creating custom tags.
+*/
+extern  int  TIFFGetTagListCount( TIFF * );
+extern  ttag_t TIFFGetTagListEntry( TIFF *, int tag_index );
+    
+#define	TIFF_ANY	TIFF_NOTYPE	/* for field descriptor searching */
+#define	TIFF_VARIABLE	-1		/* marker for variable length tags */
+#define	TIFF_SPP	-2		/* marker for SamplesPerPixel tags */
+#define	TIFF_VARIABLE2	-3		/* marker for uint32 var-length tags */
+
+#define FIELD_CUSTOM    65    
+
+typedef	struct {
+	ttag_t	field_tag;		/* field's tag */
+	short	field_readcount;	/* read count/TIFF_VARIABLE/TIFF_SPP */
+	short	field_writecount;	/* write count/TIFF_VARIABLE */
+	TIFFDataType field_type;	/* type of associated data */
+        unsigned short field_bit;	/* bit in fieldsset bit vector */
+	unsigned char field_oktochange;	/* if true, can change while writing */
+	unsigned char field_passcount;	/* if true, pass dir count on set */
+	char	*field_name;		/* ASCII name */
+} TIFFFieldInfo;
+
+typedef struct _TIFFTagValue {
+    const TIFFFieldInfo  *info;
+    int             count;
+    void           *value;
+} TIFFTagValue;
+
+extern	void TIFFMergeFieldInfo(TIFF*, const TIFFFieldInfo[], int);
+extern	const TIFFFieldInfo* TIFFFindFieldInfo(TIFF*, ttag_t, TIFFDataType);
+extern  const TIFFFieldInfo* TIFFFindFieldInfoByName(TIFF* , const char *,
+						     TIFFDataType);
+extern	const TIFFFieldInfo* TIFFFieldWithTag(TIFF*, ttag_t);
+extern	const TIFFFieldInfo* TIFFFieldWithName(TIFF*, const char *);
+
+typedef	int (*TIFFVSetMethod)(TIFF*, ttag_t, va_list);
+typedef	int (*TIFFVGetMethod)(TIFF*, ttag_t, va_list);
+typedef	void (*TIFFPrintMethod)(TIFF*, FILE*, long);
+    
+typedef struct {
+    TIFFVSetMethod	vsetfield;	/* tag set routine */
+    TIFFVGetMethod	vgetfield;	/* tag get routine */
+    TIFFPrintMethod	printdir;	/* directory print routine */
+} TIFFTagMethods;
+        
+extern  TIFFTagMethods *TIFFAccessTagMethods( TIFF * );
+extern  void *TIFFGetClientInfo( TIFF *, const char * );
+extern  void TIFFSetClientInfo( TIFF *, void *, const char * );
+
+extern	void TIFFCleanup(TIFF*);
+extern	void TIFFClose(TIFF*);
+extern	int TIFFFlush(TIFF*);
+extern	int TIFFFlushData(TIFF*);
+extern	int TIFFGetField(TIFF*, ttag_t, ...);
+extern	int TIFFVGetField(TIFF*, ttag_t, va_list);
+extern	int TIFFGetFieldDefaulted(TIFF*, ttag_t, ...);
+extern	int TIFFVGetFieldDefaulted(TIFF*, ttag_t, va_list);
+extern	int TIFFReadDirectory(TIFF*);
+extern	int TIFFReadCustomDirectory(TIFF*, toff_t, const TIFFFieldInfo[],
+				    size_t);
+extern	int TIFFReadEXIFDirectory(TIFF*, toff_t);
+extern	tsize_t TIFFScanlineSize(TIFF*);
+extern	tsize_t TIFFRasterScanlineSize(TIFF*);
+extern	tsize_t TIFFStripSize(TIFF*);
+extern	tsize_t TIFFRawStripSize(TIFF*, tstrip_t);
+extern	tsize_t TIFFVStripSize(TIFF*, uint32);
+extern	tsize_t TIFFTileRowSize(TIFF*);
+extern	tsize_t TIFFTileSize(TIFF*);
+extern	tsize_t TIFFVTileSize(TIFF*, uint32);
+extern	uint32 TIFFDefaultStripSize(TIFF*, uint32);
+extern	void TIFFDefaultTileSize(TIFF*, uint32*, uint32*);
+extern	int TIFFFileno(TIFF*);
+extern  int TIFFSetFileno(TIFF*, int);
+extern  thandle_t TIFFClientdata(TIFF*);
+extern  thandle_t TIFFSetClientdata(TIFF*, thandle_t);
+extern	int TIFFGetMode(TIFF*);
+extern	int TIFFSetMode(TIFF*, int);
+extern	int TIFFIsTiled(TIFF*);
+extern	int TIFFIsByteSwapped(TIFF*);
+extern	int TIFFIsUpSampled(TIFF*);
+extern	int TIFFIsMSB2LSB(TIFF*);
+extern	int TIFFIsBigEndian(TIFF*);
+extern	TIFFReadWriteProc TIFFGetReadProc(TIFF*);
+extern	TIFFReadWriteProc TIFFGetWriteProc(TIFF*);
+extern	TIFFSeekProc TIFFGetSeekProc(TIFF*);
+extern	TIFFCloseProc TIFFGetCloseProc(TIFF*);
+extern	TIFFSizeProc TIFFGetSizeProc(TIFF*);
+extern	TIFFMapFileProc TIFFGetMapFileProc(TIFF*);
+extern	TIFFUnmapFileProc TIFFGetUnmapFileProc(TIFF*);
+extern	uint32 TIFFCurrentRow(TIFF*);
+extern	tdir_t TIFFCurrentDirectory(TIFF*);
+extern	tdir_t TIFFNumberOfDirectories(TIFF*);
+extern	uint32 TIFFCurrentDirOffset(TIFF*);
+extern	tstrip_t TIFFCurrentStrip(TIFF*);
+extern	ttile_t TIFFCurrentTile(TIFF*);
+extern	int TIFFReadBufferSetup(TIFF*, tdata_t, tsize_t);
+extern	int TIFFWriteBufferSetup(TIFF*, tdata_t, tsize_t);
+extern	int TIFFSetupStrips(TIFF *);
+extern  int TIFFWriteCheck(TIFF*, int, const char *);
+extern	void TIFFFreeDirectory(TIFF*);
+extern  int TIFFCreateDirectory(TIFF*);
+extern	int TIFFLastDirectory(TIFF*);
+extern	int TIFFSetDirectory(TIFF*, tdir_t);
+extern	int TIFFSetSubDirectory(TIFF*, uint32);
+extern	int TIFFUnlinkDirectory(TIFF*, tdir_t);
+extern	int TIFFSetField(TIFF*, ttag_t, ...);
+extern	int TIFFVSetField(TIFF*, ttag_t, va_list);
+extern	int TIFFWriteDirectory(TIFF *);
+extern	int TIFFCheckpointDirectory(TIFF *);
+extern	int TIFFRewriteDirectory(TIFF *);
+extern	int TIFFReassignTagToIgnore(enum TIFFIgnoreSense, int);
+
+#if defined(c_plusplus) || defined(__cplusplus)
+extern	void TIFFPrintDirectory(TIFF*, FILE*, long = 0);
+extern	int TIFFReadScanline(TIFF*, tdata_t, uint32, tsample_t = 0);
+extern	int TIFFWriteScanline(TIFF*, tdata_t, uint32, tsample_t = 0);
+extern	int TIFFReadRGBAImage(TIFF*, uint32, uint32, uint32*, int = 0);
+extern	int TIFFReadRGBAImageOriented(TIFF*, uint32, uint32, uint32*,
+				      int = ORIENTATION_BOTLEFT, int = 0);
+#else
+extern	void TIFFPrintDirectory(TIFF*, FILE*, long);
+extern	int TIFFReadScanline(TIFF*, tdata_t, uint32, tsample_t);
+extern	int TIFFWriteScanline(TIFF*, tdata_t, uint32, tsample_t);
+extern	int TIFFReadRGBAImage(TIFF*, uint32, uint32, uint32*, int);
+extern	int TIFFReadRGBAImageOriented(TIFF*, uint32, uint32, uint32*, int, int);
+#endif
+
+extern	int TIFFReadRGBAStrip(TIFF*, tstrip_t, uint32 * );
+extern	int TIFFReadRGBATile(TIFF*, uint32, uint32, uint32 * );
+extern	int TIFFRGBAImageOK(TIFF*, char [1024]);
+extern	int TIFFRGBAImageBegin(TIFFRGBAImage*, TIFF*, int, char [1024]);
+extern	int TIFFRGBAImageGet(TIFFRGBAImage*, uint32*, uint32, uint32);
+extern	void TIFFRGBAImageEnd(TIFFRGBAImage*);
+extern	TIFF* TIFFOpen(const char*, const char*);
+# ifdef __WIN32__
+extern	TIFF* TIFFOpenW(const wchar_t*, const char*);
+# endif /* __WIN32__ */
+extern	TIFF* TIFFFdOpen(int, const char*, const char*);
+extern	TIFF* TIFFClientOpen(const char*, const char*,
+	    thandle_t,
+	    TIFFReadWriteProc, TIFFReadWriteProc,
+	    TIFFSeekProc, TIFFCloseProc,
+	    TIFFSizeProc,
+	    TIFFMapFileProc, TIFFUnmapFileProc);
+extern	const char* TIFFFileName(TIFF*);
+extern	const char* TIFFSetFileName(TIFF*, const char *);
+extern	void TIFFError(const char*, const char*, ...);
+extern	void TIFFErrorExt(thandle_t, const char*, const char*, ...);
+extern	void TIFFWarning(const char*, const char*, ...);
+extern	void TIFFWarningExt(thandle_t, const char*, const char*, ...);
+extern	TIFFErrorHandler TIFFSetErrorHandler(TIFFErrorHandler);
+extern	TIFFErrorHandlerExt TIFFSetErrorHandlerExt(TIFFErrorHandlerExt);
+extern	TIFFErrorHandler TIFFSetWarningHandler(TIFFErrorHandler);
+extern	TIFFErrorHandlerExt TIFFSetWarningHandlerExt(TIFFErrorHandlerExt);
+extern	TIFFExtendProc TIFFSetTagExtender(TIFFExtendProc);
+extern	ttile_t TIFFComputeTile(TIFF*, uint32, uint32, uint32, tsample_t);
+extern	int TIFFCheckTile(TIFF*, uint32, uint32, uint32, tsample_t);
+extern	ttile_t TIFFNumberOfTiles(TIFF*);
+extern	tsize_t TIFFReadTile(TIFF*,
+	    tdata_t, uint32, uint32, uint32, tsample_t);
+extern	tsize_t TIFFWriteTile(TIFF*,
+	    tdata_t, uint32, uint32, uint32, tsample_t);
+extern	tstrip_t TIFFComputeStrip(TIFF*, uint32, tsample_t);
+extern	tstrip_t TIFFNumberOfStrips(TIFF*);
+extern	tsize_t TIFFReadEncodedStrip(TIFF*, tstrip_t, tdata_t, tsize_t);
+extern	tsize_t TIFFReadRawStrip(TIFF*, tstrip_t, tdata_t, tsize_t);
+extern	tsize_t TIFFReadEncodedTile(TIFF*, ttile_t, tdata_t, tsize_t);
+extern	tsize_t TIFFReadRawTile(TIFF*, ttile_t, tdata_t, tsize_t);
+extern	tsize_t TIFFWriteEncodedStrip(TIFF*, tstrip_t, tdata_t, tsize_t);
+extern	tsize_t TIFFWriteRawStrip(TIFF*, tstrip_t, tdata_t, tsize_t);
+extern	tsize_t TIFFWriteEncodedTile(TIFF*, ttile_t, tdata_t, tsize_t);
+extern	tsize_t TIFFWriteRawTile(TIFF*, ttile_t, tdata_t, tsize_t);
+extern	int TIFFDataWidth(TIFFDataType);    /* table of tag datatype widths */
+extern	void TIFFSetWriteOffset(TIFF*, toff_t);
+extern	void TIFFSwabShort(uint16*);
+extern	void TIFFSwabLong(uint32*);
+extern	void TIFFSwabDouble(double*);
+extern	void TIFFSwabArrayOfShort(uint16*, unsigned long);
+extern	void TIFFSwabArrayOfTriples(uint8*, unsigned long);
+extern	void TIFFSwabArrayOfLong(uint32*, unsigned long);
+extern	void TIFFSwabArrayOfDouble(double*, unsigned long);
+extern	void TIFFReverseBits(unsigned char *, unsigned long);
+extern	const unsigned char* TIFFGetBitRevTable(int);
+
+#ifdef LOGLUV_PUBLIC
+#define U_NEU		0.210526316
+#define V_NEU		0.473684211
+#define UVSCALE		410.
+extern	double LogL16toY(int);
+extern	double LogL10toY(int);
+extern	void XYZtoRGB24(float*, uint8*);
+extern	int uv_decode(double*, double*, int);
+extern	void LogLuv24toXYZ(uint32, float*);
+extern	void LogLuv32toXYZ(uint32, float*);
+#if defined(c_plusplus) || defined(__cplusplus)
+extern	int LogL16fromY(double, int = SGILOGENCODE_NODITHER);
+extern	int LogL10fromY(double, int = SGILOGENCODE_NODITHER);
+extern	int uv_encode(double, double, int = SGILOGENCODE_NODITHER);
+extern	uint32 LogLuv24fromXYZ(float*, int = SGILOGENCODE_NODITHER);
+extern	uint32 LogLuv32fromXYZ(float*, int = SGILOGENCODE_NODITHER);
+#else
+extern	int LogL16fromY(double, int);
+extern	int LogL10fromY(double, int);
+extern	int uv_encode(double, double, int);
+extern	uint32 LogLuv24fromXYZ(float*, int);
+extern	uint32 LogLuv32fromXYZ(float*, int);
+#endif
+#endif /* LOGLUV_PUBLIC */
+    
+extern int TIFFCIELabToRGBInit(TIFFCIELabToRGB*, TIFFDisplay *, float*);
+extern void TIFFCIELabToXYZ(TIFFCIELabToRGB *, uint32, int32, int32,
+			    float *, float *, float *);
+extern void TIFFXYZToRGB(TIFFCIELabToRGB *, float, float, float,
+			 uint32 *, uint32 *, uint32 *);
+
+extern int TIFFYCbCrToRGBInit(TIFFYCbCrToRGB*, float*, float*);
+extern void TIFFYCbCrtoRGB(TIFFYCbCrToRGB *, uint32, int32, int32,
+			   uint32 *, uint32 *, uint32 *);
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif
+
+#endif /* _TIFFIO_ */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tiffiop.h b/src/libtiff/tiffiop.h
new file mode 100644
index 0000000..a0e5856
--- /dev/null
+++ b/src/libtiff/tiffiop.h
@@ -0,0 +1,323 @@
+/* $Id: tiffiop.h,v 1.1 2008/10/17 06:16:07 scuri Exp $ */
+
+/*
+ * Copyright (c) 1988-1997 Sam Leffler
+ * Copyright (c) 1991-1997 Silicon Graphics, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and 
+ * its documentation for any purpose is hereby granted without fee, provided
+ * that (i) the above copyright notices and this permission notice appear in
+ * all copies of the software and related documentation, and (ii) the names of
+ * Sam Leffler and Silicon Graphics may not be used in any advertising or
+ * publicity relating to the software without the specific, prior written
+ * permission of Sam Leffler and Silicon Graphics.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY 
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  
+ * 
+ * IN NO EVENT SHALL SAM LEFFLER OR SILICON GRAPHICS BE LIABLE FOR
+ * ANY SPECIAL, INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND,
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF 
+ * LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef _TIFFIOP_
+#define	_TIFFIOP_
+/*
+ * ``Library-private'' definitions.
+ */
+
+#include "tif_config.h"
+
+#ifdef HAVE_FCNTL_H
+# include <fcntl.h>
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+
+#ifdef HAVE_STRING_H
+# include <string.h>
+#endif
+
+#ifdef HAVE_ASSERT_H
+# include <assert.h>
+#else
+# define assert(x) ((void)0)	/* no <assert.h>: no-op that is still a valid void expression */
+#endif
+
+#ifdef HAVE_SEARCH_H
+# include <search.h>
+#else
+extern void *lfind(const void *, const void *, size_t *, size_t,
+		   int (*)(const void *, const void *));
+#endif
+
+#include "tiffio.h"
+#include "tif_dir.h"
+
+#ifndef STRIP_SIZE_DEFAULT
+# define STRIP_SIZE_DEFAULT 8192
+#endif
+
+#define    streq(a,b)      (strcmp(a,b) == 0)
+
+#ifndef TRUE
+#define	TRUE	1
+#define	FALSE	0
+#endif
+
+typedef struct client_info {
+    struct client_info *next;
+    void      *data;
+    char      *name;
+} TIFFClientInfoLink;
+
+/*
+ * Typedefs for ``method pointers'' used internally.
+ */
+typedef	unsigned char tidataval_t;	/* internal image data value type */
+typedef	tidataval_t* tidata_t;		/* reference to internal image data */
+
+typedef	void (*TIFFVoidMethod)(TIFF*);
+typedef	int (*TIFFBoolMethod)(TIFF*);
+typedef	int (*TIFFPreMethod)(TIFF*, tsample_t);
+typedef	int (*TIFFCodeMethod)(TIFF*, tidata_t, tsize_t, tsample_t);
+typedef	int (*TIFFSeekMethod)(TIFF*, uint32);
+typedef	void (*TIFFPostMethod)(TIFF*, tidata_t, tsize_t);
+typedef	uint32 (*TIFFStripMethod)(TIFF*, uint32);
+typedef	void (*TIFFTileMethod)(TIFF*, uint32*, uint32*);
+
+struct tiff {
+	char*		tif_name;	/* name of open file */
+	int		tif_fd;		/* open file descriptor */
+	int		tif_mode;	/* open mode (O_*) */
+	uint32		tif_flags;	/* bit-or of the TIFF_* flag bits below */
+#define	TIFF_FILLORDER		0x0003	/* natural bit fill order for machine */
+#define	TIFF_DIRTYHEADER	0x0004	/* header must be written on close */
+#define	TIFF_DIRTYDIRECT	0x0008	/* current directory must be written */
+#define	TIFF_BUFFERSETUP	0x0010	/* data buffers setup */
+#define	TIFF_CODERSETUP		0x0020	/* encoder/decoder setup done */
+#define	TIFF_BEENWRITING	0x0040	/* written 1+ scanlines to file */
+#define	TIFF_SWAB		0x0080	/* byte swap file information */
+#define	TIFF_NOBITREV		0x0100	/* inhibit bit reversal logic */
+#define	TIFF_MYBUFFER		0x0200	/* my raw data buffer; free on close */
+#define	TIFF_ISTILED		0x0400	/* file is tile-, not strip-based */
+#define	TIFF_MAPPED		0x0800	/* file is mapped into memory */
+#define	TIFF_POSTENCODE		0x1000	/* need call to postencode routine */
+#define	TIFF_INSUBIFD		0x2000	/* currently writing a subifd */
+#define	TIFF_UPSAMPLED		0x4000	/* library is doing data up-sampling */ 
+#define	TIFF_STRIPCHOP		0x8000	/* enable strip chopping support */
+#define	TIFF_HEADERONLY		0x10000	/* read header only, do not process */
+					/* the first directory */
+	toff_t		tif_diroff;	/* file offset of current directory */
+	toff_t		tif_nextdiroff;	/* file offset of following directory */
+	toff_t*		tif_dirlist;	/* list of offsets to already seen */
+					/* directories to prevent IFD looping */
+	uint16		tif_dirnumber;  /* number of already seen directories */
+	TIFFDirectory	tif_dir;	/* internal rep of current directory */
+	TIFFHeader	tif_header;	/* file's header block */
+	const int*	tif_typeshift;	/* data type shift counts */
+	const long*	tif_typemask;	/* data type masks */
+	uint32		tif_row;	/* current scanline */
+	tdir_t		tif_curdir;	/* current directory (index) */
+	tstrip_t	tif_curstrip;	/* current strip for read/write */
+	toff_t		tif_curoff;	/* current offset for read/write */
+	toff_t		tif_dataoff;	/* current offset for writing dir */
+/* SubIFD support */
+	uint16		tif_nsubifd;	/* remaining subifds to write */
+	toff_t		tif_subifdoff;	/* offset for patching SubIFD link */
+/* tiling support */
+	uint32 		tif_col;	/* current column (offset by row too) */
+	ttile_t		tif_curtile;	/* current tile for read/write */
+	tsize_t		tif_tilesize;	/* # of bytes in a tile */
+/* compression scheme hooks */
+	int		tif_decodestatus;
+	TIFFBoolMethod	tif_setupdecode;/* called once before predecode */
+	TIFFPreMethod	tif_predecode;	/* pre- row/strip/tile decoding */
+	TIFFBoolMethod	tif_setupencode;/* called once before preencode */
+	int		tif_encodestatus;
+	TIFFPreMethod	tif_preencode;	/* pre- row/strip/tile encoding */
+	TIFFBoolMethod	tif_postencode;	/* post- row/strip/tile encoding */
+	TIFFCodeMethod	tif_decoderow;	/* scanline decoding routine */
+	TIFFCodeMethod	tif_encoderow;	/* scanline encoding routine */
+	TIFFCodeMethod	tif_decodestrip;/* strip decoding routine */
+	TIFFCodeMethod	tif_encodestrip;/* strip encoding routine */
+	TIFFCodeMethod	tif_decodetile;	/* tile decoding routine */
+	TIFFCodeMethod	tif_encodetile;	/* tile encoding routine */
+	TIFFVoidMethod	tif_close;	/* cleanup-on-close routine */
+	TIFFSeekMethod	tif_seek;	/* position within a strip routine */
+	TIFFVoidMethod	tif_cleanup;	/* cleanup state routine */
+	TIFFStripMethod	tif_defstripsize;/* calculate/constrain strip size */
+	TIFFTileMethod	tif_deftilesize;/* calculate/constrain tile size */
+	tidata_t	tif_data;	/* compression scheme private data */
+/* input/output buffering */
+	tsize_t		tif_scanlinesize;/* # of bytes in a scanline */
+	tsize_t		tif_scanlineskew;/* scanline skew for reading strips */
+	tidata_t	tif_rawdata;	/* raw data buffer */
+	tsize_t		tif_rawdatasize;/* # of bytes in raw data buffer */
+	tidata_t	tif_rawcp;	/* current spot in raw buffer */
+	tsize_t		tif_rawcc;	/* bytes unread from raw buffer */
+/* memory-mapped file support */
+	tidata_t	tif_base;	/* base of mapped file */
+	toff_t		tif_size;	/* size of mapped file region (bytes) */
+	TIFFMapFileProc	tif_mapproc;	/* map file method */
+	TIFFUnmapFileProc tif_unmapproc;/* unmap file method */
+/* input/output callback methods */
+	thandle_t	tif_clientdata;	/* callback parameter */
+	TIFFReadWriteProc tif_readproc;	/* read method */
+	TIFFReadWriteProc tif_writeproc;/* write method */
+	TIFFSeekProc	tif_seekproc;	/* lseek method */
+	TIFFCloseProc	tif_closeproc;	/* close method */
+	TIFFSizeProc	tif_sizeproc;	/* filesize method */
+/* post-decoding support */
+	TIFFPostMethod	tif_postdecode;	/* post decoding routine */
+/* tag support */
+	TIFFFieldInfo**	tif_fieldinfo;	/* sorted table of registered tags */
+	size_t		tif_nfields;	/* # entries in registered tag table */
+	const TIFFFieldInfo *tif_foundfield;/* cached pointer to already found tag */
+        TIFFTagMethods  tif_tagmethods; /* tag get/set/print routines */
+        TIFFClientInfoLink *tif_clientinfo; /* extra client information. */
+};
+
+#define	isPseudoTag(t)	((t) > 0xffff)	/* true for pseudo-tags (above the 16-bit tag range); arg parenthesized */
+
+#define	isTiled(tif)	(((tif)->tif_flags & TIFF_ISTILED) != 0)
+#define	isMapped(tif)	(((tif)->tif_flags & TIFF_MAPPED) != 0)
+#define	isFillOrder(tif, o)	(((tif)->tif_flags & (o)) != 0)
+#define	isUpSampled(tif)	(((tif)->tif_flags & TIFF_UPSAMPLED) != 0)
+#define	TIFFReadFile(tif, buf, size) \
+	((*(tif)->tif_readproc)((tif)->tif_clientdata,buf,size))
+#define	TIFFWriteFile(tif, buf, size) \
+	((*(tif)->tif_writeproc)((tif)->tif_clientdata,buf,size))
+#define	TIFFSeekFile(tif, off, whence) \
+	((*(tif)->tif_seekproc)((tif)->tif_clientdata,(toff_t)(off),whence))
+#define	TIFFCloseFile(tif) \
+	((*(tif)->tif_closeproc)((tif)->tif_clientdata))
+#define	TIFFGetFileSize(tif) \
+	((*(tif)->tif_sizeproc)((tif)->tif_clientdata))
+#define	TIFFMapFileContents(tif, paddr, psize) \
+	((*(tif)->tif_mapproc)((tif)->tif_clientdata,paddr,psize))
+#define	TIFFUnmapFileContents(tif, addr, size) \
+	((*(tif)->tif_unmapproc)((tif)->tif_clientdata,addr,size))
+
+/*
+ * Default Read/Seek/Write checks: true iff the whole request succeeded.
+ */
+#ifndef ReadOK
+#define	ReadOK(tif, buf, size) \
+	(TIFFReadFile(tif, (tdata_t)(buf), (tsize_t)(size)) == (tsize_t)(size))
+#endif
+#ifndef SeekOK
+#define	SeekOK(tif, off) \
+	(TIFFSeekFile(tif, (toff_t)(off), SEEK_SET) == (toff_t)(off))
+#endif
+#ifndef WriteOK
+#define	WriteOK(tif, buf, size) \
+	(TIFFWriteFile(tif, (tdata_t)(buf), (tsize_t)(size)) == (tsize_t)(size))
+#endif
+
+/* NB: the uint32 casts are to silence certain ANSI-C compilers.  NOTE(review): x+(y-1) is formed in uint32 and presumably wraps on overflow; args are evaluated more than once -- confirm callers. */
+#define TIFFhowmany(x, y) ((((uint32)(x))+(((uint32)(y))-1))/((uint32)(y)))
+#define TIFFhowmany8(x) (((x)&0x07)?((uint32)(x)>>3)+1:(uint32)(x)>>3)
+#define	TIFFroundup(x, y) (TIFFhowmany(x,y)*(y))
+
+#define TIFFmax(A,B) ((A)>(B)?(A):(B))
+#define TIFFmin(A,B) ((A)<(B)?(A):(B))
+
+#define TIFFArrayCount(a) (sizeof (a) / sizeof ((a)[0]))
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+extern	int _TIFFgetMode(const char*, const char*);
+extern	int _TIFFNoRowEncode(TIFF*, tidata_t, tsize_t, tsample_t);
+extern	int _TIFFNoStripEncode(TIFF*, tidata_t, tsize_t, tsample_t);
+extern	int _TIFFNoTileEncode(TIFF*, tidata_t, tsize_t, tsample_t);
+extern	int _TIFFNoRowDecode(TIFF*, tidata_t, tsize_t, tsample_t);
+extern	int _TIFFNoStripDecode(TIFF*, tidata_t, tsize_t, tsample_t);
+extern	int _TIFFNoTileDecode(TIFF*, tidata_t, tsize_t, tsample_t);
+extern	void _TIFFNoPostDecode(TIFF*, tidata_t, tsize_t);
+extern  int  _TIFFNoPreCode (TIFF*, tsample_t); 
+extern	int _TIFFNoSeek(TIFF*, uint32);
+extern	void _TIFFSwab16BitData(TIFF*, tidata_t, tsize_t);
+extern	void _TIFFSwab24BitData(TIFF*, tidata_t, tsize_t);
+extern	void _TIFFSwab32BitData(TIFF*, tidata_t, tsize_t);
+extern	void _TIFFSwab64BitData(TIFF*, tidata_t, tsize_t);
+extern	int TIFFFlushData1(TIFF*);
+extern	int TIFFDefaultDirectory(TIFF*);
+extern	void _TIFFSetDefaultCompressionState(TIFF*);
+extern	int TIFFSetCompressionScheme(TIFF*, int);
+extern	int TIFFSetDefaultCompressionState(TIFF*);
+extern	uint32 _TIFFDefaultStripSize(TIFF*, uint32);
+extern	void _TIFFDefaultTileSize(TIFF*, uint32*, uint32*);
+extern	int _TIFFDataSize(TIFFDataType);
+
+extern	void _TIFFsetByteArray(void**, void*, uint32);
+extern	void _TIFFsetString(char**, char*);
+extern	void _TIFFsetShortArray(uint16**, uint16*, uint32);
+extern	void _TIFFsetLongArray(uint32**, uint32*, uint32);
+extern	void _TIFFsetFloatArray(float**, float*, uint32);
+extern	void _TIFFsetDoubleArray(double**, double*, uint32);
+
+extern	void _TIFFprintAscii(FILE*, const char*);
+extern	void _TIFFprintAsciiTag(FILE*, const char*, const char*);
+
+extern	TIFFErrorHandler _TIFFwarningHandler;
+extern	TIFFErrorHandler _TIFFerrorHandler;
+extern	TIFFErrorHandlerExt _TIFFwarningHandlerExt;
+extern	TIFFErrorHandlerExt _TIFFerrorHandlerExt;
+
+extern	tdata_t _TIFFCheckMalloc(TIFF*, size_t, size_t, const char*);
+
+extern	int TIFFInitDumpMode(TIFF*, int);
+#ifdef PACKBITS_SUPPORT
+extern	int TIFFInitPackBits(TIFF*, int);
+#endif
+#ifdef CCITT_SUPPORT
+extern	int TIFFInitCCITTRLE(TIFF*, int), TIFFInitCCITTRLEW(TIFF*, int);
+extern	int TIFFInitCCITTFax3(TIFF*, int), TIFFInitCCITTFax4(TIFF*, int);
+#endif
+#ifdef THUNDER_SUPPORT
+extern	int TIFFInitThunderScan(TIFF*, int);
+#endif
+#ifdef NEXT_SUPPORT
+extern	int TIFFInitNeXT(TIFF*, int);
+#endif
+#ifdef LZW_SUPPORT
+extern	int TIFFInitLZW(TIFF*, int);
+#endif
+#ifdef OJPEG_SUPPORT
+extern	int TIFFInitOJPEG(TIFF*, int);
+#endif
+#ifdef JPEG_SUPPORT
+extern	int TIFFInitJPEG(TIFF*, int);
+#endif
+#ifdef JBIG_SUPPORT
+extern	int TIFFInitJBIG(TIFF*, int);
+#endif
+#ifdef ZIP_SUPPORT
+extern	int TIFFInitZIP(TIFF*, int);
+#endif
+#ifdef PIXARLOG_SUPPORT
+extern	int TIFFInitPixarLog(TIFF*, int);
+#endif
+#ifdef LOGLUV_SUPPORT
+extern	int TIFFInitSGILog(TIFF*, int);
+#endif
+#ifdef VMS
+extern	const TIFFCodec _TIFFBuiltinCODECS[];
+#else
+extern	TIFFCodec _TIFFBuiltinCODECS[];
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* _TIFFIOP_ */
+
+/* vim: set ts=8 sts=8 sw=8 noet: */
diff --git a/src/libtiff/tiffvers.h b/src/libtiff/tiffvers.h
new file mode 100644
index 0000000..9744f8d
--- /dev/null
+++ b/src/libtiff/tiffvers.h
@@ -0,0 +1,9 @@
+#define TIFFLIB_VERSION_STR "LIBTIFF, Version 3.8.2\nCopyright (c) 1988-1996 Sam Leffler\nCopyright (c) 1991-1996 Silicon Graphics, Inc."
+/*
+ * This define can be used in code that requires
+ * compilation-related definitions specific to a
+ * version or versions of the library.  Runtime
+ * version checking should be done based on the
+ * string returned by TIFFGetVersion.
+ */
+#define TIFFLIB_VERSION 20060323
diff --git a/src/libtiff/uvcode.h b/src/libtiff/uvcode.h
new file mode 100644
index 0000000..5b2d7d7
--- /dev/null
+++ b/src/libtiff/uvcode.h
@@ -0,0 +1,173 @@
+/* Version 1.0 generated April 7, 1997 by Greg Ward Larson, SGI */
+/* NOTE(review): presumably the side length of one quantization cell of the
+ * (u,v) grid described by uv_row[] - confirm against the code that uses it. */
+#define UV_SQSIZ	(float)0.003500
+/* Equals ncum + nus of the last uv_row[] entry (16268 + 21). */
+#define UV_NDIVS	16289
+/* NOTE(review): presumably the v coordinate of the first uv_row[] entry - confirm. */
+#define UV_VSTART	(float)0.016940
+/* Number of entries of uv_row[] (used as its array dimension). */
+#define UV_NVS		163
+static struct {
+	float	ustart;
+	short	nus, ncum;
+}	uv_row[UV_NVS] = {
+	{ (float)0.247663,	4,	0 },
+	{ (float)0.243779,	6,	4 },
+	{ (float)0.241684,	7,	10 },
+	{ (float)0.237874,	9,	17 },
+	{ (float)0.235906,	10,	26 },
+	{ (float)0.232153,	12,	36 },
+	{ (float)0.228352,	14,	48 },
+	{ (float)0.226259,	15,	62 },
+	{ (float)0.222371,	17,	77 },
+	{ (float)0.220410,	18,	94 },
+	{ (float)0.214710,	21,	112 },
+	{ (float)0.212714,	22,	133 },
+	{ (float)0.210721,	23,	155 },
+	{ (float)0.204976,	26,	178 },
+	{ (float)0.202986,	27,	204 },
+	{ (float)0.199245,	29,	231 },
+	{ (float)0.195525,	31,	260 },
+	{ (float)0.193560,	32,	291 },
+	{ (float)0.189878,	34,	323 },
+	{ (float)0.186216,	36,	357 },
+	{ (float)0.186216,	36,	393 },
+	{ (float)0.182592,	38,	429 },
+	{ (float)0.179003,	40,	467 },
+	{ (float)0.175466,	42,	507 },
+	{ (float)0.172001,	44,	549 },
+	{ (float)0.172001,	44,	593 },
+	{ (float)0.168612,	46,	637 },
+	{ (float)0.168612,	46,	683 },
+	{ (float)0.163575,	49,	729 },
+	{ (float)0.158642,	52,	778 },
+	{ (float)0.158642,	52,	830 },
+	{ (float)0.158642,	52,	882 },
+	{ (float)0.153815,	55,	934 },
+	{ (float)0.153815,	55,	989 },
+	{ (float)0.149097,	58,	1044 },
+	{ (float)0.149097,	58,	1102 },
+	{ (float)0.142746,	62,	1160 },
+	{ (float)0.142746,	62,	1222 },
+	{ (float)0.142746,	62,	1284 },
+	{ (float)0.138270,	65,	1346 },
+	{ (float)0.138270,	65,	1411 },
+	{ (float)0.138270,	65,	1476 },
+	{ (float)0.132166,	69,	1541 },
+	{ (float)0.132166,	69,	1610 },
+	{ (float)0.126204,	73,	1679 },
+	{ (float)0.126204,	73,	1752 },
+	{ (float)0.126204,	73,	1825 },
+	{ (float)0.120381,	77,	1898 },
+	{ (float)0.120381,	77,	1975 },
+	{ (float)0.120381,	77,	2052 },
+	{ (float)0.120381,	77,	2129 },
+	{ (float)0.112962,	82,	2206 },
+	{ (float)0.112962,	82,	2288 },
+	{ (float)0.112962,	82,	2370 },
+	{ (float)0.107450,	86,	2452 },
+	{ (float)0.107450,	86,	2538 },
+	{ (float)0.107450,	86,	2624 },
+	{ (float)0.107450,	86,	2710 },
+	{ (float)0.100343,	91,	2796 },
+	{ (float)0.100343,	91,	2887 },
+	{ (float)0.100343,	91,	2978 },
+	{ (float)0.095126,	95,	3069 },
+	{ (float)0.095126,	95,	3164 },
+	{ (float)0.095126,	95,	3259 },
+	{ (float)0.095126,	95,	3354 },
+	{ (float)0.088276,	100,	3449 },
+	{ (float)0.088276,	100,	3549 },
+	{ (float)0.088276,	100,	3649 },
+	{ (float)0.088276,	100,	3749 },
+	{ (float)0.081523,	105,	3849 },
+	{ (float)0.081523,	105,	3954 },
+	{ (float)0.081523,	105,	4059 },
+	{ (float)0.081523,	105,	4164 },
+	{ (float)0.074861,	110,	4269 },
+	{ (float)0.074861,	110,	4379 },
+	{ (float)0.074861,	110,	4489 },
+	{ (float)0.074861,	110,	4599 },
+	{ (float)0.068290,	115,	4709 },
+	{ (float)0.068290,	115,	4824 },
+	{ (float)0.068290,	115,	4939 },
+	{ (float)0.068290,	115,	5054 },
+	{ (float)0.063573,	119,	5169 },
+	{ (float)0.063573,	119,	5288 },
+	{ (float)0.063573,	119,	5407 },
+	{ (float)0.063573,	119,	5526 },
+	{ (float)0.057219,	124,	5645 },
+	{ (float)0.057219,	124,	5769 },
+	{ (float)0.057219,	124,	5893 },
+	{ (float)0.057219,	124,	6017 },
+	{ (float)0.050985,	129,	6141 },
+	{ (float)0.050985,	129,	6270 },
+	{ (float)0.050985,	129,	6399 },
+	{ (float)0.050985,	129,	6528 },
+	{ (float)0.050985,	129,	6657 },
+	{ (float)0.044859,	134,	6786 },
+	{ (float)0.044859,	134,	6920 },
+	{ (float)0.044859,	134,	7054 },
+	{ (float)0.044859,	134,	7188 },
+	{ (float)0.040571,	138,	7322 },
+	{ (float)0.040571,	138,	7460 },
+	{ (float)0.040571,	138,	7598 },
+	{ (float)0.040571,	138,	7736 },
+	{ (float)0.036339,	142,	7874 },
+	{ (float)0.036339,	142,	8016 },
+	{ (float)0.036339,	142,	8158 },
+	{ (float)0.036339,	142,	8300 },
+	{ (float)0.032139,	146,	8442 },
+	{ (float)0.032139,	146,	8588 },
+	{ (float)0.032139,	146,	8734 },
+	{ (float)0.032139,	146,	8880 },
+	{ (float)0.027947,	150,	9026 },
+	{ (float)0.027947,	150,	9176 },
+	{ (float)0.027947,	150,	9326 },
+	{ (float)0.023739,	154,	9476 },
+	{ (float)0.023739,	154,	9630 },
+	{ (float)0.023739,	154,	9784 },
+	{ (float)0.023739,	154,	9938 },
+	{ (float)0.019504,	158,	10092 },
+	{ (float)0.019504,	158,	10250 },
+	{ (float)0.019504,	158,	10408 },
+	{ (float)0.016976,	161,	10566 },
+	{ (float)0.016976,	161,	10727 },
+	{ (float)0.016976,	161,	10888 },
+	{ (float)0.016976,	161,	11049 },
+	{ (float)0.012639,	165,	11210 },
+	{ (float)0.012639,	165,	11375 },
+	{ (float)0.012639,	165,	11540 },
+	{ (float)0.009991,	168,	11705 },
+	{ (float)0.009991,	168,	11873 },
+	{ (float)0.009991,	168,	12041 },
+	{ (float)0.009016,	170,	12209 },
+	{ (float)0.009016,	170,	12379 },
+	{ (float)0.009016,	170,	12549 },
+	{ (float)0.006217,	173,	12719 },
+	{ (float)0.006217,	173,	12892 },
+	{ (float)0.005097,	175,	13065 },
+	{ (float)0.005097,	175,	13240 },
+	{ (float)0.005097,	175,	13415 },
+	{ (float)0.003909,	177,	13590 },
+	{ (float)0.003909,	177,	13767 },
+	{ (float)0.002340,	177,	13944 },
+	{ (float)0.002389,	170,	14121 },
+	{ (float)0.001068,	164,	14291 },
+	{ (float)0.001653,	157,	14455 },
+	{ (float)0.000717,	150,	14612 },
+	{ (float)0.001614,	143,	14762 },
+	{ (float)0.000270,	136,	14905 },
+	{ (float)0.000484,	129,	15041 },
+	{ (float)0.001103,	123,	15170 },
+	{ (float)0.001242,	115,	15293 },
+	{ (float)0.001188,	109,	15408 },
+	{ (float)0.001011,	103,	15517 },
+	{ (float)0.000709,	97,	15620 },
+	{ (float)0.000301,	89,	15717 },
+	{ (float)0.002416,	82,	15806 },
+	{ (float)0.003251,	76,	15888 },
+	{ (float)0.003246,	69,	15964 },
+	{ (float)0.004141,	62,	16033 },
+	{ (float)0.005963,	55,	16095 },
+	{ (float)0.008839,	47,	16150 },
+	{ (float)0.010490,	40,	16197 },
+	{ (float)0.016994,	31,	16237 },
+	{ (float)0.023659,	21,	16268 },
+};
diff --git a/src/lua5/.cvsignore b/src/lua5/.cvsignore
new file mode 100644
index 0000000..521532a
--- /dev/null
+++ b/src/lua5/.cvsignore
@@ -0,0 +1,9 @@
+so_locations
+*.dep
+*.wdep
+*.loh
+.plan
+.project
+*.err
+Makefile
+*.make
diff --git a/src/lua5/im_fftw.lua b/src/lua5/im_fftw.lua
new file mode 100644
index 0000000..e57c7bf
--- /dev/null
+++ b/src/lua5/im_fftw.lua
@@ -0,0 +1,48 @@
+
+-------------------------------------------------------------------------------
+-- Creates a new function, with the name suffixed by "New". This new function
+-- creates a new image, based on a source image, and calls the previous function
+-- with this new image.
+
+-- funcname: name of an existing function in the "im" table.
+-- width, height, color_space, data_type: passed unchanged to
+-- im.ImageCreateBased when the destination image is created.
+local function OneSourceOneDest (funcname, width, height, color_space, data_type)
+  local func = im[funcname]
+
+  -- see if function is really defined
+  assert(func, string.format("undefined function `%s'", funcname))
+
+  -- define function with "New" suffix
+  im[funcname.."New"] = function (src_image, ...)
+    -- create destination image
+    local dst_image = im.ImageCreateBased(src_image, width, height, color_space, data_type)
+
+    -- call previous method, repassing all parameters; "..." is forwarded
+    -- directly because unpack(arg) depends on the deprecated "arg" table
+    -- and can drop arguments when one of them is nil
+    func(src_image, dst_image, ...)
+    return dst_image
+  end
+end
+
+-------------------------------------------------------------------------------
+-- This function is similar to OneSourceOneDest, but it receives two source
+-- images.
+
+-- funcname: name of an existing function in the "im" table.
+-- width, height, color_space, data_type: passed unchanged to
+-- im.ImageCreateBased; the destination is based on the first source image.
+local function TwoSourcesOneDest (funcname, width, height, color_space, data_type)
+  local func = im[funcname]
+
+  -- see if function is really defined
+  assert(func, string.format("undefined function `%s'", funcname))
+
+  -- define function with "New" suffix
+  im[funcname.."New"] = function (src_image1, src_image2, ...)
+    -- create destination image
+    local dst_image = im.ImageCreateBased(src_image1, width, height, color_space, data_type)
+
+    -- call previous method, repassing all parameters; "..." is forwarded
+    -- directly because unpack(arg) depends on the deprecated "arg" table
+    -- and can drop arguments when one of them is nil
+    func(src_image1, src_image2, dst_image, ...)
+    return dst_image
+  end
+end
+
+-------------------------------------------------------------------------------
+
+-- Register the "New" variants: im.ProcessCrossCorrelationNew,
+-- im.ProcessAutoCorrelationNew (destination requested with data type
+-- im.CFLOAT), im.ProcessFFTNew and im.ProcessIFFTNew.
+TwoSourcesOneDest("ProcessCrossCorrelation")
+OneSourceOneDest("ProcessAutoCorrelation", nil, nil, nil, im.CFLOAT)
+OneSourceOneDest("ProcessFFT")
+OneSourceOneDest("ProcessIFFT")
diff --git a/src/lua5/im_process.lua b/src/lua5/im_process.lua
new file mode 100644
index 0000000..1d91d5e
--- /dev/null
+++ b/src/lua5/im_process.lua
@@ -0,0 +1,326 @@
+
+-------------------------------------------------------------------------------
+-- Creates a new function, with the name suffixed by "New". This new function
+-- creates a new image, based on a source image, and calls the previous function
+-- with this new image.
+-- We assume here that the functions returns only one parameter or none.
+
+-- funcname: name of an existing function in the "im" table.
+-- width, height, color_space, data_type: passed unchanged to
+-- im.ImageCreateBased when the destination image is created.
+-- The generated function returns "ret, dst_image" when the wrapped function
+-- returns a true value, otherwise only "dst_image".
+local function OneSourceOneDest (funcname, width, height, color_space, data_type)
+  local func = im[funcname]
+
+  -- see if function is really defined
+  assert(func, string.format("undefined function `%s'", funcname))
+
+  -- define function with "New" suffix
+  im[funcname.."New"] = function (src_image, ...)
+    -- create destination image
+    local dst_image = im.ImageCreateBased(src_image, width, height, color_space, data_type)
+
+    -- call previous method, repassing all parameters; "..." is forwarded
+    -- directly because unpack(arg) depends on the deprecated "arg" table
+    -- and can drop arguments when one of them is nil
+    local ret = func(src_image, dst_image, ...)
+    if (ret) then
+      return ret, dst_image
+    else
+      return dst_image
+    end
+  end
+end
+
+-------------------------------------------------------------------------------
+-- This function is similar to OneSourceOneDest, but it receives two source
+-- images.
+
+-- funcname: name of an existing function in the "im" table.
+-- width, height, color_space, data_type: passed unchanged to
+-- im.ImageCreateBased; the destination is based on the first source image.
+-- The generated function returns "ret, dst_image" when the wrapped function
+-- returns a true value, otherwise only "dst_image".
+local function TwoSourcesOneDest (funcname, width, height, color_space, data_type)
+  local func = im[funcname]
+
+  -- see if function is really defined
+  assert(func, string.format("undefined function `%s'", funcname))
+
+  -- define function with "New" suffix
+  im[funcname.."New"] = function (src_image1, src_image2, ...)
+    -- create destination image
+    local dst_image = im.ImageCreateBased(src_image1, width, height, color_space, data_type)
+
+    -- call previous method, repassing all parameters; "..." is forwarded
+    -- directly because unpack(arg) depends on the deprecated "arg" table
+    -- and can drop arguments when one of them is nil
+    local ret = func(src_image1, src_image2, dst_image, ...)
+    if (ret) then
+      return ret, dst_image
+    else
+      return dst_image
+    end
+  end
+end
+
+-------------------------------------------------------------------------------
+-- This function is similar to OneSourceOneDest, but it receives three source
+-- images.
+
+-- funcname: name of an existing function in the "im" table.
+-- width, height, color_space, data_type: passed unchanged to
+-- im.ImageCreateBased; the destination is based on the first source image.
+-- The generated function returns "ret, dst_image" when the wrapped function
+-- returns a true value, otherwise only "dst_image".
+local function ThreeSourcesOneDest (funcname, width, height, color_space, data_type)
+  local func = im[funcname]
+
+  -- see if function is really defined
+  assert(func, string.format("undefined function `%s'", funcname))
+
+  -- define function with "New" suffix
+  im[funcname.."New"] = function (src_image1, src_image2, src_image3, ...)
+    -- create destination image
+    local dst_image = im.ImageCreateBased(src_image1, width, height, color_space, data_type)
+
+    -- call previous method, repassing all parameters; "..." is forwarded
+    -- directly because unpack(arg) depends on the deprecated "arg" table
+    -- and can drop arguments when one of them is nil
+    local ret = func(src_image1, src_image2, src_image3, dst_image, ...)
+    if (ret) then
+      return ret, dst_image
+    else
+      return dst_image
+    end
+  end
+end
+
+-------------------------------------------------------------------------------
+-- This function is similar to OneSourceOneDest, but it creates two destiny
+-- images.
+
+-- funcname: name of an existing function in the "im" table.
+-- width, height, color_space, data_type: passed unchanged to
+-- im.ImageCreateBased for each of the two destination images.
+-- The generated function returns "ret, dst_image1, dst_image2" when the
+-- wrapped function returns a true value, otherwise only the two images.
+local function OneSourceTwoDests (funcname, width, height, color_space, data_type)
+  local func = im[funcname]
+
+  -- see if function is really defined
+  assert(func, string.format("undefined function `%s'", funcname))
+
+  -- define function with "New" suffix
+  im[funcname.."New"] = function (src_image, ...)
+    -- create destination images
+    local dst_image1 = im.ImageCreateBased(src_image, width, height, color_space, data_type)
+    local dst_image2 = im.ImageCreateBased(src_image, width, height, color_space, data_type)
+
+    -- call previous method, repassing all parameters; "..." is forwarded
+    -- directly because unpack(arg) depends on the deprecated "arg" table
+    -- and can drop arguments when one of them is nil
+    local ret = func(src_image, dst_image1, dst_image2, ...)
+    if (ret) then
+      return ret, dst_image1, dst_image2
+    else
+      return dst_image1, dst_image2
+    end
+  end
+end
+
+-------------------------------------------------------------------------------
+-- This function is similar to OneSourceOneDest, but it creates three destiny
+-- images.
+
+-- funcname: name of an existing function in the "im" table.
+-- width, height, color_space, data_type: passed unchanged to
+-- im.ImageCreateBased for each of the three destination images.
+-- The generated function returns "ret, dst_image1, dst_image2, dst_image3"
+-- when the wrapped function returns a true value, otherwise only the images.
+local function OneSourceThreeDests (funcname, width, height, color_space, data_type)
+  local func = im[funcname]
+
+  -- see if function is really defined
+  assert(func, string.format("undefined function `%s'", funcname))
+
+  -- define function with "New" suffix
+  im[funcname.."New"] = function (src_image, ...)
+    -- create destination images
+    local dst_image1 = im.ImageCreateBased(src_image, width, height, color_space, data_type)
+    local dst_image2 = im.ImageCreateBased(src_image, width, height, color_space, data_type)
+    local dst_image3 = im.ImageCreateBased(src_image, width, height, color_space, data_type)
+
+    -- call previous method, repassing all parameters; "..." is forwarded
+    -- directly because unpack(arg) depends on the deprecated "arg" table
+    -- and can drop arguments when one of them is nil
+    local ret = func(src_image, dst_image1, dst_image2, dst_image3, ...)
+    if (ret) then
+      return ret, dst_image1, dst_image2, dst_image3
+    else
+      return dst_image1, dst_image2, dst_image3
+    end
+  end
+end
+
+-------------------------------------------------------------------------------
+
+-- Returns 2*rmax+1, where rmax is half the length of the image diagonal
+-- (computed from image:Width() and image:Height()).
+local function hough_height(image)
+  local w = image:Width()
+  local h = image:Height()
+  local half_diagonal = math.sqrt(w*w + h*h) / 2
+  return 2*half_diagonal+1
+end
+
+-- Register the "New" variants of single-source functions. Arguments after
+-- the name are the width, height, color space and data type handed to
+-- im.ImageCreateBased; nil (or omitted) is passed through as nil.
+OneSourceOneDest("AnalyzeFindRegions", nil, nil, nil, im.USHORT)
+OneSourceOneDest("ProcessPerimeterLine")
+OneSourceOneDest("ProcessPrune")
+OneSourceOneDest("ProcessFillHoles")
+-- height is given as the function hough_height, not as a number
+-- NOTE(review): presumably im.ImageCreateBased calls it with the source image - confirm
+OneSourceOneDest("ProcessHoughLines", 180, hough_height, im.GRAY, im.INT)
+OneSourceOneDest("ProcessHoughLinesDraw")
+OneSourceOneDest("ProcessDistanceTransform", nil, nil, nil, im.FLOAT)
+OneSourceOneDest("ProcessRegionalMaximum", nil, nil, im.BINARY, nil)
+
+-- Creates a width x height image based on src_image and fills it with
+-- im.ProcessReduce. Returns the result of im.ProcessReduce followed by the
+-- new image.
+-- order is forwarded to im.ProcessReduce; the wrapper previously had no way
+-- to pass it. NOTE(review): the IM C API imProcessReduce takes a decimation
+-- order - confirm that the binding expects it as third argument.
+function im.ProcessReduceNew (src_image, width, height, order)
+  local dst_image = im.ImageCreateBased(src_image, width, height)
+  return im.ProcessReduce(src_image, dst_image, order), dst_image
+end
+
+-- Creates a width x height image based on src_image and fills it with
+-- im.ProcessResize. Returns the result of im.ProcessResize followed by the
+-- new image.
+-- order is forwarded to im.ProcessResize; the wrapper previously had no way
+-- to pass it. NOTE(review): the IM C API imProcessResize takes an
+-- interpolation order - confirm that the binding expects it as third argument.
+function im.ProcessResizeNew (src_image, width, height, order)
+  local dst_image = im.ImageCreateBased(src_image, width, height)
+  return im.ProcessResize(src_image, dst_image, order), dst_image
+end
+
+-- Registers im.ProcessReduceBy4New; the destination width and height are
+-- given as functions returning half of the source width and height.
+OneSourceOneDest("ProcessReduceBy4", function (image) return image:Width() / 2 end, 
+                                     function (image) return image:Height() / 2 end)
+
+-- Creates a new image covering the inclusive rectangle [xmin, xmax] x
+-- [ymin, ymax], fills it with im.ProcessCrop(src_image, dst, xmin, ymin) and
+-- returns it.
+function im.ProcessCropNew (src_image, xmin, xmax, ymin, ymax)
+  local width = xmax - xmin + 1
+  -- the height was previously computed from xmax, ignoring ymax
+  local height = ymax - ymin + 1
+  local dst_image = im.ImageCreateBased(src_image, width, height)
+  im.ProcessCrop(src_image, dst_image, xmin, ymin)
+  return dst_image
+end
+
+-- Registers im.ProcessInsertNew (two sources; destination based on the first).
+TwoSourcesOneDest("ProcessInsert")
+
+-- Creates a new image of size (xmax - xmin + 1) x (ymax - ymin + 1), fills it
+-- with im.ProcessAddMargins(src_image, dst, xmin, ymin) and returns it.
+function im.ProcessAddMarginsNew (src_image, xmin, xmax, ymin, ymax)
+  local width = xmax - xmin + 1
+  -- the height was previously computed from xmax, ignoring ymax
+  local height = ymax - ymin + 1
+  local dst_image = im.ImageCreateBased(src_image, width, height)
+  im.ProcessAddMargins(src_image, dst_image, xmin, ymin)
+  return dst_image
+end
+
+-- Creates a destination image sized by im.ProcessCalcRotateSize, rotates
+-- src_image into it with im.ProcessRotate and returns the result of
+-- im.ProcessRotate followed by the new image.
+function im.ProcessRotateNew (src_image, cos0, sin0, order)
+  local src_width = src_image:Width()
+  local src_height = src_image:Height()
+  local new_width, new_height = im.ProcessCalcRotateSize(src_width, src_height, cos0, sin0)
+  local rotated = im.ImageCreateBased(src_image, new_width, new_height)
+  local ret = im.ProcessRotate(src_image, rotated, cos0, sin0, order)
+  return ret, rotated
+end
+
+-- Register the "New" variants of the geometric, morphology, rank and
+-- convolution functions. Unless extra arguments are given, the destination
+-- is created with im.ImageCreateBased(src_image, nil, nil, nil, nil).
+OneSourceOneDest("ProcessRotateRef")
+-- destination width/height are functions returning the source height/width
+OneSourceOneDest("ProcessRotate90", function (image) return image:Height() end, function (image) return image:Width() end)
+OneSourceOneDest("ProcessRotate180")
+OneSourceOneDest("ProcessMirror")
+OneSourceOneDest("ProcessFlip")
+OneSourceOneDest("ProcessRadial")
+OneSourceOneDest("ProcessGrayMorphConvolve")
+OneSourceOneDest("ProcessGrayMorphErode")
+OneSourceOneDest("ProcessGrayMorphDilate")
+OneSourceOneDest("ProcessGrayMorphOpen")
+OneSourceOneDest("ProcessGrayMorphClose")
+OneSourceOneDest("ProcessGrayMorphTopHat")
+OneSourceOneDest("ProcessGrayMorphWell")
+OneSourceOneDest("ProcessGrayMorphGradient")
+OneSourceOneDest("ProcessBinMorphConvolve")
+OneSourceOneDest("ProcessBinMorphErode")
+OneSourceOneDest("ProcessBinMorphDilate")
+OneSourceOneDest("ProcessBinMorphOpen")
+OneSourceOneDest("ProcessBinMorphClose")
+OneSourceOneDest("ProcessBinMorphOutline")
+OneSourceOneDest("ProcessBinMorphThin")
+OneSourceOneDest("ProcessMedianConvolve")
+OneSourceOneDest("ProcessRangeConvolve")
+OneSourceOneDest("ProcessRankClosestConvolve")
+OneSourceOneDest("ProcessRankMaxConvolve")
+OneSourceOneDest("ProcessRankMinConvolve")
+OneSourceOneDest("ProcessConvolve")
+OneSourceOneDest("ProcessConvolveSep")
+OneSourceOneDest("ProcessConvolveRep")
+OneSourceOneDest("ProcessConvolveDual")
+OneSourceOneDest("ProcessCompassConvolve")
+OneSourceOneDest("ProcessMeanConvolve")
+OneSourceOneDest("ProcessGaussianConvolve")
+OneSourceOneDest("ProcessBarlettConvolve")
+-- NOTE(review): the im.ProcessInterlaceSplitNew registered here is replaced
+-- by the explicit definition of im.ProcessInterlaceSplitNew that follows;
+-- the height function also returns image:Height() whenever it is non-nil,
+-- so its "/2" branch looks unreachable.
+OneSourceTwoDests("ProcessInterlaceSplit", nil, function (image) if (image:Height()) then return image:Height() else return image:Height()/2 end end)
+
+-- Creates two destination images based on src_image and fills them with
+-- im.ProcessInterlaceSplit. The two heights add up to the source height;
+-- the first image gets the extra line when the source height is odd.
+-- Returns dst_image1, dst_image2.
+function im.ProcessInterlaceSplitNew(src_image)
+  -- The previous test "if math.mod(height, 2) then" was always true (0 is a
+  -- true value in Lua), so an even height made the first image one line too
+  -- tall, and height/2 was fractional for odd heights.
+  local height = src_image:Height()
+  local dst_height2 = math.floor(height / 2)
+  local dst_height1 = height - dst_height2
+
+  -- create destination images
+  local dst_image1 = im.ImageCreateBased(src_image, nil, dst_height1)
+  local dst_image2 = im.ImageCreateBased(src_image, nil, dst_height2)
+
+  -- call method, repassing all parameters
+  im.ProcessInterlaceSplit(src_image, dst_image1, dst_image2)
+  return dst_image1, dst_image2
+end
+
+-- Returns im.INT when the image data type is im.BYTE or im.USHORT,
+-- otherwise returns the image data type unchanged.
+local function int_datatype (image)
+  local source_type = image:DataType()
+  local is_small_integer = (source_type == im.BYTE) or (source_type == im.USHORT)
+  if is_small_integer then
+    return im.INT
+  end
+  return source_type
+end
+
+-- Register the "New" variants of the edge-detection and arithmetic
+-- functions. The two *OfGaussianConvolve entries pass the function
+-- int_datatype as the destination data type.
+OneSourceOneDest("ProcessDiffOfGaussianConvolve", nil, nil, nil, int_datatype)
+OneSourceOneDest("ProcessLapOfGaussianConvolve", nil, nil, nil, int_datatype)
+OneSourceOneDest("ProcessSobelConvolve")
+OneSourceOneDest("ProcessSplineEdgeConvolve")
+OneSourceOneDest("ProcessPrewittConvolve")
+OneSourceOneDest("ProcessZeroCrossing")
+OneSourceOneDest("ProcessCanny")
+OneSourceOneDest("ProcessUnArithmeticOp")
+TwoSourcesOneDest("ProcessArithmeticOp")
+
+-- Creates a new image based on src_image, fills it with
+-- im.ProcessArithmeticConstOp(src_image, src_const, new_image, op) and
+-- returns the new image.
+function im.ProcessArithmeticConstOpNew (src_image, src_const, op)
+  local result_image = im.ImageCreateBased(src_image)
+  im.ProcessArithmeticConstOp(src_image, src_const, result_image, op)
+  return result_image
+end
+
+-- Register the "New" variants of the blend and complex split/merge
+-- functions; ProcessMergeComplexNew requests data type im.CFLOAT.
+TwoSourcesOneDest("ProcessBlendConst")
+ThreeSourcesOneDest("ProcessBlend")
+OneSourceTwoDests("ProcessSplitComplex")
+TwoSourcesOneDest("ProcessMergeComplex", nil, nil, nil, im.CFLOAT)
+
+-- Creates a new image based on the first entry of src_image_list, fills it
+-- with im.ProcessMultipleMean and returns it.
+-- The dst_image parameter is never read; a freshly created image is used.
+function im.ProcessMultipleMeanNew (src_image_list, dst_image)
+  local first_image = src_image_list[1]
+  local mean_image = im.ImageCreateBased(first_image)
+  im.ProcessMultipleMean(src_image_list, mean_image)
+  return mean_image
+end
+
+-- Creates a new image based on the first entry of src_image_list, fills it
+-- with im.ProcessMultipleStdDev(src_image_list, mean_image, new_image) and
+-- returns the new image.
+function im.ProcessMultipleStdDevNew (src_image_list, mean_image)
+  local first_image = src_image_list[1]
+  local stddev_image = im.ImageCreateBased(first_image)
+  im.ProcessMultipleStdDev(src_image_list, mean_image, stddev_image)
+  return stddev_image
+end
+
+-- Register the "New" variants of the covariance, quantization and histogram
+-- functions; ProcessQuantizeRGBUniformNew requests color space im.MAP.
+TwoSourcesOneDest("ProcessAutoCovariance")
+OneSourceOneDest("ProcessMultiplyConj")
+OneSourceOneDest("ProcessQuantizeRGBUniform", nil, nil, im.MAP, nil)
+OneSourceOneDest("ProcessQuantizeGrayUniform")
+OneSourceOneDest("ProcessExpandHistogram")
+OneSourceOneDest("ProcessEqualizeHistogram")
+
+-- Creates an im.GRAY/im.BYTE image and an im.RGB/im.BYTE image based on
+-- src_image, fills them with im.ProcessSplitYChroma and returns both
+-- (gray image first).
+function im.ProcessSplitYChromaNew (src_image)
+  local luma = im.ImageCreateBased(src_image, nil, nil, im.GRAY, im.BYTE)
+  local chroma = im.ImageCreateBased(src_image, nil, nil, im.RGB, im.BYTE)
+  im.ProcessSplitYChroma(src_image, luma, chroma)
+  return luma, chroma
+end
+
+-- ProcessSplitHSINew creates three im.GRAY/im.FLOAT destinations;
+-- ProcessMergeHSINew creates one im.RGB/im.BYTE destination.
+OneSourceThreeDests("ProcessSplitHSI", nil, nil, im.GRAY, im.FLOAT)
+ThreeSourcesOneDest("ProcessMergeHSI", nil, nil, im.RGB, im.BYTE)
+
+-- Creates src_image:Depth() im.GRAY images based on src_image, fills them
+-- with im.ProcessSplitComponents and returns them as multiple results.
+function im.ProcessSplitComponentsNew (src_image)
+  local planes = {}
+  local plane_count = src_image:Depth()
+  for index = 1, plane_count do
+    planes[index] = im.ImageCreateBased(src_image, nil, nil, im.GRAY)
+  end
+  im.ProcessSplitComponents(src_image, planes)
+  return unpack(planes)
+end
+
+-- Creates an im.RGB image based on the first entry of src_image_list, fills
+-- it with im.ProcessMergeComponents and returns it.
+function im.ProcessMergeComponentsNew (src_image_list)
+  local first_image = src_image_list[1]
+  local merged = im.ImageCreateBased(first_image, nil, nil, im.RGB)
+  im.ProcessMergeComponents(src_image_list, merged)
+  return merged
+end
+
+-- Register the "New" variants of the point-operation and threshold
+-- functions. Entries with im.BINARY request that color space for the
+-- destination; entries with im.FLOAT or im.BYTE request that data type.
+OneSourceOneDest("ProcessNormalizeComponents", nil, nil, nil, im.FLOAT)
+OneSourceOneDest("ProcessReplaceColor")
+TwoSourcesOneDest("ProcessBitwiseOp")
+OneSourceOneDest("ProcessBitwiseNot")
+OneSourceOneDest("ProcessBitMask")
+OneSourceOneDest("ProcessBitPlane")
+OneSourceOneDest("ProcessToneGamut")
+OneSourceOneDest("ProcessUnNormalize", nil, nil, nil, im.BYTE)
+OneSourceOneDest("ProcessDirectConv", nil, nil, nil, im.BYTE)
+OneSourceOneDest("ProcessNegative")
+OneSourceOneDest("ProcessRangeContrastThreshold", nil, nil, im.BINARY, nil)
+OneSourceOneDest("ProcessLocalMaxThreshold", nil, nil, im.BINARY, nil)
+OneSourceOneDest("ProcessThreshold", nil, nil, im.BINARY, nil)
+TwoSourcesOneDest("ProcessThresholdByDiff")
+OneSourceOneDest("ProcessHysteresisThreshold", nil, nil, im.BINARY, nil)
+OneSourceOneDest("ProcessUniformErrThreshold", nil, nil, im.BINARY, nil)
+-- NOTE(review): the next three threshold entries do not request im.BINARY,
+-- unlike the other threshold entries - confirm this is intended
+OneSourceOneDest("ProcessDifusionErrThreshold")
+OneSourceOneDest("ProcessPercentThreshold")
+OneSourceOneDest("ProcessOtsuThreshold")
+OneSourceOneDest("ProcessMinMaxThreshold", nil, nil, im.BINARY, nil)
+OneSourceOneDest("ProcessSliceThreshold", nil, nil, im.BINARY, nil)
+OneSourceOneDest("ProcessPixelate")
+OneSourceOneDest("ProcessPosterize")
+
diff --git a/src/lua5/imlua.c b/src/lua5/imlua.c
new file mode 100644
index 0000000..7d39ee7
--- /dev/null
+++ b/src/lua5/imlua.c
@@ -0,0 +1,246 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua.c,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#include <string.h>
+#include <memory.h>
+#include <stdlib.h>
+
+#include "im.h"
+#include "im_lib.h"
+#include "im_image.h"
+#include "im_convert.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+#include "imlua.h"
+#include "imlua_aux.h"
+#include "imlua_image.h"
+#include "imlua_palette.h"
+
+/*****************************************************************************\
+ im.Version()
+\*****************************************************************************/
+/* Pushes the string returned by imVersion(); returns 1 result. */
+static int imluaVersion (lua_State *L)
+{
+  const char *version = imVersion();
+  lua_pushstring(L, version);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.VersionDate()
+\*****************************************************************************/
+/* Pushes the string returned by imVersionDate(); returns 1 result. */
+static int imluaVersionDate (lua_State *L)
+{
+  const char *version_date = imVersionDate();
+  lua_pushstring(L, version_date);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.VersionNumber()
+\*****************************************************************************/
+/* Pushes the value of imVersionNumber() as a Lua number; returns 1 result. */
+static int imluaVersionNumber (lua_State *L)
+{
+  lua_Number version_number = (lua_Number)imVersionNumber();
+  lua_pushnumber(L, version_number);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.FormatList()
+\*****************************************************************************/
+/* Returns one Lua table holding the names filled in by imFormatList(),
+   stored as array entries 1..format_count. */
+static int imluaFormatList (lua_State *L)
+{
+  int i, format_count;
+  char *format_list[50];
+
+  imFormatList(format_list, &format_count);
+
+  lua_newtable(L);
+  for (i = 0; i < format_count; i++)
+  {
+    /* lua_settable(L, -2) was called with only the value pushed, so it used
+       the table itself as key and popped it; store by array index instead */
+    lua_pushstring(L, format_list[i]);
+    lua_rawseti(L, -2, i+1);
+  }
+
+  return 1;
+}
+
+/*****************************************************************************\
+ im.FormatInfo(format)
+\*****************************************************************************/
+/* Calls imFormatInfo() for the format name given as argument 1.
+   Always pushes the error value first (via imlua_pusherror); when there is
+   no error it also pushes desc, ext and can_sequence (boolean) and
+   returns 4, otherwise it returns 1. */
+static int imluaFormatInfo (lua_State *L)
+{
+  char desc[50];
+  char ext[50];
+  int can_sequence;
+  const char *format = luaL_checkstring(L, 1);
+  int error = imFormatInfo(format, desc, ext, &can_sequence);
+
+  imlua_pusherror(L, error);
+  if (!error)
+  {
+    lua_pushstring(L, desc);
+    lua_pushstring(L, ext);
+    lua_pushboolean(L, can_sequence);
+    return 4;
+  }
+
+  return 1;
+}
+
+/*****************************************************************************\
+ im.FormatCompressions(format)
+\*****************************************************************************/
+/* Calls imFormatCompressions() for the format name given as argument 1;
+   arguments 2 (color_mode) and 3 (data_type) are optional and default to -1.
+   Always pushes the error value first (via imlua_pusherror); when there is
+   no error it also pushes a table with the compression names stored as
+   array entries 1..comp_count and returns 2, otherwise it returns 1. */
+static int imluaFormatCompressions (lua_State *L)
+{
+  int i, comp_count;
+  int error;
+  char *comp[50];
+
+  int color_mode = luaL_optint(L, 2, -1);
+  int data_type = luaL_optint(L, 3, -1);
+
+  error = imFormatCompressions(luaL_checkstring(L, 1), comp, &comp_count, color_mode, data_type);
+
+  imlua_pusherror(L, error);
+  if (error)
+    return 1;
+
+  lua_newtable(L);
+  for (i = 0; i < comp_count; i++)
+  {
+    /* lua_settable(L, -2) was called with only the value pushed, so it used
+       the table itself as key and popped it; store by array index instead */
+    lua_pushstring(L, comp[i]);
+    lua_rawseti(L, -2, i+1);
+  }
+
+  return 2;
+}
+
+/*****************************************************************************\
+ im.FormatCanWriteImage(format, compression, color_mode, data_type)
+\*****************************************************************************/
+/* Checks arguments 1-4 (two strings, two integers), forwards them to
+   imFormatCanWriteImage() and pushes its result as a boolean. */
+static int imluaFormatCanWriteImage (lua_State *L)
+{
+  const char *fmt_name = luaL_checkstring(L, 1);
+  const char *comp_name = luaL_checkstring(L, 2);
+  int mode = luaL_checkint(L, 3);
+  int type = luaL_checkint(L, 4);
+  int can_write = imFormatCanWriteImage(fmt_name, comp_name, mode, type);
+
+  lua_pushboolean(L, can_write);
+  return 1;
+}
+
+/*****************************************************************************\
+ Constants
+\*****************************************************************************/
+/* Table of { name, numeric value, string value } entries registered by
+   imlua_regconstants(): when the string value is not NULL it is the one
+   stored, otherwise the numeric value is. The list ends at the entry whose
+   name is NULL. */
+static const imlua_constant im_constants[] = {
+
+  /* IM_BYTE .. IM_CFLOAT: used as data types by the Lua wrappers */
+  { "BYTE", IM_BYTE, NULL },
+  { "USHORT", IM_USHORT, NULL },
+  { "INT", IM_INT, NULL },
+  { "FLOAT", IM_FLOAT, NULL },
+  { "CFLOAT", IM_CFLOAT, NULL },
+
+  /* IM_RGB .. IM_XYZ: presumably the color spaces - confirm in im.h */
+  { "RGB", IM_RGB, NULL },
+  { "MAP", IM_MAP, NULL },
+  { "GRAY", IM_GRAY, NULL },
+  { "BINARY", IM_BINARY, NULL },
+  { "CMYK", IM_CMYK, NULL },
+  { "YCBCR", IM_YCBCR, NULL },
+  { "LAB", IM_LAB, NULL },
+  { "LUV", IM_LUV, NULL },
+  { "XYZ", IM_XYZ, NULL },
+
+  /* presumably color mode flags - confirm in im.h */
+  { "ALPHA", IM_ALPHA, NULL },
+  { "PACKED", IM_PACKED, NULL },
+  { "TOPDOWN", IM_TOPDOWN, NULL },
+
+  /* IM_ERR_* codes */
+  { "ERR_NONE", IM_ERR_NONE, NULL },
+  { "ERR_OPEN", IM_ERR_OPEN, NULL }, 
+  { "ERR_ACCESS", IM_ERR_ACCESS, NULL }, 
+  { "ERR_FORMAT", IM_ERR_FORMAT, NULL }, 
+  { "ERR_DATA", IM_ERR_DATA, NULL }, 
+  { "ERR_COMPRESS", IM_ERR_COMPRESS, NULL }, 
+  { "ERR_MEM", IM_ERR_MEM, NULL }, 
+  { "ERR_COUNTER", IM_ERR_COUNTER, NULL }, 
+
+  /* IM_CPX_* constants */
+  { "CPX_REAL", IM_CPX_REAL, NULL },
+  { "CPX_IMAG", IM_CPX_IMAG, NULL },
+  { "CPX_MAG", IM_CPX_MAG, NULL },
+  { "CPX_PHASE", IM_CPX_PHASE, NULL },
+
+  /* IM_GAMMA_* constants */
+  { "GAMMA_LINEAR", IM_GAMMA_LINEAR, NULL },
+  { "GAMMA_LOGLITE", IM_GAMMA_LOGLITE, NULL },
+  { "GAMMA_LOGHEAVY", IM_GAMMA_LOGHEAVY, NULL },
+  { "GAMMA_EXPLITE", IM_GAMMA_EXPLITE, NULL },
+  { "GAMMA_EXPHEAVY", IM_GAMMA_EXPHEAVY, NULL },
+
+  /* IM_CAST_* constants */
+  { "CAST_MINMAX", IM_CAST_MINMAX, NULL },
+  { "CAST_FIXED", IM_CAST_FIXED, NULL },
+  { "CAST_DIRECT", IM_CAST_DIRECT, NULL },
+
+  /* library information; all but _VERSION_NUMBER are stored as strings */
+  { "_AUTHOR",  0, IM_AUTHOR },
+  { "_COPYRIGHT",  0, IM_COPYRIGHT },
+  { "_VERSION",  0, IM_VERSION },
+  { "_VERSION_NUMBER",  IM_VERSION_NUMBER, NULL },
+  { "_VERSION_DATE",  0, IM_VERSION_DATE },
+  { "_DESCRIPTION",  0, IM_DESCRIPTION },
+  { "_NAME",  0, IM_NAME },
+
+  /* end marker: the name is NULL */
+  { NULL, -1, NULL },
+};
+
+/* Stores every entry of imconst (up to the entry whose name is NULL) in the
+   table at the top of the stack: the string value when it is not NULL,
+   otherwise the numeric value. The table stays at the top of the stack. */
+void imlua_regconstants (lua_State *L, const imlua_constant *imconst)
+{
+  const imlua_constant *entry;
+
+  for (entry = imconst; entry->name != NULL; entry++)
+  {
+    lua_pushstring(L, entry->name);
+    if (entry->str_value == NULL)
+      lua_pushnumber(L, entry->value);
+    else
+      lua_pushstring(L, entry->str_value);
+    lua_settable(L, -3);   /* table[name] = value; pops both */
+  }
+}
+
+/* Functions registered in the "im" table by imlua_open():
+   Lua name -> C function. The list ends at the {NULL, NULL} entry. */
+static const luaL_reg im_lib[] = {
+  {"Version", imluaVersion},
+  {"VersionDate", imluaVersionDate},
+  {"VersionNumber", imluaVersionNumber},
+
+  {"FormatList", imluaFormatList},
+  {"FormatInfo", imluaFormatInfo},
+  {"FormatCompressions", imluaFormatCompressions},
+  {"FormatCanWriteImage", imluaFormatCanWriteImage},
+
+  {NULL, NULL}
+};
+
+/* Registers the functions of im_lib and the constants of im_constants in
+   the "im" table, then calls the open function of each binding module.
+   Returns 1. */
+int imlua_open (lua_State *L)
+{
+  luaL_register(L, "im", im_lib);   /* leave "im" table at the top of the stack */
+  imlua_regconstants(L, im_constants);
+
+  /* NOTE(review): presumably each call below adds its entries to the "im"
+     table left on the stack and keeps the stack balanced - confirm */
+  imlua_open_file(L);
+  imlua_open_image(L);
+  imlua_open_convert(L);
+  imlua_open_util(L);
+  imlua_open_palette(L);
+
+  return 1;
+}
+
+/* Entry point named luaopen_imlua; forwards to imlua_open(). */
+int luaopen_imlua(lua_State *L)
+{
+  int result_count = imlua_open(L);
+  return result_count;
+}
+
+/* Entry point named luaopen_imlua51; forwards to imlua_open(). */
+int luaopen_imlua51(lua_State *L)
+{
+  int result_count = imlua_open(L);
+  return result_count;
+}
diff --git a/src/lua5/imlua.def b/src/lua5/imlua.def
new file mode 100644
index 0000000..259c822
--- /dev/null
+++ b/src/lua5/imlua.def
@@ -0,0 +1,24 @@
+EXPORTS
+  luaopen_imlua
+  luaopen_imlua51
+  imlua_open
+  imlua_checkimage
+  imlua_newarrayulong
+  imlua_newarrayint
+  imlua_newarrayfloat
+  imlua_checkdatatype
+  imlua_checkcolorspace
+  imlua_toarrayint
+  imlua_toarrayfloat
+  imlua_toarrayulong 
+  imlua_getn
+  imlua_checktype
+  imlua_checkmask
+  imlua_regconstants
+  imlua_pushimage
+  imlua_matchcolor
+  imlua_matchsize
+  imlua_matchdatatype
+  imlua_matchcolorspace
+  imlua_match
+  
\ No newline at end of file
diff --git a/src/lua5/imlua_aux.c b/src/lua5/imlua_aux.c
new file mode 100644
index 0000000..d5df8ac
--- /dev/null
+++ b/src/lua5/imlua_aux.c
@@ -0,0 +1,255 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua_aux.c,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#include <memory.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "im.h"
+#include "im_image.h"
+#include "im_util.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+#include "imlua.h"
+#include "imlua_aux.h"
+#include "imlua_image.h"
+
+/*****************************************************************************\
+\*****************************************************************************/
+int imlua_getn (lua_State *L, int index)
+{
+  /* Returns the length of the table at stack position index (the value the
+   * # operator gives). lua_objlen is used instead of calling the global
+   * table.getn: it keeps working when a script replaces or removes "table",
+   * and it pushes nothing, so a negative (relative) index still refers to
+   * the intended table. */
+  int n;
+
+  luaL_checktype(L, index, LUA_TTABLE);  /* non-tables still raise an error */
+  n = (int) lua_objlen(L, index);
+  return n;
+}
+
+/*****************************************************************************\
+ Creates an int array.
+\*****************************************************************************/
+int imlua_newarrayint (lua_State *L, int *value, int count, int start)
+{ /* Pushes a new table t with t[start+k] = value[k] for every k in [0, count). */
+  int k;
+  lua_newtable(L);
+  for (k = 0; count > k; ++k)
+  {
+    lua_pushnumber(L, value[k]);     /* element k ...                 */
+    lua_rawseti(L, -2, start + k);   /* ... is stored at key start+k  */
+  }
+  return 1;  /* one value, the table, is left on the stack */
+}
+
+/*****************************************************************************\
+ Creates an unsigned long array.
+\*****************************************************************************/
+int imlua_newarrayulong (lua_State *L, unsigned long *value, int count, int start)
+{ /* Pushes a new table t with t[start+k] = value[k] for every k in [0, count). */
+  int k;
+  lua_newtable(L);
+  for (k = 0; count > k; ++k)
+  {
+    lua_pushnumber(L, value[k]);     /* element k ...                 */
+    lua_rawseti(L, -2, start + k);   /* ... is stored at key start+k  */
+  }
+  return 1;  /* one value, the table, is left on the stack */
+}
+
+/*****************************************************************************\
+ Creates a float array.
+\*****************************************************************************/
+int imlua_newarrayfloat (lua_State *L, float *value, int count, int start)
+{ /* Pushes a new table t with t[start+k] = value[k] for every k in [0, count). */
+  int k;
+  lua_newtable(L);
+  for (k = 0; count > k; ++k)
+  {
+    lua_pushnumber(L, value[k]);     /* element k ...                 */
+    lua_rawseti(L, -2, start + k);   /* ... is stored at key start+k  */
+  }
+  return 1;  /* one value, the table, is left on the stack */
+}
+
+/*****************************************************************************\
+ Retrieve an int array.
+\*****************************************************************************/
+int *imlua_toarrayint (lua_State *L, int index, int *count, int start)
+{ /* Returns a malloc'ed copy (the caller frees it) of t[start], t[start+1], ...; NULL when index is not a table. */
+  int i, n;
+  int *value = NULL;
+  if (lua_istable(L, index))
+  {
+    n = imlua_getn(L, index);
+    if (start == 0) n++;   /* a 0-based table holds one more element, at key 0 */
+    if (count) *count = n;
+    value = (int*) malloc (sizeof(int) * n);
+    if (!value && n > 0) luaL_error(L, "not enough memory");  /* do not write through NULL */
+    for (i = 0; i < n; i++)
+    {
+      lua_rawgeti(L, index, i+start);
+      if (!lua_isnumber(L, -1)) { free(value); luaL_typerror(L, -1, "number"); }  /* error does not return: free first */
+      value[i] = luaL_checkint(L, -1);
+      lua_pop(L, 1);
+    }
+  }
+  return value;
+}
+
+/*****************************************************************************\
+ Retrieve an ulong array.
+\*****************************************************************************/
+unsigned long *imlua_toarrayulong (lua_State *L, int index, int *count, int start)
+{ /* Returns a malloc'ed copy (the caller frees it) of t[start], t[start+1], ...; NULL when index is not a table. */
+  int i, n;
+  unsigned long *value = NULL;
+  if (lua_istable(L, index))
+  {
+    n = imlua_getn(L, index);
+    if (start == 0) n++;   /* a 0-based table holds one more element, at key 0 */
+    if (count) *count = n;
+    value = (unsigned long*) malloc (sizeof(unsigned long) * n);
+    if (!value && n > 0) luaL_error(L, "not enough memory");  /* do not write through NULL */
+    for (i = 0; i < n; i++)
+    {
+      lua_rawgeti(L, index, i+start);
+      if (!lua_isnumber(L, -1)) { free(value); luaL_typerror(L, -1, "number"); }  /* error does not return: free first */
+      value[i] = luaL_checkint(L, -1);
+      lua_pop(L, 1);
+    }
+  }
+  return value;
+}
+
+/*****************************************************************************\
+ Retrieve a float array.
+\*****************************************************************************/
+float *imlua_toarrayfloat (lua_State *L, int index, int *count, int start)
+{ /* Returns a malloc'ed copy (the caller frees it) of t[start], t[start+1], ...; NULL when index is not a table. */
+  int i, n;
+  float *value = NULL;
+  if (lua_istable(L, index))
+  {
+    n = imlua_getn(L, index);
+    if (start == 0) n++;   /* a 0-based table holds one more element, at key 0 */
+    if (count) *count = n;
+    value = (float*) malloc (sizeof(float) * n);
+    if (!value && n > 0) luaL_error(L, "not enough memory");  /* do not write through NULL */
+    for (i = 0; i < n; i++)
+    {
+      lua_rawgeti(L, index, i+start);
+      if (!lua_isnumber(L, -1)) { free(value); luaL_typerror(L, -1, "number"); }  /* error does not return: free first */
+      value[i] = (float) luaL_checknumber(L, -1);
+      lua_pop(L, 1);
+    }
+  }
+  return value;
+}
+
+
+/*****************************************************************************\
+ Creates a bit mask based on a string formatted as "11000110".
+\*****************************************************************************/
+unsigned char imlua_checkmask (lua_State *L, int index)
+{
+  /* Parses an 8 character string of '0'/'1' digits, most significant bit
+   * first, into a byte. Any other length or character is an argument error. */
+  const char *digits = luaL_checkstring(L, index);
+  unsigned char bits = 0;
+  int pos;
+
+  if (strlen(digits) != 8)
+    luaL_argerror(L, index, "invalid mask, must have 8 elements");
+
+  for (pos = 0; pos < 8; pos++)
+  {
+    if (!(digits[pos] == '0' || digits[pos] == '1'))
+      luaL_argerror(L, index, "invalid mask, must have 0s or 1s only");
+    bits = (unsigned char)((bits << 1) | (digits[pos] == '1'));
+  }
+  return bits;
+}
+
+/*****************************************************************************\
+ Checks data_type and color_space of an image. If it doesn't match throw a lua error.
+\*****************************************************************************/
+void imlua_checktype (lua_State *L, int index, imImage *image, int color_space, int data_type)
+{
+  /* Raises an argument error on parameter index unless image has the given
+   * data_type and color_space; the data type is tested first. Lua formats
+   * the message, so no fixed-size buffer can be overrun by a long name. */
+  if (image->data_type != data_type)
+  {
+    const char *name = imDataTypeName(data_type);
+    luaL_argerror(L, index, lua_pushfstring(L, "image data type must be %s", name));
+  }
+  if (image->color_space != color_space)
+  {
+    const char *name = imColorModeSpaceName(color_space);
+    luaL_argerror(L, index, lua_pushfstring(L, "image color space must be %s", name));
+  }
+}
+
+/*****************************************************************************\
+ Checks color_space of an image. If it doesn't match throw a lua error.
+\*****************************************************************************/
+void imlua_checkcolorspace (lua_State *L, int index, imImage *image, int color_space)
+{
+  /* Argument error on parameter index unless image has color_space; Lua formats the message (no fixed buffer). */
+  if (image->color_space != color_space)
+  {
+    const char *name = imColorModeSpaceName(color_space);
+    luaL_argerror(L, index, lua_pushfstring(L, "image color space must be %s", name));
+  }
+}
+
+/*****************************************************************************\
+ Checks a data_type of an image. If it doesn't match throw a lua error.
+\*****************************************************************************/
+void imlua_checkdatatype (lua_State *L, int index, imImage *image, int data_type)
+{
+  /* Argument error on parameter index unless image has data_type; Lua formats the message (no fixed buffer). */
+  if (image->data_type != data_type)
+  {
+    const char *name = imDataTypeName(data_type);
+    luaL_argerror(L, index, lua_pushfstring(L, "image data type must be %s", name));
+  }
+}
+
+/*****************************************************************************\
+ Checks if the size of the two images are equal. If it doesn't match throw a lua error.
+\*****************************************************************************/
+void imlua_matchsize(lua_State *L, imImage *image1, imImage *image2)
+{
+  imlua_matchcheck(L, imImageMatchSize(image1, image2), "images must have the same size");
+}
+
+void imlua_matchcolor(lua_State *L, imImage *image1, imImage *image2)
+{
+  imlua_matchcheck(L, imImageMatchColor(image1, image2), "images must have the same data type and color space");
+}
+
+void imlua_matchdatatype(lua_State *L, imImage *image1, imImage *image2)
+{
+  imlua_matchcheck(L, imImageMatchDataType(image1, image2), "images must have the same size and data type");
+}
+
+void imlua_matchcolorspace(lua_State *L, imImage *image1, imImage *image2)
+{
+  imlua_matchcheck(L, imImageMatchColorSpace(image1, image2), "images must have the same size and color space");
+}
+
+void imlua_match(lua_State *L, imImage *image1, imImage *image2)
+{
+  imlua_matchcheck(L, imImageMatch(image1, image2), "images must have the same size, data type and color space");
+}
diff --git a/src/lua5/imlua_aux.h b/src/lua5/imlua_aux.h
new file mode 100644
index 0000000..2dc4466
--- /dev/null
+++ b/src/lua5/imlua_aux.h
@@ -0,0 +1,82 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua_aux.h,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#ifndef __IMLUA_AUX_H
+#define __IMLUA_AUX_H
+
+#if	defined(__cplusplus)
+extern "C" {
+#endif
+
+
+/********************************/
+/* exported from "imlua_aux.c". */
+/********************************/
+
+/* get table size */
+
+int imlua_getn(lua_State *L, int index);
+
+/* array */
+
+int imlua_newarrayint(lua_State *L, int *value, int count, int start);
+int imlua_newarrayulong(lua_State *L, unsigned long *value, int count, int start);
+int imlua_newarrayfloat(lua_State *L, float *value, int count, int start);
+
+int *imlua_toarrayint(lua_State *L, int index, int *count, int start);
+unsigned long *imlua_toarrayulong (lua_State *L, int index, int *count, int start);
+float *imlua_toarrayfloat(lua_State *L, int index, int *count, int start);
+
+/* other parameter checking */
+
+unsigned char imlua_checkmask(lua_State *L, int index);
+
+void imlua_checktype(lua_State *L, int index, imImage *image, int color_space, int data_type);
+void imlua_checkdatatype(lua_State *L, int index, imImage *image, int data_type);
+void imlua_checkcolorspace(lua_State *L, int index, imImage *image, int color_space);
+
+void imlua_matchsize(lua_State *L, imImage *image1, imImage *image2);
+void imlua_matchcolor(lua_State *L, imImage *image1, imImage *image2);
+void imlua_matchdatatype(lua_State *L, imImage *image1, imImage *image2);
+void imlua_matchcolorspace(lua_State *L, imImage *image1, imImage *image2);
+void imlua_match(lua_State *L, imImage *image1, imImage *image2);
+
+/* used only when comparing two images; do/while(0) makes it one statement, "%s" keeps the text literal */
+#define imlua_matchcheck(L, cond, extramsg) do { if (!(cond)) \
+                                               luaL_error(L, "%s", extramsg); } while (0)
+
+/* pushes nil for IM_ERR_NONE, otherwise the error code; _e is evaluated exactly once, so it may be a call */
+#define imlua_pusherror(L, _e) (lua_pushnumber(L, (_e)), (lua_tonumber(L, -1) == IM_ERR_NONE? (lua_pop(L, 1), lua_pushnil(L)): (void)0))
+
+/********************************/
+/* exported from "imlua.c".     */
+/********************************/
+
+/* constant registration. */
+
+typedef struct _imlua_constant {
+  const char *name;       /* key stored in the table; NULL marks the end of a list */
+  lua_Number value;       /* numeric value, used when str_value is NULL */
+  const char *str_value;  /* when not NULL the constant is registered as this string */
+} imlua_constant;
+
+void imlua_regconstants(lua_State *L, const imlua_constant *imconst);
+
+
+/********************************/
+/* private module open          */
+/********************************/
+
+void imlua_open_convert(lua_State *L);  /* imlua_convert.c */
+void imlua_open_util(lua_State *L);     /* imlua_util.c    */
+void imlua_open_file(lua_State *L);     /* imlua_file.c    */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/lua5/imlua_avi.c b/src/lua5/imlua_avi.c
new file mode 100644
index 0000000..f2cd7f4
--- /dev/null
+++ b/src/lua5/imlua_avi.c
@@ -0,0 +1,44 @@
+/** \file
+ * \brief AVI format Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "im_format_avi.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+
+static int imlua_FormatRegisterAVI(lua_State *L)
+{
+  (void)L;
+  imFormatRegisterAVI();
+  return 0;
+}
+
+static const struct luaL_reg imlib[] = {
+  {"FormatRegisterAVI", imlua_FormatRegisterAVI},
+  {NULL, NULL},
+};
+
+
+static int imlua_avi_open (lua_State *L)
+{
+  imFormatRegisterAVI();
+  luaL_register(L, "im", imlib);   /* leave "im" table at the top of the stack */
+  return 1;
+}
+
+int luaopen_imlua_avi(lua_State* L)
+{
+  return imlua_avi_open(L);
+}
+
+int luaopen_imlua_avi51(lua_State* L)
+{
+  return imlua_avi_open(L);
+}
diff --git a/src/lua5/imlua_avi.def b/src/lua5/imlua_avi.def
new file mode 100644
index 0000000..3086a0d
--- /dev/null
+++ b/src/lua5/imlua_avi.def
@@ -0,0 +1,4 @@
+EXPORTS
+  luaopen_imlua_avi
+  luaopen_imlua_avi51
+ 
\ No newline at end of file
diff --git a/src/lua5/imlua_capture.c b/src/lua5/imlua_capture.c
new file mode 100644
index 0000000..15d52ce
--- /dev/null
+++ b/src/lua5/imlua_capture.c
@@ -0,0 +1,421 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua_capture.c,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#include <string.h>
+#include <memory.h>
+
+#include "im.h"
+#include "im_image.h"
+#include "im_capture.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+#include "imlua.h"
+#include "imlua_aux.h"
+#include "imlua_image.h"
+
+
+
+static imVideoCapture** imlua_rawcheckvideocapture (lua_State *L, int param)
+{
+  return (imVideoCapture**)luaL_checkudata(L, param, "imVideoCapture");
+}
+
+static imVideoCapture* imlua_checkvideocapture (lua_State *L, int param)
+{
+  imVideoCapture** vc_p = imlua_rawcheckvideocapture(L, param);
+
+  if (!(*vc_p))
+    luaL_argerror(L, param, "destroyed imVideoCapture");
+
+  return *vc_p;
+}
+
+static void imlua_pushvideocapture(lua_State *L, imVideoCapture* vc)
+{
+  /* Pushes nil for a NULL handle; otherwise pushes a userdata holding the
+   * handle and gives it the "imVideoCapture" metatable. */
+  imVideoCapture** holder;
+
+  if (vc == NULL) { lua_pushnil(L); return; }
+  holder = (imVideoCapture**) lua_newuserdata(L, sizeof(imVideoCapture*));
+  *holder = vc;
+  luaL_getmetatable(L, "imVideoCapture");
+  lua_setmetatable(L, -2);
+}
+
+/*****************************************************************************\
+ im.VideoCaptureDeviceCount()
+\*****************************************************************************/
+static int imluaVideoCaptureDeviceCount (lua_State *L)
+{
+  lua_pushnumber(L, imVideoCaptureDeviceCount());
+  return 1;
+}
+
+/*****************************************************************************\
+ im.VideoCaptureDeviceDesc(device)
+\*****************************************************************************/
+static int imluaVideoCaptureDeviceDesc (lua_State *L)
+{
+  lua_pushstring(L, imVideoCaptureDeviceDesc(luaL_checkint(L, 1)));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.VideoCaptureReloadDevices()
+\*****************************************************************************/
+static int imluaVideoCaptureReloadDevices (lua_State *L)
+{
+  lua_pushnumber(L, imVideoCaptureReloadDevices());
+  return 1;
+}
+
+/*****************************************************************************\
+ im.VideoCaptureCreate()
+\*****************************************************************************/
+static int imluaVideoCaptureCreate (lua_State *L)
+{
+  imlua_pushvideocapture(L, imVideoCaptureCreate());
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:Connect([device])
+\*****************************************************************************/
+static int imluaVideoCaptureConnect (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  int device = luaL_optint(L, 2, -1);
+  lua_pushnumber(L, imVideoCaptureConnect(vc, device));
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:Disconnect()
+\*****************************************************************************/
+static int imluaVideoCaptureDisconnect (lua_State *L)
+{
+  imVideoCaptureDisconnect(imlua_checkvideocapture(L, 1));
+  return 0;
+}
+
+/*****************************************************************************\
+ vc:DialogCount()
+\*****************************************************************************/
+static int imluaVideoCaptureDialogCount (lua_State *L)
+{
+  lua_pushnumber(L, imVideoCaptureDialogCount(imlua_checkvideocapture(L, 1)));
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:ShowDialog(dialog, parent)
+\*****************************************************************************/
+static int imluaVideoCaptureShowDialog (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  int dialog = luaL_checkint(L, 2);
+  void *parent = lua_touserdata(L, 3); 
+
+  lua_pushnumber(L, imVideoCaptureShowDialog(vc, dialog, parent));
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:DialogDesc(dialog)
+\*****************************************************************************/
+static int imluaVideoCaptureDialogDesc (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  int dialog = luaL_checkint(L, 2);
+
+  lua_pushstring(L, imVideoCaptureDialogDesc(vc, dialog));
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:FormatCount()
+\*****************************************************************************/
+static int imluaVideoCaptureFormatCount (lua_State *L)
+{
+  lua_pushnumber(L, imVideoCaptureFormatCount(imlua_checkvideocapture(L, 1)));
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:GetFormat(format)
+\*****************************************************************************/
+static int imluaVideoCaptureGetFormat (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  int format = luaL_checkint(L, 2);
+  int width = 0, height = 0;  /* outputs start defined: a failed call may leave them unwritten */
+  char desc[10] = "";         /* likewise an empty string rather than stack garbage */
+  /* results: the call's return value, width, height, description */
+  lua_pushnumber(L, imVideoCaptureGetFormat(vc, format, &width, &height, desc));
+  lua_pushnumber(L, width);
+  lua_pushnumber(L, height);
+  lua_pushstring(L, desc);
+
+  return 4;
+}
+
+/*****************************************************************************\
+ vc:GetImageSize()
+\*****************************************************************************/
+static int imluaVideoCaptureGetImageSize (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  int width = 0, height = 0;  /* never push indeterminate values if the call writes nothing */
+
+  imVideoCaptureGetImageSize(vc, &width, &height);
+  lua_pushnumber(L, width);
+  lua_pushnumber(L, height);
+
+  return 2;  /* width, height */
+}
+
+/*****************************************************************************\
+ vc:SetImageSize(width, height)
+\*****************************************************************************/
+static int imluaVideoCaptureSetImageSize (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  int width = luaL_checkint(L, 2);
+  int height = luaL_checkint(L, 3);
+
+  lua_pushnumber(L, imVideoCaptureSetImageSize(vc, width, height));
+
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:SetFormat(format)
+\*****************************************************************************/
+static int imluaVideoCaptureSetFormat (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  int format = luaL_checkint(L, 2);
+
+  lua_pushnumber(L, imVideoCaptureSetFormat(vc, format));
+
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:ResetAttribute(attrib, fauto)
+\*****************************************************************************/
+static int imluaVideoCaptureResetAttribute (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  const char *attrib = luaL_checkstring(L, 2);
+  int fauto = luaL_checkint(L, 3);
+
+  lua_pushnumber(L, imVideoCaptureResetAttribute(vc, attrib, fauto));
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:SetAttribute(attrib, percent)
+\*****************************************************************************/
+static int imluaVideoCaptureSetAttribute (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  const char *attrib = luaL_checkstring(L, 2);
+  float percent = (float) luaL_checknumber(L, 3);
+
+  lua_pushnumber(L, imVideoCaptureSetAttribute(vc, attrib, percent));
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:GetAttribute(attrib)
+\*****************************************************************************/
+static int imluaVideoCaptureGetAttribute (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  const char *attrib = luaL_checkstring(L, 2);
+  float percent = 0;  /* defined second result even when the call fails and writes nothing */
+
+  lua_pushnumber(L, imVideoCaptureGetAttribute(vc, attrib, &percent));
+  lua_pushnumber(L, percent);
+  return 2;  /* the call's return value, then the percent */
+}
+
+/*****************************************************************************\
+ vc:GetAttributeList()
+\*****************************************************************************/
+static int imluaVideoCaptureGetAttributeList (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  int num_attrib = 0;  /* stays 0 if the call does not report a count */
+  const char **attribs;
+  int i;
+
+  attribs = imVideoCaptureGetAttributeList(vc, &num_attrib);
+  lua_newtable(L);  /* result: 1-based array of attribute names, empty when there is no list */
+  for (i = 0; attribs && i < num_attrib; i++)
+  {
+    lua_pushstring(L, attribs[i]);
+    lua_rawseti(L, -2, i + 1);
+  }
+
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:Frame(image, timeout)
+\*****************************************************************************/
+static int imluaVideoCaptureFrame (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  imImage *image = imlua_checkimage(L, 2);
+  int timeout = luaL_checkint(L, 3), width = 0, height = 0;
+  if (!(image->color_space == IM_RGB || image->color_space == IM_GRAY))
+    luaL_argerror(L, 2, "image must be of RGB or Gray color spaces");
+  imlua_checkdatatype(L, 2, image, IM_BYTE);
+  imVideoCaptureGetImageSize(vc, &width, &height);  /* the frame is written into image->data[0]: sizes must agree */
+  if (image->width != width || image->height != height)
+    luaL_argerror(L, 2, "image size must match the capture image size");
+  lua_pushnumber(L, imVideoCaptureFrame(vc, image->data[0], image->color_space, timeout));
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:OneFrame(image)
+\*****************************************************************************/
+static int imluaVideoCaptureOneFrame (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  imImage *image = imlua_checkimage(L, 2);
+  int width = 0, height = 0;
+  if (!(image->color_space == IM_RGB || image->color_space == IM_GRAY))
+    luaL_argerror(L, 2, "image must be of RGB or Gray color spaces");
+  imlua_checkdatatype(L, 2, image, IM_BYTE);
+  imVideoCaptureGetImageSize(vc, &width, &height);  /* the frame is written into image->data[0]: sizes must agree */
+  if (image->width != width || image->height != height) luaL_argerror(L, 2, "image size must match the capture image size");
+  lua_pushnumber(L, imVideoCaptureOneFrame(vc, image->data[0], image->color_space));
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:Live(live)
+\*****************************************************************************/
+static int imluaVideoCaptureLive (lua_State *L)
+{
+  imVideoCapture *vc = imlua_checkvideocapture(L, 1);
+  int live = luaL_checkint(L, 2);
+
+  lua_pushnumber(L, imVideoCaptureLive(vc, live));
+
+  return 1;
+}
+
+/*****************************************************************************\
+ vc:Destroy()
+\*****************************************************************************/
+static int imluaVideoCaptureDestroy (lua_State *L)
+{
+  imVideoCapture **vc_p = imlua_rawcheckvideocapture(L, 1);
+  if (!(*vc_p))
+    luaL_argerror(L, 1, "destroyed imVideoCapture");
+
+  imVideoCaptureDestroy(*vc_p);
+  *vc_p = NULL;  /* mark as destroyed */
+
+  return 0;
+}
+
+/*****************************************************************************\
+ gc
+\*****************************************************************************/
+static int imluaVideoCapture_gc (lua_State *L)
+{
+  imVideoCapture **vc_p = (imVideoCapture **)lua_touserdata(L, 1);
+  if (*vc_p)
+  {
+    imVideoCaptureDestroy(*vc_p);
+    *vc_p = NULL;  /* mark as destroyed */
+  }
+  return 0;
+}
+
+/*****************************************************************************\
+ tostring
+\*****************************************************************************/
+static int imluaVideoCapture_tostring (lua_State *L)
+{
+  imVideoCapture **vc_p = (imVideoCapture **)lua_touserdata(L, 1);
+  lua_pushfstring(L, "imVideoCapture (%p)%s", vc_p, (*vc_p)? "": "-destroyed");
+  return 1;
+}
+
+static const luaL_reg imcapture_lib[] = {
+  {"VideoCaptureDeviceCount", imluaVideoCaptureDeviceCount},
+  {"VideoCaptureDeviceDesc", imluaVideoCaptureDeviceDesc},
+  {"VideoCaptureReloadDevices", imluaVideoCaptureReloadDevices},
+  {"VideoCaptureCreate", imluaVideoCaptureCreate},
+  {"VideoCaptureDestroy", imluaVideoCaptureDestroy},
+  {NULL, NULL}
+};
+
+static const luaL_reg imcapture_metalib[] = {
+  {"Destroy", imluaVideoCaptureDestroy},
+  {"Connect", imluaVideoCaptureConnect},
+  {"Disconnect", imluaVideoCaptureDisconnect},
+  {"DialogCount", imluaVideoCaptureDialogCount},
+  {"ShowDialog", imluaVideoCaptureShowDialog},
+  {"DialogDesc", imluaVideoCaptureDialogDesc},
+  {"FormatCount", imluaVideoCaptureFormatCount},
+  {"GetFormat", imluaVideoCaptureGetFormat},
+  {"SetFormat", imluaVideoCaptureSetFormat},
+  {"GetImageSize", imluaVideoCaptureGetImageSize},
+  {"SetImageSize", imluaVideoCaptureSetImageSize},
+  {"ResetAttribute", imluaVideoCaptureResetAttribute},
+  {"GetAttribute", imluaVideoCaptureGetAttribute},
+  {"SetAttribute", imluaVideoCaptureSetAttribute},
+  {"GetAttributeList", imluaVideoCaptureGetAttributeList},
+  {"OneFrame", imluaVideoCaptureOneFrame},
+  {"Frame", imluaVideoCaptureFrame},
+  {"Live", imluaVideoCaptureLive},
+
+  {"__gc", imluaVideoCapture_gc},
+  {"__tostring", imluaVideoCapture_tostring},
+
+  {NULL, NULL}
+};
+
+static void createmeta (lua_State *L) 
+{
+  /* Object Oriented Access */
+  luaL_newmetatable(L, "imVideoCapture");  /* create new metatable for imVideoCapture handle */
+  lua_pushliteral(L, "__index");
+  lua_pushvalue(L, -2);  /* push metatable */
+  lua_rawset(L, -3);  /* metatable.__index = metatable */
+  luaL_register(L, NULL, imcapture_metalib);  /* register methods */
+  lua_pop(L, 1);  /* removes the metatable from the top of the stack */
+}
+
+int imlua_open_capture(lua_State *L)
+{
+  createmeta(L);
+  luaL_register(L, "im", imcapture_lib);  /* leave "im" table at the top of the stack */
+  return 1;
+}
+
+int luaopen_imlua_capture(lua_State *L)
+{
+  return imlua_open_capture(L);
+}
+
+int luaopen_imlua_capture51(lua_State *L)
+{
+  return imlua_open_capture(L);
+}
+
diff --git a/src/lua5/imlua_capture.def b/src/lua5/imlua_capture.def
new file mode 100644
index 0000000..1b279cf
--- /dev/null
+++ b/src/lua5/imlua_capture.def
@@ -0,0 +1,5 @@
+EXPORTS
+  imlua_open_capture
+  luaopen_imlua_capture
+  luaopen_imlua_capture51
+  
\ No newline at end of file
diff --git a/src/lua5/imlua_convert.c b/src/lua5/imlua_convert.c
new file mode 100644
index 0000000..5ec73c9
--- /dev/null
+++ b/src/lua5/imlua_convert.c
@@ -0,0 +1,79 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua_convert.c,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#include "im.h"
+#include "im_image.h"
+#include "im_convert.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+#include "imlua.h"
+#include "imlua_image.h"
+#include "imlua_aux.h"
+
+/*****************************************************************************\
+ im.ConvertDataType(src_image, dst_image, cpx2real, gamma, abssolute, cast_mode)
+\*****************************************************************************/
+static int imluaConvertDataType (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  imImage* dst_image = imlua_checkimage(L, 2);
+  int cpx2real = luaL_checkint(L, 3);
+  float gamma = (float) luaL_checknumber(L, 4);
+  int abssolute = luaL_checkint(L, 5);
+  int cast_mode = luaL_checkint(L, 6);
+
+  imlua_matchcolorspace(L, src_image, dst_image);
+  imlua_pusherror(L, imConvertDataType(src_image, dst_image, cpx2real, gamma, abssolute, cast_mode));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ConvertColorSpace(src_image, dst_image)
+\*****************************************************************************/
+static int imluaConvertColorSpace (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  imImage* dst_image = imlua_checkimage(L, 2);
+
+  imlua_matchdatatype(L, src_image, dst_image);
+  imlua_pusherror(L, imConvertColorSpace(src_image, dst_image));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ConvertToBitmap(src_image, dst_image, cpx2real, gamma, abssolute, cast_mode)
+\*****************************************************************************/
+static int imluaConvertToBitmap (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  imImage* dst_image = imlua_checkimage(L, 2);
+  int cpx2real = luaL_checkint(L, 3);
+  float gamma = (float) luaL_checknumber(L, 4);
+  int abssolute = luaL_checkint(L, 5);
+  int cast_mode = luaL_checkint(L, 6);
+
+  imlua_matchsize(L, src_image, dst_image);
+  imlua_matchcheck(L, imImageIsBitmap(dst_image), "image must be a bitmap");
+
+  imlua_pusherror(L, imConvertToBitmap(src_image, dst_image, cpx2real, gamma, abssolute, cast_mode));
+  return 1;
+}
+
+static const luaL_reg imconvert_lib[] = {
+  {"ConvertDataType", imluaConvertDataType},
+  {"ConvertColorSpace", imluaConvertColorSpace},
+  {"ConvertToBitmap", imluaConvertToBitmap},
+  {NULL, NULL}
+};
+
+void imlua_open_convert (lua_State *L)
+{
+  /* "im" table is at the top of the stack */
+  luaL_register(L, NULL, imconvert_lib);
+}
diff --git a/src/lua5/imlua_fftw.c b/src/lua5/imlua_fftw.c
new file mode 100644
index 0000000..0966572
--- /dev/null
+++ b/src/lua5/imlua_fftw.c
@@ -0,0 +1,162 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua_fftw.c,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#include <memory.h>
+
+#include "im.h"
+#include "im_image.h"
+#include "im_process.h"
+#include "im_util.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+#include "imlua.h"
+#include "imlua_aux.h"
+#include "imlua_image.h"
+
+
+/*****************************************************************************\
+ Domain Transform Operations
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessFFT(src_image, dst_image)
+\*****************************************************************************/
+static int imluaProcessFFT (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  imImage* dst_image = imlua_checkimage(L, 2);
+
+  imlua_matchsize(L, src_image, dst_image);
+  imlua_checkdatatype(L, 2, dst_image, IM_CFLOAT);
+
+  imProcessFFT(src_image, dst_image);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessIFFT(src_image, dst_image)
+\*****************************************************************************/
+static int imluaProcessIFFT (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  imImage* dst_image = imlua_checkimage(L, 2);
+
+  imlua_matchsize(L, src_image, dst_image);
+  imlua_checkdatatype(L, 1, src_image, IM_CFLOAT);
+  imlua_checkdatatype(L, 2, dst_image, IM_CFLOAT);
+
+  imProcessIFFT(src_image, dst_image);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessFFTraw(src_image, inverse, center, normalize)
+\*****************************************************************************/
+static int imluaProcessFFTraw (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  int inverse = luaL_checkint(L, 2);
+  int center = luaL_checkint(L, 3);
+  int normalize = luaL_checkint(L, 4);
+
+  imlua_checkdatatype(L, 1, src_image, IM_CFLOAT);
+
+  imProcessFFTraw(src_image, inverse, center, normalize);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessSwapQuadrants(src_image, center2origin)
+\*****************************************************************************/
+static int imluaProcessSwapQuadrants (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  int center2origin = luaL_checkint(L, 2);
+
+  imlua_checkdatatype(L, 1, src_image, IM_CFLOAT);
+
+  imProcessSwapQuadrants(src_image, center2origin);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessCrossCorrelation(image1, image2, dst_image)
+\*****************************************************************************/
+static int imluaProcessCrossCorrelation (lua_State *L)
+{
+  imImage* image1 = imlua_checkimage(L, 1);
+  imImage* image2 = imlua_checkimage(L, 2);
+  imImage* dst_image = imlua_checkimage(L, 3);
+
+  imlua_matchsize(L, image1, dst_image);
+  imlua_matchsize(L, image2, dst_image);
+  imlua_checkdatatype(L, 3, dst_image, IM_CFLOAT);
+
+  imProcessCrossCorrelation(image1, image2, dst_image);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessAutoCorrelation(src_image, dst_image)
+\*****************************************************************************/
+static int imluaProcessAutoCorrelation (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  imImage* dst_image = imlua_checkimage(L, 2);
+
+  imlua_matchsize(L, src_image, dst_image);
+  imlua_checkdatatype(L, 2, dst_image, IM_CFLOAT);
+
+  imProcessAutoCorrelation(src_image, dst_image);
+  return 0;
+}
+
+static const luaL_reg imfftw_lib[] = {
+  {"ProcessFFT", imluaProcessFFT},
+  {"ProcessIFFT", imluaProcessIFFT},
+  {"ProcessFFTraw", imluaProcessFFTraw},
+  {"ProcessSwapQuadrants", imluaProcessSwapQuadrants},
+  {"ProcessCrossCorrelation", imluaProcessCrossCorrelation},
+  {"ProcessAutoCorrelation", imluaProcessAutoCorrelation},
+
+  {NULL, NULL}
+};
+
+int imlua_open_fftw (lua_State *L)
+{
+  luaL_register(L, "im", imfftw_lib);  /* leave "im" table at the top of the stack */
+#ifdef TEC_BIGENDIAN  /* exactly one .loh file is included, chosen by byte order and word size */
+#ifdef TEC_64
+#include "im_fftw_be64.loh"
+#else  /* big endian, not TEC_64 */
+#include "im_fftw_be32.loh"
+#endif  /* TEC_64 */
+#else  /* not TEC_BIGENDIAN */
+#ifdef TEC_64
+#ifdef WIN64
+#include "im_fftw_le64w.loh"
+#else  /* TEC_64 without WIN64 */
+#include "im_fftw_le64.loh"
+#endif  /* WIN64 */
+#else  /* neither TEC_BIGENDIAN nor TEC_64 */
+#include "im_fftw.loh"
+#endif  /* TEC_64 */
+#endif  /* TEC_BIGENDIAN */
+  return 1;  /* NOTE(review): assumes the included .loh code leaves the "im" table on top - confirm */
+}
+
+int luaopen_imlua_fftw(lua_State *L)
+{
+  return imlua_open_fftw(L);
+}
+
+int luaopen_imlua_fftw51(lua_State *L)
+{
+  return imlua_open_fftw(L);
+}
diff --git a/src/lua5/imlua_fftw.def b/src/lua5/imlua_fftw.def
new file mode 100644
index 0000000..216c967
--- /dev/null
+++ b/src/lua5/imlua_fftw.def
@@ -0,0 +1,4 @@
+EXPORTS
+  imlua_open_fftw
+  luaopen_imlua_fftw
+  luaopen_imlua_fftw51
\ No newline at end of file
diff --git a/src/lua5/imlua_file.c b/src/lua5/imlua_file.c
new file mode 100644
index 0000000..fd20c36
--- /dev/null
+++ b/src/lua5/imlua_file.c
@@ -0,0 +1,661 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua_file.c,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#include <string.h>
+#include <memory.h>
+#include <stdlib.h>
+
+#include "im.h"
+#include "im_raw.h"
+#include "im_image.h"
+#include "im_util.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+#include "imlua.h"
+#include "imlua_aux.h"
+#include "imlua_image.h"
+#include "imlua_palette.h"
+
+
+
+static imFile** imlua_rawcheckfile(lua_State *L, int param)  /* returns the userdata box; the imFile* inside may be NULL */
+{
+  return (imFile**)luaL_checkudata(L, param, "imFile");  /* raises unless the argument carries the "imFile" metatable */
+}
+
+static imFile* imlua_checkfile (lua_State *L, int param)
+{
+  /* the userdata holds only a pointer; Close and __gc reset it to NULL */
+  imFile *handle = *imlua_rawcheckfile(L, param);
+
+  if (handle == NULL)
+    luaL_argerror(L, param, "closed imFile");   /* raises a Lua error */
+  return handle;
+}
+
+static int imlua_pushifileerror(lua_State *L, imFile* ifile, int error)
+{
+  imFile **slot;
+  if (error != 0)   /* failure: nil, then whatever imlua_pusherror pushes */
+  {
+    lua_pushnil(L);
+    imlua_pusherror(L, error);
+    return 2;
+  }
+
+  /* success: box the handle in a userdata tagged with the "imFile" metatable */
+  slot = (imFile**) lua_newuserdata(L, sizeof(imFile*));
+  *slot = ifile;
+  luaL_getmetatable(L, "imFile");
+  lua_setmetatable(L, -2);
+  return 1;
+}
+
+
+/*****************************************************************************\
+ im.FileOpen(filename) -> imFile; or nil plus an error value
+\*****************************************************************************/
+static int imluaFileOpen (lua_State *L)
+{
+  int err;
+  const char *path = luaL_checkstring(L, 1);
+  imFile *handle = imFileOpen(path, &err);
+  return imlua_pushifileerror(L, handle, err);
+}
+
+/*****************************************************************************\
+ im.FileOpenAs(filename, format) -> imFile; or nil plus an error value
+\*****************************************************************************/
+static int imluaFileOpenAs (lua_State *L)
+{
+  int err;
+  const char *path = luaL_checkstring(L, 1);
+  const char *fmt = luaL_checkstring(L, 2);
+  imFile *handle = imFileOpenAs(path, fmt, &err);
+  return imlua_pushifileerror(L, handle, err);
+}
+
+/*****************************************************************************\
+ im.FileOpenRaw(filename) -> imFile; or nil plus an error value
+\*****************************************************************************/
+static int imluaFileOpenRaw (lua_State *L)
+{
+  int err;
+  const char *path = luaL_checkstring(L, 1);
+  imFile *handle = imFileOpenRaw(path, &err);
+  return imlua_pushifileerror(L, handle, err);
+}
+
+/*****************************************************************************\
+ im.FileNew(filename, format) -> imFile; or nil plus an error value
+\*****************************************************************************/
+static int imluaFileNew (lua_State *L)
+{
+  int err;
+  imFile *handle;
+  const char *path = luaL_checkstring(L, 1);
+  const char *fmt = luaL_checkstring(L, 2);
+  handle = imFileNew(path, fmt, &err);
+  return imlua_pushifileerror(L, handle, err);
+}
+
+/*****************************************************************************\
+ im.FileNewRaw(filename) -> imFile; or nil plus an error value
+\*****************************************************************************/
+static int imluaFileNewRaw (lua_State *L)
+{
+  int err;
+  const char *path = luaL_checkstring(L, 1);
+  imFile *handle = imFileNewRaw(path, &err);
+  return imlua_pushifileerror(L, handle, err);
+}
+
+/*****************************************************************************\
+ file:Handle(index) -> light userdata holding the pointer from imFileHandle
+\*****************************************************************************/
+static int imluaFileHandle (lua_State *L)
+{
+  lua_pushlightuserdata(L, imFileHandle(imlua_checkfile(L, 1), luaL_checkint(L, 2)));  /* both arguments are validated inside the call expression */
+  return 1;
+}
+
+/*****************************************************************************\
+ file:LoadImage([index]) -> imImage; or nil plus an error value
+\*****************************************************************************/
+static int imluaFileLoadImage (lua_State *L)
+{
+  int err;
+  imFile *source = imlua_checkfile(L, 1);
+  int frame = luaL_optint(L, 2, 0);   /* index is optional and defaults to 0 */
+  imImage *loaded = imFileLoadImage(source, frame, &err);
+  return imlua_pushimageerror(L, loaded, err);
+}
+
+/*****************************************************************************\
+ file:LoadImageFrame(index, image) -> error value
+\*****************************************************************************/
+static int imluaFileLoadImageFrame (lua_State *L)
+{
+  int err;
+  imFile *source = imlua_checkfile(L, 1);
+  int frame = luaL_checkint(L, 2);
+  imImage *target = imlua_checkimage(L, 3);
+
+  /* the outcome is reported only through err, which is the single result */
+  imFileLoadImageFrame(source, frame, target, &err);
+  imlua_pusherror(L, err);
+  return 1;
+}
+
+/*****************************************************************************\
+ file:LoadImageRegion(index, bitmap, xmin, xmax, ymin, ymax, width, height) -> imImage; or nil plus an error value
+\*****************************************************************************/
+static int imluaFileLoadImageRegion (lua_State *L)
+{
+  int err;
+  imImage *loaded;
+  imFile *source = imlua_checkfile(L, 1);
+  int frame = luaL_checkint(L, 2);
+  int as_bitmap = luaL_checkint(L, 3);
+  int x0 = luaL_checkint(L, 4), x1 = luaL_checkint(L, 5);   /* xmin, xmax */
+  int y0 = luaL_checkint(L, 6), y1 = luaL_checkint(L, 7);   /* ymin, ymax */
+  int out_w = luaL_checkint(L, 8), out_h = luaL_checkint(L, 9);   /* width, height */
+
+  /* note: the C call takes &err before the region arguments */
+  loaded = imFileLoadImageRegion(source, frame, as_bitmap, &err, x0, x1, y0, y1, out_w, out_h);
+  return imlua_pushimageerror(L, loaded, err);
+}
+
+/*****************************************************************************\
+ file:LoadBitmap([index]) -> imImage; or nil plus an error value
+\*****************************************************************************/
+static int imluaFileLoadBitmap (lua_State *L)
+{
+  int err;
+  imFile *source = imlua_checkfile(L, 1);
+  int frame = luaL_optint(L, 2, 0);   /* index is optional and defaults to 0 */
+  imImage *loaded = imFileLoadBitmap(source, frame, &err);
+  return imlua_pushimageerror(L, loaded, err);
+}
+
+/*****************************************************************************\
+ file:LoadBitmapFrame(index, image) -> error value
+\*****************************************************************************/
+static int imluaFileLoadBitmapFrame (lua_State *L)
+{
+  int err;
+  imFile *source = imlua_checkfile(L, 1);
+  int frame = luaL_checkint(L, 2);
+  imImage *target = imlua_checkimage(L, 3);
+
+  /* the outcome is reported only through err, which is the single result */
+  imFileLoadBitmapFrame(source, frame, target, &err);
+  imlua_pusherror(L, err);
+  return 1;
+}
+
+/*****************************************************************************\
+ file:SaveImage(image) -> error value
+\*****************************************************************************/
+static int imluaFileSaveImage (lua_State *L)
+{
+  imFile *dest = imlua_checkfile(L, 1);
+  imImage *picture = imlua_checkimage(L, 2);
+  int status = imFileSaveImage(dest, picture);
+  imlua_pusherror(L, status);   /* the only result */
+  return 1;
+}
+
+/*****************************************************************************\
+ file:GetInfo() -> format, compression, image_count
+\*****************************************************************************/
+static int imluaFileGetInfo (lua_State *L)
+{
+  imFile *source = imlua_checkfile(L, 1);
+  char fmt_name[10];
+  char comp_name[20];
+  int frames;
+
+  /* imFileGetInfo is expected to fill both buffers and the count */
+  imFileGetInfo(source, fmt_name, comp_name, &frames);
+
+  /* results, in order: format, compression, image count */
+  lua_pushstring(L, fmt_name);
+  lua_pushstring(L, comp_name);
+  lua_pushnumber(L, frames);
+  return 3;
+}
+
+/*****************************************************************************\
+ file:SetInfo(compression) -- no return values
+\*****************************************************************************/
+static int imluaFileSetInfo (lua_State *L)
+{
+  imFile *dest = imlua_checkfile(L, 1);
+  const char *comp_name = luaL_checkstring(L, 2);
+
+  /* forwarded as-is; no value goes back to Lua */
+  imFileSetInfo(dest, comp_name);
+  return 0;
+}
+
+/*****************************************************************************\
+ file:SetAttribute(attrib, data_type, data)
+   data: nil (count 0 and a NULL pointer are passed on), a string (IM_BYTE only,
+   kept with its NUL) or an array table; IM_CFLOAT entries are two-number arrays
+\*****************************************************************************/
+static int imluaFileSetAttribute (lua_State *L)
+{
+  int i, count = 0;
+  void *data = NULL;
+
+  imFile *ifile = imlua_checkfile(L, 1);
+  const char *attrib = luaL_checkstring(L, 2);
+  int data_type = luaL_checkint(L, 3);
+
+  /* Values are staged in a Lua userdata: the collector reclaims it even when a
+     check below raises.  Relies on IM copying the data (see the IM docs). */
+  if (lua_isstring(L, 4))
+  {
+    const char* str;
+    if (data_type != IM_BYTE)
+      luaL_argerror(L, 4, "if value is a string, then data type must be byte");
+    /* strings used to reach the table check below and were always rejected */
+    str = lua_tostring(L, 4);
+    count = strlen(str)+1;
+    data = lua_newuserdata(L, count);
+    memcpy(data, str, count);
+  }
+  else if (!lua_isnil(L, 4))
+  {
+    luaL_checktype(L, 4, LUA_TTABLE);
+    count = imlua_getn(L, 4);
+    data = lua_newuserdata(L, imDataTypeSize(data_type) * count);
+    switch (data_type)
+    {
+    case IM_BYTE:
+      {
+        imbyte *d = (imbyte*) data;
+        for (i = 0; i < count; i++)
+        {
+          lua_rawgeti(L, 4, i+1);
+          d[i] = (imbyte) luaL_checkint(L, -1);
+          lua_pop(L, 1);
+        }
+      }
+      break;
+
+    case IM_USHORT:
+      {
+        imushort *d = (imushort*) data;
+        for (i = 0; i < count; i++)
+        {
+          lua_rawgeti(L, 4, i+1);
+          d[i] = (imushort) luaL_checkint(L, -1);
+          lua_pop(L, 1);
+        }
+      }
+      break;
+
+    case IM_INT:
+      {
+        int *d = (int*) data;
+        for (i = 0; i < count; i++)
+        {
+          lua_rawgeti(L, 4, i+1);
+          d[i] = luaL_checkint(L, -1);
+          lua_pop(L, 1);
+        }
+      }
+      break;
+
+    case IM_FLOAT:
+      {
+        float *d = (float*) data;
+        for (i = 0; i < count; i++)
+        {
+          lua_rawgeti(L, 4, i+1);
+          d[i] = (float) luaL_checknumber(L, -1);
+          lua_pop(L, 1);
+        }
+      }
+      break;
+
+    case IM_CFLOAT:
+      {
+        float *data_float = (float*) data;
+        for (i = 0; i < count; i++)
+        {
+          int two;
+          float *value;
+          lua_rawgeti(L, 4, i+1);  /* fetch entry i; this push was missing before */
+          value = imlua_toarrayfloat(L, -1, &two, 1);
+          if (two != 2)
+          {
+            free(value);
+            luaL_argerror(L, 4, "invalid value");
+          }
+          data_float[2*i] = value[0];  /* element i occupies two floats (was [i], [i+1]) */
+          data_float[2*i+1] = value[1];
+          free(value);
+          lua_pop(L, 1);
+        }
+      }
+      break;
+    }
+  }
+
+  imFileSetAttribute(ifile, attrib, data_type, count, data);
+  return 0;
+}
+
+/*****************************************************************************\
+ file:GetAttribute(attrib [, as_string]) -> data, data_type; or nil if absent
+\*****************************************************************************/
+static int imluaFileGetAttribute (lua_State *L)
+{
+  int data_type;
+  int i, count;
+  const void *data;
+  int as_string = 0;
+
+  imFile *ifile = imlua_checkfile(L, 1);
+  const char *attrib = luaL_checkstring(L, 2);
+
+  data = imFileGetAttribute(ifile, attrib, &data_type, &count);
+  if (!data)
+  {
+    lua_pushnil(L);  /* unknown attribute: a single nil */
+    return 1;
+  }
+
+  /* a string result is produced only for IM_BYTE data and a boolean third argument */
+  if (data_type == IM_BYTE && lua_isboolean(L, 3))
+    as_string = lua_toboolean(L, 3);
+  if (!as_string)
+    lua_newtable(L);  /* all other results are array tables */
+
+  switch (data_type)
+  {
+  case IM_BYTE:
+    {
+      if (as_string)
+      {
+        lua_pushstring(L, (const char*)data);  /* reads up to the first NUL byte */
+      }
+      else
+      {
+        imbyte *data_byte = (imbyte*) data;
+        for (i = 0; i < count; i++, data_byte++)
+        {
+          lua_pushnumber(L, *data_byte);
+          lua_rawseti(L, -2, i+1);
+        }
+      }
+    }
+    break;
+
+  case IM_USHORT:
+    {
+      imushort *data_ushort = (imushort*) data;
+      for (i = 0; i < count; i++, data_ushort++)  /* was += 2: skipped every other value and ran past the end */
+      {
+        lua_pushnumber(L, *data_ushort);
+        lua_rawseti(L, -2, i+1);
+      }
+    }
+    break;
+
+  case IM_INT:
+    {
+      int *data_int = (int*) data;
+      for (i = 0; i < count; i++, data_int++)
+      {
+        lua_pushnumber(L, *data_int);
+        lua_rawseti(L, -2, i+1);
+      }
+    }
+    break;
+
+  case IM_FLOAT:
+    {
+      float *data_float = (float*) data;
+      for (i = 0; i < count; i++, data_float++)
+      {
+        lua_pushnumber(L, *data_float);
+        lua_rawseti(L, -2, i+1);
+      }
+    }
+    break;
+
+  case IM_CFLOAT:
+    {
+      float *data_float = (float*) data;
+      for (i = 0; i < count; i++, data_float += 2)  /* two floats per complex element */
+      {
+        imlua_newarrayfloat(L, data_float, 2, 1);
+        lua_rawseti(L, -2, i+1);
+      }
+    }
+    break;
+  }
+
+  lua_pushnumber(L, data_type);  /* second result: the stored data type */
+
+  return 2;
+}
+
+/*****************************************************************************\
+ file:GetAttributeList() -> array table holding the attribute names
+\*****************************************************************************/
+static int imluaFileGetAttributeList (lua_State *L)
+{
+  int i, attrib_count;
+  char **attrib;
+  imFile* ifile = imlua_checkfile(L, 1);
+
+  /* first pass: with a NULL list only attrib_count is needed from the call */
+  imFileGetAttributeList(ifile, NULL, &attrib_count);
+
+  /* The name list used to be malloc'ed and never released.  As a userdata it
+     is reclaimed by Lua's collector, also if a push below raises an error. */
+  attrib = (char**) lua_newuserdata(L, attrib_count * sizeof(char*));
+  imFileGetAttributeList(ifile, attrib, &attrib_count);
+
+  lua_newtable(L);
+  for (i = 0; i < attrib_count; i++)
+  {
+    lua_pushstring(L, attrib[i]);
+    lua_rawseti(L, -2, i+1);
+  }
+  return 1;
+}
+
+/*****************************************************************************\
+ file:GetPalette() -> palette object
+\*****************************************************************************/
+static int imluaFileGetPalette (lua_State *L)
+{
+  int used;
+  imFile *source = imlua_checkfile(L, 1);
+  long *entries = malloc(256 * sizeof(long));   /* not freed here; handed to imlua_pushpalette */
+  imFileGetPalette(source, entries, &used);
+  imlua_pushpalette(L, entries, used);
+  return 1;
+}
+
+/*****************************************************************************\
+ file:SetPalette(pal) -- no return values
+\*****************************************************************************/
+static int imluaFileSetPalette (lua_State *L)
+{
+  imFile *dest = imlua_checkfile(L, 1);
+  imluaPalette *palette = imlua_checkpalette(L, 2);
+  imFileSetPalette(dest, palette->color, palette->count);   /* colors and their count */
+  return 0;
+}
+
+/*****************************************************************************\
+ file:ReadImageInfo([index]) -> error [, width, height, file_color_mode, file_data_type]
+\*****************************************************************************/
+static int imluaFileReadImageInfo (lua_State *L)
+{
+  imFile *source = imlua_checkfile(L, 1);
+  int frame = luaL_optint(L, 2, 0);
+  int w, h, color_mode, data_type;
+  int status = imFileReadImageInfo(source, frame, &w, &h, &color_mode, &data_type);
+
+  /* the error value always comes first */
+  imlua_pusherror(L, status);
+  if (status == 0)
+  {
+    /* only a successful read adds the four image properties */
+    lua_pushnumber(L, w);
+    lua_pushnumber(L, h);
+    lua_pushnumber(L, color_mode);
+    lua_pushnumber(L, data_type);
+    return 5;
+  }
+
+  return 1;
+}
+
+/*****************************************************************************\
+ file:WriteImageInfo(width, height, user_color_mode, user_data_type) -> error value
+\*****************************************************************************/
+static int imluaFileWriteImageInfo (lua_State *L)
+{
+  imFile *dest = imlua_checkfile(L, 1);
+  int w = luaL_checkint(L, 2), h = luaL_checkint(L, 3);
+  int color_mode = luaL_checkint(L, 4);
+  int data_type = luaL_checkint(L, 5);
+  int status = imFileWriteImageInfo(dest, w, h, color_mode, data_type);
+
+  imlua_pusherror(L, status);   /* the only result */
+  return 1;
+}
+
+/*****************************************************************************\
+ file:ReadImageData(data, convert2bitmap, color_mode_flags) -> error value
+\*****************************************************************************/
+static int imluaFileReadImageData (lua_State *L)
+{
+  imFile *ifile = imlua_checkfile(L, 1);
+  void* data = lua_touserdata(L, 2);
+  int convert2bitmap = luaL_checkint(L, 3);
+  int color_mode_flags = luaL_checkint(L, 4);
+
+  /* lua_touserdata yields NULL for anything that is not a (light) userdata;
+     reject that here instead of passing a NULL buffer on to IM */
+  luaL_argcheck(L, data != NULL, 2, "userdata expected");
+  imlua_pusherror(L, imFileReadImageData(ifile, data, convert2bitmap, color_mode_flags));
+  return 1;
+}
+
+/*****************************************************************************\
+ file:WriteImageData(data) -> error value
+\*****************************************************************************/
+static int imluaFileWriteImageData (lua_State *L)
+{
+  imFile *ifile = imlua_checkfile(L, 1);
+  void* data = lua_touserdata(L, 2);
+
+  /* lua_touserdata yields NULL for anything that is not a (light) userdata;
+     reject that here instead of passing a NULL buffer on to IM */
+  luaL_argcheck(L, data != NULL, 2, "userdata expected");
+  imlua_pusherror(L, imFileWriteImageData(ifile, data));
+  return 1;
+}
+
+/*****************************************************************************\
+ file:Close() -- also registered as im.FileClose(file)
+\*****************************************************************************/
+static int imluaFileClose (lua_State *L)
+{
+  imFile **slot = imlua_rawcheckfile(L, 1);
+  imFile *handle = *slot;
+  if (handle == NULL)
+    luaL_argerror(L, 1, "closed imFile");   /* closing twice is an argument error */
+  imFileClose(handle);
+  *slot = NULL;   /* later calls and __gc see the file as closed */
+  return 0;
+}
+
+/*****************************************************************************\
+ __gc metamethod: closes the file if it is still open
+\*****************************************************************************/
+static int imluaFile_gc (lua_State *L)
+{
+  imFile **slot = (imFile **)lua_touserdata(L, 1);
+  if (slot == NULL || *slot == NULL)
+    return 0;   /* not a userdata, or the file was already closed */
+
+  imFileClose(*slot);
+  *slot = NULL;   /* guards against a second close */
+  return 0;
+}
+
+/*****************************************************************************\
+ __tostring metamethod -> "imFile(<address>)", with "-closed" appended after Close
+\*****************************************************************************/
+static int imluaFile_tostring (lua_State *L)
+{
+  imFile **ifile_p = imlua_rawcheckfile(L, 1);  /* raises for a non-imFile argument instead of dereferencing NULL */
+  lua_pushfstring(L, "imFile(%p)%s", ifile_p, (*ifile_p)? "": "-closed");
+  return 1;
+}
+
+/*****************************************************************************\
+\*****************************************************************************/
+static const luaL_reg imfile_lib[] = {  /* functions imlua_open_file adds to the table on top of the stack */
+  {"FileOpen", imluaFileOpen},
+  {"FileOpenAs", imluaFileOpenAs},
+  {"FileOpenRaw", imluaFileOpenRaw},
+  {"FileNew", imluaFileNew},
+  {"FileNewRaw", imluaFileNewRaw},
+  {"FileClose", imluaFileClose},  /* same C function as the "Close" method */
+  {NULL, NULL}  /* end-of-list sentinel */
+};
+
+static const luaL_reg imfile_metalib[] = {  /* methods and metamethods stored in the "imFile" metatable by createmeta */
+  {"Handle", imluaFileHandle},
+  {"Close", imluaFileClose},
+  {"LoadImage", imluaFileLoadImage},
+  {"LoadImageFrame", imluaFileLoadImageFrame},
+  {"LoadImageRegion", imluaFileLoadImageRegion},
+  {"LoadBitmap", imluaFileLoadBitmap},
+  {"LoadBitmapFrame", imluaFileLoadBitmapFrame},
+  {"SaveImage", imluaFileSaveImage},
+  {"GetInfo", imluaFileGetInfo},
+  {"SetInfo", imluaFileSetInfo},
+  {"SetAttribute", imluaFileSetAttribute},
+  {"GetAttribute", imluaFileGetAttribute},
+  {"GetAttributeList", imluaFileGetAttributeList},
+  {"GetPalette", imluaFileGetPalette},
+  {"SetPalette", imluaFileSetPalette},
+  {"ReadImageInfo", imluaFileReadImageInfo},
+  {"WriteImageInfo", imluaFileWriteImageInfo},
+  {"ReadImageData", imluaFileReadImageData},
+  {"WriteImageData", imluaFileWriteImageData},
+
+  {"__gc", imluaFile_gc},  /* closes a file that is still open when collected */
+  {"__tostring", imluaFile_tostring},
+
+  {NULL, NULL}  /* end-of-list sentinel */
+};
+
+static void createmeta (lua_State *L)
+{
+  /* Methods are looked up through the metatable itself (mt.__index = mt),
+     which is what lets scripts write file:Method(...). */
+  luaL_newmetatable(L, "imFile");           /* stack: mt */
+  lua_pushvalue(L, -1);                     /* stack: mt, mt */
+  lua_setfield(L, -2, "__index");           /* stack: mt */
+  luaL_register(L, NULL, imfile_metalib);   /* adds the methods and metamethods to mt */
+  lua_pop(L, 1);                            /* stack is as it was on entry */
+}
+
+void imlua_open_file (lua_State *L)
+{
+  /* precondition: the caller left the "im" table at the top of the stack */
+  createmeta(L);  /* stack-neutral: builds the "imFile" metatable and pops it */
+  luaL_register(L, NULL, imfile_lib);  /* NULL libname: functions go into the table on top */
+}
diff --git a/src/lua5/imlua_image.c b/src/lua5/imlua_image.c
new file mode 100644
index 0000000..22ed38c
--- /dev/null
+++ b/src/lua5/imlua_image.c
@@ -0,0 +1,1061 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua_image.c,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#include <string.h>
+#include <memory.h>
+#include <stdlib.h>
+
+#include "im.h"
+#include "im_image.h"
+#include "im_util.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+#include "imlua.h"
+#include "imlua_image.h"
+#include "imlua_palette.h"
+#include "imlua_aux.h"
+
+
+static imImage** imlua_rawcheckimage(lua_State *L, int param)  /* returns the userdata box; the imImage* inside may be NULL */
+{
+  return (imImage**) luaL_checkudata(L, param, "imImage");  /* raises unless the argument carries the "imImage" metatable */
+}
+
+imImage* imlua_checkimage(lua_State *L, int param)
+{
+  /* the boxed pointer may be NULL; that state is reported as "destroyed" */
+  imImage *image = *imlua_rawcheckimage(L, param);
+
+  if (image == NULL)
+    luaL_argerror(L, param, "destroyed imImage");   /* raises a Lua error */
+  return image;
+}
+
+int imlua_pushimageerror(lua_State *L, imImage* image, int error)
+{
+  /* success: one result, the image (imlua_pushimage pushes nil for NULL) */
+  if (error == 0)
+  {
+    imlua_pushimage(L, image);
+    return 1;
+  }
+
+  /* failure: two results, nil and then what imlua_pusherror pushes */
+  lua_pushnil(L);
+  imlua_pusherror(L, error);
+  return 2;
+}
+
+void imlua_pushimage(lua_State *L, imImage* image)
+{
+  if (image != NULL)
+  {
+    /* box the pointer in a userdata and tag it with the "imImage" metatable */
+    *(imImage**) lua_newuserdata(L, sizeof(imImage*)) = image;
+    luaL_getmetatable(L, "imImage");
+    lua_setmetatable(L, -2);
+  }
+  else
+    lua_pushnil(L);   /* NULL image: nil is pushed instead */
+}
+
+/*****************************************************************************\
+ image channel, for indexing: userdata holding an image pointer and a channel
+\*****************************************************************************/
+static imluaImageChannel *imlua_newimagechannel (lua_State *L, imImage *image, int channel)
+{
+  imluaImageChannel *ref = (imluaImageChannel*) lua_newuserdata(L, sizeof *ref);
+  luaL_getmetatable(L, "imImageChannel");
+  lua_setmetatable(L, -2);   /* the new userdata stays on top of the stack */
+  ref->channel = channel;
+  ref->image = image;   /* the pointer is stored as given */
+  return ref;
+}
+
+static imluaImageChannel* imlua_checkimagechannel (lua_State *L, int param)
+{
+  return (imluaImageChannel*) luaL_checkudata(L, param, "imImageChannel");  /* raises unless the argument carries the "imImageChannel" metatable */
+}
+
+/*****************************************************************************\
+ image row, for indexing: userdata holding an image pointer, channel and row
+\*****************************************************************************/
+static imluaImageRow *imlua_newimagerow (lua_State *L, imImage *image, int channel, int row)
+{
+  imluaImageRow *ref = (imluaImageRow*) lua_newuserdata(L, sizeof *ref);
+  luaL_getmetatable(L, "imImageChannelRow");
+  lua_setmetatable(L, -2);   /* the new userdata stays on top of the stack */
+  ref->row = row;
+  ref->channel = channel;
+  ref->image = image;   /* the pointer is stored as given */
+  return ref;
+}
+
+static imluaImageRow* imlua_checkimagerow (lua_State *L, int param)
+{
+  return (imluaImageRow*) luaL_checkudata(L, param, "imImageChannelRow");  /* raises unless the argument carries the "imImageChannelRow" metatable */
+}
+
+/*****************************************************************************\
+ im.ImageCreate(width, height, color_space, data_type) -> imImage, or nil
+\*****************************************************************************/
+static int imluaImageCreate (lua_State *L)
+{
+  int w = luaL_checkint(L, 1), h = luaL_checkint(L, 2);
+  int space = luaL_checkint(L, 3);
+  int type = luaL_checkint(L, 4);
+  imImage *created = imImageCreate(w, h, space, type);
+
+  /* a NULL result reaches Lua as nil (see imlua_pushimage) */
+  imlua_pushimage(L, created);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:AddAlpha() -- no return values
+\*****************************************************************************/
+static int imluaImageAddAlpha (lua_State *L)
+{
+  imImageAddAlpha(imlua_checkimage(L, 1));  /* argument 1 must be a live imImage */
+  return 0;
+}
+
+/*****************************************************************************\
+ image:Reshape(width, height) -- no return values
+\*****************************************************************************/
+static int imluaImageReshape (lua_State *L)
+{
+  imImage *target = imlua_checkimage(L, 1);
+  int new_w = luaL_checkint(L, 2), new_h = luaL_checkint(L, 3);
+
+  /* forwarded to imImageReshape; no value goes back to Lua */
+  imImageReshape(target, new_w, new_h);
+  return 0;
+}
+
+/*****************************************************************************\
+ image:Copy(dst_image) -- no return values
+\*****************************************************************************/
+static int imluaImageCopy (lua_State *L)
+{
+  imImage *from = imlua_checkimage(L, 1), *to = imlua_checkimage(L, 2);
+
+  /* imlua_match vets the pair first (assumed to raise on mismatch -- confirm) */
+  imlua_match(L, from, to);
+  imImageCopy(from, to);
+  return 0;
+}
+
+/*****************************************************************************\
+ image:CopyData(dst_image) -- no return values
+\*****************************************************************************/
+static int imluaImageCopyData (lua_State *L)
+{
+  imImage *from = imlua_checkimage(L, 1);
+  imImage *to = imlua_checkimage(L, 2);
+  /* same imlua_match pre-check as image:Copy, then the data-only copy */
+  imlua_match(L, from, to);
+  imImageCopyData(from, to);
+  return 0;
+}
+
+/*****************************************************************************\
+ image:Duplicate() -> new imImage, or nil
+\*****************************************************************************/
+static int imluaImageDuplicate (lua_State *L)
+{
+  imImage *original = imlua_checkimage(L, 1);
+  /* a NULL result from imImageDuplicate is turned into nil by imlua_pushimage */
+  imlua_pushimage(L, imImageDuplicate(original));
+  return 1;
+}
+
+/*****************************************************************************\
+ image:Clone() -> new imImage, or nil
+\*****************************************************************************/
+static int imluaImageClone (lua_State *L)
+{
+  imImage *original = imlua_checkimage(L, 1);
+  /* a NULL result from imImageClone is turned into nil by imlua_pushimage */
+  imlua_pushimage(L, imImageClone(original));
+  return 1;
+}
+
+/*****************************************************************************\
+ image:SetAttribute(attrib, data_type, data)
+   data: nil (count 0 and a NULL pointer are passed on), a string (IM_BYTE only,
+   kept with its NUL) or an array table; IM_CFLOAT entries are two-number arrays
+\*****************************************************************************/
+static int imluaImageSetAttribute (lua_State *L)
+{
+  int i, count = 0;
+  void *data = NULL;
+
+  imImage* image = imlua_checkimage(L, 1);
+  const char *attrib = luaL_checkstring(L, 2);
+  int data_type = luaL_checkint(L, 3);
+
+  /* Values are staged in a Lua userdata: the collector reclaims it even when a
+     check below raises.  Relies on IM copying the data (see the IM docs). */
+  if (lua_isstring(L, 4))
+  {
+    const char* str;
+    if (data_type != IM_BYTE)
+      luaL_argerror(L, 4, "if value is string, then data type must be byte");
+    /* strings used to reach the table check below and were always rejected */
+    str = lua_tostring(L, 4);
+    count = strlen(str)+1;
+    data = lua_newuserdata(L, count);
+    memcpy(data, str, count);
+  }
+  else if (!lua_isnil(L, 4))
+  {
+    luaL_checktype(L, 4, LUA_TTABLE);
+    count = imlua_getn(L, 4);
+    data = lua_newuserdata(L, imDataTypeSize(data_type) * count);
+    switch (data_type)
+    {
+    case IM_BYTE:
+      {
+        imbyte *data_byte = (imbyte*) data;
+        for (i = 0; i < count; i++)
+        {
+          lua_rawgeti(L, 4, i+1);
+          data_byte[i] = (imbyte)luaL_checkint(L, -1);
+          lua_pop(L, 1);
+        }
+      }
+      break;
+
+    case IM_USHORT:
+      {
+        imushort *data_ushort = (imushort*) data;
+        for (i = 0; i < count; i++)
+        {
+          lua_rawgeti(L, 4, i+1);
+          data_ushort[i] = (imushort)luaL_checkint(L, -1);
+          lua_pop(L, 1);
+        }
+      }
+      break;
+
+    case IM_INT:
+      {
+        int *data_int = (int*) data;
+        for (i = 0; i < count; i++)
+        {
+          lua_rawgeti(L, 4, i+1);
+          data_int[i] = luaL_checkint(L, -1);
+          lua_pop(L, 1);
+        }
+      }
+      break;
+
+    case IM_FLOAT:
+      {
+        float *data_float = (float*) data;
+        for (i = 0; i < count; i++)
+        {
+          lua_rawgeti(L, 4, i+1);
+          data_float[i] = (float) luaL_checknumber(L, -1);
+          lua_pop(L, 1);
+        }
+      }
+      break;
+
+    case IM_CFLOAT:
+      {
+        float *data_float = (float*) data;
+        for (i = 0; i < count; i++)
+        {
+          int two;
+          float *value;
+          lua_rawgeti(L, 4, i+1);  /* fetch entry i; this push was missing before */
+          value = imlua_toarrayfloat(L, -1, &two, 1);
+          if (two != 2)
+          {
+            free(value);
+            luaL_argerror(L, 4, "invalid value");
+          }
+          data_float[2*i] = value[0];  /* element i occupies two floats (was [i], [i+1]) */
+          data_float[2*i+1] = value[1];
+          free(value);
+          lua_pop(L, 1);
+        }
+      }
+      break;
+    }
+  }
+
+  imImageSetAttribute(image, attrib, data_type, count, data);
+  return 0;
+}
+
+/*****************************************************************************\
+ image:GetAttribute(attrib [, as_string]) -> data, data_type; or nil if absent
+\*****************************************************************************/
+static int imluaImageGetAttribute (lua_State *L)
+{
+  int data_type;
+  int i, count;
+  const void *data;
+  int as_string = 0;
+
+  imImage* image = imlua_checkimage(L, 1);
+  const char *attrib = luaL_checkstring(L, 2);
+
+  data = imImageGetAttribute(image, attrib, &data_type, &count);
+  if (!data)
+  {
+    lua_pushnil(L);  /* unknown attribute: a single nil */
+    return 1;
+  }
+
+  /* a string result is produced only for IM_BYTE data and a boolean third argument */
+  if (data_type == IM_BYTE && lua_isboolean(L, 3))
+    as_string = lua_toboolean(L, 3);
+  if (!as_string)
+    lua_newtable(L);  /* all other results are array tables */
+
+  switch (data_type)
+  {
+  case IM_BYTE:
+    {
+      if (as_string)
+      {
+        lua_pushstring(L, (const char*)data);  /* reads up to the first NUL byte */
+      }
+      else
+      {
+        imbyte *data_byte = (imbyte*) data;
+        for (i = 0; i < count; i++, data_byte++)
+        {
+          lua_pushnumber(L, *data_byte);
+          lua_rawseti(L, -2, i+1);
+        }
+      }
+    }
+    break;
+
+  case IM_USHORT:
+    {
+      imushort *data_ushort = (imushort*) data;
+      for (i = 0; i < count; i++, data_ushort++)  /* was += 2: skipped every other value and ran past the end */
+      {
+        lua_pushnumber(L, *data_ushort);
+        lua_rawseti(L, -2, i+1);
+      }
+    }
+    break;
+
+  case IM_INT:
+    {
+      int *data_int = (int*) data;
+      for (i = 0; i < count; i++, data_int++)
+      {
+        lua_pushnumber(L, *data_int);
+        lua_rawseti(L, -2, i+1);
+      }
+    }
+    break;
+
+  case IM_FLOAT:
+    {
+      float *data_float = (float*) data;
+      for (i = 0; i < count; i++, data_float++)
+      {
+        lua_pushnumber(L, *data_float);
+        lua_rawseti(L, -2, i+1);
+      }
+    }
+    break;
+
+  case IM_CFLOAT:
+    {
+      float *data_float = (float*) data;
+      for (i = 0; i < count; i++, data_float += 2)  /* two floats per complex element */
+      {
+        imlua_newarrayfloat(L, data_float, 2, 1);
+        lua_rawseti(L, -2, i+1);
+      }
+    }
+    break;
+  }
+
+  lua_pushnumber(L, data_type);  /* second result: the stored data type */
+
+  return 2;
+}
+
+/*****************************************************************************\
+ image:GetAttributeList() -> array table holding the attribute names
+\*****************************************************************************/
+static int imluaImageGetAttributeList (lua_State *L)
+{
+  int i, attrib_count;
+  char **attrib;
+  imImage* image = imlua_checkimage(L, 1);
+
+  /* first pass: with a NULL list only attrib_count is needed from the call */
+  imImageGetAttributeList(image, NULL, &attrib_count);
+
+  /* The name list used to be malloc'ed and never released.  As a userdata it
+     is reclaimed by Lua's collector, also if a push below raises an error. */
+  attrib = (char**) lua_newuserdata(L, attrib_count * sizeof(char*));
+  imImageGetAttributeList(image, attrib, &attrib_count);
+
+  lua_newtable(L);
+  for (i = 0; i < attrib_count; i++)
+  {
+    lua_pushstring(L, attrib[i]);
+    lua_rawseti(L, -2, i+1);
+  }
+  return 1;
+}
+
+/*****************************************************************************\
+ image:Clear() -- no return values
+\*****************************************************************************/
+static int imluaImageClear (lua_State *L)
+{
+  imImageClear(imlua_checkimage(L, 1));  /* argument 1 must be a live imImage */
+  return 0;
+}
+
+/*****************************************************************************\
+ image:isBitmap() -> boolean
+\*****************************************************************************/
+static int imluaImageIsBitmap (lua_State *L)
+{
+  lua_pushboolean(L, imImageIsBitmap(imlua_checkimage(L, 1)));  /* any non-zero result becomes true */
+  return 1;
+}
+
+/*****************************************************************************\
+ image:GetPalette() -> palette object holding a 256-entry copy
+\*****************************************************************************/
+static int imluaImageGetPalette (lua_State *L)
+{
+  imImage *owner = imlua_checkimage(L, 1);
+  long *copy = malloc(256 * sizeof(long));   /* not freed here; handed to imlua_pushpalette */
+  memcpy(copy, owner->palette, 256 * sizeof(long));   /* NOTE(review): assumes owner->palette is non-NULL -- confirm */
+  imlua_pushpalette(L, copy, 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:SetPalette(pal) -- no return values
+\*****************************************************************************/
+static int imluaImageSetPalette (lua_State *L)
+{
+  imImage *image = imlua_checkimage(L, 1);
+  imluaPalette *pal = imlua_checkpalette(L, 2);
+  /* at least 256 entries, because image:GetPalette always copies 256 */
+  int entries = (pal->count > 256)? pal->count: 256;
+  long *color = calloc(entries, sizeof(long));
+
+  if (!color)
+    luaL_error(L, "not enough memory");
+
+  /* Per the IM documentation imImageSetPalette stores the pointer it is
+     given.  pal->color still belongs to the Lua palette object, so passing
+     it directly would give the array two owners; hand over a private copy.
+     NOTE(review): confirm the ownership rule against the IM version in use. */
+  memcpy(color, pal->color, sizeof(long) * pal->count);
+  imImageSetPalette(image, color, pal->count);
+  return 0;
+}
+
+/*****************************************************************************\
+ image:CopyAttributes(dst_image)
+
+ Calls imImageCopyAttributes(image, dst_image). Nothing is returned to Lua.
+\*****************************************************************************/
+static int imluaImageCopyAttributes (lua_State *L)
+{
+  imImage *source = imlua_checkimage(L, 1);
+  imImage *target = imlua_checkimage(L, 2);
+
+  imImageCopyAttributes(source, target);
+  return 0;
+}
+
+/*****************************************************************************\
+ image:MatchSize(image2)
+
+ Returns the result of imImageMatchSize(image, image2) as a Lua boolean.
+\*****************************************************************************/
+static int imluaImageMatchSize (lua_State *L)
+{
+  imImage *self = imlua_checkimage(L, 1);
+  imImage *other = imlua_checkimage(L, 2);
+  int matches = imImageMatchSize(self, other);
+
+  lua_pushboolean(L, matches);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:MatchColor(image2)
+
+ Returns the result of imImageMatchColor(image, image2) as a Lua boolean.
+\*****************************************************************************/
+static int imluaImageMatchColor (lua_State *L)
+{
+  imImage *self = imlua_checkimage(L, 1);
+  imImage *other = imlua_checkimage(L, 2);
+  int matches = imImageMatchColor(self, other);
+
+  lua_pushboolean(L, matches);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:MatchDataType(image2)
+
+ Returns the result of imImageMatchDataType(image, image2) as a Lua boolean.
+\*****************************************************************************/
+static int imluaImageMatchDataType (lua_State *L)
+{
+  imImage *self = imlua_checkimage(L, 1);
+  imImage *other = imlua_checkimage(L, 2);
+  int matches = imImageMatchDataType(self, other);
+
+  lua_pushboolean(L, matches);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:MatchColorSpace(image2)
+
+ Returns the result of imImageMatchColorSpace(image, image2) as a Lua boolean.
+\*****************************************************************************/
+static int imluaImageMatchColorSpace (lua_State *L)
+{
+  imImage *self = imlua_checkimage(L, 1);
+  imImage *other = imlua_checkimage(L, 2);
+  int matches = imImageMatchColorSpace(self, other);
+
+  lua_pushboolean(L, matches);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:Match(image2)
+
+ Returns the result of imImageMatch(image, image2) as a Lua boolean.
+\*****************************************************************************/
+static int imluaImageMatch (lua_State *L)
+{
+  imImage *self = imlua_checkimage(L, 1);
+  imImage *other = imlua_checkimage(L, 2);
+  int matches = imImageMatch(self, other);
+
+  lua_pushboolean(L, matches);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:SetBinary()
+
+ Calls imImageSetBinary on the image. Nothing is returned to Lua.
+\*****************************************************************************/
+static int imluaImageSetBinary (lua_State *L)
+{
+  imImage *image = imlua_checkimage(L, 1);
+
+  imImageSetBinary(image);
+  return 0;
+}
+
+/*****************************************************************************\
+ image:MakeBinary()
+
+ Calls imImageMakeBinary on the image. Nothing is returned to Lua.
+\*****************************************************************************/
+static int imluaImageMakeBinary (lua_State *L)
+{
+  imImage *image = imlua_checkimage(L, 1);
+
+  imImageMakeBinary(image);
+  return 0;
+}
+
+/*****************************************************************************\
+ image:Width()
+
+ Returns image->width as a Lua number.
+\*****************************************************************************/
+static int imluaImageWidth(lua_State *L)
+{
+  imImage *image = imlua_checkimage(L, 1);
+  lua_Number width = (lua_Number) image->width;
+
+  lua_pushnumber(L, width);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:Height()
+
+ Returns image->height as a Lua number.
+\*****************************************************************************/
+static int imluaImageHeight(lua_State *L)
+{
+  imImage *image = imlua_checkimage(L, 1);
+  lua_Number height = (lua_Number) image->height;
+
+  lua_pushnumber(L, height);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:Depth()
+
+ Returns image->depth as a Lua number.
+\*****************************************************************************/
+static int imluaImageDepth(lua_State *L)
+{
+  imImage *image = imlua_checkimage(L, 1);
+  lua_Number depth = (lua_Number) image->depth;
+
+  lua_pushnumber(L, depth);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:DataType()
+
+ Returns image->data_type as a Lua number.
+\*****************************************************************************/
+static int imluaImageDataType(lua_State *L)
+{
+  imImage *image = imlua_checkimage(L, 1);
+  lua_Number data_type = (lua_Number) image->data_type;
+
+  lua_pushnumber(L, data_type);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:ColorSpace()
+
+ Returns image->color_space as a Lua number.
+\*****************************************************************************/
+static int imluaImageColorSpace(lua_State *L)
+{
+  imImage *image = imlua_checkimage(L, 1);
+  lua_Number color_space = (lua_Number) image->color_space;
+
+  lua_pushnumber(L, color_space);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:HasAlpha()
+
+ Returns image->has_alpha as a Lua number (not a boolean). In Lua the
+ number 0 is still true in a condition, so callers must compare the result
+ against 0 explicitly.
+\*****************************************************************************/
+static int imluaImageHasAlpha(lua_State *L)
+{
+  imImage *image = imlua_checkimage(L, 1);
+  lua_Number has_alpha = (lua_Number) image->has_alpha;
+
+  lua_pushnumber(L, has_alpha);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.FileImageLoad(filename, [index])
+
+ index defaults to 0. The image and the error code produced by
+ imFileImageLoad are handed to imlua_pushimageerror, which decides what is
+ returned to Lua.
+\*****************************************************************************/
+static int imluaFileImageLoad (lua_State *L)
+{
+  int error;
+  imImage *image;
+  const char *filename = luaL_checkstring(L, 1);
+  int index = luaL_optint(L, 2, 0);
+
+  image = imFileImageLoad(filename, index, &error);
+  return imlua_pushimageerror(L, image, error);
+}
+
+/*****************************************************************************\
+ im.FileImageLoadRegion(filename, index, bitmap, xmin, xmax, ymin, ymax,
+                        width, height)
+
+ All nine arguments are required. The image and the error code produced by
+ imFileImageLoadRegion are handed to imlua_pushimageerror, which decides
+ what is returned to Lua.
+\*****************************************************************************/
+static int imluaFileImageLoadRegion (lua_State *L)
+{
+  const char *filename;
+  int index, bitmap;
+  int xmin, xmax, ymin, ymax;
+  int width, height;
+  int error;
+  imImage *image;
+
+  /* arguments are validated strictly in positional order */
+  filename = luaL_checkstring(L, 1);
+  index    = luaL_checkint(L, 2);
+  bitmap   = luaL_checkint(L, 3);
+  xmin     = luaL_checkint(L, 4);
+  xmax     = luaL_checkint(L, 5);
+  ymin     = luaL_checkint(L, 6);
+  ymax     = luaL_checkint(L, 7);
+  width    = luaL_checkint(L, 8);
+  height   = luaL_checkint(L, 9);
+
+  image = imFileImageLoadRegion(filename, index, bitmap, &error, xmin, xmax, ymin, ymax, width, height);
+  return imlua_pushimageerror(L, image, error);
+}
+
+/*****************************************************************************\
+ im.FileImageLoadBitmap(filename, [index])
+
+ index defaults to 0. The image and the error code produced by
+ imFileImageLoadBitmap are handed to imlua_pushimageerror, which decides
+ what is returned to Lua.
+\*****************************************************************************/
+static int imluaFileImageLoadBitmap (lua_State *L)
+{
+  int error;
+  imImage *image;
+  const char *filename = luaL_checkstring(L, 1);
+  int index = luaL_optint(L, 2, 0);
+
+  image = imFileImageLoadBitmap(filename, index, &error);
+  return imlua_pushimageerror(L, image, error);
+}
+
+/*****************************************************************************\
+ im.FileImageSave(filename, format, image)
+
+ Saves with imFileImageSave and returns one value: its result code as
+ pushed by imlua_pusherror.
+\*****************************************************************************/
+static int imluaFileImageSave (lua_State *L)
+{
+  const char *file_name = luaL_checkstring(L, 1);
+  const char *format = luaL_checkstring(L, 2);
+  imImage *image = imlua_checkimage(L, 3);
+  int error = imFileImageSave(file_name, format, image);
+
+  imlua_pusherror(L, error);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:Save(filename, format)
+
+ Method form of im.FileImageSave: saves with imFileImageSave and returns
+ one value, its result code as pushed by imlua_pusherror.
+\*****************************************************************************/
+static int imluaImageSave (lua_State *L)
+{
+  imImage *image = imlua_checkimage(L, 1);
+  const char *file_name = luaL_checkstring(L, 2);
+  const char *format = luaL_checkstring(L, 3);
+  int error = imFileImageSave(file_name, format, image);
+
+  imlua_pusherror(L, error);
+  return 1;
+}
+
+/*****************************************************************************\
+ image:Destroy()
+
+ Destroys the image and stores NULL in the userdata so that later uses can
+ tell it is gone. Calling it on an already destroyed image raises an
+ argument error.
+\*****************************************************************************/
+static int imluaImageDestroy (lua_State *L)
+{
+  imImage** slot = imlua_rawcheckimage(L, 1);
+
+  if (*slot == NULL)
+    luaL_argerror(L, 1, "destroyed imImage");
+
+  imImageDestroy(*slot);
+  *slot = NULL;  /* NULL is the "destroyed" marker */
+  return 0;
+}
+
+/*****************************************************************************\
+ gc
+
+ __gc metamethod: destroys the image unless image:Destroy() already did,
+ in which case the stored pointer is NULL and nothing happens.
+\*****************************************************************************/
+static int imluaImage_gc (lua_State *L)
+{
+  imImage** slot = imlua_rawcheckimage(L, 1);
+
+  if (*slot == NULL)
+    return 0;
+
+  imImageDestroy(*slot);
+  *slot = NULL;  /* NULL is the "destroyed" marker */
+  return 0;
+}
+
+/*****************************************************************************\
+ image tostring
+
+ __tostring metamethod. Describes a live image by its size, color space,
+ data type and depth; a destroyed one only by the userdata address.
+ The argument goes through imlua_rawcheckimage, like __gc and Destroy, so
+ that a value that is not an imImage userdata is never dereferenced.
+\*****************************************************************************/
+static int imluaImage_tostring (lua_State *L)
+{
+  imImage** image_p = imlua_rawcheckimage(L, 1);
+  imImage *image = *image_p;
+
+  if (!image)
+  {
+    lua_pushfstring(L, "imImage(%p)-destroyed", (void*) image_p);
+    return 1;
+  }
+
+  lua_pushfstring(L, "imImage(%p) [width=%d,height=%d,color_space=%s,data_type=%s,depth=%d]",
+    (void*) image_p,
+    image->width,
+    image->height,
+    imColorModeSpaceName(image->color_space),
+    imDataTypeName(image->data_type),
+    image->depth
+  );
+  return 1;
+}
+
+/*****************************************************************************\
+ imagechannel tostring
+
+ __tostring metamethod: shows the address of the channel object and its
+ channel number.
+\*****************************************************************************/
+static int imluaImageChannel_tostring (lua_State *L)
+{
+  imluaImageChannel *chan = imlua_checkimagechannel(L, 1);
+  int channel = chan->channel;
+
+  lua_pushfstring(L, "imImageChannel(%p) [channel=%d]", chan, channel);
+  return 1;
+}
+
+/*****************************************************************************\
+ imagerow tostring
+
+ __tostring metamethod: shows the userdata address, the channel and the row.
+ The address is formatted by lua_pushfstring's own %p, as in the other
+ tostring functions of this file, instead of a fixed-size sprintf buffer.
+\*****************************************************************************/
+static int imluaImageRow_tostring (lua_State *L)
+{
+  imluaImageRow *imagerow = imlua_checkimagerow(L, 1);
+
+  lua_pushfstring(L, "imImageRow(%p) [channel=%d,row=%d]",
+    lua_touserdata(L, 1),
+    imagerow->channel,
+    imagerow->row
+  );
+  return 1;
+}
+
+/*****************************************************************************\
+ image row indexing
+
+ __index metamethod of a row object: row[column], column counted from 0.
+ A column outside [0, width) raises an argument error.
+ Returns a number for IM_BYTE, IM_USHORT, IM_INT and IM_FLOAT images. For
+ IM_CFLOAT the two floats of the sample are passed to imlua_newarrayfloat
+ (presumably pushing a 2-element array - confirm against imlua_aux).
+ Any other data type yields nil.
+\*****************************************************************************/
+static int imluaImageRow_index (lua_State *L)
+{
+  int index;
+  imluaImageRow *imagerow = imlua_checkimagerow(L, 1);
+  imImage *image = imagerow->image;
+  int channel = imagerow->channel;
+  int row = imagerow->row;
+  int column = luaL_checkint(L, 2);
+
+  if (column < 0 || column >= image->width)
+    luaL_argerror(L, 2, "invalid column, out of bounds");
+
+  /* sample position counted from data[0]: whole channels, then whole rows */
+  index = channel * image->width * image->height + row * image->width + column;
+
+  switch (image->data_type)
+  {
+  case IM_BYTE:
+    {
+      imbyte *bdata = (imbyte*) image->data[0];
+      lua_pushnumber(L, (lua_Number) bdata[index]);
+    }
+    break;
+
+  case IM_USHORT:
+    {
+      imushort *udata = (imushort*) image->data[0];
+      lua_pushnumber(L, (lua_Number) udata[index]);
+    }
+    break;
+
+  case IM_INT:
+    {
+      int *idata = (int*) image->data[0];
+      lua_pushnumber(L, (lua_Number) idata[index]);
+    }
+    break;
+
+  case IM_FLOAT:
+    {
+      float *fdata = (float*) image->data[0];
+      lua_pushnumber(L, (lua_Number) fdata[index]);
+    }
+    break;
+
+  case IM_CFLOAT:
+    {
+      /* a complex sample occupies two consecutive floats */
+      float *cdata = (float*) image->data[0];
+      imlua_newarrayfloat(L, cdata + (2*index), 2, 1);
+    }
+    break;
+
+  default:
+    /* "return 1" below promises one result, so always push something */
+    lua_pushnil(L);
+    break;
+  }
+
+  return 1;
+}
+
+/*****************************************************************************\
+ image row new index
+
+ __newindex metamethod of a row object: row[column] = value, column counted
+ from 0. A column outside [0, width) raises an argument error.
+ IM_BYTE, IM_USHORT, IM_INT and IM_FLOAT images take a number, which is
+ cast to the sample type. IM_CFLOAT images take a value that
+ imlua_toarrayfloat converts to exactly 2 floats, otherwise "invalid value"
+ is raised. Other data types are left untouched.
+\*****************************************************************************/
+static int imluaImageRow_newindex (lua_State *L)
+{
+  imluaImageRow *imagerow = imlua_checkimagerow(L, 1);
+  imImage *image = imagerow->image;
+  int column = luaL_checkint(L, 2);
+  void *samples;
+  int pos;
+
+  if (!(column >= 0 && column < image->width))
+    luaL_argerror(L, 2, "invalid column, out of bounds");
+
+  /* sample position counted from data[0]: whole channels, then whole rows */
+  pos = imagerow->channel * image->width * image->height + imagerow->row * image->width + column;
+  samples = image->data[0];
+
+  switch (image->data_type)
+  {
+  case IM_BYTE:
+    ((imbyte*) samples)[pos] = (imbyte) luaL_checknumber(L, 3);
+    break;
+
+  case IM_USHORT:
+    ((imushort*) samples)[pos] = (imushort) luaL_checknumber(L, 3);
+    break;
+
+  case IM_INT:
+    ((int*) samples)[pos] = (int) luaL_checknumber(L, 3);
+    break;
+
+  case IM_FLOAT:
+    ((float*) samples)[pos] = (float) luaL_checknumber(L, 3);
+    break;
+
+  case IM_CFLOAT:
+    {
+      /* a complex sample occupies two consecutive floats */
+      float *target = (float*) samples + 2*pos;
+      int count;
+      float *pair = imlua_toarrayfloat(L, 3, &count, 1);
+
+      if (count != 2)
+      {
+        free(pair);  /* luaL_argerror does not return, release first */
+        luaL_argerror(L, 3, "invalid value");
+      }
+
+      target[0] = pair[0];
+      target[1] = pair[1];
+      free(pair);
+    }
+    break;
+  }
+
+  return 0;
+}
+
+/*****************************************************************************\
+ image channel indexing
+
+ __index metamethod of a channel object: channel[row], row counted from 0.
+ A row outside [0, height) raises an argument error; otherwise a row object
+ is created with imlua_newimagerow and returned.
+\*****************************************************************************/
+static int imluaImageChannel_index (lua_State *L)
+{
+  imluaImageChannel *chan = imlua_checkimagechannel(L, 1);
+  int row = luaL_checkint(L, 2);
+  int height = chan->image->height;
+
+  if (!(row >= 0 && row < height))
+    luaL_argerror(L, 2, "invalid row, out of bounds");
+
+  imlua_newimagerow(L, chan->image, chan->channel, row);
+  return 1;
+}
+
+/*****************************************************************************\
+ image indexing
+
+ __index metamethod of an image.
+   image[n]    - n is a number (or a string Lua accepts as a number): a
+                 channel object for channel n, counted from 0; a value
+                 outside [0, depth) raises an argument error.
+   image.name  - any other string: raw lookup of that key in the image
+                 metatable, which is where the methods live.
+   other keys  - nil.
+\*****************************************************************************/
+static int imluaImage_index (lua_State *L)
+{
+  imImage *image = imlua_checkimage(L, 1);
+
+  if (lua_isnumber(L, 2))
+  {
+    int channel = luaL_checkint(L, 2);
+
+    if (!(channel >= 0 && channel < image->depth))
+      luaL_argerror(L, 2, "invalid channel, out of bounds");
+
+    imlua_newimagechannel(L, image, channel);
+    return 1;
+  }
+
+  if (!lua_isstring(L, 2))
+  {
+    lua_pushnil(L);
+    return 1;
+  }
+
+  /* method lookup: metatable[key], without triggering metamethods */
+  lua_getmetatable(L, 1);
+  lua_pushvalue(L, 2);
+  lua_rawget(L, -2);
+  return 1;
+}
+
+/* Functions that imlua_open_image adds to the "im" table. */
+static const luaL_reg imimage_lib[] = {
+  {"ImageCreate",         imluaImageCreate},
+  {"ImageDestroy",        imluaImageDestroy},
+  {"FileImageLoad",       imluaFileImageLoad},
+  {"FileImageLoadBitmap", imluaFileImageLoadBitmap},
+  {"FileImageLoadRegion", imluaFileImageLoadRegion},
+  {"FileImageSave",       imluaFileImageSave},
+  {NULL, NULL}
+};
+
+/* Methods and metamethods that createmeta registers in the "imImage"
+   metatable. The methods are reached through imluaImage_index. */
+static const luaL_reg imimage_metalib[] = {
+  {"Destroy",          imluaImageDestroy},
+  {"AddAlpha",         imluaImageAddAlpha},
+  {"Reshape",          imluaImageReshape},
+  {"Copy",             imluaImageCopy},
+  {"CopyData",         imluaImageCopyData},
+  {"Duplicate",        imluaImageDuplicate},
+  {"Clone",            imluaImageClone},
+  {"SetAttribute",     imluaImageSetAttribute},
+  {"GetAttribute",     imluaImageGetAttribute},
+  {"GetAttributeList", imluaImageGetAttributeList},
+  {"Clear",            imluaImageClear},
+  {"IsBitmap",         imluaImageIsBitmap},
+  {"SetPalette",       imluaImageSetPalette},
+  {"GetPalette",       imluaImageGetPalette},
+  {"CopyAttributes",   imluaImageCopyAttributes},
+  {"MatchSize",        imluaImageMatchSize},
+  {"MatchColor",       imluaImageMatchColor},
+  {"MatchDataType",    imluaImageMatchDataType},
+  {"MatchColorSpace",  imluaImageMatchColorSpace},
+  {"Match",            imluaImageMatch},
+  {"SetBinary",        imluaImageSetBinary},
+  {"MakeBinary",       imluaImageMakeBinary},
+  {"Width",            imluaImageWidth},
+  {"Height",           imluaImageHeight},
+  {"Depth",            imluaImageDepth},
+  {"DataType",         imluaImageDataType},
+  {"ColorSpace",       imluaImageColorSpace},
+  {"HasAlpha",         imluaImageHasAlpha},
+  {"Save",             imluaImageSave},
+
+  {"__gc",             imluaImage_gc},
+  {"__tostring",       imluaImage_tostring},
+  {"__index",          imluaImage_index},
+
+  {NULL, NULL}
+};
+
+/* Stores func under the key event in the table at the top of the stack,
+   using a raw set. */
+static void imluaImage_setmetamethod (lua_State *L, const char *event, lua_CFunction func)
+{
+  lua_pushstring(L, event);
+  lua_pushcfunction(L, func);
+  lua_rawset(L, -3);
+}
+
+/* Creates the three metatables used by the image binding: "imImageChannel",
+   "imImageChannelRow" and "imImage". The stack is left as it was found. */
+static void createmeta (lua_State *L) 
+{
+  /* channel handles: indexing yields rows */
+  luaL_newmetatable(L, "imImageChannel");
+  imluaImage_setmetamethod(L, "__index", imluaImageChannel_index);
+  imluaImage_setmetamethod(L, "__tostring", imluaImageChannel_tostring);
+  lua_pop(L, 1);
+
+  /* row handles: indexing reads and writes samples */
+  luaL_newmetatable(L, "imImageChannelRow");
+  imluaImage_setmetamethod(L, "__index", imluaImageRow_index);
+  imluaImage_setmetamethod(L, "__newindex", imluaImageRow_newindex);
+  imluaImage_setmetamethod(L, "__tostring", imluaImageRow_tostring);
+  lua_pop(L, 1);
+
+  /* image handles, object oriented access */
+  luaL_newmetatable(L, "imImage");
+  /* metatable.__index = metatable; only a placeholder, because
+     imimage_metalib registers imluaImage_index under "__index" right after */
+  lua_pushstring(L, "__index");
+  lua_pushvalue(L, -2);
+  lua_rawset(L, -3);
+  luaL_register(L, NULL, imimage_metalib);
+  lua_pop(L, 1);
+}
+
+/* Defines im.ImageCreateBased(image, width, height, color_space, data_type)
+   by running a small Lua chunk.
+   If all parameters, besides the image, are nil, this is equivalent to image:Clone.
+   If any parameter is not nil, then the value is used instead of the one from the source image.
+   If a parameter is a function, then the function is called, passing the source
+   image as parameter, to obtain the substitution value.
+
+   The stack is left unchanged even when the chunk fails to load or run, so
+   the "im" table the caller keeps on top of the stack stays there. */
+static void reg_image(lua_State *L)
+{
+  const char* data =
+"function im.ImageCreateBased(image, width, height, color_space, data_type)        \n"
+"  -- default values are those of the source image                                 \n"
+"  width       = width       or image:Width()                                      \n"
+"  height      = height      or image:Height()                                     \n"
+"  color_space = color_space or image:ColorSpace()                                 \n"
+"  data_type   = data_type   or image:DataType()                                   \n"
+"                                                                                  \n"
+"  -- callback to calculate parameters based on source image                       \n"
+"  if type(width)       == \"function\" then       width = width(image) end        \n"
+"  if type(height)      == \"function\" then      height = height(image) end       \n"
+"  if type(color_space) == \"function\" then color_space = color_space(image) end  \n"
+"  if type(data_type)   == \"function\" then   data_type = data_type(image) end    \n"
+"                                                                                  \n"
+"  -- create a new image (local, so no global variable is left behind)             \n"
+"  local new_image = im.ImageCreate(width, height, color_space, data_type)         \n"
+"  image:CopyAttributes(new_image)                                                 \n"
+"  return new_image                                                                \n"
+"end                                                                               \n"
+  ;
+
+  /* both a failed load and a failed call leave exactly one error message on
+     the stack; remove it */
+  if (luaL_loadbuffer(L, data, strlen(data), "reg_image") != 0 ||
+      lua_pcall(L, 0, 0, 0) != 0)
+    lua_pop(L, 1);
+}
+
+/* Installs the image binding: creates the metatables, adds the functions of
+   imimage_lib to the "im" table and defines im.ImageCreateBased. */
+void imlua_open_image (lua_State *L)
+{
+  /* "im" table is at the top of the stack */
+  createmeta(L);
+  luaL_register(L, NULL, imimage_lib);  /* NULL name: fill the table at the top of the stack */
+  reg_image(L);
+}
diff --git a/src/lua5/imlua_image.h b/src/lua5/imlua_image.h
new file mode 100644
index 0000000..0a39863
--- /dev/null
+++ b/src/lua5/imlua_image.h
@@ -0,0 +1,38 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua_image.h,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#ifndef __IMLUA_IMAGE_H
+#define __IMLUA_IMAGE_H
+
+#if	defined(__cplusplus)
+extern "C" {
+#endif
+
+
+/* Handle for one channel of an image, as obtained in Lua with image[channel]. */
+typedef struct _imluaImageChannel {
+  imImage *image;  /* NOTE(review): plain pointer; nothing visible here keeps the image alive - confirm lifetime */
+  int channel;     /* channel number, counted from 0 */
+} imluaImageChannel;
+
+/* Handle for one row of one channel, as obtained in Lua with image[channel][row]. */
+typedef struct _imluaImageRow {
+  imImage *image;  /* NOTE(review): plain pointer; nothing visible here keeps the image alive - confirm lifetime */
+  int channel;     /* channel number, counted from 0 */
+  int row;         /* row number, counted from 0 */
+} imluaImageRow;
+
+/* Installs the image binding; expects the "im" table at the top of the stack. */
+void imlua_open_image(lua_State *L);
+
+/* Image helpers shared by the binding sources (imlua_kernel.c, for example,
+   returns its kernels through imlua_pushimage). */
+int imlua_pushimageerror(lua_State *L, imImage* image, int error);
+void imlua_pushimage(lua_State *L, imImage* image);
+imImage* imlua_checkimage(lua_State *L, int param);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/lua5/imlua_jp2.c b/src/lua5/imlua_jp2.c
new file mode 100644
index 0000000..d69ba7e
--- /dev/null
+++ b/src/lua5/imlua_jp2.c
@@ -0,0 +1,44 @@
+/** \file
+ * \brief jp2 format Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "im_format_jp2.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+
+/* im.FormatRegisterJP2(): calls imFormatRegisterJP2. Takes no arguments
+   and returns nothing to Lua. */
+static int imlua_FormatRegisterJP2(lua_State *L)
+{
+  imFormatRegisterJP2();
+
+  (void)L;  /* the Lua state is not needed */
+  return 0;
+}
+
+/* Functions that imlua_jp2_open registers in the "im" table. */
+static const struct luaL_reg imlib[] = {
+  {"FormatRegisterJP2", imlua_FormatRegisterJP2},
+  {NULL, NULL},
+};
+
+
+/* Common body of the luaopen entry points: registers the JP2 format right
+   away, then adds the functions of imlib to the "im" table and returns that
+   table as the single result. */
+static int imlua_jp2_open (lua_State *L)
+{
+  imFormatRegisterJP2();
+  luaL_register(L, "im", imlib);   /* leave "im" table at the top of the stack */
+  return 1;
+}
+
+/* Exported entry point (named after Lua's luaopen_<module> convention for a
+   module called "imlua_jp2"); forwards to imlua_jp2_open. */
+int luaopen_imlua_jp2(lua_State* L)
+{
+  int nresults = imlua_jp2_open(L);
+  return nresults;
+}
+
+/* Same entry point exported under the module name "imlua_jp251"; forwards
+   to imlua_jp2_open. */
+int luaopen_imlua_jp251(lua_State* L)
+{
+  int nresults = imlua_jp2_open(L);
+  return nresults;
+}
diff --git a/src/lua5/imlua_jp2.def b/src/lua5/imlua_jp2.def
new file mode 100644
index 0000000..29aa05c
--- /dev/null
+++ b/src/lua5/imlua_jp2.def
@@ -0,0 +1,4 @@
+EXPORTS
+  luaopen_imlua_jp2
+  luaopen_imlua_jp251
+ 
\ No newline at end of file
diff --git a/src/lua5/imlua_kernel.c b/src/lua5/imlua_kernel.c
new file mode 100644
index 0000000..770a989
--- /dev/null
+++ b/src/lua5/imlua_kernel.c
@@ -0,0 +1,182 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua_kernel.c,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#include <memory.h>
+#include <math.h>
+#include <stdlib.h>
+
+#include "im.h"
+#include "im_image.h"
+#include "im_process.h"
+#include "im_util.h"
+#include "im_kernel.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+#include "imlua.h"
+#include "imlua_aux.h"
+#include "imlua_image.h"
+
+
+/* im.KernelSobel(): returns the image produced by imKernelSobel. */
+static int imluaKernelSobel(lua_State *L)
+{
+  imImage *kernel = imKernelSobel();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelPrewitt(): returns the image produced by imKernelPrewitt. */
+static int imluaKernelPrewitt(lua_State *L)
+{
+  imImage *kernel = imKernelPrewitt();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelKirsh(): returns the image produced by imKernelKirsh. */
+static int imluaKernelKirsh(lua_State *L)
+{
+  imImage *kernel = imKernelKirsh();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelLaplacian4(): returns the image produced by imKernelLaplacian4. */
+static int imluaKernelLaplacian4(lua_State *L)
+{
+  imImage *kernel = imKernelLaplacian4();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelLaplacian8(): returns the image produced by imKernelLaplacian8. */
+static int imluaKernelLaplacian8(lua_State *L)
+{
+  imImage *kernel = imKernelLaplacian8();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelLaplacian5x5(): returns the image produced by imKernelLaplacian5x5. */
+static int imluaKernelLaplacian5x5(lua_State *L)
+{
+  imImage *kernel = imKernelLaplacian5x5();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelLaplacian7x7(): returns the image produced by imKernelLaplacian7x7. */
+static int imluaKernelLaplacian7x7(lua_State *L)
+{
+  imImage *kernel = imKernelLaplacian7x7();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelGradian3x3(): returns the image produced by imKernelGradian3x3. */
+static int imluaKernelGradian3x3(lua_State *L)
+{
+  imImage *kernel = imKernelGradian3x3();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelGradian7x7(): returns the image produced by imKernelGradian7x7. */
+static int imluaKernelGradian7x7(lua_State *L)
+{
+  imImage *kernel = imKernelGradian7x7();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelSculpt(): returns the image produced by imKernelSculpt. */
+static int imluaKernelSculpt(lua_State *L)
+{
+  imImage *kernel = imKernelSculpt();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelMean3x3(): returns the image produced by imKernelMean3x3. */
+static int imluaKernelMean3x3(lua_State *L)
+{
+  imImage *kernel = imKernelMean3x3();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelMean5x5(): returns the image produced by imKernelMean5x5. */
+static int imluaKernelMean5x5(lua_State *L)
+{
+  imImage *kernel = imKernelMean5x5();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelCircularMean5x5(): returns the image produced by imKernelCircularMean5x5. */
+static int imluaKernelCircularMean5x5(lua_State *L)
+{
+  imImage *kernel = imKernelCircularMean5x5();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelMean7x7(): returns the image produced by imKernelMean7x7. */
+static int imluaKernelMean7x7(lua_State *L)
+{
+  imImage *kernel = imKernelMean7x7();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelCircularMean7x7(): returns the image produced by imKernelCircularMean7x7. */
+static int imluaKernelCircularMean7x7(lua_State *L)
+{
+  imImage *kernel = imKernelCircularMean7x7();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelGaussian3x3(): returns the image produced by imKernelGaussian3x3. */
+static int imluaKernelGaussian3x3(lua_State *L)
+{
+  imImage *kernel = imKernelGaussian3x3();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelGaussian5x5(): returns the image produced by imKernelGaussian5x5. */
+static int imluaKernelGaussian5x5(lua_State *L)
+{
+  imImage *kernel = imKernelGaussian5x5();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelBarlett5x5(): returns the image produced by imKernelBarlett5x5. */
+static int imluaKernelBarlett5x5(lua_State *L)
+{
+  imImage *kernel = imKernelBarlett5x5();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelTopHat5x5(): returns the image produced by imKernelTopHat5x5. */
+static int imluaKernelTopHat5x5(lua_State *L)
+{
+  imImage *kernel = imKernelTopHat5x5();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelTopHat7x7(): returns the image produced by imKernelTopHat7x7. */
+static int imluaKernelTopHat7x7(lua_State *L)
+{
+  imImage *kernel = imKernelTopHat7x7();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+/* im.KernelEnhance(): returns the image produced by imKernelEnhance. */
+static int imluaKernelEnhance(lua_State *L)
+{
+  imImage *kernel = imKernelEnhance();
+
+  imlua_pushimage(L, kernel);
+  return 1;
+}
+
+
+/* Kernel functions that imlua_open_kernel adds to the "im" table; each Lua
+   name is the C function name without the "imlua" prefix. */
+static const luaL_reg imkernel_lib[] = {
+  {"KernelSobel",           imluaKernelSobel},
+  {"KernelPrewitt",         imluaKernelPrewitt},
+  {"KernelKirsh",           imluaKernelKirsh},
+  {"KernelLaplacian4",      imluaKernelLaplacian4},
+  {"KernelLaplacian8",      imluaKernelLaplacian8},
+  {"KernelLaplacian5x5",    imluaKernelLaplacian5x5},
+  {"KernelLaplacian7x7",    imluaKernelLaplacian7x7},
+  {"KernelGradian3x3",      imluaKernelGradian3x3},
+  {"KernelGradian7x7",      imluaKernelGradian7x7},
+  {"KernelSculpt",          imluaKernelSculpt},
+  {"KernelMean3x3",         imluaKernelMean3x3},
+  {"KernelMean5x5",         imluaKernelMean5x5},
+  {"KernelCircularMean5x5", imluaKernelCircularMean5x5},
+  {"KernelMean7x7",         imluaKernelMean7x7},
+  {"KernelCircularMean7x7", imluaKernelCircularMean7x7},
+  {"KernelGaussian3x3",     imluaKernelGaussian3x3},
+  {"KernelGaussian5x5",     imluaKernelGaussian5x5},
+  {"KernelBarlett5x5",      imluaKernelBarlett5x5},
+  {"KernelTopHat5x5",       imluaKernelTopHat5x5},
+  {"KernelTopHat7x7",       imluaKernelTopHat7x7},
+  {"KernelEnhance",         imluaKernelEnhance},
+  {NULL, NULL}
+};
+
+/* Adds the kernel functions of imkernel_lib to the "im" table. */
+void imlua_open_kernel (lua_State *L)
+{
+  /* "im" table is at the top of the stack */
+  luaL_register(L, NULL, imkernel_lib);  /* NULL name: fill the table at the top of the stack */
+}
diff --git a/src/lua5/imlua_palette.c b/src/lua5/imlua_palette.c
new file mode 100644
index 0000000..80d23eb
--- /dev/null
+++ b/src/lua5/imlua_palette.c
@@ -0,0 +1,399 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua_palette.c,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#include <string.h>
+#include <memory.h>
+#include <stdlib.h>
+
+#include "im.h"
+#include "im_image.h"
+#include "im_util.h"
+#include "im_palette.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+#include "imlua.h"
+#include "imlua_aux.h"
+#include "imlua_palette.h"
+
+
+static imluaPalette* imlua_rawcheckpalette(lua_State *L, int param)
+{
+  void *ud = lua_touserdata(L, param);
+  /* a palette is a userdata whose metatable is one of two registry entries */
+  if (ud != NULL && lua_getmetatable(L, param))
+  {
+    lua_getfield(L, LUA_REGISTRYINDEX, "imPalette");  /* IM's own metatable */
+    if (lua_rawequal(L, -1, -2))
+    {
+      lua_pop(L, 2);  /* drop both metatables */
+      return (imluaPalette*)ud;
+    }
+    lua_pop(L, 1);  /* drop the candidate, keep the value's metatable */
+    lua_getfield(L, LUA_REGISTRYINDEX, "cdPalette");  /* CD toolkit palettes are accepted too */
+    if (lua_rawequal(L, -1, -2))
+    {
+      lua_pop(L, 2);  /* drop both metatables */
+      return (imluaPalette*)ud;
+    }
+  }
+  luaL_typerror(L, param, "imPalette");  /* raises a Lua error */
+  return NULL;  /* not reached; keeps the compiler quiet */
+}
+
+imluaPalette* imlua_checkpalette (lua_State *L, int param)
+{
+  /* type check first; a NULL color array marks a palette that was already destroyed */
+  imluaPalette* palette = imlua_rawcheckpalette(L, param);
+  if (palette->color == NULL)
+    luaL_argerror(L, param, "destroyed imPalette");
+  return palette;
+}
+
+void imlua_pushpalette(lua_State *L, long* color, int count)
+{
+  imluaPalette *wrapper = (imluaPalette*) lua_newuserdata(L, sizeof *wrapper);
+  wrapper->color = color;  /* only the pointer is stored; the array is not copied */
+  wrapper->count = count;
+  lua_getfield(L, LUA_REGISTRYINDEX, "imPalette");  /* what luaL_getmetatable expands to */
+  lua_setmetatable(L, -2);
+}
+
+/***************************************************************************\
+* Creates a palette as a "imPalette" userdata. A palette can be          *
+* considered and treated as a color table.                                 *
+* im.PaletteCreate(count: number) -> (palette: "imPalette")               *
+\***************************************************************************/
+static int imluaPaletteCreate(lua_State *L)
+{
+  long* color;
+  int count = luaL_optint(L, 1, 256);  /* defaults to 256 when the argument is omitted */
+  if (count < 1 || count > 256)
+    luaL_argerror(L, 1, "palette count should be a positive integer not greater than 256");
+
+  color = (long*)calloc(256, sizeof(long));  /* always 256 zeroed entries, whatever count is */
+  if (!color)  /* a failed allocation must not reach imlua_pushpalette */
+    return luaL_error(L, "insufficient memory");
+
+  imlua_pushpalette(L, color, count);
+  return 1;
+}
+
+
+/*****************************************************************************\
+ im.PaletteFindNearest(palette: "imPalette", color: lightuserdata) -> number
+\*****************************************************************************/
+static int imluaPaletteFindNearest (lua_State *L)
+{
+  imluaPalette *pal = imlua_checkpalette(L, 1);
+  long color = (long int) lua_touserdata(L, 2);  /* argument 2 is the color; index 1 holds the palette itself */
+
+  lua_pushnumber(L, imPaletteFindNearest(pal->color, pal->count, color));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteFindColor
+\*****************************************************************************/
+static int imluaPaletteFindColor (lua_State *L)
+{
+  imluaPalette *palette = imlua_checkpalette(L, 1);
+  long wanted = (long) lua_touserdata(L, 2);  /* the userdata pointer value is the color */
+  unsigned char tolerance = (unsigned char)luaL_checkint(L, 3);  /* truncated to 8 bits */
+
+  lua_pushnumber(L, imPaletteFindColor(palette->color, palette->count, wanted, tolerance));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteGray() -> "imPalette" userdata wrapping imPaletteGray(), count 256
+\*****************************************************************************/
+static int imluaPaletteGray (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteGray(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteRed() -> "imPalette" userdata wrapping imPaletteRed(), count 256
+\*****************************************************************************/
+static int imluaPaletteRed (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteRed(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteGreen() -> "imPalette" userdata wrapping imPaletteGreen(), count 256
+\*****************************************************************************/
+static int imluaPaletteGreen (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteGreen(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteBlue() -> "imPalette" userdata wrapping imPaletteBlue(), count 256
+\*****************************************************************************/
+static int imluaPaletteBlue (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteBlue(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteYellow() -> "imPalette" userdata wrapping imPaletteYellow(), count 256
+\*****************************************************************************/
+static int imluaPaletteYellow (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteYellow(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteMagenta() -> "imPalette" userdata wrapping imPaletteMagenta(), count 256
+\*****************************************************************************/
+static int imluaPaletteMagenta (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteMagenta(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteCian() -> "imPalette" userdata wrapping imPaletteCian(), count 256
+\*****************************************************************************/
+static int imluaPaletteCian (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteCian(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteRainbow() -> "imPalette" userdata wrapping imPaletteRainbow(), count 256
+\*****************************************************************************/
+static int imluaPaletteRainbow (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteRainbow(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteHues() -> "imPalette" userdata wrapping imPaletteHues(), count 256
+\*****************************************************************************/
+static int imluaPaletteHues (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteHues(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteBlueIce() -> "imPalette" userdata wrapping imPaletteBlueIce(), count 256
+\*****************************************************************************/
+static int imluaPaletteBlueIce (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteBlueIce(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteHotIron() -> "imPalette" userdata wrapping imPaletteHotIron(), count 256
+\*****************************************************************************/
+static int imluaPaletteHotIron (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteHotIron(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteBlackBody() -> "imPalette" userdata wrapping imPaletteBlackBody(), count 256
+\*****************************************************************************/
+static int imluaPaletteBlackBody (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteBlackBody(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteHighContrast() -> "imPalette" userdata wrapping imPaletteHighContrast(), count 256
+\*****************************************************************************/
+static int imluaPaletteHighContrast (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteHighContrast(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteUniform() -> "imPalette" userdata wrapping imPaletteUniform(), count 256
+\*****************************************************************************/
+static int imluaPaletteUniform (lua_State *L)
+{
+  imlua_pushpalette(L, imPaletteUniform(), 256);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteUniformIndex(color) -> number; color is read with lua_touserdata
+\*****************************************************************************/
+static int imluaPaletteUniformIndex (lua_State *L)
+{
+  lua_pushnumber(L, imPaletteUniformIndex((long int) lua_touserdata(L, 1)));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.PaletteUniformIndexHalftoned
+\*****************************************************************************/
+static int imluaPaletteUniformIndexHalftoned (lua_State *L)
+{
+  /* argument 1 is the color (read as a userdata pointer), 2 and 3 must be integers */
+  long color_value = (long) lua_touserdata(L, 1);
+  int px = luaL_checkint(L, 2);
+  int py = luaL_checkint(L, 3);
+  lua_pushnumber(L, imPaletteUniformIndexHalftoned(color_value, px, py));
+  return 1;
+}
+
+/***************************************************************************\
+* Frees a previously allocated palette                                      *
+* im.PaletteDestroy(palette: "imPalette")                                      *
+\***************************************************************************/
+static int imluaPaletteDestroy (lua_State *L)
+{
+  /* imlua_checkpalette does the type check and raises "destroyed imPalette"
+     when the color array was already released */
+  imluaPalette *palette = imlua_checkpalette(L, 1);
+
+  free(palette->color);
+  palette->count = 0;
+  palette->color = NULL;  /* NULL is the "destroyed" marker */
+
+  return 0;
+}
+
+/*****************************************************************************\
+ gc
+\*****************************************************************************/
+static int imluaPalette_gc(lua_State *L)
+{
+  imluaPalette *palette = (imluaPalette*)lua_touserdata(L, 1);
+  /* nothing to release without a userdata or once the color array is already NULL */
+  if (palette == NULL || palette->color == NULL)
+    return 0;
+
+  free(palette->color);
+  palette->color = NULL;  /* leave the "destroyed" marker behind */
+  palette->count = 0;
+  return 0;
+}
+
+/***************************************************************************\
+* color = palette[i]                                                        *
+\***************************************************************************/
+static int imluaPalette_index(lua_State *L)
+{
+  imluaPalette *palette = imlua_checkpalette(L, 1);
+  int pos = luaL_checkint(L, 2);
+
+  /* valid positions are 0 .. count-1 */
+  if (!(pos >= 0 && pos < palette->count))
+    luaL_argerror(L, 2, "index is out of bounds");
+  lua_pushlightuserdata(L, (void*) palette->color[pos]);  /* the long is carried as a pointer value */
+  return 1;
+}
+
+/***************************************************************************\
+* palette[i] = color                                                        *
+\***************************************************************************/
+static int imluaPalette_newindex(lua_State *L)
+{
+  imluaPalette *palette = imlua_checkpalette(L, 1);
+  int pos = luaL_checkint(L, 2);
+
+  /* valid positions are 0 .. count-1 */
+  if (!(pos >= 0 && pos < palette->count))
+    luaL_argerror(L, 2, "index is out of bounds");
+
+  /* only a light userdata is accepted as the color value */
+  if (!lua_islightuserdata(L, 3))
+    luaL_argerror(L, 3, "color must be a light user data");
+
+  /* the pointer value itself is stored as the color */
+  palette->color[pos] = (long int) lua_touserdata(L, 3);
+  return 0;
+}
+
+/*****************************************************************************\
+ len
+\*****************************************************************************/
+static int imluaPalette_len(lua_State *L)
+{
+  const imluaPalette *palette = (const imluaPalette*)lua_touserdata(L, 1);
+  lua_pushinteger(L, palette->count);  /* PaletteDestroy and __gc reset count to 0 */
+  return 1;
+}
+
+/*****************************************************************************\
+ tostring
+\*****************************************************************************/
+static int imluaPalette_tostring (lua_State *L)
+{
+  imluaPalette *palette = (imluaPalette*)lua_touserdata(L, 1);
+  lua_pushfstring(L, "imPalette(%p)%s", (void*)palette, (palette->color != NULL)? "": "-destroyed");
+  return 1;  /* the formatted string is the only result */
+}
+
+static const luaL_reg impalette_lib[] = {  /* Lua function name -> C binding, registered by imlua_open_palette */
+  {"PaletteFindNearest", imluaPaletteFindNearest},
+  {"PaletteFindColor", imluaPaletteFindColor},
+  {"PaletteGray", imluaPaletteGray },
+  {"PaletteRed", imluaPaletteRed },
+  {"PaletteGreen", imluaPaletteGreen },
+  {"PaletteBlue", imluaPaletteBlue },
+  {"PaletteYellow", imluaPaletteYellow },
+  {"PaletteMagenta", imluaPaletteMagenta },
+  {"PaletteCian", imluaPaletteCian },
+  {"PaletteRainbow", imluaPaletteRainbow },
+  {"PaletteHues", imluaPaletteHues },
+  {"PaletteBlueIce", imluaPaletteBlueIce },
+  {"PaletteHotIron", imluaPaletteHotIron },
+  {"PaletteBlackBody", imluaPaletteBlackBody },
+  {"PaletteHighContrast", imluaPaletteHighContrast },
+  {"PaletteUniform", imluaPaletteUniform },
+  {"PaletteUniformIndex", imluaPaletteUniformIndex },
+  {"PaletteUniformIndexHalftoned", imluaPaletteUniformIndexHalftoned },
+
+  {"PaletteDestroy", imluaPaletteDestroy},
+  {"PaletteCreate", imluaPaletteCreate},
+
+  {NULL, NULL}  /* sentinel that ends the list for luaL_register */
+};
+
+static const luaL_reg impalette_metalib[] = {  /* metamethods installed on the "imPalette" metatable by createmeta */
+  {"__gc", imluaPalette_gc},
+  {"__tostring", imluaPalette_tostring},
+  {"__index", imluaPalette_index},
+  {"__newindex", imluaPalette_newindex},
+  {"__len", imluaPalette_len},
+
+  {NULL, NULL}  /* sentinel that ends the list for luaL_register */
+};
+
+static void createmeta (lua_State *L) 
+{
+  /* there is no object orientation for imPalette, only array access through the metamethods */
+  luaL_newmetatable(L, "imPalette");  /* registry entry later looked up by imlua_pushpalette and the checks */
+  luaL_register(L, NULL, impalette_metalib);     /* NULL libname: fills the table on top of the stack */
+  lua_pop(L, 1);   /* removes the metatable, leaving the stack as it was on entry */
+}
+
+void imlua_open_palette (lua_State *L)
+{
+  /* "im" table is at the top of the stack; createmeta pops what it pushes, so it still is for luaL_register */
+  createmeta(L);
+  luaL_register(L, NULL, impalette_lib);
+}
diff --git a/src/lua5/imlua_palette.h b/src/lua5/imlua_palette.h
new file mode 100644
index 0000000..453fd01
--- /dev/null
+++ b/src/lua5/imlua_palette.h
@@ -0,0 +1,32 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua_palette.h,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#ifndef __IMLUA_PALETTE_H
+#define __IMLUA_PALETTE_H
+
+#if	defined(__cplusplus)
+extern "C" {
+#endif
+
+
+/* this is the same declaration used in the CD toolkit for cdPalette in Lua, so the layout must not change */
+typedef struct _imPalette {
+  long* color;  /* array of color values; NULL marks a destroyed palette */
+  int count;    /* number of entries reachable from Lua */
+} imluaPalette;
+
+void imlua_pushpalette(lua_State *L, long* color, int count);  /* pushes a new "imPalette" userdata holding color (pointer stored, not copied) */
+imluaPalette* imlua_checkpalette (lua_State *L, int param);  /* raises a Lua error for a wrong type or a destroyed palette */
+
+void imlua_open_palette(lua_State *L);  /* expects the "im" table on top of the stack */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/lua5/imlua_process.c b/src/lua5/imlua_process.c
new file mode 100644
index 0000000..8a6fe64
--- /dev/null
+++ b/src/lua5/imlua_process.c
@@ -0,0 +1,3091 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua_process.c,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#include <memory.h>
+#include <math.h>
+#include <stdlib.h>
+
+#include "im.h"
+#include "im_image.h"
+#include "im_process.h"
+#include "im_util.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+#include "imlua.h"
+#include "imlua_aux.h"
+#include "imlua_image.h"
+
+
+
+/*****************************************************************************\
+ Image Statistics Calculations
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.CalcRMSError(image1, image2)
+\*****************************************************************************/
+static int imluaCalcRMSError (lua_State *L)
+{
+  imImage* first = imlua_checkimage(L, 1);
+  imImage* second = imlua_checkimage(L, 2);
+
+  /* NOTE(review): imlua_match presumably raises a Lua error for incompatible images -- confirm */
+  imlua_match(L, first, second);
+  lua_pushnumber(L, imCalcRMSError(first, second));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.CalcSNR(src_image, noise_image)
+\*****************************************************************************/
+static int imluaCalcSNR (lua_State *L)
+{
+  imImage* signal = imlua_checkimage(L, 1);
+  imImage* noise = imlua_checkimage(L, 2);
+
+  /* NOTE(review): imlua_match presumably raises a Lua error for incompatible images -- confirm */
+  imlua_match(L, signal, noise);
+  lua_pushnumber(L, imCalcSNR(signal, noise));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.CalcCountColors(src_image)
+\*****************************************************************************/
+static int imluaCalcCountColors (lua_State *L)
+{
+  imImage* image = imlua_checkimage(L, 1);
+
+  imlua_checkdatatype(L, 1, image, IM_BYTE);
+  /* color spaces from IM_CMYK upwards in the enumeration are rejected */
+  if (!(image->color_space < IM_CMYK))
+    luaL_argerror(L, 1, "color space can be RGB, Gray, Binary or Map only");
+  lua_pushnumber(L, imCalcCountColors(image));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.CalcHistogram(src_image, plane, cumulative)
+\*****************************************************************************/
+static int imluaCalcHistogram (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  int plane = luaL_checkint(L, 2);
+  int cumulative = luaL_checkint(L, 3);
+  luaL_argcheck(L, plane >= 0, 2, "invalid plane");  /* NOTE(review): the upper bound of plane is still unchecked */
+  switch (src_image->data_type)
+  {
+  case IM_BYTE:
+    {
+      unsigned long hist[256];
+      imCalcHistogram((imbyte*) src_image->data[plane], src_image->count, hist, cumulative);
+      imlua_newarrayulong(L, hist, 256, 0);
+    }
+    break;
+  /* 16-bit samples take 65536 distinct values; with 65535 bins a sample of 65535 lands past the array */
+  case IM_USHORT:
+    {
+      unsigned long hist[65536];
+      imCalcUShortHistogram(src_image->data[plane], src_image->count, hist, cumulative);
+      imlua_newarrayulong(L, hist, 65536, 0);
+    }
+    break;
+
+  default:
+    luaL_argerror(L, 1, "data_type can be byte or ushort only");
+    break;
+  }
+
+  return 1;
+}
+
+/*****************************************************************************\
+ im.CalcGrayHistogram(src_image, cumulative)
+\*****************************************************************************/
+static int imluaCalcGrayHistogram (lua_State *L)
+{
+  imImage* image = imlua_checkimage(L, 1);
+  int accumulate = luaL_checkint(L, 2);
+  unsigned long bins[256];  /* filled by imCalcGrayHistogram, then handed to imlua_newarrayulong */
+
+  imlua_checkdatatype(L, 1, image, IM_BYTE);
+  if (!(image->color_space < IM_CMYK))
+    luaL_argerror(L, 1, "color space can be RGB, Gray, Binary or Map only");
+
+  imCalcGrayHistogram(image, bins, accumulate);
+  imlua_newarrayulong(L, bins, 256, 0);
+
+  return 1;
+}
+
+/*****************************************************************************\
+ im.CalcImageStatistics(src_image)
+\*****************************************************************************/
+static int imluaCalcImageStatistics (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imStats result;
+
+  if (src->data_type == IM_CFLOAT)
+    luaL_argerror(L, 1, "data type can NOT be of type cfloat");
+
+  imCalcImageStatistics(src, &result);
+
+  lua_newtable(L);  /* the result table; each exported imStats member becomes one field */
+  lua_pushnumber(L, result.max);      lua_setfield(L, -2, "max");
+  lua_pushnumber(L, result.min);      lua_setfield(L, -2, "min");
+  lua_pushnumber(L, result.positive); lua_setfield(L, -2, "positive");
+  lua_pushnumber(L, result.negative); lua_setfield(L, -2, "negative");
+  lua_pushnumber(L, result.zeros);    lua_setfield(L, -2, "zeros");
+  lua_pushnumber(L, result.mean);     lua_setfield(L, -2, "mean");
+  lua_pushnumber(L, result.stddev);   lua_setfield(L, -2, "stddev");
+  return 1;
+}
+
+/*****************************************************************************\
+ im.CalcHistogramStatistics(src_image)
+\*****************************************************************************/
+static int imluaCalcHistogramStatistics (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imStats result;
+
+  imlua_checkdatatype(L, 1, src, IM_BYTE);
+
+  imCalcHistogramStatistics(src, &result);
+
+  lua_newtable(L);  /* the result table; each exported imStats member becomes one field */
+  lua_pushnumber(L, result.max);      lua_setfield(L, -2, "max");
+  lua_pushnumber(L, result.min);      lua_setfield(L, -2, "min");
+  lua_pushnumber(L, result.positive); lua_setfield(L, -2, "positive");
+  lua_pushnumber(L, result.negative); lua_setfield(L, -2, "negative");
+  lua_pushnumber(L, result.zeros);    lua_setfield(L, -2, "zeros");
+  lua_pushnumber(L, result.mean);     lua_setfield(L, -2, "mean");
+  lua_pushnumber(L, result.stddev);   lua_setfield(L, -2, "stddev");
+  return 1;
+}
+
+/*****************************************************************************\
+ im.CalcHistoImageStatistics
+\*****************************************************************************/
+static int imluaCalcHistoImageStatistics (lua_State *L)
+{
+  int* median;
+  int* mode;
+  imImage *image = imlua_checkimage(L, 1);
+
+  imlua_checkdatatype(L, 1, image, IM_BYTE);
+
+  /* one block holds both outputs: image->depth medians followed by image->depth modes */
+  median = (int*)malloc(2*sizeof(int)*image->depth);
+  if (!median && image->depth > 0)  /* a NULL buffer must never reach imCalcHistoImageStatistics */
+    return luaL_error(L, "insufficient memory");
+  mode = median + image->depth;
+
+  imCalcHistoImageStatistics(image, median, mode);
+
+  imlua_newarrayint (L, median, image->depth, 0);
+  imlua_newarrayint (L, mode, image->depth, 0);
+
+  free(median);  /* releases the modes as well, they live in the same block */
+  return 2;
+}
+
+/*****************************************************************************\
+ Image Analysis
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.AnalyzeFindRegions(src_image, dst_image, connect, touch_border)
+\*****************************************************************************/
+static int imluaAnalyzeFindRegions (lua_State *L)
+{
+  imImage* binary = imlua_checkimage(L, 1);
+  imImage* labels = imlua_checkimage(L, 2);
+  int connectivity = luaL_checkint(L, 3);
+  int border_flag = lua_toboolean(L, 4);  /* an absent or nil argument reads as false */
+
+  imlua_checkcolorspace(L, 1, binary, IM_BINARY);
+  imlua_checktype(L, 2, labels, IM_GRAY, IM_USHORT);
+  if (connectivity != 4 && connectivity != 8)
+    luaL_argerror(L, 3, "invalid connect value, must be 4 or 8");
+  lua_pushnumber(L, imAnalyzeFindRegions(binary, labels, connectivity, border_flag));
+  return 1;
+}
+
+static int iGetMax(imImage* image)
+{
+  /* scans image->count samples of plane 0, read as imushort,
+     and returns the largest value found */
+  const imushort* samples = (const imushort*)image->data[0];
+  int largest = 0;
+  int i;
+
+  for (i = 0; i < image->count; i++)
+  {
+    if (samples[i] > largest)
+      largest = samples[i];
+  }
+
+  return largest;  /* stays 0 when there are no samples */
+}
+
+static int imlua_checkregioncount(lua_State *L, int narg, imImage* image)
+{
+  /* an omitted or nil count falls back to the largest sample value in the image */
+  return lua_isnoneornil(L, narg)? iGetMax(image): (int)luaL_checknumber(L, narg);
+}
+
+
+/*****************************************************************************\
+ im.AnalyzeMeasureArea(image, [count])
+\*****************************************************************************/
+static int imluaAnalyzeMeasureArea (lua_State *L)
+{
+  int count;
+  int *area;
+  imImage* image = imlua_checkimage(L, 1);
+
+  imlua_checktype(L, 1, image, IM_GRAY, IM_USHORT);
+
+  count = imlua_checkregioncount(L, 2, image);
+  luaL_argcheck(L, count >= 0, 2, "invalid region count");  /* a negative count would wrap the allocation size */
+  area = (int*) malloc(sizeof(int) * count);
+  if (!area && count > 0)  /* malloc(0) may legitimately return NULL */
+    return luaL_error(L, "insufficient memory");
+
+  imAnalyzeMeasureArea(image, area, count);
+  imlua_newarrayint(L, area, count, 0);
+  free(area);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.AnalyzeMeasurePerimArea(image, [count])
+\*****************************************************************************/
+static int imluaAnalyzeMeasurePerimArea (lua_State *L)
+{
+  int count;
+  float *perimarea;
+  imImage* image = imlua_checkimage(L, 1);
+
+  imlua_checktype(L, 1, image, IM_GRAY, IM_USHORT);
+
+  count = imlua_checkregioncount(L, 2, image);
+  luaL_argcheck(L, count >= 0, 2, "invalid region count");  /* a negative count would wrap the allocation size */
+  perimarea = (float*) malloc(sizeof(float) * count);
+  if (!perimarea && count > 0)  /* malloc(0) may legitimately return NULL */
+    return luaL_error(L, "insufficient memory");
+
+  imAnalyzeMeasurePerimArea(image, perimarea);  /* NOTE(review): the callee is not told count; a too-small count may overrun perimarea -- confirm */
+  imlua_newarrayfloat (L, perimarea, count, 0);
+  free(perimarea);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.AnalyzeMeasureCentroid(image, [area], [count])
+\*****************************************************************************/
+static int imluaAnalyzeMeasureCentroid (lua_State *L)
+{
+  int count;
+  float *cx, *cy;
+  int *area;
+  imImage* image = imlua_checkimage(L, 1);
+
+  imlua_checktype(L, 1, image, IM_GRAY, IM_USHORT);
+
+  area = imlua_toarrayint(L, 2, &count, 0);  /* the length stored in count is overwritten on the next line */
+  count = imlua_checkregioncount(L, 3, image);
+
+  cx = (float*) malloc (sizeof(float) * count);
+  cy = (float*) malloc (sizeof(float) * count);
+  if (count < 0 || (count > 0 && (!cx || !cy)))
+  {
+    free(area); free(cx); free(cy);  /* free(NULL) is a no-op */
+    return luaL_error(L, "invalid region count or insufficient memory");
+  }
+
+  imAnalyzeMeasureCentroid(image, area, count, cx, cy);
+  imlua_newarrayfloat(L, cx, count, 0);
+  imlua_newarrayfloat(L, cy, count, 0);
+  free(area);  /* may be NULL; free(NULL) is a no-op */
+  free(cx);
+  free(cy);
+  return 2;
+}
+
+/*****************************************************************************\
+ im.AnalyzeMeasurePrincipalAxis(image, [area], [cx], [cy], [count])
+\*****************************************************************************/
+static int imluaAnalyzeMeasurePrincipalAxis (lua_State *L)
+{
+  int count;
+  float *cx, *cy;
+  int *area;
+  float *major_slope, *major_length, *minor_slope, *minor_length;
+  imImage* image = imlua_checkimage(L, 1);
+
+  imlua_checktype(L, 1, image, IM_GRAY, IM_USHORT);
+
+  area = imlua_toarrayint(L, 2, &count, 0);  /* the length stored in count is overwritten below */
+  cx = imlua_toarrayfloat(L, 3, NULL, 0);
+  cy = imlua_toarrayfloat(L, 4, NULL, 0);
+  count = imlua_checkregioncount(L, 5, image);
+
+  major_slope = (float*) malloc (sizeof(float) * count);
+  major_length = (float*) malloc (sizeof(float) * count);
+  minor_slope = (float*) malloc (sizeof(float) * count);
+  minor_length = (float*) malloc (sizeof(float) * count);
+  if (count < 0 || (count > 0 && (!major_slope || !major_length || !minor_slope || !minor_length)))
+  {
+    free(area); free(cx); free(cy);  /* free(NULL) is a no-op */
+    free(major_slope); free(major_length); free(minor_slope); free(minor_length);
+    return luaL_error(L, "invalid region count or insufficient memory");
+  }
+
+  imAnalyzeMeasurePrincipalAxis(image, area, cx, cy, count, major_slope, major_length, minor_slope, minor_length);
+
+  imlua_newarrayfloat(L, major_slope, count, 0);
+  imlua_newarrayfloat(L, major_length, count, 0);
+  imlua_newarrayfloat(L, minor_slope, count, 0);
+  imlua_newarrayfloat(L, minor_length, count, 0);
+
+  free(area);  /* the three input arrays may be NULL; free(NULL) is a no-op */
+  free(cx);
+  free(cy);
+  free(major_slope);
+  free(major_length);
+  free(minor_slope);
+  free(minor_length);
+  return 4;
+}
+
+/*****************************************************************************\
+ im.AnalyzeMeasureHoles(image, connect, [count]) -> holes_count, area, perim
+\*****************************************************************************/
+static int imluaAnalyzeMeasureHoles (lua_State *L)
+{
+  int holes_count, count;
+  int connect;
+  int *area = NULL;
+  float *perim = NULL;
+  imImage* image = imlua_checkimage(L, 1);
+
+  imlua_checktype(L, 1, image, IM_GRAY, IM_USHORT);
+
+  connect = luaL_checkint(L, 2);
+  count = imlua_checkregioncount(L, 3, image);
+
+  area = (int*) malloc (sizeof(int) * count);
+  perim = (float*) malloc (sizeof(float) * count);
+  if (count < 0 || (count > 0 && (!area || !perim)))
+  {
+    free(area); free(perim);  /* free(NULL) is a no-op */
+    return luaL_error(L, "invalid region count or insufficient memory");
+  }
+
+  imAnalyzeMeasureHoles(image, connect, &holes_count, area, perim);
+
+  lua_pushnumber(L, holes_count);  /* the arrays below are sized by holes_count, not by count */
+  imlua_newarrayint(L, area, holes_count, 0);
+  imlua_newarrayfloat(L, perim, holes_count, 0);
+  free(area);
+  free(perim);
+  return 3;
+}
+
+/*****************************************************************************\
+ im.AnalyzeMeasurePerimeter(image, [count])
+\*****************************************************************************/
+static int imluaAnalyzeMeasurePerimeter (lua_State *L)
+{
+  int count;
+  float *perim;
+  imImage* image = imlua_checkimage(L, 1);
+
+  imlua_checktype(L, 1, image, IM_GRAY, IM_USHORT);
+
+  count = imlua_checkregioncount(L, 2, image);
+  luaL_argcheck(L, count >= 0, 2, "invalid region count");  /* a negative count would wrap the allocation size */
+  perim = (float*) malloc(sizeof(float) * count);
+  if (!perim && count > 0)  /* malloc(0) may legitimately return NULL */
+    return luaL_error(L, "insufficient memory");
+
+  imAnalyzeMeasurePerimeter(image, perim, count);
+  imlua_newarrayfloat(L, perim, count, 0);
+
+  free(perim);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessPerimeterLine(src_image, dst_image)
+\*****************************************************************************/
+static int imluaProcessPerimeterLine (lua_State *L)
+{
+  imImage* source = imlua_checkimage(L, 1);
+  imImage* target = imlua_checkimage(L, 2);
+  /* only data types ordered before IM_FLOAT in the enumeration are accepted */
+  if (!(source->data_type < IM_FLOAT))
+    luaL_argerror(L, 1, "image data type can be integer only");
+  imlua_match(L, source, target);
+  imProcessPerimeterLine(source, target);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessPrune(src_image, dst_image, connect, start_size, end_size)
+\*****************************************************************************/
+static int imluaProcessPrune (lua_State *L)
+{
+  imImage* source = imlua_checkimage(L, 1);
+  imImage* target = imlua_checkimage(L, 2);
+  int connectivity = luaL_checkint(L, 3);
+  int first_size = luaL_checkint(L, 4);
+  int last_size = luaL_checkint(L, 5);
+
+  imlua_checkcolorspace(L, 1, source, IM_BINARY);
+  imlua_match(L, source, target);
+  if (connectivity != 4 && connectivity != 8)  /* the only two values accepted */
+    luaL_argerror(L, 3, "invalid connect value, must be 4 or 8");
+  imProcessPrune(source, target, connectivity, first_size, last_size);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessFillHoles(src_image, dst_image, connect)
+\*****************************************************************************/
+static int imluaProcessFillHoles (lua_State *L)
+{
+  imImage* source = imlua_checkimage(L, 1);
+  imImage* target = imlua_checkimage(L, 2);
+  int connectivity = luaL_checkint(L, 3);
+
+  imlua_checkcolorspace(L, 1, source, IM_BINARY);
+  imlua_match(L, source, target);
+  if (connectivity != 4 && connectivity != 8)  /* the only two values accepted */
+    luaL_argerror(L, 3, "invalid connect value, must be 4 or 8");
+  imProcessFillHoles(source, target, connectivity);
+  return 0;
+}
+
+static void imlua_checkhoughsize(lua_State *L, imImage* image, imImage* hough_image, int param)
+{
+#define IMSQR(_x) (_x*_x)
+  /* half of the source image diagonal, truncated to an integer */
+  int half_diag = (int)(sqrt((double)(IMSQR(image->width) + IMSQR(image->height)))/2.0);
+
+  if (hough_image->width != 180)
+    luaL_argerror(L, param, "invalid image width");
+  if (hough_image->height != 2*half_diag+1)
+    luaL_argerror(L, param, "invalid image height");
+}
+
+/*****************************************************************************\
+ im.ProcessHoughLines(src_image, dst_image) -> boolean
+\*****************************************************************************/
+static int imluaProcessHoughLines (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  imImage* dst_image = imlua_checkimage(L, 2);
+
+  imlua_checkcolorspace(L, 1, src_image, IM_BINARY);
+  imlua_checktype(L, 2, dst_image, IM_GRAY, IM_INT);
+  imlua_checkhoughsize(L, src_image, dst_image, 2);
+
+  lua_pushboolean(L, imProcessHoughLines(src_image, dst_image));
+  return 1;  /* one result: returning 0 would discard the boolean just pushed */
+}
+
+/*****************************************************************************\
+ im.ProcessHoughLinesDraw(src_image, hough | nil, hough_points, dst_image) -> number
+\*****************************************************************************/
+static int imluaProcessHoughLinesDraw (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  imImage* hough_points = imlua_checkimage(L, 3);
+  imImage* dst_image = imlua_checkimage(L, 4);
+  imImage* hough = NULL;  /* stays NULL unless argument 2 is a userdata */
+  if (lua_isuserdata(L, 2))
+  {
+    hough = imlua_checkimage(L, 2);
+    imlua_checktype(L, 2, hough, IM_GRAY, IM_INT);
+    imlua_checkhoughsize(L, src_image, hough, 2);
+  }
+
+  imlua_checktype(L, 1, src_image, IM_GRAY, IM_BYTE);
+  imlua_checkcolorspace(L, 3, hough_points, IM_BINARY);
+  imlua_checkhoughsize(L, src_image, hough_points, 3);
+  imlua_matchsize(L, src_image, dst_image);
+
+  lua_pushnumber(L, imProcessHoughLinesDraw(src_image, hough, hough_points, dst_image));
+  return 1;  /* one result: returning 0 would discard the number just pushed */
+}
+
+/*****************************************************************************\
+ im.ProcessDistanceTransform(src_image, dst_image)
+\*****************************************************************************/
+static int imluaProcessDistanceTransform (lua_State *L)
+{
+  imImage* source = imlua_checkimage(L, 1);
+  imImage* target = imlua_checkimage(L, 2);
+
+  /* binary input, float output, checked with imlua_matchsize against each other */
+  imlua_checkcolorspace(L, 1, source, IM_BINARY);
+  imlua_checkdatatype(L, 2, target, IM_FLOAT);
+  imlua_matchsize(L, source, target);
+  imProcessDistanceTransform(source, target);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessRegionalMaximum(src_image, dst_image)
+\*****************************************************************************/
+static int imluaProcessRegionalMaximum (lua_State *L)
+{
+  imImage* float_in = imlua_checkimage(L, 1);     /* arg 1: source image */
+  imImage* binary_out = imlua_checkimage(L, 2);   /* arg 2: destination image */
+  /* source is checked as IM_GRAY / IM_FLOAT, destination as IM_BINARY */
+  imlua_checktype(L, 1, float_in, IM_GRAY, IM_FLOAT);
+  imlua_checkcolorspace(L, 2, binary_out, IM_BINARY);
+  imlua_matchsize(L, float_in, binary_out);
+  /* nothing is pushed, so Lua receives no results */
+  imProcessRegionalMaximum(float_in, binary_out);
+  return 0;
+}
+
+/*****************************************************************************\
+ Image Resize
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessReduce(src_image, dst_image, order)
+\*****************************************************************************/
+static int imluaProcessReduce (lua_State *L)
+{
+  imImage* source = imlua_checkimage(L, 1);
+  imImage* target = imlua_checkimage(L, 2);
+  int order = luaL_checkint(L, 3);
+  int done;  /* value returned by imProcessReduce, handed to Lua as a boolean */
+  imlua_matchcolor(L, source, target);
+  luaL_argcheck(L, !(order != 0 && order != 1), 3, "invalid order, must be 0 or 1");
+  done = imProcessReduce(source, target, order);
+  lua_pushboolean(L, done);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessResize(src_image, dst_image, order)
+\*****************************************************************************/
+static int imluaProcessResize (lua_State *L)
+{
+  imImage* source = imlua_checkimage(L, 1);
+  imImage* target = imlua_checkimage(L, 2);
+  int order = luaL_checkint(L, 3);
+  int done;  /* value returned by imProcessResize, handed to Lua as a boolean */
+  imlua_matchcolor(L, source, target);
+  luaL_argcheck(L, !(order != 0 && order != 1 && order != 3), 3, "invalid order, must be 0, 1 or 3");
+  done = imProcessResize(source, target, order);
+  lua_pushboolean(L, done);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessReduceBy4(src_image, dst_image)
+\*****************************************************************************/
+static int imluaProcessReduceBy4 (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  imImage* dst_image = imlua_checkimage(L, 2);
+  /* the size complaint concerns dst_image, i.e. argument 2 (there is no argument 3) */
+  imlua_matchcolor(L, src_image, dst_image);
+  luaL_argcheck(L,
+    dst_image->width == (src_image->width / 2) &&
+    dst_image->height == (src_image->height / 2), 2, "destiny image size must be source image width/2, height/2");
+  /* nothing is pushed, so Lua receives no results */
+  imProcessReduceBy4(src_image, dst_image);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessCrop(src_image, dst_image, xmin, ymin)
+\*****************************************************************************/
+static int imluaProcessCrop (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  imImage* dst_image = imlua_checkimage(L, 2);
+  int xmin = luaL_checkint(L, 3);
+  int ymin = luaL_checkint(L, 4);
+  /* origin 0 and a destination that exactly fills the remaining area are valid; ymin is argument 4 */
+  imlua_matchcolor(L, src_image, dst_image);
+  luaL_argcheck(L, xmin >= 0 && xmin < src_image->width, 3, "xmin must be >= 0 and < width");
+  luaL_argcheck(L, ymin >= 0 && ymin < src_image->height, 4, "ymin must be >= 0 and < height");
+  luaL_argcheck(L, dst_image->width <= (src_image->width - xmin), 2, "destiny image size must be smaller than or equal to source image width-xmin, height-ymin");
+  luaL_argcheck(L, dst_image->height <= (src_image->height - ymin), 2, "destiny image size must be smaller than or equal to source image width-xmin, height-ymin");
+  /* nothing is pushed, so Lua receives no results */
+  imProcessCrop(src_image, dst_image, xmin, ymin);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessInsert(src_image, region_image, dst_image, xmin, ymin)
+\*****************************************************************************/
+static int imluaProcessInsert (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  imImage* region_image = imlua_checkimage(L, 2);
+  imImage* dst_image = imlua_checkimage(L, 3);
+  int xmin = luaL_checkint(L, 4);
+  int ymin = luaL_checkint(L, 5);
+  /* xmin/ymin are Lua arguments 4 and 5; position 0 (the image origin) is accepted */
+  imlua_matchcolor(L, src_image, dst_image);
+  luaL_argcheck(L, xmin >= 0 && xmin < src_image->width, 4, "xmin must be >= 0 and < width");
+  luaL_argcheck(L, ymin >= 0 && ymin < src_image->height, 5, "ymin must be >= 0 and < height");
+  /* nothing is pushed, so Lua receives no results */
+  imProcessInsert(src_image, region_image, dst_image, xmin, ymin);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessAddMargins(src_image, dst_image, xmin, ymin)
+\*****************************************************************************/
+static int imluaProcessAddMargins (lua_State *L)
+{
+  imImage* src_image = imlua_checkimage(L, 1);
+  imImage* dst_image = imlua_checkimage(L, 2);
+  int xmin = luaL_checkint(L, 3);
+  int ymin = luaL_checkint(L, 4);
+  /* NOTE(review): negative xmin/ymin are not rejected here - confirm imProcessAddMargins copes */
+  imlua_matchcolor(L, src_image, dst_image);
+  luaL_argcheck(L, dst_image->width >= (src_image->width + xmin), 2, "destiny image size must be greater than or equal to source image width+xmin, height+ymin");
+  luaL_argcheck(L, dst_image->height >= (src_image->height + ymin), 2, "destiny image size must be greater than or equal to source image width+xmin, height+ymin");
+  /* a destination that exactly fits source + margin is accepted; nothing is pushed for Lua */
+  imProcessAddMargins(src_image, dst_image, xmin, ymin);
+  return 0;
+}
+
+
+
+/*****************************************************************************\
+ Geometric Operations
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessCalcRotateSize
+\*****************************************************************************/
+static int imluaProcessCalcRotateSize (lua_State *L)
+{
+  /* Lua args: width, height, cos0, sin0; two numbers are pushed back: new width, new height */
+  int in_w = luaL_checkint(L, 1);
+  int in_h = luaL_checkint(L, 2);
+  double cosine = (double) luaL_checknumber(L, 3);
+  double sine = (double) luaL_checknumber(L, 4);
+  int out_w, out_h;  /* written by imProcessCalcRotateSize through the pointers below */
+
+  imProcessCalcRotateSize(in_w, in_h, &out_w, &out_h, cosine, sine);
+  lua_pushnumber(L, out_w);
+  lua_pushnumber(L, out_h);
+  return 2;
+}
+
+/*****************************************************************************\
+ im.ProcessRotate
+\*****************************************************************************/
+static int imluaProcessRotate (lua_State *L)
+{
+  imImage *source = imlua_checkimage(L, 1);
+  imImage *target = imlua_checkimage(L, 2);
+  double cosine = (double) luaL_checknumber(L, 3);
+  double sine = (double) luaL_checknumber(L, 4);
+  int order = luaL_checkint(L, 5);
+  int done;  /* value returned by imProcessRotate, handed to Lua as a boolean */
+  imlua_matchcolor(L, source, target);
+  luaL_argcheck(L, !(order != 0 && order != 1 && order != 3), 5, "invalid order, must be 0, 1 or 3");
+  done = imProcessRotate(source, target, cosine, sine, order);
+  lua_pushboolean(L, done);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRotateRef
+\*****************************************************************************/
+static int imluaProcessRotateRef (lua_State *L)
+{
+  imImage *src_image = imlua_checkimage(L, 1);
+  imImage *dst_image = imlua_checkimage(L, 2);
+  double cos0 = (double) luaL_checknumber(L, 3);
+  double sin0 = (double) luaL_checknumber(L, 4);
+  int x = luaL_checkint(L, 5);
+  int y = luaL_checkint(L, 6);
+  int to_origin = luaL_checkint(L, 7);
+  int order = luaL_checkint(L, 8);
+  /* order is Lua argument 8, so a bad order is reported against 8 (argument 5 is x) */
+  imlua_matchcolor(L, src_image, dst_image);
+  luaL_argcheck(L, (order == 0 || order == 1 || order == 3), 8, "invalid order, must be 0, 1, or 3");
+  /* the value returned by imProcessRotateRef goes back to Lua as a boolean */
+  lua_pushboolean(L, imProcessRotateRef(src_image, dst_image, cos0, sin0, x, y, to_origin, order));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRotate90
+\*****************************************************************************/
+static int imluaProcessRotate90 (lua_State *L)
+{
+  imImage *source = imlua_checkimage(L, 1);
+  imImage *target = imlua_checkimage(L, 2);
+  int dir = luaL_checkint(L, 3);
+  /* the destination must carry the source's width and height swapped; dir is -1 or 1 */
+  imlua_matchcolor(L, source, target);
+  luaL_argcheck(L, !(target->width != source->height || target->height != source->width), 2, "destiny width and height must have the source height and width");
+  luaL_argcheck(L, !(dir != -1 && dir != 1), 3, "invalid dir, can be -1 or 1 only");
+  /* nothing is pushed, so Lua receives no results */
+  imProcessRotate90(source, target, dir);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessRotate180
+\*****************************************************************************/
+static int imluaProcessRotate180 (lua_State *L)
+{
+  imImage *source = imlua_checkimage(L, 1);   /* arg 1: source image */
+  imImage *target = imlua_checkimage(L, 2);   /* arg 2: destination image */
+  /* imlua_match (defined elsewhere) checks the pair before the IM call */
+  imlua_match(L, source, target);
+  /* nothing is pushed, so Lua receives no results */
+  imProcessRotate180(source, target);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessMirror
+\*****************************************************************************/
+static int imluaProcessMirror (lua_State *L)
+{
+  imImage *source = imlua_checkimage(L, 1);   /* arg 1: source image */
+  imImage *target = imlua_checkimage(L, 2);   /* arg 2: destination image */
+  /* imlua_match (defined elsewhere) checks the pair before the IM call */
+  imlua_match(L, source, target);
+  /* nothing is pushed, so Lua receives no results */
+  imProcessMirror(source, target);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessFlip
+\*****************************************************************************/
+static int imluaProcessFlip (lua_State *L)
+{
+  imImage *source = imlua_checkimage(L, 1);   /* arg 1: source image */
+  imImage *target = imlua_checkimage(L, 2);   /* arg 2: destination image */
+  /* imlua_match (defined elsewhere) checks the pair before the IM call */
+  imlua_match(L, source, target);
+  /* nothing is pushed, so Lua receives no results */
+  imProcessFlip(source, target);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessInterlaceSplit
+\*****************************************************************************/
+static int imluaProcessInterlaceSplit (lua_State *L)
+{
+  imImage *src_image = imlua_checkimage(L, 1);
+  imImage *dst_image1 = imlua_checkimage(L, 2);
+  imImage *dst_image2 = imlua_checkimage(L, 3);
+  /* every complaint about dst_image2 is attributed to argument 3, not argument 2 */
+  imlua_matchcolor(L, src_image, dst_image1);
+  imlua_matchcolor(L, src_image, dst_image2);
+  luaL_argcheck(L, dst_image1->width == src_image->width, 2, "destiny width must be equal to source width");
+  luaL_argcheck(L, dst_image2->width == src_image->width, 3, "destiny width must be equal to source width");
+  if (src_image->height%2)
+  {
+    int dst_height1 = src_image->height/2 + 1;
+    luaL_argcheck(L, dst_image1->height == dst_height1, 2, "destiny1 height must be equal to source height/2+1 if height odd");
+  }
+  else
+    luaL_argcheck(L, dst_image1->height == src_image->height/2, 2, "destiny1 height must be equal to source height/2 if height even");
+  /* dst_image2 must be height/2 lines tall whether the source height is odd or even */
+  luaL_argcheck(L, dst_image2->height == src_image->height/2, 3, "destiny2 height must be equal to source height/2");
+  /* nothing is pushed, so Lua receives no results */
+  imProcessInterlaceSplit(src_image, dst_image1, dst_image2);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessRadial
+\*****************************************************************************/
+static int imluaProcessRadial (lua_State *L)
+{
+  imImage *source = imlua_checkimage(L, 1);
+  imImage *target = imlua_checkimage(L, 2);
+  float coef = (float) luaL_checknumber(L, 3);
+  int order = luaL_checkint(L, 4);
+  int done;  /* value returned by imProcessRadial, handed to Lua as a boolean */
+  imlua_match(L, source, target);
+  luaL_argcheck(L, !(order != 0 && order != 1 && order != 3), 4, "invalid order");
+  done = imProcessRadial(source, target, coef, order);
+  lua_pushboolean(L, done);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessSwirl
+\*****************************************************************************/
+static int imluaProcessSwirl(lua_State *L)
+{
+  imImage *source = imlua_checkimage(L, 1);
+  imImage *target = imlua_checkimage(L, 2);
+  float coef = (float) luaL_checknumber(L, 3);
+  int order = luaL_checkint(L, 4);
+  int done;  /* value returned by imProcessSwirl, handed to Lua as a boolean */
+  imlua_match(L, source, target);
+  luaL_argcheck(L, !(order != 0 && order != 1 && order != 3), 4, "invalid order, can be 0, 1 or 3");
+  done = imProcessSwirl(source, target, coef, order);
+  lua_pushboolean(L, done);
+  return 1;
+}
+
+static void imlua_checknotcfloat(lua_State *L, imImage *image, int index)
+{
+  /* raises a Lua argument error for argument `index` when the image is IM_CFLOAT */
+  luaL_argcheck(L, image->data_type != IM_CFLOAT, index, "image data type can NOT be cfloat");
+}
+
+
+/*****************************************************************************\
+ Morphology Operations for Gray Images
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessGrayMorphConvolve
+\*****************************************************************************/
+static int imluaProcessGrayMorphConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  imImage *kern = imlua_checkimage(L, 3);
+  int use_max = lua_toboolean(L, 4);
+  int ok;  /* value returned by imProcessGrayMorphConvolve, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);
+  imlua_match(L, src, dst);
+  imlua_checkdatatype(L, 3, kern, IM_INT);
+  imlua_matchsize(L, src, kern);  /* NOTE(review): compares the kernel against the source image - confirm intended */
+  ok = imProcessGrayMorphConvolve(src, dst, kern, use_max);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessGrayMorphErode
+\*****************************************************************************/
+static int imluaProcessGrayMorphErode (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessGrayMorphErode, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);  /* IM_CFLOAT sources raise an argument error */
+  imlua_match(L, src, dst);
+  ok = imProcessGrayMorphErode(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessGrayMorphDilate
+\*****************************************************************************/
+static int imluaProcessGrayMorphDilate (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessGrayMorphDilate, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);  /* IM_CFLOAT sources raise an argument error */
+  imlua_match(L, src, dst);
+  ok = imProcessGrayMorphDilate(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessGrayMorphOpen
+\*****************************************************************************/
+static int imluaProcessGrayMorphOpen (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessGrayMorphOpen, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);  /* IM_CFLOAT sources raise an argument error */
+  imlua_match(L, src, dst);
+  ok = imProcessGrayMorphOpen(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessGrayMorphClose
+\*****************************************************************************/
+static int imluaProcessGrayMorphClose (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessGrayMorphClose, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);  /* IM_CFLOAT sources raise an argument error */
+  imlua_match(L, src, dst);
+  ok = imProcessGrayMorphClose(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessGrayMorphTopHat
+\*****************************************************************************/
+static int imluaProcessGrayMorphTopHat (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessGrayMorphTopHat, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);  /* IM_CFLOAT sources raise an argument error */
+  imlua_match(L, src, dst);
+  ok = imProcessGrayMorphTopHat(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessGrayMorphWell
+\*****************************************************************************/
+static int imluaProcessGrayMorphWell (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessGrayMorphWell, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);  /* IM_CFLOAT sources raise an argument error */
+  imlua_match(L, src, dst);
+  ok = imProcessGrayMorphWell(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessGrayMorphGradient
+\*****************************************************************************/
+static int imluaProcessGrayMorphGradient (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessGrayMorphGradient, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);  /* IM_CFLOAT sources raise an argument error */
+  imlua_match(L, src, dst);
+  ok = imProcessGrayMorphGradient(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+
+
+/*****************************************************************************\
+ Morphology Operations for Binary Images
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessBinMorphConvolve
+\*****************************************************************************/
+static int imluaProcessBinMorphConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  imImage *kern = imlua_checkimage(L, 3);
+  int hit_white = luaL_checkint(L, 4);
+  int passes = luaL_checkint(L, 5);
+  int ok;  /* value returned by imProcessBinMorphConvolve, handed to Lua as a boolean */
+  imlua_checkcolorspace(L, 1, src, IM_BINARY);
+  imlua_match(L, src, dst);
+  imlua_checkdatatype(L, 3, kern, IM_INT);
+  imlua_matchsize(L, src, kern);  /* NOTE(review): compares the kernel against the source image - confirm intended */
+  ok = imProcessBinMorphConvolve(src, dst, kern, hit_white, passes);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessBinMorphErode
+\*****************************************************************************/
+static int imluaProcessBinMorphErode (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int passes = luaL_checkint(L, 4);
+  int ok;  /* value returned by imProcessBinMorphErode, handed to Lua as a boolean */
+  imlua_checkcolorspace(L, 1, src, IM_BINARY);  /* the source is checked for IM_BINARY */
+  imlua_match(L, src, dst);
+  ok = imProcessBinMorphErode(src, dst, ksize, passes);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessBinMorphDilate
+\*****************************************************************************/
+static int imluaProcessBinMorphDilate (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int passes = luaL_checkint(L, 4);
+  int ok;  /* value returned by imProcessBinMorphDilate, handed to Lua as a boolean */
+  imlua_checkcolorspace(L, 1, src, IM_BINARY);  /* the source is checked for IM_BINARY */
+  imlua_match(L, src, dst);
+  ok = imProcessBinMorphDilate(src, dst, ksize, passes);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessBinMorphOpen
+\*****************************************************************************/
+static int imluaProcessBinMorphOpen (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int passes = luaL_checkint(L, 4);
+  int ok;  /* value returned by imProcessBinMorphOpen, handed to Lua as a boolean */
+  imlua_checkcolorspace(L, 1, src, IM_BINARY);  /* the source is checked for IM_BINARY */
+  imlua_match(L, src, dst);
+  ok = imProcessBinMorphOpen(src, dst, ksize, passes);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessBinMorphClose
+\*****************************************************************************/
+static int imluaProcessBinMorphClose (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int passes = luaL_checkint(L, 4);
+  int ok;  /* value returned by imProcessBinMorphClose, handed to Lua as a boolean */
+  imlua_checkcolorspace(L, 1, src, IM_BINARY);  /* the source is checked for IM_BINARY */
+  imlua_match(L, src, dst);
+  ok = imProcessBinMorphClose(src, dst, ksize, passes);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessBinMorphOutline
+\*****************************************************************************/
+static int imluaProcessBinMorphOutline (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int passes = luaL_checkint(L, 4);
+  int ok;  /* value returned by imProcessBinMorphOutline, handed to Lua as a boolean */
+  imlua_checkcolorspace(L, 1, src, IM_BINARY);  /* the source is checked for IM_BINARY */
+  imlua_match(L, src, dst);
+  ok = imProcessBinMorphOutline(src, dst, ksize, passes);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessBinMorphThin
+\*****************************************************************************/
+static int imluaProcessBinMorphThin (lua_State *L)
+{
+  imImage *source = imlua_checkimage(L, 1);
+  imImage *target = imlua_checkimage(L, 2);
+  /* the source is checked for the IM_BINARY color space */
+  imlua_checkcolorspace(L, 1, source, IM_BINARY);
+  imlua_match(L, source, target);
+  /* nothing is pushed, so Lua receives no results */
+  imProcessBinMorphThin(source, target);
+  return 0;
+}
+
+
+
+/*****************************************************************************\
+ Rank Convolution Operations
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessMedianConvolve
+\*****************************************************************************/
+static int imluaProcessMedianConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessMedianConvolve, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);  /* IM_CFLOAT sources raise an argument error */
+  imlua_match(L, src, dst);
+  ok = imProcessMedianConvolve(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRangeConvolve
+\*****************************************************************************/
+static int imluaProcessRangeConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessRangeConvolve, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);  /* IM_CFLOAT sources raise an argument error */
+  imlua_match(L, src, dst);
+  ok = imProcessRangeConvolve(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRankClosestConvolve
+\*****************************************************************************/
+static int imluaProcessRankClosestConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessRankClosestConvolve, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);  /* IM_CFLOAT sources raise an argument error */
+  imlua_match(L, src, dst);
+  ok = imProcessRankClosestConvolve(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRankMaxConvolve
+\*****************************************************************************/
+static int imluaProcessRankMaxConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessRankMaxConvolve, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);  /* IM_CFLOAT sources raise an argument error */
+  imlua_match(L, src, dst);
+  ok = imProcessRankMaxConvolve(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRankMinConvolve
+\*****************************************************************************/
+static int imluaProcessRankMinConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessRankMinConvolve, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);  /* IM_CFLOAT sources raise an argument error */
+  imlua_match(L, src, dst);
+  ok = imProcessRankMinConvolve(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+
+/*****************************************************************************\
+ Convolution Operations
+\*****************************************************************************/
+
+static void imlua_checkkernel(lua_State *L, imImage* kernel, int index)
+{
+  imlua_checkcolorspace(L, index, kernel, IM_GRAY);  /* kernel is checked for the IM_GRAY color space */
+  luaL_argcheck(L, !(kernel->data_type != IM_INT && kernel->data_type != IM_FLOAT), index, "kernel data type can be int or float only");
+}
+
+/*****************************************************************************\
+ im.ProcessConvolve
+\*****************************************************************************/
+static int imluaProcessConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  imImage *kern = imlua_checkimage(L, 3);
+  int ok;  /* value returned by imProcessConvolve, handed to Lua as a boolean */
+  imlua_match(L, src, dst);
+  imlua_checkkernel(L, kern, 3);  /* gray kernel with int or float data */
+  ok = imProcessConvolve(src, dst, kern);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+  
+/*****************************************************************************\
+ im.ProcessConvolveDual
+\*****************************************************************************/
+static int imluaProcessConvolveDual (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  imImage *kern_a = imlua_checkimage(L, 3);
+  imImage *kern_b = imlua_checkimage(L, 4);
+  int ok;  /* value returned by imProcessConvolveDual, handed to Lua as a boolean */
+  imlua_match(L, src, dst);
+  imlua_checkkernel(L, kern_a, 3);  /* both kernels: gray, int or float data */
+  imlua_checkkernel(L, kern_b, 4);
+  ok = imProcessConvolveDual(src, dst, kern_a, kern_b);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessConvolveRep
+\*****************************************************************************/
+static int imluaProcessConvolveRep (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  imImage *kern = imlua_checkimage(L, 3);
+  int repeat = luaL_checkint(L, 4);
+  int ok;  /* value returned by imProcessConvolveRep, handed to Lua as a boolean */
+  imlua_match(L, src, dst);
+  imlua_checkkernel(L, kern, 3);  /* gray kernel with int or float data */
+  ok = imProcessConvolveRep(src, dst, kern, repeat);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessConvolveSep
+\*****************************************************************************/
+static int imluaProcessConvolveSep (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  imImage *kern = imlua_checkimage(L, 3);
+  int ok;  /* value returned by imProcessConvolveSep, handed to Lua as a boolean */
+  imlua_match(L, src, dst);
+  imlua_checkkernel(L, kern, 3);  /* gray kernel with int or float data */
+  ok = imProcessConvolveSep(src, dst, kern);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessCompassConvolve
+\*****************************************************************************/
+static int imluaProcessCompassConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  imImage *kern = imlua_checkimage(L, 3);
+  int ok;  /* value returned by imProcessCompassConvolve, handed to Lua as a boolean */
+  imlua_checknotcfloat(L, src, 1);  /* IM_CFLOAT sources raise an argument error */
+  imlua_match(L, src, dst);
+  imlua_checkkernel(L, kern, 3);
+  ok = imProcessCompassConvolve(src, dst, kern);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRotateKernel
+\*****************************************************************************/
+static int imluaProcessRotateKernel (lua_State *L)
+{
+  imProcessRotateKernel(imlua_checkimage(L, 1));  /* arg 1: the image handed to imProcessRotateKernel */
+  return 0;  /* nothing is pushed, so Lua receives no results */
+}
+
+/*****************************************************************************\
+ im.ProcessDiffOfGaussianConvolve
+\*****************************************************************************/
+static int imluaProcessDiffOfGaussianConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  float sigma1 = (float) luaL_checknumber(L, 3);
+  float sigma2 = (float) luaL_checknumber(L, 4);
+  int ok;  /* value returned by imProcessDiffOfGaussianConvolve, handed to Lua as a boolean */
+  if (src->data_type == IM_BYTE || src->data_type == IM_USHORT)
+  {
+    imlua_matchcolor(L, src, dst);  /* byte/ushort sources: destination is checked for IM_INT */
+    imlua_checkdatatype(L, 2, dst, IM_INT);
+  }
+  else
+    imlua_match(L, src, dst);
+  ok = imProcessDiffOfGaussianConvolve(src, dst, sigma1, sigma2);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessLapOfGaussianConvolve
+\*****************************************************************************/
+static int imluaProcessLapOfGaussianConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  float sigma = (float) luaL_checknumber(L, 3);
+  int ok;  /* value returned by imProcessLapOfGaussianConvolve, handed to Lua as a boolean */
+  if (src->data_type == IM_BYTE || src->data_type == IM_USHORT)
+  {
+    imlua_matchcolor(L, src, dst);  /* byte/ushort sources: destination is checked for IM_INT */
+    imlua_checkdatatype(L, 2, dst, IM_INT);
+  }
+  else
+    imlua_match(L, src, dst);
+  ok = imProcessLapOfGaussianConvolve(src, dst, sigma);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessMeanConvolve
+\*****************************************************************************/
+static int imluaProcessMeanConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessMeanConvolve, handed to Lua as a boolean */
+  imlua_match(L, src, dst);
+  ok = imProcessMeanConvolve(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessBarlettConvolve
+\*****************************************************************************/
+static int imluaProcessBarlettConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int ok;  /* value returned by imProcessBarlettConvolve, handed to Lua as a boolean */
+  imlua_match(L, src, dst);
+  ok = imProcessBarlettConvolve(src, dst, ksize);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessGaussianConvolve
+\*****************************************************************************/
+static int imluaProcessGaussianConvolve (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  float sigma = (float) luaL_checknumber(L, 3);
+  int ok;  /* value returned by imProcessGaussianConvolve, handed to Lua as a boolean */
+  imlua_match(L, src, dst);
+  ok = imProcessGaussianConvolve(src, dst, sigma);
+  lua_pushboolean(L, ok);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessPrewittConvolve
+\*****************************************************************************/
+/* Lua binding for imProcessPrewittConvolve: (src_image, dst_image) -> boolean. */
+static int imluaProcessPrewittConvolve (lua_State *L)
+{
+  int done;
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+
+  /* the destination is checked against the source before processing */
+  imlua_match(L, input, output);
+
+  done = imProcessPrewittConvolve(input, output);
+  lua_pushboolean(L, done);
+  return 1;  /* one result left on the Lua stack */
+}
+
+/*****************************************************************************\
+ im.ProcessSplineEdgeConvolve
+\*****************************************************************************/
+/* Lua binding for imProcessSplineEdgeConvolve: (src_image, dst_image) -> boolean. */
+static int imluaProcessSplineEdgeConvolve (lua_State *L)
+{
+  int done;
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+
+  /* the destination is checked against the source before processing */
+  imlua_match(L, input, output);
+
+  done = imProcessSplineEdgeConvolve(input, output);
+  lua_pushboolean(L, done);
+  return 1;  /* one result left on the Lua stack */
+}
+
+/*****************************************************************************\
+ im.ProcessSobelConvolve
+\*****************************************************************************/
+/* Lua binding for imProcessSobelConvolve: (src_image, dst_image) -> boolean. */
+static int imluaProcessSobelConvolve (lua_State *L)
+{
+  int done;
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+
+  /* the destination is checked against the source before processing */
+  imlua_match(L, input, output);
+
+  done = imProcessSobelConvolve(input, output);
+  lua_pushboolean(L, done);
+  return 1;  /* one result left on the Lua stack */
+}
+
+/*****************************************************************************\
+ im.ProcessZeroCrossing
+\*****************************************************************************/
+/* Lua binding for imProcessZeroCrossing: (src_image, dst_image), no results. */
+static int imluaProcessZeroCrossing (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+
+  /* only int and float sources are accepted */
+  if (input->data_type != IM_INT && input->data_type != IM_FLOAT)
+    luaL_argerror(L, 1, "image data type can be int or float only");
+  imlua_match(L, input, output);
+
+  imProcessZeroCrossing(input, output);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessCanny
+\*****************************************************************************/
+/* Lua binding for imProcessCanny: (src_image, dst_image, stddev), no results. */
+static int imluaProcessCanny (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+  float sigma = (float) luaL_checknumber(L, 3);
+
+  /* the source is checked for IM_GRAY / IM_BYTE, then the pair is matched */
+  imlua_checktype(L, 1, input, IM_GRAY, IM_BYTE);
+  imlua_match(L, input, output);
+
+  imProcessCanny(input, output, sigma);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.GaussianKernelSize2StdDev
+\*****************************************************************************/
+/* Lua binding for imGaussianKernelSize2StdDev: (kernel_size) -> number. */
+static int imluaGaussianKernelSize2StdDev(lua_State *L)
+{
+  /* the argument is read as a Lua number and truncated to int */
+  int kernel_size = (int)luaL_checknumber(L, 1);
+
+  lua_pushnumber(L, imGaussianKernelSize2StdDev(kernel_size));
+  return 1;  /* one result left on the Lua stack */
+}
+
+/*****************************************************************************\
+ im.GaussianStdDev2KernelSize
+\*****************************************************************************/
+/* Lua binding for imGaussianStdDev2KernelSize: (stddev) -> number. */
+static int imluaGaussianStdDev2KernelSize (lua_State *L)
+{
+  /* the argument is read as a Lua number and narrowed to float */
+  float sigma = (float)luaL_checknumber(L, 1);
+
+  lua_pushnumber(L, imGaussianStdDev2KernelSize(sigma));
+  return 1;  /* one result left on the Lua stack */
+}
+
+
+
+/*****************************************************************************\
+ Arithmetic Operations 
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessUnArithmeticOp
+\*****************************************************************************/
+/* Lua binding for imProcessUnArithmeticOp: (src_image, dst_image, op), no results. */
+static int imluaProcessUnArithmeticOp (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+  int operation = luaL_checkint(L, 3);
+
+  /* only the color space pairing is checked here (imlua_matchcolorspace) */
+  imlua_matchcolorspace(L, input, output);
+
+  imProcessUnArithmeticOp(input, output, operation);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessArithmeticOp
+\*****************************************************************************/
+/*
+ * Lua binding for imProcessArithmeticOp:
+ *   (src_image1, src_image2, dst_image, op), no results.
+ *
+ * The two sources are checked against each other with imlua_match and the
+ * destination is checked against both with imlua_matchsize. The destination
+ * data type must then be one of the types listed below for the data type of
+ * src_image1; a violation is raised as an argument error.
+ */
+static int imluaProcessArithmeticOp (lua_State *L)
+{
+  imImage *src_image1 = imlua_checkimage(L, 1);
+  imImage *src_image2 = imlua_checkimage(L, 2);
+  imImage *dst_image = imlua_checkimage(L, 3);
+  int op = luaL_checkint(L, 4);
+
+  imlua_match(L, src_image1, src_image2);
+  imlua_matchsize(L, src_image1, dst_image);
+  imlua_matchsize(L, src_image2, dst_image);
+
+  /* dst_image is Lua argument 3, so its type errors are reported against 3 */
+  switch (src_image1->data_type)
+  {
+  case IM_BYTE:
+    luaL_argcheck(L, 
+      dst_image->data_type == IM_BYTE || 
+      dst_image->data_type == IM_USHORT || 
+      dst_image->data_type == IM_INT || 
+      dst_image->data_type == IM_FLOAT, 
+      3, "source image is byte, destiny image data type can be byte, ushort, int and float only.");
+    break;
+  case IM_USHORT:
+    luaL_argcheck(L, 
+      dst_image->data_type == IM_USHORT || 
+      dst_image->data_type == IM_INT || 
+      dst_image->data_type == IM_FLOAT, 
+      3, "source image is ushort, destiny image data type can be ushort, int and float only.");
+    break;
+  case IM_INT:
+    luaL_argcheck(L, 
+      dst_image->data_type == IM_INT || 
+      dst_image->data_type == IM_FLOAT, 
+      3, "source image is int, destiny image data type can be int and float only.");
+    break;
+  case IM_FLOAT:
+    luaL_argcheck(L, 
+      dst_image->data_type == IM_FLOAT, 
+      3, "source image is float, destiny image data type can be float only.");
+    break;
+  case IM_CFLOAT:
+    luaL_argcheck(L, 
+      dst_image->data_type == IM_CFLOAT, 
+      3, "source image is cfloat, destiny image data type can be cfloat only.");
+    break;
+  default:
+    /* any other source data type is passed through unchecked, as before */
+    break;
+  }
+
+  imProcessArithmeticOp(src_image1, src_image2, dst_image, op);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessArithmeticConstOp
+\*****************************************************************************/
+/*
+ * Lua binding for imProcessArithmeticConstOp:
+ *   (src_image, src_const, dst_image, op), no results.
+ *
+ * The destination is checked against the source with imlua_matchsize. The
+ * destination data type must then be one of the types listed below for the
+ * source data type; a violation is raised as an argument error.
+ */
+static int imluaProcessArithmeticConstOp (lua_State *L)
+{
+  imImage *src_image = imlua_checkimage(L, 1);
+  float src_const = (float) luaL_checknumber(L, 2);
+  imImage *dst_image = imlua_checkimage(L, 3);
+  int op = luaL_checkint(L, 4);
+
+  imlua_matchsize(L, src_image, dst_image);
+
+  /* argument 2 is the constant; dst_image is argument 3, so its type errors
+     are reported against 3 */
+  switch (src_image->data_type)
+  {
+  case IM_BYTE:
+    luaL_argcheck(L, 
+      dst_image->data_type == IM_BYTE || 
+      dst_image->data_type == IM_USHORT || 
+      dst_image->data_type == IM_INT || 
+      dst_image->data_type == IM_FLOAT, 
+      3, "source image is byte, destiny image data type can be byte, ushort, int and float only.");
+    break;
+  case IM_USHORT:
+    luaL_argcheck(L, 
+      dst_image->data_type == IM_USHORT || 
+      dst_image->data_type == IM_INT || 
+      dst_image->data_type == IM_FLOAT, 
+      3, "source image is ushort, destiny image data type can be ushort, int and float only.");
+    break;
+  case IM_INT:
+    luaL_argcheck(L, 
+      dst_image->data_type == IM_INT || 
+      dst_image->data_type == IM_FLOAT, 
+      3, "source image is int, destiny image data type can be int and float only.");
+    break;
+  case IM_FLOAT:
+    luaL_argcheck(L, 
+      dst_image->data_type == IM_FLOAT, 
+      3, "source image is float, destiny image data type can be float only.");
+    break;
+  case IM_CFLOAT:
+    luaL_argcheck(L, 
+      dst_image->data_type == IM_CFLOAT, 
+      3, "source image is cfloat, destiny image data type can be cfloat only.");
+    break;
+  default:
+    /* any other source data type is passed through unchecked, as before */
+    break;
+  }
+
+  imProcessArithmeticConstOp(src_image, src_const, dst_image, op);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessBlendConst
+\*****************************************************************************/
+/* Lua binding for imProcessBlendConst: (src_image1, src_image2, dst_image, alpha), no results. */
+static int imluaProcessBlendConst (lua_State *L)
+{
+  imImage *first = imlua_checkimage(L, 1);
+  imImage *second = imlua_checkimage(L, 2);
+  imImage *output = imlua_checkimage(L, 3);
+  float weight = (float) luaL_checknumber(L, 4);
+
+  /* both the second source and the destination are matched against the first source */
+  imlua_match(L, first, second);
+  imlua_match(L, first, output);
+
+  imProcessBlendConst(first, second, output, weight);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessBlend
+\*****************************************************************************/
+/* Lua binding for imProcessBlend: (src_image1, src_image2, alpha_image, dst_image), no results. */
+static int imluaProcessBlend (lua_State *L)
+{
+  imImage *first = imlua_checkimage(L, 1);
+  imImage *second = imlua_checkimage(L, 2);
+  imImage *weights = imlua_checkimage(L, 3);
+  imImage *output = imlua_checkimage(L, 4);
+
+  /* second source and destination are matched against the first source;
+     the alpha image goes through imlua_matchdatatype only */
+  imlua_match(L, first, second);
+  imlua_match(L, first, output);
+  imlua_matchdatatype(L, first, weights);
+
+  imProcessBlend(first, second, weights, output);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessSplitComplex
+\*****************************************************************************/
+/* Lua binding for imProcessSplitComplex: (src_image, dst_image1, dst_image2, polar), no results. */
+static int imluaProcessSplitComplex (lua_State *L)
+{
+  imImage *complex_image = imlua_checkimage(L, 1);
+  imImage *first_part = imlua_checkimage(L, 2);
+  imImage *second_part = imlua_checkimage(L, 3);
+  int use_polar = luaL_checkint(L, 4);
+
+  /* source must be IM_CFLOAT, both destinations IM_FLOAT */
+  imlua_checkdatatype(L, 1, complex_image, IM_CFLOAT);
+  imlua_checkdatatype(L, 2, first_part, IM_FLOAT);
+  imlua_checkdatatype(L, 3, second_part, IM_FLOAT);
+
+  /* each destination is checked against the source color space */
+  imlua_matchcolorspace(L, complex_image, first_part);
+  imlua_matchcolorspace(L, complex_image, second_part);
+
+  imProcessSplitComplex(complex_image, first_part, second_part, use_polar);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessMergeComplex
+\*****************************************************************************/
+/*
+ * Lua binding for imProcessMergeComplex:
+ *   (src_image1, src_image2, dst_image, polar), no results.
+ *
+ * Both sources must be IM_FLOAT and the destination IM_CFLOAT; the second
+ * source and the destination are checked against the first source with
+ * imlua_matchcolorspace.
+ */
+static int imluaProcessMergeComplex (lua_State *L)
+{
+  imImage *src_image1 = imlua_checkimage(L, 1);
+  imImage *src_image2 = imlua_checkimage(L, 2);
+  imImage *dst_image = imlua_checkimage(L, 3);
+  /* polar is the fourth argument (it was previously read from index 5) */
+  int polar = luaL_checkint(L, 4);
+
+  imlua_checkdatatype(L, 1, src_image1, IM_FLOAT);
+  imlua_checkdatatype(L, 2, src_image2, IM_FLOAT);
+  imlua_checkdatatype(L, 3, dst_image, IM_CFLOAT);
+  imlua_matchcolorspace(L, src_image1, src_image2);
+  imlua_matchcolorspace(L, src_image1, dst_image);
+
+  imProcessMergeComplex(src_image1, src_image2, dst_image, polar);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessMultipleMean
+\*****************************************************************************/
+/*
+ * Lua binding for imProcessMultipleMean: (src_image_list, dst_image), no results.
+ *
+ * Argument 1 is a table of images (length taken with imlua_getn), argument 2
+ * is the destination. Every source is checked against the destination with
+ * imImageMatchDataType before processing.
+ *
+ * The temporary pointer array is allocated as a Lua userdata instead of with
+ * malloc, so it is reclaimed by the garbage collector even when one of the
+ * checks below raises a Lua error. The images themselves stay referenced by
+ * the table at index 1.
+ */
+static int imluaProcessMultipleMean (lua_State *L)
+{
+  int i, src_image_count;
+  imImage *dst_image;
+  imImage **src_image_list;
+
+  luaL_checktype(L, 1, LUA_TTABLE);
+  src_image_count = imlua_getn(L, 1);
+
+  src_image_list = (imImage**)lua_newuserdata(L, sizeof(imImage*)*src_image_count);
+
+  for (i = 0; i < src_image_count; i++)
+  {
+    lua_rawgeti(L, 1, i+1);
+    src_image_list[i] = imlua_checkimage(L, -1);
+    lua_pop(L, 1);  /* keep the stack flat, whatever the table length */
+  }
+
+  dst_image = imlua_checkimage(L, 2);
+
+  for (i = 0; i < src_image_count; i++)
+  {
+    int check = imImageMatchDataType(src_image_list[i], dst_image);
+    imlua_matchcheck(L, check, "images must have the same size and data type");
+  }
+
+  imProcessMultipleMean((const imImage**)src_image_list, src_image_count, dst_image);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessMultipleStdDev
+\*****************************************************************************/
+/*
+ * Lua binding for imProcessMultipleStdDev:
+ *   (src_image_list, mean_image, dst_image), no results.
+ *
+ * Argument 1 is a table of images. Every source and the mean image are
+ * checked against the destination with imImageMatchDataType before
+ * processing.
+ *
+ * The table length is taken with imlua_getn, as in imluaProcessMultipleMean.
+ * The previous code fetched the table.getn function and ran luaL_checkint on
+ * the function value itself, which can only raise an error.
+ *
+ * The temporary pointer array is allocated as a Lua userdata instead of with
+ * malloc, so it is reclaimed by the garbage collector even when one of the
+ * checks below raises a Lua error. The images themselves stay referenced by
+ * the table at index 1.
+ */
+static int imluaProcessMultipleStdDev (lua_State *L)
+{
+  int i, src_image_count, check;
+  imImage *dst_image, *mean_image;
+  imImage **src_image_list;
+
+  if (!lua_istable(L, 1))
+    luaL_argerror(L, 1, "must be a table");
+
+  src_image_count = imlua_getn(L, 1);
+
+  src_image_list = (imImage**) lua_newuserdata(L, src_image_count * sizeof(imImage*));
+
+  for (i = 0; i < src_image_count; i++)
+  {
+    lua_rawgeti(L, 1, i+1);
+    src_image_list[i] = imlua_checkimage(L, -1);
+    lua_pop(L, 1);  /* keep the stack flat, whatever the table length */
+  }
+
+  mean_image = imlua_checkimage(L, 2);
+  dst_image = imlua_checkimage(L, 3);
+
+  for (i = 0; i < src_image_count; i++)
+  {
+    check = imImageMatchDataType(src_image_list[i], dst_image);
+    imlua_matchcheck(L, check, "images must have the same size and data type");
+  }
+  check = imImageMatchDataType(mean_image, dst_image);
+  imlua_matchcheck(L, check, "images must have the same size and data type");
+
+  imProcessMultipleStdDev((const imImage**)src_image_list, src_image_count, mean_image, dst_image);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessAutoCovariance
+\*****************************************************************************/
+/* Lua binding for imProcessAutoCovariance: (src_image, mean_image, dst_image) -> number. */
+static int imluaProcessAutoCovariance (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *average = imlua_checkimage(L, 2);
+  imImage *output = imlua_checkimage(L, 3);
+
+  /* mean is matched against the source; the destination is only checked
+     for color space and must be IM_FLOAT */
+  imlua_match(L, input, average);
+  imlua_matchcolorspace(L, input, output);
+  imlua_checkdatatype(L, 3, output, IM_FLOAT);
+
+  lua_pushnumber(L, imProcessAutoCovariance(input, average, output));
+  return 1;  /* one result left on the Lua stack */
+}
+
+/*****************************************************************************\
+ im.ProcessMultiplyConj
+\*****************************************************************************/
+/* Lua binding for imProcessMultiplyConj: (src_image1, src_image2, dst_image), no results. */
+static int imluaProcessMultiplyConj (lua_State *L)
+{
+  imImage *first = imlua_checkimage(L, 1);
+  imImage *second = imlua_checkimage(L, 2);
+  imImage *output = imlua_checkimage(L, 3);
+
+  /* both the second source and the destination are matched against the first source */
+  imlua_match(L, first, second);
+  imlua_match(L, first, output);
+
+  imProcessMultiplyConj(first, second, output);
+  return 0;  /* nothing is returned to Lua */
+}
+
+
+/*****************************************************************************\
+ Additional Image Quantization Operations
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessQuantizeRGBUniform
+\*****************************************************************************/
+/* Lua binding for imProcessQuantizeRGBUniform: (src_image, dst_image, dither), no results. */
+static int imluaProcessQuantizeRGBUniform (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+  int use_dither = luaL_checkint(L, 3);
+
+  /* source: IM_RGB / IM_BYTE; destination: IM_MAP color space */
+  imlua_checktype(L, 1, input, IM_RGB, IM_BYTE);
+  imlua_checkcolorspace(L, 2, output, IM_MAP);
+  imlua_matchsize(L, input, output);
+
+  imProcessQuantizeRGBUniform(input, output, use_dither);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessQuantizeGrayUniform
+\*****************************************************************************/
+/* Lua binding for imProcessQuantizeGrayUniform: (src_image, dst_image, grays), no results. */
+static int imluaProcessQuantizeGrayUniform (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+  int gray_count = luaL_checkint(L, 3);
+
+  /* both images are checked for IM_GRAY / IM_BYTE, then matched */
+  imlua_checktype(L, 1, input, IM_GRAY, IM_BYTE);
+  imlua_checktype(L, 2, output, IM_GRAY, IM_BYTE);
+  imlua_match(L, input, output);
+
+  imProcessQuantizeGrayUniform(input, output, gray_count);
+  return 0;  /* nothing is returned to Lua */
+}
+
+
+/*****************************************************************************\
+ Histogram Based Operations
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessExpandHistogram
+\*****************************************************************************/
+/* Lua binding for imProcessExpandHistogram: (src_image, dst_image, percent), no results. */
+static int imluaProcessExpandHistogram (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+  float fraction = (float) luaL_checknumber(L, 3);
+
+  imlua_checkdatatype(L, 1, input, IM_BYTE);
+  imlua_match(L, input, output);
+
+  /* only RGB and gray images are accepted, on either side */
+  if (input->color_space != IM_RGB && input->color_space != IM_GRAY)
+    luaL_argerror(L, 1, "color space can be RGB or Gray only");
+  if (output->color_space != IM_RGB && output->color_space != IM_GRAY)
+    luaL_argerror(L, 2, "color space can be RGB or Gray only");
+
+  imProcessExpandHistogram(input, output, fraction);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessEqualizeHistogram
+\*****************************************************************************/
+/* Lua binding for imProcessEqualizeHistogram: (src_image, dst_image), no results. */
+static int imluaProcessEqualizeHistogram (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+
+  imlua_checkdatatype(L, 1, input, IM_BYTE);
+  imlua_match(L, input, output);
+
+  /* only RGB and gray images are accepted, on either side */
+  if (input->color_space != IM_RGB && input->color_space != IM_GRAY)
+    luaL_argerror(L, 1, "color space can be RGB or Gray only");
+  if (output->color_space != IM_RGB && output->color_space != IM_GRAY)
+    luaL_argerror(L, 2, "color space can be RGB or Gray only");
+
+  imProcessEqualizeHistogram(input, output);
+  return 0;  /* nothing is returned to Lua */
+}
+
+
+
+/*****************************************************************************\
+ Color Processing Operations
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessSplitYChroma
+\*****************************************************************************/
+/* Lua binding for imProcessSplitYChroma: (src_image, y_image, chroma_image), no results. */
+static int imluaProcessSplitYChroma (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *luma = imlua_checkimage(L, 2);
+  imImage *chroma = imlua_checkimage(L, 3);
+
+  /* source and chroma: IM_RGB / IM_BYTE; y image: IM_GRAY / IM_BYTE */
+  imlua_checktype(L, 1, input, IM_RGB, IM_BYTE);
+  imlua_checktype(L, 2, luma, IM_GRAY, IM_BYTE);
+  imlua_checktype(L, 3, chroma, IM_RGB, IM_BYTE);
+
+  /* both outputs go through imlua_matchsize against the source */
+  imlua_matchsize(L, input, luma);
+  imlua_matchsize(L, input, chroma);
+
+  imProcessSplitYChroma(input, luma, chroma);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessSplitHSI
+\*****************************************************************************/
+/* Lua binding for imProcessSplitHSI: (src_image, h_image, s_image, i_image), no results. */
+static int imluaProcessSplitHSI (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *hue = imlua_checkimage(L, 2);
+  imImage *saturation = imlua_checkimage(L, 3);
+  imImage *intensity = imlua_checkimage(L, 4);
+
+  /* source: RGB color space, byte or float data */
+  imlua_checkcolorspace(L, 1, input, IM_RGB);
+  if (input->data_type != IM_BYTE && input->data_type != IM_FLOAT)
+    luaL_argerror(L, 1, "data type can be float or byte only");
+
+  /* each output plane: IM_GRAY / IM_FLOAT */
+  imlua_checktype(L, 2, hue, IM_GRAY, IM_FLOAT);
+  imlua_checktype(L, 3, saturation, IM_GRAY, IM_FLOAT);
+  imlua_checktype(L, 4, intensity, IM_GRAY, IM_FLOAT);
+
+  imlua_matchsize(L, input, hue);
+  imlua_matchsize(L, input, saturation);
+  imlua_matchsize(L, input, intensity);
+
+  imProcessSplitHSI(input, hue, saturation, intensity);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessMergeHSI
+\*****************************************************************************/
+/* Lua binding for imProcessMergeHSI: (h_image, s_image, i_image, dst_image), no results. */
+static int imluaProcessMergeHSI (lua_State *L)
+{
+  imImage *hue = imlua_checkimage(L, 1);
+  imImage *saturation = imlua_checkimage(L, 2);
+  imImage *intensity = imlua_checkimage(L, 3);
+  imImage *output = imlua_checkimage(L, 4);
+
+  /* each input plane: IM_GRAY / IM_FLOAT */
+  imlua_checktype(L, 1, hue, IM_GRAY, IM_FLOAT);
+  imlua_checktype(L, 2, saturation, IM_GRAY, IM_FLOAT);
+  imlua_checktype(L, 3, intensity, IM_GRAY, IM_FLOAT);
+
+  /* destination: RGB color space, byte or float data */
+  imlua_checkcolorspace(L, 4, output, IM_RGB);
+  if (output->data_type != IM_BYTE && output->data_type != IM_FLOAT)
+    luaL_argerror(L, 4, "data type can be float or byte only");
+
+  imlua_matchsize(L, output, hue);
+  imlua_matchsize(L, output, saturation);
+  imlua_matchsize(L, output, intensity);
+
+  imProcessMergeHSI(hue, saturation, intensity, output);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessSplitComponents(src_image, { r, g, b} )
+\*****************************************************************************/
+/*
+ * Lua binding for imProcessSplitComponents: (src_image, dst_image_list), no results.
+ *
+ * Argument 2 is a table holding one destination image per component; its
+ * length (imlua_getn) must equal src_image->depth. Each destination is
+ * checked for the IM_GRAY color space and against the source with
+ * imImageMatchDataType.
+ *
+ * The temporary pointer array is allocated as a Lua userdata instead of with
+ * malloc, so it is reclaimed by the garbage collector even when one of the
+ * checks below raises a Lua error (the malloc version leaked in that case).
+ * The images themselves stay referenced by the table at index 2.
+ */
+static int imluaProcessSplitComponents (lua_State *L)
+{
+  int i;
+  imImage *src_image = imlua_checkimage(L, 1);
+  imImage **dst_image_list;
+
+  luaL_checktype(L, 2, LUA_TTABLE);
+
+  if (imlua_getn(L, 2) != src_image->depth)
+    luaL_error(L, "number of destiny images must match the depth of the source image");
+
+  dst_image_list = (imImage**)lua_newuserdata(L, sizeof(imImage*)*src_image->depth);
+
+  for (i = 0; i < src_image->depth; i++)
+  {
+    lua_pushnumber(L, i+1);
+    lua_gettable(L, 2);
+    dst_image_list[i] = imlua_checkimage(L, -1);
+    imlua_checkcolorspace(L, 2, dst_image_list[i], IM_GRAY);
+    lua_pop(L, 1);
+  }
+
+  for (i = 0; i < src_image->depth; i++)
+  {
+    int check = imImageMatchDataType(src_image, dst_image_list[i]);
+    imlua_matchcheck(L, check, "images must have the same size and data type");
+  }
+
+  imProcessSplitComponents(src_image, dst_image_list);
+
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessMergeComponents({r, g, b}, rgb)
+\*****************************************************************************/
+/*
+ * Lua binding for imProcessMergeComponents: (src_image_list, dst_image), no results.
+ *
+ * Argument 1 is a table holding one source image per component; its length
+ * (imlua_getn) must equal dst_image->depth. Each source is checked for the
+ * IM_GRAY color space and against the destination with imImageMatchDataType.
+ *
+ * The temporary pointer array is allocated as a Lua userdata instead of with
+ * malloc, so it is reclaimed by the garbage collector even when one of the
+ * checks below raises a Lua error (the malloc version leaked in that case).
+ * The images themselves stay referenced by the table at index 1.
+ */
+static int imluaProcessMergeComponents (lua_State *L)
+{
+  int i;
+  imImage** src_image_list;
+  imImage *dst_image;
+
+  luaL_checktype(L, 1, LUA_TTABLE);
+  dst_image = imlua_checkimage(L, 2);
+
+  if (imlua_getn(L, 1) != dst_image->depth)
+    luaL_error(L, "number of source images must match the depth of the destination image");
+
+  src_image_list = (imImage**)lua_newuserdata(L, sizeof(imImage*)*dst_image->depth);
+
+  for (i = 0; i < dst_image->depth; i++)
+  {
+    lua_pushnumber(L, i+1);
+    lua_gettable(L, 1);
+    src_image_list[i] = imlua_checkimage(L, -1);
+    imlua_checkcolorspace(L, 1, src_image_list[i], IM_GRAY);
+    lua_pop(L, 1);
+  }
+
+  for (i = 0; i < dst_image->depth; i++)
+  {
+    int check = imImageMatchDataType(src_image_list[i], dst_image);
+    imlua_matchcheck(L, check, "images must have the same size and data type");
+  }
+
+  imProcessMergeComponents((const imImage**)src_image_list, dst_image);
+
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessNormalizeComponents
+\*****************************************************************************/
+/* Lua binding for imProcessNormalizeComponents: (src_image, dst_image), no results. */
+static int imluaProcessNormalizeComponents (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+
+  /* destination must be IM_FLOAT; the pair is checked with imlua_matchcolorspace */
+  imlua_checkdatatype(L, 2, output, IM_FLOAT);
+  imlua_matchcolorspace(L, input, output);
+
+  imProcessNormalizeComponents(input, output);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessReplaceColor
+\*****************************************************************************/
+/*
+ * Lua binding for imProcessReplaceColor:
+ *   (src_image, dst_image, src_color, dst_color), no results.
+ * Both colors must have as many components as the source image depth.
+ * NOTE(review): the arrays returned by imlua_toarrayfloat are not released
+ * here - confirm who owns them.
+ */
+static int imluaProcessReplaceColor (lua_State *L)
+{
+  int old_count, new_count;
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+  float *old_color = imlua_toarrayfloat(L, 3, &old_count, 1);
+  float *new_color = imlua_toarrayfloat(L, 4, &new_count, 1);
+
+  imlua_checknotcfloat(L, input, 1);
+  imlua_match(L, input, output);
+
+  if (old_count != input->depth)
+    luaL_argerror(L, 3, "the colors must have the same number of components of the images");
+  if (new_count != input->depth)
+    luaL_argerror(L, 4, "the colors must have the same number of components of the images");
+
+  imProcessReplaceColor(input, output, old_color, new_color);
+  return 0;  /* nothing is returned to Lua */
+}
+
+
+
+/*****************************************************************************\
+ Logical Arithmetic Operations
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessBitwiseOp
+\*****************************************************************************/
+/* Lua binding for imProcessBitwiseOp: (src_image1, src_image2, dst_image, op), no results. */
+static int imluaProcessBitwiseOp (lua_State *L)
+{
+  imImage *first = imlua_checkimage(L, 1);
+  imImage *second = imlua_checkimage(L, 2);
+  imImage *output = imlua_checkimage(L, 3);
+  int operation = luaL_checkint(L, 4);
+
+  /* data types numerically at or above IM_FLOAT are rejected */
+  if (first->data_type >= IM_FLOAT)
+    luaL_argerror(L, 1, "image data type can be integer only");
+  imlua_match(L, first, second);
+  imlua_match(L, first, output);
+
+  imProcessBitwiseOp(first, second, output, operation);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessBitwiseNot
+\*****************************************************************************/
+/* Lua binding for imProcessBitwiseNot: (src_image, dst_image), no results. */
+static int imluaProcessBitwiseNot (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+
+  /* data types numerically at or above IM_FLOAT are rejected */
+  if (input->data_type >= IM_FLOAT)
+    luaL_argerror(L, 1, "image data type can be integer only");
+  imlua_match(L, input, output);
+
+  imProcessBitwiseNot(input, output);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessBitMask(src_image, dst_image, mask, op)
+\*****************************************************************************/
+/* Lua binding for imProcessBitMask: (src_image, dst_image, mask, op), no results. */
+static int imluaProcessBitMask (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+  unsigned char bits = imlua_checkmask(L, 3);
+  int operation = luaL_checkint(L, 4);
+
+  /* the source must be IM_BYTE before the pair is matched */
+  imlua_checkdatatype(L, 1, input, IM_BYTE);
+  imlua_match(L, input, output);
+
+  imProcessBitMask(input, output, bits, operation);
+  return 0;  /* nothing is returned to Lua */
+}
+
+/*****************************************************************************\
+ im.ProcessBitPlane(src_image, dst_image, plane, reset)
+\*****************************************************************************/
+/* Lua binding for imProcessBitPlane: (src_image, dst_image, plane, reset), no results. */
+static int imluaProcessBitPlane (lua_State *L)
+{
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+  int bit_plane = luaL_checkint(L, 3);
+  int do_reset = luaL_checkint(L, 4);
+
+  /* the source must be IM_BYTE before the pair is matched */
+  imlua_checkdatatype(L, 1, input, IM_BYTE);
+  imlua_match(L, input, output);
+
+  imProcessBitPlane(input, output, bit_plane, do_reset);
+  return 0;  /* nothing is returned to Lua */
+}
+
+
+
+/*****************************************************************************\
+ Synthetic Image Render
+\*****************************************************************************/
+
+/*
+ * State shared between imluaProcessRenderOp / imluaProcessRenderCondOp and the
+ * render callbacks they hand to the IM library. The callback signatures carry
+ * no Lua state, so it is kept in file-level variables that the bindings set
+ * right before calling the render function.
+ * NOTE: This breaks on multithread
+ */
+/* Lua state stored by the most recent render binding call; read by the callbacks */
+static lua_State *g_renderState = NULL;
+/* element count of the param array passed to the callbacks; not declared static,
+   so it has external linkage */
+int g_paramCount = 0;
+
+/*
+ * Render callback passed to imProcessRenderOp.
+ *
+ * Calls the Lua function found at stack index 2 of g_renderState with
+ * (x, y, d, param), where param is pushed by imlua_newarrayfloat using
+ * g_paramCount, and returns its single result converted to float.
+ *
+ * The result is popped before returning. Without the pop every call left one
+ * more value on the Lua stack, and this callback is invoked repeatedly by the
+ * render function.
+ */
+static float imluaRenderFunc (int x, int y, int d, float *param)
+{
+  float value;
+  lua_State *L = g_renderState;
+
+  luaL_checktype(L, 2, LUA_TFUNCTION);
+  lua_pushvalue(L, 2);
+  lua_pushnumber(L, x);
+  lua_pushnumber(L, y);
+  lua_pushnumber(L, d);
+  imlua_newarrayfloat(L, param, g_paramCount, 1);
+
+  lua_call(L, 4, 1);
+
+  value = (float) luaL_checknumber(L, -1);
+  lua_pop(L, 1);  /* restore the stack to its height on entry */
+  return value;
+}
+
+/*****************************************************************************\
+ im.ProcessRenderOp(image, function, name, param, plus)
+\*****************************************************************************/
+/*
+ * Lua binding for imProcessRenderOp: (image, function, name, param, plus) -> boolean.
+ * The Lua function stays at stack index 2, where imluaRenderFunc looks it up.
+ * NOTE(review): the array returned by imlua_toarrayfloat is not released
+ * here - confirm who owns it.
+ */
+static int imluaProcessRenderOp (lua_State *L)
+{
+  int param_count;
+  int done;
+
+  imImage *target = imlua_checkimage(L, 1);
+  const char *op_name = luaL_checkstring(L, 3);
+  float *op_param = imlua_toarrayfloat(L, 4, &param_count, 1);
+  int additive = luaL_checkint(L, 5);
+
+  imlua_checknotcfloat(L, target, 1);
+
+  luaL_checktype(L, 2, LUA_TFUNCTION);
+
+  /* publish the state the callback needs before rendering starts */
+  g_renderState = L;
+  g_paramCount = param_count;
+
+  done = imProcessRenderOp(target, imluaRenderFunc, (char*) op_name, op_param, additive);
+  lua_pushboolean(L, done);
+  return 1;  /* one result left on the Lua stack */
+}
+
+/*
+ * Conditional render callback passed to imProcessRenderCondOp.
+ *
+ * Calls the Lua function found at stack index 2 of g_renderState with
+ * (x, y, d, param) and expects two results: the first is returned as a
+ * float, the second is stored in *cond as an int.
+ *
+ * Both results are popped before returning. Without the pop every call left
+ * two more values on the Lua stack, and this callback is invoked repeatedly
+ * by the render function.
+ */
+static float imluaRenderCondFunc (int x, int y, int d, int *cond, float *param)
+{
+  float value;
+  lua_State *L = g_renderState;
+
+  luaL_checktype(L, 2, LUA_TFUNCTION);
+  lua_pushvalue(L, 2);
+  lua_pushnumber(L, x);
+  lua_pushnumber(L, y);
+  lua_pushnumber(L, d);
+  imlua_newarrayfloat(L, param, g_paramCount, 1);
+
+  lua_call(L, 4, 2);
+
+  *cond = luaL_checkint(L, -1);
+  value = (float) luaL_checknumber(L, -2);
+  lua_pop(L, 2);  /* restore the stack to its height on entry */
+  return value;
+}
+
+/*****************************************************************************\
+ im.ProcessRenderCondOp(image, function, name, param)
+\*****************************************************************************/
+/*
+ * Lua binding for imProcessRenderCondOp: (image, function, name, param) -> boolean.
+ * The Lua function stays at stack index 2, where imluaRenderCondFunc looks it up.
+ * NOTE(review): the array returned by imlua_toarrayfloat is not released
+ * here - confirm who owns it.
+ */
+static int imluaProcessRenderCondOp (lua_State *L)
+{
+  int param_count;
+  int done;
+
+  imImage *target = imlua_checkimage(L, 1);
+  const char *op_name = luaL_checkstring(L, 3);
+  float *op_param = imlua_toarrayfloat(L, 4, &param_count, 1);
+
+  imlua_checknotcfloat(L, target, 1);
+
+  luaL_checktype(L, 2, LUA_TFUNCTION);
+
+  /* publish the state the callback needs before rendering starts */
+  g_renderState = L;
+  g_paramCount = param_count;
+
+  done = imProcessRenderCondOp(target, imluaRenderCondFunc, (char*) op_name, op_param);
+  lua_pushboolean(L, done);
+  return 1;  /* one result left on the Lua stack */
+}
+
+/*****************************************************************************\
+ im.ProcessRenderAddSpeckleNoise
+\*****************************************************************************/
+/* Lua binding for imProcessRenderAddSpeckleNoise: (src_image, dst_image, percent) -> boolean. */
+static int imluaProcessRenderAddSpeckleNoise (lua_State *L)
+{
+  int done;
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+  float amount = (float) luaL_checknumber(L, 3);
+
+  imlua_checknotcfloat(L, input, 1);
+  imlua_match(L, input, output);
+
+  done = imProcessRenderAddSpeckleNoise(input, output, amount);
+  lua_pushboolean(L, done);
+  return 1;  /* one result left on the Lua stack */
+}
+
+/*****************************************************************************\
+ im.ProcessRenderAddGaussianNoise
+\*****************************************************************************/
+/* Lua binding for imProcessRenderAddGaussianNoise: (src_image, dst_image, mean, stddev) -> boolean. */
+static int imluaProcessRenderAddGaussianNoise (lua_State *L)
+{
+  int done;
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+  float center = (float) luaL_checknumber(L, 3);
+  float sigma = (float) luaL_checknumber(L, 4);
+
+  imlua_checknotcfloat(L, input, 1);
+  imlua_match(L, input, output);
+
+  done = imProcessRenderAddGaussianNoise(input, output, center, sigma);
+  lua_pushboolean(L, done);
+  return 1;  /* one result left on the Lua stack */
+}
+
+/*****************************************************************************\
+ im.ProcessRenderAddUniformNoise
+\*****************************************************************************/
+/* Lua binding for imProcessRenderAddUniformNoise: (src_image, dst_image, mean, stddev) -> boolean. */
+static int imluaProcessRenderAddUniformNoise (lua_State *L)
+{
+  int done;
+  imImage *input = imlua_checkimage(L, 1);
+  imImage *output = imlua_checkimage(L, 2);
+  float center = (float) luaL_checknumber(L, 3);
+  float sigma = (float) luaL_checknumber(L, 4);
+
+  imlua_checknotcfloat(L, input, 1);
+  imlua_match(L, input, output);
+
+  done = imProcessRenderAddUniformNoise(input, output, center, sigma);
+  lua_pushboolean(L, done);
+  return 1;  /* one result left on the Lua stack */
+}
+
+/*****************************************************************************\
+ im.ProcessRenderRandomNoise
+\*****************************************************************************/
+/* Lua binding for imProcessRenderRandomNoise: (image) -> boolean. */
+static int imluaProcessRenderRandomNoise (lua_State *L)
+{
+  int done;
+  imImage *target = imlua_checkimage(L, 1);
+
+  imlua_checknotcfloat(L, target, 1);
+
+  done = imProcessRenderRandomNoise(target);
+  lua_pushboolean(L, done);
+  return 1;  /* one result left on the Lua stack */
+}
+
+/*****************************************************************************\
+ im.ProcessRenderConstant(image, [value_table])
+\*****************************************************************************/
+/*
+ * Lua binding for imProcessRenderConstant: (image, [values]) -> boolean.
+ *
+ * When argument 2 is a table, its first image->depth entries are read with
+ * lua_rawgeti / lua_tonumber into a temporary float array that is passed to
+ * imProcessRenderConstant. Otherwise NULL is passed as the value array.
+ * Raises a Lua error if the temporary array cannot be allocated.
+ */
+static int imluaProcessRenderConstant (lua_State *L)
+{
+  int i;
+  float *value = NULL;
+
+  imImage *image = imlua_checkimage(L, 1);
+  int count = image->depth;
+
+  imlua_checknotcfloat(L, image, 1);
+  
+  if (lua_istable(L, 2))
+  {
+    value = (float*) malloc (sizeof(float) * count);
+    if (!value)
+      return luaL_error(L, "insufficient memory");
+    
+    /* none of the calls below can raise a Lua error, so the array cannot leak */
+    for (i = 0; i < count; i++)
+    {
+      lua_rawgeti(L, 2, i+1);
+      value[i] = (float) lua_tonumber(L, -1);
+      lua_pop(L, 1);      
+    }
+  }
+
+  lua_pushboolean(L, imProcessRenderConstant(image, value));
+
+  free(value);  /* free(NULL) is a no-op, so no guard is needed */
+
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRenderWheel(image, int_radius, ext_radius) -> boolean
+\*****************************************************************************/
+static int imluaProcessRenderWheel (lua_State *L)
+{
+  imImage *target = imlua_checkimage(L, 1);
+  int inner = luaL_checkint(L, 2);
+  int outer = luaL_checkint(L, 3);
+
+  imlua_checknotcfloat(L, target, 1);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessRenderWheel(target, inner, outer));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRenderCone(image, radius) -> boolean
+\*****************************************************************************/
+static int imluaProcessRenderCone (lua_State *L)
+{
+  imImage *target = imlua_checkimage(L, 1);
+  int cone_radius = luaL_checkint(L, 2);
+
+  imlua_checknotcfloat(L, target, 1);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessRenderCone(target, cone_radius));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRenderTent(image, width, height) -> boolean
+\*****************************************************************************/
+static int imluaProcessRenderTent (lua_State *L)
+{
+  imImage *target = imlua_checkimage(L, 1);
+  int tent_w = luaL_checkint(L, 2);
+  int tent_h = luaL_checkint(L, 3);
+
+  imlua_checknotcfloat(L, target, 1);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessRenderTent(target, tent_w, tent_h));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRenderRamp(image, start, end, dir) -> boolean
+\*****************************************************************************/
+static int imluaProcessRenderRamp (lua_State *L)
+{
+  imImage *target = imlua_checkimage(L, 1);
+  int ramp_start = luaL_checkint(L, 2);
+  int ramp_end = luaL_checkint(L, 3);
+  int direction = luaL_checkint(L, 4);
+
+  imlua_checknotcfloat(L, target, 1);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessRenderRamp(target, ramp_start, ramp_end, direction));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRenderBox(image, width, height) -> boolean
+\*****************************************************************************/
+static int imluaProcessRenderBox (lua_State *L)
+{
+  imImage *target = imlua_checkimage(L, 1);
+  int box_w = luaL_checkint(L, 2);
+  int box_h = luaL_checkint(L, 3);
+
+  imlua_checknotcfloat(L, target, 1);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessRenderBox(target, box_w, box_h));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRenderSinc(image, xperiod, yperiod) -> boolean
+\*****************************************************************************/
+static int imluaProcessRenderSinc (lua_State *L)
+{
+  imImage *target = imlua_checkimage(L, 1);
+  float period_x = (float) luaL_checknumber(L, 2);
+  float period_y = (float) luaL_checknumber(L, 3);
+
+  imlua_checknotcfloat(L, target, 1);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessRenderSinc(target, period_x, period_y));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRenderGaussian(image, stddev) -> boolean
+\*****************************************************************************/
+static int imluaProcessRenderGaussian (lua_State *L)
+{
+  imImage *target = imlua_checkimage(L, 1);
+  float sigma = (float) luaL_checknumber(L, 2);
+
+  imlua_checknotcfloat(L, target, 1);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessRenderGaussian(target, sigma));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRenderLapOfGaussian(image, stddev) -> boolean
+\*****************************************************************************/
+static int imluaProcessRenderLapOfGaussian (lua_State *L)
+{
+  imImage *target = imlua_checkimage(L, 1);
+  float sigma = (float) luaL_checknumber(L, 2);
+
+  imlua_checknotcfloat(L, target, 1);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessRenderLapOfGaussian(target, sigma));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRenderCosine(image, xperiod, yperiod) -> boolean
+\*****************************************************************************/
+static int imluaProcessRenderCosine (lua_State *L)
+{
+  imImage *target = imlua_checkimage(L, 1);
+  float period_x = (float) luaL_checknumber(L, 2);
+  float period_y = (float) luaL_checknumber(L, 3);
+
+  imlua_checknotcfloat(L, target, 1);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessRenderCosine(target, period_x, period_y));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRenderGrid(image, x_space, y_space) -> boolean
+\*****************************************************************************/
+static int imluaProcessRenderGrid (lua_State *L)
+{
+  imImage *target = imlua_checkimage(L, 1);
+  int step_x = luaL_checkint(L, 2);
+  int step_y = luaL_checkint(L, 3);
+
+  imlua_checknotcfloat(L, target, 1);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessRenderGrid(target, step_x, step_y));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessRenderChessboard(image, x_space, y_space) -> boolean
+\*****************************************************************************/
+static int imluaProcessRenderChessboard (lua_State *L)
+{
+  imImage *target = imlua_checkimage(L, 1);
+  int step_x = luaL_checkint(L, 2);
+  int step_y = luaL_checkint(L, 3);
+
+  imlua_checknotcfloat(L, target, 1);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessRenderChessboard(target, step_x, step_y));
+  return 1;
+}
+
+
+
+/*****************************************************************************\
+ Tone Gamut Operations
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessToneGamut(src_image, dst_image, op, param)
+\*****************************************************************************/
+static int imluaProcessToneGamut (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int gamut_op = luaL_checkint(L, 3);
+  float *op_args = NULL;
+
+  imlua_checknotcfloat(L, src, 1);
+  imlua_match(L, src, dst);
+
+  /* imlua_toarrayfloat (defined elsewhere) builds the float array from argument 4 */
+  op_args = imlua_toarrayfloat(L, 4, NULL, 1);
+
+  imProcessToneGamut(src, dst, gamut_op, op_args);
+
+  /* the array is released here; free(NULL) is a no-op; nothing is returned to Lua */
+  free(op_args);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessUnNormalize(src_image, dst_image)
+\*****************************************************************************/
+static int imluaProcessUnNormalize (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+
+  imlua_checkdatatype(L, 1, src, IM_FLOAT);
+  imlua_checkdatatype(L, 2, dst, IM_BYTE);
+  imlua_matchcolorspace(L, src, dst);
+  /* no values are returned to Lua */
+  imProcessUnNormalize(src, dst);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessDirectConv(src_image, dst_image)
+\*****************************************************************************/
+static int imluaProcessDirectConv (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int type_ok = src->data_type == IM_USHORT ||
+                src->data_type == IM_INT ||
+                src->data_type == IM_FLOAT;
+
+  luaL_argcheck(L, type_ok, 1, "data type can be ushort, int or float only");
+  imlua_checkdatatype(L, 2, dst, IM_BYTE);
+  imlua_matchsize(L, src, dst);
+
+  /* no values are returned to Lua */
+  imProcessDirectConv(src, dst);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessNegative(src_image, dst_image)
+\*****************************************************************************/
+static int imluaProcessNegative (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+
+  imlua_checknotcfloat(L, src, 1);
+  imlua_match(L, src, dst);
+  /* no values are returned to Lua */
+  imProcessNegative(src, dst);
+  return 0;
+}
+
+
+
+/*****************************************************************************\
+ Threshold Operations
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessRangeContrastThreshold(src_image, dst_image, kernel_size, min_range) -> boolean
+\*****************************************************************************/
+static int imluaProcessRangeContrastThreshold (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int range_min = luaL_checkint(L, 4);
+
+  imlua_checktype(L, 1, src, IM_GRAY, IM_BYTE);
+  luaL_argcheck(L, (src->data_type < IM_FLOAT), 1, "image data type can be integer only");
+  imlua_checkcolorspace(L, 2, dst, IM_BINARY);
+  imlua_matchsize(L, src, dst);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessRangeContrastThreshold(src, dst, ksize, range_min));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessLocalMaxThreshold(src_image, dst_image, kernel_size, min_thres) -> boolean
+\*****************************************************************************/
+static int imluaProcessLocalMaxThreshold (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int ksize = luaL_checkint(L, 3);
+  int thres_min = luaL_checkint(L, 4);
+
+  imlua_checktype(L, 1, src, IM_GRAY, IM_BYTE);
+  luaL_argcheck(L, (src->data_type < IM_FLOAT), 1, "image data type can be integer only");
+  imlua_checkcolorspace(L, 2, dst, IM_BINARY);
+  imlua_matchsize(L, src, dst);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessLocalMaxThreshold(src, dst, ksize, thres_min));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessThreshold(src_image, dst_image, level, value)
+\*****************************************************************************/
+static int imluaProcessThreshold (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int thr_level = luaL_checkint(L, 3);
+  int thr_value = luaL_checkint(L, 4);
+
+  imlua_checktype(L, 1, src, IM_GRAY, IM_BYTE);
+  luaL_argcheck(L, (src->data_type < IM_FLOAT), 1, "image data type can be integer only");
+  imlua_checkcolorspace(L, 2, dst, IM_BINARY);
+  imlua_matchsize(L, src, dst);
+
+  /* no values are returned to Lua */
+  imProcessThreshold(src, dst, thr_level, thr_value);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessThresholdByDiff(src_image1, src_image2, dst_image)
+\*****************************************************************************/
+static int imluaProcessThresholdByDiff (lua_State *L)
+{
+  imImage *src_image1 = imlua_checkimage(L, 1);
+  imImage *src_image2 = imlua_checkimage(L, 2);
+  imImage *dst_image = imlua_checkimage(L, 3);
+
+  imlua_checktype(L, 1, src_image1, IM_GRAY, IM_BYTE);
+  imlua_match(L, src_image1, src_image2);
+  imlua_checkcolorspace(L, 3, dst_image, IM_BINARY);  /* dst_image is argument 3, not 2 */
+  imlua_matchsize(L, src_image1, dst_image);
+  /* no values are returned to Lua */
+  imProcessThresholdByDiff(src_image1, src_image2, dst_image);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessHysteresisThreshold(src_image, dst_image, low_thres, high_thres)
+\*****************************************************************************/
+static int imluaProcessHysteresisThreshold (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int lo_level = luaL_checkint(L, 3);
+  int hi_level = luaL_checkint(L, 4);
+
+  imlua_checktype(L, 1, src, IM_GRAY, IM_BYTE);
+  imlua_checkcolorspace(L, 2, dst, IM_BINARY);
+  imlua_matchsize(L, src, dst);
+  /* no values are returned to Lua */
+  imProcessHysteresisThreshold(src, dst, lo_level, hi_level);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessHysteresisThresEstimate(src_image) -> low_thres, high_thres
+\*****************************************************************************/
+static int imluaProcessHysteresisThresEstimate (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  int lo_level, hi_level;
+
+  imlua_checktype(L, 1, src, IM_GRAY, IM_BYTE);
+
+  /* both values are written through the output pointers */
+  imProcessHysteresisThresEstimate(src, &lo_level, &hi_level);
+  /* two Lua results: low first, then high */
+  lua_pushnumber(L, lo_level);
+  lua_pushnumber(L, hi_level);
+  return 2;
+}
+
+/*****************************************************************************\
+ im.ProcessUniformErrThreshold(src_image, dst_image) -> boolean
+\*****************************************************************************/
+static int imluaProcessUniformErrThreshold (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+
+  imlua_checktype(L, 1, src, IM_GRAY, IM_BYTE);
+  imlua_checkcolorspace(L, 2, dst, IM_BINARY);
+  imlua_matchsize(L, src, dst);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessUniformErrThreshold(src, dst));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessDifusionErrThreshold(src_image, dst_image, level)
+\*****************************************************************************/
+static int imluaProcessDifusionErrThreshold (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int thr_level = luaL_checkint(L, 3);
+
+  imlua_checkdatatype(L, 1, src, IM_BYTE);
+  imlua_checkdatatype(L, 2, dst, IM_BYTE);
+  imlua_matchcheck(L, src->depth == dst->depth, "images must have the same depth");
+  imlua_matchsize(L, src, dst);
+  /* no values are returned to Lua */
+  imProcessDifusionErrThreshold(src, dst, thr_level);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessPercentThreshold(src_image, dst_image, percent) -> boolean
+\*****************************************************************************/
+static int imluaProcessPercentThreshold (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  float pct = (float) luaL_checknumber(L, 3);
+
+  imlua_checktype(L, 1, src, IM_GRAY, IM_BYTE);
+  imlua_checkcolorspace(L, 2, dst, IM_BINARY);
+  imlua_matchsize(L, src, dst);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessPercentThreshold(src, dst, pct));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessOtsuThreshold(src_image, dst_image) -> number
+\*****************************************************************************/
+static int imluaProcessOtsuThreshold (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+
+  imlua_checktype(L, 1, src, IM_GRAY, IM_BYTE);
+  imlua_checkcolorspace(L, 2, dst, IM_BINARY);
+  imlua_matchsize(L, src, dst);
+  /* the value returned by the IM routine is pushed as a Lua number */
+  lua_pushnumber(L, imProcessOtsuThreshold(src, dst));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessMinMaxThreshold(src_image, dst_image) -> boolean
+\*****************************************************************************/
+static int imluaProcessMinMaxThreshold (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+
+  imlua_checkcolorspace(L, 1, src, IM_GRAY);
+  luaL_argcheck(L, (src->data_type < IM_FLOAT), 1, "image data type can be integer only");
+  imlua_checkcolorspace(L, 2, dst, IM_BINARY);
+  imlua_matchsize(L, src, dst);
+  /* the status returned by the IM routine becomes the Lua boolean result */
+  lua_pushboolean(L, imProcessMinMaxThreshold(src, dst));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessLocalMaxThresEstimate(image) -> number
+\*****************************************************************************/
+static int imluaProcessLocalMaxThresEstimate (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  int estimate;
+
+  imlua_checkdatatype(L, 1, src, IM_BYTE);
+
+  /* the value is written through the output pointer */
+  imProcessLocalMaxThresEstimate(src, &estimate);
+  lua_pushnumber(L, estimate);
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ProcessSliceThreshold(src_image, dst_image, start_level, end_level)
+\*****************************************************************************/
+static int imluaProcessSliceThreshold (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int first_level = luaL_checkint(L, 3);
+  int last_level = luaL_checkint(L, 4);
+
+  imlua_checkcolorspace(L, 1, src, IM_GRAY);
+  luaL_argcheck(L, (src->data_type < IM_FLOAT), 1, "image data type can be integer only");
+  imlua_checkcolorspace(L, 2, dst, IM_BINARY);
+  imlua_matchsize(L, src, dst);
+
+  /* no values are returned to Lua */
+  imProcessSliceThreshold(src, dst, first_level, last_level);
+  return 0;
+}
+
+
+/*****************************************************************************\
+ Special Effects
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ProcessPixelate(src_image, dst_image, box_size)
+\*****************************************************************************/
+static int imluaProcessPixelate (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int cell = luaL_checkint(L, 3);
+
+  imlua_checkdatatype(L, 1, src, IM_BYTE);
+  imlua_match(L, src, dst);
+  /* no values are returned to Lua */
+  imProcessPixelate(src, dst, cell);
+  return 0;
+}
+
+/*****************************************************************************\
+ im.ProcessPosterize(src_image, dst_image, level), with 1 <= level <= 7
+\*****************************************************************************/
+static int imluaProcessPosterize (lua_State *L)
+{
+  imImage *src = imlua_checkimage(L, 1);
+  imImage *dst = imlua_checkimage(L, 2);
+  int post_level = luaL_checkint(L, 3);
+
+  imlua_checkdatatype(L, 1, src, IM_BYTE);
+  imlua_match(L, src, dst);
+  luaL_argcheck(L, (post_level >= 1 && post_level <= 7), 3, "invalid level, must be >=1 and <=7");
+  /* no values are returned to Lua */
+  imProcessPosterize(src, dst, post_level);
+  return 0;
+}
+
+/* Function table passed to luaL_register() in imlua_open_process(): each entry
+   pairs a Lua-visible name with its C binding; the {NULL, NULL} entry ends the list. */
+static const luaL_reg improcess_lib[] = {
+  {"CalcRMSError", imluaCalcRMSError},
+  {"CalcSNR", imluaCalcSNR},
+  {"CalcCountColors", imluaCalcCountColors},
+  {"CalcHistogram", imluaCalcHistogram},
+  /*{"CalcUShortHistogram", imluaCalcUShortHistogram}, done by imluaCalcHistogram */
+  {"CalcGrayHistogram", imluaCalcGrayHistogram},
+  {"CalcImageStatistics", imluaCalcImageStatistics},
+  {"CalcHistogramStatistics", imluaCalcHistogramStatistics},
+  {"CalcHistoImageStatistics", imluaCalcHistoImageStatistics},
+
+  {"AnalyzeFindRegions", imluaAnalyzeFindRegions},
+  {"AnalyzeMeasureArea", imluaAnalyzeMeasureArea},
+  {"AnalyzeMeasurePerimArea", imluaAnalyzeMeasurePerimArea},
+  {"AnalyzeMeasureCentroid", imluaAnalyzeMeasureCentroid},
+  {"AnalyzeMeasurePrincipalAxis", imluaAnalyzeMeasurePrincipalAxis},
+  {"AnalyzeMeasurePerimeter", imluaAnalyzeMeasurePerimeter},
+  {"AnalyzeMeasureHoles", imluaAnalyzeMeasureHoles},
+
+  {"ProcessPerimeterLine", imluaProcessPerimeterLine},
+  {"ProcessPrune", imluaProcessPrune},
+  {"ProcessFillHoles", imluaProcessFillHoles},
+
+  {"ProcessHoughLines", imluaProcessHoughLines},
+  {"ProcessHoughLinesDraw", imluaProcessHoughLinesDraw},
+  {"ProcessDistanceTransform", imluaProcessDistanceTransform},
+  {"ProcessRegionalMaximum", imluaProcessRegionalMaximum},
+
+  {"ProcessReduce", imluaProcessReduce},
+  {"ProcessResize", imluaProcessResize},
+  {"ProcessReduceBy4", imluaProcessReduceBy4},
+  {"ProcessCrop", imluaProcessCrop},
+  {"ProcessAddMargins", imluaProcessAddMargins},
+  {"ProcessInsert", imluaProcessInsert},
+
+  {"ProcessCalcRotateSize", imluaProcessCalcRotateSize},
+  {"ProcessRotate", imluaProcessRotate},
+  {"ProcessRotateRef", imluaProcessRotateRef},
+  {"ProcessRotate90", imluaProcessRotate90},
+  {"ProcessRotate180", imluaProcessRotate180},
+  {"ProcessMirror", imluaProcessMirror},
+  {"ProcessFlip", imluaProcessFlip},
+  {"ProcessRadial", imluaProcessRadial},
+  {"ProcessSwirl", imluaProcessSwirl},
+  {"ProcessInterlaceSplit", imluaProcessInterlaceSplit},
+
+  {"ProcessGrayMorphConvolve", imluaProcessGrayMorphConvolve},
+  {"ProcessGrayMorphErode", imluaProcessGrayMorphErode},
+  {"ProcessGrayMorphDilate", imluaProcessGrayMorphDilate},
+  {"ProcessGrayMorphOpen", imluaProcessGrayMorphOpen},
+  {"ProcessGrayMorphClose", imluaProcessGrayMorphClose},
+  {"ProcessGrayMorphTopHat", imluaProcessGrayMorphTopHat},
+  {"ProcessGrayMorphWell", imluaProcessGrayMorphWell},
+  {"ProcessGrayMorphGradient", imluaProcessGrayMorphGradient},
+
+  {"ProcessBinMorphConvolve", imluaProcessBinMorphConvolve},
+  {"ProcessBinMorphErode", imluaProcessBinMorphErode},
+  {"ProcessBinMorphDilate", imluaProcessBinMorphDilate},
+  {"ProcessBinMorphOpen", imluaProcessBinMorphOpen},
+  {"ProcessBinMorphClose", imluaProcessBinMorphClose},
+  {"ProcessBinMorphOutline", imluaProcessBinMorphOutline},
+  {"ProcessBinMorphThin", imluaProcessBinMorphThin},
+
+  {"ProcessMedianConvolve", imluaProcessMedianConvolve},
+  {"ProcessRangeConvolve", imluaProcessRangeConvolve},
+  {"ProcessRankClosestConvolve", imluaProcessRankClosestConvolve},
+  {"ProcessRankMaxConvolve", imluaProcessRankMaxConvolve},
+  {"ProcessRankMinConvolve", imluaProcessRankMinConvolve},
+
+  {"ProcessConvolve", imluaProcessConvolve},
+  {"ProcessConvolveDual", imluaProcessConvolveDual},
+  {"ProcessConvolveRep", imluaProcessConvolveRep},
+  {"ProcessConvolveSep", imluaProcessConvolveSep},
+  {"ProcessCompassConvolve", imluaProcessCompassConvolve},
+  {"ProcessRotateKernel", imluaProcessRotateKernel},
+  {"ProcessDiffOfGaussianConvolve", imluaProcessDiffOfGaussianConvolve},
+  {"ProcessLapOfGaussianConvolve", imluaProcessLapOfGaussianConvolve},
+  {"ProcessMeanConvolve", imluaProcessMeanConvolve},
+  {"ProcessBarlettConvolve", imluaProcessBarlettConvolve},
+  {"ProcessGaussianConvolve", imluaProcessGaussianConvolve},
+  {"ProcessSobelConvolve", imluaProcessSobelConvolve},
+  {"ProcessPrewittConvolve", imluaProcessPrewittConvolve},
+  {"ProcessSplineEdgeConvolve", imluaProcessSplineEdgeConvolve},
+  {"ProcessZeroCrossing", imluaProcessZeroCrossing},
+  {"ProcessCanny", imluaProcessCanny},
+  {"GaussianKernelSize2StdDev", imluaGaussianKernelSize2StdDev},
+  {"GaussianStdDev2KernelSize", imluaGaussianStdDev2KernelSize},
+
+  {"ProcessUnArithmeticOp", imluaProcessUnArithmeticOp},
+  {"ProcessArithmeticOp", imluaProcessArithmeticOp},
+  {"ProcessArithmeticConstOp", imluaProcessArithmeticConstOp},
+  {"ProcessBlendConst", imluaProcessBlendConst},
+  {"ProcessBlend", imluaProcessBlend},
+  {"ProcessSplitComplex", imluaProcessSplitComplex},
+  {"ProcessMergeComplex", imluaProcessMergeComplex},
+  {"ProcessMultipleMean", imluaProcessMultipleMean},
+  {"ProcessMultipleStdDev", imluaProcessMultipleStdDev},
+  {"ProcessAutoCovariance", imluaProcessAutoCovariance},
+  {"ProcessMultiplyConj", imluaProcessMultiplyConj},
+
+  {"ProcessQuantizeRGBUniform", imluaProcessQuantizeRGBUniform},
+  {"ProcessQuantizeGrayUniform", imluaProcessQuantizeGrayUniform},
+
+  {"ProcessExpandHistogram", imluaProcessExpandHistogram},
+  {"ProcessEqualizeHistogram", imluaProcessEqualizeHistogram},
+
+  {"ProcessSplitYChroma", imluaProcessSplitYChroma},
+  {"ProcessSplitHSI", imluaProcessSplitHSI},
+  {"ProcessMergeHSI", imluaProcessMergeHSI},
+  {"ProcessSplitComponents", imluaProcessSplitComponents},
+  {"ProcessMergeComponents", imluaProcessMergeComponents},
+  {"ProcessNormalizeComponents", imluaProcessNormalizeComponents},
+  {"ProcessReplaceColor", imluaProcessReplaceColor},
+
+  {"ProcessBitwiseOp", imluaProcessBitwiseOp},
+  {"ProcessBitwiseNot", imluaProcessBitwiseNot},
+  {"ProcessBitMask", imluaProcessBitMask},
+  {"ProcessBitPlane", imluaProcessBitPlane},
+
+  {"ProcessRenderOp", imluaProcessRenderOp},
+  {"ProcessRenderCondOp", imluaProcessRenderCondOp},
+  {"ProcessRenderAddSpeckleNoise", imluaProcessRenderAddSpeckleNoise},
+  {"ProcessRenderAddGaussianNoise", imluaProcessRenderAddGaussianNoise},
+  {"ProcessRenderAddUniformNoise", imluaProcessRenderAddUniformNoise},
+  {"ProcessRenderRandomNoise", imluaProcessRenderRandomNoise},
+  {"ProcessRenderConstant", imluaProcessRenderConstant},
+  {"ProcessRenderWheel", imluaProcessRenderWheel},
+  {"ProcessRenderCone", imluaProcessRenderCone},
+  {"ProcessRenderTent", imluaProcessRenderTent},
+  {"ProcessRenderRamp", imluaProcessRenderRamp},
+  {"ProcessRenderBox", imluaProcessRenderBox},
+  {"ProcessRenderSinc", imluaProcessRenderSinc},
+  {"ProcessRenderGaussian", imluaProcessRenderGaussian},
+  {"ProcessRenderLapOfGaussian", imluaProcessRenderLapOfGaussian},
+  {"ProcessRenderCosine", imluaProcessRenderCosine},
+  {"ProcessRenderGrid", imluaProcessRenderGrid},
+  {"ProcessRenderChessboard", imluaProcessRenderChessboard},
+
+  {"ProcessToneGamut", imluaProcessToneGamut},
+  {"ProcessUnNormalize", imluaProcessUnNormalize},
+  {"ProcessDirectConv", imluaProcessDirectConv},
+  {"ProcessNegative", imluaProcessNegative},
+
+  {"ProcessRangeContrastThreshold", imluaProcessRangeContrastThreshold},
+  {"ProcessLocalMaxThreshold", imluaProcessLocalMaxThreshold},
+  {"ProcessThreshold", imluaProcessThreshold},
+  {"ProcessThresholdByDiff", imluaProcessThresholdByDiff},
+  {"ProcessHysteresisThreshold", imluaProcessHysteresisThreshold},
+  {"ProcessHysteresisThresEstimate", imluaProcessHysteresisThresEstimate},
+  {"ProcessUniformErrThreshold", imluaProcessUniformErrThreshold},
+  {"ProcessDifusionErrThreshold", imluaProcessDifusionErrThreshold},
+  {"ProcessPercentThreshold", imluaProcessPercentThreshold},
+  {"ProcessOtsuThreshold", imluaProcessOtsuThreshold},
+  {"ProcessMinMaxThreshold", imluaProcessMinMaxThreshold},
+  {"ProcessLocalMaxThresEstimate", imluaProcessLocalMaxThresEstimate},
+  {"ProcessSliceThreshold", imluaProcessSliceThreshold},
+
+  {"ProcessPixelate", imluaProcessPixelate},
+  {"ProcessPosterize", imluaProcessPosterize},
+
+  {NULL, NULL}
+};
+
+/*****************************************************************************\
+ Constants passed to imlua_regconstants() by imlua_open_process()
+\*****************************************************************************/
+static const imlua_constant im_process_constants[] = {
+  /* each entry: { name string, IM_* value, NULL }; the { NULL, -1, NULL } entry comes last */
+  { "UN_EQL", IM_UN_EQL, NULL },
+  { "UN_ABS", IM_UN_ABS, NULL },
+  { "UN_LESS", IM_UN_LESS, NULL },
+  { "UN_INC", IM_UN_INC, NULL },
+  { "UN_INV", IM_UN_INV, NULL },
+  { "UN_SQR", IM_UN_SQR, NULL },
+  { "UN_SQRT", IM_UN_SQRT, NULL },
+  { "UN_LOG", IM_UN_LOG, NULL },
+  { "UN_EXP", IM_UN_EXP, NULL },
+  { "UN_SIN", IM_UN_SIN, NULL },
+  { "UN_COS", IM_UN_COS, NULL },
+  { "UN_CONJ", IM_UN_CONJ, NULL },
+  { "UN_CPXNORM", IM_UN_CPXNORM, NULL },
+
+  { "BIN_ADD", IM_BIN_ADD, NULL },
+  { "BIN_SUB", IM_BIN_SUB, NULL },
+  { "BIN_MUL", IM_BIN_MUL, NULL },
+  { "BIN_DIV", IM_BIN_DIV, NULL },
+  { "BIN_DIFF", IM_BIN_DIFF, NULL },
+  { "BIN_POW", IM_BIN_POW, NULL },
+  { "BIN_MIN", IM_BIN_MIN, NULL },
+  { "BIN_MAX", IM_BIN_MAX, NULL },
+
+  { "BIT_AND", IM_BIT_AND, NULL },
+  { "BIT_OR", IM_BIT_OR, NULL },
+  { "BIT_XOR", IM_BIT_XOR, NULL },
+
+  { "GAMUT_NORMALIZE", IM_GAMUT_NORMALIZE, NULL },
+  { "GAMUT_POW", IM_GAMUT_POW, NULL },
+  { "GAMUT_LOG", IM_GAMUT_LOG, NULL },
+  { "GAMUT_EXP", IM_GAMUT_EXP, NULL },
+  { "GAMUT_INVERT", IM_GAMUT_INVERT, NULL },
+  { "GAMUT_ZEROSTART", IM_GAMUT_ZEROSTART, NULL },
+  { "GAMUT_SOLARIZE", IM_GAMUT_SOLARIZE, NULL },
+  { "GAMUT_SLICE", IM_GAMUT_SLICE, NULL },
+  { "GAMUT_EXPAND", IM_GAMUT_EXPAND, NULL },
+  { "GAMUT_CROP", IM_GAMUT_CROP, NULL },
+  { "GAMUT_BRIGHTCONT", IM_GAMUT_BRIGHTCONT, NULL },
+
+  { NULL, -1, NULL },
+};
+
+/* from imlua_kernel.c */
+void imlua_open_kernel(lua_State *L);
+/* Registers improcess_lib as "im" and the constants, includes one platform-specific .loh, calls imlua_open_kernel(); returns 1. */
+int imlua_open_process(lua_State *L)
+{
+  luaL_register(L, "im", improcess_lib);   /* leave "im" table at the top of the stack */
+  imlua_regconstants(L, im_process_constants);
+#ifdef TEC_BIGENDIAN  /* exactly one .loh file is included, chosen by byte order and word size */
+#ifdef TEC_64
+#include "im_process_be64.loh"
+#else
+#include "im_process_be32.loh"
+#endif  /* TEC_64 */
+#else  /* !TEC_BIGENDIAN */
+#ifdef TEC_64
+#ifdef WIN64
+#include "im_process_le64w.loh"
+#else
+#include "im_process_le64.loh"
+#endif  /* WIN64 */
+#else  /* !TEC_64 */
+#include "im_process.loh"
+#endif  /* TEC_64 */
+#endif  /* TEC_BIGENDIAN */
+  imlua_open_kernel(L);
+  return 1;
+}
+/* Exported under the luaopen_ name; forwards to imlua_open_process() and returns its result. */
+int luaopen_imlua_process(lua_State *L)
+{
+  return imlua_open_process(L);
+}
+/* Same forwarding entry point under the "51"-suffixed name (presumably for a Lua 5.1 module name - confirm). */
+int luaopen_imlua_process51(lua_State *L)
+{
+  return imlua_open_process(L);
+}
+
diff --git a/src/lua5/imlua_process.def b/src/lua5/imlua_process.def
new file mode 100644
index 0000000..2b77e77
--- /dev/null
+++ b/src/lua5/imlua_process.def
@@ -0,0 +1,4 @@
+EXPORTS
+  imlua_open_process
+  luaopen_imlua_process
+  luaopen_imlua_process51
\ No newline at end of file
diff --git a/src/lua5/imlua_util.c b/src/lua5/imlua_util.c
new file mode 100644
index 0000000..69cfb19
--- /dev/null
+++ b/src/lua5/imlua_util.c
@@ -0,0 +1,279 @@
+/** \file
+ * \brief IM Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: imlua_util.c,v 1.1 2008/10/17 06:16:32 scuri Exp $
+ */
+
+#include "im.h"
+#include "im_util.h"
+#include "im_image.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+#include "imlua.h"
+#include "imlua_aux.h"
+
+/*****************************************************************************\
+ im.ImageDataSize(width, height, color_mode, data_type)
+\*****************************************************************************/
+static int imluaImageDataSize (lua_State *L)
+{
+  /* the four Lua arguments are read in stack order */
+  int w = luaL_checkint(L, 1);
+  int h = luaL_checkint(L, 2);
+  int mode = luaL_checkint(L, 3);
+  int type = luaL_checkint(L, 4);
+
+  /* one result: the size reported by imImageDataSize */
+  lua_pushnumber(L, imImageDataSize(w, h, mode, type));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ImageLineSize(width, color_mode, data_type)
+\*****************************************************************************/
+static int imluaImageLineSize (lua_State *L)
+{
+  /* the three Lua arguments are read in stack order */
+  int w = luaL_checkint(L, 1);
+  int mode = luaL_checkint(L, 2);
+  int type = luaL_checkint(L, 3);
+
+  /* one result: the value reported by imImageLineSize */
+  lua_pushnumber(L, imImageLineSize(w, mode, type));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ImageLineCount(width, color_mode)
+\*****************************************************************************/
+static int imluaImageLineCount (lua_State *L)
+{
+  /* both Lua arguments are read in stack order */
+  int w = luaL_checkint(L, 1);
+  int mode = luaL_checkint(L, 2);
+
+  /* one result: the value reported by imImageLineCount */
+  lua_pushnumber(L, imImageLineCount(w, mode));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ImageCheckFormat(color_mode, data_type)
+\*****************************************************************************/
+static int imluaImageCheckFormat (lua_State *L)
+{
+  /* arguments: 1 = color mode, 2 = data type */
+  int mode = luaL_checkint(L, 1);
+  int type = luaL_checkint(L, 2);
+
+  /* one boolean result taken from imImageCheckFormat */
+  lua_pushboolean(L, imImageCheckFormat(mode, type));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ColorModeSpaceName(color_mode)
+\*****************************************************************************/
+static int imluaColorModeSpaceName (lua_State *L)
+{
+  int color_mode = luaL_checkint(L, 1);
+
+  /* one string result taken from imColorModeSpaceName */
+  lua_pushstring(L, imColorModeSpaceName(color_mode));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ColorModeDepth(color_mode)
+\*****************************************************************************/
+static int imluaColorModeDepth (lua_State *L)
+{
+  int color_mode = luaL_checkint(L, 1);
+
+  /* one numeric result taken from imColorModeDepth */
+  lua_pushnumber(L, imColorModeDepth(color_mode));
+  return 1;
+}
+
+/*****************************************************************************\
+ 
+\*****************************************************************************/
+
+/*****************************************************************************\
+ im.ColorModeSpace(color_mode)
+\*****************************************************************************/
+static int imluaColorModeSpace (lua_State *L)
+{
+  int color_mode = luaL_checkint(L, 1);
+
+  /* one numeric result taken from imColorModeSpace */
+  lua_pushnumber(L, imColorModeSpace(color_mode));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ColorModeMatch(color_mode1, color_mode2)
+ Pushes one boolean: the result of imColorModeMatch for the two color modes.
+\*****************************************************************************/
+static int imluaColorModeMatch (lua_State *L)
+{
+  /* the second mode comes from stack index 2; reading index 1 twice
+     compared the first mode with itself */
+  int color_mode1 = luaL_checkint(L, 1);
+  int color_mode2 = luaL_checkint(L, 2);
+
+  lua_pushboolean(L, imColorModeMatch(color_mode1, color_mode2));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ColorModeHasAlpha(color_mode)
+\*****************************************************************************/
+static int imluaColorModeHasAlpha (lua_State *L)
+{
+  int color_mode = luaL_checkint(L, 1);
+
+  /* one boolean result taken from imColorModeHasAlpha */
+  lua_pushboolean(L, imColorModeHasAlpha(color_mode));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ColorModeIsPacked(color_mode)
+\*****************************************************************************/
+static int imluaColorModeIsPacked (lua_State *L)
+{
+  int color_mode = luaL_checkint(L, 1);
+
+  /* one boolean result taken from imColorModeIsPacked */
+  lua_pushboolean(L, imColorModeIsPacked(color_mode));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ColorModeIsTopDown(color_mode)
+\*****************************************************************************/
+static int imluaColorModeIsTopDown (lua_State *L)
+{
+  int color_mode = luaL_checkint(L, 1);
+
+  /* one boolean result taken from imColorModeIsTopDown */
+  lua_pushboolean(L, imColorModeIsTopDown(color_mode));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ColorModeToBitmap(color_mode)
+\*****************************************************************************/
+static int imluaColorModeToBitmap (lua_State *L)
+{
+  int color_mode = luaL_checkint(L, 1);
+
+  /* one numeric result taken from imColorModeToBitmap */
+  lua_pushnumber(L, imColorModeToBitmap(color_mode));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.ColorModeIsBitmap(color_mode, data_type)
+\*****************************************************************************/
+static int imluaColorModeIsBitmap (lua_State *L)
+{
+  /* arguments: 1 = color mode, 2 = data type */
+  int mode = luaL_checkint(L, 1);
+  int type = luaL_checkint(L, 2);
+
+  /* one boolean result taken from imColorModeIsBitmap */
+  lua_pushboolean(L, imColorModeIsBitmap(mode, type));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.DataTypeSize(data_type)
+\*****************************************************************************/
+static int imluaDataTypeSize (lua_State *L)
+{
+  int data_type = luaL_checkint(L, 1);
+
+  /* one numeric result taken from imDataTypeSize */
+  lua_pushnumber(L, imDataTypeSize(data_type));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.DataTypeName(data_type)
+\*****************************************************************************/
+static int imluaDataTypeName (lua_State *L)
+{
+  int data_type = luaL_checkint(L, 1);
+
+  /* one string result taken from imDataTypeName */
+  lua_pushstring(L, imDataTypeName(data_type));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.DataTypeIntMax(data_type)
+\*****************************************************************************/
+static int imluaDataTypeIntMax(lua_State *L)
+{
+  int data_type = luaL_checkint(L, 1);
+
+  /* one numeric result taken from imDataTypeIntMax */
+  lua_pushnumber(L, imDataTypeIntMax(data_type));
+  return 1;
+}
+
+/*****************************************************************************\
+ im.DataTypeIntMin(data_type)
+\*****************************************************************************/
+static int imluaDataTypeIntMin(lua_State *L)
+{
+  int data_type = luaL_checkint(L, 1);
+
+  /* one numeric result taken from imDataTypeIntMin */
+  lua_pushnumber(L, imDataTypeIntMin(data_type));
+  return 1;
+}
+
+/***************************************************************************\
+* Creates a color as a light userdata. The color value is                   *
+* placed in the (void *) value. Not beautiful, but works best.              *
+* im.ColorEncode(r, g, b: number) -> (c: color)                             *
+\***************************************************************************/
+static int imlua_colorencode(lua_State *L)
+{
+  /* read the three components first, validate them afterwards */
+  int r = luaL_checkint(L, 1);
+  int g = luaL_checkint(L, 2);
+  int b = luaL_checkint(L, 3);
+  long int packed;
+
+  /* luaL_argcheck raises the argument error when the condition is false */
+  luaL_argcheck(L, r >= 0 && r <= 255, 1, "color components values should be in range [0, 255]");
+  luaL_argcheck(L, g >= 0 && g <= 255, 2, "color components values should be in range [0, 255]");
+  luaL_argcheck(L, b >= 0 && b <= 255, 3, "color components values should be in range [0, 255]");
+
+  packed = imColorEncode((unsigned char)r, (unsigned char)g, (unsigned char)b);
+
+  /* the encoded color travels inside the pointer value itself */
+  lua_pushlightuserdata(L, (void *)packed);
+  return 1;
+}
+
+/***************************************************************************\
+* Decodes a color previously created.                                       *
+* im.ColorDecode(c: color) -> (r, g, b: number)                             *
+\***************************************************************************/
+static int imlua_colordecode(lua_State *L)
+{
+  unsigned char r, g, b;
+  long int packed;
+
+  /* luaL_argcheck raises the argument error when the condition is false */
+  luaL_argcheck(L, lua_islightuserdata(L, 1), 1, "color must be a light user data");
+
+  /* the color is stored in the pointer value, not behind it */
+  packed = (long int) lua_touserdata(L, 1);
+  imColorDecode(&r, &g, &b, packed);
+
+  /* three results: red, green, blue */
+  lua_pushnumber(L, r);
+  lua_pushnumber(L, g);
+  lua_pushnumber(L, b);
+  return 3;
+}
+
+static const luaL_reg imutil_lib[] = {
+  {"ImageDataSize", imluaImageDataSize},
+  {"ImageLineSize", imluaImageLineSize},
+  {"ImageLineCount", imluaImageLineCount},
+  {"ImageCheckFormat", imluaImageCheckFormat},
+
+  {"ColorModeSpace", imluaColorModeSpace},
+  {"ColorModeSpaceName", imluaColorModeSpaceName},
+  {"ColorModeDepth", imluaColorModeDepth},
+
+  {"ColorModeToBitmap", imluaColorModeToBitmap},
+  {"ColorModeIsBitmap", imluaColorModeIsBitmap},
+  {"ColorModeMatch", imluaColorModeMatch},
+  {"ColorModeHasAlpha", imluaColorModeHasAlpha},
+  {"ColorModeIsPacked", imluaColorModeIsPacked},
+  {"ColorModeIsTopDown", imluaColorModeIsTopDown},
+
+  {"DataTypeSize", imluaDataTypeSize},
+  {"DataTypeName", imluaDataTypeName},
+  {"DataTypeIntMax", imluaDataTypeIntMax},
+  {"DataTypeIntMin", imluaDataTypeIntMin},
+
+  {"ColorEncode", imlua_colorencode},
+  {"ColorDecode", imlua_colordecode},
+
+  {NULL, NULL}
+};
+
+/* Adds the functions listed in imutil_lib to the table at the top of the
+ * Lua stack (luaL_register is called with a NULL library name). */
+void imlua_open_util(lua_State *L)
+{
+  /* "im" table is at the top of the stack */
+  luaL_register(L, NULL, imutil_lib);
+}
diff --git a/src/lua5/imlua_wmv.c b/src/lua5/imlua_wmv.c
new file mode 100644
index 0000000..7f61030
--- /dev/null
+++ b/src/lua5/imlua_wmv.c
@@ -0,0 +1,44 @@
+/** \file
+ * \brief wmv format Lua 5 Binding
+ *
+ * See Copyright Notice in im_lib.h
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "im_format_wmv.h"
+
+#include <lua.h>
+#include <lauxlib.h>
+
+
+/* Lua function im.FormatRegisterWMV(): calls imFormatRegisterWMV, takes no
+ * arguments and returns no values. */
+static int imlua_FormatRegisterWMV(lua_State *L)
+{
+  (void)L;  /* the Lua state is not needed */
+  imFormatRegisterWMV();
+  return 0;
+}
+
+static const struct luaL_reg imlib[] = {
+  {"FormatRegisterWMV", imlua_FormatRegisterWMV},
+  {NULL, NULL},
+};
+
+
+/* Shared body of the two loaders below: calls imFormatRegisterWMV right away
+ * and registers the functions of imlib in the "im" table. Returns 1. */
+static int imlua_wmv_open (lua_State *L)
+{
+  imFormatRegisterWMV();
+  luaL_register(L, "im", imlib);   /* leave "im" table at the top of the stack */
+  return 1;
+}
+
+/* Loader named after the Lua "luaopen_<module>" convention; forwards to imlua_wmv_open. */
+int luaopen_imlua_wmv(lua_State* L)
+{
+  return imlua_wmv_open(L);
+}
+
+/* Same loader under the "51" suffixed name; forwards to imlua_wmv_open. */
+int luaopen_imlua_wmv51(lua_State* L)
+{
+  return imlua_wmv_open(L);
+}
diff --git a/src/lua5/imlua_wmv.def b/src/lua5/imlua_wmv.def
new file mode 100644
index 0000000..0c05563
--- /dev/null
+++ b/src/lua5/imlua_wmv.def
@@ -0,0 +1,4 @@
+EXPORTS
+  luaopen_imlua_wmv
+  luaopen_imlua_wmv51
+ 
\ No newline at end of file
diff --git a/src/make_uname b/src/make_uname
new file mode 100644
index 0000000..a9ceb74
--- /dev/null
+++ b/src/make_uname
@@ -0,0 +1,13 @@
+# This builds all the libraries of the folder for 1 uname
+# Every line runs tecmake once: $1 always comes first, then the MF=<name>
+# selector (absent on the first call), then $2..$8 unchanged.
+# The calls are independent; a failing one does not stop the next.
+
+# default makefile, then the process, JPEG 2000 and FFTW libraries
+tecmake $1 $2 $3 $4 $5 $6 $7 $8
+tecmake $1 MF=im_process $2 $3 $4 $5 $6 $7 $8
+tecmake $1 MF=im_jp2 $2 $3 $4 $5 $6 $7 $8
+tecmake $1 MF=im_fftw $2 $3 $4 $5 $6 $7 $8
+
+# Lua 3 binding
+tecmake $1 MF=imlua3 $2 $3 $4 $5 $6 $7 $8
+
+# Lua 5 bindings
+tecmake $1 MF=imlua5 $2 $3 $4 $5 $6 $7 $8
+tecmake $1 MF=imlua_process5 $2 $3 $4 $5 $6 $7 $8
+tecmake $1 MF=imlua_jp2 $2 $3 $4 $5 $6 $7 $8
+tecmake $1 MF=imlua_fftw5 $2 $3 $4 $5 $6 $7 $8
diff --git a/src/make_uname.bat b/src/make_uname.bat
new file mode 100644
index 0000000..668265a
--- /dev/null
+++ b/src/make_uname.bat
@@ -0,0 +1,73 @@
+@echo off
+REM This builds all the libraries of the folder for 1 uname
+REM   make_uname <uname> [args]   builds every makefile for <uname>
+REM   make_uname vc-all [args]    builds only the Visual C++ specific
+REM                               makefiles, for every Visual C++ uname
+REM   make_uname VCC <uname> ...  internal form used by the recursion below
+
+if "%1"=="VCC" goto do-vcc
+if "%1"=="vc-all" goto start-all-vc
+
+REM makefiles built for every uname
+call tecmake %1 %2 %3 %4 %5 %6 %7 %8
+call tecmake %1 "MF=im_process" %2 %3 %4 %5 %6 %7 %8
+call tecmake %1 "MF=im_jp2" %2 %3 %4 %5 %6 %7 %8
+call tecmake %1 "MF=im_avi" %2 %3 %4 %5 %6 %7 %8
+call tecmake %1 "MF=im_fftw" %2 %3 %4 %5 %6 %7 %8
+
+call tecmake %1 "MF=imlua3" %2 %3 %4 %5 %6 %7 %8
+
+call tecmake %1 "MF=imlua5" %2 %3 %4 %5 %6 %7 %8
+call tecmake %1 "MF=imlua_process5" %2 %3 %4 %5 %6 %7 %8
+call tecmake %1 "MF=imlua_jp2" %2 %3 %4 %5 %6 %7 %8
+call tecmake %1 "MF=imlua_avi" %2 %3 %4 %5 %6 %7 %8
+call tecmake %1 "MF=imlua_fftw5" %2 %3 %4 %5 %6 %7 %8
+
+REM the unames below also get the WMV and capture makefiles
+if "%1"=="vc6"  goto vc
+if "%1"=="vc7"  goto vc
+if "%1"=="vc8"  goto vc
+if "%1"=="vc8_64"  goto vc
+if "%1"=="vc9"  goto vc
+if "%1"=="vc9_64"  goto vc
+if "%1"=="dll"  goto vc
+if "%1"=="dll7"  goto vc
+if "%1"=="dll8"  goto vc
+if "%1"=="dll8_64"  goto vc
+if "%1"=="dll9"  goto vc
+if "%1"=="dll9_64"  goto vc
+if "%1"=="all"  goto start-all-vc
+goto end
+
+:vc
+call tecmake %1 "MF=im_wmv" %2 %3 %4 %5 %6 %7 %8
+call tecmake %1 "MF=imlua_wmv" %2 %3 %4 %5 %6 %7 %8
+call tecmake %1 "MF=im_capture" %2 %3 %4 %5 %6 %7 %8
+call tecmake %1 "MF=imlua_capture5" %2 %3 %4 %5 %6 %7 %8
+if "%1"=="dll"  goto dll
+goto end
+
+REM extra im_capture targets built together with the "dll" uname
+:dll
+call tecmake mingw3 "MF=im_capture" mingw3-dll
+call tecmake bc56 "MF=im_capture" bc56-dll
+REM call tecmake owc1 "MF=im_capture" owc1-dll
+goto end
+
+REM re-enters this script once per Visual C++ uname through the VCC form
+:start-all-vc
+call make_uname VCC vc6 %2 %3 %4 %5 %6
+call make_uname VCC vc7 %2 %3 %4 %5 %6
+call make_uname VCC vc8 %2 %3 %4 %5 %6
+call make_uname VCC vc8_64 %2 %3 %4 %5 %6
+call make_uname VCC vc9 %2 %3 %4 %5 %6
+call make_uname VCC vc9_64 %2 %3 %4 %5 %6
+call make_uname VCC dll %2 %3 %4 %5 %6
+call make_uname VCC dll7 %2 %3 %4 %5 %6
+call make_uname VCC dll8 %2 %3 %4 %5 %6
+call make_uname VCC dll8_64 %2 %3 %4 %5 %6
+call make_uname VCC dll9 %2 %3 %4 %5 %6
+call make_uname VCC dll9_64 %2 %3 %4 %5 %6
+goto end
+
+REM VCC form: the uname is %2 and the extra arguments start at %3
+:do-vcc
+call tecmake %2 "MF=im_wmv" %3 %4 %5 %6 %7 %8
+call tecmake %2 "MF=imlua_wmv" %3 %4 %5 %6 %7 %8
+call tecmake %2 "MF=im_capture" %3 %4 %5 %6 %7 %8
+call tecmake %2 "MF=imlua_capture5" %3 %4 %5 %6 %7 %8
+if "%2"=="dll"  goto dll
+goto end
+
+:end
diff --git a/src/old_im.cpp b/src/old_im.cpp
new file mode 100644
index 0000000..43a8afb
--- /dev/null
+++ b/src/old_im.cpp
@@ -0,0 +1,440 @@
+/** \file
+ * \brief Old API
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: old_im.cpp,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include <stdlib.h>
+#include <memory.h>
+#include <string.h>
+
+#include "old_im.h"
+#include "im.h"
+#include "im_util.h"
+#include "im_counter.h"
+
+// Old API name: forwards the three components to imColorEncode and returns its result.
+long imEncodeColor(unsigned char Red, unsigned char Green, unsigned char Blue)
+{
+  return imColorEncode(Red, Green, Blue);
+}
+
+// Old API name: forwards to imColorDecode, which fills Red, Green and Blue from Color.
+void imDecodeColor(unsigned char* Red, unsigned char* Green, unsigned char* Blue, long Color)
+{
+  imColorDecode(Red, Green, Blue, Color);
+}
+
+// Translates the format and compression names of the new API into an old API
+// format code. Returns -1 when the format name has no old equivalent.
+// imStrEqual reports equality with a non-zero result, so the format tests
+// must not be negated: negated, a "BMP" file was reported as IM_GIF and every
+// other format as IM_BMP.
+static int FormatNew2Old(const char* new_format, const char* compression)
+{
+  int format;
+
+  if (imStrEqual(new_format, "BMP"))
+    format = IM_BMP;
+  else if (imStrEqual(new_format, "GIF"))
+    format = IM_GIF;
+  else if (imStrEqual(new_format, "PCX"))
+    format = IM_PCX;
+  else if (imStrEqual(new_format, "RAS"))
+    format = IM_RAS;
+  else if (imStrEqual(new_format, "SGI"))
+    format = IM_SGI;
+  else if (imStrEqual(new_format, "JPEG"))
+    format = IM_JPG;
+  else if (imStrEqual(new_format, "LED"))
+    format = IM_LED;
+  else if (imStrEqual(new_format, "TIFF"))
+    format = IM_TIF;
+  else if (imStrEqual(new_format, "TGA"))
+    format = IM_TGA;
+  else
+    return -1;
+
+  // here the negation is intended: any compression other than "NONE" adds the modifier
+  if (!imStrEqual(compression, "NONE"))
+    format |= IM_DEFAULT;
+
+  return format;
+}
+
+// Opens the file, asks for its format and compression names and stores the
+// matching old API format code in *format.
+// Returns the open error, IM_ERR_FORMAT when the code is -1, else IM_ERR_NONE.
+int imFileFormat(char *filename, int* format)
+{
+  int error;
+  imFile* ifile = imFileOpen(filename, &error);
+  if (ifile == NULL)
+    return error;
+
+  char new_format[10], compression[10];
+  int image_count;
+  imFileGetInfo(ifile, new_format, compression, &image_count);
+  imFileClose(ifile);  // only the names are needed from here on
+
+  int old_format = FormatNew2Old(new_format, compression);
+  *format = old_format;
+
+  return (old_format == -1)? IM_ERR_FORMAT: IM_ERR_NONE;
+}
+
+// Reduces a color mode to one of the two image types of the old API:
+// binary, gray and map color spaces give IM_MAP, anything else IM_RGB.
+static int ColorMode2Type(int color_mode)
+{
+  int space = imColorModeSpace(color_mode);
+
+  if (space == IM_BINARY || space == IM_GRAY || space == IM_MAP)
+    return IM_MAP;
+
+  return IM_RGB;
+}
+
+// Reads size and type of the image at index 0 of a file.
+// *palette_count is only written when the type is IM_MAP.
+// Returns the error of the failing step, IM_ERR_DATA when the type is -1,
+// else IM_ERR_NONE.
+int imImageInfo(char *filename, int *width, int *height, int *type, int *palette_count)
+{
+  int error;
+  imFile* ifile = imFileOpen(filename, &error);
+  if (!ifile) return error;
+
+  int color_mode, data_type;
+  error = imFileReadImageInfo(ifile, 0, width, height, &color_mode, &data_type);
+  if (!error)
+  {
+    *type = ColorMode2Type(color_mode);
+    if (*type == -1)
+      error = IM_ERR_DATA;
+    else
+    {
+      if (*type == IM_MAP)
+      {
+        // the colors are discarded, only the count is reported
+        long palette[256];
+        imFileGetPalette(ifile, palette, palette_count);
+      }
+      error = IM_ERR_NONE;
+    }
+  }
+
+  // single exit: the file is closed on every path
+  imFileClose(ifile);
+  return error;
+}
+
+static imTiffImageDesc      iOldTiffImageDescCB = NULL;
+static imGifTranspIndex     iOldGifTranspIndexCB = NULL;
+static imResolutionCallback iOldResolutionCB = NULL;
+static imFileCounterCallback    iOldCounterCB = NULL;
+
+// Adapter installed with imCounterSetCallback: forwards progress to the old
+// style counter callback and returns the logical negation of its result.
+// NOTE(review): -1 and 1001 look like begin/end markers and progress like a
+// 0-1000 scale - confirm against im_counter.h.
+static int iOldFileCounter(int counter, void* user_data, const char* name, int progress)
+{
+  (void)counter;
+
+  if (progress == -1 || progress == 1001)
+    return 1;
+
+  // third argument is 0 when the fifth character of name is 'R', else 1
+  int io_flag = (name[4] == 'R')? 0: 1;
+  return !iOldCounterCB((char*)user_data, progress/10, io_flag);
+}
+
+// Stores an old style callback in the matching static slot.
+// Returns 1 when the (format, cb_id) pair is recognized, else 0.
+int imRegisterCallback(imCallback cb, int cb_id, int format)
+{
+  // callbacks that apply to all formats
+  if (format == IM_ALL && cb_id == IM_COUNTER_CB)
+  {
+    iOldCounterCB = (imFileCounterCallback)cb;
+    return 1;
+  }
+
+  if (format == IM_ALL && cb_id == IM_RESOLUTION_CB)
+  {
+    iOldResolutionCB = (imResolutionCallback)cb;
+    return 1;
+  }
+
+  // format specific callbacks
+  if (format == IM_GIF && cb_id == IM_GIF_TRANSPARENT_COLOR_CB)
+  {
+    iOldGifTranspIndexCB = (imGifTranspIndex)cb;
+    return 1;
+  }
+
+  if (format == IM_TIF && cb_id == IM_TIF_IMAGE_DESCRIPTION_CB)
+  {
+    iOldTiffImageDescCB = (imTiffImageDesc)cb;
+    return 1;
+  }
+
+  return 0;
+}
+
+// Expands count palette indices from src_map into the red, green and blue
+// planes. src_map may be the same buffer as red: each index is read before
+// the plane is written at that position.
+// The lookup tables are zero filled and at most 256 palette entries are
+// decoded, so a large palette_count cannot overrun them and an index beyond
+// the palette yields black instead of an uninitialized value.
+static void iConvertMapToRGB(const imbyte* src_map, imbyte* red, imbyte* green, imbyte* blue, int count, const long* palette, const int palette_count)
+{
+  imbyte r[256] = {0}, g[256] = {0}, b[256] = {0};
+  int used_count = (palette_count > 256)? 256: palette_count;
+
+  for (int c = 0; c < used_count; c++)
+    imColorDecode(&r[c], &g[c], &b[c], palette[c]);
+
+  for (int i = 0; i < count; i++)
+  {
+    int index = *src_map++;
+    *red++ = r[index];
+    *green++ = g[index];
+    *blue++ = b[index];
+  }
+}
+
+// Loads the image at index 0 of a file into three byte planes (old API).
+// The data is read straight into red when the three planes are contiguous,
+// otherwise through a temporary buffer. Images whose bitmap color space is
+// not IM_RGB are expanded with the file palette.
+// Registered old style callbacks receive the file attributes. A callback is
+// skipped when its attribute is missing from the file, because the NULL
+// returned by imFileGetAttribute used to be dereferenced.
+// Returns IM_ERR_NONE, IM_ERR_MEM or the error of the failing file call.
+int imLoadRGB(char *filename, unsigned char *red, unsigned char *green, unsigned char *blue)
+{
+  int error;
+  imFile* ifile = imFileOpen(filename, &error);
+  if (!ifile) return error;
+  
+  int width, height, color_mode, data_type;
+  error = imFileReadImageInfo(ifile, 0, &width, &height, &color_mode, &data_type);
+  if (error) 
+  {
+    imFileClose(ifile);
+    return error;
+  }
+
+  if (iOldResolutionCB)
+  {
+    const float* xres_attr = (const float*)imFileGetAttribute(ifile, "XResolution", NULL, NULL);
+    const float* yres_attr = (const float*)imFileGetAttribute(ifile, "YResolution", NULL, NULL);
+    const int* unit_attr = (const int*)imFileGetAttribute(ifile, "ResolutionUnit", NULL, NULL);
+    if (xres_attr && yres_attr)
+    {
+      double xres = *xres_attr;
+      double yres = *yres_attr;
+      // NOTE(review): the unit is read as an int like before - confirm the stored type
+      int res_unit = unit_attr? *unit_attr: 0;
+      iOldResolutionCB(filename, &xres, &yres, &res_unit);
+    }
+  }
+
+  if (iOldTiffImageDescCB)
+  {
+    // passed on unchanged, so the callback may still receive NULL
+    char* img_desc = (char*)imFileGetAttribute(ifile, "Description", NULL, NULL);
+    iOldTiffImageDescCB(filename, img_desc);
+  }
+
+  if (iOldGifTranspIndexCB)
+  {
+    const unsigned char* transp_attr = (const unsigned char*)imFileGetAttribute(ifile, "TransparencyIndex", NULL, NULL);
+    if (transp_attr)
+    {
+      unsigned char transp_index = *transp_attr;
+      iOldGifTranspIndexCB(filename, &transp_index);
+    }
+  }
+
+  int count = width*height;
+  void* data;
+  if (green != red + count || blue != green + count)
+    data = malloc(imImageDataSize(width, height, IM_RGB, IM_BYTE));
+  else
+    data = red;  // planes are contiguous, read in place
+    
+  if (!data)
+  {
+    imFileClose(ifile);
+    return IM_ERR_MEM;
+  }
+
+  if (iOldCounterCB)
+    imCounterSetCallback(filename, iOldFileCounter);
+  
+  error = imFileReadImageData(ifile, data, 1, 0);
+  if (error) 
+  {
+    if (data != red) free(data);
+    imFileClose(ifile);
+    return error;
+  }
+
+  if (imColorModeToBitmap(color_mode) != IM_RGB)
+  {
+    long palette[256];
+    int palette_count;
+    imFileGetPalette(ifile, palette, &palette_count);
+    iConvertMapToRGB((imbyte*)data, red, green, blue, count, palette, palette_count);
+  }
+  else if (data != red)
+  {
+    memcpy(red, data, count);
+    memcpy(green, (unsigned char*)data+count, count);
+    memcpy(blue, (unsigned char*)data+2*count, count);
+  }
+
+  imFileClose(ifile);
+
+  if (data != red) free(data);
+  return IM_ERR_NONE;
+}
+
+// Loads the image at index 0 of a file as palette indices plus palette (old API).
+// Only map, gray and binary color spaces are accepted; others give
+// IM_ERR_DATA, and the file is now closed on that path too.
+// Registered old style callbacks receive the file attributes. A callback is
+// skipped when its attribute is missing from the file, because the NULL
+// returned by imFileGetAttribute used to be dereferenced.
+// Returns IM_ERR_NONE, IM_ERR_DATA or the error of the failing file call.
+int imLoadMap(char *filename, unsigned char *map, long *palette)
+{
+  int error;
+  imFile* ifile = imFileOpen(filename, &error);
+  if (!ifile) return error;
+  
+  int width, height, color_mode, data_type;
+  error = imFileReadImageInfo(ifile, 0, &width, &height, &color_mode, &data_type);
+  if (error)
+  {
+    imFileClose(ifile);
+    return error;
+  }
+
+  if (imColorModeSpace(color_mode) != IM_MAP &&
+      imColorModeSpace(color_mode) != IM_GRAY &&
+      imColorModeSpace(color_mode) != IM_BINARY)
+  {
+    imFileClose(ifile);  // this return used to leak the open file
+    return IM_ERR_DATA;
+  }
+
+  if (iOldResolutionCB)
+  {
+    const float* xres_attr = (const float*)imFileGetAttribute(ifile, "XResolution", NULL, NULL);
+    const float* yres_attr = (const float*)imFileGetAttribute(ifile, "YResolution", NULL, NULL);
+    const int* unit_attr = (const int*)imFileGetAttribute(ifile, "ResolutionUnit", NULL, NULL);
+    if (xres_attr && yres_attr)
+    {
+      double xres = *xres_attr;
+      double yres = *yres_attr;
+      // NOTE(review): the unit is read as an int like before - confirm the stored type
+      int res_unit = unit_attr? *unit_attr: 0;
+      iOldResolutionCB(filename, &xres, &yres, &res_unit);
+    }
+  }
+
+  if (iOldTiffImageDescCB)
+  {
+    // passed on unchanged, so the callback may still receive NULL
+    char* img_desc = (char*)imFileGetAttribute(ifile, "Description", NULL, NULL);
+    iOldTiffImageDescCB(filename, img_desc);
+  }
+
+  if (iOldGifTranspIndexCB)
+  {
+    const unsigned char* transp_attr = (const unsigned char*)imFileGetAttribute(ifile, "TransparencyIndex", NULL, NULL);
+    if (transp_attr)
+    {
+      unsigned char transp_index = *transp_attr;
+      iOldGifTranspIndexCB(filename, &transp_index);
+    }
+  }
+
+  if (iOldCounterCB)
+    imCounterSetCallback(filename, iOldFileCounter);
+
+  error = imFileReadImageData(ifile, map, 1, 0);
+  if (error)
+  {
+    imFileClose(ifile);
+    return error;
+  }
+
+  int palette_count;
+  imFileGetPalette(ifile, palette, &palette_count);
+
+  imFileClose(ifile);
+
+  return IM_ERR_NONE;
+}
+
+static char* i_format_old2new[] = {"BMP", "PCX", "GIF", "TIFF", "RAS", "SGI", "JPEG", "LED", "TGA"};
+
+// Saves three byte planes as an RGB image (old API).
+// The low byte of format selects the entry of i_format_old2new; a value
+// outside that table now returns IM_ERR_FORMAT instead of reading past it.
+// A non-zero high byte selects the default compression of the format,
+// otherwise "NONE" is used.
+// Registered old style callbacks are asked for the attributes to store; their
+// output variables start zeroed in case a callback leaves them untouched.
+// Returns IM_ERR_MEM or the result of the failing or final file call.
+int imSaveRGB(int width, int height, int format, unsigned char *red, unsigned char *green, unsigned char *blue, char *filename)
+{
+  int error;
+  int format_index = format & 0x00FF;
+  if (format_index >= (int)(sizeof(i_format_old2new)/sizeof(i_format_old2new[0])))
+    return IM_ERR_FORMAT;
+
+  char* new_format = i_format_old2new[format_index];
+  
+  imFile* ifile = imFileNew(filename, new_format, &error);
+  if (!ifile) return error;
+  
+  if (format & 0xFF00)
+    imFileSetInfo(ifile, NULL);
+  else
+    imFileSetInfo(ifile, "NONE");
+
+  if (iOldResolutionCB)
+  {
+    double xres = 0, yres = 0;
+    int res_unit = 0;
+    iOldResolutionCB(filename, &xres, &yres, &res_unit);
+    float fxres=(float)xres, fyres=(float)yres;
+    imFileSetAttribute(ifile, "XResolution", IM_FLOAT, 1, (void*)&fxres);
+    imFileSetAttribute(ifile, "YResolution", IM_FLOAT, 1, (void*)&fyres);
+    imFileSetAttribute(ifile, "ResolutionUnit", IM_INT, 1, (void*)&res_unit);
+  }
+
+  if (iOldTiffImageDescCB)
+  {
+    char img_desc[50] = "";  // stays a valid empty string if the callback writes nothing
+    iOldTiffImageDescCB(filename, img_desc);
+    imFileSetAttribute(ifile, "Description", IM_BYTE, strlen(img_desc)+1, (void*)img_desc);
+  }
+
+  if (iOldGifTranspIndexCB)
+  {
+    unsigned char transp_index = 0;
+    iOldGifTranspIndexCB(filename, &transp_index);
+    imFileSetAttribute(ifile, "TransparencyIndex", IM_BYTE, 1, (void*)&transp_index);
+  }
+  
+  error = imFileWriteImageInfo(ifile, width, height, IM_RGB, IM_BYTE);
+  if (error)
+  {
+    imFileClose(ifile);
+    return error;
+  }
+
+  if (iOldCounterCB)
+    imCounterSetCallback(filename, iOldFileCounter);
+  
+  int count = width*height;
+  void* data;
+  if (green != red + count || blue != green + count)
+    data = malloc(imImageDataSize(width, height, IM_RGB, IM_BYTE));
+  else
+    data = red;  // planes are contiguous, write them in place
+
+  if (!data)
+  {
+    imFileClose(ifile);
+    return IM_ERR_MEM;
+  }
+  
+  if (data != red)
+  {
+    memcpy(data, red, count);
+    memcpy((unsigned char*)data+count, green, count);
+    memcpy((unsigned char*)data+2*count, blue, count);
+  }
+ 
+  error = imFileWriteImageData(ifile, data);
+  imFileClose(ifile);  
+  if (data != red) free(data);
+  return error;
+}
+
+// Saves palette indices plus palette as an IM_MAP image (old API).
+// The low byte of format selects the entry of i_format_old2new; a value
+// outside that table now returns IM_ERR_FORMAT instead of reading past it.
+// A non-zero high byte selects the default compression of the format,
+// otherwise "NONE" is used.
+// Registered old style callbacks are asked for the attributes to store; their
+// output variables start zeroed in case a callback leaves them untouched.
+// Returns the result of the failing or final file call.
+int imSaveMap(int width, int height, int format, unsigned char *map, int palette_count, long *palette, char *filename)
+{
+  int error;
+  int format_index = format & 0x00FF;
+  if (format_index >= (int)(sizeof(i_format_old2new)/sizeof(i_format_old2new[0])))
+    return IM_ERR_FORMAT;
+
+  char* new_format = i_format_old2new[format_index];
+  imFile* ifile = imFileNew(filename, new_format, &error);
+  if (!ifile) return error;
+  
+  if (format & 0xFF00)
+    imFileSetInfo(ifile, NULL);
+  else
+    imFileSetInfo(ifile, "NONE");
+
+  imFileSetPalette(ifile, palette, palette_count);
+
+  if (iOldResolutionCB)
+  {
+    double xres = 0, yres = 0;
+    int res_unit = 0;
+    iOldResolutionCB(filename, &xres, &yres, &res_unit);
+    float fxres=(float)xres, fyres=(float)yres;
+    imFileSetAttribute(ifile, "XResolution", IM_FLOAT, 1, (void*)&fxres);
+    imFileSetAttribute(ifile, "YResolution", IM_FLOAT, 1, (void*)&fyres);
+    imFileSetAttribute(ifile, "ResolutionUnit", IM_INT, 1, (void*)&res_unit);
+  }
+
+  if (iOldTiffImageDescCB)
+  {
+    char img_desc[50] = "";  // stays a valid empty string if the callback writes nothing
+    iOldTiffImageDescCB(filename, img_desc);
+    imFileSetAttribute(ifile, "Description", IM_BYTE, strlen(img_desc)+1, (void*)img_desc);
+  }
+
+  if (iOldGifTranspIndexCB)
+  {
+    unsigned char transp_index = 0;
+    iOldGifTranspIndexCB(filename, &transp_index);
+    imFileSetAttribute(ifile, "TransparencyIndex", IM_BYTE, 1, (void*)&transp_index);
+  }
+  
+  error = imFileWriteImageInfo(ifile, width, height, IM_MAP, IM_BYTE);
+  if (error)
+  {
+    imFileClose(ifile);
+    return error;
+  }
+
+  if (iOldCounterCB)
+    imCounterSetCallback(filename, iOldFileCounter);
+ 
+  error = imFileWriteImageData(ifile, map);
+  imFileClose(ifile);  
+  return error;
+}
diff --git a/src/old_imcolor.c b/src/old_imcolor.c
new file mode 100644
index 0000000..0c6b353
--- /dev/null
+++ b/src/old_imcolor.c
@@ -0,0 +1,75 @@
+/** \file
+ * \brief Old color conversion functions
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: old_imcolor.c,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "old_im.h"
+#include "im.h"
+#include "im_util.h"
+#include "im_image.h"
+#include "im_convert.h"
+
+/* Old API name: forwards to imConvertRGB2Map. Note the different argument
+ * order (palette before the count) and that the count is passed by address,
+ * so any value written back to it stays local to this function. */
+void imRGB2Map(int width, int height, 
+              unsigned char *red, unsigned char *green, unsigned char *blue, 
+              unsigned char *map, int palette_count, long *palette)
+{
+  imConvertRGB2Map(width, height, 
+                   red,  green, blue, 
+                   map, palette, &palette_count);
+}
+
+/* Expands width*height palette indices from map into the red, green and blue
+ * planes using the colors of palette.
+ * The lookup tables are zero filled and at most 256 palette entries are
+ * decoded, so a large palette_count cannot overrun them and an index beyond
+ * the palette yields black instead of an uninitialized value. */
+void imMap2RGB(int width, int height, unsigned char *map, int palette_count, long *palette, unsigned char *red, unsigned char *green, unsigned char *blue)
+{
+  int i, count, c, index;
+  unsigned char r[256] = {0}, g[256] = {0}, b[256] = {0};
+
+  if (palette_count > 256)
+    palette_count = 256;
+
+  for (c = 0; c < palette_count; c++)
+    imColorDecode(&r[c], &g[c], &b[c], palette[c]);
+
+  count = width*height;
+  for (i = 0; i < count; i++)
+  {
+    index = *map++;
+    *red++ = r[index];
+    *green++ = g[index];
+    *blue++ = b[index];
+  }
+}
+
+/* Converts three byte planes to one gray plane with the integer weights
+ * 30/59/11 (percent) and fills grays with the 256 entry gray palette. */
+void imRGB2Gray(int width, int height, unsigned char *red, unsigned char *green, unsigned char *blue, unsigned char *map, long *grays)
+{
+  int level, pixel;
+  int total = width*height;
+
+  /* palette entry n is the gray color (n, n, n) */
+  for (level = 0; level < 256; level++)
+    grays[level] = imColorEncode((unsigned char)level, (unsigned char)level, (unsigned char)level);
+
+  for (pixel = 0; pixel < total; pixel++)
+    map[pixel] = (unsigned char)((red[pixel] * 30 + green[pixel] * 59 + blue[pixel] * 11) / 100);
+}
+
+/* Converts palette indices to gray levels with the integer weights 30/59/11
+ * (percent) and fills grays with the 256 entry gray palette.
+ * The conversion table is zero filled and at most 256 palette entries are
+ * converted, so a large palette_count cannot overrun it and an index beyond
+ * the palette yields 0 instead of an uninitialized value. */
+void imMap2Gray(int width, int height, unsigned char *map, int palette_count, long *palette, unsigned char *gray_map, long *grays)
+{
+  int i, count, c;
+  unsigned char cnv_table[256] = {0};
+  unsigned char r, g, b;
+
+  if (palette_count > 256)
+    palette_count = 256;
+
+  /* palette entry n is the gray color (n, n, n) */
+  for (c = 0; c < 256; c++)
+    *grays++ = imColorEncode((unsigned char)c, (unsigned char)c, (unsigned char)c);
+
+  /* gray level of every palette color, computed once */
+  for (c = 0; c < palette_count; c++)
+  {
+    imColorDecode(&r, &g, &b, palette[c]);
+    cnv_table[c] = (unsigned char)((r * 30 + g * 59 + b * 11) / 100);
+  }
+
+  count = width*height;
+  for (i = 0; i < count; i++)
+  {
+    *gray_map++ = cnv_table[*map++];
+  }
+}
diff --git a/src/old_imresize.c b/src/old_imresize.c
new file mode 100644
index 0000000..8191037
--- /dev/null
+++ b/src/old_imresize.c
@@ -0,0 +1,117 @@
+/** \file
+ * \brief Old resize/stretch functions
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: old_imresize.c,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "old_im.h"
+
+#include <stdlib.h>
+#include <math.h>
+#include <memory.h>
+
+
+/*
+  BILINEAR INTERPOLATION:
+
+  x' = floor(x)                                        f(x'   , y')    = fll
+  y' = floor(y)                                        f(x'   , y'+ 1) = flh
+                                                       f(x'+ 1, y')    = fhl
+  t = x - x'                                           f(x'+ 1, y'+ 1) = fhh
+  u = y - y'
+
+  f(x,y) = (1-t) * (1-u) * f(x'   , y'),
+           (1-t) *    u  * f(x'   , y'+ 1),
+               t * (1-u) * f(x'+ 1, y'),
+               t *    u  * f(x'+ 1, y'+ 1)
+
+  f(x,y) =          fll +                              (re-arranging)
+               t * (fhl - fll),
+               u * (flh - fll),
+           u * t * (fhh - flh - fhl + fll)
+          
+*/
+
+/* Resizes one byte plane with bilinear interpolation (see the formulas above).
+ * src_map holds src_width*src_height bytes, dst_map receives
+ * dst_width*dst_height bytes.
+ * Nothing is written when a dimension is smaller than 1 or when the work
+ * tables cannot be allocated. A destination dimension of 1 samples the first
+ * source column/row instead of dividing by zero. */
+void imResize(int src_width, int src_height, unsigned char *src_map, int dst_width, int dst_height, unsigned char *dst_map)
+{
+  unsigned char *line_mapl, *line_maph;
+  double t, u, src_x, src_y, factor;
+  int fhh, fll, fhl, flh, xl, yl, xh, yh, x, y;
+  int *XL;
+  double *T;
+
+  if (src_width < 1 || src_height < 1 || dst_width < 1 || dst_height < 1)
+    return;
+
+  XL = (int*)malloc(dst_width * sizeof(int));
+  T = (double*)malloc(dst_width * sizeof(double));
+  if (!XL || !T)
+  {
+    free(XL);
+    free(T);
+    return;
+  }
+
+  /* horizontal positions are the same for every line, compute them once */
+  factor = (dst_width > 1)? (double)(src_width-1) / (double)(dst_width-1): 0.0;
+  for (x = 0; x < dst_width; x++)
+  {
+    src_x = x * factor;
+    xl = (int)src_x;  /* src_x is never negative here, so this is its floor */
+    T[x] = src_x - xl;
+    XL[x] = xl;
+  }
+
+  factor = (dst_height > 1)? (double)(src_height-1) / (double)(dst_height-1): 0.0;
+
+  for (y = 0; y < dst_height; y++)
+  {
+    src_y = y * factor;
+    yl = (int)src_y;  /* same reasoning as for src_x */
+    yh = (yl == src_height-1)? yl: yl + 1;  /* stay inside on the last line */
+    u = src_y - yl;
+
+    line_mapl = src_map + yl * src_width;
+    line_maph = src_map + yh * src_width;
+
+    for (x = 0; x < dst_width; x++)
+    {
+      xl = XL[x];
+      xh = (xl == src_width-1)? xl: xl + 1;  /* stay inside on the last column */
+      t = T[x];
+
+      fll = line_mapl[xl];
+      fhl = line_mapl[xh];
+      flh = line_maph[xl];
+      fhh = line_maph[xh];
+
+      *(dst_map++) = (unsigned char)(u * t * (fhh - flh - fhl + fll) + t * (fhl - fll) + u * (flh - fll) + fll);
+    }
+  }
+
+  free(XL);
+  free(T);
+}
+
+/* Resizes one byte plane by picking the nearest source pixel.
+ * src_map holds src_width*src_height bytes, dst_map receives
+ * dst_width*dst_height bytes.
+ * The source line is now selected before a destination line is copied; it
+ * used to be selected afterwards, so every destination line used the source
+ * line meant for the previous one and the last source line was never reached.
+ * Nothing is written when a dimension is smaller than 1 or when the table
+ * cannot be allocated. A destination dimension of 1 samples the first source
+ * column/row instead of dividing by zero. */
+void imStretch(int src_width, int src_height, unsigned char *src_map, int dst_width, int dst_height, unsigned char *dst_map)
+{
+  int x, y;
+  double factor;
+  unsigned char *line_map;
+  int* XTab;
+
+  if (src_width < 1 || src_height < 1 || dst_width < 1 || dst_height < 1)
+    return;
+
+  XTab = (int*)malloc(dst_width*sizeof(int));
+  if (!XTab)
+    return;
+
+  /* initialize conversion table to speed up the stretch process */
+  factor = (dst_width > 1)? (double)(src_width-1) / (double)(dst_width-1): 0.0;
+  for (x = 0; x < dst_width; x++)
+    XTab[x] = (int)(factor * x + 0.5);
+
+  factor = (dst_height > 1)? (double)(src_height-1) / (double)(dst_height-1): 0.0;
+
+  for (y = 0; y < dst_height; y++)
+  {
+    /* nearest source line for this destination line */
+    line_map = src_map + ((int)(factor * y + 0.5)) * src_width;
+
+    for (x = 0; x < dst_width; x++)
+      *(dst_map++) = line_map[XTab[x]];
+  }
+
+  free(XTab);
+}
+
diff --git a/src/process/im_analyze.cpp b/src/process/im_analyze.cpp
new file mode 100644
index 0000000..50bcbcd
--- /dev/null
+++ b/src/process/im_analyze.cpp
@@ -0,0 +1,1262 @@
+/** \file
+ * \brief Image Analysis
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_analyze.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_math.h>
+
+#include "im_process_ana.h"
+#include "im_process_pon.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+
+#define MAX_COUNT 65536  // maximum number of regions
+
+/* Final step: turns the alias table into a remap table and updates the region count. */
+static void alias_update(imushort* alias_table, int &region_count)
+{
+  const int label_count = region_count;
+  int label, remaining = label_count;
+
+  // On entry a non zero entry means "this label is an alias of that label".
+  // First pass: make every alias point directly to the end of its chain.
+  for (label = 0; label < label_count; label++)
+  {
+    imushort root = alias_table[label];
+    if (!root)
+      continue;  // not an alias
+
+    while (alias_table[root])
+      root = alias_table[root];
+
+    alias_table[label] = root;
+    remaining--;  // an alias is not a region by itself
+  }
+
+  // Second pass: build the remap values.
+  // Labels 0 (background) and 1 (border) are both remapped to 0.
+  alias_table[0] = 0;
+  alias_table[1] = 0;
+
+  int next_value = 1;
+  for (label = 2; label < label_count; label++)
+  {
+    const imushort target = alias_table[label];
+    if (target)
+      alias_table[label] = (imushort)(alias_table[target]);  // copy what the target holds now
+    else
+      alias_table[label] = (imushort)next_value++;  // non aliases get consecutive values
+  }
+
+  // background and border are not counted as regions
+  region_count = remaining-2;
+}
+
+/* Finds the smallest region number in the alias chain of a region.
+   'min' is lowered to the smallest alias found and is left unchanged if none is smaller. */
+static void alias_getmin(imushort* alias_table, imushort region, imushort &min)
+{
+  // follow the chain until a label that has no alias (zero entry)
+  for (imushort alias = alias_table[region]; alias != 0; alias = alias_table[alias])
+  {
+    if (alias < min)
+      min = alias;
+  }
+}
+
+/* Makes every label in the alias chain of a region point directly to 'min'. */
+static void alias_setmin(imushort* alias_table, imushort region, imushort min)
+{
+  imushort current = region;
+  for (imushort next = alias_table[current]; next != 0; next = alias_table[current])
+  {
+    alias_table[current] = min;  // 'next' already holds the old link
+    current = next;
+  }
+
+  if (current != min)  // the end of the chain becomes an alias too, unless it is 'min' itself
+    alias_table[current] = min;
+}
+
+/* Marks two region numbers as being the same region, using the smallest number found as the alias. */
+static void alias_set(imushort* alias_table, imushort region1, imushort region2)
+{
+  if (region1 != region2)
+  {
+    // smallest number among the two regions and their alias chains
+    imushort lowest = (region2 > region1)? region1: region2;
+    alias_getmin(alias_table, region1, lowest);
+    alias_getmin(alias_table, region2, lowest);
+    // nothing to do for a region that is 'lowest' or that already points to it
+    if (!(region1 == lowest || alias_table[region1] == lowest))
+      alias_setmin(alias_table, region1, lowest);
+    if (!(region2 == lowest || alias_table[region2] == lowest))
+      alias_setmin(alias_table, region2, lowest);
+  }
+}
+
+/* Labels the connected regions of the non zero pixels of 'map' in 'new_map' (4 connected, or 8 when connect is 8).
+   Regions that touch the border get the label 1 and are remapped to 0. Background pixels of new_map are not written,
+   so it presumably must arrive zero filled (TODO confirm). Returns the region count, or -1 if above MAX_COUNT. */
+static int DoAnalyzeFindRegions(int width, int height, imbyte* map, imushort* new_map, int connect)
+{
+  int i, j;
+  // mark the pixels of the first line and of the first column: they belong to the invalid border region 1
+  imbyte* pmap = map;
+  imushort* new_pmap = new_map;
+  for (j = 0; j < width; j++)     // first line
+  {
+    if (pmap[j])
+      new_pmap[j] = 1;
+  }
+  pmap += width;
+  new_pmap += width;
+
+  for (i = 1; i < height-1; i++)  // first column
+  {
+    if (pmap[0])
+      new_pmap[0] = 1;
+
+    pmap += width;
+    new_pmap += width;
+  }
+
+  // find and connect the regions
+
+  imbyte* pmap1 = map;         // previous line (line 0)
+  imushort* new_pmap1 = new_map; 
+
+  pmap = map + width;          // current line (line 1)
+  new_pmap = new_map + width;
+
+  int region_count = 2;  // 0- background, 1-border
+  imushort* alias_table = new imushort [MAX_COUNT];
+  memset(alias_table, 0, MAX_COUNT*sizeof(imushort)); // memset counts bytes: clear the whole table, not half of it
+
+  for (i = 1; i < height; i++)
+  {
+    for (j = 1; j < width; j++)
+    {
+      int has_j1 = j < width-1? 1: 0;
+      if (pmap[j])
+      {
+        if (pmap[j-1] || pmap1[j] || 
+            (connect == 8 && (pmap1[j-1] || (has_j1&&pmap1[j+1])))) // 4 or 8 connected to the previous neighbors
+        {
+          imushort region = 0;
+          if (i == height-1 || j == width-1)
+          {
+            region = new_pmap[j] = 1;  // last line or last column: border region
+          }
+
+          if (pmap[j-1])
+          {
+            if (!region)
+              region = new_pmap[j-1];  // horizontal neighbor  -00
+            else                       //                      X1
+            {
+              // this is a bottom or right border pixel that connects to an horizontal neighbor
+
+              // this pixel can connect two different regions
+              alias_set(alias_table, region, new_pmap[j-1]);
+            }
+          }
+
+          if (pmap1[j])    // vertical neighbor
+          {
+            if (!region)
+              region = new_pmap1[j];  // isolated vertical neighbor  -X-
+            else                      //                             01
+            {
+              // an horizontal neighbor connects to a vertical neighbor  -X-
+              //                                                         X1
+
+              // this pixel can connect two different regions
+              alias_set(alias_table, region, new_pmap1[j]);
+            }
+          }
+          else if (region && connect==8)
+          {
+            // no vertical neighbor but the region is already set: a corner neighbor can connect two different regions
+            if (has_j1&&pmap1[j+1])          // right corner   00X
+              alias_set(alias_table, region, new_pmap1[j+1]);  //  X1
+            if (!pmap[j-1] && pmap1[j-1])    // left corner, only possible here for a border pixel (region 1)
+              alias_set(alias_table, region, new_pmap1[j-1]);
+          }
+
+          if (connect == 8 && (pmap1[j-1] || (has_j1&&pmap1[j+1])) && !region) // isolated corner
+          {
+            // a left corner neighbor or a right corner neighbor  X0X
+            //                                                    01
+
+            if (pmap1[j-1])  // left corner
+              region = new_pmap1[j-1];
+
+            if (pmap1[j+1])  // right corner (j is never the last column here, region would be 1)
+            {
+              if (!region) // isolated right corner
+                region = new_pmap1[j+1];
+              else
+              {
+                // this pixel can connect two different regions
+                alias_set(alias_table, new_pmap1[j-1], new_pmap1[j+1]);
+              }
+            }
+          }
+
+          new_pmap[j] = region;
+        }
+        else
+        {
+          // this pixel touches no pixels
+
+          if (i == height-1 || j == width-1)
+            new_pmap[j] = 1;
+          else
+          {
+            // create a new region  000
+            //                      01
+            new_pmap[j] = (imushort)region_count;
+            region_count++;
+
+            if (region_count > MAX_COUNT)
+            {
+              delete [] alias_table;
+              return -1;
+            }
+          }
+        }
+      }
+    }
+
+    pmap1 = pmap;
+    new_pmap1 = new_pmap;
+    pmap += width;
+    new_pmap += width;
+  }
+
+  // now all pixels are marked, 
+  // but some marks are aliases to others
+
+  // adjust the alias table to be a remap table
+  // and return the real region count
+  alias_update(alias_table, region_count);
+
+  int count = width*height;
+  for (i = 0; i < count; i++)
+  {
+    new_map[i] = alias_table[new_map[i]];
+  }
+
+  delete [] alias_table;
+
+  return region_count;
+}
+
+/* Labels the connected regions of the non zero pixels of 'map' in 'new_map' (4 connected, or 8 when connect is 8).
+   Regions that touch the border are kept: no pixel is labeled 1 here. Background pixels of new_map are not written,
+   so it presumably must arrive zero filled (TODO confirm). Returns the region count, or -1 if above MAX_COUNT. */
+static int DoAnalyzeFindRegionsBorder(int width, int height, imbyte* map, imushort* new_map, int connect)
+{
+  int i, j;
+  imbyte* pmap1 = map;                 // previous line: none yet, it is never read while i is 0
+  imushort* new_pmap1 = new_map;       // (a pointer to line -1 would be outside the array)
+  imbyte* pmap = map;                  // current line (line 0)
+  imushort* new_pmap = new_map;
+  int region_count = 2;  // still consider: 0- background, 1-border
+  imushort* alias_table = new imushort [MAX_COUNT];
+  memset(alias_table, 0, MAX_COUNT*sizeof(imushort)); // memset counts bytes: clear the whole table, not half of it
+
+  for (i = 0; i < height; i++)
+  {
+    for (j = 0; j < width; j++)
+    {
+      if (pmap[j])
+      {
+        int b01 = j > 0? 1: 0; // valid for pmap[j-1]
+        int b10 = i > 0? 1: 0; // valid for pmap1[j]
+        int b11 = i > 0 && j > 0? 1: 0; // valid for pmap1[j-1]
+        int b12 = i > 0 && j < width-1? 1: 0; // valid for pmap1[j+1]
+
+        if ((b01&&pmap[j-1]) || (b10&&pmap1[j]) || 
+            (connect == 8 && ((b11&&pmap1[j-1]) || (b12&&pmap1[j+1])))) // 4 or 8 connected to the previous neighbors
+        {
+          imushort region = 0;
+
+          if (b01&&pmap[j-1])
+          {
+            if (!region)
+              region = new_pmap[j-1];  // horizontal neighbor  -00
+            else                       //                      X1
+            {
+              // region is still 0 at this point, so this branch is never taken
+
+              // this pixel can connect two different regions
+              alias_set(alias_table, region, new_pmap[j-1]);
+            }
+          }
+
+          if (b10&&pmap1[j])    // vertical neighbor
+          {
+            if (!region)
+              region = new_pmap1[j];  // isolated vertical neighbor  -X-
+            else                      //                             01
+            {
+              // an horizontal neighbor connects to a vertical neighbor  -X-
+              //                                                         X1
+
+              // this pixel can connect two different regions
+              alias_set(alias_table, region, new_pmap1[j]);
+            }
+          }
+          else if (region && connect == 8 && (b12&&pmap1[j+1]))
+          {
+            // an horizontal neighbor connects to a right corner neighbor   00X
+            //                                                              X1
+
+            // this pixel can connect two different regions
+            alias_set(alias_table, region, new_pmap1[j+1]);
+          }
+
+          if (connect == 8 && ((b11&&pmap1[j-1]) || (b12&&pmap1[j+1])) && !region) // isolated corner
+          {
+            // a left corner neighbor or a right corner neighbor  X0X
+            //                                                    01
+
+            if (b11&&pmap1[j-1])  // left corner
+              region = new_pmap1[j-1];
+
+            if (b12&&pmap1[j+1])  // right corner
+            {
+              if (!region) // isolated right corner
+                region = new_pmap1[j+1];
+              else
+              {
+                // this pixel can connect two different regions
+                alias_set(alias_table, new_pmap1[j-1], new_pmap1[j+1]);
+              }
+            }
+          }
+
+          new_pmap[j] = region;
+        }
+        else
+        {
+          // this pixel touches no pixels
+
+          // create a new region  000
+          //                      01
+          new_pmap[j] = (imushort)region_count;
+          region_count++;
+
+          if (region_count > MAX_COUNT)
+          {
+            delete [] alias_table;
+            return -1;
+          }
+        }
+      }
+    }
+
+    pmap1 = pmap;
+    new_pmap1 = new_pmap;
+    pmap += width;
+    new_pmap += width;
+  }
+
+  // now all pixels are marked, 
+  // but some marks are aliases to others
+
+  // adjust the alias table to be a remap table
+  // and return the real region count
+  alias_update(alias_table, region_count);
+
+  int count = width*height;
+  for (i = 0; i < count; i++)
+  {
+    new_map[i] = alias_table[new_map[i]];
+  }
+
+  delete [] alias_table;
+
+  return region_count;
+}
+
+int imAnalyzeFindRegions(const imImage* image, imImage* NewImage, int connect, int touch_border)
+{
+  // the connectivity is saved in the labeled image as a one byte attribute, '4' or '8'
+  imImageSetAttribute(NewImage, "REGION_CONNECT", IM_BYTE, 1, (connect != 4)? "8": "4");
+  // touch_border selects the version that keeps the regions that touch the border
+  int (*find_regions)(int, int, imbyte*, imushort*, int) = touch_border? DoAnalyzeFindRegionsBorder: DoAnalyzeFindRegions;
+  return find_regions(image->width, image->height, (imbyte*)image->data[0], (imushort*)NewImage->data[0], connect);
+}
+
+void imAnalyzeMeasureArea(const imImage* image, int* data_area, int region_count)
+{
+  // Counts the pixels of each region: a pixel with value L adds 1 to data_area[L-1], zero pixels are skipped.
+  const imushort* labels = (const imushort*)image->data[0];
+  const int pixel_count = image->count;
+  memset(data_area, 0, region_count*sizeof(int));
+  for (int p = 0; p < pixel_count; p++)
+  {
+    const imushort label = labels[p];
+    if (label != 0)
+      data_area[label - 1]++;
+  }
+}
+
+void imAnalyzeMeasureCentroid(const imImage* image, const int* data_area, int region_count, float* data_cx, float* data_cy)
+{
+  // Computes the centroid of each region: the mean x in data_cx[L-1] and the mean y in data_cy[L-1]
+  // for the pixels with value L. data_cx or data_cy can be NULL. If data_area is NULL it is measured here.
+  imushort* img_data = (imushort*)image->data[0];
+  int* local_data_area = 0;
+  if (!data_area)
+  {
+    local_data_area = (int*)malloc(region_count*sizeof(int));
+    if (!local_data_area)
+      return;  // no memory: the results are left untouched
+    imAnalyzeMeasureArea(image, local_data_area, region_count);
+    data_area = (const int*)local_data_area;
+  }
+  if (data_cx) memset(data_cx, 0, region_count*sizeof(float));
+  if (data_cy) memset(data_cy, 0, region_count*sizeof(float));
+
+  // sum the coordinates of the pixels of each region
+  for (int y = 0; y < image->height; y++) 
+  {
+    int offset = y*image->width;
+    for (int x = 0; x < image->width; x++)
+    {
+      int region_index = img_data[offset+x];
+      if (!region_index)
+        continue;
+      if (data_cx) data_cx[region_index-1] += (float)x;
+      if (data_cy) data_cy[region_index-1] += (float)y;
+    }
+  }
+
+  for (int i = 0; i < region_count; i++) 
+  {
+    if (!data_area[i]) continue;  // zero area: skip the division by zero, the sum is kept as is
+    if (data_cx) data_cx[i] /= (float)data_area[i];
+    if (data_cy) data_cy[i] /= (float)data_area[i];
+  }
+  free(local_data_area);  // free(NULL) is a no op
+}
+
+static inline double ipow(double x, int j)
+{
+  double result = 1.0;  // x to the power j by repeated multiplication, 1.0 when j is not positive
+  for (; j > 0; j--)
+    result *= x;
+  return result;
+}
+
+static void iCalcMoment(double* cm, int px, int py, const imImage* image, const float* cx, const float* cy, int region_count)
+{
+  // For each region L, cm[L-1] receives the sum of (x-cx[L-1])^px * (y-cy[L-1])^py
+  // over the pixels (x,y) with value L. A zero power skips its factor.
+  const imushort* labels = (const imushort*)image->data[0];
+  const int w = image->width;
+  const int h = image->height;
+  memset(cm, 0, region_count*sizeof(double));
+
+  for (int row = 0; row < h; row++) 
+  {
+    const imushort* line = labels + row*w;
+    for (int col = 0; col < w; col++)
+    {
+      if (!line[col])
+        continue;  // zero pixels belong to no region
+
+      const int r = line[col] - 1;
+      if (px == 0)
+        cm[r] += ipow(row-cy[r],py);
+      else if (py == 0)
+        cm[r] += ipow(col-cx[r],px);
+      else
+        cm[r] += ipow(col-cx[r],px)*ipow(row-cy[r],py);
+    }
+  }
+}
+
+template<class T>
+static inline int IsPerimeterPoint(T* map, int width, int height, int x, int y)
+{
+  // 'map' is the start of line y, even if that line is outside the image
+  // (in that case it is not read).
+
+  // a position just outside the image is never a perimeter point
+  const bool outside = (x == -1) || (x == width) || (y == -1) || (y == height);
+  if (outside)
+    return 0;
+
+  // a zero pixel is never a perimeter point
+  const T value = map[x];
+  if (!value)
+    return 0;
+
+  // a non zero pixel at the image border is always a perimeter point
+  const bool at_border = (x == 0) || (x == width-1) || (y == 0) || (y == height-1);
+  if (at_border)
+    return 1;
+
+  // inside the image: it is a perimeter point when at least one of
+  // its 4 connected neighbors has a different value
+  const T* next_line = map + width;
+  const T* prev_line = map - width;
+  const bool surrounded = next_line[x] == value && map[x+1] == value && map[x-1] == value && prev_line[x] == value;
+  return surrounded? 0: 1;
+}
+
+// Computes, for each region of the labeled image, the slopes (angles in degrees) and the lengths of two
+// perpendicular axes derived from the central moments cm20, cm02 and cm11 and from the perimeter points.
+// NULL area or centroid arrays are computed here, and any of the four result arrays can be NULL.
+void imAnalyzeMeasurePrincipalAxis(const imImage* image, const int* data_area, const float* data_cx, const float* data_cy, 
+                                   const int region_count, float* major_slope, float* major_length, 
+                                                           float* minor_slope, float* minor_length)
+{
+  int i;
+  int *local_data_area = 0;
+  float *local_data_cx = 0, *local_data_cy = 0;
+  if (!data_area)  // the area was not provided, measure it in a local array
+  {
+    local_data_area = (int*)malloc(region_count*sizeof(int));
+    imAnalyzeMeasureArea(image, local_data_area, region_count);
+    data_area = (const int*)local_data_area;
+  }
+  if (!data_cx || !data_cy)  // compute only the centroid coordinates that were not provided
+  {
+    if (!data_cx)
+    {
+      local_data_cx = (float*)malloc(region_count*sizeof(float));
+      data_cx = (const float*)local_data_cx;
+    }
+
+    if (!data_cy)
+    {
+      local_data_cy = (float*)malloc(region_count*sizeof(float));
+      data_cy = (const float*)local_data_cy;
+    }
+
+    if (local_data_cx && local_data_cy)
+      imAnalyzeMeasureCentroid(image, data_area, region_count, local_data_cx, local_data_cy);
+    else if (local_data_cx)
+      imAnalyzeMeasureCentroid(image, data_area, region_count, local_data_cx, NULL);
+    else if (local_data_cy)
+      imAnalyzeMeasureCentroid(image, data_area, region_count, NULL, local_data_cy);
+  }
+  // additional moments
+  double* cm20 = (double*)malloc(region_count*sizeof(double));
+  double* cm02 = (double*)malloc(region_count*sizeof(double));
+  double* cm11 = (double*)malloc(region_count*sizeof(double));
+  
+  iCalcMoment(cm20, 2, 0, image, data_cx, data_cy, region_count);
+  iCalcMoment(cm02, 0, 2, image, data_cx, data_cy, region_count);
+  iCalcMoment(cm11, 1, 1, image, data_cx, data_cy, region_count);
+  // the slope arrays are also used as work arrays below, so local arrays replace the ones that are NULL
+  float *local_major_slope = 0, *local_minor_slope = 0;
+  if (!major_slope)
+  {
+    local_major_slope = (float*)malloc(region_count*sizeof(float));
+    major_slope = local_major_slope;
+  }
+  if (!minor_slope)
+  {
+    local_minor_slope = (float*)malloc(region_count*sizeof(float));
+    minor_slope = local_minor_slope;
+  }
+  // conversion factor from radians to degrees (approximately 180/pi)
+#define RAD2DEG  57.296
+
+  // We are going to find 2 axis parameters.
+  // Axis 1 are located in quadrants 1-3
+  // Axis 2 are located in quadrants 2-4
+
+  // Quadrants
+  //    2 | 1
+  //    -----
+  //    3 | 4
+
+  // line coefficients (y = A*x + C) for the lines that belong to axis 1 and 2
+  float* A1 = (float*)malloc(region_count*sizeof(float));
+  float* A2 = (float*)malloc(region_count*sizeof(float));
+  float* C1 = (float*)malloc(region_count*sizeof(float));
+  float* C2 = (float*)malloc(region_count*sizeof(float));
+
+  float *slope1 = major_slope; // Use major_slope as a storage place, 
+  float *slope2 = minor_slope; // and create an alias to make code clear.
+
+  for (i = 0; i < region_count; i++) 
+  {
+    if (cm11[i] == 0)
+    {
+      slope1[i] = 0;
+      slope2[i] = 90;
+      // cm11 is zero: the slopes are fixed at 0 and 90 degrees, which also avoids the division below
+      // These should not be used
+      A1[i] = 0; 
+      A2[i] = 0;  // infinite
+      C1[i] = 0;  // data_cy[i]
+      C2[i] = 0;  
+    }
+    else
+    {
+      double b = (cm20[i] - cm02[i])/cm11[i];
+      double delta = sqrt(b*b + 4.0);
+      double r1 = (-b-delta)/2.0;  // r1 and r2 are the roots of r*r + b*r - 1 = 0,
+      double r2 = (-b+delta)/2.0;  // so r1*r2 = -1: they are the slopes of two perpendicular lines
+      float a1 = (float)(atan(r1)*RAD2DEG + 90);  // to avoid negative results
+      float a2 = (float)(atan(r2)*RAD2DEG + 90);
+
+      if (a1 == 180) a1 = 0;
+      if (a2 == 180) a2 = 0;
+
+      if (a1 < 90)             // a1 is quadrants q1-q3
+      {                        
+        slope1[i] = a1;   
+        slope2[i] = a2;   
+        A1[i] = (float)r1;
+        A2[i] = (float)r2;
+      }
+      else                     // a2 is quadrants q1-q3
+      {
+        slope1[i] = a2;
+        slope2[i] = a1;
+        A1[i] = (float)r2;
+        A2[i] = (float)r1;
+      }
+      // C is chosen so that each line y = A*x + C passes through the centroid
+      C1[i] = data_cy[i] - A1[i] * data_cx[i];
+      C2[i] = data_cy[i] - A2[i] * data_cx[i];
+    }
+  }
+
+  // moments are not necessary anymore
+  free(cm20); free(cm02); free(cm11);
+  cm20 = 0; cm02 = 0; cm11 = 0;
+
+  // maximum distance from a point in the perimeter to an axis in each side of the axis
+  // D1 is distance to axis 1, a and b are sides
+  float* D1a = (float*)malloc(region_count*sizeof(float));
+  float* D1b = (float*)malloc(region_count*sizeof(float));
+  float* D2a = (float*)malloc(region_count*sizeof(float));
+  float* D2b = (float*)malloc(region_count*sizeof(float));
+  memset(D1a, 0, region_count*sizeof(float));
+  memset(D1b, 0, region_count*sizeof(float));
+  memset(D2a, 0, region_count*sizeof(float));
+  memset(D2b, 0, region_count*sizeof(float));
+
+  imushort* img_data = (imushort*)image->data[0];
+  int width = image->width;
+  int height = image->height;
+  for (int y = 0; y < height; y++) 
+  {
+    int offset = y*width;
+
+    for (int x = 0; x < width; x++)
+    {
+      if (IsPerimeterPoint(img_data+offset, width, height, x, y))
+      {
+        i = img_data[offset+x] - 1;
+        // d1 and d2 are signed and not normalized: the division by sqrt(A*A + 1) is done after this loop
+        float d1, d2;
+        if (slope2[i] == 90)
+        {
+          d2 = y - data_cy[i];   // I checked this many times, looks odd but it is correct.
+          d1 = x - data_cx[i];
+        }
+        else
+        {
+          d1 = A1[i]*x - y + C1[i];
+          d2 = A2[i]*x - y + C2[i];
+        }
+
+        if (d1 < 0)  // the sign selects the side of the axis: negative goes to "a", the others to "b"
+        {
+          d1 = (float)fabs(d1);
+          if (d1 > D1a[i])         
+            D1a[i] = d1;
+        }
+        else
+        {
+          if (d1 > D1b[i])
+            D1b[i] = d1;
+        }
+
+        if (d2 < 0)
+        {
+          d2 = (float)fabs(d2);
+          if (d2 > D2a[i])         
+            D2a[i] = d2;
+        }
+        else
+        {
+          if (d2 > D2b[i])
+            D2b[i] = d2;
+        }
+      }
+    }
+  }
+
+  for (i = 0; i < region_count; i++) 
+  {
+    float AB1 = (float)sqrt(A1[i]*A1[i] + 1);  // normalization of the line equation (1 when A is 0)
+    float AB2 = (float)sqrt(A2[i]*A2[i] + 1);
+    // sum of the maximum distances found at the two sides of each axis
+    float D1 = (D1a[i] + D1b[i]) / AB1; 
+    float D2 = (D2a[i] + D2b[i]) / AB2;
+
+    if (D1 < D2) // Major Axis in 2-4 quadrants
+    {
+      // now remember that we did an alias before
+      // slope1 -> major_slope
+      // slope2 -> minor_slope
+
+      float tmp = major_slope[i];
+      major_slope[i] = minor_slope[i];
+      minor_slope[i] = tmp;
+
+      if (minor_length) minor_length[i] = D1;
+      if (major_length) major_length[i] = D2;
+    }
+    else
+    {
+      if (minor_length) minor_length[i] = D2;
+      if (major_length) major_length[i] = D1;
+    }
+  }
+  // release only the arrays that were allocated here
+  if (local_major_slope) free(local_major_slope);
+  if (local_minor_slope) free(local_minor_slope);
+  if (local_data_area) free(local_data_area);
+  if (local_data_cx) free(local_data_cx);
+  if (local_data_cy) free(local_data_cy);
+
+  free(A1);  
+  free(A2);  
+  free(C1);  
+  free(C2);
+
+  free(D1b); 
+  free(D2b);
+  free(D1a); 
+  free(D2a); 
+}
+
+// Adds to each region of the labeled 'image' the measures of the holes found inside it: the number of holes in
+// count_data, their area in area_data and their perimeter in perim_data, all indexed by the region value - 1.
+// Any of the three arrays can be NULL. They are only incremented here, so presumably they must arrive initialized.
+void imAnalyzeMeasureHoles(const imImage* image, int connect, int* count_data, int* area_data, float* perim_data)
+{
+  int i;
+  imImage *inv_image = imImageCreate(image->width, image->height, IM_BINARY, IM_BYTE);
+  if (!inv_image)
+    return;
+  imbyte* inv_data = (imbyte*)inv_image->data[0];
+  imushort* img_data = (imushort*)image->data[0];
+
+  // the holes are searched as regions of the inverted image (1 where the labeled image is 0)
+  for (i = 0; i < image->count; i++)
+    inv_data[i] = img_data[i]? 0: 1;
+
+  imImage *holes_image = imImageClone(image);
+  if (!holes_image)
+  {
+    imImageDestroy(inv_image);  // do not leak the inverted image
+    return;
+  }
+  // touch_border=0: the regions of the inverted image that touch the border are not holes
+  int holes_count = imAnalyzeFindRegions(inv_image, holes_image, connect, 0);
+  imImageDestroy(inv_image);
+
+  if (holes_count <= 0)  // no holes, or -1 when imAnalyzeFindRegions found too many regions
+  {
+    imImageDestroy(holes_image);
+    return;
+  }
+
+  // measure the area of the holes, and the perimeter only if it was requested
+  int* holes_area = (int*)malloc(holes_count*sizeof(int));
+  float* holes_perim = perim_data? (float*)malloc(holes_count*sizeof(float)): 0;
+  if (!holes_area || (perim_data && !holes_perim))
+  {
+    free(holes_perim);
+    free(holes_area);
+    imImageDestroy(holes_image);
+    return;
+  }
+  imAnalyzeMeasureArea(holes_image, holes_area, holes_count);
+  if (holes_perim)
+    imAnalyzeMeasurePerimeter(holes_image, holes_perim, holes_count);
+  imushort* holes_data = (imushort*)holes_image->data[0];
+
+  // holes do not touch the border
+  for (int y = 1; y < image->height-1; y++) 
+  {
+    int offset_up = (y+1)*image->width;
+    int offset = y*image->width;
+    int offset_dw = (y-1)*image->width;
+
+    for (int x = 1; x < image->width-1; x++)
+    {
+      int hole_index = holes_data[offset+x];
+      if (hole_index && holes_area[hole_index-1]) // a hole not yet used
+      {
+        // if the hole has not been used, 
+        // it is the first time we encounter a pixel of this hole.
+        // then it is a pixel from the hole border.
+        // now find which region this hole is inside.
+        // a 4 connected neighbour is necessarily a valid region or 0.
+
+        int region_index = 0;
+        if (img_data[offset_up + x]) region_index = img_data[offset_up + x];
+        else if (img_data[offset + x+1]) region_index = img_data[offset + x+1];
+        else if (img_data[offset + x-1]) region_index = img_data[offset + x-1]; 
+        else if (img_data[offset_dw+x]) region_index = img_data[offset_dw+x];
+
+        if (!region_index) continue;
+
+        if (count_data) count_data[region_index-1]++;
+        if (area_data) area_data[region_index-1] += holes_area[hole_index-1];
+        if (perim_data) perim_data[region_index-1] += holes_perim[hole_index-1];
+        holes_area[hole_index-1] = 0; // mark hole as used
+      }
+    }
+  }
+
+  free(holes_perim);
+  free(holes_area);
+  imImageDestroy(holes_image);
+}
+
+template<class T>
+static void DoPerimeterLine(T* map, T* new_map, int width, int height)
+{
+  // Copies the perimeter points of 'map' to 'new_map' and sets every other pixel of 'new_map' to 0.
+  T* src_line = map;
+  T* dst_line = new_map;
+
+  for (int row = 0; row < height; row++, src_line += width, dst_line += width)
+  {
+    for (int col = 0; col < width; col++)
+    {
+      if (IsPerimeterPoint(src_line, width, height, col, row))
+        dst_line[col] = src_line[col];
+      else
+        dst_line[col] = 0;
+    }
+  }
+}
+
+void imProcessPerimeterLine(const imImage* src_image, imImage* dst_image)
+{
+  // Extracts the perimeter points of data[0] of src_image into data[0] of dst_image.
+  // Only the byte, ushort and int data types are processed; for any other type nothing is written.
+  const int w = src_image->width, h = src_image->height;
+  void* src = src_image->data[0];
+  void* dst = dst_image->data[0];
+
+  if (src_image->data_type == IM_BYTE)
+    DoPerimeterLine((imbyte*)src, (imbyte*)dst, w, h);
+  else if (src_image->data_type == IM_USHORT)
+    DoPerimeterLine((imushort*)src, (imushort*)dst, w, h);
+  else if (src_image->data_type == IM_INT)
+    DoPerimeterLine((int*)src, (int*)dst, w, h);
+}
+
+/* The idea of the perimeter templates is based on
+   Parker, Practical Computer Vision Using C
+
+For 1.414 (sqrt(2)/2 + sqrt(2)/2) [1]:
+     1 0 0   0 0 1   1 0 0   0 0 1   0 0 0   1 0 1
+     0 x 0   0 x 0   0 x 0   0 x 0   0 x 0   0 x 0
+     0 0 1   1 0 0   1 0 0   0 0 1   1 0 1   0 0 0
+      129      36     132      33      5      160
+
+For 1.207 (sqrt(2)/2 + 1.0/2) [2]:
+     0 0 0   0 0 1   0 1 0   0 1 0   1 0 0   0 0 1   0 0 0   1 0 0
+     1 x 0   1 x 0   0 x 0   0 x 0   0 x 0   0 x 0   0 x 1   0 x 1
+     0 0 1   0 0 0   1 0 0   0 0 1   0 1 0   0 1 0   1 0 0   0 0 0
+       17      48      68      65     130      34       12     136
+
+     0 0 0   1 0 0   1 1 0   0 1 1   0 0 1   0 0 0   0 0 0   0 0 0
+     1 x 0   1 x 0   0 x 0   0 x 0   0 x 1   0 x 1   0 x 0   0 x 0
+     1 0 0   0 0 0   0 0 0   0 0 0   0 0 0   0 0 1   0 1 1   1 1 0
+       20     144     192      96      40      9       3       6
+
+For 1.0 (1.0/2 + 1.0/2) [0]:
+     0 0 0   0 1 0   0 0 0   0 0 0   0 1 0   0 1 0
+     1 x 1   0 x 0   1 x 0   0 x 1   1 x 0   0 x 1
+     0 0 0   0 1 0   0 1 0   0 1 0   0 0 0   0 0 0
+       24      66      18      10      80      72
+
+For 0.707 (sqrt(2)/2) [3]:
+     1 0 0   0 0 1   0 0 0   0 0 0
+     0 x 0   0 x 0   0 x 0   0 x 0         (For Line Length)
+     0 0 0   0 0 0   0 0 1   1 0 0
+      128      32      1       4
+
+For 0.5 (1.0/2) [4]:
+     0 1 0   0 0 0   0 0 0   0 0 0
+     0 x 0   0 x 1   0 x 0   1 x 0         (For Line Length)
+     0 0 0   0 0 0   0 1 0   0 0 0
+       64      8       2      16
+
+*/
+static void iInitPerimTemplate(imbyte *templ, float *v)
+{
+  // Builds the two lookup tables used by imAnalyzeMeasurePerimeter:
+  //   templ[code] -> index in v, for each of the 256 neighborhood codes
+  //   v[index]    -> length added to the perimeter by one perimeter point
+  // The bits of a code, as they are set in imAnalyzeMeasurePerimeter:
+  //    0x20 0x40 0x80    (line y-1)
+  //    0x08   x  0x10    (line y)
+  //    0x01 0x02 0x04    (line y+1)
+  // A code that is not listed below keeps the index 0.
+
+  struct iCodeIndex
+  {
+    imbyte code;   // neighborhood code
+    imbyte index;  // index in v
+  };
+
+  static const iCodeIndex code_table[] =
+  {
+    // sqrt(2)/2 + sqrt(2)/2
+    {129, 1}, { 36, 1}, {132, 1}, { 33, 1}, {  5, 1}, {160, 1},
+
+    // sqrt(2)/2 + 1.0/2
+    { 17, 2}, { 48, 2}, { 68, 2}, { 65, 2},
+    {130, 2}, { 34, 2}, { 12, 2}, {136, 2},
+    { 20, 2}, {144, 2}, {192, 2}, { 96, 2},
+    { 40, 2}, {  9, 2}, {  3, 2}, {  6, 2},
+
+    // 1.0/2 + 1.0/2
+    { 24, 0}, { 66, 0}, { 18, 0}, { 10, 0}, { 80, 0}, { 72, 0},
+
+    // sqrt(2)/2
+    {128, 3}, { 32, 3}, {  1, 3}, {  4, 3},
+
+    // 1.0/2
+    { 64, 4}, {  8, 4}, {  2, 4}, { 16, 4}
+  };
+  const int code_count = (int)(sizeof(code_table)/sizeof(code_table[0]));
+
+  memset(templ, 0, 256);
+
+  for (int c = 0; c < code_count; c++)
+    templ[code_table[c].code] = code_table[c].index;
+
+  const float SQRT2   = 1.414213562373f;
+  const float SQRT2D2 = 0.707106781187f;
+
+  // lengths, in index order
+  v[0] = 1.0f;
+  v[1] = SQRT2;
+  v[2] = SQRT2D2 + 0.5f;
+  v[3] = SQRT2D2;
+  v[4] = 0.5f;
+}
+
+void imAnalyzeMeasurePerimeter(const imImage* image, float* perim_data, int region_count)
+{
+  // Adds up, for each region, the length contributed by each of its perimeter points;
+  // the result for the pixels with value L is stored in perim_data[L-1].
+  // NOTE(review): the one time initialization of the static tables is not protected against concurrent calls.
+  static imbyte templ[256];
+  static float vt[5];
+  static int first = 1;
+  if (first)
+  {
+    iInitPerimTemplate(templ, vt);
+    first = 0;
+  }
+
+  imushort* map = (imushort*)image->data[0];
+
+  // perim_data is an array of float, so it is cleared using the size of a float
+  memset(perim_data, 0, region_count*sizeof(float));
+
+  int width = image->width;
+  int height = image->height;
+  for (int y = 0; y < height; y++) 
+  {
+    // start of the current, next and previous lines; a line outside the image is never read
+    // by IsPerimeterPoint, so the current line is used in place of line -1 to keep the pointer valid
+    imushort* line = map + y*width;
+    imushort* next_line = line + width;
+    imushort* prev_line = (y > 0)? line - width: line;
+
+    for (int x = 0; x < width; x++)
+    {
+      if (!IsPerimeterPoint(line, width, height, x, y))
+        continue;
+
+      // code with one bit for each of the 8 neighbors that also belongs to the perimeter
+      int T = 0;
+      if (IsPerimeterPoint(next_line, width, height, x-1, y+1)) T |= 0x01;
+      if (IsPerimeterPoint(next_line, width, height, x,   y+1)) T |= 0x02;
+      if (IsPerimeterPoint(next_line, width, height, x+1, y+1)) T |= 0x04;
+
+      if (IsPerimeterPoint(line, width, height, x-1, y)) T |= 0x08;
+      if (IsPerimeterPoint(line, width, height, x+1, y)) T |= 0x10;
+
+      if (IsPerimeterPoint(prev_line, width, height, x-1, y-1)) T |= 0x20;
+      if (IsPerimeterPoint(prev_line, width, height, x,   y-1)) T |= 0x40;
+      if (IsPerimeterPoint(prev_line, width, height, x+1, y-1)) T |= 0x80;
+
+      // a perimeter point with no perimeter neighbors adds nothing
+      if (T)
+        perim_data[line[x] - 1] += vt[templ[T]];
+    }
+  }
+}
+
+/* Perimeter Area Templates
+
+For "1.0" (0):
+
+     1 1 1
+     1 x 1
+     1 1 1
+      255
+
+For "0.75" (1):
+
+     1 1 1   1 1 1   0 1 1   1 1 0   1 1 1   1 1 1   1 1 1   1 0 1
+     1 x 1   1 x 1   1 x 1   1 x 1   0 x 1   1 x 0   1 x 1   1 x 1
+     0 1 1   1 1 0   1 1 1   1 1 1   1 1 1   1 1 1   1 0 1   1 1 1
+      251     254     127     223     239     247     253     191
+
+For "0.625" (2):
+
+     1 1 1   0 0 1   0 1 1   1 1 0   1 1 1   1 1 1   1 1 1   1 0 0
+     1 x 1   1 x 1   0 x 1   1 x 0   0 x 1   1 x 0   1 x 1   1 x 1
+     0 0 1   1 1 1   1 1 1   1 1 1   0 1 1   1 1 0   1 0 0   1 1 1
+      249     63      111     215     235     246     252     159
+
+For "0.5" (3):
+
+     0 0 0   0 1 1   1 1 1   1 1 0   1 1 1   0 0 1   1 0 0   1 1 1  
+     1 x 1   0 x 1   1 x 1   1 x 0   0 x 1   0 x 1   1 x 0   1 x 0  
+     1 1 1   0 1 1   0 0 0   1 1 0   0 0 1   1 1 1   1 1 1   1 0 0  
+      31      107     248     214     233     47      151     244
+
+For "0.375" (4):
+
+     0 0 0   1 1 1   1 1 0   0 1 1   1 0 0   0 0 1   0 0 0   1 1 1
+     1 x 0   1 x 0   1 x 0   0 x 1   1 x 0   0 x 1   0 x 1   0 x 1
+     1 1 1   0 0 0   1 0 0   0 0 1   1 1 0   0 1 1   1 1 1   0 0 0
+      23      240     212     105     150     43      15      232
+
+For "0.25" (5):
+
+     0 0 0   0 0 0   1 1 0   0 1 1   1 0 0   0 0 1   0 0 0   1 1 1
+     1 x 0   0 x 1   1 x 0   0 x 1   1 x 0   0 x 1   0 x 0   0 x 0
+     1 1 0   0 1 1   0 0 0   0 0 0   1 0 0   0 0 1   1 1 1   0 0 0
+      22      11      208     104     148     41       7      224
+
+For "0.125" (6):
+
+     0 0 0   0 0 0   1 1 0   0 0 1   1 0 0   0 0 0   0 0 0   0 1 1
+     1 x 0   0 x 0   0 x 0   0 x 1   1 x 0   0 x 1   0 x 0   0 x 0
+     1 0 0   0 1 1   0 0 0   0 0 0   0 0 0   0 0 1   1 1 0   0 0 0
+      20       3      192      40     144      9       6       96
+
+*/
+static void iInitPerimAreaTemplate(imbyte *templ, float *v)
+{
+  // Builds the two lookup tables of the perimeter area templates:
+  //   templ[code] -> index in v, for each of the 256 neighborhood codes
+  //   v[index]    -> area value of that index
+  // The bits of a code, as they are set in imAnalyzeMeasurePerimArea:
+  //    0x20 0x40 0x80    (line y-1)
+  //    0x08   x  0x10    (line y)
+  //    0x01 0x02 0x04    (line y+1)
+  // A code that is not listed below keeps the index 0.
+
+  const int INDEX_COUNT = 7;      // number of values in v
+  const int CODES_PER_INDEX = 8;  // number of codes listed for each of the indices 1 to 6
+
+  // codes of the indices 1 to 6, in the same order of the templates comment
+  static const imbyte codes[6][8] =
+  {
+    // "0.75" (1)
+    { 251, 254, 127, 223,
+      239, 247, 253, 191 },
+
+    // "0.625" (2)
+    { 249,  63, 111, 215,
+      235, 246, 252, 159 },
+
+    // "0.5" (3)
+    {  31, 107, 248, 214,
+      233,  47, 151, 244 },
+
+    // "0.375" (4)
+    {  23, 240, 212, 105,
+      150,  43,  15, 232 },
+
+    // "0.25" (5)
+    {  22,  11, 208, 104,
+      148,  41,   7, 224 },
+
+    // "0.125" (6)
+    {  20,   3, 192,  40,
+      144,   9,   6,  96 }
+  };
+
+  static const float values[7] =
+  {
+    1.0f,    // "1.0"   (0)
+    0.75f,   // "0.75"  (1)
+    0.625f,  // "0.625" (2)
+    0.5f,    // "0.5"   (3)
+    0.375f,  // "0.375" (4)
+    0.25f,   // "0.25"  (5)
+    0.125f   // "0.125" (6)
+  };
+
+  memset(templ, 0, 256);
+
+  // "1.0" (0): the code with all the neighbors
+  templ[255] = 0;
+
+  for (int index = 1; index < INDEX_COUNT; index++)
+  {
+    for (int c = 0; c < CODES_PER_INDEX; c++)
+      templ[codes[index-1][c]] = (imbyte)index;
+  }
+
+  // copy the values to the caller table
+  for (int k = 0; k < INDEX_COUNT; k++)
+    v[k] = values[k];
+}
+
+void imAnalyzeMeasurePerimArea(const imImage* image, float* area_data)
+{ // For every labelled pixel, adds a weight chosen by its 8-neighbourhood to area_data[label-1].
+  static imbyte templ[256];  // neighbourhood code -> class index
+  static float vt[7];        // class index -> weight
+  static int first = 1;      // tables are filled on the first call only (plain flag, no locking)
+  if (first)
+  {
+    iInitPerimAreaTemplate(templ, vt);
+    first = 0;
+  }
+  // The first data plane is read as unsigned short labels; 0 means "no region".
+  imushort* map = (imushort*)image->data[0];
+  // NOTE(review): area_data is only incremented, never cleared here - presumably the caller zeroes it; confirm.
+  int width = image->width;
+  int height = image->height;
+  for (int y = 0; y < height; y++) 
+  {
+    int offset_up = (y+1)*width;  // start of row y+1
+    int offset = y*width;         // start of the current row
+    int offset_dw = (y-1)*width;  // start of row y-1 (negative for y == 0, but only used behind the y>0 tests)
+    // T gets one bit per neighbour that lies inside the image and carries the same label as the pixel.
+    for (int x = 0; x < width; x++)
+    {
+      imushort v = map[offset+x];
+      if (v)
+      {
+        int T = 0;
+        if (x>0 && y<height-1 &&       map[offset_up + x-1] == v) T |= 0x01;
+        if (y<height-1 &&              map[offset_up + x  ] == v) T |= 0x02;
+        if (x<width-1 && y<height-1 && map[offset_up + x+1] == v) T |= 0x04;
+        if (x>0 &&                     map[offset    + x-1] == v) T |= 0x08;
+        if (x<width-1 &&               map[offset    + x+1] == v) T |= 0x10; 
+        if (x>0 && y>0 &&              map[offset_dw + x-1] == v) T |= 0x20;
+        if (y>0 &&                     map[offset_dw + x  ] == v) T |= 0x40;
+        if (x<width-1 && y>0 &&        map[offset_dw + x+1] == v) T |= 0x80;
+        // A pixel with no equal neighbour (T == 0) contributes nothing.
+        if (T)
+          area_data[v-1] += vt[templ[T]];
+      }
+    }
+  }
+}
+
+void imProcessPrune(const imImage* image, imImage* NewImage, int connect, int start_size, int end_size)
+{
+  // Writes 1 to NewImage for the pixels of regions whose measured area is at
+  // least start_size and, when end_size is not 0, at most end_size; every
+  // other pixel becomes 0. NewImage is cleared when no region is found and
+  // is left untouched when a temporary buffer cannot be allocated.
+  imImage *region_image = imImageCreate(image->width, image->height, IM_GRAY, IM_USHORT);
+  if (!region_image)
+    return;
+
+  int region_count = imAnalyzeFindRegions(image, region_image, connect, 1); 
+  if (!region_count)
+  {
+    imImageClear(NewImage);
+    imImageDestroy(region_image);
+    return;
+  }
+
+  int* area_data = (int*)malloc(region_count*sizeof(int));
+  if (!area_data)
+  {
+    // Out of memory: release the label image instead of passing NULL on.
+    imImageDestroy(region_image);
+    return;
+  }
+  imAnalyzeMeasureArea(region_image, area_data, region_count);
+
+  imushort* region_data = (imushort*)region_image->data[0];
+  imbyte* img_data = (imbyte*)NewImage->data[0];
+
+  for (int i = 0; i < image->count; i++)
+  {
+    int label = region_data[i];  // 0 means "no region"; labels index area_data from 1
+    int area = label ? area_data[label - 1] : 0;
+    img_data[i] = (imbyte)(label && area >= start_size && !(end_size && area > end_size));
+  }
+
+  free(area_data);
+  imImageDestroy(region_image);
+}
+
+void imProcessFillHoles(const imImage* image, imImage* NewImage, int connect)
+{
+  // NewImage = image with the regions found in its inverted copy (the
+  // holes) set to 1. The label buffer is allocated before NewImage is
+  // written, so an allocation failure leaves NewImage untouched.
+  imImage *region_image = imImageCreate(image->width, image->height, IM_GRAY, IM_USHORT);
+  if (!region_image)
+    return;
+
+  // finding regions in the inverted image will isolate only the holes.
+  imProcessNegative(image, NewImage);
+
+  int holes_count = imAnalyzeFindRegions(NewImage, region_image, connect, 0);
+  if (!holes_count)
+  {
+    imImageCopy(image, NewImage);
+    imImageDestroy(region_image);
+    return;
+  }
+
+  imushort* region_data = (imushort*)region_image->data[0];
+  imbyte* dst_data = (imbyte*)NewImage->data[0];
+
+  for (int i = 0; i < image->count; i++)
+  {
+    if (region_data[i])
+      dst_data[i] = 1;              // labelled hole: fill it
+    else
+      dst_data[i] = !dst_data[i];   // Fix negative data.
+  }
+
+  imImageDestroy(region_image);
+}
diff --git a/src/process/im_arithmetic_bin.cpp b/src/process/im_arithmetic_bin.cpp
new file mode 100644
index 0000000..74fe010
--- /dev/null
+++ b/src/process/im_arithmetic_bin.cpp
@@ -0,0 +1,503 @@
+/** \file
+ * \brief Binary Arithmetic Operations
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_arithmetic_bin.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_math.h>
+#include <im_complex.h>
+#include <im_counter.h>
+
+#include "im_process_pon.h"
+#include "im_math_op.h"
+
+#include <stdlib.h>
+#include <memory.h>
+
+
+template <class T1, class T2, class T3>  // T1, T2: element types of the two sources; T3: element type of the destination
+static void DoBinaryOp(T1 *map1, T2 *map2, T3 *map, int count, int op)
+{ // For i in [0, count): map[i] = <op>_op((T3)map1[i], (T3)map2[i]); both operands are cast to T3 before the call.
+  int i;
+  // The switch sits outside the loops, so op is tested once per call rather than once per element.
+  switch(op)  // an op with no case below leaves map untouched
+  {
+  case IM_BIN_ADD:
+    for (i = 0; i < count; i++)
+      map[i] = add_op((T3)map1[i], (T3)map2[i]);
+    break;
+  case IM_BIN_SUB:
+    for (i = 0; i < count; i++)
+      map[i] = sub_op((T3)map1[i], (T3)map2[i]);
+    break;
+  case IM_BIN_MUL:
+    for (i = 0; i < count; i++)
+      map[i] = mul_op((T3)map1[i], (T3)map2[i]);
+    break;
+  case IM_BIN_DIV:
+    for (i = 0; i < count; i++)
+      map[i] = div_op((T3)map1[i], (T3)map2[i]);
+    break;
+  case IM_BIN_DIFF:
+    for (i = 0; i < count; i++)
+      map[i] = diff_op((T3)map1[i], (T3)map2[i]);
+    break;
+  case IM_BIN_MIN:
+    for (i = 0; i < count; i++)
+      map[i] = min_op((T3)map1[i], (T3)map2[i]);
+    break;
+  case IM_BIN_MAX:
+    for (i = 0; i < count; i++)
+      map[i] = max_op((T3)map1[i], (T3)map2[i]);
+    break;
+  case IM_BIN_POW:
+    for (i = 0; i < count; i++)
+      map[i] = pow_op((T3)map1[i], (T3)map2[i]);
+    break;
+  }
+}
+
+static void DoBinaryOpCpxReal(imcfloat *map1, float *map2, imcfloat *map, int count, int op)
+{ // Complex (map1) with real (map2) variant: map[i] = <op>_op(map1[i], map2[i]) for i in [0, count).
+  int i;
+  // Only IM_BIN_DIV converts the real operand to imcfloat explicitly; the other cases pass the float as is.
+  switch(op)  // an op with no case below leaves map untouched
+  {
+  case IM_BIN_ADD:
+    for (i = 0; i < count; i++)
+      map[i] = add_op(map1[i], map2[i]);
+    break;
+  case IM_BIN_SUB:
+    for (i = 0; i < count; i++)
+      map[i] = sub_op(map1[i], map2[i]);
+    break;
+  case IM_BIN_MUL:
+    for (i = 0; i < count; i++)
+      map[i] = mul_op(map1[i], map2[i]);
+    break;
+  case IM_BIN_DIV:
+    for (i = 0; i < count; i++)
+      map[i] = div_op(map1[i], (imcfloat)map2[i]);
+    break;
+  case IM_BIN_DIFF:
+    for (i = 0; i < count; i++)
+      map[i] = diff_op(map1[i], map2[i]);
+    break;
+  case IM_BIN_MIN:
+    for (i = 0; i < count; i++)
+      map[i] = min_op(map1[i], map2[i]);
+    break;
+  case IM_BIN_MAX:
+    for (i = 0; i < count; i++)
+      map[i] = max_op(map1[i], map2[i]);
+    break;
+  case IM_BIN_POW:
+    for (i = 0; i < count; i++)
+      map[i] = pow_op(map1[i], map2[i]);
+    break;
+  }
+}
+
+void imProcessArithmeticOp(const imImage* src_image1, const imImage* src_image2, imImage* dst_image, int op)
+{ // Applies the IM_BIN_* operation op plane by plane: dst_image = src_image1 <op> src_image2.
+  int count = src_image1->count;
+  // Dispatch on the data type of src_image1 and, within it, on the data type of dst_image.
+  for (int i = 0; i < src_image1->depth; i++)
+  {
+    switch(src_image1->data_type)  // src_image2 is cast to the same type, except for the IM_CFLOAT/IM_FLOAT pair below
+    {
+    case IM_BYTE:
+      if (dst_image->data_type == IM_FLOAT)
+        DoBinaryOp((imbyte*)src_image1->data[i], (imbyte*)src_image2->data[i], (float*)dst_image->data[i], count, op);
+      else if (dst_image->data_type == IM_USHORT)
+        DoBinaryOp((imbyte*)src_image1->data[i], (imbyte*)src_image2->data[i], (imushort*)dst_image->data[i], count, op);
+      else if (dst_image->data_type == IM_INT)
+        DoBinaryOp((imbyte*)src_image1->data[i], (imbyte*)src_image2->data[i], (int*)dst_image->data[i], count, op);
+      else  // any other destination type is written as imbyte
+        DoBinaryOp((imbyte*)src_image1->data[i], (imbyte*)src_image2->data[i], (imbyte*)dst_image->data[i], count, op);
+      break;
+    case IM_USHORT:
+      if (dst_image->data_type == IM_FLOAT)
+        DoBinaryOp((imushort*)src_image1->data[i], (imushort*)src_image2->data[i], (float*)dst_image->data[i], count, op);
+      else if (dst_image->data_type == IM_INT)
+        DoBinaryOp((imushort*)src_image1->data[i], (imushort*)src_image2->data[i], (int*)dst_image->data[i], count, op);
+      else  // any other destination type is written as imushort
+        DoBinaryOp((imushort*)src_image1->data[i], (imushort*)src_image2->data[i], (imushort*)dst_image->data[i], count, op);
+      break;
+    case IM_INT:
+      if (dst_image->data_type == IM_FLOAT)
+        DoBinaryOp((int*)src_image1->data[i], (int*)src_image2->data[i], (float*)dst_image->data[i], count, op);
+      else  // any other destination type is written as int
+        DoBinaryOp((int*)src_image1->data[i], (int*)src_image2->data[i], (int*)dst_image->data[i], count, op);
+      break;
+    case IM_FLOAT:
+      DoBinaryOp((float*)src_image1->data[i], (float*)src_image2->data[i], (float*)dst_image->data[i], count, op);
+      break;
+    case IM_CFLOAT:
+      if (src_image2->data_type == IM_FLOAT)  // complex with real operand
+        DoBinaryOpCpxReal((imcfloat*)src_image1->data[i], (float*)src_image2->data[i], (imcfloat*)dst_image->data[i], count, op);
+      else
+        DoBinaryOp((imcfloat*)src_image1->data[i], (imcfloat*)src_image2->data[i], (imcfloat*)dst_image->data[i], count, op);
+      break;
+    }
+  }
+}
+
+template <class T>
+static inline T blend_op(const T& v1, const T& v2, const float& alpha)
+{ // Linear mix: alpha weights v1 and (1 - alpha) weights v2; the sum is cast back to T.
+  return (T)(alpha*v1 + (1.0f - alpha)*v2);
+}
+
+template <class T> 
+static void DoBlendConst(T *map1, T *map2, T *map, int count, float alpha)
+{ // map[i] = blend_op(map1[i], map2[i], alpha) for i in [0, count), with one alpha for all elements.
+  for (int i = 0; i < count; i++)
+    map[i] = blend_op(map1[i], map2[i], alpha);
+}
+
+void imProcessBlendConst(const imImage* src_image1, const imImage* src_image2, imImage* dst_image, float alpha)
+{ // Blends the two sources plane by plane with a constant weight: alpha for src_image1, (1 - alpha) for src_image2.
+  int count = src_image1->count;
+  // All three images are accessed with the data type of src_image1; a type with no case below is skipped.
+  for (int i = 0; i < src_image1->depth; i++)
+  {
+    switch(src_image1->data_type)
+    {
+    case IM_BYTE:
+      DoBlendConst((imbyte*)src_image1->data[i], (imbyte*)src_image2->data[i], (imbyte*)dst_image->data[i], count, alpha);
+      break;
+    case IM_USHORT:
+      DoBlendConst((imushort*)src_image1->data[i], (imushort*)src_image2->data[i], (imushort*)dst_image->data[i], count, alpha);
+      break;
+    case IM_INT:
+      DoBlendConst((int*)src_image1->data[i], (int*)src_image2->data[i], (int*)dst_image->data[i], count, alpha);
+      break;
+    case IM_FLOAT:
+      DoBlendConst((float*)src_image1->data[i], (float*)src_image2->data[i], (float*)dst_image->data[i], count, alpha);
+      break;
+    case IM_CFLOAT:
+      DoBlendConst((imcfloat*)src_image1->data[i], (imcfloat*)src_image2->data[i], (imcfloat*)dst_image->data[i], count, alpha);
+      break;
+    }
+  }
+}
+
+template <class T, class TA>  // T: pixel type, TA: alpha sample type
+static void DoBlend(T *map1, T *map2, TA *alpha, T *map, int count, TA max)
+{ // map[i] = blend_op(map1[i], map2[i], alpha[i]/max): alpha[i] == max weights map1[i] fully, alpha[i] == 0 weights map2[i] fully.
+  for (int i = 0; i < count; i++)
+    map[i] = blend_op(map1[i], map2[i], ((float)alpha[i])/max);
+}
+
+void imProcessBlend(const imImage* src_image1, const imImage* src_image2, const imImage* alpha, imImage* dst_image)
+{ // Blends the two sources plane by plane with a per-pixel weight taken from the alpha image.
+  int count = src_image1->count;
+  // Plane 0 of alpha is reused for every plane; the last argument is the alpha value that maps to a weight of 1.
+  for (int i = 0; i < src_image1->depth; i++)
+  {
+    switch(src_image1->data_type)  // alpha is read with the same type as the sources, except float alpha for IM_CFLOAT
+    {
+    case IM_BYTE:
+      DoBlend((imbyte*)src_image1->data[i], (imbyte*)src_image2->data[i], (imbyte*)alpha->data[0], (imbyte*)dst_image->data[i], count, (imbyte)255);
+      break;
+    case IM_USHORT:
+      DoBlend((imushort*)src_image1->data[i], (imushort*)src_image2->data[i], (imushort*)alpha->data[0], (imushort*)dst_image->data[i], count, (imushort)65535);
+      break;
+    case IM_INT:
+      DoBlend((int*)src_image1->data[i], (int*)src_image2->data[i], (int*)alpha->data[0], (int*)dst_image->data[i], count, (int)2147483647);
+      break;
+    case IM_FLOAT:
+      DoBlend((float*)src_image1->data[i], (float*)src_image2->data[i], (float*)alpha->data[0], (float*)dst_image->data[i], count, 1.0f);
+      break;
+    case IM_CFLOAT:
+      DoBlend((imcfloat*)src_image1->data[i], (imcfloat*)src_image2->data[i], (float*)alpha->data[0], (imcfloat*)dst_image->data[i], count, 1.0f);
+      break;
+    }
+  }
+}
+
+static void DoBinaryConstOpCpxReal(imcfloat *map1, float value, imcfloat *map, int count, int op)
+{ // Complex image with a real constant: map[i] = <op>_op(map1[i], value) for i in [0, count).
+  int i;
+  // Only IM_BIN_DIV converts the constant to imcfloat explicitly; the other cases pass the float as is.
+  switch(op)  // an op with no case below leaves map untouched
+  {
+  case IM_BIN_ADD:
+    for (i = 0; i < count; i++)
+      map[i] = add_op(map1[i], value);
+    break;
+  case IM_BIN_SUB:
+    for (i = 0; i < count; i++)
+      map[i] = sub_op(map1[i], value);
+    break;
+  case IM_BIN_MUL:
+    for (i = 0; i < count; i++)
+      map[i] = mul_op(map1[i], value);
+    break;
+  case IM_BIN_DIV:
+    for (i = 0; i < count; i++)
+      map[i] = div_op(map1[i], (imcfloat)value);
+    break;
+  case IM_BIN_DIFF:
+    for (i = 0; i < count; i++)
+      map[i] = diff_op(map1[i], value);
+    break;
+  case IM_BIN_MIN:
+    for (i = 0; i < count; i++)
+      map[i] = min_op(map1[i], value);
+    break;
+  case IM_BIN_MAX:
+    for (i = 0; i < count; i++)
+      map[i] = max_op(map1[i], value);
+    break;
+  case IM_BIN_POW:
+    for (i = 0; i < count; i++)
+      map[i] = pow_op(map1[i], value);
+    break;
+  }
+}
+
+template <class T1, class T2, class T3>  // T1: source element type, T2: type the operation is computed in, T3: destination element type
+static void DoBinaryConstOp(T1 *map1, T2 value, T3 *map, int count, int op)
+{ // For i in [0, count): map[i] = (T3)<op>_op((T2)map1[i], value); the source is cast to T2 first, the result to T3 last.
+  int i;
+  // The switch sits outside the loops, so op is tested once per call rather than once per element.
+  switch(op)  // an op with no case below leaves map untouched
+  {
+  case IM_BIN_ADD:
+    for (i = 0; i < count; i++)
+      map[i] = (T3)add_op((T2)map1[i], value);
+    break;
+  case IM_BIN_SUB:
+    for (i = 0; i < count; i++)
+      map[i] = (T3)sub_op((T2)map1[i], value);
+    break;
+  case IM_BIN_MUL:
+    for (i = 0; i < count; i++)
+      map[i] = (T3)mul_op((T2)map1[i], value);
+    break;
+  case IM_BIN_DIV:
+    for (i = 0; i < count; i++)
+      map[i] = (T3)div_op((T2)map1[i], value);
+    break;
+  case IM_BIN_DIFF:
+    for (i = 0; i < count; i++)
+      map[i] = (T3)diff_op((T2)map1[i], value);
+    break;
+  case IM_BIN_MIN:
+    for (i = 0; i < count; i++)
+      map[i] = (T3)min_op((T2)map1[i], value);
+    break;
+  case IM_BIN_MAX:
+    for (i = 0; i < count; i++)
+      map[i] = (T3)max_op((T2)map1[i], value);
+    break;
+  case IM_BIN_POW:
+    for (i = 0; i < count; i++)
+      map[i] = (T3)pow_op((T2)map1[i], value);
+    break;
+  }
+}
+
+void imProcessArithmeticConstOp(const imImage* src_image1, float value, imImage* dst_image, int op)
+{ // Applies the IM_BIN_* operation op between every element of src_image1 and the constant value, plane by plane.
+  int count = src_image1->count;
+  // The cast applied to value picks the type the operation is computed in (see DoBinaryConstOp); integer casts drop the fraction.
+  for (int i = 0; i < src_image1->depth; i++)
+  {
+    switch(src_image1->data_type)  // a source type with no case below is skipped
+    {
+    case IM_BYTE:
+      if (dst_image->data_type == IM_FLOAT)
+        DoBinaryConstOp((imbyte*)src_image1->data[i], (float)value, (float*)dst_image->data[i], count, op);
+      else if (dst_image->data_type == IM_USHORT)
+        DoBinaryConstOp((imbyte*)src_image1->data[i], (imushort)value, (imushort*)dst_image->data[i], count, op);
+      else if (dst_image->data_type == IM_INT)
+        DoBinaryConstOp((imbyte*)src_image1->data[i], (int)value, (int*)dst_image->data[i], count, op);
+      else  // byte destination: computed as imushort, stored as imbyte. NOTE(review): a negative value is cast to an unsigned type here - confirm callers
+        DoBinaryConstOp((imbyte*)src_image1->data[i], (imushort)value, (imbyte*)dst_image->data[i], count, op);
+      break;
+    case IM_USHORT:
+      if (dst_image->data_type == IM_FLOAT)
+        DoBinaryConstOp((imushort*)src_image1->data[i], (float)value, (float*)dst_image->data[i], count, op);
+      else if (dst_image->data_type == IM_INT)
+        DoBinaryConstOp((imushort*)src_image1->data[i], (int)value, (int*)dst_image->data[i], count, op);
+      else if (dst_image->data_type == IM_BYTE)
+        DoBinaryConstOp((imushort*)src_image1->data[i], (imushort)value, (imbyte*)dst_image->data[i], count, op);
+      else  // any other destination type is written as imushort
+        DoBinaryConstOp((imushort*)src_image1->data[i], (imushort)value, (imushort*)dst_image->data[i], count, op);
+      break;
+    case IM_INT:
+      if (dst_image->data_type == IM_FLOAT)
+        DoBinaryConstOp((int*)src_image1->data[i], (float)value, (float*)dst_image->data[i], count, op);
+      else if (dst_image->data_type == IM_USHORT)
+        DoBinaryConstOp((int*)src_image1->data[i], (int)value, (imushort*)dst_image->data[i], count, op);
+      else if (dst_image->data_type == IM_BYTE)
+        DoBinaryConstOp((int*)src_image1->data[i], (int)value, (imbyte*)dst_image->data[i], count, op);
+      else  // any other destination type is written as int
+        DoBinaryConstOp((int*)src_image1->data[i], (int)value, (int*)dst_image->data[i], count, op);
+      break;
+    case IM_FLOAT:
+      DoBinaryConstOp((float*)src_image1->data[i], (float)value, (float*)dst_image->data[i], count, op);
+      break;
+    case IM_CFLOAT:
+      DoBinaryConstOpCpxReal((imcfloat*)src_image1->data[i], (float)value, (imcfloat*)dst_image->data[i], count, op);
+      break;
+    }
+  }
+}
+
+void imProcessMultipleMean(const imImage** src_image_list, int src_image_count, imImage* dst_image)
+{
+  // dst_image = (sum of the src_image_count images) / src_image_count.
+  // An empty list has no first image to size the accumulator from and no
+  // valid divisor, so it is rejected before src_image_list[0] is read.
+  if (!src_image_list || src_image_count <= 0)
+    return;
+  const imImage* image1 = src_image_list[0];
+
+  // Byte images are summed in an unsigned short accumulator.
+  int data_type = (image1->data_type == IM_BYTE)? IM_USHORT: image1->data_type;
+  imImage *acum_image = imImageCreate(image1->width, image1->height, image1->color_space, data_type);
+  if (!acum_image)
+    return;
+
+  // NOTE(review): IM_UN_INC adds into acum_image, so this relies on the new image starting zeroed - confirm in imImageCreate.
+  for(int i = 0; i < src_image_count; i++)
+    imProcessUnArithmeticOp(src_image_list[i], acum_image, IM_UN_INC);
+
+  imProcessArithmeticConstOp(acum_image, float(src_image_count), dst_image, IM_BIN_DIV);
+  imImageDestroy(acum_image);
+}
+
+void imProcessMultipleStdDev(const imImage** src_image_list, int src_image_count, const imImage *mean_image, imImage* dst_image)
+{
+  imImage* aux_image = imImageClone(dst_image);
+  if (!aux_image)
+    return;
+  // aux_image is a scratch image obtained from dst_image; it is overwritten for every source image.
+  // stddev = sqrt( sum(sqr(x - m)) / N)
+  // NOTE(review): dst_image is accumulated into without being cleared here - presumably the caller passes it zeroed; confirm.
+  // a = sum(sqr(x - m))
+  for(int i = 0; i < src_image_count; i++)
+  {
+    // aux_image = image - mean_image
+    imProcessArithmeticOp(src_image_list[i], mean_image, aux_image, IM_BIN_SUB);
+
+    // aux_image = aux_image * aux_image
+    imProcessUnArithmeticOp(aux_image, aux_image, IM_UN_SQR);
+
+    // dst_image += aux_image
+    imProcessUnArithmeticOp(aux_image, dst_image, IM_UN_INC);
+  }
+
+  // dst_image = dst_image / src_image_count;
+  imProcessArithmeticConstOp(dst_image, float(src_image_count), dst_image, IM_BIN_DIV);
+
+  // dst_image = sqrt(dst_image);
+  imProcessUnArithmeticOp(dst_image, dst_image, IM_UN_SQRT);
+
+  imImageDestroy(aux_image);
+}
+
+template <class DT> 
+static float AutoCovCalc(int width, int height, DT *src_map, DT *mean_map, int x, int y, float count)
+{
+  // Sums (src - mean) at a pixel times (src - mean) at the pixel shifted by
+  // (x, y), over every pixel whose shifted partner is inside the image; returns the sum divided by count.
+  const int shift = width*y + x;  // linear distance to the shifted pixel
+  float sum = 0;
+
+  for (int row = 0; row < height - y; row++)
+  {
+    const int base = width*row;
+    for (int col = 0; col < width - x; col++)
+    {
+      const int here = base + col, there = here + shift;
+      const float d0 = float(src_map[here] - mean_map[here]);
+      const float d1 = float(src_map[there] - mean_map[there]);
+      sum += d0 * d1;
+    }
+  }
+  return sum/count;
+}
+
+template <class DT> 
+static int AutoCov(int width, int height, DT *src_map, DT *mean_map, float *dst_map, int counter)
+{
+  // Fills dst_map, row by row, with AutoCovCalc for every shift (x, y).
+  // Returns 0 as soon as imCounterInc reports 0 after a row, 1 otherwise.
+  const float total = (float)(width*height);
+
+  for (int y = 0; y < height; y++)
+  {
+    float *dst_row = dst_map + y*width;
+
+    for (int x = 0; x < width; x++)
+      dst_row[x] = AutoCovCalc(width, height, src_map, mean_map, x, y, total);
+
+    if (!imCounterInc(counter))
+      return 0;
+  }
+  return 1;
+}
+
+int imProcessAutoCovariance(const imImage* image, const imImage* mean_image, imImage* dst_image)
+{
+  int ret = 0;
+  // Returns 1 when every plane was processed; 0 when the counter stopped the work or the data type has no case below.
+  int counter = imCounterBegin("Auto Covariance");
+  imCounterTotal(counter, image->depth*image->height, "Processing...");
+  // One counter step per row of each plane; mean_image is read with the type of image, dst_image is written as float.
+  for (int i = 0; i < image->depth; i++)
+  {
+    switch(image->data_type)
+    {
+    case IM_BYTE:
+      ret = AutoCov(image->width, image->height, (imbyte*)image->data[i], (imbyte*)mean_image->data[i], (float*)dst_image->data[i], counter);
+      break;
+    case IM_USHORT:
+      ret = AutoCov(image->width, image->height, (imushort*)image->data[i], (imushort*)mean_image->data[i], (float*)dst_image->data[i], counter);
+      break;
+    case IM_INT:
+      ret = AutoCov(image->width, image->height, (int*)image->data[i], (int*)mean_image->data[i], (float*)dst_image->data[i], counter);
+      break;
+    case IM_FLOAT:
+      ret = AutoCov(image->width, image->height, (float*)image->data[i], (float*)mean_image->data[i], (float*)dst_image->data[i], counter);
+      break;
+    }
+    // AutoCov returns 0 when imCounterInc returned 0; the remaining planes are then skipped.
+    if (!ret)
+      break;
+  }
+
+  imCounterEnd(counter);
+
+  return ret;
+}
+
+void imProcessMultiplyConj(const imImage* image1, const imImage* image2, imImage* NewImage)
+{
+  // NewImage = conj(image1) * image2, element by element, over
+  // count*depth imcfloat elements starting at data[0] of each image.
+  const int n = image1->count*image1->depth;
+  imcfloat* a = (imcfloat*)image1->data[0];
+  imcfloat* b = (imcfloat*)image2->data[0];
+  imcfloat* out = (imcfloat*)NewImage->data[0];
+
+  for (int i = 0; i < n; i++)
+  {
+    // Both parts are computed into a temporary before the store, so
+    // NewImage may be one of the inputs (in-place operation).
+    imcfloat prod;
+    prod.real = a[i].real * b[i].real + a[i].imag * b[i].imag; 
+    prod.imag = a[i].real * b[i].imag - a[i].imag * b[i].real;
+
+    out[i] = prod;
+  }
+}
diff --git a/src/process/im_arithmetic_un.cpp b/src/process/im_arithmetic_un.cpp
new file mode 100644
index 0000000..59e384c
--- /dev/null
+++ b/src/process/im_arithmetic_un.cpp
@@ -0,0 +1,210 @@
+/** \file
+ * \brief Unary Arithmetic Operations
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_arithmetic_un.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_math.h>
+#include <im_complex.h>
+
+#include "im_process_pon.h"
+#include "im_math_op.h"
+
+#include <stdlib.h>
+#include <memory.h>
+
+// Identity stand-ins for the real element types, so that the IM_UN_CONJ and IM_UN_CPXNORM cases of DoUnaryOp compile for them
+static inline imbyte conj_op(const imbyte& v) {return v;}
+static inline imushort conj_op(const imushort& v) {return v;}
+static inline int conj_op(const int& v) {return v;}
+static inline float conj_op(const float& v) {return v;}
+static inline imbyte cpxnorm_op(const imbyte& v) {return v;}
+static inline imushort cpxnorm_op(const imushort& v) {return v;}
+static inline int cpxnorm_op(const int& v) {return v;}
+static inline float cpxnorm_op(const float& v) {return v;}
+
+static inline imcfloat conj_op(const imcfloat& v)
+{
+  // Complex conjugate: same real part, imaginary part negated.
+  imcfloat c = v;
+  c.imag = -v.imag;
+  return c;
+}
+
+static inline imcfloat cpxnorm_op(const imcfloat& v)
+{
+  // Divides v by its magnitude (cpxmag); a zero magnitude yields (0, 0)
+  // rather than a division by zero.
+  imcfloat unit;
+  const float mag = cpxmag(v);
+
+  if (mag == 0.0f)
+  {
+    unit.real = unit.imag = 0.0f;
+    return unit;
+  }
+  unit.real = v.real/mag;
+  unit.imag = v.imag/mag;
+  return unit;
+}
+
+template <class T1, class T2>  // T1: source element type, T2: destination element type
+static void DoUnaryOp(T1 *map, T2 *new_map, int count, int op)
+{ // For i in [0, count): new_map[i] = <op>(map[i]); most cases cast the source to T2 before calling the *_op helper.
+  int i;
+  // The switch sits outside the loops, so op is tested once per call rather than once per element.
+  switch(op)  // an op with no case below leaves new_map untouched
+  {
+  case IM_UN_ABS:
+    for (i = 0; i < count; i++)
+      new_map[i] = abs_op((T2)map[i]);
+    break;
+  case IM_UN_INV:
+    for (i = 0; i < count; i++)
+      new_map[i] = inv_op((T2)map[i]);
+    break;
+  case IM_UN_EQL:  // plain copy with conversion to T2
+    for (i = 0; i < count; i++)
+      new_map[i] = (T2)map[i];
+    break;
+  case IM_UN_INC:  // accumulates: the current content of new_map is part of the result
+    for (i = 0; i < count; i++)
+      new_map[i] = (T2)(new_map[i] + map[i]);
+    break;
+  case IM_UN_LESS:
+    for (i = 0; i < count; i++)
+      new_map[i] = less_op((T2)map[i]);
+    break;
+  case IM_UN_SQR:
+    for (i = 0; i < count; i++)
+      new_map[i] = sqr_op((T2)map[i]);
+    break;
+  case IM_UN_SQRT:  // unlike the other cases, the cast to T2 is applied after sqrt_op, not before
+    for (i = 0; i < count; i++)
+      new_map[i] = (T2)sqrt_op(map[i]);
+    break;
+  case IM_UN_LOG:
+    for (i = 0; i < count; i++)
+      new_map[i] = log_op((T2)map[i]);
+    break;
+  case IM_UN_SIN:
+    for (i = 0; i < count; i++)
+      new_map[i] = sin_op((T2)map[i]);
+    break;
+  case IM_UN_COS:
+    for (i = 0; i < count; i++)
+      new_map[i] = cos_op((T2)map[i]);
+    break;
+  case IM_UN_EXP:
+    for (i = 0; i < count; i++)
+      new_map[i] = exp_op((T2)map[i]);
+    break;
+  case IM_UN_CONJ:
+    for (i = 0; i < count; i++)
+      new_map[i] = conj_op((T2)map[i]);
+    break;
+  case IM_UN_CPXNORM:
+    for (i = 0; i < count; i++)
+      new_map[i] = cpxnorm_op((T2)map[i]);
+    break;
+  }
+}
+
+void imProcessUnArithmeticOp(const imImage* src_image, imImage* dst_image, int op)
+{ // Applies the IM_UN_* operation op to all count*depth elements in one pass, starting at data[0] of both images.
+  int total_count = src_image->count * src_image->depth;
+  // Dispatch on the source data type and, within it, on the destination data type.
+  switch(src_image->data_type)  // a source type with no case below is skipped
+  {
+  case IM_BYTE:
+    if (dst_image->data_type == IM_FLOAT)
+      DoUnaryOp((imbyte*)src_image->data[0], (float*)dst_image->data[0], total_count, op);
+    else if (dst_image->data_type == IM_INT)
+      DoUnaryOp((imbyte*)src_image->data[0], (int*)dst_image->data[0], total_count, op);
+    else if (dst_image->data_type == IM_USHORT)
+      DoUnaryOp((imbyte*)src_image->data[0], (imushort*)dst_image->data[0], total_count, op);
+    else  // any other destination type is written as imbyte
+      DoUnaryOp((imbyte*)src_image->data[0], (imbyte*)dst_image->data[0], total_count, op);
+    break;                                                                                
+  case IM_USHORT:
+    if (dst_image->data_type == IM_BYTE)
+      DoUnaryOp((imushort*)src_image->data[0], (imbyte*)dst_image->data[0], total_count, op);
+    else if (dst_image->data_type == IM_INT)
+      DoUnaryOp((imushort*)src_image->data[0], (int*)dst_image->data[0], total_count, op);
+    else if (dst_image->data_type == IM_FLOAT)
+      DoUnaryOp((imushort*)src_image->data[0], (float*)dst_image->data[0], total_count, op);
+    else  // any other destination type is written as imushort
+      DoUnaryOp((imushort*)src_image->data[0], (imushort*)dst_image->data[0], total_count, op);
+    break;                                                                                
+  case IM_INT:                                                                           
+    if (dst_image->data_type == IM_BYTE)
+      DoUnaryOp((int*)src_image->data[0], (imbyte*)dst_image->data[0], total_count, op);
+    else if (dst_image->data_type == IM_USHORT)
+      DoUnaryOp((int*)src_image->data[0], (imushort*)dst_image->data[0], total_count, op);
+    else if (dst_image->data_type == IM_FLOAT)
+      DoUnaryOp((int*)src_image->data[0], (float*)dst_image->data[0], total_count, op);
+    else  // any other destination type is written as int
+      DoUnaryOp((int*)src_image->data[0], (int*)dst_image->data[0], total_count, op);
+    break;                                                                                
+  case IM_FLOAT:                                                                           
+    DoUnaryOp((float*)src_image->data[0], (float*)dst_image->data[0], total_count, op);
+    break;                                                                                
+  case IM_CFLOAT:            
+    DoUnaryOp((imcfloat*)src_image->data[0], (imcfloat*)dst_image->data[0], total_count, op);
+    break;
+  }
+}
+
+void imProcessSplitComplex(const imImage* image, imImage* NewImage1, imImage* NewImage2, int polar)
+{ // polar != 0: NewImage1 = cpxmag, NewImage2 = cpxphase of each element; polar == 0: real and imaginary parts.
+  const int n = image->count*image->depth;
+  imcfloat* cpx = (imcfloat*)image->data[0];
+  float* out1 = (float*)NewImage1->data[0];
+  float* out2 = (float*)NewImage2->data[0];
+
+  if (polar)
+  {
+    for (int i = 0; i < n; i++)
+    {
+      out1[i] = cpxmag(cpx[i]);
+      out2[i] = cpxphase(cpx[i]);
+    }
+    return;
+  }
+  for (int i = 0; i < n; i++)
+  {
+    out1[i] = cpx[i].real;
+    out2[i] = cpx[i].imag;
+  }
+}
+                  
+void imProcessMergeComplex(const imImage* image1, const imImage* image2, imImage* NewImage, int polar)
+{
+  // Builds a complex image from two float images. With polar set, image1
+  // is used as the magnitude and image2 as the phase in degrees; otherwise
+  // they are copied to the real and imaginary parts.
+  const int n = image1->count*image1->depth;
+  imcfloat* out = (imcfloat*)NewImage->data[0];
+  float* in1 = (float*)image1->data[0];
+  float* in2 = (float*)image2->data[0];
+
+  for (int i = 0; i < n; i++)
+  {
+    if (!polar)
+    {
+      out[i].real = in1[i];
+      out[i].imag = in2[i];
+      continue;
+    }
+    // Angles above 180 are lowered by 360, then divided by 57.2957795 (180/pi) to get radians.
+    float phase = in2[i];
+    if (phase > 180) phase -= 360;
+    phase /= 57.2957795f;
+    out[i].real = (float)(in1[i] * cos(phase));
+    out[i].imag = (float)(in1[i] * sin(phase));
+  }
+}
diff --git a/src/process/im_canny.cpp b/src/process/im_canny.cpp
new file mode 100644
index 0000000..d749fc0
--- /dev/null
+++ b/src/process/im_canny.cpp
@@ -0,0 +1,254 @@
+/** \file
+ * \brief Canny Edge Detector
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_canny.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+#include <im.h>
+
+#include "im_process_loc.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <memory.h>
+
+/* Holds the largest derivative response while the convolutions run, then the factor 255/(1.4142*max) that scales magnitudes to 8 bits */
+static float MAG_SCALE;
+
+/* Biggest possible filter mask */
+#define MAX_MASK_SIZE 100
+
+static float ** f2d (int nr, int nc);
+static float gauss(float x, float sigma);
+static float dGauss (float x, float sigma);
+static float meanGauss (float x, float sigma);
+static void seperable_convolution (const imImage* im, float *gau, int width, float **smx, float **smy);
+static void dxy_seperable_convolution (float** im, int nr, int nc, float *gau, int width, float **sm, int which);
+static void nonmax_suppress (float **dx, float **dy, imImage* mag);
+
+void imProcessCanny(const imImage* im, imImage* NewImage, float stddev)
+{
+  // Gaussian smoothing, derivative of Gaussian and non-maximum suppression;
+  // the result is written to NewImage by nonmax_suppress. If a work matrix
+  // cannot be allocated, NewImage is left as it was.
+  int width = MAX_MASK_SIZE;  // kept when the mask never falls below the cut-off
+  float **smx, **smy, **dx, **dy;
+  float gau[MAX_MASK_SIZE], dgau[MAX_MASK_SIZE];
+
+/* Create a Gaussian and a derivative of Gaussian filter mask */
+  for (int i = 0; i < MAX_MASK_SIZE; i++)
+  {
+    gau[i] = meanGauss ((float)i, stddev);
+    if (gau[i] < 0.005)
+    {
+      width = i;
+      break;
+    }
+    dgau[i] = dGauss ((float)i, stddev);
+  }
+
+  /* All four work matrices are allocated up front so that one test covers them */
+  smx = f2d (im->height, im->width);
+  smy = f2d (im->height, im->width);
+  dx = f2d (im->height, im->width);
+  dy = f2d (im->height, im->width);
+  if (smx && smy && dx && dy)
+  {
+    /* Convolution of source image with a Gaussian in X and Y directions  */
+    seperable_convolution (im, gau, width, smx, smy);
+
+    /* Now convolve smoothed data with a derivative; MAG_SCALE collects the largest response */
+    MAG_SCALE = 0;
+    dxy_seperable_convolution (smx, im->height, im->width, dgau, width, dx, 1);
+    dxy_seperable_convolution (smy, im->height, im->width, dgau, width, dy, 0);
+    if (MAG_SCALE)
+      MAG_SCALE = 255.0f/(1.4142f*MAG_SCALE);
+    /* Non-maximum suppression - edge pixels should be a local max */
+    nonmax_suppress (dx, dy, NewImage);
+  }
+
+  if (smx) { free(smx[0]); free(smx); }
+  if (smy) { free(smy[0]); free(smy); }
+  if (dx) { free(dx[0]); free(dx); }
+  if (dy) { free(dy[0]); free(dy); }
+}
+
+static float norm (float x, float y)
+{ // Euclidean length of the vector (x, y); the square root is taken in double and returned as float.
+  return (float) sqrt ( (double)(x*x + y*y) );
+}
+
+static float ** f2d (int nr, int nc)
+{
+  // Allocates a zero-filled nr x nc float matrix as a table of row pointers
+  // into one contiguous block. Release with free(m[0]); free(m);
+  // Returns NULL when either allocation fails.
+  float **rows = (float **)calloc ( nr, sizeof (float *) );
+  if (!rows)
+    return NULL;
+
+  float *cells = (float *)calloc ( (size_t)nr*nc, sizeof (float) );
+  if (!cells)
+  {
+    free(rows);  // do not leak the row table
+    return NULL;
+  }
+
+  for (int i = 0; i < nr; i++)
+    rows[i] = cells + (size_t)i*nc;
+  return rows;
+}
+
+/* Unnormalised Gaussian exp(-x^2 / (2 sigma^2)); sigma == 0 is not guarded against */
+static float gauss(float x, float sigma)
+{
+  return (float)exp((double) ((-x*x)/(2*sigma*sigma)));
+}
+
+static float meanGauss (float x, float sigma)
+{ // Mean of gauss() at x, x+0.5 and x-0.5; no 1/(2*pi*sigma^2) normalisation is applied.
+  const float centre = gauss(x,sigma);
+  const float right  = gauss(x+0.5f,sigma);
+  const float left   = gauss(x-0.5f,sigma);
+  return (centre+right+left)/3.0f;
+}
+
+/* First derivative of the Gaussian, without the 1/sigma^2 factor (the scaled form is kept in the comment below) */
+static float dGauss (float x, float sigma)
+{
+//  return -x/(sigma*sigma) * gauss(x, sigma);
+  return -x * gauss(x, sigma);
+}
+
+static void seperable_convolution (const imImage* im, float *gau, int width, float **smx, float **smy)
+{
+  // Smooths the 8-bit plane im->data[0] with the symmetric mask gau[0..width-1]: smx is filtered along
+  // the column index, smy along the row index. Borders wrap around; ((a % n) + n) % n keeps the wrapped
+  // index inside [0, n) even when the mask is wider than the image (k > n).
+  int i,j,k, I1, I2;
+  const int nr = im->height, nc = im->width;
+  unsigned char* im_data = (unsigned char*)im->data[0];
+
+  for (i=0; i<nr; i++)
+  {
+    for (j=0; j<nc; j++)
+    {
+      float x = gau[0] * im_data[i*nc + j], y = x;
+      for (k=1; k<width; k++)
+      {
+        I1 = (i+k)%nr; I2 = ((i-k)%nr + nr)%nr;
+        y += gau[k]*im_data[I1*nc + j] + gau[k]*im_data[I2*nc + j];
+        I1 = (j+k)%nc; I2 = ((j-k)%nc + nc)%nc;
+        x += gau[k]*im_data[i*nc + I1] + gau[k]*im_data[i*nc + I2];
+      }
+      smx[i][j] = x; smy[i][j] = y;
+    }
+  }
+}
+
+static void dxy_seperable_convolution (float** im, int nr, int nc,  float *gau, int width, float **sm, int which)
+{
+  // Convolves im (nr x nc) with the antisymmetric mask gau[1..width-1] along the row index
+  // (which == 0) or along the column index (otherwise), writes the result to sm and raises
+  // MAG_SCALE to the largest value produced. Borders wrap around.
+  int i,j,k, I1, I2;
+  for (i=0; i<nr; i++)
+  {
+    for (j=0; j<nc; j++)
+    {
+      float x = 0.0;
+      for (k=1; k<width; k++)
+      {
+        if (which == 0)
+        {
+          I1 = (i+k)%nr; I2 = ((i-k)%nr + nr)%nr;  // stays in [0, nr) even for k > nr
+          x += -gau[k]*im[I1][j] + gau[k]*im[I2][j];
+        }
+        else
+        {
+          I1 = (j+k)%nc; I2 = ((j-k)%nc + nc)%nc;  // stays in [0, nc) even for k > nc
+          x += -gau[k]*im[i][I1] + gau[k]*im[i][I2];
+        }
+      }
+      sm[i][j] = x;
+      if (x > MAG_SCALE)
+        MAG_SCALE = x;
+    }
+  }
+}
+
+static unsigned char tobyte(float x)
+{
+  // Saturates at 255; anything else is converted directly (no clamp at 0).
+  return (x > 255) ? (unsigned char)255 : (unsigned char)x;
+}
+
+static void nonmax_suppress (float **dx, float **dy, imImage* mag)
+{ // Writes the scaled gradient magnitude to mag for pixels that are a local maximum along the gradient direction.
+  int i,j;
+  float xx, yy, g2, g1, g3, g4, g, xc, yc;
+  unsigned char* mag_data = (unsigned char*)mag->data[0];
+  /* Only interior pixels are visited; border pixels and non-maxima are left unchanged in mag. */
+  for (i=1; i<mag->height-1; i++)
+  {
+    for (j=1; j<mag->width-1; j++)
+    {
+      /* Treat the x and y derivatives as components of a vector */
+      xc = dx[i][j];
+      yc = dy[i][j];
+      if (fabs(xc)<0.01 && fabs(yc)<0.01) continue;
+      /* g is the gradient magnitude at the pixel itself. */
+      g  = norm (xc, yc);
+
+      /* Follow the gradient direction, as indicated by the direction of
+        the vector (xc, yc); retain pixels that are a local maximum. */
+      /* xx = |smaller component| / |larger component|: the weight of the diagonal neighbour in the interpolation below. */
+      if (fabs(yc) > fabs(xc))
+      {
+        /* The Y component is biggest, so gradient direction is basically UP/DOWN */
+        xx = (float)(fabs(xc)/fabs(yc));
+        yy = 1.0;
+        /* g2, g4: neighbours in rows i-1 and i+1; g1, g3: the diagonal neighbours chosen by the sign of xc*yc */
+        g2 = norm (dx[i-1][j], dy[i-1][j]);
+        g4 = norm (dx[i+1][j], dy[i+1][j]);
+        if (xc*yc > 0.0)
+        {
+          g3 = norm (dx[i+1][j+1], dy[i+1][j+1]);
+          g1 = norm (dx[i-1][j-1], dy[i-1][j-1]);
+        } 
+        else
+        {
+          g3 = norm (dx[i+1][j-1], dy[i+1][j-1]);
+          g1 = norm (dx[i-1][j+1], dy[i-1][j+1]);
+        }
+
+      } 
+      else
+      {
+        /* The X component is biggest, so gradient direction is basically LEFT/RIGHT */
+        xx = (float)(fabs(yc)/fabs(xc));
+        yy = 1.0;
+        /* g2, g4: neighbours in columns j+1 and j-1; g1, g3: the diagonal neighbours chosen by the sign of xc*yc */
+        g2 = norm (dx[i][j+1], dy[i][j+1]);
+        g4 = norm (dx[i][j-1], dy[i][j-1]);
+        if (xc*yc > 0.0)
+        {
+          g3 = norm (dx[i-1][j-1], dy[i-1][j-1]);
+          g1 = norm (dx[i+1][j+1], dy[i+1][j+1]);
+        }
+        else
+        {
+          g1 = norm (dx[i-1][j+1], dy[i-1][j+1]);
+          g3 = norm (dx[i+1][j-1], dy[i+1][j-1]);
+        }
+      }
+      /* The pixel is kept only when g exceeds the interpolated magnitude on both sides. */
+      /* Compute the interpolated value of the gradient magnitude */
+      if ( (g > (xx*g1 + (yy-xx)*g2)) && (g > (xx*g3 + (yy-xx)*g4)) )
+      {
+        mag_data[i*mag->width + j] = tobyte(g*MAG_SCALE);
+      } 
+    }
+  }
+}
diff --git a/src/process/im_color.cpp b/src/process/im_color.cpp
new file mode 100644
index 0000000..b27d4b3
--- /dev/null
+++ b/src/process/im_color.cpp
@@ -0,0 +1,255 @@
+/** \file
+ * \brief Color Processing Operations
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_color.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+#include <im.h>
+#include <im_util.h>
+#include <im_colorhsi.h>
+#include <im_palette.h>
+
+#include "im_process_pon.h"
+
+#include <stdlib.h>
+#include <memory.h>
+
+
+static void rgb2yrgb(imbyte* r, imbyte* g, imbyte* b, imbyte* y)
+{
+  /* In place: *y = integer luma (299R+587G+114B)/1000; *r,*g,*b become C-Y+128 clamped to a byte. */
+  int ri,gi,bi;
+  *y = (imbyte)((299*(*r) + 587*(*g) + 114*(*b)) / 1000);
+  ri = (*r) - (*y) + 128;
+  gi = (*g) - (*y) + 128;
+  bi = (*b) - (*y) + 128;
+  /* C-Y+128 can exceed 255 (pure red gives 307), so clamp both ends before the byte cast. */
+  if (ri < 0) ri = 0; else if (ri > 255) ri = 255;
+  if (gi < 0) gi = 0; else if (gi > 255) gi = 255;
+  if (bi < 0) bi = 0; else if (bi > 255) bi = 255;
+
+  *r = (imbyte)ri;
+  *g = (imbyte)gi;
+  *b = (imbyte)bi;
+}
+
+void imProcessSplitYChroma(const imImage* src_image, imImage* y_image, imImage* chroma_image)
+{
+  /* Per pixel: the rgb2yrgb() luma goes to y_image plane 0, its three outputs to chroma_image. */
+  const imbyte* src_r = (imbyte*)src_image->data[0];
+  const imbyte* src_g = (imbyte*)src_image->data[1];
+  const imbyte* src_b = (imbyte*)src_image->data[2];
+  imbyte* chroma_r = (imbyte*)chroma_image->data[0];
+  imbyte* chroma_g = (imbyte*)chroma_image->data[1];
+  imbyte* chroma_b = (imbyte*)chroma_image->data[2];
+  imbyte* luma = (imbyte*)y_image->data[0];
+
+  for (int p = 0; p < src_image->count; p++)
+  {
+    /* rgb2yrgb() rewrites its arguments, so hand it local copies of the source sample. */
+    imbyte r = src_r[p], g = src_g[p], b = src_b[p];
+    imbyte lum;
+
+    rgb2yrgb(&r, &g, &b, &lum);
+
+    luma[p] = lum;
+
+    chroma_r[p] = r;
+    chroma_g[p] = g;
+    chroma_b[p] = b;
+  }
+}
+
+static void DoSplitHSIFloat(float** data, float* hue, float* saturation, float* intensity, int count)
+{
+  /* Feed each float RGB triple (planes 0..2 of data) to imColorRGB2HSI(), */
+  /* which stores its results in element px of the three output arrays.    */
+  const float* r = data[0];
+  const float* g = data[1];
+  const float* b = data[2];
+
+  for (int px = 0; px < count; ++px)
+    imColorRGB2HSI(r[px], g[px], b[px], hue + px, saturation + px, intensity + px);
+}
+
+static void DoSplitHSIByte(imbyte** data, float* hue, float* saturation, float* intensity, int count)
+{
+  /* Feed each byte RGB triple (planes 0..2 of data) to imColorRGB2HSIbyte(), */
+  /* which stores its results in element px of the three float output arrays.  */
+  const imbyte* r = data[0];
+  const imbyte* g = data[1];
+  const imbyte* b = data[2];
+
+  for (int px = 0; px < count; ++px)
+    imColorRGB2HSIbyte(r[px], g[px], b[px], hue + px, saturation + px, intensity + px);
+}
+
+void imProcessSplitHSI(const imImage* image, imImage* image1, imImage* image2, imImage* image3)
+{
+  /* Plane 0 of image1, image2 and image3 is written as float hue, saturation and intensity. */
+  /* Only IM_BYTE and IM_FLOAT sources are converted; for any other data type no samples    */
+  /* are written.                                                                            */
+
+  if (image->data_type == IM_BYTE)
+    DoSplitHSIByte((imbyte**)image->data, (float*)image1->data[0], (float*)image2->data[0], (float*)image3->data[0], image->count);
+  else if (image->data_type == IM_FLOAT)
+    DoSplitHSIFloat((float**)image->data, (float*)image1->data[0], (float*)image2->data[0], (float*)image3->data[0], image->count);
+
+  /* Done for every data type: image1 gets the palette returned by imPaletteHues(), count 256. */
+  imImageSetPalette(image1, imPaletteHues(), 256);
+}
+
+static void DoMergeHSIFloat(float** data, float* hue, float* saturation, float* intensity, int count)
+{
+  /* Run imColorHSI2RGB() on element px of the three input arrays and store */
+  /* the resulting float RGB triple at index px of planes 0..2 of data.      */
+  float* r = data[0];
+  float* g = data[1];
+  float* b = data[2];
+
+  for (int px = 0; px < count; ++px)
+    imColorHSI2RGB(hue[px], saturation[px], intensity[px], r + px, g + px, b + px);
+}
+
+static void DoMergeHSIByte(imbyte** data, float* hue, float* saturation, float* intensity, int count)
+{
+  /* Run imColorHSI2RGBbyte() on element px of the three float input arrays and */
+  /* store the resulting byte RGB triple at index px of planes 0..2 of data.     */
+  imbyte* r = data[0];
+  imbyte* g = data[1];
+  imbyte* b = data[2];
+
+  for (int px = 0; px < count; ++px)
+    imColorHSI2RGBbyte(hue[px], saturation[px], intensity[px], r + px, g + px, b + px);
+}
+
+void imProcessMergeHSI(const imImage* image1, const imImage* image2, const imImage* image3, imImage* image)
+{
+  /* Plane 0 of image1/2/3 is read as float H, S, I; only IM_BYTE and IM_FLOAT targets are written. */
+  if (image->data_type == IM_BYTE)
+  {
+    DoMergeHSIByte((imbyte**)image->data, (float*)image1->data[0], (float*)image2->data[0], (float*)image3->data[0], image->count);
+  }
+  else if (image->data_type == IM_FLOAT)
+  {
+    DoMergeHSIFloat((float**)image->data, (float*)image1->data[0], (float*)image2->data[0], (float*)image3->data[0], image->count);
+  }
+}
+
+void imProcessSplitComponents(const imImage* src_image, imImage** dst_image)
+{
+  /* Plane p of src_image is copied to plane 0 of dst_image[p]; a 4th copy is made for depth-4 color spaces. */
+  for (int p = 0; p < 3; p++)
+    memcpy(dst_image[p]->data[0], src_image->data[p], src_image->plane_size);
+  if (imColorModeDepth(src_image->color_space) == 4)
+    memcpy(dst_image[3]->data[0], src_image->data[3], src_image->plane_size);
+}
+
+void imProcessMergeComponents(const imImage** src_image, imImage* dst_image)
+{
+  /* Plane 0 of src_image[p] is copied to plane p of dst_image; a 4th copy is made for depth-4 color spaces. */
+  for (int p = 0; p < 3; p++)
+    memcpy(dst_image->data[p], src_image[p]->data[0], dst_image->plane_size);
+  if (imColorModeDepth(dst_image->color_space) == 4)
+    memcpy(dst_image->data[3], src_image[3]->data[0], dst_image->plane_size);
+}
+
+template <class T> 
+static void DoNormalizeComp(T** src_data, float** dst_data, int count, int depth)
+{
+  // Divides every component of a pixel by the sum of that pixel's components
+  // over all planes and stores the ratio as float; when the sum is zero the
+  // pixel is written as zeros instead of dividing.
+  //   src_data - depth source planes, count samples each
+  //   dst_data - depth float destination planes, count samples each
+  int c;
+
+  for (int pix = 0; pix < count; pix++)
+  {
+    // first pass: accumulate the per-pixel sum over the planes
+    float total = 0;
+    for (c = 0; c < depth; c++)
+    {
+      total += (float)src_data[c][pix];
+    }
+
+    // second pass: write the normalized components
+    for (c = 0; c < depth; c++)
+    {
+      float* out = dst_data[c] + pix;
+
+      if (total != 0)
+        *out = (float)src_data[c][pix] / total;
+      else
+        *out = 0;
+    }
+  }
+}
+
+void imProcessNormalizeComponents(const imImage* src_image, imImage* dst_image)
+{
+  // Picks the DoNormalizeComp() instantiation that matches the source sample type.
+  // Handled types: IM_BYTE, IM_USHORT, IM_INT and IM_FLOAT; any other type is a no-op.
+  // The destination planes are always accessed as float.
+  // NOTE(review): dst_image is presumably required to be IM_FLOAT with the same
+  // count and depth as src_image - nothing here checks it; confirm with callers.
+  const int type = src_image->data_type;
+
+  if (type == IM_BYTE)
+    DoNormalizeComp((imbyte**)src_image->data, (float**)dst_image->data, src_image->count, src_image->depth);
+  else if (type == IM_USHORT)
+    DoNormalizeComp((imushort**)src_image->data,  (float**)dst_image->data, src_image->count, src_image->depth);
+  else if (type == IM_INT)
+    DoNormalizeComp((int**)src_image->data,  (float**)dst_image->data, src_image->count, src_image->depth);
+  else if (type == IM_FLOAT)
+    DoNormalizeComp((float**)src_image->data, (float**)dst_image->data, src_image->count, src_image->depth);
+}
+
+template <class T> 
+static void DoReplaceColor(T *src_data, T *dst_data, int width, int height, int depth, float* src_color, float* dst_color)
+{
+  // Copies src to dst, substituting dst_color for every pixel whose depth
+  // components all equal src_color (each color entry is cast to T first).
+  // Component c of a pixel lives c*plane samples after its plane-0 sample.
+  const int plane = width*height;
+  int c;
+
+  for (int pix = 0; pix < plane; pix++, src_data++, dst_data++)
+  {
+    // stop comparing at the first component that differs
+    bool match = true;
+    for (c = 0; c < depth && match; c++)
+    {
+      if (src_data[c*plane] != (T)src_color[c])
+        match = false;
+    }
+
+    for (c = 0; c < depth; c++)
+    {
+      if (match)
+        dst_data[c*plane] = (T)dst_color[c];
+      else
+        dst_data[c*plane] = src_data[c*plane];
+    }
+  }
+}
+
+void imProcessReplaceColor(const imImage* src_image, imImage* dst_image, float* src_color, float* dst_color)
+{
+  // Picks the DoReplaceColor() instantiation that matches the source sample type and
+  // hands it plane 0 of both images; the destination is accessed with that same type.
+  // Handled types: IM_BYTE, IM_USHORT, IM_INT and IM_FLOAT; any other type is a no-op.
+  const int w = src_image->width;
+  const int h = src_image->height;
+  const int planes = src_image->depth;
+
+  if (src_image->data_type == IM_BYTE)
+    DoReplaceColor((imbyte*)src_image->data[0],   (imbyte*)dst_image->data[0],   w, h, planes, src_color, dst_color);
+  else if (src_image->data_type == IM_USHORT)
+    DoReplaceColor((imushort*)src_image->data[0], (imushort*)dst_image->data[0], w, h, planes, src_color, dst_color);
+  else if (src_image->data_type == IM_INT)
+    DoReplaceColor((int*)src_image->data[0],      (int*)dst_image->data[0],      w, h, planes, src_color, dst_color);
+  else if (src_image->data_type == IM_FLOAT)
+    DoReplaceColor((float*)src_image->data[0],    (float*)dst_image->data[0],    w, h, planes, src_color, dst_color);
+}
diff --git a/src/process/im_convolve.cpp b/src/process/im_convolve.cpp
new file mode 100644
index 0000000..bca2dcd
--- /dev/null
+++ b/src/process/im_convolve.cpp
@@ -0,0 +1,1512 @@
+/** \file
+ * \brief Convolution Operations
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_convolve.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_counter.h>
+#include <im_complex.h>
+#include <im_math_op.h>
+#include <im_image.h>
+#include <im_kernel.h>
+
+#include "im_process_loc.h"
+#include "im_process_pon.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+#include <math.h>
+
+/* Rotating Kernels
+3x3
+  6 7 8   7 8 5
+  3 4 5   6 4 2
+  0 1 2   3 0 1
+
+5x5
+  20 21 22 23 24   22 23 24 19 14
+  15 16 17 18 19   21 17 18 13  9
+  10 11 12 13 14   20 16 12  8  4
+   5  6  7  8  9   15 11  6  7  3
+   0  1  2  3  4   10  5  0  1  2
+
+7x7
+  42 43 44 45 46 47 48     45 46 47 48 41 34 27
+  35 36 37 38 39 40 41     44 38 39 40 33 26 20
+  28 29 30 31 32 33 34     43 37 31 32 25 19 13
+  21 22 23 24 25 26 27     42 36 30 24 18 12  6
+  14 15 16 17 18 19 20     35 29 23 16 17 11  5
+   7  8  9 10 11 12 13     28 22 15  8  9 10  4
+   0  1  2  3  4  5  6     21 14  7  0  1  2  3
+
+ TO DO: a generic odd rotation function...
+*/
+
+template <class KT> 
+static void iRotateKernel(KT* kernel_map, int kernel_size)
+{
+  KT temp;   // saves the first cell of a chain while the other seven cells are shifted
+  // Rotates a 3x3, 5x5 or 7x7 kernel in place by one eighth of a turn; other sizes are left as is.
+  switch (kernel_size)
+  {
+  case 3:   // one ring of 8 cells around the centre (4); every value moves one cell along it
+    {
+      temp = kernel_map[0];
+      kernel_map[0] = kernel_map[3];
+      kernel_map[3] = kernel_map[6];
+      kernel_map[6] = kernel_map[7];
+      kernel_map[7] = kernel_map[8];
+      kernel_map[8] = kernel_map[5];
+      kernel_map[5] = kernel_map[2];
+      kernel_map[2] = kernel_map[1];
+      kernel_map[1] = temp;
+    }
+    break;
+  case 5:   // outer ring (16 cells) is shifted by two cells, inner ring (8 cells) by one
+    {
+      temp = kernel_map[0];   // outer ring: corners and edge midpoints
+      kernel_map[0] = kernel_map[10];
+      kernel_map[10] = kernel_map[20];
+      kernel_map[20] = kernel_map[22];
+      kernel_map[22] = kernel_map[24];
+      kernel_map[24] = kernel_map[14];
+      kernel_map[14] = kernel_map[4];
+      kernel_map[4] = kernel_map[2];
+      kernel_map[2] = temp;
+      // outer ring: the remaining eight cells
+      temp = kernel_map[5];
+      kernel_map[5] = kernel_map[15];
+      kernel_map[15] = kernel_map[21];
+      kernel_map[21] = kernel_map[23];
+      kernel_map[23] = kernel_map[19];
+      kernel_map[19] = kernel_map[9];
+      kernel_map[9] = kernel_map[3];
+      kernel_map[3] = kernel_map[1];
+      kernel_map[1] = temp;
+      // inner ring around the centre (12)
+      temp = kernel_map[6];
+      kernel_map[6] = kernel_map[11];
+      kernel_map[11] = kernel_map[16];
+      kernel_map[16] = kernel_map[17];
+      kernel_map[17] = kernel_map[18];
+      kernel_map[18] = kernel_map[13];
+      kernel_map[13] = kernel_map[8];
+      kernel_map[8] = kernel_map[7];
+      kernel_map[7] = temp;
+    }
+    break;
+  case 7:   // outer ring (24 cells) moves by three cells, middle ring (16) by two, inner ring (8) by one
+    {
+      temp = kernel_map[2];   // outer ring, first of three interleaved chains
+      kernel_map[2] = kernel_map[7];
+      kernel_map[7] = kernel_map[28];
+      kernel_map[28] = kernel_map[43];
+      kernel_map[43] = kernel_map[46];
+      kernel_map[46] = kernel_map[41];
+      kernel_map[41] = kernel_map[20];
+      kernel_map[20] = kernel_map[5];
+      kernel_map[5] = temp;
+      // outer ring, second chain
+      temp = kernel_map[1];
+      kernel_map[1] = kernel_map[14];
+      kernel_map[14] = kernel_map[35];
+      kernel_map[35] = kernel_map[44];
+      kernel_map[44] = kernel_map[47];
+      kernel_map[47] = kernel_map[34];
+      kernel_map[34] = kernel_map[13];
+      kernel_map[13] = kernel_map[4];
+      kernel_map[4] = temp;
+      // outer ring, third chain (corners and edge midpoints)
+      temp = kernel_map[0];
+      kernel_map[0] = kernel_map[21];
+      kernel_map[21] = kernel_map[42];
+      kernel_map[42] = kernel_map[45];
+      kernel_map[45] = kernel_map[48];
+      kernel_map[48] = kernel_map[27];
+      kernel_map[27] = kernel_map[6];
+      kernel_map[6] = kernel_map[3];
+      kernel_map[3] = temp;
+      // middle ring, first chain
+      temp = kernel_map[9];
+      kernel_map[9] = kernel_map[15];
+      kernel_map[15] = kernel_map[29];
+      kernel_map[29] = kernel_map[37];
+      kernel_map[37] = kernel_map[39];
+      kernel_map[39] = kernel_map[33];
+      kernel_map[33] = kernel_map[19];
+      kernel_map[19] = kernel_map[11];
+      kernel_map[11] = temp;
+      // middle ring, second chain (corners and edge midpoints)
+      temp = kernel_map[8];
+      kernel_map[8] = kernel_map[22];
+      kernel_map[22] = kernel_map[36];
+      kernel_map[36] = kernel_map[38];
+      kernel_map[38] = kernel_map[40];
+      kernel_map[40] = kernel_map[26];
+      kernel_map[26] = kernel_map[12];
+      kernel_map[12] = kernel_map[10];
+      kernel_map[10] = temp;
+      // inner ring around the centre (24)
+      temp = kernel_map[16];
+      kernel_map[16] = kernel_map[23];
+      kernel_map[23] = kernel_map[30];
+      kernel_map[30] = kernel_map[31];
+      kernel_map[31] = kernel_map[32];
+      kernel_map[32] = kernel_map[25];
+      kernel_map[25] = kernel_map[18];
+      kernel_map[18] = kernel_map[17];
+      kernel_map[17] = temp;
+    }
+    break;
+  }
+}
+
+void imProcessRotateKernel(imImage* kernel)
+{
+  if (kernel->data_type != IM_INT)   // every non-int kernel is handled as float data
+    iRotateKernel((float*)kernel->data[0], kernel->width);
+  else                               // kernel->width is passed as the kernel size
+    iRotateKernel((int*)kernel->data[0], kernel->width);
+}
+
+template <class T, class KT, class CT> 
+static int DoCompassConvolve(T* map, T* new_map, int width, int height, KT* orig_kernel_map, int kernel_size, int counter, CT)
+{
+  // Convolves each pixel with the kernel in 8 successive iRotateKernel() orientations and stores
+  // the largest absolute response divided by the kernel sum. Returns 1 on completion, 0 when
+  // imCounterInc() returns 0 or when the working copy of the kernel cannot be allocated.
+  CT value;
+  KT total, *kernel_line, kvalue;
+  int offset, new_offset, i, j, x, y, kcount;
+  // duplicate the kernel data so we can rotate it
+  kcount = kernel_size*kernel_size;
+  KT* kernel_map = (KT*)malloc(kcount*sizeof(KT));
+  if (!kernel_map) return 0;   // allocation failed: never write through a NULL kernel copy
+  int ks2 = kernel_size/2;
+
+  total = 0;
+  for(j = 0; j < kcount; j++) 
+  {
+    kvalue = orig_kernel_map[j];
+    kernel_map[j] = kvalue;
+    total += kvalue;
+  }
+  // a kernel whose weights sum to zero is left unnormalized (divide by 1)
+  if (total == 0)
+    total = 1;
+
+  for(j = 0; j < height; j++)
+  {
+    new_offset = j * width;
+    for(i = 0; i < width; i++)
+    {
+      CT max_value = 0;
+      for(int k = 0; k < 8; k++) // Rotate 8 times
+      {
+        value = 0;
+      
+        for(y = -ks2; y <= ks2; y++)
+        {
+          kernel_line = kernel_map + (y+ks2)*kernel_size;
+          // rows outside the image are mirrored back inside (row -1 -> 0, row height -> height-1)
+          if (j + y < 0)             // pass the bottom border
+            offset = -(y + j + 1) * width;
+          else if (j + y >= height)  // pass the top border
+            offset = (2*height - 1 - (j + y)) * width;
+          else
+            offset = (j + y) * width;
+
+          for(x = -ks2; x <= ks2; x++)
+          {
+            if (i + x < 0)            // pass the left border
+              value += kernel_line[x+ks2] * map[offset - (i + x + 1)];
+            else if (i + x >= width)  // pass the right border
+              value += kernel_line[x+ks2] * map[offset + 2*width - 1 - (i + x)];
+            else if (offset != -1)
+              value += kernel_line[x+ks2] * map[offset + (i + x)];
+          }
+        }
+
+        if (abs_op(value) > max_value)
+          max_value = abs_op(value);
+
+        iRotateKernel(kernel_map, kernel_size);
+      }  
+
+      max_value /= total;
+
+      int size_of = sizeof(imbyte);
+      if (sizeof(T) == size_of)
+        new_map[new_offset + i] = (T)IM_BYTECROP(max_value);
+      else
+        new_map[new_offset + i] = (T)max_value;
+    }    
+
+    if (!imCounterInc(counter))
+    {
+      free(kernel_map);
+      return 0;
+    }
+  }
+
+  free(kernel_map);
+  return 1;
+}
+
+int imProcessCompassConvolve(const imImage* src_image, imImage* dst_image, imImage *kernel)
+{
+  int ret = 0;   // stays 0 when the image data type matches none of the cases below
+  // Runs DoCompassConvolve() on every plane of src_image and returns its last result.
+  int counter = imCounterBegin("Compass Convolution");
+  const char* msg = (const char*)imImageGetAttribute(kernel, "Description", NULL, NULL);
+  if (!msg) msg = "Filtering...";
+  imCounterTotal(counter, src_image->depth*src_image->height, msg);
+  // kernel->width is used as the kernel size; any kernel type other than IM_INT is read as float
+  for (int i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)
+    {
+    case IM_BYTE:
+      if (kernel->data_type == IM_INT)
+        ret = DoCompassConvolve((imbyte*)src_image->data[i], (imbyte*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, counter, (int)0);
+      else
+        ret = DoCompassConvolve((imbyte*)src_image->data[i], (imbyte*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, counter, (float)0);
+      break;
+    case IM_USHORT:
+      if (kernel->data_type == IM_INT)
+        ret = DoCompassConvolve((imushort*)src_image->data[i], (imushort*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, counter, (int)0);
+      else
+        ret = DoCompassConvolve((imushort*)src_image->data[i], (imushort*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, counter, (float)0);
+      break;
+    case IM_INT:
+      if (kernel->data_type == IM_INT)
+        ret = DoCompassConvolve((int*)src_image->data[i], (int*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, counter, (int)0);
+      else
+        ret = DoCompassConvolve((int*)src_image->data[i], (int*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, counter, (float)0);
+      break;
+    case IM_FLOAT:
+      if (kernel->data_type == IM_INT)
+        ret = DoCompassConvolve((float*)src_image->data[i], (float*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, counter, (float)0);
+      else
+        ret = DoCompassConvolve((float*)src_image->data[i], (float*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, counter, (float)0);
+      break;
+    }
+    // a zero result from one plane skips the remaining planes
+    if (!ret) 
+      break;
+  }
+
+  imCounterEnd(counter);
+
+  return ret;
+}
+
+template <class T, class KT, class CT> 
+static int DoConvolveDual(T* map, T* new_map, int width, int height, KT* kernel_map1, KT* kernel_map2, int kernel_width, int kernel_height, int counter, CT)
+{
+  CT value1, value2, value;
+  KT total1, total2, *kernel_line;
+  int offset, new_offset, i, j, x, y;
+  // Applies two same-sized kernels to every pixel and stores sqrt(v1*v1 + v2*v2) of the normalized sums.
+  int kh2 = kernel_height/2;
+  int kw2 = kernel_width/2;
+  // Returns 1 after the last row, or 0 as soon as imCounterInc() returns 0.
+  if (kernel_height % 2 == 0) kh2--;
+  if (kernel_width % 2 == 0) kw2--;
+  // NOTE(review): for even sizes the loops below span size-1 taps, yet the totals sum all weights.
+  total1 = 0;
+  for(j = 0; j < kernel_height; j++) 
+  {
+    for(i = 0; i < kernel_width; i++)
+      total1 += kernel_map1[j*kernel_width + i];
+  }
+  // a zero weight sum is replaced by 1 so the division below leaves the response unscaled
+  if (total1 == 0)
+    total1 = 1;
+
+  total2 = 0;
+  for(j = 0; j < kernel_height; j++) 
+  {
+    for(i = 0; i < kernel_width; i++)
+      total2 += kernel_map2[j*kernel_width + i];
+  }
+
+  if (total2 == 0)
+    total2 = 1;
+
+  for(j = 0; j < height; j++)
+  {
+    new_offset = j * width;
+
+    for(i = 0; i < width; i++)
+    {
+      value1 = 0;
+      value2 = 0;
+      // rows and columns outside the image are read from their mirror position inside it
+      for(y = -kh2; y <= kh2; y++)
+      {
+        if (j + y < 0)             // pass the bottom border
+          offset = -(y + j + 1) * width;
+        else if (j + y >= height)  // pass the top border
+          offset = (2*height - 1 - (j + y)) * width;
+        else
+          offset = (j + y) * width;
+
+        kernel_line = kernel_map1 + (y+kh2)*kernel_width;
+        for(x = -kw2; x <= kw2; x++)
+        {
+          if (i + x < 0)            // pass the left border
+            value1 += kernel_line[x+kw2] * map[offset - (i + x + 1)];
+          else if (i + x >= width)  // pass the right border
+            value1 += kernel_line[x+kw2] * map[offset + 2*width - 1 - (i + x)];
+          else if (offset != -1)
+            value1 += kernel_line[x+kw2] * map[offset + (i + x)];
+        }
+
+        kernel_line = kernel_map2 + (y+kh2)*kernel_width;
+        for(x = -kw2; x <= kw2; x++)
+        {
+          if (i + x < 0)            // pass the left border
+            value2 += kernel_line[x+kw2] * map[offset - (i + x + 1)];
+          else if (i + x >= width)  // pass the right border
+            value2 += kernel_line[x+kw2] * map[offset + 2*width - 1 - (i + x)];
+          else if (offset != -1)
+            value2 += kernel_line[x+kw2] * map[offset + (i + x)];
+        }
+      }
+      
+      value1 /= total1;
+      value2 /= total2;
+      // combine the two responses as a magnitude
+      value = (CT)sqrt((double)(value1*value1 + value2*value2));
+      // one-byte sample types go through IM_BYTECROP (presumably a 0..255 clamp) before storing
+      int size_of = sizeof(imbyte);
+      if (sizeof(T) == size_of)
+        new_map[new_offset + i] = (T)IM_BYTECROP(value);
+      else
+        new_map[new_offset + i] = (T)value;
+    }    
+
+    if (!imCounterInc(counter))
+      return 0;
+  }
+
+  return 1;
+}
+
+template <class KT> 
+static int DoConvolveDualCpx(imcfloat* map, imcfloat* new_map, int width, int height, KT* kernel_map1, KT* kernel_map2, int kernel_width, int kernel_height, int counter)
+{
+  imcfloat value1, value2;
+  KT total1, total2, *kernel_line;
+  int offset, new_offset, i, j, x, y;
+  // Complex-sample version of DoConvolveDual: stores sqrt(v1*v1 + v2*v2) of the two normalized sums.
+  int kh2 = kernel_height/2;
+  int kw2 = kernel_width/2;
+  // Returns 1 after the last row, or 0 as soon as imCounterInc() returns 0.
+  if (kernel_height % 2 == 0) kh2--;
+  if (kernel_width % 2 == 0) kw2--;
+  // NOTE(review): for even sizes the loops below span size-1 taps, yet the totals sum all weights.
+  total1 = 0;
+  for(j = 0; j < kernel_height; j++) 
+  {
+    for(i = 0; i < kernel_width; i++)
+      total1 += kernel_map1[j*kernel_width + i];
+  }
+  // a zero weight sum is replaced by 1 so the division below leaves the response unscaled
+  if (total1 == 0)
+    total1 = 1;
+  // the second response is normalized by the weights of the second kernel, not the first
+  total2 = 0;
+  for(j = 0; j < kernel_height; j++) 
+  {
+    for(i = 0; i < kernel_width; i++)
+      total2 += kernel_map2[j*kernel_width + i];
+  }
+
+  if (total2 == 0)
+    total2 = 1;
+
+  for(j = 0; j < height; j++)
+  {
+    new_offset = j * width;
+
+    for(i = 0; i < width; i++)
+    {
+      value1 = 0;
+      value2 = 0;
+      // rows and columns outside the image are read from their mirror position inside it
+      for(y = -kh2; y <= kh2; y++)
+      {
+        if (j + y < 0)             // pass the bottom border
+          offset = -(y + j + 1) * width;
+        else if (j + y >= height)  // pass the top border
+          offset = (2*height - 1 - (j + y)) * width;
+        else
+          offset = (j + y) * width;
+
+        kernel_line = kernel_map1 + (y+kh2)*kernel_width;
+        for(x = -kw2; x <= kw2; x++)
+        {
+          if (i + x < 0)            // pass the left border
+            value1 += map[offset - (i + x + 1)] * (float)kernel_line[x+kw2];
+          else if (i + x >= width)  // pass the right border
+            value1 += map[offset + 2*width - 1 - (i + x)] * (float)kernel_line[x+kw2];
+          else if (offset != -1)
+            value1 += map[offset + (i + x)] * (float)kernel_line[x+kw2];
+        }
+
+        kernel_line = kernel_map2 + (y+kh2)*kernel_width;
+        for(x = -kw2; x <= kw2; x++)
+        {
+          if (i + x < 0)            // pass the left border
+            value2 += map[offset - (i + x + 1)] * (float)kernel_line[x+kw2];
+          else if (i + x >= width)  // pass the right border
+            value2 += map[offset + 2*width - 1 - (i + x)] * (float)kernel_line[x+kw2];
+          else if (offset != -1)
+            value2 += map[offset + (i + x)] * (float)kernel_line[x+kw2];
+        }
+      }
+      
+      value1 /= (float)total1;
+      value2 /= (float)total2;
+
+      new_map[new_offset + i] = sqrt(value1*value1 + value2*value2);
+    }    
+
+    if (!imCounterInc(counter))
+      return 0;
+  }
+
+  return 1;
+}
+
+int imProcessConvolveDual(const imImage* src_image, imImage* dst_image, const imImage *kernel1, const imImage *kernel2)
+{
+  int counter = imCounterBegin("Convolution");
+  const char* msg = (const char*)imImageGetAttribute(kernel1, "Description", NULL, NULL);
+  if (!msg) msg = "Filtering...";
+  imCounterTotal(counter, src_image->depth*src_image->height, msg);
+  // Runs DoConvolveDual()/DoConvolveDualCpx() on every plane and returns the last result.
+  int ret = 0;   // stays 0 when the image data type matches none of the cases below
+  // kernel2 is read with kernel1's data type, width and height; non-IM_INT kernels are read as float
+  for (int i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)
+    {
+    case IM_BYTE:
+      if (kernel1->data_type == IM_INT)
+        ret = DoConvolveDual((imbyte*)src_image->data[i], (imbyte*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel1->data[0], (int*)kernel2->data[0], kernel1->width, kernel1->height, counter, (int)0);
+      else
+        ret = DoConvolveDual((imbyte*)src_image->data[i], (imbyte*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel1->data[0], (float*)kernel2->data[0], kernel1->width, kernel1->height, counter, (float)0);
+      break;
+    case IM_USHORT:
+      if (kernel1->data_type == IM_INT)
+        ret = DoConvolveDual((imushort*)src_image->data[i], (imushort*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel1->data[0], (int*)kernel2->data[0], kernel1->width, kernel1->height, counter, (int)0);
+      else
+        ret = DoConvolveDual((imushort*)src_image->data[i], (imushort*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel1->data[0], (float*)kernel2->data[0], kernel1->width, kernel1->height, counter, (float)0);
+      break;
+    case IM_INT:
+      if (kernel1->data_type == IM_INT)
+        ret = DoConvolveDual((int*)src_image->data[i], (int*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel1->data[0], (int*)kernel2->data[0], kernel1->width, kernel1->height, counter, (int)0);
+      else
+        ret = DoConvolveDual((int*)src_image->data[i], (int*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel1->data[0], (float*)kernel2->data[0], kernel1->width, kernel1->height, counter, (float)0);
+      break;
+    case IM_FLOAT:
+      if (kernel1->data_type == IM_INT)
+        ret = DoConvolveDual((float*)src_image->data[i], (float*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel1->data[0], (int*)kernel2->data[0], kernel1->width, kernel1->height, counter, (float)0);
+      else
+        ret = DoConvolveDual((float*)src_image->data[i], (float*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel1->data[0], (float*)kernel2->data[0], kernel1->width, kernel1->height, counter, (float)0);
+      break;
+    case IM_CFLOAT:
+      if (kernel1->data_type == IM_INT)
+        ret = DoConvolveDualCpx((imcfloat*)src_image->data[i], (imcfloat*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel1->data[0], (int*)kernel2->data[0], kernel1->width, kernel1->height, counter);
+      else
+        ret = DoConvolveDualCpx((imcfloat*)src_image->data[i], (imcfloat*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel1->data[0], (float*)kernel2->data[0], kernel1->width, kernel1->height, counter);
+      break;
+    }
+    // a zero result from one plane skips the remaining planes
+    if (!ret) 
+      break;
+  }
+
+  imCounterEnd(counter);
+
+  return ret;
+}
+
+template <class T, class KT, class CT> 
+static int DoConvolve(T* map, T* new_map, int width, int height, KT* kernel_map, int kernel_width, int kernel_height, int counter, CT)
+{
+  CT value;
+  KT total, *kernel_line;
+  int offset, new_offset, i, j, x, y;
+  // Stores, for every pixel, the kernel-weighted sum of its neighbourhood divided by the weight sum.
+  int kh2 = kernel_height/2;
+  int kw2 = kernel_width/2;
+  // Returns 1 after the last row, or 0 as soon as imCounterInc() returns 0.
+  if (kernel_height % 2 == 0) kh2--;
+  if (kernel_width % 2 == 0) kw2--;
+  // NOTE(review): for even sizes the loops below span size-1 taps, yet total sums all weights.
+  total = 0;
+  for(j = 0; j < kernel_height; j++) 
+  {
+    for(i = 0; i < kernel_width; i++)
+      total += kernel_map[j*kernel_width + i];
+  }
+  // a zero weight sum is replaced by 1 so the division below leaves the response unscaled
+  if (total == 0)
+    total = 1;
+
+  for(j = 0; j < height; j++)
+  {
+    new_offset = j * width;
+
+    for(i = 0; i < width; i++)
+    {
+      value = 0;
+    
+      for(y = -kh2; y <= kh2; y++)
+      {
+        kernel_line = kernel_map + (y+kh2)*kernel_width;
+        // rows outside the image are mirrored back inside (row -1 -> 0, row height -> height-1)
+        if (j + y < 0)             // pass the bottom border
+          offset = -(y + j + 1) * width;
+        else if (j + y >= height)  // pass the top border
+          offset = (2*height - 1 - (j + y)) * width;
+        else
+          offset = (j + y) * width;
+        // columns are mirrored the same way at the left and right borders
+        for(x = -kw2; x <= kw2; x++)
+        {
+          if (i + x < 0)            // pass the left border
+            value += kernel_line[x+kw2] * map[offset - (i + x + 1)];
+          else if (i + x >= width)  // pass the right border
+            value += kernel_line[x+kw2] * map[offset + 2*width - 1 - (i + x)];
+          else if (offset != -1)
+            value += kernel_line[x+kw2] * map[offset + (i + x)];
+        }
+      }
+      
+      value /= total;
+      // one-byte sample types go through IM_BYTECROP (presumably a 0..255 clamp) before storing
+      int size_of = sizeof(imbyte);
+      if (sizeof(T) == size_of)
+        new_map[new_offset + i] = (T)IM_BYTECROP(value);
+      else
+        new_map[new_offset + i] = (T)value;
+    }    
+
+    if (!imCounterInc(counter))
+      return 0;
+  }
+
+  return 1;
+}
+
+template <class KT> 
+static int DoConvolveCpx(imcfloat* map, imcfloat* new_map, int width, int height, KT* kernel_map, int kernel_width, int kernel_height, int counter)
+{
+  imcfloat value;
+  KT total, *kernel_line;
+  int offset, new_offset, i, j, x, y;
+  // Complex-sample version of DoConvolve: kernel weights are cast to float and scale each imcfloat sample.
+  int kh2 = kernel_height/2;
+  int kw2 = kernel_width/2;
+  // Returns 1 after the last row, or 0 as soon as imCounterInc() returns 0.
+  if (kernel_height % 2 == 0) kh2--;
+  if (kernel_width % 2 == 0) kw2--;
+  // NOTE(review): for even sizes the loops below span size-1 taps, yet total sums all weights.
+  total = 0;
+  for(j = 0; j < kernel_height; j++) 
+  {
+    for(i = 0; i < kernel_width; i++)
+      total += kernel_map[j*kernel_width + i];
+  }
+  // a zero weight sum is replaced by 1 so the division below leaves the response unscaled
+  if (total == 0)
+    total = 1;
+
+  for(j = 0; j < height; j++)
+  {
+    new_offset = j * width;
+
+    for(i = 0; i < width; i++)
+    {
+      value = 0;
+    
+      for(y = -kh2; y <= kh2; y++)
+      {
+        kernel_line = kernel_map + (y+kh2)*kernel_width;
+        // rows outside the image are mirrored back inside (row -1 -> 0, row height -> height-1)
+        if (j + y < 0)             // pass the bottom border
+          offset = -(y + j + 1) * width;
+        else if (j + y >= height)  // pass the top border
+          offset = (2*height - 1 - (j + y)) * width;
+        else
+          offset = (j + y) * width;
+        // columns are mirrored the same way at the left and right borders
+        for(x = -kw2; x <= kw2; x++)
+        {
+          if (i + x < 0)            // pass the left border
+            value += map[offset - (i + x + 1)] * (float)kernel_line[x+kw2];
+          else if (i + x >= width)  // pass the right border
+            value += map[offset + 2*width - 1 - (i + x)] * (float)kernel_line[x+kw2];
+          else if (offset != -1)
+            value += map[offset + (i + x)] * (float)kernel_line[x+kw2];
+        }
+      }
+      
+      value /= (float)total;
+
+      new_map[new_offset + i] = value;
+    }    
+
+    if (!imCounterInc(counter))
+      return 0;
+  }
+
+  return 1;
+}
+
+// Runs one convolution pass over every plane of src_image, writing into dst_image.
+// The template instance is chosen from the image data type and from the kernel data type (IM_INT is read as int, anything else as float).
+static int DoConvolveStep(const imImage* src_image, imImage* dst_image, const imImage *kernel, int counter)
+{
+  int ret = 0;  // stays 0 when no case below matches the image data type
+  for (int i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)
+    {
+    case IM_BYTE:
+      if (kernel->data_type == IM_INT)
+        ret = DoConvolve((imbyte*)src_image->data[i], (imbyte*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, kernel->height, counter, (int)0);
+      else
+        ret = DoConvolve((imbyte*)src_image->data[i], (imbyte*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, kernel->height, counter, (float)0);
+      break;                                                                                
+    case IM_USHORT:                                                                           
+      if (kernel->data_type == IM_INT)
+        ret = DoConvolve((imushort*)src_image->data[i], (imushort*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, kernel->height, counter, (int)0);
+      else
+        ret = DoConvolve((imushort*)src_image->data[i], (imushort*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, kernel->height, counter, (float)0);
+      break;                                                                                
+    case IM_INT:                                                                           
+      if (kernel->data_type == IM_INT)
+        ret = DoConvolve((int*)src_image->data[i], (int*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, kernel->height, counter, (int)0);
+      else
+        ret = DoConvolve((int*)src_image->data[i], (int*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, kernel->height, counter, (float)0);
+      break;                                                                                
+    case IM_FLOAT:                                                                           
+      if (kernel->data_type == IM_INT)
+        ret = DoConvolve((float*)src_image->data[i], (float*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, kernel->height, counter, (float)0);
+      else
+        ret = DoConvolve((float*)src_image->data[i], (float*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, kernel->height, counter, (float)0);
+      break;                                                                                
+    case IM_CFLOAT:            
+      if (kernel->data_type == IM_INT)
+        ret = DoConvolveCpx((imcfloat*)src_image->data[i], (imcfloat*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, kernel->height, counter);
+      else
+        ret = DoConvolveCpx((imcfloat*)src_image->data[i], (imcfloat*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, kernel->height, counter);
+      break;
+    }
+    // stop at the first plane whose convolution returned 0
+    if (!ret) 
+      break;
+  }
+  return ret;
+}
+
+// Convolves every plane of src_image with kernel into dst_image, with progress reporting.
+int imProcessConvolve(const imImage* src_image, imImage* dst_image, const imImage *kernel)
+{
+  const int progress = imCounterBegin("Convolution");
+  // the kernel "Description" attribute, when present, is the progress text
+  const char* label = (const char*)imImageGetAttribute(kernel, "Description", NULL, NULL);
+  const int row_steps = src_image->depth*src_image->height;   // one step per row of each plane
+  imCounterTotal(progress, row_steps, label? label: "Filtering...");
+
+  const int status = DoConvolveStep(src_image, dst_image, kernel, progress);
+  imCounterEnd(progress);
+  return status;
+}
+
+// Applies the same kernel ntimes in sequence: the first pass reads src_image, every
+// later pass reads the output of the previous one. Returns 0 when the scratch image
+// cannot be created or a pass returns 0, otherwise 1 with the final result in dst_image.
+int imProcessConvolveRep(const imImage* src_image, imImage* dst_image, const imImage *kernel, int ntimes)
+{
+  imImage *AuxImage = imImageClone(dst_image);
+  if (!AuxImage)
+    return 0;
+
+  int counter = imCounterBegin("Repeated Convolution");
+  const char* msg = (const char*)imImageGetAttribute(kernel, "Description", NULL, NULL);
+  if (!msg) msg = "Filtering...";
+  imCounterTotal(counter, src_image->depth*src_image->height*ntimes, msg);
+  // ping-pong: each pass reads image1 and writes image2
+  const imImage *image1 = src_image;
+  imImage *image2 = dst_image;
+  for (int i = 0; i < ntimes; i++)  // NOTE(review): ntimes <= 0 runs no pass and still returns 1
+  {
+    if (!DoConvolveStep(image1, image2, kernel, counter))
+    {
+      imCounterEnd(counter);
+      imImageDestroy(AuxImage);
+      return 0;
+    }
+    // the output of this pass feeds the next one; the target alternates between dst_image and AuxImage
+    image1 = image2;
+    if (image1 == dst_image)
+      image2 = AuxImage;
+    else
+      image2 = dst_image;
+  }
+
+  // The result is in image1; when that is AuxImage, exchange the data pointers so dst_image holds it
+  if (image1 == AuxImage)
+  {
+    void** temp = (void**)dst_image->data;
+    dst_image->data = AuxImage->data;
+    AuxImage->data = (void**)temp;
+  }
+
+  imCounterEnd(counter);
+  imImageDestroy(AuxImage);
+  return 1;
+}
+
+// Separable convolution of one plane: a vertical pass with the first kernel column
+// followed by a horizontal pass with the first kernel row. T is the pixel type, KT the
+// kernel type and CT (deduced from the unnamed last argument) the accumulator type.
+// Returns 1 on success, 0 if imCounterInc() returns 0 or the line buffer cannot be allocated.
+template <class T, class KT, class CT> 
+static int DoConvolveSep(T* map, T* new_map, int width, int height, KT* kernel_map, int kernel_width, int kernel_height, int counter, CT)
+{
+  CT value;
+  KT totalV, totalH, *kernel_line;
+  T* aux_line;
+  int offset, new_offset, i, j;
+
+  int kh2 = kernel_height/2;
+  int kw2 = kernel_width/2;
+  // for an even size the window is [-k2, k2] with k2 = size/2 - 1
+  if (kernel_height % 2 == 0) kh2--;
+  if (kernel_width % 2 == 0) kw2--;
+  // use only the first line and the first column of the kernel
+  totalV = 0;
+  for(j = 0; j < kernel_height; j++) 
+    totalV += kernel_map[j*kernel_width];
+  if (totalV == 0)   // a column that sums to zero is applied unnormalized
+    totalV = 1;
+
+  totalH = 0;
+  for(i = 0; i < kernel_width; i++)
+    totalH += kernel_map[i];
+  if (totalH == 0)   // same for the row
+    totalH = 1;
+
+  aux_line = (T*)malloc(width*sizeof(T));
+  if (!aux_line)     // allocation failed: report failure instead of writing through NULL
+    return 0;
+
+  for(j = 0; j < height; j++)
+  {
+    new_offset = j * width;
+
+    for(i = 0; i < width; i++)
+    {
+      int y;
+      value = 0;
+
+      // first pass, only for columns: reads map, writes new_map
+      for(y = -kh2; y <= kh2; y++)
+      {
+        kernel_line = kernel_map + (y+kh2)*kernel_width;
+
+        if (j + y < 0)             // pass the bottom border
+          offset = -(y + j + 1) * width;
+        else if (j + y >= height)  // pass the top border
+          offset = (2*height - 1 - (j + y)) * width;
+        else
+          offset = (j + y) * width;
+
+        if (offset != -1)
+          value += kernel_line[0] * map[offset + i];
+      }
+      
+      value /= totalV;
+
+      int size_of = sizeof(imbyte);  // element types as small as imbyte go through IM_BYTECROP
+      if (sizeof(T) == size_of)
+        new_map[new_offset + i] = (T)IM_BYTECROP(value);
+      else
+        new_map[new_offset + i] = (T)value;
+    }    
+
+    if (!imCounterInc(counter))
+    {
+      free(aux_line);
+      return 0;
+    }
+  }
+
+  for(j = 0; j < height; j++)
+  {
+    offset = new_offset = j * width;
+
+    for(i = 0; i < width; i++)
+    {
+      int x;
+      value = 0;
+
+      // second pass, only for lines; the row is read from new_map, so the output goes to an auxiliary buffer first
+      kernel_line = kernel_map;
+
+      for(x = -kw2; x <= kw2; x++)
+      {
+        if (i + x < 0)            // pass the left border
+          value += kernel_line[x+kw2] * new_map[offset - (i + x + 1)];
+        else if (i + x >= width)  // pass the right border
+          value += kernel_line[x+kw2] * new_map[offset + 2*width - 1 - (i + x)];
+        else
+          value += kernel_line[x+kw2] * new_map[offset + (i + x)];
+      }
+      
+      value /= totalH;
+
+      int size_of = sizeof(imbyte);
+      if (sizeof(T) == size_of)
+        aux_line[i] = (T)IM_BYTECROP(value);
+      else
+        aux_line[i] = (T)value;
+    }    
+
+    memcpy(new_map + new_offset, aux_line, width*sizeof(T));
+
+    if (!imCounterInc(counter))
+    {
+      free(aux_line);
+      return 0;
+    }
+  }
+
+  free(aux_line);
+  return 1;
+}
+
+
+// Separable convolution of one plane of complex samples: a vertical pass with the first
+// kernel column followed by a horizontal pass with the first kernel row (KT is the kernel
+// element type, cast to float before it multiplies a sample).
+// Returns 1 on success, 0 if imCounterInc() returns 0 or the line buffer cannot be allocated.
+template <class KT> 
+static int DoConvolveSepCpx(imcfloat* map, imcfloat* new_map, int width, int height, KT* kernel_map, int kernel_width, int kernel_height, int counter)
+{
+  imcfloat value;
+  KT totalV, totalH, *kernel_line;
+  imcfloat* aux_line;
+  int offset, new_offset, i, j;
+
+  int kh2 = kernel_height/2;
+  int kw2 = kernel_width/2;
+  // for an even size the window is [-k2, k2] with k2 = size/2 - 1
+  if (kernel_height % 2 == 0) kh2--;
+  if (kernel_width % 2 == 0) kw2--;
+  // use only the first line and the first column of the kernel
+  totalV = 0;
+  for(j = 0; j < kernel_height; j++) 
+    totalV += kernel_map[j*kernel_width];
+  if (totalV == 0)   // a column that sums to zero is applied unnormalized
+    totalV = 1;
+
+  totalH = 0;
+  for(i = 0; i < kernel_width; i++)
+    totalH += kernel_map[i];
+  if (totalH == 0)   // same for the row
+    totalH = 1;
+
+  aux_line = (imcfloat*)malloc(width*sizeof(imcfloat));
+  if (!aux_line)     // allocation failed: report failure instead of writing through NULL
+    return 0;
+
+  for(j = 0; j < height; j++)
+  {
+    new_offset = j * width;
+
+    for(i = 0; i < width; i++)
+    {
+      int y;
+      value = 0;
+
+      // first pass, only for columns: reads map, writes new_map
+      for(y = -kh2; y <= kh2; y++)
+      {
+        kernel_line = kernel_map + (y+kh2)*kernel_width;
+
+        if (j + y < 0)             // pass the bottom border
+          offset = -(y + j + 1) * width;
+        else if (j + y >= height)  // pass the top border
+          offset = (2*height - 1 - (j + y)) * width;
+        else
+          offset = (j + y) * width;
+
+        if (offset != -1)
+          value += map[offset + i] * (float)kernel_line[0];
+      }
+      
+      value /= (float)totalV;
+
+      new_map[new_offset + i] = value;
+    }    
+
+    if (!imCounterInc(counter))
+    {
+      free(aux_line);
+      return 0;
+    }
+  }
+
+  for(j = 0; j < height; j++)
+  {
+    offset = new_offset = j * width;
+
+    for(i = 0; i < width; i++)
+    {
+      int x;
+      value = 0;
+
+      // second pass, only for lines; the row is read from new_map, so the output goes to an auxiliary buffer first
+      kernel_line = kernel_map;
+    
+      for(x = -kw2; x <= kw2; x++)
+      {
+        if (i + x < 0)            // pass the left border
+          value += new_map[offset - (i + x + 1)] * (float)kernel_line[x+kw2];
+        else if (i + x >= width)  // pass the right border
+          value += new_map[offset + 2*width - 1 - (i + x)] * (float)kernel_line[x+kw2];
+        else if (offset != -1)
+          value += new_map[offset + (i + x)] * (float)kernel_line[x+kw2];
+      }
+      
+      value /= (float)totalH;
+
+      aux_line[i] = value;
+    }    
+
+    memcpy(new_map + new_offset, aux_line, width*sizeof(imcfloat));
+
+    if (!imCounterInc(counter))
+    {
+      free(aux_line);
+      return 0;
+    }
+  }
+
+  free(aux_line);
+  return 1;
+}
+
+// Separable convolution of every plane of src_image into dst_image, using only the first row and first column of kernel.
+// Returns the result of the last plane processed (0 when a plane returned 0 or the data type is not handled).
+int imProcessConvolveSep(const imImage* src_image, imImage* dst_image, const imImage *kernel)
+{
+  int counter = imCounterBegin("Separable Convolution");
+  const char* msg = (const char*)imImageGetAttribute(kernel, "Description", NULL, NULL);
+  if (!msg) msg = "Filtering...";
+  imCounterTotal(counter, 2*src_image->depth*src_image->height, msg);  // two passes per plane, one step per row in each
+
+  int ret = 0;
+  for (int i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)
+    {
+    case IM_BYTE:
+      if (kernel->data_type == IM_INT)
+        ret = DoConvolveSep((imbyte*)src_image->data[i], (imbyte*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, kernel->height, counter, (int)0);
+      else
+        ret = DoConvolveSep((imbyte*)src_image->data[i], (imbyte*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, kernel->height, counter, (float)0);
+      break;                                                                                
+    case IM_USHORT:                                                                           
+      if (kernel->data_type == IM_INT)
+        ret = DoConvolveSep((imushort*)src_image->data[i], (imushort*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, kernel->height, counter, (int)0);
+      else
+        ret = DoConvolveSep((imushort*)src_image->data[i], (imushort*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, kernel->height, counter, (float)0);
+      break;                                                                                
+    case IM_INT:                                                                           
+      if (kernel->data_type == IM_INT)
+        ret = DoConvolveSep((int*)src_image->data[i], (int*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, kernel->height, counter, (int)0);
+      else
+        ret = DoConvolveSep((int*)src_image->data[i], (int*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, kernel->height, counter, (float)0);
+      break;                                                                                
+    case IM_FLOAT:                                                                           
+      if (kernel->data_type == IM_INT)
+        ret = DoConvolveSep((float*)src_image->data[i], (float*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, kernel->height, counter, (float)0);
+      else
+        ret = DoConvolveSep((float*)src_image->data[i], (float*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, kernel->height, counter, (float)0);
+      break;                                                                                
+    case IM_CFLOAT:            
+      if (kernel->data_type == IM_INT)
+        ret = DoConvolveSepCpx((imcfloat*)src_image->data[i], (imcfloat*)dst_image->data[i], src_image->width, src_image->height, (int*)kernel->data[0], kernel->width, kernel->height, counter);
+      else
+        ret = DoConvolveSepCpx((imcfloat*)src_image->data[i], (imcfloat*)dst_image->data[i], src_image->width, src_image->height, (float*)kernel->data[0], kernel->width, kernel->height, counter);
+      break;
+    }
+    // stop at the first plane whose convolution returned 0
+    if (!ret) 
+      break;
+  }
+
+  imCounterEnd(counter);
+  return ret;
+}
+
+/*
+Description:	
+    Can be used to find zero crossing of second derivative,
+		laplace. Can also be used to determine any other kind
+		of crossing. Pixels below or equal to 't' are set if the pixel
+		to the right or below is above 't', pixels above 't' are
+		set if the pixel to the right or below is below or equal to
+		't'. Pixels that are "set" are set to the maximum absolute
+		difference of the two neighbours, to indicate the strength
+		of the edge.
+
+		| IF (crossing t)
+		|   out(x,y) = MAX(ABS(in(x,y)-in(x+1,y)), ABS(in(x,y)-in(x,y+1)))
+		| ELSE
+		|   out(x,y) = 0
+
+Author:		Tor Lønnestad, BLAB, Ifi, UiO
+
+Copyright 1991, Blab, UiO
+Image processing lab, Department of Informatics
+University of Oslo
+*/
+template <class T> 
+static void do_crossing(T* iband, T* oband, int width, int height, T t)
+{
+  // Writes to oband the crossing strength of every pixel of iband (width x height, row major):
+  // the largest difference to the right/lower neighbour lying on the other side of t, or 0.
+  // offset00 is the current pixel, offset01 its right neighbour, offset10 the pixel in the next row.
+  int x, y, offset00 = 0, offset10 = 0, offset01 = 0;
+  T v, diff;
+
+  if (width < 1 || height < 1)  // empty band: there is no pixel to read or write
+    return;
+
+  for (y=0; y < height-1; y++)
+  {
+    offset00 = y*width;
+    offset10 = (y+1)*width;
+    offset01 = offset00 + 1;
+
+    for (x=0; x < width-1; x++)
+    {
+      v = 0;
+      if (iband[offset00] <= t)
+      {
+        if (iband[offset10] > t) 
+          v = iband[offset10]-iband[offset00];
+
+        if (iband[offset01] > t) 
+        {
+          diff = iband[offset01]-iband[offset00];
+          if (diff > v) v = diff;
+        }
+      }
+      else
+      {
+        if (iband[offset10] <= t) 
+          v = iband[offset00]-iband[offset10];
+
+        if (iband[offset01] <= t) 
+        {
+          diff = iband[offset00]-iband[offset01];
+          if (diff > v) v = diff;
+        }
+      }
+
+      oband[offset00] = v;
+      offset00++;
+      offset10++;
+      offset01++;
+    }
+
+    /* last pixel on line: the loop above already left offset00/offset10 on the last column, */
+    /* so they are not advanced again (that would step into the first pixel of the next row) */
+
+    v = 0;
+
+    if (iband[offset00] <= t)
+    {
+      if (iband[offset10] > t)
+        v = iband[offset10]-iband[offset00];
+    }
+    else
+    {
+      if (iband[offset10] <= t)
+        v = iband[offset00]-iband[offset10];
+    }
+
+    oband[offset00] = v;
+  }
+
+  /* last line */
+  offset00 = y*width;
+  offset01 = offset00 + 1;
+
+  for (x=0; x < width-1; x++)
+  {
+    v = 0;
+
+    if (iband[offset00] <= t)
+    {
+      if (iband[offset01] > t)
+        v = iband[offset01]-iband[offset00];
+    }
+    else
+    {
+      if (iband[offset01] <= t)
+        v = iband[offset00]-iband[offset01];
+    }
+
+    oband[offset00] = v;
+    offset00++;
+    offset01++;
+  }
+
+  /* last pixel: offset00 already indexes it after the loop; it has no right/lower neighbour */
+  oband[offset00] = 0;
+}
+
+// Runs the level-crossing detector with threshold 0 on every plane of src_image.
+// Only IM_INT and IM_FLOAT images are processed; for any other data type
+// nothing is written to dst_image.
+void imProcessZeroCrossing(const imImage* src_image, imImage* dst_image)
+{
+  const int w = src_image->width, h = src_image->height;
+
+  for (int plane = 0; plane < src_image->depth; plane++)
+  {
+    if (src_image->data_type == IM_INT)
+      do_crossing((int*)src_image->data[plane], (int*)dst_image->data[plane], w, h, 0);
+    else if (src_image->data_type == IM_FLOAT)
+      do_crossing((float*)src_image->data[plane], (float*)dst_image->data[plane], w, h, 0.0f);
+  }
+}
+
+// Smooths src_image into dst_image with a separable triangular ("Barlett") kernel
+// of kernel_size x kernel_size. Returns 0 when the kernel cannot be created,
+// otherwise the result of imProcessConvolveSep.
+int imProcessBarlettConvolve(const imImage* src_image, imImage* dst_image, int kernel_size)
+{
+  imImage* kernel = imImageCreate(kernel_size, kernel_size, IM_GRAY, IM_INT);
+  if (kernel == NULL)
+    return 0;
+
+  imImageSetAttribute(kernel, "Description", IM_BYTE, -1, (void*)"Barlett");
+
+  // Triangular profile: 1, 2, ... rising up to the tap at kernel_size/2,
+  // then kernel_size-k falling back to 1.
+  // The same profile is stored in the first row and in the first column,
+  // the only coefficients set here.
+  int* coef = (int*)kernel->data[0];
+  const int peak = kernel_size / 2;
+
+  for (int k = 0; k < kernel_size; k++)
+  {
+    const int weight = (k > peak)? kernel_size-k: k+1;
+
+    coef[k] = weight;              // first row
+    coef[k*kernel_size] = weight;  // first column
+  }
+
+  const int status = imProcessConvolveSep(src_image, dst_image, kernel);
+
+  imImageDestroy(kernel);
+  return status;
+}
+
+int imProcessSobelConvolve(const imImage* src_image, imImage* dst_image)
+{
+  // Dual convolution with the Sobel kernel and its copy rotated by imProcessRotate90; returns 0 if a kernel cannot be created.
+  int ret = 0;
+  imImage* kernel1 = imKernelSobel();
+  imImage* kernel2 = imImageCreate(3, 3, IM_GRAY, IM_INT);
+  if (kernel1 && kernel2)
+  {
+    imProcessRotate90(kernel1, kernel2, 1);
+    ret = imProcessConvolveDual(src_image, dst_image, kernel1, kernel2);
+  }
+  if (kernel1) imImageDestroy(kernel1);   // either kernel may be NULL after a failed creation
+  if (kernel2) imImageDestroy(kernel2);
+  return ret;
+}
+
+int imProcessPrewittConvolve(const imImage* src_image, imImage* dst_image)
+{
+  // Dual convolution with the Prewitt kernel and its copy rotated by imProcessRotate90; returns 0 if a kernel cannot be created.
+  int ret = 0;
+  imImage* kernel1 = imKernelPrewitt();
+  imImage* kernel2 = kernel1? imImageClone(kernel1): NULL;   // never clone a kernel that failed to build
+  if (kernel2)
+  {
+    imProcessRotate90(kernel1, kernel2, 1);
+    ret = imProcessConvolveDual(src_image, dst_image, kernel1, kernel2);
+    imImageDestroy(kernel2);
+  }
+  if (kernel1) imImageDestroy(kernel1);
+  return ret;
+}
+
+int imProcessSplineEdgeConvolve(const imImage* src_image, imImage* dst_image)
+{
+  // Result: dst = sqrt(2) * dual(kernel1, kernel2) + dual(kernel3, kernel4), where kernel1 holds
+  // the 5-tap row (-1 8 0 -8 1) and the other kernels are derived from it by rotation.
+  // Returns 0 if an image cannot be created or either convolution returns 0.
+  int ret = 0;
+  imImage *kernel2 = NULL, *kernel3 = NULL, *kernel4 = NULL;
+  imImage* tmp_image = imImageClone(src_image);
+  if (!tmp_image) return 0;
+  imImage* kernel1 = imImageCreate(5, 5, IM_GRAY, IM_INT);
+  if (kernel1)
+  {
+    imImageSetAttribute(kernel1, "Description", IM_BYTE, -1, (void*)"SplineEdge");
+    int* kernel_data = (int*)kernel1->data[0];   // indices 10..14 are the middle row of the 5x5 kernel
+    kernel_data[10] = -1; kernel_data[11] = 8; kernel_data[12] = 0;
+    kernel_data[13] = -8; kernel_data[14] = 1;
+    kernel2 = imImageClone(kernel1);
+    kernel3 = imImageClone(kernel1);   // NOTE(review): kernel3 is rotated in place below; confirm imImageClone copies the coefficients
+    kernel4 = imImageClone(kernel1);
+  }
+  if (kernel2 && kernel3 && kernel4)
+  {
+    imProcessRotate90(kernel1, kernel2, 1);
+    imProcessRotateKernel(kernel3);
+    imProcessRotate90(kernel3, kernel4, 1);
+    ret = imProcessConvolveDual(src_image, tmp_image, kernel1, kernel2);
+    if (ret) ret = imProcessConvolveDual(src_image, dst_image, kernel3, kernel4);   // a failed first pass is no longer masked
+    if (ret)
+    {
+      imProcessArithmeticConstOp(tmp_image, (float)sqrt(2.0), tmp_image, IM_BIN_MUL);
+      imProcessArithmeticOp(tmp_image, dst_image, dst_image, IM_BIN_ADD);
+    }
+  }
+  if (kernel1) imImageDestroy(kernel1);
+  if (kernel2) imImageDestroy(kernel2);
+  if (kernel3) imImageDestroy(kernel3);
+  if (kernel4) imImageDestroy(kernel4);
+  imImageDestroy(tmp_image);
+  return ret;
+}
+
+int imGaussianStdDev2KernelSize(float stddev)
+{
+  // A negative stddev encodes the kernel size itself, negated.
+  const int size_is_given = (stddev < 0);
+  if (size_is_given) return (int)-stddev;
+
+  // Otherwise the half width is trunc(3.35*stddev + 0.3333) and the size 2*half + 1.
+  const int half_width = (int)(3.35*stddev + 0.3333);
+  return 2*half_width + 1;
+}
+
+float imGaussianKernelSize2StdDev(int kernel_size)
+{
+  const int half_width = (kernel_size - 1)/2;   // integer division, matching size = 2*half + 1
+  return ((float)half_width - 0.3333f)/3.35f;   // inverts half = 3.35*stddev + 0.3333
+}
+
+// Gaussian smoothing of src_image into dst_image using a separable convolution.
+// Returns 0 when the kernel image cannot be created, otherwise the imProcessConvolveSep result.
+int imProcessGaussianConvolve(const imImage* src_image, imImage* dst_image, float stddev)
+{
+  const int side = imGaussianStdDev2KernelSize(stddev);
+  imImage* kernel = imImageCreate(side, side, IM_GRAY, IM_FLOAT);
+  if (kernel == NULL)
+    return 0;
+
+  // label the kernel (the description is used as progress text) and render the Gaussian into it
+  imImageSetAttribute(kernel, "Description", IM_BYTE, -1, (void*)"Gaussian");
+  imProcessRenderGaussian(kernel, stddev);
+
+  const int status = imProcessConvolveSep(src_image, dst_image, kernel);
+  imImageDestroy(kernel);
+  return status;
+}
+
+// Convolves src_image with a Laplacian-of-Gaussian kernel rendered for stddev (full 2D convolution).
+// Returns 0 when the kernel or the scratch image cannot be created, otherwise the imProcessConvolve result.
+int imProcessLapOfGaussianConvolve(const imImage* src_image, imImage* dst_image, float stddev)
+{
+  int kernel_size = imGaussianStdDev2KernelSize(stddev);
+  imImage* kernel = imImageCreate(kernel_size, kernel_size, IM_GRAY, IM_FLOAT);
+  if (!kernel)
+    return 0;
+
+  imImageSetAttribute(kernel, "Description", IM_BYTE, -1, (void*)"Laplacian Of Gaussian");
+  imProcessRenderLapOfGaussian(kernel, stddev);
+  // BYTE/USHORT sources are first copied into a scratch image shaped like dst_image, and that copy is convolved
+  int ret;
+  if (src_image->data_type == IM_BYTE || src_image->data_type == IM_USHORT)
+  {
+    imImage* aux_image = imImageClone(dst_image);
+    if (!aux_image)
+    {
+      imImageDestroy(kernel);
+      return 0;
+    }
+
+    imProcessUnArithmeticOp(src_image, aux_image, IM_UN_EQL);  // Convert to IM_INT
+    ret = imProcessConvolve(aux_image, dst_image, kernel);
+    imImageDestroy(aux_image);
+  }
+  else
+    ret = imProcessConvolve(src_image, dst_image, kernel);
+
+  imImageDestroy(kernel);
+  return ret;
+}
+
+int imProcessDiffOfGaussianConvolve(const imImage* src_image, imImage* dst_image, float stddev1, float stddev2)
+{
+  // Difference of Gaussians: dst = gaussian(src, stddev1) - gaussian(src, stddev2).
+  // Returns 1 on success; 0 when an image cannot be created or a convolution returns 0.
+  int ret = 0;
+
+  // the kernels are created only once both scratch images exist
+  imImage *kernel1 = NULL, *kernel2 = NULL;
+
+  imImage* aux_image1 = imImageClone(src_image);
+  imImage* aux_image2 = imImageClone(src_image);
+
+  if (aux_image1 && aux_image2)
+  {
+    // both kernels share the larger of the two sizes derived from the deviations
+    int kernel_size1 = imGaussianStdDev2KernelSize(stddev1);
+    int kernel_size2 = imGaussianStdDev2KernelSize(stddev2);
+    int size = kernel_size1;
+    if (kernel_size1 < kernel_size2) size = kernel_size2;
+
+    kernel1 = imImageCreate(size, size, IM_GRAY, IM_FLOAT);
+    kernel2 = imImageCreate(size, size, IM_GRAY, IM_FLOAT);
+  }
+
+  if (kernel1 && kernel2)
+  {
+    // label the kernels (the description is used as progress text) and render them
+    imImageSetAttribute(kernel1, "Description", IM_BYTE, -1, (void*)"Gaussian1");
+    imImageSetAttribute(kernel2, "Description", IM_BYTE, -1, (void*)"Gaussian2");
+
+    imProcessRenderGaussian(kernel1, stddev1);
+    imProcessRenderGaussian(kernel2, stddev2);
+
+    // the second convolution is skipped when the first one returns 0
+    if (imProcessConvolve(src_image, aux_image1, kernel1) &&
+        imProcessConvolve(src_image, aux_image2, kernel2))
+    {
+      imProcessArithmeticOp(aux_image1, aux_image2, dst_image, IM_BIN_SUB);
+      ret = 1;
+    }
+  }
+
+  // Single cleanup path: every image that was created is released exactly once,
+  // including the case where only one of the two clones succeeded.
+  if (kernel1) imImageDestroy(kernel1);
+  if (kernel2) imImageDestroy(kernel2);
+  if (aux_image1) imImageDestroy(aux_image1);
+  if (aux_image2) imImageDestroy(aux_image2);
+
+  return ret;
+}
+
+#ifdef _TEST_CODE_
+// Experimental variant (compiled only when _TEST_CODE_ is defined): builds a single "Difference of Gaussian"
+// kernel by subtracting the two rendered Gaussians and runs one 2D convolution with it.
+int imProcessDiffOfGaussianConvolveTEST(const imImage* src_image, imImage* dst_image, float stddev1, float stddev2)
+{
+  int kernel_size1 = imGaussianStdDev2KernelSize(stddev1);
+  int kernel_size2 = imGaussianStdDev2KernelSize(stddev2);
+  int size = kernel_size1;
+  if (kernel_size1 < kernel_size2) size = kernel_size2;
+  imImage* kernel1 = imImageCreate(size, size, IM_GRAY, IM_FLOAT);
+  imImage* kernel2 = imImageCreate(size, size, IM_GRAY, IM_FLOAT);
+  if (!kernel1 || !kernel2)
+  {
+    if (kernel1) imImageDestroy(kernel1);
+    if (kernel2) imImageDestroy(kernel2);
+    return 0;
+  }
+
+  imImageSetAttribute(kernel1, "Description", IM_BYTE, -1, (void*)"Gaussian");
+  imImageSetAttribute(kernel2, "Description", IM_BYTE, -1, (void*)"Gaussian");
+  // both Gaussians are rendered at the common (larger) size
+  imProcessRenderGaussian(kernel1, stddev1);
+  imProcessRenderGaussian(kernel2, stddev2);
+
+  // ERROR: kernel 1 should be multiplied by a factor to improve the difference.
+  imProcessArithmeticOp(kernel1, kernel2, kernel1, IM_BIN_SUB);
+  imImageSetAttribute(kernel1, "Description", IM_BYTE, -1, (void*)"Difference of Gaussian");
+
+  int ret = 0;
+  if (src_image->data_type == IM_BYTE || src_image->data_type == IM_USHORT)
+  {
+    imImage* aux_image = imImageClone(dst_image);
+    if (!aux_image)
+    {
+      imImageDestroy(kernel1);
+      imImageDestroy(kernel2);
+      return 0;
+    }
+
+    imProcessUnArithmeticOp(src_image, aux_image, IM_UN_EQL);  // Convert to IM_INT
+    ret = imProcessConvolve(aux_image, dst_image, kernel1);
+    imImageDestroy(aux_image);
+  }
+  else
+    ret = imProcessConvolve(src_image, dst_image, kernel1);
+
+  imImageDestroy(kernel1);
+  imImageDestroy(kernel2);
+
+  return ret;
+}
+#endif
+
+int imProcessMeanConvolve(const imImage* src_image, imImage* dst_image, int ks)
+{
+  // Mean filter: convolution with a ks x ks kernel holding 1 inside a disc of radius ks/2.
+  // Returns 0 when the kernel cannot be created, otherwise the DoConvolveStep result.
+  imImage* kernel = imImageCreate(ks, ks, IM_GRAY, IM_INT);
+  if (!kernel)   // checked before the counter is opened, so nothing has to be undone
+    return 0;
+
+  int counter = imCounterBegin("Mean Convolve");
+  imCounterTotal(counter, src_image->depth*src_image->height, "Filtering...");
+  int* kernel_data = (int*)kernel->data[0];
+  int ks2 = ks/2;
+  for(int ky = 0; ky < ks; ky++)
+  {
+    int ky2 = ky-ks2;
+    ky2 = ky2*ky2;
+    for(int kx = 0; kx < ks; kx++) 
+    {
+      int kx2 = kx-ks2;
+      kx2 = kx2*kx2;
+      int radius = imRound(sqrt(double(kx2 + ky2)));   // rounded distance to the kernel center
+      if (radius <= ks2)
+        kernel_data[ky*ks + kx] = 1;   // coefficients outside the disc keep their initial value
+    }
+  }
+
+  int ret = DoConvolveStep(src_image, dst_image, kernel, counter);
+  imImageDestroy(kernel);
+  imCounterEnd(counter);
+  return ret;
+}
diff --git a/src/process/im_convolve_rank.cpp b/src/process/im_convolve_rank.cpp
new file mode 100644
index 0000000..5488a78
--- /dev/null
+++ b/src/process/im_convolve_rank.cpp
@@ -0,0 +1,701 @@
+/** \file
+ * \brief Rank Convolution Operations
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_convolve_rank.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_counter.h>
+#include <im_math.h>
+
+#include "im_process_loc.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+#include <math.h>
+
+
+// Generic rank filter: for every pixel the samples of the kw x kh window that lie inside the image are
+// collected into a scratch array and func(samples, count, center_index) produces the output value.
+// Returns 0 as soon as imCounterInc() returns 0, otherwise 1.
+template <class T, class DT> 
+static int DoConvolveRankFunc(T *map, DT* new_map, int width, int height, int kw, int kh, T (*func)(T* value, int count, int center), int counter)
+{
+  T* value = new T[kw*kh];
+  int offset, new_offset, i, j, x, y, v, c;
+  int kh1, kw1, kh2, kw2;
+  // the window spans [k1, k2] around the pixel in each direction
+  kh2 = kh/2;
+  kw2 = kw/2;
+  kh1 = -kh2;
+  kw1 = -kw2;
+  if (kh%2==0) kh2--;  // if not odd decrease 1
+  if (kw%2==0) kw2--;
+
+  for(j = 0; j < height; j++)
+  {
+    new_offset = j * width;
+    for(i = 0; i < width; i++)
+    {
+      v = 0; c = 0;
+      // v counts the gathered samples; c records where the center sample lands in value[]
+      for(y = kh1; y <= kh2; y++)
+      {
+        if ((j + y < 0) ||        // pass the bottom border
+            (j + y >= height))    // pass the top border
+          continue;
+
+        offset = (j + y) * width;
+        for(x = kw1; x <= kw2; x++)
+        {
+          if ((i + x < 0) ||      // pass the left border
+              (i + x >= width))   // pass the right border
+            continue;
+
+          if (x == 0 && y == 0)
+            c = v;
+
+          value[v] = map[offset + (i + x)];
+          v++;
+        }
+      }
+      new_map[new_offset + i] = (DT)func(value, v, c);
+    }    
+
+    if (!imCounterInc(counter))
+    {
+      delete[] value;
+      return 0;
+    }
+  }
+
+  delete[] value;
+  return 1;
+}
+
+static int compare_imReal(const void *elem1, const void *elem2) 
+{
+  // qsort() comparator: orders two float values ascending.
+  const float lhs = *(const float*)elem1;
+  const float rhs = *(const float*)elem2;
+
+  // -1 when lhs is smaller, 1 when lhs is larger, 0 otherwise
+  // (so a comparison involving NaN also yields 0)
+  const int smaller = (lhs < rhs)? 1: 0;
+  const int larger = (lhs > rhs)? 1: 0;
+
+  return larger - smaller;
+}
+
+static int compare_imInt(const void *elem1, const void *elem2) 
+{
+  // qsort() comparator: orders two int values ascending.
+  const int lhs = *(const int*)elem1;
+  const int rhs = *(const int*)elem2;
+
+  // -1 when lhs is smaller, 1 when lhs is larger, 0 when equal
+  // (the values are compared, never subtracted, so nothing can overflow)
+  const int smaller = (lhs < rhs)? 1: 0;
+  const int larger = (lhs > rhs)? 1: 0;
+
+  return larger - smaller;
+}
+
+static int compare_imUShort(const void *elem1, const void *elem2) 
+{
+  // qsort() comparator: orders two imushort values ascending.
+  const imushort lhs = *(const imushort*)elem1;
+  const imushort rhs = *(const imushort*)elem2;
+
+  // -1 when lhs is smaller, 1 when lhs is larger, 0 when equal
+  // (the values are compared, never subtracted)
+  const int smaller = (lhs < rhs)? 1: 0;
+  const int larger = (lhs > rhs)? 1: 0;
+
+  return larger - smaller;
+}
+
+static int compare_imByte(const void *elem1, const void *elem2) 
+{
+  // qsort() comparator: orders two imbyte values ascending.
+  const imbyte lhs = *(const imbyte*)elem1;
+  const imbyte rhs = *(const imbyte*)elem2;
+
+  // -1 when lhs is smaller, 1 when lhs is larger, 0 when equal
+  // (the values are compared, never subtracted)
+  const int smaller = (lhs < rhs)? 1: 0;
+  const int larger = (lhs > rhs)? 1: 0;
+
+  return larger - smaller;
+}
+
+static imbyte median_op_byte(imbyte* value, int count, int center)
+{
+  const int middle = count/2;   // upper median when count is even
+  (void)center;                 // the center index is not needed for a median
+  qsort(value, count, sizeof(value[0]), compare_imByte);   // sorts the caller's buffer in place
+  return value[middle];
+}
+
+static imushort median_op_ushort(imushort* value, int count, int center)
+{
+  const int middle = count/2;   // upper median when count is even
+  (void)center;                 // the center index is not needed for a median
+  qsort(value, count, sizeof(value[0]), compare_imUShort);   // sorts the caller's buffer in place
+  return value[middle];
+}
+
+static int median_op_int(int* value, int count, int center)
+{
+  const int middle = count/2;   // upper median when count is even
+  (void)center;                 // the center index is not needed for a median
+  qsort(value, count, sizeof(value[0]), compare_imInt);   // sorts the caller's buffer in place
+  return value[middle];
+}
+
+static float median_op_real(float* value, int count, int center)
+{
+  const int middle = count/2;   // upper median when count is even
+  (void)center;                 // the center index is not needed for a median
+  qsort(value, count, sizeof(value[0]), compare_imReal);   // sorts the caller's buffer in place
+  return value[middle];
+}
+
+// Median filter with a ks x ks window, applied to every plane of src_image (BYTE, USHORT, INT and FLOAT only).
+// Returns the result of the last plane processed: 0 if a plane returned 0 or the data type is not handled.
+int imProcessMedianConvolve(const imImage* src_image, imImage* dst_image, int ks)
+{
+  int i, ret = 0;
+  int counter;
+  counter = imCounterBegin("Median Filter");
+  imCounterTotal(counter, src_image->depth*src_image->height, "Filtering...");
+
+  for (i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)
+    {
+    case IM_BYTE:
+      ret = DoConvolveRankFunc((imbyte*)src_image->data[i], (imbyte*)dst_image->data[i], 
+                               src_image->width, src_image->height, ks, ks, median_op_byte, counter);
+      break;                                                                                
+    case IM_USHORT:                                                                           
+      ret = DoConvolveRankFunc((imushort*)src_image->data[i], (imushort*)dst_image->data[i], 
+                               src_image->width, src_image->height, ks, ks, median_op_ushort, counter);
+      break;                                                                                
+    case IM_INT:                                                                           
+      ret = DoConvolveRankFunc((int*)src_image->data[i], (int*)dst_image->data[i], 
+                               src_image->width, src_image->height, ks, ks, median_op_int, counter);
+      break;                                                                                
+    case IM_FLOAT:                                                                           
+      ret = DoConvolveRankFunc((float*)src_image->data[i], (float*)dst_image->data[i], 
+                               src_image->width, src_image->height, ks, ks, median_op_real, counter);
+      break;                                                                                
+    }
+    // stop at the first plane whose filter returned 0
+    if (!ret) 
+      break;
+  }
+
+  imCounterEnd(counter);
+  return ret;
+}
+
+static imbyte range_op_byte(imbyte* value, int count, int center)
+{
+  // spread of the window samples: largest minus smallest; the center index is not needed
+  imbyte lowest, highest;
+  imMinMax(value, count, lowest, highest);
+  (void)center;
+  return highest-lowest;
+}
+
+static imushort range_op_ushort(imushort* value, int count, int center)
+{
+  // Rank operator: max-min of the window samples, both taken from imMinMax.
+  imushort lo, hi;  (void)center;  // the center index is not needed
+  imMinMax(value, count, lo, hi);
+  return hi-lo;
+}
+
+static int range_op_int(int* value, int count, int center)
+{
+  // Rank operator: max-min of the window samples, both taken from imMinMax.
+  int lo, hi;  (void)center;  // the center index is not needed
+  imMinMax(value, count, lo, hi);
+  return hi-lo;
+}
+
+static float range_op_real(float* value, int count, int center)
+{
+  // Rank operator: max-min of the window samples, both taken from imMinMax.
+  float lo, hi;  (void)center;  // the center index is not needed
+  imMinMax(value, count, lo, hi);
+  return hi-lo;
+}
+
+int imProcessRangeConvolve(const imImage* src_image, imImage* dst_image, int ks)
+{
+  // Runs DoConvolveRankFunc with the range_op_* operator on every plane of
+  // src_image, passing ks twice (presumably kernel width and height). Returns
+  // the value of the last DoConvolveRankFunc call; a zero result stops the
+  // plane loop. Data types other than byte/ushort/int/float just return 0.
+  const int width = src_image->width, height = src_image->height;
+  int status = 0;
+
+  const int counter = imCounterBegin("Range Filter");
+  imCounterTotal(counter, src_image->depth*height, "Filtering...");
+
+  for (int plane = 0; plane < src_image->depth; plane++)
+  {
+    if (src_image->data_type == IM_BYTE)
+      status = DoConvolveRankFunc((imbyte*)src_image->data[plane], (imbyte*)dst_image->data[plane],
+                                  width, height, ks, ks, range_op_byte, counter);
+    else if (src_image->data_type == IM_USHORT)
+      status = DoConvolveRankFunc((imushort*)src_image->data[plane], (imushort*)dst_image->data[plane],
+                                  width, height, ks, ks, range_op_ushort, counter);
+    else if (src_image->data_type == IM_INT)
+      status = DoConvolveRankFunc((int*)src_image->data[plane], (int*)dst_image->data[plane],
+                                  width, height, ks, ks, range_op_int, counter);
+    else if (src_image->data_type == IM_FLOAT)
+      status = DoConvolveRankFunc((float*)src_image->data[plane], (float*)dst_image->data[plane],
+                                  width, height, ks, ks, range_op_real, counter);
+
+    if (status == 0)
+    {
+      // nothing more to do once a plane reports zero
+      break;
+    }
+  }
+
+  imCounterEnd(counter);
+
+  return status;
+}
+
+/*
+Local variable threshold by the method of Bernsen.
+
+Description:	
+    If the difference between the largest and the smallest
+    pixel value within the 'dx'*'dy' window is greater than
+    or equal to 'cmin' (local contrast threshold), the average
+    of the two values is used as the threshold.
+
+    Pixels in homogeneous areas (difference below 'cmin')
+    are assumed to be below the threshold.
+
+Reference:	
+    Bernsen, J: "Dynamic thresholding of grey-level images"
+		Proc. of the 8th ICPR, Paris, Oct 1986, 1251-1255.
+
+Author:         Oivind Due Trier
+
+Copyright 1990, Blab, UiO
+Image processing lab, Department of Informatics
+University of Oslo
+*/
+
+static int thresAux = 0;  // threshold read by the *_thres_op_* operators; assigned by imProcessRangeContrastThreshold and imProcessLocalMaxThreshold before they filter
+
+static imbyte contrast_thres_op_byte(imbyte* value, int count, int center)
+{
+  // Local contrast threshold for one window:
+  //   contrast = max - min of the window, both taken from imMinMax;
+  //   contrast below thresAux                  -> 0
+  //   otherwise center sample >= (max+min)/2   -> 1, else 0.
+  const imbyte sample = value[center];
+  imbyte lo, hi;
+
+  imMinMax(value, count, lo, hi);
+
+  const int contrast = hi-lo;
+  if (contrast < thresAux)
+    return 0;
+
+  // mid-range of the window; the sum is formed in int
+  const int mid = ((int)hi + (int)lo) / 2;
+  if (sample < mid)
+    return 0;
+  return 1;
+}
+
+static imushort contrast_thres_op_ushort(imushort* value, int count, int center)
+{
+  // Local contrast threshold for one window:
+  //   contrast = max - min of the window, both taken from imMinMax;
+  //   contrast below thresAux                  -> 0
+  //   otherwise center sample >= (max+min)/2   -> 1, else 0.
+  const imushort sample = value[center];
+  imushort lo, hi;
+
+  imMinMax(value, count, lo, hi);
+
+  const int contrast = hi-lo;
+  if (contrast < thresAux)
+    return 0;
+
+  // mid-range of the window; the sum is formed in int
+  const int mid = ((int)hi + (int)lo) / 2;
+  if (sample < mid)
+    return 0;
+  return 1;
+}
+
+static int contrast_thres_op_int(int* value, int count, int center)
+{
+  // Local contrast threshold for one window of int samples:
+  //   contrast = max - min (from imMinMax); below thresAux -> 0;
+  //   otherwise 1 when the center sample >= (max+min)/2, else 0.
+  // The difference and the sum are evaluated in double because max-min and
+  // max+min can overflow int for widely spread samples; the cast back
+  // truncates toward zero exactly like the integer division it replaces
+  // (assumes int has no more than 53 value bits).
+  const int sample = value[center];
+  int lo, hi;
+
+  imMinMax(value, count, lo, hi);
+
+  if ((double)hi - (double)lo < thresAux)
+    return 0;
+
+  const int mid = (int)(((double)hi + (double)lo) / 2);
+
+  return (sample >= mid) ? 1 : 0;
+}
+
+int imProcessRangeContrastThreshold(const imImage* src_image, imImage* dst_image, int ks, int min_range)
+{
+  // Filters plane 0 of src_image into plane 0 of dst_image (written as bytes)
+  // with the contrast_thres_op_* operator; min_range reaches the operator
+  // through thresAux. Returns DoConvolveRankFunc's result, or 0 when the data
+  // type is not byte/ushort/int. ks is passed twice (presumably kernel w and h).
+  const int width = src_image->width, height = src_image->height;
+  int status = 0;
+
+  const int counter = imCounterBegin("Range Contrast Threshold");
+  imCounterTotal(counter, src_image->depth*height, "Filtering...");
+
+  thresAux = min_range;
+
+  if (src_image->data_type == IM_BYTE)
+    status = DoConvolveRankFunc((imbyte*)src_image->data[0], (imbyte*)dst_image->data[0],
+                                width, height, ks, ks, contrast_thres_op_byte, counter);
+  else if (src_image->data_type == IM_USHORT)
+    status = DoConvolveRankFunc((imushort*)src_image->data[0], (imbyte*)dst_image->data[0],
+                                width, height, ks, ks, contrast_thres_op_ushort, counter);
+  else if (src_image->data_type == IM_INT)
+    status = DoConvolveRankFunc((int*)src_image->data[0], (imbyte*)dst_image->data[0],
+                                width, height, ks, ks, contrast_thres_op_int, counter);
+
+  imCounterEnd(counter);
+
+  return status;
+}
+
+static imbyte max_thres_op_byte(imbyte* value, int count, int center)
+{
+  // Local maximum test for one window: returns 1 when the center sample is at
+  // least thresAux and not smaller than the window maximum reported by
+  // imMinMax, otherwise 0.
+  const imbyte sample = value[center];
+  if (sample < thresAux)
+    return 0;  // rejected before scanning the window
+
+  imbyte lo, hi;  // lo is filled by imMinMax but not used
+  imMinMax(value, count, lo, hi);
+
+  return (sample < hi) ? 0 : 1;
+}
+
+static imushort max_thres_op_ushort(imushort* value, int count, int center)
+{
+  // Local maximum test for one window: returns 1 when the center sample is at
+  // least thresAux and not smaller than the window maximum reported by
+  // imMinMax, otherwise 0.
+  const imushort sample = value[center];
+  if (sample < thresAux)
+    return 0;  // rejected before scanning the window
+
+  imushort lo, hi;  // lo is filled by imMinMax but not used
+  imMinMax(value, count, lo, hi);
+
+  return (sample < hi) ? 0 : 1;
+}
+
+static int max_thres_op_int(int* value, int count, int center)
+{
+  // Local maximum test for one window: returns 1 when the center sample is at
+  // least thresAux and not smaller than the window maximum reported by
+  // imMinMax, otherwise 0.
+  const int sample = value[center];
+  if (sample < thresAux)
+    return 0;  // rejected before scanning the window
+
+  int lo, hi;  // lo is filled by imMinMax but not used
+  imMinMax(value, count, lo, hi);
+
+  return (sample < hi) ? 0 : 1;
+}
+
+int imProcessLocalMaxThreshold(const imImage* src_image, imImage* dst_image, int ks, int min_thres)
+{
+  // Filters plane 0 of src_image into plane 0 of dst_image (written as bytes)
+  // with the max_thres_op_* operator; min_thres reaches the operator through
+  // thresAux. Returns DoConvolveRankFunc's result, or 0 when the data type is
+  // not byte/ushort/int. ks is passed twice (presumably kernel w and h).
+  const int width = src_image->width, height = src_image->height;
+  int status = 0;
+
+  const int counter = imCounterBegin("Local Max Threshold");
+  imCounterTotal(counter, src_image->depth*height, "Filtering...");
+
+  thresAux = min_thres;
+
+  if (src_image->data_type == IM_BYTE)
+    status = DoConvolveRankFunc((imbyte*)src_image->data[0], (imbyte*)dst_image->data[0],
+                                width, height, ks, ks, max_thres_op_byte, counter);
+  else if (src_image->data_type == IM_USHORT)
+    status = DoConvolveRankFunc((imushort*)src_image->data[0], (imbyte*)dst_image->data[0],
+                                width, height, ks, ks, max_thres_op_ushort, counter);
+  else if (src_image->data_type == IM_INT)
+    status = DoConvolveRankFunc((int*)src_image->data[0], (imbyte*)dst_image->data[0],
+                                width, height, ks, ks, max_thres_op_int, counter);
+
+  imCounterEnd(counter);
+
+  return status;
+}
+
+static imbyte rank_closest_op_byte(imbyte* value, int count, int center)
+{
+  // Returns the window extreme (min or max from imMinMax) that is nearer to
+  // the center sample; a tie goes to the maximum.
+  const imbyte sample = value[center];
+  imbyte lo, hi;
+  imMinMax(value, count, lo, hi);
+
+  const int to_lo = sample - lo;
+  const int to_hi = hi - sample;
+  return (to_lo < to_hi) ? lo : hi;
+}
+
+static imushort rank_closest_op_ushort(imushort* value, int count, int center)
+{
+  // Returns the window extreme (min or max from imMinMax) that is nearer to
+  // the center sample; a tie goes to the maximum.
+  const imushort sample = value[center];
+  imushort lo, hi;
+  imMinMax(value, count, lo, hi);
+
+  const int to_lo = sample - lo;
+  const int to_hi = hi - sample;
+  return (to_lo < to_hi) ? lo : hi;
+}
+
+static int rank_closest_op_int(int* value, int count, int center)
+{
+  // Returns the window extreme (from imMinMax) nearest to the center sample;
+  // a tie goes to the maximum. With min <= sample <= max (assuming center
+  // indexes inside the window) the unsigned differences are exact, whereas
+  // the former signed subtractions could overflow int for spread-out samples.
+  const int sample = value[center];
+  int lo, hi;
+  imMinMax(value, count, lo, hi);
+  const unsigned to_lo = (unsigned)sample - (unsigned)lo, to_hi = (unsigned)hi - (unsigned)sample;
+  return (to_lo < to_hi) ? lo : hi;
+}
+
+static float rank_closest_op_real(float* value, int count, int center)
+{
+  // Returns the window extreme (min or max from imMinMax) that is nearer to
+  // the center sample; a tie goes to the maximum.
+  const float sample = value[center];
+  float lo, hi;
+  imMinMax(value, count, lo, hi);
+
+  if (hi - sample > sample - lo)
+    return lo;
+  return hi;
+}
+
+
+int imProcessRankClosestConvolve(const imImage* src_image, imImage* dst_image, int ks)
+{
+  // Runs DoConvolveRankFunc with the rank_closest_op_* operator on every plane
+  // of src_image, passing ks twice (presumably kernel width and height). Returns
+  // the value of the last DoConvolveRankFunc call; a zero result stops the
+  // plane loop. Data types other than byte/ushort/int/float just return 0.
+  const int width = src_image->width, height = src_image->height;
+  int status = 0;
+
+  const int counter = imCounterBegin("Rank Closest");
+  imCounterTotal(counter, src_image->depth*height, "Filtering...");
+
+  for (int plane = 0; plane < src_image->depth; plane++)
+  {
+    if (src_image->data_type == IM_BYTE)
+      status = DoConvolveRankFunc((imbyte*)src_image->data[plane], (imbyte*)dst_image->data[plane],
+                                  width, height, ks, ks, rank_closest_op_byte, counter);
+    else if (src_image->data_type == IM_USHORT)
+      status = DoConvolveRankFunc((imushort*)src_image->data[plane], (imushort*)dst_image->data[plane],
+                                  width, height, ks, ks, rank_closest_op_ushort, counter);
+    else if (src_image->data_type == IM_INT)
+      status = DoConvolveRankFunc((int*)src_image->data[plane], (int*)dst_image->data[plane],
+                                  width, height, ks, ks, rank_closest_op_int, counter);
+    else if (src_image->data_type == IM_FLOAT)
+      status = DoConvolveRankFunc((float*)src_image->data[plane], (float*)dst_image->data[plane],
+                                  width, height, ks, ks, rank_closest_op_real, counter);
+
+    if (status == 0)
+    {
+      // nothing more to do once a plane reports zero
+      break;
+    }
+  }
+
+  imCounterEnd(counter);
+
+  return status;
+}
+
+static imbyte rank_max_op_byte(imbyte* value, int count, int center)
+{
+  // Rank operator: the window maximum as reported by imMinMax.
+  imbyte lo, hi;  (void)center;  // the center index is not needed
+  imMinMax(value, count, lo, hi);
+  return hi;
+}
+
+static imushort rank_max_op_ushort(imushort* value, int count, int center)
+{
+  // Rank operator: the window maximum as reported by imMinMax.
+  imushort lo, hi;  (void)center;  // the center index is not needed
+  imMinMax(value, count, lo, hi);
+  return hi;
+}
+
+static int rank_max_op_int(int* value, int count, int center)
+{
+  // Rank operator: the window maximum as reported by imMinMax.
+  int lo, hi;  (void)center;  // the center index is not needed
+  imMinMax(value, count, lo, hi);
+  return hi;
+}
+
+static float rank_max_op_real(float* value, int count, int center)
+{
+  // Rank operator: the window maximum as reported by imMinMax.
+  float lo, hi;  (void)center;  // the center index is not needed
+  imMinMax(value, count, lo, hi);
+  return hi;
+}
+
+int imProcessRankMaxConvolve(const imImage* src_image, imImage* dst_image, int ks)
+{
+  // Runs DoConvolveRankFunc with the rank_max_op_* operator on every plane of
+  // src_image, passing ks twice (presumably kernel width and height). Returns
+  // the value of the last DoConvolveRankFunc call; a zero result stops the
+  // plane loop. Data types other than byte/ushort/int/float just return 0.
+  const int width = src_image->width, height = src_image->height;
+  int status = 0;
+
+  const int counter = imCounterBegin("Rank Max");
+  imCounterTotal(counter, src_image->depth*height, "Filtering...");
+
+  for (int plane = 0; plane < src_image->depth; plane++)
+  {
+    if (src_image->data_type == IM_BYTE)
+      status = DoConvolveRankFunc((imbyte*)src_image->data[plane], (imbyte*)dst_image->data[plane],
+                                  width, height, ks, ks, rank_max_op_byte, counter);
+    else if (src_image->data_type == IM_USHORT)
+      status = DoConvolveRankFunc((imushort*)src_image->data[plane], (imushort*)dst_image->data[plane],
+                                  width, height, ks, ks, rank_max_op_ushort, counter);
+    else if (src_image->data_type == IM_INT)
+      status = DoConvolveRankFunc((int*)src_image->data[plane], (int*)dst_image->data[plane],
+                                  width, height, ks, ks, rank_max_op_int, counter);
+    else if (src_image->data_type == IM_FLOAT)
+      status = DoConvolveRankFunc((float*)src_image->data[plane], (float*)dst_image->data[plane],
+                                  width, height, ks, ks, rank_max_op_real, counter);
+
+    if (status == 0)
+    {
+      // nothing more to do once a plane reports zero
+      break;
+    }
+  }
+
+  imCounterEnd(counter);
+
+  return status;
+}
+
+static imbyte rank_min_op_byte(imbyte* value, int count, int center)
+{
+  // Rank operator: the window minimum as reported by imMinMax.
+  imbyte lo, hi;  (void)center;  // the center index is not needed
+  imMinMax(value, count, lo, hi);
+  return lo;
+}
+
+static imushort rank_min_op_ushort(imushort* value, int count, int center)
+{
+  // Rank operator: the window minimum as reported by imMinMax.
+  imushort lo, hi;  (void)center;  // the center index is not needed
+  imMinMax(value, count, lo, hi);
+  return lo;
+}
+
+static int rank_min_op_int(int* value, int count, int center)
+{
+  // Rank operator: the window minimum as reported by imMinMax.
+  int lo, hi;  (void)center;  // the center index is not needed
+  imMinMax(value, count, lo, hi);
+  return lo;
+}
+
+static float rank_min_op_real(float* value, int count, int center)
+{
+  // Rank operator: the window minimum as reported by imMinMax.
+  float lo, hi;  (void)center;  // the center index is not needed
+  imMinMax(value, count, lo, hi);
+  return lo;
+}
+
+int imProcessRankMinConvolve(const imImage* src_image, imImage* dst_image, int ks)
+{
+  // Runs DoConvolveRankFunc with the rank_min_op_* operator on every plane of
+  // src_image, passing ks twice (presumably kernel width and height). Returns
+  // the value of the last DoConvolveRankFunc call; a zero result stops the
+  // plane loop. Data types other than byte/ushort/int/float just return 0.
+  const int width = src_image->width, height = src_image->height;
+  int status = 0;
+
+  const int counter = imCounterBegin("Rank Min");
+  imCounterTotal(counter, src_image->depth*height, "Filtering...");
+
+  for (int plane = 0; plane < src_image->depth; plane++)
+  {
+    if (src_image->data_type == IM_BYTE)
+      status = DoConvolveRankFunc((imbyte*)src_image->data[plane], (imbyte*)dst_image->data[plane],
+                                  width, height, ks, ks, rank_min_op_byte, counter);
+    else if (src_image->data_type == IM_USHORT)
+      status = DoConvolveRankFunc((imushort*)src_image->data[plane], (imushort*)dst_image->data[plane],
+                                  width, height, ks, ks, rank_min_op_ushort, counter);
+    else if (src_image->data_type == IM_INT)
+      status = DoConvolveRankFunc((int*)src_image->data[plane], (int*)dst_image->data[plane],
+                                  width, height, ks, ks, rank_min_op_int, counter);
+    else if (src_image->data_type == IM_FLOAT)
+      status = DoConvolveRankFunc((float*)src_image->data[plane], (float*)dst_image->data[plane],
+                                  width, height, ks, ks, rank_min_op_real, counter);
+
+    if (status == 0)
+    {
+      // nothing more to do once a plane reports zero
+      break;
+    }
+  }
+
+  imCounterEnd(counter);
+
+  return status;
+}
diff --git a/src/process/im_distance.cpp b/src/process/im_distance.cpp
new file mode 100644
index 0000000..019356d
--- /dev/null
+++ b/src/process/im_distance.cpp
@@ -0,0 +1,512 @@
+/** \file
+ * \brief Distance Transform
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_distance.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+#include <im.h>
+#include <im_util.h>
+
+#include "im_process_glo.h"
+
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+
+const float DT_ONE    = 1.0f;             // step (1,0): sqrt(1)
+const float DT_SQRT2  = 1.414213562373f;  // step (1,1): sqrt(2)
+const float DT_SQRT5  = 2.2360679775f;    // step (2,1): sqrt(5)
+const float DT_SQRT10 = 3.1622776601684f; // step (3,1): sqrt(10)
+const float DT_SQRT13 = 3.605551275464f;  // step (3,2): sqrt(13)
+const float DT_SQRT17 = 4.12310562562f;   // step (4,1): sqrt(17)
+const float DT_SQRT25 = 5.0f;             // step (4,3): sqrt(25)
+
+static inline void setValue(int r, int r1, int r2, int r3, int r4, float *image_data, int f) 
+{ // Relaxes image_data[r] to the minimum of itself and (neighbor + step length) over a fixed neighbor mask.
+  float v;                           // candidate distance through one neighbor
+  float minv = image_data[r];        // (x,y)
+  // f selects the horizontal neighbor: non-zero -> r-1 (left), zero -> r+1 (right).
+  if (f)
+    v = image_data[r - 1] + DT_ONE;      // (x-1,y)
+  else
+    v = image_data[r + 1] + DT_ONE;      // (x+1,y)
+  if (v < minv) minv = v;
+  // r1..r4 index this column in the 1st..4th neighboring row; the caller passes rows above (y-k) or below (y+k).
+  v = image_data[r1] + DT_ONE;          // (x,y-1)           (x,y+1)
+  if (v < minv) minv = v; 
+  // Early exits: every remaining candidate adds at least the tested step, so (assuming stored distances are never negative) minv cannot improve.
+  if (minv < DT_SQRT2)
+    goto min_attrib;
+
+  v = image_data[r1 - 1] + DT_SQRT2;    // (x-1,y-1)         (x-1,y+1)
+  if (v < minv) minv = v;
+
+  v = image_data[r1 + 1] + DT_SQRT2;    // (x+1,y-1)         (x+1,y+1)
+  if (v < minv) minv = v;
+
+  if (minv < DT_SQRT5)
+    goto min_attrib;
+
+  v = image_data[r1 + 2] + DT_SQRT5;    // (x+2,y-1)         (x+2,y+1)
+  if (v < minv) minv = v;
+
+  v = image_data[r1 - 2] + DT_SQRT5;    // (x-2,y-1)         (x-2,y+1)
+  if (v < minv) minv = v;
+
+  v = image_data[r2 - 1] + DT_SQRT5;    // (x-1,y-2)         (x-1,y+2)
+  if (v < minv) minv = v;
+
+  v = image_data[r2 + 1] + DT_SQRT5;    // (x+1,y-2)         (x+1,y+2)
+  if (v < minv) minv = v;
+
+  if (minv < DT_SQRT10)
+    goto min_attrib;
+
+  v = image_data[r1 + 3] + DT_SQRT10;   // (x+3,y-1)         (x+3,y+1)
+  if (v < minv) minv = v;
+
+  v = image_data[r1 - 3] + DT_SQRT10;   // (x-3,y-1)         (x-3,y+1)
+  if (v < minv) minv = v;
+
+  v = image_data[r3 - 1] + DT_SQRT10;   // (x-1,y-3)         (x-1,y+3)
+  if (v < minv) minv = v;
+
+  v = image_data[r3 + 1] + DT_SQRT10;   // (x+1,y-3)         (x+1,y+3)
+  if (v < minv) minv = v;
+
+  if (minv < DT_SQRT13)
+    goto min_attrib;
+
+  v = image_data[r2 - 3] + DT_SQRT13;   // (x-3,y-2)         (x-3,y+2)
+  if (v < minv) minv = v;
+
+  v = image_data[r2 + 3] + DT_SQRT13;   // (x+3,y-2)         (x+3,y+2)
+  if (v < minv) minv = v;
+
+  v = image_data[r3 + 2] + DT_SQRT13;   // (x+2,y-3)         (x+2,y+3)
+  if (v < minv) minv = v;
+
+  v = image_data[r3 - 2] + DT_SQRT13;   // (x-2,y-3)         (x-2,y+3)
+  if (v < minv) minv = v;
+
+  if (minv < DT_SQRT17)
+    goto min_attrib;
+
+  v = image_data[r1 + 4] + DT_SQRT17;   // (x+4,y-1)         (x+4,y+1)
+  if (v < minv) minv = v;
+
+  v = image_data[r1 - 4] + DT_SQRT17;   // (x-4,y-1)         (x-4,y+1)
+  if (v < minv) minv = v;
+
+  v = image_data[r4 - 1] + DT_SQRT17;   // (x-1,y-4)         (x-1,y+4)
+  if (v < minv) minv = v;
+
+  v = image_data[r4 + 1] + DT_SQRT17;   // (x+1,y-4)         (x+1,y+4)
+  if (v < minv) minv = v;
+
+  if (minv < DT_SQRT25)
+    goto min_attrib;
+
+  v = image_data[r3 - 4] + DT_SQRT25;   // (x-4,y-3)         (x-4,y+3)
+  if (v < minv) minv = v;
+
+  v = image_data[r3 + 4] + DT_SQRT25;   // (x+4,y-3)         (x+4,y+3)
+  if (v < minv) minv = v;
+
+  v = image_data[r4 + 3] + DT_SQRT25;   // (x+3,y-4)         (x+3,y+4)
+  if (v < minv) minv = v;
+
+  v = image_data[r4 - 3] + DT_SQRT25;   // (x-3,y-4)         (x-3,y+4)
+  if (v < minv) minv = v;
+
+min_attrib:
+  image_data[r] = minv;
+}
+
+static inline void setValueForwardEdge(int r, int r1, int r2, int width, int x, int y, float *image_data) 
+{
+  // Forward-pass relaxation for a pixel near the image border: image_data[r]
+  // becomes the minimum of itself and (neighbor + step) over the neighbors in
+  // rows y, y-1 and y-2 that lie inside the image. r, r1 and r2 are the indices
+  // of (x,y), (x,y-1) and (x,y-2). A neighbor in column x+k exists when
+  // x+k <= width-1, i.e. x < width-k; the tests used to be one column stricter
+  // (width-k-1), which skipped valid neighbors next to the right border.
+  // NOTE(review): width is assumed to be the row length used to build r1/r2.
+  float v;
+  float minv = image_data[r];            // (x,y)
+
+  if (y > 0)
+  {
+    v = image_data[r1] + DT_ONE;         // (x,y-1)
+    if (v < minv) minv = v;
+  }
+  if (x > 0)
+  {
+    v = image_data[r - 1] + DT_ONE;      // (x-1,y)
+    if (v < minv) minv = v;
+  }
+  if (x > 0 && y > 0)
+  {
+    v = image_data[r1 - 1] + DT_SQRT2;   // (x-1,y-1)
+    if (v < minv) minv = v;
+  }
+  if (x < width-1 && y > 0)
+  {
+    v = image_data[r1 + 1] + DT_SQRT2;   // (x+1,y-1)
+    if (v < minv) minv = v;
+  }
+  if (x > 0 && y > 1)
+  {
+    v = image_data[r2 - 1] + DT_SQRT5;   // (x-1,y-2)
+    if (v < minv) minv = v;
+  }
+  if (x < width-1 && y > 1)
+  {
+    v = image_data[r2 + 1] + DT_SQRT5;   // (x+1,y-2)
+    if (v < minv) minv = v;
+  }
+  if (x < width-2 && y > 0)
+  {
+    v = image_data[r1 + 2] + DT_SQRT5;   // (x+2,y-1)
+    if (v < minv) minv = v;
+  }
+  if (x > 1 && y > 0)
+  {
+    v = image_data[r1 - 2] + DT_SQRT5;   // (x-2,y-1)
+    if (v < minv) minv = v;
+  }
+
+  image_data[r] = minv;
+}
+
+static inline void setValueBackwardEdge(int r, int r1, int r2, int width, int height, int x, int y, float *image_data) 
+{
+  // Backward-pass relaxation for a pixel near the image border: image_data[r]
+  // becomes the minimum of itself and (neighbor + step) over the neighbors in
+  // rows y, y+1 and y+2 that lie inside the image. r, r1 and r2 are the indices
+  // of (x,y), (x,y+1) and (x,y+2). Column x+k exists when x < width-k and row
+  // y+k exists when y < height-k; the tests used to be one step stricter
+  // (width-k-1, height-k-1), which skipped valid neighbors in the last column
+  // and the last row.
+  float v;
+  float minv = image_data[r];            // (x,y)
+
+  if (x < width-1)
+  {
+    v = image_data[r + 1] + DT_ONE;      // (x+1,y)
+    if (v < minv) minv = v;
+  }
+  if (y < height-1)
+  {
+    v = image_data[r1] + DT_ONE;         // (x,y+1)
+    if (v < minv) minv = v;
+  }
+  if (y < height-1 && x > 0)
+  {
+    v = image_data[r1 - 1] + DT_SQRT2;   // (x-1,y+1)
+    if (v < minv) minv = v;
+  }
+  if (y < height-1 && x < width-1)
+  {
+    v = image_data[r1 + 1] + DT_SQRT2;   // (x+1,y+1)
+    if (v < minv) minv = v;
+  }
+  if (y < height-1 && x < width-2)
+  {
+    v = image_data[r1 + 2] + DT_SQRT5;   // (x+2,y+1)
+    if (v < minv) minv = v;
+  }
+  if (y < height-2 && x < width-1)
+  {
+    v = image_data[r2 + 1] + DT_SQRT5;   // (x+1,y+2)
+    if (v < minv) minv = v;
+  }
+  if (y < height-2 && x > 0)
+  {
+    v = image_data[r2 - 1] + DT_SQRT5;   // (x-1,y+2)
+    if (v < minv) minv = v;
+  }
+  if (y < height-1 && x > 1)
+  {
+    v = image_data[r1 - 2] + DT_SQRT5;   // (x-2,y+1)
+    if (v < minv) minv = v;
+  }
+
+  image_data[r] = minv;
+}
+
+void imProcessDistanceTransform(const imImage* src_image, imImage* dst_image)
+{
+  // Two-pass distance transform. Plane 0 of src_image is read as bytes
+  // (non-zero = object pixel, zero = background); plane 0 of dst_image is
+  // written as floats: 0 for background, and for object pixels the smallest
+  // accumulated step length found by relaxing against neighboring pixels,
+  // first with y and x increasing, then with y and x decreasing.
+  // NOTE(review): src_image->count is assumed to be the number of pixels in
+  // one plane (width*height) - confirm.
+  const int width = src_image->width;
+  const int height = src_image->height;
+
+  imbyte* src_data = (imbyte*)src_image->data[0];
+  float* dst_data = (float*)dst_image->data[0];
+
+  // Image diagonal, used as the initial "far" value. The squares are formed
+  // in double so that large dimensions cannot overflow int.
+  const float max_dist = (float)sqrt((double)width*width + (double)height*height);
+
+  // Every pixel is initialized explicitly: background pixels get distance
+  // zero (they used to keep whatever dst_image already held) and object
+  // pixels start from max_dist.
+  for (int i = 0; i < src_image->count; i++)
+    dst_data[i] = src_data[i] ? max_dist : 0.0f;
+
+  /* forward pass: y and x increasing, neighbors come from rows y-1 .. y-4 */
+  for (int y = 0; y < height; y++)
+  {
+    const int border_row = (y < 4 || y > height-5);
+
+    for (int x = 0; x < width; x++)
+    {
+      const int offset = y*width + x;
+      if (!src_data[offset])
+        continue;   // background stays at zero
+
+      const int offset1 = offset - width;     // (x,y-1)
+      const int offset2 = offset - 2*width;   // (x,y-2)
+      const int offset3 = offset - 3*width;   // (x,y-3)
+      const int offset4 = offset - 4*width;   // (x,y-4)
+
+      // within 4 pixels of any border the full mask would leave the image
+      if (border_row || x < 4 || x > width-5)
+        setValueForwardEdge(offset, offset1, offset2, width, x, y, dst_data);
+      else
+        setValue(offset, offset1, offset2, offset3, offset4, dst_data, 1);
+    }
+  }
+
+  /* backward pass: y and x decreasing, neighbors come from rows y+1 .. y+4 */
+  for (int y = height-1; y >= 0; y--)
+  {
+    const int border_row = (y < 4 || y > height-5);
+
+    for (int x = width-1; x >= 0; x--)
+    {
+      const int offset = y*width + x;
+      if (!src_data[offset])
+        continue;
+
+      const int offset1 = offset + width;     // (x,y+1)
+      const int offset2 = offset + 2*width;   // (x,y+2)
+      const int offset3 = offset + 3*width;   // (x,y+3)
+      const int offset4 = offset + 4*width;   // (x,y+4)
+
+      // same border rule as in the forward pass
+      if (border_row || x < 4 || x > width-5)
+        setValueBackwardEdge(offset, offset1, offset2, width, height, x, y, dst_data);
+      else
+        setValue(offset, offset1, offset2, offset3, offset4, dst_data, 0);
+    }
+  }
+}
+
+static void iFillValue(imbyte* img_data, int x, int y, int width, int value)
+{
+  // Recursive flood fill over the 8 neighbors: writes 'value' at (x,y) and
+  // repeats on every neighbor that still holds the value (x,y) had on entry.
+  //   img_data : byte plane addressed as img_data[y*width + x]
+  //   x, y     : pixel where the fill starts
+  //   value    : replacement value, stored as a byte
+  //
+  // There are no bounds checks: the fill relies on the region being enclosed
+  // by pixels with a different value before the image border is reached.
+  // If 'value' equals the value being replaced and a neighbor holds it too,
+  // the recursion does not end; the recursion depth grows with the region.
+
+  static const int step[8][2] =
+  {
+    { 0, -1},   // (x,y-1)
+    {-1,  0},   // (x-1,y)
+    {-1, -1},   // (x-1,y-1)
+    { 1, -1},   // (x+1,y-1)
+    { 1,  0},   // (x+1,y)
+    { 0,  1},   // (x,y+1)
+    {-1,  1},   // (x-1,y+1)
+    { 1,  1}    // (x+1,y+1)
+  };
+
+  const int pos = y*width + x;
+  const int replaced = img_data[pos];
+
+  img_data[pos] = (imbyte)value;
+
+  // neighbors are visited in the table order above; each one is re-read
+  // after the previous recursive call has returned
+  for (int k = 0; k < 8; k++)
+  {
+    const int nx = x + step[k][0];
+    const int ny = y + step[k][1];
+
+    if (img_data[ny*width + nx] == replaced)
+      iFillValue(img_data, nx, ny, width, value);
+  }
+
+}
+
+static inline int iCheckFalseMaximum(int r, int r2a, int r2b, int width, float *src_data) 
+{
+  /* we are ignoring valleys of 1 pixel. */
+  /* this is not 100% fail proof */
+  //
+  // Returns 1 when any of the 16 pixels on the square ring two pixels away
+  // from (x,y) holds a value strictly greater than src_data[r], otherwise 0.
+  // The 8 pixels adjacent to (x,y) are not tested.
+  //   r     : index of (x,y)
+  //   r2a   : index of (x,y-2)
+  //   r2b   : index of (x,y+2)
+  //   width : distance between vertically adjacent pixels in src_data
+  //
+  // Ring layout ('#' is tested, '.' is skipped, 'c' is the center):
+  //
+  //     y+2   # # # # #
+  //     y+1   # . . . #
+  //     y     # . c . #
+  //     y-1   # . . . #
+  //     y-2   # # # # #
+  //          x-2     x+2
+  //
+  // No bounds checks are made here: (x,y) must be at least two pixels away
+  // from every border of the plane.
+  const float center = src_data[r];
+
+  const int r1a = r - width;   // (x,y-1)
+  const int r1b = r + width;   // (x,y+1)
+
+  const int ring[16] =
+  {
+    // rows y-2 and y+2, inner three columns
+    r2a - 1,   // (x-1,y-2)
+    r2a,       // (x,y-2)
+    r2a + 1,   // (x+1,y-2)
+    r2b - 1,   // (x-1,y+2)
+    r2b,       // (x,y+2)
+    r2b + 1,   // (x+1,y+2)
+    // column x-2
+    r2b - 2,   // (x-2,y+2)
+    r1b - 2,   // (x-2,y+1)
+    r - 2,     // (x-2,y)
+    r1a - 2,   // (x-2,y-1)
+    r2a - 2,   // (x-2,y-2)
+    // column x+2
+    r2a + 2,   // (x+2,y-2)
+    r1a + 2,   // (x+2,y-1)
+    r + 2,     // (x+2,y)
+    r1b + 2,   // (x+2,y+1)
+    r2b + 2    // (x+2,y+2)
+  };
+
+  // the ring is scanned in table order and the scan stops at the first hit
+  for (int k = 0; k < 16; k++)
+  {
+    if (src_data[ring[k]] > center)
+      return 1;
+  }
+
+  return 0;
+}
+
+static inline void iCheckMaximum(int r, int r1a, int r1b, float *src_data, imbyte* dst_data) 
+{
+  // Classifies pixel (x,y) against its 8 neighbors and stores the class in
+  // dst_data[r]:
+  //   0 : some neighbor is greater (not a maximum)
+  //   1 : every neighbor is smaller (unique maximum)
+  //   2 : no neighbor is greater but at least one is equal (can be maximum)
+  // r, r1a and r1b are the indices of (x,y), (x,y-1) and (x,y+1).
+  const int around[8] =
+  {
+    r1a,       // (x,y-1)
+    r - 1,     // (x-1,y)
+    r1a - 1,   // (x-1,y-1)
+    r1a + 1,   // (x+1,y-1)
+    r + 1,     // (x+1,y)
+    r1b,       // (x,y+1)
+    r1b - 1,   // (x-1,y+1)
+    r1b + 1    // (x+1,y+1)
+  };
+
+  const float center = src_data[r];
+  float highest = center;   // running maximum, starting from the center
+  int is_unique = 1;        // cleared once a neighbor ties or exceeds it
+
+  for (int k = 0; k < 8; k++)
+  {
+    const float v = src_data[around[k]];
+    if (v >= highest)
+    {
+      highest = v;
+      is_unique = 0;
+    }
+  }
+
+  // the center loses as soon as the running maximum has moved above it
+  if (center < highest)
+    dst_data[r] = 0;
+  else
+    dst_data[r] = (imbyte)(is_unique ? 1 : 2);
+}
+
+void imProcessRegionalMaximum(const imImage* src_image, imImage* dst_image)
+{
+  // Marks the regional maxima of src_image plane 0 (read as floats) in
+  // dst_image plane 0 (written as bytes): 1 = maximum, 0 = anything else.
+  //   1. dst is cleared first. Border pixels and pixels whose source value
+  //      is 0 are never written by the steps below, so they used to keep
+  //      stale contents; a stale 2 could also lead iFillValue, which has no
+  //      bounds checks, outside the image.
+  //   2. iCheckMaximum classifies the other pixels as 0, 1 or 2 (candidate).
+  //   3. Candidates with a larger value on the ring two pixels away are
+  //      erased together with every candidate connected to them.
+  //   4. Surviving candidates are promoted to 1.
+  // NOTE(review): src_image->count is assumed to be the number of pixels in
+  // one plane (width*height) - confirm.
+  const int width = src_image->width;
+  const int height = src_image->height;
+
+  float* src_data = (float*)src_image->data[0];
+  imbyte* dst_data = (imbyte*)dst_image->data[0];
+
+  for (int i = 0; i < src_image->count; i++)
+    dst_data[i] = 0;
+
+  for (int y = 1; y < height-1; y++)
+  {
+    for (int x = 1; x < width-1; x++)
+    {
+      const int offset = y*width + x;
+      if (src_data[offset])
+        iCheckMaximum(offset, offset - width, offset + width, src_data, dst_data);
+    }
+  }
+
+  // remove false maximum
+  for (int y = 2; y < height-2; y++)
+  {
+    for (int x = 2; x < width-2; x++)
+    {
+      const int offset = y*width + x;
+      if (dst_data[offset] != 2)
+        continue;
+
+      if (iCheckFalseMaximum(offset, offset - 2*width, offset + 2*width, width, src_data))
+        iFillValue(dst_data, x, y, width, 0);
+    }
+  }
+
+  // update destination with the remaining maxima
+  for (int i = 0; i < src_image->count; i++)
+  {
+    if (dst_data[i] == 2)
+      dst_data[i] = 1;
+  }
+}
diff --git a/src/process/im_effects.cpp b/src/process/im_effects.cpp
new file mode 100644
index 0000000..7f65ce6
--- /dev/null
+++ b/src/process/im_effects.cpp
@@ -0,0 +1,86 @@
+/** \file
+ * \brief Effects
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_effects.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_math.h>
+#include <im_complex.h>
+
+#include "im_process_pon.h"
+#include "im_math_op.h"
+
+#include <stdlib.h>
+#include <memory.h>
+
+static unsigned char BoxMean(imbyte *map, int offset, int shift, int hbox_size, int vbox_size)
+{
+  // Integer mean of the hbox_size x vbox_size box whose first sample is
+  // map[offset]; 'shift' samples are skipped after each row of the box.
+  const imbyte *row = map + offset;
+  const int stride = hbox_size + shift;
+  int total = 0;
+
+  for (int line = 0; line < vbox_size; line++, row += stride)
+  {
+    for (int col = 0; col < hbox_size; col++)
+      total += row[col];
+  }
+
+  return (unsigned char)(total / (vbox_size*hbox_size));
+}
+
+static void BoxSet(imbyte *map, int offset, int shift, int hbox_size, int vbox_size, unsigned char value)
+{
+  // Stores 'value' in every sample of the hbox_size x vbox_size box that
+  // starts at map[offset]; 'shift' samples are skipped after each box row.
+  imbyte *row = map + offset;
+  const int stride = hbox_size + shift;
+
+  for (int line = 0; line < vbox_size; line++, row += stride)
+  {
+    for (int col = 0; col < hbox_size; col++)
+      row[col] = value;
+  }
+}
+
+void imProcessPixelate(const imImage* src_image, imImage* dst_image, int box_size)
+{
+  // Replaces each box_size x box_size tile of every plane (read as bytes) by
+  // the integer mean of the tile; the last tile of a row or column is clipped
+  // to the image. A box_size below 1 is treated as 1 (a plain copy): the tile
+  // counts below used to divide by zero for box_size == 0.
+  if (box_size < 1) box_size = 1;
+
+  const int width = src_image->width, height = src_image->height;
+  const int hbox = (width + box_size-1) / box_size;    // tiles per row
+  const int vbox = (height + box_size-1) / box_size;   // tiles per column
+
+  for (int i = 0; i < src_image->depth; i++)
+  {
+    imbyte *src_map = (imbyte*)src_image->data[i];
+    imbyte *dst_map = (imbyte*)dst_image->data[i];
+    for (int bv = 0; bv < vbox; bv++)
+    {
+      const int bv_pos = bv*box_size;
+      const int vbox_size = (bv == vbox-1) ? height - bv_pos : box_size;
+      for (int bh = 0; bh < hbox; bh++)
+      {
+        const int bh_pos = bh*box_size, offset = bv_pos*width + bh_pos;
+        const int hbox_size = (bh == hbox-1) ? width - bh_pos : box_size;
+        BoxSet(dst_map, offset, width - hbox_size, hbox_size, vbox_size, BoxMean(src_map, offset, width - hbox_size, hbox_size, vbox_size));
+      }
+    }
+  }
+}
+
+void imProcessPosterize(const imImage* src_image, imImage* dst_image, int level)
+{
+  // AND every sample with the low byte of (0xFF << level) through imProcessBitMask.
+  imProcessBitMask(src_image, dst_image, (unsigned char)(0xFF << level), IM_BIT_AND);
+}
+
diff --git a/src/process/im_fft.cpp b/src/process/im_fft.cpp
new file mode 100644
index 0000000..5ab1642
--- /dev/null
+++ b/src/process/im_fft.cpp
@@ -0,0 +1,218 @@
+/** \file
+ * \brief Fast Fourier Transform using FFTW library
+ *
+ * Comments only for FFTW 3:
+ *
+ * Only the non optimized files were used, for better portability.
+ * You must change the makefile to add other files.
+ *
+ * Duplicated files: buffered.c conf.c direct.c indirect.c generic.c 
+ *                   nop.c plan.c problem.c rader.c rank0.c rank-geq2.c
+ *                   vrank-geq1.c solve.c ct.c codlist.c
+ *    These were renamed to "r*" when in the rdft folder, and to "k*" when in the kernel folder.
+ * 
+ * New File:  api\config.h
+ *
+ * From the FFTW manual:
+\verbatim
+    "FFTW is best at handling sizes of the form 2^a 3^b 5^c 7^d 11^e 13^f, 
+     where e+f is either 0 or 1, and the other exponents are arbitrary. 
+     Other sizes are computed by means of a slow, 
+     general-purpose algorithm (which nevertheless retains O(n log n) performance, even for prime sizes)."
+\endverbatim
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_fft.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+#include <im.h>
+#include <im_util.h>
+#include <im_complex.h>
+#include <im_convert.h>
+
+#include "im_process.h"
+
+#include <stdlib.h>
+#include <assert.h>
+#include <memory.h>
+
+#ifdef USE_FFTW3
+#include "fftw3.h"
+#else
+#include "fftw.h"
+#endif
+
+static void iCopyCol(imcfloat *map1, imcfloat *map2, int height, int width1, int width2)
+{
+  // Strided copy of "height" elements from map2 to map1.
+  // The source advances width2 elements per step and the destination
+  // width1 elements per step, so a step of 1 addresses a packed buffer
+  // and a step equal to the image width walks down one image column.
+  // Elements are copied in increasing order.
+  for (int row = 0; row < height; row++, map1 += width1, map2 += width2)
+    *map1 = *map2;
+}
+
+static void iCenterFFT(imcfloat *map, int width, int height, int inverse)
+{
+  // Swaps the two halves of every line and then of every column, moving the
+  // origin of the data to the center (inverse == 0) or back (inverse != 0).
+  // When a dimension is odd the halves differ by one element: half1 is the
+  // length of the leading part, which is moved to the end, and half2 is the
+  // length of the trailing part, which is moved to the start.
+  imcfloat *map1, *map2, *map3, *tmp;
+  int i, tmp_count, half1_width, half2_width, half1_height, half2_height;
+
+  if (inverse)
+  {
+    half1_width = width/2;
+    half1_height = height/2;
+    half2_width = (width+1)/2;
+    half2_height = (height+1)/2;
+  }
+  else
+  {
+    half1_width = (width+1)/2;
+    half1_height = (height+1)/2;
+    half2_width = width/2;
+    half2_height = height/2;
+  }
+
+  // A single buffer, large enough for both passes, is allocated before any
+  // data is touched so that a failed allocation leaves the image unchanged.
+  tmp_count = half1_width > half1_height? half1_width: half1_height;
+  if (tmp_count <= 0) return;  // no leading part to move in either direction
+  tmp = (imcfloat*)malloc(tmp_count*sizeof(imcfloat));
+  if (!tmp) return;
+
+  map1 = map;
+  map2 = map + half1_width;
+  map3 = map + half2_width;
+  for(i = 0; i < height; i++)
+  {
+    memcpy(tmp, map1, half1_width*sizeof(imcfloat));
+    // source and destination overlap by one element when inverse != 0 and
+    // width is odd, so memmove is required here instead of memcpy
+    memmove(map1, map2, half2_width*sizeof(imcfloat));
+    memcpy(map3, tmp, half1_width*sizeof(imcfloat));
+    map1 += width; map2 += width; map3 += width;
+  }
+
+  map1 = map;
+  map2 = map + half1_height*width;
+  map3 = map + half2_height*width;
+  for(i = 0; i < width; i++)
+  {
+    iCopyCol(tmp, map1, half1_height, 1, width);
+    iCopyCol(map1, map2, half2_height, width, width);
+    iCopyCol(map3, tmp, half1_height, width, 1);
+    map1++; map2++; map3++;
+  }
+
+  free(tmp);
+}
+
+static void iDoFFT(void *map, int width, int height, int inverse, int center, int normalize)
+{ // In-place 2D FFT (forward, or backward when inverse != 0) of one plane of width x height complex floats.
+  if (inverse && center)  // the origin is moved back from the center before the inverse transform
+    iCenterFFT((imcfloat*)map, width, height, inverse);
+
+#ifdef USE_FFTW3
+  fftwf_plan plan = fftwf_plan_dft_2d(height, width, 
+                      (fftwf_complex*)map, (fftwf_complex*)map, // in-place transform
+                      inverse?FFTW_BACKWARD:FFTW_FORWARD, FFTW_ESTIMATE);
+  fftwf_execute(plan);
+  fftwf_destroy_plan(plan);
+#else
+  fftwnd_plan plan = fftw2d_create_plan(height, width, inverse?FFTW_BACKWARD:FFTW_FORWARD, FFTW_ESTIMATE|FFTW_IN_PLACE);
+  fftwnd(plan, 1, (FFTW_COMPLEX*)map, 1, 0, 0, 0, 0);
+  fftwnd_destroy_plan(plan);
+#endif
+
+  if (!inverse && center)  // the origin is moved to the center after the forward transform
+    iCenterFFT((imcfloat*)map, width, height, inverse);
+
+  if (normalize)
+  {
+    // normalize == 1 divides by sqrt(width*height), any other non zero value by width*height
+    float NM = (float)(width * height);
+    // two floats per element; counted with integers because a float is not exact above 2^24 pixels
+    int count = 2 * width * height;
+    if (normalize == 1)
+      NM = (float)sqrt(NM);
+    float *fmap = (float*)map;
+    for (int i = 0; i < count; i++)
+      *fmap++ /= NM;
+  }
+}
+
+void imProcessSwapQuadrants(imImage* image, int inverse)
+{ // Runs iCenterFFT in place on every plane of the image; the planes are accessed as imcfloat.
+  for (int i = 0; i < image->depth; i++)
+    iCenterFFT((imcfloat*)image->data[i], image->width, image->height, inverse);
+}
+
+void imProcessFFTraw(imImage* image, int inverse, int center, int normalize)
+{ // Runs iDoFFT in place on every plane of the image, forwarding the inverse, center and normalize flags.
+  for (int i = 0; i < image->depth; i++)
+    iDoFFT(image->data[i], image->width, image->height, inverse, center, normalize);
+}
+
+void imProcessFFT(const imImage* src_image, imImage* dst_image)
+{ // Forward FFT of src_image into dst_image.
+  if (src_image->data_type != IM_CFLOAT)
+    imConvertDataType(src_image, dst_image, 0, 0, 0, 0);  // convert into the destination before transforming
+  else
+    imImageCopy(src_image, dst_image);
+  // the transform itself is done in place on the destination
+  imProcessFFTraw(dst_image, 0, 1, 0); // forward, centered, unnormalized
+}
+
+void imProcessIFFT(const imImage* src_image, imImage* dst_image)
+{ // Inverse FFT of src_image into dst_image; the source is copied without any data type conversion.
+  imImageCopy(src_image, dst_image);
+  // the transform itself is done in place on the destination
+  imProcessFFTraw(dst_image, 1, 1, 2); // inverse=1, center=1 (origin moved back before transforming), normalize=2 (divide by width*height)
+}
+
+void imProcessCrossCorrelation(const imImage* src_image1, const imImage* src_image2, imImage* dst_image)
+{ // Transforms both images, combines the spectra with imProcessMultiplyConj and transforms the result back into dst_image.
+  imImage *tmp_image = imImageCreate(src_image2->width, src_image2->height, src_image2->color_space, IM_CFLOAT);
+  if (!tmp_image) 
+    return;  // NOTE(review): the failure is silent, dst_image is left untouched
+  // complex copy of the second image
+  if (src_image2->data_type != IM_CFLOAT)
+    imConvertDataType(src_image2, tmp_image, 0, 0, 0, 0);
+  else
+    imImageCopy(src_image2, tmp_image);
+  // complex copy of the first image, directly in the destination
+  if (src_image1->data_type != IM_CFLOAT)
+    imConvertDataType(src_image1, dst_image, 0, 0, 0, 0);
+  else
+    imImageCopy(src_image1, dst_image);
+
+  imProcessFFTraw(tmp_image, 0, 1, 1);   // forward, centered, normalized
+  imProcessFFTraw(dst_image, 0, 1, 1);
+
+  imProcessMultiplyConj(dst_image, tmp_image, dst_image);
+
+  imProcessFFTraw(dst_image, 1, 1, 1);   // inverse, uncentered, normalized
+  imProcessSwapQuadrants(dst_image, 0);  // from origin to center
+
+  imImageDestroy(tmp_image);
+}
+
+void imProcessAutoCorrelation(const imImage* src_image, imImage* dst_image)
+{ // Forward FFT, imProcessMultiplyConj of the spectrum with itself, inverse FFT and quadrant swap, all done in dst_image.
+  if (src_image->data_type != IM_CFLOAT)
+    imConvertDataType(src_image, dst_image, 0, 0, 0, 0);
+  else
+    imImageCopy(src_image, dst_image);
+
+  imProcessFFTraw(dst_image, 0, 0, 1);   // forward, at origin, normalized
+
+  imProcessMultiplyConj(dst_image, dst_image, dst_image);
+
+  imProcessFFTraw(dst_image, 1, 0, 1);   // inverse, at origin, normalized
+  imProcessSwapQuadrants(dst_image, 0);  // from origin to center
+}
diff --git a/src/process/im_geometric.cpp b/src/process/im_geometric.cpp
new file mode 100644
index 0000000..a0b5129
--- /dev/null
+++ b/src/process/im_geometric.cpp
@@ -0,0 +1,724 @@
+/** \file
+ * \brief Geometric Operations
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_geometric.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_counter.h>
+
+#include "im_process_loc.h"
+#include "im_math_op.h"
+
+#include <stdlib.h>
+#include <memory.h>
+
+static inline void imRect2Polar(float x, float y, float *radius, float *theta)
+{ // Cartesian to polar: *radius is the distance to the origin, *theta the atan2f angle in radians.
+  *radius = sqrtf(x*x + y*y);
+  *theta = atan2f(y, x);
+}
+
+static inline void imPolar2Rect(float radius, float theta, float *x, float *y)
+{ // Polar (theta in radians) to cartesian coordinates.
+  *x = radius * cosf(theta);
+  *y = radius * sinf(theta);
+}
+
+static inline void swirl_invtransf(int x, int y, float *xl, float *yl, float k, float xc, float yc)
+{ // Rotates (x, y) around (xc, yc) by an angle of k*radius and stores the result in (*xl, *yl).
+  float radius, theta;
+  x -= (int)xc;  // the center is truncated to an integer here, but added back as a float below
+  y -= (int)yc;
+
+  imRect2Polar((float)x, (float)y, &radius, &theta);
+
+  theta += k * radius;  // the rotation angle grows linearly with the distance to the center
+
+  imPolar2Rect(radius, theta, xl, yl);
+
+  *xl += xc;
+  *yl += yc;
+}
+
+template <class DT, class DTU> 
+static int Swirl(int width, int height, DT *src_map, DT *dst_map, 
+                         float k, int counter, DTU Dummy, int order)
+{ // Swirl distortion of one plane. Returns 0 as soon as imCounterInc returns 0, or 1 after the last line.
+  float xl, yl;
+  float xc = float(width/2.);
+  float yc = float(height/2.);
+  // (xc, yc) is the center of the image; every destination pixel is mapped back to a source position
+  for (int y = 0; y < height; y++)
+  {
+    for (int x = 0; x < width; x++)
+    {
+      swirl_invtransf(x, y, &xl, &yl, k, xc, yc);
+                   
+      // if inside the original image broad area
+      if (xl > 0.0 && yl > 0.0 && xl < width && yl < height)
+      {
+        if (order == 1)
+          *dst_map = imBilinearInterpolation(width, height, src_map, xl, yl);
+        else if (order == 3)
+          *dst_map = imBicubicInterpolation(width, height, src_map, xl, yl, Dummy);
+        else
+          *dst_map = imZeroOrderInterpolation(width, height, src_map, xl, yl);
+      }
+      // otherwise the destination pixel is left unchanged
+      dst_map++;
+    }
+
+    if (!imCounterInc(counter))  // one counter step per line
+      return 0;
+  }
+
+  return 1;
+}
+
+static inline void radial_invtransf(int x, int y, float *xl, float *yl, float k1, float xc, float yc)
+{ // Scales the offset of (x, y) from the center by 1 + k1*r^2 and stores the result in (*xl, *yl).
+  float aux;
+  x -= (int)xc;  // the center is truncated to an integer here, but added back as a float below
+  y -= (int)yc;
+  aux = 1.0f + k1*(x*x + y*y);  // x*x + y*y is the squared distance to the center
+  *xl = x*aux + xc;
+  *yl = y*aux + yc;
+}
+
+template <class DT, class DTU> 
+static int Radial(int width, int height, DT *src_map, DT *dst_map, 
+                         float k1, int counter, DTU Dummy, int order)
+{ // Radial distortion of one plane. Returns 0 as soon as imCounterInc returns 0, or 1 after the last line.
+  float xl, yl;
+  float xc = float(width/2.);
+  float yc = float(height/2.);
+  int diag = (int)sqrt(float(width*width + height*height));  // image diagonal, truncated to an integer
+
+  k1 /= (diag * diag);  // makes the coefficient relative to the squared diagonal
+         
+  for (int y = 0; y < height; y++)
+  {
+    for (int x = 0; x < width; x++)
+    {
+      radial_invtransf(x, y, &xl, &yl, k1, xc, yc);
+                   
+      // if inside the original image broad area
+      if (xl > 0.0 && yl > 0.0 && xl < width && yl < height)
+      {
+        if (order == 1)
+          *dst_map = imBilinearInterpolation(width, height, src_map, xl, yl);
+        else if (order == 3)
+          *dst_map = imBicubicInterpolation(width, height, src_map, xl, yl, Dummy);
+        else
+          *dst_map = imZeroOrderInterpolation(width, height, src_map, xl, yl);
+      }
+      // otherwise the destination pixel is left unchanged
+      dst_map++;
+    }
+
+    if (!imCounterInc(counter))  // one counter step per line
+      return 0;
+  }
+
+  return 1;
+}
+
+//*******************************************************************************************
+//rotate_invtransf
+//   shift the center to the origin of the destination image
+//   rotates centered in the origin
+//   shift the origin back to the center of the original image
+//*******************************************************************************************
+
+inline void rotate_invtransf(int x, int y, float *xl, float *yl, double cos0, double sin0, float dcx, float dcy, float scx, float scy)
+{ // Rotates the center of pixel (x, y) around (dcx, dcy) and re-expresses it relative to (scx, scy); the result is stored in (*xl, *yl).
+  double xr = x+0.5 - dcx;  // pixel center relative to (dcx, dcy)
+  double yr = y+0.5 - dcy;
+  *xl = float(xr * cos0 - yr * sin0 + scx);
+  *yl = float(xr * sin0 + yr * cos0 + scy);
+}
+
+template <class DT, class DTU> 
+static int RotateCenter(int src_width, int src_height, DT *src_map, 
+                        int dst_width, int dst_height, DT *dst_map, 
+                        double cos0, double sin0, int counter, DTU Dummy, int order)
+{ // Rotation of one plane, center of the source to center of the destination. Returns 0 as soon as imCounterInc returns 0, else 1.
+  float xl, yl;
+  float dcx = float(dst_width/2.);   // destination center
+  float dcy = float(dst_height/2.);
+  float scx = float(src_width/2.);   // source center
+  float scy = float(src_height/2.);
+
+  for (int y = 0; y < dst_height; y++)
+  {
+    for (int x = 0; x < dst_width; x++)
+    {
+      rotate_invtransf(x, y, &xl, &yl, cos0, sin0, dcx, dcy, scx, scy);
+                   
+      // if inside the original image broad area
+      if (xl > 0.0 && yl > 0.0 && xl < src_width && yl < src_height)
+      {
+        if (order == 1)
+          *dst_map = imBilinearInterpolation(src_width, src_height, src_map, xl, yl);
+        else if (order == 3)
+          *dst_map = imBicubicInterpolation(src_width, src_height, src_map, xl, yl, Dummy);
+        else
+          *dst_map = imZeroOrderInterpolation(src_width, src_height, src_map, xl, yl);
+      }
+      // otherwise the destination pixel is left unchanged
+      dst_map++;
+    }
+
+    if (!imCounterInc(counter))  // one counter step per destination line
+      return 0;
+  }
+
+  return 1;
+}
+
+template <class DT, class DTU> 
+static int Rotate(int src_width, int src_height, DT *src_map, 
+                  int dst_width, int dst_height, DT *dst_map, 
+                  double cos0, double sin0, int ref_x, int ref_y, int to_origin, 
+                  int counter, DTU Dummy, int order)
+{ // Rotation of one plane around the reference point (ref_x, ref_y). Returns 0 as soon as imCounterInc returns 0, else 1.
+  float xl, yl;
+  float sx = float(ref_x);  // reference point in the source
+  float sy = float(ref_y);
+  float dx = sx;            // reference point in the destination: the same position by default
+  float dy = sy;
+  if (to_origin)
+  {
+    dx = 0;                 // or the destination origin when to_origin is set
+    dy = 0;
+  }
+
+  for (int y = 0; y < dst_height; y++)
+  {
+    for (int x = 0; x < dst_width; x++)
+    {
+      rotate_invtransf(x, y, &xl, &yl, cos0, sin0, dx, dy, sx, sy);
+                   
+      // if inside the original image broad area
+      if (xl > 0.0 && yl > 0.0 && xl < src_width && yl < src_height)
+      {
+        if (order == 1)
+          *dst_map = imBilinearInterpolation(src_width, src_height, src_map, xl, yl);
+        else if (order == 3)
+          *dst_map = imBicubicInterpolation(src_width, src_height, src_map, xl, yl, Dummy);
+        else
+          *dst_map = imZeroOrderInterpolation(src_width, src_height, src_map, xl, yl);
+      }
+      // otherwise the destination pixel is left unchanged
+      dst_map++;
+    }
+
+    if (!imCounterInc(counter))  // one counter step per destination line
+      return 0;
+  }
+
+  return 1;
+}
+
+template <class DT> 
+static void Rotate90(int src_width, 
+                   int src_height, 
+                   DT *src_map, 
+                   int dst_width,
+                   int dst_height,
+                   DT *dst_map, 
+                   int dir)
+{ // Each source line becomes a destination column. dir == 1: (x, y) -> (y, dst_height-1-x); otherwise: (x, y) -> (dst_width-1-y, x).
+  int xd,yd,x,y;
+
+  if (dir == 1)
+    xd = 0;
+  else
+    xd = dst_width - 1;
+
+  for(y = 0 ; y < src_height ; y++)
+  {
+    if (dir == 1)
+      yd = dst_height - 1;  // the column is filled from its last line upwards
+    else
+      yd = 0;
+
+    for(x = 0 ; x < src_width ; x++)
+    {
+      dst_map[yd * dst_width + xd] = src_map[y * src_width + x];
+
+      if (dir == 1)
+        yd--;
+      else
+        yd++;
+    }        
+
+    if (dir == 1)  // next source line goes to the next destination column
+      xd++;
+    else
+      xd--;
+  }
+}
+
+template <class DT> 
+static void Rotate180(int src_width, 
+                   int src_height, 
+                   DT *src_map, 
+                   int dst_width,
+                   int dst_height,
+                   DT *dst_map)
+{
+  // 180 degree rotation: source pixel (x, y) is written to destination
+  // pixel (dst_width-1-x, dst_height-1-y). The source is scanned in memory
+  // order while the destination is filled from its last line and last
+  // column backwards.
+  for (int y = 0; y < src_height; y++)
+  {
+    const DT *src_line = src_map + y * src_width;
+    DT *dst_line = dst_map + (dst_height - 1 - y) * dst_width;
+
+    for (int x = 0; x < src_width; x++)
+    {
+      int xd = dst_width - 1 - x;
+
+      dst_line[xd] = src_line[x];
+    }
+  }
+}
+
+template <class DT> 
+static void Mirror(int src_width, 
+                   int src_height, 
+                   DT *src_map, 
+                   int dst_width,
+                   int dst_height,
+                   DT *dst_map)
+{ // Horizontal mirror: source pixel (x, y) goes to destination pixel (dst_width-1-x, y).
+  int xd,x,y;
+  (void)dst_height;  // unused parameter
+
+  if (src_map == dst_map) // check of in-place operation
+  {
+    int half_width = src_width/2;  // only half of each line is visited, every visit swaps two pixels
+    for(y = 0 ; y < src_height; y++)
+    {
+      xd = dst_width - 1;
+
+      for(x = 0 ; x < half_width; x++)
+      {
+        DT temp_value = src_map[y * dst_width + xd];
+        src_map[y * dst_width + xd] = src_map[y * src_width + x];
+        src_map[y * src_width + x] = temp_value;
+        xd--;
+      }        
+    }
+  }
+  else
+  {
+    for(y = 0 ; y < src_height; y++)
+    {
+      xd = dst_width - 1;
+
+      for(x = 0 ; x < src_width; x++)
+      {
+        dst_map[y * dst_width + xd] = src_map[y * src_width + x];
+        xd--;
+      }        
+    }
+  }
+}
+
+template <class DT> 
+static void Flip(int src_width, 
+                   int src_height, 
+                   DT *src_map, 
+                   int dst_width,
+                   int dst_height,
+                   DT *dst_map)
+{ // Vertical flip: source line y goes to destination line dst_height-1-y.
+  int yd,y;
+  yd = dst_height - 1;
+  if (src_map == dst_map) // check of in-place operation
+  {
+    // Lines are exchanged pixel by pixel from both ends towards the middle, so
+    // no temporary line has to be allocated and the in-place flip can not fail.
+    int half_height = src_height/2;
+    for(y = 0 ; y < half_height; y++)
+    {
+      DT *top_line = src_map+y*src_width, *bottom_line = dst_map+yd*dst_width;
+      for(int x = 0 ; x < src_width; x++)
+      {
+        DT temp_value = bottom_line[x];
+        bottom_line[x] = top_line[x];
+        top_line[x] = temp_value;
+      }
+      yd--;
+    }
+  }
+  else
+  {
+    for(y = 0 ; y < src_height; y++)
+    {
+      memcpy(dst_map+yd*dst_width,src_map+y*src_width,src_width * sizeof(DT));
+      yd--;
+    }
+  }
+}
+
+template <class DT> 
+static void InterlaceSplit(int src_width, 
+                   int src_height, 
+                   DT *src_map, 
+                   int dst_width,
+                   DT *dst_map1,
+                   DT *dst_map2)
+{
+  // De-interlaces the source: even lines (0, 2, 4, ...) are copied to
+  // dst_map1 and odd lines (1, 3, 5, ...) to dst_map2. Source lines 2*k
+  // and 2*k+1 both land on destination line k of their respective map.
+  const size_t line_size = src_width * sizeof(DT);
+
+  for (int y = 0; y < src_height; y++)
+  {
+    DT *dst_map = (y & 1)? dst_map2: dst_map1;
+    int yd = y / 2;
+
+    memcpy(dst_map + yd*dst_width, src_map + y*src_width, line_size);
+  }
+}
+
+void imProcessRotate90(const imImage* src_image, imImage* dst_image, int dir)
+{ // Applies Rotate90 to every plane, with the pixel type selected by the source data type.
+  for (int i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)  // any data type not listed below is silently ignored
+    {
+    case IM_BYTE:
+      Rotate90(src_image->width, src_image->height, (imbyte*)src_image->data[i],  dst_image->width, dst_image->height, (imbyte*)dst_image->data[i], dir);
+      break;
+    case IM_USHORT:
+      Rotate90(src_image->width, src_image->height, (imushort*)src_image->data[i],  dst_image->width, dst_image->height, (imushort*)dst_image->data[i], dir);
+      break;
+    case IM_INT:
+      Rotate90(src_image->width, src_image->height, (int*)src_image->data[i],  dst_image->width, dst_image->height, (int*)dst_image->data[i], dir);
+      break;
+    case IM_FLOAT:
+      Rotate90(src_image->width, src_image->height, (float*)src_image->data[i],  dst_image->width, dst_image->height, (float*)dst_image->data[i], dir);
+      break;
+    case IM_CFLOAT:
+      Rotate90(src_image->width, src_image->height, (imcfloat*)src_image->data[i],  dst_image->width, dst_image->height, (imcfloat*)dst_image->data[i], dir);
+      break;
+    }
+  }
+}
+
+void imProcessRotate180(const imImage* src_image, imImage* dst_image)
+{ // Applies Rotate180 to every plane, with the pixel type selected by the source data type.
+  for (int i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)  // any data type not listed below is silently ignored
+    {
+    case IM_BYTE:
+      Rotate180(src_image->width, src_image->height, (imbyte*)src_image->data[i],  dst_image->width, dst_image->height, (imbyte*)dst_image->data[i]);
+      break;
+    case IM_USHORT:
+      Rotate180(src_image->width, src_image->height, (imushort*)src_image->data[i],  dst_image->width, dst_image->height, (imushort*)dst_image->data[i]);
+      break;
+    case IM_INT:
+      Rotate180(src_image->width, src_image->height, (int*)src_image->data[i],  dst_image->width, dst_image->height, (int*)dst_image->data[i]);
+      break;
+    case IM_FLOAT:
+      Rotate180(src_image->width, src_image->height, (float*)src_image->data[i],  dst_image->width, dst_image->height, (float*)dst_image->data[i]);
+      break;
+    case IM_CFLOAT:
+      Rotate180(src_image->width, src_image->height, (imcfloat*)src_image->data[i],  dst_image->width, dst_image->height, (imcfloat*)dst_image->data[i]);
+      break;
+    }
+  }
+}
+
+int imProcessRadial(const imImage* src_image, imImage* dst_image, float k1, int order)
+{ // Applies Radial to every plane. Returns the result of the last Radial call (0 when no plane was processed).
+  int ret = 0;
+
+  int counter = imCounterBegin("Radial Distort");
+  imCounterTotal(counter, dst_image->depth*dst_image->height, "Processing...");  // one step per line of every plane
+
+  for (int i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)  // the Dummy argument selects float or imcfloat for the bicubic interpolation
+    {
+    case IM_BYTE:
+      ret = Radial(src_image->width, src_image->height, (imbyte*)src_image->data[i], (imbyte*)dst_image->data[i], k1, counter, float(0), order);
+      break;
+    case IM_USHORT:
+      ret = Radial(src_image->width, src_image->height, (imushort*)src_image->data[i], (imushort*)dst_image->data[i], k1, counter, float(0), order);
+      break;
+    case IM_INT:
+      ret = Radial(src_image->width, src_image->height, (int*)src_image->data[i], (int*)dst_image->data[i], k1, counter, float(0), order);
+      break;
+    case IM_FLOAT:
+      ret = Radial(src_image->width, src_image->height, (float*)src_image->data[i], (float*)dst_image->data[i], k1, counter, float(0), order);
+      break;
+    case IM_CFLOAT:
+      ret = Radial(src_image->width, src_image->height, (imcfloat*)src_image->data[i], (imcfloat*)dst_image->data[i], k1, counter, imcfloat(0,0), order);
+      break;
+    }
+
+    if (!ret)  // stop at the first plane that returns 0
+      break;
+  }
+
+  imCounterEnd(counter);
+
+  return ret;
+}
+
+int imProcessSwirl(const imImage* src_image, imImage* dst_image, float k, int order)
+{ // Applies Swirl to every plane. Returns the result of the last Swirl call (0 when no plane was processed).
+  int ret = 0;
+
+  int counter = imCounterBegin("Swirl Distort");
+  imCounterTotal(counter, dst_image->depth*dst_image->height, "Processing...");  // one step per line of every plane
+
+  for (int i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)  // the Dummy argument selects float or imcfloat for the bicubic interpolation
+    {
+    case IM_BYTE:
+      ret = Swirl(src_image->width, src_image->height, (imbyte*)src_image->data[i], (imbyte*)dst_image->data[i], k, counter, float(0), order);
+      break;
+    case IM_USHORT:
+      ret = Swirl(src_image->width, src_image->height, (imushort*)src_image->data[i], (imushort*)dst_image->data[i], k, counter, float(0), order);
+      break;
+    case IM_INT:
+      ret = Swirl(src_image->width, src_image->height, (int*)src_image->data[i], (int*)dst_image->data[i], k, counter, float(0), order);
+      break;
+    case IM_FLOAT:
+      ret = Swirl(src_image->width, src_image->height, (float*)src_image->data[i], (float*)dst_image->data[i], k, counter, float(0), order);
+      break;
+    case IM_CFLOAT:
+      ret = Swirl(src_image->width, src_image->height, (imcfloat*)src_image->data[i], (imcfloat*)dst_image->data[i], k, counter, imcfloat(0,0), order);
+      break;
+    }
+
+    if (!ret)  // stop at the first plane that returns 0
+      break;
+  }
+
+  imCounterEnd(counter);
+
+  return ret;
+}
+
+//*******************************************************************************************
+//rotate_transf
+//   In this case shift to the origin, rotate, but do NOT shift back
+//*******************************************************************************************
+
+static void rotate_transf(float cx, float cy, int x, int y, float *xl, float *yl, double cos0, double sin0)
+{ // Rotates the center of pixel (x, y) around (cx, cy); the result stays relative to (cx, cy).
+  double xr = x+0.5 - cx;  // pixel center relative to (cx, cy)
+  double yr = y+0.5 - cy;
+  *xl = float( xr*cos0 + yr*sin0);
+  *yl = float(-xr*sin0 + yr*cos0);
+}
+
+void imProcessCalcRotateSize(int width, int height, int *new_width, int *new_height, double cos0, double sin0)
+{ // Computes the size of the bounding box of a width x height image rotated around its center.
+  float xl, yl, xmin, xmax, ymin, ymax;
+  float wd2 = float(width)/2;
+  float hd2 = float(height)/2;
+  // the four corner pixels are rotated and the extent of the results is accumulated
+  rotate_transf(wd2, hd2, 0, 0, &xl, &yl, cos0, sin0);
+  xmin = xl; ymin = yl;
+  xmax = xl; ymax = yl;
+
+  rotate_transf(wd2, hd2, width-1, height-1, &xl, &yl, cos0, sin0);
+  xmin = min_op(xmin, xl); ymin = min_op(ymin, yl);
+  xmax = max_op(xmax, xl); ymax = max_op(ymax, yl);
+
+  rotate_transf(wd2, hd2, 0, height-1, &xl, &yl, cos0, sin0);
+  xmin = min_op(xmin, xl); ymin = min_op(ymin, yl);
+  xmax = max_op(xmax, xl); ymax = max_op(ymax, yl);
+
+  rotate_transf(wd2, hd2, width-1, 0, &xl, &yl, cos0, sin0);
+  xmin = min_op(xmin, xl); ymin = min_op(ymin, yl);
+  xmax = max_op(xmax, xl); ymax = max_op(ymax, yl);
+  // the extent is measured between pixel centers, 2.0 is added before truncating to an integer
+  *new_width = (int)(xmax - xmin + 2.0);
+  *new_height = (int)(ymax - ymin + 2.0);
+}
+
+int imProcessRotate(const imImage* src_image, imImage* dst_image, double cos0, double sin0, int order)
+{ // Applies RotateCenter to the image. Returns the result of the last RotateCenter call (0 when no plane was processed).
+  int ret = 0;
+
+  int counter = imCounterBegin("Rotate");
+  imCounterTotal(counter, dst_image->depth*dst_image->height, "Processing...");
+
+  if (src_image->color_space == IM_MAP)
+  { // only plane 0 is processed, as bytes and always with order 0 (presumably because palette indices can not be interpolated)
+    ret = RotateCenter(src_image->width, src_image->height, (imbyte*)src_image->data[0],  dst_image->width, dst_image->height, (imbyte*)dst_image->data[0], cos0, sin0, counter, float(0), 0);
+  }
+  else
+  {
+     for (int i = 0; i < src_image->depth; i++)
+    {
+      switch(src_image->data_type)
+      {
+      case IM_BYTE:
+        ret = RotateCenter(src_image->width, src_image->height, (imbyte*)src_image->data[i], dst_image->width, dst_image->height, (imbyte*)dst_image->data[i], cos0, sin0, counter, float(0), order);
+        break;
+      case IM_USHORT:
+        ret = RotateCenter(src_image->width, src_image->height, (imushort*)src_image->data[i], dst_image->width, dst_image->height, (imushort*)dst_image->data[i], cos0, sin0, counter, float(0), order);
+        break;
+      case IM_INT:
+        ret = RotateCenter(src_image->width, src_image->height, (int*)src_image->data[i], dst_image->width, dst_image->height, (int*)dst_image->data[i], cos0, sin0, counter, float(0), order);
+        break;
+      case IM_FLOAT:
+        ret = RotateCenter(src_image->width, src_image->height, (float*)src_image->data[i], dst_image->width, dst_image->height, (float*)dst_image->data[i], cos0, sin0, counter, float(0), order);
+        break;
+      case IM_CFLOAT:
+        ret = RotateCenter(src_image->width, src_image->height, (imcfloat*)src_image->data[i], dst_image->width, dst_image->height, (imcfloat*)dst_image->data[i], cos0, sin0, counter, imcfloat(0,0), order);
+        break;
+      }
+
+      if (!ret)  // stop at the first plane that returns 0
+        break;
+    }
+   }
+
+  imCounterEnd(counter);
+
+  return ret;
+}
+
+int imProcessRotateRef(const imImage* src_image, imImage* dst_image, double cos0, double sin0, int x, int y, int to_origin, int order)
+{ // Applies Rotate, around the reference point (x, y), to the image. Returns the result of the last Rotate call (0 when no plane was processed).
+  int ret = 0;
+
+  int counter = imCounterBegin("RotateRef");
+  imCounterTotal(counter, dst_image->depth*dst_image->height, "Processing...");
+
+  if (src_image->color_space == IM_MAP)
+  { // only plane 0 is processed, as bytes and always with order 0 (presumably because palette indices can not be interpolated)
+    ret = Rotate(src_image->width, src_image->height, (imbyte*)src_image->data[0],  dst_image->width, dst_image->height, (imbyte*)dst_image->data[0], cos0, sin0, x, y, to_origin, counter, float(0), 0);
+  }
+  else
+  {
+     for (int i = 0; i < src_image->depth; i++)
+    {
+      switch(src_image->data_type)
+      {
+      case IM_BYTE:
+        ret = Rotate(src_image->width, src_image->height, (imbyte*)src_image->data[i], dst_image->width, dst_image->height, (imbyte*)dst_image->data[i], cos0, sin0, x, y, to_origin, counter, float(0), order);
+        break;
+      case IM_USHORT:
+        ret = Rotate(src_image->width, src_image->height, (imushort*)src_image->data[i], dst_image->width, dst_image->height, (imushort*)dst_image->data[i], cos0, sin0, x, y, to_origin, counter, float(0), order);
+        break;
+      case IM_INT:
+        ret = Rotate(src_image->width, src_image->height, (int*)src_image->data[i], dst_image->width, dst_image->height, (int*)dst_image->data[i], cos0, sin0, x, y, to_origin, counter, float(0), order);
+        break;
+      case IM_FLOAT:
+        ret = Rotate(src_image->width, src_image->height, (float*)src_image->data[i], dst_image->width, dst_image->height, (float*)dst_image->data[i], cos0, sin0, x, y, to_origin, counter, float(0), order);
+        break;
+      case IM_CFLOAT:
+        ret = Rotate(src_image->width, src_image->height, (imcfloat*)src_image->data[i], dst_image->width, dst_image->height, (imcfloat*)dst_image->data[i], cos0, sin0, x, y, to_origin, counter, imcfloat(0,0), order);
+        break;
+      }
+
+      if (!ret)  // stop at the first plane that returns 0
+        break;
+    }
+   }
+
+  imCounterEnd(counter);
+
+  return ret;
+}
+
+void imProcessMirror(const imImage* src_image, imImage* dst_image)
+{ // Applies Mirror to every plane, with the pixel type selected by the source data type.
+  int i;
+
+  for (i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)  // any data type not listed below is silently ignored
+    {
+    case IM_BYTE:
+      Mirror(src_image->width, src_image->height, (imbyte*)src_image->data[i],  dst_image->width, dst_image->height, (imbyte*)dst_image->data[i]);
+      break;
+    case IM_USHORT:
+      Mirror(src_image->width, src_image->height, (imushort*)src_image->data[i],  dst_image->width, dst_image->height, (imushort*)dst_image->data[i]);
+      break;
+    case IM_INT:
+      Mirror(src_image->width, src_image->height, (int*)src_image->data[i],  dst_image->width, dst_image->height, (int*)dst_image->data[i]);
+      break;
+    case IM_FLOAT:
+      Mirror(src_image->width, src_image->height, (float*)src_image->data[i],  dst_image->width, dst_image->height, (float*)dst_image->data[i]);
+      break;
+    case IM_CFLOAT:
+      Mirror(src_image->width, src_image->height, (imcfloat*)src_image->data[i],  dst_image->width, dst_image->height, (imcfloat*)dst_image->data[i]);
+      break;
+    }
+  }
+}
+
+void imProcessFlip(const imImage* src_image, imImage* dst_image)
+{ // Applies Flip to every plane, with the pixel type selected by the source data type.
+  int i;
+
+  for (i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)  // any data type not listed below is silently ignored
+    {
+    case IM_BYTE:
+      Flip(src_image->width, src_image->height, (imbyte*)src_image->data[i],  dst_image->width, dst_image->height, (imbyte*)dst_image->data[i]);
+      break;
+    case IM_USHORT:
+      Flip(src_image->width, src_image->height, (imushort*)src_image->data[i],  dst_image->width, dst_image->height, (imushort*)dst_image->data[i]);
+      break;
+    case IM_INT:
+      Flip(src_image->width, src_image->height, (int*)src_image->data[i],  dst_image->width, dst_image->height, (int*)dst_image->data[i]);
+      break;
+    case IM_FLOAT:
+      Flip(src_image->width, src_image->height, (float*)src_image->data[i],  dst_image->width, dst_image->height, (float*)dst_image->data[i]);
+      break;
+    case IM_CFLOAT:
+      Flip(src_image->width, src_image->height, (imcfloat*)src_image->data[i],  dst_image->width, dst_image->height, (imcfloat*)dst_image->data[i]);
+      break;
+    }
+  }
+}
+
+void imProcessInterlaceSplit(const imImage* src_image, imImage* dst_image1, imImage* dst_image2)
+{ // Applies InterlaceSplit to every plane; the width of dst_image1 is used as the line width of both destinations.
+  int i;
+
+  for (i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)  // any data type not listed below is silently ignored
+    {
+    case IM_BYTE:
+      InterlaceSplit(src_image->width, src_image->height, (imbyte*)src_image->data[i],  dst_image1->width, (imbyte*)dst_image1->data[i], (imbyte*)dst_image2->data[i]);
+      break;
+    case IM_USHORT:
+      InterlaceSplit(src_image->width, src_image->height, (imushort*)src_image->data[i],  dst_image1->width, (imushort*)dst_image1->data[i], (imushort*)dst_image2->data[i]);
+      break;
+    case IM_INT:
+      InterlaceSplit(src_image->width, src_image->height, (int*)src_image->data[i],  dst_image1->width, (int*)dst_image1->data[i], (int*)dst_image2->data[i]);
+      break;
+    case IM_FLOAT:
+      InterlaceSplit(src_image->width, src_image->height, (float*)src_image->data[i],  dst_image1->width, (float*)dst_image1->data[i], (float*)dst_image2->data[i]);
+      break;
+    case IM_CFLOAT:
+      InterlaceSplit(src_image->width, src_image->height, (imcfloat*)src_image->data[i],  dst_image1->width, (imcfloat*)dst_image1->data[i], (imcfloat*)dst_image2->data[i]);
+      break;
+    }
+  }
+}
diff --git a/src/process/im_histogram.cpp b/src/process/im_histogram.cpp
new file mode 100644
index 0000000..e6796fe
--- /dev/null
+++ b/src/process/im_histogram.cpp
@@ -0,0 +1,105 @@
+/** \file
+ * \brief Histogram Based Operations
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_histogram.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_math.h>
+
+#include "im_process_pon.h"
+#include "im_process_ana.h"
+
+#include <stdlib.h>
+#include <memory.h>
+
+static void iExpandHistogram(const imImage* src_image, imImage* dst_image, int low_level, int high_level)
+{ // Remaps byte values so that [low_level, high_level] is stretched over the byte range, through a 256 entry table.
+  int i, value;
+
+  imbyte re_map[256];
+  memset(re_map, 0, 256);
+
+  int range = high_level-low_level+1;
+  float factor = 256.0f / (float)range;
+  // build the table: below low_level -> 0, above high_level -> 255, in between -> rescaled by factor
+  for (i = 0; i < 256; i++)
+  {             
+    if (i < low_level)
+      re_map[i] = 0;
+    else if (i > high_level)
+      re_map[i] = 255;
+    else
+    {
+      value = imResample(i - low_level, factor);
+      re_map[i] = (imbyte)IM_BYTECROP(value);
+    }
+  }
+  // apply the table; assumes the count*depth bytes of all planes are contiguous from data[0] - TODO confirm
+  imbyte* dst_map = (imbyte*)dst_image->data[0];
+  imbyte* src_map = (imbyte*)src_image->data[0];
+  int total_count = src_image->count*src_image->depth;
+  for (i = 0; i < total_count; i++)
+    dst_map[i] = re_map[src_map[i]];
+}
+
+void imProcessExpandHistogram(const imImage* src_image, imImage* dst_image, float percent)
+{ // Finds the levels that leave "percent" of the pixel count outside at each end of the histogram and expands the range between them.
+  unsigned long histo[256];
+  imCalcGrayHistogram(src_image, histo, 0);
+
+  unsigned long acum, cut = (unsigned long)((src_image->count * percent) / 100.0f);  // pixels to discard at each end
+  int low_level, high_level;
+
+  acum = 0;
+  for (low_level = 0; low_level < 256; low_level++)  // first level, from the bottom, where the sum exceeds cut
+  {  
+    acum += histo[low_level];
+    if (acum > cut)
+      break;
+  }
+
+  acum = 0;
+  for (high_level = 255; high_level > 0; high_level--)  // first level, from the top, where the sum exceeds cut
+  {  
+    acum += histo[high_level];
+    if (acum > cut)
+      break;
+  }
+
+  if (low_level >= high_level)  // empty range: fall back to the full range
+  {
+    low_level = 0;
+    high_level = 255;
+  }
+
+  iExpandHistogram(src_image, dst_image, low_level, high_level);
+}
+
+void imProcessEqualizeHistogram(const imImage* src_image, imImage* dst_image)
+{ // Remaps byte values through a table built from the histogram scaled by 256/count.
+  int i, value;
+
+  imbyte re_map[256];
+  memset(re_map, 0, 256);
+
+  unsigned long histo[256];
+  imCalcGrayHistogram(src_image, histo, 1);  // NOTE(review): the last argument presumably requests a cumulative histogram - confirm
+
+  float factor = 256.0f / (float)src_image->count;
+
+  for (i = 0; i < 256; i++)
+  {             
+    value = imResample(histo[i], factor);
+    re_map[i] = (imbyte)IM_BYTECROP(value);
+  }
+  // apply the table; assumes the count*depth bytes of all planes are contiguous from data[0] - TODO confirm
+  imbyte* dst_map = (imbyte*)dst_image->data[0];
+  imbyte* src_map = (imbyte*)src_image->data[0];
+  int total_count = src_image->count*src_image->depth;
+  for (i = 0; i < total_count; i++)
+    dst_map[i] = re_map[src_map[i]];
+}
diff --git a/src/process/im_houghline.cpp b/src/process/im_houghline.cpp
new file mode 100644
index 0000000..6ead982
--- /dev/null
+++ b/src/process/im_houghline.cpp
@@ -0,0 +1,435 @@
+/** \file
+ * \brief Hough Transform
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_houghline.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+#include <im.h>
+#include <im_util.h>
+#include <im_complex.h>
+#include <im_convert.h>
+#include <im_counter.h>
+
+#include "im_process_glo.h"
+
+#include <stdlib.h>
+#include <memory.h>
+
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846
+#endif
+
+static double *costab=NULL, *sintab=NULL;
+
+static int hgAbs(int x)
+{
+  return x < 0? -x: x;
+}
+
+typedef struct _point
+{
+  int rho, theta, count;
+} point;
+
+typedef struct _listnode
+{
+  struct _listnode *next;
+  point pt;
+} listnode;
+
+static listnode* listnew(point *pt)
+{
+  listnode* node = (listnode*)malloc(sizeof(listnode));
+  node->next = NULL;
+  node->pt = *pt;
+  return node;
+}
+
+static listnode* listadd(listnode* node, point *pt)
+{
+  node->next = listnew(pt);
+  return node->next;
+}
+
+/* minimum angle to match similar angles */
+#define THETA_DELTA1   0.05  /* radians */
+#define THETA_DELTA2   3     /* degrees */
+
+static int ptNear(point* pt1, point* pt2, int rho_delta)
+{
+  int theta_diff = hgAbs(pt1->theta - pt2->theta);
+  if ((hgAbs(pt1->rho - pt2->rho) < rho_delta && theta_diff < THETA_DELTA2) ||
+      (hgAbs(pt1->rho + pt2->rho) < rho_delta && 180-theta_diff < THETA_DELTA2))
+  {
+    if (pt2->count > pt1->count)
+      return 2;   /* replace the line */
+    else
+      return 1;
+  }
+  else
+    return 0;
+}
+
+static listnode* listadd_filtered(listnode* list, listnode* cur_node, point *pt, int rho_delta)
+{
+  int ret;
+  listnode* lt = list;
+  while (lt)
+  {
+    ret = ptNear(&lt->pt, pt, rho_delta);
+    if (ret)
+    {
+      if (ret == 2)
+        lt->pt = *pt;  /* replace the line */
+      return cur_node;
+    }
+    lt = lt->next;
+  }
+
+  cur_node->next = listnew(pt);
+  return cur_node->next;
+}
+
+/*C* Initial version from XITE
+
+        houghLine
+        $Id: im_houghline.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+        Copyright 1990, Blab, UiO
+        Image processing lab, Department of Informatics
+        University of Oslo
+        E-mail: blab@ifi.uio.no
+________________________________________________________________
+
+  houghLine - Hough transform for line detection
+
+  Description:
+    Performs a Hough transform to detect lines. Every band in the
+    input image 'inimage' is transformed to a two dimensional
+    Hough space, a (theta, rho) space.
+
+		After creating the transform, the Hough space may be searched
+		for local maxima. Within each band, only the largest local
+		maximum (maxima) within a 'ws'x'ws' area is registered.
+		Besides, only maxima with number of updates above a limit
+		given by the ul option are used.
+
+		updateLimit determines the minimum number of updates for a maximum
+		to be used. The minimum number is determined from 'updateLimit'
+		and the size of the hough space image:
+		| updateLimit * MAX(horizontal size, vertical size)
+		Default: 0.1.
+
+    All pixels above zero in the 'input' band are
+		transformed to (theta,rho) space in the 'output'
+		band. The 'input' band may have any size, while
+		the 'output' band currently must be at least
+		| xsize: 180
+		| ysize: 2 * sqrt(inputXsize*inputXsize +
+		|             inputYsize*inputYsize) + 1
+
+		Notice that band x coordinates 1..180 correspond
+		to angles theta = 0 .. 179, and y coordinates
+		1..YSIZE correspond to rho = -(ysize/2) .. ysize/2.
+
+  Restrictions:
+    'input' must have pixel type imbyte.
+    'output' must have pixel type int.
+
+  Author:		Tor Lønnestad, BLAB, Ifi, UiO
+*/
+
+
+/* Accumulates the Hough transform of "input" into "output".
+ * Every non-zero input pixel votes once per theta column, in the row given by
+ * its rounded rho shifted by rhomax; votes falling outside the rows of
+ * "output" are discarded. Returns 1 on success, or 0 when imCounterInc
+ * returns 0 or the sine/cosine tables can not be allocated. */
+static int houghLine(const imImage* input, imImage* output, int counter)
+{
+  int x, y, theta, row, ret = 1;
+  imbyte *input_map = (imbyte*)input->data[0];
+  int *output_map = (int*)output->data[0];
+  int ixsize = input->width, iysize = input->height;
+  int ixhalf = ixsize/2, iyhalf = iysize/2;
+  int thetamax = output->width;   /* theta max = 180 */
+  int rhomax = output->height/2;  /* rho shift to 0, -rmax <= r <= +rmax */
+
+  costab = (double*)malloc(thetamax*sizeof(double));
+  sintab = (double*)malloc(thetamax*sizeof(double));
+  if (!costab || !sintab)
+    ret = 0;  /* out of memory: skip the work, release below */
+
+  for (theta=0; ret && theta < thetamax; theta++)
+  {
+    double th = (M_PI*theta)/thetamax;
+    costab[theta] = cos(th);
+    sintab[theta] = sin(th);
+  }
+
+  for (y=0; ret && y < iysize; y++)
+  {
+    for (x=0; x < ixsize; x++)
+    {
+      if (!input_map[y*ixsize + x])
+        continue;
+
+      for (theta=0; theta < thetamax; theta++)
+      {
+        /* an even output height has no row for rho == +rhomax, so test the row itself */
+        row = rhomax + imRound((x-ixhalf)*costab[theta] + (y-iyhalf)*sintab[theta]);
+        if (row < 0 || row >= output->height) continue;
+
+        output_map[row*thetamax + theta]++;
+      }
+    }
+
+    if (!imCounterInc(counter))
+      ret = 0;  /* stop requested: leave the row loop */
+  }
+
+  free(costab); costab = NULL;
+  free(sintab); sintab = NULL;
+
+  return ret;
+}
+
+/* Collects the non-zero pixels of "hough_points" as a list of (theta, rho) lines.
+ * When "hough" is given, its accumulator value is stored as the vote count and
+ * lines near an already listed one are not added again (listadd_filtered).
+ * "line_count" is incremented once per node added. Returns the list head. */
+static listnode* findMaxima(const imImage* hough_points, int *line_count, const imImage* hough)
+{
+  int x, y, xsize, ysize, rhomax, offset, rho_delta = 0;
+  listnode* maxima = NULL, *cur_node = NULL;
+  point pt;
+  imbyte *map = (imbyte*)hough_points->data[0];
+  int *hough_map = NULL;
+
+  xsize = hough_points->width;   /* X = theta */
+  ysize = hough_points->height;  /* Y = rho   */
+  rhomax = ysize/2;
+
+  if (hough)
+  {
+    hough_map = (int*)hough->data[0];
+    rho_delta = (int)(rhomax*tan(THETA_DELTA1));
+  }
+
+  for (y=0; y < ysize; y++)
+  {
+    for (x=0; x < xsize; x++)
+    {
+      offset = y*xsize + x;
+      if (!map[offset])
+        continue;
+
+      pt.theta = x;
+      pt.rho = y-rhomax;
+      /* set for every point: the first node used to keep an indeterminate count that ptNear compared */
+      pt.count = hough_map? hough_map[offset]: 0;
+
+      if (!maxima)
+      {
+        cur_node = maxima = listnew(&pt);
+        (*line_count)++;
+      }
+      else if (hough_map)
+      {
+        listnode* old_node = cur_node;
+        cur_node = listadd_filtered(maxima, cur_node, &pt, rho_delta);
+        if (cur_node != old_node)
+          (*line_count)++;
+      }
+      else
+      {
+        cur_node = listadd(cur_node, &pt);
+        (*line_count)++;
+      }
+    }
+  }
+
+  return maxima;
+}
+
+#define SWAPINT(a, b) {int t = a; a = b; b = t; }
+
+static void drawLine(imImage* image, int theta, int rho)
+{
+  int xsize, ysize, xstart, xstop, ystart, ystop, xhalf, yhalf;
+  float a, b;
+  imbyte *map = (imbyte*)image->data[0];
+
+  xsize = image->width;
+  ysize = image->height;
+  xhalf = xsize/2;
+  yhalf = ysize/2;
+
+  if (theta == 0)  /* vertical line */
+  {
+    int y;
+    if (rho+xhalf < 0 || rho+xhalf > xsize-1) return;
+    for (y=0; y < ysize; y++)
+      map[y*xsize + rho+xhalf]=254;
+
+    return;
+  }
+
+  if (theta == 90)  /* horizontal line */
+  {
+    int x;
+    if (rho+yhalf < 0 || rho+yhalf > ysize-1) return;
+    for (x=0; x < xsize; x++)
+      map[(rho+yhalf)*xsize + x]=254;
+
+    return;
+  }
+
+  a = (float)(-costab[theta]/sintab[theta]);
+  b = (float)((rho + xhalf*costab[theta] + yhalf*sintab[theta])/sintab[theta]);
+
+  {
+    int x[2];
+    int y[2];
+    int c = 0;
+    int y1 = imRound(b);              /* x = 0 */
+    int y2 = imRound(a*(xsize-1)+b);  /* x = xsize-1 */
+
+    int x1 = imRound(-b/a);           /* y = 0 */
+    int x2 = imRound((ysize-1-b)/a);  /* y = ysize-1 */
+
+    if (y1 >= 0 && y1 < ysize)
+    {
+      y[c] = y1;
+      x[c] = 0;
+      c++;
+    }
+
+    if (y2 >= 0 && y2 < ysize)
+    {
+      y[c] = y2;
+      x[c] = xsize-1;
+      c++;
+    }
+
+    if (c < 2 && x1 >= 0 && x1 < xsize)
+    {
+      x[c] = x1;
+      y[c] = 0;
+      c++;
+    }
+
+    if (c < 2 && x2 >= 0 && x2 < xsize)
+    {
+      x[c] = x2;
+      y[c] = ysize-1;
+      c++;
+    }
+
+    if (c < 2) return;
+
+    ystart = y[0];
+    xstart = x[0];
+    ystop = y[1];
+    xstop = x[1];
+  }
+
+  {
+    int x, y;
+    if (45 <= theta && theta <= 135)
+    {
+      if (xstart > xstop)
+        SWAPINT(xstart, xstop);
+
+      for (x=xstart; x <= xstop; x++)
+      {
+        y = imRound(a*x + b);
+        if (y < 0) continue;
+        if (y > ysize-1) continue;
+        map[y*xsize + x]=254;
+      }
+    }
+    else
+    {
+      if (ystart > ystop)
+        SWAPINT(ystart, ystop);
+
+      for (y=ystart; y <= ystop; y++)
+      {
+        x = imRound((y-b)/a);
+        if (x < 0) continue;
+        if (x > xsize-1) continue;
+        map[y*xsize + x]=254;
+      }
+    }
+  }
+}
+
+int imProcessHoughLines(const imImage* image, imImage *NewImage)
+{
+  int counter = imCounterBegin("Hough Line Transform");
+  imCounterTotal(counter, image->height, "Processing...");
+
+  int ret = houghLine(image, NewImage, counter);
+
+  imCounterEnd(counter);
+
+  return ret;
+}
+
+static void DrawPoints(imImage *image, listnode* maxima)
+{
+  listnode* cur_node;
+  while (maxima)
+  {
+    cur_node = maxima;
+    drawLine(image, cur_node->pt.theta, cur_node->pt.rho);
+    maxima = cur_node->next;
+    free(cur_node);
+  }
+}
+
+static void ReplaceColor(imImage* NewImage)
+{
+  int i;
+  imbyte* map = (imbyte*)NewImage->data[0];
+
+  NewImage->color_space = IM_MAP;
+  NewImage->palette[254] = imColorEncode(255, 0, 0);
+
+  for (i = 0; i < NewImage->count; i++)
+  {
+    if (map[i] == 254)
+      map[i] = 255;
+  }
+}
+
+int imProcessHoughLinesDraw(const imImage* original_image, const imImage *hough, const imImage *hough_points, imImage *NewImage)
+{
+  int theta, line_count = 0;
+
+  if (original_image != NewImage)
+    imImageCopyData(original_image, NewImage);
+
+  listnode* maxima = findMaxima(hough_points, &line_count, hough);
+
+  ReplaceColor(NewImage);
+
+  costab = (double*)malloc(180*sizeof(double));
+  sintab = (double*)malloc(180*sizeof(double));
+
+  for (theta=0; theta < 180; theta++)
+  {
+    double th = (M_PI*theta)/180.;
+    costab[theta] = cos(th);
+    sintab[theta] = sin(th);
+  }
+
+  DrawPoints(NewImage, maxima);
+
+  free(costab); costab = NULL;
+  free(sintab); sintab = NULL;
+
+  return line_count;
+}
+
diff --git a/src/process/im_kernel.cpp b/src/process/im_kernel.cpp
new file mode 100644
index 0000000..d5e976e
--- /dev/null
+++ b/src/process/im_kernel.cpp
@@ -0,0 +1,293 @@
+/** \file
+ * \brief Kernel Generators
+ * Creates several known kernels
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_kernel.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+#include "im.h"
+#include "im_util.h"
+#include "im_image.h"
+#include "im_kernel.h"
+
+#include <stdlib.h>
+#include <memory.h>
+#include <assert.h>
+#include <math.h>
+
+
+static imImage* iKernelCreate(int w, int h, int* data, const char* desc)  /* w x h IM_INT kernel copied from data, tagged with desc */
+{
+  imImage* kernel = imImageCreate(w, h, IM_GRAY, IM_INT);
+  if (!kernel) return NULL;  /* creation failed: nothing to fill */
+  memcpy(kernel->data[0], data, kernel->size);
+  imImageSetAttribute(kernel, "Description", IM_BYTE, -1, (void*)desc);  /* desc is const: callers pass string literals */
+  return kernel;
+}
+
+imImage* imKernelSobel(void)
+{
+  int kernel_data[3*3] = {
+    -1, -2, -1,
+     0,  0,  0,
+     1,  2,  1
+  };
+
+  return iKernelCreate(3, 3, kernel_data, "Sobel");
+}
+
+imImage* imKernelPrewitt(void)
+{
+  int kernel_data[3*3] = {
+    -1, -1, -1,
+     0,  0,  0,
+     1,  1,  1
+  };
+
+  return iKernelCreate(3, 3, kernel_data, "Prewitt");
+}
+
+imImage* imKernelKirsh(void)
+{
+  int kernel_data[3*3] = {
+    -3, -3, -3,
+    -3,  0, -3,
+     5,  5,  5
+  };
+
+  return iKernelCreate(3, 3, kernel_data, "Kirsh");
+}
+
+imImage* imKernelLaplacian4(void)
+{
+  int kernel_data[3*3] = {
+     0, -1, 0,
+    -1,  4, -1,
+     0, -1, 0
+  };
+
+  return iKernelCreate(3, 3, kernel_data, "Laplacian4");
+}
+
+imImage* imKernelLaplacian8(void)
+{
+  int kernel_data[3*3] = {
+    -1, -1, -1,
+    -1,  8, -1,
+    -1, -1, -1
+  };
+
+  return iKernelCreate(3, 3, kernel_data, "Laplacian8");
+}
+
+imImage* imKernelLaplacian5x5(void)
+{
+  int kernel_data[5*5] = {
+     0, -1, -1, -1,  0,
+    -1,  0,  1,  0, -1,
+    -1,  1,  8,  1, -1,
+    -1,  0,  1,  0, -1,
+     0, -1, -1, -1,  0
+  };
+
+  return iKernelCreate(5, 5, kernel_data, "Laplacian5x5");
+}
+
+imImage* imKernelLaplacian7x7(void)
+{
+  int kernel_data[7*7] = {
+    -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, 48, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1
+  };
+
+  return iKernelCreate(7, 7, kernel_data, "Laplacian7x7");
+}
+
+imImage* imKernelGradian3x3(void)
+{
+  int kernel_data[3*3] = {
+     0, -1, 0,
+     0,  1, 0,
+     0,  0, 0
+  };
+
+  return iKernelCreate(3, 3, kernel_data, "Gradian3x3");
+}
+
+imImage* imKernelGradian7x7(void)
+{
+  int kernel_data[7*7] = {
+     0, -1, -1,  0,  1,  1,  0,
+    -1, -2, -2,  0,  2,  2,  1,
+    -1, -2, -3,  0,  3,  2,  1,
+    -1, -2, -3,  0,  3,  2,  1,
+    -1, -2, -3,  0,  3,  2,  1,
+    -1, -2, -2,  0,  2,  2,  1,
+     0, -1, -1,  0,  1,  1,  0
+  };
+
+  return iKernelCreate(7, 7, kernel_data, "Gradian7x7");
+}
+
+imImage* imKernelSculpt(void)
+{
+  int kernel_data[3*3] = {
+     0, 0, 1,
+     0, 0, 0, 
+    -1, 0, 0 
+  };
+
+  return iKernelCreate(3, 3, kernel_data, "Sculpt");
+}
+
+imImage* imKernelMean3x3(void)
+{
+  int kernel_data[3*3] = {
+    1, 1, 1, 
+    1, 1, 1, 
+    1, 1, 1 
+  };
+
+  return iKernelCreate(3, 3, kernel_data, "Mean3x3");
+}
+
+imImage* imKernelMean5x5(void)
+{
+  int kernel_data[5*5] = {
+    1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1
+  };
+
+  return iKernelCreate(5, 5, kernel_data, "Mean5x5");
+}
+
+imImage* imKernelCircularMean5x5(void)
+{
+  int kernel_data[5*5] = {
+    0, 1, 1, 1, 0,
+    1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1,
+    0, 1, 1, 1, 0
+  };
+
+  return iKernelCreate(5, 5, kernel_data, "CircularMean5x5");
+}
+
+imImage* imKernelMean7x7(void)
+{
+  int kernel_data[7*7] = {
+    1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1
+  };
+
+  return iKernelCreate(7, 7, kernel_data, "Mean7x7");
+}
+
+imImage* imKernelCircularMean7x7(void)
+{
+  int kernel_data[7*7] = {
+    0, 0, 1, 1, 1, 0, 0,
+    0, 1, 1, 1, 1, 1, 0,
+    1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1,
+    0, 1, 1, 1, 1, 1, 0,
+    0, 0, 1, 1, 1, 0, 0
+  };
+
+  return iKernelCreate(7, 7, kernel_data, "CircularMean7x7");
+}
+
+imImage* imKernelGaussian3x3(void)
+{
+  int kernel_data[3*3] = {
+    1, 2, 1, 
+    2, 4, 2, 
+    1, 2, 1 
+  };
+
+  return iKernelCreate(3, 3, kernel_data, "Gaussian3x3");
+}
+
+imImage* imKernelGaussian5x5(void)
+{
+  int kernel_data[5*5] = {
+    1,  4,  6,  4, 1, 
+    4, 16, 24, 16, 4, 
+    6, 24, 36, 24, 6, 
+    4, 16, 24, 16, 4, 
+    1,  4,  6,  4, 1 
+  };
+
+  return iKernelCreate(5, 5, kernel_data, "Gaussian5x5");
+}
+
+imImage* imKernelBarlett5x5(void)
+{
+  int kernel_data[5*5] = {
+    1, 2, 3, 2, 1, 
+    2, 4, 6, 4, 2, 
+    3, 6, 9, 6, 3, 
+    2, 4, 6, 4, 2, 
+    1, 2, 3, 2, 1
+  };
+
+  return iKernelCreate(5, 5, kernel_data, "Barlett5x5");
+}
+
+imImage* imKernelTopHat5x5(void)
+{
+  int kernel_data[5*5] = {
+     0, -1, -1, -1,  0, 
+    -1, -1,  3, -1, -1, 
+    -1,  3,  4,  3, -1, 
+    -1, -1,  3, -1, -1, 
+     0, -1, -1, -1,  0 
+  };
+
+  return iKernelCreate(5, 5, kernel_data, "TopHat5x5");
+}
+
+imImage* imKernelTopHat7x7(void)
+{
+  int kernel_data[7*7] = {
+     0,  0, -1, -1, -1,  0,  0,
+     0, -1, -1, -1, -1, -1,  0, 
+    -1, -1,  3,  3,  3, -1, -1, 
+    -1, -1,  3,  4,  3, -1, -1, 
+    -1, -1,  3,  3,  3, -1, -1, 
+     0, -1, -1, -1, -1, -1,  0,
+     0,  0, -1, -1, -1,  0,  0 
+  };
+
+  return iKernelCreate(7, 7, kernel_data, "TopHat7x7");
+}
+
+imImage* imKernelEnhance(void)
+{
+  int kernel_data[5*5] = {
+     0, -1, -2, -1,  0, 
+    -1, -4,  0, -4, -1, 
+    -2,  0, 40,  0, -2, 
+    -1, -4,  0, -4, -1, 
+     0, -1, -2, -1,  0 
+  };
+
+  return iKernelCreate(5, 5, kernel_data, "Enhance");
+}
+
diff --git a/src/process/im_logic.cpp b/src/process/im_logic.cpp
new file mode 100644
index 0000000..82e607d
--- /dev/null
+++ b/src/process/im_logic.cpp
@@ -0,0 +1,136 @@
+/** \file
+ * \brief Logical Arithmetic Operations
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_logic.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+
+#include "im_process_pon.h"
+
+#include <stdlib.h>
+#include <memory.h>
+
+template <class T> 
+static void DoBitwiseOp(T *map1, T *map2, T *map, int count, int op)
+{
+  int i;
+  /* Combines map1 and map2 element by element into map, over "count" elements, according to "op". */
+  switch(op)
+  {
+  case IM_BIT_AND:
+    for (i = 0; i < count; i++)
+      map[i] = map1[i] & map2[i];
+    break;
+  case IM_BIT_OR:
+    for (i = 0; i < count; i++)
+      map[i] = map1[i] | map2[i];
+    break;
+  case IM_BIT_XOR:  /* NOTE(review): computes ~(a | b), i.e. NOR, not an exclusive-or - confirm this is intended */
+    for (i = 0; i < count; i++)
+      map[i] = (T)~(map1[i] | map2[i]);
+    break;
+  }
+}
+
+void imProcessBitwiseOp(const imImage* src_image1, const imImage* src_image2, imImage* dst_image, int op)
+{
+  int count = src_image1->count*src_image1->depth;
+
+  switch(src_image1->data_type)
+  {
+  case IM_BYTE:
+    DoBitwiseOp((imbyte*)src_image1->data[0], (imbyte*)src_image2->data[0], (imbyte*)dst_image->data[0], count, op);
+    break;                                                                                
+  case IM_USHORT:
+    DoBitwiseOp((imushort*)src_image1->data[0], (imushort*)src_image2->data[0], (imushort*)dst_image->data[0], count, op);
+    break;                                                                                
+  case IM_INT:                                                                           
+    DoBitwiseOp((int*)src_image1->data[0], (int*)src_image2->data[0], (int*)dst_image->data[0], count, op);
+    break;                                                                                
+  }
+}
+
+template <class T> 
+static void DoBitwiseNot(T *map1, T *map, int count)
+{
+  for (int i = 0; i < count; i++)
+    map[i] = ~map1[i];
+}
+
+static void DoBitwiseNotBin(imbyte *map1, imbyte *map, int count)
+{
+  for (int i = 0; i < count; i++)
+    map[i] = map1[i]? 0: 1;
+}
+
+void imProcessBitwiseNot(const imImage* src_image, imImage* dst_image)
+{
+  int count = src_image->count*src_image->depth;
+
+  if (dst_image->color_space == IM_BINARY)
+  {
+    DoBitwiseNotBin((imbyte*)src_image->data[0], (imbyte*)dst_image->data[0], count);
+    return;
+  }
+
+  switch(src_image->data_type)
+  {
+  case IM_BYTE:
+    DoBitwiseNot((imbyte*)src_image->data[0], (imbyte*)dst_image->data[0], count);
+    break;                                                                                
+  case IM_USHORT:
+    DoBitwiseNot((imushort*)src_image->data[0], (imushort*)dst_image->data[0], count);
+    break;                                                                                
+  case IM_INT:                                                                           
+    DoBitwiseNot((int*)src_image->data[0], (int*)dst_image->data[0], count);
+    break;                                                                                
+  }
+}
+
+void imProcessBitMask(const imImage* src_image, imImage* dst_image, unsigned char mask, int op)
+{
+  imbyte* src_map = (imbyte*)src_image->data[0];
+  imbyte* dst_map = (imbyte*)dst_image->data[0];
+  int i;
+  int count = dst_image->count * dst_image->depth;  /* every sample of dst_image is written */
+  switch(op)
+  {
+  case IM_BIT_AND:
+    for (i = 0; i < count; i++)
+      *dst_map++ = *src_map++ & mask;
+    break;
+  case IM_BIT_OR:
+    for (i = 0; i < count; i++)
+      *dst_map++ = *src_map++ | mask;
+    break;
+  case IM_BIT_XOR:  /* NOTE(review): computes ~(a | mask), i.e. NOR, not an exclusive-or - confirm this is intended */
+    for (i = 0; i < count; i++)
+      *dst_map++ = (imbyte)~(*src_map++ | mask);
+    break;
+  }
+  /* OR/XOR with a mask above 1 can produce values other than 0 and 1, so a binary destination is relabeled as gray */
+  if ((op == IM_BIT_XOR || op == IM_BIT_OR) && dst_image->color_space == IM_BINARY && mask > 1)
+    dst_image->color_space = IM_GRAY;
+}
+
+void imProcessBitPlane(const imImage* src_image, imImage* dst_image, int plane, int reset)
+{
+  imbyte mask = imbyte(0x01 << plane);
+  if (reset) mask = ~mask;
+  imbyte* src_map = (imbyte*)src_image->data[0];
+  imbyte* dst_map = (imbyte*)dst_image->data[0];
+  int count = dst_image->count * dst_image->depth;
+  for (int i = 0; i < count; i++)
+  {
+    if (reset) 
+      *dst_map++ = *src_map & mask;
+    else
+      *dst_map++ = (*src_map & mask)? 1: 0;
+
+    src_map++;
+  }
+}
diff --git a/src/process/im_morphology_bin.cpp b/src/process/im_morphology_bin.cpp
new file mode 100644
index 0000000..9405ff6
--- /dev/null
+++ b/src/process/im_morphology_bin.cpp
@@ -0,0 +1,317 @@
+/** \file
+ * \brief Morphology Operations for Binary Images
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_morphology_bin.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_counter.h>
+
+#include "im_process_loc.h"
+#include "im_process_pon.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+#include <math.h>
+
+static int DoBinMorphConvolve(imbyte *map, imbyte* new_map, int width, int height, const imImage* kernel, int counter, int hit_value, int miss_value)
+{
+  int *kernel_line;
+  int offset, new_offset, i, j, x, y;
+  int kh, kw, kh2, kw2, hit;
+
+  kh = kernel->height;
+  kw = kernel->width;
+  kh2 = kernel->height/2;
+  kw2 = kernel->width/2;
+
+  int* kernel_data = (int*)kernel->data[0];
+
+  for(j = 0; j < height; j++)
+  {
+    new_offset = j * width;
+
+    for(i = 0; i < width; i++)
+    {
+      hit = 1;
+    
+      for(y = -kh2; y <= kh2 && hit; y++)
+      {
+        kernel_line = kernel_data + (y+kh2)*kernel->width;
+
+        if ((j + y < 0) ||       // pass the bottom border
+            (j + y >= height))   // pass the top border
+          offset = -1;
+        else
+          offset = (j + y) * width;
+
+        for(x = -kw2; x <= kw2; x++)
+        {
+          if ((offset == -1) ||
+              (i + x < 0) ||     // pass the left border
+              (i + x >= width))  // pass the right border
+          {
+            if(kernel_line[x+kw2] != -1 && kernel_line[x+kw2] != 0)  // 0 extension beyond borders
+              hit = 0;
+          }
+          else
+          {
+            if(kernel_line[x+kw2] != -1 && kernel_line[x+kw2] != map[offset + (i + x)])
+              hit = 0;
+          }
+        }
+      }
+
+      new_map[new_offset + i] = (imbyte)(hit? hit_value: miss_value);
+    }    
+
+    if (!imCounterInc(counter))
+      return 0;
+  }
+
+  return 1;
+}
+
+/* Applies the binary morphological convolution of "kernel" to src_image "iter" times,
+ * feeding each pass after the first with the result of the previous one. hit_white
+ * selects whether a match writes 1 (and a miss 0) or the reverse. Returns 0 if a pass
+ * returns 0, if the intermediate buffer can not be allocated, or if iter < 1. */
+int imProcessBinMorphConvolve(const imImage* src_image, imImage* dst_image, const imImage *kernel, int hit_white, int iter)
+{
+  int j, ret = 0;
+  int hit_value = hit_white? 1: 0;
+  int miss_value = hit_white? 0: 1;
+  void *tmp = NULL;
+  int counter;
+
+  if (iter > 1)
+  {
+    /* passes after the first read a copy of the previous result */
+    tmp = malloc(src_image->size);
+    if (!tmp)
+      return 0;
+  }
+
+  counter = imCounterBegin("Binary Morphological Convolution");
+  const char* msg = (const char*)imImageGetAttribute(kernel, "Description", NULL, NULL);
+  if (!msg) msg = "Processing...";
+  imCounterTotal(counter, src_image->height*iter, msg);
+
+  for (j = 0; j < iter; j++)
+  {
+    if (j == 0)
+      ret = DoBinMorphConvolve((imbyte*)src_image->data[0], (imbyte*)dst_image->data[0], src_image->width, src_image->height, kernel, counter, hit_value, miss_value);
+    else
+    {
+      memcpy(tmp, dst_image->data[0], src_image->size);
+      ret = DoBinMorphConvolve((imbyte*)tmp, (imbyte*)dst_image->data[0], src_image->width, src_image->height, kernel, counter, hit_value, miss_value);
+    }
+
+    if (!ret)
+      break;
+  }
+
+  free(tmp);
+  imCounterEnd(counter);
+
+  return ret;
+}
+
+int imProcessBinMorphErode(const imImage* src_image, imImage* dst_image, int kernel_size, int iter)
+{
+  imImage* kernel = imImageCreate(kernel_size, kernel_size, IM_GRAY, IM_INT);
+  if (!kernel) return 0;  // kernel could not be created
+  imImageSetAttribute(kernel, "Description", IM_BYTE, -1, (void*)"Erode");
+  int* kernel_data = (int*)kernel->data[0];  // erosion kernel: every element is 1
+  for(int i = 0; i < kernel->count; i++)
+      kernel_data[i] = 1;
+  // hit_white = 1: a pixel is set to 1 only where the whole kernel matches, otherwise 0
+  int ret = imProcessBinMorphConvolve(src_image, dst_image, kernel, 1, iter);
+  imImageDestroy(kernel);
+  return ret;
+}
+
+int imProcessBinMorphDilate(const imImage* src_image, imImage* dst_image, int kernel_size, int iter)
+{
+  imImage* kernel = imImageCreate(kernel_size, kernel_size, IM_GRAY, IM_INT);
+  if (!kernel) return 0;  // kernel could not be created
+  imImageSetAttribute(kernel, "Description", IM_BYTE, -1, (void*)"Dilate");
+  int ret = imProcessBinMorphConvolve(src_image, dst_image, kernel, 0, iter);  // Kernel is all zeros; hit_white = 0 writes 0 on a match
+  imImageDestroy(kernel);
+  return ret;
+}
+
+int imProcessBinMorphOpen(const imImage* src_image, imImage* dst_image, int kernel_size, int iter)
+{
+  imImage*temp = imImageClone(src_image);
+  if (!temp)
+    return 0;
+
+  if (!imProcessBinMorphErode(src_image, temp, kernel_size, iter)) {imImageDestroy(temp); return 0;}
+  if (!imProcessBinMorphDilate(temp, dst_image, kernel_size, iter)) {imImageDestroy(temp); return 0;}
+
+  imImageDestroy(temp);
+  return 1;
+}
+
+int imProcessBinMorphClose(const imImage* src_image, imImage* dst_image, int kernel_size, int iter)
+{
+  imImage*temp = imImageClone(src_image);
+  if (!temp)
+    return 0;
+
+  if (!imProcessBinMorphDilate(src_image, temp, kernel_size, iter)) {imImageDestroy(temp); return 0;}
+  if (!imProcessBinMorphErode(temp, dst_image, kernel_size, iter)) {imImageDestroy(temp); return 0;}
+
+  imImageDestroy(temp);
+  return 1;
+}
+
+int imProcessBinMorphOutline(const imImage* src_image, imImage* dst_image, int kernel_size, int iter)
+{
+  if (!imProcessBinMorphErode(src_image, dst_image, kernel_size, iter)) return 0;
+  imProcessArithmeticOp(src_image, dst_image, dst_image, IM_BIN_DIFF);
+  return 1;
+}
+
+/* Direction masks:      */
+/*   N     S   W     E    */
+static int masks[] = { 0200, 0002, 0040, 0010 };
+
+/*  True if pixel neighbor map indicates the pixel is 8-simple and  */
+/*  not an end point and thus can be deleted.  The neighborhood  */
+/*  map is defined as an integer of bits abcdefghi with a non-zero  */
+/*  bit representing a non-zero pixel.  The bit assignment for the  */
+/*  neighborhood is:            */
+/*                  */
+/*        a b c          */
+/*        d e f          */
+/*        g h i          */
+
+static unsigned char isdelete[512] = 
+{
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+};
+
+static void DoThinImage(imbyte *map, int xsize, int ysize)
+{
+  int    x, y;    /* Pixel location    */
+  int    i;    /* Pass index      */
+  int    pc  = 0;  /* Pass count      */
+  int    count  = 1;  /* Deleted pixel count    */
+  int    p, q;    /* Neighborhood maps of adjacent cells      */
+  imbyte    *qb;    /* Neighborhood maps of previous scanline      */
+  int    m;    /* Deletion direction mask  */
+  
+  qb = (imbyte *) malloc(xsize);
+  qb[xsize-1] = 0;    /* Used for lower-right pixel  */
+  
+  while ( count ) 
+  {    
+    /* Scan src_image while deletions  */
+    pc++;
+    count = 0;
+    
+    for ( i = 0 ; i < 4 ; i++ ) 
+    {
+      m = masks[i];
+      
+      /* Build initial previous scan buffer.      */
+      
+      p = map[0] != 0;
+      for (x = 0 ; x < xsize-1 ; x++)
+      {
+        p = ((p<<1)&0006) | (map[x+1] != 0);
+        qb[x] = (imbyte)p;
+      }
+      
+      /* Scan src_image for pixel deletion candidates.    */
+      
+      for ( y = 0 ; y < ysize-1 ; y++ ) 
+      {
+        q = qb[0];
+        p = ((q<<3)&0110) | (map[(y+1)*xsize] != 0);
+        
+        for ( x = 0 ; x < xsize-1 ; x++ ) 
+        {
+          q = qb[x];
+          p = ((p<<1)&0666) | ((q<<3)&0110) | (map[(y+1)*xsize + x+1] != 0);
+          qb[x] = (imbyte)p;
+
+          if  (((p&m) == 0) && isdelete[p] ) 
+          {
+            count++;
+            map[y*xsize + x] = 0;
+          }
+        }
+        
+        /* Process right edge pixel.      */
+       
+        p = (p<<1)&0666;
+        if  ( (p&m) == 0 && isdelete[p] ) 
+        {
+          count++;
+          map[y*xsize + xsize-1] = 0;
+        }
+      }
+      
+      /* Process bottom scan line.        */
+      
+      for ( x = 0 ; x < xsize ; x++ ) 
+      {
+        q = qb[x];
+        p = ((p<<1)&0666) | ((q<<3)&0110);
+
+        if  ( (p&m) == 0 && isdelete[p] ) 
+        {
+          count++;
+          map[(ysize-1)*xsize + x] = 0;
+        }
+      }
+    }
+  }
+  
+  free (qb);
+}
+
+void imProcessBinMorphThin(const imImage* src_image, imImage* dst_image)
+{
+  imImageCopyData(src_image, dst_image);
+  DoThinImage((imbyte*)dst_image->data[0], dst_image->width, dst_image->height);
+}
diff --git a/src/process/im_morphology_gray.cpp b/src/process/im_morphology_gray.cpp
new file mode 100644
index 0000000..c3c9d45
--- /dev/null
+++ b/src/process/im_morphology_gray.cpp
@@ -0,0 +1,231 @@
+/** \file
+ * \brief Morphology Operations for Gray Images
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_morphology_gray.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_counter.h>
+#include <im_convert.h>
+
+#include "im_process_loc.h"
+#include "im_process_pon.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+#include <math.h>
+
+
+// Gray-level morphological "convolution" of one plane: every output pixel is the maximum (ismax != 0)
+// or minimum of kernel[ky][kx] + map[neighbour] over the kernel window. Kernel entries equal to -1 and
+// neighbours outside the image are skipped. The unnamed DT argument only selects the kernel/accumulator
+// type. Returns 0 as soon as imCounterInc returns 0 (processing stops), 1 otherwise.
+template <class T, class DT> 
+static int DoGrayMorphConvolve(T *map, T* new_map, int width, int height, const imImage* kernel, int counter, int ismax, DT)
+{
+  DT value, *kernel_line, max = 0, min = 0;
+  int offset, new_offset, i, j, x, y, init;
+  const int kh = kernel->height, kw = kernel->width;
+  const int kh2 = kh/2, kw2 = kw/2;
+  DT* kernel_data = (DT*)kernel->data[0];
+
+  for(j = 0; j < height; j++)
+  {
+    new_offset = j * width;
+
+    for(i = 0; i < width; i++)
+    {
+      init = 0;
+
+      // Bound is kh - kh2 (not kh2 + 1): same rows for odd kernels, and keeps (y+kh2) < kh for even ones.
+      for(y = -kh2; y < kh - kh2; y++)
+      {
+        kernel_line = kernel_data + (y+kh2)*kw;
+
+        if ((j + y < 0) ||          // pass the bottom border
+            (j + y >= height))      // pass the top border
+          continue;
+        else
+          offset = (j + y) * width;
+
+        for(x = -kw2; x < kw - kw2; x++)   // same bound rule as the rows: (x+kw2) stays below kw
+        {
+          if (kernel_line[x+kw2] != -1)
+          {
+            if ((i + x < 0) ||      // pass the left border
+                (i + x >= width))   // pass the right border
+              continue;
+            else
+              value = kernel_line[x+kw2] + map[offset + (i + x)];
+
+            if (init == 0)  // first time here for each pass
+            {
+              if (ismax)
+                max = value;
+              else
+                min = value;
+
+              init = 1;
+            }
+            else
+            {
+              if (ismax && value > max)
+                max = value;
+
+              if (!ismax && value < min)
+                min = value;
+            }
+          }
+        }
+      }
+      // NOTE(review): when no kernel entry contributed (init still 0), max/min keep the previous pixel's value.
+      int size_of = sizeof(imbyte);
+      if (sizeof(T) == size_of)
+      {
+        if (ismax)
+          new_map[new_offset + i] = (T)IM_BYTECROP(max);
+        else
+          new_map[new_offset + i] = (T)IM_BYTECROP(min);
+      }
+      else
+      {
+        if (ismax)
+          new_map[new_offset + i] = (T)max;
+        else
+          new_map[new_offset + i] = (T)min;
+      }
+    }    
+
+    if (!imCounterInc(counter))
+      return 0;
+  }
+
+  return 1;
+}
+
+// Runs DoGrayMorphConvolve on every plane of src_image, writing dst_image (maximum when ismax != 0, else minimum).
+// Returns 0 when a plane returns 0, the data type has no case below, or the temporary kernel cannot be created.
+// NOTE(review): integer images read the kernel as int data; a kernel of another type is not converted here.
+int imProcessGrayMorphConvolve(const imImage* src_image, imImage* dst_image, const imImage *kernel, int ismax) {
+  int ret = 0;
+  int counter = imCounterBegin("Gray Morphological Convolution");
+  const char* msg = (const char*)imImageGetAttribute(kernel, "Description", NULL, NULL);
+  if (!msg) msg = "Processing...";
+  imCounterTotal(counter, src_image->depth*src_image->height, msg);
+
+  // Float images need a float kernel: convert into a temporary that is destroyed before returning.
+  imImage* fkernel = NULL;
+  if (src_image->data_type == IM_FLOAT && kernel->data_type != IM_FLOAT)
+  {
+    fkernel = imImageCreate(kernel->width, kernel->height, IM_GRAY, IM_FLOAT);
+    if (!fkernel) { imCounterEnd(counter); return 0; }  // creation failed: close the counter, never touch NULL
+    imConvertDataType(kernel, fkernel, 0, 0, 0, IM_CAST_DIRECT);
+    kernel = fkernel;
+  }
+
+  for (int i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)
+    {
+    case IM_BYTE:
+      ret = DoGrayMorphConvolve((imbyte*)src_image->data[i], (imbyte*)dst_image->data[i], src_image->width, src_image->height, kernel, counter, ismax, (int)0);
+      break;
+    case IM_USHORT:
+      ret = DoGrayMorphConvolve((imushort*)src_image->data[i], (imushort*)dst_image->data[i], src_image->width, src_image->height, kernel, counter, ismax, (int)0);
+      break;
+    case IM_INT:
+      ret = DoGrayMorphConvolve((int*)src_image->data[i], (int*)dst_image->data[i], src_image->width, src_image->height, kernel, counter, ismax, (int)0);
+      break;
+    case IM_FLOAT:
+      ret = DoGrayMorphConvolve((float*)src_image->data[i], (float*)dst_image->data[i], src_image->width, src_image->height, kernel, counter, ismax, (float)0);
+      break;
+    }
+    if (!ret) 
+      break;
+  }
+
+  if (fkernel) imImageDestroy(fkernel);
+  imCounterEnd(counter);
+  return ret;
+}
+
+// Gray erosion (minimum) over a kernel_size x kernel_size window. Returns 0 on failure.
+int imProcessGrayMorphErode(const imImage* src_image, imImage* dst_image, int kernel_size) {
+  imImage* kernel = imImageCreate(kernel_size, kernel_size, IM_GRAY, IM_INT);   // Kernel is all zeros
+  if (!kernel) return 0;   // creation failed: nothing to convolve with
+  imImageSetAttribute(kernel, "Description", IM_BYTE, -1, (void*)"Erode");
+  int ret = imProcessGrayMorphConvolve(src_image, dst_image, kernel, 0);   // ismax = 0 -> minimum
+  imImageDestroy(kernel);
+  return ret;
+}
+
+// Gray dilation (maximum) over a kernel_size x kernel_size window. Returns 0 on failure.
+int imProcessGrayMorphDilate(const imImage* src_image, imImage* dst_image, int kernel_size) {
+  imImage* kernel = imImageCreate(kernel_size, kernel_size, IM_GRAY, IM_INT);   // Kernel is all zeros
+  if (!kernel) return 0;   // creation failed: nothing to convolve with
+  imImageSetAttribute(kernel, "Description", IM_BYTE, -1, (void*)"Dilate");
+  int ret = imProcessGrayMorphConvolve(src_image, dst_image, kernel, 1);   // ismax = 1 -> maximum
+  imImageDestroy(kernel);
+  return ret;
+}
+
+// Opening: erosion of src_image into a temporary clone, then dilation of that clone into dst_image.
+// Returns 0 if the clone cannot be created or either pass fails, 1 otherwise.
+int imProcessGrayMorphOpen(const imImage* src_image, imImage* dst_image, int kernel_size) {
+  imImage* scratch = imImageClone(src_image);
+  if (scratch == NULL)
+    return 0;
+  // && short-circuits, so the dilation only runs when the erosion succeeded
+  int done = imProcessGrayMorphErode(src_image, scratch, kernel_size) &&
+             imProcessGrayMorphDilate(scratch, dst_image, kernel_size);
+  imImageDestroy(scratch);
+  return done ? 1 : 0;
+}
+
+// Closing: dilation of src_image into a temporary clone, then erosion of that clone into dst_image.
+// Returns 0 if the clone cannot be created or either pass fails, 1 otherwise.
+int imProcessGrayMorphClose(const imImage* src_image, imImage* dst_image, int kernel_size) {
+  imImage* scratch = imImageClone(src_image);
+  if (scratch == NULL)
+    return 0;
+  // && short-circuits, so the erosion only runs when the dilation succeeded
+  int done = imProcessGrayMorphDilate(src_image, scratch, kernel_size) &&
+             imProcessGrayMorphErode(scratch, dst_image, kernel_size);
+  imImageDestroy(scratch);
+  return done ? 1 : 0;
+}
+
+// Top hat: opens src_image into dst_image, then combines src_image with that result through IM_BIN_DIFF.
+int imProcessGrayMorphTopHat(const imImage* src_image, imImage* dst_image, int kernel_size) {
+  if (imProcessGrayMorphOpen(src_image, dst_image, kernel_size) == 0) return 0;
+  imProcessArithmeticOp(src_image, dst_image, dst_image, IM_BIN_DIFF);
+  return 1;
+}
+
+// Well: closes src_image into dst_image, then combines src_image with that result through IM_BIN_DIFF.
+int imProcessGrayMorphWell(const imImage* src_image, imImage* dst_image, int kernel_size) {
+  if (imProcessGrayMorphClose(src_image, dst_image, kernel_size) == 0) return 0;
+  imProcessArithmeticOp(src_image, dst_image, dst_image, IM_BIN_DIFF);
+  return 1;
+}
+
+// Morphological gradient: the dilation (kept in a temporary clone) and the erosion (in dst_image) are
+// combined with imProcessArithmeticOp(..., IM_BIN_DIFF). Returns 0 on failure, 1 otherwise.
+int imProcessGrayMorphGradient(const imImage* src_image, imImage* dst_image, int kernel_size) {
+  imImage* dilated = imImageClone(src_image);
+  if (dilated == NULL)
+    return 0;
+
+  int done = imProcessGrayMorphDilate(src_image, dilated, kernel_size) &&
+             imProcessGrayMorphErode(src_image, dst_image, kernel_size);
+  if (done)
+    imProcessArithmeticOp(dilated, dst_image, dst_image, IM_BIN_DIFF);
+  imImageDestroy(dilated);
+  return done ? 1 : 0;
+}
+
diff --git a/src/process/im_quantize.cpp b/src/process/im_quantize.cpp
new file mode 100644
index 0000000..9a65f4c
--- /dev/null
+++ b/src/process/im_quantize.cpp
@@ -0,0 +1,65 @@
+/** \file
+ * \brief Additional Image Quantization Operations
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_quantize.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_palette.h>
+#include <im_math.h>
+
+#include "im_process_pon.h"
+
+#include <stdlib.h>
+#include <memory.h>
+
+
+// Maps the three byte planes of src_image to indices into the uniform palette, stored in plane 0 of
+// dst_image; that palette is attached to dst_image. dither != 0 selects the halftoned index.
+void imProcessQuantizeRGBUniform(const imImage* src_image, imImage* dst_image, int dither) {
+  const imbyte* red = (imbyte*)src_image->data[0];
+  const imbyte* green = (imbyte*)src_image->data[1];
+  const imbyte* blue = (imbyte*)src_image->data[2];
+  imbyte* indices = (imbyte*)dst_image->data[0];
+  imImageSetPalette(dst_image, imPaletteUniform(), 256);
+
+  int pos = 0;   // linear pixel offset, advances in scan order
+  for (int y = 0; y < src_image->height; y++)
+  {
+    for (int x = 0; x < src_image->width; x++, pos++)
+    {
+      if (dither)
+        indices[pos] = (imbyte)imPaletteUniformIndexHalftoned(imColorEncode(red[pos], green[pos], blue[pos]), x, y);
+      else
+        indices[pos] = (imbyte)imPaletteUniformIndex(imColorEncode(red[pos], green[pos], blue[pos]));
+    }
+  }
+}
+
+// Reduces byte data to `grays` levels through a 256-entry lookup table: each input level is resampled
+// by grays/256, resampled back by 256/grays and cropped to a byte. Applied to count*depth bytes.
+void imProcessQuantizeGrayUniform(const imImage* src_image, imImage* dst_image, int grays) {
+  const float down = (float)grays/256.0f;
+  const float up = 256.0f/(float)grays;
+
+  // the table starts zeroed; the loop below then fills every entry
+  imbyte lut[256];
+  memset(lut, 0, 256);
+
+  for (int level = 0; level < 256; level++)
+  {
+    int reduced = imResample(level, down);
+    int restored = imResample(reduced, up);
+    lut[level] = (imbyte)IM_BYTECROP(restored);
+  }
+
+  // one table lookup per byte of the source
+  const imbyte* in = (imbyte*)src_image->data[0];
+  imbyte* out = (imbyte*)dst_image->data[0];
+  const int n = src_image->count*src_image->depth;
+  for (int k = 0; k < n; k++)
+    out[k] = lut[in[k]];
+}
diff --git a/src/process/im_render.cpp b/src/process/im_render.cpp
new file mode 100644
index 0000000..f5d296f
--- /dev/null
+++ b/src/process/im_render.cpp
@@ -0,0 +1,532 @@
+/** \file
+ * \brief Synthetic Image Render
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_render.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_counter.h>
+#include <im_math.h>
+
+#include "im_process_pon.h"
+
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+#include <time.h>
+
+// Full-scale value used by the renderers: 255 for IM_BYTE, 65535 for IM_USHORT and IM_INT, 1 for anything else.
+static float iGetFactor(int data_type) {
+  switch (data_type) {
+  case IM_BYTE:   return 255.0f;
+  case IM_USHORT:
+  case IM_INT:    return 65535.0f;
+  default:        return 1.0f;
+  }
+}
+
+// Fills one plane through a conditional callback: render_func returns a value and may update the flag
+// it receives; the pixel is overwritten only while that flag is non-zero. Returns 0 once imCounterInc returns 0.
+template <class T> 
+static int DoRenderCondOp(T *map, int width, int height, int d, imRenderCondFunc render_func, float* param, int counter) {
+  int write_it = 1;
+  T* line = map;
+
+  for (int y = 0; y < height; y++, line += width)
+  {
+    for (int x = 0; x < width; x++)
+    {
+      // the flag is shared across pixels: a callback that does not set it leaves the previous decision in force
+      T rendered = (T)(render_func(x, y, d, &write_it, param));
+      if (write_it) line[x] = rendered;
+    }
+
+    if (!imCounterInc(counter))
+      return 0;
+  }
+
+  return 1;
+}
+
+// Renders every plane of image through a conditional callback, dispatching on image->data_type.
+// Stops at the first plane that returns 0; a data type without a case leaves the result unchanged (initially 0).
+int imProcessRenderCondOp(imImage* image, imRenderCondFunc render_func, char* render_name, float* param) {
+  const int w = image->width, h = image->height;
+  int ok = 0;
+  int counter = imCounterBegin(render_name);
+  imCounterTotal(counter, image->depth*h, "Rendering...");
+
+  for (int plane = 0; plane < image->depth; plane++)
+  {
+    switch(image->data_type)
+    {
+    case IM_BYTE:
+      ok = DoRenderCondOp((imbyte*)image->data[plane], w, h, plane, render_func, param, counter);
+      break;
+    case IM_USHORT:
+      ok = DoRenderCondOp((imushort*)image->data[plane], w, h, plane, render_func, param, counter);
+      break;
+    case IM_INT:
+      ok = DoRenderCondOp((int*)image->data[plane], w, h, plane, render_func, param, counter);
+      break;
+    case IM_FLOAT:
+      ok = DoRenderCondOp((float*)image->data[plane], w, h, plane, render_func, param, counter);
+      break;
+    }
+
+    if (!ok)
+      break;
+  }
+
+  imCounterEnd(counter);
+  return ok;
+}
+
+// Fills one plane with render_func(x, y, d, param). With plus != 0 the rendered value is added to the existing
+// pixel instead (byte-sized T is cropped with IM_BYTECROP). Returns 0 once imCounterInc returns 0.
+template <class T> 
+static int DoRenderOp(T *map, int width, int height, int d, imRenderFunc render_func, float* param, int counter, int plus) {
+  const bool byte_sized = (sizeof(T) == sizeof(imbyte));
+
+  for (int y = 0; y < height; y++)
+  {
+    T* line = map + y * width;
+
+    for (int x = 0; x < width; x++)
+    {
+      if (!plus)
+      {
+        line[x] = (T)render_func(x, y, d, param);
+        continue;
+      }
+
+      float sum = line[x] + render_func(x, y, d, param);
+      if (byte_sized)
+        line[x] = (T)IM_BYTECROP(sum);
+      else
+        line[x] = (T)sum;
+    }
+
+    if (!imCounterInc(counter))
+      return 0;
+  }
+
+  return 1;
+}
+
+// Renders every plane of image with render_func, dispatching on image->data_type; plus != 0 adds to the existing
+// pixels. Stops at the first plane that returns 0; a data type without a case leaves the result unchanged (initially 0).
+int imProcessRenderOp(imImage* image, imRenderFunc render_func, char* render_name, float* param, int plus) {
+  const int w = image->width, h = image->height;
+  int ok = 0;
+  int counter = imCounterBegin(render_name);
+  imCounterTotal(counter, image->depth*h, "Rendering...");
+
+  for (int plane = 0; plane < image->depth; plane++)
+  {
+    switch(image->data_type)
+    {
+    case IM_BYTE:
+      ok = DoRenderOp((imbyte*)image->data[plane], w, h, plane, render_func, param, counter, plus);
+      break;
+    case IM_USHORT:
+      ok = DoRenderOp((imushort*)image->data[plane], w, h, plane, render_func, param, counter, plus);
+      break;
+    case IM_INT:
+      ok = DoRenderOp((int*)image->data[plane], w, h, plane, render_func, param, counter, plus);
+      break;
+    case IM_FLOAT:
+      ok = DoRenderOp((float*)image->data[plane], w, h, plane, render_func, param, counter, plus);
+      break;
+    }
+
+    if (!ok)
+      break;
+  }
+
+  imCounterEnd(counter);
+  return ok;
+}
+
+// Speckle callback: when a uniform draw in [0,1] falls below param[1], sets *cond and returns a random
+// value in [0, param[0]]; otherwise clears *cond and returns 0.
+static float do_add_specklenoise(int, int, int, int *cond, float* param) {
+  const float draw = float(rand()) / RAND_MAX;
+
+  if (!(draw < param[1]))
+  {
+    *cond = 0;
+    return 0;
+  }
+
+  *cond = 1;
+  return (rand() * param[0]) / RAND_MAX;
+}
+
+// Copies src_image to dst_image, then lets do_add_specklenoise replace pixels with probability percent/100 (rand() is time-seeded).
+int imProcessRenderAddSpeckleNoise(const imImage* src_image, imImage* dst_image, float percent) {
+  float param[2] = { iGetFactor(src_image->data_type),   // [0] upper bound of the random value
+                     percent / 100.0f };                 // [1] replacement probability
+
+  srand((unsigned)time(NULL));
+  imImageCopyData(src_image, dst_image);
+  return imProcessRenderCondOp(dst_image, do_add_specklenoise, "Add Speckle Noise", param);
+}
+
+// Gaussian-noise callback: one normal deviate from the Marsaglia polar method, multiplied by param[1]
+// and shifted by param[0].
+static float do_add_gaussiannoise(int, int, int, float* param) {
+  float u, v, s;
+  for (;;)
+  {
+    u = 2*(float(rand()) / RAND_MAX) - 1;   /* [-1,1] */
+    v = 2*(float(rand()) / RAND_MAX) - 1;   /* [-1,1] */
+    s = u*u + v*v;
+    if (!(s >= 1 || s == 0))   // keep only points inside the unit circle, origin excluded
+      break;
+  }
+
+  s = (float)sqrt(-2 * log(s) / s) * u;
+  return s * param[1] + param[0];
+}
+
+// Copies src_image to dst_image and adds Gaussian noise with the given mean and stddev (rand() is time-seeded).
+int imProcessRenderAddGaussianNoise(const imImage* src_image, imImage* dst_image, float mean, float stddev) {
+  float param[2] = { mean, stddev };   // layout read by do_add_gaussiannoise
+
+  srand((unsigned)time(NULL));
+  imImageCopyData(src_image, dst_image);
+  // plus = 1: the noise is added to the copied pixels instead of replacing them
+  return imProcessRenderOp(dst_image, do_add_gaussiannoise, "Add Gaussian Noise", param, 1);
+}
+   
+// Uniform-noise callback: a uniform draw in [-1,1] scaled by 1.7320508*param[1] and shifted by param[0].
+static float do_add_uniformnoise(int, int, int, float* param) {
+  const float unit = float(rand()) / RAND_MAX;   /* [0,1]  */
+  const float centered = 2*unit - 1;             /* [-1,1] */
+  return 1.7320508f * centered * param[1] + param[0];   // 1.7320508 ~ sqrt(3)
+}
+
+// Copies src_image to dst_image and adds uniform noise with the given mean and stddev (rand() is time-seeded).
+int imProcessRenderAddUniformNoise(const imImage* src_image, imImage* dst_image, float mean, float stddev) {
+  float param[2] = { mean, stddev };   // layout read by do_add_uniformnoise
+
+  srand((unsigned)time(NULL));
+  imImageCopyData(src_image, dst_image);
+  // plus = 1: the noise is added to the copied pixels instead of replacing them
+  return imProcessRenderOp(dst_image, do_add_uniformnoise, "Add Uniform Noise", param, 1);
+}
+   
+// Constant callback: every pixel of plane d gets param[d].
+static float do_const(int, int, int d, float* param) {
+  return *(param + d);
+}
+
+// Fills plane d of image with value[d]; value is indexed by plane, so it needs one entry per plane.
+int imProcessRenderConstant(imImage* image, float* value) {
+  return imProcessRenderOp(image, do_const, "Constant", value, /* plus */ 0);
+}
+
+// Random-noise callback: rand() rescaled from [0, RAND_MAX] to [0, param[0]].
+static float do_noise(int, int, int, float* param) {
+  return (rand() * param[0]) / RAND_MAX;
+}
+
+// Fills image with random values in [0, iGetFactor(data_type)]; rand() is seeded from the clock on every call.
+int imProcessRenderRandomNoise(imImage* image) {
+  // Automatic storage, like every sibling renderer: imProcessRenderOp only reads param during the call,
+  float param[1] = { iGetFactor(image->data_type) };   // so a function-static buffer only added shared state.
+  srand((unsigned)time(NULL));
+  return imProcessRenderOp(image, do_noise, "Random Noise", param, 0);
+}
+
+// Cosine callback: (cos(param[1]*(x-param[3])) * cos(param[2]*(y-param[4])) + param[5]) * param[0].
+static float do_cosine(int x, int y, int, float* param) {
+  return float((cos(param[1]*(x-param[3])) * cos(param[2]*(y-param[4])) + param[5]) * param[0]);
+}
+
+// Renders a separable 2D cosine centred on the image. Layout of param, as read by do_cosine:
+// [0] amplitude, [1]/[2] angular frequency along x/y, [3]/[4] centre, [5] offset added before scaling.
+// 3.1416 stands in for pi.
+int imProcessRenderCosine(imImage* image, float xperiod, float yperiod) {
+  float param[6];
+
+  // full scale of the data type, halved for the types ordered before IM_FLOAT
+  param[0] = iGetFactor(image->data_type);
+  if (image->data_type < IM_FLOAT)
+    param[0] = param[0] / 2.0f;
+
+  // a zero period gives a zero frequency instead of a division by zero
+  param[1] = (xperiod == 0.0f) ? 0.0f : 2.0f * 3.1416f / xperiod;
+  param[2] = (yperiod == 0.0f) ? 0.0f : 2.0f * 3.1416f / yperiod;
+
+  // centre of the pattern
+  param[3] = image->width/2.0f;
+  param[4] = image->height/2.0f;
+
+  // only byte images get the +1 offset
+  param[5] = (image->data_type == IM_BYTE) ? 1.0f : 0.0f;
+
+  return imProcessRenderOp(image, do_cosine, "Cosine", param, 0);
+}
+
+// Gaussian callback: param[0] * exp(param[1] * r2), r2 = squared integer distance to ((int)param[2], (int)param[3]).
+static float do_gaussian(int x, int y, int, float* param) {
+  const int dx = x - (int)param[2];
+  const int dy = y - (int)param[3];
+  const int r2 = dx*dx + dy*dy;
+
+  return float(exp(r2*param[1])*param[0]);
+}
+
+// Renders a Gaussian of the given stddev centred on the image, peaking at the data type's full scale.
+int imProcessRenderGaussian(imImage* image, float stddev) {
+  float param[4] = {
+    iGetFactor(image->data_type),        // [0] peak value
+    -1.0f / (2.0f * stddev * stddev),    // [1] factor of r2 inside exp()
+    image->width/2.0f,                   // [2] centre x
+    image->height/2.0f };                // [3] centre y
+  return imProcessRenderOp(image, do_gaussian, "Gaussian", param, 0);
+}
+
+// LoG callback: (r2 - param[4]) * exp(param[1]*r2) * param[0], r2 = squared integer distance to ((int)param[2], (int)param[3]).
+static float do_lapgauss(int x, int y, int, float* param) {
+  const int dx = x - (int)param[2];
+  const int dy = y - (int)param[3];
+  const int r2 = dx*dx + dy*dy;
+
+  // factors multiplied in the order: (r2 - param[4]), exp(...), param[0]
+  return float((r2 - param[4])*exp(r2*param[1])*param[0]);
+}
+
+// Renders a Laplacian of Gaussian centred on the image; the full-scale factor is divided by 2*stddev^2.
+int imProcessRenderLapOfGaussian(imImage* image, float stddev) {
+  const float two_var = 2.0f * stddev * stddev;
+  float param[5];
+  param[0] = iGetFactor(image->data_type) / two_var;   // scale applied by do_lapgauss
+  param[1] = -1.0f / two_var;                          // factor of r2 inside exp()
+  param[2] = image->width/2.0f;
+  param[3] = image->height/2.0f;
+  param[4] = two_var;                                  // r2 at which the callback returns 0
+  return imProcessRenderOp(image, do_lapgauss, "Laplacian of Gaussian", param, 0);
+}
+
+// Unnormalised sinc: sin(x)/x, with the value 1 at x == 0.
+static inline float sinc(float x) {
+  if (x != 0.0f)
+    return float(sin(x)/x);
+
+  return 1.0f;
+}
+
+// Sinc callback: (sinc((x-param[3])*param[1]) * sinc((y-param[4])*param[2]) + param[5]) * param[0].
+static float do_sinc(int x, int y, int, float* param) {
+  return float((sinc((x - param[3])*param[1])*sinc((y - param[4])*param[2]) + param[5])*param[0]);
+}
+
+// Renders a separable 2D sinc centred on the image. Layout of param, as read by do_sinc:
+// [0] amplitude, [1]/[2] angular frequency along x/y, [3]/[4] centre, [5] offset added before scaling.
+// 3.1416 stands in for pi.
+int imProcessRenderSinc(imImage* image, float xperiod, float yperiod) {
+  float param[6];
+
+  // full scale of the data type, divided by 1.3 for the types ordered before IM_FLOAT
+  param[0] = iGetFactor(image->data_type);
+  if (image->data_type < IM_FLOAT)
+    param[0] = param[0] / 1.3f;
+
+  // a zero period gives a zero frequency instead of a division by zero
+  param[1] = (xperiod == 0.0f) ? 0.0f : 2.0f * 3.1416f / xperiod;
+  param[2] = (yperiod == 0.0f) ? 0.0f : 2.0f * 3.1416f / yperiod;
+
+  // centre of the pattern
+  param[3] = image->width/2.0f;
+  param[4] = image->height/2.0f;
+
+  // only byte images get the +0.3 offset
+  param[5] = (image->data_type == IM_BYTE) ? 0.3f : 0.0f;
+
+  return imProcessRenderOp(image, do_sinc, "Sinc", param, 0);
+}
+
+// Box callback: param[0] inside the rectangle of half sizes ((int)param[1], (int)param[2]) around ((int)param[3], (int)param[4]), else 0.
+static float do_box(int x, int y, int, float* param) {
+  const int half_w = (int)param[1], half_h = (int)param[2];
+  const int dx = x - (int)param[3];
+  const int dy = y - (int)param[4];
+
+  const bool inside = dx >= -half_w && dx <= half_w &&
+                      dy >= -half_h && dy <= half_h;
+  return inside ? param[0] : 0;
+}
+
+// Renders a rectangle of width x height centred on the image: the type's full scale inside, 0 outside.
+int imProcessRenderBox(imImage* image, int width, int height) {
+  float param[5] = {
+    iGetFactor(image->data_type),   // [0] value inside the box
+    width/2.0f,                     // [1] half width
+    height/2.0f,                    // [2] half height
+    image->width/2.0f,              // [3] centre x
+    image->height/2.0f };           // [4] centre y
+  return imProcessRenderOp(image, do_box, "Box", param, 0);
+}
+
+// Ramp callback: runs along y when param[3] is non-zero, along x otherwise. Outside [param[1], param[2]]
+// the result is 0; inside it is (position - param[1]) * param[0].
+static float do_ramp(int x, int y, int, float* param) {
+  const float start = param[1];
+  const float end   = param[2];
+  const float slope = param[0];
+  // param[3] selects the direction of the ramp
+  int pos;
+  if (param[3])
+    pos = y;
+  else
+    pos = x;
+
+  // positions before the start or after the end render as 0
+  if (pos < start)
+    return 0;
+  if (pos > end)
+    return 0;
+
+  return (pos-start)*slope;
+}
+
+// Renders a linear ramp rising from 0 at `start` to the type's full scale at `end`; dir != 0 ramps along y.
+int imProcessRenderRamp(imImage* image, int start, int end, int dir) {
+  float param[4];
+  param[0] = iGetFactor(image->data_type) / float(end-start);   // increase per pixel
+  param[1] = (float)start;
+  param[2] = (float)end;
+  param[3] = (float)dir;
+
+  return imProcessRenderOp(image, do_ramp, "Ramp", param, 0);
+}
+
+// Triangular profile: T - t for t >= 0 and t + T for negative t (i.e. T minus the magnitude of t).
+static inline int Tent(int t, int T) {
+  if (t >= 0)
+    return (T - t);
+
+  return (t + T);
+}
+
+// Tent callback: product of two Tent profiles times param[0] inside the rectangle of half sizes ((int)param[1], (int)param[2]), else 0.
+static float do_tent(int x, int y, int, float* param) {
+  const int half_w = (int)param[1], half_h = (int)param[2];
+  const int dx = x - (int)param[3];
+  const int dy = y - (int)param[4];
+
+  if (dx < -half_w || dx > half_w || dy < -half_h || dy > half_h)
+    return 0;
+  return Tent(dx, half_w) * Tent(dy, half_h) * param[0];
+}
+
+// Renders a tent (pyramid-like) profile of width x height centred on the image.
+int imProcessRenderTent(imImage* image, int width, int height) {
+  const float half_w = width/2.0f, half_h = height/2.0f;
+  float param[5];
+  param[0] = iGetFactor(image->data_type) / (half_w*half_h);   // full scale divided by the product of the half sizes
+  param[1] = half_w;
+  param[2] = half_h;
+  param[3] = image->width/2.0f;
+  param[4] = image->height/2.0f;
+  return imProcessRenderOp(image, do_tent, "Tent", param, 0);
+}
+
+// Cone callback: ((int)param[1] - d) * param[0], d = rounded distance to ((int)param[2], (int)param[3]); 0 once d exceeds (int)param[1].
+static float do_cone(int x, int y, int, float* param) {
+  const int dx = x - (int)param[2];
+  const int dy = y - (int)param[3];
+  const int dist = imRound(sqrt((double)(dx*dx + dy*dy)));
+  const int limit = (int)param[1];
+
+  if (dist > limit) return 0;
+  return (limit - dist)*param[0];
+}
+
+// Renders a cone of the given radius centred on the image: full scale at the centre, 0 from the radius outwards.
+int imProcessRenderCone(imImage* image, int radius) {
+  float param[4];
+  param[1] = (float)radius;
+  param[0] = iGetFactor(image->data_type) / param[1];   // value lost per pixel of distance
+  param[2] = image->width/2.0f;
+  param[3] = image->height/2.0f;
+
+  return imProcessRenderOp(image, do_cone, "Cone", param, 0);
+}
+
+// Wheel callback: param[0] where the rounded distance to ((int)param[3], (int)param[4]) lies in [(int)param[1], (int)param[2]], else 0.
+static float do_wheel(int x, int y, int, float* param) {
+  const int dx = x - (int)param[3];
+  const int dy = y - (int)param[4];
+  const int dist = imRound(sqrt((double)(dx*dx + dy*dy)));
+
+  const bool in_ring = dist >= (int)param[1] && dist <= (int)param[2];
+
+  return in_ring ? param[0] : 0;
+}
+
+// Renders a ring centred on the image: full scale between the internal and external radius, 0 elsewhere.
+int imProcessRenderWheel(imImage* image, int int_radius, int ext_radius) {
+  float param[5] = {
+    iGetFactor(image->data_type),   // [0] value inside the ring
+    (float)int_radius,              // [1] inner radius
+    (float)ext_radius,              // [2] outer radius
+    image->width/2.0f,              // [3] centre x
+    image->height/2.0f };           // [4] centre y
+  return imProcessRenderOp(image, do_wheel, "Wheel", param, 0);
+}
+
+// Grid callback: param[0] where both offsets from ((int)param[3], (int)param[4]) are exact multiples of the
+// spacings ((int)param[1], (int)param[2]); 0 elsewhere.
+static float do_grid(int x, int y, int, float* param) {
+  const int dx = x - (int)param[3];
+  const int dy = y - (int)param[4];
+  const bool on_node = (dx % (int)param[1] == 0) && (dy % (int)param[2] == 0);
+
+  return on_node ? param[0] : 0;
+}
+
+// Renders grid nodes spaced x_space by y_space pixels, measured from the image centre: full scale on a node, 0 elsewhere.
+// Returns 0 without rendering when a spacing is 0, because do_grid takes an integer remainder by each spacing.
+int imProcessRenderGrid(imImage* image, int x_space, int y_space) {
+  if (x_space == 0 || y_space == 0) return 0;   // remainder by zero is undefined
+  float param[5] = {
+    iGetFactor(image->data_type),               // [0] node value
+    (float)x_space, (float)y_space,             // [1], [2] spacing along x / y
+    image->width/2.0f, image->height/2.0f };    // [3], [4] centre x / y
+  return imProcessRenderOp(image, do_grid, "Grid", param, 0);
+}
+
+// Chessboard callback. With periods ((int)param[1], (int)param[2]) and offsets measured from ((int)param[3], (int)param[4]),
+// returns param[0] when both remainders are strictly on the same side of the half period, 0 otherwise.
+static float do_chessboard(int x, int y, int, float* param) {
+  const int period_x = (int)param[1], period_y = (int)param[2];
+  const int dx = x - (int)param[3];
+  const int dy = y - (int)param[4];
+  const int rem_x = dx % period_x;
+  const int rem_y = dy % period_y;
+
+  // the half-period threshold is negated for negative offsets
+  const int half_x = (dx < 0) ? -(period_x/2) : period_x/2;
+  const int half_y = (dy < 0) ? -(period_y/2) : period_y/2;
+  const bool both_below = rem_x < half_x && rem_y < half_y;
+  const bool both_above = rem_x > half_x && rem_y > half_y;
+  return (both_below || both_above) ? param[0] : 0;
+}
+
+// Renders a chessboard whose squares are x_space by y_space pixels, laid out from the image centre.
+// Returns 0 without rendering when a size is 0, because do_chessboard takes an integer remainder by each period.
+int imProcessRenderChessboard(imImage* image, int x_space, int y_space) {
+  if (x_space == 0 || y_space == 0) return 0;   // a zero period would mean a remainder by zero
+  float param[5] = {
+    iGetFactor(image->data_type),               // [0] value of the lit squares
+    (float)x_space*2, (float)y_space*2,         // [1], [2] period = two squares
+    image->width/2.0f, image->height/2.0f };    // [3], [4] centre x / y
+  return imProcessRenderOp(image, do_chessboard, "Chessboard", param, 0);
+}
diff --git a/src/process/im_resize.cpp b/src/process/im_resize.cpp
new file mode 100644
index 0000000..ddf6e47
--- /dev/null
+++ b/src/process/im_resize.cpp
@@ -0,0 +1,332 @@
+/** \file
+ * \brief Image Resize
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_resize.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_math.h>
+#include <im_complex.h>
+#include <im_counter.h>
+
+#include "im_process_loc.h"
+
+#include <stdlib.h>
+#include <memory.h>
+
+
+// Maps the centre of destination pixel (x, y) into source coordinates by scaling with the inverse factors.
+static inline void iResizeInverse(int x, int y, float *xl, float *yl, float x_invfactor, float y_invfactor) {
+  *yl = (y + 0.5f) * y_invfactor;
+  *xl = (x + 0.5f) * x_invfactor;
+}
+
+// Resamples one plane to dst_width x dst_height by mapping each destination pixel centre back into the
+// source. order 1 = bilinear, 3 = bicubic, anything else = zero order. Destination pixels that map
+// outside the source are left as they were. Returns 0 once imCounterInc returns 0.
+template <class DT, class DTU> 
+static int iResize(int src_width, int src_height, const DT *src_map, 
+                         int dst_width, int dst_height, DT *dst_map, 
+                         DTU Dummy, int order, int counter) {
+  const float x_invfactor = float(src_width)/float(dst_width);
+  const float y_invfactor = float(src_height)/float(dst_height);
+
+  for (int y = 0; y < dst_height; y++)
+  {
+    for (int x = 0; x < dst_width; x++, dst_map++)
+    {
+      float xl, yl;
+      iResizeInverse(x, y, &xl, &yl, x_invfactor, y_invfactor);
+
+      // skip destinations whose source position is not inside the original image
+      if (!(xl > 0.0 && yl > 0.0 && xl < src_width && yl < src_height))
+        continue;
+
+      switch (order)
+      {
+      case 1:  *dst_map = imBilinearInterpolation(src_width, src_height, src_map, xl, yl); break;
+      case 3:  *dst_map = imBicubicInterpolation(src_width, src_height, src_map, xl, yl, Dummy); break;
+      default: *dst_map = imZeroOrderInterpolation(src_width, src_height, src_map, xl, yl); break;
+      }
+    }
+
+    if (!imCounterInc(counter))
+      return 0;
+  }
+
+  return 1;
+}
+
+// Shrinks one plane to dst_width x dst_height by decimation: each destination pixel is computed from a
+// box of the source around its mapped centre. order 0 = zero-order decimation, anything else = bilinear.
+// Destination pixels that map outside the source are left as they were. Returns 0 once imCounterInc returns 0.
+template <class DT, class DTU> 
+static int iReduce(int src_width, int src_height, const DT *src_map, 
+                         int dst_width, int dst_height, DT *dst_map, 
+                         DTU Dummy, int order, int counter) {
+  const float x_invfactor = float(src_width)/float(dst_width);
+  const float y_invfactor = float(src_height)/float(dst_height);
+
+  // box size = distance between the source positions of destination pixels (1,1) and (2,2)
+  float xl0, yl0, xl1, yl1;
+  iResizeInverse(1, 1, &xl0, &yl0, x_invfactor, y_invfactor);
+  iResizeInverse(2, 2, &xl1, &yl1, x_invfactor, y_invfactor);
+  const float box_width = xl1 - xl0;
+  const float box_height = yl1 - yl0;
+
+  for (int y = 0; y < dst_height; y++)
+  {
+    for (int x = 0; x < dst_width; x++, dst_map++)
+    {
+      float xl, yl;
+      iResizeInverse(x, y, &xl, &yl, x_invfactor, y_invfactor);
+
+      // skip destinations whose source position is not inside the original image
+      if (!(xl > 0.0 && yl > 0.0 && xl < src_width && yl < src_height))
+        continue;
+
+      if (order == 0)
+        *dst_map = imZeroOrderDecimation(src_width, src_height, src_map, xl, yl, box_width, box_height, Dummy);
+      else
+        *dst_map = imBilinearDecimation(src_width, src_height, src_map, xl, yl, box_width, box_height, Dummy);
+    }
+
+    if (!imCounterInc(counter))
+      return 0;
+  }
+
+  return 1;
+}
+
+// Shrinks src_image into dst_image by decimation (order 1 = bilinear, otherwise zero order), one plane at a time.
+// Returns 0 when a plane returns 0 (the remaining planes are then skipped) or the data type has no case below.
+int imProcessReduce(const imImage* src_image, imImage* dst_image, int order) {
+  int ret = 0;
+  int counter = imCounterBegin("Reduce Size");
+  const char* int_msg = (order == 1)? "Bilinear Decimation": "Zero Order Decimation";
+  imCounterTotal(counter, src_image->depth*dst_image->height, int_msg);
+
+  for (int i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)
+    {
+    case IM_BYTE:
+      ret = iReduce(src_image->width, src_image->height, (const imbyte*)src_image->data[i],  
+                    dst_image->width, dst_image->height, (imbyte*)dst_image->data[i], 
+                    float(0), order, counter);
+      break;
+    case IM_USHORT:
+      ret = iReduce(src_image->width, src_image->height, (const imushort*)src_image->data[i],  
+                    dst_image->width, dst_image->height, (imushort*)dst_image->data[i], 
+                    float(0), order, counter);
+      break;
+    case IM_INT:
+      ret = iReduce(src_image->width, src_image->height, (const int*)src_image->data[i],  
+                    dst_image->width, dst_image->height, (int*)dst_image->data[i], 
+                    float(0), order, counter);
+      break;
+    case IM_FLOAT:
+      ret = iReduce(src_image->width, src_image->height, (const float*)src_image->data[i],  
+                    dst_image->width, dst_image->height, (float*)dst_image->data[i], 
+                    float(0), order, counter);
+      break;
+    case IM_CFLOAT:
+      ret = iReduce(src_image->width, src_image->height, (const imcfloat*)src_image->data[i],  
+                    dst_image->width, dst_image->height, (imcfloat*)dst_image->data[i], 
+                    imcfloat(0,0), order, counter);
+      break;
+    }
+    if (!ret) break;   // a failed plane ends the loop, so a later plane cannot overwrite the failure result
+  }
+  imCounterEnd(counter);
+  return ret;
+}
+
+// Resizes src_image into dst_image by interpolation (order 3 = bicubic, 1 = bilinear, otherwise zero order).
+// Returns 0 when a plane returns 0 (the remaining planes are then skipped) or the data type has no case below.
+int imProcessResize(const imImage* src_image, imImage* dst_image, int order) {
+  int ret = 0;
+  int counter = imCounterBegin("Resize");
+  const char* int_msg = (order == 3)? "Bicubic Interpolation": (order == 1)? "Bilinear Interpolation": "Zero Order Interpolation";
+  imCounterTotal(counter, src_image->depth*dst_image->height, int_msg);
+
+  for (int i = 0; i < src_image->depth; i++)
+  {
+    switch(src_image->data_type)
+    {
+    case IM_BYTE:
+      ret = iResize(src_image->width, src_image->height, (const imbyte*)src_image->data[i],  
+                    dst_image->width, dst_image->height, (imbyte*)dst_image->data[i], 
+                    float(0), order, counter);
+      break;
+    case IM_USHORT:
+      ret = iResize(src_image->width, src_image->height, (const imushort*)src_image->data[i],  
+                    dst_image->width, dst_image->height, (imushort*)dst_image->data[i], 
+                    float(0), order, counter);
+      break;
+    case IM_INT:
+      ret = iResize(src_image->width, src_image->height, (const int*)src_image->data[i],  
+                    dst_image->width, dst_image->height, (int*)dst_image->data[i], 
+                    float(0), order, counter);
+      break;
+    case IM_FLOAT:
+      ret = iResize(src_image->width, src_image->height, (const float*)src_image->data[i],  
+                    dst_image->width, dst_image->height, (float*)dst_image->data[i], 
+                    float(0), order, counter);
+      break;
+    case IM_CFLOAT:
+      ret = iResize(src_image->width, src_image->height, (const imcfloat*)src_image->data[i],  
+                    dst_image->width, dst_image->height, (imcfloat*)dst_image->data[i], 
+                    imcfloat(0,0), order, counter);
+      break;
+    }
+    if (!ret) break;   // a failed plane ends the loop, so a later plane cannot overwrite the failure result
+  }
+  imCounterEnd(counter);
+  return ret;
+}
+
+// Halves both dimensions of one plane: each destination pixel is the sum of a 2x2 source block divided by 4.
+// An odd last row/column of the source is ignored. dst_height is unused; dst_width is the destination row stride.
+template <class DT> 
+static void ReduceBy4(int src_width, int src_height, DT *src_map, 
+                      int dst_width, int dst_height, DT *dst_map)
+{
+  (void)dst_height;
+
+  // number of complete 2x2 blocks per column / per row
+  const int rows = src_height/2;
+  const int cols = src_width/2;
+
+  for (int yd = 0; yd < rows; yd++)
+  {
+    DT* top = src_map + (2*yd) * src_width;
+    DT* bottom = top + src_width;
+    DT* out = dst_map + yd * dst_width;
+
+    for (int xd = 0; xd < cols; xd++)
+    {
+      const int x = 2*xd;
+      out[xd] = ((top[x] +
+                  top[x+1] +
+                  bottom[x] +
+                  bottom[x+1])/4);
+    }
+  }
+}
+
+// Runs ReduceBy4 on every plane of src_image into dst_image, dispatching on src_image->data_type.
+void imProcessReduceBy4(const imImage* src_image, imImage* dst_image) {
+  const int sw = src_image->width, sh = src_image->height;
+  const int dw = dst_image->width, dh = dst_image->height;
+  for (int plane = 0; plane < src_image->depth; plane++)
+  {
+    switch(src_image->data_type)
+    {
+    case IM_BYTE:
+      ReduceBy4(sw, sh, (imbyte*)src_image->data[plane], dw, dh, (imbyte*)dst_image->data[plane]);
+      break;
+    case IM_USHORT:
+      ReduceBy4(sw, sh, (imushort*)src_image->data[plane], dw, dh, (imushort*)dst_image->data[plane]);
+      break;
+    case IM_INT:
+      ReduceBy4(sw, sh, (int*)src_image->data[plane], dw, dh, (int*)dst_image->data[plane]);
+      break;
+    case IM_FLOAT:
+      ReduceBy4(sw, sh, (float*)src_image->data[plane], dw, dh, (float*)dst_image->data[plane]);
+      break;
+    case IM_CFLOAT:
+      ReduceBy4(sw, sh, (imcfloat*)src_image->data[plane], dw, dh, (imcfloat*)dst_image->data[plane]);
+      break;
+    }
+  }
+}
+
+// Copies into dst_image the dst_image-sized window of src_image whose first pixel is (xmin, ymin), one
+// memcpy per line. No bounds are checked here: the window is expected to lie inside src_image.
+void imProcessCrop(const imImage* src_image, imImage* dst_image, int xmin, int ymin) {
+  const int x_bytes = xmin * imDataTypeSize(src_image->data_type);
+
+  for (int plane = 0; plane < src_image->depth; plane++)
+  {
+    imbyte* src = (imbyte*)src_image->data[plane];
+    imbyte* dst = (imbyte*)dst_image->data[plane];
+
+    for (int y = 0; y < dst_image->height; y++)
+    {
+      const imbyte* from = &src[(y + ymin)*src_image->line_size + x_bytes];
+      memcpy(&dst[y*dst_image->line_size], from, dst_image->line_size);
+    }
+  }
+}
+
+void imProcessInsert(const imImage* src_image, const imImage* rgn_image, imImage* dst_image, int xmin, int ymin)
+{ // Writes to dst_image a copy of src_image with rgn_image pasted so that its first pixel lands on (xmin, ymin).
+  int type_size = imDataTypeSize(src_image->data_type);
+  int dst_size1 = xmin*type_size;                                              // bytes of each line left of the region
+  int dst_size2 = src_image->line_size - (rgn_image->line_size + dst_size1);   // bytes of each line right of the region
+  int dst_offset2 = dst_size1+rgn_image->line_size;                            // byte offset where the right part starts
+  int ymax = ymin+rgn_image->height-1;                                         // last line covered by the region
+  int rgn_size = rgn_image->line_size;                                         // bytes copied from each region line
+  // Region wider than the space right of xmin: drop the right part and truncate the region to the line end.
+  if (dst_size2 < 0)
+  {
+    dst_size2 = 0;
+    rgn_size = src_image->line_size - dst_size1;
+    dst_offset2 = dst_size1+rgn_size;
+  }
+  // Region taller than the space below ymin: stop at the last source line.
+  if (ymax > src_image->height-1)
+    ymax = src_image->height-1;
+  // NOTE(review): negative xmin/ymin and xmin beyond the line (negative rgn_size) are not clipped here.
+  for (int i = 0; i < src_image->depth; i++)
+  {
+    imbyte *src_map = (imbyte*)src_image->data[i];
+    imbyte *rgn_map = (imbyte*)rgn_image->data[i];
+    imbyte *dst_map = (imbyte*)dst_image->data[i];
+    // The three pointers walk their images line by line; rgn_map only advances on lines the region covers.
+    for (int y = 0; y < src_image->height; y++)
+    {
+      if (y < ymin || y > ymax)
+      {
+        memcpy(dst_map, src_map, src_image->line_size);   // line outside the region: plain copy of the source
+      }
+      else
+      {
+        if (dst_size1)
+          memcpy(dst_map, src_map, dst_size1);            // source bytes left of the region
+        // region bytes in the middle
+        memcpy(dst_map + dst_size1, rgn_map, rgn_size);
+        // source bytes right of the region, if any
+        if (dst_size2)
+          memcpy(dst_map + dst_offset2, 
+                 src_map + dst_offset2, dst_size2);
+        // next region line
+        rgn_map += rgn_image->line_size;
+      }
+      // next source / destination line
+      src_map += src_image->line_size;
+      dst_map += dst_image->line_size;
+    }
+  }
+}
+
+// Copies every line of src_image into dst_image shifted by (xmin, ymin); the rest of dst_image is not
+// written. No bounds are checked here: the shifted source is expected to fit inside dst_image.
+void imProcessAddMargins(const imImage* src_image, imImage* dst_image, int xmin, int ymin) {
+  const int x_bytes = xmin * imDataTypeSize(src_image->data_type);
+
+  for (int plane = 0; plane < src_image->depth; plane++)
+  {
+    imbyte* dst = (imbyte*)dst_image->data[plane];
+    imbyte* src = (imbyte*)src_image->data[plane];
+
+    for (int y = 0; y < src_image->height; y++)
+    {
+      imbyte* to = &dst[(y + ymin)*dst_image->line_size + x_bytes];
+      memcpy(to, &src[y*src_image->line_size], src_image->line_size);
+    }
+  }
+}
+
diff --git a/src/process/im_statistics.cpp b/src/process/im_statistics.cpp
new file mode 100644
index 0000000..b9f086d
--- /dev/null
+++ b/src/process/im_statistics.cpp
@@ -0,0 +1,341 @@
+/** \file
+ * \brief Image Statistics Calculations
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_statistics.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_math_op.h>
+
+#include "im_process_ana.h"
+
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+
+static unsigned long count_map(const imImage* image)
+{
+  unsigned long histo[256];
+  int size = image->width * image->height;
+  imCalcHistogram((imbyte*)image->data[0], size, histo, 0);
+  unsigned long numcolor = 0;
+
+  for (int i = 0; i < 256; i++)
+  {             
+    if(histo[i] != 0)
+      numcolor++;
+  }
+
+  return numcolor;
+}
+
+// will count also all the 3 components color spaces
+static unsigned long count_rgb(const imImage* image)
+{
+  imbyte *count = (imbyte*)calloc(sizeof(imbyte), 1 << 21 ); /* (2^24)/8=2^21 ~ 2Mb */
+  if (!count)
+    return (unsigned long)-1;
+
+  int size = image->width * image->height;
+  imbyte *red = (imbyte*)image->data[0];
+  imbyte *green = (imbyte*)image->data[1];
+  imbyte *blue = (imbyte*)image->data[2];
+
+  int index;
+  unsigned long numcolor = 0;
+
+  for(int i = 0; i < size; i++)
+  {
+    index = red[i] << 16 | green[i] << 8 | blue[i];
+
+    if(imDataBitGet(count, index) == 0)
+      numcolor++;
+
+    imDataBitSet(count, index, 1);
+  }
+
+  free(count);
+
+  return numcolor;
+}
+
+/* Counts the distinct colors of the image, choosing the counter by the number of color components. */
+unsigned long imCalcCountColors(const imImage* image)
+{
+  if (imColorModeDepth(image->color_space) <= 1)
+    return count_map(image);  /* single component: 256-bin histogram of plane 0 */
+  return count_rgb(image);    /* otherwise planes 0..2 packed into a 24-bit key */
+}
+
+void imCalcHistogram(const imbyte* map, int size, unsigned long* histo, int cumulative)
+{
+  int i;
+
+  memset(histo, 0, 256 * sizeof(unsigned long));
+
+  for (i = 0; i < size; i++)
+    histo[*map++]++;
+
+  if (cumulative)
+  {
+    /* make cumulative histogram */
+    for (i = 1; i < 256; i++)
+      histo[i] += histo[i-1];
+  }
+}
+
+/* Histogram of size imushort samples written to histo; a non-zero cumulative turns it into a running sum. */
+void imCalcUShortHistogram(const imushort* map, int size, unsigned long* histo, int cumulative)
+{
+  int i;
+  /* NOTE(review): only bins 0..65534 are cleared, yet a sample equal to 65535 (if imushort is 16-bit unsigned) increments histo[65535] below - confirm the size callers allocate, then use 65536 here and in the cumulative loop */
+  memset(histo, 0, 65535 * sizeof(unsigned long));
+  for (i = 0; i < size; i++)
+    histo[*map++]++;
+
+  if (cumulative)
+  {
+    /* make cumulative histogram */
+    for (i = 1; i < 65535; i++)  /* stops at bin 65534 */
+      histo[i] += histo[i-1];
+  }
+}
+
+void imCalcGrayHistogram(const imImage* image, unsigned long* histo, int cumulative)
+{
+  int i;
+
+  memset(histo, 0, 256 * sizeof(unsigned long));
+
+  if (image->color_space == IM_GRAY)
+  {
+    imbyte* map = (imbyte*)image->data[0];
+    for (i = 0; i < image->count; i++)
+      histo[*map++]++;
+  }
+  else if (image->color_space == IM_MAP || image->color_space == IM_BINARY)
+  {
+    imbyte* map = (imbyte*)image->data[0];
+    imbyte gray_map[256], r, g, b;
+
+    for (i = 0; i < image->palette_count; i++)
+    {
+      imColorDecode(&r, &g, &b, image->palette[i]);
+      gray_map[i] = (imbyte)((299*r + 587*g + 114*b) / 1000);
+    }
+
+    for (i = 0; i < image->count; i++)
+    {
+      int index = *map++;
+      histo[gray_map[index]]++;
+    }
+  }
+  else
+  {
+    imbyte gray;
+    imbyte* r = (imbyte*)image->data[0];
+    imbyte* g = (imbyte*)image->data[1];
+    imbyte* b = (imbyte*)image->data[2];
+    for (i = 0; i < image->count; i++)
+    {
+      gray = (imbyte)((299*(*r++) + 587*(*g++) + 114*(*b++)) / 1000);
+      histo[gray]++;
+    }
+  }
+
+  if (cumulative)
+  {
+    /* make cumulative histogram */
+    for (i = 1; i < 256; i++)
+      histo[i] += histo[i-1];
+  }
+}
+
+/* Fills *stats with min, max, positive/negative/zero counts, mean and sample standard deviation of count values. */
+template <class T>
+static void DoStats(T* data, int count, imStats* stats)
+{
+  memset(stats, 0, sizeof(imStats));
+  if (count <= 0) return;  /* no samples: keep the zeroed result instead of reading data[0] */
+
+  stats->min = (float)data[0];
+  stats->max = (float)data[0];
+
+  double sum = 0, sum2 = 0;  /* double accumulators: float sums lose digits over many samples */
+  for (int i = 0; i < count; i++)
+  {
+    if (data[i] < stats->min)
+      stats->min = (float)data[i];
+    if (data[i] > stats->max)
+      stats->max = (float)data[i];
+
+    if (data[i] > 0) stats->positive++;
+    if (data[i] < 0) stats->negative++;
+    if (data[i] == 0) stats->zeros++;
+
+    sum += (double)data[i];
+    sum2 += (double)data[i]*(double)data[i];
+  }
+
+  double mean = sum/count;
+  double dev2 = sum2 - count*mean*mean;  /* sum of squared deviations; rounding may push it below 0 */
+  stats->mean = (float)mean;
+  /* n-1 divisor (sample deviation); one sample, or a non-positive dev2, gives 0 instead of NaN */
+  stats->stddev = (count > 1 && dev2 > 0)? (float)sqrt(dev2/(count-1.0)): 0.0f;
+}
+
+void imCalcImageStatistics(const imImage* image, imStats* stats)
+{
+  int count = image->width * image->height;
+
+  for (int i = 0; i < image->depth; i++)
+  {
+    switch(image->data_type)
+    {
+    case IM_BYTE:
+      DoStats((imbyte*)image->data[i], count, &stats[i]);
+      break;                                                                                
+    case IM_USHORT:                                                                           
+      DoStats((imushort*)image->data[i], count, &stats[i]);
+      break;                                                                                
+    case IM_INT:                                                                           
+      DoStats((int*)image->data[i], count, &stats[i]);
+      break;                                                                                
+    case IM_FLOAT:                                                                           
+      DoStats((float*)image->data[i], count, &stats[i]);
+      break;                                                                                
+    }
+  }
+}
+
+void imCalcHistogramStatistics(const imImage* image, imStats* stats)
+{
+  int image_size = image->width * image->height;
+  unsigned long histo[256];
+
+  for (int d = 0; d < image->depth; d++)
+  {
+    imCalcHistogram((imbyte*)image->data[d], image_size, histo, 0);
+    DoStats((unsigned long*)histo, 256, &stats[d]);
+  }
+}
+
+/* Median and mode of each plane from its 256-bin histogram; mode[d] is -1 when the top count is shared. */
+void imCalcHistoImageStatistics(const imImage* image, int* median, int* mode)
+{
+  unsigned long histo[256];
+  for (int d = 0; d < image->depth; d++)
+  {
+    int i;
+    imCalcHistogram((imbyte*)image->data[d], image->count, histo, 0);
+
+    /* median: first level whose cumulative count exceeds half of the samples */
+    unsigned long half = image->count/2;
+    unsigned long count = histo[0];
+    for (i = 1; i <= 256; i++)
+    {
+      if (count > half || i == 256)  /* i == 256: level 255 is the only candidate left, never read histo[256] */
+      {
+        median[d] = i-1;
+        break;
+      }
+      count += histo[i];
+    }
+
+    /* mode: level holding the largest count */
+    unsigned long max = histo[0];
+    for (i = 1; i < 256; i++)
+    {
+      if (max < histo[i])
+        max = histo[i];
+    }
+
+    int found_mode = 0;
+    for (i = 0; i < 256; i++)
+    {
+      if (histo[i] == max)
+      {
+        if (found_mode)
+        {
+          mode[d] = -1;
+          break;
+        }
+        mode[d] = i;
+        found_mode = 1;
+      }
+    }
+  }
+}
+
+/* Signal-to-noise ratio in dB: 20*log10(stddev(image)/stddev(noise_image)); returns 0 when the noise stddev is 0. */
+float imCalcSNR(const imImage* image, const imImage* noise_image)
+{
+  imStats stats[3];  /* imCalcImageStatistics fills one entry per plane: room for at most 3 planes here */
+  imCalcImageStatistics((imImage*)image, stats);
+
+  imStats noise_stats[3];
+  imCalcImageStatistics((imImage*)noise_image, noise_stats);
+  /* for RGB, entry 0 is replaced by the average stddev of the three planes */
+  if (image->color_space == IM_RGB)
+  {
+    noise_stats[0].stddev += noise_stats[1].stddev;
+    noise_stats[0].stddev += noise_stats[2].stddev;
+    noise_stats[0].stddev /= 3;
+    stats[0].stddev += stats[1].stddev;
+    stats[0].stddev += stats[2].stddev;
+    stats[0].stddev /= 3;
+  }
+  /* avoid dividing by a zero noise deviation */
+  if (noise_stats[0].stddev == 0)
+    return 0;
+  return float(20.*log10(stats[0].stddev / noise_stats[0].stddev));
+}
+
+template <class T> 
+static float DoRMSOp(T *map1, T *map2, int count)
+{
+  float rmserror = 0.0f;
+  float diff;
+
+  for (int i = 0; i < count; i++)
+  {
+    diff = float(map1[i] - map2[i]);
+    rmserror += diff * diff;
+  }
+
+  return rmserror;
+}
+  
+/* Root mean square of the sample-by-sample difference; image2 is read with image1's size and data type. */
+float imCalcRMSError(const imImage* image1, const imImage* image2)
+{
+  float rmserror = 0.0f;
+  /* every plane is read as one run of count*depth samples starting at data[0] */
+  int count = image1->count*image1->depth;
+
+  switch(image1->data_type)
+  {
+  case IM_BYTE:
+    rmserror = DoRMSOp((imbyte*)image1->data[0], (imbyte*)image2->data[0], count);
+    break;
+  case IM_USHORT:
+    rmserror = DoRMSOp((imushort*)image1->data[0], (imushort*)image2->data[0], count);
+    break;
+  case IM_INT:
+    rmserror = DoRMSOp((int*)image1->data[0], (int*)image2->data[0], count);
+    break;
+  case IM_FLOAT:
+    rmserror = DoRMSOp((float*)image1->data[0], (float*)image2->data[0], count);
+    break;
+  case IM_CFLOAT:
+    rmserror = DoRMSOp((float*)image1->data[0], (float*)image2->data[0], 2*count);
+    break;
+  }
+  /* count already spans every plane: dividing by count*depth again would shrink the result by sqrt(depth) */
+  rmserror = float(sqrt(rmserror / float(count)));
+  return rmserror;
+}
+
diff --git a/src/process/im_threshold.cpp b/src/process/im_threshold.cpp
new file mode 100644
index 0000000..4af72ee
--- /dev/null
+++ b/src/process/im_threshold.cpp
@@ -0,0 +1,391 @@
+/** \file
+ * \brief Threshold Operations
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_threshold.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+
+#include "im_process_pon.h"
+#include "im_process_ana.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+#include <math.h>
+
+
+void imProcessSliceThreshold(const imImage* src_image, imImage* dst_image, int start_level, int end_level)
+{
+  float params[3];
+  params[0] = (float)start_level;
+  params[1] = (float)end_level;
+  params[2] = (float)1; /* binarize 0-255 */
+  imProcessToneGamut(src_image, dst_image, IM_GAMUT_SLICE, params);
+  imImageMakeBinary(dst_image); /* this compensates the returned values in IM_GAMUT_SLICE */
+}
+
+/* Binarizes plane 0: a pixel becomes 1 where image1 is greater than image2, 0 elsewhere. */
+void imProcessThresholdByDiff(const imImage* image1, const imImage* image2, imImage* NewImage)
+{
+  const imbyte *first = (const imbyte*)image1->data[0];
+  const imbyte *second = (const imbyte*)image2->data[0];
+  imbyte *result = (imbyte*)NewImage->data[0];
+  const int total = image1->count;  /* pixels compared; taken from image1 only */
+
+  for (int p = 0; p < total; p++)
+  {
+    /* strictly greater: equal pixels map to 0 */
+    const int above = first[p] > second[p];
+    result[p] = (imbyte)above;
+  }
+}
+
+template <class T> 
+static void doThreshold(T *src_map, imbyte *dst_map, int count, int level, int value)
+{
+  for (int i = 0; i < count; i++)
+  {
+    if (*src_map++ <= level)
+      *dst_map++ = 0;
+    else
+      *dst_map++ = (imbyte)value;
+  }
+}
+
+void imProcessThreshold(const imImage* src_image, imImage* dst_image, int level, int value)
+{
+  switch(src_image->data_type)
+  {
+  case IM_BYTE:
+    doThreshold((imbyte*)src_image->data[0], (imbyte*)dst_image->data[0], 
+                             src_image->count, level, value);
+    break;                                                                                
+  case IM_USHORT:                                                                           
+    doThreshold((imushort*)src_image->data[0], (imbyte*)dst_image->data[0], 
+                             src_image->count, level, value);
+    break;                                                                                
+  case IM_INT:                                                                           
+    doThreshold((int*)src_image->data[0], (imbyte*)dst_image->data[0], 
+                             src_image->count, level, value);
+    break;                                                                                
+  }
+}
+
+/* qsort comparator ordering ints ascending.
+ * Returns -1, 0 or 1. */
+static int compare_int(const void *elem1, const void *elem2) 
+{
+  const int lhs = *(const int*)elem1;
+  const int rhs = *(const int*)elem2;
+
+  /* explicit comparisons instead of lhs - rhs, which could overflow */
+  if (lhs != rhs)
+    return (lhs < rhs)? -1: 1;
+
+  return 0;
+}
+
+static int thresUniErr(unsigned char* band, int width, int height)
+{
+  int x, y, i, bottom, top, ant2x2, maks1, maks2, maks4, t;
+  int xsize, ysize, offset1, offset2;
+  double a, b, c, phi;
+  int g[4], tab1[256], tab2[256], tab4[256];
+
+  memset(tab1, 0, sizeof(int)*256);
+  memset(tab2, 0, sizeof(int)*256);
+  memset(tab4, 0, sizeof(int)*256);
+
+  xsize = width;
+  ysize = height;
+
+  if (xsize%2 != 0)
+    xsize--;
+
+  if (ysize%2 != 0)
+    ysize--;
+  
+  /* examine all 2x2 neighborhoods */
+
+  for (y=0; y<ysize; y+=2)
+  {
+    offset1 = y*width;
+    offset2 = (y+1)*width;
+
+    for (x=0; x<xsize; x+=2) 
+    {
+      g[0] = band[offset1 + x];
+      g[1] = band[offset1 + x+1];
+      g[2] = band[offset2 + x];
+      g[3] = band[offset2 + x+1];
+
+      /* Sorting */
+      qsort(g, 4, sizeof(int), compare_int);
+
+      /* Accumulating */
+      tab1[g[0]] += 1; 
+      tab1[g[1]] += 1; 
+      tab1[g[2]] += 1; 
+      tab1[g[3]] += 1; 
+
+      tab2[g[0]] +=3;
+      tab2[g[1]] +=2;
+      tab2[g[2]] +=1;
+
+      tab4[g[0]] +=1;
+    }
+  }
+
+  /* Summing */
+  for (i=254; i>=0; i--) 
+  {
+    tab1[i] += tab1[i+1];
+    tab2[i] += tab2[i+1];
+    tab4[i] += tab4[i+1];
+  }
+  
+  /* Tables are ready, find threshold */
+  bottom = 0; top = 255;
+  ant2x2 = (xsize/2)*(ysize/2);
+  maks1 = tab1[0]; /* = ant2x2 * 4; */
+  maks2 = tab2[0]; /* = ant2x2 * 6; */
+  maks4 = tab4[0]; /* = ant2x2;     */
+
+  /* binary search */
+  t = 0;
+  while (bottom != top-1) 
+  {
+    t = (int) ((bottom+top)/2);
+
+    /* Calculate probabilities */
+    a = (double) tab1[t+1]/maks1;
+    b = (double) tab2[t+1]/maks2;
+    c = (double) tab4[t+1]/maks4;
+
+    phi = sqrt((b*b - c) / (a*a - b));
+
+    if (phi> 1)  
+      bottom = t;
+    else                        
+      top = t;
+  }
+  
+  return t;
+}
+
+int imProcessUniformErrThreshold(const imImage* image, imImage* NewImage)
+{
+  int level = thresUniErr((imbyte*)image->data[0], image->width, image->height);
+  imProcessThreshold(image, NewImage, level, 1);
+  return level;
+}
+
+static void do_dither_error(imbyte* data1, imbyte* data2, int size, int t, int value)
+{
+  int i, error;
+  float scale = (float)(t/(255.0-t));
+
+  error = 0; /* always in [-127,127] */ 
+
+  for (i = 0; i < size; i++)
+  {
+    if ((int)(*data1 + error) > t)
+    {
+      error -= (int)(((int)255 - (int)*data1++)*scale);
+      *data2++ = (imbyte)value;
+    }
+    else
+    {
+      error += (int)*data1++;
+      *data2++ = (imbyte)0;
+    }
+  }
+}
+
+void imProcessDifusionErrThreshold(const imImage* image, imImage* NewImage, int level)
+{
+  int value = image->depth > 1? 255: 1;
+  int size = image->width * image->height;
+  for (int i = 0; i < image->depth; i++)
+  {
+    do_dither_error((imbyte*)image->data[i], (imbyte*)NewImage->data[i], size, level, value);
+  }
+}
+
+int imProcessPercentThreshold(const imImage* image, imImage* NewImage, float percent)
+{
+  unsigned long histo[256], cut;
+
+  cut = (int)((image->width * image->height * percent)/100.);
+
+  imCalcHistogram((imbyte*)image->data[0], image->width * image->height, histo, 1);
+
+  int i;
+  for (i = 0; i < 256; i++)
+  {
+    if (histo[i] > cut)
+      break;
+  }
+
+  int level = (i==0? 0: i==256? 254: i-1);
+
+  imProcessThreshold(image, NewImage, level, 1);
+  return level;
+}
+
+static int MaximizeDiscriminantFunction(double * p)
+{
+  double mi_255 = 0;
+  int k;
+  for (k=0; k<256; k++) 
+    mi_255 += k*p[k];
+
+  int index = 0;
+  double max = 0;
+  double mi_k = 0;
+  double w_k = 0;
+  double value;
+  for (k=0; k<256; k++) 
+  {
+    mi_k += k*p[k];
+    w_k += p[k];
+    value = ((w_k == 0) || (w_k == 1))? -1 : ((mi_255*w_k - mi_k)*(mi_255*w_k - mi_k))/(w_k*(1-w_k));
+    if (value >= max) 
+    {
+      index = k;
+      max = value;
+    }
+  }
+
+  return index;
+}
+
+static unsigned char Otsu(const imImage *image)
+{
+  unsigned long histo[256];
+  imCalcHistogram((imbyte*)image->data[0], image->count, histo, 0);
+
+  double totalPixels = image->count;
+  double p[256];
+  for (int i=0; i<256; i++) 
+    p[i] = histo[i]/totalPixels;
+
+  return (unsigned char)MaximizeDiscriminantFunction(p);
+}
+
+int imProcessOtsuThreshold(const imImage* image, imImage* NewImage)
+{
+  int level = Otsu(image);
+  imProcessThreshold(image, NewImage, level, 1);
+  return level;
+}
+
+int imProcessMinMaxThreshold(const imImage* image, imImage* NewImage)
+{ /* Thresholds image into NewImage at half of its value range and returns the level used. */
+  imStats stats;  /* one entry only: imCalcImageStatistics writes one imStats per plane, so this fits depth 1 */
+  imCalcImageStatistics(image, &stats);
+  int level = (int)((stats.max - stats.min)/2.0f);  /* NOTE(review): half of the range, not offset by min - confirm intended */
+  imProcessThreshold(image, NewImage, level, 1);  /* values above level become 1, the rest 0 */
+  return level;
+}
+
+/* Estimates hysteresis thresholds from the histogram of plane 0 (read as bytes). */
+void imProcessHysteresisThresEstimate(const imImage* image, int *low_thres, int *high_thres)
+{
+  unsigned long hist[256];
+  imCalcHistogram((imbyte*)image->data[0], image->count, hist, 0);
+
+  /* high threshold: walk down from 255 until the levels passed hold at least 10% of the pixels */
+  unsigned long cut = (int)(0.1*image->count);
+  int k = 255;
+  unsigned long count = hist[255];
+  while (count < cut && k > 0)
+  {
+    k--;
+    count += hist[k];
+  }
+  *high_thres = k;
+
+  /* lowest level present; the bound keeps the scan inside hist[] when the histogram is all zero */
+  k=0;
+  while (k < 255 && hist[k]==0) k++;
+  *low_thres = (int)((*high_thres + k)/2.0) + k;
+}
+
+void imProcessHysteresisThreshold(const imImage* image, imImage* NewImage, int low_thres, int high_thres)
+{
+  imbyte *src_map = (imbyte*)image->data[0];
+  imbyte *dst_map = (imbyte*)NewImage->data[0];
+  int i, j, size = image->count;
+
+  for (i = 0; i < size; i++)
+  {
+    if (*src_map > high_thres)
+      *dst_map++ = 1;
+    else if (*src_map > low_thres)
+      *dst_map++ = 2;          // mark for future replace
+    else
+      *dst_map++ = 0;
+
+    src_map++;
+  }
+
+  // now loop multiple times until there is no "2"s or no one was changed
+  dst_map = (imbyte*)NewImage->data[0];
+  int changed = 1;
+  while (changed) 
+  {
+    changed = 0;
+    for (j=1; j<image->height-1; j++) 
+    {
+      for (i=1; i<image->width-1; i++)
+      {
+        int offset = i+j*image->width;
+        if (dst_map[offset] == 2)
+        {
+          // if there is an edge neighbor mark this as edge too
+          if (dst_map[offset+1] == 1 || dst_map[offset-1] == 1 ||
+              dst_map[offset+image->width] == 1 || dst_map[offset-image->width] == 1 ||
+              dst_map[offset+image->width-1] == 1 || dst_map[offset+image->width+1] == 1 ||
+              dst_map[offset-image->width-1] == 1 || dst_map[offset-image->width+1] == 1)
+          {
+            dst_map[offset] = 1;
+            changed = 1;
+          }
+        }
+      }
+    }
+  }
+
+  // Clear the remaining "2"s
+  dst_map = (imbyte*)NewImage->data[0];
+  for (i = 0; i < size; i++)
+  {
+    if (*dst_map == 2)
+      *dst_map = 0;
+    dst_map++;
+  }
+}
+
+/* Sets *thres to the 10th non-empty histogram level of plane 0 counted down from 255 (1 when fewer than ten exist above level 0). */
+void imProcessLocalMaxThresEstimate(const imImage* image, int *thres)
+{
+  unsigned long hist[256];
+  imCalcHistogram((imbyte*)image->data[0], image->count, hist, 0);
+
+  /* walk down from 255; level 0 itself is never examined */
+  int level = 255, used_levels = 0;
+  for (; level > 0 && used_levels < 10; level--)
+  {
+    if (hist[level])
+      used_levels++;
+  }
+  /* the loop leaves level one below the last bin examined */
+  *thres = level+1;
+}
+
diff --git a/src/process/im_tonegamut.cpp b/src/process/im_tonegamut.cpp
new file mode 100644
index 0000000..cf63350
--- /dev/null
+++ b/src/process/im_tonegamut.cpp
@@ -0,0 +1,322 @@
+/** \file
+ * \brief Tone Gamut Operations
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: im_tonegamut.cpp,v 1.1 2008/10/17 06:16:33 scuri Exp $
+ */
+
+
+#include <im.h>
+#include <im_util.h>
+#include <im_math.h>
+
+#include "im_process_pon.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+#include <math.h>
+
+
+template <class T>
+static inline T line_op(const T& v, const T& min, const T& max, const float& a, const float& b)
+{
+  float r = v * a + b;
+  if (r > (float)max) return max;
+  if (r < (float)min) return min;
+  return (T)r;
+}
+
+template <class T>
+static inline T normal_op(const T& v, const T& min, const T& range)
+{
+  return (T)(float(v - min) / float(range));
+}
+
+template <class T>
+static inline T zerostart_op(const T& v, const T& min)
+{
+  return (T)(v - min);
+}
+
+template <class T>
+static inline float invert_op(const T& v, const T& min, const T& range)
+{
+  return 1.0f - float(v - min) / float(range);
+}
+
+template <class T>
+static inline T solarize_op(const T& v, const T& level, const float& A, const float& B)
+{
+  if (v > level)
+    return (T)(v * A + B);
+  else
+    return v;
+}
+
+template <class T>
+static inline T slice_op(const T& v, const T& min, const T& max, const T& start, const T& end, int bin)
+{
+  if (v < start || v > end)
+    return min;
+  else
+  {
+    if (bin)
+      return max;
+    else
+      return v;
+  }
+}
+
+template <class T>
+static inline T tonecrop_op(const T& v, const T& start, const T& end)
+{
+  if (v < start)
+    return start;
+  if (v > end)
+    return end;
+  else
+    return v;
+}
+
+template <class T>
+static inline T expand_op(const T& v, const T& min, const T& max, const T& start, const float& norm)
+{
+  float r = (v - start)*norm + min;
+  if (r > (float)max) return max;
+  if (r < (float)min) return min;
+  return (T)r;
+}
+
+template <class T>
+static inline float norm_pow_op(const T& v, const T& min, const T& range, const float& gamma)
+{
+  return (float)pow(float(v - min) / float(range), gamma);
+}
+
+template <class T>
+static inline float norm_log_op(const T& v, const T& min, const T& range, const float& norm, const float& K)
+{
+  return (float)(log(K * float(v - min) / float(range) + 1) / norm);
+}
+
+template <class T>
+static inline float norm_exp_op(const T& v, const T& min, const T& range, const float& norm, const float& K)
+{
+  return (float)((exp(K * float(v - min) / float(range)) - 1) / norm);
+}
+
+template <class T> 
+static void DoNormalizedUnaryOp(T *map, T *new_map, int count, int op, float *args)
+{
+  int i;
+  T min, max, range;
+
+  int size_of = sizeof(imbyte);
+  if (sizeof(T) == size_of)
+  {
+    min = 0;
+    max = 255;
+  }
+  else
+  {
+    imMinMax(map, count, min, max);
+
+    if (min == max)
+    {
+      max = min + 1;
+
+      if (min != 0)
+        min = min - 1;
+    }
+  }
+
+  range = max-min;
+  
+  switch(op)
+  {
+  case IM_GAMUT_NORMALIZE:
+    {
+      if (min >= 0 && max <= 1)
+      {
+        for (i = 0; i < count; i++)
+          new_map[i] = (T)map[i];
+      }
+      else
+      {
+        for (i = 0; i < count; i++)
+          new_map[i] = normal_op(map[i], min, range);
+      }
+      break;
+    }
+  case IM_GAMUT_INVERT:
+    for (i = 0; i < count; i++)
+      new_map[i] = (T)(invert_op(map[i], min, range)*range + min);
+    break;
+  case IM_GAMUT_ZEROSTART:
+    for (i = 0; i < count; i++)
+      new_map[i] = (T)zerostart_op(map[i], min);
+    break;
+  case IM_GAMUT_SOLARIZE:
+    {
+      T level =  (T)(((100 - args[0]) * range) / 100.0f + min);
+      float A = float(level - min) / float(level - max);
+      float B = float(level * range) / float(max - level);
+      for (i = 0; i < count; i++)
+        new_map[i] = solarize_op(map[i], level, A, B);
+      break;
+    }
+  case IM_GAMUT_POW:
+    for (i = 0; i < count; i++)
+      new_map[i] = (T)(norm_pow_op(map[i], min, range, args[0])*range + min);
+    break;
+  case IM_GAMUT_LOG:
+    {
+      float norm = float(log(args[0] + 1));
+      for (i = 0; i < count; i++)
+        new_map[i] = (T)(norm_log_op(map[i], min, range, norm, args[0])*range + min);
+      break;
+    }
+  case IM_GAMUT_EXP:
+    {
+      float norm = float(exp(args[0]) - 1);
+      for (i = 0; i < count; i++)
+        new_map[i] = (T)(norm_exp_op(map[i], min, range, norm, args[0])*range + min);
+      break;
+    }
+  case IM_GAMUT_SLICE:
+    {
+      if (args[0] > args[1]) { float tmp = args[1]; args[1] = args[0]; args[0] = tmp; }
+      if (args[1] > max) args[1] = (float)max;
+      if (args[0] < min) args[0] = (float)min;
+      for (i = 0; i < count; i++)
+        new_map[i] = slice_op(map[i], min, max, (T)args[0], (T)args[1], (int)args[2]);
+      break;
+    }
+  case IM_GAMUT_CROP:
+    {
+      if (args[0] > args[1]) { float tmp = args[1]; args[1] = args[0]; args[0] = tmp; }
+      if (args[1] > max) args[1] = (float)max;
+      if (args[0] < min) args[0] = (float)min;
+      for (i = 0; i < count; i++)
+        new_map[i] = tonecrop_op(map[i], (T)args[0], (T)args[1]);
+      break;
+    }
+  case IM_GAMUT_EXPAND:
+    {
+      if (args[0] > args[1]) { float tmp = args[1]; args[1] = args[0]; args[0] = tmp; }
+      if (args[1] > max) args[1] = (float)max;
+      if (args[0] < min) args[0] = (float)min;
+      float norm = float(max - min)/(args[1] - args[0]);
+      for (i = 0; i < count; i++)
+        new_map[i] = expand_op(map[i], min, max, (T)args[0], norm);
+      break;
+    }
+  case IM_GAMUT_BRIGHTCONT:
+    {
+      float bs = (args[0] * range) / 100.0f;
+      float a = (float)tan((45+args[1]*0.449999)/57.2957795);
+      float b = bs + (float)range*(1.0f - a)/2.0f;
+      for (i = 0; i < count; i++)
+        new_map[i] = line_op(map[i], min, max, a, b);
+      break;
+    }
+  }
+}
+
+void imProcessToneGamut(const imImage* src_image, imImage* dst_image, int op, float *args)
+{
+  int count = src_image->count*src_image->depth;
+
+  switch(src_image->data_type)
+  {
+  case IM_BYTE:
+    DoNormalizedUnaryOp((imbyte*)src_image->data[0], (imbyte*)dst_image->data[0], count, op, args);
+    break;                                                                                
+  case IM_USHORT:                                                                           
+    DoNormalizedUnaryOp((imushort*)src_image->data[0], (imushort*)dst_image->data[0], count, op, args);
+    break;                                                                                
+  case IM_INT:                                                                           
+    DoNormalizedUnaryOp((int*)src_image->data[0], (int*)dst_image->data[0], count, op, args);
+    break;                                                                                
+  case IM_FLOAT:                                                                           
+    DoNormalizedUnaryOp((float*)src_image->data[0], (float*)dst_image->data[0], count, op, args);
+    break;                                                                                
+  }
+}
+
+/* Converts float samples to bytes: values above 1 give 255, values below 0 give 0, the rest are scaled by 255. */
+void imProcessUnNormalize(const imImage* image, imImage* NewImage)
+{
+  const float* src = (const float*)image->data[0];
+  imbyte* dst = (imbyte*)NewImage->data[0];
+  /* all planes are handled as a single run of count*depth samples */
+  const int total = image->count*image->depth;
+
+  for (int s = 0; s < total; s++)
+  {
+    const float v = src[s];
+    imbyte out = (imbyte)255;          /* v > 1 saturates */
+    if (!(v > 1))
+      out = (v < 0)? (imbyte)0: (imbyte)(v*255);  /* scaled value is truncated, not rounded */
+    dst[s] = out;
+  }
+}
+
+template <class T> 
+static void DoDirectConv(T* map, imbyte* new_map, int count)
+{
+  for (int i = 0; i < count; i++)
+  {
+    if (map[i] > 255)
+      new_map[i] = (imbyte)255;
+    else if (map[i] < 0)
+      new_map[i] = (imbyte)0;
+    else
+      new_map[i] = (imbyte)(map[i]);
+  }
+}
+
+void imProcessDirectConv(const imImage* image, imImage* NewImage)
+{
+  int count = image->count*image->depth;
+
+  switch(image->data_type)
+  {
+  case IM_USHORT:                                                                           
+    DoDirectConv((imushort*)image->data[0], (imbyte*)NewImage->data[0], count);
+    break;                                                                                
+  case IM_INT:                                                                           
+    DoDirectConv((int*)image->data[0], (imbyte*)NewImage->data[0], count);
+    break;                                                                                
+  case IM_FLOAT:                                                                           
+    DoDirectConv((float*)image->data[0], (imbyte*)NewImage->data[0], count);
+    break;                                                                                
+  }
+}
+
+void imProcessNegative(const imImage* src_image, imImage* dst_image)
+{
+  if (src_image->color_space == IM_MAP)
+  {
+    unsigned char r, g, b;
+    for (int i = 0; i < src_image->palette_count; i++)
+    {
+      imColorDecode(&r, &g, &b, src_image->palette[i]);
+      r = ~r; g = ~g; b = ~b;
+      dst_image->palette[i] = imColorEncode(r, g, b);
+    }
+
+    imImageCopyData(src_image, dst_image);
+  }
+  else if (src_image->color_space == IM_BINARY)
+  {
+    imbyte* map1 = (imbyte*)src_image->data[0];
+    imbyte* map = (imbyte*)dst_image->data[0];
+    for (int i = 0; i < src_image->count; i++)
+      map[i] = map1[i]? 0: 1;
+  }
+  else
+    imProcessToneGamut(src_image, dst_image, IM_GAMUT_INVERT, NULL);
+}
diff --git a/src/tecmake_compact.mak b/src/tecmake_compact.mak
new file mode 100644
index 0000000..77c92b6
--- /dev/null
+++ b/src/tecmake_compact.mak
@@ -0,0 +1,1080 @@
+#-------------------------------------------------------------------------#
+#- Tecmake  (Compact Version)                                            -#
+#- Generic Makefile to build applications and libraries at TeCGraf       -#
+#- The user makefile usually has the name "config.mak".                  -#   
+#-------------------------------------------------------------------------#
+
+# Tecmake Version
+VERSION = 3.15
+
+# First target 
+.PHONY: build
+build: tecmake
+
+
+#---------------------------------#
+# System Variables Definitions
+
+# Base Definitions
+TEC_SYSNAME:=$(shell uname -s)
+TEC_SYSVERSION:=$(shell uname -r|cut -f1 -d.)
+TEC_SYSMINOR:=$(shell uname -r|cut -f2 -d.)
+TEC_SYSARCH:=$(shell uname -m)
+
+# Fixes
+ifeq ($(TEC_SYSNAME), SunOS)
+	TEC_SYSARCH:=$(shell uname -p)
+endif
+ifeq ($(TEC_SYSNAME), IRIX)
+	TEC_SYSARCH:=$(shell uname -p)
+endif
+ifeq ($(TEC_SYSNAME), FreeBSD)
+	TEC_SYSMINOR:=$(shell uname -r|cut -f2 -d.|cut -f1 -d-)
+endif
+ifeq ($(TEC_SYSNAME), AIX)
+	TEC_SYSVERSION:=$(shell uname -v)
+	TEC_SYSMINOR:=$(shell uname -r)
+	TEC_SYSARCH:=ppc
+endif
+ifeq ($(TEC_SYSNAME), Darwin)
+	TEC_SYSARCH:=$(shell uname -p)
+endif
+
+ifeq ($(TEC_SYSARCH), powerpc)
+	TEC_SYSARCH:=ppc
+endif
+ifeq ($(TEC_SYSARCH), i686)
+	TEC_SYSARCH:=x86
+endif
+ifeq ($(TEC_SYSARCH), i386)
+	TEC_SYSARCH:=x86
+endif
+
+# Compose
+TEC_SYSRELEASE:=$(TEC_SYSVERSION).$(TEC_SYSMINOR)
+TEC_UNAME:=$(TEC_SYSNAME)$(TEC_SYSVERSION)$(TEC_SYSMINOR)
+
+# Linux 2.4 and GCC 3.x
+ifeq ($(TEC_UNAME), Linux24)
+	GCCVER:=$(shell gcc -dumpversion|cut -f1 -d.)
+	ifeq ($(GCCVER), 3)
+		TEC_UNAME:=$(TEC_UNAME)g3
+	endif
+endif
+
+# Linux 2.6 and GCC 4.x
+ifeq ($(TEC_UNAME), Linux26)
+	GCCVER:=$(shell gcc -dumpversion|cut -f1 -d.)
+	ifeq ($(GCCVER), 4)
+		TEC_UNAME:=$(TEC_UNAME)g4
+	endif
+endif
+
+# Linux and PowerPC
+ifeq ($(TEC_SYSNAME), Linux)
+	ifeq ($(TEC_SYSARCH), ppc)
+		TEC_UNAME:=$(TEC_UNAME)ppc
+	endif
+endif
+
+# 64-bits Linux
+ifeq ($(TEC_SYSARCH), x86_64)
+	BUILD_64=Yes
+	TEC_UNAME:=$(TEC_UNAME)_64
+endif
+
+ifeq ($(TEC_SYSARCH), ia64)
+	BUILD_64=Yes
+	TEC_UNAME:=$(TEC_UNAME)_ia64
+endif
+
+# Solaris and Intel
+ifeq ($(TEC_SYSNAME), SunOS)
+	ifeq ($(TEC_SYSARCH) , x86)
+		TEC_UNAME:=$(TEC_UNAME)x86
+	endif
+endif
+
+# Darwin and Intel
+ifeq ($(TEC_SYSNAME), Darwin)
+ifeq ($(TEC_SYSARCH), x86)
+		TEC_UNAME:=$(TEC_UNAME)x86
+	endif
+endif
+
+# System Info
+.PHONY: sysinfo
+sysinfo:
+	@echo ''; echo 'Tecmake - System Info'
+	@echo 'TEC_SYSNAME = $(TEC_SYSNAME)'
+	@echo 'TEC_SYSVERSION = $(TEC_SYSVERSION)'
+	@echo 'TEC_SYSMINOR = $(TEC_SYSMINOR)'
+	@echo 'TEC_SYSARCH = $(TEC_SYSARCH)'
+	@echo 'TEC_UNAME = $(TEC_UNAME)'; echo ''
+
+#---------------------------------#
+# Directories Definitions
+PROJDIR = ..
+SRCDIR  = .
+OBJROOT = $(PROJDIR)/obj
+
+
+#---------------------------------#
+# Byte Order and Word Size
+
+ifneq ($(findstring x86, $(TEC_SYSARCH)), )
+  TEC_BYTEORDER = TEC_LITTLEENDIAN
+else
+  TEC_BYTEORDER = TEC_BIGENDIAN
+endif
+
+ifeq ($(TEC_SYSARCH), x86_64)
+  TEC_WORDSIZE = TEC_64
+else
+ifdef BUILD_64
+  TEC_WORDSIZE = TEC_64
+else
+  TEC_WORDSIZE = TEC_32
+endif
+endif
+
+# Itanium Exception
+ifeq ($(TEC_SYSARCH), ia64)
+  TEC_BYTEORDER = TEC_LITTLEENDIAN
+  TEC_WORDSIZE = TEC_64
+endif
+
+
+#---------------------------------#
+# Compilation Flags
+STDFLAGS := -Wall
+STDDEFS  := -DTEC_UNAME=$(TEC_UNAME) -DTEC_SYSNAME=$(TEC_SYSNAME) -D$(TEC_SYSNAME)=$(TEC_SYSRELEASE) -D$(TEC_BYTEORDER) -D$(TEC_WORDSIZE) -DFUNCPROTO=15
+STDINCS  := 
+OPTFLAGS := -O2
+STDLFLAGS  := r
+DEBUGFLAGS := -g  
+STDLDFLAGS := -shared
+DLIBEXT := so
+
+#---------------------------------#
+# Build Tools
+# The Fortran compiler is named FC because the .f/.for rules expand $(FC).
+CC       := gcc
+CPPC     := g++
+FC       := g77
+RANLIB   := ranlib
+AR       := ar
+DEBUGGER := gdb
+RCC      := windres 
+LD       := gcc
+
+ifeq ($(TEC_UNAME), gcc2)
+  ifdef USE_GCC_2
+    CC := $(CC)-2
+    CPPC := $(CPPC)-2
+    FC := $(FC)-2
+  endif
+endif
+
+
+#---------------------------------#
+# User Configuration File
+
+MAKENAME = config.mak
+
+ifdef MF
+  MAKENAME = $(MF).mak
+endif
+
+###################
+include $(MAKENAME)
+###################
+
+
+#---------------------------------#
+# Definitions of public variables 
+
+ifdef LIBNAME
+  TARGETNAME = $(LIBNAME)
+  MAKETYPE = LIB
+else
+  TARGETNAME = $(APPNAME)
+  MAKETYPE = APP
+endif
+# TARGETNAME is always assigned above, so test its expanded value, not ifndef.
+ifeq ($(strip $(TARGETNAME)),)
+  $(error LIBNAME nor APPNAME defined in $(MAKENAME)) 
+endif
+
+PROJNAME ?= $(TARGETNAME)
+
+DEPEND := $(TARGETNAME).dep
+
+ifdef DEPENDDIR
+  DEPEND := $(DEPENDDIR)/$(TARGETNAME).dep.$(TEC_UNAME)
+endif
+
+SRCLUADIR ?= $(SRCDIR)
+LOHDIR ?= $(SRCLUADIR)
+
+ifeq ($(MAKETYPE), APP)
+  TARGETROOT ?= $(PROJDIR)/bin
+else
+  TARGETROOT ?= $(PROJDIR)/lib
+endif
+
+ifneq ($(PROJNAME), $(TARGETNAME))
+  OBJROOT := $(OBJROOT)/$(TARGETNAME)
+endif
+
+ifdef DBG
+  STDFLAGS += $(DEBUGFLAGS)
+  STDDEFS += -DDEBUG
+else
+  STDDEFS += -DNDEBUG
+  ifdef OPT
+    STDFLAGS += $(OPTFLAGS)
+    ifeq ($(findstring gcc, $(TEC_UNAME)), )
+      STRIP ?= Yes
+    endif
+  endif
+endif
+
+ifdef BUILD_64
+  ifneq ($(findstring SunOS, $(TEC_UNAME)), )
+    USE_CC = Yes
+    BUILD_64_DIR = Yes   
+  endif
+  ifneq ($(findstring AIX, $(TEC_UNAME)), )
+    USE_CC = Yes
+    BUILD_64_DIR = Yes
+  endif
+  ifneq ($(findstring IRIX, $(TEC_UNAME)), )
+    USE_CC = Yes
+    BUILD_64_DIR = Yes
+  endif
+endif
+  
+ifdef USE_CC
+  CC := cc
+  CPPC := CC
+  STDFLAGS = 
+  ifdef USE_CC_DIR
+    TEC_UNAME := $(TEC_UNAME)cc
+  endif
+endif
+
+ifdef BUILD_64
+  ifdef BUILD_64_DIR
+    TEC_UNAME := $(TEC_UNAME)_64
+  endif
+endif
+
+TEC_UNAME_DIR := $(TEC_UNAME)
+ifdef DBG
+  ifdef DBG_DIR
+    TEC_UNAME_DIR := $(TEC_UNAME_DIR)d
+  endif
+endif
+
+OBJDIR := $(OBJROOT)/$(TEC_UNAME_DIR)
+TARGETDIR := $(TARGETROOT)/$(TEC_UNAME_DIR)
+
+# Change linker if any C++ source
+ifndef LINKER
+  ifneq "$(findstring .cpp, $(SRC))" ""
+    LINKER := $(CPPC)
+  else
+    LINKER := $(CC)
+  endif
+endif
+
+
+#---------------------------------#
+# LO and LOH Suffix
+
+ifeq ($(TEC_BYTEORDER), TEC_BIGENDIAN)
+  ifeq ($(TEC_WORDSIZE), TEC_64)
+    LO_SUFFIX ?= _be64
+  else
+    LO_SUFFIX ?= _be32
+  endif
+else
+  ifeq ($(TEC_WORDSIZE), TEC_64)
+    LO_SUFFIX ?= _le64
+  else
+    LO_SUFFIX ?=
+  endif
+endif
+
+
+#---------------------------------#
+#  Platform specific variables
+
+# Definitions for X11
+X11_LIBS := Xmu Xt Xext X11
+#X11_LIB := 
+#X11_INC :=                     #include <X11/X.h>
+
+# Definitions for OpenGL
+OPENGL_LIBS := GLU GL
+#OPENGL_LIB := 
+#OPENGL_INC :=                  #include <GL/gl.h>  and possibly  
+MOTIFGL_LIB := GLw              #include <GL/GLwMDrawA.h>
+
+# Definitions for Motif
+#MOTIF_LIB := 
+#MOTIF_INC :=                   #include <Xm/Xm.h>
+
+# Definitions for GLUT
+#GLUT_LIB := 
+#GLUT_INC := 
+
+
+ifneq ($(findstring cygw, $(TEC_UNAME)), ) 
+  NO_DYNAMIC ?= Yes
+  X11_LIBS := Xpm $(X11_LIBS)
+  ifdef BUILD_64
+    X11_LIB := /usr/X11R6/lib64
+  else
+    X11_LIB := /usr/X11R6/lib
+  endif
+  X11_INC := /usr/X11R6/include
+  MOTIFGL_LIB :=
+endif
+
+ifneq ($(findstring Linux, $(TEC_UNAME)), )
+  X11_LIBS := Xpm $(X11_LIBS)
+  ifdef BUILD_64
+    ifeq ($(TEC_SYSARCH), ia64)
+      STDFLAGS += -fPIC
+      X11_LIB := /usr/X11R6/lib
+    else
+      STDFLAGS += -m64 -fPIC
+      X11_LIB := /usr/X11R6/lib64
+    endif
+  else
+    X11_LIB := /usr/X11R6/lib
+  endif
+  X11_INC := /usr/X11R6/include
+  MOTIFGL_LIB := 
+endif
+
+ifneq ($(findstring IRIX, $(TEC_UNAME)), ) # any IRIX
+  LD = ld
+  STDLDFLAGS := -elf -shared -rdata_shared -soname lib$(TARGETNAME).so
+  RANLIB := /bin/true
+  X11_LIBS := Xmu Xt X11
+  ifdef BUILD_64    
+    ifdef USE_CC  
+      STDFLAGS += -64 -KPIC
+      STDLDFLAGS += -64
+      LINKER += -64
+    endif
+    X11_LIB := /usr/Motif-2.1/lib64 /usr/lib64 # 64-bit libs
+  else
+    X11_LIB := /usr/Motif-2.1/lib32 /usr/lib32 # N32 libs
+  endif
+  MOTIF_INC = /usr/Motif-2.1/include
+endif
+
+ifneq ($(findstring AIX, $(TEC_UNAME)), ) 
+  NO_DYNAMIC ?= Yes
+  ifdef BUILD_64
+    ifdef USE_CC  
+      STDFLAGS += -q64 # to compilers C and C++
+      STDLFLAGS := -X64 $(STDLFLAGS) # to librarian
+      STDLDFLAGS += -64
+      LINKER += -q64 # to linker
+    endif
+  endif
+endif
+
+ifneq ($(findstring HP-UX, $(TEC_UNAME)), )
+  NO_DYNAMIC ?= Yes
+  MOTIF_INC := /usr/include/Motif2.1
+  X11_LIBS := Xt Xext X11
+  OPENGL_LIB := /opt/graphics/OpenGL/lib
+  OPENGL_INC := /opt/graphics/OpenGL/include
+  STDDEFS := -DTEC_UNAME=$(TEC_UNAME) -DTEC_SYSNAME=$(TEC_SYSNAME) -D$(TEC_BYTEORDER) -D$(TEC_WORDSIZE) -DFUNCPROTO=15
+  CC := aCC
+  CPPC := aCC
+  LINKER := aCC
+endif
+
+ifneq ($(findstring SunOS, $(TEC_UNAME)), )
+  LD = ld
+  STDLDFLAGS := -G
+  X11_INC := /usr/openwin/share/include
+  X11_LIB := /usr/openwin/lib
+  MOTIF_INC := /usr/dt/share/include
+  MOTIF_LIB := /usr/dt/lib
+  OPENGL_INC := /usr/openwin/share/include/X11
+  GLUT_LIB := /usr/local/glut-3.7/lib/glut
+  GLUT_INC := /usr/local/glut-3.7/include
+  ifdef BUILD_64
+    ifdef USE_CC  
+      STDFLAGS += -xarch=v9 -KPIC
+      # have to force these PATHs because of a conflict with standard PATHs
+      STDLDFLAGS += -64 -L/usr/lib/64 -L/usr/ucblib/sparcv9  
+      LINKER += -xarch=v9
+    endif
+  endif
+endif
+
+ifneq ($(findstring Darwin, $(TEC_UNAME)), )
+  X11_LIBS := Xmu Xp Xt Xext X11
+  X11_LIB := /usr/X11R6/lib
+  X11_INC := /usr/X11R6/include
+  MOTIF_INC := /usr/OpenMotif/include
+  MOTIF_LIB := /usr/OpenMotif/lib
+  ifdef BUILD_DYLIB
+    STDLDFLAGS := -dynamiclib -install_name lib$(TARGETNAME).dylib
+    DLIBEXT := dylib
+  else
+    STDLDFLAGS := -bundle -undefined dynamic_lookup
+  endif
+endif
+
+ifneq ($(findstring FreeBSD, $(TEC_UNAME)), )
+  X11_LIB := /usr/X11R6/lib
+  X11_INC := /usr/X11R6/include
+endif
+
+
+################################
+# Allows an extra configuration file.
+ifdef EXTRA_CONFIG
+include $(EXTRA_CONFIG)
+endif
+################################
+
+
+#---------------------------------#
+# Tecgraf Libraries Location
+TECTOOLS_HOME ?= /home/tecgraf
+
+IUP=$(TECTOOLS_HOME)/iup
+CD=$(TECTOOLS_HOME)/cd
+IM=$(TECTOOLS_HOME)/im
+LUA=$(TECTOOLS_HOME)/lua
+LUA51?=$(TECTOOLS_HOME)/lua5.1
+
+
+#---------------------------------#
+#  Pre-defined libraries
+
+# Library order:
+#   user + iupcd + cd + iup + motif + X
+# Library path order is the opposite
+
+ifdef USE_LUA
+  LUASUFX :=
+  LIBLUASUFX := 3
+endif
+
+ifdef USE_LUA4
+  LUASUFX := 4
+  LIBLUASUFX := 4
+  override USE_LUA = Yes
+  LUA := $(LUA4)
+endif
+
+ifdef USE_LUA5
+  LUASUFX := 5
+  LIBLUASUFX := 5
+  override USE_LUA = Yes
+  LUA := $(LUA5)
+endif
+
+ifdef USE_LUA50
+  LUASUFX := 50
+  LIBLUASUFX := 5
+  override USE_LUA = Yes
+  LUA := $(LUA50)
+  NO_LUALIB := Yes
+endif
+
+ifdef USE_LUA51
+  LUASUFX := 5.1
+  LIBLUASUFX := 51
+  override USE_LUA = Yes
+  LUA := $(LUA51)
+  NO_LUALIB := Yes
+endif
+
+ifdef USE_IUPBETA
+  IUP := $(IUP)/beta
+endif 
+
+ifdef USE_CDBETA
+  CD := $(CD)/beta
+endif 
+
+ifdef USE_IMBETA
+  IM := $(IM)/beta
+endif 
+
+ifdef USE_GLUT
+  override USE_OPENGL = Yes
+endif
+
+ifdef USE_IUPCONTROLS
+  override USE_CD = Yes
+  override USE_IUP = Yes
+  ifdef USE_IUPLUA
+    ifdef USE_STATIC
+      SLIB += $(IUP)/lib/$(TEC_UNAME)/libiupluacontrols$(LIBLUASUFX).a
+    else
+      LIBS += iupluacontrols$(LIBLUASUFX)
+    endif
+    override USE_CDLUA = Yes
+  endif
+  ifdef USE_STATIC
+    SLIB += $(IUP)/lib/$(TEC_UNAME)/libiupcontrols.a
+  else
+    LIBS += iupcontrols
+  endif
+endif
+  
+ifdef USE_IMLUA
+  override USE_IM = Yes
+  ifdef USE_STATIC
+    SLIB += $(IM)/lib/$(TEC_UNAME)/libimlua$(LIBLUASUFX).a
+  else
+    LIBS += imlua$(LIBLUASUFX)
+  endif
+endif
+
+ifdef USE_CDLUA
+  override USE_CD = Yes
+  ifdef USE_STATIC
+    ifdef USE_IUP
+      ifndef USE_NEWNAMES
+        SLIB += $(CD)/lib/$(TEC_UNAME)/libcdluaiup$(LIBLUASUFX).a
+      endif
+    endif
+    SLIB += $(CD)/lib/$(TEC_UNAME)/libcdlua$(LIBLUASUFX).a
+  else
+    ifdef USE_IUP
+      ifndef USE_NEWNAMES
+        LIBS += cdluaiup$(LIBLUASUFX)
+      endif
+    endif
+    LIBS += cdlua$(LIBLUASUFX)
+  endif
+endif
+
+ifdef USE_IUPLUA
+  override USE_IUP = Yes
+  ifdef USE_STATIC
+    ifdef USE_CD
+      ifdef USE_NEWNAMES
+        SLIB += $(IUP)/lib/$(TEC_UNAME)/libiupluacd$(LIBLUASUFX).a
+      endif
+    endif
+    ifdef USE_OPENGL
+      SLIB += $(IUP)/lib/$(TEC_UNAME)/libiupluagl$(LIBLUASUFX).a
+    endif
+    SLIB += $(IUP)/lib/$(TEC_UNAME)/libiuplua$(LIBLUASUFX).a
+  else
+    ifdef USE_CD
+      ifdef USE_NEWNAMES
+        LIBS += iupluacd$(LIBLUASUFX)
+      endif
+    endif
+    ifdef USE_OPENGL
+      LIBS += iupluagl$(LIBLUASUFX)
+    endif
+    LIBS += iuplua$(LIBLUASUFX)
+  endif
+endif
+
+ifdef USE_LUA
+  ifdef USE_STATIC
+    ifndef NO_LUALIB
+      SLIB += $(LUA)/lib/$(TEC_UNAME)/liblualib$(LUASUFX).a
+    endif
+    SLIB += $(LUA)/lib/$(TEC_UNAME)/liblua$(LUASUFX).a
+  else
+    ifndef NO_LUALIB
+      LIBS += lualib$(LUASUFX)
+    endif
+    LIBS += lua$(LUASUFX)
+    LDIR += $(LUA)/lib/$(TEC_UNAME)
+  endif
+  INCLUDES += $(LUA)/include
+  LUABINDIR := $(LUA)/bin/$(TEC_UNAME)
+  BIN2C     := $(LUABINDIR)/bin2c$(LUASUFX)
+  LUAC      := $(LUABINDIR)/luac$(LUASUFX)
+  LUABIN    := $(LUABINDIR)/lua$(LUASUFX)
+endif
+
+ifdef USE_IUP   
+  ifdef USE_GTK
+    override USE_X11 = Yes
+    LIB_SFX = gtk
+  else
+    override USE_MOTIF = Yes
+    LIB_SFX =
+  endif
+  ifdef USE_STATIC
+    ifdef USE_CD
+      ifdef USE_NEWNAMES
+        SLIB += $(IUP)/lib/$(TEC_UNAME)/libiupcd.a
+      endif
+    endif
+    ifdef USE_OPENGL
+      SLIB += $(IUP)/lib/$(TEC_UNAME)/libiupgl.a
+    endif
+    SLIB += $(IUP)/lib/$(TEC_UNAME)/libiup$(LIB_SFX).a
+  else
+    ifdef USE_CD
+      ifdef USE_NEWNAMES
+        LIBS += iupcd
+      endif
+    endif
+    ifdef USE_OPENGL
+      LIBS += iupgl
+    endif
+    LIBS += iup$(LIB_SFX)
+    LDIR += $(IUP)/lib/$(TEC_UNAME)
+  endif
+  INCLUDES += $(IUP)/include
+endif
+
+ifdef USE_CD
+  override USE_X11 = Yes
+  ifdef USE_STATIC
+    ifdef USE_IUP
+      ifndef USE_NEWNAMES
+        SLIB += $(CD)/lib/$(TEC_UNAME)/libcdiup.a
+      endif
+    endif
+    ifdef USE_XRENDER
+      ifndef USE_NEWNAMES
+        SLIB += $(CD)/lib/$(TEC_UNAME)/libcdxrender.a
+      else
+        SLIB += $(CD)/lib/$(TEC_UNAME)/libcdcontextplus.a
+      endif
+    endif
+    SLIB += $(CD)/lib/$(TEC_UNAME)/libcd.a
+    ifdef USE_XRENDER
+      LIBS += Xrender Xft
+    else
+      ifndef USE_GTK
+        ifdef USE_NEWNAMES
+          # Freetype is included in GTK
+          SLIB += $(CD)/lib/$(TEC_UNAME)/libfreetype.a
+        endif 
+      endif
+    endif
+  else
+    ifdef USE_XRENDER
+      ifndef USE_NEWNAMES
+        LIBS += cdxrender
+      else
+        LIBS += cdcontextplus
+      endif
+    endif
+    LIBS += cd
+    LDIR += $(CD)/lib/$(TEC_UNAME)
+    ifdef USE_XRENDER
+      LIBS += Xrender Xft
+    else
+      ifndef USE_GTK
+        ifdef USE_NEWNAMES
+          # Freetype is included in GTK
+          LIBS += freetype
+        endif
+      endif
+    endif
+  endif
+  INCLUDES += $(CD)/include
+endif
+
+ifdef USE_IM
+  ifdef USE_STATIC
+    SLIB += $(IM)/lib/$(TEC_UNAME)/libim.a
+  else
+    LIBS += im
+    LDIR += $(IM)/lib/$(TEC_UNAME)
+  endif
+  INCLUDES += $(IM)/include
+endif
+
+# All except gcc in Windows (Cygwin)
+ifeq ($(findstring gcc, $(TEC_UNAME)), )
+
+ifdef USE_GLUT
+  LIBS += glut
+  LDIR += $(GLUT_LIB)
+  STDINCS += $(GLUT_INC)
+endif 
+
+ifdef USE_OPENGL
+  override USE_X11 = Yes
+  ifdef USE_MOTIF
+    LIBS += $(MOTIFGL_LIB)
+  endif
+  LIBS += $(OPENGL_LIBS)
+  LDIR += $(OPENGL_LIB)
+  STDINCS += $(OPENGL_INC)
+endif 
+
+ifdef USE_MOTIF
+  override USE_X11 = Yes
+  LIBS += Xm
+  LDIR += $(MOTIF_LIB)
+  STDINCS += $(MOTIF_INC)
+endif
+
+ifdef USE_GTK
+  override USE_X11 = Yes
+  LIBS += gtk-x11-2.0 gdk-x11-2.0 gdk_pixbuf-2.0 pango-1.0 pangox-1.0 gobject-2.0 gmodule-2.0 glib-2.0
+  STDINCS += /usr/include/atk-1.0 /usr/include/gtk-2.0 /usr/include/cairo /usr/include/pango-1.0 /usr/include/glib-2.0 /usr/lib/glib-2.0/include /usr/lib/gtk-2.0/include
+endif
+
+ifdef USE_X11
+  LIBS += $(X11_LIBS)
+  LDIR += $(X11_LIB)
+  STDINCS += $(X11_INC)
+endif 
+
+LIBS += m
+
+else
+  # gcc in Windows
+  NO_DYNAMIC ?= Yes
+  STDDEFS += -DWIN32
+  
+  ifdef USE_NOCYGWIN
+    STDFLAGS += -mno-cygwin
+  endif
+  
+  ifeq ($(MAKETYPE), APP)
+    TARGETDIR := $(TARGETROOT)/$(TEC_SYSNAME)
+  endif
+
+  ifdef USE_GLUT
+    LIBS += glut32
+  endif 
+
+  ifdef USE_OPENGL
+    LIBS += opengl32 glu32 glaux
+  endif 
+
+  LIBS += gdi32 winspool comdlg32 comctl32 ole32
+  
+  ifdef USE_GTK
+    LIBS += gtk-win32-2.0 gdk-win32-2.0 gdk_pixbuf-2.0 pango-1.0 pangowin32-1.0 gobject-2.0 gmodule-2.0 glib-2.0
+    LDIR += $(GTK)/lib
+    STDINCS += $(GTK)/include/atk-1.0 $(GTK)/include/gtk-2.0 $(GTK)/include/cairo $(GTK)/include/pango-1.0 $(GTK)/include/glib-2.0 $(GTK)/lib/glib-2.0/include $(GTK)/lib/gtk-2.0/include
+  endif
+  
+  APPTYPE ?= windows
+  
+  ifeq ($(APPTYPE), windows)
+    LFLAGS += -mwindows 
+  
+    ifdef USE_NOCYGWIN
+      LFLAGS += -mno-cygwin
+    endif
+  endif
+endif
+
+
+#---------------------------------#
+#  Building compilation flags that are sets
+
+INCLUDES := $(addprefix -I, $(INCLUDES))
+STDINCS := $(addprefix -I, $(STDINCS))
+EXTRAINCS := $(addprefix -I, $(EXTRAINCS))
+DEFINES := $(addprefix -D, $(DEFINES))
+
+LIBS := $(addprefix -l, $(LIBS))
+ifdef LDIR
+  LDIR := $(addprefix -L, $(LDIR))
+endif
+
+
+#---------------------------------#
+# Definitions of private variables
+
+# Library flags for application linker
+LFLAGS += $(LDIR) $(LIBS)
+# Library flags for dynamic library linker
+ifdef ADDTO_LDFLAGS
+  LDFLAGS += $(LFLAGS)
+endif
+
+# C compiler flags
+CFLAGS   = $(FLAGS) $(STDFLAGS) $(INCLUDES) $(STDINCS) $(EXTRAINCS) $(DEFINES) $(STDDEFS)
+# C++ compiler flags
+CXXFLAGS = $(CPPFLAGS) $(STDFLAGS) $(INCLUDES) $(STDINCS) $(EXTRAINCS) $(DEFINES) $(STDDEFS)
+
+# Sources with relative path
+SOURCES := $(addprefix $(SRCDIR)/, $(SRC))
+
+# Target for applications or libraries
+ifeq ($(MAKETYPE), APP)
+  TARGET := $(TARGETDIR)/$(TARGETNAME)
+else
+  ifeq ($(NO_DYNAMIC), Yes) 
+    TARGET := $(TARGETDIR)/lib$(TARGETNAME).a
+  else
+    TARGET := $(TARGETDIR)/lib$(TARGETNAME).a $(TARGETDIR)/lib$(TARGETNAME).$(DLIBEXT)
+  endif
+endif
+
+# OBJ: list of .o, without path
+# OBJS: list of .o with relative path
+OBJ = $(notdir $(SRC))
+OBJ := $(OBJ:.c=.o)
+OBJ := $(OBJ:.cpp=.o)
+OBJ := $(OBJ:.cxx=.o)
+OBJ := $(OBJ:.cc=.o)
+OBJ := $(OBJ:.f=.o)
+OBJ := $(OBJ:.for=.o)
+OBJ := $(OBJ:.rc=.ro)
+OBJS = $(addprefix $(OBJDIR)/, $(OBJ))
+
+# LO: list of .lo, without path
+# LOS: list of .lo, with the OBJROOT path
+LO = $(notdir $(SRCLUA))
+LO := $(LO:.lua=$(LO_SUFFIX).lo)
+LOS = $(addprefix $(OBJROOT)/, $(LO))
+
+LOH = $(notdir $(SRCLUA))
+LOH := $(LOH:.lua=$(LO_SUFFIX).loh)
+LOHS = $(addprefix $(LOHDIR)/, $(LOH))
+
+# Construct VPATH variable
+P-SRC = $(dir $(SRC))
+P-SRC += $(dir $(SRCLUA))
+VPATH = .:$(foreach dir,$(P-SRC),$(if $(dir)="./",:$(dir)))
+
+
+#---------------------------------#
+# Main Rule - Build everything that is necessary
+
+.PHONY: tecmake 
+ifeq ($(MAKETYPE), APP)
+  tecmake: print-start directories application scripts
+else
+  ifeq ($(NO_DYNAMIC), Yes) 
+    tecmake: print-start directories static-lib
+  else
+    tecmake: print-start directories static-lib dynamic-lib
+  endif
+endif
+
+.PHONY: print-start
+print-start:
+	@echo ''; echo 'Tecmake - Starting [ $(TARGETNAME):$(TEC_UNAME) ]'
+
+  
+#---------------------------------#
+# Dynamic Library Build
+
+.PHONY: dynamic-lib
+dynamic-lib: $(TARGETDIR)/lib$(TARGETNAME).$(DLIBEXT)
+
+$(TARGETDIR)/lib$(TARGETNAME).$(DLIBEXT) : $(LOHS) $(OBJS) $(EXTRADEPS)
+	$(LD) $(STDLDFLAGS) -o $@ $(OBJS) $(SLIB) $(LDFLAGS)
+	@echo 'Tecmake - Dynamic Library ($@) Done.'; echo ''
+
+  
+#---------------------------------#
+# Static Library Build
+
+.PHONY: static-lib
+static-lib: $(TARGETDIR)/lib$(TARGETNAME).a
+
+$(TARGETDIR)/lib$(TARGETNAME).a : $(LOHS) $(OBJS) $(EXTRADEPS)
+	$(AR) $(STDLFLAGS) $@ $(OBJS) $(SLIB)
+	-$(RANLIB) $@
+	@echo 'Tecmake - Static Library ($@) Done.'; echo ''
+
+  
+#---------------------------------#
+# Application Build
+
+.PHONY: application
+application: $(TARGETDIR)/$(TARGETNAME)
+
+$(TARGETDIR)/$(TARGETNAME) : $(LOHS) $(OBJS) $(EXTRADEPS)
+	$(LINKER) -o $@ $(OBJS) $(SLIB) $(LFLAGS)
+	@if [ ! -z "$(STRIP)" ]; then \
+	   echo "Striping debug information" ;\
+	   strip $@ ;\
+	 fi
+	@echo 'Tecmake - Application ($@) Done.'; echo ''
+
+  
+#---------------------------------#
+#  Application Scripts
+
+# Script name
+SRELEASE := $(SRCDIR)/$(TARGETNAME)
+
+.PHONY: scripts
+ifdef NO_SCRIPTS
+  scripts: ;
+else
+  scripts: $(SRELEASE) ;
+endif
+  
+$(SRELEASE): $(MAKENAME)
+	@echo 'Building script $(@F)'
+	@echo "#!/bin/csh" > $@
+	@echo "# Script generated automatically by tecmake v$(VERSION)" >> $@
+	@echo "# Remove the comment bellow to set the LD_LIBRARY_PATH if needed." >> $@
+	@echo '#setenv LD_LIBRARY_PATH $(MYLIB1)/lib/$${TEC_UNAME}:$(MYLIB2)/lib/$${TEC_UNAME}:$$LD_LIBRARY_PATH' >> $@
+	@echo 'if ( -r app.env ) source app.env' >> $@
+	@echo 'exec $(TARGETROOT)/$$TEC_UNAME/$(TARGETNAME) $$*' >> $@
+	@chmod a+x $@
+
+  
+#---------------------------------#
+# Directories Creation
+
+.PHONY: directories
+directories: $(OBJDIR) $(TARGETDIR) $(EXTRADIR)
+
+$(OBJDIR) $(TARGETDIR):
+	if [ ! -d $@ ] ; then mkdir -p $@ ; fi
+
+ifdef EXTRADIR
+  $(EXTRADIR):
+	  if [ ! -d $@ ] ; then mkdir -p $@ ; fi
+else
+  $(EXTRADIR): ;
+endif
+
+
+#---------------------------------#
+# Compilation Rules
+
+$(OBJDIR)/%.o:  $(SRCDIR)/%.c
+	@echo Compiling $(<F)...
+	$(CC) -c $(CFLAGS) -o $@ $<
+
+$(OBJDIR)/%.o:  $(SRCDIR)/%.cpp
+	@echo Compiling $(<F)...
+	$(CPPC) -c $(CXXFLAGS) -o $@ $<
+
+$(OBJDIR)/%.o:  $(SRCDIR)/%.cxx
+	@echo Compiling $(<F)...
+	$(CPPC) -c $(CXXFLAGS) -o $@ $<
+
+$(OBJDIR)/%.o:  $(SRCDIR)/%.cc
+	@echo Compiling $(<F)...
+	$(CPPC) -c $(CXXFLAGS) -o $@ $<
+
+$(OBJDIR)/%.o: $(SRCDIR)/%.f
+	@echo Compiling $(<F)...
+	$(FC) -c $(FFLAGS) -o $@ $<
+
+$(OBJDIR)/%.o: $(SRCDIR)/%.for
+	@echo Compiling $(<F)...
+	$(FC) -c $(FFLAGS) -o $@ $<
+
+$(OBJDIR)/%.ro:  $(SRCDIR)/%.rc
+	@echo Compiling $(<F)...
+	$(RCC) $(RCFLAGS) -O coff -o $@ $<
+
+$(LOHDIR)/%.loh:  $(OBJROOT)/%.lo
+	@echo Generating $(<F)...
+	$(BIN2C) $< > $@
+
+$(OBJROOT)/%$(LO_SUFFIX).lo:  $(SRCLUADIR)/%.lua
+	@echo Compiling $(<F)...
+	$(LUAC) -o $@ $<
+
+  
+#---------------------------------#
+# Dependencies
+
+# make depend
+#   Build the dependencies file with "g++ -MM" (g++ is probed, as it is what runs)
+.PHONY: depend
+depend: $(DEPEND)
+
+$(DEPEND): $(MAKENAME)
+  ifdef SRC
+	  @echo "" > $(DEPEND)
+	  @which g++ 2> /dev/null 1>&2 ;\
+	  if [ $$? -eq 0 ]; then \
+	    echo "Building dependencies... (can be slow)" ;\
+	    g++ $(INCLUDES) $(DEFINES) $(STDDEFS) -MM $(SOURCES) | \
+	    sed -e '1,$$s/^\([^ ]\)/$$(OBJDIR)\/\1/' > $(DEPEND) ;\
+	  else \
+	    echo "" ;\
+	    echo "g++ not found. Dependencies can not be built." ;\
+	    echo "Must set USE_NODEPEND=Yes." ;\
+	    echo "" ;\
+	    exit 1 ;\
+	  fi
+  endif
+
+###################
+ifndef USE_NODEPEND
+include $(DEPEND)
+endif
+###################
+
+
+#---------------------------------#
+# Management Rules
+
+# make clean-extra
+#   Remove extra files
+.PHONY: clean-extra
+clean-extra:
+	rm -f $(DEPEND) $(SRELEASE) so_locations
+	
+# make clean-lohs
+#   Remove Lua object inclusion files
+.PHONY: clean-lohs
+clean-lohs:
+	rm -f $(LOS) $(LOHS)
+	
+# make clean-obj
+#   Remove object files
+.PHONY: clean-obj
+clean-obj:
+	rm -f $(OBJS)
+
+# make clean-target
+#   Remove target
+.PHONY: clean-target
+clean-target:
+	rm -f $(TARGET)
+
+# make clean
+#   Remove target and object files
+.PHONY: clean
+clean: clean-target clean-obj
+
+# make strip
+#   Remove symbols from executables
+.PHONY: strip
+strip:
+	test -r $(TARGETDIR)/$(TARGETNAME) && strip $(TARGETDIR)/$(TARGETNAME)
+
+# make rebuild
+#   Rebuild target and object files 
+.PHONY: rebuild
+rebuild: clean-extra clean-lohs clean-obj clean-target tecmake
+
+# make relink
+#   Rebuild target without rebuilding object files 
+.PHONY: relink
+relink: clean-target tecmake
+
+.PHONY: version
+version:
+	@echo "Tecmake Compact Version $(VERSION)"
+
+#---------------------------------#
diff --git a/src/tiff_binfile.c b/src/tiff_binfile.c
new file mode 100644
index 0000000..e782b94
--- /dev/null
+++ b/src/tiff_binfile.c
@@ -0,0 +1,139 @@
+/** \file
+ * \brief libTIFF I/O and error handlers.
+ * I/O uses imBinFile instead of libTIFF original handlers.
+ *
+ * See Copyright Notice in im_lib.h
+ * $Id: tiff_binfile.c,v 1.1 2008/10/17 06:10:16 scuri Exp $
+ */
+
+#include "tiffiop.h"
+
+#include "im_binfile.h"
+
+#include <stdlib.h>
+#include <memory.h>
+
+static tsize_t iTIFFReadProc(thandle_t fd, tdata_t buf, tsize_t size)
+{
+  /* Read callback: the client handle is the imBinFile to read from. */
+  return imBinFileRead((imBinFile*)fd, buf, size, 1);
+}
+
+static tsize_t iTIFFWriteProc(thandle_t fd, tdata_t buf, tsize_t size)
+{
+  /* Write callback: the client handle is the imBinFile to write to. */
+  return imBinFileWrite((imBinFile*)fd, buf, size, 1);
+}
+
+/* Seek callback: moves the imBinFile position according to "whence" and
+   returns what imBinFileTell reports afterwards. */
+static toff_t iTIFFSeekProc(thandle_t fd, toff_t off, int whence)
+{
+  imBinFile* bfile = (imBinFile*)fd;
+
+  /* Each stdio origin has its own imBinFile seek call; an unrecognized
+     origin performs no seek at all. */
+  if (whence == SEEK_SET)
+    imBinFileSeekTo(bfile, off);
+  else if (whence == SEEK_CUR)
+    imBinFileSeekOffset(bfile, off);
+  else if (whence == SEEK_END)
+    imBinFileSeekFrom(bfile, off);
+
+  /* The position is queried even when nothing was moved. */
+  return imBinFileTell(bfile);
+}
+
+static int iTIFFCloseProc(thandle_t fd)
+{
+  /* Close callback: closes the imBinFile behind the handle, then reports 0. */
+  imBinFileClose((imBinFile*)fd);
+  return 0;
+}
+
+static toff_t iTIFFSizeProc(thandle_t fd)
+{
+  /* Size callback: forwards to imBinFileSize on the imBinFile handle. */
+  return imBinFileSize((imBinFile*)fd);
+}
+
+static int iTIFFMapProc(thandle_t fd, tdata_t* pbase, toff_t* psize)
+{
+  (void)psize; (void)pbase; (void)fd;  /* nothing is mapped, arguments unused */
+  return 0;
+}
+
+static void iTIFFUnmapProc(thandle_t fd, tdata_t base, toff_t size)
+{
+  (void)size; (void)base; (void)fd;  /* nothing was mapped, nothing to undo */
+}
+
+TIFF* TIFFFdOpen(int fd, const char* name, const char* mode)
+{
+  /* NOTE(review): the I/O callbacks cast the handle to imBinFile*, so an
+     ordinary descriptor passed here looks unusable -- confirm with callers. */
+  TIFF* tif = TIFFClientOpen(name, mode, (thandle_t) fd,
+                             iTIFFReadProc, iTIFFWriteProc,
+                             iTIFFSeekProc, iTIFFCloseProc,
+                             iTIFFSizeProc, iTIFFMapProc, iTIFFUnmapProc);
+
+  if (tif != NULL)
+    tif->tif_fd = fd;
+  return tif;
+}
+
+/* Opens a TIFF by file name, routing all I/O through an imBinFile. */
+TIFF* TIFFOpen(const char* name, const char* mode)
+{
+  TIFF* tiff;
+  /* Modes starting with 'r' use imBinFileOpen; any other mode uses imBinFileNew. */
+  imBinFile* bin_file = (mode[0] == 'r')? imBinFileOpen(name): imBinFileNew(name);
+  if (bin_file == NULL)
+    return NULL;
+
+  /* The imBinFile becomes the client handle seen by the I/O callbacks
+     defined in this file. */
+  tiff = TIFFClientOpen(name, mode, (thandle_t)bin_file,
+                        iTIFFReadProc, iTIFFWriteProc,
+                        iTIFFSeekProc, iTIFFCloseProc,
+                        iTIFFSizeProc, iTIFFMapProc, iTIFFUnmapProc);
+
+  /* Without a TIFF handle nobody else would close the file, so do it here. */
+  if (tiff == NULL)
+    imBinFileClose(bin_file);
+
+  return tiff;
+}
+
+void* _TIFFmalloc(tsize_t s)
+{
+  return malloc((size_t)s);  /* allocation hook served by the C library malloc */
+}
+
+void _TIFFfree(tdata_t p)
+{
+  free(p);  /* release hook served by the C library free */
+}
+
+void* _TIFFrealloc(tdata_t p, tsize_t s)
+{
+  return realloc(p, (size_t)s);  /* resize hook served by the C library realloc */
+}
+
+void _TIFFmemset(tdata_t p, int v, tsize_t c)
+{
+  memset(p, v, (size_t) c);  /* fill c bytes at p with the value v */
+}
+
+void _TIFFmemcpy(tdata_t d, const tdata_t s, tsize_t c)
+{
+  memcpy(d, s, (size_t) c);  /* copy c bytes from s to d */
+}
+
+int _TIFFmemcmp(const tdata_t p1, const tdata_t p2, tsize_t c)
+{
+  return memcmp(p1, p2, (size_t)c);  /* compare the first c bytes of p1 and p2 */
+}
+
+TIFFErrorHandler _TIFFwarningHandler = NULL;
+TIFFErrorHandler _TIFFerrorHandler = NULL;
diff --git a/src/zlib/adler32.c b/src/zlib/adler32.c
new file mode 100644
index 0000000..34fe2f0
--- /dev/null
+++ b/src/zlib/adler32.c
@@ -0,0 +1,149 @@
+/* adler32.c -- compute the Adler-32 checksum of a data stream
+ * Copyright (C) 1995-2004 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id: adler32.c,v 1.1 2008/10/17 06:17:16 scuri Exp $ */
+
+#define ZLIB_INTERNAL
+#include "zlib.h"
+
+#define BASE 65521UL    /* largest prime smaller than 65536 */
+#define NMAX 5552
+/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+
+#define DO1(buf,i)  {adler += (buf)[i]; sum2 += adler;}
+#define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
+#define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
+#define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
+#define DO16(buf)   DO8(buf,0); DO8(buf,8);
+
+/* use NO_DIVIDE if your processor does not do division in hardware */
+#ifdef NO_DIVIDE
+#  define MOD(a) \
+    do { \
+        if (a >= (BASE << 16)) a -= (BASE << 16); \
+        if (a >= (BASE << 15)) a -= (BASE << 15); \
+        if (a >= (BASE << 14)) a -= (BASE << 14); \
+        if (a >= (BASE << 13)) a -= (BASE << 13); \
+        if (a >= (BASE << 12)) a -= (BASE << 12); \
+        if (a >= (BASE << 11)) a -= (BASE << 11); \
+        if (a >= (BASE << 10)) a -= (BASE << 10); \
+        if (a >= (BASE << 9)) a -= (BASE << 9); \
+        if (a >= (BASE << 8)) a -= (BASE << 8); \
+        if (a >= (BASE << 7)) a -= (BASE << 7); \
+        if (a >= (BASE << 6)) a -= (BASE << 6); \
+        if (a >= (BASE << 5)) a -= (BASE << 5); \
+        if (a >= (BASE << 4)) a -= (BASE << 4); \
+        if (a >= (BASE << 3)) a -= (BASE << 3); \
+        if (a >= (BASE << 2)) a -= (BASE << 2); \
+        if (a >= (BASE << 1)) a -= (BASE << 1); \
+        if (a >= BASE) a -= BASE; \
+    } while (0)
+#  define MOD4(a) \
+    do { \
+        if (a >= (BASE << 4)) a -= (BASE << 4); \
+        if (a >= (BASE << 3)) a -= (BASE << 3); \
+        if (a >= (BASE << 2)) a -= (BASE << 2); \
+        if (a >= (BASE << 1)) a -= (BASE << 1); \
+        if (a >= BASE) a -= BASE; \
+    } while (0)
+#else
+#  define MOD(a) a %= BASE
+#  define MOD4(a) a %= BASE
+#endif
+
+/* ========================================================================= */
+uLong ZEXPORT adler32(adler, buf, len)
+    uLong adler;        /* checksum accumulated so far */
+    const Bytef *buf;   /* bytes to add; Z_NULL yields the initial value 1 */
+    uInt len;           /* number of bytes available at buf */
+{
+    unsigned long sum2;
+    unsigned n;
+
+    /* split Adler-32 into component sums */
+    sum2 = (adler >> 16) & 0xffff;      /* high 16 bits */
+    adler &= 0xffff;                    /* low 16 bits */
+
+    /* in case user likes doing a byte at a time, keep it fast */
+    if (len == 1) {
+        adler += buf[0];
+        if (adler >= BASE)              /* one subtraction is enough here */
+            adler -= BASE;
+        sum2 += adler;
+        if (sum2 >= BASE)
+            sum2 -= BASE;
+        return adler | (sum2 << 16);
+    }
+
+    /* initial Adler-32 value (deferred check for len == 1 speed) */
+    if (buf == Z_NULL)
+        return 1L;
+
+    /* in case short lengths are provided, keep it somewhat fast */
+    if (len < 16) {
+        while (len--) {
+            adler += *buf++;
+            sum2 += adler;
+        }
+        if (adler >= BASE)
+            adler -= BASE;
+        MOD4(sum2);             /* only added so many BASE's */
+        return adler | (sum2 << 16);
+    }
+
+    /* do length NMAX blocks -- requires just one modulo operation */
+    while (len >= NMAX) {
+        len -= NMAX;
+        n = NMAX / 16;          /* NMAX is divisible by 16 */
+        do {
+            DO16(buf);          /* 16 sums unrolled */
+            buf += 16;
+        } while (--n);
+        MOD(adler);             /* reduce both sums once per block */
+        MOD(sum2);
+    }
+
+    /* do remaining bytes (less than NMAX, still just one modulo) */
+    if (len) {                  /* avoid modulos if none remaining */
+        while (len >= 16) {     /* whole groups of 16 bytes first */
+            len -= 16;
+            DO16(buf);
+            buf += 16;
+        }
+        while (len--) {         /* then the last 0..15 bytes one by one */
+            adler += *buf++;
+            sum2 += adler;
+        }
+        MOD(adler);
+        MOD(sum2);
+    }
+
+    /* return recombined sums */
+    return adler | (sum2 << 16);
+}
+
+/* ========================================================================= */
+uLong ZEXPORT adler32_combine(adler1, adler2, len2)
+    uLong adler1;       /* Adler-32 of the first block */
+    uLong adler2;       /* Adler-32 of the second block */
+    z_off_t len2;       /* length of the second block */
+{
+    unsigned long sum1;
+    unsigned long sum2;
+    unsigned rem;
+    /* returns the Adler-32 of the two blocks concatenated; the derivation
+       of this formula is left as an exercise for the reader */
+    rem = (unsigned)(len2 % BASE);
+    sum1 = adler1 & 0xffff;
+    sum2 = rem * sum1;
+    MOD(sum2);
+    sum1 += (adler2 & 0xffff) + BASE - 1;
+    sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
+    if (sum1 >= BASE) sum1 -= BASE;     /* >= so a sum equal to BASE wraps to 0 */
+    if (sum1 >= BASE) sum1 -= BASE;
+    if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1);
+    if (sum2 >= BASE) sum2 -= BASE;
+    return sum1 | (sum2 << 16);
+}
diff --git a/src/zlib/compress.c b/src/zlib/compress.c
new file mode 100644
index 0000000..9ac4c06
--- /dev/null
+++ b/src/zlib/compress.c
@@ -0,0 +1,79 @@
+/* compress.c -- compress a memory buffer
+ * Copyright (C) 1995-2003 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id: compress.c,v 1.1 2008/10/17 06:17:16 scuri Exp $ */
+
+#define ZLIB_INTERNAL
+#include "zlib.h"
+
+/* ===========================================================================
+     Compresses the source buffer into the destination buffer. The level
+   parameter has the same meaning as in deflateInit.  sourceLen is the byte
+   length of the source buffer. Upon entry, destLen is the total size of the
+   destination buffer, which must be at least 0.1% larger than sourceLen plus
+   12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
+
+     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_STREAM_ERROR if the level parameter is invalid, Z_BUF_ERROR if the
+   output buffer is too small or sourceLen or *destLen does not fit in a uInt.
+*/
+int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+    int level;
+{
+    z_stream stream;
+    int err;
+
+    stream.next_in = (Bytef*)source;
+    stream.avail_in = (uInt)sourceLen;
+    /* Reject a source length that does not survive the cast to uInt.  This
+       used to be checked only under MAXSEG_64K, but uLong is also wider than
+       uInt on e.g. LP64 systems, where input would be silently dropped: */
+    if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR;
+    stream.next_out = dest;
+    stream.avail_out = (uInt)*destLen;
+    if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR;
+
+    stream.zalloc = (alloc_func)0;
+    stream.zfree = (free_func)0;
+    stream.opaque = (voidpf)0;
+
+    err = deflateInit(&stream, level);
+    if (err != Z_OK) return err;
+
+    err = deflate(&stream, Z_FINISH);   /* the whole input in a single call */
+    if (err != Z_STREAM_END) {
+        deflateEnd(&stream);
+        return err == Z_OK ? Z_BUF_ERROR : err;     /* Z_OK: did not finish */
+    }
+    *destLen = stream.total_out;
+
+    err = deflateEnd(&stream);
+    return err;
+}
+
+/* ===========================================================================
+     Same as compress2(), with the level fixed at Z_DEFAULT_COMPRESSION. */
+int ZEXPORT compress (dest, destLen, source, sourceLen)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+{
+    return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);
+}
+
+/* ===========================================================================
+     Returns sourceLen + sourceLen/4096 + sourceLen/16384 + 11, with no check
+   for overflow of the sum.  If the default memLevel or windowBits for
+   deflateInit() is changed, then this function needs to be updated. */
+uLong ZEXPORT compressBound (sourceLen)
+    uLong sourceLen;
+{
+    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + 11;
+}
diff --git a/src/zlib/crc32.c b/src/zlib/crc32.c
new file mode 100644
index 0000000..2ba8f58
--- /dev/null
+++ b/src/zlib/crc32.c
@@ -0,0 +1,423 @@
+/* crc32.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-2005 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
+ * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
+ * tables for updating the shift register in one step with three exclusive-ors
+ * instead of four steps with four exclusive-ors.  This results in about a
+ * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
+ */
+
+/* @(#) $Id: crc32.c,v 1.1 2008/10/17 06:17:16 scuri Exp $ */
+
+/*
+  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
+  protection on the static variables used to control the first-use generation
+  of the crc tables.  Therefore, if you #define DYNAMIC_CRC_TABLE, you should
+  first call get_crc_table() to initialize the tables before allowing more than
+  one thread to use crc32().
+ */
+
+#ifdef MAKECRCH
+#  include <stdio.h>
+#  ifndef DYNAMIC_CRC_TABLE
+#    define DYNAMIC_CRC_TABLE
+#  endif /* !DYNAMIC_CRC_TABLE */
+#endif /* MAKECRCH */
+
+#include "zutil.h"      /* for STDC and FAR definitions */
+
+#define local static
+
+/* Find a four-byte integer type (u4) for crc32_little() and crc32_big(). */
+#ifndef NOBYFOUR
+#  ifdef STDC           /* need ANSI C limits.h to determine sizes */
+#    include <limits.h>
+#    define BYFOUR      /* provisional: undefined again below if no u4 fits */
+#    if (UINT_MAX == 0xffffffffUL)
+       typedef unsigned int u4;
+#    else
+#      if (ULONG_MAX == 0xffffffffUL)
+         typedef unsigned long u4;
+#      else
+#        if (USHRT_MAX == 0xffffffffUL)
+           typedef unsigned short u4;
+#        else
+#          undef BYFOUR     /* can't find a four-byte integer type! */
+#        endif
+#      endif
+#    endif
+#  endif /* STDC */
+#endif /* !NOBYFOUR */
+
+/* For doing the crc four data bytes at a time; REV reverses a u4's bytes. */
+#ifdef BYFOUR
+#  define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \
+                (((w)&0xff00)<<8)+(((w)&0xff)<<24))
+   local unsigned long crc32_little OF((unsigned long,
+                        const unsigned char FAR *, unsigned));
+   local unsigned long crc32_big OF((unsigned long,
+                        const unsigned char FAR *, unsigned));
+#  define TBLS 8        /* tables 0-3 plus their REV'd copies in 4-7 */
+#else
+#  define TBLS 1        /* only the byte-wise table */
+#endif /* BYFOUR */
+
+/* Local functions for crc concatenation */
+local unsigned long gf2_matrix_times OF((unsigned long *mat,
+                                         unsigned long vec));
+local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat));
+
+#ifdef DYNAMIC_CRC_TABLE
+
+local volatile int crc_table_empty = 1;    /* zeroed by make_crc_table() */
+local unsigned long FAR crc_table[TBLS][256];  /* filled at run time */
+local void make_crc_table OF((void));
+#ifdef MAKECRCH
+   local void write_table OF((FILE *, const unsigned long FAR *));
+#endif /* MAKECRCH */
+/*
+  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
+  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
+
+  Polynomials over GF(2) are represented in binary, one bit per coefficient,
+  with the lowest powers in the most significant bit.  Then adding polynomials
+  is just exclusive-or, and multiplying a polynomial by x is a right shift by
+  one.  If we call the above polynomial p, and represent a byte as the
+  polynomial q, also with the lowest power in the most significant bit (so the
+  byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
+  where a mod b means the remainder after dividing a by b.
+
+  This calculation is done using the shift-register method of multiplying and
+  taking the remainder.  The register is initialized to zero, and for each
+  incoming bit, x^32 is added mod p to the register if the bit is a one (where
+  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
+  x (which is shifting right by one and adding x^32 mod p if the bit shifted
+  out is a one).  We start with the highest power (least significant bit) of
+  q and repeat for all eight bits of q.
+
+  The first table is simply the CRC of all possible eight bit values.  This is
+  all the information needed to generate CRCs on data a byte at a time for all
+  combinations of CRC register values and incoming bytes.  The remaining tables
+  allow for word-at-a-time CRC calculation for both big-endian and little-
+  endian machines, where a word is four bytes.
+*/
+local void make_crc_table()
+{
+    unsigned long c;                    /* crc value being computed */
+    int n, k;
+    unsigned long poly;                 /* polynomial exclusive-or pattern */
+    /* p[] below: terms of polynomial defining this crc (except x^32) */
+    static volatile int first = 1;      /* flag to limit concurrent making */
+    static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
+
+    /* See if another task is already doing this (not thread-safe, but better
+       than nothing -- significantly reduces duration of vulnerability in
+       case the advice about DYNAMIC_CRC_TABLE is ignored) */
+    if (first) {
+        first = 0;
+
+        /* make exclusive-or pattern from polynomial (0xedb88320UL) */
+        poly = 0UL;
+        for (n = 0; n < sizeof(p)/sizeof(unsigned char); n++)
+            poly |= 1UL << (31 - p[n]);     /* term x^p[n] goes in bit 31-p[n] */
+
+        /* generate a crc for every 8-bit value */
+        for (n = 0; n < 256; n++) {
+            c = (unsigned long)n;
+            for (k = 0; k < 8; k++)         /* one shift-register step per bit */
+                c = c & 1 ? poly ^ (c >> 1) : c >> 1;
+            crc_table[0][n] = c;
+        }
+
+#ifdef BYFOUR
+        /* generate crc for each value followed by one, two, and three zeros,
+           and then the byte reversal of those as well as the first table */
+        for (n = 0; n < 256; n++) {
+            c = crc_table[0][n];
+            crc_table[4][n] = REV(c);       /* tables 4-7 are read by crc32_big() */
+            for (k = 1; k < 4; k++) {
+                c = crc_table[0][c & 0xff] ^ (c >> 8);  /* append one zero byte */
+                crc_table[k][n] = c;
+                crc_table[k + 4][n] = REV(c);
+            }
+        }
+#endif /* BYFOUR */
+
+        crc_table_empty = 0;        /* set last, releasing any waiter below */
+    }
+    else {      /* not first */
+        /* wait for the other guy to finish (not efficient, but rare) */
+        while (crc_table_empty)
+            ;
+    }
+
+#ifdef MAKECRCH
+    /* write out CRC tables to crc32.h (gives up silently if fopen fails) */
+    {   /* reached on every call, after either branch above */
+        FILE *out;
+
+        out = fopen("crc32.h", "w");
+        if (out == NULL) return;
+        fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
+        fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
+        fprintf(out, "local const unsigned long FAR ");
+        fprintf(out, "crc_table[TBLS][256] =\n{\n  {\n");
+        write_table(out, crc_table[0]);
+#  ifdef BYFOUR
+        fprintf(out, "#ifdef BYFOUR\n");    /* extra tables are conditional */
+        for (k = 1; k < 8; k++) {
+            fprintf(out, "  },\n  {\n");
+            write_table(out, crc_table[k]);
+        }
+        fprintf(out, "#endif\n");
+#  endif /* BYFOUR */
+        fprintf(out, "  }\n};\n");
+        fclose(out);
+    }
+#endif /* MAKECRCH */
+}
+
+#ifdef MAKECRCH
+local void write_table(out, table)      /* emit 256 entries, five per line */
+    FILE *out;
+    const unsigned long FAR *table;
+{
+    int i;                      /* the last entry gets a newline but no comma */
+    for (i = 0; i < 256; i++) {
+        const char *sep = i == 255 ? "\n" : (i % 5 == 4 ? ",\n" : ", ");
+        fprintf(out, "%s0x%08lxUL%s", i % 5 ? "" : "    ", table[i], sep);
+    }
+}
+#endif /* MAKECRCH */
+
+#else /* !DYNAMIC_CRC_TABLE */
+/* ========================================================================
+ * Tables of CRC-32s of all single-byte values, made by make_crc_table().
+ */
+#include "crc32.h"
+#endif /* DYNAMIC_CRC_TABLE */
+
+/* =========================================================================
+ * Returns crc_table as a flat pointer, building it first if it is still empty
+ * under DYNAMIC_CRC_TABLE.  Can be used by asm versions of crc32(). */
+const unsigned long FAR * ZEXPORT get_crc_table()
+{
+#ifdef DYNAMIC_CRC_TABLE
+    if (crc_table_empty)
+        make_crc_table();
+#endif /* DYNAMIC_CRC_TABLE */
+    return (const unsigned long FAR *)crc_table;
+}
+
+/* ========================================================================= */
+#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
+#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
+
+/* ========================================================================= */
+unsigned long ZEXPORT crc32(crc, buf, len)
+    unsigned long crc;          /* crc value to update */
+    const unsigned char FAR *buf;
+    unsigned len;               /* number of bytes to process at buf */
+{
+    /* a null buffer yields 0 without touching the tables */
+    if (buf == Z_NULL)
+        return 0UL;
+#ifdef DYNAMIC_CRC_TABLE
+    if (crc_table_empty)
+        make_crc_table();
+#endif /* DYNAMIC_CRC_TABLE */
+
+#ifdef BYFOUR
+    /* word-at-a-time path, taken when void * and ptrdiff_t are the same size */
+    if (sizeof(void *) == sizeof(ptrdiff_t)) {
+        u4 probe = 1;           /* first byte is nonzero on little-endian */
+
+        return *((unsigned char *)(&probe)) != 0
+            ? crc32_little(crc, buf, len)
+            : crc32_big(crc, buf, len);
+    }
+#endif /* BYFOUR */
+    /* byte-at-a-time path: the crc is complemented on entry and on exit */
+    crc ^= 0xffffffffUL;
+    for (; len >= 8; len -= 8) {
+        DO8;
+    }
+    for (; len != 0; len--) {
+        DO1;
+    }
+    return crc ^ 0xffffffffUL;
+}
+
+#ifdef BYFOUR
+
+/* ========================================================================= */
+#define DOLIT4 c ^= *buf4++; \
+        c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
+            crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
+#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
+
+/* ========================================================================= */
+local unsigned long crc32_little(crc, buf, len) /* four bytes per table step */
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    unsigned len;
+{
+    register u4 c;              /* crc register, complemented during the loops */
+    register const u4 FAR *buf4;    /* buf viewed as whole u4 words */
+
+    c = (u4)crc;
+    c = ~c;
+    while (len && ((ptrdiff_t)buf & 3)) {   /* bytewise until buf is 4-aligned */
+        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+        len--;
+    }
+
+    buf4 = (const u4 FAR *)(const void FAR *)buf;
+    while (len >= 32) {         /* DOLIT32 is eight DOLIT4 steps */
+        DOLIT32;
+        len -= 32;
+    }
+    while (len >= 4) {          /* remaining whole words */
+        DOLIT4;
+        len -= 4;
+    }
+    buf = (const unsigned char FAR *)buf4;
+
+    if (len) do {               /* up to three trailing bytes */
+        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+    } while (--len);
+    c = ~c;
+    return (unsigned long)c;
+}
+
+/* ========================================================================= */
+#define DOBIG4 c ^= *++buf4; \
+        c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
+            crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
+#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
+
+/* ========================================================================= */
+local unsigned long crc32_big(crc, buf, len)    /* four bytes per table step */
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    unsigned len;
+{
+    register u4 c;              /* byte-reversed, complemented crc register */
+    register const u4 FAR *buf4;    /* buf viewed as whole u4 words */
+
+    c = REV((u4)crc);
+    c = ~c;
+    while (len && ((ptrdiff_t)buf & 3)) {   /* bytewise until buf is 4-aligned */
+        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
+        len--;
+    }
+
+    buf4 = (const u4 FAR *)(const void FAR *)buf;
+    buf4--;                     /* DOBIG4 pre-increments, so start one word back */
+    while (len >= 32) {         /* DOBIG32 is eight DOBIG4 steps */
+        DOBIG32;
+        len -= 32;
+    }
+    while (len >= 4) {          /* remaining whole words */
+        DOBIG4;
+        len -= 4;
+    }
+    buf4++;                     /* step past the last word consumed */
+    buf = (const unsigned char FAR *)buf4;
+
+    if (len) do {               /* up to three trailing bytes */
+        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
+    } while (--len);
+    c = ~c;
+    return (unsigned long)(REV(c));     /* back to the caller's byte order */
+}
+
+#endif /* BYFOUR */
+
+#define GF2_DIM 32      /* dimension of GF(2) vectors (length of CRC) */
+
+/* ========================================================================= */
+local unsigned long gf2_matrix_times(mat, vec)
+    unsigned long *mat;
+    unsigned long vec;
+{
+    unsigned long acc = 0;      /* xor of the selected mat[] entries */
+    unsigned long *entry;
+
+    /* mat[i] takes part when bit i of vec is set; the scan starts at bit 0
+       and ends as soon as no set bits remain in vec */
+    for (entry = mat; vec != 0; vec >>= 1, entry++) {
+        if (vec & 1)
+            acc ^= *entry;
+    }
+    return acc;
+}
+
+/* ========================================================================= */
+local void gf2_matrix_square(square, mat)
+    unsigned long *square;      /* receives GF2_DIM entries */
+    unsigned long *mat;         /* GF2_DIM entries, only read here */
+{
+    unsigned long *dst = square, *src = mat;
+    /* entry n of the result is mat applied to its own entry n */
+    while (src != mat + GF2_DIM)
+        *dst++ = gf2_matrix_times(mat, *src++);
+}
+
+/* ========================================================================= */
+uLong ZEXPORT crc32_combine(crc1, crc2, len2)
+    uLong crc1;                 /* advanced over len2 zero bytes, then xored */
+    uLong crc2;                 /* xored into the advanced crc1 */
+    z_off_t len2;               /* byte count; <= 0 returns crc1 unchanged */
+{
+    int n;
+    unsigned long row;
+    unsigned long even[GF2_DIM];    /* even-power-of-two zeros operator */
+    unsigned long odd[GF2_DIM];     /* odd-power-of-two zeros operator */
+
+    /* degenerate case; a negative len2 could otherwise shift right forever */
+    if (len2 <= 0)
+        return crc1;
+
+    /* put operator for one zero bit in odd */
+    odd[0] = 0xedb88320L;           /* CRC-32 polynomial */
+    row = 1;
+    for (n = 1; n < GF2_DIM; n++) {
+        odd[n] = row;
+        row <<= 1;
+    }
+
+    /* put operator for two zero bits in even */
+    gf2_matrix_square(even, odd);
+
+    /* put operator for four zero bits in odd */
+    gf2_matrix_square(odd, even);
+
+    /* apply len2 zeros to crc1 (first square will put the operator for one
+       zero byte, eight zero bits, in even) */
+    do {
+        /* apply zeros operator for this bit of len2 */
+        gf2_matrix_square(even, odd);
+        if (len2 & 1)
+            crc1 = gf2_matrix_times(even, crc1);
+        len2 >>= 1;
+
+        /* if no more bits set, then done */
+        if (len2 == 0)
+            break;
+
+        /* another iteration of the loop with odd and even swapped */
+        gf2_matrix_square(odd, even);
+        if (len2 & 1)
+            crc1 = gf2_matrix_times(odd, crc1);
+        len2 >>= 1;
+
+        /* if no more bits set, then done */
+    } while (len2 != 0);
+
+    /* return combined crc */
+    crc1 ^= crc2;
+    return crc1;
+}
diff --git a/src/zlib/crc32.h b/src/zlib/crc32.h
new file mode 100644
index 0000000..8053b61
--- /dev/null
+++ b/src/zlib/crc32.h
@@ -0,0 +1,441 @@
+/* crc32.h -- tables for rapid CRC calculation
+ * Generated automatically by crc32.c
+ */
+
+local const unsigned long FAR crc_table[TBLS][256] =
+{
+  {
+    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
+    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
+    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
+    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
+    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
+    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
+    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
+    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
+    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
+    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
+    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
+    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
+    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
+    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
+    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
+    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
+    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
+    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
+    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
+    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
+    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
+    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
+    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
+    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
+    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
+    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
+    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
+    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
+    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
+    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
+    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
+    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
+    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
+    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
+    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
+    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
+    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
+    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
+    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
+    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
+    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
+    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
+    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
+    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
+    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
+    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
+    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
+    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
+    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
+    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
+    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
+    0x2d02ef8dUL
+#ifdef BYFOUR
+  },
+  {
+    0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL,
+    0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL,
+    0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL,
+    0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL,
+    0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL,
+    0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL,
+    0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL,
+    0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL,
+    0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL,
+    0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL,
+    0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL,
+    0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL,
+    0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL,
+    0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL,
+    0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL,
+    0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL,
+    0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL,
+    0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL,
+    0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL,
+    0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL,
+    0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL,
+    0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL,
+    0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL,
+    0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL,
+    0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL,
+    0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL,
+    0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL,
+    0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL,
+    0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL,
+    0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL,
+    0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL,
+    0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL,
+    0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL,
+    0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL,
+    0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL,
+    0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL,
+    0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL,
+    0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL,
+    0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL,
+    0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL,
+    0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL,
+    0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL,
+    0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL,
+    0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL,
+    0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL,
+    0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL,
+    0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL,
+    0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL,
+    0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL,
+    0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL,
+    0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL,
+    0x9324fd72UL
+  },
+  {
+    0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL,
+    0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL,
+    0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL,
+    0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL,
+    0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL,
+    0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL,
+    0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL,
+    0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL,
+    0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL,
+    0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL,
+    0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL,
+    0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL,
+    0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL,
+    0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL,
+    0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL,
+    0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL,
+    0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL,
+    0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL,
+    0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL,
+    0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL,
+    0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL,
+    0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL,
+    0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL,
+    0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL,
+    0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL,
+    0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL,
+    0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL,
+    0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL,
+    0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL,
+    0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL,
+    0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL,
+    0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL,
+    0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL,
+    0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL,
+    0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL,
+    0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL,
+    0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL,
+    0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL,
+    0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL,
+    0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL,
+    0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL,
+    0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL,
+    0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL,
+    0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL,
+    0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL,
+    0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL,
+    0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL,
+    0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL,
+    0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL,
+    0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL,
+    0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL,
+    0xbe9834edUL
+  },
+  {
+    0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL,
+    0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL,
+    0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL,
+    0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL,
+    0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL,
+    0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL,
+    0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL,
+    0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL,
+    0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL,
+    0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL,
+    0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL,
+    0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL,
+    0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL,
+    0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL,
+    0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL,
+    0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL,
+    0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL,
+    0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL,
+    0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL,
+    0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL,
+    0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL,
+    0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL,
+    0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL,
+    0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL,
+    0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL,
+    0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL,
+    0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL,
+    0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL,
+    0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL,
+    0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL,
+    0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL,
+    0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL,
+    0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL,
+    0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL,
+    0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL,
+    0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL,
+    0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL,
+    0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL,
+    0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL,
+    0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL,
+    0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL,
+    0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL,
+    0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL,
+    0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL,
+    0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL,
+    0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL,
+    0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL,
+    0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL,
+    0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL,
+    0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
+    0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
+    0xde0506f1UL
+  },
+  {
+    0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL,
+    0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL,
+    0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL,
+    0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL,
+    0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL,
+    0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL,
+    0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL,
+    0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL,
+    0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL,
+    0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL,
+    0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL,
+    0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL,
+    0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL,
+    0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL,
+    0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL,
+    0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL,
+    0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL,
+    0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL,
+    0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL,
+    0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL,
+    0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL,
+    0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL,
+    0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL,
+    0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL,
+    0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL,
+    0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL,
+    0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL,
+    0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL,
+    0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL,
+    0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL,
+    0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL,
+    0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL,
+    0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL,
+    0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL,
+    0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL,
+    0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL,
+    0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL,
+    0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL,
+    0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL,
+    0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL,
+    0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL,
+    0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL,
+    0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL,
+    0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL,
+    0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL,
+    0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL,
+    0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL,
+    0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL,
+    0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL,
+    0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL,
+    0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL,
+    0x8def022dUL
+  },
+  {
+    0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL,
+    0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL,
+    0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL,
+    0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL,
+    0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL,
+    0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL,
+    0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL,
+    0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL,
+    0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL,
+    0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL,
+    0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL,
+    0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL,
+    0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL,
+    0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL,
+    0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL,
+    0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL,
+    0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL,
+    0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL,
+    0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL,
+    0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL,
+    0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL,
+    0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL,
+    0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL,
+    0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL,
+    0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL,
+    0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL,
+    0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL,
+    0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL,
+    0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL,
+    0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL,
+    0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL,
+    0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL,
+    0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL,
+    0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL,
+    0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL,
+    0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL,
+    0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL,
+    0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL,
+    0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL,
+    0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL,
+    0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL,
+    0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL,
+    0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL,
+    0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL,
+    0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL,
+    0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL,
+    0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL,
+    0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL,
+    0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL,
+    0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL,
+    0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL,
+    0x72fd2493UL
+  },
+  {
+    0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL,
+    0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL,
+    0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL,
+    0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL,
+    0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL,
+    0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL,
+    0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL,
+    0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL,
+    0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL,
+    0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL,
+    0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL,
+    0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL,
+    0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL,
+    0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL,
+    0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL,
+    0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL,
+    0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL,
+    0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL,
+    0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL,
+    0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL,
+    0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL,
+    0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL,
+    0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL,
+    0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL,
+    0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL,
+    0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL,
+    0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL,
+    0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL,
+    0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL,
+    0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL,
+    0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL,
+    0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL,
+    0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL,
+    0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL,
+    0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL,
+    0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL,
+    0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL,
+    0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL,
+    0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL,
+    0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL,
+    0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL,
+    0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL,
+    0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL,
+    0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL,
+    0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL,
+    0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL,
+    0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL,
+    0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL,
+    0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL,
+    0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL,
+    0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL,
+    0xed3498beUL
+  },
+  {
+    0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL,
+    0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL,
+    0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL,
+    0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL,
+    0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL,
+    0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL,
+    0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL,
+    0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL,
+    0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL,
+    0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL,
+    0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL,
+    0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL,
+    0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL,
+    0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL,
+    0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL,
+    0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL,
+    0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL,
+    0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL,
+    0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL,
+    0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL,
+    0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL,
+    0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL,
+    0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL,
+    0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL,
+    0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL,
+    0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL,
+    0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL,
+    0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL,
+    0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL,
+    0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL,
+    0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL,
+    0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL,
+    0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL,
+    0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL,
+    0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL,
+    0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL,
+    0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL,
+    0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL,
+    0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL,
+    0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL,
+    0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL,
+    0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL,
+    0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL,
+    0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL,
+    0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL,
+    0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL,
+    0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL,
+    0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL,
+    0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL,
+    0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL,
+    0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL,
+    0xf10605deUL
+#endif
+  }
+};
diff --git a/src/zlib/deflate.c b/src/zlib/deflate.c
new file mode 100644
index 0000000..057ac7f
--- /dev/null
+++ b/src/zlib/deflate.c
@@ -0,0 +1,1736 @@
+/* deflate.c -- compress data using the deflation algorithm
+ * Copyright (C) 1995-2005 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ *  ALGORITHM
+ *
+ *      The "deflation" process depends on being able to identify portions
+ *      of the input text which are identical to earlier input (within a
+ *      sliding window trailing behind the input currently being processed).
+ *
+ *      The most straightforward technique turns out to be the fastest for
+ *      most input files: try all possible matches and select the longest.
+ *      The key feature of this algorithm is that insertions into the string
+ *      dictionary are very simple and thus fast, and deletions are avoided
+ *      completely. Insertions are performed at each input character, whereas
+ *      string matches are performed only when the previous match ends. So it
+ *      is preferable to spend more time in matches to allow very fast string
+ *      insertions and avoid deletions. The matching algorithm for small
+ *      strings is inspired from that of Rabin & Karp. A brute force approach
+ *      is used to find longer strings when a small match has been found.
+ *      A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
+ *      (by Leonid Broukhis).
+ *         A previous version of this file used a more sophisticated algorithm
+ *      (by Fiala and Greene) which is guaranteed to run in linear amortized
+ *      time, but has a larger average cost, uses more memory and is patented.
+ *      However the F&G algorithm may be faster for some highly redundant
+ *      files if the parameter max_chain_length (described below) is too large.
+ *
+ *  ACKNOWLEDGEMENTS
+ *
+ *      The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
+ *      I found it in 'freeze' written by Leonid Broukhis.
+ *      Thanks to many people for bug reports and testing.
+ *
+ *  REFERENCES
+ *
+ *      Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
+ *      Available in http://www.ietf.org/rfc/rfc1951.txt
+ *
+ *      A description of the Rabin and Karp algorithm is given in the book
+ *         "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
+ *
+ *      Fiala,E.R., and Greene,D.H.
+ *         Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-505
+ *
+ */
+
+/* @(#) $Id: deflate.c,v 1.1 2008/10/17 06:17:16 scuri Exp $ */
+
+#include "deflate.h"
+
+const char deflate_copyright[] =  /* not `local`: meant to stay in the binary */
+   " deflate 1.2.3 Copyright 1995-2005 Jean-loup Gailly ";
+/*
+  If you use the zlib library in a product, an acknowledgment is welcome
+  in the documentation of your product. If for some reason you cannot
+  include such an acknowledgment, I would appreciate that you keep this
+  copyright string in the executable of your product.
+ */
+
+/* ===========================================================================
+ *  Function prototypes, preceded by the two types those prototypes rely on.
+ */
+typedef enum {
+    need_more,      /* block not completed, need more input or more output */
+    block_done,     /* block flush performed */
+    finish_started, /* finish started, need only more output at next deflate */
+    finish_done     /* finish done, accept no more input or output */
+} block_state; /* what a compress_func reports when it returns */
+
+typedef block_state (*compress_func) OF((deflate_state *s, int flush));
+/* Compression routine held in a config entry; returns the new block state. */
+
+local void fill_window    OF((deflate_state *s));
+local block_state deflate_stored OF((deflate_state *s, int flush)); /* lvl 0 */
+local block_state deflate_fast   OF((deflate_state *s, int flush)); /* 1-3 */
+#ifndef FASTEST /* a FASTEST build has no table entry using deflate_slow */
+local block_state deflate_slow   OF((deflate_state *s, int flush)); /* 4-9 */
+#endif /* !FASTEST */
+local void lm_init        OF((deflate_state *s)); /* used by deflateReset() */
+local void putShortMSB    OF((deflate_state *s, uInt b));
+local void flush_pending  OF((z_streamp strm));
+local int read_buf        OF((z_streamp strm, Bytef *buf, unsigned size));
+#ifndef FASTEST
+#ifdef ASMV /* ASMV (asm) build: these two are declared without `local` */
+      void match_init OF((void)); /* asm code initialization */
+      uInt longest_match  OF((deflate_state *s, IPos cur_match));
+#else
+local uInt longest_match  OF((deflate_state *s, IPos cur_match));
+#endif /* ASMV */
+#endif /* !FASTEST */
+local uInt longest_match_fast OF((deflate_state *s, IPos cur_match));
+
+#ifdef DEBUG /* check_match is declared in DEBUG builds only */
+local  void check_match OF((deflate_state *s, IPos start, IPos match,
+                            int length));
+#endif /* DEBUG */
+
+/* ===========================================================================
+ * Local data
+ */
+
+#define NIL 0
+/* Tail of hash chains; head[] is reset to it by CLEAR_HASH */
+
+#ifndef TOO_FAR /* keeps a value that was already defined, e.g. by the build */
+#  define TOO_FAR 4096
+#endif
+/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
+
+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
+/* Minimum amount of lookahead, except at the end of the input file.
+ * See deflate.c (this very file) for comments about the MIN_MATCH+1.
+ */
+
+/* Values for max_lazy_match, good_match, nice_match and max_chain_length,
+ * depending on the desired pack level (0..9). The values given below have
+ * been tuned to exclude worst case performance for pathological files.
+ * Better values may be found for specific files.
+ */
+typedef struct config_s {
+   ush good_length; /* reduce lazy search above this match length */
+   ush max_lazy;    /* do not perform lazy search above this match length */
+   ush nice_length; /* quit search above this match length */
+   ush max_chain;   /* loaded into s->max_chain_length by deflateParams() */
+   compress_func func; /* compression routine for the level */
+} config;
+
+#ifdef FASTEST /* two entries: deflateInit2_/deflateParams map level to 0 or 1 */
+local const config configuration_table[2] = {
+/*      good lazy nice chain func */
+/* 0 */ {0,    0,  0,    0, deflate_stored},  /* store only */
+/* 1 */ {4,    4,  8,    4, deflate_fast}}; /* max speed, no lazy matches */
+#else
+local const config configuration_table[10] = {
+/*      good lazy nice chain func */
+/* 0 */ {0,    0,  0,    0, deflate_stored},  /* store only */
+/* 1 */ {4,    4,  8,    4, deflate_fast}, /* max speed, no lazy matches */
+/* 2 */ {4,    5, 16,    8, deflate_fast},
+/* 3 */ {4,    6, 32,   32, deflate_fast},
+
+/* 4 */ {4,    4, 16,   16, deflate_slow},  /* lazy matches */
+/* 5 */ {8,   16, 32,   32, deflate_slow},
+/* 6 */ {8,   16, 128, 128, deflate_slow}, /* Z_DEFAULT_COMPRESSION maps here */
+/* 7 */ {8,   32, 128, 256, deflate_slow},
+/* 8 */ {32, 128, 258, 1024, deflate_slow},
+/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */
+#endif /* FASTEST */
+
+/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
+ * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
+ * meaning.
+ */
+
+#define EQUAL 0
+/* result of memcmp for equal strings */
+
+#ifndef NO_DUMMY_DECL /* define NO_DUMMY_DECL to leave the dummy struct out */
+struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
+#endif /* !NO_DUMMY_DECL */
+
+/* ===========================================================================
+ * Update a hash value with the given input byte; h must be an lvalue, since
+ * the new key ((h << hash_shift) ^ c) & hash_mask is assigned back to it.
+ * IN  assertion: all calls to UPDATE_HASH are made with consecutive input
+ *    characters, so the running key is derived from the previous one.
+ */
+#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
+
+
+/* ===========================================================================
+ * Insert string str in the dictionary and set match_head to the previous head
+ * of the hash chain (the most recent string with same hash key). The value of
+ * the whole expression is the new head, (Pos)(str), not a chain length.
+ * If this file is compiled with -DFASTEST, the compression level is forced
+ * to 1, and no hash chains are maintained (prev[] is not written).
+ * IN  assertion: all calls to INSERT_STRING are made with consecutive
+ *    input characters and the first MIN_MATCH bytes of str are valid
+ *    (except for the last MIN_MATCH-1 bytes of the input file).
+ */
+#ifdef FASTEST
+#define INSERT_STRING(s, str, match_head) \
+   (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
+    match_head = s->head[s->ins_h], \
+    s->head[s->ins_h] = (Pos)(str))
+#else
+#define INSERT_STRING(s, str, match_head) \
+   (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
+    match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \
+    s->head[s->ins_h] = (Pos)(str))
+#endif /* FASTEST */
+
+/* ===========================================================================
+ * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
+ * prev[] will be initialized on the fly.
+ */
+#define CLEAR_HASH(s) \
+    s->head[s->hash_size-1] = NIL; \
+    zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
+
+/* ========================================================================= */
+int ZEXPORT deflateInit_(strm, level, version, stream_size)
+    z_streamp strm;      /* every argument is handed on to deflateInit2_() */
+    int level;
+    const char *version;
+    int stream_size;
+{
+    int rc = deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
+                           Z_DEFAULT_STRATEGY, version, stream_size);
+    return rc; /* To do: ignore strm->next_in if we use it as window */
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
+                  version, stream_size)
+    z_streamp strm;      /* null zalloc/zfree become zcalloc/zcfree */
+    int  level;          /* 0..9 after Z_DEFAULT_COMPRESSION -> 6 (0/1: FASTEST) */
+    int  method;         /* must be Z_DEFLATED */
+    int  windowBits;     /* 8..15; negative: no wrapper; +16: gzip (GZIP) */
+    int  memLevel;       /* 1..MAX_MEM_LEVEL; sizes hash table and lit_bufsize */
+    int  strategy;       /* 0..Z_FIXED */
+    const char *version; /* first character must match ZLIB_VERSION's */
+    int stream_size;     /* must equal sizeof(z_stream) */
+{
+    deflate_state *s;
+    int wrap = 1;        /* 0: no wrapper, 1: zlib wrapper, 2: gzip wrapper */
+    static const char my_version[] = ZLIB_VERSION;
+    /* Validates the arguments, allocates all deflate state, then resets it. */
+    ushf *overlay;
+    /* One allocation doubles as pending_buf and as storage for d_buf and
+     * l_buf (pointed into it below). This works since the average output
+     * size for (length,distance) codes is <= 24 bits. */
+
+    if (version == Z_NULL || version[0] != my_version[0] ||
+        stream_size != sizeof(z_stream)) {
+        return Z_VERSION_ERROR;
+    }
+    if (strm == Z_NULL) return Z_STREAM_ERROR;
+
+    strm->msg = Z_NULL;
+    if (strm->zalloc == (alloc_func)0) {
+        strm->zalloc = zcalloc;
+        strm->opaque = (voidpf)0;
+    }
+    if (strm->zfree == (free_func)0) strm->zfree = zcfree;
+
+#ifdef FASTEST
+    if (level != 0) level = 1;
+#else
+    if (level == Z_DEFAULT_COMPRESSION) level = 6;
+#endif
+
+    if (windowBits < 0) { /* suppress zlib wrapper */
+        wrap = 0;
+        windowBits = -windowBits;
+    }
+#ifdef GZIP
+    else if (windowBits > 15) {
+        wrap = 2;       /* write gzip wrapper instead */
+        windowBits -= 16;
+    }
+#endif
+    if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
+        windowBits < 8 || windowBits > 15 || level < 0 || level > 9 ||
+        strategy < 0 || strategy > Z_FIXED) {
+        return Z_STREAM_ERROR;
+    }
+    if (windowBits == 8) windowBits = 9;  /* until 256-byte window bug fixed */
+    s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state));
+    if (s == Z_NULL) return Z_MEM_ERROR;
+    strm->state = (struct internal_state FAR *)s;
+    s->strm = strm;
+
+    s->wrap = wrap;
+    s->gzhead = Z_NULL;
+    s->w_bits = windowBits;
+    s->w_size = 1 << s->w_bits;
+    s->w_mask = s->w_size - 1;
+
+    s->hash_bits = memLevel + 7;
+    s->hash_size = 1 << s->hash_bits;
+    s->hash_mask = s->hash_size - 1;
+    s->hash_shift =  ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); /* rounds up */
+
+    s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
+    s->prev   = (Posf *)  ZALLOC(strm, s->w_size, sizeof(Pos));
+    s->head   = (Posf *)  ZALLOC(strm, s->hash_size, sizeof(Pos));
+
+    s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
+
+    overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
+    s->pending_buf = (uchf *) overlay;
+    s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
+
+    if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
+        s->pending_buf == Z_NULL) {
+        s->status = FINISH_STATE; /* presumably needed by deflateEnd: confirm */
+        strm->msg = (char*)ERR_MSG(Z_MEM_ERROR);
+        deflateEnd (strm);
+        return Z_MEM_ERROR;
+    }
+    s->d_buf = overlay + s->lit_bufsize/sizeof(ush); /* lit_bufsize bytes in */
+    s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
+
+    s->level = level;
+    s->strategy = strategy;
+    s->method = (Byte)method;
+
+    return deflateReset(strm); /* sets up the remaining per-stream state */
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
+    z_streamp strm;
+    const Bytef *dictionary;
+    uInt  dictLength;
+{
+    deflate_state *s;
+    const Bytef *src = dictionary; /* first dictionary byte that is copied */
+    uInt used = dictLength;        /* how many dictionary bytes are copied */
+    uInt pos, last, max_dist;
+    IPos hash_head = 0;
+
+    /* Refused for gzip streams and for zlib streams not in INIT_STATE. */
+    if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL)
+        return Z_STREAM_ERROR;
+    s = strm->state;
+    if (s->wrap == 2 || (s->wrap == 1 && s->status != INIT_STATE))
+        return Z_STREAM_ERROR;
+    /* The checksum covers the whole dictionary, even if only a tail is kept. */
+    if (s->wrap) strm->adler = adler32(strm->adler, dictionary, dictLength);
+    if (used < MIN_MATCH) return Z_OK;
+
+    max_dist = MAX_DIST(s);
+    if (used > max_dist) { /* use the tail of the dictionary */
+        used = max_dist;
+        src += dictLength - used;
+    }
+    zmemcpy(s->window, src, used);
+    s->strstart = used;
+    s->block_start = (long)used;
+
+    /* Insert all strings in the hash table (except for the last two bytes).
+     * s->lookahead stays null, so s->ins_h will be recomputed at the next
+     * call of fill_window. */
+    s->ins_h = s->window[0];
+    UPDATE_HASH(s, s->ins_h, s->window[1]);
+    last = used - MIN_MATCH;
+    for (pos = 0; pos <= last; pos++) INSERT_STRING(s, pos, hash_head);
+    if (hash_head) hash_head = 0;  /* to make compiler happy */
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateReset (strm)
+    z_streamp strm;
+{
+    deflate_state *s;
+
+    /* A usable stream has state and both allocator callbacks. */
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (strm->zalloc == (alloc_func)0) return Z_STREAM_ERROR;
+    if (strm->zfree == (free_func)0) return Z_STREAM_ERROR;
+    s = (deflate_state *)strm->state;
+
+    strm->total_out = 0;
+    strm->total_in = 0;
+    strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */
+    strm->data_type = Z_UNKNOWN;
+
+    s->pending_out = s->pending_buf;
+    s->pending = 0;
+    s->last_flush = Z_NO_FLUSH;
+
+    /* wrap was made negative by deflate(..., Z_FINISH); restore it. */
+    if (s->wrap < 0) s->wrap = -s->wrap;
+    if (s->wrap) s->status = INIT_STATE;
+    else s->status = BUSY_STATE;
+#ifdef GZIP
+    if (s->wrap == 2) strm->adler = crc32(0L, Z_NULL, 0);
+    else
+#endif
+    strm->adler = adler32(0L, Z_NULL, 0);
+
+    _tr_init(s);
+    lm_init(s);
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateSetHeader (strm, head)
+    z_streamp strm;
+    gz_headerp head;
+{
+    if (strm == Z_NULL || strm->state == Z_NULL || strm->state->wrap != 2)
+        return Z_STREAM_ERROR; /* needs a live stream with wrap == 2 */
+    strm->state->gzhead = head;
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflatePrime (strm, bits, value)
+    z_streamp strm;
+    int bits, value; /* insert the `bits` low-order bits of value */
+{
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (bits < 0 || bits > 16) return Z_STREAM_ERROR; /* manual: bits <= 16 */
+    strm->state->bi_valid = bits;
+    strm->state->bi_buf = (ush)(value & ((1L << bits) - 1)); /* long-wide mask */
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateParams(strm, level, strategy)
+    z_streamp strm;
+    int level;
+    int strategy;
+{
+    deflate_state *s;
+    const config *wanted; /* table entry for the requested level */
+    int err = Z_OK;
+
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    s = strm->state;
+
+#ifdef FASTEST
+    if (level != 0) level = 1;
+#else
+    if (level == Z_DEFAULT_COMPRESSION) level = 6;
+#endif
+    if (level < 0 || level > 9) return Z_STREAM_ERROR;
+    if (strategy < 0 || strategy > Z_FIXED) return Z_STREAM_ERROR;
+    wanted = &configuration_table[level];
+
+    /* Flush the last buffer if the compression function is about to change. */
+    if (strm->total_in != 0 &&
+        configuration_table[s->level].func != wanted->func) {
+        err = deflate(strm, Z_PARTIAL_FLUSH);
+    }
+    if (s->level != level) { /* reload the per-level tuning values */
+        s->level            = level;
+        s->good_match       = wanted->good_length;
+        s->max_lazy_match   = wanted->max_lazy;
+        s->nice_match       = wanted->nice_length;
+        s->max_chain_length = wanted->max_chain;
+    }
+    s->strategy = strategy;
+    return err;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain)
+    z_streamp strm;
+    int good_length;
+    int max_lazy;
+    int nice_length;
+    int max_chain;
+{
+    deflate_state *state;   /* compressor whose match limits are replaced */
+
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    state = strm->state;    /* the four values below are stored unchecked */
+    state->max_chain_length = max_chain;
+    state->nice_match = nice_length;
+    state->max_lazy_match = max_lazy;
+    state->good_match = good_length;
+    return Z_OK;
+}
+
+/* =========================================================================
+ * For the default windowBits of 15 and memLevel of 8, this function returns
+ * a close to exact, as well as small, upper bound on the compressed size.
+ * They are coded as constants here for a reason--if the #define's are
+ * changed, then this function needs to be changed as well.  The return
+ * value for 15 and 8 only works for those exact settings.
+ *
+ * For any setting other than those defaults for windowBits and memLevel,
+ * the value returned is a conservative worst case for the maximum expansion
+ * resulting from using fixed blocks instead of stored blocks, which deflate
+ * can emit on compressed data for some combinations of the parameters.
+ *
+ * This function could be more sophisticated to provide closer upper bounds
+ * for every combination of windowBits and memLevel, as well as wrap.
+ * But even the conservative upper bound of about 14% expansion does not
+ * seem onerous for output buffer allocation.
+ */
+uLong ZEXPORT deflateBound(strm, sourceLen)
+    z_streamp strm;
+    uLong sourceLen;
+{
+    deflate_state *state;
+    uLong worst;    /* bound used whenever the tight one does not apply */
+
+    /* sourceLen plus 1/8 and 1/64 of it (each rounded up) plus 11 bytes */
+    worst = sourceLen +
+            ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 11;
+
+    /* without a usable stream the settings are unknown: stay conservative */
+    if (strm == Z_NULL || strm->state == Z_NULL)
+        return worst;
+    state = strm->state;
+
+    /* only 15 window bits together with 8 + 7 hash bits qualify for the
+     * tight bound computed by compressBound() */
+    if (state->w_bits == 15 && state->hash_bits == 8 + 7)
+        return compressBound(sourceLen);
+
+    return worst;
+}
+
+/* =========================================================================
+ * Put a short in the pending buffer. The 16-bit value is put in MSB order.
+ * IN assertion: the stream state is correct and there is enough room in
+ * pending_buf.
+ */
+local void putShortMSB (s, b)
+    deflate_state *s;
+    uInt b;
+{
+    Byte msb = (Byte)(b >> 8), lsb = (Byte)(b & 0xff); /* split the value */
+    put_byte(s, msb); put_byte(s, lsb);                /* high byte first */
+}
+
+/* =========================================================================
+ * Flush as much pending output as possible. All deflate() output goes
+ * through this function so some applications may wish to modify it
+ * to avoid allocating a large strm->next_out buffer and copying into it.
+ * (See also read_buf()).
+ */
+local void flush_pending(strm)
+    z_streamp strm;
+{
+    deflate_state *state = strm->state;
+    unsigned count = state->pending;    /* bytes waiting to be emitted */
+    /* never write more than the caller's output buffer can take */
+    if (count > strm->avail_out) count = strm->avail_out;
+    if (count == 0) return;
+
+    zmemcpy(strm->next_out, state->pending_out, count);
+    strm->next_out  += count;
+    strm->avail_out -= count;
+    strm->total_out += count;
+    state->pending_out += count;
+    state->pending     -= count;
+    /* once drained, restart at the beginning of the pending buffer */
+    if (state->pending == 0) state->pending_out = state->pending_buf;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflate (strm, flush)
+    z_streamp strm;
+    int flush;
+{
+    int old_flush; /* value of flush param for previous deflate call */
+    deflate_state *s;
+    /* Returns Z_OK, Z_STREAM_END, Z_STREAM_ERROR or Z_BUF_ERROR (see below) */
+    if (strm == Z_NULL || strm->state == Z_NULL ||
+        flush > Z_FINISH || flush < 0) {
+        return Z_STREAM_ERROR;
+    }
+    s = strm->state;
+
+    if (strm->next_out == Z_NULL ||
+        (strm->next_in == Z_NULL && strm->avail_in != 0) ||
+        (s->status == FINISH_STATE && flush != Z_FINISH)) {
+        ERR_RETURN(strm, Z_STREAM_ERROR);
+    }
+    if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
+
+    s->strm = strm; /* just in case */
+    old_flush = s->last_flush;
+    s->last_flush = flush;
+
+    /* Write the header */
+    if (s->status == INIT_STATE) {
+#ifdef GZIP
+        if (s->wrap == 2) {
+            strm->adler = crc32(0L, Z_NULL, 0);
+            put_byte(s, 31);    /* gzip ID1 (RFC 1952) */
+            put_byte(s, 139);   /* gzip ID2 */
+            put_byte(s, 8);     /* compression method: deflate */
+            if (s->gzhead == NULL) {
+                put_byte(s, 0); /* no flags, then four zero time bytes */
+                put_byte(s, 0);
+                put_byte(s, 0);
+                put_byte(s, 0);
+                put_byte(s, 0);
+                put_byte(s, s->level == 9 ? 2 :
+                            (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
+                             4 : 0));
+                put_byte(s, OS_CODE);
+                s->status = BUSY_STATE;
+            }
+            else {
+                put_byte(s, (s->gzhead->text ? 1 : 0) +
+                            (s->gzhead->hcrc ? 2 : 0) +
+                            (s->gzhead->extra == Z_NULL ? 0 : 4) +
+                            (s->gzhead->name == Z_NULL ? 0 : 8) +
+                            (s->gzhead->comment == Z_NULL ? 0 : 16)
+                        );
+                put_byte(s, (Byte)(s->gzhead->time & 0xff));
+                put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff));
+                put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff));
+                put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff));
+                put_byte(s, s->level == 9 ? 2 :
+                            (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
+                             4 : 0));
+                put_byte(s, s->gzhead->os & 0xff);
+                if (s->gzhead->extra != NULL) {
+                    /* only the low 16 bits of extra_len are written */
+                    put_byte(s, s->gzhead->extra_len & 0xff);
+                    put_byte(s, (s->gzhead->extra_len >> 8) & 0xff);
+                }
+                if (s->gzhead->hcrc)
+                    strm->adler = crc32(strm->adler, s->pending_buf,
+                                        s->pending);
+                s->gzindex = 0;
+                s->status = EXTRA_STATE;
+            }
+        }
+        else
+#endif
+        {
+            uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
+            uInt level_flags;
+
+            if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2)
+                level_flags = 0;
+            else if (s->level < 6)
+                level_flags = 1;
+            else if (s->level == 6)
+                level_flags = 2;
+            else
+                level_flags = 3;
+            header |= (level_flags << 6);
+            if (s->strstart != 0) header |= PRESET_DICT;
+            header += 31 - (header % 31); /* make header a multiple of 31 */
+
+            s->status = BUSY_STATE;
+            putShortMSB(s, header);
+
+            /* Save the adler32 of the preset dictionary: */
+            if (s->strstart != 0) {
+                putShortMSB(s, (uInt)(strm->adler >> 16));
+                putShortMSB(s, (uInt)(strm->adler & 0xffff));
+            }
+            strm->adler = adler32(0L, Z_NULL, 0);
+        }
+    }
+#ifdef GZIP
+    if (s->status == EXTRA_STATE) {
+        if (s->gzhead->extra != NULL) {
+            uInt beg = s->pending;  /* start of bytes to update crc */
+
+            while (s->gzindex < (s->gzhead->extra_len & 0xffff)) {
+                if (s->pending == s->pending_buf_size) {
+                    if (s->gzhead->hcrc && s->pending > beg)
+                        strm->adler = crc32(strm->adler, s->pending_buf + beg,
+                                            s->pending - beg);
+                    flush_pending(strm);
+                    beg = s->pending;
+                    if (s->pending == s->pending_buf_size)
+                        break;  /* output full: resume on the next call */
+                }
+                put_byte(s, s->gzhead->extra[s->gzindex]);
+                s->gzindex++;
+            }
+            if (s->gzhead->hcrc && s->pending > beg)
+                strm->adler = crc32(strm->adler, s->pending_buf + beg,
+                                    s->pending - beg);
+            /* same 16-bit length as the loop bound and the header field */
+            if (s->gzindex == (s->gzhead->extra_len & 0xffff)) {
+                s->gzindex = 0;
+                s->status = NAME_STATE;
+            }
+        }
+        else
+            s->status = NAME_STATE;
+    }
+    if (s->status == NAME_STATE) {
+        if (s->gzhead->name != NULL) {
+            uInt beg = s->pending;  /* start of bytes to update crc */
+            int val;
+
+            do {
+                if (s->pending == s->pending_buf_size) {
+                    if (s->gzhead->hcrc && s->pending > beg)
+                        strm->adler = crc32(strm->adler, s->pending_buf + beg,
+                                            s->pending - beg);
+                    flush_pending(strm);
+                    beg = s->pending;
+                    if (s->pending == s->pending_buf_size) {
+                        val = 1;    /* non-zero: name not finished yet */
+                        break;
+                    }
+                }
+                val = s->gzhead->name[s->gzindex++];
+                put_byte(s, val);
+            } while (val != 0);     /* the terminating zero is copied too */
+            if (s->gzhead->hcrc && s->pending > beg)
+                strm->adler = crc32(strm->adler, s->pending_buf + beg,
+                                    s->pending - beg);
+            if (val == 0) {
+                s->gzindex = 0;
+                s->status = COMMENT_STATE;
+            }
+        }
+        else
+            s->status = COMMENT_STATE;
+    }
+    if (s->status == COMMENT_STATE) {
+        if (s->gzhead->comment != NULL) {
+            uInt beg = s->pending;  /* start of bytes to update crc */
+            int val;
+
+            do {
+                if (s->pending == s->pending_buf_size) {
+                    if (s->gzhead->hcrc && s->pending > beg)
+                        strm->adler = crc32(strm->adler, s->pending_buf + beg,
+                                            s->pending - beg);
+                    flush_pending(strm);
+                    beg = s->pending;
+                    if (s->pending == s->pending_buf_size) {
+                        val = 1;    /* non-zero: comment not finished yet */
+                        break;
+                    }
+                }
+                val = s->gzhead->comment[s->gzindex++];
+                put_byte(s, val);
+            } while (val != 0);
+            if (s->gzhead->hcrc && s->pending > beg)
+                strm->adler = crc32(strm->adler, s->pending_buf + beg,
+                                    s->pending - beg);
+            if (val == 0)
+                s->status = HCRC_STATE;
+        }
+        else
+            s->status = HCRC_STATE;
+    }
+    if (s->status == HCRC_STATE) {
+        if (s->gzhead->hcrc) {
+            if (s->pending + 2 > s->pending_buf_size)
+                flush_pending(strm);
+            if (s->pending + 2 <= s->pending_buf_size) {
+                put_byte(s, (Byte)(strm->adler & 0xff));
+                put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
+                strm->adler = crc32(0L, Z_NULL, 0); /* restart for the data */
+                s->status = BUSY_STATE;
+            }
+        }
+        else
+            s->status = BUSY_STATE;
+    }
+#endif
+
+    /* Flush as much pending output as possible */
+    if (s->pending != 0) {
+        flush_pending(strm);
+        if (strm->avail_out == 0) {
+            /* Since avail_out is 0, deflate will be called again with
+             * more output space, but possibly with both pending and
+             * avail_in equal to zero. There won't be anything to do,
+             * but this is not an error situation so make sure we
+             * return OK instead of BUF_ERROR at next call of deflate:
+             */
+            s->last_flush = -1;
+            return Z_OK;
+        }
+
+    /* Make sure there is something to do and avoid duplicate consecutive
+     * flushes. For repeated and useless calls with Z_FINISH, we keep
+     * returning Z_STREAM_END instead of Z_BUF_ERROR.
+     */
+    } else if (strm->avail_in == 0 && flush <= old_flush &&
+               flush != Z_FINISH) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* User must not provide more input after the first FINISH: */
+    if (s->status == FINISH_STATE && strm->avail_in != 0) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* Start a new block or continue the current one.
+     */
+    if (strm->avail_in != 0 || s->lookahead != 0 ||
+        (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
+        block_state bstate;
+
+        bstate = (*(configuration_table[s->level].func))(s, flush);
+
+        if (bstate == finish_started || bstate == finish_done) {
+            s->status = FINISH_STATE;
+        }
+        if (bstate == need_more || bstate == finish_started) {
+            if (strm->avail_out == 0) {
+                s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
+            }
+            return Z_OK;
+            /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
+             * of deflate should use the same flush parameter to make sure
+             * that the flush is complete. So we don't have to output an
+             * empty block here, this will be done at next call. This also
+             * ensures that for a very small output buffer, we emit at most
+             * one empty block.
+             */
+        }
+        if (bstate == block_done) {
+            if (flush == Z_PARTIAL_FLUSH) {
+                _tr_align(s);
+            } else { /* FULL_FLUSH or SYNC_FLUSH */
+                _tr_stored_block(s, (char*)0, 0L, 0);
+                /* For a full flush, this empty block will be recognized
+                 * as a special marker by inflate_sync().
+                 */
+                if (flush == Z_FULL_FLUSH) {
+                    CLEAR_HASH(s);             /* forget history */
+                }
+            }
+            flush_pending(strm);
+            if (strm->avail_out == 0) {
+              s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
+              return Z_OK;
+            }
+        }
+    }
+    Assert(strm->avail_out > 0, "bug2");
+
+    if (flush != Z_FINISH) return Z_OK;
+    if (s->wrap <= 0) return Z_STREAM_END; /* no trailer, or already written */
+
+    /* Write the trailer */
+#ifdef GZIP
+    if (s->wrap == 2) {
+        /* check value, then total_in, each as four bytes, low byte first */
+        put_byte(s, (Byte)(strm->adler & 0xff));
+        put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
+        put_byte(s, (Byte)((strm->adler >> 16) & 0xff));
+        put_byte(s, (Byte)((strm->adler >> 24) & 0xff));
+        put_byte(s, (Byte)(strm->total_in & 0xff));
+        put_byte(s, (Byte)((strm->total_in >> 8) & 0xff));
+        put_byte(s, (Byte)((strm->total_in >> 16) & 0xff));
+        put_byte(s, (Byte)((strm->total_in >> 24) & 0xff));
+    }
+    else
+#endif
+    {
+        putShortMSB(s, (uInt)(strm->adler >> 16));
+        putShortMSB(s, (uInt)(strm->adler & 0xffff));
+    }
+    flush_pending(strm);
+    /* If avail_out is zero, the application will call deflate again
+     * to flush the rest.
+     */
+    if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */
+    return s->pending != 0 ? Z_OK : Z_STREAM_END;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateEnd (strm)
+    z_streamp strm;
+{
+    deflate_state *state;
+    int status;
+
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    state = strm->state;
+    status = state->status;
+
+    switch (status) {       /* only the known status values are accepted */
+    case INIT_STATE: case EXTRA_STATE: case NAME_STATE: case COMMENT_STATE:
+    case HCRC_STATE: case BUSY_STATE: case FINISH_STATE:
+        break;
+    default:
+        return Z_STREAM_ERROR;
+    }
+
+    /* Release the four buffers (those that exist), then the state itself. */
+    TRY_FREE(strm, state->pending_buf);
+    TRY_FREE(strm, state->head);
+    TRY_FREE(strm, state->prev);
+    TRY_FREE(strm, state->window);
+
+    ZFREE(strm, state);
+    strm->state = Z_NULL;
+    /* A stream ended while still in BUSY_STATE is reported as an error. */
+    return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
+}
+
+/* =========================================================================
+ * Copy the source state to the destination state.
+ * To simplify the source, this is not supported for 16-bit MSDOS (which
+ * doesn't have enough memory anyway to duplicate compression states).
+ */
+int ZEXPORT deflateCopy (dest, source)
+    z_streamp dest;
+    z_streamp source;
+{
+#ifdef MAXSEG_64K
+    return Z_STREAM_ERROR;
+#else
+    deflate_state *ds;      /* new state, owned by dest */
+    deflate_state *ss;      /* existing state, owned by source */
+    ushf *overlay;          /* pending buffer viewed as 16-bit units */
+    /* Z_OK on success, Z_STREAM_ERROR on bad arguments, Z_MEM_ERROR when an
+     * allocation fails (dest->state is then Z_NULL, never source's state). */
+    if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) {
+        return Z_STREAM_ERROR;
+    }
+
+    ss = source->state;
+
+    zmemcpy(dest, source, sizeof(z_stream));
+
+    ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state));
+    dest->state = (struct internal_state FAR *) ds; /* drop alias to ss */
+    if (ds == Z_NULL) return Z_MEM_ERROR;
+    zmemcpy(ds, ss, sizeof(deflate_state));
+    ds->strm = dest;
+
+    ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
+    ds->prev   = (Posf *)  ZALLOC(dest, ds->w_size, sizeof(Pos));
+    ds->head   = (Posf *)  ZALLOC(dest, ds->hash_size, sizeof(Pos));
+    overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
+    ds->pending_buf = (uchf *) overlay;
+
+    if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
+        ds->pending_buf == Z_NULL) {
+        deflateEnd (dest);  /* frees what was allocated, clears dest->state */
+        return Z_MEM_ERROR;
+    }
+    /* following zmemcpy do not work for 16-bit MSDOS */
+    zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
+    zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
+    zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
+    zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
+
+    /* re-point the interior pointers into dest's own buffers */
+    ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
+    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
+    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
+
+    ds->l_desc.dyn_tree = ds->dyn_ltree;
+    ds->d_desc.dyn_tree = ds->dyn_dtree;
+    ds->bl_desc.dyn_tree = ds->bl_tree;
+
+    return Z_OK;
+#endif /* MAXSEG_64K */
+}
+
+/* ===========================================================================
+ * Read a new buffer from the current input stream, update the adler32
+ * and total number of bytes read.  All deflate() input goes through
+ * this function so some applications may wish to modify it to avoid
+ * allocating a large strm->next_in buffer and copying from it.
+ * (See also flush_pending()).
+ */
+local int read_buf(strm, buf, size)
+    z_streamp strm;
+    Bytef *buf;
+    unsigned size;
+{
+    unsigned count = strm->avail_in < size ? strm->avail_in : size;
+
+    if (count == 0) return 0;
+
+    strm->avail_in -= count;
+
+    /* fold the bytes into the running check value selected by wrap */
+#ifdef GZIP
+    if (strm->state->wrap == 2) {
+        strm->adler = crc32(strm->adler, strm->next_in, count);
+    } else
+#endif
+    if (strm->state->wrap == 1) {
+        strm->adler = adler32(strm->adler, strm->next_in, count);
+    }
+    zmemcpy(buf, strm->next_in, count);
+    strm->next_in  += count;
+    strm->total_in += count;
+
+    return (int)count;
+}
+
+/* ===========================================================================
+ * Initialize the "longest match" routines for a new zlib stream
+ */
+local void lm_init (s)
+    deflate_state *s;
+{
+    /* Match-search limits come from the table row for the current level. */
+    s->good_match       = configuration_table[s->level].good_length;
+    s->max_lazy_match   = configuration_table[s->level].max_lazy;
+    s->nice_match       = configuration_table[s->level].nice_length;
+    s->max_chain_length = configuration_table[s->level].max_chain;
+
+    /* The window is twice w_size; the hash heads start out cleared. */
+    s->window_size = (ulg)2L*s->w_size;
+    CLEAR_HASH(s);
+    /* Nothing consumed, nothing buffered, no match in progress. */
+    s->ins_h = 0;
+    s->lookahead = 0;
+    s->strstart = 0;
+    s->block_start = 0L;
+    s->match_available = 0;
+    s->prev_length = MIN_MATCH-1;
+    s->match_length = MIN_MATCH-1;
+#ifndef FASTEST
+#ifdef ASMV
+    match_init(); /* initialize the asm code */
+#endif
+#endif
+}
+
+#ifndef FASTEST
+/* ===========================================================================
+ * Set match_start to the longest match starting at the given string and
+ * return its length. Matches shorter or equal to prev_length are discarded,
+ * in which case the result is equal to prev_length and match_start is
+ * garbage.
+ * IN assertions: cur_match is the head of the hash chain for the current
+ *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
+ * OUT assertion: the match length is not greater than s->lookahead.
+ */
+#ifndef ASMV
+/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
+ * match.S. The code will be functionally equivalent.
+ */
+local uInt longest_match(s, cur_match)
+    deflate_state *s;
+    IPos cur_match;                             /* current match */
+{
+    unsigned chain_length = s->max_chain_length;/* max hash chain length */
+    register Bytef *scan = s->window + s->strstart; /* current string */
+    register Bytef *match;                       /* matched string */
+    register int len;                           /* length of current match */
+    int best_len = s->prev_length;              /* best match length so far */
+    int nice_match = s->nice_match;             /* stop if match long enough */
+    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+        s->strstart - (IPos)MAX_DIST(s) : NIL;
+    /* Stop when cur_match becomes <= limit. To simplify the code,
+     * we prevent matches with the string of window index 0.
+     */
+    Posf *prev = s->prev;       /* chain links, indexed by position & wmask */
+    uInt wmask = s->w_mask;
+
+#ifdef UNALIGNED_OK
+    /* Compare two bytes at a time. Note: this is not always beneficial.
+     * Try with and without -DUNALIGNED_OK to check.
+     */
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
+    register ush scan_start = *(ushf*)scan;
+    register ush scan_end   = *(ushf*)(scan+best_len-1);
+#else
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+    register Byte scan_end1  = scan[best_len-1];
+    register Byte scan_end   = scan[best_len];
+#endif
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    /* Do not waste too much time if we already have a good match: */
+    if (s->prev_length >= s->good_match) {
+        chain_length >>= 2;     /* search only a quarter of the chain */
+    }
+    /* Do not look for matches beyond the end of the input. This is necessary
+     * to make deflate deterministic.
+     */
+    if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+
+    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    do {
+        Assert(cur_match < s->strstart, "no future");
+        match = s->window + cur_match;
+
+        /* Skip to next match if the match length cannot increase
+         * or if the match length is less than 2.  Note that the checks below
+         * for insufficient lookahead only occur occasionally for performance
+         * reasons.  Therefore uninitialized memory will be accessed, and
+         * conditional jumps will be made that depend on those values.
+         * However the length of the match is limited to the lookahead, so
+         * the output of deflate is not affected by the uninitialized values.
+         */
+#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
+        /* This code assumes sizeof(unsigned short) == 2. Do not use
+         * UNALIGNED_OK if your compiler uses a different size.
+         */
+        if (*(ushf*)(match+best_len-1) != scan_end ||
+            *(ushf*)match != scan_start) continue;
+
+        /* It is not necessary to compare scan[2] and match[2] since they are
+         * always equal when the other bytes match, given that the hash keys
+         * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
+         * strstart+3, +5, ... up to strstart+257. We check for insufficient
+         * lookahead only every 4th comparison; the 128th check will be made
+         * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
+         * necessary to put more guard bytes at the end of the window, or
+         * to check more often for insufficient lookahead.
+         */
+        Assert(scan[2] == match[2], "scan[2]?");
+        scan++, match++;
+        do {
+        } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 scan < strend);
+        /* The funny "do {}" generates better code on most compilers */
+
+        /* Here, scan <= window+strstart+257 */
+        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+        if (*scan == *match) scan++;    /* first byte of the failed pair */
+
+        len = (MAX_MATCH - 1) - (int)(strend-scan);
+        scan = strend - (MAX_MATCH-1);  /* rewind scan to window+strstart */
+
+#else /* UNALIGNED_OK */
+
+        if (match[best_len]   != scan_end  ||
+            match[best_len-1] != scan_end1 ||
+            *match            != *scan     ||
+            *++match          != scan[1])      continue;
+
+        /* The check at best_len-1 can be removed because it will be made
+         * again later. (This heuristic is not always a win.)
+         * It is not necessary to compare scan[2] and match[2] since they
+         * are always equal when the other bytes match, given that
+         * the hash keys are equal and that HASH_BITS >= 8.
+         */
+        scan += 2, match++;
+        Assert(*scan == *match, "match[2]?");
+
+        /* We check for insufficient lookahead only every 8th comparison;
+         * the 256th check will be made at strstart+258.
+         */
+        do {
+        } while (*++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 scan < strend);
+
+        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+        len = MAX_MATCH - (int)(strend - scan);
+        scan = strend - MAX_MATCH;      /* rewind scan to window+strstart */
+
+#endif /* UNALIGNED_OK */
+
+        if (len > best_len) {           /* strictly longer candidate */
+            s->match_start = cur_match;
+            best_len = len;
+            if (len >= nice_match) break;   /* long enough: stop searching */
+#ifdef UNALIGNED_OK
+            scan_end = *(ushf*)(scan+best_len-1);
+#else
+            scan_end1  = scan[best_len-1];
+            scan_end   = scan[best_len];
+#endif
+        }
+    } while ((cur_match = prev[cur_match & wmask]) > limit
+             && --chain_length != 0);
+
+    /* never report more than the bytes actually available as lookahead */
+    if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
+    return s->lookahead;
+}
+#endif /* ASMV */
+#endif /* FASTEST */
+
+/* ---------------------------------------------------------------------------
+ * Optimized version for level == 1 or strategy == Z_RLE only
+ */
+local uInt longest_match_fast(s, cur_match)
+    deflate_state *s;
+    IPos cur_match;                             /* current match */
+{
+    register Bytef *scan = s->window + s->strstart; /* current string */
+    register Bytef *match;                       /* matched string */
+    register int len;                           /* length of current match */
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    Assert(cur_match < s->strstart, "no future");
+
+    match = s->window + cur_match;  /* the single candidate: no chain walk */
+
+    /* Return failure if the match length is less than 2:
+     */
+    if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1;
+
+    /* The first two bytes are known to be equal here.  The third byte
+     * (scan[2] against match[2]) is not compared explicitly: it is taken
+     * to be equal because the hash keys are equal and HASH_BITS >= 8,
+     * which is what the Assert below states.  Both pointers are moved to
+     * that third byte so that the pre-incrementing loop starts at byte 3.
+     */
+    scan += 2, match += 2;
+    Assert(*scan == *match, "match[2]?");
+
+    /* We check for insufficient lookahead only every 8th comparison;
+     * the 256th check will be made at strstart+258.
+     */
+    do {
+    } while (*++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             scan < strend);
+
+    Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+    len = MAX_MATCH - (int)(strend - scan);    /* bytes that compared equal */
+
+    if (len < MIN_MATCH) return MIN_MATCH - 1;
+
+    s->match_start = cur_match;
+    return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead;
+}
+
+#ifdef DEBUG
+/* ===========================================================================
+ * Check that the match at match_start is indeed a match.
+ */
+local void check_match(s, start, match, length)
+    deflate_state *s;
+    IPos start, match;      /* window offsets of the string and its match */
+    int length;             /* number of bytes claimed to be equal */
+{
+    /* check that the match is indeed a match */
+    if (zmemcmp(s->window + match,
+                s->window + start, length) != EQUAL) {
+        fprintf(stderr, " start %u, match %u, length %d\n",
+                start, match, length);
+        do {    /* dump the two byte runs interleaved, match byte first */
+            fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
+        } while (--length != 0);
+        z_error("invalid match");
+    }
+    if (z_verbose > 1) {    /* trace: distance and length, then the bytes */
+        fprintf(stderr,"\\[%d,%d]", start-match, length);
+        do { putc(s->window[start++], stderr); } while (--length != 0);
+    }
+}
+#else
+#  define check_match(s, start, match, length)
+#endif /* DEBUG */
+
+/* ===========================================================================
+ * Fill the window when the lookahead becomes insufficient.
+ * Updates strstart and lookahead; when the window is slid down by w_size,
+ * match_start, block_start, head[] and (unless FASTEST) prev[] are rebased.
+ * IN assertion: lookahead < MIN_LOOKAHEAD
+ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
+ *    At least one byte has been read, or avail_in == 0; reads are
+ *    performed for at least two bytes (required for the zip translate_eol
+ *    option -- not supported here).
+ */
+local void fill_window(s)
+    deflate_state *s;
+{
+    register unsigned n, m; /* n: loop count or read_buf() result; m: table entry */
+    register Posf *p; /* cursor walking head[] and prev[] from the end */
+    unsigned more;    /* Amount of free space at the end of the window. */
+    uInt wsize = s->w_size;
+
+    do {
+        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
+
+        /* Deal with the 64K limit of 16-bit ints: the cast above can truncate. */
+        if (sizeof(int) <= 2) {
+            if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
+                more = wsize;
+
+            } else if (more == (unsigned)(-1)) {
+                /* Very unlikely, but possible on 16 bit machine if
+                 * strstart == 0 && lookahead == 1 (input done a byte at a time)
+                 */
+                more--;
+            }
+        }
+
+        /* If the window is almost full and there is insufficient lookahead,
+         * move the upper half to the lower one to make room in the upper half.
+         */
+        if (s->strstart >= wsize+MAX_DIST(s)) {
+            /* Slide: copy the upper half down, then rebase every position. */
+            zmemcpy(s->window, s->window+wsize, (unsigned)wsize);
+            s->match_start -= wsize;
+            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
+            s->block_start -= (long) wsize; /* may become negative */
+
+            /* Slide the hash table (could be avoided with 32 bit values
+               at the expense of memory usage). We slide even when level == 0
+               to keep the hash table consistent if we switch back to level > 0
+               later. (Using level 0 permanently is not an optimal usage of
+               zlib, so we don't care about this pathological case.)
+             */
+            /* TODO: avoid this when Z_RLE */
+            n = s->hash_size;
+            p = &s->head[n];
+            do {
+                m = *--p;
+                *p = (Pos)(m >= wsize ? m-wsize : NIL);
+            } while (--n);
+            /* Same rebasing for prev[]: entries below wsize become NIL. */
+            n = wsize;
+#ifndef FASTEST
+            p = &s->prev[n];
+            do {
+                m = *--p;
+                *p = (Pos)(m >= wsize ? m-wsize : NIL);
+                /* If n is not on any hash chain, prev[n] is garbage but
+                 * its value will never be used.
+                 */
+            } while (--n);
+#endif
+            more += wsize;
+        }
+        if (s->strm->avail_in == 0) return;
+
+        /* If there was no sliding:
+         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
+         *    more == window_size - lookahead - strstart
+         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
+         * => more >= window_size - 2*WSIZE + 2
+         * In the BIG_MEM or MMAP case (not yet supported),
+         *   window_size == input_size + MIN_LOOKAHEAD  &&
+         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
+         * Otherwise, window_size == 2*WSIZE so more >= 2.
+         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
+         */
+        Assert(more >= 2, "more < 2");
+
+        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
+        s->lookahead += n;
+
+        /* Initialize the hash value now that we have some input: */
+        if (s->lookahead >= MIN_MATCH) {
+            s->ins_h = s->window[s->strstart];
+            UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+#if MIN_MATCH != 3
+            Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+        }
+        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
+         * but this is not important since only literal bytes will be emitted.
+         */
+
+    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
+}
+
+/* ===========================================================================
+ * Flush the current block [block_start, strstart), with given end-of-file flag.
+ * IN assertion: strstart is set to the end of the current match.
+ */
+#define FLUSH_BLOCK_ONLY(s, eof) { \
+   _tr_flush_block(s, (s->block_start >= 0L ? \
+                   (charf *)&s->window[(unsigned)s->block_start] : \
+                   (charf *)Z_NULL), \
+                (ulg)((long)s->strstart - s->block_start), \
+                (eof)); \
+   s->block_start = s->strstart; \
+   flush_pending(s->strm); \
+   Tracev((stderr,"[FLUSH]")); \
+}
+
+/* Same, but return from the enclosing function if avail_out is then 0. */
+#define FLUSH_BLOCK(s, eof) { \
+   FLUSH_BLOCK_ONLY(s, eof); \
+   if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \
+}
+
+/* ===========================================================================
+ * Copy without compression as much as possible from the input stream, return
+ * the current block state: need_more if the input runs dry under Z_NO_FLUSH,
+ * finish_done after a Z_FINISH flush, block_done after any other flush
+ * (FLUSH_BLOCK can also return early when no output space is left).
+ * No new strings are inserted in the dictionary since incompressible data is
+ * probably not useful. Used only for the level=0 compression option.
+ * NOTE: should be optimized to avoid extra copying from window to pending_buf.
+ */
+local block_state deflate_stored(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
+     * to pending_buf_size, and each stored block has a 5 byte header:
+     */
+    ulg max_block_size = 0xffff;
+    ulg max_start; /* block_start + max_block_size */
+
+    if (max_block_size > s->pending_buf_size - 5) {
+        max_block_size = s->pending_buf_size - 5;
+    }
+
+    /* Copy as much as possible from input to output: */
+    for (;;) {
+        /* Fill the window as much as possible: */
+        if (s->lookahead <= 1) {
+
+            Assert(s->strstart < s->w_size+MAX_DIST(s) ||
+                   s->block_start >= (long)s->w_size, "slide too late");
+
+            fill_window(s);
+            if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more;
+
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+        Assert(s->block_start >= 0L, "block gone");
+        /* Everything read so far joins the stored block: consume it. */
+        s->strstart += s->lookahead;
+        s->lookahead = 0;
+
+        /* Emit a stored block if pending_buf will be full: */
+        max_start = s->block_start + max_block_size;
+        if (s->strstart == 0 || (ulg)s->strstart >= max_start) {
+            /* strstart == 0 is possible when wraparound on 16-bit machine */
+            s->lookahead = (uInt)(s->strstart - max_start);
+            s->strstart = (uInt)max_start;
+            FLUSH_BLOCK(s, 0); /* may return need_more */
+        }
+        /* Flush if we may have to slide, otherwise block_start may become
+         * negative and the data will be gone:
+         */
+        if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) {
+            FLUSH_BLOCK(s, 0); /* may return need_more */
+        }
+    }
+    FLUSH_BLOCK(s, flush == Z_FINISH); /* may return finish_started or need_more */
+    return flush == Z_FINISH ? finish_done : block_done;
+}
+
+/* ===========================================================================
+ * Compress as much as possible from the input stream, return the current
+ * block state (need_more, block_done, finish_started or finish_done).
+ * This function does not perform lazy evaluation of matches and inserts
+ * new strings in the dictionary only for unmatched strings or for short
+ * matches. It is used only for the fast compression options.
+ */
+local block_state deflate_fast(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    IPos hash_head = NIL; /* head of the hash chain */
+    int bflush;           /* set if current block must be flushed */
+
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the next match, plus MIN_MATCH bytes to insert the
+         * string following the next match.
+         */
+        if (s->lookahead < MIN_LOOKAHEAD) {
+            fill_window(s);
+            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+                return need_more;
+            }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* Insert the string window[strstart .. strstart+2] in the
+         * dictionary, and set hash_head to the head of the hash chain:
+         */
+        if (s->lookahead >= MIN_MATCH) {
+            INSERT_STRING(s, s->strstart, hash_head);
+        }
+
+        /* Find the longest match, discarding those <= prev_length.
+         * At this point we have always match_length < MIN_MATCH
+         */
+        if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
+            /* To simplify the code, we prevent matches with the string
+             * of window index 0 (in particular we have to avoid a match
+             * of the string with itself at the start of the input file).
+             */
+#ifdef FASTEST
+            if ((s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) ||
+                (s->strategy == Z_RLE && s->strstart - hash_head == 1)) {
+                s->match_length = longest_match_fast (s, hash_head);
+            }
+#else
+            if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) {
+                s->match_length = longest_match (s, hash_head);
+            } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) {
+                s->match_length = longest_match_fast (s, hash_head);
+            }
+#endif
+            /* longest_match() or longest_match_fast() sets match_start */
+        }
+        if (s->match_length >= MIN_MATCH) {
+            check_match(s, s->strstart, s->match_start, s->match_length);
+
+            _tr_tally_dist(s, s->strstart - s->match_start,
+                           s->match_length - MIN_MATCH, bflush);
+
+            s->lookahead -= s->match_length;
+
+            /* Insert new strings in the hash table only if the match length
+             * is not too large. This saves time but degrades compression.
+             */
+#ifndef FASTEST
+            if (s->match_length <= s->max_insert_length &&
+                s->lookahead >= MIN_MATCH) {
+                s->match_length--; /* string at strstart already in table */
+                do {
+                    s->strstart++;
+                    INSERT_STRING(s, s->strstart, hash_head);
+                    /* strstart never exceeds WSIZE-MAX_MATCH, so there are
+                     * always MIN_MATCH bytes ahead.
+                     */
+                } while (--s->match_length != 0);
+                s->strstart++;
+            } else
+#endif
+            {
+                s->strstart += s->match_length;
+                s->match_length = 0;
+                s->ins_h = s->window[s->strstart];
+                UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+#if MIN_MATCH != 3
+                Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+                /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
+                 * matter since it will be recomputed at next deflate call.
+                 */
+            }
+        } else {
+            /* No match, output a literal byte */
+            Tracevv((stderr,"%c", s->window[s->strstart]));
+            _tr_tally_lit (s, s->window[s->strstart], bflush);
+            s->lookahead--;
+            s->strstart++;
+        }
+        if (bflush) FLUSH_BLOCK(s, 0); /* may return need_more */
+    }
+    FLUSH_BLOCK(s, flush == Z_FINISH); /* may return finish_started or need_more */
+    return flush == Z_FINISH ? finish_done : block_done;
+}
+
+#ifndef FASTEST
+/* ===========================================================================
+ * Same as deflate_fast, but achieves better compression. We use a lazy
+ * evaluation for matches: a match is finally adopted only if there is
+ * no better match at the next window position.
+ */
+local block_state deflate_slow(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    IPos hash_head = NIL;    /* head of hash chain */
+    int bflush;              /* set if current block must be flushed */
+
+    /* Process the input block. */
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the next match, plus MIN_MATCH bytes to insert the
+         * string following the next match.
+         */
+        if (s->lookahead < MIN_LOOKAHEAD) {
+            fill_window(s);
+            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+                return need_more;
+            }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* Insert the string window[strstart .. strstart+2] in the
+         * dictionary, and set hash_head to the head of the hash chain:
+         */
+        if (s->lookahead >= MIN_MATCH) {
+            INSERT_STRING(s, s->strstart, hash_head);
+        }
+
+        /* Remember the previous step's match, then look for the longest match
+         * at strstart, discarding those <= prev_length. */
+        s->prev_length = s->match_length, s->prev_match = s->match_start;
+        s->match_length = MIN_MATCH-1;
+
+        if (hash_head != NIL && s->prev_length < s->max_lazy_match &&
+            s->strstart - hash_head <= MAX_DIST(s)) {
+            /* To simplify the code, we prevent matches with the string
+             * of window index 0 (in particular we have to avoid a match
+             * of the string with itself at the start of the input file).
+             */
+            if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) {
+                s->match_length = longest_match (s, hash_head);
+            } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) {
+                s->match_length = longest_match_fast (s, hash_head);
+            }
+            /* longest_match() or longest_match_fast() sets match_start */
+
+            if (s->match_length <= 5 && (s->strategy == Z_FILTERED
+#if TOO_FAR <= 32767
+                || (s->match_length == MIN_MATCH &&
+                    s->strstart - s->match_start > TOO_FAR)
+#endif
+                )) {
+
+                /* If prev_match is also MIN_MATCH, match_start is garbage
+                 * but we will ignore the current match anyway.
+                 */
+                s->match_length = MIN_MATCH-1;
+            }
+        }
+        /* If there was a match at the previous step and the current
+         * match is not better, output the previous match:
+         */
+        if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
+            uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
+            /* Do not insert strings in hash table beyond this. */
+
+            check_match(s, s->strstart-1, s->prev_match, s->prev_length);
+
+            _tr_tally_dist(s, s->strstart -1 - s->prev_match,
+                           s->prev_length - MIN_MATCH, bflush);
+
+            /* Insert in hash table all strings up to the end of the match.
+             * strstart-1 and strstart are already inserted. If there is not
+             * enough lookahead, the last two strings are not inserted in
+             * the hash table.
+             */
+            s->lookahead -= s->prev_length-1; /* the match began at strstart-1 */
+            s->prev_length -= 2;
+            do {
+                if (++s->strstart <= max_insert) {
+                    INSERT_STRING(s, s->strstart, hash_head);
+                }
+            } while (--s->prev_length != 0);
+            s->match_available = 0;
+            s->match_length = MIN_MATCH-1;
+            s->strstart++;
+
+            if (bflush) FLUSH_BLOCK(s, 0); /* may return need_more */
+
+        } else if (s->match_available) {
+            /* If there was no match at the previous position, output a
+             * single literal. If there was a match but the current match
+             * is longer, truncate the previous match to a single literal.
+             */
+            Tracevv((stderr,"%c", s->window[s->strstart-1]));
+            _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+            if (bflush) {
+                FLUSH_BLOCK_ONLY(s, 0);
+            }
+            s->strstart++;
+            s->lookahead--;
+            if (s->strm->avail_out == 0) return need_more;
+        } else {
+            /* There is no previous match to compare with, wait for
+             * the next step to decide.
+             */
+            s->match_available = 1;
+            s->strstart++;
+            s->lookahead--;
+        }
+    }
+    Assert (flush != Z_NO_FLUSH, "no flush?");
+    if (s->match_available) { /* emit the literal still pending at strstart-1 */
+        Tracevv((stderr,"%c", s->window[s->strstart-1]));
+        _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+        s->match_available = 0;
+    }
+    FLUSH_BLOCK(s, flush == Z_FINISH); /* may return finish_started or need_more */
+    return flush == Z_FINISH ? finish_done : block_done;
+}
+#endif /* FASTEST */
+
+#if 0
+/* ===========================================================================
+ * (Compiled out by the enclosing #if 0.)  For Z_RLE, simply look for runs of
+ * bytes and generate matches only of distance one.  Do not maintain a hash
+ * table.  (It will be regenerated if this run of deflate switches away from Z_RLE.)
+ */
+local block_state deflate_rle(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    int bflush;         /* set if current block must be flushed */
+    uInt run;           /* length of run */
+    uInt max;           /* maximum length of run */
+    uInt prev;          /* byte at distance one to match */
+    Bytef *scan;        /* scan for end of run */
+
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the longest encodable run.
+         */
+        if (s->lookahead < MAX_MATCH) {
+            fill_window(s);
+            if (s->lookahead < MAX_MATCH && flush == Z_NO_FLUSH) {
+                return need_more;
+            }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* See how many times the previous byte repeats */
+        run = 0;
+        if (s->strstart > 0) {      /* if there is a previous byte, that is */
+            max = s->lookahead < MAX_MATCH ? s->lookahead : MAX_MATCH;
+            scan = s->window + s->strstart - 1;
+            prev = *scan++;
+            do {
+                if (*scan++ != prev)
+                    break;
+            } while (++run < max);
+        }
+
+        /* Emit match if have run of MIN_MATCH or longer, else emit literal */
+        if (run >= MIN_MATCH) {
+            check_match(s, s->strstart, s->strstart - 1, run);
+            _tr_tally_dist(s, 1, run - MIN_MATCH, bflush);
+            s->lookahead -= run;
+            s->strstart += run;
+        } else {
+            /* No match, output a literal byte */
+            Tracevv((stderr,"%c", s->window[s->strstart]));
+            _tr_tally_lit (s, s->window[s->strstart], bflush);
+            s->lookahead--;
+            s->strstart++;
+        }
+        if (bflush) FLUSH_BLOCK(s, 0); /* may return need_more */
+    }
+    FLUSH_BLOCK(s, flush == Z_FINISH); /* may return finish_started or need_more */
+    return flush == Z_FINISH ? finish_done : block_done;
+}
+#endif
diff --git a/src/zlib/deflate.h b/src/zlib/deflate.h
new file mode 100644
index 0000000..033b71f
--- /dev/null
+++ b/src/zlib/deflate.h
@@ -0,0 +1,331 @@
+/* deflate.h -- internal compression state
+ * Copyright (C) 1995-2004 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* @(#) $Id: deflate.h,v 1.1 2008/10/17 06:17:16 scuri Exp $ */
+
+#ifndef DEFLATE_H
+#define DEFLATE_H
+
+#include "zutil.h"
+
+/* define NO_GZIP when compiling if you want to disable gzip header and
+   trailer creation by deflate().  NO_GZIP would be used to avoid linking in
+   the crc code when it is not needed.  For shared libraries, gzip encoding
+   should be left enabled. */
+#ifndef NO_GZIP
+#  define GZIP
+#endif
+
+/* ===========================================================================
+ * Internal compression state.
+ */
+
+#define LENGTH_CODES 29
+/* number of length codes, not counting the special END_BLOCK code */
+
+#define LITERALS  256
+/* number of literal bytes 0..255 */
+
+#define L_CODES (LITERALS+1+LENGTH_CODES)
+/* number of Literal or Length codes, including the END_BLOCK code */
+
+#define D_CODES   30
+/* number of distance codes */
+
+#define BL_CODES  19
+/* number of codes used to transfer the bit lengths */
+
+#define HEAP_SIZE (2*L_CODES+1)
+/* maximum heap size; also the dimension of dyn_ltree[] in deflate_state */
+
+#define MAX_BITS 15
+/* No code may exceed MAX_BITS bits */
+
+#define INIT_STATE    42
+#define EXTRA_STATE   69
+#define NAME_STATE    73
+#define COMMENT_STATE 91
+#define HCRC_STATE   103
+#define BUSY_STATE   113
+#define FINISH_STATE 666
+/* Stream status values (presumably held in deflate_state.status) */
+
+
+/* Data structure describing a single value and its code string. */
+typedef struct ct_data_s {
+    union {
+        ush  freq;       /* frequency count */
+        ush  code;       /* bit string */
+    } fc;
+    union {
+        ush  dad;        /* father node in Huffman tree */
+        ush  len;        /* length of bit string */
+    } dl;
+} FAR ct_data;
+/* Shorthand member names that reach through the two unions of ct_data: */
+#define Freq fc.freq
+#define Code fc.code
+#define Dad  dl.dad
+#define Len  dl.len
+
+typedef struct static_tree_desc_s  static_tree_desc;
+/* Descriptor tying a dynamic tree to its static counterpart (opaque here). */
+typedef struct tree_desc_s {
+    ct_data *dyn_tree;           /* the dynamic tree */
+    int     max_code;            /* largest code with non zero frequency */
+    static_tree_desc *stat_desc; /* the corresponding static tree */
+} FAR tree_desc;
+
+typedef ush Pos;
+typedef Pos FAR Posf;
+typedef unsigned IPos;
+
+/* A Pos is an index in the character window. We use ush instead of int to
+ * save space in the various tables. IPos is the full-width form used outside
+ * those tables (function parameters, hash_head locals, prev_match). */
+
+typedef struct internal_state {
+    z_streamp strm;      /* pointer back to this zlib stream */
+    int   status;        /* stream status; presumably one of the *_STATE values */
+    Bytef *pending_buf;  /* output still pending */
+    ulg   pending_buf_size; /* size of pending_buf */
+    Bytef *pending_out;  /* next pending byte to output to the stream */
+    uInt   pending;      /* nb of bytes in the pending buffer */
+    int   wrap;          /* bit 0 true for zlib, bit 1 true for gzip */
+    gz_headerp  gzhead;  /* gzip header information to write */
+    uInt   gzindex;      /* where in extra, name, or comment */
+    Byte  method;        /* STORED (for zip only) or DEFLATED */
+    int   last_flush;    /* value of flush param for previous deflate call */
+
+                /* used by deflate.c: */
+
+    uInt  w_size;        /* LZ77 window size (32K by default) */
+    uInt  w_bits;        /* log2(w_size)  (8..16) */
+    uInt  w_mask;        /* w_size - 1 */
+
+    Bytef *window;
+    /* Sliding window. Input bytes are read into the second half of the window,
+     * and move to the first half later to keep a dictionary of at least w_size
+     * bytes. With this organization, matches are limited to a distance of
+     * w_size-MAX_MATCH bytes, but this ensures that IO is always
+     * performed with a length multiple of the block size. Also, it limits
+     * the window size to 64K, which is quite useful on MSDOS.
+     * To do: use the user input buffer as sliding window.
+     */
+
+    ulg window_size;
+    /* Actual size of window: 2*w_size, except when the user input buffer
+     * is directly used as sliding window.
+     */
+
+    Posf *prev;
+    /* Link to older string with same hash index. To limit the size of this
+     * array to 64K, this link is maintained only for the last 32K strings.
+     * An index in this array is thus a window index modulo 32K.
+     */
+
+    Posf *head; /* Heads of the hash chains or NIL. */
+
+    uInt  ins_h;          /* hash index of string to be inserted */
+    uInt  hash_size;      /* number of elements in hash table */
+    uInt  hash_bits;      /* log2(hash_size) */
+    uInt  hash_mask;      /* hash_size-1 */
+
+    uInt  hash_shift;
+    /* Number of bits by which ins_h must be shifted at each input
+     * step. It must be such that after MIN_MATCH steps, the oldest
+     * byte no longer takes part in the hash key, that is:
+     *   hash_shift * MIN_MATCH >= hash_bits
+     */
+
+    long block_start;
+    /* Window position at the beginning of the current output block. Gets
+     * negative when the window is moved backwards.
+     */
+
+    uInt match_length;           /* length of best match */
+    IPos prev_match;             /* previous match */
+    int match_available;         /* set if previous match exists */
+    uInt strstart;               /* start of string to insert */
+    uInt match_start;            /* start of matching string */
+    uInt lookahead;              /* number of valid bytes ahead in window */
+
+    uInt prev_length;
+    /* Length of the best match at previous step. Matches not greater than this
+     * are discarded. This is used in the lazy match evaluation.
+     */
+
+    uInt max_chain_length;
+    /* To speed up deflation, hash chains are never searched beyond this
+     * length.  A higher limit improves compression ratio but degrades the
+     * speed.
+     */
+
+    uInt max_lazy_match;
+    /* Attempt to find a better match only when the current match is strictly
+     * smaller than this value. This mechanism is used only for compression
+     * levels >= 4.
+     */
+#   define max_insert_length  max_lazy_match
+    /* Insert new strings in the hash table only if the match length is not
+     * greater than this length. This saves time but degrades compression.
+     * max_insert_length is used only for compression levels <= 3.
+     */
+
+    int level;    /* compression level (0..9; 0 selects stored blocks) */
+    int strategy; /* favor or force Huffman coding */
+
+    uInt good_match;
+    /* Use a faster search when the previous match is longer than this */
+
+    int nice_match; /* Stop searching when current match exceeds this */
+
+                /* used by trees.c: */
+    /* Didn't use ct_data typedef below to suppress compiler warning */
+    struct ct_data_s dyn_ltree[HEAP_SIZE];   /* literal and length tree */
+    struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
+    struct ct_data_s bl_tree[2*BL_CODES+1];  /* Huffman tree for bit lengths */
+
+    struct tree_desc_s l_desc;               /* desc. for literal tree */
+    struct tree_desc_s d_desc;               /* desc. for distance tree */
+    struct tree_desc_s bl_desc;              /* desc. for bit length tree */
+
+    ush bl_count[MAX_BITS+1];
+    /* number of codes at each bit length for an optimal tree */
+
+    int heap[2*L_CODES+1];      /* heap used to build the Huffman trees */
+    int heap_len;               /* number of elements in the heap */
+    int heap_max;               /* element of largest frequency */
+    /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
+     * The same heap array is used to build all trees.
+     */
+
+    uch depth[2*L_CODES+1];
+    /* Depth of each subtree used as tie breaker for trees of equal frequency
+     */
+
+    uchf *l_buf;          /* buffer for literals or lengths */
+
+    uInt  lit_bufsize;
+    /* Size of match buffer for literals/lengths.  There are 4 reasons for
+     * limiting lit_bufsize to 64K:
+     *   - frequencies can be kept in 16 bit counters
+     *   - if compression is not successful for the first block, all input
+     *     data is still in the window so we can still emit a stored block even
+     *     when input comes from standard input.  (This can also be done for
+     *     all blocks if lit_bufsize is not greater than 32K.)
+     *   - if compression is not successful for a file smaller than 64K, we can
+     *     even emit a stored file instead of a stored block (saving 5 bytes).
+     *     This is applicable only for zip (not gzip or zlib).
+     *   - creating new Huffman trees less frequently may not provide fast
+     *     adaptation to changes in the input data statistics. (Take for
+     *     example a binary file with poorly compressible code followed by
+     *     a highly compressible string table.) Smaller buffer sizes give
+     *     fast adaptation but have of course the overhead of transmitting
+     *     trees more frequently.
+     *   - I can't count above 4
+     */
+
+    uInt last_lit;      /* running index in l_buf */
+
+    ushf *d_buf;
+    /* Buffer for distances. To simplify the code, d_buf and l_buf have
+     * the same number of elements. To use different lengths, an extra flag
+     * array would be necessary.
+     */
+
+    ulg opt_len;        /* bit length of current block with optimal trees */
+    ulg static_len;     /* bit length of current block with static trees */
+    uInt matches;       /* number of string matches in current block */
+    int last_eob_len;   /* bit length of EOB code for last block */
+
+#ifdef DEBUG
+    ulg compressed_len; /* total bit length of compressed file mod 2^32 */
+    ulg bits_sent;      /* bit length of compressed data sent mod 2^32 */
+#endif
+
+    ush bi_buf;
+    /* Output buffer. bits are inserted starting at the bottom (least
+     * significant bits).
+     */
+    int bi_valid;
+    /* Number of valid bits in bi_buf.  All bits above the last valid bit
+     * are always zero.
+     */
+
+} FAR deflate_state;
+
+/* Output a byte on the stream: store c in pending_buf and advance pending.
+ * IN assertion: there is enough room in pending_buf. (s is evaluated twice.)
+ */
+#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);}
+
+
+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
+/* Minimum amount of lookahead, except at the end of the input file.
+ * See deflate.c for comments about the MIN_MATCH+1.
+ */
+
+#define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD)
+/* In order to simplify the code, particularly on 16 bit machines, match
+ * distances are limited to MAX_DIST (w_size-MIN_LOOKAHEAD) instead of WSIZE.
+ */
+
+        /* in trees.c (the int returned by _tr_tally is used as a flush flag) */
+void _tr_init         OF((deflate_state *s));
+int  _tr_tally        OF((deflate_state *s, unsigned dist, unsigned lc));
+void _tr_flush_block  OF((deflate_state *s, charf *buf, ulg stored_len,
+                          int eof));
+void _tr_align        OF((deflate_state *s));
+void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
+                          int eof));
+
+#define d_code(dist) \
+   ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
+/* Mapping from a distance to a distance code. dist is the distance - 1 and
+ * must not have side effects (the macro evaluates it more than once).
+ * _dist_code[256] and _dist_code[257] are never used.
+ */
+
+#ifndef DEBUG
+/* Inline versions of _tr_tally for speed (DEBUG builds call the function): */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+  extern uch _length_code[];
+  extern uch _dist_code[];
+#else
+  extern const uch _length_code[];
+  extern const uch _dist_code[];
+#endif
+/* Both append to d_buf/l_buf and set flush once last_lit == lit_bufsize-1. */
+# define _tr_tally_lit(s, c, flush) \
+  { uch cc = (c); \
+    s->d_buf[s->last_lit] = 0; \
+    s->l_buf[s->last_lit++] = cc; \
+    s->dyn_ltree[cc].Freq++; \
+    flush = (s->last_lit == s->lit_bufsize-1); \
+   }
+# define _tr_tally_dist(s, distance, length, flush) \
+  { uch len = (length); \
+    ush dist = (distance); \
+    s->d_buf[s->last_lit] = dist; \
+    s->l_buf[s->last_lit++] = len; \
+    dist--; \
+    s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
+    s->dyn_dtree[d_code(dist)].Freq++; \
+    flush = (s->last_lit == s->lit_bufsize-1); \
+  }
+#else
+# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
+# define _tr_tally_dist(s, distance, length, flush) \
+              flush = _tr_tally(s, distance, length)
+#endif
+
+#endif /* DEFLATE_H */
diff --git a/src/zlib/gzio.c b/src/zlib/gzio.c
new file mode 100644
index 0000000..6f0acad
--- /dev/null
+++ b/src/zlib/gzio.c
@@ -0,0 +1,1026 @@
+/* gzio.c -- IO on .gz files
+ * Copyright (C) 1995-2005 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Compile this file with -DNO_GZCOMPRESS to avoid the compression code.
+ */
+
+/* @(#) $Id: gzio.c,v 1.1 2008/10/17 06:17:16 scuri Exp $ */
+
+#include <stdio.h>
+
+#include "zutil.h"
+
+#ifdef NO_DEFLATE       /* for compatibility with old definition */
+#  define NO_GZCOMPRESS
+#endif
+
+#ifndef NO_DUMMY_DECL
+struct internal_state {int dummy;}; /* for buggy compilers */
+#endif
+
+#ifndef Z_BUFSIZE
+#  ifdef MAXSEG_64K
+#    define Z_BUFSIZE 4096 /* minimize memory usage for 16-bit DOS */
+#  else
+#    define Z_BUFSIZE 16384
+#  endif
+#endif
+#ifndef Z_PRINTF_BUFSIZE
+#  define Z_PRINTF_BUFSIZE 4096
+#endif
+
+#ifdef __MVS__
+#  pragma map (fdopen , "\174\174FDOPEN")
+   FILE *fdopen(int, const char *);
+#endif
+
+#ifndef STDC
+extern voidp  malloc OF((uInt size));
+extern void   free   OF((voidpf ptr));
+#endif
+
+#define ALLOC(size) malloc(size)
+#define TRYFREE(p) {if (p) free(p);}
+
+static int const gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
+
+/* gzip flag byte */
+#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
+#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
+#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
+#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
+#define COMMENT      0x10 /* bit 4 set: file comment present */
+#define RESERVED     0xE0 /* bits 5..7: reserved */
+
+typedef struct gz_stream {
+    z_stream stream;  /* zlib state used by deflate/inflate on this file */
+    int      z_err;   /* error code for last stream operation */
+    int      z_eof;   /* set if end of input file */
+    FILE     *file;   /* .gz file */
+    Byte     *inbuf;  /* input buffer */
+    Byte     *outbuf; /* output buffer */
+    uLong    crc;     /* crc32 of uncompressed data */
+    char     *msg;    /* error message built by gzerror, freed by destroy */
+    char     *path;   /* copy of the path name, used in gzerror messages */
+    int      transparent; /* 1 if input file is not a .gz file */
+    char     mode;    /* 'w' (write or append) or 'r' */
+    z_off_t  start;   /* start of compressed data in file (header skipped) */
+    z_off_t  in;      /* bytes into deflate or inflate */
+    z_off_t  out;     /* bytes out of deflate or inflate */
+    int      back;    /* one character push-back, EOF when empty */
+    int      last;    /* true if push-back is last character */
+} gz_stream;
+
+
+local gzFile gz_open      OF((const char *path, const char *mode, int  fd));
+local int do_flush        OF((gzFile file, int flush));
+local int    get_byte     OF((gz_stream *s));
+local void   check_header OF((gz_stream *s));
+local int    destroy      OF((gz_stream *s));
+local void   putLong      OF((FILE *file, uLong x));
+local uLong  getLong      OF((gz_stream *s));
+
+/* ===========================================================================
+     Opens a gzip (.gz) file for reading or writing. The mode parameter
+   is as in fopen ("rb" or "wb") plus an optional level digit and strategy
+   letter ('f', 'h', 'R'). The file is given by descriptor, or by path name
+   if fd < 0.  gz_open returns NULL if the file could not be opened or if
+   there was insufficient memory to allocate the (de)compression state; errno
+   can be checked to distinguish the two cases (if errno is zero, the
+   zlib error is Z_MEM_ERROR).
+*/
+local gzFile gz_open (path, mode, fd)
+    const char *path;
+    const char *mode;
+    int  fd;
+{
+    int err;
+    int level = Z_DEFAULT_COMPRESSION; /* compression level */
+    int strategy = Z_DEFAULT_STRATEGY; /* compression strategy */
+    char *p = (char*)mode;
+    gz_stream *s;
+    char fmode[80]; /* copy of mode, without the compression level */
+    char *m = fmode;
+
+    if (!path || !mode) return Z_NULL;
+
+    s = (gz_stream *)ALLOC(sizeof(gz_stream));
+    if (!s) return Z_NULL;
+
+    s->stream.zalloc = (alloc_func)0;
+    s->stream.zfree = (free_func)0;
+    s->stream.opaque = (voidpf)0;
+    s->stream.state = Z_NULL; /* destroy() tests this on early failures */
+    s->stream.next_in = s->inbuf = Z_NULL;
+    s->stream.next_out = s->outbuf = Z_NULL;
+    s->stream.avail_in = s->stream.avail_out = 0;
+    s->file = NULL;
+    s->z_err = Z_OK;
+    s->z_eof = 0;
+    s->in = s->out = 0;
+    s->back = EOF;
+    s->crc = crc32(0L, Z_NULL, 0);
+    s->msg = NULL;
+    s->transparent = 0;
+
+    s->path = (char*)ALLOC(strlen(path)+1);
+    if (s->path == NULL) {
+        return destroy(s), (gzFile)Z_NULL;
+    }
+    strcpy(s->path, path); /* do this early for debugging */
+
+    s->mode = '\0';
+    do {
+        if (*p == 'r') s->mode = 'r';
+        if (*p == 'w' || *p == 'a') s->mode = 'w'; /* append also deflates */
+        if (*p >= '0' && *p <= '9') {
+            level = *p - '0';
+        } else if (*p == 'f') {
+          strategy = Z_FILTERED;
+        } else if (*p == 'h') {
+          strategy = Z_HUFFMAN_ONLY;
+        } else if (*p == 'R') {
+          strategy = Z_RLE;
+        } else {
+            *m++ = *p; /* copy the mode */
+        }
+    } while (*p++ && m != fmode + sizeof(fmode));
+    fmode[sizeof(fmode) - 1] = '\0'; /* loop may stop on a full buffer */
+    if (s->mode == '\0') return destroy(s), (gzFile)Z_NULL;
+    if (s->mode == 'w') {
+#ifdef NO_GZCOMPRESS
+        err = Z_STREAM_ERROR;
+#else
+        err = deflateInit2(&(s->stream), level,
+                           Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, strategy);
+        /* windowBits is passed < 0 to suppress zlib header */
+
+        s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE);
+#endif
+        if (err != Z_OK || s->outbuf == Z_NULL) {
+            return destroy(s), (gzFile)Z_NULL;
+        }
+    } else {
+        s->stream.next_in  = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE);
+
+        err = inflateInit2(&(s->stream), -MAX_WBITS);
+        /* windowBits is passed < 0 to tell that there is no zlib header.
+         * Note that in this case inflate *requires* an extra "dummy" byte
+         * after the compressed stream in order to complete decompression and
+         * return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are
+         * present after the compressed stream.
+         */
+        if (err != Z_OK || s->inbuf == Z_NULL) {
+            return destroy(s), (gzFile)Z_NULL;
+        }
+    }
+    s->stream.avail_out = Z_BUFSIZE;
+
+    errno = 0;
+    s->file = fd < 0 ? F_OPEN(path, fmode) : (FILE*)fdopen(fd, fmode);
+
+    if (s->file == NULL) {
+        return destroy(s), (gzFile)Z_NULL;
+    }
+    if (s->mode == 'w') {
+        /* Write a very simple .gz header:
+         */
+        fprintf(s->file, "%c%c%c%c%c%c%c%c%c%c", gz_magic[0], gz_magic[1],
+             Z_DEFLATED, 0 /*flags*/, 0,0,0,0 /*time*/, 0 /*xflags*/, OS_CODE);
+        s->start = 10L;
+        /* We use 10L instead of ftell(s->file) because ftell causes an
+         * fflush on some systems. This version of the library doesn't use
+         * start anyway in write mode, so this initialization is not
+         * necessary.
+         */
+    } else {
+        check_header(s); /* skip the .gz header */
+        s->start = ftell(s->file) - s->stream.avail_in;
+    }
+
+    return (gzFile)s;
+}
+
+/* ===========================================================================
+     Opens a gzip (.gz) file for reading or writing.
+*/
+gzFile ZEXPORT gzopen (path, mode)
+    const char *path;
+    const char *mode;
+{
+    return gz_open (path, mode, -1); /* fd of -1: open the file by path name */
+}
+
+/* ===========================================================================
+     Associate a gzFile with the file descriptor fd. fd is not dup'ed here
+   to mimic the behavio(u)r of fdopen.
+*/
+gzFile ZEXPORT gzdopen (fd, mode)
+    int fd;
+    const char *mode;
+{
+    char name[46];      /* allow for up to 128-bit integers */
+
+    if (fd < 0) return (gzFile)Z_NULL; /* gz_open reads fd < 0 as "by path" */
+    sprintf(name, "<fd:%d>", fd); /* for debugging */
+
+    return gz_open (name, mode, fd); /* gz_open copies name into s->path */
+}
+
+/* ===========================================================================
+ * Update the compression level and strategy
+ */
+int ZEXPORT gzsetparams (file, level, strategy)
+    gzFile file;
+    int level;
+    int strategy;
+{
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR; /* writers only */
+
+    /* Make room to allow flushing */
+    if (s->stream.avail_out == 0) {
+
+        s->stream.next_out = s->outbuf;
+        if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) {
+            s->z_err = Z_ERRNO; /* recorded; deflateParams is still called */
+        }
+        s->stream.avail_out = Z_BUFSIZE;
+    }
+
+    return deflateParams (&(s->stream), level, strategy);
+}
+
+/* ===========================================================================
+     Read a byte from a gz_stream; update next_in and avail_in. Return EOF
+   for end of file.
+   IN assertion: the stream s has been successfully opened for reading.
+*/
+local int get_byte(s)
+    gz_stream *s;
+{
+    z_stream *strm = &(s->stream); /* shorthand for the embedded zlib stream */
+    if (s->z_eof) return EOF;
+    if (strm->avail_in == 0) {
+        /* Input buffer drained: refill it from the file. */
+        errno = 0;
+        strm->avail_in = (uInt)fread(s->inbuf, 1, Z_BUFSIZE, s->file);
+        if (strm->avail_in == 0) {
+            s->z_eof = 1;
+            if (ferror(s->file)) s->z_err = Z_ERRNO;
+            return EOF;
+        }
+        strm->next_in = s->inbuf;
+    }
+    return (strm->avail_in--, *(strm->next_in)++);
+
+/* ===========================================================================
+      Check the gzip header of a gz_stream opened for reading. Set the stream
+    mode to transparent if the gzip magic header is not present; set s->z_err
+    to Z_DATA_ERROR if the magic header is present but the rest of the header
+    is incorrect.
+    IN assertion: the stream s has already been created successfully;
+       s->stream.avail_in is zero for the first time, but may be non-zero
+       for concatenated .gz files.
+*/
+local void check_header(s)
+    gz_stream *s;
+{
+    int method; /* method byte */
+    int flags;  /* flags byte */
+    uInt len;
+    int c;
+
+    /* Assure two bytes in the buffer so we can peek ahead -- handle case
+       where first byte of header is at the end of the buffer after the last
+       gzip segment */
+    len = s->stream.avail_in;
+    if (len < 2) {
+        if (len) s->inbuf[0] = s->stream.next_in[0]; /* keep the leftover byte */
+        errno = 0; /* len is 0 or 1 here */
+        len = (uInt)fread(s->inbuf + len, 1, Z_BUFSIZE >> len, s->file);
+        if (len == 0 && ferror(s->file)) s->z_err = Z_ERRNO;
+        s->stream.avail_in += len;
+        s->stream.next_in = s->inbuf;
+        if (s->stream.avail_in < 2) {
+            s->transparent = s->stream.avail_in; /* 0 or 1 bytes available */
+            return;
+        }
+    }
+
+    /* Peek ahead to check the gzip magic header */
+    if (s->stream.next_in[0] != gz_magic[0] ||
+        s->stream.next_in[1] != gz_magic[1]) {
+        s->transparent = 1; /* no magic: bytes are left in place for gzread */
+        return;
+    }
+    s->stream.avail_in -= 2; /* consume the two magic bytes */
+    s->stream.next_in += 2;
+
+    /* Check the rest of the gzip header */
+    method = get_byte(s);
+    flags = get_byte(s);
+    if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
+        s->z_err = Z_DATA_ERROR;
+        return;
+    }
+
+    /* Discard time, xflags and OS code: */
+    for (len = 0; len < 6; len++) (void)get_byte(s);
+
+    if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
+        len  =  (uInt)get_byte(s);
+        len += ((uInt)get_byte(s))<<8;
+        /* len is garbage if EOF but the loop below will quit anyway */
+        while (len-- != 0 && get_byte(s) != EOF) ;
+    }
+    if ((flags & ORIG_NAME) != 0) { /* skip the original file name */
+        while ((c = get_byte(s)) != 0 && c != EOF) ;
+    }
+    if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
+        while ((c = get_byte(s)) != 0 && c != EOF) ;
+    }
+    if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
+        for (len = 0; len < 2; len++) (void)get_byte(s);
+    }
+    s->z_err = s->z_eof ? Z_DATA_ERROR : Z_OK; /* EOF inside header is an error */
+}
+
+ /* ===========================================================================
+ * Cleanup then free the given gz_stream. Return a zlib error code.
+   Try freeing in the reverse order of allocations.
+ */
+local int destroy (s)
+    gz_stream *s;
+{
+    int err = Z_OK;
+
+    if (!s) return Z_STREAM_ERROR;
+
+    TRYFREE(s->msg);
+
+    if (s->stream.state != NULL) { /* end the zlib stream matching the mode */
+        if (s->mode == 'w') {
+#ifdef NO_GZCOMPRESS
+            err = Z_STREAM_ERROR;
+#else
+            err = deflateEnd(&(s->stream));
+#endif
+        } else if (s->mode == 'r') {
+            err = inflateEnd(&(s->stream));
+        }
+    }
+    if (s->file != NULL && fclose(s->file)) { /* fclose reported a failure */
+#ifdef ESPIPE
+        if (errno != ESPIPE) /* fclose is broken for pipes in HP/UX */
+#endif
+            err = Z_ERRNO;
+    }
+    if (s->z_err < 0) err = s->z_err; /* an earlier stream error wins */
+
+    TRYFREE(s->inbuf);
+    TRYFREE(s->outbuf);
+    TRYFREE(s->path);
+    TRYFREE(s);
+    return err;
+}
+
+/* ===========================================================================
+     Reads the given number of uncompressed bytes from the compressed file.
+   gzread returns the number of bytes actually read (0 for end of file).
+*/
+int ZEXPORT gzread (file, buf, len)
+    gzFile file;
+    voidp buf;
+    unsigned len;
+{
+    gz_stream *s = (gz_stream*)file;
+    Bytef *start = (Bytef*)buf; /* starting point for crc computation */
+    Byte  *next_out; /* == stream.next_out but not forced far (for MSDOS) */
+
+    if (s == NULL || s->mode != 'r') return Z_STREAM_ERROR;
+
+    if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) return -1;
+    if (s->z_err == Z_STREAM_END) return 0;  /* EOF */
+
+    next_out = (Byte*)buf;
+    s->stream.next_out = (Bytef*)buf;
+    s->stream.avail_out = len;
+
+    if (s->stream.avail_out && s->back != EOF) {
+        *next_out++ = s->back; /* deliver the byte pushed back by gzungetc */
+        s->stream.next_out++;
+        s->stream.avail_out--;
+        s->back = EOF;
+        s->out++;
+        start++; /* the pushed-back byte is excluded from the crc range */
+        if (s->last) { /* the pushed-back byte was the final one */
+            s->z_err = Z_STREAM_END;
+            return 1;
+        }
+    }
+
+    while (s->stream.avail_out != 0) {
+
+        if (s->transparent) {
+            /* Copy first the lookahead bytes: */
+            uInt n = s->stream.avail_in;
+            if (n > s->stream.avail_out) n = s->stream.avail_out;
+            if (n > 0) {
+                zmemcpy(s->stream.next_out, s->stream.next_in, n);
+                next_out += n;
+                s->stream.next_out = next_out;
+                s->stream.next_in   += n;
+                s->stream.avail_out -= n;
+                s->stream.avail_in  -= n;
+            }
+            if (s->stream.avail_out > 0) {
+                s->stream.avail_out -=
+                    (uInt)fread(next_out, 1, s->stream.avail_out, s->file);
+            }
+            len -= s->stream.avail_out; /* len becomes the count delivered */
+            s->in  += len;
+            s->out += len;
+            if (len == 0) s->z_eof = 1;
+            return (int)len;
+        }
+        if (s->stream.avail_in == 0 && !s->z_eof) {
+
+            errno = 0;
+            s->stream.avail_in = (uInt)fread(s->inbuf, 1, Z_BUFSIZE, s->file);
+            if (s->stream.avail_in == 0) {
+                s->z_eof = 1;
+                if (ferror(s->file)) {
+                    s->z_err = Z_ERRNO;
+                    break;
+                }
+            }
+            s->stream.next_in = s->inbuf;
+        }
+        s->in += s->stream.avail_in;   /* add before, subtract after the */
+        s->out += s->stream.avail_out; /* call: net change is bytes used */
+        s->z_err = inflate(&(s->stream), Z_NO_FLUSH);
+        s->in -= s->stream.avail_in;
+        s->out -= s->stream.avail_out;
+
+        if (s->z_err == Z_STREAM_END) {
+            /* Check CRC and original size */
+            s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start));
+            start = s->stream.next_out;
+
+            if (getLong(s) != s->crc) {
+                s->z_err = Z_DATA_ERROR;
+            } else {
+                (void)getLong(s);
+                /* The uncompressed length returned by above getLong() may be
+                 * different from s->out in case of concatenated .gz files.
+                 * Check for such files:
+                 */
+                check_header(s);
+                if (s->z_err == Z_OK) {
+                    inflateReset(&(s->stream));
+                    s->crc = crc32(0L, Z_NULL, 0);
+                }
+            }
+        }
+        if (s->z_err != Z_OK || s->z_eof) break;
+    }
+    s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start));
+
+    if (len == s->stream.avail_out &&
+        (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO))
+        return -1; /* nothing delivered and a hard error occurred */
+    return (int)(len - s->stream.avail_out);
+}
+
+
+/* ===========================================================================
+      Reads one byte from the compressed file. gzgetc returns this byte
+   or -1 in case of end of file or error.
+*/
+int ZEXPORT gzgetc(file)
+    gzFile file;
+{
+    unsigned char byte;
+    if (gzread(file, &byte, 1) != 1) return -1; /* end of file or error */
+    return byte;
+}
+
+
+/* ===========================================================================
+      Push one byte back onto the stream.
+*/
+int ZEXPORT gzungetc(c, file)
+    int c;
+    gzFile file;
+{
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL || s->mode != 'r' || c == EOF || s->back != EOF) return EOF;
+    s->back = c; /* only one byte of push-back is kept */
+    s->out--;    /* gzread counts the byte again when it hands it back */
+    s->last = (s->z_err == Z_STREAM_END); /* remember the stream had ended */
+    if (s->last) s->z_err = Z_OK; /* so gzread does not return 0 at once */
+    s->z_eof = 0;
+    return c;
+}
+
+
+/* ===========================================================================
+      Reads bytes from the compressed file until len-1 characters are
+   read, or a newline character is read and transferred to buf, or an
+   end-of-file condition is encountered.  The string is then terminated
+   with a null character.
+      gzgets returns buf, or Z_NULL in case of error.
+
+      The current implementation is not optimized at all.
+*/
+char * ZEXPORT gzgets(file, buf, len)
+    gzFile file;
+    char *buf;
+    int len;
+{
+    char *cur = buf; /* write cursor; buf keeps the start of the string */
+    if (buf == Z_NULL || len <= 0) return Z_NULL;
+    /* Read byte by byte, leaving room for the terminator. */
+    while (--len > 0 && gzread(file, cur, 1) == 1)
+        if (*cur++ == '\n') break;
+    *cur = '\0';
+    return (cur == buf && len > 0) ? Z_NULL : buf;
+
+
+#ifndef NO_GZCOMPRESS
+/* ===========================================================================
+     Writes the given number of uncompressed bytes into the compressed file.
+   gzwrite returns the number of bytes actually written (0 in case of error).
+*/
+int ZEXPORT gzwrite (file, buf, len)
+    gzFile file;
+    voidpc buf;
+    unsigned len;
+{
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR;
+
+    s->stream.next_in = (Bytef*)buf;
+    s->stream.avail_in = len;
+
+    while (s->stream.avail_in != 0) {
+
+        if (s->stream.avail_out == 0) { /* output buffer full: write it out */
+
+            s->stream.next_out = s->outbuf;
+            if (fwrite(s->outbuf, 1, Z_BUFSIZE, s->file) != Z_BUFSIZE) {
+                s->z_err = Z_ERRNO;
+                break;
+            }
+            s->stream.avail_out = Z_BUFSIZE;
+        }
+        s->in += s->stream.avail_in;   /* add before, subtract after the */
+        s->out += s->stream.avail_out; /* call: net change is bytes used */
+        s->z_err = deflate(&(s->stream), Z_NO_FLUSH);
+        s->in -= s->stream.avail_in;
+        s->out -= s->stream.avail_out;
+        if (s->z_err != Z_OK) break;
+    }
+    s->crc = crc32(s->crc, (const Bytef *)buf, len); /* over all len bytes */
+
+    return (int)(len - s->stream.avail_in); /* bytes actually consumed */
+}
+
+
+/* ===========================================================================
+     Converts, formats, and writes the args to the compressed file under
+   control of the format string, as in fprintf. gzprintf returns the number of
+   uncompressed bytes actually written (0 in case of error).
+*/
+#ifdef STDC
+#include <stdarg.h>
+
+int ZEXPORTVA gzprintf (gzFile file, const char *format, /* args */ ...)
+{
+    char buf[Z_PRINTF_BUFSIZE];
+    va_list va;
+    int len;
+
+    buf[sizeof(buf) - 1] = 0; /* sentinel: checked after formatting */
+    va_start(va, format);
+#ifdef NO_vsnprintf
+#  ifdef HAS_vsprintf_void
+    (void)vsprintf(buf, format, va);
+    va_end(va);
+    for (len = 0; len < sizeof(buf); len++) /* no length returned: scan */
+        if (buf[len] == 0) break;
+#  else
+    len = vsprintf(buf, format, va);
+    va_end(va);
+#  endif
+#else
+#  ifdef HAS_vsnprintf_void
+    (void)vsnprintf(buf, sizeof(buf), format, va);
+    va_end(va);
+    len = strlen(buf);
+#  else
+    len = vsnprintf(buf, sizeof(buf), format, va);
+    va_end(va);
+#  endif
+#endif
+    if (len <= 0 || len >= (int)sizeof(buf) || buf[sizeof(buf) - 1] != 0)
+        return 0; /* empty, failed, too long, or sentinel overwritten */
+    return gzwrite(file, buf, (unsigned)len);
+}
+#else /* not ANSI C */
+
+int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
+                       a11, a12, a13, a14, a15, a16, a17, a18, a19, a20)
+    gzFile file;
+    const char *format;
+    int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
+        a11, a12, a13, a14, a15, a16, a17, a18, a19, a20;
+{
+    char buf[Z_PRINTF_BUFSIZE];
+    int len;
+
+    buf[sizeof(buf) - 1] = 0; /* sentinel: checked after formatting */
+#ifdef NO_snprintf
+#  ifdef HAS_sprintf_void
+    sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8,
+            a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+    for (len = 0; len < sizeof(buf); len++) /* no length returned: scan */
+        if (buf[len] == 0) break;
+#  else
+    len = sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8,
+                a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+#  endif
+#else
+#  ifdef HAS_snprintf_void
+    snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8,
+             a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+    len = strlen(buf);
+#  else
+    len = snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8,
+                 a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+#  endif
+#endif
+    if (len <= 0 || len >= sizeof(buf) || buf[sizeof(buf) - 1] != 0)
+        return 0; /* empty, failed, too long, or sentinel overwritten */
+    return gzwrite(file, buf, len);
+}
+#endif
+
+/* ===========================================================================
+      Writes c, converted to an unsigned char, into the compressed file.
+   gzputc returns the value that was written, or -1 in case of error.
+*/
+int ZEXPORT gzputc(file, c)
+    gzFile file;
+    int c;
+{
+    /* Narrow to one byte first: writing from &c would depend on endianness. */
+    unsigned char byte = (unsigned char) c;
+    return gzwrite(file, &byte, 1) != 1 ? -1 : (int)byte;
+}
+
+
+/* ===========================================================================
+      Writes the given null-terminated string to the compressed file, excluding
+   the terminating null character.
+      gzputs returns the number of characters written, or -1 in case of error.
+*/
+int ZEXPORT gzputs(file, s)
+    gzFile file;
+    const char *s;
+{
+    return gzwrite(file, (char*)s, (unsigned)strlen(s)); /* NUL not written */
+}
+
+
+/* ===========================================================================
+     Flushes all pending output into the compressed file. The parameter
+   flush is as in the deflate() function.
+*/
+local int do_flush (file, flush)
+    gzFile file;
+    int flush;
+{
+    uInt len;
+    int done = 0;
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL || s->mode != 'w') return Z_STREAM_ERROR;
+
+    s->stream.avail_in = 0; /* should be zero already anyway */
+
+    for (;;) {
+        len = Z_BUFSIZE - s->stream.avail_out; /* bytes pending in outbuf */
+
+        if (len != 0) {
+            if ((uInt)fwrite(s->outbuf, 1, len, s->file) != len) {
+                s->z_err = Z_ERRNO;
+                return Z_ERRNO;
+            }
+            s->stream.next_out = s->outbuf;
+            s->stream.avail_out = Z_BUFSIZE;
+        }
+        if (done) break; /* last deflate output has just been written */
+        s->out += s->stream.avail_out;
+        s->z_err = deflate(&(s->stream), flush);
+        s->out -= s->stream.avail_out;
+
+        /* Ignore the second of two consecutive flushes: */
+        if (len == 0 && s->z_err == Z_BUF_ERROR) s->z_err = Z_OK;
+
+        /* deflate has finished flushing only when it hasn't used up
+         * all the available space in the output buffer:
+         */
+        done = (s->stream.avail_out != 0 || s->z_err == Z_STREAM_END);
+
+        if (s->z_err != Z_OK && s->z_err != Z_STREAM_END) break;
+    }
+    return  s->z_err == Z_STREAM_END ? Z_OK : s->z_err;
+}
+
+int ZEXPORT gzflush (file, flush)
+     gzFile file;
+     int flush;
+{
+    gz_stream *s = (gz_stream*)file;
+    int err = do_flush (file, flush);
+
+    if (err) return err; /* includes the NULL / not-a-writer rejection */
+    fflush(s->file);     /* push the stdio buffer out as well */
+    return  s->z_err == Z_STREAM_END ? Z_OK : s->z_err;
+}
+#endif /* NO_GZCOMPRESS */
+
+/* ===========================================================================
+      Sets the starting position for the next gzread or gzwrite on the given
+   compressed file. The offset represents a number of bytes in the
+   uncompressed data stream. gzseek returns the resulting offset location as
+   measured in bytes from the beginning of the uncompressed stream, or -1 in
+   case of error. SEEK_END is not implemented, returns error.
+      In this version of the library, gzseek can be extremely slow.
+*/
+z_off_t ZEXPORT gzseek (file, offset, whence)
+    gzFile file;
+    z_off_t offset;
+    int whence;
+{
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL || whence == SEEK_END ||
+        s->z_err == Z_ERRNO || s->z_err == Z_DATA_ERROR) {
+        return -1L;
+    }
+
+    if (s->mode == 'w') {
+#ifdef NO_GZCOMPRESS
+        return -1L;
+#else
+        if (whence == SEEK_SET) {
+            offset -= s->in; /* make offset relative to the current position */
+        }
+        if (offset < 0) return -1L; /* no backward seek when writing */
+
+        /* At this point, offset is the number of zero bytes to write. */
+        if (s->inbuf == Z_NULL) {
+            s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); /* for seeking */
+            if (s->inbuf == Z_NULL) return -1L;
+            zmemzero(s->inbuf, Z_BUFSIZE);
+        }
+        while (offset > 0)  {
+            uInt size = Z_BUFSIZE;
+            if (offset < Z_BUFSIZE) size = (uInt)offset;
+
+            size = gzwrite(file, s->inbuf, size);
+            if (size == 0) return -1L;
+
+            offset -= size;
+        }
+        return s->in;
+#endif
+    }
+    /* Rest of function is for reading only */
+
+    /* compute absolute position */
+    if (whence == SEEK_CUR) {
+        offset += s->out;
+    }
+    if (offset < 0) return -1L;
+
+    if (s->transparent) {
+        /* map to fseek */
+        s->back = EOF;
+        s->stream.avail_in = 0;
+        s->stream.next_in = s->inbuf;
+        if (fseek(s->file, offset, SEEK_SET) < 0) return -1L;
+
+        s->in = s->out = offset;
+        return offset;
+    }
+
+    /* For a negative seek, rewind and use positive seek */
+    if (offset >= s->out) {
+        offset -= s->out;
+    } else if (gzrewind(file) < 0) {
+        return -1L;
+    }
+    /* offset is now the number of bytes to skip. */
+
+    if (offset != 0 && s->outbuf == Z_NULL) {
+        s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); /* scratch for skipped data */
+        if (s->outbuf == Z_NULL) return -1L;
+    }
+    if (offset && s->back != EOF) { /* skipping consumes the push-back byte */
+        s->back = EOF;
+        s->out++;
+        offset--;
+        if (s->last) s->z_err = Z_STREAM_END;
+    }
+    while (offset > 0)  {
+        int size = Z_BUFSIZE;
+        if (offset < Z_BUFSIZE) size = (int)offset;
+
+        size = gzread(file, s->outbuf, (uInt)size); /* read and discard */
+        if (size <= 0) return -1L;
+        offset -= size;
+    }
+    return s->out;
+}
+
+/* ===========================================================================
+     Rewinds input file.
+*/
+int ZEXPORT gzrewind (file)
+    gzFile file;
+{
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL || s->mode != 'r') return -1; /* readers only */
+
+    s->z_err = Z_OK;
+    s->z_eof = 0;
+    s->back = EOF; /* drop any pushed-back byte */
+    s->stream.avail_in = 0; /* discard buffered input */
+    s->stream.next_in = s->inbuf;
+    s->crc = crc32(0L, Z_NULL, 0);
+    if (!s->transparent) (void)inflateReset(&s->stream);
+    s->in = 0;
+    s->out = 0;
+    return fseek(s->file, s->start, SEEK_SET); /* start was set by gz_open */
+}
+
+/* ===========================================================================
+     Returns the starting position for the next gzread or gzwrite on the
+   given compressed file. This position represents a number of bytes in the
+   uncompressed data stream.
+*/
+z_off_t ZEXPORT gztell (file)
+    gzFile file;
+{
+    return gzseek(file, 0L, SEEK_CUR); /* zero-length relative seek */
+}
+
+/* ===========================================================================
+     Returns 1 when EOF has previously been detected reading the given
+   input stream, otherwise zero.
+*/
+int ZEXPORT gzeof (file)
+    gzFile file;
+{
+    gz_stream *gz = (gz_stream*)file;
+
+    if (gz == NULL || gz->mode != 'r') return 0;
+
+    /* Concatenated compressed files carry embedded crc trailers, so the
+     * z_eof flag alone does not identify the end of a gz_stream: a stream
+     * whose last inflate result was Z_STREAM_END is at end of file too.
+     */
+    return (gz->z_eof || gz->z_err == Z_STREAM_END);
+}
+
+/* ===========================================================================
+     Returns 1 if reading and doing so transparently, otherwise zero.
+*/
+int ZEXPORT gzdirect (file)
+    gzFile file;
+{
+    gz_stream *gz = (gz_stream*)file;
+
+    /* Only a stream opened for reading can report transparent mode. */
+    return (gz != NULL && gz->mode == 'r') ? gz->transparent : 0;
+}
+
+/* ===========================================================================
+   Outputs a long in LSB order to the given file
+*/
+local void putLong (file, x)
+    FILE *file;
+    uLong x;
+{
+    int shift;
+    /* Emit the low 32 bits of x one byte at a time, low byte first. */
+    for (shift = 0; shift < 32; shift += 8) {
+        fputc((int)((x >> shift) & 0xff), file);
+    }
+}
+
+/* ===========================================================================
+   Reads a long in LSB order from the given gz_stream. Sets z_err in case
+   of error.
+*/
+local uLong getLong (s)
+    gz_stream *s;
+{
+    uLong x = (uLong)get_byte(s); /* lowest byte comes first */
+    int c;
+
+    x += ((uLong)get_byte(s))<<8;
+    x += ((uLong)get_byte(s))<<16;
+    c = get_byte(s);
+    if (c == EOF) s->z_err = Z_DATA_ERROR; /* only the last byte is tested */
+    x += ((uLong)c)<<24;
+    return x;
+}
+
+/* ===========================================================================
+     Flushes all pending output if necessary, closes the compressed file
+   and deallocates all the (de)compression state.
+*/
+int ZEXPORT gzclose (file)
+    gzFile file;
+{
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL) return Z_STREAM_ERROR;
+
+    if (s->mode == 'w') {
+#ifdef NO_GZCOMPRESS
+        return Z_STREAM_ERROR;
+#else
+        if (do_flush (file, Z_FINISH) != Z_OK)
+            return destroy((gz_stream*)file); /* no trailer after a failure */
+
+        putLong (s->file, s->crc); /* trailer: crc32, then length (low 32) */
+        putLong (s->file, (uLong)(s->in & 0xffffffff));
+#endif
+    }
+    return destroy((gz_stream*)file);
+}
+
+#ifdef STDC
+#  define zstrerror(errnum) strerror(errnum)
+#else
+#  define zstrerror(errnum) ""
+#endif
+
+/* ===========================================================================
+     Returns the error message for the last error which occurred on the
+   given compressed file. errnum is set to zlib error number. If an
+   error occurred in the file system and not in the compression library,
+   errnum is set to Z_ERRNO and the application may consult errno
+   to get the exact error code.
+*/
+const char * ZEXPORT gzerror (file, errnum)
+    gzFile file;
+    int *errnum;
+{
+    char *m;
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL) {
+        *errnum = Z_STREAM_ERROR;
+        return (const char*)ERR_MSG(Z_STREAM_ERROR);
+    }
+    *errnum = s->z_err;
+    if (*errnum == Z_OK) return (const char*)"";
+
+    m = (char*)(*errnum == Z_ERRNO ? zstrerror(errno) : s->stream.msg);
+
+    if (m == NULL || *m == '\0') m = (char*)ERR_MSG(s->z_err); /* fallback */
+
+    TRYFREE(s->msg); /* drop the message built by a previous call */
+    s->msg = (char*)ALLOC(strlen(s->path) + strlen(m) + 3); /* ": " + NUL */
+    if (s->msg == Z_NULL) return (const char*)ERR_MSG(Z_MEM_ERROR);
+    strcpy(s->msg, s->path);
+    strcat(s->msg, ": ");
+    strcat(s->msg, m);
+    return (const char*)s->msg; /* owned by the stream; freed in destroy */
+}
+
+/* ===========================================================================
+     Clear the error and end-of-file flags, and do the same for the real file.
+*/
+void ZEXPORT gzclearerr (file)
+    gzFile file;
+{
+    gz_stream *s = (gz_stream*)file;
+
+    if (s == NULL) return;
+    if (s->z_err != Z_STREAM_END) s->z_err = Z_OK; /* end of stream is kept */
+    s->z_eof = 0;
+    clearerr(s->file);
+}
diff --git a/src/zlib/infback.c b/src/zlib/infback.c
new file mode 100644
index 0000000..455dbc9
--- /dev/null
+++ b/src/zlib/infback.c
@@ -0,0 +1,623 @@
+/* infback.c -- inflate using a call-back interface
+ * Copyright (C) 1995-2005 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+   This code is largely copied from inflate.c.  Normally either infback.o or
+   inflate.o would be linked into an application--not both.  The interface
+   with inffast.c is retained so that optimized assembler-coded versions of
+   inflate_fast() can be used with either inflate.c or infback.c.
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+/* function prototypes */
+local void fixedtables OF((struct inflate_state FAR *state));
+
+/*
+   strm provides memory allocation functions in zalloc and zfree, or
+   Z_NULL to use the library memory allocation functions.
+
+   windowBits is in the range 8..15, and window is a user-supplied
+   window and output buffer that is 2**windowBits bytes.
+ */
+int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size)
+z_streamp strm;
+int windowBits;
+unsigned char FAR *window;
+const char *version;
+int stream_size;
+{
+    struct inflate_state FAR *state;
+
+    /* reject callers whose version string or z_stream size does not match */
+    if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
+        stream_size != (int)(sizeof(z_stream)))
+        return Z_VERSION_ERROR;
+    if (strm == Z_NULL || window == Z_NULL ||
+        windowBits < 8 || windowBits > 15)
+        return Z_STREAM_ERROR;
+    strm->msg = Z_NULL;                 /* in case we return an error */
+    if (strm->zalloc == (alloc_func)0) {    /* no allocator given: default */
+        strm->zalloc = zcalloc;
+        strm->opaque = (voidpf)0;
+    }
+    if (strm->zfree == (free_func)0) strm->zfree = zcfree;
+    state = (struct inflate_state FAR *)ZALLOC(strm, 1,
+                                               sizeof(struct inflate_state));
+    if (state == Z_NULL) return Z_MEM_ERROR;
+    Tracev((stderr, "inflate: allocated\n"));
+    strm->state = (struct internal_state FAR *)state;
+    state->dmax = 32768U;               /* distance limit used by inflate_fast */
+    state->wbits = windowBits;
+    state->wsize = 1U << windowBits;    /* window size: 2**windowBits bytes */
+    state->window = window;             /* caller-supplied buffer, not copied */
+    state->write = 0;                   /* window write index */
+    state->whave = 0;                   /* no valid bytes in the window yet */
+    return Z_OK;
+}
+
+/*
+   Return state with length and distance decoding tables and index sizes set to
+   fixed code decoding.  Normally this returns fixed tables from inffixed.h.
+   If BUILDFIXED is defined, then instead this routine builds the tables the
+   first time it's called, and returns those tables the first time and
+   thereafter.  This reduces the size of the code by about 2K bytes, in
+   exchange for a little execution time.  However, BUILDFIXED should not be
+   used for threaded applications, since the rewriting of the tables and virgin
+   may not be thread-safe.
+ */
+local void fixedtables(state)
+struct inflate_state FAR *state;
+{
+#ifdef BUILDFIXED
+    static int virgin = 1;              /* nonzero until tables are built */
+    static code *lenfix, *distfix;      /* starts of the two tables in fixed[] */
+    static code fixed[544];             /* storage shared by both tables */
+
+    /* build fixed huffman tables if first call (may not be thread safe) */
+    if (virgin) {
+        unsigned sym, bits;
+        static code *next;              /* next free entry in fixed[] */
+
+        /* literal/length table */
+        sym = 0;
+        while (sym < 144) state->lens[sym++] = 8;
+        while (sym < 256) state->lens[sym++] = 9;
+        while (sym < 280) state->lens[sym++] = 7;
+        while (sym < 288) state->lens[sym++] = 8;
+        next = fixed;
+        lenfix = next;
+        bits = 9;
+        inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
+
+        /* distance table */
+        sym = 0;
+        while (sym < 32) state->lens[sym++] = 5;
+        distfix = next;                 /* placed where inflate_table left next */
+        bits = 5;
+        inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
+
+        /* do this just once */
+        virgin = 0;
+    }
+#else /* !BUILDFIXED */
+#   include "inffixed.h"  /* declares lenfix[]; distfix presumably follows it */
+#endif /* BUILDFIXED */
+    state->lencode = lenfix;
+    state->lenbits = 9;                 /* index bits for the length table */
+    state->distcode = distfix;
+    state->distbits = 5;                /* index bits for the distance table */
+}
+
+/* Macros for inflateBack(); they operate directly on its local variables: */
+
+/* Load the locals from strm and state as left by inflate_fast() */
+#define LOAD() \
+    do { \
+        put = strm->next_out; \
+        left = strm->avail_out; \
+        next = strm->next_in; \
+        have = strm->avail_in; \
+        hold = state->hold; \
+        bits = state->bits; \
+    } while (0)
+
+/* Store the locals into strm and state before calling inflate_fast() */
+#define RESTORE() \
+    do { \
+        strm->next_out = put; \
+        strm->avail_out = left; \
+        strm->next_in = next; \
+        strm->avail_in = have; \
+        state->hold = hold; \
+        state->bits = bits; \
+    } while (0)
+
+/* Clear the input bit accumulator */
+#define INITBITS() \
+    do { \
+        hold = 0; \
+        bits = 0; \
+    } while (0)
+
+/* Assure that some input is available.  If in() returns no input, set next to
+   Z_NULL and leave inflateBack() through inf_leave with ret = Z_BUF_ERROR. */
+#define PULL() \
+    do { \
+        if (have == 0) { \
+            have = in(in_desc, &next); \
+            if (have == 0) { \
+                next = Z_NULL; \
+                ret = Z_BUF_ERROR; \
+                goto inf_leave; \
+            } \
+        } \
+    } while (0)
+
+/* Get a byte of input into the bit accumulator, or return from inflateBack()
+   with an error if there is no input available. */
+#define PULLBYTE() \
+    do { \
+        PULL(); \
+        have--; \
+        hold += (unsigned long)(*next++) << bits; \
+        bits += 8; \
+    } while (0)
+
+/* Assure that there are at least n bits in the bit accumulator.  If there is
+   not enough available input to do that, then return from inflateBack() with
+   an error. */
+#define NEEDBITS(n) \
+    do { \
+        while (bits < (unsigned)(n)) \
+            PULLBYTE(); \
+    } while (0)
+
+/* Return the low n bits of the bit accumulator (n < 16) */
+#define BITS(n) \
+    ((unsigned)hold & ((1U << (n)) - 1))
+
+/* Remove n bits from the bit accumulator */
+#define DROPBITS(n) \
+    do { \
+        hold >>= (n); \
+        bits -= (unsigned)(n); \
+    } while (0)
+
+/* Remove zero to seven bits as needed to go to a byte boundary */
+#define BYTEBITS() \
+    do { \
+        hold >>= bits & 7; \
+        bits -= bits & 7; \
+    } while (0)
+
+/* Assure that some output space is available, by writing out the window
+   if it's full (whave is then set to wsize).  If the write fails, return
+   from inflateBack() with a Z_BUF_ERROR. */
+#define ROOM() \
+    do { \
+        if (left == 0) { \
+            put = state->window; \
+            left = state->wsize; \
+            state->whave = left; \
+            if (out(out_desc, put, left)) { \
+                ret = Z_BUF_ERROR; \
+                goto inf_leave; \
+            } \
+        } \
+    } while (0)
+
+/*
+   strm provides the memory allocation functions and window buffer on input,
+   and provides information on the unused input on return.  For Z_DATA_ERROR
+   returns, strm will also provide an error message.
+
+   in() and out() are the call-back input and output functions.  When
+   inflateBack() needs more input, it calls in().  When inflateBack() has
+   filled the window with output, or when it completes with data in the
+   window, it calls out() to write out the data.  The application must not
+   change the provided input until in() is called again or inflateBack()
+   returns.  The application must not change the window/output buffer until
+   inflateBack() returns.
+
+   in() and out() are called with a descriptor parameter provided in the
+   inflateBack() call.  This parameter can be a structure that provides the
+   information required to do the read or write, as well as accumulated
+   information on the input and output such as totals and check values.
+
+   in() should return zero on failure.  out() should return non-zero on
+   failure.  If either in() or out() fails, then inflateBack() returns a
+   Z_BUF_ERROR.  strm->next_in can be checked for Z_NULL to see whether it
+   was in() or out() that caused the error.  Otherwise, inflateBack()
+   returns Z_STREAM_END on success, Z_DATA_ERROR for a deflate format
+   error, or Z_MEM_ERROR if it could not allocate memory for the state.
+   inflateBack() can also return Z_STREAM_ERROR if the input parameters
+   are not correct, i.e. strm is Z_NULL or the state was not initialized.
+ */
+int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc)
+z_streamp strm;
+in_func in;
+void FAR *in_desc;
+out_func out;
+void FAR *out_desc;
+{
+    struct inflate_state FAR *state;
+    unsigned char FAR *next;    /* next input */
+    unsigned char FAR *put;     /* next output */
+    unsigned have, left;        /* available input and output */
+    unsigned long hold;         /* bit buffer */
+    unsigned bits;              /* bits in bit buffer */
+    unsigned copy;              /* number of stored or match bytes to copy */
+    unsigned char FAR *from;    /* where to copy match bytes from */
+    code this;                  /* current decoding table entry */
+    code last;                  /* parent table entry */
+    unsigned len;               /* length to copy for repeats, bits to drop */
+    int ret;                    /* return code */
+    static const unsigned short order[19] = /* permutation of code lengths */
+        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+    /* Check that the strm exists and that the state was initialized */
+    if (strm == Z_NULL || strm->state == Z_NULL)
+        return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* Reset the state */
+    strm->msg = Z_NULL;
+    state->mode = TYPE;
+    state->last = 0;
+    state->whave = 0;
+    next = strm->next_in;
+    have = next != Z_NULL ? strm->avail_in : 0; /* Z_NULL: no initial input */
+    hold = 0;
+    bits = 0;
+    put = state->window;        /* output starts at the front of the window */
+    left = state->wsize;
+
+    /* Inflate until end of block marked as last */
+    for (;;)
+        switch (state->mode) {
+        case TYPE:
+            /* determine and dispatch block type */
+            if (state->last) {
+                BYTEBITS();
+                state->mode = DONE;
+                break;
+            }
+            NEEDBITS(3);                        /* 1 last-block bit + 2 type */
+            state->last = BITS(1);
+            DROPBITS(1);
+            switch (BITS(2)) {
+            case 0:                             /* stored block */
+                Tracev((stderr, "inflate:     stored block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = STORED;
+                break;
+            case 1:                             /* fixed block */
+                fixedtables(state);
+                Tracev((stderr, "inflate:     fixed codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = LEN;              /* decode codes */
+                break;
+            case 2:                             /* dynamic block */
+                Tracev((stderr, "inflate:     dynamic codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = TABLE;
+                break;
+            case 3:
+                strm->msg = (char *)"invalid block type";
+                state->mode = BAD;
+            }
+            DROPBITS(2);                        /* discard the block type bits */
+            break;
+
+        case STORED:
+            /* get and verify stored block length */
+            BYTEBITS();                         /* go to byte boundary */
+            NEEDBITS(32);
+            /* low 16 bits must equal the complement of the high 16 bits */
+            if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
+                strm->msg = (char *)"invalid stored block lengths";
+                state->mode = BAD;
+                break;
+            }
+            state->length = (unsigned)hold & 0xffff;
+            Tracev((stderr, "inflate:       stored length %u\n",
+                    state->length));
+            INITBITS();
+
+            /* copy stored block from input to output */
+            while (state->length != 0) {
+                copy = state->length;
+                PULL();
+                ROOM();
+                if (copy > have) copy = have;   /* limited by input on hand */
+                if (copy > left) copy = left;   /* and by free window space */
+                zmemcpy(put, next, copy);
+                have -= copy;
+                next += copy;
+                left -= copy;
+                put += copy;
+                state->length -= copy;
+            }
+            Tracev((stderr, "inflate:       stored end\n"));
+            state->mode = TYPE;
+            break;
+
+        case TABLE:
+            /* get dynamic table entries descriptor */
+            NEEDBITS(14);
+            state->nlen = BITS(5) + 257;
+            DROPBITS(5);
+            state->ndist = BITS(5) + 1;
+            DROPBITS(5);
+            state->ncode = BITS(4) + 4;
+            DROPBITS(4);
+#ifndef PKZIP_BUG_WORKAROUND
+            if (state->nlen > 286 || state->ndist > 30) {
+                strm->msg = (char *)"too many length or distance symbols";
+                state->mode = BAD;
+                break;
+            }
+#endif
+            Tracev((stderr, "inflate:       table sizes ok\n"));
+
+            /* get code length code lengths (not a typo) */
+            state->have = 0;
+            while (state->have < state->ncode) {
+                NEEDBITS(3);
+                state->lens[order[state->have++]] = (unsigned short)BITS(3);
+                DROPBITS(3);
+            }
+            while (state->have < 19)            /* lengths not sent are zero */
+                state->lens[order[state->have++]] = 0;
+            state->next = state->codes;
+            state->lencode = (code const FAR *)(state->next);
+            state->lenbits = 7;
+            ret = inflate_table(CODES, state->lens, 19, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid code lengths set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       code lengths ok\n"));
+
+            /* get length and distance code code lengths */
+            state->have = 0;
+            while (state->have < state->nlen + state->ndist) {
+                for (;;) {      /* pull bytes until a whole code is in hold */
+                    this = state->lencode[BITS(state->lenbits)];
+                    if ((unsigned)(this.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                if (this.val < 16) {            /* literal code length */
+                    NEEDBITS(this.bits);
+                    DROPBITS(this.bits);
+                    state->lens[state->have++] = this.val;
+                }
+                else {
+                    if (this.val == 16) {       /* repeat previous length */
+                        NEEDBITS(this.bits + 2);
+                        DROPBITS(this.bits);
+                        if (state->have == 0) {
+                            strm->msg = (char *)"invalid bit length repeat";
+                            state->mode = BAD;
+                            break;
+                        }
+                        len = (unsigned)(state->lens[state->have - 1]);
+                        copy = 3 + BITS(2);
+                        DROPBITS(2);
+                    }
+                    else if (this.val == 17) {  /* short run of zero lengths */
+                        NEEDBITS(this.bits + 3);
+                        DROPBITS(this.bits);
+                        len = 0;
+                        copy = 3 + BITS(3);
+                        DROPBITS(3);
+                    }
+                    else {                      /* long run of zero lengths */
+                        NEEDBITS(this.bits + 7);
+                        DROPBITS(this.bits);
+                        len = 0;
+                        copy = 11 + BITS(7);
+                        DROPBITS(7);
+                    }
+                    if (state->have + copy > state->nlen + state->ndist) {
+                        strm->msg = (char *)"invalid bit length repeat";
+                        state->mode = BAD;
+                        break;
+                    }
+                    while (copy--)
+                        state->lens[state->have++] = (unsigned short)len;
+                }
+            }
+
+            /* handle error breaks in while */
+            if (state->mode == BAD) break;
+
+            /* build code tables */
+            state->next = state->codes;
+            state->lencode = (code const FAR *)(state->next);
+            state->lenbits = 9;
+            ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid literal/lengths set";
+                state->mode = BAD;
+                break;
+            }
+            state->distcode = (code const FAR *)(state->next);
+            state->distbits = 6;
+            ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
+                            &(state->next), &(state->distbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid distances set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       codes ok\n"));
+            state->mode = LEN;                  /* fall through to case LEN */
+
+        case LEN:
+            /* use inflate_fast() if we have enough input and output */
+            if (have >= 6 && left >= 258) {
+                RESTORE();
+                if (state->whave < state->wsize)
+                    state->whave = state->wsize - left;
+                inflate_fast(strm, state->wsize);
+                LOAD();
+                break;          /* re-dispatch on the mode inflate_fast left */
+            }
+
+            /* get a literal, length, or end-of-block code */
+            for (;;) {
+                this = state->lencode[BITS(state->lenbits)];
+                if ((unsigned)(this.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if (this.op && (this.op & 0xf0) == 0) { /* entry links a sub-table */
+                last = this;
+                for (;;) {
+                    this = state->lencode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + this.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+            }
+            DROPBITS(this.bits);
+            state->length = (unsigned)this.val;
+
+            /* process literal */
+            if (this.op == 0) {
+                Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
+                        "inflate:         literal '%c'\n" :
+                        "inflate:         literal 0x%02x\n", this.val));
+                ROOM();
+                *put++ = (unsigned char)(state->length);
+                left--;
+                state->mode = LEN;
+                break;
+            }
+
+            /* process end of block */
+            if (this.op & 32) {
+                Tracevv((stderr, "inflate:         end of block\n"));
+                state->mode = TYPE;
+                break;
+            }
+
+            /* invalid code */
+            if (this.op & 64) {
+                strm->msg = (char *)"invalid literal/length code";
+                state->mode = BAD;
+                break;
+            }
+
+            /* length code -- get extra bits, if any */
+            state->extra = (unsigned)(this.op) & 15;
+            if (state->extra != 0) {
+                NEEDBITS(state->extra);
+                state->length += BITS(state->extra);
+                DROPBITS(state->extra);
+            }
+            Tracevv((stderr, "inflate:         length %u\n", state->length));
+
+            /* get distance code */
+            for (;;) {
+                this = state->distcode[BITS(state->distbits)];
+                if ((unsigned)(this.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if ((this.op & 0xf0) == 0) {        /* entry links a sub-table */
+                last = this;
+                for (;;) {
+                    this = state->distcode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + this.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+            }
+            DROPBITS(this.bits);
+            if (this.op & 64) {
+                strm->msg = (char *)"invalid distance code";
+                state->mode = BAD;
+                break;
+            }
+            state->offset = (unsigned)this.val;
+
+            /* get distance extra bits, if any */
+            state->extra = (unsigned)(this.op) & 15;
+            if (state->extra != 0) {
+                NEEDBITS(state->extra);
+                state->offset += BITS(state->extra);
+                DROPBITS(state->extra);
+            }
+            /* until whave reaches wsize only wsize - left bytes may be used */
+            if (state->offset > state->wsize - (state->whave < state->wsize ?
+                                                left : 0)) {
+                strm->msg = (char *)"invalid distance too far back";
+                state->mode = BAD;
+                break;
+            }
+            Tracevv((stderr, "inflate:         distance %u\n", state->offset));
+
+            /* copy match from window to output */
+            do {
+                ROOM();
+                copy = state->wsize - state->offset;
+                if (copy < left) {              /* source wraps: ahead of put */
+                    from = put + copy;
+                    copy = left - copy;
+                }
+                else {                          /* source is behind put */
+                    from = put - state->offset;
+                    copy = left;
+                }
+                if (copy > state->length) copy = state->length;
+                state->length -= copy;
+                left -= copy;
+                do {            /* byte by byte: source and target may overlap */
+                    *put++ = *from++;
+                } while (--copy);
+            } while (state->length != 0);
+            break;
+
+        case DONE:
+            /* inflate stream terminated properly -- write leftover output */
+            ret = Z_STREAM_END;
+            if (left < state->wsize) {          /* window holds unwritten data */
+                if (out(out_desc, state->window, state->wsize - left))
+                    ret = Z_BUF_ERROR;
+            }
+            goto inf_leave;
+
+        case BAD:
+            ret = Z_DATA_ERROR;
+            goto inf_leave;
+
+        default:                /* can't happen, but makes compilers happy */
+            ret = Z_STREAM_ERROR;
+            goto inf_leave;
+        }
+
+    /* Return unused input */
+  inf_leave:
+    strm->next_in = next;
+    strm->avail_in = have;
+    return ret;
+}
+
+int ZEXPORT inflateBackEnd(strm)
+z_streamp strm;
+{   /* Free the inflate state attached to strm and clear the pointer. */
+    if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0)
+        return Z_STREAM_ERROR;          /* nothing to free, or no way to free */
+    ZFREE(strm, strm->state);
+    strm->state = Z_NULL;               /* a second call gets Z_STREAM_ERROR */
+    Tracev((stderr, "inflate: end\n"));
+    return Z_OK;
+}
diff --git a/src/zlib/inffast.c b/src/zlib/inffast.c
new file mode 100644
index 0000000..bbee92e
--- /dev/null
+++ b/src/zlib/inffast.c
@@ -0,0 +1,318 @@
+/* inffast.c -- fast decoding
+ * Copyright (C) 1995-2004 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+#ifndef ASMINF
+
+/* Allow machine dependent optimization for post-increment or pre-increment.
+   Based on testing to date,
+   Pre-increment preferred for:
+   - PowerPC G3 (Adler)
+   - MIPS R5000 (Randers-Pehrson)
+   Post-increment preferred for:
+   - none
+   No measurable difference:
+   - Pentium III (Anderson)
+   - M68060 (Nikl)
+ */
+#ifdef POSTINC  /* post-increment access: pointers address the next byte */
+#  define OFF 0
+#  define PUP(a) *(a)++
+#else           /* pre-increment access: pointers are kept one byte behind */
+#  define OFF 1
+#  define PUP(a) *++(a)
+#endif          /* POSTINC */
+
+/*
+   Decode literal, length, and distance codes and write out the resulting
+   literal and match bytes until either not enough input or output is
+   available, an end-of-block is encountered, or a data error is encountered.
+   When large enough input and output buffers are supplied to inflate(), for
+   example, a 16K input buffer and a 64K output buffer, more than 95% of the
+   inflate execution time is spent in this routine.
+
+   Entry assumptions:
+
+        state->mode == LEN
+        strm->avail_in >= 6
+        strm->avail_out >= 258
+        start >= strm->avail_out
+        state->bits < 8
+
+   On return, state->mode is one of:
+
+        LEN -- ran out of enough output space or enough available input
+        TYPE -- reached end of block code, inflate() to interpret next block
+        BAD -- error in block data
+
+   Notes:
+
+    - The maximum input bits used by a length/distance pair is 15 bits for the
+      length code, 5 bits for the length extra, 15 bits for the distance code,
+      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
+      Therefore if strm->avail_in >= 6, then there is enough input to avoid
+      checking for available input while decoding.
+
+    - The maximum bytes that a single length/distance pair can output is 258
+      bytes, which is the maximum length that can be coded.  inflate_fast()
+      requires strm->avail_out >= 258 for each loop to avoid checking for
+      output space.
+ */
+void inflate_fast(strm, start)
+z_streamp strm;
+unsigned start;         /* inflate()'s starting value for strm->avail_out */
+{
+    struct inflate_state FAR *state;
+    unsigned char FAR *in;      /* local strm->next_in */
+    unsigned char FAR *last;    /* while in < last, enough input available */
+    unsigned char FAR *out;     /* local strm->next_out */
+    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
+    unsigned char FAR *end;     /* while out < end, enough space available */
+#ifdef INFLATE_STRICT
+    unsigned dmax;              /* maximum distance from zlib header */
+#endif
+    unsigned wsize;             /* window size or zero if not using window */
+    unsigned whave;             /* valid bytes in the window */
+    unsigned write;             /* window write index */
+    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
+    unsigned long hold;         /* local state->hold */
+    unsigned bits;              /* local state->bits */
+    code const FAR *lcode;      /* local state->lencode */
+    code const FAR *dcode;      /* local state->distcode */
+    unsigned lmask;             /* mask for first level of length codes */
+    unsigned dmask;             /* mask for first level of distance codes */
+    code this;                  /* retrieved table entry */
+    unsigned op;                /* code bits, operation, extra bits, or */
+                                /*  window position, window bytes to copy */
+    unsigned len;               /* match length, unused bytes */
+    unsigned dist;              /* match distance */
+    unsigned char FAR *from;    /* where to copy match from */
+
+    /* copy state to local variables */
+    state = (struct inflate_state FAR *)strm->state;
+    in = strm->next_in - OFF;
+    last = in + (strm->avail_in - 5);   /* stop with 5 input bytes in reserve */
+    out = strm->next_out - OFF;
+    beg = out - (start - strm->avail_out);
+    end = out + (strm->avail_out - 257);    /* stop with 257 bytes of room */
+#ifdef INFLATE_STRICT
+    dmax = state->dmax;
+#endif
+    wsize = state->wsize;
+    whave = state->whave;
+    write = state->write;
+    window = state->window;
+    hold = state->hold;
+    bits = state->bits;
+    lcode = state->lencode;
+    dcode = state->distcode;
+    lmask = (1U << state->lenbits) - 1;
+    dmask = (1U << state->distbits) - 1;
+
+    /* decode literals and length/distances until end-of-block or not enough
+       input data or output space */
+    do {
+        if (bits < 15) {                        /* refill with two bytes */
+            hold += (unsigned long)(PUP(in)) << bits;
+            bits += 8;
+            hold += (unsigned long)(PUP(in)) << bits;
+            bits += 8;
+        }
+        this = lcode[hold & lmask];
+      dolen:
+        op = (unsigned)(this.bits);             /* bits used by this entry */
+        hold >>= op;
+        bits -= op;
+        op = (unsigned)(this.op);               /* now the operation field */
+        if (op == 0) {                          /* literal */
+            Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
+                    "inflate:         literal '%c'\n" :
+                    "inflate:         literal 0x%02x\n", this.val));
+            PUP(out) = (unsigned char)(this.val);
+        }
+        else if (op & 16) {                     /* length base */
+            len = (unsigned)(this.val);
+            op &= 15;                           /* number of extra bits */
+            if (op) {
+                if (bits < op) {
+                    hold += (unsigned long)(PUP(in)) << bits;
+                    bits += 8;
+                }
+                len += (unsigned)hold & ((1U << op) - 1);
+                hold >>= op;
+                bits -= op;
+            }
+            Tracevv((stderr, "inflate:         length %u\n", len));
+            if (bits < 15) {                    /* refill with two bytes */
+                hold += (unsigned long)(PUP(in)) << bits;
+                bits += 8;
+                hold += (unsigned long)(PUP(in)) << bits;
+                bits += 8;
+            }
+            this = dcode[hold & dmask];
+          dodist:
+            op = (unsigned)(this.bits);         /* bits used by this entry */
+            hold >>= op;
+            bits -= op;
+            op = (unsigned)(this.op);           /* now the operation field */
+            if (op & 16) {                      /* distance base */
+                dist = (unsigned)(this.val);
+                op &= 15;                       /* number of extra bits */
+                if (bits < op) {
+                    hold += (unsigned long)(PUP(in)) << bits;
+                    bits += 8;
+                    if (bits < op) {
+                        hold += (unsigned long)(PUP(in)) << bits;
+                        bits += 8;
+                    }
+                }
+                dist += (unsigned)hold & ((1U << op) - 1);
+#ifdef INFLATE_STRICT
+                if (dist > dmax) {
+                    strm->msg = (char *)"invalid distance too far back";
+                    state->mode = BAD;
+                    break;
+                }
+#endif
+                hold >>= op;
+                bits -= op;
+                Tracevv((stderr, "inflate:         distance %u\n", dist));
+                op = (unsigned)(out - beg);     /* max distance in output */
+                if (dist > op) {                /* see if copy from window */
+                    op = dist - op;             /* distance back in window */
+                    if (op > whave) {
+                        strm->msg = (char *)"invalid distance too far back";
+                        state->mode = BAD;
+                        break;
+                    }
+                    from = window - OFF;
+                    if (write == 0) {           /* very common case */
+                        from += wsize - op;
+                        if (op < len) {         /* some from window */
+                            len -= op;
+                            do {
+                                PUP(out) = PUP(from);
+                            } while (--op);
+                            from = out - dist;  /* rest from output */
+                        }
+                    }
+                    else if (write < op) {      /* wrap around window */
+                        from += wsize + write - op;
+                        op -= write;
+                        if (op < len) {         /* some from end of window */
+                            len -= op;
+                            do {
+                                PUP(out) = PUP(from);
+                            } while (--op);
+                            from = window - OFF;
+                            if (write < len) {  /* some from start of window */
+                                op = write;
+                                len -= op;
+                                do {
+                                    PUP(out) = PUP(from);
+                                } while (--op);
+                                from = out - dist;      /* rest from output */
+                            }
+                        }
+                    }
+                    else {                      /* contiguous in window */
+                        from += write - op;
+                        if (op < len) {         /* some from window */
+                            len -= op;
+                            do {
+                                PUP(out) = PUP(from);
+                            } while (--op);
+                            from = out - dist;  /* rest from output */
+                        }
+                    }
+                    while (len > 2) {           /* copy remainder, 3 at a time */
+                        PUP(out) = PUP(from);
+                        PUP(out) = PUP(from);
+                        PUP(out) = PUP(from);
+                        len -= 3;
+                    }
+                    if (len) {                  /* one or two bytes left over */
+                        PUP(out) = PUP(from);
+                        if (len > 1)
+                            PUP(out) = PUP(from);
+                    }
+                }
+                else {
+                    from = out - dist;          /* copy direct from output */
+                    do {                        /* minimum length is three */
+                        PUP(out) = PUP(from);
+                        PUP(out) = PUP(from);
+                        PUP(out) = PUP(from);
+                        len -= 3;
+                    } while (len > 2);
+                    if (len) {                  /* one or two bytes left over */
+                        PUP(out) = PUP(from);
+                        if (len > 1)
+                            PUP(out) = PUP(from);
+                    }
+                }
+            }
+            else if ((op & 64) == 0) {          /* 2nd level distance code */
+                this = dcode[this.val + (hold & ((1U << op) - 1))];
+                goto dodist;
+            }
+            else {
+                strm->msg = (char *)"invalid distance code";
+                state->mode = BAD;
+                break;
+            }
+        }
+        else if ((op & 64) == 0) {              /* 2nd level length code */
+            this = lcode[this.val + (hold & ((1U << op) - 1))];
+            goto dolen;
+        }
+        else if (op & 32) {                     /* end-of-block */
+            Tracevv((stderr, "inflate:         end of block\n"));
+            state->mode = TYPE;
+            break;
+        }
+        else {
+            strm->msg = (char *)"invalid literal/length code";
+            state->mode = BAD;
+            break;
+        }
+    } while (in < last && out < end);
+
+    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
+    len = bits >> 3;                    /* whole bytes still held in hold */
+    in -= len;
+    bits -= len << 3;
+    hold &= (1U << bits) - 1;           /* keep only the remaining low bits */
+
+    /* update state and return */
+    strm->next_in = in + OFF;
+    strm->next_out = out + OFF;
+    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
+    strm->avail_out = (unsigned)(out < end ?
+                                 257 + (end - out) : 257 - (out - end));
+    state->hold = hold;
+    state->bits = bits;
+    return;
+}
+
+/*
+   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
+   - Using bit fields for code structure
+   - Different op definition to avoid & for extra bits (do & for table bits)
+   - Three separate decoding do-loops for direct, window, and write == 0
+   - Special case for distance > 1 copies to do overlapped load and store copy
+   - Explicit branch predictions (based on measured branch probabilities)
+   - Deferring match copy and interspersing it with decoding subsequent codes
+   - Swapping literal/length else
+   - Swapping window/direct else
+   - Larger unrolled copy loops (three is about right)
+   - Moving len -= 3 statement into middle of loop
+ */
+
+#endif /* !ASMINF */
diff --git a/src/zlib/inffast.h b/src/zlib/inffast.h
new file mode 100644
index 0000000..1e88d2d
--- /dev/null
+++ b/src/zlib/inffast.h
@@ -0,0 +1,11 @@
+/* inffast.h -- header to use inffast.c
+ * Copyright (C) 1995-2003 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+void inflate_fast OF((z_streamp strm, unsigned start));
diff --git a/src/zlib/inffixed.h b/src/zlib/inffixed.h
new file mode 100644
index 0000000..75ed4b5
--- /dev/null
+++ b/src/zlib/inffixed.h
@@ -0,0 +1,94 @@
+    /* inffixed.h -- table for decoding fixed codes
+     * Generated automatically by makefixed().
+     */
+
+    /* WARNING: this file should *not* be used by applications. It
+       is part of the implementation of the compression library and
+       is subject to change. Applications should only use zlib.h.
+     */
+
+    static const code lenfix[512] = {
+        {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48},
+        {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128},
+        {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59},
+        {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176},
+        {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20},
+        {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100},
+        {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8},
+        {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216},
+        {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76},
+        {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114},
+        {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2},
+        {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148},
+        {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42},
+        {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86},
+        {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15},
+        {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236},
+        {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62},
+        {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142},
+        {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31},
+        {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162},
+        {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25},
+        {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105},
+        {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4},
+        {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202},
+        {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69},
+        {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125},
+        {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13},
+        {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195},
+        {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35},
+        {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91},
+        {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19},
+        {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246},
+        {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55},
+        {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135},
+        {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99},
+        {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190},
+        {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16},
+        {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96},
+        {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6},
+        {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209},
+        {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72},
+        {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116},
+        {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4},
+        {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153},
+        {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44},
+        {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82},
+        {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11},
+        {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229},
+        {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58},
+        {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138},
+        {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51},
+        {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173},
+        {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30},
+        {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110},
+        {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0},
+        {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195},
+        {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65},
+        {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121},
+        {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9},
+        {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258},
+        {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37},
+        {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93},
+        {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23},
+        {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251},
+        {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51},
+        {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131},
+        {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67},
+        {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183},
+        {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23},
+        {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103},
+        {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9},
+        {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223},
+        {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79},
+        {0,9,255}
+    };
+
+    static const code distfix[32] = {
+        {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025},
+        {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193},
+        {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385},
+        {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577},
+        {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073},
+        {22,5,193},{64,5,0}
+    };
diff --git a/src/zlib/inflate.c b/src/zlib/inflate.c
new file mode 100644
index 0000000..792fdee
--- /dev/null
+++ b/src/zlib/inflate.c
@@ -0,0 +1,1368 @@
+/* inflate.c -- zlib decompression
+ * Copyright (C) 1995-2005 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ * Change history:
+ *
+ * 1.2.beta0    24 Nov 2002
+ * - First version -- complete rewrite of inflate to simplify code, avoid
+ *   creation of window when not needed, minimize use of window when it is
+ *   needed, make inffast.c even faster, implement gzip decoding, and to
+ *   improve code readability and style over the previous zlib inflate code
+ *
+ * 1.2.beta1    25 Nov 2002
+ * - Use pointers for available input and output checking in inffast.c
+ * - Remove input and output counters in inffast.c
+ * - Change inffast.c entry and loop from avail_in >= 7 to >= 6
+ * - Remove unnecessary second byte pull from length extra in inffast.c
+ * - Unroll direct copy to three copies per loop in inffast.c
+ *
+ * 1.2.beta2    4 Dec 2002
+ * - Change external routine names to reduce potential conflicts
+ * - Correct filename to inffixed.h for fixed tables in inflate.c
+ * - Make hbuf[] unsigned char to match parameter type in inflate.c
+ * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset)
+ *   to avoid negation problem on Alphas (64 bit) in inflate.c
+ *
+ * 1.2.beta3    22 Dec 2002
+ * - Add comments on state->bits assertion in inffast.c
+ * - Add comments on op field in inftrees.h
+ * - Fix bug in reuse of allocated window after inflateReset()
+ * - Remove bit fields--back to byte structure for speed
+ * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths
+ * - Change post-increments to pre-increments in inflate_fast(), PPC biased?
+ * - Add compile time option, POSTINC, to use post-increments instead (Intel?)
+ * - Make MATCH copy in inflate() much faster for when inflate_fast() not used
+ * - Use local copies of stream next and avail values, as well as local bit
+ *   buffer and bit count in inflate()--for speed when inflate_fast() not used
+ *
+ * 1.2.beta4    1 Jan 2003
+ * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings
+ * - Move a comment on output buffer sizes from inffast.c to inflate.c
+ * - Add comments in inffast.c to introduce the inflate_fast() routine
+ * - Rearrange window copies in inflate_fast() for speed and simplification
+ * - Unroll last copy for window match in inflate_fast()
+ * - Use local copies of window variables in inflate_fast() for speed
+ * - Pull out common write == 0 case for speed in inflate_fast()
+ * - Make op and len in inflate_fast() unsigned for consistency
+ * - Add FAR to lcode and dcode declarations in inflate_fast()
+ * - Simplified bad distance check in inflate_fast()
+ * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new
+ *   source file infback.c to provide a call-back interface to inflate for
+ *   programs like gzip and unzip -- uses window as output buffer to avoid
+ *   window copying
+ *
+ * 1.2.beta5    1 Jan 2003
+ * - Improved inflateBack() interface to allow the caller to provide initial
+ *   input in strm.
+ * - Fixed stored blocks bug in inflateBack()
+ *
+ * 1.2.beta6    4 Jan 2003
+ * - Added comments in inffast.c on effectiveness of POSTINC
+ * - Typecasting all around to reduce compiler warnings
+ * - Changed loops from while (1) or do {} while (1) to for (;;), again to
+ *   make compilers happy
+ * - Changed type of window in inflateBackInit() to unsigned char *
+ *
+ * 1.2.beta7    27 Jan 2003
+ * - Changed many types to unsigned or unsigned short to avoid warnings
+ * - Added inflateCopy() function
+ *
+ * 1.2.0        9 Mar 2003
+ * - Changed inflateBack() interface to provide separate opaque descriptors
+ *   for the in() and out() functions
+ * - Changed inflateBack() argument and in_func typedef to swap the length
+ *   and buffer address return values for the input function
+ * - Check next_in and next_out for Z_NULL on entry to inflate()
+ *
+ * History for versions after 1.2.0 is in ChangeLog in the zlib distribution.
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+#ifdef MAKEFIXED
+#  ifndef BUILDFIXED
+#    define BUILDFIXED
+#  endif
+#endif
+
+/* function prototypes */
+local void fixedtables OF((struct inflate_state FAR *state));
+local int updatewindow OF((z_streamp strm, unsigned out));
+#ifdef BUILDFIXED
+   void makefixed OF((void));
+#endif
+local unsigned syncsearch OF((unsigned FAR *have, unsigned char FAR *buf,
+                              unsigned len));
+
+int ZEXPORT inflateReset(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *st;
+
+    /* nothing to reset unless both the stream and its state exist */
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    st = (struct inflate_state FAR *)strm->state;
+    /* stream-level message, check value, and byte counters */
+    strm->msg = Z_NULL;
+    strm->adler = 1;        /* to support ill-conceived Java test suite */
+    strm->total_in = strm->total_out = st->total = 0;
+
+    /* decoding restarts in HEAD mode with no window in use and no bits held */
+    st->mode = HEAD;
+    st->last = st->havedict = 0;
+    st->dmax = 32768U;
+    st->head = Z_NULL;
+    st->wsize = st->whave = st->write = 0;
+    st->hold = st->bits = 0;
+    st->lencode = st->distcode = st->next = st->codes;
+    Tracev((stderr, "inflate: reset\n"));
+    return Z_OK;
+}
+
+int ZEXPORT inflatePrime(strm, bits, value)  /* Z_OK or Z_STREAM_ERROR */
+z_streamp strm;
+int bits;               /* how many bits to insert; must be 0..16 */
+int value;              /* only the low `bits` bits of this are used */
+{   /* adds bits to the bit accumulator, above those already held */
+    struct inflate_state FAR *state;
+    /* The unsigned compare below also rejects bits < 0 (undefined shift). */
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if ((unsigned)bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
+    value &= (1L << bits) - 1;
+    state->hold += (unsigned long)value << state->bits; /* unsigned shift */
+    state->bits += bits;
+    return Z_OK;
+}
+
+int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
+z_streamp strm;
+int windowBits;
+const char *version;
+int stream_size;
+{
+    struct inflate_state FAR *st;
+
+    if (version == Z_NULL || version[0] != ZLIB_VERSION[0])
+        return Z_VERSION_ERROR;     /* first version character differs */
+    if (stream_size != (int)(sizeof(z_stream))) return Z_VERSION_ERROR;
+    if (strm == Z_NULL) return Z_STREAM_ERROR;
+    strm->msg = Z_NULL;                 /* in case we return an error */
+    if (strm->zfree == (free_func)0) strm->zfree = zcfree;
+    if (strm->zalloc == (alloc_func)0) {    /* no allocator given: use zcalloc */
+        strm->zalloc = zcalloc;
+        strm->opaque = (voidpf)0;
+    }
+    st = (struct inflate_state FAR *)
+         ZALLOC(strm, 1, sizeof(struct inflate_state));
+    if (st == Z_NULL) return Z_MEM_ERROR;
+    Tracev((stderr, "inflate: allocated\n"));
+    strm->state = (struct internal_state FAR *)st;
+    if (windowBits >= 0) {
+        st->wrap = (windowBits >> 4) + 1;
+#ifdef GUNZIP
+        if (windowBits < 48) windowBits &= 15;
+#endif
+    }
+    else {                              /* negative: wrap = 0, use magnitude */
+        st->wrap = 0;
+        windowBits = -windowBits;
+    }
+    if (windowBits < 8 || windowBits > 15) {    /* unsupported: undo the alloc */
+        ZFREE(strm, st);
+        strm->state = Z_NULL;
+        return Z_STREAM_ERROR;
+    }
+    st->window = Z_NULL;
+    st->wbits = (unsigned)windowBits;
+    return inflateReset(strm);
+}
+
+int ZEXPORT inflateInit_(strm, version, stream_size)
+z_streamp strm;
+const char *version;
+int stream_size;
+{   /* inflateInit2_() with windowBits fixed at DEF_WBITS */
+    return inflateInit2_(strm, DEF_WBITS, version, stream_size);
+}
+
+/*
+   Return state with length and distance decoding tables and index sizes set to
+   fixed code decoding.  Normally this returns fixed tables from inffixed.h.
+   If BUILDFIXED is defined, then instead this routine builds the tables the
+   first time it's called, and returns those tables the first time and
+   thereafter.  This reduces the size of the code by about 2K bytes, in
+   exchange for a little execution time.  However, BUILDFIXED should not be
+   used for threaded applications, since the rewriting of the tables and virgin
+   may not be thread-safe.
+ */
+local void fixedtables(state)
+struct inflate_state FAR *state;    /* receives table pointers and bit counts */
+{
+#ifdef BUILDFIXED
+    static int virgin = 1;              /* nonzero until the tables are built */
+    static code *lenfix, *distfix;      /* table starts, set on the first call */
+    static code fixed[544];             /* storage handed to inflate_table() */
+
+    /* build fixed huffman tables if first call (may not be thread safe) */
+    if (virgin) {
+        unsigned sym, bits;
+        static code *next;
+
+        /* literal/length table */
+        sym = 0;
+        while (sym < 144) state->lens[sym++] = 8;   /* symbols 0..143 */
+        while (sym < 256) state->lens[sym++] = 9;   /* symbols 144..255 */
+        while (sym < 280) state->lens[sym++] = 7;   /* symbols 256..279 */
+        while (sym < 288) state->lens[sym++] = 8;   /* symbols 280..287 */
+        next = fixed;
+        lenfix = next;
+        bits = 9;
+        inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
+
+        /* distance table */
+        sym = 0;
+        while (sym < 32) state->lens[sym++] = 5;    /* all 32 symbols */
+        distfix = next;
+        bits = 5;
+        inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
+
+        /* do this just once */
+        virgin = 0;
+    }
+#else /* !BUILDFIXED */
+#   include "inffixed.h"
+#endif /* BUILDFIXED */
+    state->lencode = lenfix;            /* built above or from inffixed.h */
+    state->lenbits = 9;
+    state->distcode = distfix;
+    state->distbits = 5;
+}
+
+#ifdef MAKEFIXED
+#include <stdio.h>
+
+/*
+   Write out the inffixed.h that is #include'd above.  Defining MAKEFIXED also
+   defines BUILDFIXED, so the tables are built on the fly.  makefixed() writes
+   those tables to stdout, which would be piped to inffixed.h.  A small program
+   can simply call makefixed to do this:
+
+    void makefixed(void);
+
+    int main(void)
+    {
+        makefixed();
+        return 0;
+    }
+
+   Then that can be linked with zlib built with MAKEFIXED defined and run:
+
+    a.out > inffixed.h
+ */
+void makefixed()
+{
+    unsigned idx, count;            /* entry index, entries in current table */
+    struct inflate_state state;     /* scratch state that receives the tables */
+
+    fixedtables(&state);
+    puts("    /* inffixed.h -- table for decoding fixed codes");
+    puts("     * Generated automatically by makefixed().");
+    puts("     */");
+    puts("");
+    puts("    /* WARNING: this file should *not* be used by applications.");
+    puts("       It is part of the implementation of this library and is");
+    puts("       subject to change. Applications should only use zlib.h.");
+    puts("     */");
+    puts("");
+
+    /* literal/length table: seven {op,bits,val} entries per output line */
+    count = 1U << 9;
+    printf("    static const code lenfix[%u] = {", count);
+    for (idx = 0; idx < count; idx++) {
+        if (idx != 0) putchar(',');
+        if ((idx % 7) == 0) printf("\n        ");
+        printf("{%u,%u,%d}", state.lencode[idx].op, state.lencode[idx].bits,
+               state.lencode[idx].val);
+    }
+    puts("\n    };");
+
+    /* distance table: six {op,bits,val} entries per output line */
+    count = 1U << 5;
+    printf("\n    static const code distfix[%u] = {", count);
+    for (idx = 0; idx < count; idx++) {
+        if (idx != 0) putchar(',');
+        if ((idx % 6) == 0) printf("\n        ");
+        printf("{%u,%u,%d}", state.distcode[idx].op, state.distcode[idx].bits,
+               state.distcode[idx].val);
+    }
+    puts("\n    };");
+}
+#endif /* MAKEFIXED */
+
+/*
+   Update the window with the last wsize (normally 32K) bytes written before
+   returning.  If window does not exist yet, create it.  This is only called
+   when a window is already in use, or when output has been written during this
+   inflate call, but the end of the deflate stream has not been reached yet.
+   It is also called to create a window for dictionary data when a dictionary
+   is loaded.
+
+   Providing output buffers larger than 32K to inflate() should provide a speed
+   advantage, since only the last 32K of output is copied to the sliding window
+   upon return from inflate(), and since all distances after the first 32K of
+   output will fall in the output data, making match copies simpler and faster.
+   The advantage may be dependent on the size of the processor's data caches.
+ */
+local int updatewindow(strm, out)
+z_streamp strm;
+unsigned out;
+{
+    struct inflate_state FAR *st;
+    unsigned pending;           /* output bytes not yet stored in the window */
+    unsigned room;              /* space from write to the window end */
+
+    st = (struct inflate_state FAR *)strm->state;
+
+    /* first use: allocate the 1 << wbits byte window */
+    if (st->window == Z_NULL) {
+        st->window = (unsigned char FAR *)
+                     ZALLOC(strm, 1U << st->wbits, sizeof(unsigned char));
+        if (st->window == Z_NULL) return 1;
+    }
+
+    /* window not in use yet: give it its size and start it out empty */
+    if (st->wsize == 0) {
+        st->wsize = 1U << st->wbits;
+        st->write = st->whave = 0;
+    }
+
+    /* out less the output space still available is what has to be stored */
+    pending = out - strm->avail_out;
+    if (pending >= st->wsize) {
+        /* a full window or more: keep only the last wsize bytes */
+        zmemcpy(st->window, strm->next_out - st->wsize, st->wsize);
+        st->write = 0;
+        st->whave = st->wsize;
+        return 0;
+    }
+    /* store as much as fits from the write position up to the window end */
+    room = st->wsize - st->write;
+    if (room > pending) room = pending;
+    zmemcpy(st->window + st->write, strm->next_out - pending, room);
+    pending -= room;
+    if (pending == 0) {
+        st->write += room;
+        if (st->write == st->wsize) st->write = 0;
+        if (st->whave < st->wsize) st->whave += room;
+        return 0;
+    }
+    /* the remainder wraps around to the start of the window */
+    zmemcpy(st->window, strm->next_out - pending, pending);
+    st->write = pending;
+    st->whave = st->wsize;
+    return 0;
+}
+
+/* Macros for inflate(): */
+
+/* update check: crc32() if state->flags != 0 (gzip), else adler32() (zlib) */
+#ifdef GUNZIP
+#  define UPDATE(check, buf, len) \
+    (state->flags ? crc32(check, buf, len) : adler32(check, buf, len))
+#else
+#  define UPDATE(check, buf, len) adler32(check, buf, len)
+#endif
+
+/* crc32() the low 2 or 4 bytes of word, low byte first, through hbuf[] */
+#ifdef GUNZIP
+#  define CRC2(check, word) \
+    do { \
+        hbuf[0] = (unsigned char)(word); \
+        hbuf[1] = (unsigned char)((word) >> 8); \
+        check = crc32(check, hbuf, 2); \
+    } while (0)
+
+#  define CRC4(check, word) \
+    do { \
+        hbuf[0] = (unsigned char)(word); \
+        hbuf[1] = (unsigned char)((word) >> 8); \
+        hbuf[2] = (unsigned char)((word) >> 16); \
+        hbuf[3] = (unsigned char)((word) >> 24); \
+        check = crc32(check, hbuf, 4); \
+    } while (0)
+#endif
+
+/* Cache stream and bit-buffer state in inflate()'s locals for speed */
+#define LOAD() \
+    do { \
+        put = strm->next_out; \
+        left = strm->avail_out; \
+        next = strm->next_in; \
+        have = strm->avail_in; \
+        hold = state->hold; \
+        bits = state->bits; \
+    } while (0)
+
+/* Write inflate()'s local copies back to strm and state */
+#define RESTORE() \
+    do { \
+        strm->next_out = put; \
+        strm->avail_out = left; \
+        strm->next_in = next; \
+        strm->avail_in = have; \
+        state->hold = hold; \
+        state->bits = bits; \
+    } while (0)
+
+/* Empty the input bit accumulator: no bits held, bit count zero */
+#define INITBITS() \
+    do { \
+        hold = 0; \
+        bits = 0; \
+    } while (0)
+
+/* Add one input byte above the bits already in the accumulator, or leave
+   inflate() through inf_leave if there is no input available. */
+#define PULLBYTE() \
+    do { \
+        if (have == 0) goto inf_leave; \
+        have--; \
+        hold += (unsigned long)(*next++) << bits; \
+        bits += 8; \
+    } while (0)
+
+/* Pull input bytes until the accumulator holds at least n bits.  If input
+   runs out first, PULLBYTE() leaves inflate() through inf_leave. */
+#define NEEDBITS(n) \
+    do { \
+        while (bits < (unsigned)(n)) \
+            PULLBYTE(); \
+    } while (0)
+
+/* Peek at the low n bits of the bit accumulator (n < 16) */
+#define BITS(n) \
+    ((unsigned)hold & ((1U << (n)) - 1))
+
+/* Remove the low n bits from the bit accumulator */
+#define DROPBITS(n) \
+    do { \
+        hold >>= (n); \
+        bits -= (unsigned)(n); \
+    } while (0)
+
+/* Remove zero to seven bits (bits & 7) to go to a byte boundary */
+#define BYTEBITS() \
+    do { \
+        hold >>= bits & 7; \
+        bits -= bits & 7; \
+    } while (0)
+
+/* Reverse the bytes in a 32-bit value (q is evaluated four times) */
+#define REVERSE(q) \
+    ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
+     (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
+
+/*
+   inflate() uses a state machine to process as much input data and generate as
+   much output data as possible before returning.  The state machine is
+   structured roughly as follows:
+
+    for (;;) switch (state) {
+    ...
+    case STATEn:
+        if (not enough input data or output space to make progress)
+            return;
+        ... make progress ...
+        state = STATEm;
+        break;
+    ...
+    }
+
+   so when inflate() is called again, the same case is attempted again, and
+   if the appropriate resources are provided, the machine proceeds to the
+   next state.  The NEEDBITS() macro is usually the way the state evaluates
+   whether it can proceed or should return.  NEEDBITS() does the return if
+   the requested bits are not available.  The typical use of the BITS macros
+   is:
+
+        NEEDBITS(n);
+        ... do something with BITS(n) ...
+        DROPBITS(n);
+
+   where NEEDBITS(n) either returns from inflate() if there isn't enough
+   input left to load n bits into the accumulator, or it continues.  BITS(n)
+   gives the low n bits in the accumulator.  When done, DROPBITS(n) drops
+   the low n bits off the accumulator.  INITBITS() clears the accumulator
+   and sets the number of available bits to zero.  BYTEBITS() discards just
+   enough bits to put the accumulator on a byte boundary.  After BYTEBITS()
+   and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
+
+   NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
+   if there is no input available.  The decoding of variable length codes uses
+   PULLBYTE() directly in order to pull just enough bytes to decode the next
+   code, and no more.
+
+   Some states loop until they get enough input, making sure that enough
+   state information is maintained to continue the loop where it left off
+   if NEEDBITS() returns in the loop.  For example, want, need, and keep
+   would all have to actually be part of the saved state in case NEEDBITS()
+   returns:
+
+    case STATEw:
+        while (want < need) {
+            NEEDBITS(n);
+            keep[want++] = BITS(n);
+            DROPBITS(n);
+        }
+        state = STATEx;
+    case STATEx:
+
+   As shown above, if the next state is also the next case, then the break
+   is omitted.
+
+   A state may also return if there is not enough output space available to
+   complete that state.  Those states are copying stored data, writing a
+   literal byte, and copying a matching string.
+
+   When returning, a "goto inf_leave" is used to update the total counters,
+   update the check value, and determine whether any progress has been made
+   during that inflate() call in order to return the proper return code.
+   Progress is defined as a change in either strm->avail_in or strm->avail_out.
+   When there is a window, goto inf_leave will update the window with the last
+   output written.  If a goto inf_leave occurs in the middle of decompression
+   and there is no window currently, goto inf_leave will create one and copy
+   output to the window for the next call of inflate().
+
+   In this implementation, the flush parameter of inflate() only affects the
+   return code (per zlib.h).  inflate() always writes as much as possible to
+   strm->next_out, given the space available and the provided input--the effect
+   documented in zlib.h of Z_SYNC_FLUSH.  Furthermore, inflate() always defers
+   the allocation of and copying into a sliding window until necessary, which
+   provides the effect documented in zlib.h for Z_FINISH when the entire input
+   stream is available.  So all the flush parameter actually does is:
+   when flush is set to Z_FINISH, inflate() cannot return Z_OK.  Instead it
+   will return Z_BUF_ERROR if it has not reached the end of the stream.
+ */
+
+int ZEXPORT inflate(strm, flush)
+z_streamp strm;
+int flush;
+{
+    struct inflate_state FAR *state;
+    unsigned char FAR *next;    /* next input */
+    unsigned char FAR *put;     /* next output */
+    unsigned have, left;        /* available input and output */
+    unsigned long hold;         /* bit buffer */
+    unsigned bits;              /* bits in bit buffer */
+    unsigned in, out;           /* save starting available input and output */
+    unsigned copy;              /* number of stored or match bytes to copy */
+    unsigned char FAR *from;    /* where to copy match bytes from */
+    code this;                  /* current decoding table entry */
+    code last;                  /* parent table entry */
+    unsigned len;               /* length to copy for repeats, bits to drop */
+    int ret;                    /* return code */
+#ifdef GUNZIP
+    unsigned char hbuf[4];      /* buffer for gzip header crc calculation */
+#endif
+    static const unsigned short order[19] = /* permutation of code lengths */
+        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+    if (strm == Z_NULL || strm->state == Z_NULL || strm->next_out == Z_NULL ||
+        (strm->next_in == Z_NULL && strm->avail_in != 0))
+        return Z_STREAM_ERROR;
+
+    state = (struct inflate_state FAR *)strm->state;
+    if (state->mode == TYPE) state->mode = TYPEDO;      /* skip check */
+    LOAD();
+    in = have;
+    out = left;
+    ret = Z_OK;
+    for (;;)
+        switch (state->mode) {
+        case HEAD:
+            if (state->wrap == 0) {
+                state->mode = TYPEDO;
+                break;
+            }
+            NEEDBITS(16);
+#ifdef GUNZIP
+            if ((state->wrap & 2) && hold == 0x8b1f) {  /* gzip header */
+                state->check = crc32(0L, Z_NULL, 0);
+                CRC2(state->check, hold);
+                INITBITS();
+                state->mode = FLAGS;
+                break;
+            }
+            state->flags = 0;           /* expect zlib header */
+            if (state->head != Z_NULL)
+                state->head->done = -1;
+            if (!(state->wrap & 1) ||   /* check if zlib header allowed */
+#else
+            if (
+#endif
+                ((BITS(8) << 8) + (hold >> 8)) % 31) {
+                strm->msg = (char *)"incorrect header check";
+                state->mode = BAD;
+                break;
+            }
+            if (BITS(4) != Z_DEFLATED) {
+                strm->msg = (char *)"unknown compression method";
+                state->mode = BAD;
+                break;
+            }
+            DROPBITS(4);
+            len = BITS(4) + 8;
+            if (len > state->wbits) {
+                strm->msg = (char *)"invalid window size";
+                state->mode = BAD;
+                break;
+            }
+            state->dmax = 1U << len;
+            Tracev((stderr, "inflate:   zlib header ok\n"));
+            strm->adler = state->check = adler32(0L, Z_NULL, 0);
+            state->mode = hold & 0x200 ? DICTID : TYPE;
+            INITBITS();
+            break;
+#ifdef GUNZIP
+        case FLAGS:
+            NEEDBITS(16);
+            state->flags = (int)(hold);
+            if ((state->flags & 0xff) != Z_DEFLATED) {
+                strm->msg = (char *)"unknown compression method";
+                state->mode = BAD;
+                break;
+            }
+            if (state->flags & 0xe000) {
+                strm->msg = (char *)"unknown header flags set";
+                state->mode = BAD;
+                break;
+            }
+            if (state->head != Z_NULL)
+                state->head->text = (int)((hold >> 8) & 1);
+            if (state->flags & 0x0200) CRC2(state->check, hold);
+            INITBITS();
+            state->mode = TIME;
+        case TIME:
+            NEEDBITS(32);
+            if (state->head != Z_NULL)
+                state->head->time = hold;
+            if (state->flags & 0x0200) CRC4(state->check, hold);
+            INITBITS();
+            state->mode = OS;
+        case OS:
+            NEEDBITS(16);
+            if (state->head != Z_NULL) {
+                state->head->xflags = (int)(hold & 0xff);
+                state->head->os = (int)(hold >> 8);
+            }
+            if (state->flags & 0x0200) CRC2(state->check, hold);
+            INITBITS();
+            state->mode = EXLEN;
+        case EXLEN:
+            if (state->flags & 0x0400) {
+                NEEDBITS(16);
+                state->length = (unsigned)(hold);
+                if (state->head != Z_NULL)
+                    state->head->extra_len = (unsigned)hold;
+                if (state->flags & 0x0200) CRC2(state->check, hold);
+                INITBITS();
+            }
+            else if (state->head != Z_NULL)
+                state->head->extra = Z_NULL;
+            state->mode = EXTRA;
+        case EXTRA:
+            if (state->flags & 0x0400) {
+                copy = state->length;
+                if (copy > have) copy = have;
+                if (copy) {
+                    if (state->head != Z_NULL &&
+                        state->head->extra != Z_NULL) {
+                        len = state->head->extra_len - state->length;
+                        zmemcpy(state->head->extra + len, next,
+                                len + copy > state->head->extra_max ?
+                                state->head->extra_max - len : copy);
+                    }
+                    if (state->flags & 0x0200)
+                        state->check = crc32(state->check, next, copy);
+                    have -= copy;
+                    next += copy;
+                    state->length -= copy;
+                }
+                if (state->length) goto inf_leave;
+            }
+            state->length = 0;
+            state->mode = NAME;
+        case NAME:
+            if (state->flags & 0x0800) {
+                if (have == 0) goto inf_leave;
+                copy = 0;
+                do {
+                    len = (unsigned)(next[copy++]);
+                    if (state->head != Z_NULL &&
+                            state->head->name != Z_NULL &&
+                            state->length < state->head->name_max)
+                        state->head->name[state->length++] = len;
+                } while (len && copy < have);
+                if (state->flags & 0x0200)
+                    state->check = crc32(state->check, next, copy);
+                have -= copy;
+                next += copy;
+                if (len) goto inf_leave;
+            }
+            else if (state->head != Z_NULL)
+                state->head->name = Z_NULL;
+            state->length = 0;
+            state->mode = COMMENT;
+        case COMMENT:
+            if (state->flags & 0x1000) {
+                if (have == 0) goto inf_leave;
+                copy = 0;
+                do {
+                    len = (unsigned)(next[copy++]);
+                    if (state->head != Z_NULL &&
+                            state->head->comment != Z_NULL &&
+                            state->length < state->head->comm_max)
+                        state->head->comment[state->length++] = len;
+                } while (len && copy < have);
+                if (state->flags & 0x0200)
+                    state->check = crc32(state->check, next, copy);
+                have -= copy;
+                next += copy;
+                if (len) goto inf_leave;
+            }
+            else if (state->head != Z_NULL)
+                state->head->comment = Z_NULL;
+            state->mode = HCRC;
+        case HCRC:
+            if (state->flags & 0x0200) {
+                NEEDBITS(16);
+                if (hold != (state->check & 0xffff)) {
+                    strm->msg = (char *)"header crc mismatch";
+                    state->mode = BAD;
+                    break;
+                }
+                INITBITS();
+            }
+            if (state->head != Z_NULL) {
+                state->head->hcrc = (int)((state->flags >> 9) & 1);
+                state->head->done = 1;
+            }
+            strm->adler = state->check = crc32(0L, Z_NULL, 0);
+            state->mode = TYPE;
+            break;
+#endif
+        case DICTID:
+            NEEDBITS(32);
+            strm->adler = state->check = REVERSE(hold);
+            INITBITS();
+            state->mode = DICT;
+        case DICT:
+            if (state->havedict == 0) {
+                RESTORE();
+                return Z_NEED_DICT;
+            }
+            strm->adler = state->check = adler32(0L, Z_NULL, 0);
+            state->mode = TYPE;
+        case TYPE:
+            if (flush == Z_BLOCK) goto inf_leave;
+        case TYPEDO:
+            if (state->last) {
+                BYTEBITS();
+                state->mode = CHECK;
+                break;
+            }
+            NEEDBITS(3);
+            state->last = BITS(1);
+            DROPBITS(1);
+            switch (BITS(2)) {
+            case 0:                             /* stored block */
+                Tracev((stderr, "inflate:     stored block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = STORED;
+                break;
+            case 1:                             /* fixed block */
+                fixedtables(state);
+                Tracev((stderr, "inflate:     fixed codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = LEN;              /* decode codes */
+                break;
+            case 2:                             /* dynamic block */
+                Tracev((stderr, "inflate:     dynamic codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = TABLE;
+                break;
+            case 3:
+                strm->msg = (char *)"invalid block type";
+                state->mode = BAD;
+            }
+            DROPBITS(2);
+            break;
+        case STORED:
+            BYTEBITS();                         /* go to byte boundary */
+            NEEDBITS(32);
+            if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
+                strm->msg = (char *)"invalid stored block lengths";
+                state->mode = BAD;
+                break;
+            }
+            state->length = (unsigned)hold & 0xffff;
+            Tracev((stderr, "inflate:       stored length %u\n",
+                    state->length));
+            INITBITS();
+            state->mode = COPY;
+        case COPY:
+            copy = state->length;
+            if (copy) {
+                if (copy > have) copy = have;
+                if (copy > left) copy = left;
+                if (copy == 0) goto inf_leave;
+                zmemcpy(put, next, copy);
+                have -= copy;
+                next += copy;
+                left -= copy;
+                put += copy;
+                state->length -= copy;
+                break;
+            }
+            Tracev((stderr, "inflate:       stored end\n"));
+            state->mode = TYPE;
+            break;
+        case TABLE:
+            NEEDBITS(14);
+            state->nlen = BITS(5) + 257;
+            DROPBITS(5);
+            state->ndist = BITS(5) + 1;
+            DROPBITS(5);
+            state->ncode = BITS(4) + 4;
+            DROPBITS(4);
+#ifndef PKZIP_BUG_WORKAROUND
+            if (state->nlen > 286 || state->ndist > 30) {
+                strm->msg = (char *)"too many length or distance symbols";
+                state->mode = BAD;
+                break;
+            }
+#endif
+            Tracev((stderr, "inflate:       table sizes ok\n"));
+            state->have = 0;
+            state->mode = LENLENS;
+        case LENLENS:
+            while (state->have < state->ncode) {
+                NEEDBITS(3);
+                state->lens[order[state->have++]] = (unsigned short)BITS(3);
+                DROPBITS(3);
+            }
+            while (state->have < 19)
+                state->lens[order[state->have++]] = 0;
+            state->next = state->codes;
+            state->lencode = (code const FAR *)(state->next);
+            state->lenbits = 7;
+            ret = inflate_table(CODES, state->lens, 19, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid code lengths set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       code lengths ok\n"));
+            state->have = 0;
+            state->mode = CODELENS;
+        case CODELENS:
+            while (state->have < state->nlen + state->ndist) {
+                for (;;) {
+                    this = state->lencode[BITS(state->lenbits)];
+                    if ((unsigned)(this.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                if (this.val < 16) {
+                    NEEDBITS(this.bits);
+                    DROPBITS(this.bits);
+                    state->lens[state->have++] = this.val;
+                }
+                else {
+                    if (this.val == 16) {
+                        NEEDBITS(this.bits + 2);
+                        DROPBITS(this.bits);
+                        if (state->have == 0) {
+                            strm->msg = (char *)"invalid bit length repeat";
+                            state->mode = BAD;
+                            break;
+                        }
+                        len = state->lens[state->have - 1];
+                        copy = 3 + BITS(2);
+                        DROPBITS(2);
+                    }
+                    else if (this.val == 17) {
+                        NEEDBITS(this.bits + 3);
+                        DROPBITS(this.bits);
+                        len = 0;
+                        copy = 3 + BITS(3);
+                        DROPBITS(3);
+                    }
+                    else {
+                        NEEDBITS(this.bits + 7);
+                        DROPBITS(this.bits);
+                        len = 0;
+                        copy = 11 + BITS(7);
+                        DROPBITS(7);
+                    }
+                    if (state->have + copy > state->nlen + state->ndist) {
+                        strm->msg = (char *)"invalid bit length repeat";
+                        state->mode = BAD;
+                        break;
+                    }
+                    while (copy--)
+                        state->lens[state->have++] = (unsigned short)len;
+                }
+            }
+
+            /* handle error breaks in while */
+            if (state->mode == BAD) break;
+
+            /* build code tables */
+            state->next = state->codes;
+            state->lencode = (code const FAR *)(state->next);
+            state->lenbits = 9;
+            ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid literal/lengths set";
+                state->mode = BAD;
+                break;
+            }
+            state->distcode = (code const FAR *)(state->next);
+            state->distbits = 6;
+            ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
+                            &(state->next), &(state->distbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid distances set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       codes ok\n"));
+            state->mode = LEN;
+        case LEN:
+            if (have >= 6 && left >= 258) {
+                RESTORE();
+                inflate_fast(strm, out);
+                LOAD();
+                break;
+            }
+            for (;;) {
+                this = state->lencode[BITS(state->lenbits)];
+                if ((unsigned)(this.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if (this.op && (this.op & 0xf0) == 0) {
+                last = this;
+                for (;;) {
+                    this = state->lencode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + this.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+            }
+            DROPBITS(this.bits);
+            state->length = (unsigned)this.val;
+            if ((int)(this.op) == 0) {
+                Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
+                        "inflate:         literal '%c'\n" :
+                        "inflate:         literal 0x%02x\n", this.val));
+                state->mode = LIT;
+                break;
+            }
+            if (this.op & 32) {
+                Tracevv((stderr, "inflate:         end of block\n"));
+                state->mode = TYPE;
+                break;
+            }
+            if (this.op & 64) {
+                strm->msg = (char *)"invalid literal/length code";
+                state->mode = BAD;
+                break;
+            }
+            state->extra = (unsigned)(this.op) & 15;
+            state->mode = LENEXT;
+        case LENEXT:
+            if (state->extra) {
+                NEEDBITS(state->extra);
+                state->length += BITS(state->extra);
+                DROPBITS(state->extra);
+            }
+            Tracevv((stderr, "inflate:         length %u\n", state->length));
+            state->mode = DIST;
+        case DIST:
+            for (;;) {
+                this = state->distcode[BITS(state->distbits)];
+                if ((unsigned)(this.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if ((this.op & 0xf0) == 0) {
+                last = this;
+                for (;;) {
+                    this = state->distcode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + this.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+            }
+            DROPBITS(this.bits);
+            if (this.op & 64) {
+                strm->msg = (char *)"invalid distance code";
+                state->mode = BAD;
+                break;
+            }
+            state->offset = (unsigned)this.val;
+            state->extra = (unsigned)(this.op) & 15;
+            state->mode = DISTEXT;
+        case DISTEXT:
+            if (state->extra) {
+                NEEDBITS(state->extra);
+                state->offset += BITS(state->extra);
+                DROPBITS(state->extra);
+            }
+#ifdef INFLATE_STRICT
+            if (state->offset > state->dmax) {
+                strm->msg = (char *)"invalid distance too far back";
+                state->mode = BAD;
+                break;
+            }
+#endif
+            if (state->offset > state->whave + out - left) {
+                strm->msg = (char *)"invalid distance too far back";
+                state->mode = BAD;
+                break;
+            }
+            Tracevv((stderr, "inflate:         distance %u\n", state->offset));
+            state->mode = MATCH;
+        case MATCH:
+            if (left == 0) goto inf_leave;
+            copy = out - left;
+            if (state->offset > copy) {         /* copy from window */
+                copy = state->offset - copy;
+                if (copy > state->write) {
+                    copy -= state->write;
+                    from = state->window + (state->wsize - copy);
+                }
+                else
+                    from = state->window + (state->write - copy);
+                if (copy > state->length) copy = state->length;
+            }
+            else {                              /* copy from output */
+                from = put - state->offset;
+                copy = state->length;
+            }
+            if (copy > left) copy = left;
+            left -= copy;
+            state->length -= copy;
+            do {
+                *put++ = *from++;
+            } while (--copy);
+            if (state->length == 0) state->mode = LEN;
+            break;
+        case LIT:
+            if (left == 0) goto inf_leave;
+            *put++ = (unsigned char)(state->length);
+            left--;
+            state->mode = LEN;
+            break;
+        case CHECK:
+            if (state->wrap) {
+                NEEDBITS(32);
+                out -= left;
+                strm->total_out += out;
+                state->total += out;
+                if (out)
+                    strm->adler = state->check =
+                        UPDATE(state->check, put - out, out);
+                out = left;
+                if ((
+#ifdef GUNZIP
+                     state->flags ? hold :
+#endif
+                     REVERSE(hold)) != state->check) {
+                    strm->msg = (char *)"incorrect data check";
+                    state->mode = BAD;
+                    break;
+                }
+                INITBITS();
+                Tracev((stderr, "inflate:   check matches trailer\n"));
+            }
+#ifdef GUNZIP
+            state->mode = LENGTH;
+        case LENGTH:
+            if (state->wrap && state->flags) {
+                NEEDBITS(32);
+                if (hold != (state->total & 0xffffffffUL)) {
+                    strm->msg = (char *)"incorrect length check";
+                    state->mode = BAD;
+                    break;
+                }
+                INITBITS();
+                Tracev((stderr, "inflate:   length matches trailer\n"));
+            }
+#endif
+            state->mode = DONE;
+        case DONE:
+            ret = Z_STREAM_END;
+            goto inf_leave;
+        case BAD:
+            ret = Z_DATA_ERROR;
+            goto inf_leave;
+        case MEM:
+            return Z_MEM_ERROR;
+        case SYNC:
+        default:
+            return Z_STREAM_ERROR;
+        }
+
+    /*
+       Return from inflate(), updating the total counts and the check value.
+       If there was no progress during the inflate() call, return a buffer
+       error.  Call updatewindow() to create and/or update the window state.
+       Note: a memory error from inflate() is non-recoverable.
+     */
+  inf_leave:
+    RESTORE();
+    if (state->wsize || (state->mode < CHECK && out != strm->avail_out))
+        if (updatewindow(strm, out)) {
+            state->mode = MEM;
+            return Z_MEM_ERROR;
+        }
+    in -= strm->avail_in;
+    out -= strm->avail_out;
+    strm->total_in += in;
+    strm->total_out += out;
+    state->total += out;
+    if (state->wrap && out)
+        strm->adler = state->check =
+            UPDATE(state->check, strm->next_out - out, out);
+    strm->data_type = state->bits + (state->last ? 64 : 0) +
+                      (state->mode == TYPE ? 128 : 0);
+    if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
+        ret = Z_BUF_ERROR;
+    return ret;
+}
+
+int ZEXPORT inflateEnd(strm)    /* free the sliding window and the state */
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+    if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0)
+        return Z_STREAM_ERROR;  /* nothing to free, or no way to free it */
+    state = (struct inflate_state FAR *)strm->state;
+    if (state->window != Z_NULL) ZFREE(strm, state->window);
+    ZFREE(strm, strm->state);
+    strm->state = Z_NULL;       /* a repeated call now gets Z_STREAM_ERROR */
+    Tracev((stderr, "inflate: end\n"));
+    return Z_OK;
+}
+
+int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength)
+z_streamp strm;
+const Bytef *dictionary;        /* preset dictionary bytes */
+uInt dictLength;                /* number of bytes at dictionary */
+{
+    struct inflate_state FAR *state;
+    unsigned long id;           /* adler32() of the supplied dictionary */
+
+    /* check state */
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if (state->wrap != 0 && state->mode != DICT)
+        return Z_STREAM_ERROR;  /* wrapped stream is not waiting in DICT mode */
+
+    /* check for correct dictionary id */
+    if (state->mode == DICT) {
+        id = adler32(0L, Z_NULL, 0);
+        id = adler32(id, dictionary, dictLength);
+        if (id != state->check) /* check holds the id read in DICTID mode */
+            return Z_DATA_ERROR;
+    }
+
+    /* copy dictionary to window */
+    if (updatewindow(strm, strm->avail_out)) {  /* nonzero => memory error */
+        state->mode = MEM;
+        return Z_MEM_ERROR;
+    }
+    if (dictLength > state->wsize) {  /* keep only the last wsize bytes */
+        zmemcpy(state->window, dictionary + dictLength - state->wsize,
+                state->wsize);
+        state->whave = state->wsize;
+    }
+    else {                            /* fits: goes at the window's end */
+        zmemcpy(state->window + state->wsize - dictLength, dictionary,
+                dictLength);
+        state->whave = dictLength;
+    }
+    state->havedict = 1;        /* lets inflate() leave DICT mode */
+    Tracev((stderr, "inflate:   dictionary set\n"));
+    return Z_OK;
+}
+
+int ZEXPORT inflateGetHeader(strm, head)
+z_streamp strm;
+gz_headerp head;                /* caller's structure for gzip header fields */
+{
+    struct inflate_state FAR *state;
+
+    /* check state */
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if ((state->wrap & 2) == 0) return Z_STREAM_ERROR;  /* not gzip */
+
+    /* save header structure */
+    state->head = head;         /* inflate() fills *head as it parses */
+    head->done = 0;             /* NOTE(review): head is not null-checked */
+    return Z_OK;
+}
+
+/*
+   Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff.  Return when found
+   or when out of input.  When called, *have is the number of pattern bytes
+   found in order so far, in 0..3.  On return *have is updated to the new
+   state.  If on return *have equals four, then the pattern was found and the
+   return value is how many bytes were read including the last byte of the
+   pattern.  If *have is less than four, then the pattern has not been found
+   yet and the return value is len.  In the latter case, syncsearch() can be
+   called again with more data and the *have state.  *have is initialized to
+   zero for the first call.
+ */
+local unsigned syncsearch(have, buf, len)
+unsigned FAR *have;
+unsigned char FAR *buf;
+unsigned len;
+{
+    unsigned got;               /* pattern bytes matched so far */
+    unsigned next;              /* index of the next byte of buf to examine */
+
+    got = *have;
+    next = 0;
+    while (next < len && got < 4) {
+        if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
+            got++;              /* byte is the one the pattern expects next */
+        else if (buf[next])
+            got = 0;            /* nonzero mismatch: start over */
+        else
+            got = 4 - got;      /* 0 where 0xff expected: 2 stays 2, 3 -> 1 */
+        next++;
+    }
+    *have = got;
+    return next;
+}
+
+int ZEXPORT inflateSync(strm)   /* skip input up to a 0, 0, 0xff, 0xff marker */
+z_streamp strm;
+{
+    unsigned len;               /* number of bytes to look at or looked at */
+    unsigned long in, out;      /* temporary to save total_in and total_out */
+    unsigned char buf[4];       /* to restore bit buffer to byte string */
+    struct inflate_state FAR *state;
+
+    /* check parameters; need input or at least a byte in the bit buffer */
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
+
+    /* if first time, start search in bit buffer */
+    if (state->mode != SYNC) {
+        state->mode = SYNC;
+        state->hold >>= state->bits & 7;    /* drop partial byte (low bits) */
+        state->bits -= state->bits & 7;     /* whole bytes remain in hold */
+        len = 0;
+        while (state->bits >= 8) {          /* unpack them, low byte first */
+            buf[len++] = (unsigned char)(state->hold);
+            state->hold >>= 8;
+            state->bits -= 8;
+        }
+        state->have = 0;
+        syncsearch(&(state->have), buf, len);
+    }
+
+    /* search available input */
+    len = syncsearch(&(state->have), strm->next_in, strm->avail_in);
+    strm->avail_in -= len;
+    strm->next_in += len;
+    strm->total_in += len;
+
+    /* return no joy or set up to restart inflate() on a new block */
+    if (state->have != 4) return Z_DATA_ERROR;
+    in = strm->total_in;  out = strm->total_out;
+    inflateReset(strm);         /* totals are saved around this call */
+    strm->total_in = in;  strm->total_out = out;
+    state->mode = TYPE;
+    return Z_OK;
+}
+
+/*
+   Returns true if inflate is currently at the end of a block generated by
+   Z_SYNC_FLUSH or Z_FULL_FLUSH (STORED mode with an empty bit buffer), or
+   Z_STREAM_ERROR if strm or its state is null. One PPP implementation uses
+   this as a safety check: PPP uses Z_SYNC_FLUSH but strips the length bytes
+   of the resulting empty stored block, and on decompression checks that at
+   the end of an input packet inflate is waiting for these length bytes.
+ */
+int ZEXPORT inflateSyncPoint(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+
+    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    return state->mode == STORED && state->bits == 0;
+}
+
+int ZEXPORT inflateCopy(dest, source)
+z_streamp dest;                 /* receives a copy of *source with own state */
+z_streamp source;               /* stream to duplicate; needs zalloc, zfree */
+{
+    struct inflate_state FAR *state;
+    struct inflate_state FAR *copy;
+    unsigned char FAR *window;
+    unsigned wsize;
+
+    /* check input */
+    if (dest == Z_NULL || source == Z_NULL || source->state == Z_NULL ||
+        source->zalloc == (alloc_func)0 || source->zfree == (free_func)0)
+        return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)source->state;
+
+    /* allocate space */
+    copy = (struct inflate_state FAR *)
+           ZALLOC(source, 1, sizeof(struct inflate_state));
+    if (copy == Z_NULL) return Z_MEM_ERROR;
+    window = Z_NULL;
+    if (state->window != Z_NULL) {
+        window = (unsigned char FAR *)
+                 ZALLOC(source, 1U << state->wbits, sizeof(unsigned char));
+        if (window == Z_NULL) {
+            ZFREE(source, copy);    /* do not leak the state copy */
+            return Z_MEM_ERROR;
+        }
+    }
+
+    /* copy state, rebasing pointers that point into state->codes */
+    zmemcpy(dest, source, sizeof(z_stream));    /* dest->state is set below */
+    zmemcpy(copy, state, sizeof(struct inflate_state));
+    if (state->lencode >= state->codes &&
+        state->lencode <= state->codes + ENOUGH - 1) {
+        copy->lencode = copy->codes + (state->lencode - state->codes);
+        copy->distcode = copy->codes + (state->distcode - state->codes);
+    }
+    copy->next = copy->codes + (state->next - state->codes);
+    if (window != Z_NULL) {
+        wsize = 1U << state->wbits;
+        zmemcpy(window, state->window, wsize);
+    }
+    copy->window = window;      /* Z_NULL if source had no window */
+    dest->state = (struct internal_state FAR *)copy;
+    return Z_OK;
+}
diff --git a/src/zlib/inflate.h b/src/zlib/inflate.h
new file mode 100644
index 0000000..07bd3e7
--- /dev/null
+++ b/src/zlib/inflate.h
@@ -0,0 +1,115 @@
+/* inflate.h -- internal inflate state definition
+ * Copyright (C) 1995-2004 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* define NO_GZIP when compiling if you want to disable gzip header and
+   trailer decoding by inflate().  NO_GZIP would be used to avoid linking in
+   the crc code when it is not needed.  For shared libraries, gzip decoding
+   should be left enabled. */
+#ifndef NO_GZIP
+#  define GUNZIP
+#endif
+
+/* Possible inflate modes between inflate() calls (i: input, o: output) */
+typedef enum {  /* order matters: inflate() tests mode < CHECK */
+    HEAD,       /* i: waiting for magic header */
+    FLAGS,      /* i: waiting for method and flags (gzip) */
+    TIME,       /* i: waiting for modification time (gzip) */
+    OS,         /* i: waiting for extra flags and operating system (gzip) */
+    EXLEN,      /* i: waiting for extra length (gzip) */
+    EXTRA,      /* i: waiting for extra bytes (gzip) */
+    NAME,       /* i: waiting for end of file name (gzip) */
+    COMMENT,    /* i: waiting for end of comment (gzip) */
+    HCRC,       /* i: waiting for header crc (gzip) */
+    DICTID,     /* i: waiting for dictionary check value */
+    DICT,       /* waiting for inflateSetDictionary() call */
+        TYPE,       /* i: waiting for type bits, including last-flag bit */
+        TYPEDO,     /* i: same, but skip check to exit inflate on new block */
+        STORED,     /* i: waiting for stored size (length and complement) */
+        COPY,       /* i/o: waiting for input or output to copy stored block */
+        TABLE,      /* i: waiting for dynamic block table lengths */
+        LENLENS,    /* i: waiting for code length code lengths */
+        CODELENS,   /* i: waiting for length/lit and distance code lengths */
+            LEN,        /* i: waiting for length/lit code */
+            LENEXT,     /* i: waiting for length extra bits */
+            DIST,       /* i: waiting for distance code */
+            DISTEXT,    /* i: waiting for distance extra bits */
+            MATCH,      /* o: waiting for output space to copy string */
+            LIT,        /* o: waiting for output space to write literal */
+    CHECK,      /* i: waiting for 32-bit check value */
+    LENGTH,     /* i: waiting for 32-bit length (gzip) */
+    DONE,       /* finished check, done -- remain here until reset */
+    BAD,        /* got a data error -- remain here until reset */
+    MEM,        /* got an inflate() memory error -- remain here until reset */
+    SYNC        /* looking for synchronization bytes to restart inflate() */
+} inflate_mode;
+
+/*
+    State transitions between above modes -
+
+    (most modes can go to the BAD or MEM mode -- not shown for clarity)
+
+    Process header:
+        HEAD -> (gzip) or (zlib)
+        (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME
+        NAME -> COMMENT -> HCRC -> TYPE
+        (zlib) -> DICTID or TYPE
+        DICTID -> DICT -> TYPE
+    Read deflate blocks:
+            TYPE -> STORED or TABLE or LEN or CHECK
+            STORED -> COPY -> TYPE
+            TABLE -> LENLENS -> CODELENS -> LEN
+    Read deflate codes:
+                LEN -> LENEXT or LIT or TYPE
+                LENEXT -> DIST -> DISTEXT -> MATCH -> LEN
+                LIT -> LEN
+    Process trailer:
+        CHECK -> LENGTH -> DONE
+ */
+
+/* state maintained between inflate() calls.  Approximately 7K bytes. */
+struct inflate_state {
+    inflate_mode mode;          /* current inflate mode */
+    int last;                   /* true if processing last block */
+    int wrap;                   /* bit 0 true for zlib, bit 1 true for gzip */
+    int havedict;               /* true if dictionary provided */
+    int flags;                  /* gzip header method and flags (0 if zlib) */
+    unsigned dmax;              /* zlib header max distance (INFLATE_STRICT) */
+    unsigned long check;        /* protected copy of check value */
+    unsigned long total;        /* protected copy of output count */
+    gz_headerp head;            /* where to save gzip header information */
+        /* sliding window */
+    unsigned wbits;             /* log base 2 of requested window size */
+    unsigned wsize;             /* window size or zero if not using window */
+    unsigned whave;             /* valid bytes in the window */
+    unsigned write;             /* window write index */
+    unsigned char FAR *window;  /* allocated sliding window, if needed */
+        /* bit accumulator */
+    unsigned long hold;         /* input bit accumulator */
+    unsigned bits;              /* number of bits in hold */
+        /* for string and stored block copying */
+    unsigned length;            /* literal or length of data to copy */
+    unsigned offset;            /* distance back to copy string from */
+        /* for table and code decoding */
+    unsigned extra;             /* extra bits needed */
+        /* fixed and dynamic code tables */
+    code const FAR *lencode;    /* starting table for length/literal codes */
+    code const FAR *distcode;   /* starting table for distance codes */
+    unsigned lenbits;           /* index bits for lencode */
+    unsigned distbits;          /* index bits for distcode */
+        /* dynamic table building */
+    unsigned ncode;             /* number of code length code lengths */
+    unsigned nlen;              /* number of length code lengths */
+    unsigned ndist;             /* number of distance code lengths */
+    unsigned have;              /* # of lens[] entries, or sync bytes found */
+    code FAR *next;             /* next available space in codes[] */
+    unsigned short lens[320];   /* temporary storage for code lengths */
+    unsigned short work[288];   /* work area for code table building */
+    code codes[ENOUGH];         /* space for code tables */
+};
diff --git a/src/zlib/inftrees.c b/src/zlib/inftrees.c
new file mode 100644
index 0000000..8a9c13f
--- /dev/null
+++ b/src/zlib/inftrees.c
@@ -0,0 +1,329 @@
+/* inftrees.c -- generate Huffman trees for efficient decoding
+ * Copyright (C) 1995-2005 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+
+#define MAXBITS 15
+
+const char inflate_copyright[] =
+   " inflate 1.2.3 Copyright 1995-2005 Mark Adler ";
+/*
+  If you use the zlib library in a product, an acknowledgment is welcome
+  in the documentation of your product. If for some reason you cannot
+  include such an acknowledgment, I would appreciate that you keep this
+  copyright string in the executable of your product.
+ */
+
+/*
+   Build a set of tables to decode the provided canonical Huffman code.
+   The code lengths are lens[0..codes-1].  The result starts at *table,
+   whose indices are 0..2^bits-1.  work is a writable array of at least
+   codes shorts, which is used as a work area.  type is the type of code
+   to be generated, CODES, LENS, or DISTS.  On return, zero is success,
+   -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
+   on return points to the next available entry's address.  bits is the
+   requested root table index bits, and on return it is the actual root
+   table index bits.  It will differ if the request is greater than the
+   longest code or if it is less than the shortest code.
+ */
+int inflate_table(type, lens, codes, table, bits, work)
+codetype type;
+unsigned short FAR *lens;
+unsigned codes;
+code FAR * FAR *table;
+unsigned FAR *bits;
+unsigned short FAR *work;
+{
+    unsigned len;               /* a code's length in bits */
+    unsigned sym;               /* index of code symbols */
+    unsigned min, max;          /* minimum and maximum code lengths */
+    unsigned root;              /* number of index bits for root table */
+    unsigned curr;              /* number of index bits for current table */
+    unsigned drop;              /* code bits to drop for sub-table */
+    int left;                   /* number of prefix codes available */
+    unsigned used;              /* code entries in table used */
+    unsigned huff;              /* Huffman code */
+    unsigned incr;              /* for incrementing code, index */
+    unsigned fill;              /* index for replicating entries */
+    unsigned low;               /* low bits for current root entry */
+    unsigned mask;              /* mask for low root bits */
+    code this;                  /* table entry for duplication */
+    code FAR *next;             /* next available space in table */
+    const unsigned short FAR *base;     /* base value table to use */
+    const unsigned short FAR *extra;    /* extra bits table to use */
+    unsigned match;             /* use base and extra for symbol >= match */
+    unsigned short count[MAXBITS+1];    /* number of codes of each length */
+    unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
+    static const unsigned short lbase[31] = { /* Length codes 257..285 base */
+        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
+    static const unsigned short lext[31] = { /* Length codes 257..285 extra */
+        16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
+        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196};
+    static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
+        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+        8193, 12289, 16385, 24577, 0, 0};
+    static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
+        16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
+        23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
+        28, 28, 29, 29, 64, 64};
+
+    /*
+       Process a set of code lengths to create a canonical Huffman code.  The
+       code lengths are lens[0..codes-1].  Each length corresponds to the
+       symbols 0..codes-1.  The Huffman code is generated by first sorting the
+       symbols by length from short to long, and retaining the symbol order
+       for codes with equal lengths.  Then the code starts with all zero bits
+       for the first code of the shortest length, and the codes are integer
+       increments for the same length, and zeros are appended as the length
+       increases.  For the deflate format, these bits are stored backwards
+       from their more natural integer increment ordering, and so when the
+       decoding tables are built in the large loop below, the integer codes
+       are incremented backwards.
+
+       This routine assumes, but does not check, that all of the entries in
+       lens[] are in the range 0..MAXBITS.  The caller must assure this.
+       1..MAXBITS is interpreted as that code length.  zero means that that
+       symbol does not occur in this code.
+
+       The codes are sorted by computing a count of codes for each length,
+       creating from that a table of starting indices for each length in the
+       sorted table, and then entering the symbols in order in the sorted
+       table.  The sorted table is work[], with that space being provided by
+       the caller.
+
+       The length counts are used for other purposes as well, i.e. finding
+       the minimum and maximum length codes, determining if there are any
+       codes at all, checking for a valid set of lengths, and looking ahead
+       at length counts to determine sub-table sizes when building the
+       decoding tables.
+     */
+
+    /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
+    for (len = 0; len <= MAXBITS; len++)
+        count[len] = 0;
+    for (sym = 0; sym < codes; sym++)
+        count[lens[sym]]++;
+
+    /* bound code lengths, force root to be within code lengths */
+    root = *bits;
+    for (max = MAXBITS; max >= 1; max--)
+        if (count[max] != 0) break;
+    if (root > max) root = max;
+    if (max == 0) {                     /* no symbols to code at all */
+        this.op = (unsigned char)64;    /* invalid code marker */
+        this.bits = (unsigned char)1;
+        this.val = (unsigned short)0;
+        *(*table)++ = this;             /* make a table to force an error */
+        *(*table)++ = this;
+        *bits = 1;
+        return 0;     /* no symbols, but wait for decoding to report error */
+    }
+    for (min = 1; min <= MAXBITS; min++)
+        if (count[min] != 0) break;
+    if (root < min) root = min;
+
+    /* check for an over-subscribed or incomplete set of lengths */
+    left = 1;
+    for (len = 1; len <= MAXBITS; len++) {
+        left <<= 1;
+        left -= count[len];
+        if (left < 0) return -1;        /* over-subscribed */
+    }
+    if (left > 0 && (type == CODES || max != 1))
+        return -1;                      /* incomplete set */
+
+    /* generate offsets into symbol table for each length for sorting */
+    offs[1] = 0;
+    for (len = 1; len < MAXBITS; len++)
+        offs[len + 1] = offs[len] + count[len];
+
+    /* sort symbols by length, by symbol order within each length */
+    for (sym = 0; sym < codes; sym++)
+        if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
+
+    /*
+       Create and fill in decoding tables.  In this loop, the table being
+       filled is at next and has curr index bits.  The code being used is huff
+       with length len.  That code is converted to an index by dropping drop
+       bits off of the bottom.  For codes where len is less than drop + curr,
+       those top drop + curr - len bits are incremented through all values to
+       fill the table with replicated entries.
+
+       root is the number of index bits for the root table.  When len exceeds
+       root, sub-tables are created pointed to by the root entry with an index
+       of the low root bits of huff.  This is saved in low to check for when a
+       new sub-table should be started.  drop is zero when the root table is
+       being filled, and drop is root when sub-tables are being filled.
+
+       When a new sub-table is needed, it is necessary to look ahead in the
+       code lengths to determine what size sub-table is needed.  The length
+       counts are used for this, and so count[] is decremented as codes are
+       entered in the tables.
+
+       used keeps track of how many table entries have been allocated from the
+       provided *table space.  It is checked when a LENS table is being made
+       against the space in *table, ENOUGH, minus the maximum space needed by
+       the worst case distance code, MAXD.  This should never happen, but the
+       sufficiency of ENOUGH has not been proven exhaustively, hence the check.
+       This assumes that when type == LENS, bits == 9.
+
+       sym increments through all symbols, and the loop terminates when
+       all codes of length max, i.e. all codes, have been processed.  This
+       routine permits incomplete codes, so another loop after this one fills
+       in the rest of the decoding tables with invalid code markers.
+     */
+
+    /* set up for code type */
+    switch (type) {
+    case CODES:
+        base = extra = work;    /* dummy value--not used */
+        match = 20;             /* symbols are 0..18, so never reached */
+        break;
+    case LENS:
+        /* tables are indexed by (symbol - match); biasing the pointers by
+           -257 instead would form out-of-bounds pointers, which is UB */
+        base = lbase;
+        extra = lext;
+        match = 257;
+        break;
+    default:            /* DISTS */
+        base = dbase;
+        extra = dext;
+        match = 0;
+    }
+
+    /* initialize state for loop */
+    huff = 0;                   /* starting code */
+    sym = 0;                    /* starting code symbol */
+    len = min;                  /* starting code length */
+    next = *table;              /* current table to fill in */
+    curr = root;                /* current table index bits */
+    drop = 0;                   /* current bits to drop from code for index */
+    low = (unsigned)(-1);       /* trigger new sub-table when len > root */
+    used = 1U << root;          /* use root table entries */
+    mask = used - 1;            /* mask for comparing low */
+
+    /* check available table space */
+    if (type == LENS && used >= ENOUGH - MAXD)
+        return 1;
+
+    /* process all codes and make table entries */
+    for (;;) {
+        /* create table entry */
+        this.bits = (unsigned char)(len - drop);
+        if (work[sym] + 1U < match) {       /* literal (symbol < match - 1) */
+            this.op = (unsigned char)0;
+            this.val = work[sym];
+        }
+        else if (work[sym] >= match) {      /* length or distance code */
+            this.op = (unsigned char)(extra[work[sym] - match]);
+            this.val = base[work[sym] - match];
+        }
+        else {
+            this.op = (unsigned char)(32 + 64);         /* end of block */
+            this.val = 0;
+        }
+
+        /* replicate for those indices with low len bits equal to huff */
+        incr = 1U << (len - drop);
+        fill = 1U << curr;
+        min = fill;                 /* save offset to next table */
+        do {
+            fill -= incr;
+            next[(huff >> drop) + fill] = this;
+        } while (fill != 0);
+
+        /* backwards increment the len-bit code huff */
+        incr = 1U << (len - 1);
+        while (huff & incr)
+            incr >>= 1;
+        if (incr != 0) {
+            huff &= incr - 1;
+            huff += incr;
+        }
+        else
+            huff = 0;
+
+        /* go to next symbol, update count, len */
+        sym++;
+        if (--(count[len]) == 0) {
+            if (len == max) break;
+            len = lens[work[sym]];
+        }
+
+        /* create new sub-table if needed */
+        if (len > root && (huff & mask) != low) {
+            /* if first time, transition to sub-tables */
+            if (drop == 0)
+                drop = root;
+
+            /* increment past last table */
+            next += min;            /* here min is 1 << curr */
+
+            /* determine length of next table */
+            curr = len - drop;
+            left = (int)(1 << curr);
+            while (curr + drop < max) {
+                left -= count[curr + drop];
+                if (left <= 0) break;
+                curr++;
+                left <<= 1;
+            }
+
+            /* check for enough space */
+            used += 1U << curr;
+            if (type == LENS && used >= ENOUGH - MAXD)
+                return 1;
+
+            /* point entry in root table to sub-table */
+            low = huff & mask;
+            (*table)[low].op = (unsigned char)curr;
+            (*table)[low].bits = (unsigned char)root;
+            (*table)[low].val = (unsigned short)(next - *table);
+        }
+    }
+
+    /*
+       Fill in rest of table for incomplete codes.  This loop is similar to the
+       loop above in incrementing huff for table indices.  It is assumed that
+       len is equal to curr + drop, so there is no loop needed to increment
+       through high index bits.  When the current sub-table is filled, the loop
+       drops back to the root table to fill in any remaining entries there.
+     */
+    this.op = (unsigned char)64;                /* invalid code marker */
+    this.bits = (unsigned char)(len - drop);
+    this.val = (unsigned short)0;
+    while (huff != 0) {
+        /* when done with sub-table, drop back to root table */
+        if (drop != 0 && (huff & mask) != low) {
+            drop = 0;
+            len = root;
+            next = *table;
+            this.bits = (unsigned char)len;
+        }
+
+        /* put invalid code marker in table */
+        next[huff >> drop] = this;
+
+        /* backwards increment the len-bit code huff */
+        incr = 1U << (len - 1);
+        while (huff & incr)
+            incr >>= 1;
+        if (incr != 0) {
+            huff &= incr - 1;
+            huff += incr;
+        }
+        else
+            huff = 0;
+    }
+
+    /* set return parameters */
+    *table += used;
+    *bits = root;
+    return 0;
+}
diff --git a/src/zlib/inftrees.h b/src/zlib/inftrees.h
new file mode 100644
index 0000000..b1104c8
--- /dev/null
+++ b/src/zlib/inftrees.h
@@ -0,0 +1,55 @@
+/* inftrees.h -- header to use inftrees.c
+ * Copyright (C) 1995-2005 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* Structure for decoding tables.  Each entry provides either the
+   information needed to do the operation requested by the code that
+   indexed that table entry, or it provides a pointer to another
+   table that indexes more bits of the code.  op indicates whether
+   the entry is a pointer to another table, a literal, a length or
+   distance, an end-of-block, or an invalid code.  For a table
+   pointer, the low four bits of op is the number of index bits of
+   that table.  For a length or distance, the low four bits of op
+   is the number of extra bits to get after the code.  bits is
+   the number of bits in this code or part of the code to drop off
+   of the bit buffer.  val is the actual byte to output in the case
+   of a literal, the base length or distance, or the offset from
+   the current table to the next table.  Each entry is four bytes. */
+typedef struct {
+    unsigned char op;           /* operation, extra bits, table bits */
+    unsigned char bits;         /* bits in this part of the code */
+    unsigned short val;         /* offset in table or code value */
+} code;
+
+/* op values as set by inflate_table():
+    00000000 - literal
+    0000tttt - table link, tttt != 0 is the number of table index bits
+    0001eeee - length or distance, eeee is the number of extra bits
+    01100000 - end of block
+    01000000 - invalid code
+ */
+
+/* Maximum size of dynamic tree.  The maximum found in a long but non-
+   exhaustive search was 1444 code structures (852 for length/literals
+   and 592 for distances, the latter actually the result of an
+   exhaustive search).  The true maximum is not known, but the value
+   below is more than safe. */
+#define ENOUGH 2048
+#define MAXD 592
+
+/* Type of code to build for inflate_table() */
+typedef enum {
+    CODES,
+    LENS,
+    DISTS
+} codetype;
+
+extern int inflate_table OF((codetype type, unsigned short FAR *lens,
+                             unsigned codes, code FAR * FAR *table,
+                             unsigned FAR *bits, unsigned short FAR *work));
diff --git a/src/zlib/trees.c b/src/zlib/trees.c
new file mode 100644
index 0000000..0fac24e
--- /dev/null
+++ b/src/zlib/trees.c
@@ -0,0 +1,1219 @@
+/* trees.c -- output deflated data using Huffman coding
+ * Copyright (C) 1995-2005 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ *  ALGORITHM
+ *
+ *      The "deflation" process uses several Huffman trees. The more
+ *      common source values are represented by shorter bit sequences.
+ *
+ *      Each code tree is stored in a compressed form which is itself
+ * a Huffman encoding of the lengths of all the code strings (in
+ * ascending order by source values).  The actual code strings are
+ * reconstructed from the lengths in the inflate process, as described
+ * in the deflate specification.
+ *
+ *  REFERENCES
+ *
+ *      Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
+ *      Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
+ *
+ *      Storer, James A.
+ *          Data Compression:  Methods and Theory, pp. 49-50.
+ *          Computer Science Press, 1988.  ISBN 0-7167-8156-5.
+ *
+ *      Sedgewick, R.
+ *          Algorithms, p290.
+ *          Addison-Wesley, 1983. ISBN 0-201-06672-6.
+ */
+
+/* @(#) $Id: trees.c,v 1.1 2008/10/17 06:17:16 scuri Exp $ */
+
+/* #define GEN_TREES_H */
+
+#include "deflate.h"
+
+#ifdef DEBUG
+#  include <ctype.h>
+#endif
+
+/* ===========================================================================
+ * Constants
+ */
+
+#define MAX_BL_BITS 7
+/* Bit length codes must not exceed MAX_BL_BITS bits */
+
+#define END_BLOCK 256
+/* end of block literal code */
+
+#define REP_3_6      16
+/* repeat previous bit length 3-6 times (2 bits of repeat count) */
+
+#define REPZ_3_10    17
+/* repeat a zero length 3-10 times  (3 bits of repeat count) */
+
+#define REPZ_11_138  18
+/* repeat a zero length 11-138 times  (7 bits of repeat count) */
+
+local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
+   = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
+
+local const int extra_dbits[D_CODES] /* extra bits for each distance code */
+   = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
+   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
+
+local const uch bl_order[BL_CODES]
+   = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
+/* The lengths of the bit length codes are sent in order of decreasing
+ * probability, to avoid transmitting the lengths for unused bit length codes.
+ */
+
+#define Buf_size (8 * 2*sizeof(char))
+/* Number of bits used within bi_buf. (bi_buf might be implemented on
+ * more than 16 bits on some systems.)
+ */
+
+/* ===========================================================================
+ * Local data. These are initialized only once.
+ */
+
+#define DIST_CODE_LEN  512 /* see definition of array dist_code below */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+/* non ANSI compilers may not accept trees.h */
+
+local ct_data static_ltree[L_CODES+2];
+/* The static literal tree. Since the bit lengths are imposed, there is no
+ * need for the L_CODES extra codes used during heap construction. However
+ * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
+ * below).
+ */
+
+local ct_data static_dtree[D_CODES];
+/* The static distance tree. (Actually a trivial tree since all codes use
+ * 5 bits.)
+ */
+
+uch _dist_code[DIST_CODE_LEN];
+/* Distance codes. The first 256 values correspond to the distances
+ * 1 .. 256, the last 256 values correspond to the top 8 bits of
+ * the 15 bit distances.
+ */
+
+uch _length_code[MAX_MATCH-MIN_MATCH+1];
+/* length code for each normalized match length (0 == MIN_MATCH) */
+
+local int base_length[LENGTH_CODES];
+/* First normalized length for each code (0 = MIN_MATCH) */
+
+local int base_dist[D_CODES];
+/* First normalized distance for each code (0 = distance of 1) */
+
+#else
+#  include "trees.h"
+#endif /* GEN_TREES_H */
+
+struct static_tree_desc_s {
+    const ct_data *static_tree;  /* static tree or NULL */
+    const intf *extra_bits;      /* extra bits for each code or NULL */
+    int     extra_base;          /* base index for extra_bits */
+    int     elems;               /* max number of elements in the tree */
+    int     max_length;          /* max bit length for the codes */
+};
+
+local static_tree_desc  static_l_desc =
+{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
+
+local static_tree_desc  static_d_desc =
+{static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS};
+
+local static_tree_desc  static_bl_desc =
+{(const ct_data *)0, extra_blbits, 0,   BL_CODES, MAX_BL_BITS};
+
+/* ===========================================================================
+ * Local (static) routines in this file.
+ */
+
+local void tr_static_init OF((void));
+local void init_block     OF((deflate_state *s));
+local void pqdownheap     OF((deflate_state *s, ct_data *tree, int k));
+local void gen_bitlen     OF((deflate_state *s, tree_desc *desc));
+local void gen_codes      OF((ct_data *tree, int max_code, ushf *bl_count));
+local void build_tree     OF((deflate_state *s, tree_desc *desc));
+local void scan_tree      OF((deflate_state *s, ct_data *tree, int max_code));
+local void send_tree      OF((deflate_state *s, ct_data *tree, int max_code));
+local int  build_bl_tree  OF((deflate_state *s));
+local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
+                              int blcodes));
+local void compress_block OF((deflate_state *s, ct_data *ltree,
+                              ct_data *dtree));
+local void set_data_type  OF((deflate_state *s));
+local unsigned bi_reverse OF((unsigned value, int length));
+local void bi_windup      OF((deflate_state *s));
+local void bi_flush       OF((deflate_state *s));
+local void copy_block     OF((deflate_state *s, charf *buf, unsigned len,
+                              int header));
+
+#ifdef GEN_TREES_H
+local void gen_trees_header OF((void));
+#endif
+
+#ifndef DEBUG
+#  define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
+   /* Send a code of the given tree. c and tree must not have side effects */
+
+#else /* DEBUG */
+#  define send_code(s, c, tree) \
+     { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
+       send_bits(s, tree[c].Code, tree[c].Len); }
+#endif
+
+/* ===========================================================================
+ * Output a short LSB first on the stream.
+ * IN assertion: there is enough room in pendingBuf.
+ */
+#define put_short(s, w) { \
+    put_byte(s, (uch)((w) & 0xff)); \
+    put_byte(s, (uch)((ush)(w) >> 8)); \
+}
+
+/* ===========================================================================
+ * Send a value on a given number of bits.
+ * IN assertion: length <= 16 and value fits in length bits.
+ */
+#ifdef DEBUG
+local void send_bits      OF((deflate_state *s, int value, int length));
+
+local void send_bits(s, value, length)
+    deflate_state *s;
+    int value;  /* value to send; must fit in length bits */
+    int length; /* number of bits of value to send (asserted 1..15) */
+{
+    Tracevv((stderr," l %2d v %4x ", length, value));
+    Assert(length > 0 && length <= 15, "invalid length");
+    s->bits_sent += (ulg)length;
+
+    /* If not enough room in bi_buf, use (valid) bits from bi_buf and
+     * (16 - bi_valid) bits from value, leaving (length - (16-bi_valid))
+     * bits of value to start the next buffer.
+     */
+    if (s->bi_valid > (int)Buf_size - length) {
+        s->bi_buf |= (value << s->bi_valid);
+        put_short(s, s->bi_buf);
+        s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
+        s->bi_valid += length - Buf_size;
+    } else {
+        s->bi_buf |= value << s->bi_valid;
+        s->bi_valid += length;
+    }
+}
+#else /* !DEBUG */
+
+#define send_bits(s, value, length) \
+{ int len = length;\
+  if (s->bi_valid > (int)Buf_size - len) {\
+    int val = value;\
+    s->bi_buf |= (val << s->bi_valid);\
+    put_short(s, s->bi_buf);\
+    s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
+    s->bi_valid += len - Buf_size;\
+  } else {\
+    s->bi_buf |= (value) << s->bi_valid;\
+    s->bi_valid += len;\
+  }\
+}
+#endif /* DEBUG */
+
+
+/* the arguments must not have side effects */
+
+/* ===========================================================================
+ * Initialize the 'constant' tables on first call (no-op when trees.h is used).
+ */
+local void tr_static_init()
+{
+#if defined(GEN_TREES_H) || !defined(STDC)
+    static int static_init_done = 0;    /* nonzero once the tables are built */
+    int n;        /* iterates over tree elements */
+    int bits;     /* bit counter */
+    int length;   /* length value */
+    int code;     /* code value */
+    int dist;     /* distance index */
+    ush bl_count[MAX_BITS+1];
+    /* number of codes at each bit length for an optimal tree */
+
+    if (static_init_done) return;       /* built by an earlier call */
+
+    /* For some embedded targets, global variables are not initialized: */
+    static_l_desc.static_tree = static_ltree;
+    static_l_desc.extra_bits = extra_lbits;
+    static_d_desc.static_tree = static_dtree;
+    static_d_desc.extra_bits = extra_dbits;
+    static_bl_desc.extra_bits = extra_blbits;
+
+    /* Initialize the mapping length (0..255) -> length code (0..28) */
+    length = 0;
+    for (code = 0; code < LENGTH_CODES-1; code++) {
+        base_length[code] = length;
+        for (n = 0; n < (1<<extra_lbits[code]); n++) {
+            _length_code[length++] = (uch)code;
+        }
+    }
+    Assert (length == 256, "tr_static_init: length != 256");
+    /* Note that the length 255 (match length 258) can be represented
+     * in two different ways: code 284 + 5 bits or code 285, so we
+     * overwrite length_code[255] to use the best encoding:
+     */
+    _length_code[length-1] = (uch)code;
+
+    /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
+    dist = 0;
+    for (code = 0 ; code < 16; code++) {
+        base_dist[code] = dist;
+        for (n = 0; n < (1<<extra_dbits[code]); n++) {
+            _dist_code[dist++] = (uch)code;
+        }
+    }
+    Assert (dist == 256, "tr_static_init: dist != 256");
+    dist >>= 7; /* from now on, all distances are divided by 128 */
+    for ( ; code < D_CODES; code++) {
+        base_dist[code] = dist << 7;
+        for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
+            _dist_code[256 + dist++] = (uch)code;
+        }
+    }
+    Assert (dist == 256, "tr_static_init: 256+dist != 512");
+
+    /* Construct the codes of the static literal tree */
+    for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
+    n = 0;
+    while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
+    while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
+    while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
+    while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
+    /* Codes 286 and 287 do not exist, but we must include them in the
+     * tree construction to get a canonical Huffman tree (longest code
+     * all ones)
+     */
+    gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
+
+    /* The static distance tree is trivial: every code is 5 bits long */
+    for (n = 0; n < D_CODES; n++) {
+        static_dtree[n].Len = 5;
+        static_dtree[n].Code = bi_reverse((unsigned)n, 5);
+    }
+    static_init_done = 1;   /* NOTE(review): flag is set without any locking */
+
+#  ifdef GEN_TREES_H
+    gen_trees_header();
+#  endif
+#endif /* defined(GEN_TREES_H) || !defined(STDC) */
+}
+
+/* ===========================================================================
+ * Generate the file trees.h describing the static trees.
+ */
+#ifdef GEN_TREES_H
+#  ifndef DEBUG
+#    include <stdio.h>
+#  endif
+
+#  define SEPARATOR(i, last, width) \
+      ((i) == (last)? "\n};\n\n" :    \
+       ((i) % (width) == (width)-1 ? ",\n" : ", "))
+
+void gen_trees_header()
+{
+    FILE *header = fopen("trees.h", "w");   /* generated in the current dir */
+    int i;
+
+    Assert (header != NULL, "Can't open trees.h");
+    if (header == NULL) return;     /* never write through a NULL stream */
+    fprintf(header,
+            "/* header created automatically with -DGEN_TREES_H */\n\n");
+    fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
+    for (i = 0; i < L_CODES+2; i++) {
+        fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
+                static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
+    }
+
+    fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
+    for (i = 0; i < D_CODES; i++) {
+        fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
+                static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
+    }
+
+    fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n");
+    for (i = 0; i < DIST_CODE_LEN; i++) {
+        fprintf(header, "%2u%s", _dist_code[i],
+                SEPARATOR(i, DIST_CODE_LEN-1, 20));
+    }
+
+    fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
+    for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
+        fprintf(header, "%2u%s", _length_code[i],
+                SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
+    }
+
+    fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
+    for (i = 0; i < LENGTH_CODES; i++) {
+        fprintf(header, "%1u%s", base_length[i],
+                SEPARATOR(i, LENGTH_CODES-1, 20));
+    }
+
+    fprintf(header, "local const int base_dist[D_CODES] = {\n");
+    for (i = 0; i < D_CODES; i++) {
+        fprintf(header, "%5u%s", base_dist[i],
+                SEPARATOR(i, D_CODES-1, 10));
+    }
+
+    fclose(header);
+}
+#endif /* GEN_TREES_H */
+
+/* ===========================================================================
+ * Initialize the tree data structures for a new zlib stream.
+ */
+void _tr_init(s)
+    deflate_state *s;   /* deflate state whose tree fields are reset */
+{
+    tr_static_init();
+
+    /* Attach each dynamic tree to its descriptor ... */
+    s->l_desc.dyn_tree  = s->dyn_ltree;
+    s->d_desc.dyn_tree  = s->dyn_dtree;
+    s->bl_desc.dyn_tree = s->bl_tree;
+
+    /* ... and pair every descriptor with its static counterpart */
+    s->l_desc.stat_desc  = &static_l_desc;
+    s->d_desc.stat_desc  = &static_d_desc;
+    s->bl_desc.stat_desc = &static_bl_desc;
+
+    /* Empty bit buffer; last_eob_len of 8 is enough lookahead for inflate */
+    s->bi_valid = 0;
+    s->bi_buf = 0;
+    s->last_eob_len = 8;
+#ifdef DEBUG
+    s->bits_sent = 0L;
+    s->compressed_len = 0L;
+#endif
+    init_block(s);      /* initialize the first block of the first file */
+}
+
+/* ===========================================================================
+ * Initialize a new block.
+ */
+local void init_block(s)
+    deflate_state *s;
+{
+    int idx;    /* walks each frequency table from the top down */
+
+    /* Clear every symbol frequency in the three dynamic trees. */
+    for (idx = L_CODES;  idx-- > 0; ) s->dyn_ltree[idx].Freq = 0;
+    for (idx = D_CODES;  idx-- > 0; ) s->dyn_dtree[idx].Freq = 0;
+    for (idx = BL_CODES; idx-- > 0; ) s->bl_tree[idx].Freq = 0;
+
+    s->dyn_ltree[END_BLOCK].Freq = 1;   /* END_BLOCK is counted once */
+    s->static_len = 0L;  s->opt_len = 0L;
+    s->matches = 0;      s->last_lit = 0;
+}
+
+#define SMALLEST 1
+/* Index within the heap array of least frequent node in the Huffman tree */
+
+
+/* ===========================================================================
+ * Remove the smallest element from the heap and recreate the heap with
+ * one less element. Updates heap and heap_len.
+ */
+#define pqremove(s, tree, top) \
+{\
+    top = s->heap[SMALLEST]; \
+    s->heap[SMALLEST] = s->heap[s->heap_len--]; \
+    pqdownheap(s, tree, SMALLEST); \
+}
+
+/* ===========================================================================
+ * Compares two subtrees: n is smaller than m if it is less frequent or,
+ * at equal frequency, not deeper. This minimizes the worst case length.
+ */
+#define smaller(tree, n, m, depth) \
+   (tree[n].Freq != tree[m].Freq ? tree[n].Freq < tree[m].Freq \
+                                 : depth[n] <= depth[m])
+
+/* ===========================================================================
+ * Restore the heap property by moving down the tree starting at node k,
+ * exchanging a node with the smallest of its two sons if necessary, stopping
+ * when the heap property is re-established (each father smaller than its
+ * two sons).
+ */
+local void pqdownheap(s, tree, k)
+    deflate_state *s;
+    ct_data *tree;  /* tree whose frequencies order the heap */
+    int k;               /* heap slot of the element to sift down */
+{
+    int moving = s->heap[k]; /* element being sifted down */
+    int child;               /* candidate son of k */
+
+    /* 2*k is the left son of k; stop once k has no sons left. */
+    for (child = 2*k; child <= s->heap_len; child = 2*k) {
+        /* Prefer the right son when it exists and is the smaller one: */
+        if (child != s->heap_len &&
+            smaller(tree, s->heap[child+1], s->heap[child], s->depth)) {
+            child++;
+        }
+        /* Heap property holds again once the moving element fits here */
+        if (smaller(tree, moving, s->heap[child], s->depth)) break;
+
+        /* Otherwise pull the son up one level and descend into its slot */
+        s->heap[k] = s->heap[child];
+        k = child;
+    }
+    s->heap[k] = moving;
+}
+
+/* ===========================================================================
+ * Compute the optimal bit lengths for a tree and update the total bit length
+ * for the current block.
+ * IN assertion: the fields freq and dad are set, heap[heap_max] and
+ *    above are the tree nodes sorted by increasing frequency.
+ * OUT assertions: the field len is set to the optimal bit length, the
+ *     array bl_count contains the frequencies for each bit length.
+ *     The length opt_len is updated; static_len is also updated if stree is
+ *     not null.
+ */
+local void gen_bitlen(s, desc)
+    deflate_state *s;
+    tree_desc *desc;    /* the tree descriptor */
+{
+    ct_data *tree        = desc->dyn_tree;
+    int max_code         = desc->max_code; /* largest leaf index */
+    const ct_data *stree = desc->stat_desc->static_tree; /* may be null */
+    const intf *extra    = desc->stat_desc->extra_bits;
+    int base             = desc->stat_desc->extra_base; /* extra[] origin */
+    int max_length       = desc->stat_desc->max_length; /* length limit */
+    int h;              /* heap index */
+    int n, m;           /* iterate over the tree elements */
+    int bits;           /* bit length */
+    int xbits;          /* extra bits */
+    ush f;              /* frequency */
+    int overflow = 0;   /* number of elements with bit length too large */
+
+    for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
+
+    /* In a first pass, compute the optimal bit lengths (which may
+     * overflow in the case of the bit length tree).
+     */
+    tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
+
+    for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
+        n = s->heap[h];
+        bits = tree[tree[n].Dad].Len + 1; /* one more than the father */
+        if (bits > max_length) bits = max_length, overflow++; /* clamp */
+        tree[n].Len = (ush)bits;
+        /* We overwrite tree[n].Dad which is no longer needed */
+
+        if (n > max_code) continue; /* not a leaf node */
+
+        s->bl_count[bits]++;
+        xbits = 0;
+        if (n >= base) xbits = extra[n-base];
+        f = tree[n].Freq;
+        s->opt_len += (ulg)f * (bits + xbits);
+        if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits);
+    }
+    if (overflow == 0) return; /* no length had to be clamped */
+
+    Trace((stderr,"\nbit length overflow\n"));
+    /* This happens for example on obj2 and pic of the Calgary corpus */
+
+    /* Find the first bit length which could increase: */
+    do {
+        bits = max_length-1;
+        while (s->bl_count[bits] == 0) bits--;
+        s->bl_count[bits]--;      /* move one leaf down the tree */
+        s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
+        s->bl_count[max_length]--;
+        /* The brother of the overflow item also moves one step up,
+         * but this does not affect bl_count[max_length]
+         */
+        overflow -= 2;
+    } while (overflow > 0);
+
+    /* Now recompute all bit lengths, scanning in increasing frequency.
+     * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
+     * lengths instead of fixing only the wrong ones. This idea is taken
+     * from 'ar' written by Haruhiko Okumura.)
+     */
+    for (bits = max_length; bits != 0; bits--) {
+        n = s->bl_count[bits]; /* leaves still to be given this length */
+        while (n != 0) {
+            m = s->heap[--h];
+            if (m > max_code) continue; /* not a leaf node */
+            if ((unsigned) tree[m].Len != (unsigned) bits) {
+                Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
+                s->opt_len += ((long)bits - (long)tree[m].Len)
+                              *(long)tree[m].Freq;
+                tree[m].Len = (ush)bits;
+            }
+            n--;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Generate the codes for a given tree and bit counts (which need not be
+ * optimal).
+ * IN assertion: the array bl_count contains the bit length statistics for
+ * the given tree and the field len is set for all tree elements.
+ * OUT assertion: the field code is set for all tree elements of non
+ *     zero code length.
+ */
+local void gen_codes (tree, max_code, bl_count)
+    ct_data *tree;             /* tree whose Code fields are filled in */
+    int max_code;              /* last symbol index that is visited */
+    ushf *bl_count;            /* how many codes use each bit length */
+{
+    ush next_code[MAX_BITS+1]; /* next unassigned code per bit length */
+    ush code = 0;              /* first code of the current length */
+    int bits;                  /* bit length being processed */
+    int n;                     /* symbol being processed */
+
+    /* From the counts of all shorter codes, derive the first code value
+     * of every length. No bit reversal happens at this stage.
+     */
+    for (bits = 1; bits <= MAX_BITS; bits++) {
+        code = (ush)((code + bl_count[bits-1]) << 1);
+        next_code[bits] = code;
+    }
+    /* With consistent counts in bl_count the last code is all ones. */
+    Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
+            "inconsistent bit counts");
+    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
+
+    for (n = 0; n <= max_code; n++) {
+        int len = tree[n].Len;
+        if (len != 0) {
+            /* Take the next code of this length and store it reversed */
+            tree[n].Code = bi_reverse(next_code[len]++, len);
+
+            Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
+                 n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
+        }
+    }
+}
+
+/* ===========================================================================
+ * Construct one Huffman tree and assigns the code bit strings and lengths.
+ * Update the total bit length for the current block.
+ * IN assertion: the field freq is set for all tree elements.
+ * OUT assertions: the fields len and code are set to the optimal bit length
+ *     and corresponding code. The length opt_len is updated; static_len is
+ *     also updated if stree is not null. The field max_code is set.
+ */
+local void build_tree(s, desc)
+    deflate_state *s;
+    tree_desc *desc; /* the tree descriptor; its max_code field is written */
+{
+    ct_data *tree         = desc->dyn_tree;
+    const ct_data *stree  = desc->stat_desc->static_tree; /* may be null */
+    int elems             = desc->stat_desc->elems; /* symbols to examine */
+    int n, m;          /* iterate over heap elements */
+    int max_code = -1; /* largest code with non zero frequency */
+    int node;          /* new node being created */
+
+    /* Construct the initial heap, with least frequent element in
+     * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
+     * heap[0] is not used.
+     */
+    s->heap_len = 0, s->heap_max = HEAP_SIZE;
+
+    for (n = 0; n < elems; n++) {
+        if (tree[n].Freq != 0) {
+            s->heap[++(s->heap_len)] = max_code = n;
+            s->depth[n] = 0;
+        } else {
+            tree[n].Len = 0; /* unused symbol: zero code length */
+        }
+    }
+
+    /* The pkzip format requires that at least one distance code exists,
+     * and that at least one bit should be sent even if there is only one
+     * possible code. So to avoid special checks later on we force at least
+     * two codes of non zero frequency.
+     */
+    while (s->heap_len < 2) {
+        node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
+        tree[node].Freq = 1;
+        s->depth[node] = 0;
+        s->opt_len--; if (stree) s->static_len -= stree[node].Len;
+        /* node is 0 or 1 so it does not have extra bits */
+    }
+    desc->max_code = max_code;
+
+    /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
+     * establish sub-heaps of increasing lengths:
+     */
+    for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
+
+    /* Construct the Huffman tree by repeatedly combining the least two
+     * frequent nodes.
+     */
+    node = elems;              /* next internal node of the tree */
+    do {
+        pqremove(s, tree, n);  /* n = node of least frequency */
+        m = s->heap[SMALLEST]; /* m = node of next least frequency */
+
+        s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
+        s->heap[--(s->heap_max)] = m;
+
+        /* Create a new node father of n and m */
+        tree[node].Freq = tree[n].Freq + tree[m].Freq;
+        s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ?
+                                s->depth[n] : s->depth[m]) + 1);
+        tree[n].Dad = tree[m].Dad = (ush)node;
+#ifdef DUMP_BL_TREE
+        if (tree == s->bl_tree) {
+            fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
+                    node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
+        }
+#endif
+        /* and insert the new node in the heap, in the slot m occupied */
+        s->heap[SMALLEST] = node++;
+        pqdownheap(s, tree, SMALLEST);
+
+    } while (s->heap_len >= 2);
+
+    s->heap[--(s->heap_max)] = s->heap[SMALLEST]; /* last node left: root */
+
+    /* At this point, the fields freq and dad are set. We can now
+     * generate the bit lengths.
+     */
+    gen_bitlen(s, (tree_desc *)desc);
+
+    /* The field len is now set, we can generate the bit codes */
+    gen_codes ((ct_data *)tree, max_code, s->bl_count);
+}
+
+/* ===========================================================================
+ * Scan a literal or distance tree to determine the frequencies of the codes
+ * in the bit length tree.
+ */
+local void scan_tree (s, tree, max_code)
+    deflate_state *s;
+    ct_data *tree;   /* the tree to be scanned */
+    int max_code;    /* and its largest code of non zero frequency */
+{
+    int n;                     /* iterates over all tree elements */
+    int prevlen = -1;          /* last emitted length */
+    int curlen;                /* length of current code */
+    int nextlen = tree[0].Len; /* length of next code */
+    int count = 0;             /* repeat count of the current code */
+    int max_count = 7;         /* max repeat count */
+    int min_count = 4;         /* min repeat count */
+
+    if (nextlen == 0) max_count = 138, min_count = 3;
+    tree[max_code+1].Len = (ush)0xffff; /* guard: ends the final run */
+
+    for (n = 0; n <= max_code; n++) {
+        curlen = nextlen; nextlen = tree[n+1].Len;
+        if (++count < max_count && curlen == nextlen) {
+            continue; /* run of equal lengths goes on */
+        } else if (count < min_count) {
+            s->bl_tree[curlen].Freq += count; /* too short for a repeat code */
+        } else if (curlen != 0) {
+            if (curlen != prevlen) s->bl_tree[curlen].Freq++;
+            s->bl_tree[REP_3_6].Freq++;
+        } else if (count <= 10) {
+            s->bl_tree[REPZ_3_10].Freq++;
+        } else {
+            s->bl_tree[REPZ_11_138].Freq++;
+        }
+        count = 0; prevlen = curlen;
+        /* Pick the run limits that apply to the next length: */
+        if (nextlen == 0) {
+            max_count = 138, min_count = 3;
+        } else if (curlen == nextlen) {
+            max_count = 6, min_count = 3;
+        } else {
+            max_count = 7, min_count = 4;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Send a literal or distance tree in compressed form, using the codes in
+ * bl_tree.
+ */
+local void send_tree (s, tree, max_code)
+    deflate_state *s;
+    ct_data *tree; /* the tree to be sent */
+    int max_code;       /* and its largest code of non zero frequency */
+{
+    int n;                     /* iterates over all tree elements */
+    int prevlen = -1;          /* last emitted length */
+    int curlen;                /* length of current code */
+    int nextlen = tree[0].Len; /* length of next code */
+    int count = 0;             /* repeat count of the current code */
+    int max_count = 7;         /* max repeat count */
+    int min_count = 4;         /* min repeat count */
+
+    /* tree[max_code+1].Len = -1; */  /* guard already set */
+    if (nextlen == 0) max_count = 138, min_count = 3;
+
+    for (n = 0; n <= max_code; n++) {
+        curlen = nextlen; nextlen = tree[n+1].Len;
+        if (++count < max_count && curlen == nextlen) {
+            continue; /* run of equal lengths goes on */
+        } else if (count < min_count) {
+            do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
+
+        } else if (curlen != 0) {
+            if (curlen != prevlen) {
+                send_code(s, curlen, s->bl_tree); count--;
+            }
+            Assert(count >= 3 && count <= 6, " 3_6?");
+            send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
+
+        } else if (count <= 10) {
+            send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
+
+        } else {
+            send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
+        }
+        count = 0; prevlen = curlen;
+        /* Pick the run limits that apply to the next length: */
+        if (nextlen == 0) {
+            max_count = 138, min_count = 3;
+        } else if (curlen == nextlen) {
+            max_count = 6, min_count = 3;
+        } else {
+            max_count = 7, min_count = 4;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Construct the Huffman tree for the bit lengths and return the index in
+ * bl_order of the last bit length code to send.
+ */
+local int build_bl_tree(s)
+    deflate_state *s;
+{
+    int last = BL_CODES-1; /* bl_order index of the last code to transmit */
+
+    /* Tally, into bl_tree, the bit lengths used by both data trees */
+    scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
+    scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
+
+    /* Turn those tallies into the bit length tree itself. Afterwards
+     * opt_len covers the encoded tree representations, but neither the
+     * bit length code lengths nor the 5+5+4 bits for the counts.
+     */
+    build_tree(s, (tree_desc *)(&(s->bl_desc)));
+
+    /* Trailing zero lengths (in bl_order) need not be sent, but the pkzip
+     * format requires at least 4 bit length codes, so never go below
+     * index 3. (appnote.txt says 3 codes but the value really used is 4.)
+     */
+    while (last >= 3 && s->bl_tree[bl_order[last]].Len == 0) {
+        last--;
+    }
+    /* Add the cost of the bit length code lengths and of the counts */
+    s->opt_len += 3*(last+1) + 5+5+4;
+    Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
+            s->opt_len, s->static_len));
+
+    return last;
+}
+
+/* ===========================================================================
+ * Send the header for a block using dynamic Huffman trees: the counts, the
+ * lengths of the bit length codes, the literal tree and the distance tree.
+ * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
+ */
+local void send_all_trees(s, lcodes, dcodes, blcodes)
+    deflate_state *s;
+    int lcodes, dcodes, blcodes; /* number of codes for each tree */
+{
+    int rank;                    /* index in bl_order */
+
+    Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
+    Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
+            "too many codes");
+    Tracev((stderr, "\nbl counts: "));
+    send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
+    send_bits(s, dcodes-1,   5);
+    send_bits(s, blcodes-4,  4); /* not -3 as stated in appnote.txt */
+    for (rank = 0; rank < blcodes; rank++) { /* 3 bits per code length */
+        Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
+        send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
+    }
+    Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
+
+    send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
+    Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
+
+    send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
+    Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
+}
+
+/* ===========================================================================
+ * Send a stored block
+ */
+void _tr_stored_block(s, buf, stored_len, eof)
+    deflate_state *s;
+    charf *buf;       /* input block, copied to the output unchanged */
+    ulg stored_len;   /* length of input block, in bytes */
+    int eof;          /* true if this is the last block for a file */
+{
+    send_bits(s, (STORED_BLOCK<<1)+eof, 3);  /* send block type */
+#ifdef DEBUG
+    s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
+    s->compressed_len += (stored_len + 4) << 3; /* +4: the two length words */
+#endif
+    copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
+}
+
+/* ===========================================================================
+ * Send one empty static block to give enough lookahead for inflate.
+ * This takes 10 bits, of which 7 may remain in the bit buffer.
+ * The current inflate code requires 9 bits of lookahead. If the
+ * last two codes for the previous block (real code plus EOB) were coded
+ * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
+ * the last real code. In this case we send two empty static blocks instead
+ * of one. (There are no problems if the previous block is stored or fixed.)
+ * To simplify the code, we assume the worst case of last real code encoded
+ * on one bit only.
+ */
+void _tr_align(s)
+    deflate_state *s;
+{
+    send_bits(s, STATIC_TREES<<1, 3); /* block type, eof bit clear */
+    send_code(s, END_BLOCK, static_ltree);
+#ifdef DEBUG
+    s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
+#endif
+    bi_flush(s);
+    /* Of the 10 bits for the empty block, we have already sent
+     * (10 - bi_valid) bits. The lookahead for the last real code (before
+     * the EOB of the previous block) was thus at least one plus the length
+     * of the EOB plus what we have just sent of the empty static block.
+     */
+    if (1 + s->last_eob_len + 10 - s->bi_valid < 9) {
+        send_bits(s, STATIC_TREES<<1, 3); /* second empty static block */
+        send_code(s, END_BLOCK, static_ltree);
+#ifdef DEBUG
+        s->compressed_len += 10L;
+#endif
+        bi_flush(s);
+    }
+    s->last_eob_len = 7; /* length of the EOB just sent (see above) */
+}
+
+/* ===========================================================================
+ * Determine the best encoding for the current block: dynamic trees, static
+ * trees or store, and output the encoded block to the zip file.
+ */
+void _tr_flush_block(s, buf, stored_len, eof)
+    deflate_state *s;
+    charf *buf;       /* input block, or NULL if too old */
+    ulg stored_len;   /* length of input block */
+    int eof;          /* true if this is the last block for a file */
+{
+    ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
+    int max_blindex = 0;  /* index of last bit length code of non zero freq */
+
+    /* Build the Huffman trees unless a stored block is forced */
+    if (s->level > 0) {
+
+        /* Check if the file is binary or text */
+        if (stored_len > 0 && s->strm->data_type == Z_UNKNOWN)
+            set_data_type(s);
+
+        /* Construct the literal and distance trees */
+        build_tree(s, (tree_desc *)(&(s->l_desc)));
+        Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
+                s->static_len));
+
+        build_tree(s, (tree_desc *)(&(s->d_desc)));
+        Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
+                s->static_len));
+        /* At this point, opt_len and static_len are the total bit lengths of
+         * the compressed block data, excluding the tree representations.
+         */
+
+        /* Build the bit length tree for the above two trees, and get the index
+         * in bl_order of the last bit length code to send.
+         */
+        max_blindex = build_bl_tree(s);
+
+        /* Determine the best encoding. Compute the block lengths in bytes. */
+        opt_lenb = (s->opt_len+3+7)>>3;       /* +3: block type, +7: round up */
+        static_lenb = (s->static_len+3+7)>>3;
+
+        Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
+                opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
+                s->last_lit));
+
+        /* From here on opt_lenb is the smaller of the two sizes */
+        if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
+
+    } else {
+        Assert(buf != (char*)0, "lost buf");
+        opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
+    }
+
+#ifdef FORCE_STORED
+    if (buf != (char*)0) { /* force stored block */
+#else
+    if (stored_len+4 <= opt_lenb && buf != (char*)0) {
+                       /* 4: two words for the lengths */
+#endif
+        /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
+         * Otherwise we can't have processed more than WSIZE input bytes since
+         * the last block flush, because compression would have been
+         * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
+         * transform a block into a stored block.
+         */
+        _tr_stored_block(s, buf, stored_len, eof);
+
+#ifdef FORCE_STATIC
+    } else if (static_lenb >= 0) { /* force static trees */
+#else
+    } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
+#endif
+        send_bits(s, (STATIC_TREES<<1)+eof, 3);
+        compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
+#ifdef DEBUG
+        s->compressed_len += 3 + s->static_len;
+#endif
+    } else {
+        send_bits(s, (DYN_TREES<<1)+eof, 3);
+        send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
+                       max_blindex+1);
+        compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
+#ifdef DEBUG
+        s->compressed_len += 3 + s->opt_len;
+#endif
+    }
+    Assert (s->compressed_len == s->bits_sent, "bad compressed size");
+    /* The above check is made mod 2^32, for files larger than 512 MB
+     * and uLong implemented on 32 bits.
+     */
+    init_block(s); /* reset the frequency counts for the next block */
+
+    if (eof) {
+        bi_windup(s);
+#ifdef DEBUG
+        s->compressed_len += 7;  /* align on byte boundary */
+#endif
+    }
+    Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
+           s->compressed_len-7*eof));
+}
+
+/* ===========================================================================
+ * Save the match info and tally the frequency counts. Return true if
+ * the current block must be flushed.
+ */
+int _tr_tally (s, dist, lc)
+    deflate_state *s;
+    unsigned dist;  /* distance of matched string, 0 for a literal */
+    unsigned lc;    /* match length-MIN_MATCH or unmatched char (if dist==0) */
+{
+    s->d_buf[s->last_lit] = (ush)dist;  /* record the pair for compress_block */
+    s->l_buf[s->last_lit++] = (uch)lc;
+    if (dist == 0) {
+        /* lc is the unmatched char */
+        s->dyn_ltree[lc].Freq++;
+    } else {
+        s->matches++;
+        /* Here, lc is the match length - MIN_MATCH */
+        dist--;             /* dist = match distance - 1 */
+        Assert((ush)dist < (ush)MAX_DIST(s) &&
+               (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
+               (ush)d_code(dist) < (ush)D_CODES,  "_tr_tally: bad match");
+
+        s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; /* length code */
+        s->dyn_dtree[d_code(dist)].Freq++;                /* distance code */
+    }
+
+#ifdef TRUNCATE_BLOCK
+    /* Try to guess if it is profitable to stop the current block here */
+    if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
+        /* Compute an upper bound for the compressed length */
+        ulg out_length = (ulg)s->last_lit*8L;
+        ulg in_length = (ulg)((long)s->strstart - s->block_start);
+        int dcode;
+        for (dcode = 0; dcode < D_CODES; dcode++) {
+            out_length += (ulg)s->dyn_dtree[dcode].Freq *
+                (5L+extra_dbits[dcode]);
+        }
+        out_length >>= 3; /* bits to bytes */
+        Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
+               s->last_lit, in_length, out_length,
+               100L - out_length*100L/in_length));
+        if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
+    }
+#endif
+    return (s->last_lit == s->lit_bufsize-1);
+    /* We avoid equality with lit_bufsize because of wraparound at 64K
+     * on 16 bit machines and because stored blocks are restricted to
+     * 64K-1 bytes.
+     */
+}
+
+/* ===========================================================================
+ * Send the block data compressed using the given Huffman trees
+ */
+local void compress_block(s, ltree, dtree)
+    deflate_state *s;
+    ct_data *ltree; /* literal tree */
+    ct_data *dtree; /* distance tree */
+{
+    unsigned dist;      /* distance of matched string */
+    int lc;             /* match length or unmatched char (if dist == 0) */
+    unsigned lx = 0;    /* running index in l_buf */
+    unsigned code;      /* the code to send */
+    int extra;          /* number of extra bits to send */
+
+    if (s->last_lit != 0) do { /* nothing to replay for an empty block */
+        dist = s->d_buf[lx];
+        lc = s->l_buf[lx++];
+        if (dist == 0) {
+            send_code(s, lc, ltree); /* send a literal byte */
+            Tracecv(isgraph(lc), (stderr," '%c' ", lc));
+        } else {
+            /* Here, lc is the match length - MIN_MATCH */
+            code = _length_code[lc];
+            send_code(s, code+LITERALS+1, ltree); /* send the length code */
+            extra = extra_lbits[code];
+            if (extra != 0) {
+                lc -= base_length[code];
+                send_bits(s, lc, extra);       /* send the extra length bits */
+            }
+            dist--; /* dist is now the match distance - 1 */
+            code = d_code(dist);
+            Assert (code < D_CODES, "bad d_code");
+
+            send_code(s, code, dtree);       /* send the distance code */
+            extra = extra_dbits[code];
+            if (extra != 0) {
+                dist -= base_dist[code];
+                send_bits(s, dist, extra);   /* send the extra distance bits */
+            }
+        } /* literal or match pair ? */
+
+        /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
+        Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
+               "pendingBuf overflow");
+
+    } while (lx < s->last_lit);
+
+    send_code(s, END_BLOCK, ltree);
+    s->last_eob_len = ltree[END_BLOCK].Len; /* remembered for _tr_align */
+}
+
+/* ===========================================================================
+ * Set the data type to BINARY or TEXT, using a crude approximation:
+ * set it to Z_TEXT if all symbols are either printable characters (33 to 255)
+ * or white spaces (9 to 13, or 32); or set it to Z_BINARY otherwise.
+ * IN assertion: the fields Freq of dyn_ltree are set.
+ */
+local void set_data_type(s)
+    deflate_state *s;
+{
+    int sym = 0; /* literal being inspected */
+
+    /* Walk past the unused codes in 0..8 and then in 14..31 */
+    while (sym < 9 && s->dyn_ltree[sym].Freq == 0) sym++;
+    if (sym == 9) {
+        sym = 14;
+        while (sym < 32 && s->dyn_ltree[sym].Freq == 0) sym++;
+    }
+    /* Reaching 32 means none of the codes 0..8 and 14..31 occurred */
+    s->strm->data_type = (sym == 32) ? Z_TEXT : Z_BINARY;
+}
+
+/* ===========================================================================
+ * Reverse the first len bits of a code, using straightforward code (a faster
+ * method would use a table)
+ * IN assertion: 1 <= len <= 15
+ */
+local unsigned bi_reverse(code, len)
+    unsigned code; /* bits to mirror, taken from the low end */
+    int len;       /* how many of them to mirror */
+{
+    register unsigned out = 0;
+    do {
+        out = (out | (code & 1)) << 1; /* append low bit, make room */
+        code >>= 1;
+    } while (--len > 0);
+    return out >> 1; /* undo the final, surplus shift */
+}
+
+/* ===========================================================================
+ * Flush the bit buffer, keeping at most 7 bits in it.
+ */
+local void bi_flush(s)
+    deflate_state *s;
+{
+    if (s->bi_valid < 8) return;       /* no complete byte to emit */
+    if (s->bi_valid != 16) {           /* emit low byte, keep the rest */
+        put_byte(s, (Byte)s->bi_buf);
+        s->bi_valid -= 8;
+        s->bi_buf >>= 8;
+        return;
+    }
+    put_short(s, s->bi_buf);           /* 16 bits pending: emit them all */
+    s->bi_valid = 0; s->bi_buf = 0;
+}
+
+/* ===========================================================================
+ * Flush the bit buffer and align the output on a byte boundary
+ */
+local void bi_windup(s)
+    deflate_state *s;
+{
+    if (s->bi_valid > 0) {
+        /* One byte suffices for up to 8 pending bits, otherwise two. */
+        if (s->bi_valid <= 8) { put_byte(s, (Byte)s->bi_buf); }
+        else                  { put_short(s, s->bi_buf); }
+    }
+    s->bi_valid = 0;
+    s->bi_buf = 0;
+#ifdef DEBUG
+    s->bits_sent = (s->bits_sent+7) & ~7; /* round up to a byte boundary */
+#endif
+}
+
+/* ===========================================================================
+ * Copy a stored block, storing first the length and its
+ * one's complement if requested.
+ */
+local void copy_block(s, buf, len, header)
+    deflate_state *s;
+    charf    *buf;    /* bytes to copy to the output */
+    unsigned len;     /* number of bytes at buf */
+    int      header;  /* nonzero: write len and ~len ahead of the data */
+{
+    s->last_eob_len = 8; /* enough lookahead for inflate */
+    bi_windup(s);        /* pad the output to a byte boundary */
+
+    if (header != 0) {
+        put_short(s, (ush)len);
+        put_short(s, (ush)~len);
+    }
+#ifdef DEBUG
+    if (header != 0) s->bits_sent += 2*16;
+    s->bits_sent += (ulg)len<<3;
+#endif
+    /* Raw copy of the payload, one byte at a time */
+    for (; len != 0; len--) {
+        put_byte(s, *buf);
+        buf++;
+    }
+}
diff --git a/src/zlib/trees.h b/src/zlib/trees.h
new file mode 100644
index 0000000..72facf9
--- /dev/null
+++ b/src/zlib/trees.h
@@ -0,0 +1,128 @@
+/* header created automatically with -DGEN_TREES_H */
+
+local const ct_data static_ltree[L_CODES+2] = {
+{{ 12},{  8}}, {{140},{  8}}, {{ 76},{  8}}, {{204},{  8}}, {{ 44},{  8}},
+{{172},{  8}}, {{108},{  8}}, {{236},{  8}}, {{ 28},{  8}}, {{156},{  8}},
+{{ 92},{  8}}, {{220},{  8}}, {{ 60},{  8}}, {{188},{  8}}, {{124},{  8}},
+{{252},{  8}}, {{  2},{  8}}, {{130},{  8}}, {{ 66},{  8}}, {{194},{  8}},
+{{ 34},{  8}}, {{162},{  8}}, {{ 98},{  8}}, {{226},{  8}}, {{ 18},{  8}},
+{{146},{  8}}, {{ 82},{  8}}, {{210},{  8}}, {{ 50},{  8}}, {{178},{  8}},
+{{114},{  8}}, {{242},{  8}}, {{ 10},{  8}}, {{138},{  8}}, {{ 74},{  8}},
+{{202},{  8}}, {{ 42},{  8}}, {{170},{  8}}, {{106},{  8}}, {{234},{  8}},
+{{ 26},{  8}}, {{154},{  8}}, {{ 90},{  8}}, {{218},{  8}}, {{ 58},{  8}},
+{{186},{  8}}, {{122},{  8}}, {{250},{  8}}, {{  6},{  8}}, {{134},{  8}},
+{{ 70},{  8}}, {{198},{  8}}, {{ 38},{  8}}, {{166},{  8}}, {{102},{  8}},
+{{230},{  8}}, {{ 22},{  8}}, {{150},{  8}}, {{ 86},{  8}}, {{214},{  8}},
+{{ 54},{  8}}, {{182},{  8}}, {{118},{  8}}, {{246},{  8}}, {{ 14},{  8}},
+{{142},{  8}}, {{ 78},{  8}}, {{206},{  8}}, {{ 46},{  8}}, {{174},{  8}},
+{{110},{  8}}, {{238},{  8}}, {{ 30},{  8}}, {{158},{  8}}, {{ 94},{  8}},
+{{222},{  8}}, {{ 62},{  8}}, {{190},{  8}}, {{126},{  8}}, {{254},{  8}},
+{{  1},{  8}}, {{129},{  8}}, {{ 65},{  8}}, {{193},{  8}}, {{ 33},{  8}},
+{{161},{  8}}, {{ 97},{  8}}, {{225},{  8}}, {{ 17},{  8}}, {{145},{  8}},
+{{ 81},{  8}}, {{209},{  8}}, {{ 49},{  8}}, {{177},{  8}}, {{113},{  8}},
+{{241},{  8}}, {{  9},{  8}}, {{137},{  8}}, {{ 73},{  8}}, {{201},{  8}},
+{{ 41},{  8}}, {{169},{  8}}, {{105},{  8}}, {{233},{  8}}, {{ 25},{  8}},
+{{153},{  8}}, {{ 89},{  8}}, {{217},{  8}}, {{ 57},{  8}}, {{185},{  8}},
+{{121},{  8}}, {{249},{  8}}, {{  5},{  8}}, {{133},{  8}}, {{ 69},{  8}},
+{{197},{  8}}, {{ 37},{  8}}, {{165},{  8}}, {{101},{  8}}, {{229},{  8}},
+{{ 21},{  8}}, {{149},{  8}}, {{ 85},{  8}}, {{213},{  8}}, {{ 53},{  8}},
+{{181},{  8}}, {{117},{  8}}, {{245},{  8}}, {{ 13},{  8}}, {{141},{  8}},
+{{ 77},{  8}}, {{205},{  8}}, {{ 45},{  8}}, {{173},{  8}}, {{109},{  8}},
+{{237},{  8}}, {{ 29},{  8}}, {{157},{  8}}, {{ 93},{  8}}, {{221},{  8}},
+{{ 61},{  8}}, {{189},{  8}}, {{125},{  8}}, {{253},{  8}}, {{ 19},{  9}},
+{{275},{  9}}, {{147},{  9}}, {{403},{  9}}, {{ 83},{  9}}, {{339},{  9}},
+{{211},{  9}}, {{467},{  9}}, {{ 51},{  9}}, {{307},{  9}}, {{179},{  9}},
+{{435},{  9}}, {{115},{  9}}, {{371},{  9}}, {{243},{  9}}, {{499},{  9}},
+{{ 11},{  9}}, {{267},{  9}}, {{139},{  9}}, {{395},{  9}}, {{ 75},{  9}},
+{{331},{  9}}, {{203},{  9}}, {{459},{  9}}, {{ 43},{  9}}, {{299},{  9}},
+{{171},{  9}}, {{427},{  9}}, {{107},{  9}}, {{363},{  9}}, {{235},{  9}},
+{{491},{  9}}, {{ 27},{  9}}, {{283},{  9}}, {{155},{  9}}, {{411},{  9}},
+{{ 91},{  9}}, {{347},{  9}}, {{219},{  9}}, {{475},{  9}}, {{ 59},{  9}},
+{{315},{  9}}, {{187},{  9}}, {{443},{  9}}, {{123},{  9}}, {{379},{  9}},
+{{251},{  9}}, {{507},{  9}}, {{  7},{  9}}, {{263},{  9}}, {{135},{  9}},
+{{391},{  9}}, {{ 71},{  9}}, {{327},{  9}}, {{199},{  9}}, {{455},{  9}},
+{{ 39},{  9}}, {{295},{  9}}, {{167},{  9}}, {{423},{  9}}, {{103},{  9}},
+{{359},{  9}}, {{231},{  9}}, {{487},{  9}}, {{ 23},{  9}}, {{279},{  9}},
+{{151},{  9}}, {{407},{  9}}, {{ 87},{  9}}, {{343},{  9}}, {{215},{  9}},
+{{471},{  9}}, {{ 55},{  9}}, {{311},{  9}}, {{183},{  9}}, {{439},{  9}},
+{{119},{  9}}, {{375},{  9}}, {{247},{  9}}, {{503},{  9}}, {{ 15},{  9}},
+{{271},{  9}}, {{143},{  9}}, {{399},{  9}}, {{ 79},{  9}}, {{335},{  9}},
+{{207},{  9}}, {{463},{  9}}, {{ 47},{  9}}, {{303},{  9}}, {{175},{  9}},
+{{431},{  9}}, {{111},{  9}}, {{367},{  9}}, {{239},{  9}}, {{495},{  9}},
+{{ 31},{  9}}, {{287},{  9}}, {{159},{  9}}, {{415},{  9}}, {{ 95},{  9}},
+{{351},{  9}}, {{223},{  9}}, {{479},{  9}}, {{ 63},{  9}}, {{319},{  9}},
+{{191},{  9}}, {{447},{  9}}, {{127},{  9}}, {{383},{  9}}, {{255},{  9}},
+{{511},{  9}}, {{  0},{  7}}, {{ 64},{  7}}, {{ 32},{  7}}, {{ 96},{  7}},
+{{ 16},{  7}}, {{ 80},{  7}}, {{ 48},{  7}}, {{112},{  7}}, {{  8},{  7}},
+{{ 72},{  7}}, {{ 40},{  7}}, {{104},{  7}}, {{ 24},{  7}}, {{ 88},{  7}},
+{{ 56},{  7}}, {{120},{  7}}, {{  4},{  7}}, {{ 68},{  7}}, {{ 36},{  7}},
+{{100},{  7}}, {{ 20},{  7}}, {{ 84},{  7}}, {{ 52},{  7}}, {{116},{  7}},
+{{  3},{  8}}, {{131},{  8}}, {{ 67},{  8}}, {{195},{  8}}, {{ 35},{  8}},
+{{163},{  8}}, {{ 99},{  8}}, {{227},{  8}}
+};
+
+local const ct_data static_dtree[D_CODES] = {
+{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}},
+{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}},
+{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}},
+{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}},
+{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}},
+{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}}
+};
+
+const uch _dist_code[DIST_CODE_LEN] = {
+ 0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8,
+ 8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10,
+10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
+13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,  0,  0, 16, 17,
+18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29
+};
+
+const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {
+ 0,  1,  2,  3,  4,  5,  6,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 12, 12,
+13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
+19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
+22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28
+};
+
+local const int base_length[LENGTH_CODES] = {
+0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
+64, 80, 96, 112, 128, 160, 192, 224, 0
+};
+
+local const int base_dist[D_CODES] = {
+    0,     1,     2,     3,     4,     6,     8,    12,    16,    24,
+   32,    48,    64,    96,   128,   192,   256,   384,   512,   768,
+ 1024,  1536,  2048,  3072,  4096,  6144,  8192, 12288, 16384, 24576
+};
+
diff --git a/src/zlib/uncompr.c b/src/zlib/uncompr.c
new file mode 100644
index 0000000..bf10000
--- /dev/null
+++ b/src/zlib/uncompr.c
@@ -0,0 +1,61 @@
+/* uncompr.c -- decompress a memory buffer
+ * Copyright (C) 1995-2003 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id: uncompr.c,v 1.1 2008/10/17 06:17:16 scuri Exp $ */
+
+#define ZLIB_INTERNAL
+#include "zlib.h"
+
+/* ===========================================================================
+     Decompresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer. Upon entry, destLen is the total
+   size of the destination buffer, which must be large enough to hold the
+   entire uncompressed data. (The size of the uncompressed data must have
+   been saved previously by the compressor and transmitted to the decompressor
+   by some mechanism outside the scope of this compression library.)
+   Upon exit, destLen is the actual size of the uncompressed data.
+     This function can be used to decompress a whole file at once if the
+   input file is mmap'ed.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer, or Z_DATA_ERROR if the input data was corrupted.
+*/
+int ZEXPORT uncompress (dest, destLen, source, sourceLen)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+{
+    z_stream stream;
+    int err;
+
+    stream.next_in = (Bytef*)source;
+    stream.avail_in = (uInt)sourceLen;
+    /* Check for source > 64K on 16-bit machine: */
+    if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR;
+
+    stream.next_out = dest;
+    stream.avail_out = (uInt)*destLen;
+    if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR;
+
+    stream.zalloc = (alloc_func)0;
+    stream.zfree = (free_func)0;
+    stream.opaque = (voidpf)0; /* passed to zalloc/zfree: must be initialized */
+    err = inflateInit(&stream);
+    if (err != Z_OK) return err;
+
+    err = inflate(&stream, Z_FINISH);
+    if (err != Z_STREAM_END) {
+        inflateEnd(&stream);
+        if (err == Z_NEED_DICT || (err == Z_BUF_ERROR && stream.avail_in == 0))
+            return Z_DATA_ERROR;
+        return err;
+    }
+    *destLen = stream.total_out;
+
+    /* release the inflate state; its status is the final result */
+    return inflateEnd(&stream);
+}
diff --git a/src/zlib/zconf.h b/src/zlib/zconf.h
new file mode 100644
index 0000000..1e66402
--- /dev/null
+++ b/src/zlib/zconf.h
@@ -0,0 +1,332 @@
+/* zconf.h -- configuration of the zlib compression library
+ * Copyright (C) 1995-2005 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id: zconf.h,v 1.1 2008/10/17 06:17:16 scuri Exp $ */
+
+#ifndef ZCONF_H
+#define ZCONF_H
+
+/*
+ * If you *really* need a unique prefix for all types and library functions,
+ * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
+ */
+#ifdef Z_PREFIX
+#  define deflateInit_          z_deflateInit_
+#  define deflate               z_deflate
+#  define deflateEnd            z_deflateEnd
+#  define inflateInit_          z_inflateInit_
+#  define inflate               z_inflate
+#  define inflateEnd            z_inflateEnd
+#  define deflateInit2_         z_deflateInit2_
+#  define deflateSetDictionary  z_deflateSetDictionary
+#  define deflateCopy           z_deflateCopy
+#  define deflateReset          z_deflateReset
+#  define deflateParams         z_deflateParams
+#  define deflateBound          z_deflateBound
+#  define deflatePrime          z_deflatePrime
+#  define inflateInit2_         z_inflateInit2_
+#  define inflateSetDictionary  z_inflateSetDictionary
+#  define inflateSync           z_inflateSync
+#  define inflateSyncPoint      z_inflateSyncPoint
+#  define inflateCopy           z_inflateCopy
+#  define inflateReset          z_inflateReset
+#  define inflateBack           z_inflateBack
+#  define inflateBackEnd        z_inflateBackEnd
+#  define compress              z_compress
+#  define compress2             z_compress2
+#  define compressBound         z_compressBound
+#  define uncompress            z_uncompress
+#  define adler32               z_adler32
+#  define crc32                 z_crc32
+#  define get_crc_table         z_get_crc_table
+#  define zError                z_zError
+
+#  define alloc_func            z_alloc_func
+#  define free_func             z_free_func
+#  define in_func               z_in_func
+#  define out_func              z_out_func
+#  define Byte                  z_Byte
+#  define uInt                  z_uInt
+#  define uLong                 z_uLong
+#  define Bytef                 z_Bytef
+#  define charf                 z_charf
+#  define intf                  z_intf
+#  define uIntf                 z_uIntf
+#  define uLongf                z_uLongf
+#  define voidpf                z_voidpf
+#  define voidp                 z_voidp
+#endif
+
+#if defined(__MSDOS__) && !defined(MSDOS)
+#  define MSDOS
+#endif
+#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2)
+#  define OS2
+#endif
+#if defined(_WINDOWS) && !defined(WINDOWS)
+#  define WINDOWS
+#endif
+#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__)
+#  ifndef WIN32
+#    define WIN32
+#  endif
+#endif
+#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32)
+#  if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__)
+#    ifndef SYS16BIT
+#      define SYS16BIT
+#    endif
+#  endif
+#endif
+
+/*
+ * Compile with -DMAXSEG_64K if the alloc function cannot allocate more
+ * than 64k bytes at a time (needed on systems with 16-bit int).
+ */
+#ifdef SYS16BIT
+#  define MAXSEG_64K
+#endif
+#ifdef MSDOS
+#  define UNALIGNED_OK
+#endif
+
+#ifdef __STDC_VERSION__
+#  ifndef STDC
+#    define STDC
+#  endif
+#  if __STDC_VERSION__ >= 199901L
+#    ifndef STDC99
+#      define STDC99
+#    endif
+#  endif
+#endif
+#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__))
+#  define STDC
+#endif
+
+#if defined(__OS400__) && !defined(STDC)    /* iSeries (formerly AS/400). */
+#  define STDC
+#endif
+
+#ifndef STDC
+#  ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
+#    define const       /* note: need a more gentle solution here */
+#  endif
+#endif
+
+/* Some Mac compilers merge all .h files incorrectly: */
+#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__)
+#  define NO_DUMMY_DECL
+#endif
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+#  ifdef MAXSEG_64K
+#    define MAX_MEM_LEVEL 8
+#  else
+#    define MAX_MEM_LEVEL 9
+#  endif
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2.
+ * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
+ * created by gzip. (Files created by minigzip can still be extracted by
+ * gzip.)
+ */
+#ifndef MAX_WBITS
+#  define MAX_WBITS   15 /* 32K LZ77 window */
+#endif
+
+/* The memory requirements for deflate are (in bytes):
+            (1 << (windowBits+2)) +  (1 << (memLevel+9))
+ that is: 128K for windowBits=15  +  128K for memLevel = 8  (default values)
+ plus a few kilobytes for small objects. For example, if you want to reduce
+ the default memory requirements from 256K to 128K, compile with
+     make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+ Of course this will generally degrade compression (there's no free lunch).
+
+   The memory requirements for inflate are (in bytes) 1 << windowBits
+ that is, 32K for windowBits=15 (default value) plus a few kilobytes
+ for small objects.
+*/
+
+                        /* Type declarations */
+
+#ifndef OF /* function prototypes */
+#  ifdef STDC
+#    define OF(args)  args
+#  else
+#    define OF(args)  ()
+#  endif
+#endif
+
+/* The following definitions for FAR are needed only for MSDOS mixed
+ * model programming (small or medium model with some far allocations).
+ * This was tested only with MSC; for other MSDOS compilers you may have
+ * to define NO_MEMCPY in zutil.h.  If you don't need the mixed model,
+ * just define FAR to be empty.
+ */
+#ifdef SYS16BIT
+#  if defined(M_I86SM) || defined(M_I86MM)
+     /* MSC small or medium model */
+#    define SMALL_MEDIUM
+#    ifdef _MSC_VER
+#      define FAR _far
+#    else
+#      define FAR far
+#    endif
+#  endif
+#  if (defined(__SMALL__) || defined(__MEDIUM__))
+     /* Turbo C small or medium model */
+#    define SMALL_MEDIUM
+#    ifdef __BORLANDC__
+#      define FAR _far
+#    else
+#      define FAR far
+#    endif
+#  endif
+#endif
+
+#if defined(WINDOWS) || defined(WIN32)
+   /* If building or using zlib as a DLL, define ZLIB_DLL.
+    * This is not mandatory, but it offers a little performance increase.
+    */
+#  ifdef ZLIB_DLL
+#    if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
+#      ifdef ZLIB_INTERNAL
+#        define ZEXTERN extern __declspec(dllexport)
+#      else
+#        define ZEXTERN extern __declspec(dllimport)
+#      endif
+#    endif
+#  endif  /* ZLIB_DLL */
+   /* If building or using zlib with the WINAPI/WINAPIV calling convention,
+    * define ZLIB_WINAPI.
+    * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
+    */
+#  ifdef ZLIB_WINAPI
+#    ifdef FAR
+#      undef FAR
+#    endif
+#    include <windows.h>
+     /* No need for _export, use ZLIB.DEF instead. */
+     /* For complete Windows compatibility, use WINAPI, not __stdcall. */
+#    define ZEXPORT WINAPI
+#    ifdef WIN32
+#      define ZEXPORTVA WINAPIV
+#    else
+#      define ZEXPORTVA FAR CDECL
+#    endif
+#  endif
+#endif
+
+#if defined (__BEOS__)
+#  ifdef ZLIB_DLL
+#    ifdef ZLIB_INTERNAL
+#      define ZEXPORT   __declspec(dllexport)
+#      define ZEXPORTVA __declspec(dllexport)
+#    else
+#      define ZEXPORT   __declspec(dllimport)
+#      define ZEXPORTVA __declspec(dllimport)
+#    endif
+#  endif
+#endif
+
+#ifndef ZEXTERN
+#  define ZEXTERN extern
+#endif
+#ifndef ZEXPORT
+#  define ZEXPORT
+#endif
+#ifndef ZEXPORTVA
+#  define ZEXPORTVA
+#endif
+
+#ifndef FAR
+#  define FAR
+#endif
+
+#if !defined(__MACTYPES__)
+typedef unsigned char  Byte;  /* 8 bits */
+#endif
+typedef unsigned int   uInt;  /* 16 bits or more */
+typedef unsigned long  uLong; /* 32 bits or more */
+
+#ifdef SMALL_MEDIUM
+   /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
+#  define Bytef Byte FAR
+#else
+   typedef Byte  FAR Bytef;
+#endif
+typedef char  FAR charf;
+typedef int   FAR intf;
+typedef uInt  FAR uIntf;
+typedef uLong FAR uLongf;
+
+#ifdef STDC
+   typedef void const *voidpc;
+   typedef void FAR   *voidpf;
+   typedef void       *voidp;
+#else
+   typedef Byte const *voidpc;
+   typedef Byte FAR   *voidpf;
+   typedef Byte       *voidp;
+#endif
+
+#if 0           /* HAVE_UNISTD_H -- this line is updated by ./configure */
+#  include <sys/types.h> /* for off_t */
+#  include <unistd.h>    /* for SEEK_* and off_t */
+#  ifdef VMS
+#    include <unixio.h>   /* for off_t */
+#  endif
+#  define z_off_t off_t
+#endif
+#ifndef SEEK_SET
+#  define SEEK_SET        0       /* Seek from beginning of file.  */
+#  define SEEK_CUR        1       /* Seek from current position.  */
+#  define SEEK_END        2       /* Set file pointer to EOF plus "offset" */
+#endif
+#ifndef z_off_t
+#  define z_off_t long
+#endif
+
+#if defined(__OS400__)
+#  define NO_vsnprintf
+#endif
+
+#if defined(__MVS__)
+#  define NO_vsnprintf
+#  ifdef FAR
+#    undef FAR
+#  endif
+#endif
+
+/* MVS linker does not support external names larger than 8 bytes */
+#if defined(__MVS__)
+#   pragma map(deflateInit_,"DEIN")
+#   pragma map(deflateInit2_,"DEIN2")
+#   pragma map(deflateEnd,"DEEND")
+#   pragma map(deflateBound,"DEBND")
+#   pragma map(inflateInit_,"ININ")
+#   pragma map(inflateInit2_,"ININ2")
+#   pragma map(inflateEnd,"INEND")
+#   pragma map(inflateSync,"INSY")
+#   pragma map(inflateSetDictionary,"INSEDI")
+#   pragma map(compressBound,"CMBND")
+#   pragma map(inflate_table,"INTABL")
+#   pragma map(inflate_fast,"INFA")
+#   pragma map(inflate_copyright,"INCOPY")
+#endif
+
+#endif /* ZCONF_H */
diff --git a/src/zlib/zlib.h b/src/zlib/zlib.h
new file mode 100644
index 0000000..0228179
--- /dev/null
+++ b/src/zlib/zlib.h
@@ -0,0 +1,1357 @@
+/* zlib.h -- interface of the 'zlib' general purpose compression library
+  version 1.2.3, July 18th, 2005
+
+  Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Jean-loup Gailly        Mark Adler
+  jloup@gzip.org          madler@alumni.caltech.edu
+
+
+  The data format used by the zlib library is described by RFCs (Request for
+  Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt
+  (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
+*/
+
+#ifndef ZLIB_H
+#define ZLIB_H
+
+#include "zconf.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ZLIB_VERSION "1.2.3"
+#define ZLIB_VERNUM 0x1230
+
+/*
+     The 'zlib' compression library provides in-memory compression and
+  decompression functions, including integrity checks of the uncompressed
+  data.  This version of the library supports only one compression method
+  (deflation) but other algorithms will be added later and will have the same
+  stream interface.
+
+     Compression can be done in a single step if the buffers are large
+  enough (for example if an input file is mmap'ed), or can be done by
+  repeated calls of the compression function.  In the latter case, the
+  application must provide more input and/or consume the output
+  (providing more output space) before each call.
+
+     The compressed data format used by default by the in-memory functions is
+  the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped
+  around a deflate stream, which is itself documented in RFC 1951.
+
+     The library also supports reading and writing files in gzip (.gz) format
+  with an interface similar to that of stdio using the functions that start
+  with "gz".  The gzip format is different from the zlib format.  gzip is a
+  gzip wrapper, documented in RFC 1952, wrapped around a deflate stream.
+
+     This library can optionally read and write gzip streams in memory as well.
+
+     The zlib format was designed to be compact and fast for use in memory
+  and on communications channels.  The gzip format was designed for single-
+  file compression on file systems, has a larger header than zlib to maintain
+  directory information, and uses a different, slower check method than zlib.
+
+     The library does not install any signal handler. The decoder checks
+  the consistency of the compressed data, so the library should never
+  crash even in case of corrupted input.
+*/
+
+typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
+typedef void   (*free_func)  OF((voidpf opaque, voidpf address));
+
+struct internal_state;
+
+typedef struct z_stream_s {
+    Bytef    *next_in;  /* next input byte */
+    uInt     avail_in;  /* number of bytes available at next_in */
+    uLong    total_in;  /* total number of input bytes read so far */
+
+    Bytef    *next_out; /* next output byte should be put there */
+    uInt     avail_out; /* remaining free space at next_out */
+    uLong    total_out; /* total number of bytes output so far */
+
+    char     *msg;      /* last error message, NULL if no error */
+    struct internal_state FAR *state; /* not visible by applications */
+
+    alloc_func zalloc;  /* used to allocate the internal state */
+    free_func  zfree;   /* used to free the internal state */
+    voidpf     opaque;  /* private data object passed to zalloc and zfree */
+
+    int     data_type;  /* best guess about the data type: binary or text */
+    uLong   adler;      /* adler32 value of the uncompressed data */
+    uLong   reserved;   /* reserved for future use */
+} z_stream;
+
+typedef z_stream FAR *z_streamp;
+
+/*
+     gzip header information passed to and from zlib routines.  See RFC 1952
+  for more details on the meanings of these fields.
+*/
+typedef struct gz_header_s {
+    int     text;       /* true if compressed data believed to be text */
+    uLong   time;       /* modification time */
+    int     xflags;     /* extra flags (not used when writing a gzip file) */
+    int     os;         /* operating system */
+    Bytef   *extra;     /* pointer to extra field or Z_NULL if none */
+    uInt    extra_len;  /* extra field length (valid if extra != Z_NULL) */
+    uInt    extra_max;  /* space at extra (only when reading header) */
+    Bytef   *name;      /* pointer to zero-terminated file name or Z_NULL */
+    uInt    name_max;   /* space at name (only when reading header) */
+    Bytef   *comment;   /* pointer to zero-terminated comment or Z_NULL */
+    uInt    comm_max;   /* space at comment (only when reading header) */
+    int     hcrc;       /* true if there was or will be a header crc */
+    int     done;       /* true when done reading gzip header (not used
+                           when writing a gzip file) */
+} gz_header;
+
+typedef gz_header FAR *gz_headerp;
+
+/*
+   The application must update next_in and avail_in when avail_in has
+   dropped to zero. It must update next_out and avail_out when avail_out
+   has dropped to zero. The application must initialize zalloc, zfree and
+   opaque before calling the init function. All other fields are set by the
+   compression library and must not be updated by the application.
+
+   The opaque value provided by the application will be passed as the first
+   parameter for calls of zalloc and zfree. This can be useful for custom
+   memory management. The compression library attaches no meaning to the
+   opaque value.
+
+   zalloc must return Z_NULL if there is not enough memory for the object.
+   If zlib is used in a multi-threaded application, zalloc and zfree must be
+   thread safe.
+
+   On 16-bit systems, the functions zalloc and zfree must be able to allocate
+   exactly 65536 bytes, but will not be required to allocate more than this
+   if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS,
+   pointers returned by zalloc for objects of exactly 65536 bytes *must*
+   have their offset normalized to zero. The default allocation function
+   provided by this library ensures this (see zutil.c). To reduce memory
+   requirements and avoid any allocation of 64K objects, at the expense of
+   compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h).
+
+   The fields total_in and total_out can be used for statistics or
+   progress reports. After compression, total_in holds the total size of
+   the uncompressed data and may be saved for use in the decompressor
+   (particularly if the decompressor wants to decompress everything in
+   a single step).
+*/
+
+                        /* constants */
+
+#define Z_NO_FLUSH      0
+#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */
+#define Z_SYNC_FLUSH    2
+#define Z_FULL_FLUSH    3
+#define Z_FINISH        4
+#define Z_BLOCK         5
+/* Allowed flush values; see deflate() and inflate() below for details */
+
+#define Z_OK            0
+#define Z_STREAM_END    1
+#define Z_NEED_DICT     2
+#define Z_ERRNO        (-1)
+#define Z_STREAM_ERROR (-2)
+#define Z_DATA_ERROR   (-3)
+#define Z_MEM_ERROR    (-4)
+#define Z_BUF_ERROR    (-5)
+#define Z_VERSION_ERROR (-6)
+/* Return codes for the compression/decompression functions. Negative
+ * values are errors, positive values are used for special but normal events.
+ */
+
+#define Z_NO_COMPRESSION         0
+#define Z_BEST_SPEED             1
+#define Z_BEST_COMPRESSION       9
+#define Z_DEFAULT_COMPRESSION  (-1)
+/* compression levels */
+
+#define Z_FILTERED            1
+#define Z_HUFFMAN_ONLY        2
+#define Z_RLE                 3
+#define Z_FIXED               4
+#define Z_DEFAULT_STRATEGY    0
+/* compression strategy; see deflateInit2() below for details */
+
+#define Z_BINARY   0
+#define Z_TEXT     1
+#define Z_ASCII    Z_TEXT   /* for compatibility with 1.2.2 and earlier */
+#define Z_UNKNOWN  2
+/* Possible values of the data_type field (though see inflate()) */
+
+#define Z_DEFLATED   8
+/* The deflate compression method (the only one supported in this version) */
+
+#define Z_NULL  0  /* for initializing zalloc, zfree, opaque */
+
+#define zlib_version zlibVersion()
+/* for compatibility with versions < 1.0.2 */
+
+                        /* basic functions */
+
+ZEXTERN const char * ZEXPORT zlibVersion OF((void));
+/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
+   If the first character differs, the library code actually used is
+   not compatible with the zlib.h header file used by the application.
+   This check is automatically made by deflateInit and inflateInit.
+ */
+
+/*
+ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
+
+     Initializes the internal stream state for compression. The fields
+   zalloc, zfree and opaque must be initialized before by the caller.
+   If zalloc and zfree are set to Z_NULL, deflateInit updates them to
+   use default allocation functions.
+
+     The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
+   1 gives best speed, 9 gives best compression, 0 gives no compression at
+   all (the input data is simply copied a block at a time).
+   Z_DEFAULT_COMPRESSION requests a default compromise between speed and
+   compression (currently equivalent to level 6).
+
+     deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if level is not a valid compression level,
+   Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
+   with the version assumed by the caller (ZLIB_VERSION).
+   msg is set to null if there is no error message.  deflateInit does not
+   perform any compression: this will be done by deflate().
+*/
+
+
+ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
+/*
+    deflate compresses as much data as possible, and stops when the input
+  buffer becomes empty or the output buffer becomes full. It may introduce some
+  output latency (reading input without producing any output) except when
+  forced to flush.
+
+    The detailed semantics are as follows. deflate performs one or both of the
+  following actions:
+
+  - Compress more input starting at next_in and update next_in and avail_in
+    accordingly. If not all input can be processed (because there is not
+    enough room in the output buffer), next_in and avail_in are updated and
+    processing will resume at this point for the next call of deflate().
+
+  - Provide more output starting at next_out and update next_out and avail_out
+    accordingly. This action is forced if the parameter flush is non zero.
+    Forcing flush frequently degrades the compression ratio, so this parameter
+    should be set only when necessary (in interactive applications).
+    Some output may be provided even if flush is not set.
+
+  Before the call of deflate(), the application should ensure that at least
+  one of the actions is possible, by providing more input and/or consuming
+  more output, and updating avail_in or avail_out accordingly; avail_out
+  should never be zero before the call. The application can consume the
+  compressed output when it wants, for example when the output buffer is full
+  (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK
+  and with zero avail_out, it must be called again after making room in the
+  output buffer because there might be more output pending.
+
+    Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to
+  decide how much data to accumulate before producing output, in order to
+  maximize compression.
+
+    If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
+  flushed to the output buffer and the output is aligned on a byte boundary, so
+  that the decompressor can get all input data available so far. (In particular
+  avail_in is zero after the call if enough output space has been provided
+  before the call.)  Flushing may degrade compression for some compression
+  algorithms and so it should be used only when necessary.
+
+    If flush is set to Z_FULL_FLUSH, all output is flushed as with
+  Z_SYNC_FLUSH, and the compression state is reset so that decompression can
+  restart from this point if previous compressed data has been damaged or if
+  random access is desired. Using Z_FULL_FLUSH too often can seriously degrade
+  compression.
+
+    If deflate returns with avail_out == 0, this function must be called again
+  with the same value of the flush parameter and more output space (updated
+  avail_out), until the flush is complete (deflate returns with non-zero
+  avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
+  avail_out is greater than six to avoid repeated flush markers due to
+  avail_out == 0 on return.
+
+    If the parameter flush is set to Z_FINISH, pending input is processed,
+  pending output is flushed and deflate returns with Z_STREAM_END if there
+  was enough output space; if deflate returns with Z_OK, this function must be
+  called again with Z_FINISH and more output space (updated avail_out) but no
+  more input data, until it returns with Z_STREAM_END or an error. After
+  deflate has returned Z_STREAM_END, the only possible operations on the
+  stream are deflateReset or deflateEnd.
+
+    Z_FINISH can be used immediately after deflateInit if all the compression
+  is to be done in a single step. In this case, avail_out must be at least
+  the value returned by deflateBound (see below). If deflate does not return
+  Z_STREAM_END, then it must be called again as described above.
+
+    deflate() sets strm->adler to the adler32 checksum of all input read
+  so far (that is, total_in bytes).
+
+    deflate() may update strm->data_type if it can make a good guess about
+  the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered
+  binary. This field is only for information purposes and does not affect
+  the compression algorithm in any manner.
+
+    deflate() returns Z_OK if some progress has been made (more input
+  processed or more output produced), Z_STREAM_END if all input has been
+  consumed and all output has been produced (only when flush is set to
+  Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
+  if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible
+  (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not
+  fatal, and deflate() can be called again with more input and more output
+  space to continue compressing.
+*/
+
+
+ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
+/*
+     All dynamically allocated data structures for this stream are freed.
+   This function discards any unprocessed input and does not flush any
+   pending output.
+
+     deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
+   stream state was inconsistent, Z_DATA_ERROR if the stream was freed
+   prematurely (some input or output was discarded). In the error case,
+   msg may be set but then points to a static string (which must not be
+   deallocated).
+*/
+
+
+/*
+ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
+
+     Initializes the internal stream state for decompression. The fields
+   next_in, avail_in, zalloc, zfree and opaque must be initialized before by
+   the caller. If next_in is not Z_NULL and avail_in is large enough (the exact
+   value depends on the compression method), inflateInit determines the
+   compression method from the zlib header and allocates all data structures
+   accordingly; otherwise the allocation will be deferred to the first call of
+   inflate.  If zalloc and zfree are set to Z_NULL, inflateInit updates them to
+   use default allocation functions.
+
+     inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+   version assumed by the caller.  msg is set to null if there is no error
+   message. inflateInit does not perform any decompression apart from reading
+   the zlib header if present: this will be done by inflate().  (So next_in and
+   avail_in may be modified, but next_out and avail_out are unchanged.)
+*/
+
+
+ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
+/*
+    inflate decompresses as much data as possible, and stops when the input
+  buffer becomes empty or the output buffer becomes full. It may introduce
+  some output latency (reading input without producing any output) except when
+  forced to flush.
+
+  The detailed semantics are as follows. inflate performs one or both of the
+  following actions:
+
+  - Decompress more input starting at next_in and update next_in and avail_in
+    accordingly. If not all input can be processed (because there is not
+    enough room in the output buffer), next_in is updated and processing
+    will resume at this point for the next call of inflate().
+
+  - Provide more output starting at next_out and update next_out and avail_out
+    accordingly.  inflate() provides as much output as possible, until there
+    is no more input data or no more space in the output buffer (see below
+    about the flush parameter).
+
+  Before the call of inflate(), the application should ensure that at least
+  one of the actions is possible, by providing more input and/or consuming
+  more output, and updating the next_* and avail_* values accordingly.
+  The application can consume the uncompressed output when it wants, for
+  example when the output buffer is full (avail_out == 0), or after each
+  call of inflate(). If inflate returns Z_OK and with zero avail_out, it
+  must be called again after making room in the output buffer because there
+  might be more output pending.
+
+    The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH,
+  Z_FINISH, or Z_BLOCK. Z_SYNC_FLUSH requests that inflate() flush as much
+  output as possible to the output buffer. Z_BLOCK requests that inflate() stop
+  if and when it gets to the next deflate block boundary. When decoding the
+  zlib or gzip format, this will cause inflate() to return immediately after
+  the header and before the first block. When doing a raw inflate, inflate()
+  will go ahead and process the first block, and will return when it gets to
+  the end of that block, or when it runs out of data.
+
+    The Z_BLOCK option assists in appending to or combining deflate streams.
+  Also to assist in this, on return inflate() will set strm->data_type to the
+  number of unused bits in the last byte taken from strm->next_in, plus 64
+  if inflate() is currently decoding the last block in the deflate stream,
+  plus 128 if inflate() returned immediately after decoding an end-of-block
+  code or decoding the complete header up to just before the first byte of the
+  deflate stream. The end-of-block will not be indicated until all of the
+  uncompressed data from that block has been written to strm->next_out.  The
+  number of unused bits may in general be greater than seven, except when
+  bit 7 of data_type is set, in which case the number of unused bits will be
+  less than eight.
+
+    inflate() should normally be called until it returns Z_STREAM_END or an
+  error. However if all decompression is to be performed in a single step
+  (a single call of inflate), the parameter flush should be set to
+  Z_FINISH. In this case all pending input is processed and all pending
+  output is flushed; avail_out must be large enough to hold all the
+  uncompressed data. (The size of the uncompressed data may have been saved
+  by the compressor for this purpose.) The next operation on this stream must
+  be inflateEnd to deallocate the decompression state. The use of Z_FINISH
+  is never required, but can be used to inform inflate that a faster approach
+  may be used for the single inflate() call.
+
+     In this implementation, inflate() always flushes as much output as
+  possible to the output buffer, and always uses the faster approach on the
+  first call. So the only effect of the flush parameter in this implementation
+  is on the return value of inflate(), as noted below, or when it returns early
+  because Z_BLOCK is used.
+
+     If a preset dictionary is needed after this call (see inflateSetDictionary
+  below), inflate sets strm->adler to the adler32 checksum of the dictionary
+  chosen by the compressor and returns Z_NEED_DICT; otherwise it sets
+  strm->adler to the adler32 checksum of all output produced so far (that is,
+  total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described
+  below. At the end of the stream, inflate() checks that its computed adler32
+  checksum is equal to that saved by the compressor and returns Z_STREAM_END
+  only if the checksum is correct.
+
+    inflate() will decompress and check either zlib-wrapped or gzip-wrapped
+  deflate data.  The header type is detected automatically.  Any information
+  contained in the gzip header is not retained, so applications that need that
+  information should instead use raw inflate, see inflateInit2() below, or
+  inflateBack() and perform their own processing of the gzip header and
+  trailer.
+
+    inflate() returns Z_OK if some progress has been made (more input processed
+  or more output produced), Z_STREAM_END if the end of the compressed data has
+  been reached and all uncompressed output has been produced, Z_NEED_DICT if a
+  preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
+  corrupted (input stream not conforming to the zlib format or incorrect check
+  value), Z_STREAM_ERROR if the stream structure was inconsistent (for example
+  if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory,
+  Z_BUF_ERROR if no progress is possible or if there was not enough room in the
+  output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and
+  inflate() can be called again with more input and more output space to
+  continue decompressing. If Z_DATA_ERROR is returned, the application may then
+  call inflateSync() to look for a good compression block if a partial recovery
+  of the data is desired.
+*/
+
+
+ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
+/*
+     All dynamically allocated data structures for this stream are freed.
+   This function discards any unprocessed input and does not flush any
+   pending output.
+
+     inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
+   was inconsistent. In the error case, msg may be set but then points to a
+   static string (which must not be deallocated).
+*/
+
+                        /* Advanced functions */
+
+/*
+    The following functions are needed only in some special applications.
+*/
+
+/*
+ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
+                                     int  level,
+                                     int  method,
+                                     int  windowBits,
+                                     int  memLevel,
+                                     int  strategy));
+
+     This is another version of deflateInit with more compression options. The
+   fields next_in, zalloc, zfree and opaque must be initialized before by
+   the caller.
+
+     The method parameter is the compression method. It must be Z_DEFLATED in
+   this version of the library.
+
+     The windowBits parameter is the base two logarithm of the window size
+   (the size of the history buffer). It should be in the range 8..15 for this
+   version of the library. Larger values of this parameter result in better
+   compression at the expense of memory usage. The default value is 15 if
+   deflateInit is used instead.
+
+     windowBits can also be -8..-15 for raw deflate. In this case, -windowBits
+   determines the window size. deflate() will then generate raw deflate data
+   with no zlib header or trailer, and will not compute an adler32 check value.
+
+     windowBits can also be greater than 15 for optional gzip encoding. Add
+   16 to windowBits to write a simple gzip header and trailer around the
+   compressed data instead of a zlib wrapper. The gzip header will have no
+   file name, no extra data, no comment, no modification time (set to zero),
+   no header crc, and the operating system will be set to 255 (unknown).  If a
+   gzip stream is being written, strm->adler is a crc32 instead of an adler32.
+
+     The memLevel parameter specifies how much memory should be allocated
+   for the internal compression state. memLevel=1 uses minimum memory but
+   is slow and reduces compression ratio; memLevel=9 uses maximum memory
+   for optimal speed. The default value is 8. See zconf.h for total memory
+   usage as a function of windowBits and memLevel.
+
+     The strategy parameter is used to tune the compression algorithm. Use the
+   value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
+   filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no
+   string match), or Z_RLE to limit match distances to one (run-length
+   encoding). Filtered data consists mostly of small values with a somewhat
+   random distribution. In this case, the compression algorithm is tuned to
+   compress them better. The effect of Z_FILTERED is to force more Huffman
+   coding and less string matching; it is somewhat intermediate between
+   Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as
+   Z_HUFFMAN_ONLY, but give better compression for PNG image data. The strategy
+   parameter only affects the compression ratio but not the correctness of the
+   compressed output even if it is not set appropriately.  Z_FIXED prevents the
+   use of dynamic Huffman codes, allowing for a simpler decoder for special
+   applications.
+
+      deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid
+   method). msg is set to null if there is no error message.  deflateInit2 does
+   not perform any compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
+                                             const Bytef *dictionary,
+                                             uInt  dictLength));
+/*
+     Initializes the compression dictionary from the given byte sequence
+   without producing any compressed output. This function must be called
+   immediately after deflateInit, deflateInit2 or deflateReset, before any
+   call of deflate. The compressor and decompressor must use exactly the same
+   dictionary (see inflateSetDictionary).
+
+     The dictionary should consist of strings (byte sequences) that are likely
+   to be encountered later in the data to be compressed, with the most commonly
+   used strings preferably put towards the end of the dictionary. Using a
+   dictionary is most useful when the data to be compressed is short and can be
+   predicted with good accuracy; the data can then be compressed better than
+   with the default empty dictionary.
+
+     Depending on the size of the compression data structures selected by
+   deflateInit or deflateInit2, a part of the dictionary may in effect be
+   discarded, for example if the dictionary is larger than the window size in
+   deflateInit or deflateInit2. Thus the strings most likely to be useful should be
+   put at the end of the dictionary, not at the front. In addition, the
+   current implementation of deflate will use at most the window size minus
+   262 bytes of the provided dictionary.
+
+     Upon return of this function, strm->adler is set to the adler32 value
+   of the dictionary; the decompressor may later use this value to determine
+   which dictionary has been used by the compressor. (The adler32 value
+   applies to the whole dictionary even if only a subset of the dictionary is
+   actually used by the compressor.) If a raw deflate was requested, then the
+   adler32 value is not computed and strm->adler is not set.
+
+     deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
+   parameter is invalid (such as NULL dictionary) or the stream state is
+   inconsistent (for example if deflate has already been called for this stream
+   or if the compression method is bsort). deflateSetDictionary does not
+   perform any compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
+                                    z_streamp source));
+/*
+     Sets the destination stream as a complete copy of the source stream.
+
+     This function can be useful when several compression strategies will be
+   tried, for example when there are several ways of pre-processing the input
+   data with a filter. The streams that will be discarded should then be freed
+   by calling deflateEnd.  Note that deflateCopy duplicates the internal
+   compression state which can be quite large, so this strategy is slow and
+   can consume lots of memory.
+
+     deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+   (such as zalloc being NULL). msg is left unchanged in both source and
+   destination.
+*/
+
+ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
+/*
+     This function is equivalent to deflateEnd followed by deflateInit,
+   but does not free and reallocate all the internal compression state.
+   The stream will keep the same compression level and any other attributes
+   that may have been set by deflateInit2.
+
+      deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being NULL).
+*/
+
+ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
+                                      int level,
+                                      int strategy));
+/*
+     Dynamically update the compression level and compression strategy.  The
+   interpretation of level and strategy is as in deflateInit2.  This can be
+   used to switch between compression and straight copy of the input data, or
+   to switch to a different kind of input data requiring a different
+   strategy. If the compression level is changed, the input available so far
+   is compressed with the old level (and may be flushed); the new level will
+   take effect only at the next call of deflate().
+
+     Before the call of deflateParams, the stream state must be set as for
+   a call of deflate(), since the currently available input may have to
+   be compressed and flushed. In particular, strm->avail_out must be non-zero.
+
+     deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
+   stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
+   if strm->avail_out was zero.
+*/
+
+ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
+                                    int good_length,
+                                    int max_lazy,
+                                    int nice_length,
+                                    int max_chain));
+/*
+     Fine tune deflate's internal compression parameters.  This should only be
+   used by someone who understands the algorithm used by zlib's deflate for
+   searching for the best matching string, and even then only by the most
+   fanatic optimizer trying to squeeze out the last compressed bit for their
+   specific input data.  Read the deflate.c source code for the meaning of the
+   max_lazy, good_length, nice_length, and max_chain parameters.
+
+     deflateTune() can be called after deflateInit() or deflateInit2(), and
+   returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream.
+ */
+
+ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
+                                       uLong sourceLen));
+/*
+     deflateBound() returns an upper bound on the compressed size after
+   deflation of sourceLen bytes.  It must be called after deflateInit()
+   or deflateInit2().  This would be used to allocate an output buffer
+   for deflation in a single pass, and so would be called before deflate().
+*/
+
+ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
+                                     int bits,
+                                     int value));
+/*
+     deflatePrime() inserts bits in the deflate output stream.  The intent
+  is that this function is used to start off the deflate output with the
+  bits leftover from a previous deflate stream when appending to it.  As such,
+  this function can only be used for raw deflate, and must be used before the
+  first deflate() call after a deflateInit2() or deflateReset().  bits must be
+  less than or equal to 16, and that many of the least significant bits of
+  value will be inserted in the output.
+
+      deflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
+                                         gz_headerp head));
+/*
+      deflateSetHeader() provides gzip header information for when a gzip
+   stream is requested by deflateInit2().  deflateSetHeader() may be called
+   after deflateInit2() or deflateReset() and before the first call of
+   deflate().  The text, time, os, extra field, name, and comment information
+   in the provided gz_header structure are written to the gzip header (xflag is
+   ignored -- the extra flags are set according to the compression level).  The
+   caller must assure that, if not Z_NULL, name and comment are terminated with
+   a zero byte, and that if extra is not Z_NULL, that extra_len bytes are
+   available there.  If hcrc is true, a gzip header crc is included.  Note that
+   the current versions of the command-line version of gzip (up through version
+   1.3.x) do not support header crc's, and will report that it is a "multi-part
+   gzip file" and give up.
+
+      If deflateSetHeader is not used, the default gzip header has text false,
+   the time set to zero, and os set to 255, with no extra, name, or comment
+   fields.  The gzip header is returned to the default state by deflateReset().
+
+      deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
+                                     int  windowBits));
+
+     This is another version of inflateInit with an extra parameter. The
+   fields next_in, avail_in, zalloc, zfree and opaque must be initialized
+   before by the caller.
+
+     The windowBits parameter is the base two logarithm of the maximum window
+   size (the size of the history buffer).  It should be in the range 8..15 for
+   this version of the library. The default value is 15 if inflateInit is used
+   instead. windowBits must be greater than or equal to the windowBits value
+   provided to deflateInit2() while compressing, or it must be equal to 15 if
+   deflateInit2() was not used. If a compressed stream with a larger window
+   size is given as input, inflate() will return with the error code
+   Z_DATA_ERROR instead of trying to allocate a larger window.
+
+     windowBits can also be -8..-15 for raw inflate. In this case, -windowBits
+   determines the window size. inflate() will then process raw deflate data,
+   not looking for a zlib or gzip header, not generating a check value, and not
+   looking for any check values for comparison at the end of the stream. This
+   is for use with other formats that use the deflate compressed data format
+   such as zip.  Those formats provide their own check values. If a custom
+   format is developed using the raw deflate format for compressed data, it is
+   recommended that a check value such as an adler32 or a crc32 be applied to
+   the uncompressed data as is done in the zlib, gzip, and zip formats.  For
+   most applications, the zlib format should be used as is. Note that comments
+   above on the use in deflateInit2() apply to the magnitude of windowBits.
+
+     windowBits can also be greater than 15 for optional gzip decoding. Add
+   32 to windowBits to enable zlib and gzip decoding with automatic header
+   detection, or add 16 to decode only the gzip format (the zlib format will
+   return a Z_DATA_ERROR).  If a gzip stream is being decoded, strm->adler is
+   a crc32 instead of an adler32.
+
+     inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg
+   is set to null if there is no error message.  inflateInit2 does not perform
+   any decompression apart from reading the zlib header if present: this will
+   be done by inflate(). (So next_in and avail_in may be modified, but next_out
+   and avail_out are unchanged.)
+*/
+
+ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
+                                             const Bytef *dictionary,
+                                             uInt  dictLength));
+/*
+     Initializes the decompression dictionary from the given uncompressed byte
+   sequence. This function must be called immediately after a call of inflate,
+   if that call returned Z_NEED_DICT. The dictionary chosen by the compressor
+   can be determined from the adler32 value returned by that call of inflate.
+   The compressor and decompressor must use exactly the same dictionary (see
+   deflateSetDictionary).  For raw inflate, this function can be called
+   immediately after inflateInit2() or inflateReset() and before any call of
+   inflate() to set the dictionary.  The application must ensure that the
+   dictionary that was used for compression is provided.
+
+     inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
+   parameter is invalid (such as NULL dictionary) or the stream state is
+   inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
+   expected one (incorrect adler32 value). inflateSetDictionary does not
+   perform any decompression: this will be done by subsequent calls of
+   inflate().
+*/
+
+ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
+/*
+    Skips invalid compressed data until a full flush point (see above the
+  description of deflate with Z_FULL_FLUSH) can be found, or until all
+  available input is skipped. No output is provided.
+
+    inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
+  if no more input was provided, Z_DATA_ERROR if no flush point has been found,
+  or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
+  case, the application may save the current value of total_in which
+  indicates where valid compressed data was found. In the error case, the
+  application may repeatedly call inflateSync, providing more input each time,
+  until success or end of the input data.
+*/
+
+ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
+                                    z_streamp source));
+/*
+     Sets the destination stream as a complete copy of the source stream.
+
+     This function can be useful when randomly accessing a large stream.  The
+   first pass through the stream can periodically record the inflate state,
+   allowing restarting inflate at those points when randomly accessing the
+   stream.
+
+     inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+   (such as zalloc being NULL). msg is left unchanged in both source and
+   destination.
+*/
+
+ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
+/*
+     This function is equivalent to inflateEnd followed by inflateInit,
+   but does not free and reallocate all the internal decompression state.
+   The stream will keep attributes that may have been set by inflateInit2.
+
+      inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being NULL).
+*/
+
+ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
+                                     int bits,
+                                     int value));
+/*
+     This function inserts bits in the inflate input stream.  The intent is
+  that this function is used to start inflating at a bit position in the
+  middle of a byte.  The provided bits will be used before any bytes are used
+  from next_in.  This function should only be used with raw inflate, and
+  should be used before the first inflate() call after inflateInit2() or
+  inflateReset().  bits must be less than or equal to 16, and that many of the
+  least significant bits of value will be inserted in the input.
+
+      inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
+                                         gz_headerp head));
+/*
+      inflateGetHeader() requests that gzip header information be stored in the
+   provided gz_header structure.  inflateGetHeader() may be called after
+   inflateInit2() or inflateReset(), and before the first call of inflate().
+   As inflate() processes the gzip stream, head->done is zero until the header
+   is completed, at which time head->done is set to one.  If a zlib stream is
+   being decoded, then head->done is set to -1 to indicate that there will be
+   no gzip header information forthcoming.  Note that Z_BLOCK can be used to
+   force inflate() to return immediately after header processing is complete
+   and before any actual data is decompressed.
+
+      The text, time, xflags, and os fields are filled in with the gzip header
+   contents.  hcrc is set to true if there is a header CRC.  (The header CRC
+   was valid if done is set to one.)  If extra is not Z_NULL, then extra_max
+   contains the maximum number of bytes to write to extra.  Once done is true,
+   extra_len contains the actual extra field length, and extra contains the
+   extra field, or that field truncated if extra_max is less than extra_len.
+   If name is not Z_NULL, then up to name_max characters are written there,
+   terminated with a zero unless the length is greater than name_max.  If
+   comment is not Z_NULL, then up to comm_max characters are written there,
+   terminated with a zero unless the length is greater than comm_max.  When
+   any of extra, name, or comment are not Z_NULL and the respective field is
+   not present in the header, then that field is set to Z_NULL to signal its
+   absence.  This allows the use of deflateSetHeader() with the returned
+   structure to duplicate the header.  However if those fields are set to
+   allocated memory, then the application will need to save those pointers
+   elsewhere so that they can be eventually freed.
+
+      If inflateGetHeader is not used, then the header information is simply
+   discarded.  The header is always checked for validity, including the header
+   CRC if present.  inflateReset() will reset the process to discard the header
+   information.  The application would need to call inflateGetHeader() again to
+   retrieve the header from the next gzip stream.
+
+      inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
+                                        unsigned char FAR *window));
+
+     Initialize the internal stream state for decompression using inflateBack()
+   calls.  The fields zalloc, zfree and opaque in strm must be initialized
+   before the call.  If zalloc and zfree are Z_NULL, then the default library-
+   derived memory allocation routines are used.  windowBits is the base two
+   logarithm of the window size, in the range 8..15.  window is a caller
+   supplied buffer of that size.  Except for special applications where it is
+   assured that deflate was used with small window sizes, windowBits must be 15
+   and a 32K byte window must be supplied to be able to decompress general
+   deflate streams.
+
+     See inflateBack() for the usage of these routines.
+
+     inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of
+   the parameters are invalid, Z_MEM_ERROR if the internal state could not
+   be allocated, or Z_VERSION_ERROR if the version of the library does not
+   match the version of the header file.
+*/
+
+typedef unsigned (*in_func) OF((void FAR *, unsigned char FAR * FAR *));
+typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned));
+
+ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
+                                    in_func in, void FAR *in_desc,
+                                    out_func out, void FAR *out_desc));
+/*
+     inflateBack() does a raw inflate with a single call using a call-back
+   interface for input and output.  This is more efficient than inflate() for
+   file i/o applications in that it avoids copying between the output and the
+   sliding window by simply making the window itself the output buffer.  This
+   function trusts the application to not change the output buffer passed by
+   the output function, at least until inflateBack() returns.
+
+     inflateBackInit() must be called first to allocate the internal state
+   and to initialize the state with the user-provided window buffer.
+   inflateBack() may then be used multiple times to inflate a complete, raw
+   deflate stream with each call.  inflateBackEnd() is then called to free
+   the allocated state.
+
+     A raw deflate stream is one with no zlib or gzip header or trailer.
+   This routine would normally be used in a utility that reads zip or gzip
+   files and writes out uncompressed files.  The utility would decode the
+   header and process the trailer on its own, hence this routine expects
+   only the raw deflate stream to decompress.  This is different from the
+   normal behavior of inflate(), which expects either a zlib or gzip header and
+   trailer around the deflate stream.
+
+     inflateBack() uses two subroutines supplied by the caller that are then
+   called by inflateBack() for input and output.  inflateBack() calls those
+   routines until it reads a complete deflate stream and writes out all of the
+   uncompressed data, or until it encounters an error.  The function's
+   parameters and return types are defined above in the in_func and out_func
+   typedefs.  inflateBack() will call in(in_desc, &buf) which should return the
+   number of bytes of provided input, and a pointer to that input in buf.  If
+   there is no input available, in() must return zero--buf is ignored in that
+   case--and inflateBack() will return a buffer error.  inflateBack() will call
+   out(out_desc, buf, len) to write the uncompressed data buf[0..len-1].  out()
+   should return zero on success, or non-zero on failure.  If out() returns
+   non-zero, inflateBack() will return with an error.  Neither in() nor out()
+   are permitted to change the contents of the window provided to
+   inflateBackInit(), which is also the buffer that out() uses to write from.
+   The length written by out() will be at most the window size.  Any non-zero
+   amount of input may be provided by in().
+
+     For convenience, inflateBack() can be provided input on the first call by
+   setting strm->next_in and strm->avail_in.  If that input is exhausted, then
+   in() will be called.  Therefore strm->next_in must be initialized before
+   calling inflateBack().  If strm->next_in is Z_NULL, then in() will be called
+   immediately for input.  If strm->next_in is not Z_NULL, then strm->avail_in
+   must also be initialized, and then if strm->avail_in is not zero, input will
+   initially be taken from strm->next_in[0 .. strm->avail_in - 1].
+
+     The in_desc and out_desc parameters of inflateBack() are passed as the
+   first parameter of in() and out() respectively when they are called.  These
+   descriptors can be optionally used to pass any information that the caller-
+   supplied in() and out() functions need to do their job.
+
+     On return, inflateBack() will set strm->next_in and strm->avail_in to
+   pass back any unused input that was provided by the last in() call.  The
+   return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR
+   if in() or out() returned an error, Z_DATA_ERROR if there was a format
+   error in the deflate stream (in which case strm->msg is set to indicate the
+   nature of the error), or Z_STREAM_ERROR if the stream was not properly
+   initialized.  In the case of Z_BUF_ERROR, an input or output error can be
+   distinguished using strm->next_in which will be Z_NULL only if in() returned
+   an error.  If strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to
+   out() returning non-zero.  (in() will always be called before out(), so
+   strm->next_in is assured to be defined if out() returns non-zero.)  Note
+   that inflateBack() cannot return Z_OK.
+*/
+
+ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
+/*
+     All memory allocated by inflateBackInit() is freed.
+
+     inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream
+   state was inconsistent.
+*/
+
+ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
+/* Return flags indicating compile-time options.
+
+    Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other:
+     1.0: size of uInt
+     3.2: size of uLong
+     5.4: size of voidpf (pointer)
+     7.6: size of z_off_t
+
+    Compiler, assembler, and debug options:
+     8: DEBUG
+     9: ASMV or ASMINF -- use ASM code
+     10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention
+     11: 0 (reserved)
+
+    One-time table building (smaller code, but not thread-safe if true):
+     12: BUILDFIXED -- build static block decoding tables when needed
+     13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed
+     14,15: 0 (reserved)
+
+    Library content (indicates missing functionality):
+     16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking
+                          deflate code when not needed)
+     17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect
+                    and decode gzip streams (to avoid linking crc code)
+     18-19: 0 (reserved)
+
+    Operation variations (changes in library functionality):
+     20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate
+     21: FASTEST -- deflate algorithm with only one, lowest compression level
+     22,23: 0 (reserved)
+
+    The sprintf variant used by gzprintf (zero is best):
+     24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format
+     25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure!
+     26: 0 = returns value, 1 = void -- 1 means inferred string length returned
+
+    Remainder:
+     27-31: 0 (reserved)
+ */
+
+
+                        /* utility functions */
+
+/*
+     The following utility functions are implemented on top of the
+   basic stream-oriented functions. To simplify the interface, some
+   default options are assumed (compression level and memory usage,
+   standard memory allocation functions). The source code of these
+   utility functions can easily be modified if you need special options.
+*/
+
+ZEXTERN int ZEXPORT compress OF((Bytef *dest,   uLongf *destLen,
+                                 const Bytef *source, uLong sourceLen));
+/*
+     Compresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer. Upon entry, destLen is the total
+   size of the destination buffer, which must be at least the value returned
+   by compressBound(sourceLen). Upon exit, destLen is the actual size of the
+   compressed buffer.
+     This function can be used to compress a whole file at once if the
+   input file is mmap'ed.
+     compress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer.
+*/
+
+ZEXTERN int ZEXPORT compress2 OF((Bytef *dest,   uLongf *destLen,
+                                  const Bytef *source, uLong sourceLen,
+                                  int level));
+/*
+     Compresses the source buffer into the destination buffer. The level
+   parameter has the same meaning as in deflateInit.  sourceLen is the byte
+   length of the source buffer. Upon entry, destLen is the total size of the
+   destination buffer, which must be at least the value returned by
+   compressBound(sourceLen). Upon exit, destLen is the actual size of the
+   compressed buffer.
+
+     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer,
+   Z_STREAM_ERROR if the level parameter is invalid.
+*/
+
+ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen));
+/*
+     compressBound() returns an upper bound on the compressed size after
+   compress() or compress2() on sourceLen bytes.  It would be used before
+   a compress() or compress2() call to allocate the destination buffer.
+*/
+
+ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
+                                   const Bytef *source, uLong sourceLen));
+/*
+     Decompresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer. Upon entry, destLen is the total
+   size of the destination buffer, which must be large enough to hold the
+   entire uncompressed data. (The size of the uncompressed data must have
+   been saved previously by the compressor and transmitted to the decompressor
+   by some mechanism outside the scope of this compression library.)
+   Upon exit, destLen is the actual size of the uncompressed buffer.
+     This function can be used to decompress a whole file at once if the
+   input file is mmap'ed.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete.
+*/
+
+
+typedef voidp gzFile;
+
+ZEXTERN gzFile ZEXPORT gzopen  OF((const char *path, const char *mode));
+/*
+     Opens a gzip (.gz) file for reading or writing. The mode parameter
+   is as in fopen ("rb" or "wb") but can also include a compression level
+   ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for
+   Huffman only compression as in "wb1h", or 'R' for run-length encoding
+   as in "wb1R". (See the description of deflateInit2 for more information
+   about the strategy parameter.)
+
+     gzopen can be used to read a file which is not in gzip format; in this
+   case gzread will directly read from the file without decompression.
+
+     gzopen returns NULL if the file could not be opened or if there was
+   insufficient memory to allocate the (de)compression state; errno
+   can be checked to distinguish the two cases (if errno is zero, the
+   zlib error is Z_MEM_ERROR).  */
+
+ZEXTERN gzFile ZEXPORT gzdopen  OF((int fd, const char *mode));
+/*
+     gzdopen() associates a gzFile with the file descriptor fd.  File
+   descriptors are obtained from calls like open, dup, creat, pipe or
+   fileno (if the file has been previously opened with fopen).
+   The mode parameter is as in gzopen.
+     The next call of gzclose on the returned gzFile will also close the
+   file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file
+   descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode).
+     gzdopen returns NULL if there was insufficient memory to allocate
+   the (de)compression state.
+*/
+
+ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
+/*
+     Dynamically update the compression level or strategy. See the description
+   of deflateInit2 for the meaning of these parameters.
+     gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not
+   opened for writing.
+*/
+
+ZEXTERN int ZEXPORT    gzread  OF((gzFile file, voidp buf, unsigned len));
+/*
+     Reads the given number of uncompressed bytes from the compressed file.
+   If the input file was not in gzip format, gzread copies the given number
+   of bytes into the buffer.
+     gzread returns the number of uncompressed bytes actually read (0 for
+   end of file, -1 for error). */
+
+ZEXTERN int ZEXPORT    gzwrite OF((gzFile file,
+                                   voidpc buf, unsigned len));
+/*
+     Writes the given number of uncompressed bytes into the compressed file.
+   gzwrite returns the number of uncompressed bytes actually written
+   (0 in case of error).
+*/
+
+ZEXTERN int ZEXPORTVA   gzprintf OF((gzFile file, const char *format, ...));
+/*
+     Converts, formats, and writes the args to the compressed file under
+   control of the format string, as in fprintf. gzprintf returns the number of
+   uncompressed bytes actually written (0 in case of error).  The number of
+   uncompressed bytes written is limited to 4095. The caller should assure that
+   this limit is not exceeded. If it is exceeded, then gzprintf() will return
+   an error (0) with nothing written. In this case, there may also be a
+   buffer overflow with unpredictable consequences, which is possible only if
+   zlib was compiled with the insecure functions sprintf() or vsprintf()
+   because the secure snprintf() or vsnprintf() functions were not available.
+*/
+
+ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
+/*
+      Writes the given null-terminated string to the compressed file, excluding
+   the terminating null character.
+      gzputs returns the number of characters written, or -1 in case of error.
+*/
+
+ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
+/*
+      Reads bytes from the compressed file until len-1 characters are read, or
+   a newline character is read and transferred to buf, or an end-of-file
+   condition is encountered.  The string is then terminated with a null
+   character.
+      gzgets returns buf, or Z_NULL in case of error.
+*/
+
+ZEXTERN int ZEXPORT    gzputc OF((gzFile file, int c));
+/*
+      Writes c, converted to an unsigned char, into the compressed file.
+   gzputc returns the value that was written, or -1 in case of error.
+*/
+
+ZEXTERN int ZEXPORT    gzgetc OF((gzFile file));
+/*
+      Reads one byte from the compressed file. gzgetc returns this byte
+   or -1 in case of end of file or error.
+*/
+
+ZEXTERN int ZEXPORT    gzungetc OF((int c, gzFile file));
+/*
+      Push one character back onto the stream to be read again later.
+   Only one character of push-back is allowed.  gzungetc() returns the
+   character pushed, or -1 on failure.  gzungetc() will fail if a
+   character has been pushed but not read yet, or if c is -1. The pushed
+   character will be discarded if the stream is repositioned with gzseek()
+   or gzrewind().
+*/
+
+ZEXTERN int ZEXPORT    gzflush OF((gzFile file, int flush));
+/*
+     Flushes all pending output into the compressed file. The parameter
+   flush is as in the deflate() function. The return value is the zlib
+   error number (see function gzerror below). gzflush returns Z_OK if
+   the flush parameter is Z_FINISH and all output could be flushed.
+     gzflush should be called only when strictly necessary because it can
+   degrade compression.
+*/
+
+ZEXTERN z_off_t ZEXPORT    gzseek OF((gzFile file,
+                                      z_off_t offset, int whence));
+/*
+      Sets the starting position for the next gzread or gzwrite on the
+   given compressed file. The offset represents a number of bytes in the
+   uncompressed data stream. The whence parameter is defined as in lseek(2);
+   the value SEEK_END is not supported.
+     If the file is opened for reading, this function is emulated but can be
+   extremely slow. If the file is opened for writing, only forward seeks are
+   supported; gzseek then compresses a sequence of zeroes up to the new
+   starting position.
+
+      gzseek returns the resulting offset location as measured in bytes from
+   the beginning of the uncompressed stream, or -1 in case of error, in
+   particular if the file is opened for writing and the new starting position
+   would be before the current position.
+*/
+
+ZEXTERN int ZEXPORT    gzrewind OF((gzFile file));
+/*
+     Rewinds the given file. This function is supported only for reading.
+
+   gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
+*/
+
+ZEXTERN z_off_t ZEXPORT    gztell OF((gzFile file));
+/*
+     Returns the starting position for the next gzread or gzwrite on the
+   given compressed file. This position represents a number of bytes in the
+   uncompressed data stream.
+
+   gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
+*/
+
+ZEXTERN int ZEXPORT gzeof OF((gzFile file));
+/*
+     Returns 1 when EOF has previously been detected reading the given
+   input stream, otherwise zero.
+*/
+
+ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
+/*
+     Returns 1 if file is being read directly without decompression, otherwise
+   zero.
+*/
+
+ZEXTERN int ZEXPORT    gzclose OF((gzFile file));
+/*
+     Flushes all pending output if necessary, closes the compressed file
+   and deallocates all the (de)compression state. The return value is the zlib
+   error number (see function gzerror below).
+*/
+
+ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
+/*
+     Returns the error message for the last error which occurred on the
+   given compressed file. errnum is set to zlib error number. If an
+   error occurred in the file system and not in the compression library,
+   errnum is set to Z_ERRNO and the application may consult errno
+   to get the exact error code.
+*/
+
+ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
+/*
+     Clears the error and end-of-file flags for file. This is analogous to the
+   clearerr() function in stdio. This is useful for continuing to read a gzip
+   file that is being written concurrently.
+*/
+
+                        /* checksum functions */
+
+/*
+     These functions are not related to compression but are exported
+   anyway because they might be useful in applications using the
+   compression library.
+*/
+
+ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
+/*
+     Update a running Adler-32 checksum with the bytes buf[0..len-1] and
+   return the updated checksum. If buf is NULL, this function returns
+   the required initial value for the checksum.
+   An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
+   much faster. Usage example:
+
+     uLong adler = adler32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       adler = adler32(adler, buffer, length);
+     }
+     if (adler != original_adler) error();
+*/
+
+ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
+                                          z_off_t len2));
+/*
+     Combine two Adler-32 checksums into one.  For two sequences of bytes, seq1
+   and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for
+   each, adler1 and adler2.  adler32_combine() returns the Adler-32 checksum of
+   seq1 and seq2 concatenated, requiring only adler1, adler2, and len2.
+*/
+
+ZEXTERN uLong ZEXPORT crc32   OF((uLong crc, const Bytef *buf, uInt len));
+/*
+     Update a running CRC-32 with the bytes buf[0..len-1] and return the
+   updated CRC-32. If buf is NULL, this function returns the required initial
+   value for the crc. Pre- and post-conditioning (one's complement) is
+   performed within this function so it shouldn't be done by the application.
+   Usage example:
+
+     uLong crc = crc32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       crc = crc32(crc, buffer, length);
+     }
+     if (crc != original_crc) error();
+*/
+
+ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
+
+/*
+     Combine two CRC-32 check values into one.  For two sequences of bytes,
+   seq1 and seq2 with lengths len1 and len2, CRC-32 check values were
+   calculated for each, crc1 and crc2.  crc32_combine() returns the CRC-32
+   check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and
+   len2.
+*/
+
+
+                        /* various hacks, don't look :) */
+
+/* The *Init entry points are macros so that every call site passes its own
+ * ZLIB_VERSION and sizeof(z_stream) to the underscore-suffixed functions:
+ */
+ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
+                                     const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
+                                     const char *version, int stream_size));
+ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int  level, int  method,
+                                      int windowBits, int memLevel,
+                                      int strategy, const char *version,
+                                      int stream_size));
+ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int  windowBits,
+                                      const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits,
+                                         unsigned char FAR *window,
+                                         const char *version,
+                                         int stream_size));
+#define deflateInit(strm, level) \
+        deflateInit_((strm), (level),       ZLIB_VERSION, sizeof(z_stream))
+#define inflateInit(strm) \
+        inflateInit_((strm),                ZLIB_VERSION, sizeof(z_stream))
+#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+        deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+                      (strategy),           ZLIB_VERSION, sizeof(z_stream))
+#define inflateInit2(strm, windowBits) \
+        inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream))
+#define inflateBackInit(strm, windowBits, window) \
+        inflateBackInit_((strm), (windowBits), (window), \
+        ZLIB_VERSION, sizeof(z_stream))
+
+
+#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL)
+    struct internal_state {int dummy;}; /* hack for buggy compilers */
+#endif
+
+ZEXTERN const char   * ZEXPORT zError           OF((int));
+ZEXTERN int            ZEXPORT inflateSyncPoint OF((z_streamp z));
+ZEXTERN const uLongf * ZEXPORT get_crc_table    OF((void));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ZLIB_H */
diff --git a/src/zlib/zutil.c b/src/zlib/zutil.c
new file mode 100644
index 0000000..a054437
--- /dev/null
+++ b/src/zlib/zutil.c
@@ -0,0 +1,318 @@
+/* zutil.c -- target dependent utility functions for the compression library
+ * Copyright (C) 1995-2005 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id: zutil.c,v 1.1 2008/10/17 06:17:16 scuri Exp $ */
+
+#include "zutil.h"
+
+#ifndef NO_DUMMY_DECL
+struct internal_state      {int dummy;}; /* for buggy compilers */
+#endif
+
+const char * const z_errmsg[10] = { /* looked up as z_errmsg[Z_NEED_DICT - err] */
+"need dictionary",     /* Z_NEED_DICT       2  */
+"stream end",          /* Z_STREAM_END      1  */
+"",                    /* Z_OK              0  */
+"file error",          /* Z_ERRNO         (-1) */
+"stream error",        /* Z_STREAM_ERROR  (-2) */
+"data error",          /* Z_DATA_ERROR    (-3) */
+"insufficient memory", /* Z_MEM_ERROR     (-4) */
+"buffer error",        /* Z_BUF_ERROR     (-5) */
+"incompatible version",/* Z_VERSION_ERROR (-6) */
+""};                   /* tenth slot: none of the codes listed above maps here */
+
+
+const char * ZEXPORT zlibVersion() /* reports the library's own version string */
+{
+    return ZLIB_VERSION; /* the ZLIB_VERSION value this file was compiled with */
+}
+
+uLong ZEXPORT zlibCompileFlags() /* bit layout is described in zlib.h */
+{
+    uLong flags; /* result word, assembled field by field below */
+
+    flags = 0;
+    switch (sizeof(uInt)) { /* bits 1.0: sizeof 2 -> 0, 4 -> 1, 8 -> 2, other -> 3 */
+    case 2:     break;
+    case 4:     flags += 1;     break;
+    case 8:     flags += 2;     break;
+    default:    flags += 3;
+    }
+    switch (sizeof(uLong)) { /* bits 3.2: same size code, for uLong */
+    case 2:     break;
+    case 4:     flags += 1 << 2;        break;
+    case 8:     flags += 2 << 2;        break;
+    default:    flags += 3 << 2;
+    }
+    switch (sizeof(voidpf)) { /* bits 5.4: same size code, for voidpf (pointer) */
+    case 2:     break;
+    case 4:     flags += 1 << 4;        break;
+    case 8:     flags += 2 << 4;        break;
+    default:    flags += 3 << 4;
+    }
+    switch (sizeof(z_off_t)) { /* bits 7.6: same size code, for z_off_t */
+    case 2:     break;
+    case 4:     flags += 1 << 6;        break;
+    case 8:     flags += 2 << 6;        break;
+    default:    flags += 3 << 6;
+    }
+#ifdef DEBUG
+    flags += 1 << 8; /* bit 8: DEBUG */
+#endif
+#if defined(ASMV) || defined(ASMINF)
+    flags += 1 << 9; /* bit 9: assembler code in use */
+#endif
+#ifdef ZLIB_WINAPI
+    flags += 1 << 10; /* bit 10: ZLIB_WINAPI */
+#endif
+#ifdef BUILDFIXED
+    flags += 1 << 12; /* bit 12: BUILDFIXED */
+#endif
+#ifdef DYNAMIC_CRC_TABLE
+    flags += 1 << 13; /* bit 13: DYNAMIC_CRC_TABLE */
+#endif
+#ifdef NO_GZCOMPRESS
+    flags += 1L << 16; /* bit 16: NO_GZCOMPRESS (long constant from here on) */
+#endif
+#ifdef NO_GZIP
+    flags += 1L << 17; /* bit 17: NO_GZIP */
+#endif
+#ifdef PKZIP_BUG_WORKAROUND
+    flags += 1L << 20; /* bit 20: PKZIP_BUG_WORKAROUND */
+#endif
+#ifdef FASTEST
+    flags += 1L << 21; /* bit 21: FASTEST */
+#endif
+#ifdef STDC
+#  ifdef NO_vsnprintf
+        flags += 1L << 25; /* bit 25: vsprintf is used, no length limit */
+#    ifdef HAS_vsprintf_void
+        flags += 1L << 26; /* bit 26: that vsprintf returns void */
+#    endif
+#  else
+#    ifdef HAS_vsnprintf_void
+        flags += 1L << 26; /* bit 26: vsnprintf returns void */
+#    endif
+#  endif
+#else
+        flags += 1L << 24; /* bit 24: s* variant instead of vs* (not STDC) */
+#  ifdef NO_snprintf
+        flags += 1L << 25; /* bit 25: sprintf is used, no length limit */
+#    ifdef HAS_sprintf_void
+        flags += 1L << 26; /* bit 26: that sprintf returns void */
+#    endif
+#  else
+#    ifdef HAS_snprintf_void
+        flags += 1L << 26; /* bit 26: snprintf returns void */
+#    endif
+#  endif
+#endif
+    return flags;
+}
+
+#ifdef DEBUG
+
+#  ifndef verbose
+#    define verbose 0
+#  endif
+int z_verbose = verbose;
+
+void z_error (m) /* DEBUG builds only: report a message and stop the program */
+    char *m; /* message text to print */
+{
+    fprintf(stderr, "%s\n", m); /* message goes to stderr, followed by a newline */
+    exit(1); /* terminates the process with status 1; does not return */
+}
+#endif
+
+/* Exported so that applications can turn a zlib error code into its message
+ * string, e.g. after compress() or uncompress().
+ */
+const char * ZEXPORT zError(err)
+    int err; /* zlib return code, used to index the message table */
+{
+    return ERR_MSG(err); /* NOTE(review): table index is not range-checked here */
+}
+
+#if defined(_WIN32_WCE)
+    /* The Microsoft C Run-Time Library for Windows CE doesn't have
+     * errno.  We define it as a global variable to simplify porting.
+     * Its value is always 0 and should not be used.
+     */
+    int errno = 0;
+#endif
+
+#ifndef HAVE_MEMCPY
+
+void zmemcpy(dest, source, len) /* fallback copy, built when HAVE_MEMCPY is unset */
+    Bytef* dest; /* destination, written from its first byte upward */
+    const Bytef* source; /* bytes to copy; read only */
+    uInt  len; /* number of bytes to copy */
+{
+    if (len == 0) return; /* the do/while below always runs at least once */
+    do {
+        *dest++ = *source++; /* one byte per pass, ascending (??? to be unrolled) */
+    } while (--len != 0);
+}
+
+int zmemcmp(s1, s2, len) /* fallback compare, built when HAVE_MEMCPY is unset */
+    const Bytef* s1; /* first buffer */
+    const Bytef* s2; /* second buffer */
+    uInt  len; /* number of bytes to compare */
+{
+    uInt j; /* index of the byte pair being compared */
+
+    for (j = 0; j < len; j++) {
+        if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; /* 1 if s1 larger, else -1 */
+    }
+    return 0; /* no difference in the first len bytes (also when len is 0) */
+}
+
+void zmemzero(dest, len) /* fallback zero-fill, built when HAVE_MEMCPY is unset */
+    Bytef* dest; /* buffer to clear */
+    uInt  len; /* number of bytes to set to zero */
+{
+    if (len == 0) return; /* the do/while below always runs at least once */
+    do {
+        *dest++ = 0;  /* one byte per pass, ascending (??? to be unrolled) */
+    } while (--len != 0);
+}
+#endif
+
+
+#ifdef SYS16BIT
+
+#ifdef __TURBOC__
+/* Turbo C in 16-bit mode */
+
+#  define MY_ZCALLOC
+
+/* Turbo C malloc() does not allow dynamic allocation of 64K bytes
+ * and farmalloc(64K) returns a pointer with an offset of 8, so we
+ * must fix the pointer. Warning: the pointer must be put back to its
+ * original form in order to free it, use zcfree().
+ */
+
+#define MAX_PTR 10
+/* 10*64K = 640K */
+
+local int next_ptr = 0;
+
+typedef struct ptr_table_s {
+    voidpf org_ptr;
+    voidpf new_ptr;
+} ptr_table;
+
+local ptr_table table[MAX_PTR];
+/* This table is used to remember the original form of pointers
+ * to large buffers (64K). Such pointers are normalized with a zero offset.
+ * Since MSDOS is not a preemptive multitasking OS, this table is not
+ * protected from concurrent access. This hack doesn't work anyway on
+ * a protected system like OS/2. Use Microsoft C instead.
+ */
+
+voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) /* Turbo C variant */
+{
+    voidpf buf = opaque; /* just to make some compilers happy */
+    ulg bsize = (ulg)items*size; /* items is widened to ulg before multiplying */
+
+    /* If we allocate less than 65520 bytes, we assume that farmalloc
+     * will return a usable pointer which doesn't have to be normalized.
+     */
+    if (bsize < 65520L) {
+        buf = farmalloc(bsize);
+        if (*(ush*)&buf != 0) return buf; /* first word nonzero: returned as is */
+    } else {
+        buf = farmalloc(bsize + 16L); /* presumably slack for the round-up below */
+    }
+    if (buf == NULL || next_ptr >= MAX_PTR) return NULL; /* NOTE(review): buf is not freed when only the table is full */
+    table[next_ptr].org_ptr = buf; /* remember what farmalloc returned, for zcfree */
+
+    /* Normalize the pointer to seg:0 */
+    *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; /* second word += offset rounded up to 16-byte units */
+    *(ush*)&buf = 0; /* first word cleared */
+    table[next_ptr++].new_ptr = buf; /* record the adjusted pointer handed to the caller */
+    return buf;
+}
+
+void  zcfree (voidpf opaque, voidpf ptr) /* Turbo C variant; pairs with zcalloc */
+{
+    int n; /* index into the pointer table */
+    if (*(ush*)&ptr != 0) { /* object < 64K */
+        farfree(ptr); /* pointer was never adjusted, so free it directly */
+        return;
+    }
+    /* Find the original pointer */
+    for (n = 0; n < next_ptr; n++) {
+        if (ptr != table[n].new_ptr) continue; /* not this entry */
+
+        farfree(table[n].org_ptr); /* free the pointer farmalloc actually returned */
+        while (++n < next_ptr) { /* close the gap: move later entries down one */
+            table[n-1] = table[n];
+        }
+        next_ptr--; /* one fewer live entry */
+        return;
+    }
+    ptr = opaque; /* just to make some compilers happy */
+    Assert(0, "zcfree: ptr not found"); /* reached only if no table entry matched */
+}
+
+#endif /* __TURBOC__ */
+
+
+#ifdef M_I86
+/* Microsoft C in 16-bit mode */
+
+#  define MY_ZCALLOC
+
+#if (!defined(_MSC_VER) || (_MSC_VER <= 600))
+#  define _halloc  halloc
+#  define _hfree   hfree
+#endif
+
+voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) /* Microsoft C variant */
+{
+    if (opaque) opaque = 0; /* to make compiler happy */
+    return _halloc((long)items, size); /* count is passed as long, element size as is */
+}
+
+void  zcfree (voidpf opaque, voidpf ptr) /* Microsoft C variant; pairs with zcalloc */
+{
+    if (opaque) opaque = 0; /* to make compiler happy */
+    _hfree(ptr); /* release memory obtained through _halloc */
+}
+
+#endif /* M_I86 */
+
+#endif /* SYS16BIT */
+
+
+#ifndef MY_ZCALLOC /* Any system without a special alloc function */
+
+#ifndef STDC
+extern voidp  malloc OF((uInt size));
+extern voidp  calloc OF((uInt items, uInt size));
+extern void   free   OF((voidpf ptr));
+#endif
+
+voidpf zcalloc (opaque, items, size) /* generic variant, used without MY_ZCALLOC */
+    voidpf opaque; /* not used for the allocation itself */
+    unsigned items; /* number of elements */
+    unsigned size; /* size of one element */
+{
+    if (opaque) items += size - size; /* adds 0; only there to make compiler happy */
+    return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) : /* NOTE(review): product is not checked for wraparound */
+                              (voidpf)calloc(items, size); /* taken when sizeof(uInt) <= 2 */
+}
+
+void  zcfree (opaque, ptr) /* generic variant; pairs with the generic zcalloc */
+    voidpf opaque; /* not used for the release itself */
+    voidpf ptr; /* block to release */
+{
+    free(ptr); /* hand the block back to the C library allocator */
+    if (opaque) return; /* no effect; make compiler happy */
+}
+
+#endif /* MY_ZCALLOC */
diff --git a/src/zlib/zutil.h b/src/zlib/zutil.h
new file mode 100644
index 0000000..d74046a
--- /dev/null
+++ b/src/zlib/zutil.h
@@ -0,0 +1,269 @@
+/* zutil.h -- internal interface and configuration of the compression library
+ * Copyright (C) 1995-2005 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* @(#) $Id: zutil.h,v 1.1 2008/10/17 06:17:16 scuri Exp $ */
+
+#ifndef ZUTIL_H
+#define ZUTIL_H
+
+#define ZLIB_INTERNAL
+#include "zlib.h"
+
+#ifdef STDC
+#  ifndef _WIN32_WCE
+#    include <stddef.h>
+#  endif
+#  include <string.h>
+#  include <stdlib.h>
+#endif
+#ifdef NO_ERRNO_H
+#   ifdef _WIN32_WCE
+      /* The Microsoft C Run-Time Library for Windows CE doesn't have
+       * errno.  We define it as a global variable to simplify porting.
+       * Its value is always 0 and should not be used.  We rename it to
+       * avoid conflict with other libraries that use the same workaround.
+       */
+#     define errno z_errno
+#   endif
+    extern int errno;
+#else
+#  ifndef _WIN32_WCE
+#    include <errno.h>
+#  endif
+#endif
+
+#ifndef local
+#  define local static /* default: plain static linkage */
+#endif
+/* compile with -Dlocal= (empty) if your debugger can't find static symbols */
+
+typedef unsigned char  uch;  /* unsigned byte */
+typedef uch FAR uchf;        /* uch with the FAR qualifier */
+typedef unsigned short ush;  /* unsigned short */
+typedef ush FAR ushf;        /* ush with the FAR qualifier */
+typedef unsigned long  ulg;  /* unsigned long */
+extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
+/* (size given to avoid silly warnings with Visual C++) */
+/* ERR_MSG(err): entry of z_errmsg[] selected by Z_NEED_DICT-(err) */
+#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)]
+/* ERR_RETURN: store the message in strm->msg, then return err to the caller */
+#define ERR_RETURN(strm,err) \
+  return ((strm)->msg = (char*)ERR_MSG(err), (err))
+/* To be used only when the state is known to be valid */
+
+        /* common constants */
+
+#ifndef DEF_WBITS
+#  define DEF_WBITS MAX_WBITS
+#endif
+/* default windowBits for decompression. MAX_WBITS is for compression only */
+/* (DEF_WBITS may be predefined by the build; the #ifndef keeps that value) */
+#if MAX_MEM_LEVEL >= 8
+#  define DEF_MEM_LEVEL 8
+#else
+#  define DEF_MEM_LEVEL  MAX_MEM_LEVEL
+#endif
+/* default memLevel: 8, or MAX_MEM_LEVEL when that is smaller than 8 */
+
+#define STORED_BLOCK 0
+#define STATIC_TREES 1
+#define DYN_TREES    2
+/* The three kinds of block type */
+
+#define MIN_MATCH  3
+#define MAX_MATCH  258
+/* The minimum and maximum match lengths */
+
+#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
+
+        /* target dependencies */
+
+#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32))
+#  define OS_CODE  0x00
+#  if defined(__TURBOC__) || defined(__BORLANDC__)
+#    if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
+       /* ANSI-keywords-only mode: declare the far heap calls by hand */
+       void _Cdecl farfree( void *block );
+       void *_Cdecl farmalloc( unsigned long nbytes );
+#    else /* otherwise take the declarations from <alloc.h> */
+#      include <alloc.h>
+#    endif
+#  else /* MSC or DJGPP */
+#    include <malloc.h>
+#  endif
+#endif /* MSDOS || (WINDOWS && !WIN32) */
+
+#ifdef AMIGA
+#  define OS_CODE  0x01
+#endif
+/* VAX C / VMS: F_OPEN passes four extra option strings to fopen */
+#if defined(VAXC) || defined(VMS)
+#  define OS_CODE  0x02
+#  define F_OPEN(name, mode) \
+     fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
+#endif /* VAXC || VMS */
+
+#if defined(ATARI) || defined(atarist)
+#  define OS_CODE  0x05
+#endif
+
+#ifdef OS2
+#  define OS_CODE  0x06
+#  ifdef M_I86 /* M_I86 builds need <malloc.h> */
+#    include <malloc.h>
+#  endif
+#endif /* OS2 */
+
+#if defined(MACOS) || defined(TARGET_OS_MAC)
+#  define OS_CODE  0x07
+#  if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
+#    include <unix.h> /* for fdopen */
+#  else /* NOTE(review): TARGET_OS_MAC presumably also set on OS X -- confirm */
+#    ifndef fdopen
+#      define fdopen(fd,mode) NULL /* No fdopen() */
+#    endif
+#  endif
+#endif /* MACOS || TARGET_OS_MAC */
+
+#ifdef TOPS20
+#  define OS_CODE  0x0a
+#endif
+
+#ifdef WIN32
+#  ifndef __CYGWIN__  /* Cygwin is Unix, not Win32 */
+#    define OS_CODE  0x0b
+#  endif
+#endif
+
+#ifdef __50SERIES /* Prime/PRIMOS */
+#  define OS_CODE  0x0f
+#endif
+/* BeOS / RISC OS: calls to fdopen() are replaced by NULL */
+#if defined(_BEOS_) || defined(RISCOS)
+#  define fdopen(fd,mode) NULL /* No fdopen() */
+#endif
+
+#if (defined(_MSC_VER) && (_MSC_VER > 600))
+#  if defined(_WIN32_WCE)
+#    define fdopen(fd,mode) NULL /* No fdopen() */
+#    ifndef _PTRDIFF_T_DEFINED
+       typedef int ptrdiff_t; /* supplied here when not already defined */
+#      define _PTRDIFF_T_DEFINED
+#    endif
+#  else
+#    define fdopen(fd,type)  _fdopen(fd,type) /* forward to _fdopen */
+#  endif
+#endif /* _MSC_VER > 600 */
+
+        /* common defaults */
+
+#ifndef OS_CODE /* no target block above defined it */
+#  define OS_CODE  0x03  /* assume Unix */
+#endif
+/* F_OPEN defaults to plain two-argument fopen unless a target defined it */
+#ifndef F_OPEN
+#  define F_OPEN(name, mode) fopen((name), (mode))
+#endif
+
+         /* functions */
+
+#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif /* STDC99 || __TURBOC__ >= 0x550 */
+#if defined(__CYGWIN__)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif /* __CYGWIN__ */
+#ifndef HAVE_VSNPRINTF /* not claimed above: apply per-target assumptions */
+#  ifdef MSDOS
+     /* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
+        but for now we just assume it doesn't. */
+#    define NO_vsnprintf
+#  endif
+#  ifdef __TURBOC__
+#    define NO_vsnprintf
+#  endif
+#  ifdef WIN32
+     /* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */
+#    if !defined(vsnprintf) && !defined(NO_vsnprintf)
+#      define vsnprintf _vsnprintf
+#    endif
+#  endif
+#  ifdef __SASC
+#    define NO_vsnprintf
+#  endif
+#endif /* !HAVE_VSNPRINTF */
+#ifdef VMS /* applied regardless of HAVE_VSNPRINTF */
+#  define NO_vsnprintf
+#endif
+
+#if defined(pyr)
+#  define NO_MEMCPY
+#endif
+#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
+ /* Use our own functions for small and medium model with MSC <= 5.0.
+  * You may have to use the same strategy for Borland C (untested).
+  * The __SC__ check is for Symantec.
+  */
+#  define NO_MEMCPY
+#endif
+#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
+#  define HAVE_MEMCPY /* STDC and not vetoed above: use the library routines */
+#endif
+#ifdef HAVE_MEMCPY
+#  ifdef SMALL_MEDIUM /* MSDOS small or medium model */
+#    define zmemcpy _fmemcpy
+#    define zmemcmp _fmemcmp
+#    define zmemzero(dest, len) _fmemset(dest, 0, len)
+#  else
+#    define zmemcpy memcpy
+#    define zmemcmp memcmp
+#    define zmemzero(dest, len) memset(dest, 0, len)
+#  endif
+#else /* no HAVE_MEMCPY: zmem* are functions, only declared here */
+   extern void zmemcpy  OF((Bytef* dest, const Bytef* source, uInt len));
+   extern int  zmemcmp  OF((const Bytef* s1, const Bytef* s2, uInt len));
+   extern void zmemzero OF((Bytef* dest, uInt len));
+#endif /* HAVE_MEMCPY */
+
+/* Diagnostic functions: active under DEBUG, empty macros otherwise */
+#ifdef DEBUG
+#  include <stdio.h>
+   extern int z_verbose; /* compared against thresholds by the Trace macros */
+   extern void z_error    OF((char *m));
+#  define Assert(cond,msg) {if(!(cond)) z_error(msg);}
+#  define Trace(x) {if (z_verbose>=0) fprintf x ;} /* x: (stream, fmt, ...) */
+#  define Tracev(x) {if (z_verbose>0) fprintf x ;}
+#  define Tracevv(x) {if (z_verbose>1) fprintf x ;}
+#  define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;}
+#  define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;}
+#else /* !DEBUG: every diagnostic macro expands to nothing */
+#  define Assert(cond,msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c,x)
+#  define Tracecv(c,x)
+#endif /* DEBUG */
+
+
+voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size));
+void   zcfree  OF((voidpf opaque, voidpf ptr));
+/* ZALLOC/ZFREE call the stream's zalloc/zfree pointers, passing its opaque */
+#define ZALLOC(strm, items, size) \
+           (*((strm)->zalloc))((strm)->opaque, (items), (size))
+#define ZFREE(strm, addr)  (*((strm)->zfree))((strm)->opaque, (voidpf)(addr))
+#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} /* ZFREE only when p is non-zero */
+
+#endif /* ZUTIL_H */
-- 
cgit v1.2.3